summaryrefslogtreecommitdiffstats
path: root/src/contrib
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-07 15:24:08 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-07 15:24:08 +0000
commitf449f278dd3c70e479a035f50a9bb817a9b433ba (patch)
tree8ca2bfb785dda9bb4d573acdf9b42aea9cd51383 /src/contrib
parentInitial commit. (diff)
downloadknot-f449f278dd3c70e479a035f50a9bb817a9b433ba.tar.xz
knot-f449f278dd3c70e479a035f50a9bb817a9b433ba.zip
Adding upstream version 3.2.6.upstream/3.2.6upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to '')
-rw-r--r--src/contrib/Makefile.inc269
-rw-r--r--src/contrib/asan.h37
-rw-r--r--src/contrib/base32hex.c353
-rw-r--r--src/contrib/base32hex.h104
-rw-r--r--src/contrib/base64.c272
-rw-r--r--src/contrib/base64.h103
-rw-r--r--src/contrib/base64url.c287
-rw-r--r--src/contrib/base64url.h103
-rw-r--r--src/contrib/color.h31
-rw-r--r--src/contrib/conn_pool.c243
-rw-r--r--src/contrib/conn_pool.h100
-rw-r--r--src/contrib/ctype.h193
-rw-r--r--src/contrib/dnstap/convert.c142
-rw-r--r--src/contrib/dnstap/convert.h60
-rw-r--r--src/contrib/dnstap/dnstap.c41
-rw-r--r--src/contrib/dnstap/dnstap.h47
-rw-r--r--src/contrib/dnstap/dnstap.proto270
-rw-r--r--src/contrib/dnstap/message.c130
-rw-r--r--src/contrib/dnstap/message.h63
-rw-r--r--src/contrib/dnstap/reader.c103
-rw-r--r--src/contrib/dnstap/reader.h73
-rw-r--r--src/contrib/dnstap/writer.c120
-rw-r--r--src/contrib/dnstap/writer.h71
-rw-r--r--src/contrib/files.c254
-rw-r--r--src/contrib/files.h77
-rw-r--r--src/contrib/getline.c60
-rw-r--r--src/contrib/getline.h39
-rw-r--r--src/contrib/json.c237
-rw-r--r--src/contrib/json.h87
-rw-r--r--src/contrib/libbpf/LICENSE1
-rw-r--r--src/contrib/libbpf/bpf/bpf.c710
-rw-r--r--src/contrib/libbpf/bpf/bpf.h184
-rw-r--r--src/contrib/libbpf/bpf/bpf_core_read.h263
-rw-r--r--src/contrib/libbpf/bpf/bpf_endian.h72
-rw-r--r--src/contrib/libbpf/bpf/bpf_helper_defs.h2759
-rw-r--r--src/contrib/libbpf/bpf/bpf_helpers.h47
-rw-r--r--src/contrib/libbpf/bpf/bpf_prog_linfo.c246
-rw-r--r--src/contrib/libbpf/bpf/bpf_tracing.h195
-rw-r--r--src/contrib/libbpf/bpf/btf.c2884
-rw-r--r--src/contrib/libbpf/bpf/btf.h311
-rw-r--r--src/contrib/libbpf/bpf/btf_dump.c1386
-rw-r--r--src/contrib/libbpf/bpf/hashmap.c229
-rw-r--r--src/contrib/libbpf/bpf/hashmap.h178
-rw-r--r--src/contrib/libbpf/bpf/libbpf.c6581
-rw-r--r--src/contrib/libbpf/bpf/libbpf.h637
-rw-r--r--src/contrib/libbpf/bpf/libbpf_errno.c63
-rw-r--r--src/contrib/libbpf/bpf/libbpf_internal.h217
-rw-r--r--src/contrib/libbpf/bpf/libbpf_probes.c323
-rw-r--r--src/contrib/libbpf/bpf/libbpf_util.h47
-rw-r--r--src/contrib/libbpf/bpf/netlink.c451
-rw-r--r--src/contrib/libbpf/bpf/nlattr.c195
-rw-r--r--src/contrib/libbpf/bpf/nlattr.h106
-rw-r--r--src/contrib/libbpf/bpf/str_error.c18
-rw-r--r--src/contrib/libbpf/bpf/str_error.h6
-rw-r--r--src/contrib/libbpf/bpf/xsk.c797
-rw-r--r--src/contrib/libbpf/bpf/xsk.h246
-rw-r--r--src/contrib/libbpf/include/asm/barrier.h7
-rw-r--r--src/contrib/libbpf/include/linux/compiler.h70
-rw-r--r--src/contrib/libbpf/include/linux/err.h38
-rw-r--r--src/contrib/libbpf/include/linux/filter.h118
-rw-r--r--src/contrib/libbpf/include/linux/kernel.h44
-rw-r--r--src/contrib/libbpf/include/linux/list.h82
-rw-r--r--src/contrib/libbpf/include/linux/overflow.h90
-rw-r--r--src/contrib/libbpf/include/linux/ring_buffer.h18
-rw-r--r--src/contrib/libbpf/include/linux/types.h31
-rw-r--r--src/contrib/libbpf/include/uapi/linux/bpf.h3692
-rw-r--r--src/contrib/libbpf/include/uapi/linux/bpf_common.h57
-rw-r--r--src/contrib/libbpf/include/uapi/linux/btf.h165
-rw-r--r--src/contrib/libbpf/include/uapi/linux/if_link.h1033
-rw-r--r--src/contrib/libbpf/include/uapi/linux/if_xdp.h108
-rw-r--r--src/contrib/libbpf/include/uapi/linux/netlink.h252
-rw-r--r--src/contrib/libngtcp2/LICENSE1
-rw-r--r--src/contrib/libngtcp2/ngtcp2/crypto/gnutls.c644
-rw-r--r--src/contrib/libngtcp2/ngtcp2/crypto/shared.c1418
-rw-r--r--src/contrib/libngtcp2/ngtcp2/crypto/shared.h350
-rw-r--r--src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_acktr.c335
-rw-r--r--src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_acktr.h221
-rw-r--r--src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_addr.c117
-rw-r--r--src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_addr.h69
-rw-r--r--src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_balloc.c90
-rw-r--r--src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_balloc.h91
-rw-r--r--src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_bbr.c692
-rw-r--r--src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_bbr.h156
-rw-r--r--src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_bbr2.c1489
-rw-r--r--src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_bbr2.h149
-rw-r--r--src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_buf.c56
-rw-r--r--src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_buf.h108
-rw-r--r--src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_cc.c615
-rw-r--r--src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_cc.h421
-rw-r--r--src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_cid.c147
-rw-r--r--src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_cid.h175
-rw-r--r--src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_conn.c13698
-rw-r--r--src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_conn.h1115
-rw-r--r--src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_conv.c291
-rw-r--r--src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_conv.h208
-rw-r--r--src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_crypto.c895
-rw-r--r--src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_crypto.h148
-rw-r--r--src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_err.c154
-rw-r--r--src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_err.h34
-rw-r--r--src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_gaptr.c167
-rw-r--r--src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_gaptr.h98
-rw-r--r--src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_idtr.c79
-rw-r--r--src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_idtr.h89
-rw-r--r--src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_ksl.c819
-rw-r--r--src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_ksl.h345
-rw-r--r--src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_log.c822
-rw-r--r--src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_log.h123
-rw-r--r--src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_macro.h58
-rw-r--r--src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_map.c336
-rw-r--r--src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_map.h136
-rw-r--r--src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_mem.c113
-rw-r--r--src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_mem.h72
-rw-r--r--src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_net.h136
-rw-r--r--src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_objalloc.c40
-rw-r--r--src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_objalloc.h140
-rw-r--r--src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_opl.c46
-rw-r--r--src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_opl.h65
-rw-r--r--src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_path.c77
-rw-r--r--src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_path.h49
-rw-r--r--src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_pkt.c2527
-rw-r--r--src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_pkt.h1235
-rw-r--r--src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_pmtud.c160
-rw-r--r--src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_pmtud.h123
-rw-r--r--src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_ppe.c230
-rw-r--r--src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_ppe.h153
-rw-r--r--src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_pq.c164
-rw-r--r--src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_pq.h126
-rw-r--r--src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_pv.c172
-rw-r--r--src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_pv.h198
-rw-r--r--src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_qlog.c1218
-rw-r--r--src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_qlog.h161
-rw-r--r--src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_range.c61
-rw-r--r--src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_range.h80
-rw-r--r--src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_rcvry.h40
-rw-r--r--src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_ringbuf.c120
-rw-r--r--src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_ringbuf.h132
-rw-r--r--src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_rob.c319
-rw-r--r--src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_rob.h197
-rw-r--r--src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_rst.c137
-rw-r--r--src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_rst.h85
-rw-r--r--src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_rtb.c1676
-rw-r--r--src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_rtb.h467
-rw-r--r--src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_str.c233
-rw-r--r--src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_str.h94
-rw-r--r--src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_strm.c698
-rw-r--r--src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_strm.h310
-rw-r--r--src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_unreachable.c71
-rw-r--r--src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_unreachable.h46
-rw-r--r--src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_vec.c243
-rw-r--r--src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_vec.h120
-rw-r--r--src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_version.c39
-rw-r--r--src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_window_filter.c99
-rw-r--r--src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_window_filter.h65
-rw-r--r--src/contrib/libngtcp2/ngtcp2/ngtcp2.h5906
-rw-r--r--src/contrib/libngtcp2/ngtcp2/ngtcp2_crypto.h893
-rw-r--r--src/contrib/libngtcp2/ngtcp2/ngtcp2_crypto_gnutls.h107
-rw-r--r--src/contrib/libngtcp2/ngtcp2/version.h51
-rw-r--r--src/contrib/licenses/0BSD12
-rw-r--r--src/contrib/licenses/BSD-3-Clause21
-rw-r--r--src/contrib/licenses/LGPL-2.0339
-rw-r--r--src/contrib/licenses/LGPL-2.1503
-rw-r--r--src/contrib/licenses/MIT19
-rw-r--r--src/contrib/macros.h41
-rw-r--r--src/contrib/mempattern.c122
-rw-r--r--src/contrib/mempattern.h47
-rw-r--r--src/contrib/musl/inet_ntop.c79
-rw-r--r--src/contrib/musl/inet_ntop.h28
-rw-r--r--src/contrib/net.c747
-rw-r--r--src/contrib/net.h209
-rw-r--r--src/contrib/openbsd/LICENSE2
-rw-r--r--src/contrib/openbsd/siphash.c176
-rw-r--r--src/contrib/openbsd/siphash.h83
-rw-r--r--src/contrib/openbsd/strlcat.c48
-rw-r--r--src/contrib/openbsd/strlcat.h31
-rw-r--r--src/contrib/openbsd/strlcpy.c46
-rw-r--r--src/contrib/openbsd/strlcpy.h29
-rw-r--r--src/contrib/os.h37
-rw-r--r--src/contrib/proxyv2/proxyv2.c281
-rw-r--r--src/contrib/proxyv2/proxyv2.h29
-rw-r--r--src/contrib/qp-trie/trie.c1451
-rw-r--r--src/contrib/qp-trie/trie.h280
-rw-r--r--src/contrib/semaphore.c80
-rw-r--r--src/contrib/semaphore.h41
-rw-r--r--src/contrib/sockaddr.c368
-rw-r--r--src/contrib/sockaddr.h160
-rw-r--r--src/contrib/spinlock.h133
-rw-r--r--src/contrib/string.c247
-rw-r--r--src/contrib/string.h104
-rw-r--r--src/contrib/strtonum.h125
-rw-r--r--src/contrib/time.c441
-rw-r--r--src/contrib/time.h211
-rw-r--r--src/contrib/toeplitz.h122
-rw-r--r--src/contrib/tolower.h52
-rw-r--r--src/contrib/trim.h36
-rw-r--r--src/contrib/ucw/LICENSE1
-rw-r--r--src/contrib/ucw/array-sort.h195
-rw-r--r--src/contrib/ucw/binsearch.h50
-rw-r--r--src/contrib/ucw/heap.c153
-rw-r--r--src/contrib/ucw/heap.h46
-rw-r--r--src/contrib/ucw/lists.c264
-rw-r--r--src/contrib/ucw/lists.h74
-rw-r--r--src/contrib/ucw/mempool.c323
-rw-r--r--src/contrib/ucw/mempool.h124
-rw-r--r--src/contrib/url-parser/LICENSE1
-rw-r--r--src/contrib/url-parser/README.md14
-rw-r--r--src/contrib/url-parser/url_parser.c635
-rw-r--r--src/contrib/url-parser/url_parser.h64
-rw-r--r--src/contrib/vpool/vpool.c243
-rw-r--r--src/contrib/vpool/vpool.h60
-rw-r--r--src/contrib/wire_ctx.h355
210 files changed, 84761 insertions, 0 deletions
diff --git a/src/contrib/Makefile.inc b/src/contrib/Makefile.inc
new file mode 100644
index 0000000..64a0279
--- /dev/null
+++ b/src/contrib/Makefile.inc
@@ -0,0 +1,269 @@
+noinst_LTLIBRARIES += libcontrib.la
+
+libcontrib_la_CPPFLAGS = $(AM_CPPFLAGS) $(CFLAG_VISIBILITY)
+libcontrib_la_LDFLAGS = $(AM_LDFLAGS) $(LDFLAG_EXCLUDE_LIBS)
+libcontrib_la_LIBADD = $(pthread_LIBS)
+libcontrib_LIBS = libcontrib.la
+if USE_GNUTLS_MEMSET
+libcontrib_la_CPPFLAGS += $(gnutls_CFLAGS)
+libcontrib_LIBS += $(gnutls_LIBS)
+endif USE_GNUTLS_MEMSET
+
+EXTRA_DIST += \
+ contrib/licenses/0BSD \
+ contrib/licenses/BSD-3-Clause \
+ contrib/licenses/LGPL-2.0 \
+ contrib/licenses/LGPL-2.1 \
+ contrib/licenses/MIT \
+ contrib/libbpf/LICENSE \
+ contrib/libngtcp2/LICENSE \
+ contrib/openbsd/LICENSE \
+ contrib/ucw/LICENSE \
+ contrib/url-parser/LICENSE \
+ contrib/url-parser/README.md \
+ contrib/dnstap/dnstap.proto
+
+libcontrib_la_SOURCES = \
+ contrib/asan.h \
+ contrib/base32hex.c \
+ contrib/base32hex.h \
+ contrib/base64.c \
+ contrib/base64.h \
+ contrib/base64url.c \
+ contrib/base64url.h \
+ contrib/conn_pool.c \
+ contrib/conn_pool.h \
+ contrib/color.h \
+ contrib/ctype.h \
+ contrib/files.c \
+ contrib/files.h \
+ contrib/getline.c \
+ contrib/getline.h \
+ contrib/json.c \
+ contrib/json.h \
+ contrib/macros.h \
+ contrib/mempattern.c \
+ contrib/mempattern.h \
+ contrib/musl/inet_ntop.c \
+ contrib/musl/inet_ntop.h \
+ contrib/net.c \
+ contrib/net.h \
+ contrib/os.h \
+ contrib/qp-trie/trie.c \
+ contrib/qp-trie/trie.h \
+ contrib/semaphore.c \
+ contrib/semaphore.h \
+ contrib/sockaddr.c \
+ contrib/sockaddr.h \
+ contrib/spinlock.h \
+ contrib/string.c \
+ contrib/string.h \
+ contrib/strtonum.h \
+ contrib/time.c \
+ contrib/time.h \
+ contrib/toeplitz.h \
+ contrib/tolower.h \
+ contrib/trim.h \
+ contrib/wire_ctx.h \
+ contrib/openbsd/siphash.c \
+ contrib/openbsd/siphash.h \
+ contrib/openbsd/strlcat.c \
+ contrib/openbsd/strlcat.h \
+ contrib/openbsd/strlcpy.c \
+ contrib/openbsd/strlcpy.h \
+ contrib/proxyv2/proxyv2.c \
+ contrib/proxyv2/proxyv2.h \
+ contrib/ucw/array-sort.h \
+ contrib/ucw/binsearch.h \
+ contrib/ucw/heap.c \
+ contrib/ucw/heap.h \
+ contrib/ucw/lists.c \
+ contrib/ucw/lists.h \
+ contrib/ucw/mempool.c \
+ contrib/ucw/mempool.h \
+ contrib/url-parser/url_parser.c \
+ contrib/url-parser/url_parser.h \
+ contrib/vpool/vpool.c \
+ contrib/vpool/vpool.h
+
+if EMBEDDED_LIBBPF
+noinst_LTLIBRARIES += libembbpf.la
+
+libembbpf_la_CPPFLAGS = $(AM_CPPFLAGS) $(CFLAG_VISIBILITY) $(embedded_libbpf_CFLAGS)
+libembbpf_la_LDFLAGS = $(AM_LDFLAGS) $(LDFLAG_EXCLUDE_LIBS)
+libembbpf_LIBS = libembbpf.la $(embedded_libbpf_LIBS)
+
+libembbpf_la_SOURCES = \
+ contrib/libbpf/include/asm/barrier.h \
+ contrib/libbpf/include/linux/compiler.h \
+ contrib/libbpf/include/linux/err.h \
+ contrib/libbpf/include/linux/filter.h \
+ contrib/libbpf/include/linux/kernel.h \
+ contrib/libbpf/include/linux/list.h \
+ contrib/libbpf/include/linux/overflow.h \
+ contrib/libbpf/include/linux/ring_buffer.h \
+ contrib/libbpf/include/linux/types.h \
+ contrib/libbpf/include/uapi/linux/bpf_common.h \
+ contrib/libbpf/include/uapi/linux/bpf.h \
+ contrib/libbpf/include/uapi/linux/btf.h \
+ contrib/libbpf/include/uapi/linux/if_link.h \
+ contrib/libbpf/include/uapi/linux/if_xdp.h \
+ contrib/libbpf/include/uapi/linux/netlink.h \
+ contrib/libbpf/bpf/bpf.c \
+ contrib/libbpf/bpf/bpf.h \
+ contrib/libbpf/bpf/bpf_core_read.h \
+ contrib/libbpf/bpf/bpf_endian.h \
+ contrib/libbpf/bpf/bpf_helper_defs.h \
+ contrib/libbpf/bpf/bpf_helpers.h \
+ contrib/libbpf/bpf/bpf_prog_linfo.c \
+ contrib/libbpf/bpf/bpf_tracing.h \
+ contrib/libbpf/bpf/btf.c \
+ contrib/libbpf/bpf/btf.h \
+ contrib/libbpf/bpf/btf_dump.c \
+ contrib/libbpf/bpf/hashmap.c \
+ contrib/libbpf/bpf/hashmap.h \
+ contrib/libbpf/bpf/libbpf.c \
+ contrib/libbpf/bpf/libbpf.h \
+ contrib/libbpf/bpf/libbpf_errno.c \
+ contrib/libbpf/bpf/libbpf_internal.h \
+ contrib/libbpf/bpf/libbpf_probes.c \
+ contrib/libbpf/bpf/libbpf_util.h \
+ contrib/libbpf/bpf/netlink.c \
+ contrib/libbpf/bpf/nlattr.c \
+ contrib/libbpf/bpf/nlattr.h \
+ contrib/libbpf/bpf/str_error.c \
+ contrib/libbpf/bpf/str_error.h \
+ contrib/libbpf/bpf/xsk.c \
+ contrib/libbpf/bpf/xsk.h
+endif EMBEDDED_LIBBPF
+
+if HAVE_LIBDNSTAP
+noinst_LTLIBRARIES += libdnstap.la
+
+libdnstap_la_CPPFLAGS = $(AM_CPPFLAGS) $(CFLAG_VISIBILITY) $(DNSTAP_CFLAGS)
+libdnstap_la_LDFLAGS = $(AM_LDFLAGS) $(LDFLAG_EXCLUDE_LIBS)
+libdnstap_LIBS = libdnstap.la $(DNSTAP_LIBS)
+
+SUFFIXES = .proto .pb-c.c .pb-c.h
+
+.proto.pb-c.c:
+ $(AM_V_GEN)@PROTOC_C@ --c_out=. -I$(srcdir) $<
+
+.proto.pb-c.h:
+ $(AM_V_GEN)@PROTOC_C@ --c_out=. -I$(srcdir) $<
+
+libdnstap_la_SOURCES = \
+ contrib/dnstap/convert.c \
+ contrib/dnstap/convert.h \
+ contrib/dnstap/dnstap.c \
+ contrib/dnstap/dnstap.h \
+ contrib/dnstap/message.c \
+ contrib/dnstap/message.h \
+ contrib/dnstap/reader.c \
+ contrib/dnstap/reader.h \
+ contrib/dnstap/writer.c \
+ contrib/dnstap/writer.h
+
+nodist_libdnstap_la_SOURCES = \
+ contrib/dnstap/dnstap.pb-c.c \
+ contrib/dnstap/dnstap.pb-c.h
+
+BUILT_SOURCES += $(nodist_libdnstap_la_SOURCES)
+CLEANFILES += $(nodist_libdnstap_la_SOURCES)
+endif HAVE_LIBDNSTAP
+
+if EMBEDDED_LIBNGTCP2
+noinst_LTLIBRARIES += libembngtcp2.la
+
+libembngtcp2_la_CPPFLAGS = $(AM_CPPFLAGS) $(CFLAG_VISIBILITY) \
+ $(embedded_libngtcp2_CFLAGS) $(gnutls_CFLAGS)
+libembngtcp2_la_LDFLAGS = $(AM_LDFLAGS) $(LDFLAG_EXCLUDE_LIBS)
+libembngtcp2_LIBS = libembngtcp2.la $(embedded_libngtcp2_LIBS) $(gnutls_LIBS)
+
+libembngtcp2_la_SOURCES = \
+ contrib/libngtcp2/ngtcp2/crypto/gnutls.c \
+ contrib/libngtcp2/ngtcp2/crypto/shared.c \
+ contrib/libngtcp2/ngtcp2/crypto/shared.h \
+ contrib/libngtcp2/ngtcp2/lib/ngtcp2_acktr.c \
+ contrib/libngtcp2/ngtcp2/lib/ngtcp2_acktr.h \
+ contrib/libngtcp2/ngtcp2/lib/ngtcp2_addr.c \
+ contrib/libngtcp2/ngtcp2/lib/ngtcp2_addr.h \
+ contrib/libngtcp2/ngtcp2/lib/ngtcp2_balloc.c \
+ contrib/libngtcp2/ngtcp2/lib/ngtcp2_balloc.h \
+ contrib/libngtcp2/ngtcp2/lib/ngtcp2_bbr.c \
+ contrib/libngtcp2/ngtcp2/lib/ngtcp2_bbr.h \
+ contrib/libngtcp2/ngtcp2/lib/ngtcp2_bbr2.c \
+ contrib/libngtcp2/ngtcp2/lib/ngtcp2_bbr2.h \
+ contrib/libngtcp2/ngtcp2/lib/ngtcp2_buf.c \
+ contrib/libngtcp2/ngtcp2/lib/ngtcp2_buf.h \
+ contrib/libngtcp2/ngtcp2/lib/ngtcp2_cc.c \
+ contrib/libngtcp2/ngtcp2/lib/ngtcp2_cc.h \
+ contrib/libngtcp2/ngtcp2/lib/ngtcp2_cid.c \
+ contrib/libngtcp2/ngtcp2/lib/ngtcp2_cid.h \
+ contrib/libngtcp2/ngtcp2/lib/ngtcp2_conn.c \
+ contrib/libngtcp2/ngtcp2/lib/ngtcp2_conn.h \
+ contrib/libngtcp2/ngtcp2/lib/ngtcp2_conv.c \
+ contrib/libngtcp2/ngtcp2/lib/ngtcp2_conv.h \
+ contrib/libngtcp2/ngtcp2/lib/ngtcp2_crypto.c \
+ contrib/libngtcp2/ngtcp2/lib/ngtcp2_crypto.h \
+ contrib/libngtcp2/ngtcp2/lib/ngtcp2_err.c \
+ contrib/libngtcp2/ngtcp2/lib/ngtcp2_err.h \
+ contrib/libngtcp2/ngtcp2/lib/ngtcp2_gaptr.c \
+ contrib/libngtcp2/ngtcp2/lib/ngtcp2_gaptr.h \
+ contrib/libngtcp2/ngtcp2/lib/ngtcp2_idtr.c \
+ contrib/libngtcp2/ngtcp2/lib/ngtcp2_idtr.h \
+ contrib/libngtcp2/ngtcp2/lib/ngtcp2_ksl.c \
+ contrib/libngtcp2/ngtcp2/lib/ngtcp2_ksl.h \
+ contrib/libngtcp2/ngtcp2/lib/ngtcp2_log.c \
+ contrib/libngtcp2/ngtcp2/lib/ngtcp2_log.h \
+ contrib/libngtcp2/ngtcp2/lib/ngtcp2_macro.h \
+ contrib/libngtcp2/ngtcp2/lib/ngtcp2_map.c \
+ contrib/libngtcp2/ngtcp2/lib/ngtcp2_map.h \
+ contrib/libngtcp2/ngtcp2/lib/ngtcp2_mem.c \
+ contrib/libngtcp2/ngtcp2/lib/ngtcp2_mem.h \
+ contrib/libngtcp2/ngtcp2/lib/ngtcp2_net.h \
+ contrib/libngtcp2/ngtcp2/lib/ngtcp2_objalloc.c \
+ contrib/libngtcp2/ngtcp2/lib/ngtcp2_objalloc.h \
+ contrib/libngtcp2/ngtcp2/lib/ngtcp2_opl.c \
+ contrib/libngtcp2/ngtcp2/lib/ngtcp2_opl.h \
+ contrib/libngtcp2/ngtcp2/lib/ngtcp2_path.c \
+ contrib/libngtcp2/ngtcp2/lib/ngtcp2_path.h \
+ contrib/libngtcp2/ngtcp2/lib/ngtcp2_pkt.c \
+ contrib/libngtcp2/ngtcp2/lib/ngtcp2_pkt.h \
+ contrib/libngtcp2/ngtcp2/lib/ngtcp2_pmtud.c \
+ contrib/libngtcp2/ngtcp2/lib/ngtcp2_pmtud.h \
+ contrib/libngtcp2/ngtcp2/lib/ngtcp2_ppe.c \
+ contrib/libngtcp2/ngtcp2/lib/ngtcp2_ppe.h \
+ contrib/libngtcp2/ngtcp2/lib/ngtcp2_pq.c \
+ contrib/libngtcp2/ngtcp2/lib/ngtcp2_pq.h \
+ contrib/libngtcp2/ngtcp2/lib/ngtcp2_pv.c \
+ contrib/libngtcp2/ngtcp2/lib/ngtcp2_pv.h \
+ contrib/libngtcp2/ngtcp2/lib/ngtcp2_qlog.c \
+ contrib/libngtcp2/ngtcp2/lib/ngtcp2_qlog.h \
+ contrib/libngtcp2/ngtcp2/lib/ngtcp2_range.c \
+ contrib/libngtcp2/ngtcp2/lib/ngtcp2_range.h \
+ contrib/libngtcp2/ngtcp2/lib/ngtcp2_rcvry.h \
+ contrib/libngtcp2/ngtcp2/lib/ngtcp2_ringbuf.c \
+ contrib/libngtcp2/ngtcp2/lib/ngtcp2_ringbuf.h \
+ contrib/libngtcp2/ngtcp2/lib/ngtcp2_rob.c \
+ contrib/libngtcp2/ngtcp2/lib/ngtcp2_rob.h \
+ contrib/libngtcp2/ngtcp2/lib/ngtcp2_rst.c \
+ contrib/libngtcp2/ngtcp2/lib/ngtcp2_rst.h \
+ contrib/libngtcp2/ngtcp2/lib/ngtcp2_rtb.c \
+ contrib/libngtcp2/ngtcp2/lib/ngtcp2_rtb.h \
+ contrib/libngtcp2/ngtcp2/lib/ngtcp2_str.c \
+ contrib/libngtcp2/ngtcp2/lib/ngtcp2_str.h \
+ contrib/libngtcp2/ngtcp2/lib/ngtcp2_strm.c \
+ contrib/libngtcp2/ngtcp2/lib/ngtcp2_strm.h \
+ contrib/libngtcp2/ngtcp2/lib/ngtcp2_unreachable.c \
+ contrib/libngtcp2/ngtcp2/lib/ngtcp2_unreachable.h \
+ contrib/libngtcp2/ngtcp2/lib/ngtcp2_vec.c \
+ contrib/libngtcp2/ngtcp2/lib/ngtcp2_vec.h \
+ contrib/libngtcp2/ngtcp2/lib/ngtcp2_version.c \
+ contrib/libngtcp2/ngtcp2/lib/ngtcp2_window_filter.c \
+ contrib/libngtcp2/ngtcp2/lib/ngtcp2_window_filter.h \
+ contrib/libngtcp2/ngtcp2/ngtcp2.h \
+ contrib/libngtcp2/ngtcp2/ngtcp2_crypto.h \
+ contrib/libngtcp2/ngtcp2/ngtcp2_crypto_gnutls.h \
+ contrib/libngtcp2/ngtcp2/version.h
+endif EMBEDDED_LIBNGTCP2
diff --git a/src/contrib/asan.h b/src/contrib/asan.h
new file mode 100644
index 0000000..5feb2c1
--- /dev/null
+++ b/src/contrib/asan.h
@@ -0,0 +1,37 @@
+/* Copyright (C) 2015 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+/*
+ * see sanitizer/asan_interface.h in compiler-rt (LLVM)
+ */
+#ifndef __has_feature
+ #define __has_feature(feature) 0
+#endif
+#if __has_feature(address_sanitizer) || defined(__SANITIZE_ADDRESS__)
+ void __asan_poison_memory_region(void const volatile *addr, size_t size);
+ void __asan_unpoison_memory_region(void const volatile *addr, size_t size);
+ #define ASAN_POISON_MEMORY_REGION(addr, size) \
+ __asan_poison_memory_region((addr), (size))
+ #define ASAN_UNPOISON_MEMORY_REGION(addr, size) \
+ __asan_unpoison_memory_region((addr), (size))
+#else
+ #define ASAN_POISON_MEMORY_REGION(addr, size) \
+ ((void)(addr), (void)(size))
+ #define ASAN_UNPOISON_MEMORY_REGION(addr, size) \
+ ((void)(addr), (void)(size))
+#endif
diff --git a/src/contrib/base32hex.c b/src/contrib/base32hex.c
new file mode 100644
index 0000000..ad216d2
--- /dev/null
+++ b/src/contrib/base32hex.c
@@ -0,0 +1,353 @@
+/* Copyright (C) 2020 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+#include "contrib/base32hex.h"
+#include "libknot/errcode.h"
+
+#include <stdlib.h>
+#include <stdint.h>
+
+/*! \brief Maximal length of binary input to Base32hex encoding. */
+#define MAX_BIN_DATA_LEN ((INT32_MAX / 8) * 5)
+
+/*! \brief Base32hex padding character. */
+static const uint8_t base32hex_pad = '=';
+/*! \brief Base32hex alphabet. */
+static const uint8_t base32hex_enc[] = "0123456789abcdefghijklmnopqrstuv";
+
+/*! \brief Indicates bad Base32hex character. */
+#define KO 255
+/*! \brief Indicates Base32hex padding character. */
+#define PD 32
+
+/*! \brief Transformation and validation table for decoding Base32hex. */
+static const uint8_t base32hex_dec[256] = {
+ [ 0] = KO, [ 43] = KO, ['V'] = 31, [129] = KO, [172] = KO, [215] = KO,
+ [ 1] = KO, [ 44] = KO, ['W'] = KO, [130] = KO, [173] = KO, [216] = KO,
+ [ 2] = KO, [ 45] = KO, ['X'] = KO, [131] = KO, [174] = KO, [217] = KO,
+ [ 3] = KO, [ 46] = KO, ['Y'] = KO, [132] = KO, [175] = KO, [218] = KO,
+ [ 4] = KO, [ 47] = KO, ['Z'] = KO, [133] = KO, [176] = KO, [219] = KO,
+ [ 5] = KO, ['0'] = 0, [ 91] = KO, [134] = KO, [177] = KO, [220] = KO,
+ [ 6] = KO, ['1'] = 1, [ 92] = KO, [135] = KO, [178] = KO, [221] = KO,
+ [ 7] = KO, ['2'] = 2, [ 93] = KO, [136] = KO, [179] = KO, [222] = KO,
+ [ 8] = KO, ['3'] = 3, [ 94] = KO, [137] = KO, [180] = KO, [223] = KO,
+ [ 9] = KO, ['4'] = 4, [ 95] = KO, [138] = KO, [181] = KO, [224] = KO,
+ [ 10] = KO, ['5'] = 5, [ 96] = KO, [139] = KO, [182] = KO, [225] = KO,
+ [ 11] = KO, ['6'] = 6, ['a'] = 10, [140] = KO, [183] = KO, [226] = KO,
+ [ 12] = KO, ['7'] = 7, ['b'] = 11, [141] = KO, [184] = KO, [227] = KO,
+ [ 13] = KO, ['8'] = 8, ['c'] = 12, [142] = KO, [185] = KO, [228] = KO,
+ [ 14] = KO, ['9'] = 9, ['d'] = 13, [143] = KO, [186] = KO, [229] = KO,
+ [ 15] = KO, [ 58] = KO, ['e'] = 14, [144] = KO, [187] = KO, [230] = KO,
+ [ 16] = KO, [ 59] = KO, ['f'] = 15, [145] = KO, [188] = KO, [231] = KO,
+ [ 17] = KO, [ 60] = KO, ['g'] = 16, [146] = KO, [189] = KO, [232] = KO,
+ [ 18] = KO, ['='] = PD, ['h'] = 17, [147] = KO, [190] = KO, [233] = KO,
+ [ 19] = KO, [ 62] = KO, ['i'] = 18, [148] = KO, [191] = KO, [234] = KO,
+ [ 20] = KO, [ 63] = KO, ['j'] = 19, [149] = KO, [192] = KO, [235] = KO,
+ [ 21] = KO, [ 64] = KO, ['k'] = 20, [150] = KO, [193] = KO, [236] = KO,
+ [ 22] = KO, ['A'] = 10, ['l'] = 21, [151] = KO, [194] = KO, [237] = KO,
+ [ 23] = KO, ['B'] = 11, ['m'] = 22, [152] = KO, [195] = KO, [238] = KO,
+ [ 24] = KO, ['C'] = 12, ['n'] = 23, [153] = KO, [196] = KO, [239] = KO,
+ [ 25] = KO, ['D'] = 13, ['o'] = 24, [154] = KO, [197] = KO, [240] = KO,
+ [ 26] = KO, ['E'] = 14, ['p'] = 25, [155] = KO, [198] = KO, [241] = KO,
+ [ 27] = KO, ['F'] = 15, ['q'] = 26, [156] = KO, [199] = KO, [242] = KO,
+ [ 28] = KO, ['G'] = 16, ['r'] = 27, [157] = KO, [200] = KO, [243] = KO,
+ [ 29] = KO, ['H'] = 17, ['s'] = 28, [158] = KO, [201] = KO, [244] = KO,
+ [ 30] = KO, ['I'] = 18, ['t'] = 29, [159] = KO, [202] = KO, [245] = KO,
+ [ 31] = KO, ['J'] = 19, ['u'] = 30, [160] = KO, [203] = KO, [246] = KO,
+ [ 32] = KO, ['K'] = 20, ['v'] = 31, [161] = KO, [204] = KO, [247] = KO,
+ [ 33] = KO, ['L'] = 21, ['w'] = KO, [162] = KO, [205] = KO, [248] = KO,
+ [ 34] = KO, ['M'] = 22, ['x'] = KO, [163] = KO, [206] = KO, [249] = KO,
+ [ 35] = KO, ['N'] = 23, ['y'] = KO, [164] = KO, [207] = KO, [250] = KO,
+ [ 36] = KO, ['O'] = 24, ['z'] = KO, [165] = KO, [208] = KO, [251] = KO,
+ [ 37] = KO, ['P'] = 25, [123] = KO, [166] = KO, [209] = KO, [252] = KO,
+ [ 38] = KO, ['Q'] = 26, [124] = KO, [167] = KO, [210] = KO, [253] = KO,
+ [ 39] = KO, ['R'] = 27, [125] = KO, [168] = KO, [211] = KO, [254] = KO,
+ [ 40] = KO, ['S'] = 28, [126] = KO, [169] = KO, [212] = KO, [255] = KO,
+ [ 41] = KO, ['T'] = 29, [127] = KO, [170] = KO, [213] = KO,
+ [ 42] = KO, ['U'] = 30, [128] = KO, [171] = KO, [214] = KO,
+};
+
+int32_t knot_base32hex_encode(const uint8_t *in,
+ const uint32_t in_len,
+ uint8_t *out,
+ const uint32_t out_len)
+{
+ // Checking inputs.
+ if (in == NULL || out == NULL) {
+ return KNOT_EINVAL;
+ }
+ if (in_len > MAX_BIN_DATA_LEN || out_len < ((in_len + 4) / 5) * 8) {
+ return KNOT_ERANGE;
+ }
+
+ uint8_t rest_len = in_len % 5;
+ const uint8_t *stop = in + in_len - rest_len;
+ uint8_t *text = out;
+
+ // Encoding loop takes 5 bytes and creates 8 characters.
+ while (in < stop) {
+ text[0] = base32hex_enc[in[0] >> 3];
+ text[1] = base32hex_enc[(in[0] & 0x07) << 2 | in[1] >> 6];
+ text[2] = base32hex_enc[(in[1] & 0x3E) >> 1];
+ text[3] = base32hex_enc[(in[1] & 0x01) << 4 | in[2] >> 4];
+ text[4] = base32hex_enc[(in[2] & 0x0F) << 1 | in[3] >> 7];
+ text[5] = base32hex_enc[(in[3] & 0x7C) >> 2];
+ text[6] = base32hex_enc[(in[3] & 0x03) << 3 | in[4] >> 5];
+ text[7] = base32hex_enc[in[4] & 0x1F];
+ text += 8;
+ in += 5;
+ }
+
+ // Processing of padding, if any.
+ switch (rest_len) {
+ case 4:
+ text[0] = base32hex_enc[in[0] >> 3];
+ text[1] = base32hex_enc[(in[0] & 0x07) << 2 | in[1] >> 6];
+ text[2] = base32hex_enc[(in[1] & 0x3E) >> 1];
+ text[3] = base32hex_enc[(in[1] & 0x01) << 4 | in[2] >> 4];
+ text[4] = base32hex_enc[(in[2] & 0x0F) << 1 | in[3] >> 7];
+ text[5] = base32hex_enc[(in[3] & 0x7C) >> 2];
+ text[6] = base32hex_enc[(in[3] & 0x03) << 3];
+ text[7] = base32hex_pad;
+ text += 8;
+ break;
+ case 3:
+ text[0] = base32hex_enc[in[0] >> 3];
+ text[1] = base32hex_enc[(in[0] & 0x07) << 2 | in[1] >> 6];
+ text[2] = base32hex_enc[(in[1] & 0x3E) >> 1];
+ text[3] = base32hex_enc[(in[1] & 0x01) << 4 | in[2] >> 4];
+ text[4] = base32hex_enc[(in[2] & 0x0F) << 1];
+ text[5] = base32hex_pad;
+ text[6] = base32hex_pad;
+ text[7] = base32hex_pad;
+ text += 8;
+ break;
+ case 2:
+ text[0] = base32hex_enc[in[0] >> 3];
+ text[1] = base32hex_enc[(in[0] & 0x07) << 2 | in[1] >> 6];
+ text[2] = base32hex_enc[(in[1] & 0x3E) >> 1];
+ text[3] = base32hex_enc[(in[1] & 0x01) << 4];
+ text[4] = base32hex_pad;
+ text[5] = base32hex_pad;
+ text[6] = base32hex_pad;
+ text[7] = base32hex_pad;
+ text += 8;
+ break;
+ case 1:
+ text[0] = base32hex_enc[in[0] >> 3];
+ text[1] = base32hex_enc[(in[0] & 0x07) << 2];
+ text[2] = base32hex_pad;
+ text[3] = base32hex_pad;
+ text[4] = base32hex_pad;
+ text[5] = base32hex_pad;
+ text[6] = base32hex_pad;
+ text[7] = base32hex_pad;
+ text += 8;
+ break;
+ }
+
+ return (text - out);
+}
+
+int32_t knot_base32hex_encode_alloc(const uint8_t *in,
+ const uint32_t in_len,
+ uint8_t **out)
+{
+ // Checking inputs.
+ if (out == NULL) {
+ return KNOT_EINVAL;
+ }
+ if (in_len > MAX_BIN_DATA_LEN) {
+ return KNOT_ERANGE;
+ }
+
+ // Compute output buffer length.
+ uint32_t out_len = ((in_len + 4) / 5) * 8;
+
+ // Allocate output buffer.
+ *out = malloc(out_len);
+ if (*out == NULL) {
+ return KNOT_ENOMEM;
+ }
+
+ // Encode data.
+ int32_t ret = knot_base32hex_encode(in, in_len, *out, out_len);
+ if (ret < 0) {
+ free(*out);
+ *out = NULL;
+ }
+
+ return ret;
+}
+
+int32_t knot_base32hex_decode(const uint8_t *in,
+ const uint32_t in_len,
+ uint8_t *out,
+ const uint32_t out_len)
+{
+ // Checking inputs.
+ if (in == NULL || out == NULL) {
+ return KNOT_EINVAL;
+ }
+ if (in_len > INT32_MAX || out_len < ((in_len + 7) / 8) * 5) {
+ return KNOT_ERANGE;
+ }
+ if ((in_len % 8) != 0) {
+ return KNOT_BASE32HEX_ESIZE;
+ }
+
+ const uint8_t *stop = in + in_len;
+ uint8_t *bin = out;
+ uint8_t pad_len = 0;
+ uint8_t c1, c2, c3, c4, c5, c6, c7, c8;
+
+ // Decoding loop takes 8 characters and creates 5 bytes.
+ while (in < stop) {
+ // Filling and transforming 8 Base32hex chars.
+ c1 = base32hex_dec[in[0]];
+ c2 = base32hex_dec[in[1]];
+ c3 = base32hex_dec[in[2]];
+ c4 = base32hex_dec[in[3]];
+ c5 = base32hex_dec[in[4]];
+ c6 = base32hex_dec[in[5]];
+ c7 = base32hex_dec[in[6]];
+ c8 = base32hex_dec[in[7]];
+
+ // Check 8. char if is bad or padding.
+ if (c8 >= PD) {
+ if (c8 == PD && pad_len == 0) {
+ pad_len = 1;
+ } else {
+ return KNOT_BASE32HEX_ECHAR;
+ }
+ }
+
+ // Check 7. char if is bad or padding (if so, 6. must be too).
+ if (c7 >= PD) {
+ if (c7 == PD && c6 == PD && pad_len == 1) {
+ pad_len = 3;
+ } else {
+ return KNOT_BASE32HEX_ECHAR;
+ }
+ }
+
+ // Check 6. char if is bad or padding.
+ if (c6 >= PD) {
+ if (!(c6 == PD && pad_len == 3)) {
+ return KNOT_BASE32HEX_ECHAR;
+ }
+ }
+
+ // Check 5. char if is bad or padding.
+ if (c5 >= PD) {
+ if (c5 == PD && pad_len == 3) {
+ pad_len = 4;
+ } else {
+ return KNOT_BASE32HEX_ECHAR;
+ }
+ }
+
+ // Check 4. char if is bad or padding (if so, 3. must be too).
+ if (c4 >= PD) {
+ if (c4 == PD && c3 == PD && pad_len == 4) {
+ pad_len = 6;
+ } else {
+ return KNOT_BASE32HEX_ECHAR;
+ }
+ }
+
+ // Check 3. char if is bad or padding.
+ if (c3 >= PD) {
+ if (!(c3 == PD && pad_len == 6)) {
+ return KNOT_BASE32HEX_ECHAR;
+ }
+ }
+
+ // 1. and 2. chars must not be padding.
+ if (c2 >= PD || c1 >= PD) {
+ return KNOT_BASE32HEX_ECHAR;
+ }
+
+ // Computing of output data based on padding length.
+ switch (pad_len) {
+ case 0:
+ bin[4] = (c7 << 5) + c8;
+ // FALLTHROUGH
+ case 1:
+ bin[3] = (c5 << 7) + (c6 << 2) + (c7 >> 3);
+ // FALLTHROUGH
+ case 3:
+ bin[2] = (c4 << 4) + (c5 >> 1);
+ // FALLTHROUGH
+ case 4:
+ bin[1] = (c2 << 6) + (c3 << 1) + (c4 >> 4);
+ // FALLTHROUGH
+ case 6:
+ bin[0] = (c1 << 3) + (c2 >> 2);
+ }
+
+ // Update output end.
+ switch (pad_len) {
+ case 0:
+ bin += 5;
+ break;
+ case 1:
+ bin += 4;
+ break;
+ case 3:
+ bin += 3;
+ break;
+ case 4:
+ bin += 2;
+ break;
+ case 6:
+ bin += 1;
+ break;
+ }
+
+ in += 8;
+ }
+
+ return (bin - out);
+}
+
+int32_t knot_base32hex_decode_alloc(const uint8_t *in,
+ const uint32_t in_len,
+ uint8_t **out)
+{
+ // Checking inputs.
+ if (out == NULL) {
+ return KNOT_EINVAL;
+ }
+
+ // Compute output buffer length.
+ uint32_t out_len = ((in_len + 7) / 8) * 5;
+
+ // Allocate output buffer.
+ *out = malloc(out_len);
+ if (*out == NULL) {
+ return KNOT_ENOMEM;
+ }
+
+ // Decode data.
+ int32_t ret = knot_base32hex_decode(in, in_len, *out, out_len);
+ if (ret < 0) {
+ free(*out);
+ *out = NULL;
+ }
+
+ return ret;
+}
diff --git a/src/contrib/base32hex.h b/src/contrib/base32hex.h
new file mode 100644
index 0000000..4a6fd8f
--- /dev/null
+++ b/src/contrib/base32hex.h
@@ -0,0 +1,104 @@
+/* Copyright (C) 2020 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+/*!
+ * \brief Base32hex implementation (RFC 4648).
+ *
+ * \note Input Base32hex string can contain a-v characters. These characters
+ * are considered as A-V equivalent. Lower-case variant is used for encoding!
+ */
+
+#pragma once
+
+#include <stdint.h>
+
+/*!
+ * \brief Encodes binary data using Base32hex.
+ *
+ * \note Output data buffer contains Base32hex text string which isn't
+ * terminated with '\0'!
+ *
+ * \param in Input binary data.
+ * \param in_len Length of input data.
+ * \param out Output data buffer.
+ * \param out_len Size of output buffer.
+ *
+ * \retval >=0 length of output string.
+ * \retval KNOT_E* if error.
+ */
+int32_t knot_base32hex_encode(const uint8_t *in,
+ const uint32_t in_len,
+ uint8_t *out,
+ const uint32_t out_len);
+
+/*!
+ * \brief Encodes binary data using Base32hex and output stores to own buffer.
+ *
+ * \note Output data buffer contains Base32hex text string which isn't
+ * terminated with '\0'!
+ *
+ * \note Output buffer should be deallocated after use.
+ *
+ * \param in Input binary data.
+ * \param in_len Length of input data.
+ * \param out Output data buffer.
+ *
+ * \retval >=0 length of output string.
+ * \retval KNOT_E* if error.
+ */
+int32_t knot_base32hex_encode_alloc(const uint8_t *in,
+ const uint32_t in_len,
+ uint8_t **out);
+
+/*!
+ * \brief Decodes text data using Base32hex.
+ *
+ * \note Input data needn't be terminated with '\0'.
+ *
+ * \note Input data must be continuous Base32hex string!
+ *
+ * \param in Input text data.
+ * \param in_len Length of input string.
+ * \param out Output data buffer.
+ * \param out_len Size of output buffer.
+ *
+ * \retval >=0 length of output data.
+ * \retval KNOT_E* if error.
+ */
+int32_t knot_base32hex_decode(const uint8_t *in,
+ const uint32_t in_len,
+ uint8_t *out,
+ const uint32_t out_len);
+
+/*!
+ * \brief Decodes text data using Base32hex and output stores to own buffer.
+ *
+ * \note Input data needn't be terminated with '\0'.
+ *
+ * \note Input data must be continuous Base32hex string!
+ *
+ * \note Output buffer should be deallocated after use.
+ *
+ * \param in Input text data.
+ * \param in_len Length of input string.
+ * \param out Output data buffer.
+ *
+ * \retval >=0 length of output data.
+ * \retval KNOT_E* if error.
+ */
+int32_t knot_base32hex_decode_alloc(const uint8_t *in,
+ const uint32_t in_len,
+ uint8_t **out);
diff --git a/src/contrib/base64.c b/src/contrib/base64.c
new file mode 100644
index 0000000..ab4a560
--- /dev/null
+++ b/src/contrib/base64.c
@@ -0,0 +1,272 @@
+/* Copyright (C) 2020 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+#include "contrib/base64.h"
+#include "libknot/errcode.h"
+
+#include <stdlib.h>
+#include <stdint.h>
+
+/*! \brief Maximal length of binary input to Base64 encoding. */
+#define MAX_BIN_DATA_LEN ((INT32_MAX / 4) * 3)
+
+/*! \brief Base64 padding character. */
+static const uint8_t base64_pad = '=';
+/*! \brief Base64 alphabet. */
+static const uint8_t base64_enc[] =
+ "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
+
+/*! \brief Indicates bad Base64 character. */
+#define KO 255
+/*! \brief Indicates Base64 padding character. */
+#define PD 64
+
+/*! \brief Transformation and validation table for decoding Base64. */
+static const uint8_t base64_dec[256] = {
+ [ 0] = KO, ['+'] = 62, ['V'] = 21, [129] = KO, [172] = KO, [215] = KO,
+ [ 1] = KO, [ 44] = KO, ['W'] = 22, [130] = KO, [173] = KO, [216] = KO,
+ [ 2] = KO, [ 45] = KO, ['X'] = 23, [131] = KO, [174] = KO, [217] = KO,
+ [ 3] = KO, [ 46] = KO, ['Y'] = 24, [132] = KO, [175] = KO, [218] = KO,
+ [ 4] = KO, ['/'] = 63, ['Z'] = 25, [133] = KO, [176] = KO, [219] = KO,
+ [ 5] = KO, ['0'] = 52, [ 91] = KO, [134] = KO, [177] = KO, [220] = KO,
+ [ 6] = KO, ['1'] = 53, [ 92] = KO, [135] = KO, [178] = KO, [221] = KO,
+ [ 7] = KO, ['2'] = 54, [ 93] = KO, [136] = KO, [179] = KO, [222] = KO,
+ [ 8] = KO, ['3'] = 55, [ 94] = KO, [137] = KO, [180] = KO, [223] = KO,
+ [ 9] = KO, ['4'] = 56, [ 95] = KO, [138] = KO, [181] = KO, [224] = KO,
+ [ 10] = KO, ['5'] = 57, [ 96] = KO, [139] = KO, [182] = KO, [225] = KO,
+ [ 11] = KO, ['6'] = 58, ['a'] = 26, [140] = KO, [183] = KO, [226] = KO,
+ [ 12] = KO, ['7'] = 59, ['b'] = 27, [141] = KO, [184] = KO, [227] = KO,
+ [ 13] = KO, ['8'] = 60, ['c'] = 28, [142] = KO, [185] = KO, [228] = KO,
+ [ 14] = KO, ['9'] = 61, ['d'] = 29, [143] = KO, [186] = KO, [229] = KO,
+ [ 15] = KO, [ 58] = KO, ['e'] = 30, [144] = KO, [187] = KO, [230] = KO,
+ [ 16] = KO, [ 59] = KO, ['f'] = 31, [145] = KO, [188] = KO, [231] = KO,
+ [ 17] = KO, [ 60] = KO, ['g'] = 32, [146] = KO, [189] = KO, [232] = KO,
+ [ 18] = KO, ['='] = PD, ['h'] = 33, [147] = KO, [190] = KO, [233] = KO,
+ [ 19] = KO, [ 62] = KO, ['i'] = 34, [148] = KO, [191] = KO, [234] = KO,
+ [ 20] = KO, [ 63] = KO, ['j'] = 35, [149] = KO, [192] = KO, [235] = KO,
+ [ 21] = KO, [ 64] = KO, ['k'] = 36, [150] = KO, [193] = KO, [236] = KO,
+ [ 22] = KO, ['A'] = 0, ['l'] = 37, [151] = KO, [194] = KO, [237] = KO,
+ [ 23] = KO, ['B'] = 1, ['m'] = 38, [152] = KO, [195] = KO, [238] = KO,
+ [ 24] = KO, ['C'] = 2, ['n'] = 39, [153] = KO, [196] = KO, [239] = KO,
+ [ 25] = KO, ['D'] = 3, ['o'] = 40, [154] = KO, [197] = KO, [240] = KO,
+ [ 26] = KO, ['E'] = 4, ['p'] = 41, [155] = KO, [198] = KO, [241] = KO,
+ [ 27] = KO, ['F'] = 5, ['q'] = 42, [156] = KO, [199] = KO, [242] = KO,
+ [ 28] = KO, ['G'] = 6, ['r'] = 43, [157] = KO, [200] = KO, [243] = KO,
+ [ 29] = KO, ['H'] = 7, ['s'] = 44, [158] = KO, [201] = KO, [244] = KO,
+ [ 30] = KO, ['I'] = 8, ['t'] = 45, [159] = KO, [202] = KO, [245] = KO,
+ [ 31] = KO, ['J'] = 9, ['u'] = 46, [160] = KO, [203] = KO, [246] = KO,
+ [ 32] = KO, ['K'] = 10, ['v'] = 47, [161] = KO, [204] = KO, [247] = KO,
+ [ 33] = KO, ['L'] = 11, ['w'] = 48, [162] = KO, [205] = KO, [248] = KO,
+ [ 34] = KO, ['M'] = 12, ['x'] = 49, [163] = KO, [206] = KO, [249] = KO,
+ [ 35] = KO, ['N'] = 13, ['y'] = 50, [164] = KO, [207] = KO, [250] = KO,
+ [ 36] = KO, ['O'] = 14, ['z'] = 51, [165] = KO, [208] = KO, [251] = KO,
+ [ 37] = KO, ['P'] = 15, [123] = KO, [166] = KO, [209] = KO, [252] = KO,
+ [ 38] = KO, ['Q'] = 16, [124] = KO, [167] = KO, [210] = KO, [253] = KO,
+ [ 39] = KO, ['R'] = 17, [125] = KO, [168] = KO, [211] = KO, [254] = KO,
+ [ 40] = KO, ['S'] = 18, [126] = KO, [169] = KO, [212] = KO, [255] = KO,
+ [ 41] = KO, ['T'] = 19, [127] = KO, [170] = KO, [213] = KO,
+ [ 42] = KO, ['U'] = 20, [128] = KO, [171] = KO, [214] = KO,
+};
+
+int32_t knot_base64_encode(const uint8_t *in,
+ const uint32_t in_len,
+ uint8_t *out,
+ const uint32_t out_len)
+{
+ // Checking inputs.
+ if (in == NULL || out == NULL) {
+ return KNOT_EINVAL;
+ }
+ if (in_len > MAX_BIN_DATA_LEN || out_len < ((in_len + 2) / 3) * 4) {
+ return KNOT_ERANGE;
+ }
+
+ uint8_t rest_len = in_len % 3;
+ const uint8_t *stop = in + in_len - rest_len;
+ uint8_t *text = out;
+
+ // Encoding loop takes 3 bytes and creates 4 characters.
+ while (in < stop) {
+ text[0] = base64_enc[in[0] >> 2];
+ text[1] = base64_enc[(in[0] & 0x03) << 4 | in[1] >> 4];
+ text[2] = base64_enc[(in[1] & 0x0F) << 2 | in[2] >> 6];
+ text[3] = base64_enc[in[2] & 0x3F];
+ text += 4;
+ in += 3;
+ }
+
+ // Processing of padding, if any.
+ switch (rest_len) {
+ case 2:
+ text[0] = base64_enc[in[0] >> 2];
+ text[1] = base64_enc[(in[0] & 0x03) << 4 | in[1] >> 4];
+ text[2] = base64_enc[(in[1] & 0x0F) << 2];
+ text[3] = base64_pad;
+ text += 4;
+ break;
+ case 1:
+ text[0] = base64_enc[in[0] >> 2];
+ text[1] = base64_enc[(in[0] & 0x03) << 4];
+ text[2] = base64_pad;
+ text[3] = base64_pad;
+ text += 4;
+ break;
+ }
+
+ return (text - out);
+}
+
+int32_t knot_base64_encode_alloc(const uint8_t *in,
+ const uint32_t in_len,
+ uint8_t **out)
+{
+ // Checking inputs.
+ if (out == NULL) {
+ return KNOT_EINVAL;
+ }
+ if (in_len > MAX_BIN_DATA_LEN) {
+ return KNOT_ERANGE;
+ }
+
+ // Compute output buffer length.
+ uint32_t out_len = ((in_len + 2) / 3) * 4;
+
+ // Allocate output buffer.
+ *out = malloc(out_len);
+ if (*out == NULL) {
+ return KNOT_ENOMEM;
+ }
+
+ // Encode data.
+ int32_t ret = knot_base64_encode(in, in_len, *out, out_len);
+ if (ret < 0) {
+ free(*out);
+ *out = NULL;
+ }
+
+ return ret;
+}
+
+int32_t knot_base64_decode(const uint8_t *in,
+ const uint32_t in_len,
+ uint8_t *out,
+ const uint32_t out_len)
+{
+ // Checking inputs.
+ if (in == NULL || out == NULL) {
+ return KNOT_EINVAL;
+ }
+ if (in_len > INT32_MAX || out_len < ((in_len + 3) / 4) * 3) {
+ return KNOT_ERANGE;
+ }
+ if ((in_len % 4) != 0) {
+ return KNOT_BASE64_ESIZE;
+ }
+
+ const uint8_t *stop = in + in_len;
+ uint8_t *bin = out;
+ uint8_t pad_len = 0;
+ uint8_t c1, c2, c3, c4;
+
+ // Decoding loop takes 4 characters and creates 3 bytes.
+ while (in < stop) {
+ // Filling and transforming 4 Base64 chars.
+ c1 = base64_dec[in[0]];
+ c2 = base64_dec[in[1]];
+ c3 = base64_dec[in[2]];
+ c4 = base64_dec[in[3]];
+
+ // Check 4. char if is bad or padding.
+ if (c4 >= PD) {
+ if (c4 == PD && pad_len == 0) {
+ pad_len = 1;
+ } else {
+ return KNOT_BASE64_ECHAR;
+ }
+ }
+
+ // Check 3. char if is bad or padding.
+ if (c3 >= PD) {
+ if (c3 == PD && pad_len == 1) {
+ pad_len = 2;
+ } else {
+ return KNOT_BASE64_ECHAR;
+ }
+ }
+
+ // Check 1. and 2. chars if are not padding.
+ if (c2 >= PD || c1 >= PD) {
+ return KNOT_BASE64_ECHAR;
+ }
+
+ // Computing of output data based on padding length.
+ switch (pad_len) {
+ case 0:
+ bin[2] = (c3 << 6) + c4;
+ // FALLTHROUGH
+ case 1:
+ bin[1] = (c2 << 4) + (c3 >> 2);
+ // FALLTHROUGH
+ case 2:
+ bin[0] = (c1 << 2) + (c2 >> 4);
+ }
+
+ // Update output end.
+ switch (pad_len) {
+ case 0:
+ bin += 3;
+ break;
+ case 1:
+ bin += 2;
+ break;
+ case 2:
+ bin += 1;
+ break;
+ }
+
+ in += 4;
+ }
+
+ return (bin - out);
+}
+
+int32_t knot_base64_decode_alloc(const uint8_t *in,
+ const uint32_t in_len,
+ uint8_t **out)
+{
+ // Checking inputs.
+ if (out == NULL) {
+ return KNOT_EINVAL;
+ }
+
+ // Compute output buffer length.
+ uint32_t out_len = ((in_len + 3) / 4) * 3;
+
+ // Allocate output buffer.
+ *out = malloc(out_len);
+ if (*out == NULL) {
+ return KNOT_ENOMEM;
+ }
+
+ // Decode data.
+ int32_t ret = knot_base64_decode(in, in_len, *out, out_len);
+ if (ret < 0) {
+ free(*out);
+ *out = NULL;
+ }
+
+ return ret;
+}
diff --git a/src/contrib/base64.h b/src/contrib/base64.h
new file mode 100644
index 0000000..0a98733
--- /dev/null
+++ b/src/contrib/base64.h
@@ -0,0 +1,103 @@
+/* Copyright (C) 2020 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+/*!
+ * \brief Base64 implementation (RFC 4648).
+ */
+
+#pragma once
+
+#include <stdint.h>
+
+/*!
+ * \brief Encodes binary data using Base64.
+ *
+ * \note Output data buffer contains Base64 text string which isn't
+ * terminated with '\0'!
+ *
+ * \param in Input binary data.
+ * \param in_len Length of input data.
+ * \param out Output data buffer.
+ * \param out_len Size of output buffer.
+ *
+ * \retval >=0 length of output string.
+ * \retval KNOT_E* if error.
+ */
+int32_t knot_base64_encode(const uint8_t *in,
+ const uint32_t in_len,
+ uint8_t *out,
+ const uint32_t out_len);
+
+/*!
+ * \brief Encodes binary data using Base64 and output stores to own buffer.
+ *
+ * \note Output data buffer contains Base64 text string which isn't
+ * terminated with '\0'!
+ *
+ * \note Output buffer should be deallocated after use.
+ *
+ * \param in Input binary data.
+ * \param in_len Length of input data.
+ * \param out Output data buffer.
+ *
+ * \retval >=0 length of output string.
+ * \retval KNOT_E* if error.
+ */
+int32_t knot_base64_encode_alloc(const uint8_t *in,
+ const uint32_t in_len,
+ uint8_t **out);
+
+/*!
+ * \brief Decodes text data using Base64.
+ *
+ * \note Input data needn't be terminated with '\0'.
+ *
+ * \note Input data must be continuous Base64 string!
+ *
+ * \param in Input text data.
+ * \param in_len Length of input string.
+ * \param out Output data buffer.
+ * \param out_len Size of output buffer.
+ *
+ * \retval >=0 length of output data.
+ * \retval KNOT_E* if error.
+ */
+int32_t knot_base64_decode(const uint8_t *in,
+ const uint32_t in_len,
+ uint8_t *out,
+ const uint32_t out_len);
+
+/*!
+ * \brief Decodes text data using Base64 and output stores to own buffer.
+ *
+ * \note Input data needn't be terminated with '\0'.
+ *
+ * \note Input data must be continuous Base64 string!
+ *
+ * \note Output buffer should be deallocated after use.
+ *
+ * \param in Input text data.
+ * \param in_len Length of input string.
+ * \param out Output data buffer.
+ *
+ * \retval >=0 length of output data.
+ * \retval KNOT_E* if error.
+ */
+int32_t knot_base64_decode_alloc(const uint8_t *in,
+ const uint32_t in_len,
+ uint8_t **out);
+
+/*! @} */
diff --git a/src/contrib/base64url.c b/src/contrib/base64url.c
new file mode 100644
index 0000000..34e2bbe
--- /dev/null
+++ b/src/contrib/base64url.c
@@ -0,0 +1,287 @@
+/* Copyright (C) 2020 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+#include "contrib/base64url.h"
+#include "libknot/errcode.h"
+
+#include <stdlib.h>
+#include <stdint.h>
+#include <ctype.h>
+
+/*! \brief Maximal length of binary input to Base64url encoding. */
+#define MAX_BIN_DATA_LEN ((INT32_MAX / 4) * 3)
+
+/*! \brief Base64url padding character. */
+static const uint8_t base64url_pad = '\0';
+/*! \brief Base64 alphabet. */
+static const uint8_t base64url_enc[] =
+ "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_";
+
+/*! \brief Indicates bad Base64 character. */
+#define KO 255
+/*! \brief Indicates Base64 padding character. */
+#define PD 64
+
+/*! \brief Transformation and validation table for decoding Base64. */
+static const uint8_t base64url_dec[256] = {
+ [ 0] = PD, [ 43] = KO, ['V'] = 21, [129] = KO, [172] = KO, [215] = KO,
+ [ 1] = KO, [ 44] = KO, ['W'] = 22, [130] = KO, [173] = KO, [216] = KO,
+ [ 2] = KO, ['-'] = 62, ['X'] = 23, [131] = KO, [174] = KO, [217] = KO,
+ [ 3] = KO, [ 46] = KO, ['Y'] = 24, [132] = KO, [175] = KO, [218] = KO,
+ [ 4] = KO, [ 47] = KO, ['Z'] = 25, [133] = KO, [176] = KO, [219] = KO,
+ [ 5] = KO, ['0'] = 52, [ 91] = KO, [134] = KO, [177] = KO, [220] = KO,
+ [ 6] = KO, ['1'] = 53, [ 92] = KO, [135] = KO, [178] = KO, [221] = KO,
+ [ 7] = KO, ['2'] = 54, [ 93] = KO, [136] = KO, [179] = KO, [222] = KO,
+ [ 8] = KO, ['3'] = 55, [ 94] = KO, [137] = KO, [180] = KO, [223] = KO,
+ [ 9] = KO, ['4'] = 56, ['_'] = 63, [138] = KO, [181] = KO, [224] = KO,
+ [ 10] = KO, ['5'] = 57, [ 96] = KO, [139] = KO, [182] = KO, [225] = KO,
+ [ 11] = KO, ['6'] = 58, ['a'] = 26, [140] = KO, [183] = KO, [226] = KO,
+ [ 12] = KO, ['7'] = 59, ['b'] = 27, [141] = KO, [184] = KO, [227] = KO,
+ [ 13] = KO, ['8'] = 60, ['c'] = 28, [142] = KO, [185] = KO, [228] = KO,
+ [ 14] = KO, ['9'] = 61, ['d'] = 29, [143] = KO, [186] = KO, [229] = KO,
+ [ 15] = KO, [ 58] = KO, ['e'] = 30, [144] = KO, [187] = KO, [230] = KO,
+ [ 16] = KO, [ 59] = KO, ['f'] = 31, [145] = KO, [188] = KO, [231] = KO,
+ [ 17] = KO, [ 60] = KO, ['g'] = 32, [146] = KO, [189] = KO, [232] = KO,
+ [ 18] = KO, [ 61] = KO, ['h'] = 33, [147] = KO, [190] = KO, [233] = KO,
+ [ 19] = KO, [ 62] = KO, ['i'] = 34, [148] = KO, [191] = KO, [234] = KO,
+ [ 20] = KO, [ 63] = KO, ['j'] = 35, [149] = KO, [192] = KO, [235] = KO,
+ [ 21] = KO, [ 64] = KO, ['k'] = 36, [150] = KO, [193] = KO, [236] = KO,
+ [ 22] = KO, ['A'] = 0, ['l'] = 37, [151] = KO, [194] = KO, [237] = KO,
+ [ 23] = KO, ['B'] = 1, ['m'] = 38, [152] = KO, [195] = KO, [238] = KO,
+ [ 24] = KO, ['C'] = 2, ['n'] = 39, [153] = KO, [196] = KO, [239] = KO,
+ [ 25] = KO, ['D'] = 3, ['o'] = 40, [154] = KO, [197] = KO, [240] = KO,
+ [ 26] = KO, ['E'] = 4, ['p'] = 41, [155] = KO, [198] = KO, [241] = KO,
+ [ 27] = KO, ['F'] = 5, ['q'] = 42, [156] = KO, [199] = KO, [242] = KO,
+ [ 28] = KO, ['G'] = 6, ['r'] = 43, [157] = KO, [200] = KO, [243] = KO,
+ [ 29] = KO, ['H'] = 7, ['s'] = 44, [158] = KO, [201] = KO, [244] = KO,
+ [ 30] = KO, ['I'] = 8, ['t'] = 45, [159] = KO, [202] = KO, [245] = KO,
+ [ 31] = KO, ['J'] = 9, ['u'] = 46, [160] = KO, [203] = KO, [246] = KO,
+ [ 32] = KO, ['K'] = 10, ['v'] = 47, [161] = KO, [204] = KO, [247] = KO,
+ [ 33] = KO, ['L'] = 11, ['w'] = 48, [162] = KO, [205] = KO, [248] = KO,
+ [ 34] = KO, ['M'] = 12, ['x'] = 49, [163] = KO, [206] = KO, [249] = KO,
+ [ 35] = KO, ['N'] = 13, ['y'] = 50, [164] = KO, [207] = KO, [250] = KO,
+ [ 36] = KO, ['O'] = 14, ['z'] = 51, [165] = KO, [208] = KO, [251] = KO,
+ ['%'] = KO, ['P'] = 15, [123] = KO, [166] = KO, [209] = KO, [252] = KO,
+ [ 38] = KO, ['Q'] = 16, [124] = KO, [167] = KO, [210] = KO, [253] = KO,
+ [ 39] = KO, ['R'] = 17, [125] = KO, [168] = KO, [211] = KO, [254] = KO,
+ [ 40] = KO, ['S'] = 18, [126] = KO, [169] = KO, [212] = KO, [255] = KO,
+ [ 41] = KO, ['T'] = 19, [127] = KO, [170] = KO, [213] = KO,
+ [ 42] = KO, ['U'] = 20, [128] = KO, [171] = KO, [214] = KO,
+};
+
+int32_t knot_base64url_encode(const uint8_t *in,
+ const uint32_t in_len,
+ uint8_t *out,
+ const uint32_t out_len)
+{
+ // Checking inputs.
+ if (in == NULL || out == NULL) {
+ return KNOT_EINVAL;
+ }
+ if (in_len > MAX_BIN_DATA_LEN || out_len < ((in_len + 2) / 3) * 4) {
+ return KNOT_ERANGE;
+ }
+
+ uint8_t rest_len = in_len % 3;
+ const uint8_t *stop = in + in_len - rest_len;
+ uint8_t *text = out;
+
+ // Encoding loop takes 3 bytes and creates 4 characters.
+ while (in < stop) {
+ text[0] = base64url_enc[in[0] >> 2];
+ text[1] = base64url_enc[(in[0] & 0x03) << 4 | in[1] >> 4];
+ text[2] = base64url_enc[(in[1] & 0x0F) << 2 | in[2] >> 6];
+ text[3] = base64url_enc[in[2] & 0x3F];
+ text += 4;
+ in += 3;
+ }
+
+ // Processing of padding, if any.
+ switch (rest_len) {
+ case 2:
+ text[0] = base64url_enc[in[0] >> 2];
+ text[1] = base64url_enc[(in[0] & 0x03) << 4 | in[1] >> 4];
+ text[2] = base64url_enc[(in[1] & 0x0F) << 2];
+ text[3] = base64url_pad;
+ text += 3;
+ break;
+ case 1:
+ text[0] = base64url_enc[in[0] >> 2];
+ text[1] = base64url_enc[(in[0] & 0x03) << 4];
+ text[2] = base64url_pad;
+ text[3] = base64url_pad;
+ text += 2;
+ break;
+ }
+ return (text - out);
+}
+
+int32_t knot_base64url_encode_alloc(const uint8_t *in,
+ const uint32_t in_len,
+ uint8_t **out)
+{
+ // Checking inputs.
+ if (out == NULL) {
+ return KNOT_EINVAL;
+ }
+ if (in_len > MAX_BIN_DATA_LEN) {
+ return KNOT_ERANGE;
+ }
+
+ // Compute output buffer length.
+ uint32_t out_len = ((in_len + 2) / 3) * 4;
+
+ // Allocate output buffer.
+ *out = malloc(out_len);
+ if (*out == NULL) {
+ return KNOT_ENOMEM;
+ }
+
+ // Encode data.
+ int32_t ret = knot_base64url_encode(in, in_len, *out, out_len);
+ if (ret < 0) {
+ free(*out);
+ *out = NULL;
+ }
+
+ return ret;
+}
+
+int32_t knot_base64url_decode(const uint8_t *in,
+ uint32_t in_len,
+ uint8_t *out,
+ const uint32_t out_len)
+{
+ // Checking inputs.
+ if (in == NULL || out == NULL) {
+ return KNOT_EINVAL;
+ }
+
+ // cut up to two "%3d" from the end of input
+ int pad3d = 0;
+ const uint8_t *end = in + in_len;
+ char *perc3d = "d3%d3%", *stop3d = perc3d + 6;
+ while (end != in && perc3d != stop3d && tolower(*--end) == *perc3d) {
+ if (*perc3d++ == '%') {
+ in_len -= 3;
+ pad3d++;
+ }
+ }
+
+ if (in_len > INT32_MAX || out_len < ((in_len + 3) / 4) * 3) {
+ return KNOT_ERANGE;
+ }
+
+ const uint8_t *stop = in + in_len;
+ uint8_t *bin = out;
+ uint8_t pad_len = 0;
+ uint8_t c1, c2, c3, c4;
+
+ // Decoding loop takes 4 characters and creates 3 bytes.
+ while (in < stop) {
+ // Filling and transforming 4 Base64 chars.
+ c1 = base64url_dec[in[0]] ;
+ c2 = base64url_dec[in[1]] ;
+ c3 = (in + 2 < stop) ? base64url_dec[in[2]] : PD;
+ c4 = (in + 3 < stop) ? base64url_dec[in[3]] : PD;
+
+ // Check 1. and 2. chars if are not padding
+ if (c1 >= PD || c2 >= PD) {
+ return KNOT_BASE64_ECHAR;
+ }
+ // Check 3. char if is bad or padding.
+ else if (c3 >= PD) {
+ if (c3 == PD) {
+ pad_len = 2;
+ } else {
+ return KNOT_BASE64_ECHAR;
+ }
+ }
+ // Check 3. char if is bad or padding.
+ else if (c4 >= PD) {
+ if (c4 == PD) {
+ pad_len = 1;
+ } else {
+ return KNOT_BASE64_ECHAR;
+ }
+ }
+
+ if (pad_len > 0 && in <= stop - 4) {
+ return KNOT_BASE64_ECHAR;
+ }
+
+ // Computing of output data based on padding length.
+ switch (pad_len) {
+ case 0:
+ bin[2] = (c3 << 6) + c4;
+ // FALLTHROUGH
+ case 1:
+ bin[1] = (c2 << 4) + (c3 >> 2);
+ // FALLTHROUGH
+ case 2:
+ bin[0] = (c1 << 2) + (c2 >> 4);
+ }
+
+ // Update output end.
+ switch (pad_len) {
+ case 0:
+ bin += 3;
+ break;
+ case 1:
+ bin += 2;
+ goto end;
+ case 2:
+ bin += 1;
+ goto end;
+ }
+
+ in += 4;
+ }
+
+end:
+ if (pad3d > pad_len) {
+ return KNOT_BASE64_ECHAR;
+ }
+ return (bin - out);
+}
+
+int32_t knot_base64url_decode_alloc(const uint8_t *in,
+ const uint32_t in_len,
+ uint8_t **out)
+{
+ // Checking inputs.
+ if (out == NULL) {
+ return KNOT_EINVAL;
+ }
+
+ // Compute output buffer length.
+ uint32_t out_len = ((in_len + 3) / 4) * 3;
+
+ // Allocate output buffer.
+ *out = malloc(out_len);
+ if (*out == NULL) {
+ return KNOT_ENOMEM;
+ }
+
+ // Decode data.
+ int32_t ret = knot_base64url_decode(in, in_len, *out, out_len);
+ if (ret < 0) {
+ free(*out);
+ *out = NULL;
+ }
+
+ return ret;
+}
diff --git a/src/contrib/base64url.h b/src/contrib/base64url.h
new file mode 100644
index 0000000..ba4bb43
--- /dev/null
+++ b/src/contrib/base64url.h
@@ -0,0 +1,103 @@
+/* Copyright (C) 2020 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+/*!
+ * \brief Base64url implementation (RFC 4648).
+ */
+
+#pragma once
+
+#include <stdint.h>
+
+/*!
+ * \brief Encodes binary data using Base64.
+ *
+ * \note Output data buffer contains Base64 text string which isn't
+ * terminated with '\0'!
+ *
+ * \param in Input binary data.
+ * \param in_len Length of input data.
+ * \param out Output data buffer.
+ * \param out_len Size of output buffer.
+ *
+ * \retval >=0 length of output string.
+ * \retval KNOT_E* if error.
+ */
+int32_t knot_base64url_encode(const uint8_t *in,
+ const uint32_t in_len,
+ uint8_t *out,
+ const uint32_t out_len);
+
+/*!
+ * \brief Encodes binary data using Base64 and output stores to own buffer.
+ *
+ * \note Output data buffer contains Base64 text string which isn't
+ * terminated with '\0'!
+ *
+ * \note Output buffer should be deallocated after use.
+ *
+ * \param in Input binary data.
+ * \param in_len Length of input data.
+ * \param out Output data buffer.
+ *
+ * \retval >=0 length of output string.
+ * \retval KNOT_E* if error.
+ */
+int32_t knot_base64url_encode_alloc(const uint8_t *in,
+ const uint32_t in_len,
+ uint8_t **out);
+
+/*!
+ * \brief Decodes text data using Base64.
+ *
+ * \note Input data needn't be terminated with '\0'.
+ *
+ * \note Input data must be continuous Base64 string!
+ *
+ * \param in Input text data.
+ * \param in_len Length of input string.
+ * \param out Output data buffer.
+ * \param out_len Size of output buffer.
+ *
+ * \retval >=0 length of output data.
+ * \retval KNOT_E* if error.
+ */
+int32_t knot_base64url_decode(const uint8_t *in,
+ uint32_t in_len,
+ uint8_t *out,
+ const uint32_t out_len);
+
+/*!
+ * \brief Decodes text data using Base64 and output stores to own buffer.
+ *
+ * \note Input data needn't be terminated with '\0'.
+ *
+ * \note Input data must be continuous Base64 string!
+ *
+ * \note Output buffer should be deallocated after use.
+ *
+ * \param in Input text data.
+ * \param in_len Length of input string.
+ * \param out Output data buffer.
+ *
+ * \retval >=0 length of output data.
+ * \retval KNOT_E* if error.
+ */
+int32_t knot_base64url_decode_alloc(const uint8_t *in,
+ const uint32_t in_len,
+ uint8_t **out);
+
+/*! @} */
diff --git a/src/contrib/color.h b/src/contrib/color.h
new file mode 100644
index 0000000..6a7ff6a
--- /dev/null
+++ b/src/contrib/color.h
@@ -0,0 +1,31 @@
+/* Copyright (C) 2021 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#define COL_RST(active) ((active) ? "\x1B[0m" : "")
+
+#define COL_BOLD(active) ((active) ? "\x1B[1m" : "")
+#define COL_DIM(active) ((active) ? "\x1B[2m" : "")
+#define COL_UNDR(active) ((active) ? "\x1B[4m" : "")
+
+#define COL_RED(active) ((active) ? "\x1B[31m" : "")
+#define COL_GRN(active) ((active) ? "\x1B[32m" : "")
+#define COL_YELW(active) ((active) ? "\x1B[93m" : "")
+#define COL_BLUE(active) ((active) ? "\x1B[34m" : "")
+#define COL_MGNT(active) ((active) ? "\x1B[35m" : "")
+#define COL_CYAN(active) ((active) ? "\x1B[36m" : "")
+#define COL_WHIT(active) ((active) ? "\x1B[97m" : "")
diff --git a/src/contrib/conn_pool.c b/src/contrib/conn_pool.c
new file mode 100644
index 0000000..8367555
--- /dev/null
+++ b/src/contrib/conn_pool.c
@@ -0,0 +1,243 @@
+/* Copyright (C) 2022 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+#include <assert.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "contrib/conn_pool.h"
+
+#include "contrib/sockaddr.h"
+
+conn_pool_t *global_conn_pool = NULL;
+
+static int pool_pop(conn_pool_t *pool, size_t i);
+
+/*!
+ * \brief Try to get an open connection older than specified timestamp.
+ *
+ * \param pool Pool to search in.
+ * \param older_than Timestamp that the connection must be older than.
+ * \param next_oldest Out: the timestamp of the oldest connection (other than the returned).
+ *
+ * \return -1 if error (no such connection), >= 0 connection file descriptor.
+ *
+ * \warning The returned connection is not necessarily the oldest one.
+ */
+static int get_old(conn_pool_t *pool,
+ knot_time_t older_than,
+ knot_time_t *next_oldest)
+{
+ assert(pool);
+
+ *next_oldest = 0;
+
+ int fd = -1;
+ pthread_mutex_lock(&pool->mutex);
+
+ for (size_t i = 0; i < pool->capacity; i++) {
+ knot_time_t la = pool->conns[i].last_active;
+ if (fd == -1 && knot_time_cmp(la, older_than) < 0) {
+ fd = pool_pop(pool, i);
+ } else if (knot_time_cmp(la, *next_oldest) < 0) {
+ *next_oldest = la;
+ }
+ }
+
+ pthread_mutex_unlock(&pool->mutex);
+ return fd;
+}
+
+static void *closing_thread(void *_arg)
+{
+ conn_pool_t *pool = _arg;
+
+ while (true) {
+ knot_time_t now = knot_time(), next = 0;
+ knot_timediff_t timeout = conn_pool_timeout(pool, 0);
+ assert(timeout != 0);
+
+ while (true) {
+ int old_fd = get_old(pool, now - timeout + 1, &next);
+ if (old_fd >= 0) {
+ close(old_fd);
+ } else {
+ break;
+ }
+ }
+
+ if (next == 0) {
+ sleep(timeout);
+ } else {
+ sleep(next + timeout - now);
+ }
+ }
+
+ return NULL; // we never get here since the thread will be cancelled instead
+}
+
+conn_pool_t *conn_pool_init(size_t capacity, knot_timediff_t timeout)
+{
+ if (capacity == 0 || timeout == 0) {
+ return NULL;
+ }
+
+ conn_pool_t *pool = calloc(1, sizeof(*pool) + capacity * sizeof(pool->conns[0]));
+ if (pool != NULL) {
+ pool->capacity = capacity;
+ pool->timeout = timeout;
+ if (pthread_mutex_init(&pool->mutex, 0) != 0) {
+ free(pool);
+ return NULL;
+ }
+ if (pthread_create(&pool->closing_thread, NULL, closing_thread, pool) != 0) {
+ pthread_mutex_destroy(&pool->mutex);
+ free(pool);
+ return NULL;
+ }
+ }
+ return pool;
+}
+
+void conn_pool_deinit(conn_pool_t *pool)
+{
+ if (pool != NULL) {
+ pthread_cancel(pool->closing_thread);
+ pthread_join(pool->closing_thread, NULL);
+
+ int fd;
+ knot_time_t unused;
+ while ((fd = get_old(pool, 0, &unused)) >= 0) {
+ close(fd);
+ }
+
+ pthread_mutex_destroy(&pool->mutex);
+ free(pool);
+ }
+}
+
+knot_timediff_t conn_pool_timeout(conn_pool_t *pool,
+ knot_timediff_t new_timeout)
+{
+ if (pool == NULL) {
+ return 0;
+ }
+
+ pthread_mutex_lock(&pool->mutex);
+
+ knot_timediff_t prev = pool->timeout;
+ if (new_timeout != 0) {
+ pool->timeout = new_timeout;
+ }
+
+ pthread_mutex_unlock(&pool->mutex);
+ return prev;
+}
+
+static int pool_pop(conn_pool_t *pool, size_t i)
+{
+ conn_pool_memb_t *conn = &pool->conns[i];
+ assert(conn->last_active != 0);
+ assert(pool->usage > 0);
+ int fd = conn->fd;
+ memset(conn, 0, sizeof(*conn));
+ pool->usage--;
+ return fd;
+}
+
+int conn_pool_get(conn_pool_t *pool,
+ struct sockaddr_storage *src,
+ struct sockaddr_storage *dst)
+{
+ if (pool == NULL) {
+ return -1;
+ }
+
+ int fd = -1;
+ pthread_mutex_lock(&pool->mutex);
+
+ for (size_t i = 0; i < pool->capacity; i++) {
+ if (pool->conns[i].last_active != 0 &&
+ sockaddr_cmp(&pool->conns[i].dst, dst, false) == 0 &&
+ sockaddr_cmp(&pool->conns[i].src, src, true) == 0) {
+ fd = pool_pop(pool, i);
+ break;
+ }
+ }
+
+ pthread_mutex_unlock(&pool->mutex);
+
+ if (fd >= 0) {
+ uint8_t unused;
+ int peek = recv(fd, &unused, 1, MSG_PEEK | MSG_DONTWAIT);
+ if (peek >= 0) { // closed or pending data
+ close(fd);
+ fd = -1;
+ }
+ }
+
+ return fd;
+}
+
+static void pool_push(conn_pool_t *pool, size_t i,
+ struct sockaddr_storage *src,
+ struct sockaddr_storage *dst,
+ int fd)
+{
+ conn_pool_memb_t *conn = &pool->conns[i];
+ assert(conn->last_active == 0);
+ assert(pool->usage < pool->capacity);
+ conn->last_active = knot_time();
+ conn->fd = fd;
+ memcpy(&conn->src, src, sizeof(conn->src));
+ memcpy(&conn->dst, dst, sizeof(conn->dst));
+ pool->usage++;
+}
+
+int conn_pool_put(conn_pool_t *pool,
+ struct sockaddr_storage *src,
+ struct sockaddr_storage *dst,
+ int fd)
+{
+ if (pool == NULL || pool->capacity == 0) {
+ return fd;
+ }
+
+ knot_time_t oldest_time = 0;
+ size_t oldest_i = pool->capacity;
+
+ pthread_mutex_lock(&pool->mutex);
+
+ for (size_t i = 0; i < pool->capacity; i++) {
+ knot_time_t la = pool->conns[i].last_active;
+ if (la == 0) {
+ pool_push(pool, i, src, dst, fd);
+ pthread_mutex_unlock(&pool->mutex);
+ return -1;
+ } else if (knot_time_cmp(la, oldest_time) < 0) {
+ oldest_time = la;
+ oldest_i = i;
+ }
+ }
+
+ assert(oldest_i < pool->capacity);
+ int oldest_fd = pool_pop(pool, oldest_i);
+ pool_push(pool, oldest_i, src, dst, fd);
+ pthread_mutex_unlock(&pool->mutex);
+ return oldest_fd;
+}
diff --git a/src/contrib/conn_pool.h b/src/contrib/conn_pool.h
new file mode 100644
index 0000000..f1f2c6c
--- /dev/null
+++ b/src/contrib/conn_pool.h
@@ -0,0 +1,100 @@
+/* Copyright (C) 2021 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include <pthread.h>
+#include <stdbool.h>
+#include <sys/socket.h>
+
+#include "contrib/time.h"
+
+typedef struct {
+ struct sockaddr_storage src;
+ struct sockaddr_storage dst;
+ int fd;
+ knot_time_t last_active;
+} conn_pool_memb_t;
+
+typedef struct {
+ size_t capacity;
+ size_t usage;
+ knot_timediff_t timeout;
+ pthread_mutex_t mutex;
+ pthread_t closing_thread;
+ conn_pool_memb_t conns[];
+} conn_pool_t;
+
+extern conn_pool_t *global_conn_pool;
+
+/*!
+ * \brief Allocate connection pool.
+ *
+ * \param capacity Connection pool capacity (must be positive number).
+ * \param timeout Connection timeout (must be positive number).
+ *
+ * \return Connection pool or NULL if error.
+ */
+conn_pool_t *conn_pool_init(size_t capacity, knot_timediff_t timeout);
+
+/*!
+ * \brief Deallocate the pool, close all connections, terminate closing thread.
+ *
+ * \param pool Connection pool.
+ */
+void conn_pool_deinit(conn_pool_t *pool);
+
+/*!
+ * \brief Get and/or set connection timeout.
+ *
+ * \param pool Connection pool.
+ * \param new_timeout Optional: set new timeout (if positive number).
+ *
+ * \return Previous value of timeout.
+ */
+knot_timediff_t conn_pool_timeout(conn_pool_t *pool,
+ knot_timediff_t new_timeout);
+
+/*!
+ * \brief Try to get an open connection if present, check if alive.
+ *
+ * \param pool Pool to search in.
+ * \param src Connection source address.
+ * \param dst Connection destination address.
+ *
+ * \retval -1 If error (no such connection).
+ * \return >= 0 File descriptor of the connection.
+ */
+int conn_pool_get(conn_pool_t *pool,
+ struct sockaddr_storage *src,
+ struct sockaddr_storage *dst);
+
+/*!
+ * \brief Put an open connection to the pool, possibly displacing the oldest one there.
+ *
+ * \param pool Pool to insert into.
+ * \param src Connestion source address.
+ * \param dst Connection destination adress.
+ * \param fd Connection file descriptor.
+ *
+ * \retval -1 If connection stored to free slot.
+ * \retval fd If not able to store connection.
+ * \return >= 0 File descriptor of the displaced old connection.
+ */
+int conn_pool_put(conn_pool_t *pool,
+ struct sockaddr_storage *src,
+ struct sockaddr_storage *dst,
+ int fd);
diff --git a/src/contrib/ctype.h b/src/contrib/ctype.h
new file mode 100644
index 0000000..d1e9d27
--- /dev/null
+++ b/src/contrib/ctype.h
@@ -0,0 +1,193 @@
+/* Copyright (C) 2017 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+/*!
+ * \brief Locale-independent ctype functions.
+ */
+
+#pragma once
+
+#include <ctype.h>
+#include <stdbool.h>
+#include <stdint.h>
+
+enum {
+ CT_DIGIT = 1 << 0,
+ CT_UPPER = 1 << 1,
+ CT_LOWER = 1 << 2,
+ CT_XDIGT = 1 << 3,
+ CT_PUNCT = 1 << 4,
+ CT_PRINT = 1 << 5,
+ CT_SPACE = 1 << 6,
+};
+
+static const uint8_t char_mask[256] = {
+ // 0 - 8
+ ['\t'] = CT_SPACE,
+ ['\n'] = CT_SPACE,
+ ['\v'] = CT_SPACE,
+ ['\f'] = CT_SPACE,
+ ['\r'] = CT_SPACE,
+ // 14 - 31
+ [' '] = CT_PRINT | CT_SPACE,
+
+ ['!'] = CT_PRINT | CT_PUNCT,
+ ['"'] = CT_PRINT | CT_PUNCT,
+ ['#'] = CT_PRINT | CT_PUNCT,
+ ['$'] = CT_PRINT | CT_PUNCT,
+ ['%'] = CT_PRINT | CT_PUNCT,
+ ['&'] = CT_PRINT | CT_PUNCT,
+ ['\''] = CT_PRINT | CT_PUNCT,
+ ['('] = CT_PRINT | CT_PUNCT,
+ [')'] = CT_PRINT | CT_PUNCT,
+ ['*'] = CT_PRINT | CT_PUNCT,
+ ['+'] = CT_PRINT | CT_PUNCT,
+ [','] = CT_PRINT | CT_PUNCT,
+ ['-'] = CT_PRINT | CT_PUNCT,
+ ['.'] = CT_PRINT | CT_PUNCT,
+ ['/'] = CT_PRINT | CT_PUNCT,
+
+ ['0'] = CT_PRINT | CT_DIGIT | CT_XDIGT,
+ ['1'] = CT_PRINT | CT_DIGIT | CT_XDIGT,
+ ['2'] = CT_PRINT | CT_DIGIT | CT_XDIGT,
+ ['3'] = CT_PRINT | CT_DIGIT | CT_XDIGT,
+ ['4'] = CT_PRINT | CT_DIGIT | CT_XDIGT,
+ ['5'] = CT_PRINT | CT_DIGIT | CT_XDIGT,
+ ['6'] = CT_PRINT | CT_DIGIT | CT_XDIGT,
+ ['7'] = CT_PRINT | CT_DIGIT | CT_XDIGT,
+ ['8'] = CT_PRINT | CT_DIGIT | CT_XDIGT,
+ ['9'] = CT_PRINT | CT_DIGIT | CT_XDIGT,
+
+ [':'] = CT_PRINT | CT_PUNCT,
+ [';'] = CT_PRINT | CT_PUNCT,
+ ['<'] = CT_PRINT | CT_PUNCT,
+ ['='] = CT_PRINT | CT_PUNCT,
+ ['>'] = CT_PRINT | CT_PUNCT,
+ ['?'] = CT_PRINT | CT_PUNCT,
+ ['@'] = CT_PRINT | CT_PUNCT,
+
+ ['A'] = CT_PRINT | CT_UPPER | CT_XDIGT,
+ ['B'] = CT_PRINT | CT_UPPER | CT_XDIGT,
+ ['C'] = CT_PRINT | CT_UPPER | CT_XDIGT,
+ ['D'] = CT_PRINT | CT_UPPER | CT_XDIGT,
+ ['E'] = CT_PRINT | CT_UPPER | CT_XDIGT,
+ ['F'] = CT_PRINT | CT_UPPER | CT_XDIGT,
+ ['G'] = CT_PRINT | CT_UPPER,
+ ['H'] = CT_PRINT | CT_UPPER,
+ ['I'] = CT_PRINT | CT_UPPER,
+ ['J'] = CT_PRINT | CT_UPPER,
+ ['K'] = CT_PRINT | CT_UPPER,
+ ['L'] = CT_PRINT | CT_UPPER,
+ ['M'] = CT_PRINT | CT_UPPER,
+ ['N'] = CT_PRINT | CT_UPPER,
+ ['O'] = CT_PRINT | CT_UPPER,
+ ['P'] = CT_PRINT | CT_UPPER,
+ ['Q'] = CT_PRINT | CT_UPPER,
+ ['R'] = CT_PRINT | CT_UPPER,
+ ['S'] = CT_PRINT | CT_UPPER,
+ ['T'] = CT_PRINT | CT_UPPER,
+ ['U'] = CT_PRINT | CT_UPPER,
+ ['V'] = CT_PRINT | CT_UPPER,
+ ['W'] = CT_PRINT | CT_UPPER,
+ ['X'] = CT_PRINT | CT_UPPER,
+ ['Y'] = CT_PRINT | CT_UPPER,
+ ['Z'] = CT_PRINT | CT_UPPER,
+
+ ['['] = CT_PRINT | CT_PUNCT,
+ ['\\'] = CT_PRINT | CT_PUNCT,
+ [']'] = CT_PRINT | CT_PUNCT,
+ ['^'] = CT_PRINT | CT_PUNCT,
+ ['_'] = CT_PRINT | CT_PUNCT,
+ ['`'] = CT_PRINT | CT_PUNCT,
+
+ ['a'] = CT_PRINT | CT_LOWER | CT_XDIGT,
+ ['b'] = CT_PRINT | CT_LOWER | CT_XDIGT,
+ ['c'] = CT_PRINT | CT_LOWER | CT_XDIGT,
+ ['d'] = CT_PRINT | CT_LOWER | CT_XDIGT,
+ ['e'] = CT_PRINT | CT_LOWER | CT_XDIGT,
+ ['f'] = CT_PRINT | CT_LOWER | CT_XDIGT,
+ ['g'] = CT_PRINT | CT_LOWER,
+ ['h'] = CT_PRINT | CT_LOWER,
+ ['i'] = CT_PRINT | CT_LOWER,
+ ['j'] = CT_PRINT | CT_LOWER,
+ ['k'] = CT_PRINT | CT_LOWER,
+ ['l'] = CT_PRINT | CT_LOWER,
+ ['m'] = CT_PRINT | CT_LOWER,
+ ['n'] = CT_PRINT | CT_LOWER,
+ ['o'] = CT_PRINT | CT_LOWER,
+ ['p'] = CT_PRINT | CT_LOWER,
+ ['q'] = CT_PRINT | CT_LOWER,
+ ['r'] = CT_PRINT | CT_LOWER,
+ ['s'] = CT_PRINT | CT_LOWER,
+ ['t'] = CT_PRINT | CT_LOWER,
+ ['u'] = CT_PRINT | CT_LOWER,
+ ['v'] = CT_PRINT | CT_LOWER,
+ ['w'] = CT_PRINT | CT_LOWER,
+ ['x'] = CT_PRINT | CT_LOWER,
+ ['y'] = CT_PRINT | CT_LOWER,
+ ['z'] = CT_PRINT | CT_LOWER,
+
+ ['{'] = CT_PRINT | CT_PUNCT,
+ ['|'] = CT_PRINT | CT_PUNCT,
+ ['}'] = CT_PRINT | CT_PUNCT,
+ ['~'] = CT_PRINT | CT_PUNCT,
+ // 127 - 255
+};
+
+static inline bool is_alnum(uint8_t c)
+{
+ return char_mask[c] & (CT_DIGIT | CT_UPPER | CT_LOWER);
+}
+
+static inline bool is_alpha(uint8_t c)
+{
+ return char_mask[c] & (CT_UPPER | CT_LOWER);
+}
+
+static inline bool is_digit(uint8_t c)
+{
+ return char_mask[c] & CT_DIGIT;
+}
+
+static inline bool is_xdigit(uint8_t c)
+{
+ return char_mask[c] & CT_XDIGT;
+}
+
+static inline bool is_lower(uint8_t c)
+{
+ return char_mask[c] & CT_LOWER;
+}
+
+static inline bool is_upper(uint8_t c)
+{
+ return char_mask[c] & CT_UPPER;
+}
+
+static inline bool is_print(uint8_t c)
+{
+ return char_mask[c] & CT_PRINT;
+}
+
+static inline bool is_punct(uint8_t c)
+{
+ return char_mask[c] & CT_PUNCT;
+}
+
+static inline bool is_space(uint8_t c)
+{
+ return char_mask[c] & CT_SPACE;
+}
diff --git a/src/contrib/dnstap/convert.c b/src/contrib/dnstap/convert.c
new file mode 100644
index 0000000..93ee8e9
--- /dev/null
+++ b/src/contrib/dnstap/convert.c
@@ -0,0 +1,142 @@
+/* Copyright (C) 2016 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <netinet/in.h>
+#include <netinet/ip.h>
+
+#include "contrib/dnstap/convert.h"
+#include "contrib/dnstap/dnstap.pb-c.h"
+
+/*!
+ * \brief Translation between real and Dnstap value.
+ */
+typedef struct mapping {
+ int real;
+ int dnstap;
+} mapping_t;
+
+/*!
+ * \brief Mapping for network family.
+ */
+static const mapping_t SOCKET_FAMILY_MAPPING[] = {
+ { AF_INET, DNSTAP__SOCKET_FAMILY__INET },
+ { AF_INET6, DNSTAP__SOCKET_FAMILY__INET6 },
+ { 0 }
+};
+
+/*!
+ * \brief Mapping from network protocol.
+ */
+static const mapping_t SOCKET_PROTOCOL_MAPPING[] = {
+ { IPPROTO_UDP, DNSTAP__SOCKET_PROTOCOL__UDP },
+ { IPPROTO_TCP, DNSTAP__SOCKET_PROTOCOL__TCP },
+ { 0 }
+};
+
+/*!
+ * \brief Get Dnstap value for a given real value.
+ */
+static int encode(const mapping_t *mapping, int real)
+{
+ for (const mapping_t *m = mapping; m->real != 0; m += 1) {
+ if (m->real == real) {
+ return m->dnstap;
+ }
+ }
+
+ return 0;
+}
+
+/*!
+ * \brief Get real value for a given Dnstap value.
+ */
+static int decode(const mapping_t *mapping, int dnstap)
+{
+ for (const mapping_t *m = mapping; m->real != 0; m += 1) {
+ if (m->dnstap == dnstap) {
+ return m->real;
+ }
+ }
+
+ return 0;
+}
+
+/* -- public API ----------------------------------------------------------- */
+
+Dnstap__SocketFamily dt_family_encode(int family)
+{
+ return encode(SOCKET_FAMILY_MAPPING, family);
+}
+
+int dt_family_decode(Dnstap__SocketFamily dnstap_family)
+{
+ return decode(SOCKET_FAMILY_MAPPING, dnstap_family);
+}
+
+Dnstap__SocketProtocol dt_protocol_encode(int protocol)
+{
+ return encode(SOCKET_PROTOCOL_MAPPING, protocol);
+}
+
+int dt_protocol_decode(Dnstap__SocketProtocol dnstap_protocol)
+{
+ return decode(SOCKET_PROTOCOL_MAPPING, dnstap_protocol);
+}
+
+bool dt_message_type_is_query(Dnstap__Message__Type type)
+{
+ switch (type) {
+ case DNSTAP__MESSAGE__TYPE__AUTH_QUERY:
+ case DNSTAP__MESSAGE__TYPE__CLIENT_QUERY:
+ case DNSTAP__MESSAGE__TYPE__FORWARDER_QUERY:
+ case DNSTAP__MESSAGE__TYPE__RESOLVER_QUERY:
+ case DNSTAP__MESSAGE__TYPE__STUB_QUERY:
+ case DNSTAP__MESSAGE__TYPE__TOOL_QUERY:
+ return true;
+ default:
+ return false;
+ }
+}
+
+bool dt_message_type_is_response(Dnstap__Message__Type type)
+{
+ switch (type) {
+ case DNSTAP__MESSAGE__TYPE__AUTH_RESPONSE:
+ case DNSTAP__MESSAGE__TYPE__CLIENT_RESPONSE:
+ case DNSTAP__MESSAGE__TYPE__FORWARDER_RESPONSE:
+ case DNSTAP__MESSAGE__TYPE__RESOLVER_RESPONSE:
+ case DNSTAP__MESSAGE__TYPE__STUB_RESPONSE:
+ case DNSTAP__MESSAGE__TYPE__TOOL_RESPONSE:
+ return true;
+ default:
+ return false;
+ }
+}
+
+bool dt_message_role_is_initiator(Dnstap__Message__Type type)
+{
+ switch (type) {
+ case DNSTAP__MESSAGE__TYPE__AUTH_QUERY:
+ case DNSTAP__MESSAGE__TYPE__AUTH_RESPONSE:
+ case DNSTAP__MESSAGE__TYPE__CLIENT_QUERY:
+ case DNSTAP__MESSAGE__TYPE__CLIENT_RESPONSE:
+ return false;
+ default:
+ return true;
+ }
+}
diff --git a/src/contrib/dnstap/convert.h b/src/contrib/dnstap/convert.h
new file mode 100644
index 0000000..0e2a86a
--- /dev/null
+++ b/src/contrib/dnstap/convert.h
@@ -0,0 +1,60 @@
+/* Copyright (C) 2018 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+/*!
+ * \brief Dnstap identifiers conversions.
+ */
+
+#pragma once
+
+#include <stdbool.h>
+
+#include "contrib/dnstap/dnstap.pb-c.h"
+
+/*!
+ * \brief Get Dnstap socket family from the real one.
+ */
+Dnstap__SocketFamily dt_family_encode(int family);
+
+/*!
+ * \brief Get real socket family from the Dnstap one.
+ */
+int dt_family_decode(Dnstap__SocketFamily dnstap_family);
+
+/*!
+ * \brief Get Dnstap protocol from a real one.
+ */
+Dnstap__SocketProtocol dt_protocol_encode(int protocol);
+
+/*!
+ * \brief Get real protocol from the Dnstap one.
+ */
+int dt_protocol_decode(Dnstap__SocketProtocol dnstap_protocol);
+
+/*!
+ * Check if a message type is any type of a query.
+ */
+bool dt_message_type_is_query(Dnstap__Message__Type type);
+
+/*!
+ * Check if a message type is any type of a response.
+ */
+bool dt_message_type_is_response(Dnstap__Message__Type type);
+
+/*!
+ * Check if a message role is any type of an initiator.
+ */
+bool dt_message_role_is_initiator(Dnstap__Message__Type type);
diff --git a/src/contrib/dnstap/dnstap.c b/src/contrib/dnstap/dnstap.c
new file mode 100644
index 0000000..1516b4d
--- /dev/null
+++ b/src/contrib/dnstap/dnstap.c
@@ -0,0 +1,41 @@
+/* Copyright (C) 2014 Farsight Security, Inc. <software@farsightsecurity.com>
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+#include <stdint.h>
+#include <stdlib.h>
+
+#include "contrib/dnstap/dnstap.h"
+#include "contrib/dnstap/dnstap.pb-c.h"
+
+#define DNSTAP_INITIAL_BUF_SIZE 256
+
+uint8_t* dt_pack(const Dnstap__Dnstap *d, uint8_t **buf, size_t *sz)
+{
+ ProtobufCBufferSimple sbuf = { { NULL } };
+
+ sbuf.base.append = protobuf_c_buffer_simple_append;
+ sbuf.len = 0;
+ sbuf.alloced = DNSTAP_INITIAL_BUF_SIZE;
+ sbuf.data = malloc(sbuf.alloced);
+ if (sbuf.data == NULL) {
+ return NULL;
+ }
+ sbuf.must_free_data = 1;
+
+ *sz = dnstap__dnstap__pack_to_buffer(d, (ProtobufCBuffer *) &sbuf);
+ *buf = sbuf.data;
+ return *buf;
+}
diff --git a/src/contrib/dnstap/dnstap.h b/src/contrib/dnstap/dnstap.h
new file mode 100644
index 0000000..f7aecbc
--- /dev/null
+++ b/src/contrib/dnstap/dnstap.h
@@ -0,0 +1,47 @@
+/* Copyright (C) 2014 Farsight Security, Inc. <software@farsightsecurity.com>
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+/*!
+ * \author Robert Edmonds <edmonds@fsi.io>
+ *
+ * \brief Public interface for dnstap.
+ */
+
+#pragma once
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "contrib/dnstap/dnstap.pb-c.h"
+
+/*! \brief Frame Streams "Content Type" value for dnstap. */
+#define DNSTAP_CONTENT_TYPE "protobuf:dnstap.Dnstap"
+
+/*!
+ * \brief Serializes a filled out dnstap protobuf struct. Dynamically allocates
+ * storage for the serialized frame.
+ *
+ * \note This function returns a copy of its parameter return value 'buf' to
+ * make error checking slightly easier.
+ *
+ * \param d dnstap protobuf struct.
+ * \param[out] buf Serialized frame.
+ * \param[out] sz Size in bytes of the serialized frame.
+ *
+ * \return Serialized frame.
+ * \retval NULL if error.
+ */
+uint8_t* dt_pack(const Dnstap__Dnstap *d, uint8_t **buf, size_t *sz);
diff --git a/src/contrib/dnstap/dnstap.proto b/src/contrib/dnstap/dnstap.proto
new file mode 100644
index 0000000..ea5c77a
--- /dev/null
+++ b/src/contrib/dnstap/dnstap.proto
@@ -0,0 +1,270 @@
+// dnstap: flexible, structured event replication format for DNS software
+//
+// This file contains the protobuf schemas for the "dnstap" structured event
+// replication format for DNS software.
+
+// Written in 2013-2014 by Farsight Security, Inc.
+//
+// To the extent possible under law, the author(s) have dedicated all
+// copyright and related and neighboring rights to this file to the public
+// domain worldwide. This file is distributed without any warranty.
+//
+// You should have received a copy of the CC0 Public Domain Dedication along
+// with this file. If not, see:
+//
+// <http://creativecommons.org/publicdomain/zero/1.0/>.
+
+syntax = "proto2";
+
+package dnstap;
+
+// "Dnstap": this is the top-level dnstap type, which is a "union" type that
+// contains other kinds of dnstap payloads, although currently only one type
+// of dnstap payload is defined.
+// See: https://developers.google.com/protocol-buffers/docs/techniques#union
+message Dnstap {
+ // DNS server identity.
+ // If enabled, this is the identity string of the DNS server which generated
+ // this message. Typically this would be the same string as returned by an
+ // "NSID" (RFC 5001) query.
+ optional bytes identity = 1;
+
+ // DNS server version.
+ // If enabled, this is the version string of the DNS server which generated
+ // this message. Typically this would be the same string as returned by a
+ // "version.bind" query.
+ optional bytes version = 2;
+
+ // Extra data for this payload.
+ // This field can be used for adding an arbitrary byte-string annotation to
+ // the payload. No encoding or interpretation is applied or enforced.
+ optional bytes extra = 3;
+
+ // Identifies which field below is filled in.
+ enum Type {
+ MESSAGE = 1;
+ }
+ required Type type = 15;
+
+ // One of the following will be filled in.
+ optional Message message = 14;
+}
+
+// SocketFamily: the network protocol family of a socket. This specifies how
+// to interpret "network address" fields.
+enum SocketFamily {
+ INET = 1; // IPv4 (RFC 791)
+ INET6 = 2; // IPv6 (RFC 2460)
+}
+
+// SocketProtocol: the transport protocol of a socket. This specifies how to
+// interpret "transport port" fields.
+enum SocketProtocol {
+ UDP = 1; // User Datagram Protocol (RFC 768)
+ TCP = 2; // Transmission Control Protocol (RFC 793)
+}
+
+// Message: a wire-format (RFC 1035 section 4) DNS message and associated
+// metadata. Applications generating "Message" payloads should follow
+// certain requirements based on the MessageType, see below.
+message Message {
+
+ // There are eight types of "Message" defined that correspond to the
+ // four arrows in the following diagram, slightly modified from RFC 1035
+ // section 2:
+
+ // +---------+ +----------+ +--------+
+ // | | query | | query | |
+ // | Stub |-SQ--------CQ->| Recursive|-RQ----AQ->| Auth. |
+ // | Resolver| | Server | | Name |
+ // | |<-SR--------CR-| |<-RR----AR-| Server |
+ // +---------+ response | | response | |
+ // +----------+ +--------+
+
+ // Each arrow has two Type values each, one for each "end" of each arrow,
+ // because these are considered to be distinct events. Each end of each
+ // arrow on the diagram above has been marked with a two-letter Type
+ // mnemonic. Clockwise from upper left, these mnemonic values are:
+ //
+ // SQ: STUB_QUERY
+ // CQ: CLIENT_QUERY
+ // RQ: RESOLVER_QUERY
+ // AQ: AUTH_QUERY
+ // AR: AUTH_RESPONSE
+ // RR: RESOLVER_RESPONSE
+ // CR: CLIENT_RESPONSE
+ // SR: STUB_RESPONSE
+
+ // Two additional types of "Message" have been defined for the
+ // "forwarding" case where an upstream DNS server is responsible for
+ // further recursion. These are not shown on the diagram above, but have
+ // the following mnemonic values:
+
+ // FQ: FORWARDER_QUERY
+ // FR: FORWARDER_RESPONSE
+
+ // The "Message" Type values are defined below.
+
+ enum Type {
+ // AUTH_QUERY is a DNS query message received from a resolver by an
+ // authoritative name server, from the perspective of the authoritative
+ // name server.
+ AUTH_QUERY = 1;
+
+ // AUTH_RESPONSE is a DNS response message sent from an authoritative
+ // name server to a resolver, from the perspective of the authoritative
+ // name server.
+ AUTH_RESPONSE = 2;
+
+ // RESOLVER_QUERY is a DNS query message sent from a resolver to an
+ // authoritative name server, from the perspective of the resolver.
+ // Resolvers typically clear the RD (recursion desired) bit when
+ // sending queries.
+ RESOLVER_QUERY = 3;
+
+ // RESOLVER_RESPONSE is a DNS response message received from an
+ // authoritative name server by a resolver, from the perspective of
+ // the resolver.
+ RESOLVER_RESPONSE = 4;
+
+ // CLIENT_QUERY is a DNS query message sent from a client to a DNS
+ // server which is expected to perform further recursion, from the
+ // perspective of the DNS server. The client may be a stub resolver or
+ // forwarder or some other type of software which typically sets the RD
+ // (recursion desired) bit when querying the DNS server. The DNS server
+ // may be a simple forwarding proxy or it may be a full recursive
+ // resolver.
+ CLIENT_QUERY = 5;
+
+ // CLIENT_RESPONSE is a DNS response message sent from a DNS server to
+ // a client, from the perspective of the DNS server. The DNS server
+ // typically sets the RA (recursion available) bit when responding.
+ CLIENT_RESPONSE = 6;
+
+ // FORWARDER_QUERY is a DNS query message sent from a downstream DNS
+ // server to an upstream DNS server which is expected to perform
+ // further recursion, from the perspective of the downstream DNS
+ // server.
+ FORWARDER_QUERY = 7;
+
+ // FORWARDER_RESPONSE is a DNS response message sent from an upstream
+ // DNS server performing recursion to a downstream DNS server, from the
+ // perspective of the downstream DNS server.
+ FORWARDER_RESPONSE = 8;
+
+ // STUB_QUERY is a DNS query message sent from a stub resolver to a DNS
+ // server, from the perspective of the stub resolver.
+ STUB_QUERY = 9;
+
+ // STUB_RESPONSE is a DNS response message sent from a DNS server to a
+ // stub resolver, from the perspective of the stub resolver.
+ STUB_RESPONSE = 10;
+
+ // TOOL_QUERY is a DNS query message sent from a DNS software tool to a
+ // DNS server, from the perspective of the tool.
+ TOOL_QUERY = 11;
+
+ // TOOL_RESPONSE is a DNS response message received by a DNS software
+ // tool from a DNS server, from the perspective of the tool.
+ TOOL_RESPONSE = 12;
+ }
+
+ // One of the Type values described above.
+ required Type type = 1;
+
+ // One of the SocketFamily values described above.
+ optional SocketFamily socket_family = 2;
+
+ // One of the SocketProtocol values described above.
+ optional SocketProtocol socket_protocol = 3;
+
+ // The network address of the message initiator.
+ // For SocketFamily INET, this field is 4 octets (IPv4 address).
+ // For SocketFamily INET6, this field is 16 octets (IPv6 address).
+ optional bytes query_address = 4;
+
+ // The network address of the message responder.
+ // For SocketFamily INET, this field is 4 octets (IPv4 address).
+ // For SocketFamily INET6, this field is 16 octets (IPv6 address).
+ optional bytes response_address = 5;
+
+ // The transport port of the message initiator.
+ // This is a 16-bit UDP or TCP port number, depending on SocketProtocol.
+ optional uint32 query_port = 6;
+
+ // The transport port of the message responder.
+ // This is a 16-bit UDP or TCP port number, depending on SocketProtocol.
+ optional uint32 response_port = 7;
+
+ // The time at which the DNS query message was sent or received, depending
+ // on whether this is an AUTH_QUERY, RESOLVER_QUERY, or CLIENT_QUERY.
+ // This is the number of seconds since the UNIX epoch.
+ optional uint64 query_time_sec = 8;
+
+ // The time at which the DNS query message was sent or received.
+ // This is the seconds fraction, expressed as a count of nanoseconds.
+ optional fixed32 query_time_nsec = 9;
+
+ // The initiator's original wire-format DNS query message, verbatim.
+ optional bytes query_message = 10;
+
+ // The "zone" or "bailiwick" pertaining to the DNS query message.
+ // This is a wire-format DNS domain name.
+ optional bytes query_zone = 11;
+
+ // The time at which the DNS response message was sent or received,
+ // depending on whether this is an AUTH_RESPONSE, RESOLVER_RESPONSE, or
+ // CLIENT_RESPONSE.
+ // This is the number of seconds since the UNIX epoch.
+ optional uint64 response_time_sec = 12;
+
+ // The time at which the DNS response message was sent or received.
+ // This is the seconds fraction, expressed as a count of nanoseconds.
+ optional fixed32 response_time_nsec = 13;
+
+ // The responder's original wire-format DNS response message, verbatim.
+ optional bytes response_message = 14;
+}
+
+// All fields except for 'type' in the Message schema are optional.
+// It is recommended that at least the following fields be filled in for
+// particular types of Messages.
+
+// AUTH_QUERY:
+// socket_family, socket_protocol
+// query_address, query_port
+// query_message
+// query_time_sec, query_time_nsec
+
+// AUTH_RESPONSE:
+// socket_family, socket_protocol
+// query_address, query_port
+// query_time_sec, query_time_nsec
+// response_message
+// response_time_sec, response_time_nsec
+
+// RESOLVER_QUERY:
+// socket_family, socket_protocol
+// query_message
+// query_time_sec, query_time_nsec
+// query_zone
+// response_address, response_port
+
+// RESOLVER_RESPONSE:
+// socket_family, socket_protocol
+// query_time_sec, query_time_nsec
+// query_zone
+// response_address, response_port
+// response_message
+// response_time_sec, response_time_nsec
+
+// CLIENT_QUERY:
+// socket_family, socket_protocol
+// query_message
+// query_time_sec, query_time_nsec
+
+// CLIENT_RESPONSE:
+// socket_family, socket_protocol
+// query_time_sec, query_time_nsec
+// response_message
+// response_time_sec, response_time_nsec
diff --git a/src/contrib/dnstap/message.c b/src/contrib/dnstap/message.c
new file mode 100644
index 0000000..a5f798e
--- /dev/null
+++ b/src/contrib/dnstap/message.c
@@ -0,0 +1,130 @@
+/* Copyright (C) 2017 Farsight Security, Inc. <software@farsightsecurity.com>
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+#include <netinet/in.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "libknot/errcode.h"
+
+#include "contrib/dnstap/convert.h"
+#include "contrib/dnstap/message.h"
+
+static void set_address(const struct sockaddr *sockaddr,
+ ProtobufCBinaryData *addr,
+ protobuf_c_boolean *has_addr,
+ uint32_t *port,
+ protobuf_c_boolean *has_port)
+{
+ if (sockaddr == NULL) {
+ *has_addr = 0;
+ *has_port = 0;
+ return;
+ }
+
+ *has_addr = 1;
+ *has_port = 1;
+
+ if (sockaddr->sa_family == AF_INET) {
+ const struct sockaddr_in *sai;
+ sai = (const struct sockaddr_in *)sockaddr;
+ addr->len = sizeof(sai->sin_addr);
+ addr->data = (uint8_t *)&sai->sin_addr.s_addr;
+ *port = ntohs(sai->sin_port);
+ } else if (sockaddr->sa_family == AF_INET6) {
+ const struct sockaddr_in6 *sai6;
+ sai6 = (const struct sockaddr_in6 *)sockaddr;
+ addr->len = sizeof(sai6->sin6_addr);
+ addr->data = (uint8_t *)&sai6->sin6_addr.s6_addr;
+ *port = ntohs(sai6->sin6_port);
+ }
+}
+
+static int get_family(const struct sockaddr *query_sa,
+ const struct sockaddr *response_sa)
+{
+ const struct sockaddr *source = query_sa ? query_sa : response_sa;
+ if (source == NULL) {
+ return 0;
+ }
+
+ return dt_family_encode(source->sa_family);
+}
+
+int dt_message_fill(Dnstap__Message *m,
+ const Dnstap__Message__Type type,
+ const struct sockaddr *query_sa,
+ const struct sockaddr *response_sa,
+ const int protocol,
+ const void *wire,
+ const size_t len_wire,
+ const struct timespec *mtime)
+{
+ if (m == NULL) {
+ return KNOT_EINVAL;
+ }
+
+ memset(m, 0, sizeof(*m));
+
+ m->base.descriptor = &dnstap__message__descriptor;
+
+ // Message.type
+ m->type = type;
+
+ // Message.socket_family
+ m->socket_family = get_family(query_sa, response_sa);
+ m->has_socket_family = m->socket_family != 0;
+
+ // Message.socket_protocol
+ m->socket_protocol = dt_protocol_encode(protocol);
+ m->has_socket_protocol = m->socket_protocol != 0;
+
+ // Message addresses
+ set_address(query_sa, &m->query_address, &m->has_query_address,
+ &m->query_port, &m->has_query_port);
+ set_address(response_sa, &m->response_address, &m->has_response_address,
+ &m->response_port, &m->has_response_port);
+
+ if (dt_message_type_is_query(type)) {
+ // Message.query_message
+ m->query_message.len = len_wire;
+ m->query_message.data = (uint8_t *)wire;
+ m->has_query_message = 1;
+ // Message.query_time_sec, Message.query_time_nsec
+ if (mtime != NULL) {
+ m->query_time_sec = mtime->tv_sec;
+ m->query_time_nsec = mtime->tv_nsec;
+ m->has_query_time_sec = 1;
+ m->has_query_time_nsec = 1;
+ }
+ } else if (dt_message_type_is_response(type)) {
+ // Message.response_message
+ m->response_message.len = len_wire;
+ m->response_message.data = (uint8_t *)wire;
+ m->has_response_message = 1;
+ // Message.response_time_sec, Message.response_time_nsec
+ if (mtime != NULL) {
+ m->response_time_sec = mtime->tv_sec;
+ m->response_time_nsec = mtime->tv_nsec;
+ m->has_response_time_sec = 1;
+ m->has_response_time_nsec = 1;
+ }
+ }
+
+ return KNOT_EOK;
+}
diff --git a/src/contrib/dnstap/message.h b/src/contrib/dnstap/message.h
new file mode 100644
index 0000000..b9e3aff
--- /dev/null
+++ b/src/contrib/dnstap/message.h
@@ -0,0 +1,63 @@
+/* Copyright (C) 2017 Farsight Security, Inc. <software@farsightsecurity.com>
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+/*!
+ * \author Robert Edmonds <edmonds@fsi.io>
+ *
+ * \brief Dnstap message interface.
+ */
+
+#pragma once
+
+#include <sys/socket.h>
+#include <sys/time.h>
+#include <stddef.h>
+
+#include "contrib/dnstap/dnstap.pb-c.h"
+
+/*!
+ * \brief Fill a Dnstap__Message structure with the given parameters.
+ *
+ * \param[out] m
+ * Dnstap__Message structure to fill. Will be zeroed first.
+ * \param type
+ * One of the DNSTAP__MESSAGE__TYPE__* values.
+ * \param query_sa
+ * sockaddr_in or sockaddr_in6 to use when filling the 'socket_family',
+ * 'query_address', 'query_port' fields.
+ * \param response_sa
+ * sockaddr_in or sockaddr_in6 to use when filling the 'socket_family',
+ * 'response_address', 'response_port' fields.
+ * \param protocol
+ * \c IPPROTO_UDP or \c IPPROTO_TCP.
+ * \param wire
+ * Wire-format query message or response message (depending on 'type').
+ * \param len_wire
+ * Length in bytes of 'wire'.
+ * \param mtime
+ * Message time. May be NULL.
+ *
+ * \retval KNOT_EOK
+ * \retval KNOT_EINVAL
+ */
+int dt_message_fill(Dnstap__Message *m,
+ const Dnstap__Message__Type type,
+ const struct sockaddr *query_sa,
+ const struct sockaddr *response_sa,
+ const int protocol,
+ const void *wire,
+ const size_t len_wire,
+ const struct timespec *mtime);
diff --git a/src/contrib/dnstap/reader.c b/src/contrib/dnstap/reader.c
new file mode 100644
index 0000000..dc1c053
--- /dev/null
+++ b/src/contrib/dnstap/reader.c
@@ -0,0 +1,103 @@
+/* Copyright (C) 2014 Farsight Security, Inc. <software@farsightsecurity.com>
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "libknot/attribute.h"
+#include "libknot/errcode.h"
+
+#include "contrib/dnstap/dnstap.h"
+#include "contrib/dnstap/reader.h"
+
+dt_reader_t* dt_reader_create(const char *file_path)
+{
+ struct fstrm_file_options *fopt = NULL;
+ struct fstrm_reader_options *ropt = NULL;
+ dt_reader_t *reader = NULL;
+ fstrm_res res;
+
+ reader = calloc(1, sizeof(dt_reader_t));
+ if (reader == NULL) {
+ goto fail;
+ }
+
+ // Open reader.
+ fopt = fstrm_file_options_init();
+ fstrm_file_options_set_file_path(fopt, file_path);
+ ropt = fstrm_reader_options_init();
+ fstrm_reader_options_add_content_type(ropt,
+ (const uint8_t *) DNSTAP_CONTENT_TYPE,
+ strlen(DNSTAP_CONTENT_TYPE));
+ reader->fr = fstrm_file_reader_init(fopt, ropt);
+ fstrm_file_options_destroy(&fopt);
+ fstrm_reader_options_destroy(&ropt);
+ if (reader->fr == NULL) {
+ goto fail;
+ }
+ res = fstrm_reader_open(reader->fr);
+ if (res != fstrm_res_success) {
+ goto fail;
+ }
+
+ return reader;
+fail:
+ dt_reader_free(reader);
+ return NULL;
+}
+
+void dt_reader_free(dt_reader_t *reader)
+{
+ if (reader == NULL) {
+ return;
+ }
+
+ fstrm_reader_destroy(&reader->fr);
+ free(reader);
+}
+
+int dt_reader_read(dt_reader_t *reader, Dnstap__Dnstap **d)
+{
+ fstrm_res res;
+ const uint8_t *data = NULL;
+ size_t len = 0;
+
+ res = fstrm_reader_read(reader->fr, &data, &len);
+ if (res == fstrm_res_success) {
+ *d = dnstap__dnstap__unpack(NULL, len, data);
+ if (*d == NULL) {
+ return KNOT_ENOMEM;
+ }
+ } else if (res == fstrm_res_failure) {
+ return KNOT_ERROR;
+ } else if (res == fstrm_res_stop) {
+ return KNOT_EOF;
+ }
+
+ return KNOT_EOK;
+}
+
+void dt_reader_free_frame(_unused_ dt_reader_t *reader, Dnstap__Dnstap **frame_ptr)
+{
+ if (!*frame_ptr) {
+ return;
+ }
+
+ dnstap__dnstap__free_unpacked(*frame_ptr, NULL);
+ *frame_ptr = NULL;
+}
diff --git a/src/contrib/dnstap/reader.h b/src/contrib/dnstap/reader.h
new file mode 100644
index 0000000..cc6385f
--- /dev/null
+++ b/src/contrib/dnstap/reader.h
@@ -0,0 +1,73 @@
+/* Copyright (C) 2017 Farsight Security, Inc. <software@farsightsecurity.com>
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+/*!
+ * \brief Dnstap file reader.
+ */
+
+#pragma once
+
+#include <fstrm.h>
+#include <protobuf-c/protobuf-c.h>
+
+#include "contrib/dnstap/dnstap.pb-c.h"
+
+/*! \brief Structure for dnstap file reader. */
+typedef struct {
+ /*!< Input reader. */
+ struct fstrm_reader *fr;
+} dt_reader_t;
+
+/*!
+ * \brief Creates dnstap file reader structure.
+ *
+ * \param file_path Name of file to read input from.
+ *
+ * \retval reader if success.
+ * \retval NULL if error.
+ */
+dt_reader_t* dt_reader_create(const char *file_path);
+
+/*!
+ * \brief Close dnstap file reader.
+ *
+ * \param reader dnstap file reader structure.
+ */
+void dt_reader_free(dt_reader_t *reader);
+
+/*!
+ * \brief Read a dnstap protobuf from a dnstap file reader.
+ *
+ * Caller must deallocate the returned protobuf with the
+ * dnstap__dnstap__free_unpacked() function.
+ *
+ * \param[in] reader dnstap file reader structure.
+ * \param[out] d Unpacked dnstap protobuf.
+ *
+ * \retval KNOT_EOK
+ * \retval KNOT_ERROR
+ * \retval KNOT_EOF
+ * \retval KNOT_ENOMEM
+ */
+int dt_reader_read(dt_reader_t *reader, Dnstap__Dnstap **d);
+
+/*!
+ * \brief free the frame allocated by dt_read_data.
+ *
+ * \param reader Dnstap reader context.
+ * \param d The frame to be freed.
+ */
+void dt_reader_free_frame(dt_reader_t *reader, Dnstap__Dnstap **d);
diff --git a/src/contrib/dnstap/writer.c b/src/contrib/dnstap/writer.c
new file mode 100644
index 0000000..03961c1
--- /dev/null
+++ b/src/contrib/dnstap/writer.c
@@ -0,0 +1,120 @@
+/* Copyright (C) 2014 Farsight Security, Inc. <software@farsightsecurity.com>
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "libknot/errcode.h"
+
+#include "contrib/dnstap/dnstap.h"
+#include "contrib/dnstap/writer.h"
+
+dt_writer_t* dt_writer_create(const char *file_path, const char *version)
+{
+ struct fstrm_file_options *fopt = NULL;
+ struct fstrm_writer_options *wopt = NULL;
+ dt_writer_t *writer = NULL;
+ fstrm_res res;
+
+ writer = calloc(1, sizeof(dt_writer_t));
+ if (writer == NULL) {
+ goto fail;
+ }
+
+ // Set "version".
+ if (version != NULL) {
+ writer->len_version = strlen(version);
+ writer->version = strdup(version);
+ if (!writer->version) {
+ goto fail;
+ }
+ }
+
+ // Open writer.
+ fopt = fstrm_file_options_init();
+ fstrm_file_options_set_file_path(fopt, file_path);
+ wopt = fstrm_writer_options_init();
+ fstrm_writer_options_add_content_type(wopt,
+ (const uint8_t *) DNSTAP_CONTENT_TYPE,
+ strlen(DNSTAP_CONTENT_TYPE));
+ writer->fw = fstrm_file_writer_init(fopt, wopt);
+ fstrm_file_options_destroy(&fopt);
+ fstrm_writer_options_destroy(&wopt);
+ if (writer->fw == NULL) {
+ goto fail;
+ }
+
+ res = fstrm_writer_open(writer->fw);
+ if (res != fstrm_res_success) {
+ goto fail;
+ }
+
+ return writer;
+fail:
+ dt_writer_free(writer);
+ return NULL;
+}
+
+void dt_writer_free(dt_writer_t *writer)
+{
+ if (writer == NULL) {
+ return;
+ }
+
+ fstrm_writer_destroy(&writer->fw);
+ free(writer->version);
+ free(writer);
+}
+
+int dt_writer_write(dt_writer_t *writer, const ProtobufCMessage *msg)
+{
+ Dnstap__Dnstap dnstap = DNSTAP__DNSTAP__INIT;
+ size_t len;
+ uint8_t *data;
+
+ if (writer->fw == NULL) {
+ return KNOT_EOK;
+ }
+
+ // Only handle dnstap/Message.
+ assert(msg->descriptor == &dnstap__message__descriptor);
+
+ // Fill out 'dnstap'.
+ if (writer->version) {
+ dnstap.version.data = writer->version;
+ dnstap.version.len = writer->len_version;
+ dnstap.has_version = 1;
+ }
+ dnstap.type = DNSTAP__DNSTAP__TYPE__MESSAGE;
+ dnstap.message = (Dnstap__Message *)msg;
+
+ // Serialize the dnstap frame.
+ if (!dt_pack(&dnstap, &data, &len)) {
+ return KNOT_ENOMEM;
+ }
+
+ // Write the dnstap frame to the output stream.
+ if (fstrm_writer_write(writer->fw, data, len) != fstrm_res_success) {
+ return KNOT_ERROR;
+ }
+
+ // Cleanup.
+ free(data);
+
+ return KNOT_EOK;
+}
diff --git a/src/contrib/dnstap/writer.h b/src/contrib/dnstap/writer.h
new file mode 100644
index 0000000..e6928c5
--- /dev/null
+++ b/src/contrib/dnstap/writer.h
@@ -0,0 +1,71 @@
+/* Copyright (C) 2014 Farsight Security, Inc. <software@farsightsecurity.com>
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+/*!
+ * \author Robert Edmonds <edmonds@fsi.io>
+ *
+ * \brief Dnstap file writer.
+ */
+
+#pragma once
+
+#include <fstrm.h>
+#include <protobuf-c/protobuf-c.h>
+
+/*! \brief Structure for dnstap file writer. */
+typedef struct {
+ /*!< Output writer. */
+ struct fstrm_writer *fw;
+
+ /*!< dnstap "version" field. */
+ void *version;
+
+ /*!< length of dnstap "version" field. */
+ size_t len_version;
+} dt_writer_t;
+
+/*!
+ * \brief Creates dnstap file writer structure.
+ *
+ * \param file_path Name of file to write output to.
+ * \param version Version string of software. May be NULL.
+ *
+ * \retval writer if success.
+ * \retval NULL if error.
+ */
+dt_writer_t* dt_writer_create(const char *file_path, const char *version);
+
+/*!
+ * \brief Finish writing dnstap file writer and free resources.
+ *
+ * \param writer dnstap file writer structure.
+ */
+void dt_writer_free(dt_writer_t *writer);
+
+/*!
+ * \brief Write a protobuf to the dnstap file writer.
+ *
+ * Supported protobuf types for the 'msg' parameter:
+ * \c Dnstap__Message
+ *
+ * \param writer dnstap file writer structure.
+ * \param msg dnstap protobuf. Must be a supported type.
+ *
+ * \retval KNOT_EOK
+ * \retval KNOT_EINVAL
+ * \retval KNOT_ENOMEM
+ */
+int dt_writer_write(dt_writer_t *writer, const ProtobufCMessage *msg);
diff --git a/src/contrib/files.c b/src/contrib/files.c
new file mode 100644
index 0000000..c753ca8
--- /dev/null
+++ b/src/contrib/files.c
@@ -0,0 +1,254 @@
+/* Copyright (C) 2021 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+#include <assert.h>
+#include <dirent.h>
+#include <fcntl.h>
+#include <ftw.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+#include "contrib/files.h"
+#include "contrib/string.h"
+#include "libknot/errcode.h"
+
+#if defined(MAXBSIZE)
+ #define BUFSIZE MAXBSIZE
+#else
+ #define BUFSIZE (64 * 1024)
+#endif
+
+char* abs_path(const char *path, const char *base_dir)
+{
+ if (path == NULL) {
+ return NULL;
+ } else if (path[0] == '/') {
+ return strdup(path);
+ } else {
+ char *full_path;
+ if (base_dir == NULL) {
+ char *cwd = realpath("./", NULL);
+ full_path = sprintf_alloc("%s/%s", cwd, path);
+ free(cwd);
+ } else {
+ full_path = sprintf_alloc("%s/%s", base_dir, path);
+ }
+ return full_path;
+ }
+}
+
+bool same_path(const char *path1, const char *path2)
+{
+ bool equal = false;
+ int err = 0;
+
+ struct stat sb1;
+ if (stat(path1, &sb1) == 0) {
+ struct stat sb2;
+ if (stat(path2, &sb2) == 0) {
+ if (sb1.st_dev == sb2.st_dev &&
+ sb1.st_ino == sb2.st_ino) {
+ equal = true;
+ }
+ } else {
+ err = errno;
+ }
+ } else {
+ err = errno;
+ }
+
+ if (err != 0) {
+ // Can't compare real absolute paths, as stat() failed already. Try the best.
+ char *full_path1 = abs_path(path1, NULL);
+ char *full_path2 = abs_path(path2, NULL);
+
+ if (strcmp(full_path1, full_path2) == 0) {
+ equal = true;
+ }
+
+ free(full_path1);
+ free(full_path2);
+ }
+
+ return equal;
+}
+
+static int remove_file(const char *path, const struct stat *stat, int type, struct FTW *ftw)
+{
+ (void)stat;
+ (void)ftw;
+ if (type == FTW_DP) {
+ return rmdir(path);
+ } else {
+ return unlink(path);
+ }
+}
+
+bool remove_path(const char *path)
+{
+ return (0 == nftw(path, remove_file, 1, FTW_DEPTH | FTW_PHYS));
+}
+
+int make_dir(const char *path, mode_t mode, bool ignore_existing)
+{
+ if (mkdir(path, mode) == 0) {
+ return KNOT_EOK;
+ }
+
+ if (!ignore_existing || errno != EEXIST) {
+ return knot_map_errno();
+ }
+
+ assert(errno == EEXIST);
+
+ struct stat st = { 0 };
+ if (stat(path, &st) != 0) {
+ return knot_map_errno();
+ }
+
+ if (!S_ISDIR(st.st_mode)) {
+ return KNOT_EEXIST;
+ }
+
+ return KNOT_EOK;
+}
+
+int make_path(const char *path, mode_t mode)
+{
+ if (path == NULL) {
+ return KNOT_EINVAL;
+ }
+
+ char *dir = strdup(path);
+ if (dir == NULL) {
+ return KNOT_ENOMEM;
+ }
+
+ for (char *p = strchr(dir + 1, '/'); p != NULL; p = strchr(p + 1, '/')) {
+ *p = '\0';
+ if (mkdir(dir, mode) == -1 && errno != EEXIST) {
+ free(dir);
+ return knot_map_errno();
+ }
+ *p = '/';
+ }
+
+ free(dir);
+
+ return KNOT_EOK;
+}
+
+int open_tmp_file(const char *path, char **tmp_name, FILE **file, mode_t mode)
+{
+ int ret;
+
+ *tmp_name = sprintf_alloc("%s.XXXXXX", path);
+ if (*tmp_name == NULL) {
+ ret = KNOT_ENOMEM;
+ goto open_tmp_failed;
+ }
+
+ int fd = mkstemp(*tmp_name);
+ if (fd < 0) {
+ ret = knot_map_errno();
+ goto open_tmp_failed;
+ }
+
+ if (fchmod(fd, mode) != 0) {
+ ret = knot_map_errno();
+ close(fd);
+ unlink(*tmp_name);
+ goto open_tmp_failed;
+ }
+
+ *file = fdopen(fd, "w");
+ if (*file == NULL) {
+ ret = knot_map_errno();
+ close(fd);
+ unlink(*tmp_name);
+ goto open_tmp_failed;
+ }
+
+ return KNOT_EOK;
+open_tmp_failed:
+ free(*tmp_name);
+ *tmp_name = NULL;
+ *file = NULL;
+
+ assert(ret != KNOT_EOK);
+ return ret;
+}
+
+int copy_file(const char *dest, const char *src)
+{
+ if (dest == NULL || src == NULL) {
+ return KNOT_EINVAL;
+ }
+
+ int ret = 0;
+ char *buf = NULL, *tmp_name = NULL;
+ FILE *file = NULL;
+
+ FILE *from = fopen(src, "r");
+ if (from == NULL) {
+ ret = errno == ENOENT ? KNOT_EFILE : knot_map_errno();
+ goto done;
+ }
+
+ buf = malloc(sizeof(*buf) * BUFSIZE);
+ if (buf == NULL) {
+ ret = KNOT_ENOMEM;
+ goto done;
+ }
+
+ ret = open_tmp_file(dest, &tmp_name, &file, S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP);
+ if (ret != KNOT_EOK) {
+ goto done;
+ }
+
+ ssize_t cnt;
+ while ((cnt = fread(buf, sizeof(*buf), BUFSIZE, from)) != 0 &&
+ (ret = (fwrite(buf, sizeof(*buf), cnt, file) == cnt))) {
+ }
+
+ ret = !ret || ferror(from);
+ if (ret != 0) {
+ ret = knot_map_errno();
+ unlink(tmp_name);
+ goto done;
+ }
+
+ ret = rename(tmp_name, dest);
+ if (ret != 0) {
+ ret = knot_map_errno();
+ unlink(tmp_name);
+ goto done;
+ }
+ ret = KNOT_EOK;
+
+done:
+ free(tmp_name);
+ if (file != NULL) {
+ fclose(file);
+ }
+ free(buf);
+ if (from != NULL) {
+ fclose(from);
+ }
+ return ret;
+}
diff --git a/src/contrib/files.h b/src/contrib/files.h
new file mode 100644
index 0000000..16a0f44
--- /dev/null
+++ b/src/contrib/files.h
@@ -0,0 +1,77 @@
+/* Copyright (C) 2021 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include <stdbool.h>
+#include <stdio.h>
+#include <sys/types.h>
+
+/*!
+ * Gets the absolute path.
+ *
+ * \note The result must be explicitly deallocated.
+ *
+ * \param[in] path Absolute path or a relative path suffix; a string.
+ * \param[in] base_dir Path prefix for a relative string.
+ *
+ * \return Absolute path string pointer.
+ */
+char* abs_path(const char *path, const char *base_dir);
+
+/*!
+ * Try to compare two paths whether they are identical.
+ *
+ * \note If any of the two paths doesn't physically exist, their identity can't
+ * be detected in some special corner cases.
+ *
+ * \param[in] path1 Absolute or a relative path (a file, a directory, etc.)
+ * \param[in] path2 Absolute or a relative path (a file, a directory, etc.)
+ *
+ * \return True if both paths are identical (if they point to the same inode),
+ * false otherwise.
+ */
+bool same_path(const char *path1, const char *path2);
+
+/*!
+ * \brief Delete file or directory (recursive).
+ *
+ * \return true on success, false when one or more files failed to be removed.
+ */
+bool remove_path(const char *path);
+
+/*!
+ * Equivalent to mkdir(2), can succeed if the directory already exists.
+ */
+int make_dir(const char *path, mode_t mode, bool ignore_existing);
+
+/*!
+ * Makes a directory part of the path with all parent directories if not exist.
+ */
+int make_path(const char *path, mode_t mode);
+
+/*!
+ * Creates and opens for writing a temporary file based on given path.
+ */
+int open_tmp_file(const char *path, char **tmp_name, FILE **file, mode_t mode);
+
+/*!
+ * Copies a file, possibly overwriting existing one, as an atomic operation.
+ *
+ * \return KNOT_EOK on success, KNOT_EFILE if the source file doesn't exist,
+ * \or other KNOT_E* values in case of other errors.
+ */
+int copy_file(const char *dest, const char *src);
diff --git a/src/contrib/getline.c b/src/contrib/getline.c
new file mode 100644
index 0000000..46cdff9
--- /dev/null
+++ b/src/contrib/getline.c
@@ -0,0 +1,60 @@
+/* Copyright (C) 2011 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+// FreeBSD POSIX2008 getline
+#ifndef _WITH_GETLINE
+#define _WITH_GETLINE
+#endif
+
+#include "contrib/getline.h"
+
+#include <stdio.h> // getline or fgetln
+#include <stdlib.h> // free
+#include <string.h> // memcpy
+
+ssize_t knot_getline(char **lineptr, size_t *n, FILE *stream)
+{
+#ifdef HAVE_GETLINE
+ return getline(lineptr, n, stream);
+#else
+#ifdef HAVE_FGETLN
+ size_t length = 0;
+ char *buffer = fgetln(stream, &length);
+ if (buffer == NULL) {
+ return -1;
+ }
+
+ /* NOTE: Function fgetln doesn't return terminated string!
+ * Output buffer from the fgetln can't be freed.
+ */
+
+ // If the output buffer is not specified or is small, extend it.
+ if (*lineptr == NULL || *n <= length) {
+ char *tmp = realloc(*lineptr, length + 1);
+ if (tmp == NULL) {
+ return -1;
+ }
+ *lineptr = tmp;
+ *n = length + 1;
+ }
+
+ memcpy(*lineptr, buffer, length);
+ (*lineptr)[length] = '\0';
+
+ return length;
+#endif
+#endif
+}
diff --git a/src/contrib/getline.h b/src/contrib/getline.h
new file mode 100644
index 0000000..80f2db1
--- /dev/null
+++ b/src/contrib/getline.h
@@ -0,0 +1,39 @@
+/* Copyright (C) 2018 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+/*!
+ * \brief Multiplatform getline wrapper.
+ */
+
+#pragma once
+
+#include <stdio.h>
+#include <sys/types.h>
+
+/*!
+ * \brief Reads a line from a stream.
+ *
+ * This function has the same semantics as POSIX.1-2008 getline().
+ * If necessary, the output buffer will be allocated/reallocated.
+ *
+ * \param lineptr Output buffer.
+ * \param n Output buffer size.
+ * \param stream Input stream.
+ *
+ * \retval Number of characters read, including new line delimiter,
+ * not including terminating. -1 on error or EOF.
+ */
+ssize_t knot_getline(char **lineptr, size_t *n, FILE *stream);
diff --git a/src/contrib/json.c b/src/contrib/json.c
new file mode 100644
index 0000000..8b8bda4
--- /dev/null
+++ b/src/contrib/json.c
@@ -0,0 +1,237 @@
+/* Copyright (C) 2022 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <assert.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "contrib/json.h"
+
+#include "contrib/string.h"
+
+#define MAX_DEPTH 8
+
+enum {
+ BLOCK_INVALID = 0,
+ BLOCK_OBJECT,
+ BLOCK_LIST,
+};
+
+/*! One indented block of JSON. */
+struct block {
+ /*! Block type. */
+ int type;
+ /*! Number of elements written. */
+ int count;
+};
+
+struct jsonw {
+ /*! Output file stream. */
+ FILE *out;
+ /*! Indentaiton string. */
+ const char *indent;
+ /*! List to be used as a stack of blocks in progress. */
+ struct block stack[MAX_DEPTH];
+ /*! Index pointing to the top of the stack. */
+ int top;
+ /*! Newline needed indication. */
+ bool wrap;
+};
+
+static const char *DEFAULT_INDENT = "\t";
+
+static void start_block(jsonw_t *w, int type)
+{
+ assert(w->top > 0);
+
+ struct block b = {
+ .type = type,
+ .count = 0,
+ };
+
+ w->top -= 1;
+ w->stack[w->top] = b;
+}
+
+static struct block *cur_block(jsonw_t *w)
+{
+ if (w && w->top < MAX_DEPTH) {
+ return &w->stack[w->top];
+ }
+ return NULL;
+}
+
+/*! Insert new line and indent for the next write. */
+static void wrap(jsonw_t *w)
+{
+ if (!w->wrap) {
+ w->wrap = true;
+ return;
+ }
+
+ fputc('\n', w->out);
+ int level = MAX_DEPTH - w->top;
+ for (int i = 0; i < level; i++) {
+ fprintf(w->out, "%s", w->indent);
+ }
+}
+
+static void end_block(jsonw_t *w)
+{
+ assert(w->top < MAX_DEPTH);
+
+ w->top += 1;
+}
+
+static void escaped_print(jsonw_t *w, const char *str)
+{
+ fputc('"', w->out);
+ for (const char *pos = str; *pos != '\0'; pos++) {
+ char c = *pos;
+ if (c == '\\' || c == '\"') {
+ fputc('\\', w->out);
+ }
+ fputc(c, w->out);
+ }
+ fputc('"', w->out);
+}
+
+static void align_key(jsonw_t *w, const char *key)
+{
+ struct block *top = cur_block(w);
+ if (top && top->count++) {
+ fputc(',', w->out);
+ }
+
+ wrap(w);
+
+ if (key && key[0]) {
+ escaped_print(w, key);
+ fprintf(w->out, ": ");
+ }
+}
+
+jsonw_t *jsonw_new(FILE *out, const char *indent)
+{
+ assert(out);
+
+ jsonw_t *w = calloc(1, sizeof(*w));
+ if (w == NULL) {
+ return w;
+ }
+
+ w->out = out;
+ w->indent = indent ? indent : DEFAULT_INDENT;
+ w->top = MAX_DEPTH;
+
+ return w;
+}
+
+void jsonw_free(jsonw_t **w)
+{
+ if (w == NULL) {
+ return;
+ }
+
+ wrap(*w);
+
+ free(*w);
+ *w = NULL;
+}
+
+void jsonw_object(jsonw_t *w, const char *key)
+{
+ assert(w);
+
+ align_key(w, key);
+ fprintf(w->out, "{");
+ start_block(w, BLOCK_OBJECT);
+}
+
+void jsonw_list(jsonw_t *w, const char *key)
+{
+ assert(w);
+
+ align_key(w, key);
+ fprintf(w->out, "[");
+ start_block(w, BLOCK_LIST);
+}
+
+void jsonw_str(jsonw_t *w, const char *key, const char *value)
+{
+ assert(w);
+
+ align_key(w, key);
+ escaped_print(w, value);
+}
+
+void jsonw_ulong(jsonw_t *w, const char *key, unsigned long value)
+{
+ assert(w);
+
+ align_key(w, key);
+ fprintf(w->out, "%lu", value);
+}
+
+void jsonw_int(jsonw_t *w, const char *key, int value)
+{
+ assert(w);
+
+ align_key(w, key);
+ fprintf(w->out, "%d", value);
+}
+
+
+void jsonw_bool(jsonw_t *w, const char *key, bool value)
+{
+ assert(w);
+
+ align_key(w, key);
+ fprintf(w->out, "%s", value ? "true" : "false");
+}
+
+void jsonw_hex(jsonw_t *w, const char *key, const uint8_t *data, size_t len)
+{
+ assert(w);
+
+ char *hex = bin_to_hex(data, len, true);
+ if (hex != NULL) {
+ jsonw_str(w, key, hex);
+ }
+ free(hex);
+}
+
+void jsonw_end(jsonw_t *w)
+{
+ assert(w);
+
+ struct block *top = cur_block(w);
+ if (top == NULL) {
+ return;
+ }
+
+ end_block(w);
+ wrap(w);
+
+ switch (top->type) {
+ case BLOCK_OBJECT:
+ fprintf(w->out, "}");
+ break;
+ case BLOCK_LIST:
+ fprintf(w->out, "]");
+ break;
+ }
+}
diff --git a/src/contrib/json.h b/src/contrib/json.h
new file mode 100644
index 0000000..983b329
--- /dev/null
+++ b/src/contrib/json.h
@@ -0,0 +1,87 @@
+/* Copyright (C) 2022 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#pragma once
+
+#include <stdio.h>
+#include <stdbool.h>
+#include <stdint.h>
+
+/*!
+ * Simple pretty JSON writer.
+ */
+struct jsonw;
+typedef struct jsonw jsonw_t;
+
+/*!
+ * Create new JSON writer.
+ *
+ * @param out Output file stream.
+ * @param indent Indentation string.
+ *
+ * @return JSON writer or NULL for allocation error.
+ */
+jsonw_t *jsonw_new(FILE *out, const char *indent);
+
+/*!
+ * Free JSON writer created with jsonw_new.
+ */
+void jsonw_free(jsonw_t **w);
+
+/*!
+ * Start writing a new object.
+ *
+ * The following writes will represent key and value pairs respectively until
+ * jsonw_end is called.
+ */
+void jsonw_object(jsonw_t *w, const char *key);
+
+/*!
+ * Start writing a new list.
+ *
+ * The following writes will represent values until jsonw_end is called.
+ */
+void jsonw_list(jsonw_t *w, const char *key);
+
+/*!
+ * Write string as JSON. The string will be escaped properly.
+ */
+void jsonw_str(jsonw_t *w, const char *key, const char *value);
+
+/*!
+ * Write unsigned long value as JSON.
+ */
+void jsonw_ulong(jsonw_t *w, const char *key, unsigned long value);
+
+/*!
+ * Write integer as JSON.
+ */
+void jsonw_int(jsonw_t *w, const char *key, int value);
+
+/*!
+ * Write boolean value as JSON.
+ */
+void jsonw_bool(jsonw_t *w, const char *key, bool value);
+
+/*!
+ * Write binary data encoded to HEX as JSON.
+ */
+void jsonw_hex(jsonw_t *w, const char *key, const uint8_t *data, size_t len);
+
+/*!
+ * Terminate in-progress object or list.
+ */
+void jsonw_end(jsonw_t *w);
diff --git a/src/contrib/libbpf/LICENSE b/src/contrib/libbpf/LICENSE
new file mode 100644
index 0000000..149c7b0
--- /dev/null
+++ b/src/contrib/libbpf/LICENSE
@@ -0,0 +1 @@
+../licenses/LGPL-2.1 \ No newline at end of file
diff --git a/src/contrib/libbpf/bpf/bpf.c b/src/contrib/libbpf/bpf/bpf.c
new file mode 100644
index 0000000..98596e1
--- /dev/null
+++ b/src/contrib/libbpf/bpf/bpf.c
@@ -0,0 +1,710 @@
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+
+/*
+ * common eBPF ELF operations.
+ *
+ * Copyright (C) 2013-2015 Alexei Starovoitov <ast@kernel.org>
+ * Copyright (C) 2015 Wang Nan <wangnan0@huawei.com>
+ * Copyright (C) 2015 Huawei Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation;
+ * version 2.1 of the License (not later!)
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses>
+ */
+
+#include <stdlib.h>
+#include <string.h>
+#include <memory.h>
+#include <unistd.h>
+#include <asm/unistd.h>
+#include <errno.h>
+#include <linux/bpf.h>
+#include "bpf.h"
+#include "libbpf.h"
+#include "libbpf_internal.h"
+
+/*
+ * When building perf, unistd.h is overridden. __NR_bpf is
+ * required to be defined explicitly.
+ */
+#ifndef __NR_bpf
+# if defined(__i386__)
+# define __NR_bpf 357
+# elif defined(__x86_64__)
+# define __NR_bpf 321
+# elif defined(__aarch64__)
+# define __NR_bpf 280
+# elif defined(__sparc__)
+# define __NR_bpf 349
+# elif defined(__s390__)
+# define __NR_bpf 351
+# elif defined(__arc__)
+# define __NR_bpf 280
+# else
+# error __NR_bpf not defined. libbpf does not support your arch.
+# endif
+#endif
+
+static inline __u64 ptr_to_u64(const void *ptr)
+{
+ return (__u64) (unsigned long) ptr;
+}
+
+static inline int sys_bpf(enum bpf_cmd cmd, union bpf_attr *attr,
+ unsigned int size)
+{
+ return syscall(__NR_bpf, cmd, attr, size);
+}
+
+static inline int sys_bpf_prog_load(union bpf_attr *attr, unsigned int size)
+{
+ int fd;
+
+ do {
+ fd = sys_bpf(BPF_PROG_LOAD, attr, size);
+ } while (fd < 0 && errno == EAGAIN);
+
+ return fd;
+}
+
+int bpf_create_map_xattr(const struct bpf_create_map_attr *create_attr)
+{
+ union bpf_attr attr;
+
+ memset(&attr, '\0', sizeof(attr));
+
+ attr.map_type = create_attr->map_type;
+ attr.key_size = create_attr->key_size;
+ attr.value_size = create_attr->value_size;
+ attr.max_entries = create_attr->max_entries;
+ attr.map_flags = create_attr->map_flags;
+ if (create_attr->name)
+ memcpy(attr.map_name, create_attr->name,
+ min(strlen(create_attr->name), BPF_OBJ_NAME_LEN - 1));
+ attr.numa_node = create_attr->numa_node;
+ attr.btf_fd = create_attr->btf_fd;
+ attr.btf_key_type_id = create_attr->btf_key_type_id;
+ attr.btf_value_type_id = create_attr->btf_value_type_id;
+ attr.map_ifindex = create_attr->map_ifindex;
+ attr.inner_map_fd = create_attr->inner_map_fd;
+
+ return sys_bpf(BPF_MAP_CREATE, &attr, sizeof(attr));
+}
+
+int bpf_create_map_node(enum bpf_map_type map_type, const char *name,
+ int key_size, int value_size, int max_entries,
+ __u32 map_flags, int node)
+{
+ struct bpf_create_map_attr map_attr = {};
+
+ map_attr.name = name;
+ map_attr.map_type = map_type;
+ map_attr.map_flags = map_flags;
+ map_attr.key_size = key_size;
+ map_attr.value_size = value_size;
+ map_attr.max_entries = max_entries;
+ if (node >= 0) {
+ map_attr.numa_node = node;
+ map_attr.map_flags |= BPF_F_NUMA_NODE;
+ }
+
+ return bpf_create_map_xattr(&map_attr);
+}
+
+int bpf_create_map(enum bpf_map_type map_type, int key_size,
+ int value_size, int max_entries, __u32 map_flags)
+{
+ struct bpf_create_map_attr map_attr = {};
+
+ map_attr.map_type = map_type;
+ map_attr.map_flags = map_flags;
+ map_attr.key_size = key_size;
+ map_attr.value_size = value_size;
+ map_attr.max_entries = max_entries;
+
+ return bpf_create_map_xattr(&map_attr);
+}
+
+int bpf_create_map_name(enum bpf_map_type map_type, const char *name,
+ int key_size, int value_size, int max_entries,
+ __u32 map_flags)
+{
+ struct bpf_create_map_attr map_attr = {};
+
+ map_attr.name = name;
+ map_attr.map_type = map_type;
+ map_attr.map_flags = map_flags;
+ map_attr.key_size = key_size;
+ map_attr.value_size = value_size;
+ map_attr.max_entries = max_entries;
+
+ return bpf_create_map_xattr(&map_attr);
+}
+
+int bpf_create_map_in_map_node(enum bpf_map_type map_type, const char *name,
+ int key_size, int inner_map_fd, int max_entries,
+ __u32 map_flags, int node)
+{
+ union bpf_attr attr;
+
+ memset(&attr, '\0', sizeof(attr));
+
+ attr.map_type = map_type;
+ attr.key_size = key_size;
+ attr.value_size = 4;
+ attr.inner_map_fd = inner_map_fd;
+ attr.max_entries = max_entries;
+ attr.map_flags = map_flags;
+ if (name)
+ memcpy(attr.map_name, name,
+ min(strlen(name), BPF_OBJ_NAME_LEN - 1));
+
+ if (node >= 0) {
+ attr.map_flags |= BPF_F_NUMA_NODE;
+ attr.numa_node = node;
+ }
+
+ return sys_bpf(BPF_MAP_CREATE, &attr, sizeof(attr));
+}
+
+int bpf_create_map_in_map(enum bpf_map_type map_type, const char *name,
+ int key_size, int inner_map_fd, int max_entries,
+ __u32 map_flags)
+{
+ return bpf_create_map_in_map_node(map_type, name, key_size,
+ inner_map_fd, max_entries, map_flags,
+ -1);
+}
+
+static void *
+alloc_zero_tailing_info(const void *orecord, __u32 cnt,
+ __u32 actual_rec_size, __u32 expected_rec_size)
+{
+ __u64 info_len = (__u64)actual_rec_size * cnt;
+ void *info, *nrecord;
+ int i;
+
+ info = malloc(info_len);
+ if (!info)
+ return NULL;
+
+ /* zero out bytes kernel does not understand */
+ nrecord = info;
+ for (i = 0; i < cnt; i++) {
+ memcpy(nrecord, orecord, expected_rec_size);
+ memset(nrecord + expected_rec_size, 0,
+ actual_rec_size - expected_rec_size);
+ orecord += actual_rec_size;
+ nrecord += actual_rec_size;
+ }
+
+ return info;
+}
+
+int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr,
+ char *log_buf, size_t log_buf_sz)
+{
+ void *finfo = NULL, *linfo = NULL;
+ union bpf_attr attr;
+ __u32 log_level;
+ int fd;
+
+ if (!load_attr || !log_buf != !log_buf_sz)
+ return -EINVAL;
+
+ log_level = load_attr->log_level;
+ if (log_level > (4 | 2 | 1) || (log_level && !log_buf))
+ return -EINVAL;
+
+ memset(&attr, 0, sizeof(attr));
+ attr.prog_type = load_attr->prog_type;
+ attr.expected_attach_type = load_attr->expected_attach_type;
+ if (attr.prog_type == BPF_PROG_TYPE_TRACING) {
+ attr.attach_btf_id = load_attr->attach_btf_id;
+ attr.attach_prog_fd = load_attr->attach_prog_fd;
+ } else {
+ attr.prog_ifindex = load_attr->prog_ifindex;
+ attr.kern_version = load_attr->kern_version;
+ }
+ attr.insn_cnt = (__u32)load_attr->insns_cnt;
+ attr.insns = ptr_to_u64(load_attr->insns);
+ attr.license = ptr_to_u64(load_attr->license);
+
+ attr.log_level = log_level;
+ if (log_level) {
+ attr.log_buf = ptr_to_u64(log_buf);
+ attr.log_size = log_buf_sz;
+ } else {
+ attr.log_buf = ptr_to_u64(NULL);
+ attr.log_size = 0;
+ }
+
+ attr.prog_btf_fd = load_attr->prog_btf_fd;
+ attr.func_info_rec_size = load_attr->func_info_rec_size;
+ attr.func_info_cnt = load_attr->func_info_cnt;
+ attr.func_info = ptr_to_u64(load_attr->func_info);
+ attr.line_info_rec_size = load_attr->line_info_rec_size;
+ attr.line_info_cnt = load_attr->line_info_cnt;
+ attr.line_info = ptr_to_u64(load_attr->line_info);
+ if (load_attr->name)
+ memcpy(attr.prog_name, load_attr->name,
+ min(strlen(load_attr->name), BPF_OBJ_NAME_LEN - 1));
+ attr.prog_flags = load_attr->prog_flags;
+
+ fd = sys_bpf_prog_load(&attr, sizeof(attr));
+ if (fd >= 0)
+ return fd;
+
+ /* After bpf_prog_load, the kernel may modify certain attributes
+ * to give user space a hint how to deal with loading failure.
+ * Check to see whether we can make some changes and load again.
+ */
+ while (errno == E2BIG && (!finfo || !linfo)) {
+ if (!finfo && attr.func_info_cnt &&
+ attr.func_info_rec_size < load_attr->func_info_rec_size) {
+ /* try with corrected func info records */
+ finfo = alloc_zero_tailing_info(load_attr->func_info,
+ load_attr->func_info_cnt,
+ load_attr->func_info_rec_size,
+ attr.func_info_rec_size);
+ if (!finfo)
+ goto done;
+
+ attr.func_info = ptr_to_u64(finfo);
+ attr.func_info_rec_size = load_attr->func_info_rec_size;
+ } else if (!linfo && attr.line_info_cnt &&
+ attr.line_info_rec_size <
+ load_attr->line_info_rec_size) {
+ linfo = alloc_zero_tailing_info(load_attr->line_info,
+ load_attr->line_info_cnt,
+ load_attr->line_info_rec_size,
+ attr.line_info_rec_size);
+ if (!linfo)
+ goto done;
+
+ attr.line_info = ptr_to_u64(linfo);
+ attr.line_info_rec_size = load_attr->line_info_rec_size;
+ } else {
+ break;
+ }
+
+ fd = sys_bpf_prog_load(&attr, sizeof(attr));
+
+ if (fd >= 0)
+ goto done;
+ }
+
+ if (log_level || !log_buf)
+ goto done;
+
+ /* Try again with log */
+ attr.log_buf = ptr_to_u64(log_buf);
+ attr.log_size = log_buf_sz;
+ attr.log_level = 1;
+ log_buf[0] = 0;
+ fd = sys_bpf_prog_load(&attr, sizeof(attr));
+done:
+ free(finfo);
+ free(linfo);
+ return fd;
+}
+
+int bpf_load_program(enum bpf_prog_type type, const struct bpf_insn *insns,
+ size_t insns_cnt, const char *license,
+ __u32 kern_version, char *log_buf,
+ size_t log_buf_sz)
+{
+ struct bpf_load_program_attr load_attr;
+
+ memset(&load_attr, 0, sizeof(struct bpf_load_program_attr));
+ load_attr.prog_type = type;
+ load_attr.expected_attach_type = 0;
+ load_attr.name = NULL;
+ load_attr.insns = insns;
+ load_attr.insns_cnt = insns_cnt;
+ load_attr.license = license;
+ load_attr.kern_version = kern_version;
+
+ return bpf_load_program_xattr(&load_attr, log_buf, log_buf_sz);
+}
+
+int bpf_verify_program(enum bpf_prog_type type, const struct bpf_insn *insns,
+ size_t insns_cnt, __u32 prog_flags, const char *license,
+ __u32 kern_version, char *log_buf, size_t log_buf_sz,
+ int log_level)
+{
+ union bpf_attr attr;
+
+ memset(&attr, 0, sizeof(attr));
+ attr.prog_type = type;
+ attr.insn_cnt = (__u32)insns_cnt;
+ attr.insns = ptr_to_u64(insns);
+ attr.license = ptr_to_u64(license);
+ attr.log_buf = ptr_to_u64(log_buf);
+ attr.log_size = log_buf_sz;
+ attr.log_level = log_level;
+ log_buf[0] = 0;
+ attr.kern_version = kern_version;
+ attr.prog_flags = prog_flags;
+
+ return sys_bpf_prog_load(&attr, sizeof(attr));
+}
+
+int bpf_map_update_elem(int fd, const void *key, const void *value,
+ __u64 flags)
+{
+ union bpf_attr attr;
+
+ memset(&attr, 0, sizeof(attr));
+ attr.map_fd = fd;
+ attr.key = ptr_to_u64(key);
+ attr.value = ptr_to_u64(value);
+ attr.flags = flags;
+
+ return sys_bpf(BPF_MAP_UPDATE_ELEM, &attr, sizeof(attr));
+}
+
+int bpf_map_lookup_elem(int fd, const void *key, void *value)
+{
+ union bpf_attr attr;
+
+ memset(&attr, 0, sizeof(attr));
+ attr.map_fd = fd;
+ attr.key = ptr_to_u64(key);
+ attr.value = ptr_to_u64(value);
+
+ return sys_bpf(BPF_MAP_LOOKUP_ELEM, &attr, sizeof(attr));
+}
+
+int bpf_map_lookup_elem_flags(int fd, const void *key, void *value, __u64 flags)
+{
+ union bpf_attr attr;
+
+ memset(&attr, 0, sizeof(attr));
+ attr.map_fd = fd;
+ attr.key = ptr_to_u64(key);
+ attr.value = ptr_to_u64(value);
+ attr.flags = flags;
+
+ return sys_bpf(BPF_MAP_LOOKUP_ELEM, &attr, sizeof(attr));
+}
+
+int bpf_map_lookup_and_delete_elem(int fd, const void *key, void *value)
+{
+ union bpf_attr attr;
+
+ memset(&attr, 0, sizeof(attr));
+ attr.map_fd = fd;
+ attr.key = ptr_to_u64(key);
+ attr.value = ptr_to_u64(value);
+
+ return sys_bpf(BPF_MAP_LOOKUP_AND_DELETE_ELEM, &attr, sizeof(attr));
+}
+
+int bpf_map_delete_elem(int fd, const void *key)
+{
+ union bpf_attr attr;
+
+ memset(&attr, 0, sizeof(attr));
+ attr.map_fd = fd;
+ attr.key = ptr_to_u64(key);
+
+ return sys_bpf(BPF_MAP_DELETE_ELEM, &attr, sizeof(attr));
+}
+
+int bpf_map_get_next_key(int fd, const void *key, void *next_key)
+{
+ union bpf_attr attr;
+
+ memset(&attr, 0, sizeof(attr));
+ attr.map_fd = fd;
+ attr.key = ptr_to_u64(key);
+ attr.next_key = ptr_to_u64(next_key);
+
+ return sys_bpf(BPF_MAP_GET_NEXT_KEY, &attr, sizeof(attr));
+}
+
+int bpf_map_freeze(int fd)
+{
+ union bpf_attr attr;
+
+ memset(&attr, 0, sizeof(attr));
+ attr.map_fd = fd;
+
+ return sys_bpf(BPF_MAP_FREEZE, &attr, sizeof(attr));
+}
+
+int bpf_obj_pin(int fd, const char *pathname)
+{
+ union bpf_attr attr;
+
+ memset(&attr, 0, sizeof(attr));
+ attr.pathname = ptr_to_u64((void *)pathname);
+ attr.bpf_fd = fd;
+
+ return sys_bpf(BPF_OBJ_PIN, &attr, sizeof(attr));
+}
+
+int bpf_obj_get(const char *pathname)
+{
+ union bpf_attr attr;
+
+ memset(&attr, 0, sizeof(attr));
+ attr.pathname = ptr_to_u64((void *)pathname);
+
+ return sys_bpf(BPF_OBJ_GET, &attr, sizeof(attr));
+}
+
+int bpf_prog_attach(int prog_fd, int target_fd, enum bpf_attach_type type,
+ unsigned int flags)
+{
+ union bpf_attr attr;
+
+ memset(&attr, 0, sizeof(attr));
+ attr.target_fd = target_fd;
+ attr.attach_bpf_fd = prog_fd;
+ attr.attach_type = type;
+ attr.attach_flags = flags;
+
+ return sys_bpf(BPF_PROG_ATTACH, &attr, sizeof(attr));
+}
+
+int bpf_prog_detach(int target_fd, enum bpf_attach_type type)
+{
+ union bpf_attr attr;
+
+ memset(&attr, 0, sizeof(attr));
+ attr.target_fd = target_fd;
+ attr.attach_type = type;
+
+ return sys_bpf(BPF_PROG_DETACH, &attr, sizeof(attr));
+}
+
+int bpf_prog_detach2(int prog_fd, int target_fd, enum bpf_attach_type type)
+{
+ union bpf_attr attr;
+
+ memset(&attr, 0, sizeof(attr));
+ attr.target_fd = target_fd;
+ attr.attach_bpf_fd = prog_fd;
+ attr.attach_type = type;
+
+ return sys_bpf(BPF_PROG_DETACH, &attr, sizeof(attr));
+}
+
+int bpf_prog_query(int target_fd, enum bpf_attach_type type, __u32 query_flags,
+ __u32 *attach_flags, __u32 *prog_ids, __u32 *prog_cnt)
+{
+ union bpf_attr attr;
+ int ret;
+
+ memset(&attr, 0, sizeof(attr));
+ attr.query.target_fd = target_fd;
+ attr.query.attach_type = type;
+ attr.query.query_flags = query_flags;
+ attr.query.prog_cnt = *prog_cnt;
+ attr.query.prog_ids = ptr_to_u64(prog_ids);
+
+ ret = sys_bpf(BPF_PROG_QUERY, &attr, sizeof(attr));
+ if (attach_flags)
+ *attach_flags = attr.query.attach_flags;
+ *prog_cnt = attr.query.prog_cnt;
+ return ret;
+}
+
+int bpf_prog_test_run(int prog_fd, int repeat, void *data, __u32 size,
+ void *data_out, __u32 *size_out, __u32 *retval,
+ __u32 *duration)
+{
+ union bpf_attr attr;
+ int ret;
+
+ memset(&attr, 0, sizeof(attr));
+ attr.test.prog_fd = prog_fd;
+ attr.test.data_in = ptr_to_u64(data);
+ attr.test.data_out = ptr_to_u64(data_out);
+ attr.test.data_size_in = size;
+ attr.test.repeat = repeat;
+
+ ret = sys_bpf(BPF_PROG_TEST_RUN, &attr, sizeof(attr));
+ if (size_out)
+ *size_out = attr.test.data_size_out;
+ if (retval)
+ *retval = attr.test.retval;
+ if (duration)
+ *duration = attr.test.duration;
+ return ret;
+}
+
+int bpf_prog_test_run_xattr(struct bpf_prog_test_run_attr *test_attr)
+{
+ union bpf_attr attr;
+ int ret;
+
+ if (!test_attr->data_out && test_attr->data_size_out > 0)
+ return -EINVAL;
+
+ memset(&attr, 0, sizeof(attr));
+ attr.test.prog_fd = test_attr->prog_fd;
+ attr.test.data_in = ptr_to_u64(test_attr->data_in);
+ attr.test.data_out = ptr_to_u64(test_attr->data_out);
+ attr.test.data_size_in = test_attr->data_size_in;
+ attr.test.data_size_out = test_attr->data_size_out;
+ attr.test.ctx_in = ptr_to_u64(test_attr->ctx_in);
+ attr.test.ctx_out = ptr_to_u64(test_attr->ctx_out);
+ attr.test.ctx_size_in = test_attr->ctx_size_in;
+ attr.test.ctx_size_out = test_attr->ctx_size_out;
+ attr.test.repeat = test_attr->repeat;
+
+ ret = sys_bpf(BPF_PROG_TEST_RUN, &attr, sizeof(attr));
+ test_attr->data_size_out = attr.test.data_size_out;
+ test_attr->ctx_size_out = attr.test.ctx_size_out;
+ test_attr->retval = attr.test.retval;
+ test_attr->duration = attr.test.duration;
+ return ret;
+}
+
+static int bpf_obj_get_next_id(__u32 start_id, __u32 *next_id, int cmd)
+{
+ union bpf_attr attr;
+ int err;
+
+ memset(&attr, 0, sizeof(attr));
+ attr.start_id = start_id;
+
+ err = sys_bpf(cmd, &attr, sizeof(attr));
+ if (!err)
+ *next_id = attr.next_id;
+
+ return err;
+}
+
+int bpf_prog_get_next_id(__u32 start_id, __u32 *next_id)
+{
+ return bpf_obj_get_next_id(start_id, next_id, BPF_PROG_GET_NEXT_ID);
+}
+
+int bpf_map_get_next_id(__u32 start_id, __u32 *next_id)
+{
+ return bpf_obj_get_next_id(start_id, next_id, BPF_MAP_GET_NEXT_ID);
+}
+
+int bpf_btf_get_next_id(__u32 start_id, __u32 *next_id)
+{
+ return bpf_obj_get_next_id(start_id, next_id, BPF_BTF_GET_NEXT_ID);
+}
+
+int bpf_prog_get_fd_by_id(__u32 id)
+{
+ union bpf_attr attr;
+
+ memset(&attr, 0, sizeof(attr));
+ attr.prog_id = id;
+
+ return sys_bpf(BPF_PROG_GET_FD_BY_ID, &attr, sizeof(attr));
+}
+
+int bpf_map_get_fd_by_id(__u32 id)
+{
+ union bpf_attr attr;
+
+ memset(&attr, 0, sizeof(attr));
+ attr.map_id = id;
+
+ return sys_bpf(BPF_MAP_GET_FD_BY_ID, &attr, sizeof(attr));
+}
+
+int bpf_btf_get_fd_by_id(__u32 id)
+{
+ union bpf_attr attr;
+
+ memset(&attr, 0, sizeof(attr));
+ attr.btf_id = id;
+
+ return sys_bpf(BPF_BTF_GET_FD_BY_ID, &attr, sizeof(attr));
+}
+
+int bpf_obj_get_info_by_fd(int prog_fd, void *info, __u32 *info_len)
+{
+ union bpf_attr attr;
+ int err;
+
+ memset(&attr, 0, sizeof(attr));
+ attr.info.bpf_fd = prog_fd;
+ attr.info.info_len = *info_len;
+ attr.info.info = ptr_to_u64(info);
+
+ err = sys_bpf(BPF_OBJ_GET_INFO_BY_FD, &attr, sizeof(attr));
+ if (!err)
+ *info_len = attr.info.info_len;
+
+ return err;
+}
+
+int bpf_raw_tracepoint_open(const char *name, int prog_fd)
+{
+ union bpf_attr attr;
+
+ memset(&attr, 0, sizeof(attr));
+ attr.raw_tracepoint.name = ptr_to_u64(name);
+ attr.raw_tracepoint.prog_fd = prog_fd;
+
+ return sys_bpf(BPF_RAW_TRACEPOINT_OPEN, &attr, sizeof(attr));
+}
+
+int bpf_load_btf(void *btf, __u32 btf_size, char *log_buf, __u32 log_buf_size,
+ bool do_log)
+{
+ union bpf_attr attr = {};
+ int fd;
+
+ attr.btf = ptr_to_u64(btf);
+ attr.btf_size = btf_size;
+
+retry:
+ if (do_log && log_buf && log_buf_size) {
+ attr.btf_log_level = 1;
+ attr.btf_log_size = log_buf_size;
+ attr.btf_log_buf = ptr_to_u64(log_buf);
+ }
+
+ fd = sys_bpf(BPF_BTF_LOAD, &attr, sizeof(attr));
+ if (fd == -1 && !do_log && log_buf && log_buf_size) {
+ do_log = true;
+ goto retry;
+ }
+
+ return fd;
+}
+
+int bpf_task_fd_query(int pid, int fd, __u32 flags, char *buf, __u32 *buf_len,
+ __u32 *prog_id, __u32 *fd_type, __u64 *probe_offset,
+ __u64 *probe_addr)
+{
+ union bpf_attr attr = {};
+ int err;
+
+ attr.task_fd_query.pid = pid;
+ attr.task_fd_query.fd = fd;
+ attr.task_fd_query.flags = flags;
+ attr.task_fd_query.buf = ptr_to_u64(buf);
+ attr.task_fd_query.buf_len = *buf_len;
+
+ err = sys_bpf(BPF_TASK_FD_QUERY, &attr, sizeof(attr));
+ *buf_len = attr.task_fd_query.buf_len;
+ *prog_id = attr.task_fd_query.prog_id;
+ *fd_type = attr.task_fd_query.fd_type;
+ *probe_offset = attr.task_fd_query.probe_offset;
+ *probe_addr = attr.task_fd_query.probe_addr;
+
+ return err;
+}
diff --git a/src/contrib/libbpf/bpf/bpf.h b/src/contrib/libbpf/bpf/bpf.h
new file mode 100644
index 0000000..3c791fa
--- /dev/null
+++ b/src/contrib/libbpf/bpf/bpf.h
@@ -0,0 +1,184 @@
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
+
+/*
+ * common eBPF ELF operations.
+ *
+ * Copyright (C) 2013-2015 Alexei Starovoitov <ast@kernel.org>
+ * Copyright (C) 2015 Wang Nan <wangnan0@huawei.com>
+ * Copyright (C) 2015 Huawei Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation;
+ * version 2.1 of the License (not later!)
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses>
+ */
+#ifndef __LIBBPF_BPF_H
+#define __LIBBPF_BPF_H
+
+#include <linux/bpf.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifndef LIBBPF_API
+#define LIBBPF_API __attribute__((visibility("default")))
+#endif
+
+struct bpf_create_map_attr {
+ const char *name;
+ enum bpf_map_type map_type;
+ __u32 map_flags;
+ __u32 key_size;
+ __u32 value_size;
+ __u32 max_entries;
+ __u32 numa_node;
+ __u32 btf_fd;
+ __u32 btf_key_type_id;
+ __u32 btf_value_type_id;
+ __u32 map_ifindex;
+ __u32 inner_map_fd;
+};
+
+LIBBPF_API int
+bpf_create_map_xattr(const struct bpf_create_map_attr *create_attr);
+LIBBPF_API int bpf_create_map_node(enum bpf_map_type map_type, const char *name,
+ int key_size, int value_size,
+ int max_entries, __u32 map_flags, int node);
+LIBBPF_API int bpf_create_map_name(enum bpf_map_type map_type, const char *name,
+ int key_size, int value_size,
+ int max_entries, __u32 map_flags);
+LIBBPF_API int bpf_create_map(enum bpf_map_type map_type, int key_size,
+ int value_size, int max_entries, __u32 map_flags);
+LIBBPF_API int bpf_create_map_in_map_node(enum bpf_map_type map_type,
+ const char *name, int key_size,
+ int inner_map_fd, int max_entries,
+ __u32 map_flags, int node);
+LIBBPF_API int bpf_create_map_in_map(enum bpf_map_type map_type,
+ const char *name, int key_size,
+ int inner_map_fd, int max_entries,
+ __u32 map_flags);
+
+struct bpf_load_program_attr {
+ enum bpf_prog_type prog_type;
+ enum bpf_attach_type expected_attach_type;
+ const char *name;
+ const struct bpf_insn *insns;
+ size_t insns_cnt;
+ const char *license;
+ union {
+ __u32 kern_version;
+ __u32 attach_prog_fd;
+ };
+ union {
+ __u32 prog_ifindex;
+ __u32 attach_btf_id;
+ };
+ __u32 prog_btf_fd;
+ __u32 func_info_rec_size;
+ const void *func_info;
+ __u32 func_info_cnt;
+ __u32 line_info_rec_size;
+ const void *line_info;
+ __u32 line_info_cnt;
+ __u32 log_level;
+ __u32 prog_flags;
+};
+
+/* Flags to direct loading requirements */
+#define MAPS_RELAX_COMPAT 0x01
+
+/* Recommend log buffer size */
+#define BPF_LOG_BUF_SIZE (UINT32_MAX >> 8) /* verifier maximum in kernels <= 5.1 */
+LIBBPF_API int
+bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr,
+ char *log_buf, size_t log_buf_sz);
+LIBBPF_API int bpf_load_program(enum bpf_prog_type type,
+ const struct bpf_insn *insns, size_t insns_cnt,
+ const char *license, __u32 kern_version,
+ char *log_buf, size_t log_buf_sz);
+LIBBPF_API int bpf_verify_program(enum bpf_prog_type type,
+ const struct bpf_insn *insns,
+ size_t insns_cnt, __u32 prog_flags,
+ const char *license, __u32 kern_version,
+ char *log_buf, size_t log_buf_sz,
+ int log_level);
+
+LIBBPF_API int bpf_map_update_elem(int fd, const void *key, const void *value,
+ __u64 flags);
+
+LIBBPF_API int bpf_map_lookup_elem(int fd, const void *key, void *value);
+LIBBPF_API int bpf_map_lookup_elem_flags(int fd, const void *key, void *value,
+ __u64 flags);
+LIBBPF_API int bpf_map_lookup_and_delete_elem(int fd, const void *key,
+ void *value);
+LIBBPF_API int bpf_map_delete_elem(int fd, const void *key);
+LIBBPF_API int bpf_map_get_next_key(int fd, const void *key, void *next_key);
+LIBBPF_API int bpf_map_freeze(int fd);
+LIBBPF_API int bpf_obj_pin(int fd, const char *pathname);
+LIBBPF_API int bpf_obj_get(const char *pathname);
+LIBBPF_API int bpf_prog_attach(int prog_fd, int attachable_fd,
+ enum bpf_attach_type type, unsigned int flags);
+LIBBPF_API int bpf_prog_detach(int attachable_fd, enum bpf_attach_type type);
+LIBBPF_API int bpf_prog_detach2(int prog_fd, int attachable_fd,
+ enum bpf_attach_type type);
+
+struct bpf_prog_test_run_attr {
+ int prog_fd;
+ int repeat;
+ const void *data_in;
+ __u32 data_size_in;
+ void *data_out; /* optional */
+ __u32 data_size_out; /* in: max length of data_out
+ * out: length of data_out */
+ __u32 retval; /* out: return code of the BPF program */
+ __u32 duration; /* out: average per repetition in ns */
+ const void *ctx_in; /* optional */
+ __u32 ctx_size_in;
+ void *ctx_out; /* optional */
+ __u32 ctx_size_out; /* in: max length of ctx_out
+ * out: length of cxt_out */
+};
+
+LIBBPF_API int bpf_prog_test_run_xattr(struct bpf_prog_test_run_attr *test_attr);
+
+/*
+ * bpf_prog_test_run does not check that data_out is large enough. Consider
+ * using bpf_prog_test_run_xattr instead.
+ */
+LIBBPF_API int bpf_prog_test_run(int prog_fd, int repeat, void *data,
+ __u32 size, void *data_out, __u32 *size_out,
+ __u32 *retval, __u32 *duration);
+LIBBPF_API int bpf_prog_get_next_id(__u32 start_id, __u32 *next_id);
+LIBBPF_API int bpf_map_get_next_id(__u32 start_id, __u32 *next_id);
+LIBBPF_API int bpf_btf_get_next_id(__u32 start_id, __u32 *next_id);
+LIBBPF_API int bpf_prog_get_fd_by_id(__u32 id);
+LIBBPF_API int bpf_map_get_fd_by_id(__u32 id);
+LIBBPF_API int bpf_btf_get_fd_by_id(__u32 id);
+LIBBPF_API int bpf_obj_get_info_by_fd(int prog_fd, void *info, __u32 *info_len);
+LIBBPF_API int bpf_prog_query(int target_fd, enum bpf_attach_type type,
+ __u32 query_flags, __u32 *attach_flags,
+ __u32 *prog_ids, __u32 *prog_cnt);
+LIBBPF_API int bpf_raw_tracepoint_open(const char *name, int prog_fd);
+LIBBPF_API int bpf_load_btf(void *btf, __u32 btf_size, char *log_buf,
+ __u32 log_buf_size, bool do_log);
+LIBBPF_API int bpf_task_fd_query(int pid, int fd, __u32 flags, char *buf,
+ __u32 *buf_len, __u32 *prog_id, __u32 *fd_type,
+ __u64 *probe_offset, __u64 *probe_addr);
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#endif /* __LIBBPF_BPF_H */
diff --git a/src/contrib/libbpf/bpf/bpf_core_read.h b/src/contrib/libbpf/bpf/bpf_core_read.h
new file mode 100644
index 0000000..7009dc9
--- /dev/null
+++ b/src/contrib/libbpf/bpf/bpf_core_read.h
@@ -0,0 +1,263 @@
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
+#ifndef __BPF_CORE_READ_H__
+#define __BPF_CORE_READ_H__
+
+/*
+ * enum bpf_field_info_kind is passed as a second argument into
+ * __builtin_preserve_field_info() built-in to get a specific aspect of
+ * a field, captured as a first argument. __builtin_preserve_field_info(field,
+ * info_kind) returns __u32 integer and produces BTF field relocation, which
+ * is understood and processed by libbpf during BPF object loading. See
+ * selftests/bpf for examples.
+ */
+enum bpf_field_info_kind {
+ BPF_FIELD_BYTE_OFFSET = 0, /* field byte offset */
+ BPF_FIELD_BYTE_SIZE = 1,
+ BPF_FIELD_EXISTS = 2, /* field existence in target kernel */
+ BPF_FIELD_SIGNED = 3,
+ BPF_FIELD_LSHIFT_U64 = 4,
+ BPF_FIELD_RSHIFT_U64 = 5,
+};
+
+#define __CORE_RELO(src, field, info) \
+ __builtin_preserve_field_info((src)->field, BPF_FIELD_##info)
+
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+#define __CORE_BITFIELD_PROBE_READ(dst, src, fld) \
+ bpf_probe_read((void *)dst, \
+ __CORE_RELO(src, fld, BYTE_SIZE), \
+ (const void *)src + __CORE_RELO(src, fld, BYTE_OFFSET))
+#else
+/* semantics of LSHIFT_64 assumes loading values into low-ordered bytes, so
+ * for big-endian we need to adjust destination pointer accordingly, based on
+ * field byte size
+ */
+#define __CORE_BITFIELD_PROBE_READ(dst, src, fld) \
+ bpf_probe_read((void *)dst + (8 - __CORE_RELO(src, fld, BYTE_SIZE)), \
+ __CORE_RELO(src, fld, BYTE_SIZE), \
+ (const void *)src + __CORE_RELO(src, fld, BYTE_OFFSET))
+#endif
+
+/*
+ * Extract bitfield, identified by s->field, and return its value as u64.
+ * All this is done in relocatable manner, so bitfield changes such as
+ * signedness, bit size, offset changes, this will be handled automatically.
+ * This version of macro is using bpf_probe_read() to read underlying integer
+ * storage. Macro functions as an expression and its return type is
+ * bpf_probe_read()'s return value: 0, on success, <0 on error.
+ */
+#define BPF_CORE_READ_BITFIELD_PROBED(s, field) ({ \
+ unsigned long long val = 0; \
+ \
+ __CORE_BITFIELD_PROBE_READ(&val, s, field); \
+ val <<= __CORE_RELO(s, field, LSHIFT_U64); \
+ if (__CORE_RELO(s, field, SIGNED)) \
+ val = ((long long)val) >> __CORE_RELO(s, field, RSHIFT_U64); \
+ else \
+ val = val >> __CORE_RELO(s, field, RSHIFT_U64); \
+ val; \
+})
+
+/*
+ * Extract bitfield, identified by s->field, and return its value as u64.
+ * This version of macro is using direct memory reads and should be used from
+ * BPF program types that support such functionality (e.g., typed raw
+ * tracepoints).
+ */
+#define BPF_CORE_READ_BITFIELD(s, field) ({ \
+ const void *p = (const void *)s + __CORE_RELO(s, field, BYTE_OFFSET); \
+ unsigned long long val; \
+ \
+ switch (__CORE_RELO(s, field, BYTE_SIZE)) { \
+ case 1: val = *(const unsigned char *)p; \
+ case 2: val = *(const unsigned short *)p; \
+ case 4: val = *(const unsigned int *)p; \
+ case 8: val = *(const unsigned long long *)p; \
+ } \
+ val <<= __CORE_RELO(s, field, LSHIFT_U64); \
+ if (__CORE_RELO(s, field, SIGNED)) \
+ val = ((long long)val) >> __CORE_RELO(s, field, RSHIFT_U64); \
+ else \
+ val = val >> __CORE_RELO(s, field, RSHIFT_U64); \
+ val; \
+})
+
+/*
+ * Convenience macro to check that field actually exists in target kernel's.
+ * Returns:
+ * 1, if matching field is present in target kernel;
+ * 0, if no matching field found.
+ */
+#define bpf_core_field_exists(field) \
+ __builtin_preserve_field_info(field, BPF_FIELD_EXISTS)
+
+/*
+ * Convenience macro to get byte size of a field. Works for integers,
+ * struct/unions, pointers, arrays, and enums.
+ */
+#define bpf_core_field_size(field) \
+ __builtin_preserve_field_info(field, BPF_FIELD_BYTE_SIZE)
+
+/*
+ * bpf_core_read() abstracts away bpf_probe_read() call and captures offset
+ * relocation for source address using __builtin_preserve_access_index()
+ * built-in, provided by Clang.
+ *
+ * __builtin_preserve_access_index() takes as an argument an expression of
+ * taking an address of a field within struct/union. It makes compiler emit
+ * a relocation, which records BTF type ID describing root struct/union and an
+ * accessor string which describes exact embedded field that was used to take
+ * an address. See detailed description of this relocation format and
+ * semantics in comments to struct bpf_field_reloc in libbpf_internal.h.
+ *
+ * This relocation allows libbpf to adjust BPF instruction to use correct
+ * actual field offset, based on target kernel BTF type that matches original
+ * (local) BTF, used to record relocation.
+ */
+#define bpf_core_read(dst, sz, src) \
+ bpf_probe_read(dst, sz, \
+ (const void *)__builtin_preserve_access_index(src))
+
+/*
+ * bpf_core_read_str() is a thin wrapper around bpf_probe_read_str()
+ * additionally emitting BPF CO-RE field relocation for specified source
+ * argument.
+ */
+#define bpf_core_read_str(dst, sz, src) \
+ bpf_probe_read_str(dst, sz, \
+ (const void *)__builtin_preserve_access_index(src))
+
+#define ___concat(a, b) a ## b
+#define ___apply(fn, n) ___concat(fn, n)
+#define ___nth(_1, _2, _3, _4, _5, _6, _7, _8, _9, _10, __11, N, ...) N
+
+/*
+ * return number of provided arguments; used for switch-based variadic macro
+ * definitions (see ___last, ___arrow, etc below)
+ */
+#define ___narg(...) ___nth(_, ##__VA_ARGS__, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0)
+/*
+ * return 0 if no arguments are passed, N - otherwise; used for
+ * recursively-defined macros to specify termination (0) case, and generic
+ * (N) case (e.g., ___read_ptrs, ___core_read)
+ */
+#define ___empty(...) ___nth(_, ##__VA_ARGS__, N, N, N, N, N, N, N, N, N, N, 0)
+
+#define ___last1(x) x
+#define ___last2(a, x) x
+#define ___last3(a, b, x) x
+#define ___last4(a, b, c, x) x
+#define ___last5(a, b, c, d, x) x
+#define ___last6(a, b, c, d, e, x) x
+#define ___last7(a, b, c, d, e, f, x) x
+#define ___last8(a, b, c, d, e, f, g, x) x
+#define ___last9(a, b, c, d, e, f, g, h, x) x
+#define ___last10(a, b, c, d, e, f, g, h, i, x) x
+#define ___last(...) ___apply(___last, ___narg(__VA_ARGS__))(__VA_ARGS__)
+
+#define ___nolast2(a, _) a
+#define ___nolast3(a, b, _) a, b
+#define ___nolast4(a, b, c, _) a, b, c
+#define ___nolast5(a, b, c, d, _) a, b, c, d
+#define ___nolast6(a, b, c, d, e, _) a, b, c, d, e
+#define ___nolast7(a, b, c, d, e, f, _) a, b, c, d, e, f
+#define ___nolast8(a, b, c, d, e, f, g, _) a, b, c, d, e, f, g
+#define ___nolast9(a, b, c, d, e, f, g, h, _) a, b, c, d, e, f, g, h
+#define ___nolast10(a, b, c, d, e, f, g, h, i, _) a, b, c, d, e, f, g, h, i
+#define ___nolast(...) ___apply(___nolast, ___narg(__VA_ARGS__))(__VA_ARGS__)
+
+#define ___arrow1(a) a
+#define ___arrow2(a, b) a->b
+#define ___arrow3(a, b, c) a->b->c
+#define ___arrow4(a, b, c, d) a->b->c->d
+#define ___arrow5(a, b, c, d, e) a->b->c->d->e
+#define ___arrow6(a, b, c, d, e, f) a->b->c->d->e->f
+#define ___arrow7(a, b, c, d, e, f, g) a->b->c->d->e->f->g
+#define ___arrow8(a, b, c, d, e, f, g, h) a->b->c->d->e->f->g->h
+#define ___arrow9(a, b, c, d, e, f, g, h, i) a->b->c->d->e->f->g->h->i
+#define ___arrow10(a, b, c, d, e, f, g, h, i, j) a->b->c->d->e->f->g->h->i->j
+#define ___arrow(...) ___apply(___arrow, ___narg(__VA_ARGS__))(__VA_ARGS__)
+
+#define ___type(...) typeof(___arrow(__VA_ARGS__))
+
+#define ___read(read_fn, dst, src_type, src, accessor) \
+ read_fn((void *)(dst), sizeof(*(dst)), &((src_type)(src))->accessor)
+
+/* "recursively" read a sequence of inner pointers using local __t var */
+#define ___rd_first(src, a) ___read(bpf_core_read, &__t, ___type(src), src, a);
+#define ___rd_last(...) \
+ ___read(bpf_core_read, &__t, \
+ ___type(___nolast(__VA_ARGS__)), __t, ___last(__VA_ARGS__));
+#define ___rd_p1(...) const void *__t; ___rd_first(__VA_ARGS__)
+#define ___rd_p2(...) ___rd_p1(___nolast(__VA_ARGS__)) ___rd_last(__VA_ARGS__)
+#define ___rd_p3(...) ___rd_p2(___nolast(__VA_ARGS__)) ___rd_last(__VA_ARGS__)
+#define ___rd_p4(...) ___rd_p3(___nolast(__VA_ARGS__)) ___rd_last(__VA_ARGS__)
+#define ___rd_p5(...) ___rd_p4(___nolast(__VA_ARGS__)) ___rd_last(__VA_ARGS__)
+#define ___rd_p6(...) ___rd_p5(___nolast(__VA_ARGS__)) ___rd_last(__VA_ARGS__)
+#define ___rd_p7(...) ___rd_p6(___nolast(__VA_ARGS__)) ___rd_last(__VA_ARGS__)
+#define ___rd_p8(...) ___rd_p7(___nolast(__VA_ARGS__)) ___rd_last(__VA_ARGS__)
+#define ___rd_p9(...) ___rd_p8(___nolast(__VA_ARGS__)) ___rd_last(__VA_ARGS__)
+#define ___read_ptrs(src, ...) \
+ ___apply(___rd_p, ___narg(__VA_ARGS__))(src, __VA_ARGS__)
+
+#define ___core_read0(fn, dst, src, a) \
+ ___read(fn, dst, ___type(src), src, a);
+#define ___core_readN(fn, dst, src, ...) \
+ ___read_ptrs(src, ___nolast(__VA_ARGS__)) \
+ ___read(fn, dst, ___type(src, ___nolast(__VA_ARGS__)), __t, \
+ ___last(__VA_ARGS__));
+#define ___core_read(fn, dst, src, a, ...) \
+ ___apply(___core_read, ___empty(__VA_ARGS__))(fn, dst, \
+ src, a, ##__VA_ARGS__)
+
+/*
+ * BPF_CORE_READ_INTO() is a more performance-conscious variant of
+ * BPF_CORE_READ(), in which final field is read into user-provided storage.
+ * See BPF_CORE_READ() below for more details on general usage.
+ */
+#define BPF_CORE_READ_INTO(dst, src, a, ...) \
+ ({ \
+ ___core_read(bpf_core_read, dst, src, a, ##__VA_ARGS__) \
+ })
+
+/*
+ * BPF_CORE_READ_STR_INTO() does same "pointer chasing" as
+ * BPF_CORE_READ() for intermediate pointers, but then executes (and returns
+ * corresponding error code) bpf_core_read_str() for final string read.
+ */
+#define BPF_CORE_READ_STR_INTO(dst, src, a, ...) \
+ ({ \
+ ___core_read(bpf_core_read_str, dst, src, a, ##__VA_ARGS__) \
+ })
+
+/*
+ * BPF_CORE_READ() is used to simplify BPF CO-RE relocatable read, especially
+ * when there are few pointer chasing steps.
+ * E.g., what in non-BPF world (or in BPF w/ BCC) would be something like:
+ * int x = s->a.b.c->d.e->f->g;
+ * can be succinctly achieved using BPF_CORE_READ as:
+ * int x = BPF_CORE_READ(s, a.b.c, d.e, f, g);
+ *
+ * BPF_CORE_READ will decompose above statement into 4 bpf_core_read (BPF
+ * CO-RE relocatable bpf_probe_read() wrapper) calls, logically equivalent to:
+ * 1. const void *__t = s->a.b.c;
+ * 2. __t = __t->d.e;
+ * 3. __t = __t->f;
+ * 4. return __t->g;
+ *
+ * Equivalence is logical, because there is a heavy type casting/preservation
+ * involved, as well as all the reads are happening through bpf_probe_read()
+ * calls using __builtin_preserve_access_index() to emit CO-RE relocations.
+ *
+ * N.B. Only up to 9 "field accessors" are supported, which should be more
+ * than enough for any practical purpose.
+ */
+#define BPF_CORE_READ(src, a, ...) \
+ ({ \
+ ___type(src, a, ##__VA_ARGS__) __r; \
+ BPF_CORE_READ_INTO(&__r, src, a, ##__VA_ARGS__); \
+ __r; \
+ })
+
+#endif
+
diff --git a/src/contrib/libbpf/bpf/bpf_endian.h b/src/contrib/libbpf/bpf/bpf_endian.h
new file mode 100644
index 0000000..fbe2800
--- /dev/null
+++ b/src/contrib/libbpf/bpf/bpf_endian.h
@@ -0,0 +1,72 @@
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
+#ifndef __BPF_ENDIAN__
+#define __BPF_ENDIAN__
+
+#include <linux/stddef.h>
+#include <linux/swab.h>
+
+/* LLVM's BPF target selects the endianness of the CPU
+ * it compiles on, or the user specifies (bpfel/bpfeb),
+ * respectively. The used __BYTE_ORDER__ is defined by
+ * the compiler, we cannot rely on __BYTE_ORDER from
+ * libc headers, since it doesn't reflect the actual
+ * requested byte order.
+ *
+ * Note, LLVM's BPF target has different __builtin_bswapX()
+ * semantics. It does map to BPF_ALU | BPF_END | BPF_TO_BE
+ * in bpfel and bpfeb case, which means below, that we map
+ * to cpu_to_be16(). We could use it unconditionally in BPF
+ * case, but better not rely on it, so that this header here
+ * can be used from application and BPF program side, which
+ * use different targets.
+ */
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+# define __bpf_ntohs(x) __builtin_bswap16(x)
+# define __bpf_htons(x) __builtin_bswap16(x)
+# define __bpf_constant_ntohs(x) ___constant_swab16(x)
+# define __bpf_constant_htons(x) ___constant_swab16(x)
+# define __bpf_ntohl(x) __builtin_bswap32(x)
+# define __bpf_htonl(x) __builtin_bswap32(x)
+# define __bpf_constant_ntohl(x) ___constant_swab32(x)
+# define __bpf_constant_htonl(x) ___constant_swab32(x)
+# define __bpf_be64_to_cpu(x) __builtin_bswap64(x)
+# define __bpf_cpu_to_be64(x) __builtin_bswap64(x)
+# define __bpf_constant_be64_to_cpu(x) ___constant_swab64(x)
+# define __bpf_constant_cpu_to_be64(x) ___constant_swab64(x)
+#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+# define __bpf_ntohs(x) (x)
+# define __bpf_htons(x) (x)
+# define __bpf_constant_ntohs(x) (x)
+# define __bpf_constant_htons(x) (x)
+# define __bpf_ntohl(x) (x)
+# define __bpf_htonl(x) (x)
+# define __bpf_constant_ntohl(x) (x)
+# define __bpf_constant_htonl(x) (x)
+# define __bpf_be64_to_cpu(x) (x)
+# define __bpf_cpu_to_be64(x) (x)
+# define __bpf_constant_be64_to_cpu(x) (x)
+# define __bpf_constant_cpu_to_be64(x) (x)
+#else
+# error "Fix your compiler's __BYTE_ORDER__?!"
+#endif
+
+#define bpf_htons(x) \
+ (__builtin_constant_p(x) ? \
+ __bpf_constant_htons(x) : __bpf_htons(x))
+#define bpf_ntohs(x) \
+ (__builtin_constant_p(x) ? \
+ __bpf_constant_ntohs(x) : __bpf_ntohs(x))
+#define bpf_htonl(x) \
+ (__builtin_constant_p(x) ? \
+ __bpf_constant_htonl(x) : __bpf_htonl(x))
+#define bpf_ntohl(x) \
+ (__builtin_constant_p(x) ? \
+ __bpf_constant_ntohl(x) : __bpf_ntohl(x))
+#define bpf_cpu_to_be64(x) \
+ (__builtin_constant_p(x) ? \
+ __bpf_constant_cpu_to_be64(x) : __bpf_cpu_to_be64(x))
+#define bpf_be64_to_cpu(x) \
+ (__builtin_constant_p(x) ? \
+ __bpf_constant_be64_to_cpu(x) : __bpf_be64_to_cpu(x))
+
+#endif /* __BPF_ENDIAN__ */
diff --git a/src/contrib/libbpf/bpf/bpf_helper_defs.h b/src/contrib/libbpf/bpf/bpf_helper_defs.h
new file mode 100644
index 0000000..1f357f6
--- /dev/null
+++ b/src/contrib/libbpf/bpf/bpf_helper_defs.h
@@ -0,0 +1,2759 @@
+/* This is auto-generated file. See bpf_helpers_doc.py for details. */
+
+/* Forward declarations of BPF structs */
+struct bpf_fib_lookup;
+struct bpf_perf_event_data;
+struct bpf_perf_event_value;
+struct bpf_sock;
+struct bpf_sock_addr;
+struct bpf_sock_ops;
+struct bpf_sock_tuple;
+struct bpf_spin_lock;
+struct bpf_sysctl;
+struct bpf_tcp_sock;
+struct bpf_tunnel_key;
+struct bpf_xfrm_state;
+struct pt_regs;
+struct sk_reuseport_md;
+struct sockaddr;
+struct tcphdr;
+struct __sk_buff;
+struct sk_msg_md;
+struct xdp_md;
+
+/*
+ * bpf_map_lookup_elem
+ *
+ * Perform a lookup in *map* for an entry associated to *key*.
+ *
+ * Returns
+ * Map value associated to *key*, or **NULL** if no entry was
+ * found.
+ */
+static void *(*bpf_map_lookup_elem)(void *map, const void *key) = (void *) 1;
+
+/*
+ * bpf_map_update_elem
+ *
+ * Add or update the value of the entry associated to *key* in
+ * *map* with *value*. *flags* is one of:
+ *
+ * **BPF_NOEXIST**
+ * The entry for *key* must not exist in the map.
+ * **BPF_EXIST**
+ * The entry for *key* must already exist in the map.
+ * **BPF_ANY**
+ * No condition on the existence of the entry for *key*.
+ *
+ * Flag value **BPF_NOEXIST** cannot be used for maps of types
+ * **BPF_MAP_TYPE_ARRAY** or **BPF_MAP_TYPE_PERCPU_ARRAY** (all
+ * elements always exist), the helper would return an error.
+ *
+ * Returns
+ * 0 on success, or a negative error in case of failure.
+ */
+static int (*bpf_map_update_elem)(void *map, const void *key, const void *value, __u64 flags) = (void *) 2;
+
+/*
+ * bpf_map_delete_elem
+ *
+ * Delete entry with *key* from *map*.
+ *
+ * Returns
+ * 0 on success, or a negative error in case of failure.
+ */
+static int (*bpf_map_delete_elem)(void *map, const void *key) = (void *) 3;
+
+/*
+ * bpf_probe_read
+ *
+ * For tracing programs, safely attempt to read *size* bytes from
+ * kernel space address *unsafe_ptr* and store the data in *dst*.
+ *
+ * Generally, use bpf_probe_read_user() or bpf_probe_read_kernel()
+ * instead.
+ *
+ * Returns
+ * 0 on success, or a negative error in case of failure.
+ */
+static int (*bpf_probe_read)(void *dst, __u32 size, const void *unsafe_ptr) = (void *) 4;
+
+/*
+ * bpf_ktime_get_ns
+ *
+ * Return the time elapsed since system boot, in nanoseconds.
+ *
+ * Returns
+ * Current *ktime*.
+ */
+static __u64 (*bpf_ktime_get_ns)(void) = (void *) 5;
+
+/*
+ * bpf_trace_printk
+ *
+ * This helper is a "printk()-like" facility for debugging. It
+ * prints a message defined by format *fmt* (of size *fmt_size*)
+ * to file *\/sys/kernel/debug/tracing/trace* from DebugFS, if
+ * available. It can take up to three additional **u64**
+ * arguments (as an eBPF helpers, the total number of arguments is
+ * limited to five).
+ *
+ * Each time the helper is called, it appends a line to the trace.
+ * Lines are discarded while *\/sys/kernel/debug/tracing/trace* is
+ * open, use *\/sys/kernel/debug/tracing/trace_pipe* to avoid this.
+ * The format of the trace is customizable, and the exact output
+ * one will get depends on the options set in
+ * *\/sys/kernel/debug/tracing/trace_options* (see also the
+ * *README* file under the same directory). However, it usually
+ * defaults to something like:
+ *
+ * ::
+ *
+ * telnet-470 [001] .N.. 419421.045894: 0x00000001: <formatted msg>
+ *
+ * In the above:
+ *
+ * * ``telnet`` is the name of the current task.
+ * * ``470`` is the PID of the current task.
+ * * ``001`` is the CPU number on which the task is
+ * running.
+ * * In ``.N..``, each character refers to a set of
+ * options (whether irqs are enabled, scheduling
+ * options, whether hard/softirqs are running, level of
+ * preempt_disabled respectively). **N** means that
+ * **TIF_NEED_RESCHED** and **PREEMPT_NEED_RESCHED**
+ * are set.
+ * * ``419421.045894`` is a timestamp.
+ * * ``0x00000001`` is a fake value used by BPF for the
+ * instruction pointer register.
+ * * ``<formatted msg>`` is the message formatted with
+ * *fmt*.
+ *
+ * The conversion specifiers supported by *fmt* are similar, but
+ * more limited than for printk(). They are **%d**, **%i**,
+ * **%u**, **%x**, **%ld**, **%li**, **%lu**, **%lx**, **%lld**,
+ * **%lli**, **%llu**, **%llx**, **%p**, **%s**. No modifier (size
+ * of field, padding with zeroes, etc.) is available, and the
+ * helper will return **-EINVAL** (but print nothing) if it
+ * encounters an unknown specifier.
+ *
+ * Also, note that **bpf_trace_printk**\ () is slow, and should
+ * only be used for debugging purposes. For this reason, a notice
+ * bloc (spanning several lines) is printed to kernel logs and
+ * states that the helper should not be used "for production use"
+ * the first time this helper is used (or more precisely, when
+ * **trace_printk**\ () buffers are allocated). For passing values
+ * to user space, perf events should be preferred.
+ *
+ * Returns
+ * The number of bytes written to the buffer, or a negative error
+ * in case of failure.
+ */
+static int (*bpf_trace_printk)(const char *fmt, __u32 fmt_size, ...) = (void *) 6;
+
+/*
+ * bpf_get_prandom_u32
+ *
+ * Get a pseudo-random number.
+ *
+ * From a security point of view, this helper uses its own
+ * pseudo-random internal state, and cannot be used to infer the
+ * seed of other random functions in the kernel. However, it is
+ * essential to note that the generator used by the helper is not
+ * cryptographically secure.
+ *
+ * Returns
+ * A random 32-bit unsigned value.
+ */
+static __u32 (*bpf_get_prandom_u32)(void) = (void *) 7;
+
+/*
+ * bpf_get_smp_processor_id
+ *
+ * Get the SMP (symmetric multiprocessing) processor id. Note that
+ * all programs run with preemption disabled, which means that the
+ * SMP processor id is stable during all the execution of the
+ * program.
+ *
+ * Returns
+ * The SMP id of the processor running the program.
+ */
+static __u32 (*bpf_get_smp_processor_id)(void) = (void *) 8;
+
+/*
+ * bpf_skb_store_bytes
+ *
+ * Store *len* bytes from address *from* into the packet
+ * associated to *skb*, at *offset*. *flags* are a combination of
+ * **BPF_F_RECOMPUTE_CSUM** (automatically recompute the
+ * checksum for the packet after storing the bytes) and
+ * **BPF_F_INVALIDATE_HASH** (set *skb*\ **->hash**, *skb*\
+ * **->swhash** and *skb*\ **->l4hash** to 0).
+ *
+ * A call to this helper is susceptible to change the underlying
+ * packet buffer. Therefore, at load time, all checks on pointers
+ * previously done by the verifier are invalidated and must be
+ * performed again, if the helper is used in combination with
+ * direct packet access.
+ *
+ * Returns
+ * 0 on success, or a negative error in case of failure.
+ */
+static int (*bpf_skb_store_bytes)(struct __sk_buff *skb, __u32 offset, const void *from, __u32 len, __u64 flags) = (void *) 9;
+
+/*
+ * bpf_l3_csum_replace
+ *
+ * Recompute the layer 3 (e.g. IP) checksum for the packet
+ * associated to *skb*. Computation is incremental, so the helper
+ * must know the former value of the header field that was
+ * modified (*from*), the new value of this field (*to*), and the
+ * number of bytes (2 or 4) for this field, stored in *size*.
+ * Alternatively, it is possible to store the difference between
+ * the previous and the new values of the header field in *to*, by
+ * setting *from* and *size* to 0. For both methods, *offset*
+ * indicates the location of the IP checksum within the packet.
+ *
+ * This helper works in combination with **bpf_csum_diff**\ (),
+ * which does not update the checksum in-place, but offers more
+ * flexibility and can handle sizes larger than 2 or 4 for the
+ * checksum to update.
+ *
+ * A call to this helper is susceptible to change the underlying
+ * packet buffer. Therefore, at load time, all checks on pointers
+ * previously done by the verifier are invalidated and must be
+ * performed again, if the helper is used in combination with
+ * direct packet access.
+ *
+ * Returns
+ * 0 on success, or a negative error in case of failure.
+ */
+static int (*bpf_l3_csum_replace)(struct __sk_buff *skb, __u32 offset, __u64 from, __u64 to, __u64 size) = (void *) 10;
+
+/*
+ * bpf_l4_csum_replace
+ *
+ * Recompute the layer 4 (e.g. TCP, UDP or ICMP) checksum for the
+ * packet associated to *skb*. Computation is incremental, so the
+ * helper must know the former value of the header field that was
+ * modified (*from*), the new value of this field (*to*), and the
+ * number of bytes (2 or 4) for this field, stored on the lowest
+ * four bits of *flags*. Alternatively, it is possible to store
+ * the difference between the previous and the new values of the
+ * header field in *to*, by setting *from* and the four lowest
+ * bits of *flags* to 0. For both methods, *offset* indicates the
+ * location of the IP checksum within the packet. In addition to
+ * the size of the field, *flags* can be added (bitwise OR) actual
+ * flags. With **BPF_F_MARK_MANGLED_0**, a null checksum is left
+ * untouched (unless **BPF_F_MARK_ENFORCE** is added as well), and
+ * for updates resulting in a null checksum the value is set to
+ * **CSUM_MANGLED_0** instead. Flag **BPF_F_PSEUDO_HDR** indicates
+ * the checksum is to be computed against a pseudo-header.
+ *
+ * This helper works in combination with **bpf_csum_diff**\ (),
+ * which does not update the checksum in-place, but offers more
+ * flexibility and can handle sizes larger than 2 or 4 for the
+ * checksum to update.
+ *
+ * A call to this helper is susceptible to change the underlying
+ * packet buffer. Therefore, at load time, all checks on pointers
+ * previously done by the verifier are invalidated and must be
+ * performed again, if the helper is used in combination with
+ * direct packet access.
+ *
+ * Returns
+ * 0 on success, or a negative error in case of failure.
+ */
+static int (*bpf_l4_csum_replace)(struct __sk_buff *skb, __u32 offset, __u64 from, __u64 to, __u64 flags) = (void *) 11;
+
+/*
+ * bpf_tail_call
+ *
+ * This special helper is used to trigger a "tail call", or in
+ * other words, to jump into another eBPF program. The same stack
+ * frame is used (but values on stack and in registers for the
+ * caller are not accessible to the callee). This mechanism allows
+ * for program chaining, either for raising the maximum number of
+ * available eBPF instructions, or to execute given programs in
+ * conditional blocks. For security reasons, there is an upper
+ * limit to the number of successive tail calls that can be
+ * performed.
+ *
+ * Upon call of this helper, the program attempts to jump into a
+ * program referenced at index *index* in *prog_array_map*, a
+ * special map of type **BPF_MAP_TYPE_PROG_ARRAY**, and passes
+ * *ctx*, a pointer to the context.
+ *
+ * If the call succeeds, the kernel immediately runs the first
+ * instruction of the new program. This is not a function call,
+ * and it never returns to the previous program. If the call
+ * fails, then the helper has no effect, and the caller continues
+ * to run its subsequent instructions. A call can fail if the
+ * destination program for the jump does not exist (i.e. *index*
+ * is superior to the number of entries in *prog_array_map*), or
+ * if the maximum number of tail calls has been reached for this
+ * chain of programs. This limit is defined in the kernel by the
+ * macro **MAX_TAIL_CALL_CNT** (not accessible to user space),
+ * which is currently set to 32.
+ *
+ * Returns
+ * 0 on success, or a negative error in case of failure.
+ */
+static int (*bpf_tail_call)(void *ctx, void *prog_array_map, __u32 index) = (void *) 12;
+
+/*
+ * bpf_clone_redirect
+ *
+ * Clone and redirect the packet associated to *skb* to another
+ * net device of index *ifindex*. Both ingress and egress
+ * interfaces can be used for redirection. The **BPF_F_INGRESS**
+ * value in *flags* is used to make the distinction (ingress path
+ * is selected if the flag is present, egress path otherwise).
+ * This is the only flag supported for now.
+ *
+ * In comparison with **bpf_redirect**\ () helper,
+ * **bpf_clone_redirect**\ () has the associated cost of
+ * duplicating the packet buffer, but this can be executed out of
+ * the eBPF program. Conversely, **bpf_redirect**\ () is more
+ * efficient, but it is handled through an action code where the
+ * redirection happens only after the eBPF program has returned.
+ *
+ * A call to this helper is susceptible to change the underlying
+ * packet buffer. Therefore, at load time, all checks on pointers
+ * previously done by the verifier are invalidated and must be
+ * performed again, if the helper is used in combination with
+ * direct packet access.
+ *
+ * Returns
+ * 0 on success, or a negative error in case of failure.
+ */
+static int (*bpf_clone_redirect)(struct __sk_buff *skb, __u32 ifindex, __u64 flags) = (void *) 13;
+
+/*
+ * bpf_get_current_pid_tgid
+ *
+ *
+ * Returns
+ * A 64-bit integer containing the current tgid and pid, and
+ * created as such:
+ * *current_task*\ **->tgid << 32 \|**
+ * *current_task*\ **->pid**.
+ */
+static __u64 (*bpf_get_current_pid_tgid)(void) = (void *) 14;
+
+/*
+ * bpf_get_current_uid_gid
+ *
+ *
+ * Returns
+ * A 64-bit integer containing the current GID and UID, and
+ * created as such: *current_gid* **<< 32 \|** *current_uid*.
+ */
+static __u64 (*bpf_get_current_uid_gid)(void) = (void *) 15;
+
+/*
+ * bpf_get_current_comm
+ *
+ * Copy the **comm** attribute of the current task into *buf* of
+ * *size_of_buf*. The **comm** attribute contains the name of
+ * the executable (excluding the path) for the current task. The
+ * *size_of_buf* must be strictly positive. On success, the
+ * helper makes sure that the *buf* is NUL-terminated. On failure,
+ * it is filled with zeroes.
+ *
+ * Returns
+ * 0 on success, or a negative error in case of failure.
+ */
+static int (*bpf_get_current_comm)(void *buf, __u32 size_of_buf) = (void *) 16;
+
+/*
+ * bpf_get_cgroup_classid
+ *
+ * Retrieve the classid for the current task, i.e. for the net_cls
+ * cgroup to which *skb* belongs.
+ *
+ * This helper can be used on TC egress path, but not on ingress.
+ *
+ * The net_cls cgroup provides an interface to tag network packets
+ * based on a user-provided identifier for all traffic coming from
+ * the tasks belonging to the related cgroup. See also the related
+ * kernel documentation, available from the Linux sources in file
+ * *Documentation/admin-guide/cgroup-v1/net_cls.rst*.
+ *
+ * The Linux kernel has two versions for cgroups: there are
+ * cgroups v1 and cgroups v2. Both are available to users, who can
+ * use a mixture of them, but note that the net_cls cgroup is for
+ * cgroup v1 only. This makes it incompatible with BPF programs
+ * run on cgroups, which is a cgroup-v2-only feature (a socket can
+ * only hold data for one version of cgroups at a time).
+ *
+ * This helper is only available is the kernel was compiled with
+ * the **CONFIG_CGROUP_NET_CLASSID** configuration option set to
+ * "**y**" or to "**m**".
+ *
+ * Returns
+ * The classid, or 0 for the default unconfigured classid.
+ */
+static __u32 (*bpf_get_cgroup_classid)(struct __sk_buff *skb) = (void *) 17;
+
+/*
+ * bpf_skb_vlan_push
+ *
+ * Push a *vlan_tci* (VLAN tag control information) of protocol
+ * *vlan_proto* to the packet associated to *skb*, then update
+ * the checksum. Note that if *vlan_proto* is different from
+ * **ETH_P_8021Q** and **ETH_P_8021AD**, it is considered to
+ * be **ETH_P_8021Q**.
+ *
+ * A call to this helper is susceptible to change the underlying
+ * packet buffer. Therefore, at load time, all checks on pointers
+ * previously done by the verifier are invalidated and must be
+ * performed again, if the helper is used in combination with
+ * direct packet access.
+ *
+ * Returns
+ * 0 on success, or a negative error in case of failure.
+ */
+static int (*bpf_skb_vlan_push)(struct __sk_buff *skb, __be16 vlan_proto, __u16 vlan_tci) = (void *) 18;
+
+/*
+ * bpf_skb_vlan_pop
+ *
+ * Pop a VLAN header from the packet associated to *skb*.
+ *
+ * A call to this helper is susceptible to change the underlying
+ * packet buffer. Therefore, at load time, all checks on pointers
+ * previously done by the verifier are invalidated and must be
+ * performed again, if the helper is used in combination with
+ * direct packet access.
+ *
+ * Returns
+ * 0 on success, or a negative error in case of failure.
+ */
+static int (*bpf_skb_vlan_pop)(struct __sk_buff *skb) = (void *) 19;
+
+/*
+ * bpf_skb_get_tunnel_key
+ *
+ * Get tunnel metadata. This helper takes a pointer *key* to an
+ * empty **struct bpf_tunnel_key** of **size**, that will be
+ * filled with tunnel metadata for the packet associated to *skb*.
+ * The *flags* can be set to **BPF_F_TUNINFO_IPV6**, which
+ * indicates that the tunnel is based on IPv6 protocol instead of
+ * IPv4.
+ *
+ * The **struct bpf_tunnel_key** is an object that generalizes the
+ * principal parameters used by various tunneling protocols into a
+ * single struct. This way, it can be used to easily make a
+ * decision based on the contents of the encapsulation header,
+ * "summarized" in this struct. In particular, it holds the IP
+ * address of the remote end (IPv4 or IPv6, depending on the case)
+ * in *key*\ **->remote_ipv4** or *key*\ **->remote_ipv6**. Also,
+ * this struct exposes the *key*\ **->tunnel_id**, which is
+ * generally mapped to a VNI (Virtual Network Identifier), making
+ * it programmable together with the **bpf_skb_set_tunnel_key**\
+ * () helper.
+ *
+ * Let's imagine that the following code is part of a program
+ * attached to the TC ingress interface, on one end of a GRE
+ * tunnel, and is supposed to filter out all messages coming from
+ * remote ends with IPv4 address other than 10.0.0.1:
+ *
+ * ::
+ *
+ * int ret;
+ * struct bpf_tunnel_key key = {};
+ *
+ * ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0);
+ * if (ret < 0)
+ * return TC_ACT_SHOT; // drop packet
+ *
+ * if (key.remote_ipv4 != 0x0a000001)
+ * return TC_ACT_SHOT; // drop packet
+ *
+ * return TC_ACT_OK; // accept packet
+ *
+ * This interface can also be used with all encapsulation devices
+ * that can operate in "collect metadata" mode: instead of having
+ * one network device per specific configuration, the "collect
+ * metadata" mode only requires a single device where the
+ * configuration can be extracted from this helper.
+ *
+ * This can be used together with various tunnels such as VXLan,
+ * Geneve, GRE or IP in IP (IPIP).
+ *
+ * Returns
+ * 0 on success, or a negative error in case of failure.
+ */
+static int (*bpf_skb_get_tunnel_key)(struct __sk_buff *skb, struct bpf_tunnel_key *key, __u32 size, __u64 flags) = (void *) 20;
+
+/*
+ * bpf_skb_set_tunnel_key
+ *
+ * Populate tunnel metadata for packet associated to *skb.* The
+ * tunnel metadata is set to the contents of *key*, of *size*. The
+ * *flags* can be set to a combination of the following values:
+ *
+ * **BPF_F_TUNINFO_IPV6**
+ * Indicate that the tunnel is based on IPv6 protocol
+ * instead of IPv4.
+ * **BPF_F_ZERO_CSUM_TX**
+ * For IPv4 packets, add a flag to tunnel metadata
+ * indicating that checksum computation should be skipped
+ * and checksum set to zeroes.
+ * **BPF_F_DONT_FRAGMENT**
+ * Add a flag to tunnel metadata indicating that the
+ * packet should not be fragmented.
+ * **BPF_F_SEQ_NUMBER**
+ * Add a flag to tunnel metadata indicating that a
+ * sequence number should be added to tunnel header before
+ * sending the packet. This flag was added for GRE
+ * encapsulation, but might be used with other protocols
+ * as well in the future.
+ *
+ * Here is a typical usage on the transmit path:
+ *
+ * ::
+ *
+ * struct bpf_tunnel_key key;
+ * populate key ...
+ * bpf_skb_set_tunnel_key(skb, &key, sizeof(key), 0);
+ * bpf_clone_redirect(skb, vxlan_dev_ifindex, 0);
+ *
+ * See also the description of the **bpf_skb_get_tunnel_key**\ ()
+ * helper for additional information.
+ *
+ * Returns
+ * 0 on success, or a negative error in case of failure.
+ */
+static int (*bpf_skb_set_tunnel_key)(struct __sk_buff *skb, struct bpf_tunnel_key *key, __u32 size, __u64 flags) = (void *) 21;
+
+/*
+ * bpf_perf_event_read
+ *
+ * Read the value of a perf event counter. This helper relies on a
+ * *map* of type **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. The nature of
+ * the perf event counter is selected when *map* is updated with
+ * perf event file descriptors. The *map* is an array whose size
+ * is the number of available CPUs, and each cell contains a value
+ * relative to one CPU. The value to retrieve is indicated by
+ * *flags*, that contains the index of the CPU to look up, masked
+ * with **BPF_F_INDEX_MASK**. Alternatively, *flags* can be set to
+ * **BPF_F_CURRENT_CPU** to indicate that the value for the
+ * current CPU should be retrieved.
+ *
+ * Note that before Linux 4.13, only hardware perf event can be
+ * retrieved.
+ *
+ * Also, be aware that the newer helper
+ * **bpf_perf_event_read_value**\ () is recommended over
+ * **bpf_perf_event_read**\ () in general. The latter has some ABI
+ * quirks where error and counter value are used as a return code
+ * (which is wrong to do since ranges may overlap). This issue is
+ * fixed with **bpf_perf_event_read_value**\ (), which at the same
+ * time provides more features over the **bpf_perf_event_read**\
+ * () interface. Please refer to the description of
+ * **bpf_perf_event_read_value**\ () for details.
+ *
+ * Returns
+ * The value of the perf event counter read from the map, or a
+ * negative error code in case of failure.
+ */
+static __u64 (*bpf_perf_event_read)(void *map, __u64 flags) = (void *) 22;
+
+/*
+ * bpf_redirect
+ *
+ * Redirect the packet to another net device of index *ifindex*.
+ * This helper is somewhat similar to **bpf_clone_redirect**\
+ * (), except that the packet is not cloned, which provides
+ * increased performance.
+ *
+ * Except for XDP, both ingress and egress interfaces can be used
+ * for redirection. The **BPF_F_INGRESS** value in *flags* is used
+ * to make the distinction (ingress path is selected if the flag
+ * is present, egress path otherwise). Currently, XDP only
+ * supports redirection to the egress interface, and accepts no
+ * flag at all.
+ *
+ * The same effect can be attained with the more generic
+ * **bpf_redirect_map**\ (), which requires specific maps to be
+ * used but offers better performance.
+ *
+ * Returns
+ * For XDP, the helper returns **XDP_REDIRECT** on success or
+ * **XDP_ABORTED** on error. For other program types, the values
+ * are **TC_ACT_REDIRECT** on success or **TC_ACT_SHOT** on
+ * error.
+ */
+static int (*bpf_redirect)(__u32 ifindex, __u64 flags) = (void *) 23;
+
+/*
+ * bpf_get_route_realm
+ *
+ * Retrieve the realm or the route, that is to say the
+ * **tclassid** field of the destination for the *skb*. The
+ * indentifier retrieved is a user-provided tag, similar to the
+ * one used with the net_cls cgroup (see description for
+ * **bpf_get_cgroup_classid**\ () helper), but here this tag is
+ * held by a route (a destination entry), not by a task.
+ *
+ * Retrieving this identifier works with the clsact TC egress hook
+ * (see also **tc-bpf(8)**), or alternatively on conventional
+ * classful egress qdiscs, but not on TC ingress path. In case of
+ * clsact TC egress hook, this has the advantage that, internally,
+ * the destination entry has not been dropped yet in the transmit
+ * path. Therefore, the destination entry does not need to be
+ * artificially held via **netif_keep_dst**\ () for a classful
+ * qdisc until the *skb* is freed.
+ *
+ * This helper is available only if the kernel was compiled with
+ * **CONFIG_IP_ROUTE_CLASSID** configuration option.
+ *
+ * Returns
+ * The realm of the route for the packet associated to *skb*, or 0
+ * if none was found.
+ */
+static __u32 (*bpf_get_route_realm)(struct __sk_buff *skb) = (void *) 24;
+
+/*
+ * bpf_perf_event_output
+ *
+ * Write raw *data* blob into a special BPF perf event held by
+ * *map* of type **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. This perf
+ * event must have the following attributes: **PERF_SAMPLE_RAW**
+ * as **sample_type**, **PERF_TYPE_SOFTWARE** as **type**, and
+ * **PERF_COUNT_SW_BPF_OUTPUT** as **config**.
+ *
+ * The *flags* are used to indicate the index in *map* for which
+ * the value must be put, masked with **BPF_F_INDEX_MASK**.
+ * Alternatively, *flags* can be set to **BPF_F_CURRENT_CPU**
+ * to indicate that the index of the current CPU core should be
+ * used.
+ *
+ * The value to write, of *size*, is passed through eBPF stack and
+ * pointed by *data*.
+ *
+ * The context of the program *ctx* needs also be passed to the
+ * helper.
+ *
+ * On user space, a program willing to read the values needs to
+ * call **perf_event_open**\ () on the perf event (either for
+ * one or for all CPUs) and to store the file descriptor into the
+ * *map*. This must be done before the eBPF program can send data
+ * into it. An example is available in file
+ * *samples/bpf/trace_output_user.c* in the Linux kernel source
+ * tree (the eBPF program counterpart is in
+ * *samples/bpf/trace_output_kern.c*).
+ *
+ * **bpf_perf_event_output**\ () achieves better performance
+ * than **bpf_trace_printk**\ () for sharing data with user
+ * space, and is much better suitable for streaming data from eBPF
+ * programs.
+ *
+ * Note that this helper is not restricted to tracing use cases
+ * and can be used with programs attached to TC or XDP as well,
+ * where it allows for passing data to user space listeners. Data
+ * can be:
+ *
+ * * Only custom structs,
+ * * Only the packet payload, or
+ * * A combination of both.
+ *
+ * Returns
+ * 0 on success, or a negative error in case of failure.
+ */
+static int (*bpf_perf_event_output)(void *ctx, void *map, __u64 flags, void *data, __u64 size) = (void *) 25;
+
+/*
+ * bpf_skb_load_bytes
+ *
+ * This helper was provided as an easy way to load data from a
+ * packet. It can be used to load *len* bytes from *offset* from
+ * the packet associated to *skb*, into the buffer pointed by
+ * *to*.
+ *
+ * Since Linux 4.7, usage of this helper has mostly been replaced
+ * by "direct packet access", enabling packet data to be
+ * manipulated with *skb*\ **->data** and *skb*\ **->data_end**
+ * pointing respectively to the first byte of packet data and to
+ * the byte after the last byte of packet data. However, it
+ * remains useful if one wishes to read large quantities of data
+ * at once from a packet into the eBPF stack.
+ *
+ * Returns
+ * 0 on success, or a negative error in case of failure.
+ */
+static int (*bpf_skb_load_bytes)(const void *skb, __u32 offset, void *to, __u32 len) = (void *) 26;
+
+/*
+ * bpf_get_stackid
+ *
+ * Walk a user or a kernel stack and return its id. To achieve
+ * this, the helper needs *ctx*, which is a pointer to the context
+ * on which the tracing program is executed, and a pointer to a
+ * *map* of type **BPF_MAP_TYPE_STACK_TRACE**.
+ *
+ * The last argument, *flags*, holds the number of stack frames to
+ * skip (from 0 to 255), masked with
+ * **BPF_F_SKIP_FIELD_MASK**. The next bits can be used to set
+ * a combination of the following flags:
+ *
+ * **BPF_F_USER_STACK**
+ * Collect a user space stack instead of a kernel stack.
+ * **BPF_F_FAST_STACK_CMP**
+ * Compare stacks by hash only.
+ * **BPF_F_REUSE_STACKID**
+ * If two different stacks hash into the same *stackid*,
+ * discard the old one.
+ *
+ * The stack id retrieved is a 32 bit long integer handle which
+ * can be further combined with other data (including other stack
+ * ids) and used as a key into maps. This can be useful for
+ * generating a variety of graphs (such as flame graphs or off-cpu
+ * graphs).
+ *
+ * For walking a stack, this helper is an improvement over
+ * **bpf_probe_read**\ (), which can be used with unrolled loops
+ * but is not efficient and consumes a lot of eBPF instructions.
+ * Instead, **bpf_get_stackid**\ () can collect up to
+ * **PERF_MAX_STACK_DEPTH** both kernel and user frames. Note that
+ * this limit can be controlled with the **sysctl** program, and
+ * that it should be manually increased in order to profile long
+ * user stacks (such as stacks for Java programs). To do so, use:
+ *
+ * ::
+ *
+ * # sysctl kernel.perf_event_max_stack=<new value>
+ *
+ * Returns
+ * The positive or null stack id on success, or a negative error
+ * in case of failure.
+ */
+static int (*bpf_get_stackid)(void *ctx, void *map, __u64 flags) = (void *) 27;
+
+/*
+ * bpf_csum_diff
+ *
+ * Compute a checksum difference, from the raw buffer pointed by
+ * *from*, of length *from_size* (that must be a multiple of 4),
+ * towards the raw buffer pointed by *to*, of size *to_size*
+ * (same remark). An optional *seed* can be added to the value
+ * (this can be cascaded, the seed may come from a previous call
+ * to the helper).
+ *
+ * This is flexible enough to be used in several ways:
+ *
+ * * With *from_size* == 0, *to_size* > 0 and *seed* set to
+ * checksum, it can be used when pushing new data.
+ * * With *from_size* > 0, *to_size* == 0 and *seed* set to
+ * checksum, it can be used when removing data from a packet.
+ * * With *from_size* > 0, *to_size* > 0 and *seed* set to 0, it
+ * can be used to compute a diff. Note that *from_size* and
+ * *to_size* do not need to be equal.
+ *
+ * This helper can be used in combination with
+ * **bpf_l3_csum_replace**\ () and **bpf_l4_csum_replace**\ (), to
+ * which one can feed in the difference computed with
+ * **bpf_csum_diff**\ ().
+ *
+ * Returns
+ * The checksum result, or a negative error code in case of
+ * failure.
+ */
+static __s64 (*bpf_csum_diff)(__be32 *from, __u32 from_size, __be32 *to, __u32 to_size, __wsum seed) = (void *) 28;
+
+/*
+ * bpf_skb_get_tunnel_opt
+ *
+ * Retrieve tunnel options metadata for the packet associated to
+ * *skb*, and store the raw tunnel option data to the buffer *opt*
+ * of *size*.
+ *
+ * This helper can be used with encapsulation devices that can
+ * operate in "collect metadata" mode (please refer to the related
+ * note in the description of **bpf_skb_get_tunnel_key**\ () for
+ * more details). A particular example where this can be used is
+ * in combination with the Geneve encapsulation protocol, where it
+ * allows for pushing (with **bpf_skb_get_tunnel_opt**\ () helper)
+ * and retrieving arbitrary TLVs (Type-Length-Value headers) from
+ * the eBPF program. This allows for full customization of these
+ * headers.
+ *
+ * Returns
+ * The size of the option data retrieved.
+ */
+static int (*bpf_skb_get_tunnel_opt)(struct __sk_buff *skb, void *opt, __u32 size) = (void *) 29;
+
+/*
+ * bpf_skb_set_tunnel_opt
+ *
+ * Set tunnel options metadata for the packet associated to *skb*
+ * to the option data contained in the raw buffer *opt* of *size*.
+ *
+ * See also the description of the **bpf_skb_get_tunnel_opt**\ ()
+ * helper for additional information.
+ *
+ * Returns
+ * 0 on success, or a negative error in case of failure.
+ */
+static int (*bpf_skb_set_tunnel_opt)(struct __sk_buff *skb, void *opt, __u32 size) = (void *) 30;
+
+/*
+ * bpf_skb_change_proto
+ *
+ * Change the protocol of the *skb* to *proto*. Currently
+ * supported are transition from IPv4 to IPv6, and from IPv6 to
+ * IPv4. The helper takes care of the groundwork for the
+ * transition, including resizing the socket buffer. The eBPF
+ * program is expected to fill the new headers, if any, via
+ * **skb_store_bytes**\ () and to recompute the checksums with
+ * **bpf_l3_csum_replace**\ () and **bpf_l4_csum_replace**\
+ * (). The main case for this helper is to perform NAT64
+ * operations out of an eBPF program.
+ *
+ * Internally, the GSO type is marked as dodgy so that headers are
+ * checked and segments are recalculated by the GSO/GRO engine.
+ * The size for GSO target is adapted as well.
+ *
+ * All values for *flags* are reserved for future usage, and must
+ * be left at zero.
+ *
+ * A call to this helper is susceptible to change the underlying
+ * packet buffer. Therefore, at load time, all checks on pointers
+ * previously done by the verifier are invalidated and must be
+ * performed again, if the helper is used in combination with
+ * direct packet access.
+ *
+ * Returns
+ * 0 on success, or a negative error in case of failure.
+ */
+static int (*bpf_skb_change_proto)(struct __sk_buff *skb, __be16 proto, __u64 flags) = (void *) 31;
+
+/*
+ * bpf_skb_change_type
+ *
+ * Change the packet type for the packet associated to *skb*. This
+ * comes down to setting *skb*\ **->pkt_type** to *type*, except
+ * the eBPF program does not have a write access to *skb*\
+ * **->pkt_type** beside this helper. Using a helper here allows
+ * for graceful handling of errors.
+ *
+ * The major use case is to change incoming *skb*s to
+ * **PACKET_HOST** in a programmatic way instead of having to
+ * recirculate via **redirect**\ (..., **BPF_F_INGRESS**), for
+ * example.
+ *
+ * Note that *type* only allows certain values. At this time, they
+ * are:
+ *
+ * **PACKET_HOST**
+ * Packet is for us.
+ * **PACKET_BROADCAST**
+ * Send packet to all.
+ * **PACKET_MULTICAST**
+ * Send packet to group.
+ * **PACKET_OTHERHOST**
+ * Send packet to someone else.
+ *
+ * Returns
+ * 0 on success, or a negative error in case of failure.
+ */
+static int (*bpf_skb_change_type)(struct __sk_buff *skb, __u32 type) = (void *) 32;
+
+/*
+ * bpf_skb_under_cgroup
+ *
+ * Check whether *skb* is a descendant of the cgroup2 held by
+ * *map* of type **BPF_MAP_TYPE_CGROUP_ARRAY**, at *index*.
+ *
+ * Returns
+ * The return value depends on the result of the test, and can be:
+ *
+ * * 0, if the *skb* failed the cgroup2 descendant test.
+ * * 1, if the *skb* succeeded the cgroup2 descendant test.
+ * * A negative error code, if an error occurred.
+ */
+static int (*bpf_skb_under_cgroup)(struct __sk_buff *skb, void *map, __u32 index) = (void *) 33;
+
+/*
+ * bpf_get_hash_recalc
+ *
+ * Retrieve the hash of the packet, *skb*\ **->hash**. If it is
+ * not set, in particular if the hash was cleared due to mangling,
+ * recompute this hash. Later accesses to the hash can be done
+ * directly with *skb*\ **->hash**.
+ *
+ * Calling **bpf_set_hash_invalid**\ (), changing a packet
+ * prototype with **bpf_skb_change_proto**\ (), or calling
+ * **bpf_skb_store_bytes**\ () with the
+ * **BPF_F_INVALIDATE_HASH** are actions susceptible to clear
+ * the hash and to trigger a new computation for the next call to
+ * **bpf_get_hash_recalc**\ ().
+ *
+ * Returns
+ * The 32-bit hash.
+ */
+static __u32 (*bpf_get_hash_recalc)(struct __sk_buff *skb) = (void *) 34;
+
+/*
+ * bpf_get_current_task
+ *
+ *
+ * Returns
+ * A pointer to the current task struct.
+ */
+static __u64 (*bpf_get_current_task)(void) = (void *) 35;
+
+/*
+ * bpf_probe_write_user
+ *
+ * Attempt in a safe way to write *len* bytes from the buffer
+ * *src* to *dst* in memory. It only works for threads that are in
+ * user context, and *dst* must be a valid user space address.
+ *
+ * This helper should not be used to implement any kind of
+ * security mechanism because of TOC-TOU attacks, but rather to
+ * debug, divert, and manipulate execution of semi-cooperative
+ * processes.
+ *
+ * Keep in mind that this feature is meant for experiments, and it
+ * has a risk of crashing the system and running programs.
+ * Therefore, when an eBPF program using this helper is attached,
+ * a warning including PID and process name is printed to kernel
+ * logs.
+ *
+ * Returns
+ * 0 on success, or a negative error in case of failure.
+ */
+static int (*bpf_probe_write_user)(void *dst, const void *src, __u32 len) = (void *) 36;
+
+/*
+ * bpf_current_task_under_cgroup
+ *
+ * Check whether the probe is being run is the context of a given
+ * subset of the cgroup2 hierarchy. The cgroup2 to test is held by
+ * *map* of type **BPF_MAP_TYPE_CGROUP_ARRAY**, at *index*.
+ *
+ * Returns
+ * The return value depends on the result of the test, and can be:
+ *
+ * * 0, if the *skb* task belongs to the cgroup2.
+ * * 1, if the *skb* task does not belong to the cgroup2.
+ * * A negative error code, if an error occurred.
+ */
+static int (*bpf_current_task_under_cgroup)(void *map, __u32 index) = (void *) 37;
+
+/*
+ * bpf_skb_change_tail
+ *
+ * Resize (trim or grow) the packet associated to *skb* to the
+ * new *len*. The *flags* are reserved for future usage, and must
+ * be left at zero.
+ *
+ * The basic idea is that the helper performs the needed work to
+ * change the size of the packet, then the eBPF program rewrites
+ * the rest via helpers like **bpf_skb_store_bytes**\ (),
+ * **bpf_l3_csum_replace**\ (), **bpf_l3_csum_replace**\ ()
+ * and others. This helper is a slow path utility intended for
+ * replies with control messages. And because it is targeted for
+ * slow path, the helper itself can afford to be slow: it
+ * implicitly linearizes, unclones and drops offloads from the
+ * *skb*.
+ *
+ * A call to this helper is susceptible to change the underlying
+ * packet buffer. Therefore, at load time, all checks on pointers
+ * previously done by the verifier are invalidated and must be
+ * performed again, if the helper is used in combination with
+ * direct packet access.
+ *
+ * Returns
+ * 0 on success, or a negative error in case of failure.
+ */
+static int (*bpf_skb_change_tail)(struct __sk_buff *skb, __u32 len, __u64 flags) = (void *) 38;
+
+/*
+ * bpf_skb_pull_data
+ *
+ * Pull in non-linear data in case the *skb* is non-linear and not
+ * all of *len* are part of the linear section. Make *len* bytes
+ * from *skb* readable and writable. If a zero value is passed for
+ * *len*, then the whole length of the *skb* is pulled.
+ *
+ * This helper is only needed for reading and writing with direct
+ * packet access.
+ *
+ * For direct packet access, testing that offsets to access
+ * are within packet boundaries (test on *skb*\ **->data_end**) is
+ * susceptible to fail if offsets are invalid, or if the requested
+ * data is in non-linear parts of the *skb*. On failure the
+ * program can just bail out, or in the case of a non-linear
+ * buffer, use a helper to make the data available. The
+ * **bpf_skb_load_bytes**\ () helper is a first solution to access
+ * the data. Another one consists in using **bpf_skb_pull_data**
+ * to pull in once the non-linear parts, then retesting and
+ * eventually access the data.
+ *
+ * At the same time, this also makes sure the *skb* is uncloned,
+ * which is a necessary condition for direct write. As this needs
+ * to be an invariant for the write part only, the verifier
+ * detects writes and adds a prologue that is calling
+ * **bpf_skb_pull_data()** to effectively unclone the *skb* from
+ * the very beginning in case it is indeed cloned.
+ *
+ * A call to this helper is susceptible to change the underlying
+ * packet buffer. Therefore, at load time, all checks on pointers
+ * previously done by the verifier are invalidated and must be
+ * performed again, if the helper is used in combination with
+ * direct packet access.
+ *
+ * Returns
+ * 0 on success, or a negative error in case of failure.
+ */
+static int (*bpf_skb_pull_data)(struct __sk_buff *skb, __u32 len) = (void *) 39;
+
+/*
+ * bpf_csum_update
+ *
+ * Add the checksum *csum* into *skb*\ **->csum** in case the
+ * driver has supplied a checksum for the entire packet into that
+ * field. Return an error otherwise. This helper is intended to be
+ * used in combination with **bpf_csum_diff**\ (), in particular
+ * when the checksum needs to be updated after data has been
+ * written into the packet through direct packet access.
+ *
+ * Returns
+ * The checksum on success, or a negative error code in case of
+ * failure.
+ */
+static __s64 (*bpf_csum_update)(struct __sk_buff *skb, __wsum csum) = (void *) 40;
+
+/*
+ * bpf_set_hash_invalid
+ *
+ * Invalidate the current *skb*\ **->hash**. It can be used after
+ * mangling on headers through direct packet access, in order to
+ * indicate that the hash is outdated and to trigger a
+ * recalculation the next time the kernel tries to access this
+ * hash or when the **bpf_get_hash_recalc**\ () helper is called.
+ *
+ */
+static void (*bpf_set_hash_invalid)(struct __sk_buff *skb) = (void *) 41;
+
+/*
+ * bpf_get_numa_node_id
+ *
+ * Return the id of the current NUMA node. The primary use case
+ * for this helper is the selection of sockets for the local NUMA
+ * node, when the program is attached to sockets using the
+ * **SO_ATTACH_REUSEPORT_EBPF** option (see also **socket(7)**),
+ * but the helper is also available to other eBPF program types,
+ * similarly to **bpf_get_smp_processor_id**\ ().
+ *
+ * Returns
+ * The id of current NUMA node.
+ */
+static int (*bpf_get_numa_node_id)(void) = (void *) 42;
+
+/*
+ * bpf_skb_change_head
+ *
+ * Grows headroom of packet associated to *skb* and adjusts the
+ * offset of the MAC header accordingly, adding *len* bytes of
+ * space. It automatically extends and reallocates memory as
+ * required.
+ *
+ * This helper can be used on a layer 3 *skb* to push a MAC header
+ * for redirection into a layer 2 device.
+ *
+ * All values for *flags* are reserved for future usage, and must
+ * be left at zero.
+ *
+ * A call to this helper is susceptible to change the underlying
+ * packet buffer. Therefore, at load time, all checks on pointers
+ * previously done by the verifier are invalidated and must be
+ * performed again, if the helper is used in combination with
+ * direct packet access.
+ *
+ * Returns
+ * 0 on success, or a negative error in case of failure.
+ */
+static int (*bpf_skb_change_head)(struct __sk_buff *skb, __u32 len, __u64 flags) = (void *) 43;
+
+/*
+ * bpf_xdp_adjust_head
+ *
+ * Adjust (move) *xdp_md*\ **->data** by *delta* bytes. Note that
+ * it is possible to use a negative value for *delta*. This helper
+ * can be used to prepare the packet for pushing or popping
+ * headers.
+ *
+ * A call to this helper is susceptible to change the underlying
+ * packet buffer. Therefore, at load time, all checks on pointers
+ * previously done by the verifier are invalidated and must be
+ * performed again, if the helper is used in combination with
+ * direct packet access.
+ *
+ * Returns
+ * 0 on success, or a negative error in case of failure.
+ */
+static int (*bpf_xdp_adjust_head)(struct xdp_md *xdp_md, int delta) = (void *) 44;
+
+/*
+ * bpf_probe_read_str
+ *
+ * Copy a NUL terminated string from an unsafe kernel address
+ * *unsafe_ptr* to *dst*. See bpf_probe_read_kernel_str() for
+ * more details.
+ *
+ * Generally, use bpf_probe_read_user_str() or bpf_probe_read_kernel_str()
+ * instead.
+ *
+ * Returns
+ * On success, the strictly positive length of the string,
+ * including the trailing NUL character. On error, a negative
+ * value.
+ */
+static int (*bpf_probe_read_str)(void *dst, __u32 size, const void *unsafe_ptr) = (void *) 45;
+
+/*
+ * bpf_get_socket_cookie
+ *
+ * If the **struct sk_buff** pointed by *skb* has a known socket,
+ * retrieve the cookie (generated by the kernel) of this socket.
+ * If no cookie has been set yet, generate a new cookie. Once
+ * generated, the socket cookie remains stable for the life of the
+ * socket. This helper can be useful for monitoring per socket
+ * networking traffic statistics as it provides a global socket
+ * identifier that can be assumed unique.
+ *
+ * Returns
+ * A 8-byte long non-decreasing number on success, or 0 if the
+ * socket field is missing inside *skb*.
+ */
+static __u64 (*bpf_get_socket_cookie)(void *ctx) = (void *) 46;
+
+/*
+ * bpf_get_socket_uid
+ *
+ *
+ * Returns
+ * The owner UID of the socket associated to *skb*. If the socket
+ * is **NULL**, or if it is not a full socket (i.e. if it is a
+ * time-wait or a request socket instead), **overflowuid** value
+ * is returned (note that **overflowuid** might also be the actual
+ * UID value for the socket).
+ */
+static __u32 (*bpf_get_socket_uid)(struct __sk_buff *skb) = (void *) 47;
+
+/*
+ * bpf_set_hash
+ *
+ * Set the full hash for *skb* (set the field *skb*\ **->hash**)
+ * to value *hash*.
+ *
+ * Returns
+ * 0
+ */
+static __u32 (*bpf_set_hash)(struct __sk_buff *skb, __u32 hash) = (void *) 48;
+
+/*
+ * bpf_setsockopt
+ *
+ * Emulate a call to **setsockopt()** on the socket associated to
+ * *bpf_socket*, which must be a full socket. The *level* at
+ * which the option resides and the name *optname* of the option
+ * must be specified, see **setsockopt(2)** for more information.
+ * The option value of length *optlen* is pointed by *optval*.
+ *
+ * This helper actually implements a subset of **setsockopt()**.
+ * It supports the following *level*\ s:
+ *
+ * * **SOL_SOCKET**, which supports the following *optname*\ s:
+ * **SO_RCVBUF**, **SO_SNDBUF**, **SO_MAX_PACING_RATE**,
+ * **SO_PRIORITY**, **SO_RCVLOWAT**, **SO_MARK**.
+ * * **IPPROTO_TCP**, which supports the following *optname*\ s:
+ * **TCP_CONGESTION**, **TCP_BPF_IW**,
+ * **TCP_BPF_SNDCWND_CLAMP**.
+ * * **IPPROTO_IP**, which supports *optname* **IP_TOS**.
+ * * **IPPROTO_IPV6**, which supports *optname* **IPV6_TCLASS**.
+ *
+ * Returns
+ * 0 on success, or a negative error in case of failure.
+ */
+static int (*bpf_setsockopt)(struct bpf_sock_ops *bpf_socket, int level, int optname, void *optval, int optlen) = (void *) 49;
+
+/*
+ * bpf_skb_adjust_room
+ *
+ * Grow or shrink the room for data in the packet associated to
+ * *skb* by *len_diff*, and according to the selected *mode*.
+ *
+ * There are two supported modes at this time:
+ *
+ * * **BPF_ADJ_ROOM_MAC**: Adjust room at the mac layer
+ * (room space is added or removed below the layer 2 header).
+ *
+ * * **BPF_ADJ_ROOM_NET**: Adjust room at the network layer
+ * (room space is added or removed below the layer 3 header).
+ *
+ * The following flags are supported at this time:
+ *
+ * * **BPF_F_ADJ_ROOM_FIXED_GSO**: Do not adjust gso_size.
+ * Adjusting mss in this way is not allowed for datagrams.
+ *
+ * * **BPF_F_ADJ_ROOM_ENCAP_L3_IPV4**,
+ * **BPF_F_ADJ_ROOM_ENCAP_L3_IPV6**:
+ * Any new space is reserved to hold a tunnel header.
+ * Configure skb offsets and other fields accordingly.
+ *
+ * * **BPF_F_ADJ_ROOM_ENCAP_L4_GRE**,
+ * **BPF_F_ADJ_ROOM_ENCAP_L4_UDP**:
+ * Use with ENCAP_L3 flags to further specify the tunnel type.
+ *
+ * * **BPF_F_ADJ_ROOM_ENCAP_L2**\ (*len*):
+ * Use with ENCAP_L3/L4 flags to further specify the tunnel
+ * type; *len* is the length of the inner MAC header.
+ *
+ * A call to this helper is susceptible to change the underlying
+ * packet buffer. Therefore, at load time, all checks on pointers
+ * previously done by the verifier are invalidated and must be
+ * performed again, if the helper is used in combination with
+ * direct packet access.
+ *
+ * Returns
+ * 0 on success, or a negative error in case of failure.
+ */
+static int (*bpf_skb_adjust_room)(struct __sk_buff *skb, __s32 len_diff, __u32 mode, __u64 flags) = (void *) 50;
+
+/*
+ * bpf_redirect_map
+ *
+ * Redirect the packet to the endpoint referenced by *map* at
+ * index *key*. Depending on its type, this *map* can contain
+ * references to net devices (for forwarding packets through other
+ * ports), or to CPUs (for redirecting XDP frames to another CPU;
+ * but this is only implemented for native XDP (with driver
+ * support) as of this writing).
+ *
+ * The lower two bits of *flags* are used as the return code if
+ * the map lookup fails. This is so that the return value can be
+ * one of the XDP program return codes up to XDP_TX, as chosen by
+ * the caller. Any higher bits in the *flags* argument must be
+ * unset.
+ *
+ * When used to redirect packets to net devices, this helper
+ * provides a high performance increase over **bpf_redirect**\ ().
+ * This is due to various implementation details of the underlying
+ * mechanisms, one of which is the fact that **bpf_redirect_map**\
+ * () tries to send packet as a "bulk" to the device.
+ *
+ * Returns
+ * **XDP_REDIRECT** on success, or **XDP_ABORTED** on error.
+ */
+static int (*bpf_redirect_map)(void *map, __u32 key, __u64 flags) = (void *) 51;
+
+/*
+ * bpf_sk_redirect_map
+ *
+ * Redirect the packet to the socket referenced by *map* (of type
+ * **BPF_MAP_TYPE_SOCKMAP**) at index *key*. Both ingress and
+ * egress interfaces can be used for redirection. The
+ * **BPF_F_INGRESS** value in *flags* is used to make the
+ * distinction (ingress path is selected if the flag is present,
+ * egress path otherwise). This is the only flag supported for now.
+ *
+ * Returns
+ * **SK_PASS** on success, or **SK_DROP** on error.
+ */
+static int (*bpf_sk_redirect_map)(struct __sk_buff *skb, void *map, __u32 key, __u64 flags) = (void *) 52;
+
+/*
+ * bpf_sock_map_update
+ *
+ * Add an entry to, or update a *map* referencing sockets. The
+ * *skops* is used as a new value for the entry associated to
+ * *key*. *flags* is one of:
+ *
+ * **BPF_NOEXIST**
+ * The entry for *key* must not exist in the map.
+ * **BPF_EXIST**
+ * The entry for *key* must already exist in the map.
+ * **BPF_ANY**
+ * No condition on the existence of the entry for *key*.
+ *
+ * If the *map* has eBPF programs (parser and verdict), those will
+ * be inherited by the socket being added. If the socket is
+ * already attached to eBPF programs, this results in an error.
+ *
+ * Returns
+ * 0 on success, or a negative error in case of failure.
+ */
+static int (*bpf_sock_map_update)(struct bpf_sock_ops *skops, void *map, void *key, __u64 flags) = (void *) 53;
+
+/*
+ * bpf_xdp_adjust_meta
+ *
+ * Adjust the address pointed by *xdp_md*\ **->data_meta** by
+ * *delta* (which can be positive or negative). Note that this
+ * operation modifies the address stored in *xdp_md*\ **->data**,
+ * so the latter must be loaded only after the helper has been
+ * called.
+ *
+ * The use of *xdp_md*\ **->data_meta** is optional and programs
+ * are not required to use it. The rationale is that when the
+ * packet is processed with XDP (e.g. as DoS filter), it is
+ * possible to push further meta data along with it before passing
+ * to the stack, and to give the guarantee that an ingress eBPF
+ * program attached as a TC classifier on the same device can pick
+ * this up for further post-processing. Since TC works with socket
+ * buffers, it remains possible to set from XDP the **mark** or
+ * **priority** pointers, or other pointers for the socket buffer.
+ * Having this scratch space generic and programmable allows for
+ * more flexibility as the user is free to store whatever meta
+ * data they need.
+ *
+ * A call to this helper is susceptible to change the underlying
+ * packet buffer. Therefore, at load time, all checks on pointers
+ * previously done by the verifier are invalidated and must be
+ * performed again, if the helper is used in combination with
+ * direct packet access.
+ *
+ * Returns
+ * 0 on success, or a negative error in case of failure.
+ */
+static int (*bpf_xdp_adjust_meta)(struct xdp_md *xdp_md, int delta) = (void *) 54;
+
+/*
+ * bpf_perf_event_read_value
+ *
+ * Read the value of a perf event counter, and store it into *buf*
+ * of size *buf_size*. This helper relies on a *map* of type
+ * **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. The nature of the perf event
+ * counter is selected when *map* is updated with perf event file
+ * descriptors. The *map* is an array whose size is the number of
+ * available CPUs, and each cell contains a value relative to one
+ * CPU. The value to retrieve is indicated by *flags*, that
+ * contains the index of the CPU to look up, masked with
+ * **BPF_F_INDEX_MASK**. Alternatively, *flags* can be set to
+ * **BPF_F_CURRENT_CPU** to indicate that the value for the
+ * current CPU should be retrieved.
+ *
+ * This helper behaves in a way close to
+ * **bpf_perf_event_read**\ () helper, save that instead of
+ * just returning the value observed, it fills the *buf*
+ * structure. This allows for additional data to be retrieved: in
+ * particular, the enabled and running times (in *buf*\
+ * **->enabled** and *buf*\ **->running**, respectively) are
+ * copied. In general, **bpf_perf_event_read_value**\ () is
+ * recommended over **bpf_perf_event_read**\ (), which has some
+ * ABI issues and provides fewer functionalities.
+ *
+ * These values are interesting, because hardware PMU (Performance
+ * Monitoring Unit) counters are limited resources. When there are
+ * more PMU based perf events opened than available counters,
+ * kernel will multiplex these events so each event gets certain
+ * percentage (but not all) of the PMU time. In case that
+ * multiplexing happens, the number of samples or counter value
+ * will not reflect the case compared to when no multiplexing
+ * occurs. This makes comparison between different runs difficult.
+ * Typically, the counter value should be normalized before
+ * comparing to other experiments. The usual normalization is done
+ * as follows.
+ *
+ * ::
+ *
+ * normalized_counter = counter * t_enabled / t_running
+ *
+ * Where t_enabled is the time enabled for event and t_running is
+ * the time running for event since last normalization. The
+ * enabled and running times are accumulated since the perf event
+ * open. To achieve scaling factor between two invocations of an
+ * eBPF program, users can can use CPU id as the key (which is
+ * typical for perf array usage model) to remember the previous
+ * value and do the calculation inside the eBPF program.
+ *
+ * Returns
+ * 0 on success, or a negative error in case of failure.
+ */
+static int (*bpf_perf_event_read_value)(void *map, __u64 flags, struct bpf_perf_event_value *buf, __u32 buf_size) = (void *) 55;
+
+/*
+ * bpf_perf_prog_read_value
+ *
+ * For en eBPF program attached to a perf event, retrieve the
+ * value of the event counter associated to *ctx* and store it in
+ * the structure pointed by *buf* and of size *buf_size*. Enabled
+ * and running times are also stored in the structure (see
+ * description of helper **bpf_perf_event_read_value**\ () for
+ * more details).
+ *
+ * Returns
+ * 0 on success, or a negative error in case of failure.
+ */
+static int (*bpf_perf_prog_read_value)(struct bpf_perf_event_data *ctx, struct bpf_perf_event_value *buf, __u32 buf_size) = (void *) 56;
+
+/*
+ * bpf_getsockopt
+ *
+ * Emulate a call to **getsockopt()** on the socket associated to
+ * *bpf_socket*, which must be a full socket. The *level* at
+ * which the option resides and the name *optname* of the option
+ * must be specified, see **getsockopt(2)** for more information.
+ * The retrieved value is stored in the structure pointed by
+ * *opval* and of length *optlen*.
+ *
+ * This helper actually implements a subset of **getsockopt()**.
+ * It supports the following *level*\ s:
+ *
+ * * **IPPROTO_TCP**, which supports *optname*
+ * **TCP_CONGESTION**.
+ * * **IPPROTO_IP**, which supports *optname* **IP_TOS**.
+ * * **IPPROTO_IPV6**, which supports *optname* **IPV6_TCLASS**.
+ *
+ * Returns
+ * 0 on success, or a negative error in case of failure.
+ */
+static int (*bpf_getsockopt)(struct bpf_sock_ops *bpf_socket, int level, int optname, void *optval, int optlen) = (void *) 57;
+
+/*
+ * bpf_override_return
+ *
+ * Used for error injection, this helper uses kprobes to override
+ * the return value of the probed function, and to set it to *rc*.
+ * The first argument is the context *regs* on which the kprobe
+ * works.
+ *
+ * This helper works by setting setting the PC (program counter)
+ * to an override function which is run in place of the original
+ * probed function. This means the probed function is not run at
+ * all. The replacement function just returns with the required
+ * value.
+ *
+ * This helper has security implications, and thus is subject to
+ * restrictions. It is only available if the kernel was compiled
+ * with the **CONFIG_BPF_KPROBE_OVERRIDE** configuration
+ * option, and in this case it only works on functions tagged with
+ * **ALLOW_ERROR_INJECTION** in the kernel code.
+ *
+ * Also, the helper is only available for the architectures having
+ * the CONFIG_FUNCTION_ERROR_INJECTION option. As of this writing,
+ * x86 architecture is the only one to support this feature.
+ *
+ * Returns
+ * 0
+ */
+static int (*bpf_override_return)(struct pt_regs *regs, __u64 rc) = (void *) 58;
+
+/*
+ * bpf_sock_ops_cb_flags_set
+ *
+ * Attempt to set the value of the **bpf_sock_ops_cb_flags** field
+ * for the full TCP socket associated to *bpf_sock_ops* to
+ * *argval*.
+ *
+ * The primary use of this field is to determine if there should
+ * be calls to eBPF programs of type
+ * **BPF_PROG_TYPE_SOCK_OPS** at various points in the TCP
+ * code. A program of the same type can change its value, per
+ * connection and as necessary, when the connection is
+ * established. This field is directly accessible for reading, but
+ * this helper must be used for updates in order to return an
+ * error if an eBPF program tries to set a callback that is not
+ * supported in the current kernel.
+ *
+ * *argval* is a flag array which can combine these flags:
+ *
+ * * **BPF_SOCK_OPS_RTO_CB_FLAG** (retransmission time out)
+ * * **BPF_SOCK_OPS_RETRANS_CB_FLAG** (retransmission)
+ * * **BPF_SOCK_OPS_STATE_CB_FLAG** (TCP state change)
+ * * **BPF_SOCK_OPS_RTT_CB_FLAG** (every RTT)
+ *
+ * Therefore, this function can be used to clear a callback flag by
+ * setting the appropriate bit to zero. e.g. to disable the RTO
+ * callback:
+ *
+ * **bpf_sock_ops_cb_flags_set(bpf_sock,**
+ * **bpf_sock->bpf_sock_ops_cb_flags & ~BPF_SOCK_OPS_RTO_CB_FLAG)**
+ *
+ * Here are some examples of where one could call such eBPF
+ * program:
+ *
+ * * When RTO fires.
+ * * When a packet is retransmitted.
+ * * When the connection terminates.
+ * * When a packet is sent.
+ * * When a packet is received.
+ *
+ * Returns
+ * Code **-EINVAL** if the socket is not a full TCP socket;
+ * otherwise, a positive number containing the bits that could not
+ * be set is returned (which comes down to 0 if all bits were set
+ * as required).
+ */
+static int (*bpf_sock_ops_cb_flags_set)(struct bpf_sock_ops *bpf_sock, int argval) = (void *) 59;
+
+/*
+ * bpf_msg_redirect_map
+ *
+ * This helper is used in programs implementing policies at the
+ * socket level. If the message *msg* is allowed to pass (i.e. if
+ * the verdict eBPF program returns **SK_PASS**), redirect it to
+ * the socket referenced by *map* (of type
+ * **BPF_MAP_TYPE_SOCKMAP**) at index *key*. Both ingress and
+ * egress interfaces can be used for redirection. The
+ * **BPF_F_INGRESS** value in *flags* is used to make the
+ * distinction (ingress path is selected if the flag is present,
+ * egress path otherwise). This is the only flag supported for now.
+ *
+ * Returns
+ * **SK_PASS** on success, or **SK_DROP** on error.
+ */
+static int (*bpf_msg_redirect_map)(struct sk_msg_md *msg, void *map, __u32 key, __u64 flags) = (void *) 60;
+
+/*
+ * bpf_msg_apply_bytes
+ *
+ * For socket policies, apply the verdict of the eBPF program to
+ * the next *bytes* (number of bytes) of message *msg*.
+ *
+ * For example, this helper can be used in the following cases:
+ *
+ * * A single **sendmsg**\ () or **sendfile**\ () system call
+ * contains multiple logical messages that the eBPF program is
+ * supposed to read and for which it should apply a verdict.
+ * * An eBPF program only cares to read the first *bytes* of a
+ * *msg*. If the message has a large payload, then setting up
+ * and calling the eBPF program repeatedly for all bytes, even
+ * though the verdict is already known, would create unnecessary
+ * overhead.
+ *
+ * When called from within an eBPF program, the helper sets a
+ * counter internal to the BPF infrastructure, that is used to
+ * apply the last verdict to the next *bytes*. If *bytes* is
+ * smaller than the current data being processed from a
+ * **sendmsg**\ () or **sendfile**\ () system call, the first
+ * *bytes* will be sent and the eBPF program will be re-run with
+ * the pointer for start of data pointing to byte number *bytes*
+ * **+ 1**. If *bytes* is larger than the current data being
+ * processed, then the eBPF verdict will be applied to multiple
+ * **sendmsg**\ () or **sendfile**\ () calls until *bytes* are
+ * consumed.
+ *
+ * Note that if a socket closes with the internal counter holding
+ * a non-zero value, this is not a problem because data is not
+ * being buffered for *bytes* and is sent as it is received.
+ *
+ * Returns
+ * 0
+ */
+static int (*bpf_msg_apply_bytes)(struct sk_msg_md *msg, __u32 bytes) = (void *) 61;
+
+/*
+ * bpf_msg_cork_bytes
+ *
+ * For socket policies, prevent the execution of the verdict eBPF
+ * program for message *msg* until *bytes* (byte number) have been
+ * accumulated.
+ *
+ * This can be used when one needs a specific number of bytes
+ * before a verdict can be assigned, even if the data spans
+ * multiple **sendmsg**\ () or **sendfile**\ () calls. The extreme
+ * case would be a user calling **sendmsg**\ () repeatedly with
+ * 1-byte long message segments. Obviously, this is bad for
+ * performance, but it is still valid. If the eBPF program needs
+ * *bytes* bytes to validate a header, this helper can be used to
+ * prevent the eBPF program to be called again until *bytes* have
+ * been accumulated.
+ *
+ * Returns
+ * 0
+ */
+static int (*bpf_msg_cork_bytes)(struct sk_msg_md *msg, __u32 bytes) = (void *) 62;
+
+/*
+ * bpf_msg_pull_data
+ *
+ * For socket policies, pull in non-linear data from user space
+ * for *msg* and set pointers *msg*\ **->data** and *msg*\
+ * **->data_end** to *start* and *end* bytes offsets into *msg*,
+ * respectively.
+ *
+ * If a program of type **BPF_PROG_TYPE_SK_MSG** is run on a
+ * *msg* it can only parse data that the (**data**, **data_end**)
+ * pointers have already consumed. For **sendmsg**\ () hooks this
+ * is likely the first scatterlist element. But for calls relying
+ * on the **sendpage** handler (e.g. **sendfile**\ ()) this will
+ * be the range (**0**, **0**) because the data is shared with
+ * user space and by default the objective is to avoid allowing
+ * user space to modify data while (or after) eBPF verdict is
+ * being decided. This helper can be used to pull in data and to
+ * set the start and end pointer to given values. Data will be
+ * copied if necessary (i.e. if data was not linear and if start
+ * and end pointers do not point to the same chunk).
+ *
+ * A call to this helper is susceptible to change the underlying
+ * packet buffer. Therefore, at load time, all checks on pointers
+ * previously done by the verifier are invalidated and must be
+ * performed again, if the helper is used in combination with
+ * direct packet access.
+ *
+ * All values for *flags* are reserved for future usage, and must
+ * be left at zero.
+ *
+ * Returns
+ * 0 on success, or a negative error in case of failure.
+ */
+static int (*bpf_msg_pull_data)(struct sk_msg_md *msg, __u32 start, __u32 end, __u64 flags) = (void *) 63;
+
+/*
+ * bpf_bind
+ *
+ * Bind the socket associated to *ctx* to the address pointed by
+ * *addr*, of length *addr_len*. This allows for making outgoing
+ * connection from the desired IP address, which can be useful for
+ * example when all processes inside a cgroup should use one
+ * single IP address on a host that has multiple IP configured.
+ *
+ * This helper works for IPv4 and IPv6, TCP and UDP sockets. The
+ * domain (*addr*\ **->sa_family**) must be **AF_INET** (or
+ * **AF_INET6**). Looking for a free port to bind to can be
+ * expensive, therefore binding to port is not permitted by the
+ * helper: *addr*\ **->sin_port** (or **sin6_port**, respectively)
+ * must be set to zero.
+ *
+ * Returns
+ * 0 on success, or a negative error in case of failure.
+ */
+static int (*bpf_bind)(struct bpf_sock_addr *ctx, struct sockaddr *addr, int addr_len) = (void *) 64;
+
+/*
+ * bpf_xdp_adjust_tail
+ *
+ * Adjust (move) *xdp_md*\ **->data_end** by *delta* bytes. It is
+ * only possible to shrink the packet as of this writing,
+ * therefore *delta* must be a negative integer.
+ *
+ * A call to this helper is susceptible to change the underlying
+ * packet buffer. Therefore, at load time, all checks on pointers
+ * previously done by the verifier are invalidated and must be
+ * performed again, if the helper is used in combination with
+ * direct packet access.
+ *
+ * Returns
+ * 0 on success, or a negative error in case of failure.
+ */
+static int (*bpf_xdp_adjust_tail)(struct xdp_md *xdp_md, int delta) = (void *) 65;
+
+/*
+ * bpf_skb_get_xfrm_state
+ *
+ * Retrieve the XFRM state (IP transform framework, see also
+ * **ip-xfrm(8)**) at *index* in XFRM "security path" for *skb*.
+ *
+ * The retrieved value is stored in the **struct bpf_xfrm_state**
+ * pointed by *xfrm_state* and of length *size*.
+ *
+ * All values for *flags* are reserved for future usage, and must
+ * be left at zero.
+ *
+ * This helper is available only if the kernel was compiled with
+ * **CONFIG_XFRM** configuration option.
+ *
+ * Returns
+ * 0 on success, or a negative error in case of failure.
+ */
+static int (*bpf_skb_get_xfrm_state)(struct __sk_buff *skb, __u32 index, struct bpf_xfrm_state *xfrm_state, __u32 size, __u64 flags) = (void *) 66;
+
+/*
+ * bpf_get_stack
+ *
+ * Return a user or a kernel stack in bpf program provided buffer.
+ * To achieve this, the helper needs *ctx*, which is a pointer
+ * to the context on which the tracing program is executed.
+ * To store the stacktrace, the bpf program provides *buf* with
+ * a nonnegative *size*.
+ *
+ * The last argument, *flags*, holds the number of stack frames to
+ * skip (from 0 to 255), masked with
+ * **BPF_F_SKIP_FIELD_MASK**. The next bits can be used to set
+ * the following flags:
+ *
+ * **BPF_F_USER_STACK**
+ * Collect a user space stack instead of a kernel stack.
+ * **BPF_F_USER_BUILD_ID**
+ * Collect buildid+offset instead of ips for user stack,
+ * only valid if **BPF_F_USER_STACK** is also specified.
+ *
+ * **bpf_get_stack**\ () can collect up to
+ * **PERF_MAX_STACK_DEPTH** both kernel and user frames, subject
+ * to sufficient large buffer size. Note that
+ * this limit can be controlled with the **sysctl** program, and
+ * that it should be manually increased in order to profile long
+ * user stacks (such as stacks for Java programs). To do so, use:
+ *
+ * ::
+ *
+ * # sysctl kernel.perf_event_max_stack=<new value>
+ *
+ * Returns
+ * A non-negative value equal to or less than *size* on success,
+ * or a negative error in case of failure.
+ */
+static int (*bpf_get_stack)(void *ctx, void *buf, __u32 size, __u64 flags) = (void *) 67;
+
+/*
+ * bpf_skb_load_bytes_relative
+ *
+ * This helper is similar to **bpf_skb_load_bytes**\ () in that
+ * it provides an easy way to load *len* bytes from *offset*
+ * from the packet associated to *skb*, into the buffer pointed
+ * by *to*. The difference to **bpf_skb_load_bytes**\ () is that
+ * a fifth argument *start_header* exists in order to select a
+ * base offset to start from. *start_header* can be one of:
+ *
+ * **BPF_HDR_START_MAC**
+ * Base offset to load data from is *skb*'s mac header.
+ * **BPF_HDR_START_NET**
+ * Base offset to load data from is *skb*'s network header.
+ *
+ * In general, "direct packet access" is the preferred method to
+ * access packet data, however, this helper is in particular useful
+ * in socket filters where *skb*\ **->data** does not always point
+ * to the start of the mac header and where "direct packet access"
+ * is not available.
+ *
+ * Returns
+ * 0 on success, or a negative error in case of failure.
+ */
+static int (*bpf_skb_load_bytes_relative)(const void *skb, __u32 offset, void *to, __u32 len, __u32 start_header) = (void *) 68;
+
+/*
+ * bpf_fib_lookup
+ *
+ * Do FIB lookup in kernel tables using parameters in *params*.
+ * If lookup is successful and result shows packet is to be
+ * forwarded, the neighbor tables are searched for the nexthop.
+ * If successful (ie., FIB lookup shows forwarding and nexthop
+ * is resolved), the nexthop address is returned in ipv4_dst
+ * or ipv6_dst based on family, smac is set to mac address of
+ * egress device, dmac is set to nexthop mac address, rt_metric
+ * is set to metric from route (IPv4/IPv6 only), and ifindex
+ * is set to the device index of the nexthop from the FIB lookup.
+ *
+ * *plen* argument is the size of the passed in struct.
+ * *flags* argument can be a combination of one or more of the
+ * following values:
+ *
+ * **BPF_FIB_LOOKUP_DIRECT**
+ * Do a direct table lookup vs full lookup using FIB
+ * rules.
+ * **BPF_FIB_LOOKUP_OUTPUT**
+ * Perform lookup from an egress perspective (default is
+ * ingress).
+ *
+ * *ctx* is either **struct xdp_md** for XDP programs or
+ * **struct sk_buff** tc cls_act programs.
+ *
+ * Returns
+ * * < 0 if any input argument is invalid
+ * * 0 on success (packet is forwarded, nexthop neighbor exists)
+ * * > 0 one of **BPF_FIB_LKUP_RET_** codes explaining why the
+ * packet is not forwarded or needs assist from full stack
+ */
+static int (*bpf_fib_lookup)(void *ctx, struct bpf_fib_lookup *params, int plen, __u32 flags) = (void *) 69;
+
+/*
+ * bpf_sock_hash_update
+ *
+ * Add an entry to, or update a sockhash *map* referencing sockets.
+ * The *skops* is used as a new value for the entry associated to
+ * *key*. *flags* is one of:
+ *
+ * **BPF_NOEXIST**
+ * The entry for *key* must not exist in the map.
+ * **BPF_EXIST**
+ * The entry for *key* must already exist in the map.
+ * **BPF_ANY**
+ * No condition on the existence of the entry for *key*.
+ *
+ * If the *map* has eBPF programs (parser and verdict), those will
+ * be inherited by the socket being added. If the socket is
+ * already attached to eBPF programs, this results in an error.
+ *
+ * Returns
+ * 0 on success, or a negative error in case of failure.
+ */
+static int (*bpf_sock_hash_update)(struct bpf_sock_ops *skops, void *map, void *key, __u64 flags) = (void *) 70;
+
+/*
+ * bpf_msg_redirect_hash
+ *
+ * This helper is used in programs implementing policies at the
+ * socket level. If the message *msg* is allowed to pass (i.e. if
+ * the verdict eBPF program returns **SK_PASS**), redirect it to
+ * the socket referenced by *map* (of type
+ * **BPF_MAP_TYPE_SOCKHASH**) using hash *key*. Both ingress and
+ * egress interfaces can be used for redirection. The
+ * **BPF_F_INGRESS** value in *flags* is used to make the
+ * distinction (ingress path is selected if the flag is present,
+ * egress path otherwise). This is the only flag supported for now.
+ *
+ * Returns
+ * **SK_PASS** on success, or **SK_DROP** on error.
+ */
+static int (*bpf_msg_redirect_hash)(struct sk_msg_md *msg, void *map, void *key, __u64 flags) = (void *) 71;
+
+/*
+ * bpf_sk_redirect_hash
+ *
+ * This helper is used in programs implementing policies at the
+ * skb socket level. If the sk_buff *skb* is allowed to pass (i.e.
+ * if the verdeict eBPF program returns **SK_PASS**), redirect it
+ * to the socket referenced by *map* (of type
+ * **BPF_MAP_TYPE_SOCKHASH**) using hash *key*. Both ingress and
+ * egress interfaces can be used for redirection. The
+ * **BPF_F_INGRESS** value in *flags* is used to make the
+ * distinction (ingress path is selected if the flag is present,
+ * egress otherwise). This is the only flag supported for now.
+ *
+ * Returns
+ * **SK_PASS** on success, or **SK_DROP** on error.
+ */
+static int (*bpf_sk_redirect_hash)(struct __sk_buff *skb, void *map, void *key, __u64 flags) = (void *) 72;
+
+/*
+ * bpf_lwt_push_encap
+ *
+ * Encapsulate the packet associated to *skb* within a Layer 3
+ * protocol header. This header is provided in the buffer at
+ * address *hdr*, with *len* its size in bytes. *type* indicates
+ * the protocol of the header and can be one of:
+ *
+ * **BPF_LWT_ENCAP_SEG6**
+ * IPv6 encapsulation with Segment Routing Header
+ * (**struct ipv6_sr_hdr**). *hdr* only contains the SRH,
+ * the IPv6 header is computed by the kernel.
+ * **BPF_LWT_ENCAP_SEG6_INLINE**
+ * Only works if *skb* contains an IPv6 packet. Insert a
+ * Segment Routing Header (**struct ipv6_sr_hdr**) inside
+ * the IPv6 header.
+ * **BPF_LWT_ENCAP_IP**
+ * IP encapsulation (GRE/GUE/IPIP/etc). The outer header
+ * must be IPv4 or IPv6, followed by zero or more
+ * additional headers, up to **LWT_BPF_MAX_HEADROOM**
+ * total bytes in all prepended headers. Please note that
+ * if **skb_is_gso**\ (*skb*) is true, no more than two
+ * headers can be prepended, and the inner header, if
+ * present, should be either GRE or UDP/GUE.
+ *
+ * **BPF_LWT_ENCAP_SEG6**\ \* types can be called by BPF programs
+ * of type **BPF_PROG_TYPE_LWT_IN**; **BPF_LWT_ENCAP_IP** type can
+ * be called by bpf programs of types **BPF_PROG_TYPE_LWT_IN** and
+ * **BPF_PROG_TYPE_LWT_XMIT**.
+ *
+ * A call to this helper is susceptible to change the underlying
+ * packet buffer. Therefore, at load time, all checks on pointers
+ * previously done by the verifier are invalidated and must be
+ * performed again, if the helper is used in combination with
+ * direct packet access.
+ *
+ * Returns
+ * 0 on success, or a negative error in case of failure.
+ */
+static int (*bpf_lwt_push_encap)(struct __sk_buff *skb, __u32 type, void *hdr, __u32 len) = (void *) 73;
+
+/*
+ * bpf_lwt_seg6_store_bytes
+ *
+ * Store *len* bytes from address *from* into the packet
+ * associated to *skb*, at *offset*. Only the flags, tag and TLVs
+ * inside the outermost IPv6 Segment Routing Header can be
+ * modified through this helper.
+ *
+ * A call to this helper is susceptible to change the underlying
+ * packet buffer. Therefore, at load time, all checks on pointers
+ * previously done by the verifier are invalidated and must be
+ * performed again, if the helper is used in combination with
+ * direct packet access.
+ *
+ * Returns
+ * 0 on success, or a negative error in case of failure.
+ */
+static int (*bpf_lwt_seg6_store_bytes)(struct __sk_buff *skb, __u32 offset, const void *from, __u32 len) = (void *) 74;
+
+/*
+ * bpf_lwt_seg6_adjust_srh
+ *
+ * Adjust the size allocated to TLVs in the outermost IPv6
+ * Segment Routing Header contained in the packet associated to
+ * *skb*, at position *offset* by *delta* bytes. Only offsets
+ * after the segments are accepted. *delta* can be as well
+ * positive (growing) as negative (shrinking).
+ *
+ * A call to this helper is susceptible to change the underlying
+ * packet buffer. Therefore, at load time, all checks on pointers
+ * previously done by the verifier are invalidated and must be
+ * performed again, if the helper is used in combination with
+ * direct packet access.
+ *
+ * Returns
+ * 0 on success, or a negative error in case of failure.
+ */
+static int (*bpf_lwt_seg6_adjust_srh)(struct __sk_buff *skb, __u32 offset, __s32 delta) = (void *) 75;
+
+/*
+ * bpf_lwt_seg6_action
+ *
+ * Apply an IPv6 Segment Routing action of type *action* to the
+ * packet associated to *skb*. Each action takes a parameter
+ * contained at address *param*, and of length *param_len* bytes.
+ * *action* can be one of:
+ *
+ * **SEG6_LOCAL_ACTION_END_X**
+ * End.X action: Endpoint with Layer-3 cross-connect.
+ * Type of *param*: **struct in6_addr**.
+ * **SEG6_LOCAL_ACTION_END_T**
+ * End.T action: Endpoint with specific IPv6 table lookup.
+ * Type of *param*: **int**.
+ * **SEG6_LOCAL_ACTION_END_B6**
+ * End.B6 action: Endpoint bound to an SRv6 policy.
+ * Type of *param*: **struct ipv6_sr_hdr**.
+ * **SEG6_LOCAL_ACTION_END_B6_ENCAP**
+ * End.B6.Encap action: Endpoint bound to an SRv6
+ * encapsulation policy.
+ * Type of *param*: **struct ipv6_sr_hdr**.
+ *
+ * A call to this helper is susceptible to change the underlying
+ * packet buffer. Therefore, at load time, all checks on pointers
+ * previously done by the verifier are invalidated and must be
+ * performed again, if the helper is used in combination with
+ * direct packet access.
+ *
+ * Returns
+ * 0 on success, or a negative error in case of failure.
+ */
+static int (*bpf_lwt_seg6_action)(struct __sk_buff *skb, __u32 action, void *param, __u32 param_len) = (void *) 76;
+
+/*
+ * bpf_rc_repeat
+ *
+ * This helper is used in programs implementing IR decoding, to
+ * report a successfully decoded repeat key message. This delays
+ * the generation of a key up event for previously generated
+ * key down event.
+ *
+ * Some IR protocols like NEC have a special IR message for
+ * repeating last button, for when a button is held down.
+ *
+ * The *ctx* should point to the lirc sample as passed into
+ * the program.
+ *
+ * This helper is only available is the kernel was compiled with
+ * the **CONFIG_BPF_LIRC_MODE2** configuration option set to
+ * "**y**".
+ *
+ * Returns
+ * 0
+ */
+static int (*bpf_rc_repeat)(void *ctx) = (void *) 77;
+
+/*
+ * bpf_rc_keydown
+ *
+ * This helper is used in programs implementing IR decoding, to
+ * report a successfully decoded key press with *scancode*,
+ * *toggle* value in the given *protocol*. The scancode will be
+ * translated to a keycode using the rc keymap, and reported as
+ * an input key down event. After a period a key up event is
+ * generated. This period can be extended by calling either
+ * **bpf_rc_keydown**\ () again with the same values, or calling
+ * **bpf_rc_repeat**\ ().
+ *
+ * Some protocols include a toggle bit, in case the button was
+ * released and pressed again between consecutive scancodes.
+ *
+ * The *ctx* should point to the lirc sample as passed into
+ * the program.
+ *
+ * The *protocol* is the decoded protocol number (see
+ * **enum rc_proto** for some predefined values).
+ *
+ * This helper is only available is the kernel was compiled with
+ * the **CONFIG_BPF_LIRC_MODE2** configuration option set to
+ * "**y**".
+ *
+ * Returns
+ * 0
+ */
+static int (*bpf_rc_keydown)(void *ctx, __u32 protocol, __u64 scancode, __u32 toggle) = (void *) 78;
+
+/*
+ * bpf_skb_cgroup_id
+ *
+ * Return the cgroup v2 id of the socket associated with the *skb*.
+ * This is roughly similar to the **bpf_get_cgroup_classid**\ ()
+ * helper for cgroup v1 by providing a tag resp. identifier that
+ * can be matched on or used for map lookups e.g. to implement
+ * policy. The cgroup v2 id of a given path in the hierarchy is
+ * exposed in user space through the f_handle API in order to get
+ * to the same 64-bit id.
+ *
+ * This helper can be used on TC egress path, but not on ingress,
+ * and is available only if the kernel was compiled with the
+ * **CONFIG_SOCK_CGROUP_DATA** configuration option.
+ *
+ * Returns
+ * The id is returned or 0 in case the id could not be retrieved.
+ */
+static __u64 (*bpf_skb_cgroup_id)(struct __sk_buff *skb) = (void *) 79;
+
+/*
+ * bpf_get_current_cgroup_id
+ *
+ *
+ * Returns
+ * A 64-bit integer containing the current cgroup id based
+ * on the cgroup within which the current task is running.
+ */
+static __u64 (*bpf_get_current_cgroup_id)(void) = (void *) 80;
+
+/*
+ * bpf_get_local_storage
+ *
+ * Get the pointer to the local storage area.
+ * The type and the size of the local storage is defined
+ * by the *map* argument.
+ * The *flags* meaning is specific for each map type,
+ * and has to be 0 for cgroup local storage.
+ *
+ * Depending on the BPF program type, a local storage area
+ * can be shared between multiple instances of the BPF program,
+ * running simultaneously.
+ *
+ * A user should care about the synchronization by himself.
+ * For example, by using the **BPF_STX_XADD** instruction to alter
+ * the shared data.
+ *
+ * Returns
+ * A pointer to the local storage area.
+ */
+static void *(*bpf_get_local_storage)(void *map, __u64 flags) = (void *) 81;
+
+/*
+ * bpf_sk_select_reuseport
+ *
+ * Select a **SO_REUSEPORT** socket from a
+ * **BPF_MAP_TYPE_REUSEPORT_ARRAY** *map*.
+ * It checks the selected socket is matching the incoming
+ * request in the socket buffer.
+ *
+ * Returns
+ * 0 on success, or a negative error in case of failure.
+ */
+static int (*bpf_sk_select_reuseport)(struct sk_reuseport_md *reuse, void *map, void *key, __u64 flags) = (void *) 82;
+
+/*
+ * bpf_skb_ancestor_cgroup_id
+ *
+ * Return id of cgroup v2 that is ancestor of cgroup associated
+ * with the *skb* at the *ancestor_level*. The root cgroup is at
+ * *ancestor_level* zero and each step down the hierarchy
+ * increments the level. If *ancestor_level* == level of cgroup
+ * associated with *skb*, then return value will be same as that
+ * of **bpf_skb_cgroup_id**\ ().
+ *
+ * The helper is useful to implement policies based on cgroups
+ * that are upper in hierarchy than immediate cgroup associated
+ * with *skb*.
+ *
+ * The format of returned id and helper limitations are same as in
+ * **bpf_skb_cgroup_id**\ ().
+ *
+ * Returns
+ * The id is returned or 0 in case the id could not be retrieved.
+ */
+static __u64 (*bpf_skb_ancestor_cgroup_id)(struct __sk_buff *skb, int ancestor_level) = (void *) 83;
+
+/*
+ * bpf_sk_lookup_tcp
+ *
+ * Look for TCP socket matching *tuple*, optionally in a child
+ * network namespace *netns*. The return value must be checked,
+ * and if non-**NULL**, released via **bpf_sk_release**\ ().
+ *
+ * The *ctx* should point to the context of the program, such as
+ * the skb or socket (depending on the hook in use). This is used
+ * to determine the base network namespace for the lookup.
+ *
+ * *tuple_size* must be one of:
+ *
+ * **sizeof**\ (*tuple*\ **->ipv4**)
+ * Look for an IPv4 socket.
+ * **sizeof**\ (*tuple*\ **->ipv6**)
+ * Look for an IPv6 socket.
+ *
+ * If the *netns* is a negative signed 32-bit integer, then the
+ * socket lookup table in the netns associated with the *ctx* will
+ * will be used. For the TC hooks, this is the netns of the device
+ * in the skb. For socket hooks, this is the netns of the socket.
+ * If *netns* is any other signed 32-bit value greater than or
+ * equal to zero then it specifies the ID of the netns relative to
+ * the netns associated with the *ctx*. *netns* values beyond the
+ * range of 32-bit integers are reserved for future use.
+ *
+ * All values for *flags* are reserved for future usage, and must
+ * be left at zero.
+ *
+ * This helper is available only if the kernel was compiled with
+ * **CONFIG_NET** configuration option.
+ *
+ * Returns
+ * Pointer to **struct bpf_sock**, or **NULL** in case of failure.
+ * For sockets with reuseport option, the **struct bpf_sock**
+ * result is from *reuse*\ **->socks**\ [] using the hash of the
+ * tuple.
+ */
+static struct bpf_sock *(*bpf_sk_lookup_tcp)(void *ctx, struct bpf_sock_tuple *tuple, __u32 tuple_size, __u64 netns, __u64 flags) = (void *) 84;
+
+/*
+ * bpf_sk_lookup_udp
+ *
+ * Look for UDP socket matching *tuple*, optionally in a child
+ * network namespace *netns*. The return value must be checked,
+ * and if non-**NULL**, released via **bpf_sk_release**\ ().
+ *
+ * The *ctx* should point to the context of the program, such as
+ * the skb or socket (depending on the hook in use). This is used
+ * to determine the base network namespace for the lookup.
+ *
+ * *tuple_size* must be one of:
+ *
+ * **sizeof**\ (*tuple*\ **->ipv4**)
+ * Look for an IPv4 socket.
+ * **sizeof**\ (*tuple*\ **->ipv6**)
+ * Look for an IPv6 socket.
+ *
+ * If the *netns* is a negative signed 32-bit integer, then the
+ * socket lookup table in the netns associated with the *ctx* will
+ * will be used. For the TC hooks, this is the netns of the device
+ * in the skb. For socket hooks, this is the netns of the socket.
+ * If *netns* is any other signed 32-bit value greater than or
+ * equal to zero then it specifies the ID of the netns relative to
+ * the netns associated with the *ctx*. *netns* values beyond the
+ * range of 32-bit integers are reserved for future use.
+ *
+ * All values for *flags* are reserved for future usage, and must
+ * be left at zero.
+ *
+ * This helper is available only if the kernel was compiled with
+ * **CONFIG_NET** configuration option.
+ *
+ * Returns
+ * Pointer to **struct bpf_sock**, or **NULL** in case of failure.
+ * For sockets with reuseport option, the **struct bpf_sock**
+ * result is from *reuse*\ **->socks**\ [] using the hash of the
+ * tuple.
+ */
+static struct bpf_sock *(*bpf_sk_lookup_udp)(void *ctx, struct bpf_sock_tuple *tuple, __u32 tuple_size, __u64 netns, __u64 flags) = (void *) 85;
+
+/*
+ * bpf_sk_release
+ *
+ * Release the reference held by *sock*. *sock* must be a
+ * non-**NULL** pointer that was returned from
+ * **bpf_sk_lookup_xxx**\ ().
+ *
+ * Returns
+ * 0 on success, or a negative error in case of failure.
+ */
+static int (*bpf_sk_release)(struct bpf_sock *sock) = (void *) 86;
+
+/*
+ * bpf_map_push_elem
+ *
+ * Push an element *value* in *map*. *flags* is one of:
+ *
+ * **BPF_EXIST**
+ * If the queue/stack is full, the oldest element is
+ * removed to make room for this.
+ *
+ * Returns
+ * 0 on success, or a negative error in case of failure.
+ */
+static int (*bpf_map_push_elem)(void *map, const void *value, __u64 flags) = (void *) 87;
+
+/*
+ * bpf_map_pop_elem
+ *
+ * Pop an element from *map*.
+ *
+ * Returns
+ * 0 on success, or a negative error in case of failure.
+ */
+static int (*bpf_map_pop_elem)(void *map, void *value) = (void *) 88;
+
+/*
+ * bpf_map_peek_elem
+ *
+ * Get an element from *map* without removing it.
+ *
+ * Returns
+ * 0 on success, or a negative error in case of failure.
+ */
+static int (*bpf_map_peek_elem)(void *map, void *value) = (void *) 89;
+
+/*
+ * bpf_msg_push_data
+ *
+ * For socket policies, insert *len* bytes into *msg* at offset
+ * *start*.
+ *
+ * If a program of type **BPF_PROG_TYPE_SK_MSG** is run on a
+ * *msg* it may want to insert metadata or options into the *msg*.
+ * This can later be read and used by any of the lower layer BPF
+ * hooks.
+ *
+ * This helper may fail if under memory pressure (a malloc
+ * fails) in these cases BPF programs will get an appropriate
+ * error and BPF programs will need to handle them.
+ *
+ * Returns
+ * 0 on success, or a negative error in case of failure.
+ */
+static int (*bpf_msg_push_data)(struct sk_msg_md *msg, __u32 start, __u32 len, __u64 flags) = (void *) 90;
+
+/*
+ * bpf_msg_pop_data
+ *
+ * Will remove *len* bytes from a *msg* starting at byte *start*.
+ * This may result in **ENOMEM** errors under certain situations if
+ * an allocation and copy are required due to a full ring buffer.
+ * However, the helper will try to avoid doing the allocation
+ * if possible. Other errors can occur if input parameters are
+ * invalid either due to *start* byte not being valid part of *msg*
+ * payload and/or *pop* value being to large.
+ *
+ * Returns
+ * 0 on success, or a negative error in case of failure.
+ */
+static int (*bpf_msg_pop_data)(struct sk_msg_md *msg, __u32 start, __u32 len, __u64 flags) = (void *) 91;
+
+/*
+ * bpf_rc_pointer_rel
+ *
+ * This helper is used in programs implementing IR decoding, to
+ * report a successfully decoded pointer movement.
+ *
+ * The *ctx* should point to the lirc sample as passed into
+ * the program.
+ *
+ * This helper is only available is the kernel was compiled with
+ * the **CONFIG_BPF_LIRC_MODE2** configuration option set to
+ * "**y**".
+ *
+ * Returns
+ * 0
+ */
+static int (*bpf_rc_pointer_rel)(void *ctx, __s32 rel_x, __s32 rel_y) = (void *) 92;
+
+/*
+ * bpf_spin_lock
+ *
+ * Acquire a spinlock represented by the pointer *lock*, which is
+ * stored as part of a value of a map. Taking the lock allows to
+ * safely update the rest of the fields in that value. The
+ * spinlock can (and must) later be released with a call to
+ * **bpf_spin_unlock**\ (\ *lock*\ ).
+ *
+ * Spinlocks in BPF programs come with a number of restrictions
+ * and constraints:
+ *
+ * * **bpf_spin_lock** objects are only allowed inside maps of
+ * types **BPF_MAP_TYPE_HASH** and **BPF_MAP_TYPE_ARRAY** (this
+ * list could be extended in the future).
+ * * BTF description of the map is mandatory.
+ * * The BPF program can take ONE lock at a time, since taking two
+ * or more could cause dead locks.
+ * * Only one **struct bpf_spin_lock** is allowed per map element.
+ * * When the lock is taken, calls (either BPF to BPF or helpers)
+ * are not allowed.
+ * * The **BPF_LD_ABS** and **BPF_LD_IND** instructions are not
+ * allowed inside a spinlock-ed region.
+ * * The BPF program MUST call **bpf_spin_unlock**\ () to release
+ * the lock, on all execution paths, before it returns.
+ * * The BPF program can access **struct bpf_spin_lock** only via
+ * the **bpf_spin_lock**\ () and **bpf_spin_unlock**\ ()
+ * helpers. Loading or storing data into the **struct
+ * bpf_spin_lock** *lock*\ **;** field of a map is not allowed.
+ * * To use the **bpf_spin_lock**\ () helper, the BTF description
+ * of the map value must be a struct and have **struct
+ * bpf_spin_lock** *anyname*\ **;** field at the top level.
+ * Nested lock inside another struct is not allowed.
+ * * The **struct bpf_spin_lock** *lock* field in a map value must
+ * be aligned on a multiple of 4 bytes in that value.
+ * * Syscall with command **BPF_MAP_LOOKUP_ELEM** does not copy
+ * the **bpf_spin_lock** field to user space.
+ * * Syscall with command **BPF_MAP_UPDATE_ELEM**, or update from
+ * a BPF program, do not update the **bpf_spin_lock** field.
+ * * **bpf_spin_lock** cannot be on the stack or inside a
+ * networking packet (it can only be inside of a map values).
+ * * **bpf_spin_lock** is available to root only.
+ * * Tracing programs and socket filter programs cannot use
+ * **bpf_spin_lock**\ () due to insufficient preemption checks
+ * (but this may change in the future).
+ * * **bpf_spin_lock** is not allowed in inner maps of map-in-map.
+ *
+ * Returns
+ * 0
+ */
+static int (*bpf_spin_lock)(struct bpf_spin_lock *lock) = (void *) 93;
+
+/*
+ * bpf_spin_unlock
+ *
+ * Release the *lock* previously locked by a call to
+ * **bpf_spin_lock**\ (\ *lock*\ ).
+ *
+ * Returns
+ * 0
+ */
+static int (*bpf_spin_unlock)(struct bpf_spin_lock *lock) = (void *) 94;
+
+/*
+ * bpf_sk_fullsock
+ *
+ * This helper gets a **struct bpf_sock** pointer such
+ * that all the fields in this **bpf_sock** can be accessed.
+ *
+ * Returns
+ * A **struct bpf_sock** pointer on success, or **NULL** in
+ * case of failure.
+ */
+static struct bpf_sock *(*bpf_sk_fullsock)(struct bpf_sock *sk) = (void *) 95;
+
+/*
+ * bpf_tcp_sock
+ *
+ * This helper gets a **struct bpf_tcp_sock** pointer from a
+ * **struct bpf_sock** pointer.
+ *
+ * Returns
+ * A **struct bpf_tcp_sock** pointer on success, or **NULL** in
+ * case of failure.
+ */
+static struct bpf_tcp_sock *(*bpf_tcp_sock)(struct bpf_sock *sk) = (void *) 96;
+
+/*
+ * bpf_skb_ecn_set_ce
+ *
+ * Set ECN (Explicit Congestion Notification) field of IP header
+ * to **CE** (Congestion Encountered) if current value is **ECT**
+ * (ECN Capable Transport). Otherwise, do nothing. Works with IPv6
+ * and IPv4.
+ *
+ * Returns
+ * 1 if the **CE** flag is set (either by the current helper call
+ * or because it was already present), 0 if it is not set.
+ */
+static int (*bpf_skb_ecn_set_ce)(struct __sk_buff *skb) = (void *) 97;
+
+/*
+ * bpf_get_listener_sock
+ *
+ * Return a **struct bpf_sock** pointer in **TCP_LISTEN** state.
+ * **bpf_sk_release**\ () is unnecessary and not allowed.
+ *
+ * Returns
+ * A **struct bpf_sock** pointer on success, or **NULL** in
+ * case of failure.
+ */
+static struct bpf_sock *(*bpf_get_listener_sock)(struct bpf_sock *sk) = (void *) 98;
+
+/*
+ * bpf_skc_lookup_tcp
+ *
+ * Look for TCP socket matching *tuple*, optionally in a child
+ * network namespace *netns*. The return value must be checked,
+ * and if non-**NULL**, released via **bpf_sk_release**\ ().
+ *
+ * This function is identical to **bpf_sk_lookup_tcp**\ (), except
+ * that it also returns timewait or request sockets. Use
+ * **bpf_sk_fullsock**\ () or **bpf_tcp_sock**\ () to access the
+ * full structure.
+ *
+ * This helper is available only if the kernel was compiled with
+ * **CONFIG_NET** configuration option.
+ *
+ * Returns
+ * Pointer to **struct bpf_sock**, or **NULL** in case of failure.
+ * For sockets with reuseport option, the **struct bpf_sock**
+ * result is from *reuse*\ **->socks**\ [] using the hash of the
+ * tuple.
+ */
+static struct bpf_sock *(*bpf_skc_lookup_tcp)(void *ctx, struct bpf_sock_tuple *tuple, __u32 tuple_size, __u64 netns, __u64 flags) = (void *) 99;
+
+/*
+ * bpf_tcp_check_syncookie
+ *
+ * Check whether *iph* and *th* contain a valid SYN cookie ACK for
+ * the listening socket in *sk*.
+ *
+ * *iph* points to the start of the IPv4 or IPv6 header, while
+ * *iph_len* contains **sizeof**\ (**struct iphdr**) or
+ * **sizeof**\ (**struct ip6hdr**).
+ *
+ * *th* points to the start of the TCP header, while *th_len*
+ * contains **sizeof**\ (**struct tcphdr**).
+ *
+ *
+ * Returns
+ * 0 if *iph* and *th* are a valid SYN cookie ACK, or a negative
+ * error otherwise.
+ */
+static int (*bpf_tcp_check_syncookie)(struct bpf_sock *sk, void *iph, __u32 iph_len, struct tcphdr *th, __u32 th_len) = (void *) 100;
+
+/*
+ * bpf_sysctl_get_name
+ *
+ * Get name of sysctl in /proc/sys/ and copy it into provided by
+ * program buffer *buf* of size *buf_len*.
+ *
+ * The buffer is always NUL terminated, unless it's zero-sized.
+ *
+ * If *flags* is zero, full name (e.g. "net/ipv4/tcp_mem") is
+ * copied. Use **BPF_F_SYSCTL_BASE_NAME** flag to copy base name
+ * only (e.g. "tcp_mem").
+ *
+ * Returns
+ * Number of character copied (not including the trailing NUL).
+ *
+ * **-E2BIG** if the buffer wasn't big enough (*buf* will contain
+ * truncated name in this case).
+ */
+static int (*bpf_sysctl_get_name)(struct bpf_sysctl *ctx, char *buf, unsigned long buf_len, __u64 flags) = (void *) 101;
+
+/*
+ * bpf_sysctl_get_current_value
+ *
+ * Get current value of sysctl as it is presented in /proc/sys
+ * (incl. newline, etc), and copy it as a string into provided
+ * by program buffer *buf* of size *buf_len*.
+ *
+ * The whole value is copied, no matter what file position user
+ * space issued e.g. sys_read at.
+ *
+ * The buffer is always NUL terminated, unless it's zero-sized.
+ *
+ * Returns
+ * Number of character copied (not including the trailing NUL).
+ *
+ * **-E2BIG** if the buffer wasn't big enough (*buf* will contain
+ * truncated name in this case).
+ *
+ * **-EINVAL** if current value was unavailable, e.g. because
+ * sysctl is uninitialized and read returns -EIO for it.
+ */
+static int (*bpf_sysctl_get_current_value)(struct bpf_sysctl *ctx, char *buf, unsigned long buf_len) = (void *) 102;
+
+/*
+ * bpf_sysctl_get_new_value
+ *
+ * Get new value being written by user space to sysctl (before
+ * the actual write happens) and copy it as a string into
+ * provided by program buffer *buf* of size *buf_len*.
+ *
+ * User space may write new value at file position > 0.
+ *
+ * The buffer is always NUL terminated, unless it's zero-sized.
+ *
+ * Returns
+ * Number of character copied (not including the trailing NUL).
+ *
+ * **-E2BIG** if the buffer wasn't big enough (*buf* will contain
+ * truncated name in this case).
+ *
+ * **-EINVAL** if sysctl is being read.
+ */
+static int (*bpf_sysctl_get_new_value)(struct bpf_sysctl *ctx, char *buf, unsigned long buf_len) = (void *) 103;
+
+/*
+ * bpf_sysctl_set_new_value
+ *
+ * Override new value being written by user space to sysctl with
+ * value provided by program in buffer *buf* of size *buf_len*.
+ *
+ * *buf* should contain a string in same form as provided by user
+ * space on sysctl write.
+ *
+ * User space may write new value at file position > 0. To override
+ * the whole sysctl value file position should be set to zero.
+ *
+ * Returns
+ * 0 on success.
+ *
+ * **-E2BIG** if the *buf_len* is too big.
+ *
+ * **-EINVAL** if sysctl is being read.
+ */
+static int (*bpf_sysctl_set_new_value)(struct bpf_sysctl *ctx, const char *buf, unsigned long buf_len) = (void *) 104;
+
+/*
+ * bpf_strtol
+ *
+ * Convert the initial part of the string from buffer *buf* of
+ * size *buf_len* to a long integer according to the given base
+ * and save the result in *res*.
+ *
+ * The string may begin with an arbitrary amount of white space
+ * (as determined by **isspace**\ (3)) followed by a single
+ * optional '**-**' sign.
+ *
+ * Five least significant bits of *flags* encode base, other bits
+ * are currently unused.
+ *
+ * Base must be either 8, 10, 16 or 0 to detect it automatically
+ * similar to user space **strtol**\ (3).
+ *
+ * Returns
+ * Number of characters consumed on success. Must be positive but
+ * no more than *buf_len*.
+ *
+ * **-EINVAL** if no valid digits were found or unsupported base
+ * was provided.
+ *
+ * **-ERANGE** if resulting value was out of range.
+ */
+static int (*bpf_strtol)(const char *buf, unsigned long buf_len, __u64 flags, long *res) = (void *) 105;
+
+/*
+ * bpf_strtoul
+ *
+ * Convert the initial part of the string from buffer *buf* of
+ * size *buf_len* to an unsigned long integer according to the
+ * given base and save the result in *res*.
+ *
+ * The string may begin with an arbitrary amount of white space
+ * (as determined by **isspace**\ (3)).
+ *
+ * Five least significant bits of *flags* encode base, other bits
+ * are currently unused.
+ *
+ * Base must be either 8, 10, 16 or 0 to detect it automatically
+ * similar to user space **strtoul**\ (3).
+ *
+ * Returns
+ * Number of characters consumed on success. Must be positive but
+ * no more than *buf_len*.
+ *
+ * **-EINVAL** if no valid digits were found or unsupported base
+ * was provided.
+ *
+ * **-ERANGE** if resulting value was out of range.
+ */
+static int (*bpf_strtoul)(const char *buf, unsigned long buf_len, __u64 flags, unsigned long *res) = (void *) 106;
+
+/*
+ * bpf_sk_storage_get
+ *
+ * Get a bpf-local-storage from a *sk*.
+ *
+ * Logically, it could be thought of getting the value from
+ * a *map* with *sk* as the **key**. From this
+ * perspective, the usage is not much different from
+ * **bpf_map_lookup_elem**\ (*map*, **&**\ *sk*) except this
+ * helper enforces the key must be a full socket and the map must
+ * be a **BPF_MAP_TYPE_SK_STORAGE** also.
+ *
+ * Underneath, the value is stored locally at *sk* instead of
+ * the *map*. The *map* is used as the bpf-local-storage
+ * "type". The bpf-local-storage "type" (i.e. the *map*) is
+ * searched against all bpf-local-storages residing at *sk*.
+ *
+ * An optional *flags* (**BPF_SK_STORAGE_GET_F_CREATE**) can be
+ * used such that a new bpf-local-storage will be
+ * created if one does not exist. *value* can be used
+ * together with **BPF_SK_STORAGE_GET_F_CREATE** to specify
+ * the initial value of a bpf-local-storage. If *value* is
+ * **NULL**, the new bpf-local-storage will be zero initialized.
+ *
+ * Returns
+ * A bpf-local-storage pointer is returned on success.
+ *
+ * **NULL** if not found or there was an error in adding
+ * a new bpf-local-storage.
+ */
+static void *(*bpf_sk_storage_get)(void *map, struct bpf_sock *sk, void *value, __u64 flags) = (void *) 107;
+
+/*
+ * bpf_sk_storage_delete
+ *
+ * Delete a bpf-local-storage from a *sk*.
+ *
+ * Returns
+ * 0 on success.
+ *
+ * **-ENOENT** if the bpf-local-storage cannot be found.
+ */
+static int (*bpf_sk_storage_delete)(void *map, struct bpf_sock *sk) = (void *) 108;
+
+/*
+ * bpf_send_signal
+ *
+ * Send signal *sig* to the current task.
+ *
+ * Returns
+ * 0 on success or successfully queued.
+ *
+ * **-EBUSY** if work queue under nmi is full.
+ *
+ * **-EINVAL** if *sig* is invalid.
+ *
+ * **-EPERM** if no permission to send the *sig*.
+ *
+ * **-EAGAIN** if bpf program can try again.
+ */
+static int (*bpf_send_signal)(__u32 sig) = (void *) 109;
+
+/*
+ * bpf_tcp_gen_syncookie
+ *
+ * Try to issue a SYN cookie for the packet with corresponding
+ * IP/TCP headers, *iph* and *th*, on the listening socket in *sk*.
+ *
+ * *iph* points to the start of the IPv4 or IPv6 header, while
+ * *iph_len* contains **sizeof**\ (**struct iphdr**) or
+ * **sizeof**\ (**struct ip6hdr**).
+ *
+ * *th* points to the start of the TCP header, while *th_len*
+ * contains the length of the TCP header.
+ *
+ *
+ * Returns
+ * On success, lower 32 bits hold the generated SYN cookie in
+ * followed by 16 bits which hold the MSS value for that cookie,
+ * and the top 16 bits are unused.
+ *
+ * On failure, the returned value is one of the following:
+ *
+ * **-EINVAL** SYN cookie cannot be issued due to error
+ *
+ * **-ENOENT** SYN cookie should not be issued (no SYN flood)
+ *
+ * **-EOPNOTSUPP** kernel configuration does not enable SYN cookies
+ *
+ * **-EPROTONOSUPPORT** IP packet version is not 4 or 6
+ */
+static __s64 (*bpf_tcp_gen_syncookie)(struct bpf_sock *sk, void *iph, __u32 iph_len, struct tcphdr *th, __u32 th_len) = (void *) 110;
+
+/*
+ * bpf_skb_output
+ *
+ * Write raw *data* blob into a special BPF perf event held by
+ * *map* of type **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. This perf
+ * event must have the following attributes: **PERF_SAMPLE_RAW**
+ * as **sample_type**, **PERF_TYPE_SOFTWARE** as **type**, and
+ * **PERF_COUNT_SW_BPF_OUTPUT** as **config**.
+ *
+ * The *flags* are used to indicate the index in *map* for which
+ * the value must be put, masked with **BPF_F_INDEX_MASK**.
+ * Alternatively, *flags* can be set to **BPF_F_CURRENT_CPU**
+ * to indicate that the index of the current CPU core should be
+ * used.
+ *
+ * The value to write, of *size*, is passed through eBPF stack and
+ * pointed by *data*.
+ *
+ * *ctx* is a pointer to in-kernel struct sk_buff.
+ *
+ * This helper is similar to **bpf_perf_event_output**\ () but
+ * restricted to raw_tracepoint bpf programs.
+ *
+ * Returns
+ * 0 on success, or a negative error in case of failure.
+ */
+static int (*bpf_skb_output)(void *ctx, void *map, __u64 flags, void *data, __u64 size) = (void *) 111;
+
+/*
+ * bpf_probe_read_user
+ *
+ * Safely attempt to read *size* bytes from user space address
+ * *unsafe_ptr* and store the data in *dst*.
+ *
+ * Returns
+ * 0 on success, or a negative error in case of failure.
+ */
+static int (*bpf_probe_read_user)(void *dst, __u32 size, const void *unsafe_ptr) = (void *) 112;
+
+/*
+ * bpf_probe_read_kernel
+ *
+ * Safely attempt to read *size* bytes from kernel space address
+ * *unsafe_ptr* and store the data in *dst*.
+ *
+ * Returns
+ * 0 on success, or a negative error in case of failure.
+ */
+static int (*bpf_probe_read_kernel)(void *dst, __u32 size, const void *unsafe_ptr) = (void *) 113;
+
+/*
+ * bpf_probe_read_user_str
+ *
+ * Copy a NUL terminated string from an unsafe user address
+ * *unsafe_ptr* to *dst*. The *size* should include the
+ * terminating NUL byte. In case the string length is smaller than
+ * *size*, the target is not padded with further NUL bytes. If the
+ * string length is larger than *size*, just *size*-1 bytes are
+ * copied and the last byte is set to NUL.
+ *
+ * On success, the length of the copied string is returned. This
+ * makes this helper useful in tracing programs for reading
+ * strings, and more importantly to get its length at runtime. See
+ * the following snippet:
+ *
+ * ::
+ *
+ * SEC("kprobe/sys_open")
+ * void bpf_sys_open(struct pt_regs *ctx)
+ * {
+ * char buf[PATHLEN]; // PATHLEN is defined to 256
+ * int res = bpf_probe_read_user_str(buf, sizeof(buf),
+ * ctx->di);
+ *
+ * // Consume buf, for example push it to
+ * // userspace via bpf_perf_event_output(); we
+ * // can use res (the string length) as event
+ * // size, after checking its boundaries.
+ * }
+ *
+ * In comparison, using **bpf_probe_read_user()** helper here
+ * instead to read the string would require to estimate the length
+ * at compile time, and would often result in copying more memory
+ * than necessary.
+ *
+ * Another useful use case is when parsing individual process
+ * arguments or individual environment variables navigating
+ * *current*\ **->mm->arg_start** and *current*\
+ * **->mm->env_start**: using this helper and the return value,
+ * one can quickly iterate at the right offset of the memory area.
+ *
+ * Returns
+ * On success, the strictly positive length of the string,
+ * including the trailing NUL character. On error, a negative
+ * value.
+ */
+static int (*bpf_probe_read_user_str)(void *dst, __u32 size, const void *unsafe_ptr) = (void *) 114;
+
+/*
+ * bpf_probe_read_kernel_str
+ *
+ * Copy a NUL terminated string from an unsafe kernel address *unsafe_ptr*
+ * to *dst*. Same semantics as with bpf_probe_read_user_str() apply.
+ *
+ * Returns
+ * On success, the strictly positive length of the string, including
+ * the trailing NUL character. On error, a negative value.
+ */
+static int (*bpf_probe_read_kernel_str)(void *dst, __u32 size, const void *unsafe_ptr) = (void *) 115;
+
+
diff --git a/src/contrib/libbpf/bpf/bpf_helpers.h b/src/contrib/libbpf/bpf/bpf_helpers.h
new file mode 100644
index 0000000..0c7d282
--- /dev/null
+++ b/src/contrib/libbpf/bpf/bpf_helpers.h
@@ -0,0 +1,47 @@
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
+#ifndef __BPF_HELPERS__
+#define __BPF_HELPERS__
+
+#include "bpf_helper_defs.h"
+
+#define __uint(name, val) int (*name)[val]
+#define __type(name, val) typeof(val) *name
+
+/* Helper macro to print out debug messages */
+#define bpf_printk(fmt, ...) \
+({ \
+ char ____fmt[] = fmt; \
+ bpf_trace_printk(____fmt, sizeof(____fmt), \
+ ##__VA_ARGS__); \
+})
+
+/*
+ * Helper macro to place programs, maps, license in
+ * different sections in elf_bpf file. Section names
+ * are interpreted by elf_bpf loader
+ */
+#define SEC(NAME) __attribute__((section(NAME), used))
+
+#ifndef __always_inline
+#define __always_inline __attribute__((always_inline))
+#endif
+
+/*
+ * Helper structure used by eBPF C program
+ * to describe BPF map attributes to libbpf loader
+ */
+struct bpf_map_def {
+ unsigned int type;
+ unsigned int key_size;
+ unsigned int value_size;
+ unsigned int max_entries;
+ unsigned int map_flags;
+};
+
+enum libbpf_pin_type {
+ LIBBPF_PIN_NONE,
+ /* PIN_BY_NAME: pin maps by name (in /sys/fs/bpf by default) */
+ LIBBPF_PIN_BY_NAME,
+};
+
+#endif
diff --git a/src/contrib/libbpf/bpf/bpf_prog_linfo.c b/src/contrib/libbpf/bpf/bpf_prog_linfo.c
new file mode 100644
index 0000000..3ed1a27
--- /dev/null
+++ b/src/contrib/libbpf/bpf/bpf_prog_linfo.c
@@ -0,0 +1,246 @@
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+/* Copyright (c) 2018 Facebook */
+
+#include <string.h>
+#include <stdlib.h>
+#include <linux/err.h>
+#include <linux/bpf.h>
+#include "libbpf.h"
+#include "libbpf_internal.h"
+
+struct bpf_prog_linfo {
+ void *raw_linfo;
+ void *raw_jited_linfo;
+ __u32 *nr_jited_linfo_per_func;
+ __u32 *jited_linfo_func_idx;
+ __u32 nr_linfo;
+ __u32 nr_jited_func;
+ __u32 rec_size;
+ __u32 jited_rec_size;
+};
+
+static int dissect_jited_func(struct bpf_prog_linfo *prog_linfo,
+ const __u64 *ksym_func, const __u32 *ksym_len)
+{
+ __u32 nr_jited_func, nr_linfo;
+ const void *raw_jited_linfo;
+ const __u64 *jited_linfo;
+ __u64 last_jited_linfo;
+ /*
+ * Index to raw_jited_linfo:
+ * i: Index for searching the next ksym_func
+ * prev_i: Index to the last found ksym_func
+ */
+ __u32 i, prev_i;
+ __u32 f; /* Index to ksym_func */
+
+ raw_jited_linfo = prog_linfo->raw_jited_linfo;
+ jited_linfo = raw_jited_linfo;
+ if (ksym_func[0] != *jited_linfo)
+ goto errout;
+
+ prog_linfo->jited_linfo_func_idx[0] = 0;
+ nr_jited_func = prog_linfo->nr_jited_func;
+ nr_linfo = prog_linfo->nr_linfo;
+
+ for (prev_i = 0, i = 1, f = 1;
+ i < nr_linfo && f < nr_jited_func;
+ i++) {
+ raw_jited_linfo += prog_linfo->jited_rec_size;
+ last_jited_linfo = *jited_linfo;
+ jited_linfo = raw_jited_linfo;
+
+ if (ksym_func[f] == *jited_linfo) {
+ prog_linfo->jited_linfo_func_idx[f] = i;
+
+ /* Sanity check */
+ if (last_jited_linfo - ksym_func[f - 1] + 1 >
+ ksym_len[f - 1])
+ goto errout;
+
+ prog_linfo->nr_jited_linfo_per_func[f - 1] =
+ i - prev_i;
+ prev_i = i;
+
+ /*
+ * The ksym_func[f] is found in jited_linfo.
+ * Look for the next one.
+ */
+ f++;
+ } else if (*jited_linfo <= last_jited_linfo) {
+ /* Ensure the addr is increasing _within_ a func */
+ goto errout;
+ }
+ }
+
+ if (f != nr_jited_func)
+ goto errout;
+
+ prog_linfo->nr_jited_linfo_per_func[nr_jited_func - 1] =
+ nr_linfo - prev_i;
+
+ return 0;
+
+errout:
+ return -EINVAL;
+}
+
+void bpf_prog_linfo__free(struct bpf_prog_linfo *prog_linfo)
+{
+ if (!prog_linfo)
+ return;
+
+ free(prog_linfo->raw_linfo);
+ free(prog_linfo->raw_jited_linfo);
+ free(prog_linfo->nr_jited_linfo_per_func);
+ free(prog_linfo->jited_linfo_func_idx);
+ free(prog_linfo);
+}
+
+struct bpf_prog_linfo *bpf_prog_linfo__new(const struct bpf_prog_info *info)
+{
+ struct bpf_prog_linfo *prog_linfo;
+ __u32 nr_linfo, nr_jited_func;
+ __u64 data_sz;
+
+ nr_linfo = info->nr_line_info;
+
+ if (!nr_linfo)
+ return NULL;
+
+ /*
+ * The min size that bpf_prog_linfo has to access for
+ * searching purpose.
+ */
+ if (info->line_info_rec_size <
+ offsetof(struct bpf_line_info, file_name_off))
+ return NULL;
+
+ prog_linfo = calloc(1, sizeof(*prog_linfo));
+ if (!prog_linfo)
+ return NULL;
+
+ /* Copy xlated line_info */
+ prog_linfo->nr_linfo = nr_linfo;
+ prog_linfo->rec_size = info->line_info_rec_size;
+ data_sz = (__u64)nr_linfo * prog_linfo->rec_size;
+ prog_linfo->raw_linfo = malloc(data_sz);
+ if (!prog_linfo->raw_linfo)
+ goto err_free;
+ memcpy(prog_linfo->raw_linfo, (void *)(long)info->line_info, data_sz);
+
+ nr_jited_func = info->nr_jited_ksyms;
+ if (!nr_jited_func ||
+ !info->jited_line_info ||
+ info->nr_jited_line_info != nr_linfo ||
+ info->jited_line_info_rec_size < sizeof(__u64) ||
+ info->nr_jited_func_lens != nr_jited_func ||
+ !info->jited_ksyms ||
+ !info->jited_func_lens)
+ /* Not enough info to provide jited_line_info */
+ return prog_linfo;
+
+ /* Copy jited_line_info */
+ prog_linfo->nr_jited_func = nr_jited_func;
+ prog_linfo->jited_rec_size = info->jited_line_info_rec_size;
+ data_sz = (__u64)nr_linfo * prog_linfo->jited_rec_size;
+ prog_linfo->raw_jited_linfo = malloc(data_sz);
+ if (!prog_linfo->raw_jited_linfo)
+ goto err_free;
+ memcpy(prog_linfo->raw_jited_linfo,
+ (void *)(long)info->jited_line_info, data_sz);
+
+ /* Number of jited_line_info per jited func */
+ prog_linfo->nr_jited_linfo_per_func = malloc(nr_jited_func *
+ sizeof(__u32));
+ if (!prog_linfo->nr_jited_linfo_per_func)
+ goto err_free;
+
+ /*
+ * For each jited func,
+ * the start idx to the "linfo" and "jited_linfo" array,
+ */
+ prog_linfo->jited_linfo_func_idx = malloc(nr_jited_func *
+ sizeof(__u32));
+ if (!prog_linfo->jited_linfo_func_idx)
+ goto err_free;
+
+ if (dissect_jited_func(prog_linfo,
+ (__u64 *)(long)info->jited_ksyms,
+ (__u32 *)(long)info->jited_func_lens))
+ goto err_free;
+
+ return prog_linfo;
+
+err_free:
+ bpf_prog_linfo__free(prog_linfo);
+ return NULL;
+}
+
+const struct bpf_line_info *
+bpf_prog_linfo__lfind_addr_func(const struct bpf_prog_linfo *prog_linfo,
+ __u64 addr, __u32 func_idx, __u32 nr_skip)
+{
+ __u32 jited_rec_size, rec_size, nr_linfo, start, i;
+ const void *raw_jited_linfo, *raw_linfo;
+ const __u64 *jited_linfo;
+
+ if (func_idx >= prog_linfo->nr_jited_func)
+ return NULL;
+
+ nr_linfo = prog_linfo->nr_jited_linfo_per_func[func_idx];
+ if (nr_skip >= nr_linfo)
+ return NULL;
+
+ start = prog_linfo->jited_linfo_func_idx[func_idx] + nr_skip;
+ jited_rec_size = prog_linfo->jited_rec_size;
+ raw_jited_linfo = prog_linfo->raw_jited_linfo +
+ (start * jited_rec_size);
+ jited_linfo = raw_jited_linfo;
+ if (addr < *jited_linfo)
+ return NULL;
+
+ nr_linfo -= nr_skip;
+ rec_size = prog_linfo->rec_size;
+ raw_linfo = prog_linfo->raw_linfo + (start * rec_size);
+ for (i = 0; i < nr_linfo; i++) {
+ if (addr < *jited_linfo)
+ break;
+
+ raw_linfo += rec_size;
+ raw_jited_linfo += jited_rec_size;
+ jited_linfo = raw_jited_linfo;
+ }
+
+ return raw_linfo - rec_size;
+}
+
+const struct bpf_line_info *
+bpf_prog_linfo__lfind(const struct bpf_prog_linfo *prog_linfo,
+ __u32 insn_off, __u32 nr_skip)
+{
+ const struct bpf_line_info *linfo;
+ __u32 rec_size, nr_linfo, i;
+ const void *raw_linfo;
+
+ nr_linfo = prog_linfo->nr_linfo;
+ if (nr_skip >= nr_linfo)
+ return NULL;
+
+ rec_size = prog_linfo->rec_size;
+ raw_linfo = prog_linfo->raw_linfo + (nr_skip * rec_size);
+ linfo = raw_linfo;
+ if (insn_off < linfo->insn_off)
+ return NULL;
+
+ nr_linfo -= nr_skip;
+ for (i = 0; i < nr_linfo; i++) {
+ if (insn_off < linfo->insn_off)
+ break;
+
+ raw_linfo += rec_size;
+ linfo = raw_linfo;
+ }
+
+ return raw_linfo - rec_size;
+}
diff --git a/src/contrib/libbpf/bpf/bpf_tracing.h b/src/contrib/libbpf/bpf/bpf_tracing.h
new file mode 100644
index 0000000..b0dafe8
--- /dev/null
+++ b/src/contrib/libbpf/bpf/bpf_tracing.h
@@ -0,0 +1,195 @@
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
+#ifndef __BPF_TRACING_H__
+#define __BPF_TRACING_H__
+
+/* Scan the ARCH passed in from ARCH env variable (see Makefile) */
+#if defined(__TARGET_ARCH_x86)
+ #define bpf_target_x86
+ #define bpf_target_defined
+#elif defined(__TARGET_ARCH_s390)
+ #define bpf_target_s390
+ #define bpf_target_defined
+#elif defined(__TARGET_ARCH_arm)
+ #define bpf_target_arm
+ #define bpf_target_defined
+#elif defined(__TARGET_ARCH_arm64)
+ #define bpf_target_arm64
+ #define bpf_target_defined
+#elif defined(__TARGET_ARCH_mips)
+ #define bpf_target_mips
+ #define bpf_target_defined
+#elif defined(__TARGET_ARCH_powerpc)
+ #define bpf_target_powerpc
+ #define bpf_target_defined
+#elif defined(__TARGET_ARCH_sparc)
+ #define bpf_target_sparc
+ #define bpf_target_defined
+#else
+ #undef bpf_target_defined
+#endif
+
+/* Fall back to what the compiler says */
+#ifndef bpf_target_defined
+#if defined(__x86_64__)
+ #define bpf_target_x86
+#elif defined(__s390__)
+ #define bpf_target_s390
+#elif defined(__arm__)
+ #define bpf_target_arm
+#elif defined(__aarch64__)
+ #define bpf_target_arm64
+#elif defined(__mips__)
+ #define bpf_target_mips
+#elif defined(__powerpc__)
+ #define bpf_target_powerpc
+#elif defined(__sparc__)
+ #define bpf_target_sparc
+#endif
+#endif
+
+#if defined(bpf_target_x86)
+
+#ifdef __KERNEL__
+#define PT_REGS_PARM1(x) ((x)->di)
+#define PT_REGS_PARM2(x) ((x)->si)
+#define PT_REGS_PARM3(x) ((x)->dx)
+#define PT_REGS_PARM4(x) ((x)->cx)
+#define PT_REGS_PARM5(x) ((x)->r8)
+#define PT_REGS_RET(x) ((x)->sp)
+#define PT_REGS_FP(x) ((x)->bp)
+#define PT_REGS_RC(x) ((x)->ax)
+#define PT_REGS_SP(x) ((x)->sp)
+#define PT_REGS_IP(x) ((x)->ip)
+#else
+#ifdef __i386__
+/* i386 kernel is built with -mregparm=3 */
+#define PT_REGS_PARM1(x) ((x)->eax)
+#define PT_REGS_PARM2(x) ((x)->edx)
+#define PT_REGS_PARM3(x) ((x)->ecx)
+#define PT_REGS_PARM4(x) 0
+#define PT_REGS_PARM5(x) 0
+#define PT_REGS_RET(x) ((x)->esp)
+#define PT_REGS_FP(x) ((x)->ebp)
+#define PT_REGS_RC(x) ((x)->eax)
+#define PT_REGS_SP(x) ((x)->esp)
+#define PT_REGS_IP(x) ((x)->eip)
+#else
+#define PT_REGS_PARM1(x) ((x)->rdi)
+#define PT_REGS_PARM2(x) ((x)->rsi)
+#define PT_REGS_PARM3(x) ((x)->rdx)
+#define PT_REGS_PARM4(x) ((x)->rcx)
+#define PT_REGS_PARM5(x) ((x)->r8)
+#define PT_REGS_RET(x) ((x)->rsp)
+#define PT_REGS_FP(x) ((x)->rbp)
+#define PT_REGS_RC(x) ((x)->rax)
+#define PT_REGS_SP(x) ((x)->rsp)
+#define PT_REGS_IP(x) ((x)->rip)
+#endif
+#endif
+
+#elif defined(bpf_target_s390)
+
+/* s390 provides user_pt_regs instead of struct pt_regs to userspace */
+struct pt_regs;
+#define PT_REGS_S390 const volatile user_pt_regs
+#define PT_REGS_PARM1(x) (((PT_REGS_S390 *)(x))->gprs[2])
+#define PT_REGS_PARM2(x) (((PT_REGS_S390 *)(x))->gprs[3])
+#define PT_REGS_PARM3(x) (((PT_REGS_S390 *)(x))->gprs[4])
+#define PT_REGS_PARM4(x) (((PT_REGS_S390 *)(x))->gprs[5])
+#define PT_REGS_PARM5(x) (((PT_REGS_S390 *)(x))->gprs[6])
+#define PT_REGS_RET(x) (((PT_REGS_S390 *)(x))->gprs[14])
+/* Works only with CONFIG_FRAME_POINTER */
+#define PT_REGS_FP(x) (((PT_REGS_S390 *)(x))->gprs[11])
+#define PT_REGS_RC(x) (((PT_REGS_S390 *)(x))->gprs[2])
+#define PT_REGS_SP(x) (((PT_REGS_S390 *)(x))->gprs[15])
+#define PT_REGS_IP(x) (((PT_REGS_S390 *)(x))->psw.addr)
+
+#elif defined(bpf_target_arm)
+
+#define PT_REGS_PARM1(x) ((x)->uregs[0])
+#define PT_REGS_PARM2(x) ((x)->uregs[1])
+#define PT_REGS_PARM3(x) ((x)->uregs[2])
+#define PT_REGS_PARM4(x) ((x)->uregs[3])
+#define PT_REGS_PARM5(x) ((x)->uregs[4])
+#define PT_REGS_RET(x) ((x)->uregs[14])
+#define PT_REGS_FP(x) ((x)->uregs[11]) /* Works only with CONFIG_FRAME_POINTER */
+#define PT_REGS_RC(x) ((x)->uregs[0])
+#define PT_REGS_SP(x) ((x)->uregs[13])
+#define PT_REGS_IP(x) ((x)->uregs[12])
+
+#elif defined(bpf_target_arm64)
+
+/* arm64 provides struct user_pt_regs instead of struct pt_regs to userspace */
+struct pt_regs;
+#define PT_REGS_ARM64 const volatile struct user_pt_regs
+#define PT_REGS_PARM1(x) (((PT_REGS_ARM64 *)(x))->regs[0])
+#define PT_REGS_PARM2(x) (((PT_REGS_ARM64 *)(x))->regs[1])
+#define PT_REGS_PARM3(x) (((PT_REGS_ARM64 *)(x))->regs[2])
+#define PT_REGS_PARM4(x) (((PT_REGS_ARM64 *)(x))->regs[3])
+#define PT_REGS_PARM5(x) (((PT_REGS_ARM64 *)(x))->regs[4])
+#define PT_REGS_RET(x) (((PT_REGS_ARM64 *)(x))->regs[30])
+/* Works only with CONFIG_FRAME_POINTER */
+#define PT_REGS_FP(x) (((PT_REGS_ARM64 *)(x))->regs[29])
+#define PT_REGS_RC(x) (((PT_REGS_ARM64 *)(x))->regs[0])
+#define PT_REGS_SP(x) (((PT_REGS_ARM64 *)(x))->sp)
+#define PT_REGS_IP(x) (((PT_REGS_ARM64 *)(x))->pc)
+
+#elif defined(bpf_target_mips)
+
+#define PT_REGS_PARM1(x) ((x)->regs[4])
+#define PT_REGS_PARM2(x) ((x)->regs[5])
+#define PT_REGS_PARM3(x) ((x)->regs[6])
+#define PT_REGS_PARM4(x) ((x)->regs[7])
+#define PT_REGS_PARM5(x) ((x)->regs[8])
+#define PT_REGS_RET(x) ((x)->regs[31])
+#define PT_REGS_FP(x) ((x)->regs[30]) /* Works only with CONFIG_FRAME_POINTER */
+#define PT_REGS_RC(x) ((x)->regs[1])
+#define PT_REGS_SP(x) ((x)->regs[29])
+#define PT_REGS_IP(x) ((x)->cp0_epc)
+
+#elif defined(bpf_target_powerpc)
+
+#define PT_REGS_PARM1(x) ((x)->gpr[3])
+#define PT_REGS_PARM2(x) ((x)->gpr[4])
+#define PT_REGS_PARM3(x) ((x)->gpr[5])
+#define PT_REGS_PARM4(x) ((x)->gpr[6])
+#define PT_REGS_PARM5(x) ((x)->gpr[7])
+#define PT_REGS_RC(x) ((x)->gpr[3])
+#define PT_REGS_SP(x) ((x)->sp)
+#define PT_REGS_IP(x) ((x)->nip)
+
+#elif defined(bpf_target_sparc)
+
+#define PT_REGS_PARM1(x) ((x)->u_regs[UREG_I0])
+#define PT_REGS_PARM2(x) ((x)->u_regs[UREG_I1])
+#define PT_REGS_PARM3(x) ((x)->u_regs[UREG_I2])
+#define PT_REGS_PARM4(x) ((x)->u_regs[UREG_I3])
+#define PT_REGS_PARM5(x) ((x)->u_regs[UREG_I4])
+#define PT_REGS_RET(x) ((x)->u_regs[UREG_I7])
+#define PT_REGS_RC(x) ((x)->u_regs[UREG_I0])
+#define PT_REGS_SP(x) ((x)->u_regs[UREG_FP])
+
+/* Should this also be a bpf_target check for the sparc case? */
+#if defined(__arch64__)
+#define PT_REGS_IP(x) ((x)->tpc)
+#else
+#define PT_REGS_IP(x) ((x)->pc)
+#endif
+
+#endif
+
+#if defined(bpf_target_powerpc)
+#define BPF_KPROBE_READ_RET_IP(ip, ctx) ({ (ip) = (ctx)->link; })
+#define BPF_KRETPROBE_READ_RET_IP BPF_KPROBE_READ_RET_IP
+#elif defined(bpf_target_sparc)
+#define BPF_KPROBE_READ_RET_IP(ip, ctx) ({ (ip) = PT_REGS_RET(ctx); })
+#define BPF_KRETPROBE_READ_RET_IP BPF_KPROBE_READ_RET_IP
+#else
+#define BPF_KPROBE_READ_RET_IP(ip, ctx) \
+ ({ bpf_probe_read(&(ip), sizeof(ip), (void *)PT_REGS_RET(ctx)); })
+#define BPF_KRETPROBE_READ_RET_IP(ip, ctx) \
+ ({ bpf_probe_read(&(ip), sizeof(ip), \
+ (void *)(PT_REGS_FP(ctx) + sizeof(ip))); })
+#endif
+
+#endif
diff --git a/src/contrib/libbpf/bpf/btf.c b/src/contrib/libbpf/bpf/btf.c
new file mode 100644
index 0000000..88efa2b
--- /dev/null
+++ b/src/contrib/libbpf/bpf/btf.c
@@ -0,0 +1,2884 @@
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+/* Copyright (c) 2018 Facebook */
+
+#include <endian.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <errno.h>
+#include <linux/err.h>
+#include <linux/btf.h>
+#include <gelf.h>
+#include "btf.h"
+#include "bpf.h"
+#include "libbpf.h"
+#include "libbpf_internal.h"
+#include "hashmap.h"
+
+#define BTF_MAX_NR_TYPES 0x7fffffff
+#define BTF_MAX_STR_OFFSET 0x7fffffff
+
+static struct btf_type btf_void;
+
+struct btf {
+ union {
+ struct btf_header *hdr;
+ void *data;
+ };
+ struct btf_type **types;
+ const char *strings;
+ void *nohdr_data;
+ __u32 nr_types;
+ __u32 types_size;
+ __u32 data_size;
+ int fd;
+};
+
+static inline __u64 ptr_to_u64(const void *ptr)
+{
+ return (__u64) (unsigned long) ptr;
+}
+
+static int btf_add_type(struct btf *btf, struct btf_type *t)
+{
+ if (btf->types_size - btf->nr_types < 2) {
+ struct btf_type **new_types;
+ __u32 expand_by, new_size;
+
+ if (btf->types_size == BTF_MAX_NR_TYPES)
+ return -E2BIG;
+
+ expand_by = max(btf->types_size >> 2, 16);
+ new_size = min(BTF_MAX_NR_TYPES, btf->types_size + expand_by);
+
+ new_types = realloc(btf->types, sizeof(*new_types) * new_size);
+ if (!new_types)
+ return -ENOMEM;
+
+ if (btf->nr_types == 0)
+ new_types[0] = &btf_void;
+
+ btf->types = new_types;
+ btf->types_size = new_size;
+ }
+
+ btf->types[++(btf->nr_types)] = t;
+
+ return 0;
+}
+
+static int btf_parse_hdr(struct btf *btf)
+{
+ const struct btf_header *hdr = btf->hdr;
+ __u32 meta_left;
+
+ if (btf->data_size < sizeof(struct btf_header)) {
+ pr_debug("BTF header not found\n");
+ return -EINVAL;
+ }
+
+ if (hdr->magic != BTF_MAGIC) {
+ pr_debug("Invalid BTF magic:%x\n", hdr->magic);
+ return -EINVAL;
+ }
+
+ if (hdr->version != BTF_VERSION) {
+ pr_debug("Unsupported BTF version:%u\n", hdr->version);
+ return -ENOTSUP;
+ }
+
+ if (hdr->flags) {
+ pr_debug("Unsupported BTF flags:%x\n", hdr->flags);
+ return -ENOTSUP;
+ }
+
+ meta_left = btf->data_size - sizeof(*hdr);
+ if (!meta_left) {
+ pr_debug("BTF has no data\n");
+ return -EINVAL;
+ }
+
+ if (meta_left < hdr->type_off) {
+ pr_debug("Invalid BTF type section offset:%u\n", hdr->type_off);
+ return -EINVAL;
+ }
+
+ if (meta_left < hdr->str_off) {
+ pr_debug("Invalid BTF string section offset:%u\n", hdr->str_off);
+ return -EINVAL;
+ }
+
+ if (hdr->type_off >= hdr->str_off) {
+ pr_debug("BTF type section offset >= string section offset. No type?\n");
+ return -EINVAL;
+ }
+
+ if (hdr->type_off & 0x02) {
+ pr_debug("BTF type section is not aligned to 4 bytes\n");
+ return -EINVAL;
+ }
+
+ btf->nohdr_data = btf->hdr + 1;
+
+ return 0;
+}
+
+static int btf_parse_str_sec(struct btf *btf)
+{
+ const struct btf_header *hdr = btf->hdr;
+ const char *start = btf->nohdr_data + hdr->str_off;
+ const char *end = start + btf->hdr->str_len;
+
+ if (!hdr->str_len || hdr->str_len - 1 > BTF_MAX_STR_OFFSET ||
+ start[0] || end[-1]) {
+ pr_debug("Invalid BTF string section\n");
+ return -EINVAL;
+ }
+
+ btf->strings = start;
+
+ return 0;
+}
+
+static int btf_type_size(struct btf_type *t)
+{
+ int base_size = sizeof(struct btf_type);
+ __u16 vlen = btf_vlen(t);
+
+ switch (btf_kind(t)) {
+ case BTF_KIND_FWD:
+ case BTF_KIND_CONST:
+ case BTF_KIND_VOLATILE:
+ case BTF_KIND_RESTRICT:
+ case BTF_KIND_PTR:
+ case BTF_KIND_TYPEDEF:
+ case BTF_KIND_FUNC:
+ return base_size;
+ case BTF_KIND_INT:
+ return base_size + sizeof(__u32);
+ case BTF_KIND_ENUM:
+ return base_size + vlen * sizeof(struct btf_enum);
+ case BTF_KIND_ARRAY:
+ return base_size + sizeof(struct btf_array);
+ case BTF_KIND_STRUCT:
+ case BTF_KIND_UNION:
+ return base_size + vlen * sizeof(struct btf_member);
+ case BTF_KIND_FUNC_PROTO:
+ return base_size + vlen * sizeof(struct btf_param);
+ case BTF_KIND_VAR:
+ return base_size + sizeof(struct btf_var);
+ case BTF_KIND_DATASEC:
+ return base_size + vlen * sizeof(struct btf_var_secinfo);
+ default:
+ pr_debug("Unsupported BTF_KIND:%u\n", btf_kind(t));
+ return -EINVAL;
+ }
+}
+
+static int btf_parse_type_sec(struct btf *btf)
+{
+ struct btf_header *hdr = btf->hdr;
+ void *nohdr_data = btf->nohdr_data;
+ void *next_type = nohdr_data + hdr->type_off;
+ void *end_type = nohdr_data + hdr->str_off;
+
+ while (next_type < end_type) {
+ struct btf_type *t = next_type;
+ int type_size;
+ int err;
+
+ type_size = btf_type_size(t);
+ if (type_size < 0)
+ return type_size;
+ next_type += type_size;
+ err = btf_add_type(btf, t);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+__u32 btf__get_nr_types(const struct btf *btf)
+{
+ return btf->nr_types;
+}
+
+const struct btf_type *btf__type_by_id(const struct btf *btf, __u32 type_id)
+{
+ if (type_id > btf->nr_types)
+ return NULL;
+
+ return btf->types[type_id];
+}
+
+static bool btf_type_is_void(const struct btf_type *t)
+{
+ return t == &btf_void || btf_is_fwd(t);
+}
+
+static bool btf_type_is_void_or_null(const struct btf_type *t)
+{
+ return !t || btf_type_is_void(t);
+}
+
+#define MAX_RESOLVE_DEPTH 32
+
+__s64 btf__resolve_size(const struct btf *btf, __u32 type_id)
+{
+ const struct btf_array *array;
+ const struct btf_type *t;
+ __u32 nelems = 1;
+ __s64 size = -1;
+ int i;
+
+ t = btf__type_by_id(btf, type_id);
+ for (i = 0; i < MAX_RESOLVE_DEPTH && !btf_type_is_void_or_null(t);
+ i++) {
+ switch (btf_kind(t)) {
+ case BTF_KIND_INT:
+ case BTF_KIND_STRUCT:
+ case BTF_KIND_UNION:
+ case BTF_KIND_ENUM:
+ case BTF_KIND_DATASEC:
+ size = t->size;
+ goto done;
+ case BTF_KIND_PTR:
+ size = sizeof(void *);
+ goto done;
+ case BTF_KIND_TYPEDEF:
+ case BTF_KIND_VOLATILE:
+ case BTF_KIND_CONST:
+ case BTF_KIND_RESTRICT:
+ case BTF_KIND_VAR:
+ type_id = t->type;
+ break;
+ case BTF_KIND_ARRAY:
+ array = btf_array(t);
+ if (nelems && array->nelems > UINT32_MAX / nelems)
+ return -E2BIG;
+ nelems *= array->nelems;
+ type_id = array->type;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ t = btf__type_by_id(btf, type_id);
+ }
+
+done:
+ if (size < 0)
+ return -EINVAL;
+ if (nelems && size > UINT32_MAX / nelems)
+ return -E2BIG;
+
+ return nelems * size;
+}
+
+int btf__resolve_type(const struct btf *btf, __u32 type_id)
+{
+ const struct btf_type *t;
+ int depth = 0;
+
+ t = btf__type_by_id(btf, type_id);
+ while (depth < MAX_RESOLVE_DEPTH &&
+ !btf_type_is_void_or_null(t) &&
+ (btf_is_mod(t) || btf_is_typedef(t) || btf_is_var(t))) {
+ type_id = t->type;
+ t = btf__type_by_id(btf, type_id);
+ depth++;
+ }
+
+ if (depth == MAX_RESOLVE_DEPTH || btf_type_is_void_or_null(t))
+ return -EINVAL;
+
+ return type_id;
+}
+
+__s32 btf__find_by_name(const struct btf *btf, const char *type_name)
+{
+ __u32 i;
+
+ if (!strcmp(type_name, "void"))
+ return 0;
+
+ for (i = 1; i <= btf->nr_types; i++) {
+ const struct btf_type *t = btf->types[i];
+ const char *name = btf__name_by_offset(btf, t->name_off);
+
+ if (name && !strcmp(type_name, name))
+ return i;
+ }
+
+ return -ENOENT;
+}
+
+__s32 btf__find_by_name_kind(const struct btf *btf, const char *type_name,
+ __u32 kind)
+{
+ __u32 i;
+
+ if (kind == BTF_KIND_UNKN || !strcmp(type_name, "void"))
+ return 0;
+
+ for (i = 1; i <= btf->nr_types; i++) {
+ const struct btf_type *t = btf->types[i];
+ const char *name;
+
+ if (btf_kind(t) != kind)
+ continue;
+ name = btf__name_by_offset(btf, t->name_off);
+ if (name && !strcmp(type_name, name))
+ return i;
+ }
+
+ return -ENOENT;
+}
+
+void btf__free(struct btf *btf)
+{
+ if (!btf)
+ return;
+
+ if (btf->fd != -1)
+ close(btf->fd);
+
+ free(btf->data);
+ free(btf->types);
+ free(btf);
+}
+
+struct btf *btf__new(__u8 *data, __u32 size)
+{
+ struct btf *btf;
+ int err;
+
+ btf = calloc(1, sizeof(struct btf));
+ if (!btf)
+ return ERR_PTR(-ENOMEM);
+
+ btf->fd = -1;
+
+ btf->data = malloc(size);
+ if (!btf->data) {
+ err = -ENOMEM;
+ goto done;
+ }
+
+ memcpy(btf->data, data, size);
+ btf->data_size = size;
+
+ err = btf_parse_hdr(btf);
+ if (err)
+ goto done;
+
+ err = btf_parse_str_sec(btf);
+ if (err)
+ goto done;
+
+ err = btf_parse_type_sec(btf);
+
+done:
+ if (err) {
+ btf__free(btf);
+ return ERR_PTR(err);
+ }
+
+ return btf;
+}
+
+static bool btf_check_endianness(const GElf_Ehdr *ehdr)
+{
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+ return ehdr->e_ident[EI_DATA] == ELFDATA2LSB;
+#elif __BYTE_ORDER == __BIG_ENDIAN
+ return ehdr->e_ident[EI_DATA] == ELFDATA2MSB;
+#else
+# error "Unrecognized __BYTE_ORDER__"
+#endif
+}
+
+struct btf *btf__parse_elf(const char *path, struct btf_ext **btf_ext)
+{
+ Elf_Data *btf_data = NULL, *btf_ext_data = NULL;
+ int err = 0, fd = -1, idx = 0;
+ struct btf *btf = NULL;
+ Elf_Scn *scn = NULL;
+ Elf *elf = NULL;
+ GElf_Ehdr ehdr;
+
+ if (elf_version(EV_CURRENT) == EV_NONE) {
+ pr_warn("failed to init libelf for %s\n", path);
+ return ERR_PTR(-LIBBPF_ERRNO__LIBELF);
+ }
+
+ fd = open(path, O_RDONLY);
+ if (fd < 0) {
+ err = -errno;
+ pr_warn("failed to open %s: %s\n", path, strerror(errno));
+ return ERR_PTR(err);
+ }
+
+ err = -LIBBPF_ERRNO__FORMAT;
+
+ elf = elf_begin(fd, ELF_C_READ, NULL);
+ if (!elf) {
+ pr_warn("failed to open %s as ELF file\n", path);
+ goto done;
+ }
+ if (!gelf_getehdr(elf, &ehdr)) {
+ pr_warn("failed to get EHDR from %s\n", path);
+ goto done;
+ }
+ if (!btf_check_endianness(&ehdr)) {
+ pr_warn("non-native ELF endianness is not supported\n");
+ goto done;
+ }
+ if (!elf_rawdata(elf_getscn(elf, ehdr.e_shstrndx), NULL)) {
+ pr_warn("failed to get e_shstrndx from %s\n", path);
+ goto done;
+ }
+
+ while ((scn = elf_nextscn(elf, scn)) != NULL) {
+ GElf_Shdr sh;
+ char *name;
+
+ idx++;
+ if (gelf_getshdr(scn, &sh) != &sh) {
+ pr_warn("failed to get section(%d) header from %s\n",
+ idx, path);
+ goto done;
+ }
+ name = elf_strptr(elf, ehdr.e_shstrndx, sh.sh_name);
+ if (!name) {
+ pr_warn("failed to get section(%d) name from %s\n",
+ idx, path);
+ goto done;
+ }
+ if (strcmp(name, BTF_ELF_SEC) == 0) {
+ btf_data = elf_getdata(scn, 0);
+ if (!btf_data) {
+ pr_warn("failed to get section(%d, %s) data from %s\n",
+ idx, name, path);
+ goto done;
+ }
+ continue;
+ } else if (btf_ext && strcmp(name, BTF_EXT_ELF_SEC) == 0) {
+ btf_ext_data = elf_getdata(scn, 0);
+ if (!btf_ext_data) {
+ pr_warn("failed to get section(%d, %s) data from %s\n",
+ idx, name, path);
+ goto done;
+ }
+ continue;
+ }
+ }
+
+ err = 0;
+
+ if (!btf_data) {
+ err = -ENOENT;
+ goto done;
+ }
+ btf = btf__new(btf_data->d_buf, btf_data->d_size);
+ if (IS_ERR(btf))
+ goto done;
+
+ if (btf_ext && btf_ext_data) {
+ *btf_ext = btf_ext__new(btf_ext_data->d_buf,
+ btf_ext_data->d_size);
+ if (IS_ERR(*btf_ext))
+ goto done;
+ } else if (btf_ext) {
+ *btf_ext = NULL;
+ }
+done:
+ if (elf)
+ elf_end(elf);
+ close(fd);
+
+ if (err)
+ return ERR_PTR(err);
+ /*
+ * btf is always parsed before btf_ext, so no need to clean up
+ * btf_ext, if btf loading failed
+ */
+ if (IS_ERR(btf))
+ return btf;
+ if (btf_ext && IS_ERR(*btf_ext)) {
+ btf__free(btf);
+ err = PTR_ERR(*btf_ext);
+ return ERR_PTR(err);
+ }
+ return btf;
+}
+
+static int compare_vsi_off(const void *_a, const void *_b)
+{
+ const struct btf_var_secinfo *a = _a;
+ const struct btf_var_secinfo *b = _b;
+
+ return a->offset - b->offset;
+}
+
+static int btf_fixup_datasec(struct bpf_object *obj, struct btf *btf,
+ struct btf_type *t)
+{
+ __u32 size = 0, off = 0, i, vars = btf_vlen(t);
+ const char *name = btf__name_by_offset(btf, t->name_off);
+ const struct btf_type *t_var;
+ struct btf_var_secinfo *vsi;
+ const struct btf_var *var;
+ int ret;
+
+ if (!name) {
+ pr_debug("No name found in string section for DATASEC kind.\n");
+ return -ENOENT;
+ }
+
+ ret = bpf_object__section_size(obj, name, &size);
+ if (ret || !size || (t->size && t->size != size)) {
+ pr_debug("Invalid size for section %s: %u bytes\n", name, size);
+ return -ENOENT;
+ }
+
+ t->size = size;
+
+ for (i = 0, vsi = btf_var_secinfos(t); i < vars; i++, vsi++) {
+ t_var = btf__type_by_id(btf, vsi->type);
+ var = btf_var(t_var);
+
+ if (!btf_is_var(t_var)) {
+ pr_debug("Non-VAR type seen in section %s\n", name);
+ return -EINVAL;
+ }
+
+ if (var->linkage == BTF_VAR_STATIC)
+ continue;
+
+ name = btf__name_by_offset(btf, t_var->name_off);
+ if (!name) {
+ pr_debug("No name found in string section for VAR kind\n");
+ return -ENOENT;
+ }
+
+ ret = bpf_object__variable_offset(obj, name, &off);
+ if (ret) {
+ pr_debug("No offset found in symbol table for VAR %s\n",
+ name);
+ return -ENOENT;
+ }
+
+ vsi->offset = off;
+ }
+
+ qsort(t + 1, vars, sizeof(*vsi), compare_vsi_off);
+ return 0;
+}
+
+int btf__finalize_data(struct bpf_object *obj, struct btf *btf)
+{
+ int err = 0;
+ __u32 i;
+
+ for (i = 1; i <= btf->nr_types; i++) {
+ struct btf_type *t = btf->types[i];
+
+ /* Loader needs to fix up some of the things compiler
+ * couldn't get its hands on while emitting BTF. This
+ * is section size and global variable offset. We use
+ * the info from the ELF itself for this purpose.
+ */
+ if (btf_is_datasec(t)) {
+ err = btf_fixup_datasec(obj, btf, t);
+ if (err)
+ break;
+ }
+ }
+
+ return err;
+}
+
+int btf__load(struct btf *btf)
+{
+ __u32 log_buf_size = BPF_LOG_BUF_SIZE;
+ char *log_buf = NULL;
+ int err = 0;
+
+ if (btf->fd >= 0)
+ return -EEXIST;
+
+ log_buf = malloc(log_buf_size);
+ if (!log_buf)
+ return -ENOMEM;
+
+ *log_buf = 0;
+
+ btf->fd = bpf_load_btf(btf->data, btf->data_size,
+ log_buf, log_buf_size, false);
+ if (btf->fd < 0) {
+ err = -errno;
+ pr_warn("Error loading BTF: %s(%d)\n", strerror(errno), errno);
+ if (*log_buf)
+ pr_warn("%s\n", log_buf);
+ goto done;
+ }
+
+done:
+ free(log_buf);
+ return err;
+}
+
+int btf__fd(const struct btf *btf)
+{
+ return btf->fd;
+}
+
+const void *btf__get_raw_data(const struct btf *btf, __u32 *size)
+{
+ *size = btf->data_size;
+ return btf->data;
+}
+
+const char *btf__name_by_offset(const struct btf *btf, __u32 offset)
+{
+ if (offset < btf->hdr->str_len)
+ return &btf->strings[offset];
+ else
+ return NULL;
+}
+
+int btf__get_from_id(__u32 id, struct btf **btf)
+{
+ struct bpf_btf_info btf_info = { 0 };
+ __u32 len = sizeof(btf_info);
+ __u32 last_size;
+ int btf_fd;
+ void *ptr;
+ int err;
+
+ err = 0;
+ *btf = NULL;
+ btf_fd = bpf_btf_get_fd_by_id(id);
+ if (btf_fd < 0)
+ return 0;
+
+ /* we won't know btf_size until we call bpf_obj_get_info_by_fd(). so
+ * let's start with a sane default - 4KiB here - and resize it only if
+ * bpf_obj_get_info_by_fd() needs a bigger buffer.
+ */
+ btf_info.btf_size = 4096;
+ last_size = btf_info.btf_size;
+ ptr = malloc(last_size);
+ if (!ptr) {
+ err = -ENOMEM;
+ goto exit_free;
+ }
+
+ memset(ptr, 0, last_size);
+ btf_info.btf = ptr_to_u64(ptr);
+ err = bpf_obj_get_info_by_fd(btf_fd, &btf_info, &len);
+
+ if (!err && btf_info.btf_size > last_size) {
+ void *temp_ptr;
+
+ last_size = btf_info.btf_size;
+ temp_ptr = realloc(ptr, last_size);
+ if (!temp_ptr) {
+ err = -ENOMEM;
+ goto exit_free;
+ }
+ ptr = temp_ptr;
+ memset(ptr, 0, last_size);
+ btf_info.btf = ptr_to_u64(ptr);
+ err = bpf_obj_get_info_by_fd(btf_fd, &btf_info, &len);
+ }
+
+ if (err || btf_info.btf_size > last_size) {
+ err = errno;
+ goto exit_free;
+ }
+
+ *btf = btf__new((__u8 *)(long)btf_info.btf, btf_info.btf_size);
+ if (IS_ERR(*btf)) {
+ err = PTR_ERR(*btf);
+ *btf = NULL;
+ }
+
+exit_free:
+ close(btf_fd);
+ free(ptr);
+
+ return err;
+}
+
+int btf__get_map_kv_tids(const struct btf *btf, const char *map_name,
+ __u32 expected_key_size, __u32 expected_value_size,
+ __u32 *key_type_id, __u32 *value_type_id)
+{
+ const struct btf_type *container_type;
+ const struct btf_member *key, *value;
+ const size_t max_name = 256;
+ char container_name[max_name];
+ __s64 key_size, value_size;
+ __s32 container_id;
+
+ if (snprintf(container_name, max_name, "____btf_map_%s", map_name) ==
+ max_name) {
+ pr_warn("map:%s length of '____btf_map_%s' is too long\n",
+ map_name, map_name);
+ return -EINVAL;
+ }
+
+ container_id = btf__find_by_name(btf, container_name);
+ if (container_id < 0) {
+ pr_debug("map:%s container_name:%s cannot be found in BTF. Missing BPF_ANNOTATE_KV_PAIR?\n",
+ map_name, container_name);
+ return container_id;
+ }
+
+ container_type = btf__type_by_id(btf, container_id);
+ if (!container_type) {
+ pr_warn("map:%s cannot find BTF type for container_id:%u\n",
+ map_name, container_id);
+ return -EINVAL;
+ }
+
+ if (!btf_is_struct(container_type) || btf_vlen(container_type) < 2) {
+ pr_warn("map:%s container_name:%s is an invalid container struct\n",
+ map_name, container_name);
+ return -EINVAL;
+ }
+
+ key = btf_members(container_type);
+ value = key + 1;
+
+ key_size = btf__resolve_size(btf, key->type);
+ if (key_size < 0) {
+ pr_warn("map:%s invalid BTF key_type_size\n", map_name);
+ return key_size;
+ }
+
+ if (expected_key_size != key_size) {
+ pr_warn("map:%s btf_key_type_size:%u != map_def_key_size:%u\n",
+ map_name, (__u32)key_size, expected_key_size);
+ return -EINVAL;
+ }
+
+ value_size = btf__resolve_size(btf, value->type);
+ if (value_size < 0) {
+ pr_warn("map:%s invalid BTF value_type_size\n", map_name);
+ return value_size;
+ }
+
+ if (expected_value_size != value_size) {
+ pr_warn("map:%s btf_value_type_size:%u != map_def_value_size:%u\n",
+ map_name, (__u32)value_size, expected_value_size);
+ return -EINVAL;
+ }
+
+ *key_type_id = key->type;
+ *value_type_id = value->type;
+
+ return 0;
+}
+
+struct btf_ext_sec_setup_param {
+ __u32 off;
+ __u32 len;
+ __u32 min_rec_size;
+ struct btf_ext_info *ext_info;
+ const char *desc;
+};
+
+static int btf_ext_setup_info(struct btf_ext *btf_ext,
+ struct btf_ext_sec_setup_param *ext_sec)
+{
+ const struct btf_ext_info_sec *sinfo;
+ struct btf_ext_info *ext_info;
+ __u32 info_left, record_size;
+ /* The start of the info sec (including the __u32 record_size). */
+ void *info;
+
+ if (ext_sec->len == 0)
+ return 0;
+
+ if (ext_sec->off & 0x03) {
+ pr_debug(".BTF.ext %s section is not aligned to 4 bytes\n",
+ ext_sec->desc);
+ return -EINVAL;
+ }
+
+ info = btf_ext->data + btf_ext->hdr->hdr_len + ext_sec->off;
+ info_left = ext_sec->len;
+
+ if (btf_ext->data + btf_ext->data_size < info + ext_sec->len) {
+ pr_debug("%s section (off:%u len:%u) is beyond the end of the ELF section .BTF.ext\n",
+ ext_sec->desc, ext_sec->off, ext_sec->len);
+ return -EINVAL;
+ }
+
+ /* At least a record size */
+ if (info_left < sizeof(__u32)) {
+ pr_debug(".BTF.ext %s record size not found\n", ext_sec->desc);
+ return -EINVAL;
+ }
+
+ /* The record size needs to meet the minimum standard */
+ record_size = *(__u32 *)info;
+ if (record_size < ext_sec->min_rec_size ||
+ record_size & 0x03) {
+ pr_debug("%s section in .BTF.ext has invalid record size %u\n",
+ ext_sec->desc, record_size);
+ return -EINVAL;
+ }
+
+ sinfo = info + sizeof(__u32);
+ info_left -= sizeof(__u32);
+
+ /* If no records, return failure now so .BTF.ext won't be used. */
+ if (!info_left) {
+ pr_debug("%s section in .BTF.ext has no records", ext_sec->desc);
+ return -EINVAL;
+ }
+
+ while (info_left) {
+ unsigned int sec_hdrlen = sizeof(struct btf_ext_info_sec);
+ __u64 total_record_size;
+ __u32 num_records;
+
+ if (info_left < sec_hdrlen) {
+ pr_debug("%s section header is not found in .BTF.ext\n",
+ ext_sec->desc);
+ return -EINVAL;
+ }
+
+ num_records = sinfo->num_info;
+ if (num_records == 0) {
+ pr_debug("%s section has incorrect num_records in .BTF.ext\n",
+ ext_sec->desc);
+ return -EINVAL;
+ }
+
+ total_record_size = sec_hdrlen +
+ (__u64)num_records * record_size;
+ if (info_left < total_record_size) {
+ pr_debug("%s section has incorrect num_records in .BTF.ext\n",
+ ext_sec->desc);
+ return -EINVAL;
+ }
+
+ info_left -= total_record_size;
+ sinfo = (void *)sinfo + total_record_size;
+ }
+
+ ext_info = ext_sec->ext_info;
+ ext_info->len = ext_sec->len - sizeof(__u32);
+ ext_info->rec_size = record_size;
+ ext_info->info = info + sizeof(__u32);
+
+ return 0;
+}
+
+static int btf_ext_setup_func_info(struct btf_ext *btf_ext)
+{
+ struct btf_ext_sec_setup_param param = {
+ .off = btf_ext->hdr->func_info_off,
+ .len = btf_ext->hdr->func_info_len,
+ .min_rec_size = sizeof(struct bpf_func_info_min),
+ .ext_info = &btf_ext->func_info,
+ .desc = "func_info"
+ };
+
+ return btf_ext_setup_info(btf_ext, &param);
+}
+
+static int btf_ext_setup_line_info(struct btf_ext *btf_ext)
+{
+ struct btf_ext_sec_setup_param param = {
+ .off = btf_ext->hdr->line_info_off,
+ .len = btf_ext->hdr->line_info_len,
+ .min_rec_size = sizeof(struct bpf_line_info_min),
+ .ext_info = &btf_ext->line_info,
+ .desc = "line_info",
+ };
+
+ return btf_ext_setup_info(btf_ext, &param);
+}
+
+static int btf_ext_setup_field_reloc(struct btf_ext *btf_ext)
+{
+ struct btf_ext_sec_setup_param param = {
+ .off = btf_ext->hdr->field_reloc_off,
+ .len = btf_ext->hdr->field_reloc_len,
+ .min_rec_size = sizeof(struct bpf_field_reloc),
+ .ext_info = &btf_ext->field_reloc_info,
+ .desc = "field_reloc",
+ };
+
+ return btf_ext_setup_info(btf_ext, &param);
+}
+
+static int btf_ext_parse_hdr(__u8 *data, __u32 data_size)
+{
+ const struct btf_ext_header *hdr = (struct btf_ext_header *)data;
+
+ if (data_size < offsetofend(struct btf_ext_header, hdr_len) ||
+ data_size < hdr->hdr_len) {
+ pr_debug("BTF.ext header not found");
+ return -EINVAL;
+ }
+
+ if (hdr->magic != BTF_MAGIC) {
+ pr_debug("Invalid BTF.ext magic:%x\n", hdr->magic);
+ return -EINVAL;
+ }
+
+ if (hdr->version != BTF_VERSION) {
+ pr_debug("Unsupported BTF.ext version:%u\n", hdr->version);
+ return -ENOTSUP;
+ }
+
+ if (hdr->flags) {
+ pr_debug("Unsupported BTF.ext flags:%x\n", hdr->flags);
+ return -ENOTSUP;
+ }
+
+ if (data_size == hdr->hdr_len) {
+ pr_debug("BTF.ext has no data\n");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+void btf_ext__free(struct btf_ext *btf_ext)
+{
+ if (!btf_ext)
+ return;
+ free(btf_ext->data);
+ free(btf_ext);
+}
+
+struct btf_ext *btf_ext__new(__u8 *data, __u32 size)
+{
+ struct btf_ext *btf_ext;
+ int err;
+
+ err = btf_ext_parse_hdr(data, size);
+ if (err)
+ return ERR_PTR(err);
+
+ btf_ext = calloc(1, sizeof(struct btf_ext));
+ if (!btf_ext)
+ return ERR_PTR(-ENOMEM);
+
+ btf_ext->data_size = size;
+ btf_ext->data = malloc(size);
+ if (!btf_ext->data) {
+ err = -ENOMEM;
+ goto done;
+ }
+ memcpy(btf_ext->data, data, size);
+
+ if (btf_ext->hdr->hdr_len <
+ offsetofend(struct btf_ext_header, line_info_len))
+ goto done;
+ err = btf_ext_setup_func_info(btf_ext);
+ if (err)
+ goto done;
+
+ err = btf_ext_setup_line_info(btf_ext);
+ if (err)
+ goto done;
+
+ if (btf_ext->hdr->hdr_len <
+ offsetofend(struct btf_ext_header, field_reloc_len))
+ goto done;
+ err = btf_ext_setup_field_reloc(btf_ext);
+ if (err)
+ goto done;
+
+done:
+ if (err) {
+ btf_ext__free(btf_ext);
+ return ERR_PTR(err);
+ }
+
+ return btf_ext;
+}
+
+const void *btf_ext__get_raw_data(const struct btf_ext *btf_ext, __u32 *size)
+{
+ *size = btf_ext->data_size;
+ return btf_ext->data;
+}
+
+static int btf_ext_reloc_info(const struct btf *btf,
+ const struct btf_ext_info *ext_info,
+ const char *sec_name, __u32 insns_cnt,
+ void **info, __u32 *cnt)
+{
+ __u32 sec_hdrlen = sizeof(struct btf_ext_info_sec);
+ __u32 i, record_size, existing_len, records_len;
+ struct btf_ext_info_sec *sinfo;
+ const char *info_sec_name;
+ __u64 remain_len;
+ void *data;
+
+ record_size = ext_info->rec_size;
+ sinfo = ext_info->info;
+ remain_len = ext_info->len;
+ while (remain_len > 0) {
+ records_len = sinfo->num_info * record_size;
+ info_sec_name = btf__name_by_offset(btf, sinfo->sec_name_off);
+ if (strcmp(info_sec_name, sec_name)) {
+ remain_len -= sec_hdrlen + records_len;
+ sinfo = (void *)sinfo + sec_hdrlen + records_len;
+ continue;
+ }
+
+ existing_len = (*cnt) * record_size;
+ data = realloc(*info, existing_len + records_len);
+ if (!data)
+ return -ENOMEM;
+
+ memcpy(data + existing_len, sinfo->data, records_len);
+ /* adjust insn_off only, the rest data will be passed
+ * to the kernel.
+ */
+ for (i = 0; i < sinfo->num_info; i++) {
+ __u32 *insn_off;
+
+ insn_off = data + existing_len + (i * record_size);
+ *insn_off = *insn_off / sizeof(struct bpf_insn) +
+ insns_cnt;
+ }
+ *info = data;
+ *cnt += sinfo->num_info;
+ return 0;
+ }
+
+ return -ENOENT;
+}
+
+int btf_ext__reloc_func_info(const struct btf *btf,
+ const struct btf_ext *btf_ext,
+ const char *sec_name, __u32 insns_cnt,
+ void **func_info, __u32 *cnt)
+{
+ return btf_ext_reloc_info(btf, &btf_ext->func_info, sec_name,
+ insns_cnt, func_info, cnt);
+}
+
+int btf_ext__reloc_line_info(const struct btf *btf,
+ const struct btf_ext *btf_ext,
+ const char *sec_name, __u32 insns_cnt,
+ void **line_info, __u32 *cnt)
+{
+ return btf_ext_reloc_info(btf, &btf_ext->line_info, sec_name,
+ insns_cnt, line_info, cnt);
+}
+
+__u32 btf_ext__func_info_rec_size(const struct btf_ext *btf_ext)
+{
+ return btf_ext->func_info.rec_size;
+}
+
+__u32 btf_ext__line_info_rec_size(const struct btf_ext *btf_ext)
+{
+ return btf_ext->line_info.rec_size;
+}
+
+struct btf_dedup;
+
+static struct btf_dedup *btf_dedup_new(struct btf *btf, struct btf_ext *btf_ext,
+ const struct btf_dedup_opts *opts);
+static void btf_dedup_free(struct btf_dedup *d);
+static int btf_dedup_strings(struct btf_dedup *d);
+static int btf_dedup_prim_types(struct btf_dedup *d);
+static int btf_dedup_struct_types(struct btf_dedup *d);
+static int btf_dedup_ref_types(struct btf_dedup *d);
+static int btf_dedup_compact_types(struct btf_dedup *d);
+static int btf_dedup_remap_types(struct btf_dedup *d);
+
+/*
+ * Deduplicate BTF types and strings.
+ *
+ * BTF dedup algorithm takes as an input `struct btf` representing `.BTF` ELF
+ * section with all BTF type descriptors and string data. It overwrites that
+ * memory in-place with deduplicated types and strings without any loss of
+ * information. If optional `struct btf_ext` representing '.BTF.ext' ELF section
+ * is provided, all the strings referenced from .BTF.ext section are honored
+ * and updated to point to the right offsets after deduplication.
+ *
+ * If function returns with error, type/string data might be garbled and should
+ * be discarded.
+ *
+ * More verbose and detailed description of both problem btf_dedup is solving,
+ * as well as solution could be found at:
+ * https://facebookmicrosites.github.io/bpf/blog/2018/11/14/btf-enhancement.html
+ *
+ * Problem description and justification
+ * =====================================
+ *
+ * BTF type information is typically emitted either as a result of conversion
+ * from DWARF to BTF or directly by compiler. In both cases, each compilation
+ * unit contains information about a subset of all the types that are used
+ * in an application. These subsets are frequently overlapping and contain a lot
+ * of duplicated information when later concatenated together into a single
+ * binary. This algorithm ensures that each unique type is represented by single
+ * BTF type descriptor, greatly reducing resulting size of BTF data.
+ *
+ * Compilation unit isolation and subsequent duplication of data is not the only
+ * problem. The same type hierarchy (e.g., struct and all the type that struct
+ * references) in different compilation units can be represented in BTF to
+ * various degrees of completeness (or, rather, incompleteness) due to
+ * struct/union forward declarations.
+ *
+ * Let's take a look at an example, that we'll use to better understand the
+ * problem (and solution). Suppose we have two compilation units, each using
+ * same `struct S`, but each of them having incomplete type information about
+ * struct's fields:
+ *
+ * // CU #1:
+ * struct S;
+ * struct A {
+ * int a;
+ * struct A* self;
+ * struct S* parent;
+ * };
+ * struct B;
+ * struct S {
+ * struct A* a_ptr;
+ * struct B* b_ptr;
+ * };
+ *
+ * // CU #2:
+ * struct S;
+ * struct A;
+ * struct B {
+ * int b;
+ * struct B* self;
+ * struct S* parent;
+ * };
+ * struct S {
+ * struct A* a_ptr;
+ * struct B* b_ptr;
+ * };
+ *
+ * In case of CU #1, BTF data will know only that `struct B` exist (but no
+ * more), but will know the complete type information about `struct A`. While
+ * for CU #2, it will know full type information about `struct B`, but will
+ * only know about forward declaration of `struct A` (in BTF terms, it will
+ * have `BTF_KIND_FWD` type descriptor with name `B`).
+ *
+ * This compilation unit isolation means that it's possible that there is no
+ * single CU with complete type information describing structs `S`, `A`, and
+ * `B`. Also, we might get tons of duplicated and redundant type information.
+ *
+ * Additional complication we need to keep in mind comes from the fact that
+ * types, in general, can form graphs containing cycles, not just DAGs.
+ *
+ * While algorithm does deduplication, it also merges and resolves type
+ * information (unless disabled throught `struct btf_opts`), whenever possible.
+ * E.g., in the example above with two compilation units having partial type
+ * information for structs `A` and `B`, the output of algorithm will emit
+ * a single copy of each BTF type that describes structs `A`, `B`, and `S`
+ * (as well as type information for `int` and pointers), as if they were defined
+ * in a single compilation unit as:
+ *
+ * struct A {
+ * int a;
+ * struct A* self;
+ * struct S* parent;
+ * };
+ * struct B {
+ * int b;
+ * struct B* self;
+ * struct S* parent;
+ * };
+ * struct S {
+ * struct A* a_ptr;
+ * struct B* b_ptr;
+ * };
+ *
+ * Algorithm summary
+ * =================
+ *
+ * Algorithm completes its work in 6 separate passes:
+ *
+ * 1. Strings deduplication.
+ * 2. Primitive types deduplication (int, enum, fwd).
+ * 3. Struct/union types deduplication.
+ * 4. Reference types deduplication (pointers, typedefs, arrays, funcs, func
+ * protos, and const/volatile/restrict modifiers).
+ * 5. Types compaction.
+ * 6. Types remapping.
+ *
+ * Algorithm determines canonical type descriptor, which is a single
+ * representative type for each truly unique type. This canonical type is the
+ * one that will go into final deduplicated BTF type information. For
+ * struct/unions, it is also the type that algorithm will merge additional type
+ * information into (while resolving FWDs), as it discovers it from data in
+ * other CUs. Each input BTF type eventually gets either mapped to itself, if
+ * that type is canonical, or to some other type, if that type is equivalent
+ * and was chosen as canonical representative. This mapping is stored in
+ * `btf_dedup->map` array. This map is also used to record STRUCT/UNION that
+ * FWD type got resolved to.
+ *
+ * To facilitate fast discovery of canonical types, we also maintain canonical
+ * index (`btf_dedup->dedup_table`), which maps type descriptor's signature hash
+ * (i.e., hashed kind, name, size, fields, etc) into a list of canonical types
+ * that match that signature. With sufficiently good choice of type signature
+ * hashing function, we can limit number of canonical types for each unique type
+ * signature to a very small number, allowing to find canonical type for any
+ * duplicated type very quickly.
+ *
+ * Struct/union deduplication is the most critical part and algorithm for
+ * deduplicating structs/unions is described in greater details in comments for
+ * `btf_dedup_is_equiv` function.
+ */
+int btf__dedup(struct btf *btf, struct btf_ext *btf_ext,
+ const struct btf_dedup_opts *opts)
+{
+ struct btf_dedup *d = btf_dedup_new(btf, btf_ext, opts);
+ int err;
+
+ if (IS_ERR(d)) {
+ pr_debug("btf_dedup_new failed: %ld", PTR_ERR(d));
+ return -EINVAL;
+ }
+
+ err = btf_dedup_strings(d);
+ if (err < 0) {
+ pr_debug("btf_dedup_strings failed:%d\n", err);
+ goto done;
+ }
+ err = btf_dedup_prim_types(d);
+ if (err < 0) {
+ pr_debug("btf_dedup_prim_types failed:%d\n", err);
+ goto done;
+ }
+ err = btf_dedup_struct_types(d);
+ if (err < 0) {
+ pr_debug("btf_dedup_struct_types failed:%d\n", err);
+ goto done;
+ }
+ err = btf_dedup_ref_types(d);
+ if (err < 0) {
+ pr_debug("btf_dedup_ref_types failed:%d\n", err);
+ goto done;
+ }
+ err = btf_dedup_compact_types(d);
+ if (err < 0) {
+ pr_debug("btf_dedup_compact_types failed:%d\n", err);
+ goto done;
+ }
+ err = btf_dedup_remap_types(d);
+ if (err < 0) {
+ pr_debug("btf_dedup_remap_types failed:%d\n", err);
+ goto done;
+ }
+
+done:
+ btf_dedup_free(d);
+ return err;
+}
+
+#define BTF_UNPROCESSED_ID ((__u32)-1)
+#define BTF_IN_PROGRESS_ID ((__u32)-2)
+
+struct btf_dedup {
+ /* .BTF section to be deduped in-place */
+ struct btf *btf;
+ /*
+ * Optional .BTF.ext section. When provided, any strings referenced
+ * from it will be taken into account when deduping strings
+ */
+ struct btf_ext *btf_ext;
+ /*
+ * This is a map from any type's signature hash to a list of possible
+ * canonical representative type candidates. Hash collisions are
+ * ignored, so even types of various kinds can share same list of
+ * candidates, which is fine because we rely on subsequent
+ * btf_xxx_equal() checks to authoritatively verify type equality.
+ */
+ struct hashmap *dedup_table;
+ /* Canonical types map */
+ __u32 *map;
+ /* Hypothetical mapping, used during type graph equivalence checks */
+ __u32 *hypot_map;
+ __u32 *hypot_list;
+ size_t hypot_cnt;
+ size_t hypot_cap;
+ /* Various option modifying behavior of algorithm */
+ struct btf_dedup_opts opts;
+};
+
+struct btf_str_ptr {
+ const char *str;
+ __u32 new_off;
+ bool used;
+};
+
+struct btf_str_ptrs {
+ struct btf_str_ptr *ptrs;
+ const char *data;
+ __u32 cnt;
+ __u32 cap;
+};
+
+static long hash_combine(long h, long value)
+{
+ return h * 31 + value;
+}
+
+#define for_each_dedup_cand(d, node, hash) \
+ hashmap__for_each_key_entry(d->dedup_table, node, (void *)hash)
+
+static int btf_dedup_table_add(struct btf_dedup *d, long hash, __u32 type_id)
+{
+ return hashmap__append(d->dedup_table,
+ (void *)hash, (void *)(long)type_id);
+}
+
+static int btf_dedup_hypot_map_add(struct btf_dedup *d,
+ __u32 from_id, __u32 to_id)
+{
+ if (d->hypot_cnt == d->hypot_cap) {
+ __u32 *new_list;
+
+ d->hypot_cap += max(16, d->hypot_cap / 2);
+ new_list = realloc(d->hypot_list, sizeof(__u32) * d->hypot_cap);
+ if (!new_list)
+ return -ENOMEM;
+ d->hypot_list = new_list;
+ }
+ d->hypot_list[d->hypot_cnt++] = from_id;
+ d->hypot_map[from_id] = to_id;
+ return 0;
+}
+
+static void btf_dedup_clear_hypot_map(struct btf_dedup *d)
+{
+ int i;
+
+ for (i = 0; i < d->hypot_cnt; i++)
+ d->hypot_map[d->hypot_list[i]] = BTF_UNPROCESSED_ID;
+ d->hypot_cnt = 0;
+}
+
+static void btf_dedup_free(struct btf_dedup *d)
+{
+ hashmap__free(d->dedup_table);
+ d->dedup_table = NULL;
+
+ free(d->map);
+ d->map = NULL;
+
+ free(d->hypot_map);
+ d->hypot_map = NULL;
+
+ free(d->hypot_list);
+ d->hypot_list = NULL;
+
+ free(d);
+}
+
+static size_t btf_dedup_identity_hash_fn(const void *key, void *ctx)
+{
+ return (size_t)key;
+}
+
+static size_t btf_dedup_collision_hash_fn(const void *key, void *ctx)
+{
+ return 0;
+}
+
+static bool btf_dedup_equal_fn(const void *k1, const void *k2, void *ctx)
+{
+ return k1 == k2;
+}
+
+static struct btf_dedup *btf_dedup_new(struct btf *btf, struct btf_ext *btf_ext,
+ const struct btf_dedup_opts *opts)
+{
+ struct btf_dedup *d = calloc(1, sizeof(struct btf_dedup));
+ hashmap_hash_fn hash_fn = btf_dedup_identity_hash_fn;
+ int i, err = 0;
+
+ if (!d)
+ return ERR_PTR(-ENOMEM);
+
+ d->opts.dont_resolve_fwds = opts && opts->dont_resolve_fwds;
+ /* dedup_table_size is now used only to force collisions in tests */
+ if (opts && opts->dedup_table_size == 1)
+ hash_fn = btf_dedup_collision_hash_fn;
+
+ d->btf = btf;
+ d->btf_ext = btf_ext;
+
+ d->dedup_table = hashmap__new(hash_fn, btf_dedup_equal_fn, NULL);
+ if (IS_ERR(d->dedup_table)) {
+ err = PTR_ERR(d->dedup_table);
+ d->dedup_table = NULL;
+ goto done;
+ }
+
+ d->map = malloc(sizeof(__u32) * (1 + btf->nr_types));
+ if (!d->map) {
+ err = -ENOMEM;
+ goto done;
+ }
+ /* special BTF "void" type is made canonical immediately */
+ d->map[0] = 0;
+ for (i = 1; i <= btf->nr_types; i++) {
+ struct btf_type *t = d->btf->types[i];
+
+ /* VAR and DATASEC are never deduped and are self-canonical */
+ if (btf_is_var(t) || btf_is_datasec(t))
+ d->map[i] = i;
+ else
+ d->map[i] = BTF_UNPROCESSED_ID;
+ }
+
+ d->hypot_map = malloc(sizeof(__u32) * (1 + btf->nr_types));
+ if (!d->hypot_map) {
+ err = -ENOMEM;
+ goto done;
+ }
+ for (i = 0; i <= btf->nr_types; i++)
+ d->hypot_map[i] = BTF_UNPROCESSED_ID;
+
+done:
+ if (err) {
+ btf_dedup_free(d);
+ return ERR_PTR(err);
+ }
+
+ return d;
+}
+
+typedef int (*str_off_fn_t)(__u32 *str_off_ptr, void *ctx);
+
+/*
+ * Iterate over all possible places in .BTF and .BTF.ext that can reference
+ * string and pass pointer to it to a provided callback `fn`.
+ */
+static int btf_for_each_str_off(struct btf_dedup *d, str_off_fn_t fn, void *ctx)
+{
+ void *line_data_cur, *line_data_end;
+ int i, j, r, rec_size;
+ struct btf_type *t;
+
+ for (i = 1; i <= d->btf->nr_types; i++) {
+ t = d->btf->types[i];
+ r = fn(&t->name_off, ctx);
+ if (r)
+ return r;
+
+ switch (btf_kind(t)) {
+ case BTF_KIND_STRUCT:
+ case BTF_KIND_UNION: {
+ struct btf_member *m = btf_members(t);
+ __u16 vlen = btf_vlen(t);
+
+ for (j = 0; j < vlen; j++) {
+ r = fn(&m->name_off, ctx);
+ if (r)
+ return r;
+ m++;
+ }
+ break;
+ }
+ case BTF_KIND_ENUM: {
+ struct btf_enum *m = btf_enum(t);
+ __u16 vlen = btf_vlen(t);
+
+ for (j = 0; j < vlen; j++) {
+ r = fn(&m->name_off, ctx);
+ if (r)
+ return r;
+ m++;
+ }
+ break;
+ }
+ case BTF_KIND_FUNC_PROTO: {
+ struct btf_param *m = btf_params(t);
+ __u16 vlen = btf_vlen(t);
+
+ for (j = 0; j < vlen; j++) {
+ r = fn(&m->name_off, ctx);
+ if (r)
+ return r;
+ m++;
+ }
+ break;
+ }
+ default:
+ break;
+ }
+ }
+
+ if (!d->btf_ext)
+ return 0;
+
+ line_data_cur = d->btf_ext->line_info.info;
+ line_data_end = d->btf_ext->line_info.info + d->btf_ext->line_info.len;
+ rec_size = d->btf_ext->line_info.rec_size;
+
+ while (line_data_cur < line_data_end) {
+ struct btf_ext_info_sec *sec = line_data_cur;
+ struct bpf_line_info_min *line_info;
+ __u32 num_info = sec->num_info;
+
+ r = fn(&sec->sec_name_off, ctx);
+ if (r)
+ return r;
+
+ line_data_cur += sizeof(struct btf_ext_info_sec);
+ for (i = 0; i < num_info; i++) {
+ line_info = line_data_cur;
+ r = fn(&line_info->file_name_off, ctx);
+ if (r)
+ return r;
+ r = fn(&line_info->line_off, ctx);
+ if (r)
+ return r;
+ line_data_cur += rec_size;
+ }
+ }
+
+ return 0;
+}
+
+static int str_sort_by_content(const void *a1, const void *a2)
+{
+ const struct btf_str_ptr *p1 = a1;
+ const struct btf_str_ptr *p2 = a2;
+
+ return strcmp(p1->str, p2->str);
+}
+
+static int str_sort_by_offset(const void *a1, const void *a2)
+{
+ const struct btf_str_ptr *p1 = a1;
+ const struct btf_str_ptr *p2 = a2;
+
+ if (p1->str != p2->str)
+ return p1->str < p2->str ? -1 : 1;
+ return 0;
+}
+
+static int btf_dedup_str_ptr_cmp(const void *str_ptr, const void *pelem)
+{
+ const struct btf_str_ptr *p = pelem;
+
+ if (str_ptr != p->str)
+ return (const char *)str_ptr < p->str ? -1 : 1;
+ return 0;
+}
+
+static int btf_str_mark_as_used(__u32 *str_off_ptr, void *ctx)
+{
+ struct btf_str_ptrs *strs;
+ struct btf_str_ptr *s;
+
+ if (*str_off_ptr == 0)
+ return 0;
+
+ strs = ctx;
+ s = bsearch(strs->data + *str_off_ptr, strs->ptrs, strs->cnt,
+ sizeof(struct btf_str_ptr), btf_dedup_str_ptr_cmp);
+ if (!s)
+ return -EINVAL;
+ s->used = true;
+ return 0;
+}
+
+static int btf_str_remap_offset(__u32 *str_off_ptr, void *ctx)
+{
+ struct btf_str_ptrs *strs;
+ struct btf_str_ptr *s;
+
+ if (*str_off_ptr == 0)
+ return 0;
+
+ strs = ctx;
+ s = bsearch(strs->data + *str_off_ptr, strs->ptrs, strs->cnt,
+ sizeof(struct btf_str_ptr), btf_dedup_str_ptr_cmp);
+ if (!s)
+ return -EINVAL;
+ *str_off_ptr = s->new_off;
+ return 0;
+}
+
+/*
+ * Dedup string and filter out those that are not referenced from either .BTF
+ * or .BTF.ext (if provided) sections.
+ *
+ * This is done by building index of all strings in BTF's string section,
+ * then iterating over all entities that can reference strings (e.g., type
+ * names, struct field names, .BTF.ext line info, etc) and marking corresponding
+ * strings as used. After that all used strings are deduped and compacted into
+ * sequential blob of memory and new offsets are calculated. Then all the string
+ * references are iterated again and rewritten using new offsets.
+ */
+static int btf_dedup_strings(struct btf_dedup *d)
+{
+ const struct btf_header *hdr = d->btf->hdr;
+ char *start = (char *)d->btf->nohdr_data + hdr->str_off;
+ char *end = start + d->btf->hdr->str_len;
+ char *p = start, *tmp_strs = NULL;
+ struct btf_str_ptrs strs = {
+ .cnt = 0,
+ .cap = 0,
+ .ptrs = NULL,
+ .data = start,
+ };
+ int i, j, err = 0, grp_idx;
+ bool grp_used;
+
+ /* build index of all strings */
+ while (p < end) {
+ if (strs.cnt + 1 > strs.cap) {
+ struct btf_str_ptr *new_ptrs;
+
+ strs.cap += max(strs.cnt / 2, 16);
+ new_ptrs = realloc(strs.ptrs,
+ sizeof(strs.ptrs[0]) * strs.cap);
+ if (!new_ptrs) {
+ err = -ENOMEM;
+ goto done;
+ }
+ strs.ptrs = new_ptrs;
+ }
+
+ strs.ptrs[strs.cnt].str = p;
+ strs.ptrs[strs.cnt].used = false;
+
+ p += strlen(p) + 1;
+ strs.cnt++;
+ }
+
+ /* temporary storage for deduplicated strings */
+ tmp_strs = malloc(d->btf->hdr->str_len);
+ if (!tmp_strs) {
+ err = -ENOMEM;
+ goto done;
+ }
+
+ /* mark all used strings */
+ strs.ptrs[0].used = true;
+ err = btf_for_each_str_off(d, btf_str_mark_as_used, &strs);
+ if (err)
+ goto done;
+
+ /* sort strings by context, so that we can identify duplicates */
+ qsort(strs.ptrs, strs.cnt, sizeof(strs.ptrs[0]), str_sort_by_content);
+
+ /*
+ * iterate groups of equal strings and if any instance in a group was
+ * referenced, emit single instance and remember new offset
+ */
+ p = tmp_strs;
+ grp_idx = 0;
+ grp_used = strs.ptrs[0].used;
+ /* iterate past end to avoid code duplication after loop */
+ for (i = 1; i <= strs.cnt; i++) {
+ /*
+ * when i == strs.cnt, we want to skip string comparison and go
+ * straight to handling last group of strings (otherwise we'd
+ * need to handle last group after the loop w/ duplicated code)
+ */
+ if (i < strs.cnt &&
+ !strcmp(strs.ptrs[i].str, strs.ptrs[grp_idx].str)) {
+ grp_used = grp_used || strs.ptrs[i].used;
+ continue;
+ }
+
+ /*
+ * this check would have been required after the loop to handle
+ * last group of strings, but due to <= condition in a loop
+ * we avoid that duplication
+ */
+ if (grp_used) {
+ int new_off = p - tmp_strs;
+ __u32 len = strlen(strs.ptrs[grp_idx].str);
+
+ memmove(p, strs.ptrs[grp_idx].str, len + 1);
+ for (j = grp_idx; j < i; j++)
+ strs.ptrs[j].new_off = new_off;
+ p += len + 1;
+ }
+
+ if (i < strs.cnt) {
+ grp_idx = i;
+ grp_used = strs.ptrs[i].used;
+ }
+ }
+
+ /* replace original strings with deduped ones */
+ d->btf->hdr->str_len = p - tmp_strs;
+ memmove(start, tmp_strs, d->btf->hdr->str_len);
+ end = start + d->btf->hdr->str_len;
+
+ /* restore original order for further binary search lookups */
+ qsort(strs.ptrs, strs.cnt, sizeof(strs.ptrs[0]), str_sort_by_offset);
+
+ /* remap string offsets */
+ err = btf_for_each_str_off(d, btf_str_remap_offset, &strs);
+ if (err)
+ goto done;
+
+ d->btf->hdr->str_len = end - start;
+
+done:
+ free(tmp_strs);
+ free(strs.ptrs);
+ return err;
+}
+
+static long btf_hash_common(struct btf_type *t)
+{
+ long h;
+
+ h = hash_combine(0, t->name_off);
+ h = hash_combine(h, t->info);
+ h = hash_combine(h, t->size);
+ return h;
+}
+
+static bool btf_equal_common(struct btf_type *t1, struct btf_type *t2)
+{
+ return t1->name_off == t2->name_off &&
+ t1->info == t2->info &&
+ t1->size == t2->size;
+}
+
+/* Calculate type signature hash of INT. */
+static long btf_hash_int(struct btf_type *t)
+{
+ __u32 info = *(__u32 *)(t + 1);
+ long h;
+
+ h = btf_hash_common(t);
+ h = hash_combine(h, info);
+ return h;
+}
+
+/* Check structural equality of two INTs. */
+static bool btf_equal_int(struct btf_type *t1, struct btf_type *t2)
+{
+ __u32 info1, info2;
+
+ if (!btf_equal_common(t1, t2))
+ return false;
+ info1 = *(__u32 *)(t1 + 1);
+ info2 = *(__u32 *)(t2 + 1);
+ return info1 == info2;
+}
+
+/* Calculate type signature hash of ENUM. */
+static long btf_hash_enum(struct btf_type *t)
+{
+ long h;
+
+ /* don't hash vlen and enum members to support enum fwd resolving */
+ h = hash_combine(0, t->name_off);
+ h = hash_combine(h, t->info & ~0xffff);
+ h = hash_combine(h, t->size);
+ return h;
+}
+
+/* Check structural equality of two ENUMs. */
+static bool btf_equal_enum(struct btf_type *t1, struct btf_type *t2)
+{
+ const struct btf_enum *m1, *m2;
+ __u16 vlen;
+ int i;
+
+ if (!btf_equal_common(t1, t2))
+ return false;
+
+ vlen = btf_vlen(t1);
+ m1 = btf_enum(t1);
+ m2 = btf_enum(t2);
+ for (i = 0; i < vlen; i++) {
+ if (m1->name_off != m2->name_off || m1->val != m2->val)
+ return false;
+ m1++;
+ m2++;
+ }
+ return true;
+}
+
+static inline bool btf_is_enum_fwd(struct btf_type *t)
+{
+ return btf_is_enum(t) && btf_vlen(t) == 0;
+}
+
+static bool btf_compat_enum(struct btf_type *t1, struct btf_type *t2)
+{
+ if (!btf_is_enum_fwd(t1) && !btf_is_enum_fwd(t2))
+ return btf_equal_enum(t1, t2);
+ /* ignore vlen when comparing */
+ return t1->name_off == t2->name_off &&
+ (t1->info & ~0xffff) == (t2->info & ~0xffff) &&
+ t1->size == t2->size;
+}
+
+/*
+ * Calculate type signature hash of STRUCT/UNION, ignoring referenced type IDs,
+ * as referenced type IDs equivalence is established separately during type
+ * graph equivalence check algorithm.
+ */
+static long btf_hash_struct(struct btf_type *t)
+{
+ const struct btf_member *member = btf_members(t);
+ __u32 vlen = btf_vlen(t);
+ long h = btf_hash_common(t);
+ int i;
+
+ for (i = 0; i < vlen; i++) {
+ h = hash_combine(h, member->name_off);
+ h = hash_combine(h, member->offset);
+ /* no hashing of referenced type ID, it can be unresolved yet */
+ member++;
+ }
+ return h;
+}
+
+/*
+ * Check structural compatibility of two FUNC_PROTOs, ignoring referenced type
+ * IDs. This check is performed during type graph equivalence check and
+ * referenced types equivalence is checked separately.
+ */
+static bool btf_shallow_equal_struct(struct btf_type *t1, struct btf_type *t2)
+{
+ const struct btf_member *m1, *m2;
+ __u16 vlen;
+ int i;
+
+ if (!btf_equal_common(t1, t2))
+ return false;
+
+ vlen = btf_vlen(t1);
+ m1 = btf_members(t1);
+ m2 = btf_members(t2);
+ for (i = 0; i < vlen; i++) {
+ if (m1->name_off != m2->name_off || m1->offset != m2->offset)
+ return false;
+ m1++;
+ m2++;
+ }
+ return true;
+}
+
+/*
+ * Calculate type signature hash of ARRAY, including referenced type IDs,
+ * under assumption that they were already resolved to canonical type IDs and
+ * are not going to change.
+ */
+static long btf_hash_array(struct btf_type *t)
+{
+ const struct btf_array *info = btf_array(t);
+ long h = btf_hash_common(t);
+
+ h = hash_combine(h, info->type);
+ h = hash_combine(h, info->index_type);
+ h = hash_combine(h, info->nelems);
+ return h;
+}
+
+/*
+ * Check exact equality of two ARRAYs, taking into account referenced
+ * type IDs, under assumption that they were already resolved to canonical
+ * type IDs and are not going to change.
+ * This function is called during reference types deduplication to compare
+ * ARRAY to potential canonical representative.
+ */
+static bool btf_equal_array(struct btf_type *t1, struct btf_type *t2)
+{
+ const struct btf_array *info1, *info2;
+
+ if (!btf_equal_common(t1, t2))
+ return false;
+
+ info1 = btf_array(t1);
+ info2 = btf_array(t2);
+ return info1->type == info2->type &&
+ info1->index_type == info2->index_type &&
+ info1->nelems == info2->nelems;
+}
+
+/*
+ * Check structural compatibility of two ARRAYs, ignoring referenced type
+ * IDs. This check is performed during type graph equivalence check and
+ * referenced types equivalence is checked separately.
+ */
+static bool btf_compat_array(struct btf_type *t1, struct btf_type *t2)
+{
+ if (!btf_equal_common(t1, t2))
+ return false;
+
+ return btf_array(t1)->nelems == btf_array(t2)->nelems;
+}
+
+/*
+ * Calculate type signature hash of FUNC_PROTO, including referenced type IDs,
+ * under assumption that they were already resolved to canonical type IDs and
+ * are not going to change.
+ */
+static long btf_hash_fnproto(struct btf_type *t)
+{
+ const struct btf_param *member = btf_params(t);
+ __u16 vlen = btf_vlen(t);
+ long h = btf_hash_common(t);
+ int i;
+
+ for (i = 0; i < vlen; i++) {
+ h = hash_combine(h, member->name_off);
+ h = hash_combine(h, member->type);
+ member++;
+ }
+ return h;
+}
+
+/*
+ * Check exact equality of two FUNC_PROTOs, taking into account referenced
+ * type IDs, under assumption that they were already resolved to canonical
+ * type IDs and are not going to change.
+ * This function is called during reference types deduplication to compare
+ * FUNC_PROTO to potential canonical representative.
+ */
+static bool btf_equal_fnproto(struct btf_type *t1, struct btf_type *t2)
+{
+ const struct btf_param *m1, *m2;
+ __u16 vlen;
+ int i;
+
+ if (!btf_equal_common(t1, t2))
+ return false;
+
+ vlen = btf_vlen(t1);
+ m1 = btf_params(t1);
+ m2 = btf_params(t2);
+ for (i = 0; i < vlen; i++) {
+ if (m1->name_off != m2->name_off || m1->type != m2->type)
+ return false;
+ m1++;
+ m2++;
+ }
+ return true;
+}
+
+/*
+ * Check structural compatibility of two FUNC_PROTOs, ignoring referenced type
+ * IDs. This check is performed during type graph equivalence check and
+ * referenced types equivalence is checked separately.
+ */
+static bool btf_compat_fnproto(struct btf_type *t1, struct btf_type *t2)
+{
+ const struct btf_param *m1, *m2;
+ __u16 vlen;
+ int i;
+
+ /* skip return type ID */
+ if (t1->name_off != t2->name_off || t1->info != t2->info)
+ return false;
+
+ vlen = btf_vlen(t1);
+ m1 = btf_params(t1);
+ m2 = btf_params(t2);
+ for (i = 0; i < vlen; i++) {
+ if (m1->name_off != m2->name_off)
+ return false;
+ m1++;
+ m2++;
+ }
+ return true;
+}
+
+/*
+ * Deduplicate primitive types, that can't reference other types, by calculating
+ * their type signature hash and comparing them with any possible canonical
+ * candidate. If no canonical candidate matches, type itself is marked as
+ * canonical and is added into `btf_dedup->dedup_table` as another candidate.
+ */
+static int btf_dedup_prim_type(struct btf_dedup *d, __u32 type_id)
+{
+ struct btf_type *t = d->btf->types[type_id];
+ struct hashmap_entry *hash_entry;
+ struct btf_type *cand;
+ /* if we don't find equivalent type, then we are canonical */
+ __u32 new_id = type_id;
+ __u32 cand_id;
+ long h;
+
+ switch (btf_kind(t)) {
+ case BTF_KIND_CONST:
+ case BTF_KIND_VOLATILE:
+ case BTF_KIND_RESTRICT:
+ case BTF_KIND_PTR:
+ case BTF_KIND_TYPEDEF:
+ case BTF_KIND_ARRAY:
+ case BTF_KIND_STRUCT:
+ case BTF_KIND_UNION:
+ case BTF_KIND_FUNC:
+ case BTF_KIND_FUNC_PROTO:
+ case BTF_KIND_VAR:
+ case BTF_KIND_DATASEC:
+ return 0;
+
+ case BTF_KIND_INT:
+ h = btf_hash_int(t);
+ for_each_dedup_cand(d, hash_entry, h) {
+ cand_id = (__u32)(long)hash_entry->value;
+ cand = d->btf->types[cand_id];
+ if (btf_equal_int(t, cand)) {
+ new_id = cand_id;
+ break;
+ }
+ }
+ break;
+
+ case BTF_KIND_ENUM:
+ h = btf_hash_enum(t);
+ for_each_dedup_cand(d, hash_entry, h) {
+ cand_id = (__u32)(long)hash_entry->value;
+ cand = d->btf->types[cand_id];
+ if (btf_equal_enum(t, cand)) {
+ new_id = cand_id;
+ break;
+ }
+ if (d->opts.dont_resolve_fwds)
+ continue;
+ if (btf_compat_enum(t, cand)) {
+ if (btf_is_enum_fwd(t)) {
+ /* resolve fwd to full enum */
+ new_id = cand_id;
+ break;
+ }
+ /* resolve canonical enum fwd to full enum */
+ d->map[cand_id] = type_id;
+ }
+ }
+ break;
+
+ case BTF_KIND_FWD:
+ h = btf_hash_common(t);
+ for_each_dedup_cand(d, hash_entry, h) {
+ cand_id = (__u32)(long)hash_entry->value;
+ cand = d->btf->types[cand_id];
+ if (btf_equal_common(t, cand)) {
+ new_id = cand_id;
+ break;
+ }
+ }
+ break;
+
+ default:
+ return -EINVAL;
+ }
+
+ d->map[type_id] = new_id;
+ if (type_id == new_id && btf_dedup_table_add(d, h, type_id))
+ return -ENOMEM;
+
+ return 0;
+}
+
+static int btf_dedup_prim_types(struct btf_dedup *d)
+{
+ int i, err;
+
+ for (i = 1; i <= d->btf->nr_types; i++) {
+ err = btf_dedup_prim_type(d, i);
+ if (err)
+ return err;
+ }
+ return 0;
+}
+
+/*
+ * Check whether type is already mapped into canonical one (could be to itself).
+ */
+static inline bool is_type_mapped(struct btf_dedup *d, uint32_t type_id)
+{
+ return d->map[type_id] <= BTF_MAX_NR_TYPES;
+}
+
+/*
+ * Resolve type ID into its canonical type ID, if any; otherwise return original
+ * type ID. If type is FWD and is resolved into STRUCT/UNION already, follow
+ * STRUCT/UNION link and resolve it into canonical type ID as well.
+ */
+static inline __u32 resolve_type_id(struct btf_dedup *d, __u32 type_id)
+{
+ while (is_type_mapped(d, type_id) && d->map[type_id] != type_id)
+ type_id = d->map[type_id];
+ return type_id;
+}
+
+/*
+ * Resolve FWD to underlying STRUCT/UNION, if any; otherwise return original
+ * type ID.
+ */
+static uint32_t resolve_fwd_id(struct btf_dedup *d, uint32_t type_id)
+{
+ __u32 orig_type_id = type_id;
+
+ if (!btf_is_fwd(d->btf->types[type_id]))
+ return type_id;
+
+ while (is_type_mapped(d, type_id) && d->map[type_id] != type_id)
+ type_id = d->map[type_id];
+
+ if (!btf_is_fwd(d->btf->types[type_id]))
+ return type_id;
+
+ return orig_type_id;
+}
+
+
+static inline __u16 btf_fwd_kind(struct btf_type *t)
+{
+ return btf_kflag(t) ? BTF_KIND_UNION : BTF_KIND_STRUCT;
+}
+
+/*
+ * Check equivalence of BTF type graph formed by candidate struct/union (we'll
+ * call it "candidate graph" in this description for brevity) to a type graph
+ * formed by (potential) canonical struct/union ("canonical graph" for brevity
+ * here, though keep in mind that not all types in canonical graph are
+ * necessarily canonical representatives themselves, some of them might be
+ * duplicates or its uniqueness might not have been established yet).
+ * Returns:
+ * - >0, if type graphs are equivalent;
+ * - 0, if not equivalent;
+ * - <0, on error.
+ *
+ * Algorithm performs side-by-side DFS traversal of both type graphs and checks
+ * equivalence of BTF types at each step. If at any point BTF types in candidate
+ * and canonical graphs are not compatible structurally, whole graphs are
+ * incompatible. If types are structurally equivalent (i.e., all information
+ * except referenced type IDs is exactly the same), a mapping from `canon_id` to
+ * a `cand_id` is recored in hypothetical mapping (`btf_dedup->hypot_map`).
+ * If a type references other types, then those referenced types are checked
+ * for equivalence recursively.
+ *
+ * During DFS traversal, if we find that for current `canon_id` type we
+ * already have some mapping in hypothetical map, we check for two possible
+ * situations:
+ * - `canon_id` is mapped to exactly the same type as `cand_id`. This will
+ * happen when type graphs have cycles. In this case we assume those two
+ * types are equivalent.
+ * - `canon_id` is mapped to different type. This is contradiction in our
+ * hypothetical mapping, because same graph in canonical graph corresponds
+ * to two different types in candidate graph, which for equivalent type
+ * graphs shouldn't happen. This condition terminates equivalence check
+ * with negative result.
+ *
+ * If type graphs traversal exhausts types to check and find no contradiction,
+ * then type graphs are equivalent.
+ *
+ * When checking types for equivalence, there is one special case: FWD types.
+ * If FWD type resolution is allowed and one of the types (either from canonical
+ * or candidate graph) is FWD and other is STRUCT/UNION (depending on FWD's kind
+ * flag) and their names match, hypothetical mapping is updated to point from
+ * FWD to STRUCT/UNION. If graphs will be determined as equivalent successfully,
+ * this mapping will be used to record FWD -> STRUCT/UNION mapping permanently.
+ *
+ * Technically, this could lead to incorrect FWD to STRUCT/UNION resolution,
+ * if there are two exactly named (or anonymous) structs/unions that are
+ * compatible structurally, one of which has FWD field, while other is concrete
+ * STRUCT/UNION, but according to C sources they are different structs/unions
+ * that are referencing different types with the same name. This is extremely
+ * unlikely to happen, but btf_dedup API allows to disable FWD resolution if
+ * this logic is causing problems.
+ *
+ * Doing FWD resolution means that both candidate and/or canonical graphs can
+ * consists of portions of the graph that come from multiple compilation units.
+ * This is due to the fact that types within single compilation unit are always
+ * deduplicated and FWDs are already resolved, if referenced struct/union
+ * definiton is available. So, if we had unresolved FWD and found corresponding
+ * STRUCT/UNION, they will be from different compilation units. This
+ * consequently means that when we "link" FWD to corresponding STRUCT/UNION,
+ * type graph will likely have at least two different BTF types that describe
+ * same type (e.g., most probably there will be two different BTF types for the
+ * same 'int' primitive type) and could even have "overlapping" parts of type
+ * graph that describe same subset of types.
+ *
+ * This in turn means that our assumption that each type in canonical graph
+ * must correspond to exactly one type in candidate graph might not hold
+ * anymore and will make it harder to detect contradictions using hypothetical
+ * map. To handle this problem, we allow to follow FWD -> STRUCT/UNION
+ * resolution only in canonical graph. FWDs in candidate graphs are never
+ * resolved. To see why it's OK, let's check all possible situations w.r.t. FWDs
+ * that can occur:
+ * - Both types in canonical and candidate graphs are FWDs. If they are
+ * structurally equivalent, then they can either be both resolved to the
+ * same STRUCT/UNION or not resolved at all. In both cases they are
+ * equivalent and there is no need to resolve FWD on candidate side.
+ * - Both types in canonical and candidate graphs are concrete STRUCT/UNION,
+ * so nothing to resolve as well, algorithm will check equivalence anyway.
+ * - Type in canonical graph is FWD, while type in candidate is concrete
+ * STRUCT/UNION. In this case candidate graph comes from single compilation
+ * unit, so there is exactly one BTF type for each unique C type. After
+ * resolving FWD into STRUCT/UNION, there might be more than one BTF type
+ * in canonical graph mapping to single BTF type in candidate graph, but
+ * because hypothetical mapping maps from canonical to candidate types, it's
+ * alright, and we still maintain the property of having single `canon_id`
+ * mapping to single `cand_id` (there could be two different `canon_id`
+ * mapped to the same `cand_id`, but it's not contradictory).
+ * - Type in canonical graph is concrete STRUCT/UNION, while type in candidate
+ * graph is FWD. In this case we are just going to check compatibility of
+ * STRUCT/UNION and corresponding FWD, and if they are compatible, we'll
+ * assume that whatever STRUCT/UNION FWD resolves to must be equivalent to
+ * a concrete STRUCT/UNION from canonical graph. If the rest of type graphs
+ * turn out equivalent, we'll re-resolve FWD to concrete STRUCT/UNION from
+ * canonical graph.
+ */
+static int btf_dedup_is_equiv(struct btf_dedup *d, __u32 cand_id,
+ __u32 canon_id)
+{
+ struct btf_type *cand_type;
+ struct btf_type *canon_type;
+ __u32 hypot_type_id;
+ __u16 cand_kind;
+ __u16 canon_kind;
+ int i, eq;
+
+ /* if both resolve to the same canonical, they must be equivalent */
+ if (resolve_type_id(d, cand_id) == resolve_type_id(d, canon_id))
+ return 1;
+
+ canon_id = resolve_fwd_id(d, canon_id);
+
+ hypot_type_id = d->hypot_map[canon_id];
+ if (hypot_type_id <= BTF_MAX_NR_TYPES)
+ return hypot_type_id == cand_id;
+
+ if (btf_dedup_hypot_map_add(d, canon_id, cand_id))
+ return -ENOMEM;
+
+ cand_type = d->btf->types[cand_id];
+ canon_type = d->btf->types[canon_id];
+ cand_kind = btf_kind(cand_type);
+ canon_kind = btf_kind(canon_type);
+
+ if (cand_type->name_off != canon_type->name_off)
+ return 0;
+
+ /* FWD <--> STRUCT/UNION equivalence check, if enabled */
+ if (!d->opts.dont_resolve_fwds
+ && (cand_kind == BTF_KIND_FWD || canon_kind == BTF_KIND_FWD)
+ && cand_kind != canon_kind) {
+ __u16 real_kind;
+ __u16 fwd_kind;
+
+ if (cand_kind == BTF_KIND_FWD) {
+ real_kind = canon_kind;
+ fwd_kind = btf_fwd_kind(cand_type);
+ } else {
+ real_kind = cand_kind;
+ fwd_kind = btf_fwd_kind(canon_type);
+ }
+ return fwd_kind == real_kind;
+ }
+
+ if (cand_kind != canon_kind)
+ return 0;
+
+ switch (cand_kind) {
+ case BTF_KIND_INT:
+ return btf_equal_int(cand_type, canon_type);
+
+ case BTF_KIND_ENUM:
+ if (d->opts.dont_resolve_fwds)
+ return btf_equal_enum(cand_type, canon_type);
+ else
+ return btf_compat_enum(cand_type, canon_type);
+
+ case BTF_KIND_FWD:
+ return btf_equal_common(cand_type, canon_type);
+
+ case BTF_KIND_CONST:
+ case BTF_KIND_VOLATILE:
+ case BTF_KIND_RESTRICT:
+ case BTF_KIND_PTR:
+ case BTF_KIND_TYPEDEF:
+ case BTF_KIND_FUNC:
+ if (cand_type->info != canon_type->info)
+ return 0;
+ return btf_dedup_is_equiv(d, cand_type->type, canon_type->type);
+
+ case BTF_KIND_ARRAY: {
+ const struct btf_array *cand_arr, *canon_arr;
+
+ if (!btf_compat_array(cand_type, canon_type))
+ return 0;
+ cand_arr = btf_array(cand_type);
+ canon_arr = btf_array(canon_type);
+ eq = btf_dedup_is_equiv(d,
+ cand_arr->index_type, canon_arr->index_type);
+ if (eq <= 0)
+ return eq;
+ return btf_dedup_is_equiv(d, cand_arr->type, canon_arr->type);
+ }
+
+ case BTF_KIND_STRUCT:
+ case BTF_KIND_UNION: {
+ const struct btf_member *cand_m, *canon_m;
+ __u16 vlen;
+
+ if (!btf_shallow_equal_struct(cand_type, canon_type))
+ return 0;
+ vlen = btf_vlen(cand_type);
+ cand_m = btf_members(cand_type);
+ canon_m = btf_members(canon_type);
+ for (i = 0; i < vlen; i++) {
+ eq = btf_dedup_is_equiv(d, cand_m->type, canon_m->type);
+ if (eq <= 0)
+ return eq;
+ cand_m++;
+ canon_m++;
+ }
+
+ return 1;
+ }
+
+ case BTF_KIND_FUNC_PROTO: {
+ const struct btf_param *cand_p, *canon_p;
+ __u16 vlen;
+
+ if (!btf_compat_fnproto(cand_type, canon_type))
+ return 0;
+ eq = btf_dedup_is_equiv(d, cand_type->type, canon_type->type);
+ if (eq <= 0)
+ return eq;
+ vlen = btf_vlen(cand_type);
+ cand_p = btf_params(cand_type);
+ canon_p = btf_params(canon_type);
+ for (i = 0; i < vlen; i++) {
+ eq = btf_dedup_is_equiv(d, cand_p->type, canon_p->type);
+ if (eq <= 0)
+ return eq;
+ cand_p++;
+ canon_p++;
+ }
+ return 1;
+ }
+
+ default:
+ return -EINVAL;
+ }
+ return 0;
+}
+
+/*
+ * Use hypothetical mapping, produced by successful type graph equivalence
+ * check, to augment existing struct/union canonical mapping, where possible.
+ *
+ * If BTF_KIND_FWD resolution is allowed, this mapping is also used to record
+ * FWD -> STRUCT/UNION correspondence as well. FWD resolution is bidirectional:
+ * it doesn't matter if FWD type was part of canonical graph or candidate one,
+ * we are recording the mapping anyway. As opposed to carefulness required
+ * for struct/union correspondence mapping (described below), for FWD resolution
+ * it's not important, as by the time that FWD type (reference type) will be
+ * deduplicated all structs/unions will be deduped already anyway.
+ *
+ * Recording STRUCT/UNION mapping is purely a performance optimization and is
+ * not required for correctness. It needs to be done carefully to ensure that
+ * struct/union from candidate's type graph is not mapped into corresponding
+ * struct/union from canonical type graph that itself hasn't been resolved into
+ * canonical representative. The only guarantee we have is that canonical
+ * struct/union was determined as canonical and that won't change. But any
+ * types referenced through that struct/union fields could have been not yet
+ * resolved, so in case like that it's too early to establish any kind of
+ * correspondence between structs/unions.
+ *
+ * No canonical correspondence is derived for primitive types (they are already
+ * deduplicated completely already anyway) or reference types (they rely on
+ * stability of struct/union canonical relationship for equivalence checks).
+ */
+static void btf_dedup_merge_hypot_map(struct btf_dedup *d)
+{
+ __u32 cand_type_id, targ_type_id;
+ __u16 t_kind, c_kind;
+ __u32 t_id, c_id;
+ int i;
+
+ for (i = 0; i < d->hypot_cnt; i++) {
+ cand_type_id = d->hypot_list[i];
+ targ_type_id = d->hypot_map[cand_type_id];
+ t_id = resolve_type_id(d, targ_type_id);
+ c_id = resolve_type_id(d, cand_type_id);
+ t_kind = btf_kind(d->btf->types[t_id]);
+ c_kind = btf_kind(d->btf->types[c_id]);
+ /*
+ * Resolve FWD into STRUCT/UNION.
+ * It's ok to resolve FWD into STRUCT/UNION that's not yet
+ * mapped to canonical representative (as opposed to
+ * STRUCT/UNION <--> STRUCT/UNION mapping logic below), because
+ * eventually that struct is going to be mapped and all resolved
+ * FWDs will automatically resolve to correct canonical
+ * representative. This will happen before ref type deduping,
+ * which critically depends on stability of these mapping. This
+ * stability is not a requirement for STRUCT/UNION equivalence
+ * checks, though.
+ */
+ if (t_kind != BTF_KIND_FWD && c_kind == BTF_KIND_FWD)
+ d->map[c_id] = t_id;
+ else if (t_kind == BTF_KIND_FWD && c_kind != BTF_KIND_FWD)
+ d->map[t_id] = c_id;
+
+ if ((t_kind == BTF_KIND_STRUCT || t_kind == BTF_KIND_UNION) &&
+ c_kind != BTF_KIND_FWD &&
+ is_type_mapped(d, c_id) &&
+ !is_type_mapped(d, t_id)) {
+ /*
+ * as a perf optimization, we can map struct/union
+ * that's part of type graph we just verified for
+ * equivalence. We can do that for struct/union that has
+ * canonical representative only, though.
+ */
+ d->map[t_id] = c_id;
+ }
+ }
+}
+
+/*
+ * Deduplicate struct/union types.
+ *
+ * For each struct/union type its type signature hash is calculated, taking
+ * into account type's name, size, number, order and names of fields, but
+ * ignoring type ID's referenced from fields, because they might not be deduped
+ * completely until after reference types deduplication phase. This type hash
+ * is used to iterate over all potential canonical types, sharing same hash.
+ * For each canonical candidate we check whether type graphs that they form
+ * (through referenced types in fields and so on) are equivalent using algorithm
+ * implemented in `btf_dedup_is_equiv`. If such equivalence is found and
+ * BTF_KIND_FWD resolution is allowed, then hypothetical mapping
+ * (btf_dedup->hypot_map) produced by aforementioned type graph equivalence
+ * algorithm is used to record FWD -> STRUCT/UNION mapping. It's also used to
+ * potentially map other structs/unions to their canonical representatives,
+ * if such relationship hasn't yet been established. This speeds up algorithm
+ * by eliminating some of the duplicate work.
+ *
+ * If no matching canonical representative was found, struct/union is marked
+ * as canonical for itself and is added into btf_dedup->dedup_table hash map
+ * for further look ups.
+ */
+static int btf_dedup_struct_type(struct btf_dedup *d, __u32 type_id)
+{
+ struct btf_type *cand_type, *t;
+ struct hashmap_entry *hash_entry;
+ /* if we don't find equivalent type, then we are canonical */
+ __u32 new_id = type_id;
+ __u16 kind;
+ long h;
+
+ /* already deduped or is in process of deduping (loop detected) */
+ if (d->map[type_id] <= BTF_MAX_NR_TYPES)
+ return 0;
+
+ t = d->btf->types[type_id];
+ kind = btf_kind(t);
+
+ if (kind != BTF_KIND_STRUCT && kind != BTF_KIND_UNION)
+ return 0;
+
+ h = btf_hash_struct(t);
+ for_each_dedup_cand(d, hash_entry, h) {
+ __u32 cand_id = (__u32)(long)hash_entry->value;
+ int eq;
+
+ /*
+ * Even though btf_dedup_is_equiv() checks for
+ * btf_shallow_equal_struct() internally when checking two
+ * structs (unions) for equivalence, we need to guard here
+ * from picking matching FWD type as a dedup candidate.
+ * This can happen due to hash collision. In such case just
+ * relying on btf_dedup_is_equiv() would lead to potentially
+ * creating a loop (FWD -> STRUCT and STRUCT -> FWD), because
+ * FWD and compatible STRUCT/UNION are considered equivalent.
+ */
+ cand_type = d->btf->types[cand_id];
+ if (!btf_shallow_equal_struct(t, cand_type))
+ continue;
+
+ btf_dedup_clear_hypot_map(d);
+ eq = btf_dedup_is_equiv(d, type_id, cand_id);
+ if (eq < 0)
+ return eq;
+ if (!eq)
+ continue;
+ new_id = cand_id;
+ btf_dedup_merge_hypot_map(d);
+ break;
+ }
+
+ d->map[type_id] = new_id;
+ if (type_id == new_id && btf_dedup_table_add(d, h, type_id))
+ return -ENOMEM;
+
+ return 0;
+}
+
+static int btf_dedup_struct_types(struct btf_dedup *d)
+{
+ int i, err;
+
+ for (i = 1; i <= d->btf->nr_types; i++) {
+ err = btf_dedup_struct_type(d, i);
+ if (err)
+ return err;
+ }
+ return 0;
+}
+
+/*
+ * Deduplicate reference type.
+ *
+ * Once all primitive and struct/union types got deduplicated, we can easily
+ * deduplicate all other (reference) BTF types. This is done in two steps:
+ *
+ * 1. Resolve all referenced type IDs into their canonical type IDs. This
+ * resolution can be done either immediately for primitive or struct/union types
+ * (because they were deduped in previous two phases) or recursively for
+ * reference types. Recursion will always terminate at either primitive or
+ * struct/union type, at which point we can "unwind" chain of reference types
+ * one by one. There is no danger of encountering cycles because in C type
+ * system the only way to form type cycle is through struct/union, so any chain
+ * of reference types, even those taking part in a type cycle, will inevitably
+ * reach struct/union at some point.
+ *
+ * 2. Once all referenced type IDs are resolved into canonical ones, BTF type
+ * becomes "stable", in the sense that no further deduplication will cause
+ * any changes to it. With that, it's now possible to calculate type's signature
+ * hash (this time taking into account referenced type IDs) and loop over all
+ * potential canonical representatives. If no match was found, current type
+ * will become canonical representative of itself and will be added into
+ * btf_dedup->dedup_table as another possible canonical representative.
+ */
+static int btf_dedup_ref_type(struct btf_dedup *d, __u32 type_id)
+{
+ struct hashmap_entry *hash_entry;
+ __u32 new_id = type_id, cand_id;
+ struct btf_type *t, *cand;
+ /* if we don't find equivalent type, then we are representative type */
+ int ref_type_id;
+ long h;
+
+ if (d->map[type_id] == BTF_IN_PROGRESS_ID)
+ return -ELOOP;
+ if (d->map[type_id] <= BTF_MAX_NR_TYPES)
+ return resolve_type_id(d, type_id);
+
+ t = d->btf->types[type_id];
+ d->map[type_id] = BTF_IN_PROGRESS_ID;
+
+ switch (btf_kind(t)) {
+ case BTF_KIND_CONST:
+ case BTF_KIND_VOLATILE:
+ case BTF_KIND_RESTRICT:
+ case BTF_KIND_PTR:
+ case BTF_KIND_TYPEDEF:
+ case BTF_KIND_FUNC:
+ ref_type_id = btf_dedup_ref_type(d, t->type);
+ if (ref_type_id < 0)
+ return ref_type_id;
+ t->type = ref_type_id;
+
+ h = btf_hash_common(t);
+ for_each_dedup_cand(d, hash_entry, h) {
+ cand_id = (__u32)(long)hash_entry->value;
+ cand = d->btf->types[cand_id];
+ if (btf_equal_common(t, cand)) {
+ new_id = cand_id;
+ break;
+ }
+ }
+ break;
+
+ case BTF_KIND_ARRAY: {
+ struct btf_array *info = btf_array(t);
+
+ ref_type_id = btf_dedup_ref_type(d, info->type);
+ if (ref_type_id < 0)
+ return ref_type_id;
+ info->type = ref_type_id;
+
+ ref_type_id = btf_dedup_ref_type(d, info->index_type);
+ if (ref_type_id < 0)
+ return ref_type_id;
+ info->index_type = ref_type_id;
+
+ h = btf_hash_array(t);
+ for_each_dedup_cand(d, hash_entry, h) {
+ cand_id = (__u32)(long)hash_entry->value;
+ cand = d->btf->types[cand_id];
+ if (btf_equal_array(t, cand)) {
+ new_id = cand_id;
+ break;
+ }
+ }
+ break;
+ }
+
+ case BTF_KIND_FUNC_PROTO: {
+ struct btf_param *param;
+ __u16 vlen;
+ int i;
+
+ ref_type_id = btf_dedup_ref_type(d, t->type);
+ if (ref_type_id < 0)
+ return ref_type_id;
+ t->type = ref_type_id;
+
+ vlen = btf_vlen(t);
+ param = btf_params(t);
+ for (i = 0; i < vlen; i++) {
+ ref_type_id = btf_dedup_ref_type(d, param->type);
+ if (ref_type_id < 0)
+ return ref_type_id;
+ param->type = ref_type_id;
+ param++;
+ }
+
+ h = btf_hash_fnproto(t);
+ for_each_dedup_cand(d, hash_entry, h) {
+ cand_id = (__u32)(long)hash_entry->value;
+ cand = d->btf->types[cand_id];
+ if (btf_equal_fnproto(t, cand)) {
+ new_id = cand_id;
+ break;
+ }
+ }
+ break;
+ }
+
+ default:
+ return -EINVAL;
+ }
+
+ d->map[type_id] = new_id;
+ if (type_id == new_id && btf_dedup_table_add(d, h, type_id))
+ return -ENOMEM;
+
+ return new_id;
+}
+
+static int btf_dedup_ref_types(struct btf_dedup *d)
+{
+ int i, err;
+
+ for (i = 1; i <= d->btf->nr_types; i++) {
+ err = btf_dedup_ref_type(d, i);
+ if (err < 0)
+ return err;
+ }
+ /* we won't need d->dedup_table anymore */
+ hashmap__free(d->dedup_table);
+ d->dedup_table = NULL;
+ return 0;
+}
+
+/*
+ * Compact types.
+ *
+ * After we established for each type its corresponding canonical representative
+ * type, we now can eliminate types that are not canonical and leave only
+ * canonical ones layed out sequentially in memory by copying them over
+ * duplicates. During compaction btf_dedup->hypot_map array is reused to store
+ * a map from original type ID to a new compacted type ID, which will be used
+ * during next phase to "fix up" type IDs, referenced from struct/union and
+ * reference types.
+ */
+static int btf_dedup_compact_types(struct btf_dedup *d)
+{
+ struct btf_type **new_types;
+ __u32 next_type_id = 1;
+ char *types_start, *p;
+ int i, len;
+
+ /* we are going to reuse hypot_map to store compaction remapping */
+ d->hypot_map[0] = 0;
+ for (i = 1; i <= d->btf->nr_types; i++)
+ d->hypot_map[i] = BTF_UNPROCESSED_ID;
+
+ types_start = d->btf->nohdr_data + d->btf->hdr->type_off;
+ p = types_start;
+
+ for (i = 1; i <= d->btf->nr_types; i++) {
+ if (d->map[i] != i)
+ continue;
+
+ len = btf_type_size(d->btf->types[i]);
+ if (len < 0)
+ return len;
+
+ memmove(p, d->btf->types[i], len);
+ d->hypot_map[i] = next_type_id;
+ d->btf->types[next_type_id] = (struct btf_type *)p;
+ p += len;
+ next_type_id++;
+ }
+
+ /* shrink struct btf's internal types index and update btf_header */
+ d->btf->nr_types = next_type_id - 1;
+ d->btf->types_size = d->btf->nr_types;
+ d->btf->hdr->type_len = p - types_start;
+ new_types = realloc(d->btf->types,
+ (1 + d->btf->nr_types) * sizeof(struct btf_type *));
+ if (!new_types)
+ return -ENOMEM;
+ d->btf->types = new_types;
+
+ /* make sure string section follows type information without gaps */
+ d->btf->hdr->str_off = p - (char *)d->btf->nohdr_data;
+ memmove(p, d->btf->strings, d->btf->hdr->str_len);
+ d->btf->strings = p;
+ p += d->btf->hdr->str_len;
+
+ d->btf->data_size = p - (char *)d->btf->data;
+ return 0;
+}
+
+/*
+ * Figure out final (deduplicated and compacted) type ID for provided original
+ * `type_id` by first resolving it into corresponding canonical type ID and
+ * then mapping it to a deduplicated type ID, stored in btf_dedup->hypot_map,
+ * which is populated during compaction phase.
+ */
+static int btf_dedup_remap_type_id(struct btf_dedup *d, __u32 type_id)
+{
+ __u32 resolved_type_id, new_type_id;
+
+ resolved_type_id = resolve_type_id(d, type_id);
+ new_type_id = d->hypot_map[resolved_type_id];
+ if (new_type_id > BTF_MAX_NR_TYPES)
+ return -EINVAL;
+ return new_type_id;
+}
+
+/*
+ * Remap referenced type IDs into deduped type IDs.
+ *
+ * After BTF types are deduplicated and compacted, their final type IDs may
+ * differ from original ones. The map from original to a corresponding
+ * deduped type ID is stored in btf_dedup->hypot_map and is populated during
+ * compaction phase. During remapping phase we are rewriting all type IDs
+ * referenced from any BTF type (e.g., struct fields, func proto args, etc) to
+ * their final deduped type IDs.
+ */
+static int btf_dedup_remap_type(struct btf_dedup *d, __u32 type_id)
+{
+ struct btf_type *t = d->btf->types[type_id];
+ int i, r;
+
+ switch (btf_kind(t)) {
+ case BTF_KIND_INT:
+ case BTF_KIND_ENUM:
+ break;
+
+ case BTF_KIND_FWD:
+ case BTF_KIND_CONST:
+ case BTF_KIND_VOLATILE:
+ case BTF_KIND_RESTRICT:
+ case BTF_KIND_PTR:
+ case BTF_KIND_TYPEDEF:
+ case BTF_KIND_FUNC:
+ case BTF_KIND_VAR:
+ r = btf_dedup_remap_type_id(d, t->type);
+ if (r < 0)
+ return r;
+ t->type = r;
+ break;
+
+ case BTF_KIND_ARRAY: {
+ struct btf_array *arr_info = btf_array(t);
+
+ r = btf_dedup_remap_type_id(d, arr_info->type);
+ if (r < 0)
+ return r;
+ arr_info->type = r;
+ r = btf_dedup_remap_type_id(d, arr_info->index_type);
+ if (r < 0)
+ return r;
+ arr_info->index_type = r;
+ break;
+ }
+
+ case BTF_KIND_STRUCT:
+ case BTF_KIND_UNION: {
+ struct btf_member *member = btf_members(t);
+ __u16 vlen = btf_vlen(t);
+
+ for (i = 0; i < vlen; i++) {
+ r = btf_dedup_remap_type_id(d, member->type);
+ if (r < 0)
+ return r;
+ member->type = r;
+ member++;
+ }
+ break;
+ }
+
+ case BTF_KIND_FUNC_PROTO: {
+ struct btf_param *param = btf_params(t);
+ __u16 vlen = btf_vlen(t);
+
+ r = btf_dedup_remap_type_id(d, t->type);
+ if (r < 0)
+ return r;
+ t->type = r;
+
+ for (i = 0; i < vlen; i++) {
+ r = btf_dedup_remap_type_id(d, param->type);
+ if (r < 0)
+ return r;
+ param->type = r;
+ param++;
+ }
+ break;
+ }
+
+ case BTF_KIND_DATASEC: {
+ struct btf_var_secinfo *var = btf_var_secinfos(t);
+ __u16 vlen = btf_vlen(t);
+
+ for (i = 0; i < vlen; i++) {
+ r = btf_dedup_remap_type_id(d, var->type);
+ if (r < 0)
+ return r;
+ var->type = r;
+ var++;
+ }
+ break;
+ }
+
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int btf_dedup_remap_types(struct btf_dedup *d)
+{
+ int i, r;
+
+ for (i = 1; i <= d->btf->nr_types; i++) {
+ r = btf_dedup_remap_type(d, i);
+ if (r < 0)
+ return r;
+ }
+ return 0;
+}
diff --git a/src/contrib/libbpf/bpf/btf.h b/src/contrib/libbpf/bpf/btf.h
new file mode 100644
index 0000000..d9ac73a
--- /dev/null
+++ b/src/contrib/libbpf/bpf/btf.h
@@ -0,0 +1,311 @@
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
+/* Copyright (c) 2018 Facebook */
+
+#ifndef __LIBBPF_BTF_H
+#define __LIBBPF_BTF_H
+
+#include <stdarg.h>
+#include <linux/btf.h>
+#include <linux/types.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifndef LIBBPF_API
+#define LIBBPF_API __attribute__((visibility("default")))
+#endif
+
+#define BTF_ELF_SEC ".BTF"
+#define BTF_EXT_ELF_SEC ".BTF.ext"
+#define MAPS_ELF_SEC ".maps"
+
+struct btf;
+struct btf_ext;
+struct btf_type;
+
+struct bpf_object;
+
+/*
+ * The .BTF.ext ELF section layout defined as
+ * struct btf_ext_header
+ * func_info subsection
+ *
+ * The func_info subsection layout:
+ * record size for struct bpf_func_info in the func_info subsection
+ * struct btf_sec_func_info for section #1
+ * a list of bpf_func_info records for section #1
+ * where struct bpf_func_info mimics one in include/uapi/linux/bpf.h
+ * but may not be identical
+ * struct btf_sec_func_info for section #2
+ * a list of bpf_func_info records for section #2
+ * ......
+ *
+ * Note that the bpf_func_info record size in .BTF.ext may not
+ * be the same as the one defined in include/uapi/linux/bpf.h.
+ * The loader should ensure that record_size meets minimum
+ * requirement and pass the record as is to the kernel. The
+ * kernel will handle the func_info properly based on its contents.
+ */
+struct btf_ext_header {
+ __u16 magic;
+ __u8 version;
+ __u8 flags;
+ __u32 hdr_len;
+
+ /* All offsets are in bytes relative to the end of this header */
+ __u32 func_info_off;
+ __u32 func_info_len;
+ __u32 line_info_off;
+ __u32 line_info_len;
+
+ /* optional part of .BTF.ext header */
+ __u32 field_reloc_off;
+ __u32 field_reloc_len;
+};
+
+LIBBPF_API void btf__free(struct btf *btf);
+LIBBPF_API struct btf *btf__new(__u8 *data, __u32 size);
+LIBBPF_API struct btf *btf__parse_elf(const char *path,
+ struct btf_ext **btf_ext);
+LIBBPF_API int btf__finalize_data(struct bpf_object *obj, struct btf *btf);
+LIBBPF_API int btf__load(struct btf *btf);
+LIBBPF_API __s32 btf__find_by_name(const struct btf *btf,
+ const char *type_name);
+LIBBPF_API __s32 btf__find_by_name_kind(const struct btf *btf,
+ const char *type_name, __u32 kind);
+LIBBPF_API __u32 btf__get_nr_types(const struct btf *btf);
+LIBBPF_API const struct btf_type *btf__type_by_id(const struct btf *btf,
+ __u32 id);
+LIBBPF_API __s64 btf__resolve_size(const struct btf *btf, __u32 type_id);
+LIBBPF_API int btf__resolve_type(const struct btf *btf, __u32 type_id);
+LIBBPF_API int btf__fd(const struct btf *btf);
+LIBBPF_API const void *btf__get_raw_data(const struct btf *btf, __u32 *size);
+LIBBPF_API const char *btf__name_by_offset(const struct btf *btf, __u32 offset);
+LIBBPF_API int btf__get_from_id(__u32 id, struct btf **btf);
+LIBBPF_API int btf__get_map_kv_tids(const struct btf *btf, const char *map_name,
+ __u32 expected_key_size,
+ __u32 expected_value_size,
+ __u32 *key_type_id, __u32 *value_type_id);
+
+LIBBPF_API struct btf_ext *btf_ext__new(__u8 *data, __u32 size);
+LIBBPF_API void btf_ext__free(struct btf_ext *btf_ext);
+LIBBPF_API const void *btf_ext__get_raw_data(const struct btf_ext *btf_ext,
+ __u32 *size);
+LIBBPF_API int btf_ext__reloc_func_info(const struct btf *btf,
+ const struct btf_ext *btf_ext,
+ const char *sec_name, __u32 insns_cnt,
+ void **func_info, __u32 *cnt);
+LIBBPF_API int btf_ext__reloc_line_info(const struct btf *btf,
+ const struct btf_ext *btf_ext,
+ const char *sec_name, __u32 insns_cnt,
+ void **line_info, __u32 *cnt);
+LIBBPF_API __u32 btf_ext__func_info_rec_size(const struct btf_ext *btf_ext);
+LIBBPF_API __u32 btf_ext__line_info_rec_size(const struct btf_ext *btf_ext);
+
+struct btf_dedup_opts {
+ unsigned int dedup_table_size;
+ bool dont_resolve_fwds;
+};
+
+LIBBPF_API int btf__dedup(struct btf *btf, struct btf_ext *btf_ext,
+ const struct btf_dedup_opts *opts);
+
+struct btf_dump;
+
+struct btf_dump_opts {
+ void *ctx;
+};
+
+typedef void (*btf_dump_printf_fn_t)(void *ctx, const char *fmt, va_list args);
+
+LIBBPF_API struct btf_dump *btf_dump__new(const struct btf *btf,
+ const struct btf_ext *btf_ext,
+ const struct btf_dump_opts *opts,
+ btf_dump_printf_fn_t printf_fn);
+LIBBPF_API void btf_dump__free(struct btf_dump *d);
+
+LIBBPF_API int btf_dump__dump_type(struct btf_dump *d, __u32 id);
+
+/*
+ * A set of helpers for easier BTF types handling
+ */
+static inline __u16 btf_kind(const struct btf_type *t)
+{
+ return BTF_INFO_KIND(t->info);
+}
+
+static inline __u16 btf_vlen(const struct btf_type *t)
+{
+ return BTF_INFO_VLEN(t->info);
+}
+
+static inline bool btf_kflag(const struct btf_type *t)
+{
+ return BTF_INFO_KFLAG(t->info);
+}
+
+static inline bool btf_is_int(const struct btf_type *t)
+{
+ return btf_kind(t) == BTF_KIND_INT;
+}
+
+static inline bool btf_is_ptr(const struct btf_type *t)
+{
+ return btf_kind(t) == BTF_KIND_PTR;
+}
+
+static inline bool btf_is_array(const struct btf_type *t)
+{
+ return btf_kind(t) == BTF_KIND_ARRAY;
+}
+
+static inline bool btf_is_struct(const struct btf_type *t)
+{
+ return btf_kind(t) == BTF_KIND_STRUCT;
+}
+
+static inline bool btf_is_union(const struct btf_type *t)
+{
+ return btf_kind(t) == BTF_KIND_UNION;
+}
+
+static inline bool btf_is_composite(const struct btf_type *t)
+{
+ __u16 kind = btf_kind(t);
+
+ return kind == BTF_KIND_STRUCT || kind == BTF_KIND_UNION;
+}
+
+static inline bool btf_is_enum(const struct btf_type *t)
+{
+ return btf_kind(t) == BTF_KIND_ENUM;
+}
+
+static inline bool btf_is_fwd(const struct btf_type *t)
+{
+ return btf_kind(t) == BTF_KIND_FWD;
+}
+
+static inline bool btf_is_typedef(const struct btf_type *t)
+{
+ return btf_kind(t) == BTF_KIND_TYPEDEF;
+}
+
+static inline bool btf_is_volatile(const struct btf_type *t)
+{
+ return btf_kind(t) == BTF_KIND_VOLATILE;
+}
+
+static inline bool btf_is_const(const struct btf_type *t)
+{
+ return btf_kind(t) == BTF_KIND_CONST;
+}
+
+static inline bool btf_is_restrict(const struct btf_type *t)
+{
+ return btf_kind(t) == BTF_KIND_RESTRICT;
+}
+
+static inline bool btf_is_mod(const struct btf_type *t)
+{
+ __u16 kind = btf_kind(t);
+
+ return kind == BTF_KIND_VOLATILE ||
+ kind == BTF_KIND_CONST ||
+ kind == BTF_KIND_RESTRICT;
+}
+
+static inline bool btf_is_func(const struct btf_type *t)
+{
+ return btf_kind(t) == BTF_KIND_FUNC;
+}
+
+static inline bool btf_is_func_proto(const struct btf_type *t)
+{
+ return btf_kind(t) == BTF_KIND_FUNC_PROTO;
+}
+
+static inline bool btf_is_var(const struct btf_type *t)
+{
+ return btf_kind(t) == BTF_KIND_VAR;
+}
+
+static inline bool btf_is_datasec(const struct btf_type *t)
+{
+ return btf_kind(t) == BTF_KIND_DATASEC;
+}
+
+static inline __u8 btf_int_encoding(const struct btf_type *t)
+{
+ return BTF_INT_ENCODING(*(__u32 *)(t + 1));
+}
+
+static inline __u8 btf_int_offset(const struct btf_type *t)
+{
+ return BTF_INT_OFFSET(*(__u32 *)(t + 1));
+}
+
+static inline __u8 btf_int_bits(const struct btf_type *t)
+{
+ return BTF_INT_BITS(*(__u32 *)(t + 1));
+}
+
+static inline struct btf_array *btf_array(const struct btf_type *t)
+{
+ return (struct btf_array *)(t + 1);
+}
+
+static inline struct btf_enum *btf_enum(const struct btf_type *t)
+{
+ return (struct btf_enum *)(t + 1);
+}
+
+static inline struct btf_member *btf_members(const struct btf_type *t)
+{
+ return (struct btf_member *)(t + 1);
+}
+
+/* Get bit offset of a member with specified index. */
+static inline __u32 btf_member_bit_offset(const struct btf_type *t,
+ __u32 member_idx)
+{
+ const struct btf_member *m = btf_members(t) + member_idx;
+ bool kflag = btf_kflag(t);
+
+ return kflag ? BTF_MEMBER_BIT_OFFSET(m->offset) : m->offset;
+}
+/*
+ * Get bitfield size of a member, assuming t is BTF_KIND_STRUCT or
+ * BTF_KIND_UNION. If member is not a bitfield, zero is returned.
+ */
+static inline __u32 btf_member_bitfield_size(const struct btf_type *t,
+ __u32 member_idx)
+{
+ const struct btf_member *m = btf_members(t) + member_idx;
+ bool kflag = btf_kflag(t);
+
+ return kflag ? BTF_MEMBER_BITFIELD_SIZE(m->offset) : 0;
+}
+
+static inline struct btf_param *btf_params(const struct btf_type *t)
+{
+ return (struct btf_param *)(t + 1);
+}
+
+static inline struct btf_var *btf_var(const struct btf_type *t)
+{
+ return (struct btf_var *)(t + 1);
+}
+
+static inline struct btf_var_secinfo *
+btf_var_secinfos(const struct btf_type *t)
+{
+ return (struct btf_var_secinfo *)(t + 1);
+}
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#endif /* __LIBBPF_BTF_H */
diff --git a/src/contrib/libbpf/bpf/btf_dump.c b/src/contrib/libbpf/bpf/btf_dump.c
new file mode 100644
index 0000000..cb126d8
--- /dev/null
+++ b/src/contrib/libbpf/bpf/btf_dump.c
@@ -0,0 +1,1386 @@
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+
+/*
+ * BTF-to-C type converter.
+ *
+ * Copyright (c) 2019 Facebook
+ */
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <linux/err.h>
+#include <linux/btf.h>
+#include "btf.h"
+#include "hashmap.h"
+#include "libbpf.h"
+#include "libbpf_internal.h"
+
+static const char PREFIXES[] = "\t\t\t\t\t\t\t\t\t\t\t\t\t";
+static const size_t PREFIX_CNT = sizeof(PREFIXES) - 1;
+
+static const char *pfx(int lvl)
+{
+ return lvl >= PREFIX_CNT ? PREFIXES : &PREFIXES[PREFIX_CNT - lvl];
+}
+
+enum btf_dump_type_order_state {
+ NOT_ORDERED,
+ ORDERING,
+ ORDERED,
+};
+
+enum btf_dump_type_emit_state {
+ NOT_EMITTED,
+ EMITTING,
+ EMITTED,
+};
+
+/* per-type auxiliary state */
+struct btf_dump_type_aux_state {
+ /* topological sorting state */
+ enum btf_dump_type_order_state order_state: 2;
+ /* emitting state used to determine the need for forward declaration */
+ enum btf_dump_type_emit_state emit_state: 2;
+ /* whether forward declaration was already emitted */
+ __u8 fwd_emitted: 1;
+ /* whether unique non-duplicate name was already assigned */
+ __u8 name_resolved: 1;
+ /* whether type is referenced from any other type */
+ __u8 referenced: 1;
+};
+
+struct btf_dump {
+ const struct btf *btf;
+ const struct btf_ext *btf_ext;
+ btf_dump_printf_fn_t printf_fn;
+ struct btf_dump_opts opts;
+
+ /* per-type auxiliary state */
+ struct btf_dump_type_aux_state *type_states;
+ /* per-type optional cached unique name, must be freed, if present */
+ const char **cached_names;
+
+ /* topo-sorted list of dependent type definitions */
+ __u32 *emit_queue;
+ int emit_queue_cap;
+ int emit_queue_cnt;
+
+ /*
+ * stack of type declarations (e.g., chain of modifiers, arrays,
+ * funcs, etc)
+ */
+ __u32 *decl_stack;
+ int decl_stack_cap;
+ int decl_stack_cnt;
+
+ /* maps struct/union/enum name to a number of name occurrences */
+ struct hashmap *type_names;
+ /*
+ * maps typedef identifiers and enum value names to a number of such
+ * name occurrences
+ */
+ struct hashmap *ident_names;
+};
+
+static size_t str_hash_fn(const void *key, void *ctx)
+{
+ const char *s = key;
+ size_t h = 0;
+
+ while (*s) {
+ h = h * 31 + *s;
+ s++;
+ }
+ return h;
+}
+
+static bool str_equal_fn(const void *a, const void *b, void *ctx)
+{
+ return strcmp(a, b) == 0;
+}
+
+static const char *btf_name_of(const struct btf_dump *d, __u32 name_off)
+{
+ return btf__name_by_offset(d->btf, name_off);
+}
+
+static void btf_dump_printf(const struct btf_dump *d, const char *fmt, ...)
+{
+ va_list args;
+
+ va_start(args, fmt);
+ d->printf_fn(d->opts.ctx, fmt, args);
+ va_end(args);
+}
+
+struct btf_dump *btf_dump__new(const struct btf *btf,
+ const struct btf_ext *btf_ext,
+ const struct btf_dump_opts *opts,
+ btf_dump_printf_fn_t printf_fn)
+{
+ struct btf_dump *d;
+ int err;
+
+ d = calloc(1, sizeof(struct btf_dump));
+ if (!d)
+ return ERR_PTR(-ENOMEM);
+
+ d->btf = btf;
+ d->btf_ext = btf_ext;
+ d->printf_fn = printf_fn;
+ d->opts.ctx = opts ? opts->ctx : NULL;
+
+ d->type_names = hashmap__new(str_hash_fn, str_equal_fn, NULL);
+ if (IS_ERR(d->type_names)) {
+ err = PTR_ERR(d->type_names);
+ d->type_names = NULL;
+ btf_dump__free(d);
+ return ERR_PTR(err);
+ }
+ d->ident_names = hashmap__new(str_hash_fn, str_equal_fn, NULL);
+ if (IS_ERR(d->ident_names)) {
+ err = PTR_ERR(d->ident_names);
+ d->ident_names = NULL;
+ btf_dump__free(d);
+ return ERR_PTR(err);
+ }
+
+ return d;
+}
+
+void btf_dump__free(struct btf_dump *d)
+{
+ int i, cnt;
+
+ if (!d)
+ return;
+
+ free(d->type_states);
+ if (d->cached_names) {
+ /* any set cached name is owned by us and should be freed */
+ for (i = 0, cnt = btf__get_nr_types(d->btf); i <= cnt; i++) {
+ if (d->cached_names[i])
+ free((void *)d->cached_names[i]);
+ }
+ }
+ free(d->cached_names);
+ free(d->emit_queue);
+ free(d->decl_stack);
+ hashmap__free(d->type_names);
+ hashmap__free(d->ident_names);
+
+ free(d);
+}
+
+static int btf_dump_mark_referenced(struct btf_dump *d);
+static int btf_dump_order_type(struct btf_dump *d, __u32 id, bool through_ptr);
+static void btf_dump_emit_type(struct btf_dump *d, __u32 id, __u32 cont_id);
+
+/*
+ * Dump BTF type in a compilable C syntax, including all the necessary
+ * dependent types, necessary for compilation. If some of the dependent types
+ * were already emitted as part of previous btf_dump__dump_type() invocation
+ * for another type, they won't be emitted again. This API allows callers to
+ * filter out BTF types according to user-defined criterias and emitted only
+ * minimal subset of types, necessary to compile everything. Full struct/union
+ * definitions will still be emitted, even if the only usage is through
+ * pointer and could be satisfied with just a forward declaration.
+ *
+ * Dumping is done in two high-level passes:
+ * 1. Topologically sort type definitions to satisfy C rules of compilation.
+ * 2. Emit type definitions in C syntax.
+ *
+ * Returns 0 on success; <0, otherwise.
+ */
+int btf_dump__dump_type(struct btf_dump *d, __u32 id)
+{
+ int err, i;
+
+ if (id > btf__get_nr_types(d->btf))
+ return -EINVAL;
+
+ /* type states are lazily allocated, as they might not be needed */
+ if (!d->type_states) {
+ d->type_states = calloc(1 + btf__get_nr_types(d->btf),
+ sizeof(d->type_states[0]));
+ if (!d->type_states)
+ return -ENOMEM;
+ d->cached_names = calloc(1 + btf__get_nr_types(d->btf),
+ sizeof(d->cached_names[0]));
+ if (!d->cached_names)
+ return -ENOMEM;
+
+ /* VOID is special */
+ d->type_states[0].order_state = ORDERED;
+ d->type_states[0].emit_state = EMITTED;
+
+ /* eagerly determine referenced types for anon enums */
+ err = btf_dump_mark_referenced(d);
+ if (err)
+ return err;
+ }
+
+ d->emit_queue_cnt = 0;
+ err = btf_dump_order_type(d, id, false);
+ if (err < 0)
+ return err;
+
+ for (i = 0; i < d->emit_queue_cnt; i++)
+ btf_dump_emit_type(d, d->emit_queue[i], 0 /*top-level*/);
+
+ return 0;
+}
+
+/*
+ * Mark all types that are referenced from any other type. This is used to
+ * determine top-level anonymous enums that need to be emitted as an
+ * independent type declarations.
+ * Anonymous enums come in two flavors: either embedded in a struct's field
+ * definition, in which case they have to be declared inline as part of field
+ * type declaration; or as a top-level anonymous enum, typically used for
+ * declaring global constants. It's impossible to distinguish between two
+ * without knowning whether given enum type was referenced from other type:
+ * top-level anonymous enum won't be referenced by anything, while embedded
+ * one will.
+ */
+static int btf_dump_mark_referenced(struct btf_dump *d)
+{
+ int i, j, n = btf__get_nr_types(d->btf);
+ const struct btf_type *t;
+ __u16 vlen;
+
+ for (i = 1; i <= n; i++) {
+ t = btf__type_by_id(d->btf, i);
+ vlen = btf_vlen(t);
+
+ switch (btf_kind(t)) {
+ case BTF_KIND_INT:
+ case BTF_KIND_ENUM:
+ case BTF_KIND_FWD:
+ break;
+
+ case BTF_KIND_VOLATILE:
+ case BTF_KIND_CONST:
+ case BTF_KIND_RESTRICT:
+ case BTF_KIND_PTR:
+ case BTF_KIND_TYPEDEF:
+ case BTF_KIND_FUNC:
+ case BTF_KIND_VAR:
+ d->type_states[t->type].referenced = 1;
+ break;
+
+ case BTF_KIND_ARRAY: {
+ const struct btf_array *a = btf_array(t);
+
+ d->type_states[a->index_type].referenced = 1;
+ d->type_states[a->type].referenced = 1;
+ break;
+ }
+ case BTF_KIND_STRUCT:
+ case BTF_KIND_UNION: {
+ const struct btf_member *m = btf_members(t);
+
+ for (j = 0; j < vlen; j++, m++)
+ d->type_states[m->type].referenced = 1;
+ break;
+ }
+ case BTF_KIND_FUNC_PROTO: {
+ const struct btf_param *p = btf_params(t);
+
+ for (j = 0; j < vlen; j++, p++)
+ d->type_states[p->type].referenced = 1;
+ break;
+ }
+ case BTF_KIND_DATASEC: {
+ const struct btf_var_secinfo *v = btf_var_secinfos(t);
+
+ for (j = 0; j < vlen; j++, v++)
+ d->type_states[v->type].referenced = 1;
+ break;
+ }
+ default:
+ return -EINVAL;
+ }
+ }
+ return 0;
+}
+static int btf_dump_add_emit_queue_id(struct btf_dump *d, __u32 id)
+{
+ __u32 *new_queue;
+ size_t new_cap;
+
+ if (d->emit_queue_cnt >= d->emit_queue_cap) {
+ new_cap = max(16, d->emit_queue_cap * 3 / 2);
+ new_queue = realloc(d->emit_queue,
+ new_cap * sizeof(new_queue[0]));
+ if (!new_queue)
+ return -ENOMEM;
+ d->emit_queue = new_queue;
+ d->emit_queue_cap = new_cap;
+ }
+
+ d->emit_queue[d->emit_queue_cnt++] = id;
+ return 0;
+}
+
+/*
+ * Determine order of emitting dependent types and specified type to satisfy
+ * C compilation rules. This is done through topological sorting with an
+ * additional complication which comes from C rules. The main idea for C is
+ * that if some type is "embedded" into a struct/union, it's size needs to be
+ * known at the time of definition of containing type. E.g., for:
+ *
+ * struct A {};
+ * struct B { struct A x; }
+ *
+ * struct A *HAS* to be defined before struct B, because it's "embedded",
+ * i.e., it is part of struct B layout. But in the following case:
+ *
+ * struct A;
+ * struct B { struct A *x; }
+ * struct A {};
+ *
+ * it's enough to just have a forward declaration of struct A at the time of
+ * struct B definition, as struct B has a pointer to struct A, so the size of
+ * field x is known without knowing struct A size: it's sizeof(void *).
+ *
+ * Unfortunately, there are some trickier cases we need to handle, e.g.:
+ *
+ * struct A {}; // if this was forward-declaration: compilation error
+ * struct B {
+ * struct { // anonymous struct
+ * struct A y;
+ * } *x;
+ * };
+ *
+ * In this case, struct B's field x is a pointer, so it's size is known
+ * regardless of the size of (anonymous) struct it points to. But because this
+ * struct is anonymous and thus defined inline inside struct B, *and* it
+ * embeds struct A, compiler requires full definition of struct A to be known
+ * before struct B can be defined. This creates a transitive dependency
+ * between struct A and struct B. If struct A was forward-declared before
+ * struct B definition and fully defined after struct B definition, that would
+ * trigger compilation error.
+ *
+ * All this means that while we are doing topological sorting on BTF type
+ * graph, we need to determine relationships between different types (graph
+ * nodes):
+ * - weak link (relationship) between X and Y, if Y *CAN* be
+ * forward-declared at the point of X definition;
+ * - strong link, if Y *HAS* to be fully-defined before X can be defined.
+ *
+ * The rule is as follows. Given a chain of BTF types from X to Y, if there is
+ * BTF_KIND_PTR type in the chain and at least one non-anonymous type
+ * Z (excluding X, including Y), then link is weak. Otherwise, it's strong.
+ * Weak/strong relationship is determined recursively during DFS traversal and
+ * is returned as a result from btf_dump_order_type().
+ *
+ * btf_dump_order_type() is trying to avoid unnecessary forward declarations,
+ * but it is not guaranteeing that no extraneous forward declarations will be
+ * emitted.
+ *
+ * To avoid extra work, algorithm marks some of BTF types as ORDERED, when
+ * it's done with them, but not for all (e.g., VOLATILE, CONST, RESTRICT,
+ * ARRAY, FUNC_PROTO), as weak/strong semantics for those depends on the
+ * entire graph path, so depending where from one came to that BTF type, it
+ * might cause weak or strong ordering. For types like STRUCT/UNION/INT/ENUM,
+ * once they are processed, there is no need to do it again, so they are
+ * marked as ORDERED. We can mark PTR as ORDERED as well, as it semi-forces
+ * weak link, unless subsequent referenced STRUCT/UNION/ENUM is anonymous. But
+ * in any case, once those are processed, no need to do it again, as the
+ * result won't change.
+ *
+ * Returns:
+ * - 1, if type is part of strong link (so there is strong topological
+ * ordering requirements);
+ * - 0, if type is part of weak link (so can be satisfied through forward
+ * declaration);
+ * - <0, on error (e.g., unsatisfiable type loop detected).
+ */
+static int btf_dump_order_type(struct btf_dump *d, __u32 id, bool through_ptr)
+{
+ /*
+ * Order state is used to detect strong link cycles, but only for BTF
+ * kinds that are or could be an independent definition (i.e.,
+ * stand-alone fwd decl, enum, typedef, struct, union). Ptrs, arrays,
+ * func_protos, modifiers are just means to get to these definitions.
+ * Int/void don't need definitions, they are assumed to be always
+ * properly defined. We also ignore datasec, var, and funcs for now.
+ * So for all non-defining kinds, we never even set ordering state,
+ * for defining kinds we set ORDERING and subsequently ORDERED if it
+ * forms a strong link.
+ */
+ struct btf_dump_type_aux_state *tstate = &d->type_states[id];
+ const struct btf_type *t;
+ __u16 vlen;
+ int err, i;
+
+ /* return true, letting typedefs know that it's ok to be emitted */
+ if (tstate->order_state == ORDERED)
+ return 1;
+
+ t = btf__type_by_id(d->btf, id);
+
+ if (tstate->order_state == ORDERING) {
+ /* type loop, but resolvable through fwd declaration */
+ if (btf_is_composite(t) && through_ptr && t->name_off != 0)
+ return 0;
+ pr_warn("unsatisfiable type cycle, id:[%u]\n", id);
+ return -ELOOP;
+ }
+
+ switch (btf_kind(t)) {
+ case BTF_KIND_INT:
+ tstate->order_state = ORDERED;
+ return 0;
+
+ case BTF_KIND_PTR:
+ err = btf_dump_order_type(d, t->type, true);
+ tstate->order_state = ORDERED;
+ return err;
+
+ case BTF_KIND_ARRAY:
+ return btf_dump_order_type(d, btf_array(t)->type, through_ptr);
+
+ case BTF_KIND_STRUCT:
+ case BTF_KIND_UNION: {
+ const struct btf_member *m = btf_members(t);
+ /*
+ * struct/union is part of strong link, only if it's embedded
+ * (so no ptr in a path) or it's anonymous (so has to be
+ * defined inline, even if declared through ptr)
+ */
+ if (through_ptr && t->name_off != 0)
+ return 0;
+
+ tstate->order_state = ORDERING;
+
+ vlen = btf_vlen(t);
+ for (i = 0; i < vlen; i++, m++) {
+ err = btf_dump_order_type(d, m->type, false);
+ if (err < 0)
+ return err;
+ }
+
+ if (t->name_off != 0) {
+ err = btf_dump_add_emit_queue_id(d, id);
+ if (err < 0)
+ return err;
+ }
+
+ tstate->order_state = ORDERED;
+ return 1;
+ }
+ case BTF_KIND_ENUM:
+ case BTF_KIND_FWD:
+ /*
+ * non-anonymous or non-referenced enums are top-level
+ * declarations and should be emitted. Same logic can be
+ * applied to FWDs, it won't hurt anyways.
+ */
+ if (t->name_off != 0 || !tstate->referenced) {
+ err = btf_dump_add_emit_queue_id(d, id);
+ if (err)
+ return err;
+ }
+ tstate->order_state = ORDERED;
+ return 1;
+
+ case BTF_KIND_TYPEDEF: {
+ int is_strong;
+
+ is_strong = btf_dump_order_type(d, t->type, through_ptr);
+ if (is_strong < 0)
+ return is_strong;
+
+ /* typedef is similar to struct/union w.r.t. fwd-decls */
+ if (through_ptr && !is_strong)
+ return 0;
+
+ /* typedef is always a named definition */
+ err = btf_dump_add_emit_queue_id(d, id);
+ if (err)
+ return err;
+
+ d->type_states[id].order_state = ORDERED;
+ return 1;
+ }
+ case BTF_KIND_VOLATILE:
+ case BTF_KIND_CONST:
+ case BTF_KIND_RESTRICT:
+ return btf_dump_order_type(d, t->type, through_ptr);
+
+ case BTF_KIND_FUNC_PROTO: {
+ const struct btf_param *p = btf_params(t);
+ bool is_strong;
+
+ err = btf_dump_order_type(d, t->type, through_ptr);
+ if (err < 0)
+ return err;
+ is_strong = err > 0;
+
+ vlen = btf_vlen(t);
+ for (i = 0; i < vlen; i++, p++) {
+ err = btf_dump_order_type(d, p->type, through_ptr);
+ if (err < 0)
+ return err;
+ if (err > 0)
+ is_strong = true;
+ }
+ return is_strong;
+ }
+ case BTF_KIND_FUNC:
+ case BTF_KIND_VAR:
+ case BTF_KIND_DATASEC:
+ d->type_states[id].order_state = ORDERED;
+ return 0;
+
+ default:
+ return -EINVAL;
+ }
+}
+
+static void btf_dump_emit_struct_fwd(struct btf_dump *d, __u32 id,
+ const struct btf_type *t);
+static void btf_dump_emit_struct_def(struct btf_dump *d, __u32 id,
+ const struct btf_type *t, int lvl);
+
+static void btf_dump_emit_enum_fwd(struct btf_dump *d, __u32 id,
+ const struct btf_type *t);
+static void btf_dump_emit_enum_def(struct btf_dump *d, __u32 id,
+ const struct btf_type *t, int lvl);
+
+static void btf_dump_emit_fwd_def(struct btf_dump *d, __u32 id,
+ const struct btf_type *t);
+
+static void btf_dump_emit_typedef_def(struct btf_dump *d, __u32 id,
+ const struct btf_type *t, int lvl);
+
+/* a local view into a shared stack */
+struct id_stack {
+ const __u32 *ids;
+ int cnt;
+};
+
+static void btf_dump_emit_type_decl(struct btf_dump *d, __u32 id,
+ const char *fname, int lvl);
+static void btf_dump_emit_type_chain(struct btf_dump *d,
+ struct id_stack *decl_stack,
+ const char *fname, int lvl);
+
+static const char *btf_dump_type_name(struct btf_dump *d, __u32 id);
+static const char *btf_dump_ident_name(struct btf_dump *d, __u32 id);
+static size_t btf_dump_name_dups(struct btf_dump *d, struct hashmap *name_map,
+ const char *orig_name);
+
+static bool btf_dump_is_blacklisted(struct btf_dump *d, __u32 id)
+{
+ const struct btf_type *t = btf__type_by_id(d->btf, id);
+
+ /* __builtin_va_list is a compiler built-in, which causes compilation
+ * errors, when compiling w/ different compiler, then used to compile
+ * original code (e.g., GCC to compile kernel, Clang to use generated
+ * C header from BTF). As it is built-in, it should be already defined
+ * properly internally in compiler.
+ */
+ if (t->name_off == 0)
+ return false;
+ return strcmp(btf_name_of(d, t->name_off), "__builtin_va_list") == 0;
+}
+
+/*
+ * Emit C-syntax definitions of types from chains of BTF types.
+ *
+ * High-level handling of determining necessary forward declarations are handled
+ * by btf_dump_emit_type() itself, but all nitty-gritty details of emitting type
+ * declarations/definitions in C syntax are handled by a combo of
+ * btf_dump_emit_type_decl()/btf_dump_emit_type_chain() w/ delegation to
+ * corresponding btf_dump_emit_*_{def,fwd}() functions.
+ *
+ * We also keep track of "containing struct/union type ID" to determine when
+ * we reference it from inside and thus can avoid emitting unnecessary forward
+ * declaration.
+ *
+ * This algorithm is designed in such a way, that even if some error occurs
+ * (either technical, e.g., out of memory, or logical, i.e., malformed BTF
+ * that doesn't comply to C rules completely), algorithm will try to proceed
+ * and produce as much meaningful output as possible.
+ */
+static void btf_dump_emit_type(struct btf_dump *d, __u32 id, __u32 cont_id)
+{
+ struct btf_dump_type_aux_state *tstate = &d->type_states[id];
+ bool top_level_def = cont_id == 0;
+ const struct btf_type *t;
+ __u16 kind;
+
+ if (tstate->emit_state == EMITTED)
+ return;
+
+ t = btf__type_by_id(d->btf, id);
+ kind = btf_kind(t);
+
+ if (tstate->emit_state == EMITTING) {
+ if (tstate->fwd_emitted)
+ return;
+
+ switch (kind) {
+ case BTF_KIND_STRUCT:
+ case BTF_KIND_UNION:
+ /*
+ * if we are referencing a struct/union that we are
+ * part of - then no need for fwd declaration
+ */
+ if (id == cont_id)
+ return;
+ if (t->name_off == 0) {
+ pr_warn("anonymous struct/union loop, id:[%u]\n",
+ id);
+ return;
+ }
+ btf_dump_emit_struct_fwd(d, id, t);
+ btf_dump_printf(d, ";\n\n");
+ tstate->fwd_emitted = 1;
+ break;
+ case BTF_KIND_TYPEDEF:
+ /*
+ * for typedef fwd_emitted means typedef definition
+ * was emitted, but it can be used only for "weak"
+ * references through pointer only, not for embedding
+ */
+ if (!btf_dump_is_blacklisted(d, id)) {
+ btf_dump_emit_typedef_def(d, id, t, 0);
+ btf_dump_printf(d, ";\n\n");
+ };
+ tstate->fwd_emitted = 1;
+ break;
+ default:
+ break;
+ }
+
+ return;
+ }
+
+ switch (kind) {
+ case BTF_KIND_INT:
+ tstate->emit_state = EMITTED;
+ break;
+ case BTF_KIND_ENUM:
+ if (top_level_def) {
+ btf_dump_emit_enum_def(d, id, t, 0);
+ btf_dump_printf(d, ";\n\n");
+ }
+ tstate->emit_state = EMITTED;
+ break;
+ case BTF_KIND_PTR:
+ case BTF_KIND_VOLATILE:
+ case BTF_KIND_CONST:
+ case BTF_KIND_RESTRICT:
+ btf_dump_emit_type(d, t->type, cont_id);
+ break;
+ case BTF_KIND_ARRAY:
+ btf_dump_emit_type(d, btf_array(t)->type, cont_id);
+ break;
+ case BTF_KIND_FWD:
+ btf_dump_emit_fwd_def(d, id, t);
+ btf_dump_printf(d, ";\n\n");
+ tstate->emit_state = EMITTED;
+ break;
+ case BTF_KIND_TYPEDEF:
+ tstate->emit_state = EMITTING;
+ btf_dump_emit_type(d, t->type, id);
+ /*
+ * typedef can server as both definition and forward
+ * declaration; at this stage someone depends on
+ * typedef as a forward declaration (refers to it
+ * through pointer), so unless we already did it,
+ * emit typedef as a forward declaration
+ */
+ if (!tstate->fwd_emitted && !btf_dump_is_blacklisted(d, id)) {
+ btf_dump_emit_typedef_def(d, id, t, 0);
+ btf_dump_printf(d, ";\n\n");
+ }
+ tstate->emit_state = EMITTED;
+ break;
+ case BTF_KIND_STRUCT:
+ case BTF_KIND_UNION:
+ tstate->emit_state = EMITTING;
+ /* if it's a top-level struct/union definition or struct/union
+ * is anonymous, then in C we'll be emitting all fields and
+ * their types (as opposed to just `struct X`), so we need to
+ * make sure that all types, referenced from struct/union
+ * members have necessary forward-declarations, where
+ * applicable
+ */
+ if (top_level_def || t->name_off == 0) {
+ const struct btf_member *m = btf_members(t);
+ __u16 vlen = btf_vlen(t);
+ int i, new_cont_id;
+
+ new_cont_id = t->name_off == 0 ? cont_id : id;
+ for (i = 0; i < vlen; i++, m++)
+ btf_dump_emit_type(d, m->type, new_cont_id);
+ } else if (!tstate->fwd_emitted && id != cont_id) {
+ btf_dump_emit_struct_fwd(d, id, t);
+ btf_dump_printf(d, ";\n\n");
+ tstate->fwd_emitted = 1;
+ }
+
+ if (top_level_def) {
+ btf_dump_emit_struct_def(d, id, t, 0);
+ btf_dump_printf(d, ";\n\n");
+ tstate->emit_state = EMITTED;
+ } else {
+ tstate->emit_state = NOT_EMITTED;
+ }
+ break;
+ case BTF_KIND_FUNC_PROTO: {
+ const struct btf_param *p = btf_params(t);
+ __u16 vlen = btf_vlen(t);
+ int i;
+
+ btf_dump_emit_type(d, t->type, cont_id);
+ for (i = 0; i < vlen; i++, p++)
+ btf_dump_emit_type(d, p->type, cont_id);
+
+ break;
+ }
+ default:
+ break;
+ }
+}
+
+static int btf_align_of(const struct btf *btf, __u32 id)
+{
+ const struct btf_type *t = btf__type_by_id(btf, id);
+ __u16 kind = btf_kind(t);
+
+ switch (kind) {
+ case BTF_KIND_INT:
+ case BTF_KIND_ENUM:
+ return min(sizeof(void *), t->size);
+ case BTF_KIND_PTR:
+ return sizeof(void *);
+ case BTF_KIND_TYPEDEF:
+ case BTF_KIND_VOLATILE:
+ case BTF_KIND_CONST:
+ case BTF_KIND_RESTRICT:
+ return btf_align_of(btf, t->type);
+ case BTF_KIND_ARRAY:
+ return btf_align_of(btf, btf_array(t)->type);
+ case BTF_KIND_STRUCT:
+ case BTF_KIND_UNION: {
+ const struct btf_member *m = btf_members(t);
+ __u16 vlen = btf_vlen(t);
+ int i, align = 1;
+
+ for (i = 0; i < vlen; i++, m++)
+ align = max(align, btf_align_of(btf, m->type));
+
+ return align;
+ }
+ default:
+ pr_warn("unsupported BTF_KIND:%u\n", btf_kind(t));
+ return 1;
+ }
+}
+
+static bool btf_is_struct_packed(const struct btf *btf, __u32 id,
+ const struct btf_type *t)
+{
+ const struct btf_member *m;
+ int align, i, bit_sz;
+ __u16 vlen;
+
+ align = btf_align_of(btf, id);
+ /* size of a non-packed struct has to be a multiple of its alignment*/
+ if (t->size % align)
+ return true;
+
+ m = btf_members(t);
+ vlen = btf_vlen(t);
+ /* all non-bitfield fields have to be naturally aligned */
+ for (i = 0; i < vlen; i++, m++) {
+ align = btf_align_of(btf, m->type);
+ bit_sz = btf_member_bitfield_size(t, i);
+ if (bit_sz == 0 && m->offset % (8 * align) != 0)
+ return true;
+ }
+
+ /*
+ * if original struct was marked as packed, but its layout is
+ * naturally aligned, we'll detect that it's not packed
+ */
+ return false;
+}
+
+static int chip_away_bits(int total, int at_most)
+{
+ return total % at_most ? : at_most;
+}
+
+static void btf_dump_emit_bit_padding(const struct btf_dump *d,
+ int cur_off, int m_off, int m_bit_sz,
+ int align, int lvl)
+{
+ int off_diff = m_off - cur_off;
+ int ptr_bits = sizeof(void *) * 8;
+
+ if (off_diff <= 0)
+ /* no gap */
+ return;
+ if (m_bit_sz == 0 && off_diff < align * 8)
+ /* natural padding will take care of a gap */
+ return;
+
+ while (off_diff > 0) {
+ const char *pad_type;
+ int pad_bits;
+
+ if (ptr_bits > 32 && off_diff > 32) {
+ pad_type = "long";
+ pad_bits = chip_away_bits(off_diff, ptr_bits);
+ } else if (off_diff > 16) {
+ pad_type = "int";
+ pad_bits = chip_away_bits(off_diff, 32);
+ } else if (off_diff > 8) {
+ pad_type = "short";
+ pad_bits = chip_away_bits(off_diff, 16);
+ } else {
+ pad_type = "char";
+ pad_bits = chip_away_bits(off_diff, 8);
+ }
+ btf_dump_printf(d, "\n%s%s: %d;", pfx(lvl), pad_type, pad_bits);
+ off_diff -= pad_bits;
+ }
+}
+
+static void btf_dump_emit_struct_fwd(struct btf_dump *d, __u32 id,
+ const struct btf_type *t)
+{
+ btf_dump_printf(d, "%s %s",
+ btf_is_struct(t) ? "struct" : "union",
+ btf_dump_type_name(d, id));
+}
+
+static void btf_dump_emit_struct_def(struct btf_dump *d,
+ __u32 id,
+ const struct btf_type *t,
+ int lvl)
+{
+ const struct btf_member *m = btf_members(t);
+ bool is_struct = btf_is_struct(t);
+ int align, i, packed, off = 0;
+ __u16 vlen = btf_vlen(t);
+
+ packed = is_struct ? btf_is_struct_packed(d->btf, id, t) : 0;
+
+ btf_dump_printf(d, "%s%s%s {",
+ is_struct ? "struct" : "union",
+ t->name_off ? " " : "",
+ btf_dump_type_name(d, id));
+
+ for (i = 0; i < vlen; i++, m++) {
+ const char *fname;
+ int m_off, m_sz;
+
+ fname = btf_name_of(d, m->name_off);
+ m_sz = btf_member_bitfield_size(t, i);
+ m_off = btf_member_bit_offset(t, i);
+ align = packed ? 1 : btf_align_of(d->btf, m->type);
+
+ btf_dump_emit_bit_padding(d, off, m_off, m_sz, align, lvl + 1);
+ btf_dump_printf(d, "\n%s", pfx(lvl + 1));
+ btf_dump_emit_type_decl(d, m->type, fname, lvl + 1);
+
+ if (m_sz) {
+ btf_dump_printf(d, ": %d", m_sz);
+ off = m_off + m_sz;
+ } else {
+ m_sz = max(0, btf__resolve_size(d->btf, m->type));
+ off = m_off + m_sz * 8;
+ }
+ btf_dump_printf(d, ";");
+ }
+
+ /* pad at the end, if necessary */
+ if (is_struct) {
+ align = packed ? 1 : btf_align_of(d->btf, id);
+ btf_dump_emit_bit_padding(d, off, t->size * 8, 0, align,
+ lvl + 1);
+ }
+
+ if (vlen)
+ btf_dump_printf(d, "\n");
+ btf_dump_printf(d, "%s}", pfx(lvl));
+ if (packed)
+ btf_dump_printf(d, " __attribute__((packed))");
+}
+
+static void btf_dump_emit_enum_fwd(struct btf_dump *d, __u32 id,
+ const struct btf_type *t)
+{
+ btf_dump_printf(d, "enum %s", btf_dump_type_name(d, id));
+}
+
+static void btf_dump_emit_enum_def(struct btf_dump *d, __u32 id,
+ const struct btf_type *t,
+ int lvl)
+{
+ const struct btf_enum *v = btf_enum(t);
+ __u16 vlen = btf_vlen(t);
+ const char *name;
+ size_t dup_cnt;
+ int i;
+
+ btf_dump_printf(d, "enum%s%s",
+ t->name_off ? " " : "",
+ btf_dump_type_name(d, id));
+
+ if (vlen) {
+ btf_dump_printf(d, " {");
+ for (i = 0; i < vlen; i++, v++) {
+ name = btf_name_of(d, v->name_off);
+ /* enumerators share namespace with typedef idents */
+ dup_cnt = btf_dump_name_dups(d, d->ident_names, name);
+ if (dup_cnt > 1) {
+ btf_dump_printf(d, "\n%s%s___%zu = %d,",
+ pfx(lvl + 1), name, dup_cnt,
+ (__s32)v->val);
+ } else {
+ btf_dump_printf(d, "\n%s%s = %d,",
+ pfx(lvl + 1), name,
+ (__s32)v->val);
+ }
+ }
+ btf_dump_printf(d, "\n%s}", pfx(lvl));
+ }
+}
+
+static void btf_dump_emit_fwd_def(struct btf_dump *d, __u32 id,
+ const struct btf_type *t)
+{
+ const char *name = btf_dump_type_name(d, id);
+
+ if (btf_kflag(t))
+ btf_dump_printf(d, "union %s", name);
+ else
+ btf_dump_printf(d, "struct %s", name);
+}
+
+static void btf_dump_emit_typedef_def(struct btf_dump *d, __u32 id,
+ const struct btf_type *t, int lvl)
+{
+ const char *name = btf_dump_ident_name(d, id);
+
+ /*
+ * Old GCC versions are emitting invalid typedef for __gnuc_va_list
+ * pointing to VOID. This generates warnings from btf_dump() and
+ * results in uncompilable header file, so we are fixing it up here
+ * with valid typedef into __builtin_va_list.
+ */
+ if (t->type == 0 && strcmp(name, "__gnuc_va_list") == 0) {
+ btf_dump_printf(d, "typedef __builtin_va_list __gnuc_va_list");
+ return;
+ }
+
+ btf_dump_printf(d, "typedef ");
+ btf_dump_emit_type_decl(d, t->type, name, lvl);
+}
+
+static int btf_dump_push_decl_stack_id(struct btf_dump *d, __u32 id)
+{
+ __u32 *new_stack;
+ size_t new_cap;
+
+ if (d->decl_stack_cnt >= d->decl_stack_cap) {
+ new_cap = max(16, d->decl_stack_cap * 3 / 2);
+ new_stack = realloc(d->decl_stack,
+ new_cap * sizeof(new_stack[0]));
+ if (!new_stack)
+ return -ENOMEM;
+ d->decl_stack = new_stack;
+ d->decl_stack_cap = new_cap;
+ }
+
+ d->decl_stack[d->decl_stack_cnt++] = id;
+
+ return 0;
+}
+
+/*
+ * Emit type declaration (e.g., field type declaration in a struct or argument
+ * declaration in function prototype) in correct C syntax.
+ *
+ * For most types it's trivial, but there are few quirky type declaration
+ * cases worth mentioning:
+ * - function prototypes (especially nesting of function prototypes);
+ * - arrays;
+ * - const/volatile/restrict for pointers vs other types.
+ *
+ * For a good discussion of *PARSING* C syntax (as a human), see
+ * Peter van der Linden's "Expert C Programming: Deep C Secrets",
+ * Ch.3 "Unscrambling Declarations in C".
+ *
+ * It won't help with BTF to C conversion much, though, as it's an opposite
+ * problem. So we came up with this algorithm in reverse to van der Linden's
+ * parsing algorithm. It goes from structured BTF representation of type
+ * declaration to a valid compilable C syntax.
+ *
+ * For instance, consider this C typedef:
+ * typedef const int * const * arr[10] arr_t;
+ * It will be represented in BTF with this chain of BTF types:
+ * [typedef] -> [array] -> [ptr] -> [const] -> [ptr] -> [const] -> [int]
+ *
+ * Notice how [const] modifier always goes before type it modifies in BTF type
+ * graph, but in C syntax, const/volatile/restrict modifiers are written to
+ * the right of pointers, but to the left of other types. There are also other
+ * quirks, like function pointers, arrays of them, functions returning other
+ * functions, etc.
+ *
+ * We handle that by pushing all the types to a stack, until we hit "terminal"
+ * type (int/enum/struct/union/fwd). Then depending on the kind of a type on
+ * top of a stack, modifiers are handled differently. Array/function pointers
+ * have also wildly different syntax and how nesting of them are done. See
+ * code for authoritative definition.
+ *
+ * To avoid allocating new stack for each independent chain of BTF types, we
+ * share one bigger stack, with each chain working only on its own local view
+ * of a stack frame. Some care is required to "pop" stack frames after
+ * processing type declaration chain.
+ */
+static void btf_dump_emit_type_decl(struct btf_dump *d, __u32 id,
+ const char *fname, int lvl)
+{
+ struct id_stack decl_stack;
+ const struct btf_type *t;
+ int err, stack_start;
+
+ stack_start = d->decl_stack_cnt;
+ for (;;) {
+ err = btf_dump_push_decl_stack_id(d, id);
+ if (err < 0) {
+ /*
+ * if we don't have enough memory for entire type decl
+ * chain, restore stack, emit warning, and try to
+ * proceed nevertheless
+ */
+ pr_warn("not enough memory for decl stack:%d", err);
+ d->decl_stack_cnt = stack_start;
+ return;
+ }
+
+ /* VOID */
+ if (id == 0)
+ break;
+
+ t = btf__type_by_id(d->btf, id);
+ switch (btf_kind(t)) {
+ case BTF_KIND_PTR:
+ case BTF_KIND_VOLATILE:
+ case BTF_KIND_CONST:
+ case BTF_KIND_RESTRICT:
+ case BTF_KIND_FUNC_PROTO:
+ id = t->type;
+ break;
+ case BTF_KIND_ARRAY:
+ id = btf_array(t)->type;
+ break;
+ case BTF_KIND_INT:
+ case BTF_KIND_ENUM:
+ case BTF_KIND_FWD:
+ case BTF_KIND_STRUCT:
+ case BTF_KIND_UNION:
+ case BTF_KIND_TYPEDEF:
+ goto done;
+ default:
+ pr_warn("unexpected type in decl chain, kind:%u, id:[%u]\n",
+ btf_kind(t), id);
+ goto done;
+ }
+ }
+done:
+ /*
+ * We might be inside a chain of declarations (e.g., array of function
+ * pointers returning anonymous (so inlined) structs, having another
+ * array field). Each of those needs its own "stack frame" to handle
+ * emitting of declarations. Those stack frames are non-overlapping
+ * portions of shared btf_dump->decl_stack. To make it a bit nicer to
+ * handle this set of nested stacks, we create a view corresponding to
+ * our own "stack frame" and work with it as an independent stack.
+ * We'll need to clean up after emit_type_chain() returns, though.
+ */
+ decl_stack.ids = d->decl_stack + stack_start;
+ decl_stack.cnt = d->decl_stack_cnt - stack_start;
+ btf_dump_emit_type_chain(d, &decl_stack, fname, lvl);
+ /*
+ * emit_type_chain() guarantees that it will pop its entire decl_stack
+ * frame before returning. But it works with a read-only view into
+ * decl_stack, so it doesn't actually pop anything from the
+ * perspective of shared btf_dump->decl_stack, per se. We need to
+ * reset decl_stack state to how it was before us to avoid it growing
+ * all the time.
+ */
+ d->decl_stack_cnt = stack_start;
+}
+
+static void btf_dump_emit_mods(struct btf_dump *d, struct id_stack *decl_stack)
+{
+ const struct btf_type *t;
+ __u32 id;
+
+ while (decl_stack->cnt) {
+ id = decl_stack->ids[decl_stack->cnt - 1];
+ t = btf__type_by_id(d->btf, id);
+
+ switch (btf_kind(t)) {
+ case BTF_KIND_VOLATILE:
+ btf_dump_printf(d, "volatile ");
+ break;
+ case BTF_KIND_CONST:
+ btf_dump_printf(d, "const ");
+ break;
+ case BTF_KIND_RESTRICT:
+ btf_dump_printf(d, "restrict ");
+ break;
+ default:
+ return;
+ }
+ decl_stack->cnt--;
+ }
+}
+
+static void btf_dump_emit_name(const struct btf_dump *d,
+ const char *name, bool last_was_ptr)
+{
+ bool separate = name[0] && !last_was_ptr;
+
+ btf_dump_printf(d, "%s%s", separate ? " " : "", name);
+}
+
+static void btf_dump_emit_type_chain(struct btf_dump *d,
+ struct id_stack *decls,
+ const char *fname, int lvl)
+{
+ /*
+ * last_was_ptr is used to determine if we need to separate pointer
+ * asterisk (*) from previous part of type signature with space, so
+ * that we get `int ***`, instead of `int * * *`. We default to true
+ * for cases where we have single pointer in a chain. E.g., in ptr ->
+ * func_proto case. func_proto will start a new emit_type_chain call
+ * with just ptr, which should be emitted as (*) or (*<fname>), so we
+ * don't want to prepend space for that last pointer.
+ */
+ bool last_was_ptr = true;
+ const struct btf_type *t;
+ const char *name;
+ __u16 kind;
+ __u32 id;
+
+ while (decls->cnt) {
+ id = decls->ids[--decls->cnt];
+ if (id == 0) {
+ /* VOID is a special snowflake */
+ btf_dump_emit_mods(d, decls);
+ btf_dump_printf(d, "void");
+ last_was_ptr = false;
+ continue;
+ }
+
+ t = btf__type_by_id(d->btf, id);
+ kind = btf_kind(t);
+
+ switch (kind) {
+ case BTF_KIND_INT:
+ btf_dump_emit_mods(d, decls);
+ name = btf_name_of(d, t->name_off);
+ btf_dump_printf(d, "%s", name);
+ break;
+ case BTF_KIND_STRUCT:
+ case BTF_KIND_UNION:
+ btf_dump_emit_mods(d, decls);
+ /* inline anonymous struct/union */
+ if (t->name_off == 0)
+ btf_dump_emit_struct_def(d, id, t, lvl);
+ else
+ btf_dump_emit_struct_fwd(d, id, t);
+ break;
+ case BTF_KIND_ENUM:
+ btf_dump_emit_mods(d, decls);
+ /* inline anonymous enum */
+ if (t->name_off == 0)
+ btf_dump_emit_enum_def(d, id, t, lvl);
+ else
+ btf_dump_emit_enum_fwd(d, id, t);
+ break;
+ case BTF_KIND_FWD:
+ btf_dump_emit_mods(d, decls);
+ btf_dump_emit_fwd_def(d, id, t);
+ break;
+ case BTF_KIND_TYPEDEF:
+ btf_dump_emit_mods(d, decls);
+ btf_dump_printf(d, "%s", btf_dump_ident_name(d, id));
+ break;
+ case BTF_KIND_PTR:
+ btf_dump_printf(d, "%s", last_was_ptr ? "*" : " *");
+ break;
+ case BTF_KIND_VOLATILE:
+ btf_dump_printf(d, " volatile");
+ break;
+ case BTF_KIND_CONST:
+ btf_dump_printf(d, " const");
+ break;
+ case BTF_KIND_RESTRICT:
+ btf_dump_printf(d, " restrict");
+ break;
+ case BTF_KIND_ARRAY: {
+ const struct btf_array *a = btf_array(t);
+ const struct btf_type *next_t;
+ __u32 next_id;
+ bool multidim;
+ /*
+ * GCC has a bug
+ * (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=8354)
+ * which causes it to emit extra const/volatile
+ * modifiers for an array, if array's element type has
+ * const/volatile modifiers. Clang doesn't do that.
+ * In general, it doesn't seem very meaningful to have
+ * a const/volatile modifier for array, so we are
+ * going to silently skip them here.
+ */
+ while (decls->cnt) {
+ next_id = decls->ids[decls->cnt - 1];
+ next_t = btf__type_by_id(d->btf, next_id);
+ if (btf_is_mod(next_t))
+ decls->cnt--;
+ else
+ break;
+ }
+
+ if (decls->cnt == 0) {
+ btf_dump_emit_name(d, fname, last_was_ptr);
+ btf_dump_printf(d, "[%u]", a->nelems);
+ return;
+ }
+
+ next_id = decls->ids[decls->cnt - 1];
+ next_t = btf__type_by_id(d->btf, next_id);
+ multidim = btf_is_array(next_t);
+ /* we need space if we have named non-pointer */
+ if (fname[0] && !last_was_ptr)
+ btf_dump_printf(d, " ");
+ /* no parentheses for multi-dimensional array */
+ if (!multidim)
+ btf_dump_printf(d, "(");
+ btf_dump_emit_type_chain(d, decls, fname, lvl);
+ if (!multidim)
+ btf_dump_printf(d, ")");
+ btf_dump_printf(d, "[%u]", a->nelems);
+ return;
+ }
+ case BTF_KIND_FUNC_PROTO: {
+ const struct btf_param *p = btf_params(t);
+ __u16 vlen = btf_vlen(t);
+ int i;
+
+ btf_dump_emit_mods(d, decls);
+ if (decls->cnt) {
+ btf_dump_printf(d, " (");
+ btf_dump_emit_type_chain(d, decls, fname, lvl);
+ btf_dump_printf(d, ")");
+ } else {
+ btf_dump_emit_name(d, fname, last_was_ptr);
+ }
+ btf_dump_printf(d, "(");
+ /*
+ * Clang for BPF target generates func_proto with no
+ * args as a func_proto with a single void arg (e.g.,
+ * `int (*f)(void)` vs just `int (*f)()`). We are
+ * going to pretend there are no args for such case.
+ */
+ if (vlen == 1 && p->type == 0) {
+ btf_dump_printf(d, ")");
+ return;
+ }
+
+ for (i = 0; i < vlen; i++, p++) {
+ if (i > 0)
+ btf_dump_printf(d, ", ");
+
+ /* last arg of type void is vararg */
+ if (i == vlen - 1 && p->type == 0) {
+ btf_dump_printf(d, "...");
+ break;
+ }
+
+ name = btf_name_of(d, p->name_off);
+ btf_dump_emit_type_decl(d, p->type, name, lvl);
+ }
+
+ btf_dump_printf(d, ")");
+ return;
+ }
+ default:
+ pr_warn("unexpected type in decl chain, kind:%u, id:[%u]\n",
+ kind, id);
+ return;
+ }
+
+ last_was_ptr = kind == BTF_KIND_PTR;
+ }
+
+ btf_dump_emit_name(d, fname, last_was_ptr);
+}
+
+/* return number of duplicates (occurrences) of a given name */
+static size_t btf_dump_name_dups(struct btf_dump *d, struct hashmap *name_map,
+ const char *orig_name)
+{
+ size_t dup_cnt = 0;
+
+ hashmap__find(name_map, orig_name, (void **)&dup_cnt);
+ dup_cnt++;
+ hashmap__set(name_map, orig_name, (void *)dup_cnt, NULL, NULL);
+
+ return dup_cnt;
+}
+
+static const char *btf_dump_resolve_name(struct btf_dump *d, __u32 id,
+ struct hashmap *name_map)
+{
+ struct btf_dump_type_aux_state *s = &d->type_states[id];
+ const struct btf_type *t = btf__type_by_id(d->btf, id);
+ const char *orig_name = btf_name_of(d, t->name_off);
+ const char **cached_name = &d->cached_names[id];
+ size_t dup_cnt;
+
+ if (t->name_off == 0)
+ return "";
+
+ if (s->name_resolved)
+ return *cached_name ? *cached_name : orig_name;
+
+ dup_cnt = btf_dump_name_dups(d, name_map, orig_name);
+ if (dup_cnt > 1) {
+ const size_t max_len = 256;
+ char new_name[max_len];
+
+ snprintf(new_name, max_len, "%s___%zu", orig_name, dup_cnt);
+ *cached_name = strdup(new_name);
+ }
+
+ s->name_resolved = 1;
+ return *cached_name ? *cached_name : orig_name;
+}
+
+static const char *btf_dump_type_name(struct btf_dump *d, __u32 id)
+{
+ return btf_dump_resolve_name(d, id, d->type_names);
+}
+
+static const char *btf_dump_ident_name(struct btf_dump *d, __u32 id)
+{
+ return btf_dump_resolve_name(d, id, d->ident_names);
+}
diff --git a/src/contrib/libbpf/bpf/hashmap.c b/src/contrib/libbpf/bpf/hashmap.c
new file mode 100644
index 0000000..6122272
--- /dev/null
+++ b/src/contrib/libbpf/bpf/hashmap.c
@@ -0,0 +1,229 @@
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+
+/*
+ * Generic non-thread safe hash map implementation.
+ *
+ * Copyright (c) 2019 Facebook
+ */
+#include <stdint.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <errno.h>
+#include <linux/err.h>
+#include "hashmap.h"
+
+/* start with 4 buckets */
+#define HASHMAP_MIN_CAP_BITS 2
+
+static void hashmap_add_entry(struct hashmap_entry **pprev,
+ struct hashmap_entry *entry)
+{
+ entry->next = *pprev;
+ *pprev = entry;
+}
+
+static void hashmap_del_entry(struct hashmap_entry **pprev,
+ struct hashmap_entry *entry)
+{
+ *pprev = entry->next;
+ entry->next = NULL;
+}
+
+void hashmap__init(struct hashmap *map, hashmap_hash_fn hash_fn,
+ hashmap_equal_fn equal_fn, void *ctx)
+{
+ map->hash_fn = hash_fn;
+ map->equal_fn = equal_fn;
+ map->ctx = ctx;
+
+ map->buckets = NULL;
+ map->cap = 0;
+ map->cap_bits = 0;
+ map->sz = 0;
+}
+
+struct hashmap *hashmap__new(hashmap_hash_fn hash_fn,
+ hashmap_equal_fn equal_fn,
+ void *ctx)
+{
+ struct hashmap *map = malloc(sizeof(struct hashmap));
+
+ if (!map)
+ return ERR_PTR(-ENOMEM);
+ hashmap__init(map, hash_fn, equal_fn, ctx);
+ return map;
+}
+
+void hashmap__clear(struct hashmap *map)
+{
+ free(map->buckets);
+ map->cap = map->cap_bits = map->sz = 0;
+}
+
+void hashmap__free(struct hashmap *map)
+{
+ if (!map)
+ return;
+
+ hashmap__clear(map);
+ free(map);
+}
+
+size_t hashmap__size(const struct hashmap *map)
+{
+ return map->sz;
+}
+
+size_t hashmap__capacity(const struct hashmap *map)
+{
+ return map->cap;
+}
+
+static bool hashmap_needs_to_grow(struct hashmap *map)
+{
+ /* grow if empty or more than 75% filled */
+ return (map->cap == 0) || ((map->sz + 1) * 4 / 3 > map->cap);
+}
+
+static int hashmap_grow(struct hashmap *map)
+{
+ struct hashmap_entry **new_buckets;
+ struct hashmap_entry *cur, *tmp;
+ size_t new_cap_bits, new_cap;
+ size_t h;
+ int bkt;
+
+ new_cap_bits = map->cap_bits + 1;
+ if (new_cap_bits < HASHMAP_MIN_CAP_BITS)
+ new_cap_bits = HASHMAP_MIN_CAP_BITS;
+
+ new_cap = 1UL << new_cap_bits;
+ new_buckets = calloc(new_cap, sizeof(new_buckets[0]));
+ if (!new_buckets)
+ return -ENOMEM;
+
+ hashmap__for_each_entry_safe(map, cur, tmp, bkt) {
+ h = hash_bits(map->hash_fn(cur->key, map->ctx), new_cap_bits);
+ hashmap_add_entry(&new_buckets[h], cur);
+ }
+
+ map->cap = new_cap;
+ map->cap_bits = new_cap_bits;
+ free(map->buckets);
+ map->buckets = new_buckets;
+
+ return 0;
+}
+
+static bool hashmap_find_entry(const struct hashmap *map,
+ const void *key, size_t hash,
+ struct hashmap_entry ***pprev,
+ struct hashmap_entry **entry)
+{
+ struct hashmap_entry *cur, **prev_ptr;
+
+ if (!map->buckets)
+ return false;
+
+ for (prev_ptr = &map->buckets[hash], cur = *prev_ptr;
+ cur;
+ prev_ptr = &cur->next, cur = cur->next) {
+ if (map->equal_fn(cur->key, key, map->ctx)) {
+ if (pprev)
+ *pprev = prev_ptr;
+ *entry = cur;
+ return true;
+ }
+ }
+
+ return false;
+}
+
+int hashmap__insert(struct hashmap *map, const void *key, void *value,
+ enum hashmap_insert_strategy strategy,
+ const void **old_key, void **old_value)
+{
+ struct hashmap_entry *entry;
+ size_t h;
+ int err;
+
+ if (old_key)
+ *old_key = NULL;
+ if (old_value)
+ *old_value = NULL;
+
+ h = hash_bits(map->hash_fn(key, map->ctx), map->cap_bits);
+ if (strategy != HASHMAP_APPEND &&
+ hashmap_find_entry(map, key, h, NULL, &entry)) {
+ if (old_key)
+ *old_key = entry->key;
+ if (old_value)
+ *old_value = entry->value;
+
+ if (strategy == HASHMAP_SET || strategy == HASHMAP_UPDATE) {
+ entry->key = key;
+ entry->value = value;
+ return 0;
+ } else if (strategy == HASHMAP_ADD) {
+ return -EEXIST;
+ }
+ }
+
+ if (strategy == HASHMAP_UPDATE)
+ return -ENOENT;
+
+ if (hashmap_needs_to_grow(map)) {
+ err = hashmap_grow(map);
+ if (err)
+ return err;
+ h = hash_bits(map->hash_fn(key, map->ctx), map->cap_bits);
+ }
+
+ entry = malloc(sizeof(struct hashmap_entry));
+ if (!entry)
+ return -ENOMEM;
+
+ entry->key = key;
+ entry->value = value;
+ hashmap_add_entry(&map->buckets[h], entry);
+ map->sz++;
+
+ return 0;
+}
+
+bool hashmap__find(const struct hashmap *map, const void *key, void **value)
+{
+ struct hashmap_entry *entry;
+ size_t h;
+
+ h = hash_bits(map->hash_fn(key, map->ctx), map->cap_bits);
+ if (!hashmap_find_entry(map, key, h, NULL, &entry))
+ return false;
+
+ if (value)
+ *value = entry->value;
+ return true;
+}
+
+bool hashmap__delete(struct hashmap *map, const void *key,
+ const void **old_key, void **old_value)
+{
+ struct hashmap_entry **pprev, *entry;
+ size_t h;
+
+ h = hash_bits(map->hash_fn(key, map->ctx), map->cap_bits);
+ if (!hashmap_find_entry(map, key, h, &pprev, &entry))
+ return false;
+
+ if (old_key)
+ *old_key = entry->key;
+ if (old_value)
+ *old_value = entry->value;
+
+ hashmap_del_entry(pprev, entry);
+ free(entry);
+ map->sz--;
+
+ return true;
+}
+
diff --git a/src/contrib/libbpf/bpf/hashmap.h b/src/contrib/libbpf/bpf/hashmap.h
new file mode 100644
index 0000000..bae8879
--- /dev/null
+++ b/src/contrib/libbpf/bpf/hashmap.h
@@ -0,0 +1,178 @@
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
+
+/*
+ * Generic non-thread safe hash map implementation.
+ *
+ * Copyright (c) 2019 Facebook
+ */
+#ifndef __LIBBPF_HASHMAP_H
+#define __LIBBPF_HASHMAP_H
+
+#include <stdbool.h>
+#include <stddef.h>
+#ifdef __GLIBC__
+#include <bits/wordsize.h>
+#else
+#include <bits/reg.h>
+#endif
+#include "libbpf_internal.h"
+
+static inline size_t hash_bits(size_t h, int bits)
+{
+ /* shuffle bits and return requested number of upper bits */
+ return (h * 11400714819323198485llu) >> (__WORDSIZE - bits);
+}
+
+typedef size_t (*hashmap_hash_fn)(const void *key, void *ctx);
+typedef bool (*hashmap_equal_fn)(const void *key1, const void *key2, void *ctx);
+
+struct hashmap_entry {
+ const void *key;
+ void *value;
+ struct hashmap_entry *next;
+};
+
+struct hashmap {
+ hashmap_hash_fn hash_fn;
+ hashmap_equal_fn equal_fn;
+ void *ctx;
+
+ struct hashmap_entry **buckets;
+ size_t cap;
+ size_t cap_bits;
+ size_t sz;
+};
+
+#define HASHMAP_INIT(hash_fn, equal_fn, ctx) { \
+ .hash_fn = (hash_fn), \
+ .equal_fn = (equal_fn), \
+ .ctx = (ctx), \
+ .buckets = NULL, \
+ .cap = 0, \
+ .cap_bits = 0, \
+ .sz = 0, \
+}
+
+void hashmap__init(struct hashmap *map, hashmap_hash_fn hash_fn,
+ hashmap_equal_fn equal_fn, void *ctx);
+struct hashmap *hashmap__new(hashmap_hash_fn hash_fn,
+ hashmap_equal_fn equal_fn,
+ void *ctx);
+void hashmap__clear(struct hashmap *map);
+void hashmap__free(struct hashmap *map);
+
+size_t hashmap__size(const struct hashmap *map);
+size_t hashmap__capacity(const struct hashmap *map);
+
+/*
+ * Hashmap insertion strategy:
+ * - HASHMAP_ADD - only add key/value if key doesn't exist yet;
+ * - HASHMAP_SET - add key/value pair if key doesn't exist yet; otherwise,
+ * update value;
+ * - HASHMAP_UPDATE - update value, if key already exists; otherwise, do
+ * nothing and return -ENOENT;
+ * - HASHMAP_APPEND - always add key/value pair, even if key already exists.
+ * This turns hashmap into a multimap by allowing multiple values to be
+ * associated with the same key. Most useful read API for such hashmap is
+ * hashmap__for_each_key_entry() iteration. If hashmap__find() is still
+ * used, it will return last inserted key/value entry (first in a bucket
+ * chain).
+ */
+enum hashmap_insert_strategy {
+ HASHMAP_ADD,
+ HASHMAP_SET,
+ HASHMAP_UPDATE,
+ HASHMAP_APPEND,
+};
+
+/*
+ * hashmap__insert() adds key/value entry w/ various semantics, depending on
+ * provided strategy value. If a given key/value pair replaced already
+ * existing key/value pair, both old key and old value will be returned
+ * through old_key and old_value to allow calling code do proper memory
+ * management.
+ */
+int hashmap__insert(struct hashmap *map, const void *key, void *value,
+ enum hashmap_insert_strategy strategy,
+ const void **old_key, void **old_value);
+
+static inline int hashmap__add(struct hashmap *map,
+ const void *key, void *value)
+{
+ return hashmap__insert(map, key, value, HASHMAP_ADD, NULL, NULL);
+}
+
+static inline int hashmap__set(struct hashmap *map,
+ const void *key, void *value,
+ const void **old_key, void **old_value)
+{
+ return hashmap__insert(map, key, value, HASHMAP_SET,
+ old_key, old_value);
+}
+
+static inline int hashmap__update(struct hashmap *map,
+ const void *key, void *value,
+ const void **old_key, void **old_value)
+{
+ return hashmap__insert(map, key, value, HASHMAP_UPDATE,
+ old_key, old_value);
+}
+
+static inline int hashmap__append(struct hashmap *map,
+ const void *key, void *value)
+{
+ return hashmap__insert(map, key, value, HASHMAP_APPEND, NULL, NULL);
+}
+
+bool hashmap__delete(struct hashmap *map, const void *key,
+ const void **old_key, void **old_value);
+
+bool hashmap__find(const struct hashmap *map, const void *key, void **value);
+
+/*
+ * hashmap__for_each_entry - iterate over all entries in hashmap
+ * @map: hashmap to iterate
+ * @cur: struct hashmap_entry * used as a loop cursor
+ * @bkt: integer used as a bucket loop cursor
+ */
+#define hashmap__for_each_entry(map, cur, bkt) \
+ for (bkt = 0; bkt < map->cap; bkt++) \
+ for (cur = map->buckets[bkt]; cur; cur = cur->next)
+
+/*
+ * hashmap__for_each_entry_safe - iterate over all entries in hashmap, safe
+ * against removals
+ * @map: hashmap to iterate
+ * @cur: struct hashmap_entry * used as a loop cursor
+ * @tmp: struct hashmap_entry * used as a temporary next cursor storage
+ * @bkt: integer used as a bucket loop cursor
+ */
+#define hashmap__for_each_entry_safe(map, cur, tmp, bkt) \
+ for (bkt = 0; bkt < map->cap; bkt++) \
+ for (cur = map->buckets[bkt]; \
+ cur && ({tmp = cur->next; true; }); \
+ cur = tmp)
+
+/*
+ * hashmap__for_each_key_entry - iterate over entries associated with given key
+ * @map: hashmap to iterate
+ * @cur: struct hashmap_entry * used as a loop cursor
+ * @key: key to iterate entries for
+ */
+#define hashmap__for_each_key_entry(map, cur, _key) \
+ for (cur = ({ size_t bkt = hash_bits(map->hash_fn((_key), map->ctx),\
+ map->cap_bits); \
+ map->buckets ? map->buckets[bkt] : NULL; }); \
+ cur; \
+ cur = cur->next) \
+ if (map->equal_fn(cur->key, (_key), map->ctx))
+
+#define hashmap__for_each_key_entry_safe(map, cur, tmp, _key) \
+ for (cur = ({ size_t bkt = hash_bits(map->hash_fn((_key), map->ctx),\
+ map->cap_bits); \
+ cur = map->buckets ? map->buckets[bkt] : NULL; }); \
+ cur && ({ tmp = cur->next; true; }); \
+ cur = tmp) \
+ if (map->equal_fn(cur->key, (_key), map->ctx))
+
+#endif /* __LIBBPF_HASHMAP_H */
diff --git a/src/contrib/libbpf/bpf/libbpf.c b/src/contrib/libbpf/bpf/libbpf.c
new file mode 100644
index 0000000..29d8d03
--- /dev/null
+++ b/src/contrib/libbpf/bpf/libbpf.c
@@ -0,0 +1,6581 @@
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+
+/*
+ * Common eBPF ELF object loading operations.
+ *
+ * Copyright (C) 2013-2015 Alexei Starovoitov <ast@kernel.org>
+ * Copyright (C) 2015 Wang Nan <wangnan0@huawei.com>
+ * Copyright (C) 2015 Huawei Inc.
+ * Copyright (C) 2017 Nicira, Inc.
+ * Copyright (C) 2019 Isovalent, Inc.
+ */
+
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdarg.h>
+#include <libgen.h>
+#include <inttypes.h>
+#include <string.h>
+#include <unistd.h>
+#include <endian.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <asm/unistd.h>
+#include <linux/err.h>
+#include <linux/kernel.h>
+#include <linux/bpf.h>
+#include <linux/btf.h>
+#include <linux/filter.h>
+#include <linux/list.h>
+#include <linux/limits.h>
+#include <linux/perf_event.h>
+#include <linux/ring_buffer.h>
+#include <linux/version.h>
+#include <sys/epoll.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/vfs.h>
+#include <sys/utsname.h>
+#include <libelf.h>
+#include <gelf.h>
+
+#include "libbpf.h"
+#include "bpf.h"
+#include "btf.h"
+#include "str_error.h"
+#include "libbpf_internal.h"
+#include "hashmap.h"
+
+#ifndef EM_BPF
+#define EM_BPF 247
+#endif
+
+#ifndef BPF_FS_MAGIC
+#define BPF_FS_MAGIC 0xcafe4a11
+#endif
+
+/* vsprintf() in __base_pr() uses nonliteral format string. It may break
+ * compilation if user enables corresponding warning. Disable it explicitly.
+ */
+#pragma GCC diagnostic ignored "-Wformat-nonliteral"
+
+#define __printf(a, b) __attribute__((format(printf, a, b)))
+
+static int __base_pr(enum libbpf_print_level level, const char *format,
+ va_list args)
+{
+ if (level == LIBBPF_DEBUG)
+ return 0;
+
+ return vfprintf(stderr, format, args);
+}
+
+static libbpf_print_fn_t __libbpf_pr = __base_pr;
+
+libbpf_print_fn_t libbpf_set_print(libbpf_print_fn_t fn)
+{
+ libbpf_print_fn_t old_print_fn = __libbpf_pr;
+
+ __libbpf_pr = fn;
+ return old_print_fn;
+}
+
+__printf(2, 3)
+void libbpf_print(enum libbpf_print_level level, const char *format, ...)
+{
+ va_list args;
+
+ if (!__libbpf_pr)
+ return;
+
+ va_start(args, format);
+ __libbpf_pr(level, format, args);
+ va_end(args);
+}
+
+#define STRERR_BUFSIZE 128
+
+#define CHECK_ERR(action, err, out) do { \
+ err = action; \
+ if (err) \
+ goto out; \
+} while (0)
+
+
+/* Copied from tools/perf/util/util.h */
+#ifndef zfree
+# define zfree(ptr) ({ free(*ptr); *ptr = NULL; })
+#endif
+
+#ifndef zclose
+# define zclose(fd) ({ \
+ int ___err = 0; \
+ if ((fd) >= 0) \
+ ___err = close((fd)); \
+ fd = -1; \
+ ___err; })
+#endif
+
+#ifdef HAVE_LIBELF_MMAP_SUPPORT
+# define LIBBPF_ELF_C_READ_MMAP ELF_C_READ_MMAP
+#else
+# define LIBBPF_ELF_C_READ_MMAP ELF_C_READ
+#endif
+
+static inline __u64 ptr_to_u64(const void *ptr)
+{
+ return (__u64) (unsigned long) ptr;
+}
+
+struct bpf_capabilities {
+ /* v4.14: kernel support for program & map names. */
+ __u32 name:1;
+ /* v5.2: kernel support for global data sections. */
+ __u32 global_data:1;
+ /* BTF_KIND_FUNC and BTF_KIND_FUNC_PROTO support */
+ __u32 btf_func:1;
+ /* BTF_KIND_VAR and BTF_KIND_DATASEC support */
+ __u32 btf_datasec:1;
+ /* BPF_F_MMAPABLE is supported for arrays */
+ __u32 array_mmap:1;
+};
+
+/*
+ * bpf_prog should be a better name but it has been used in
+ * linux/filter.h.
+ */
+struct bpf_program {
+ /* Index in elf obj file, for relocation use. */
+ int idx;
+ char *name;
+ int prog_ifindex;
+ char *section_name;
+ /* section_name with / replaced by _; makes recursive pinning
+ * in bpf_object__pin_programs easier
+ */
+ char *pin_name;
+ struct bpf_insn *insns;
+ size_t insns_cnt, main_prog_cnt;
+ enum bpf_prog_type type;
+
+ struct reloc_desc {
+ enum {
+ RELO_LD64,
+ RELO_CALL,
+ RELO_DATA,
+ } type;
+ int insn_idx;
+ int map_idx;
+ int sym_off;
+ } *reloc_desc;
+ int nr_reloc;
+ int log_level;
+
+ struct {
+ int nr;
+ int *fds;
+ } instances;
+ bpf_program_prep_t preprocessor;
+
+ struct bpf_object *obj;
+ void *priv;
+ bpf_program_clear_priv_t clear_priv;
+
+ enum bpf_attach_type expected_attach_type;
+ __u32 attach_btf_id;
+ __u32 attach_prog_fd;
+ void *func_info;
+ __u32 func_info_rec_size;
+ __u32 func_info_cnt;
+
+ struct bpf_capabilities *caps;
+
+ void *line_info;
+ __u32 line_info_rec_size;
+ __u32 line_info_cnt;
+ __u32 prog_flags;
+};
+
+enum libbpf_map_type {
+ LIBBPF_MAP_UNSPEC,
+ LIBBPF_MAP_DATA,
+ LIBBPF_MAP_BSS,
+ LIBBPF_MAP_RODATA,
+};
+
+static const char * const libbpf_type_to_btf_name[] = {
+ [LIBBPF_MAP_DATA] = ".data",
+ [LIBBPF_MAP_BSS] = ".bss",
+ [LIBBPF_MAP_RODATA] = ".rodata",
+};
+
+struct bpf_map {
+ int fd;
+ char *name;
+ int sec_idx;
+ size_t sec_offset;
+ int map_ifindex;
+ int inner_map_fd;
+ struct bpf_map_def def;
+ __u32 btf_key_type_id;
+ __u32 btf_value_type_id;
+ void *priv;
+ bpf_map_clear_priv_t clear_priv;
+ enum libbpf_map_type libbpf_type;
+ char *pin_path;
+ bool pinned;
+ bool reused;
+};
+
+struct bpf_secdata {
+ void *rodata;
+ void *data;
+};
+
+static LIST_HEAD(bpf_objects_list);
+
+struct bpf_object {
+ char name[BPF_OBJ_NAME_LEN];
+ char license[64];
+ __u32 kern_version;
+
+ struct bpf_program *programs;
+ size_t nr_programs;
+ struct bpf_map *maps;
+ size_t nr_maps;
+ size_t maps_cap;
+ struct bpf_secdata sections;
+
+ bool loaded;
+ bool has_pseudo_calls;
+ bool relaxed_core_relocs;
+
+ /*
+ * Information when doing elf related work. Only valid if fd
+ * is valid.
+ */
+ struct {
+ int fd;
+ const void *obj_buf;
+ size_t obj_buf_sz;
+ Elf *elf;
+ GElf_Ehdr ehdr;
+ Elf_Data *symbols;
+ Elf_Data *data;
+ Elf_Data *rodata;
+ Elf_Data *bss;
+ size_t strtabidx;
+ struct {
+ GElf_Shdr shdr;
+ Elf_Data *data;
+ } *reloc_sects;
+ int nr_reloc_sects;
+ int maps_shndx;
+ int btf_maps_shndx;
+ int text_shndx;
+ int data_shndx;
+ int rodata_shndx;
+ int bss_shndx;
+ } efile;
+ /*
+ * All loaded bpf_object is linked in a list, which is
+ * hidden to caller. bpf_objects__<func> handlers deal with
+ * all objects.
+ */
+ struct list_head list;
+
+ struct btf *btf;
+ struct btf_ext *btf_ext;
+
+ void *priv;
+ bpf_object_clear_priv_t clear_priv;
+
+ struct bpf_capabilities caps;
+
+ char path[];
+};
+#define obj_elf_valid(o) ((o)->efile.elf)
+
+void bpf_program__unload(struct bpf_program *prog)
+{
+ int i;
+
+ if (!prog)
+ return;
+
+ /*
+ * If the object is opened but the program was never loaded,
+ * it is possible that prog->instances.nr == -1.
+ */
+ if (prog->instances.nr > 0) {
+ for (i = 0; i < prog->instances.nr; i++)
+ zclose(prog->instances.fds[i]);
+ } else if (prog->instances.nr != -1) {
+ pr_warn("Internal error: instances.nr is %d\n",
+ prog->instances.nr);
+ }
+
+ prog->instances.nr = -1;
+ zfree(&prog->instances.fds);
+
+ zfree(&prog->func_info);
+ zfree(&prog->line_info);
+}
+
+static void bpf_program__exit(struct bpf_program *prog)
+{
+ if (!prog)
+ return;
+
+ if (prog->clear_priv)
+ prog->clear_priv(prog, prog->priv);
+
+ prog->priv = NULL;
+ prog->clear_priv = NULL;
+
+ bpf_program__unload(prog);
+ zfree(&prog->name);
+ zfree(&prog->section_name);
+ zfree(&prog->pin_name);
+ zfree(&prog->insns);
+ zfree(&prog->reloc_desc);
+
+ prog->nr_reloc = 0;
+ prog->insns_cnt = 0;
+ prog->idx = -1;
+}
+
+static char *__bpf_program__pin_name(struct bpf_program *prog)
+{
+ char *name, *p;
+
+ name = p = strdup(prog->section_name);
+ while ((p = strchr(p, '/')))
+ *p = '_';
+
+ return name;
+}
+
+static int
+bpf_program__init(void *data, size_t size, char *section_name, int idx,
+ struct bpf_program *prog)
+{
+ const size_t bpf_insn_sz = sizeof(struct bpf_insn);
+
+ if (size == 0 || size % bpf_insn_sz) {
+ pr_warn("corrupted section '%s', size: %zu\n",
+ section_name, size);
+ return -EINVAL;
+ }
+
+ memset(prog, 0, sizeof(*prog));
+
+ prog->section_name = strdup(section_name);
+ if (!prog->section_name) {
+ pr_warn("failed to alloc name for prog under section(%d) %s\n",
+ idx, section_name);
+ goto errout;
+ }
+
+ prog->pin_name = __bpf_program__pin_name(prog);
+ if (!prog->pin_name) {
+ pr_warn("failed to alloc pin name for prog under section(%d) %s\n",
+ idx, section_name);
+ goto errout;
+ }
+
+ prog->insns = malloc(size);
+ if (!prog->insns) {
+ pr_warn("failed to alloc insns for prog under section %s\n",
+ section_name);
+ goto errout;
+ }
+ prog->insns_cnt = size / bpf_insn_sz;
+ memcpy(prog->insns, data, size);
+ prog->idx = idx;
+ prog->instances.fds = NULL;
+ prog->instances.nr = -1;
+ prog->type = BPF_PROG_TYPE_UNSPEC;
+
+ return 0;
+errout:
+ bpf_program__exit(prog);
+ return -ENOMEM;
+}
+
+static int
+bpf_object__add_program(struct bpf_object *obj, void *data, size_t size,
+ char *section_name, int idx)
+{
+ struct bpf_program prog, *progs;
+ int nr_progs, err;
+
+ err = bpf_program__init(data, size, section_name, idx, &prog);
+ if (err)
+ return err;
+
+ prog.caps = &obj->caps;
+ progs = obj->programs;
+ nr_progs = obj->nr_programs;
+
+ progs = reallocarray(progs, nr_progs + 1, sizeof(progs[0]));
+ if (!progs) {
+ /*
+ * In this case the original obj->programs
+ * is still valid, so don't need special treat for
+ * bpf_close_object().
+ */
+ pr_warn("failed to alloc a new program under section '%s'\n",
+ section_name);
+ bpf_program__exit(&prog);
+ return -ENOMEM;
+ }
+
+ pr_debug("found program %s\n", prog.section_name);
+ obj->programs = progs;
+ obj->nr_programs = nr_progs + 1;
+ prog.obj = obj;
+ progs[nr_progs] = prog;
+ return 0;
+}
+
+static int
+bpf_object__init_prog_names(struct bpf_object *obj)
+{
+ Elf_Data *symbols = obj->efile.symbols;
+ struct bpf_program *prog;
+ size_t pi, si;
+
+ for (pi = 0; pi < obj->nr_programs; pi++) {
+ const char *name = NULL;
+
+ prog = &obj->programs[pi];
+
+ for (si = 0; si < symbols->d_size / sizeof(GElf_Sym) && !name;
+ si++) {
+ GElf_Sym sym;
+
+ if (!gelf_getsym(symbols, si, &sym))
+ continue;
+ if (sym.st_shndx != prog->idx)
+ continue;
+ if (GELF_ST_BIND(sym.st_info) != STB_GLOBAL)
+ continue;
+
+ name = elf_strptr(obj->efile.elf,
+ obj->efile.strtabidx,
+ sym.st_name);
+ if (!name) {
+ pr_warn("failed to get sym name string for prog %s\n",
+ prog->section_name);
+ return -LIBBPF_ERRNO__LIBELF;
+ }
+ }
+
+ if (!name && prog->idx == obj->efile.text_shndx)
+ name = ".text";
+
+ if (!name) {
+ pr_warn("failed to find sym for prog %s\n",
+ prog->section_name);
+ return -EINVAL;
+ }
+
+ prog->name = strdup(name);
+ if (!prog->name) {
+ pr_warn("failed to allocate memory for prog sym %s\n",
+ name);
+ return -ENOMEM;
+ }
+ }
+
+ return 0;
+}
+
+static __u32 get_kernel_version(void)
+{
+ __u32 major, minor, patch;
+ struct utsname info;
+
+ uname(&info);
+ if (sscanf(info.release, "%u.%u.%u", &major, &minor, &patch) != 3)
+ return 0;
+ return KERNEL_VERSION(major, minor, patch);
+}
+
+static struct bpf_object *bpf_object__new(const char *path,
+ const void *obj_buf,
+ size_t obj_buf_sz,
+ const char *obj_name)
+{
+ struct bpf_object *obj;
+ char *end;
+
+ obj = calloc(1, sizeof(struct bpf_object) + strlen(path) + 1);
+ if (!obj) {
+ pr_warn("alloc memory failed for %s\n", path);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ strcpy(obj->path, path);
+ if (obj_name) {
+ strncpy(obj->name, obj_name, sizeof(obj->name) - 1);
+ obj->name[sizeof(obj->name) - 1] = 0;
+ } else {
+ /* Using basename() GNU version which doesn't modify arg. */
+ strncpy(obj->name, basename((void *)path),
+ sizeof(obj->name) - 1);
+ end = strchr(obj->name, '.');
+ if (end)
+ *end = 0;
+ }
+
+ obj->efile.fd = -1;
+ /*
+ * Caller of this function should also call
+ * bpf_object__elf_finish() after data collection to return
+ * obj_buf to user. If not, we should duplicate the buffer to
+ * avoid user freeing them before elf finish.
+ */
+ obj->efile.obj_buf = obj_buf;
+ obj->efile.obj_buf_sz = obj_buf_sz;
+ obj->efile.maps_shndx = -1;
+ obj->efile.btf_maps_shndx = -1;
+ obj->efile.data_shndx = -1;
+ obj->efile.rodata_shndx = -1;
+ obj->efile.bss_shndx = -1;
+
+ obj->kern_version = get_kernel_version();
+ obj->loaded = false;
+
+ INIT_LIST_HEAD(&obj->list);
+ list_add(&obj->list, &bpf_objects_list);
+ return obj;
+}
+
+static void bpf_object__elf_finish(struct bpf_object *obj)
+{
+ if (!obj_elf_valid(obj))
+ return;
+
+ if (obj->efile.elf) {
+ elf_end(obj->efile.elf);
+ obj->efile.elf = NULL;
+ }
+ obj->efile.symbols = NULL;
+ obj->efile.data = NULL;
+ obj->efile.rodata = NULL;
+ obj->efile.bss = NULL;
+
+ zfree(&obj->efile.reloc_sects);
+ obj->efile.nr_reloc_sects = 0;
+ zclose(obj->efile.fd);
+ obj->efile.obj_buf = NULL;
+ obj->efile.obj_buf_sz = 0;
+}
+
+static int bpf_object__elf_init(struct bpf_object *obj)
+{
+ int err = 0;
+ GElf_Ehdr *ep;
+
+ if (obj_elf_valid(obj)) {
+ pr_warn("elf init: internal error\n");
+ return -LIBBPF_ERRNO__LIBELF;
+ }
+
+ if (obj->efile.obj_buf_sz > 0) {
+ /*
+ * obj_buf should have been validated by
+ * bpf_object__open_buffer().
+ */
+ obj->efile.elf = elf_memory((char *)obj->efile.obj_buf,
+ obj->efile.obj_buf_sz);
+ } else {
+ obj->efile.fd = open(obj->path, O_RDONLY);
+ if (obj->efile.fd < 0) {
+ char errmsg[STRERR_BUFSIZE], *cp;
+
+ err = -errno;
+ cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
+ pr_warn("failed to open %s: %s\n", obj->path, cp);
+ return err;
+ }
+
+ obj->efile.elf = elf_begin(obj->efile.fd,
+ LIBBPF_ELF_C_READ_MMAP, NULL);
+ }
+
+ if (!obj->efile.elf) {
+ pr_warn("failed to open %s as ELF file\n", obj->path);
+ err = -LIBBPF_ERRNO__LIBELF;
+ goto errout;
+ }
+
+ if (!gelf_getehdr(obj->efile.elf, &obj->efile.ehdr)) {
+ pr_warn("failed to get EHDR from %s\n", obj->path);
+ err = -LIBBPF_ERRNO__FORMAT;
+ goto errout;
+ }
+ ep = &obj->efile.ehdr;
+
+ /* Old LLVM set e_machine to EM_NONE */
+ if (ep->e_type != ET_REL ||
+ (ep->e_machine && ep->e_machine != EM_BPF)) {
+ pr_warn("%s is not an eBPF object file\n", obj->path);
+ err = -LIBBPF_ERRNO__FORMAT;
+ goto errout;
+ }
+
+ return 0;
+errout:
+ bpf_object__elf_finish(obj);
+ return err;
+}
+
+static int bpf_object__check_endianness(struct bpf_object *obj)
+{
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+ if (obj->efile.ehdr.e_ident[EI_DATA] == ELFDATA2LSB)
+ return 0;
+#elif __BYTE_ORDER == __BIG_ENDIAN
+ if (obj->efile.ehdr.e_ident[EI_DATA] == ELFDATA2MSB)
+ return 0;
+#else
+# error "Unrecognized __BYTE_ORDER__"
+#endif
+ pr_warn("endianness mismatch.\n");
+ return -LIBBPF_ERRNO__ENDIAN;
+}
+
+static int
+bpf_object__init_license(struct bpf_object *obj, void *data, size_t size)
+{
+ memcpy(obj->license, data, min(size, sizeof(obj->license) - 1));
+ pr_debug("license of %s is %s\n", obj->path, obj->license);
+ return 0;
+}
+
+static int
+bpf_object__init_kversion(struct bpf_object *obj, void *data, size_t size)
+{
+ __u32 kver;
+
+ if (size != sizeof(kver)) {
+ pr_warn("invalid kver section in %s\n", obj->path);
+ return -LIBBPF_ERRNO__FORMAT;
+ }
+ memcpy(&kver, data, sizeof(kver));
+ obj->kern_version = kver;
+ pr_debug("kernel version of %s is %x\n", obj->path, obj->kern_version);
+ return 0;
+}
+
+static int compare_bpf_map(const void *_a, const void *_b)
+{
+ const struct bpf_map *a = _a;
+ const struct bpf_map *b = _b;
+
+ if (a->sec_idx != b->sec_idx)
+ return a->sec_idx - b->sec_idx;
+ return a->sec_offset - b->sec_offset;
+}
+
+static bool bpf_map_type__is_map_in_map(enum bpf_map_type type)
+{
+ if (type == BPF_MAP_TYPE_ARRAY_OF_MAPS ||
+ type == BPF_MAP_TYPE_HASH_OF_MAPS)
+ return true;
+ return false;
+}
+
+static int bpf_object_search_section_size(const struct bpf_object *obj,
+ const char *name, size_t *d_size)
+{
+ const GElf_Ehdr *ep = &obj->efile.ehdr;
+ Elf *elf = obj->efile.elf;
+ Elf_Scn *scn = NULL;
+ int idx = 0;
+
+ while ((scn = elf_nextscn(elf, scn)) != NULL) {
+ const char *sec_name;
+ Elf_Data *data;
+ GElf_Shdr sh;
+
+ idx++;
+ if (gelf_getshdr(scn, &sh) != &sh) {
+ pr_warn("failed to get section(%d) header from %s\n",
+ idx, obj->path);
+ return -EIO;
+ }
+
+ sec_name = elf_strptr(elf, ep->e_shstrndx, sh.sh_name);
+ if (!sec_name) {
+ pr_warn("failed to get section(%d) name from %s\n",
+ idx, obj->path);
+ return -EIO;
+ }
+
+ if (strcmp(name, sec_name))
+ continue;
+
+ data = elf_getdata(scn, 0);
+ if (!data) {
+ pr_warn("failed to get section(%d) data from %s(%s)\n",
+ idx, name, obj->path);
+ return -EIO;
+ }
+
+ *d_size = data->d_size;
+ return 0;
+ }
+
+ return -ENOENT;
+}
+
+int bpf_object__section_size(const struct bpf_object *obj, const char *name,
+ __u32 *size)
+{
+ int ret = -ENOENT;
+ size_t d_size;
+
+ *size = 0;
+ if (!name) {
+ return -EINVAL;
+ } else if (!strcmp(name, ".data")) {
+ if (obj->efile.data)
+ *size = obj->efile.data->d_size;
+ } else if (!strcmp(name, ".bss")) {
+ if (obj->efile.bss)
+ *size = obj->efile.bss->d_size;
+ } else if (!strcmp(name, ".rodata")) {
+ if (obj->efile.rodata)
+ *size = obj->efile.rodata->d_size;
+ } else {
+ ret = bpf_object_search_section_size(obj, name, &d_size);
+ if (!ret)
+ *size = d_size;
+ }
+
+ return *size ? 0 : ret;
+}
+
+int bpf_object__variable_offset(const struct bpf_object *obj, const char *name,
+ __u32 *off)
+{
+ Elf_Data *symbols = obj->efile.symbols;
+ const char *sname;
+ size_t si;
+
+ if (!name || !off)
+ return -EINVAL;
+
+ for (si = 0; si < symbols->d_size / sizeof(GElf_Sym); si++) {
+ GElf_Sym sym;
+
+ if (!gelf_getsym(symbols, si, &sym))
+ continue;
+ if (GELF_ST_BIND(sym.st_info) != STB_GLOBAL ||
+ GELF_ST_TYPE(sym.st_info) != STT_OBJECT)
+ continue;
+
+ sname = elf_strptr(obj->efile.elf, obj->efile.strtabidx,
+ sym.st_name);
+ if (!sname) {
+ pr_warn("failed to get sym name string for var %s\n",
+ name);
+ return -EIO;
+ }
+ if (strcmp(name, sname) == 0) {
+ *off = sym.st_value;
+ return 0;
+ }
+ }
+
+ return -ENOENT;
+}
+
+static struct bpf_map *bpf_object__add_map(struct bpf_object *obj)
+{
+ struct bpf_map *new_maps;
+ size_t new_cap;
+ int i;
+
+ if (obj->nr_maps < obj->maps_cap)
+ return &obj->maps[obj->nr_maps++];
+
+ new_cap = max((size_t)4, obj->maps_cap * 3 / 2);
+ new_maps = realloc(obj->maps, new_cap * sizeof(*obj->maps));
+ if (!new_maps) {
+ pr_warn("alloc maps for object failed\n");
+ return ERR_PTR(-ENOMEM);
+ }
+
+ obj->maps_cap = new_cap;
+ obj->maps = new_maps;
+
+ /* zero out new maps */
+ memset(obj->maps + obj->nr_maps, 0,
+ (obj->maps_cap - obj->nr_maps) * sizeof(*obj->maps));
+ /*
+ * fill all fd with -1 so won't close incorrect fd (fd=0 is stdin)
+ * when failure (zclose won't close negative fd)).
+ */
+ for (i = obj->nr_maps; i < obj->maps_cap; i++) {
+ obj->maps[i].fd = -1;
+ obj->maps[i].inner_map_fd = -1;
+ }
+
+ return &obj->maps[obj->nr_maps++];
+}
+
+static int
+bpf_object__init_internal_map(struct bpf_object *obj, enum libbpf_map_type type,
+ int sec_idx, Elf_Data *data, void **data_buff)
+{
+ char map_name[BPF_OBJ_NAME_LEN];
+ struct bpf_map_def *def;
+ struct bpf_map *map;
+
+ map = bpf_object__add_map(obj);
+ if (IS_ERR(map))
+ return PTR_ERR(map);
+
+ map->libbpf_type = type;
+ map->sec_idx = sec_idx;
+ map->sec_offset = 0;
+ snprintf(map_name, sizeof(map_name), "%.8s%.7s", obj->name,
+ libbpf_type_to_btf_name[type]);
+ map->name = strdup(map_name);
+ if (!map->name) {
+ pr_warn("failed to alloc map name\n");
+ return -ENOMEM;
+ }
+
+ def = &map->def;
+ def->type = BPF_MAP_TYPE_ARRAY;
+ def->key_size = sizeof(int);
+ def->value_size = data->d_size;
+ def->max_entries = 1;
+ def->map_flags = type == LIBBPF_MAP_RODATA ? BPF_F_RDONLY_PROG : 0;
+ if (obj->caps.array_mmap)
+ def->map_flags |= BPF_F_MMAPABLE;
+
+ pr_debug("map '%s' (global data): at sec_idx %d, offset %zu, flags %x.\n",
+ map_name, map->sec_idx, map->sec_offset, def->map_flags);
+
+ if (data_buff) {
+ *data_buff = malloc(data->d_size);
+ if (!*data_buff) {
+ zfree(&map->name);
+ pr_warn("failed to alloc map content buffer\n");
+ return -ENOMEM;
+ }
+ memcpy(*data_buff, data->d_buf, data->d_size);
+ }
+
+ pr_debug("map %td is \"%s\"\n", map - obj->maps, map->name);
+ return 0;
+}
+
+static int bpf_object__init_global_data_maps(struct bpf_object *obj)
+{
+ int err;
+
+ if (!obj->caps.global_data)
+ return 0;
+ /*
+ * Populate obj->maps with libbpf internal maps.
+ */
+ if (obj->efile.data_shndx >= 0) {
+ err = bpf_object__init_internal_map(obj, LIBBPF_MAP_DATA,
+ obj->efile.data_shndx,
+ obj->efile.data,
+ &obj->sections.data);
+ if (err)
+ return err;
+ }
+ if (obj->efile.rodata_shndx >= 0) {
+ err = bpf_object__init_internal_map(obj, LIBBPF_MAP_RODATA,
+ obj->efile.rodata_shndx,
+ obj->efile.rodata,
+ &obj->sections.rodata);
+ if (err)
+ return err;
+ }
+ if (obj->efile.bss_shndx >= 0) {
+ err = bpf_object__init_internal_map(obj, LIBBPF_MAP_BSS,
+ obj->efile.bss_shndx,
+ obj->efile.bss, NULL);
+ if (err)
+ return err;
+ }
+ return 0;
+}
+
+static int bpf_object__init_user_maps(struct bpf_object *obj, bool strict)
+{
+ Elf_Data *symbols = obj->efile.symbols;
+ int i, map_def_sz = 0, nr_maps = 0, nr_syms;
+ Elf_Data *data = NULL;
+ Elf_Scn *scn;
+
+ if (obj->efile.maps_shndx < 0)
+ return 0;
+
+ if (!symbols)
+ return -EINVAL;
+
+ scn = elf_getscn(obj->efile.elf, obj->efile.maps_shndx);
+ if (scn)
+ data = elf_getdata(scn, NULL);
+ if (!scn || !data) {
+ pr_warn("failed to get Elf_Data from map section %d\n",
+ obj->efile.maps_shndx);
+ return -EINVAL;
+ }
+
+ /*
+ * Count number of maps. Each map has a name.
+ * Array of maps is not supported: only the first element is
+ * considered.
+ *
+ * TODO: Detect array of map and report error.
+ */
+ nr_syms = symbols->d_size / sizeof(GElf_Sym);
+ for (i = 0; i < nr_syms; i++) {
+ GElf_Sym sym;
+
+ if (!gelf_getsym(symbols, i, &sym))
+ continue;
+ if (sym.st_shndx != obj->efile.maps_shndx)
+ continue;
+ nr_maps++;
+ }
+ /* Assume equally sized map definitions */
+ pr_debug("maps in %s: %d maps in %zd bytes\n",
+ obj->path, nr_maps, data->d_size);
+
+ if (!data->d_size || nr_maps == 0 || (data->d_size % nr_maps) != 0) {
+ pr_warn("unable to determine map definition size section %s, %d maps in %zd bytes\n",
+ obj->path, nr_maps, data->d_size);
+ return -EINVAL;
+ }
+ map_def_sz = data->d_size / nr_maps;
+
+ /* Fill obj->maps using data in "maps" section. */
+ for (i = 0; i < nr_syms; i++) {
+ GElf_Sym sym;
+ const char *map_name;
+ struct bpf_map_def *def;
+ struct bpf_map *map;
+
+ if (!gelf_getsym(symbols, i, &sym))
+ continue;
+ if (sym.st_shndx != obj->efile.maps_shndx)
+ continue;
+
+ map = bpf_object__add_map(obj);
+ if (IS_ERR(map))
+ return PTR_ERR(map);
+
+ map_name = elf_strptr(obj->efile.elf, obj->efile.strtabidx,
+ sym.st_name);
+ if (!map_name) {
+ pr_warn("failed to get map #%d name sym string for obj %s\n",
+ i, obj->path);
+ return -LIBBPF_ERRNO__FORMAT;
+ }
+
+ map->libbpf_type = LIBBPF_MAP_UNSPEC;
+ map->sec_idx = sym.st_shndx;
+ map->sec_offset = sym.st_value;
+ pr_debug("map '%s' (legacy): at sec_idx %d, offset %zu.\n",
+ map_name, map->sec_idx, map->sec_offset);
+ if (sym.st_value + map_def_sz > data->d_size) {
+ pr_warn("corrupted maps section in %s: last map \"%s\" too small\n",
+ obj->path, map_name);
+ return -EINVAL;
+ }
+
+ map->name = strdup(map_name);
+ if (!map->name) {
+ pr_warn("failed to alloc map name\n");
+ return -ENOMEM;
+ }
+ pr_debug("map %d is \"%s\"\n", i, map->name);
+ def = (struct bpf_map_def *)(data->d_buf + sym.st_value);
+ /*
+ * If the definition of the map in the object file fits in
+ * bpf_map_def, copy it. Any extra fields in our version
+ * of bpf_map_def will default to zero as a result of the
+ * calloc above.
+ */
+ if (map_def_sz <= sizeof(struct bpf_map_def)) {
+ memcpy(&map->def, def, map_def_sz);
+ } else {
+ /*
+ * Here the map structure being read is bigger than what
+ * we expect, truncate if the excess bits are all zero.
+ * If they are not zero, reject this map as
+ * incompatible.
+ */
+ char *b;
+
+ for (b = ((char *)def) + sizeof(struct bpf_map_def);
+ b < ((char *)def) + map_def_sz; b++) {
+ if (*b != 0) {
+ pr_warn("maps section in %s: \"%s\" has unrecognized, non-zero options\n",
+ obj->path, map_name);
+ if (strict)
+ return -EINVAL;
+ }
+ }
+ memcpy(&map->def, def, sizeof(struct bpf_map_def));
+ }
+ }
+ return 0;
+}
+
+static const struct btf_type *
+skip_mods_and_typedefs(const struct btf *btf, __u32 id, __u32 *res_id)
+{
+ const struct btf_type *t = btf__type_by_id(btf, id);
+
+ if (res_id)
+ *res_id = id;
+
+ while (btf_is_mod(t) || btf_is_typedef(t)) {
+ if (res_id)
+ *res_id = t->type;
+ t = btf__type_by_id(btf, t->type);
+ }
+
+ return t;
+}
+
+/*
+ * Fetch integer attribute of BTF map definition. Such attributes are
+ * represented using a pointer to an array, in which dimensionality of array
+ * encodes specified integer value. E.g., int (*type)[BPF_MAP_TYPE_ARRAY];
+ * encodes `type => BPF_MAP_TYPE_ARRAY` key/value pair completely using BTF
+ * type definition, while using only sizeof(void *) space in ELF data section.
+ */
+static bool get_map_field_int(const char *map_name, const struct btf *btf,
+ const struct btf_type *def,
+ const struct btf_member *m, __u32 *res)
+{
+ const struct btf_type *t = skip_mods_and_typedefs(btf, m->type, NULL);
+ const char *name = btf__name_by_offset(btf, m->name_off);
+ const struct btf_array *arr_info;
+ const struct btf_type *arr_t;
+
+ if (!btf_is_ptr(t)) {
+ pr_warn("map '%s': attr '%s': expected PTR, got %u.\n",
+ map_name, name, btf_kind(t));
+ return false;
+ }
+
+ arr_t = btf__type_by_id(btf, t->type);
+ if (!arr_t) {
+ pr_warn("map '%s': attr '%s': type [%u] not found.\n",
+ map_name, name, t->type);
+ return false;
+ }
+ if (!btf_is_array(arr_t)) {
+ pr_warn("map '%s': attr '%s': expected ARRAY, got %u.\n",
+ map_name, name, btf_kind(arr_t));
+ return false;
+ }
+ arr_info = btf_array(arr_t);
+ *res = arr_info->nelems;
+ return true;
+}
+
+static int build_map_pin_path(struct bpf_map *map, const char *path)
+{
+ char buf[PATH_MAX];
+ int err, len;
+
+ if (!path)
+ path = "/sys/fs/bpf";
+
+ len = snprintf(buf, PATH_MAX, "%s/%s", path, bpf_map__name(map));
+ if (len < 0)
+ return -EINVAL;
+ else if (len >= PATH_MAX)
+ return -ENAMETOOLONG;
+
+ err = bpf_map__set_pin_path(map, buf);
+ if (err)
+ return err;
+
+ return 0;
+}
+
+static int bpf_object__init_user_btf_map(struct bpf_object *obj,
+ const struct btf_type *sec,
+ int var_idx, int sec_idx,
+ const Elf_Data *data, bool strict,
+ const char *pin_root_path)
+{
+ const struct btf_type *var, *def, *t;
+ const struct btf_var_secinfo *vi;
+ const struct btf_var *var_extra;
+ const struct btf_member *m;
+ const char *map_name;
+ struct bpf_map *map;
+ int vlen, i;
+
+ vi = btf_var_secinfos(sec) + var_idx;
+ var = btf__type_by_id(obj->btf, vi->type);
+ var_extra = btf_var(var);
+ map_name = btf__name_by_offset(obj->btf, var->name_off);
+ vlen = btf_vlen(var);
+
+ if (map_name == NULL || map_name[0] == '\0') {
+ pr_warn("map #%d: empty name.\n", var_idx);
+ return -EINVAL;
+ }
+ if ((__u64)vi->offset + vi->size > data->d_size) {
+ pr_warn("map '%s' BTF data is corrupted.\n", map_name);
+ return -EINVAL;
+ }
+ if (!btf_is_var(var)) {
+ pr_warn("map '%s': unexpected var kind %u.\n",
+ map_name, btf_kind(var));
+ return -EINVAL;
+ }
+ if (var_extra->linkage != BTF_VAR_GLOBAL_ALLOCATED &&
+ var_extra->linkage != BTF_VAR_STATIC) {
+ pr_warn("map '%s': unsupported var linkage %u.\n",
+ map_name, var_extra->linkage);
+ return -EOPNOTSUPP;
+ }
+
+ def = skip_mods_and_typedefs(obj->btf, var->type, NULL);
+ if (!btf_is_struct(def)) {
+ pr_warn("map '%s': unexpected def kind %u.\n",
+ map_name, btf_kind(var));
+ return -EINVAL;
+ }
+ if (def->size > vi->size) {
+ pr_warn("map '%s': invalid def size.\n", map_name);
+ return -EINVAL;
+ }
+
+ map = bpf_object__add_map(obj);
+ if (IS_ERR(map))
+ return PTR_ERR(map);
+ map->name = strdup(map_name);
+ if (!map->name) {
+ pr_warn("map '%s': failed to alloc map name.\n", map_name);
+ return -ENOMEM;
+ }
+ map->libbpf_type = LIBBPF_MAP_UNSPEC;
+ map->def.type = BPF_MAP_TYPE_UNSPEC;
+ map->sec_idx = sec_idx;
+ map->sec_offset = vi->offset;
+ pr_debug("map '%s': at sec_idx %d, offset %zu.\n",
+ map_name, map->sec_idx, map->sec_offset);
+
+ vlen = btf_vlen(def);
+ m = btf_members(def);
+ for (i = 0; i < vlen; i++, m++) {
+ const char *name = btf__name_by_offset(obj->btf, m->name_off);
+
+ if (!name) {
+ pr_warn("map '%s': invalid field #%d.\n", map_name, i);
+ return -EINVAL;
+ }
+ if (strcmp(name, "type") == 0) {
+ if (!get_map_field_int(map_name, obj->btf, def, m,
+ &map->def.type))
+ return -EINVAL;
+ pr_debug("map '%s': found type = %u.\n",
+ map_name, map->def.type);
+ } else if (strcmp(name, "max_entries") == 0) {
+ if (!get_map_field_int(map_name, obj->btf, def, m,
+ &map->def.max_entries))
+ return -EINVAL;
+ pr_debug("map '%s': found max_entries = %u.\n",
+ map_name, map->def.max_entries);
+ } else if (strcmp(name, "map_flags") == 0) {
+ if (!get_map_field_int(map_name, obj->btf, def, m,
+ &map->def.map_flags))
+ return -EINVAL;
+ pr_debug("map '%s': found map_flags = %u.\n",
+ map_name, map->def.map_flags);
+ } else if (strcmp(name, "key_size") == 0) {
+ __u32 sz;
+
+ if (!get_map_field_int(map_name, obj->btf, def, m,
+ &sz))
+ return -EINVAL;
+ pr_debug("map '%s': found key_size = %u.\n",
+ map_name, sz);
+ if (map->def.key_size && map->def.key_size != sz) {
+ pr_warn("map '%s': conflicting key size %u != %u.\n",
+ map_name, map->def.key_size, sz);
+ return -EINVAL;
+ }
+ map->def.key_size = sz;
+ } else if (strcmp(name, "key") == 0) {
+ __s64 sz;
+
+ t = btf__type_by_id(obj->btf, m->type);
+ if (!t) {
+ pr_warn("map '%s': key type [%d] not found.\n",
+ map_name, m->type);
+ return -EINVAL;
+ }
+ if (!btf_is_ptr(t)) {
+ pr_warn("map '%s': key spec is not PTR: %u.\n",
+ map_name, btf_kind(t));
+ return -EINVAL;
+ }
+ sz = btf__resolve_size(obj->btf, t->type);
+ if (sz < 0) {
+ pr_warn("map '%s': can't determine key size for type [%u]: %lld.\n",
+ map_name, t->type, sz);
+ return sz;
+ }
+ pr_debug("map '%s': found key [%u], sz = %lld.\n",
+ map_name, t->type, sz);
+ if (map->def.key_size && map->def.key_size != sz) {
+ pr_warn("map '%s': conflicting key size %u != %lld.\n",
+ map_name, map->def.key_size, sz);
+ return -EINVAL;
+ }
+ map->def.key_size = sz;
+ map->btf_key_type_id = t->type;
+ } else if (strcmp(name, "value_size") == 0) {
+ __u32 sz;
+
+ if (!get_map_field_int(map_name, obj->btf, def, m,
+ &sz))
+ return -EINVAL;
+ pr_debug("map '%s': found value_size = %u.\n",
+ map_name, sz);
+ if (map->def.value_size && map->def.value_size != sz) {
+ pr_warn("map '%s': conflicting value size %u != %u.\n",
+ map_name, map->def.value_size, sz);
+ return -EINVAL;
+ }
+ map->def.value_size = sz;
+ } else if (strcmp(name, "value") == 0) {
+ __s64 sz;
+
+ t = btf__type_by_id(obj->btf, m->type);
+ if (!t) {
+ pr_warn("map '%s': value type [%d] not found.\n",
+ map_name, m->type);
+ return -EINVAL;
+ }
+ if (!btf_is_ptr(t)) {
+ pr_warn("map '%s': value spec is not PTR: %u.\n",
+ map_name, btf_kind(t));
+ return -EINVAL;
+ }
+ sz = btf__resolve_size(obj->btf, t->type);
+ if (sz < 0) {
+ pr_warn("map '%s': can't determine value size for type [%u]: %lld.\n",
+ map_name, t->type, sz);
+ return sz;
+ }
+ pr_debug("map '%s': found value [%u], sz = %lld.\n",
+ map_name, t->type, sz);
+ if (map->def.value_size && map->def.value_size != sz) {
+ pr_warn("map '%s': conflicting value size %u != %lld.\n",
+ map_name, map->def.value_size, sz);
+ return -EINVAL;
+ }
+ map->def.value_size = sz;
+ map->btf_value_type_id = t->type;
+ } else if (strcmp(name, "pinning") == 0) {
+ __u32 val;
+ int err;
+
+ if (!get_map_field_int(map_name, obj->btf, def, m,
+ &val))
+ return -EINVAL;
+ pr_debug("map '%s': found pinning = %u.\n",
+ map_name, val);
+
+ if (val != LIBBPF_PIN_NONE &&
+ val != LIBBPF_PIN_BY_NAME) {
+ pr_warn("map '%s': invalid pinning value %u.\n",
+ map_name, val);
+ return -EINVAL;
+ }
+ if (val == LIBBPF_PIN_BY_NAME) {
+ err = build_map_pin_path(map, pin_root_path);
+ if (err) {
+ pr_warn("map '%s': couldn't build pin path.\n",
+ map_name);
+ return err;
+ }
+ }
+ } else {
+ if (strict) {
+ pr_warn("map '%s': unknown field '%s'.\n",
+ map_name, name);
+ return -ENOTSUP;
+ }
+ pr_debug("map '%s': ignoring unknown field '%s'.\n",
+ map_name, name);
+ }
+ }
+
+ if (map->def.type == BPF_MAP_TYPE_UNSPEC) {
+ pr_warn("map '%s': map type isn't specified.\n", map_name);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int bpf_object__init_user_btf_maps(struct bpf_object *obj, bool strict,
+ const char *pin_root_path)
+{
+ const struct btf_type *sec = NULL;
+ int nr_types, i, vlen, err;
+ const struct btf_type *t;
+ const char *name;
+ Elf_Data *data;
+ Elf_Scn *scn;
+
+ if (obj->efile.btf_maps_shndx < 0)
+ return 0;
+
+ scn = elf_getscn(obj->efile.elf, obj->efile.btf_maps_shndx);
+ if (scn)
+ data = elf_getdata(scn, NULL);
+ if (!scn || !data) {
+ pr_warn("failed to get Elf_Data from map section %d (%s)\n",
+ obj->efile.maps_shndx, MAPS_ELF_SEC);
+ return -EINVAL;
+ }
+
+ nr_types = btf__get_nr_types(obj->btf);
+ for (i = 1; i <= nr_types; i++) {
+ t = btf__type_by_id(obj->btf, i);
+ if (!btf_is_datasec(t))
+ continue;
+ name = btf__name_by_offset(obj->btf, t->name_off);
+ if (strcmp(name, MAPS_ELF_SEC) == 0) {
+ sec = t;
+ break;
+ }
+ }
+
+ if (!sec) {
+ pr_warn("DATASEC '%s' not found.\n", MAPS_ELF_SEC);
+ return -ENOENT;
+ }
+
+ vlen = btf_vlen(sec);
+ for (i = 0; i < vlen; i++) {
+ err = bpf_object__init_user_btf_map(obj, sec, i,
+ obj->efile.btf_maps_shndx,
+ data, strict,
+ pin_root_path);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+static int bpf_object__init_maps(struct bpf_object *obj, bool relaxed_maps,
+ const char *pin_root_path)
+{
+ bool strict = !relaxed_maps;
+ int err;
+
+ err = bpf_object__init_user_maps(obj, strict);
+ if (err)
+ return err;
+
+ err = bpf_object__init_user_btf_maps(obj, strict, pin_root_path);
+ if (err)
+ return err;
+
+ err = bpf_object__init_global_data_maps(obj);
+ if (err)
+ return err;
+
+ if (obj->nr_maps) {
+ qsort(obj->maps, obj->nr_maps, sizeof(obj->maps[0]),
+ compare_bpf_map);
+ }
+ return 0;
+}
+
+static bool section_have_execinstr(struct bpf_object *obj, int idx)
+{
+ Elf_Scn *scn;
+ GElf_Shdr sh;
+
+ scn = elf_getscn(obj->efile.elf, idx);
+ if (!scn)
+ return false;
+
+ if (gelf_getshdr(scn, &sh) != &sh)
+ return false;
+
+ if (sh.sh_flags & SHF_EXECINSTR)
+ return true;
+
+ return false;
+}
+
+static void bpf_object__sanitize_btf(struct bpf_object *obj)
+{
+ bool has_datasec = obj->caps.btf_datasec;
+ bool has_func = obj->caps.btf_func;
+ struct btf *btf = obj->btf;
+ struct btf_type *t;
+ int i, j, vlen;
+
+ if (!obj->btf || (has_func && has_datasec))
+ return;
+
+ for (i = 1; i <= btf__get_nr_types(btf); i++) {
+ t = (struct btf_type *)btf__type_by_id(btf, i);
+
+ if (!has_datasec && btf_is_var(t)) {
+ /* replace VAR with INT */
+ t->info = BTF_INFO_ENC(BTF_KIND_INT, 0, 0);
+ /*
+ * using size = 1 is the safest choice, 4 will be too
+ * big and cause kernel BTF validation failure if
+ * original variable took less than 4 bytes
+ */
+ t->size = 1;
+ *(int *)(t + 1) = BTF_INT_ENC(0, 0, 8);
+ } else if (!has_datasec && btf_is_datasec(t)) {
+ /* replace DATASEC with STRUCT */
+ const struct btf_var_secinfo *v = btf_var_secinfos(t);
+ struct btf_member *m = btf_members(t);
+ struct btf_type *vt;
+ char *name;
+
+ name = (char *)btf__name_by_offset(btf, t->name_off);
+ while (*name) {
+ if (*name == '.')
+ *name = '_';
+ name++;
+ }
+
+ vlen = btf_vlen(t);
+ t->info = BTF_INFO_ENC(BTF_KIND_STRUCT, 0, vlen);
+ for (j = 0; j < vlen; j++, v++, m++) {
+ /* order of field assignments is important */
+ m->offset = v->offset * 8;
+ m->type = v->type;
+ /* preserve variable name as member name */
+ vt = (void *)btf__type_by_id(btf, v->type);
+ m->name_off = vt->name_off;
+ }
+ } else if (!has_func && btf_is_func_proto(t)) {
+ /* replace FUNC_PROTO with ENUM */
+ vlen = btf_vlen(t);
+ t->info = BTF_INFO_ENC(BTF_KIND_ENUM, 0, vlen);
+ t->size = sizeof(__u32); /* kernel enforced */
+ } else if (!has_func && btf_is_func(t)) {
+ /* replace FUNC with TYPEDEF */
+ t->info = BTF_INFO_ENC(BTF_KIND_TYPEDEF, 0, 0);
+ }
+ }
+}
+
+static void bpf_object__sanitize_btf_ext(struct bpf_object *obj)
+{
+ if (!obj->btf_ext)
+ return;
+
+ if (!obj->caps.btf_func) {
+ btf_ext__free(obj->btf_ext);
+ obj->btf_ext = NULL;
+ }
+}
+
+static bool bpf_object__is_btf_mandatory(const struct bpf_object *obj)
+{
+ return obj->efile.btf_maps_shndx >= 0;
+}
+
+static int bpf_object__init_btf(struct bpf_object *obj,
+ Elf_Data *btf_data,
+ Elf_Data *btf_ext_data)
+{
+ bool btf_required = bpf_object__is_btf_mandatory(obj);
+ int err = 0;
+
+ if (btf_data) {
+ obj->btf = btf__new(btf_data->d_buf, btf_data->d_size);
+ if (IS_ERR(obj->btf)) {
+ pr_warn("Error loading ELF section %s: %d.\n",
+ BTF_ELF_SEC, err);
+ goto out;
+ }
+ err = btf__finalize_data(obj, obj->btf);
+ if (err) {
+ pr_warn("Error finalizing %s: %d.\n", BTF_ELF_SEC, err);
+ goto out;
+ }
+ }
+ if (btf_ext_data) {
+ if (!obj->btf) {
+ pr_debug("Ignore ELF section %s because its depending ELF section %s is not found.\n",
+ BTF_EXT_ELF_SEC, BTF_ELF_SEC);
+ goto out;
+ }
+ obj->btf_ext = btf_ext__new(btf_ext_data->d_buf,
+ btf_ext_data->d_size);
+ if (IS_ERR(obj->btf_ext)) {
+ pr_warn("Error loading ELF section %s: %ld. Ignored and continue.\n",
+ BTF_EXT_ELF_SEC, PTR_ERR(obj->btf_ext));
+ obj->btf_ext = NULL;
+ goto out;
+ }
+ }
+out:
+ if (err || IS_ERR(obj->btf)) {
+ if (btf_required)
+ err = err ? : PTR_ERR(obj->btf);
+ else
+ err = 0;
+ if (!IS_ERR_OR_NULL(obj->btf))
+ btf__free(obj->btf);
+ obj->btf = NULL;
+ }
+ if (btf_required && !obj->btf) {
+ pr_warn("BTF is required, but is missing or corrupted.\n");
+ return err == 0 ? -ENOENT : err;
+ }
+ return 0;
+}
+
+static int bpf_object__sanitize_and_load_btf(struct bpf_object *obj)
+{
+ int err = 0;
+
+ if (!obj->btf)
+ return 0;
+
+ bpf_object__sanitize_btf(obj);
+ bpf_object__sanitize_btf_ext(obj);
+
+ err = btf__load(obj->btf);
+ if (err) {
+ pr_warn("Error loading %s into kernel: %d.\n",
+ BTF_ELF_SEC, err);
+ btf__free(obj->btf);
+ obj->btf = NULL;
+ /* btf_ext can't exist without btf, so free it as well */
+ if (obj->btf_ext) {
+ btf_ext__free(obj->btf_ext);
+ obj->btf_ext = NULL;
+ }
+
+ if (bpf_object__is_btf_mandatory(obj))
+ return err;
+ }
+ return 0;
+}
+
+static int bpf_object__elf_collect(struct bpf_object *obj, bool relaxed_maps,
+ const char *pin_root_path)
+{
+ Elf *elf = obj->efile.elf;
+ GElf_Ehdr *ep = &obj->efile.ehdr;
+ Elf_Data *btf_ext_data = NULL;
+ Elf_Data *btf_data = NULL;
+ Elf_Scn *scn = NULL;
+ int idx = 0, err = 0;
+
+ /* Elf is corrupted/truncated, avoid calling elf_strptr. */
+ if (!elf_rawdata(elf_getscn(elf, ep->e_shstrndx), NULL)) {
+ pr_warn("failed to get e_shstrndx from %s\n", obj->path);
+ return -LIBBPF_ERRNO__FORMAT;
+ }
+
+ while ((scn = elf_nextscn(elf, scn)) != NULL) {
+ char *name;
+ GElf_Shdr sh;
+ Elf_Data *data;
+
+ idx++;
+ if (gelf_getshdr(scn, &sh) != &sh) {
+ pr_warn("failed to get section(%d) header from %s\n",
+ idx, obj->path);
+ return -LIBBPF_ERRNO__FORMAT;
+ }
+
+ name = elf_strptr(elf, ep->e_shstrndx, sh.sh_name);
+ if (!name) {
+ pr_warn("failed to get section(%d) name from %s\n",
+ idx, obj->path);
+ return -LIBBPF_ERRNO__FORMAT;
+ }
+
+ data = elf_getdata(scn, 0);
+ if (!data) {
+ pr_warn("failed to get section(%d) data from %s(%s)\n",
+ idx, name, obj->path);
+ return -LIBBPF_ERRNO__FORMAT;
+ }
+ pr_debug("section(%d) %s, size %ld, link %d, flags %lx, type=%d\n",
+ idx, name, (unsigned long)data->d_size,
+ (int)sh.sh_link, (unsigned long)sh.sh_flags,
+ (int)sh.sh_type);
+
+ if (strcmp(name, "license") == 0) {
+ err = bpf_object__init_license(obj,
+ data->d_buf,
+ data->d_size);
+ if (err)
+ return err;
+ } else if (strcmp(name, "version") == 0) {
+ err = bpf_object__init_kversion(obj,
+ data->d_buf,
+ data->d_size);
+ if (err)
+ return err;
+ } else if (strcmp(name, "maps") == 0) {
+ obj->efile.maps_shndx = idx;
+ } else if (strcmp(name, MAPS_ELF_SEC) == 0) {
+ obj->efile.btf_maps_shndx = idx;
+ } else if (strcmp(name, BTF_ELF_SEC) == 0) {
+ btf_data = data;
+ } else if (strcmp(name, BTF_EXT_ELF_SEC) == 0) {
+ btf_ext_data = data;
+ } else if (sh.sh_type == SHT_SYMTAB) {
+ if (obj->efile.symbols) {
+ pr_warn("bpf: multiple SYMTAB in %s\n",
+ obj->path);
+ return -LIBBPF_ERRNO__FORMAT;
+ }
+ obj->efile.symbols = data;
+ obj->efile.strtabidx = sh.sh_link;
+ } else if (sh.sh_type == SHT_PROGBITS && data->d_size > 0) {
+ if (sh.sh_flags & SHF_EXECINSTR) {
+ if (strcmp(name, ".text") == 0)
+ obj->efile.text_shndx = idx;
+ err = bpf_object__add_program(obj, data->d_buf,
+ data->d_size,
+ name, idx);
+ if (err) {
+ char errmsg[STRERR_BUFSIZE];
+ char *cp;
+
+ cp = libbpf_strerror_r(-err, errmsg,
+ sizeof(errmsg));
+ pr_warn("failed to alloc program %s (%s): %s",
+ name, obj->path, cp);
+ return err;
+ }
+ } else if (strcmp(name, ".data") == 0) {
+ obj->efile.data = data;
+ obj->efile.data_shndx = idx;
+ } else if (strcmp(name, ".rodata") == 0) {
+ obj->efile.rodata = data;
+ obj->efile.rodata_shndx = idx;
+ } else {
+ pr_debug("skip section(%d) %s\n", idx, name);
+ }
+ } else if (sh.sh_type == SHT_REL) {
+ int nr_sects = obj->efile.nr_reloc_sects;
+ void *sects = obj->efile.reloc_sects;
+ int sec = sh.sh_info; /* points to other section */
+
+ /* Only do relo for section with exec instructions */
+ if (!section_have_execinstr(obj, sec)) {
+ pr_debug("skip relo %s(%d) for section(%d)\n",
+ name, idx, sec);
+ continue;
+ }
+
+ sects = reallocarray(sects, nr_sects + 1,
+ sizeof(*obj->efile.reloc_sects));
+ if (!sects) {
+ pr_warn("reloc_sects realloc failed\n");
+ return -ENOMEM;
+ }
+
+ obj->efile.reloc_sects = sects;
+ obj->efile.nr_reloc_sects++;
+
+ obj->efile.reloc_sects[nr_sects].shdr = sh;
+ obj->efile.reloc_sects[nr_sects].data = data;
+ } else if (sh.sh_type == SHT_NOBITS && strcmp(name, ".bss") == 0) {
+ obj->efile.bss = data;
+ obj->efile.bss_shndx = idx;
+ } else {
+ pr_debug("skip section(%d) %s\n", idx, name);
+ }
+ }
+
+ if (!obj->efile.strtabidx || obj->efile.strtabidx > idx) {
+ pr_warn("Corrupted ELF file: index of strtab invalid\n");
+ return -LIBBPF_ERRNO__FORMAT;
+ }
+ err = bpf_object__init_btf(obj, btf_data, btf_ext_data);
+ if (!err)
+ err = bpf_object__init_maps(obj, relaxed_maps, pin_root_path);
+ if (!err)
+ err = bpf_object__sanitize_and_load_btf(obj);
+ if (!err)
+ err = bpf_object__init_prog_names(obj);
+ return err;
+}
+
+static struct bpf_program *
+bpf_object__find_prog_by_idx(struct bpf_object *obj, int idx)
+{
+ struct bpf_program *prog;
+ size_t i;
+
+ for (i = 0; i < obj->nr_programs; i++) {
+ prog = &obj->programs[i];
+ if (prog->idx == idx)
+ return prog;
+ }
+ return NULL;
+}
+
+struct bpf_program *
+bpf_object__find_program_by_title(const struct bpf_object *obj,
+ const char *title)
+{
+ struct bpf_program *pos;
+
+ bpf_object__for_each_program(pos, obj) {
+ if (pos->section_name && !strcmp(pos->section_name, title))
+ return pos;
+ }
+ return NULL;
+}
+
+static bool bpf_object__shndx_is_data(const struct bpf_object *obj,
+ int shndx)
+{
+ return shndx == obj->efile.data_shndx ||
+ shndx == obj->efile.bss_shndx ||
+ shndx == obj->efile.rodata_shndx;
+}
+
+static bool bpf_object__shndx_is_maps(const struct bpf_object *obj,
+ int shndx)
+{
+ return shndx == obj->efile.maps_shndx ||
+ shndx == obj->efile.btf_maps_shndx;
+}
+
+static enum libbpf_map_type
+bpf_object__section_to_libbpf_map_type(const struct bpf_object *obj, int shndx)
+{
+ if (shndx == obj->efile.data_shndx)
+ return LIBBPF_MAP_DATA;
+ else if (shndx == obj->efile.bss_shndx)
+ return LIBBPF_MAP_BSS;
+ else if (shndx == obj->efile.rodata_shndx)
+ return LIBBPF_MAP_RODATA;
+ else
+ return LIBBPF_MAP_UNSPEC;
+}
+
+static int bpf_program__record_reloc(struct bpf_program *prog,
+ struct reloc_desc *reloc_desc,
+ __u32 insn_idx, const char *name,
+ const GElf_Sym *sym, const GElf_Rel *rel)
+{
+ struct bpf_insn *insn = &prog->insns[insn_idx];
+ size_t map_idx, nr_maps = prog->obj->nr_maps;
+ struct bpf_object *obj = prog->obj;
+ __u32 shdr_idx = sym->st_shndx;
+ enum libbpf_map_type type;
+ struct bpf_map *map;
+
+ /* sub-program call relocation */
+ if (insn->code == (BPF_JMP | BPF_CALL)) {
+ if (insn->src_reg != BPF_PSEUDO_CALL) {
+ pr_warn("incorrect bpf_call opcode\n");
+ return -LIBBPF_ERRNO__RELOC;
+ }
+ /* text_shndx can be 0, if no default "main" program exists */
+ if (!shdr_idx || shdr_idx != obj->efile.text_shndx) {
+ pr_warn("bad call relo against section %u\n", shdr_idx);
+ return -LIBBPF_ERRNO__RELOC;
+ }
+ if (sym->st_value % 8) {
+ pr_warn("bad call relo offset: %llu\n", (__u64)sym->st_value);
+ return -LIBBPF_ERRNO__RELOC;
+ }
+ reloc_desc->type = RELO_CALL;
+ reloc_desc->insn_idx = insn_idx;
+ reloc_desc->sym_off = sym->st_value;
+ obj->has_pseudo_calls = true;
+ return 0;
+ }
+
+ if (insn->code != (BPF_LD | BPF_IMM | BPF_DW)) {
+ pr_warn("invalid relo for insns[%d].code 0x%x\n",
+ insn_idx, insn->code);
+ return -LIBBPF_ERRNO__RELOC;
+ }
+ if (!shdr_idx || shdr_idx >= SHN_LORESERVE) {
+ pr_warn("invalid relo for \'%s\' in special section 0x%x; forgot to initialize global var?..\n",
+ name, shdr_idx);
+ return -LIBBPF_ERRNO__RELOC;
+ }
+
+ type = bpf_object__section_to_libbpf_map_type(obj, shdr_idx);
+
+ /* generic map reference relocation */
+ if (type == LIBBPF_MAP_UNSPEC) {
+ if (!bpf_object__shndx_is_maps(obj, shdr_idx)) {
+ pr_warn("bad map relo against section %u\n",
+ shdr_idx);
+ return -LIBBPF_ERRNO__RELOC;
+ }
+ for (map_idx = 0; map_idx < nr_maps; map_idx++) {
+ map = &obj->maps[map_idx];
+ if (map->libbpf_type != type ||
+ map->sec_idx != sym->st_shndx ||
+ map->sec_offset != sym->st_value)
+ continue;
+ pr_debug("found map %zd (%s, sec %d, off %zu) for insn %u\n",
+ map_idx, map->name, map->sec_idx,
+ map->sec_offset, insn_idx);
+ break;
+ }
+ if (map_idx >= nr_maps) {
+ pr_warn("map relo failed to find map for sec %u, off %llu\n",
+ shdr_idx, (__u64)sym->st_value);
+ return -LIBBPF_ERRNO__RELOC;
+ }
+ reloc_desc->type = RELO_LD64;
+ reloc_desc->insn_idx = insn_idx;
+ reloc_desc->map_idx = map_idx;
+ reloc_desc->sym_off = 0; /* sym->st_value determines map_idx */
+ return 0;
+ }
+
+ /* global data map relocation */
+ if (!bpf_object__shndx_is_data(obj, shdr_idx)) {
+ pr_warn("bad data relo against section %u\n", shdr_idx);
+ return -LIBBPF_ERRNO__RELOC;
+ }
+ if (!obj->caps.global_data) {
+ pr_warn("relocation: kernel does not support global \'%s\' variable access in insns[%d]\n",
+ name, insn_idx);
+ return -LIBBPF_ERRNO__RELOC;
+ }
+ for (map_idx = 0; map_idx < nr_maps; map_idx++) {
+ map = &obj->maps[map_idx];
+ if (map->libbpf_type != type)
+ continue;
+ pr_debug("found data map %zd (%s, sec %d, off %zu) for insn %u\n",
+ map_idx, map->name, map->sec_idx, map->sec_offset,
+ insn_idx);
+ break;
+ }
+ if (map_idx >= nr_maps) {
+ pr_warn("data relo failed to find map for sec %u\n",
+ shdr_idx);
+ return -LIBBPF_ERRNO__RELOC;
+ }
+
+ reloc_desc->type = RELO_DATA;
+ reloc_desc->insn_idx = insn_idx;
+ reloc_desc->map_idx = map_idx;
+ reloc_desc->sym_off = sym->st_value;
+ return 0;
+}
+
+static int
+bpf_program__collect_reloc(struct bpf_program *prog, GElf_Shdr *shdr,
+ Elf_Data *data, struct bpf_object *obj)
+{
+ Elf_Data *symbols = obj->efile.symbols;
+ int err, i, nrels;
+
+ pr_debug("collecting relocating info for: '%s'\n", prog->section_name);
+ nrels = shdr->sh_size / shdr->sh_entsize;
+
+ prog->reloc_desc = malloc(sizeof(*prog->reloc_desc) * nrels);
+ if (!prog->reloc_desc) {
+ pr_warn("failed to alloc memory in relocation\n");
+ return -ENOMEM;
+ }
+ prog->nr_reloc = nrels;
+
+ for (i = 0; i < nrels; i++) {
+ const char *name;
+ __u32 insn_idx;
+ GElf_Sym sym;
+ GElf_Rel rel;
+
+ if (!gelf_getrel(data, i, &rel)) {
+ pr_warn("relocation: failed to get %d reloc\n", i);
+ return -LIBBPF_ERRNO__FORMAT;
+ }
+ if (!gelf_getsym(symbols, GELF_R_SYM(rel.r_info), &sym)) {
+ pr_warn("relocation: symbol %"PRIx64" not found\n",
+ GELF_R_SYM(rel.r_info));
+ return -LIBBPF_ERRNO__FORMAT;
+ }
+ if (rel.r_offset % sizeof(struct bpf_insn))
+ return -LIBBPF_ERRNO__FORMAT;
+
+ insn_idx = rel.r_offset / sizeof(struct bpf_insn);
+ name = elf_strptr(obj->efile.elf, obj->efile.strtabidx,
+ sym.st_name) ? : "<?>";
+
+ pr_debug("relo for shdr %u, symb %llu, value %llu, type %d, bind %d, name %d (\'%s\'), insn %u\n",
+ (__u32)sym.st_shndx, (__u64)GELF_R_SYM(rel.r_info),
+ (__u64)sym.st_value, GELF_ST_TYPE(sym.st_info),
+ GELF_ST_BIND(sym.st_info), sym.st_name, name,
+ insn_idx);
+
+ err = bpf_program__record_reloc(prog, &prog->reloc_desc[i],
+ insn_idx, name, &sym, &rel);
+ if (err)
+ return err;
+ }
+ return 0;
+}
+
+static int bpf_map_find_btf_info(struct bpf_object *obj, struct bpf_map *map)
+{
+ struct bpf_map_def *def = &map->def;
+ __u32 key_type_id = 0, value_type_id = 0;
+ int ret;
+
+ /* if it's BTF-defined map, we don't need to search for type IDs */
+ if (map->sec_idx == obj->efile.btf_maps_shndx)
+ return 0;
+
+ if (!bpf_map__is_internal(map)) {
+ ret = btf__get_map_kv_tids(obj->btf, map->name, def->key_size,
+ def->value_size, &key_type_id,
+ &value_type_id);
+ } else {
+ /*
+ * LLVM annotates global data differently in BTF, that is,
+ * only as '.data', '.bss' or '.rodata'.
+ */
+ ret = btf__find_by_name(obj->btf,
+ libbpf_type_to_btf_name[map->libbpf_type]);
+ }
+ if (ret < 0)
+ return ret;
+
+ map->btf_key_type_id = key_type_id;
+ map->btf_value_type_id = bpf_map__is_internal(map) ?
+ ret : value_type_id;
+ return 0;
+}
+
+int bpf_map__reuse_fd(struct bpf_map *map, int fd)
+{
+ struct bpf_map_info info = {};
+ __u32 len = sizeof(info);
+ int new_fd, err;
+ char *new_name;
+
+ err = bpf_obj_get_info_by_fd(fd, &info, &len);
+ if (err)
+ return err;
+
+ new_name = strdup(info.name);
+ if (!new_name)
+ return -errno;
+
+ new_fd = open("/", O_RDONLY | O_CLOEXEC);
+ if (new_fd < 0) {
+ err = -errno;
+ goto err_free_new_name;
+ }
+
+ new_fd = dup3(fd, new_fd, O_CLOEXEC);
+ if (new_fd < 0) {
+ err = -errno;
+ goto err_close_new_fd;
+ }
+
+ err = zclose(map->fd);
+ if (err) {
+ err = -errno;
+ goto err_close_new_fd;
+ }
+ free(map->name);
+
+ map->fd = new_fd;
+ map->name = new_name;
+ map->def.type = info.type;
+ map->def.key_size = info.key_size;
+ map->def.value_size = info.value_size;
+ map->def.max_entries = info.max_entries;
+ map->def.map_flags = info.map_flags;
+ map->btf_key_type_id = info.btf_key_type_id;
+ map->btf_value_type_id = info.btf_value_type_id;
+ map->reused = true;
+
+ return 0;
+
+err_close_new_fd:
+ close(new_fd);
+err_free_new_name:
+ free(new_name);
+ return err;
+}
+
+int bpf_map__resize(struct bpf_map *map, __u32 max_entries)
+{
+ if (!map || !max_entries)
+ return -EINVAL;
+
+ /* If map already created, its attributes can't be changed. */
+ if (map->fd >= 0)
+ return -EBUSY;
+
+ map->def.max_entries = max_entries;
+
+ return 0;
+}
+
+static int
+bpf_object__probe_name(struct bpf_object *obj)
+{
+ struct bpf_load_program_attr attr;
+ char *cp, errmsg[STRERR_BUFSIZE];
+ struct bpf_insn insns[] = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ };
+ int ret;
+
+ /* make sure basic loading works */
+
+ memset(&attr, 0, sizeof(attr));
+ attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
+ attr.insns = insns;
+ attr.insns_cnt = ARRAY_SIZE(insns);
+ attr.license = "GPL";
+
+ ret = bpf_load_program_xattr(&attr, NULL, 0);
+ if (ret < 0) {
+ cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg));
+ pr_warn("Error in %s():%s(%d). Couldn't load basic 'r0 = 0' BPF program.\n",
+ __func__, cp, errno);
+ return -errno;
+ }
+ close(ret);
+
+ /* now try the same program, but with the name */
+
+ attr.name = "test";
+ ret = bpf_load_program_xattr(&attr, NULL, 0);
+ if (ret >= 0) {
+ obj->caps.name = 1;
+ close(ret);
+ }
+
+ return 0;
+}
+
+static int
+bpf_object__probe_global_data(struct bpf_object *obj)
+{
+ struct bpf_load_program_attr prg_attr;
+ struct bpf_create_map_attr map_attr;
+ char *cp, errmsg[STRERR_BUFSIZE];
+ struct bpf_insn insns[] = {
+ BPF_LD_MAP_VALUE(BPF_REG_1, 0, 16),
+ BPF_ST_MEM(BPF_DW, BPF_REG_1, 0, 42),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ };
+ int ret, map;
+
+ memset(&map_attr, 0, sizeof(map_attr));
+ map_attr.map_type = BPF_MAP_TYPE_ARRAY;
+ map_attr.key_size = sizeof(int);
+ map_attr.value_size = 32;
+ map_attr.max_entries = 1;
+
+ map = bpf_create_map_xattr(&map_attr);
+ if (map < 0) {
+ cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg));
+ pr_warn("Error in %s():%s(%d). Couldn't create simple array map.\n",
+ __func__, cp, errno);
+ return -errno;
+ }
+
+ insns[0].imm = map;
+
+ memset(&prg_attr, 0, sizeof(prg_attr));
+ prg_attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
+ prg_attr.insns = insns;
+ prg_attr.insns_cnt = ARRAY_SIZE(insns);
+ prg_attr.license = "GPL";
+
+ ret = bpf_load_program_xattr(&prg_attr, NULL, 0);
+ if (ret >= 0) {
+ obj->caps.global_data = 1;
+ close(ret);
+ }
+
+ close(map);
+ return 0;
+}
+
+static int bpf_object__probe_btf_func(struct bpf_object *obj)
+{
+ static const char strs[] = "\0int\0x\0a";
+ /* void x(int a) {} */
+ __u32 types[] = {
+ /* int */
+ BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ /* FUNC_PROTO */ /* [2] */
+ BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 0, 1), 0),
+ BTF_PARAM_ENC(7, 1),
+ /* FUNC x */ /* [3] */
+ BTF_TYPE_ENC(5, BTF_INFO_ENC(BTF_KIND_FUNC, 0, 0), 2),
+ };
+ int btf_fd;
+
+ btf_fd = libbpf__load_raw_btf((char *)types, sizeof(types),
+ strs, sizeof(strs));
+ if (btf_fd >= 0) {
+ obj->caps.btf_func = 1;
+ close(btf_fd);
+ return 1;
+ }
+
+ return 0;
+}
+
+static int bpf_object__probe_btf_datasec(struct bpf_object *obj)
+{
+ static const char strs[] = "\0x\0.data";
+ /* static int a; */
+ __u32 types[] = {
+ /* int */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ /* VAR x */ /* [2] */
+ BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_VAR, 0, 0), 1),
+ BTF_VAR_STATIC,
+ /* DATASEC val */ /* [3] */
+ BTF_TYPE_ENC(3, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 4),
+ BTF_VAR_SECINFO_ENC(2, 0, 4),
+ };
+ int btf_fd;
+
+ btf_fd = libbpf__load_raw_btf((char *)types, sizeof(types),
+ strs, sizeof(strs));
+ if (btf_fd >= 0) {
+ obj->caps.btf_datasec = 1;
+ close(btf_fd);
+ return 1;
+ }
+
+ return 0;
+}
+
+static int bpf_object__probe_array_mmap(struct bpf_object *obj)
+{
+ struct bpf_create_map_attr attr = {
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_flags = BPF_F_MMAPABLE,
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .max_entries = 1,
+ };
+ int fd;
+
+ fd = bpf_create_map_xattr(&attr);
+ if (fd >= 0) {
+ obj->caps.array_mmap = 1;
+ close(fd);
+ return 1;
+ }
+
+ return 0;
+}
+
+static int
+bpf_object__probe_caps(struct bpf_object *obj)
+{
+ int (*probe_fn[])(struct bpf_object *obj) = {
+ bpf_object__probe_name,
+ bpf_object__probe_global_data,
+ bpf_object__probe_btf_func,
+ bpf_object__probe_btf_datasec,
+ bpf_object__probe_array_mmap,
+ };
+ int i, ret;
+
+ for (i = 0; i < ARRAY_SIZE(probe_fn); i++) {
+ ret = probe_fn[i](obj);
+ if (ret < 0)
+ pr_debug("Probe #%d failed with %d.\n", i, ret);
+ }
+
+ return 0;
+}
+
+static bool map_is_reuse_compat(const struct bpf_map *map, int map_fd)
+{
+ struct bpf_map_info map_info = {};
+ char msg[STRERR_BUFSIZE];
+ __u32 map_info_len;
+
+ map_info_len = sizeof(map_info);
+
+ if (bpf_obj_get_info_by_fd(map_fd, &map_info, &map_info_len)) {
+ pr_warn("failed to get map info for map FD %d: %s\n",
+ map_fd, libbpf_strerror_r(errno, msg, sizeof(msg)));
+ return false;
+ }
+
+ return (map_info.type == map->def.type &&
+ map_info.key_size == map->def.key_size &&
+ map_info.value_size == map->def.value_size &&
+ map_info.max_entries == map->def.max_entries &&
+ map_info.map_flags == map->def.map_flags);
+}
+
+static int
+bpf_object__reuse_map(struct bpf_map *map)
+{
+ char *cp, errmsg[STRERR_BUFSIZE];
+ int err, pin_fd;
+
+ pin_fd = bpf_obj_get(map->pin_path);
+ if (pin_fd < 0) {
+ err = -errno;
+ if (err == -ENOENT) {
+ pr_debug("found no pinned map to reuse at '%s'\n",
+ map->pin_path);
+ return 0;
+ }
+
+ cp = libbpf_strerror_r(-err, errmsg, sizeof(errmsg));
+ pr_warn("couldn't retrieve pinned map '%s': %s\n",
+ map->pin_path, cp);
+ return err;
+ }
+
+ if (!map_is_reuse_compat(map, pin_fd)) {
+ pr_warn("couldn't reuse pinned map at '%s': parameter mismatch\n",
+ map->pin_path);
+ close(pin_fd);
+ return -EINVAL;
+ }
+
+ err = bpf_map__reuse_fd(map, pin_fd);
+ if (err) {
+ close(pin_fd);
+ return err;
+ }
+ map->pinned = true;
+ pr_debug("reused pinned map at '%s'\n", map->pin_path);
+
+ return 0;
+}
+
+static int
+bpf_object__populate_internal_map(struct bpf_object *obj, struct bpf_map *map)
+{
+ char *cp, errmsg[STRERR_BUFSIZE];
+ int err, zero = 0;
+ __u8 *data;
+
+ /* Nothing to do here since kernel already zero-initializes .bss map. */
+ if (map->libbpf_type == LIBBPF_MAP_BSS)
+ return 0;
+
+ data = map->libbpf_type == LIBBPF_MAP_DATA ?
+ obj->sections.data : obj->sections.rodata;
+
+ err = bpf_map_update_elem(map->fd, &zero, data, 0);
+ /* Freeze .rodata map as read-only from syscall side. */
+ if (!err && map->libbpf_type == LIBBPF_MAP_RODATA) {
+ err = bpf_map_freeze(map->fd);
+ if (err) {
+ cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg));
+ pr_warn("Error freezing map(%s) as read-only: %s\n",
+ map->name, cp);
+ err = 0;
+ }
+ }
+ return err;
+}
+
+static int
+bpf_object__create_maps(struct bpf_object *obj)
+{
+ struct bpf_create_map_attr create_attr = {};
+ int nr_cpus = 0;
+ unsigned int i;
+ int err;
+
+ for (i = 0; i < obj->nr_maps; i++) {
+ struct bpf_map *map = &obj->maps[i];
+ struct bpf_map_def *def = &map->def;
+ char *cp, errmsg[STRERR_BUFSIZE];
+ int *pfd = &map->fd;
+
+ if (map->pin_path) {
+ err = bpf_object__reuse_map(map);
+ if (err) {
+ pr_warn("error reusing pinned map %s\n",
+ map->name);
+ return err;
+ }
+ }
+
+ if (map->fd >= 0) {
+ pr_debug("skip map create (preset) %s: fd=%d\n",
+ map->name, map->fd);
+ continue;
+ }
+
+ if (obj->caps.name)
+ create_attr.name = map->name;
+ create_attr.map_ifindex = map->map_ifindex;
+ create_attr.map_type = def->type;
+ create_attr.map_flags = def->map_flags;
+ create_attr.key_size = def->key_size;
+ create_attr.value_size = def->value_size;
+ if (def->type == BPF_MAP_TYPE_PERF_EVENT_ARRAY &&
+ !def->max_entries) {
+ if (!nr_cpus)
+ nr_cpus = libbpf_num_possible_cpus();
+ if (nr_cpus < 0) {
+ pr_warn("failed to determine number of system CPUs: %d\n",
+ nr_cpus);
+ err = nr_cpus;
+ goto err_out;
+ }
+ pr_debug("map '%s': setting size to %d\n",
+ map->name, nr_cpus);
+ create_attr.max_entries = nr_cpus;
+ } else {
+ create_attr.max_entries = def->max_entries;
+ }
+ create_attr.btf_fd = 0;
+ create_attr.btf_key_type_id = 0;
+ create_attr.btf_value_type_id = 0;
+ if (bpf_map_type__is_map_in_map(def->type) &&
+ map->inner_map_fd >= 0)
+ create_attr.inner_map_fd = map->inner_map_fd;
+
+ if (obj->btf && !bpf_map_find_btf_info(obj, map)) {
+ create_attr.btf_fd = btf__fd(obj->btf);
+ create_attr.btf_key_type_id = map->btf_key_type_id;
+ create_attr.btf_value_type_id = map->btf_value_type_id;
+ }
+
+ *pfd = bpf_create_map_xattr(&create_attr);
+ if (*pfd < 0 && (create_attr.btf_key_type_id ||
+ create_attr.btf_value_type_id)) {
+ err = -errno;
+ cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
+ pr_warn("Error in bpf_create_map_xattr(%s):%s(%d). Retrying without BTF.\n",
+ map->name, cp, err);
+ create_attr.btf_fd = 0;
+ create_attr.btf_key_type_id = 0;
+ create_attr.btf_value_type_id = 0;
+ map->btf_key_type_id = 0;
+ map->btf_value_type_id = 0;
+ *pfd = bpf_create_map_xattr(&create_attr);
+ }
+
+ if (*pfd < 0) {
+ size_t j;
+
+ err = -errno;
+err_out:
+ cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
+ pr_warn("failed to create map (name: '%s'): %s(%d)\n",
+ map->name, cp, err);
+ for (j = 0; j < i; j++)
+ zclose(obj->maps[j].fd);
+ return err;
+ }
+
+ if (bpf_map__is_internal(map)) {
+ err = bpf_object__populate_internal_map(obj, map);
+ if (err < 0) {
+ zclose(*pfd);
+ goto err_out;
+ }
+ }
+
+ if (map->pin_path && !map->pinned) {
+ err = bpf_map__pin(map, NULL);
+ if (err) {
+ pr_warn("failed to auto-pin map name '%s' at '%s'\n",
+ map->name, map->pin_path);
+ return err;
+ }
+ }
+
+ pr_debug("created map %s: fd=%d\n", map->name, *pfd);
+ }
+
+ return 0;
+}
+
+static int
+check_btf_ext_reloc_err(struct bpf_program *prog, int err,
+ void *btf_prog_info, const char *info_name)
+{
+ if (err != -ENOENT) {
+ pr_warn("Error in loading %s for sec %s.\n",
+ info_name, prog->section_name);
+ return err;
+ }
+
+ /* err == -ENOENT (i.e. prog->section_name not found in btf_ext) */
+
+ if (btf_prog_info) {
+ /*
+ * Some info has already been found but has problem
+ * in the last btf_ext reloc. Must have to error out.
+ */
+ pr_warn("Error in relocating %s for sec %s.\n",
+ info_name, prog->section_name);
+ return err;
+ }
+
+ /* Have problem loading the very first info. Ignore the rest. */
+ pr_warn("Cannot find %s for main program sec %s. Ignore all %s.\n",
+ info_name, prog->section_name, info_name);
+ return 0;
+}
+
+static int
+bpf_program_reloc_btf_ext(struct bpf_program *prog, struct bpf_object *obj,
+ const char *section_name, __u32 insn_offset)
+{
+ int err;
+
+ if (!insn_offset || prog->func_info) {
+ /*
+ * !insn_offset => main program
+ *
+ * For sub prog, the main program's func_info has to
+ * be loaded first (i.e. prog->func_info != NULL)
+ */
+ err = btf_ext__reloc_func_info(obj->btf, obj->btf_ext,
+ section_name, insn_offset,
+ &prog->func_info,
+ &prog->func_info_cnt);
+ if (err)
+ return check_btf_ext_reloc_err(prog, err,
+ prog->func_info,
+ "bpf_func_info");
+
+ prog->func_info_rec_size = btf_ext__func_info_rec_size(obj->btf_ext);
+ }
+
+ if (!insn_offset || prog->line_info) {
+ err = btf_ext__reloc_line_info(obj->btf, obj->btf_ext,
+ section_name, insn_offset,
+ &prog->line_info,
+ &prog->line_info_cnt);
+ if (err)
+ return check_btf_ext_reloc_err(prog, err,
+ prog->line_info,
+ "bpf_line_info");
+
+ prog->line_info_rec_size = btf_ext__line_info_rec_size(obj->btf_ext);
+ }
+
+ return 0;
+}
+
+#define BPF_CORE_SPEC_MAX_LEN 64
+
+/* represents BPF CO-RE field or array element accessor */
+struct bpf_core_accessor {
+ __u32 type_id; /* struct/union type or array element type */
+ __u32 idx; /* field index or array index */
+ const char *name; /* field name or NULL for array accessor */
+};
+
+struct bpf_core_spec {
+ const struct btf *btf;
+ /* high-level spec: named fields and array indices only */
+ struct bpf_core_accessor spec[BPF_CORE_SPEC_MAX_LEN];
+ /* high-level spec length */
+ int len;
+ /* raw, low-level spec: 1-to-1 with accessor spec string */
+ int raw_spec[BPF_CORE_SPEC_MAX_LEN];
+ /* raw spec length */
+ int raw_len;
+ /* field bit offset represented by spec */
+ __u32 bit_offset;
+};
+
+static bool str_is_empty(const char *s)
+{
+ return !s || !s[0];
+}
+
+/*
+ * Turn bpf_field_reloc into a low- and high-level spec representation,
+ * validating correctness along the way, as well as calculating resulting
+ * field bit offset, specified by accessor string. Low-level spec captures
+ * every single level of nestedness, including traversing anonymous
+ * struct/union members. High-level one only captures semantically meaningful
+ * "turning points": named fields and array indicies.
+ * E.g., for this case:
+ *
+ * struct sample {
+ * int __unimportant;
+ * struct {
+ * int __1;
+ * int __2;
+ * int a[7];
+ * };
+ * };
+ *
+ * struct sample *s = ...;
+ *
+ * int x = &s->a[3]; // access string = '0:1:2:3'
+ *
+ * Low-level spec has 1:1 mapping with each element of access string (it's
+ * just a parsed access string representation): [0, 1, 2, 3].
+ *
+ * High-level spec will capture only 3 points:
+ * - intial zero-index access by pointer (&s->... is the same as &s[0]...);
+ * - field 'a' access (corresponds to '2' in low-level spec);
+ * - array element #3 access (corresponds to '3' in low-level spec).
+ *
+ */
+static int bpf_core_spec_parse(const struct btf *btf,
+ __u32 type_id,
+ const char *spec_str,
+ struct bpf_core_spec *spec)
+{
+ int access_idx, parsed_len, i;
+ const struct btf_type *t;
+ const char *name;
+ __u32 id;
+ __s64 sz;
+
+ if (str_is_empty(spec_str) || *spec_str == ':')
+ return -EINVAL;
+
+ memset(spec, 0, sizeof(*spec));
+ spec->btf = btf;
+
+ /* parse spec_str="0:1:2:3:4" into array raw_spec=[0, 1, 2, 3, 4] */
+ while (*spec_str) {
+ if (*spec_str == ':')
+ ++spec_str;
+ if (sscanf(spec_str, "%d%n", &access_idx, &parsed_len) != 1)
+ return -EINVAL;
+ if (spec->raw_len == BPF_CORE_SPEC_MAX_LEN)
+ return -E2BIG;
+ spec_str += parsed_len;
+ spec->raw_spec[spec->raw_len++] = access_idx;
+ }
+
+ if (spec->raw_len == 0)
+ return -EINVAL;
+
+ /* first spec value is always reloc type array index */
+ t = skip_mods_and_typedefs(btf, type_id, &id);
+ if (!t)
+ return -EINVAL;
+
+ access_idx = spec->raw_spec[0];
+ spec->spec[0].type_id = id;
+ spec->spec[0].idx = access_idx;
+ spec->len++;
+
+ sz = btf__resolve_size(btf, id);
+ if (sz < 0)
+ return sz;
+ spec->bit_offset = access_idx * sz * 8;
+
+ for (i = 1; i < spec->raw_len; i++) {
+ t = skip_mods_and_typedefs(btf, id, &id);
+ if (!t)
+ return -EINVAL;
+
+ access_idx = spec->raw_spec[i];
+
+ if (btf_is_composite(t)) {
+ const struct btf_member *m;
+ __u32 bit_offset;
+
+ if (access_idx >= btf_vlen(t))
+ return -EINVAL;
+
+ bit_offset = btf_member_bit_offset(t, access_idx);
+ spec->bit_offset += bit_offset;
+
+ m = btf_members(t) + access_idx;
+ if (m->name_off) {
+ name = btf__name_by_offset(btf, m->name_off);
+ if (str_is_empty(name))
+ return -EINVAL;
+
+ spec->spec[spec->len].type_id = id;
+ spec->spec[spec->len].idx = access_idx;
+ spec->spec[spec->len].name = name;
+ spec->len++;
+ }
+
+ id = m->type;
+ } else if (btf_is_array(t)) {
+ const struct btf_array *a = btf_array(t);
+
+ t = skip_mods_and_typedefs(btf, a->type, &id);
+ if (!t || access_idx >= a->nelems)
+ return -EINVAL;
+
+ spec->spec[spec->len].type_id = id;
+ spec->spec[spec->len].idx = access_idx;
+ spec->len++;
+
+ sz = btf__resolve_size(btf, id);
+ if (sz < 0)
+ return sz;
+ spec->bit_offset += access_idx * sz * 8;
+ } else {
+ pr_warn("relo for [%u] %s (at idx %d) captures type [%d] of unexpected kind %d\n",
+ type_id, spec_str, i, id, btf_kind(t));
+ return -EINVAL;
+ }
+ }
+
+ return 0;
+}
+
+static bool bpf_core_is_flavor_sep(const char *s)
+{
+ /* check X___Y name pattern, where X and Y are not underscores */
+ return s[0] != '_' && /* X */
+ s[1] == '_' && s[2] == '_' && s[3] == '_' && /* ___ */
+ s[4] != '_'; /* Y */
+}
+
+/* Given 'some_struct_name___with_flavor' return the length of a name prefix
+ * before last triple underscore. Struct name part after last triple
+ * underscore is ignored by BPF CO-RE relocation during relocation matching.
+ */
+static size_t bpf_core_essential_name_len(const char *name)
+{
+ size_t n = strlen(name);
+ int i;
+
+ for (i = n - 5; i >= 0; i--) {
+ if (bpf_core_is_flavor_sep(name + i))
+ return i + 1;
+ }
+ return n;
+}
+
+/* dynamically sized list of type IDs */
+struct ids_vec {
+ __u32 *data;
+ int len;
+};
+
+static void bpf_core_free_cands(struct ids_vec *cand_ids)
+{
+ free(cand_ids->data);
+ free(cand_ids);
+}
+
+static struct ids_vec *bpf_core_find_cands(const struct btf *local_btf,
+ __u32 local_type_id,
+ const struct btf *targ_btf)
+{
+ size_t local_essent_len, targ_essent_len;
+ const char *local_name, *targ_name;
+ const struct btf_type *t;
+ struct ids_vec *cand_ids;
+ __u32 *new_ids;
+ int i, err, n;
+
+ t = btf__type_by_id(local_btf, local_type_id);
+ if (!t)
+ return ERR_PTR(-EINVAL);
+
+ local_name = btf__name_by_offset(local_btf, t->name_off);
+ if (str_is_empty(local_name))
+ return ERR_PTR(-EINVAL);
+ local_essent_len = bpf_core_essential_name_len(local_name);
+
+ cand_ids = calloc(1, sizeof(*cand_ids));
+ if (!cand_ids)
+ return ERR_PTR(-ENOMEM);
+
+ n = btf__get_nr_types(targ_btf);
+ for (i = 1; i <= n; i++) {
+ t = btf__type_by_id(targ_btf, i);
+ targ_name = btf__name_by_offset(targ_btf, t->name_off);
+ if (str_is_empty(targ_name))
+ continue;
+
+ targ_essent_len = bpf_core_essential_name_len(targ_name);
+ if (targ_essent_len != local_essent_len)
+ continue;
+
+ if (strncmp(local_name, targ_name, local_essent_len) == 0) {
+ pr_debug("[%d] %s: found candidate [%d] %s\n",
+ local_type_id, local_name, i, targ_name);
+ new_ids = realloc(cand_ids->data, cand_ids->len + 1);
+ if (!new_ids) {
+ err = -ENOMEM;
+ goto err_out;
+ }
+ cand_ids->data = new_ids;
+ cand_ids->data[cand_ids->len++] = i;
+ }
+ }
+ return cand_ids;
+err_out:
+ bpf_core_free_cands(cand_ids);
+ return ERR_PTR(err);
+}
+
+/* Check two types for compatibility, skipping const/volatile/restrict and
+ * typedefs, to ensure we are relocating compatible entities:
+ * - any two STRUCTs/UNIONs are compatible and can be mixed;
+ * - any two FWDs are compatible, if their names match (modulo flavor suffix);
+ * - any two PTRs are always compatible;
+ * - for ENUMs, names should be the same (ignoring flavor suffix) or at
+ * least one of enums should be anonymous;
+ * - for ENUMs, check sizes, names are ignored;
+ * - for INT, size and signedness are ignored;
+ * - for ARRAY, dimensionality is ignored, element types are checked for
+ * compatibility recursively;
+ * - everything else shouldn't be ever a target of relocation.
+ * These rules are not set in stone and probably will be adjusted as we get
+ * more experience with using BPF CO-RE relocations.
+ */
+static int bpf_core_fields_are_compat(const struct btf *local_btf,
+ __u32 local_id,
+ const struct btf *targ_btf,
+ __u32 targ_id)
+{
+ const struct btf_type *local_type, *targ_type;
+
+recur:
+ local_type = skip_mods_and_typedefs(local_btf, local_id, &local_id);
+ targ_type = skip_mods_and_typedefs(targ_btf, targ_id, &targ_id);
+ if (!local_type || !targ_type)
+ return -EINVAL;
+
+ if (btf_is_composite(local_type) && btf_is_composite(targ_type))
+ return 1;
+ if (btf_kind(local_type) != btf_kind(targ_type))
+ return 0;
+
+ switch (btf_kind(local_type)) {
+ case BTF_KIND_PTR:
+ return 1;
+ case BTF_KIND_FWD:
+ case BTF_KIND_ENUM: {
+ const char *local_name, *targ_name;
+ size_t local_len, targ_len;
+
+ local_name = btf__name_by_offset(local_btf,
+ local_type->name_off);
+ targ_name = btf__name_by_offset(targ_btf, targ_type->name_off);
+ local_len = bpf_core_essential_name_len(local_name);
+ targ_len = bpf_core_essential_name_len(targ_name);
+ /* one of them is anonymous or both w/ same flavor-less names */
+ return local_len == 0 || targ_len == 0 ||
+ (local_len == targ_len &&
+ strncmp(local_name, targ_name, local_len) == 0);
+ }
+ case BTF_KIND_INT:
+ /* just reject deprecated bitfield-like integers; all other
+ * integers are by default compatible between each other
+ */
+ return btf_int_offset(local_type) == 0 &&
+ btf_int_offset(targ_type) == 0;
+ case BTF_KIND_ARRAY:
+ local_id = btf_array(local_type)->type;
+ targ_id = btf_array(targ_type)->type;
+ goto recur;
+ default:
+ pr_warn("unexpected kind %d relocated, local [%d], target [%d]\n",
+ btf_kind(local_type), local_id, targ_id);
+ return 0;
+ }
+}
+
+/*
+ * Given single high-level named field accessor in local type, find
+ * corresponding high-level accessor for a target type. Along the way,
+ * maintain low-level spec for target as well. Also keep updating target
+ * bit offset.
+ *
+ * Searching is performed through recursive exhaustive enumeration of all
+ * fields of a struct/union. If there are any anonymous (embedded)
+ * structs/unions, they are recursively searched as well. If field with
+ * desired name is found, check compatibility between local and target types,
+ * before returning result.
+ *
+ * 1 is returned, if field is found.
+ * 0 is returned if no compatible field is found.
+ * <0 is returned on error.
+ */
+static int bpf_core_match_member(const struct btf *local_btf,
+ const struct bpf_core_accessor *local_acc,
+ const struct btf *targ_btf,
+ __u32 targ_id,
+ struct bpf_core_spec *spec,
+ __u32 *next_targ_id)
+{
+ const struct btf_type *local_type, *targ_type;
+ const struct btf_member *local_member, *m;
+ const char *local_name, *targ_name;
+ __u32 local_id;
+ int i, n, found;
+
+ targ_type = skip_mods_and_typedefs(targ_btf, targ_id, &targ_id);
+ if (!targ_type)
+ return -EINVAL;
+ if (!btf_is_composite(targ_type))
+ return 0;
+
+ local_id = local_acc->type_id;
+ local_type = btf__type_by_id(local_btf, local_id);
+ local_member = btf_members(local_type) + local_acc->idx;
+ local_name = btf__name_by_offset(local_btf, local_member->name_off);
+
+ n = btf_vlen(targ_type);
+ m = btf_members(targ_type);
+ for (i = 0; i < n; i++, m++) {
+ __u32 bit_offset;
+
+ bit_offset = btf_member_bit_offset(targ_type, i);
+
+ /* too deep struct/union/array nesting */
+ if (spec->raw_len == BPF_CORE_SPEC_MAX_LEN)
+ return -E2BIG;
+
+ /* speculate this member will be the good one */
+ spec->bit_offset += bit_offset;
+ spec->raw_spec[spec->raw_len++] = i;
+
+ targ_name = btf__name_by_offset(targ_btf, m->name_off);
+ if (str_is_empty(targ_name)) {
+ /* embedded struct/union, we need to go deeper */
+ found = bpf_core_match_member(local_btf, local_acc,
+ targ_btf, m->type,
+ spec, next_targ_id);
+ if (found) /* either found or error */
+ return found;
+ } else if (strcmp(local_name, targ_name) == 0) {
+ /* matching named field */
+ struct bpf_core_accessor *targ_acc;
+
+ targ_acc = &spec->spec[spec->len++];
+ targ_acc->type_id = targ_id;
+ targ_acc->idx = i;
+ targ_acc->name = targ_name;
+
+ *next_targ_id = m->type;
+ found = bpf_core_fields_are_compat(local_btf,
+ local_member->type,
+ targ_btf, m->type);
+ if (!found)
+ spec->len--; /* pop accessor */
+ return found;
+ }
+ /* member turned out not to be what we looked for */
+ spec->bit_offset -= bit_offset;
+ spec->raw_len--;
+ }
+
+ return 0;
+}
+
+/*
+ * Try to match local spec to a target type and, if successful, produce full
+ * target spec (high-level, low-level + bit offset).
+ */
+static int bpf_core_spec_match(struct bpf_core_spec *local_spec,
+ const struct btf *targ_btf, __u32 targ_id,
+ struct bpf_core_spec *targ_spec)
+{
+ const struct btf_type *targ_type;
+ const struct bpf_core_accessor *local_acc;
+ struct bpf_core_accessor *targ_acc;
+ int i, sz, matched;
+
+ memset(targ_spec, 0, sizeof(*targ_spec));
+ targ_spec->btf = targ_btf;
+
+ local_acc = &local_spec->spec[0];
+ targ_acc = &targ_spec->spec[0];
+
+ for (i = 0; i < local_spec->len; i++, local_acc++, targ_acc++) {
+ targ_type = skip_mods_and_typedefs(targ_spec->btf, targ_id,
+ &targ_id);
+ if (!targ_type)
+ return -EINVAL;
+
+ if (local_acc->name) {
+ matched = bpf_core_match_member(local_spec->btf,
+ local_acc,
+ targ_btf, targ_id,
+ targ_spec, &targ_id);
+ if (matched <= 0)
+ return matched;
+ } else {
+ /* for i=0, targ_id is already treated as array element
+ * type (because it's the original struct), for others
+ * we should find array element type first
+ */
+ if (i > 0) {
+ const struct btf_array *a;
+
+ if (!btf_is_array(targ_type))
+ return 0;
+
+ a = btf_array(targ_type);
+ if (local_acc->idx >= a->nelems)
+ return 0;
+ if (!skip_mods_and_typedefs(targ_btf, a->type,
+ &targ_id))
+ return -EINVAL;
+ }
+
+ /* too deep struct/union/array nesting */
+ if (targ_spec->raw_len == BPF_CORE_SPEC_MAX_LEN)
+ return -E2BIG;
+
+ targ_acc->type_id = targ_id;
+ targ_acc->idx = local_acc->idx;
+ targ_acc->name = NULL;
+ targ_spec->len++;
+ targ_spec->raw_spec[targ_spec->raw_len] = targ_acc->idx;
+ targ_spec->raw_len++;
+
+ sz = btf__resolve_size(targ_btf, targ_id);
+ if (sz < 0)
+ return sz;
+ targ_spec->bit_offset += local_acc->idx * sz * 8;
+ }
+ }
+
+ return 1;
+}
+
+static int bpf_core_calc_field_relo(const struct bpf_program *prog,
+ const struct bpf_field_reloc *relo,
+ const struct bpf_core_spec *spec,
+ __u32 *val, bool *validate)
+{
+ const struct bpf_core_accessor *acc = &spec->spec[spec->len - 1];
+ const struct btf_type *t = btf__type_by_id(spec->btf, acc->type_id);
+ __u32 byte_off, byte_sz, bit_off, bit_sz;
+ const struct btf_member *m;
+ const struct btf_type *mt;
+ bool bitfield;
+ __s64 sz;
+
+ /* a[n] accessor needs special handling */
+ if (!acc->name) {
+ if (relo->kind == BPF_FIELD_BYTE_OFFSET) {
+ *val = spec->bit_offset / 8;
+ } else if (relo->kind == BPF_FIELD_BYTE_SIZE) {
+ sz = btf__resolve_size(spec->btf, acc->type_id);
+ if (sz < 0)
+ return -EINVAL;
+ *val = sz;
+ } else {
+ pr_warn("prog '%s': relo %d at insn #%d can't be applied to array access\n",
+ bpf_program__title(prog, false),
+ relo->kind, relo->insn_off / 8);
+ return -EINVAL;
+ }
+ if (validate)
+ *validate = true;
+ return 0;
+ }
+
+ m = btf_members(t) + acc->idx;
+ mt = skip_mods_and_typedefs(spec->btf, m->type, NULL);
+ bit_off = spec->bit_offset;
+ bit_sz = btf_member_bitfield_size(t, acc->idx);
+
+ bitfield = bit_sz > 0;
+ if (bitfield) {
+ byte_sz = mt->size;
+ byte_off = bit_off / 8 / byte_sz * byte_sz;
+ /* figure out smallest int size necessary for bitfield load */
+ while (bit_off + bit_sz - byte_off * 8 > byte_sz * 8) {
+ if (byte_sz >= 8) {
+ /* bitfield can't be read with 64-bit read */
+ pr_warn("prog '%s': relo %d at insn #%d can't be satisfied for bitfield\n",
+ bpf_program__title(prog, false),
+ relo->kind, relo->insn_off / 8);
+ return -E2BIG;
+ }
+ byte_sz *= 2;
+ byte_off = bit_off / 8 / byte_sz * byte_sz;
+ }
+ } else {
+ sz = btf__resolve_size(spec->btf, m->type);
+ if (sz < 0)
+ return -EINVAL;
+ byte_sz = sz;
+ byte_off = spec->bit_offset / 8;
+ bit_sz = byte_sz * 8;
+ }
+
+ /* for bitfields, all the relocatable aspects are ambiguous and we
+ * might disagree with compiler, so turn off validation of expected
+ * value, except for signedness
+ */
+ if (validate)
+ *validate = !bitfield;
+
+ switch (relo->kind) {
+ case BPF_FIELD_BYTE_OFFSET:
+ *val = byte_off;
+ break;
+ case BPF_FIELD_BYTE_SIZE:
+ *val = byte_sz;
+ break;
+ case BPF_FIELD_SIGNED:
+ /* enums will be assumed unsigned */
+ *val = btf_is_enum(mt) ||
+ (btf_int_encoding(mt) & BTF_INT_SIGNED);
+ if (validate)
+ *validate = true; /* signedness is never ambiguous */
+ break;
+ case BPF_FIELD_LSHIFT_U64:
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+ *val = 64 - (bit_off + bit_sz - byte_off * 8);
+#else
+ *val = (8 - byte_sz) * 8 + (bit_off - byte_off * 8);
+#endif
+ break;
+ case BPF_FIELD_RSHIFT_U64:
+ *val = 64 - bit_sz;
+ if (validate)
+ *validate = true; /* right shift is never ambiguous */
+ break;
+ case BPF_FIELD_EXISTS:
+ default:
+ pr_warn("prog '%s': unknown relo %d at insn #%d\n",
+ bpf_program__title(prog, false),
+ relo->kind, relo->insn_off / 8);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+/*
+ * Patch relocatable BPF instruction.
+ *
+ * Patched value is determined by relocation kind and target specification.
+ * For field existence relocation target spec will be NULL if field is not
+ * found.
+ * Expected insn->imm value is determined using relocation kind and local
+ * spec, and is checked before patching instruction. If actual insn->imm value
+ * is wrong, bail out with error.
+ *
+ * Currently three kinds of BPF instructions are supported:
+ * 1. rX = <imm> (assignment with immediate operand);
+ * 2. rX += <imm> (arithmetic operations with immediate operand);
+ */
+static int bpf_core_reloc_insn(struct bpf_program *prog,
+ const struct bpf_field_reloc *relo,
+ const struct bpf_core_spec *local_spec,
+ const struct bpf_core_spec *targ_spec)
+{
+ bool failed = false, validate = true;
+ __u32 orig_val, new_val;
+ struct bpf_insn *insn;
+ int insn_idx, err;
+ __u8 class;
+
+ if (relo->insn_off % sizeof(struct bpf_insn))
+ return -EINVAL;
+ insn_idx = relo->insn_off / sizeof(struct bpf_insn);
+
+ if (relo->kind == BPF_FIELD_EXISTS) {
+ orig_val = 1; /* can't generate EXISTS relo w/o local field */
+ new_val = targ_spec ? 1 : 0;
+ } else if (!targ_spec) {
+ failed = true;
+ new_val = (__u32)-1;
+ } else {
+ err = bpf_core_calc_field_relo(prog, relo, local_spec,
+ &orig_val, &validate);
+ if (err)
+ return err;
+ err = bpf_core_calc_field_relo(prog, relo, targ_spec,
+ &new_val, NULL);
+ if (err)
+ return err;
+ }
+
+ insn = &prog->insns[insn_idx];
+ class = BPF_CLASS(insn->code);
+
+ if (class == BPF_ALU || class == BPF_ALU64) {
+ if (BPF_SRC(insn->code) != BPF_K)
+ return -EINVAL;
+ if (!failed && validate && insn->imm != orig_val) {
+ pr_warn("prog '%s': unexpected insn #%d value: got %u, exp %u -> %u\n",
+ bpf_program__title(prog, false), insn_idx,
+ insn->imm, orig_val, new_val);
+ return -EINVAL;
+ }
+ orig_val = insn->imm;
+ insn->imm = new_val;
+ pr_debug("prog '%s': patched insn #%d (ALU/ALU64)%s imm %u -> %u\n",
+ bpf_program__title(prog, false), insn_idx,
+ failed ? " w/ failed reloc" : "", orig_val, new_val);
+ } else {
+ pr_warn("prog '%s': trying to relocate unrecognized insn #%d, code:%x, src:%x, dst:%x, off:%x, imm:%x\n",
+ bpf_program__title(prog, false),
+ insn_idx, insn->code, insn->src_reg, insn->dst_reg,
+ insn->off, insn->imm);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static struct btf *btf_load_raw(const char *path)
+{
+ struct btf *btf;
+ size_t read_cnt;
+ struct stat st;
+ void *data;
+ FILE *f;
+
+ if (stat(path, &st))
+ return ERR_PTR(-errno);
+
+ data = malloc(st.st_size);
+ if (!data)
+ return ERR_PTR(-ENOMEM);
+
+ f = fopen(path, "rb");
+ if (!f) {
+ btf = ERR_PTR(-errno);
+ goto cleanup;
+ }
+
+ read_cnt = fread(data, 1, st.st_size, f);
+ fclose(f);
+ if (read_cnt < st.st_size) {
+ btf = ERR_PTR(-EBADF);
+ goto cleanup;
+ }
+
+ btf = btf__new(data, read_cnt);
+
+cleanup:
+ free(data);
+ return btf;
+}
+
+/*
+ * Probe few well-known locations for vmlinux kernel image and try to load BTF
+ * data out of it to use for target BTF.
+ */
+static struct btf *bpf_core_find_kernel_btf(void)
+{
+ struct {
+ const char *path_fmt;
+ bool raw_btf;
+ } locations[] = {
+ /* try canonical vmlinux BTF through sysfs first */
+ { "/sys/kernel/btf/vmlinux", true /* raw BTF */ },
+ /* fall back to trying to find vmlinux ELF on disk otherwise */
+ { "/boot/vmlinux-%1$s" },
+ { "/lib/modules/%1$s/vmlinux-%1$s" },
+ { "/lib/modules/%1$s/build/vmlinux" },
+ { "/usr/lib/modules/%1$s/kernel/vmlinux" },
+ { "/usr/lib/debug/boot/vmlinux-%1$s" },
+ { "/usr/lib/debug/boot/vmlinux-%1$s.debug" },
+ { "/usr/lib/debug/lib/modules/%1$s/vmlinux" },
+ };
+ char path[PATH_MAX + 1];
+ struct utsname buf;
+ struct btf *btf;
+ int i;
+
+ uname(&buf);
+
+ for (i = 0; i < ARRAY_SIZE(locations); i++) {
+ snprintf(path, PATH_MAX, locations[i].path_fmt, buf.release);
+
+ if (access(path, R_OK))
+ continue;
+
+ if (locations[i].raw_btf)
+ btf = btf_load_raw(path);
+ else
+ btf = btf__parse_elf(path, NULL);
+
+ pr_debug("loading kernel BTF '%s': %ld\n",
+ path, IS_ERR(btf) ? PTR_ERR(btf) : 0);
+ if (IS_ERR(btf))
+ continue;
+
+ return btf;
+ }
+
+ pr_warn("failed to find valid kernel BTF\n");
+ return ERR_PTR(-ESRCH);
+}
+
+/* Output spec definition in the format:
+ * [<type-id>] (<type-name>) + <raw-spec> => <offset>@<spec>,
+ * where <spec> is a C-syntax view of recorded field access, e.g.: x.a[3].b
+ */
+static void bpf_core_dump_spec(int level, const struct bpf_core_spec *spec)
+{
+ const struct btf_type *t;
+ const char *s;
+ __u32 type_id;
+ int i;
+
+ type_id = spec->spec[0].type_id;
+ t = btf__type_by_id(spec->btf, type_id);
+ s = btf__name_by_offset(spec->btf, t->name_off);
+ libbpf_print(level, "[%u] %s + ", type_id, s);
+
+ for (i = 0; i < spec->raw_len; i++)
+ libbpf_print(level, "%d%s", spec->raw_spec[i],
+ i == spec->raw_len - 1 ? " => " : ":");
+
+ libbpf_print(level, "%u.%u @ &x",
+ spec->bit_offset / 8, spec->bit_offset % 8);
+
+ for (i = 0; i < spec->len; i++) {
+ if (spec->spec[i].name)
+ libbpf_print(level, ".%s", spec->spec[i].name);
+ else
+ libbpf_print(level, "[%u]", spec->spec[i].idx);
+ }
+
+}
+
+static size_t bpf_core_hash_fn(const void *key, void *ctx)
+{
+ return (size_t)key;
+}
+
+static bool bpf_core_equal_fn(const void *k1, const void *k2, void *ctx)
+{
+ return k1 == k2;
+}
+
+static void *u32_as_hash_key(__u32 x)
+{
+ return (void *)(uintptr_t)x;
+}
+
+/*
+ * CO-RE relocate single instruction.
+ *
+ * The outline and important points of the algorithm:
+ * 1. For given local type, find corresponding candidate target types.
+ * Candidate type is a type with the same "essential" name, ignoring
+ * everything after last triple underscore (___). E.g., `sample`,
+ * `sample___flavor_one`, `sample___flavor_another_one`, are all candidates
+ * for each other. Names with triple underscore are referred to as
+ * "flavors" and are useful, among other things, to allow to
+ * specify/support incompatible variations of the same kernel struct, which
+ * might differ between different kernel versions and/or build
+ * configurations.
+ *
+ * N.B. Struct "flavors" could be generated by bpftool's BTF-to-C
+ * converter, when deduplicated BTF of a kernel still contains more than
+ * one different types with the same name. In that case, ___2, ___3, etc
+ * are appended starting from second name conflict. But start flavors are
+ * also useful to be defined "locally", in BPF program, to extract same
+ * data from incompatible changes between different kernel
+ * versions/configurations. For instance, to handle field renames between
+ * kernel versions, one can use two flavors of the struct name with the
+ * same common name and use conditional relocations to extract that field,
+ * depending on target kernel version.
+ * 2. For each candidate type, try to match local specification to this
+ * candidate target type. Matching involves finding corresponding
+ * high-level spec accessors, meaning that all named fields should match,
+ * as well as all array accesses should be within the actual bounds. Also,
+ * types should be compatible (see bpf_core_fields_are_compat for details).
+ * 3. It is supported and expected that there might be multiple flavors
+ * matching the spec. As long as all the specs resolve to the same set of
+ * offsets across all candidates, there is no error. If there is any
+ * ambiguity, CO-RE relocation will fail. This is necessary to accomodate
+ * imprefection of BTF deduplication, which can cause slight duplication of
+ * the same BTF type, if some directly or indirectly referenced (by
+ * pointer) type gets resolved to different actual types in different
+ * object files. If such situation occurs, deduplicated BTF will end up
+ * with two (or more) structurally identical types, which differ only in
+ * types they refer to through pointer. This should be OK in most cases and
+ * is not an error.
+ * 4. Candidate types search is performed by linearly scanning through all
+ * types in target BTF. It is anticipated that this is overall more
+ * efficient memory-wise and not significantly worse (if not better)
+ * CPU-wise compared to prebuilding a map from all local type names to
+ * a list of candidate type names. It's also sped up by caching resolved
+ * list of matching candidates per each local "root" type ID, that has at
+ * least one bpf_field_reloc associated with it. This list is shared
+ * between multiple relocations for the same type ID and is updated as some
+ * of the candidates are pruned due to structural incompatibility.
+ */
+static int bpf_core_reloc_field(struct bpf_program *prog,
+ const struct bpf_field_reloc *relo,
+ int relo_idx,
+ const struct btf *local_btf,
+ const struct btf *targ_btf,
+ struct hashmap *cand_cache)
+{
+ const char *prog_name = bpf_program__title(prog, false);
+ struct bpf_core_spec local_spec, cand_spec, targ_spec;
+ const void *type_key = u32_as_hash_key(relo->type_id);
+ const struct btf_type *local_type, *cand_type;
+ const char *local_name, *cand_name;
+ struct ids_vec *cand_ids;
+ __u32 local_id, cand_id;
+ const char *spec_str;
+ int i, j, err;
+
+ local_id = relo->type_id;
+ local_type = btf__type_by_id(local_btf, local_id);
+ if (!local_type)
+ return -EINVAL;
+
+ local_name = btf__name_by_offset(local_btf, local_type->name_off);
+ if (str_is_empty(local_name))
+ return -EINVAL;
+
+ spec_str = btf__name_by_offset(local_btf, relo->access_str_off);
+ if (str_is_empty(spec_str))
+ return -EINVAL;
+
+ err = bpf_core_spec_parse(local_btf, local_id, spec_str, &local_spec);
+ if (err) {
+ pr_warn("prog '%s': relo #%d: parsing [%d] %s + %s failed: %d\n",
+ prog_name, relo_idx, local_id, local_name, spec_str,
+ err);
+ return -EINVAL;
+ }
+
+ pr_debug("prog '%s': relo #%d: kind %d, spec is ", prog_name, relo_idx,
+ relo->kind);
+ bpf_core_dump_spec(LIBBPF_DEBUG, &local_spec);
+ libbpf_print(LIBBPF_DEBUG, "\n");
+
+ if (!hashmap__find(cand_cache, type_key, (void **)&cand_ids)) {
+ cand_ids = bpf_core_find_cands(local_btf, local_id, targ_btf);
+ if (IS_ERR(cand_ids)) {
+ pr_warn("prog '%s': relo #%d: target candidate search failed for [%d] %s: %ld",
+ prog_name, relo_idx, local_id, local_name,
+ PTR_ERR(cand_ids));
+ return PTR_ERR(cand_ids);
+ }
+ err = hashmap__set(cand_cache, type_key, cand_ids, NULL, NULL);
+ if (err) {
+ bpf_core_free_cands(cand_ids);
+ return err;
+ }
+ }
+
+ for (i = 0, j = 0; i < cand_ids->len; i++) {
+ cand_id = cand_ids->data[i];
+ cand_type = btf__type_by_id(targ_btf, cand_id);
+ cand_name = btf__name_by_offset(targ_btf, cand_type->name_off);
+
+ err = bpf_core_spec_match(&local_spec, targ_btf,
+ cand_id, &cand_spec);
+ pr_debug("prog '%s': relo #%d: matching candidate #%d %s against spec ",
+ prog_name, relo_idx, i, cand_name);
+ bpf_core_dump_spec(LIBBPF_DEBUG, &cand_spec);
+ libbpf_print(LIBBPF_DEBUG, ": %d\n", err);
+ if (err < 0) {
+ pr_warn("prog '%s': relo #%d: matching error: %d\n",
+ prog_name, relo_idx, err);
+ return err;
+ }
+ if (err == 0)
+ continue;
+
+ if (j == 0) {
+ targ_spec = cand_spec;
+ } else if (cand_spec.bit_offset != targ_spec.bit_offset) {
+ /* if there are many candidates, they should all
+ * resolve to the same bit offset
+ */
+ pr_warn("prog '%s': relo #%d: offset ambiguity: %u != %u\n",
+ prog_name, relo_idx, cand_spec.bit_offset,
+ targ_spec.bit_offset);
+ return -EINVAL;
+ }
+
+ cand_ids->data[j++] = cand_spec.spec[0].type_id;
+ }
+
+ /*
+ * For BPF_FIELD_EXISTS relo or when relaxed CO-RE reloc mode is
+ * requested, it's expected that we might not find any candidates.
+ * In this case, if field wasn't found in any candidate, the list of
+ * candidates shouldn't change at all, we'll just handle relocating
+ * appropriately, depending on relo's kind.
+ */
+ if (j > 0)
+ cand_ids->len = j;
+
+ if (j == 0 && !prog->obj->relaxed_core_relocs &&
+ relo->kind != BPF_FIELD_EXISTS) {
+ pr_warn("prog '%s': relo #%d: no matching targets found for [%d] %s + %s\n",
+ prog_name, relo_idx, local_id, local_name, spec_str);
+ return -ESRCH;
+ }
+
+ /* bpf_core_reloc_insn should know how to handle missing targ_spec */
+ err = bpf_core_reloc_insn(prog, relo, &local_spec,
+ j ? &targ_spec : NULL);
+ if (err) {
+ pr_warn("prog '%s': relo #%d: failed to patch insn at offset %d: %d\n",
+ prog_name, relo_idx, relo->insn_off, err);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int
+bpf_core_reloc_fields(struct bpf_object *obj, const char *targ_btf_path)
+{
+ const struct btf_ext_info_sec *sec;
+ const struct bpf_field_reloc *rec;
+ const struct btf_ext_info *seg;
+ struct hashmap_entry *entry;
+ struct hashmap *cand_cache = NULL;
+ struct bpf_program *prog;
+ struct btf *targ_btf;
+ const char *sec_name;
+ int i, err = 0;
+
+ if (targ_btf_path)
+ targ_btf = btf__parse_elf(targ_btf_path, NULL);
+ else
+ targ_btf = bpf_core_find_kernel_btf();
+ if (IS_ERR(targ_btf)) {
+ pr_warn("failed to get target BTF: %ld\n", PTR_ERR(targ_btf));
+ return PTR_ERR(targ_btf);
+ }
+
+ cand_cache = hashmap__new(bpf_core_hash_fn, bpf_core_equal_fn, NULL);
+ if (IS_ERR(cand_cache)) {
+ err = PTR_ERR(cand_cache);
+ goto out;
+ }
+
+ seg = &obj->btf_ext->field_reloc_info;
+ for_each_btf_ext_sec(seg, sec) {
+ sec_name = btf__name_by_offset(obj->btf, sec->sec_name_off);
+ if (str_is_empty(sec_name)) {
+ err = -EINVAL;
+ goto out;
+ }
+ prog = bpf_object__find_program_by_title(obj, sec_name);
+ if (!prog) {
+ pr_warn("failed to find program '%s' for CO-RE offset relocation\n",
+ sec_name);
+ err = -EINVAL;
+ goto out;
+ }
+
+ pr_debug("prog '%s': performing %d CO-RE offset relocs\n",
+ sec_name, sec->num_info);
+
+ for_each_btf_ext_rec(seg, sec, i, rec) {
+ err = bpf_core_reloc_field(prog, rec, i, obj->btf,
+ targ_btf, cand_cache);
+ if (err) {
+ pr_warn("prog '%s': relo #%d: failed to relocate: %d\n",
+ sec_name, i, err);
+ goto out;
+ }
+ }
+ }
+
+out:
+ btf__free(targ_btf);
+ if (!IS_ERR_OR_NULL(cand_cache)) {
+ hashmap__for_each_entry(cand_cache, entry, i) {
+ bpf_core_free_cands(entry->value);
+ }
+ hashmap__free(cand_cache);
+ }
+ return err;
+}
+
+static int
+bpf_object__relocate_core(struct bpf_object *obj, const char *targ_btf_path)
+{
+ int err = 0;
+
+ if (obj->btf_ext->field_reloc_info.len)
+ err = bpf_core_reloc_fields(obj, targ_btf_path);
+
+ return err;
+}
+
+static int
+bpf_program__reloc_text(struct bpf_program *prog, struct bpf_object *obj,
+ struct reloc_desc *relo)
+{
+ struct bpf_insn *insn, *new_insn;
+ struct bpf_program *text;
+ size_t new_cnt;
+ int err;
+
+ if (relo->type != RELO_CALL)
+ return -LIBBPF_ERRNO__RELOC;
+
+ if (prog->idx == obj->efile.text_shndx) {
+ pr_warn("relo in .text insn %d into off %d (insn #%d)\n",
+ relo->insn_idx, relo->sym_off, relo->sym_off / 8);
+ return -LIBBPF_ERRNO__RELOC;
+ }
+
+ if (prog->main_prog_cnt == 0) {
+ text = bpf_object__find_prog_by_idx(obj, obj->efile.text_shndx);
+ if (!text) {
+ pr_warn("no .text section found yet relo into text exist\n");
+ return -LIBBPF_ERRNO__RELOC;
+ }
+ new_cnt = prog->insns_cnt + text->insns_cnt;
+ new_insn = reallocarray(prog->insns, new_cnt, sizeof(*insn));
+ if (!new_insn) {
+ pr_warn("oom in prog realloc\n");
+ return -ENOMEM;
+ }
+ prog->insns = new_insn;
+
+ if (obj->btf_ext) {
+ err = bpf_program_reloc_btf_ext(prog, obj,
+ text->section_name,
+ prog->insns_cnt);
+ if (err)
+ return err;
+ }
+
+ memcpy(new_insn + prog->insns_cnt, text->insns,
+ text->insns_cnt * sizeof(*insn));
+ prog->main_prog_cnt = prog->insns_cnt;
+ prog->insns_cnt = new_cnt;
+ pr_debug("added %zd insn from %s to prog %s\n",
+ text->insns_cnt, text->section_name,
+ prog->section_name);
+ }
+ insn = &prog->insns[relo->insn_idx];
+ insn->imm += relo->sym_off / 8 + prog->main_prog_cnt - relo->insn_idx;
+ return 0;
+}
+
+static int
+bpf_program__relocate(struct bpf_program *prog, struct bpf_object *obj)
+{
+ int i, err;
+
+ if (!prog)
+ return 0;
+
+ if (obj->btf_ext) {
+ err = bpf_program_reloc_btf_ext(prog, obj,
+ prog->section_name, 0);
+ if (err)
+ return err;
+ }
+
+ if (!prog->reloc_desc)
+ return 0;
+
+ for (i = 0; i < prog->nr_reloc; i++) {
+ struct reloc_desc *relo = &prog->reloc_desc[i];
+
+ if (relo->type == RELO_LD64 || relo->type == RELO_DATA) {
+ struct bpf_insn *insn = &prog->insns[relo->insn_idx];
+
+ if (relo->insn_idx + 1 >= (int)prog->insns_cnt) {
+ pr_warn("relocation out of range: '%s'\n",
+ prog->section_name);
+ return -LIBBPF_ERRNO__RELOC;
+ }
+
+ if (relo->type != RELO_DATA) {
+ insn[0].src_reg = BPF_PSEUDO_MAP_FD;
+ } else {
+ insn[0].src_reg = BPF_PSEUDO_MAP_VALUE;
+ insn[1].imm = insn[0].imm + relo->sym_off;
+ }
+ insn[0].imm = obj->maps[relo->map_idx].fd;
+ } else if (relo->type == RELO_CALL) {
+ err = bpf_program__reloc_text(prog, obj, relo);
+ if (err)
+ return err;
+ }
+ }
+
+ zfree(&prog->reloc_desc);
+ prog->nr_reloc = 0;
+ return 0;
+}
+
+static int
+bpf_object__relocate(struct bpf_object *obj, const char *targ_btf_path)
+{
+ struct bpf_program *prog;
+ size_t i;
+ int err;
+
+ if (obj->btf_ext) {
+ err = bpf_object__relocate_core(obj, targ_btf_path);
+ if (err) {
+ pr_warn("failed to perform CO-RE relocations: %d\n",
+ err);
+ return err;
+ }
+ }
+ for (i = 0; i < obj->nr_programs; i++) {
+ prog = &obj->programs[i];
+
+ err = bpf_program__relocate(prog, obj);
+ if (err) {
+ pr_warn("failed to relocate '%s'\n", prog->section_name);
+ return err;
+ }
+ }
+ return 0;
+}
+
+static int bpf_object__collect_reloc(struct bpf_object *obj)
+{
+ int i, err;
+
+ if (!obj_elf_valid(obj)) {
+ pr_warn("Internal error: elf object is closed\n");
+ return -LIBBPF_ERRNO__INTERNAL;
+ }
+
+ for (i = 0; i < obj->efile.nr_reloc_sects; i++) {
+ GElf_Shdr *shdr = &obj->efile.reloc_sects[i].shdr;
+ Elf_Data *data = obj->efile.reloc_sects[i].data;
+ int idx = shdr->sh_info;
+ struct bpf_program *prog;
+
+ if (shdr->sh_type != SHT_REL) {
+ pr_warn("internal error at %d\n", __LINE__);
+ return -LIBBPF_ERRNO__INTERNAL;
+ }
+
+ prog = bpf_object__find_prog_by_idx(obj, idx);
+ if (!prog) {
+ pr_warn("relocation failed: no section(%d)\n", idx);
+ return -LIBBPF_ERRNO__RELOC;
+ }
+
+ err = bpf_program__collect_reloc(prog, shdr, data, obj);
+ if (err)
+ return err;
+ }
+ return 0;
+}
+
+static int
+load_program(struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt,
+ char *license, __u32 kern_version, int *pfd)
+{
+ struct bpf_load_program_attr load_attr;
+ char *cp, errmsg[STRERR_BUFSIZE];
+ int log_buf_size = BPF_LOG_BUF_SIZE;
+ char *log_buf;
+ int btf_fd, ret;
+
+ if (!insns || !insns_cnt)
+ return -EINVAL;
+
+ memset(&load_attr, 0, sizeof(struct bpf_load_program_attr));
+ load_attr.prog_type = prog->type;
+ load_attr.expected_attach_type = prog->expected_attach_type;
+ if (prog->caps->name)
+ load_attr.name = prog->name;
+ load_attr.insns = insns;
+ load_attr.insns_cnt = insns_cnt;
+ load_attr.license = license;
+ if (prog->type == BPF_PROG_TYPE_TRACING) {
+ load_attr.attach_prog_fd = prog->attach_prog_fd;
+ load_attr.attach_btf_id = prog->attach_btf_id;
+ } else {
+ load_attr.kern_version = kern_version;
+ load_attr.prog_ifindex = prog->prog_ifindex;
+ }
+ /* if .BTF.ext was loaded, kernel supports associated BTF for prog */
+ if (prog->obj->btf_ext)
+ btf_fd = bpf_object__btf_fd(prog->obj);
+ else
+ btf_fd = -1;
+ load_attr.prog_btf_fd = btf_fd >= 0 ? btf_fd : 0;
+ load_attr.func_info = prog->func_info;
+ load_attr.func_info_rec_size = prog->func_info_rec_size;
+ load_attr.func_info_cnt = prog->func_info_cnt;
+ load_attr.line_info = prog->line_info;
+ load_attr.line_info_rec_size = prog->line_info_rec_size;
+ load_attr.line_info_cnt = prog->line_info_cnt;
+ load_attr.log_level = prog->log_level;
+ load_attr.prog_flags = prog->prog_flags;
+
+retry_load:
+ log_buf = malloc(log_buf_size);
+ if (!log_buf)
+ pr_warn("Alloc log buffer for bpf loader error, continue without log\n");
+
+ ret = bpf_load_program_xattr(&load_attr, log_buf, log_buf_size);
+
+ if (ret >= 0) {
+ if (load_attr.log_level)
+ pr_debug("verifier log:\n%s", log_buf);
+ *pfd = ret;
+ ret = 0;
+ goto out;
+ }
+
+ if (errno == ENOSPC) {
+ log_buf_size <<= 1;
+ free(log_buf);
+ goto retry_load;
+ }
+ ret = -errno;
+ cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg));
+ pr_warn("load bpf program failed: %s\n", cp);
+
+ if (log_buf && log_buf[0] != '\0') {
+ ret = -LIBBPF_ERRNO__VERIFY;
+ pr_warn("-- BEGIN DUMP LOG ---\n");
+ pr_warn("\n%s\n", log_buf);
+ pr_warn("-- END LOG --\n");
+ } else if (load_attr.insns_cnt >= BPF_MAXINSNS) {
+ pr_warn("Program too large (%zu insns), at most %d insns\n",
+ load_attr.insns_cnt, BPF_MAXINSNS);
+ ret = -LIBBPF_ERRNO__PROG2BIG;
+ } else if (load_attr.prog_type != BPF_PROG_TYPE_KPROBE) {
+ /* Wrong program type? */
+ int fd;
+
+ load_attr.prog_type = BPF_PROG_TYPE_KPROBE;
+ load_attr.expected_attach_type = 0;
+ fd = bpf_load_program_xattr(&load_attr, NULL, 0);
+ if (fd >= 0) {
+ close(fd);
+ ret = -LIBBPF_ERRNO__PROGTYPE;
+ goto out;
+ }
+ }
+
+out:
+ free(log_buf);
+ return ret;
+}
+
+int
+bpf_program__load(struct bpf_program *prog,
+ char *license, __u32 kern_version)
+{
+ int err = 0, fd, i;
+
+ if (prog->instances.nr < 0 || !prog->instances.fds) {
+ if (prog->preprocessor) {
+ pr_warn("Internal error: can't load program '%s'\n",
+ prog->section_name);
+ return -LIBBPF_ERRNO__INTERNAL;
+ }
+
+ prog->instances.fds = malloc(sizeof(int));
+ if (!prog->instances.fds) {
+ pr_warn("Not enough memory for BPF fds\n");
+ return -ENOMEM;
+ }
+ prog->instances.nr = 1;
+ prog->instances.fds[0] = -1;
+ }
+
+ if (!prog->preprocessor) {
+ if (prog->instances.nr != 1) {
+ pr_warn("Program '%s' is inconsistent: nr(%d) != 1\n",
+ prog->section_name, prog->instances.nr);
+ }
+ err = load_program(prog, prog->insns, prog->insns_cnt,
+ license, kern_version, &fd);
+ if (!err)
+ prog->instances.fds[0] = fd;
+ goto out;
+ }
+
+ for (i = 0; i < prog->instances.nr; i++) {
+ struct bpf_prog_prep_result result;
+ bpf_program_prep_t preprocessor = prog->preprocessor;
+
+ memset(&result, 0, sizeof(result));
+ err = preprocessor(prog, i, prog->insns,
+ prog->insns_cnt, &result);
+ if (err) {
+ pr_warn("Preprocessing the %dth instance of program '%s' failed\n",
+ i, prog->section_name);
+ goto out;
+ }
+
+ if (!result.new_insn_ptr || !result.new_insn_cnt) {
+ pr_debug("Skip loading the %dth instance of program '%s'\n",
+ i, prog->section_name);
+ prog->instances.fds[i] = -1;
+ if (result.pfd)
+ *result.pfd = -1;
+ continue;
+ }
+
+ err = load_program(prog, result.new_insn_ptr,
+ result.new_insn_cnt,
+ license, kern_version, &fd);
+
+ if (err) {
+ pr_warn("Loading the %dth instance of program '%s' failed\n",
+ i, prog->section_name);
+ goto out;
+ }
+
+ if (result.pfd)
+ *result.pfd = fd;
+ prog->instances.fds[i] = fd;
+ }
+out:
+ if (err)
+ pr_warn("failed to load program '%s'\n", prog->section_name);
+ zfree(&prog->insns);
+ prog->insns_cnt = 0;
+ return err;
+}
+
+static bool bpf_program__is_function_storage(const struct bpf_program *prog,
+ const struct bpf_object *obj)
+{
+ return prog->idx == obj->efile.text_shndx && obj->has_pseudo_calls;
+}
+
+static int
+bpf_object__load_progs(struct bpf_object *obj, int log_level)
+{
+ size_t i;
+ int err;
+
+ for (i = 0; i < obj->nr_programs; i++) {
+ if (bpf_program__is_function_storage(&obj->programs[i], obj))
+ continue;
+ obj->programs[i].log_level |= log_level;
+ err = bpf_program__load(&obj->programs[i],
+ obj->license,
+ obj->kern_version);
+ if (err)
+ return err;
+ }
+ return 0;
+}
+
+static int libbpf_find_attach_btf_id(const char *name,
+ enum bpf_attach_type attach_type,
+ __u32 attach_prog_fd);
+static struct bpf_object *
+__bpf_object__open(const char *path, const void *obj_buf, size_t obj_buf_sz,
+ struct bpf_object_open_opts *opts)
+{
+ const char *pin_root_path;
+ struct bpf_program *prog;
+ struct bpf_object *obj;
+ const char *obj_name;
+ char tmp_name[64];
+ bool relaxed_maps;
+ __u32 attach_prog_fd;
+ int err;
+
+ if (elf_version(EV_CURRENT) == EV_NONE) {
+ pr_warn("failed to init libelf for %s\n",
+ path ? : "(mem buf)");
+ return ERR_PTR(-LIBBPF_ERRNO__LIBELF);
+ }
+
+ if (!OPTS_VALID(opts, bpf_object_open_opts))
+ return ERR_PTR(-EINVAL);
+
+ obj_name = OPTS_GET(opts, object_name, NULL);
+ if (obj_buf) {
+ if (!obj_name) {
+ snprintf(tmp_name, sizeof(tmp_name), "%lx-%lx",
+ (unsigned long)obj_buf,
+ (unsigned long)obj_buf_sz);
+ obj_name = tmp_name;
+ }
+ path = obj_name;
+ pr_debug("loading object '%s' from buffer\n", obj_name);
+ }
+
+ obj = bpf_object__new(path, obj_buf, obj_buf_sz, obj_name);
+ if (IS_ERR(obj))
+ return obj;
+
+ obj->relaxed_core_relocs = OPTS_GET(opts, relaxed_core_relocs, false);
+ relaxed_maps = OPTS_GET(opts, relaxed_maps, false);
+ pin_root_path = OPTS_GET(opts, pin_root_path, NULL);
+ attach_prog_fd = OPTS_GET(opts, attach_prog_fd, 0);
+
+ CHECK_ERR(bpf_object__elf_init(obj), err, out);
+ CHECK_ERR(bpf_object__check_endianness(obj), err, out);
+ CHECK_ERR(bpf_object__probe_caps(obj), err, out);
+ CHECK_ERR(bpf_object__elf_collect(obj, relaxed_maps, pin_root_path),
+ err, out);
+ CHECK_ERR(bpf_object__collect_reloc(obj), err, out);
+ bpf_object__elf_finish(obj);
+
+ bpf_object__for_each_program(prog, obj) {
+ enum bpf_prog_type prog_type;
+ enum bpf_attach_type attach_type;
+
+ err = libbpf_prog_type_by_name(prog->section_name, &prog_type,
+ &attach_type);
+ if (err == -ESRCH)
+ /* couldn't guess, but user might manually specify */
+ continue;
+ if (err)
+ goto out;
+
+ bpf_program__set_type(prog, prog_type);
+ bpf_program__set_expected_attach_type(prog, attach_type);
+ if (prog_type == BPF_PROG_TYPE_TRACING) {
+ err = libbpf_find_attach_btf_id(prog->section_name,
+ attach_type,
+ attach_prog_fd);
+ if (err <= 0)
+ goto out;
+ prog->attach_btf_id = err;
+ prog->attach_prog_fd = attach_prog_fd;
+ }
+ }
+
+ return obj;
+out:
+ bpf_object__close(obj);
+ return ERR_PTR(err);
+}
+
+static struct bpf_object *
+__bpf_object__open_xattr(struct bpf_object_open_attr *attr, int flags)
+{
+ DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts,
+ .relaxed_maps = flags & MAPS_RELAX_COMPAT,
+ );
+
+ /* param validation */
+ if (!attr->file)
+ return NULL;
+
+ pr_debug("loading %s\n", attr->file);
+ return __bpf_object__open(attr->file, NULL, 0, &opts);
+}
+
+struct bpf_object *bpf_object__open_xattr(struct bpf_object_open_attr *attr)
+{
+ return __bpf_object__open_xattr(attr, 0);
+}
+
+struct bpf_object *bpf_object__open(const char *path)
+{
+ struct bpf_object_open_attr attr = {
+ .file = path,
+ .prog_type = BPF_PROG_TYPE_UNSPEC,
+ };
+
+ return bpf_object__open_xattr(&attr);
+}
+
+struct bpf_object *
+bpf_object__open_file(const char *path, struct bpf_object_open_opts *opts)
+{
+ if (!path)
+ return ERR_PTR(-EINVAL);
+
+ pr_debug("loading %s\n", path);
+
+ return __bpf_object__open(path, NULL, 0, opts);
+}
+
+struct bpf_object *
+bpf_object__open_mem(const void *obj_buf, size_t obj_buf_sz,
+ struct bpf_object_open_opts *opts)
+{
+ if (!obj_buf || obj_buf_sz == 0)
+ return ERR_PTR(-EINVAL);
+
+ return __bpf_object__open(NULL, obj_buf, obj_buf_sz, opts);
+}
+
+struct bpf_object *
+bpf_object__open_buffer(const void *obj_buf, size_t obj_buf_sz,
+ const char *name)
+{
+ DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts,
+ .object_name = name,
+ /* wrong default, but backwards-compatible */
+ .relaxed_maps = true,
+ );
+
+ /* returning NULL is wrong, but backwards-compatible */
+ if (!obj_buf || obj_buf_sz == 0)
+ return NULL;
+
+ return bpf_object__open_mem(obj_buf, obj_buf_sz, &opts);
+}
+
+int bpf_object__unload(struct bpf_object *obj)
+{
+ size_t i;
+
+ if (!obj)
+ return -EINVAL;
+
+ for (i = 0; i < obj->nr_maps; i++)
+ zclose(obj->maps[i].fd);
+
+ for (i = 0; i < obj->nr_programs; i++)
+ bpf_program__unload(&obj->programs[i]);
+
+ return 0;
+}
+
+int bpf_object__load_xattr(struct bpf_object_load_attr *attr)
+{
+ struct bpf_object *obj;
+ int err, i;
+
+ if (!attr)
+ return -EINVAL;
+ obj = attr->obj;
+ if (!obj)
+ return -EINVAL;
+
+ if (obj->loaded) {
+ pr_warn("object should not be loaded twice\n");
+ return -EINVAL;
+ }
+
+ obj->loaded = true;
+
+ CHECK_ERR(bpf_object__create_maps(obj), err, out);
+ CHECK_ERR(bpf_object__relocate(obj, attr->target_btf_path), err, out);
+ CHECK_ERR(bpf_object__load_progs(obj, attr->log_level), err, out);
+
+ return 0;
+out:
+ /* unpin any maps that were auto-pinned during load */
+ for (i = 0; i < obj->nr_maps; i++)
+ if (obj->maps[i].pinned && !obj->maps[i].reused)
+ bpf_map__unpin(&obj->maps[i], NULL);
+
+ bpf_object__unload(obj);
+ pr_warn("failed to load object '%s'\n", obj->path);
+ return err;
+}
+
+int bpf_object__load(struct bpf_object *obj)
+{
+ struct bpf_object_load_attr attr = {
+ .obj = obj,
+ };
+
+ return bpf_object__load_xattr(&attr);
+}
+
+static int make_parent_dir(const char *path)
+{
+ char *cp, errmsg[STRERR_BUFSIZE];
+ char *dname, *dir;
+ int err = 0;
+
+ dname = strdup(path);
+ if (dname == NULL)
+ return -ENOMEM;
+
+ dir = dirname(dname);
+ if (mkdir(dir, 0700) && errno != EEXIST)
+ err = -errno;
+
+ free(dname);
+ if (err) {
+ cp = libbpf_strerror_r(-err, errmsg, sizeof(errmsg));
+ pr_warn("failed to mkdir %s: %s\n", path, cp);
+ }
+ return err;
+}
+
+static int check_path(const char *path)
+{
+ char *cp, errmsg[STRERR_BUFSIZE];
+ struct statfs st_fs;
+ char *dname, *dir;
+ int err = 0;
+
+ if (path == NULL)
+ return -EINVAL;
+
+ dname = strdup(path);
+ if (dname == NULL)
+ return -ENOMEM;
+
+ dir = dirname(dname);
+ if (statfs(dir, &st_fs)) {
+ cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg));
+ pr_warn("failed to statfs %s: %s\n", dir, cp);
+ err = -errno;
+ }
+ free(dname);
+
+ if (!err && st_fs.f_type != BPF_FS_MAGIC) {
+ pr_warn("specified path %s is not on BPF FS\n", path);
+ err = -EINVAL;
+ }
+
+ return err;
+}
+
+int bpf_program__pin_instance(struct bpf_program *prog, const char *path,
+ int instance)
+{
+ char *cp, errmsg[STRERR_BUFSIZE];
+ int err;
+
+ err = make_parent_dir(path);
+ if (err)
+ return err;
+
+ err = check_path(path);
+ if (err)
+ return err;
+
+ if (prog == NULL) {
+ pr_warn("invalid program pointer\n");
+ return -EINVAL;
+ }
+
+ if (instance < 0 || instance >= prog->instances.nr) {
+ pr_warn("invalid prog instance %d of prog %s (max %d)\n",
+ instance, prog->section_name, prog->instances.nr);
+ return -EINVAL;
+ }
+
+ if (bpf_obj_pin(prog->instances.fds[instance], path)) {
+ cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg));
+ pr_warn("failed to pin program: %s\n", cp);
+ return -errno;
+ }
+ pr_debug("pinned program '%s'\n", path);
+
+ return 0;
+}
+
+int bpf_program__unpin_instance(struct bpf_program *prog, const char *path,
+ int instance)
+{
+ int err;
+
+ err = check_path(path);
+ if (err)
+ return err;
+
+ if (prog == NULL) {
+ pr_warn("invalid program pointer\n");
+ return -EINVAL;
+ }
+
+ if (instance < 0 || instance >= prog->instances.nr) {
+ pr_warn("invalid prog instance %d of prog %s (max %d)\n",
+ instance, prog->section_name, prog->instances.nr);
+ return -EINVAL;
+ }
+
+ err = unlink(path);
+ if (err != 0)
+ return -errno;
+ pr_debug("unpinned program '%s'\n", path);
+
+ return 0;
+}
+
+int bpf_program__pin(struct bpf_program *prog, const char *path)
+{
+ int i, err;
+
+ err = make_parent_dir(path);
+ if (err)
+ return err;
+
+ err = check_path(path);
+ if (err)
+ return err;
+
+ if (prog == NULL) {
+ pr_warn("invalid program pointer\n");
+ return -EINVAL;
+ }
+
+ if (prog->instances.nr <= 0) {
+ pr_warn("no instances of prog %s to pin\n",
+ prog->section_name);
+ return -EINVAL;
+ }
+
+ if (prog->instances.nr == 1) {
+ /* don't create subdirs when pinning single instance */
+ return bpf_program__pin_instance(prog, path, 0);
+ }
+
+ for (i = 0; i < prog->instances.nr; i++) {
+ char buf[PATH_MAX];
+ int len;
+
+ len = snprintf(buf, PATH_MAX, "%s/%d", path, i);
+ if (len < 0) {
+ err = -EINVAL;
+ goto err_unpin;
+ } else if (len >= PATH_MAX) {
+ err = -ENAMETOOLONG;
+ goto err_unpin;
+ }
+
+ err = bpf_program__pin_instance(prog, buf, i);
+ if (err)
+ goto err_unpin;
+ }
+
+ return 0;
+
+err_unpin:
+ for (i = i - 1; i >= 0; i--) {
+ char buf[PATH_MAX];
+ int len;
+
+ len = snprintf(buf, PATH_MAX, "%s/%d", path, i);
+ if (len < 0)
+ continue;
+ else if (len >= PATH_MAX)
+ continue;
+
+ bpf_program__unpin_instance(prog, buf, i);
+ }
+
+ rmdir(path);
+
+ return err;
+}
+
+int bpf_program__unpin(struct bpf_program *prog, const char *path)
+{
+ int i, err;
+
+ err = check_path(path);
+ if (err)
+ return err;
+
+ if (prog == NULL) {
+ pr_warn("invalid program pointer\n");
+ return -EINVAL;
+ }
+
+ if (prog->instances.nr <= 0) {
+ pr_warn("no instances of prog %s to pin\n",
+ prog->section_name);
+ return -EINVAL;
+ }
+
+ if (prog->instances.nr == 1) {
+ /* don't create subdirs when pinning single instance */
+ return bpf_program__unpin_instance(prog, path, 0);
+ }
+
+ for (i = 0; i < prog->instances.nr; i++) {
+ char buf[PATH_MAX];
+ int len;
+
+ len = snprintf(buf, PATH_MAX, "%s/%d", path, i);
+ if (len < 0)
+ return -EINVAL;
+ else if (len >= PATH_MAX)
+ return -ENAMETOOLONG;
+
+ err = bpf_program__unpin_instance(prog, buf, i);
+ if (err)
+ return err;
+ }
+
+ err = rmdir(path);
+ if (err)
+ return -errno;
+
+ return 0;
+}
+
+int bpf_map__pin(struct bpf_map *map, const char *path)
+{
+ char *cp, errmsg[STRERR_BUFSIZE];
+ int err;
+
+ if (map == NULL) {
+ pr_warn("invalid map pointer\n");
+ return -EINVAL;
+ }
+
+ if (map->pin_path) {
+ if (path && strcmp(path, map->pin_path)) {
+ pr_warn("map '%s' already has pin path '%s' different from '%s'\n",
+ bpf_map__name(map), map->pin_path, path);
+ return -EINVAL;
+ } else if (map->pinned) {
+ pr_debug("map '%s' already pinned at '%s'; not re-pinning\n",
+ bpf_map__name(map), map->pin_path);
+ return 0;
+ }
+ } else {
+ if (!path) {
+ pr_warn("missing a path to pin map '%s' at\n",
+ bpf_map__name(map));
+ return -EINVAL;
+ } else if (map->pinned) {
+ pr_warn("map '%s' already pinned\n", bpf_map__name(map));
+ return -EEXIST;
+ }
+
+ map->pin_path = strdup(path);
+ if (!map->pin_path) {
+ err = -errno;
+ goto out_err;
+ }
+ }
+
+ err = make_parent_dir(map->pin_path);
+ if (err)
+ return err;
+
+ err = check_path(map->pin_path);
+ if (err)
+ return err;
+
+ if (bpf_obj_pin(map->fd, map->pin_path)) {
+ err = -errno;
+ goto out_err;
+ }
+
+ map->pinned = true;
+ pr_debug("pinned map '%s'\n", map->pin_path);
+
+ return 0;
+
+out_err:
+ cp = libbpf_strerror_r(-err, errmsg, sizeof(errmsg));
+ pr_warn("failed to pin map: %s\n", cp);
+ return err;
+}
+
+int bpf_map__unpin(struct bpf_map *map, const char *path)
+{
+ int err;
+
+ if (map == NULL) {
+ pr_warn("invalid map pointer\n");
+ return -EINVAL;
+ }
+
+ if (map->pin_path) {
+ if (path && strcmp(path, map->pin_path)) {
+ pr_warn("map '%s' already has pin path '%s' different from '%s'\n",
+ bpf_map__name(map), map->pin_path, path);
+ return -EINVAL;
+ }
+ path = map->pin_path;
+ } else if (!path) {
+ pr_warn("no path to unpin map '%s' from\n",
+ bpf_map__name(map));
+ return -EINVAL;
+ }
+
+ err = check_path(path);
+ if (err)
+ return err;
+
+ err = unlink(path);
+ if (err != 0)
+ return -errno;
+
+ map->pinned = false;
+ pr_debug("unpinned map '%s' from '%s'\n", bpf_map__name(map), path);
+
+ return 0;
+}
+
+int bpf_map__set_pin_path(struct bpf_map *map, const char *path)
+{
+ char *new = NULL;
+
+ if (path) {
+ new = strdup(path);
+ if (!new)
+ return -errno;
+ }
+
+ free(map->pin_path);
+ map->pin_path = new;
+ return 0;
+}
+
+const char *bpf_map__get_pin_path(const struct bpf_map *map)
+{
+ return map->pin_path;
+}
+
+bool bpf_map__is_pinned(const struct bpf_map *map)
+{
+ return map->pinned;
+}
+
+int bpf_object__pin_maps(struct bpf_object *obj, const char *path)
+{
+ struct bpf_map *map;
+ int err;
+
+ if (!obj)
+ return -ENOENT;
+
+ if (!obj->loaded) {
+ pr_warn("object not yet loaded; load it first\n");
+ return -ENOENT;
+ }
+
+ bpf_object__for_each_map(map, obj) {
+ char *pin_path = NULL;
+ char buf[PATH_MAX];
+
+ if (path) {
+ int len;
+
+ len = snprintf(buf, PATH_MAX, "%s/%s", path,
+ bpf_map__name(map));
+ if (len < 0) {
+ err = -EINVAL;
+ goto err_unpin_maps;
+ } else if (len >= PATH_MAX) {
+ err = -ENAMETOOLONG;
+ goto err_unpin_maps;
+ }
+ pin_path = buf;
+ } else if (!map->pin_path) {
+ continue;
+ }
+
+ err = bpf_map__pin(map, pin_path);
+ if (err)
+ goto err_unpin_maps;
+ }
+
+ return 0;
+
+err_unpin_maps:
+ while ((map = bpf_map__prev(map, obj))) {
+ if (!map->pin_path)
+ continue;
+
+ bpf_map__unpin(map, NULL);
+ }
+
+ return err;
+}
+
+int bpf_object__unpin_maps(struct bpf_object *obj, const char *path)
+{
+ struct bpf_map *map;
+ int err;
+
+ if (!obj)
+ return -ENOENT;
+
+ bpf_object__for_each_map(map, obj) {
+ char *pin_path = NULL;
+ char buf[PATH_MAX];
+
+ if (path) {
+ int len;
+
+ len = snprintf(buf, PATH_MAX, "%s/%s", path,
+ bpf_map__name(map));
+ if (len < 0)
+ return -EINVAL;
+ else if (len >= PATH_MAX)
+ return -ENAMETOOLONG;
+ pin_path = buf;
+ } else if (!map->pin_path) {
+ continue;
+ }
+
+ err = bpf_map__unpin(map, pin_path);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+int bpf_object__pin_programs(struct bpf_object *obj, const char *path)
+{
+ struct bpf_program *prog;
+ int err;
+
+ if (!obj)
+ return -ENOENT;
+
+ if (!obj->loaded) {
+ pr_warn("object not yet loaded; load it first\n");
+ return -ENOENT;
+ }
+
+ bpf_object__for_each_program(prog, obj) {
+ char buf[PATH_MAX];
+ int len;
+
+ len = snprintf(buf, PATH_MAX, "%s/%s", path,
+ prog->pin_name);
+ if (len < 0) {
+ err = -EINVAL;
+ goto err_unpin_programs;
+ } else if (len >= PATH_MAX) {
+ err = -ENAMETOOLONG;
+ goto err_unpin_programs;
+ }
+
+ err = bpf_program__pin(prog, buf);
+ if (err)
+ goto err_unpin_programs;
+ }
+
+ return 0;
+
+err_unpin_programs:
+ while ((prog = bpf_program__prev(prog, obj))) {
+ char buf[PATH_MAX];
+ int len;
+
+ len = snprintf(buf, PATH_MAX, "%s/%s", path,
+ prog->pin_name);
+ if (len < 0)
+ continue;
+ else if (len >= PATH_MAX)
+ continue;
+
+ bpf_program__unpin(prog, buf);
+ }
+
+ return err;
+}
+
+int bpf_object__unpin_programs(struct bpf_object *obj, const char *path)
+{
+ struct bpf_program *prog;
+ int err;
+
+ if (!obj)
+ return -ENOENT;
+
+ bpf_object__for_each_program(prog, obj) {
+ char buf[PATH_MAX];
+ int len;
+
+ len = snprintf(buf, PATH_MAX, "%s/%s", path,
+ prog->pin_name);
+ if (len < 0)
+ return -EINVAL;
+ else if (len >= PATH_MAX)
+ return -ENAMETOOLONG;
+
+ err = bpf_program__unpin(prog, buf);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+int bpf_object__pin(struct bpf_object *obj, const char *path)
+{
+ int err;
+
+ err = bpf_object__pin_maps(obj, path);
+ if (err)
+ return err;
+
+ err = bpf_object__pin_programs(obj, path);
+ if (err) {
+ bpf_object__unpin_maps(obj, path);
+ return err;
+ }
+
+ return 0;
+}
+
+void bpf_object__close(struct bpf_object *obj)
+{
+ size_t i;
+
+ if (!obj)
+ return;
+
+ if (obj->clear_priv)
+ obj->clear_priv(obj, obj->priv);
+
+ bpf_object__elf_finish(obj);
+ bpf_object__unload(obj);
+ btf__free(obj->btf);
+ btf_ext__free(obj->btf_ext);
+
+ for (i = 0; i < obj->nr_maps; i++) {
+ zfree(&obj->maps[i].name);
+ zfree(&obj->maps[i].pin_path);
+ if (obj->maps[i].clear_priv)
+ obj->maps[i].clear_priv(&obj->maps[i],
+ obj->maps[i].priv);
+ obj->maps[i].priv = NULL;
+ obj->maps[i].clear_priv = NULL;
+ }
+
+ zfree(&obj->sections.rodata);
+ zfree(&obj->sections.data);
+ zfree(&obj->maps);
+ obj->nr_maps = 0;
+
+ if (obj->programs && obj->nr_programs) {
+ for (i = 0; i < obj->nr_programs; i++)
+ bpf_program__exit(&obj->programs[i]);
+ }
+ zfree(&obj->programs);
+
+ list_del(&obj->list);
+ free(obj);
+}
+
+struct bpf_object *
+bpf_object__next(struct bpf_object *prev)
+{
+ struct bpf_object *next;
+
+ if (!prev)
+ next = list_first_entry(&bpf_objects_list,
+ struct bpf_object,
+ list);
+ else
+ next = list_next_entry(prev, list);
+
+ /* Empty list is noticed here so don't need checking on entry. */
+ if (&next->list == &bpf_objects_list)
+ return NULL;
+
+ return next;
+}
+
+const char *bpf_object__name(const struct bpf_object *obj)
+{
+ return obj ? obj->name : ERR_PTR(-EINVAL);
+}
+
+unsigned int bpf_object__kversion(const struct bpf_object *obj)
+{
+ return obj ? obj->kern_version : 0;
+}
+
+struct btf *bpf_object__btf(const struct bpf_object *obj)
+{
+ return obj ? obj->btf : NULL;
+}
+
+int bpf_object__btf_fd(const struct bpf_object *obj)
+{
+ return obj->btf ? btf__fd(obj->btf) : -1;
+}
+
+int bpf_object__set_priv(struct bpf_object *obj, void *priv,
+ bpf_object_clear_priv_t clear_priv)
+{
+ if (obj->priv && obj->clear_priv)
+ obj->clear_priv(obj, obj->priv);
+
+ obj->priv = priv;
+ obj->clear_priv = clear_priv;
+ return 0;
+}
+
+void *bpf_object__priv(const struct bpf_object *obj)
+{
+ return obj ? obj->priv : ERR_PTR(-EINVAL);
+}
+
+static struct bpf_program *
+__bpf_program__iter(const struct bpf_program *p, const struct bpf_object *obj,
+ bool forward)
+{
+ size_t nr_programs = obj->nr_programs;
+ ssize_t idx;
+
+ if (!nr_programs)
+ return NULL;
+
+ if (!p)
+ /* Iter from the beginning */
+ return forward ? &obj->programs[0] :
+ &obj->programs[nr_programs - 1];
+
+ if (p->obj != obj) {
+ pr_warn("error: program handler doesn't match object\n");
+ return NULL;
+ }
+
+ idx = (p - obj->programs) + (forward ? 1 : -1);
+ if (idx >= obj->nr_programs || idx < 0)
+ return NULL;
+ return &obj->programs[idx];
+}
+
+struct bpf_program *
+bpf_program__next(struct bpf_program *prev, const struct bpf_object *obj)
+{
+ struct bpf_program *prog = prev;
+
+ do {
+ prog = __bpf_program__iter(prog, obj, true);
+ } while (prog && bpf_program__is_function_storage(prog, obj));
+
+ return prog;
+}
+
+struct bpf_program *
+bpf_program__prev(struct bpf_program *next, const struct bpf_object *obj)
+{
+ struct bpf_program *prog = next;
+
+ do {
+ prog = __bpf_program__iter(prog, obj, false);
+ } while (prog && bpf_program__is_function_storage(prog, obj));
+
+ return prog;
+}
+
+int bpf_program__set_priv(struct bpf_program *prog, void *priv,
+ bpf_program_clear_priv_t clear_priv)
+{
+ if (prog->priv && prog->clear_priv)
+ prog->clear_priv(prog, prog->priv);
+
+ prog->priv = priv;
+ prog->clear_priv = clear_priv;
+ return 0;
+}
+
+void *bpf_program__priv(const struct bpf_program *prog)
+{
+ return prog ? prog->priv : ERR_PTR(-EINVAL);
+}
+
+void bpf_program__set_ifindex(struct bpf_program *prog, __u32 ifindex)
+{
+ prog->prog_ifindex = ifindex;
+}
+
+const char *bpf_program__title(const struct bpf_program *prog, bool needs_copy)
+{
+ const char *title;
+
+ title = prog->section_name;
+ if (needs_copy) {
+ title = strdup(title);
+ if (!title) {
+ pr_warn("failed to strdup program title\n");
+ return ERR_PTR(-ENOMEM);
+ }
+ }
+
+ return title;
+}
+
+int bpf_program__fd(const struct bpf_program *prog)
+{
+ return bpf_program__nth_fd(prog, 0);
+}
+
+size_t bpf_program__size(const struct bpf_program *prog)
+{
+ return prog->insns_cnt * sizeof(struct bpf_insn);
+}
+
+int bpf_program__set_prep(struct bpf_program *prog, int nr_instances,
+ bpf_program_prep_t prep)
+{
+ int *instances_fds;
+
+ if (nr_instances <= 0 || !prep)
+ return -EINVAL;
+
+ if (prog->instances.nr > 0 || prog->instances.fds) {
+ pr_warn("Can't set pre-processor after loading\n");
+ return -EINVAL;
+ }
+
+ instances_fds = malloc(sizeof(int) * nr_instances);
+ if (!instances_fds) {
+ pr_warn("alloc memory failed for fds\n");
+ return -ENOMEM;
+ }
+
+ /* fill all fd with -1 */
+ memset(instances_fds, -1, sizeof(int) * nr_instances);
+
+ prog->instances.nr = nr_instances;
+ prog->instances.fds = instances_fds;
+ prog->preprocessor = prep;
+ return 0;
+}
+
+int bpf_program__nth_fd(const struct bpf_program *prog, int n)
+{
+ int fd;
+
+ if (!prog)
+ return -EINVAL;
+
+ if (n >= prog->instances.nr || n < 0) {
+ pr_warn("Can't get the %dth fd from program %s: only %d instances\n",
+ n, prog->section_name, prog->instances.nr);
+ return -EINVAL;
+ }
+
+ fd = prog->instances.fds[n];
+ if (fd < 0) {
+ pr_warn("%dth instance of program '%s' is invalid\n",
+ n, prog->section_name);
+ return -ENOENT;
+ }
+
+ return fd;
+}
+
+enum bpf_prog_type bpf_program__get_type(struct bpf_program *prog)
+{
+ return prog->type;
+}
+
+void bpf_program__set_type(struct bpf_program *prog, enum bpf_prog_type type)
+{
+ prog->type = type;
+}
+
+static bool bpf_program__is_type(const struct bpf_program *prog,
+ enum bpf_prog_type type)
+{
+ return prog ? (prog->type == type) : false;
+}
+
+#define BPF_PROG_TYPE_FNS(NAME, TYPE) \
+int bpf_program__set_##NAME(struct bpf_program *prog) \
+{ \
+ if (!prog) \
+ return -EINVAL; \
+ bpf_program__set_type(prog, TYPE); \
+ return 0; \
+} \
+ \
+bool bpf_program__is_##NAME(const struct bpf_program *prog) \
+{ \
+ return bpf_program__is_type(prog, TYPE); \
+} \
+
+BPF_PROG_TYPE_FNS(socket_filter, BPF_PROG_TYPE_SOCKET_FILTER);
+BPF_PROG_TYPE_FNS(kprobe, BPF_PROG_TYPE_KPROBE);
+BPF_PROG_TYPE_FNS(sched_cls, BPF_PROG_TYPE_SCHED_CLS);
+BPF_PROG_TYPE_FNS(sched_act, BPF_PROG_TYPE_SCHED_ACT);
+BPF_PROG_TYPE_FNS(tracepoint, BPF_PROG_TYPE_TRACEPOINT);
+BPF_PROG_TYPE_FNS(raw_tracepoint, BPF_PROG_TYPE_RAW_TRACEPOINT);
+BPF_PROG_TYPE_FNS(xdp, BPF_PROG_TYPE_XDP);
+BPF_PROG_TYPE_FNS(perf_event, BPF_PROG_TYPE_PERF_EVENT);
+BPF_PROG_TYPE_FNS(tracing, BPF_PROG_TYPE_TRACING);
+
+enum bpf_attach_type
+bpf_program__get_expected_attach_type(struct bpf_program *prog)
+{
+ return prog->expected_attach_type;
+}
+
+void bpf_program__set_expected_attach_type(struct bpf_program *prog,
+ enum bpf_attach_type type)
+{
+ prog->expected_attach_type = type;
+}
+
+#define BPF_PROG_SEC_IMPL(string, ptype, eatype, is_attachable, btf, atype) \
+ { string, sizeof(string) - 1, ptype, eatype, is_attachable, btf, atype }
+
+/* Programs that can NOT be attached. */
+#define BPF_PROG_SEC(string, ptype) BPF_PROG_SEC_IMPL(string, ptype, 0, 0, 0, 0)
+
+/* Programs that can be attached. */
+#define BPF_APROG_SEC(string, ptype, atype) \
+ BPF_PROG_SEC_IMPL(string, ptype, 0, 1, 0, atype)
+
+/* Programs that must specify expected attach type at load time. */
+#define BPF_EAPROG_SEC(string, ptype, eatype) \
+ BPF_PROG_SEC_IMPL(string, ptype, eatype, 1, 0, eatype)
+
+/* Programs that use BTF to identify attach point */
+#define BPF_PROG_BTF(string, ptype, eatype) \
+ BPF_PROG_SEC_IMPL(string, ptype, eatype, 0, 1, 0)
+
+/* Programs that can be attached but attach type can't be identified by section
+ * name. Kept for backward compatibility.
+ */
+#define BPF_APROG_COMPAT(string, ptype) BPF_PROG_SEC(string, ptype)
+
+static const struct {
+ const char *sec;
+ size_t len;
+ enum bpf_prog_type prog_type;
+ enum bpf_attach_type expected_attach_type;
+ bool is_attachable;
+ bool is_attach_btf;
+ enum bpf_attach_type attach_type;
+} section_names[] = {
+ BPF_PROG_SEC("socket", BPF_PROG_TYPE_SOCKET_FILTER),
+ BPF_PROG_SEC("kprobe/", BPF_PROG_TYPE_KPROBE),
+ BPF_PROG_SEC("uprobe/", BPF_PROG_TYPE_KPROBE),
+ BPF_PROG_SEC("kretprobe/", BPF_PROG_TYPE_KPROBE),
+ BPF_PROG_SEC("uretprobe/", BPF_PROG_TYPE_KPROBE),
+ BPF_PROG_SEC("classifier", BPF_PROG_TYPE_SCHED_CLS),
+ BPF_PROG_SEC("action", BPF_PROG_TYPE_SCHED_ACT),
+ BPF_PROG_SEC("tracepoint/", BPF_PROG_TYPE_TRACEPOINT),
+ BPF_PROG_SEC("tp/", BPF_PROG_TYPE_TRACEPOINT),
+ BPF_PROG_SEC("raw_tracepoint/", BPF_PROG_TYPE_RAW_TRACEPOINT),
+ BPF_PROG_SEC("raw_tp/", BPF_PROG_TYPE_RAW_TRACEPOINT),
+ BPF_PROG_BTF("tp_btf/", BPF_PROG_TYPE_TRACING,
+ BPF_TRACE_RAW_TP),
+ BPF_PROG_BTF("fentry/", BPF_PROG_TYPE_TRACING,
+ BPF_TRACE_FENTRY),
+ BPF_PROG_BTF("fexit/", BPF_PROG_TYPE_TRACING,
+ BPF_TRACE_FEXIT),
+ BPF_PROG_SEC("xdp", BPF_PROG_TYPE_XDP),
+ BPF_PROG_SEC("perf_event", BPF_PROG_TYPE_PERF_EVENT),
+ BPF_PROG_SEC("lwt_in", BPF_PROG_TYPE_LWT_IN),
+ BPF_PROG_SEC("lwt_out", BPF_PROG_TYPE_LWT_OUT),
+ BPF_PROG_SEC("lwt_xmit", BPF_PROG_TYPE_LWT_XMIT),
+ BPF_PROG_SEC("lwt_seg6local", BPF_PROG_TYPE_LWT_SEG6LOCAL),
+ BPF_APROG_SEC("cgroup_skb/ingress", BPF_PROG_TYPE_CGROUP_SKB,
+ BPF_CGROUP_INET_INGRESS),
+ BPF_APROG_SEC("cgroup_skb/egress", BPF_PROG_TYPE_CGROUP_SKB,
+ BPF_CGROUP_INET_EGRESS),
+ BPF_APROG_COMPAT("cgroup/skb", BPF_PROG_TYPE_CGROUP_SKB),
+ BPF_APROG_SEC("cgroup/sock", BPF_PROG_TYPE_CGROUP_SOCK,
+ BPF_CGROUP_INET_SOCK_CREATE),
+ BPF_EAPROG_SEC("cgroup/post_bind4", BPF_PROG_TYPE_CGROUP_SOCK,
+ BPF_CGROUP_INET4_POST_BIND),
+ BPF_EAPROG_SEC("cgroup/post_bind6", BPF_PROG_TYPE_CGROUP_SOCK,
+ BPF_CGROUP_INET6_POST_BIND),
+ BPF_APROG_SEC("cgroup/dev", BPF_PROG_TYPE_CGROUP_DEVICE,
+ BPF_CGROUP_DEVICE),
+ BPF_APROG_SEC("sockops", BPF_PROG_TYPE_SOCK_OPS,
+ BPF_CGROUP_SOCK_OPS),
+ BPF_APROG_SEC("sk_skb/stream_parser", BPF_PROG_TYPE_SK_SKB,
+ BPF_SK_SKB_STREAM_PARSER),
+ BPF_APROG_SEC("sk_skb/stream_verdict", BPF_PROG_TYPE_SK_SKB,
+ BPF_SK_SKB_STREAM_VERDICT),
+ BPF_APROG_COMPAT("sk_skb", BPF_PROG_TYPE_SK_SKB),
+ BPF_APROG_SEC("sk_msg", BPF_PROG_TYPE_SK_MSG,
+ BPF_SK_MSG_VERDICT),
+ BPF_APROG_SEC("lirc_mode2", BPF_PROG_TYPE_LIRC_MODE2,
+ BPF_LIRC_MODE2),
+ BPF_APROG_SEC("flow_dissector", BPF_PROG_TYPE_FLOW_DISSECTOR,
+ BPF_FLOW_DISSECTOR),
+ BPF_EAPROG_SEC("cgroup/bind4", BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
+ BPF_CGROUP_INET4_BIND),
+ BPF_EAPROG_SEC("cgroup/bind6", BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
+ BPF_CGROUP_INET6_BIND),
+ BPF_EAPROG_SEC("cgroup/connect4", BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
+ BPF_CGROUP_INET4_CONNECT),
+ BPF_EAPROG_SEC("cgroup/connect6", BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
+ BPF_CGROUP_INET6_CONNECT),
+ BPF_EAPROG_SEC("cgroup/sendmsg4", BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
+ BPF_CGROUP_UDP4_SENDMSG),
+ BPF_EAPROG_SEC("cgroup/sendmsg6", BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
+ BPF_CGROUP_UDP6_SENDMSG),
+ BPF_EAPROG_SEC("cgroup/recvmsg4", BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
+ BPF_CGROUP_UDP4_RECVMSG),
+ BPF_EAPROG_SEC("cgroup/recvmsg6", BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
+ BPF_CGROUP_UDP6_RECVMSG),
+ BPF_EAPROG_SEC("cgroup/sysctl", BPF_PROG_TYPE_CGROUP_SYSCTL,
+ BPF_CGROUP_SYSCTL),
+ BPF_EAPROG_SEC("cgroup/getsockopt", BPF_PROG_TYPE_CGROUP_SOCKOPT,
+ BPF_CGROUP_GETSOCKOPT),
+ BPF_EAPROG_SEC("cgroup/setsockopt", BPF_PROG_TYPE_CGROUP_SOCKOPT,
+ BPF_CGROUP_SETSOCKOPT),
+};
+
+#undef BPF_PROG_SEC_IMPL
+#undef BPF_PROG_SEC
+#undef BPF_APROG_SEC
+#undef BPF_EAPROG_SEC
+#undef BPF_APROG_COMPAT
+
+#define MAX_TYPE_NAME_SIZE 32
+
+static char *libbpf_get_type_names(bool attach_type)
+{
+ int i, len = ARRAY_SIZE(section_names) * MAX_TYPE_NAME_SIZE;
+ char *buf;
+
+ buf = malloc(len);
+ if (!buf)
+ return NULL;
+
+ buf[0] = '\0';
+ /* Forge string buf with all available names */
+ for (i = 0; i < ARRAY_SIZE(section_names); i++) {
+ if (attach_type && !section_names[i].is_attachable)
+ continue;
+
+ if (strlen(buf) + strlen(section_names[i].sec) + 2 > len) {
+ free(buf);
+ return NULL;
+ }
+ strcat(buf, " ");
+ strcat(buf, section_names[i].sec);
+ }
+
+ return buf;
+}
+
+int libbpf_prog_type_by_name(const char *name, enum bpf_prog_type *prog_type,
+ enum bpf_attach_type *expected_attach_type)
+{
+ char *type_names;
+ int i;
+
+ if (!name)
+ return -EINVAL;
+
+ for (i = 0; i < ARRAY_SIZE(section_names); i++) {
+ if (strncmp(name, section_names[i].sec, section_names[i].len))
+ continue;
+ *prog_type = section_names[i].prog_type;
+ *expected_attach_type = section_names[i].expected_attach_type;
+ return 0;
+ }
+ pr_warn("failed to guess program type from ELF section '%s'\n", name);
+ type_names = libbpf_get_type_names(false);
+ if (type_names != NULL) {
+ pr_info("supported section(type) names are:%s\n", type_names);
+ free(type_names);
+ }
+
+ return -ESRCH;
+}
+
+#define BTF_PREFIX "btf_trace_"
+int libbpf_find_vmlinux_btf_id(const char *name,
+ enum bpf_attach_type attach_type)
+{
+ struct btf *btf = bpf_core_find_kernel_btf();
+ char raw_tp_btf[128] = BTF_PREFIX;
+ char *dst = raw_tp_btf + sizeof(BTF_PREFIX) - 1;
+ const char *btf_name;
+ int err = -EINVAL;
+ __u32 kind;
+
+ if (IS_ERR(btf)) {
+ pr_warn("vmlinux BTF is not found\n");
+ return -EINVAL;
+ }
+
+ if (attach_type == BPF_TRACE_RAW_TP) {
+ /* prepend "btf_trace_" prefix per kernel convention */
+ strncat(dst, name, sizeof(raw_tp_btf) - sizeof(BTF_PREFIX));
+ btf_name = raw_tp_btf;
+ kind = BTF_KIND_TYPEDEF;
+ } else {
+ btf_name = name;
+ kind = BTF_KIND_FUNC;
+ }
+ err = btf__find_by_name_kind(btf, btf_name, kind);
+ btf__free(btf);
+ return err;
+}
+
+static int libbpf_find_prog_btf_id(const char *name, __u32 attach_prog_fd)
+{
+ struct bpf_prog_info_linear *info_linear;
+ struct bpf_prog_info *info;
+ struct btf *btf = NULL;
+ int err = -EINVAL;
+
+ info_linear = bpf_program__get_prog_info_linear(attach_prog_fd, 0);
+ if (IS_ERR_OR_NULL(info_linear)) {
+ pr_warn("failed get_prog_info_linear for FD %d\n",
+ attach_prog_fd);
+ return -EINVAL;
+ }
+ info = &info_linear->info;
+ if (!info->btf_id) {
+ pr_warn("The target program doesn't have BTF\n");
+ goto out;
+ }
+ if (btf__get_from_id(info->btf_id, &btf)) {
+ pr_warn("Failed to get BTF of the program\n");
+ goto out;
+ }
+ err = btf__find_by_name_kind(btf, name, BTF_KIND_FUNC);
+ btf__free(btf);
+ if (err <= 0) {
+ pr_warn("%s is not found in prog's BTF\n", name);
+ goto out;
+ }
+out:
+ free(info_linear);
+ return err;
+}
+
+static int libbpf_find_attach_btf_id(const char *name,
+ enum bpf_attach_type attach_type,
+ __u32 attach_prog_fd)
+{
+ int i, err;
+
+ if (!name)
+ return -EINVAL;
+
+ for (i = 0; i < ARRAY_SIZE(section_names); i++) {
+ if (!section_names[i].is_attach_btf)
+ continue;
+ if (strncmp(name, section_names[i].sec, section_names[i].len))
+ continue;
+ if (attach_prog_fd)
+ err = libbpf_find_prog_btf_id(name + section_names[i].len,
+ attach_prog_fd);
+ else
+ err = libbpf_find_vmlinux_btf_id(name + section_names[i].len,
+ attach_type);
+ if (err <= 0)
+ pr_warn("%s is not found in vmlinux BTF\n", name);
+ return err;
+ }
+ pr_warn("failed to identify btf_id based on ELF section name '%s'\n", name);
+ return -ESRCH;
+}
+
+int libbpf_attach_type_by_name(const char *name,
+ enum bpf_attach_type *attach_type)
+{
+ char *type_names;
+ int i;
+
+ if (!name)
+ return -EINVAL;
+
+ for (i = 0; i < ARRAY_SIZE(section_names); i++) {
+ if (strncmp(name, section_names[i].sec, section_names[i].len))
+ continue;
+ if (!section_names[i].is_attachable)
+ return -EINVAL;
+ *attach_type = section_names[i].attach_type;
+ return 0;
+ }
+ pr_warn("failed to guess attach type based on ELF section name '%s'\n", name);
+ type_names = libbpf_get_type_names(true);
+ if (type_names != NULL) {
+ pr_info("attachable section(type) names are:%s\n", type_names);
+ free(type_names);
+ }
+
+ return -EINVAL;
+}
+
+int bpf_map__fd(const struct bpf_map *map)
+{
+ return map ? map->fd : -EINVAL;
+}
+
+const struct bpf_map_def *bpf_map__def(const struct bpf_map *map)
+{
+ return map ? &map->def : ERR_PTR(-EINVAL);
+}
+
+const char *bpf_map__name(const struct bpf_map *map)
+{
+ return map ? map->name : NULL;
+}
+
+__u32 bpf_map__btf_key_type_id(const struct bpf_map *map)
+{
+ return map ? map->btf_key_type_id : 0;
+}
+
+__u32 bpf_map__btf_value_type_id(const struct bpf_map *map)
+{
+ return map ? map->btf_value_type_id : 0;
+}
+
+int bpf_map__set_priv(struct bpf_map *map, void *priv,
+ bpf_map_clear_priv_t clear_priv)
+{
+ if (!map)
+ return -EINVAL;
+
+ if (map->priv) {
+ if (map->clear_priv)
+ map->clear_priv(map, map->priv);
+ }
+
+ map->priv = priv;
+ map->clear_priv = clear_priv;
+ return 0;
+}
+
+void *bpf_map__priv(const struct bpf_map *map)
+{
+ return map ? map->priv : ERR_PTR(-EINVAL);
+}
+
+bool bpf_map__is_offload_neutral(const struct bpf_map *map)
+{
+ return map->def.type == BPF_MAP_TYPE_PERF_EVENT_ARRAY;
+}
+
+bool bpf_map__is_internal(const struct bpf_map *map)
+{
+ return map->libbpf_type != LIBBPF_MAP_UNSPEC;
+}
+
+void bpf_map__set_ifindex(struct bpf_map *map, __u32 ifindex)
+{
+ map->map_ifindex = ifindex;
+}
+
+int bpf_map__set_inner_map_fd(struct bpf_map *map, int fd)
+{
+ if (!bpf_map_type__is_map_in_map(map->def.type)) {
+ pr_warn("error: unsupported map type\n");
+ return -EINVAL;
+ }
+ if (map->inner_map_fd != -1) {
+ pr_warn("error: inner_map_fd already specified\n");
+ return -EINVAL;
+ }
+ map->inner_map_fd = fd;
+ return 0;
+}
+
+static struct bpf_map *
+__bpf_map__iter(const struct bpf_map *m, const struct bpf_object *obj, int i)
+{
+ ssize_t idx;
+ struct bpf_map *s, *e;
+
+ if (!obj || !obj->maps)
+ return NULL;
+
+ s = obj->maps;
+ e = obj->maps + obj->nr_maps;
+
+ if ((m < s) || (m >= e)) {
+ pr_warn("error in %s: map handler doesn't belong to object\n",
+ __func__);
+ return NULL;
+ }
+
+ idx = (m - obj->maps) + i;
+ if (idx >= obj->nr_maps || idx < 0)
+ return NULL;
+ return &obj->maps[idx];
+}
+
+struct bpf_map *
+bpf_map__next(const struct bpf_map *prev, const struct bpf_object *obj)
+{
+ if (prev == NULL)
+ return obj->maps;
+
+ return __bpf_map__iter(prev, obj, 1);
+}
+
+struct bpf_map *
+bpf_map__prev(const struct bpf_map *next, const struct bpf_object *obj)
+{
+ if (next == NULL) {
+ if (!obj->nr_maps)
+ return NULL;
+ return obj->maps + obj->nr_maps - 1;
+ }
+
+ return __bpf_map__iter(next, obj, -1);
+}
+
+struct bpf_map *
+bpf_object__find_map_by_name(const struct bpf_object *obj, const char *name)
+{
+ struct bpf_map *pos;
+
+ bpf_object__for_each_map(pos, obj) {
+ if (pos->name && !strcmp(pos->name, name))
+ return pos;
+ }
+ return NULL;
+}
+
+int
+bpf_object__find_map_fd_by_name(const struct bpf_object *obj, const char *name)
+{
+ return bpf_map__fd(bpf_object__find_map_by_name(obj, name));
+}
+
+struct bpf_map *
+bpf_object__find_map_by_offset(struct bpf_object *obj, size_t offset)
+{
+ return ERR_PTR(-ENOTSUP);
+}
+
+long libbpf_get_error(const void *ptr)
+{
+ return PTR_ERR_OR_ZERO(ptr);
+}
+
+int bpf_prog_load(const char *file, enum bpf_prog_type type,
+ struct bpf_object **pobj, int *prog_fd)
+{
+ struct bpf_prog_load_attr attr;
+
+ memset(&attr, 0, sizeof(struct bpf_prog_load_attr));
+ attr.file = file;
+ attr.prog_type = type;
+ attr.expected_attach_type = 0;
+
+ return bpf_prog_load_xattr(&attr, pobj, prog_fd);
+}
+
+int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr,
+ struct bpf_object **pobj, int *prog_fd)
+{
+ struct bpf_object_open_attr open_attr = {};
+ struct bpf_program *prog, *first_prog = NULL;
+ struct bpf_object *obj;
+ struct bpf_map *map;
+ int err;
+
+ if (!attr)
+ return -EINVAL;
+ if (!attr->file)
+ return -EINVAL;
+
+ open_attr.file = attr->file;
+ open_attr.prog_type = attr->prog_type;
+
+ obj = bpf_object__open_xattr(&open_attr);
+ if (IS_ERR_OR_NULL(obj))
+ return -ENOENT;
+
+ bpf_object__for_each_program(prog, obj) {
+ enum bpf_attach_type attach_type = attr->expected_attach_type;
+ /*
+ * to preserve backwards compatibility, bpf_prog_load treats
+ * attr->prog_type, if specified, as an override to whatever
+ * bpf_object__open guessed
+ */
+ if (attr->prog_type != BPF_PROG_TYPE_UNSPEC) {
+ bpf_program__set_type(prog, attr->prog_type);
+ bpf_program__set_expected_attach_type(prog,
+ attach_type);
+ }
+ if (bpf_program__get_type(prog) == BPF_PROG_TYPE_UNSPEC) {
+ /*
+ * we haven't guessed from section name and user
+ * didn't provide a fallback type, too bad...
+ */
+ bpf_object__close(obj);
+ return -EINVAL;
+ }
+
+ prog->prog_ifindex = attr->ifindex;
+ prog->log_level = attr->log_level;
+ prog->prog_flags = attr->prog_flags;
+ if (!first_prog)
+ first_prog = prog;
+ }
+
+ bpf_object__for_each_map(map, obj) {
+ if (!bpf_map__is_offload_neutral(map))
+ map->map_ifindex = attr->ifindex;
+ }
+
+ if (!first_prog) {
+ pr_warn("object file doesn't contain bpf program\n");
+ bpf_object__close(obj);
+ return -ENOENT;
+ }
+
+ err = bpf_object__load(obj);
+ if (err) {
+ bpf_object__close(obj);
+ return -EINVAL;
+ }
+
+ *pobj = obj;
+ *prog_fd = bpf_program__fd(first_prog);
+ return 0;
+}
+
+struct bpf_link {
+ int (*destroy)(struct bpf_link *link);
+};
+
+int bpf_link__destroy(struct bpf_link *link)
+{
+ int err;
+
+ if (!link)
+ return 0;
+
+ err = link->destroy(link);
+ free(link);
+
+ return err;
+}
+
+struct bpf_link_fd {
+ struct bpf_link link; /* has to be at the top of struct */
+ int fd; /* hook FD */
+};
+
+static int bpf_link__destroy_perf_event(struct bpf_link *link)
+{
+ struct bpf_link_fd *l = (void *)link;
+ int err;
+
+ err = ioctl(l->fd, PERF_EVENT_IOC_DISABLE, 0);
+ if (err)
+ err = -errno;
+
+ close(l->fd);
+ return err;
+}
+
+struct bpf_link *bpf_program__attach_perf_event(struct bpf_program *prog,
+ int pfd)
+{
+ char errmsg[STRERR_BUFSIZE];
+ struct bpf_link_fd *link;
+ int prog_fd, err;
+
+ if (pfd < 0) {
+ pr_warn("program '%s': invalid perf event FD %d\n",
+ bpf_program__title(prog, false), pfd);
+ return ERR_PTR(-EINVAL);
+ }
+ prog_fd = bpf_program__fd(prog);
+ if (prog_fd < 0) {
+ pr_warn("program '%s': can't attach BPF program w/o FD (did you load it?)\n",
+ bpf_program__title(prog, false));
+ return ERR_PTR(-EINVAL);
+ }
+
+ link = malloc(sizeof(*link));
+ if (!link)
+ return ERR_PTR(-ENOMEM);
+ link->link.destroy = &bpf_link__destroy_perf_event;
+ link->fd = pfd;
+
+ if (ioctl(pfd, PERF_EVENT_IOC_SET_BPF, prog_fd) < 0) {
+ err = -errno;
+ free(link);
+ pr_warn("program '%s': failed to attach to pfd %d: %s\n",
+ bpf_program__title(prog, false), pfd,
+ libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
+ return ERR_PTR(err);
+ }
+ if (ioctl(pfd, PERF_EVENT_IOC_ENABLE, 0) < 0) {
+ err = -errno;
+ free(link);
+ pr_warn("program '%s': failed to enable pfd %d: %s\n",
+ bpf_program__title(prog, false), pfd,
+ libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
+ return ERR_PTR(err);
+ }
+ return (struct bpf_link *)link;
+}
+
+/*
+ * this function is expected to parse integer in the range of [0, 2^31-1] from
+ * given file using scanf format string fmt. If actual parsed value is
+ * negative, the result might be indistinguishable from error
+ */
+static int parse_uint_from_file(const char *file, const char *fmt)
+{
+ char buf[STRERR_BUFSIZE];
+ int err, ret;
+ FILE *f;
+
+ f = fopen(file, "r");
+ if (!f) {
+ err = -errno;
+ pr_debug("failed to open '%s': %s\n", file,
+ libbpf_strerror_r(err, buf, sizeof(buf)));
+ return err;
+ }
+ err = fscanf(f, fmt, &ret);
+ if (err != 1) {
+ err = err == EOF ? -EIO : -errno;
+ pr_debug("failed to parse '%s': %s\n", file,
+ libbpf_strerror_r(err, buf, sizeof(buf)));
+ fclose(f);
+ return err;
+ }
+ fclose(f);
+ return ret;
+}
+
+static int determine_kprobe_perf_type(void)
+{
+ const char *file = "/sys/bus/event_source/devices/kprobe/type";
+
+ return parse_uint_from_file(file, "%d\n");
+}
+
+static int determine_uprobe_perf_type(void)
+{
+ const char *file = "/sys/bus/event_source/devices/uprobe/type";
+
+ return parse_uint_from_file(file, "%d\n");
+}
+
+static int determine_kprobe_retprobe_bit(void)
+{
+ const char *file = "/sys/bus/event_source/devices/kprobe/format/retprobe";
+
+ return parse_uint_from_file(file, "config:%d\n");
+}
+
+static int determine_uprobe_retprobe_bit(void)
+{
+ const char *file = "/sys/bus/event_source/devices/uprobe/format/retprobe";
+
+ return parse_uint_from_file(file, "config:%d\n");
+}
+
+static int perf_event_open_probe(bool uprobe, bool retprobe, const char *name,
+ uint64_t offset, int pid)
+{
+ struct perf_event_attr attr = {};
+ char errmsg[STRERR_BUFSIZE];
+ int type, pfd, err;
+
+ type = uprobe ? determine_uprobe_perf_type()
+ : determine_kprobe_perf_type();
+ if (type < 0) {
+ pr_warn("failed to determine %s perf type: %s\n",
+ uprobe ? "uprobe" : "kprobe",
+ libbpf_strerror_r(type, errmsg, sizeof(errmsg)));
+ return type;
+ }
+ if (retprobe) {
+ int bit = uprobe ? determine_uprobe_retprobe_bit()
+ : determine_kprobe_retprobe_bit();
+
+ if (bit < 0) {
+ pr_warn("failed to determine %s retprobe bit: %s\n",
+ uprobe ? "uprobe" : "kprobe",
+ libbpf_strerror_r(bit, errmsg, sizeof(errmsg)));
+ return bit;
+ }
+ attr.config |= 1 << bit;
+ }
+ attr.size = sizeof(attr);
+ attr.type = type;
+ attr.config1 = ptr_to_u64(name); /* kprobe_func or uprobe_path */
+ attr.config2 = offset; /* kprobe_addr or probe_offset */
+
+ /* pid filter is meaningful only for uprobes */
+ pfd = syscall(__NR_perf_event_open, &attr,
+ pid < 0 ? -1 : pid /* pid */,
+ pid == -1 ? 0 : -1 /* cpu */,
+ -1 /* group_fd */, PERF_FLAG_FD_CLOEXEC);
+ if (pfd < 0) {
+ err = -errno;
+ pr_warn("%s perf_event_open() failed: %s\n",
+ uprobe ? "uprobe" : "kprobe",
+ libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
+ return err;
+ }
+ return pfd;
+}
+
+struct bpf_link *bpf_program__attach_kprobe(struct bpf_program *prog,
+ bool retprobe,
+ const char *func_name)
+{
+ char errmsg[STRERR_BUFSIZE];
+ struct bpf_link *link;
+ int pfd, err;
+
+ pfd = perf_event_open_probe(false /* uprobe */, retprobe, func_name,
+ 0 /* offset */, -1 /* pid */);
+ if (pfd < 0) {
+ pr_warn("program '%s': failed to create %s '%s' perf event: %s\n",
+ bpf_program__title(prog, false),
+ retprobe ? "kretprobe" : "kprobe", func_name,
+ libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
+ return ERR_PTR(pfd);
+ }
+ link = bpf_program__attach_perf_event(prog, pfd);
+ if (IS_ERR(link)) {
+ close(pfd);
+ err = PTR_ERR(link);
+ pr_warn("program '%s': failed to attach to %s '%s': %s\n",
+ bpf_program__title(prog, false),
+ retprobe ? "kretprobe" : "kprobe", func_name,
+ libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
+ return link;
+ }
+ return link;
+}
+
+struct bpf_link *bpf_program__attach_uprobe(struct bpf_program *prog,
+ bool retprobe, pid_t pid,
+ const char *binary_path,
+ size_t func_offset)
+{
+ char errmsg[STRERR_BUFSIZE];
+ struct bpf_link *link;
+ int pfd, err;
+
+ pfd = perf_event_open_probe(true /* uprobe */, retprobe,
+ binary_path, func_offset, pid);
+ if (pfd < 0) {
+ pr_warn("program '%s': failed to create %s '%s:0x%zx' perf event: %s\n",
+ bpf_program__title(prog, false),
+ retprobe ? "uretprobe" : "uprobe",
+ binary_path, func_offset,
+ libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
+ return ERR_PTR(pfd);
+ }
+ link = bpf_program__attach_perf_event(prog, pfd);
+ if (IS_ERR(link)) {
+ close(pfd);
+ err = PTR_ERR(link);
+ pr_warn("program '%s': failed to attach to %s '%s:0x%zx': %s\n",
+ bpf_program__title(prog, false),
+ retprobe ? "uretprobe" : "uprobe",
+ binary_path, func_offset,
+ libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
+ return link;
+ }
+ return link;
+}
+
+static int determine_tracepoint_id(const char *tp_category,
+ const char *tp_name)
+{
+ char file[PATH_MAX];
+ int ret;
+
+ ret = snprintf(file, sizeof(file),
+ "/sys/kernel/debug/tracing/events/%s/%s/id",
+ tp_category, tp_name);
+ if (ret < 0)
+ return -errno;
+ if (ret >= sizeof(file)) {
+ pr_debug("tracepoint %s/%s path is too long\n",
+ tp_category, tp_name);
+ return -E2BIG;
+ }
+ return parse_uint_from_file(file, "%d\n");
+}
+
+static int perf_event_open_tracepoint(const char *tp_category,
+ const char *tp_name)
+{
+ struct perf_event_attr attr = {};
+ char errmsg[STRERR_BUFSIZE];
+ int tp_id, pfd, err;
+
+ tp_id = determine_tracepoint_id(tp_category, tp_name);
+ if (tp_id < 0) {
+ pr_warn("failed to determine tracepoint '%s/%s' perf event ID: %s\n",
+ tp_category, tp_name,
+ libbpf_strerror_r(tp_id, errmsg, sizeof(errmsg)));
+ return tp_id;
+ }
+
+ attr.type = PERF_TYPE_TRACEPOINT;
+ attr.size = sizeof(attr);
+ attr.config = tp_id;
+
+ pfd = syscall(__NR_perf_event_open, &attr, -1 /* pid */, 0 /* cpu */,
+ -1 /* group_fd */, PERF_FLAG_FD_CLOEXEC);
+ if (pfd < 0) {
+ err = -errno;
+ pr_warn("tracepoint '%s/%s' perf_event_open() failed: %s\n",
+ tp_category, tp_name,
+ libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
+ return err;
+ }
+ return pfd;
+}
+
+struct bpf_link *bpf_program__attach_tracepoint(struct bpf_program *prog,
+ const char *tp_category,
+ const char *tp_name)
+{
+ char errmsg[STRERR_BUFSIZE];
+ struct bpf_link *link;
+ int pfd, err;
+
+ pfd = perf_event_open_tracepoint(tp_category, tp_name);
+ if (pfd < 0) {
+ pr_warn("program '%s': failed to create tracepoint '%s/%s' perf event: %s\n",
+ bpf_program__title(prog, false),
+ tp_category, tp_name,
+ libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
+ return ERR_PTR(pfd);
+ }
+ link = bpf_program__attach_perf_event(prog, pfd);
+ if (IS_ERR(link)) {
+ close(pfd);
+ err = PTR_ERR(link);
+ pr_warn("program '%s': failed to attach to tracepoint '%s/%s': %s\n",
+ bpf_program__title(prog, false),
+ tp_category, tp_name,
+ libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
+ return link;
+ }
+ return link;
+}
+
+static int bpf_link__destroy_fd(struct bpf_link *link)
+{
+ struct bpf_link_fd *l = (void *)link;
+
+ return close(l->fd);
+}
+
+struct bpf_link *bpf_program__attach_raw_tracepoint(struct bpf_program *prog,
+ const char *tp_name)
+{
+ char errmsg[STRERR_BUFSIZE];
+ struct bpf_link_fd *link;
+ int prog_fd, pfd;
+
+ prog_fd = bpf_program__fd(prog);
+ if (prog_fd < 0) {
+ pr_warn("program '%s': can't attach before loaded\n",
+ bpf_program__title(prog, false));
+ return ERR_PTR(-EINVAL);
+ }
+
+ link = malloc(sizeof(*link));
+ if (!link)
+ return ERR_PTR(-ENOMEM);
+ link->link.destroy = &bpf_link__destroy_fd;
+
+ pfd = bpf_raw_tracepoint_open(tp_name, prog_fd);
+ if (pfd < 0) {
+ pfd = -errno;
+ free(link);
+ pr_warn("program '%s': failed to attach to raw tracepoint '%s': %s\n",
+ bpf_program__title(prog, false), tp_name,
+ libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
+ return ERR_PTR(pfd);
+ }
+ link->fd = pfd;
+ return (struct bpf_link *)link;
+}
+
+struct bpf_link *bpf_program__attach_trace(struct bpf_program *prog)
+{
+ char errmsg[STRERR_BUFSIZE];
+ struct bpf_link_fd *link;
+ int prog_fd, pfd;
+
+ prog_fd = bpf_program__fd(prog);
+ if (prog_fd < 0) {
+ pr_warn("program '%s': can't attach before loaded\n",
+ bpf_program__title(prog, false));
+ return ERR_PTR(-EINVAL);
+ }
+
+ link = malloc(sizeof(*link));
+ if (!link)
+ return ERR_PTR(-ENOMEM);
+ link->link.destroy = &bpf_link__destroy_fd;
+
+ pfd = bpf_raw_tracepoint_open(NULL, prog_fd);
+ if (pfd < 0) {
+ pfd = -errno;
+ free(link);
+ pr_warn("program '%s': failed to attach to trace: %s\n",
+ bpf_program__title(prog, false),
+ libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
+ return ERR_PTR(pfd);
+ }
+ link->fd = pfd;
+ return (struct bpf_link *)link;
+}
+
+enum bpf_perf_event_ret
+bpf_perf_event_read_simple(void *mmap_mem, size_t mmap_size, size_t page_size,
+ void **copy_mem, size_t *copy_size,
+ bpf_perf_event_print_t fn, void *private_data)
+{
+ struct perf_event_mmap_page *header = mmap_mem;
+ __u64 data_head = ring_buffer_read_head(header);
+ __u64 data_tail = header->data_tail;
+ void *base = ((__u8 *)header) + page_size;
+ int ret = LIBBPF_PERF_EVENT_CONT;
+ struct perf_event_header *ehdr;
+ size_t ehdr_size;
+
+ while (data_head != data_tail) {
+ ehdr = base + (data_tail & (mmap_size - 1));
+ ehdr_size = ehdr->size;
+
+ if (((void *)ehdr) + ehdr_size > base + mmap_size) {
+ void *copy_start = ehdr;
+ size_t len_first = base + mmap_size - copy_start;
+ size_t len_secnd = ehdr_size - len_first;
+
+ if (*copy_size < ehdr_size) {
+ free(*copy_mem);
+ *copy_mem = malloc(ehdr_size);
+ if (!*copy_mem) {
+ *copy_size = 0;
+ ret = LIBBPF_PERF_EVENT_ERROR;
+ break;
+ }
+ *copy_size = ehdr_size;
+ }
+
+ memcpy(*copy_mem, copy_start, len_first);
+ memcpy(*copy_mem + len_first, base, len_secnd);
+ ehdr = *copy_mem;
+ }
+
+ ret = fn(ehdr, private_data);
+ data_tail += ehdr_size;
+ if (ret != LIBBPF_PERF_EVENT_CONT)
+ break;
+ }
+
+ ring_buffer_write_tail(header, data_tail);
+ return ret;
+}
+
+struct perf_buffer;
+
+struct perf_buffer_params {
+ struct perf_event_attr *attr;
+ /* if event_cb is specified, it takes precendence */
+ perf_buffer_event_fn event_cb;
+ /* sample_cb and lost_cb are higher-level common-case callbacks */
+ perf_buffer_sample_fn sample_cb;
+ perf_buffer_lost_fn lost_cb;
+ void *ctx;
+ int cpu_cnt;
+ int *cpus;
+ int *map_keys;
+};
+
+struct perf_cpu_buf {
+ struct perf_buffer *pb;
+ void *base; /* mmap()'ed memory */
+ void *buf; /* for reconstructing segmented data */
+ size_t buf_size;
+ int fd;
+ int cpu;
+ int map_key;
+};
+
+struct perf_buffer {
+ perf_buffer_event_fn event_cb;
+ perf_buffer_sample_fn sample_cb;
+ perf_buffer_lost_fn lost_cb;
+ void *ctx; /* passed into callbacks */
+
+ size_t page_size;
+ size_t mmap_size;
+ struct perf_cpu_buf **cpu_bufs;
+ struct epoll_event *events;
+ int cpu_cnt;
+ int epoll_fd; /* perf event FD */
+ int map_fd; /* BPF_MAP_TYPE_PERF_EVENT_ARRAY BPF map FD */
+};
+
+static void perf_buffer__free_cpu_buf(struct perf_buffer *pb,
+ struct perf_cpu_buf *cpu_buf)
+{
+ if (!cpu_buf)
+ return;
+ if (cpu_buf->base &&
+ munmap(cpu_buf->base, pb->mmap_size + pb->page_size))
+ pr_warn("failed to munmap cpu_buf #%d\n", cpu_buf->cpu);
+ if (cpu_buf->fd >= 0) {
+ ioctl(cpu_buf->fd, PERF_EVENT_IOC_DISABLE, 0);
+ close(cpu_buf->fd);
+ }
+ free(cpu_buf->buf);
+ free(cpu_buf);
+}
+
+void perf_buffer__free(struct perf_buffer *pb)
+{
+ int i;
+
+ if (!pb)
+ return;
+ if (pb->cpu_bufs) {
+ for (i = 0; i < pb->cpu_cnt && pb->cpu_bufs[i]; i++) {
+ struct perf_cpu_buf *cpu_buf = pb->cpu_bufs[i];
+
+ bpf_map_delete_elem(pb->map_fd, &cpu_buf->map_key);
+ perf_buffer__free_cpu_buf(pb, cpu_buf);
+ }
+ free(pb->cpu_bufs);
+ }
+ if (pb->epoll_fd >= 0)
+ close(pb->epoll_fd);
+ free(pb->events);
+ free(pb);
+}
+
+static struct perf_cpu_buf *
+perf_buffer__open_cpu_buf(struct perf_buffer *pb, struct perf_event_attr *attr,
+ int cpu, int map_key)
+{
+ struct perf_cpu_buf *cpu_buf;
+ char msg[STRERR_BUFSIZE];
+ int err;
+
+ cpu_buf = calloc(1, sizeof(*cpu_buf));
+ if (!cpu_buf)
+ return ERR_PTR(-ENOMEM);
+
+ cpu_buf->pb = pb;
+ cpu_buf->cpu = cpu;
+ cpu_buf->map_key = map_key;
+
+ cpu_buf->fd = syscall(__NR_perf_event_open, attr, -1 /* pid */, cpu,
+ -1, PERF_FLAG_FD_CLOEXEC);
+ if (cpu_buf->fd < 0) {
+ err = -errno;
+ pr_warn("failed to open perf buffer event on cpu #%d: %s\n",
+ cpu, libbpf_strerror_r(err, msg, sizeof(msg)));
+ goto error;
+ }
+
+ cpu_buf->base = mmap(NULL, pb->mmap_size + pb->page_size,
+ PROT_READ | PROT_WRITE, MAP_SHARED,
+ cpu_buf->fd, 0);
+ if (cpu_buf->base == MAP_FAILED) {
+ cpu_buf->base = NULL;
+ err = -errno;
+ pr_warn("failed to mmap perf buffer on cpu #%d: %s\n",
+ cpu, libbpf_strerror_r(err, msg, sizeof(msg)));
+ goto error;
+ }
+
+ if (ioctl(cpu_buf->fd, PERF_EVENT_IOC_ENABLE, 0) < 0) {
+ err = -errno;
+ pr_warn("failed to enable perf buffer event on cpu #%d: %s\n",
+ cpu, libbpf_strerror_r(err, msg, sizeof(msg)));
+ goto error;
+ }
+
+ return cpu_buf;
+
+error:
+ perf_buffer__free_cpu_buf(pb, cpu_buf);
+ return (struct perf_cpu_buf *)ERR_PTR(err);
+}
+
+static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt,
+ struct perf_buffer_params *p);
+
+struct perf_buffer *perf_buffer__new(int map_fd, size_t page_cnt,
+ const struct perf_buffer_opts *opts)
+{
+ struct perf_buffer_params p = {};
+ struct perf_event_attr attr = { 0, };
+
+ attr.config = PERF_COUNT_SW_BPF_OUTPUT,
+ attr.type = PERF_TYPE_SOFTWARE;
+ attr.sample_type = PERF_SAMPLE_RAW;
+ attr.sample_period = 1;
+ attr.wakeup_events = 1;
+
+ p.attr = &attr;
+ p.sample_cb = opts ? opts->sample_cb : NULL;
+ p.lost_cb = opts ? opts->lost_cb : NULL;
+ p.ctx = opts ? opts->ctx : NULL;
+
+ return __perf_buffer__new(map_fd, page_cnt, &p);
+}
+
+struct perf_buffer *
+perf_buffer__new_raw(int map_fd, size_t page_cnt,
+ const struct perf_buffer_raw_opts *opts)
+{
+ struct perf_buffer_params p = {};
+
+ p.attr = opts->attr;
+ p.event_cb = opts->event_cb;
+ p.ctx = opts->ctx;
+ p.cpu_cnt = opts->cpu_cnt;
+ p.cpus = opts->cpus;
+ p.map_keys = opts->map_keys;
+
+ return __perf_buffer__new(map_fd, page_cnt, &p);
+}
+
+static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt,
+ struct perf_buffer_params *p)
+{
+ struct bpf_map_info map = {};
+ char msg[STRERR_BUFSIZE];
+ struct perf_buffer *pb;
+ __u32 map_info_len;
+ int err, i;
+
+ if (page_cnt & (page_cnt - 1)) {
+ pr_warn("page count should be power of two, but is %zu\n",
+ page_cnt);
+ return ERR_PTR(-EINVAL);
+ }
+
+ map_info_len = sizeof(map);
+ err = bpf_obj_get_info_by_fd(map_fd, &map, &map_info_len);
+ if (err) {
+ err = -errno;
+ pr_warn("failed to get map info for map FD %d: %s\n",
+ map_fd, libbpf_strerror_r(err, msg, sizeof(msg)));
+ return ERR_PTR(err);
+ }
+
+ if (map.type != BPF_MAP_TYPE_PERF_EVENT_ARRAY) {
+ pr_warn("map '%s' should be BPF_MAP_TYPE_PERF_EVENT_ARRAY\n",
+ map.name);
+ return ERR_PTR(-EINVAL);
+ }
+
+ pb = calloc(1, sizeof(*pb));
+ if (!pb)
+ return ERR_PTR(-ENOMEM);
+
+ pb->event_cb = p->event_cb;
+ pb->sample_cb = p->sample_cb;
+ pb->lost_cb = p->lost_cb;
+ pb->ctx = p->ctx;
+
+ pb->page_size = getpagesize();
+ pb->mmap_size = pb->page_size * page_cnt;
+ pb->map_fd = map_fd;
+
+ pb->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
+ if (pb->epoll_fd < 0) {
+ err = -errno;
+ pr_warn("failed to create epoll instance: %s\n",
+ libbpf_strerror_r(err, msg, sizeof(msg)));
+ goto error;
+ }
+
+ if (p->cpu_cnt > 0) {
+ pb->cpu_cnt = p->cpu_cnt;
+ } else {
+ pb->cpu_cnt = libbpf_num_possible_cpus();
+ if (pb->cpu_cnt < 0) {
+ err = pb->cpu_cnt;
+ goto error;
+ }
+ if (map.max_entries < pb->cpu_cnt)
+ pb->cpu_cnt = map.max_entries;
+ }
+
+ pb->events = calloc(pb->cpu_cnt, sizeof(*pb->events));
+ if (!pb->events) {
+ err = -ENOMEM;
+ pr_warn("failed to allocate events: out of memory\n");
+ goto error;
+ }
+ pb->cpu_bufs = calloc(pb->cpu_cnt, sizeof(*pb->cpu_bufs));
+ if (!pb->cpu_bufs) {
+ err = -ENOMEM;
+ pr_warn("failed to allocate buffers: out of memory\n");
+ goto error;
+ }
+
+ for (i = 0; i < pb->cpu_cnt; i++) {
+ struct perf_cpu_buf *cpu_buf;
+ int cpu, map_key;
+
+ cpu = p->cpu_cnt > 0 ? p->cpus[i] : i;
+ map_key = p->cpu_cnt > 0 ? p->map_keys[i] : i;
+
+ cpu_buf = perf_buffer__open_cpu_buf(pb, p->attr, cpu, map_key);
+ if (IS_ERR(cpu_buf)) {
+ err = PTR_ERR(cpu_buf);
+ goto error;
+ }
+
+ pb->cpu_bufs[i] = cpu_buf;
+
+ err = bpf_map_update_elem(pb->map_fd, &map_key,
+ &cpu_buf->fd, 0);
+ if (err) {
+ err = -errno;
+ pr_warn("failed to set cpu #%d, key %d -> perf FD %d: %s\n",
+ cpu, map_key, cpu_buf->fd,
+ libbpf_strerror_r(err, msg, sizeof(msg)));
+ goto error;
+ }
+
+ pb->events[i].events = EPOLLIN;
+ pb->events[i].data.ptr = cpu_buf;
+ if (epoll_ctl(pb->epoll_fd, EPOLL_CTL_ADD, cpu_buf->fd,
+ &pb->events[i]) < 0) {
+ err = -errno;
+ pr_warn("failed to epoll_ctl cpu #%d perf FD %d: %s\n",
+ cpu, cpu_buf->fd,
+ libbpf_strerror_r(err, msg, sizeof(msg)));
+ goto error;
+ }
+ }
+
+ return pb;
+
+error:
+ if (pb)
+ perf_buffer__free(pb);
+ return ERR_PTR(err);
+}
+
+struct perf_sample_raw {
+ struct perf_event_header header;
+ uint32_t size;
+ char data[0];
+};
+
+struct perf_sample_lost {
+ struct perf_event_header header;
+ uint64_t id;
+ uint64_t lost;
+ uint64_t sample_id;
+};
+
+static enum bpf_perf_event_ret
+perf_buffer__process_record(struct perf_event_header *e, void *ctx)
+{
+ struct perf_cpu_buf *cpu_buf = ctx;
+ struct perf_buffer *pb = cpu_buf->pb;
+ void *data = e;
+
+ /* user wants full control over parsing perf event */
+ if (pb->event_cb)
+ return pb->event_cb(pb->ctx, cpu_buf->cpu, e);
+
+ switch (e->type) {
+ case PERF_RECORD_SAMPLE: {
+ struct perf_sample_raw *s = data;
+
+ if (pb->sample_cb)
+ pb->sample_cb(pb->ctx, cpu_buf->cpu, s->data, s->size);
+ break;
+ }
+ case PERF_RECORD_LOST: {
+ struct perf_sample_lost *s = data;
+
+ if (pb->lost_cb)
+ pb->lost_cb(pb->ctx, cpu_buf->cpu, s->lost);
+ break;
+ }
+ default:
+ pr_warn("unknown perf sample type %d\n", e->type);
+ return LIBBPF_PERF_EVENT_ERROR;
+ }
+ return LIBBPF_PERF_EVENT_CONT;
+}
+
+static int perf_buffer__process_records(struct perf_buffer *pb,
+ struct perf_cpu_buf *cpu_buf)
+{
+ enum bpf_perf_event_ret ret;
+
+ ret = bpf_perf_event_read_simple(cpu_buf->base, pb->mmap_size,
+ pb->page_size, &cpu_buf->buf,
+ &cpu_buf->buf_size,
+ perf_buffer__process_record, cpu_buf);
+ if (ret != LIBBPF_PERF_EVENT_CONT)
+ return ret;
+ return 0;
+}
+
+int perf_buffer__poll(struct perf_buffer *pb, int timeout_ms)
+{
+ int i, cnt, err;
+
+ cnt = epoll_wait(pb->epoll_fd, pb->events, pb->cpu_cnt, timeout_ms);
+ for (i = 0; i < cnt; i++) {
+ struct perf_cpu_buf *cpu_buf = pb->events[i].data.ptr;
+
+ err = perf_buffer__process_records(pb, cpu_buf);
+ if (err) {
+ pr_warn("error while processing records: %d\n", err);
+ return err;
+ }
+ }
+ return cnt < 0 ? -errno : cnt;
+}
+
+struct bpf_prog_info_array_desc {
+ int array_offset; /* e.g. offset of jited_prog_insns */
+ int count_offset; /* e.g. offset of jited_prog_len */
+ int size_offset; /* > 0: offset of rec size,
+ * < 0: fix size of -size_offset
+ */
+};
+
+static struct bpf_prog_info_array_desc bpf_prog_info_array_desc[] = {
+ [BPF_PROG_INFO_JITED_INSNS] = {
+ offsetof(struct bpf_prog_info, jited_prog_insns),
+ offsetof(struct bpf_prog_info, jited_prog_len),
+ -1,
+ },
+ [BPF_PROG_INFO_XLATED_INSNS] = {
+ offsetof(struct bpf_prog_info, xlated_prog_insns),
+ offsetof(struct bpf_prog_info, xlated_prog_len),
+ -1,
+ },
+ [BPF_PROG_INFO_MAP_IDS] = {
+ offsetof(struct bpf_prog_info, map_ids),
+ offsetof(struct bpf_prog_info, nr_map_ids),
+ -(int)sizeof(__u32),
+ },
+ [BPF_PROG_INFO_JITED_KSYMS] = {
+ offsetof(struct bpf_prog_info, jited_ksyms),
+ offsetof(struct bpf_prog_info, nr_jited_ksyms),
+ -(int)sizeof(__u64),
+ },
+ [BPF_PROG_INFO_JITED_FUNC_LENS] = {
+ offsetof(struct bpf_prog_info, jited_func_lens),
+ offsetof(struct bpf_prog_info, nr_jited_func_lens),
+ -(int)sizeof(__u32),
+ },
+ [BPF_PROG_INFO_FUNC_INFO] = {
+ offsetof(struct bpf_prog_info, func_info),
+ offsetof(struct bpf_prog_info, nr_func_info),
+ offsetof(struct bpf_prog_info, func_info_rec_size),
+ },
+ [BPF_PROG_INFO_LINE_INFO] = {
+ offsetof(struct bpf_prog_info, line_info),
+ offsetof(struct bpf_prog_info, nr_line_info),
+ offsetof(struct bpf_prog_info, line_info_rec_size),
+ },
+ [BPF_PROG_INFO_JITED_LINE_INFO] = {
+ offsetof(struct bpf_prog_info, jited_line_info),
+ offsetof(struct bpf_prog_info, nr_jited_line_info),
+ offsetof(struct bpf_prog_info, jited_line_info_rec_size),
+ },
+ [BPF_PROG_INFO_PROG_TAGS] = {
+ offsetof(struct bpf_prog_info, prog_tags),
+ offsetof(struct bpf_prog_info, nr_prog_tags),
+ -(int)sizeof(__u8) * BPF_TAG_SIZE,
+ },
+
+};
+
+static __u32 bpf_prog_info_read_offset_u32(struct bpf_prog_info *info,
+ int offset)
+{
+ __u32 *array = (__u32 *)info;
+
+ if (offset >= 0)
+ return array[offset / sizeof(__u32)];
+ return -(int)offset;
+}
+
+static __u64 bpf_prog_info_read_offset_u64(struct bpf_prog_info *info,
+ int offset)
+{
+ __u64 *array = (__u64 *)info;
+
+ if (offset >= 0)
+ return array[offset / sizeof(__u64)];
+ return -(int)offset;
+}
+
+static void bpf_prog_info_set_offset_u32(struct bpf_prog_info *info, int offset,
+ __u32 val)
+{
+ __u32 *array = (__u32 *)info;
+
+ if (offset >= 0)
+ array[offset / sizeof(__u32)] = val;
+}
+
+static void bpf_prog_info_set_offset_u64(struct bpf_prog_info *info, int offset,
+ __u64 val)
+{
+ __u64 *array = (__u64 *)info;
+
+ if (offset >= 0)
+ array[offset / sizeof(__u64)] = val;
+}
+
+struct bpf_prog_info_linear *
+bpf_program__get_prog_info_linear(int fd, __u64 arrays)
+{
+ struct bpf_prog_info_linear *info_linear;
+ struct bpf_prog_info info = {};
+ __u32 info_len = sizeof(info);
+ __u32 data_len = 0;
+ int i, err;
+ void *ptr;
+
+ if (arrays >> BPF_PROG_INFO_LAST_ARRAY)
+ return ERR_PTR(-EINVAL);
+
+ /* step 1: get array dimensions */
+ err = bpf_obj_get_info_by_fd(fd, &info, &info_len);
+ if (err) {
+ pr_debug("can't get prog info: %s", strerror(errno));
+ return ERR_PTR(-EFAULT);
+ }
+
+ /* step 2: calculate total size of all arrays */
+ for (i = BPF_PROG_INFO_FIRST_ARRAY; i < BPF_PROG_INFO_LAST_ARRAY; ++i) {
+ bool include_array = (arrays & (1UL << i)) > 0;
+ struct bpf_prog_info_array_desc *desc;
+ __u32 count, size;
+
+ desc = bpf_prog_info_array_desc + i;
+
+ /* kernel is too old to support this field */
+ if (info_len < desc->array_offset + sizeof(__u32) ||
+ info_len < desc->count_offset + sizeof(__u32) ||
+ (desc->size_offset > 0 && info_len < desc->size_offset))
+ include_array = false;
+
+ if (!include_array) {
+ arrays &= ~(1UL << i); /* clear the bit */
+ continue;
+ }
+
+ count = bpf_prog_info_read_offset_u32(&info, desc->count_offset);
+ size = bpf_prog_info_read_offset_u32(&info, desc->size_offset);
+
+ data_len += count * size;
+ }
+
+ /* step 3: allocate continuous memory */
+ data_len = roundup(data_len, sizeof(__u64));
+ info_linear = malloc(sizeof(struct bpf_prog_info_linear) + data_len);
+ if (!info_linear)
+ return ERR_PTR(-ENOMEM);
+
+ /* step 4: fill data to info_linear->info */
+ info_linear->arrays = arrays;
+ memset(&info_linear->info, 0, sizeof(info));
+ ptr = info_linear->data;
+
+ for (i = BPF_PROG_INFO_FIRST_ARRAY; i < BPF_PROG_INFO_LAST_ARRAY; ++i) {
+ struct bpf_prog_info_array_desc *desc;
+ __u32 count, size;
+
+ if ((arrays & (1UL << i)) == 0)
+ continue;
+
+ desc = bpf_prog_info_array_desc + i;
+ count = bpf_prog_info_read_offset_u32(&info, desc->count_offset);
+ size = bpf_prog_info_read_offset_u32(&info, desc->size_offset);
+ bpf_prog_info_set_offset_u32(&info_linear->info,
+ desc->count_offset, count);
+ bpf_prog_info_set_offset_u32(&info_linear->info,
+ desc->size_offset, size);
+ bpf_prog_info_set_offset_u64(&info_linear->info,
+ desc->array_offset,
+ ptr_to_u64(ptr));
+ ptr += count * size;
+ }
+
+ /* step 5: call syscall again to get required arrays */
+ err = bpf_obj_get_info_by_fd(fd, &info_linear->info, &info_len);
+ if (err) {
+ pr_debug("can't get prog info: %s", strerror(errno));
+ free(info_linear);
+ return ERR_PTR(-EFAULT);
+ }
+
+ /* step 6: verify the data */
+ for (i = BPF_PROG_INFO_FIRST_ARRAY; i < BPF_PROG_INFO_LAST_ARRAY; ++i) {
+ struct bpf_prog_info_array_desc *desc;
+ __u32 v1, v2;
+
+ if ((arrays & (1UL << i)) == 0)
+ continue;
+
+ desc = bpf_prog_info_array_desc + i;
+ v1 = bpf_prog_info_read_offset_u32(&info, desc->count_offset);
+ v2 = bpf_prog_info_read_offset_u32(&info_linear->info,
+ desc->count_offset);
+ if (v1 != v2)
+ pr_warn("%s: mismatch in element count\n", __func__);
+
+ v1 = bpf_prog_info_read_offset_u32(&info, desc->size_offset);
+ v2 = bpf_prog_info_read_offset_u32(&info_linear->info,
+ desc->size_offset);
+ if (v1 != v2)
+ pr_warn("%s: mismatch in rec size\n", __func__);
+ }
+
+ /* step 7: update info_len and data_len */
+ info_linear->info_len = sizeof(struct bpf_prog_info);
+ info_linear->data_len = data_len;
+
+ return info_linear;
+}
+
+void bpf_program__bpil_addr_to_offs(struct bpf_prog_info_linear *info_linear)
+{
+ int i;
+
+ for (i = BPF_PROG_INFO_FIRST_ARRAY; i < BPF_PROG_INFO_LAST_ARRAY; ++i) {
+ struct bpf_prog_info_array_desc *desc;
+ __u64 addr, offs;
+
+ if ((info_linear->arrays & (1UL << i)) == 0)
+ continue;
+
+ desc = bpf_prog_info_array_desc + i;
+ addr = bpf_prog_info_read_offset_u64(&info_linear->info,
+ desc->array_offset);
+ offs = addr - ptr_to_u64(info_linear->data);
+ bpf_prog_info_set_offset_u64(&info_linear->info,
+ desc->array_offset, offs);
+ }
+}
+
+void bpf_program__bpil_offs_to_addr(struct bpf_prog_info_linear *info_linear)
+{
+ int i;
+
+ for (i = BPF_PROG_INFO_FIRST_ARRAY; i < BPF_PROG_INFO_LAST_ARRAY; ++i) {
+ struct bpf_prog_info_array_desc *desc;
+ __u64 addr, offs;
+
+ if ((info_linear->arrays & (1UL << i)) == 0)
+ continue;
+
+ desc = bpf_prog_info_array_desc + i;
+ offs = bpf_prog_info_read_offset_u64(&info_linear->info,
+ desc->array_offset);
+ addr = offs + ptr_to_u64(info_linear->data);
+ bpf_prog_info_set_offset_u64(&info_linear->info,
+ desc->array_offset, addr);
+ }
+}
+
+int libbpf_num_possible_cpus(void)
+{
+ static const char *fcpu = "/sys/devices/system/cpu/possible";
+ int len = 0, n = 0, il = 0, ir = 0;
+ unsigned int start = 0, end = 0;
+ int tmp_cpus = 0;
+ static int cpus;
+ char buf[128];
+ int error = 0;
+ int fd = -1;
+
+ tmp_cpus = READ_ONCE(cpus);
+ if (tmp_cpus > 0)
+ return tmp_cpus;
+
+ fd = open(fcpu, O_RDONLY);
+ if (fd < 0) {
+ error = errno;
+ pr_warn("Failed to open file %s: %s\n", fcpu, strerror(error));
+ return -error;
+ }
+ len = read(fd, buf, sizeof(buf));
+ close(fd);
+ if (len <= 0) {
+ error = len ? errno : EINVAL;
+ pr_warn("Failed to read # of possible cpus from %s: %s\n",
+ fcpu, strerror(error));
+ return -error;
+ }
+ if (len == sizeof(buf)) {
+ pr_warn("File %s size overflow\n", fcpu);
+ return -EOVERFLOW;
+ }
+ buf[len] = '\0';
+
+ for (ir = 0, tmp_cpus = 0; ir <= len; ir++) {
+ /* Each sub string separated by ',' has format \d+-\d+ or \d+ */
+ if (buf[ir] == ',' || buf[ir] == '\0') {
+ buf[ir] = '\0';
+ n = sscanf(&buf[il], "%u-%u", &start, &end);
+ if (n <= 0) {
+ pr_warn("Failed to get # CPUs from %s\n",
+ &buf[il]);
+ return -EINVAL;
+ } else if (n == 1) {
+ end = start;
+ }
+ tmp_cpus += end - start + 1;
+ il = ir + 1;
+ }
+ }
+ if (tmp_cpus <= 0) {
+ pr_warn("Invalid #CPUs %d from %s\n", tmp_cpus, fcpu);
+ return -EINVAL;
+ }
+
+ WRITE_ONCE(cpus, tmp_cpus);
+ return tmp_cpus;
+}
diff --git a/src/contrib/libbpf/bpf/libbpf.h b/src/contrib/libbpf/bpf/libbpf.h
new file mode 100644
index 0000000..0dbf4bf
--- /dev/null
+++ b/src/contrib/libbpf/bpf/libbpf.h
@@ -0,0 +1,637 @@
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
+
+/*
+ * Common eBPF ELF object loading operations.
+ *
+ * Copyright (C) 2013-2015 Alexei Starovoitov <ast@kernel.org>
+ * Copyright (C) 2015 Wang Nan <wangnan0@huawei.com>
+ * Copyright (C) 2015 Huawei Inc.
+ */
+#ifndef __LIBBPF_LIBBPF_H
+#define __LIBBPF_LIBBPF_H
+
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <stdbool.h>
+#include <sys/types.h> // for size_t
+#include <linux/bpf.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifndef LIBBPF_API
+#define LIBBPF_API __attribute__((visibility("default")))
+#endif
+
+enum libbpf_errno {
+ __LIBBPF_ERRNO__START = 4000,
+
+ /* Something wrong in libelf */
+ LIBBPF_ERRNO__LIBELF = __LIBBPF_ERRNO__START,
+ LIBBPF_ERRNO__FORMAT, /* BPF object format invalid */
+ LIBBPF_ERRNO__KVERSION, /* Incorrect or no 'version' section */
+ LIBBPF_ERRNO__ENDIAN, /* Endian mismatch */
+ LIBBPF_ERRNO__INTERNAL, /* Internal error in libbpf */
+ LIBBPF_ERRNO__RELOC, /* Relocation failed */
+ LIBBPF_ERRNO__LOAD, /* Load program failure for unknown reason */
+ LIBBPF_ERRNO__VERIFY, /* Kernel verifier blocks program loading */
+ LIBBPF_ERRNO__PROG2BIG, /* Program too big */
+ LIBBPF_ERRNO__KVER, /* Incorrect kernel version */
+ LIBBPF_ERRNO__PROGTYPE, /* Kernel doesn't support this program type */
+ LIBBPF_ERRNO__WRNGPID, /* Wrong pid in netlink message */
+ LIBBPF_ERRNO__INVSEQ, /* Invalid netlink sequence */
+ LIBBPF_ERRNO__NLPARSE, /* netlink parsing error */
+ __LIBBPF_ERRNO__END,
+};
+
+LIBBPF_API int libbpf_strerror(int err, char *buf, size_t size);
+
+enum libbpf_print_level {
+ LIBBPF_WARN,
+ LIBBPF_INFO,
+ LIBBPF_DEBUG,
+};
+
+typedef int (*libbpf_print_fn_t)(enum libbpf_print_level level,
+ const char *, va_list ap);
+
+LIBBPF_API libbpf_print_fn_t libbpf_set_print(libbpf_print_fn_t fn);
+
+/* Hide internal to user */
+struct bpf_object;
+
+struct bpf_object_open_attr {
+ const char *file;
+ enum bpf_prog_type prog_type;
+};
+
+/* Helper macro to declare and initialize libbpf options struct
+ *
+ * This dance with uninitialized declaration, followed by memset to zero,
+ * followed by assignment using compound literal syntax is done to preserve
+ * ability to use a nice struct field initialization syntax and **hopefully**
+ * have all the padding bytes initialized to zero. It's not guaranteed though,
+ * when copying literal, that compiler won't copy garbage in literal's padding
+ * bytes, but that's the best way I've found and it seems to work in practice.
+ *
+ * Macro declares opts struct of given type and name, zero-initializes,
+ * including any extra padding, it with memset() and then assigns initial
+ * values provided by users in struct initializer-syntax as varargs.
+ */
+#define DECLARE_LIBBPF_OPTS(TYPE, NAME, ...) \
+ struct TYPE NAME = ({ \
+ memset(&NAME, 0, sizeof(struct TYPE)); \
+ (struct TYPE) { \
+ .sz = sizeof(struct TYPE), \
+ __VA_ARGS__ \
+ }; \
+ })
+
+struct bpf_object_open_opts {
+ /* size of this struct, for forward/backward compatiblity */
+ size_t sz;
+ /* object name override, if provided:
+ * - for object open from file, this will override setting object
+ * name from file path's base name;
+ * - for object open from memory buffer, this will specify an object
+ * name and will override default "<addr>-<buf-size>" name;
+ */
+ const char *object_name;
+ /* parse map definitions non-strictly, allowing extra attributes/data */
+ bool relaxed_maps;
+ /* process CO-RE relocations non-strictly, allowing them to fail */
+ bool relaxed_core_relocs;
+ /* maps that set the 'pinning' attribute in their definition will have
+ * their pin_path attribute set to a file in this directory, and be
+ * auto-pinned to that path on load; defaults to "/sys/fs/bpf".
+ */
+ const char *pin_root_path;
+ __u32 attach_prog_fd;
+};
+#define bpf_object_open_opts__last_field attach_prog_fd
+
+LIBBPF_API struct bpf_object *bpf_object__open(const char *path);
+LIBBPF_API struct bpf_object *
+bpf_object__open_file(const char *path, struct bpf_object_open_opts *opts);
+LIBBPF_API struct bpf_object *
+bpf_object__open_mem(const void *obj_buf, size_t obj_buf_sz,
+ struct bpf_object_open_opts *opts);
+
+/* deprecated bpf_object__open variants */
+LIBBPF_API struct bpf_object *
+bpf_object__open_buffer(const void *obj_buf, size_t obj_buf_sz,
+ const char *name);
+LIBBPF_API struct bpf_object *
+bpf_object__open_xattr(struct bpf_object_open_attr *attr);
+
+int bpf_object__section_size(const struct bpf_object *obj, const char *name,
+ __u32 *size);
+int bpf_object__variable_offset(const struct bpf_object *obj, const char *name,
+ __u32 *off);
+
+enum libbpf_pin_type {
+ LIBBPF_PIN_NONE,
+ /* PIN_BY_NAME: pin maps by name (in /sys/fs/bpf by default) */
+ LIBBPF_PIN_BY_NAME,
+};
+
+/* pin_maps and unpin_maps can both be called with a NULL path, in which case
+ * they will use the pin_path attribute of each map (and ignore all maps that
+ * don't have a pin_path set).
+ */
+LIBBPF_API int bpf_object__pin_maps(struct bpf_object *obj, const char *path);
+LIBBPF_API int bpf_object__unpin_maps(struct bpf_object *obj,
+ const char *path);
+LIBBPF_API int bpf_object__pin_programs(struct bpf_object *obj,
+ const char *path);
+LIBBPF_API int bpf_object__unpin_programs(struct bpf_object *obj,
+ const char *path);
+LIBBPF_API int bpf_object__pin(struct bpf_object *object, const char *path);
+LIBBPF_API void bpf_object__close(struct bpf_object *object);
+
+struct bpf_object_load_attr {
+ struct bpf_object *obj;
+ int log_level;
+ const char *target_btf_path;
+};
+
+/* Load/unload object into/from kernel */
+LIBBPF_API int bpf_object__load(struct bpf_object *obj);
+LIBBPF_API int bpf_object__load_xattr(struct bpf_object_load_attr *attr);
+LIBBPF_API int bpf_object__unload(struct bpf_object *obj);
+LIBBPF_API const char *bpf_object__name(const struct bpf_object *obj);
+LIBBPF_API unsigned int bpf_object__kversion(const struct bpf_object *obj);
+
+struct btf;
+LIBBPF_API struct btf *bpf_object__btf(const struct bpf_object *obj);
+LIBBPF_API int bpf_object__btf_fd(const struct bpf_object *obj);
+
+LIBBPF_API struct bpf_program *
+bpf_object__find_program_by_title(const struct bpf_object *obj,
+ const char *title);
+
+LIBBPF_API struct bpf_object *bpf_object__next(struct bpf_object *prev);
+#define bpf_object__for_each_safe(pos, tmp) \
+ for ((pos) = bpf_object__next(NULL), \
+ (tmp) = bpf_object__next(pos); \
+ (pos) != NULL; \
+ (pos) = (tmp), (tmp) = bpf_object__next(tmp))
+
+typedef void (*bpf_object_clear_priv_t)(struct bpf_object *, void *);
+LIBBPF_API int bpf_object__set_priv(struct bpf_object *obj, void *priv,
+ bpf_object_clear_priv_t clear_priv);
+LIBBPF_API void *bpf_object__priv(const struct bpf_object *prog);
+
+LIBBPF_API int
+libbpf_prog_type_by_name(const char *name, enum bpf_prog_type *prog_type,
+ enum bpf_attach_type *expected_attach_type);
+LIBBPF_API int libbpf_attach_type_by_name(const char *name,
+ enum bpf_attach_type *attach_type);
+LIBBPF_API int libbpf_find_vmlinux_btf_id(const char *name,
+ enum bpf_attach_type attach_type);
+
+/* Accessors of bpf_program */
+struct bpf_program;
+LIBBPF_API struct bpf_program *bpf_program__next(struct bpf_program *prog,
+ const struct bpf_object *obj);
+
+#define bpf_object__for_each_program(pos, obj) \
+ for ((pos) = bpf_program__next(NULL, (obj)); \
+ (pos) != NULL; \
+ (pos) = bpf_program__next((pos), (obj)))
+
+LIBBPF_API struct bpf_program *bpf_program__prev(struct bpf_program *prog,
+ const struct bpf_object *obj);
+
+typedef void (*bpf_program_clear_priv_t)(struct bpf_program *, void *);
+
+LIBBPF_API int bpf_program__set_priv(struct bpf_program *prog, void *priv,
+ bpf_program_clear_priv_t clear_priv);
+
+LIBBPF_API void *bpf_program__priv(const struct bpf_program *prog);
+LIBBPF_API void bpf_program__set_ifindex(struct bpf_program *prog,
+ __u32 ifindex);
+
+LIBBPF_API const char *bpf_program__title(const struct bpf_program *prog,
+ bool needs_copy);
+
+/* returns program size in bytes */
+LIBBPF_API size_t bpf_program__size(const struct bpf_program *prog);
+
+LIBBPF_API int bpf_program__load(struct bpf_program *prog, char *license,
+ __u32 kern_version);
+LIBBPF_API int bpf_program__fd(const struct bpf_program *prog);
+LIBBPF_API int bpf_program__pin_instance(struct bpf_program *prog,
+ const char *path,
+ int instance);
+LIBBPF_API int bpf_program__unpin_instance(struct bpf_program *prog,
+ const char *path,
+ int instance);
+LIBBPF_API int bpf_program__pin(struct bpf_program *prog, const char *path);
+LIBBPF_API int bpf_program__unpin(struct bpf_program *prog, const char *path);
+LIBBPF_API void bpf_program__unload(struct bpf_program *prog);
+
+struct bpf_link;
+
+LIBBPF_API int bpf_link__destroy(struct bpf_link *link);
+
+LIBBPF_API struct bpf_link *
+bpf_program__attach_perf_event(struct bpf_program *prog, int pfd);
+LIBBPF_API struct bpf_link *
+bpf_program__attach_kprobe(struct bpf_program *prog, bool retprobe,
+ const char *func_name);
+LIBBPF_API struct bpf_link *
+bpf_program__attach_uprobe(struct bpf_program *prog, bool retprobe,
+ pid_t pid, const char *binary_path,
+ size_t func_offset);
+LIBBPF_API struct bpf_link *
+bpf_program__attach_tracepoint(struct bpf_program *prog,
+ const char *tp_category,
+ const char *tp_name);
+LIBBPF_API struct bpf_link *
+bpf_program__attach_raw_tracepoint(struct bpf_program *prog,
+ const char *tp_name);
+
+LIBBPF_API struct bpf_link *
+bpf_program__attach_trace(struct bpf_program *prog);
+struct bpf_insn;
+
+/*
+ * Libbpf allows callers to adjust BPF programs before being loaded
+ * into kernel. One program in an object file can be transformed into
+ * multiple variants to be attached to different hooks.
+ *
+ * bpf_program_prep_t, bpf_program__set_prep and bpf_program__nth_fd
+ * form an API for this purpose.
+ *
+ * - bpf_program_prep_t:
+ * Defines a 'preprocessor', which is a caller defined function
+ * passed to libbpf through bpf_program__set_prep(), and will be
+ * called before program is loaded. The processor should adjust
+ * the program one time for each instance according to the instance id
+ * passed to it.
+ *
+ * - bpf_program__set_prep:
+ * Attaches a preprocessor to a BPF program. The number of instances
+ * that should be created is also passed through this function.
+ *
+ * - bpf_program__nth_fd:
+ * After the program is loaded, get resulting FD of a given instance
+ * of the BPF program.
+ *
+ * If bpf_program__set_prep() is not used, the program would be loaded
+ * without adjustment during bpf_object__load(). The program has only
+ * one instance. In this case bpf_program__fd(prog) is equal to
+ * bpf_program__nth_fd(prog, 0).
+ */
+
+struct bpf_prog_prep_result {
+ /*
+ * If not NULL, load new instruction array.
+ * If set to NULL, don't load this instance.
+ */
+ struct bpf_insn *new_insn_ptr;
+ int new_insn_cnt;
+
+ /* If not NULL, result FD is written to it. */
+ int *pfd;
+};
+
+/*
+ * Parameters of bpf_program_prep_t:
+ * - prog: The bpf_program being loaded.
+ * - n: Index of instance being generated.
+ * - insns: BPF instructions array.
+ * - insns_cnt:Number of instructions in insns.
+ * - res: Output parameter, result of transformation.
+ *
+ * Return value:
+ * - Zero: pre-processing success.
+ * - Non-zero: pre-processing error, stop loading.
+ */
+typedef int (*bpf_program_prep_t)(struct bpf_program *prog, int n,
+ struct bpf_insn *insns, int insns_cnt,
+ struct bpf_prog_prep_result *res);
+
+LIBBPF_API int bpf_program__set_prep(struct bpf_program *prog, int nr_instance,
+ bpf_program_prep_t prep);
+
+LIBBPF_API int bpf_program__nth_fd(const struct bpf_program *prog, int n);
+
+/*
+ * Adjust type of BPF program. Default is kprobe.
+ */
+LIBBPF_API int bpf_program__set_socket_filter(struct bpf_program *prog);
+LIBBPF_API int bpf_program__set_tracepoint(struct bpf_program *prog);
+LIBBPF_API int bpf_program__set_raw_tracepoint(struct bpf_program *prog);
+LIBBPF_API int bpf_program__set_kprobe(struct bpf_program *prog);
+LIBBPF_API int bpf_program__set_sched_cls(struct bpf_program *prog);
+LIBBPF_API int bpf_program__set_sched_act(struct bpf_program *prog);
+LIBBPF_API int bpf_program__set_xdp(struct bpf_program *prog);
+LIBBPF_API int bpf_program__set_perf_event(struct bpf_program *prog);
+LIBBPF_API int bpf_program__set_tracing(struct bpf_program *prog);
+
+LIBBPF_API enum bpf_prog_type bpf_program__get_type(struct bpf_program *prog);
+LIBBPF_API void bpf_program__set_type(struct bpf_program *prog,
+ enum bpf_prog_type type);
+
+LIBBPF_API enum bpf_attach_type
+bpf_program__get_expected_attach_type(struct bpf_program *prog);
+LIBBPF_API void
+bpf_program__set_expected_attach_type(struct bpf_program *prog,
+ enum bpf_attach_type type);
+
+LIBBPF_API bool bpf_program__is_socket_filter(const struct bpf_program *prog);
+LIBBPF_API bool bpf_program__is_tracepoint(const struct bpf_program *prog);
+LIBBPF_API bool bpf_program__is_raw_tracepoint(const struct bpf_program *prog);
+LIBBPF_API bool bpf_program__is_kprobe(const struct bpf_program *prog);
+LIBBPF_API bool bpf_program__is_sched_cls(const struct bpf_program *prog);
+LIBBPF_API bool bpf_program__is_sched_act(const struct bpf_program *prog);
+LIBBPF_API bool bpf_program__is_xdp(const struct bpf_program *prog);
+LIBBPF_API bool bpf_program__is_perf_event(const struct bpf_program *prog);
+LIBBPF_API bool bpf_program__is_tracing(const struct bpf_program *prog);
+
+/*
+ * No need for __attribute__((packed)), all members of 'bpf_map_def'
+ * are all aligned. In addition, using __attribute__((packed))
+ * would trigger a -Wpacked warning message, and lead to an error
+ * if -Werror is set.
+ */
+struct bpf_map_def {
+ unsigned int type;
+ unsigned int key_size;
+ unsigned int value_size;
+ unsigned int max_entries;
+ unsigned int map_flags;
+};
+
+/*
+ * The 'struct bpf_map' in include/linux/bpf.h is internal to the kernel,
+ * so no need to worry about a name clash.
+ */
+struct bpf_map;
+LIBBPF_API struct bpf_map *
+bpf_object__find_map_by_name(const struct bpf_object *obj, const char *name);
+
+LIBBPF_API int
+bpf_object__find_map_fd_by_name(const struct bpf_object *obj, const char *name);
+
+/*
+ * Get bpf_map through the offset of corresponding struct bpf_map_def
+ * in the BPF object file.
+ */
+LIBBPF_API struct bpf_map *
+bpf_object__find_map_by_offset(struct bpf_object *obj, size_t offset);
+
+LIBBPF_API struct bpf_map *
+bpf_map__next(const struct bpf_map *map, const struct bpf_object *obj);
+#define bpf_object__for_each_map(pos, obj) \
+ for ((pos) = bpf_map__next(NULL, (obj)); \
+ (pos) != NULL; \
+ (pos) = bpf_map__next((pos), (obj)))
+#define bpf_map__for_each bpf_object__for_each_map
+
+LIBBPF_API struct bpf_map *
+bpf_map__prev(const struct bpf_map *map, const struct bpf_object *obj);
+
+LIBBPF_API int bpf_map__fd(const struct bpf_map *map);
+LIBBPF_API const struct bpf_map_def *bpf_map__def(const struct bpf_map *map);
+LIBBPF_API const char *bpf_map__name(const struct bpf_map *map);
+LIBBPF_API __u32 bpf_map__btf_key_type_id(const struct bpf_map *map);
+LIBBPF_API __u32 bpf_map__btf_value_type_id(const struct bpf_map *map);
+
+typedef void (*bpf_map_clear_priv_t)(struct bpf_map *, void *);
+LIBBPF_API int bpf_map__set_priv(struct bpf_map *map, void *priv,
+ bpf_map_clear_priv_t clear_priv);
+LIBBPF_API void *bpf_map__priv(const struct bpf_map *map);
+LIBBPF_API int bpf_map__reuse_fd(struct bpf_map *map, int fd);
+LIBBPF_API int bpf_map__resize(struct bpf_map *map, __u32 max_entries);
+LIBBPF_API bool bpf_map__is_offload_neutral(const struct bpf_map *map);
+LIBBPF_API bool bpf_map__is_internal(const struct bpf_map *map);
+LIBBPF_API void bpf_map__set_ifindex(struct bpf_map *map, __u32 ifindex);
+LIBBPF_API int bpf_map__set_pin_path(struct bpf_map *map, const char *path);
+LIBBPF_API const char *bpf_map__get_pin_path(const struct bpf_map *map);
+LIBBPF_API bool bpf_map__is_pinned(const struct bpf_map *map);
+LIBBPF_API int bpf_map__pin(struct bpf_map *map, const char *path);
+LIBBPF_API int bpf_map__unpin(struct bpf_map *map, const char *path);
+
+LIBBPF_API int bpf_map__set_inner_map_fd(struct bpf_map *map, int fd);
+
+LIBBPF_API long libbpf_get_error(const void *ptr);
+
+struct bpf_prog_load_attr {
+ const char *file;
+ enum bpf_prog_type prog_type;
+ enum bpf_attach_type expected_attach_type;
+ int ifindex;
+ int log_level;
+ int prog_flags;
+};
+
+LIBBPF_API int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr,
+ struct bpf_object **pobj, int *prog_fd);
+LIBBPF_API int bpf_prog_load(const char *file, enum bpf_prog_type type,
+ struct bpf_object **pobj, int *prog_fd);
+
+struct xdp_link_info {
+ __u32 prog_id;
+ __u32 drv_prog_id;
+ __u32 hw_prog_id;
+ __u32 skb_prog_id;
+ __u8 attach_mode;
+};
+
+LIBBPF_API int bpf_set_link_xdp_fd(int ifindex, int fd, __u32 flags);
+LIBBPF_API int bpf_get_link_xdp_id(int ifindex, __u32 *prog_id, __u32 flags);
+LIBBPF_API int bpf_get_link_xdp_info(int ifindex, struct xdp_link_info *info,
+ size_t info_size, __u32 flags);
+
+struct perf_buffer;
+
+typedef void (*perf_buffer_sample_fn)(void *ctx, int cpu,
+ void *data, __u32 size);
+typedef void (*perf_buffer_lost_fn)(void *ctx, int cpu, __u64 cnt);
+
+/* common use perf buffer options */
+struct perf_buffer_opts {
+ /* if specified, sample_cb is called for each sample */
+ perf_buffer_sample_fn sample_cb;
+ /* if specified, lost_cb is called for each batch of lost samples */
+ perf_buffer_lost_fn lost_cb;
+ /* ctx is provided to sample_cb and lost_cb */
+ void *ctx;
+};
+
+LIBBPF_API struct perf_buffer *
+perf_buffer__new(int map_fd, size_t page_cnt,
+ const struct perf_buffer_opts *opts);
+
+enum bpf_perf_event_ret {
+ LIBBPF_PERF_EVENT_DONE = 0,
+ LIBBPF_PERF_EVENT_ERROR = -1,
+ LIBBPF_PERF_EVENT_CONT = -2,
+};
+
+struct perf_event_header;
+
+typedef enum bpf_perf_event_ret
+(*perf_buffer_event_fn)(void *ctx, int cpu, struct perf_event_header *event);
+
+/* raw perf buffer options, giving most power and control */
+struct perf_buffer_raw_opts {
+ /* perf event attrs passed directly into perf_event_open() */
+ struct perf_event_attr *attr;
+ /* raw event callback */
+ perf_buffer_event_fn event_cb;
+ /* ctx is provided to event_cb */
+ void *ctx;
+ /* if cpu_cnt == 0, open all on all possible CPUs (up to the number of
+ * max_entries of given PERF_EVENT_ARRAY map)
+ */
+ int cpu_cnt;
+ /* if cpu_cnt > 0, cpus is an array of CPUs to open ring buffers on */
+ int *cpus;
+ /* if cpu_cnt > 0, map_keys specify map keys to set per-CPU FDs for */
+ int *map_keys;
+};
+
+LIBBPF_API struct perf_buffer *
+perf_buffer__new_raw(int map_fd, size_t page_cnt,
+ const struct perf_buffer_raw_opts *opts);
+
+LIBBPF_API void perf_buffer__free(struct perf_buffer *pb);
+LIBBPF_API int perf_buffer__poll(struct perf_buffer *pb, int timeout_ms);
+
+typedef enum bpf_perf_event_ret
+ (*bpf_perf_event_print_t)(struct perf_event_header *hdr,
+ void *private_data);
+LIBBPF_API enum bpf_perf_event_ret
+bpf_perf_event_read_simple(void *mmap_mem, size_t mmap_size, size_t page_size,
+ void **copy_mem, size_t *copy_size,
+ bpf_perf_event_print_t fn, void *private_data);
+
+struct nlattr;
+typedef int (*libbpf_dump_nlmsg_t)(void *cookie, void *msg, struct nlattr **tb);
+int libbpf_netlink_open(unsigned int *nl_pid);
+int libbpf_nl_get_link(int sock, unsigned int nl_pid,
+ libbpf_dump_nlmsg_t dump_link_nlmsg, void *cookie);
+int libbpf_nl_get_class(int sock, unsigned int nl_pid, int ifindex,
+ libbpf_dump_nlmsg_t dump_class_nlmsg, void *cookie);
+int libbpf_nl_get_qdisc(int sock, unsigned int nl_pid, int ifindex,
+ libbpf_dump_nlmsg_t dump_qdisc_nlmsg, void *cookie);
+int libbpf_nl_get_filter(int sock, unsigned int nl_pid, int ifindex, int handle,
+ libbpf_dump_nlmsg_t dump_filter_nlmsg, void *cookie);
+
+struct bpf_prog_linfo;
+struct bpf_prog_info;
+
+LIBBPF_API void bpf_prog_linfo__free(struct bpf_prog_linfo *prog_linfo);
+LIBBPF_API struct bpf_prog_linfo *
+bpf_prog_linfo__new(const struct bpf_prog_info *info);
+LIBBPF_API const struct bpf_line_info *
+bpf_prog_linfo__lfind_addr_func(const struct bpf_prog_linfo *prog_linfo,
+ __u64 addr, __u32 func_idx, __u32 nr_skip);
+LIBBPF_API const struct bpf_line_info *
+bpf_prog_linfo__lfind(const struct bpf_prog_linfo *prog_linfo,
+ __u32 insn_off, __u32 nr_skip);
+
+/*
+ * Probe for supported system features
+ *
+ * Note that running many of these probes in a short amount of time can cause
+ * the kernel to reach the maximal size of lockable memory allowed for the
+ * user, causing subsequent probes to fail. In this case, the caller may want
+ * to adjust that limit with setrlimit().
+ */
+LIBBPF_API bool bpf_probe_prog_type(enum bpf_prog_type prog_type,
+ __u32 ifindex);
+LIBBPF_API bool bpf_probe_map_type(enum bpf_map_type map_type, __u32 ifindex);
+LIBBPF_API bool bpf_probe_helper(enum bpf_func_id id,
+ enum bpf_prog_type prog_type, __u32 ifindex);
+
+/*
+ * Get bpf_prog_info in continuous memory
+ *
+ * struct bpf_prog_info has multiple arrays. The user has option to choose
+ * arrays to fetch from kernel. The following APIs provide an uniform way to
+ * fetch these data. All arrays in bpf_prog_info are stored in a single
+ * continuous memory region. This makes it easy to store the info in a
+ * file.
+ *
+ * Before writing bpf_prog_info_linear to files, it is necessary to
+ * translate pointers in bpf_prog_info to offsets. Helper functions
+ * bpf_program__bpil_addr_to_offs() and bpf_program__bpil_offs_to_addr()
+ * are introduced to switch between pointers and offsets.
+ *
+ * Examples:
+ * # To fetch map_ids and prog_tags:
+ * __u64 arrays = (1UL << BPF_PROG_INFO_MAP_IDS) |
+ * (1UL << BPF_PROG_INFO_PROG_TAGS);
+ * struct bpf_prog_info_linear *info_linear =
+ * bpf_program__get_prog_info_linear(fd, arrays);
+ *
+ * # To save data in file
+ * bpf_program__bpil_addr_to_offs(info_linear);
+ * write(f, info_linear, sizeof(*info_linear) + info_linear->data_len);
+ *
+ * # To read data from file
+ * read(f, info_linear, <proper_size>);
+ * bpf_program__bpil_offs_to_addr(info_linear);
+ */
+enum bpf_prog_info_array {
+ BPF_PROG_INFO_FIRST_ARRAY = 0,
+ BPF_PROG_INFO_JITED_INSNS = 0,
+ BPF_PROG_INFO_XLATED_INSNS,
+ BPF_PROG_INFO_MAP_IDS,
+ BPF_PROG_INFO_JITED_KSYMS,
+ BPF_PROG_INFO_JITED_FUNC_LENS,
+ BPF_PROG_INFO_FUNC_INFO,
+ BPF_PROG_INFO_LINE_INFO,
+ BPF_PROG_INFO_JITED_LINE_INFO,
+ BPF_PROG_INFO_PROG_TAGS,
+ BPF_PROG_INFO_LAST_ARRAY,
+};
+
+struct bpf_prog_info_linear {
+ /* size of struct bpf_prog_info, when the tool is compiled */
+ __u32 info_len;
+ /* total bytes allocated for data, round up to 8 bytes */
+ __u32 data_len;
+ /* which arrays are included in data */
+ __u64 arrays;
+ struct bpf_prog_info info;
+ __u8 data[];
+};
+
+LIBBPF_API struct bpf_prog_info_linear *
+bpf_program__get_prog_info_linear(int fd, __u64 arrays);
+
+LIBBPF_API void
+bpf_program__bpil_addr_to_offs(struct bpf_prog_info_linear *info_linear);
+
+LIBBPF_API void
+bpf_program__bpil_offs_to_addr(struct bpf_prog_info_linear *info_linear);
+
+/*
+ * A helper function to get the number of possible CPUs before looking up
+ * per-CPU maps. Negative errno is returned on failure.
+ *
+ * Example usage:
+ *
+ * int ncpus = libbpf_num_possible_cpus();
+ * if (ncpus < 0) {
+ * // error handling
+ * }
+ * long values[ncpus];
+ * bpf_map_lookup_elem(per_cpu_map_fd, key, values);
+ *
+ */
+LIBBPF_API int libbpf_num_possible_cpus(void);
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#endif /* __LIBBPF_LIBBPF_H */
diff --git a/src/contrib/libbpf/bpf/libbpf_errno.c b/src/contrib/libbpf/bpf/libbpf_errno.c
new file mode 100644
index 0000000..4343e40
--- /dev/null
+++ b/src/contrib/libbpf/bpf/libbpf_errno.c
@@ -0,0 +1,63 @@
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+
+/*
+ * Copyright (C) 2013-2015 Alexei Starovoitov <ast@kernel.org>
+ * Copyright (C) 2015 Wang Nan <wangnan0@huawei.com>
+ * Copyright (C) 2015 Huawei Inc.
+ * Copyright (C) 2017 Nicira, Inc.
+ */
+
+#undef _GNU_SOURCE
+#include <stdio.h>
+#include <string.h>
+
+#include "libbpf.h"
+
+#define ERRNO_OFFSET(e) ((e) - __LIBBPF_ERRNO__START)
+#define ERRCODE_OFFSET(c) ERRNO_OFFSET(LIBBPF_ERRNO__##c)
+#define NR_ERRNO (__LIBBPF_ERRNO__END - __LIBBPF_ERRNO__START)
+
+static const char *libbpf_strerror_table[NR_ERRNO] = {
+ [ERRCODE_OFFSET(LIBELF)] = "Something wrong in libelf",
+ [ERRCODE_OFFSET(FORMAT)] = "BPF object format invalid",
+ [ERRCODE_OFFSET(KVERSION)] = "'version' section incorrect or lost",
+ [ERRCODE_OFFSET(ENDIAN)] = "Endian mismatch",
+ [ERRCODE_OFFSET(INTERNAL)] = "Internal error in libbpf",
+ [ERRCODE_OFFSET(RELOC)] = "Relocation failed",
+ [ERRCODE_OFFSET(VERIFY)] = "Kernel verifier blocks program loading",
+ [ERRCODE_OFFSET(PROG2BIG)] = "Program too big",
+ [ERRCODE_OFFSET(KVER)] = "Incorrect kernel version",
+ [ERRCODE_OFFSET(PROGTYPE)] = "Kernel doesn't support this program type",
+ [ERRCODE_OFFSET(WRNGPID)] = "Wrong pid in netlink message",
+ [ERRCODE_OFFSET(INVSEQ)] = "Invalid netlink sequence",
+ [ERRCODE_OFFSET(NLPARSE)] = "Incorrect netlink message parsing",
+};
+
+int libbpf_strerror(int err, char *buf, size_t size)
+{
+ if (!buf || !size)
+ return -1;
+
+ err = err > 0 ? err : -err;
+
+ if (err < __LIBBPF_ERRNO__START) {
+ int ret;
+
+ ret = strerror_r(err, buf, size);
+ buf[size - 1] = '\0';
+ return ret;
+ }
+
+ if (err < __LIBBPF_ERRNO__END) {
+ const char *msg;
+
+ msg = libbpf_strerror_table[ERRNO_OFFSET(err)];
+ snprintf(buf, size, "%s", msg);
+ buf[size - 1] = '\0';
+ return 0;
+ }
+
+ snprintf(buf, size, "Unknown libbpf error %d", err);
+ buf[size - 1] = '\0';
+ return -1;
+}
diff --git a/src/contrib/libbpf/bpf/libbpf_internal.h b/src/contrib/libbpf/bpf/libbpf_internal.h
new file mode 100644
index 0000000..97ac17a
--- /dev/null
+++ b/src/contrib/libbpf/bpf/libbpf_internal.h
@@ -0,0 +1,217 @@
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
+
+/*
+ * Internal libbpf helpers.
+ *
+ * Copyright (c) 2019 Facebook
+ */
+
+#ifndef __LIBBPF_LIBBPF_INTERNAL_H
+#define __LIBBPF_LIBBPF_INTERNAL_H
+
+#include "libbpf.h"
+
+#define BTF_INFO_ENC(kind, kind_flag, vlen) \
+ ((!!(kind_flag) << 31) | ((kind) << 24) | ((vlen) & BTF_MAX_VLEN))
+#define BTF_TYPE_ENC(name, info, size_or_type) (name), (info), (size_or_type)
+#define BTF_INT_ENC(encoding, bits_offset, nr_bits) \
+ ((encoding) << 24 | (bits_offset) << 16 | (nr_bits))
+#define BTF_TYPE_INT_ENC(name, encoding, bits_offset, bits, sz) \
+ BTF_TYPE_ENC(name, BTF_INFO_ENC(BTF_KIND_INT, 0, 0), sz), \
+ BTF_INT_ENC(encoding, bits_offset, bits)
+#define BTF_MEMBER_ENC(name, type, bits_offset) (name), (type), (bits_offset)
+#define BTF_PARAM_ENC(name, type) (name), (type)
+#define BTF_VAR_SECINFO_ENC(type, offset, size) (type), (offset), (size)
+
+#ifndef min
+# define min(x, y) ((x) < (y) ? (x) : (y))
+#endif
+#ifndef max
+# define max(x, y) ((x) < (y) ? (y) : (x))
+#endif
+#ifndef offsetofend
+# define offsetofend(TYPE, FIELD) \
+ (offsetof(TYPE, FIELD) + sizeof(((TYPE *)0)->FIELD))
+#endif
+
+/* Symbol versioning is different between static and shared library.
+ * Properly versioned symbols are needed for shared library, but
+ * only the symbol of the new version is needed for static library.
+ */
+#ifdef SHARED
+# define COMPAT_VERSION(internal_name, api_name, version) \
+ asm(".symver " #internal_name "," #api_name "@" #version);
+# define DEFAULT_VERSION(internal_name, api_name, version) \
+ asm(".symver " #internal_name "," #api_name "@@" #version);
+#else
+# define COMPAT_VERSION(internal_name, api_name, version)
+# define DEFAULT_VERSION(internal_name, api_name, version) \
+ extern typeof(internal_name) api_name \
+ __attribute__((alias(#internal_name)));
+#endif
+
+extern void libbpf_print(enum libbpf_print_level level,
+ const char *format, ...)
+ __attribute__((format(printf, 2, 3)));
+
+#define __pr(level, fmt, ...) \
+do { \
+ libbpf_print(level, "libbpf: " fmt, ##__VA_ARGS__); \
+} while (0)
+
+#define pr_warn(fmt, ...) __pr(LIBBPF_WARN, fmt, ##__VA_ARGS__)
+#define pr_info(fmt, ...) __pr(LIBBPF_INFO, fmt, ##__VA_ARGS__)
+#define pr_debug(fmt, ...) __pr(LIBBPF_DEBUG, fmt, ##__VA_ARGS__)
+
+static inline bool libbpf_validate_opts(const char *opts,
+ size_t opts_sz, size_t user_sz,
+ const char *type_name)
+{
+ if (user_sz < sizeof(size_t)) {
+ pr_warn("%s size (%zu) is too small\n", type_name, user_sz);
+ return false;
+ }
+ if (user_sz > opts_sz) {
+ size_t i;
+
+ for (i = opts_sz; i < user_sz; i++) {
+ if (opts[i]) {
+ pr_warn("%s has non-zero extra bytes",
+ type_name);
+ return false;
+ }
+ }
+ }
+ return true;
+}
+
+#define OPTS_VALID(opts, type) \
+ (!(opts) || libbpf_validate_opts((const char *)opts, \
+ offsetofend(struct type, \
+ type##__last_field), \
+ (opts)->sz, #type))
+#define OPTS_HAS(opts, field) \
+ ((opts) && opts->sz >= offsetofend(typeof(*(opts)), field))
+#define OPTS_GET(opts, field, fallback_value) \
+ (OPTS_HAS(opts, field) ? (opts)->field : fallback_value)
+
+int libbpf__load_raw_btf(const char *raw_types, size_t types_len,
+ const char *str_sec, size_t str_len);
+
+struct btf_ext_info {
+ /*
+ * info points to the individual info section (e.g. func_info and
+ * line_info) from the .BTF.ext. It does not include the __u32 rec_size.
+ */
+ void *info;
+ __u32 rec_size;
+ __u32 len;
+};
+
+#define for_each_btf_ext_sec(seg, sec) \
+ for (sec = (seg)->info; \
+ (void *)sec < (seg)->info + (seg)->len; \
+ sec = (void *)sec + sizeof(struct btf_ext_info_sec) + \
+ (seg)->rec_size * sec->num_info)
+
+#define for_each_btf_ext_rec(seg, sec, i, rec) \
+ for (i = 0, rec = (void *)&(sec)->data; \
+ i < (sec)->num_info; \
+ i++, rec = (void *)rec + (seg)->rec_size)
+
+struct btf_ext {
+ union {
+ struct btf_ext_header *hdr;
+ void *data;
+ };
+ struct btf_ext_info func_info;
+ struct btf_ext_info line_info;
+ struct btf_ext_info field_reloc_info;
+ __u32 data_size;
+};
+
+struct btf_ext_info_sec {
+ __u32 sec_name_off;
+ __u32 num_info;
+ /* Followed by num_info * record_size number of bytes */
+ __u8 data[0];
+};
+
+/* The minimum bpf_func_info checked by the loader */
+struct bpf_func_info_min {
+ __u32 insn_off;
+ __u32 type_id;
+};
+
+/* The minimum bpf_line_info checked by the loader */
+struct bpf_line_info_min {
+ __u32 insn_off;
+ __u32 file_name_off;
+ __u32 line_off;
+ __u32 line_col;
+};
+
+/* bpf_field_info_kind encodes which aspect of captured field has to be
+ * adjusted by relocations. Currently supported values are:
+ * - BPF_FIELD_BYTE_OFFSET: field offset (in bytes);
+ * - BPF_FIELD_EXISTS: field existence (1, if field exists; 0, otherwise);
+ */
+enum bpf_field_info_kind {
+ BPF_FIELD_BYTE_OFFSET = 0, /* field byte offset */
+ BPF_FIELD_BYTE_SIZE = 1,
+ BPF_FIELD_EXISTS = 2, /* field existence in target kernel */
+ BPF_FIELD_SIGNED = 3,
+ BPF_FIELD_LSHIFT_U64 = 4,
+ BPF_FIELD_RSHIFT_U64 = 5,
+};
+
+/* The minimum bpf_field_reloc checked by the loader
+ *
+ * Field relocation captures the following data:
+ * - insn_off - instruction offset (in bytes) within a BPF program that needs
+ * its insn->imm field to be relocated with actual field info;
+ * - type_id - BTF type ID of the "root" (containing) entity of a relocatable
+ * field;
+ * - access_str_off - offset into corresponding .BTF string section. String
+ * itself encodes an accessed field using a sequence of field and array
+ * indicies, separated by colon (:). It's conceptually very close to LLVM's
+ * getelementptr ([0]) instruction's arguments for identifying offset to
+ * a field.
+ *
+ * Example to provide a better feel.
+ *
+ * struct sample {
+ * int a;
+ * struct {
+ * int b[10];
+ * };
+ * };
+ *
+ * struct sample *s = ...;
+ * int x = &s->a; // encoded as "0:0" (a is field #0)
+ * int y = &s->b[5]; // encoded as "0:1:0:5" (anon struct is field #1,
+ * // b is field #0 inside anon struct, accessing elem #5)
+ * int z = &s[10]->b; // encoded as "10:1" (ptr is used as an array)
+ *
+ * type_id for all relocs in this example will capture BTF type id of
+ * `struct sample`.
+ *
+ * Such relocation is emitted when using __builtin_preserve_access_index()
+ * Clang built-in, passing expression that captures field address, e.g.:
+ *
+ * bpf_probe_read(&dst, sizeof(dst),
+ * __builtin_preserve_access_index(&src->a.b.c));
+ *
+ * In this case Clang will emit field relocation recording necessary data to
+ * be able to find offset of embedded `a.b.c` field within `src` struct.
+ *
+ * [0] https://llvm.org/docs/LangRef.html#getelementptr-instruction
+ */
+struct bpf_field_reloc {
+ __u32 insn_off;
+ __u32 type_id;
+ __u32 access_str_off;
+ enum bpf_field_info_kind kind;
+};
+
+#endif /* __LIBBPF_LIBBPF_INTERNAL_H */
diff --git a/src/contrib/libbpf/bpf/libbpf_probes.c b/src/contrib/libbpf/bpf/libbpf_probes.c
new file mode 100644
index 0000000..a9eb8b3
--- /dev/null
+++ b/src/contrib/libbpf/bpf/libbpf_probes.c
@@ -0,0 +1,323 @@
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+/* Copyright (c) 2019 Netronome Systems, Inc. */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <string.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <net/if.h>
+#include <sys/utsname.h>
+
+#include <linux/btf.h>
+#include <linux/filter.h>
+#include <linux/kernel.h>
+
+#include "bpf.h"
+#include "libbpf.h"
+#include "libbpf_internal.h"
+
+static bool grep(const char *buffer, const char *pattern)
+{
+ return !!strstr(buffer, pattern);
+}
+
+static int get_vendor_id(int ifindex)
+{
+ char ifname[IF_NAMESIZE], path[64], buf[8];
+ ssize_t len;
+ int fd;
+
+ if (!if_indextoname(ifindex, ifname))
+ return -1;
+
+ snprintf(path, sizeof(path), "/sys/class/net/%s/device/vendor", ifname);
+
+ fd = open(path, O_RDONLY);
+ if (fd < 0)
+ return -1;
+
+ len = read(fd, buf, sizeof(buf));
+ close(fd);
+ if (len < 0)
+ return -1;
+ if (len >= (ssize_t)sizeof(buf))
+ return -1;
+ buf[len] = '\0';
+
+ return strtol(buf, NULL, 0);
+}
+
+static int get_kernel_version(void)
+{
+ int version, subversion, patchlevel;
+ struct utsname utsn;
+
+ /* Return 0 on failure, and attempt to probe with empty kversion */
+ if (uname(&utsn))
+ return 0;
+
+ if (sscanf(utsn.release, "%d.%d.%d",
+ &version, &subversion, &patchlevel) != 3)
+ return 0;
+
+ return (version << 16) + (subversion << 8) + patchlevel;
+}
+
+static void
+probe_load(enum bpf_prog_type prog_type, const struct bpf_insn *insns,
+ size_t insns_cnt, char *buf, size_t buf_len, __u32 ifindex)
+{
+ struct bpf_load_program_attr xattr = {};
+ int fd;
+
+ switch (prog_type) {
+ case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
+ xattr.expected_attach_type = BPF_CGROUP_INET4_CONNECT;
+ break;
+ case BPF_PROG_TYPE_KPROBE:
+ xattr.kern_version = get_kernel_version();
+ break;
+ case BPF_PROG_TYPE_UNSPEC:
+ case BPF_PROG_TYPE_SOCKET_FILTER:
+ case BPF_PROG_TYPE_SCHED_CLS:
+ case BPF_PROG_TYPE_SCHED_ACT:
+ case BPF_PROG_TYPE_TRACEPOINT:
+ case BPF_PROG_TYPE_XDP:
+ case BPF_PROG_TYPE_PERF_EVENT:
+ case BPF_PROG_TYPE_CGROUP_SKB:
+ case BPF_PROG_TYPE_CGROUP_SOCK:
+ case BPF_PROG_TYPE_LWT_IN:
+ case BPF_PROG_TYPE_LWT_OUT:
+ case BPF_PROG_TYPE_LWT_XMIT:
+ case BPF_PROG_TYPE_SOCK_OPS:
+ case BPF_PROG_TYPE_SK_SKB:
+ case BPF_PROG_TYPE_CGROUP_DEVICE:
+ case BPF_PROG_TYPE_SK_MSG:
+ case BPF_PROG_TYPE_RAW_TRACEPOINT:
+ case BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE:
+ case BPF_PROG_TYPE_LWT_SEG6LOCAL:
+ case BPF_PROG_TYPE_LIRC_MODE2:
+ case BPF_PROG_TYPE_SK_REUSEPORT:
+ case BPF_PROG_TYPE_FLOW_DISSECTOR:
+ case BPF_PROG_TYPE_CGROUP_SYSCTL:
+ case BPF_PROG_TYPE_CGROUP_SOCKOPT:
+ case BPF_PROG_TYPE_TRACING:
+ default:
+ break;
+ }
+
+ xattr.prog_type = prog_type;
+ xattr.insns = insns;
+ xattr.insns_cnt = insns_cnt;
+ xattr.license = "GPL";
+ xattr.prog_ifindex = ifindex;
+
+ fd = bpf_load_program_xattr(&xattr, buf, buf_len);
+ if (fd >= 0)
+ close(fd);
+}
+
+bool bpf_probe_prog_type(enum bpf_prog_type prog_type, __u32 ifindex)
+{
+ struct bpf_insn insns[2] = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN()
+ };
+
+ if (ifindex && prog_type == BPF_PROG_TYPE_SCHED_CLS)
+ /* nfp returns -EINVAL on exit(0) with TC offload */
+ insns[0].imm = 2;
+
+ errno = 0;
+ probe_load(prog_type, insns, ARRAY_SIZE(insns), NULL, 0, ifindex);
+
+ return errno != EINVAL && errno != EOPNOTSUPP;
+}
+
+int libbpf__load_raw_btf(const char *raw_types, size_t types_len,
+ const char *str_sec, size_t str_len)
+{
+ struct btf_header hdr = {
+ .magic = BTF_MAGIC,
+ .version = BTF_VERSION,
+ .hdr_len = sizeof(struct btf_header),
+ .type_len = types_len,
+ .str_off = types_len,
+ .str_len = str_len,
+ };
+ int btf_fd, btf_len;
+ __u8 *raw_btf;
+
+ btf_len = hdr.hdr_len + hdr.type_len + hdr.str_len;
+ raw_btf = malloc(btf_len);
+ if (!raw_btf)
+ return -ENOMEM;
+
+ memcpy(raw_btf, &hdr, sizeof(hdr));
+ memcpy(raw_btf + hdr.hdr_len, raw_types, hdr.type_len);
+ memcpy(raw_btf + hdr.hdr_len + hdr.type_len, str_sec, hdr.str_len);
+
+ btf_fd = bpf_load_btf(raw_btf, btf_len, NULL, 0, false);
+
+ free(raw_btf);
+ return btf_fd;
+}
+
+static int load_sk_storage_btf(void)
+{
+ const char strs[] = "\0bpf_spin_lock\0val\0cnt\0l";
+ /* struct bpf_spin_lock {
+ * int val;
+ * };
+ * struct val {
+ * int cnt;
+ * struct bpf_spin_lock l;
+ * };
+ */
+ __u32 types[] = {
+ /* int */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ /* struct bpf_spin_lock */ /* [2] */
+ BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 1), 4),
+ BTF_MEMBER_ENC(15, 1, 0), /* int val; */
+ /* struct val */ /* [3] */
+ BTF_TYPE_ENC(15, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 2), 8),
+ BTF_MEMBER_ENC(19, 1, 0), /* int cnt; */
+ BTF_MEMBER_ENC(23, 2, 32),/* struct bpf_spin_lock l; */
+ };
+
+ return libbpf__load_raw_btf((char *)types, sizeof(types),
+ strs, sizeof(strs));
+}
+
+bool bpf_probe_map_type(enum bpf_map_type map_type, __u32 ifindex)
+{
+ int key_size, value_size, max_entries, map_flags;
+ __u32 btf_key_type_id = 0, btf_value_type_id = 0;
+ struct bpf_create_map_attr attr = {};
+ int fd = -1, btf_fd = -1, fd_inner;
+
+ key_size = sizeof(__u32);
+ value_size = sizeof(__u32);
+ max_entries = 1;
+ map_flags = 0;
+
+ switch (map_type) {
+ case BPF_MAP_TYPE_STACK_TRACE:
+ value_size = sizeof(__u64);
+ break;
+ case BPF_MAP_TYPE_LPM_TRIE:
+ key_size = sizeof(__u64);
+ value_size = sizeof(__u64);
+ map_flags = BPF_F_NO_PREALLOC;
+ break;
+ case BPF_MAP_TYPE_CGROUP_STORAGE:
+ case BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE:
+ key_size = sizeof(struct bpf_cgroup_storage_key);
+ value_size = sizeof(__u64);
+ max_entries = 0;
+ break;
+ case BPF_MAP_TYPE_QUEUE:
+ case BPF_MAP_TYPE_STACK:
+ key_size = 0;
+ break;
+ case BPF_MAP_TYPE_SK_STORAGE:
+ btf_key_type_id = 1;
+ btf_value_type_id = 3;
+ value_size = 8;
+ max_entries = 0;
+ map_flags = BPF_F_NO_PREALLOC;
+ btf_fd = load_sk_storage_btf();
+ if (btf_fd < 0)
+ return false;
+ break;
+ case BPF_MAP_TYPE_UNSPEC:
+ case BPF_MAP_TYPE_HASH:
+ case BPF_MAP_TYPE_ARRAY:
+ case BPF_MAP_TYPE_PROG_ARRAY:
+ case BPF_MAP_TYPE_PERF_EVENT_ARRAY:
+ case BPF_MAP_TYPE_PERCPU_HASH:
+ case BPF_MAP_TYPE_PERCPU_ARRAY:
+ case BPF_MAP_TYPE_CGROUP_ARRAY:
+ case BPF_MAP_TYPE_LRU_HASH:
+ case BPF_MAP_TYPE_LRU_PERCPU_HASH:
+ case BPF_MAP_TYPE_ARRAY_OF_MAPS:
+ case BPF_MAP_TYPE_HASH_OF_MAPS:
+ case BPF_MAP_TYPE_DEVMAP:
+ case BPF_MAP_TYPE_DEVMAP_HASH:
+ case BPF_MAP_TYPE_SOCKMAP:
+ case BPF_MAP_TYPE_CPUMAP:
+ case BPF_MAP_TYPE_XSKMAP:
+ case BPF_MAP_TYPE_SOCKHASH:
+ case BPF_MAP_TYPE_REUSEPORT_SOCKARRAY:
+ default:
+ break;
+ }
+
+ if (map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS ||
+ map_type == BPF_MAP_TYPE_HASH_OF_MAPS) {
+ /* TODO: probe for device, once libbpf has a function to create
+ * map-in-map for offload
+ */
+ if (ifindex)
+ return false;
+
+ fd_inner = bpf_create_map(BPF_MAP_TYPE_HASH,
+ sizeof(__u32), sizeof(__u32), 1, 0);
+ if (fd_inner < 0)
+ return false;
+ fd = bpf_create_map_in_map(map_type, NULL, sizeof(__u32),
+ fd_inner, 1, 0);
+ close(fd_inner);
+ } else {
+ /* Note: No other restriction on map type probes for offload */
+ attr.map_type = map_type;
+ attr.key_size = key_size;
+ attr.value_size = value_size;
+ attr.max_entries = max_entries;
+ attr.map_flags = map_flags;
+ attr.map_ifindex = ifindex;
+ if (btf_fd >= 0) {
+ attr.btf_fd = btf_fd;
+ attr.btf_key_type_id = btf_key_type_id;
+ attr.btf_value_type_id = btf_value_type_id;
+ }
+
+ fd = bpf_create_map_xattr(&attr);
+ }
+ if (fd >= 0)
+ close(fd);
+ if (btf_fd >= 0)
+ close(btf_fd);
+
+ return fd >= 0;
+}
+
+bool bpf_probe_helper(enum bpf_func_id id, enum bpf_prog_type prog_type,
+ __u32 ifindex)
+{
+ struct bpf_insn insns[2] = {
+ BPF_EMIT_CALL(id),
+ BPF_EXIT_INSN()
+ };
+ char buf[4096] = {};
+ bool res;
+
+ probe_load(prog_type, insns, ARRAY_SIZE(insns), buf, sizeof(buf),
+ ifindex);
+ res = !grep(buf, "invalid func ") && !grep(buf, "unknown func ");
+
+ if (ifindex) {
+ switch (get_vendor_id(ifindex)) {
+ case 0x19ee: /* Netronome specific */
+ res = res && !grep(buf, "not supported by FW") &&
+ !grep(buf, "unsupported function id");
+ break;
+ default:
+ break;
+ }
+ }
+
+ return res;
+}
diff --git a/src/contrib/libbpf/bpf/libbpf_util.h b/src/contrib/libbpf/bpf/libbpf_util.h
new file mode 100644
index 0000000..59c779c
--- /dev/null
+++ b/src/contrib/libbpf/bpf/libbpf_util.h
@@ -0,0 +1,47 @@
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
+/* Copyright (c) 2019 Facebook */
+
+#ifndef __LIBBPF_LIBBPF_UTIL_H
+#define __LIBBPF_LIBBPF_UTIL_H
+
+#include <stdbool.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Use these barrier functions instead of smp_[rw]mb() when they are
+ * used in a libbpf header file. That way they can be built into the
+ * application that uses libbpf.
+ */
+#if defined(__i386__) || defined(__x86_64__)
+# define libbpf_smp_rmb() asm volatile("" : : : "memory")
+# define libbpf_smp_wmb() asm volatile("" : : : "memory")
+# define libbpf_smp_mb() \
+ asm volatile("lock; addl $0,-4(%%rsp)" : : : "memory", "cc")
+/* Hinders stores to be observed before older loads. */
+# define libbpf_smp_rwmb() asm volatile("" : : : "memory")
+#elif defined(__aarch64__)
+# define libbpf_smp_rmb() asm volatile("dmb ishld" : : : "memory")
+# define libbpf_smp_wmb() asm volatile("dmb ishst" : : : "memory")
+# define libbpf_smp_mb() asm volatile("dmb ish" : : : "memory")
+# define libbpf_smp_rwmb() libbpf_smp_mb()
+#elif defined(__arm__)
+/* These are only valid for armv7 and above */
+# define libbpf_smp_rmb() asm volatile("dmb ish" : : : "memory")
+# define libbpf_smp_wmb() asm volatile("dmb ishst" : : : "memory")
+# define libbpf_smp_mb() asm volatile("dmb ish" : : : "memory")
+# define libbpf_smp_rwmb() libbpf_smp_mb()
+#else
+/* Architecture missing native barrier functions. */
+# define libbpf_smp_rmb() __sync_synchronize()
+# define libbpf_smp_wmb() __sync_synchronize()
+# define libbpf_smp_mb() __sync_synchronize()
+# define libbpf_smp_rwmb() __sync_synchronize()
+#endif
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#endif
diff --git a/src/contrib/libbpf/bpf/netlink.c b/src/contrib/libbpf/bpf/netlink.c
new file mode 100644
index 0000000..5065c1a
--- /dev/null
+++ b/src/contrib/libbpf/bpf/netlink.c
@@ -0,0 +1,451 @@
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+/* Copyright (c) 2018 Facebook */
+
+#include <stdlib.h>
+#include <memory.h>
+#include <unistd.h>
+#include <linux/bpf.h>
+#include <linux/rtnetlink.h>
+#include <sys/socket.h>
+#include <errno.h>
+#include <time.h>
+
+#include "bpf.h"
+#include "libbpf.h"
+#include "libbpf_internal.h"
+#include "nlattr.h"
+
+#ifndef SOL_NETLINK
+#define SOL_NETLINK 270
+#endif
+
+typedef int (*__dump_nlmsg_t)(struct nlmsghdr *nlmsg, libbpf_dump_nlmsg_t,
+ void *cookie);
+
+struct xdp_id_md {
+ int ifindex;
+ __u32 flags;
+ struct xdp_link_info info;
+};
+
+int libbpf_netlink_open(__u32 *nl_pid)
+{
+ struct sockaddr_nl sa;
+ socklen_t addrlen;
+ int one = 1, ret;
+ int sock;
+
+ memset(&sa, 0, sizeof(sa));
+ sa.nl_family = AF_NETLINK;
+
+ sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
+ if (sock < 0)
+ return -errno;
+
+ if (setsockopt(sock, SOL_NETLINK, NETLINK_EXT_ACK,
+ &one, sizeof(one)) < 0) {
+ pr_warn("Netlink error reporting not supported\n");
+ }
+
+ if (bind(sock, (struct sockaddr *)&sa, sizeof(sa)) < 0) {
+ ret = -errno;
+ goto cleanup;
+ }
+
+ addrlen = sizeof(sa);
+ if (getsockname(sock, (struct sockaddr *)&sa, &addrlen) < 0) {
+ ret = -errno;
+ goto cleanup;
+ }
+
+ if (addrlen != sizeof(sa)) {
+ ret = -LIBBPF_ERRNO__INTERNAL;
+ goto cleanup;
+ }
+
+ *nl_pid = sa.nl_pid;
+ return sock;
+
+cleanup:
+ close(sock);
+ return ret;
+}
+
+static int bpf_netlink_recv(int sock, __u32 nl_pid, int seq,
+ __dump_nlmsg_t _fn, libbpf_dump_nlmsg_t fn,
+ void *cookie)
+{
+ bool multipart = true;
+ struct nlmsgerr *err;
+ struct nlmsghdr *nh;
+ char buf[4096];
+ int len, ret;
+
+ while (multipart) {
+ multipart = false;
+ len = recv(sock, buf, sizeof(buf), 0);
+ if (len < 0) {
+ ret = -errno;
+ goto done;
+ }
+
+ if (len == 0)
+ break;
+
+ for (nh = (struct nlmsghdr *)buf; NLMSG_OK(nh, len);
+ nh = NLMSG_NEXT(nh, len)) {
+ if (nh->nlmsg_pid != nl_pid) {
+ ret = -LIBBPF_ERRNO__WRNGPID;
+ goto done;
+ }
+ if (nh->nlmsg_seq != seq) {
+ ret = -LIBBPF_ERRNO__INVSEQ;
+ goto done;
+ }
+ if (nh->nlmsg_flags & NLM_F_MULTI)
+ multipart = true;
+ switch (nh->nlmsg_type) {
+ case NLMSG_ERROR:
+ err = (struct nlmsgerr *)NLMSG_DATA(nh);
+ if (!err->error)
+ continue;
+ ret = err->error;
+ libbpf_nla_dump_errormsg(nh);
+ goto done;
+ case NLMSG_DONE:
+ return 0;
+ default:
+ break;
+ }
+ if (_fn) {
+ ret = _fn(nh, fn, cookie);
+ if (ret)
+ return ret;
+ }
+ }
+ }
+ ret = 0;
+done:
+ return ret;
+}
+
+int bpf_set_link_xdp_fd(int ifindex, int fd, __u32 flags)
+{
+ int sock, seq = 0, ret;
+ struct nlattr *nla, *nla_xdp;
+ struct {
+ struct nlmsghdr nh;
+ struct ifinfomsg ifinfo;
+ char attrbuf[64];
+ } req;
+ __u32 nl_pid;
+
+ sock = libbpf_netlink_open(&nl_pid);
+ if (sock < 0)
+ return sock;
+
+ memset(&req, 0, sizeof(req));
+ req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg));
+ req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
+ req.nh.nlmsg_type = RTM_SETLINK;
+ req.nh.nlmsg_pid = 0;
+ req.nh.nlmsg_seq = ++seq;
+ req.ifinfo.ifi_family = AF_UNSPEC;
+ req.ifinfo.ifi_index = ifindex;
+
+ /* started nested attribute for XDP */
+ nla = (struct nlattr *)(((char *)&req)
+ + NLMSG_ALIGN(req.nh.nlmsg_len));
+ nla->nla_type = NLA_F_NESTED | IFLA_XDP;
+ nla->nla_len = NLA_HDRLEN;
+
+ /* add XDP fd */
+ nla_xdp = (struct nlattr *)((char *)nla + nla->nla_len);
+ nla_xdp->nla_type = IFLA_XDP_FD;
+ nla_xdp->nla_len = NLA_HDRLEN + sizeof(int);
+ memcpy((char *)nla_xdp + NLA_HDRLEN, &fd, sizeof(fd));
+ nla->nla_len += nla_xdp->nla_len;
+
+ /* if user passed in any flags, add those too */
+ if (flags) {
+ nla_xdp = (struct nlattr *)((char *)nla + nla->nla_len);
+ nla_xdp->nla_type = IFLA_XDP_FLAGS;
+ nla_xdp->nla_len = NLA_HDRLEN + sizeof(flags);
+ memcpy((char *)nla_xdp + NLA_HDRLEN, &flags, sizeof(flags));
+ nla->nla_len += nla_xdp->nla_len;
+ }
+
+ req.nh.nlmsg_len += NLA_ALIGN(nla->nla_len);
+
+ if (send(sock, &req, req.nh.nlmsg_len, 0) < 0) {
+ ret = -errno;
+ goto cleanup;
+ }
+ ret = bpf_netlink_recv(sock, nl_pid, seq, NULL, NULL, NULL);
+
+cleanup:
+ close(sock);
+ return ret;
+}
+
+static int __dump_link_nlmsg(struct nlmsghdr *nlh,
+ libbpf_dump_nlmsg_t dump_link_nlmsg, void *cookie)
+{
+ struct nlattr *tb[IFLA_MAX + 1], *attr;
+ struct ifinfomsg *ifi = NLMSG_DATA(nlh);
+ int len;
+
+ len = nlh->nlmsg_len - NLMSG_LENGTH(sizeof(*ifi));
+ attr = (struct nlattr *) ((void *) ifi + NLMSG_ALIGN(sizeof(*ifi)));
+ if (libbpf_nla_parse(tb, IFLA_MAX, attr, len, NULL) != 0)
+ return -LIBBPF_ERRNO__NLPARSE;
+
+ return dump_link_nlmsg(cookie, ifi, tb);
+}
+
+static int get_xdp_info(void *cookie, void *msg, struct nlattr **tb)
+{
+ struct nlattr *xdp_tb[IFLA_XDP_MAX + 1];
+ struct xdp_id_md *xdp_id = cookie;
+ struct ifinfomsg *ifinfo = msg;
+ int ret;
+
+ if (xdp_id->ifindex && xdp_id->ifindex != ifinfo->ifi_index)
+ return 0;
+
+ if (!tb[IFLA_XDP])
+ return 0;
+
+ ret = libbpf_nla_parse_nested(xdp_tb, IFLA_XDP_MAX, tb[IFLA_XDP], NULL);
+ if (ret)
+ return ret;
+
+ if (!xdp_tb[IFLA_XDP_ATTACHED])
+ return 0;
+
+ xdp_id->info.attach_mode = libbpf_nla_getattr_u8(
+ xdp_tb[IFLA_XDP_ATTACHED]);
+
+ if (xdp_id->info.attach_mode == XDP_ATTACHED_NONE)
+ return 0;
+
+ if (xdp_tb[IFLA_XDP_PROG_ID])
+ xdp_id->info.prog_id = libbpf_nla_getattr_u32(
+ xdp_tb[IFLA_XDP_PROG_ID]);
+
+ if (xdp_tb[IFLA_XDP_SKB_PROG_ID])
+ xdp_id->info.skb_prog_id = libbpf_nla_getattr_u32(
+ xdp_tb[IFLA_XDP_SKB_PROG_ID]);
+
+ if (xdp_tb[IFLA_XDP_DRV_PROG_ID])
+ xdp_id->info.drv_prog_id = libbpf_nla_getattr_u32(
+ xdp_tb[IFLA_XDP_DRV_PROG_ID]);
+
+ if (xdp_tb[IFLA_XDP_HW_PROG_ID])
+ xdp_id->info.hw_prog_id = libbpf_nla_getattr_u32(
+ xdp_tb[IFLA_XDP_HW_PROG_ID]);
+
+ return 0;
+}
+
+int bpf_get_link_xdp_info(int ifindex, struct xdp_link_info *info,
+ size_t info_size, __u32 flags)
+{
+ struct xdp_id_md xdp_id = {};
+ int sock, ret;
+ __u32 nl_pid;
+ __u32 mask;
+
+ if (flags & ~XDP_FLAGS_MASK || !info_size)
+ return -EINVAL;
+
+ /* Check whether the single {HW,DRV,SKB} mode is set */
+ flags &= (XDP_FLAGS_SKB_MODE | XDP_FLAGS_DRV_MODE | XDP_FLAGS_HW_MODE);
+ mask = flags - 1;
+ if (flags && flags & mask)
+ return -EINVAL;
+
+ sock = libbpf_netlink_open(&nl_pid);
+ if (sock < 0)
+ return sock;
+
+ xdp_id.ifindex = ifindex;
+ xdp_id.flags = flags;
+
+ ret = libbpf_nl_get_link(sock, nl_pid, get_xdp_info, &xdp_id);
+ if (!ret) {
+ size_t sz = min(info_size, sizeof(xdp_id.info));
+
+ memcpy(info, &xdp_id.info, sz);
+ memset((void *) info + sz, 0, info_size - sz);
+ }
+
+ close(sock);
+ return ret;
+}
+
+static __u32 get_xdp_id(struct xdp_link_info *info, __u32 flags)
+{
+ if (info->attach_mode != XDP_ATTACHED_MULTI)
+ return info->prog_id;
+ if (flags & XDP_FLAGS_DRV_MODE)
+ return info->drv_prog_id;
+ if (flags & XDP_FLAGS_HW_MODE)
+ return info->hw_prog_id;
+ if (flags & XDP_FLAGS_SKB_MODE)
+ return info->skb_prog_id;
+
+ return 0;
+}
+
+int bpf_get_link_xdp_id(int ifindex, __u32 *prog_id, __u32 flags)
+{
+ struct xdp_link_info info;
+ int ret;
+
+ ret = bpf_get_link_xdp_info(ifindex, &info, sizeof(info), flags);
+ if (!ret)
+ *prog_id = get_xdp_id(&info, flags);
+
+ return ret;
+}
+
+int libbpf_nl_get_link(int sock, unsigned int nl_pid,
+ libbpf_dump_nlmsg_t dump_link_nlmsg, void *cookie)
+{
+ struct {
+ struct nlmsghdr nlh;
+ struct ifinfomsg ifm;
+ } req = {
+ .nlh.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)),
+ .nlh.nlmsg_type = RTM_GETLINK,
+ .nlh.nlmsg_flags = NLM_F_DUMP | NLM_F_REQUEST,
+ .ifm.ifi_family = AF_PACKET,
+ };
+ int seq = time(NULL);
+
+ req.nlh.nlmsg_seq = seq;
+ if (send(sock, &req, req.nlh.nlmsg_len, 0) < 0)
+ return -errno;
+
+ return bpf_netlink_recv(sock, nl_pid, seq, __dump_link_nlmsg,
+ dump_link_nlmsg, cookie);
+}
+
+static int __dump_class_nlmsg(struct nlmsghdr *nlh,
+ libbpf_dump_nlmsg_t dump_class_nlmsg,
+ void *cookie)
+{
+ struct nlattr *tb[TCA_MAX + 1], *attr;
+ struct tcmsg *t = NLMSG_DATA(nlh);
+ int len;
+
+ len = nlh->nlmsg_len - NLMSG_LENGTH(sizeof(*t));
+ attr = (struct nlattr *) ((void *) t + NLMSG_ALIGN(sizeof(*t)));
+ if (libbpf_nla_parse(tb, TCA_MAX, attr, len, NULL) != 0)
+ return -LIBBPF_ERRNO__NLPARSE;
+
+ return dump_class_nlmsg(cookie, t, tb);
+}
+
+int libbpf_nl_get_class(int sock, unsigned int nl_pid, int ifindex,
+ libbpf_dump_nlmsg_t dump_class_nlmsg, void *cookie)
+{
+ struct {
+ struct nlmsghdr nlh;
+ struct tcmsg t;
+ } req = {
+ .nlh.nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg)),
+ .nlh.nlmsg_type = RTM_GETTCLASS,
+ .nlh.nlmsg_flags = NLM_F_DUMP | NLM_F_REQUEST,
+ .t.tcm_family = AF_UNSPEC,
+ .t.tcm_ifindex = ifindex,
+ };
+ int seq = time(NULL);
+
+ req.nlh.nlmsg_seq = seq;
+ if (send(sock, &req, req.nlh.nlmsg_len, 0) < 0)
+ return -errno;
+
+ return bpf_netlink_recv(sock, nl_pid, seq, __dump_class_nlmsg,
+ dump_class_nlmsg, cookie);
+}
+
+static int __dump_qdisc_nlmsg(struct nlmsghdr *nlh,
+ libbpf_dump_nlmsg_t dump_qdisc_nlmsg,
+ void *cookie)
+{
+ struct nlattr *tb[TCA_MAX + 1], *attr;
+ struct tcmsg *t = NLMSG_DATA(nlh);
+ int len;
+
+ len = nlh->nlmsg_len - NLMSG_LENGTH(sizeof(*t));
+ attr = (struct nlattr *) ((void *) t + NLMSG_ALIGN(sizeof(*t)));
+ if (libbpf_nla_parse(tb, TCA_MAX, attr, len, NULL) != 0)
+ return -LIBBPF_ERRNO__NLPARSE;
+
+ return dump_qdisc_nlmsg(cookie, t, tb);
+}
+
+int libbpf_nl_get_qdisc(int sock, unsigned int nl_pid, int ifindex,
+ libbpf_dump_nlmsg_t dump_qdisc_nlmsg, void *cookie)
+{
+ struct {
+ struct nlmsghdr nlh;
+ struct tcmsg t;
+ } req = {
+ .nlh.nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg)),
+ .nlh.nlmsg_type = RTM_GETQDISC,
+ .nlh.nlmsg_flags = NLM_F_DUMP | NLM_F_REQUEST,
+ .t.tcm_family = AF_UNSPEC,
+ .t.tcm_ifindex = ifindex,
+ };
+ int seq = time(NULL);
+
+ req.nlh.nlmsg_seq = seq;
+ if (send(sock, &req, req.nlh.nlmsg_len, 0) < 0)
+ return -errno;
+
+ return bpf_netlink_recv(sock, nl_pid, seq, __dump_qdisc_nlmsg,
+ dump_qdisc_nlmsg, cookie);
+}
+
+static int __dump_filter_nlmsg(struct nlmsghdr *nlh,
+ libbpf_dump_nlmsg_t dump_filter_nlmsg,
+ void *cookie)
+{
+ struct nlattr *tb[TCA_MAX + 1], *attr;
+ struct tcmsg *t = NLMSG_DATA(nlh);
+ int len;
+
+ len = nlh->nlmsg_len - NLMSG_LENGTH(sizeof(*t));
+ attr = (struct nlattr *) ((void *) t + NLMSG_ALIGN(sizeof(*t)));
+ if (libbpf_nla_parse(tb, TCA_MAX, attr, len, NULL) != 0)
+ return -LIBBPF_ERRNO__NLPARSE;
+
+ return dump_filter_nlmsg(cookie, t, tb);
+}
+
+int libbpf_nl_get_filter(int sock, unsigned int nl_pid, int ifindex, int handle,
+ libbpf_dump_nlmsg_t dump_filter_nlmsg, void *cookie)
+{
+ struct {
+ struct nlmsghdr nlh;
+ struct tcmsg t;
+ } req = {
+ .nlh.nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg)),
+ .nlh.nlmsg_type = RTM_GETTFILTER,
+ .nlh.nlmsg_flags = NLM_F_DUMP | NLM_F_REQUEST,
+ .t.tcm_family = AF_UNSPEC,
+ .t.tcm_ifindex = ifindex,
+ .t.tcm_parent = handle,
+ };
+ int seq = time(NULL);
+
+ req.nlh.nlmsg_seq = seq;
+ if (send(sock, &req, req.nlh.nlmsg_len, 0) < 0)
+ return -errno;
+
+ return bpf_netlink_recv(sock, nl_pid, seq, __dump_filter_nlmsg,
+ dump_filter_nlmsg, cookie);
+}
diff --git a/src/contrib/libbpf/bpf/nlattr.c b/src/contrib/libbpf/bpf/nlattr.c
new file mode 100644
index 0000000..8db44bb
--- /dev/null
+++ b/src/contrib/libbpf/bpf/nlattr.c
@@ -0,0 +1,195 @@
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+
+/*
+ * NETLINK Netlink attributes
+ *
+ * Copyright (c) 2003-2013 Thomas Graf <tgraf@suug.ch>
+ */
+
+#include <errno.h>
+#include "nlattr.h"
+#include "libbpf_internal.h"
+#include <linux/rtnetlink.h>
+#include <string.h>
+#include <stdio.h>
+
+static uint16_t nla_attr_minlen[LIBBPF_NLA_TYPE_MAX+1] = {
+ [LIBBPF_NLA_U8] = sizeof(uint8_t),
+ [LIBBPF_NLA_U16] = sizeof(uint16_t),
+ [LIBBPF_NLA_U32] = sizeof(uint32_t),
+ [LIBBPF_NLA_U64] = sizeof(uint64_t),
+ [LIBBPF_NLA_STRING] = 1,
+ [LIBBPF_NLA_FLAG] = 0,
+};
+
+static struct nlattr *nla_next(const struct nlattr *nla, int *remaining)
+{
+ int totlen = NLA_ALIGN(nla->nla_len);
+
+ *remaining -= totlen;
+ return (struct nlattr *) ((char *) nla + totlen);
+}
+
+static int nla_ok(const struct nlattr *nla, int remaining)
+{
+ return remaining >= sizeof(*nla) &&
+ nla->nla_len >= sizeof(*nla) &&
+ nla->nla_len <= remaining;
+}
+
+static int nla_type(const struct nlattr *nla)
+{
+ return nla->nla_type & NLA_TYPE_MASK;
+}
+
+static int validate_nla(struct nlattr *nla, int maxtype,
+ struct libbpf_nla_policy *policy)
+{
+ struct libbpf_nla_policy *pt;
+ unsigned int minlen = 0;
+ int type = nla_type(nla);
+
+ if (type < 0 || type > maxtype)
+ return 0;
+
+ pt = &policy[type];
+
+ if (pt->type > LIBBPF_NLA_TYPE_MAX)
+ return 0;
+
+ if (pt->minlen)
+ minlen = pt->minlen;
+ else if (pt->type != LIBBPF_NLA_UNSPEC)
+ minlen = nla_attr_minlen[pt->type];
+
+ if (libbpf_nla_len(nla) < minlen)
+ return -1;
+
+ if (pt->maxlen && libbpf_nla_len(nla) > pt->maxlen)
+ return -1;
+
+ if (pt->type == LIBBPF_NLA_STRING) {
+ char *data = libbpf_nla_data(nla);
+
+ if (data[libbpf_nla_len(nla) - 1] != '\0')
+ return -1;
+ }
+
+ return 0;
+}
+
+static inline int nlmsg_len(const struct nlmsghdr *nlh)
+{
+ return nlh->nlmsg_len - NLMSG_HDRLEN;
+}
+
+/**
+ * Create attribute index based on a stream of attributes.
+ * @arg tb Index array to be filled (maxtype+1 elements).
+ * @arg maxtype Maximum attribute type expected and accepted.
+ * @arg head Head of attribute stream.
+ * @arg len Length of attribute stream.
+ * @arg policy Attribute validation policy.
+ *
+ * Iterates over the stream of attributes and stores a pointer to each
+ * attribute in the index array using the attribute type as index to
+ * the array. Attribute with a type greater than the maximum type
+ * specified will be silently ignored in order to maintain backwards
+ * compatibility. If \a policy is not NULL, the attribute will be
+ * validated using the specified policy.
+ *
+ * @see nla_validate
+ * @return 0 on success or a negative error code.
+ */
+int libbpf_nla_parse(struct nlattr *tb[], int maxtype, struct nlattr *head,
+ int len, struct libbpf_nla_policy *policy)
+{
+ struct nlattr *nla;
+ int rem, err;
+
+ memset(tb, 0, sizeof(struct nlattr *) * (maxtype + 1));
+
+ libbpf_nla_for_each_attr(nla, head, len, rem) {
+ int type = nla_type(nla);
+
+ if (type > maxtype)
+ continue;
+
+ if (policy) {
+ err = validate_nla(nla, maxtype, policy);
+ if (err < 0)
+ goto errout;
+ }
+
+ if (tb[type])
+ pr_warn("Attribute of type %#x found multiple times in message, "
+ "previous attribute is being ignored.\n", type);
+
+ tb[type] = nla;
+ }
+
+ err = 0;
+errout:
+ return err;
+}
+
+/**
+ * Create attribute index based on nested attribute
+ * @arg tb Index array to be filled (maxtype+1 elements).
+ * @arg maxtype Maximum attribute type expected and accepted.
+ * @arg nla Nested Attribute.
+ * @arg policy Attribute validation policy.
+ *
+ * Feeds the stream of attributes nested into the specified attribute
+ * to libbpf_nla_parse().
+ *
+ * @see libbpf_nla_parse
+ * @return 0 on success or a negative error code.
+ */
+int libbpf_nla_parse_nested(struct nlattr *tb[], int maxtype,
+ struct nlattr *nla,
+ struct libbpf_nla_policy *policy)
+{
+ return libbpf_nla_parse(tb, maxtype, libbpf_nla_data(nla),
+ libbpf_nla_len(nla), policy);
+}
+
+/* dump netlink extended ack error message */
+int libbpf_nla_dump_errormsg(struct nlmsghdr *nlh)
+{
+ struct libbpf_nla_policy extack_policy[NLMSGERR_ATTR_MAX + 1] = {
+ [NLMSGERR_ATTR_MSG] = { .type = LIBBPF_NLA_STRING },
+ [NLMSGERR_ATTR_OFFS] = { .type = LIBBPF_NLA_U32 },
+ };
+ struct nlattr *tb[NLMSGERR_ATTR_MAX + 1], *attr;
+ struct nlmsgerr *err;
+ char *errmsg = NULL;
+ int hlen, alen;
+
+ /* no TLVs, nothing to do here */
+ if (!(nlh->nlmsg_flags & NLM_F_ACK_TLVS))
+ return 0;
+
+ err = (struct nlmsgerr *)NLMSG_DATA(nlh);
+ hlen = sizeof(*err);
+
+ /* if NLM_F_CAPPED is set then the inner err msg was capped */
+ if (!(nlh->nlmsg_flags & NLM_F_CAPPED))
+ hlen += nlmsg_len(&err->msg);
+
+ attr = (struct nlattr *) ((void *) err + hlen);
+ alen = nlh->nlmsg_len - hlen;
+
+ if (libbpf_nla_parse(tb, NLMSGERR_ATTR_MAX, attr, alen,
+ extack_policy) != 0) {
+ pr_warn("Failed to parse extended error attributes\n");
+ return 0;
+ }
+
+ if (tb[NLMSGERR_ATTR_MSG])
+ errmsg = (char *) libbpf_nla_data(tb[NLMSGERR_ATTR_MSG]);
+
+ pr_warn("Kernel error message: %s\n", errmsg);
+
+ return 0;
+}
diff --git a/src/contrib/libbpf/bpf/nlattr.h b/src/contrib/libbpf/bpf/nlattr.h
new file mode 100644
index 0000000..6cc3ac9
--- /dev/null
+++ b/src/contrib/libbpf/bpf/nlattr.h
@@ -0,0 +1,106 @@
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
+
+/*
+ * NETLINK Netlink attributes
+ *
+ * Copyright (c) 2003-2013 Thomas Graf <tgraf@suug.ch>
+ */
+
+#ifndef __LIBBPF_NLATTR_H
+#define __LIBBPF_NLATTR_H
+
+#include <stdint.h>
+#include <linux/netlink.h>
+/* avoid multiple definition of netlink features */
+#define __LINUX_NETLINK_H
+
+/**
+ * Standard attribute types to specify validation policy
+ */
+enum {
+ LIBBPF_NLA_UNSPEC, /**< Unspecified type, binary data chunk */
+ LIBBPF_NLA_U8, /**< 8 bit integer */
+ LIBBPF_NLA_U16, /**< 16 bit integer */
+ LIBBPF_NLA_U32, /**< 32 bit integer */
+ LIBBPF_NLA_U64, /**< 64 bit integer */
+ LIBBPF_NLA_STRING, /**< NUL terminated character string */
+ LIBBPF_NLA_FLAG, /**< Flag */
+ LIBBPF_NLA_MSECS, /**< Micro seconds (64bit) */
+ LIBBPF_NLA_NESTED, /**< Nested attributes */
+ __LIBBPF_NLA_TYPE_MAX,
+};
+
+#define LIBBPF_NLA_TYPE_MAX (__LIBBPF_NLA_TYPE_MAX - 1)
+
+/**
+ * @ingroup attr
+ * Attribute validation policy.
+ *
+ * See section @core_doc{core_attr_parse,Attribute Parsing} for more details.
+ */
+struct libbpf_nla_policy {
+ /** Type of attribute or LIBBPF_NLA_UNSPEC */
+ uint16_t type;
+
+ /** Minimal length of payload required */
+ uint16_t minlen;
+
+ /** Maximal length of payload allowed */
+ uint16_t maxlen;
+};
+
+/**
+ * @ingroup attr
+ * Iterate over a stream of attributes
+ * @arg pos loop counter, set to current attribute
+ * @arg head head of attribute stream
+ * @arg len length of attribute stream
+ * @arg rem initialized to len, holds bytes currently remaining in stream
+ */
+#define libbpf_nla_for_each_attr(pos, head, len, rem) \
+ for (pos = head, rem = len; \
+ nla_ok(pos, rem); \
+ pos = nla_next(pos, &(rem)))
+
+/**
+ * libbpf_nla_data - head of payload
+ * @nla: netlink attribute
+ */
+static inline void *libbpf_nla_data(const struct nlattr *nla)
+{
+ return (char *) nla + NLA_HDRLEN;
+}
+
+static inline uint8_t libbpf_nla_getattr_u8(const struct nlattr *nla)
+{
+ return *(uint8_t *)libbpf_nla_data(nla);
+}
+
+static inline uint32_t libbpf_nla_getattr_u32(const struct nlattr *nla)
+{
+ return *(uint32_t *)libbpf_nla_data(nla);
+}
+
+static inline const char *libbpf_nla_getattr_str(const struct nlattr *nla)
+{
+ return (const char *)libbpf_nla_data(nla);
+}
+
+/**
+ * libbpf_nla_len - length of payload
+ * @nla: netlink attribute
+ */
+static inline int libbpf_nla_len(const struct nlattr *nla)
+{
+ return nla->nla_len - NLA_HDRLEN;
+}
+
+int libbpf_nla_parse(struct nlattr *tb[], int maxtype, struct nlattr *head,
+ int len, struct libbpf_nla_policy *policy);
+int libbpf_nla_parse_nested(struct nlattr *tb[], int maxtype,
+ struct nlattr *nla,
+ struct libbpf_nla_policy *policy);
+
+int libbpf_nla_dump_errormsg(struct nlmsghdr *nlh);
+
+#endif /* __LIBBPF_NLATTR_H */
diff --git a/src/contrib/libbpf/bpf/str_error.c b/src/contrib/libbpf/bpf/str_error.c
new file mode 100644
index 0000000..b8064ee
--- /dev/null
+++ b/src/contrib/libbpf/bpf/str_error.c
@@ -0,0 +1,18 @@
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+#undef _GNU_SOURCE
+#include <string.h>
+#include <stdio.h>
+#include "str_error.h"
+
+/*
+ * Wrapper to allow for building in non-GNU systems such as Alpine Linux's musl
+ * libc, while checking strerror_r() return to avoid having to check this in
+ * all places calling it.
+ */
+char *libbpf_strerror_r(int err, char *dst, int len)
+{
+ int ret = strerror_r(err < 0 ? -err : err, dst, len);
+ if (ret)
+ snprintf(dst, len, "ERROR: strerror_r(%d)=%d", err, ret);
+ return dst;
+}
diff --git a/src/contrib/libbpf/bpf/str_error.h b/src/contrib/libbpf/bpf/str_error.h
new file mode 100644
index 0000000..a139334
--- /dev/null
+++ b/src/contrib/libbpf/bpf/str_error.h
@@ -0,0 +1,6 @@
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
+#ifndef __LIBBPF_STR_ERROR_H
+#define __LIBBPF_STR_ERROR_H
+
+char *libbpf_strerror_r(int err, char *dst, int len);
+#endif /* __LIBBPF_STR_ERROR_H */
diff --git a/src/contrib/libbpf/bpf/xsk.c b/src/contrib/libbpf/bpf/xsk.c
new file mode 100644
index 0000000..8e0ffa8
--- /dev/null
+++ b/src/contrib/libbpf/bpf/xsk.c
@@ -0,0 +1,797 @@
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+
+/*
+ * AF_XDP user-space access library.
+ *
+ * Copyright(c) 2018 - 2019 Intel Corporation.
+ *
+ * Author(s): Magnus Karlsson <magnus.karlsson@intel.com>
+ */
+
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <arpa/inet.h>
+#include <asm/barrier.h>
+#include <linux/compiler.h>
+#include <linux/ethtool.h>
+#include <linux/filter.h>
+#include <linux/if_ether.h>
+#include <linux/if_packet.h>
+#include <linux/if_xdp.h>
+#include <linux/sockios.h>
+#include <net/if.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+
+#include "bpf.h"
+#include "libbpf.h"
+#include "libbpf_internal.h"
+#include "xsk.h"
+
+#ifndef SOL_XDP
+ #define SOL_XDP 283
+#endif
+
+#ifndef AF_XDP
+ #define AF_XDP 44
+#endif
+
+#ifndef PF_XDP
+ #define PF_XDP AF_XDP
+#endif
+
+struct xsk_umem {
+ struct xsk_ring_prod *fill;
+ struct xsk_ring_cons *comp;
+ char *umem_area;
+ struct xsk_umem_config config;
+ int fd;
+ int refcount;
+};
+
+struct xsk_socket {
+ struct xsk_ring_cons *rx;
+ struct xsk_ring_prod *tx;
+ __u64 outstanding_tx;
+ struct xsk_umem *umem;
+ struct xsk_socket_config config;
+ int fd;
+ int ifindex;
+ int prog_fd;
+ int xsks_map_fd;
+ __u32 queue_id;
+ char ifname[IFNAMSIZ];
+};
+
+struct xsk_nl_info {
+ bool xdp_prog_attached;
+ int ifindex;
+ int fd;
+};
+
+/* Up until and including Linux 5.3 */
+struct xdp_ring_offset_v1 {
+ __u64 producer;
+ __u64 consumer;
+ __u64 desc;
+};
+
+/* Up until and including Linux 5.3 */
+struct xdp_mmap_offsets_v1 {
+ struct xdp_ring_offset_v1 rx;
+ struct xdp_ring_offset_v1 tx;
+ struct xdp_ring_offset_v1 fr;
+ struct xdp_ring_offset_v1 cr;
+};
+
+int xsk_umem__fd(const struct xsk_umem *umem)
+{
+ return umem ? umem->fd : -EINVAL;
+}
+
+int xsk_socket__fd(const struct xsk_socket *xsk)
+{
+ return xsk ? xsk->fd : -EINVAL;
+}
+
+static bool xsk_page_aligned(void *buffer)
+{
+ unsigned long addr = (unsigned long)buffer;
+
+ return !(addr & (getpagesize() - 1));
+}
+
+static void xsk_set_umem_config(struct xsk_umem_config *cfg,
+ const struct xsk_umem_config *usr_cfg)
+{
+ if (!usr_cfg) {
+ cfg->fill_size = XSK_RING_PROD__DEFAULT_NUM_DESCS;
+ cfg->comp_size = XSK_RING_CONS__DEFAULT_NUM_DESCS;
+ cfg->frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE;
+ cfg->frame_headroom = XSK_UMEM__DEFAULT_FRAME_HEADROOM;
+ cfg->flags = XSK_UMEM__DEFAULT_FLAGS;
+ return;
+ }
+
+ cfg->fill_size = usr_cfg->fill_size;
+ cfg->comp_size = usr_cfg->comp_size;
+ cfg->frame_size = usr_cfg->frame_size;
+ cfg->frame_headroom = usr_cfg->frame_headroom;
+ cfg->flags = usr_cfg->flags;
+}
+
+static int xsk_set_xdp_socket_config(struct xsk_socket_config *cfg,
+ const struct xsk_socket_config *usr_cfg)
+{
+ if (!usr_cfg) {
+ cfg->rx_size = XSK_RING_CONS__DEFAULT_NUM_DESCS;
+ cfg->tx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS;
+ cfg->libbpf_flags = 0;
+ cfg->xdp_flags = 0;
+ cfg->bind_flags = 0;
+ return 0;
+ }
+
+ if (usr_cfg->libbpf_flags & ~XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD)
+ return -EINVAL;
+
+ cfg->rx_size = usr_cfg->rx_size;
+ cfg->tx_size = usr_cfg->tx_size;
+ cfg->libbpf_flags = usr_cfg->libbpf_flags;
+ cfg->xdp_flags = usr_cfg->xdp_flags;
+ cfg->bind_flags = usr_cfg->bind_flags;
+
+ return 0;
+}
+
+static void xsk_mmap_offsets_v1(struct xdp_mmap_offsets *off)
+{
+ struct xdp_mmap_offsets_v1 off_v1;
+
+ /* getsockopt on a kernel <= 5.3 has no flags fields.
+ * Copy over the offsets to the correct places in the >=5.4 format
+ * and put the flags where they would have been on that kernel.
+ */
+ memcpy(&off_v1, off, sizeof(off_v1));
+
+ off->rx.producer = off_v1.rx.producer;
+ off->rx.consumer = off_v1.rx.consumer;
+ off->rx.desc = off_v1.rx.desc;
+ off->rx.flags = off_v1.rx.consumer + sizeof(__u32);
+
+ off->tx.producer = off_v1.tx.producer;
+ off->tx.consumer = off_v1.tx.consumer;
+ off->tx.desc = off_v1.tx.desc;
+ off->tx.flags = off_v1.tx.consumer + sizeof(__u32);
+
+ off->fr.producer = off_v1.fr.producer;
+ off->fr.consumer = off_v1.fr.consumer;
+ off->fr.desc = off_v1.fr.desc;
+ off->fr.flags = off_v1.fr.consumer + sizeof(__u32);
+
+ off->cr.producer = off_v1.cr.producer;
+ off->cr.consumer = off_v1.cr.consumer;
+ off->cr.desc = off_v1.cr.desc;
+ off->cr.flags = off_v1.cr.consumer + sizeof(__u32);
+}
+
+static int xsk_get_mmap_offsets(int fd, struct xdp_mmap_offsets *off)
+{
+ socklen_t optlen;
+ int err;
+
+ optlen = sizeof(*off);
+ err = getsockopt(fd, SOL_XDP, XDP_MMAP_OFFSETS, off, &optlen);
+ if (err)
+ return err;
+
+ if (optlen == sizeof(*off))
+ return 0;
+
+ if (optlen == sizeof(struct xdp_mmap_offsets_v1)) {
+ xsk_mmap_offsets_v1(off);
+ return 0;
+ }
+
+ return -EINVAL;
+}
+
+int xsk_umem__create_v0_0_4(struct xsk_umem **umem_ptr, void *umem_area,
+ __u64 size, struct xsk_ring_prod *fill,
+ struct xsk_ring_cons *comp,
+ const struct xsk_umem_config *usr_config)
+{
+ struct xdp_mmap_offsets off;
+ struct xdp_umem_reg mr;
+ struct xsk_umem *umem;
+ void *map;
+ int err;
+
+ if (!umem_area || !umem_ptr || !fill || !comp)
+ return -EFAULT;
+ if (!size && !xsk_page_aligned(umem_area))
+ return -EINVAL;
+
+ umem = calloc(1, sizeof(*umem));
+ if (!umem)
+ return -ENOMEM;
+
+ umem->fd = socket(AF_XDP, SOCK_RAW, 0);
+ if (umem->fd < 0) {
+ err = -errno;
+ goto out_umem_alloc;
+ }
+
+ umem->umem_area = umem_area;
+ xsk_set_umem_config(&umem->config, usr_config);
+
+ memset(&mr, 0, sizeof(mr));
+ mr.addr = (uintptr_t)umem_area;
+ mr.len = size;
+ mr.chunk_size = umem->config.frame_size;
+ mr.headroom = umem->config.frame_headroom;
+ mr.flags = umem->config.flags;
+
+ err = setsockopt(umem->fd, SOL_XDP, XDP_UMEM_REG, &mr, sizeof(mr));
+ if (err) {
+ err = -errno;
+ goto out_socket;
+ }
+ err = setsockopt(umem->fd, SOL_XDP, XDP_UMEM_FILL_RING,
+ &umem->config.fill_size,
+ sizeof(umem->config.fill_size));
+ if (err) {
+ err = -errno;
+ goto out_socket;
+ }
+ err = setsockopt(umem->fd, SOL_XDP, XDP_UMEM_COMPLETION_RING,
+ &umem->config.comp_size,
+ sizeof(umem->config.comp_size));
+ if (err) {
+ err = -errno;
+ goto out_socket;
+ }
+
+ err = xsk_get_mmap_offsets(umem->fd, &off);
+ if (err) {
+ err = -errno;
+ goto out_socket;
+ }
+
+ map = mmap(NULL, off.fr.desc + umem->config.fill_size * sizeof(__u64),
+ PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, umem->fd,
+ XDP_UMEM_PGOFF_FILL_RING);
+ if (map == MAP_FAILED) {
+ err = -errno;
+ goto out_socket;
+ }
+
+ umem->fill = fill;
+ fill->mask = umem->config.fill_size - 1;
+ fill->size = umem->config.fill_size;
+ fill->producer = map + off.fr.producer;
+ fill->consumer = map + off.fr.consumer;
+ fill->flags = map + off.fr.flags;
+ fill->ring = map + off.fr.desc;
+ fill->cached_cons = umem->config.fill_size;
+
+ map = mmap(NULL, off.cr.desc + umem->config.comp_size * sizeof(__u64),
+ PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, umem->fd,
+ XDP_UMEM_PGOFF_COMPLETION_RING);
+ if (map == MAP_FAILED) {
+ err = -errno;
+ goto out_mmap;
+ }
+
+ umem->comp = comp;
+ comp->mask = umem->config.comp_size - 1;
+ comp->size = umem->config.comp_size;
+ comp->producer = map + off.cr.producer;
+ comp->consumer = map + off.cr.consumer;
+ comp->flags = map + off.cr.flags;
+ comp->ring = map + off.cr.desc;
+
+ *umem_ptr = umem;
+ return 0;
+
+out_mmap:
+ munmap(map, off.fr.desc + umem->config.fill_size * sizeof(__u64));
+out_socket:
+ close(umem->fd);
+out_umem_alloc:
+ free(umem);
+ return err;
+}
+
+struct xsk_umem_config_v1 {
+ __u32 fill_size;
+ __u32 comp_size;
+ __u32 frame_size;
+ __u32 frame_headroom;
+};
+
+int xsk_umem__create_v0_0_2(struct xsk_umem **umem_ptr, void *umem_area,
+ __u64 size, struct xsk_ring_prod *fill,
+ struct xsk_ring_cons *comp,
+ const struct xsk_umem_config *usr_config)
+{
+ struct xsk_umem_config config;
+
+ memcpy(&config, usr_config, sizeof(struct xsk_umem_config_v1));
+ config.flags = 0;
+
+ return xsk_umem__create_v0_0_4(umem_ptr, umem_area, size, fill, comp,
+ &config);
+}
+COMPAT_VERSION(xsk_umem__create_v0_0_2, xsk_umem__create, LIBBPF_0.0.2)
+DEFAULT_VERSION(xsk_umem__create_v0_0_4, xsk_umem__create, LIBBPF_0.0.4)
+
+static int xsk_load_xdp_prog(struct xsk_socket *xsk)
+{
+ static const int log_buf_size = 16 * 1024;
+ char log_buf[log_buf_size];
+ int err, prog_fd;
+
+ /* This is the C-program:
+ * SEC("xdp_sock") int xdp_sock_prog(struct xdp_md *ctx)
+ * {
+ * int ret, index = ctx->rx_queue_index;
+ *
+ * // A set entry here means that the correspnding queue_id
+ * // has an active AF_XDP socket bound to it.
+ * ret = bpf_redirect_map(&xsks_map, index, XDP_PASS);
+ * if (ret > 0)
+ * return ret;
+ *
+ * // Fallback for pre-5.3 kernels, not supporting default
+ * // action in the flags parameter.
+ * if (bpf_map_lookup_elem(&xsks_map, &index))
+ * return bpf_redirect_map(&xsks_map, index, 0);
+ * return XDP_PASS;
+ * }
+ */
+ struct bpf_insn prog[] = {
+ /* r2 = *(u32 *)(r1 + 16) */
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 16),
+ /* *(u32 *)(r10 - 4) = r2 */
+ BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_2, -4),
+ /* r1 = xskmap[] */
+ BPF_LD_MAP_FD(BPF_REG_1, xsk->xsks_map_fd),
+ /* r3 = XDP_PASS */
+ BPF_MOV64_IMM(BPF_REG_3, 2),
+ /* call bpf_redirect_map */
+ BPF_EMIT_CALL(BPF_FUNC_redirect_map),
+ /* if w0 != 0 goto pc+13 */
+ BPF_JMP32_IMM(BPF_JSGT, BPF_REG_0, 0, 13),
+ /* r2 = r10 */
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ /* r2 += -4 */
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+ /* r1 = xskmap[] */
+ BPF_LD_MAP_FD(BPF_REG_1, xsk->xsks_map_fd),
+ /* call bpf_map_lookup_elem */
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ /* r1 = r0 */
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ /* r0 = XDP_PASS */
+ BPF_MOV64_IMM(BPF_REG_0, 2),
+ /* if r1 == 0 goto pc+5 */
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 5),
+ /* r2 = *(u32 *)(r10 - 4) */
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_10, -4),
+ /* r1 = xskmap[] */
+ BPF_LD_MAP_FD(BPF_REG_1, xsk->xsks_map_fd),
+ /* r3 = 0 */
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ /* call bpf_redirect_map */
+ BPF_EMIT_CALL(BPF_FUNC_redirect_map),
+ /* The jumps are to this instruction */
+ BPF_EXIT_INSN(),
+ };
+ size_t insns_cnt = sizeof(prog) / sizeof(struct bpf_insn);
+
+ prog_fd = bpf_load_program(BPF_PROG_TYPE_XDP, prog, insns_cnt,
+ "LGPL-2.1 or BSD-2-Clause", 0, log_buf,
+ log_buf_size);
+ if (prog_fd < 0) {
+ pr_warn("BPF log buffer:\n%s", log_buf);
+ return prog_fd;
+ }
+
+ err = bpf_set_link_xdp_fd(xsk->ifindex, prog_fd, xsk->config.xdp_flags);
+ if (err) {
+ close(prog_fd);
+ return err;
+ }
+
+ xsk->prog_fd = prog_fd;
+ return 0;
+}
+
+static int xsk_get_max_queues(struct xsk_socket *xsk)
+{
+ struct ethtool_channels channels = { .cmd = ETHTOOL_GCHANNELS };
+ struct ifreq ifr = {};
+ int fd, err, ret;
+
+ fd = socket(AF_INET, SOCK_DGRAM, 0);
+ if (fd < 0)
+ return -errno;
+
+ ifr.ifr_data = (void *)&channels;
+ memcpy(ifr.ifr_name, xsk->ifname, IFNAMSIZ - 1);
+ ifr.ifr_name[IFNAMSIZ - 1] = '\0';
+ err = ioctl(fd, SIOCETHTOOL, &ifr);
+ if (err && errno != EOPNOTSUPP) {
+ ret = -errno;
+ goto out;
+ }
+
+ if (err) {
+ /* If the device says it has no channels, then all traffic
+ * is sent to a single stream, so max queues = 1.
+ */
+ ret = 1;
+ } else {
+ /* Take the max of rx, tx, combined. Drivers return
+ * the number of channels in different ways.
+ */
+ ret = max(channels.max_rx, channels.max_tx);
+ ret = max(ret, (int)channels.max_combined);
+ }
+
+out:
+ close(fd);
+ return ret;
+}
+
+static int xsk_create_bpf_maps(struct xsk_socket *xsk)
+{
+ int max_queues;
+ int fd;
+
+ max_queues = xsk_get_max_queues(xsk);
+ if (max_queues < 0)
+ return max_queues;
+
+ fd = bpf_create_map_name(BPF_MAP_TYPE_XSKMAP, "xsks_map",
+ sizeof(int), sizeof(int), max_queues, 0);
+ if (fd < 0)
+ return fd;
+
+ xsk->xsks_map_fd = fd;
+
+ return 0;
+}
+
+static void xsk_delete_bpf_maps(struct xsk_socket *xsk)
+{
+ bpf_map_delete_elem(xsk->xsks_map_fd, &xsk->queue_id);
+ close(xsk->xsks_map_fd);
+}
+
+static int xsk_lookup_bpf_maps(struct xsk_socket *xsk)
+{
+ __u32 i, *map_ids, num_maps, prog_len = sizeof(struct bpf_prog_info);
+ __u32 map_len = sizeof(struct bpf_map_info);
+ struct bpf_prog_info prog_info = {};
+ struct bpf_map_info map_info;
+ int fd, err;
+
+ err = bpf_obj_get_info_by_fd(xsk->prog_fd, &prog_info, &prog_len);
+ if (err)
+ return err;
+
+ num_maps = prog_info.nr_map_ids;
+
+ map_ids = calloc(prog_info.nr_map_ids, sizeof(*map_ids));
+ if (!map_ids)
+ return -ENOMEM;
+
+ memset(&prog_info, 0, prog_len);
+ prog_info.nr_map_ids = num_maps;
+ prog_info.map_ids = (__u64)(unsigned long)map_ids;
+
+ err = bpf_obj_get_info_by_fd(xsk->prog_fd, &prog_info, &prog_len);
+ if (err)
+ goto out_map_ids;
+
+ xsk->xsks_map_fd = -1;
+
+ for (i = 0; i < prog_info.nr_map_ids; i++) {
+ fd = bpf_map_get_fd_by_id(map_ids[i]);
+ if (fd < 0)
+ continue;
+
+ err = bpf_obj_get_info_by_fd(fd, &map_info, &map_len);
+ if (err) {
+ close(fd);
+ continue;
+ }
+
+ if (!strcmp(map_info.name, "xsks_map")) {
+ xsk->xsks_map_fd = fd;
+ continue;
+ }
+
+ close(fd);
+ }
+
+ err = 0;
+ if (xsk->xsks_map_fd == -1)
+ err = -ENOENT;
+
+out_map_ids:
+ free(map_ids);
+ return err;
+}
+
+static int xsk_set_bpf_maps(struct xsk_socket *xsk)
+{
+ return bpf_map_update_elem(xsk->xsks_map_fd, &xsk->queue_id,
+ &xsk->fd, 0);
+}
+
+static int xsk_setup_xdp_prog(struct xsk_socket *xsk)
+{
+ __u32 prog_id = 0;
+ int err;
+
+ err = bpf_get_link_xdp_id(xsk->ifindex, &prog_id,
+ xsk->config.xdp_flags);
+ if (err)
+ return err;
+
+ if (!prog_id) {
+ err = xsk_create_bpf_maps(xsk);
+ if (err)
+ return err;
+
+ err = xsk_load_xdp_prog(xsk);
+ if (err) {
+ xsk_delete_bpf_maps(xsk);
+ return err;
+ }
+ } else {
+ xsk->prog_fd = bpf_prog_get_fd_by_id(prog_id);
+ if (xsk->prog_fd < 0)
+ return -errno;
+ err = xsk_lookup_bpf_maps(xsk);
+ if (err) {
+ close(xsk->prog_fd);
+ return err;
+ }
+ }
+
+ if (xsk->rx)
+ err = xsk_set_bpf_maps(xsk);
+ if (err) {
+ xsk_delete_bpf_maps(xsk);
+ close(xsk->prog_fd);
+ return err;
+ }
+
+ return 0;
+}
+
+int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname,
+ __u32 queue_id, struct xsk_umem *umem,
+ struct xsk_ring_cons *rx, struct xsk_ring_prod *tx,
+ const struct xsk_socket_config *usr_config)
+{
+ void *rx_map = NULL, *tx_map = NULL;
+ struct sockaddr_xdp sxdp = {};
+ struct xdp_mmap_offsets off;
+ struct xsk_socket *xsk;
+ int err;
+
+ if (!umem || !xsk_ptr || !(rx || tx))
+ return -EFAULT;
+
+ xsk = calloc(1, sizeof(*xsk));
+ if (!xsk)
+ return -ENOMEM;
+
+ err = xsk_set_xdp_socket_config(&xsk->config, usr_config);
+ if (err)
+ goto out_xsk_alloc;
+
+ if (umem->refcount &&
+ !(xsk->config.libbpf_flags & XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD)) {
+ pr_warn("Error: shared umems not supported by libbpf supplied XDP program.\n");
+ err = -EBUSY;
+ goto out_xsk_alloc;
+ }
+
+ if (umem->refcount++ > 0) {
+ xsk->fd = socket(AF_XDP, SOCK_RAW, 0);
+ if (xsk->fd < 0) {
+ err = -errno;
+ goto out_xsk_alloc;
+ }
+ } else {
+ xsk->fd = umem->fd;
+ }
+
+ xsk->outstanding_tx = 0;
+ xsk->queue_id = queue_id;
+ xsk->umem = umem;
+ xsk->ifindex = if_nametoindex(ifname);
+ if (!xsk->ifindex) {
+ err = -errno;
+ goto out_socket;
+ }
+ memcpy(xsk->ifname, ifname, IFNAMSIZ - 1);
+ xsk->ifname[IFNAMSIZ - 1] = '\0';
+
+ if (rx) {
+ err = setsockopt(xsk->fd, SOL_XDP, XDP_RX_RING,
+ &xsk->config.rx_size,
+ sizeof(xsk->config.rx_size));
+ if (err) {
+ err = -errno;
+ goto out_socket;
+ }
+ }
+ if (tx) {
+ err = setsockopt(xsk->fd, SOL_XDP, XDP_TX_RING,
+ &xsk->config.tx_size,
+ sizeof(xsk->config.tx_size));
+ if (err) {
+ err = -errno;
+ goto out_socket;
+ }
+ }
+
+ err = xsk_get_mmap_offsets(xsk->fd, &off);
+ if (err) {
+ err = -errno;
+ goto out_socket;
+ }
+
+ if (rx) {
+ rx_map = mmap(NULL, off.rx.desc +
+ xsk->config.rx_size * sizeof(struct xdp_desc),
+ PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE,
+ xsk->fd, XDP_PGOFF_RX_RING);
+ if (rx_map == MAP_FAILED) {
+ err = -errno;
+ goto out_socket;
+ }
+
+ rx->mask = xsk->config.rx_size - 1;
+ rx->size = xsk->config.rx_size;
+ rx->producer = rx_map + off.rx.producer;
+ rx->consumer = rx_map + off.rx.consumer;
+ rx->flags = rx_map + off.rx.flags;
+ rx->ring = rx_map + off.rx.desc;
+ }
+ xsk->rx = rx;
+
+ if (tx) {
+ tx_map = mmap(NULL, off.tx.desc +
+ xsk->config.tx_size * sizeof(struct xdp_desc),
+ PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE,
+ xsk->fd, XDP_PGOFF_TX_RING);
+ if (tx_map == MAP_FAILED) {
+ err = -errno;
+ goto out_mmap_rx;
+ }
+
+ tx->mask = xsk->config.tx_size - 1;
+ tx->size = xsk->config.tx_size;
+ tx->producer = tx_map + off.tx.producer;
+ tx->consumer = tx_map + off.tx.consumer;
+ tx->flags = tx_map + off.tx.flags;
+ tx->ring = tx_map + off.tx.desc;
+ tx->cached_cons = xsk->config.tx_size;
+ }
+ xsk->tx = tx;
+
+ sxdp.sxdp_family = PF_XDP;
+ sxdp.sxdp_ifindex = xsk->ifindex;
+ sxdp.sxdp_queue_id = xsk->queue_id;
+ if (umem->refcount > 1) {
+ sxdp.sxdp_flags = XDP_SHARED_UMEM;
+ sxdp.sxdp_shared_umem_fd = umem->fd;
+ } else {
+ sxdp.sxdp_flags = xsk->config.bind_flags;
+ }
+
+ err = bind(xsk->fd, (struct sockaddr *)&sxdp, sizeof(sxdp));
+ if (err) {
+ err = -errno;
+ goto out_mmap_tx;
+ }
+
+ xsk->prog_fd = -1;
+
+ if (!(xsk->config.libbpf_flags & XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD)) {
+ err = xsk_setup_xdp_prog(xsk);
+ if (err)
+ goto out_mmap_tx;
+ }
+
+ *xsk_ptr = xsk;
+ return 0;
+
+out_mmap_tx:
+ if (tx)
+ munmap(tx_map, off.tx.desc +
+ xsk->config.tx_size * sizeof(struct xdp_desc));
+out_mmap_rx:
+ if (rx)
+ munmap(rx_map, off.rx.desc +
+ xsk->config.rx_size * sizeof(struct xdp_desc));
+out_socket:
+ if (--umem->refcount)
+ close(xsk->fd);
+out_xsk_alloc:
+ free(xsk);
+ return err;
+}
+
+int xsk_umem__delete(struct xsk_umem *umem)
+{
+ struct xdp_mmap_offsets off;
+ int err;
+
+ if (!umem)
+ return 0;
+
+ if (umem->refcount)
+ return -EBUSY;
+
+ err = xsk_get_mmap_offsets(umem->fd, &off);
+ if (!err) {
+ munmap(umem->fill->ring - off.fr.desc,
+ off.fr.desc + umem->config.fill_size * sizeof(__u64));
+ munmap(umem->comp->ring - off.cr.desc,
+ off.cr.desc + umem->config.comp_size * sizeof(__u64));
+ }
+
+ close(umem->fd);
+ free(umem);
+
+ return 0;
+}
+
+void xsk_socket__delete(struct xsk_socket *xsk)
+{
+ size_t desc_sz = sizeof(struct xdp_desc);
+ struct xdp_mmap_offsets off;
+ int err;
+
+ if (!xsk)
+ return;
+
+ if (xsk->prog_fd != -1) {
+ xsk_delete_bpf_maps(xsk);
+ close(xsk->prog_fd);
+ }
+
+ err = xsk_get_mmap_offsets(xsk->fd, &off);
+ if (!err) {
+ if (xsk->rx) {
+ munmap(xsk->rx->ring - off.rx.desc,
+ off.rx.desc + xsk->config.rx_size * desc_sz);
+ }
+ if (xsk->tx) {
+ munmap(xsk->tx->ring - off.tx.desc,
+ off.tx.desc + xsk->config.tx_size * desc_sz);
+ }
+
+ }
+
+ xsk->umem->refcount--;
+ /* Do not close an fd that also has an associated umem connected
+ * to it.
+ */
+ if (xsk->fd != xsk->umem->fd)
+ close(xsk->fd);
+ free(xsk);
+}
diff --git a/src/contrib/libbpf/bpf/xsk.h b/src/contrib/libbpf/bpf/xsk.h
new file mode 100644
index 0000000..584f682
--- /dev/null
+++ b/src/contrib/libbpf/bpf/xsk.h
@@ -0,0 +1,246 @@
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
+
+/*
+ * AF_XDP user-space access library.
+ *
+ * Copyright(c) 2018 - 2019 Intel Corporation.
+ *
+ * Author(s): Magnus Karlsson <magnus.karlsson@intel.com>
+ */
+
+#ifndef __LIBBPF_XSK_H
+#define __LIBBPF_XSK_H
+
+#include <stdio.h>
+#include <stdint.h>
+#include <linux/if_xdp.h>
+
+#include "libbpf.h"
+#include "libbpf_util.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Do not access these members directly. Use the functions below. */
+#define DEFINE_XSK_RING(name) \
+struct name { \
+ __u32 cached_prod; \
+ __u32 cached_cons; \
+ __u32 mask; \
+ __u32 size; \
+ __u32 *producer; \
+ __u32 *consumer; \
+ void *ring; \
+ __u32 *flags; \
+}
+
+DEFINE_XSK_RING(xsk_ring_prod);
+DEFINE_XSK_RING(xsk_ring_cons);
+
+/* For a detailed explanation on the memory barriers associated with the
+ * ring, please take a look at net/xdp/xsk_queue.h.
+ */
+
+struct xsk_umem;
+struct xsk_socket;
+
+static inline __u64 *xsk_ring_prod__fill_addr(struct xsk_ring_prod *fill,
+ __u32 idx)
+{
+ __u64 *addrs = (__u64 *)fill->ring;
+
+ return &addrs[idx & fill->mask];
+}
+
+static inline const __u64 *
+xsk_ring_cons__comp_addr(const struct xsk_ring_cons *comp, __u32 idx)
+{
+ const __u64 *addrs = (const __u64 *)comp->ring;
+
+ return &addrs[idx & comp->mask];
+}
+
+static inline struct xdp_desc *xsk_ring_prod__tx_desc(struct xsk_ring_prod *tx,
+ __u32 idx)
+{
+ struct xdp_desc *descs = (struct xdp_desc *)tx->ring;
+
+ return &descs[idx & tx->mask];
+}
+
+static inline const struct xdp_desc *
+xsk_ring_cons__rx_desc(const struct xsk_ring_cons *rx, __u32 idx)
+{
+ const struct xdp_desc *descs = (const struct xdp_desc *)rx->ring;
+
+ return &descs[idx & rx->mask];
+}
+
+static inline int xsk_ring_prod__needs_wakeup(const struct xsk_ring_prod *r)
+{
+ return *r->flags & XDP_RING_NEED_WAKEUP;
+}
+
+static inline __u32 xsk_prod_nb_free(struct xsk_ring_prod *r, __u32 nb)
+{
+ __u32 free_entries = r->cached_cons - r->cached_prod;
+
+ if (free_entries >= nb)
+ return free_entries;
+
+ /* Refresh the local tail pointer.
+ * cached_cons is r->size bigger than the real consumer pointer so
+ * that this addition can be avoided in the more frequently
+ * executed code that computs free_entries in the beginning of
+ * this function. Without this optimization it whould have been
+ * free_entries = r->cached_prod - r->cached_cons + r->size.
+ */
+ r->cached_cons = *r->consumer + r->size;
+
+ return r->cached_cons - r->cached_prod;
+}
+
+static inline __u32 xsk_cons_nb_avail(struct xsk_ring_cons *r, __u32 nb)
+{
+ __u32 entries = r->cached_prod - r->cached_cons;
+
+ if (entries == 0) {
+ r->cached_prod = *r->producer;
+ entries = r->cached_prod - r->cached_cons;
+ }
+
+ return (entries > nb) ? nb : entries;
+}
+
+static inline size_t xsk_ring_prod__reserve(struct xsk_ring_prod *prod,
+ size_t nb, __u32 *idx)
+{
+ if (xsk_prod_nb_free(prod, nb) < nb)
+ return 0;
+
+ *idx = prod->cached_prod;
+ prod->cached_prod += nb;
+
+ return nb;
+}
+
+static inline void xsk_ring_prod__submit(struct xsk_ring_prod *prod, size_t nb)
+{
+ /* Make sure everything has been written to the ring before indicating
+ * this to the kernel by writing the producer pointer.
+ */
+ libbpf_smp_wmb();
+
+ *prod->producer += nb;
+}
+
+static inline size_t xsk_ring_cons__peek(struct xsk_ring_cons *cons,
+ size_t nb, __u32 *idx)
+{
+ size_t entries = xsk_cons_nb_avail(cons, nb);
+
+ if (entries > 0) {
+ /* Make sure we do not speculatively read the data before
+ * we have received the packet buffers from the ring.
+ */
+ libbpf_smp_rmb();
+
+ *idx = cons->cached_cons;
+ cons->cached_cons += entries;
+ }
+
+ return entries;
+}
+
+static inline void xsk_ring_cons__release(struct xsk_ring_cons *cons, size_t nb)
+{
+ /* Make sure data has been read before indicating we are done
+ * with the entries by updating the consumer pointer.
+ */
+ libbpf_smp_rwmb();
+
+ *cons->consumer += nb;
+}
+
+static inline void *xsk_umem__get_data(void *umem_area, __u64 addr)
+{
+ return &((char *)umem_area)[addr];
+}
+
+static inline __u64 xsk_umem__extract_addr(__u64 addr)
+{
+ return addr & XSK_UNALIGNED_BUF_ADDR_MASK;
+}
+
+static inline __u64 xsk_umem__extract_offset(__u64 addr)
+{
+ return addr >> XSK_UNALIGNED_BUF_OFFSET_SHIFT;
+}
+
+static inline __u64 xsk_umem__add_offset_to_addr(__u64 addr)
+{
+ return xsk_umem__extract_addr(addr) + xsk_umem__extract_offset(addr);
+}
+
+LIBBPF_API int xsk_umem__fd(const struct xsk_umem *umem);
+LIBBPF_API int xsk_socket__fd(const struct xsk_socket *xsk);
+
+#define XSK_RING_CONS__DEFAULT_NUM_DESCS 2048
+#define XSK_RING_PROD__DEFAULT_NUM_DESCS 2048
+#define XSK_UMEM__DEFAULT_FRAME_SHIFT 12 /* 4096 bytes */
+#define XSK_UMEM__DEFAULT_FRAME_SIZE (1 << XSK_UMEM__DEFAULT_FRAME_SHIFT)
+#define XSK_UMEM__DEFAULT_FRAME_HEADROOM 0
+#define XSK_UMEM__DEFAULT_FLAGS 0
+
+struct xsk_umem_config {
+ __u32 fill_size;
+ __u32 comp_size;
+ __u32 frame_size;
+ __u32 frame_headroom;
+ __u32 flags;
+};
+
+/* Flags for the libbpf_flags field. */
+#define XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD (1 << 0)
+
+struct xsk_socket_config {
+ __u32 rx_size;
+ __u32 tx_size;
+ __u32 libbpf_flags;
+ __u32 xdp_flags;
+ __u16 bind_flags;
+};
+
+/* Set config to NULL to get the default configuration. */
+LIBBPF_API int xsk_umem__create(struct xsk_umem **umem,
+ void *umem_area, __u64 size,
+ struct xsk_ring_prod *fill,
+ struct xsk_ring_cons *comp,
+ const struct xsk_umem_config *config);
+LIBBPF_API int xsk_umem__create_v0_0_2(struct xsk_umem **umem,
+ void *umem_area, __u64 size,
+ struct xsk_ring_prod *fill,
+ struct xsk_ring_cons *comp,
+ const struct xsk_umem_config *config);
+LIBBPF_API int xsk_umem__create_v0_0_4(struct xsk_umem **umem,
+ void *umem_area, __u64 size,
+ struct xsk_ring_prod *fill,
+ struct xsk_ring_cons *comp,
+ const struct xsk_umem_config *config);
+LIBBPF_API int xsk_socket__create(struct xsk_socket **xsk,
+ const char *ifname, __u32 queue_id,
+ struct xsk_umem *umem,
+ struct xsk_ring_cons *rx,
+ struct xsk_ring_prod *tx,
+ const struct xsk_socket_config *config);
+
+/* Returns 0 for success and -EBUSY if the umem is still in use. */
+LIBBPF_API int xsk_umem__delete(struct xsk_umem *umem);
+LIBBPF_API void xsk_socket__delete(struct xsk_socket *xsk);
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#endif /* __LIBBPF_XSK_H */
diff --git a/src/contrib/libbpf/include/asm/barrier.h b/src/contrib/libbpf/include/asm/barrier.h
new file mode 100644
index 0000000..1fc6aee
--- /dev/null
+++ b/src/contrib/libbpf/include/asm/barrier.h
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
+#ifndef __ASM_BARRIER_H
+#define __ASM_BARRIER_H
+
+#include <linux/compiler.h>
+
+#endif
diff --git a/src/contrib/libbpf/include/linux/compiler.h b/src/contrib/libbpf/include/linux/compiler.h
new file mode 100644
index 0000000..26336dc
--- /dev/null
+++ b/src/contrib/libbpf/include/linux/compiler.h
@@ -0,0 +1,70 @@
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
+
+#ifndef __LINUX_COMPILER_H
+#define __LINUX_COMPILER_H
+
+#define likely(x) __builtin_expect(!!(x), 1)
+#define unlikely(x) __builtin_expect(!!(x), 0)
+
+#define READ_ONCE(x) (*(volatile typeof(x) *)&x)
+#define WRITE_ONCE(x, v) (*(volatile typeof(x) *)&x) = (v)
+
+#define barrier() asm volatile("" ::: "memory")
+
+#if defined(__x86_64__)
+
+# define smp_rmb() barrier()
+# define smp_wmb() barrier()
+# define smp_mb() asm volatile("lock; addl $0,-132(%%rsp)" ::: "memory", "cc")
+
+# define smp_store_release(p, v) \
+do { \
+ barrier(); \
+ WRITE_ONCE(*p, v); \
+} while (0)
+
+# define smp_load_acquire(p) \
+({ \
+ typeof(*p) ___p = READ_ONCE(*p); \
+ barrier(); \
+ ___p; \
+})
+
+#elif defined(__aarch64__)
+
+# define smp_rmb() asm volatile("dmb ishld" ::: "memory")
+# define smp_wmb() asm volatile("dmb ishst" ::: "memory")
+# define smp_mb() asm volatile("dmb ish" ::: "memory")
+
+#endif
+
+#ifndef smp_mb
+# define smp_mb() __sync_synchronize()
+#endif
+
+#ifndef smp_rmb
+# define smp_rmb() smp_mb()
+#endif
+
+#ifndef smp_wmb
+# define smp_wmb() smp_mb()
+#endif
+
+#ifndef smp_store_release
+# define smp_store_release(p, v) \
+do { \
+ smp_mb(); \
+ WRITE_ONCE(*p, v); \
+} while (0)
+#endif
+
+#ifndef smp_load_acquire
+# define smp_load_acquire(p) \
+({ \
+ typeof(*p) ___p = READ_ONCE(*p); \
+ smp_mb(); \
+ ___p; \
+})
+#endif
+
+#endif /* __LINUX_COMPILER_H */
diff --git a/src/contrib/libbpf/include/linux/err.h b/src/contrib/libbpf/include/linux/err.h
new file mode 100644
index 0000000..1b1dafb
--- /dev/null
+++ b/src/contrib/libbpf/include/linux/err.h
@@ -0,0 +1,38 @@
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
+
+#ifndef __LINUX_ERR_H
+#define __LINUX_ERR_H
+
+#include <linux/types.h>
+#include <asm/errno.h>
+
+#define MAX_ERRNO 4095
+
+#define IS_ERR_VALUE(x) ((x) >= (unsigned long)-MAX_ERRNO)
+
+static inline void * ERR_PTR(long error_)
+{
+ return (void *) error_;
+}
+
+static inline long PTR_ERR(const void *ptr)
+{
+ return (long) ptr;
+}
+
+static inline bool IS_ERR(const void *ptr)
+{
+ return IS_ERR_VALUE((unsigned long)ptr);
+}
+
+static inline bool IS_ERR_OR_NULL(const void *ptr)
+{
+ return (!ptr) || IS_ERR_VALUE((unsigned long)ptr);
+}
+
+static inline long PTR_ERR_OR_ZERO(const void *ptr)
+{
+ return IS_ERR(ptr) ? PTR_ERR(ptr) : 0;
+}
+
+#endif
diff --git a/src/contrib/libbpf/include/linux/filter.h b/src/contrib/libbpf/include/linux/filter.h
new file mode 100644
index 0000000..b0700e2
--- /dev/null
+++ b/src/contrib/libbpf/include/linux/filter.h
@@ -0,0 +1,118 @@
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
+
+#ifndef __LINUX_FILTER_H
+#define __LINUX_FILTER_H
+
+#include <linux/bpf.h>
+
+#define BPF_ALU64_IMM(OP, DST, IMM) \
+ ((struct bpf_insn) { \
+ .code = BPF_ALU64 | BPF_OP(OP) | BPF_K, \
+ .dst_reg = DST, \
+ .src_reg = 0, \
+ .off = 0, \
+ .imm = IMM })
+
+#define BPF_MOV64_IMM(DST, IMM) \
+ ((struct bpf_insn) { \
+ .code = BPF_ALU64 | BPF_MOV | BPF_K, \
+ .dst_reg = DST, \
+ .src_reg = 0, \
+ .off = 0, \
+ .imm = IMM })
+
+#define BPF_EXIT_INSN() \
+ ((struct bpf_insn) { \
+ .code = BPF_JMP | BPF_EXIT, \
+ .dst_reg = 0, \
+ .src_reg = 0, \
+ .off = 0, \
+ .imm = 0 })
+
+#define BPF_EMIT_CALL(FUNC) \
+ ((struct bpf_insn) { \
+ .code = BPF_JMP | BPF_CALL, \
+ .dst_reg = 0, \
+ .src_reg = 0, \
+ .off = 0, \
+ .imm = ((FUNC) - BPF_FUNC_unspec) })
+
+#define BPF_LDX_MEM(SIZE, DST, SRC, OFF) \
+ ((struct bpf_insn) { \
+ .code = BPF_LDX | BPF_SIZE(SIZE) | BPF_MEM, \
+ .dst_reg = DST, \
+ .src_reg = SRC, \
+ .off = OFF, \
+ .imm = 0 })
+
+#define BPF_STX_MEM(SIZE, DST, SRC, OFF) \
+ ((struct bpf_insn) { \
+ .code = BPF_STX | BPF_SIZE(SIZE) | BPF_MEM, \
+ .dst_reg = DST, \
+ .src_reg = SRC, \
+ .off = OFF, \
+ .imm = 0 })
+
+#define BPF_ST_MEM(SIZE, DST, OFF, IMM) \
+ ((struct bpf_insn) { \
+ .code = BPF_ST | BPF_SIZE(SIZE) | BPF_MEM, \
+ .dst_reg = DST, \
+ .src_reg = 0, \
+ .off = OFF, \
+ .imm = IMM })
+
+#define BPF_MOV64_REG(DST, SRC) \
+ ((struct bpf_insn) { \
+ .code = BPF_ALU64 | BPF_MOV | BPF_X, \
+ .dst_reg = DST, \
+ .src_reg = SRC, \
+ .off = 0, \
+ .imm = 0 })
+
+#define BPF_MOV32_IMM(DST, IMM) \
+ ((struct bpf_insn) { \
+ .code = BPF_ALU | BPF_MOV | BPF_K, \
+ .dst_reg = DST, \
+ .src_reg = 0, \
+ .off = 0, \
+ .imm = IMM })
+
+#define BPF_LD_IMM64_RAW_FULL(DST, SRC, OFF1, OFF2, IMM1, IMM2) \
+ ((struct bpf_insn) { \
+ .code = BPF_LD | BPF_DW | BPF_IMM, \
+ .dst_reg = DST, \
+ .src_reg = SRC, \
+ .off = OFF1, \
+ .imm = IMM1 }), \
+ ((struct bpf_insn) { \
+ .code = 0, \
+ .dst_reg = 0, \
+ .src_reg = 0, \
+ .off = OFF2, \
+ .imm = IMM2 })
+
+#define BPF_LD_MAP_FD(DST, MAP_FD) \
+ BPF_LD_IMM64_RAW_FULL(DST, BPF_PSEUDO_MAP_FD, 0, 0, \
+ MAP_FD, 0)
+
+#define BPF_LD_MAP_VALUE(DST, MAP_FD, VALUE_OFF) \
+ BPF_LD_IMM64_RAW_FULL(DST, BPF_PSEUDO_MAP_VALUE, 0, 0, \
+ MAP_FD, VALUE_OFF)
+
+#define BPF_JMP_IMM(OP, DST, IMM, OFF) \
+ ((struct bpf_insn) { \
+ .code = BPF_JMP | BPF_OP(OP) | BPF_K, \
+ .dst_reg = DST, \
+ .src_reg = 0, \
+ .off = OFF, \
+ .imm = IMM })
+
+#define BPF_JMP32_IMM(OP, DST, IMM, OFF) \
+ ((struct bpf_insn) { \
+ .code = BPF_JMP32 | BPF_OP(OP) | BPF_K, \
+ .dst_reg = DST, \
+ .src_reg = 0, \
+ .off = OFF, \
+ .imm = IMM })
+
+#endif
diff --git a/src/contrib/libbpf/include/linux/kernel.h b/src/contrib/libbpf/include/linux/kernel.h
new file mode 100644
index 0000000..a4a7a9d
--- /dev/null
+++ b/src/contrib/libbpf/include/linux/kernel.h
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
+
+#ifndef __LINUX_KERNEL_H
+#define __LINUX_KERNEL_H
+
+#ifndef offsetof
+#define offsetof(TYPE, MEMBER) ((size_t) &((TYPE *)0)->MEMBER)
+#endif
+
+#ifndef container_of
+#define container_of(ptr, type, member) ({ \
+ const typeof(((type *)0)->member) * __mptr = (ptr); \
+ (type *)((char *)__mptr - offsetof(type, member)); })
+#endif
+
+#ifndef max
+#define max(x, y) ({ \
+ typeof(x) _max1 = (x); \
+ typeof(y) _max2 = (y); \
+ (void) (&_max1 == &_max2); \
+ _max1 > _max2 ? _max1 : _max2; })
+#endif
+
+#ifndef min
+#define min(x, y) ({ \
+ typeof(x) _min1 = (x); \
+ typeof(y) _min2 = (y); \
+ (void) (&_min1 == &_min2); \
+ _min1 < _min2 ? _min1 : _min2; })
+#endif
+
+#ifndef roundup
+#define roundup(x, y) ( \
+{ \
+ const typeof(y) __y = y; \
+ (((x) + (__y - 1)) / __y) * __y; \
+} \
+)
+#endif
+
+#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
+#define __KERNEL_DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))
+
+#endif
diff --git a/src/contrib/libbpf/include/linux/list.h b/src/contrib/libbpf/include/linux/list.h
new file mode 100644
index 0000000..e3814f7
--- /dev/null
+++ b/src/contrib/libbpf/include/linux/list.h
@@ -0,0 +1,82 @@
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
+
+#ifndef __LINUX_LIST_H
+#define __LINUX_LIST_H
+
+#define LIST_HEAD_INIT(name) { &(name), &(name) }
+#define LIST_HEAD(name) \
+ struct list_head name = LIST_HEAD_INIT(name)
+
+#define POISON_POINTER_DELTA 0
+#define LIST_POISON1 ((void *) 0x100 + POISON_POINTER_DELTA)
+#define LIST_POISON2 ((void *) 0x200 + POISON_POINTER_DELTA)
+
+
+static inline void INIT_LIST_HEAD(struct list_head *list)
+{
+ list->next = list;
+ list->prev = list;
+}
+
+static inline void __list_add(struct list_head *new,
+ struct list_head *prev,
+ struct list_head *next)
+{
+ next->prev = new;
+ new->next = next;
+ new->prev = prev;
+ prev->next = new;
+}
+
+/**
+ * list_add - add a new entry
+ * @new: new entry to be added
+ * @head: list head to add it after
+ *
+ * Insert a new entry after the specified head.
+ * This is good for implementing stacks.
+ */
+static inline void list_add(struct list_head *new, struct list_head *head)
+{
+ __list_add(new, head, head->next);
+}
+
+/*
+ * Delete a list entry by making the prev/next entries
+ * point to each other.
+ *
+ * This is only for internal list manipulation where we know
+ * the prev/next entries already!
+ */
+static inline void __list_del(struct list_head * prev, struct list_head * next)
+{
+ next->prev = prev;
+ prev->next = next;
+}
+
+/**
+ * list_del - deletes entry from list.
+ * @entry: the element to delete from the list.
+ * Note: list_empty() on entry does not return true after this, the entry is
+ * in an undefined state.
+ */
+static inline void __list_del_entry(struct list_head *entry)
+{
+ __list_del(entry->prev, entry->next);
+}
+
+static inline void list_del(struct list_head *entry)
+{
+ __list_del(entry->prev, entry->next);
+ entry->next = LIST_POISON1;
+ entry->prev = LIST_POISON2;
+}
+
+#define list_entry(ptr, type, member) \
+ container_of(ptr, type, member)
+#define list_first_entry(ptr, type, member) \
+ list_entry((ptr)->next, type, member)
+#define list_next_entry(pos, member) \
+ list_entry((pos)->member.next, typeof(*(pos)), member)
+
+#endif
diff --git a/src/contrib/libbpf/include/linux/overflow.h b/src/contrib/libbpf/include/linux/overflow.h
new file mode 100644
index 0000000..53d7580
--- /dev/null
+++ b/src/contrib/libbpf/include/linux/overflow.h
@@ -0,0 +1,90 @@
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
+
+#ifndef __LINUX_OVERFLOW_H
+#define __LINUX_OVERFLOW_H
+
+#define is_signed_type(type) (((type)(-1)) < (type)1)
+#define __type_half_max(type) ((type)1 << (8*sizeof(type) - 1 - is_signed_type(type)))
+#define type_max(T) ((T)((__type_half_max(T) - 1) + __type_half_max(T)))
+#define type_min(T) ((T)((T)-type_max(T)-(T)1))
+
+#ifndef unlikely
+#define unlikely(x) __builtin_expect(!!(x), 0)
+#endif
+
+#ifdef __GNUC__
+#define GCC_VERSION (__GNUC__ * 10000 \
+ + __GNUC_MINOR__ * 100 \
+ + __GNUC_PATCHLEVEL__)
+#if GCC_VERSION >= 50100
+#define COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW 1
+#endif
+#endif
+
+#ifdef COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW
+
+#define check_mul_overflow(a, b, d) ({ \
+ typeof(a) __a = (a); \
+ typeof(b) __b = (b); \
+ typeof(d) __d = (d); \
+ (void) (&__a == &__b); \
+ (void) (&__a == __d); \
+ __builtin_mul_overflow(__a, __b, __d); \
+})
+
+#else
+
+/*
+ * If one of a or b is a compile-time constant, this avoids a division.
+ */
+#define __unsigned_mul_overflow(a, b, d) ({ \
+ typeof(a) __a = (a); \
+ typeof(b) __b = (b); \
+ typeof(d) __d = (d); \
+ (void) (&__a == &__b); \
+ (void) (&__a == __d); \
+ *__d = __a * __b; \
+ __builtin_constant_p(__b) ? \
+ __b > 0 && __a > type_max(typeof(__a)) / __b : \
+ __a > 0 && __b > type_max(typeof(__b)) / __a; \
+})
+
+/*
+ * Signed multiplication is rather hard. gcc always follows C99, so
+ * division is truncated towards 0. This means that we can write the
+ * overflow check like this:
+ *
+ * (a > 0 && (b > MAX/a || b < MIN/a)) ||
+ * (a < -1 && (b > MIN/a || b < MAX/a) ||
+ * (a == -1 && b == MIN)
+ *
+ * The redundant casts of -1 are to silence an annoying -Wtype-limits
+ * (included in -Wextra) warning: When the type is u8 or u16, the
+ * __b_c_e in check_mul_overflow obviously selects
+ * __unsigned_mul_overflow, but unfortunately gcc still parses this
+ * code and warns about the limited range of __b.
+ */
+
+#define __signed_mul_overflow(a, b, d) ({ \
+ typeof(a) __a = (a); \
+ typeof(b) __b = (b); \
+ typeof(d) __d = (d); \
+ typeof(a) __tmax = type_max(typeof(a)); \
+ typeof(a) __tmin = type_min(typeof(a)); \
+ (void) (&__a == &__b); \
+ (void) (&__a == __d); \
+ *__d = (__u64)__a * (__u64)__b; \
+ (__b > 0 && (__a > __tmax/__b || __a < __tmin/__b)) || \
+ (__b < (typeof(__b))-1 && (__a > __tmin/__b || __a < __tmax/__b)) || \
+ (__b == (typeof(__b))-1 && __a == __tmin); \
+})
+
+#define check_mul_overflow(a, b, d) \
+ __builtin_choose_expr(is_signed_type(typeof(a)), \
+ __signed_mul_overflow(a, b, d), \
+ __unsigned_mul_overflow(a, b, d))
+
+
+#endif /* COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW */
+
+#endif
diff --git a/src/contrib/libbpf/include/linux/ring_buffer.h b/src/contrib/libbpf/include/linux/ring_buffer.h
new file mode 100644
index 0000000..fc4677b
--- /dev/null
+++ b/src/contrib/libbpf/include/linux/ring_buffer.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
+#ifndef _TOOLS_LINUX_RING_BUFFER_H_
+#define _TOOLS_LINUX_RING_BUFFER_H_
+
+#include <linux/compiler.h>
+
+static inline __u64 ring_buffer_read_head(struct perf_event_mmap_page *base)
+{
+ return smp_load_acquire(&base->data_head);
+}
+
+static inline void ring_buffer_write_tail(struct perf_event_mmap_page *base,
+ __u64 tail)
+{
+ smp_store_release(&base->data_tail, tail);
+}
+
+#endif /* _TOOLS_LINUX_RING_BUFFER_H_ */
diff --git a/src/contrib/libbpf/include/linux/types.h b/src/contrib/libbpf/include/linux/types.h
new file mode 100644
index 0000000..bae1ed8
--- /dev/null
+++ b/src/contrib/libbpf/include/linux/types.h
@@ -0,0 +1,31 @@
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
+
+#ifndef __LINUX_TYPES_H
+#define __LINUX_TYPES_H
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#include <asm/types.h>
+#include <asm/posix_types.h>
+
+#define __bitwise__
+#define __bitwise __bitwise__
+
+typedef __u16 __bitwise __le16;
+typedef __u16 __bitwise __be16;
+typedef __u32 __bitwise __le32;
+typedef __u32 __bitwise __be32;
+typedef __u64 __bitwise __le64;
+typedef __u64 __bitwise __be64;
+
+#ifndef __aligned_u64
+# define __aligned_u64 __u64 __attribute__((aligned(8)))
+#endif
+
+struct list_head {
+ struct list_head *next, *prev;
+};
+
+#endif
diff --git a/src/contrib/libbpf/include/uapi/linux/bpf.h b/src/contrib/libbpf/include/uapi/linux/bpf.h
new file mode 100644
index 0000000..dbbcf0b
--- /dev/null
+++ b/src/contrib/libbpf/include/uapi/linux/bpf.h
@@ -0,0 +1,3692 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#ifndef _UAPI__LINUX_BPF_H__
+#define _UAPI__LINUX_BPF_H__
+
+#include <linux/types.h>
+#include <linux/bpf_common.h>
+
+/* Extended instruction set based on top of classic BPF */
+
+/* instruction classes */
+#define BPF_JMP32 0x06 /* jmp mode in word width */
+#define BPF_ALU64 0x07 /* alu mode in double word width */
+
+/* ld/ldx fields */
+#define BPF_DW 0x18 /* double word (64-bit) */
+#define BPF_XADD 0xc0 /* exclusive add */
+
+/* alu/jmp fields */
+#define BPF_MOV 0xb0 /* mov reg to reg */
+#define BPF_ARSH 0xc0 /* sign extending arithmetic shift right */
+
+/* change endianness of a register */
+#define BPF_END 0xd0 /* flags for endianness conversion: */
+#define BPF_TO_LE 0x00 /* convert to little-endian */
+#define BPF_TO_BE 0x08 /* convert to big-endian */
+#define BPF_FROM_LE BPF_TO_LE
+#define BPF_FROM_BE BPF_TO_BE
+
+/* jmp encodings */
+#define BPF_JNE 0x50 /* jump != */
+#define BPF_JLT 0xa0 /* LT is unsigned, '<' */
+#define BPF_JLE 0xb0 /* LE is unsigned, '<=' */
+#define BPF_JSGT 0x60 /* SGT is signed '>', GT in x86 */
+#define BPF_JSGE 0x70 /* SGE is signed '>=', GE in x86 */
+#define BPF_JSLT 0xc0 /* SLT is signed, '<' */
+#define BPF_JSLE 0xd0 /* SLE is signed, '<=' */
+#define BPF_CALL 0x80 /* function call */
+#define BPF_EXIT 0x90 /* function return */
+
+/* Register numbers */
+enum {
+ BPF_REG_0 = 0,
+ BPF_REG_1,
+ BPF_REG_2,
+ BPF_REG_3,
+ BPF_REG_4,
+ BPF_REG_5,
+ BPF_REG_6,
+ BPF_REG_7,
+ BPF_REG_8,
+ BPF_REG_9,
+ BPF_REG_10,
+ __MAX_BPF_REG,
+};
+
+/* BPF has 10 general purpose 64-bit registers and stack frame. */
+#define MAX_BPF_REG __MAX_BPF_REG
+
+struct bpf_insn {
+ __u8 code; /* opcode */
+ __u8 dst_reg:4; /* dest register */
+ __u8 src_reg:4; /* source register */
+ __s16 off; /* signed offset */
+ __s32 imm; /* signed immediate constant */
+};
+
+/* Key of an a BPF_MAP_TYPE_LPM_TRIE entry */
+struct bpf_lpm_trie_key {
+ __u32 prefixlen; /* up to 32 for AF_INET, 128 for AF_INET6 */
+ __u8 data[0]; /* Arbitrary size */
+};
+
+struct bpf_cgroup_storage_key {
+ __u64 cgroup_inode_id; /* cgroup inode id */
+ __u32 attach_type; /* program attach type */
+};
+
+/* BPF syscall commands, see bpf(2) man-page for details. */
+enum bpf_cmd {
+ BPF_MAP_CREATE,
+ BPF_MAP_LOOKUP_ELEM,
+ BPF_MAP_UPDATE_ELEM,
+ BPF_MAP_DELETE_ELEM,
+ BPF_MAP_GET_NEXT_KEY,
+ BPF_PROG_LOAD,
+ BPF_OBJ_PIN,
+ BPF_OBJ_GET,
+ BPF_PROG_ATTACH,
+ BPF_PROG_DETACH,
+ BPF_PROG_TEST_RUN,
+ BPF_PROG_GET_NEXT_ID,
+ BPF_MAP_GET_NEXT_ID,
+ BPF_PROG_GET_FD_BY_ID,
+ BPF_MAP_GET_FD_BY_ID,
+ BPF_OBJ_GET_INFO_BY_FD,
+ BPF_PROG_QUERY,
+ BPF_RAW_TRACEPOINT_OPEN,
+ BPF_BTF_LOAD,
+ BPF_BTF_GET_FD_BY_ID,
+ BPF_TASK_FD_QUERY,
+ BPF_MAP_LOOKUP_AND_DELETE_ELEM,
+ BPF_MAP_FREEZE,
+ BPF_BTF_GET_NEXT_ID,
+};
+
+enum bpf_map_type {
+ BPF_MAP_TYPE_UNSPEC,
+ BPF_MAP_TYPE_HASH,
+ BPF_MAP_TYPE_ARRAY,
+ BPF_MAP_TYPE_PROG_ARRAY,
+ BPF_MAP_TYPE_PERF_EVENT_ARRAY,
+ BPF_MAP_TYPE_PERCPU_HASH,
+ BPF_MAP_TYPE_PERCPU_ARRAY,
+ BPF_MAP_TYPE_STACK_TRACE,
+ BPF_MAP_TYPE_CGROUP_ARRAY,
+ BPF_MAP_TYPE_LRU_HASH,
+ BPF_MAP_TYPE_LRU_PERCPU_HASH,
+ BPF_MAP_TYPE_LPM_TRIE,
+ BPF_MAP_TYPE_ARRAY_OF_MAPS,
+ BPF_MAP_TYPE_HASH_OF_MAPS,
+ BPF_MAP_TYPE_DEVMAP,
+ BPF_MAP_TYPE_SOCKMAP,
+ BPF_MAP_TYPE_CPUMAP,
+ BPF_MAP_TYPE_XSKMAP,
+ BPF_MAP_TYPE_SOCKHASH,
+ BPF_MAP_TYPE_CGROUP_STORAGE,
+ BPF_MAP_TYPE_REUSEPORT_SOCKARRAY,
+ BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE,
+ BPF_MAP_TYPE_QUEUE,
+ BPF_MAP_TYPE_STACK,
+ BPF_MAP_TYPE_SK_STORAGE,
+ BPF_MAP_TYPE_DEVMAP_HASH,
+};
+
+/* Note that tracing related programs such as
+ * BPF_PROG_TYPE_{KPROBE,TRACEPOINT,PERF_EVENT,RAW_TRACEPOINT}
+ * are not subject to a stable API since kernel internal data
+ * structures can change from release to release and may
+ * therefore break existing tracing BPF programs. Tracing BPF
+ * programs correspond to /a/ specific kernel which is to be
+ * analyzed, and not /a/ specific kernel /and/ all future ones.
+ */
+enum bpf_prog_type {
+ BPF_PROG_TYPE_UNSPEC,
+ BPF_PROG_TYPE_SOCKET_FILTER,
+ BPF_PROG_TYPE_KPROBE,
+ BPF_PROG_TYPE_SCHED_CLS,
+ BPF_PROG_TYPE_SCHED_ACT,
+ BPF_PROG_TYPE_TRACEPOINT,
+ BPF_PROG_TYPE_XDP,
+ BPF_PROG_TYPE_PERF_EVENT,
+ BPF_PROG_TYPE_CGROUP_SKB,
+ BPF_PROG_TYPE_CGROUP_SOCK,
+ BPF_PROG_TYPE_LWT_IN,
+ BPF_PROG_TYPE_LWT_OUT,
+ BPF_PROG_TYPE_LWT_XMIT,
+ BPF_PROG_TYPE_SOCK_OPS,
+ BPF_PROG_TYPE_SK_SKB,
+ BPF_PROG_TYPE_CGROUP_DEVICE,
+ BPF_PROG_TYPE_SK_MSG,
+ BPF_PROG_TYPE_RAW_TRACEPOINT,
+ BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
+ BPF_PROG_TYPE_LWT_SEG6LOCAL,
+ BPF_PROG_TYPE_LIRC_MODE2,
+ BPF_PROG_TYPE_SK_REUSEPORT,
+ BPF_PROG_TYPE_FLOW_DISSECTOR,
+ BPF_PROG_TYPE_CGROUP_SYSCTL,
+ BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE,
+ BPF_PROG_TYPE_CGROUP_SOCKOPT,
+ BPF_PROG_TYPE_TRACING,
+};
+
+enum bpf_attach_type {
+ BPF_CGROUP_INET_INGRESS,
+ BPF_CGROUP_INET_EGRESS,
+ BPF_CGROUP_INET_SOCK_CREATE,
+ BPF_CGROUP_SOCK_OPS,
+ BPF_SK_SKB_STREAM_PARSER,
+ BPF_SK_SKB_STREAM_VERDICT,
+ BPF_CGROUP_DEVICE,
+ BPF_SK_MSG_VERDICT,
+ BPF_CGROUP_INET4_BIND,
+ BPF_CGROUP_INET6_BIND,
+ BPF_CGROUP_INET4_CONNECT,
+ BPF_CGROUP_INET6_CONNECT,
+ BPF_CGROUP_INET4_POST_BIND,
+ BPF_CGROUP_INET6_POST_BIND,
+ BPF_CGROUP_UDP4_SENDMSG,
+ BPF_CGROUP_UDP6_SENDMSG,
+ BPF_LIRC_MODE2,
+ BPF_FLOW_DISSECTOR,
+ BPF_CGROUP_SYSCTL,
+ BPF_CGROUP_UDP4_RECVMSG,
+ BPF_CGROUP_UDP6_RECVMSG,
+ BPF_CGROUP_GETSOCKOPT,
+ BPF_CGROUP_SETSOCKOPT,
+ BPF_TRACE_RAW_TP,
+ BPF_TRACE_FENTRY,
+ BPF_TRACE_FEXIT,
+ __MAX_BPF_ATTACH_TYPE
+};
+
+#define MAX_BPF_ATTACH_TYPE __MAX_BPF_ATTACH_TYPE
+
+/* cgroup-bpf attach flags used in BPF_PROG_ATTACH command
+ *
+ * NONE(default): No further bpf programs allowed in the subtree.
+ *
+ * BPF_F_ALLOW_OVERRIDE: If a sub-cgroup installs some bpf program,
+ * the program in this cgroup yields to sub-cgroup program.
+ *
+ * BPF_F_ALLOW_MULTI: If a sub-cgroup installs some bpf program,
+ * that cgroup program gets run in addition to the program in this cgroup.
+ *
+ * Only one program is allowed to be attached to a cgroup with
+ * NONE or BPF_F_ALLOW_OVERRIDE flag.
+ * Attaching another program on top of NONE or BPF_F_ALLOW_OVERRIDE will
+ * release old program and attach the new one. Attach flags has to match.
+ *
+ * Multiple programs are allowed to be attached to a cgroup with
+ * BPF_F_ALLOW_MULTI flag. They are executed in FIFO order
+ * (those that were attached first, run first)
+ * The programs of sub-cgroup are executed first, then programs of
+ * this cgroup and then programs of parent cgroup.
+ * When children program makes decision (like picking TCP CA or sock bind)
+ * parent program has a chance to override it.
+ *
+ * A cgroup with MULTI or OVERRIDE flag allows any attach flags in sub-cgroups.
+ * A cgroup with NONE doesn't allow any programs in sub-cgroups.
+ * Ex1:
+ * cgrp1 (MULTI progs A, B) ->
+ * cgrp2 (OVERRIDE prog C) ->
+ * cgrp3 (MULTI prog D) ->
+ * cgrp4 (OVERRIDE prog E) ->
+ * cgrp5 (NONE prog F)
+ * the event in cgrp5 triggers execution of F,D,A,B in that order.
+ * if prog F is detached, the execution is E,D,A,B
+ * if prog F and D are detached, the execution is E,A,B
+ * if prog F, E and D are detached, the execution is C,A,B
+ *
+ * All eligible programs are executed regardless of return code from
+ * earlier programs.
+ */
+#define BPF_F_ALLOW_OVERRIDE (1U << 0)
+#define BPF_F_ALLOW_MULTI (1U << 1)
+
+/* If BPF_F_STRICT_ALIGNMENT is used in BPF_PROG_LOAD command, the
+ * verifier will perform strict alignment checking as if the kernel
+ * has been built with CONFIG_EFFICIENT_UNALIGNED_ACCESS not set,
+ * and NET_IP_ALIGN defined to 2.
+ */
+#define BPF_F_STRICT_ALIGNMENT (1U << 0)
+
+/* If BPF_F_ANY_ALIGNMENT is used in BPF_PROF_LOAD command, the
+ * verifier will allow any alignment whatsoever. On platforms
+ * with strict alignment requirements for loads ands stores (such
+ * as sparc and mips) the verifier validates that all loads and
+ * stores provably follow this requirement. This flag turns that
+ * checking and enforcement off.
+ *
+ * It is mostly used for testing when we want to validate the
+ * context and memory access aspects of the verifier, but because
+ * of an unaligned access the alignment check would trigger before
+ * the one we are interested in.
+ */
+#define BPF_F_ANY_ALIGNMENT (1U << 1)
+
+/* BPF_F_TEST_RND_HI32 is used in BPF_PROG_LOAD command for testing purpose.
+ * Verifier does sub-register def/use analysis and identifies instructions whose
+ * def only matters for low 32-bit, high 32-bit is never referenced later
+ * through implicit zero extension. Therefore verifier notifies JIT back-ends
+ * that it is safe to ignore clearing high 32-bit for these instructions. This
+ * saves some back-ends a lot of code-gen. However such optimization is not
+ * necessary on some arches, for example x86_64, arm64 etc, whose JIT back-ends
+ * hence hasn't used verifier's analysis result. But, we really want to have a
+ * way to be able to verify the correctness of the described optimization on
+ * x86_64 on which testsuites are frequently exercised.
+ *
+ * So, this flag is introduced. Once it is set, verifier will randomize high
+ * 32-bit for those instructions who has been identified as safe to ignore them.
+ * Then, if verifier is not doing correct analysis, such randomization will
+ * regress tests to expose bugs.
+ */
+#define BPF_F_TEST_RND_HI32 (1U << 2)
+
+/* The verifier internal test flag. Behavior is undefined */
+#define BPF_F_TEST_STATE_FREQ (1U << 3)
+
+/* When BPF ldimm64's insn[0].src_reg != 0 then this can have
+ * two extensions:
+ *
+ * insn[0].src_reg: BPF_PSEUDO_MAP_FD BPF_PSEUDO_MAP_VALUE
+ * insn[0].imm: map fd map fd
+ * insn[1].imm: 0 offset into value
+ * insn[0].off: 0 0
+ * insn[1].off: 0 0
+ * ldimm64 rewrite: address of map address of map[0]+offset
+ * verifier type: CONST_PTR_TO_MAP PTR_TO_MAP_VALUE
+ */
+#define BPF_PSEUDO_MAP_FD 1
+#define BPF_PSEUDO_MAP_VALUE 2
+
+/* when bpf_call->src_reg == BPF_PSEUDO_CALL, bpf_call->imm == pc-relative
+ * offset to another bpf function
+ */
+#define BPF_PSEUDO_CALL 1
+
+/* flags for BPF_MAP_UPDATE_ELEM command */
+#define BPF_ANY 0 /* create new element or update existing */
+#define BPF_NOEXIST 1 /* create new element if it didn't exist */
+#define BPF_EXIST 2 /* update existing element */
+#define BPF_F_LOCK 4 /* spin_lock-ed map_lookup/map_update */
+
+/* flags for BPF_MAP_CREATE command */
+#define BPF_F_NO_PREALLOC (1U << 0)
+/* Instead of having one common LRU list in the
+ * BPF_MAP_TYPE_LRU_[PERCPU_]HASH map, use a percpu LRU list
+ * which can scale and perform better.
+ * Note, the LRU nodes (including free nodes) cannot be moved
+ * across different LRU lists.
+ */
+#define BPF_F_NO_COMMON_LRU (1U << 1)
+/* Specify numa node during map creation */
+#define BPF_F_NUMA_NODE (1U << 2)
+
+#define BPF_OBJ_NAME_LEN 16U
+
+/* Flags for accessing BPF object from syscall side. */
+#define BPF_F_RDONLY (1U << 3)
+#define BPF_F_WRONLY (1U << 4)
+
+/* Flag for stack_map, store build_id+offset instead of pointer */
+#define BPF_F_STACK_BUILD_ID (1U << 5)
+
+/* Zero-initialize hash function seed. This should only be used for testing. */
+#define BPF_F_ZERO_SEED (1U << 6)
+
+/* Flags for accessing BPF object from program side. */
+#define BPF_F_RDONLY_PROG (1U << 7)
+#define BPF_F_WRONLY_PROG (1U << 8)
+
+/* Clone map from listener for newly accepted socket */
+#define BPF_F_CLONE (1U << 9)
+
+/* Enable memory-mapping BPF map */
+#define BPF_F_MMAPABLE (1U << 10)
+
+/* flags for BPF_PROG_QUERY */
+#define BPF_F_QUERY_EFFECTIVE (1U << 0)
+
+enum bpf_stack_build_id_status {
+ /* user space need an empty entry to identify end of a trace */
+ BPF_STACK_BUILD_ID_EMPTY = 0,
+ /* with valid build_id and offset */
+ BPF_STACK_BUILD_ID_VALID = 1,
+ /* couldn't get build_id, fallback to ip */
+ BPF_STACK_BUILD_ID_IP = 2,
+};
+
+#define BPF_BUILD_ID_SIZE 20
+struct bpf_stack_build_id {
+ __s32 status;
+ unsigned char build_id[BPF_BUILD_ID_SIZE];
+ union {
+ __u64 offset;
+ __u64 ip;
+ };
+};
+
+union bpf_attr {
+ struct { /* anonymous struct used by BPF_MAP_CREATE command */
+ __u32 map_type; /* one of enum bpf_map_type */
+ __u32 key_size; /* size of key in bytes */
+ __u32 value_size; /* size of value in bytes */
+ __u32 max_entries; /* max number of entries in a map */
+ __u32 map_flags; /* BPF_MAP_CREATE related
+ * flags defined above.
+ */
+ __u32 inner_map_fd; /* fd pointing to the inner map */
+ __u32 numa_node; /* numa node (effective only if
+ * BPF_F_NUMA_NODE is set).
+ */
+ char map_name[BPF_OBJ_NAME_LEN];
+ __u32 map_ifindex; /* ifindex of netdev to create on */
+ __u32 btf_fd; /* fd pointing to a BTF type data */
+ __u32 btf_key_type_id; /* BTF type_id of the key */
+ __u32 btf_value_type_id; /* BTF type_id of the value */
+ };
+
+ struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */
+ __u32 map_fd;
+ __aligned_u64 key;
+ union {
+ __aligned_u64 value;
+ __aligned_u64 next_key;
+ };
+ __u64 flags;
+ };
+
+ struct { /* anonymous struct used by BPF_PROG_LOAD command */
+ __u32 prog_type; /* one of enum bpf_prog_type */
+ __u32 insn_cnt;
+ __aligned_u64 insns;
+ __aligned_u64 license;
+ __u32 log_level; /* verbosity level of verifier */
+ __u32 log_size; /* size of user buffer */
+ __aligned_u64 log_buf; /* user supplied buffer */
+ __u32 kern_version; /* not used */
+ __u32 prog_flags;
+ char prog_name[BPF_OBJ_NAME_LEN];
+ __u32 prog_ifindex; /* ifindex of netdev to prep for */
+ /* For some prog types expected attach type must be known at
+ * load time to verify attach type specific parts of prog
+ * (context accesses, allowed helpers, etc).
+ */
+ __u32 expected_attach_type;
+ __u32 prog_btf_fd; /* fd pointing to BTF type data */
+ __u32 func_info_rec_size; /* userspace bpf_func_info size */
+ __aligned_u64 func_info; /* func info */
+ __u32 func_info_cnt; /* number of bpf_func_info records */
+ __u32 line_info_rec_size; /* userspace bpf_line_info size */
+ __aligned_u64 line_info; /* line info */
+ __u32 line_info_cnt; /* number of bpf_line_info records */
+ __u32 attach_btf_id; /* in-kernel BTF type id to attach to */
+ __u32 attach_prog_fd; /* 0 to attach to vmlinux */
+ };
+
+ struct { /* anonymous struct used by BPF_OBJ_* commands */
+ __aligned_u64 pathname;
+ __u32 bpf_fd;
+ __u32 file_flags;
+ };
+
+ struct { /* anonymous struct used by BPF_PROG_ATTACH/DETACH commands */
+ __u32 target_fd; /* container object to attach to */
+ __u32 attach_bpf_fd; /* eBPF program to attach */
+ __u32 attach_type;
+ __u32 attach_flags;
+ };
+
+ struct { /* anonymous struct used by BPF_PROG_TEST_RUN command */
+ __u32 prog_fd;
+ __u32 retval;
+ __u32 data_size_in; /* input: len of data_in */
+ __u32 data_size_out; /* input/output: len of data_out
+ * returns ENOSPC if data_out
+ * is too small.
+ */
+ __aligned_u64 data_in;
+ __aligned_u64 data_out;
+ __u32 repeat;
+ __u32 duration;
+ __u32 ctx_size_in; /* input: len of ctx_in */
+ __u32 ctx_size_out; /* input/output: len of ctx_out
+ * returns ENOSPC if ctx_out
+ * is too small.
+ */
+ __aligned_u64 ctx_in;
+ __aligned_u64 ctx_out;
+ } test;
+
+ struct { /* anonymous struct used by BPF_*_GET_*_ID */
+ union {
+ __u32 start_id;
+ __u32 prog_id;
+ __u32 map_id;
+ __u32 btf_id;
+ };
+ __u32 next_id;
+ __u32 open_flags;
+ };
+
+ struct { /* anonymous struct used by BPF_OBJ_GET_INFO_BY_FD */
+ __u32 bpf_fd;
+ __u32 info_len;
+ __aligned_u64 info;
+ } info;
+
+ struct { /* anonymous struct used by BPF_PROG_QUERY command */
+ __u32 target_fd; /* container object to query */
+ __u32 attach_type;
+ __u32 query_flags;
+ __u32 attach_flags;
+ __aligned_u64 prog_ids;
+ __u32 prog_cnt;
+ } query;
+
+ struct {
+ __u64 name;
+ __u32 prog_fd;
+ } raw_tracepoint;
+
+ struct { /* anonymous struct for BPF_BTF_LOAD */
+ __aligned_u64 btf;
+ __aligned_u64 btf_log_buf;
+ __u32 btf_size;
+ __u32 btf_log_size;
+ __u32 btf_log_level;
+ };
+
+ struct {
+ __u32 pid; /* input: pid */
+ __u32 fd; /* input: fd */
+ __u32 flags; /* input: flags */
+ __u32 buf_len; /* input/output: buf len */
+ __aligned_u64 buf; /* input/output:
+ * tp_name for tracepoint
+ * symbol for kprobe
+ * filename for uprobe
+ */
+ __u32 prog_id; /* output: prod_id */
+ __u32 fd_type; /* output: BPF_FD_TYPE_* */
+ __u64 probe_offset; /* output: probe_offset */
+ __u64 probe_addr; /* output: probe_addr */
+ } task_fd_query;
+} __attribute__((aligned(8)));
+
+/* The description below is an attempt at providing documentation to eBPF
+ * developers about the multiple available eBPF helper functions. It can be
+ * parsed and used to produce a manual page. The workflow is the following,
+ * and requires the rst2man utility:
+ *
+ * $ ./scripts/bpf_helpers_doc.py \
+ * --filename include/uapi/linux/bpf.h > /tmp/bpf-helpers.rst
+ * $ rst2man /tmp/bpf-helpers.rst > /tmp/bpf-helpers.7
+ * $ man /tmp/bpf-helpers.7
+ *
+ * Note that in order to produce this external documentation, some RST
+ * formatting is used in the descriptions to get "bold" and "italics" in
+ * manual pages. Also note that the few trailing white spaces are
+ * intentional, removing them would break paragraphs for rst2man.
+ *
+ * Start of BPF helper function descriptions:
+ *
+ * void *bpf_map_lookup_elem(struct bpf_map *map, const void *key)
+ * Description
+ * Perform a lookup in *map* for an entry associated to *key*.
+ * Return
+ * Map value associated to *key*, or **NULL** if no entry was
+ * found.
+ *
+ * int bpf_map_update_elem(struct bpf_map *map, const void *key, const void *value, u64 flags)
+ * Description
+ * Add or update the value of the entry associated to *key* in
+ * *map* with *value*. *flags* is one of:
+ *
+ * **BPF_NOEXIST**
+ * The entry for *key* must not exist in the map.
+ * **BPF_EXIST**
+ * The entry for *key* must already exist in the map.
+ * **BPF_ANY**
+ * No condition on the existence of the entry for *key*.
+ *
+ * Flag value **BPF_NOEXIST** cannot be used for maps of types
+ * **BPF_MAP_TYPE_ARRAY** or **BPF_MAP_TYPE_PERCPU_ARRAY** (all
+ * elements always exist), the helper would return an error.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_map_delete_elem(struct bpf_map *map, const void *key)
+ * Description
+ * Delete entry with *key* from *map*.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_probe_read(void *dst, u32 size, const void *unsafe_ptr)
+ * Description
+ * For tracing programs, safely attempt to read *size* bytes from
+ * kernel space address *unsafe_ptr* and store the data in *dst*.
+ *
+ * Generally, use bpf_probe_read_user() or bpf_probe_read_kernel()
+ * instead.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * u64 bpf_ktime_get_ns(void)
+ * Description
+ * Return the time elapsed since system boot, in nanoseconds.
+ * Return
+ * Current *ktime*.
+ *
+ * int bpf_trace_printk(const char *fmt, u32 fmt_size, ...)
+ * Description
+ * This helper is a "printk()-like" facility for debugging. It
+ * prints a message defined by format *fmt* (of size *fmt_size*)
+ * to file *\/sys/kernel/debug/tracing/trace* from DebugFS, if
+ * available. It can take up to three additional **u64**
+ * arguments (as an eBPF helpers, the total number of arguments is
+ * limited to five).
+ *
+ * Each time the helper is called, it appends a line to the trace.
+ * Lines are discarded while *\/sys/kernel/debug/tracing/trace* is
+ * open, use *\/sys/kernel/debug/tracing/trace_pipe* to avoid this.
+ * The format of the trace is customizable, and the exact output
+ * one will get depends on the options set in
+ * *\/sys/kernel/debug/tracing/trace_options* (see also the
+ * *README* file under the same directory). However, it usually
+ * defaults to something like:
+ *
+ * ::
+ *
+ * telnet-470 [001] .N.. 419421.045894: 0x00000001: <formatted msg>
+ *
+ * In the above:
+ *
+ * * ``telnet`` is the name of the current task.
+ * * ``470`` is the PID of the current task.
+ * * ``001`` is the CPU number on which the task is
+ * running.
+ * * In ``.N..``, each character refers to a set of
+ * options (whether irqs are enabled, scheduling
+ * options, whether hard/softirqs are running, level of
+ * preempt_disabled respectively). **N** means that
+ * **TIF_NEED_RESCHED** and **PREEMPT_NEED_RESCHED**
+ * are set.
+ * * ``419421.045894`` is a timestamp.
+ * * ``0x00000001`` is a fake value used by BPF for the
+ * instruction pointer register.
+ * * ``<formatted msg>`` is the message formatted with
+ * *fmt*.
+ *
+ * The conversion specifiers supported by *fmt* are similar, but
+ * more limited than for printk(). They are **%d**, **%i**,
+ * **%u**, **%x**, **%ld**, **%li**, **%lu**, **%lx**, **%lld**,
+ * **%lli**, **%llu**, **%llx**, **%p**, **%s**. No modifier (size
+ * of field, padding with zeroes, etc.) is available, and the
+ * helper will return **-EINVAL** (but print nothing) if it
+ * encounters an unknown specifier.
+ *
+ * Also, note that **bpf_trace_printk**\ () is slow, and should
+ * only be used for debugging purposes. For this reason, a notice
+ * bloc (spanning several lines) is printed to kernel logs and
+ * states that the helper should not be used "for production use"
+ * the first time this helper is used (or more precisely, when
+ * **trace_printk**\ () buffers are allocated). For passing values
+ * to user space, perf events should be preferred.
+ * Return
+ * The number of bytes written to the buffer, or a negative error
+ * in case of failure.
+ *
+ * u32 bpf_get_prandom_u32(void)
+ * Description
+ * Get a pseudo-random number.
+ *
+ * From a security point of view, this helper uses its own
+ * pseudo-random internal state, and cannot be used to infer the
+ * seed of other random functions in the kernel. However, it is
+ * essential to note that the generator used by the helper is not
+ * cryptographically secure.
+ * Return
+ * A random 32-bit unsigned value.
+ *
+ * u32 bpf_get_smp_processor_id(void)
+ * Description
+ * Get the SMP (symmetric multiprocessing) processor id. Note that
+ * all programs run with preemption disabled, which means that the
+ * SMP processor id is stable during all the execution of the
+ * program.
+ * Return
+ * The SMP id of the processor running the program.
+ *
+ * int bpf_skb_store_bytes(struct sk_buff *skb, u32 offset, const void *from, u32 len, u64 flags)
+ * Description
+ * Store *len* bytes from address *from* into the packet
+ * associated to *skb*, at *offset*. *flags* are a combination of
+ * **BPF_F_RECOMPUTE_CSUM** (automatically recompute the
+ * checksum for the packet after storing the bytes) and
+ * **BPF_F_INVALIDATE_HASH** (set *skb*\ **->hash**, *skb*\
+ * **->swhash** and *skb*\ **->l4hash** to 0).
+ *
+ * A call to this helper is susceptible to change the underlying
+ * packet buffer. Therefore, at load time, all checks on pointers
+ * previously done by the verifier are invalidated and must be
+ * performed again, if the helper is used in combination with
+ * direct packet access.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_l3_csum_replace(struct sk_buff *skb, u32 offset, u64 from, u64 to, u64 size)
+ * Description
+ * Recompute the layer 3 (e.g. IP) checksum for the packet
+ * associated to *skb*. Computation is incremental, so the helper
+ * must know the former value of the header field that was
+ * modified (*from*), the new value of this field (*to*), and the
+ * number of bytes (2 or 4) for this field, stored in *size*.
+ * Alternatively, it is possible to store the difference between
+ * the previous and the new values of the header field in *to*, by
+ * setting *from* and *size* to 0. For both methods, *offset*
+ * indicates the location of the IP checksum within the packet.
+ *
+ * This helper works in combination with **bpf_csum_diff**\ (),
+ * which does not update the checksum in-place, but offers more
+ * flexibility and can handle sizes larger than 2 or 4 for the
+ * checksum to update.
+ *
+ * A call to this helper is susceptible to change the underlying
+ * packet buffer. Therefore, at load time, all checks on pointers
+ * previously done by the verifier are invalidated and must be
+ * performed again, if the helper is used in combination with
+ * direct packet access.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_l4_csum_replace(struct sk_buff *skb, u32 offset, u64 from, u64 to, u64 flags)
+ * Description
+ * Recompute the layer 4 (e.g. TCP, UDP or ICMP) checksum for the
+ * packet associated to *skb*. Computation is incremental, so the
+ * helper must know the former value of the header field that was
+ * modified (*from*), the new value of this field (*to*), and the
+ * number of bytes (2 or 4) for this field, stored on the lowest
+ * four bits of *flags*. Alternatively, it is possible to store
+ * the difference between the previous and the new values of the
+ * header field in *to*, by setting *from* and the four lowest
+ * bits of *flags* to 0. For both methods, *offset* indicates the
+ * location of the IP checksum within the packet. In addition to
+ * the size of the field, *flags* can be added (bitwise OR) actual
+ * flags. With **BPF_F_MARK_MANGLED_0**, a null checksum is left
+ * untouched (unless **BPF_F_MARK_ENFORCE** is added as well), and
+ * for updates resulting in a null checksum the value is set to
+ * **CSUM_MANGLED_0** instead. Flag **BPF_F_PSEUDO_HDR** indicates
+ * the checksum is to be computed against a pseudo-header.
+ *
+ * This helper works in combination with **bpf_csum_diff**\ (),
+ * which does not update the checksum in-place, but offers more
+ * flexibility and can handle sizes larger than 2 or 4 for the
+ * checksum to update.
+ *
+ * A call to this helper is susceptible to change the underlying
+ * packet buffer. Therefore, at load time, all checks on pointers
+ * previously done by the verifier are invalidated and must be
+ * performed again, if the helper is used in combination with
+ * direct packet access.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_tail_call(void *ctx, struct bpf_map *prog_array_map, u32 index)
+ * Description
+ * This special helper is used to trigger a "tail call", or in
+ * other words, to jump into another eBPF program. The same stack
+ * frame is used (but values on stack and in registers for the
+ * caller are not accessible to the callee). This mechanism allows
+ * for program chaining, either for raising the maximum number of
+ * available eBPF instructions, or to execute given programs in
+ * conditional blocks. For security reasons, there is an upper
+ * limit to the number of successive tail calls that can be
+ * performed.
+ *
+ * Upon call of this helper, the program attempts to jump into a
+ * program referenced at index *index* in *prog_array_map*, a
+ * special map of type **BPF_MAP_TYPE_PROG_ARRAY**, and passes
+ * *ctx*, a pointer to the context.
+ *
+ * If the call succeeds, the kernel immediately runs the first
+ * instruction of the new program. This is not a function call,
+ * and it never returns to the previous program. If the call
+ * fails, then the helper has no effect, and the caller continues
+ * to run its subsequent instructions. A call can fail if the
+ * destination program for the jump does not exist (i.e. *index*
+ * is superior to the number of entries in *prog_array_map*), or
+ * if the maximum number of tail calls has been reached for this
+ * chain of programs. This limit is defined in the kernel by the
+ * macro **MAX_TAIL_CALL_CNT** (not accessible to user space),
+ * which is currently set to 32.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_clone_redirect(struct sk_buff *skb, u32 ifindex, u64 flags)
+ * Description
+ * Clone and redirect the packet associated to *skb* to another
+ * net device of index *ifindex*. Both ingress and egress
+ * interfaces can be used for redirection. The **BPF_F_INGRESS**
+ * value in *flags* is used to make the distinction (ingress path
+ * is selected if the flag is present, egress path otherwise).
+ * This is the only flag supported for now.
+ *
+ * In comparison with **bpf_redirect**\ () helper,
+ * **bpf_clone_redirect**\ () has the associated cost of
+ * duplicating the packet buffer, but this can be executed out of
+ * the eBPF program. Conversely, **bpf_redirect**\ () is more
+ * efficient, but it is handled through an action code where the
+ * redirection happens only after the eBPF program has returned.
+ *
+ * A call to this helper is susceptible to change the underlying
+ * packet buffer. Therefore, at load time, all checks on pointers
+ * previously done by the verifier are invalidated and must be
+ * performed again, if the helper is used in combination with
+ * direct packet access.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * u64 bpf_get_current_pid_tgid(void)
+ * Return
+ * A 64-bit integer containing the current tgid and pid, and
+ * created as such:
+ * *current_task*\ **->tgid << 32 \|**
+ * *current_task*\ **->pid**.
+ *
+ * u64 bpf_get_current_uid_gid(void)
+ * Return
+ * A 64-bit integer containing the current GID and UID, and
+ * created as such: *current_gid* **<< 32 \|** *current_uid*.
+ *
+ * int bpf_get_current_comm(void *buf, u32 size_of_buf)
+ * Description
+ * Copy the **comm** attribute of the current task into *buf* of
+ * *size_of_buf*. The **comm** attribute contains the name of
+ * the executable (excluding the path) for the current task. The
+ * *size_of_buf* must be strictly positive. On success, the
+ * helper makes sure that the *buf* is NUL-terminated. On failure,
+ * it is filled with zeroes.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * u32 bpf_get_cgroup_classid(struct sk_buff *skb)
+ * Description
+ * Retrieve the classid for the current task, i.e. for the net_cls
+ * cgroup to which *skb* belongs.
+ *
+ * This helper can be used on TC egress path, but not on ingress.
+ *
+ * The net_cls cgroup provides an interface to tag network packets
+ * based on a user-provided identifier for all traffic coming from
+ * the tasks belonging to the related cgroup. See also the related
+ * kernel documentation, available from the Linux sources in file
+ * *Documentation/admin-guide/cgroup-v1/net_cls.rst*.
+ *
+ * The Linux kernel has two versions for cgroups: there are
+ * cgroups v1 and cgroups v2. Both are available to users, who can
+ * use a mixture of them, but note that the net_cls cgroup is for
+ * cgroup v1 only. This makes it incompatible with BPF programs
+ * run on cgroups, which is a cgroup-v2-only feature (a socket can
+ * only hold data for one version of cgroups at a time).
+ *
+ * This helper is only available is the kernel was compiled with
+ * the **CONFIG_CGROUP_NET_CLASSID** configuration option set to
+ * "**y**" or to "**m**".
+ * Return
+ * The classid, or 0 for the default unconfigured classid.
+ *
+ * int bpf_skb_vlan_push(struct sk_buff *skb, __be16 vlan_proto, u16 vlan_tci)
+ * Description
+ * Push a *vlan_tci* (VLAN tag control information) of protocol
+ * *vlan_proto* to the packet associated to *skb*, then update
+ * the checksum. Note that if *vlan_proto* is different from
+ * **ETH_P_8021Q** and **ETH_P_8021AD**, it is considered to
+ * be **ETH_P_8021Q**.
+ *
+ * A call to this helper is susceptible to change the underlying
+ * packet buffer. Therefore, at load time, all checks on pointers
+ * previously done by the verifier are invalidated and must be
+ * performed again, if the helper is used in combination with
+ * direct packet access.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_skb_vlan_pop(struct sk_buff *skb)
+ * Description
+ * Pop a VLAN header from the packet associated to *skb*.
+ *
+ * A call to this helper is susceptible to change the underlying
+ * packet buffer. Therefore, at load time, all checks on pointers
+ * previously done by the verifier are invalidated and must be
+ * performed again, if the helper is used in combination with
+ * direct packet access.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_skb_get_tunnel_key(struct sk_buff *skb, struct bpf_tunnel_key *key, u32 size, u64 flags)
+ * Description
+ * Get tunnel metadata. This helper takes a pointer *key* to an
+ * empty **struct bpf_tunnel_key** of **size**, that will be
+ * filled with tunnel metadata for the packet associated to *skb*.
+ * The *flags* can be set to **BPF_F_TUNINFO_IPV6**, which
+ * indicates that the tunnel is based on IPv6 protocol instead of
+ * IPv4.
+ *
+ * The **struct bpf_tunnel_key** is an object that generalizes the
+ * principal parameters used by various tunneling protocols into a
+ * single struct. This way, it can be used to easily make a
+ * decision based on the contents of the encapsulation header,
+ * "summarized" in this struct. In particular, it holds the IP
+ * address of the remote end (IPv4 or IPv6, depending on the case)
+ * in *key*\ **->remote_ipv4** or *key*\ **->remote_ipv6**. Also,
+ * this struct exposes the *key*\ **->tunnel_id**, which is
+ * generally mapped to a VNI (Virtual Network Identifier), making
+ * it programmable together with the **bpf_skb_set_tunnel_key**\
+ * () helper.
+ *
+ * Let's imagine that the following code is part of a program
+ * attached to the TC ingress interface, on one end of a GRE
+ * tunnel, and is supposed to filter out all messages coming from
+ * remote ends with IPv4 address other than 10.0.0.1:
+ *
+ * ::
+ *
+ * int ret;
+ * struct bpf_tunnel_key key = {};
+ *
+ * ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0);
+ * if (ret < 0)
+ * return TC_ACT_SHOT; // drop packet
+ *
+ * if (key.remote_ipv4 != 0x0a000001)
+ * return TC_ACT_SHOT; // drop packet
+ *
+ * return TC_ACT_OK; // accept packet
+ *
+ * This interface can also be used with all encapsulation devices
+ * that can operate in "collect metadata" mode: instead of having
+ * one network device per specific configuration, the "collect
+ * metadata" mode only requires a single device where the
+ * configuration can be extracted from this helper.
+ *
+ * This can be used together with various tunnels such as VXLan,
+ * Geneve, GRE or IP in IP (IPIP).
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_skb_set_tunnel_key(struct sk_buff *skb, struct bpf_tunnel_key *key, u32 size, u64 flags)
+ * Description
+ * Populate tunnel metadata for packet associated to *skb.* The
+ * tunnel metadata is set to the contents of *key*, of *size*. The
+ * *flags* can be set to a combination of the following values:
+ *
+ * **BPF_F_TUNINFO_IPV6**
+ * Indicate that the tunnel is based on IPv6 protocol
+ * instead of IPv4.
+ * **BPF_F_ZERO_CSUM_TX**
+ * For IPv4 packets, add a flag to tunnel metadata
+ * indicating that checksum computation should be skipped
+ * and checksum set to zeroes.
+ * **BPF_F_DONT_FRAGMENT**
+ * Add a flag to tunnel metadata indicating that the
+ * packet should not be fragmented.
+ * **BPF_F_SEQ_NUMBER**
+ * Add a flag to tunnel metadata indicating that a
+ * sequence number should be added to tunnel header before
+ * sending the packet. This flag was added for GRE
+ * encapsulation, but might be used with other protocols
+ * as well in the future.
+ *
+ * Here is a typical usage on the transmit path:
+ *
+ * ::
+ *
+ * struct bpf_tunnel_key key;
+ * populate key ...
+ * bpf_skb_set_tunnel_key(skb, &key, sizeof(key), 0);
+ * bpf_clone_redirect(skb, vxlan_dev_ifindex, 0);
+ *
+ * See also the description of the **bpf_skb_get_tunnel_key**\ ()
+ * helper for additional information.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * u64 bpf_perf_event_read(struct bpf_map *map, u64 flags)
+ * Description
+ * Read the value of a perf event counter. This helper relies on a
+ * *map* of type **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. The nature of
+ * the perf event counter is selected when *map* is updated with
+ * perf event file descriptors. The *map* is an array whose size
+ * is the number of available CPUs, and each cell contains a value
+ * relative to one CPU. The value to retrieve is indicated by
+ * *flags*, that contains the index of the CPU to look up, masked
+ * with **BPF_F_INDEX_MASK**. Alternatively, *flags* can be set to
+ * **BPF_F_CURRENT_CPU** to indicate that the value for the
+ * current CPU should be retrieved.
+ *
+ * Note that before Linux 4.13, only hardware perf event can be
+ * retrieved.
+ *
+ * Also, be aware that the newer helper
+ * **bpf_perf_event_read_value**\ () is recommended over
+ * **bpf_perf_event_read**\ () in general. The latter has some ABI
+ * quirks where error and counter value are used as a return code
+ * (which is wrong to do since ranges may overlap). This issue is
+ * fixed with **bpf_perf_event_read_value**\ (), which at the same
+ * time provides more features over the **bpf_perf_event_read**\
+ * () interface. Please refer to the description of
+ * **bpf_perf_event_read_value**\ () for details.
+ * Return
+ * The value of the perf event counter read from the map, or a
+ * negative error code in case of failure.
+ *
+ * int bpf_redirect(u32 ifindex, u64 flags)
+ * Description
+ * Redirect the packet to another net device of index *ifindex*.
+ * This helper is somewhat similar to **bpf_clone_redirect**\
+ * (), except that the packet is not cloned, which provides
+ * increased performance.
+ *
+ * Except for XDP, both ingress and egress interfaces can be used
+ * for redirection. The **BPF_F_INGRESS** value in *flags* is used
+ * to make the distinction (ingress path is selected if the flag
+ * is present, egress path otherwise). Currently, XDP only
+ * supports redirection to the egress interface, and accepts no
+ * flag at all.
+ *
+ * The same effect can be attained with the more generic
+ * **bpf_redirect_map**\ (), which requires specific maps to be
+ * used but offers better performance.
+ * Return
+ * For XDP, the helper returns **XDP_REDIRECT** on success or
+ * **XDP_ABORTED** on error. For other program types, the values
+ * are **TC_ACT_REDIRECT** on success or **TC_ACT_SHOT** on
+ * error.
+ *
+ * u32 bpf_get_route_realm(struct sk_buff *skb)
+ * Description
+ * Retrieve the realm or the route, that is to say the
+ * **tclassid** field of the destination for the *skb*. The
+ * indentifier retrieved is a user-provided tag, similar to the
+ * one used with the net_cls cgroup (see description for
+ * **bpf_get_cgroup_classid**\ () helper), but here this tag is
+ * held by a route (a destination entry), not by a task.
+ *
+ * Retrieving this identifier works with the clsact TC egress hook
+ * (see also **tc-bpf(8)**), or alternatively on conventional
+ * classful egress qdiscs, but not on TC ingress path. In case of
+ * clsact TC egress hook, this has the advantage that, internally,
+ * the destination entry has not been dropped yet in the transmit
+ * path. Therefore, the destination entry does not need to be
+ * artificially held via **netif_keep_dst**\ () for a classful
+ * qdisc until the *skb* is freed.
+ *
+ * This helper is available only if the kernel was compiled with
+ * **CONFIG_IP_ROUTE_CLASSID** configuration option.
+ * Return
+ * The realm of the route for the packet associated to *skb*, or 0
+ * if none was found.
+ *
+ * int bpf_perf_event_output(void *ctx, struct bpf_map *map, u64 flags, void *data, u64 size)
+ * Description
+ * Write raw *data* blob into a special BPF perf event held by
+ * *map* of type **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. This perf
+ * event must have the following attributes: **PERF_SAMPLE_RAW**
+ * as **sample_type**, **PERF_TYPE_SOFTWARE** as **type**, and
+ * **PERF_COUNT_SW_BPF_OUTPUT** as **config**.
+ *
+ * The *flags* are used to indicate the index in *map* for which
+ * the value must be put, masked with **BPF_F_INDEX_MASK**.
+ * Alternatively, *flags* can be set to **BPF_F_CURRENT_CPU**
+ * to indicate that the index of the current CPU core should be
+ * used.
+ *
+ * The value to write, of *size*, is passed through eBPF stack and
+ * pointed by *data*.
+ *
+ * The context of the program *ctx* needs also be passed to the
+ * helper.
+ *
+ * On user space, a program willing to read the values needs to
+ * call **perf_event_open**\ () on the perf event (either for
+ * one or for all CPUs) and to store the file descriptor into the
+ * *map*. This must be done before the eBPF program can send data
+ * into it. An example is available in file
+ * *samples/bpf/trace_output_user.c* in the Linux kernel source
+ * tree (the eBPF program counterpart is in
+ * *samples/bpf/trace_output_kern.c*).
+ *
+ * **bpf_perf_event_output**\ () achieves better performance
+ * than **bpf_trace_printk**\ () for sharing data with user
+ * space, and is much better suitable for streaming data from eBPF
+ * programs.
+ *
+ * Note that this helper is not restricted to tracing use cases
+ * and can be used with programs attached to TC or XDP as well,
+ * where it allows for passing data to user space listeners. Data
+ * can be:
+ *
+ * * Only custom structs,
+ * * Only the packet payload, or
+ * * A combination of both.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_skb_load_bytes(const void *skb, u32 offset, void *to, u32 len)
+ * Description
+ * This helper was provided as an easy way to load data from a
+ * packet. It can be used to load *len* bytes from *offset* from
+ * the packet associated to *skb*, into the buffer pointed by
+ * *to*.
+ *
+ * Since Linux 4.7, usage of this helper has mostly been replaced
+ * by "direct packet access", enabling packet data to be
+ * manipulated with *skb*\ **->data** and *skb*\ **->data_end**
+ * pointing respectively to the first byte of packet data and to
+ * the byte after the last byte of packet data. However, it
+ * remains useful if one wishes to read large quantities of data
+ * at once from a packet into the eBPF stack.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_get_stackid(void *ctx, struct bpf_map *map, u64 flags)
+ * Description
+ * Walk a user or a kernel stack and return its id. To achieve
+ * this, the helper needs *ctx*, which is a pointer to the context
+ * on which the tracing program is executed, and a pointer to a
+ * *map* of type **BPF_MAP_TYPE_STACK_TRACE**.
+ *
+ * The last argument, *flags*, holds the number of stack frames to
+ * skip (from 0 to 255), masked with
+ * **BPF_F_SKIP_FIELD_MASK**. The next bits can be used to set
+ * a combination of the following flags:
+ *
+ * **BPF_F_USER_STACK**
+ * Collect a user space stack instead of a kernel stack.
+ * **BPF_F_FAST_STACK_CMP**
+ * Compare stacks by hash only.
+ * **BPF_F_REUSE_STACKID**
+ * If two different stacks hash into the same *stackid*,
+ * discard the old one.
+ *
+ * The stack id retrieved is a 32 bit long integer handle which
+ * can be further combined with other data (including other stack
+ * ids) and used as a key into maps. This can be useful for
+ * generating a variety of graphs (such as flame graphs or off-cpu
+ * graphs).
+ *
+ * For walking a stack, this helper is an improvement over
+ * **bpf_probe_read**\ (), which can be used with unrolled loops
+ * but is not efficient and consumes a lot of eBPF instructions.
+ * Instead, **bpf_get_stackid**\ () can collect up to
+ * **PERF_MAX_STACK_DEPTH** both kernel and user frames. Note that
+ * this limit can be controlled with the **sysctl** program, and
+ * that it should be manually increased in order to profile long
+ * user stacks (such as stacks for Java programs). To do so, use:
+ *
+ * ::
+ *
+ * # sysctl kernel.perf_event_max_stack=<new value>
+ * Return
+ * The positive or null stack id on success, or a negative error
+ * in case of failure.
+ *
+ * s64 bpf_csum_diff(__be32 *from, u32 from_size, __be32 *to, u32 to_size, __wsum seed)
+ * Description
+ * Compute a checksum difference, from the raw buffer pointed by
+ * *from*, of length *from_size* (that must be a multiple of 4),
+ * towards the raw buffer pointed by *to*, of size *to_size*
+ * (same remark). An optional *seed* can be added to the value
+ * (this can be cascaded, the seed may come from a previous call
+ * to the helper).
+ *
+ * This is flexible enough to be used in several ways:
+ *
+ * * With *from_size* == 0, *to_size* > 0 and *seed* set to
+ * checksum, it can be used when pushing new data.
+ * * With *from_size* > 0, *to_size* == 0 and *seed* set to
+ * checksum, it can be used when removing data from a packet.
+ * * With *from_size* > 0, *to_size* > 0 and *seed* set to 0, it
+ * can be used to compute a diff. Note that *from_size* and
+ * *to_size* do not need to be equal.
+ *
+ * This helper can be used in combination with
+ * **bpf_l3_csum_replace**\ () and **bpf_l4_csum_replace**\ (), to
+ * which one can feed in the difference computed with
+ * **bpf_csum_diff**\ ().
+ * Return
+ * The checksum result, or a negative error code in case of
+ * failure.
+ *
+ * int bpf_skb_get_tunnel_opt(struct sk_buff *skb, void *opt, u32 size)
+ * Description
+ * Retrieve tunnel options metadata for the packet associated to
+ * *skb*, and store the raw tunnel option data to the buffer *opt*
+ * of *size*.
+ *
+ * This helper can be used with encapsulation devices that can
+ * operate in "collect metadata" mode (please refer to the related
+ * note in the description of **bpf_skb_get_tunnel_key**\ () for
+ * more details). A particular example where this can be used is
+ * in combination with the Geneve encapsulation protocol, where it
+ * allows for pushing (with **bpf_skb_get_tunnel_opt**\ () helper)
+ * and retrieving arbitrary TLVs (Type-Length-Value headers) from
+ * the eBPF program. This allows for full customization of these
+ * headers.
+ * Return
+ * The size of the option data retrieved.
+ *
+ * int bpf_skb_set_tunnel_opt(struct sk_buff *skb, void *opt, u32 size)
+ * Description
+ * Set tunnel options metadata for the packet associated to *skb*
+ * to the option data contained in the raw buffer *opt* of *size*.
+ *
+ * See also the description of the **bpf_skb_get_tunnel_opt**\ ()
+ * helper for additional information.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_skb_change_proto(struct sk_buff *skb, __be16 proto, u64 flags)
+ * Description
+ * Change the protocol of the *skb* to *proto*. Currently
+ * supported are transition from IPv4 to IPv6, and from IPv6 to
+ * IPv4. The helper takes care of the groundwork for the
+ * transition, including resizing the socket buffer. The eBPF
+ * program is expected to fill the new headers, if any, via
+ * **skb_store_bytes**\ () and to recompute the checksums with
+ * **bpf_l3_csum_replace**\ () and **bpf_l4_csum_replace**\
+ * (). The main case for this helper is to perform NAT64
+ * operations out of an eBPF program.
+ *
+ * Internally, the GSO type is marked as dodgy so that headers are
+ * checked and segments are recalculated by the GSO/GRO engine.
+ * The size for GSO target is adapted as well.
+ *
+ * All values for *flags* are reserved for future usage, and must
+ * be left at zero.
+ *
+ * A call to this helper is susceptible to change the underlying
+ * packet buffer. Therefore, at load time, all checks on pointers
+ * previously done by the verifier are invalidated and must be
+ * performed again, if the helper is used in combination with
+ * direct packet access.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_skb_change_type(struct sk_buff *skb, u32 type)
+ * Description
+ * Change the packet type for the packet associated to *skb*. This
+ * comes down to setting *skb*\ **->pkt_type** to *type*, except
+ * the eBPF program does not have a write access to *skb*\
+ * **->pkt_type** beside this helper. Using a helper here allows
+ * for graceful handling of errors.
+ *
+ * The major use case is to change incoming *skb*s to
+ * **PACKET_HOST** in a programmatic way instead of having to
+ * recirculate via **redirect**\ (..., **BPF_F_INGRESS**), for
+ * example.
+ *
+ * Note that *type* only allows certain values. At this time, they
+ * are:
+ *
+ * **PACKET_HOST**
+ * Packet is for us.
+ * **PACKET_BROADCAST**
+ * Send packet to all.
+ * **PACKET_MULTICAST**
+ * Send packet to group.
+ * **PACKET_OTHERHOST**
+ * Send packet to someone else.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_skb_under_cgroup(struct sk_buff *skb, struct bpf_map *map, u32 index)
+ * Description
+ * Check whether *skb* is a descendant of the cgroup2 held by
+ * *map* of type **BPF_MAP_TYPE_CGROUP_ARRAY**, at *index*.
+ * Return
+ * The return value depends on the result of the test, and can be:
+ *
+ * * 0, if the *skb* failed the cgroup2 descendant test.
+ * * 1, if the *skb* succeeded the cgroup2 descendant test.
+ * * A negative error code, if an error occurred.
+ *
+ * u32 bpf_get_hash_recalc(struct sk_buff *skb)
+ * Description
+ * Retrieve the hash of the packet, *skb*\ **->hash**. If it is
+ * not set, in particular if the hash was cleared due to mangling,
+ * recompute this hash. Later accesses to the hash can be done
+ * directly with *skb*\ **->hash**.
+ *
+ * Calling **bpf_set_hash_invalid**\ (), changing a packet
+ * prototype with **bpf_skb_change_proto**\ (), or calling
+ * **bpf_skb_store_bytes**\ () with the
+ * **BPF_F_INVALIDATE_HASH** are actions susceptible to clear
+ * the hash and to trigger a new computation for the next call to
+ * **bpf_get_hash_recalc**\ ().
+ * Return
+ * The 32-bit hash.
+ *
+ * u64 bpf_get_current_task(void)
+ * Return
+ * A pointer to the current task struct.
+ *
+ * int bpf_probe_write_user(void *dst, const void *src, u32 len)
+ * Description
+ * Attempt in a safe way to write *len* bytes from the buffer
+ * *src* to *dst* in memory. It only works for threads that are in
+ * user context, and *dst* must be a valid user space address.
+ *
+ * This helper should not be used to implement any kind of
+ * security mechanism because of TOC-TOU attacks, but rather to
+ * debug, divert, and manipulate execution of semi-cooperative
+ * processes.
+ *
+ * Keep in mind that this feature is meant for experiments, and it
+ * has a risk of crashing the system and running programs.
+ * Therefore, when an eBPF program using this helper is attached,
+ * a warning including PID and process name is printed to kernel
+ * logs.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_current_task_under_cgroup(struct bpf_map *map, u32 index)
+ * Description
+ * Check whether the probe is being run is the context of a given
+ * subset of the cgroup2 hierarchy. The cgroup2 to test is held by
+ * *map* of type **BPF_MAP_TYPE_CGROUP_ARRAY**, at *index*.
+ * Return
+ * The return value depends on the result of the test, and can be:
+ *
+ * * 0, if the *skb* task belongs to the cgroup2.
+ * * 1, if the *skb* task does not belong to the cgroup2.
+ * * A negative error code, if an error occurred.
+ *
+ * int bpf_skb_change_tail(struct sk_buff *skb, u32 len, u64 flags)
+ * Description
+ * Resize (trim or grow) the packet associated to *skb* to the
+ * new *len*. The *flags* are reserved for future usage, and must
+ * be left at zero.
+ *
+ * The basic idea is that the helper performs the needed work to
+ * change the size of the packet, then the eBPF program rewrites
+ * the rest via helpers like **bpf_skb_store_bytes**\ (),
+ * **bpf_l3_csum_replace**\ (), **bpf_l3_csum_replace**\ ()
+ * and others. This helper is a slow path utility intended for
+ * replies with control messages. And because it is targeted for
+ * slow path, the helper itself can afford to be slow: it
+ * implicitly linearizes, unclones and drops offloads from the
+ * *skb*.
+ *
+ * A call to this helper is susceptible to change the underlying
+ * packet buffer. Therefore, at load time, all checks on pointers
+ * previously done by the verifier are invalidated and must be
+ * performed again, if the helper is used in combination with
+ * direct packet access.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_skb_pull_data(struct sk_buff *skb, u32 len)
+ * Description
+ * Pull in non-linear data in case the *skb* is non-linear and not
+ * all of *len* are part of the linear section. Make *len* bytes
+ * from *skb* readable and writable. If a zero value is passed for
+ * *len*, then the whole length of the *skb* is pulled.
+ *
+ * This helper is only needed for reading and writing with direct
+ * packet access.
+ *
+ * For direct packet access, testing that offsets to access
+ * are within packet boundaries (test on *skb*\ **->data_end**) is
+ * susceptible to fail if offsets are invalid, or if the requested
+ * data is in non-linear parts of the *skb*. On failure the
+ * program can just bail out, or in the case of a non-linear
+ * buffer, use a helper to make the data available. The
+ * **bpf_skb_load_bytes**\ () helper is a first solution to access
+ * the data. Another one consists in using **bpf_skb_pull_data**
+ * to pull in once the non-linear parts, then retesting and
+ * eventually access the data.
+ *
+ * At the same time, this also makes sure the *skb* is uncloned,
+ * which is a necessary condition for direct write. As this needs
+ * to be an invariant for the write part only, the verifier
+ * detects writes and adds a prologue that is calling
+ * **bpf_skb_pull_data()** to effectively unclone the *skb* from
+ * the very beginning in case it is indeed cloned.
+ *
+ * A call to this helper is susceptible to change the underlying
+ * packet buffer. Therefore, at load time, all checks on pointers
+ * previously done by the verifier are invalidated and must be
+ * performed again, if the helper is used in combination with
+ * direct packet access.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * s64 bpf_csum_update(struct sk_buff *skb, __wsum csum)
+ * Description
+ * Add the checksum *csum* into *skb*\ **->csum** in case the
+ * driver has supplied a checksum for the entire packet into that
+ * field. Return an error otherwise. This helper is intended to be
+ * used in combination with **bpf_csum_diff**\ (), in particular
+ * when the checksum needs to be updated after data has been
+ * written into the packet through direct packet access.
+ * Return
+ * The checksum on success, or a negative error code in case of
+ * failure.
+ *
+ * void bpf_set_hash_invalid(struct sk_buff *skb)
+ * Description
+ * Invalidate the current *skb*\ **->hash**. It can be used after
+ * mangling on headers through direct packet access, in order to
+ * indicate that the hash is outdated and to trigger a
+ * recalculation the next time the kernel tries to access this
+ * hash or when the **bpf_get_hash_recalc**\ () helper is called.
+ *
+ * int bpf_get_numa_node_id(void)
+ * Description
+ * Return the id of the current NUMA node. The primary use case
+ * for this helper is the selection of sockets for the local NUMA
+ * node, when the program is attached to sockets using the
+ * **SO_ATTACH_REUSEPORT_EBPF** option (see also **socket(7)**),
+ * but the helper is also available to other eBPF program types,
+ * similarly to **bpf_get_smp_processor_id**\ ().
+ * Return
+ * The id of current NUMA node.
+ *
+ * int bpf_skb_change_head(struct sk_buff *skb, u32 len, u64 flags)
+ * Description
+ * Grows headroom of packet associated to *skb* and adjusts the
+ * offset of the MAC header accordingly, adding *len* bytes of
+ * space. It automatically extends and reallocates memory as
+ * required.
+ *
+ * This helper can be used on a layer 3 *skb* to push a MAC header
+ * for redirection into a layer 2 device.
+ *
+ * All values for *flags* are reserved for future usage, and must
+ * be left at zero.
+ *
+ * A call to this helper is susceptible to change the underlying
+ * packet buffer. Therefore, at load time, all checks on pointers
+ * previously done by the verifier are invalidated and must be
+ * performed again, if the helper is used in combination with
+ * direct packet access.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_xdp_adjust_head(struct xdp_buff *xdp_md, int delta)
+ * Description
+ * Adjust (move) *xdp_md*\ **->data** by *delta* bytes. Note that
+ * it is possible to use a negative value for *delta*. This helper
+ * can be used to prepare the packet for pushing or popping
+ * headers.
+ *
+ * A call to this helper is susceptible to change the underlying
+ * packet buffer. Therefore, at load time, all checks on pointers
+ * previously done by the verifier are invalidated and must be
+ * performed again, if the helper is used in combination with
+ * direct packet access.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_probe_read_str(void *dst, u32 size, const void *unsafe_ptr)
+ * Description
+ * Copy a NUL terminated string from an unsafe kernel address
+ * *unsafe_ptr* to *dst*. See bpf_probe_read_kernel_str() for
+ * more details.
+ *
+ * Generally, use bpf_probe_read_user_str() or bpf_probe_read_kernel_str()
+ * instead.
+ * Return
+ * On success, the strictly positive length of the string,
+ * including the trailing NUL character. On error, a negative
+ * value.
+ *
+ * u64 bpf_get_socket_cookie(struct sk_buff *skb)
+ * Description
+ * If the **struct sk_buff** pointed by *skb* has a known socket,
+ * retrieve the cookie (generated by the kernel) of this socket.
+ * If no cookie has been set yet, generate a new cookie. Once
+ * generated, the socket cookie remains stable for the life of the
+ * socket. This helper can be useful for monitoring per socket
+ * networking traffic statistics as it provides a global socket
+ * identifier that can be assumed unique.
+ * Return
+ * A 8-byte long non-decreasing number on success, or 0 if the
+ * socket field is missing inside *skb*.
+ *
+ * u64 bpf_get_socket_cookie(struct bpf_sock_addr *ctx)
+ * Description
+ * Equivalent to bpf_get_socket_cookie() helper that accepts
+ * *skb*, but gets socket from **struct bpf_sock_addr** context.
+ * Return
+ * A 8-byte long non-decreasing number.
+ *
+ * u64 bpf_get_socket_cookie(struct bpf_sock_ops *ctx)
+ * Description
+ * Equivalent to bpf_get_socket_cookie() helper that accepts
+ * *skb*, but gets socket from **struct bpf_sock_ops** context.
+ * Return
+ * A 8-byte long non-decreasing number.
+ *
+ * u32 bpf_get_socket_uid(struct sk_buff *skb)
+ * Return
+ * The owner UID of the socket associated to *skb*. If the socket
+ * is **NULL**, or if it is not a full socket (i.e. if it is a
+ * time-wait or a request socket instead), **overflowuid** value
+ * is returned (note that **overflowuid** might also be the actual
+ * UID value for the socket).
+ *
+ * u32 bpf_set_hash(struct sk_buff *skb, u32 hash)
+ * Description
+ * Set the full hash for *skb* (set the field *skb*\ **->hash**)
+ * to value *hash*.
+ * Return
+ * 0
+ *
+ * int bpf_setsockopt(struct bpf_sock_ops *bpf_socket, int level, int optname, void *optval, int optlen)
+ * Description
+ * Emulate a call to **setsockopt()** on the socket associated to
+ * *bpf_socket*, which must be a full socket. The *level* at
+ * which the option resides and the name *optname* of the option
+ * must be specified, see **setsockopt(2)** for more information.
+ * The option value of length *optlen* is pointed by *optval*.
+ *
+ * This helper actually implements a subset of **setsockopt()**.
+ * It supports the following *level*\ s:
+ *
+ * * **SOL_SOCKET**, which supports the following *optname*\ s:
+ * **SO_RCVBUF**, **SO_SNDBUF**, **SO_MAX_PACING_RATE**,
+ * **SO_PRIORITY**, **SO_RCVLOWAT**, **SO_MARK**.
+ * * **IPPROTO_TCP**, which supports the following *optname*\ s:
+ * **TCP_CONGESTION**, **TCP_BPF_IW**,
+ * **TCP_BPF_SNDCWND_CLAMP**.
+ * * **IPPROTO_IP**, which supports *optname* **IP_TOS**.
+ * * **IPPROTO_IPV6**, which supports *optname* **IPV6_TCLASS**.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_skb_adjust_room(struct sk_buff *skb, s32 len_diff, u32 mode, u64 flags)
+ * Description
+ * Grow or shrink the room for data in the packet associated to
+ * *skb* by *len_diff*, and according to the selected *mode*.
+ *
+ * There are two supported modes at this time:
+ *
+ * * **BPF_ADJ_ROOM_MAC**: Adjust room at the mac layer
+ * (room space is added or removed below the layer 2 header).
+ *
+ * * **BPF_ADJ_ROOM_NET**: Adjust room at the network layer
+ * (room space is added or removed below the layer 3 header).
+ *
+ * The following flags are supported at this time:
+ *
+ * * **BPF_F_ADJ_ROOM_FIXED_GSO**: Do not adjust gso_size.
+ * Adjusting mss in this way is not allowed for datagrams.
+ *
+ * * **BPF_F_ADJ_ROOM_ENCAP_L3_IPV4**,
+ * **BPF_F_ADJ_ROOM_ENCAP_L3_IPV6**:
+ * Any new space is reserved to hold a tunnel header.
+ * Configure skb offsets and other fields accordingly.
+ *
+ * * **BPF_F_ADJ_ROOM_ENCAP_L4_GRE**,
+ * **BPF_F_ADJ_ROOM_ENCAP_L4_UDP**:
+ * Use with ENCAP_L3 flags to further specify the tunnel type.
+ *
+ * * **BPF_F_ADJ_ROOM_ENCAP_L2**\ (*len*):
+ * Use with ENCAP_L3/L4 flags to further specify the tunnel
+ * type; *len* is the length of the inner MAC header.
+ *
+ * A call to this helper is susceptible to change the underlying
+ * packet buffer. Therefore, at load time, all checks on pointers
+ * previously done by the verifier are invalidated and must be
+ * performed again, if the helper is used in combination with
+ * direct packet access.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_redirect_map(struct bpf_map *map, u32 key, u64 flags)
+ * Description
+ * Redirect the packet to the endpoint referenced by *map* at
+ * index *key*. Depending on its type, this *map* can contain
+ * references to net devices (for forwarding packets through other
+ * ports), or to CPUs (for redirecting XDP frames to another CPU;
+ * but this is only implemented for native XDP (with driver
+ * support) as of this writing).
+ *
+ * The lower two bits of *flags* are used as the return code if
+ * the map lookup fails. This is so that the return value can be
+ * one of the XDP program return codes up to XDP_TX, as chosen by
+ * the caller. Any higher bits in the *flags* argument must be
+ * unset.
+ *
+ * When used to redirect packets to net devices, this helper
+ * provides a high performance increase over **bpf_redirect**\ ().
+ * This is due to various implementation details of the underlying
+ * mechanisms, one of which is the fact that **bpf_redirect_map**\
+ * () tries to send packet as a "bulk" to the device.
+ * Return
+ * **XDP_REDIRECT** on success, or **XDP_ABORTED** on error.
+ *
+ * int bpf_sk_redirect_map(struct sk_buff *skb, struct bpf_map *map, u32 key, u64 flags)
+ * Description
+ * Redirect the packet to the socket referenced by *map* (of type
+ * **BPF_MAP_TYPE_SOCKMAP**) at index *key*. Both ingress and
+ * egress interfaces can be used for redirection. The
+ * **BPF_F_INGRESS** value in *flags* is used to make the
+ * distinction (ingress path is selected if the flag is present,
+ * egress path otherwise). This is the only flag supported for now.
+ * Return
+ * **SK_PASS** on success, or **SK_DROP** on error.
+ *
+ * int bpf_sock_map_update(struct bpf_sock_ops *skops, struct bpf_map *map, void *key, u64 flags)
+ * Description
+ * Add an entry to, or update a *map* referencing sockets. The
+ * *skops* is used as a new value for the entry associated to
+ * *key*. *flags* is one of:
+ *
+ * **BPF_NOEXIST**
+ * The entry for *key* must not exist in the map.
+ * **BPF_EXIST**
+ * The entry for *key* must already exist in the map.
+ * **BPF_ANY**
+ * No condition on the existence of the entry for *key*.
+ *
+ * If the *map* has eBPF programs (parser and verdict), those will
+ * be inherited by the socket being added. If the socket is
+ * already attached to eBPF programs, this results in an error.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_xdp_adjust_meta(struct xdp_buff *xdp_md, int delta)
+ * Description
+ * Adjust the address pointed by *xdp_md*\ **->data_meta** by
+ * *delta* (which can be positive or negative). Note that this
+ * operation modifies the address stored in *xdp_md*\ **->data**,
+ * so the latter must be loaded only after the helper has been
+ * called.
+ *
+ * The use of *xdp_md*\ **->data_meta** is optional and programs
+ * are not required to use it. The rationale is that when the
+ * packet is processed with XDP (e.g. as DoS filter), it is
+ * possible to push further meta data along with it before passing
+ * to the stack, and to give the guarantee that an ingress eBPF
+ * program attached as a TC classifier on the same device can pick
+ * this up for further post-processing. Since TC works with socket
+ * buffers, it remains possible to set from XDP the **mark** or
+ * **priority** pointers, or other pointers for the socket buffer.
+ * Having this scratch space generic and programmable allows for
+ * more flexibility as the user is free to store whatever meta
+ * data they need.
+ *
+ * A call to this helper is susceptible to change the underlying
+ * packet buffer. Therefore, at load time, all checks on pointers
+ * previously done by the verifier are invalidated and must be
+ * performed again, if the helper is used in combination with
+ * direct packet access.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_perf_event_read_value(struct bpf_map *map, u64 flags, struct bpf_perf_event_value *buf, u32 buf_size)
+ * Description
+ * Read the value of a perf event counter, and store it into *buf*
+ * of size *buf_size*. This helper relies on a *map* of type
+ * **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. The nature of the perf event
+ * counter is selected when *map* is updated with perf event file
+ * descriptors. The *map* is an array whose size is the number of
+ * available CPUs, and each cell contains a value relative to one
+ * CPU. The value to retrieve is indicated by *flags*, that
+ * contains the index of the CPU to look up, masked with
+ * **BPF_F_INDEX_MASK**. Alternatively, *flags* can be set to
+ * **BPF_F_CURRENT_CPU** to indicate that the value for the
+ * current CPU should be retrieved.
+ *
+ * This helper behaves in a way close to
+ * **bpf_perf_event_read**\ () helper, save that instead of
+ * just returning the value observed, it fills the *buf*
+ * structure. This allows for additional data to be retrieved: in
+ * particular, the enabled and running times (in *buf*\
+ * **->enabled** and *buf*\ **->running**, respectively) are
+ * copied. In general, **bpf_perf_event_read_value**\ () is
+ * recommended over **bpf_perf_event_read**\ (), which has some
+ * ABI issues and provides fewer functionalities.
+ *
+ * These values are interesting, because hardware PMU (Performance
+ * Monitoring Unit) counters are limited resources. When there are
+ * more PMU based perf events opened than available counters,
+ * kernel will multiplex these events so each event gets certain
+ * percentage (but not all) of the PMU time. In case that
+ * multiplexing happens, the number of samples or counter value
+ * will not reflect the case compared to when no multiplexing
+ * occurs. This makes comparison between different runs difficult.
+ * Typically, the counter value should be normalized before
+ * comparing to other experiments. The usual normalization is done
+ * as follows.
+ *
+ * ::
+ *
+ * normalized_counter = counter * t_enabled / t_running
+ *
+ * Where t_enabled is the time enabled for event and t_running is
+ * the time running for event since last normalization. The
+ * enabled and running times are accumulated since the perf event
+ * open. To achieve scaling factor between two invocations of an
+ * eBPF program, users can can use CPU id as the key (which is
+ * typical for perf array usage model) to remember the previous
+ * value and do the calculation inside the eBPF program.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_perf_prog_read_value(struct bpf_perf_event_data *ctx, struct bpf_perf_event_value *buf, u32 buf_size)
+ * Description
+ * For en eBPF program attached to a perf event, retrieve the
+ * value of the event counter associated to *ctx* and store it in
+ * the structure pointed by *buf* and of size *buf_size*. Enabled
+ * and running times are also stored in the structure (see
+ * description of helper **bpf_perf_event_read_value**\ () for
+ * more details).
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_getsockopt(struct bpf_sock_ops *bpf_socket, int level, int optname, void *optval, int optlen)
+ * Description
+ * Emulate a call to **getsockopt()** on the socket associated to
+ * *bpf_socket*, which must be a full socket. The *level* at
+ * which the option resides and the name *optname* of the option
+ * must be specified, see **getsockopt(2)** for more information.
+ * The retrieved value is stored in the structure pointed by
+ * *opval* and of length *optlen*.
+ *
+ * This helper actually implements a subset of **getsockopt()**.
+ * It supports the following *level*\ s:
+ *
+ * * **IPPROTO_TCP**, which supports *optname*
+ * **TCP_CONGESTION**.
+ * * **IPPROTO_IP**, which supports *optname* **IP_TOS**.
+ * * **IPPROTO_IPV6**, which supports *optname* **IPV6_TCLASS**.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_override_return(struct pt_regs *regs, u64 rc)
+ * Description
+ * Used for error injection, this helper uses kprobes to override
+ * the return value of the probed function, and to set it to *rc*.
+ * The first argument is the context *regs* on which the kprobe
+ * works.
+ *
+ * This helper works by setting setting the PC (program counter)
+ * to an override function which is run in place of the original
+ * probed function. This means the probed function is not run at
+ * all. The replacement function just returns with the required
+ * value.
+ *
+ * This helper has security implications, and thus is subject to
+ * restrictions. It is only available if the kernel was compiled
+ * with the **CONFIG_BPF_KPROBE_OVERRIDE** configuration
+ * option, and in this case it only works on functions tagged with
+ * **ALLOW_ERROR_INJECTION** in the kernel code.
+ *
+ * Also, the helper is only available for the architectures having
+ * the CONFIG_FUNCTION_ERROR_INJECTION option. As of this writing,
+ * x86 architecture is the only one to support this feature.
+ * Return
+ * 0
+ *
+ * int bpf_sock_ops_cb_flags_set(struct bpf_sock_ops *bpf_sock, int argval)
+ * Description
+ * Attempt to set the value of the **bpf_sock_ops_cb_flags** field
+ * for the full TCP socket associated to *bpf_sock_ops* to
+ * *argval*.
+ *
+ * The primary use of this field is to determine if there should
+ * be calls to eBPF programs of type
+ * **BPF_PROG_TYPE_SOCK_OPS** at various points in the TCP
+ * code. A program of the same type can change its value, per
+ * connection and as necessary, when the connection is
+ * established. This field is directly accessible for reading, but
+ * this helper must be used for updates in order to return an
+ * error if an eBPF program tries to set a callback that is not
+ * supported in the current kernel.
+ *
+ * *argval* is a flag array which can combine these flags:
+ *
+ * * **BPF_SOCK_OPS_RTO_CB_FLAG** (retransmission time out)
+ * * **BPF_SOCK_OPS_RETRANS_CB_FLAG** (retransmission)
+ * * **BPF_SOCK_OPS_STATE_CB_FLAG** (TCP state change)
+ * * **BPF_SOCK_OPS_RTT_CB_FLAG** (every RTT)
+ *
+ * Therefore, this function can be used to clear a callback flag by
+ * setting the appropriate bit to zero. e.g. to disable the RTO
+ * callback:
+ *
+ * **bpf_sock_ops_cb_flags_set(bpf_sock,**
+ * **bpf_sock->bpf_sock_ops_cb_flags & ~BPF_SOCK_OPS_RTO_CB_FLAG)**
+ *
+ * Here are some examples of where one could call such eBPF
+ * program:
+ *
+ * * When RTO fires.
+ * * When a packet is retransmitted.
+ * * When the connection terminates.
+ * * When a packet is sent.
+ * * When a packet is received.
+ * Return
+ * Code **-EINVAL** if the socket is not a full TCP socket;
+ * otherwise, a positive number containing the bits that could not
+ * be set is returned (which comes down to 0 if all bits were set
+ * as required).
+ *
+ * int bpf_msg_redirect_map(struct sk_msg_buff *msg, struct bpf_map *map, u32 key, u64 flags)
+ * Description
+ * This helper is used in programs implementing policies at the
+ * socket level. If the message *msg* is allowed to pass (i.e. if
+ * the verdict eBPF program returns **SK_PASS**), redirect it to
+ * the socket referenced by *map* (of type
+ * **BPF_MAP_TYPE_SOCKMAP**) at index *key*. Both ingress and
+ * egress interfaces can be used for redirection. The
+ * **BPF_F_INGRESS** value in *flags* is used to make the
+ * distinction (ingress path is selected if the flag is present,
+ * egress path otherwise). This is the only flag supported for now.
+ * Return
+ * **SK_PASS** on success, or **SK_DROP** on error.
+ *
+ * int bpf_msg_apply_bytes(struct sk_msg_buff *msg, u32 bytes)
+ * Description
+ * For socket policies, apply the verdict of the eBPF program to
+ * the next *bytes* (number of bytes) of message *msg*.
+ *
+ * For example, this helper can be used in the following cases:
+ *
+ * * A single **sendmsg**\ () or **sendfile**\ () system call
+ * contains multiple logical messages that the eBPF program is
+ * supposed to read and for which it should apply a verdict.
+ * * An eBPF program only cares to read the first *bytes* of a
+ * *msg*. If the message has a large payload, then setting up
+ * and calling the eBPF program repeatedly for all bytes, even
+ * though the verdict is already known, would create unnecessary
+ * overhead.
+ *
+ * When called from within an eBPF program, the helper sets a
+ * counter internal to the BPF infrastructure, that is used to
+ * apply the last verdict to the next *bytes*. If *bytes* is
+ * smaller than the current data being processed from a
+ * **sendmsg**\ () or **sendfile**\ () system call, the first
+ * *bytes* will be sent and the eBPF program will be re-run with
+ * the pointer for start of data pointing to byte number *bytes*
+ * **+ 1**. If *bytes* is larger than the current data being
+ * processed, then the eBPF verdict will be applied to multiple
+ * **sendmsg**\ () or **sendfile**\ () calls until *bytes* are
+ * consumed.
+ *
+ * Note that if a socket closes with the internal counter holding
+ * a non-zero value, this is not a problem because data is not
+ * being buffered for *bytes* and is sent as it is received.
+ * Return
+ * 0
+ *
+ * int bpf_msg_cork_bytes(struct sk_msg_buff *msg, u32 bytes)
+ * Description
+ * For socket policies, prevent the execution of the verdict eBPF
+ * program for message *msg* until *bytes* (byte number) have been
+ * accumulated.
+ *
+ * This can be used when one needs a specific number of bytes
+ * before a verdict can be assigned, even if the data spans
+ * multiple **sendmsg**\ () or **sendfile**\ () calls. The extreme
+ * case would be a user calling **sendmsg**\ () repeatedly with
+ * 1-byte long message segments. Obviously, this is bad for
+ * performance, but it is still valid. If the eBPF program needs
+ * *bytes* bytes to validate a header, this helper can be used to
+ * prevent the eBPF program to be called again until *bytes* have
+ * been accumulated.
+ * Return
+ * 0
+ *
+ * int bpf_msg_pull_data(struct sk_msg_buff *msg, u32 start, u32 end, u64 flags)
+ * Description
+ * For socket policies, pull in non-linear data from user space
+ * for *msg* and set pointers *msg*\ **->data** and *msg*\
+ * **->data_end** to *start* and *end* bytes offsets into *msg*,
+ * respectively.
+ *
+ * If a program of type **BPF_PROG_TYPE_SK_MSG** is run on a
+ * *msg* it can only parse data that the (**data**, **data_end**)
+ * pointers have already consumed. For **sendmsg**\ () hooks this
+ * is likely the first scatterlist element. But for calls relying
+ * on the **sendpage** handler (e.g. **sendfile**\ ()) this will
+ * be the range (**0**, **0**) because the data is shared with
+ * user space and by default the objective is to avoid allowing
+ * user space to modify data while (or after) eBPF verdict is
+ * being decided. This helper can be used to pull in data and to
+ * set the start and end pointer to given values. Data will be
+ * copied if necessary (i.e. if data was not linear and if start
+ * and end pointers do not point to the same chunk).
+ *
+ * A call to this helper is susceptible to change the underlying
+ * packet buffer. Therefore, at load time, all checks on pointers
+ * previously done by the verifier are invalidated and must be
+ * performed again, if the helper is used in combination with
+ * direct packet access.
+ *
+ * All values for *flags* are reserved for future usage, and must
+ * be left at zero.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_bind(struct bpf_sock_addr *ctx, struct sockaddr *addr, int addr_len)
+ * Description
+ * Bind the socket associated to *ctx* to the address pointed by
+ * *addr*, of length *addr_len*. This allows for making outgoing
+ * connection from the desired IP address, which can be useful for
+ * example when all processes inside a cgroup should use one
+ * single IP address on a host that has multiple IP configured.
+ *
+ * This helper works for IPv4 and IPv6, TCP and UDP sockets. The
+ * domain (*addr*\ **->sa_family**) must be **AF_INET** (or
+ * **AF_INET6**). Looking for a free port to bind to can be
+ * expensive, therefore binding to port is not permitted by the
+ * helper: *addr*\ **->sin_port** (or **sin6_port**, respectively)
+ * must be set to zero.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_xdp_adjust_tail(struct xdp_buff *xdp_md, int delta)
+ * Description
+ * Adjust (move) *xdp_md*\ **->data_end** by *delta* bytes. It is
+ * only possible to shrink the packet as of this writing,
+ * therefore *delta* must be a negative integer.
+ *
+ * A call to this helper is susceptible to change the underlying
+ * packet buffer. Therefore, at load time, all checks on pointers
+ * previously done by the verifier are invalidated and must be
+ * performed again, if the helper is used in combination with
+ * direct packet access.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_skb_get_xfrm_state(struct sk_buff *skb, u32 index, struct bpf_xfrm_state *xfrm_state, u32 size, u64 flags)
+ * Description
+ * Retrieve the XFRM state (IP transform framework, see also
+ * **ip-xfrm(8)**) at *index* in XFRM "security path" for *skb*.
+ *
+ * The retrieved value is stored in the **struct bpf_xfrm_state**
+ * pointed by *xfrm_state* and of length *size*.
+ *
+ * All values for *flags* are reserved for future usage, and must
+ * be left at zero.
+ *
+ * This helper is available only if the kernel was compiled with
+ * **CONFIG_XFRM** configuration option.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_get_stack(void *ctx, void *buf, u32 size, u64 flags)
+ * Description
+ * Return a user or a kernel stack in bpf program provided buffer.
+ * To achieve this, the helper needs *ctx*, which is a pointer
+ * to the context on which the tracing program is executed.
+ * To store the stacktrace, the bpf program provides *buf* with
+ * a nonnegative *size*.
+ *
+ * The last argument, *flags*, holds the number of stack frames to
+ * skip (from 0 to 255), masked with
+ * **BPF_F_SKIP_FIELD_MASK**. The next bits can be used to set
+ * the following flags:
+ *
+ * **BPF_F_USER_STACK**
+ * Collect a user space stack instead of a kernel stack.
+ * **BPF_F_USER_BUILD_ID**
+ * Collect buildid+offset instead of ips for user stack,
+ * only valid if **BPF_F_USER_STACK** is also specified.
+ *
+ * **bpf_get_stack**\ () can collect up to
+ * **PERF_MAX_STACK_DEPTH** both kernel and user frames, subject
+ * to sufficient large buffer size. Note that
+ * this limit can be controlled with the **sysctl** program, and
+ * that it should be manually increased in order to profile long
+ * user stacks (such as stacks for Java programs). To do so, use:
+ *
+ * ::
+ *
+ * # sysctl kernel.perf_event_max_stack=<new value>
+ * Return
+ * A non-negative value equal to or less than *size* on success,
+ * or a negative error in case of failure.
+ *
+ * int bpf_skb_load_bytes_relative(const void *skb, u32 offset, void *to, u32 len, u32 start_header)
+ * Description
+ * This helper is similar to **bpf_skb_load_bytes**\ () in that
+ * it provides an easy way to load *len* bytes from *offset*
+ * from the packet associated to *skb*, into the buffer pointed
+ * by *to*. The difference to **bpf_skb_load_bytes**\ () is that
+ * a fifth argument *start_header* exists in order to select a
+ * base offset to start from. *start_header* can be one of:
+ *
+ * **BPF_HDR_START_MAC**
+ * Base offset to load data from is *skb*'s mac header.
+ * **BPF_HDR_START_NET**
+ * Base offset to load data from is *skb*'s network header.
+ *
+ * In general, "direct packet access" is the preferred method to
+ * access packet data, however, this helper is in particular useful
+ * in socket filters where *skb*\ **->data** does not always point
+ * to the start of the mac header and where "direct packet access"
+ * is not available.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_fib_lookup(void *ctx, struct bpf_fib_lookup *params, int plen, u32 flags)
+ * Description
+ * Do FIB lookup in kernel tables using parameters in *params*.
+ * If lookup is successful and result shows packet is to be
+ * forwarded, the neighbor tables are searched for the nexthop.
+ * If successful (ie., FIB lookup shows forwarding and nexthop
+ * is resolved), the nexthop address is returned in ipv4_dst
+ * or ipv6_dst based on family, smac is set to mac address of
+ * egress device, dmac is set to nexthop mac address, rt_metric
+ * is set to metric from route (IPv4/IPv6 only), and ifindex
+ * is set to the device index of the nexthop from the FIB lookup.
+ *
+ * *plen* argument is the size of the passed in struct.
+ * *flags* argument can be a combination of one or more of the
+ * following values:
+ *
+ * **BPF_FIB_LOOKUP_DIRECT**
+ * Do a direct table lookup vs full lookup using FIB
+ * rules.
+ * **BPF_FIB_LOOKUP_OUTPUT**
+ * Perform lookup from an egress perspective (default is
+ * ingress).
+ *
+ * *ctx* is either **struct xdp_md** for XDP programs or
+ * **struct sk_buff** tc cls_act programs.
+ * Return
+ * * < 0 if any input argument is invalid
+ * * 0 on success (packet is forwarded, nexthop neighbor exists)
+ * * > 0 one of **BPF_FIB_LKUP_RET_** codes explaining why the
+ * packet is not forwarded or needs assist from full stack
+ *
+ * int bpf_sock_hash_update(struct bpf_sock_ops *skops, struct bpf_map *map, void *key, u64 flags)
+ * Description
+ * Add an entry to, or update a sockhash *map* referencing sockets.
+ * The *skops* is used as a new value for the entry associated to
+ * *key*. *flags* is one of:
+ *
+ * **BPF_NOEXIST**
+ * The entry for *key* must not exist in the map.
+ * **BPF_EXIST**
+ * The entry for *key* must already exist in the map.
+ * **BPF_ANY**
+ * No condition on the existence of the entry for *key*.
+ *
+ * If the *map* has eBPF programs (parser and verdict), those will
+ * be inherited by the socket being added. If the socket is
+ * already attached to eBPF programs, this results in an error.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_msg_redirect_hash(struct sk_msg_buff *msg, struct bpf_map *map, void *key, u64 flags)
+ * Description
+ * This helper is used in programs implementing policies at the
+ * socket level. If the message *msg* is allowed to pass (i.e. if
+ * the verdict eBPF program returns **SK_PASS**), redirect it to
+ * the socket referenced by *map* (of type
+ * **BPF_MAP_TYPE_SOCKHASH**) using hash *key*. Both ingress and
+ * egress interfaces can be used for redirection. The
+ * **BPF_F_INGRESS** value in *flags* is used to make the
+ * distinction (ingress path is selected if the flag is present,
+ * egress path otherwise). This is the only flag supported for now.
+ * Return
+ * **SK_PASS** on success, or **SK_DROP** on error.
+ *
+ * int bpf_sk_redirect_hash(struct sk_buff *skb, struct bpf_map *map, void *key, u64 flags)
+ * Description
+ * This helper is used in programs implementing policies at the
+ * skb socket level. If the sk_buff *skb* is allowed to pass (i.e.
+ * if the verdeict eBPF program returns **SK_PASS**), redirect it
+ * to the socket referenced by *map* (of type
+ * **BPF_MAP_TYPE_SOCKHASH**) using hash *key*. Both ingress and
+ * egress interfaces can be used for redirection. The
+ * **BPF_F_INGRESS** value in *flags* is used to make the
+ * distinction (ingress path is selected if the flag is present,
+ * egress otherwise). This is the only flag supported for now.
+ * Return
+ * **SK_PASS** on success, or **SK_DROP** on error.
+ *
+ * int bpf_lwt_push_encap(struct sk_buff *skb, u32 type, void *hdr, u32 len)
+ * Description
+ * Encapsulate the packet associated to *skb* within a Layer 3
+ * protocol header. This header is provided in the buffer at
+ * address *hdr*, with *len* its size in bytes. *type* indicates
+ * the protocol of the header and can be one of:
+ *
+ * **BPF_LWT_ENCAP_SEG6**
+ * IPv6 encapsulation with Segment Routing Header
+ * (**struct ipv6_sr_hdr**). *hdr* only contains the SRH,
+ * the IPv6 header is computed by the kernel.
+ * **BPF_LWT_ENCAP_SEG6_INLINE**
+ * Only works if *skb* contains an IPv6 packet. Insert a
+ * Segment Routing Header (**struct ipv6_sr_hdr**) inside
+ * the IPv6 header.
+ * **BPF_LWT_ENCAP_IP**
+ * IP encapsulation (GRE/GUE/IPIP/etc). The outer header
+ * must be IPv4 or IPv6, followed by zero or more
+ * additional headers, up to **LWT_BPF_MAX_HEADROOM**
+ * total bytes in all prepended headers. Please note that
+ * if **skb_is_gso**\ (*skb*) is true, no more than two
+ * headers can be prepended, and the inner header, if
+ * present, should be either GRE or UDP/GUE.
+ *
+ * **BPF_LWT_ENCAP_SEG6**\ \* types can be called by BPF programs
+ * of type **BPF_PROG_TYPE_LWT_IN**; **BPF_LWT_ENCAP_IP** type can
+ * be called by bpf programs of types **BPF_PROG_TYPE_LWT_IN** and
+ * **BPF_PROG_TYPE_LWT_XMIT**.
+ *
+ * A call to this helper is susceptible to change the underlying
+ * packet buffer. Therefore, at load time, all checks on pointers
+ * previously done by the verifier are invalidated and must be
+ * performed again, if the helper is used in combination with
+ * direct packet access.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_lwt_seg6_store_bytes(struct sk_buff *skb, u32 offset, const void *from, u32 len)
+ * Description
+ * Store *len* bytes from address *from* into the packet
+ * associated to *skb*, at *offset*. Only the flags, tag and TLVs
+ * inside the outermost IPv6 Segment Routing Header can be
+ * modified through this helper.
+ *
+ * A call to this helper is susceptible to change the underlying
+ * packet buffer. Therefore, at load time, all checks on pointers
+ * previously done by the verifier are invalidated and must be
+ * performed again, if the helper is used in combination with
+ * direct packet access.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_lwt_seg6_adjust_srh(struct sk_buff *skb, u32 offset, s32 delta)
+ * Description
+ * Adjust the size allocated to TLVs in the outermost IPv6
+ * Segment Routing Header contained in the packet associated to
+ * *skb*, at position *offset* by *delta* bytes. Only offsets
+ * after the segments are accepted. *delta* can be as well
+ * positive (growing) as negative (shrinking).
+ *
+ * A call to this helper is susceptible to change the underlying
+ * packet buffer. Therefore, at load time, all checks on pointers
+ * previously done by the verifier are invalidated and must be
+ * performed again, if the helper is used in combination with
+ * direct packet access.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_lwt_seg6_action(struct sk_buff *skb, u32 action, void *param, u32 param_len)
+ * Description
+ * Apply an IPv6 Segment Routing action of type *action* to the
+ * packet associated to *skb*. Each action takes a parameter
+ * contained at address *param*, and of length *param_len* bytes.
+ * *action* can be one of:
+ *
+ * **SEG6_LOCAL_ACTION_END_X**
+ * End.X action: Endpoint with Layer-3 cross-connect.
+ * Type of *param*: **struct in6_addr**.
+ * **SEG6_LOCAL_ACTION_END_T**
+ * End.T action: Endpoint with specific IPv6 table lookup.
+ * Type of *param*: **int**.
+ * **SEG6_LOCAL_ACTION_END_B6**
+ * End.B6 action: Endpoint bound to an SRv6 policy.
+ * Type of *param*: **struct ipv6_sr_hdr**.
+ * **SEG6_LOCAL_ACTION_END_B6_ENCAP**
+ * End.B6.Encap action: Endpoint bound to an SRv6
+ * encapsulation policy.
+ * Type of *param*: **struct ipv6_sr_hdr**.
+ *
+ * A call to this helper is susceptible to change the underlying
+ * packet buffer. Therefore, at load time, all checks on pointers
+ * previously done by the verifier are invalidated and must be
+ * performed again, if the helper is used in combination with
+ * direct packet access.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_rc_repeat(void *ctx)
+ * Description
+ * This helper is used in programs implementing IR decoding, to
+ * report a successfully decoded repeat key message. This delays
+ * the generation of a key up event for previously generated
+ * key down event.
+ *
+ * Some IR protocols like NEC have a special IR message for
+ * repeating last button, for when a button is held down.
+ *
+ * The *ctx* should point to the lirc sample as passed into
+ * the program.
+ *
+ * This helper is only available is the kernel was compiled with
+ * the **CONFIG_BPF_LIRC_MODE2** configuration option set to
+ * "**y**".
+ * Return
+ * 0
+ *
+ * int bpf_rc_keydown(void *ctx, u32 protocol, u64 scancode, u32 toggle)
+ * Description
+ * This helper is used in programs implementing IR decoding, to
+ * report a successfully decoded key press with *scancode*,
+ * *toggle* value in the given *protocol*. The scancode will be
+ * translated to a keycode using the rc keymap, and reported as
+ * an input key down event. After a period a key up event is
+ * generated. This period can be extended by calling either
+ * **bpf_rc_keydown**\ () again with the same values, or calling
+ * **bpf_rc_repeat**\ ().
+ *
+ * Some protocols include a toggle bit, in case the button was
+ * released and pressed again between consecutive scancodes.
+ *
+ * The *ctx* should point to the lirc sample as passed into
+ * the program.
+ *
+ * The *protocol* is the decoded protocol number (see
+ * **enum rc_proto** for some predefined values).
+ *
+ * This helper is only available is the kernel was compiled with
+ * the **CONFIG_BPF_LIRC_MODE2** configuration option set to
+ * "**y**".
+ * Return
+ * 0
+ *
+ * u64 bpf_skb_cgroup_id(struct sk_buff *skb)
+ * Description
+ * Return the cgroup v2 id of the socket associated with the *skb*.
+ * This is roughly similar to the **bpf_get_cgroup_classid**\ ()
+ * helper for cgroup v1 by providing a tag resp. identifier that
+ * can be matched on or used for map lookups e.g. to implement
+ * policy. The cgroup v2 id of a given path in the hierarchy is
+ * exposed in user space through the f_handle API in order to get
+ * to the same 64-bit id.
+ *
+ * This helper can be used on TC egress path, but not on ingress,
+ * and is available only if the kernel was compiled with the
+ * **CONFIG_SOCK_CGROUP_DATA** configuration option.
+ * Return
+ * The id is returned or 0 in case the id could not be retrieved.
+ *
+ * u64 bpf_get_current_cgroup_id(void)
+ * Return
+ * A 64-bit integer containing the current cgroup id based
+ * on the cgroup within which the current task is running.
+ *
+ * void *bpf_get_local_storage(void *map, u64 flags)
+ * Description
+ * Get the pointer to the local storage area.
+ * The type and the size of the local storage is defined
+ * by the *map* argument.
+ * The *flags* meaning is specific for each map type,
+ * and has to be 0 for cgroup local storage.
+ *
+ * Depending on the BPF program type, a local storage area
+ * can be shared between multiple instances of the BPF program,
+ * running simultaneously.
+ *
+ * A user should care about the synchronization by himself.
+ * For example, by using the **BPF_STX_XADD** instruction to alter
+ * the shared data.
+ * Return
+ * A pointer to the local storage area.
+ *
+ * int bpf_sk_select_reuseport(struct sk_reuseport_md *reuse, struct bpf_map *map, void *key, u64 flags)
+ * Description
+ * Select a **SO_REUSEPORT** socket from a
+ * **BPF_MAP_TYPE_REUSEPORT_ARRAY** *map*.
+ * It checks the selected socket is matching the incoming
+ * request in the socket buffer.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * u64 bpf_skb_ancestor_cgroup_id(struct sk_buff *skb, int ancestor_level)
+ * Description
+ * Return id of cgroup v2 that is ancestor of cgroup associated
+ * with the *skb* at the *ancestor_level*. The root cgroup is at
+ * *ancestor_level* zero and each step down the hierarchy
+ * increments the level. If *ancestor_level* == level of cgroup
+ * associated with *skb*, then return value will be same as that
+ * of **bpf_skb_cgroup_id**\ ().
+ *
+ * The helper is useful to implement policies based on cgroups
+ * that are upper in hierarchy than immediate cgroup associated
+ * with *skb*.
+ *
+ * The format of returned id and helper limitations are same as in
+ * **bpf_skb_cgroup_id**\ ().
+ * Return
+ * The id is returned or 0 in case the id could not be retrieved.
+ *
+ * struct bpf_sock *bpf_sk_lookup_tcp(void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u64 netns, u64 flags)
+ * Description
+ * Look for TCP socket matching *tuple*, optionally in a child
+ * network namespace *netns*. The return value must be checked,
+ * and if non-**NULL**, released via **bpf_sk_release**\ ().
+ *
+ * The *ctx* should point to the context of the program, such as
+ * the skb or socket (depending on the hook in use). This is used
+ * to determine the base network namespace for the lookup.
+ *
+ * *tuple_size* must be one of:
+ *
+ * **sizeof**\ (*tuple*\ **->ipv4**)
+ * Look for an IPv4 socket.
+ * **sizeof**\ (*tuple*\ **->ipv6**)
+ * Look for an IPv6 socket.
+ *
+ * If the *netns* is a negative signed 32-bit integer, then the
+ * socket lookup table in the netns associated with the *ctx* will
+ * will be used. For the TC hooks, this is the netns of the device
+ * in the skb. For socket hooks, this is the netns of the socket.
+ * If *netns* is any other signed 32-bit value greater than or
+ * equal to zero then it specifies the ID of the netns relative to
+ * the netns associated with the *ctx*. *netns* values beyond the
+ * range of 32-bit integers are reserved for future use.
+ *
+ * All values for *flags* are reserved for future usage, and must
+ * be left at zero.
+ *
+ * This helper is available only if the kernel was compiled with
+ * **CONFIG_NET** configuration option.
+ * Return
+ * Pointer to **struct bpf_sock**, or **NULL** in case of failure.
+ * For sockets with reuseport option, the **struct bpf_sock**
+ * result is from *reuse*\ **->socks**\ [] using the hash of the
+ * tuple.
+ *
+ * struct bpf_sock *bpf_sk_lookup_udp(void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u64 netns, u64 flags)
+ * Description
+ * Look for UDP socket matching *tuple*, optionally in a child
+ * network namespace *netns*. The return value must be checked,
+ * and if non-**NULL**, released via **bpf_sk_release**\ ().
+ *
+ * The *ctx* should point to the context of the program, such as
+ * the skb or socket (depending on the hook in use). This is used
+ * to determine the base network namespace for the lookup.
+ *
+ * *tuple_size* must be one of:
+ *
+ * **sizeof**\ (*tuple*\ **->ipv4**)
+ * Look for an IPv4 socket.
+ * **sizeof**\ (*tuple*\ **->ipv6**)
+ * Look for an IPv6 socket.
+ *
+ * If the *netns* is a negative signed 32-bit integer, then the
+ * socket lookup table in the netns associated with the *ctx* will
+ * will be used. For the TC hooks, this is the netns of the device
+ * in the skb. For socket hooks, this is the netns of the socket.
+ * If *netns* is any other signed 32-bit value greater than or
+ * equal to zero then it specifies the ID of the netns relative to
+ * the netns associated with the *ctx*. *netns* values beyond the
+ * range of 32-bit integers are reserved for future use.
+ *
+ * All values for *flags* are reserved for future usage, and must
+ * be left at zero.
+ *
+ * This helper is available only if the kernel was compiled with
+ * **CONFIG_NET** configuration option.
+ * Return
+ * Pointer to **struct bpf_sock**, or **NULL** in case of failure.
+ * For sockets with reuseport option, the **struct bpf_sock**
+ * result is from *reuse*\ **->socks**\ [] using the hash of the
+ * tuple.
+ *
+ * int bpf_sk_release(struct bpf_sock *sock)
+ * Description
+ * Release the reference held by *sock*. *sock* must be a
+ * non-**NULL** pointer that was returned from
+ * **bpf_sk_lookup_xxx**\ ().
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_map_push_elem(struct bpf_map *map, const void *value, u64 flags)
+ * Description
+ * Push an element *value* in *map*. *flags* is one of:
+ *
+ * **BPF_EXIST**
+ * If the queue/stack is full, the oldest element is
+ * removed to make room for this.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_map_pop_elem(struct bpf_map *map, void *value)
+ * Description
+ * Pop an element from *map*.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_map_peek_elem(struct bpf_map *map, void *value)
+ * Description
+ * Get an element from *map* without removing it.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_msg_push_data(struct sk_msg_buff *msg, u32 start, u32 len, u64 flags)
+ * Description
+ * For socket policies, insert *len* bytes into *msg* at offset
+ * *start*.
+ *
+ * If a program of type **BPF_PROG_TYPE_SK_MSG** is run on a
+ * *msg* it may want to insert metadata or options into the *msg*.
+ * This can later be read and used by any of the lower layer BPF
+ * hooks.
+ *
+ * This helper may fail if under memory pressure (a malloc
+ * fails) in these cases BPF programs will get an appropriate
+ * error and BPF programs will need to handle them.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_msg_pop_data(struct sk_msg_buff *msg, u32 start, u32 len, u64 flags)
+ * Description
+ * Will remove *len* bytes from a *msg* starting at byte *start*.
+ * This may result in **ENOMEM** errors under certain situations if
+ * an allocation and copy are required due to a full ring buffer.
+ * However, the helper will try to avoid doing the allocation
+ * if possible. Other errors can occur if input parameters are
+ * invalid either due to *start* byte not being valid part of *msg*
+ * payload and/or *pop* value being to large.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_rc_pointer_rel(void *ctx, s32 rel_x, s32 rel_y)
+ * Description
+ * This helper is used in programs implementing IR decoding, to
+ * report a successfully decoded pointer movement.
+ *
+ * The *ctx* should point to the lirc sample as passed into
+ * the program.
+ *
+ * This helper is only available is the kernel was compiled with
+ * the **CONFIG_BPF_LIRC_MODE2** configuration option set to
+ * "**y**".
+ * Return
+ * 0
+ *
+ * int bpf_spin_lock(struct bpf_spin_lock *lock)
+ * Description
+ * Acquire a spinlock represented by the pointer *lock*, which is
+ * stored as part of a value of a map. Taking the lock allows to
+ * safely update the rest of the fields in that value. The
+ * spinlock can (and must) later be released with a call to
+ * **bpf_spin_unlock**\ (\ *lock*\ ).
+ *
+ * Spinlocks in BPF programs come with a number of restrictions
+ * and constraints:
+ *
+ * * **bpf_spin_lock** objects are only allowed inside maps of
+ * types **BPF_MAP_TYPE_HASH** and **BPF_MAP_TYPE_ARRAY** (this
+ * list could be extended in the future).
+ * * BTF description of the map is mandatory.
+ * * The BPF program can take ONE lock at a time, since taking two
+ * or more could cause dead locks.
+ * * Only one **struct bpf_spin_lock** is allowed per map element.
+ * * When the lock is taken, calls (either BPF to BPF or helpers)
+ * are not allowed.
+ * * The **BPF_LD_ABS** and **BPF_LD_IND** instructions are not
+ * allowed inside a spinlock-ed region.
+ * * The BPF program MUST call **bpf_spin_unlock**\ () to release
+ * the lock, on all execution paths, before it returns.
+ * * The BPF program can access **struct bpf_spin_lock** only via
+ * the **bpf_spin_lock**\ () and **bpf_spin_unlock**\ ()
+ * helpers. Loading or storing data into the **struct
+ * bpf_spin_lock** *lock*\ **;** field of a map is not allowed.
+ * * To use the **bpf_spin_lock**\ () helper, the BTF description
+ * of the map value must be a struct and have **struct
+ * bpf_spin_lock** *anyname*\ **;** field at the top level.
+ * Nested lock inside another struct is not allowed.
+ * * The **struct bpf_spin_lock** *lock* field in a map value must
+ * be aligned on a multiple of 4 bytes in that value.
+ * * Syscall with command **BPF_MAP_LOOKUP_ELEM** does not copy
+ * the **bpf_spin_lock** field to user space.
+ * * Syscall with command **BPF_MAP_UPDATE_ELEM**, or update from
+ * a BPF program, do not update the **bpf_spin_lock** field.
+ * * **bpf_spin_lock** cannot be on the stack or inside a
+ * networking packet (it can only be inside of a map values).
+ * * **bpf_spin_lock** is available to root only.
+ * * Tracing programs and socket filter programs cannot use
+ * **bpf_spin_lock**\ () due to insufficient preemption checks
+ * (but this may change in the future).
+ * * **bpf_spin_lock** is not allowed in inner maps of map-in-map.
+ * Return
+ * 0
+ *
+ * int bpf_spin_unlock(struct bpf_spin_lock *lock)
+ * Description
+ * Release the *lock* previously locked by a call to
+ * **bpf_spin_lock**\ (\ *lock*\ ).
+ * Return
+ * 0
+ *
+ * struct bpf_sock *bpf_sk_fullsock(struct bpf_sock *sk)
+ * Description
+ * This helper gets a **struct bpf_sock** pointer such
+ * that all the fields in this **bpf_sock** can be accessed.
+ * Return
+ * A **struct bpf_sock** pointer on success, or **NULL** in
+ * case of failure.
+ *
+ * struct bpf_tcp_sock *bpf_tcp_sock(struct bpf_sock *sk)
+ * Description
+ * This helper gets a **struct bpf_tcp_sock** pointer from a
+ * **struct bpf_sock** pointer.
+ * Return
+ * A **struct bpf_tcp_sock** pointer on success, or **NULL** in
+ * case of failure.
+ *
+ * int bpf_skb_ecn_set_ce(struct sk_buff *skb)
+ * Description
+ * Set ECN (Explicit Congestion Notification) field of IP header
+ * to **CE** (Congestion Encountered) if current value is **ECT**
+ * (ECN Capable Transport). Otherwise, do nothing. Works with IPv6
+ * and IPv4.
+ * Return
+ * 1 if the **CE** flag is set (either by the current helper call
+ * or because it was already present), 0 if it is not set.
+ *
+ * struct bpf_sock *bpf_get_listener_sock(struct bpf_sock *sk)
+ * Description
+ * Return a **struct bpf_sock** pointer in **TCP_LISTEN** state.
+ * **bpf_sk_release**\ () is unnecessary and not allowed.
+ * Return
+ * A **struct bpf_sock** pointer on success, or **NULL** in
+ * case of failure.
+ *
+ * struct bpf_sock *bpf_skc_lookup_tcp(void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u64 netns, u64 flags)
+ * Description
+ * Look for TCP socket matching *tuple*, optionally in a child
+ * network namespace *netns*. The return value must be checked,
+ * and if non-**NULL**, released via **bpf_sk_release**\ ().
+ *
+ * This function is identical to **bpf_sk_lookup_tcp**\ (), except
+ * that it also returns timewait or request sockets. Use
+ * **bpf_sk_fullsock**\ () or **bpf_tcp_sock**\ () to access the
+ * full structure.
+ *
+ * This helper is available only if the kernel was compiled with
+ * **CONFIG_NET** configuration option.
+ * Return
+ * Pointer to **struct bpf_sock**, or **NULL** in case of failure.
+ * For sockets with reuseport option, the **struct bpf_sock**
+ * result is from *reuse*\ **->socks**\ [] using the hash of the
+ * tuple.
+ *
+ * int bpf_tcp_check_syncookie(struct bpf_sock *sk, void *iph, u32 iph_len, struct tcphdr *th, u32 th_len)
+ * Description
+ * Check whether *iph* and *th* contain a valid SYN cookie ACK for
+ * the listening socket in *sk*.
+ *
+ * *iph* points to the start of the IPv4 or IPv6 header, while
+ * *iph_len* contains **sizeof**\ (**struct iphdr**) or
+ * **sizeof**\ (**struct ip6hdr**).
+ *
+ * *th* points to the start of the TCP header, while *th_len*
+ * contains **sizeof**\ (**struct tcphdr**).
+ *
+ * Return
+ * 0 if *iph* and *th* are a valid SYN cookie ACK, or a negative
+ * error otherwise.
+ *
+ * int bpf_sysctl_get_name(struct bpf_sysctl *ctx, char *buf, size_t buf_len, u64 flags)
+ * Description
+ * Get name of sysctl in /proc/sys/ and copy it into provided by
+ * program buffer *buf* of size *buf_len*.
+ *
+ * The buffer is always NUL terminated, unless it's zero-sized.
+ *
+ * If *flags* is zero, full name (e.g. "net/ipv4/tcp_mem") is
+ * copied. Use **BPF_F_SYSCTL_BASE_NAME** flag to copy base name
+ * only (e.g. "tcp_mem").
+ * Return
+ * Number of character copied (not including the trailing NUL).
+ *
+ * **-E2BIG** if the buffer wasn't big enough (*buf* will contain
+ * truncated name in this case).
+ *
+ * int bpf_sysctl_get_current_value(struct bpf_sysctl *ctx, char *buf, size_t buf_len)
+ * Description
+ * Get current value of sysctl as it is presented in /proc/sys
+ * (incl. newline, etc), and copy it as a string into provided
+ * by program buffer *buf* of size *buf_len*.
+ *
+ * The whole value is copied, no matter what file position user
+ * space issued e.g. sys_read at.
+ *
+ * The buffer is always NUL terminated, unless it's zero-sized.
+ * Return
+ * Number of character copied (not including the trailing NUL).
+ *
+ * **-E2BIG** if the buffer wasn't big enough (*buf* will contain
+ * truncated name in this case).
+ *
+ * **-EINVAL** if current value was unavailable, e.g. because
+ * sysctl is uninitialized and read returns -EIO for it.
+ *
+ * int bpf_sysctl_get_new_value(struct bpf_sysctl *ctx, char *buf, size_t buf_len)
+ * Description
+ * Get new value being written by user space to sysctl (before
+ * the actual write happens) and copy it as a string into
+ * provided by program buffer *buf* of size *buf_len*.
+ *
+ * User space may write new value at file position > 0.
+ *
+ * The buffer is always NUL terminated, unless it's zero-sized.
+ * Return
+ * Number of character copied (not including the trailing NUL).
+ *
+ * **-E2BIG** if the buffer wasn't big enough (*buf* will contain
+ * truncated name in this case).
+ *
+ * **-EINVAL** if sysctl is being read.
+ *
+ * int bpf_sysctl_set_new_value(struct bpf_sysctl *ctx, const char *buf, size_t buf_len)
+ * Description
+ * Override new value being written by user space to sysctl with
+ * value provided by program in buffer *buf* of size *buf_len*.
+ *
+ * *buf* should contain a string in same form as provided by user
+ * space on sysctl write.
+ *
+ * User space may write new value at file position > 0. To override
+ * the whole sysctl value file position should be set to zero.
+ * Return
+ * 0 on success.
+ *
+ * **-E2BIG** if the *buf_len* is too big.
+ *
+ * **-EINVAL** if sysctl is being read.
+ *
+ * int bpf_strtol(const char *buf, size_t buf_len, u64 flags, long *res)
+ * Description
+ * Convert the initial part of the string from buffer *buf* of
+ * size *buf_len* to a long integer according to the given base
+ * and save the result in *res*.
+ *
+ * The string may begin with an arbitrary amount of white space
+ * (as determined by **isspace**\ (3)) followed by a single
+ * optional '**-**' sign.
+ *
+ * Five least significant bits of *flags* encode base, other bits
+ * are currently unused.
+ *
+ * Base must be either 8, 10, 16 or 0 to detect it automatically
+ * similar to user space **strtol**\ (3).
+ * Return
+ * Number of characters consumed on success. Must be positive but
+ * no more than *buf_len*.
+ *
+ * **-EINVAL** if no valid digits were found or unsupported base
+ * was provided.
+ *
+ * **-ERANGE** if resulting value was out of range.
+ *
+ * int bpf_strtoul(const char *buf, size_t buf_len, u64 flags, unsigned long *res)
+ * Description
+ * Convert the initial part of the string from buffer *buf* of
+ * size *buf_len* to an unsigned long integer according to the
+ * given base and save the result in *res*.
+ *
+ * The string may begin with an arbitrary amount of white space
+ * (as determined by **isspace**\ (3)).
+ *
+ * Five least significant bits of *flags* encode base, other bits
+ * are currently unused.
+ *
+ * Base must be either 8, 10, 16 or 0 to detect it automatically
+ * similar to user space **strtoul**\ (3).
+ * Return
+ * Number of characters consumed on success. Must be positive but
+ * no more than *buf_len*.
+ *
+ * **-EINVAL** if no valid digits were found or unsupported base
+ * was provided.
+ *
+ * **-ERANGE** if resulting value was out of range.
+ *
+ * void *bpf_sk_storage_get(struct bpf_map *map, struct bpf_sock *sk, void *value, u64 flags)
+ * Description
+ * Get a bpf-local-storage from a *sk*.
+ *
+ * Logically, it could be thought of getting the value from
+ * a *map* with *sk* as the **key**. From this
+ * perspective, the usage is not much different from
+ * **bpf_map_lookup_elem**\ (*map*, **&**\ *sk*) except this
+ * helper enforces the key must be a full socket and the map must
+ * be a **BPF_MAP_TYPE_SK_STORAGE** also.
+ *
+ * Underneath, the value is stored locally at *sk* instead of
+ * the *map*. The *map* is used as the bpf-local-storage
+ * "type". The bpf-local-storage "type" (i.e. the *map*) is
+ * searched against all bpf-local-storages residing at *sk*.
+ *
+ * An optional *flags* (**BPF_SK_STORAGE_GET_F_CREATE**) can be
+ * used such that a new bpf-local-storage will be
+ * created if one does not exist. *value* can be used
+ * together with **BPF_SK_STORAGE_GET_F_CREATE** to specify
+ * the initial value of a bpf-local-storage. If *value* is
+ * **NULL**, the new bpf-local-storage will be zero initialized.
+ * Return
+ * A bpf-local-storage pointer is returned on success.
+ *
+ * **NULL** if not found or there was an error in adding
+ * a new bpf-local-storage.
+ *
+ * int bpf_sk_storage_delete(struct bpf_map *map, struct bpf_sock *sk)
+ * Description
+ * Delete a bpf-local-storage from a *sk*.
+ * Return
+ * 0 on success.
+ *
+ * **-ENOENT** if the bpf-local-storage cannot be found.
+ *
+ * int bpf_send_signal(u32 sig)
+ * Description
+ * Send signal *sig* to the current task.
+ * Return
+ * 0 on success or successfully queued.
+ *
+ * **-EBUSY** if work queue under nmi is full.
+ *
+ * **-EINVAL** if *sig* is invalid.
+ *
+ * **-EPERM** if no permission to send the *sig*.
+ *
+ * **-EAGAIN** if bpf program can try again.
+ *
+ * s64 bpf_tcp_gen_syncookie(struct bpf_sock *sk, void *iph, u32 iph_len, struct tcphdr *th, u32 th_len)
+ * Description
+ * Try to issue a SYN cookie for the packet with corresponding
+ * IP/TCP headers, *iph* and *th*, on the listening socket in *sk*.
+ *
+ * *iph* points to the start of the IPv4 or IPv6 header, while
+ * *iph_len* contains **sizeof**\ (**struct iphdr**) or
+ * **sizeof**\ (**struct ip6hdr**).
+ *
+ * *th* points to the start of the TCP header, while *th_len*
+ * contains the length of the TCP header.
+ *
+ * Return
+ * On success, lower 32 bits hold the generated SYN cookie in
+ * followed by 16 bits which hold the MSS value for that cookie,
+ * and the top 16 bits are unused.
+ *
+ * On failure, the returned value is one of the following:
+ *
+ * **-EINVAL** SYN cookie cannot be issued due to error
+ *
+ * **-ENOENT** SYN cookie should not be issued (no SYN flood)
+ *
+ * **-EOPNOTSUPP** kernel configuration does not enable SYN cookies
+ *
+ * **-EPROTONOSUPPORT** IP packet version is not 4 or 6
+ *
+ * int bpf_skb_output(void *ctx, struct bpf_map *map, u64 flags, void *data, u64 size)
+ * Description
+ * Write raw *data* blob into a special BPF perf event held by
+ * *map* of type **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. This perf
+ * event must have the following attributes: **PERF_SAMPLE_RAW**
+ * as **sample_type**, **PERF_TYPE_SOFTWARE** as **type**, and
+ * **PERF_COUNT_SW_BPF_OUTPUT** as **config**.
+ *
+ * The *flags* are used to indicate the index in *map* for which
+ * the value must be put, masked with **BPF_F_INDEX_MASK**.
+ * Alternatively, *flags* can be set to **BPF_F_CURRENT_CPU**
+ * to indicate that the index of the current CPU core should be
+ * used.
+ *
+ * The value to write, of *size*, is passed through eBPF stack and
+ * pointed by *data*.
+ *
+ * *ctx* is a pointer to in-kernel struct sk_buff.
+ *
+ * This helper is similar to **bpf_perf_event_output**\ () but
+ * restricted to raw_tracepoint bpf programs.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_probe_read_user(void *dst, u32 size, const void *unsafe_ptr)
+ * Description
+ * Safely attempt to read *size* bytes from user space address
+ * *unsafe_ptr* and store the data in *dst*.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_probe_read_kernel(void *dst, u32 size, const void *unsafe_ptr)
+ * Description
+ * Safely attempt to read *size* bytes from kernel space address
+ * *unsafe_ptr* and store the data in *dst*.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_probe_read_user_str(void *dst, u32 size, const void *unsafe_ptr)
+ * Description
+ * Copy a NUL terminated string from an unsafe user address
+ * *unsafe_ptr* to *dst*. The *size* should include the
+ * terminating NUL byte. In case the string length is smaller than
+ * *size*, the target is not padded with further NUL bytes. If the
+ * string length is larger than *size*, just *size*-1 bytes are
+ * copied and the last byte is set to NUL.
+ *
+ * On success, the length of the copied string is returned. This
+ * makes this helper useful in tracing programs for reading
+ * strings, and more importantly to get its length at runtime. See
+ * the following snippet:
+ *
+ * ::
+ *
+ * SEC("kprobe/sys_open")
+ * void bpf_sys_open(struct pt_regs *ctx)
+ * {
+ * char buf[PATHLEN]; // PATHLEN is defined to 256
+ * int res = bpf_probe_read_user_str(buf, sizeof(buf),
+ * ctx->di);
+ *
+ * // Consume buf, for example push it to
+ * // userspace via bpf_perf_event_output(); we
+ * // can use res (the string length) as event
+ * // size, after checking its boundaries.
+ * }
+ *
+ * In comparison, using **bpf_probe_read_user()** helper here
+ * instead to read the string would require to estimate the length
+ * at compile time, and would often result in copying more memory
+ * than necessary.
+ *
+ * Another useful use case is when parsing individual process
+ * arguments or individual environment variables navigating
+ * *current*\ **->mm->arg_start** and *current*\
+ * **->mm->env_start**: using this helper and the return value,
+ * one can quickly iterate at the right offset of the memory area.
+ * Return
+ * On success, the strictly positive length of the string,
+ * including the trailing NUL character. On error, a negative
+ * value.
+ *
+ * int bpf_probe_read_kernel_str(void *dst, u32 size, const void *unsafe_ptr)
+ * Description
+ * Copy a NUL terminated string from an unsafe kernel address *unsafe_ptr*
+ * to *dst*. Same semantics as with bpf_probe_read_user_str() apply.
+ * Return
+ * On success, the strictly positive length of the string, including
+ * the trailing NUL character. On error, a negative value.
+ */
+#define __BPF_FUNC_MAPPER(FN) \
+ FN(unspec), \
+ FN(map_lookup_elem), \
+ FN(map_update_elem), \
+ FN(map_delete_elem), \
+ FN(probe_read), \
+ FN(ktime_get_ns), \
+ FN(trace_printk), \
+ FN(get_prandom_u32), \
+ FN(get_smp_processor_id), \
+ FN(skb_store_bytes), \
+ FN(l3_csum_replace), \
+ FN(l4_csum_replace), \
+ FN(tail_call), \
+ FN(clone_redirect), \
+ FN(get_current_pid_tgid), \
+ FN(get_current_uid_gid), \
+ FN(get_current_comm), \
+ FN(get_cgroup_classid), \
+ FN(skb_vlan_push), \
+ FN(skb_vlan_pop), \
+ FN(skb_get_tunnel_key), \
+ FN(skb_set_tunnel_key), \
+ FN(perf_event_read), \
+ FN(redirect), \
+ FN(get_route_realm), \
+ FN(perf_event_output), \
+ FN(skb_load_bytes), \
+ FN(get_stackid), \
+ FN(csum_diff), \
+ FN(skb_get_tunnel_opt), \
+ FN(skb_set_tunnel_opt), \
+ FN(skb_change_proto), \
+ FN(skb_change_type), \
+ FN(skb_under_cgroup), \
+ FN(get_hash_recalc), \
+ FN(get_current_task), \
+ FN(probe_write_user), \
+ FN(current_task_under_cgroup), \
+ FN(skb_change_tail), \
+ FN(skb_pull_data), \
+ FN(csum_update), \
+ FN(set_hash_invalid), \
+ FN(get_numa_node_id), \
+ FN(skb_change_head), \
+ FN(xdp_adjust_head), \
+ FN(probe_read_str), \
+ FN(get_socket_cookie), \
+ FN(get_socket_uid), \
+ FN(set_hash), \
+ FN(setsockopt), \
+ FN(skb_adjust_room), \
+ FN(redirect_map), \
+ FN(sk_redirect_map), \
+ FN(sock_map_update), \
+ FN(xdp_adjust_meta), \
+ FN(perf_event_read_value), \
+ FN(perf_prog_read_value), \
+ FN(getsockopt), \
+ FN(override_return), \
+ FN(sock_ops_cb_flags_set), \
+ FN(msg_redirect_map), \
+ FN(msg_apply_bytes), \
+ FN(msg_cork_bytes), \
+ FN(msg_pull_data), \
+ FN(bind), \
+ FN(xdp_adjust_tail), \
+ FN(skb_get_xfrm_state), \
+ FN(get_stack), \
+ FN(skb_load_bytes_relative), \
+ FN(fib_lookup), \
+ FN(sock_hash_update), \
+ FN(msg_redirect_hash), \
+ FN(sk_redirect_hash), \
+ FN(lwt_push_encap), \
+ FN(lwt_seg6_store_bytes), \
+ FN(lwt_seg6_adjust_srh), \
+ FN(lwt_seg6_action), \
+ FN(rc_repeat), \
+ FN(rc_keydown), \
+ FN(skb_cgroup_id), \
+ FN(get_current_cgroup_id), \
+ FN(get_local_storage), \
+ FN(sk_select_reuseport), \
+ FN(skb_ancestor_cgroup_id), \
+ FN(sk_lookup_tcp), \
+ FN(sk_lookup_udp), \
+ FN(sk_release), \
+ FN(map_push_elem), \
+ FN(map_pop_elem), \
+ FN(map_peek_elem), \
+ FN(msg_push_data), \
+ FN(msg_pop_data), \
+ FN(rc_pointer_rel), \
+ FN(spin_lock), \
+ FN(spin_unlock), \
+ FN(sk_fullsock), \
+ FN(tcp_sock), \
+ FN(skb_ecn_set_ce), \
+ FN(get_listener_sock), \
+ FN(skc_lookup_tcp), \
+ FN(tcp_check_syncookie), \
+ FN(sysctl_get_name), \
+ FN(sysctl_get_current_value), \
+ FN(sysctl_get_new_value), \
+ FN(sysctl_set_new_value), \
+ FN(strtol), \
+ FN(strtoul), \
+ FN(sk_storage_get), \
+ FN(sk_storage_delete), \
+ FN(send_signal), \
+ FN(tcp_gen_syncookie), \
+ FN(skb_output), \
+ FN(probe_read_user), \
+ FN(probe_read_kernel), \
+ FN(probe_read_user_str), \
+ FN(probe_read_kernel_str),
+
+/* integer value in 'imm' field of BPF_CALL instruction selects which helper
+ * function eBPF program intends to call
+ */
+#define __BPF_ENUM_FN(x) BPF_FUNC_ ## x
+enum bpf_func_id {
+ __BPF_FUNC_MAPPER(__BPF_ENUM_FN)
+ __BPF_FUNC_MAX_ID,
+};
+#undef __BPF_ENUM_FN
+
+/* All flags used by eBPF helper functions, placed here. */
+
+/* BPF_FUNC_skb_store_bytes flags. */
+#define BPF_F_RECOMPUTE_CSUM (1ULL << 0)
+#define BPF_F_INVALIDATE_HASH (1ULL << 1)
+
+/* BPF_FUNC_l3_csum_replace and BPF_FUNC_l4_csum_replace flags.
+ * First 4 bits are for passing the header field size.
+ */
+#define BPF_F_HDR_FIELD_MASK 0xfULL
+
+/* BPF_FUNC_l4_csum_replace flags. */
+#define BPF_F_PSEUDO_HDR (1ULL << 4)
+#define BPF_F_MARK_MANGLED_0 (1ULL << 5)
+#define BPF_F_MARK_ENFORCE (1ULL << 6)
+
+/* BPF_FUNC_clone_redirect and BPF_FUNC_redirect flags. */
+#define BPF_F_INGRESS (1ULL << 0)
+
+/* BPF_FUNC_skb_set_tunnel_key and BPF_FUNC_skb_get_tunnel_key flags. */
+#define BPF_F_TUNINFO_IPV6 (1ULL << 0)
+
+/* flags for both BPF_FUNC_get_stackid and BPF_FUNC_get_stack. */
+#define BPF_F_SKIP_FIELD_MASK 0xffULL
+#define BPF_F_USER_STACK (1ULL << 8)
+/* flags used by BPF_FUNC_get_stackid only. */
+#define BPF_F_FAST_STACK_CMP (1ULL << 9)
+#define BPF_F_REUSE_STACKID (1ULL << 10)
+/* flags used by BPF_FUNC_get_stack only. */
+#define BPF_F_USER_BUILD_ID (1ULL << 11)
+
+/* BPF_FUNC_skb_set_tunnel_key flags. */
+#define BPF_F_ZERO_CSUM_TX (1ULL << 1)
+#define BPF_F_DONT_FRAGMENT (1ULL << 2)
+#define BPF_F_SEQ_NUMBER (1ULL << 3)
+
+/* BPF_FUNC_perf_event_output, BPF_FUNC_perf_event_read and
+ * BPF_FUNC_perf_event_read_value flags.
+ */
+#define BPF_F_INDEX_MASK 0xffffffffULL
+#define BPF_F_CURRENT_CPU BPF_F_INDEX_MASK
+/* BPF_FUNC_perf_event_output for sk_buff input context. */
+#define BPF_F_CTXLEN_MASK (0xfffffULL << 32)
+
+/* Current network namespace */
+#define BPF_F_CURRENT_NETNS (-1L)
+
+/* BPF_FUNC_skb_adjust_room flags. */
+#define BPF_F_ADJ_ROOM_FIXED_GSO (1ULL << 0)
+
+#define BPF_ADJ_ROOM_ENCAP_L2_MASK 0xff
+#define BPF_ADJ_ROOM_ENCAP_L2_SHIFT 56
+
+#define BPF_F_ADJ_ROOM_ENCAP_L3_IPV4 (1ULL << 1)
+#define BPF_F_ADJ_ROOM_ENCAP_L3_IPV6 (1ULL << 2)
+#define BPF_F_ADJ_ROOM_ENCAP_L4_GRE (1ULL << 3)
+#define BPF_F_ADJ_ROOM_ENCAP_L4_UDP (1ULL << 4)
+#define BPF_F_ADJ_ROOM_ENCAP_L2(len) (((__u64)len & \
+ BPF_ADJ_ROOM_ENCAP_L2_MASK) \
+ << BPF_ADJ_ROOM_ENCAP_L2_SHIFT)
+
+/* BPF_FUNC_sysctl_get_name flags. */
+#define BPF_F_SYSCTL_BASE_NAME (1ULL << 0)
+
+/* BPF_FUNC_sk_storage_get flags */
+#define BPF_SK_STORAGE_GET_F_CREATE (1ULL << 0)
+
+/* Mode for BPF_FUNC_skb_adjust_room helper. */
+enum bpf_adj_room_mode {
+ BPF_ADJ_ROOM_NET,
+ BPF_ADJ_ROOM_MAC,
+};
+
+/* Mode for BPF_FUNC_skb_load_bytes_relative helper. */
+enum bpf_hdr_start_off {
+ BPF_HDR_START_MAC,
+ BPF_HDR_START_NET,
+};
+
+/* Encapsulation type for BPF_FUNC_lwt_push_encap helper. */
+enum bpf_lwt_encap_mode {
+ BPF_LWT_ENCAP_SEG6,
+ BPF_LWT_ENCAP_SEG6_INLINE,
+ BPF_LWT_ENCAP_IP,
+};
+
+#define __bpf_md_ptr(type, name) \
+union { \
+ type name; \
+ __u64 :64; \
+} __attribute__((aligned(8)))
+
+/* user accessible mirror of in-kernel sk_buff.
+ * new fields can only be added to the end of this structure
+ */
+struct __sk_buff {
+ __u32 len;
+ __u32 pkt_type;
+ __u32 mark;
+ __u32 queue_mapping;
+ __u32 protocol;
+ __u32 vlan_present;
+ __u32 vlan_tci;
+ __u32 vlan_proto;
+ __u32 priority;
+ __u32 ingress_ifindex;
+ __u32 ifindex;
+ __u32 tc_index;
+ __u32 cb[5];
+ __u32 hash;
+ __u32 tc_classid;
+ __u32 data;
+ __u32 data_end;
+ __u32 napi_id;
+
+ /* Accessed by BPF_PROG_TYPE_sk_skb types from here to ... */
+ __u32 family;
+ __u32 remote_ip4; /* Stored in network byte order */
+ __u32 local_ip4; /* Stored in network byte order */
+ __u32 remote_ip6[4]; /* Stored in network byte order */
+ __u32 local_ip6[4]; /* Stored in network byte order */
+ __u32 remote_port; /* Stored in network byte order */
+ __u32 local_port; /* stored in host byte order */
+ /* ... here. */
+
+ __u32 data_meta;
+ __bpf_md_ptr(struct bpf_flow_keys *, flow_keys);
+ __u64 tstamp;
+ __u32 wire_len;
+ __u32 gso_segs;
+ __bpf_md_ptr(struct bpf_sock *, sk);
+};
+
+struct bpf_tunnel_key {
+ __u32 tunnel_id;
+ union {
+ __u32 remote_ipv4;
+ __u32 remote_ipv6[4];
+ };
+ __u8 tunnel_tos;
+ __u8 tunnel_ttl;
+ __u16 tunnel_ext; /* Padding, future use. */
+ __u32 tunnel_label;
+};
+
+/* user accessible mirror of in-kernel xfrm_state.
+ * new fields can only be added to the end of this structure
+ */
+struct bpf_xfrm_state {
+ __u32 reqid;
+ __u32 spi; /* Stored in network byte order */
+ __u16 family;
+ __u16 ext; /* Padding, future use. */
+ union {
+ __u32 remote_ipv4; /* Stored in network byte order */
+ __u32 remote_ipv6[4]; /* Stored in network byte order */
+ };
+};
+
+/* Generic BPF return codes which all BPF program types may support.
+ * The values are binary compatible with their TC_ACT_* counter-part to
+ * provide backwards compatibility with existing SCHED_CLS and SCHED_ACT
+ * programs.
+ *
+ * XDP is handled seprately, see XDP_*.
+ */
+enum bpf_ret_code {
+ BPF_OK = 0,
+ /* 1 reserved */
+ BPF_DROP = 2,
+ /* 3-6 reserved */
+ BPF_REDIRECT = 7,
+ /* >127 are reserved for prog type specific return codes.
+ *
+ * BPF_LWT_REROUTE: used by BPF_PROG_TYPE_LWT_IN and
+ * BPF_PROG_TYPE_LWT_XMIT to indicate that skb had been
+ * changed and should be routed based on its new L3 header.
+ * (This is an L3 redirect, as opposed to L2 redirect
+ * represented by BPF_REDIRECT above).
+ */
+ BPF_LWT_REROUTE = 128,
+};
+
+struct bpf_sock {
+ __u32 bound_dev_if;
+ __u32 family;
+ __u32 type;
+ __u32 protocol;
+ __u32 mark;
+ __u32 priority;
+ /* IP address also allows 1 and 2 bytes access */
+ __u32 src_ip4;
+ __u32 src_ip6[4];
+ __u32 src_port; /* host byte order */
+ __u32 dst_port; /* network byte order */
+ __u32 dst_ip4;
+ __u32 dst_ip6[4];
+ __u32 state;
+};
+
+struct bpf_tcp_sock {
+ __u32 snd_cwnd; /* Sending congestion window */
+ __u32 srtt_us; /* smoothed round trip time << 3 in usecs */
+ __u32 rtt_min;
+ __u32 snd_ssthresh; /* Slow start size threshold */
+ __u32 rcv_nxt; /* What we want to receive next */
+ __u32 snd_nxt; /* Next sequence we send */
+ __u32 snd_una; /* First byte we want an ack for */
+ __u32 mss_cache; /* Cached effective mss, not including SACKS */
+ __u32 ecn_flags; /* ECN status bits. */
+ __u32 rate_delivered; /* saved rate sample: packets delivered */
+ __u32 rate_interval_us; /* saved rate sample: time elapsed */
+ __u32 packets_out; /* Packets which are "in flight" */
+ __u32 retrans_out; /* Retransmitted packets out */
+ __u32 total_retrans; /* Total retransmits for entire connection */
+ __u32 segs_in; /* RFC4898 tcpEStatsPerfSegsIn
+ * total number of segments in.
+ */
+ __u32 data_segs_in; /* RFC4898 tcpEStatsPerfDataSegsIn
+ * total number of data segments in.
+ */
+ __u32 segs_out; /* RFC4898 tcpEStatsPerfSegsOut
+ * The total number of segments sent.
+ */
+ __u32 data_segs_out; /* RFC4898 tcpEStatsPerfDataSegsOut
+ * total number of data segments sent.
+ */
+ __u32 lost_out; /* Lost packets */
+ __u32 sacked_out; /* SACK'd packets */
+ __u64 bytes_received; /* RFC4898 tcpEStatsAppHCThruOctetsReceived
+ * sum(delta(rcv_nxt)), or how many bytes
+ * were acked.
+ */
+ __u64 bytes_acked; /* RFC4898 tcpEStatsAppHCThruOctetsAcked
+ * sum(delta(snd_una)), or how many bytes
+ * were acked.
+ */
+ __u32 dsack_dups; /* RFC4898 tcpEStatsStackDSACKDups
+ * total number of DSACK blocks received
+ */
+ __u32 delivered; /* Total data packets delivered incl. rexmits */
+ __u32 delivered_ce; /* Like the above but only ECE marked packets */
+ __u32 icsk_retransmits; /* Number of unrecovered [RTO] timeouts */
+};
+
+struct bpf_sock_tuple {
+ union {
+ struct {
+ __be32 saddr;
+ __be32 daddr;
+ __be16 sport;
+ __be16 dport;
+ } ipv4;
+ struct {
+ __be32 saddr[4];
+ __be32 daddr[4];
+ __be16 sport;
+ __be16 dport;
+ } ipv6;
+ };
+};
+
+struct bpf_xdp_sock {
+ __u32 queue_id;
+};
+
+#define XDP_PACKET_HEADROOM 256
+
+/* User return codes for XDP prog type.
+ * A valid XDP program must return one of these defined values. All other
+ * return codes are reserved for future use. Unknown return codes will
+ * result in packet drops and a warning via bpf_warn_invalid_xdp_action().
+ */
+enum xdp_action {
+ XDP_ABORTED = 0,
+ XDP_DROP,
+ XDP_PASS,
+ XDP_TX,
+ XDP_REDIRECT,
+};
+
+/* user accessible metadata for XDP packet hook
+ * new fields must be added to the end of this structure
+ */
+struct xdp_md {
+ __u32 data;
+ __u32 data_end;
+ __u32 data_meta;
+ /* Below access go through struct xdp_rxq_info */
+ __u32 ingress_ifindex; /* rxq->dev->ifindex */
+ __u32 rx_queue_index; /* rxq->queue_index */
+};
+
+enum sk_action {
+ SK_DROP = 0,
+ SK_PASS,
+};
+
+/* user accessible metadata for SK_MSG packet hook, new fields must
+ * be added to the end of this structure
+ */
+struct sk_msg_md {
+ __bpf_md_ptr(void *, data);
+ __bpf_md_ptr(void *, data_end);
+
+ __u32 family;
+ __u32 remote_ip4; /* Stored in network byte order */
+ __u32 local_ip4; /* Stored in network byte order */
+ __u32 remote_ip6[4]; /* Stored in network byte order */
+ __u32 local_ip6[4]; /* Stored in network byte order */
+ __u32 remote_port; /* Stored in network byte order */
+ __u32 local_port; /* stored in host byte order */
+ __u32 size; /* Total size of sk_msg */
+};
+
+struct sk_reuseport_md {
+ /*
+ * Start of directly accessible data. It begins from
+ * the tcp/udp header.
+ */
+ __bpf_md_ptr(void *, data);
+ /* End of directly accessible data */
+ __bpf_md_ptr(void *, data_end);
+ /*
+ * Total length of packet (starting from the tcp/udp header).
+ * Note that the directly accessible bytes (data_end - data)
+ * could be less than this "len". Those bytes could be
+ * indirectly read by a helper "bpf_skb_load_bytes()".
+ */
+ __u32 len;
+ /*
+ * Eth protocol in the mac header (network byte order). e.g.
+ * ETH_P_IP(0x0800) and ETH_P_IPV6(0x86DD)
+ */
+ __u32 eth_protocol;
+ __u32 ip_protocol; /* IP protocol. e.g. IPPROTO_TCP, IPPROTO_UDP */
+ __u32 bind_inany; /* Is sock bound to an INANY address? */
+ __u32 hash; /* A hash of the packet 4 tuples */
+};
+
+#define BPF_TAG_SIZE 8
+
+struct bpf_prog_info {
+ __u32 type;
+ __u32 id;
+ __u8 tag[BPF_TAG_SIZE];
+ __u32 jited_prog_len;
+ __u32 xlated_prog_len;
+ __aligned_u64 jited_prog_insns;
+ __aligned_u64 xlated_prog_insns;
+ __u64 load_time; /* ns since boottime */
+ __u32 created_by_uid;
+ __u32 nr_map_ids;
+ __aligned_u64 map_ids;
+ char name[BPF_OBJ_NAME_LEN];
+ __u32 ifindex;
+ __u32 gpl_compatible:1;
+ __u32 :31; /* alignment pad */
+ __u64 netns_dev;
+ __u64 netns_ino;
+ __u32 nr_jited_ksyms;
+ __u32 nr_jited_func_lens;
+ __aligned_u64 jited_ksyms;
+ __aligned_u64 jited_func_lens;
+ __u32 btf_id;
+ __u32 func_info_rec_size;
+ __aligned_u64 func_info;
+ __u32 nr_func_info;
+ __u32 nr_line_info;
+ __aligned_u64 line_info;
+ __aligned_u64 jited_line_info;
+ __u32 nr_jited_line_info;
+ __u32 line_info_rec_size;
+ __u32 jited_line_info_rec_size;
+ __u32 nr_prog_tags;
+ __aligned_u64 prog_tags;
+ __u64 run_time_ns;
+ __u64 run_cnt;
+} __attribute__((aligned(8)));
+
+struct bpf_map_info {
+ __u32 type;
+ __u32 id;
+ __u32 key_size;
+ __u32 value_size;
+ __u32 max_entries;
+ __u32 map_flags;
+ char name[BPF_OBJ_NAME_LEN];
+ __u32 ifindex;
+ __u32 :32;
+ __u64 netns_dev;
+ __u64 netns_ino;
+ __u32 btf_id;
+ __u32 btf_key_type_id;
+ __u32 btf_value_type_id;
+} __attribute__((aligned(8)));
+
+struct bpf_btf_info {
+ __aligned_u64 btf;
+ __u32 btf_size;
+ __u32 id;
+} __attribute__((aligned(8)));
+
+/* User bpf_sock_addr struct to access socket fields and sockaddr struct passed
+ * by user and intended to be used by socket (e.g. to bind to, depends on
+ * attach attach type).
+ */
+struct bpf_sock_addr {
+ __u32 user_family; /* Allows 4-byte read, but no write. */
+ __u32 user_ip4; /* Allows 1,2,4-byte read and 4-byte write.
+ * Stored in network byte order.
+ */
+ __u32 user_ip6[4]; /* Allows 1,2,4,8-byte read and 4,8-byte write.
+ * Stored in network byte order.
+ */
+ __u32 user_port; /* Allows 4-byte read and write.
+ * Stored in network byte order
+ */
+ __u32 family; /* Allows 4-byte read, but no write */
+ __u32 type; /* Allows 4-byte read, but no write */
+ __u32 protocol; /* Allows 4-byte read, but no write */
+ __u32 msg_src_ip4; /* Allows 1,2,4-byte read and 4-byte write.
+ * Stored in network byte order.
+ */
+ __u32 msg_src_ip6[4]; /* Allows 1,2,4,8-byte read and 4,8-byte write.
+ * Stored in network byte order.
+ */
+ __bpf_md_ptr(struct bpf_sock *, sk);
+};
+
+/* User bpf_sock_ops struct to access socket values and specify request ops
+ * and their replies.
+ * Some of this fields are in network (bigendian) byte order and may need
+ * to be converted before use (bpf_ntohl() defined in samples/bpf/bpf_endian.h).
+ * New fields can only be added at the end of this structure
+ */
+struct bpf_sock_ops {
+ __u32 op;
+ union {
+ __u32 args[4]; /* Optionally passed to bpf program */
+ __u32 reply; /* Returned by bpf program */
+ __u32 replylong[4]; /* Optionally returned by bpf prog */
+ };
+ __u32 family;
+ __u32 remote_ip4; /* Stored in network byte order */
+ __u32 local_ip4; /* Stored in network byte order */
+ __u32 remote_ip6[4]; /* Stored in network byte order */
+ __u32 local_ip6[4]; /* Stored in network byte order */
+ __u32 remote_port; /* Stored in network byte order */
+ __u32 local_port; /* stored in host byte order */
+ __u32 is_fullsock; /* Some TCP fields are only valid if
+ * there is a full socket. If not, the
+ * fields read as zero.
+ */
+ __u32 snd_cwnd;
+ __u32 srtt_us; /* Averaged RTT << 3 in usecs */
+ __u32 bpf_sock_ops_cb_flags; /* flags defined in uapi/linux/tcp.h */
+ __u32 state;
+ __u32 rtt_min;
+ __u32 snd_ssthresh;
+ __u32 rcv_nxt;
+ __u32 snd_nxt;
+ __u32 snd_una;
+ __u32 mss_cache;
+ __u32 ecn_flags;
+ __u32 rate_delivered;
+ __u32 rate_interval_us;
+ __u32 packets_out;
+ __u32 retrans_out;
+ __u32 total_retrans;
+ __u32 segs_in;
+ __u32 data_segs_in;
+ __u32 segs_out;
+ __u32 data_segs_out;
+ __u32 lost_out;
+ __u32 sacked_out;
+ __u32 sk_txhash;
+ __u64 bytes_received;
+ __u64 bytes_acked;
+ __bpf_md_ptr(struct bpf_sock *, sk);
+};
+
+/* Definitions for bpf_sock_ops_cb_flags */
+#define BPF_SOCK_OPS_RTO_CB_FLAG (1<<0)
+#define BPF_SOCK_OPS_RETRANS_CB_FLAG (1<<1)
+#define BPF_SOCK_OPS_STATE_CB_FLAG (1<<2)
+#define BPF_SOCK_OPS_RTT_CB_FLAG (1<<3)
+#define BPF_SOCK_OPS_ALL_CB_FLAGS 0xF /* Mask of all currently
+ * supported cb flags
+ */
+
+/* List of known BPF sock_ops operators.
+ * New entries can only be added at the end
+ */
+enum {
+ BPF_SOCK_OPS_VOID,
+ BPF_SOCK_OPS_TIMEOUT_INIT, /* Should return SYN-RTO value to use or
+ * -1 if default value should be used
+ */
+ BPF_SOCK_OPS_RWND_INIT, /* Should return initial advertized
+ * window (in packets) or -1 if default
+ * value should be used
+ */
+ BPF_SOCK_OPS_TCP_CONNECT_CB, /* Calls BPF program right before an
+ * active connection is initialized
+ */
+ BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB, /* Calls BPF program when an
+ * active connection is
+ * established
+ */
+ BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB, /* Calls BPF program when a
+ * passive connection is
+ * established
+ */
+ BPF_SOCK_OPS_NEEDS_ECN, /* If connection's congestion control
+ * needs ECN
+ */
+ BPF_SOCK_OPS_BASE_RTT, /* Get base RTT. The correct value is
+ * based on the path and may be
+ * dependent on the congestion control
+ * algorithm. In general it indicates
+ * a congestion threshold. RTTs above
+ * this indicate congestion
+ */
+ BPF_SOCK_OPS_RTO_CB, /* Called when an RTO has triggered.
+ * Arg1: value of icsk_retransmits
+ * Arg2: value of icsk_rto
+ * Arg3: whether RTO has expired
+ */
+ BPF_SOCK_OPS_RETRANS_CB, /* Called when skb is retransmitted.
+ * Arg1: sequence number of 1st byte
+ * Arg2: # segments
+ * Arg3: return value of
+ * tcp_transmit_skb (0 => success)
+ */
+ BPF_SOCK_OPS_STATE_CB, /* Called when TCP changes state.
+ * Arg1: old_state
+ * Arg2: new_state
+ */
+ BPF_SOCK_OPS_TCP_LISTEN_CB, /* Called on listen(2), right after
+ * socket transition to LISTEN state.
+ */
+ BPF_SOCK_OPS_RTT_CB, /* Called on every RTT.
+ */
+};
+
+/* List of TCP states. There is a build check in net/ipv4/tcp.c to detect
+ * changes between the TCP and BPF versions. Ideally this should never happen.
+ * If it does, we need to add code to convert them before calling
+ * the BPF sock_ops function.
+ */
+enum {
+ BPF_TCP_ESTABLISHED = 1,
+ BPF_TCP_SYN_SENT,
+ BPF_TCP_SYN_RECV,
+ BPF_TCP_FIN_WAIT1,
+ BPF_TCP_FIN_WAIT2,
+ BPF_TCP_TIME_WAIT,
+ BPF_TCP_CLOSE,
+ BPF_TCP_CLOSE_WAIT,
+ BPF_TCP_LAST_ACK,
+ BPF_TCP_LISTEN,
+ BPF_TCP_CLOSING, /* Now a valid state */
+ BPF_TCP_NEW_SYN_RECV,
+
+ BPF_TCP_MAX_STATES /* Leave at the end! */
+};
+
+#define TCP_BPF_IW 1001 /* Set TCP initial congestion window */
+#define TCP_BPF_SNDCWND_CLAMP 1002 /* Set sndcwnd_clamp */
+
+struct bpf_perf_event_value {
+ __u64 counter;
+ __u64 enabled;
+ __u64 running;
+};
+
+#define BPF_DEVCG_ACC_MKNOD (1ULL << 0)
+#define BPF_DEVCG_ACC_READ (1ULL << 1)
+#define BPF_DEVCG_ACC_WRITE (1ULL << 2)
+
+#define BPF_DEVCG_DEV_BLOCK (1ULL << 0)
+#define BPF_DEVCG_DEV_CHAR (1ULL << 1)
+
+struct bpf_cgroup_dev_ctx {
+ /* access_type encoded as (BPF_DEVCG_ACC_* << 16) | BPF_DEVCG_DEV_* */
+ __u32 access_type;
+ __u32 major;
+ __u32 minor;
+};
+
+struct bpf_raw_tracepoint_args {
+ __u64 args[0];
+};
+
+/* DIRECT: Skip the FIB rules and go to FIB table associated with device
+ * OUTPUT: Do lookup from egress perspective; default is ingress
+ */
+#define BPF_FIB_LOOKUP_DIRECT (1U << 0)
+#define BPF_FIB_LOOKUP_OUTPUT (1U << 1)
+
+enum {
+ BPF_FIB_LKUP_RET_SUCCESS, /* lookup successful */
+ BPF_FIB_LKUP_RET_BLACKHOLE, /* dest is blackholed; can be dropped */
+ BPF_FIB_LKUP_RET_UNREACHABLE, /* dest is unreachable; can be dropped */
+ BPF_FIB_LKUP_RET_PROHIBIT, /* dest not allowed; can be dropped */
+ BPF_FIB_LKUP_RET_NOT_FWDED, /* packet is not forwarded */
+ BPF_FIB_LKUP_RET_FWD_DISABLED, /* fwding is not enabled on ingress */
+ BPF_FIB_LKUP_RET_UNSUPP_LWT, /* fwd requires encapsulation */
+ BPF_FIB_LKUP_RET_NO_NEIGH, /* no neighbor entry for nh */
+ BPF_FIB_LKUP_RET_FRAG_NEEDED, /* fragmentation required to fwd */
+};
+
+struct bpf_fib_lookup {
+ /* input: network family for lookup (AF_INET, AF_INET6)
+ * output: network family of egress nexthop
+ */
+ __u8 family;
+
+ /* set if lookup is to consider L4 data - e.g., FIB rules */
+ __u8 l4_protocol;
+ __be16 sport;
+ __be16 dport;
+
+ /* total length of packet from network header - used for MTU check */
+ __u16 tot_len;
+
+ /* input: L3 device index for lookup
+ * output: device index from FIB lookup
+ */
+ __u32 ifindex;
+
+ union {
+ /* inputs to lookup */
+ __u8 tos; /* AF_INET */
+ __be32 flowinfo; /* AF_INET6, flow_label + priority */
+
+ /* output: metric of fib result (IPv4/IPv6 only) */
+ __u32 rt_metric;
+ };
+
+ union {
+ __be32 ipv4_src;
+ __u32 ipv6_src[4]; /* in6_addr; network order */
+ };
+
+ /* input to bpf_fib_lookup, ipv{4,6}_dst is destination address in
+ * network header. output: bpf_fib_lookup sets to gateway address
+ * if FIB lookup returns gateway route
+ */
+ union {
+ __be32 ipv4_dst;
+ __u32 ipv6_dst[4]; /* in6_addr; network order */
+ };
+
+ /* output */
+ __be16 h_vlan_proto;
+ __be16 h_vlan_TCI;
+ __u8 smac[6]; /* ETH_ALEN */
+ __u8 dmac[6]; /* ETH_ALEN */
+};
+
+enum bpf_task_fd_type {
+ BPF_FD_TYPE_RAW_TRACEPOINT, /* tp name */
+ BPF_FD_TYPE_TRACEPOINT, /* tp name */
+ BPF_FD_TYPE_KPROBE, /* (symbol + offset) or addr */
+ BPF_FD_TYPE_KRETPROBE, /* (symbol + offset) or addr */
+ BPF_FD_TYPE_UPROBE, /* filename + offset */
+ BPF_FD_TYPE_URETPROBE, /* filename + offset */
+};
+
+#define BPF_FLOW_DISSECTOR_F_PARSE_1ST_FRAG (1U << 0)
+#define BPF_FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL (1U << 1)
+#define BPF_FLOW_DISSECTOR_F_STOP_AT_ENCAP (1U << 2)
+
+struct bpf_flow_keys {
+ __u16 nhoff;
+ __u16 thoff;
+ __u16 addr_proto; /* ETH_P_* of valid addrs */
+ __u8 is_frag;
+ __u8 is_first_frag;
+ __u8 is_encap;
+ __u8 ip_proto;
+ __be16 n_proto;
+ __be16 sport;
+ __be16 dport;
+ union {
+ struct {
+ __be32 ipv4_src;
+ __be32 ipv4_dst;
+ };
+ struct {
+ __u32 ipv6_src[4]; /* in6_addr; network order */
+ __u32 ipv6_dst[4]; /* in6_addr; network order */
+ };
+ };
+ __u32 flags;
+ __be32 flow_label;
+};
+
+struct bpf_func_info {
+ __u32 insn_off;
+ __u32 type_id;
+};
+
+#define BPF_LINE_INFO_LINE_NUM(line_col) ((line_col) >> 10)
+#define BPF_LINE_INFO_LINE_COL(line_col) ((line_col) & 0x3ff)
+
+struct bpf_line_info {
+ __u32 insn_off;
+ __u32 file_name_off;
+ __u32 line_off;
+ __u32 line_col;
+};
+
+struct bpf_spin_lock {
+ __u32 val;
+};
+
+struct bpf_sysctl {
+ __u32 write; /* Sysctl is being read (= 0) or written (= 1).
+ * Allows 1,2,4-byte read, but no write.
+ */
+ __u32 file_pos; /* Sysctl file position to read from, write to.
+ * Allows 1,2,4-byte read an 4-byte write.
+ */
+};
+
+struct bpf_sockopt {
+ __bpf_md_ptr(struct bpf_sock *, sk);
+ __bpf_md_ptr(void *, optval);
+ __bpf_md_ptr(void *, optval_end);
+
+ __s32 level;
+ __s32 optname;
+ __s32 optlen;
+ __s32 retval;
+};
+
+#endif /* _UAPI__LINUX_BPF_H__ */
diff --git a/src/contrib/libbpf/include/uapi/linux/bpf_common.h b/src/contrib/libbpf/include/uapi/linux/bpf_common.h
new file mode 100644
index 0000000..ee97668
--- /dev/null
+++ b/src/contrib/libbpf/include/uapi/linux/bpf_common.h
@@ -0,0 +1,57 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _UAPI__LINUX_BPF_COMMON_H__
+#define _UAPI__LINUX_BPF_COMMON_H__
+
+/* Instruction classes */
+#define BPF_CLASS(code) ((code) & 0x07)
+#define BPF_LD 0x00
+#define BPF_LDX 0x01
+#define BPF_ST 0x02
+#define BPF_STX 0x03
+#define BPF_ALU 0x04
+#define BPF_JMP 0x05
+#define BPF_RET 0x06
+#define BPF_MISC 0x07
+
+/* ld/ldx fields */
+#define BPF_SIZE(code) ((code) & 0x18)
+#define BPF_W 0x00 /* 32-bit */
+#define BPF_H 0x08 /* 16-bit */
+#define BPF_B 0x10 /* 8-bit */
+/* eBPF BPF_DW 0x18 64-bit */
+#define BPF_MODE(code) ((code) & 0xe0)
+#define BPF_IMM 0x00
+#define BPF_ABS 0x20
+#define BPF_IND 0x40
+#define BPF_MEM 0x60
+#define BPF_LEN 0x80
+#define BPF_MSH 0xa0
+
+/* alu/jmp fields */
+#define BPF_OP(code) ((code) & 0xf0)
+#define BPF_ADD 0x00
+#define BPF_SUB 0x10
+#define BPF_MUL 0x20
+#define BPF_DIV 0x30
+#define BPF_OR 0x40
+#define BPF_AND 0x50
+#define BPF_LSH 0x60
+#define BPF_RSH 0x70
+#define BPF_NEG 0x80
+#define BPF_MOD 0x90
+#define BPF_XOR 0xa0
+
+#define BPF_JA 0x00
+#define BPF_JEQ 0x10
+#define BPF_JGT 0x20
+#define BPF_JGE 0x30
+#define BPF_JSET 0x40
+#define BPF_SRC(code) ((code) & 0x08)
+#define BPF_K 0x00
+#define BPF_X 0x08
+
+#ifndef BPF_MAXINSNS
+#define BPF_MAXINSNS 4096
+#endif
+
+#endif /* _UAPI__LINUX_BPF_COMMON_H__ */
diff --git a/src/contrib/libbpf/include/uapi/linux/btf.h b/src/contrib/libbpf/include/uapi/linux/btf.h
new file mode 100644
index 0000000..63ae4a3
--- /dev/null
+++ b/src/contrib/libbpf/include/uapi/linux/btf.h
@@ -0,0 +1,165 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/* Copyright (c) 2018 Facebook */
+#ifndef _UAPI__LINUX_BTF_H__
+#define _UAPI__LINUX_BTF_H__
+
+#include <linux/types.h>
+
+#define BTF_MAGIC 0xeB9F
+#define BTF_VERSION 1
+
+struct btf_header {
+ __u16 magic;
+ __u8 version;
+ __u8 flags;
+ __u32 hdr_len;
+
+ /* All offsets are in bytes relative to the end of this header */
+ __u32 type_off; /* offset of type section */
+ __u32 type_len; /* length of type section */
+ __u32 str_off; /* offset of string section */
+ __u32 str_len; /* length of string section */
+};
+
+/* Max # of type identifier */
+#define BTF_MAX_TYPE 0x0000ffff
+/* Max offset into the string section */
+#define BTF_MAX_NAME_OFFSET 0x0000ffff
+/* Max # of struct/union/enum members or func args */
+#define BTF_MAX_VLEN 0xffff
+
+struct btf_type {
+ __u32 name_off;
+ /* "info" bits arrangement
+ * bits 0-15: vlen (e.g. # of struct's members)
+ * bits 16-23: unused
+ * bits 24-27: kind (e.g. int, ptr, array...etc)
+ * bits 28-30: unused
+ * bit 31: kind_flag, currently used by
+ * struct, union and fwd
+ */
+ __u32 info;
+ /* "size" is used by INT, ENUM, STRUCT, UNION and DATASEC.
+ * "size" tells the size of the type it is describing.
+ *
+ * "type" is used by PTR, TYPEDEF, VOLATILE, CONST, RESTRICT,
+ * FUNC, FUNC_PROTO and VAR.
+ * "type" is a type_id referring to another type.
+ */
+ union {
+ __u32 size;
+ __u32 type;
+ };
+};
+
+#define BTF_INFO_KIND(info) (((info) >> 24) & 0x0f)
+#define BTF_INFO_VLEN(info) ((info) & 0xffff)
+#define BTF_INFO_KFLAG(info) ((info) >> 31)
+
+#define BTF_KIND_UNKN 0 /* Unknown */
+#define BTF_KIND_INT 1 /* Integer */
+#define BTF_KIND_PTR 2 /* Pointer */
+#define BTF_KIND_ARRAY 3 /* Array */
+#define BTF_KIND_STRUCT 4 /* Struct */
+#define BTF_KIND_UNION 5 /* Union */
+#define BTF_KIND_ENUM 6 /* Enumeration */
+#define BTF_KIND_FWD 7 /* Forward */
+#define BTF_KIND_TYPEDEF 8 /* Typedef */
+#define BTF_KIND_VOLATILE 9 /* Volatile */
+#define BTF_KIND_CONST 10 /* Const */
+#define BTF_KIND_RESTRICT 11 /* Restrict */
+#define BTF_KIND_FUNC 12 /* Function */
+#define BTF_KIND_FUNC_PROTO 13 /* Function Proto */
+#define BTF_KIND_VAR 14 /* Variable */
+#define BTF_KIND_DATASEC 15 /* Section */
+#define BTF_KIND_MAX BTF_KIND_DATASEC
+#define NR_BTF_KINDS (BTF_KIND_MAX + 1)
+
+/* For some specific BTF_KIND, "struct btf_type" is immediately
+ * followed by extra data.
+ */
+
+/* BTF_KIND_INT is followed by a u32 and the following
+ * is the 32 bits arrangement:
+ */
+#define BTF_INT_ENCODING(VAL) (((VAL) & 0x0f000000) >> 24)
+#define BTF_INT_OFFSET(VAL) (((VAL) & 0x00ff0000) >> 16)
+#define BTF_INT_BITS(VAL) ((VAL) & 0x000000ff)
+
+/* Attributes stored in the BTF_INT_ENCODING */
+#define BTF_INT_SIGNED (1 << 0)
+#define BTF_INT_CHAR (1 << 1)
+#define BTF_INT_BOOL (1 << 2)
+
+/* BTF_KIND_ENUM is followed by multiple "struct btf_enum".
+ * The exact number of btf_enum is stored in the vlen (of the
+ * info in "struct btf_type").
+ */
+struct btf_enum {
+ __u32 name_off;
+ __s32 val;
+};
+
+/* BTF_KIND_ARRAY is followed by one "struct btf_array" */
+struct btf_array {
+ __u32 type;
+ __u32 index_type;
+ __u32 nelems;
+};
+
+/* BTF_KIND_STRUCT and BTF_KIND_UNION are followed
+ * by multiple "struct btf_member". The exact number
+ * of btf_member is stored in the vlen (of the info in
+ * "struct btf_type").
+ */
+struct btf_member {
+ __u32 name_off;
+ __u32 type;
+ /* If the type info kind_flag is set, the btf_member offset
+ * contains both member bitfield size and bit offset. The
+ * bitfield size is set for bitfield members. If the type
+ * info kind_flag is not set, the offset contains only bit
+ * offset.
+ */
+ __u32 offset;
+};
+
+/* If the struct/union type info kind_flag is set, the
+ * following two macros are used to access bitfield_size
+ * and bit_offset from btf_member.offset.
+ */
+#define BTF_MEMBER_BITFIELD_SIZE(val) ((val) >> 24)
+#define BTF_MEMBER_BIT_OFFSET(val) ((val) & 0xffffff)
+
+/* BTF_KIND_FUNC_PROTO is followed by multiple "struct btf_param".
+ * The exact number of btf_param is stored in the vlen (of the
+ * info in "struct btf_type").
+ */
+struct btf_param {
+ __u32 name_off;
+ __u32 type;
+};
+
+enum {
+ BTF_VAR_STATIC = 0,
+ BTF_VAR_GLOBAL_ALLOCATED,
+};
+
+/* BTF_KIND_VAR is followed by a single "struct btf_var" to describe
+ * additional information related to the variable such as its linkage.
+ */
+struct btf_var {
+ __u32 linkage;
+};
+
+/* BTF_KIND_DATASEC is followed by multiple "struct btf_var_secinfo"
+ * to describe all BTF_KIND_VAR types it contains along with it's
+ * in-section offset as well as size.
+ */
+struct btf_var_secinfo {
+ __u32 type;
+ __u32 offset;
+ __u32 size;
+};
+
+#endif /* _UAPI__LINUX_BTF_H__ */
diff --git a/src/contrib/libbpf/include/uapi/linux/if_link.h b/src/contrib/libbpf/include/uapi/linux/if_link.h
new file mode 100644
index 0000000..8aec876
--- /dev/null
+++ b/src/contrib/libbpf/include/uapi/linux/if_link.h
@@ -0,0 +1,1033 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _UAPI_LINUX_IF_LINK_H
+#define _UAPI_LINUX_IF_LINK_H
+
+#include <linux/types.h>
+#include <linux/netlink.h>
+
+/* This struct should be in sync with struct rtnl_link_stats64 */
+struct rtnl_link_stats {
+ __u32 rx_packets; /* total packets received */
+ __u32 tx_packets; /* total packets transmitted */
+ __u32 rx_bytes; /* total bytes received */
+ __u32 tx_bytes; /* total bytes transmitted */
+ __u32 rx_errors; /* bad packets received */
+ __u32 tx_errors; /* packet transmit problems */
+ __u32 rx_dropped; /* no space in linux buffers */
+ __u32 tx_dropped; /* no space available in linux */
+ __u32 multicast; /* multicast packets received */
+ __u32 collisions;
+
+ /* detailed rx_errors: */
+ __u32 rx_length_errors;
+ __u32 rx_over_errors; /* receiver ring buff overflow */
+ __u32 rx_crc_errors; /* recved pkt with crc error */
+ __u32 rx_frame_errors; /* recv'd frame alignment error */
+ __u32 rx_fifo_errors; /* recv'r fifo overrun */
+ __u32 rx_missed_errors; /* receiver missed packet */
+
+ /* detailed tx_errors */
+ __u32 tx_aborted_errors;
+ __u32 tx_carrier_errors;
+ __u32 tx_fifo_errors;
+ __u32 tx_heartbeat_errors;
+ __u32 tx_window_errors;
+
+ /* for cslip etc */
+ __u32 rx_compressed;
+ __u32 tx_compressed;
+
+ __u32 rx_nohandler; /* dropped, no handler found */
+};
+
+/* The main device statistics structure */
+struct rtnl_link_stats64 {
+ __u64 rx_packets; /* total packets received */
+ __u64 tx_packets; /* total packets transmitted */
+ __u64 rx_bytes; /* total bytes received */
+ __u64 tx_bytes; /* total bytes transmitted */
+ __u64 rx_errors; /* bad packets received */
+ __u64 tx_errors; /* packet transmit problems */
+ __u64 rx_dropped; /* no space in linux buffers */
+ __u64 tx_dropped; /* no space available in linux */
+ __u64 multicast; /* multicast packets received */
+ __u64 collisions;
+
+ /* detailed rx_errors: */
+ __u64 rx_length_errors;
+ __u64 rx_over_errors; /* receiver ring buff overflow */
+ __u64 rx_crc_errors; /* recved pkt with crc error */
+ __u64 rx_frame_errors; /* recv'd frame alignment error */
+ __u64 rx_fifo_errors; /* recv'r fifo overrun */
+ __u64 rx_missed_errors; /* receiver missed packet */
+
+ /* detailed tx_errors */
+ __u64 tx_aborted_errors;
+ __u64 tx_carrier_errors;
+ __u64 tx_fifo_errors;
+ __u64 tx_heartbeat_errors;
+ __u64 tx_window_errors;
+
+ /* for cslip etc */
+ __u64 rx_compressed;
+ __u64 tx_compressed;
+
+ __u64 rx_nohandler; /* dropped, no handler found */
+};
+
+/* The struct should be in sync with struct ifmap */
+struct rtnl_link_ifmap {
+ __u64 mem_start;
+ __u64 mem_end;
+ __u64 base_addr;
+ __u16 irq;
+ __u8 dma;
+ __u8 port;
+};
+
+/*
+ * IFLA_AF_SPEC
+ * Contains nested attributes for address family specific attributes.
+ * Each address family may create a attribute with the address family
+ * number as type and create its own attribute structure in it.
+ *
+ * Example:
+ * [IFLA_AF_SPEC] = {
+ * [AF_INET] = {
+ * [IFLA_INET_CONF] = ...,
+ * },
+ * [AF_INET6] = {
+ * [IFLA_INET6_FLAGS] = ...,
+ * [IFLA_INET6_CONF] = ...,
+ * }
+ * }
+ */
+
+enum {
+ IFLA_UNSPEC,
+ IFLA_ADDRESS,
+ IFLA_BROADCAST,
+ IFLA_IFNAME,
+ IFLA_MTU,
+ IFLA_LINK,
+ IFLA_QDISC,
+ IFLA_STATS,
+ IFLA_COST,
+#define IFLA_COST IFLA_COST
+ IFLA_PRIORITY,
+#define IFLA_PRIORITY IFLA_PRIORITY
+ IFLA_MASTER,
+#define IFLA_MASTER IFLA_MASTER
+ IFLA_WIRELESS, /* Wireless Extension event - see wireless.h */
+#define IFLA_WIRELESS IFLA_WIRELESS
+ IFLA_PROTINFO, /* Protocol specific information for a link */
+#define IFLA_PROTINFO IFLA_PROTINFO
+ IFLA_TXQLEN,
+#define IFLA_TXQLEN IFLA_TXQLEN
+ IFLA_MAP,
+#define IFLA_MAP IFLA_MAP
+ IFLA_WEIGHT,
+#define IFLA_WEIGHT IFLA_WEIGHT
+ IFLA_OPERSTATE,
+ IFLA_LINKMODE,
+ IFLA_LINKINFO,
+#define IFLA_LINKINFO IFLA_LINKINFO
+ IFLA_NET_NS_PID,
+ IFLA_IFALIAS,
+ IFLA_NUM_VF, /* Number of VFs if device is SR-IOV PF */
+ IFLA_VFINFO_LIST,
+ IFLA_STATS64,
+ IFLA_VF_PORTS,
+ IFLA_PORT_SELF,
+ IFLA_AF_SPEC,
+ IFLA_GROUP, /* Group the device belongs to */
+ IFLA_NET_NS_FD,
+ IFLA_EXT_MASK, /* Extended info mask, VFs, etc */
+ IFLA_PROMISCUITY, /* Promiscuity count: > 0 means acts PROMISC */
+#define IFLA_PROMISCUITY IFLA_PROMISCUITY
+ IFLA_NUM_TX_QUEUES,
+ IFLA_NUM_RX_QUEUES,
+ IFLA_CARRIER,
+ IFLA_PHYS_PORT_ID,
+ IFLA_CARRIER_CHANGES,
+ IFLA_PHYS_SWITCH_ID,
+ IFLA_LINK_NETNSID,
+ IFLA_PHYS_PORT_NAME,
+ IFLA_PROTO_DOWN,
+ IFLA_GSO_MAX_SEGS,
+ IFLA_GSO_MAX_SIZE,
+ IFLA_PAD,
+ IFLA_XDP,
+ IFLA_EVENT,
+ IFLA_NEW_NETNSID,
+ IFLA_IF_NETNSID,
+ IFLA_TARGET_NETNSID = IFLA_IF_NETNSID, /* new alias */
+ IFLA_CARRIER_UP_COUNT,
+ IFLA_CARRIER_DOWN_COUNT,
+ IFLA_NEW_IFINDEX,
+ IFLA_MIN_MTU,
+ IFLA_MAX_MTU,
+ IFLA_PROP_LIST,
+ IFLA_ALT_IFNAME, /* Alternative ifname */
+ __IFLA_MAX
+};
+
+
+#define IFLA_MAX (__IFLA_MAX - 1)
+
+/* backwards compatibility for userspace */
+#ifndef __KERNEL__
+#define IFLA_RTA(r) ((struct rtattr*)(((char*)(r)) + NLMSG_ALIGN(sizeof(struct ifinfomsg))))
+#define IFLA_PAYLOAD(n) NLMSG_PAYLOAD(n,sizeof(struct ifinfomsg))
+#endif
+
+enum {
+ IFLA_INET_UNSPEC,
+ IFLA_INET_CONF,
+ __IFLA_INET_MAX,
+};
+
+#define IFLA_INET_MAX (__IFLA_INET_MAX - 1)
+
+/* ifi_flags.
+
+ IFF_* flags.
+
+ The only change is:
+ IFF_LOOPBACK, IFF_BROADCAST and IFF_POINTOPOINT are
+ more not changeable by user. They describe link media
+ characteristics and set by device driver.
+
+ Comments:
+ - Combination IFF_BROADCAST|IFF_POINTOPOINT is invalid
+ - If neither of these three flags are set;
+ the interface is NBMA.
+
+ - IFF_MULTICAST does not mean anything special:
+ multicasts can be used on all not-NBMA links.
+ IFF_MULTICAST means that this media uses special encapsulation
+ for multicast frames. Apparently, all IFF_POINTOPOINT and
+ IFF_BROADCAST devices are able to use multicasts too.
+ */
+
+/* IFLA_LINK.
+ For usual devices it is equal ifi_index.
+ If it is a "virtual interface" (f.e. tunnel), ifi_link
+ can point to real physical interface (f.e. for bandwidth calculations),
+ or maybe 0, what means, that real media is unknown (usual
+ for IPIP tunnels, when route to endpoint is allowed to change)
+ */
+
+/* Subtype attributes for IFLA_PROTINFO */
+enum {
+ IFLA_INET6_UNSPEC,
+ IFLA_INET6_FLAGS, /* link flags */
+ IFLA_INET6_CONF, /* sysctl parameters */
+ IFLA_INET6_STATS, /* statistics */
+ IFLA_INET6_MCAST, /* MC things. What of them? */
+ IFLA_INET6_CACHEINFO, /* time values and max reasm size */
+ IFLA_INET6_ICMP6STATS, /* statistics (icmpv6) */
+ IFLA_INET6_TOKEN, /* device token */
+ IFLA_INET6_ADDR_GEN_MODE, /* implicit address generator mode */
+ __IFLA_INET6_MAX
+};
+
+#define IFLA_INET6_MAX (__IFLA_INET6_MAX - 1)
+
+enum in6_addr_gen_mode {
+ IN6_ADDR_GEN_MODE_EUI64,
+ IN6_ADDR_GEN_MODE_NONE,
+ IN6_ADDR_GEN_MODE_STABLE_PRIVACY,
+ IN6_ADDR_GEN_MODE_RANDOM,
+};
+
+/* Bridge section */
+
+enum {
+ IFLA_BR_UNSPEC,
+ IFLA_BR_FORWARD_DELAY,
+ IFLA_BR_HELLO_TIME,
+ IFLA_BR_MAX_AGE,
+ IFLA_BR_AGEING_TIME,
+ IFLA_BR_STP_STATE,
+ IFLA_BR_PRIORITY,
+ IFLA_BR_VLAN_FILTERING,
+ IFLA_BR_VLAN_PROTOCOL,
+ IFLA_BR_GROUP_FWD_MASK,
+ IFLA_BR_ROOT_ID,
+ IFLA_BR_BRIDGE_ID,
+ IFLA_BR_ROOT_PORT,
+ IFLA_BR_ROOT_PATH_COST,
+ IFLA_BR_TOPOLOGY_CHANGE,
+ IFLA_BR_TOPOLOGY_CHANGE_DETECTED,
+ IFLA_BR_HELLO_TIMER,
+ IFLA_BR_TCN_TIMER,
+ IFLA_BR_TOPOLOGY_CHANGE_TIMER,
+ IFLA_BR_GC_TIMER,
+ IFLA_BR_GROUP_ADDR,
+ IFLA_BR_FDB_FLUSH,
+ IFLA_BR_MCAST_ROUTER,
+ IFLA_BR_MCAST_SNOOPING,
+ IFLA_BR_MCAST_QUERY_USE_IFADDR,
+ IFLA_BR_MCAST_QUERIER,
+ IFLA_BR_MCAST_HASH_ELASTICITY,
+ IFLA_BR_MCAST_HASH_MAX,
+ IFLA_BR_MCAST_LAST_MEMBER_CNT,
+ IFLA_BR_MCAST_STARTUP_QUERY_CNT,
+ IFLA_BR_MCAST_LAST_MEMBER_INTVL,
+ IFLA_BR_MCAST_MEMBERSHIP_INTVL,
+ IFLA_BR_MCAST_QUERIER_INTVL,
+ IFLA_BR_MCAST_QUERY_INTVL,
+ IFLA_BR_MCAST_QUERY_RESPONSE_INTVL,
+ IFLA_BR_MCAST_STARTUP_QUERY_INTVL,
+ IFLA_BR_NF_CALL_IPTABLES,
+ IFLA_BR_NF_CALL_IP6TABLES,
+ IFLA_BR_NF_CALL_ARPTABLES,
+ IFLA_BR_VLAN_DEFAULT_PVID,
+ IFLA_BR_PAD,
+ IFLA_BR_VLAN_STATS_ENABLED,
+ IFLA_BR_MCAST_STATS_ENABLED,
+ IFLA_BR_MCAST_IGMP_VERSION,
+ IFLA_BR_MCAST_MLD_VERSION,
+ IFLA_BR_VLAN_STATS_PER_PORT,
+ IFLA_BR_MULTI_BOOLOPT,
+ __IFLA_BR_MAX,
+};
+
+#define IFLA_BR_MAX (__IFLA_BR_MAX - 1)
+
+struct ifla_bridge_id {
+ __u8 prio[2];
+ __u8 addr[6]; /* ETH_ALEN */
+};
+
+enum {
+ BRIDGE_MODE_UNSPEC,
+ BRIDGE_MODE_HAIRPIN,
+};
+
+enum {
+ IFLA_BRPORT_UNSPEC,
+ IFLA_BRPORT_STATE, /* Spanning tree state */
+ IFLA_BRPORT_PRIORITY, /* " priority */
+ IFLA_BRPORT_COST, /* " cost */
+ IFLA_BRPORT_MODE, /* mode (hairpin) */
+ IFLA_BRPORT_GUARD, /* bpdu guard */
+ IFLA_BRPORT_PROTECT, /* root port protection */
+ IFLA_BRPORT_FAST_LEAVE, /* multicast fast leave */
+ IFLA_BRPORT_LEARNING, /* mac learning */
+ IFLA_BRPORT_UNICAST_FLOOD, /* flood unicast traffic */
+ IFLA_BRPORT_PROXYARP, /* proxy ARP */
+ IFLA_BRPORT_LEARNING_SYNC, /* mac learning sync from device */
+ IFLA_BRPORT_PROXYARP_WIFI, /* proxy ARP for Wi-Fi */
+ IFLA_BRPORT_ROOT_ID, /* designated root */
+ IFLA_BRPORT_BRIDGE_ID, /* designated bridge */
+ IFLA_BRPORT_DESIGNATED_PORT,
+ IFLA_BRPORT_DESIGNATED_COST,
+ IFLA_BRPORT_ID,
+ IFLA_BRPORT_NO,
+ IFLA_BRPORT_TOPOLOGY_CHANGE_ACK,
+ IFLA_BRPORT_CONFIG_PENDING,
+ IFLA_BRPORT_MESSAGE_AGE_TIMER,
+ IFLA_BRPORT_FORWARD_DELAY_TIMER,
+ IFLA_BRPORT_HOLD_TIMER,
+ IFLA_BRPORT_FLUSH,
+ IFLA_BRPORT_MULTICAST_ROUTER,
+ IFLA_BRPORT_PAD,
+ IFLA_BRPORT_MCAST_FLOOD,
+ IFLA_BRPORT_MCAST_TO_UCAST,
+ IFLA_BRPORT_VLAN_TUNNEL,
+ IFLA_BRPORT_BCAST_FLOOD,
+ IFLA_BRPORT_GROUP_FWD_MASK,
+ IFLA_BRPORT_NEIGH_SUPPRESS,
+ IFLA_BRPORT_ISOLATED,
+ IFLA_BRPORT_BACKUP_PORT,
+ __IFLA_BRPORT_MAX
+};
+#define IFLA_BRPORT_MAX (__IFLA_BRPORT_MAX - 1)
+
+struct ifla_cacheinfo {
+ __u32 max_reasm_len;
+ __u32 tstamp; /* ipv6InterfaceTable updated timestamp */
+ __u32 reachable_time;
+ __u32 retrans_time;
+};
+
+enum {
+ IFLA_INFO_UNSPEC,
+ IFLA_INFO_KIND,
+ IFLA_INFO_DATA,
+ IFLA_INFO_XSTATS,
+ IFLA_INFO_SLAVE_KIND,
+ IFLA_INFO_SLAVE_DATA,
+ __IFLA_INFO_MAX,
+};
+
+#define IFLA_INFO_MAX (__IFLA_INFO_MAX - 1)
+
+/* VLAN section */
+
+enum {
+ IFLA_VLAN_UNSPEC,
+ IFLA_VLAN_ID,
+ IFLA_VLAN_FLAGS,
+ IFLA_VLAN_EGRESS_QOS,
+ IFLA_VLAN_INGRESS_QOS,
+ IFLA_VLAN_PROTOCOL,
+ __IFLA_VLAN_MAX,
+};
+
+#define IFLA_VLAN_MAX (__IFLA_VLAN_MAX - 1)
+
+struct ifla_vlan_flags {
+ __u32 flags;
+ __u32 mask;
+};
+
+enum {
+ IFLA_VLAN_QOS_UNSPEC,
+ IFLA_VLAN_QOS_MAPPING,
+ __IFLA_VLAN_QOS_MAX
+};
+
+#define IFLA_VLAN_QOS_MAX (__IFLA_VLAN_QOS_MAX - 1)
+
+struct ifla_vlan_qos_mapping {
+ __u32 from;
+ __u32 to;
+};
+
+/* MACVLAN section */
+enum {
+ IFLA_MACVLAN_UNSPEC,
+ IFLA_MACVLAN_MODE,
+ IFLA_MACVLAN_FLAGS,
+ IFLA_MACVLAN_MACADDR_MODE,
+ IFLA_MACVLAN_MACADDR,
+ IFLA_MACVLAN_MACADDR_DATA,
+ IFLA_MACVLAN_MACADDR_COUNT,
+ __IFLA_MACVLAN_MAX,
+};
+
+#define IFLA_MACVLAN_MAX (__IFLA_MACVLAN_MAX - 1)
+
+enum macvlan_mode {
+ MACVLAN_MODE_PRIVATE = 1, /* don't talk to other macvlans */
+ MACVLAN_MODE_VEPA = 2, /* talk to other ports through ext bridge */
+ MACVLAN_MODE_BRIDGE = 4, /* talk to bridge ports directly */
+ MACVLAN_MODE_PASSTHRU = 8,/* take over the underlying device */
+ MACVLAN_MODE_SOURCE = 16,/* use source MAC address list to assign */
+};
+
+enum macvlan_macaddr_mode {
+ MACVLAN_MACADDR_ADD,
+ MACVLAN_MACADDR_DEL,
+ MACVLAN_MACADDR_FLUSH,
+ MACVLAN_MACADDR_SET,
+};
+
+#define MACVLAN_FLAG_NOPROMISC 1
+
+/* VRF section */
+enum {
+ IFLA_VRF_UNSPEC,
+ IFLA_VRF_TABLE,
+ __IFLA_VRF_MAX
+};
+
+#define IFLA_VRF_MAX (__IFLA_VRF_MAX - 1)
+
+enum {
+ IFLA_VRF_PORT_UNSPEC,
+ IFLA_VRF_PORT_TABLE,
+ __IFLA_VRF_PORT_MAX
+};
+
+#define IFLA_VRF_PORT_MAX (__IFLA_VRF_PORT_MAX - 1)
+
+/* MACSEC section */
+enum {
+ IFLA_MACSEC_UNSPEC,
+ IFLA_MACSEC_SCI,
+ IFLA_MACSEC_PORT,
+ IFLA_MACSEC_ICV_LEN,
+ IFLA_MACSEC_CIPHER_SUITE,
+ IFLA_MACSEC_WINDOW,
+ IFLA_MACSEC_ENCODING_SA,
+ IFLA_MACSEC_ENCRYPT,
+ IFLA_MACSEC_PROTECT,
+ IFLA_MACSEC_INC_SCI,
+ IFLA_MACSEC_ES,
+ IFLA_MACSEC_SCB,
+ IFLA_MACSEC_REPLAY_PROTECT,
+ IFLA_MACSEC_VALIDATION,
+ IFLA_MACSEC_PAD,
+ __IFLA_MACSEC_MAX,
+};
+
+#define IFLA_MACSEC_MAX (__IFLA_MACSEC_MAX - 1)
+
+/* XFRM section */
+enum {
+ IFLA_XFRM_UNSPEC,
+ IFLA_XFRM_LINK,
+ IFLA_XFRM_IF_ID,
+ __IFLA_XFRM_MAX
+};
+
+#define IFLA_XFRM_MAX (__IFLA_XFRM_MAX - 1)
+
+enum macsec_validation_type {
+ MACSEC_VALIDATE_DISABLED = 0,
+ MACSEC_VALIDATE_CHECK = 1,
+ MACSEC_VALIDATE_STRICT = 2,
+ __MACSEC_VALIDATE_END,
+ MACSEC_VALIDATE_MAX = __MACSEC_VALIDATE_END - 1,
+};
+
+/* IPVLAN section */
+enum {
+ IFLA_IPVLAN_UNSPEC,
+ IFLA_IPVLAN_MODE,
+ IFLA_IPVLAN_FLAGS,
+ __IFLA_IPVLAN_MAX
+};
+
+#define IFLA_IPVLAN_MAX (__IFLA_IPVLAN_MAX - 1)
+
+enum ipvlan_mode {
+ IPVLAN_MODE_L2 = 0,
+ IPVLAN_MODE_L3,
+ IPVLAN_MODE_L3S,
+ IPVLAN_MODE_MAX
+};
+
+#define IPVLAN_F_PRIVATE 0x01
+#define IPVLAN_F_VEPA 0x02
+
+/* VXLAN section */
+enum {
+ IFLA_VXLAN_UNSPEC,
+ IFLA_VXLAN_ID,
+ IFLA_VXLAN_GROUP, /* group or remote address */
+ IFLA_VXLAN_LINK,
+ IFLA_VXLAN_LOCAL,
+ IFLA_VXLAN_TTL,
+ IFLA_VXLAN_TOS,
+ IFLA_VXLAN_LEARNING,
+ IFLA_VXLAN_AGEING,
+ IFLA_VXLAN_LIMIT,
+ IFLA_VXLAN_PORT_RANGE, /* source port */
+ IFLA_VXLAN_PROXY,
+ IFLA_VXLAN_RSC,
+ IFLA_VXLAN_L2MISS,
+ IFLA_VXLAN_L3MISS,
+ IFLA_VXLAN_PORT, /* destination port */
+ IFLA_VXLAN_GROUP6,
+ IFLA_VXLAN_LOCAL6,
+ IFLA_VXLAN_UDP_CSUM,
+ IFLA_VXLAN_UDP_ZERO_CSUM6_TX,
+ IFLA_VXLAN_UDP_ZERO_CSUM6_RX,
+ IFLA_VXLAN_REMCSUM_TX,
+ IFLA_VXLAN_REMCSUM_RX,
+ IFLA_VXLAN_GBP,
+ IFLA_VXLAN_REMCSUM_NOPARTIAL,
+ IFLA_VXLAN_COLLECT_METADATA,
+ IFLA_VXLAN_LABEL,
+ IFLA_VXLAN_GPE,
+ IFLA_VXLAN_TTL_INHERIT,
+ IFLA_VXLAN_DF,
+ __IFLA_VXLAN_MAX
+};
+#define IFLA_VXLAN_MAX (__IFLA_VXLAN_MAX - 1)
+
+struct ifla_vxlan_port_range {
+ __be16 low;
+ __be16 high;
+};
+
+enum ifla_vxlan_df {
+ VXLAN_DF_UNSET = 0,
+ VXLAN_DF_SET,
+ VXLAN_DF_INHERIT,
+ __VXLAN_DF_END,
+ VXLAN_DF_MAX = __VXLAN_DF_END - 1,
+};
+
+/* GENEVE section */
+enum {
+ IFLA_GENEVE_UNSPEC,
+ IFLA_GENEVE_ID,
+ IFLA_GENEVE_REMOTE,
+ IFLA_GENEVE_TTL,
+ IFLA_GENEVE_TOS,
+ IFLA_GENEVE_PORT, /* destination port */
+ IFLA_GENEVE_COLLECT_METADATA,
+ IFLA_GENEVE_REMOTE6,
+ IFLA_GENEVE_UDP_CSUM,
+ IFLA_GENEVE_UDP_ZERO_CSUM6_TX,
+ IFLA_GENEVE_UDP_ZERO_CSUM6_RX,
+ IFLA_GENEVE_LABEL,
+ IFLA_GENEVE_TTL_INHERIT,
+ IFLA_GENEVE_DF,
+ __IFLA_GENEVE_MAX
+};
+#define IFLA_GENEVE_MAX (__IFLA_GENEVE_MAX - 1)
+
+enum ifla_geneve_df {
+ GENEVE_DF_UNSET = 0,
+ GENEVE_DF_SET,
+ GENEVE_DF_INHERIT,
+ __GENEVE_DF_END,
+ GENEVE_DF_MAX = __GENEVE_DF_END - 1,
+};
+
+/* PPP section */
+enum {
+ IFLA_PPP_UNSPEC,
+ IFLA_PPP_DEV_FD,
+ __IFLA_PPP_MAX
+};
+#define IFLA_PPP_MAX (__IFLA_PPP_MAX - 1)
+
+/* GTP section */
+
+enum ifla_gtp_role {
+ GTP_ROLE_GGSN = 0,
+ GTP_ROLE_SGSN,
+};
+
+enum {
+ IFLA_GTP_UNSPEC,
+ IFLA_GTP_FD0,
+ IFLA_GTP_FD1,
+ IFLA_GTP_PDP_HASHSIZE,
+ IFLA_GTP_ROLE,
+ __IFLA_GTP_MAX,
+};
+#define IFLA_GTP_MAX (__IFLA_GTP_MAX - 1)
+
+/* Bonding section */
+
+enum {
+ IFLA_BOND_UNSPEC,
+ IFLA_BOND_MODE,
+ IFLA_BOND_ACTIVE_SLAVE,
+ IFLA_BOND_MIIMON,
+ IFLA_BOND_UPDELAY,
+ IFLA_BOND_DOWNDELAY,
+ IFLA_BOND_USE_CARRIER,
+ IFLA_BOND_ARP_INTERVAL,
+ IFLA_BOND_ARP_IP_TARGET,
+ IFLA_BOND_ARP_VALIDATE,
+ IFLA_BOND_ARP_ALL_TARGETS,
+ IFLA_BOND_PRIMARY,
+ IFLA_BOND_PRIMARY_RESELECT,
+ IFLA_BOND_FAIL_OVER_MAC,
+ IFLA_BOND_XMIT_HASH_POLICY,
+ IFLA_BOND_RESEND_IGMP,
+ IFLA_BOND_NUM_PEER_NOTIF,
+ IFLA_BOND_ALL_SLAVES_ACTIVE,
+ IFLA_BOND_MIN_LINKS,
+ IFLA_BOND_LP_INTERVAL,
+ IFLA_BOND_PACKETS_PER_SLAVE,
+ IFLA_BOND_AD_LACP_RATE,
+ IFLA_BOND_AD_SELECT,
+ IFLA_BOND_AD_INFO,
+ IFLA_BOND_AD_ACTOR_SYS_PRIO,
+ IFLA_BOND_AD_USER_PORT_KEY,
+ IFLA_BOND_AD_ACTOR_SYSTEM,
+ IFLA_BOND_TLB_DYNAMIC_LB,
+ IFLA_BOND_PEER_NOTIF_DELAY,
+ __IFLA_BOND_MAX,
+};
+
+#define IFLA_BOND_MAX (__IFLA_BOND_MAX - 1)
+
+enum {
+ IFLA_BOND_AD_INFO_UNSPEC,
+ IFLA_BOND_AD_INFO_AGGREGATOR,
+ IFLA_BOND_AD_INFO_NUM_PORTS,
+ IFLA_BOND_AD_INFO_ACTOR_KEY,
+ IFLA_BOND_AD_INFO_PARTNER_KEY,
+ IFLA_BOND_AD_INFO_PARTNER_MAC,
+ __IFLA_BOND_AD_INFO_MAX,
+};
+
+#define IFLA_BOND_AD_INFO_MAX (__IFLA_BOND_AD_INFO_MAX - 1)
+
+enum {
+ IFLA_BOND_SLAVE_UNSPEC,
+ IFLA_BOND_SLAVE_STATE,
+ IFLA_BOND_SLAVE_MII_STATUS,
+ IFLA_BOND_SLAVE_LINK_FAILURE_COUNT,
+ IFLA_BOND_SLAVE_PERM_HWADDR,
+ IFLA_BOND_SLAVE_QUEUE_ID,
+ IFLA_BOND_SLAVE_AD_AGGREGATOR_ID,
+ IFLA_BOND_SLAVE_AD_ACTOR_OPER_PORT_STATE,
+ IFLA_BOND_SLAVE_AD_PARTNER_OPER_PORT_STATE,
+ __IFLA_BOND_SLAVE_MAX,
+};
+
+#define IFLA_BOND_SLAVE_MAX (__IFLA_BOND_SLAVE_MAX - 1)
+
+/* SR-IOV virtual function management section */
+
+enum {
+ IFLA_VF_INFO_UNSPEC,
+ IFLA_VF_INFO,
+ __IFLA_VF_INFO_MAX,
+};
+
+#define IFLA_VF_INFO_MAX (__IFLA_VF_INFO_MAX - 1)
+
+enum {
+ IFLA_VF_UNSPEC,
+ IFLA_VF_MAC, /* Hardware queue specific attributes */
+ IFLA_VF_VLAN, /* VLAN ID and QoS */
+ IFLA_VF_TX_RATE, /* Max TX Bandwidth Allocation */
+ IFLA_VF_SPOOFCHK, /* Spoof Checking on/off switch */
+ IFLA_VF_LINK_STATE, /* link state enable/disable/auto switch */
+ IFLA_VF_RATE, /* Min and Max TX Bandwidth Allocation */
+ IFLA_VF_RSS_QUERY_EN, /* RSS Redirection Table and Hash Key query
+ * on/off switch
+ */
+ IFLA_VF_STATS, /* network device statistics */
+ IFLA_VF_TRUST, /* Trust VF */
+ IFLA_VF_IB_NODE_GUID, /* VF Infiniband node GUID */
+ IFLA_VF_IB_PORT_GUID, /* VF Infiniband port GUID */
+ IFLA_VF_VLAN_LIST, /* nested list of vlans, option for QinQ */
+ IFLA_VF_BROADCAST, /* VF broadcast */
+ __IFLA_VF_MAX,
+};
+
+#define IFLA_VF_MAX (__IFLA_VF_MAX - 1)
+
+struct ifla_vf_mac {
+ __u32 vf;
+ __u8 mac[32]; /* MAX_ADDR_LEN */
+};
+
+struct ifla_vf_broadcast {
+ __u8 broadcast[32];
+};
+
+struct ifla_vf_vlan {
+ __u32 vf;
+ __u32 vlan; /* 0 - 4095, 0 disables VLAN filter */
+ __u32 qos;
+};
+
+enum {
+ IFLA_VF_VLAN_INFO_UNSPEC,
+ IFLA_VF_VLAN_INFO, /* VLAN ID, QoS and VLAN protocol */
+ __IFLA_VF_VLAN_INFO_MAX,
+};
+
+#define IFLA_VF_VLAN_INFO_MAX (__IFLA_VF_VLAN_INFO_MAX - 1)
+#define MAX_VLAN_LIST_LEN 1
+
+struct ifla_vf_vlan_info {
+ __u32 vf;
+ __u32 vlan; /* 0 - 4095, 0 disables VLAN filter */
+ __u32 qos;
+ __be16 vlan_proto; /* VLAN protocol either 802.1Q or 802.1ad */
+};
+
+struct ifla_vf_tx_rate {
+ __u32 vf;
+ __u32 rate; /* Max TX bandwidth in Mbps, 0 disables throttling */
+};
+
+struct ifla_vf_rate {
+ __u32 vf;
+ __u32 min_tx_rate; /* Min Bandwidth in Mbps */
+ __u32 max_tx_rate; /* Max Bandwidth in Mbps */
+};
+
+struct ifla_vf_spoofchk {
+ __u32 vf;
+ __u32 setting;
+};
+
+struct ifla_vf_guid {
+ __u32 vf;
+ __u64 guid;
+};
+
+enum {
+ IFLA_VF_LINK_STATE_AUTO, /* link state of the uplink */
+ IFLA_VF_LINK_STATE_ENABLE, /* link always up */
+ IFLA_VF_LINK_STATE_DISABLE, /* link always down */
+ __IFLA_VF_LINK_STATE_MAX,
+};
+
+struct ifla_vf_link_state {
+ __u32 vf;
+ __u32 link_state;
+};
+
+struct ifla_vf_rss_query_en {
+ __u32 vf;
+ __u32 setting;
+};
+
+enum {
+ IFLA_VF_STATS_RX_PACKETS,
+ IFLA_VF_STATS_TX_PACKETS,
+ IFLA_VF_STATS_RX_BYTES,
+ IFLA_VF_STATS_TX_BYTES,
+ IFLA_VF_STATS_BROADCAST,
+ IFLA_VF_STATS_MULTICAST,
+ IFLA_VF_STATS_PAD,
+ IFLA_VF_STATS_RX_DROPPED,
+ IFLA_VF_STATS_TX_DROPPED,
+ __IFLA_VF_STATS_MAX,
+};
+
+#define IFLA_VF_STATS_MAX (__IFLA_VF_STATS_MAX - 1)
+
+struct ifla_vf_trust {
+ __u32 vf;
+ __u32 setting;
+};
+
+/* VF ports management section
+ *
+ * Nested layout of set/get msg is:
+ *
+ * [IFLA_NUM_VF]
+ * [IFLA_VF_PORTS]
+ * [IFLA_VF_PORT]
+ * [IFLA_PORT_*], ...
+ * [IFLA_VF_PORT]
+ * [IFLA_PORT_*], ...
+ * ...
+ * [IFLA_PORT_SELF]
+ * [IFLA_PORT_*], ...
+ */
+
+enum {
+ IFLA_VF_PORT_UNSPEC,
+ IFLA_VF_PORT, /* nest */
+ __IFLA_VF_PORT_MAX,
+};
+
+#define IFLA_VF_PORT_MAX (__IFLA_VF_PORT_MAX - 1)
+
+enum {
+ IFLA_PORT_UNSPEC,
+ IFLA_PORT_VF, /* __u32 */
+ IFLA_PORT_PROFILE, /* string */
+ IFLA_PORT_VSI_TYPE, /* 802.1Qbg (pre-)standard VDP */
+ IFLA_PORT_INSTANCE_UUID, /* binary UUID */
+ IFLA_PORT_HOST_UUID, /* binary UUID */
+ IFLA_PORT_REQUEST, /* __u8 */
+ IFLA_PORT_RESPONSE, /* __u16, output only */
+ __IFLA_PORT_MAX,
+};
+
+#define IFLA_PORT_MAX (__IFLA_PORT_MAX - 1)
+
+#define PORT_PROFILE_MAX 40
+#define PORT_UUID_MAX 16
+#define PORT_SELF_VF -1
+
+enum {
+ PORT_REQUEST_PREASSOCIATE = 0,
+ PORT_REQUEST_PREASSOCIATE_RR,
+ PORT_REQUEST_ASSOCIATE,
+ PORT_REQUEST_DISASSOCIATE,
+};
+
+enum {
+ PORT_VDP_RESPONSE_SUCCESS = 0,
+ PORT_VDP_RESPONSE_INVALID_FORMAT,
+ PORT_VDP_RESPONSE_INSUFFICIENT_RESOURCES,
+ PORT_VDP_RESPONSE_UNUSED_VTID,
+ PORT_VDP_RESPONSE_VTID_VIOLATION,
+ PORT_VDP_RESPONSE_VTID_VERSION_VIOALTION,
+ PORT_VDP_RESPONSE_OUT_OF_SYNC,
+ /* 0x08-0xFF reserved for future VDP use */
+ PORT_PROFILE_RESPONSE_SUCCESS = 0x100,
+ PORT_PROFILE_RESPONSE_INPROGRESS,
+ PORT_PROFILE_RESPONSE_INVALID,
+ PORT_PROFILE_RESPONSE_BADSTATE,
+ PORT_PROFILE_RESPONSE_INSUFFICIENT_RESOURCES,
+ PORT_PROFILE_RESPONSE_ERROR,
+};
+
+struct ifla_port_vsi {
+ __u8 vsi_mgr_id;
+ __u8 vsi_type_id[3];
+ __u8 vsi_type_version;
+ __u8 pad[3];
+};
+
+
+/* IPoIB section */
+
+enum {
+ IFLA_IPOIB_UNSPEC,
+ IFLA_IPOIB_PKEY,
+ IFLA_IPOIB_MODE,
+ IFLA_IPOIB_UMCAST,
+ __IFLA_IPOIB_MAX
+};
+
+enum {
+ IPOIB_MODE_DATAGRAM = 0, /* using unreliable datagram QPs */
+ IPOIB_MODE_CONNECTED = 1, /* using connected QPs */
+};
+
+#define IFLA_IPOIB_MAX (__IFLA_IPOIB_MAX - 1)
+
+
+/* HSR section */
+
+enum {
+ IFLA_HSR_UNSPEC,
+ IFLA_HSR_SLAVE1,
+ IFLA_HSR_SLAVE2,
+ IFLA_HSR_MULTICAST_SPEC, /* Last byte of supervision addr */
+ IFLA_HSR_SUPERVISION_ADDR, /* Supervision frame multicast addr */
+ IFLA_HSR_SEQ_NR,
+ IFLA_HSR_VERSION, /* HSR version */
+ __IFLA_HSR_MAX,
+};
+
+#define IFLA_HSR_MAX (__IFLA_HSR_MAX - 1)
+
+/* STATS section */
+
+struct if_stats_msg {
+ __u8 family;
+ __u8 pad1;
+ __u16 pad2;
+ __u32 ifindex;
+ __u32 filter_mask;
+};
+
+/* A stats attribute can be netdev specific or a global stat.
+ * For netdev stats, lets use the prefix IFLA_STATS_LINK_*
+ */
+enum {
+ IFLA_STATS_UNSPEC, /* also used as 64bit pad attribute */
+ IFLA_STATS_LINK_64,
+ IFLA_STATS_LINK_XSTATS,
+ IFLA_STATS_LINK_XSTATS_SLAVE,
+ IFLA_STATS_LINK_OFFLOAD_XSTATS,
+ IFLA_STATS_AF_SPEC,
+ __IFLA_STATS_MAX,
+};
+
+#define IFLA_STATS_MAX (__IFLA_STATS_MAX - 1)
+
+#define IFLA_STATS_FILTER_BIT(ATTR) (1 << (ATTR - 1))
+
+/* These are embedded into IFLA_STATS_LINK_XSTATS:
+ * [IFLA_STATS_LINK_XSTATS]
+ * -> [LINK_XSTATS_TYPE_xxx]
+ * -> [rtnl link type specific attributes]
+ */
+enum {
+ LINK_XSTATS_TYPE_UNSPEC,
+ LINK_XSTATS_TYPE_BRIDGE,
+ LINK_XSTATS_TYPE_BOND,
+ __LINK_XSTATS_TYPE_MAX
+};
+#define LINK_XSTATS_TYPE_MAX (__LINK_XSTATS_TYPE_MAX - 1)
+
+/* These are stats embedded into IFLA_STATS_LINK_OFFLOAD_XSTATS */
+enum {
+ IFLA_OFFLOAD_XSTATS_UNSPEC,
+ IFLA_OFFLOAD_XSTATS_CPU_HIT, /* struct rtnl_link_stats64 */
+ __IFLA_OFFLOAD_XSTATS_MAX
+};
+#define IFLA_OFFLOAD_XSTATS_MAX (__IFLA_OFFLOAD_XSTATS_MAX - 1)
+
+/* XDP section */
+
+#define XDP_FLAGS_UPDATE_IF_NOEXIST (1U << 0)
+#define XDP_FLAGS_SKB_MODE (1U << 1)
+#define XDP_FLAGS_DRV_MODE (1U << 2)
+#define XDP_FLAGS_HW_MODE (1U << 3)
+#define XDP_FLAGS_MODES (XDP_FLAGS_SKB_MODE | \
+ XDP_FLAGS_DRV_MODE | \
+ XDP_FLAGS_HW_MODE)
+#define XDP_FLAGS_MASK (XDP_FLAGS_UPDATE_IF_NOEXIST | \
+ XDP_FLAGS_MODES)
+
+/* These are stored into IFLA_XDP_ATTACHED on dump. */
+enum {
+ XDP_ATTACHED_NONE = 0,
+ XDP_ATTACHED_DRV,
+ XDP_ATTACHED_SKB,
+ XDP_ATTACHED_HW,
+ XDP_ATTACHED_MULTI,
+};
+
+enum {
+ IFLA_XDP_UNSPEC,
+ IFLA_XDP_FD,
+ IFLA_XDP_ATTACHED,
+ IFLA_XDP_FLAGS,
+ IFLA_XDP_PROG_ID,
+ IFLA_XDP_DRV_PROG_ID,
+ IFLA_XDP_SKB_PROG_ID,
+ IFLA_XDP_HW_PROG_ID,
+ __IFLA_XDP_MAX,
+};
+
+#define IFLA_XDP_MAX (__IFLA_XDP_MAX - 1)
+
+enum {
+ IFLA_EVENT_NONE,
+ IFLA_EVENT_REBOOT, /* internal reset / reboot */
+ IFLA_EVENT_FEATURES, /* change in offload features */
+ IFLA_EVENT_BONDING_FAILOVER, /* change in active slave */
+ IFLA_EVENT_NOTIFY_PEERS, /* re-sent grat. arp/ndisc */
+ IFLA_EVENT_IGMP_RESEND, /* re-sent IGMP JOIN */
+ IFLA_EVENT_BONDING_OPTIONS, /* change in bonding options */
+};
+
+/* tun section */
+
+enum {
+ IFLA_TUN_UNSPEC,
+ IFLA_TUN_OWNER,
+ IFLA_TUN_GROUP,
+ IFLA_TUN_TYPE,
+ IFLA_TUN_PI,
+ IFLA_TUN_VNET_HDR,
+ IFLA_TUN_PERSIST,
+ IFLA_TUN_MULTI_QUEUE,
+ IFLA_TUN_NUM_QUEUES,
+ IFLA_TUN_NUM_DISABLED_QUEUES,
+ __IFLA_TUN_MAX,
+};
+
+#define IFLA_TUN_MAX (__IFLA_TUN_MAX - 1)
+
+/* rmnet section */
+
+#define RMNET_FLAGS_INGRESS_DEAGGREGATION (1U << 0)
+#define RMNET_FLAGS_INGRESS_MAP_COMMANDS (1U << 1)
+#define RMNET_FLAGS_INGRESS_MAP_CKSUMV4 (1U << 2)
+#define RMNET_FLAGS_EGRESS_MAP_CKSUMV4 (1U << 3)
+
+enum {
+ IFLA_RMNET_UNSPEC,
+ IFLA_RMNET_MUX_ID,
+ IFLA_RMNET_FLAGS,
+ __IFLA_RMNET_MAX,
+};
+
+#define IFLA_RMNET_MAX (__IFLA_RMNET_MAX - 1)
+
+struct ifla_rmnet_flags {
+ __u32 flags;
+ __u32 mask;
+};
+
+#endif /* _UAPI_LINUX_IF_LINK_H */
diff --git a/src/contrib/libbpf/include/uapi/linux/if_xdp.h b/src/contrib/libbpf/include/uapi/linux/if_xdp.h
new file mode 100644
index 0000000..be328c5
--- /dev/null
+++ b/src/contrib/libbpf/include/uapi/linux/if_xdp.h
@@ -0,0 +1,108 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * if_xdp: XDP socket user-space interface
+ * Copyright(c) 2018 Intel Corporation.
+ *
+ * Author(s): Björn Töpel <bjorn.topel@intel.com>
+ * Magnus Karlsson <magnus.karlsson@intel.com>
+ */
+
+#ifndef _LINUX_IF_XDP_H
+#define _LINUX_IF_XDP_H
+
+#include <linux/types.h>
+
+/* Options for the sxdp_flags field */
+#define XDP_SHARED_UMEM (1 << 0)
+#define XDP_COPY (1 << 1) /* Force copy-mode */
+#define XDP_ZEROCOPY (1 << 2) /* Force zero-copy mode */
+/* If this option is set, the driver might go sleep and in that case
+ * the XDP_RING_NEED_WAKEUP flag in the fill and/or Tx rings will be
+ * set. If it is set, the application need to explicitly wake up the
+ * driver with a poll() (Rx and Tx) or sendto() (Tx only). If you are
+ * running the driver and the application on the same core, you should
+ * use this option so that the kernel will yield to the user space
+ * application.
+ */
+#define XDP_USE_NEED_WAKEUP (1 << 3)
+
+/* Flags for xsk_umem_config flags */
+#define XDP_UMEM_UNALIGNED_CHUNK_FLAG (1 << 0)
+
+struct sockaddr_xdp {
+ __u16 sxdp_family;
+ __u16 sxdp_flags;
+ __u32 sxdp_ifindex;
+ __u32 sxdp_queue_id;
+ __u32 sxdp_shared_umem_fd;
+};
+
+/* XDP_RING flags */
+#define XDP_RING_NEED_WAKEUP (1 << 0)
+
+struct xdp_ring_offset {
+ __u64 producer;
+ __u64 consumer;
+ __u64 desc;
+ __u64 flags;
+};
+
+struct xdp_mmap_offsets {
+ struct xdp_ring_offset rx;
+ struct xdp_ring_offset tx;
+ struct xdp_ring_offset fr; /* Fill */
+ struct xdp_ring_offset cr; /* Completion */
+};
+
+/* XDP socket options */
+#define XDP_MMAP_OFFSETS 1
+#define XDP_RX_RING 2
+#define XDP_TX_RING 3
+#define XDP_UMEM_REG 4
+#define XDP_UMEM_FILL_RING 5
+#define XDP_UMEM_COMPLETION_RING 6
+#define XDP_STATISTICS 7
+#define XDP_OPTIONS 8
+
+struct xdp_umem_reg {
+ __u64 addr; /* Start of packet data area */
+ __u64 len; /* Length of packet data area */
+ __u32 chunk_size;
+ __u32 headroom;
+ __u32 flags;
+};
+
+struct xdp_statistics {
+ __u64 rx_dropped; /* Dropped for reasons other than invalid desc */
+ __u64 rx_invalid_descs; /* Dropped due to invalid descriptor */
+ __u64 tx_invalid_descs; /* Dropped due to invalid descriptor */
+};
+
+struct xdp_options {
+ __u32 flags;
+};
+
+/* Flags for the flags field of struct xdp_options */
+#define XDP_OPTIONS_ZEROCOPY (1 << 0)
+
+/* Pgoff for mmaping the rings */
+#define XDP_PGOFF_RX_RING 0
+#define XDP_PGOFF_TX_RING 0x80000000
+#define XDP_UMEM_PGOFF_FILL_RING 0x100000000ULL
+#define XDP_UMEM_PGOFF_COMPLETION_RING 0x180000000ULL
+
+/* Masks for unaligned chunks mode */
+#define XSK_UNALIGNED_BUF_OFFSET_SHIFT 48
+#define XSK_UNALIGNED_BUF_ADDR_MASK \
+ ((1ULL << XSK_UNALIGNED_BUF_OFFSET_SHIFT) - 1)
+
+/* Rx/Tx descriptor */
+struct xdp_desc {
+ __u64 addr;
+ __u32 len;
+ __u32 options;
+};
+
+/* UMEM descriptor is __u64 */
+
+#endif /* _LINUX_IF_XDP_H */
diff --git a/src/contrib/libbpf/include/uapi/linux/netlink.h b/src/contrib/libbpf/include/uapi/linux/netlink.h
new file mode 100644
index 0000000..0a4d733
--- /dev/null
+++ b/src/contrib/libbpf/include/uapi/linux/netlink.h
@@ -0,0 +1,252 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _UAPI__LINUX_NETLINK_H
+#define _UAPI__LINUX_NETLINK_H
+
+#include <linux/kernel.h>
+#include <linux/socket.h> /* for __kernel_sa_family_t */
+#include <linux/types.h>
+
+#define NETLINK_ROUTE 0 /* Routing/device hook */
+#define NETLINK_UNUSED 1 /* Unused number */
+#define NETLINK_USERSOCK 2 /* Reserved for user mode socket protocols */
+#define NETLINK_FIREWALL 3 /* Unused number, formerly ip_queue */
+#define NETLINK_SOCK_DIAG 4 /* socket monitoring */
+#define NETLINK_NFLOG 5 /* netfilter/iptables ULOG */
+#define NETLINK_XFRM 6 /* ipsec */
+#define NETLINK_SELINUX 7 /* SELinux event notifications */
+#define NETLINK_ISCSI 8 /* Open-iSCSI */
+#define NETLINK_AUDIT 9 /* auditing */
+#define NETLINK_FIB_LOOKUP 10
+#define NETLINK_CONNECTOR 11
+#define NETLINK_NETFILTER 12 /* netfilter subsystem */
+#define NETLINK_IP6_FW 13
+#define NETLINK_DNRTMSG 14 /* DECnet routing messages */
+#define NETLINK_KOBJECT_UEVENT 15 /* Kernel messages to userspace */
+#define NETLINK_GENERIC 16
+/* leave room for NETLINK_DM (DM Events) */
+#define NETLINK_SCSITRANSPORT 18 /* SCSI Transports */
+#define NETLINK_ECRYPTFS 19
+#define NETLINK_RDMA 20
+#define NETLINK_CRYPTO 21 /* Crypto layer */
+#define NETLINK_SMC 22 /* SMC monitoring */
+
+#define NETLINK_INET_DIAG NETLINK_SOCK_DIAG
+
+#define MAX_LINKS 32
+
+struct sockaddr_nl {
+ __kernel_sa_family_t nl_family; /* AF_NETLINK */
+ unsigned short nl_pad; /* zero */
+ __u32 nl_pid; /* port ID */
+ __u32 nl_groups; /* multicast groups mask */
+};
+
+struct nlmsghdr {
+ __u32 nlmsg_len; /* Length of message including header */
+ __u16 nlmsg_type; /* Message content */
+ __u16 nlmsg_flags; /* Additional flags */
+ __u32 nlmsg_seq; /* Sequence number */
+ __u32 nlmsg_pid; /* Sending process port ID */
+};
+
+/* Flags values */
+
+#define NLM_F_REQUEST 0x01 /* It is request message. */
+#define NLM_F_MULTI 0x02 /* Multipart message, terminated by NLMSG_DONE */
+#define NLM_F_ACK 0x04 /* Reply with ack, with zero or error code */
+#define NLM_F_ECHO 0x08 /* Echo this request */
+#define NLM_F_DUMP_INTR 0x10 /* Dump was inconsistent due to sequence change */
+#define NLM_F_DUMP_FILTERED 0x20 /* Dump was filtered as requested */
+
+/* Modifiers to GET request */
+#define NLM_F_ROOT 0x100 /* specify tree root */
+#define NLM_F_MATCH 0x200 /* return all matching */
+#define NLM_F_ATOMIC 0x400 /* atomic GET */
+#define NLM_F_DUMP (NLM_F_ROOT|NLM_F_MATCH)
+
+/* Modifiers to NEW request */
+#define NLM_F_REPLACE 0x100 /* Override existing */
+#define NLM_F_EXCL 0x200 /* Do not touch, if it exists */
+#define NLM_F_CREATE 0x400 /* Create, if it does not exist */
+#define NLM_F_APPEND 0x800 /* Add to end of list */
+
+/* Modifiers to DELETE request */
+#define NLM_F_NONREC 0x100 /* Do not delete recursively */
+
+/* Flags for ACK message */
+#define NLM_F_CAPPED 0x100 /* request was capped */
+#define NLM_F_ACK_TLVS 0x200 /* extended ACK TVLs were included */
+
+/*
+ 4.4BSD ADD NLM_F_CREATE|NLM_F_EXCL
+ 4.4BSD CHANGE NLM_F_REPLACE
+
+ True CHANGE NLM_F_CREATE|NLM_F_REPLACE
+ Append NLM_F_CREATE
+ Check NLM_F_EXCL
+ */
+
+#define NLMSG_ALIGNTO 4U
+#define NLMSG_ALIGN(len) ( ((len)+NLMSG_ALIGNTO-1) & ~(NLMSG_ALIGNTO-1) )
+#define NLMSG_HDRLEN ((int) NLMSG_ALIGN(sizeof(struct nlmsghdr)))
+#define NLMSG_LENGTH(len) ((len) + NLMSG_HDRLEN)
+#define NLMSG_SPACE(len) NLMSG_ALIGN(NLMSG_LENGTH(len))
+#define NLMSG_DATA(nlh) ((void*)(((char*)nlh) + NLMSG_LENGTH(0)))
+#define NLMSG_NEXT(nlh,len) ((len) -= NLMSG_ALIGN((nlh)->nlmsg_len), \
+ (struct nlmsghdr*)(((char*)(nlh)) + NLMSG_ALIGN((nlh)->nlmsg_len)))
+#define NLMSG_OK(nlh,len) ((len) >= (int)sizeof(struct nlmsghdr) && \
+ (nlh)->nlmsg_len >= sizeof(struct nlmsghdr) && \
+ (nlh)->nlmsg_len <= (len))
+#define NLMSG_PAYLOAD(nlh,len) ((nlh)->nlmsg_len - NLMSG_SPACE((len)))
+
+#define NLMSG_NOOP 0x1 /* Nothing. */
+#define NLMSG_ERROR 0x2 /* Error */
+#define NLMSG_DONE 0x3 /* End of a dump */
+#define NLMSG_OVERRUN 0x4 /* Data lost */
+
+#define NLMSG_MIN_TYPE 0x10 /* < 0x10: reserved control messages */
+
+struct nlmsgerr {
+ int error;
+ struct nlmsghdr msg;
+ /*
+ * followed by the message contents unless NETLINK_CAP_ACK was set
+ * or the ACK indicates success (error == 0)
+ * message length is aligned with NLMSG_ALIGN()
+ */
+ /*
+ * followed by TLVs defined in enum nlmsgerr_attrs
+ * if NETLINK_EXT_ACK was set
+ */
+};
+
+/**
+ * enum nlmsgerr_attrs - nlmsgerr attributes
+ * @NLMSGERR_ATTR_UNUSED: unused
+ * @NLMSGERR_ATTR_MSG: error message string (string)
+ * @NLMSGERR_ATTR_OFFS: offset of the invalid attribute in the original
+ * message, counting from the beginning of the header (u32)
+ * @NLMSGERR_ATTR_COOKIE: arbitrary subsystem specific cookie to
+ * be used - in the success case - to identify a created
+ * object or operation or similar (binary)
+ * @__NLMSGERR_ATTR_MAX: number of attributes
+ * @NLMSGERR_ATTR_MAX: highest attribute number
+ */
+enum nlmsgerr_attrs {
+ NLMSGERR_ATTR_UNUSED,
+ NLMSGERR_ATTR_MSG,
+ NLMSGERR_ATTR_OFFS,
+ NLMSGERR_ATTR_COOKIE,
+
+ __NLMSGERR_ATTR_MAX,
+ NLMSGERR_ATTR_MAX = __NLMSGERR_ATTR_MAX - 1
+};
+
+#define NETLINK_ADD_MEMBERSHIP 1
+#define NETLINK_DROP_MEMBERSHIP 2
+#define NETLINK_PKTINFO 3
+#define NETLINK_BROADCAST_ERROR 4
+#define NETLINK_NO_ENOBUFS 5
+#ifndef __KERNEL__
+#define NETLINK_RX_RING 6
+#define NETLINK_TX_RING 7
+#endif
+#define NETLINK_LISTEN_ALL_NSID 8
+#define NETLINK_LIST_MEMBERSHIPS 9
+#define NETLINK_CAP_ACK 10
+#define NETLINK_EXT_ACK 11
+#define NETLINK_GET_STRICT_CHK 12
+
+struct nl_pktinfo {
+ __u32 group;
+};
+
+struct nl_mmap_req {
+ unsigned int nm_block_size;
+ unsigned int nm_block_nr;
+ unsigned int nm_frame_size;
+ unsigned int nm_frame_nr;
+};
+
+struct nl_mmap_hdr {
+ unsigned int nm_status;
+ unsigned int nm_len;
+ __u32 nm_group;
+ /* credentials */
+ __u32 nm_pid;
+ __u32 nm_uid;
+ __u32 nm_gid;
+};
+
+#ifndef __KERNEL__
+enum nl_mmap_status {
+ NL_MMAP_STATUS_UNUSED,
+ NL_MMAP_STATUS_RESERVED,
+ NL_MMAP_STATUS_VALID,
+ NL_MMAP_STATUS_COPY,
+ NL_MMAP_STATUS_SKIP,
+};
+
+#define NL_MMAP_MSG_ALIGNMENT NLMSG_ALIGNTO
+#define NL_MMAP_MSG_ALIGN(sz) __ALIGN_KERNEL(sz, NL_MMAP_MSG_ALIGNMENT)
+#define NL_MMAP_HDRLEN NL_MMAP_MSG_ALIGN(sizeof(struct nl_mmap_hdr))
+#endif
+
+#define NET_MAJOR 36 /* Major 36 is reserved for networking */
+
+enum {
+ NETLINK_UNCONNECTED = 0,
+ NETLINK_CONNECTED,
+};
+
+/*
+ * <------- NLA_HDRLEN ------> <-- NLA_ALIGN(payload)-->
+ * +---------------------+- - -+- - - - - - - - - -+- - -+
+ * | Header | Pad | Payload | Pad |
+ * | (struct nlattr) | ing | | ing |
+ * +---------------------+- - -+- - - - - - - - - -+- - -+
+ * <-------------- nlattr->nla_len -------------->
+ */
+
+struct nlattr {
+ __u16 nla_len;
+ __u16 nla_type;
+};
+
+/*
+ * nla_type (16 bits)
+ * +---+---+-------------------------------+
+ * | N | O | Attribute Type |
+ * +---+---+-------------------------------+
+ * N := Carries nested attributes
+ * O := Payload stored in network byte order
+ *
+ * Note: The N and O flag are mutually exclusive.
+ */
+#define NLA_F_NESTED (1 << 15)
+#define NLA_F_NET_BYTEORDER (1 << 14)
+#define NLA_TYPE_MASK ~(NLA_F_NESTED | NLA_F_NET_BYTEORDER)
+
+#define NLA_ALIGNTO 4
+#define NLA_ALIGN(len) (((len) + NLA_ALIGNTO - 1) & ~(NLA_ALIGNTO - 1))
+#define NLA_HDRLEN ((int) NLA_ALIGN(sizeof(struct nlattr)))
+
+/* Generic 32 bitflags attribute content sent to the kernel.
+ *
+ * The value is a bitmap that defines the values being set
+ * The selector is a bitmask that defines which value is legit
+ *
+ * Examples:
+ * value = 0x0, and selector = 0x1
+ * implies we are selecting bit 1 and we want to set its value to 0.
+ *
+ * value = 0x2, and selector = 0x2
+ * implies we are selecting bit 2 and we want to set its value to 1.
+ *
+ */
+struct nla_bitfield32 {
+ __u32 value;
+ __u32 selector;
+};
+
+#endif /* _UAPI__LINUX_NETLINK_H */
diff --git a/src/contrib/libngtcp2/LICENSE b/src/contrib/libngtcp2/LICENSE
new file mode 100644
index 0000000..0161703
--- /dev/null
+++ b/src/contrib/libngtcp2/LICENSE
@@ -0,0 +1 @@
+../licenses/MIT \ No newline at end of file
diff --git a/src/contrib/libngtcp2/ngtcp2/crypto/gnutls.c b/src/contrib/libngtcp2/ngtcp2/crypto/gnutls.c
new file mode 100644
index 0000000..73ea0c1
--- /dev/null
+++ b/src/contrib/libngtcp2/ngtcp2/crypto/gnutls.c
@@ -0,0 +1,644 @@
+/*
+ * ngtcp2
+ *
+ * Copyright (c) 2020 ngtcp2 contributors
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#ifdef HAVE_CONFIG_H
+# include <config.h>
+#endif /* HAVE_CONFIG_H */
+
+#include <assert.h>
+
+#include <ngtcp2/ngtcp2_crypto.h>
+#include <ngtcp2/ngtcp2_crypto_gnutls.h>
+
+#include <gnutls/gnutls.h>
+#include <gnutls/crypto.h>
+#include <string.h>
+
+#include "shared.h"
+
+ngtcp2_crypto_aead *ngtcp2_crypto_aead_aes_128_gcm(ngtcp2_crypto_aead *aead) {
+ return ngtcp2_crypto_aead_init(aead, (void *)GNUTLS_CIPHER_AES_128_GCM);
+}
+
+ngtcp2_crypto_md *ngtcp2_crypto_md_sha256(ngtcp2_crypto_md *md) {
+ md->native_handle = (void *)GNUTLS_DIG_SHA256;
+ return md;
+}
+
+ngtcp2_crypto_ctx *ngtcp2_crypto_ctx_initial(ngtcp2_crypto_ctx *ctx) {
+ ngtcp2_crypto_aead_init(&ctx->aead, (void *)GNUTLS_CIPHER_AES_128_GCM);
+ ctx->md.native_handle = (void *)GNUTLS_DIG_SHA256;
+ ctx->hp.native_handle = (void *)GNUTLS_CIPHER_AES_128_CBC;
+ ctx->max_encryption = 0;
+ ctx->max_decryption_failure = 0;
+ return ctx;
+}
+
+ngtcp2_crypto_aead *ngtcp2_crypto_aead_init(ngtcp2_crypto_aead *aead,
+ void *aead_native_handle) {
+ aead->native_handle = aead_native_handle;
+ aead->max_overhead = gnutls_cipher_get_tag_size(
+ (gnutls_cipher_algorithm_t)(intptr_t)aead_native_handle);
+ return aead;
+}
+
+ngtcp2_crypto_aead *ngtcp2_crypto_aead_retry(ngtcp2_crypto_aead *aead) {
+ return ngtcp2_crypto_aead_init(aead, (void *)GNUTLS_CIPHER_AES_128_GCM);
+}
+
+static gnutls_cipher_algorithm_t
+crypto_get_hp(gnutls_cipher_algorithm_t cipher) {
+ switch (cipher) {
+ case GNUTLS_CIPHER_AES_128_GCM:
+ case GNUTLS_CIPHER_AES_128_CCM:
+ return GNUTLS_CIPHER_AES_128_CBC;
+ case GNUTLS_CIPHER_AES_256_GCM:
+ case GNUTLS_CIPHER_AES_256_CCM:
+ return GNUTLS_CIPHER_AES_256_CBC;
+ case GNUTLS_CIPHER_CHACHA20_POLY1305:
+ return GNUTLS_CIPHER_CHACHA20_32;
+ default:
+ return GNUTLS_CIPHER_UNKNOWN;
+ }
+}
+
+static uint64_t
+crypto_get_aead_max_encryption(gnutls_cipher_algorithm_t cipher) {
+ switch (cipher) {
+ case GNUTLS_CIPHER_AES_128_GCM:
+ case GNUTLS_CIPHER_AES_256_GCM:
+ return NGTCP2_CRYPTO_MAX_ENCRYPTION_AES_GCM;
+ case GNUTLS_CIPHER_CHACHA20_POLY1305:
+ return NGTCP2_CRYPTO_MAX_ENCRYPTION_CHACHA20_POLY1305;
+ case GNUTLS_CIPHER_AES_128_CCM:
+ case GNUTLS_CIPHER_AES_256_CCM:
+ return NGTCP2_CRYPTO_MAX_ENCRYPTION_AES_CCM;
+ default:
+ return 0;
+ }
+}
+
+static uint64_t
+crypto_get_aead_max_decryption_failure(gnutls_cipher_algorithm_t cipher) {
+ switch (cipher) {
+ case GNUTLS_CIPHER_AES_128_GCM:
+ case GNUTLS_CIPHER_AES_256_GCM:
+ return NGTCP2_CRYPTO_MAX_DECRYPTION_FAILURE_AES_GCM;
+ case GNUTLS_CIPHER_CHACHA20_POLY1305:
+ return NGTCP2_CRYPTO_MAX_DECRYPTION_FAILURE_CHACHA20_POLY1305;
+ case GNUTLS_CIPHER_AES_128_CCM:
+ case GNUTLS_CIPHER_AES_256_CCM:
+ return NGTCP2_CRYPTO_MAX_DECRYPTION_FAILURE_AES_CCM;
+ default:
+ return 0;
+ }
+}
+
+ngtcp2_crypto_ctx *ngtcp2_crypto_ctx_tls(ngtcp2_crypto_ctx *ctx,
+ void *tls_native_handle) {
+ gnutls_session_t session = tls_native_handle;
+ gnutls_cipher_algorithm_t cipher;
+ gnutls_digest_algorithm_t hash;
+ gnutls_cipher_algorithm_t hp_cipher;
+
+ cipher = gnutls_cipher_get(session);
+ if (cipher != GNUTLS_CIPHER_UNKNOWN && cipher != GNUTLS_CIPHER_NULL) {
+ ngtcp2_crypto_aead_init(&ctx->aead, (void *)cipher);
+ }
+
+ hash = gnutls_prf_hash_get(session);
+ if (hash != GNUTLS_DIG_UNKNOWN && hash != GNUTLS_DIG_NULL) {
+ ctx->md.native_handle = (void *)hash;
+ }
+
+ hp_cipher = crypto_get_hp(cipher);
+ if (hp_cipher != GNUTLS_CIPHER_UNKNOWN) {
+ ctx->hp.native_handle = (void *)hp_cipher;
+ }
+
+ ctx->max_encryption = crypto_get_aead_max_encryption(cipher);
+ ctx->max_decryption_failure = crypto_get_aead_max_decryption_failure(cipher);
+
+ return ctx;
+}
+
+ngtcp2_crypto_ctx *ngtcp2_crypto_ctx_tls_early(ngtcp2_crypto_ctx *ctx,
+ void *tls_native_handle) {
+ gnutls_session_t session = tls_native_handle;
+ gnutls_cipher_algorithm_t cipher;
+ gnutls_digest_algorithm_t hash;
+ gnutls_cipher_algorithm_t hp_cipher;
+
+ cipher = gnutls_early_cipher_get(session);
+ if (cipher != GNUTLS_CIPHER_UNKNOWN && cipher != GNUTLS_CIPHER_NULL) {
+ ngtcp2_crypto_aead_init(&ctx->aead, (void *)cipher);
+ }
+
+ hash = gnutls_early_prf_hash_get(session);
+ if (hash != GNUTLS_DIG_UNKNOWN && hash != GNUTLS_DIG_NULL) {
+ ctx->md.native_handle = (void *)hash;
+ }
+
+ hp_cipher = crypto_get_hp(cipher);
+ if (hp_cipher != GNUTLS_CIPHER_UNKNOWN) {
+ ctx->hp.native_handle = (void *)hp_cipher;
+ }
+
+ ctx->max_encryption = crypto_get_aead_max_encryption(cipher);
+ ctx->max_decryption_failure = crypto_get_aead_max_decryption_failure(cipher);
+
+ return ctx;
+}
+
+size_t ngtcp2_crypto_md_hashlen(const ngtcp2_crypto_md *md) {
+ return gnutls_hash_get_len(
+ (gnutls_digest_algorithm_t)(intptr_t)md->native_handle);
+}
+
+size_t ngtcp2_crypto_aead_keylen(const ngtcp2_crypto_aead *aead) {
+ return gnutls_cipher_get_key_size(
+ (gnutls_cipher_algorithm_t)(intptr_t)aead->native_handle);
+}
+
+size_t ngtcp2_crypto_aead_noncelen(const ngtcp2_crypto_aead *aead) {
+ return gnutls_cipher_get_iv_size(
+ (gnutls_cipher_algorithm_t)(intptr_t)aead->native_handle);
+}
+
+int ngtcp2_crypto_aead_ctx_encrypt_init(ngtcp2_crypto_aead_ctx *aead_ctx,
+ const ngtcp2_crypto_aead *aead,
+ const uint8_t *key, size_t noncelen) {
+ gnutls_cipher_algorithm_t cipher =
+ (gnutls_cipher_algorithm_t)(intptr_t)aead->native_handle;
+ gnutls_aead_cipher_hd_t hd;
+ gnutls_datum_t _key;
+
+ (void)noncelen;
+
+ _key.data = (void *)key;
+ _key.size = (unsigned int)ngtcp2_crypto_aead_keylen(aead);
+
+ if (gnutls_aead_cipher_init(&hd, cipher, &_key) != 0) {
+ return -1;
+ }
+
+ aead_ctx->native_handle = hd;
+
+ return 0;
+}
+
+int ngtcp2_crypto_aead_ctx_decrypt_init(ngtcp2_crypto_aead_ctx *aead_ctx,
+ const ngtcp2_crypto_aead *aead,
+ const uint8_t *key, size_t noncelen) {
+ gnutls_cipher_algorithm_t cipher =
+ (gnutls_cipher_algorithm_t)(intptr_t)aead->native_handle;
+ gnutls_aead_cipher_hd_t hd;
+ gnutls_datum_t _key;
+
+ (void)noncelen;
+
+ _key.data = (void *)key;
+ _key.size = (unsigned int)ngtcp2_crypto_aead_keylen(aead);
+
+ if (gnutls_aead_cipher_init(&hd, cipher, &_key) != 0) {
+ return -1;
+ }
+
+ aead_ctx->native_handle = hd;
+
+ return 0;
+}
+
+void ngtcp2_crypto_aead_ctx_free(ngtcp2_crypto_aead_ctx *aead_ctx) {
+ if (aead_ctx->native_handle) {
+ gnutls_aead_cipher_deinit(aead_ctx->native_handle);
+ }
+}
+
+int ngtcp2_crypto_cipher_ctx_encrypt_init(ngtcp2_crypto_cipher_ctx *cipher_ctx,
+ const ngtcp2_crypto_cipher *cipher,
+ const uint8_t *key) {
+ gnutls_cipher_algorithm_t _cipher =
+ (gnutls_cipher_algorithm_t)(intptr_t)cipher->native_handle;
+ gnutls_cipher_hd_t hd;
+ gnutls_datum_t _key;
+
+ _key.data = (void *)key;
+ _key.size = (unsigned int)gnutls_cipher_get_key_size(_cipher);
+
+ if (gnutls_cipher_init(&hd, _cipher, &_key, NULL) != 0) {
+ return -1;
+ }
+
+ cipher_ctx->native_handle = hd;
+
+ return 0;
+}
+
+void ngtcp2_crypto_cipher_ctx_free(ngtcp2_crypto_cipher_ctx *cipher_ctx) {
+ if (cipher_ctx->native_handle) {
+ gnutls_cipher_deinit(cipher_ctx->native_handle);
+ }
+}
+
+int ngtcp2_crypto_hkdf_extract(uint8_t *dest, const ngtcp2_crypto_md *md,
+ const uint8_t *secret, size_t secretlen,
+ const uint8_t *salt, size_t saltlen) {
+ gnutls_mac_algorithm_t prf =
+ (gnutls_mac_algorithm_t)(intptr_t)md->native_handle;
+ gnutls_datum_t _secret = {(void *)secret, (unsigned int)secretlen};
+ gnutls_datum_t _salt = {(void *)salt, (unsigned int)saltlen};
+
+ if (gnutls_hkdf_extract(prf, &_secret, &_salt, dest) != 0) {
+ return -1;
+ }
+
+ return 0;
+}
+
+int ngtcp2_crypto_hkdf_expand(uint8_t *dest, size_t destlen,
+ const ngtcp2_crypto_md *md, const uint8_t *secret,
+ size_t secretlen, const uint8_t *info,
+ size_t infolen) {
+ gnutls_mac_algorithm_t prf =
+ (gnutls_mac_algorithm_t)(intptr_t)md->native_handle;
+ gnutls_datum_t _secret = {(void *)secret, (unsigned int)secretlen};
+ gnutls_datum_t _info = {(void *)info, (unsigned int)infolen};
+
+ if (gnutls_hkdf_expand(prf, &_secret, &_info, dest, destlen) != 0) {
+ return -1;
+ }
+
+ return 0;
+}
+
+int ngtcp2_crypto_hkdf(uint8_t *dest, size_t destlen,
+ const ngtcp2_crypto_md *md, const uint8_t *secret,
+ size_t secretlen, const uint8_t *salt, size_t saltlen,
+ const uint8_t *info, size_t infolen) {
+ gnutls_mac_algorithm_t prf =
+ (gnutls_mac_algorithm_t)(intptr_t)md->native_handle;
+ size_t keylen = ngtcp2_crypto_md_hashlen(md);
+ uint8_t key[64];
+ gnutls_datum_t _secret = {(void *)secret, (unsigned int)secretlen};
+ gnutls_datum_t _key = {(void *)key, (unsigned int)keylen};
+ gnutls_datum_t _salt = {(void *)salt, (unsigned int)saltlen};
+ gnutls_datum_t _info = {(void *)info, (unsigned int)infolen};
+
+ assert(keylen <= sizeof(key));
+
+ if (gnutls_hkdf_extract(prf, &_secret, &_salt, key) != 0) {
+ return -1;
+ }
+
+ if (gnutls_hkdf_expand(prf, &_key, &_info, dest, destlen) != 0) {
+ return -1;
+ }
+
+ return 0;
+}
+
+int ngtcp2_crypto_encrypt(uint8_t *dest, const ngtcp2_crypto_aead *aead,
+ const ngtcp2_crypto_aead_ctx *aead_ctx,
+ const uint8_t *plaintext, size_t plaintextlen,
+ const uint8_t *nonce, size_t noncelen,
+ const uint8_t *aad, size_t aadlen) {
+ gnutls_cipher_algorithm_t cipher =
+ (gnutls_cipher_algorithm_t)(intptr_t)aead->native_handle;
+ gnutls_aead_cipher_hd_t hd = aead_ctx->native_handle;
+ size_t taglen = gnutls_cipher_get_tag_size(cipher);
+ size_t ciphertextlen = plaintextlen + taglen;
+
+ if (gnutls_aead_cipher_encrypt(hd, nonce, noncelen, aad, aadlen, taglen,
+ plaintext, plaintextlen, dest,
+ &ciphertextlen) != 0) {
+ return -1;
+ }
+
+ return 0;
+}
+
+int ngtcp2_crypto_decrypt(uint8_t *dest, const ngtcp2_crypto_aead *aead,
+ const ngtcp2_crypto_aead_ctx *aead_ctx,
+ const uint8_t *ciphertext, size_t ciphertextlen,
+ const uint8_t *nonce, size_t noncelen,
+ const uint8_t *aad, size_t aadlen) {
+ gnutls_cipher_algorithm_t cipher =
+ (gnutls_cipher_algorithm_t)(intptr_t)aead->native_handle;
+ gnutls_aead_cipher_hd_t hd = aead_ctx->native_handle;
+ size_t taglen = gnutls_cipher_get_tag_size(cipher);
+ size_t plaintextlen;
+
+ if (taglen > ciphertextlen) {
+ return -1;
+ }
+
+ plaintextlen = ciphertextlen - taglen;
+
+ if (gnutls_aead_cipher_decrypt(hd, nonce, noncelen, aad, aadlen, taglen,
+ ciphertext, ciphertextlen, dest,
+ &plaintextlen) != 0) {
+ return -1;
+ }
+
+ return 0;
+}
+
+int ngtcp2_crypto_hp_mask(uint8_t *dest, const ngtcp2_crypto_cipher *hp,
+ const ngtcp2_crypto_cipher_ctx *hp_ctx,
+ const uint8_t *sample) {
+ gnutls_cipher_algorithm_t cipher =
+ (gnutls_cipher_algorithm_t)(intptr_t)hp->native_handle;
+ gnutls_cipher_hd_t hd = hp_ctx->native_handle;
+
+ switch (cipher) {
+ case GNUTLS_CIPHER_AES_128_CBC:
+ case GNUTLS_CIPHER_AES_256_CBC: {
+ uint8_t iv[16];
+ uint8_t buf[16];
+
+ /* Emulate one block AES-ECB by invalidating the effect of IV */
+ memset(iv, 0, sizeof(iv));
+
+ gnutls_cipher_set_iv(hd, iv, sizeof(iv));
+
+ if (gnutls_cipher_encrypt2(hd, sample, 16, buf, sizeof(buf)) != 0) {
+ return -1;
+ }
+
+ memcpy(dest, buf, 5);
+ } break;
+
+ case GNUTLS_CIPHER_CHACHA20_32: {
+ static const uint8_t PLAINTEXT[] = "\x00\x00\x00\x00\x00";
+ uint8_t buf[5 + 16];
+ size_t buflen = sizeof(buf);
+
+ gnutls_cipher_set_iv(hd, (void *)sample, 16);
+
+ if (gnutls_cipher_encrypt2(hd, PLAINTEXT, sizeof(PLAINTEXT) - 1, buf,
+ buflen) != 0) {
+ return -1;
+ }
+
+ memcpy(dest, buf, 5);
+ } break;
+ default:
+ assert(0);
+ }
+
+ return 0;
+}
+
+ngtcp2_crypto_level ngtcp2_crypto_gnutls_from_gnutls_record_encryption_level(
+ gnutls_record_encryption_level_t gtls_level) {
+ switch (gtls_level) {
+ case GNUTLS_ENCRYPTION_LEVEL_INITIAL:
+ return NGTCP2_CRYPTO_LEVEL_INITIAL;
+ case GNUTLS_ENCRYPTION_LEVEL_HANDSHAKE:
+ return NGTCP2_CRYPTO_LEVEL_HANDSHAKE;
+ case GNUTLS_ENCRYPTION_LEVEL_APPLICATION:
+ return NGTCP2_CRYPTO_LEVEL_APPLICATION;
+ case GNUTLS_ENCRYPTION_LEVEL_EARLY:
+ return NGTCP2_CRYPTO_LEVEL_EARLY;
+ default:
+ assert(0);
+ abort();
+ }
+}
+
+gnutls_record_encryption_level_t
+ngtcp2_crypto_gnutls_from_ngtcp2_level(ngtcp2_crypto_level crypto_level) {
+ switch (crypto_level) {
+ case NGTCP2_CRYPTO_LEVEL_INITIAL:
+ return GNUTLS_ENCRYPTION_LEVEL_INITIAL;
+ case NGTCP2_CRYPTO_LEVEL_HANDSHAKE:
+ return GNUTLS_ENCRYPTION_LEVEL_HANDSHAKE;
+ case NGTCP2_CRYPTO_LEVEL_APPLICATION:
+ return GNUTLS_ENCRYPTION_LEVEL_APPLICATION;
+ case NGTCP2_CRYPTO_LEVEL_EARLY:
+ return GNUTLS_ENCRYPTION_LEVEL_EARLY;
+ default:
+ assert(0);
+ abort();
+ }
+}
+
+int ngtcp2_crypto_read_write_crypto_data(ngtcp2_conn *conn,
+ ngtcp2_crypto_level crypto_level,
+ const uint8_t *data, size_t datalen) {
+ gnutls_session_t session = ngtcp2_conn_get_tls_native_handle(conn);
+ int rv;
+
+ if (datalen > 0) {
+ rv = gnutls_handshake_write(
+ session, ngtcp2_crypto_gnutls_from_ngtcp2_level(crypto_level), data,
+ datalen);
+ if (rv != 0) {
+ if (!gnutls_error_is_fatal(rv)) {
+ return 0;
+ }
+ gnutls_alert_send_appropriate(session, rv);
+ return -1;
+ }
+ }
+
+ if (!ngtcp2_conn_get_handshake_completed(conn)) {
+ rv = gnutls_handshake(session);
+ if (rv < 0) {
+ if (!gnutls_error_is_fatal(rv)) {
+ return 0;
+ }
+ gnutls_alert_send_appropriate(session, rv);
+ return -1;
+ }
+
+ ngtcp2_conn_handshake_completed(conn);
+ }
+
+ return 0;
+}
+
+int ngtcp2_crypto_set_remote_transport_params(ngtcp2_conn *conn, void *tls) {
+ (void)conn;
+ (void)tls;
+ /* Nothing to do; GnuTLS applications are supposed to register the
+ quic_transport_parameters extension with
+ gnutls_session_ext_register. */
+ return 0;
+}
+
+int ngtcp2_crypto_set_local_transport_params(void *tls, const uint8_t *buf,
+ size_t len) {
+ (void)tls;
+ (void)buf;
+ (void)len;
+ /* Nothing to do; GnuTLS applications are supposed to register the
+ quic_transport_parameters extension with
+ gnutls_session_ext_register. */
+ return 0;
+}
+
+int ngtcp2_crypto_get_path_challenge_data_cb(ngtcp2_conn *conn, uint8_t *data,
+ void *user_data) {
+ (void)conn;
+ (void)user_data;
+
+ if (gnutls_rnd(GNUTLS_RND_RANDOM, data, NGTCP2_PATH_CHALLENGE_DATALEN) != 0) {
+ return NGTCP2_ERR_CALLBACK_FAILURE;
+ }
+
+ return 0;
+}
+
+int ngtcp2_crypto_random(uint8_t *data, size_t datalen) {
+ if (gnutls_rnd(GNUTLS_RND_RANDOM, data, datalen) != 0) {
+ return -1;
+ }
+
+ return 0;
+}
+
+static int secret_func(gnutls_session_t session,
+ gnutls_record_encryption_level_t gtls_level,
+ const void *rx_secret, const void *tx_secret,
+ size_t secretlen) {
+ ngtcp2_crypto_conn_ref *conn_ref = gnutls_session_get_ptr(session);
+ ngtcp2_conn *conn = conn_ref->get_conn(conn_ref);
+ ngtcp2_crypto_level level =
+ ngtcp2_crypto_gnutls_from_gnutls_record_encryption_level(gtls_level);
+
+ if (rx_secret &&
+ ngtcp2_crypto_derive_and_install_rx_key(conn, NULL, NULL, NULL, level,
+ rx_secret, secretlen) != 0) {
+ return -1;
+ }
+
+ if (tx_secret &&
+ ngtcp2_crypto_derive_and_install_tx_key(conn, NULL, NULL, NULL, level,
+ tx_secret, secretlen) != 0) {
+ return -1;
+ }
+
+ return 0;
+}
+
+static int read_func(gnutls_session_t session,
+ gnutls_record_encryption_level_t gtls_level,
+ gnutls_handshake_description_t htype, const void *data,
+ size_t datalen) {
+ ngtcp2_crypto_conn_ref *conn_ref = gnutls_session_get_ptr(session);
+ ngtcp2_conn *conn = conn_ref->get_conn(conn_ref);
+ ngtcp2_crypto_level level =
+ ngtcp2_crypto_gnutls_from_gnutls_record_encryption_level(gtls_level);
+ int rv;
+
+ if (htype == GNUTLS_HANDSHAKE_CHANGE_CIPHER_SPEC) {
+ return 0;
+ }
+
+ rv = ngtcp2_conn_submit_crypto_data(conn, level, data, datalen);
+ if (rv != 0) {
+ ngtcp2_conn_set_tls_error(conn, rv);
+ return -1;
+ }
+
+ return 0;
+}
+
+static int alert_read_func(gnutls_session_t session,
+ gnutls_record_encryption_level_t gtls_level,
+ gnutls_alert_level_t alert_level,
+ gnutls_alert_description_t alert_desc) {
+ ngtcp2_crypto_conn_ref *conn_ref = gnutls_session_get_ptr(session);
+ ngtcp2_conn *conn = conn_ref->get_conn(conn_ref);
+ (void)gtls_level;
+ (void)alert_level;
+
+ ngtcp2_conn_set_tls_alert(conn, (uint8_t)alert_desc);
+
+ return 0;
+}
+
+static int tp_recv_func(gnutls_session_t session, const uint8_t *data,
+ size_t datalen) {
+ ngtcp2_crypto_conn_ref *conn_ref = gnutls_session_get_ptr(session);
+ ngtcp2_conn *conn = conn_ref->get_conn(conn_ref);
+ int rv;
+
+ rv = ngtcp2_conn_decode_remote_transport_params(conn, data, datalen);
+ if (rv != 0) {
+ ngtcp2_conn_set_tls_error(conn, rv);
+ return -1;
+ }
+
+ return 0;
+}
+
+static int tp_send_func(gnutls_session_t session, gnutls_buffer_t extdata) {
+ ngtcp2_crypto_conn_ref *conn_ref = gnutls_session_get_ptr(session);
+ ngtcp2_conn *conn = conn_ref->get_conn(conn_ref);
+ uint8_t buf[256];
+ ngtcp2_ssize nwrite;
+ int rv;
+
+ nwrite = ngtcp2_conn_encode_local_transport_params(conn, buf, sizeof(buf));
+ if (nwrite < 0) {
+ return -1;
+ }
+
+ rv = gnutls_buffer_append_data(extdata, buf, (size_t)nwrite);
+ if (rv != 0) {
+ return -1;
+ }
+
+ return 0;
+}
+
+static int crypto_gnutls_configure_session(gnutls_session_t session) {
+ int rv;
+
+ gnutls_handshake_set_secret_function(session, secret_func);
+ gnutls_handshake_set_read_function(session, read_func);
+ gnutls_alert_set_read_function(session, alert_read_func);
+
+ rv = gnutls_session_ext_register(
+ session, "QUIC Transport Parameters",
+ NGTCP2_TLSEXT_QUIC_TRANSPORT_PARAMETERS_V1, GNUTLS_EXT_TLS, tp_recv_func,
+ tp_send_func, NULL, NULL, NULL,
+ GNUTLS_EXT_FLAG_TLS | GNUTLS_EXT_FLAG_CLIENT_HELLO | GNUTLS_EXT_FLAG_EE);
+ if (rv != 0) {
+ return -1;
+ }
+
+ return 0;
+}
+
+int ngtcp2_crypto_gnutls_configure_server_session(gnutls_session_t session) {
+ return crypto_gnutls_configure_session(session);
+}
+
+int ngtcp2_crypto_gnutls_configure_client_session(gnutls_session_t session) {
+ return crypto_gnutls_configure_session(session);
+}
diff --git a/src/contrib/libngtcp2/ngtcp2/crypto/shared.c b/src/contrib/libngtcp2/ngtcp2/crypto/shared.c
new file mode 100644
index 0000000..f38e0b1
--- /dev/null
+++ b/src/contrib/libngtcp2/ngtcp2/crypto/shared.c
@@ -0,0 +1,1418 @@
+/*
+ * ngtcp2
+ *
+ * Copyright (c) 2019 ngtcp2 contributors
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "shared.h"
+
+#ifdef WIN32
+# include <winsock2.h>
+# include <ws2tcpip.h>
+#else
+# include <netinet/in.h>
+#endif
+
+#include <string.h>
+#include <assert.h>
+
+#include "ngtcp2_macro.h"
+#include "ngtcp2_net.h"
+
+ngtcp2_crypto_md *ngtcp2_crypto_md_init(ngtcp2_crypto_md *md,
+ void *md_native_handle) {
+ md->native_handle = md_native_handle;
+ return md;
+}
+
+int ngtcp2_crypto_hkdf_expand_label(uint8_t *dest, size_t destlen,
+ const ngtcp2_crypto_md *md,
+ const uint8_t *secret, size_t secretlen,
+ const uint8_t *label, size_t labellen) {
+ static const uint8_t LABEL[] = "tls13 ";
+ uint8_t info[256];
+ uint8_t *p = info;
+
+ *p++ = (uint8_t)(destlen / 256);
+ *p++ = (uint8_t)(destlen % 256);
+ *p++ = (uint8_t)(sizeof(LABEL) - 1 + labellen);
+ memcpy(p, LABEL, sizeof(LABEL) - 1);
+ p += sizeof(LABEL) - 1;
+ memcpy(p, label, labellen);
+ p += labellen;
+ *p++ = 0;
+
+ return ngtcp2_crypto_hkdf_expand(dest, destlen, md, secret, secretlen, info,
+ (size_t)(p - info));
+}
+
+#define NGTCP2_CRYPTO_INITIAL_SECRETLEN 32
+
+int ngtcp2_crypto_derive_initial_secrets(uint32_t version, uint8_t *rx_secret,
+ uint8_t *tx_secret,
+ uint8_t *initial_secret,
+ const ngtcp2_cid *client_dcid,
+ ngtcp2_crypto_side side) {
+ static const uint8_t CLABEL[] = "client in";
+ static const uint8_t SLABEL[] = "server in";
+ uint8_t initial_secret_buf[NGTCP2_CRYPTO_INITIAL_SECRETLEN];
+ uint8_t *client_secret;
+ uint8_t *server_secret;
+ ngtcp2_crypto_ctx ctx;
+ const uint8_t *salt;
+ size_t saltlen;
+
+ if (!initial_secret) {
+ initial_secret = initial_secret_buf;
+ }
+
+ ngtcp2_crypto_ctx_initial(&ctx);
+
+ switch (version) {
+ case NGTCP2_PROTO_VER_V1:
+ salt = (const uint8_t *)NGTCP2_INITIAL_SALT_V1;
+ saltlen = sizeof(NGTCP2_INITIAL_SALT_V1) - 1;
+ break;
+ case NGTCP2_PROTO_VER_V2:
+ salt = (const uint8_t *)NGTCP2_INITIAL_SALT_V2;
+ saltlen = sizeof(NGTCP2_INITIAL_SALT_V2) - 1;
+ break;
+ default:
+ salt = (const uint8_t *)NGTCP2_INITIAL_SALT_DRAFT;
+ saltlen = sizeof(NGTCP2_INITIAL_SALT_DRAFT) - 1;
+ }
+
+ if (ngtcp2_crypto_hkdf_extract(initial_secret, &ctx.md, client_dcid->data,
+ client_dcid->datalen, salt, saltlen) != 0) {
+ return -1;
+ }
+
+ if (side == NGTCP2_CRYPTO_SIDE_SERVER) {
+ client_secret = rx_secret;
+ server_secret = tx_secret;
+ } else {
+ client_secret = tx_secret;
+ server_secret = rx_secret;
+ }
+
+ if (ngtcp2_crypto_hkdf_expand_label(
+ client_secret, NGTCP2_CRYPTO_INITIAL_SECRETLEN, &ctx.md,
+ initial_secret, NGTCP2_CRYPTO_INITIAL_SECRETLEN, CLABEL,
+ sizeof(CLABEL) - 1) != 0 ||
+ ngtcp2_crypto_hkdf_expand_label(
+ server_secret, NGTCP2_CRYPTO_INITIAL_SECRETLEN, &ctx.md,
+ initial_secret, NGTCP2_CRYPTO_INITIAL_SECRETLEN, SLABEL,
+ sizeof(SLABEL) - 1) != 0) {
+ return -1;
+ }
+
+ return 0;
+}
+
+size_t ngtcp2_crypto_packet_protection_ivlen(const ngtcp2_crypto_aead *aead) {
+ size_t noncelen = ngtcp2_crypto_aead_noncelen(aead);
+ return ngtcp2_max(8, noncelen);
+}
+
+int ngtcp2_crypto_derive_packet_protection_key(
+ uint8_t *key, uint8_t *iv, uint8_t *hp_key, uint32_t version,
+ const ngtcp2_crypto_aead *aead, const ngtcp2_crypto_md *md,
+ const uint8_t *secret, size_t secretlen) {
+ static const uint8_t KEY_LABEL_V1[] = "quic key";
+ static const uint8_t IV_LABEL_V1[] = "quic iv";
+ static const uint8_t HP_KEY_LABEL_V1[] = "quic hp";
+ static const uint8_t KEY_LABEL_V2[] = "quicv2 key";
+ static const uint8_t IV_LABEL_V2[] = "quicv2 iv";
+ static const uint8_t HP_KEY_LABEL_V2[] = "quicv2 hp";
+ size_t keylen = ngtcp2_crypto_aead_keylen(aead);
+ size_t ivlen = ngtcp2_crypto_packet_protection_ivlen(aead);
+ const uint8_t *key_label;
+ size_t key_labellen;
+ const uint8_t *iv_label;
+ size_t iv_labellen;
+ const uint8_t *hp_key_label;
+ size_t hp_key_labellen;
+
+ switch (version) {
+ case NGTCP2_PROTO_VER_V2:
+ key_label = KEY_LABEL_V2;
+ key_labellen = sizeof(KEY_LABEL_V2) - 1;
+ iv_label = IV_LABEL_V2;
+ iv_labellen = sizeof(IV_LABEL_V2) - 1;
+ hp_key_label = HP_KEY_LABEL_V2;
+ hp_key_labellen = sizeof(HP_KEY_LABEL_V2) - 1;
+ break;
+ default:
+ key_label = KEY_LABEL_V1;
+ key_labellen = sizeof(KEY_LABEL_V1) - 1;
+ iv_label = IV_LABEL_V1;
+ iv_labellen = sizeof(IV_LABEL_V1) - 1;
+ hp_key_label = HP_KEY_LABEL_V1;
+ hp_key_labellen = sizeof(HP_KEY_LABEL_V1) - 1;
+ }
+
+ if (ngtcp2_crypto_hkdf_expand_label(key, keylen, md, secret, secretlen,
+ key_label, key_labellen) != 0) {
+ return -1;
+ }
+
+ if (ngtcp2_crypto_hkdf_expand_label(iv, ivlen, md, secret, secretlen,
+ iv_label, iv_labellen) != 0) {
+ return -1;
+ }
+
+ if (hp_key != NULL &&
+ ngtcp2_crypto_hkdf_expand_label(hp_key, keylen, md, secret, secretlen,
+ hp_key_label, hp_key_labellen) != 0) {
+ return -1;
+ }
+
+ return 0;
+}
+
+int ngtcp2_crypto_update_traffic_secret(uint8_t *dest,
+ const ngtcp2_crypto_md *md,
+ const uint8_t *secret,
+ size_t secretlen) {
+ static const uint8_t LABEL[] = "quic ku";
+
+ if (ngtcp2_crypto_hkdf_expand_label(dest, secretlen, md, secret, secretlen,
+ LABEL, sizeof(LABEL) - 1) != 0) {
+ return -1;
+ }
+
+ return 0;
+}
+
+int ngtcp2_crypto_derive_and_install_rx_key(ngtcp2_conn *conn, uint8_t *key,
+ uint8_t *iv, uint8_t *hp_key,
+ ngtcp2_crypto_level level,
+ const uint8_t *secret,
+ size_t secretlen) {
+ const ngtcp2_crypto_ctx *ctx;
+ const ngtcp2_crypto_aead *aead;
+ const ngtcp2_crypto_md *md;
+ const ngtcp2_crypto_cipher *hp;
+ ngtcp2_crypto_aead_ctx aead_ctx = {0};
+ ngtcp2_crypto_cipher_ctx hp_ctx = {0};
+ void *tls = ngtcp2_conn_get_tls_native_handle(conn);
+ uint8_t keybuf[64], ivbuf[64], hp_keybuf[64];
+ size_t ivlen;
+ int rv;
+ ngtcp2_crypto_ctx cctx;
+ uint32_t version;
+
+ if (level == NGTCP2_CRYPTO_LEVEL_EARLY && !ngtcp2_conn_is_server(conn)) {
+ return 0;
+ }
+
+ if (!key) {
+ key = keybuf;
+ }
+ if (!iv) {
+ iv = ivbuf;
+ }
+ if (!hp_key) {
+ hp_key = hp_keybuf;
+ }
+
+ switch (level) {
+ case NGTCP2_CRYPTO_LEVEL_EARLY:
+ ngtcp2_crypto_ctx_tls_early(&cctx, tls);
+ ngtcp2_conn_set_early_crypto_ctx(conn, &cctx);
+ ctx = ngtcp2_conn_get_early_crypto_ctx(conn);
+ version = ngtcp2_conn_get_client_chosen_version(conn);
+ break;
+ case NGTCP2_CRYPTO_LEVEL_HANDSHAKE:
+ if (ngtcp2_conn_is_server(conn) &&
+ !ngtcp2_conn_get_negotiated_version(conn)) {
+ rv = ngtcp2_crypto_set_remote_transport_params(conn, tls);
+ if (rv != 0) {
+ return -1;
+ }
+ }
+ /* fall through */
+ default:
+ ctx = ngtcp2_conn_get_crypto_ctx(conn);
+ version = ngtcp2_conn_get_negotiated_version(conn);
+
+ if (!ctx->aead.native_handle) {
+ ngtcp2_crypto_ctx_tls(&cctx, tls);
+ ngtcp2_conn_set_crypto_ctx(conn, &cctx);
+ ctx = ngtcp2_conn_get_crypto_ctx(conn);
+ }
+ }
+
+ aead = &ctx->aead;
+ md = &ctx->md;
+ hp = &ctx->hp;
+ ivlen = ngtcp2_crypto_packet_protection_ivlen(aead);
+
+ if (ngtcp2_crypto_derive_packet_protection_key(key, iv, hp_key, version, aead,
+ md, secret, secretlen) != 0) {
+ return -1;
+ }
+
+ if (ngtcp2_crypto_aead_ctx_decrypt_init(&aead_ctx, aead, key, ivlen) != 0) {
+ goto fail;
+ }
+
+ if (ngtcp2_crypto_cipher_ctx_encrypt_init(&hp_ctx, hp, hp_key) != 0) {
+ goto fail;
+ }
+
+ switch (level) {
+ case NGTCP2_CRYPTO_LEVEL_EARLY:
+ rv = ngtcp2_conn_install_early_key(conn, &aead_ctx, iv, ivlen, &hp_ctx);
+ if (rv != 0) {
+ goto fail;
+ }
+ break;
+ case NGTCP2_CRYPTO_LEVEL_HANDSHAKE:
+ rv = ngtcp2_conn_install_rx_handshake_key(conn, &aead_ctx, iv, ivlen,
+ &hp_ctx);
+ if (rv != 0) {
+ goto fail;
+ }
+ break;
+ case NGTCP2_CRYPTO_LEVEL_APPLICATION:
+ if (!ngtcp2_conn_is_server(conn)) {
+ rv = ngtcp2_crypto_set_remote_transport_params(conn, tls);
+ if (rv != 0) {
+ goto fail;
+ }
+ }
+
+ rv = ngtcp2_conn_install_rx_key(conn, secret, secretlen, &aead_ctx, iv,
+ ivlen, &hp_ctx);
+ if (rv != 0) {
+ goto fail;
+ }
+
+ break;
+ default:
+ goto fail;
+ }
+
+ return 0;
+
+fail:
+ ngtcp2_crypto_cipher_ctx_free(&hp_ctx);
+ ngtcp2_crypto_aead_ctx_free(&aead_ctx);
+
+ return -1;
+}
+
+/*
+ * crypto_set_local_transport_params gets local QUIC transport
+ * parameters from |conn| and sets it to |tls|.
+ *
+ * This function returns 0 if it succeeds, or -1.
+ */
+static int crypto_set_local_transport_params(ngtcp2_conn *conn, void *tls) {
+ ngtcp2_ssize nwrite;
+ uint8_t buf[256];
+
+ nwrite = ngtcp2_conn_encode_local_transport_params(conn, buf, sizeof(buf));
+ if (nwrite < 0) {
+ return -1;
+ }
+
+ if (ngtcp2_crypto_set_local_transport_params(tls, buf, (size_t)nwrite) != 0) {
+ return -1;
+ }
+
+ return 0;
+}
+
+int ngtcp2_crypto_derive_and_install_tx_key(ngtcp2_conn *conn, uint8_t *key,
+ uint8_t *iv, uint8_t *hp_key,
+ ngtcp2_crypto_level level,
+ const uint8_t *secret,
+ size_t secretlen) {
+ const ngtcp2_crypto_ctx *ctx;
+ const ngtcp2_crypto_aead *aead;
+ const ngtcp2_crypto_md *md;
+ const ngtcp2_crypto_cipher *hp;
+ ngtcp2_crypto_aead_ctx aead_ctx = {0};
+ ngtcp2_crypto_cipher_ctx hp_ctx = {0};
+ void *tls = ngtcp2_conn_get_tls_native_handle(conn);
+ uint8_t keybuf[64], ivbuf[64], hp_keybuf[64];
+ size_t ivlen;
+ int rv;
+ ngtcp2_crypto_ctx cctx;
+ uint32_t version;
+
+ if (level == NGTCP2_CRYPTO_LEVEL_EARLY && ngtcp2_conn_is_server(conn)) {
+ return 0;
+ }
+
+ if (!key) {
+ key = keybuf;
+ }
+ if (!iv) {
+ iv = ivbuf;
+ }
+ if (!hp_key) {
+ hp_key = hp_keybuf;
+ }
+
+ switch (level) {
+ case NGTCP2_CRYPTO_LEVEL_EARLY:
+ ngtcp2_crypto_ctx_tls_early(&cctx, tls);
+ ngtcp2_conn_set_early_crypto_ctx(conn, &cctx);
+ ctx = ngtcp2_conn_get_early_crypto_ctx(conn);
+ version = ngtcp2_conn_get_client_chosen_version(conn);
+ break;
+ case NGTCP2_CRYPTO_LEVEL_HANDSHAKE:
+ if (ngtcp2_conn_is_server(conn) &&
+ !ngtcp2_conn_get_negotiated_version(conn)) {
+ rv = ngtcp2_crypto_set_remote_transport_params(conn, tls);
+ if (rv != 0) {
+ return -1;
+ }
+ }
+ /* fall through */
+ default:
+ ctx = ngtcp2_conn_get_crypto_ctx(conn);
+ version = ngtcp2_conn_get_negotiated_version(conn);
+
+ if (!ctx->aead.native_handle) {
+ ngtcp2_crypto_ctx_tls(&cctx, tls);
+ ngtcp2_conn_set_crypto_ctx(conn, &cctx);
+ ctx = ngtcp2_conn_get_crypto_ctx(conn);
+ }
+ }
+
+ aead = &ctx->aead;
+ md = &ctx->md;
+ hp = &ctx->hp;
+ ivlen = ngtcp2_crypto_packet_protection_ivlen(aead);
+
+ if (ngtcp2_crypto_derive_packet_protection_key(key, iv, hp_key, version, aead,
+ md, secret, secretlen) != 0) {
+ return -1;
+ }
+
+ if (ngtcp2_crypto_aead_ctx_encrypt_init(&aead_ctx, aead, key, ivlen) != 0) {
+ goto fail;
+ }
+
+ if (ngtcp2_crypto_cipher_ctx_encrypt_init(&hp_ctx, hp, hp_key) != 0) {
+ goto fail;
+ }
+
+ switch (level) {
+ case NGTCP2_CRYPTO_LEVEL_EARLY:
+ rv = ngtcp2_conn_install_early_key(conn, &aead_ctx, iv, ivlen, &hp_ctx);
+ if (rv != 0) {
+ goto fail;
+ }
+ break;
+ case NGTCP2_CRYPTO_LEVEL_HANDSHAKE:
+ rv = ngtcp2_conn_install_tx_handshake_key(conn, &aead_ctx, iv, ivlen,
+ &hp_ctx);
+ if (rv != 0) {
+ goto fail;
+ }
+
+ if (ngtcp2_conn_is_server(conn) &&
+ crypto_set_local_transport_params(conn, tls) != 0) {
+ goto fail;
+ }
+
+ break;
+ case NGTCP2_CRYPTO_LEVEL_APPLICATION:
+ rv = ngtcp2_conn_install_tx_key(conn, secret, secretlen, &aead_ctx, iv,
+ ivlen, &hp_ctx);
+ if (rv != 0) {
+ goto fail;
+ }
+
+ break;
+ default:
+ goto fail;
+ }
+
+ return 0;
+
+fail:
+ ngtcp2_crypto_cipher_ctx_free(&hp_ctx);
+ ngtcp2_crypto_aead_ctx_free(&aead_ctx);
+
+ return -1;
+}
+
+int ngtcp2_crypto_derive_and_install_initial_key(
+ ngtcp2_conn *conn, uint8_t *rx_secret, uint8_t *tx_secret,
+ uint8_t *initial_secret, uint8_t *rx_key, uint8_t *rx_iv,
+ uint8_t *rx_hp_key, uint8_t *tx_key, uint8_t *tx_iv, uint8_t *tx_hp_key,
+ uint32_t version, const ngtcp2_cid *client_dcid) {
+ uint8_t rx_secretbuf[NGTCP2_CRYPTO_INITIAL_SECRETLEN];
+ uint8_t tx_secretbuf[NGTCP2_CRYPTO_INITIAL_SECRETLEN];
+ uint8_t initial_secretbuf[NGTCP2_CRYPTO_INITIAL_SECRETLEN];
+ uint8_t rx_keybuf[NGTCP2_CRYPTO_INITIAL_KEYLEN];
+ uint8_t rx_ivbuf[NGTCP2_CRYPTO_INITIAL_IVLEN];
+ uint8_t rx_hp_keybuf[NGTCP2_CRYPTO_INITIAL_KEYLEN];
+ uint8_t tx_keybuf[NGTCP2_CRYPTO_INITIAL_KEYLEN];
+ uint8_t tx_ivbuf[NGTCP2_CRYPTO_INITIAL_IVLEN];
+ uint8_t tx_hp_keybuf[NGTCP2_CRYPTO_INITIAL_KEYLEN];
+ ngtcp2_crypto_ctx ctx;
+ ngtcp2_crypto_aead retry_aead;
+ ngtcp2_crypto_aead_ctx rx_aead_ctx = {0};
+ ngtcp2_crypto_cipher_ctx rx_hp_ctx = {0};
+ ngtcp2_crypto_aead_ctx tx_aead_ctx = {0};
+ ngtcp2_crypto_cipher_ctx tx_hp_ctx = {0};
+ ngtcp2_crypto_aead_ctx retry_aead_ctx = {0};
+ int rv;
+ int server = ngtcp2_conn_is_server(conn);
+ const uint8_t *retry_key;
+ size_t retry_noncelen;
+
+ ngtcp2_crypto_ctx_initial(&ctx);
+
+ if (!rx_secret) {
+ rx_secret = rx_secretbuf;
+ }
+ if (!tx_secret) {
+ tx_secret = tx_secretbuf;
+ }
+ if (!initial_secret) {
+ initial_secret = initial_secretbuf;
+ }
+
+ if (!rx_key) {
+ rx_key = rx_keybuf;
+ }
+ if (!rx_iv) {
+ rx_iv = rx_ivbuf;
+ }
+ if (!rx_hp_key) {
+ rx_hp_key = rx_hp_keybuf;
+ }
+ if (!tx_key) {
+ tx_key = tx_keybuf;
+ }
+ if (!tx_iv) {
+ tx_iv = tx_ivbuf;
+ }
+ if (!tx_hp_key) {
+ tx_hp_key = tx_hp_keybuf;
+ }
+
+ ngtcp2_conn_set_initial_crypto_ctx(conn, &ctx);
+
+ if (ngtcp2_crypto_derive_initial_secrets(
+ version, rx_secret, tx_secret, initial_secret, client_dcid,
+ server ? NGTCP2_CRYPTO_SIDE_SERVER : NGTCP2_CRYPTO_SIDE_CLIENT) !=
+ 0) {
+ return -1;
+ }
+
+ if (ngtcp2_crypto_derive_packet_protection_key(
+ rx_key, rx_iv, rx_hp_key, version, &ctx.aead, &ctx.md, rx_secret,
+ NGTCP2_CRYPTO_INITIAL_SECRETLEN) != 0) {
+ return -1;
+ }
+
+ if (ngtcp2_crypto_derive_packet_protection_key(
+ tx_key, tx_iv, tx_hp_key, version, &ctx.aead, &ctx.md, tx_secret,
+ NGTCP2_CRYPTO_INITIAL_SECRETLEN) != 0) {
+ return -1;
+ }
+
+ if (ngtcp2_crypto_aead_ctx_decrypt_init(&rx_aead_ctx, &ctx.aead, rx_key,
+ NGTCP2_CRYPTO_INITIAL_IVLEN) != 0) {
+ goto fail;
+ }
+
+ if (ngtcp2_crypto_cipher_ctx_encrypt_init(&rx_hp_ctx, &ctx.hp, rx_hp_key) !=
+ 0) {
+ goto fail;
+ }
+
+ if (ngtcp2_crypto_aead_ctx_encrypt_init(&tx_aead_ctx, &ctx.aead, tx_key,
+ NGTCP2_CRYPTO_INITIAL_IVLEN) != 0) {
+ goto fail;
+ }
+
+ if (ngtcp2_crypto_cipher_ctx_encrypt_init(&tx_hp_ctx, &ctx.hp, tx_hp_key) !=
+ 0) {
+ goto fail;
+ }
+
+ if (!server && !ngtcp2_conn_after_retry(conn)) {
+ ngtcp2_crypto_aead_retry(&retry_aead);
+
+ switch (version) {
+ case NGTCP2_PROTO_VER_V1:
+ retry_key = (const uint8_t *)NGTCP2_RETRY_KEY_V1;
+ retry_noncelen = sizeof(NGTCP2_RETRY_NONCE_V1) - 1;
+ break;
+ case NGTCP2_PROTO_VER_V2:
+ retry_key = (const uint8_t *)NGTCP2_RETRY_KEY_V2;
+ retry_noncelen = sizeof(NGTCP2_RETRY_NONCE_V2) - 1;
+ break;
+ default:
+ retry_key = (const uint8_t *)NGTCP2_RETRY_KEY_DRAFT;
+ retry_noncelen = sizeof(NGTCP2_RETRY_NONCE_DRAFT) - 1;
+ }
+
+ if (ngtcp2_crypto_aead_ctx_encrypt_init(&retry_aead_ctx, &retry_aead,
+ retry_key, retry_noncelen) != 0) {
+ goto fail;
+ }
+ }
+
+ rv = ngtcp2_conn_install_initial_key(conn, &rx_aead_ctx, rx_iv, &rx_hp_ctx,
+ &tx_aead_ctx, tx_iv, &tx_hp_ctx,
+ NGTCP2_CRYPTO_INITIAL_IVLEN);
+ if (rv != 0) {
+ goto fail;
+ }
+
+ if (retry_aead_ctx.native_handle) {
+ ngtcp2_conn_set_retry_aead(conn, &retry_aead, &retry_aead_ctx);
+ }
+
+ return 0;
+
+fail:
+ ngtcp2_crypto_aead_ctx_free(&retry_aead_ctx);
+ ngtcp2_crypto_cipher_ctx_free(&tx_hp_ctx);
+ ngtcp2_crypto_aead_ctx_free(&tx_aead_ctx);
+ ngtcp2_crypto_cipher_ctx_free(&rx_hp_ctx);
+ ngtcp2_crypto_aead_ctx_free(&rx_aead_ctx);
+
+ return -1;
+}
+
+int ngtcp2_crypto_derive_and_install_vneg_initial_key(
+ ngtcp2_conn *conn, uint8_t *rx_secret, uint8_t *tx_secret,
+ uint8_t *initial_secret, uint8_t *rx_key, uint8_t *rx_iv,
+ uint8_t *rx_hp_key, uint8_t *tx_key, uint8_t *tx_iv, uint8_t *tx_hp_key,
+ uint32_t version, const ngtcp2_cid *client_dcid) {
+ uint8_t rx_secretbuf[NGTCP2_CRYPTO_INITIAL_SECRETLEN];
+ uint8_t tx_secretbuf[NGTCP2_CRYPTO_INITIAL_SECRETLEN];
+ uint8_t initial_secretbuf[NGTCP2_CRYPTO_INITIAL_SECRETLEN];
+ uint8_t rx_keybuf[NGTCP2_CRYPTO_INITIAL_KEYLEN];
+ uint8_t rx_ivbuf[NGTCP2_CRYPTO_INITIAL_IVLEN];
+ uint8_t rx_hp_keybuf[NGTCP2_CRYPTO_INITIAL_KEYLEN];
+ uint8_t tx_keybuf[NGTCP2_CRYPTO_INITIAL_KEYLEN];
+ uint8_t tx_ivbuf[NGTCP2_CRYPTO_INITIAL_IVLEN];
+ uint8_t tx_hp_keybuf[NGTCP2_CRYPTO_INITIAL_KEYLEN];
+ const ngtcp2_crypto_ctx *ctx = ngtcp2_conn_get_initial_crypto_ctx(conn);
+ ngtcp2_crypto_aead_ctx rx_aead_ctx = {0};
+ ngtcp2_crypto_cipher_ctx rx_hp_ctx = {0};
+ ngtcp2_crypto_aead_ctx tx_aead_ctx = {0};
+ ngtcp2_crypto_cipher_ctx tx_hp_ctx = {0};
+ int rv;
+ int server = ngtcp2_conn_is_server(conn);
+
+ if (!rx_secret) {
+ rx_secret = rx_secretbuf;
+ }
+ if (!tx_secret) {
+ tx_secret = tx_secretbuf;
+ }
+ if (!initial_secret) {
+ initial_secret = initial_secretbuf;
+ }
+
+ if (!rx_key) {
+ rx_key = rx_keybuf;
+ }
+ if (!rx_iv) {
+ rx_iv = rx_ivbuf;
+ }
+ if (!rx_hp_key) {
+ rx_hp_key = rx_hp_keybuf;
+ }
+ if (!tx_key) {
+ tx_key = tx_keybuf;
+ }
+ if (!tx_iv) {
+ tx_iv = tx_ivbuf;
+ }
+ if (!tx_hp_key) {
+ tx_hp_key = tx_hp_keybuf;
+ }
+
+ if (ngtcp2_crypto_derive_initial_secrets(
+ version, rx_secret, tx_secret, initial_secret, client_dcid,
+ server ? NGTCP2_CRYPTO_SIDE_SERVER : NGTCP2_CRYPTO_SIDE_CLIENT) !=
+ 0) {
+ return -1;
+ }
+
+ if (ngtcp2_crypto_derive_packet_protection_key(
+ rx_key, rx_iv, rx_hp_key, version, &ctx->aead, &ctx->md, rx_secret,
+ NGTCP2_CRYPTO_INITIAL_SECRETLEN) != 0) {
+ return -1;
+ }
+
+ if (ngtcp2_crypto_derive_packet_protection_key(
+ tx_key, tx_iv, tx_hp_key, version, &ctx->aead, &ctx->md, tx_secret,
+ NGTCP2_CRYPTO_INITIAL_SECRETLEN) != 0) {
+ return -1;
+ }
+
+ if (ngtcp2_crypto_aead_ctx_decrypt_init(&rx_aead_ctx, &ctx->aead, rx_key,
+ NGTCP2_CRYPTO_INITIAL_IVLEN) != 0) {
+ goto fail;
+ }
+
+ if (ngtcp2_crypto_cipher_ctx_encrypt_init(&rx_hp_ctx, &ctx->hp, rx_hp_key) !=
+ 0) {
+ goto fail;
+ }
+
+ if (ngtcp2_crypto_aead_ctx_encrypt_init(&tx_aead_ctx, &ctx->aead, tx_key,
+ NGTCP2_CRYPTO_INITIAL_IVLEN) != 0) {
+ goto fail;
+ }
+
+ if (ngtcp2_crypto_cipher_ctx_encrypt_init(&tx_hp_ctx, &ctx->hp, tx_hp_key) !=
+ 0) {
+ goto fail;
+ }
+
+ rv = ngtcp2_conn_install_vneg_initial_key(
+ conn, version, &rx_aead_ctx, rx_iv, &rx_hp_ctx, &tx_aead_ctx, tx_iv,
+ &tx_hp_ctx, NGTCP2_CRYPTO_INITIAL_IVLEN);
+ if (rv != 0) {
+ goto fail;
+ }
+
+ return 0;
+
+fail:
+ ngtcp2_crypto_cipher_ctx_free(&tx_hp_ctx);
+ ngtcp2_crypto_aead_ctx_free(&tx_aead_ctx);
+ ngtcp2_crypto_cipher_ctx_free(&rx_hp_ctx);
+ ngtcp2_crypto_aead_ctx_free(&rx_aead_ctx);
+
+ return -1;
+}
+
+int ngtcp2_crypto_update_key(
+ ngtcp2_conn *conn, uint8_t *rx_secret, uint8_t *tx_secret,
+ ngtcp2_crypto_aead_ctx *rx_aead_ctx, uint8_t *rx_key, uint8_t *rx_iv,
+ ngtcp2_crypto_aead_ctx *tx_aead_ctx, uint8_t *tx_key, uint8_t *tx_iv,
+ const uint8_t *current_rx_secret, const uint8_t *current_tx_secret,
+ size_t secretlen) {
+ const ngtcp2_crypto_ctx *ctx = ngtcp2_conn_get_crypto_ctx(conn);
+ const ngtcp2_crypto_aead *aead = &ctx->aead;
+ const ngtcp2_crypto_md *md = &ctx->md;
+ size_t ivlen = ngtcp2_crypto_packet_protection_ivlen(aead);
+ uint32_t version = ngtcp2_conn_get_negotiated_version(conn);
+
+ if (ngtcp2_crypto_update_traffic_secret(rx_secret, md, current_rx_secret,
+ secretlen) != 0) {
+ return -1;
+ }
+
+ if (ngtcp2_crypto_derive_packet_protection_key(
+ rx_key, rx_iv, NULL, version, aead, md, rx_secret, secretlen) != 0) {
+ return -1;
+ }
+
+ if (ngtcp2_crypto_update_traffic_secret(tx_secret, md, current_tx_secret,
+ secretlen) != 0) {
+ return -1;
+ }
+
+ if (ngtcp2_crypto_derive_packet_protection_key(
+ tx_key, tx_iv, NULL, version, aead, md, tx_secret, secretlen) != 0) {
+ return -1;
+ }
+
+ if (ngtcp2_crypto_aead_ctx_decrypt_init(rx_aead_ctx, aead, rx_key, ivlen) !=
+ 0) {
+ return -1;
+ }
+
+ if (ngtcp2_crypto_aead_ctx_encrypt_init(tx_aead_ctx, aead, tx_key, ivlen) !=
+ 0) {
+ ngtcp2_crypto_aead_ctx_free(rx_aead_ctx);
+ rx_aead_ctx->native_handle = NULL;
+ return -1;
+ }
+
+ return 0;
+}
+
+int ngtcp2_crypto_encrypt_cb(uint8_t *dest, const ngtcp2_crypto_aead *aead,
+ const ngtcp2_crypto_aead_ctx *aead_ctx,
+ const uint8_t *plaintext, size_t plaintextlen,
+ const uint8_t *nonce, size_t noncelen,
+ const uint8_t *aad, size_t aadlen) {
+ if (ngtcp2_crypto_encrypt(dest, aead, aead_ctx, plaintext, plaintextlen,
+ nonce, noncelen, aad, aadlen) != 0) {
+ return NGTCP2_ERR_CALLBACK_FAILURE;
+ }
+ return 0;
+}
+
+int ngtcp2_crypto_decrypt_cb(uint8_t *dest, const ngtcp2_crypto_aead *aead,
+ const ngtcp2_crypto_aead_ctx *aead_ctx,
+ const uint8_t *ciphertext, size_t ciphertextlen,
+ const uint8_t *nonce, size_t noncelen,
+ const uint8_t *aad, size_t aadlen) {
+ if (ngtcp2_crypto_decrypt(dest, aead, aead_ctx, ciphertext, ciphertextlen,
+ nonce, noncelen, aad, aadlen) != 0) {
+ return NGTCP2_ERR_DECRYPT;
+ }
+ return 0;
+}
+
+int ngtcp2_crypto_hp_mask_cb(uint8_t *dest, const ngtcp2_crypto_cipher *hp,
+ const ngtcp2_crypto_cipher_ctx *hp_ctx,
+ const uint8_t *sample) {
+ if (ngtcp2_crypto_hp_mask(dest, hp, hp_ctx, sample) != 0) {
+ return NGTCP2_ERR_CALLBACK_FAILURE;
+ }
+ return 0;
+}
+
+int ngtcp2_crypto_update_key_cb(
+ ngtcp2_conn *conn, uint8_t *rx_secret, uint8_t *tx_secret,
+ ngtcp2_crypto_aead_ctx *rx_aead_ctx, uint8_t *rx_iv,
+ ngtcp2_crypto_aead_ctx *tx_aead_ctx, uint8_t *tx_iv,
+ const uint8_t *current_rx_secret, const uint8_t *current_tx_secret,
+ size_t secretlen, void *user_data) {
+ uint8_t rx_key[64];
+ uint8_t tx_key[64];
+ (void)conn;
+ (void)user_data;
+
+ if (ngtcp2_crypto_update_key(conn, rx_secret, tx_secret, rx_aead_ctx, rx_key,
+ rx_iv, tx_aead_ctx, tx_key, tx_iv,
+ current_rx_secret, current_tx_secret,
+ secretlen) != 0) {
+ return NGTCP2_ERR_CALLBACK_FAILURE;
+ }
+ return 0;
+}
+
+int ngtcp2_crypto_generate_stateless_reset_token(uint8_t *token,
+ const uint8_t *secret,
+ size_t secretlen,
+ const ngtcp2_cid *cid) {
+ static const uint8_t info[] = "stateless_reset";
+ ngtcp2_crypto_md md;
+
+ if (ngtcp2_crypto_hkdf(token, NGTCP2_STATELESS_RESET_TOKENLEN,
+ ngtcp2_crypto_md_sha256(&md), secret, secretlen,
+ cid->data, cid->datalen, info,
+ sizeof(info) - 1) != 0) {
+ return -1;
+ }
+
+ return 0;
+}
+
+static int crypto_derive_token_key(uint8_t *key, size_t keylen, uint8_t *iv,
+ size_t ivlen, const ngtcp2_crypto_md *md,
+ const uint8_t *secret, size_t secretlen,
+ const uint8_t *salt, size_t saltlen,
+ const uint8_t *info_prefix,
+ size_t info_prefixlen) {
+ static const uint8_t key_info_suffix[] = " key";
+ static const uint8_t iv_info_suffix[] = " iv";
+ uint8_t intsecret[32];
+ uint8_t info[32];
+ uint8_t *p;
+
+ assert(ngtcp2_crypto_md_hashlen(md) == sizeof(intsecret));
+ assert(info_prefixlen + sizeof(key_info_suffix) - 1 <= sizeof(info));
+ assert(info_prefixlen + sizeof(iv_info_suffix) - 1 <= sizeof(info));
+
+ if (ngtcp2_crypto_hkdf_extract(intsecret, md, secret, secretlen, salt,
+ saltlen) != 0) {
+ return -1;
+ }
+
+ memcpy(info, info_prefix, info_prefixlen);
+ p = info + info_prefixlen;
+
+ memcpy(p, key_info_suffix, sizeof(key_info_suffix) - 1);
+ p += sizeof(key_info_suffix) - 1;
+
+ if (ngtcp2_crypto_hkdf_expand(key, keylen, md, intsecret, sizeof(intsecret),
+ info, (size_t)(p - info)) != 0) {
+ return -1;
+ }
+
+ p = info + info_prefixlen;
+
+ memcpy(p, iv_info_suffix, sizeof(iv_info_suffix) - 1);
+ p += sizeof(iv_info_suffix) - 1;
+
+ if (ngtcp2_crypto_hkdf_expand(iv, ivlen, md, intsecret, sizeof(intsecret),
+ info, (size_t)(p - info)) != 0) {
+ return -1;
+ }
+
+ return 0;
+}
+
+static size_t crypto_generate_retry_token_aad(uint8_t *dest, uint32_t version,
+ const ngtcp2_sockaddr *sa,
+ ngtcp2_socklen salen,
+ const ngtcp2_cid *retry_scid) {
+ uint8_t *p = dest;
+
+ version = ngtcp2_htonl(version);
+ memcpy(p, &version, sizeof(version));
+ memcpy(p, sa, (size_t)salen);
+ p += salen;
+ memcpy(p, retry_scid->data, retry_scid->datalen);
+ p += retry_scid->datalen;
+
+ return (size_t)(p - dest);
+}
+
+static const uint8_t retry_token_info_prefix[] = "retry_token";
+
+ngtcp2_ssize ngtcp2_crypto_generate_retry_token(
+ uint8_t *token, const uint8_t *secret, size_t secretlen, uint32_t version,
+ const ngtcp2_sockaddr *remote_addr, ngtcp2_socklen remote_addrlen,
+ const ngtcp2_cid *retry_scid, const ngtcp2_cid *odcid, ngtcp2_tstamp ts) {
+ uint8_t plaintext[NGTCP2_CRYPTO_MAX_RETRY_TOKENLEN];
+ uint8_t rand_data[NGTCP2_CRYPTO_TOKEN_RAND_DATALEN];
+ uint8_t key[32];
+ uint8_t iv[32];
+ size_t keylen;
+ size_t ivlen;
+ ngtcp2_crypto_aead aead;
+ ngtcp2_crypto_md md;
+ ngtcp2_crypto_aead_ctx aead_ctx;
+ size_t plaintextlen;
+ uint8_t aad[sizeof(version) + sizeof(ngtcp2_sockaddr_storage) +
+ NGTCP2_MAX_CIDLEN];
+ size_t aadlen;
+ uint8_t *p = plaintext;
+ ngtcp2_tstamp ts_be = ngtcp2_htonl64(ts);
+ int rv;
+
+ memset(plaintext, 0, sizeof(plaintext));
+
+ *p++ = (uint8_t)odcid->datalen;
+ memcpy(p, odcid->data, odcid->datalen);
+ p += NGTCP2_MAX_CIDLEN;
+ memcpy(p, &ts_be, sizeof(ts_be));
+ p += sizeof(ts_be);
+
+ plaintextlen = (size_t)(p - plaintext);
+
+ if (ngtcp2_crypto_random(rand_data, sizeof(rand_data)) != 0) {
+ return -1;
+ }
+
+ ngtcp2_crypto_aead_aes_128_gcm(&aead);
+ ngtcp2_crypto_md_sha256(&md);
+
+ keylen = ngtcp2_crypto_aead_keylen(&aead);
+ ivlen = ngtcp2_crypto_aead_noncelen(&aead);
+
+ assert(sizeof(key) >= keylen);
+ assert(sizeof(iv) >= ivlen);
+
+ if (crypto_derive_token_key(key, keylen, iv, ivlen, &md, secret, secretlen,
+ rand_data, sizeof(rand_data),
+ retry_token_info_prefix,
+ sizeof(retry_token_info_prefix) - 1) != 0) {
+ return -1;
+ }
+
+ aadlen = crypto_generate_retry_token_aad(aad, version, remote_addr,
+ remote_addrlen, retry_scid);
+
+ p = token;
+ *p++ = NGTCP2_CRYPTO_TOKEN_MAGIC_RETRY;
+
+ if (ngtcp2_crypto_aead_ctx_encrypt_init(&aead_ctx, &aead, key, ivlen) != 0) {
+ return -1;
+ }
+
+ rv = ngtcp2_crypto_encrypt(p, &aead, &aead_ctx, plaintext, plaintextlen, iv,
+ ivlen, aad, aadlen);
+
+ ngtcp2_crypto_aead_ctx_free(&aead_ctx);
+
+ if (rv != 0) {
+ return -1;
+ }
+
+ p += plaintextlen + aead.max_overhead;
+ memcpy(p, rand_data, sizeof(rand_data));
+ p += sizeof(rand_data);
+
+ return p - token;
+}
+
+int ngtcp2_crypto_verify_retry_token(
+ ngtcp2_cid *odcid, const uint8_t *token, size_t tokenlen,
+ const uint8_t *secret, size_t secretlen, uint32_t version,
+ const ngtcp2_sockaddr *remote_addr, ngtcp2_socklen remote_addrlen,
+ const ngtcp2_cid *dcid, ngtcp2_duration timeout, ngtcp2_tstamp ts) {
+ uint8_t
+ plaintext[/* cid len = */ 1 + NGTCP2_MAX_CIDLEN + sizeof(ngtcp2_tstamp)];
+ uint8_t key[32];
+ uint8_t iv[32];
+ size_t keylen;
+ size_t ivlen;
+ ngtcp2_crypto_aead_ctx aead_ctx;
+ ngtcp2_crypto_aead aead;
+ ngtcp2_crypto_md md;
+ uint8_t aad[sizeof(version) + sizeof(ngtcp2_sockaddr_storage) +
+ NGTCP2_MAX_CIDLEN];
+ size_t aadlen;
+ const uint8_t *rand_data;
+ const uint8_t *ciphertext;
+ size_t ciphertextlen;
+ size_t cil;
+ int rv;
+ ngtcp2_tstamp gen_ts;
+
+ if (tokenlen != NGTCP2_CRYPTO_MAX_RETRY_TOKENLEN ||
+ token[0] != NGTCP2_CRYPTO_TOKEN_MAGIC_RETRY) {
+ return -1;
+ }
+
+ rand_data = token + tokenlen - NGTCP2_CRYPTO_TOKEN_RAND_DATALEN;
+ ciphertext = token + 1;
+ ciphertextlen = tokenlen - 1 - NGTCP2_CRYPTO_TOKEN_RAND_DATALEN;
+
+ ngtcp2_crypto_aead_aes_128_gcm(&aead);
+ ngtcp2_crypto_md_sha256(&md);
+
+ keylen = ngtcp2_crypto_aead_keylen(&aead);
+ ivlen = ngtcp2_crypto_aead_noncelen(&aead);
+
+ if (crypto_derive_token_key(key, keylen, iv, ivlen, &md, secret, secretlen,
+ rand_data, NGTCP2_CRYPTO_TOKEN_RAND_DATALEN,
+ retry_token_info_prefix,
+ sizeof(retry_token_info_prefix) - 1) != 0) {
+ return -1;
+ }
+
+ aadlen = crypto_generate_retry_token_aad(aad, version, remote_addr,
+ remote_addrlen, dcid);
+
+ if (ngtcp2_crypto_aead_ctx_decrypt_init(&aead_ctx, &aead, key, ivlen) != 0) {
+ return -1;
+ }
+
+ rv = ngtcp2_crypto_decrypt(plaintext, &aead, &aead_ctx, ciphertext,
+ ciphertextlen, iv, ivlen, aad, aadlen);
+
+ ngtcp2_crypto_aead_ctx_free(&aead_ctx);
+
+ if (rv != 0) {
+ return -1;
+ }
+
+ cil = plaintext[0];
+
+ assert(cil == 0 || (cil >= NGTCP2_MIN_CIDLEN && cil <= NGTCP2_MAX_CIDLEN));
+
+ memcpy(&gen_ts, plaintext + /* cid len = */ 1 + NGTCP2_MAX_CIDLEN,
+ sizeof(gen_ts));
+
+ gen_ts = ngtcp2_ntohl64(gen_ts);
+ if (gen_ts + timeout <= ts) {
+ return -1;
+ }
+
+ ngtcp2_cid_init(odcid, plaintext + /* cid len = */ 1, cil);
+
+ return 0;
+}
+
+static size_t crypto_generate_regular_token_aad(uint8_t *dest,
+ const ngtcp2_sockaddr *sa) {
+ const uint8_t *addr;
+ size_t addrlen;
+
+ switch (sa->sa_family) {
+ case AF_INET:
+ addr = (const uint8_t *)&((const ngtcp2_sockaddr_in *)(void *)sa)->sin_addr;
+ addrlen = sizeof(((const ngtcp2_sockaddr_in *)(void *)sa)->sin_addr);
+ break;
+ case AF_INET6:
+ addr =
+ (const uint8_t *)&((const ngtcp2_sockaddr_in6 *)(void *)sa)->sin6_addr;
+ addrlen = sizeof(((const ngtcp2_sockaddr_in6 *)(void *)sa)->sin6_addr);
+ break;
+ default:
+ assert(0);
+ abort();
+ }
+
+ memcpy(dest, addr, addrlen);
+
+ return addrlen;
+}
+
+static const uint8_t regular_token_info_prefix[] = "regular_token";
+
+ngtcp2_ssize ngtcp2_crypto_generate_regular_token(
+ uint8_t *token, const uint8_t *secret, size_t secretlen,
+ const ngtcp2_sockaddr *remote_addr, ngtcp2_socklen remote_addrlen,
+ ngtcp2_tstamp ts) {
+ uint8_t plaintext[sizeof(ngtcp2_tstamp)];
+ uint8_t rand_data[NGTCP2_CRYPTO_TOKEN_RAND_DATALEN];
+ uint8_t key[32];
+ uint8_t iv[32];
+ size_t keylen;
+ size_t ivlen;
+ ngtcp2_crypto_aead aead;
+ ngtcp2_crypto_md md;
+ ngtcp2_crypto_aead_ctx aead_ctx;
+ size_t plaintextlen;
+ uint8_t aad[sizeof(ngtcp2_sockaddr_in6)];
+ size_t aadlen;
+ uint8_t *p = plaintext;
+ ngtcp2_tstamp ts_be = ngtcp2_htonl64(ts);
+ int rv;
+ (void)remote_addrlen;
+
+ memcpy(p, &ts_be, sizeof(ts_be));
+ p += sizeof(ts_be);
+
+ plaintextlen = (size_t)(p - plaintext);
+
+ if (ngtcp2_crypto_random(rand_data, sizeof(rand_data)) != 0) {
+ return -1;
+ }
+
+ ngtcp2_crypto_aead_aes_128_gcm(&aead);
+ ngtcp2_crypto_md_sha256(&md);
+
+ keylen = ngtcp2_crypto_aead_keylen(&aead);
+ ivlen = ngtcp2_crypto_aead_noncelen(&aead);
+
+ assert(sizeof(key) >= keylen);
+ assert(sizeof(iv) >= ivlen);
+
+ if (crypto_derive_token_key(key, keylen, iv, ivlen, &md, secret, secretlen,
+ rand_data, sizeof(rand_data),
+ regular_token_info_prefix,
+ sizeof(regular_token_info_prefix) - 1) != 0) {
+ return -1;
+ }
+
+ aadlen = crypto_generate_regular_token_aad(aad, remote_addr);
+
+ p = token;
+ *p++ = NGTCP2_CRYPTO_TOKEN_MAGIC_REGULAR;
+
+ if (ngtcp2_crypto_aead_ctx_encrypt_init(&aead_ctx, &aead, key, ivlen) != 0) {
+ return -1;
+ }
+
+ rv = ngtcp2_crypto_encrypt(p, &aead, &aead_ctx, plaintext, plaintextlen, iv,
+ ivlen, aad, aadlen);
+
+ ngtcp2_crypto_aead_ctx_free(&aead_ctx);
+
+ if (rv != 0) {
+ return -1;
+ }
+
+ p += plaintextlen + aead.max_overhead;
+ memcpy(p, rand_data, sizeof(rand_data));
+ p += sizeof(rand_data);
+
+ return p - token;
+}
+
+int ngtcp2_crypto_verify_regular_token(const uint8_t *token, size_t tokenlen,
+ const uint8_t *secret, size_t secretlen,
+ const ngtcp2_sockaddr *remote_addr,
+ ngtcp2_socklen remote_addrlen,
+ ngtcp2_duration timeout,
+ ngtcp2_tstamp ts) {
+ uint8_t plaintext[sizeof(ngtcp2_tstamp)];
+ uint8_t key[32];
+ uint8_t iv[32];
+ size_t keylen;
+ size_t ivlen;
+ ngtcp2_crypto_aead_ctx aead_ctx;
+ ngtcp2_crypto_aead aead;
+ ngtcp2_crypto_md md;
+ uint8_t aad[sizeof(ngtcp2_sockaddr_in6)];
+ size_t aadlen;
+ const uint8_t *rand_data;
+ const uint8_t *ciphertext;
+ size_t ciphertextlen;
+ int rv;
+ ngtcp2_tstamp gen_ts;
+ (void)remote_addrlen;
+
+ if (tokenlen != NGTCP2_CRYPTO_MAX_REGULAR_TOKENLEN ||
+ token[0] != NGTCP2_CRYPTO_TOKEN_MAGIC_REGULAR) {
+ return -1;
+ }
+
+ rand_data = token + tokenlen - NGTCP2_CRYPTO_TOKEN_RAND_DATALEN;
+ ciphertext = token + 1;
+ ciphertextlen = tokenlen - 1 - NGTCP2_CRYPTO_TOKEN_RAND_DATALEN;
+
+ ngtcp2_crypto_aead_aes_128_gcm(&aead);
+ ngtcp2_crypto_md_sha256(&md);
+
+ keylen = ngtcp2_crypto_aead_keylen(&aead);
+ ivlen = ngtcp2_crypto_aead_noncelen(&aead);
+
+ if (crypto_derive_token_key(key, keylen, iv, ivlen, &md, secret, secretlen,
+ rand_data, NGTCP2_CRYPTO_TOKEN_RAND_DATALEN,
+ regular_token_info_prefix,
+ sizeof(regular_token_info_prefix) - 1) != 0) {
+ return -1;
+ }
+
+ aadlen = crypto_generate_regular_token_aad(aad, remote_addr);
+
+ if (ngtcp2_crypto_aead_ctx_decrypt_init(&aead_ctx, &aead, key, ivlen) != 0) {
+ return -1;
+ }
+
+ rv = ngtcp2_crypto_decrypt(plaintext, &aead, &aead_ctx, ciphertext,
+ ciphertextlen, iv, ivlen, aad, aadlen);
+
+ ngtcp2_crypto_aead_ctx_free(&aead_ctx);
+
+ if (rv != 0) {
+ return -1;
+ }
+
+ memcpy(&gen_ts, plaintext, sizeof(gen_ts));
+
+ gen_ts = ngtcp2_ntohl64(gen_ts);
+ if (gen_ts + timeout <= ts) {
+ return -1;
+ }
+
+ return 0;
+}
+
+ngtcp2_ssize ngtcp2_crypto_write_connection_close(
+ uint8_t *dest, size_t destlen, uint32_t version, const ngtcp2_cid *dcid,
+ const ngtcp2_cid *scid, uint64_t error_code, const uint8_t *reason,
+ size_t reasonlen) {
+ uint8_t rx_secret[NGTCP2_CRYPTO_INITIAL_SECRETLEN];
+ uint8_t tx_secret[NGTCP2_CRYPTO_INITIAL_SECRETLEN];
+ uint8_t initial_secret[NGTCP2_CRYPTO_INITIAL_SECRETLEN];
+ uint8_t tx_key[NGTCP2_CRYPTO_INITIAL_KEYLEN];
+ uint8_t tx_iv[NGTCP2_CRYPTO_INITIAL_IVLEN];
+ uint8_t tx_hp_key[NGTCP2_CRYPTO_INITIAL_KEYLEN];
+ ngtcp2_crypto_ctx ctx;
+ ngtcp2_ssize spktlen;
+ ngtcp2_crypto_aead_ctx aead_ctx = {0};
+ ngtcp2_crypto_cipher_ctx hp_ctx = {0};
+
+ ngtcp2_crypto_ctx_initial(&ctx);
+
+ if (ngtcp2_crypto_derive_initial_secrets(version, rx_secret, tx_secret,
+ initial_secret, scid,
+ NGTCP2_CRYPTO_SIDE_SERVER) != 0) {
+ return -1;
+ }
+
+ if (ngtcp2_crypto_derive_packet_protection_key(
+ tx_key, tx_iv, tx_hp_key, version, &ctx.aead, &ctx.md, tx_secret,
+ NGTCP2_CRYPTO_INITIAL_SECRETLEN) != 0) {
+ return -1;
+ }
+
+ if (ngtcp2_crypto_aead_ctx_encrypt_init(&aead_ctx, &ctx.aead, tx_key,
+ NGTCP2_CRYPTO_INITIAL_IVLEN) != 0) {
+ spktlen = -1;
+ goto end;
+ }
+
+ if (ngtcp2_crypto_cipher_ctx_encrypt_init(&hp_ctx, &ctx.hp, tx_hp_key) != 0) {
+ spktlen = -1;
+ goto end;
+ }
+
+ spktlen = ngtcp2_pkt_write_connection_close(
+ dest, destlen, version, dcid, scid, error_code, reason, reasonlen,
+ ngtcp2_crypto_encrypt_cb, &ctx.aead, &aead_ctx, tx_iv,
+ ngtcp2_crypto_hp_mask_cb, &ctx.hp, &hp_ctx);
+ if (spktlen < 0) {
+ spktlen = -1;
+ }
+
+end:
+ ngtcp2_crypto_cipher_ctx_free(&hp_ctx);
+ ngtcp2_crypto_aead_ctx_free(&aead_ctx);
+
+ return spktlen;
+}
+
+ngtcp2_ssize ngtcp2_crypto_write_retry(uint8_t *dest, size_t destlen,
+ uint32_t version, const ngtcp2_cid *dcid,
+ const ngtcp2_cid *scid,
+ const ngtcp2_cid *odcid,
+ const uint8_t *token, size_t tokenlen) {
+ ngtcp2_crypto_aead aead;
+ ngtcp2_ssize spktlen;
+ ngtcp2_crypto_aead_ctx aead_ctx = {0};
+ const uint8_t *key;
+ size_t noncelen;
+
+ ngtcp2_crypto_aead_retry(&aead);
+
+ switch (version) {
+ case NGTCP2_PROTO_VER_V1:
+ key = (const uint8_t *)NGTCP2_RETRY_KEY_V1;
+ noncelen = sizeof(NGTCP2_RETRY_NONCE_V1) - 1;
+ break;
+ case NGTCP2_PROTO_VER_V2:
+ key = (const uint8_t *)NGTCP2_RETRY_KEY_V2;
+ noncelen = sizeof(NGTCP2_RETRY_NONCE_V2) - 1;
+ break;
+ default:
+ key = (const uint8_t *)NGTCP2_RETRY_KEY_DRAFT;
+ noncelen = sizeof(NGTCP2_RETRY_NONCE_DRAFT) - 1;
+ }
+
+ if (ngtcp2_crypto_aead_ctx_encrypt_init(&aead_ctx, &aead, key, noncelen) !=
+ 0) {
+ return -1;
+ }
+
+ spktlen = ngtcp2_pkt_write_retry(dest, destlen, version, dcid, scid, odcid,
+ token, tokenlen, ngtcp2_crypto_encrypt_cb,
+ &aead, &aead_ctx);
+ if (spktlen < 0) {
+ spktlen = -1;
+ }
+
+ ngtcp2_crypto_aead_ctx_free(&aead_ctx);
+
+ return spktlen;
+}
+
+int ngtcp2_crypto_client_initial_cb(ngtcp2_conn *conn, void *user_data) {
+ const ngtcp2_cid *dcid = ngtcp2_conn_get_dcid(conn);
+ void *tls = ngtcp2_conn_get_tls_native_handle(conn);
+ (void)user_data;
+
+ if (ngtcp2_crypto_derive_and_install_initial_key(
+ conn, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+ ngtcp2_conn_get_client_chosen_version(conn), dcid) != 0) {
+ return NGTCP2_ERR_CALLBACK_FAILURE;
+ }
+
+ if (crypto_set_local_transport_params(conn, tls) != 0) {
+ return NGTCP2_ERR_CALLBACK_FAILURE;
+ }
+
+ if (ngtcp2_crypto_read_write_crypto_data(conn, NGTCP2_CRYPTO_LEVEL_INITIAL,
+ NULL, 0) != 0) {
+ return NGTCP2_ERR_CALLBACK_FAILURE;
+ }
+
+ return 0;
+}
+
+int ngtcp2_crypto_recv_retry_cb(ngtcp2_conn *conn, const ngtcp2_pkt_hd *hd,
+ void *user_data) {
+ (void)user_data;
+
+ if (ngtcp2_crypto_derive_and_install_initial_key(
+ conn, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+ ngtcp2_conn_get_client_chosen_version(conn), &hd->scid) != 0) {
+ return NGTCP2_ERR_CALLBACK_FAILURE;
+ }
+
+ return 0;
+}
+
+int ngtcp2_crypto_recv_client_initial_cb(ngtcp2_conn *conn,
+ const ngtcp2_cid *dcid,
+ void *user_data) {
+ (void)user_data;
+
+ if (ngtcp2_crypto_derive_and_install_initial_key(
+ conn, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+ ngtcp2_conn_get_client_chosen_version(conn), dcid) != 0) {
+ return NGTCP2_ERR_CALLBACK_FAILURE;
+ }
+
+ return 0;
+}
+
+int ngtcp2_crypto_version_negotiation_cb(ngtcp2_conn *conn, uint32_t version,
+ const ngtcp2_cid *client_dcid,
+ void *user_data) {
+ (void)user_data;
+
+ if (ngtcp2_crypto_derive_and_install_vneg_initial_key(
+ conn, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, version,
+ client_dcid) != 0) {
+ return NGTCP2_ERR_CALLBACK_FAILURE;
+ }
+
+ return 0;
+}
+
+void ngtcp2_crypto_delete_crypto_aead_ctx_cb(ngtcp2_conn *conn,
+ ngtcp2_crypto_aead_ctx *aead_ctx,
+ void *user_data) {
+ (void)conn;
+ (void)user_data;
+
+ ngtcp2_crypto_aead_ctx_free(aead_ctx);
+}
+
+void ngtcp2_crypto_delete_crypto_cipher_ctx_cb(
+ ngtcp2_conn *conn, ngtcp2_crypto_cipher_ctx *cipher_ctx, void *user_data) {
+ (void)conn;
+ (void)user_data;
+
+ ngtcp2_crypto_cipher_ctx_free(cipher_ctx);
+}
+
+int ngtcp2_crypto_recv_crypto_data_cb(ngtcp2_conn *conn,
+ ngtcp2_crypto_level crypto_level,
+ uint64_t offset, const uint8_t *data,
+ size_t datalen, void *user_data) {
+ int rv;
+ (void)offset;
+ (void)user_data;
+
+ if (ngtcp2_crypto_read_write_crypto_data(conn, crypto_level, data, datalen) !=
+ 0) {
+ rv = ngtcp2_conn_get_tls_error(conn);
+ if (rv) {
+ return rv;
+ }
+ return NGTCP2_ERR_CRYPTO;
+ }
+
+ return 0;
+}
diff --git a/src/contrib/libngtcp2/ngtcp2/crypto/shared.h b/src/contrib/libngtcp2/ngtcp2/crypto/shared.h
new file mode 100644
index 0000000..c7690c1
--- /dev/null
+++ b/src/contrib/libngtcp2/ngtcp2/crypto/shared.h
@@ -0,0 +1,350 @@
+/*
+ * ngtcp2
+ *
+ * Copyright (c) 2019 ngtcp2 contributors
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#ifndef NGTCP2_SHARED_H
+#define NGTCP2_SHARED_H
+
+#ifdef HAVE_CONFIG_H
+# include <config.h>
+#endif /* HAVE_CONFIG_H */
+
+#include <ngtcp2/ngtcp2_crypto.h>
+
+/**
+ * @macro
+ *
+ * :macro:`NGTCP2_INITIAL_SALT_DRAFT` is a salt value which is used to
+ * derive initial secret. It is used for QUIC draft versions.
+ */
+#define NGTCP2_INITIAL_SALT_DRAFT \
+ "\xaf\xbf\xec\x28\x99\x93\xd2\x4c\x9e\x97\x86\xf1\x9c\x61\x11\xe0\x43\x90" \
+ "\xa8\x99"
+
+/**
+ * @macro
+ *
+ * :macro:`NGTCP2_INITIAL_SALT_V1` is a salt value which is used to
+ * derive initial secret. It is used for QUIC v1.
+ */
+#define NGTCP2_INITIAL_SALT_V1 \
+ "\x38\x76\x2c\xf7\xf5\x59\x34\xb3\x4d\x17\x9a\xe6\xa4\xc8\x0c\xad\xcc\xbb" \
+ "\x7f\x0a"
+
+/**
+ * @macro
+ *
+ * :macro:`NGTCP2_INITIAL_SALT_V2` is a salt value which is used to
+ * derive initial secret. It is used for QUIC v2.
+ */
+#define NGTCP2_INITIAL_SALT_V2 \
+ "\x0d\xed\xe3\xde\xf7\x00\xa6\xdb\x81\x93\x81\xbe\x6e\x26\x9d\xcb\xf9\xbd" \
+ "\x2e\xd9"
+
+/* Maximum key usage (encryption) limits */
+#define NGTCP2_CRYPTO_MAX_ENCRYPTION_AES_GCM (1ULL << 23)
+#define NGTCP2_CRYPTO_MAX_ENCRYPTION_CHACHA20_POLY1305 (1ULL << 62)
+#define NGTCP2_CRYPTO_MAX_ENCRYPTION_AES_CCM (2965820ULL)
+
+/* Maximum authentication failure (decryption) limits during the
+ lifetime of a connection. */
+#define NGTCP2_CRYPTO_MAX_DECRYPTION_FAILURE_AES_GCM (1ULL << 52)
+#define NGTCP2_CRYPTO_MAX_DECRYPTION_FAILURE_CHACHA20_POLY1305 (1ULL << 36)
+#define NGTCP2_CRYPTO_MAX_DECRYPTION_FAILURE_AES_CCM (2965820ULL)
+
+/**
+ * @function
+ *
+ * `ngtcp2_crypto_ctx_initial` initializes |ctx| for Initial packet
+ * encryption and decryption.
+ */
+ngtcp2_crypto_ctx *ngtcp2_crypto_ctx_initial(ngtcp2_crypto_ctx *ctx);
+
+/**
+ * @function
+ *
+ * `ngtcp2_crypto_aead_init` initializes |aead| with the provided
+ * |aead_native_handle| which is an underlying AEAD object.
+ *
+ * If libngtcp2_crypto_openssl is linked, |aead_native_handle| must be
+ * a pointer to EVP_CIPHER.
+ *
+ * If libngtcp2_crypto_gnutls is linked, |aead_native_handle| must be
+ * gnutls_cipher_algorithm_t casted to ``void *``.
+ *
+ * If libngtcp2_crypto_boringssl is linked, |aead_native_handle| must
+ * be a pointer to EVP_AEAD.
+ */
+ngtcp2_crypto_aead *ngtcp2_crypto_aead_init(ngtcp2_crypto_aead *aead,
+ void *aead_native_handle);
+
+/**
+ * @function
+ *
+ * `ngtcp2_crypto_aead_retry` initializes |aead| with the AEAD cipher
+ * AEAD_AES_128_GCM for Retry packet integrity protection.
+ */
+ngtcp2_crypto_aead *ngtcp2_crypto_aead_retry(ngtcp2_crypto_aead *aead);
+
+/**
+ * @function
+ *
+ * `ngtcp2_crypto_derive_initial_secrets` derives initial secrets.
+ * |rx_secret| and |tx_secret| must point to the buffer of at least 32
+ * bytes capacity. rx for read and tx for write. This function
+ * writes rx and tx secrets into |rx_secret| and |tx_secret|
+ * respectively. The length of secret is 32 bytes long.
+ * |client_dcid| is the destination connection ID in first Initial
+ * packet of client. If |initial_secret| is not NULL, the initial
+ * secret is written to it. It must point to the buffer which has at
+ * least 32 bytes capacity. The initial secret is 32 bytes long.
+ * |side| specifies the side of application.
+ *
+ * This function returns 0 if it succeeds, or -1.
+ */
+int ngtcp2_crypto_derive_initial_secrets(uint32_t version, uint8_t *rx_secret,
+ uint8_t *tx_secret,
+ uint8_t *initial_secret,
+ const ngtcp2_cid *client_dcid,
+ ngtcp2_crypto_side side);
+
+/**
+ * @function
+ *
+ * `ngtcp2_crypto_derive_packet_protection_key` derives packet
+ * protection key. This function writes packet protection key into
+ * the buffer pointed by |key|. The length of derived key is
+ * `ngtcp2_crypto_aead_keylen(aead) <ngtcp2_crypto_aead_keylen>`
+ * bytes. |key| must have enough capacity to store the key. This
+ * function writes packet protection IV into |iv|. The length of
+ * derived IV is `ngtcp2_crypto_packet_protection_ivlen(aead)
+ * <ngtcp2_crypto_packet_protection_ivlen>` bytes. |iv| must have
+ * enough capacity to store the IV.
+ *
+ * If |hp| is not NULL, this function also derives packet header
+ * protection key and writes the key into the buffer pointed by |hp|.
+ * The length of derived key is `ngtcp2_crypto_aead_keylen(aead)
+ * <ngtcp2_crypto_aead_keylen>` bytes. |hp|, if not NULL, must have
+ * enough capacity to store the key.
+ *
+ * This function returns 0 if it succeeds, or -1.
+ */
+int ngtcp2_crypto_derive_packet_protection_key(uint8_t *key, uint8_t *iv,
+ uint8_t *hp, uint32_t version,
+ const ngtcp2_crypto_aead *aead,
+ const ngtcp2_crypto_md *md,
+ const uint8_t *secret,
+ size_t secretlen);
+
+/**
+ * @function
+ *
+ * `ngtcp2_crypto_update_traffic_secret` derives the next generation
+ * of the traffic secret. |secret| specifies the current secret and
+ * its length is given in |secretlen|. The length of new key is the
+ * same as the current key. This function writes new key into the
+ * buffer pointed by |dest|. |dest| must have the enough capacity to
+ * store the new key.
+ *
+ * This function returns 0 if it succeeds, or -1.
+ */
+int ngtcp2_crypto_update_traffic_secret(uint8_t *dest,
+ const ngtcp2_crypto_md *md,
+ const uint8_t *secret,
+ size_t secretlen);
+
+/**
+ * @function
+ *
+ * `ngtcp2_crypto_set_local_transport_params` sets QUIC transport
+ * parameter, which is encoded in wire format and stored in the buffer
+ * pointed by |buf| of length |len|, to the native handle |tls|.
+ *
+ * |tls| points to a implementation dependent TLS session object. If
+ * libngtcp2_crypto_openssl is linked, |tls| must be a pointer to SSL
+ * object.
+ *
+ * This function returns 0 if it succeeds, or -1.
+ */
+int ngtcp2_crypto_set_local_transport_params(void *tls, const uint8_t *buf,
+ size_t len);
+
+/**
+ * @function
+ *
+ * `ngtcp2_crypto_set_remote_transport_params` retrieves a remote QUIC
+ * transport parameters from |tls| and sets it to |conn| using
+ * `ngtcp2_conn_set_remote_transport_params`.
+ *
+ * |tls| points to a implementation dependent TLS session object. If
+ * libngtcp2_crypto_openssl is linked, |tls| must be a pointer to SSL
+ * object.
+ *
+ * This function returns 0 if it succeeds, or -1.
+ */
+int ngtcp2_crypto_set_remote_transport_params(ngtcp2_conn *conn, void *tls);
+
+/**
+ * @function
+ *
+ * `ngtcp2_crypto_derive_and_install_initial_key` derives initial
+ * keying materials and installs keys to |conn|.
+ *
+ * If |rx_secret| is not NULL, the secret for decryption is written to
+ * the buffer pointed by |rx_secret|. The length of secret is 32
+ * bytes, and |rx_secret| must point to the buffer which has enough
+ * capacity.
+ *
+ * If |tx_secret| is not NULL, the secret for encryption is written to
+ * the buffer pointed by |tx_secret|. The length of secret is 32
+ * bytes, and |tx_secret| must point to the buffer which has enough
+ * capacity.
+ *
+ * If |initial_secret| is not NULL, the initial secret is written to
+ * the buffer pointed by |initial_secret|. The length of secret is 32
+ * bytes, and |initial_secret| must point to the buffer which has
+ * enough capacity.
+ *
+ * |client_dcid| is the destination connection ID in first Initial
+ * packet of client.
+ *
+ * If |rx_key| is not NULL, the derived packet protection key for
+ * decryption is written to the buffer pointed by |rx_key|. If
+ * |rx_iv| is not NULL, the derived packet protection IV for
+ * decryption is written to the buffer pointed by |rx_iv|. If |rx_hp|
+ * is not NULL, the derived header protection key for decryption is
+ * written to the buffer pointed by |rx_hp|.
+ *
+ * If |tx_key| is not NULL, the derived packet protection key for
+ * encryption is written to the buffer pointed by |tx_key|. If
+ * |tx_iv| is not NULL, the derived packet protection IV for
+ * encryption is written to the buffer pointed by |tx_iv|. If |tx_hp|
+ * is not NULL, the derived header protection key for encryption is
+ * written to the buffer pointed by |tx_hp|.
+ *
+ * The length of packet protection key and header protection key is 16
+ * bytes long. The length of packet protection IV is 12 bytes long.
+ *
+ * This function calls `ngtcp2_conn_set_initial_crypto_ctx` to set
+ * initial AEAD and message digest algorithm. After the successful
+ * call of this function, application can use
+ * `ngtcp2_conn_get_initial_crypto_ctx` to get the object.
+ *
+ * This function returns 0 if it succeeds, or -1.
+ */
+int ngtcp2_crypto_derive_and_install_initial_key(
+ ngtcp2_conn *conn, uint8_t *rx_secret, uint8_t *tx_secret,
+ uint8_t *initial_secret, uint8_t *rx_key, uint8_t *rx_iv, uint8_t *rx_hp,
+ uint8_t *tx_key, uint8_t *tx_iv, uint8_t *tx_hp, uint32_t version,
+ const ngtcp2_cid *client_dcid);
+
+/**
+ * @function
+ *
+ * `ngtcp2_crypto_derive_and_install_vneg_initial_key` derives initial
+ * keying materials and installs keys to |conn|. This function is
+ * dedicated to install keys for |version| which is negotiated, or
+ * being negotiated.
+ *
+ * If |rx_secret| is not NULL, the secret for decryption is written to
+ * the buffer pointed by |rx_secret|. The length of secret is 32
+ * bytes, and |rx_secret| must point to the buffer which has enough
+ * capacity.
+ *
+ * If |tx_secret| is not NULL, the secret for encryption is written to
+ * the buffer pointed by |tx_secret|. The length of secret is 32
+ * bytes, and |tx_secret| must point to the buffer which has enough
+ * capacity.
+ *
+ * If |initial_secret| is not NULL, the initial secret is written to
+ * the buffer pointed by |initial_secret|. The length of secret is 32
+ * bytes, and |initial_secret| must point to the buffer which has
+ * enough capacity.
+ *
+ * |client_dcid| is the destination connection ID in first Initial
+ * packet of client.
+ *
+ * If |rx_key| is not NULL, the derived packet protection key for
+ * decryption is written to the buffer pointed by |rx_key|. If
+ * |rx_iv| is not NULL, the derived packet protection IV for
+ * decryption is written to the buffer pointed by |rx_iv|. If |rx_hp|
+ * is not NULL, the derived header protection key for decryption is
+ * written to the buffer pointed by |rx_hp|.
+ *
+ * If |tx_key| is not NULL, the derived packet protection key for
+ * encryption is written to the buffer pointed by |tx_key|. If
+ * |tx_iv| is not NULL, the derived packet protection IV for
+ * encryption is written to the buffer pointed by |tx_iv|. If |tx_hp|
+ * is not NULL, the derived header protection key for encryption is
+ * written to the buffer pointed by |tx_hp|.
+ *
+ * The length of packet protection key and header protection key is 16
+ * bytes long. The length of packet protection IV is 12 bytes long.
+ *
+ * This function returns 0 if it succeeds, or -1.
+ */
+int ngtcp2_crypto_derive_and_install_vneg_initial_key(
+ ngtcp2_conn *conn, uint8_t *rx_secret, uint8_t *tx_secret,
+ uint8_t *initial_secret, uint8_t *rx_key, uint8_t *rx_iv, uint8_t *rx_hp,
+ uint8_t *tx_key, uint8_t *tx_iv, uint8_t *tx_hp, uint32_t version,
+ const ngtcp2_cid *client_dcid);
+
+/**
+ * @function
+ *
+ * `ngtcp2_crypto_cipher_ctx_encrypt_init` initializes |cipher_ctx|
+ * with new cipher context object for encryption which is constructed
+ * to use |key| as encryption key. |cipher| specifies cipher to use.
+ *
+ * This function returns 0 if it succeeds, or -1.
+ */
+int ngtcp2_crypto_cipher_ctx_encrypt_init(ngtcp2_crypto_cipher_ctx *cipher_ctx,
+ const ngtcp2_crypto_cipher *cipher,
+ const uint8_t *key);
+
+/**
+ * @function
+ *
+ * `ngtcp2_crypto_cipher_ctx_free` frees up resources used by
+ * |cipher_ctx|. This function does not free the memory pointed by
+ * |cipher_ctx| itself.
+ */
+void ngtcp2_crypto_cipher_ctx_free(ngtcp2_crypto_cipher_ctx *cipher_ctx);
+
+/*
+ * `ngtcp2_crypto_md_sha256` initializes |md| with SHA256 message
+ * digest algorithm and returns |md|.
+ */
+ngtcp2_crypto_md *ngtcp2_crypto_md_sha256(ngtcp2_crypto_md *md);
+
+ngtcp2_crypto_aead *ngtcp2_crypto_aead_aes_128_gcm(ngtcp2_crypto_aead *aead);
+
+/*
+ * `ngtcp2_crypto_random` writes cryptographically-secure random
+ * |datalen| bytes into the buffer pointed by |data|.
+ *
+ * This function returns 0 if it succeeds, or -1.
+ */
+int ngtcp2_crypto_random(uint8_t *data, size_t datalen);
+
+#endif /* NGTCP2_SHARED_H */
diff --git a/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_acktr.c b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_acktr.c
new file mode 100644
index 0000000..3f1f9b3
--- /dev/null
+++ b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_acktr.c
@@ -0,0 +1,335 @@
+/*
+ * ngtcp2
+ *
+ * Copyright (c) 2017 ngtcp2 contributors
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "ngtcp2_acktr.h"
+
+#include <assert.h>
+
+#include "ngtcp2_macro.h"
+
+static void acktr_entry_init(ngtcp2_acktr_entry *ent, int64_t pkt_num,
+ ngtcp2_tstamp tstamp) {
+ ent->pkt_num = pkt_num;
+ ent->len = 1;
+ ent->tstamp = tstamp;
+}
+
+int ngtcp2_acktr_entry_objalloc_new(ngtcp2_acktr_entry **ent, int64_t pkt_num,
+ ngtcp2_tstamp tstamp,
+ ngtcp2_objalloc *objalloc) {
+ *ent = ngtcp2_objalloc_acktr_entry_get(objalloc);
+ if (*ent == NULL) {
+ return NGTCP2_ERR_NOMEM;
+ }
+
+ acktr_entry_init(*ent, pkt_num, tstamp);
+
+ return 0;
+}
+
+void ngtcp2_acktr_entry_objalloc_del(ngtcp2_acktr_entry *ent,
+ ngtcp2_objalloc *objalloc) {
+ ngtcp2_objalloc_acktr_entry_release(objalloc, ent);
+}
+
+static int greater(const ngtcp2_ksl_key *lhs, const ngtcp2_ksl_key *rhs) {
+ return *(int64_t *)lhs > *(int64_t *)rhs;
+}
+
+int ngtcp2_acktr_init(ngtcp2_acktr *acktr, ngtcp2_log *log,
+ const ngtcp2_mem *mem) {
+ int rv;
+
+ ngtcp2_objalloc_acktr_entry_init(&acktr->objalloc, 32, mem);
+
+ rv = ngtcp2_ringbuf_init(&acktr->acks, 32, sizeof(ngtcp2_acktr_ack_entry),
+ mem);
+ if (rv != 0) {
+ return rv;
+ }
+
+ ngtcp2_ksl_init(&acktr->ents, greater, sizeof(int64_t), mem);
+
+ acktr->log = log;
+ acktr->mem = mem;
+ acktr->flags = NGTCP2_ACKTR_FLAG_NONE;
+ acktr->first_unacked_ts = UINT64_MAX;
+ acktr->rx_npkt = 0;
+
+ return 0;
+}
+
+void ngtcp2_acktr_free(ngtcp2_acktr *acktr) {
+#ifdef NOMEMPOOL
+ ngtcp2_ksl_it it;
+#endif /* NOMEMPOOL */
+
+ if (acktr == NULL) {
+ return;
+ }
+
+#ifdef NOMEMPOOL
+ for (it = ngtcp2_ksl_begin(&acktr->ents); !ngtcp2_ksl_it_end(&it);
+ ngtcp2_ksl_it_next(&it)) {
+ ngtcp2_acktr_entry_objalloc_del(ngtcp2_ksl_it_get(&it), &acktr->objalloc);
+ }
+#endif /* NOMEMPOOL */
+
+ ngtcp2_ksl_free(&acktr->ents);
+
+ ngtcp2_ringbuf_free(&acktr->acks);
+
+ ngtcp2_objalloc_free(&acktr->objalloc);
+}
+
+int ngtcp2_acktr_add(ngtcp2_acktr *acktr, int64_t pkt_num, int active_ack,
+ ngtcp2_tstamp ts) {
+ ngtcp2_ksl_it it, prev_it;
+ ngtcp2_acktr_entry *ent, *prev_ent, *delent;
+ int rv;
+ int added = 0;
+
+ if (ngtcp2_ksl_len(&acktr->ents)) {
+ it = ngtcp2_ksl_lower_bound(&acktr->ents, &pkt_num);
+ if (ngtcp2_ksl_it_end(&it)) {
+ ngtcp2_ksl_it_prev(&it);
+ ent = ngtcp2_ksl_it_get(&it);
+
+ assert(ent->pkt_num >= pkt_num + (int64_t)ent->len);
+
+ if (ent->pkt_num == pkt_num + (int64_t)ent->len) {
+ ++ent->len;
+ added = 1;
+ }
+ } else {
+ ent = ngtcp2_ksl_it_get(&it);
+
+ assert(ent->pkt_num != pkt_num);
+
+ if (ngtcp2_ksl_it_begin(&it)) {
+ if (ent->pkt_num + 1 == pkt_num) {
+ ngtcp2_ksl_update_key(&acktr->ents, &ent->pkt_num, &pkt_num);
+ ent->pkt_num = pkt_num;
+ ent->tstamp = ts;
+ ++ent->len;
+ added = 1;
+ }
+ } else {
+ prev_it = it;
+ ngtcp2_ksl_it_prev(&prev_it);
+ prev_ent = ngtcp2_ksl_it_get(&prev_it);
+
+ assert(prev_ent->pkt_num >= pkt_num + (int64_t)prev_ent->len);
+
+ if (ent->pkt_num + 1 == pkt_num) {
+ if (prev_ent->pkt_num == pkt_num + (int64_t)prev_ent->len) {
+ prev_ent->len += ent->len + 1;
+ ngtcp2_ksl_remove_hint(&acktr->ents, NULL, &it, &ent->pkt_num);
+ ngtcp2_acktr_entry_objalloc_del(ent, &acktr->objalloc);
+ added = 1;
+ } else {
+ ngtcp2_ksl_update_key(&acktr->ents, &ent->pkt_num, &pkt_num);
+ ent->pkt_num = pkt_num;
+ ent->tstamp = ts;
+ ++ent->len;
+ added = 1;
+ }
+ } else if (prev_ent->pkt_num == pkt_num + (int64_t)prev_ent->len) {
+ ++prev_ent->len;
+ added = 1;
+ }
+ }
+ }
+ }
+
+ if (!added) {
+ rv = ngtcp2_acktr_entry_objalloc_new(&ent, pkt_num, ts, &acktr->objalloc);
+ if (rv != 0) {
+ return rv;
+ }
+ rv = ngtcp2_ksl_insert(&acktr->ents, NULL, &ent->pkt_num, ent);
+ if (rv != 0) {
+ ngtcp2_acktr_entry_objalloc_del(ent, &acktr->objalloc);
+ return rv;
+ }
+ }
+
+ if (active_ack) {
+ acktr->flags |= NGTCP2_ACKTR_FLAG_ACTIVE_ACK;
+ if (acktr->first_unacked_ts == UINT64_MAX) {
+ acktr->first_unacked_ts = ts;
+ }
+ }
+
+ if (ngtcp2_ksl_len(&acktr->ents) > NGTCP2_ACKTR_MAX_ENT) {
+ it = ngtcp2_ksl_end(&acktr->ents);
+ ngtcp2_ksl_it_prev(&it);
+ delent = ngtcp2_ksl_it_get(&it);
+ ngtcp2_ksl_remove_hint(&acktr->ents, NULL, &it, &delent->pkt_num);
+ ngtcp2_acktr_entry_objalloc_del(delent, &acktr->objalloc);
+ }
+
+ return 0;
+}
+
+void ngtcp2_acktr_forget(ngtcp2_acktr *acktr, ngtcp2_acktr_entry *ent) {
+ ngtcp2_ksl_it it;
+
+ it = ngtcp2_ksl_lower_bound(&acktr->ents, &ent->pkt_num);
+ assert(*(int64_t *)ngtcp2_ksl_it_key(&it) == (int64_t)ent->pkt_num);
+
+ for (; !ngtcp2_ksl_it_end(&it);) {
+ ent = ngtcp2_ksl_it_get(&it);
+ ngtcp2_ksl_remove_hint(&acktr->ents, &it, &it, &ent->pkt_num);
+ ngtcp2_acktr_entry_objalloc_del(ent, &acktr->objalloc);
+ }
+}
+
+ngtcp2_ksl_it ngtcp2_acktr_get(ngtcp2_acktr *acktr) {
+ return ngtcp2_ksl_begin(&acktr->ents);
+}
+
+int ngtcp2_acktr_empty(ngtcp2_acktr *acktr) {
+ ngtcp2_ksl_it it = ngtcp2_ksl_begin(&acktr->ents);
+ return ngtcp2_ksl_it_end(&it);
+}
+
+ngtcp2_acktr_ack_entry *ngtcp2_acktr_add_ack(ngtcp2_acktr *acktr,
+ int64_t pkt_num,
+ int64_t largest_ack) {
+ ngtcp2_acktr_ack_entry *ent = ngtcp2_ringbuf_push_front(&acktr->acks);
+
+ ent->largest_ack = largest_ack;
+ ent->pkt_num = pkt_num;
+
+ return ent;
+}
+
+/*
+ * acktr_remove removes |ent| from |acktr|. |it| must point to the
+ * node whose key identifies |ent|. The iterator which points to the
+ * entry next to |ent| is assigned to |it|.
+ */
+static void acktr_remove(ngtcp2_acktr *acktr, ngtcp2_ksl_it *it,
+ ngtcp2_acktr_entry *ent) {
+ ngtcp2_ksl_remove_hint(&acktr->ents, it, it, &ent->pkt_num);
+ ngtcp2_acktr_entry_objalloc_del(ent, &acktr->objalloc);
+}
+
+static void acktr_on_ack(ngtcp2_acktr *acktr, ngtcp2_ringbuf *rb,
+ size_t ack_ent_offset) {
+ ngtcp2_acktr_ack_entry *ack_ent;
+ ngtcp2_acktr_entry *ent;
+ ngtcp2_ksl_it it;
+
+ assert(ngtcp2_ringbuf_len(rb));
+
+ ack_ent = ngtcp2_ringbuf_get(rb, ack_ent_offset);
+
+ /* Assume that ngtcp2_pkt_validate_ack(fr) returns 0 */
+ it = ngtcp2_ksl_lower_bound(&acktr->ents, &ack_ent->largest_ack);
+ for (; !ngtcp2_ksl_it_end(&it);) {
+ ent = ngtcp2_ksl_it_get(&it);
+ acktr_remove(acktr, &it, ent);
+ }
+
+ if (ngtcp2_ksl_len(&acktr->ents)) {
+ assert(ngtcp2_ksl_it_end(&it));
+
+ ngtcp2_ksl_it_prev(&it);
+ ent = ngtcp2_ksl_it_get(&it);
+ if (ent->pkt_num > ack_ent->largest_ack &&
+ ack_ent->largest_ack >= ent->pkt_num - (int64_t)(ent->len - 1)) {
+ ent->len = (size_t)(ent->pkt_num - ack_ent->largest_ack);
+ }
+ }
+
+ ngtcp2_ringbuf_resize(rb, ack_ent_offset);
+}
+
+void ngtcp2_acktr_recv_ack(ngtcp2_acktr *acktr, const ngtcp2_ack *fr) {
+ ngtcp2_acktr_ack_entry *ent;
+ int64_t largest_ack = fr->largest_ack, min_ack;
+ size_t i, j;
+ ngtcp2_ringbuf *rb = &acktr->acks;
+ size_t nacks = ngtcp2_ringbuf_len(rb);
+
+ /* Assume that ngtcp2_pkt_validate_ack(fr) returns 0 */
+ for (j = 0; j < nacks; ++j) {
+ ent = ngtcp2_ringbuf_get(rb, j);
+ if (largest_ack >= ent->pkt_num) {
+ break;
+ }
+ }
+ if (j == nacks) {
+ return;
+ }
+
+ min_ack = largest_ack - (int64_t)fr->first_ack_range;
+
+ if (min_ack <= ent->pkt_num && ent->pkt_num <= largest_ack) {
+ acktr_on_ack(acktr, rb, j);
+ return;
+ }
+
+ for (i = 0; i < fr->rangecnt && j < nacks; ++i) {
+ largest_ack = min_ack - (int64_t)fr->ranges[i].gap - 2;
+ min_ack = largest_ack - (int64_t)fr->ranges[i].len;
+
+ for (;;) {
+ if (ent->pkt_num > largest_ack) {
+ ++j;
+ if (j == nacks) {
+ return;
+ }
+ ent = ngtcp2_ringbuf_get(rb, j);
+ continue;
+ }
+ if (ent->pkt_num < min_ack) {
+ break;
+ }
+ acktr_on_ack(acktr, rb, j);
+ return;
+ }
+ }
+}
+
+void ngtcp2_acktr_commit_ack(ngtcp2_acktr *acktr) {
+ acktr->flags &= (uint16_t) ~(NGTCP2_ACKTR_FLAG_ACTIVE_ACK |
+ NGTCP2_ACKTR_FLAG_IMMEDIATE_ACK |
+ NGTCP2_ACKTR_FLAG_CANCEL_TIMER);
+ acktr->first_unacked_ts = UINT64_MAX;
+ acktr->rx_npkt = 0;
+}
+
+int ngtcp2_acktr_require_active_ack(ngtcp2_acktr *acktr,
+ ngtcp2_duration max_ack_delay,
+ ngtcp2_tstamp ts) {
+ return acktr->first_unacked_ts != UINT64_MAX &&
+ acktr->first_unacked_ts + max_ack_delay <= ts;
+}
+
+void ngtcp2_acktr_immediate_ack(ngtcp2_acktr *acktr) {
+ acktr->flags |= NGTCP2_ACKTR_FLAG_IMMEDIATE_ACK;
+}
diff --git a/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_acktr.h b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_acktr.h
new file mode 100644
index 0000000..70a3c71
--- /dev/null
+++ b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_acktr.h
@@ -0,0 +1,221 @@
+/*
+ * ngtcp2
+ *
+ * Copyright (c) 2017 ngtcp2 contributors
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#ifndef NGTCP2_ACKTR_H
+#define NGTCP2_ACKTR_H
+
+#ifdef HAVE_CONFIG_H
+# include <config.h>
+#endif /* HAVE_CONFIG_H */
+
+#include <ngtcp2/ngtcp2.h>
+
+#include "ngtcp2_mem.h"
+#include "ngtcp2_ringbuf.h"
+#include "ngtcp2_ksl.h"
+#include "ngtcp2_pkt.h"
+#include "ngtcp2_objalloc.h"
+
+/* NGTCP2_ACKTR_MAX_ENT is the maximum number of ngtcp2_acktr_entry
+ which ngtcp2_acktr stores. */
+#define NGTCP2_ACKTR_MAX_ENT 1024
+
+typedef struct ngtcp2_log ngtcp2_log;
+
+/*
+ * ngtcp2_acktr_entry is a range of packets which need to be acked.
+ */
+typedef struct ngtcp2_acktr_entry {
+ union {
+ struct {
+ /* pkt_num is the largest packet number to acknowledge in this
+ range. */
+ int64_t pkt_num;
+ /* len is the consecutive packets started from pkt_num which
+ includes pkt_num itself counting in decreasing order. So pkt_num
+ = 987 and len = 2, this entry includes packet 987 and 986. */
+ size_t len;
+ /* tstamp is the timestamp when a packet denoted by pkt_num is
+ received. */
+ ngtcp2_tstamp tstamp;
+ };
+
+ ngtcp2_opl_entry oplent;
+ };
+} ngtcp2_acktr_entry;
+
+ngtcp2_objalloc_def(acktr_entry, ngtcp2_acktr_entry, oplent);
+
+/*
+ * ngtcp2_acktr_entry_objalloc_new allocates memory for ent, and
+ * initializes it with the given parameters. The pointer to the
+ * allocated object is stored to |*ent|.
+ *
+ * This function returns 0 if it succeeds, or one of the following
+ * negative error codes:
+ *
+ * NGTCP2_ERR_NOMEM
+ * Out of memory.
+ */
+int ngtcp2_acktr_entry_objalloc_new(ngtcp2_acktr_entry **ent, int64_t pkt_num,
+ ngtcp2_tstamp tstamp,
+ ngtcp2_objalloc *objalloc);
+
+/*
+ * ngtcp2_acktr_entry_objalloc_del deallocates memory allocated for
+ * |ent|.
+ */
+void ngtcp2_acktr_entry_objalloc_del(ngtcp2_acktr_entry *ent,
+ ngtcp2_objalloc *objalloc);
+
+typedef struct ngtcp2_acktr_ack_entry {
+ /* largest_ack is the largest packet number in outgoing ACK frame */
+ int64_t largest_ack;
+ /* pkt_num is the packet number that ACK frame is included. */
+ int64_t pkt_num;
+} ngtcp2_acktr_ack_entry;
+
+/* NGTCP2_ACKTR_FLAG_NONE indicates that no flag set. */
+#define NGTCP2_ACKTR_FLAG_NONE 0x00u
+/* NGTCP2_ACKTR_FLAG_IMMEDIATE_ACK indicates that immediate
+ acknowledgement is required. */
+#define NGTCP2_ACKTR_FLAG_IMMEDIATE_ACK 0x01u
+/* NGTCP2_ACKTR_FLAG_ACTIVE_ACK indicates that there are pending
+ protected packet to be acknowledged. */
+#define NGTCP2_ACKTR_FLAG_ACTIVE_ACK 0x02u
+/* NGTCP2_ACKTR_FLAG_CANCEL_TIMER is set when ACK delay timer is
+ expired and canceled. */
+#define NGTCP2_ACKTR_FLAG_CANCEL_TIMER 0x0100u
+
+/*
+ * ngtcp2_acktr tracks received packets which we have to send ack.
+ */
+typedef struct ngtcp2_acktr {
+ ngtcp2_objalloc objalloc;
+ ngtcp2_ringbuf acks;
+ /* ents includes ngtcp2_acktr_entry sorted by decreasing order of
+ packet number. */
+ ngtcp2_ksl ents;
+ ngtcp2_log *log;
+ const ngtcp2_mem *mem;
+ /* flags is bitwise OR of zero, or more of NGTCP2_ACKTR_FLAG_*. */
+ uint16_t flags;
+ /* first_unacked_ts is timestamp when ngtcp2_acktr_entry is added
+ first time after the last outgoing ACK frame. */
+ ngtcp2_tstamp first_unacked_ts;
+ /* rx_npkt is the number of ACK eliciting packets received without
+ sending ACK. */
+ size_t rx_npkt;
+} ngtcp2_acktr;
+
+/*
+ * ngtcp2_acktr_init initializes |acktr|.
+ *
+ * This function returns 0 if it succeeds, or one of the following
+ * negative error codes:
+ *
+ * NGTCP2_ERR_NOMEM
+ * Out of memory.
+ */
+int ngtcp2_acktr_init(ngtcp2_acktr *acktr, ngtcp2_log *log,
+ const ngtcp2_mem *mem);
+
+/*
+ * ngtcp2_acktr_free frees resources allocated for |acktr|. It frees
+ * any ngtcp2_acktr_entry added to |acktr|.
+ */
+void ngtcp2_acktr_free(ngtcp2_acktr *acktr);
+
+/*
+ * ngtcp2_acktr_add adds packet number |pkt_num| to |acktr|.
+ * |active_ack| is nonzero if |pkt_num| is retransmittable packet.
+ *
+ * This function assumes that |acktr| does not contain |pkt_num|.
+ *
+ * This function returns 0 if it succeeds, or one of the following
+ * negative error codes:
+ *
+ * NGTCP2_ERR_NOMEM
+ * OUt of memory.
+ */
+int ngtcp2_acktr_add(ngtcp2_acktr *acktr, int64_t pkt_num, int active_ack,
+ ngtcp2_tstamp ts);
+
+/*
+ * ngtcp2_acktr_forget removes all entries which have the packet
+ * number that is equal to or less than ent->pkt_num. This function
+ * assumes that |acktr| includes |ent|.
+ */
+void ngtcp2_acktr_forget(ngtcp2_acktr *acktr, ngtcp2_acktr_entry *ent);
+
+/*
+ * ngtcp2_acktr_get returns the pointer to pointer to the entry which
+ * has the largest packet number to be acked. If there is no entry,
+ * returned value satisfies ngtcp2_ksl_it_end(&it) != 0.
+ */
+ngtcp2_ksl_it ngtcp2_acktr_get(ngtcp2_acktr *acktr);
+
+/*
+ * ngtcp2_acktr_empty returns nonzero if it has no packet to
+ * acknowledge.
+ */
+int ngtcp2_acktr_empty(ngtcp2_acktr *acktr);
+
+/*
+ * ngtcp2_acktr_add_ack records outgoing ACK frame whose largest
+ * acknowledged packet number is |largest_ack|. |pkt_num| is the
+ * packet number of a packet in which ACK frame is included. This
+ * function returns a pointer to the object it adds.
+ */
+ngtcp2_acktr_ack_entry *
+ngtcp2_acktr_add_ack(ngtcp2_acktr *acktr, int64_t pkt_num, int64_t largest_ack);
+
+/*
+ * ngtcp2_acktr_recv_ack processes the incoming ACK frame |fr|.
+ * |pkt_num| is a packet number which includes |fr|. If we receive
+ * ACK which acknowledges the ACKs added by ngtcp2_acktr_add_ack,
+ * ngtcp2_acktr_entry which the outgoing ACK acknowledges is removed.
+ */
+void ngtcp2_acktr_recv_ack(ngtcp2_acktr *acktr, const ngtcp2_ack *fr);
+
+/*
+ * ngtcp2_acktr_commit_ack tells |acktr| that ACK frame is generated.
+ */
+void ngtcp2_acktr_commit_ack(ngtcp2_acktr *acktr);
+
+/*
+ * ngtcp2_acktr_require_active_ack returns nonzero if ACK frame should
+ * be generated actively.
+ */
+int ngtcp2_acktr_require_active_ack(ngtcp2_acktr *acktr,
+ ngtcp2_duration max_ack_delay,
+ ngtcp2_tstamp ts);
+
+/*
+ * ngtcp2_acktr_immediate_ack tells |acktr| that immediate
+ * acknowledgement is required.
+ */
+void ngtcp2_acktr_immediate_ack(ngtcp2_acktr *acktr);
+
+#endif /* NGTCP2_ACKTR_H */
diff --git a/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_addr.c b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_addr.c
new file mode 100644
index 0000000..f389abe
--- /dev/null
+++ b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_addr.c
@@ -0,0 +1,117 @@
+/*
+ * ngtcp2
+ *
+ * Copyright (c) 2019 ngtcp2 contributors
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "ngtcp2_addr.h"
+
+#include <string.h>
+#include <assert.h>
+
+#include "ngtcp2_unreachable.h"
+
+ngtcp2_addr *ngtcp2_addr_init(ngtcp2_addr *dest, const ngtcp2_sockaddr *addr,
+ ngtcp2_socklen addrlen) {
+ dest->addrlen = addrlen;
+ dest->addr = (ngtcp2_sockaddr *)addr;
+ return dest;
+}
+
+void ngtcp2_addr_copy(ngtcp2_addr *dest, const ngtcp2_addr *src) {
+ dest->addrlen = src->addrlen;
+ if (src->addrlen) {
+ memcpy(dest->addr, src->addr, (size_t)src->addrlen);
+ }
+}
+
+void ngtcp2_addr_copy_byte(ngtcp2_addr *dest, const ngtcp2_sockaddr *addr,
+ ngtcp2_socklen addrlen) {
+ dest->addrlen = addrlen;
+ if (addrlen) {
+ memcpy(dest->addr, addr, (size_t)addrlen);
+ }
+}
+
+static int sockaddr_eq(const ngtcp2_sockaddr *a, const ngtcp2_sockaddr *b) {
+ assert(a->sa_family == b->sa_family);
+
+ switch (a->sa_family) {
+ case NGTCP2_AF_INET: {
+ const ngtcp2_sockaddr_in *ai = (const ngtcp2_sockaddr_in *)(void *)a,
+ *bi = (const ngtcp2_sockaddr_in *)(void *)b;
+ return ai->sin_port == bi->sin_port &&
+ memcmp(&ai->sin_addr, &bi->sin_addr, sizeof(ai->sin_addr)) == 0;
+ }
+ case NGTCP2_AF_INET6: {
+ const ngtcp2_sockaddr_in6 *ai = (const ngtcp2_sockaddr_in6 *)(void *)a,
+ *bi = (const ngtcp2_sockaddr_in6 *)(void *)b;
+ return ai->sin6_port == bi->sin6_port &&
+ memcmp(&ai->sin6_addr, &bi->sin6_addr, sizeof(ai->sin6_addr)) == 0;
+ }
+ default:
+ ngtcp2_unreachable();
+ }
+}
+
+int ngtcp2_addr_eq(const ngtcp2_addr *a, const ngtcp2_addr *b) {
+ return a->addr->sa_family == b->addr->sa_family &&
+ sockaddr_eq(a->addr, b->addr);
+}
+
+uint32_t ngtcp2_addr_compare(const ngtcp2_addr *aa, const ngtcp2_addr *bb) {
+ uint32_t flags = NGTCP2_ADDR_COMPARE_FLAG_NONE;
+ const ngtcp2_sockaddr *a = aa->addr;
+ const ngtcp2_sockaddr *b = bb->addr;
+
+ if (a->sa_family != b->sa_family) {
+ return NGTCP2_ADDR_COMPARE_FLAG_FAMILY;
+ }
+
+ switch (a->sa_family) {
+ case NGTCP2_AF_INET: {
+ const ngtcp2_sockaddr_in *ai = (const ngtcp2_sockaddr_in *)(void *)a,
+ *bi = (const ngtcp2_sockaddr_in *)(void *)b;
+ if (memcmp(&ai->sin_addr, &bi->sin_addr, sizeof(ai->sin_addr))) {
+ flags |= NGTCP2_ADDR_COMPARE_FLAG_ADDR;
+ }
+ if (ai->sin_port != bi->sin_port) {
+ flags |= NGTCP2_ADDR_COMPARE_FLAG_PORT;
+ }
+ return flags;
+ }
+ case NGTCP2_AF_INET6: {
+ const ngtcp2_sockaddr_in6 *ai = (const ngtcp2_sockaddr_in6 *)(void *)a,
+ *bi = (const ngtcp2_sockaddr_in6 *)(void *)b;
+ if (memcmp(&ai->sin6_addr, &bi->sin6_addr, sizeof(ai->sin6_addr))) {
+ flags |= NGTCP2_ADDR_COMPARE_FLAG_ADDR;
+ }
+ if (ai->sin6_port != bi->sin6_port) {
+ flags |= NGTCP2_ADDR_COMPARE_FLAG_PORT;
+ }
+ return flags;
+ }
+ default:
+ ngtcp2_unreachable();
+ }
+}
+
+int ngtcp2_addr_empty(const ngtcp2_addr *addr) { return addr->addrlen == 0; }
diff --git a/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_addr.h b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_addr.h
new file mode 100644
index 0000000..f1d7f7b
--- /dev/null
+++ b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_addr.h
@@ -0,0 +1,69 @@
+/*
+ * ngtcp2
+ *
+ * Copyright (c) 2019 ngtcp2 contributors
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#ifndef NGTCP2_ADDR_H
+#define NGTCP2_ADDR_H
+
+#ifdef HAVE_CONFIG_H
+# include <config.h>
+#endif /* HAVE_CONFIG_H */
+
+#include <ngtcp2/ngtcp2.h>
+
+/*
+ * ngtcp2_addr_copy copies |src| to |dest|. This function assumes
+ * that dest->addr points to a buffer which have sufficient size to
+ * store the copy.
+ */
+void ngtcp2_addr_copy(ngtcp2_addr *dest, const ngtcp2_addr *src);
+
+/*
+ * ngtcp2_addr_eq returns nonzero if |a| equals |b|.
+ */
+int ngtcp2_addr_eq(const ngtcp2_addr *a, const ngtcp2_addr *b);
+
+/* NGTCP2_ADDR_COMPARE_FLAG_NONE indicates that no flag set. */
+#define NGTCP2_ADDR_COMPARE_FLAG_NONE 0x0u
+/* NGTCP2_ADDR_COMPARE_FLAG_ADDR indicates IP addresses do not
+ match. */
+#define NGTCP2_ADDR_COMPARE_FLAG_ADDR 0x1u
+/* NGTCP2_ADDR_COMPARE_FLAG_PORT indicates ports do not match. */
+#define NGTCP2_ADDR_COMPARE_FLAG_PORT 0x2u
+/* NGTCP2_ADDR_COMPARE_FLAG_FAMILY indicates address families do not
+ match. */
+#define NGTCP2_ADDR_COMPARE_FLAG_FAMILY 0x4u
+
+/*
+ * ngtcp2_addr_compare compares address and port between |a| and |b|,
+ * and returns zero or more of NGTCP2_ADDR_COMPARE_FLAG_*.
+ */
+uint32_t ngtcp2_addr_compare(const ngtcp2_addr *a, const ngtcp2_addr *b);
+
+/*
+ * ngtcp2_addr_empty returns nonzero if |addr| has zero length
+ * address.
+ */
+int ngtcp2_addr_empty(const ngtcp2_addr *addr);
+
+#endif /* NGTCP2_ADDR_H */
diff --git a/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_balloc.c b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_balloc.c
new file mode 100644
index 0000000..5cc39ee
--- /dev/null
+++ b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_balloc.c
@@ -0,0 +1,90 @@
+/*
+ * ngtcp2
+ *
+ * Copyright (c) 2022 ngtcp2 contributors
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "ngtcp2_balloc.h"
+
+#include <assert.h>
+
+#include "ngtcp2_mem.h"
+
+void ngtcp2_balloc_init(ngtcp2_balloc *balloc, size_t blklen,
+ const ngtcp2_mem *mem) {
+ assert((blklen & 0xfu) == 0);
+
+ balloc->mem = mem;
+ balloc->blklen = blklen;
+ balloc->head = NULL;
+ ngtcp2_buf_init(&balloc->buf, (void *)"", 0);
+}
+
+void ngtcp2_balloc_free(ngtcp2_balloc *balloc) {
+ if (balloc == NULL) {
+ return;
+ }
+
+ ngtcp2_balloc_clear(balloc);
+}
+
+void ngtcp2_balloc_clear(ngtcp2_balloc *balloc) {
+ ngtcp2_memblock_hd *p, *next;
+
+ for (p = balloc->head; p; p = next) {
+ next = p->next;
+ ngtcp2_mem_free(balloc->mem, p);
+ }
+
+ balloc->head = NULL;
+ ngtcp2_buf_init(&balloc->buf, (void *)"", 0);
+}
+
+int ngtcp2_balloc_get(ngtcp2_balloc *balloc, void **pbuf, size_t n) {
+ uint8_t *p;
+ ngtcp2_memblock_hd *hd;
+
+ assert(n <= balloc->blklen);
+
+ if (ngtcp2_buf_left(&balloc->buf) < n) {
+ p = ngtcp2_mem_malloc(balloc->mem,
+ sizeof(ngtcp2_memblock_hd) + 0x10u + balloc->blklen);
+ if (p == NULL) {
+ return NGTCP2_ERR_NOMEM;
+ }
+
+ hd = (ngtcp2_memblock_hd *)(void *)p;
+ hd->next = balloc->head;
+ balloc->head = hd;
+ ngtcp2_buf_init(
+ &balloc->buf,
+ (uint8_t *)(((uintptr_t)p + sizeof(ngtcp2_memblock_hd) + 0xfu) &
+ ~(uintptr_t)0xfu),
+ balloc->blklen);
+ }
+
+ assert(((uintptr_t)balloc->buf.last & 0xfu) == 0);
+
+ *pbuf = balloc->buf.last;
+ balloc->buf.last += (n + 0xfu) & ~(uintptr_t)0xfu;
+
+ return 0;
+}
diff --git a/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_balloc.h b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_balloc.h
new file mode 100644
index 0000000..1fb1632
--- /dev/null
+++ b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_balloc.h
@@ -0,0 +1,91 @@
+/*
+ * ngtcp2
+ *
+ * Copyright (c) 2022 ngtcp2 contributors
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#ifndef NGTCP2_BALLOC_H
+#define NGTCP2_BALLOC_H
+
+#ifdef HAVE_CONFIG_H
+# include <config.h>
+#endif /* HAVE_CONFIG_H */
+
+#include <ngtcp2/ngtcp2.h>
+
+#include "ngtcp2_buf.h"
+
+typedef struct ngtcp2_memblock_hd ngtcp2_memblock_hd;
+
+/*
+ * ngtcp2_memblock_hd is the header of memory block.
+ */
+struct ngtcp2_memblock_hd {
+ ngtcp2_memblock_hd *next;
+};
+
+/*
+ * ngtcp2_balloc is a custom memory allocator. It allocates |blklen|
+ * bytes of memory at once on demand, and returns its slice when the
+ * allocation is requested.
+ */
+typedef struct ngtcp2_balloc {
+ /* mem is the underlying memory allocator. */
+ const ngtcp2_mem *mem;
+ /* blklen is the size of memory block. */
+ size_t blklen;
+ /* head points to the list of memory block allocated so far. */
+ ngtcp2_memblock_hd *head;
+ /* buf wraps the current memory block for allocation requests. */
+ ngtcp2_buf buf;
+} ngtcp2_balloc;
+
+/*
+ * ngtcp2_balloc_init initializes |balloc| with |blklen| which is the
+ * size of memory block.
+ */
+void ngtcp2_balloc_init(ngtcp2_balloc *balloc, size_t blklen,
+ const ngtcp2_mem *mem);
+
+/*
+ * ngtcp2_balloc_free releases all allocated memory blocks.
+ */
+void ngtcp2_balloc_free(ngtcp2_balloc *balloc);
+
+/*
+ * ngtcp2_balloc_get allocates |n| bytes of memory and assigns its
+ * pointer to |*pbuf|.
+ *
+ * It returns 0 if it succeeds, or one of the following negative error
+ * codes:
+ *
+ * NGTCP2_ERR_NOMEM
+ * Out of memory.
+ */
+int ngtcp2_balloc_get(ngtcp2_balloc *balloc, void **pbuf, size_t n);
+
+/*
+ * ngtcp2_balloc_clear releases all allocated memory blocks and
+ * initializes its state.
+ */
+void ngtcp2_balloc_clear(ngtcp2_balloc *balloc);
+
+#endif /* NGTCP2_BALLOC_H */
diff --git a/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_bbr.c b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_bbr.c
new file mode 100644
index 0000000..ed26d3e
--- /dev/null
+++ b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_bbr.c
@@ -0,0 +1,692 @@
+/*
+ * ngtcp2
+ *
+ * Copyright (c) 2021 ngtcp2 contributors
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "ngtcp2_bbr.h"
+
+#include <assert.h>
+
+#include "ngtcp2_log.h"
+#include "ngtcp2_macro.h"
+#include "ngtcp2_mem.h"
+#include "ngtcp2_rcvry.h"
+#include "ngtcp2_rst.h"
+
+static const double pacing_gain_cycle[] = {1.25, 0.75, 1, 1, 1, 1, 1, 1};
+
+#define NGTCP2_BBR_GAIN_CYCLELEN ngtcp2_arraylen(pacing_gain_cycle)
+
+#define NGTCP2_BBR_HIGH_GAIN 2.89
+#define NGTCP2_BBR_PROBE_RTT_DURATION (200 * NGTCP2_MILLISECONDS)
+#define NGTCP2_RTPROP_FILTERLEN (10 * NGTCP2_SECONDS)
+#define NGTCP2_BBR_BTL_BW_FILTERLEN 10
+
+static void bbr_update_on_ack(ngtcp2_bbr_cc *cc, ngtcp2_conn_stat *cstat,
+ const ngtcp2_cc_ack *ack, ngtcp2_tstamp ts);
+static void bbr_update_model_and_state(ngtcp2_bbr_cc *cc,
+ ngtcp2_conn_stat *cstat,
+ const ngtcp2_cc_ack *ack,
+ ngtcp2_tstamp ts);
+static void bbr_update_control_parameters(ngtcp2_bbr_cc *cc,
+ ngtcp2_conn_stat *cstat,
+ const ngtcp2_cc_ack *ack);
+static void bbr_on_transmit(ngtcp2_bbr_cc *cc, ngtcp2_conn_stat *cstat);
+static void bbr_init_round_counting(ngtcp2_bbr_cc *cc);
+static void bbr_update_round(ngtcp2_bbr_cc *cc, const ngtcp2_cc_ack *ack);
+static void bbr_update_btl_bw(ngtcp2_bbr_cc *cc, ngtcp2_conn_stat *cstat,
+ const ngtcp2_cc_ack *ack);
+static void bbr_update_rtprop(ngtcp2_bbr_cc *cc, ngtcp2_conn_stat *cstat,
+ ngtcp2_tstamp ts);
+static void bbr_init_pacing_rate(ngtcp2_bbr_cc *cc, ngtcp2_conn_stat *cstat);
+static void bbr_set_pacing_rate_with_gain(ngtcp2_bbr_cc *cc,
+ ngtcp2_conn_stat *cstat,
+ double pacing_gain);
+static void bbr_set_pacing_rate(ngtcp2_bbr_cc *cc, ngtcp2_conn_stat *cstat);
+static void bbr_set_send_quantum(ngtcp2_bbr_cc *cc, ngtcp2_conn_stat *cstat);
+static void bbr_update_target_cwnd(ngtcp2_bbr_cc *cc, ngtcp2_conn_stat *cstat);
+static void bbr_modulate_cwnd_for_recovery(ngtcp2_bbr_cc *cc,
+ ngtcp2_conn_stat *cstat,
+ const ngtcp2_cc_ack *ack);
+static void bbr_save_cwnd(ngtcp2_bbr_cc *cc, ngtcp2_conn_stat *cstat);
+static void bbr_restore_cwnd(ngtcp2_bbr_cc *cc, ngtcp2_conn_stat *cstat);
+static void bbr_modulate_cwnd_for_probe_rtt(ngtcp2_bbr_cc *cc,
+ ngtcp2_conn_stat *cstat);
+static void bbr_set_cwnd(ngtcp2_bbr_cc *cc, ngtcp2_conn_stat *cstat,
+ const ngtcp2_cc_ack *ack);
+static void bbr_init(ngtcp2_bbr_cc *cc, ngtcp2_conn_stat *cstat,
+ ngtcp2_tstamp initial_ts);
+static void bbr_enter_startup(ngtcp2_bbr_cc *cc);
+static void bbr_init_full_pipe(ngtcp2_bbr_cc *cc);
+static void bbr_check_full_pipe(ngtcp2_bbr_cc *cc);
+static void bbr_enter_drain(ngtcp2_bbr_cc *cc);
+static void bbr_check_drain(ngtcp2_bbr_cc *cc, ngtcp2_conn_stat *cstat,
+ ngtcp2_tstamp ts);
+static void bbr_enter_probe_bw(ngtcp2_bbr_cc *cc, ngtcp2_tstamp ts);
+static void bbr_check_cycle_phase(ngtcp2_bbr_cc *cc, ngtcp2_conn_stat *cstat,
+ const ngtcp2_cc_ack *ack, ngtcp2_tstamp ts);
+static void bbr_advance_cycle_phase(ngtcp2_bbr_cc *cc, ngtcp2_tstamp ts);
+static int bbr_is_next_cycle_phase(ngtcp2_bbr_cc *cc, ngtcp2_conn_stat *cstat,
+ const ngtcp2_cc_ack *ack, ngtcp2_tstamp ts);
+static void bbr_handle_restart_from_idle(ngtcp2_bbr_cc *cc,
+ ngtcp2_conn_stat *cstat);
+static void bbr_check_probe_rtt(ngtcp2_bbr_cc *cc, ngtcp2_conn_stat *cstat,
+ ngtcp2_tstamp ts);
+static void bbr_enter_probe_rtt(ngtcp2_bbr_cc *cc);
+static void bbr_handle_probe_rtt(ngtcp2_bbr_cc *cc, ngtcp2_conn_stat *cstat,
+ ngtcp2_tstamp ts);
+static void bbr_exit_probe_rtt(ngtcp2_bbr_cc *cc, ngtcp2_tstamp ts);
+
+void ngtcp2_bbr_cc_init(ngtcp2_bbr_cc *cc, ngtcp2_conn_stat *cstat,
+ ngtcp2_rst *rst, ngtcp2_tstamp initial_ts,
+ ngtcp2_rand rand, const ngtcp2_rand_ctx *rand_ctx,
+ ngtcp2_log *log) {
+ cc->ccb.log = log;
+ cc->rst = rst;
+ cc->rand = rand;
+ cc->rand_ctx = *rand_ctx;
+ cc->initial_cwnd = cstat->cwnd;
+ bbr_init(cc, cstat, initial_ts);
+}
+
+void ngtcp2_bbr_cc_free(ngtcp2_bbr_cc *cc) { (void)cc; }
+
+int ngtcp2_cc_bbr_cc_init(ngtcp2_cc *cc, ngtcp2_log *log,
+ ngtcp2_conn_stat *cstat, ngtcp2_rst *rst,
+ ngtcp2_tstamp initial_ts, ngtcp2_rand rand,
+ const ngtcp2_rand_ctx *rand_ctx,
+ const ngtcp2_mem *mem) {
+ ngtcp2_bbr_cc *bbr_cc;
+
+ bbr_cc = ngtcp2_mem_calloc(mem, 1, sizeof(ngtcp2_bbr_cc));
+ if (bbr_cc == NULL) {
+ return NGTCP2_ERR_NOMEM;
+ }
+
+ ngtcp2_bbr_cc_init(bbr_cc, cstat, rst, initial_ts, rand, rand_ctx, log);
+
+ cc->ccb = &bbr_cc->ccb;
+ cc->on_pkt_acked = ngtcp2_cc_bbr_cc_on_pkt_acked;
+ cc->congestion_event = ngtcp2_cc_bbr_cc_congestion_event;
+ cc->on_spurious_congestion = ngtcp2_cc_bbr_cc_on_spurious_congestion;
+ cc->on_persistent_congestion = ngtcp2_cc_bbr_cc_on_persistent_congestion;
+ cc->on_ack_recv = ngtcp2_cc_bbr_cc_on_ack_recv;
+ cc->on_pkt_sent = ngtcp2_cc_bbr_cc_on_pkt_sent;
+ cc->new_rtt_sample = ngtcp2_cc_bbr_cc_new_rtt_sample;
+ cc->reset = ngtcp2_cc_bbr_cc_reset;
+ cc->event = ngtcp2_cc_bbr_cc_event;
+
+ return 0;
+}
+
+void ngtcp2_cc_bbr_cc_free(ngtcp2_cc *cc, const ngtcp2_mem *mem) {
+ ngtcp2_bbr_cc *bbr_cc = ngtcp2_struct_of(cc->ccb, ngtcp2_bbr_cc, ccb);
+
+ ngtcp2_bbr_cc_free(bbr_cc);
+ ngtcp2_mem_free(mem, bbr_cc);
+}
+
+void ngtcp2_cc_bbr_cc_on_pkt_acked(ngtcp2_cc *ccx, ngtcp2_conn_stat *cstat,
+ const ngtcp2_cc_pkt *pkt, ngtcp2_tstamp ts) {
+ (void)ccx;
+ (void)cstat;
+ (void)pkt;
+ (void)ts;
+}
+
+static int in_congestion_recovery(const ngtcp2_conn_stat *cstat,
+ ngtcp2_tstamp sent_time) {
+ return cstat->congestion_recovery_start_ts != UINT64_MAX &&
+ sent_time <= cstat->congestion_recovery_start_ts;
+}
+
+void ngtcp2_cc_bbr_cc_congestion_event(ngtcp2_cc *ccx, ngtcp2_conn_stat *cstat,
+ ngtcp2_tstamp sent_ts,
+ ngtcp2_tstamp ts) {
+ ngtcp2_bbr_cc *cc = ngtcp2_struct_of(ccx->ccb, ngtcp2_bbr_cc, ccb);
+
+ if (cc->in_loss_recovery || cc->congestion_recovery_start_ts != UINT64_MAX ||
+ in_congestion_recovery(cstat, sent_ts)) {
+ return;
+ }
+
+ cc->congestion_recovery_start_ts = ts;
+}
+
+void ngtcp2_cc_bbr_cc_on_spurious_congestion(ngtcp2_cc *ccx,
+ ngtcp2_conn_stat *cstat,
+ ngtcp2_tstamp ts) {
+ ngtcp2_bbr_cc *cc = ngtcp2_struct_of(ccx->ccb, ngtcp2_bbr_cc, ccb);
+ (void)ts;
+
+ cc->congestion_recovery_start_ts = UINT64_MAX;
+ cstat->congestion_recovery_start_ts = UINT64_MAX;
+
+ if (cc->in_loss_recovery) {
+ cc->in_loss_recovery = 0;
+ cc->packet_conservation = 0;
+ bbr_restore_cwnd(cc, cstat);
+ }
+}
+
+void ngtcp2_cc_bbr_cc_on_persistent_congestion(ngtcp2_cc *ccx,
+ ngtcp2_conn_stat *cstat,
+ ngtcp2_tstamp ts) {
+ ngtcp2_bbr_cc *cc = ngtcp2_struct_of(ccx->ccb, ngtcp2_bbr_cc, ccb);
+ (void)ts;
+
+ cstat->congestion_recovery_start_ts = UINT64_MAX;
+ cc->congestion_recovery_start_ts = UINT64_MAX;
+ cc->in_loss_recovery = 0;
+ cc->packet_conservation = 0;
+
+ bbr_save_cwnd(cc, cstat);
+ cstat->cwnd = 2 * cstat->max_tx_udp_payload_size;
+}
+
+void ngtcp2_cc_bbr_cc_on_ack_recv(ngtcp2_cc *ccx, ngtcp2_conn_stat *cstat,
+ const ngtcp2_cc_ack *ack, ngtcp2_tstamp ts) {
+ ngtcp2_bbr_cc *bbr_cc = ngtcp2_struct_of(ccx->ccb, ngtcp2_bbr_cc, ccb);
+
+ bbr_update_on_ack(bbr_cc, cstat, ack, ts);
+}
+
+void ngtcp2_cc_bbr_cc_on_pkt_sent(ngtcp2_cc *ccx, ngtcp2_conn_stat *cstat,
+ const ngtcp2_cc_pkt *pkt) {
+ ngtcp2_bbr_cc *bbr_cc = ngtcp2_struct_of(ccx->ccb, ngtcp2_bbr_cc, ccb);
+ (void)pkt;
+
+ bbr_on_transmit(bbr_cc, cstat);
+}
+
+void ngtcp2_cc_bbr_cc_new_rtt_sample(ngtcp2_cc *ccx, ngtcp2_conn_stat *cstat,
+ ngtcp2_tstamp ts) {
+ (void)ccx;
+ (void)cstat;
+ (void)ts;
+}
+
+void ngtcp2_cc_bbr_cc_reset(ngtcp2_cc *ccx, ngtcp2_conn_stat *cstat,
+ ngtcp2_tstamp ts) {
+ ngtcp2_bbr_cc *bbr_cc = ngtcp2_struct_of(ccx->ccb, ngtcp2_bbr_cc, ccb);
+ bbr_init(bbr_cc, cstat, ts);
+}
+
+void ngtcp2_cc_bbr_cc_event(ngtcp2_cc *ccx, ngtcp2_conn_stat *cstat,
+ ngtcp2_cc_event_type event, ngtcp2_tstamp ts) {
+ (void)ccx;
+ (void)cstat;
+ (void)event;
+ (void)ts;
+}
+
+static void bbr_update_on_ack(ngtcp2_bbr_cc *cc, ngtcp2_conn_stat *cstat,
+ const ngtcp2_cc_ack *ack, ngtcp2_tstamp ts) {
+ bbr_update_model_and_state(cc, cstat, ack, ts);
+ bbr_update_control_parameters(cc, cstat, ack);
+}
+
+static void bbr_update_model_and_state(ngtcp2_bbr_cc *cc,
+ ngtcp2_conn_stat *cstat,
+ const ngtcp2_cc_ack *ack,
+ ngtcp2_tstamp ts) {
+ bbr_update_btl_bw(cc, cstat, ack);
+ bbr_check_cycle_phase(cc, cstat, ack, ts);
+ bbr_check_full_pipe(cc);
+ bbr_check_drain(cc, cstat, ts);
+ bbr_update_rtprop(cc, cstat, ts);
+ bbr_check_probe_rtt(cc, cstat, ts);
+}
+
+static void bbr_update_control_parameters(ngtcp2_bbr_cc *cc,
+ ngtcp2_conn_stat *cstat,
+ const ngtcp2_cc_ack *ack) {
+ bbr_set_pacing_rate(cc, cstat);
+ bbr_set_send_quantum(cc, cstat);
+ bbr_set_cwnd(cc, cstat, ack);
+}
+
+static void bbr_on_transmit(ngtcp2_bbr_cc *cc, ngtcp2_conn_stat *cstat) {
+ bbr_handle_restart_from_idle(cc, cstat);
+}
+
+static void bbr_init_round_counting(ngtcp2_bbr_cc *cc) {
+ cc->next_round_delivered = 0;
+ cc->round_start = 0;
+ cc->round_count = 0;
+}
+
+static void bbr_update_round(ngtcp2_bbr_cc *cc, const ngtcp2_cc_ack *ack) {
+ if (ack->pkt_delivered >= cc->next_round_delivered) {
+ cc->next_round_delivered = cc->rst->delivered;
+ ++cc->round_count;
+ cc->round_start = 1;
+
+ return;
+ }
+
+ cc->round_start = 0;
+}
+
+static void bbr_handle_recovery(ngtcp2_bbr_cc *cc, ngtcp2_conn_stat *cstat,
+ const ngtcp2_cc_ack *ack) {
+ if (cc->in_loss_recovery) {
+ if (ack->pkt_delivered >= cc->congestion_recovery_next_round_delivered) {
+ cc->packet_conservation = 0;
+ }
+
+ if (!in_congestion_recovery(cstat, ack->largest_acked_sent_ts)) {
+ cc->in_loss_recovery = 0;
+ cc->packet_conservation = 0;
+ bbr_restore_cwnd(cc, cstat);
+ }
+
+ return;
+ }
+
+ if (cc->congestion_recovery_start_ts != UINT64_MAX) {
+ cc->in_loss_recovery = 1;
+ bbr_save_cwnd(cc, cstat);
+ cstat->cwnd =
+ cstat->bytes_in_flight +
+ ngtcp2_max(ack->bytes_delivered, cstat->max_tx_udp_payload_size);
+
+ cstat->congestion_recovery_start_ts = cc->congestion_recovery_start_ts;
+ cc->congestion_recovery_start_ts = UINT64_MAX;
+ cc->packet_conservation = 1;
+ cc->congestion_recovery_next_round_delivered = cc->rst->delivered;
+ }
+}
+
+static void bbr_update_btl_bw(ngtcp2_bbr_cc *cc, ngtcp2_conn_stat *cstat,
+ const ngtcp2_cc_ack *ack) {
+ bbr_update_round(cc, ack);
+ bbr_handle_recovery(cc, cstat, ack);
+
+ if (cstat->delivery_rate_sec < cc->btl_bw && cc->rst->rs.is_app_limited) {
+ return;
+ }
+
+ ngtcp2_window_filter_update(&cc->btl_bw_filter, cstat->delivery_rate_sec,
+ cc->round_count);
+
+ cc->btl_bw = ngtcp2_window_filter_get_best(&cc->btl_bw_filter);
+}
+
+static void bbr_update_rtprop(ngtcp2_bbr_cc *cc, ngtcp2_conn_stat *cstat,
+ ngtcp2_tstamp ts) {
+ cc->rtprop_expired = ts > cc->rtprop_stamp + NGTCP2_RTPROP_FILTERLEN;
+
+ /* Need valid RTT sample */
+ if (cstat->latest_rtt &&
+ (cstat->latest_rtt <= cc->rt_prop || cc->rtprop_expired)) {
+ cc->rt_prop = cstat->latest_rtt;
+ cc->rtprop_stamp = ts;
+
+ ngtcp2_log_info(cc->ccb.log, NGTCP2_LOG_EVENT_RCV,
+ "bbr update RTprop=%" PRIu64, cc->rt_prop);
+ }
+}
+
+static void bbr_init_pacing_rate(ngtcp2_bbr_cc *cc, ngtcp2_conn_stat *cstat) {
+ double nominal_bandwidth =
+ (double)cc->initial_cwnd / (double)NGTCP2_MILLISECONDS;
+
+ cstat->pacing_rate = cc->pacing_gain * nominal_bandwidth;
+}
+
+static void bbr_set_pacing_rate_with_gain(ngtcp2_bbr_cc *cc,
+ ngtcp2_conn_stat *cstat,
+ double pacing_gain) {
+ double rate = pacing_gain * (double)cc->btl_bw / NGTCP2_SECONDS;
+
+ if (cc->filled_pipe || rate > cstat->pacing_rate) {
+ cstat->pacing_rate = rate;
+ }
+}
+
+static void bbr_set_pacing_rate(ngtcp2_bbr_cc *cc, ngtcp2_conn_stat *cstat) {
+ bbr_set_pacing_rate_with_gain(cc, cstat, cc->pacing_gain);
+}
+
+static void bbr_set_send_quantum(ngtcp2_bbr_cc *cc, ngtcp2_conn_stat *cstat) {
+ uint64_t send_quantum;
+ (void)cc;
+
+ if (cstat->pacing_rate < 1.2 * 1024 * 1024 / 8 / NGTCP2_SECONDS) {
+ cstat->send_quantum = cstat->max_tx_udp_payload_size;
+ } else if (cstat->pacing_rate < 24.0 * 1024 * 1024 / 8 / NGTCP2_SECONDS) {
+ cstat->send_quantum = cstat->max_tx_udp_payload_size * 2;
+ } else {
+ send_quantum =
+ (uint64_t)(cstat->pacing_rate * (double)(cstat->min_rtt == UINT64_MAX
+ ? NGTCP2_MILLISECONDS
+ : cstat->min_rtt));
+ cstat->send_quantum = (size_t)ngtcp2_min(send_quantum, 64 * 1024);
+ }
+
+ cstat->send_quantum =
+ ngtcp2_max(cstat->send_quantum, cstat->max_tx_udp_payload_size * 10);
+}
+
+static uint64_t bbr_inflight(ngtcp2_bbr_cc *cc, ngtcp2_conn_stat *cstat,
+ double gain) {
+ uint64_t quanta = 3 * cstat->send_quantum;
+ double estimated_bdp;
+
+ if (cc->rt_prop == UINT64_MAX) {
+ /* no valid RTT samples yet */
+ return cc->initial_cwnd;
+ }
+
+ estimated_bdp = (double)cc->btl_bw * (double)cc->rt_prop / NGTCP2_SECONDS;
+
+ return (uint64_t)(gain * estimated_bdp) + quanta;
+}
+
+static void bbr_update_target_cwnd(ngtcp2_bbr_cc *cc, ngtcp2_conn_stat *cstat) {
+ cc->target_cwnd = bbr_inflight(cc, cstat, cc->cwnd_gain);
+}
+
+static void bbr_modulate_cwnd_for_recovery(ngtcp2_bbr_cc *cc,
+ ngtcp2_conn_stat *cstat,
+ const ngtcp2_cc_ack *ack) {
+ if (ack->bytes_lost > 0) {
+ if (cstat->cwnd > ack->bytes_lost) {
+ cstat->cwnd -= ack->bytes_lost;
+ cstat->cwnd = ngtcp2_max(cstat->cwnd, 2 * cstat->max_tx_udp_payload_size);
+ } else {
+ cstat->cwnd = 2 * cstat->max_tx_udp_payload_size;
+ }
+ }
+
+ if (cc->packet_conservation) {
+ cstat->cwnd =
+ ngtcp2_max(cstat->cwnd, cstat->bytes_in_flight + ack->bytes_delivered);
+ }
+}
+
+static void bbr_save_cwnd(ngtcp2_bbr_cc *cc, ngtcp2_conn_stat *cstat) {
+ if (!cc->in_loss_recovery && cc->state != NGTCP2_BBR_STATE_PROBE_RTT) {
+ cc->prior_cwnd = cstat->cwnd;
+ return;
+ }
+
+ cc->prior_cwnd = ngtcp2_max(cc->prior_cwnd, cstat->cwnd);
+}
+
+static void bbr_restore_cwnd(ngtcp2_bbr_cc *cc, ngtcp2_conn_stat *cstat) {
+ cstat->cwnd = ngtcp2_max(cstat->cwnd, cc->prior_cwnd);
+}
+
+static uint64_t min_pipe_cwnd(size_t max_udp_payload_size) {
+ return max_udp_payload_size * 4;
+}
+
+static void bbr_modulate_cwnd_for_probe_rtt(ngtcp2_bbr_cc *cc,
+ ngtcp2_conn_stat *cstat) {
+ if (cc->state == NGTCP2_BBR_STATE_PROBE_RTT) {
+ cstat->cwnd =
+ ngtcp2_min(cstat->cwnd, min_pipe_cwnd(cstat->max_tx_udp_payload_size));
+ }
+}
+
+static void bbr_set_cwnd(ngtcp2_bbr_cc *cc, ngtcp2_conn_stat *cstat,
+ const ngtcp2_cc_ack *ack) {
+ bbr_update_target_cwnd(cc, cstat);
+ bbr_modulate_cwnd_for_recovery(cc, cstat, ack);
+
+ if (!cc->packet_conservation) {
+ if (cc->filled_pipe) {
+ cstat->cwnd =
+ ngtcp2_min(cstat->cwnd + ack->bytes_delivered, cc->target_cwnd);
+ } else if (cstat->cwnd < cc->target_cwnd ||
+ cc->rst->delivered < cc->initial_cwnd) {
+ cstat->cwnd += ack->bytes_delivered;
+ }
+
+ cstat->cwnd =
+ ngtcp2_max(cstat->cwnd, min_pipe_cwnd(cstat->max_tx_udp_payload_size));
+ }
+
+ bbr_modulate_cwnd_for_probe_rtt(cc, cstat);
+}
+
+static void bbr_init(ngtcp2_bbr_cc *cc, ngtcp2_conn_stat *cstat,
+ ngtcp2_tstamp initial_ts) {
+ cc->pacing_gain = NGTCP2_BBR_HIGH_GAIN;
+ cc->prior_cwnd = 0;
+ cc->target_cwnd = 0;
+ cc->btl_bw = 0;
+ cc->rt_prop = UINT64_MAX;
+ cc->rtprop_stamp = initial_ts;
+ cc->cycle_stamp = UINT64_MAX;
+ cc->probe_rtt_done_stamp = UINT64_MAX;
+ cc->cycle_index = 0;
+ cc->rtprop_expired = 0;
+ cc->idle_restart = 0;
+ cc->packet_conservation = 0;
+ cc->probe_rtt_round_done = 0;
+
+ cc->congestion_recovery_start_ts = UINT64_MAX;
+ cc->congestion_recovery_next_round_delivered = 0;
+ cc->in_loss_recovery = 0;
+
+ cstat->send_quantum = cstat->max_tx_udp_payload_size * 10;
+
+ ngtcp2_window_filter_init(&cc->btl_bw_filter, NGTCP2_BBR_BTL_BW_FILTERLEN);
+
+ bbr_init_round_counting(cc);
+ bbr_init_full_pipe(cc);
+ bbr_init_pacing_rate(cc, cstat);
+ bbr_enter_startup(cc);
+}
+
+static void bbr_enter_startup(ngtcp2_bbr_cc *cc) {
+ cc->state = NGTCP2_BBR_STATE_STARTUP;
+ cc->pacing_gain = NGTCP2_BBR_HIGH_GAIN;
+ cc->cwnd_gain = NGTCP2_BBR_HIGH_GAIN;
+}
+
+static void bbr_init_full_pipe(ngtcp2_bbr_cc *cc) {
+ cc->filled_pipe = 0;
+ cc->full_bw = 0;
+ cc->full_bw_count = 0;
+}
+
+static void bbr_check_full_pipe(ngtcp2_bbr_cc *cc) {
+ if (cc->filled_pipe || !cc->round_start || cc->rst->rs.is_app_limited) {
+ /* no need to check for a full pipe now. */
+ return;
+ }
+
+ /* cc->btl_bw still growing? */
+ if (cc->btl_bw * 100 >= cc->full_bw * 125) {
+ /* record new baseline level */
+ cc->full_bw = cc->btl_bw;
+ cc->full_bw_count = 0;
+ return;
+ }
+ /* another round w/o much growth */
+ ++cc->full_bw_count;
+ if (cc->full_bw_count >= 3) {
+ cc->filled_pipe = 1;
+ ngtcp2_log_info(cc->ccb.log, NGTCP2_LOG_EVENT_RCV,
+ "bbr filled pipe, btl_bw=%" PRIu64, cc->btl_bw);
+ }
+}
+
+static void bbr_enter_drain(ngtcp2_bbr_cc *cc) {
+ cc->state = NGTCP2_BBR_STATE_DRAIN;
+ /* pace slowly */
+ cc->pacing_gain = 1.0 / NGTCP2_BBR_HIGH_GAIN;
+ /* maintain cwnd */
+ cc->cwnd_gain = NGTCP2_BBR_HIGH_GAIN;
+}
+
+static void bbr_check_drain(ngtcp2_bbr_cc *cc, ngtcp2_conn_stat *cstat,
+ ngtcp2_tstamp ts) {
+ if (cc->state == NGTCP2_BBR_STATE_STARTUP && cc->filled_pipe) {
+ ngtcp2_log_info(cc->ccb.log, NGTCP2_LOG_EVENT_RCV,
+ "bbr exit Startup and enter Drain");
+
+ bbr_enter_drain(cc);
+ }
+
+ if (cc->state == NGTCP2_BBR_STATE_DRAIN &&
+ cstat->bytes_in_flight <= bbr_inflight(cc, cstat, 1.0)) {
+ ngtcp2_log_info(cc->ccb.log, NGTCP2_LOG_EVENT_RCV,
+ "bbr exit Drain and enter ProbeBW");
+
+ /* we estimate queue is drained */
+ bbr_enter_probe_bw(cc, ts);
+ }
+}
+
+static void bbr_enter_probe_bw(ngtcp2_bbr_cc *cc, ngtcp2_tstamp ts) {
+ uint8_t rand;
+
+ cc->state = NGTCP2_BBR_STATE_PROBE_BW;
+ cc->pacing_gain = 1;
+ cc->cwnd_gain = 2;
+
+ assert(cc->rand);
+
+ cc->rand(&rand, 1, &cc->rand_ctx);
+
+ cc->cycle_index = NGTCP2_BBR_GAIN_CYCLELEN - 1 - (size_t)(rand * 7 / 256);
+ bbr_advance_cycle_phase(cc, ts);
+}
+
+static void bbr_check_cycle_phase(ngtcp2_bbr_cc *cc, ngtcp2_conn_stat *cstat,
+ const ngtcp2_cc_ack *ack, ngtcp2_tstamp ts) {
+ if (cc->state == NGTCP2_BBR_STATE_PROBE_BW &&
+ bbr_is_next_cycle_phase(cc, cstat, ack, ts)) {
+ bbr_advance_cycle_phase(cc, ts);
+ }
+}
+
+static void bbr_advance_cycle_phase(ngtcp2_bbr_cc *cc, ngtcp2_tstamp ts) {
+ cc->cycle_stamp = ts;
+ cc->cycle_index = (cc->cycle_index + 1) & (NGTCP2_BBR_GAIN_CYCLELEN - 1);
+ cc->pacing_gain = pacing_gain_cycle[cc->cycle_index];
+}
+
+static int bbr_is_next_cycle_phase(ngtcp2_bbr_cc *cc, ngtcp2_conn_stat *cstat,
+ const ngtcp2_cc_ack *ack, ngtcp2_tstamp ts) {
+ int is_full_length = (ts - cc->cycle_stamp) > cc->rt_prop;
+
+ if (cc->pacing_gain > 1) {
+ return is_full_length && (ack->bytes_lost > 0 ||
+ ack->prior_bytes_in_flight >=
+ bbr_inflight(cc, cstat, cc->pacing_gain));
+ }
+
+ if (cc->pacing_gain < 1) {
+ return is_full_length ||
+ ack->prior_bytes_in_flight <= bbr_inflight(cc, cstat, 1);
+ }
+
+ return is_full_length;
+}
+
+static void bbr_handle_restart_from_idle(ngtcp2_bbr_cc *cc,
+ ngtcp2_conn_stat *cstat) {
+ if (cstat->bytes_in_flight == 0 && cc->rst->app_limited) {
+ ngtcp2_log_info(cc->ccb.log, NGTCP2_LOG_EVENT_RCV, "bbr restart from idle");
+
+ cc->idle_restart = 1;
+
+ if (cc->state == NGTCP2_BBR_STATE_PROBE_BW) {
+ bbr_set_pacing_rate_with_gain(cc, cstat, 1);
+ }
+ }
+}
+
+static void bbr_check_probe_rtt(ngtcp2_bbr_cc *cc, ngtcp2_conn_stat *cstat,
+ ngtcp2_tstamp ts) {
+ if (cc->state != NGTCP2_BBR_STATE_PROBE_RTT && cc->rtprop_expired &&
+ !cc->idle_restart) {
+ ngtcp2_log_info(cc->ccb.log, NGTCP2_LOG_EVENT_RCV, "bbr enter ProbeRTT");
+
+ bbr_enter_probe_rtt(cc);
+ bbr_save_cwnd(cc, cstat);
+ cc->probe_rtt_done_stamp = UINT64_MAX;
+ }
+
+ if (cc->state == NGTCP2_BBR_STATE_PROBE_RTT) {
+ bbr_handle_probe_rtt(cc, cstat, ts);
+ }
+
+ cc->idle_restart = 0;
+}
+
+static void bbr_enter_probe_rtt(ngtcp2_bbr_cc *cc) {
+ cc->state = NGTCP2_BBR_STATE_PROBE_RTT;
+ cc->pacing_gain = 1;
+ cc->cwnd_gain = 1;
+}
+
+static void bbr_handle_probe_rtt(ngtcp2_bbr_cc *cc, ngtcp2_conn_stat *cstat,
+ ngtcp2_tstamp ts) {
+ uint64_t app_limited = cc->rst->delivered + cstat->bytes_in_flight;
+
+ /* Ignore low rate samples during NGTCP2_BBR_STATE_PROBE_RTT. */
+ cc->rst->app_limited = app_limited ? app_limited : 1;
+
+ if (cc->probe_rtt_done_stamp == UINT64_MAX &&
+ cstat->bytes_in_flight <= min_pipe_cwnd(cstat->max_tx_udp_payload_size)) {
+ cc->probe_rtt_done_stamp = ts + NGTCP2_BBR_PROBE_RTT_DURATION;
+ cc->probe_rtt_round_done = 0;
+ cc->next_round_delivered = cc->rst->delivered;
+
+ return;
+ }
+
+ if (cc->probe_rtt_done_stamp != UINT64_MAX) {
+ if (cc->round_start) {
+ cc->probe_rtt_round_done = 1;
+ }
+
+ if (cc->probe_rtt_round_done && ts > cc->probe_rtt_done_stamp) {
+ cc->rtprop_stamp = ts;
+ bbr_restore_cwnd(cc, cstat);
+ bbr_exit_probe_rtt(cc, ts);
+ }
+ }
+}
+
+static void bbr_exit_probe_rtt(ngtcp2_bbr_cc *cc, ngtcp2_tstamp ts) {
+ if (cc->filled_pipe) {
+ ngtcp2_log_info(cc->ccb.log, NGTCP2_LOG_EVENT_RCV,
+ "bbr exit ProbeRTT and enter ProbeBW");
+
+ bbr_enter_probe_bw(cc, ts);
+
+ return;
+ }
+
+ ngtcp2_log_info(cc->ccb.log, NGTCP2_LOG_EVENT_RCV,
+ "bbr exit ProbeRTT and enter Startup");
+
+ bbr_enter_startup(cc);
+}
diff --git a/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_bbr.h b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_bbr.h
new file mode 100644
index 0000000..7311f05
--- /dev/null
+++ b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_bbr.h
@@ -0,0 +1,156 @@
+/*
+ * ngtcp2
+ *
+ * Copyright (c) 2021 ngtcp2 contributors
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#ifndef NGTCP2_BBR_H
+#define NGTCP2_BBR_H
+
+#ifdef HAVE_CONFIG_H
+# include <config.h>
+#endif /* HAVE_CONFIG_H */
+
+#include <ngtcp2/ngtcp2.h>
+
+#include "ngtcp2_cc.h"
+#include "ngtcp2_window_filter.h"
+
+typedef struct ngtcp2_rst ngtcp2_rst;
+
+typedef enum ngtcp2_bbr_state {
+ NGTCP2_BBR_STATE_STARTUP,
+ NGTCP2_BBR_STATE_DRAIN,
+ NGTCP2_BBR_STATE_PROBE_BW,
+ NGTCP2_BBR_STATE_PROBE_RTT,
+} ngtcp2_bbr_state;
+
+/*
+ * ngtcp2_bbr_cc is BBR congestion controller, described in
+ * https://tools.ietf.org/html/draft-cardwell-iccrg-bbr-congestion-control-00
+ */
+typedef struct ngtcp2_bbr_cc {
+ ngtcp2_cc_base ccb;
+
+ /* The max filter used to estimate BBR.BtlBw. */
+ ngtcp2_window_filter btl_bw_filter;
+ uint64_t initial_cwnd;
+ ngtcp2_rst *rst;
+ ngtcp2_rand rand;
+ ngtcp2_rand_ctx rand_ctx;
+
+ /* BBR variables */
+
+ /* The dynamic gain factor used to scale BBR.BtlBw to
+ produce BBR.pacing_rate. */
+ double pacing_gain;
+ /* The dynamic gain factor used to scale the estimated BDP to produce a
+ congestion window (cwnd). */
+ double cwnd_gain;
+ uint64_t full_bw;
+ /* packet.delivered value denoting the end of a packet-timed round trip. */
+ uint64_t next_round_delivered;
+ /* Count of packet-timed round trips. */
+ uint64_t round_count;
+ uint64_t prior_cwnd;
+ /* target_cwnd is the upper bound on the volume of data BBR
+ allows in flight. */
+ uint64_t target_cwnd;
+ /* BBR's estimated bottleneck bandwidth available to the
+ transport flow, estimated from the maximum delivery rate sample in a
+ sliding window. */
+ uint64_t btl_bw;
+ /* BBR's estimated two-way round-trip propagation delay of
+ the path, estimated from the windowed minimum recent round-trip delay
+ sample. */
+ ngtcp2_duration rt_prop;
+ /* The wall clock time at which the current BBR.RTProp
+ sample was obtained. */
+ ngtcp2_tstamp rtprop_stamp;
+ ngtcp2_tstamp cycle_stamp;
+ ngtcp2_tstamp probe_rtt_done_stamp;
+ /* congestion_recovery_start_ts is the time when congestion recovery
+ period started.*/
+ ngtcp2_tstamp congestion_recovery_start_ts;
+ uint64_t congestion_recovery_next_round_delivered;
+ size_t full_bw_count;
+ size_t cycle_index;
+ ngtcp2_bbr_state state;
+ /* A boolean that records whether BBR estimates that it has ever fully
+ utilized its available bandwidth ("filled the pipe"). */
+ int filled_pipe;
+ /* A boolean that BBR sets to true once per packet-timed round trip,
+ on ACKs that advance BBR.round_count. */
+ int round_start;
+ int rtprop_expired;
+ int idle_restart;
+ int packet_conservation;
+ int probe_rtt_round_done;
+ /* in_loss_recovery becomes nonzero when BBR enters loss recovery
+ period. */
+ int in_loss_recovery;
+} ngtcp2_bbr_cc;
+
+int ngtcp2_cc_bbr_cc_init(ngtcp2_cc *cc, ngtcp2_log *log,
+ ngtcp2_conn_stat *cstat, ngtcp2_rst *rst,
+ ngtcp2_tstamp initial_ts, ngtcp2_rand rand,
+ const ngtcp2_rand_ctx *rand_ctx,
+ const ngtcp2_mem *mem);
+
+void ngtcp2_cc_bbr_cc_free(ngtcp2_cc *cc, const ngtcp2_mem *mem);
+
+void ngtcp2_bbr_cc_init(ngtcp2_bbr_cc *bbr_cc, ngtcp2_conn_stat *cstat,
+ ngtcp2_rst *rst, ngtcp2_tstamp initial_ts,
+ ngtcp2_rand rand, const ngtcp2_rand_ctx *rand_ctx,
+ ngtcp2_log *log);
+
+void ngtcp2_bbr_cc_free(ngtcp2_bbr_cc *cc);
+
+void ngtcp2_cc_bbr_cc_on_pkt_acked(ngtcp2_cc *cc, ngtcp2_conn_stat *cstat,
+ const ngtcp2_cc_pkt *pkt, ngtcp2_tstamp ts);
+
+void ngtcp2_cc_bbr_cc_congestion_event(ngtcp2_cc *cc, ngtcp2_conn_stat *cstat,
+ ngtcp2_tstamp sent_ts, ngtcp2_tstamp ts);
+
+void ngtcp2_cc_bbr_cc_on_spurious_congestion(ngtcp2_cc *ccx,
+ ngtcp2_conn_stat *cstat,
+ ngtcp2_tstamp ts);
+
+void ngtcp2_cc_bbr_cc_on_persistent_congestion(ngtcp2_cc *cc,
+ ngtcp2_conn_stat *cstat,
+ ngtcp2_tstamp ts);
+
+void ngtcp2_cc_bbr_cc_on_ack_recv(ngtcp2_cc *cc, ngtcp2_conn_stat *cstat,
+ const ngtcp2_cc_ack *ack, ngtcp2_tstamp ts);
+
+void ngtcp2_cc_bbr_cc_on_pkt_sent(ngtcp2_cc *cc, ngtcp2_conn_stat *cstat,
+ const ngtcp2_cc_pkt *pkt);
+
+void ngtcp2_cc_bbr_cc_new_rtt_sample(ngtcp2_cc *cc, ngtcp2_conn_stat *cstat,
+ ngtcp2_tstamp ts);
+
+void ngtcp2_cc_bbr_cc_reset(ngtcp2_cc *cc, ngtcp2_conn_stat *cstat,
+ ngtcp2_tstamp ts);
+
+void ngtcp2_cc_bbr_cc_event(ngtcp2_cc *cc, ngtcp2_conn_stat *cstat,
+ ngtcp2_cc_event_type event, ngtcp2_tstamp ts);
+
+#endif /* NGTCP2_BBR_H */
diff --git a/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_bbr2.c b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_bbr2.c
new file mode 100644
index 0000000..508ab55
--- /dev/null
+++ b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_bbr2.c
@@ -0,0 +1,1489 @@
+/*
+ * ngtcp2
+ *
+ * Copyright (c) 2021 ngtcp2 contributors
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "ngtcp2_bbr2.h"
+
+#include <assert.h>
+
+#include "ngtcp2_log.h"
+#include "ngtcp2_macro.h"
+#include "ngtcp2_mem.h"
+#include "ngtcp2_rcvry.h"
+#include "ngtcp2_rst.h"
+
+#define NGTCP2_BBR_MAX_BW_FILTERLEN 2
+
+#define NGTCP2_BBR_EXTRA_ACKED_FILTERLEN 10
+
+#define NGTCP2_BBR_STARTUP_PACING_GAIN ((double)2.77)
+
+#define NGTCP2_BBR_STARTUP_CWND_GAIN 2
+
+#define NGTCP2_BBR_PROBE_RTT_CWND_GAIN ((double)0.5)
+
+#define NGTCP2_BBR_BETA_NUMER 7
+#define NGTCP2_BBR_BETA_DENOM 10
+
+#define NGTCP2_BBR_LOSS_THRESH_NUMER 2
+#define NGTCP2_BBR_LOSS_THRESH_DENOM 100
+
+#define NGTCP2_BBR_HEADROOM_NUMER 15
+#define NGTCP2_BBR_HEADROOM_DENOM 100
+
+#define NGTCP2_BBR_PROBE_RTT_INTERVAL (5 * NGTCP2_SECONDS)
+#define NGTCP2_BBR_MIN_RTT_FILTERLEN (10 * NGTCP2_SECONDS)
+
+#define NGTCP2_BBR_PROBE_RTT_DURATION (200 * NGTCP2_MILLISECONDS)
+
+#define NGTCP2_BBR_PACING_MARGIN_PERCENT 1
+
+static void bbr_on_init(ngtcp2_bbr2_cc *bbr, ngtcp2_conn_stat *cstat,
+ ngtcp2_tstamp initial_ts);
+
+static void bbr_on_transmit(ngtcp2_bbr2_cc *bbr, ngtcp2_conn_stat *cstat,
+ ngtcp2_tstamp ts);
+
+static void bbr_reset_congestion_signals(ngtcp2_bbr2_cc *bbr);
+
+static void bbr_reset_lower_bounds(ngtcp2_bbr2_cc *bbr);
+
+static void bbr_init_round_counting(ngtcp2_bbr2_cc *bbr);
+
+static void bbr_init_full_pipe(ngtcp2_bbr2_cc *bbr);
+
+static void bbr_init_pacing_rate(ngtcp2_bbr2_cc *bbr, ngtcp2_conn_stat *cstat);
+
+static void bbr_set_pacing_rate_with_gain(ngtcp2_bbr2_cc *bbr,
+ ngtcp2_conn_stat *cstat,
+ double pacing_gain);
+
+static void bbr_set_pacing_rate(ngtcp2_bbr2_cc *bbr, ngtcp2_conn_stat *cstat);
+
+static void bbr_enter_startup(ngtcp2_bbr2_cc *bbr);
+
+static void bbr_check_startup_done(ngtcp2_bbr2_cc *bbr,
+ const ngtcp2_cc_ack *ack);
+
+static void bbr_update_on_ack(ngtcp2_bbr2_cc *bbr, ngtcp2_conn_stat *cstat,
+ const ngtcp2_cc_ack *ack, ngtcp2_tstamp ts);
+
+static void bbr_update_model_and_state(ngtcp2_bbr2_cc *cc,
+ ngtcp2_conn_stat *cstat,
+ const ngtcp2_cc_ack *ack,
+ ngtcp2_tstamp ts);
+
+static void bbr_update_control_parameters(ngtcp2_bbr2_cc *cc,
+ ngtcp2_conn_stat *cstat,
+ const ngtcp2_cc_ack *ack);
+
+static void bbr_update_on_loss(ngtcp2_bbr2_cc *cc, ngtcp2_conn_stat *cstat,
+ const ngtcp2_cc_pkt *pkt, ngtcp2_tstamp ts);
+
+static void bbr_update_latest_delivery_signals(ngtcp2_bbr2_cc *bbr,
+ ngtcp2_conn_stat *cstat);
+
+static void bbr_advance_latest_delivery_signals(ngtcp2_bbr2_cc *bbr,
+ ngtcp2_conn_stat *cstat);
+
+static void bbr_update_congestion_signals(ngtcp2_bbr2_cc *bbr,
+ ngtcp2_conn_stat *cstat,
+ const ngtcp2_cc_ack *ack);
+
+static void bbr_adapt_lower_bounds_from_congestion(ngtcp2_bbr2_cc *bbr,
+ ngtcp2_conn_stat *cstat);
+
+static void bbr_init_lower_bounds(ngtcp2_bbr2_cc *bbr, ngtcp2_conn_stat *cstat);
+
+static void bbr_loss_lower_bounds(ngtcp2_bbr2_cc *bbr);
+
+static void bbr_bound_bw_for_model(ngtcp2_bbr2_cc *bbr);
+
+static void bbr_update_max_bw(ngtcp2_bbr2_cc *bbr, ngtcp2_conn_stat *cstat,
+ const ngtcp2_cc_ack *ack);
+
+static void bbr_update_round(ngtcp2_bbr2_cc *bbr, const ngtcp2_cc_ack *ack);
+
+static void bbr_start_round(ngtcp2_bbr2_cc *bbr);
+
+static int bbr_is_in_probe_bw_state(ngtcp2_bbr2_cc *bbr);
+
+static void bbr_update_ack_aggregation(ngtcp2_bbr2_cc *bbr,
+ ngtcp2_conn_stat *cstat,
+ const ngtcp2_cc_ack *ack,
+ ngtcp2_tstamp ts);
+
+static void bbr_enter_drain(ngtcp2_bbr2_cc *bbr);
+
+static void bbr_check_drain(ngtcp2_bbr2_cc *bbr, ngtcp2_conn_stat *cstat,
+ ngtcp2_tstamp ts);
+
+static void bbr_enter_probe_bw(ngtcp2_bbr2_cc *bbr, ngtcp2_tstamp ts);
+
+static void bbr_start_probe_bw_down(ngtcp2_bbr2_cc *bbr, ngtcp2_tstamp ts);
+
+static void bbr_start_probe_bw_cruise(ngtcp2_bbr2_cc *bbr);
+
+static void bbr_start_probe_bw_refill(ngtcp2_bbr2_cc *bbr);
+
+static void bbr_start_probe_bw_up(ngtcp2_bbr2_cc *bbr, ngtcp2_conn_stat *cstat,
+ ngtcp2_tstamp ts);
+
+static void bbr_update_probe_bw_cycle_phase(ngtcp2_bbr2_cc *bbr,
+ ngtcp2_conn_stat *cstat,
+ const ngtcp2_cc_ack *ack,
+ ngtcp2_tstamp ts);
+
+static int bbr_check_time_to_cruise(ngtcp2_bbr2_cc *bbr,
+ ngtcp2_conn_stat *cstat, ngtcp2_tstamp ts);
+
+static int bbr_has_elapsed_in_phase(ngtcp2_bbr2_cc *bbr,
+ ngtcp2_duration interval, ngtcp2_tstamp ts);
+
+static uint64_t bbr_inflight_with_headroom(ngtcp2_bbr2_cc *bbr,
+ ngtcp2_conn_stat *cstat);
+
+static void bbr_raise_inflight_hi_slope(ngtcp2_bbr2_cc *bbr,
+ ngtcp2_conn_stat *cstat);
+
+static void bbr_probe_inflight_hi_upward(ngtcp2_bbr2_cc *bbr,
+ ngtcp2_conn_stat *cstat,
+ const ngtcp2_cc_ack *ack);
+
+static void bbr_adapt_upper_bounds(ngtcp2_bbr2_cc *bbr, ngtcp2_conn_stat *cstat,
+ const ngtcp2_cc_ack *ack, ngtcp2_tstamp ts);
+
+static int bbr_check_time_to_probe_bw(ngtcp2_bbr2_cc *bbr,
+ ngtcp2_conn_stat *cstat,
+ ngtcp2_tstamp ts);
+
+static void bbr_pick_probe_wait(ngtcp2_bbr2_cc *bbr);
+
+static int bbr_is_reno_coexistence_probe_time(ngtcp2_bbr2_cc *bbr,
+ ngtcp2_conn_stat *cstat);
+
+static uint64_t bbr_target_inflight(ngtcp2_bbr2_cc *bbr,
+ ngtcp2_conn_stat *cstat);
+
+static int bbr_check_inflight_too_high(ngtcp2_bbr2_cc *bbr,
+ ngtcp2_conn_stat *cstat,
+ ngtcp2_tstamp ts);
+
+static int is_inflight_too_high(const ngtcp2_rs *rs);
+
+static void bbr_handle_inflight_too_high(ngtcp2_bbr2_cc *bbr,
+ ngtcp2_conn_stat *cstat,
+ const ngtcp2_rs *rs, ngtcp2_tstamp ts);
+
+static void bbr_handle_lost_packet(ngtcp2_bbr2_cc *bbr, ngtcp2_conn_stat *cstat,
+ const ngtcp2_cc_pkt *pkt, ngtcp2_tstamp ts);
+
+static uint64_t bbr_inflight_hi_from_lost_packet(ngtcp2_bbr2_cc *bbr,
+ const ngtcp2_rs *rs,
+ const ngtcp2_cc_pkt *pkt);
+
+static void bbr_update_min_rtt(ngtcp2_bbr2_cc *bbr, const ngtcp2_cc_ack *ack,
+ ngtcp2_tstamp ts);
+
+static void bbr_check_probe_rtt(ngtcp2_bbr2_cc *bbr, ngtcp2_conn_stat *cstat,
+ ngtcp2_tstamp ts);
+
+static void bbr_enter_probe_rtt(ngtcp2_bbr2_cc *bbr);
+
+static void bbr_handle_probe_rtt(ngtcp2_bbr2_cc *bbr, ngtcp2_conn_stat *cstat,
+ ngtcp2_tstamp ts);
+
+static void bbr_check_probe_rtt_done(ngtcp2_bbr2_cc *bbr,
+ ngtcp2_conn_stat *cstat, ngtcp2_tstamp ts);
+
+static void bbr_mark_connection_app_limited(ngtcp2_bbr2_cc *bbr,
+ ngtcp2_conn_stat *cstat);
+
+static void bbr_exit_probe_rtt(ngtcp2_bbr2_cc *bbr, ngtcp2_tstamp ts);
+
+static void bbr_handle_restart_from_idle(ngtcp2_bbr2_cc *bbr,
+ ngtcp2_conn_stat *cstat,
+ ngtcp2_tstamp ts);
+
+static uint64_t bbr_bdp_multiple(ngtcp2_bbr2_cc *bbr, uint64_t bw, double gain);
+
+static uint64_t bbr_quantization_budget(ngtcp2_bbr2_cc *bbr,
+ ngtcp2_conn_stat *cstat,
+ uint64_t inflight);
+
+static uint64_t bbr_inflight(ngtcp2_bbr2_cc *bbr, ngtcp2_conn_stat *cstat,
+ uint64_t bw, double gain);
+
+static void bbr_update_max_inflight(ngtcp2_bbr2_cc *bbr,
+ ngtcp2_conn_stat *cstat);
+
+static void bbr_update_offload_budget(ngtcp2_bbr2_cc *bbr,
+ ngtcp2_conn_stat *cstat);
+
+static uint64_t min_pipe_cwnd(size_t max_udp_payload_size);
+
+static void bbr_advance_max_bw_filter(ngtcp2_bbr2_cc *bbr);
+
+static void bbr_modulate_cwnd_for_recovery(ngtcp2_bbr2_cc *bbr,
+ ngtcp2_conn_stat *cstat,
+ const ngtcp2_cc_ack *ack);
+
+static void bbr_save_cwnd(ngtcp2_bbr2_cc *bbr, ngtcp2_conn_stat *cstat);
+
+static void bbr_restore_cwnd(ngtcp2_bbr2_cc *bbr, ngtcp2_conn_stat *cstat);
+
+static uint64_t bbr_probe_rtt_cwnd(ngtcp2_bbr2_cc *bbr,
+ ngtcp2_conn_stat *cstat);
+
+static void bbr_bound_cwnd_for_probe_rtt(ngtcp2_bbr2_cc *bbr,
+ ngtcp2_conn_stat *cstat);
+
+static void bbr_set_cwnd(ngtcp2_bbr2_cc *bbr, ngtcp2_conn_stat *cstat,
+ const ngtcp2_cc_ack *ack);
+
+static void bbr_bound_cwnd_for_model(ngtcp2_bbr2_cc *bbr,
+ ngtcp2_conn_stat *cstat);
+
+static void bbr_set_send_quantum(ngtcp2_bbr2_cc *bbr, ngtcp2_conn_stat *cstat);
+
+static int in_congestion_recovery(const ngtcp2_conn_stat *cstat,
+ ngtcp2_tstamp sent_time);
+
+static void bbr_handle_recovery(ngtcp2_bbr2_cc *bbr, ngtcp2_conn_stat *cstat,
+ const ngtcp2_cc_ack *ack);
+
+static void bbr_on_init(ngtcp2_bbr2_cc *bbr, ngtcp2_conn_stat *cstat,
+ ngtcp2_tstamp initial_ts) {
+ ngtcp2_window_filter_init(&bbr->max_bw_filter, NGTCP2_BBR_MAX_BW_FILTERLEN);
+ ngtcp2_window_filter_init(&bbr->extra_acked_filter,
+ NGTCP2_BBR_EXTRA_ACKED_FILTERLEN);
+
+ bbr->min_rtt = UINT64_MAX;
+ bbr->min_rtt_stamp = initial_ts;
+ /* remark: Use UINT64_MAX instead of 0 for consistency. */
+ bbr->probe_rtt_done_stamp = UINT64_MAX;
+ bbr->probe_rtt_round_done = 0;
+ bbr->prior_cwnd = 0;
+ bbr->idle_restart = 0;
+ bbr->extra_acked_interval_start = initial_ts;
+ bbr->extra_acked_delivered = 0;
+
+ bbr_reset_congestion_signals(bbr);
+ bbr_reset_lower_bounds(bbr);
+ bbr_init_round_counting(bbr);
+ bbr_init_full_pipe(bbr);
+ bbr_init_pacing_rate(bbr, cstat);
+ bbr_enter_startup(bbr);
+
+ cstat->send_quantum = cstat->max_tx_udp_payload_size * 10;
+
+ /* Missing in documentation */
+ bbr->loss_round_start = 0;
+ bbr->loss_round_delivered = UINT64_MAX;
+
+ bbr->rounds_since_bw_probe = 0;
+
+ bbr->max_bw = 0;
+ bbr->bw = 0;
+
+ bbr->cycle_count = 0;
+
+ bbr->extra_acked = 0;
+
+ bbr->bytes_lost_in_round = 0;
+ bbr->loss_events_in_round = 0;
+
+ bbr->offload_budget = 0;
+
+ bbr->probe_up_cnt = UINT64_MAX;
+ bbr->cycle_stamp = UINT64_MAX;
+ bbr->ack_phase = 0;
+ bbr->bw_probe_wait = 0;
+ bbr->bw_probe_samples = 0;
+ bbr->bw_probe_up_rounds = 0;
+ bbr->bw_probe_up_acks = 0;
+
+ bbr->inflight_hi = UINT64_MAX;
+ bbr->bw_hi = UINT64_MAX;
+
+ bbr->probe_rtt_expired = 0;
+ bbr->probe_rtt_min_delay = UINT64_MAX;
+ bbr->probe_rtt_min_stamp = initial_ts;
+
+ bbr->in_loss_recovery = 0;
+ bbr->packet_conservation = 0;
+
+ bbr->max_inflight = 0;
+
+ bbr->congestion_recovery_start_ts = UINT64_MAX;
+ bbr->congestion_recovery_next_round_delivered = 0;
+
+ bbr->prior_inflight_lo = 0;
+ bbr->prior_inflight_hi = 0;
+ bbr->prior_bw_lo = 0;
+}
+
+static void bbr_reset_congestion_signals(ngtcp2_bbr2_cc *bbr) {
+ bbr->loss_in_round = 0;
+ bbr->bw_latest = 0;
+ bbr->inflight_latest = 0;
+}
+
+static void bbr_reset_lower_bounds(ngtcp2_bbr2_cc *bbr) {
+ bbr->bw_lo = UINT64_MAX;
+ bbr->inflight_lo = UINT64_MAX;
+}
+
+static void bbr_init_round_counting(ngtcp2_bbr2_cc *bbr) {
+ bbr->next_round_delivered = 0;
+ bbr->round_start = 0;
+ bbr->round_count = 0;
+}
+
+static void bbr_init_full_pipe(ngtcp2_bbr2_cc *bbr) {
+ bbr->filled_pipe = 0;
+ bbr->full_bw = 0;
+ bbr->full_bw_count = 0;
+}
+
+static void bbr_check_startup_full_bandwidth(ngtcp2_bbr2_cc *bbr) {
+ if (bbr->filled_pipe || !bbr->round_start || bbr->rst->rs.is_app_limited) {
+ return;
+ }
+
+ if (bbr->max_bw * 100 >= bbr->full_bw * 125) {
+ bbr->full_bw = bbr->max_bw;
+ bbr->full_bw_count = 0;
+ }
+
+ ++bbr->full_bw_count;
+
+ if (bbr->full_bw_count >= 3) {
+ bbr->filled_pipe = 1;
+
+ ngtcp2_log_info(bbr->ccb.log, NGTCP2_LOG_EVENT_RCV,
+ "bbr2 filled pipe, full_bw=%" PRIu64, bbr->full_bw);
+ }
+}
+
+static void bbr_check_startup_high_loss(ngtcp2_bbr2_cc *bbr,
+ const ngtcp2_cc_ack *ack) {
+ if (bbr->filled_pipe || !bbr->round_start || bbr->rst->rs.is_app_limited) {
+ return;
+ }
+
+ if (bbr->loss_events_in_round <= 3) {
+ return;
+ }
+
+ /* loss_thresh = 2% */
+ if (bbr->bytes_lost_in_round * 100 <= ack->prior_bytes_in_flight * 2) {
+ return;
+ }
+
+ bbr->filled_pipe = 1;
+}
+
+static void bbr_init_pacing_rate(ngtcp2_bbr2_cc *bbr, ngtcp2_conn_stat *cstat) {
+ double nominal_bandwidth = (double)bbr->initial_cwnd;
+
+ cstat->pacing_rate = NGTCP2_BBR_STARTUP_PACING_GAIN * nominal_bandwidth /
+ (double)NGTCP2_MILLISECONDS;
+}
+
+static void bbr_set_pacing_rate_with_gain(ngtcp2_bbr2_cc *bbr,
+ ngtcp2_conn_stat *cstat,
+ double pacing_gain) {
+ double rate = pacing_gain * (double)bbr->bw *
+ (100 - NGTCP2_BBR_PACING_MARGIN_PERCENT) / 100 / NGTCP2_SECONDS;
+
+ if (bbr->filled_pipe || rate > cstat->pacing_rate) {
+ cstat->pacing_rate = rate;
+ }
+}
+
+static void bbr_set_pacing_rate(ngtcp2_bbr2_cc *bbr, ngtcp2_conn_stat *cstat) {
+ bbr_set_pacing_rate_with_gain(bbr, cstat, bbr->pacing_gain);
+}
+
+static void bbr_enter_startup(ngtcp2_bbr2_cc *bbr) {
+ ngtcp2_log_info(bbr->ccb.log, NGTCP2_LOG_EVENT_RCV, "bbr2 enter Startup");
+
+ bbr->state = NGTCP2_BBR2_STATE_STARTUP;
+ bbr->pacing_gain = NGTCP2_BBR_STARTUP_PACING_GAIN;
+ bbr->cwnd_gain = NGTCP2_BBR_STARTUP_CWND_GAIN;
+}
+
+static void bbr_check_startup_done(ngtcp2_bbr2_cc *bbr,
+ const ngtcp2_cc_ack *ack) {
+ bbr_check_startup_full_bandwidth(bbr);
+ bbr_check_startup_high_loss(bbr, ack);
+
+ if (bbr->state == NGTCP2_BBR2_STATE_STARTUP && bbr->filled_pipe) {
+ bbr_enter_drain(bbr);
+ }
+}
+
+static void bbr_on_transmit(ngtcp2_bbr2_cc *bbr, ngtcp2_conn_stat *cstat,
+ ngtcp2_tstamp ts) {
+ bbr_handle_restart_from_idle(bbr, cstat, ts);
+}
+
+static void bbr_update_on_ack(ngtcp2_bbr2_cc *bbr, ngtcp2_conn_stat *cstat,
+ const ngtcp2_cc_ack *ack, ngtcp2_tstamp ts) {
+ bbr_update_model_and_state(bbr, cstat, ack, ts);
+ bbr_update_control_parameters(bbr, cstat, ack);
+}
+
+static void bbr_update_model_and_state(ngtcp2_bbr2_cc *bbr,
+ ngtcp2_conn_stat *cstat,
+ const ngtcp2_cc_ack *ack,
+ ngtcp2_tstamp ts) {
+ bbr_update_latest_delivery_signals(bbr, cstat);
+ bbr_update_congestion_signals(bbr, cstat, ack);
+ bbr_update_ack_aggregation(bbr, cstat, ack, ts);
+ bbr_check_startup_done(bbr, ack);
+ bbr_check_drain(bbr, cstat, ts);
+ bbr_update_probe_bw_cycle_phase(bbr, cstat, ack, ts);
+ bbr_update_min_rtt(bbr, ack, ts);
+ bbr_check_probe_rtt(bbr, cstat, ts);
+ bbr_advance_latest_delivery_signals(bbr, cstat);
+ bbr_bound_bw_for_model(bbr);
+}
+
+static void bbr_update_control_parameters(ngtcp2_bbr2_cc *bbr,
+ ngtcp2_conn_stat *cstat,
+ const ngtcp2_cc_ack *ack) {
+ bbr_set_pacing_rate(bbr, cstat);
+ bbr_set_send_quantum(bbr, cstat);
+ bbr_set_cwnd(bbr, cstat, ack);
+}
+
+static void bbr_update_on_loss(ngtcp2_bbr2_cc *cc, ngtcp2_conn_stat *cstat,
+ const ngtcp2_cc_pkt *pkt, ngtcp2_tstamp ts) {
+ bbr_handle_lost_packet(cc, cstat, pkt, ts);
+}
+
+static void bbr_update_latest_delivery_signals(ngtcp2_bbr2_cc *bbr,
+ ngtcp2_conn_stat *cstat) {
+ bbr->loss_round_start = 0;
+ bbr->bw_latest = ngtcp2_max(bbr->bw_latest, cstat->delivery_rate_sec);
+ bbr->inflight_latest =
+ ngtcp2_max(bbr->inflight_latest, bbr->rst->rs.delivered);
+
+ if (bbr->rst->rs.prior_delivered >= bbr->loss_round_delivered) {
+ bbr->loss_round_delivered = bbr->rst->delivered;
+ bbr->loss_round_start = 1;
+ }
+}
+
+static void bbr_advance_latest_delivery_signals(ngtcp2_bbr2_cc *bbr,
+ ngtcp2_conn_stat *cstat) {
+ if (bbr->loss_round_start) {
+ bbr->bw_latest = cstat->delivery_rate_sec;
+ bbr->inflight_latest = bbr->rst->rs.delivered;
+ }
+}
+
+static void bbr_update_congestion_signals(ngtcp2_bbr2_cc *bbr,
+ ngtcp2_conn_stat *cstat,
+ const ngtcp2_cc_ack *ack) {
+ bbr_update_max_bw(bbr, cstat, ack);
+
+ if (ack->bytes_lost) {
+ bbr->bytes_lost_in_round += ack->bytes_lost;
+ ++bbr->loss_events_in_round;
+
+ if (!bbr->loss_in_round) {
+ bbr->loss_in_round = 1;
+ bbr->loss_round_delivered = bbr->rst->delivered;
+ }
+ }
+
+ if (!bbr->loss_round_start) {
+ return;
+ }
+
+ bbr_adapt_lower_bounds_from_congestion(bbr, cstat);
+
+ bbr->loss_in_round = 0;
+}
+
+static void bbr_adapt_lower_bounds_from_congestion(ngtcp2_bbr2_cc *bbr,
+ ngtcp2_conn_stat *cstat) {
+ if (!bbr->filled_pipe || bbr_is_in_probe_bw_state(bbr)) {
+ return;
+ }
+
+ if (bbr->loss_in_round) {
+ bbr_init_lower_bounds(bbr, cstat);
+ bbr_loss_lower_bounds(bbr);
+ }
+}
+
+static void bbr_init_lower_bounds(ngtcp2_bbr2_cc *bbr,
+ ngtcp2_conn_stat *cstat) {
+ if (bbr->bw_lo == UINT64_MAX) {
+ bbr->bw_lo = bbr->max_bw;
+ }
+
+ if (bbr->inflight_lo == UINT64_MAX) {
+ bbr->inflight_lo = cstat->cwnd;
+ }
+}
+
+static void bbr_loss_lower_bounds(ngtcp2_bbr2_cc *bbr) {
+ bbr->bw_lo = ngtcp2_max(bbr->bw_latest, bbr->bw_lo * NGTCP2_BBR_BETA_NUMER /
+ NGTCP2_BBR_BETA_DENOM);
+ bbr->inflight_lo = ngtcp2_max(bbr->inflight_latest,
+ bbr->inflight_lo * NGTCP2_BBR_BETA_NUMER /
+ NGTCP2_BBR_BETA_DENOM);
+}
+
+static void bbr_bound_bw_for_model(ngtcp2_bbr2_cc *bbr) {
+ bbr->bw = ngtcp2_min(bbr->max_bw, bbr->bw_lo);
+ bbr->bw = ngtcp2_min(bbr->bw, bbr->bw_hi);
+}
+
+static void bbr_update_max_bw(ngtcp2_bbr2_cc *bbr, ngtcp2_conn_stat *cstat,
+ const ngtcp2_cc_ack *ack) {
+ bbr_update_round(bbr, ack);
+ bbr_handle_recovery(bbr, cstat, ack);
+
+ if (cstat->delivery_rate_sec >= bbr->max_bw || !bbr->rst->rs.is_app_limited) {
+ ngtcp2_window_filter_update(&bbr->max_bw_filter, cstat->delivery_rate_sec,
+ bbr->cycle_count);
+
+ bbr->max_bw = ngtcp2_window_filter_get_best(&bbr->max_bw_filter);
+ }
+}
+
+static void bbr_update_round(ngtcp2_bbr2_cc *bbr, const ngtcp2_cc_ack *ack) {
+ if (ack->pkt_delivered >= bbr->next_round_delivered) {
+ bbr_start_round(bbr);
+
+ ++bbr->round_count;
+ ++bbr->rounds_since_bw_probe;
+ bbr->round_start = 1;
+
+ bbr->bytes_lost_in_round = 0;
+ bbr->loss_events_in_round = 0;
+
+ bbr->rst->is_cwnd_limited = 0;
+
+ return;
+ }
+
+ bbr->round_start = 0;
+}
+
+static void bbr_start_round(ngtcp2_bbr2_cc *bbr) {
+ bbr->next_round_delivered = bbr->rst->delivered;
+}
+
+static int bbr_is_in_probe_bw_state(ngtcp2_bbr2_cc *bbr) {
+ switch (bbr->state) {
+ case NGTCP2_BBR2_STATE_PROBE_BW_DOWN:
+ case NGTCP2_BBR2_STATE_PROBE_BW_CRUISE:
+ case NGTCP2_BBR2_STATE_PROBE_BW_REFILL:
+ case NGTCP2_BBR2_STATE_PROBE_BW_UP:
+ return 1;
+ default:
+ return 0;
+ }
+}
+
+static void bbr_update_ack_aggregation(ngtcp2_bbr2_cc *bbr,
+ ngtcp2_conn_stat *cstat,
+ const ngtcp2_cc_ack *ack,
+ ngtcp2_tstamp ts) {
+ ngtcp2_duration interval = ts - bbr->extra_acked_interval_start;
+ uint64_t expected_delivered = bbr->bw * interval / NGTCP2_SECONDS;
+ uint64_t extra;
+
+ if (bbr->extra_acked_delivered <= expected_delivered) {
+ bbr->extra_acked_delivered = 0;
+ bbr->extra_acked_interval_start = ts;
+ expected_delivered = 0;
+ }
+
+ bbr->extra_acked_delivered += ack->bytes_delivered;
+ extra = bbr->extra_acked_delivered - expected_delivered;
+ extra = ngtcp2_min(extra, cstat->cwnd);
+
+ ngtcp2_window_filter_update(&bbr->extra_acked_filter, extra,
+ bbr->round_count);
+
+ bbr->extra_acked = ngtcp2_window_filter_get_best(&bbr->extra_acked_filter);
+}
+
+static void bbr_enter_drain(ngtcp2_bbr2_cc *bbr) {
+ ngtcp2_log_info(bbr->ccb.log, NGTCP2_LOG_EVENT_RCV, "bbr2 enter Drain");
+
+ bbr->state = NGTCP2_BBR2_STATE_DRAIN;
+ bbr->pacing_gain = 1. / NGTCP2_BBR_STARTUP_CWND_GAIN;
+ bbr->cwnd_gain = NGTCP2_BBR_STARTUP_CWND_GAIN;
+}
+
+static void bbr_check_drain(ngtcp2_bbr2_cc *bbr, ngtcp2_conn_stat *cstat,
+ ngtcp2_tstamp ts) {
+ if (bbr->state == NGTCP2_BBR2_STATE_DRAIN &&
+ cstat->bytes_in_flight <= bbr_inflight(bbr, cstat, bbr->bw, 1.0)) {
+ bbr_enter_probe_bw(bbr, ts);
+ }
+}
+
+static void bbr_enter_probe_bw(ngtcp2_bbr2_cc *bbr, ngtcp2_tstamp ts) {
+ bbr_start_probe_bw_down(bbr, ts);
+}
+
+static void bbr_start_probe_bw_down(ngtcp2_bbr2_cc *bbr, ngtcp2_tstamp ts) {
+ ngtcp2_log_info(bbr->ccb.log, NGTCP2_LOG_EVENT_RCV,
+ "bbr2 start ProbeBW_DOWN");
+
+ bbr_reset_congestion_signals(bbr);
+
+ bbr->probe_up_cnt = UINT64_MAX;
+
+ bbr_pick_probe_wait(bbr);
+
+ bbr->cycle_stamp = ts;
+ bbr->ack_phase = NGTCP2_BBR2_ACK_PHASE_ACKS_PROBE_STOPPING;
+
+ bbr_start_round(bbr);
+
+ bbr->state = NGTCP2_BBR2_STATE_PROBE_BW_DOWN;
+ bbr->pacing_gain = 0.9;
+ bbr->cwnd_gain = 2;
+}
+
+static void bbr_start_probe_bw_cruise(ngtcp2_bbr2_cc *bbr) {
+ ngtcp2_log_info(bbr->ccb.log, NGTCP2_LOG_EVENT_RCV,
+ "bbr2 start ProbeBW_CRUISE");
+
+ bbr->state = NGTCP2_BBR2_STATE_PROBE_BW_CRUISE;
+ bbr->pacing_gain = 1.0;
+ bbr->cwnd_gain = 2;
+}
+
+static void bbr_start_probe_bw_refill(ngtcp2_bbr2_cc *bbr) {
+ ngtcp2_log_info(bbr->ccb.log, NGTCP2_LOG_EVENT_RCV,
+ "bbr2 start ProbeBW_REFILL");
+
+ bbr_reset_lower_bounds(bbr);
+
+ bbr->bw_probe_up_rounds = 0;
+ bbr->bw_probe_up_acks = 0;
+ bbr->ack_phase = NGTCP2_BBR2_ACK_PHASE_ACKS_REFILLING;
+
+ bbr_start_round(bbr);
+
+ bbr->state = NGTCP2_BBR2_STATE_PROBE_BW_REFILL;
+ bbr->pacing_gain = 1.0;
+ bbr->cwnd_gain = 2;
+}
+
+static void bbr_start_probe_bw_up(ngtcp2_bbr2_cc *bbr, ngtcp2_conn_stat *cstat,
+ ngtcp2_tstamp ts) {
+ ngtcp2_log_info(bbr->ccb.log, NGTCP2_LOG_EVENT_RCV, "bbr2 start ProbeBW_UP");
+
+ bbr->ack_phase = NGTCP2_BBR2_ACK_PHASE_ACKS_PROBE_STARTING;
+
+ bbr_start_round(bbr);
+
+ bbr->cycle_stamp = ts;
+ bbr->state = NGTCP2_BBR2_STATE_PROBE_BW_UP;
+ bbr->pacing_gain = 1.25;
+ bbr->cwnd_gain = 2;
+
+ bbr_raise_inflight_hi_slope(bbr, cstat);
+}
+
+static void bbr_update_probe_bw_cycle_phase(ngtcp2_bbr2_cc *bbr,
+ ngtcp2_conn_stat *cstat,
+ const ngtcp2_cc_ack *ack,
+ ngtcp2_tstamp ts) {
+ if (!bbr->filled_pipe) {
+ return;
+ }
+
+ bbr_adapt_upper_bounds(bbr, cstat, ack, ts);
+
+ if (!bbr_is_in_probe_bw_state(bbr)) {
+ return;
+ }
+
+ switch (bbr->state) {
+ case NGTCP2_BBR2_STATE_PROBE_BW_DOWN:
+ if (bbr_check_time_to_probe_bw(bbr, cstat, ts)) {
+ return;
+ }
+
+ if (bbr_check_time_to_cruise(bbr, cstat, ts)) {
+ bbr_start_probe_bw_cruise(bbr);
+ }
+
+ break;
+ case NGTCP2_BBR2_STATE_PROBE_BW_CRUISE:
+ if (bbr_check_time_to_probe_bw(bbr, cstat, ts)) {
+ return;
+ }
+
+ break;
+ case NGTCP2_BBR2_STATE_PROBE_BW_REFILL:
+ if (bbr->round_start) {
+ bbr->bw_probe_samples = 1;
+ bbr_start_probe_bw_up(bbr, cstat, ts);
+ }
+
+ break;
+ case NGTCP2_BBR2_STATE_PROBE_BW_UP:
+ if (bbr_has_elapsed_in_phase(bbr, bbr->min_rtt, ts) &&
+ cstat->bytes_in_flight > bbr_inflight(bbr, cstat, bbr->max_bw, 1.25)) {
+ bbr_start_probe_bw_down(bbr, ts);
+ }
+
+ break;
+ default:
+ break;
+ }
+}
+
+static int bbr_check_time_to_cruise(ngtcp2_bbr2_cc *bbr,
+ ngtcp2_conn_stat *cstat, ngtcp2_tstamp ts) {
+ (void)ts;
+
+ if (cstat->bytes_in_flight > bbr_inflight_with_headroom(bbr, cstat)) {
+ return 0;
+ }
+
+ if (cstat->bytes_in_flight <= bbr_inflight(bbr, cstat, bbr->max_bw, 1.0)) {
+ return 1;
+ }
+
+ return 0;
+}
+
+static int bbr_has_elapsed_in_phase(ngtcp2_bbr2_cc *bbr,
+ ngtcp2_duration interval,
+ ngtcp2_tstamp ts) {
+ return ts > bbr->cycle_stamp + interval;
+}
+
+static uint64_t bbr_inflight_with_headroom(ngtcp2_bbr2_cc *bbr,
+ ngtcp2_conn_stat *cstat) {
+ uint64_t headroom;
+ uint64_t mpcwnd;
+ if (bbr->inflight_hi == UINT64_MAX) {
+ return UINT64_MAX;
+ }
+
+ headroom = ngtcp2_max(cstat->max_tx_udp_payload_size,
+ bbr->inflight_hi * NGTCP2_BBR_HEADROOM_NUMER /
+ NGTCP2_BBR_HEADROOM_DENOM);
+ mpcwnd = min_pipe_cwnd(cstat->max_tx_udp_payload_size);
+
+ if (bbr->inflight_hi > headroom) {
+ return ngtcp2_max(bbr->inflight_hi - headroom, mpcwnd);
+ }
+
+ return mpcwnd;
+}
+
+static void bbr_raise_inflight_hi_slope(ngtcp2_bbr2_cc *bbr,
+ ngtcp2_conn_stat *cstat) {
+ uint64_t growth_this_round = cstat->max_tx_udp_payload_size
+ << bbr->bw_probe_up_rounds;
+
+ bbr->bw_probe_up_rounds = ngtcp2_min(bbr->bw_probe_up_rounds + 1, 30);
+ bbr->probe_up_cnt = ngtcp2_max(cstat->cwnd / growth_this_round, 1) *
+ cstat->max_tx_udp_payload_size;
+}
+
+static void bbr_probe_inflight_hi_upward(ngtcp2_bbr2_cc *bbr,
+ ngtcp2_conn_stat *cstat,
+ const ngtcp2_cc_ack *ack) {
+ uint64_t delta;
+
+ if (!bbr->rst->is_cwnd_limited || cstat->cwnd < bbr->inflight_hi) {
+ return;
+ }
+
+ bbr->bw_probe_up_acks += ack->bytes_delivered;
+
+ if (bbr->bw_probe_up_acks >= bbr->probe_up_cnt) {
+ delta = bbr->bw_probe_up_acks / bbr->probe_up_cnt;
+ bbr->bw_probe_up_acks -= delta * bbr->probe_up_cnt;
+ bbr->inflight_hi += delta * cstat->max_tx_udp_payload_size;
+ }
+
+ if (bbr->round_start) {
+ bbr_raise_inflight_hi_slope(bbr, cstat);
+ }
+}
+
+static void bbr_adapt_upper_bounds(ngtcp2_bbr2_cc *bbr, ngtcp2_conn_stat *cstat,
+ const ngtcp2_cc_ack *ack, ngtcp2_tstamp ts) {
+ if (bbr->ack_phase == NGTCP2_BBR2_ACK_PHASE_ACKS_PROBE_STARTING &&
+ bbr->round_start) {
+ bbr->ack_phase = NGTCP2_BBR2_ACK_PHASE_ACKS_PROBE_FEEDBACK;
+ }
+
+ if (bbr->ack_phase == NGTCP2_BBR2_ACK_PHASE_ACKS_PROBE_STOPPING &&
+ bbr->round_start) {
+ if (bbr_is_in_probe_bw_state(bbr) && !bbr->rst->rs.is_app_limited) {
+ bbr_advance_max_bw_filter(bbr);
+ }
+ }
+
+ if (!bbr_check_inflight_too_high(bbr, cstat, ts)) {
+ /* bbr->bw_hi never be updated */
+ if (bbr->inflight_hi == UINT64_MAX /* || bbr->bw_hi == UINT64_MAX */) {
+ return;
+ }
+
+ if (bbr->rst->rs.tx_in_flight > bbr->inflight_hi) {
+ bbr->inflight_hi = bbr->rst->rs.tx_in_flight;
+ }
+
+ if (cstat->delivery_rate_sec > bbr->bw_hi) {
+ bbr->bw_hi = cstat->delivery_rate_sec;
+ }
+
+ if (bbr->state == NGTCP2_BBR2_STATE_PROBE_BW_UP) {
+ bbr_probe_inflight_hi_upward(bbr, cstat, ack);
+ }
+ }
+}
+
+static int bbr_check_time_to_probe_bw(ngtcp2_bbr2_cc *bbr,
+ ngtcp2_conn_stat *cstat,
+ ngtcp2_tstamp ts) {
+ if (bbr_has_elapsed_in_phase(bbr, bbr->bw_probe_wait, ts) ||
+ bbr_is_reno_coexistence_probe_time(bbr, cstat)) {
+ bbr_start_probe_bw_refill(bbr);
+
+ return 1;
+ }
+
+ return 0;
+}
+
+static void bbr_pick_probe_wait(ngtcp2_bbr2_cc *bbr) {
+ uint8_t rand;
+
+ bbr->rand(&rand, 1, &bbr->rand_ctx);
+
+ bbr->rounds_since_bw_probe = (uint64_t)(rand * 2 / 256);
+
+ bbr->rand(&rand, 1, &bbr->rand_ctx);
+
+ bbr->bw_probe_wait = 2 * NGTCP2_SECONDS +
+ (ngtcp2_tstamp)((double)rand / 255. * NGTCP2_SECONDS);
+}
+
+static int bbr_is_reno_coexistence_probe_time(ngtcp2_bbr2_cc *bbr,
+ ngtcp2_conn_stat *cstat) {
+ uint64_t reno_rounds =
+ bbr_target_inflight(bbr, cstat) / cstat->max_tx_udp_payload_size;
+
+ return bbr->rounds_since_bw_probe >= ngtcp2_min(reno_rounds, 63);
+}
+
+static uint64_t bbr_target_inflight(ngtcp2_bbr2_cc *bbr,
+ ngtcp2_conn_stat *cstat) {
+ uint64_t bdp = bbr_inflight(bbr, cstat, bbr->bw, 1.0);
+
+ return ngtcp2_min(bdp, cstat->cwnd);
+}
+
+static int bbr_check_inflight_too_high(ngtcp2_bbr2_cc *bbr,
+ ngtcp2_conn_stat *cstat,
+ ngtcp2_tstamp ts) {
+ if (is_inflight_too_high(&bbr->rst->rs)) {
+ if (bbr->bw_probe_samples) {
+ bbr_handle_inflight_too_high(bbr, cstat, &bbr->rst->rs, ts);
+ }
+
+ return 1;
+ }
+
+ return 0;
+}
+
+static int is_inflight_too_high(const ngtcp2_rs *rs) {
+ return rs->lost * NGTCP2_BBR_LOSS_THRESH_DENOM >
+ rs->tx_in_flight * NGTCP2_BBR_LOSS_THRESH_NUMER;
+}
+
+static void bbr_handle_inflight_too_high(ngtcp2_bbr2_cc *bbr,
+ ngtcp2_conn_stat *cstat,
+ const ngtcp2_rs *rs,
+ ngtcp2_tstamp ts) {
+ bbr->bw_probe_samples = 0;
+
+ if (!rs->is_app_limited) {
+ bbr->prior_inflight_hi = bbr->inflight_hi;
+
+ bbr->inflight_hi = ngtcp2_max(
+ rs->tx_in_flight, bbr_target_inflight(bbr, cstat) *
+ NGTCP2_BBR_BETA_NUMER / NGTCP2_BBR_BETA_DENOM);
+ }
+
+ if (bbr->state == NGTCP2_BBR2_STATE_PROBE_BW_UP) {
+ bbr_start_probe_bw_down(bbr, ts);
+ }
+}
+
+static void bbr_handle_lost_packet(ngtcp2_bbr2_cc *bbr, ngtcp2_conn_stat *cstat,
+ const ngtcp2_cc_pkt *pkt, ngtcp2_tstamp ts) {
+ ngtcp2_rs rs = {0};
+
+ if (!bbr->bw_probe_samples) {
+ return;
+ }
+
+ rs.tx_in_flight = pkt->tx_in_flight;
+ /* bbr->rst->lost is not incremented for pkt yet */
+ rs.lost = bbr->rst->lost + pkt->pktlen - pkt->lost;
+ rs.is_app_limited = pkt->is_app_limited;
+
+ if (is_inflight_too_high(&rs)) {
+ rs.tx_in_flight = bbr_inflight_hi_from_lost_packet(bbr, &rs, pkt);
+
+ bbr_handle_inflight_too_high(bbr, cstat, &rs, ts);
+ }
+}
+
+static uint64_t bbr_inflight_hi_from_lost_packet(ngtcp2_bbr2_cc *bbr,
+ const ngtcp2_rs *rs,
+ const ngtcp2_cc_pkt *pkt) {
+ uint64_t inflight_prev, lost_prev, lost_prefix;
+ (void)bbr;
+
+ assert(rs->tx_in_flight >= pkt->pktlen);
+
+ inflight_prev = rs->tx_in_flight - pkt->pktlen;
+
+ assert(rs->lost >= pkt->pktlen);
+
+ lost_prev = rs->lost - pkt->pktlen;
+
+ if (inflight_prev * NGTCP2_BBR_LOSS_THRESH_NUMER <
+ lost_prev * NGTCP2_BBR_LOSS_THRESH_DENOM) {
+ return inflight_prev;
+ }
+
+ lost_prefix = (inflight_prev * NGTCP2_BBR_LOSS_THRESH_NUMER -
+ lost_prev * NGTCP2_BBR_LOSS_THRESH_DENOM) /
+ (NGTCP2_BBR_LOSS_THRESH_DENOM - NGTCP2_BBR_LOSS_THRESH_NUMER);
+
+ return inflight_prev + lost_prefix;
+}
+
+static void bbr_update_min_rtt(ngtcp2_bbr2_cc *bbr, const ngtcp2_cc_ack *ack,
+ ngtcp2_tstamp ts) {
+ int min_rtt_expired;
+
+ bbr->probe_rtt_expired =
+ ts > bbr->probe_rtt_min_stamp + NGTCP2_BBR_PROBE_RTT_INTERVAL;
+
+ if (ack->rtt != UINT64_MAX &&
+ (ack->rtt < bbr->probe_rtt_min_delay || bbr->probe_rtt_expired)) {
+ bbr->probe_rtt_min_delay = ack->rtt;
+ bbr->probe_rtt_min_stamp = ts;
+ }
+
+ min_rtt_expired = ts > bbr->min_rtt_stamp + NGTCP2_BBR_MIN_RTT_FILTERLEN;
+
+ if (bbr->probe_rtt_min_delay < bbr->min_rtt || min_rtt_expired) {
+ bbr->min_rtt = bbr->probe_rtt_min_delay;
+ bbr->min_rtt_stamp = bbr->probe_rtt_min_stamp;
+
+ ngtcp2_log_info(bbr->ccb.log, NGTCP2_LOG_EVENT_RCV,
+ "bbr2 update min_rtt=%" PRIu64, bbr->min_rtt);
+ }
+}
+
+static void bbr_check_probe_rtt(ngtcp2_bbr2_cc *bbr, ngtcp2_conn_stat *cstat,
+ ngtcp2_tstamp ts) {
+ if (bbr->state != NGTCP2_BBR2_STATE_PROBE_RTT && bbr->probe_rtt_expired &&
+ !bbr->idle_restart) {
+ bbr_enter_probe_rtt(bbr);
+ bbr_save_cwnd(bbr, cstat);
+
+ bbr->probe_rtt_done_stamp = UINT64_MAX;
+ bbr->ack_phase = NGTCP2_BBR2_ACK_PHASE_ACKS_PROBE_STOPPING;
+
+ bbr_start_round(bbr);
+ }
+
+ if (bbr->state == NGTCP2_BBR2_STATE_PROBE_RTT) {
+ bbr_handle_probe_rtt(bbr, cstat, ts);
+ }
+
+ if (bbr->rst->rs.delivered) {
+ bbr->idle_restart = 0;
+ }
+}
+
+static void bbr_enter_probe_rtt(ngtcp2_bbr2_cc *bbr) {
+ ngtcp2_log_info(bbr->ccb.log, NGTCP2_LOG_EVENT_RCV, "bbr2 enter ProbeRTT");
+
+ bbr->state = NGTCP2_BBR2_STATE_PROBE_RTT;
+ bbr->pacing_gain = 1;
+ bbr->cwnd_gain = NGTCP2_BBR_PROBE_RTT_CWND_GAIN;
+}
+
+static void bbr_handle_probe_rtt(ngtcp2_bbr2_cc *bbr, ngtcp2_conn_stat *cstat,
+ ngtcp2_tstamp ts) {
+ bbr_mark_connection_app_limited(bbr, cstat);
+
+ if (bbr->probe_rtt_done_stamp == UINT64_MAX &&
+ cstat->bytes_in_flight <= bbr_probe_rtt_cwnd(bbr, cstat)) {
+ bbr->probe_rtt_done_stamp = ts + NGTCP2_BBR_PROBE_RTT_DURATION;
+ bbr->probe_rtt_round_done = 0;
+
+ bbr_start_round(bbr);
+
+ return;
+ }
+
+ if (bbr->probe_rtt_done_stamp != UINT64_MAX) {
+ if (bbr->round_start) {
+ bbr->probe_rtt_round_done = 1;
+ }
+
+ if (bbr->probe_rtt_round_done) {
+ bbr_check_probe_rtt_done(bbr, cstat, ts);
+ }
+ }
+}
+
+static void bbr_check_probe_rtt_done(ngtcp2_bbr2_cc *bbr,
+ ngtcp2_conn_stat *cstat,
+ ngtcp2_tstamp ts) {
+ if (bbr->probe_rtt_done_stamp != UINT64_MAX &&
+ ts > bbr->probe_rtt_done_stamp) {
+ bbr->probe_rtt_min_stamp = ts;
+ bbr_restore_cwnd(bbr, cstat);
+ bbr_exit_probe_rtt(bbr, ts);
+ }
+}
+
+static void bbr_mark_connection_app_limited(ngtcp2_bbr2_cc *bbr,
+ ngtcp2_conn_stat *cstat) {
+ uint64_t app_limited = bbr->rst->delivered + cstat->bytes_in_flight;
+
+ if (app_limited) {
+ bbr->rst->app_limited = app_limited;
+ } else {
+ bbr->rst->app_limited = cstat->max_tx_udp_payload_size;
+ }
+}
+
+static void bbr_exit_probe_rtt(ngtcp2_bbr2_cc *bbr, ngtcp2_tstamp ts) {
+ bbr_reset_lower_bounds(bbr);
+
+ if (bbr->filled_pipe) {
+ bbr_start_probe_bw_down(bbr, ts);
+ bbr_start_probe_bw_cruise(bbr);
+ } else {
+ bbr_enter_startup(bbr);
+ }
+}
+
+static void bbr_handle_restart_from_idle(ngtcp2_bbr2_cc *bbr,
+ ngtcp2_conn_stat *cstat,
+ ngtcp2_tstamp ts) {
+ if (cstat->bytes_in_flight == 0 && bbr->rst->app_limited) {
+ ngtcp2_log_info(bbr->ccb.log, NGTCP2_LOG_EVENT_RCV,
+ "bbr2 restart from idle");
+
+ bbr->idle_restart = 1;
+ bbr->extra_acked_interval_start = ts;
+
+ if (bbr_is_in_probe_bw_state(bbr)) {
+ bbr_set_pacing_rate_with_gain(bbr, cstat, 1);
+ } else if (bbr->state == NGTCP2_BBR2_STATE_PROBE_RTT) {
+ bbr_check_probe_rtt_done(bbr, cstat, ts);
+ }
+ }
+}
+
+static uint64_t bbr_bdp_multiple(ngtcp2_bbr2_cc *bbr, uint64_t bw,
+ double gain) {
+ uint64_t bdp;
+
+ if (bbr->min_rtt == UINT64_MAX) {
+ return bbr->initial_cwnd;
+ }
+
+ bdp = bw * bbr->min_rtt / NGTCP2_SECONDS;
+
+ return (uint64_t)(gain * (double)bdp);
+}
+
+static uint64_t min_pipe_cwnd(size_t max_udp_payload_size) {
+ return max_udp_payload_size * 4;
+}
+
+static uint64_t bbr_quantization_budget(ngtcp2_bbr2_cc *bbr,
+ ngtcp2_conn_stat *cstat,
+ uint64_t inflight) {
+ bbr_update_offload_budget(bbr, cstat);
+
+ inflight = ngtcp2_max(inflight, bbr->offload_budget);
+ inflight =
+ ngtcp2_max(inflight, min_pipe_cwnd(cstat->max_tx_udp_payload_size));
+
+ if (bbr->state == NGTCP2_BBR2_STATE_PROBE_BW_UP) {
+ inflight += 2 * cstat->max_tx_udp_payload_size;
+ }
+
+ return inflight;
+}
+
+static uint64_t bbr_inflight(ngtcp2_bbr2_cc *bbr, ngtcp2_conn_stat *cstat,
+ uint64_t bw, double gain) {
+ uint64_t inflight = bbr_bdp_multiple(bbr, bw, gain);
+
+ return bbr_quantization_budget(bbr, cstat, inflight);
+}
+
+static void bbr_update_max_inflight(ngtcp2_bbr2_cc *bbr,
+ ngtcp2_conn_stat *cstat) {
+ uint64_t inflight;
+
+ /* Not documented */
+ /* bbr_update_aggregation_budget(bbr); */
+
+ inflight = bbr_bdp_multiple(bbr, bbr->bw, bbr->cwnd_gain) + bbr->extra_acked;
+ bbr->max_inflight = bbr_quantization_budget(bbr, cstat, inflight);
+}
+
+static void bbr_update_offload_budget(ngtcp2_bbr2_cc *bbr,
+ ngtcp2_conn_stat *cstat) {
+ bbr->offload_budget = 3 * cstat->send_quantum;
+}
+
+static void bbr_advance_max_bw_filter(ngtcp2_bbr2_cc *bbr) {
+ ++bbr->cycle_count;
+}
+
+static void bbr_modulate_cwnd_for_recovery(ngtcp2_bbr2_cc *bbr,
+ ngtcp2_conn_stat *cstat,
+ const ngtcp2_cc_ack *ack) {
+ if (ack->bytes_lost > 0) {
+ if (cstat->cwnd > ack->bytes_lost) {
+ cstat->cwnd -= ack->bytes_lost;
+ cstat->cwnd = ngtcp2_max(cstat->cwnd, 2 * cstat->max_tx_udp_payload_size);
+ } else {
+ cstat->cwnd = 2 * cstat->max_tx_udp_payload_size;
+ }
+ }
+
+ if (bbr->packet_conservation) {
+ cstat->cwnd =
+ ngtcp2_max(cstat->cwnd, cstat->bytes_in_flight + ack->bytes_delivered);
+ }
+}
+
+static void bbr_save_cwnd(ngtcp2_bbr2_cc *bbr, ngtcp2_conn_stat *cstat) {
+ if (!bbr->in_loss_recovery && bbr->state != NGTCP2_BBR2_STATE_PROBE_RTT) {
+ bbr->prior_cwnd = cstat->cwnd;
+ return;
+ }
+
+ bbr->prior_cwnd = ngtcp2_max(bbr->prior_cwnd, cstat->cwnd);
+}
+
+static void bbr_restore_cwnd(ngtcp2_bbr2_cc *bbr, ngtcp2_conn_stat *cstat) {
+ cstat->cwnd = ngtcp2_max(cstat->cwnd, bbr->prior_cwnd);
+}
+
+static uint64_t bbr_probe_rtt_cwnd(ngtcp2_bbr2_cc *bbr,
+ ngtcp2_conn_stat *cstat) {
+ uint64_t probe_rtt_cwnd =
+ bbr_bdp_multiple(bbr, bbr->bw, NGTCP2_BBR_PROBE_RTT_CWND_GAIN);
+ uint64_t mpcwnd = min_pipe_cwnd(cstat->max_tx_udp_payload_size);
+
+ return ngtcp2_max(probe_rtt_cwnd, mpcwnd);
+}
+
+static void bbr_bound_cwnd_for_probe_rtt(ngtcp2_bbr2_cc *bbr,
+ ngtcp2_conn_stat *cstat) {
+ uint64_t probe_rtt_cwnd;
+
+ if (bbr->state == NGTCP2_BBR2_STATE_PROBE_RTT) {
+ probe_rtt_cwnd = bbr_probe_rtt_cwnd(bbr, cstat);
+
+ cstat->cwnd = ngtcp2_min(cstat->cwnd, probe_rtt_cwnd);
+ }
+}
+
+static void bbr_set_cwnd(ngtcp2_bbr2_cc *bbr, ngtcp2_conn_stat *cstat,
+ const ngtcp2_cc_ack *ack) {
+ uint64_t mpcwnd;
+
+ bbr_update_max_inflight(bbr, cstat);
+ bbr_modulate_cwnd_for_recovery(bbr, cstat, ack);
+
+ if (!bbr->packet_conservation) {
+ if (bbr->filled_pipe) {
+ cstat->cwnd += ack->bytes_delivered;
+ cstat->cwnd = ngtcp2_min(cstat->cwnd, bbr->max_inflight);
+ } else if (cstat->cwnd < bbr->max_inflight ||
+ bbr->rst->delivered < bbr->initial_cwnd) {
+ cstat->cwnd += ack->bytes_delivered;
+ }
+
+ mpcwnd = min_pipe_cwnd(cstat->max_tx_udp_payload_size);
+ cstat->cwnd = ngtcp2_max(cstat->cwnd, mpcwnd);
+ }
+
+ bbr_bound_cwnd_for_probe_rtt(bbr, cstat);
+ bbr_bound_cwnd_for_model(bbr, cstat);
+}
+
+static void bbr_bound_cwnd_for_model(ngtcp2_bbr2_cc *bbr,
+ ngtcp2_conn_stat *cstat) {
+ uint64_t cap = UINT64_MAX;
+ uint64_t mpcwnd = min_pipe_cwnd(cstat->max_tx_udp_payload_size);
+
+ if (bbr_is_in_probe_bw_state(bbr) &&
+ bbr->state != NGTCP2_BBR2_STATE_PROBE_BW_CRUISE) {
+ cap = bbr->inflight_hi;
+ } else if (bbr->state == NGTCP2_BBR2_STATE_PROBE_RTT ||
+ bbr->state == NGTCP2_BBR2_STATE_PROBE_BW_CRUISE) {
+ cap = bbr_inflight_with_headroom(bbr, cstat);
+ }
+
+ cap = ngtcp2_min(cap, bbr->inflight_lo);
+ cap = ngtcp2_max(cap, mpcwnd);
+
+ cstat->cwnd = ngtcp2_min(cstat->cwnd, cap);
+}
+
+static void bbr_set_send_quantum(ngtcp2_bbr2_cc *bbr, ngtcp2_conn_stat *cstat) {
+ size_t send_quantum =
+ (size_t)(cstat->pacing_rate * (double)(bbr->min_rtt == UINT64_MAX
+ ? NGTCP2_MILLISECONDS
+ : bbr->min_rtt));
+ (void)bbr;
+
+ cstat->send_quantum = ngtcp2_min(send_quantum, 64 * 1024);
+ cstat->send_quantum =
+ ngtcp2_max(cstat->send_quantum, cstat->max_tx_udp_payload_size * 10);
+}
+
+static int in_congestion_recovery(const ngtcp2_conn_stat *cstat,
+ ngtcp2_tstamp sent_time) {
+ return cstat->congestion_recovery_start_ts != UINT64_MAX &&
+ sent_time <= cstat->congestion_recovery_start_ts;
+}
+
+static void bbr_handle_recovery(ngtcp2_bbr2_cc *bbr, ngtcp2_conn_stat *cstat,
+ const ngtcp2_cc_ack *ack) {
+ if (bbr->in_loss_recovery) {
+ if (ack->pkt_delivered >= bbr->congestion_recovery_next_round_delivered) {
+ bbr->packet_conservation = 0;
+ }
+
+ if (!in_congestion_recovery(cstat, ack->largest_acked_sent_ts)) {
+ bbr->in_loss_recovery = 0;
+ bbr->packet_conservation = 0;
+ bbr_restore_cwnd(bbr, cstat);
+ }
+
+ return;
+ }
+
+ if (bbr->congestion_recovery_start_ts != UINT64_MAX) {
+ bbr->in_loss_recovery = 1;
+ bbr_save_cwnd(bbr, cstat);
+ cstat->cwnd =
+ cstat->bytes_in_flight +
+ ngtcp2_max(ack->bytes_delivered, cstat->max_tx_udp_payload_size);
+
+ cstat->congestion_recovery_start_ts = bbr->congestion_recovery_start_ts;
+ bbr->congestion_recovery_start_ts = UINT64_MAX;
+ bbr->packet_conservation = 1;
+ bbr->congestion_recovery_next_round_delivered = bbr->rst->delivered;
+ bbr->prior_inflight_lo = bbr->inflight_lo;
+ bbr->prior_bw_lo = bbr->bw_lo;
+ }
+}
+
+static void bbr2_cc_init(ngtcp2_bbr2_cc *bbr, ngtcp2_conn_stat *cstat,
+ ngtcp2_rst *rst, ngtcp2_tstamp initial_ts,
+ ngtcp2_rand rand, const ngtcp2_rand_ctx *rand_ctx,
+ ngtcp2_log *log) {
+ bbr->ccb.log = log;
+ bbr->rst = rst;
+ bbr->rand = rand;
+ bbr->rand_ctx = *rand_ctx;
+ bbr->initial_cwnd = cstat->cwnd;
+
+ bbr_on_init(bbr, cstat, initial_ts);
+}
+
+static void bbr2_cc_free(ngtcp2_bbr2_cc *bbr) { (void)bbr; }
+
+static void bbr2_cc_on_pkt_acked(ngtcp2_cc *ccx, ngtcp2_conn_stat *cstat,
+ const ngtcp2_cc_pkt *pkt, ngtcp2_tstamp ts) {
+ (void)ccx;
+ (void)cstat;
+ (void)pkt;
+ (void)ts;
+}
+
+static void bbr2_cc_on_pkt_lost(ngtcp2_cc *ccx, ngtcp2_conn_stat *cstat,
+ const ngtcp2_cc_pkt *pkt, ngtcp2_tstamp ts) {
+ ngtcp2_bbr2_cc *bbr = ngtcp2_struct_of(ccx->ccb, ngtcp2_bbr2_cc, ccb);
+
+ bbr_update_on_loss(bbr, cstat, pkt, ts);
+}
+
+static void bbr2_cc_congestion_event(ngtcp2_cc *ccx, ngtcp2_conn_stat *cstat,
+ ngtcp2_tstamp sent_ts, ngtcp2_tstamp ts) {
+ ngtcp2_bbr2_cc *bbr = ngtcp2_struct_of(ccx->ccb, ngtcp2_bbr2_cc, ccb);
+
+ if (!bbr->filled_pipe || bbr->in_loss_recovery ||
+ bbr->congestion_recovery_start_ts != UINT64_MAX ||
+ in_congestion_recovery(cstat, sent_ts)) {
+ return;
+ }
+
+ bbr->congestion_recovery_start_ts = ts;
+}
+
+static void bbr2_cc_on_spurious_congestion(ngtcp2_cc *ccx,
+ ngtcp2_conn_stat *cstat,
+ ngtcp2_tstamp ts) {
+ ngtcp2_bbr2_cc *bbr = ngtcp2_struct_of(ccx->ccb, ngtcp2_bbr2_cc, ccb);
+ (void)ts;
+
+ bbr->congestion_recovery_start_ts = UINT64_MAX;
+ cstat->congestion_recovery_start_ts = UINT64_MAX;
+
+ if (bbr->in_loss_recovery) {
+ bbr->in_loss_recovery = 0;
+ bbr->packet_conservation = 0;
+ bbr_restore_cwnd(bbr, cstat);
+ bbr->full_bw_count = 0;
+ bbr->loss_in_round = 0;
+ bbr->inflight_lo = ngtcp2_max(bbr->inflight_lo, bbr->prior_inflight_lo);
+ bbr->inflight_hi = ngtcp2_max(bbr->inflight_hi, bbr->prior_inflight_hi);
+ bbr->bw_lo = ngtcp2_max(bbr->bw_lo, bbr->prior_bw_lo);
+ }
+}
+
+static void bbr2_cc_on_persistent_congestion(ngtcp2_cc *ccx,
+ ngtcp2_conn_stat *cstat,
+ ngtcp2_tstamp ts) {
+ ngtcp2_bbr2_cc *bbr = ngtcp2_struct_of(ccx->ccb, ngtcp2_bbr2_cc, ccb);
+ (void)ts;
+
+ cstat->congestion_recovery_start_ts = UINT64_MAX;
+ bbr->congestion_recovery_start_ts = UINT64_MAX;
+ bbr->in_loss_recovery = 0;
+ bbr->packet_conservation = 0;
+
+ bbr_save_cwnd(bbr, cstat);
+ cstat->cwnd = cstat->bytes_in_flight + cstat->max_tx_udp_payload_size;
+ cstat->cwnd =
+ ngtcp2_max(cstat->cwnd, min_pipe_cwnd(cstat->max_tx_udp_payload_size));
+}
+
+static void bbr2_cc_on_ack_recv(ngtcp2_cc *ccx, ngtcp2_conn_stat *cstat,
+ const ngtcp2_cc_ack *ack, ngtcp2_tstamp ts) {
+ ngtcp2_bbr2_cc *bbr = ngtcp2_struct_of(ccx->ccb, ngtcp2_bbr2_cc, ccb);
+
+ bbr_update_on_ack(bbr, cstat, ack, ts);
+}
+
+static void bbr2_cc_on_pkt_sent(ngtcp2_cc *ccx, ngtcp2_conn_stat *cstat,
+ const ngtcp2_cc_pkt *pkt) {
+ ngtcp2_bbr2_cc *bbr = ngtcp2_struct_of(ccx->ccb, ngtcp2_bbr2_cc, ccb);
+
+ bbr_on_transmit(bbr, cstat, pkt->sent_ts);
+}
+
+static void bbr2_cc_new_rtt_sample(ngtcp2_cc *ccx, ngtcp2_conn_stat *cstat,
+ ngtcp2_tstamp ts) {
+ (void)ccx;
+ (void)cstat;
+ (void)ts;
+}
+
+static void bbr2_cc_reset(ngtcp2_cc *ccx, ngtcp2_conn_stat *cstat,
+ ngtcp2_tstamp ts) {
+ ngtcp2_bbr2_cc *bbr = ngtcp2_struct_of(ccx->ccb, ngtcp2_bbr2_cc, ccb);
+
+ bbr_on_init(bbr, cstat, ts);
+}
+
+static void bbr2_cc_event(ngtcp2_cc *ccx, ngtcp2_conn_stat *cstat,
+ ngtcp2_cc_event_type event, ngtcp2_tstamp ts) {
+ (void)ccx;
+ (void)cstat;
+ (void)event;
+ (void)ts;
+}
+
+int ngtcp2_cc_bbr2_cc_init(ngtcp2_cc *cc, ngtcp2_log *log,
+ ngtcp2_conn_stat *cstat, ngtcp2_rst *rst,
+ ngtcp2_tstamp initial_ts, ngtcp2_rand rand,
+ const ngtcp2_rand_ctx *rand_ctx,
+ const ngtcp2_mem *mem) {
+ ngtcp2_bbr2_cc *bbr;
+
+ bbr = ngtcp2_mem_calloc(mem, 1, sizeof(ngtcp2_bbr2_cc));
+ if (bbr == NULL) {
+ return NGTCP2_ERR_NOMEM;
+ }
+
+ bbr2_cc_init(bbr, cstat, rst, initial_ts, rand, rand_ctx, log);
+
+ cc->ccb = &bbr->ccb;
+ cc->on_pkt_acked = bbr2_cc_on_pkt_acked;
+ cc->on_pkt_lost = bbr2_cc_on_pkt_lost;
+ cc->congestion_event = bbr2_cc_congestion_event;
+ cc->on_spurious_congestion = bbr2_cc_on_spurious_congestion;
+ cc->on_persistent_congestion = bbr2_cc_on_persistent_congestion;
+ cc->on_ack_recv = bbr2_cc_on_ack_recv;
+ cc->on_pkt_sent = bbr2_cc_on_pkt_sent;
+ cc->new_rtt_sample = bbr2_cc_new_rtt_sample;
+ cc->reset = bbr2_cc_reset;
+ cc->event = bbr2_cc_event;
+
+ return 0;
+}
+
+void ngtcp2_cc_bbr2_cc_free(ngtcp2_cc *cc, const ngtcp2_mem *mem) {
+ ngtcp2_bbr2_cc *bbr = ngtcp2_struct_of(cc->ccb, ngtcp2_bbr2_cc, ccb);
+
+ bbr2_cc_free(bbr);
+ ngtcp2_mem_free(mem, bbr);
+}
diff --git a/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_bbr2.h b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_bbr2.h
new file mode 100644
index 0000000..50dc05a
--- /dev/null
+++ b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_bbr2.h
@@ -0,0 +1,149 @@
+/*
+ * ngtcp2
+ *
+ * Copyright (c) 2021 ngtcp2 contributors
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#ifndef NGTCP2_BBR2_H
+#define NGTCP2_BBR2_H
+
+#ifdef HAVE_CONFIG_H
+# include <config.h>
+#endif /* HAVE_CONFIG_H */
+
+#include <ngtcp2/ngtcp2.h>
+
+#include "ngtcp2_cc.h"
+#include "ngtcp2_window_filter.h"
+
+typedef struct ngtcp2_rst ngtcp2_rst;
+
+typedef enum ngtcp2_bbr2_state {
+ NGTCP2_BBR2_STATE_STARTUP,
+ NGTCP2_BBR2_STATE_DRAIN,
+ NGTCP2_BBR2_STATE_PROBE_BW_DOWN,
+ NGTCP2_BBR2_STATE_PROBE_BW_CRUISE,
+ NGTCP2_BBR2_STATE_PROBE_BW_REFILL,
+ NGTCP2_BBR2_STATE_PROBE_BW_UP,
+ NGTCP2_BBR2_STATE_PROBE_RTT,
+} ngtcp2_bbr2_state;
+
+typedef enum ngtcp2_bbr2_ack_phase {
+ NGTCP2_BBR2_ACK_PHASE_ACKS_PROBE_STARTING,
+ NGTCP2_BBR2_ACK_PHASE_ACKS_PROBE_STOPPING,
+ NGTCP2_BBR2_ACK_PHASE_ACKS_PROBE_FEEDBACK,
+ NGTCP2_BBR2_ACK_PHASE_ACKS_REFILLING,
+} ngtcp2_bbr2_ack_phase;
+
+/*
+ * ngtcp2_bbr2_cc is BBR v2 congestion controller, described in
+ * https://datatracker.ietf.org/doc/html/draft-cardwell-iccrg-bbr-congestion-control-01
+ */
+typedef struct ngtcp2_bbr2_cc {
+ ngtcp2_cc_base ccb;
+
+ uint64_t initial_cwnd;
+ ngtcp2_rst *rst;
+ ngtcp2_rand rand;
+ ngtcp2_rand_ctx rand_ctx;
+
+ /* max_bw_filter for tracking the maximum recent delivery rate
+ samples for estimating max_bw. */
+ ngtcp2_window_filter max_bw_filter;
+
+ ngtcp2_window_filter extra_acked_filter;
+
+ ngtcp2_duration min_rtt;
+ ngtcp2_tstamp min_rtt_stamp;
+ ngtcp2_tstamp probe_rtt_done_stamp;
+ int probe_rtt_round_done;
+ uint64_t prior_cwnd;
+ int idle_restart;
+ ngtcp2_tstamp extra_acked_interval_start;
+ uint64_t extra_acked_delivered;
+
+ /* Congestion signals */
+ int loss_in_round;
+ uint64_t bw_latest;
+ uint64_t inflight_latest;
+
+ /* Lower bounds */
+ uint64_t bw_lo;
+ uint64_t inflight_lo;
+
+ /* Round counting */
+ uint64_t next_round_delivered;
+ int round_start;
+ uint64_t round_count;
+
+ /* Full pipe */
+ int filled_pipe;
+ uint64_t full_bw;
+ size_t full_bw_count;
+
+ /* Pacing rate */
+ double pacing_gain;
+
+ ngtcp2_bbr2_state state;
+ double cwnd_gain;
+
+ int loss_round_start;
+ uint64_t loss_round_delivered;
+ uint64_t rounds_since_bw_probe;
+ uint64_t max_bw;
+ uint64_t bw;
+ uint64_t cycle_count;
+ uint64_t extra_acked;
+ uint64_t bytes_lost_in_round;
+ size_t loss_events_in_round;
+ uint64_t offload_budget;
+ uint64_t probe_up_cnt;
+ ngtcp2_tstamp cycle_stamp;
+ ngtcp2_bbr2_ack_phase ack_phase;
+ ngtcp2_duration bw_probe_wait;
+ int bw_probe_samples;
+ size_t bw_probe_up_rounds;
+ uint64_t bw_probe_up_acks;
+ uint64_t inflight_hi;
+ uint64_t bw_hi;
+ int probe_rtt_expired;
+ ngtcp2_duration probe_rtt_min_delay;
+ ngtcp2_tstamp probe_rtt_min_stamp;
+ int in_loss_recovery;
+ int packet_conservation;
+ uint64_t max_inflight;
+ ngtcp2_tstamp congestion_recovery_start_ts;
+ uint64_t congestion_recovery_next_round_delivered;
+
+ uint64_t prior_inflight_lo;
+ uint64_t prior_inflight_hi;
+ uint64_t prior_bw_lo;
+} ngtcp2_bbr2_cc;
+
+int ngtcp2_cc_bbr2_cc_init(ngtcp2_cc *cc, ngtcp2_log *log,
+ ngtcp2_conn_stat *cstat, ngtcp2_rst *rst,
+ ngtcp2_tstamp initial_ts, ngtcp2_rand rand,
+ const ngtcp2_rand_ctx *rand_ctx,
+ const ngtcp2_mem *mem);
+
+void ngtcp2_cc_bbr2_cc_free(ngtcp2_cc *cc, const ngtcp2_mem *mem);
+
+#endif /* NGTCP2_BBR2_H */
diff --git a/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_buf.c b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_buf.c
new file mode 100644
index 0000000..75326d6
--- /dev/null
+++ b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_buf.c
@@ -0,0 +1,56 @@
+/*
+ * ngtcp2
+ *
+ * Copyright (c) 2017 ngtcp2 contributors
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "ngtcp2_buf.h"
+#include "ngtcp2_mem.h"
+
+void ngtcp2_buf_init(ngtcp2_buf *buf, uint8_t *begin, size_t len) {
+ buf->begin = buf->pos = buf->last = begin;
+ buf->end = begin + len;
+}
+
+void ngtcp2_buf_reset(ngtcp2_buf *buf) { buf->pos = buf->last = buf->begin; }
+
+size_t ngtcp2_buf_cap(const ngtcp2_buf *buf) {
+ return (size_t)(buf->end - buf->begin);
+}
+
+int ngtcp2_buf_chain_new(ngtcp2_buf_chain **pbufchain, size_t len,
+ const ngtcp2_mem *mem) {
+ *pbufchain = ngtcp2_mem_malloc(mem, sizeof(ngtcp2_buf_chain) + len);
+ if (*pbufchain == NULL) {
+ return NGTCP2_ERR_NOMEM;
+ }
+
+ (*pbufchain)->next = NULL;
+
+ ngtcp2_buf_init(&(*pbufchain)->buf,
+ (uint8_t *)(*pbufchain) + sizeof(ngtcp2_buf_chain), len);
+
+ return 0;
+}
+
+void ngtcp2_buf_chain_del(ngtcp2_buf_chain *bufchain, const ngtcp2_mem *mem) {
+ ngtcp2_mem_free(mem, bufchain);
+}
diff --git a/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_buf.h b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_buf.h
new file mode 100644
index 0000000..107d413
--- /dev/null
+++ b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_buf.h
@@ -0,0 +1,108 @@
+/*
+ * ngtcp2
+ *
+ * Copyright (c) 2017 ngtcp2 contributors
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#ifndef NGTCP2_BUF_H
+#define NGTCP2_BUF_H
+
+#ifdef HAVE_CONFIG_H
+# include <config.h>
+#endif /* HAVE_CONFIG_H */
+
+#include <ngtcp2/ngtcp2.h>
+
+typedef struct ngtcp2_buf {
+ /* begin points to the beginning of the buffer. */
+ uint8_t *begin;
+ /* end points to the one beyond of the last byte of the buffer */
+ uint8_t *end;
+ /* pos pointers to the start of data. Typically, this points to the
+ point that next data should be read. Initially, it points to
+ |begin|. */
+ uint8_t *pos;
+ /* last points to the one beyond of the last data of the buffer.
+ Typically, new data is written at this point. Initially, it
+ points to |begin|. */
+ uint8_t *last;
+} ngtcp2_buf;
+
+/*
+ * ngtcp2_buf_init initializes |buf| with the given buffer.
+ */
+void ngtcp2_buf_init(ngtcp2_buf *buf, uint8_t *begin, size_t len);
+
+/*
+ * ngtcp2_buf_reset resets pos and last fields to match begin field to
+ * make ngtcp2_buf_len(buf) return 0.
+ */
+void ngtcp2_buf_reset(ngtcp2_buf *buf);
+
+/*
+ * ngtcp2_buf_left returns the number of additional bytes which can be
+ * written to the underlying buffer. In other words, it returns
+ * buf->end - buf->last.
+ */
+#define ngtcp2_buf_left(BUF) (size_t)((BUF)->end - (BUF)->last)
+
+/*
+ * ngtcp2_buf_len returns the number of bytes left to read. In other
+ * words, it returns buf->last - buf->pos.
+ */
+#define ngtcp2_buf_len(BUF) (size_t)((BUF)->last - (BUF)->pos)
+
+/*
+ * ngtcp2_buf_cap returns the capacity of the buffer. In other words,
+ * it returns buf->end - buf->begin.
+ */
+size_t ngtcp2_buf_cap(const ngtcp2_buf *buf);
+
+/*
+ * ngtcp2_buf_chain is a linked list of ngtcp2_buf.
+ */
+typedef struct ngtcp2_buf_chain ngtcp2_buf_chain;
+
+struct ngtcp2_buf_chain {
+ ngtcp2_buf_chain *next;
+ ngtcp2_buf buf;
+};
+
+/*
+ * ngtcp2_buf_chain_new creates new ngtcp2_buf_chain and initializes
+ * the internal buffer with |len| bytes space.
+ *
+ * This function returns 0 if it succeeds, or one of the following
+ * negative error codes:
+ *
+ * NGTCP2_ERR_NOMEM
+ * Out of memory
+ */
+int ngtcp2_buf_chain_new(ngtcp2_buf_chain **pbufchain, size_t len,
+ const ngtcp2_mem *mem);
+
+/*
+ * ngtcp2_buf_chain_del deletes the resource allocated by |bufchain|.
+ * It also deletes the memory pointed by |bufchain|.
+ */
+void ngtcp2_buf_chain_del(ngtcp2_buf_chain *bufchain, const ngtcp2_mem *mem);
+
+#endif /* NGTCP2_BUF_H */
diff --git a/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_cc.c b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_cc.c
new file mode 100644
index 0000000..0536639
--- /dev/null
+++ b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_cc.c
@@ -0,0 +1,615 @@
+/*
+ * ngtcp2
+ *
+ * Copyright (c) 2018 ngtcp2 contributors
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "ngtcp2_cc.h"
+
+#include <assert.h>
+
+#if defined(_MSC_VER)
+# include <intrin.h>
+#endif
+
+#include "ngtcp2_log.h"
+#include "ngtcp2_macro.h"
+#include "ngtcp2_mem.h"
+#include "ngtcp2_rcvry.h"
+
+uint64_t ngtcp2_cc_compute_initcwnd(size_t max_udp_payload_size) {
+ uint64_t n = 2 * max_udp_payload_size;
+ n = ngtcp2_max(n, 14720);
+ return ngtcp2_min(10 * max_udp_payload_size, n);
+}
+
+ngtcp2_cc_pkt *ngtcp2_cc_pkt_init(ngtcp2_cc_pkt *pkt, int64_t pkt_num,
+ size_t pktlen, ngtcp2_pktns_id pktns_id,
+ ngtcp2_tstamp sent_ts, uint64_t lost,
+ uint64_t tx_in_flight, int is_app_limited) {
+ pkt->pkt_num = pkt_num;
+ pkt->pktlen = pktlen;
+ pkt->pktns_id = pktns_id;
+ pkt->sent_ts = sent_ts;
+ pkt->lost = lost;
+ pkt->tx_in_flight = tx_in_flight;
+ pkt->is_app_limited = is_app_limited;
+
+ return pkt;
+}
+
+static void reno_cc_reset(ngtcp2_reno_cc *cc) {
+ cc->max_delivery_rate_sec = 0;
+ cc->target_cwnd = 0;
+ cc->pending_add = 0;
+}
+
+void ngtcp2_reno_cc_init(ngtcp2_reno_cc *cc, ngtcp2_log *log) {
+ cc->ccb.log = log;
+ reno_cc_reset(cc);
+}
+
+void ngtcp2_reno_cc_free(ngtcp2_reno_cc *cc) { (void)cc; }
+
+int ngtcp2_cc_reno_cc_init(ngtcp2_cc *cc, ngtcp2_log *log,
+ const ngtcp2_mem *mem) {
+ ngtcp2_reno_cc *reno_cc;
+
+ reno_cc = ngtcp2_mem_calloc(mem, 1, sizeof(ngtcp2_reno_cc));
+ if (reno_cc == NULL) {
+ return NGTCP2_ERR_NOMEM;
+ }
+
+ ngtcp2_reno_cc_init(reno_cc, log);
+
+ cc->ccb = &reno_cc->ccb;
+ cc->on_pkt_acked = ngtcp2_cc_reno_cc_on_pkt_acked;
+ cc->congestion_event = ngtcp2_cc_reno_cc_congestion_event;
+ cc->on_persistent_congestion = ngtcp2_cc_reno_cc_on_persistent_congestion;
+ cc->on_ack_recv = ngtcp2_cc_reno_cc_on_ack_recv;
+ cc->reset = ngtcp2_cc_reno_cc_reset;
+
+ return 0;
+}
+
+void ngtcp2_cc_reno_cc_free(ngtcp2_cc *cc, const ngtcp2_mem *mem) {
+ ngtcp2_reno_cc *reno_cc = ngtcp2_struct_of(cc->ccb, ngtcp2_reno_cc, ccb);
+
+ ngtcp2_reno_cc_free(reno_cc);
+ ngtcp2_mem_free(mem, reno_cc);
+}
+
+static int in_congestion_recovery(const ngtcp2_conn_stat *cstat,
+ ngtcp2_tstamp sent_time) {
+ return cstat->congestion_recovery_start_ts != UINT64_MAX &&
+ sent_time <= cstat->congestion_recovery_start_ts;
+}
+
+void ngtcp2_cc_reno_cc_on_pkt_acked(ngtcp2_cc *ccx, ngtcp2_conn_stat *cstat,
+ const ngtcp2_cc_pkt *pkt,
+ ngtcp2_tstamp ts) {
+ ngtcp2_reno_cc *cc = ngtcp2_struct_of(ccx->ccb, ngtcp2_reno_cc, ccb);
+ uint64_t m;
+ (void)ts;
+
+ if (in_congestion_recovery(cstat, pkt->sent_ts)) {
+ return;
+ }
+
+ if (cc->target_cwnd && cc->target_cwnd < cstat->cwnd) {
+ return;
+ }
+
+ if (cstat->cwnd < cstat->ssthresh) {
+ cstat->cwnd += pkt->pktlen;
+ ngtcp2_log_info(cc->ccb.log, NGTCP2_LOG_EVENT_RCV,
+ "pkn=%" PRId64 " acked, slow start cwnd=%" PRIu64,
+ pkt->pkt_num, cstat->cwnd);
+ return;
+ }
+
+ m = cstat->max_tx_udp_payload_size * pkt->pktlen + cc->pending_add;
+ cc->pending_add = m % cstat->cwnd;
+
+ cstat->cwnd += m / cstat->cwnd;
+}
+
+void ngtcp2_cc_reno_cc_congestion_event(ngtcp2_cc *ccx, ngtcp2_conn_stat *cstat,
+ ngtcp2_tstamp sent_ts,
+ ngtcp2_tstamp ts) {
+ ngtcp2_reno_cc *cc = ngtcp2_struct_of(ccx->ccb, ngtcp2_reno_cc, ccb);
+ uint64_t min_cwnd;
+
+ if (in_congestion_recovery(cstat, sent_ts)) {
+ return;
+ }
+
+ cstat->congestion_recovery_start_ts = ts;
+ cstat->cwnd >>= NGTCP2_LOSS_REDUCTION_FACTOR_BITS;
+ min_cwnd = 2 * cstat->max_tx_udp_payload_size;
+ cstat->cwnd = ngtcp2_max(cstat->cwnd, min_cwnd);
+ cstat->ssthresh = cstat->cwnd;
+
+ cc->pending_add = 0;
+
+ ngtcp2_log_info(cc->ccb.log, NGTCP2_LOG_EVENT_RCV,
+ "reduce cwnd because of packet loss cwnd=%" PRIu64,
+ cstat->cwnd);
+}
+
+void ngtcp2_cc_reno_cc_on_persistent_congestion(ngtcp2_cc *ccx,
+ ngtcp2_conn_stat *cstat,
+ ngtcp2_tstamp ts) {
+ (void)ccx;
+ (void)ts;
+
+ cstat->cwnd = 2 * cstat->max_tx_udp_payload_size;
+ cstat->congestion_recovery_start_ts = UINT64_MAX;
+}
+
+void ngtcp2_cc_reno_cc_on_ack_recv(ngtcp2_cc *ccx, ngtcp2_conn_stat *cstat,
+ const ngtcp2_cc_ack *ack, ngtcp2_tstamp ts) {
+ ngtcp2_reno_cc *cc = ngtcp2_struct_of(ccx->ccb, ngtcp2_reno_cc, ccb);
+ uint64_t target_cwnd, initcwnd;
+ (void)ack;
+ (void)ts;
+
+ /* TODO Use sliding window for min rtt measurement */
+ /* TODO Use sliding window */
+ cc->max_delivery_rate_sec =
+ ngtcp2_max(cc->max_delivery_rate_sec, cstat->delivery_rate_sec);
+
+ if (cstat->min_rtt != UINT64_MAX && cc->max_delivery_rate_sec) {
+ target_cwnd = cc->max_delivery_rate_sec * cstat->min_rtt / NGTCP2_SECONDS;
+ initcwnd = ngtcp2_cc_compute_initcwnd(cstat->max_tx_udp_payload_size);
+ cc->target_cwnd = ngtcp2_max(initcwnd, target_cwnd) * 289 / 100;
+
+ ngtcp2_log_info(cc->ccb.log, NGTCP2_LOG_EVENT_RCV,
+ "target_cwnd=%" PRIu64 " max_delivery_rate_sec=%" PRIu64
+ " min_rtt=%" PRIu64,
+ cc->target_cwnd, cc->max_delivery_rate_sec, cstat->min_rtt);
+ }
+}
+
+void ngtcp2_cc_reno_cc_reset(ngtcp2_cc *ccx, ngtcp2_conn_stat *cstat,
+ ngtcp2_tstamp ts) {
+ ngtcp2_reno_cc *cc = ngtcp2_struct_of(ccx->ccb, ngtcp2_reno_cc, ccb);
+ (void)cstat;
+ (void)ts;
+
+ reno_cc_reset(cc);
+}
+
+static void cubic_cc_reset(ngtcp2_cubic_cc *cc) {
+ cc->max_delivery_rate_sec = 0;
+ cc->target_cwnd = 0;
+ cc->w_last_max = 0;
+ cc->w_tcp = 0;
+ cc->origin_point = 0;
+ cc->epoch_start = UINT64_MAX;
+ cc->k = 0;
+
+ cc->prior.cwnd = 0;
+ cc->prior.ssthresh = 0;
+ cc->prior.w_last_max = 0;
+ cc->prior.w_tcp = 0;
+ cc->prior.origin_point = 0;
+ cc->prior.epoch_start = UINT64_MAX;
+ cc->prior.k = 0;
+
+ cc->rtt_sample_count = 0;
+ cc->current_round_min_rtt = UINT64_MAX;
+ cc->last_round_min_rtt = UINT64_MAX;
+ cc->window_end = -1;
+}
+
+void ngtcp2_cubic_cc_init(ngtcp2_cubic_cc *cc, ngtcp2_log *log) {
+ cc->ccb.log = log;
+ cubic_cc_reset(cc);
+}
+
+void ngtcp2_cubic_cc_free(ngtcp2_cubic_cc *cc) { (void)cc; }
+
+int ngtcp2_cc_cubic_cc_init(ngtcp2_cc *cc, ngtcp2_log *log,
+ const ngtcp2_mem *mem) {
+ ngtcp2_cubic_cc *cubic_cc;
+
+ cubic_cc = ngtcp2_mem_calloc(mem, 1, sizeof(ngtcp2_cubic_cc));
+ if (cubic_cc == NULL) {
+ return NGTCP2_ERR_NOMEM;
+ }
+
+ ngtcp2_cubic_cc_init(cubic_cc, log);
+
+ cc->ccb = &cubic_cc->ccb;
+ cc->on_pkt_acked = ngtcp2_cc_cubic_cc_on_pkt_acked;
+ cc->congestion_event = ngtcp2_cc_cubic_cc_congestion_event;
+ cc->on_spurious_congestion = ngtcp2_cc_cubic_cc_on_spurious_congestion;
+ cc->on_persistent_congestion = ngtcp2_cc_cubic_cc_on_persistent_congestion;
+ cc->on_ack_recv = ngtcp2_cc_cubic_cc_on_ack_recv;
+ cc->on_pkt_sent = ngtcp2_cc_cubic_cc_on_pkt_sent;
+ cc->new_rtt_sample = ngtcp2_cc_cubic_cc_new_rtt_sample;
+ cc->reset = ngtcp2_cc_cubic_cc_reset;
+ cc->event = ngtcp2_cc_cubic_cc_event;
+
+ return 0;
+}
+
+void ngtcp2_cc_cubic_cc_free(ngtcp2_cc *cc, const ngtcp2_mem *mem) {
+ ngtcp2_cubic_cc *cubic_cc = ngtcp2_struct_of(cc->ccb, ngtcp2_cubic_cc, ccb);
+
+ ngtcp2_cubic_cc_free(cubic_cc);
+ ngtcp2_mem_free(mem, cubic_cc);
+}
+
+static uint64_t ngtcp2_cbrt(uint64_t n) {
+ int d;
+ uint64_t a;
+
+ if (n == 0) {
+ return 0;
+ }
+
+#if defined(_MSC_VER)
+# if defined(_M_X64)
+ d = (int)__lzcnt64(n);
+# elif defined(_M_ARM64)
+ {
+ unsigned long index;
+ d = sizeof(uint64_t) * CHAR_BIT;
+ if (_BitScanReverse64(&index, n)) {
+ d = d - 1 - index;
+ }
+ }
+# else
+ if ((n >> 32) != 0) {
+ d = __lzcnt((unsigned int)(n >> 32));
+ } else {
+ d = 32 + __lzcnt((unsigned int)n);
+ }
+# endif
+#else
+ d = __builtin_clzll(n);
+#endif
+ a = 1ULL << ((64 - d) / 3 + 1);
+
+ for (; a * a * a > n;) {
+ a = (2 * a + n / a / a) / 3;
+ }
+ return a;
+}
+
+/* HyStart++ constants */
+#define NGTCP2_HS_MIN_SSTHRESH 16
+#define NGTCP2_HS_N_RTT_SAMPLE 8
+#define NGTCP2_HS_MIN_ETA (4 * NGTCP2_MILLISECONDS)
+#define NGTCP2_HS_MAX_ETA (16 * NGTCP2_MILLISECONDS)
+
+void ngtcp2_cc_cubic_cc_on_pkt_acked(ngtcp2_cc *ccx, ngtcp2_conn_stat *cstat,
+ const ngtcp2_cc_pkt *pkt,
+ ngtcp2_tstamp ts) {
+ ngtcp2_cubic_cc *cc = ngtcp2_struct_of(ccx->ccb, ngtcp2_cubic_cc, ccb);
+ ngtcp2_duration t, eta;
+ uint64_t target, cwnd_thres;
+ uint64_t tx, kx, time_delta, delta;
+ uint64_t add, tcp_add;
+ uint64_t m;
+
+ if (pkt->pktns_id == NGTCP2_PKTNS_ID_APPLICATION && cc->window_end != -1 &&
+ cc->window_end <= pkt->pkt_num) {
+ cc->window_end = -1;
+ }
+
+ if (in_congestion_recovery(cstat, pkt->sent_ts)) {
+ return;
+ }
+
+ if (cc->target_cwnd && cc->target_cwnd < cstat->cwnd) {
+ return;
+ }
+
+ if (cstat->cwnd < cstat->ssthresh) {
+ /* slow-start */
+ cstat->cwnd += pkt->pktlen;
+
+ ngtcp2_log_info(cc->ccb.log, NGTCP2_LOG_EVENT_RCV,
+ "pkn=%" PRId64 " acked, slow start cwnd=%" PRIu64,
+ pkt->pkt_num, cstat->cwnd);
+
+ if (cc->last_round_min_rtt != UINT64_MAX &&
+ cc->current_round_min_rtt != UINT64_MAX &&
+ cstat->cwnd >=
+ NGTCP2_HS_MIN_SSTHRESH * cstat->max_tx_udp_payload_size &&
+ cc->rtt_sample_count >= NGTCP2_HS_N_RTT_SAMPLE) {
+ eta = cc->last_round_min_rtt / 8;
+
+ if (eta < NGTCP2_HS_MIN_ETA) {
+ eta = NGTCP2_HS_MIN_ETA;
+ } else if (eta > NGTCP2_HS_MAX_ETA) {
+ eta = NGTCP2_HS_MAX_ETA;
+ }
+
+ if (cc->current_round_min_rtt >= cc->last_round_min_rtt + eta) {
+ ngtcp2_log_info(cc->ccb.log, NGTCP2_LOG_EVENT_RCV,
+ "HyStart++ exit slow start");
+
+ cc->w_last_max = cstat->cwnd;
+ cstat->ssthresh = cstat->cwnd;
+ }
+ }
+
+ return;
+ }
+
+ /* congestion avoidance */
+
+ if (cc->epoch_start == UINT64_MAX) {
+ cc->epoch_start = ts;
+ if (cstat->cwnd < cc->w_last_max) {
+ cc->k = ngtcp2_cbrt((cc->w_last_max - cstat->cwnd) * 10 / 4 /
+ cstat->max_tx_udp_payload_size);
+ cc->origin_point = cc->w_last_max;
+ } else {
+ cc->k = 0;
+ cc->origin_point = cstat->cwnd;
+ }
+
+ cc->w_tcp = cstat->cwnd;
+
+ ngtcp2_log_info(cc->ccb.log, NGTCP2_LOG_EVENT_RCV,
+ "cubic-ca epoch_start=%" PRIu64 " k=%" PRIu64
+ " origin_point=%" PRIu64,
+ cc->epoch_start, cc->k, cc->origin_point);
+
+ cc->pending_add = 0;
+ cc->pending_w_add = 0;
+ }
+
+ t = ts - cc->epoch_start;
+
+ tx = (t << 10) / NGTCP2_SECONDS;
+ kx = (cc->k << 10);
+
+ if (tx > kx) {
+ time_delta = tx - kx;
+ } else {
+ time_delta = kx - tx;
+ }
+
+ delta = cstat->max_tx_udp_payload_size *
+ ((((time_delta * time_delta) >> 10) * time_delta) >> 10) * 4 / 10;
+ delta >>= 10;
+
+ if (tx > kx) {
+ target = cc->origin_point + delta;
+ } else {
+ target = cc->origin_point - delta;
+ }
+
+ cwnd_thres =
+ (target * (((t + cstat->smoothed_rtt) << 10) / NGTCP2_SECONDS)) >> 10;
+ if (cwnd_thres < cstat->cwnd) {
+ target = cstat->cwnd;
+ } else if (2 * cwnd_thres > 3 * cstat->cwnd) {
+ target = cstat->cwnd * 3 / 2;
+ } else {
+ target = cwnd_thres;
+ }
+
+ if (target > cstat->cwnd) {
+ m = cc->pending_add +
+ cstat->max_tx_udp_payload_size * (target - cstat->cwnd);
+ add = m / cstat->cwnd;
+ cc->pending_add = m % cstat->cwnd;
+ } else {
+ m = cc->pending_add + cstat->max_tx_udp_payload_size;
+ add = m / (100 * cstat->cwnd);
+ cc->pending_add = m % (100 * cstat->cwnd);
+ }
+
+ m = cc->pending_w_add + cstat->max_tx_udp_payload_size * pkt->pktlen;
+
+ cc->w_tcp += m / cstat->cwnd;
+ cc->pending_w_add = m % cstat->cwnd;
+
+ if (cc->w_tcp > cstat->cwnd) {
+ tcp_add = cstat->max_tx_udp_payload_size * (cc->w_tcp - cstat->cwnd) /
+ cstat->cwnd;
+ if (tcp_add > add) {
+ add = tcp_add;
+ }
+ }
+
+ cstat->cwnd += add;
+
+ ngtcp2_log_info(cc->ccb.log, NGTCP2_LOG_EVENT_RCV,
+ "pkn=%" PRId64 " acked, cubic-ca cwnd=%" PRIu64 " t=%" PRIu64
+ " k=%" PRIi64 " time_delta=%" PRIu64 " delta=%" PRIu64
+ " target=%" PRIu64 " w_tcp=%" PRIu64,
+ pkt->pkt_num, cstat->cwnd, t, cc->k, time_delta >> 4, delta,
+ target, cc->w_tcp);
+}
+
+void ngtcp2_cc_cubic_cc_congestion_event(ngtcp2_cc *ccx,
+ ngtcp2_conn_stat *cstat,
+ ngtcp2_tstamp sent_ts,
+ ngtcp2_tstamp ts) {
+ ngtcp2_cubic_cc *cc = ngtcp2_struct_of(ccx->ccb, ngtcp2_cubic_cc, ccb);
+ uint64_t min_cwnd;
+
+ if (in_congestion_recovery(cstat, sent_ts)) {
+ return;
+ }
+
+ if (cc->prior.cwnd < cstat->cwnd) {
+ cc->prior.cwnd = cstat->cwnd;
+ cc->prior.ssthresh = cstat->ssthresh;
+ cc->prior.w_last_max = cc->w_last_max;
+ cc->prior.w_tcp = cc->w_tcp;
+ cc->prior.origin_point = cc->origin_point;
+ cc->prior.epoch_start = cc->epoch_start;
+ cc->prior.k = cc->k;
+ }
+
+ cstat->congestion_recovery_start_ts = ts;
+
+ cc->epoch_start = UINT64_MAX;
+ if (cstat->cwnd < cc->w_last_max) {
+ cc->w_last_max = cstat->cwnd * 17 / 10 / 2;
+ } else {
+ cc->w_last_max = cstat->cwnd;
+ }
+
+ min_cwnd = 2 * cstat->max_tx_udp_payload_size;
+ cstat->ssthresh = cstat->cwnd * 7 / 10;
+ cstat->ssthresh = ngtcp2_max(cstat->ssthresh, min_cwnd);
+ cstat->cwnd = cstat->ssthresh;
+
+ ngtcp2_log_info(cc->ccb.log, NGTCP2_LOG_EVENT_RCV,
+ "reduce cwnd because of packet loss cwnd=%" PRIu64,
+ cstat->cwnd);
+}
+
+void ngtcp2_cc_cubic_cc_on_spurious_congestion(ngtcp2_cc *ccx,
+ ngtcp2_conn_stat *cstat,
+ ngtcp2_tstamp ts) {
+ ngtcp2_cubic_cc *cc = ngtcp2_struct_of(ccx->ccb, ngtcp2_cubic_cc, ccb);
+ (void)ts;
+
+ if (cstat->cwnd >= cc->prior.cwnd) {
+ return;
+ }
+
+ cstat->congestion_recovery_start_ts = UINT64_MAX;
+
+ cstat->cwnd = cc->prior.cwnd;
+ cstat->ssthresh = cc->prior.ssthresh;
+ cc->w_last_max = cc->prior.w_last_max;
+ cc->w_tcp = cc->prior.w_tcp;
+ cc->origin_point = cc->prior.origin_point;
+ cc->epoch_start = cc->prior.epoch_start;
+ cc->k = cc->prior.k;
+
+ cc->prior.cwnd = 0;
+ cc->prior.ssthresh = 0;
+ cc->prior.w_last_max = 0;
+ cc->prior.w_tcp = 0;
+ cc->prior.origin_point = 0;
+ cc->prior.epoch_start = UINT64_MAX;
+ cc->prior.k = 0;
+
+ ngtcp2_log_info(cc->ccb.log, NGTCP2_LOG_EVENT_RCV,
+ "spurious congestion is detected and congestion state is "
+ "restored cwnd=%" PRIu64,
+ cstat->cwnd);
+}
+
+void ngtcp2_cc_cubic_cc_on_persistent_congestion(ngtcp2_cc *ccx,
+ ngtcp2_conn_stat *cstat,
+ ngtcp2_tstamp ts) {
+ (void)ccx;
+ (void)ts;
+
+ cstat->cwnd = 2 * cstat->max_tx_udp_payload_size;
+ cstat->congestion_recovery_start_ts = UINT64_MAX;
+}
+
+void ngtcp2_cc_cubic_cc_on_ack_recv(ngtcp2_cc *ccx, ngtcp2_conn_stat *cstat,
+ const ngtcp2_cc_ack *ack,
+ ngtcp2_tstamp ts) {
+ ngtcp2_cubic_cc *cc = ngtcp2_struct_of(ccx->ccb, ngtcp2_cubic_cc, ccb);
+ uint64_t target_cwnd, initcwnd;
+ (void)ack;
+ (void)ts;
+
+ /* TODO Use sliding window for min rtt measurement */
+ /* TODO Use sliding window */
+ cc->max_delivery_rate_sec =
+ ngtcp2_max(cc->max_delivery_rate_sec, cstat->delivery_rate_sec);
+
+ if (cstat->min_rtt != UINT64_MAX && cc->max_delivery_rate_sec) {
+ target_cwnd = cc->max_delivery_rate_sec * cstat->min_rtt / NGTCP2_SECONDS;
+ initcwnd = ngtcp2_cc_compute_initcwnd(cstat->max_tx_udp_payload_size);
+ cc->target_cwnd = ngtcp2_max(initcwnd, target_cwnd) * 289 / 100;
+
+ ngtcp2_log_info(cc->ccb.log, NGTCP2_LOG_EVENT_RCV,
+ "target_cwnd=%" PRIu64 " max_delivery_rate_sec=%" PRIu64
+ " min_rtt=%" PRIu64,
+ cc->target_cwnd, cc->max_delivery_rate_sec, cstat->min_rtt);
+ }
+}
+
+void ngtcp2_cc_cubic_cc_on_pkt_sent(ngtcp2_cc *ccx, ngtcp2_conn_stat *cstat,
+ const ngtcp2_cc_pkt *pkt) {
+ ngtcp2_cubic_cc *cc = ngtcp2_struct_of(ccx->ccb, ngtcp2_cubic_cc, ccb);
+ (void)cstat;
+
+ if (pkt->pktns_id != NGTCP2_PKTNS_ID_APPLICATION || cc->window_end != -1) {
+ return;
+ }
+
+ cc->window_end = pkt->pkt_num;
+ cc->last_round_min_rtt = cc->current_round_min_rtt;
+ cc->current_round_min_rtt = UINT64_MAX;
+ cc->rtt_sample_count = 0;
+}
+
+void ngtcp2_cc_cubic_cc_new_rtt_sample(ngtcp2_cc *ccx, ngtcp2_conn_stat *cstat,
+ ngtcp2_tstamp ts) {
+ ngtcp2_cubic_cc *cc = ngtcp2_struct_of(ccx->ccb, ngtcp2_cubic_cc, ccb);
+ (void)ts;
+
+ if (cc->window_end == -1) {
+ return;
+ }
+
+ cc->current_round_min_rtt =
+ ngtcp2_min(cc->current_round_min_rtt, cstat->latest_rtt);
+ ++cc->rtt_sample_count;
+}
+
+void ngtcp2_cc_cubic_cc_reset(ngtcp2_cc *ccx, ngtcp2_conn_stat *cstat,
+ ngtcp2_tstamp ts) {
+ ngtcp2_cubic_cc *cc = ngtcp2_struct_of(ccx->ccb, ngtcp2_cubic_cc, ccb);
+ (void)cstat;
+ (void)ts;
+
+ cubic_cc_reset(cc);
+}
+
+void ngtcp2_cc_cubic_cc_event(ngtcp2_cc *ccx, ngtcp2_conn_stat *cstat,
+ ngtcp2_cc_event_type event, ngtcp2_tstamp ts) {
+ ngtcp2_cubic_cc *cc = ngtcp2_struct_of(ccx->ccb, ngtcp2_cubic_cc, ccb);
+ ngtcp2_tstamp last_ts;
+
+ if (event != NGTCP2_CC_EVENT_TYPE_TX_START || cc->epoch_start == UINT64_MAX) {
+ return;
+ }
+
+ last_ts = cstat->last_tx_pkt_ts[NGTCP2_PKTNS_ID_APPLICATION];
+ if (last_ts == UINT64_MAX || last_ts <= cc->epoch_start) {
+ return;
+ }
+
+ assert(ts >= last_ts);
+
+ cc->epoch_start += ts - last_ts;
+}
diff --git a/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_cc.h b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_cc.h
new file mode 100644
index 0000000..6d9e0c2
--- /dev/null
+++ b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_cc.h
@@ -0,0 +1,421 @@
+/*
+ * ngtcp2
+ *
+ * Copyright (c) 2018 ngtcp2 contributors
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#ifndef NGTCP2_CC_H
+#define NGTCP2_CC_H
+
+#ifdef HAVE_CONFIG_H
+# include <config.h>
+#endif /* HAVE_CONFIG_H */
+
+#include <ngtcp2/ngtcp2.h>
+
+#define NGTCP2_LOSS_REDUCTION_FACTOR_BITS 1
+#define NGTCP2_PERSISTENT_CONGESTION_THRESHOLD 3
+
+typedef struct ngtcp2_log ngtcp2_log;
+
+/**
+ * @struct
+ *
+ * :type:`ngtcp2_cc_base` is the base structure of custom congestion
+ * control algorithm. It must be the first field of custom congestion
+ * controller.
+ */
+typedef struct ngtcp2_cc_base {
+ /**
+ * :member:`log` is ngtcp2 library internal logger.
+ */
+ ngtcp2_log *log;
+} ngtcp2_cc_base;
+
+/**
+ * @struct
+ *
+ * :type:`ngtcp2_cc_pkt` is a convenient structure to include
+ * acked/lost/sent packet.
+ */
+typedef struct ngtcp2_cc_pkt {
+ /**
+ * :member:`pkt_num` is the packet number
+ */
+ int64_t pkt_num;
+ /**
+ * :member:`pktlen` is the length of packet.
+ */
+ size_t pktlen;
+ /**
+ * :member:`pktns_id` is the ID of packet number space which this
+ * packet belongs to.
+ */
+ ngtcp2_pktns_id pktns_id;
+ /**
+ * :member:`sent_ts` is the timestamp when packet is sent.
+ */
+ ngtcp2_tstamp sent_ts;
+ /**
+ * :member:`lost` is the number of bytes lost when this packet was
+ * sent.
+ */
+ uint64_t lost;
+ /**
+ * :member:`tx_in_flight` is the bytes in flight when this packet
+ * was sent.
+ */
+ uint64_t tx_in_flight;
+ /**
+ * :member:`is_app_limited` is nonzero if the connection is
+ * app-limited when this packet was sent.
+ */
+ int is_app_limited;
+} ngtcp2_cc_pkt;
+
+/**
+ * @struct
+ *
+ * :type:`ngtcp2_cc_ack` is a convenient structure which stores
+ * acknowledged and lost bytes.
+ */
+typedef struct ngtcp2_cc_ack {
+ /**
+ * :member:`prior_bytes_in_flight` is the in-flight bytes before
+ * processing this ACK.
+ */
+ uint64_t prior_bytes_in_flight;
+ /**
+ * :member:`bytes_delivered` is the number of bytes acknowledged.
+ */
+ uint64_t bytes_delivered;
+ /**
+ * :member:`bytes_lost` is the number of bytes declared lost.
+ */
+ uint64_t bytes_lost;
+ /**
+ * :member:`pkt_delivered` is the cumulative acknowledged bytes when
+ * the last packet acknowledged by this ACK was sent.
+ */
+ uint64_t pkt_delivered;
+ /**
+ * :member:`largest_acked_sent_ts` is the time when the largest
+ * acknowledged packet was sent.
+ */
+ ngtcp2_tstamp largest_acked_sent_ts;
+ /**
+ * :member:`rtt` is the RTT sample. It is UINT64_MAX if no RTT
+ * sample is available.
+ */
+ ngtcp2_duration rtt;
+} ngtcp2_cc_ack;
+
+typedef struct ngtcp2_cc ngtcp2_cc;
+
+/**
+ * @functypedef
+ *
+ * :type:`ngtcp2_cc_on_pkt_acked` is a callback function which is
+ * called with an acknowledged packet.
+ */
+typedef void (*ngtcp2_cc_on_pkt_acked)(ngtcp2_cc *cc, ngtcp2_conn_stat *cstat,
+ const ngtcp2_cc_pkt *pkt,
+ ngtcp2_tstamp ts);
+
+/**
+ * @functypedef
+ *
+ * :type:`ngtcp2_cc_on_pkt_lost` is a callback function which is
+ * called with a lost packet.
+ */
+typedef void (*ngtcp2_cc_on_pkt_lost)(ngtcp2_cc *cc, ngtcp2_conn_stat *cstat,
+ const ngtcp2_cc_pkt *pkt,
+ ngtcp2_tstamp ts);
+/**
+ * @functypedef
+ *
+ * :type:`ngtcp2_cc_congestion_event` is a callback function which is
+ * called when congestion event happens (e.g., when packet is lost).
+ */
+typedef void (*ngtcp2_cc_congestion_event)(ngtcp2_cc *cc,
+ ngtcp2_conn_stat *cstat,
+ ngtcp2_tstamp sent_ts,
+ ngtcp2_tstamp ts);
+
+/**
+ * @functypedef
+ *
+ * :type:`ngtcp2_cc_on_spurious_congestion` is a callback function
+ * which is called when a spurious congestion is detected.
+ */
+typedef void (*ngtcp2_cc_on_spurious_congestion)(ngtcp2_cc *cc,
+ ngtcp2_conn_stat *cstat,
+ ngtcp2_tstamp ts);
+
+/**
+ * @functypedef
+ *
+ * :type:`ngtcp2_cc_on_persistent_congestion` is a callback function
+ * which is called when persistent congestion is established.
+ */
+typedef void (*ngtcp2_cc_on_persistent_congestion)(ngtcp2_cc *cc,
+ ngtcp2_conn_stat *cstat,
+ ngtcp2_tstamp ts);
+
+/**
+ * @functypedef
+ *
+ * :type:`ngtcp2_cc_on_ack_recv` is a callback function which is
+ * called when an acknowledgement is received.
+ */
+typedef void (*ngtcp2_cc_on_ack_recv)(ngtcp2_cc *cc, ngtcp2_conn_stat *cstat,
+ const ngtcp2_cc_ack *ack,
+ ngtcp2_tstamp ts);
+
+/**
+ * @functypedef
+ *
+ * :type:`ngtcp2_cc_on_pkt_sent` is a callback function which is
+ * called when an ack-eliciting packet is sent.
+ */
+typedef void (*ngtcp2_cc_on_pkt_sent)(ngtcp2_cc *cc, ngtcp2_conn_stat *cstat,
+ const ngtcp2_cc_pkt *pkt);
+
+/**
+ * @functypedef
+ *
+ * :type:`ngtcp2_cc_new_rtt_sample` is a callback function which is
+ * called when new RTT sample is obtained.
+ */
+typedef void (*ngtcp2_cc_new_rtt_sample)(ngtcp2_cc *cc, ngtcp2_conn_stat *cstat,
+ ngtcp2_tstamp ts);
+
+/**
+ * @functypedef
+ *
+ * :type:`ngtcp2_cc_reset` is a callback function which is called when
+ * congestion state must be reset.
+ */
+typedef void (*ngtcp2_cc_reset)(ngtcp2_cc *cc, ngtcp2_conn_stat *cstat,
+ ngtcp2_tstamp ts);
+
+/**
+ * @enum
+ *
+ * :type:`ngtcp2_cc_event_type` defines congestion control events.
+ */
+typedef enum ngtcp2_cc_event_type {
+ /**
+ * :enum:`NGTCP2_CC_EVENT_TX_START` occurs when ack-eliciting packet
+ * is sent and no other ack-eliciting packet is present.
+ */
+ NGTCP2_CC_EVENT_TYPE_TX_START
+} ngtcp2_cc_event_type;
+
+/**
+ * @functypedef
+ *
+ * :type:`ngtcp2_cc_event` is a callback function which is called when
+ * a specific event happens.
+ */
+typedef void (*ngtcp2_cc_event)(ngtcp2_cc *cc, ngtcp2_conn_stat *cstat,
+ ngtcp2_cc_event_type event, ngtcp2_tstamp ts);
+
+/**
+ * @struct
+ *
+ * :type:`ngtcp2_cc` is congestion control algorithm interface to
+ * allow custom implementation.
+ */
+typedef struct ngtcp2_cc {
+ /**
+ * :member:`ccb` is a pointer to :type:`ngtcp2_cc_base` which
+ * usually contains a state.
+ */
+ ngtcp2_cc_base *ccb;
+ /**
+ * :member:`on_pkt_acked` is a callback function which is called
+ * when a packet is acknowledged.
+ */
+ ngtcp2_cc_on_pkt_acked on_pkt_acked;
+ /**
+ * :member:`on_pkt_lost` is a callback function which is called when
+ * a packet is lost.
+ */
+ ngtcp2_cc_on_pkt_lost on_pkt_lost;
+ /**
+ * :member:`congestion_event` is a callback function which is called
+ * when congestion event happens (.e.g, packet is lost).
+ */
+ ngtcp2_cc_congestion_event congestion_event;
+ /**
+ * :member:`on_spurious_congestion` is a callback function which is
+ * called when a spurious congestion is detected.
+ */
+ ngtcp2_cc_on_spurious_congestion on_spurious_congestion;
+ /**
+ * :member:`on_persistent_congestion` is a callback function which
+ * is called when persistent congestion is established.
+ */
+ ngtcp2_cc_on_persistent_congestion on_persistent_congestion;
+ /**
+ * :member:`on_ack_recv` is a callback function which is called when
+ * an acknowledgement is received.
+ */
+ ngtcp2_cc_on_ack_recv on_ack_recv;
+ /**
+ * :member:`on_pkt_sent` is a callback function which is called when
+ * ack-eliciting packet is sent.
+ */
+ ngtcp2_cc_on_pkt_sent on_pkt_sent;
+ /**
+ * :member:`new_rtt_sample` is a callback function which is called
+ * when new RTT sample is obtained.
+ */
+ ngtcp2_cc_new_rtt_sample new_rtt_sample;
+ /**
+ * :member:`reset` is a callback function which is called when
+ * congestion control state must be reset.
+ */
+ ngtcp2_cc_reset reset;
+ /**
+ * :member:`event` is a callback function which is called when a
+ * specific event happens.
+ */
+ ngtcp2_cc_event event;
+} ngtcp2_cc;
+
+/*
+ * ngtcp2_cc_compute_initcwnd computes initial cwnd.
+ */
+uint64_t ngtcp2_cc_compute_initcwnd(size_t max_packet_size);
+
+ngtcp2_cc_pkt *ngtcp2_cc_pkt_init(ngtcp2_cc_pkt *pkt, int64_t pkt_num,
+ size_t pktlen, ngtcp2_pktns_id pktns_id,
+ ngtcp2_tstamp sent_ts, uint64_t lost,
+ uint64_t tx_in_flight, int is_app_limited);
+
+/* ngtcp2_reno_cc is the RENO congestion controller. */
+typedef struct ngtcp2_reno_cc {
+ ngtcp2_cc_base ccb;
+ uint64_t max_delivery_rate_sec;
+ uint64_t target_cwnd;
+ uint64_t pending_add;
+} ngtcp2_reno_cc;
+
+int ngtcp2_cc_reno_cc_init(ngtcp2_cc *cc, ngtcp2_log *log,
+ const ngtcp2_mem *mem);
+
+void ngtcp2_cc_reno_cc_free(ngtcp2_cc *cc, const ngtcp2_mem *mem);
+
+void ngtcp2_reno_cc_init(ngtcp2_reno_cc *cc, ngtcp2_log *log);
+
+void ngtcp2_reno_cc_free(ngtcp2_reno_cc *cc);
+
+void ngtcp2_cc_reno_cc_on_pkt_acked(ngtcp2_cc *cc, ngtcp2_conn_stat *cstat,
+ const ngtcp2_cc_pkt *pkt, ngtcp2_tstamp ts);
+
+void ngtcp2_cc_reno_cc_congestion_event(ngtcp2_cc *cc, ngtcp2_conn_stat *cstat,
+ ngtcp2_tstamp sent_ts,
+ ngtcp2_tstamp ts);
+
+void ngtcp2_cc_reno_cc_on_persistent_congestion(ngtcp2_cc *cc,
+ ngtcp2_conn_stat *cstat,
+ ngtcp2_tstamp ts);
+
+void ngtcp2_cc_reno_cc_on_ack_recv(ngtcp2_cc *cc, ngtcp2_conn_stat *cstat,
+ const ngtcp2_cc_ack *ack, ngtcp2_tstamp ts);
+
+void ngtcp2_cc_reno_cc_reset(ngtcp2_cc *cc, ngtcp2_conn_stat *cstat,
+ ngtcp2_tstamp ts);
+
+/* ngtcp2_cubic_cc is CUBIC congestion controller. */
+typedef struct ngtcp2_cubic_cc {
+ ngtcp2_cc_base ccb;
+ uint64_t max_delivery_rate_sec;
+ uint64_t target_cwnd;
+ uint64_t w_last_max;
+ uint64_t w_tcp;
+ uint64_t origin_point;
+ ngtcp2_tstamp epoch_start;
+ uint64_t k;
+ /* prior stores the congestion state when a congestion event occurs
+ in order to restore the state when it turns out that the event is
+ spurious. */
+ struct {
+ uint64_t cwnd;
+ uint64_t ssthresh;
+ uint64_t w_last_max;
+ uint64_t w_tcp;
+ uint64_t origin_point;
+ ngtcp2_tstamp epoch_start;
+ uint64_t k;
+ } prior;
+ /* HyStart++ variables */
+ size_t rtt_sample_count;
+ uint64_t current_round_min_rtt;
+ uint64_t last_round_min_rtt;
+ int64_t window_end;
+ uint64_t pending_add;
+ uint64_t pending_w_add;
+} ngtcp2_cubic_cc;
+
+int ngtcp2_cc_cubic_cc_init(ngtcp2_cc *cc, ngtcp2_log *log,
+ const ngtcp2_mem *mem);
+
+void ngtcp2_cc_cubic_cc_free(ngtcp2_cc *cc, const ngtcp2_mem *mem);
+
+void ngtcp2_cubic_cc_init(ngtcp2_cubic_cc *cc, ngtcp2_log *log);
+
+void ngtcp2_cubic_cc_free(ngtcp2_cubic_cc *cc);
+
+void ngtcp2_cc_cubic_cc_on_pkt_acked(ngtcp2_cc *cc, ngtcp2_conn_stat *cstat,
+ const ngtcp2_cc_pkt *pkt,
+ ngtcp2_tstamp ts);
+
+void ngtcp2_cc_cubic_cc_congestion_event(ngtcp2_cc *cc, ngtcp2_conn_stat *cstat,
+ ngtcp2_tstamp sent_ts,
+ ngtcp2_tstamp ts);
+
+void ngtcp2_cc_cubic_cc_on_spurious_congestion(ngtcp2_cc *ccx,
+ ngtcp2_conn_stat *cstat,
+ ngtcp2_tstamp ts);
+
+void ngtcp2_cc_cubic_cc_on_persistent_congestion(ngtcp2_cc *cc,
+ ngtcp2_conn_stat *cstat,
+ ngtcp2_tstamp ts);
+
+void ngtcp2_cc_cubic_cc_on_ack_recv(ngtcp2_cc *cc, ngtcp2_conn_stat *cstat,
+ const ngtcp2_cc_ack *ack, ngtcp2_tstamp ts);
+
+void ngtcp2_cc_cubic_cc_on_pkt_sent(ngtcp2_cc *cc, ngtcp2_conn_stat *cstat,
+ const ngtcp2_cc_pkt *pkt);
+
+void ngtcp2_cc_cubic_cc_new_rtt_sample(ngtcp2_cc *cc, ngtcp2_conn_stat *cstat,
+ ngtcp2_tstamp ts);
+
+void ngtcp2_cc_cubic_cc_reset(ngtcp2_cc *cc, ngtcp2_conn_stat *cstat,
+ ngtcp2_tstamp ts);
+
+void ngtcp2_cc_cubic_cc_event(ngtcp2_cc *cc, ngtcp2_conn_stat *cstat,
+ ngtcp2_cc_event_type event, ngtcp2_tstamp ts);
+
+#endif /* NGTCP2_CC_H */
diff --git a/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_cid.c b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_cid.c
new file mode 100644
index 0000000..f3b92b5
--- /dev/null
+++ b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_cid.c
@@ -0,0 +1,147 @@
+/*
+ * ngtcp2
+ *
+ * Copyright (c) 2018 ngtcp2 contributors
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "ngtcp2_cid.h"
+
+#include <assert.h>
+#include <string.h>
+
+#include "ngtcp2_path.h"
+#include "ngtcp2_str.h"
+
+void ngtcp2_cid_zero(ngtcp2_cid *cid) { memset(cid, 0, sizeof(*cid)); }
+
+void ngtcp2_cid_init(ngtcp2_cid *cid, const uint8_t *data, size_t datalen) {
+ assert(datalen <= NGTCP2_MAX_CIDLEN);
+
+ cid->datalen = datalen;
+ if (datalen) {
+ ngtcp2_cpymem(cid->data, data, datalen);
+ }
+}
+
+int ngtcp2_cid_eq(const ngtcp2_cid *cid, const ngtcp2_cid *other) {
+ return cid->datalen == other->datalen &&
+ 0 == memcmp(cid->data, other->data, cid->datalen);
+}
+
+int ngtcp2_cid_less(const ngtcp2_cid *lhs, const ngtcp2_cid *rhs) {
+ int s = lhs->datalen < rhs->datalen;
+ size_t n = s ? lhs->datalen : rhs->datalen;
+ int c = memcmp(lhs->data, rhs->data, n);
+
+ return c < 0 || (c == 0 && s);
+}
+
+int ngtcp2_cid_empty(const ngtcp2_cid *cid) { return cid->datalen == 0; }
+
+void ngtcp2_scid_init(ngtcp2_scid *scid, uint64_t seq, const ngtcp2_cid *cid) {
+ scid->pe.index = NGTCP2_PQ_BAD_INDEX;
+ scid->seq = seq;
+ scid->cid = *cid;
+ scid->retired_ts = UINT64_MAX;
+ scid->flags = NGTCP2_SCID_FLAG_NONE;
+}
+
+void ngtcp2_scid_copy(ngtcp2_scid *dest, const ngtcp2_scid *src) {
+ ngtcp2_scid_init(dest, src->seq, &src->cid);
+ dest->retired_ts = src->retired_ts;
+ dest->flags = src->flags;
+}
+
+void ngtcp2_dcid_init(ngtcp2_dcid *dcid, uint64_t seq, const ngtcp2_cid *cid,
+ const uint8_t *token) {
+ dcid->seq = seq;
+ dcid->cid = *cid;
+ if (token) {
+ memcpy(dcid->token, token, NGTCP2_STATELESS_RESET_TOKENLEN);
+ dcid->flags = NGTCP2_DCID_FLAG_TOKEN_PRESENT;
+ } else {
+ dcid->flags = NGTCP2_DCID_FLAG_NONE;
+ }
+ ngtcp2_path_storage_zero(&dcid->ps);
+ dcid->retired_ts = UINT64_MAX;
+ dcid->bound_ts = UINT64_MAX;
+ dcid->bytes_sent = 0;
+ dcid->bytes_recv = 0;
+ dcid->max_udp_payload_size = NGTCP2_MAX_UDP_PAYLOAD_SIZE;
+}
+
+void ngtcp2_dcid_set_token(ngtcp2_dcid *dcid, const uint8_t *token) {
+ assert(token);
+
+ dcid->flags |= NGTCP2_DCID_FLAG_TOKEN_PRESENT;
+ memcpy(dcid->token, token, NGTCP2_STATELESS_RESET_TOKENLEN);
+}
+
+void ngtcp2_dcid_set_path(ngtcp2_dcid *dcid, const ngtcp2_path *path) {
+ ngtcp2_path_copy(&dcid->ps.path, path);
+}
+
+void ngtcp2_dcid_copy(ngtcp2_dcid *dest, const ngtcp2_dcid *src) {
+ ngtcp2_dcid_init(dest, src->seq, &src->cid,
+ (src->flags & NGTCP2_DCID_FLAG_TOKEN_PRESENT) ? src->token
+ : NULL);
+ ngtcp2_path_copy(&dest->ps.path, &src->ps.path);
+ dest->retired_ts = src->retired_ts;
+ dest->bound_ts = src->bound_ts;
+ dest->flags = src->flags;
+ dest->bytes_sent = src->bytes_sent;
+ dest->bytes_recv = src->bytes_recv;
+ dest->max_udp_payload_size = src->max_udp_payload_size;
+}
+
+void ngtcp2_dcid_copy_cid_token(ngtcp2_dcid *dest, const ngtcp2_dcid *src) {
+ dest->seq = src->seq;
+ dest->cid = src->cid;
+ if (src->flags & NGTCP2_DCID_FLAG_TOKEN_PRESENT) {
+ dest->flags |= NGTCP2_DCID_FLAG_TOKEN_PRESENT;
+ memcpy(dest->token, src->token, NGTCP2_STATELESS_RESET_TOKENLEN);
+ } else if (dest->flags & NGTCP2_DCID_FLAG_TOKEN_PRESENT) {
+ dest->flags &= (uint8_t)~NGTCP2_DCID_FLAG_TOKEN_PRESENT;
+ }
+}
+
+int ngtcp2_dcid_verify_uniqueness(ngtcp2_dcid *dcid, uint64_t seq,
+ const ngtcp2_cid *cid, const uint8_t *token) {
+ if (dcid->seq == seq) {
+ return ngtcp2_cid_eq(&dcid->cid, cid) &&
+ (dcid->flags & NGTCP2_DCID_FLAG_TOKEN_PRESENT) &&
+ memcmp(dcid->token, token,
+ NGTCP2_STATELESS_RESET_TOKENLEN) == 0
+ ? 0
+ : NGTCP2_ERR_PROTO;
+ }
+
+ return !ngtcp2_cid_eq(&dcid->cid, cid) ? 0 : NGTCP2_ERR_PROTO;
+}
+
+int ngtcp2_dcid_verify_stateless_reset_token(const ngtcp2_dcid *dcid,
+ const uint8_t *token) {
+ return (dcid->flags & NGTCP2_DCID_FLAG_TOKEN_PRESENT) &&
+ ngtcp2_cmemeq(dcid->token, token,
+ NGTCP2_STATELESS_RESET_TOKENLEN)
+ ? 0
+ : NGTCP2_ERR_INVALID_ARGUMENT;
+}
diff --git a/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_cid.h b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_cid.h
new file mode 100644
index 0000000..0b37441
--- /dev/null
+++ b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_cid.h
@@ -0,0 +1,175 @@
+/*
+ * ngtcp2
+ *
+ * Copyright (c) 2018 ngtcp2 contributors
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#ifndef NGTCP2_CID_H
+#define NGTCP2_CID_H
+
+#ifdef HAVE_CONFIG_H
+# include <config.h>
+#endif /* HAVE_CONFIG_H */
+
+#include <ngtcp2/ngtcp2.h>
+
+#include "ngtcp2_pq.h"
+#include "ngtcp2_path.h"
+
+/* NGTCP2_SCID_FLAG_NONE indicates that no flag is set. */
+#define NGTCP2_SCID_FLAG_NONE 0x00u
+/* NGTCP2_SCID_FLAG_USED indicates that a local endpoint observed that
+ a remote endpoint uses a particular Connection ID. */
+#define NGTCP2_SCID_FLAG_USED 0x01u
+/* NGTCP2_SCID_FLAG_RETIRED indicates that a particular Connection ID
+ is retired. */
+#define NGTCP2_SCID_FLAG_RETIRED 0x02u
+
+typedef struct ngtcp2_scid {
+ ngtcp2_pq_entry pe;
+ /* seq is the sequence number associated to the CID. */
+ uint64_t seq;
+ /* cid is a connection ID */
+ ngtcp2_cid cid;
+ /* retired_ts is the timestamp when peer tells that this CID is
+ retired. */
+ ngtcp2_tstamp retired_ts;
+ /* flags is the bitwise OR of zero or more of NGTCP2_SCID_FLAG_*. */
+ uint8_t flags;
+} ngtcp2_scid;
+
+/* NGTCP2_DCID_FLAG_NONE indicates that no flag is set. */
+#define NGTCP2_DCID_FLAG_NONE 0x00u
+/* NGTCP2_DCID_FLAG_PATH_VALIDATED indicates that an associated path
+ has been validated. */
+#define NGTCP2_DCID_FLAG_PATH_VALIDATED 0x01u
+/* NGTCP2_DCID_FLAG_TOKEN_PRESENT indicates that a stateless reset
+ token is set in token field. */
+#define NGTCP2_DCID_FLAG_TOKEN_PRESENT 0x02u
+
+typedef struct ngtcp2_dcid {
+ /* seq is the sequence number associated to the CID. */
+ uint64_t seq;
+ /* cid is a connection ID */
+ ngtcp2_cid cid;
+ /* path is a path which cid is bound to. The addresses are zero
+ length if cid has not been bound to a particular path yet. */
+ ngtcp2_path_storage ps;
+ /* retired_ts is the timestamp when peer tells that this CID is
+ retired. */
+ ngtcp2_tstamp retired_ts;
+ /* bound_ts is the timestamp when this connection ID is bound to a
+ particular path. It is only assigned when a connection ID is
+ used just for sending PATH_RESPONSE and is not zero-length. */
+ ngtcp2_tstamp bound_ts;
+ /* bytes_sent is the number of bytes sent to an associated path. */
+ uint64_t bytes_sent;
+ /* bytes_recv is the number of bytes received from an associated
+ path. */
+ uint64_t bytes_recv;
+ /* max_udp_payload_size is the maximum size of UDP payload that is
+ allowed to send to this path. */
+ size_t max_udp_payload_size;
+ /* flags is bitwise OR of zero or more of NGTCP2_DCID_FLAG_*. */
+ uint8_t flags;
+ /* token is a stateless reset token associated to this CID.
+ Actually, the stateless reset token is tied to the connection,
+ not to the particular connection ID. */
+ uint8_t token[NGTCP2_STATELESS_RESET_TOKENLEN];
+} ngtcp2_dcid;
+
+/* ngtcp2_cid_zero makes |cid| zero-length. */
+void ngtcp2_cid_zero(ngtcp2_cid *cid);
+
+/*
+ * ngtcp2_cid_less returns nonzero if |lhs| is lexicographical smaller
+ * than |rhs|.
+ */
+int ngtcp2_cid_less(const ngtcp2_cid *lhs, const ngtcp2_cid *rhs);
+
+/*
+ * ngtcp2_cid_empty returns nonzero if |cid| includes empty connection
+ * ID.
+ */
+int ngtcp2_cid_empty(const ngtcp2_cid *cid);
+
+/*
+ * ngtcp2_scid_init initializes |scid| with the given parameters.
+ */
+void ngtcp2_scid_init(ngtcp2_scid *scid, uint64_t seq, const ngtcp2_cid *cid);
+
+/*
+ * ngtcp2_scid_copy copies |src| into |dest|.
+ */
+void ngtcp2_scid_copy(ngtcp2_scid *dest, const ngtcp2_scid *src);
+
+/*
+ * ngtcp2_dcid_init initializes |dcid| with the given parameters. If
+ * |token| is NULL, the function fills dcid->token it with 0. |token|
+ * must be NGTCP2_STATELESS_RESET_TOKENLEN bytes long.
+ */
+void ngtcp2_dcid_init(ngtcp2_dcid *dcid, uint64_t seq, const ngtcp2_cid *cid,
+ const uint8_t *token);
+
+/*
+ * ngtcp2_dcid_set_token sets |token| to |dcid|. |token| must not be
+ * NULL and must be NGTCP2_STATELESS_RESET_TOKENLEN bytes long.
+ */
+void ngtcp2_dcid_set_token(ngtcp2_dcid *dcid, const uint8_t *token);
+
+/*
+ * ngtcp2_dcid_set_path sets |path| to |dcid|. It sets
+ * max_udp_payload_size to the minimum UDP payload size supported
+ * by the IP protocol version.
+ */
+void ngtcp2_dcid_set_path(ngtcp2_dcid *dcid, const ngtcp2_path *path);
+
+/*
+ * ngtcp2_dcid_copy copies |src| into |dest|.
+ */
+void ngtcp2_dcid_copy(ngtcp2_dcid *dest, const ngtcp2_dcid *src);
+
+/*
+ * ngtcp2_dcid_copy_cid_token behaves like ngtcp2_dcid_copy, but it
+ * only copies cid, seq, and path.
+ */
+void ngtcp2_dcid_copy_cid_token(ngtcp2_dcid *dest, const ngtcp2_dcid *src);
+
+/*
+ * ngtcp2_dcid_verify_uniqueness verifies uniqueness of (|seq|, |cid|,
+ * |token|) tuple against |dcid|.
+ */
+int ngtcp2_dcid_verify_uniqueness(ngtcp2_dcid *dcid, uint64_t seq,
+ const ngtcp2_cid *cid, const uint8_t *token);
+
+/*
+ * ngtcp2_dcid_verify_stateless_reset_token verifies stateless reset
+ * token |token| against the one included in |dcid|. This function
+ * returns 0 if the verification succeeds, or one of the following
+ * negative error codes:
+ *
+ * NGTCP2_ERR_INVALID_ARGUMENT
+ * Tokens do not match; or |dcid| does not contain a token.
+ */
+int ngtcp2_dcid_verify_stateless_reset_token(const ngtcp2_dcid *dcid,
+ const uint8_t *token);
+
+#endif /* NGTCP2_CID_H */
diff --git a/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_conn.c b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_conn.c
new file mode 100644
index 0000000..fb00ca8
--- /dev/null
+++ b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_conn.c
@@ -0,0 +1,13698 @@
+/*
+ * ngtcp2
+ *
+ * Copyright (c) 2017 ngtcp2 contributors
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "ngtcp2_conn.h"
+
+#include <string.h>
+#include <assert.h>
+
+#include "ngtcp2_macro.h"
+#include "ngtcp2_log.h"
+#include "ngtcp2_cid.h"
+#include "ngtcp2_conv.h"
+#include "ngtcp2_vec.h"
+#include "ngtcp2_addr.h"
+#include "ngtcp2_path.h"
+#include "ngtcp2_rcvry.h"
+#include "ngtcp2_unreachable.h"
+#include "ngtcp2_net.h"
+
+/* NGTCP2_FLOW_WINDOW_RTT_FACTOR is the factor of RTT when flow
+ control window auto-tuning is triggered. */
+#define NGTCP2_FLOW_WINDOW_RTT_FACTOR 2
+/* NGTCP2_FLOW_WINDOW_SCALING_FACTOR is the growth factor of flow
+ control window. */
+#define NGTCP2_FLOW_WINDOW_SCALING_FACTOR 2
+/* NGTCP2_MIN_COALESCED_PAYLOADLEN is the minimum length of QUIC
+ packet payload that should be coalesced to a long packet. */
+#define NGTCP2_MIN_COALESCED_PAYLOADLEN 128
+
+/*
+ * conn_local_stream returns nonzero if |stream_id| indicates that it
+ * is the stream initiated by local endpoint.
+ */
+static int conn_local_stream(ngtcp2_conn *conn, int64_t stream_id) {
+ return (uint8_t)(stream_id & 1) == conn->server;
+}
+
+/*
+ * bidi_stream returns nonzero if |stream_id| is a bidirectional
+ * stream ID.
+ */
+static int bidi_stream(int64_t stream_id) { return (stream_id & 0x2) == 0; }
+
+/*
+ * conn_is_handshake_completed returns nonzero if QUIC handshake has
+ * completed.
+ */
+static int conn_is_handshake_completed(ngtcp2_conn *conn) {
+ return (conn->flags & NGTCP2_CONN_FLAG_HANDSHAKE_COMPLETED) &&
+ conn->pktns.crypto.rx.ckm && conn->pktns.crypto.tx.ckm;
+}
+
+static int conn_call_recv_client_initial(ngtcp2_conn *conn,
+ const ngtcp2_cid *dcid) {
+ int rv;
+
+ assert(conn->callbacks.recv_client_initial);
+
+ rv = conn->callbacks.recv_client_initial(conn, dcid, conn->user_data);
+ if (rv != 0) {
+ return NGTCP2_ERR_CALLBACK_FAILURE;
+ }
+
+ return 0;
+}
+
+static int conn_call_handshake_completed(ngtcp2_conn *conn) {
+ int rv;
+
+ if (!conn->callbacks.handshake_completed) {
+ return 0;
+ }
+
+ rv = conn->callbacks.handshake_completed(conn, conn->user_data);
+ if (rv != 0) {
+ return NGTCP2_ERR_CALLBACK_FAILURE;
+ }
+
+ return 0;
+}
+
+static int conn_call_recv_stream_data(ngtcp2_conn *conn, ngtcp2_strm *strm,
+ uint32_t flags, uint64_t offset,
+ const uint8_t *data, size_t datalen) {
+ int rv;
+
+ if (!conn->callbacks.recv_stream_data) {
+ return 0;
+ }
+
+ rv = conn->callbacks.recv_stream_data(conn, flags, strm->stream_id, offset,
+ data, datalen, conn->user_data,
+ strm->stream_user_data);
+ if (rv != 0) {
+ return NGTCP2_ERR_CALLBACK_FAILURE;
+ }
+
+ return 0;
+}
+
+static int conn_call_recv_crypto_data(ngtcp2_conn *conn,
+ ngtcp2_crypto_level crypto_level,
+ uint64_t offset, const uint8_t *data,
+ size_t datalen) {
+ int rv;
+
+ assert(conn->callbacks.recv_crypto_data);
+
+ rv = conn->callbacks.recv_crypto_data(conn, crypto_level, offset, data,
+ datalen, conn->user_data);
+ switch (rv) {
+ case 0:
+ case NGTCP2_ERR_CRYPTO:
+ case NGTCP2_ERR_REQUIRED_TRANSPORT_PARAM:
+ case NGTCP2_ERR_MALFORMED_TRANSPORT_PARAM:
+ case NGTCP2_ERR_TRANSPORT_PARAM:
+ case NGTCP2_ERR_PROTO:
+ case NGTCP2_ERR_VERSION_NEGOTIATION_FAILURE:
+ case NGTCP2_ERR_NOMEM:
+ case NGTCP2_ERR_CALLBACK_FAILURE:
+ return rv;
+ default:
+ return NGTCP2_ERR_CALLBACK_FAILURE;
+ }
+}
+
+static int conn_call_stream_open(ngtcp2_conn *conn, ngtcp2_strm *strm) {
+ int rv;
+
+ if (!conn->callbacks.stream_open) {
+ return 0;
+ }
+
+ rv = conn->callbacks.stream_open(conn, strm->stream_id, conn->user_data);
+ if (rv != 0) {
+ return NGTCP2_ERR_CALLBACK_FAILURE;
+ }
+
+ return 0;
+}
+
+static int conn_call_stream_close(ngtcp2_conn *conn, ngtcp2_strm *strm) {
+ int rv;
+ uint32_t flags = NGTCP2_STREAM_CLOSE_FLAG_NONE;
+
+ if (!conn->callbacks.stream_close) {
+ return 0;
+ }
+
+ if (strm->flags & NGTCP2_STRM_FLAG_APP_ERROR_CODE_SET) {
+ flags |= NGTCP2_STREAM_CLOSE_FLAG_APP_ERROR_CODE_SET;
+ }
+
+ rv = conn->callbacks.stream_close(conn, flags, strm->stream_id,
+ strm->app_error_code, conn->user_data,
+ strm->stream_user_data);
+ if (rv != 0) {
+ return NGTCP2_ERR_CALLBACK_FAILURE;
+ }
+
+ return 0;
+}
+
+static int conn_call_stream_reset(ngtcp2_conn *conn, int64_t stream_id,
+ uint64_t final_size, uint64_t app_error_code,
+ void *stream_user_data) {
+ int rv;
+
+ if (!conn->callbacks.stream_reset) {
+ return 0;
+ }
+
+ rv = conn->callbacks.stream_reset(conn, stream_id, final_size, app_error_code,
+ conn->user_data, stream_user_data);
+ if (rv != 0) {
+ return NGTCP2_ERR_CALLBACK_FAILURE;
+ }
+
+ return 0;
+}
+
+static int conn_call_extend_max_local_streams_bidi(ngtcp2_conn *conn,
+ uint64_t max_streams) {
+ int rv;
+
+ if (!conn->callbacks.extend_max_local_streams_bidi) {
+ return 0;
+ }
+
+ rv = conn->callbacks.extend_max_local_streams_bidi(conn, max_streams,
+ conn->user_data);
+ if (rv != 0) {
+ return NGTCP2_ERR_CALLBACK_FAILURE;
+ }
+
+ return 0;
+}
+
+static int conn_call_extend_max_local_streams_uni(ngtcp2_conn *conn,
+ uint64_t max_streams) {
+ int rv;
+
+ if (!conn->callbacks.extend_max_local_streams_uni) {
+ return 0;
+ }
+
+ rv = conn->callbacks.extend_max_local_streams_uni(conn, max_streams,
+ conn->user_data);
+ if (rv != 0) {
+ return NGTCP2_ERR_CALLBACK_FAILURE;
+ }
+
+ return 0;
+}
+
+static int conn_call_get_new_connection_id(ngtcp2_conn *conn, ngtcp2_cid *cid,
+ uint8_t *token, size_t cidlen) {
+ int rv;
+
+ assert(conn->callbacks.get_new_connection_id);
+
+ rv = conn->callbacks.get_new_connection_id(conn, cid, token, cidlen,
+ conn->user_data);
+ if (rv != 0) {
+ return NGTCP2_ERR_CALLBACK_FAILURE;
+ }
+
+ return 0;
+}
+
+static int conn_call_remove_connection_id(ngtcp2_conn *conn,
+ const ngtcp2_cid *cid) {
+ int rv;
+
+ if (!conn->callbacks.remove_connection_id) {
+ return 0;
+ }
+
+ rv = conn->callbacks.remove_connection_id(conn, cid, conn->user_data);
+ if (rv != 0) {
+ return NGTCP2_ERR_CALLBACK_FAILURE;
+ }
+
+ return 0;
+}
+
+static int conn_call_path_validation(ngtcp2_conn *conn, const ngtcp2_pv *pv,
+ ngtcp2_path_validation_result res) {
+ int rv;
+ uint32_t flags = NGTCP2_PATH_VALIDATION_FLAG_NONE;
+
+ if (!conn->callbacks.path_validation) {
+ return 0;
+ }
+
+ if (pv->flags & NGTCP2_PV_FLAG_PREFERRED_ADDR) {
+ flags |= NGTCP2_PATH_VALIDATION_FLAG_PREFERRED_ADDR;
+ }
+
+ rv = conn->callbacks.path_validation(conn, flags, &pv->dcid.ps.path, res,
+ conn->user_data);
+ if (rv != 0) {
+ return NGTCP2_ERR_CALLBACK_FAILURE;
+ }
+
+ return 0;
+}
+
+static int conn_call_select_preferred_addr(ngtcp2_conn *conn,
+ ngtcp2_path *dest) {
+ int rv;
+
+ if (!conn->callbacks.select_preferred_addr) {
+ return 0;
+ }
+
+ assert(conn->remote.transport_params);
+ assert(conn->remote.transport_params->preferred_address_present);
+
+ rv = conn->callbacks.select_preferred_addr(
+ conn, dest, &conn->remote.transport_params->preferred_address,
+ conn->user_data);
+ if (rv != 0) {
+ return NGTCP2_ERR_CALLBACK_FAILURE;
+ }
+
+ return 0;
+}
+
+static int conn_call_extend_max_remote_streams_bidi(ngtcp2_conn *conn,
+ uint64_t max_streams) {
+ int rv;
+
+ if (!conn->callbacks.extend_max_remote_streams_bidi) {
+ return 0;
+ }
+
+ rv = conn->callbacks.extend_max_remote_streams_bidi(conn, max_streams,
+ conn->user_data);
+ if (rv != 0) {
+ return NGTCP2_ERR_CALLBACK_FAILURE;
+ }
+
+ return 0;
+}
+
+static int conn_call_extend_max_remote_streams_uni(ngtcp2_conn *conn,
+ uint64_t max_streams) {
+ int rv;
+
+ if (!conn->callbacks.extend_max_remote_streams_uni) {
+ return 0;
+ }
+
+ rv = conn->callbacks.extend_max_remote_streams_uni(conn, max_streams,
+ conn->user_data);
+ if (rv != 0) {
+ return NGTCP2_ERR_CALLBACK_FAILURE;
+ }
+
+ return 0;
+}
+
+static int conn_call_extend_max_stream_data(ngtcp2_conn *conn,
+ ngtcp2_strm *strm,
+ int64_t stream_id,
+ uint64_t datalen) {
+ int rv;
+
+ if (!conn->callbacks.extend_max_stream_data) {
+ return 0;
+ }
+
+ rv = conn->callbacks.extend_max_stream_data(
+ conn, stream_id, datalen, conn->user_data, strm->stream_user_data);
+ if (rv != 0) {
+ return NGTCP2_ERR_CALLBACK_FAILURE;
+ }
+
+ return 0;
+}
+
+static int conn_call_dcid_status(ngtcp2_conn *conn,
+ ngtcp2_connection_id_status_type type,
+ const ngtcp2_dcid *dcid) {
+ int rv;
+
+ if (!conn->callbacks.dcid_status) {
+ return 0;
+ }
+
+ rv = conn->callbacks.dcid_status(
+ conn, (int)type, dcid->seq, &dcid->cid,
+ (dcid->flags & NGTCP2_DCID_FLAG_TOKEN_PRESENT) ? dcid->token : NULL,
+ conn->user_data);
+ if (rv != 0) {
+ return NGTCP2_ERR_CALLBACK_FAILURE;
+ }
+
+ return 0;
+}
+
+static int conn_call_activate_dcid(ngtcp2_conn *conn, const ngtcp2_dcid *dcid) {
+ return conn_call_dcid_status(conn, NGTCP2_CONNECTION_ID_STATUS_TYPE_ACTIVATE,
+ dcid);
+}
+
+static int conn_call_deactivate_dcid(ngtcp2_conn *conn,
+ const ngtcp2_dcid *dcid) {
+ return conn_call_dcid_status(
+ conn, NGTCP2_CONNECTION_ID_STATUS_TYPE_DEACTIVATE, dcid);
+}
+
+static int conn_call_stream_stop_sending(ngtcp2_conn *conn, int64_t stream_id,
+ uint64_t app_error_code,
+ void *stream_user_data) {
+ int rv;
+
+ if (!conn->callbacks.stream_stop_sending) {
+ return 0;
+ }
+
+ rv = conn->callbacks.stream_stop_sending(conn, stream_id, app_error_code,
+ conn->user_data, stream_user_data);
+ if (rv != 0) {
+ return NGTCP2_ERR_CALLBACK_FAILURE;
+ }
+
+ return 0;
+}
+
+static void conn_call_delete_crypto_aead_ctx(ngtcp2_conn *conn,
+ ngtcp2_crypto_aead_ctx *aead_ctx) {
+ if (!aead_ctx->native_handle) {
+ return;
+ }
+
+ assert(conn->callbacks.delete_crypto_aead_ctx);
+
+ conn->callbacks.delete_crypto_aead_ctx(conn, aead_ctx, conn->user_data);
+}
+
+static void
+conn_call_delete_crypto_cipher_ctx(ngtcp2_conn *conn,
+ ngtcp2_crypto_cipher_ctx *cipher_ctx) {
+ if (!cipher_ctx->native_handle) {
+ return;
+ }
+
+ assert(conn->callbacks.delete_crypto_cipher_ctx);
+
+ conn->callbacks.delete_crypto_cipher_ctx(conn, cipher_ctx, conn->user_data);
+}
+
+static int conn_call_client_initial(ngtcp2_conn *conn) {
+ int rv;
+
+ assert(conn->callbacks.client_initial);
+
+ rv = conn->callbacks.client_initial(conn, conn->user_data);
+ if (rv != 0) {
+ return NGTCP2_ERR_CALLBACK_FAILURE;
+ }
+
+ return 0;
+}
+
+static int conn_call_get_path_challenge_data(ngtcp2_conn *conn, uint8_t *data) {
+ int rv;
+
+ assert(conn->callbacks.get_path_challenge_data);
+
+ rv = conn->callbacks.get_path_challenge_data(conn, data, conn->user_data);
+ if (rv != 0) {
+ return NGTCP2_ERR_CALLBACK_FAILURE;
+ }
+
+ return 0;
+}
+
+static int conn_call_recv_version_negotiation(ngtcp2_conn *conn,
+ const ngtcp2_pkt_hd *hd,
+ const uint32_t *sv, size_t nsv) {
+ int rv;
+
+ if (!conn->callbacks.recv_version_negotiation) {
+ return 0;
+ }
+
+ rv = conn->callbacks.recv_version_negotiation(conn, hd, sv, nsv,
+ conn->user_data);
+ if (rv != 0) {
+ return NGTCP2_ERR_CALLBACK_FAILURE;
+ }
+
+ return 0;
+}
+
+static int conn_call_recv_retry(ngtcp2_conn *conn, const ngtcp2_pkt_hd *hd) {
+ int rv;
+
+ assert(conn->callbacks.recv_retry);
+
+ rv = conn->callbacks.recv_retry(conn, hd, conn->user_data);
+ if (rv != 0) {
+ return NGTCP2_ERR_CALLBACK_FAILURE;
+ }
+
+ return 0;
+}
+
+static int
+conn_call_recv_stateless_reset(ngtcp2_conn *conn,
+ const ngtcp2_pkt_stateless_reset *sr) {
+ int rv;
+
+ if (!conn->callbacks.recv_stateless_reset) {
+ return 0;
+ }
+
+ rv = conn->callbacks.recv_stateless_reset(conn, sr, conn->user_data);
+ if (rv != 0) {
+ return NGTCP2_ERR_CALLBACK_FAILURE;
+ }
+
+ return 0;
+}
+
+static int conn_call_recv_new_token(ngtcp2_conn *conn, const uint8_t *token,
+ size_t tokenlen) {
+ int rv;
+
+ if (!conn->callbacks.recv_new_token) {
+ return 0;
+ }
+
+ rv = conn->callbacks.recv_new_token(conn, token, tokenlen, conn->user_data);
+ if (rv != 0) {
+ return NGTCP2_ERR_CALLBACK_FAILURE;
+ }
+
+ return 0;
+}
+
+static int conn_call_handshake_confirmed(ngtcp2_conn *conn) {
+ int rv;
+
+ if (!conn->callbacks.handshake_confirmed) {
+ return 0;
+ }
+
+ rv = conn->callbacks.handshake_confirmed(conn, conn->user_data);
+ if (rv != 0) {
+ return NGTCP2_ERR_CALLBACK_FAILURE;
+ }
+
+ return 0;
+}
+
+static int conn_call_recv_datagram(ngtcp2_conn *conn,
+ const ngtcp2_datagram *fr) {
+ const uint8_t *data;
+ size_t datalen;
+ int rv;
+ uint32_t flags = NGTCP2_DATAGRAM_FLAG_NONE;
+
+ if (!conn->callbacks.recv_datagram) {
+ return 0;
+ }
+
+ if (fr->datacnt) {
+ assert(fr->datacnt == 1);
+
+ data = fr->data->base;
+ datalen = fr->data->len;
+ } else {
+ data = NULL;
+ datalen = 0;
+ }
+
+ if (!conn_is_handshake_completed(conn)) {
+ flags |= NGTCP2_DATAGRAM_FLAG_EARLY;
+ }
+
+ rv = conn->callbacks.recv_datagram(conn, flags, data, datalen,
+ conn->user_data);
+ if (rv != 0) {
+ return NGTCP2_ERR_CALLBACK_FAILURE;
+ }
+
+ return 0;
+}
+
+static int
+conn_call_update_key(ngtcp2_conn *conn, uint8_t *rx_secret, uint8_t *tx_secret,
+ ngtcp2_crypto_aead_ctx *rx_aead_ctx, uint8_t *rx_iv,
+ ngtcp2_crypto_aead_ctx *tx_aead_ctx, uint8_t *tx_iv,
+ const uint8_t *current_rx_secret,
+ const uint8_t *current_tx_secret, size_t secretlen) {
+ int rv;
+
+ assert(conn->callbacks.update_key);
+
+ rv = conn->callbacks.update_key(
+ conn, rx_secret, tx_secret, rx_aead_ctx, rx_iv, tx_aead_ctx, tx_iv,
+ current_rx_secret, current_tx_secret, secretlen, conn->user_data);
+ if (rv != 0) {
+ return NGTCP2_ERR_CALLBACK_FAILURE;
+ }
+
+ return 0;
+}
+
+static int conn_call_version_negotiation(ngtcp2_conn *conn, uint32_t version,
+ const ngtcp2_cid *dcid) {
+ int rv;
+
+ assert(conn->callbacks.version_negotiation);
+
+ rv =
+ conn->callbacks.version_negotiation(conn, version, dcid, conn->user_data);
+ if (rv != 0) {
+ return NGTCP2_ERR_CALLBACK_FAILURE;
+ }
+
+ return 0;
+}
+
+static int conn_call_recv_rx_key(ngtcp2_conn *conn, ngtcp2_crypto_level level) {
+ int rv;
+
+ if (!conn->callbacks.recv_rx_key) {
+ return 0;
+ }
+
+ rv = conn->callbacks.recv_rx_key(conn, level, conn->user_data);
+ if (rv != 0) {
+ return NGTCP2_ERR_CALLBACK_FAILURE;
+ }
+
+ return 0;
+}
+
+static int conn_call_recv_tx_key(ngtcp2_conn *conn, ngtcp2_crypto_level level) {
+ int rv;
+
+ if (!conn->callbacks.recv_tx_key) {
+ return 0;
+ }
+
+ rv = conn->callbacks.recv_tx_key(conn, level, conn->user_data);
+ if (rv != 0) {
+ return NGTCP2_ERR_CALLBACK_FAILURE;
+ }
+
+ return 0;
+}
+
+static int crypto_offset_less(const ngtcp2_ksl_key *lhs,
+ const ngtcp2_ksl_key *rhs) {
+ return *(int64_t *)lhs < *(int64_t *)rhs;
+}
+
+static int pktns_init(ngtcp2_pktns *pktns, ngtcp2_pktns_id pktns_id,
+ ngtcp2_rst *rst, ngtcp2_cc *cc, ngtcp2_log *log,
+ ngtcp2_qlog *qlog, ngtcp2_objalloc *rtb_entry_objalloc,
+ ngtcp2_objalloc *frc_objalloc, const ngtcp2_mem *mem) {
+ int rv;
+
+ memset(pktns, 0, sizeof(*pktns));
+
+ ngtcp2_gaptr_init(&pktns->rx.pngap, mem);
+
+ pktns->tx.last_pkt_num = -1;
+ pktns->rx.max_pkt_num = -1;
+ pktns->rx.max_ack_eliciting_pkt_num = -1;
+
+ rv = ngtcp2_acktr_init(&pktns->acktr, log, mem);
+ if (rv != 0) {
+ goto fail_acktr_init;
+ }
+
+ ngtcp2_strm_init(&pktns->crypto.strm, 0, NGTCP2_STRM_FLAG_NONE, 0, 0, NULL,
+ NULL, mem);
+
+ ngtcp2_ksl_init(&pktns->crypto.tx.frq, crypto_offset_less, sizeof(uint64_t),
+ mem);
+
+ ngtcp2_rtb_init(&pktns->rtb, pktns_id, &pktns->crypto.strm, rst, cc, log,
+ qlog, rtb_entry_objalloc, frc_objalloc, mem);
+
+ return 0;
+
+fail_acktr_init:
+ ngtcp2_gaptr_free(&pktns->rx.pngap);
+
+ return rv;
+}
+
+static int pktns_new(ngtcp2_pktns **ppktns, ngtcp2_pktns_id pktns_id,
+ ngtcp2_rst *rst, ngtcp2_cc *cc, ngtcp2_log *log,
+ ngtcp2_qlog *qlog, ngtcp2_objalloc *rtb_entry_objalloc,
+ ngtcp2_objalloc *frc_objalloc, const ngtcp2_mem *mem) {
+ int rv;
+
+ *ppktns = ngtcp2_mem_malloc(mem, sizeof(ngtcp2_pktns));
+ if (*ppktns == NULL) {
+ return NGTCP2_ERR_NOMEM;
+ }
+
+ rv = pktns_init(*ppktns, pktns_id, rst, cc, log, qlog, rtb_entry_objalloc,
+ frc_objalloc, mem);
+ if (rv != 0) {
+ ngtcp2_mem_free(mem, *ppktns);
+ }
+
+ return rv;
+}
+
+static int cycle_less(const ngtcp2_pq_entry *lhs, const ngtcp2_pq_entry *rhs) {
+ ngtcp2_strm *ls = ngtcp2_struct_of(lhs, ngtcp2_strm, pe);
+ ngtcp2_strm *rs = ngtcp2_struct_of(rhs, ngtcp2_strm, pe);
+
+ if (ls->cycle == rs->cycle) {
+ return ls->stream_id < rs->stream_id;
+ }
+
+ return rs->cycle - ls->cycle <= 1;
+}
+
+static void delete_buffed_pkts(ngtcp2_pkt_chain *pc, const ngtcp2_mem *mem) {
+ ngtcp2_pkt_chain *next;
+
+ for (; pc;) {
+ next = pc->next;
+ ngtcp2_pkt_chain_del(pc, mem);
+ pc = next;
+ }
+}
+
+static void delete_buf_chain(ngtcp2_buf_chain *bufchain,
+ const ngtcp2_mem *mem) {
+ ngtcp2_buf_chain *next;
+
+ for (; bufchain;) {
+ next = bufchain->next;
+ ngtcp2_buf_chain_del(bufchain, mem);
+ bufchain = next;
+ }
+}
+
+static void pktns_free(ngtcp2_pktns *pktns, const ngtcp2_mem *mem) {
+ ngtcp2_frame_chain *frc;
+ ngtcp2_ksl_it it;
+
+ delete_buf_chain(pktns->crypto.tx.data, mem);
+
+ delete_buffed_pkts(pktns->rx.buffed_pkts, mem);
+
+ ngtcp2_frame_chain_list_objalloc_del(pktns->tx.frq, pktns->rtb.frc_objalloc,
+ mem);
+
+ ngtcp2_crypto_km_del(pktns->crypto.rx.ckm, mem);
+ ngtcp2_crypto_km_del(pktns->crypto.tx.ckm, mem);
+
+ for (it = ngtcp2_ksl_begin(&pktns->crypto.tx.frq); !ngtcp2_ksl_it_end(&it);
+ ngtcp2_ksl_it_next(&it)) {
+ frc = ngtcp2_ksl_it_get(&it);
+ ngtcp2_frame_chain_objalloc_del(frc, pktns->rtb.frc_objalloc, mem);
+ }
+
+ ngtcp2_ksl_free(&pktns->crypto.tx.frq);
+ ngtcp2_rtb_free(&pktns->rtb);
+ ngtcp2_strm_free(&pktns->crypto.strm);
+ ngtcp2_acktr_free(&pktns->acktr);
+ ngtcp2_gaptr_free(&pktns->rx.pngap);
+}
+
+static void pktns_del(ngtcp2_pktns *pktns, const ngtcp2_mem *mem) {
+ if (pktns == NULL) {
+ return;
+ }
+
+ pktns_free(pktns, mem);
+
+ ngtcp2_mem_free(mem, pktns);
+}
+
+static void cc_del(ngtcp2_cc *cc, ngtcp2_cc_algo cc_algo,
+ const ngtcp2_mem *mem) {
+ switch (cc_algo) {
+ case NGTCP2_CC_ALGO_RENO:
+ ngtcp2_cc_reno_cc_free(cc, mem);
+ break;
+ case NGTCP2_CC_ALGO_CUBIC:
+ ngtcp2_cc_cubic_cc_free(cc, mem);
+ break;
+ case NGTCP2_CC_ALGO_BBR:
+ ngtcp2_cc_bbr_cc_free(cc, mem);
+ break;
+ case NGTCP2_CC_ALGO_BBR2:
+ ngtcp2_cc_bbr2_cc_free(cc, mem);
+ break;
+ default:
+ break;
+ }
+}
+
+static int cid_less(const ngtcp2_ksl_key *lhs, const ngtcp2_ksl_key *rhs) {
+ return ngtcp2_cid_less(lhs, rhs);
+}
+
+static int retired_ts_less(const ngtcp2_pq_entry *lhs,
+ const ngtcp2_pq_entry *rhs) {
+ const ngtcp2_scid *a = ngtcp2_struct_of(lhs, ngtcp2_scid, pe);
+ const ngtcp2_scid *b = ngtcp2_struct_of(rhs, ngtcp2_scid, pe);
+
+ return a->retired_ts < b->retired_ts;
+}
+
+/*
+ * conn_reset_conn_stat_cc resets congestion state in |cstat|.
+ */
+static void conn_reset_conn_stat_cc(ngtcp2_conn *conn,
+ ngtcp2_conn_stat *cstat) {
+ cstat->latest_rtt = 0;
+ cstat->min_rtt = UINT64_MAX;
+ cstat->smoothed_rtt = conn->local.settings.initial_rtt;
+ cstat->rttvar = conn->local.settings.initial_rtt / 2;
+ cstat->first_rtt_sample_ts = UINT64_MAX;
+ cstat->pto_count = 0;
+ cstat->loss_detection_timer = UINT64_MAX;
+ cstat->cwnd =
+ ngtcp2_cc_compute_initcwnd(conn->local.settings.max_tx_udp_payload_size);
+ cstat->ssthresh = UINT64_MAX;
+ cstat->congestion_recovery_start_ts = UINT64_MAX;
+ cstat->bytes_in_flight = 0;
+ cstat->delivery_rate_sec = 0;
+ cstat->pacing_rate = 0.0;
+ cstat->send_quantum = 64 * 1024;
+}
+
+/*
+ * reset_conn_stat_recovery resets the fields related to the recovery
+ * function
+ */
+static void reset_conn_stat_recovery(ngtcp2_conn_stat *cstat) {
+ /* Initializes them with UINT64_MAX. */
+ memset(cstat->loss_time, 0xff, sizeof(cstat->loss_time));
+ memset(cstat->last_tx_pkt_ts, 0xff, sizeof(cstat->last_tx_pkt_ts));
+}
+
+/*
+ * conn_reset_conn_stat resets |cstat|. The following fields are not
+ * reset: initial_rtt and max_udp_payload_size.
+ */
+static void conn_reset_conn_stat(ngtcp2_conn *conn, ngtcp2_conn_stat *cstat) {
+ conn_reset_conn_stat_cc(conn, cstat);
+ reset_conn_stat_recovery(cstat);
+}
+
+static void delete_scid(ngtcp2_ksl *scids, const ngtcp2_mem *mem) {
+ ngtcp2_ksl_it it;
+
+ for (it = ngtcp2_ksl_begin(scids); !ngtcp2_ksl_it_end(&it);
+ ngtcp2_ksl_it_next(&it)) {
+ ngtcp2_mem_free(mem, ngtcp2_ksl_it_get(&it));
+ }
+}
+
+/*
+ * compute_pto computes PTO.
+ */
+static ngtcp2_duration compute_pto(ngtcp2_duration smoothed_rtt,
+ ngtcp2_duration rttvar,
+ ngtcp2_duration max_ack_delay) {
+ ngtcp2_duration var = ngtcp2_max(4 * rttvar, NGTCP2_GRANULARITY);
+ return smoothed_rtt + var + max_ack_delay;
+}
+
+/*
+ * conn_compute_initial_pto computes PTO using the initial RTT.
+ */
+static ngtcp2_duration conn_compute_initial_pto(ngtcp2_conn *conn,
+ ngtcp2_pktns *pktns) {
+ ngtcp2_duration initial_rtt = conn->local.settings.initial_rtt;
+ ngtcp2_duration max_ack_delay;
+
+ if (pktns->rtb.pktns_id == NGTCP2_PKTNS_ID_APPLICATION &&
+ conn->remote.transport_params) {
+ max_ack_delay = conn->remote.transport_params->max_ack_delay;
+ } else {
+ max_ack_delay = 0;
+ }
+ return compute_pto(initial_rtt, initial_rtt / 2, max_ack_delay);
+}
+
+/*
+ * conn_compute_pto computes the current PTO.
+ */
+static ngtcp2_duration conn_compute_pto(ngtcp2_conn *conn,
+ ngtcp2_pktns *pktns) {
+ ngtcp2_conn_stat *cstat = &conn->cstat;
+ ngtcp2_duration max_ack_delay;
+
+ if (pktns->rtb.pktns_id == NGTCP2_PKTNS_ID_APPLICATION &&
+ conn->remote.transport_params) {
+ max_ack_delay = conn->remote.transport_params->max_ack_delay;
+ } else {
+ max_ack_delay = 0;
+ }
+ return compute_pto(cstat->smoothed_rtt, cstat->rttvar, max_ack_delay);
+}
+
+ngtcp2_duration ngtcp2_conn_compute_pto(ngtcp2_conn *conn,
+ ngtcp2_pktns *pktns) {
+ return conn_compute_pto(conn, pktns);
+}
+
+static void conn_handle_tx_ecn(ngtcp2_conn *conn, ngtcp2_pkt_info *pi,
+ uint16_t *prtb_entry_flags, ngtcp2_pktns *pktns,
+ const ngtcp2_pkt_hd *hd, ngtcp2_tstamp ts) {
+ assert(pi);
+
+ if (pi->ecn != NGTCP2_ECN_NOT_ECT) {
+ /* We have already made a transition of validation state and
+ deceided to send UDP datagram with ECN bit set. Coalesced QUIC
+ packets also bear ECN bits set. */
+ if (pktns->tx.ecn.start_pkt_num == INT64_MAX) {
+ pktns->tx.ecn.start_pkt_num = hd->pkt_num;
+ }
+
+ ++pktns->tx.ecn.validation_pkt_sent;
+
+ if (prtb_entry_flags) {
+ *prtb_entry_flags |= NGTCP2_RTB_ENTRY_FLAG_ECN;
+ }
+
+ ++pktns->tx.ecn.ect0;
+
+ return;
+ }
+
+ switch (conn->tx.ecn.state) {
+ case NGTCP2_ECN_STATE_TESTING:
+ if (conn->tx.ecn.validation_start_ts == UINT64_MAX) {
+ assert(0 == pktns->tx.ecn.validation_pkt_sent);
+ assert(0 == pktns->tx.ecn.validation_pkt_lost);
+
+ conn->tx.ecn.validation_start_ts = ts;
+ } else if (ts - conn->tx.ecn.validation_start_ts >=
+ 3 * conn_compute_pto(conn, pktns)) {
+ conn->tx.ecn.state = NGTCP2_ECN_STATE_UNKNOWN;
+ break;
+ }
+
+ if (pktns->tx.ecn.start_pkt_num == INT64_MAX) {
+ pktns->tx.ecn.start_pkt_num = hd->pkt_num;
+ }
+
+ ++pktns->tx.ecn.validation_pkt_sent;
+
+ if (++conn->tx.ecn.dgram_sent == NGTCP2_ECN_MAX_NUM_VALIDATION_PKTS) {
+ conn->tx.ecn.state = NGTCP2_ECN_STATE_UNKNOWN;
+ }
+ /* fall through */
+ case NGTCP2_ECN_STATE_CAPABLE:
+ /* pi is provided per UDP datagram. */
+ assert(NGTCP2_ECN_NOT_ECT == pi->ecn);
+
+ pi->ecn = NGTCP2_ECN_ECT_0;
+
+ if (prtb_entry_flags) {
+ *prtb_entry_flags |= NGTCP2_RTB_ENTRY_FLAG_ECN;
+ }
+
+ ++pktns->tx.ecn.ect0;
+ break;
+ case NGTCP2_ECN_STATE_UNKNOWN:
+ case NGTCP2_ECN_STATE_FAILED:
+ break;
+ default:
+ ngtcp2_unreachable();
+ }
+}
+
+static void conn_reset_ecn_validation_state(ngtcp2_conn *conn) {
+ ngtcp2_pktns *in_pktns = conn->in_pktns;
+ ngtcp2_pktns *hs_pktns = conn->hs_pktns;
+ ngtcp2_pktns *pktns = &conn->pktns;
+
+ conn->tx.ecn.state = NGTCP2_ECN_STATE_TESTING;
+ conn->tx.ecn.validation_start_ts = UINT64_MAX;
+ conn->tx.ecn.dgram_sent = 0;
+
+ if (in_pktns) {
+ in_pktns->tx.ecn.start_pkt_num = INT64_MAX;
+ in_pktns->tx.ecn.validation_pkt_sent = 0;
+ in_pktns->tx.ecn.validation_pkt_lost = 0;
+ }
+
+ if (hs_pktns) {
+ hs_pktns->tx.ecn.start_pkt_num = INT64_MAX;
+ hs_pktns->tx.ecn.validation_pkt_sent = 0;
+ hs_pktns->tx.ecn.validation_pkt_lost = 0;
+ }
+
+ pktns->tx.ecn.start_pkt_num = INT64_MAX;
+ pktns->tx.ecn.validation_pkt_sent = 0;
+ pktns->tx.ecn.validation_pkt_lost = 0;
+}
+
+/* server_default_available_versions is the default available_versions
+ field sent by server. */
+static uint8_t server_default_available_versions[] = {0, 0, 0, 1};
+
+/*
+ * available_versions_new allocates new buffer, and writes |versions|
+ * of length |versionslen| in network byte order, suitable for sending
+ * in available_versions field of version_information QUIC transport
+ * parameter. The pointer to the allocated buffer is assigned to
+ * |*pbuf|.
+ *
+ * This function returns 0 if it succeeds, or one of the negative
+ * error codes:
+ *
+ * NGTCP2_ERR_NOMEM
+ * Out of memory.
+ */
+static int available_versions_new(uint8_t **pbuf, const uint32_t *versions,
+ size_t versionslen, const ngtcp2_mem *mem) {
+ size_t i;
+ uint8_t *buf = ngtcp2_mem_malloc(mem, sizeof(uint32_t) * versionslen);
+
+ if (buf == NULL) {
+ return NGTCP2_ERR_NOMEM;
+ }
+
+ *pbuf = buf;
+
+ for (i = 0; i < versionslen; ++i) {
+ buf = ngtcp2_put_uint32be(buf, versions[i]);
+ }
+
+ return 0;
+}
+
+static void
+conn_set_local_transport_params(ngtcp2_conn *conn,
+ const ngtcp2_transport_params *params) {
+ ngtcp2_transport_params *p = &conn->local.transport_params;
+ uint32_t chosen_version = p->version_info.chosen_version;
+
+ *p = *params;
+
+ /* grease_quic_bit is always enabled. */
+ p->grease_quic_bit = 1;
+
+ if (conn->server) {
+ p->version_info.chosen_version = chosen_version;
+ } else {
+ p->version_info.chosen_version = conn->client_chosen_version;
+ }
+ p->version_info.available_versions = conn->vneg.available_versions;
+ p->version_info.available_versionslen = conn->vneg.available_versionslen;
+ p->version_info_present = 1;
+}
+
+static int conn_new(ngtcp2_conn **pconn, const ngtcp2_cid *dcid,
+ const ngtcp2_cid *scid, const ngtcp2_path *path,
+ uint32_t client_chosen_version, int callbacks_version,
+ const ngtcp2_callbacks *callbacks, int settings_version,
+ const ngtcp2_settings *settings,
+ int transport_params_version,
+ const ngtcp2_transport_params *params,
+ const ngtcp2_mem *mem, void *user_data, int server) {
+ int rv;
+ ngtcp2_scid *scident;
+ uint8_t *buf;
+ uint8_t fixed_bit_byte;
+ size_t i;
+ uint32_t *preferred_versions;
+ (void)callbacks_version;
+ (void)settings_version;
+ (void)transport_params_version;
+
+ assert(settings->max_window <= NGTCP2_MAX_VARINT);
+ assert(settings->max_stream_window <= NGTCP2_MAX_VARINT);
+ assert(settings->max_tx_udp_payload_size);
+ assert(settings->max_tx_udp_payload_size <= NGTCP2_HARD_MAX_UDP_PAYLOAD_SIZE);
+ assert(params->active_connection_id_limit <= NGTCP2_MAX_DCID_POOL_SIZE);
+ assert(params->initial_max_data <= NGTCP2_MAX_VARINT);
+ assert(params->initial_max_stream_data_bidi_local <= NGTCP2_MAX_VARINT);
+ assert(params->initial_max_stream_data_bidi_remote <= NGTCP2_MAX_VARINT);
+ assert(params->initial_max_stream_data_uni <= NGTCP2_MAX_VARINT);
+ assert(server || callbacks->client_initial);
+ assert(!server || callbacks->recv_client_initial);
+ assert(callbacks->recv_crypto_data);
+ assert(callbacks->encrypt);
+ assert(callbacks->decrypt);
+ assert(callbacks->hp_mask);
+ assert(server || callbacks->recv_retry);
+ assert(callbacks->rand);
+ assert(callbacks->get_new_connection_id);
+ assert(callbacks->update_key);
+ assert(callbacks->delete_crypto_aead_ctx);
+ assert(callbacks->delete_crypto_cipher_ctx);
+ assert(callbacks->get_path_challenge_data);
+ assert(!server || !ngtcp2_is_reserved_version(client_chosen_version));
+
+ if (mem == NULL) {
+ mem = ngtcp2_mem_default();
+ }
+
+ *pconn = ngtcp2_mem_calloc(mem, 1, sizeof(ngtcp2_conn));
+ if (*pconn == NULL) {
+ rv = NGTCP2_ERR_NOMEM;
+ goto fail_conn;
+ }
+
+ ngtcp2_objalloc_frame_chain_init(&(*pconn)->frc_objalloc, 64, mem);
+ ngtcp2_objalloc_rtb_entry_init(&(*pconn)->rtb_entry_objalloc, 64, mem);
+ ngtcp2_objalloc_strm_init(&(*pconn)->strm_objalloc, 64, mem);
+
+ ngtcp2_static_ringbuf_dcid_bound_init(&(*pconn)->dcid.bound);
+
+ ngtcp2_static_ringbuf_dcid_unused_init(&(*pconn)->dcid.unused);
+
+ ngtcp2_static_ringbuf_dcid_retired_init(&(*pconn)->dcid.retired);
+
+ ngtcp2_gaptr_init(&(*pconn)->dcid.seqgap, mem);
+
+ ngtcp2_ksl_init(&(*pconn)->scid.set, cid_less, sizeof(ngtcp2_cid), mem);
+
+ ngtcp2_pq_init(&(*pconn)->scid.used, retired_ts_less, mem);
+
+ ngtcp2_map_init(&(*pconn)->strms, mem);
+
+ ngtcp2_pq_init(&(*pconn)->tx.strmq, cycle_less, mem);
+
+ ngtcp2_idtr_init(&(*pconn)->remote.bidi.idtr, !server, mem);
+
+ ngtcp2_idtr_init(&(*pconn)->remote.uni.idtr, !server, mem);
+
+ ngtcp2_static_ringbuf_path_challenge_init(&(*pconn)->rx.path_challenge);
+
+ ngtcp2_log_init(&(*pconn)->log, scid, settings->log_printf,
+ settings->initial_ts, user_data);
+ ngtcp2_qlog_init(&(*pconn)->qlog, settings->qlog.write, settings->initial_ts,
+ user_data);
+ if ((*pconn)->qlog.write) {
+ buf = ngtcp2_mem_malloc(mem, NGTCP2_QLOG_BUFLEN);
+ if (buf == NULL) {
+ rv = NGTCP2_ERR_NOMEM;
+ goto fail_qlog_buf;
+ }
+ ngtcp2_buf_init(&(*pconn)->qlog.buf, buf, NGTCP2_QLOG_BUFLEN);
+ }
+
+ (*pconn)->local.settings = *settings;
+
+ if (settings->tokenlen) {
+ buf = ngtcp2_mem_malloc(mem, settings->tokenlen);
+ if (buf == NULL) {
+ rv = NGTCP2_ERR_NOMEM;
+ goto fail_token;
+ }
+ memcpy(buf, settings->token, settings->tokenlen);
+ (*pconn)->local.settings.token = buf;
+ } else {
+ (*pconn)->local.settings.token = NULL;
+ }
+
+ if (!(*pconn)->local.settings.original_version) {
+ (*pconn)->local.settings.original_version = client_chosen_version;
+ }
+
+ conn_reset_conn_stat(*pconn, &(*pconn)->cstat);
+ (*pconn)->cstat.initial_rtt = settings->initial_rtt;
+ (*pconn)->cstat.max_tx_udp_payload_size =
+ (*pconn)->local.settings.max_tx_udp_payload_size;
+
+ ngtcp2_rst_init(&(*pconn)->rst);
+
+ (*pconn)->cc_algo = settings->cc_algo;
+
+ switch (settings->cc_algo) {
+ case NGTCP2_CC_ALGO_RENO:
+ rv = ngtcp2_cc_reno_cc_init(&(*pconn)->cc, &(*pconn)->log, mem);
+ if (rv != 0) {
+ goto fail_cc_init;
+ }
+ break;
+ case NGTCP2_CC_ALGO_CUBIC:
+ rv = ngtcp2_cc_cubic_cc_init(&(*pconn)->cc, &(*pconn)->log, mem);
+ if (rv != 0) {
+ goto fail_cc_init;
+ }
+ break;
+ case NGTCP2_CC_ALGO_BBR:
+ rv = ngtcp2_cc_bbr_cc_init(&(*pconn)->cc, &(*pconn)->log, &(*pconn)->cstat,
+ &(*pconn)->rst, settings->initial_ts,
+ callbacks->rand, &settings->rand_ctx, mem);
+ if (rv != 0) {
+ goto fail_cc_init;
+ }
+ break;
+ case NGTCP2_CC_ALGO_BBR2:
+ rv = ngtcp2_cc_bbr2_cc_init(&(*pconn)->cc, &(*pconn)->log, &(*pconn)->cstat,
+ &(*pconn)->rst, settings->initial_ts,
+ callbacks->rand, &settings->rand_ctx, mem);
+ if (rv != 0) {
+ goto fail_cc_init;
+ }
+ break;
+ default:
+ ngtcp2_unreachable();
+ }
+
+ rv = pktns_new(&(*pconn)->in_pktns, NGTCP2_PKTNS_ID_INITIAL, &(*pconn)->rst,
+ &(*pconn)->cc, &(*pconn)->log, &(*pconn)->qlog,
+ &(*pconn)->rtb_entry_objalloc, &(*pconn)->frc_objalloc, mem);
+ if (rv != 0) {
+ goto fail_in_pktns_init;
+ }
+
+ rv = pktns_new(&(*pconn)->hs_pktns, NGTCP2_PKTNS_ID_HANDSHAKE, &(*pconn)->rst,
+ &(*pconn)->cc, &(*pconn)->log, &(*pconn)->qlog,
+ &(*pconn)->rtb_entry_objalloc, &(*pconn)->frc_objalloc, mem);
+ if (rv != 0) {
+ goto fail_hs_pktns_init;
+ }
+
+ rv = pktns_init(&(*pconn)->pktns, NGTCP2_PKTNS_ID_APPLICATION, &(*pconn)->rst,
+ &(*pconn)->cc, &(*pconn)->log, &(*pconn)->qlog,
+ &(*pconn)->rtb_entry_objalloc, &(*pconn)->frc_objalloc, mem);
+ if (rv != 0) {
+ goto fail_pktns_init;
+ }
+
+ scident = ngtcp2_mem_malloc(mem, sizeof(*scident));
+ if (scident == NULL) {
+ rv = NGTCP2_ERR_NOMEM;
+ goto fail_scident;
+ }
+
+ /* Set stateless reset token later if it is available in the local
+ transport parameters */
+ ngtcp2_scid_init(scident, 0, scid);
+
+ rv = ngtcp2_ksl_insert(&(*pconn)->scid.set, NULL, &scident->cid, scident);
+ if (rv != 0) {
+ goto fail_scid_set_insert;
+ }
+
+ scident = NULL;
+
+ ngtcp2_dcid_init(&(*pconn)->dcid.current, 0, dcid, NULL);
+ ngtcp2_dcid_set_path(&(*pconn)->dcid.current, path);
+
+ rv = ngtcp2_gaptr_push(&(*pconn)->dcid.seqgap, 0, 1);
+ if (rv != 0) {
+ goto fail_seqgap_push;
+ }
+
+ if (settings->preferred_versionslen) {
+ if (!server && !ngtcp2_is_reserved_version(client_chosen_version)) {
+ for (i = 0; i < settings->preferred_versionslen; ++i) {
+ if (settings->preferred_versions[i] == client_chosen_version) {
+ break;
+ }
+ }
+
+ assert(i < settings->preferred_versionslen);
+ }
+
+ preferred_versions = ngtcp2_mem_malloc(
+ mem, sizeof(uint32_t) * settings->preferred_versionslen);
+ if (preferred_versions == NULL) {
+ rv = NGTCP2_ERR_NOMEM;
+ goto fail_preferred_versions;
+ }
+
+ for (i = 0; i < settings->preferred_versionslen; ++i) {
+ assert(ngtcp2_is_supported_version(settings->preferred_versions[i]));
+
+ preferred_versions[i] = settings->preferred_versions[i];
+ }
+
+ (*pconn)->vneg.preferred_versions = preferred_versions;
+ (*pconn)->vneg.preferred_versionslen = settings->preferred_versionslen;
+ }
+
+ if (settings->available_versionslen) {
+ if (!server && !ngtcp2_is_reserved_version(client_chosen_version)) {
+ for (i = 0; i < settings->available_versionslen; ++i) {
+ if (settings->available_versions[i] == client_chosen_version) {
+ break;
+ }
+ }
+
+ assert(i < settings->available_versionslen);
+ }
+
+ for (i = 0; i < settings->available_versionslen; ++i) {
+ assert(ngtcp2_is_reserved_version(settings->available_versions[i]) ||
+ ngtcp2_is_supported_version(settings->available_versions[i]));
+ }
+
+ rv = available_versions_new(&buf, settings->available_versions,
+ settings->available_versionslen, mem);
+ if (rv != 0) {
+ goto fail_available_versions;
+ }
+
+ (*pconn)->vneg.available_versions = buf;
+ (*pconn)->vneg.available_versionslen =
+ sizeof(uint32_t) * settings->available_versionslen;
+ } else if (server) {
+ if (settings->preferred_versionslen) {
+ rv = available_versions_new(&buf, settings->preferred_versions,
+ settings->preferred_versionslen, mem);
+ if (rv != 0) {
+ goto fail_available_versions;
+ }
+
+ (*pconn)->vneg.available_versions = buf;
+ (*pconn)->vneg.available_versionslen =
+ sizeof(uint32_t) * settings->preferred_versionslen;
+ } else {
+ (*pconn)->vneg.available_versions = server_default_available_versions;
+ (*pconn)->vneg.available_versionslen =
+ sizeof(server_default_available_versions);
+ }
+ } else if (!server && !ngtcp2_is_reserved_version(client_chosen_version)) {
+ rv = available_versions_new(&buf, &client_chosen_version, 1, mem);
+ if (rv != 0) {
+ goto fail_available_versions;
+ }
+
+ (*pconn)->vneg.available_versions = buf;
+ (*pconn)->vneg.available_versionslen = sizeof(uint32_t);
+ }
+
+ (*pconn)->client_chosen_version = client_chosen_version;
+
+ conn_set_local_transport_params(*pconn, params);
+
+ callbacks->rand(&fixed_bit_byte, 1, &settings->rand_ctx);
+ if (fixed_bit_byte & 1) {
+ (*pconn)->flags |= NGTCP2_CONN_FLAG_CLEAR_FIXED_BIT;
+ }
+
+ (*pconn)->keep_alive.last_ts = UINT64_MAX;
+
+ (*pconn)->server = server;
+ (*pconn)->oscid = *scid;
+ (*pconn)->callbacks = *callbacks;
+ (*pconn)->mem = mem;
+ (*pconn)->user_data = user_data;
+ (*pconn)->idle_ts = settings->initial_ts;
+ (*pconn)->crypto.key_update.confirmed_ts = UINT64_MAX;
+ (*pconn)->tx.last_max_data_ts = UINT64_MAX;
+ (*pconn)->tx.pacing.next_ts = UINT64_MAX;
+ (*pconn)->early.discard_started_ts = UINT64_MAX;
+
+ conn_reset_ecn_validation_state(*pconn);
+
+ ngtcp2_qlog_start(&(*pconn)->qlog, server ? &settings->qlog.odcid : dcid,
+ server);
+
+ return 0;
+
+fail_available_versions:
+ ngtcp2_mem_free(mem, (*pconn)->vneg.preferred_versions);
+fail_preferred_versions:
+fail_seqgap_push:
+fail_scid_set_insert:
+ ngtcp2_mem_free(mem, scident);
+fail_scident:
+ pktns_free(&(*pconn)->pktns, mem);
+fail_pktns_init:
+ pktns_del((*pconn)->hs_pktns, mem);
+fail_hs_pktns_init:
+ pktns_del((*pconn)->in_pktns, mem);
+fail_in_pktns_init:
+ cc_del(&(*pconn)->cc, settings->cc_algo, mem);
+fail_cc_init:
+ ngtcp2_mem_free(mem, (uint8_t *)(*pconn)->local.settings.token);
+fail_token:
+ ngtcp2_mem_free(mem, (*pconn)->qlog.buf.begin);
+fail_qlog_buf:
+ ngtcp2_idtr_free(&(*pconn)->remote.uni.idtr);
+ ngtcp2_idtr_free(&(*pconn)->remote.bidi.idtr);
+ ngtcp2_map_free(&(*pconn)->strms);
+ delete_scid(&(*pconn)->scid.set, mem);
+ ngtcp2_ksl_free(&(*pconn)->scid.set);
+ ngtcp2_gaptr_free(&(*pconn)->dcid.seqgap);
+ ngtcp2_objalloc_free(&(*pconn)->strm_objalloc);
+ ngtcp2_objalloc_free(&(*pconn)->rtb_entry_objalloc);
+ ngtcp2_objalloc_free(&(*pconn)->frc_objalloc);
+ ngtcp2_mem_free(mem, *pconn);
+fail_conn:
+ return rv;
+}
+
+int ngtcp2_conn_client_new_versioned(
+ ngtcp2_conn **pconn, const ngtcp2_cid *dcid, const ngtcp2_cid *scid,
+ const ngtcp2_path *path, uint32_t client_chosen_version,
+ int callbacks_version, const ngtcp2_callbacks *callbacks,
+ int settings_version, const ngtcp2_settings *settings,
+ int transport_params_version, const ngtcp2_transport_params *params,
+ const ngtcp2_mem *mem, void *user_data) {
+ int rv;
+
+ rv = conn_new(pconn, dcid, scid, path, client_chosen_version,
+ callbacks_version, callbacks, settings_version, settings,
+ transport_params_version, params, mem, user_data, 0);
+ if (rv != 0) {
+ return rv;
+ }
+ (*pconn)->rcid = *dcid;
+ (*pconn)->state = NGTCP2_CS_CLIENT_INITIAL;
+ (*pconn)->local.bidi.next_stream_id = 0;
+ (*pconn)->local.uni.next_stream_id = 2;
+
+ rv = ngtcp2_conn_commit_local_transport_params(*pconn);
+ if (rv != 0) {
+ ngtcp2_conn_del(*pconn);
+ return rv;
+ }
+
+ return 0;
+}
+
+int ngtcp2_conn_server_new_versioned(
+ ngtcp2_conn **pconn, const ngtcp2_cid *dcid, const ngtcp2_cid *scid,
+ const ngtcp2_path *path, uint32_t client_chosen_version,
+ int callbacks_version, const ngtcp2_callbacks *callbacks,
+ int settings_version, const ngtcp2_settings *settings,
+ int transport_params_version, const ngtcp2_transport_params *params,
+ const ngtcp2_mem *mem, void *user_data) {
+ int rv;
+
+ rv = conn_new(pconn, dcid, scid, path, client_chosen_version,
+ callbacks_version, callbacks, settings_version, settings,
+ transport_params_version, params, mem, user_data, 1);
+ if (rv != 0) {
+ return rv;
+ }
+
+ (*pconn)->state = NGTCP2_CS_SERVER_INITIAL;
+ (*pconn)->local.bidi.next_stream_id = 1;
+ (*pconn)->local.uni.next_stream_id = 3;
+
+ if ((*pconn)->local.settings.tokenlen) {
+ /* Usage of token lifts amplification limit */
+ (*pconn)->dcid.current.flags |= NGTCP2_DCID_FLAG_PATH_VALIDATED;
+ }
+
+ return 0;
+}
+
+/*
+ * conn_fc_credits returns the number of bytes allowed to be sent to
+ * the given stream. Both connection and stream level flow control
+ * credits are considered.
+ */
+static uint64_t conn_fc_credits(ngtcp2_conn *conn, ngtcp2_strm *strm) {
+ return ngtcp2_min(strm->tx.max_offset - strm->tx.offset,
+ conn->tx.max_offset - conn->tx.offset);
+}
+
+/*
+ * conn_enforce_flow_control returns the number of bytes allowed to be
+ * sent to the given stream. |len| might be shorted because of
+ * available flow control credits.
+ */
+static uint64_t conn_enforce_flow_control(ngtcp2_conn *conn, ngtcp2_strm *strm,
+ uint64_t len) {
+ uint64_t fc_credits = conn_fc_credits(conn, strm);
+ return ngtcp2_min(len, fc_credits);
+}
+
+static int delete_strms_each(void *data, void *ptr) {
+ ngtcp2_conn *conn = ptr;
+ ngtcp2_strm *s = data;
+
+ ngtcp2_strm_free(s);
+ ngtcp2_objalloc_strm_release(&conn->strm_objalloc, s);
+
+ return 0;
+}
+
+static void conn_vneg_crypto_free(ngtcp2_conn *conn) {
+ if (conn->vneg.rx.ckm) {
+ conn_call_delete_crypto_aead_ctx(conn, &conn->vneg.rx.ckm->aead_ctx);
+ }
+ conn_call_delete_crypto_cipher_ctx(conn, &conn->vneg.rx.hp_ctx);
+
+ if (conn->vneg.tx.ckm) {
+ conn_call_delete_crypto_aead_ctx(conn, &conn->vneg.tx.ckm->aead_ctx);
+ }
+ conn_call_delete_crypto_cipher_ctx(conn, &conn->vneg.tx.hp_ctx);
+
+ ngtcp2_crypto_km_del(conn->vneg.rx.ckm, conn->mem);
+ ngtcp2_crypto_km_del(conn->vneg.tx.ckm, conn->mem);
+}
+
+void ngtcp2_conn_del(ngtcp2_conn *conn) {
+ if (conn == NULL) {
+ return;
+ }
+
+ ngtcp2_qlog_end(&conn->qlog);
+
+ if (conn->early.ckm) {
+ conn_call_delete_crypto_aead_ctx(conn, &conn->early.ckm->aead_ctx);
+ }
+ conn_call_delete_crypto_cipher_ctx(conn, &conn->early.hp_ctx);
+
+ if (conn->crypto.key_update.old_rx_ckm) {
+ conn_call_delete_crypto_aead_ctx(
+ conn, &conn->crypto.key_update.old_rx_ckm->aead_ctx);
+ }
+ if (conn->crypto.key_update.new_rx_ckm) {
+ conn_call_delete_crypto_aead_ctx(
+ conn, &conn->crypto.key_update.new_rx_ckm->aead_ctx);
+ }
+ if (conn->crypto.key_update.new_tx_ckm) {
+ conn_call_delete_crypto_aead_ctx(
+ conn, &conn->crypto.key_update.new_tx_ckm->aead_ctx);
+ }
+
+ if (conn->pktns.crypto.rx.ckm) {
+ conn_call_delete_crypto_aead_ctx(conn,
+ &conn->pktns.crypto.rx.ckm->aead_ctx);
+ }
+ conn_call_delete_crypto_cipher_ctx(conn, &conn->pktns.crypto.rx.hp_ctx);
+
+ if (conn->pktns.crypto.tx.ckm) {
+ conn_call_delete_crypto_aead_ctx(conn,
+ &conn->pktns.crypto.tx.ckm->aead_ctx);
+ }
+ conn_call_delete_crypto_cipher_ctx(conn, &conn->pktns.crypto.tx.hp_ctx);
+
+ if (conn->hs_pktns) {
+ if (conn->hs_pktns->crypto.rx.ckm) {
+ conn_call_delete_crypto_aead_ctx(
+ conn, &conn->hs_pktns->crypto.rx.ckm->aead_ctx);
+ }
+ conn_call_delete_crypto_cipher_ctx(conn, &conn->hs_pktns->crypto.rx.hp_ctx);
+
+ if (conn->hs_pktns->crypto.tx.ckm) {
+ conn_call_delete_crypto_aead_ctx(
+ conn, &conn->hs_pktns->crypto.tx.ckm->aead_ctx);
+ }
+ conn_call_delete_crypto_cipher_ctx(conn, &conn->hs_pktns->crypto.tx.hp_ctx);
+ }
+ if (conn->in_pktns) {
+ if (conn->in_pktns->crypto.rx.ckm) {
+ conn_call_delete_crypto_aead_ctx(
+ conn, &conn->in_pktns->crypto.rx.ckm->aead_ctx);
+ }
+ conn_call_delete_crypto_cipher_ctx(conn, &conn->in_pktns->crypto.rx.hp_ctx);
+
+ if (conn->in_pktns->crypto.tx.ckm) {
+ conn_call_delete_crypto_aead_ctx(
+ conn, &conn->in_pktns->crypto.tx.ckm->aead_ctx);
+ }
+ conn_call_delete_crypto_cipher_ctx(conn, &conn->in_pktns->crypto.tx.hp_ctx);
+ }
+
+ conn_call_delete_crypto_aead_ctx(conn, &conn->crypto.retry_aead_ctx);
+
+ ngtcp2_transport_params_del(conn->remote.transport_params, conn->mem);
+ ngtcp2_transport_params_del(conn->remote.pending_transport_params, conn->mem);
+
+ conn_vneg_crypto_free(conn);
+
+ ngtcp2_mem_free(conn->mem, conn->vneg.preferred_versions);
+ if (conn->vneg.available_versions != server_default_available_versions) {
+ ngtcp2_mem_free(conn->mem, conn->vneg.available_versions);
+ }
+
+ ngtcp2_mem_free(conn->mem, conn->crypto.decrypt_buf.base);
+ ngtcp2_mem_free(conn->mem, conn->crypto.decrypt_hp_buf.base);
+ ngtcp2_mem_free(conn->mem, (uint8_t *)conn->local.settings.token);
+
+ ngtcp2_crypto_km_del(conn->crypto.key_update.old_rx_ckm, conn->mem);
+ ngtcp2_crypto_km_del(conn->crypto.key_update.new_rx_ckm, conn->mem);
+ ngtcp2_crypto_km_del(conn->crypto.key_update.new_tx_ckm, conn->mem);
+ ngtcp2_crypto_km_del(conn->early.ckm, conn->mem);
+
+ pktns_free(&conn->pktns, conn->mem);
+ pktns_del(conn->hs_pktns, conn->mem);
+ pktns_del(conn->in_pktns, conn->mem);
+
+ cc_del(&conn->cc, conn->cc_algo, conn->mem);
+
+ ngtcp2_mem_free(conn->mem, conn->qlog.buf.begin);
+
+ ngtcp2_pmtud_del(conn->pmtud);
+ ngtcp2_pv_del(conn->pv);
+
+ ngtcp2_mem_free(conn->mem, (uint8_t *)conn->rx.ccerr.reason);
+
+ ngtcp2_idtr_free(&conn->remote.uni.idtr);
+ ngtcp2_idtr_free(&conn->remote.bidi.idtr);
+ ngtcp2_mem_free(conn->mem, conn->tx.ack);
+ ngtcp2_pq_free(&conn->tx.strmq);
+ ngtcp2_map_each_free(&conn->strms, delete_strms_each, (void *)conn);
+ ngtcp2_map_free(&conn->strms);
+
+ ngtcp2_pq_free(&conn->scid.used);
+ delete_scid(&conn->scid.set, conn->mem);
+ ngtcp2_ksl_free(&conn->scid.set);
+ ngtcp2_gaptr_free(&conn->dcid.seqgap);
+
+ ngtcp2_objalloc_free(&conn->strm_objalloc);
+ ngtcp2_objalloc_free(&conn->rtb_entry_objalloc);
+ ngtcp2_objalloc_free(&conn->frc_objalloc);
+
+ ngtcp2_mem_free(conn->mem, conn);
+}
+
+/*
+ * conn_ensure_ack_ranges makes sure that conn->tx.ack->ack.ranges can
+ * contain at least |n| additional ngtcp2_ack_range.
+ *
+ * This function returns 0 if it succeeds, or one of the following
+ * negative error codes:
+ *
+ * NGTCP2_ERR_NOMEM
+ * Out of memory.
+ */
+static int conn_ensure_ack_ranges(ngtcp2_conn *conn, size_t n) {
+ ngtcp2_frame *fr;
+ size_t max = conn->tx.max_ack_ranges;
+
+ if (n <= max) {
+ return 0;
+ }
+
+ max *= 2;
+
+ assert(max >= n);
+
+ fr = ngtcp2_mem_realloc(conn->mem, conn->tx.ack,
+ sizeof(ngtcp2_ack) + sizeof(ngtcp2_ack_range) * max);
+ if (fr == NULL) {
+ return NGTCP2_ERR_NOMEM;
+ }
+
+ conn->tx.ack = fr;
+ conn->tx.max_ack_ranges = max;
+
+ return 0;
+}
+
+/*
+ * conn_compute_ack_delay computes ACK delay for outgoing protected
+ * ACK.
+ */
+static ngtcp2_duration conn_compute_ack_delay(ngtcp2_conn *conn) {
+ return ngtcp2_min(conn->local.transport_params.max_ack_delay,
+ conn->cstat.smoothed_rtt / 8);
+}
+
+/*
+ * conn_create_ack_frame creates ACK frame, and assigns its pointer to
+ * |*pfr| if there are any received packets to acknowledge. If there
+ * are no packets to acknowledge, this function returns 0, and |*pfr|
+ * is untouched. The caller is advised to set |*pfr| to NULL before
+ * calling this function, and check it after this function returns.
+ * If |nodelay| is nonzero, delayed ACK timer is ignored.
+ *
+ * The memory for ACK frame is dynamically allocated by this function.
+ * A caller is responsible to free it.
+ *
+ * Call ngtcp2_acktr_commit_ack after a created ACK frame is
+ * successfully serialized into a packet.
+ *
+ * This function returns 0 if it succeeds, or one of the following
+ * negative error codes:
+ *
+ * NGTCP2_ERR_NOMEM
+ * Out of memory.
+ */
+static int conn_create_ack_frame(ngtcp2_conn *conn, ngtcp2_frame **pfr,
+ ngtcp2_pktns *pktns, uint8_t type,
+ ngtcp2_tstamp ts, ngtcp2_duration ack_delay,
+ uint64_t ack_delay_exponent) {
+ /* TODO Measure an actual size of ACK blocks to find the best
+ default value. */
+ const size_t initial_max_ack_ranges = 8;
+ int64_t last_pkt_num;
+ ngtcp2_acktr *acktr = &pktns->acktr;
+ ngtcp2_ack_range *range;
+ ngtcp2_ksl_it it;
+ ngtcp2_acktr_entry *rpkt;
+ ngtcp2_ack *ack;
+ size_t range_idx;
+ ngtcp2_tstamp largest_ack_ts;
+ int rv;
+
+ if (acktr->flags & NGTCP2_ACKTR_FLAG_IMMEDIATE_ACK) {
+ ack_delay = 0;
+ }
+
+ if (!ngtcp2_acktr_require_active_ack(acktr, ack_delay, ts)) {
+ return 0;
+ }
+
+ it = ngtcp2_acktr_get(acktr);
+ if (ngtcp2_ksl_it_end(&it)) {
+ ngtcp2_acktr_commit_ack(acktr);
+ return 0;
+ }
+
+ if (conn->tx.ack == NULL) {
+ conn->tx.ack = ngtcp2_mem_malloc(
+ conn->mem,
+ sizeof(ngtcp2_ack) + sizeof(ngtcp2_ack_range) * initial_max_ack_ranges);
+ if (conn->tx.ack == NULL) {
+ return NGTCP2_ERR_NOMEM;
+ }
+ conn->tx.max_ack_ranges = initial_max_ack_ranges;
+ }
+
+ ack = &conn->tx.ack->ack;
+
+ if (pktns->rx.ecn.ect0 || pktns->rx.ecn.ect1 || pktns->rx.ecn.ce) {
+ ack->type = NGTCP2_FRAME_ACK_ECN;
+ ack->ecn.ect0 = pktns->rx.ecn.ect0;
+ ack->ecn.ect1 = pktns->rx.ecn.ect1;
+ ack->ecn.ce = pktns->rx.ecn.ce;
+ } else {
+ ack->type = NGTCP2_FRAME_ACK;
+ }
+ ack->rangecnt = 0;
+
+ rpkt = ngtcp2_ksl_it_get(&it);
+
+ if (rpkt->pkt_num == pktns->rx.max_pkt_num) {
+ last_pkt_num = rpkt->pkt_num - (int64_t)(rpkt->len - 1);
+ largest_ack_ts = rpkt->tstamp;
+ ack->largest_ack = rpkt->pkt_num;
+ ack->first_ack_range = rpkt->len - 1;
+
+ ngtcp2_ksl_it_next(&it);
+ } else {
+ assert(rpkt->pkt_num < pktns->rx.max_pkt_num);
+
+ last_pkt_num = pktns->rx.max_pkt_num;
+ largest_ack_ts = pktns->rx.max_pkt_ts;
+ ack->largest_ack = pktns->rx.max_pkt_num;
+ ack->first_ack_range = 0;
+ }
+
+ if (type == NGTCP2_PKT_1RTT) {
+ ack->ack_delay_unscaled = ts - largest_ack_ts;
+ ack->ack_delay = ack->ack_delay_unscaled / NGTCP2_MICROSECONDS /
+ (1ULL << ack_delay_exponent);
+ } else {
+ ack->ack_delay_unscaled = 0;
+ ack->ack_delay = 0;
+ }
+
+ for (; !ngtcp2_ksl_it_end(&it); ngtcp2_ksl_it_next(&it)) {
+ if (ack->rangecnt == NGTCP2_MAX_ACK_RANGES) {
+ break;
+ }
+
+ rpkt = ngtcp2_ksl_it_get(&it);
+
+ range_idx = ack->rangecnt++;
+ rv = conn_ensure_ack_ranges(conn, ack->rangecnt);
+ if (rv != 0) {
+ return rv;
+ }
+ ack = &conn->tx.ack->ack;
+ range = &ack->ranges[range_idx];
+ range->gap = (uint64_t)(last_pkt_num - rpkt->pkt_num - 2);
+ range->len = rpkt->len - 1;
+
+ last_pkt_num = rpkt->pkt_num - (int64_t)(rpkt->len - 1);
+ }
+
+ /* TODO Just remove entries which cannot fit into a single ACK frame
+ for now. */
+ if (!ngtcp2_ksl_it_end(&it)) {
+ ngtcp2_acktr_forget(acktr, ngtcp2_ksl_it_get(&it));
+ }
+
+ *pfr = conn->tx.ack;
+
+ return 0;
+}
+
+/*
+ * conn_ppe_write_frame writes |fr| to |ppe|. If |hd_logged| is not
+ * NULL and |*hd_logged| is zero, packet header is logged, and 1 is
+ * assigned to |*hd_logged|.
+ *
+ * This function returns 0 if it succeeds, or one of the following
+ * negative error codes:
+ *
+ * NGTCP2_ERR_NOBUF
+ * Buffer is too small.
+ */
+static int conn_ppe_write_frame_hd_log(ngtcp2_conn *conn, ngtcp2_ppe *ppe,
+ int *hd_logged, const ngtcp2_pkt_hd *hd,
+ ngtcp2_frame *fr) {
+ int rv;
+
+ rv = ngtcp2_ppe_encode_frame(ppe, fr);
+ if (rv != 0) {
+ assert(NGTCP2_ERR_NOBUF == rv);
+ return rv;
+ }
+
+ if (hd_logged && !*hd_logged) {
+ *hd_logged = 1;
+ ngtcp2_log_tx_pkt_hd(&conn->log, hd);
+ ngtcp2_qlog_pkt_sent_start(&conn->qlog);
+ }
+
+ ngtcp2_log_tx_fr(&conn->log, hd, fr);
+ ngtcp2_qlog_write_frame(&conn->qlog, fr);
+
+ return 0;
+}
+
+/*
+ * conn_ppe_write_frame writes |fr| to |ppe|.
+ *
+ * This function returns 0 if it succeeds, or one of the following
+ * negative error codes:
+ *
+ * NGTCP2_ERR_NOBUF
+ * Buffer is too small.
+ */
+static int conn_ppe_write_frame(ngtcp2_conn *conn, ngtcp2_ppe *ppe,
+ const ngtcp2_pkt_hd *hd, ngtcp2_frame *fr) {
+ return conn_ppe_write_frame_hd_log(conn, ppe, NULL, hd, fr);
+}
+
+/*
+ * conn_on_pkt_sent is called when new non-ACK-only packet is sent.
+ *
+ * This function returns 0 if it succeeds, or one of the following
+ * negative error codes:
+ *
+ * NGTCP2_ERR_NOMEM
+ * Out of memory
+ */
+static int conn_on_pkt_sent(ngtcp2_conn *conn, ngtcp2_rtb *rtb,
+ ngtcp2_rtb_entry *ent) {
+ int rv;
+
+ /* This function implements OnPacketSent, but it handles only
+ non-ACK-only packet. */
+ rv = ngtcp2_rtb_add(rtb, ent, &conn->cstat);
+ if (rv != 0) {
+ return rv;
+ }
+
+ if (ent->flags & NGTCP2_RTB_ENTRY_FLAG_ACK_ELICITING) {
+ conn->cstat.last_tx_pkt_ts[rtb->pktns_id] = ent->ts;
+ }
+
+ ngtcp2_conn_set_loss_detection_timer(conn, ent->ts);
+
+ return 0;
+}
+
+/*
+ * pktns_select_pkt_numlen selects shortest packet number encoding for
+ * the next packet number based on the largest acknowledged packet
+ * number. It returns the number of bytes to encode the packet
+ * number.
+ */
+static size_t pktns_select_pkt_numlen(ngtcp2_pktns *pktns) {
+ int64_t pkt_num = pktns->tx.last_pkt_num + 1;
+ ngtcp2_rtb *rtb = &pktns->rtb;
+ int64_t n = pkt_num - rtb->largest_acked_tx_pkt_num;
+
+ if (NGTCP2_MAX_PKT_NUM / 2 < n) {
+ return 4;
+ }
+
+ n = n * 2 - 1;
+
+ if (n > 0xffffff) {
+ return 4;
+ }
+ if (n > 0xffff) {
+ return 3;
+ }
+ if (n > 0xff) {
+ return 2;
+ }
+ return 1;
+}
+
+/*
+ * conn_get_cwnd returns cwnd for the current path.
+ */
+static uint64_t conn_get_cwnd(ngtcp2_conn *conn) {
+ return conn->pv && (conn->pv->flags & NGTCP2_PV_FLAG_FALLBACK_ON_FAILURE)
+ ? ngtcp2_cc_compute_initcwnd(conn->cstat.max_tx_udp_payload_size)
+ : conn->cstat.cwnd;
+}
+
+/*
+ * conn_cwnd_is_zero returns nonzero if the number of bytes the local
+ * endpoint can sent at this time is zero.
+ */
+static int conn_cwnd_is_zero(ngtcp2_conn *conn) {
+ uint64_t bytes_in_flight = conn->cstat.bytes_in_flight;
+ uint64_t cwnd = conn_get_cwnd(conn);
+
+ if (bytes_in_flight >= cwnd) {
+ ngtcp2_log_info(&conn->log, NGTCP2_LOG_EVENT_RCV,
+ "cwnd limited bytes_in_flight=%lu cwnd=%lu",
+ bytes_in_flight, cwnd);
+ }
+
+ return bytes_in_flight >= cwnd;
+}
+
+/*
+ * conn_retry_early_payloadlen returns the estimated wire length of
+ * the first STREAM frame of 0-RTT packet which should be
+ * retransmitted due to Retry packet.
+ */
+static uint64_t conn_retry_early_payloadlen(ngtcp2_conn *conn) {
+ ngtcp2_frame_chain *frc;
+ ngtcp2_strm *strm;
+ uint64_t len;
+
+ if (conn->flags & NGTCP2_CONN_FLAG_EARLY_DATA_REJECTED) {
+ return 0;
+ }
+
+ for (; !ngtcp2_pq_empty(&conn->tx.strmq);) {
+ strm = ngtcp2_conn_tx_strmq_top(conn);
+ if (ngtcp2_strm_streamfrq_empty(strm)) {
+ ngtcp2_conn_tx_strmq_pop(conn);
+ continue;
+ }
+
+ frc = ngtcp2_strm_streamfrq_top(strm);
+
+ len = ngtcp2_vec_len(frc->fr.stream.data, frc->fr.stream.datacnt) +
+ NGTCP2_STREAM_OVERHEAD;
+
+ /* Take the min because in conn_should_pad_pkt we take max in
+ order to deal with unbreakable DATAGRAM. */
+ return ngtcp2_min(len, NGTCP2_MIN_COALESCED_PAYLOADLEN);
+ }
+
+ return 0;
+}
+
+static void conn_cryptofrq_clear(ngtcp2_conn *conn, ngtcp2_pktns *pktns) {
+ ngtcp2_frame_chain *frc;
+ ngtcp2_ksl_it it;
+
+ for (it = ngtcp2_ksl_begin(&pktns->crypto.tx.frq); !ngtcp2_ksl_it_end(&it);
+ ngtcp2_ksl_it_next(&it)) {
+ frc = ngtcp2_ksl_it_get(&it);
+ ngtcp2_frame_chain_objalloc_del(frc, &conn->frc_objalloc, conn->mem);
+ }
+ ngtcp2_ksl_clear(&pktns->crypto.tx.frq);
+}
+
+/*
+ * conn_cryptofrq_unacked_offset returns the CRYPTO frame offset by
+ * taking into account acknowledged offset. If there is no data to
+ * send, this function returns (uint64_t)-1.
+ */
+static uint64_t conn_cryptofrq_unacked_offset(ngtcp2_conn *conn,
+ ngtcp2_pktns *pktns) {
+ ngtcp2_frame_chain *frc;
+ ngtcp2_crypto *fr;
+ ngtcp2_range gap;
+ ngtcp2_rtb *rtb = &pktns->rtb;
+ ngtcp2_ksl_it it;
+ uint64_t datalen;
+
+ (void)conn;
+
+ for (it = ngtcp2_ksl_begin(&pktns->crypto.tx.frq); !ngtcp2_ksl_it_end(&it);
+ ngtcp2_ksl_it_next(&it)) {
+ frc = ngtcp2_ksl_it_get(&it);
+ fr = &frc->fr.crypto;
+
+ gap = ngtcp2_strm_get_unacked_range_after(rtb->crypto, fr->offset);
+
+ datalen = ngtcp2_vec_len(fr->data, fr->datacnt);
+
+ if (gap.begin <= fr->offset) {
+ return fr->offset;
+ }
+ if (gap.begin < fr->offset + datalen) {
+ return gap.begin;
+ }
+ }
+
+ return (uint64_t)-1;
+}
+
+static int conn_cryptofrq_unacked_pop(ngtcp2_conn *conn, ngtcp2_pktns *pktns,
+ ngtcp2_frame_chain **pfrc) {
+ ngtcp2_frame_chain *frc, *nfrc;
+ ngtcp2_crypto *fr, *nfr;
+ uint64_t offset, end_offset;
+ size_t idx, end_idx;
+ uint64_t base_offset, end_base_offset;
+ ngtcp2_range gap;
+ ngtcp2_rtb *rtb = &pktns->rtb;
+ ngtcp2_vec *v;
+ int rv;
+ ngtcp2_ksl_it it;
+
+ *pfrc = NULL;
+
+ for (it = ngtcp2_ksl_begin(&pktns->crypto.tx.frq); !ngtcp2_ksl_it_end(&it);) {
+ frc = ngtcp2_ksl_it_get(&it);
+ fr = &frc->fr.crypto;
+
+ ngtcp2_ksl_remove_hint(&pktns->crypto.tx.frq, &it, &it, &fr->offset);
+
+ idx = 0;
+ offset = fr->offset;
+ base_offset = 0;
+
+ gap = ngtcp2_strm_get_unacked_range_after(rtb->crypto, offset);
+ if (gap.begin < offset) {
+ gap.begin = offset;
+ }
+
+ for (; idx < fr->datacnt && offset < gap.begin; ++idx) {
+ v = &fr->data[idx];
+ if (offset + v->len > gap.begin) {
+ base_offset = gap.begin - offset;
+ break;
+ }
+
+ offset += v->len;
+ }
+
+ if (idx == fr->datacnt) {
+ ngtcp2_frame_chain_objalloc_del(frc, &conn->frc_objalloc, conn->mem);
+ continue;
+ }
+
+ assert(gap.begin == offset + base_offset);
+
+ end_idx = idx;
+ end_offset = offset;
+ end_base_offset = 0;
+
+ for (; end_idx < fr->datacnt; ++end_idx) {
+ v = &fr->data[end_idx];
+ if (end_offset + v->len > gap.end) {
+ end_base_offset = gap.end - end_offset;
+ break;
+ }
+
+ end_offset += v->len;
+ }
+
+ if (fr->offset == offset && base_offset == 0 && fr->datacnt == end_idx) {
+ *pfrc = frc;
+ return 0;
+ }
+
+ if (fr->datacnt == end_idx) {
+ memmove(fr->data, fr->data + idx, sizeof(fr->data[0]) * (end_idx - idx));
+
+ assert(fr->data[0].len > base_offset);
+
+ fr->offset = offset + base_offset;
+ fr->datacnt = end_idx - idx;
+ fr->data[0].base += base_offset;
+ fr->data[0].len -= (size_t)base_offset;
+
+ *pfrc = frc;
+ return 0;
+ }
+
+ rv = ngtcp2_frame_chain_crypto_datacnt_objalloc_new(
+ &nfrc, fr->datacnt - end_idx, &conn->frc_objalloc, conn->mem);
+ if (rv != 0) {
+ ngtcp2_frame_chain_objalloc_del(frc, &conn->frc_objalloc, conn->mem);
+ return rv;
+ }
+
+ nfr = &nfrc->fr.crypto;
+ nfr->type = NGTCP2_FRAME_CRYPTO;
+ memcpy(nfr->data, fr->data + end_idx,
+ sizeof(nfr->data[0]) * (fr->datacnt - end_idx));
+
+ assert(nfr->data[0].len > end_base_offset);
+
+ nfr->offset = end_offset + end_base_offset;
+ nfr->datacnt = fr->datacnt - end_idx;
+ nfr->data[0].base += end_base_offset;
+ nfr->data[0].len -= (size_t)end_base_offset;
+
+ rv = ngtcp2_ksl_insert(&pktns->crypto.tx.frq, NULL, &nfr->offset, nfrc);
+ if (rv != 0) {
+ assert(ngtcp2_err_is_fatal(rv));
+ ngtcp2_frame_chain_objalloc_del(nfrc, &conn->frc_objalloc, conn->mem);
+ ngtcp2_frame_chain_objalloc_del(frc, &conn->frc_objalloc, conn->mem);
+ return rv;
+ }
+
+ if (end_base_offset) {
+ ++end_idx;
+ }
+
+ memmove(fr->data, fr->data + idx, sizeof(fr->data[0]) * (end_idx - idx));
+
+ assert(fr->data[0].len > base_offset);
+
+ fr->offset = offset + base_offset;
+ fr->datacnt = end_idx - idx;
+ if (end_base_offset) {
+ assert(fr->data[fr->datacnt - 1].len > end_base_offset);
+ fr->data[fr->datacnt - 1].len = (size_t)end_base_offset;
+ }
+ fr->data[0].base += base_offset;
+ fr->data[0].len -= (size_t)base_offset;
+
+ *pfrc = frc;
+ return 0;
+ }
+
+ return 0;
+}
+static int conn_cryptofrq_pop(ngtcp2_conn *conn, ngtcp2_frame_chain **pfrc,
+ ngtcp2_pktns *pktns, size_t left) {
+ ngtcp2_crypto *fr, *nfr;
+ ngtcp2_frame_chain *frc, *nfrc;
+ int rv;
+ size_t nmerged;
+ uint64_t datalen;
+ ngtcp2_vec a[NGTCP2_MAX_CRYPTO_DATACNT];
+ ngtcp2_vec b[NGTCP2_MAX_CRYPTO_DATACNT];
+ size_t acnt, bcnt;
+ ngtcp2_ksl_it it;
+
+ rv = conn_cryptofrq_unacked_pop(conn, pktns, &frc);
+ if (rv != 0) {
+ return rv;
+ }
+ if (frc == NULL) {
+ *pfrc = NULL;
+ return 0;
+ }
+
+ fr = &frc->fr.crypto;
+ datalen = ngtcp2_vec_len(fr->data, fr->datacnt);
+
+ if (datalen > left) {
+ ngtcp2_vec_copy(a, fr->data, fr->datacnt);
+ acnt = fr->datacnt;
+
+ bcnt = 0;
+ ngtcp2_vec_split(a, &acnt, b, &bcnt, left, NGTCP2_MAX_CRYPTO_DATACNT);
+
+ assert(acnt > 0);
+ assert(bcnt > 0);
+
+ rv = ngtcp2_frame_chain_crypto_datacnt_objalloc_new(
+ &nfrc, bcnt, &conn->frc_objalloc, conn->mem);
+ if (rv != 0) {
+ assert(ngtcp2_err_is_fatal(rv));
+ ngtcp2_frame_chain_objalloc_del(frc, &conn->frc_objalloc, conn->mem);
+ return rv;
+ }
+
+ nfr = &nfrc->fr.crypto;
+ nfr->type = NGTCP2_FRAME_CRYPTO;
+ nfr->offset = fr->offset + left;
+ nfr->datacnt = bcnt;
+ ngtcp2_vec_copy(nfr->data, b, bcnt);
+
+ rv = ngtcp2_ksl_insert(&pktns->crypto.tx.frq, NULL, &nfr->offset, nfrc);
+ if (rv != 0) {
+ assert(ngtcp2_err_is_fatal(rv));
+ ngtcp2_frame_chain_objalloc_del(nfrc, &conn->frc_objalloc, conn->mem);
+ ngtcp2_frame_chain_objalloc_del(frc, &conn->frc_objalloc, conn->mem);
+ return rv;
+ }
+
+ rv = ngtcp2_frame_chain_crypto_datacnt_objalloc_new(
+ &nfrc, acnt, &conn->frc_objalloc, conn->mem);
+ if (rv != 0) {
+ assert(ngtcp2_err_is_fatal(rv));
+ ngtcp2_frame_chain_objalloc_del(frc, &conn->frc_objalloc, conn->mem);
+ return rv;
+ }
+
+ nfr = &nfrc->fr.crypto;
+ *nfr = *fr;
+ nfr->datacnt = acnt;
+ ngtcp2_vec_copy(nfr->data, a, acnt);
+
+ ngtcp2_frame_chain_objalloc_del(frc, &conn->frc_objalloc, conn->mem);
+
+ *pfrc = nfrc;
+
+ return 0;
+ }
+
+ left -= (size_t)datalen;
+
+ ngtcp2_vec_copy(a, fr->data, fr->datacnt);
+ acnt = fr->datacnt;
+
+ for (; left && ngtcp2_ksl_len(&pktns->crypto.tx.frq);) {
+ it = ngtcp2_ksl_begin(&pktns->crypto.tx.frq);
+ nfrc = ngtcp2_ksl_it_get(&it);
+ nfr = &nfrc->fr.crypto;
+
+ if (nfr->offset != fr->offset + datalen) {
+ assert(fr->offset + datalen < nfr->offset);
+ break;
+ }
+
+ rv = conn_cryptofrq_unacked_pop(conn, pktns, &nfrc);
+ if (rv != 0) {
+ assert(ngtcp2_err_is_fatal(rv));
+ ngtcp2_frame_chain_objalloc_del(frc, &conn->frc_objalloc, conn->mem);
+ return rv;
+ }
+ if (nfrc == NULL) {
+ break;
+ }
+
+ nfr = &nfrc->fr.crypto;
+
+ nmerged = ngtcp2_vec_merge(a, &acnt, nfr->data, &nfr->datacnt, left,
+ NGTCP2_MAX_CRYPTO_DATACNT);
+ if (nmerged == 0) {
+ rv = ngtcp2_ksl_insert(&pktns->crypto.tx.frq, NULL, &nfr->offset, nfrc);
+ if (rv != 0) {
+ assert(ngtcp2_err_is_fatal(rv));
+ ngtcp2_frame_chain_objalloc_del(nfrc, &conn->frc_objalloc, conn->mem);
+ ngtcp2_frame_chain_objalloc_del(frc, &conn->frc_objalloc, conn->mem);
+ return rv;
+ }
+ break;
+ }
+
+ datalen += nmerged;
+ left -= nmerged;
+
+ if (nfr->datacnt == 0) {
+ ngtcp2_frame_chain_objalloc_del(nfrc, &conn->frc_objalloc, conn->mem);
+ continue;
+ }
+
+ nfr->offset += nmerged;
+
+ rv = ngtcp2_ksl_insert(&pktns->crypto.tx.frq, NULL, &nfr->offset, nfrc);
+ if (rv != 0) {
+ ngtcp2_frame_chain_objalloc_del(nfrc, &conn->frc_objalloc, conn->mem);
+ ngtcp2_frame_chain_objalloc_del(frc, &conn->frc_objalloc, conn->mem);
+ return rv;
+ }
+
+ break;
+ }
+
+ if (acnt == fr->datacnt) {
+ assert(acnt > 0);
+ fr->data[acnt - 1] = a[acnt - 1];
+
+ *pfrc = frc;
+ return 0;
+ }
+
+ assert(acnt > fr->datacnt);
+
+ rv = ngtcp2_frame_chain_crypto_datacnt_objalloc_new(
+ &nfrc, acnt, &conn->frc_objalloc, conn->mem);
+ if (rv != 0) {
+ ngtcp2_frame_chain_objalloc_del(frc, &conn->frc_objalloc, conn->mem);
+ return rv;
+ }
+
+ nfr = &nfrc->fr.crypto;
+ *nfr = *fr;
+ nfr->datacnt = acnt;
+ ngtcp2_vec_copy(nfr->data, a, acnt);
+
+ ngtcp2_frame_chain_objalloc_del(frc, &conn->frc_objalloc, conn->mem);
+
+ *pfrc = nfrc;
+
+ return 0;
+}
+
+/*
+ * conn_verify_dcid verifies that destination connection ID in |hd| is
+ * valid for the connection. If it is successfully verified and the
+ * remote endpoint uses new DCID in the packet, nonzero value is
+ * assigned to |*pnew_cid_used| if it is not NULL. Otherwise 0 is
+ * assigned to it.
+ *
+ * This function returns 0 if it succeeds, or one of the following
+ * negative error codes:
+ *
+ * NGTCP2_ERR_NOMEM
+ * Out of memory.
+ * NGTCP2_ERR_INVALID_ARGUMENT
+ * |dcid| is not known to the local endpoint.
+ */
+static int conn_verify_dcid(ngtcp2_conn *conn, int *pnew_cid_used,
+ const ngtcp2_pkt_hd *hd) {
+ ngtcp2_ksl_it it;
+ ngtcp2_scid *scid;
+ int rv;
+
+ it = ngtcp2_ksl_lower_bound(&conn->scid.set, &hd->dcid);
+ if (ngtcp2_ksl_it_end(&it)) {
+ return NGTCP2_ERR_INVALID_ARGUMENT;
+ }
+
+ scid = ngtcp2_ksl_it_get(&it);
+ if (!ngtcp2_cid_eq(&scid->cid, &hd->dcid)) {
+ return NGTCP2_ERR_INVALID_ARGUMENT;
+ }
+
+ if (!(scid->flags & NGTCP2_SCID_FLAG_USED)) {
+ scid->flags |= NGTCP2_SCID_FLAG_USED;
+
+ if (scid->pe.index == NGTCP2_PQ_BAD_INDEX) {
+ rv = ngtcp2_pq_push(&conn->scid.used, &scid->pe);
+ if (rv != 0) {
+ return rv;
+ }
+ }
+
+ if (pnew_cid_used) {
+ *pnew_cid_used = 1;
+ }
+ } else if (pnew_cid_used) {
+ *pnew_cid_used = 0;
+ }
+
+ return 0;
+}
+
+/*
+ * conn_should_pad_pkt returns nonzero if the packet should be padded.
+ * |type| is the type of packet. |left| is the space left in packet
+ * buffer. |write_datalen| is the number of bytes which will be sent
+ * in the next, coalesced 0-RTT or 1RTT packet.
+ */
+static int conn_should_pad_pkt(ngtcp2_conn *conn, uint8_t type, size_t left,
+ uint64_t write_datalen, int ack_eliciting,
+ int require_padding) {
+ uint64_t min_payloadlen;
+
+ if (type == NGTCP2_PKT_INITIAL) {
+ if (conn->server) {
+ if (!ack_eliciting) {
+ return 0;
+ }
+
+ if (conn->hs_pktns->crypto.tx.ckm &&
+ (conn->hs_pktns->rtb.probe_pkt_left ||
+ ngtcp2_ksl_len(&conn->hs_pktns->crypto.tx.frq) ||
+ !ngtcp2_acktr_empty(&conn->hs_pktns->acktr))) {
+ /* If we have something to send in Handshake packet, then add
+ PADDING in Handshake packet. */
+ min_payloadlen = NGTCP2_MIN_COALESCED_PAYLOADLEN;
+ } else {
+ return 1;
+ }
+ } else {
+ if (conn->hs_pktns->crypto.tx.ckm &&
+ (conn->hs_pktns->rtb.probe_pkt_left ||
+ ngtcp2_ksl_len(&conn->hs_pktns->crypto.tx.frq) ||
+ !ngtcp2_acktr_empty(&conn->hs_pktns->acktr))) {
+ /* If we have something to send in Handshake packet, then add
+ PADDING in Handshake packet. */
+ min_payloadlen = NGTCP2_MIN_COALESCED_PAYLOADLEN;
+ } else if ((!conn->early.ckm && !conn->pktns.crypto.tx.ckm) ||
+ write_datalen == 0) {
+ return 1;
+ } else {
+ /* If we have something to send in 0RTT or 1RTT packet, then
+ add PADDING in that packet. Take maximum in case that
+ write_datalen includes DATAGRAM which cannot be split. */
+ min_payloadlen =
+ ngtcp2_max(write_datalen, NGTCP2_MIN_COALESCED_PAYLOADLEN);
+ }
+ }
+ } else {
+ assert(type == NGTCP2_PKT_HANDSHAKE);
+
+ if (!require_padding) {
+ return 0;
+ }
+
+ if (!conn->pktns.crypto.tx.ckm || write_datalen == 0) {
+ return 1;
+ }
+
+ min_payloadlen = ngtcp2_max(write_datalen, NGTCP2_MIN_COALESCED_PAYLOADLEN);
+ }
+
+ /* TODO the next packet type should be taken into account */
+ return left <
+ /* TODO Assuming that pkt_num is encoded in 1 byte. */
+ NGTCP2_MIN_LONG_HEADERLEN + conn->dcid.current.cid.datalen +
+ conn->oscid.datalen + NGTCP2_PKT_LENGTHLEN - 1 + min_payloadlen +
+ NGTCP2_MAX_AEAD_OVERHEAD;
+}
+
+static void conn_restart_timer_on_write(ngtcp2_conn *conn, ngtcp2_tstamp ts) {
+ conn->idle_ts = ts;
+ conn->flags &= (uint32_t)~NGTCP2_CONN_FLAG_RESTART_IDLE_TIMER_ON_WRITE;
+}
+
+static void conn_restart_timer_on_read(ngtcp2_conn *conn, ngtcp2_tstamp ts) {
+ conn->idle_ts = ts;
+ conn->flags |= NGTCP2_CONN_FLAG_RESTART_IDLE_TIMER_ON_WRITE;
+}
+
+/*
+ * conn_keep_alive_enabled returns nonzero if keep-alive is enabled.
+ */
+static int conn_keep_alive_enabled(ngtcp2_conn *conn) {
+ return conn->keep_alive.last_ts != UINT64_MAX && conn->keep_alive.timeout;
+}
+
+/*
+ * conn_keep_alive_expired returns nonzero if keep-alive timer has
+ * expired.
+ */
+static int conn_keep_alive_expired(ngtcp2_conn *conn, ngtcp2_tstamp ts) {
+ return conn_keep_alive_enabled(conn) &&
+ conn->keep_alive.last_ts + conn->keep_alive.timeout <= ts;
+}
+
+/*
+ * conn_keep_alive_expiry returns the expiry time of keep-alive timer.
+ */
+static ngtcp2_tstamp conn_keep_alive_expiry(ngtcp2_conn *conn) {
+ if ((conn->flags & NGTCP2_CONN_FLAG_KEEP_ALIVE_CANCELLED) ||
+ !conn_keep_alive_enabled(conn)) {
+ return UINT64_MAX;
+ }
+
+ return conn->keep_alive.last_ts + conn->keep_alive.timeout;
+}
+
+/*
+ * conn_cancel_expired_keep_alive_timer cancels the expired keep-alive
+ * timer.
+ */
+static void conn_cancel_expired_keep_alive_timer(ngtcp2_conn *conn,
+ ngtcp2_tstamp ts) {
+ if (!(conn->flags & NGTCP2_CONN_FLAG_KEEP_ALIVE_CANCELLED) &&
+ conn_keep_alive_expired(conn, ts)) {
+ conn->flags |= NGTCP2_CONN_FLAG_KEEP_ALIVE_CANCELLED;
+ }
+}
+
+/*
+ * conn_update_keep_alive_last_ts updates the base time point of
+ * keep-alive timer.
+ */
+static void conn_update_keep_alive_last_ts(ngtcp2_conn *conn,
+ ngtcp2_tstamp ts) {
+ conn->keep_alive.last_ts = ts;
+ conn->flags &= (uint32_t)~NGTCP2_CONN_FLAG_KEEP_ALIVE_CANCELLED;
+}
+
+void ngtcp2_conn_set_keep_alive_timeout(ngtcp2_conn *conn,
+ ngtcp2_duration timeout) {
+ conn->keep_alive.timeout = timeout;
+}
+
+/*
+ * NGTCP2_PKT_PACING_OVERHEAD defines overhead of userspace event
+ * loop. Packet pacing might require sub milliseconds packet spacing,
+ * but userspace event loop might not offer such precision.
+ * Typically, if delay is 0.5 microseconds, the actual delay after
+ * which we can send packet is well over 1 millisecond when event loop
+ * is involved (which includes other stuff, like reading packets etc
+ * in a typical single threaded use case).
+ */
+#define NGTCP2_PKT_PACING_OVERHEAD NGTCP2_MILLISECONDS
+
+static void conn_cancel_expired_pkt_tx_timer(ngtcp2_conn *conn,
+ ngtcp2_tstamp ts) {
+ if (conn->tx.pacing.next_ts == UINT64_MAX) {
+ return;
+ }
+
+ if (conn->tx.pacing.next_ts > ts + NGTCP2_PKT_PACING_OVERHEAD) {
+ return;
+ }
+
+ conn->tx.pacing.next_ts = UINT64_MAX;
+}
+
+static int conn_pacing_pkt_tx_allowed(ngtcp2_conn *conn, ngtcp2_tstamp ts) {
+ return conn->tx.pacing.next_ts == UINT64_MAX ||
+ conn->tx.pacing.next_ts <= ts + NGTCP2_PKT_PACING_OVERHEAD;
+}
+
+static uint8_t conn_pkt_flags(ngtcp2_conn *conn) {
+ if (conn->remote.transport_params &&
+ conn->remote.transport_params->grease_quic_bit &&
+ (conn->flags & NGTCP2_CONN_FLAG_CLEAR_FIXED_BIT)) {
+ return NGTCP2_PKT_FLAG_FIXED_BIT_CLEAR;
+ }
+
+ return NGTCP2_PKT_FLAG_NONE;
+}
+
+static uint8_t conn_pkt_flags_long(ngtcp2_conn *conn) {
+ return NGTCP2_PKT_FLAG_LONG_FORM | conn_pkt_flags(conn);
+}
+
+static uint8_t conn_pkt_flags_short(ngtcp2_conn *conn) {
+ return (uint8_t)(conn_pkt_flags(conn) | ((conn->pktns.crypto.tx.ckm->flags &
+ NGTCP2_CRYPTO_KM_FLAG_KEY_PHASE_ONE)
+ ? NGTCP2_PKT_FLAG_KEY_PHASE
+ : NGTCP2_PKT_FLAG_NONE));
+}
+
+/*
+ * conn_write_handshake_pkt writes handshake packet in the buffer
+ * pointed by |dest| whose length is |destlen|. |type| specifies long
+ * packet type. It should be either NGTCP2_PKT_INITIAL or
+ * NGTCP2_PKT_HANDSHAKE_PKT.
+ *
+ * |write_datalen| is the minimum length of application data ready to
+ * send in subsequent 0RTT or 1RTT packet.
+ *
+ * This function returns the number of bytes written in |dest| if it
+ * succeeds, or one of the following negative error codes:
+ *
+ * NGTCP2_ERR_NOMEM
+ * Out of memory.
+ * NGTCP2_ERR_CALLBACK_FAILURE
+ * User-defined callback function failed.
+ */
+static ngtcp2_ssize
+conn_write_handshake_pkt(ngtcp2_conn *conn, ngtcp2_pkt_info *pi, uint8_t *dest,
+ size_t destlen, uint8_t type, uint8_t flags,
+ uint64_t write_datalen, ngtcp2_tstamp ts) {
+ int rv;
+ ngtcp2_ppe ppe;
+ ngtcp2_pkt_hd hd;
+ ngtcp2_frame_chain *frq = NULL, **pfrc = &frq;
+ ngtcp2_frame_chain *nfrc;
+ ngtcp2_frame *ackfr = NULL, lfr;
+ ngtcp2_ssize spktlen;
+ ngtcp2_crypto_cc cc;
+ ngtcp2_rtb_entry *rtbent;
+ ngtcp2_pktns *pktns;
+ size_t left;
+ uint16_t rtb_entry_flags = NGTCP2_RTB_ENTRY_FLAG_NONE;
+ int require_padding = (flags & NGTCP2_WRITE_PKT_FLAG_REQUIRE_PADDING) != 0;
+ int pkt_empty = 1;
+ int padded = 0;
+ int hd_logged = 0;
+ uint64_t crypto_offset;
+ ngtcp2_ssize num_reclaimed;
+ uint32_t version;
+
+ switch (type) {
+ case NGTCP2_PKT_INITIAL:
+ if (!conn->in_pktns) {
+ return 0;
+ }
+ assert(conn->in_pktns->crypto.tx.ckm);
+ pktns = conn->in_pktns;
+ version = conn->negotiated_version ? conn->negotiated_version
+ : conn->client_chosen_version;
+ if (version == conn->client_chosen_version) {
+ cc.ckm = pktns->crypto.tx.ckm;
+ cc.hp_ctx = pktns->crypto.tx.hp_ctx;
+ } else {
+ assert(conn->vneg.version == version);
+
+ cc.ckm = conn->vneg.tx.ckm;
+ cc.hp_ctx = conn->vneg.tx.hp_ctx;
+ }
+ break;
+ case NGTCP2_PKT_HANDSHAKE:
+ if (!conn->hs_pktns || !conn->hs_pktns->crypto.tx.ckm) {
+ return 0;
+ }
+ pktns = conn->hs_pktns;
+ version = conn->negotiated_version;
+ cc.ckm = pktns->crypto.tx.ckm;
+ cc.hp_ctx = pktns->crypto.tx.hp_ctx;
+ break;
+ default:
+ ngtcp2_unreachable();
+ }
+
+ cc.aead = pktns->crypto.ctx.aead;
+ cc.hp = pktns->crypto.ctx.hp;
+ cc.encrypt = conn->callbacks.encrypt;
+ cc.hp_mask = conn->callbacks.hp_mask;
+
+ ngtcp2_pkt_hd_init(&hd, conn_pkt_flags_long(conn), type,
+ &conn->dcid.current.cid, &conn->oscid,
+ pktns->tx.last_pkt_num + 1, pktns_select_pkt_numlen(pktns),
+ version, 0);
+
+ if (!conn->server && type == NGTCP2_PKT_INITIAL &&
+ conn->local.settings.tokenlen) {
+ hd.token = conn->local.settings.token;
+ hd.tokenlen = conn->local.settings.tokenlen;
+ }
+
+ ngtcp2_ppe_init(&ppe, dest, destlen, &cc);
+
+ rv = ngtcp2_ppe_encode_hd(&ppe, &hd);
+ if (rv != 0) {
+ assert(NGTCP2_ERR_NOBUF == rv);
+ return 0;
+ }
+
+ if (!ngtcp2_ppe_ensure_hp_sample(&ppe)) {
+ return 0;
+ }
+
+ rv = conn_create_ack_frame(conn, &ackfr, pktns, type, ts,
+ /* ack_delay = */ 0,
+ NGTCP2_DEFAULT_ACK_DELAY_EXPONENT);
+ if (rv != 0) {
+ ngtcp2_frame_chain_list_objalloc_del(frq, &conn->frc_objalloc, conn->mem);
+ return rv;
+ }
+
+ if (ackfr) {
+ rv = conn_ppe_write_frame_hd_log(conn, &ppe, &hd_logged, &hd, ackfr);
+ if (rv != 0) {
+ assert(NGTCP2_ERR_NOBUF == rv);
+ } else {
+ ngtcp2_acktr_commit_ack(&pktns->acktr);
+ ngtcp2_acktr_add_ack(&pktns->acktr, hd.pkt_num, ackfr->ack.largest_ack);
+ pkt_empty = 0;
+ }
+ }
+
+ /* Server requires at least NGTCP2_MAX_UDP_PAYLOAD_SIZE bytes in
+ order to send ack-eliciting Initial packet. */
+ if (!conn->server || type != NGTCP2_PKT_INITIAL ||
+ destlen >= NGTCP2_MAX_UDP_PAYLOAD_SIZE) {
+ build_pkt:
+ for (; ngtcp2_ksl_len(&pktns->crypto.tx.frq);) {
+ left = ngtcp2_ppe_left(&ppe);
+
+ crypto_offset = conn_cryptofrq_unacked_offset(conn, pktns);
+ if (crypto_offset == (size_t)-1) {
+ conn_cryptofrq_clear(conn, pktns);
+ break;
+ }
+
+ left = ngtcp2_pkt_crypto_max_datalen(crypto_offset, left, left);
+ if (left == (size_t)-1) {
+ break;
+ }
+
+ rv = conn_cryptofrq_pop(conn, &nfrc, pktns, left);
+ if (rv != 0) {
+ assert(ngtcp2_err_is_fatal(rv));
+ ngtcp2_frame_chain_list_objalloc_del(frq, &conn->frc_objalloc,
+ conn->mem);
+ return rv;
+ }
+
+ if (nfrc == NULL) {
+ break;
+ }
+
+ rv = conn_ppe_write_frame_hd_log(conn, &ppe, &hd_logged, &hd, &nfrc->fr);
+ if (rv != 0) {
+ ngtcp2_unreachable();
+ }
+
+ *pfrc = nfrc;
+ pfrc = &(*pfrc)->next;
+
+ pkt_empty = 0;
+ rtb_entry_flags |= NGTCP2_RTB_ENTRY_FLAG_ACK_ELICITING |
+ NGTCP2_RTB_ENTRY_FLAG_PTO_ELICITING |
+ NGTCP2_RTB_ENTRY_FLAG_RETRANSMITTABLE;
+ }
+
+ if (!(rtb_entry_flags & NGTCP2_RTB_ENTRY_FLAG_ACK_ELICITING) &&
+ pktns->rtb.num_retransmittable && pktns->rtb.probe_pkt_left) {
+ num_reclaimed = ngtcp2_rtb_reclaim_on_pto(&pktns->rtb, conn, pktns, 1);
+ if (num_reclaimed < 0) {
+ ngtcp2_frame_chain_list_objalloc_del(frq, &conn->frc_objalloc,
+ conn->mem);
+ return rv;
+ }
+ if (num_reclaimed) {
+ goto build_pkt;
+ }
+ /* We had pktns->rtb.num_retransmittable > 0 but the contents of
+ those packets have been acknowledged (i.e., retransmission in
+ another packet). For server, in this case, we don't have to
+ send any probe packet. Client needs to send probe packets
+ until it knows that server has completed address validation or
+ handshake has been confirmed. */
+ if (pktns->rtb.num_pto_eliciting == 0 &&
+ (conn->server ||
+ (conn->flags & (NGTCP2_CONN_FLAG_SERVER_ADDR_VERIFIED |
+ NGTCP2_CONN_FLAG_HANDSHAKE_CONFIRMED)))) {
+ pktns->rtb.probe_pkt_left = 0;
+ ngtcp2_conn_set_loss_detection_timer(conn, ts);
+ /* TODO If packet is empty, we should return now if cwnd is
+ zero. */
+ }
+ }
+
+ if (!(rtb_entry_flags & NGTCP2_RTB_ENTRY_FLAG_ACK_ELICITING) &&
+ pktns->rtb.probe_pkt_left) {
+ lfr.type = NGTCP2_FRAME_PING;
+
+ rv = conn_ppe_write_frame_hd_log(conn, &ppe, &hd_logged, &hd, &lfr);
+ if (rv != 0) {
+ assert(rv == NGTCP2_ERR_NOBUF);
+ } else {
+ rtb_entry_flags |=
+ NGTCP2_RTB_ENTRY_FLAG_ACK_ELICITING | NGTCP2_RTB_ENTRY_FLAG_PROBE;
+ pkt_empty = 0;
+ }
+ }
+
+ if (!pkt_empty) {
+ if (!(rtb_entry_flags & NGTCP2_RTB_ENTRY_FLAG_ACK_ELICITING)) {
+ /* The intention of smaller limit is get more chance to measure
+ RTT samples in early phase. */
+ if (pktns->tx.num_non_ack_pkt >= 1) {
+ lfr.type = NGTCP2_FRAME_PING;
+
+ rv = conn_ppe_write_frame_hd_log(conn, &ppe, &hd_logged, &hd, &lfr);
+ if (rv != 0) {
+ assert(rv == NGTCP2_ERR_NOBUF);
+ } else {
+ rtb_entry_flags |= NGTCP2_RTB_ENTRY_FLAG_ACK_ELICITING;
+ pktns->tx.num_non_ack_pkt = 0;
+ }
+ } else {
+ ++pktns->tx.num_non_ack_pkt;
+ }
+ } else {
+ pktns->tx.num_non_ack_pkt = 0;
+ }
+ }
+ }
+
+ if (pkt_empty) {
+ return 0;
+ }
+
+ /* If we cannot write another packet, then we need to add padding to
+ Initial here. */
+ if (conn_should_pad_pkt(
+ conn, type, ngtcp2_ppe_left(&ppe), write_datalen,
+ (rtb_entry_flags & NGTCP2_RTB_ENTRY_FLAG_ACK_ELICITING) != 0,
+ require_padding)) {
+ lfr.type = NGTCP2_FRAME_PADDING;
+ lfr.padding.len = ngtcp2_ppe_padding(&ppe);
+ } else {
+ lfr.type = NGTCP2_FRAME_PADDING;
+ lfr.padding.len = ngtcp2_ppe_padding_hp_sample(&ppe);
+ }
+
+ if (lfr.padding.len) {
+ padded = 1;
+ ngtcp2_log_tx_fr(&conn->log, &hd, &lfr);
+ ngtcp2_qlog_write_frame(&conn->qlog, &lfr);
+ }
+
+ spktlen = ngtcp2_ppe_final(&ppe, NULL);
+ if (spktlen < 0) {
+ assert(ngtcp2_err_is_fatal((int)spktlen));
+ ngtcp2_frame_chain_list_objalloc_del(frq, &conn->frc_objalloc, conn->mem);
+ return spktlen;
+ }
+
+ ngtcp2_qlog_pkt_sent_end(&conn->qlog, &hd, (size_t)spktlen);
+
+ if ((rtb_entry_flags & NGTCP2_RTB_ENTRY_FLAG_ACK_ELICITING) || padded) {
+ if (pi) {
+ conn_handle_tx_ecn(conn, pi, &rtb_entry_flags, pktns, &hd, ts);
+ }
+
+ rv = ngtcp2_rtb_entry_objalloc_new(&rtbent, &hd, frq, ts, (size_t)spktlen,
+ rtb_entry_flags,
+ &conn->rtb_entry_objalloc);
+ if (rv != 0) {
+ assert(ngtcp2_err_is_fatal(rv));
+ ngtcp2_frame_chain_list_objalloc_del(frq, &conn->frc_objalloc, conn->mem);
+ return rv;
+ }
+
+ rv = conn_on_pkt_sent(conn, &pktns->rtb, rtbent);
+ if (rv != 0) {
+ ngtcp2_rtb_entry_objalloc_del(rtbent, &conn->rtb_entry_objalloc,
+ &conn->frc_objalloc, conn->mem);
+ return rv;
+ }
+
+ if ((rtb_entry_flags & NGTCP2_RTB_ENTRY_FLAG_ACK_ELICITING) &&
+ (conn->flags & NGTCP2_CONN_FLAG_RESTART_IDLE_TIMER_ON_WRITE)) {
+ conn_restart_timer_on_write(conn, ts);
+ }
+ } else if (pi && conn->tx.ecn.state == NGTCP2_ECN_STATE_CAPABLE) {
+ conn_handle_tx_ecn(conn, pi, NULL, pktns, &hd, ts);
+ }
+
+ if (pktns->rtb.probe_pkt_left &&
+ (rtb_entry_flags & NGTCP2_RTB_ENTRY_FLAG_ACK_ELICITING)) {
+ --pktns->rtb.probe_pkt_left;
+ }
+
+ conn_update_keep_alive_last_ts(conn, ts);
+
+ conn->dcid.current.bytes_sent += (uint64_t)spktlen;
+
+ conn->tx.pacing.pktlen += (size_t)spktlen;
+
+ ngtcp2_qlog_metrics_updated(&conn->qlog, &conn->cstat);
+
+ ++pktns->tx.last_pkt_num;
+
+ return spktlen;
+}
+
+/*
+ * conn_write_ack_pkt writes QUIC packet for type |type| which only
+ * includes ACK frame in the buffer pointed by |dest| whose length is
+ * |destlen|.
+ *
+ * This function returns the number of bytes written in |dest| if it
+ * succeeds, or one of the following negative error codes:
+ *
+ * NGTCP2_ERR_CALLBACK_FAILURE
+ * User-defined callback function failed.
+ * NGTCP2_ERR_NOMEM
+ * Out of memory.
+ */
+static ngtcp2_ssize conn_write_ack_pkt(ngtcp2_conn *conn, ngtcp2_pkt_info *pi,
+ uint8_t *dest, size_t destlen,
+ uint8_t type, ngtcp2_tstamp ts) {
+ int rv;
+ ngtcp2_frame *ackfr;
+ ngtcp2_pktns *pktns;
+ ngtcp2_duration ack_delay;
+ uint64_t ack_delay_exponent;
+ ngtcp2_ssize spktlen;
+
+ assert(!(conn->flags & NGTCP2_CONN_FLAG_PPE_PENDING));
+
+ switch (type) {
+ case NGTCP2_PKT_INITIAL:
+ assert(conn->server);
+ pktns = conn->in_pktns;
+ ack_delay = 0;
+ ack_delay_exponent = NGTCP2_DEFAULT_ACK_DELAY_EXPONENT;
+ break;
+ case NGTCP2_PKT_HANDSHAKE:
+ pktns = conn->hs_pktns;
+ ack_delay = 0;
+ ack_delay_exponent = NGTCP2_DEFAULT_ACK_DELAY_EXPONENT;
+ break;
+ case NGTCP2_PKT_1RTT:
+ pktns = &conn->pktns;
+ ack_delay = conn_compute_ack_delay(conn);
+ ack_delay_exponent = conn->local.transport_params.ack_delay_exponent;
+ break;
+ default:
+ ngtcp2_unreachable();
+ }
+
+ if (!pktns->crypto.tx.ckm) {
+ return 0;
+ }
+
+ ackfr = NULL;
+ rv = conn_create_ack_frame(conn, &ackfr, pktns, type, ts, ack_delay,
+ ack_delay_exponent);
+ if (rv != 0) {
+ return rv;
+ }
+
+ if (!ackfr) {
+ return 0;
+ }
+
+ spktlen = ngtcp2_conn_write_single_frame_pkt(
+ conn, pi, dest, destlen, type, NGTCP2_WRITE_PKT_FLAG_NONE,
+ &conn->dcid.current.cid, ackfr, NGTCP2_RTB_ENTRY_FLAG_NONE, NULL, ts);
+
+ if (spktlen <= 0) {
+ return spktlen;
+ }
+
+ conn->dcid.current.bytes_sent += (uint64_t)spktlen;
+
+ return spktlen;
+}
+
+static void conn_discard_pktns(ngtcp2_conn *conn, ngtcp2_pktns **ppktns,
+ ngtcp2_tstamp ts) {
+ ngtcp2_pktns *pktns = *ppktns;
+ uint64_t bytes_in_flight;
+
+ bytes_in_flight = pktns->rtb.cc_bytes_in_flight;
+
+ assert(conn->cstat.bytes_in_flight >= bytes_in_flight);
+
+ conn->cstat.bytes_in_flight -= bytes_in_flight;
+ conn->cstat.pto_count = 0;
+ conn->cstat.last_tx_pkt_ts[pktns->rtb.pktns_id] = UINT64_MAX;
+ conn->cstat.loss_time[pktns->rtb.pktns_id] = UINT64_MAX;
+
+ conn_call_delete_crypto_aead_ctx(conn, &pktns->crypto.rx.ckm->aead_ctx);
+ conn_call_delete_crypto_cipher_ctx(conn, &pktns->crypto.rx.hp_ctx);
+ conn_call_delete_crypto_aead_ctx(conn, &pktns->crypto.tx.ckm->aead_ctx);
+ conn_call_delete_crypto_cipher_ctx(conn, &pktns->crypto.tx.hp_ctx);
+
+ pktns_del(pktns, conn->mem);
+ *ppktns = NULL;
+
+ ngtcp2_conn_set_loss_detection_timer(conn, ts);
+}
+
+/*
+ * conn_discard_initial_state discards state for Initial packet number
+ * space.
+ */
+static void conn_discard_initial_state(ngtcp2_conn *conn, ngtcp2_tstamp ts) {
+ if (!conn->in_pktns) {
+ return;
+ }
+
+ ngtcp2_log_info(&conn->log, NGTCP2_LOG_EVENT_CON,
+ "discarding Initial packet number space");
+
+ conn_discard_pktns(conn, &conn->in_pktns, ts);
+
+ conn_vneg_crypto_free(conn);
+
+ memset(&conn->vneg.rx, 0, sizeof(conn->vneg.rx));
+ memset(&conn->vneg.tx, 0, sizeof(conn->vneg.tx));
+}
+
+/*
+ * conn_discard_handshake_state discards state for Handshake packet
+ * number space.
+ */
+static void conn_discard_handshake_state(ngtcp2_conn *conn, ngtcp2_tstamp ts) {
+ if (!conn->hs_pktns) {
+ return;
+ }
+
+ ngtcp2_log_info(&conn->log, NGTCP2_LOG_EVENT_CON,
+ "discarding Handshake packet number space");
+
+ conn_discard_pktns(conn, &conn->hs_pktns, ts);
+}
+
+/*
+ * conn_discard_early_key discards early key.
+ */
+static void conn_discard_early_key(ngtcp2_conn *conn) {
+ assert(conn->early.ckm);
+
+ ngtcp2_log_info(&conn->log, NGTCP2_LOG_EVENT_CON, "discarding early key");
+
+ conn_call_delete_crypto_aead_ctx(conn, &conn->early.ckm->aead_ctx);
+ conn_call_delete_crypto_cipher_ctx(conn, &conn->early.hp_ctx);
+ memset(&conn->early.hp_ctx, 0, sizeof(conn->early.hp_ctx));
+
+ ngtcp2_crypto_km_del(conn->early.ckm, conn->mem);
+ conn->early.ckm = NULL;
+}
+
+/*
+ * conn_write_handshake_ack_pkts writes packets which contain ACK
+ * frame only. This function writes at most 2 packets for each
+ * Initial and Handshake packet.
+ */
+static ngtcp2_ssize conn_write_handshake_ack_pkts(ngtcp2_conn *conn,
+ ngtcp2_pkt_info *pi,
+ uint8_t *dest, size_t destlen,
+ ngtcp2_tstamp ts) {
+ ngtcp2_ssize res = 0, nwrite = 0;
+
+ /* In the most cases, client sends ACK in conn_write_handshake_pkt.
+ This function is only called when it is CWND limited or pacing
+ limited. It is not required for client to send ACK for server
+ Initial. This is because once it gets server Initial, it gets
+ Handshake tx key and discards Initial key. The only good reason
+ to send ACK is give server RTT measurement early. */
+ if (conn->server && conn->in_pktns) {
+ nwrite =
+ conn_write_ack_pkt(conn, pi, dest, destlen, NGTCP2_PKT_INITIAL, ts);
+ if (nwrite < 0) {
+ assert(nwrite != NGTCP2_ERR_NOBUF);
+ return nwrite;
+ }
+
+ res += nwrite;
+ dest += nwrite;
+ destlen -= (size_t)nwrite;
+ }
+
+ if (conn->hs_pktns->crypto.tx.ckm) {
+ nwrite =
+ conn_write_ack_pkt(conn, pi, dest, destlen, NGTCP2_PKT_HANDSHAKE, ts);
+ if (nwrite < 0) {
+ assert(nwrite != NGTCP2_ERR_NOBUF);
+ return nwrite;
+ }
+
+ res += nwrite;
+
+ if (!conn->server && nwrite) {
+ conn_discard_initial_state(conn, ts);
+ }
+ }
+
+ return res;
+}
+
+/*
+ * conn_write_client_initial writes Initial packet in the buffer
+ * pointed by |dest| whose length is |destlen|.
+ *
+ * This function returns the number of bytes written in |dest| if it
+ * succeeds, or one of the following negative error codes:
+ *
+ * NGTCP2_ERR_NOMEM
+ * Out of memory.
+ * NGTCP2_ERR_CALLBACK_FAILURE
+ * User-defined callback function failed.
+ */
+static ngtcp2_ssize conn_write_client_initial(ngtcp2_conn *conn,
+ ngtcp2_pkt_info *pi,
+ uint8_t *dest, size_t destlen,
+ uint64_t early_datalen,
+ ngtcp2_tstamp ts) {
+ int rv;
+
+ rv = conn_call_client_initial(conn);
+ if (rv != 0) {
+ return rv;
+ }
+
+ return conn_write_handshake_pkt(conn, pi, dest, destlen, NGTCP2_PKT_INITIAL,
+ NGTCP2_WRITE_PKT_FLAG_NONE, early_datalen,
+ ts);
+}
+
+/*
+ * dcid_tx_left returns the maximum number of bytes that server is
+ * allowed to send to an unvalidated path associated to |dcid|.
+ */
+static uint64_t dcid_tx_left(ngtcp2_dcid *dcid) {
+ if (dcid->flags & NGTCP2_DCID_FLAG_PATH_VALIDATED) {
+ return SIZE_MAX;
+ }
+ /* From QUIC spec: Prior to validating the client address, servers
+ MUST NOT send more than three times as many bytes as the number
+ of bytes they have received. */
+ assert(dcid->bytes_recv * 3 >= dcid->bytes_sent);
+
+ return dcid->bytes_recv * 3 - dcid->bytes_sent;
+}
+
+/*
+ * conn_server_tx_left returns the maximum number of bytes that server
+ * is allowed to send to an unvalidated path.
+ */
+static uint64_t conn_server_tx_left(ngtcp2_conn *conn, ngtcp2_dcid *dcid) {
+ assert(conn->server);
+
+ /* If pv->dcid has the current path, use conn->dcid.current. This
+ is because conn->dcid.current gets update for bytes_recv and
+ bytes_sent. */
+ if (ngtcp2_path_eq(&dcid->ps.path, &conn->dcid.current.ps.path)) {
+ return dcid_tx_left(&conn->dcid.current);
+ }
+
+ return dcid_tx_left(dcid);
+}
+
+/*
+ * conn_write_handshake_pkts writes Initial and Handshake packets in
+ * the buffer pointed by |dest| whose length is |destlen|.
+ *
+ * This function returns the number of bytes written in |dest| if it
+ * succeeds, or one of the following negative error codes:
+ *
+ * NGTCP2_ERR_NOMEM
+ * Out of memory.
+ * NGTCP2_ERR_CALLBACK_FAILURE
+ * User-defined callback function failed.
+ */
+static ngtcp2_ssize conn_write_handshake_pkts(ngtcp2_conn *conn,
+ ngtcp2_pkt_info *pi,
+ uint8_t *dest, size_t destlen,
+ uint64_t write_datalen,
+ ngtcp2_tstamp ts) {
+ ngtcp2_ssize nwrite;
+ ngtcp2_ssize res = 0;
+ ngtcp2_rtb_entry *rtbent;
+ uint8_t wflags = NGTCP2_WRITE_PKT_FLAG_NONE;
+ ngtcp2_conn_stat *cstat = &conn->cstat;
+ ngtcp2_ksl_it it;
+
+ /* As a client, we would like to discard Initial packet number space
+ when sending the first Handshake packet. When sending Handshake
+ packet, it should be one of 1) sending ACK, 2) sending PTO probe
+ packet, or 3) sending CRYPTO. If we have pending acknowledgement
+ for Initial, then do not discard Initial packet number space.
+ Otherwise, if either 1) or 2) is satisfied, discard Initial
+ packet number space. When sending Handshake CRYPTO, it indicates
+ that client has received Handshake CRYPTO from server. Initial
+ packet number space is discarded because 1) is met. If there is
+ pending Initial ACK, Initial packet number space is discarded
+ after writing the first Handshake packet.
+ */
+ if (!conn->server && conn->hs_pktns->crypto.tx.ckm && conn->in_pktns &&
+ !ngtcp2_acktr_require_active_ack(&conn->in_pktns->acktr,
+ /* max_ack_delay = */ 0, ts) &&
+ (ngtcp2_acktr_require_active_ack(&conn->hs_pktns->acktr,
+ /* max_ack_delay = */ 0, ts) ||
+ conn->hs_pktns->rtb.probe_pkt_left)) {
+ /* Discard Initial state here so that Handshake packet is not
+ padded. */
+ conn_discard_initial_state(conn, ts);
+ } else if (conn->in_pktns) {
+ nwrite =
+ conn_write_handshake_pkt(conn, pi, dest, destlen, NGTCP2_PKT_INITIAL,
+ NGTCP2_WRITE_PKT_FLAG_NONE, write_datalen, ts);
+ if (nwrite < 0) {
+ assert(nwrite != NGTCP2_ERR_NOBUF);
+ return nwrite;
+ }
+
+ if (nwrite == 0) {
+ if (conn->server && (conn->in_pktns->rtb.probe_pkt_left ||
+ ngtcp2_ksl_len(&conn->in_pktns->crypto.tx.frq))) {
+ if (cstat->loss_detection_timer != UINT64_MAX &&
+ conn_server_tx_left(conn, &conn->dcid.current) <
+ NGTCP2_MAX_UDP_PAYLOAD_SIZE) {
+ ngtcp2_log_info(
+ &conn->log, NGTCP2_LOG_EVENT_RCV,
+ "loss detection timer canceled due to amplification limit");
+ cstat->loss_detection_timer = UINT64_MAX;
+ }
+
+ return 0;
+ }
+ } else {
+ res += nwrite;
+ dest += nwrite;
+ destlen -= (size_t)nwrite;
+
+ if (destlen) {
+ /* We might have already added padding to Initial, but in that
+ case, we should have destlen == 0 and no Handshake packet
+ will be written. */
+ if (conn->server) {
+ it = ngtcp2_rtb_head(&conn->in_pktns->rtb);
+ if (!ngtcp2_ksl_it_end(&it)) {
+ rtbent = ngtcp2_ksl_it_get(&it);
+ if (rtbent->flags & NGTCP2_RTB_ENTRY_FLAG_ACK_ELICITING) {
+ wflags |= NGTCP2_WRITE_PKT_FLAG_REQUIRE_PADDING;
+ }
+ }
+ } else {
+ wflags |= NGTCP2_WRITE_PKT_FLAG_REQUIRE_PADDING;
+ }
+ }
+ }
+ }
+
+ nwrite = conn_write_handshake_pkt(
+ conn, pi, dest, destlen, NGTCP2_PKT_HANDSHAKE, wflags, write_datalen, ts);
+ if (nwrite < 0) {
+ assert(nwrite != NGTCP2_ERR_NOBUF);
+ return nwrite;
+ }
+
+ res += nwrite;
+
+ if (!conn->server && conn->hs_pktns->crypto.tx.ckm && nwrite) {
+ /* We don't need to send further Initial packet if we have
+ Handshake key and sent something with it. So discard initial
+ state here. */
+ conn_discard_initial_state(conn, ts);
+ }
+
+ return res;
+}
+
+/*
+ * conn_initial_stream_rx_offset returns the initial maximum offset of
+ * data for a stream denoted by |stream_id|.
+ */
+static uint64_t conn_initial_stream_rx_offset(ngtcp2_conn *conn,
+ int64_t stream_id) {
+ int local_stream = conn_local_stream(conn, stream_id);
+
+ if (bidi_stream(stream_id)) {
+ if (local_stream) {
+ return conn->local.transport_params.initial_max_stream_data_bidi_local;
+ }
+ return conn->local.transport_params.initial_max_stream_data_bidi_remote;
+ }
+
+ if (local_stream) {
+ return 0;
+ }
+ return conn->local.transport_params.initial_max_stream_data_uni;
+}
+
+/*
+ * conn_should_send_max_stream_data returns nonzero if MAX_STREAM_DATA
+ * frame should be send for |strm|.
+ */
+static int conn_should_send_max_stream_data(ngtcp2_conn *conn,
+ ngtcp2_strm *strm) {
+ uint64_t inc = strm->rx.unsent_max_offset - strm->rx.max_offset;
+ (void)conn;
+
+ return strm->rx.window < 2 * inc;
+}
+
+/*
+ * conn_should_send_max_data returns nonzero if MAX_DATA frame should
+ * be sent.
+ */
+static int conn_should_send_max_data(ngtcp2_conn *conn) {
+ uint64_t inc = conn->rx.unsent_max_offset - conn->rx.max_offset;
+
+ return conn->rx.window < 2 * inc;
+}
+
+/*
+ * conn_required_num_new_connection_id returns the number of
+ * additional connection ID the local endpoint has to provide to the
+ * remote endpoint.
+ */
+static size_t conn_required_num_new_connection_id(ngtcp2_conn *conn) {
+ uint64_t n;
+ size_t len = ngtcp2_ksl_len(&conn->scid.set);
+
+ if (len >= NGTCP2_MAX_SCID_POOL_SIZE) {
+ return 0;
+ }
+
+ assert(conn->remote.transport_params);
+ assert(conn->remote.transport_params->active_connection_id_limit);
+
+ /* len includes retired CID. We don't provide extra CID if doing so
+ exceeds NGTCP2_MAX_SCID_POOL_SIZE. */
+
+ n = conn->remote.transport_params->active_connection_id_limit +
+ conn->scid.num_retired;
+
+ return (size_t)ngtcp2_min(NGTCP2_MAX_SCID_POOL_SIZE, n) - len;
+}
+
+/*
+ * conn_enqueue_new_connection_id generates additional connection IDs
+ * and prepares to send them to the remote endpoint.
+ *
+ * This function returns 0 if it succeeds, or one of the following
+ * negative error codes:
+ *
+ * NGTCP2_ERR_NOMEM
+ * Out of memory.
+ * NGTCP2_ERR_CALLBACK_FAILURE
+ * User-defined callback function failed.
+ */
+static int conn_enqueue_new_connection_id(ngtcp2_conn *conn) {
+ size_t i, need = conn_required_num_new_connection_id(conn);
+ size_t cidlen = conn->oscid.datalen;
+ ngtcp2_cid cid;
+ uint64_t seq;
+ int rv;
+ uint8_t token[NGTCP2_STATELESS_RESET_TOKENLEN];
+ ngtcp2_frame_chain *nfrc;
+ ngtcp2_pktns *pktns = &conn->pktns;
+ ngtcp2_scid *scid;
+ ngtcp2_ksl_it it;
+
+ for (i = 0; i < need; ++i) {
+ rv = conn_call_get_new_connection_id(conn, &cid, token, cidlen);
+ if (rv != 0) {
+ return rv;
+ }
+
+ if (cid.datalen != cidlen) {
+ return NGTCP2_ERR_CALLBACK_FAILURE;
+ }
+
+ /* Assert uniqueness */
+ it = ngtcp2_ksl_lower_bound(&conn->scid.set, &cid);
+ if (!ngtcp2_ksl_it_end(&it) &&
+ ngtcp2_cid_eq(ngtcp2_ksl_it_key(&it), &cid)) {
+ return NGTCP2_ERR_CALLBACK_FAILURE;
+ }
+
+ seq = ++conn->scid.last_seq;
+
+ scid = ngtcp2_mem_malloc(conn->mem, sizeof(*scid));
+ if (scid == NULL) {
+ return NGTCP2_ERR_NOMEM;
+ }
+
+ ngtcp2_scid_init(scid, seq, &cid);
+
+ rv = ngtcp2_ksl_insert(&conn->scid.set, NULL, &scid->cid, scid);
+ if (rv != 0) {
+ ngtcp2_mem_free(conn->mem, scid);
+ return rv;
+ }
+
+ rv = ngtcp2_frame_chain_objalloc_new(&nfrc, &conn->frc_objalloc);
+ if (rv != 0) {
+ return rv;
+ }
+
+ nfrc->fr.type = NGTCP2_FRAME_NEW_CONNECTION_ID;
+ nfrc->fr.new_connection_id.seq = seq;
+ nfrc->fr.new_connection_id.retire_prior_to = 0;
+ nfrc->fr.new_connection_id.cid = cid;
+ memcpy(nfrc->fr.new_connection_id.stateless_reset_token, token,
+ sizeof(token));
+ nfrc->next = pktns->tx.frq;
+ pktns->tx.frq = nfrc;
+ }
+
+ return 0;
+}
+
+/*
+ * conn_remove_retired_connection_id removes the already retired
+ * connection ID. It waits PTO before actually removing a connection
+ * ID after it receives RETIRE_CONNECTION_ID from peer to catch
+ * reordered packets.
+ *
+ * This function returns 0 if it succeeds, or one of the following
+ * negative error codes:
+ *
+ * NGTCP2_ERR_NOMEM
+ * Out of memory.
+ * NGTCP2_ERR_CALLBACK_FAILURE
+ * User-defined callback function failed.
+ */
+static int conn_remove_retired_connection_id(ngtcp2_conn *conn,
+ ngtcp2_duration pto,
+ ngtcp2_tstamp ts) {
+ ngtcp2_duration timeout = pto;
+ ngtcp2_scid *scid;
+ ngtcp2_dcid *dcid;
+ int rv;
+
+ for (; !ngtcp2_pq_empty(&conn->scid.used);) {
+ scid = ngtcp2_struct_of(ngtcp2_pq_top(&conn->scid.used), ngtcp2_scid, pe);
+
+ if (scid->retired_ts == UINT64_MAX || scid->retired_ts + timeout >= ts) {
+ break;
+ }
+
+ assert(scid->flags & NGTCP2_SCID_FLAG_RETIRED);
+
+ rv = conn_call_remove_connection_id(conn, &scid->cid);
+ if (rv != 0) {
+ return rv;
+ }
+
+ ngtcp2_ksl_remove(&conn->scid.set, NULL, &scid->cid);
+ ngtcp2_pq_pop(&conn->scid.used);
+ ngtcp2_mem_free(conn->mem, scid);
+
+ assert(conn->scid.num_retired);
+ --conn->scid.num_retired;
+ }
+
+ for (; ngtcp2_ringbuf_len(&conn->dcid.retired.rb);) {
+ dcid = ngtcp2_ringbuf_get(&conn->dcid.retired.rb, 0);
+ if (dcid->retired_ts + timeout >= ts) {
+ break;
+ }
+
+ rv = conn_call_deactivate_dcid(conn, dcid);
+ if (rv != 0) {
+ return rv;
+ }
+
+ ngtcp2_ringbuf_pop_front(&conn->dcid.retired.rb);
+ }
+
+ return 0;
+}
+
+/*
+ * conn_min_short_pktlen returns the minimum length of Short packet
+ * this endpoint sends.
+ */
+static size_t conn_min_short_pktlen(ngtcp2_conn *conn) {
+ return conn->dcid.current.cid.datalen + NGTCP2_MIN_PKT_EXPANDLEN;
+}
+
+/*
+ * conn_handle_unconfirmed_key_update_from_remote deals with key
+ * update which has not been confirmed yet and initiated by the remote
+ * endpoint.
+ *
+ * If key update was initiated by the remote endpoint, acknowledging a
+ * packet encrypted with the new key completes key update procedure.
+ */
+static void conn_handle_unconfirmed_key_update_from_remote(ngtcp2_conn *conn,
+ int64_t largest_ack,
+ ngtcp2_tstamp ts) {
+ if (!(conn->flags & NGTCP2_CONN_FLAG_KEY_UPDATE_NOT_CONFIRMED) ||
+ (conn->flags & NGTCP2_CONN_FLAG_KEY_UPDATE_INITIATOR) ||
+ largest_ack < conn->pktns.crypto.rx.ckm->pkt_num) {
+ return;
+ }
+
+ conn->flags &= (uint32_t)~NGTCP2_CONN_FLAG_KEY_UPDATE_NOT_CONFIRMED;
+ conn->crypto.key_update.confirmed_ts = ts;
+
+ ngtcp2_log_info(&conn->log, NGTCP2_LOG_EVENT_CRY, "key update confirmed");
+}
+
+/*
+ * conn_write_pkt writes a protected packet in the buffer pointed by
+ * |dest| whose length if |destlen|. |type| specifies the type of
+ * packet. It can be NGTCP2_PKT_1RTT or NGTCP2_PKT_0RTT.
+ *
+ * This function can send new stream data. In order to send stream
+ * data, specify the underlying stream and parameters to
+ * |vmsg|->stream. If |vmsg|->stream.fin is set to nonzero, it
+ * signals that the given data is the final portion of the stream.
+ * |vmsg|->stream.data vector of length |vmsg|->stream.datacnt
+ * specifies stream data to send. The number of bytes sent to the
+ * stream is assigned to *|vmsg|->stream.pdatalen. If 0 length STREAM
+ * data is sent, 0 is assigned to it. The caller should initialize
+ * *|vmsg|->stream.pdatalen to -1.
+ *
+ * If |require_padding| is nonzero, padding bytes are added to occupy
+ * the remaining packet payload.
+ *
+ * This function returns the number of bytes written in |dest| if it
+ * succeeds, or one of the following negative error codes:
+ *
+ * NGTCP2_ERR_NOMEM
+ * Out of memory.
+ * NGTCP2_ERR_CALLBACK_FAILURE
+ * User-defined callback function failed.
+ * NGTCP2_ERR_STREAM_DATA_BLOCKED
+ * Stream data could not be written because of flow control.
+ */
+static ngtcp2_ssize conn_write_pkt(ngtcp2_conn *conn, ngtcp2_pkt_info *pi,
+ uint8_t *dest, size_t destlen,
+ ngtcp2_vmsg *vmsg, uint8_t type,
+ uint8_t flags, ngtcp2_tstamp ts) {
+ int rv = 0;
+ ngtcp2_crypto_cc *cc = &conn->pkt.cc;
+ ngtcp2_ppe *ppe = &conn->pkt.ppe;
+ ngtcp2_pkt_hd *hd = &conn->pkt.hd;
+ ngtcp2_frame *ackfr = NULL, lfr;
+ ngtcp2_ssize nwrite;
+ ngtcp2_frame_chain **pfrc, *nfrc, *frc;
+ ngtcp2_rtb_entry *ent;
+ ngtcp2_strm *strm;
+ int pkt_empty = 1;
+ uint64_t ndatalen = 0;
+ int send_stream = 0;
+ int stream_blocked = 0;
+ int send_datagram = 0;
+ ngtcp2_pktns *pktns = &conn->pktns;
+ size_t left;
+ uint64_t datalen = 0;
+ ngtcp2_vec data[NGTCP2_MAX_STREAM_DATACNT];
+ size_t datacnt;
+ uint16_t rtb_entry_flags = NGTCP2_RTB_ENTRY_FLAG_NONE;
+ int hd_logged = 0;
+ ngtcp2_path_challenge_entry *pcent;
+ uint8_t hd_flags = NGTCP2_PKT_FLAG_NONE;
+ int require_padding = (flags & NGTCP2_WRITE_PKT_FLAG_REQUIRE_PADDING) != 0;
+ int write_more = (flags & NGTCP2_WRITE_PKT_FLAG_MORE) != 0;
+ int ppe_pending = (conn->flags & NGTCP2_CONN_FLAG_PPE_PENDING) != 0;
+ size_t min_pktlen = conn_min_short_pktlen(conn);
+ int padded = 0;
+ ngtcp2_cc_pkt cc_pkt;
+ uint64_t crypto_offset;
+ uint64_t stream_offset;
+ ngtcp2_ssize num_reclaimed;
+ int fin;
+ uint64_t target_max_data;
+ ngtcp2_conn_stat *cstat = &conn->cstat;
+ uint64_t delta;
+ const ngtcp2_cid *scid = NULL;
+ int keep_alive_expired = 0;
+ uint32_t version = 0;
+
+ /* Return 0 if destlen is less than minimum packet length which can
+ trigger Stateless Reset */
+ if (destlen < min_pktlen) {
+ return 0;
+ }
+
+ if (vmsg) {
+ switch (vmsg->type) {
+ case NGTCP2_VMSG_TYPE_STREAM:
+ datalen = ngtcp2_vec_len(vmsg->stream.data, vmsg->stream.datacnt);
+ ndatalen = conn_enforce_flow_control(conn, vmsg->stream.strm, datalen);
+ /* 0 length STREAM frame is allowed */
+ if (ndatalen || datalen == 0) {
+ send_stream = 1;
+ } else {
+ stream_blocked = 1;
+ }
+ break;
+ case NGTCP2_VMSG_TYPE_DATAGRAM:
+ datalen = ngtcp2_vec_len(vmsg->datagram.data, vmsg->datagram.datacnt);
+ send_datagram = 1;
+ break;
+ default:
+ break;
+ }
+ }
+
+ if (!ppe_pending) {
+ switch (type) {
+ case NGTCP2_PKT_1RTT:
+ hd_flags = conn_pkt_flags_short(conn);
+ scid = NULL;
+ cc->aead = pktns->crypto.ctx.aead;
+ cc->hp = pktns->crypto.ctx.hp;
+ cc->ckm = pktns->crypto.tx.ckm;
+ cc->hp_ctx = pktns->crypto.tx.hp_ctx;
+
+ assert(conn->negotiated_version);
+
+ version = conn->negotiated_version;
+
+ /* transport parameter is only valid after handshake completion
+ which means we don't know how many connection ID that remote
+ peer can accept before handshake completion. */
+ if (conn->oscid.datalen && conn_is_handshake_completed(conn)) {
+ rv = conn_enqueue_new_connection_id(conn);
+ if (rv != 0) {
+ return rv;
+ }
+ }
+
+ break;
+ case NGTCP2_PKT_0RTT:
+ assert(!conn->server);
+ if (!conn->early.ckm) {
+ return 0;
+ }
+ hd_flags = conn_pkt_flags_long(conn);
+ scid = &conn->oscid;
+ cc->aead = conn->early.ctx.aead;
+ cc->hp = conn->early.ctx.hp;
+ cc->ckm = conn->early.ckm;
+ cc->hp_ctx = conn->early.hp_ctx;
+ version = conn->client_chosen_version;
+ break;
+ default:
+ /* Unreachable */
+ ngtcp2_unreachable();
+ }
+
+ cc->encrypt = conn->callbacks.encrypt;
+ cc->hp_mask = conn->callbacks.hp_mask;
+
+ if (conn_should_send_max_data(conn)) {
+ rv = ngtcp2_frame_chain_objalloc_new(&nfrc, &conn->frc_objalloc);
+ if (rv != 0) {
+ return rv;
+ }
+
+ if (conn->local.settings.max_window &&
+ conn->tx.last_max_data_ts != UINT64_MAX &&
+ ts - conn->tx.last_max_data_ts <
+ NGTCP2_FLOW_WINDOW_RTT_FACTOR * cstat->smoothed_rtt &&
+ conn->local.settings.max_window > conn->rx.window) {
+ target_max_data = NGTCP2_FLOW_WINDOW_SCALING_FACTOR * conn->rx.window;
+ if (target_max_data > conn->local.settings.max_window) {
+ target_max_data = conn->local.settings.max_window;
+ }
+
+ delta = target_max_data - conn->rx.window;
+ if (conn->rx.unsent_max_offset + delta > NGTCP2_MAX_VARINT) {
+ delta = NGTCP2_MAX_VARINT - conn->rx.unsent_max_offset;
+ }
+
+ conn->rx.window = target_max_data;
+ } else {
+ delta = 0;
+ }
+
+ conn->tx.last_max_data_ts = ts;
+
+ nfrc->fr.type = NGTCP2_FRAME_MAX_DATA;
+ nfrc->fr.max_data.max_data = conn->rx.unsent_max_offset + delta;
+ nfrc->next = pktns->tx.frq;
+ pktns->tx.frq = nfrc;
+
+ conn->rx.max_offset = conn->rx.unsent_max_offset =
+ nfrc->fr.max_data.max_data;
+ }
+
+ ngtcp2_pkt_hd_init(hd, hd_flags, type, &conn->dcid.current.cid, scid,
+ pktns->tx.last_pkt_num + 1,
+ pktns_select_pkt_numlen(pktns), version, 0);
+
+ ngtcp2_ppe_init(ppe, dest, destlen, cc);
+
+ rv = ngtcp2_ppe_encode_hd(ppe, hd);
+ if (rv != 0) {
+ assert(NGTCP2_ERR_NOBUF == rv);
+ return 0;
+ }
+
+ if (!ngtcp2_ppe_ensure_hp_sample(ppe)) {
+ return 0;
+ }
+
+ if (ngtcp2_ringbuf_len(&conn->rx.path_challenge.rb)) {
+ pcent = ngtcp2_ringbuf_get(&conn->rx.path_challenge.rb, 0);
+
+ /* PATH_RESPONSE is bound to the path that the corresponding
+ PATH_CHALLENGE is received. */
+ if (ngtcp2_path_eq(&conn->dcid.current.ps.path, &pcent->ps.path)) {
+ lfr.type = NGTCP2_FRAME_PATH_RESPONSE;
+ memcpy(lfr.path_response.data, pcent->data,
+ sizeof(lfr.path_response.data));
+
+ rv = conn_ppe_write_frame_hd_log(conn, ppe, &hd_logged, hd, &lfr);
+ if (rv != 0) {
+ assert(NGTCP2_ERR_NOBUF == rv);
+ } else {
+ ngtcp2_ringbuf_pop_front(&conn->rx.path_challenge.rb);
+
+ pkt_empty = 0;
+ rtb_entry_flags |= NGTCP2_RTB_ENTRY_FLAG_ACK_ELICITING;
+ require_padding =
+ !conn->server || destlen >= NGTCP2_MAX_UDP_PAYLOAD_SIZE;
+ /* We don't retransmit PATH_RESPONSE. */
+ }
+ }
+ }
+
+ rv = conn_create_ack_frame(conn, &ackfr, pktns, type, ts,
+ conn_compute_ack_delay(conn),
+ conn->local.transport_params.ack_delay_exponent);
+ if (rv != 0) {
+ assert(ngtcp2_err_is_fatal(rv));
+ return rv;
+ }
+
+ if (ackfr) {
+ rv = conn_ppe_write_frame_hd_log(conn, ppe, &hd_logged, hd, ackfr);
+ if (rv != 0) {
+ assert(NGTCP2_ERR_NOBUF == rv);
+ } else {
+ ngtcp2_acktr_commit_ack(&pktns->acktr);
+ ngtcp2_acktr_add_ack(&pktns->acktr, hd->pkt_num,
+ ackfr->ack.largest_ack);
+ if (type == NGTCP2_PKT_1RTT) {
+ conn_handle_unconfirmed_key_update_from_remote(
+ conn, ackfr->ack.largest_ack, ts);
+ }
+ pkt_empty = 0;
+ }
+ }
+
+ build_pkt:
+ for (pfrc = &pktns->tx.frq; *pfrc;) {
+ if ((*pfrc)->binder &&
+ ((*pfrc)->binder->flags & NGTCP2_FRAME_CHAIN_BINDER_FLAG_ACK)) {
+ frc = *pfrc;
+ *pfrc = (*pfrc)->next;
+ ngtcp2_frame_chain_objalloc_del(frc, &conn->frc_objalloc, conn->mem);
+ continue;
+ }
+
+ switch ((*pfrc)->fr.type) {
+ case NGTCP2_FRAME_STOP_SENDING:
+ strm =
+ ngtcp2_conn_find_stream(conn, (*pfrc)->fr.stop_sending.stream_id);
+ if (strm == NULL || (strm->flags & NGTCP2_STRM_FLAG_SHUT_RD)) {
+ frc = *pfrc;
+ *pfrc = (*pfrc)->next;
+ ngtcp2_frame_chain_objalloc_del(frc, &conn->frc_objalloc, conn->mem);
+ continue;
+ }
+
+ if (!(strm->flags & NGTCP2_STRM_FLAG_STREAM_STOP_SENDING_CALLED)) {
+ strm->flags |= NGTCP2_STRM_FLAG_STREAM_STOP_SENDING_CALLED;
+
+ rv = conn_call_stream_stop_sending(
+ conn, (*pfrc)->fr.stop_sending.stream_id,
+ (*pfrc)->fr.stop_sending.app_error_code, strm->stream_user_data);
+ if (rv != 0) {
+ assert(ngtcp2_err_is_fatal(rv));
+ return rv;
+ }
+ }
+
+ break;
+ case NGTCP2_FRAME_STREAM:
+ ngtcp2_unreachable();
+ case NGTCP2_FRAME_MAX_STREAMS_BIDI:
+ if ((*pfrc)->fr.max_streams.max_streams <
+ conn->remote.bidi.max_streams) {
+ frc = *pfrc;
+ *pfrc = (*pfrc)->next;
+ ngtcp2_frame_chain_objalloc_del(frc, &conn->frc_objalloc, conn->mem);
+ continue;
+ }
+ break;
+ case NGTCP2_FRAME_MAX_STREAMS_UNI:
+ if ((*pfrc)->fr.max_streams.max_streams <
+ conn->remote.uni.max_streams) {
+ frc = *pfrc;
+ *pfrc = (*pfrc)->next;
+ ngtcp2_frame_chain_objalloc_del(frc, &conn->frc_objalloc, conn->mem);
+ continue;
+ }
+ break;
+ case NGTCP2_FRAME_MAX_STREAM_DATA:
+ strm = ngtcp2_conn_find_stream(conn,
+ (*pfrc)->fr.max_stream_data.stream_id);
+ if (strm == NULL || (strm->flags & NGTCP2_STRM_FLAG_SHUT_RD) ||
+ (*pfrc)->fr.max_stream_data.max_stream_data < strm->rx.max_offset) {
+ frc = *pfrc;
+ *pfrc = (*pfrc)->next;
+ ngtcp2_frame_chain_objalloc_del(frc, &conn->frc_objalloc, conn->mem);
+ continue;
+ }
+ break;
+ case NGTCP2_FRAME_MAX_DATA:
+ if ((*pfrc)->fr.max_data.max_data < conn->rx.max_offset) {
+ frc = *pfrc;
+ *pfrc = (*pfrc)->next;
+ ngtcp2_frame_chain_objalloc_del(frc, &conn->frc_objalloc, conn->mem);
+ continue;
+ }
+ break;
+ case NGTCP2_FRAME_CRYPTO:
+ ngtcp2_unreachable();
+ }
+
+ rv = conn_ppe_write_frame_hd_log(conn, ppe, &hd_logged, hd, &(*pfrc)->fr);
+ if (rv != 0) {
+ assert(NGTCP2_ERR_NOBUF == rv);
+ break;
+ }
+
+ pkt_empty = 0;
+ rtb_entry_flags |= NGTCP2_RTB_ENTRY_FLAG_ACK_ELICITING |
+ NGTCP2_RTB_ENTRY_FLAG_PTO_ELICITING |
+ NGTCP2_RTB_ENTRY_FLAG_RETRANSMITTABLE;
+ pfrc = &(*pfrc)->next;
+ }
+
+ if (rv != NGTCP2_ERR_NOBUF) {
+ for (; ngtcp2_ksl_len(&pktns->crypto.tx.frq);) {
+ left = ngtcp2_ppe_left(ppe);
+
+ crypto_offset = conn_cryptofrq_unacked_offset(conn, pktns);
+ if (crypto_offset == (size_t)-1) {
+ conn_cryptofrq_clear(conn, pktns);
+ break;
+ }
+
+ left = ngtcp2_pkt_crypto_max_datalen(crypto_offset, left, left);
+
+ if (left == (size_t)-1) {
+ break;
+ }
+
+ rv = conn_cryptofrq_pop(conn, &nfrc, pktns, left);
+ if (rv != 0) {
+ assert(ngtcp2_err_is_fatal(rv));
+ return rv;
+ }
+
+ if (nfrc == NULL) {
+ break;
+ }
+
+ rv = conn_ppe_write_frame_hd_log(conn, ppe, &hd_logged, hd, &nfrc->fr);
+ if (rv != 0) {
+ ngtcp2_unreachable();
+ }
+
+ *pfrc = nfrc;
+ pfrc = &(*pfrc)->next;
+
+ pkt_empty = 0;
+ rtb_entry_flags |= NGTCP2_RTB_ENTRY_FLAG_ACK_ELICITING |
+ NGTCP2_RTB_ENTRY_FLAG_PTO_ELICITING |
+ NGTCP2_RTB_ENTRY_FLAG_RETRANSMITTABLE;
+ }
+ }
+
+ /* Write MAX_STREAM_ID after RESET_STREAM so that we can extend stream
+ ID space in one packet. */
+ if (rv != NGTCP2_ERR_NOBUF && *pfrc == NULL &&
+ conn->remote.bidi.unsent_max_streams > conn->remote.bidi.max_streams) {
+ rv = conn_call_extend_max_remote_streams_bidi(
+ conn, conn->remote.bidi.unsent_max_streams);
+ if (rv != 0) {
+ assert(ngtcp2_err_is_fatal(rv));
+ return rv;
+ }
+
+ rv = ngtcp2_frame_chain_objalloc_new(&nfrc, &conn->frc_objalloc);
+ if (rv != 0) {
+ assert(ngtcp2_err_is_fatal(rv));
+ return rv;
+ }
+ nfrc->fr.type = NGTCP2_FRAME_MAX_STREAMS_BIDI;
+ nfrc->fr.max_streams.max_streams = conn->remote.bidi.unsent_max_streams;
+ *pfrc = nfrc;
+
+ conn->remote.bidi.max_streams = conn->remote.bidi.unsent_max_streams;
+
+ rv = conn_ppe_write_frame_hd_log(conn, ppe, &hd_logged, hd, &(*pfrc)->fr);
+ if (rv != 0) {
+ assert(NGTCP2_ERR_NOBUF == rv);
+ } else {
+ pkt_empty = 0;
+ rtb_entry_flags |= NGTCP2_RTB_ENTRY_FLAG_ACK_ELICITING |
+ NGTCP2_RTB_ENTRY_FLAG_PTO_ELICITING |
+ NGTCP2_RTB_ENTRY_FLAG_RETRANSMITTABLE;
+ pfrc = &(*pfrc)->next;
+ }
+ }
+
+ if (rv != NGTCP2_ERR_NOBUF && *pfrc == NULL) {
+ if (conn->remote.uni.unsent_max_streams > conn->remote.uni.max_streams) {
+ rv = conn_call_extend_max_remote_streams_uni(
+ conn, conn->remote.uni.unsent_max_streams);
+ if (rv != 0) {
+ assert(ngtcp2_err_is_fatal(rv));
+ return rv;
+ }
+
+ rv = ngtcp2_frame_chain_objalloc_new(&nfrc, &conn->frc_objalloc);
+ if (rv != 0) {
+ assert(ngtcp2_err_is_fatal(rv));
+ return rv;
+ }
+ nfrc->fr.type = NGTCP2_FRAME_MAX_STREAMS_UNI;
+ nfrc->fr.max_streams.max_streams = conn->remote.uni.unsent_max_streams;
+ *pfrc = nfrc;
+
+ conn->remote.uni.max_streams = conn->remote.uni.unsent_max_streams;
+
+ rv = conn_ppe_write_frame_hd_log(conn, ppe, &hd_logged, hd,
+ &(*pfrc)->fr);
+ if (rv != 0) {
+ assert(NGTCP2_ERR_NOBUF == rv);
+ } else {
+ pkt_empty = 0;
+ rtb_entry_flags |= NGTCP2_RTB_ENTRY_FLAG_ACK_ELICITING |
+ NGTCP2_RTB_ENTRY_FLAG_PTO_ELICITING |
+ NGTCP2_RTB_ENTRY_FLAG_RETRANSMITTABLE;
+ pfrc = &(*pfrc)->next;
+ }
+ }
+ }
+
+ if (rv != NGTCP2_ERR_NOBUF) {
+ for (; !ngtcp2_pq_empty(&conn->tx.strmq);) {
+ strm = ngtcp2_conn_tx_strmq_top(conn);
+
+ if (!(strm->flags & NGTCP2_STRM_FLAG_SHUT_RD) &&
+ conn_should_send_max_stream_data(conn, strm)) {
+ rv = ngtcp2_frame_chain_objalloc_new(&nfrc, &conn->frc_objalloc);
+ if (rv != 0) {
+ assert(ngtcp2_err_is_fatal(rv));
+ return rv;
+ }
+
+ if (conn->local.settings.max_stream_window &&
+ strm->tx.last_max_stream_data_ts != UINT64_MAX &&
+ ts - strm->tx.last_max_stream_data_ts <
+ NGTCP2_FLOW_WINDOW_RTT_FACTOR * cstat->smoothed_rtt &&
+ conn->local.settings.max_stream_window > strm->rx.window) {
+ target_max_data =
+ NGTCP2_FLOW_WINDOW_SCALING_FACTOR * strm->rx.window;
+ if (target_max_data > conn->local.settings.max_stream_window) {
+ target_max_data = conn->local.settings.max_stream_window;
+ }
+
+ delta = target_max_data - strm->rx.window;
+ if (strm->rx.unsent_max_offset + delta > NGTCP2_MAX_VARINT) {
+ delta = NGTCP2_MAX_VARINT - strm->rx.unsent_max_offset;
+ }
+
+ strm->rx.window = target_max_data;
+ } else {
+ delta = 0;
+ }
+
+ strm->tx.last_max_stream_data_ts = ts;
+
+ nfrc->fr.type = NGTCP2_FRAME_MAX_STREAM_DATA;
+ nfrc->fr.max_stream_data.stream_id = strm->stream_id;
+ nfrc->fr.max_stream_data.max_stream_data =
+ strm->rx.unsent_max_offset + delta;
+ ngtcp2_list_insert(nfrc, pfrc);
+
+ rv =
+ conn_ppe_write_frame_hd_log(conn, ppe, &hd_logged, hd, &nfrc->fr);
+ if (rv != 0) {
+ assert(NGTCP2_ERR_NOBUF == rv);
+ break;
+ }
+
+ pkt_empty = 0;
+ rtb_entry_flags |= NGTCP2_RTB_ENTRY_FLAG_ACK_ELICITING |
+ NGTCP2_RTB_ENTRY_FLAG_PTO_ELICITING |
+ NGTCP2_RTB_ENTRY_FLAG_RETRANSMITTABLE;
+ pfrc = &(*pfrc)->next;
+ strm->rx.max_offset = strm->rx.unsent_max_offset =
+ nfrc->fr.max_stream_data.max_stream_data;
+ }
+
+ if (ngtcp2_strm_streamfrq_empty(strm)) {
+ ngtcp2_conn_tx_strmq_pop(conn);
+ continue;
+ }
+
+ stream_offset = ngtcp2_strm_streamfrq_unacked_offset(strm);
+ if (stream_offset == (uint64_t)-1) {
+ ngtcp2_strm_streamfrq_clear(strm);
+ ngtcp2_conn_tx_strmq_pop(conn);
+ assert(conn->tx.strmq_nretrans);
+ --conn->tx.strmq_nretrans;
+ continue;
+ }
+
+ left = ngtcp2_ppe_left(ppe);
+
+ left = ngtcp2_pkt_stream_max_datalen(strm->stream_id, stream_offset,
+ left, left);
+
+ if (left == (size_t)-1) {
+ break;
+ }
+
+ rv = ngtcp2_strm_streamfrq_pop(strm, &nfrc, left);
+ if (rv != 0) {
+ assert(ngtcp2_err_is_fatal(rv));
+ return rv;
+ }
+
+ if (nfrc == NULL) {
+ /* TODO Why? */
+ break;
+ }
+
+ rv = conn_ppe_write_frame_hd_log(conn, ppe, &hd_logged, hd, &nfrc->fr);
+ if (rv != 0) {
+ ngtcp2_unreachable();
+ }
+
+ *pfrc = nfrc;
+ pfrc = &(*pfrc)->next;
+
+ pkt_empty = 0;
+ rtb_entry_flags |= NGTCP2_RTB_ENTRY_FLAG_ACK_ELICITING |
+ NGTCP2_RTB_ENTRY_FLAG_PTO_ELICITING |
+ NGTCP2_RTB_ENTRY_FLAG_RETRANSMITTABLE;
+
+ if (ngtcp2_strm_streamfrq_empty(strm)) {
+ ngtcp2_conn_tx_strmq_pop(conn);
+ assert(conn->tx.strmq_nretrans);
+ --conn->tx.strmq_nretrans;
+ continue;
+ }
+
+ ngtcp2_conn_tx_strmq_pop(conn);
+ ++strm->cycle;
+ rv = ngtcp2_conn_tx_strmq_push(conn, strm);
+ if (rv != 0) {
+ assert(ngtcp2_err_is_fatal(rv));
+ return rv;
+ }
+ }
+ }
+
+ if (rv != NGTCP2_ERR_NOBUF && !send_stream && !send_datagram &&
+ !(rtb_entry_flags & NGTCP2_RTB_ENTRY_FLAG_ACK_ELICITING) &&
+ pktns->rtb.num_retransmittable && pktns->tx.frq == NULL &&
+ pktns->rtb.probe_pkt_left) {
+ num_reclaimed = ngtcp2_rtb_reclaim_on_pto(&pktns->rtb, conn, pktns, 1);
+ if (num_reclaimed < 0) {
+ return rv;
+ }
+ if (num_reclaimed) {
+ goto build_pkt;
+ }
+
+ /* We had pktns->rtb.num_retransmittable > 0 but we were unable
+ to reclaim any frame. In this case, we do not have to send
+ any probe packet. */
+ if (pktns->rtb.num_pto_eliciting == 0) {
+ pktns->rtb.probe_pkt_left = 0;
+ ngtcp2_conn_set_loss_detection_timer(conn, ts);
+
+ if (pkt_empty && conn_cwnd_is_zero(conn) && !require_padding) {
+ return 0;
+ }
+ }
+ }
+ } else {
+ pfrc = conn->pkt.pfrc;
+ rtb_entry_flags |= conn->pkt.rtb_entry_flags;
+ pkt_empty = conn->pkt.pkt_empty;
+ hd_logged = conn->pkt.hd_logged;
+ }
+
+ left = ngtcp2_ppe_left(ppe);
+
+ if (rv != NGTCP2_ERR_NOBUF && send_stream && *pfrc == NULL &&
+ (ndatalen = ngtcp2_pkt_stream_max_datalen(
+ vmsg->stream.strm->stream_id, vmsg->stream.strm->tx.offset, ndatalen,
+ left)) != (size_t)-1 &&
+ (ndatalen || datalen == 0)) {
+ datacnt = ngtcp2_vec_copy_at_most(data, NGTCP2_MAX_STREAM_DATACNT,
+ vmsg->stream.data, vmsg->stream.datacnt,
+ (size_t)ndatalen);
+ ndatalen = ngtcp2_vec_len(data, datacnt);
+
+ assert((datacnt == 0 && datalen == 0) || (datacnt && datalen));
+
+ rv = ngtcp2_frame_chain_stream_datacnt_objalloc_new(
+ &nfrc, datacnt, &conn->frc_objalloc, conn->mem);
+ if (rv != 0) {
+ assert(ngtcp2_err_is_fatal(rv));
+ return rv;
+ }
+
+ nfrc->fr.stream.type = NGTCP2_FRAME_STREAM;
+ nfrc->fr.stream.flags = 0;
+ nfrc->fr.stream.stream_id = vmsg->stream.strm->stream_id;
+ nfrc->fr.stream.offset = vmsg->stream.strm->tx.offset;
+ nfrc->fr.stream.datacnt = datacnt;
+ ngtcp2_vec_copy(nfrc->fr.stream.data, data, datacnt);
+
+ fin = (vmsg->stream.flags & NGTCP2_WRITE_STREAM_FLAG_FIN) &&
+ ndatalen == datalen;
+ nfrc->fr.stream.fin = (uint8_t)fin;
+
+ rv = conn_ppe_write_frame_hd_log(conn, ppe, &hd_logged, hd, &nfrc->fr);
+ if (rv != 0) {
+ ngtcp2_unreachable();
+ }
+
+ *pfrc = nfrc;
+ pfrc = &(*pfrc)->next;
+
+ pkt_empty = 0;
+ rtb_entry_flags |= NGTCP2_RTB_ENTRY_FLAG_ACK_ELICITING |
+ NGTCP2_RTB_ENTRY_FLAG_PTO_ELICITING |
+ NGTCP2_RTB_ENTRY_FLAG_RETRANSMITTABLE;
+
+ vmsg->stream.strm->tx.offset += ndatalen;
+ conn->tx.offset += ndatalen;
+
+ if (fin) {
+ ngtcp2_strm_shutdown(vmsg->stream.strm, NGTCP2_STRM_FLAG_SHUT_WR);
+ }
+
+ if (vmsg->stream.pdatalen) {
+ *vmsg->stream.pdatalen = (ngtcp2_ssize)ndatalen;
+ }
+ } else {
+ send_stream = 0;
+ }
+
+ if (rv != NGTCP2_ERR_NOBUF && send_datagram &&
+ left >= ngtcp2_pkt_datagram_framelen((size_t)datalen)) {
+ if (conn->callbacks.ack_datagram || conn->callbacks.lost_datagram) {
+ rv = ngtcp2_frame_chain_objalloc_new(&nfrc, &conn->frc_objalloc);
+ if (rv != 0) {
+ assert(ngtcp2_err_is_fatal(rv));
+ return rv;
+ }
+
+ nfrc->fr.datagram.type = NGTCP2_FRAME_DATAGRAM_LEN;
+ nfrc->fr.datagram.dgram_id = vmsg->datagram.dgram_id;
+ nfrc->fr.datagram.datacnt = vmsg->datagram.datacnt;
+ nfrc->fr.datagram.data = (ngtcp2_vec *)vmsg->datagram.data;
+
+ rv = conn_ppe_write_frame_hd_log(conn, ppe, &hd_logged, hd, &nfrc->fr);
+ assert(rv == 0);
+
+ /* Because DATAGRAM will not be retransmitted, we do not use
+ data anymore. Just nullify it. The only reason to keep
+ track a frame is keep dgram_id to pass it to
+ ngtcp2_ack_datagram or ngtcp2_lost_datagram callbacks. */
+ nfrc->fr.datagram.datacnt = 0;
+ nfrc->fr.datagram.data = NULL;
+
+ *pfrc = nfrc;
+ pfrc = &(*pfrc)->next;
+ } else {
+ lfr.datagram.type = NGTCP2_FRAME_DATAGRAM_LEN;
+ lfr.datagram.datacnt = vmsg->datagram.datacnt;
+ lfr.datagram.data = (ngtcp2_vec *)vmsg->datagram.data;
+
+ rv = conn_ppe_write_frame_hd_log(conn, ppe, &hd_logged, hd, &lfr);
+ assert(rv == 0);
+ }
+
+ pkt_empty = 0;
+ rtb_entry_flags |=
+ NGTCP2_RTB_ENTRY_FLAG_ACK_ELICITING | NGTCP2_RTB_ENTRY_FLAG_DATAGRAM;
+
+ if (vmsg->datagram.paccepted) {
+ *vmsg->datagram.paccepted = 1;
+ }
+ } else {
+ send_datagram = 0;
+ }
+
+ if (pkt_empty) {
+ assert(rv == 0 || NGTCP2_ERR_NOBUF == rv);
+ if (rv == 0 && stream_blocked && ngtcp2_conn_get_max_data_left(conn)) {
+ return NGTCP2_ERR_STREAM_DATA_BLOCKED;
+ }
+
+ keep_alive_expired = conn_keep_alive_expired(conn, ts);
+
+ if (conn->pktns.rtb.probe_pkt_left == 0 && !keep_alive_expired &&
+ !require_padding) {
+ return 0;
+ }
+ } else if (write_more) {
+ conn->pkt.pfrc = pfrc;
+ conn->pkt.pkt_empty = pkt_empty;
+ conn->pkt.rtb_entry_flags = rtb_entry_flags;
+ conn->pkt.hd_logged = hd_logged;
+ conn->flags |= NGTCP2_CONN_FLAG_PPE_PENDING;
+
+ assert(vmsg);
+
+ switch (vmsg->type) {
+ case NGTCP2_VMSG_TYPE_STREAM:
+ if (send_stream) {
+ if (ngtcp2_ppe_left(ppe)) {
+ return NGTCP2_ERR_WRITE_MORE;
+ }
+ break;
+ }
+
+ if (ngtcp2_conn_get_max_data_left(conn) && stream_blocked) {
+ return NGTCP2_ERR_STREAM_DATA_BLOCKED;
+ }
+ break;
+ case NGTCP2_VMSG_TYPE_DATAGRAM:
+ if (send_datagram && ngtcp2_ppe_left(ppe)) {
+ return NGTCP2_ERR_WRITE_MORE;
+ }
+ /* If DATAGRAM cannot be written due to insufficient space,
+ continue to create a packet with the hope that application
+ calls ngtcp2_conn_writev_datagram again. */
+ break;
+ default:
+ ngtcp2_unreachable();
+ }
+ }
+
+ if (!(rtb_entry_flags & NGTCP2_RTB_ENTRY_FLAG_ACK_ELICITING)) {
+ if (pktns->tx.num_non_ack_pkt >= NGTCP2_MAX_NON_ACK_TX_PKT ||
+ keep_alive_expired || conn->pktns.rtb.probe_pkt_left) {
+ lfr.type = NGTCP2_FRAME_PING;
+
+ rv = conn_ppe_write_frame_hd_log(conn, ppe, &hd_logged, hd, &lfr);
+ if (rv != 0) {
+ assert(rv == NGTCP2_ERR_NOBUF);
+ /* TODO If buffer is too small, PING cannot be written if
+ packet is still empty. */
+ } else {
+ rtb_entry_flags |= NGTCP2_RTB_ENTRY_FLAG_ACK_ELICITING;
+ if (conn->pktns.rtb.probe_pkt_left) {
+ rtb_entry_flags |= NGTCP2_RTB_ENTRY_FLAG_PROBE;
+ }
+ pktns->tx.num_non_ack_pkt = 0;
+ }
+ } else {
+ ++pktns->tx.num_non_ack_pkt;
+ }
+ } else {
+ pktns->tx.num_non_ack_pkt = 0;
+ }
+
+ /* TODO Push STREAM frame back to ngtcp2_strm if there is an error
+ before ngtcp2_rtb_entry is safely created and added. */
+ if (require_padding ||
+ /* Making full sized packet will help GSO a bit */
+ ngtcp2_ppe_left(ppe) < 10) {
+ lfr.padding.len = ngtcp2_ppe_padding(ppe);
+ } else {
+ lfr.padding.len = ngtcp2_ppe_padding_size(ppe, min_pktlen);
+ }
+
+ if (lfr.padding.len) {
+ lfr.type = NGTCP2_FRAME_PADDING;
+ padded = 1;
+ ngtcp2_log_tx_fr(&conn->log, hd, &lfr);
+ ngtcp2_qlog_write_frame(&conn->qlog, &lfr);
+ }
+
+ nwrite = ngtcp2_ppe_final(ppe, NULL);
+ if (nwrite < 0) {
+ assert(ngtcp2_err_is_fatal((int)nwrite));
+ return nwrite;
+ }
+
+ ++cc->ckm->use_count;
+
+ ngtcp2_qlog_pkt_sent_end(&conn->qlog, hd, (size_t)nwrite);
+
+ /* TODO ack-eliciting vs needs-tracking */
+ /* probe packet needs tracking but it does not need ACK, could be lost. */
+ if ((rtb_entry_flags & NGTCP2_RTB_ENTRY_FLAG_ACK_ELICITING) || padded) {
+ if (pi) {
+ conn_handle_tx_ecn(conn, pi, &rtb_entry_flags, pktns, hd, ts);
+ }
+
+ rv = ngtcp2_rtb_entry_objalloc_new(&ent, hd, NULL, ts, (size_t)nwrite,
+ rtb_entry_flags,
+ &conn->rtb_entry_objalloc);
+ if (rv != 0) {
+ assert(ngtcp2_err_is_fatal((int)nwrite));
+ return rv;
+ }
+
+ if (*pfrc != pktns->tx.frq) {
+ ent->frc = pktns->tx.frq;
+ pktns->tx.frq = *pfrc;
+ *pfrc = NULL;
+ }
+
+ if ((rtb_entry_flags & NGTCP2_RTB_ENTRY_FLAG_ACK_ELICITING) &&
+ pktns->rtb.num_ack_eliciting == 0 && conn->cc.event) {
+ conn->cc.event(&conn->cc, &conn->cstat, NGTCP2_CC_EVENT_TYPE_TX_START,
+ ts);
+ }
+
+ rv = conn_on_pkt_sent(conn, &pktns->rtb, ent);
+ if (rv != 0) {
+ assert(ngtcp2_err_is_fatal(rv));
+ ngtcp2_rtb_entry_objalloc_del(ent, &conn->rtb_entry_objalloc,
+ &conn->frc_objalloc, conn->mem);
+ return rv;
+ }
+
+ if (rtb_entry_flags & NGTCP2_RTB_ENTRY_FLAG_ACK_ELICITING) {
+ if (conn->cc.on_pkt_sent) {
+ conn->cc.on_pkt_sent(
+ &conn->cc, &conn->cstat,
+ ngtcp2_cc_pkt_init(&cc_pkt, hd->pkt_num, (size_t)nwrite,
+ NGTCP2_PKTNS_ID_APPLICATION, ts, ent->rst.lost,
+ ent->rst.tx_in_flight, ent->rst.is_app_limited));
+ }
+
+ if (conn->flags & NGTCP2_CONN_FLAG_RESTART_IDLE_TIMER_ON_WRITE) {
+ conn_restart_timer_on_write(conn, ts);
+ }
+ }
+ } else if (pi && conn->tx.ecn.state == NGTCP2_ECN_STATE_CAPABLE) {
+ conn_handle_tx_ecn(conn, pi, NULL, pktns, hd, ts);
+ }
+
+ conn->flags &= (uint32_t)~NGTCP2_CONN_FLAG_PPE_PENDING;
+
+ if (pktns->rtb.probe_pkt_left &&
+ (rtb_entry_flags & NGTCP2_RTB_ENTRY_FLAG_ACK_ELICITING)) {
+ --pktns->rtb.probe_pkt_left;
+
+ ngtcp2_log_info(&conn->log, NGTCP2_LOG_EVENT_CON, "probe pkt size=%td",
+ nwrite);
+ }
+
+ conn_update_keep_alive_last_ts(conn, ts);
+
+ conn->dcid.current.bytes_sent += (uint64_t)nwrite;
+
+ conn->tx.pacing.pktlen += (size_t)nwrite;
+
+ ngtcp2_qlog_metrics_updated(&conn->qlog, &conn->cstat);
+
+ ++pktns->tx.last_pkt_num;
+
+ return nwrite;
+}
+
+ngtcp2_ssize ngtcp2_conn_write_single_frame_pkt(
+ ngtcp2_conn *conn, ngtcp2_pkt_info *pi, uint8_t *dest, size_t destlen,
+ uint8_t type, uint8_t flags, const ngtcp2_cid *dcid, ngtcp2_frame *fr,
+ uint16_t rtb_entry_flags, const ngtcp2_path *path, ngtcp2_tstamp ts) {
+ int rv;
+ ngtcp2_ppe ppe;
+ ngtcp2_pkt_hd hd;
+ ngtcp2_frame lfr;
+ ngtcp2_ssize nwrite;
+ ngtcp2_crypto_cc cc;
+ ngtcp2_pktns *pktns;
+ uint8_t hd_flags;
+ ngtcp2_rtb_entry *rtbent;
+ int padded = 0;
+ const ngtcp2_cid *scid;
+ uint32_t version;
+
+ switch (type) {
+ case NGTCP2_PKT_INITIAL:
+ pktns = conn->in_pktns;
+ hd_flags = conn_pkt_flags_long(conn);
+ scid = &conn->oscid;
+ version = conn->negotiated_version ? conn->negotiated_version
+ : conn->client_chosen_version;
+ if (version == conn->client_chosen_version) {
+ cc.ckm = pktns->crypto.tx.ckm;
+ cc.hp_ctx = pktns->crypto.tx.hp_ctx;
+ } else {
+ assert(version == conn->vneg.version);
+
+ cc.ckm = conn->vneg.tx.ckm;
+ cc.hp_ctx = conn->vneg.tx.hp_ctx;
+ }
+ break;
+ case NGTCP2_PKT_HANDSHAKE:
+ pktns = conn->hs_pktns;
+ hd_flags = conn_pkt_flags_long(conn);
+ scid = &conn->oscid;
+ version = conn->negotiated_version;
+ cc.ckm = pktns->crypto.tx.ckm;
+ cc.hp_ctx = pktns->crypto.tx.hp_ctx;
+ break;
+ case NGTCP2_PKT_1RTT:
+ pktns = &conn->pktns;
+ hd_flags = conn_pkt_flags_short(conn);
+ scid = NULL;
+ version = conn->negotiated_version;
+ cc.ckm = pktns->crypto.tx.ckm;
+ cc.hp_ctx = pktns->crypto.tx.hp_ctx;
+ break;
+ default:
+ /* We don't support 0-RTT packet in this function. */
+ ngtcp2_unreachable();
+ }
+
+ cc.aead = pktns->crypto.ctx.aead;
+ cc.hp = pktns->crypto.ctx.hp;
+ cc.encrypt = conn->callbacks.encrypt;
+ cc.hp_mask = conn->callbacks.hp_mask;
+
+ ngtcp2_pkt_hd_init(&hd, hd_flags, type, dcid, scid,
+ pktns->tx.last_pkt_num + 1, pktns_select_pkt_numlen(pktns),
+ version, 0);
+
+ ngtcp2_ppe_init(&ppe, dest, destlen, &cc);
+
+ rv = ngtcp2_ppe_encode_hd(&ppe, &hd);
+ if (rv != 0) {
+ assert(NGTCP2_ERR_NOBUF == rv);
+ return 0;
+ }
+
+ if (!ngtcp2_ppe_ensure_hp_sample(&ppe)) {
+ return 0;
+ }
+
+ ngtcp2_log_tx_pkt_hd(&conn->log, &hd);
+ ngtcp2_qlog_pkt_sent_start(&conn->qlog);
+
+ rv = conn_ppe_write_frame(conn, &ppe, &hd, fr);
+ if (rv != 0) {
+ assert(NGTCP2_ERR_NOBUF == rv);
+ return 0;
+ }
+
+ lfr.type = NGTCP2_FRAME_PADDING;
+ if (flags & NGTCP2_WRITE_PKT_FLAG_REQUIRE_PADDING) {
+ lfr.padding.len = ngtcp2_ppe_padding(&ppe);
+ } else {
+ switch (fr->type) {
+ case NGTCP2_FRAME_PATH_CHALLENGE:
+ case NGTCP2_FRAME_PATH_RESPONSE:
+ if (!conn->server || destlen >= NGTCP2_MAX_UDP_PAYLOAD_SIZE) {
+ lfr.padding.len = ngtcp2_ppe_padding(&ppe);
+ } else {
+ lfr.padding.len = 0;
+ }
+ break;
+ default:
+ if (type == NGTCP2_PKT_1RTT) {
+ lfr.padding.len =
+ ngtcp2_ppe_padding_size(&ppe, conn_min_short_pktlen(conn));
+ } else {
+ lfr.padding.len = ngtcp2_ppe_padding_hp_sample(&ppe);
+ }
+ }
+ }
+ if (lfr.padding.len) {
+ padded = 1;
+ ngtcp2_log_tx_fr(&conn->log, &hd, &lfr);
+ ngtcp2_qlog_write_frame(&conn->qlog, &lfr);
+ }
+
+ nwrite = ngtcp2_ppe_final(&ppe, NULL);
+ if (nwrite < 0) {
+ return nwrite;
+ }
+
+ if (type == NGTCP2_PKT_1RTT) {
+ ++cc.ckm->use_count;
+ }
+
+ ngtcp2_qlog_pkt_sent_end(&conn->qlog, &hd, (size_t)nwrite);
+
+ /* Do this when we are sure that there is no error. */
+ switch (fr->type) {
+ case NGTCP2_FRAME_ACK:
+ case NGTCP2_FRAME_ACK_ECN:
+ ngtcp2_acktr_commit_ack(&pktns->acktr);
+ ngtcp2_acktr_add_ack(&pktns->acktr, hd.pkt_num, fr->ack.largest_ack);
+ if (type == NGTCP2_PKT_1RTT) {
+ conn_handle_unconfirmed_key_update_from_remote(conn, fr->ack.largest_ack,
+ ts);
+ }
+ break;
+ }
+
+ if (((rtb_entry_flags & NGTCP2_RTB_ENTRY_FLAG_ACK_ELICITING) || padded) &&
+ (!path || ngtcp2_path_eq(&conn->dcid.current.ps.path, path))) {
+ if (pi && !(rtb_entry_flags & NGTCP2_RTB_ENTRY_FLAG_PMTUD_PROBE)) {
+ conn_handle_tx_ecn(conn, pi, &rtb_entry_flags, pktns, &hd, ts);
+ }
+
+ rv = ngtcp2_rtb_entry_objalloc_new(&rtbent, &hd, NULL, ts, (size_t)nwrite,
+ rtb_entry_flags,
+ &conn->rtb_entry_objalloc);
+ if (rv != 0) {
+ return rv;
+ }
+
+ rv = conn_on_pkt_sent(conn, &pktns->rtb, rtbent);
+ if (rv != 0) {
+ ngtcp2_rtb_entry_objalloc_del(rtbent, &conn->rtb_entry_objalloc,
+ &conn->frc_objalloc, conn->mem);
+ return rv;
+ }
+
+ if (rtb_entry_flags & NGTCP2_RTB_ENTRY_FLAG_ACK_ELICITING) {
+ if (conn->flags & NGTCP2_CONN_FLAG_RESTART_IDLE_TIMER_ON_WRITE) {
+ conn_restart_timer_on_write(conn, ts);
+ }
+
+ if (pktns->rtb.probe_pkt_left && path &&
+ ngtcp2_path_eq(&conn->dcid.current.ps.path, path)) {
+ --pktns->rtb.probe_pkt_left;
+
+ ngtcp2_log_info(&conn->log, NGTCP2_LOG_EVENT_CON, "probe pkt size=%td",
+ nwrite);
+ }
+ }
+ } else if (pi && !(rtb_entry_flags & NGTCP2_RTB_ENTRY_FLAG_PMTUD_PROBE) &&
+ conn->tx.ecn.state == NGTCP2_ECN_STATE_CAPABLE) {
+ conn_handle_tx_ecn(conn, pi, NULL, pktns, &hd, ts);
+ }
+
+ if (path && ngtcp2_path_eq(&conn->dcid.current.ps.path, path)) {
+ conn_update_keep_alive_last_ts(conn, ts);
+ }
+
+ if (!padded) {
+ switch (fr->type) {
+ case NGTCP2_FRAME_ACK:
+ case NGTCP2_FRAME_ACK_ECN:
+ break;
+ default:
+ conn->tx.pacing.pktlen += (size_t)nwrite;
+ }
+ } else {
+ conn->tx.pacing.pktlen += (size_t)nwrite;
+ }
+
+ ngtcp2_qlog_metrics_updated(&conn->qlog, &conn->cstat);
+
+ ++pktns->tx.last_pkt_num;
+
+ return nwrite;
+}
+
+/*
+ * conn_process_early_rtb makes any pending 0RTT packet 1RTT packet.
+ */
+static void conn_process_early_rtb(ngtcp2_conn *conn) {
+ ngtcp2_rtb_entry *ent;
+ ngtcp2_rtb *rtb = &conn->pktns.rtb;
+ ngtcp2_ksl_it it;
+
+ for (it = ngtcp2_rtb_head(rtb); !ngtcp2_ksl_it_end(&it);
+ ngtcp2_ksl_it_next(&it)) {
+ ent = ngtcp2_ksl_it_get(&it);
+
+ if ((ent->hd.flags & NGTCP2_PKT_FLAG_LONG_FORM) == 0 ||
+ ent->hd.type != NGTCP2_PKT_0RTT) {
+ continue;
+ }
+
+ /* 0-RTT packet is retransmitted as a 1RTT packet. */
+ ent->hd.flags &= (uint8_t)~NGTCP2_PKT_FLAG_LONG_FORM;
+ ent->hd.type = NGTCP2_PKT_1RTT;
+ }
+}
+
+/*
+ * conn_handshake_remnants_left returns nonzero if there may be
+ * handshake packets the local endpoint has to send, including new
+ * packets and lost ones.
+ */
+static int conn_handshake_remnants_left(ngtcp2_conn *conn) {
+ ngtcp2_pktns *in_pktns = conn->in_pktns;
+ ngtcp2_pktns *hs_pktns = conn->hs_pktns;
+
+ return !conn_is_handshake_completed(conn) ||
+ (in_pktns && (in_pktns->rtb.num_pto_eliciting ||
+ ngtcp2_ksl_len(&in_pktns->crypto.tx.frq))) ||
+ (hs_pktns && (hs_pktns->rtb.num_pto_eliciting ||
+ ngtcp2_ksl_len(&hs_pktns->crypto.tx.frq)));
+}
+
+/*
+ * conn_retire_dcid_seq retires destination connection ID denoted by
+ * |seq|.
+ *
+ * This function returns 0 if it succeeds, or one of the following
+ * negative error codes:
+ *
+ * NGTCP2_ERR_NOMEM
+ * Out of memory.
+ * NGTCP2_ERR_CONNECTION_ID_LIMIT
+ * The number of unacknowledged retirement exceeds the limit.
+ */
+static int conn_retire_dcid_seq(ngtcp2_conn *conn, uint64_t seq) {
+ ngtcp2_pktns *pktns = &conn->pktns;
+ ngtcp2_frame_chain *nfrc;
+ int rv;
+
+ rv = ngtcp2_conn_track_retired_dcid_seq(conn, seq);
+ if (rv != 0) {
+ return rv;
+ }
+
+ rv = ngtcp2_frame_chain_objalloc_new(&nfrc, &conn->frc_objalloc);
+ if (rv != 0) {
+ return rv;
+ }
+
+ nfrc->fr.type = NGTCP2_FRAME_RETIRE_CONNECTION_ID;
+ nfrc->fr.retire_connection_id.seq = seq;
+ nfrc->next = pktns->tx.frq;
+ pktns->tx.frq = nfrc;
+
+ return 0;
+}
+
+/*
+ * conn_retire_dcid retires |dcid|.
+ *
+ * This function returns 0 if it succeeds, or one of the following
+ * negative error codes:
+ *
+ * NGTCP2_ERR_NOMEM
+ * Out of memory
+ */
+static int conn_retire_dcid(ngtcp2_conn *conn, const ngtcp2_dcid *dcid,
+ ngtcp2_tstamp ts) {
+ ngtcp2_ringbuf *rb = &conn->dcid.retired.rb;
+ ngtcp2_dcid *dest, *stale_dcid;
+ int rv;
+
+ assert(dcid->cid.datalen);
+
+ if (ngtcp2_ringbuf_full(rb)) {
+ stale_dcid = ngtcp2_ringbuf_get(rb, 0);
+ rv = conn_call_deactivate_dcid(conn, stale_dcid);
+ if (rv != 0) {
+ return rv;
+ }
+
+ ngtcp2_ringbuf_pop_front(rb);
+ }
+
+ dest = ngtcp2_ringbuf_push_back(rb);
+ ngtcp2_dcid_copy(dest, dcid);
+ dest->retired_ts = ts;
+
+ return conn_retire_dcid_seq(conn, dcid->seq);
+}
+
+/*
+ * conn_bind_dcid stores the DCID to |*pdcid| bound to |path|. If
+ * such DCID is not found, bind the new DCID to |path| and stores it
+ * to |*pdcid|. If a remote endpoint uses zero-length connection ID,
+ * the pointer to conn->dcid.current is assigned to |*pdcid|.
+ *
+ * This function returns 0 if it succeeds, or one of the following
+ * negative error codes:
+ *
+ * NGTCP2_ERR_CONN_ID_BLOCKED
+ * No unused DCID is available
+ * NGTCP2_ERR_NOMEM
+ * Out of memory
+ */
+static int conn_bind_dcid(ngtcp2_conn *conn, ngtcp2_dcid **pdcid,
+ const ngtcp2_path *path, ngtcp2_tstamp ts) {
+ ngtcp2_dcid *dcid, *ndcid;
+ ngtcp2_cid cid;
+ size_t i, len;
+ int rv;
+
+ assert(!ngtcp2_path_eq(&conn->dcid.current.ps.path, path));
+ assert(!conn->pv || !ngtcp2_path_eq(&conn->pv->dcid.ps.path, path));
+ assert(!conn->pv || !(conn->pv->flags & NGTCP2_PV_FLAG_FALLBACK_ON_FAILURE) ||
+ !ngtcp2_path_eq(&conn->pv->fallback_dcid.ps.path, path));
+
+ len = ngtcp2_ringbuf_len(&conn->dcid.bound.rb);
+ for (i = 0; i < len; ++i) {
+ dcid = ngtcp2_ringbuf_get(&conn->dcid.bound.rb, i);
+
+ if (ngtcp2_path_eq(&dcid->ps.path, path)) {
+ *pdcid = dcid;
+ return 0;
+ }
+ }
+
+ if (conn->dcid.current.cid.datalen == 0) {
+ ndcid = ngtcp2_ringbuf_push_back(&conn->dcid.bound.rb);
+ ngtcp2_cid_zero(&cid);
+ ngtcp2_dcid_init(ndcid, ++conn->dcid.zerolen_seq, &cid, NULL);
+ ngtcp2_dcid_set_path(ndcid, path);
+
+ *pdcid = ndcid;
+
+ return 0;
+ }
+
+ if (ngtcp2_ringbuf_len(&conn->dcid.unused.rb) == 0) {
+ return NGTCP2_ERR_CONN_ID_BLOCKED;
+ }
+
+ if (ngtcp2_ringbuf_full(&conn->dcid.bound.rb)) {
+ dcid = ngtcp2_ringbuf_get(&conn->dcid.bound.rb, 0);
+ rv = conn_retire_dcid(conn, dcid, ts);
+ if (rv != 0) {
+ return rv;
+ }
+ }
+
+ dcid = ngtcp2_ringbuf_get(&conn->dcid.unused.rb, 0);
+ ndcid = ngtcp2_ringbuf_push_back(&conn->dcid.bound.rb);
+
+ ngtcp2_dcid_copy(ndcid, dcid);
+ ndcid->bound_ts = ts;
+ ngtcp2_dcid_set_path(ndcid, path);
+
+ ngtcp2_ringbuf_pop_front(&conn->dcid.unused.rb);
+
+ *pdcid = ndcid;
+
+ return 0;
+}
+
+static int conn_start_pmtud(ngtcp2_conn *conn) {
+ int rv;
+ size_t hard_max_udp_payload_size;
+
+ assert(!conn->local.settings.no_pmtud);
+ assert(!conn->pmtud);
+ assert(conn_is_handshake_completed(conn));
+ assert(conn->remote.transport_params);
+ assert(conn->remote.transport_params->max_udp_payload_size >=
+ NGTCP2_MAX_UDP_PAYLOAD_SIZE);
+
+ hard_max_udp_payload_size = (size_t)ngtcp2_min(
+ conn->remote.transport_params->max_udp_payload_size,
+ (uint64_t)conn->local.settings.max_tx_udp_payload_size);
+
+ rv = ngtcp2_pmtud_new(&conn->pmtud, conn->dcid.current.max_udp_payload_size,
+ hard_max_udp_payload_size,
+ conn->pktns.tx.last_pkt_num + 1, conn->mem);
+ if (rv != 0) {
+ return rv;
+ }
+
+ if (ngtcp2_pmtud_finished(conn->pmtud)) {
+ ngtcp2_conn_stop_pmtud(conn);
+ }
+
+ return 0;
+}
+
+int ngtcp2_conn_start_pmtud(ngtcp2_conn *conn) {
+ return conn_start_pmtud(conn);
+}
+
+void ngtcp2_conn_stop_pmtud(ngtcp2_conn *conn) {
+ if (!conn->pmtud) {
+ return;
+ }
+
+ ngtcp2_pmtud_del(conn->pmtud);
+
+ conn->pmtud = NULL;
+}
+
+static ngtcp2_ssize conn_write_pmtud_probe(ngtcp2_conn *conn,
+ ngtcp2_pkt_info *pi, uint8_t *dest,
+ size_t destlen, ngtcp2_tstamp ts) {
+ size_t probelen;
+ ngtcp2_ssize nwrite;
+ ngtcp2_frame lfr;
+
+ assert(conn->pmtud);
+ assert(!ngtcp2_pmtud_finished(conn->pmtud));
+
+ if (!ngtcp2_pmtud_require_probe(conn->pmtud)) {
+ return 0;
+ }
+
+ probelen = ngtcp2_pmtud_probelen(conn->pmtud);
+ if (probelen > destlen) {
+ return 0;
+ }
+
+ ngtcp2_log_info(&conn->log, NGTCP2_LOG_EVENT_CON,
+ "sending PMTUD probe packet len=%zu", probelen);
+
+ lfr.type = NGTCP2_FRAME_PING;
+
+ nwrite = ngtcp2_conn_write_single_frame_pkt(
+ conn, pi, dest, probelen, NGTCP2_PKT_1RTT,
+ NGTCP2_WRITE_PKT_FLAG_REQUIRE_PADDING, &conn->dcid.current.cid, &lfr,
+ NGTCP2_RTB_ENTRY_FLAG_ACK_ELICITING |
+ NGTCP2_RTB_ENTRY_FLAG_PTO_ELICITING |
+ NGTCP2_RTB_ENTRY_FLAG_PMTUD_PROBE,
+ NULL, ts);
+ if (nwrite < 0) {
+ return nwrite;
+ }
+
+ assert(nwrite);
+
+ ngtcp2_pmtud_probe_sent(conn->pmtud, conn_compute_pto(conn, &conn->pktns),
+ ts);
+
+ return nwrite;
+}
+
+/*
+ * conn_stop_pv stops the path validation which is currently running.
+ * This function does nothing if no path validation is currently being
+ * performed.
+ *
+ * This function returns 0 if it succeeds, or one of the following
+ * negative error codes:
+ *
+ * NGTCP2_ERR_NOMEM
+ * Out of memory
+ */
+static int conn_stop_pv(ngtcp2_conn *conn, ngtcp2_tstamp ts) {
+ int rv = 0;
+ ngtcp2_pv *pv = conn->pv;
+
+ if (pv == NULL) {
+ return 0;
+ }
+
+ if (pv->dcid.cid.datalen && pv->dcid.seq != conn->dcid.current.seq) {
+ rv = conn_retire_dcid(conn, &pv->dcid, ts);
+ if (rv != 0) {
+ goto fin;
+ }
+ }
+
+ if ((pv->flags & NGTCP2_PV_FLAG_FALLBACK_ON_FAILURE) &&
+ pv->fallback_dcid.cid.datalen &&
+ pv->fallback_dcid.seq != conn->dcid.current.seq &&
+ pv->fallback_dcid.seq != pv->dcid.seq) {
+ rv = conn_retire_dcid(conn, &pv->fallback_dcid, ts);
+ if (rv != 0) {
+ goto fin;
+ }
+ }
+
+fin:
+ ngtcp2_pv_del(pv);
+ conn->pv = NULL;
+
+ return rv;
+}
+
+/*
+ * conn_abort_pv aborts the current path validation and frees
+ * resources allocated for it. This function assumes that conn->pv is
+ * not NULL.
+ *
+ * This function returns 0 if it succeeds, or one of the following
+ * negative error codes:
+ *
+ * NGTCP2_ERR_NOMEM
+ * Out of memory
+ * NGTCP2_ERR_CALLBACK_FAILURE
+ * User-defined callback function failed.
+ */
+static int conn_abort_pv(ngtcp2_conn *conn, ngtcp2_tstamp ts) {
+ ngtcp2_pv *pv = conn->pv;
+ int rv;
+
+ assert(pv);
+
+ if (!(pv->flags & NGTCP2_PV_FLAG_DONT_CARE)) {
+ rv = conn_call_path_validation(conn, pv,
+ NGTCP2_PATH_VALIDATION_RESULT_ABORTED);
+ if (rv != 0) {
+ return rv;
+ }
+ }
+
+ return conn_stop_pv(conn, ts);
+}
+
+static size_t conn_shape_udp_payload(ngtcp2_conn *conn, const ngtcp2_dcid *dcid,
+ size_t payloadlen) {
+ if (conn->remote.transport_params &&
+ conn->remote.transport_params->max_udp_payload_size) {
+ assert(conn->remote.transport_params->max_udp_payload_size >=
+ NGTCP2_MAX_UDP_PAYLOAD_SIZE);
+
+ payloadlen =
+ (size_t)ngtcp2_min((uint64_t)payloadlen,
+ conn->remote.transport_params->max_udp_payload_size);
+ }
+
+ payloadlen =
+ ngtcp2_min(payloadlen, conn->local.settings.max_tx_udp_payload_size);
+
+ if (conn->local.settings.no_tx_udp_payload_size_shaping) {
+ return payloadlen;
+ }
+
+ return ngtcp2_min(payloadlen, dcid->max_udp_payload_size);
+}
+
+static void conn_reset_congestion_state(ngtcp2_conn *conn, ngtcp2_tstamp ts);
+
+/*
+ * conn_on_path_validation_failed is called when path validation
+ * fails. This function may delete |pv|.
+ *
+ * This function returns 0 if it succeeds, or one of the following
+ * negative error codes:
+ *
+ * NGTCP2_ERR_NOMEM
+ * Out of memory
+ * NGTCP2_ERR_CALLBACK_FAILURE
+ * User-defined callback function failed.
+ */
+static int conn_on_path_validation_failed(ngtcp2_conn *conn, ngtcp2_pv *pv,
+ ngtcp2_tstamp ts) {
+ int rv;
+
+ if (!(pv->flags & NGTCP2_PV_FLAG_DONT_CARE)) {
+ rv = conn_call_path_validation(conn, pv,
+ NGTCP2_PATH_VALIDATION_RESULT_FAILURE);
+ if (rv != 0) {
+ return rv;
+ }
+ }
+
+ if (pv->flags & NGTCP2_PV_FLAG_MTU_PROBE) {
+ return NGTCP2_ERR_NO_VIABLE_PATH;
+ }
+
+ if (pv->flags & NGTCP2_PV_FLAG_FALLBACK_ON_FAILURE) {
+ ngtcp2_dcid_copy(&conn->dcid.current, &pv->fallback_dcid);
+ conn_reset_congestion_state(conn, ts);
+ }
+
+ return conn_stop_pv(conn, ts);
+}
+
+/*
+ * conn_write_path_challenge writes a packet which includes
+ * PATH_CHALLENGE frame into |dest| of length |destlen|.
+ *
+ * This function returns the number of bytes written to |dest|, or one
+ * of the following negative error codes:
+ *
+ * NGTCP2_ERR_NOMEM
+ * Out of memory
+ * NGTCP2_ERR_CALLBACK_FAILURE
+ * User-defined callback function failed.
+ */
+static ngtcp2_ssize conn_write_path_challenge(ngtcp2_conn *conn,
+ ngtcp2_path *path,
+ ngtcp2_pkt_info *pi,
+ uint8_t *dest, size_t destlen,
+ ngtcp2_tstamp ts) {
+ ngtcp2_ssize nwrite;
+ ngtcp2_tstamp expiry;
+ ngtcp2_pv *pv = conn->pv;
+ ngtcp2_frame lfr;
+ ngtcp2_duration timeout;
+ uint8_t flags;
+ uint64_t tx_left;
+ int rv;
+
+ if (ngtcp2_pv_validation_timed_out(pv, ts)) {
+ ngtcp2_log_info(&conn->log, NGTCP2_LOG_EVENT_PTV,
+ "path validation was timed out");
+ rv = conn_on_path_validation_failed(conn, pv, ts);
+ if (rv != 0) {
+ return rv;
+ }
+
+ /* We might set path to the one which we just failed validate.
+ Set it to the current path here. */
+ if (path) {
+ ngtcp2_path_copy(path, &conn->dcid.current.ps.path);
+ }
+
+ return 0;
+ }
+
+ ngtcp2_pv_handle_entry_expiry(pv, ts);
+
+ if (!ngtcp2_pv_should_send_probe(pv)) {
+ return 0;
+ }
+
+ rv = conn_call_get_path_challenge_data(conn, lfr.path_challenge.data);
+ if (rv != 0) {
+ return rv;
+ }
+
+ lfr.type = NGTCP2_FRAME_PATH_CHALLENGE;
+
+ timeout = conn_compute_pto(conn, &conn->pktns);
+ timeout = ngtcp2_max(timeout, 3 * conn->cstat.initial_rtt);
+ expiry = ts + timeout * (1ULL << pv->round);
+
+ destlen = ngtcp2_min(destlen, NGTCP2_MAX_UDP_PAYLOAD_SIZE);
+
+ if (conn->server) {
+ if (!(pv->dcid.flags & NGTCP2_DCID_FLAG_PATH_VALIDATED)) {
+ tx_left = conn_server_tx_left(conn, &pv->dcid);
+ destlen = (size_t)ngtcp2_min((uint64_t)destlen, tx_left);
+ if (destlen == 0) {
+ return 0;
+ }
+ }
+
+ if (destlen < NGTCP2_MAX_UDP_PAYLOAD_SIZE) {
+ flags = NGTCP2_PV_ENTRY_FLAG_UNDERSIZED;
+ } else {
+ flags = NGTCP2_PV_ENTRY_FLAG_NONE;
+ }
+ } else {
+ flags = NGTCP2_PV_ENTRY_FLAG_NONE;
+ }
+
+ ngtcp2_pv_add_entry(pv, lfr.path_challenge.data, expiry, flags, ts);
+
+ nwrite = ngtcp2_conn_write_single_frame_pkt(
+ conn, pi, dest, destlen, NGTCP2_PKT_1RTT, NGTCP2_WRITE_PKT_FLAG_NONE,
+ &pv->dcid.cid, &lfr,
+ NGTCP2_RTB_ENTRY_FLAG_ACK_ELICITING | NGTCP2_RTB_ENTRY_FLAG_PTO_ELICITING,
+ &pv->dcid.ps.path, ts);
+ if (nwrite <= 0) {
+ return nwrite;
+ }
+
+ if (path) {
+ ngtcp2_path_copy(path, &pv->dcid.ps.path);
+ }
+
+ if (ngtcp2_path_eq(&pv->dcid.ps.path, &conn->dcid.current.ps.path)) {
+ conn->dcid.current.bytes_sent += (uint64_t)nwrite;
+ } else {
+ pv->dcid.bytes_sent += (uint64_t)nwrite;
+ }
+
+ return nwrite;
+}
+
+/*
+ * conn_write_path_response writes a packet which includes
+ * PATH_RESPONSE frame into |dest| of length |destlen|.
+ *
+ * This function returns the number of bytes written to |dest|, or one
+ * of the following negative error codes:
+ *
+ * NGTCP2_ERR_NOMEM
+ * Out of memory
+ * NGTCP2_ERR_CALLBACK_FAILURE
+ * User-defined callback function failed.
+ */
+static ngtcp2_ssize conn_write_path_response(ngtcp2_conn *conn,
+ ngtcp2_path *path,
+ ngtcp2_pkt_info *pi, uint8_t *dest,
+ size_t destlen, ngtcp2_tstamp ts) {
+ ngtcp2_pv *pv = conn->pv;
+ ngtcp2_path_challenge_entry *pcent = NULL;
+ ngtcp2_dcid *dcid = NULL;
+ ngtcp2_frame lfr;
+ ngtcp2_ssize nwrite;
+ int rv;
+ uint64_t tx_left;
+
+ for (; ngtcp2_ringbuf_len(&conn->rx.path_challenge.rb);) {
+ pcent = ngtcp2_ringbuf_get(&conn->rx.path_challenge.rb, 0);
+
+ if (ngtcp2_path_eq(&conn->dcid.current.ps.path, &pcent->ps.path)) {
+ /* Send PATH_RESPONSE from conn_write_pkt. */
+ return 0;
+ }
+
+ if (pv) {
+ if (ngtcp2_path_eq(&pv->dcid.ps.path, &pcent->ps.path)) {
+ dcid = &pv->dcid;
+ break;
+ }
+ if ((pv->flags & NGTCP2_PV_FLAG_FALLBACK_ON_FAILURE) &&
+ ngtcp2_path_eq(&pv->fallback_dcid.ps.path, &pcent->ps.path)) {
+ dcid = &pv->fallback_dcid;
+ break;
+ }
+ }
+
+ if (conn->server) {
+ break;
+ }
+
+ /* Client does not expect to respond to path validation against
+ unknown path */
+ ngtcp2_ringbuf_pop_front(&conn->rx.path_challenge.rb);
+ pcent = NULL;
+ }
+
+ if (pcent == NULL) {
+ return 0;
+ }
+
+ if (dcid == NULL) {
+ /* client is expected to have |path| in conn->dcid.current or
+ conn->pv. */
+ assert(conn->server);
+
+ rv = conn_bind_dcid(conn, &dcid, &pcent->ps.path, ts);
+ if (rv != 0) {
+ if (ngtcp2_err_is_fatal(rv)) {
+ return rv;
+ }
+ return 0;
+ }
+ }
+
+ destlen = ngtcp2_min(destlen, NGTCP2_MAX_UDP_PAYLOAD_SIZE);
+
+ if (conn->server && !(dcid->flags & NGTCP2_DCID_FLAG_PATH_VALIDATED)) {
+ tx_left = conn_server_tx_left(conn, dcid);
+ destlen = (size_t)ngtcp2_min((uint64_t)destlen, tx_left);
+ if (destlen == 0) {
+ return 0;
+ }
+ }
+
+ lfr.type = NGTCP2_FRAME_PATH_RESPONSE;
+ memcpy(lfr.path_response.data, pcent->data, sizeof(lfr.path_response.data));
+
+ nwrite = ngtcp2_conn_write_single_frame_pkt(
+ conn, pi, dest, destlen, NGTCP2_PKT_1RTT, NGTCP2_WRITE_PKT_FLAG_NONE,
+ &dcid->cid, &lfr, NGTCP2_RTB_ENTRY_FLAG_ACK_ELICITING, &pcent->ps.path,
+ ts);
+ if (nwrite <= 0) {
+ return nwrite;
+ }
+
+ if (path) {
+ ngtcp2_path_copy(path, &pcent->ps.path);
+ }
+
+ ngtcp2_ringbuf_pop_front(&conn->rx.path_challenge.rb);
+
+ dcid->bytes_sent += (uint64_t)nwrite;
+
+ return nwrite;
+}
+
+ngtcp2_ssize ngtcp2_conn_write_pkt_versioned(ngtcp2_conn *conn,
+ ngtcp2_path *path,
+ int pkt_info_version,
+ ngtcp2_pkt_info *pi, uint8_t *dest,
+ size_t destlen, ngtcp2_tstamp ts) {
+ return ngtcp2_conn_writev_stream_versioned(
+ conn, path, pkt_info_version, pi, dest, destlen,
+ /* pdatalen = */ NULL, NGTCP2_WRITE_STREAM_FLAG_NONE,
+ /* stream_id = */ -1,
+ /* datav = */ NULL, /* datavcnt = */ 0, ts);
+}
+
+/*
+ * conn_on_version_negotiation is called when Version Negotiation
+ * packet is received. The function decodes the data in the buffer
+ * pointed by |payload| whose length is |payloadlen| as Version
+ * Negotiation packet payload. The packet header is given in |hd|.
+ *
+ * This function returns 0 if it succeeds, or one of the following
+ * negative error codes:
+ *
+ * NGTCP2_ERR_NOMEM
+ * Out of memory.
+ * NGTCP2_ERR_CALLBACK_FAILURE
+ * User-defined callback function failed.
+ * NGTCP2_ERR_INVALID_ARGUMENT
+ * Packet payload is badly formatted.
+ */
+static int conn_on_version_negotiation(ngtcp2_conn *conn,
+ const ngtcp2_pkt_hd *hd,
+ const uint8_t *payload,
+ size_t payloadlen) {
+ uint32_t sv[16];
+ uint32_t *p;
+ int rv = 0;
+ size_t nsv;
+ size_t i;
+
+ if (payloadlen % sizeof(uint32_t)) {
+ return NGTCP2_ERR_INVALID_ARGUMENT;
+ }
+
+ /* Version Negotiation packet is ignored if client has reacted upon
+ Version Negotiation packet. */
+ if (conn->local.settings.original_version != conn->client_chosen_version) {
+ return NGTCP2_ERR_INVALID_ARGUMENT;
+ }
+
+ if (payloadlen > sizeof(sv)) {
+ p = ngtcp2_mem_malloc(conn->mem, payloadlen);
+ if (p == NULL) {
+ return NGTCP2_ERR_NOMEM;
+ }
+ } else {
+ p = sv;
+ }
+
+ nsv = ngtcp2_pkt_decode_version_negotiation(p, payload, payloadlen);
+
+ ngtcp2_log_rx_vn(&conn->log, hd, p, nsv);
+
+ ngtcp2_qlog_version_negotiation_pkt_received(&conn->qlog, hd, p, nsv);
+
+ if (!ngtcp2_is_reserved_version(conn->local.settings.original_version)) {
+ for (i = 0; i < nsv; ++i) {
+ if (p[i] == conn->local.settings.original_version) {
+ ngtcp2_log_info(&conn->log, NGTCP2_LOG_EVENT_PKT,
+ "ignore Version Negotiation because it contains the "
+ "original version");
+
+ rv = NGTCP2_ERR_INVALID_ARGUMENT;
+ goto fin;
+ }
+ }
+ }
+
+ rv = conn_call_recv_version_negotiation(conn, hd, p, nsv);
+ if (rv != 0) {
+ goto fin;
+ }
+
+fin:
+ if (p != sv) {
+ ngtcp2_mem_free(conn->mem, p);
+ }
+
+ return rv;
+}
+
+static uint64_t conn_tx_strmq_first_cycle(ngtcp2_conn *conn) {
+ ngtcp2_strm *strm;
+
+ if (ngtcp2_pq_empty(&conn->tx.strmq)) {
+ return 0;
+ }
+
+ strm = ngtcp2_struct_of(ngtcp2_pq_top(&conn->tx.strmq), ngtcp2_strm, pe);
+ return strm->cycle;
+}
+
+uint64_t ngtcp2_conn_tx_strmq_first_cycle(ngtcp2_conn *conn) {
+ ngtcp2_strm *strm;
+
+ if (ngtcp2_pq_empty(&conn->tx.strmq)) {
+ return 0;
+ }
+
+ strm = ngtcp2_struct_of(ngtcp2_pq_top(&conn->tx.strmq), ngtcp2_strm, pe);
+ return strm->cycle;
+}
+
+int ngtcp2_conn_resched_frames(ngtcp2_conn *conn, ngtcp2_pktns *pktns,
+ ngtcp2_frame_chain **pfrc) {
+ ngtcp2_frame_chain **first = pfrc;
+ ngtcp2_frame_chain *frc;
+ ngtcp2_stream *sfr;
+ ngtcp2_strm *strm;
+ int rv;
+ int streamfrq_empty;
+
+ if (*pfrc == NULL) {
+ return 0;
+ }
+
+ for (; *pfrc;) {
+ switch ((*pfrc)->fr.type) {
+ case NGTCP2_FRAME_STREAM:
+ frc = *pfrc;
+
+ *pfrc = frc->next;
+ frc->next = NULL;
+ sfr = &frc->fr.stream;
+
+ strm = ngtcp2_conn_find_stream(conn, sfr->stream_id);
+ if (!strm) {
+ ngtcp2_frame_chain_objalloc_del(frc, &conn->frc_objalloc, conn->mem);
+ break;
+ }
+ streamfrq_empty = ngtcp2_strm_streamfrq_empty(strm);
+ rv = ngtcp2_strm_streamfrq_push(strm, frc);
+ if (rv != 0) {
+ ngtcp2_frame_chain_objalloc_del(frc, &conn->frc_objalloc, conn->mem);
+ return rv;
+ }
+ if (!ngtcp2_strm_is_tx_queued(strm)) {
+ strm->cycle = conn_tx_strmq_first_cycle(conn);
+ rv = ngtcp2_conn_tx_strmq_push(conn, strm);
+ if (rv != 0) {
+ return rv;
+ }
+ }
+ if (streamfrq_empty) {
+ ++conn->tx.strmq_nretrans;
+ }
+ break;
+ case NGTCP2_FRAME_CRYPTO:
+ frc = *pfrc;
+
+ *pfrc = frc->next;
+ frc->next = NULL;
+
+ rv = ngtcp2_ksl_insert(&pktns->crypto.tx.frq, NULL,
+ &frc->fr.crypto.offset, frc);
+ if (rv != 0) {
+ assert(ngtcp2_err_is_fatal(rv));
+ ngtcp2_frame_chain_objalloc_del(frc, &conn->frc_objalloc, conn->mem);
+ return rv;
+ }
+ break;
+ default:
+ pfrc = &(*pfrc)->next;
+ }
+ }
+
+ *pfrc = pktns->tx.frq;
+ pktns->tx.frq = *first;
+
+ return 0;
+}
+
+/*
+ * conn_on_retry is called when Retry packet is received. The
+ * function decodes the data in the buffer pointed by |pkt| whose
+ * length is |pktlen| as Retry packet. The length of long packet
+ * header is given in |hdpktlen|. |pkt| includes packet header.
+ *
+ * This function returns 0 if it succeeds, or one of the following
+ * negative error codes:
+ *
+ * NGTCP2_ERR_NOMEM
+ * Out of memory.
+ * NGTCP2_ERR_CALLBACK_FAILURE
+ * User-defined callback function failed.
+ * NGTCP2_ERR_INVALID_ARGUMENT
+ * Packet payload is badly formatted.
+ * NGTCP2_ERR_PROTO
+ * ODCID does not match; or Token is empty.
+ */
+static int conn_on_retry(ngtcp2_conn *conn, const ngtcp2_pkt_hd *hd,
+ size_t hdpktlen, const uint8_t *pkt, size_t pktlen,
+ ngtcp2_tstamp ts) {
+ int rv;
+ ngtcp2_pkt_retry retry;
+ ngtcp2_pktns *in_pktns = conn->in_pktns;
+ ngtcp2_rtb *rtb = &conn->pktns.rtb;
+ ngtcp2_rtb *in_rtb;
+ uint8_t cidbuf[sizeof(retry.odcid.data) * 2 + 1];
+ uint8_t *token;
+
+ if (!in_pktns || conn->flags & NGTCP2_CONN_FLAG_RECV_RETRY) {
+ return 0;
+ }
+
+ in_rtb = &in_pktns->rtb;
+
+ rv = ngtcp2_pkt_decode_retry(&retry, pkt + hdpktlen, pktlen - hdpktlen);
+ if (rv != 0) {
+ return rv;
+ }
+
+ retry.odcid = conn->dcid.current.cid;
+
+ rv = ngtcp2_pkt_verify_retry_tag(
+ conn->client_chosen_version, &retry, pkt, pktlen, conn->callbacks.encrypt,
+ &conn->crypto.retry_aead, &conn->crypto.retry_aead_ctx);
+ if (rv != 0) {
+ ngtcp2_log_info(&conn->log, NGTCP2_LOG_EVENT_PKT,
+ "unable to verify Retry packet integrity");
+ return rv;
+ }
+
+ ngtcp2_log_info(&conn->log, NGTCP2_LOG_EVENT_PKT, "odcid=0x%s",
+ (const char *)ngtcp2_encode_hex(cidbuf, retry.odcid.data,
+ retry.odcid.datalen));
+
+ if (retry.tokenlen == 0) {
+ return NGTCP2_ERR_PROTO;
+ }
+
+ if (ngtcp2_cid_eq(&conn->dcid.current.cid, &hd->scid)) {
+ return 0;
+ }
+
+ ngtcp2_qlog_retry_pkt_received(&conn->qlog, hd, &retry);
+
+ /* DCID must be updated before invoking callback because client
+ generates new initial keys there. */
+ conn->dcid.current.cid = hd->scid;
+ conn->retry_scid = hd->scid;
+
+ conn->flags |= NGTCP2_CONN_FLAG_RECV_RETRY;
+
+ rv = conn_call_recv_retry(conn, hd);
+ if (rv != 0) {
+ return rv;
+ }
+
+ conn->state = NGTCP2_CS_CLIENT_INITIAL;
+
+ /* Just freeing memory is dangerous because we might free twice. */
+
+ rv = ngtcp2_rtb_remove_all(rtb, conn, &conn->pktns, &conn->cstat);
+ if (rv != 0) {
+ return rv;
+ }
+
+ rv = ngtcp2_rtb_remove_all(in_rtb, conn, in_pktns, &conn->cstat);
+ if (rv != 0) {
+ return rv;
+ }
+
+ ngtcp2_mem_free(conn->mem, (uint8_t *)conn->local.settings.token);
+ conn->local.settings.token = NULL;
+ conn->local.settings.tokenlen = 0;
+
+ token = ngtcp2_mem_malloc(conn->mem, retry.tokenlen);
+ if (token == NULL) {
+ return NGTCP2_ERR_NOMEM;
+ }
+
+ ngtcp2_cpymem(token, retry.token, retry.tokenlen);
+
+ conn->local.settings.token = token;
+ conn->local.settings.tokenlen = retry.tokenlen;
+
+ reset_conn_stat_recovery(&conn->cstat);
+ conn_reset_congestion_state(conn, ts);
+ conn_reset_ecn_validation_state(conn);
+
+ return 0;
+}
+
+int ngtcp2_conn_detect_lost_pkt(ngtcp2_conn *conn, ngtcp2_pktns *pktns,
+ ngtcp2_conn_stat *cstat, ngtcp2_tstamp ts) {
+ return ngtcp2_rtb_detect_lost_pkt(&pktns->rtb, conn, pktns, cstat, ts);
+}
+
+/*
+ * conn_recv_ack processes received ACK frame |fr|. |pkt_ts| is the
+ * timestamp when packet is received. |ts| should be the current
+ * time. Usually they are the same, but for buffered packets,
+ * |pkt_ts| would be earlier than |ts|.
+ *
+ * This function returns 0 if it succeeds, or one of the following
+ * negative error codes:
+ *
+ * NGTCP2_ERR_NOMEM
+ * Out of memory
+ * NGTCP2_ERR_ACK_FRAME
+ * ACK frame is malformed.
+ * NGTCP2_ERR_PROTO
+ * |fr| acknowledges a packet this endpoint has not sent.
+ * NGTCP2_ERR_CALLBACK_FAILURE
+ * User callback failed.
+ */
+static int conn_recv_ack(ngtcp2_conn *conn, ngtcp2_pktns *pktns, ngtcp2_ack *fr,
+ ngtcp2_tstamp pkt_ts, ngtcp2_tstamp ts) {
+ int rv;
+ ngtcp2_frame_chain *frc = NULL;
+ ngtcp2_ssize num_acked;
+ ngtcp2_conn_stat *cstat = &conn->cstat;
+
+ if (pktns->tx.last_pkt_num < fr->largest_ack) {
+ return NGTCP2_ERR_PROTO;
+ }
+
+ rv = ngtcp2_pkt_validate_ack(fr);
+ if (rv != 0) {
+ return rv;
+ }
+
+ ngtcp2_acktr_recv_ack(&pktns->acktr, fr);
+
+ num_acked = ngtcp2_rtb_recv_ack(&pktns->rtb, fr, &conn->cstat, conn, pktns,
+ pkt_ts, ts);
+ if (num_acked < 0) {
+ /* TODO assert this */
+ assert(ngtcp2_err_is_fatal((int)num_acked));
+ ngtcp2_frame_chain_list_objalloc_del(frc, &conn->frc_objalloc, conn->mem);
+ return (int)num_acked;
+ }
+
+ if (num_acked == 0) {
+ return 0;
+ }
+
+ pktns->rtb.probe_pkt_left = 0;
+
+ if (cstat->pto_count &&
+ (conn->server || (conn->flags & NGTCP2_CONN_FLAG_SERVER_ADDR_VERIFIED))) {
+ /* Reset PTO count but no less than 2 to avoid frequent probe
+ packet transmission. */
+ cstat->pto_count = ngtcp2_min(cstat->pto_count, 2);
+ }
+
+ ngtcp2_conn_set_loss_detection_timer(conn, ts);
+
+ return 0;
+}
+
+/*
+ * conn_assign_recved_ack_delay_unscaled assigns
+ * fr->ack_delay_unscaled.
+ */
+static void assign_recved_ack_delay_unscaled(ngtcp2_ack *fr,
+ uint64_t ack_delay_exponent) {
+ fr->ack_delay_unscaled =
+ fr->ack_delay * (1ULL << ack_delay_exponent) * NGTCP2_MICROSECONDS;
+}
+
+/*
+ * conn_recv_max_stream_data processes received MAX_STREAM_DATA frame
+ * |fr|.
+ *
+ * This function returns 0 if it succeeds, or one of the following
+ * negative error codes:
+ *
+ * NGTCP2_ERR_STREAM_STATE
+ * Stream ID indicates that it is a local stream, and the local
+ * endpoint has not initiated it; or stream is peer initiated
+ * unidirectional stream.
+ * NGTCP2_ERR_STREAM_LIMIT
+ * Stream ID exceeds allowed limit.
+ * NGTCP2_ERR_NOMEM
+ * Out of memory.
+ */
+static int conn_recv_max_stream_data(ngtcp2_conn *conn,
+ const ngtcp2_max_stream_data *fr) {
+ ngtcp2_strm *strm;
+ ngtcp2_idtr *idtr;
+ int local_stream = conn_local_stream(conn, fr->stream_id);
+ int bidi = bidi_stream(fr->stream_id);
+ int rv;
+
+ if (bidi) {
+ if (local_stream) {
+ if (conn->local.bidi.next_stream_id <= fr->stream_id) {
+ return NGTCP2_ERR_STREAM_STATE;
+ }
+ } else if (conn->remote.bidi.max_streams <
+ ngtcp2_ord_stream_id(fr->stream_id)) {
+ return NGTCP2_ERR_STREAM_LIMIT;
+ }
+
+ idtr = &conn->remote.bidi.idtr;
+ } else {
+ if (!local_stream || conn->local.uni.next_stream_id <= fr->stream_id) {
+ return NGTCP2_ERR_STREAM_STATE;
+ }
+
+ idtr = &conn->remote.uni.idtr;
+ }
+
+ strm = ngtcp2_conn_find_stream(conn, fr->stream_id);
+ if (strm == NULL) {
+ if (local_stream) {
+ /* Stream has been closed. */
+ return 0;
+ }
+
+ rv = ngtcp2_idtr_open(idtr, fr->stream_id);
+ if (rv != 0) {
+ if (ngtcp2_err_is_fatal(rv)) {
+ return rv;
+ }
+ assert(rv == NGTCP2_ERR_STREAM_IN_USE);
+ /* Stream has been closed. */
+ return 0;
+ }
+
+ strm = ngtcp2_objalloc_strm_get(&conn->strm_objalloc);
+ if (strm == NULL) {
+ return NGTCP2_ERR_NOMEM;
+ }
+ rv = ngtcp2_conn_init_stream(conn, strm, fr->stream_id, NULL);
+ if (rv != 0) {
+ ngtcp2_objalloc_strm_release(&conn->strm_objalloc, strm);
+ return rv;
+ }
+
+ rv = conn_call_stream_open(conn, strm);
+ if (rv != 0) {
+ return rv;
+ }
+ }
+
+ if (strm->tx.max_offset < fr->max_stream_data) {
+ strm->tx.max_offset = fr->max_stream_data;
+
+ /* Don't call callback if stream is half-closed local */
+ if (strm->flags & NGTCP2_STRM_FLAG_SHUT_WR) {
+ return 0;
+ }
+
+ rv = conn_call_extend_max_stream_data(conn, strm, fr->stream_id,
+ fr->max_stream_data);
+ if (rv != 0) {
+ return rv;
+ }
+ }
+
+ return 0;
+}
+
+/*
+ * conn_recv_max_data processes received MAX_DATA frame |fr|.
+ */
+static void conn_recv_max_data(ngtcp2_conn *conn, const ngtcp2_max_data *fr) {
+ conn->tx.max_offset = ngtcp2_max(conn->tx.max_offset, fr->max_data);
+}
+
+/*
+ * conn_buffer_pkt buffers |pkt| of length |pktlen|, chaining it from
+ * |*ppc|.
+ *
+ * This function returns 0 if it succeeds, or one of the following
+ * negative error codes:
+ *
+ * NGTCP2_ERR_NOMEM
+ * Out of memory.
+ */
+static int conn_buffer_pkt(ngtcp2_conn *conn, ngtcp2_pktns *pktns,
+ const ngtcp2_path *path, const ngtcp2_pkt_info *pi,
+ const uint8_t *pkt, size_t pktlen, size_t dgramlen,
+ ngtcp2_tstamp ts) {
+ int rv;
+ ngtcp2_pkt_chain **ppc = &pktns->rx.buffed_pkts, *pc;
+ size_t i;
+ for (i = 0; *ppc && i < NGTCP2_MAX_NUM_BUFFED_RX_PKTS;
+ ppc = &(*ppc)->next, ++i)
+ ;
+
+ if (i == NGTCP2_MAX_NUM_BUFFED_RX_PKTS) {
+ return 0;
+ }
+
+ rv =
+ ngtcp2_pkt_chain_new(&pc, path, pi, pkt, pktlen, dgramlen, ts, conn->mem);
+ if (rv != 0) {
+ return rv;
+ }
+
+ *ppc = pc;
+
+ return 0;
+}
+
+static int ensure_decrypt_buffer(ngtcp2_vec *vec, size_t n, size_t initial,
+ const ngtcp2_mem *mem) {
+ uint8_t *nbuf;
+ size_t len;
+
+ if (vec->len >= n) {
+ return 0;
+ }
+
+ len = vec->len == 0 ? initial : vec->len * 2;
+ for (; len < n; len *= 2)
+ ;
+ nbuf = ngtcp2_mem_realloc(mem, vec->base, len);
+ if (nbuf == NULL) {
+ return NGTCP2_ERR_NOMEM;
+ }
+ vec->base = nbuf;
+ vec->len = len;
+
+ return 0;
+}
+
+/*
+ * conn_ensure_decrypt_hp_buffer ensures that
+ * conn->crypto.decrypt_hp_buf has at least |n| bytes space.
+ *
+ * This function returns 0 if it succeeds, or one of the following
+ * negative error codes:
+ *
+ * NGTCP2_ERR_NOMEM
+ * Out of memory.
+ */
+static int conn_ensure_decrypt_hp_buffer(ngtcp2_conn *conn, size_t n) {
+ return ensure_decrypt_buffer(&conn->crypto.decrypt_hp_buf, n, 256, conn->mem);
+}
+
+/*
+ * conn_ensure_decrypt_buffer ensures that conn->crypto.decrypt_buf
+ * has at least |n| bytes space.
+ *
+ * This function returns 0 if it succeeds, or one of the following
+ * negative error codes:
+ *
+ * NGTCP2_ERR_NOMEM
+ * Out of memory.
+ */
+static int conn_ensure_decrypt_buffer(ngtcp2_conn *conn, size_t n) {
+ return ensure_decrypt_buffer(&conn->crypto.decrypt_buf, n, 2048, conn->mem);
+}
+
+/*
+ * decrypt_pkt decrypts the data pointed by |payload| whose length is
+ * |payloadlen|, and writes plaintext data to the buffer pointed by
+ * |dest|. The buffer pointed by |aad| is the Additional
+ * Authenticated Data, and its length is |aadlen|. |pkt_num| is used
+ * to create a nonce. |ckm| is the cryptographic key, and iv to use.
+ * |decrypt| is a callback function which actually decrypts a packet.
+ *
+ * This function returns the number of bytes written in |dest| if it
+ * succeeds, or one of the following negative error codes:
+ *
+ * NGTCP2_ERR_CALLBACK_FAILURE
+ * User callback failed.
+ * NGTCP2_ERR_DECRYPT
+ * Failed to decrypt a packet.
+ */
+static ngtcp2_ssize decrypt_pkt(uint8_t *dest, const ngtcp2_crypto_aead *aead,
+ const uint8_t *payload, size_t payloadlen,
+ const uint8_t *aad, size_t aadlen,
+ int64_t pkt_num, ngtcp2_crypto_km *ckm,
+ ngtcp2_decrypt decrypt) {
+ /* TODO nonce is limited to 64 bytes. */
+ uint8_t nonce[64];
+ int rv;
+
+ assert(sizeof(nonce) >= ckm->iv.len);
+
+ ngtcp2_crypto_create_nonce(nonce, ckm->iv.base, ckm->iv.len, pkt_num);
+
+ rv = decrypt(dest, aead, &ckm->aead_ctx, payload, payloadlen, nonce,
+ ckm->iv.len, aad, aadlen);
+
+ if (rv != 0) {
+ if (rv == NGTCP2_ERR_DECRYPT) {
+ return rv;
+ }
+ return NGTCP2_ERR_CALLBACK_FAILURE;
+ }
+
+ assert(payloadlen >= aead->max_overhead);
+
+ return (ngtcp2_ssize)(payloadlen - aead->max_overhead);
+}
+
+/*
+ * decrypt_hp decryptes packet header. The packet number starts at
+ * |pkt| + |pkt_num_offset|. The entire plaintext QUIC packet header
+ * will be written to the buffer pointed by |dest| whose capacity is
+ * |destlen|.
+ *
+ * This function returns the number of bytes written to |dest|, or one
+ * of the following negative error codes:
+ *
+ * NGTCP2_ERR_PROTO
+ * Packet is badly formatted
+ * NGTCP2_ERR_CALLBACK_FAILURE
+ * User-defined callback function failed; or it does not return
+ * expected result.
+ */
+static ngtcp2_ssize
+decrypt_hp(ngtcp2_pkt_hd *hd, uint8_t *dest, const ngtcp2_crypto_cipher *hp,
+ const uint8_t *pkt, size_t pktlen, size_t pkt_num_offset,
+ const ngtcp2_crypto_cipher_ctx *hp_ctx, ngtcp2_hp_mask hp_mask) {
+ size_t sample_offset;
+ uint8_t *p = dest;
+ uint8_t mask[NGTCP2_HP_SAMPLELEN];
+ size_t i;
+ int rv;
+
+ assert(hp_mask);
+
+ if (pkt_num_offset + 4 + NGTCP2_HP_SAMPLELEN > pktlen) {
+ return NGTCP2_ERR_PROTO;
+ }
+
+ p = ngtcp2_cpymem(p, pkt, pkt_num_offset);
+
+ sample_offset = pkt_num_offset + 4;
+
+ rv = hp_mask(mask, hp, hp_ctx, pkt + sample_offset);
+ if (rv != 0) {
+ return NGTCP2_ERR_CALLBACK_FAILURE;
+ }
+
+ if (hd->flags & NGTCP2_PKT_FLAG_LONG_FORM) {
+ dest[0] = (uint8_t)(dest[0] ^ (mask[0] & 0x0f));
+ } else {
+ dest[0] = (uint8_t)(dest[0] ^ (mask[0] & 0x1f));
+ if (dest[0] & NGTCP2_SHORT_KEY_PHASE_BIT) {
+ hd->flags |= NGTCP2_PKT_FLAG_KEY_PHASE;
+ }
+ }
+
+ hd->pkt_numlen = (size_t)((dest[0] & NGTCP2_PKT_NUMLEN_MASK) + 1);
+
+ for (i = 0; i < hd->pkt_numlen; ++i) {
+ *p++ = *(pkt + pkt_num_offset + i) ^ mask[i + 1];
+ }
+
+ hd->pkt_num = ngtcp2_get_pkt_num(p - hd->pkt_numlen, hd->pkt_numlen);
+
+ return p - dest;
+}
+
+/*
+ * conn_emit_pending_crypto_data delivers pending stream data to the
+ * application due to packet reordering.
+ *
+ * This function returns 0 if it succeeds, or one of the following
+ * negative error codes:
+ *
+ * NGTCP2_ERR_CALLBACK_FAILURE
+ * User callback failed
+ * NGTCP2_ERR_CRYPTO
+ * TLS backend reported error
+ */
+static int conn_emit_pending_crypto_data(ngtcp2_conn *conn,
+ ngtcp2_crypto_level crypto_level,
+ ngtcp2_strm *strm,
+ uint64_t rx_offset) {
+ size_t datalen;
+ const uint8_t *data;
+ int rv;
+ uint64_t offset;
+
+ if (!strm->rx.rob) {
+ return 0;
+ }
+
+ for (;;) {
+ datalen = ngtcp2_rob_data_at(strm->rx.rob, &data, rx_offset);
+ if (datalen == 0) {
+ assert(rx_offset == ngtcp2_strm_rx_offset(strm));
+ return 0;
+ }
+
+ offset = rx_offset;
+ rx_offset += datalen;
+
+ rv = conn_call_recv_crypto_data(conn, crypto_level, offset, data, datalen);
+ if (rv != 0) {
+ return rv;
+ }
+
+ ngtcp2_rob_pop(strm->rx.rob, rx_offset - datalen, datalen);
+ }
+}
+
+/*
+ * conn_recv_connection_close is called when CONNECTION_CLOSE or
+ * APPLICATION_CLOSE frame is received.
+ */
+static int conn_recv_connection_close(ngtcp2_conn *conn,
+ ngtcp2_connection_close *fr) {
+ ngtcp2_connection_close_error *ccerr = &conn->rx.ccerr;
+
+ conn->state = NGTCP2_CS_DRAINING;
+ if (fr->type == NGTCP2_FRAME_CONNECTION_CLOSE) {
+ ccerr->type = NGTCP2_CONNECTION_CLOSE_ERROR_CODE_TYPE_TRANSPORT;
+ } else {
+ ccerr->type = NGTCP2_CONNECTION_CLOSE_ERROR_CODE_TYPE_APPLICATION;
+ }
+ ccerr->error_code = fr->error_code;
+ ccerr->frame_type = fr->frame_type;
+
+ if (!fr->reasonlen) {
+ ccerr->reasonlen = 0;
+
+ return 0;
+ }
+
+ if (ccerr->reason == NULL) {
+ ccerr->reason = ngtcp2_mem_malloc(
+ conn->mem, NGTCP2_CONNECTION_CLOSE_ERROR_MAX_REASONLEN);
+ if (ccerr->reason == NULL) {
+ return NGTCP2_ERR_NOMEM;
+ }
+ }
+
+ ccerr->reasonlen =
+ ngtcp2_min(fr->reasonlen, NGTCP2_CONNECTION_CLOSE_ERROR_MAX_REASONLEN);
+ ngtcp2_cpymem((uint8_t *)ccerr->reason, fr->reason, ccerr->reasonlen);
+
+ return 0;
+}
+
+static void conn_recv_path_challenge(ngtcp2_conn *conn, const ngtcp2_path *path,
+ ngtcp2_path_challenge *fr) {
+ ngtcp2_path_challenge_entry *ent;
+
+ /* client only responds to PATH_CHALLENGE from the current path or
+ path which client is migrating to. */
+ if (!conn->server && !ngtcp2_path_eq(&conn->dcid.current.ps.path, path) &&
+ (!conn->pv || !ngtcp2_path_eq(&conn->pv->dcid.ps.path, path))) {
+ ngtcp2_log_info(&conn->log, NGTCP2_LOG_EVENT_CON,
+ "discard PATH_CHALLENGE from the path which is not current "
+ "or endpoint is migrating to");
+ return;
+ }
+
+ ent = ngtcp2_ringbuf_push_front(&conn->rx.path_challenge.rb);
+ ngtcp2_path_challenge_entry_init(ent, path, fr->data);
+}
+
+/*
+ * conn_reset_congestion_state resets congestion state.
+ */
+static void conn_reset_congestion_state(ngtcp2_conn *conn, ngtcp2_tstamp ts) {
+ conn_reset_conn_stat_cc(conn, &conn->cstat);
+
+ conn->cc.reset(&conn->cc, &conn->cstat, ts);
+
+ if (conn->hs_pktns) {
+ ngtcp2_rtb_reset_cc_state(&conn->hs_pktns->rtb,
+ conn->hs_pktns->tx.last_pkt_num + 1);
+ }
+ ngtcp2_rtb_reset_cc_state(&conn->pktns.rtb, conn->pktns.tx.last_pkt_num + 1);
+ ngtcp2_rst_init(&conn->rst);
+
+ conn->tx.pacing.next_ts = UINT64_MAX;
+}
+
+static int conn_recv_path_response(ngtcp2_conn *conn, ngtcp2_path_response *fr,
+ ngtcp2_tstamp ts) {
+ int rv;
+ ngtcp2_duration pto, timeout;
+ ngtcp2_pv *pv = conn->pv, *npv;
+ uint8_t ent_flags;
+
+ if (!pv) {
+ return 0;
+ }
+
+ rv = ngtcp2_pv_validate(pv, &ent_flags, fr->data);
+ if (rv != 0) {
+ assert(!ngtcp2_err_is_fatal(rv));
+
+ return 0;
+ }
+
+ if (!(pv->flags & NGTCP2_PV_FLAG_DONT_CARE)) {
+ if (!(pv->flags & NGTCP2_PV_FLAG_FALLBACK_ON_FAILURE)) {
+ if (pv->dcid.seq != conn->dcid.current.seq) {
+ assert(conn->dcid.current.cid.datalen);
+
+ rv = conn_retire_dcid(conn, &conn->dcid.current, ts);
+ if (rv != 0) {
+ return rv;
+ }
+ ngtcp2_dcid_copy(&conn->dcid.current, &pv->dcid);
+ }
+ conn_reset_congestion_state(conn, ts);
+ conn_reset_ecn_validation_state(conn);
+ }
+
+ if (ngtcp2_path_eq(&pv->dcid.ps.path, &conn->dcid.current.ps.path)) {
+ conn->dcid.current.flags |= NGTCP2_DCID_FLAG_PATH_VALIDATED;
+
+ if (!conn->local.settings.no_pmtud) {
+ ngtcp2_conn_stop_pmtud(conn);
+
+ if (!(pv->flags & NGTCP2_PV_ENTRY_FLAG_UNDERSIZED)) {
+ rv = conn_start_pmtud(conn);
+ if (rv != 0) {
+ return rv;
+ }
+ }
+ }
+ }
+
+ rv = conn_call_path_validation(conn, pv,
+ NGTCP2_PATH_VALIDATION_RESULT_SUCCESS);
+ if (rv != 0) {
+ return rv;
+ }
+ }
+
+ if (pv->flags & NGTCP2_PV_FLAG_FALLBACK_ON_FAILURE) {
+ pto = conn_compute_pto(conn, &conn->pktns);
+ timeout = 3 * ngtcp2_max(pto, pv->fallback_pto);
+
+ if (ent_flags & NGTCP2_PV_ENTRY_FLAG_UNDERSIZED) {
+ assert(conn->server);
+
+ /* Validate path again */
+ rv = ngtcp2_pv_new(&npv, &pv->dcid, timeout,
+ NGTCP2_PV_FLAG_FALLBACK_ON_FAILURE |
+ NGTCP2_PV_FLAG_MTU_PROBE,
+ &conn->log, conn->mem);
+ if (rv != 0) {
+ return rv;
+ }
+
+ npv->dcid.flags |= NGTCP2_DCID_FLAG_PATH_VALIDATED;
+ ngtcp2_dcid_copy(&npv->fallback_dcid, &pv->fallback_dcid);
+ npv->fallback_pto = pv->fallback_pto;
+ } else {
+ rv = ngtcp2_pv_new(&npv, &pv->fallback_dcid, timeout,
+ NGTCP2_PV_FLAG_DONT_CARE, &conn->log, conn->mem);
+ if (rv != 0) {
+ return rv;
+ }
+ }
+
+ /* Unset the flag bit so that conn_stop_pv does not retire
+ DCID. */
+ pv->flags &= (uint8_t)~NGTCP2_PV_FLAG_FALLBACK_ON_FAILURE;
+
+ rv = conn_stop_pv(conn, ts);
+ if (rv != 0) {
+ ngtcp2_pv_del(npv);
+ return rv;
+ }
+
+ conn->pv = npv;
+
+ return 0;
+ }
+
+ return conn_stop_pv(conn, ts);
+}
+
+/*
+ * pkt_num_bits returns the number of bits available when packet
+ * number is encoded in |pkt_numlen| bytes.
+ */
+static size_t pkt_num_bits(size_t pkt_numlen) {
+ switch (pkt_numlen) {
+ case 1:
+ return 8;
+ case 2:
+ return 16;
+ case 3:
+ return 24;
+ case 4:
+ return 32;
+ default:
+ ngtcp2_unreachable();
+ }
+}
+
+/*
+ * pktns_pkt_num_is_duplicate returns nonzero if |pkt_num| is
+ * duplicated packet number.
+ */
+static int pktns_pkt_num_is_duplicate(ngtcp2_pktns *pktns, int64_t pkt_num) {
+ return ngtcp2_gaptr_is_pushed(&pktns->rx.pngap, (uint64_t)pkt_num, 1);
+}
+
+/*
+ * pktns_commit_recv_pkt_num marks packet number |pkt_num| as
+ * received.
+ */
+static int pktns_commit_recv_pkt_num(ngtcp2_pktns *pktns, int64_t pkt_num,
+ int ack_eliciting, ngtcp2_tstamp ts) {
+ int rv;
+
+ if (ack_eliciting && pktns->rx.max_ack_eliciting_pkt_num + 1 != pkt_num) {
+ ngtcp2_acktr_immediate_ack(&pktns->acktr);
+ }
+ if (pktns->rx.max_pkt_num < pkt_num) {
+ pktns->rx.max_pkt_num = pkt_num;
+ pktns->rx.max_pkt_ts = ts;
+ }
+ if (ack_eliciting && pktns->rx.max_ack_eliciting_pkt_num < pkt_num) {
+ pktns->rx.max_ack_eliciting_pkt_num = pkt_num;
+ }
+
+ rv = ngtcp2_gaptr_push(&pktns->rx.pngap, (uint64_t)pkt_num, 1);
+ if (rv != 0) {
+ return rv;
+ }
+
+ if (ngtcp2_ksl_len(&pktns->rx.pngap.gap) > 256) {
+ ngtcp2_gaptr_drop_first_gap(&pktns->rx.pngap);
+ }
+
+ return 0;
+}
+
+/*
+ * verify_token verifies |hd| contains |token| in its token field. It
+ * returns 0 if it succeeds, or NGTCP2_ERR_PROTO.
+ */
+static int verify_token(const uint8_t *token, size_t tokenlen,
+ const ngtcp2_pkt_hd *hd) {
+ if (tokenlen == hd->tokenlen && ngtcp2_cmemeq(token, hd->token, tokenlen)) {
+ return 0;
+ }
+ return NGTCP2_ERR_PROTO;
+}
+
+static void pktns_increase_ecn_counts(ngtcp2_pktns *pktns,
+ const ngtcp2_pkt_info *pi) {
+ switch (pi->ecn & NGTCP2_ECN_MASK) {
+ case NGTCP2_ECN_ECT_0:
+ ++pktns->rx.ecn.ect0;
+ break;
+ case NGTCP2_ECN_ECT_1:
+ ++pktns->rx.ecn.ect1;
+ break;
+ case NGTCP2_ECN_CE:
+ ++pktns->rx.ecn.ce;
+ break;
+ }
+}
+
+/*
+ * vneg_available_versions_includes returns nonzero if
+ * |available_versions| of length |available_versionslen| includes
+ * |version|. |available_versions| is the wire image of
+ * available_versions field of version_information transport
+ * parameter, and each version is encoded in network byte order.
+ */
+static int vneg_available_versions_includes(const uint8_t *available_versions,
+ size_t available_versionslen,
+ uint32_t version) {
+ size_t i;
+ uint32_t v;
+
+ assert(!(available_versionslen & 0x3));
+
+ if (available_versionslen == 0) {
+ return 0;
+ }
+
+ for (i = 0; i < available_versionslen; i += sizeof(uint32_t)) {
+ available_versions = ngtcp2_get_uint32(&v, available_versions);
+
+ if (version == v) {
+ return 1;
+ }
+ }
+
+ return 0;
+}
+
+static int conn_recv_crypto(ngtcp2_conn *conn, ngtcp2_crypto_level crypto_level,
+ ngtcp2_strm *strm, const ngtcp2_crypto *fr);
+
+static ngtcp2_ssize conn_recv_pkt(ngtcp2_conn *conn, const ngtcp2_path *path,
+ const ngtcp2_pkt_info *pi, const uint8_t *pkt,
+ size_t pktlen, size_t dgramlen,
+ ngtcp2_tstamp pkt_ts, ngtcp2_tstamp ts);
+
+static int conn_process_buffered_protected_pkt(ngtcp2_conn *conn,
+ ngtcp2_pktns *pktns,
+ ngtcp2_tstamp ts);
+
+/*
+ * conn_recv_handshake_pkt processes received packet |pkt| whose
+ * length is |pktlen| during handshake period. The buffer pointed by
+ * |pkt| might contain multiple packets. This function only processes
+ * one packet. |pkt_ts| is the timestamp when packet is received.
+ * |ts| should be the current time. Usually they are the same, but
+ * for buffered packets, |pkt_ts| would be earlier than |ts|.
+ *
+ * This function returns the number of bytes it reads if it succeeds,
+ * or one of the following negative error codes:
+ *
+ * NGTCP2_ERR_RECV_VERSION_NEGOTIATION
+ * Version Negotiation packet is received.
+ * NGTCP2_ERR_NOMEM
+ * Out of memory.
+ * NGTCP2_ERR_CALLBACK_FAILURE
+ * User-defined callback function failed.
+ * NGTCP2_ERR_DISCARD_PKT
+ * Packet was discarded because plain text header was malformed;
+ * or its payload could not be decrypted.
+ * NGTCP2_ERR_FRAME_FORMAT
+ * Frame is badly formatted
+ * NGTCP2_ERR_ACK_FRAME
+ * ACK frame is malformed.
+ * NGTCP2_ERR_CRYPTO
+ * TLS stack reported error.
+ * NGTCP2_ERR_PROTO
+ * Generic QUIC protocol error.
+ *
+ * In addition to the above error codes, error codes returned from
+ * conn_recv_pkt are also returned.
+ */
+static ngtcp2_ssize
+conn_recv_handshake_pkt(ngtcp2_conn *conn, const ngtcp2_path *path,
+ const ngtcp2_pkt_info *pi, const uint8_t *pkt,
+ size_t pktlen, size_t dgramlen, ngtcp2_tstamp pkt_ts,
+ ngtcp2_tstamp ts) {
+ ngtcp2_ssize nread;
+ ngtcp2_pkt_hd hd;
+ ngtcp2_max_frame mfr;
+ ngtcp2_frame *fr = &mfr.fr;
+ int rv;
+ int require_ack = 0;
+ size_t hdpktlen;
+ const uint8_t *payload;
+ size_t payloadlen;
+ ngtcp2_ssize nwrite;
+ ngtcp2_crypto_aead *aead;
+ ngtcp2_crypto_cipher *hp;
+ ngtcp2_crypto_km *ckm;
+ ngtcp2_crypto_cipher_ctx *hp_ctx;
+ ngtcp2_hp_mask hp_mask;
+ ngtcp2_decrypt decrypt;
+ ngtcp2_pktns *pktns;
+ ngtcp2_strm *crypto;
+ ngtcp2_crypto_level crypto_level;
+ int invalid_reserved_bits = 0;
+
+ if (pktlen == 0) {
+ return 0;
+ }
+
+ if (!(pkt[0] & NGTCP2_HEADER_FORM_BIT)) {
+ if (conn->state == NGTCP2_CS_SERVER_INITIAL) {
+ /* Ignore 1RTT packet unless server's first Handshake packet has
+ been transmitted. */
+ return (ngtcp2_ssize)pktlen;
+ }
+
+ if (conn->pktns.crypto.rx.ckm) {
+ return 0;
+ }
+
+ ngtcp2_log_info(&conn->log, NGTCP2_LOG_EVENT_CON,
+ "buffering 1RTT packet len=%zu", pktlen);
+
+ rv = conn_buffer_pkt(conn, &conn->pktns, path, pi, pkt, pktlen, dgramlen,
+ ts);
+ if (rv != 0) {
+ assert(ngtcp2_err_is_fatal(rv));
+ return rv;
+ }
+ return (ngtcp2_ssize)pktlen;
+ }
+
+ nread = ngtcp2_pkt_decode_hd_long(&hd, pkt, pktlen);
+ if (nread < 0) {
+ return NGTCP2_ERR_DISCARD_PKT;
+ }
+
+ if (hd.type == NGTCP2_PKT_VERSION_NEGOTIATION) {
+ hdpktlen = (size_t)nread;
+
+ ngtcp2_log_rx_pkt_hd(&conn->log, &hd);
+
+ if (conn->server) {
+ return NGTCP2_ERR_DISCARD_PKT;
+ }
+
+ /* Receiving Version Negotiation packet after getting Handshake
+ packet from server is invalid. */
+ if (conn->flags & NGTCP2_CONN_FLAG_CONN_ID_NEGOTIATED) {
+ return NGTCP2_ERR_DISCARD_PKT;
+ }
+
+ if (!ngtcp2_cid_eq(&conn->oscid, &hd.dcid)) {
+ ngtcp2_log_info(&conn->log, NGTCP2_LOG_EVENT_PKT,
+ "packet was ignored because of mismatched DCID");
+ return NGTCP2_ERR_DISCARD_PKT;
+ }
+
+ if (!ngtcp2_cid_eq(&conn->dcid.current.cid, &hd.scid)) {
+ /* Just discard invalid Version Negotiation packet */
+ ngtcp2_log_info(&conn->log, NGTCP2_LOG_EVENT_PKT,
+ "packet was ignored because of mismatched SCID");
+ return NGTCP2_ERR_DISCARD_PKT;
+ }
+ rv = conn_on_version_negotiation(conn, &hd, pkt + hdpktlen,
+ pktlen - hdpktlen);
+ if (rv != 0) {
+ if (ngtcp2_err_is_fatal(rv)) {
+ return rv;
+ }
+ return NGTCP2_ERR_DISCARD_PKT;
+ }
+ return NGTCP2_ERR_RECV_VERSION_NEGOTIATION;
+ } else if (hd.type == NGTCP2_PKT_RETRY) {
+ hdpktlen = (size_t)nread;
+
+ ngtcp2_log_rx_pkt_hd(&conn->log, &hd);
+
+ if (conn->server) {
+ return NGTCP2_ERR_DISCARD_PKT;
+ }
+
+ /* Receiving Retry packet after getting Initial packet from server
+ is invalid. */
+ if (conn->flags & NGTCP2_CONN_FLAG_CONN_ID_NEGOTIATED) {
+ return NGTCP2_ERR_DISCARD_PKT;
+ }
+
+ if (conn->client_chosen_version != hd.version) {
+ return NGTCP2_ERR_DISCARD_PKT;
+ }
+
+ rv = conn_on_retry(conn, &hd, hdpktlen, pkt, pktlen, ts);
+ if (rv != 0) {
+ if (ngtcp2_err_is_fatal(rv)) {
+ return rv;
+ }
+ return NGTCP2_ERR_DISCARD_PKT;
+ }
+ return (ngtcp2_ssize)pktlen;
+ }
+
+ if (pktlen < (size_t)nread + hd.len) {
+ return NGTCP2_ERR_DISCARD_PKT;
+ }
+
+ pktlen = (size_t)nread + hd.len;
+
+ if (!ngtcp2_is_supported_version(hd.version)) {
+ return NGTCP2_ERR_DISCARD_PKT;
+ }
+
+ if (conn->server) {
+ if (hd.version != conn->client_chosen_version &&
+ (!conn->negotiated_version || hd.version != conn->negotiated_version)) {
+ return NGTCP2_ERR_DISCARD_PKT;
+ }
+ } else if (hd.version != conn->client_chosen_version &&
+ conn->negotiated_version &&
+ hd.version != conn->negotiated_version) {
+ return NGTCP2_ERR_DISCARD_PKT;
+ }
+
+ /* Quoted from spec: if subsequent packets of those types include a
+ different Source Connection ID, they MUST be discarded. */
+ if ((conn->flags & NGTCP2_CONN_FLAG_CONN_ID_NEGOTIATED) &&
+ !ngtcp2_cid_eq(&conn->dcid.current.cid, &hd.scid)) {
+ ngtcp2_log_rx_pkt_hd(&conn->log, &hd);
+ ngtcp2_log_info(&conn->log, NGTCP2_LOG_EVENT_PKT,
+ "packet was ignored because of mismatched SCID");
+ return NGTCP2_ERR_DISCARD_PKT;
+ }
+
+ switch (hd.type) {
+ case NGTCP2_PKT_0RTT:
+ if (!conn->server) {
+ return NGTCP2_ERR_DISCARD_PKT;
+ }
+
+ if (hd.version != conn->client_chosen_version) {
+ return NGTCP2_ERR_DISCARD_PKT;
+ }
+
+ if (conn->flags & NGTCP2_CONN_FLAG_CONN_ID_NEGOTIATED) {
+ if (conn->early.ckm) {
+ ngtcp2_ssize nread2;
+ /* TODO Avoid to parse header twice. */
+ nread2 =
+ conn_recv_pkt(conn, path, pi, pkt, pktlen, dgramlen, pkt_ts, ts);
+ if (nread2 < 0) {
+ return nread2;
+ }
+ }
+
+ /* Discard 0-RTT packet if we don't have a key to decrypt it. */
+ return (ngtcp2_ssize)pktlen;
+ }
+
+ /* Buffer re-ordered 0-RTT packet. */
+ ngtcp2_log_info(&conn->log, NGTCP2_LOG_EVENT_CON,
+ "buffering 0-RTT packet len=%zu", pktlen);
+
+ rv = conn_buffer_pkt(conn, conn->in_pktns, path, pi, pkt, pktlen, dgramlen,
+ ts);
+ if (rv != 0) {
+ assert(ngtcp2_err_is_fatal(rv));
+ return rv;
+ }
+
+ return (ngtcp2_ssize)pktlen;
+ case NGTCP2_PKT_INITIAL:
+ if (!conn->in_pktns) {
+ ngtcp2_log_info(
+ &conn->log, NGTCP2_LOG_EVENT_PKT,
+ "Initial packet is discarded because keys have been discarded");
+ return (ngtcp2_ssize)pktlen;
+ }
+
+ assert(conn->in_pktns);
+
+ if (conn->server) {
+ if (dgramlen < NGTCP2_MAX_UDP_PAYLOAD_SIZE) {
+ ngtcp2_log_info(
+ &conn->log, NGTCP2_LOG_EVENT_PKT,
+ "Initial packet was ignored because it is included in UDP datagram "
+ "less than %zu bytes: %zu bytes",
+ NGTCP2_MAX_UDP_PAYLOAD_SIZE, dgramlen);
+ return NGTCP2_ERR_DISCARD_PKT;
+ }
+ if (conn->local.settings.tokenlen) {
+ rv = verify_token(conn->local.settings.token,
+ conn->local.settings.tokenlen, &hd);
+ if (rv != 0) {
+ ngtcp2_log_info(&conn->log, NGTCP2_LOG_EVENT_PKT,
+ "packet was ignored because token is invalid");
+ return NGTCP2_ERR_DISCARD_PKT;
+ }
+ }
+ if ((conn->flags & NGTCP2_CONN_FLAG_CONN_ID_NEGOTIATED) == 0) {
+ /* Set rcid here so that it is available to callback. If this
+ packet is discarded later in this function and no packet is
+ processed in this connection attempt so far, connection
+ will be dropped. */
+ conn->rcid = hd.dcid;
+
+ rv = conn_call_recv_client_initial(conn, &hd.dcid);
+ if (rv != 0) {
+ return rv;
+ }
+ }
+ } else {
+ if (hd.tokenlen != 0) {
+ ngtcp2_log_info(&conn->log, NGTCP2_LOG_EVENT_PKT,
+ "packet was ignored because token is not empty");
+ return NGTCP2_ERR_DISCARD_PKT;
+ }
+
+ if (hd.version != conn->client_chosen_version &&
+ !conn->negotiated_version && conn->vneg.version != hd.version) {
+ if (!vneg_available_versions_includes(conn->vneg.available_versions,
+ conn->vneg.available_versionslen,
+ hd.version)) {
+ return NGTCP2_ERR_DISCARD_PKT;
+ }
+
+ /* Install new Initial keys using QUIC version = hd.version */
+ rv = conn_call_version_negotiation(
+ conn, hd.version,
+ (conn->flags & NGTCP2_CONN_FLAG_RECV_RETRY)
+ ? &conn->dcid.current.cid
+ : &conn->rcid);
+ if (rv != 0) {
+ return rv;
+ }
+
+ assert(conn->vneg.version == hd.version);
+ }
+ }
+
+ pktns = conn->in_pktns;
+ crypto = &pktns->crypto.strm;
+ crypto_level = NGTCP2_CRYPTO_LEVEL_INITIAL;
+
+ if (hd.version == conn->client_chosen_version) {
+ ckm = pktns->crypto.rx.ckm;
+ hp_ctx = &pktns->crypto.rx.hp_ctx;
+ } else {
+ assert(conn->vneg.version == hd.version);
+
+ ckm = conn->vneg.rx.ckm;
+ hp_ctx = &conn->vneg.rx.hp_ctx;
+ }
+
+ break;
+ case NGTCP2_PKT_HANDSHAKE:
+ if (hd.version != conn->negotiated_version) {
+ return NGTCP2_ERR_DISCARD_PKT;
+ }
+
+ if (!conn->hs_pktns->crypto.rx.ckm) {
+ if (conn->server) {
+ ngtcp2_log_info(
+ &conn->log, NGTCP2_LOG_EVENT_PKT,
+ "Handshake packet at this point is unexpected and discarded");
+ return (ngtcp2_ssize)pktlen;
+ }
+ ngtcp2_log_info(&conn->log, NGTCP2_LOG_EVENT_CON,
+ "buffering Handshake packet len=%zu", pktlen);
+
+ rv = conn_buffer_pkt(conn, conn->hs_pktns, path, pi, pkt, pktlen,
+ dgramlen, ts);
+ if (rv != 0) {
+ assert(ngtcp2_err_is_fatal(rv));
+ return rv;
+ }
+ return (ngtcp2_ssize)pktlen;
+ }
+
+ pktns = conn->hs_pktns;
+ crypto = &pktns->crypto.strm;
+ crypto_level = NGTCP2_CRYPTO_LEVEL_HANDSHAKE;
+ ckm = pktns->crypto.rx.ckm;
+ hp_ctx = &pktns->crypto.rx.hp_ctx;
+
+ break;
+ default:
+ /* unknown packet type */
+ ngtcp2_log_info(&conn->log, NGTCP2_LOG_EVENT_PKT,
+ "packet was ignored because of unknown packet type");
+ return (ngtcp2_ssize)pktlen;
+ }
+
+ hp_mask = conn->callbacks.hp_mask;
+ decrypt = conn->callbacks.decrypt;
+ aead = &pktns->crypto.ctx.aead;
+ hp = &pktns->crypto.ctx.hp;
+
+ assert(ckm);
+ assert(hp_mask);
+ assert(decrypt);
+
+ rv = conn_ensure_decrypt_hp_buffer(conn, (size_t)nread + 4);
+ if (rv != 0) {
+ return rv;
+ }
+
+ nwrite = decrypt_hp(&hd, conn->crypto.decrypt_hp_buf.base, hp, pkt, pktlen,
+ (size_t)nread, hp_ctx, hp_mask);
+ if (nwrite < 0) {
+ if (ngtcp2_err_is_fatal((int)nwrite)) {
+ return nwrite;
+ }
+ ngtcp2_log_info(&conn->log, NGTCP2_LOG_EVENT_PKT,
+ "could not decrypt packet number");
+ return NGTCP2_ERR_DISCARD_PKT;
+ }
+
+ hdpktlen = (size_t)nwrite;
+ payload = pkt + hdpktlen;
+ payloadlen = hd.len - hd.pkt_numlen;
+
+ hd.pkt_num = ngtcp2_pkt_adjust_pkt_num(pktns->rx.max_pkt_num, hd.pkt_num,
+ pkt_num_bits(hd.pkt_numlen));
+ if (hd.pkt_num > NGTCP2_MAX_PKT_NUM) {
+ ngtcp2_log_info(&conn->log, NGTCP2_LOG_EVENT_PKT,
+ "pkn=%" PRId64 " is greater than maximum pkn", hd.pkt_num);
+ return NGTCP2_ERR_DISCARD_PKT;
+ }
+
+ ngtcp2_log_rx_pkt_hd(&conn->log, &hd);
+
+ rv = ngtcp2_pkt_verify_reserved_bits(conn->crypto.decrypt_hp_buf.base[0]);
+ if (rv != 0) {
+ invalid_reserved_bits = 1;
+
+ ngtcp2_log_info(&conn->log, NGTCP2_LOG_EVENT_PKT,
+ "packet has incorrect reserved bits");
+
+ /* Will return error after decrypting payload */
+ }
+
+ if (pktns_pkt_num_is_duplicate(pktns, hd.pkt_num)) {
+ ngtcp2_log_info(&conn->log, NGTCP2_LOG_EVENT_PKT,
+ "packet was discarded because of duplicated packet number");
+ return NGTCP2_ERR_DISCARD_PKT;
+ }
+
+ rv = conn_ensure_decrypt_buffer(conn, payloadlen);
+ if (rv != 0) {
+ return rv;
+ }
+
+ nwrite = decrypt_pkt(conn->crypto.decrypt_buf.base, aead, payload, payloadlen,
+ conn->crypto.decrypt_hp_buf.base, hdpktlen, hd.pkt_num,
+ ckm, decrypt);
+ if (nwrite < 0) {
+ if (ngtcp2_err_is_fatal((int)nwrite)) {
+ return nwrite;
+ }
+ ngtcp2_log_info(&conn->log, NGTCP2_LOG_EVENT_PKT,
+ "could not decrypt packet payload");
+ return NGTCP2_ERR_DISCARD_PKT;
+ }
+
+ if (invalid_reserved_bits) {
+ return NGTCP2_ERR_PROTO;
+ }
+
+ if (!conn->server && hd.version != conn->client_chosen_version &&
+ !conn->negotiated_version) {
+ conn->negotiated_version = hd.version;
+
+ ngtcp2_log_info(&conn->log, NGTCP2_LOG_EVENT_CON,
+ "the negotiated version is 0x%08x",
+ conn->negotiated_version);
+ }
+
+ payload = conn->crypto.decrypt_buf.base;
+ payloadlen = (size_t)nwrite;
+
+ switch (hd.type) {
+ case NGTCP2_PKT_INITIAL:
+ if (!conn->server || ((conn->flags & NGTCP2_CONN_FLAG_CONN_ID_NEGOTIATED) &&
+ !ngtcp2_cid_eq(&conn->rcid, &hd.dcid))) {
+ rv = conn_verify_dcid(conn, NULL, &hd);
+ if (rv != 0) {
+ if (ngtcp2_err_is_fatal(rv)) {
+ return rv;
+ }
+ ngtcp2_log_info(&conn->log, NGTCP2_LOG_EVENT_PKT,
+ "packet was ignored because of mismatched DCID");
+ return NGTCP2_ERR_DISCARD_PKT;
+ }
+ }
+ break;
+ case NGTCP2_PKT_HANDSHAKE:
+ rv = conn_verify_dcid(conn, NULL, &hd);
+ if (rv != 0) {
+ if (ngtcp2_err_is_fatal(rv)) {
+ return rv;
+ }
+ ngtcp2_log_info(&conn->log, NGTCP2_LOG_EVENT_PKT,
+ "packet was ignored because of mismatched DCID");
+ return NGTCP2_ERR_DISCARD_PKT;
+ }
+ break;
+ default:
+ ngtcp2_unreachable();
+ }
+
+ if (payloadlen == 0) {
+ /* QUIC packet must contain at least one frame */
+ if (hd.type == NGTCP2_PKT_INITIAL) {
+ return NGTCP2_ERR_DISCARD_PKT;
+ }
+ return NGTCP2_ERR_PROTO;
+ }
+
+ if (hd.type == NGTCP2_PKT_INITIAL &&
+ !(conn->flags & NGTCP2_CONN_FLAG_CONN_ID_NEGOTIATED)) {
+ conn->flags |= NGTCP2_CONN_FLAG_CONN_ID_NEGOTIATED;
+ if (!conn->server) {
+ conn->dcid.current.cid = hd.scid;
+ }
+ }
+
+ ngtcp2_qlog_pkt_received_start(&conn->qlog);
+
+ for (; payloadlen;) {
+ nread = ngtcp2_pkt_decode_frame(fr, payload, payloadlen);
+ if (nread < 0) {
+ return nread;
+ }
+
+ payload += nread;
+ payloadlen -= (size_t)nread;
+
+ switch (fr->type) {
+ case NGTCP2_FRAME_ACK:
+ case NGTCP2_FRAME_ACK_ECN:
+ fr->ack.ack_delay = 0;
+ fr->ack.ack_delay_unscaled = 0;
+ break;
+ }
+
+ ngtcp2_log_rx_fr(&conn->log, &hd, fr);
+
+ switch (fr->type) {
+ case NGTCP2_FRAME_ACK:
+ case NGTCP2_FRAME_ACK_ECN:
+ if (!conn->server && hd.type == NGTCP2_PKT_HANDSHAKE) {
+ conn->flags |= NGTCP2_CONN_FLAG_SERVER_ADDR_VERIFIED;
+ }
+ rv = conn_recv_ack(conn, pktns, &fr->ack, pkt_ts, ts);
+ if (rv != 0) {
+ return rv;
+ }
+ break;
+ case NGTCP2_FRAME_PADDING:
+ break;
+ case NGTCP2_FRAME_CRYPTO:
+ if (!conn->server && !conn->negotiated_version &&
+ ngtcp2_vec_len(fr->crypto.data, fr->crypto.datacnt)) {
+ conn->negotiated_version = hd.version;
+
+ ngtcp2_log_info(&conn->log, NGTCP2_LOG_EVENT_CON,
+ "the negotiated version is 0x%08x",
+ conn->negotiated_version);
+ }
+
+ rv = conn_recv_crypto(conn, crypto_level, crypto, &fr->crypto);
+ if (rv != 0) {
+ return rv;
+ }
+ require_ack = 1;
+ break;
+ case NGTCP2_FRAME_CONNECTION_CLOSE:
+ rv = conn_recv_connection_close(conn, &fr->connection_close);
+ if (rv != 0) {
+ return rv;
+ }
+ break;
+ case NGTCP2_FRAME_PING:
+ require_ack = 1;
+ break;
+ default:
+ return NGTCP2_ERR_PROTO;
+ }
+
+ ngtcp2_qlog_write_frame(&conn->qlog, fr);
+ }
+
+ if (hd.type == NGTCP2_PKT_HANDSHAKE) {
+ /* Successful processing of Handshake packet from a remote
+ endpoint validates its source address. */
+ conn->dcid.current.flags |= NGTCP2_DCID_FLAG_PATH_VALIDATED;
+ }
+
+ ngtcp2_qlog_pkt_received_end(&conn->qlog, &hd, pktlen);
+
+ rv = pktns_commit_recv_pkt_num(pktns, hd.pkt_num, require_ack, pkt_ts);
+ if (rv != 0) {
+ return rv;
+ }
+
+ pktns_increase_ecn_counts(pktns, pi);
+
+ /* TODO Initial and Handshake are always acknowledged without
+ delay. */
+ if (require_ack &&
+ (++pktns->acktr.rx_npkt >= conn->local.settings.ack_thresh ||
+ (pi->ecn & NGTCP2_ECN_MASK) == NGTCP2_ECN_CE)) {
+ ngtcp2_acktr_immediate_ack(&pktns->acktr);
+ }
+
+ rv = ngtcp2_conn_sched_ack(conn, &pktns->acktr, hd.pkt_num, require_ack,
+ pkt_ts);
+ if (rv != 0) {
+ return rv;
+ }
+
+ conn_restart_timer_on_read(conn, ts);
+
+ ngtcp2_qlog_metrics_updated(&conn->qlog, &conn->cstat);
+
+ return conn->state == NGTCP2_CS_DRAINING ? NGTCP2_ERR_DRAINING
+ : (ngtcp2_ssize)pktlen;
+}
+
+static int is_unrecoverable_error(int liberr) {
+ switch (liberr) {
+ case NGTCP2_ERR_CRYPTO:
+ case NGTCP2_ERR_REQUIRED_TRANSPORT_PARAM:
+ case NGTCP2_ERR_MALFORMED_TRANSPORT_PARAM:
+ case NGTCP2_ERR_TRANSPORT_PARAM:
+ case NGTCP2_ERR_VERSION_NEGOTIATION_FAILURE:
+ return 1;
+ }
+
+ return 0;
+}
+
+/*
+ * conn_recv_handshake_cpkt processes compound packet during
+ * handshake. The buffer pointed by |pkt| might contain multiple
+ * packets. The 1RTT packet must be the last one because it does not
+ * have payload length field.
+ *
+ * This function returns the same error code returned by
+ * conn_recv_handshake_pkt.
+ */
+static ngtcp2_ssize conn_recv_handshake_cpkt(ngtcp2_conn *conn,
+ const ngtcp2_path *path,
+ const ngtcp2_pkt_info *pi,
+ const uint8_t *pkt, size_t pktlen,
+ ngtcp2_tstamp ts) {
+ ngtcp2_ssize nread;
+ size_t dgramlen = pktlen;
+ const uint8_t *origpkt = pkt;
+ uint32_t version;
+
+ if (ngtcp2_path_eq(&conn->dcid.current.ps.path, path)) {
+ conn->dcid.current.bytes_recv += dgramlen;
+ }
+
+ while (pktlen) {
+ nread =
+ conn_recv_handshake_pkt(conn, path, pi, pkt, pktlen, dgramlen, ts, ts);
+ if (nread < 0) {
+ if (ngtcp2_err_is_fatal((int)nread)) {
+ return nread;
+ }
+
+ if (nread == NGTCP2_ERR_DRAINING) {
+ return NGTCP2_ERR_DRAINING;
+ }
+
+ if ((pkt[0] & NGTCP2_HEADER_FORM_BIT) && pktlen > 4) {
+ /* Not a Version Negotiation packet */
+ ngtcp2_get_uint32(&version, &pkt[1]);
+ if (ngtcp2_pkt_get_type_long(version, pkt[0]) == NGTCP2_PKT_INITIAL) {
+ if (conn->server) {
+ if (is_unrecoverable_error((int)nread)) {
+ /* If server gets crypto error from TLS stack, it is
+ unrecoverable, therefore drop connection. */
+ return nread;
+ }
+
+ /* If server discards first Initial, then drop connection
+ state. This is because SCID in packet might be corrupted
+ and the current connection state might wrongly discard
+ valid packet and prevent the handshake from
+ completing. */
+ if (conn->in_pktns && conn->in_pktns->rx.max_pkt_num == -1) {
+ return NGTCP2_ERR_DROP_CONN;
+ }
+
+ return (ngtcp2_ssize)dgramlen;
+ }
+ /* client */
+ if (is_unrecoverable_error((int)nread)) {
+ /* If client gets crypto error from TLS stack, it is
+ unrecoverable, therefore drop connection. */
+ return nread;
+ }
+ return (ngtcp2_ssize)dgramlen;
+ }
+ }
+
+ if (nread == NGTCP2_ERR_DISCARD_PKT) {
+ return (ngtcp2_ssize)dgramlen;
+ }
+
+ return nread;
+ }
+
+ if (nread == 0) {
+ assert(!(pkt[0] & NGTCP2_HEADER_FORM_BIT));
+ return pkt - origpkt;
+ }
+
+ assert(pktlen >= (size_t)nread);
+ pkt += nread;
+ pktlen -= (size_t)nread;
+
+ ngtcp2_log_info(&conn->log, NGTCP2_LOG_EVENT_PKT,
+ "read packet %td left %zu", nread, pktlen);
+ }
+
+ return (ngtcp2_ssize)dgramlen;
+}
+
+int ngtcp2_conn_init_stream(ngtcp2_conn *conn, ngtcp2_strm *strm,
+ int64_t stream_id, void *stream_user_data) {
+ int rv;
+ uint64_t max_rx_offset;
+ uint64_t max_tx_offset;
+ int local_stream = conn_local_stream(conn, stream_id);
+
+ assert(conn->remote.transport_params);
+
+ if (bidi_stream(stream_id)) {
+ if (local_stream) {
+ max_rx_offset =
+ conn->local.transport_params.initial_max_stream_data_bidi_local;
+ max_tx_offset =
+ conn->remote.transport_params->initial_max_stream_data_bidi_remote;
+ } else {
+ max_rx_offset =
+ conn->local.transport_params.initial_max_stream_data_bidi_remote;
+ max_tx_offset =
+ conn->remote.transport_params->initial_max_stream_data_bidi_local;
+ }
+ } else if (local_stream) {
+ max_rx_offset = 0;
+ max_tx_offset = conn->remote.transport_params->initial_max_stream_data_uni;
+ } else {
+ max_rx_offset = conn->local.transport_params.initial_max_stream_data_uni;
+ max_tx_offset = 0;
+ }
+
+ ngtcp2_strm_init(strm, stream_id, NGTCP2_STRM_FLAG_NONE, max_rx_offset,
+ max_tx_offset, stream_user_data, &conn->frc_objalloc,
+ conn->mem);
+
+ rv = ngtcp2_map_insert(&conn->strms, (ngtcp2_map_key_type)strm->stream_id,
+ strm);
+ if (rv != 0) {
+ assert(rv != NGTCP2_ERR_INVALID_ARGUMENT);
+ goto fail;
+ }
+
+ return 0;
+
+fail:
+ ngtcp2_strm_free(strm);
+ return rv;
+}
+
+/*
+ * conn_emit_pending_stream_data passes buffered ordered stream data
+ * to the application. |rx_offset| is the first offset to deliver to
+ * the application. This function assumes that the data up to
+ * |rx_offset| has been delivered already. This function only passes
+ * the ordered data without any gap. If there is a gap, it stops
+ * providing the data to the application, and returns.
+ *
+ * This function returns 0 if it succeeds, or one of the following
+ * negative error codes:
+ *
+ * NGTCP2_ERR_CALLBACK_FAILURE
+ * User callback failed.
+ * NGTCP2_ERR_NOMEM
+ * Out of memory.
+ */
+static int conn_emit_pending_stream_data(ngtcp2_conn *conn, ngtcp2_strm *strm,
+ uint64_t rx_offset) {
+ size_t datalen;
+ const uint8_t *data;
+ int rv;
+ uint64_t offset;
+ uint32_t sdflags;
+ int handshake_completed = conn_is_handshake_completed(conn);
+
+ if (!strm->rx.rob) {
+ return 0;
+ }
+
+ for (;;) {
+ /* Stop calling callback if application has called
+ ngtcp2_conn_shutdown_stream_read() inside the callback.
+ Because it doubly counts connection window. */
+ if (strm->flags & NGTCP2_STRM_FLAG_STOP_SENDING) {
+ return 0;
+ }
+
+ datalen = ngtcp2_rob_data_at(strm->rx.rob, &data, rx_offset);
+ if (datalen == 0) {
+ assert(rx_offset == ngtcp2_strm_rx_offset(strm));
+ return 0;
+ }
+
+ offset = rx_offset;
+ rx_offset += datalen;
+
+ sdflags = NGTCP2_STREAM_DATA_FLAG_NONE;
+ if ((strm->flags & NGTCP2_STRM_FLAG_SHUT_RD) &&
+ rx_offset == strm->rx.last_offset) {
+ sdflags |= NGTCP2_STREAM_DATA_FLAG_FIN;
+ }
+ if (!handshake_completed) {
+ sdflags |= NGTCP2_STREAM_DATA_FLAG_EARLY;
+ }
+
+ rv = conn_call_recv_stream_data(conn, strm, sdflags, offset, data, datalen);
+ if (rv != 0) {
+ return rv;
+ }
+
+ ngtcp2_rob_pop(strm->rx.rob, rx_offset - datalen, datalen);
+ }
+}
+
+/*
+ * conn_recv_crypto is called when CRYPTO frame |fr| is received.
+ * |rx_offset_base| is the offset in the entire TLS handshake stream.
+ * fr->offset specifies the offset in each encryption level.
+ * |max_rx_offset| is, if it is nonzero, the maximum offset in the
+ * entire TLS handshake stream that |fr| can carry. |crypto_level| is
+ * the encryption level where this data is received.
+ *
+ * This function returns 0 if it succeeds, or one of the following
+ * negative error codes:
+ *
+ * NGTCP2_ERR_PROTO
+ * CRYPTO frame has invalid offset.
+ * NGTCP2_ERR_NOMEM
+ * Out of memory.
+ * NGTCP2_ERR_CRYPTO
+ * TLS stack reported error.
+ * NGTCP2_ERR_FRAME_ENCODING
+ * The end offset exceeds the maximum value.
+ * NGTCP2_ERR_CALLBACK_FAILURE
+ * User-defined callback function failed.
+ */
+static int conn_recv_crypto(ngtcp2_conn *conn, ngtcp2_crypto_level crypto_level,
+ ngtcp2_strm *crypto, const ngtcp2_crypto *fr) {
+ uint64_t fr_end_offset;
+ uint64_t rx_offset;
+ int rv;
+
+ if (fr->datacnt == 0) {
+ return 0;
+ }
+
+ fr_end_offset = fr->offset + fr->data[0].len;
+
+ if (NGTCP2_MAX_VARINT < fr_end_offset) {
+ return NGTCP2_ERR_FRAME_ENCODING;
+ }
+
+ rx_offset = ngtcp2_strm_rx_offset(crypto);
+
+ if (fr_end_offset <= rx_offset) {
+ if (conn->server &&
+ !(conn->flags & NGTCP2_CONN_FLAG_HANDSHAKE_EARLY_RETRANSMIT) &&
+ crypto_level == NGTCP2_CRYPTO_LEVEL_INITIAL) {
+ /* recovery draft: Speeding Up Handshake Completion
+
+ When a server receives an Initial packet containing duplicate
+ CRYPTO data, it can assume the client did not receive all of
+ the server's CRYPTO data sent in Initial packets, or the
+ client's estimated RTT is too small. ... To speed up
+ handshake completion under these conditions, an endpoint MAY
+ send a packet containing unacknowledged CRYPTO data earlier
+ than the PTO expiry, subject to address validation limits;
+ ... */
+ conn->flags |= NGTCP2_CONN_FLAG_HANDSHAKE_EARLY_RETRANSMIT;
+ conn->in_pktns->rtb.probe_pkt_left = 1;
+ conn->hs_pktns->rtb.probe_pkt_left = 1;
+ }
+ return 0;
+ }
+
+ crypto->rx.last_offset = ngtcp2_max(crypto->rx.last_offset, fr_end_offset);
+
+ /* TODO Before dispatching incoming data to TLS stack, make sure
+ that previous data in previous encryption level has been
+ completely sent to TLS stack. Usually, if data is left, it is an
+ error because key is generated after consuming all data in the
+ previous encryption level. */
+ if (fr->offset <= rx_offset) {
+ size_t ncut = (size_t)(rx_offset - fr->offset);
+ const uint8_t *data = fr->data[0].base + ncut;
+ size_t datalen = fr->data[0].len - ncut;
+ uint64_t offset = rx_offset;
+
+ rx_offset += datalen;
+ rv = ngtcp2_strm_update_rx_offset(crypto, rx_offset);
+ if (rv != 0) {
+ return rv;
+ }
+
+ rv = conn_call_recv_crypto_data(conn, crypto_level, offset, data, datalen);
+ if (rv != 0) {
+ return rv;
+ }
+
+ rv = conn_emit_pending_crypto_data(conn, crypto_level, crypto, rx_offset);
+ if (rv != 0) {
+ return rv;
+ }
+
+ return 0;
+ }
+
+ if (fr_end_offset - rx_offset > NGTCP2_MAX_REORDERED_CRYPTO_DATA) {
+ return NGTCP2_ERR_CRYPTO_BUFFER_EXCEEDED;
+ }
+
+ return ngtcp2_strm_recv_reordering(crypto, fr->data[0].base, fr->data[0].len,
+ fr->offset);
+}
+
+/*
+ * conn_max_data_violated returns nonzero if receiving |datalen|
+ * violates connection flow control on local endpoint.
+ */
+static int conn_max_data_violated(ngtcp2_conn *conn, uint64_t datalen) {
+ return conn->rx.max_offset - conn->rx.offset < datalen;
+}
+
+/*
+ * conn_recv_stream is called when STREAM frame |fr| is received.
+ *
+ * This function returns 0 if it succeeds, or one of the following
+ * negative error codes:
+ *
+ * NGTCP2_ERR_STREAM_STATE
+ * STREAM frame is received for a local stream which is not
+ * initiated; or STREAM frame is received for a local
+ * unidirectional stream
+ * NGTCP2_ERR_STREAM_LIMIT
+ * STREAM frame has remote stream ID which is strictly greater
+ * than the allowed limit.
+ * NGTCP2_ERR_NOMEM
+ * Out of memory.
+ * NGTCP2_ERR_CALLBACK_FAILURE
+ * User-defined callback function failed.
+ * NGTCP2_ERR_FLOW_CONTROL
+ * Flow control limit is violated; or the end offset of stream
+ * data is beyond the NGTCP2_MAX_VARINT.
+ * NGTCP2_ERR_FINAL_SIZE
+ * STREAM frame has strictly larger end offset than it is
+ * permitted.
+ */
+static int conn_recv_stream(ngtcp2_conn *conn, const ngtcp2_stream *fr) {
+ int rv;
+ ngtcp2_strm *strm;
+ ngtcp2_idtr *idtr;
+ uint64_t rx_offset, fr_end_offset;
+ int local_stream;
+ int bidi;
+ uint64_t datalen = ngtcp2_vec_len(fr->data, fr->datacnt);
+ uint32_t sdflags = NGTCP2_STREAM_DATA_FLAG_NONE;
+
+ local_stream = conn_local_stream(conn, fr->stream_id);
+ bidi = bidi_stream(fr->stream_id);
+
+ if (bidi) {
+ if (local_stream) {
+ if (conn->local.bidi.next_stream_id <= fr->stream_id) {
+ return NGTCP2_ERR_STREAM_STATE;
+ }
+ } else if (conn->remote.bidi.max_streams <
+ ngtcp2_ord_stream_id(fr->stream_id)) {
+ return NGTCP2_ERR_STREAM_LIMIT;
+ }
+
+ idtr = &conn->remote.bidi.idtr;
+ } else {
+ if (local_stream) {
+ return NGTCP2_ERR_STREAM_STATE;
+ }
+ if (conn->remote.uni.max_streams < ngtcp2_ord_stream_id(fr->stream_id)) {
+ return NGTCP2_ERR_STREAM_LIMIT;
+ }
+
+ idtr = &conn->remote.uni.idtr;
+ }
+
+ if (NGTCP2_MAX_VARINT - datalen < fr->offset) {
+ return NGTCP2_ERR_FLOW_CONTROL;
+ }
+
+ strm = ngtcp2_conn_find_stream(conn, fr->stream_id);
+ if (strm == NULL) {
+ if (local_stream) {
+ /* TODO The stream has been closed. This should be responded
+ with RESET_STREAM, or simply ignored. */
+ return 0;
+ }
+
+ rv = ngtcp2_idtr_open(idtr, fr->stream_id);
+ if (rv != 0) {
+ if (ngtcp2_err_is_fatal(rv)) {
+ return rv;
+ }
+ assert(rv == NGTCP2_ERR_STREAM_IN_USE);
+ /* TODO The stream has been closed. This should be responded
+ with RESET_STREAM, or simply ignored. */
+ return 0;
+ }
+
+ strm = ngtcp2_objalloc_strm_get(&conn->strm_objalloc);
+ if (strm == NULL) {
+ return NGTCP2_ERR_NOMEM;
+ }
+ /* TODO Perhaps, call new_stream callback? */
+ rv = ngtcp2_conn_init_stream(conn, strm, fr->stream_id, NULL);
+ if (rv != 0) {
+ ngtcp2_objalloc_strm_release(&conn->strm_objalloc, strm);
+ return rv;
+ }
+
+ if (!bidi) {
+ ngtcp2_strm_shutdown(strm, NGTCP2_STRM_FLAG_SHUT_WR);
+ strm->flags |= NGTCP2_STRM_FLAG_FIN_ACKED;
+ }
+
+ rv = conn_call_stream_open(conn, strm);
+ if (rv != 0) {
+ return rv;
+ }
+ }
+
+ fr_end_offset = fr->offset + datalen;
+
+ if (strm->rx.max_offset < fr_end_offset) {
+ return NGTCP2_ERR_FLOW_CONTROL;
+ }
+
+ if (strm->rx.last_offset < fr_end_offset) {
+ uint64_t len = fr_end_offset - strm->rx.last_offset;
+
+ if (conn_max_data_violated(conn, len)) {
+ return NGTCP2_ERR_FLOW_CONTROL;
+ }
+
+ conn->rx.offset += len;
+
+ if (strm->flags & NGTCP2_STRM_FLAG_STOP_SENDING) {
+ ngtcp2_conn_extend_max_offset(conn, len);
+ }
+ }
+
+ rx_offset = ngtcp2_strm_rx_offset(strm);
+
+ if (fr->fin) {
+ if (strm->flags & NGTCP2_STRM_FLAG_SHUT_RD) {
+ if (strm->rx.last_offset != fr_end_offset) {
+ return NGTCP2_ERR_FINAL_SIZE;
+ }
+
+ if (strm->flags & NGTCP2_STRM_FLAG_RECV_RST) {
+ return 0;
+ }
+
+ if (rx_offset == fr_end_offset) {
+ return 0;
+ }
+ } else if (strm->rx.last_offset > fr_end_offset) {
+ return NGTCP2_ERR_FINAL_SIZE;
+ } else {
+ strm->rx.last_offset = fr_end_offset;
+
+ ngtcp2_strm_shutdown(strm, NGTCP2_STRM_FLAG_SHUT_RD);
+ }
+ } else {
+ if ((strm->flags & NGTCP2_STRM_FLAG_SHUT_RD) &&
+ strm->rx.last_offset < fr_end_offset) {
+ return NGTCP2_ERR_FINAL_SIZE;
+ }
+
+ strm->rx.last_offset = ngtcp2_max(strm->rx.last_offset, fr_end_offset);
+
+ if (fr_end_offset <= rx_offset) {
+ return 0;
+ }
+
+ if (strm->flags & NGTCP2_STRM_FLAG_RECV_RST) {
+ return 0;
+ }
+ }
+
+ if (fr->offset <= rx_offset) {
+ size_t ncut = (size_t)(rx_offset - fr->offset);
+ uint64_t offset = rx_offset;
+ const uint8_t *data;
+ int fin;
+
+ if (fr->datacnt) {
+ data = fr->data[0].base + ncut;
+ datalen -= ncut;
+
+ rx_offset += datalen;
+ rv = ngtcp2_strm_update_rx_offset(strm, rx_offset);
+ if (rv != 0) {
+ return rv;
+ }
+ } else {
+ data = NULL;
+ datalen = 0;
+ }
+
+ if (strm->flags & NGTCP2_STRM_FLAG_STOP_SENDING) {
+ return ngtcp2_conn_close_stream_if_shut_rdwr(conn, strm);
+ }
+
+ fin = (strm->flags & NGTCP2_STRM_FLAG_SHUT_RD) &&
+ rx_offset == strm->rx.last_offset;
+
+ if (fin || datalen) {
+ if (fin) {
+ sdflags |= NGTCP2_STREAM_DATA_FLAG_FIN;
+ }
+ if (!conn_is_handshake_completed(conn)) {
+ sdflags |= NGTCP2_STREAM_DATA_FLAG_EARLY;
+ }
+ rv = conn_call_recv_stream_data(conn, strm, sdflags, offset, data,
+ (size_t)datalen);
+ if (rv != 0) {
+ return rv;
+ }
+
+ rv = conn_emit_pending_stream_data(conn, strm, rx_offset);
+ if (rv != 0) {
+ return rv;
+ }
+ }
+ } else if (fr->datacnt) {
+ rv = ngtcp2_strm_recv_reordering(strm, fr->data[0].base, fr->data[0].len,
+ fr->offset);
+ if (rv != 0) {
+ return rv;
+ }
+ }
+ return ngtcp2_conn_close_stream_if_shut_rdwr(conn, strm);
+}
+
+/*
+ * conn_reset_stream adds RESET_STREAM frame to the transmission
+ * queue.
+ *
+ * This function returns 0 if it succeeds, or one of the following
+ * negative error codes:
+ *
+ * NGTCP2_ERR_NOMEM
+ * Out of memory.
+ */
+static int conn_reset_stream(ngtcp2_conn *conn, ngtcp2_strm *strm,
+ uint64_t app_error_code) {
+ int rv;
+ ngtcp2_frame_chain *frc;
+ ngtcp2_pktns *pktns = &conn->pktns;
+
+ rv = ngtcp2_frame_chain_objalloc_new(&frc, &conn->frc_objalloc);
+ if (rv != 0) {
+ return rv;
+ }
+
+ frc->fr.type = NGTCP2_FRAME_RESET_STREAM;
+ frc->fr.reset_stream.stream_id = strm->stream_id;
+ frc->fr.reset_stream.app_error_code = app_error_code;
+ frc->fr.reset_stream.final_size = strm->tx.offset;
+
+ /* TODO This prepends RESET_STREAM to pktns->tx.frq. */
+ frc->next = pktns->tx.frq;
+ pktns->tx.frq = frc;
+
+ return 0;
+}
+
+/*
+ * conn_stop_sending adds STOP_SENDING frame to the transmission
+ * queue.
+ *
+ * This function returns 0 if it succeeds, or one of the following
+ * negative error codes:
+ *
+ * NGTCP2_ERR_NOMEM
+ * Out of memory.
+ */
+static int conn_stop_sending(ngtcp2_conn *conn, ngtcp2_strm *strm,
+ uint64_t app_error_code) {
+ int rv;
+ ngtcp2_frame_chain *frc;
+ ngtcp2_pktns *pktns = &conn->pktns;
+
+ rv = ngtcp2_frame_chain_objalloc_new(&frc, &conn->frc_objalloc);
+ if (rv != 0) {
+ return rv;
+ }
+
+ frc->fr.type = NGTCP2_FRAME_STOP_SENDING;
+ frc->fr.stop_sending.stream_id = strm->stream_id;
+ frc->fr.stop_sending.app_error_code = app_error_code;
+
+ /* TODO This prepends STOP_SENDING to pktns->tx.frq. */
+ frc->next = pktns->tx.frq;
+ pktns->tx.frq = frc;
+
+ return 0;
+}
+
+/*
+ * handle_max_remote_streams_extension extends
+ * |*punsent_max_remote_streams| by |n| if a condition allows it.
+ */
+static void
+handle_max_remote_streams_extension(uint64_t *punsent_max_remote_streams,
+ size_t n) {
+ if (
+#if SIZE_MAX > UINT32_MAX
+ NGTCP2_MAX_STREAMS < n ||
+#endif /* SIZE_MAX > UINT32_MAX */
+ *punsent_max_remote_streams > (uint64_t)(NGTCP2_MAX_STREAMS - n)) {
+ *punsent_max_remote_streams = NGTCP2_MAX_STREAMS;
+ } else {
+ *punsent_max_remote_streams += n;
+ }
+}
+
+/*
+ * conn_recv_reset_stream is called when RESET_STREAM |fr| is
+ * received.
+ *
+ * This function returns 0 if it succeeds, or one of the following
+ * negative error codes:
+ *
+ * NGTCP2_ERR_STREAM_STATE
+ * RESET_STREAM frame is received to the local stream which is not
+ * initiated.
+ * NGTCP2_ERR_STREAM_LIMIT
+ * RESET_STREAM frame has remote stream ID which is strictly
+ * greater than the allowed limit.
+ * NGTCP2_ERR_PROTO
+ * RESET_STREAM frame is received to the local unidirectional
+ * stream
+ * NGTCP2_ERR_NOMEM
+ * Out of memory.
+ * NGTCP2_ERR_CALLBACK_FAILURE
+ * User-defined callback function failed.
+ * NGTCP2_ERR_FLOW_CONTROL
+ * Flow control limit is violated; or the final size is beyond the
+ * NGTCP2_MAX_VARINT.
+ * NGTCP2_ERR_FINAL_SIZE
+ * The final offset is strictly larger than it is permitted.
+ */
+static int conn_recv_reset_stream(ngtcp2_conn *conn,
+ const ngtcp2_reset_stream *fr) {
+ ngtcp2_strm *strm;
+ int local_stream = conn_local_stream(conn, fr->stream_id);
+ int bidi = bidi_stream(fr->stream_id);
+ uint64_t datalen;
+ ngtcp2_idtr *idtr;
+ int rv;
+
+ /* TODO share this piece of code */
+ if (bidi) {
+ if (local_stream) {
+ if (conn->local.bidi.next_stream_id <= fr->stream_id) {
+ return NGTCP2_ERR_STREAM_STATE;
+ }
+ } else if (conn->remote.bidi.max_streams <
+ ngtcp2_ord_stream_id(fr->stream_id)) {
+ return NGTCP2_ERR_STREAM_LIMIT;
+ }
+
+ idtr = &conn->remote.bidi.idtr;
+ } else {
+ if (local_stream) {
+ return NGTCP2_ERR_PROTO;
+ }
+ if (conn->remote.uni.max_streams < ngtcp2_ord_stream_id(fr->stream_id)) {
+ return NGTCP2_ERR_STREAM_LIMIT;
+ }
+
+ idtr = &conn->remote.uni.idtr;
+ }
+
+ if (NGTCP2_MAX_VARINT < fr->final_size) {
+ return NGTCP2_ERR_FLOW_CONTROL;
+ }
+
+ strm = ngtcp2_conn_find_stream(conn, fr->stream_id);
+ if (strm == NULL) {
+ if (local_stream) {
+ return 0;
+ }
+
+ rv = ngtcp2_idtr_open(idtr, fr->stream_id);
+ if (rv != 0) {
+ if (ngtcp2_err_is_fatal(rv)) {
+ return rv;
+ }
+ assert(rv == NGTCP2_ERR_STREAM_IN_USE);
+ return 0;
+ }
+
+ if (conn_initial_stream_rx_offset(conn, fr->stream_id) < fr->final_size ||
+ conn_max_data_violated(conn, fr->final_size)) {
+ return NGTCP2_ERR_FLOW_CONTROL;
+ }
+
+ /* Stream is reset before we create ngtcp2_strm object. */
+ conn->rx.offset += fr->final_size;
+ ngtcp2_conn_extend_max_offset(conn, fr->final_size);
+
+ rv = conn_call_stream_reset(conn, fr->stream_id, fr->final_size,
+ fr->app_error_code, NULL);
+ if (rv != 0) {
+ return rv;
+ }
+
+ /* There will be no activity in this stream because we got
+ RESET_STREAM and don't write stream data any further. This
+ effectively allows another new stream for peer. */
+ if (bidi) {
+ handle_max_remote_streams_extension(&conn->remote.bidi.unsent_max_streams,
+ 1);
+ } else {
+ handle_max_remote_streams_extension(&conn->remote.uni.unsent_max_streams,
+ 1);
+ }
+
+ return 0;
+ }
+
+ if ((strm->flags & NGTCP2_STRM_FLAG_SHUT_RD)) {
+ if (strm->rx.last_offset != fr->final_size) {
+ return NGTCP2_ERR_FINAL_SIZE;
+ }
+ } else if (strm->rx.last_offset > fr->final_size) {
+ return NGTCP2_ERR_FINAL_SIZE;
+ }
+
+ if (strm->flags & NGTCP2_STRM_FLAG_RECV_RST) {
+ return 0;
+ }
+
+ if (strm->rx.max_offset < fr->final_size) {
+ return NGTCP2_ERR_FLOW_CONTROL;
+ }
+
+ datalen = fr->final_size - strm->rx.last_offset;
+
+ if (conn_max_data_violated(conn, datalen)) {
+ return NGTCP2_ERR_FLOW_CONTROL;
+ }
+
+ rv = conn_call_stream_reset(conn, fr->stream_id, fr->final_size,
+ fr->app_error_code, strm->stream_user_data);
+ if (rv != 0) {
+ return rv;
+ }
+
+ /* Extend connection flow control window for the amount of data
+ which are not passed to application. */
+ if (!(strm->flags & NGTCP2_STRM_FLAG_STOP_SENDING)) {
+ ngtcp2_conn_extend_max_offset(conn, strm->rx.last_offset -
+ ngtcp2_strm_rx_offset(strm));
+ }
+
+ conn->rx.offset += datalen;
+ ngtcp2_conn_extend_max_offset(conn, datalen);
+
+ strm->rx.last_offset = fr->final_size;
+ strm->flags |= NGTCP2_STRM_FLAG_SHUT_RD | NGTCP2_STRM_FLAG_RECV_RST;
+
+ ngtcp2_strm_set_app_error_code(strm, fr->app_error_code);
+
+ return ngtcp2_conn_close_stream_if_shut_rdwr(conn, strm);
+}
+
+/*
+ * conn_recv_stop_sending is called when STOP_SENDING |fr| is received.
+ *
+ * This function returns 0 if it succeeds, or one of the following
+ * negative error codes:
+ *
+ * NGTCP2_ERR_STREAM_STATE
+ * STOP_SENDING frame is received for a local stream which is not
+ * initiated; or STOP_SENDING frame is received for a local
+ * unidirectional stream.
+ * NGTCP2_ERR_STREAM_LIMIT
+ * STOP_SENDING frame has remote stream ID which is strictly
+ * greater than the allowed limit.
+ * NGTCP2_ERR_NOMEM
+ * Out of memory.
+ * NGTCP2_ERR_CALLBACK_FAILURE
+ * User-defined callback function failed.
+ */
+static int conn_recv_stop_sending(ngtcp2_conn *conn,
+ const ngtcp2_stop_sending *fr) {
+ int rv;
+ ngtcp2_strm *strm;
+ ngtcp2_idtr *idtr;
+ int local_stream = conn_local_stream(conn, fr->stream_id);
+ int bidi = bidi_stream(fr->stream_id);
+
+ if (bidi) {
+ if (local_stream) {
+ if (conn->local.bidi.next_stream_id <= fr->stream_id) {
+ return NGTCP2_ERR_STREAM_STATE;
+ }
+ } else if (conn->remote.bidi.max_streams <
+ ngtcp2_ord_stream_id(fr->stream_id)) {
+ return NGTCP2_ERR_STREAM_LIMIT;
+ }
+
+ idtr = &conn->remote.bidi.idtr;
+ } else {
+ if (!local_stream || conn->local.uni.next_stream_id <= fr->stream_id) {
+ return NGTCP2_ERR_STREAM_STATE;
+ }
+
+ idtr = &conn->remote.uni.idtr;
+ }
+
+ strm = ngtcp2_conn_find_stream(conn, fr->stream_id);
+ if (strm == NULL) {
+ if (local_stream) {
+ return 0;
+ }
+ rv = ngtcp2_idtr_open(idtr, fr->stream_id);
+ if (rv != 0) {
+ if (ngtcp2_err_is_fatal(rv)) {
+ return rv;
+ }
+ assert(rv == NGTCP2_ERR_STREAM_IN_USE);
+ return 0;
+ }
+
+ /* Frame is received reset before we create ngtcp2_strm
+ object. */
+ strm = ngtcp2_objalloc_strm_get(&conn->strm_objalloc);
+ if (strm == NULL) {
+ return NGTCP2_ERR_NOMEM;
+ }
+ rv = ngtcp2_conn_init_stream(conn, strm, fr->stream_id, NULL);
+ if (rv != 0) {
+ ngtcp2_objalloc_strm_release(&conn->strm_objalloc, strm);
+ return rv;
+ }
+
+ rv = conn_call_stream_open(conn, strm);
+ if (rv != 0) {
+ return rv;
+ }
+ }
+
+ ngtcp2_strm_set_app_error_code(strm, fr->app_error_code);
+
+ /* No RESET_STREAM is required if we have sent FIN and all data have
+ been acknowledged. */
+ if (!ngtcp2_strm_is_all_tx_data_fin_acked(strm) &&
+ !(strm->flags & NGTCP2_STRM_FLAG_SENT_RST)) {
+ rv = conn_reset_stream(conn, strm, fr->app_error_code);
+ if (rv != 0) {
+ return rv;
+ }
+ }
+
+ strm->flags |= NGTCP2_STRM_FLAG_SHUT_WR | NGTCP2_STRM_FLAG_SENT_RST;
+
+ if (ngtcp2_strm_is_tx_queued(strm) && !ngtcp2_strm_streamfrq_empty(strm)) {
+ assert(conn->tx.strmq_nretrans);
+ --conn->tx.strmq_nretrans;
+ }
+
+ ngtcp2_strm_streamfrq_clear(strm);
+
+ return ngtcp2_conn_close_stream_if_shut_rdwr(conn, strm);
+}
+
+/*
+ * check_stateless_reset returns nonzero if Stateless Reset |sr|
+ * coming via |path| is valid against |dcid|.
+ */
+static int check_stateless_reset(const ngtcp2_dcid *dcid,
+ const ngtcp2_path *path,
+ const ngtcp2_pkt_stateless_reset *sr) {
+ return ngtcp2_path_eq(&dcid->ps.path, path) &&
+ ngtcp2_dcid_verify_stateless_reset_token(
+ dcid, sr->stateless_reset_token) == 0;
+}
+
+/*
+ * conn_on_stateless_reset decodes Stateless Reset from the buffer
+ * pointed by |payload| whose length is |payloadlen|. |payload|
+ * should start after first byte of packet.
+ *
+ * If Stateless Reset is decoded, and the Stateless Reset Token is
+ * validated, the connection is closed.
+ *
+ * This function returns 0 if it succeeds, or one of the following
+ * negative error codes:
+ *
+ * NGTCP2_ERR_INVALID_ARGUMENT
+ * Could not decode Stateless Reset; or Stateless Reset Token does
+ * not match; or No stateless reset token is available.
+ * NGTCP2_ERR_CALLBACK_FAILURE
+ * User callback failed.
+ */
+static int conn_on_stateless_reset(ngtcp2_conn *conn, const ngtcp2_path *path,
+ const uint8_t *payload, size_t payloadlen) {
+ int rv = 1;
+ ngtcp2_pv *pv = conn->pv;
+ ngtcp2_dcid *dcid;
+ ngtcp2_pkt_stateless_reset sr;
+ size_t len, i;
+
+ rv = ngtcp2_pkt_decode_stateless_reset(&sr, payload, payloadlen);
+ if (rv != 0) {
+ return rv;
+ }
+
+ if (!check_stateless_reset(&conn->dcid.current, path, &sr) &&
+ (!pv || (!check_stateless_reset(&pv->dcid, path, &sr) &&
+ (!(pv->flags & NGTCP2_PV_FLAG_FALLBACK_ON_FAILURE) ||
+ !check_stateless_reset(&pv->fallback_dcid, path, &sr))))) {
+ len = ngtcp2_ringbuf_len(&conn->dcid.retired.rb);
+ for (i = 0; i < len; ++i) {
+ dcid = ngtcp2_ringbuf_get(&conn->dcid.retired.rb, i);
+ if (check_stateless_reset(dcid, path, &sr)) {
+ break;
+ }
+ }
+
+ if (i == len) {
+ len = ngtcp2_ringbuf_len(&conn->dcid.bound.rb);
+ for (i = 0; i < len; ++i) {
+ dcid = ngtcp2_ringbuf_get(&conn->dcid.bound.rb, i);
+ if (check_stateless_reset(dcid, path, &sr)) {
+ break;
+ }
+ }
+
+ if (i == len) {
+ return NGTCP2_ERR_INVALID_ARGUMENT;
+ }
+ }
+ }
+
+ conn->state = NGTCP2_CS_DRAINING;
+
+ ngtcp2_log_rx_sr(&conn->log, &sr);
+
+ ngtcp2_qlog_stateless_reset_pkt_received(&conn->qlog, &sr);
+
+ return conn_call_recv_stateless_reset(conn, &sr);
+}
+
+/*
+ * conn_recv_max_streams processes the incoming MAX_STREAMS frame
+ * |fr|.
+ *
+ * This function returns 0 if it succeeds, or one of the following
+ * negative error codes:
+ *
+ * NGTCP2_ERR_CALLBACK_FAILURE
+ * User callback failed.
+ * NGTCP2_ERR_FRAME_ENCODING
+ * The maximum streams field exceeds the maximum value.
+ */
+static int conn_recv_max_streams(ngtcp2_conn *conn,
+ const ngtcp2_max_streams *fr) {
+ uint64_t n;
+
+ if (fr->max_streams > NGTCP2_MAX_STREAMS) {
+ return NGTCP2_ERR_FRAME_ENCODING;
+ }
+
+ n = ngtcp2_min(fr->max_streams, NGTCP2_MAX_STREAMS);
+
+ if (fr->type == NGTCP2_FRAME_MAX_STREAMS_BIDI) {
+ if (conn->local.bidi.max_streams < n) {
+ conn->local.bidi.max_streams = n;
+ return conn_call_extend_max_local_streams_bidi(conn, n);
+ }
+ return 0;
+ }
+
+ if (conn->local.uni.max_streams < n) {
+ conn->local.uni.max_streams = n;
+ return conn_call_extend_max_local_streams_uni(conn, n);
+ }
+ return 0;
+}
+
+static int conn_retire_dcid_prior_to(ngtcp2_conn *conn, ngtcp2_ringbuf *rb,
+ uint64_t retire_prior_to) {
+ size_t i;
+ ngtcp2_dcid *dcid, *last;
+ int rv;
+
+ for (i = 0; i < ngtcp2_ringbuf_len(rb);) {
+ dcid = ngtcp2_ringbuf_get(rb, i);
+ if (dcid->seq >= retire_prior_to) {
+ ++i;
+ continue;
+ }
+
+ rv = conn_retire_dcid_seq(conn, dcid->seq);
+ if (rv != 0) {
+ return rv;
+ }
+
+ if (i == 0) {
+ ngtcp2_ringbuf_pop_front(rb);
+ continue;
+ }
+
+ if (i == ngtcp2_ringbuf_len(rb) - 1) {
+ ngtcp2_ringbuf_pop_back(rb);
+ break;
+ }
+
+ last = ngtcp2_ringbuf_get(rb, ngtcp2_ringbuf_len(rb) - 1);
+ ngtcp2_dcid_copy(dcid, last);
+ ngtcp2_ringbuf_pop_back(rb);
+ }
+
+ return 0;
+}
+
+/*
+ * conn_recv_new_connection_id processes the incoming
+ * NEW_CONNECTION_ID frame |fr|.
+ *
+ * This function returns 0 if it succeeds, or one of the following
+ * negative error codes:
+ *
+ * NGTCP2_ERR_PROTO
+ * |fr| has the duplicated sequence number with different CID or
+ * token; or DCID is zero-length.
+ */
+static int conn_recv_new_connection_id(ngtcp2_conn *conn,
+ const ngtcp2_new_connection_id *fr) {
+ size_t i, len;
+ ngtcp2_dcid *dcid;
+ ngtcp2_pv *pv = conn->pv;
+ int rv;
+ int found = 0;
+ size_t extra_dcid = 0;
+
+ if (conn->dcid.current.cid.datalen == 0) {
+ return NGTCP2_ERR_PROTO;
+ }
+
+ if (fr->retire_prior_to > fr->seq) {
+ return NGTCP2_ERR_FRAME_ENCODING;
+ }
+
+ rv = ngtcp2_dcid_verify_uniqueness(&conn->dcid.current, fr->seq, &fr->cid,
+ fr->stateless_reset_token);
+ if (rv != 0) {
+ return rv;
+ }
+ if (ngtcp2_cid_eq(&conn->dcid.current.cid, &fr->cid)) {
+ found = 1;
+ }
+
+ if (pv) {
+ rv = ngtcp2_dcid_verify_uniqueness(&pv->dcid, fr->seq, &fr->cid,
+ fr->stateless_reset_token);
+ if (rv != 0) {
+ return rv;
+ }
+ if (ngtcp2_cid_eq(&pv->dcid.cid, &fr->cid)) {
+ found = 1;
+ }
+ }
+
+ len = ngtcp2_ringbuf_len(&conn->dcid.bound.rb);
+
+ for (i = 0; i < len; ++i) {
+ dcid = ngtcp2_ringbuf_get(&conn->dcid.bound.rb, i);
+ rv = ngtcp2_dcid_verify_uniqueness(dcid, fr->seq, &fr->cid,
+ fr->stateless_reset_token);
+ if (rv != 0) {
+ return NGTCP2_ERR_PROTO;
+ }
+ if (ngtcp2_cid_eq(&dcid->cid, &fr->cid)) {
+ found = 1;
+ }
+ }
+
+ len = ngtcp2_ringbuf_len(&conn->dcid.unused.rb);
+
+ for (i = 0; i < len; ++i) {
+ dcid = ngtcp2_ringbuf_get(&conn->dcid.unused.rb, i);
+ rv = ngtcp2_dcid_verify_uniqueness(dcid, fr->seq, &fr->cid,
+ fr->stateless_reset_token);
+ if (rv != 0) {
+ return NGTCP2_ERR_PROTO;
+ }
+ if (ngtcp2_cid_eq(&dcid->cid, &fr->cid)) {
+ found = 1;
+ }
+ }
+
+ if (conn->dcid.retire_prior_to < fr->retire_prior_to) {
+ conn->dcid.retire_prior_to = fr->retire_prior_to;
+
+ rv = conn_retire_dcid_prior_to(conn, &conn->dcid.bound.rb,
+ fr->retire_prior_to);
+ if (rv != 0) {
+ return rv;
+ }
+
+ rv = conn_retire_dcid_prior_to(conn, &conn->dcid.unused.rb,
+ conn->dcid.retire_prior_to);
+ if (rv != 0) {
+ return rv;
+ }
+ } else if (fr->seq < conn->dcid.retire_prior_to) {
+ /* If packets are reordered, we might have retire_prior_to which
+ is larger than fr->seq.
+
+ A malicious peer might send crafted NEW_CONNECTION_ID to force
+ local endpoint to create lots of RETIRE_CONNECTION_ID frames.
+ For example, a peer might send seq = 50000 and retire_prior_to
+ = 50000. Then send NEW_CONNECTION_ID frames with seq <
+ 50000. */
+ return conn_retire_dcid_seq(conn, fr->seq);
+ }
+
+ if (found) {
+ return 0;
+ }
+
+ if (ngtcp2_gaptr_is_pushed(&conn->dcid.seqgap, fr->seq, 1)) {
+ return 0;
+ }
+
+ rv = ngtcp2_gaptr_push(&conn->dcid.seqgap, fr->seq, 1);
+ if (rv != 0) {
+ return rv;
+ }
+
+ if (ngtcp2_ksl_len(&conn->dcid.seqgap.gap) > 32) {
+ ngtcp2_gaptr_drop_first_gap(&conn->dcid.seqgap);
+ }
+
+ len = ngtcp2_ringbuf_len(&conn->dcid.unused.rb);
+
+ if (conn->dcid.current.seq >= conn->dcid.retire_prior_to) {
+ ++extra_dcid;
+ }
+ if (pv) {
+ if (pv->dcid.seq != conn->dcid.current.seq &&
+ pv->dcid.seq >= conn->dcid.retire_prior_to) {
+ ++extra_dcid;
+ }
+ if ((pv->flags & NGTCP2_PV_FLAG_FALLBACK_ON_FAILURE) &&
+ pv->fallback_dcid.seq != conn->dcid.current.seq &&
+ pv->fallback_dcid.seq >= conn->dcid.retire_prior_to) {
+ ++extra_dcid;
+ }
+ }
+
+ if (conn->local.transport_params.active_connection_id_limit <=
+ len + extra_dcid) {
+ return NGTCP2_ERR_CONNECTION_ID_LIMIT;
+ }
+
+ if (len >= NGTCP2_MAX_DCID_POOL_SIZE) {
+ ngtcp2_log_info(&conn->log, NGTCP2_LOG_EVENT_CON, "too many connection ID");
+ return 0;
+ }
+
+ dcid = ngtcp2_ringbuf_push_back(&conn->dcid.unused.rb);
+ ngtcp2_dcid_init(dcid, fr->seq, &fr->cid, fr->stateless_reset_token);
+
+ return 0;
+}
+
+/*
+ * conn_post_process_recv_new_connection_id handles retirement request
+ * of active DCIDs.
+ *
+ * This function returns 0 if it succeeds, or one of the following
+ * negative error codes:
+ *
+ * NGTCP2_ERR_NOMEM
+ * Out of memory.
+ * NGTCP2_ERR_CALLBACK_FAILURE
+ * User-defined callback function failed.
+ */
+static int conn_post_process_recv_new_connection_id(ngtcp2_conn *conn,
+ ngtcp2_tstamp ts) {
+ ngtcp2_pv *pv = conn->pv;
+ ngtcp2_dcid *dcid;
+ int rv;
+
+ if (conn->dcid.current.seq < conn->dcid.retire_prior_to) {
+ if (ngtcp2_ringbuf_len(&conn->dcid.unused.rb) == 0) {
+ return 0;
+ }
+
+ rv = conn_retire_dcid(conn, &conn->dcid.current, ts);
+ if (rv != 0) {
+ return rv;
+ }
+
+ dcid = ngtcp2_ringbuf_get(&conn->dcid.unused.rb, 0);
+ if (pv) {
+ if (conn->dcid.current.seq == pv->dcid.seq) {
+ ngtcp2_dcid_copy_cid_token(&pv->dcid, dcid);
+ }
+ if ((pv->flags & NGTCP2_PV_FLAG_FALLBACK_ON_FAILURE) &&
+ conn->dcid.current.seq == pv->fallback_dcid.seq) {
+ ngtcp2_dcid_copy_cid_token(&pv->fallback_dcid, dcid);
+ }
+ }
+
+ ngtcp2_dcid_copy_cid_token(&conn->dcid.current, dcid);
+ ngtcp2_ringbuf_pop_front(&conn->dcid.unused.rb);
+
+ rv = conn_call_activate_dcid(conn, &conn->dcid.current);
+ if (rv != 0) {
+ return rv;
+ }
+ }
+
+ if (pv) {
+ if (pv->dcid.seq < conn->dcid.retire_prior_to) {
+ if (ngtcp2_ringbuf_len(&conn->dcid.unused.rb)) {
+ rv = conn_retire_dcid(conn, &pv->dcid, ts);
+ if (rv != 0) {
+ return rv;
+ }
+
+ dcid = ngtcp2_ringbuf_get(&conn->dcid.unused.rb, 0);
+
+ if ((pv->flags & NGTCP2_PV_FLAG_FALLBACK_ON_FAILURE) &&
+ pv->dcid.seq == pv->fallback_dcid.seq) {
+ ngtcp2_dcid_copy_cid_token(&pv->fallback_dcid, dcid);
+ }
+
+ ngtcp2_dcid_copy_cid_token(&pv->dcid, dcid);
+ ngtcp2_ringbuf_pop_front(&conn->dcid.unused.rb);
+
+ rv = conn_call_activate_dcid(conn, &pv->dcid);
+ if (rv != 0) {
+ return rv;
+ }
+ } else {
+ ngtcp2_log_info(&conn->log, NGTCP2_LOG_EVENT_PTV,
+ "path migration is aborted because connection ID is"
+ "retired and no unused connection ID is available");
+
+ return conn_abort_pv(conn, ts);
+ }
+ }
+ if ((pv->flags & NGTCP2_PV_FLAG_FALLBACK_ON_FAILURE) &&
+ pv->fallback_dcid.seq < conn->dcid.retire_prior_to) {
+ if (ngtcp2_ringbuf_len(&conn->dcid.unused.rb)) {
+ rv = conn_retire_dcid(conn, &pv->fallback_dcid, ts);
+ if (rv != 0) {
+ return rv;
+ }
+
+ dcid = ngtcp2_ringbuf_get(&conn->dcid.unused.rb, 0);
+ ngtcp2_dcid_copy_cid_token(&pv->fallback_dcid, dcid);
+ ngtcp2_ringbuf_pop_front(&conn->dcid.unused.rb);
+
+ rv = conn_call_activate_dcid(conn, &pv->fallback_dcid);
+ if (rv != 0) {
+ return rv;
+ }
+ } else {
+ /* Now we have no fallback dcid. */
+ return conn_abort_pv(conn, ts);
+ }
+ }
+ }
+
+ return 0;
+}
+
+/*
+ * conn_recv_retire_connection_id processes the incoming
+ * RETIRE_CONNECTION_ID frame |fr|. |hd| is a packet header which
+ * |fr| is included.
+ *
+ * This function returns 0 if it succeeds, or one of the following
+ * negative error codes:
+ *
+ * NGTCP2_ERR_NOMEM
+ * Out of memory.
+ * NGTCP2_ERR_PROTO
+ * SCID is zero-length.
+ * NGTCP2_ERR_FRAME_ENCODING
+ * Attempt to retire CID which is used as DCID to send this frame.
+ */
+static int conn_recv_retire_connection_id(ngtcp2_conn *conn,
+ const ngtcp2_pkt_hd *hd,
+ const ngtcp2_retire_connection_id *fr,
+ ngtcp2_tstamp ts) {
+ ngtcp2_ksl_it it;
+ ngtcp2_scid *scid;
+
+ if (conn->oscid.datalen == 0 || conn->scid.last_seq < fr->seq) {
+ return NGTCP2_ERR_PROTO;
+ }
+
+ for (it = ngtcp2_ksl_begin(&conn->scid.set); !ngtcp2_ksl_it_end(&it);
+ ngtcp2_ksl_it_next(&it)) {
+ scid = ngtcp2_ksl_it_get(&it);
+ if (scid->seq == fr->seq) {
+ if (ngtcp2_cid_eq(&scid->cid, &hd->dcid)) {
+ return NGTCP2_ERR_PROTO;
+ }
+
+ if (!(scid->flags & NGTCP2_SCID_FLAG_RETIRED)) {
+ scid->flags |= NGTCP2_SCID_FLAG_RETIRED;
+ ++conn->scid.num_retired;
+ }
+
+ if (scid->pe.index != NGTCP2_PQ_BAD_INDEX) {
+ ngtcp2_pq_remove(&conn->scid.used, &scid->pe);
+ scid->pe.index = NGTCP2_PQ_BAD_INDEX;
+ }
+
+ scid->retired_ts = ts;
+
+ return ngtcp2_pq_push(&conn->scid.used, &scid->pe);
+ }
+ }
+
+ return 0;
+}
+
+/*
+ * conn_recv_new_token processes the incoming NEW_TOKEN frame |fr|.
+ *
+ * This function returns 0 if it succeeds, or one of the following
+ * negative error codes:
+ *
+ * NGTCP2_ERR_FRAME_ENCODING
+ * Token is empty
+ * NGTCP2_ERR_PROTO:
+ * Server received NEW_TOKEN.
+ */
+static int conn_recv_new_token(ngtcp2_conn *conn, const ngtcp2_new_token *fr) {
+ if (conn->server) {
+ return NGTCP2_ERR_PROTO;
+ }
+
+ if (fr->tokenlen == 0) {
+ return NGTCP2_ERR_FRAME_ENCODING;
+ }
+
+ return conn_call_recv_new_token(conn, fr->token, fr->tokenlen);
+}
+
+/*
+ * conn_recv_streams_blocked_bidi processes the incoming
+ * STREAMS_BLOCKED (0x16).
+ *
+ * This function returns 0 if it succeeds, or one of the following
+ * negative error codes:
+ *
+ * NGTCP2_ERR_FRAME_ENCODING
+ * Maximum Streams is larger than advertised value.
+ */
+static int conn_recv_streams_blocked_bidi(ngtcp2_conn *conn,
+ ngtcp2_streams_blocked *fr) {
+ if (fr->max_streams > conn->remote.bidi.max_streams) {
+ return NGTCP2_ERR_FRAME_ENCODING;
+ }
+
+ return 0;
+}
+
+/*
+ * conn_recv_streams_blocked_uni processes the incoming
+ * STREAMS_BLOCKED (0x17).
+ *
+ * This function returns 0 if it succeeds, or one of the following
+ * negative error codes:
+ *
+ * NGTCP2_ERR_FRAME_ENCODING
+ * Maximum Streams is larger than advertised value.
+ */
+static int conn_recv_streams_blocked_uni(ngtcp2_conn *conn,
+ ngtcp2_streams_blocked *fr) {
+ if (fr->max_streams > conn->remote.uni.max_streams) {
+ return NGTCP2_ERR_FRAME_ENCODING;
+ }
+
+ return 0;
+}
+
+/*
+ * conn_select_preferred_addr asks a client application to select a
+ * server address from preferred addresses received from server. If a
+ * client chooses the address, path validation will start.
+ *
+ * This function returns 0 if it succeeds, or one of the following
+ * negative error codes:
+ *
+ * NGTCP2_ERR_NOMEM
+ * Out of memory.
+ * NGTCP2_ERR_CALLBACK_FAILURE
+ * User-defined callback function failed.
+ */
+static int conn_select_preferred_addr(ngtcp2_conn *conn) {
+ ngtcp2_path_storage ps;
+ int rv;
+ ngtcp2_duration pto, initial_pto, timeout;
+ ngtcp2_pv *pv;
+ ngtcp2_dcid *dcid;
+
+ if (ngtcp2_ringbuf_len(&conn->dcid.unused.rb) == 0) {
+ return 0;
+ }
+
+ ngtcp2_path_storage_zero(&ps);
+ ngtcp2_addr_copy(&ps.path.local, &conn->dcid.current.ps.path.local);
+
+ rv = conn_call_select_preferred_addr(conn, &ps.path);
+ if (rv != 0) {
+ return rv;
+ }
+
+ if (ps.path.remote.addrlen == 0 ||
+ ngtcp2_addr_eq(&conn->dcid.current.ps.path.remote, &ps.path.remote)) {
+ return 0;
+ }
+
+ assert(conn->pv == NULL);
+
+ dcid = ngtcp2_ringbuf_get(&conn->dcid.unused.rb, 0);
+ ngtcp2_dcid_set_path(dcid, &ps.path);
+
+ pto = conn_compute_pto(conn, &conn->pktns);
+ initial_pto = conn_compute_initial_pto(conn, &conn->pktns);
+ timeout = 3 * ngtcp2_max(pto, initial_pto);
+
+ rv = ngtcp2_pv_new(&pv, dcid, timeout, NGTCP2_PV_FLAG_PREFERRED_ADDR,
+ &conn->log, conn->mem);
+ if (rv != 0) {
+ /* TODO Call ngtcp2_dcid_free here if it is introduced */
+ return rv;
+ }
+
+ ngtcp2_ringbuf_pop_front(&conn->dcid.unused.rb);
+ conn->pv = pv;
+
+ return conn_call_activate_dcid(conn, &pv->dcid);
+}
+
+/*
+ * conn_recv_handshake_done processes the incoming HANDSHAKE_DONE
+ * frame |fr|.
+ *
+ * This function returns 0 if it succeeds, or one of the following
+ * negative error codes:
+ *
+ * NGTCP2_ERR_PROTO
+ * Server received HANDSHAKE_DONE frame.
+ * NGTCP2_ERR_NOMEM
+ * Out of memory.
+ * NGTCP2_ERR_CALLBACK_FAILURE
+ * User-defined callback function failed.
+ */
+static int conn_recv_handshake_done(ngtcp2_conn *conn, ngtcp2_tstamp ts) {
+ int rv;
+
+ if (conn->server) {
+ return NGTCP2_ERR_PROTO;
+ }
+
+ if (conn->flags & NGTCP2_CONN_FLAG_HANDSHAKE_CONFIRMED) {
+ return 0;
+ }
+
+ conn->flags |= NGTCP2_CONN_FLAG_HANDSHAKE_CONFIRMED |
+ NGTCP2_CONN_FLAG_SERVER_ADDR_VERIFIED;
+
+ conn->pktns.rtb.persistent_congestion_start_ts = ts;
+
+ conn_discard_handshake_state(conn, ts);
+
+ assert(conn->remote.transport_params);
+
+ if (conn->remote.transport_params->preferred_address_present) {
+ rv = conn_select_preferred_addr(conn);
+ if (rv != 0) {
+ return rv;
+ }
+ }
+
+ rv = conn_call_handshake_confirmed(conn);
+ if (rv != 0) {
+ return rv;
+ }
+
+ /* Re-arm loss detection timer after handshake has been
+ confirmed. */
+ ngtcp2_conn_set_loss_detection_timer(conn, ts);
+
+ return 0;
+}
+
+/*
+ * conn_recv_datagram processes the incoming DATAGRAM frame |fr|.
+ *
+ * This function returns 0 if it succeeds, or one of the following
+ * negative error codes:
+ *
+ * NGTCP2_ERR_CALLBACK_FAILURE
+ * User-defined callback function failed.
+ */
+static int conn_recv_datagram(ngtcp2_conn *conn, ngtcp2_datagram *fr) {
+ assert(conn->local.transport_params.max_datagram_frame_size);
+
+ return conn_call_recv_datagram(conn, fr);
+}
+
+/*
+ * conn_key_phase_changed returns nonzero if |hd| indicates that the
+ * key phase has unexpected value.
+ */
+static int conn_key_phase_changed(ngtcp2_conn *conn, const ngtcp2_pkt_hd *hd) {
+ ngtcp2_pktns *pktns = &conn->pktns;
+
+ return !(pktns->crypto.rx.ckm->flags & NGTCP2_CRYPTO_KM_FLAG_KEY_PHASE_ONE) ^
+ !(hd->flags & NGTCP2_PKT_FLAG_KEY_PHASE);
+}
+
+/*
+ * conn_prepare_key_update installs new updated keys.
+ */
+static int conn_prepare_key_update(ngtcp2_conn *conn, ngtcp2_tstamp ts) {
+ int rv;
+ ngtcp2_tstamp confirmed_ts = conn->crypto.key_update.confirmed_ts;
+ ngtcp2_duration pto = conn_compute_pto(conn, &conn->pktns);
+ ngtcp2_pktns *pktns = &conn->pktns;
+ ngtcp2_crypto_km *rx_ckm = pktns->crypto.rx.ckm;
+ ngtcp2_crypto_km *tx_ckm = pktns->crypto.tx.ckm;
+ ngtcp2_crypto_km *new_rx_ckm, *new_tx_ckm;
+ ngtcp2_crypto_aead_ctx rx_aead_ctx = {0}, tx_aead_ctx = {0};
+ size_t secretlen, ivlen;
+
+ if ((conn->flags & NGTCP2_CONN_FLAG_HANDSHAKE_CONFIRMED) &&
+ tx_ckm->use_count >= pktns->crypto.ctx.max_encryption &&
+ ngtcp2_conn_initiate_key_update(conn, ts) != 0) {
+ return NGTCP2_ERR_AEAD_LIMIT_REACHED;
+ }
+
+ if ((conn->flags & NGTCP2_CONN_FLAG_KEY_UPDATE_NOT_CONFIRMED) ||
+ (confirmed_ts != UINT64_MAX && confirmed_ts + pto > ts)) {
+ return 0;
+ }
+
+ if (conn->crypto.key_update.new_rx_ckm ||
+ conn->crypto.key_update.new_tx_ckm) {
+ assert(conn->crypto.key_update.new_rx_ckm);
+ assert(conn->crypto.key_update.new_tx_ckm);
+ return 0;
+ }
+
+ secretlen = rx_ckm->secret.len;
+ ivlen = rx_ckm->iv.len;
+
+ rv = ngtcp2_crypto_km_nocopy_new(&conn->crypto.key_update.new_rx_ckm,
+ secretlen, ivlen, conn->mem);
+ if (rv != 0) {
+ return rv;
+ }
+
+ rv = ngtcp2_crypto_km_nocopy_new(&conn->crypto.key_update.new_tx_ckm,
+ secretlen, ivlen, conn->mem);
+ if (rv != 0) {
+ return rv;
+ }
+
+ new_rx_ckm = conn->crypto.key_update.new_rx_ckm;
+ new_tx_ckm = conn->crypto.key_update.new_tx_ckm;
+
+ rv = conn_call_update_key(
+ conn, new_rx_ckm->secret.base, new_tx_ckm->secret.base, &rx_aead_ctx,
+ new_rx_ckm->iv.base, &tx_aead_ctx, new_tx_ckm->iv.base,
+ rx_ckm->secret.base, tx_ckm->secret.base, secretlen);
+ if (rv != 0) {
+ return rv;
+ }
+
+ new_rx_ckm->aead_ctx = rx_aead_ctx;
+ new_tx_ckm->aead_ctx = tx_aead_ctx;
+
+ if (!(rx_ckm->flags & NGTCP2_CRYPTO_KM_FLAG_KEY_PHASE_ONE)) {
+ new_rx_ckm->flags |= NGTCP2_CRYPTO_KM_FLAG_KEY_PHASE_ONE;
+ new_tx_ckm->flags |= NGTCP2_CRYPTO_KM_FLAG_KEY_PHASE_ONE;
+ }
+
+ if (conn->crypto.key_update.old_rx_ckm) {
+ conn_call_delete_crypto_aead_ctx(
+ conn, &conn->crypto.key_update.old_rx_ckm->aead_ctx);
+ ngtcp2_crypto_km_del(conn->crypto.key_update.old_rx_ckm, conn->mem);
+ conn->crypto.key_update.old_rx_ckm = NULL;
+ }
+
+ return 0;
+}
+
+/*
+ * conn_rotate_keys rotates keys. The current key moves to old key,
+ * and new key moves to the current key. If the local endpoint
+ * initiated this key update, pass nonzero as |initiator|.
+ */
+static void conn_rotate_keys(ngtcp2_conn *conn, int64_t pkt_num,
+ int initiator) {
+ ngtcp2_pktns *pktns = &conn->pktns;
+
+ assert(conn->crypto.key_update.new_rx_ckm);
+ assert(conn->crypto.key_update.new_tx_ckm);
+ assert(!conn->crypto.key_update.old_rx_ckm);
+ assert(!(conn->flags & NGTCP2_CONN_FLAG_PPE_PENDING));
+
+ conn->crypto.key_update.old_rx_ckm = pktns->crypto.rx.ckm;
+
+ pktns->crypto.rx.ckm = conn->crypto.key_update.new_rx_ckm;
+ conn->crypto.key_update.new_rx_ckm = NULL;
+ pktns->crypto.rx.ckm->pkt_num = pkt_num;
+
+ assert(pktns->crypto.tx.ckm);
+
+ conn_call_delete_crypto_aead_ctx(conn, &pktns->crypto.tx.ckm->aead_ctx);
+ ngtcp2_crypto_km_del(pktns->crypto.tx.ckm, conn->mem);
+
+ pktns->crypto.tx.ckm = conn->crypto.key_update.new_tx_ckm;
+ conn->crypto.key_update.new_tx_ckm = NULL;
+ pktns->crypto.tx.ckm->pkt_num = pktns->tx.last_pkt_num + 1;
+
+ conn->flags |= NGTCP2_CONN_FLAG_KEY_UPDATE_NOT_CONFIRMED;
+ if (initiator) {
+ conn->flags |= NGTCP2_CONN_FLAG_KEY_UPDATE_INITIATOR;
+ }
+}
+
+/*
+ * conn_path_validation_in_progress returns nonzero if path validation
+ * against |path| is underway.
+ */
+static int conn_path_validation_in_progress(ngtcp2_conn *conn,
+ const ngtcp2_path *path) {
+ ngtcp2_pv *pv = conn->pv;
+
+ return pv && ngtcp2_path_eq(&pv->dcid.ps.path, path);
+}
+
+/*
+ * conn_recv_non_probing_pkt_on_new_path is called when non-probing
+ * packet is received via new path. It starts path validation against
+ * the new path.
+ *
+ * This function returns 0 if it succeeds, or one of the following
+ * negative error codes:
+ *
+ * NGTCP2_ERR_CONN_ID_BLOCKED
+ * No DCID is available
+ * NGTCP2_ERR_NOMEM
+ * Out of memory
+ */
+static int conn_recv_non_probing_pkt_on_new_path(ngtcp2_conn *conn,
+ const ngtcp2_path *path,
+ size_t dgramlen,
+ int new_cid_used,
+ ngtcp2_tstamp ts) {
+
+ ngtcp2_dcid dcid, *bound_dcid, *last;
+ ngtcp2_pv *pv;
+ int rv;
+ ngtcp2_duration pto, initial_pto, timeout;
+ int require_new_cid;
+ int local_addr_eq;
+ uint32_t remote_addr_cmp;
+ size_t len, i;
+
+ assert(conn->server);
+
+ if (conn->pv && (conn->pv->flags & NGTCP2_PV_FLAG_FALLBACK_ON_FAILURE) &&
+ ngtcp2_path_eq(&conn->pv->fallback_dcid.ps.path, path)) {
+ /* If new path equals fallback path, that means connection
+ migrated back to the original path. Fallback path is
+ considered to be validated. */
+ ngtcp2_log_info(&conn->log, NGTCP2_LOG_EVENT_PTV,
+ "path is migrated back to the original path");
+ ngtcp2_dcid_copy(&conn->dcid.current, &conn->pv->fallback_dcid);
+ conn_reset_congestion_state(conn, ts);
+ conn->dcid.current.bytes_recv += dgramlen;
+ conn_reset_ecn_validation_state(conn);
+
+ rv = conn_abort_pv(conn, ts);
+ if (rv != 0) {
+ return rv;
+ }
+
+ /* Run PMTUD just in case if it is prematurely aborted */
+ assert(!conn->pmtud);
+
+ return conn_start_pmtud(conn);
+ }
+
+ remote_addr_cmp =
+ ngtcp2_addr_compare(&conn->dcid.current.ps.path.remote, &path->remote);
+ local_addr_eq =
+ ngtcp2_addr_eq(&conn->dcid.current.ps.path.local, &path->local);
+
+ /*
+ * When to change DCID? RFC 9002 section 9.5 says:
+ *
+ * An endpoint MUST NOT reuse a connection ID when sending from more
+ * than one local address -- for example, when initiating connection
+ * migration as described in Section 9.2 or when probing a new
+ * network path as described in Section 9.1.
+ *
+ * Similarly, an endpoint MUST NOT reuse a connection ID when
+ * sending to more than one destination address. Due to network
+ * changes outside the control of its peer, an endpoint might
+ * receive packets from a new source address with the same
+ * Destination Connection ID field value, in which case it MAY
+ * continue to use the current connection ID with the new remote
+ * address while still sending from the same local address.
+ */
+ require_new_cid = conn->dcid.current.cid.datalen &&
+ ((new_cid_used && remote_addr_cmp) || !local_addr_eq);
+
+ ngtcp2_log_info(&conn->log, NGTCP2_LOG_EVENT_CON,
+ "non-probing packet was received from new remote address");
+
+ pto = conn_compute_pto(conn, &conn->pktns);
+ initial_pto = conn_compute_initial_pto(conn, &conn->pktns);
+ timeout = 3 * ngtcp2_max(pto, initial_pto);
+
+ len = ngtcp2_ringbuf_len(&conn->dcid.bound.rb);
+
+ for (i = 0; i < len; ++i) {
+ bound_dcid = ngtcp2_ringbuf_get(&conn->dcid.bound.rb, i);
+ if (ngtcp2_path_eq(&bound_dcid->ps.path, path)) {
+ ngtcp2_log_info(
+ &conn->log, NGTCP2_LOG_EVENT_CON,
+ "Found DCID which has already been bound to the new path");
+
+ ngtcp2_dcid_copy(&dcid, bound_dcid);
+ if (i == 0) {
+ ngtcp2_ringbuf_pop_front(&conn->dcid.bound.rb);
+ } else if (i == ngtcp2_ringbuf_len(&conn->dcid.bound.rb) - 1) {
+ ngtcp2_ringbuf_pop_back(&conn->dcid.bound.rb);
+ } else {
+ last = ngtcp2_ringbuf_get(&conn->dcid.bound.rb, len - 1);
+ ngtcp2_dcid_copy(bound_dcid, last);
+ ngtcp2_ringbuf_pop_back(&conn->dcid.bound.rb);
+ }
+ require_new_cid = 0;
+
+ if (dcid.cid.datalen) {
+ rv = conn_call_activate_dcid(conn, &dcid);
+ if (rv != 0) {
+ return rv;
+ }
+ }
+ break;
+ }
+ }
+
+ if (i == len) {
+ if (require_new_cid) {
+ if (ngtcp2_ringbuf_len(&conn->dcid.unused.rb) == 0) {
+ return NGTCP2_ERR_CONN_ID_BLOCKED;
+ }
+ ngtcp2_dcid_copy(&dcid, ngtcp2_ringbuf_get(&conn->dcid.unused.rb, 0));
+ ngtcp2_ringbuf_pop_front(&conn->dcid.unused.rb);
+
+ rv = conn_call_activate_dcid(conn, &dcid);
+ if (rv != 0) {
+ return rv;
+ }
+ } else {
+ /* Use the current DCID if a remote endpoint does not change
+ DCID. */
+ ngtcp2_dcid_copy(&dcid, &conn->dcid.current);
+ dcid.bytes_sent = 0;
+ dcid.bytes_recv = 0;
+ dcid.flags &= (uint8_t)~NGTCP2_DCID_FLAG_PATH_VALIDATED;
+ }
+ }
+
+ ngtcp2_dcid_set_path(&dcid, path);
+ dcid.bytes_recv += dgramlen;
+
+ rv = ngtcp2_pv_new(&pv, &dcid, timeout, NGTCP2_PV_FLAG_FALLBACK_ON_FAILURE,
+ &conn->log, conn->mem);
+ if (rv != 0) {
+ return rv;
+ }
+
+ if (conn->pv && (conn->pv->flags & NGTCP2_PV_FLAG_FALLBACK_ON_FAILURE)) {
+ ngtcp2_dcid_copy(&pv->fallback_dcid, &conn->pv->fallback_dcid);
+ pv->fallback_pto = conn->pv->fallback_pto;
+ /* Unset the flag bit so that conn_stop_pv does not retire
+ DCID. */
+ conn->pv->flags &= (uint8_t)~NGTCP2_PV_FLAG_FALLBACK_ON_FAILURE;
+ } else {
+ ngtcp2_dcid_copy(&pv->fallback_dcid, &conn->dcid.current);
+ pv->fallback_pto = pto;
+ }
+
+ if (!local_addr_eq || (remote_addr_cmp & (NGTCP2_ADDR_COMPARE_FLAG_ADDR |
+ NGTCP2_ADDR_COMPARE_FLAG_FAMILY))) {
+ conn_reset_congestion_state(conn, ts);
+ } else {
+ /* For NAT rebinding, keep max_udp_payload_size since client most
+ likely does not send a padded PATH_CHALLENGE. */
+ dcid.max_udp_payload_size = ngtcp2_max(
+ dcid.max_udp_payload_size, conn->dcid.current.max_udp_payload_size);
+ }
+
+ ngtcp2_dcid_copy(&conn->dcid.current, &dcid);
+
+ conn_reset_ecn_validation_state(conn);
+
+ ngtcp2_conn_stop_pmtud(conn);
+
+ if (conn->pv) {
+ ngtcp2_log_info(
+ &conn->log, NGTCP2_LOG_EVENT_PTV,
+ "path migration is aborted because new migration has started");
+ rv = conn_abort_pv(conn, ts);
+ if (rv != 0) {
+ return rv;
+ }
+ }
+
+ conn->pv = pv;
+
+ return 0;
+}
+
+/*
+ * conn_recv_pkt_from_new_path is called when a 1RTT packet is
+ * received from new path (not current path). This packet would be a
+ * packet which only contains probing frame, or reordered packet, or a
+ * path is being validated.
+ *
+ * This function returns 0 if it succeeds, or one of the following
+ * negative error codes:
+ *
+ * NGTCP2_ERR_CONN_ID_BLOCKED
+ * No unused DCID is available
+ * NGTCP2_ERR_NOMEM
+ * Out of memory
+ */
+static int conn_recv_pkt_from_new_path(ngtcp2_conn *conn,
+ const ngtcp2_path *path, size_t dgramlen,
+ int path_challenge_recved,
+ ngtcp2_tstamp ts) {
+ ngtcp2_pv *pv = conn->pv;
+ ngtcp2_dcid *bound_dcid;
+ int rv;
+
+ if (pv) {
+ if (ngtcp2_path_eq(&pv->dcid.ps.path, path)) {
+ pv->dcid.bytes_recv += dgramlen;
+ return 0;
+ }
+
+ if ((pv->flags & NGTCP2_PV_FLAG_FALLBACK_ON_FAILURE) &&
+ ngtcp2_path_eq(&pv->fallback_dcid.ps.path, path)) {
+ pv->fallback_dcid.bytes_recv += dgramlen;
+ return 0;
+ }
+ }
+
+ if (!path_challenge_recved) {
+ return 0;
+ }
+
+ rv = conn_bind_dcid(conn, &bound_dcid, path, ts);
+ if (rv != 0) {
+ return rv;
+ }
+
+ ngtcp2_dcid_set_path(bound_dcid, path);
+ bound_dcid->bytes_recv += dgramlen;
+
+ return 0;
+}
+
+/*
+ * conn_recv_delayed_handshake_pkt processes the received Handshake
+ * packet which is received after handshake completed. This function
+ * does the minimal job, and its purpose is send acknowledgement of
+ * this packet to the peer. We assume that hd->type ==
+ * NGTCP2_PKT_HANDSHAKE.
+ *
+ * This function returns 0 if it succeeds, or one of the following
+ * negative error codes:
+ *
+ * NGTCP2_ERR_FRAME_ENCODING
+ * Frame is badly formatted; or frame type is unknown.
+ * NGTCP2_ERR_NOMEM
+ * Out of memory
+ * NGTCP2_ERR_DISCARD_PKT
+ * Packet was discarded.
+ * NGTCP2_ERR_ACK_FRAME
+ * ACK frame is malformed.
+ * NGTCP2_ERR_PROTO
+ * Frame that is not allowed in Handshake packet is received.
+ */
+static int
+conn_recv_delayed_handshake_pkt(ngtcp2_conn *conn, const ngtcp2_pkt_info *pi,
+ const ngtcp2_pkt_hd *hd, size_t pktlen,
+ const uint8_t *payload, size_t payloadlen,
+ ngtcp2_tstamp pkt_ts, ngtcp2_tstamp ts) {
+ ngtcp2_ssize nread;
+ ngtcp2_max_frame mfr;
+ ngtcp2_frame *fr = &mfr.fr;
+ int rv;
+ int require_ack = 0;
+ ngtcp2_pktns *pktns;
+
+ assert(hd->type == NGTCP2_PKT_HANDSHAKE);
+
+ pktns = conn->hs_pktns;
+
+ if (payloadlen == 0) {
+ /* QUIC packet must contain at least one frame */
+ return NGTCP2_ERR_PROTO;
+ }
+
+ ngtcp2_qlog_pkt_received_start(&conn->qlog);
+
+ for (; payloadlen;) {
+ nread = ngtcp2_pkt_decode_frame(fr, payload, payloadlen);
+ if (nread < 0) {
+ return (int)nread;
+ }
+
+ payload += nread;
+ payloadlen -= (size_t)nread;
+
+ switch (fr->type) {
+ case NGTCP2_FRAME_ACK:
+ case NGTCP2_FRAME_ACK_ECN:
+ fr->ack.ack_delay = 0;
+ fr->ack.ack_delay_unscaled = 0;
+ break;
+ }
+
+ ngtcp2_log_rx_fr(&conn->log, hd, fr);
+
+ switch (fr->type) {
+ case NGTCP2_FRAME_ACK:
+ case NGTCP2_FRAME_ACK_ECN:
+ if (!conn->server) {
+ conn->flags |= NGTCP2_CONN_FLAG_SERVER_ADDR_VERIFIED;
+ }
+ rv = conn_recv_ack(conn, pktns, &fr->ack, pkt_ts, ts);
+ if (rv != 0) {
+ return rv;
+ }
+ break;
+ case NGTCP2_FRAME_PADDING:
+ break;
+ case NGTCP2_FRAME_CONNECTION_CLOSE:
+ rv = conn_recv_connection_close(conn, &fr->connection_close);
+ if (rv != 0) {
+ return rv;
+ }
+ break;
+ case NGTCP2_FRAME_CRYPTO:
+ case NGTCP2_FRAME_PING:
+ require_ack = 1;
+ break;
+ default:
+ return NGTCP2_ERR_PROTO;
+ }
+
+ ngtcp2_qlog_write_frame(&conn->qlog, fr);
+ }
+
+ ngtcp2_qlog_pkt_received_end(&conn->qlog, hd, pktlen);
+
+ rv = pktns_commit_recv_pkt_num(pktns, hd->pkt_num, require_ack, pkt_ts);
+ if (rv != 0) {
+ return rv;
+ }
+
+ pktns_increase_ecn_counts(pktns, pi);
+
+ if (require_ack &&
+ (++pktns->acktr.rx_npkt >= conn->local.settings.ack_thresh ||
+ (pi->ecn & NGTCP2_ECN_MASK) == NGTCP2_ECN_CE)) {
+ ngtcp2_acktr_immediate_ack(&pktns->acktr);
+ }
+
+ rv = ngtcp2_conn_sched_ack(conn, &pktns->acktr, hd->pkt_num, require_ack,
+ pkt_ts);
+ if (rv != 0) {
+ return rv;
+ }
+
+ conn_restart_timer_on_read(conn, ts);
+
+ ngtcp2_qlog_metrics_updated(&conn->qlog, &conn->cstat);
+
+ return 0;
+}
+
+/*
+ * conn_allow_path_change_under_disable_active_migration returns
+ * nonzero if a packet from |path| is acceptable under
+ * disable_active_migration is on.
+ */
+static int
+conn_allow_path_change_under_disable_active_migration(ngtcp2_conn *conn,
+ const ngtcp2_path *path) {
+ uint32_t remote_addr_cmp;
+ const ngtcp2_preferred_addr *paddr;
+ ngtcp2_addr addr;
+
+ assert(conn->server);
+ assert(conn->local.transport_params.disable_active_migration);
+
+ /* If local address does not change, it must be passive migration
+ (NAT rebinding). */
+ if (ngtcp2_addr_eq(&conn->dcid.current.ps.path.local, &path->local)) {
+ remote_addr_cmp =
+ ngtcp2_addr_compare(&conn->dcid.current.ps.path.remote, &path->remote);
+
+ return (remote_addr_cmp | NGTCP2_ADDR_COMPARE_FLAG_PORT) ==
+ NGTCP2_ADDR_COMPARE_FLAG_PORT;
+ }
+
+ /* If local address changes, it must be one of the preferred
+ addresses. */
+
+ if (!conn->local.transport_params.preferred_address_present) {
+ return 0;
+ }
+
+ paddr = &conn->local.transport_params.preferred_address;
+
+ if (paddr->ipv4_present) {
+ ngtcp2_addr_init(&addr, (const ngtcp2_sockaddr *)&paddr->ipv4,
+ sizeof(paddr->ipv4));
+
+ if (ngtcp2_addr_eq(&addr, &path->local)) {
+ return 1;
+ }
+ }
+
+ if (paddr->ipv6_present) {
+ ngtcp2_addr_init(&addr, (const ngtcp2_sockaddr *)&paddr->ipv6,
+ sizeof(paddr->ipv6));
+
+ if (ngtcp2_addr_eq(&addr, &path->local)) {
+ return 1;
+ }
+ }
+
+ return 0;
+}
+
+/*
+ * conn_recv_pkt processes a packet contained in the buffer pointed by
+ * |pkt| of length |pktlen|. |pkt| may contain multiple QUIC packets.
+ * This function only processes the first packet. |pkt_ts| is the
+ * timestamp when packet is received. |ts| should be the current
+ * time. Usually they are the same, but for buffered packets,
+ * |pkt_ts| would be earlier than |ts|.
+ *
+ * This function returns the number of bytes processed if it succeeds,
+ * or one of the following negative error codes:
+ *
+ * NGTCP2_ERR_DISCARD_PKT
+ * Packet was discarded because plain text header was malformed;
+ * or its payload could not be decrypted.
+ * NGTCP2_ERR_PROTO
+ * Packet is badly formatted; or 0RTT packet contains other than
+ * PADDING or STREAM frames; or other QUIC protocol violation is
+ * found.
+ * NGTCP2_ERR_CALLBACK_FAILURE
+ * User-defined callback function failed.
+ * NGTCP2_ERR_NOMEM
+ * Out of memory.
+ * NGTCP2_ERR_FRAME_ENCODING
+ * Frame is badly formatted; or frame type is unknown.
+ * NGTCP2_ERR_ACK_FRAME
+ * ACK frame is malformed.
+ * NGTCP2_ERR_STREAM_STATE
+ * Frame is received to the local stream which is not initiated.
+ * NGTCP2_ERR_STREAM_LIMIT
+ * Frame has remote stream ID which is strictly greater than the
+ * allowed limit.
+ * NGTCP2_ERR_FLOW_CONTROL
+ * Flow control limit is violated.
+ * NGTCP2_ERR_FINAL_SIZE
+ * Frame has strictly larger end offset than it is permitted.
+ */
+static ngtcp2_ssize conn_recv_pkt(ngtcp2_conn *conn, const ngtcp2_path *path,
+ const ngtcp2_pkt_info *pi, const uint8_t *pkt,
+ size_t pktlen, size_t dgramlen,
+ ngtcp2_tstamp pkt_ts, ngtcp2_tstamp ts) {
+ ngtcp2_pkt_hd hd;
+ int rv = 0;
+ size_t hdpktlen;
+ const uint8_t *payload;
+ size_t payloadlen;
+ ngtcp2_ssize nread, nwrite;
+ ngtcp2_max_frame mfr;
+ ngtcp2_frame *fr = &mfr.fr;
+ int require_ack = 0;
+ ngtcp2_crypto_aead *aead;
+ ngtcp2_crypto_cipher *hp;
+ ngtcp2_crypto_km *ckm;
+ ngtcp2_crypto_cipher_ctx *hp_ctx;
+ ngtcp2_hp_mask hp_mask;
+ ngtcp2_decrypt decrypt;
+ ngtcp2_pktns *pktns;
+ int non_probing_pkt = 0;
+ int key_phase_bit_changed = 0;
+ int force_decrypt_failure = 0;
+ int recv_ncid = 0;
+ int new_cid_used = 0;
+ int path_challenge_recved = 0;
+
+ if (conn->server && conn->local.transport_params.disable_active_migration &&
+ !ngtcp2_path_eq(&conn->dcid.current.ps.path, path) &&
+ !conn_allow_path_change_under_disable_active_migration(conn, path)) {
+ ngtcp2_log_info(&conn->log, NGTCP2_LOG_EVENT_PKT,
+ "packet is discarded because active migration is disabled");
+
+ return NGTCP2_ERR_DISCARD_PKT;
+ }
+
+ if (pkt[0] & NGTCP2_HEADER_FORM_BIT) {
+ nread = ngtcp2_pkt_decode_hd_long(&hd, pkt, pktlen);
+ if (nread < 0) {
+ ngtcp2_log_info(&conn->log, NGTCP2_LOG_EVENT_PKT,
+ "could not decode long header");
+ return NGTCP2_ERR_DISCARD_PKT;
+ }
+
+ if (pktlen < (size_t)nread + hd.len) {
+ return NGTCP2_ERR_DISCARD_PKT;
+ }
+
+ assert(conn->negotiated_version);
+
+ if (hd.version != conn->client_chosen_version &&
+ hd.version != conn->negotiated_version) {
+ return NGTCP2_ERR_DISCARD_PKT;
+ }
+
+ pktlen = (size_t)nread + hd.len;
+
+ /* Quoted from spec: if subsequent packets of those types include
+ a different Source Connection ID, they MUST be discarded. */
+ if (!ngtcp2_cid_eq(&conn->dcid.current.cid, &hd.scid)) {
+ ngtcp2_log_rx_pkt_hd(&conn->log, &hd);
+ ngtcp2_log_info(&conn->log, NGTCP2_LOG_EVENT_PKT,
+ "packet was ignored because of mismatched SCID");
+ return NGTCP2_ERR_DISCARD_PKT;
+ }
+
+ switch (hd.type) {
+ case NGTCP2_PKT_INITIAL:
+ ngtcp2_log_info(&conn->log, NGTCP2_LOG_EVENT_PKT,
+ "delayed Initial packet was discarded");
+ return (ngtcp2_ssize)pktlen;
+ case NGTCP2_PKT_HANDSHAKE:
+ if (hd.version != conn->negotiated_version) {
+ return NGTCP2_ERR_DISCARD_PKT;
+ }
+
+ if (!conn->hs_pktns) {
+ ngtcp2_log_info(&conn->log, NGTCP2_LOG_EVENT_PKT,
+ "delayed Handshake packet was discarded");
+ return (ngtcp2_ssize)pktlen;
+ }
+
+ pktns = conn->hs_pktns;
+ aead = &pktns->crypto.ctx.aead;
+ hp = &pktns->crypto.ctx.hp;
+ ckm = pktns->crypto.rx.ckm;
+ hp_ctx = &pktns->crypto.rx.hp_ctx;
+ hp_mask = conn->callbacks.hp_mask;
+ decrypt = conn->callbacks.decrypt;
+ break;
+ case NGTCP2_PKT_0RTT:
+ if (!conn->server || hd.version != conn->client_chosen_version) {
+ return NGTCP2_ERR_DISCARD_PKT;
+ }
+
+ if (!conn->early.ckm) {
+ return (ngtcp2_ssize)pktlen;
+ }
+
+ pktns = &conn->pktns;
+ aead = &conn->early.ctx.aead;
+ hp = &conn->early.ctx.hp;
+ ckm = conn->early.ckm;
+ hp_ctx = &conn->early.hp_ctx;
+ hp_mask = conn->callbacks.hp_mask;
+ decrypt = conn->callbacks.decrypt;
+ break;
+ default:
+ ngtcp2_log_rx_pkt_hd(&conn->log, &hd);
+ ngtcp2_log_info(&conn->log, NGTCP2_LOG_EVENT_PKT,
+ "packet type 0x%02x was ignored", hd.type);
+ return (ngtcp2_ssize)pktlen;
+ }
+ } else {
+ nread = ngtcp2_pkt_decode_hd_short(&hd, pkt, pktlen, conn->oscid.datalen);
+ if (nread < 0) {
+ ngtcp2_log_info(&conn->log, NGTCP2_LOG_EVENT_PKT,
+ "could not decode short header");
+ return NGTCP2_ERR_DISCARD_PKT;
+ }
+
+ pktns = &conn->pktns;
+ aead = &pktns->crypto.ctx.aead;
+ hp = &pktns->crypto.ctx.hp;
+ ckm = pktns->crypto.rx.ckm;
+ hp_ctx = &pktns->crypto.rx.hp_ctx;
+ hp_mask = conn->callbacks.hp_mask;
+ decrypt = conn->callbacks.decrypt;
+ }
+
+ rv = conn_ensure_decrypt_hp_buffer(conn, (size_t)nread + 4);
+ if (rv != 0) {
+ return rv;
+ }
+
+ nwrite = decrypt_hp(&hd, conn->crypto.decrypt_hp_buf.base, hp, pkt, pktlen,
+ (size_t)nread, hp_ctx, hp_mask);
+ if (nwrite < 0) {
+ if (ngtcp2_err_is_fatal((int)nwrite)) {
+ return nwrite;
+ }
+ ngtcp2_log_info(&conn->log, NGTCP2_LOG_EVENT_PKT,
+ "could not decrypt packet number");
+ return NGTCP2_ERR_DISCARD_PKT;
+ }
+
+ hdpktlen = (size_t)nwrite;
+ payload = pkt + hdpktlen;
+ payloadlen = pktlen - hdpktlen;
+
+ hd.pkt_num = ngtcp2_pkt_adjust_pkt_num(pktns->rx.max_pkt_num, hd.pkt_num,
+ pkt_num_bits(hd.pkt_numlen));
+ if (hd.pkt_num > NGTCP2_MAX_PKT_NUM) {
+ ngtcp2_log_info(&conn->log, NGTCP2_LOG_EVENT_PKT,
+ "pkn=%" PRId64 " is greater than maximum pkn", hd.pkt_num);
+ return NGTCP2_ERR_DISCARD_PKT;
+ }
+
+ ngtcp2_log_rx_pkt_hd(&conn->log, &hd);
+
+ if (hd.type == NGTCP2_PKT_1RTT) {
+ key_phase_bit_changed = conn_key_phase_changed(conn, &hd);
+ }
+
+ rv = conn_ensure_decrypt_buffer(conn, payloadlen);
+ if (rv != 0) {
+ return rv;
+ }
+
+ if (key_phase_bit_changed) {
+ assert(hd.type == NGTCP2_PKT_1RTT);
+
+ ngtcp2_log_info(&conn->log, NGTCP2_LOG_EVENT_PKT, "unexpected KEY_PHASE");
+
+ if (ckm->pkt_num > hd.pkt_num) {
+ if (conn->crypto.key_update.old_rx_ckm) {
+ ngtcp2_log_info(&conn->log, NGTCP2_LOG_EVENT_PKT,
+ "decrypting with old key");
+ ckm = conn->crypto.key_update.old_rx_ckm;
+ } else {
+ force_decrypt_failure = 1;
+ }
+ } else if (pktns->rx.max_pkt_num < hd.pkt_num) {
+ assert(ckm->pkt_num < hd.pkt_num);
+ if (!conn->crypto.key_update.new_rx_ckm) {
+ ngtcp2_log_info(&conn->log, NGTCP2_LOG_EVENT_PKT,
+ "new key is not available");
+ force_decrypt_failure = 1;
+ } else {
+ ngtcp2_log_info(&conn->log, NGTCP2_LOG_EVENT_PKT,
+ "decrypting with new key");
+ ckm = conn->crypto.key_update.new_rx_ckm;
+ }
+ } else {
+ force_decrypt_failure = 1;
+ }
+ }
+
+ nwrite = decrypt_pkt(conn->crypto.decrypt_buf.base, aead, payload, payloadlen,
+ conn->crypto.decrypt_hp_buf.base, hdpktlen, hd.pkt_num,
+ ckm, decrypt);
+
+ if (force_decrypt_failure) {
+ nwrite = NGTCP2_ERR_DECRYPT;
+ }
+
+ if (nwrite < 0) {
+ if (ngtcp2_err_is_fatal((int)nwrite)) {
+ return nwrite;
+ }
+
+ assert(NGTCP2_ERR_DECRYPT == nwrite);
+
+ if (hd.type == NGTCP2_PKT_1RTT &&
+ ++conn->crypto.decryption_failure_count >=
+ pktns->crypto.ctx.max_decryption_failure) {
+ return NGTCP2_ERR_AEAD_LIMIT_REACHED;
+ }
+
+ if (hd.flags & NGTCP2_PKT_FLAG_LONG_FORM) {
+ ngtcp2_log_info(&conn->log, NGTCP2_LOG_EVENT_PKT,
+ "could not decrypt packet payload");
+ return NGTCP2_ERR_DISCARD_PKT;
+ }
+
+ ngtcp2_log_info(&conn->log, NGTCP2_LOG_EVENT_PKT,
+ "could not decrypt packet payload");
+ return NGTCP2_ERR_DISCARD_PKT;
+ }
+
+ rv = ngtcp2_pkt_verify_reserved_bits(conn->crypto.decrypt_hp_buf.base[0]);
+ if (rv != 0) {
+ ngtcp2_log_info(&conn->log, NGTCP2_LOG_EVENT_PKT,
+ "packet has incorrect reserved bits");
+
+ return NGTCP2_ERR_PROTO;
+ }
+
+ if (pktns_pkt_num_is_duplicate(pktns, hd.pkt_num)) {
+ ngtcp2_log_info(&conn->log, NGTCP2_LOG_EVENT_PKT,
+ "packet was discarded because of duplicated packet number");
+ return NGTCP2_ERR_DISCARD_PKT;
+ }
+
+ payload = conn->crypto.decrypt_buf.base;
+ payloadlen = (size_t)nwrite;
+
+ if (payloadlen == 0) {
+ /* QUIC packet must contain at least one frame */
+ return NGTCP2_ERR_PROTO;
+ }
+
+ if (hd.flags & NGTCP2_PKT_FLAG_LONG_FORM) {
+ switch (hd.type) {
+ case NGTCP2_PKT_HANDSHAKE:
+ rv = conn_verify_dcid(conn, NULL, &hd);
+ if (rv != 0) {
+ if (ngtcp2_err_is_fatal(rv)) {
+ return rv;
+ }
+ ngtcp2_log_info(&conn->log, NGTCP2_LOG_EVENT_PKT,
+ "packet was ignored because of mismatched DCID");
+ return NGTCP2_ERR_DISCARD_PKT;
+ }
+
+ rv = conn_recv_delayed_handshake_pkt(conn, pi, &hd, pktlen, payload,
+ payloadlen, pkt_ts, ts);
+ if (rv < 0) {
+ return (ngtcp2_ssize)rv;
+ }
+
+ return (ngtcp2_ssize)pktlen;
+ case NGTCP2_PKT_0RTT:
+ if (!ngtcp2_cid_eq(&conn->rcid, &hd.dcid)) {
+ rv = conn_verify_dcid(conn, NULL, &hd);
+ if (rv != 0) {
+ if (ngtcp2_err_is_fatal(rv)) {
+ return rv;
+ }
+ ngtcp2_log_info(&conn->log, NGTCP2_LOG_EVENT_PKT,
+ "packet was ignored because of mismatched DCID");
+ return NGTCP2_ERR_DISCARD_PKT;
+ }
+ }
+ break;
+ default:
+ /* Unreachable */
+ ngtcp2_unreachable();
+ }
+ } else {
+ rv = conn_verify_dcid(conn, &new_cid_used, &hd);
+ if (rv != 0) {
+ if (ngtcp2_err_is_fatal(rv)) {
+ return rv;
+ }
+ ngtcp2_log_info(&conn->log, NGTCP2_LOG_EVENT_PKT,
+ "packet was ignored because of mismatched DCID");
+ return NGTCP2_ERR_DISCARD_PKT;
+ }
+ }
+
+ ngtcp2_qlog_pkt_received_start(&conn->qlog);
+
+ for (; payloadlen;) {
+ nread = ngtcp2_pkt_decode_frame(fr, payload, payloadlen);
+ if (nread < 0) {
+ return nread;
+ }
+
+ payload += nread;
+ payloadlen -= (size_t)nread;
+
+ switch (fr->type) {
+ case NGTCP2_FRAME_ACK:
+ case NGTCP2_FRAME_ACK_ECN:
+ if ((hd.flags & NGTCP2_PKT_FLAG_LONG_FORM) &&
+ hd.type == NGTCP2_PKT_0RTT) {
+ return NGTCP2_ERR_PROTO;
+ }
+ assert(conn->remote.transport_params);
+ assign_recved_ack_delay_unscaled(
+ &fr->ack, conn->remote.transport_params->ack_delay_exponent);
+ break;
+ }
+
+ ngtcp2_log_rx_fr(&conn->log, &hd, fr);
+
+ if (hd.type == NGTCP2_PKT_0RTT) {
+ switch (fr->type) {
+ case NGTCP2_FRAME_PADDING:
+ case NGTCP2_FRAME_PING:
+ case NGTCP2_FRAME_RESET_STREAM:
+ case NGTCP2_FRAME_STOP_SENDING:
+ case NGTCP2_FRAME_STREAM:
+ case NGTCP2_FRAME_MAX_DATA:
+ case NGTCP2_FRAME_MAX_STREAM_DATA:
+ case NGTCP2_FRAME_MAX_STREAMS_BIDI:
+ case NGTCP2_FRAME_MAX_STREAMS_UNI:
+ case NGTCP2_FRAME_DATA_BLOCKED:
+ case NGTCP2_FRAME_STREAM_DATA_BLOCKED:
+ case NGTCP2_FRAME_STREAMS_BLOCKED_BIDI:
+ case NGTCP2_FRAME_STREAMS_BLOCKED_UNI:
+ case NGTCP2_FRAME_NEW_CONNECTION_ID:
+ case NGTCP2_FRAME_PATH_CHALLENGE:
+ case NGTCP2_FRAME_CONNECTION_CLOSE:
+ case NGTCP2_FRAME_CONNECTION_CLOSE_APP:
+ case NGTCP2_FRAME_DATAGRAM:
+ case NGTCP2_FRAME_DATAGRAM_LEN:
+ break;
+ default:
+ return NGTCP2_ERR_PROTO;
+ }
+ }
+
+ switch (fr->type) {
+ case NGTCP2_FRAME_ACK:
+ case NGTCP2_FRAME_ACK_ECN:
+ case NGTCP2_FRAME_PADDING:
+ case NGTCP2_FRAME_CONNECTION_CLOSE:
+ case NGTCP2_FRAME_CONNECTION_CLOSE_APP:
+ break;
+ default:
+ require_ack = 1;
+ }
+
+ switch (fr->type) {
+ case NGTCP2_FRAME_ACK:
+ case NGTCP2_FRAME_ACK_ECN:
+ if (!conn->server) {
+ conn->flags |= NGTCP2_CONN_FLAG_SERVER_ADDR_VERIFIED;
+ }
+ rv = conn_recv_ack(conn, pktns, &fr->ack, pkt_ts, ts);
+ if (rv != 0) {
+ return rv;
+ }
+ non_probing_pkt = 1;
+ break;
+ case NGTCP2_FRAME_STREAM:
+ rv = conn_recv_stream(conn, &fr->stream);
+ if (rv != 0) {
+ return rv;
+ }
+ non_probing_pkt = 1;
+ break;
+ case NGTCP2_FRAME_CRYPTO:
+ rv = conn_recv_crypto(conn, NGTCP2_CRYPTO_LEVEL_APPLICATION,
+ &pktns->crypto.strm, &fr->crypto);
+ if (rv != 0) {
+ return rv;
+ }
+ non_probing_pkt = 1;
+ break;
+ case NGTCP2_FRAME_RESET_STREAM:
+ rv = conn_recv_reset_stream(conn, &fr->reset_stream);
+ if (rv != 0) {
+ return rv;
+ }
+ non_probing_pkt = 1;
+ break;
+ case NGTCP2_FRAME_STOP_SENDING:
+ rv = conn_recv_stop_sending(conn, &fr->stop_sending);
+ if (rv != 0) {
+ return rv;
+ }
+ non_probing_pkt = 1;
+ break;
+ case NGTCP2_FRAME_MAX_STREAM_DATA:
+ rv = conn_recv_max_stream_data(conn, &fr->max_stream_data);
+ if (rv != 0) {
+ return rv;
+ }
+ non_probing_pkt = 1;
+ break;
+ case NGTCP2_FRAME_MAX_DATA:
+ conn_recv_max_data(conn, &fr->max_data);
+ non_probing_pkt = 1;
+ break;
+ case NGTCP2_FRAME_MAX_STREAMS_BIDI:
+ case NGTCP2_FRAME_MAX_STREAMS_UNI:
+ rv = conn_recv_max_streams(conn, &fr->max_streams);
+ if (rv != 0) {
+ return rv;
+ }
+ non_probing_pkt = 1;
+ break;
+ case NGTCP2_FRAME_CONNECTION_CLOSE:
+ case NGTCP2_FRAME_CONNECTION_CLOSE_APP:
+ rv = conn_recv_connection_close(conn, &fr->connection_close);
+ if (rv != 0) {
+ return rv;
+ }
+ break;
+ case NGTCP2_FRAME_PING:
+ non_probing_pkt = 1;
+ break;
+ case NGTCP2_FRAME_PATH_CHALLENGE:
+ conn_recv_path_challenge(conn, path, &fr->path_challenge);
+ path_challenge_recved = 1;
+ break;
+ case NGTCP2_FRAME_PATH_RESPONSE:
+ rv = conn_recv_path_response(conn, &fr->path_response, ts);
+ if (rv != 0) {
+ return rv;
+ }
+ break;
+ case NGTCP2_FRAME_NEW_CONNECTION_ID:
+ rv = conn_recv_new_connection_id(conn, &fr->new_connection_id);
+ if (rv != 0) {
+ return rv;
+ }
+ recv_ncid = 1;
+ break;
+ case NGTCP2_FRAME_RETIRE_CONNECTION_ID:
+ rv = conn_recv_retire_connection_id(conn, &hd, &fr->retire_connection_id,
+ ts);
+ if (rv != 0) {
+ return rv;
+ }
+ non_probing_pkt = 1;
+ break;
+ case NGTCP2_FRAME_NEW_TOKEN:
+ rv = conn_recv_new_token(conn, &fr->new_token);
+ if (rv != 0) {
+ return rv;
+ }
+ non_probing_pkt = 1;
+ break;
+ case NGTCP2_FRAME_HANDSHAKE_DONE:
+ rv = conn_recv_handshake_done(conn, ts);
+ if (rv != 0) {
+ return rv;
+ }
+ non_probing_pkt = 1;
+ break;
+ case NGTCP2_FRAME_STREAMS_BLOCKED_BIDI:
+ rv = conn_recv_streams_blocked_bidi(conn, &fr->streams_blocked);
+ if (rv != 0) {
+ return rv;
+ }
+ non_probing_pkt = 1;
+ break;
+ case NGTCP2_FRAME_STREAMS_BLOCKED_UNI:
+ rv = conn_recv_streams_blocked_uni(conn, &fr->streams_blocked);
+ if (rv != 0) {
+ return rv;
+ }
+ non_probing_pkt = 1;
+ break;
+ case NGTCP2_FRAME_DATA_BLOCKED:
+ /* TODO Not implemented yet */
+ non_probing_pkt = 1;
+ break;
+ case NGTCP2_FRAME_DATAGRAM:
+ case NGTCP2_FRAME_DATAGRAM_LEN:
+ if ((uint64_t)nread >
+ conn->local.transport_params.max_datagram_frame_size) {
+ return NGTCP2_ERR_PROTO;
+ }
+ rv = conn_recv_datagram(conn, &fr->datagram);
+ if (rv != 0) {
+ return rv;
+ }
+ non_probing_pkt = 1;
+ break;
+ }
+
+ ngtcp2_qlog_write_frame(&conn->qlog, fr);
+ }
+
+ ngtcp2_qlog_pkt_received_end(&conn->qlog, &hd, pktlen);
+
+ if (recv_ncid) {
+ rv = conn_post_process_recv_new_connection_id(conn, ts);
+ if (rv != 0) {
+ return rv;
+ }
+ }
+
+ if (conn->server && hd.type == NGTCP2_PKT_1RTT &&
+ !ngtcp2_path_eq(&conn->dcid.current.ps.path, path)) {
+ if (non_probing_pkt && pktns->rx.max_pkt_num < hd.pkt_num &&
+ !conn_path_validation_in_progress(conn, path)) {
+ rv = conn_recv_non_probing_pkt_on_new_path(conn, path, dgramlen,
+ new_cid_used, ts);
+ if (rv != 0) {
+ if (ngtcp2_err_is_fatal(rv)) {
+ return rv;
+ }
+
+ /* DCID is not available. Just continue. */
+ assert(NGTCP2_ERR_CONN_ID_BLOCKED == rv);
+ }
+ } else {
+ rv = conn_recv_pkt_from_new_path(conn, path, dgramlen,
+ path_challenge_recved, ts);
+ if (rv != 0) {
+ if (ngtcp2_err_is_fatal(rv)) {
+ return rv;
+ }
+
+ /* DCID is not available. Just continue. */
+ assert(NGTCP2_ERR_CONN_ID_BLOCKED == rv);
+ }
+ }
+ }
+
+ if (hd.type == NGTCP2_PKT_1RTT) {
+ if (ckm == conn->crypto.key_update.new_rx_ckm) {
+ ngtcp2_log_info(&conn->log, NGTCP2_LOG_EVENT_CON, "rotate keys");
+ conn_rotate_keys(conn, hd.pkt_num, /* initiator = */ 0);
+ } else if (ckm->pkt_num > hd.pkt_num) {
+ ckm->pkt_num = hd.pkt_num;
+ }
+
+ if (conn->server && conn->early.ckm &&
+ conn->early.discard_started_ts == UINT64_MAX) {
+ conn->early.discard_started_ts = ts;
+ }
+
+ if (ngtcp2_path_eq(&conn->dcid.current.ps.path, path)) {
+ conn_update_keep_alive_last_ts(conn, ts);
+ }
+ }
+
+ rv = pktns_commit_recv_pkt_num(pktns, hd.pkt_num, require_ack, pkt_ts);
+ if (rv != 0) {
+ return rv;
+ }
+
+ pktns_increase_ecn_counts(pktns, pi);
+
+ if (require_ack &&
+ (++pktns->acktr.rx_npkt >= conn->local.settings.ack_thresh ||
+ (pi->ecn & NGTCP2_ECN_MASK) == NGTCP2_ECN_CE)) {
+ ngtcp2_acktr_immediate_ack(&pktns->acktr);
+ }
+
+ rv = ngtcp2_conn_sched_ack(conn, &pktns->acktr, hd.pkt_num, require_ack,
+ pkt_ts);
+ if (rv != 0) {
+ return rv;
+ }
+
+ conn_restart_timer_on_read(conn, ts);
+
+ ngtcp2_qlog_metrics_updated(&conn->qlog, &conn->cstat);
+
+ return conn->state == NGTCP2_CS_DRAINING ? NGTCP2_ERR_DRAINING
+ : (ngtcp2_ssize)pktlen;
+}
+
+/*
+ * conn_process_buffered_protected_pkt processes buffered 0RTT or 1RTT
+ * packets.
+ *
+ * This function returns 0 if it succeeds, or the same negative error
+ * codes from conn_recv_pkt.
+ */
+static int conn_process_buffered_protected_pkt(ngtcp2_conn *conn,
+ ngtcp2_pktns *pktns,
+ ngtcp2_tstamp ts) {
+ ngtcp2_ssize nread;
+ ngtcp2_pkt_chain **ppc, *next;
+ int rv;
+
+ ngtcp2_log_info(&conn->log, NGTCP2_LOG_EVENT_CON,
+ "processing buffered protected packet");
+
+ for (ppc = &pktns->rx.buffed_pkts; *ppc;) {
+ next = (*ppc)->next;
+ nread = conn_recv_pkt(conn, &(*ppc)->path.path, &(*ppc)->pi, (*ppc)->pkt,
+ (*ppc)->pktlen, (*ppc)->dgramlen, (*ppc)->ts, ts);
+ if (nread < 0 && !ngtcp2_err_is_fatal((int)nread) &&
+ nread != NGTCP2_ERR_DRAINING) {
+ /* TODO We don't know this is the first QUIC packet in a
+ datagram. */
+ rv = conn_on_stateless_reset(conn, &(*ppc)->path.path, (*ppc)->pkt,
+ (*ppc)->pktlen);
+ if (rv == 0) {
+ ngtcp2_pkt_chain_del(*ppc, conn->mem);
+ *ppc = next;
+ return NGTCP2_ERR_DRAINING;
+ }
+ }
+
+ ngtcp2_pkt_chain_del(*ppc, conn->mem);
+ *ppc = next;
+ if (nread < 0) {
+ if (nread == NGTCP2_ERR_DISCARD_PKT) {
+ continue;
+ }
+ return (int)nread;
+ }
+ }
+
+ return 0;
+}
+
+/*
+ * conn_process_buffered_handshake_pkt processes buffered Handshake
+ * packets.
+ *
+ * This function returns 0 if it succeeds, or the same negative error
+ * codes from conn_recv_handshake_pkt.
+ */
+static int conn_process_buffered_handshake_pkt(ngtcp2_conn *conn,
+ ngtcp2_tstamp ts) {
+ ngtcp2_pktns *pktns = conn->hs_pktns;
+ ngtcp2_ssize nread;
+ ngtcp2_pkt_chain **ppc, *next;
+
+ ngtcp2_log_info(&conn->log, NGTCP2_LOG_EVENT_CON,
+ "processing buffered handshake packet");
+
+ for (ppc = &pktns->rx.buffed_pkts; *ppc;) {
+ next = (*ppc)->next;
+ nread = conn_recv_handshake_pkt(conn, &(*ppc)->path.path, &(*ppc)->pi,
+ (*ppc)->pkt, (*ppc)->pktlen,
+ (*ppc)->dgramlen, (*ppc)->ts, ts);
+ ngtcp2_pkt_chain_del(*ppc, conn->mem);
+ *ppc = next;
+ if (nread < 0) {
+ if (nread == NGTCP2_ERR_DISCARD_PKT) {
+ continue;
+ }
+ return (int)nread;
+ }
+ }
+
+ return 0;
+}
+
+static void conn_sync_stream_id_limit(ngtcp2_conn *conn) {
+ ngtcp2_transport_params *params = conn->remote.transport_params;
+
+ assert(params);
+
+ conn->local.bidi.max_streams = params->initial_max_streams_bidi;
+ conn->local.uni.max_streams = params->initial_max_streams_uni;
+}
+
+static int strm_set_max_offset(void *data, void *ptr) {
+ ngtcp2_conn *conn = ptr;
+ ngtcp2_transport_params *params = conn->remote.transport_params;
+ ngtcp2_strm *strm = data;
+ uint64_t max_offset;
+ int rv;
+
+ assert(params);
+
+ if (!conn_local_stream(conn, strm->stream_id)) {
+ return 0;
+ }
+
+ if (bidi_stream(strm->stream_id)) {
+ max_offset = params->initial_max_stream_data_bidi_remote;
+ } else {
+ max_offset = params->initial_max_stream_data_uni;
+ }
+
+ if (strm->tx.max_offset < max_offset) {
+ strm->tx.max_offset = max_offset;
+
+ /* Don't call callback if stream is half-closed local */
+ if (strm->flags & NGTCP2_STRM_FLAG_SHUT_WR) {
+ return 0;
+ }
+
+ rv = conn_call_extend_max_stream_data(conn, strm, strm->stream_id,
+ strm->tx.max_offset);
+ if (rv != 0) {
+ return rv;
+ }
+ }
+
+ return 0;
+}
+
+static int conn_sync_stream_data_limit(ngtcp2_conn *conn) {
+ return ngtcp2_map_each(&conn->strms, strm_set_max_offset, conn);
+}
+
+/*
+ * conn_handshake_completed is called once cryptographic handshake has
+ * completed.
+ *
+ * This function returns 0 if it succeeds, or one of the following
+ * negative error codes:
+ *
+ * NGTCP2_ERR_CALLBACK_FAILURE
+ * User callback failed.
+ */
+static int conn_handshake_completed(ngtcp2_conn *conn) {
+ int rv;
+
+ conn->flags |= NGTCP2_CONN_FLAG_HANDSHAKE_COMPLETED_HANDLED;
+
+ rv = conn_call_handshake_completed(conn);
+ if (rv != 0) {
+ return rv;
+ }
+
+ if (conn->local.bidi.max_streams > 0) {
+ rv = conn_call_extend_max_local_streams_bidi(conn,
+ conn->local.bidi.max_streams);
+ if (rv != 0) {
+ return rv;
+ }
+ }
+ if (conn->local.uni.max_streams > 0) {
+ rv = conn_call_extend_max_local_streams_uni(conn,
+ conn->local.uni.max_streams);
+ if (rv != 0) {
+ return rv;
+ }
+ }
+
+ return 0;
+}
+
+/*
+ * conn_recv_cpkt processes compound packet after handshake. The
+ * buffer pointed by |pkt| might contain multiple packets. The 1RTT
+ * packet must be the last one because it does not have payload length
+ * field.
+ *
+ * This function returns 0 if it succeeds, or the same negative error
+ * codes from conn_recv_pkt except for NGTCP2_ERR_DISCARD_PKT.
+ */
+static int conn_recv_cpkt(ngtcp2_conn *conn, const ngtcp2_path *path,
+ const ngtcp2_pkt_info *pi, const uint8_t *pkt,
+ size_t pktlen, ngtcp2_tstamp ts) {
+ ngtcp2_ssize nread;
+ int rv;
+ const uint8_t *origpkt = pkt;
+ size_t dgramlen = pktlen;
+
+ if (ngtcp2_path_eq(&conn->dcid.current.ps.path, path)) {
+ conn->dcid.current.bytes_recv += dgramlen;
+ }
+
+ while (pktlen) {
+ nread = conn_recv_pkt(conn, path, pi, pkt, pktlen, dgramlen, ts, ts);
+ if (nread < 0) {
+ if (ngtcp2_err_is_fatal((int)nread)) {
+ return (int)nread;
+ }
+
+ if (nread == NGTCP2_ERR_DRAINING) {
+ return NGTCP2_ERR_DRAINING;
+ }
+
+ if (origpkt == pkt) {
+ rv = conn_on_stateless_reset(conn, path, origpkt, dgramlen);
+ if (rv == 0) {
+ return NGTCP2_ERR_DRAINING;
+ }
+ }
+ if (nread == NGTCP2_ERR_DISCARD_PKT) {
+ return 0;
+ }
+ return (int)nread;
+ }
+
+ assert(pktlen >= (size_t)nread);
+ pkt += nread;
+ pktlen -= (size_t)nread;
+
+ ngtcp2_log_info(&conn->log, NGTCP2_LOG_EVENT_PKT,
+ "read packet %td left %zu", nread, pktlen);
+ }
+
+ return 0;
+}
+
+/*
+ * conn_is_retired_path returns nonzero if |path| is included in
+ * retired path list.
+ */
+static int conn_is_retired_path(ngtcp2_conn *conn, const ngtcp2_path *path) {
+ size_t i, len = ngtcp2_ringbuf_len(&conn->dcid.retired.rb);
+ ngtcp2_dcid *dcid;
+
+ for (i = 0; i < len; ++i) {
+ dcid = ngtcp2_ringbuf_get(&conn->dcid.retired.rb, i);
+ if (ngtcp2_path_eq(&dcid->ps.path, path)) {
+ return 1;
+ }
+ }
+
+ return 0;
+}
+
+/*
+ * conn_enqueue_handshake_done enqueues HANDSHAKE_DONE frame for
+ * transmission.
+ */
+static int conn_enqueue_handshake_done(ngtcp2_conn *conn) {
+ ngtcp2_pktns *pktns = &conn->pktns;
+ ngtcp2_frame_chain *nfrc;
+ int rv;
+
+ assert(conn->server);
+
+ rv = ngtcp2_frame_chain_objalloc_new(&nfrc, &conn->frc_objalloc);
+ if (rv != 0) {
+ return rv;
+ }
+
+ nfrc->fr.type = NGTCP2_FRAME_HANDSHAKE_DONE;
+ nfrc->next = pktns->tx.frq;
+ pktns->tx.frq = nfrc;
+
+ return 0;
+}
+
+/**
+ * @function
+ *
+ * `conn_read_handshake` performs QUIC cryptographic handshake by
+ * reading given data. |pkt| points to the buffer to read and
+ * |pktlen| is the length of the buffer. |path| is the network path.
+ *
+ * This function returns the number of bytes processed. Unless the
+ * last packet is 1RTT packet and an application decryption key has
+ * been installed, it returns |pktlen| if it succeeds. If it finds
+ * 1RTT packet and an application decryption key has been installed,
+ * it returns the number of bytes just before 1RTT packet begins.
+ *
+ * This function returns the number of bytes processed if it succeeds,
+ * or one of the following negative error codes: (TBD).
+ */
+static ngtcp2_ssize conn_read_handshake(ngtcp2_conn *conn,
+ const ngtcp2_path *path,
+ const ngtcp2_pkt_info *pi,
+ const uint8_t *pkt, size_t pktlen,
+ ngtcp2_tstamp ts) {
+ int rv;
+ ngtcp2_ssize nread;
+
+ switch (conn->state) {
+ case NGTCP2_CS_CLIENT_INITIAL:
+ /* TODO Better to log something when we ignore input */
+ return (ngtcp2_ssize)pktlen;
+ case NGTCP2_CS_CLIENT_WAIT_HANDSHAKE:
+ nread = conn_recv_handshake_cpkt(conn, path, pi, pkt, pktlen, ts);
+ if (nread < 0) {
+ return nread;
+ }
+
+ if (conn->state == NGTCP2_CS_CLIENT_INITIAL) {
+ /* Retry packet was received */
+ return (ngtcp2_ssize)pktlen;
+ }
+
+ assert(conn->hs_pktns);
+
+ if (conn->hs_pktns->crypto.rx.ckm && conn->in_pktns) {
+ rv = conn_process_buffered_handshake_pkt(conn, ts);
+ if (rv != 0) {
+ return rv;
+ }
+ }
+
+ if (conn_is_handshake_completed(conn) &&
+ !(conn->flags & NGTCP2_CONN_FLAG_HANDSHAKE_COMPLETED_HANDLED)) {
+ rv = conn_handshake_completed(conn);
+ if (rv != 0) {
+ return rv;
+ }
+
+ rv = conn_process_buffered_protected_pkt(conn, &conn->pktns, ts);
+ if (rv != 0) {
+ return rv;
+ }
+ }
+
+ return nread;
+ case NGTCP2_CS_SERVER_INITIAL:
+ nread = conn_recv_handshake_cpkt(conn, path, pi, pkt, pktlen, ts);
+ if (nread < 0) {
+ return nread;
+ }
+
+ /*
+ * Client ServerHello might not fit into single Initial packet
+ * (e.g., resuming session with client authentication). If we get
+ * Client Initial which does not increase offset or it is 0RTT
+ * packet buffered, perform address validation in order to buffer
+ * validated data only.
+ */
+ if (ngtcp2_strm_rx_offset(&conn->in_pktns->crypto.strm) == 0) {
+ if (conn->in_pktns->crypto.strm.rx.rob &&
+ ngtcp2_rob_data_buffered(conn->in_pktns->crypto.strm.rx.rob)) {
+ /* Address has been validated with token */
+ if (conn->local.settings.tokenlen) {
+ return nread;
+ }
+ return NGTCP2_ERR_RETRY;
+ }
+ if (conn->in_pktns->rx.buffed_pkts) {
+ /* 0RTT is buffered, force retry */
+ return NGTCP2_ERR_RETRY;
+ }
+ /* If neither CRYPTO frame nor 0RTT packet is processed, just
+ drop connection. */
+ return NGTCP2_ERR_DROP_CONN;
+ }
+
+ /* Process re-ordered 0-RTT packets which arrived before Initial
+ packet. */
+ if (conn->early.ckm) {
+ assert(conn->in_pktns);
+
+ rv = conn_process_buffered_protected_pkt(conn, conn->in_pktns, ts);
+ if (rv != 0) {
+ return rv;
+ }
+ }
+
+ return nread;
+ case NGTCP2_CS_SERVER_WAIT_HANDSHAKE:
+ nread = conn_recv_handshake_cpkt(conn, path, pi, pkt, pktlen, ts);
+ if (nread < 0) {
+ return nread;
+ }
+
+ if (conn->hs_pktns->crypto.rx.ckm) {
+ rv = conn_process_buffered_handshake_pkt(conn, ts);
+ if (rv != 0) {
+ return rv;
+ }
+ }
+
+ if (conn->hs_pktns->rx.max_pkt_num != -1) {
+ conn_discard_initial_state(conn, ts);
+ }
+
+ if (!conn_is_handshake_completed(conn)) {
+ /* If server hits amplification limit, it cancels loss detection
+ timer. If server receives a packet from client, the limit is
+ increased and server can send more. If server has
+ ack-eliciting Initial or Handshake packets, it should resend
+ it if timer fired but timer is not armed in this case. So
+ instead of resending Initial/Handshake packets, if server has
+ 1RTT data to send, it might send them and then might hit
+ amplification limit again until it hits stream data limit.
+ Initial/Handshake data is not resent. In order to avoid this
+ situation, try to arm loss detection and check the expiry
+ here so that on next write call, we can resend
+ Initial/Handshake first. */
+ if (conn->cstat.loss_detection_timer == UINT64_MAX) {
+ ngtcp2_conn_set_loss_detection_timer(conn, ts);
+ if (ngtcp2_conn_loss_detection_expiry(conn) <= ts) {
+ rv = ngtcp2_conn_on_loss_detection_timer(conn, ts);
+ if (rv != 0) {
+ return rv;
+ }
+ }
+ }
+
+ if ((size_t)nread < pktlen) {
+ /* We have 1RTT packet and application rx key, but the
+ handshake has not completed yet. */
+ ngtcp2_log_info(&conn->log, NGTCP2_LOG_EVENT_CON,
+ "buffering 1RTT packet len=%zu",
+ pktlen - (size_t)nread);
+
+ rv = conn_buffer_pkt(conn, &conn->pktns, path, pi, pkt + nread,
+ pktlen - (size_t)nread, pktlen, ts);
+ if (rv != 0) {
+ assert(ngtcp2_err_is_fatal(rv));
+ return rv;
+ }
+
+ return (ngtcp2_ssize)pktlen;
+ }
+
+ return nread;
+ }
+
+ if (!(conn->flags & NGTCP2_CONN_FLAG_TRANSPORT_PARAM_RECVED)) {
+ return NGTCP2_ERR_REQUIRED_TRANSPORT_PARAM;
+ }
+
+ rv = conn_handshake_completed(conn);
+ if (rv != 0) {
+ return rv;
+ }
+ conn->state = NGTCP2_CS_POST_HANDSHAKE;
+
+ rv = conn_call_activate_dcid(conn, &conn->dcid.current);
+ if (rv != 0) {
+ return rv;
+ }
+
+ rv = conn_process_buffered_protected_pkt(conn, &conn->pktns, ts);
+ if (rv != 0) {
+ return rv;
+ }
+
+ conn_discard_handshake_state(conn, ts);
+
+ rv = conn_enqueue_handshake_done(conn);
+ if (rv != 0) {
+ return rv;
+ }
+
+ if (!conn->local.settings.no_pmtud) {
+ rv = conn_start_pmtud(conn);
+ if (rv != 0) {
+ return rv;
+ }
+ }
+
+ conn->pktns.rtb.persistent_congestion_start_ts = ts;
+
+ /* Re-arm loss detection timer here after handshake has been
+ confirmed. */
+ ngtcp2_conn_set_loss_detection_timer(conn, ts);
+
+ return nread;
+ case NGTCP2_CS_CLOSING:
+ return NGTCP2_ERR_CLOSING;
+ case NGTCP2_CS_DRAINING:
+ return NGTCP2_ERR_DRAINING;
+ default:
+ return (ngtcp2_ssize)pktlen;
+ }
+}
+
+int ngtcp2_conn_read_pkt_versioned(ngtcp2_conn *conn, const ngtcp2_path *path,
+ int pkt_info_version,
+ const ngtcp2_pkt_info *pi,
+ const uint8_t *pkt, size_t pktlen,
+ ngtcp2_tstamp ts) {
+ int rv = 0;
+ ngtcp2_ssize nread = 0;
+ const ngtcp2_pkt_info zero_pi = {0};
+ (void)pkt_info_version;
+
+ assert(!(conn->flags & NGTCP2_CONN_FLAG_PPE_PENDING));
+
+ conn->log.last_ts = ts;
+ conn->qlog.last_ts = ts;
+
+ ngtcp2_log_info(&conn->log, NGTCP2_LOG_EVENT_CON, "recv packet len=%zu",
+ pktlen);
+
+ if (pktlen == 0) {
+ return NGTCP2_ERR_INVALID_ARGUMENT;
+ }
+
+ /* client does not expect a packet from unknown path. */
+ if (!conn->server && !ngtcp2_path_eq(&conn->dcid.current.ps.path, path) &&
+ (!conn->pv || !ngtcp2_path_eq(&conn->pv->dcid.ps.path, path)) &&
+ !conn_is_retired_path(conn, path)) {
+ ngtcp2_log_info(&conn->log, NGTCP2_LOG_EVENT_CON,
+ "ignore packet from unknown path");
+ return 0;
+ }
+
+ if (!pi) {
+ pi = &zero_pi;
+ }
+
+ switch (conn->state) {
+ case NGTCP2_CS_CLIENT_INITIAL:
+ case NGTCP2_CS_CLIENT_WAIT_HANDSHAKE:
+ case NGTCP2_CS_CLIENT_TLS_HANDSHAKE_FAILED:
+ nread = conn_read_handshake(conn, path, pi, pkt, pktlen, ts);
+ if (nread < 0) {
+ return (int)nread;
+ }
+
+ if ((size_t)nread == pktlen) {
+ return 0;
+ }
+
+ assert(conn->pktns.crypto.rx.ckm);
+
+ pkt += nread;
+ pktlen -= (size_t)nread;
+
+ break;
+ case NGTCP2_CS_SERVER_INITIAL:
+ case NGTCP2_CS_SERVER_WAIT_HANDSHAKE:
+ case NGTCP2_CS_SERVER_TLS_HANDSHAKE_FAILED:
+ if (!ngtcp2_path_eq(&conn->dcid.current.ps.path, path)) {
+ ngtcp2_log_info(&conn->log, NGTCP2_LOG_EVENT_CON,
+ "ignore packet from unknown path during handshake");
+
+ if (conn->state == NGTCP2_CS_SERVER_INITIAL &&
+ ngtcp2_strm_rx_offset(&conn->in_pktns->crypto.strm) == 0 &&
+ (!conn->in_pktns->crypto.strm.rx.rob ||
+ !ngtcp2_rob_data_buffered(conn->in_pktns->crypto.strm.rx.rob))) {
+ return NGTCP2_ERR_DROP_CONN;
+ }
+
+ return 0;
+ }
+
+ nread = conn_read_handshake(conn, path, pi, pkt, pktlen, ts);
+ if (nread < 0) {
+ return (int)nread;
+ }
+
+ if ((size_t)nread == pktlen) {
+ return 0;
+ }
+
+ assert(conn->pktns.crypto.rx.ckm);
+
+ pkt += nread;
+ pktlen -= (size_t)nread;
+
+ break;
+ case NGTCP2_CS_CLOSING:
+ return NGTCP2_ERR_CLOSING;
+ case NGTCP2_CS_DRAINING:
+ return NGTCP2_ERR_DRAINING;
+ case NGTCP2_CS_POST_HANDSHAKE:
+ rv = conn_prepare_key_update(conn, ts);
+ if (rv != 0) {
+ return rv;
+ }
+ break;
+ default:
+ ngtcp2_unreachable();
+ }
+
+ return conn_recv_cpkt(conn, path, pi, pkt, pktlen, ts);
+}
+
+/*
+ * conn_check_pkt_num_exhausted returns nonzero if packet number is
+ * exhausted in at least one of packet number space.
+ */
+static int conn_check_pkt_num_exhausted(ngtcp2_conn *conn) {
+ ngtcp2_pktns *in_pktns = conn->in_pktns;
+ ngtcp2_pktns *hs_pktns = conn->hs_pktns;
+
+ return (in_pktns && in_pktns->tx.last_pkt_num == NGTCP2_MAX_PKT_NUM) ||
+ (hs_pktns && hs_pktns->tx.last_pkt_num == NGTCP2_MAX_PKT_NUM) ||
+ conn->pktns.tx.last_pkt_num == NGTCP2_MAX_PKT_NUM;
+}
+
+/*
+ * conn_retransmit_retry_early retransmits 0RTT packet after Retry is
+ * received from server.
+ */
+static ngtcp2_ssize conn_retransmit_retry_early(ngtcp2_conn *conn,
+ ngtcp2_pkt_info *pi,
+ uint8_t *dest, size_t destlen,
+ uint8_t flags,
+ ngtcp2_tstamp ts) {
+ return conn_write_pkt(conn, pi, dest, destlen, NULL, NGTCP2_PKT_0RTT, flags,
+ ts);
+}
+
+/*
+ * conn_handshake_probe_left returns nonzero if there are probe
+ * packets to be sent for Initial or Handshake packet number space
+ * left.
+ */
+static int conn_handshake_probe_left(ngtcp2_conn *conn) {
+ return (conn->in_pktns && conn->in_pktns->rtb.probe_pkt_left) ||
+ conn->hs_pktns->rtb.probe_pkt_left;
+}
+
+/*
+ * conn_validate_early_transport_params_limits validates that the
+ * limits in transport parameters remembered by client for early data
+ * are not reduced. This function is only used by client and should
+ * only be called when early data is accepted by server.
+ */
+static int conn_validate_early_transport_params_limits(ngtcp2_conn *conn) {
+ const ngtcp2_transport_params *params = conn->remote.transport_params;
+
+ assert(!conn->server);
+ assert(params);
+
+ if (conn->early.transport_params.active_connection_id_limit >
+ params->active_connection_id_limit ||
+ conn->early.transport_params.initial_max_data >
+ params->initial_max_data ||
+ conn->early.transport_params.initial_max_stream_data_bidi_local >
+ params->initial_max_stream_data_bidi_local ||
+ conn->early.transport_params.initial_max_stream_data_bidi_remote >
+ params->initial_max_stream_data_bidi_remote ||
+ conn->early.transport_params.initial_max_stream_data_uni >
+ params->initial_max_stream_data_uni ||
+ conn->early.transport_params.initial_max_streams_bidi >
+ params->initial_max_streams_bidi ||
+ conn->early.transport_params.initial_max_streams_uni >
+ params->initial_max_streams_uni ||
+ conn->early.transport_params.max_datagram_frame_size >
+ params->max_datagram_frame_size) {
+ return NGTCP2_ERR_PROTO;
+ }
+
+ return 0;
+}
+
+/*
+ * conn_write_handshake writes QUIC handshake packets to the buffer
+ * pointed by |dest| of length |destlen|. |write_datalen| specifies
+ * the expected length of 0RTT or 1RTT packet payload. Specify 0 to
+ * |write_datalen| if there is no such data.
+ *
+ * This function returns the number of bytes written to the buffer, or
+ * one of the following negative error codes:
+ *
+ * NGTCP2_ERR_PKT_NUM_EXHAUSTED
+ * Packet number is exhausted.
+ * NGTCP2_ERR_NOMEM
+ * Out of memory
+ * NGTCP2_ERR_REQUIRED_TRANSPORT_PARAM
+ * Required transport parameter is missing.
+ * NGTCP2_CS_CLOSING
+ * Connection is in closing state.
+ * NGTCP2_CS_DRAINING
+ * Connection is in draining state.
+ *
+ * In addition to the above negative error codes, the same error codes
+ * from conn_recv_pkt may also be returned.
+ */
+static ngtcp2_ssize conn_write_handshake(ngtcp2_conn *conn, ngtcp2_pkt_info *pi,
+ uint8_t *dest, size_t destlen,
+ uint64_t write_datalen,
+ ngtcp2_tstamp ts) {
+ int rv;
+ ngtcp2_ssize res = 0, nwrite = 0, early_spktlen = 0;
+ size_t origlen = destlen;
+ uint64_t pending_early_datalen;
+ ngtcp2_dcid *dcid;
+ ngtcp2_preferred_addr *paddr;
+
+ switch (conn->state) {
+ case NGTCP2_CS_CLIENT_INITIAL:
+ pending_early_datalen = conn_retry_early_payloadlen(conn);
+ if (pending_early_datalen) {
+ write_datalen = pending_early_datalen;
+ }
+
+ if (!(conn->flags & NGTCP2_CONN_FLAG_RECV_RETRY)) {
+ nwrite =
+ conn_write_client_initial(conn, pi, dest, destlen, write_datalen, ts);
+ if (nwrite <= 0) {
+ return nwrite;
+ }
+ } else {
+ nwrite = conn_write_handshake_pkt(
+ conn, pi, dest, destlen, NGTCP2_PKT_INITIAL,
+ NGTCP2_WRITE_PKT_FLAG_NONE, write_datalen, ts);
+ if (nwrite < 0) {
+ return nwrite;
+ }
+ }
+
+ if (pending_early_datalen) {
+ early_spktlen = conn_retransmit_retry_early(
+ conn, pi, dest + nwrite, destlen - (size_t)nwrite,
+ nwrite ? NGTCP2_WRITE_PKT_FLAG_REQUIRE_PADDING
+ : NGTCP2_WRITE_PKT_FLAG_NONE,
+ ts);
+
+ if (early_spktlen < 0) {
+ assert(ngtcp2_err_is_fatal((int)early_spktlen));
+ return early_spktlen;
+ }
+ }
+
+ conn->state = NGTCP2_CS_CLIENT_WAIT_HANDSHAKE;
+
+ res = nwrite + early_spktlen;
+
+ return res;
+ case NGTCP2_CS_CLIENT_WAIT_HANDSHAKE:
+ if (!conn_handshake_probe_left(conn) && conn_cwnd_is_zero(conn)) {
+ destlen = 0;
+ } else {
+ if (!(conn->flags & NGTCP2_CONN_FLAG_HANDSHAKE_COMPLETED_HANDLED)) {
+ pending_early_datalen = conn_retry_early_payloadlen(conn);
+ if (pending_early_datalen) {
+ write_datalen = pending_early_datalen;
+ }
+ }
+
+ nwrite =
+ conn_write_handshake_pkts(conn, pi, dest, destlen, write_datalen, ts);
+ if (nwrite < 0) {
+ return nwrite;
+ }
+
+ res += nwrite;
+ dest += nwrite;
+ destlen -= (size_t)nwrite;
+ }
+
+ if (!conn_is_handshake_completed(conn)) {
+ if (!(conn->flags & NGTCP2_CONN_FLAG_EARLY_DATA_REJECTED)) {
+ nwrite = conn_retransmit_retry_early(conn, pi, dest, destlen,
+ NGTCP2_WRITE_PKT_FLAG_NONE, ts);
+ if (nwrite < 0) {
+ return nwrite;
+ }
+
+ res += nwrite;
+ }
+
+ if (res == 0) {
+ nwrite = conn_write_handshake_ack_pkts(conn, pi, dest, origlen, ts);
+ if (nwrite < 0) {
+ return nwrite;
+ }
+ res = nwrite;
+ }
+
+ return res;
+ }
+
+ if (!(conn->flags & NGTCP2_CONN_FLAG_TRANSPORT_PARAM_RECVED)) {
+ return NGTCP2_ERR_REQUIRED_TRANSPORT_PARAM;
+ }
+
+ if ((conn->flags & NGTCP2_CONN_FLAG_EARLY_KEY_INSTALLED) &&
+ !(conn->flags & NGTCP2_CONN_FLAG_EARLY_DATA_REJECTED)) {
+ rv = conn_validate_early_transport_params_limits(conn);
+ if (rv != 0) {
+ return rv;
+ }
+ }
+
+ /* Server might increase stream data limits. Extend it if we have
+ streams created for early data. */
+ rv = conn_sync_stream_data_limit(conn);
+ if (rv != 0) {
+ return rv;
+ }
+
+ conn->state = NGTCP2_CS_POST_HANDSHAKE;
+
+ assert(conn->remote.transport_params);
+
+ if (conn->remote.transport_params->preferred_address_present) {
+ assert(!ngtcp2_ringbuf_full(&conn->dcid.unused.rb));
+
+ paddr = &conn->remote.transport_params->preferred_address;
+ dcid = ngtcp2_ringbuf_push_back(&conn->dcid.unused.rb);
+ ngtcp2_dcid_init(dcid, 1, &paddr->cid, paddr->stateless_reset_token);
+
+ rv = ngtcp2_gaptr_push(&conn->dcid.seqgap, 1, 1);
+ if (rv != 0) {
+ return (ngtcp2_ssize)rv;
+ }
+ }
+
+ if (conn->remote.transport_params->stateless_reset_token_present) {
+ assert(conn->dcid.current.seq == 0);
+ assert(!(conn->dcid.current.flags & NGTCP2_DCID_FLAG_TOKEN_PRESENT));
+ ngtcp2_dcid_set_token(
+ &conn->dcid.current,
+ conn->remote.transport_params->stateless_reset_token);
+ }
+
+ rv = conn_call_activate_dcid(conn, &conn->dcid.current);
+ if (rv != 0) {
+ return rv;
+ }
+
+ conn_process_early_rtb(conn);
+
+ if (!conn->local.settings.no_pmtud) {
+ rv = conn_start_pmtud(conn);
+ if (rv != 0) {
+ return rv;
+ }
+ }
+
+ return res;
+ case NGTCP2_CS_SERVER_INITIAL:
+ nwrite =
+ conn_write_handshake_pkts(conn, pi, dest, destlen, write_datalen, ts);
+ if (nwrite < 0) {
+ return nwrite;
+ }
+
+ if (nwrite) {
+ conn->state = NGTCP2_CS_SERVER_WAIT_HANDSHAKE;
+ }
+
+ return nwrite;
+ case NGTCP2_CS_SERVER_WAIT_HANDSHAKE:
+ if (conn_handshake_probe_left(conn) || !conn_cwnd_is_zero(conn)) {
+ nwrite =
+ conn_write_handshake_pkts(conn, pi, dest, destlen, write_datalen, ts);
+ if (nwrite < 0) {
+ return nwrite;
+ }
+
+ res += nwrite;
+ dest += nwrite;
+ destlen -= (size_t)nwrite;
+ }
+
+ if (res == 0) {
+ nwrite = conn_write_handshake_ack_pkts(conn, pi, dest, origlen, ts);
+ if (nwrite < 0) {
+ return nwrite;
+ }
+
+ res += nwrite;
+ dest += nwrite;
+ origlen -= (size_t)nwrite;
+ }
+
+ return res;
+ case NGTCP2_CS_CLOSING:
+ return NGTCP2_ERR_CLOSING;
+ case NGTCP2_CS_DRAINING:
+ return NGTCP2_ERR_DRAINING;
+ default:
+ return 0;
+ }
+}
+
+/**
+ * @function
+ *
+ * `conn_client_write_handshake` writes client side handshake data and
+ * 0RTT packet.
+ *
+ * In order to send STREAM data in 0RTT packet, specify
+ * |vmsg|->stream. |vmsg|->stream.strm, |vmsg|->stream.fin,
+ * |vmsg|->stream.data, and |vmsg|->stream.datacnt are stream to which
+ * 0-RTT data is sent, whether it is a last data chunk in this stream,
+ * a vector of 0-RTT data, and its number of elements respectively.
+ * The amount of 0RTT data sent is assigned to
+ * *|vmsg|->stream.pdatalen. If no data is sent, -1 is assigned.
+ * Note that 0 length STREAM frame is allowed in QUIC, so 0 might be
+ * assigned to *|vmsg|->stream.pdatalen.
+ *
+ * This function returns 0 if it cannot write any frame because buffer
+ * is too small, or packet is congestion limited. Application should
+ * keep reading and wait for congestion window to grow.
+ *
+ * This function returns the number of bytes written to the buffer
+ * pointed by |dest| if it succeeds, or one of the following negative
+ * error codes: (TBD).
+ */
+static ngtcp2_ssize conn_client_write_handshake(ngtcp2_conn *conn,
+ ngtcp2_pkt_info *pi,
+ uint8_t *dest, size_t destlen,
+ ngtcp2_vmsg *vmsg,
+ ngtcp2_tstamp ts) {
+ int send_stream = 0;
+ int send_datagram = 0;
+ ngtcp2_ssize spktlen, early_spktlen;
+ uint64_t datalen;
+ uint64_t write_datalen = 0;
+ uint8_t wflags = NGTCP2_WRITE_PKT_FLAG_NONE;
+ int ppe_pending = (conn->flags & NGTCP2_CONN_FLAG_PPE_PENDING) != 0;
+ uint32_t version;
+
+ assert(!conn->server);
+
+ /* conn->early.ckm might be created in the first call of
+ conn_handshake(). Check it later. */
+ if (vmsg) {
+ switch (vmsg->type) {
+ case NGTCP2_VMSG_TYPE_STREAM:
+ datalen = ngtcp2_vec_len(vmsg->stream.data, vmsg->stream.datacnt);
+ send_stream =
+ conn_retry_early_payloadlen(conn) == 0 &&
+ /* 0 length STREAM frame is allowed */
+ (datalen == 0 ||
+ (datalen > 0 &&
+ (vmsg->stream.strm->tx.max_offset - vmsg->stream.strm->tx.offset) &&
+ (conn->tx.max_offset - conn->tx.offset)));
+ if (send_stream) {
+ write_datalen =
+ conn_enforce_flow_control(conn, vmsg->stream.strm, datalen);
+ write_datalen =
+ ngtcp2_min(write_datalen, NGTCP2_MIN_COALESCED_PAYLOADLEN);
+ write_datalen += NGTCP2_STREAM_OVERHEAD;
+
+ if (vmsg->stream.flags & NGTCP2_WRITE_STREAM_FLAG_MORE) {
+ wflags |= NGTCP2_WRITE_PKT_FLAG_MORE;
+ }
+ } else {
+ vmsg = NULL;
+ }
+ break;
+ case NGTCP2_VMSG_TYPE_DATAGRAM:
+ datalen = ngtcp2_vec_len(vmsg->datagram.data, vmsg->datagram.datacnt);
+ send_datagram = conn_retry_early_payloadlen(conn) == 0;
+ if (send_datagram) {
+ write_datalen = datalen + NGTCP2_DATAGRAM_OVERHEAD;
+
+ if (vmsg->datagram.flags & NGTCP2_WRITE_DATAGRAM_FLAG_MORE) {
+ wflags |= NGTCP2_WRITE_PKT_FLAG_MORE;
+ }
+ } else {
+ vmsg = NULL;
+ }
+ break;
+ }
+ }
+
+ if (!ppe_pending) {
+ spktlen = conn_write_handshake(conn, pi, dest, destlen, write_datalen, ts);
+
+ if (spktlen < 0) {
+ return spktlen;
+ }
+
+ if ((conn->flags & NGTCP2_CONN_FLAG_EARLY_DATA_REJECTED) ||
+ !conn->early.ckm || (!send_stream && !send_datagram)) {
+ return spktlen;
+ }
+
+ /* If spktlen > 0, we are making a compound packet. If Initial
+ packet is written, we have to pad bytes to 0-RTT packet. */
+ version = conn->negotiated_version ? conn->negotiated_version
+ : conn->client_chosen_version;
+ if (spktlen > 0 &&
+ ngtcp2_pkt_get_type_long(version, dest[0]) == NGTCP2_PKT_INITIAL) {
+ wflags |= NGTCP2_WRITE_PKT_FLAG_REQUIRE_PADDING;
+ conn->pkt.require_padding = 1;
+ } else {
+ conn->pkt.require_padding = 0;
+ }
+ } else {
+ assert(!conn->pktns.crypto.rx.ckm);
+ assert(!conn->pktns.crypto.tx.ckm);
+ assert(conn->early.ckm);
+
+ if (conn->pkt.require_padding) {
+ wflags |= NGTCP2_WRITE_PKT_FLAG_REQUIRE_PADDING;
+ }
+ spktlen = conn->pkt.hs_spktlen;
+ }
+
+ dest += spktlen;
+ destlen -= (size_t)spktlen;
+
+ if (conn_cwnd_is_zero(conn)) {
+ return spktlen;
+ }
+
+ early_spktlen = conn_write_pkt(conn, pi, dest, destlen, vmsg, NGTCP2_PKT_0RTT,
+ wflags, ts);
+
+ if (early_spktlen < 0) {
+ switch (early_spktlen) {
+ case NGTCP2_ERR_STREAM_DATA_BLOCKED:
+ return spktlen;
+ case NGTCP2_ERR_WRITE_MORE:
+ conn->pkt.hs_spktlen = spktlen;
+ break;
+ }
+ return early_spktlen;
+ }
+
+ return spktlen + early_spktlen;
+}
+
+void ngtcp2_conn_handshake_completed(ngtcp2_conn *conn) {
+ conn->flags |= NGTCP2_CONN_FLAG_HANDSHAKE_COMPLETED;
+ if (conn->server) {
+ conn->flags |= NGTCP2_CONN_FLAG_HANDSHAKE_CONFIRMED;
+ }
+}
+
+int ngtcp2_conn_get_handshake_completed(ngtcp2_conn *conn) {
+ return conn_is_handshake_completed(conn) &&
+ (conn->flags & NGTCP2_CONN_FLAG_HANDSHAKE_COMPLETED_HANDLED);
+}
+
+int ngtcp2_conn_sched_ack(ngtcp2_conn *conn, ngtcp2_acktr *acktr,
+ int64_t pkt_num, int active_ack, ngtcp2_tstamp ts) {
+ int rv;
+ (void)conn;
+
+ rv = ngtcp2_acktr_add(acktr, pkt_num, active_ack, ts);
+ if (rv != 0) {
+ assert(rv != NGTCP2_ERR_INVALID_ARGUMENT);
+ return rv;
+ }
+
+ return 0;
+}
+
+int ngtcp2_accept(ngtcp2_pkt_hd *dest, const uint8_t *pkt, size_t pktlen) {
+ ngtcp2_ssize nread;
+ ngtcp2_pkt_hd hd, *p;
+
+ if (dest) {
+ p = dest;
+ } else {
+ p = &hd;
+ }
+
+ if (pktlen == 0 || (pkt[0] & NGTCP2_HEADER_FORM_BIT) == 0) {
+ return NGTCP2_ERR_INVALID_ARGUMENT;
+ }
+
+ nread = ngtcp2_pkt_decode_hd_long(p, pkt, pktlen);
+ if (nread < 0) {
+ return (int)nread;
+ }
+
+ switch (p->type) {
+ case NGTCP2_PKT_INITIAL:
+ break;
+ case NGTCP2_PKT_0RTT:
+ /* 0-RTT packet may arrive before Initial packet due to
+ re-ordering. ngtcp2 does not buffer 0RTT packet unless the
+ very first Initial packet is received or token is received. */
+ return NGTCP2_ERR_RETRY;
+ default:
+ return NGTCP2_ERR_INVALID_ARGUMENT;
+ }
+
+ if (pktlen < NGTCP2_MAX_UDP_PAYLOAD_SIZE ||
+ (p->tokenlen == 0 && p->dcid.datalen < NGTCP2_MIN_INITIAL_DCIDLEN)) {
+ return NGTCP2_ERR_INVALID_ARGUMENT;
+ }
+
+ return 0;
+}
+
+int ngtcp2_conn_install_initial_key(
+ ngtcp2_conn *conn, const ngtcp2_crypto_aead_ctx *rx_aead_ctx,
+ const uint8_t *rx_iv, const ngtcp2_crypto_cipher_ctx *rx_hp_ctx,
+ const ngtcp2_crypto_aead_ctx *tx_aead_ctx, const uint8_t *tx_iv,
+ const ngtcp2_crypto_cipher_ctx *tx_hp_ctx, size_t ivlen) {
+ ngtcp2_pktns *pktns = conn->in_pktns;
+ int rv;
+
+ assert(ivlen >= 8);
+ assert(pktns);
+
+ conn_call_delete_crypto_cipher_ctx(conn, &pktns->crypto.rx.hp_ctx);
+ pktns->crypto.rx.hp_ctx.native_handle = NULL;
+
+ if (pktns->crypto.rx.ckm) {
+ conn_call_delete_crypto_aead_ctx(conn, &pktns->crypto.rx.ckm->aead_ctx);
+ ngtcp2_crypto_km_del(pktns->crypto.rx.ckm, conn->mem);
+ pktns->crypto.rx.ckm = NULL;
+ }
+
+ conn_call_delete_crypto_cipher_ctx(conn, &pktns->crypto.tx.hp_ctx);
+ pktns->crypto.tx.hp_ctx.native_handle = NULL;
+
+ if (pktns->crypto.tx.ckm) {
+ conn_call_delete_crypto_aead_ctx(conn, &pktns->crypto.tx.ckm->aead_ctx);
+ ngtcp2_crypto_km_del(pktns->crypto.tx.ckm, conn->mem);
+ pktns->crypto.tx.ckm = NULL;
+ }
+
+ rv = ngtcp2_crypto_km_new(&pktns->crypto.rx.ckm, NULL, 0, NULL, rx_iv, ivlen,
+ conn->mem);
+ if (rv != 0) {
+ return rv;
+ }
+
+ rv = ngtcp2_crypto_km_new(&pktns->crypto.tx.ckm, NULL, 0, NULL, tx_iv, ivlen,
+ conn->mem);
+ if (rv != 0) {
+ return rv;
+ }
+
+ /* Take owner ship after we are sure that no failure occurs, so that
+ caller can delete these contexts on failure. */
+ pktns->crypto.rx.ckm->aead_ctx = *rx_aead_ctx;
+ pktns->crypto.rx.hp_ctx = *rx_hp_ctx;
+ pktns->crypto.tx.ckm->aead_ctx = *tx_aead_ctx;
+ pktns->crypto.tx.hp_ctx = *tx_hp_ctx;
+
+ return 0;
+}
+
+int ngtcp2_conn_install_vneg_initial_key(
+ ngtcp2_conn *conn, uint32_t version,
+ const ngtcp2_crypto_aead_ctx *rx_aead_ctx, const uint8_t *rx_iv,
+ const ngtcp2_crypto_cipher_ctx *rx_hp_ctx,
+ const ngtcp2_crypto_aead_ctx *tx_aead_ctx, const uint8_t *tx_iv,
+ const ngtcp2_crypto_cipher_ctx *tx_hp_ctx, size_t ivlen) {
+ int rv;
+
+ assert(ivlen >= 8);
+
+ conn_call_delete_crypto_cipher_ctx(conn, &conn->vneg.rx.hp_ctx);
+ conn->vneg.rx.hp_ctx.native_handle = NULL;
+
+ if (conn->vneg.rx.ckm) {
+ conn_call_delete_crypto_aead_ctx(conn, &conn->vneg.rx.ckm->aead_ctx);
+ ngtcp2_crypto_km_del(conn->vneg.rx.ckm, conn->mem);
+ conn->vneg.rx.ckm = NULL;
+ }
+
+ conn_call_delete_crypto_cipher_ctx(conn, &conn->vneg.tx.hp_ctx);
+ conn->vneg.tx.hp_ctx.native_handle = NULL;
+
+ if (conn->vneg.tx.ckm) {
+ conn_call_delete_crypto_aead_ctx(conn, &conn->vneg.tx.ckm->aead_ctx);
+ ngtcp2_crypto_km_del(conn->vneg.tx.ckm, conn->mem);
+ conn->vneg.tx.ckm = NULL;
+ }
+
+ rv = ngtcp2_crypto_km_new(&conn->vneg.rx.ckm, NULL, 0, NULL, rx_iv, ivlen,
+ conn->mem);
+ if (rv != 0) {
+ return rv;
+ }
+
+ rv = ngtcp2_crypto_km_new(&conn->vneg.tx.ckm, NULL, 0, NULL, tx_iv, ivlen,
+ conn->mem);
+ if (rv != 0) {
+ return rv;
+ }
+
+ /* Take owner ship after we are sure that no failure occurs, so that
+ caller can delete these contexts on failure. */
+ conn->vneg.rx.ckm->aead_ctx = *rx_aead_ctx;
+ conn->vneg.rx.hp_ctx = *rx_hp_ctx;
+ conn->vneg.tx.ckm->aead_ctx = *tx_aead_ctx;
+ conn->vneg.tx.hp_ctx = *tx_hp_ctx;
+ conn->vneg.version = version;
+
+ return 0;
+}
+
+int ngtcp2_conn_install_rx_handshake_key(
+ ngtcp2_conn *conn, const ngtcp2_crypto_aead_ctx *aead_ctx,
+ const uint8_t *iv, size_t ivlen, const ngtcp2_crypto_cipher_ctx *hp_ctx) {
+ ngtcp2_pktns *pktns = conn->hs_pktns;
+ int rv;
+
+ assert(ivlen >= 8);
+ assert(pktns);
+ assert(!pktns->crypto.rx.hp_ctx.native_handle);
+ assert(!pktns->crypto.rx.ckm);
+
+ rv = ngtcp2_crypto_km_new(&pktns->crypto.rx.ckm, NULL, 0, aead_ctx, iv, ivlen,
+ conn->mem);
+ if (rv != 0) {
+ return rv;
+ }
+
+ pktns->crypto.rx.hp_ctx = *hp_ctx;
+
+ rv = conn_call_recv_rx_key(conn, NGTCP2_CRYPTO_LEVEL_HANDSHAKE);
+ if (rv != 0) {
+ ngtcp2_crypto_km_del(pktns->crypto.rx.ckm, conn->mem);
+ pktns->crypto.rx.ckm = NULL;
+
+ memset(&pktns->crypto.rx.hp_ctx, 0, sizeof(pktns->crypto.rx.hp_ctx));
+
+ return rv;
+ }
+
+ return 0;
+}
+
+int ngtcp2_conn_install_tx_handshake_key(
+ ngtcp2_conn *conn, const ngtcp2_crypto_aead_ctx *aead_ctx,
+ const uint8_t *iv, size_t ivlen, const ngtcp2_crypto_cipher_ctx *hp_ctx) {
+ ngtcp2_pktns *pktns = conn->hs_pktns;
+ int rv;
+
+ assert(ivlen >= 8);
+ assert(pktns);
+ assert(!pktns->crypto.tx.hp_ctx.native_handle);
+ assert(!pktns->crypto.tx.ckm);
+
+ rv = ngtcp2_crypto_km_new(&pktns->crypto.tx.ckm, NULL, 0, aead_ctx, iv, ivlen,
+ conn->mem);
+ if (rv != 0) {
+ return rv;
+ }
+
+ pktns->crypto.tx.hp_ctx = *hp_ctx;
+
+ if (conn->server) {
+ rv = ngtcp2_conn_commit_local_transport_params(conn);
+ if (rv != 0) {
+ return rv;
+ }
+ }
+
+ rv = conn_call_recv_tx_key(conn, NGTCP2_CRYPTO_LEVEL_HANDSHAKE);
+ if (rv != 0) {
+ ngtcp2_crypto_km_del(pktns->crypto.tx.ckm, conn->mem);
+ pktns->crypto.tx.ckm = NULL;
+
+ memset(&pktns->crypto.tx.hp_ctx, 0, sizeof(pktns->crypto.tx.hp_ctx));
+
+ return rv;
+ }
+
+ return 0;
+}
+
+int ngtcp2_conn_install_early_key(ngtcp2_conn *conn,
+ const ngtcp2_crypto_aead_ctx *aead_ctx,
+ const uint8_t *iv, size_t ivlen,
+ const ngtcp2_crypto_cipher_ctx *hp_ctx) {
+ int rv;
+
+ assert(ivlen >= 8);
+ assert(!conn->early.hp_ctx.native_handle);
+ assert(!conn->early.ckm);
+
+ rv = ngtcp2_crypto_km_new(&conn->early.ckm, NULL, 0, aead_ctx, iv, ivlen,
+ conn->mem);
+ if (rv != 0) {
+ return rv;
+ }
+
+ conn->early.hp_ctx = *hp_ctx;
+
+ conn->flags |= NGTCP2_CONN_FLAG_EARLY_KEY_INSTALLED;
+
+ if (conn->server) {
+ rv = conn_call_recv_rx_key(conn, NGTCP2_CRYPTO_LEVEL_EARLY);
+ } else {
+ rv = conn_call_recv_tx_key(conn, NGTCP2_CRYPTO_LEVEL_EARLY);
+ }
+ if (rv != 0) {
+ ngtcp2_crypto_km_del(conn->early.ckm, conn->mem);
+ conn->early.ckm = NULL;
+
+ memset(&conn->early.hp_ctx, 0, sizeof(conn->early.hp_ctx));
+
+ return rv;
+ }
+
+ return 0;
+}
+
+int ngtcp2_conn_install_rx_key(ngtcp2_conn *conn, const uint8_t *secret,
+ size_t secretlen,
+ const ngtcp2_crypto_aead_ctx *aead_ctx,
+ const uint8_t *iv, size_t ivlen,
+ const ngtcp2_crypto_cipher_ctx *hp_ctx) {
+ ngtcp2_pktns *pktns = &conn->pktns;
+ int rv;
+
+ assert(ivlen >= 8);
+ assert(!pktns->crypto.rx.hp_ctx.native_handle);
+ assert(!pktns->crypto.rx.ckm);
+
+ rv = ngtcp2_crypto_km_new(&pktns->crypto.rx.ckm, secret, secretlen, aead_ctx,
+ iv, ivlen, conn->mem);
+ if (rv != 0) {
+ return rv;
+ }
+
+ pktns->crypto.rx.hp_ctx = *hp_ctx;
+
+ if (!conn->server) {
+ if (conn->remote.pending_transport_params) {
+ ngtcp2_transport_params_del(conn->remote.transport_params, conn->mem);
+
+ conn->remote.transport_params = conn->remote.pending_transport_params;
+ conn->remote.pending_transport_params = NULL;
+ conn_sync_stream_id_limit(conn);
+ conn->tx.max_offset = conn->remote.transport_params->initial_max_data;
+ }
+
+ if (conn->early.ckm) {
+ conn_discard_early_key(conn);
+ }
+ }
+
+ rv = conn_call_recv_rx_key(conn, NGTCP2_CRYPTO_LEVEL_APPLICATION);
+ if (rv != 0) {
+ ngtcp2_crypto_km_del(pktns->crypto.rx.ckm, conn->mem);
+ pktns->crypto.rx.ckm = NULL;
+
+ memset(&pktns->crypto.rx.hp_ctx, 0, sizeof(pktns->crypto.rx.hp_ctx));
+
+ return rv;
+ }
+
+ return 0;
+}
+
+int ngtcp2_conn_install_tx_key(ngtcp2_conn *conn, const uint8_t *secret,
+ size_t secretlen,
+ const ngtcp2_crypto_aead_ctx *aead_ctx,
+ const uint8_t *iv, size_t ivlen,
+ const ngtcp2_crypto_cipher_ctx *hp_ctx) {
+ ngtcp2_pktns *pktns = &conn->pktns;
+ int rv;
+
+ assert(ivlen >= 8);
+ assert(!pktns->crypto.tx.hp_ctx.native_handle);
+ assert(!pktns->crypto.tx.ckm);
+
+ rv = ngtcp2_crypto_km_new(&pktns->crypto.tx.ckm, secret, secretlen, aead_ctx,
+ iv, ivlen, conn->mem);
+ if (rv != 0) {
+ return rv;
+ }
+
+ pktns->crypto.tx.hp_ctx = *hp_ctx;
+
+ if (conn->server) {
+ if (conn->remote.pending_transport_params) {
+ ngtcp2_transport_params_del(conn->remote.transport_params, conn->mem);
+
+ conn->remote.transport_params = conn->remote.pending_transport_params;
+ conn->remote.pending_transport_params = NULL;
+ conn_sync_stream_id_limit(conn);
+ conn->tx.max_offset = conn->remote.transport_params->initial_max_data;
+ }
+ } else if (conn->early.ckm) {
+ conn_discard_early_key(conn);
+ }
+
+ rv = conn_call_recv_tx_key(conn, NGTCP2_CRYPTO_LEVEL_APPLICATION);
+ if (rv != 0) {
+ ngtcp2_crypto_km_del(pktns->crypto.tx.ckm, conn->mem);
+ pktns->crypto.tx.ckm = NULL;
+
+ memset(&pktns->crypto.tx.hp_ctx, 0, sizeof(pktns->crypto.tx.hp_ctx));
+
+ return rv;
+ }
+
+ return 0;
+}
+
+int ngtcp2_conn_initiate_key_update(ngtcp2_conn *conn, ngtcp2_tstamp ts) {
+ ngtcp2_tstamp confirmed_ts = conn->crypto.key_update.confirmed_ts;
+ ngtcp2_duration pto = conn_compute_pto(conn, &conn->pktns);
+
+ assert(conn->state == NGTCP2_CS_POST_HANDSHAKE);
+
+ if (!(conn->flags & NGTCP2_CONN_FLAG_HANDSHAKE_CONFIRMED) ||
+ (conn->flags & NGTCP2_CONN_FLAG_KEY_UPDATE_NOT_CONFIRMED) ||
+ !conn->crypto.key_update.new_tx_ckm ||
+ !conn->crypto.key_update.new_rx_ckm ||
+ (confirmed_ts != UINT64_MAX && confirmed_ts + 3 * pto > ts)) {
+ return NGTCP2_ERR_INVALID_STATE;
+ }
+
+ conn_rotate_keys(conn, NGTCP2_MAX_PKT_NUM, /* initiator = */ 1);
+
+ return 0;
+}
+
+/*
+ * conn_retire_stale_bound_dcid retires stale destination connection
+ * ID in conn->dcid.bound to keep some unused destination connection
+ * IDs available.
+ *
+ * This function returns 0 if it succeeds, or one of the following
+ * negative error codes:
+ *
+ * NGTCP2_ERR_NOMEM
+ * Out of memory.
+ */
+static int conn_retire_stale_bound_dcid(ngtcp2_conn *conn,
+ ngtcp2_duration timeout,
+ ngtcp2_tstamp ts) {
+ size_t i;
+ ngtcp2_dcid *dcid, *last;
+ int rv;
+
+ for (i = 0; i < ngtcp2_ringbuf_len(&conn->dcid.bound.rb);) {
+ dcid = ngtcp2_ringbuf_get(&conn->dcid.bound.rb, i);
+
+ assert(dcid->cid.datalen);
+
+ if (dcid->bound_ts + timeout > ts) {
+ ++i;
+ continue;
+ }
+
+ rv = conn_retire_dcid_seq(conn, dcid->seq);
+ if (rv != 0) {
+ return rv;
+ }
+
+ if (i == 0) {
+ ngtcp2_ringbuf_pop_front(&conn->dcid.bound.rb);
+ continue;
+ }
+
+ if (i == ngtcp2_ringbuf_len(&conn->dcid.bound.rb) - 1) {
+ ngtcp2_ringbuf_pop_back(&conn->dcid.bound.rb);
+ break;
+ }
+
+ last = ngtcp2_ringbuf_get(&conn->dcid.bound.rb,
+ ngtcp2_ringbuf_len(&conn->dcid.bound.rb) - 1);
+ ngtcp2_dcid_copy(dcid, last);
+ ngtcp2_ringbuf_pop_back(&conn->dcid.bound.rb);
+ }
+
+ return 0;
+}
+
+ngtcp2_tstamp ngtcp2_conn_loss_detection_expiry(ngtcp2_conn *conn) {
+ return conn->cstat.loss_detection_timer;
+}
+
+ngtcp2_tstamp ngtcp2_conn_internal_expiry(ngtcp2_conn *conn) {
+ ngtcp2_tstamp res = UINT64_MAX, t;
+ ngtcp2_duration pto = conn_compute_pto(conn, &conn->pktns);
+ ngtcp2_scid *scid;
+ ngtcp2_dcid *dcid;
+ size_t i, len;
+
+ if (conn->pv) {
+ res = ngtcp2_pv_next_expiry(conn->pv);
+ }
+
+ if (conn->pmtud) {
+ res = ngtcp2_min(res, conn->pmtud->expiry);
+ }
+
+ if (!ngtcp2_pq_empty(&conn->scid.used)) {
+ scid = ngtcp2_struct_of(ngtcp2_pq_top(&conn->scid.used), ngtcp2_scid, pe);
+ if (scid->retired_ts != UINT64_MAX) {
+ t = scid->retired_ts + pto;
+ res = ngtcp2_min(res, t);
+ }
+ }
+
+ if (ngtcp2_ringbuf_len(&conn->dcid.retired.rb)) {
+ dcid = ngtcp2_ringbuf_get(&conn->dcid.retired.rb, 0);
+ t = dcid->retired_ts + pto;
+ res = ngtcp2_min(res, t);
+ }
+
+ if (conn->dcid.current.cid.datalen) {
+ len = ngtcp2_ringbuf_len(&conn->dcid.bound.rb);
+ for (i = 0; i < len; ++i) {
+ dcid = ngtcp2_ringbuf_get(&conn->dcid.bound.rb, i);
+
+ assert(dcid->cid.datalen);
+ assert(dcid->bound_ts != UINT64_MAX);
+
+ t = dcid->bound_ts + 3 * pto;
+ res = ngtcp2_min(res, t);
+ }
+ }
+
+ if (conn->server && conn->early.ckm &&
+ conn->early.discard_started_ts != UINT64_MAX) {
+ t = conn->early.discard_started_ts + 3 * pto;
+ res = ngtcp2_min(res, t);
+ }
+
+ return res;
+}
+
+ngtcp2_tstamp ngtcp2_conn_ack_delay_expiry(ngtcp2_conn *conn) {
+ ngtcp2_acktr *acktr = &conn->pktns.acktr;
+
+ if (!(acktr->flags & NGTCP2_ACKTR_FLAG_CANCEL_TIMER) &&
+ acktr->first_unacked_ts != UINT64_MAX) {
+ return acktr->first_unacked_ts + conn_compute_ack_delay(conn);
+ }
+ return UINT64_MAX;
+}
+
+static ngtcp2_tstamp conn_handshake_expiry(ngtcp2_conn *conn) {
+ if (conn_is_handshake_completed(conn) ||
+ conn->local.settings.handshake_timeout == UINT64_MAX) {
+ return UINT64_MAX;
+ }
+
+ return conn->local.settings.initial_ts +
+ conn->local.settings.handshake_timeout;
+}
+
+ngtcp2_tstamp ngtcp2_conn_get_expiry(ngtcp2_conn *conn) {
+ ngtcp2_tstamp t1 = ngtcp2_conn_loss_detection_expiry(conn);
+ ngtcp2_tstamp t2 = ngtcp2_conn_ack_delay_expiry(conn);
+ ngtcp2_tstamp t3 = ngtcp2_conn_internal_expiry(conn);
+ ngtcp2_tstamp t4 = ngtcp2_conn_lost_pkt_expiry(conn);
+ ngtcp2_tstamp t5 = conn_keep_alive_expiry(conn);
+ ngtcp2_tstamp t6 = conn_handshake_expiry(conn);
+ ngtcp2_tstamp t7 = ngtcp2_conn_get_idle_expiry(conn);
+ ngtcp2_tstamp res = ngtcp2_min(t1, t2);
+ res = ngtcp2_min(res, t3);
+ res = ngtcp2_min(res, t4);
+ res = ngtcp2_min(res, t5);
+ res = ngtcp2_min(res, t6);
+ res = ngtcp2_min(res, t7);
+ return ngtcp2_min(res, conn->tx.pacing.next_ts);
+}
+
+int ngtcp2_conn_handle_expiry(ngtcp2_conn *conn, ngtcp2_tstamp ts) {
+ int rv;
+ ngtcp2_duration pto = conn_compute_pto(conn, &conn->pktns);
+
+ assert(!(conn->flags & NGTCP2_CONN_FLAG_PPE_PENDING));
+
+ if (ngtcp2_conn_get_idle_expiry(conn) <= ts) {
+ return NGTCP2_ERR_IDLE_CLOSE;
+ }
+
+ ngtcp2_conn_cancel_expired_ack_delay_timer(conn, ts);
+
+ conn_cancel_expired_keep_alive_timer(conn, ts);
+
+ conn_cancel_expired_pkt_tx_timer(conn, ts);
+
+ ngtcp2_conn_remove_lost_pkt(conn, ts);
+
+ if (conn->pv) {
+ ngtcp2_pv_cancel_expired_timer(conn->pv, ts);
+ }
+
+ if (conn->pmtud) {
+ ngtcp2_pmtud_handle_expiry(conn->pmtud, ts);
+ if (ngtcp2_pmtud_finished(conn->pmtud)) {
+ ngtcp2_conn_stop_pmtud(conn);
+ }
+ }
+
+ if (ngtcp2_conn_loss_detection_expiry(conn) <= ts) {
+ rv = ngtcp2_conn_on_loss_detection_timer(conn, ts);
+ if (rv != 0) {
+ return rv;
+ }
+ }
+
+ if (conn->dcid.current.cid.datalen) {
+ rv = conn_retire_stale_bound_dcid(conn, 3 * pto, ts);
+ if (rv != 0) {
+ return rv;
+ }
+ }
+
+ rv = conn_remove_retired_connection_id(conn, pto, ts);
+ if (rv != 0) {
+ return rv;
+ }
+
+ if (conn->server && conn->early.ckm &&
+ conn->early.discard_started_ts != UINT64_MAX) {
+ if (conn->early.discard_started_ts + 3 * pto <= ts) {
+ conn_discard_early_key(conn);
+ }
+ }
+
+ if (!conn_is_handshake_completed(conn) &&
+ conn->local.settings.handshake_timeout != UINT64_MAX &&
+ conn->local.settings.initial_ts +
+ conn->local.settings.handshake_timeout <=
+ ts) {
+ return NGTCP2_ERR_HANDSHAKE_TIMEOUT;
+ }
+
+ return 0;
+}
+
+static void acktr_cancel_expired_ack_delay_timer(ngtcp2_acktr *acktr,
+ ngtcp2_duration max_ack_delay,
+ ngtcp2_tstamp ts) {
+ if (!(acktr->flags & NGTCP2_ACKTR_FLAG_CANCEL_TIMER) &&
+ acktr->first_unacked_ts != UINT64_MAX &&
+ acktr->first_unacked_ts + max_ack_delay <= ts) {
+ acktr->flags |= NGTCP2_ACKTR_FLAG_CANCEL_TIMER;
+ }
+}
+
+void ngtcp2_conn_cancel_expired_ack_delay_timer(ngtcp2_conn *conn,
+ ngtcp2_tstamp ts) {
+ ngtcp2_duration ack_delay = conn_compute_ack_delay(conn);
+
+ if (conn->in_pktns) {
+ acktr_cancel_expired_ack_delay_timer(&conn->in_pktns->acktr, 0, ts);
+ }
+ if (conn->hs_pktns) {
+ acktr_cancel_expired_ack_delay_timer(&conn->hs_pktns->acktr, 0, ts);
+ }
+ acktr_cancel_expired_ack_delay_timer(&conn->pktns.acktr, ack_delay, ts);
+}
+
+ngtcp2_tstamp ngtcp2_conn_lost_pkt_expiry(ngtcp2_conn *conn) {
+ ngtcp2_tstamp res = UINT64_MAX, ts;
+
+ if (conn->in_pktns) {
+ ts = ngtcp2_rtb_lost_pkt_ts(&conn->in_pktns->rtb);
+ if (ts != UINT64_MAX) {
+ ts += conn_compute_pto(conn, conn->in_pktns);
+ res = ngtcp2_min(res, ts);
+ }
+ }
+
+ if (conn->hs_pktns) {
+ ts = ngtcp2_rtb_lost_pkt_ts(&conn->hs_pktns->rtb);
+ if (ts != UINT64_MAX) {
+ ts += conn_compute_pto(conn, conn->hs_pktns);
+ res = ngtcp2_min(res, ts);
+ }
+ }
+
+ ts = ngtcp2_rtb_lost_pkt_ts(&conn->pktns.rtb);
+ if (ts != UINT64_MAX) {
+ ts += conn_compute_pto(conn, &conn->pktns);
+ res = ngtcp2_min(res, ts);
+ }
+
+ return res;
+}
+
+void ngtcp2_conn_remove_lost_pkt(ngtcp2_conn *conn, ngtcp2_tstamp ts) {
+ ngtcp2_duration pto;
+
+ if (conn->in_pktns) {
+ pto = conn_compute_pto(conn, conn->in_pktns);
+ ngtcp2_rtb_remove_expired_lost_pkt(&conn->in_pktns->rtb, pto, ts);
+ }
+ if (conn->hs_pktns) {
+ pto = conn_compute_pto(conn, conn->hs_pktns);
+ ngtcp2_rtb_remove_expired_lost_pkt(&conn->hs_pktns->rtb, pto, ts);
+ }
+ pto = conn_compute_pto(conn, &conn->pktns);
+ ngtcp2_rtb_remove_expired_lost_pkt(&conn->pktns.rtb, pto, ts);
+}
+
+/*
+ * select_preferred_version selects the most preferred version.
+ * |fallback_version| is chosen if no preference is made, or
+ * |preferred_versions| does not include any of |chosen_version| or
+ * |available_versions|. |chosen_version| is treated as an extra
+ * other version.
+ */
+static uint32_t select_preferred_version(const uint32_t *preferred_versions,
+ size_t preferred_versionslen,
+ uint32_t chosen_version,
+ const uint8_t *available_versions,
+ size_t available_versionslen,
+ uint32_t fallback_version) {
+ size_t i, j;
+ const uint8_t *p;
+ uint32_t v;
+
+ if (!preferred_versionslen ||
+ (!available_versionslen && chosen_version == fallback_version)) {
+ return fallback_version;
+ }
+
+ for (i = 0; i < preferred_versionslen; ++i) {
+ if (preferred_versions[i] == chosen_version) {
+ return chosen_version;
+ }
+ for (j = 0, p = available_versions; j < available_versionslen;
+ j += sizeof(uint32_t)) {
+ p = ngtcp2_get_uint32(&v, p);
+
+ if (preferred_versions[i] == v) {
+ return v;
+ }
+ }
+ }
+
+ return fallback_version;
+}
+
+/*
+ * conn_client_validate_transport_params validates |params| as client.
+ * |params| must be sent with Encrypted Extensions.
+ *
+ * This function returns 0 if it succeeds, or one of the following
+ * negative error codes:
+ *
+ * NGTCP2_ERR_TRANSPORT_PARAM
+ * params contains preferred address but server chose zero-length
+ * connection ID.
+ * NGTCP2_ERR_VERSION_NEGOTIATION_FAILURE
+ * Validation against version negotiation parameters failed.
+ */
+static int
+conn_client_validate_transport_params(ngtcp2_conn *conn,
+ const ngtcp2_transport_params *params) {
+ if (!ngtcp2_cid_eq(&conn->rcid, &params->original_dcid)) {
+ return NGTCP2_ERR_TRANSPORT_PARAM;
+ }
+
+ if (conn->flags & NGTCP2_CONN_FLAG_RECV_RETRY) {
+ if (!params->retry_scid_present) {
+ return NGTCP2_ERR_TRANSPORT_PARAM;
+ }
+ if (!ngtcp2_cid_eq(&conn->retry_scid, &params->retry_scid)) {
+ return NGTCP2_ERR_TRANSPORT_PARAM;
+ }
+ } else if (params->retry_scid_present) {
+ return NGTCP2_ERR_TRANSPORT_PARAM;
+ }
+
+ if (params->preferred_address_present &&
+ conn->dcid.current.cid.datalen == 0) {
+ return NGTCP2_ERR_TRANSPORT_PARAM;
+ }
+
+ if (params->version_info_present) {
+ if (conn->negotiated_version != params->version_info.chosen_version) {
+ return NGTCP2_ERR_VERSION_NEGOTIATION_FAILURE;
+ }
+
+ assert(vneg_available_versions_includes(conn->vneg.available_versions,
+ conn->vneg.available_versionslen,
+ conn->negotiated_version));
+ } else if (conn->client_chosen_version != conn->negotiated_version) {
+ return NGTCP2_ERR_VERSION_NEGOTIATION_FAILURE;
+ }
+
+ /* When client reacted upon Version Negotiation */
+ if (conn->local.settings.original_version != conn->client_chosen_version) {
+ if (!params->version_info_present) {
+ assert(conn->client_chosen_version == conn->negotiated_version);
+
+ /* QUIC v1 (and the supported draft versions) are treated
+ specially. If version_info is missing, no further validation
+ is necessary.
+ https://datatracker.ietf.org/doc/html/draft-ietf-quic-version-negotiation-10#section-8
+ */
+ if (conn->client_chosen_version == NGTCP2_PROTO_VER_V1 ||
+ (NGTCP2_PROTO_VER_DRAFT_MIN <= conn->client_chosen_version &&
+ conn->client_chosen_version <= NGTCP2_PROTO_VER_DRAFT_MAX)) {
+ return 0;
+ }
+
+ return NGTCP2_ERR_VERSION_NEGOTIATION_FAILURE;
+ }
+
+ /* Server choose original version after Version Negotiation.
+ Draft does not say this particular case, but this smells like
+ misbehaved server because server should accept original_version
+ in the original connection. */
+ if (conn->local.settings.original_version ==
+ params->version_info.chosen_version) {
+ return NGTCP2_ERR_VERSION_NEGOTIATION_FAILURE;
+ }
+
+ /* Check version downgrade on incompatible version negotiation. */
+ if (params->version_info.available_versionslen == 0) {
+ return NGTCP2_ERR_VERSION_NEGOTIATION_FAILURE;
+ }
+
+ if (conn->client_chosen_version !=
+ select_preferred_version(conn->vneg.preferred_versions,
+ conn->vneg.preferred_versionslen,
+ params->version_info.chosen_version,
+ params->version_info.available_versions,
+ params->version_info.available_versionslen,
+ /* fallback_version = */ 0)) {
+ return NGTCP2_ERR_VERSION_NEGOTIATION_FAILURE;
+ }
+ }
+
+ return 0;
+}
+
+uint32_t
+ngtcp2_conn_server_negotiate_version(ngtcp2_conn *conn,
+ const ngtcp2_version_info *version_info) {
+ assert(conn->server);
+ assert(conn->client_chosen_version == version_info->chosen_version);
+
+ return select_preferred_version(
+ conn->vneg.preferred_versions, conn->vneg.preferred_versionslen,
+ version_info->chosen_version, version_info->available_versions,
+ version_info->available_versionslen, version_info->chosen_version);
+}
+
+int ngtcp2_conn_set_remote_transport_params(
+ ngtcp2_conn *conn, const ngtcp2_transport_params *params) {
+ int rv;
+
+ /* We expect this function is called once per QUIC connection, but
+ GnuTLS server seems to call TLS extension callback twice if it
+ sends HelloRetryRequest. In practice, same QUIC transport
+ parameters are sent in the 2nd client flight, just returning 0
+ would cause no harm. */
+ if (conn->flags & NGTCP2_CONN_FLAG_TRANSPORT_PARAM_RECVED) {
+ return 0;
+ }
+
+ /* Assume that ngtcp2_decode_transport_params sets default value if
+ active_connection_id_limit is omitted. */
+ if (params->active_connection_id_limit <
+ NGTCP2_DEFAULT_ACTIVE_CONNECTION_ID_LIMIT) {
+ return NGTCP2_ERR_TRANSPORT_PARAM;
+ }
+
+ /* We assume that conn->dcid.current.cid is still the initial one.
+ This requires that transport parameter must be fed into
+ ngtcp2_conn as early as possible. */
+ if (!ngtcp2_cid_eq(&conn->dcid.current.cid, &params->initial_scid)) {
+ return NGTCP2_ERR_TRANSPORT_PARAM;
+ }
+
+ if (params->max_udp_payload_size < NGTCP2_MAX_UDP_PAYLOAD_SIZE) {
+ return NGTCP2_ERR_TRANSPORT_PARAM;
+ }
+
+ if (conn->server) {
+ if (params->version_info_present) {
+ if (!vneg_available_versions_includes(
+ params->version_info.available_versions,
+ params->version_info.available_versionslen,
+ params->version_info.chosen_version)) {
+ return NGTCP2_ERR_TRANSPORT_PARAM;
+ }
+
+ if (params->version_info.chosen_version != conn->client_chosen_version) {
+ return NGTCP2_ERR_VERSION_NEGOTIATION_FAILURE;
+ }
+
+ conn->negotiated_version =
+ ngtcp2_conn_server_negotiate_version(conn, &params->version_info);
+ if (conn->negotiated_version != conn->client_chosen_version) {
+ rv = conn_call_version_negotiation(conn, conn->negotiated_version,
+ &conn->rcid);
+ if (rv != 0) {
+ return rv;
+ }
+ }
+ } else {
+ conn->negotiated_version = conn->client_chosen_version;
+ }
+
+ conn->local.transport_params.version_info.chosen_version =
+ conn->negotiated_version;
+
+ ngtcp2_log_info(&conn->log, NGTCP2_LOG_EVENT_CON,
+ "the negotiated version is 0x%08x",
+ conn->negotiated_version);
+ } else {
+ rv = conn_client_validate_transport_params(conn, params);
+ if (rv != 0) {
+ return rv;
+ }
+ }
+
+ ngtcp2_log_remote_tp(&conn->log,
+ conn->server
+ ? NGTCP2_TRANSPORT_PARAMS_TYPE_CLIENT_HELLO
+ : NGTCP2_TRANSPORT_PARAMS_TYPE_ENCRYPTED_EXTENSIONS,
+ params);
+
+ ngtcp2_qlog_parameters_set_transport_params(&conn->qlog, params, conn->server,
+ NGTCP2_QLOG_SIDE_REMOTE);
+
+ if ((conn->server && conn->pktns.crypto.tx.ckm) ||
+ (!conn->server && conn->pktns.crypto.rx.ckm)) {
+ ngtcp2_transport_params_del(conn->remote.transport_params, conn->mem);
+ conn->remote.transport_params = NULL;
+
+ rv = ngtcp2_transport_params_copy_new(&conn->remote.transport_params,
+ params, conn->mem);
+ if (rv != 0) {
+ return rv;
+ }
+ conn_sync_stream_id_limit(conn);
+ conn->tx.max_offset = conn->remote.transport_params->initial_max_data;
+ } else {
+ assert(!conn->remote.pending_transport_params);
+
+ rv = ngtcp2_transport_params_copy_new(
+ &conn->remote.pending_transport_params, params, conn->mem);
+ if (rv != 0) {
+ return rv;
+ }
+ }
+
+ conn->flags |= NGTCP2_CONN_FLAG_TRANSPORT_PARAM_RECVED;
+
+ return 0;
+}
+
+int ngtcp2_conn_decode_remote_transport_params(ngtcp2_conn *conn,
+ const uint8_t *data,
+ size_t datalen) {
+ ngtcp2_transport_params params;
+ int rv;
+
+ rv = ngtcp2_decode_transport_params(
+ &params,
+ conn->server ? NGTCP2_TRANSPORT_PARAMS_TYPE_CLIENT_HELLO
+ : NGTCP2_TRANSPORT_PARAMS_TYPE_ENCRYPTED_EXTENSIONS,
+ data, datalen);
+ if (rv != 0) {
+ return rv;
+ }
+
+ return ngtcp2_conn_set_remote_transport_params(conn, &params);
+}
+
+const ngtcp2_transport_params *
+ngtcp2_conn_get_remote_transport_params(ngtcp2_conn *conn) {
+ if (conn->remote.pending_transport_params) {
+ return conn->remote.pending_transport_params;
+ }
+
+ return conn->remote.transport_params;
+}
+
+void ngtcp2_conn_set_early_remote_transport_params_versioned(
+ ngtcp2_conn *conn, int transport_params_version,
+ const ngtcp2_transport_params *params) {
+ ngtcp2_transport_params *p;
+ (void)transport_params_version;
+
+ assert(!conn->server);
+ assert(!conn->remote.transport_params);
+
+ /* Assume that all pointer fields in p are NULL */
+ p = ngtcp2_mem_calloc(conn->mem, 1, sizeof(*p));
+
+ conn->remote.transport_params = p;
+
+ p->initial_max_streams_bidi = params->initial_max_streams_bidi;
+ p->initial_max_streams_uni = params->initial_max_streams_uni;
+ p->initial_max_stream_data_bidi_local =
+ params->initial_max_stream_data_bidi_local;
+ p->initial_max_stream_data_bidi_remote =
+ params->initial_max_stream_data_bidi_remote;
+ p->initial_max_stream_data_uni = params->initial_max_stream_data_uni;
+ p->initial_max_data = params->initial_max_data;
+ p->active_connection_id_limit =
+ ngtcp2_max(NGTCP2_DEFAULT_ACTIVE_CONNECTION_ID_LIMIT,
+ params->active_connection_id_limit);
+ p->max_idle_timeout = params->max_idle_timeout;
+ if (!params->max_udp_payload_size) {
+ p->max_udp_payload_size = NGTCP2_DEFAULT_MAX_RECV_UDP_PAYLOAD_SIZE;
+ } else {
+ p->max_udp_payload_size =
+ ngtcp2_max(NGTCP2_MAX_UDP_PAYLOAD_SIZE, params->max_udp_payload_size);
+ }
+ p->disable_active_migration = params->disable_active_migration;
+ p->max_datagram_frame_size = params->max_datagram_frame_size;
+
+ /* These parameters are treated specially. If server accepts early
+ data, it must not set values for these parameters that are
+ smaller than these remembered values. */
+ conn->early.transport_params.initial_max_streams_bidi =
+ params->initial_max_streams_bidi;
+ conn->early.transport_params.initial_max_streams_uni =
+ params->initial_max_streams_uni;
+ conn->early.transport_params.initial_max_stream_data_bidi_local =
+ params->initial_max_stream_data_bidi_local;
+ conn->early.transport_params.initial_max_stream_data_bidi_remote =
+ params->initial_max_stream_data_bidi_remote;
+ conn->early.transport_params.initial_max_stream_data_uni =
+ params->initial_max_stream_data_uni;
+ conn->early.transport_params.initial_max_data = params->initial_max_data;
+ conn->early.transport_params.active_connection_id_limit =
+ params->active_connection_id_limit;
+ conn->early.transport_params.max_datagram_frame_size =
+ params->max_datagram_frame_size;
+
+ conn_sync_stream_id_limit(conn);
+
+ conn->tx.max_offset = p->initial_max_data;
+
+ ngtcp2_qlog_parameters_set_transport_params(&conn->qlog, p, conn->server,
+ NGTCP2_QLOG_SIDE_REMOTE);
+}
+
+int ngtcp2_conn_set_local_transport_params_versioned(
+ ngtcp2_conn *conn, int transport_params_version,
+ const ngtcp2_transport_params *params) {
+ (void)transport_params_version;
+
+ assert(conn->server);
+ assert(params->active_connection_id_limit <= NGTCP2_MAX_DCID_POOL_SIZE);
+
+ if (conn->hs_pktns == NULL || conn->hs_pktns->crypto.tx.ckm) {
+ return NGTCP2_ERR_INVALID_STATE;
+ }
+
+ conn_set_local_transport_params(conn, params);
+
+ return 0;
+}
+
+int ngtcp2_conn_commit_local_transport_params(ngtcp2_conn *conn) {
+ const ngtcp2_mem *mem = conn->mem;
+ ngtcp2_transport_params *params = &conn->local.transport_params;
+ ngtcp2_scid *scident;
+ int rv;
+
+ assert(1 == ngtcp2_ksl_len(&conn->scid.set));
+
+ if (params->active_connection_id_limit == 0) {
+ params->active_connection_id_limit =
+ NGTCP2_DEFAULT_ACTIVE_CONNECTION_ID_LIMIT;
+ }
+
+ params->initial_scid = conn->oscid;
+
+ if (conn->oscid.datalen == 0) {
+ params->preferred_address_present = 0;
+ }
+
+ if (conn->server && params->preferred_address_present) {
+ scident = ngtcp2_mem_malloc(mem, sizeof(*scident));
+ if (scident == NULL) {
+ return NGTCP2_ERR_NOMEM;
+ }
+
+ ngtcp2_scid_init(scident, 1, &params->preferred_address.cid);
+
+ rv = ngtcp2_ksl_insert(&conn->scid.set, NULL, &scident->cid, scident);
+ if (rv != 0) {
+ ngtcp2_mem_free(mem, scident);
+ return rv;
+ }
+
+ conn->scid.last_seq = 1;
+ }
+
+ conn->rx.window = conn->rx.unsent_max_offset = conn->rx.max_offset =
+ params->initial_max_data;
+ conn->remote.bidi.unsent_max_streams = params->initial_max_streams_bidi;
+ conn->remote.bidi.max_streams = params->initial_max_streams_bidi;
+ conn->remote.uni.unsent_max_streams = params->initial_max_streams_uni;
+ conn->remote.uni.max_streams = params->initial_max_streams_uni;
+
+ conn->flags |= NGTCP2_CONN_FLAG_LOCAL_TRANSPORT_PARAMS_COMMITTED;
+
+ ngtcp2_qlog_parameters_set_transport_params(&conn->qlog, params, conn->server,
+ NGTCP2_QLOG_SIDE_LOCAL);
+
+ return 0;
+}
+
+const ngtcp2_transport_params *
+ngtcp2_conn_get_local_transport_params(ngtcp2_conn *conn) {
+ return &conn->local.transport_params;
+}
+
+ngtcp2_ssize ngtcp2_conn_encode_local_transport_params(ngtcp2_conn *conn,
+ uint8_t *dest,
+ size_t destlen) {
+ return ngtcp2_encode_transport_params(
+ dest, destlen,
+ conn->server ? NGTCP2_TRANSPORT_PARAMS_TYPE_ENCRYPTED_EXTENSIONS
+ : NGTCP2_TRANSPORT_PARAMS_TYPE_CLIENT_HELLO,
+ &conn->local.transport_params);
+}
+
+int ngtcp2_conn_open_bidi_stream(ngtcp2_conn *conn, int64_t *pstream_id,
+ void *stream_user_data) {
+ int rv;
+ ngtcp2_strm *strm;
+
+ if (ngtcp2_conn_get_streams_bidi_left(conn) == 0) {
+ return NGTCP2_ERR_STREAM_ID_BLOCKED;
+ }
+
+ strm = ngtcp2_objalloc_strm_get(&conn->strm_objalloc);
+ if (strm == NULL) {
+ return NGTCP2_ERR_NOMEM;
+ }
+
+ rv = ngtcp2_conn_init_stream(conn, strm, conn->local.bidi.next_stream_id,
+ stream_user_data);
+ if (rv != 0) {
+ ngtcp2_objalloc_strm_release(&conn->strm_objalloc, strm);
+ return rv;
+ }
+
+ *pstream_id = conn->local.bidi.next_stream_id;
+ conn->local.bidi.next_stream_id += 4;
+
+ return 0;
+}
+
+int ngtcp2_conn_open_uni_stream(ngtcp2_conn *conn, int64_t *pstream_id,
+ void *stream_user_data) {
+ int rv;
+ ngtcp2_strm *strm;
+
+ if (ngtcp2_conn_get_streams_uni_left(conn) == 0) {
+ return NGTCP2_ERR_STREAM_ID_BLOCKED;
+ }
+
+ strm = ngtcp2_objalloc_strm_get(&conn->strm_objalloc);
+ if (strm == NULL) {
+ return NGTCP2_ERR_NOMEM;
+ }
+
+ rv = ngtcp2_conn_init_stream(conn, strm, conn->local.uni.next_stream_id,
+ stream_user_data);
+ if (rv != 0) {
+ ngtcp2_objalloc_strm_release(&conn->strm_objalloc, strm);
+ return rv;
+ }
+ ngtcp2_strm_shutdown(strm, NGTCP2_STRM_FLAG_SHUT_RD);
+
+ *pstream_id = conn->local.uni.next_stream_id;
+ conn->local.uni.next_stream_id += 4;
+
+ return 0;
+}
+
+ngtcp2_strm *ngtcp2_conn_find_stream(ngtcp2_conn *conn, int64_t stream_id) {
+ return ngtcp2_map_find(&conn->strms, (uint64_t)stream_id);
+}
+
+ngtcp2_ssize ngtcp2_conn_write_stream_versioned(
+ ngtcp2_conn *conn, ngtcp2_path *path, int pkt_info_version,
+ ngtcp2_pkt_info *pi, uint8_t *dest, size_t destlen, ngtcp2_ssize *pdatalen,
+ uint32_t flags, int64_t stream_id, const uint8_t *data, size_t datalen,
+ ngtcp2_tstamp ts) {
+ ngtcp2_vec datav;
+
+ datav.len = datalen;
+ datav.base = (uint8_t *)data;
+
+ return ngtcp2_conn_writev_stream_versioned(conn, path, pkt_info_version, pi,
+ dest, destlen, pdatalen, flags,
+ stream_id, &datav, 1, ts);
+}
+
+static ngtcp2_ssize conn_write_vmsg_wrapper(ngtcp2_conn *conn,
+ ngtcp2_path *path,
+ int pkt_info_version,
+ ngtcp2_pkt_info *pi, uint8_t *dest,
+ size_t destlen, ngtcp2_vmsg *vmsg,
+ ngtcp2_tstamp ts) {
+ ngtcp2_conn_stat *cstat = &conn->cstat;
+ ngtcp2_ssize nwrite;
+ int undersized;
+
+ nwrite = ngtcp2_conn_write_vmsg(conn, path, pkt_info_version, pi, dest,
+ destlen, vmsg, ts);
+ if (nwrite < 0) {
+ return nwrite;
+ }
+
+ if (cstat->bytes_in_flight >= cstat->cwnd) {
+ conn->rst.is_cwnd_limited = 1;
+ }
+
+ if (vmsg == NULL && cstat->bytes_in_flight < cstat->cwnd &&
+ conn->tx.strmq_nretrans == 0) {
+ if (conn->local.settings.no_tx_udp_payload_size_shaping) {
+ undersized =
+ (size_t)nwrite < conn->local.settings.max_tx_udp_payload_size;
+ } else {
+ undersized = (size_t)nwrite < conn->dcid.current.max_udp_payload_size;
+ }
+
+ if (undersized) {
+ conn->rst.app_limited = conn->rst.delivered + cstat->bytes_in_flight;
+
+ if (conn->rst.app_limited == 0) {
+ conn->rst.app_limited = cstat->max_tx_udp_payload_size;
+ }
+ }
+ }
+
+ return nwrite;
+}
+
+ngtcp2_ssize ngtcp2_conn_writev_stream_versioned(
+ ngtcp2_conn *conn, ngtcp2_path *path, int pkt_info_version,
+ ngtcp2_pkt_info *pi, uint8_t *dest, size_t destlen, ngtcp2_ssize *pdatalen,
+ uint32_t flags, int64_t stream_id, const ngtcp2_vec *datav, size_t datavcnt,
+ ngtcp2_tstamp ts) {
+ ngtcp2_vmsg vmsg, *pvmsg;
+ ngtcp2_strm *strm;
+ int64_t datalen;
+
+ if (pdatalen) {
+ *pdatalen = -1;
+ }
+
+ if (stream_id != -1) {
+ strm = ngtcp2_conn_find_stream(conn, stream_id);
+ if (strm == NULL) {
+ return NGTCP2_ERR_STREAM_NOT_FOUND;
+ }
+
+ if (strm->flags & NGTCP2_STRM_FLAG_SHUT_WR) {
+ return NGTCP2_ERR_STREAM_SHUT_WR;
+ }
+
+ datalen = ngtcp2_vec_len_varint(datav, datavcnt);
+ if (datalen == -1) {
+ return NGTCP2_ERR_INVALID_ARGUMENT;
+ }
+
+ if ((uint64_t)datalen > NGTCP2_MAX_VARINT - strm->tx.offset ||
+ (uint64_t)datalen > NGTCP2_MAX_VARINT - conn->tx.offset) {
+ return NGTCP2_ERR_INVALID_ARGUMENT;
+ }
+
+ vmsg.type = NGTCP2_VMSG_TYPE_STREAM;
+ vmsg.stream.strm = strm;
+ vmsg.stream.flags = flags;
+ vmsg.stream.data = datav;
+ vmsg.stream.datacnt = datavcnt;
+ vmsg.stream.pdatalen = pdatalen;
+
+ pvmsg = &vmsg;
+ } else {
+ pvmsg = NULL;
+ }
+
+ return conn_write_vmsg_wrapper(conn, path, pkt_info_version, pi, dest,
+ destlen, pvmsg, ts);
+}
+
+ngtcp2_ssize ngtcp2_conn_writev_datagram_versioned(
+ ngtcp2_conn *conn, ngtcp2_path *path, int pkt_info_version,
+ ngtcp2_pkt_info *pi, uint8_t *dest, size_t destlen, int *paccepted,
+ uint32_t flags, uint64_t dgram_id, const ngtcp2_vec *datav, size_t datavcnt,
+ ngtcp2_tstamp ts) {
+ ngtcp2_vmsg vmsg;
+ int64_t datalen;
+
+ if (paccepted) {
+ *paccepted = 0;
+ }
+
+ if (conn->remote.transport_params == NULL ||
+ conn->remote.transport_params->max_datagram_frame_size == 0) {
+ return NGTCP2_ERR_INVALID_STATE;
+ }
+
+ datalen = ngtcp2_vec_len_varint(datav, datavcnt);
+ if (datalen == -1
+#if UINT64_MAX > SIZE_MAX
+ || (uint64_t)datalen > SIZE_MAX
+#endif /* UINT64_MAX > SIZE_MAX */
+ ) {
+ return NGTCP2_ERR_INVALID_STATE;
+ }
+
+ if (conn->remote.transport_params->max_datagram_frame_size <
+ ngtcp2_pkt_datagram_framelen((size_t)datalen)) {
+ return NGTCP2_ERR_INVALID_ARGUMENT;
+ }
+
+ vmsg.type = NGTCP2_VMSG_TYPE_DATAGRAM;
+ vmsg.datagram.dgram_id = dgram_id;
+ vmsg.datagram.flags = flags;
+ vmsg.datagram.data = datav;
+ vmsg.datagram.datacnt = datavcnt;
+ vmsg.datagram.paccepted = paccepted;
+
+ return conn_write_vmsg_wrapper(conn, path, pkt_info_version, pi, dest,
+ destlen, &vmsg, ts);
+}
+
+ngtcp2_ssize ngtcp2_conn_write_vmsg(ngtcp2_conn *conn, ngtcp2_path *path,
+ int pkt_info_version, ngtcp2_pkt_info *pi,
+ uint8_t *dest, size_t destlen,
+ ngtcp2_vmsg *vmsg, ngtcp2_tstamp ts) {
+ ngtcp2_ssize nwrite;
+ size_t origlen;
+ size_t origdestlen = destlen;
+ int rv;
+ uint8_t wflags = NGTCP2_WRITE_PKT_FLAG_NONE;
+ int ppe_pending = (conn->flags & NGTCP2_CONN_FLAG_PPE_PENDING) != 0;
+ ngtcp2_conn_stat *cstat = &conn->cstat;
+ ngtcp2_ssize res = 0;
+ uint64_t server_tx_left;
+ uint64_t datalen;
+ uint64_t write_datalen = 0;
+ int64_t prev_in_pkt_num = -1;
+ ngtcp2_ksl_it it;
+ ngtcp2_rtb_entry *rtbent;
+ (void)pkt_info_version;
+
+ conn->log.last_ts = ts;
+ conn->qlog.last_ts = ts;
+
+ if (path) {
+ ngtcp2_path_copy(path, &conn->dcid.current.ps.path);
+ }
+
+ origlen = destlen =
+ conn_shape_udp_payload(conn, &conn->dcid.current, destlen);
+
+ if (!ppe_pending && pi) {
+ pi->ecn = NGTCP2_ECN_NOT_ECT;
+ }
+
+ switch (conn->state) {
+ case NGTCP2_CS_CLIENT_INITIAL:
+ case NGTCP2_CS_CLIENT_WAIT_HANDSHAKE:
+ case NGTCP2_CS_CLIENT_TLS_HANDSHAKE_FAILED:
+ if (!conn_pacing_pkt_tx_allowed(conn, ts)) {
+ assert(!ppe_pending);
+
+ return conn_write_handshake_ack_pkts(conn, pi, dest, origlen, ts);
+ }
+
+ nwrite = conn_client_write_handshake(conn, pi, dest, destlen, vmsg, ts);
+ /* We might be unable to write a packet because of depletion of
+ congestion window budget, perhaps due to packet loss that
+ shrinks the window drastically. */
+ if (nwrite <= 0) {
+ return nwrite;
+ }
+ if (conn->state != NGTCP2_CS_POST_HANDSHAKE) {
+ return nwrite;
+ }
+
+ assert(nwrite);
+ assert(dest[0] & NGTCP2_HEADER_FORM_BIT);
+ assert(conn->negotiated_version);
+
+ if (ngtcp2_pkt_get_type_long(conn->negotiated_version, dest[0]) ==
+ NGTCP2_PKT_INITIAL) {
+ /* We have added padding already, but in that case, there is no
+ space left to write 1RTT packet. */
+ wflags |= NGTCP2_WRITE_PKT_FLAG_REQUIRE_PADDING;
+ }
+
+ res = nwrite;
+ dest += nwrite;
+ destlen -= (size_t)nwrite;
+ /* Break here so that we can coalesces 1RTT packet. */
+ break;
+ case NGTCP2_CS_SERVER_INITIAL:
+ case NGTCP2_CS_SERVER_WAIT_HANDSHAKE:
+ case NGTCP2_CS_SERVER_TLS_HANDSHAKE_FAILED:
+ if (!conn_pacing_pkt_tx_allowed(conn, ts)) {
+ assert(!ppe_pending);
+
+ if (!(conn->dcid.current.flags & NGTCP2_DCID_FLAG_PATH_VALIDATED)) {
+ server_tx_left = conn_server_tx_left(conn, &conn->dcid.current);
+ if (server_tx_left == 0) {
+ return 0;
+ }
+
+ origlen = (size_t)ngtcp2_min((uint64_t)origlen, server_tx_left);
+ }
+
+ return conn_write_handshake_ack_pkts(conn, pi, dest, origlen, ts);
+ }
+
+ if (!ppe_pending) {
+ if (!(conn->dcid.current.flags & NGTCP2_DCID_FLAG_PATH_VALIDATED)) {
+ server_tx_left = conn_server_tx_left(conn, &conn->dcid.current);
+ if (server_tx_left == 0) {
+ if (cstat->loss_detection_timer != UINT64_MAX) {
+ ngtcp2_log_info(
+ &conn->log, NGTCP2_LOG_EVENT_RCV,
+ "loss detection timer canceled due to amplification limit");
+ cstat->loss_detection_timer = UINT64_MAX;
+ }
+
+ return 0;
+ }
+
+ destlen = (size_t)ngtcp2_min((uint64_t)destlen, server_tx_left);
+ }
+
+ if (vmsg) {
+ switch (vmsg->type) {
+ case NGTCP2_VMSG_TYPE_STREAM:
+ datalen = ngtcp2_vec_len(vmsg->stream.data, vmsg->stream.datacnt);
+ if (datalen == 0 || (datalen > 0 &&
+ (vmsg->stream.strm->tx.max_offset -
+ vmsg->stream.strm->tx.offset) &&
+ (conn->tx.max_offset - conn->tx.offset))) {
+ write_datalen =
+ conn_enforce_flow_control(conn, vmsg->stream.strm, datalen);
+ write_datalen =
+ ngtcp2_min(write_datalen, NGTCP2_MIN_COALESCED_PAYLOADLEN);
+ write_datalen += NGTCP2_STREAM_OVERHEAD;
+ }
+ break;
+ case NGTCP2_VMSG_TYPE_DATAGRAM:
+ write_datalen =
+ ngtcp2_vec_len(vmsg->datagram.data, vmsg->datagram.datacnt) +
+ NGTCP2_DATAGRAM_OVERHEAD;
+ break;
+ default:
+ ngtcp2_unreachable();
+ }
+
+ if (conn->in_pktns && write_datalen > 0) {
+ it = ngtcp2_rtb_head(&conn->in_pktns->rtb);
+ if (!ngtcp2_ksl_it_end(&it)) {
+ rtbent = ngtcp2_ksl_it_get(&it);
+ prev_in_pkt_num = rtbent->hd.pkt_num;
+ }
+ }
+ }
+
+ nwrite = conn_write_handshake(conn, pi, dest, destlen, write_datalen, ts);
+ if (nwrite < 0) {
+ return nwrite;
+ }
+
+ res = nwrite;
+ dest += nwrite;
+ destlen -= (size_t)nwrite;
+
+ if (conn->in_pktns && write_datalen > 0) {
+ it = ngtcp2_rtb_head(&conn->in_pktns->rtb);
+ if (!ngtcp2_ksl_it_end(&it)) {
+ rtbent = ngtcp2_ksl_it_get(&it);
+ if (rtbent->hd.pkt_num != prev_in_pkt_num &&
+ (rtbent->flags & NGTCP2_RTB_ENTRY_FLAG_ACK_ELICITING)) {
+ /* We have added padding already, but in that case, there
+ is no space left to write 1RTT packet. */
+ wflags |= NGTCP2_WRITE_PKT_FLAG_REQUIRE_PADDING;
+ }
+ }
+ }
+ }
+ if (conn->state != NGTCP2_CS_POST_HANDSHAKE &&
+ conn->pktns.crypto.tx.ckm == NULL) {
+ return res;
+ }
+ break;
+ case NGTCP2_CS_POST_HANDSHAKE:
+ if (!conn_pacing_pkt_tx_allowed(conn, ts)) {
+ assert(!ppe_pending);
+
+ if (conn->server &&
+ !(conn->dcid.current.flags & NGTCP2_DCID_FLAG_PATH_VALIDATED)) {
+ server_tx_left = conn_server_tx_left(conn, &conn->dcid.current);
+ if (server_tx_left == 0) {
+ return 0;
+ }
+
+ origlen = (size_t)ngtcp2_min((uint64_t)origlen, server_tx_left);
+ }
+
+ return conn_write_ack_pkt(conn, pi, dest, origlen, NGTCP2_PKT_1RTT, ts);
+ }
+
+ break;
+ case NGTCP2_CS_CLOSING:
+ return NGTCP2_ERR_CLOSING;
+ case NGTCP2_CS_DRAINING:
+ return NGTCP2_ERR_DRAINING;
+ default:
+ return 0;
+ }
+
+ assert(conn->pktns.crypto.tx.ckm);
+
+ if (conn_check_pkt_num_exhausted(conn)) {
+ return NGTCP2_ERR_PKT_NUM_EXHAUSTED;
+ }
+
+ if (vmsg) {
+ switch (vmsg->type) {
+ case NGTCP2_VMSG_TYPE_STREAM:
+ if (vmsg->stream.flags & NGTCP2_WRITE_STREAM_FLAG_MORE) {
+ wflags |= NGTCP2_WRITE_PKT_FLAG_MORE;
+ }
+ break;
+ case NGTCP2_VMSG_TYPE_DATAGRAM:
+ if (vmsg->datagram.flags & NGTCP2_WRITE_DATAGRAM_FLAG_MORE) {
+ wflags |= NGTCP2_WRITE_PKT_FLAG_MORE;
+ }
+ break;
+ default:
+ break;
+ }
+ }
+
+ if (ppe_pending) {
+ res = conn->pkt.hs_spktlen;
+ conn->pkt.hs_spktlen = 0;
+ if (conn->pkt.require_padding) {
+ wflags |= NGTCP2_WRITE_PKT_FLAG_REQUIRE_PADDING;
+ }
+ /* dest and destlen have already been adjusted in ppe in the first
+ run. They are adjusted for probe packet later. */
+ nwrite = conn_write_pkt(conn, pi, dest, destlen, vmsg, NGTCP2_PKT_1RTT,
+ wflags, ts);
+ goto fin;
+ } else {
+ conn->pkt.require_padding =
+ (wflags & NGTCP2_WRITE_PKT_FLAG_REQUIRE_PADDING);
+
+ if (conn->state == NGTCP2_CS_POST_HANDSHAKE) {
+ rv = conn_prepare_key_update(conn, ts);
+ if (rv != 0) {
+ return rv;
+ }
+ }
+
+ if (!conn->pktns.rtb.probe_pkt_left && conn_cwnd_is_zero(conn)) {
+ destlen = 0;
+ } else {
+ if (res == 0) {
+ nwrite =
+ conn_write_path_response(conn, path, pi, dest, origdestlen, ts);
+ if (nwrite) {
+ goto fin;
+ }
+
+ if (conn->pv) {
+ nwrite =
+ conn_write_path_challenge(conn, path, pi, dest, origdestlen, ts);
+ if (nwrite) {
+ goto fin;
+ }
+ }
+
+ if (conn->pmtud &&
+ (conn->dcid.current.flags & NGTCP2_DCID_FLAG_PATH_VALIDATED) &&
+ (!conn->hs_pktns ||
+ ngtcp2_ksl_len(&conn->hs_pktns->crypto.tx.frq) == 0)) {
+ nwrite = conn_write_pmtud_probe(conn, pi, dest, origdestlen, ts);
+ if (nwrite) {
+ goto fin;
+ }
+ }
+ }
+ }
+
+ if (conn->server &&
+ !(conn->dcid.current.flags & NGTCP2_DCID_FLAG_PATH_VALIDATED)) {
+ server_tx_left = conn_server_tx_left(conn, &conn->dcid.current);
+ origlen = (size_t)ngtcp2_min((uint64_t)origlen, server_tx_left);
+ destlen = (size_t)ngtcp2_min((uint64_t)destlen, server_tx_left);
+
+ if (server_tx_left == 0 &&
+ conn->cstat.loss_detection_timer != UINT64_MAX) {
+ ngtcp2_log_info(
+ &conn->log, NGTCP2_LOG_EVENT_RCV,
+ "loss detection timer canceled due to amplification limit");
+ conn->cstat.loss_detection_timer = UINT64_MAX;
+ }
+ }
+ }
+
+ if (res == 0) {
+ if (conn_handshake_remnants_left(conn)) {
+ if (conn_handshake_probe_left(conn) ||
+ /* Allow exceeding CWND if an Handshake packet needs to be
+ sent in order to avoid dead lock. In some situation,
+ typically for client, 1 RTT packets may occupy in-flight
+ bytes (e.g., some large requests and PMTUD), and
+ Handshake packet loss shrinks CWND, and we may get in the
+ situation that we are unable to send Handshake packet. */
+ (conn->hs_pktns->rtb.num_pto_eliciting == 0 &&
+ ngtcp2_ksl_len(&conn->hs_pktns->crypto.tx.frq))) {
+ destlen = origlen;
+ }
+ nwrite = conn_write_handshake_pkts(conn, pi, dest, destlen,
+ /* write_datalen = */ 0, ts);
+ if (nwrite < 0) {
+ return nwrite;
+ }
+ if (nwrite > 0) {
+ res = nwrite;
+ dest += nwrite;
+ destlen -= (size_t)nwrite;
+ } else if (destlen == 0) {
+ res = conn_write_handshake_ack_pkts(conn, pi, dest, origlen, ts);
+ if (res) {
+ return res;
+ }
+ }
+ }
+ }
+
+ if (conn->pktns.rtb.probe_pkt_left) {
+ ngtcp2_log_info(&conn->log, NGTCP2_LOG_EVENT_CON,
+ "transmit probe pkt left=%zu",
+ conn->pktns.rtb.probe_pkt_left);
+
+ nwrite = conn_write_pkt(conn, pi, dest, destlen, vmsg, NGTCP2_PKT_1RTT,
+ wflags, ts);
+
+ goto fin;
+ }
+
+ nwrite = conn_write_pkt(conn, pi, dest, destlen, vmsg, NGTCP2_PKT_1RTT,
+ wflags, ts);
+ if (nwrite) {
+ assert(nwrite != NGTCP2_ERR_NOBUF);
+ goto fin;
+ }
+
+ if (res == 0) {
+ nwrite = conn_write_ack_pkt(conn, pi, dest, origlen, NGTCP2_PKT_1RTT, ts);
+ }
+
+fin:
+ conn->pkt.hs_spktlen = 0;
+
+ if (nwrite >= 0) {
+ res += nwrite;
+ return res;
+ }
+ /* NGTCP2_CONN_FLAG_PPE_PENDING is set in conn_write_pkt above.
+ ppe_pending cannot be used here. */
+ if (conn->flags & NGTCP2_CONN_FLAG_PPE_PENDING) {
+ conn->pkt.hs_spktlen = res;
+ }
+
+ return nwrite;
+}
+
+static ngtcp2_ssize
+conn_write_connection_close(ngtcp2_conn *conn, ngtcp2_pkt_info *pi,
+ uint8_t *dest, size_t destlen, uint8_t pkt_type,
+ uint64_t error_code, const uint8_t *reason,
+ size_t reasonlen, ngtcp2_tstamp ts) {
+ ngtcp2_pktns *in_pktns = conn->in_pktns;
+ ngtcp2_pktns *hs_pktns = conn->hs_pktns;
+ ngtcp2_ssize res = 0, nwrite;
+ ngtcp2_frame fr;
+ uint8_t flags = NGTCP2_WRITE_PKT_FLAG_NONE;
+
+ fr.type = NGTCP2_FRAME_CONNECTION_CLOSE;
+ fr.connection_close.error_code = error_code;
+ fr.connection_close.frame_type = 0;
+ fr.connection_close.reasonlen = reasonlen;
+ fr.connection_close.reason = (uint8_t *)reason;
+
+ if (!(conn->flags & NGTCP2_CONN_FLAG_HANDSHAKE_CONFIRMED) &&
+ pkt_type != NGTCP2_PKT_INITIAL) {
+ if (in_pktns && conn->server) {
+ nwrite = ngtcp2_conn_write_single_frame_pkt(
+ conn, pi, dest, destlen, NGTCP2_PKT_INITIAL,
+ NGTCP2_WRITE_PKT_FLAG_NONE, &conn->dcid.current.cid, &fr,
+ NGTCP2_RTB_ENTRY_FLAG_NONE, NULL, ts);
+ if (nwrite < 0) {
+ return nwrite;
+ }
+
+ dest += nwrite;
+ destlen -= (size_t)nwrite;
+ res += nwrite;
+ }
+
+ if (pkt_type != NGTCP2_PKT_HANDSHAKE && hs_pktns &&
+ hs_pktns->crypto.tx.ckm) {
+ nwrite = ngtcp2_conn_write_single_frame_pkt(
+ conn, pi, dest, destlen, NGTCP2_PKT_HANDSHAKE,
+ NGTCP2_WRITE_PKT_FLAG_NONE, &conn->dcid.current.cid, &fr,
+ NGTCP2_RTB_ENTRY_FLAG_NONE, NULL, ts);
+ if (nwrite < 0) {
+ return nwrite;
+ }
+
+ dest += nwrite;
+ destlen -= (size_t)nwrite;
+ res += nwrite;
+ }
+ }
+
+ if (!conn->server && pkt_type == NGTCP2_PKT_INITIAL) {
+ flags = NGTCP2_WRITE_PKT_FLAG_REQUIRE_PADDING;
+ }
+
+ nwrite = ngtcp2_conn_write_single_frame_pkt(
+ conn, pi, dest, destlen, pkt_type, flags, &conn->dcid.current.cid, &fr,
+ NGTCP2_RTB_ENTRY_FLAG_NONE, NULL, ts);
+
+ if (nwrite < 0) {
+ return nwrite;
+ }
+
+ res += nwrite;
+
+ if (res == 0) {
+ return NGTCP2_ERR_NOBUF;
+ }
+
+ return res;
+}
+
+ngtcp2_ssize ngtcp2_conn_write_connection_close_pkt(
+ ngtcp2_conn *conn, ngtcp2_path *path, ngtcp2_pkt_info *pi, uint8_t *dest,
+ size_t destlen, uint64_t error_code, const uint8_t *reason,
+ size_t reasonlen, ngtcp2_tstamp ts) {
+ ngtcp2_pktns *in_pktns = conn->in_pktns;
+ ngtcp2_pktns *hs_pktns = conn->hs_pktns;
+ uint8_t pkt_type;
+ ngtcp2_ssize nwrite;
+ uint64_t server_tx_left;
+
+ conn->log.last_ts = ts;
+ conn->qlog.last_ts = ts;
+
+ if (conn_check_pkt_num_exhausted(conn)) {
+ return NGTCP2_ERR_PKT_NUM_EXHAUSTED;
+ }
+
+ switch (conn->state) {
+ case NGTCP2_CS_CLIENT_INITIAL:
+ return NGTCP2_ERR_INVALID_STATE;
+ case NGTCP2_CS_CLOSING:
+ case NGTCP2_CS_DRAINING:
+ return 0;
+ default:
+ break;
+ }
+
+ if (path) {
+ ngtcp2_path_copy(path, &conn->dcid.current.ps.path);
+ }
+
+ destlen = conn_shape_udp_payload(conn, &conn->dcid.current, destlen);
+
+ if (pi) {
+ pi->ecn = NGTCP2_ECN_NOT_ECT;
+ }
+
+ if (conn->server) {
+ server_tx_left = conn_server_tx_left(conn, &conn->dcid.current);
+ destlen = (size_t)ngtcp2_min((uint64_t)destlen, server_tx_left);
+ }
+
+ if (conn->state == NGTCP2_CS_POST_HANDSHAKE ||
+ (conn->server && conn->pktns.crypto.tx.ckm)) {
+ pkt_type = NGTCP2_PKT_1RTT;
+ } else if (hs_pktns && hs_pktns->crypto.tx.ckm) {
+ pkt_type = NGTCP2_PKT_HANDSHAKE;
+ } else if (in_pktns && in_pktns->crypto.tx.ckm) {
+ pkt_type = NGTCP2_PKT_INITIAL;
+ } else {
+ /* This branch is taken if server has not read any Initial packet
+ from client. */
+ return NGTCP2_ERR_INVALID_STATE;
+ }
+
+ nwrite = conn_write_connection_close(conn, pi, dest, destlen, pkt_type,
+ error_code, reason, reasonlen, ts);
+ if (nwrite < 0) {
+ return nwrite;
+ }
+
+ conn->state = NGTCP2_CS_CLOSING;
+
+ return nwrite;
+}
+
+ngtcp2_ssize ngtcp2_conn_write_application_close_pkt(
+ ngtcp2_conn *conn, ngtcp2_path *path, ngtcp2_pkt_info *pi, uint8_t *dest,
+ size_t destlen, uint64_t app_error_code, const uint8_t *reason,
+ size_t reasonlen, ngtcp2_tstamp ts) {
+ ngtcp2_ssize nwrite;
+ ngtcp2_ssize res = 0;
+ ngtcp2_frame fr;
+ uint64_t server_tx_left;
+
+ conn->log.last_ts = ts;
+ conn->qlog.last_ts = ts;
+
+ if (conn_check_pkt_num_exhausted(conn)) {
+ return NGTCP2_ERR_PKT_NUM_EXHAUSTED;
+ }
+
+ switch (conn->state) {
+ case NGTCP2_CS_CLIENT_INITIAL:
+ return NGTCP2_ERR_INVALID_STATE;
+ case NGTCP2_CS_CLOSING:
+ case NGTCP2_CS_DRAINING:
+ return 0;
+ default:
+ break;
+ }
+
+ if (path) {
+ ngtcp2_path_copy(path, &conn->dcid.current.ps.path);
+ }
+
+ destlen = conn_shape_udp_payload(conn, &conn->dcid.current, destlen);
+
+ if (pi) {
+ pi->ecn = NGTCP2_ECN_NOT_ECT;
+ }
+
+ if (conn->server) {
+ server_tx_left = conn_server_tx_left(conn, &conn->dcid.current);
+ destlen = (size_t)ngtcp2_min((uint64_t)destlen, server_tx_left);
+ }
+
+ if (!(conn->flags & NGTCP2_CONN_FLAG_HANDSHAKE_CONFIRMED)) {
+ nwrite = conn_write_connection_close(conn, pi, dest, destlen,
+ conn->hs_pktns->crypto.tx.ckm
+ ? NGTCP2_PKT_HANDSHAKE
+ : NGTCP2_PKT_INITIAL,
+ NGTCP2_APPLICATION_ERROR, NULL, 0, ts);
+ if (nwrite < 0) {
+ return nwrite;
+ }
+ res = nwrite;
+ dest += nwrite;
+ destlen -= (size_t)nwrite;
+ }
+
+ if (conn->state != NGTCP2_CS_POST_HANDSHAKE) {
+ assert(res);
+
+ if (!conn->server || !conn->pktns.crypto.tx.ckm) {
+ return res;
+ }
+ }
+
+ assert(conn->pktns.crypto.tx.ckm);
+
+ fr.type = NGTCP2_FRAME_CONNECTION_CLOSE_APP;
+ fr.connection_close.error_code = app_error_code;
+ fr.connection_close.frame_type = 0;
+ fr.connection_close.reasonlen = reasonlen;
+ fr.connection_close.reason = (uint8_t *)reason;
+
+ nwrite = ngtcp2_conn_write_single_frame_pkt(
+ conn, pi, dest, destlen, NGTCP2_PKT_1RTT, NGTCP2_WRITE_PKT_FLAG_NONE,
+ &conn->dcid.current.cid, &fr, NGTCP2_RTB_ENTRY_FLAG_NONE, NULL, ts);
+
+ if (nwrite < 0) {
+ return nwrite;
+ }
+
+ res += nwrite;
+
+ if (res == 0) {
+ return NGTCP2_ERR_NOBUF;
+ }
+
+ conn->state = NGTCP2_CS_CLOSING;
+
+ return res;
+}
+
+static void
+connection_close_error_init(ngtcp2_connection_close_error *ccerr,
+ ngtcp2_connection_close_error_code_type type,
+ uint64_t error_code, const uint8_t *reason,
+ size_t reasonlen) {
+ ccerr->type = type;
+ ccerr->error_code = error_code;
+ ccerr->frame_type = 0;
+ ccerr->reason = (uint8_t *)reason;
+ ccerr->reasonlen = reasonlen;
+}
+
+void ngtcp2_connection_close_error_default(
+ ngtcp2_connection_close_error *ccerr) {
+ connection_close_error_init(ccerr,
+ NGTCP2_CONNECTION_CLOSE_ERROR_CODE_TYPE_TRANSPORT,
+ NGTCP2_NO_ERROR, NULL, 0);
+}
+
+void ngtcp2_connection_close_error_set_transport_error(
+ ngtcp2_connection_close_error *ccerr, uint64_t error_code,
+ const uint8_t *reason, size_t reasonlen) {
+ connection_close_error_init(ccerr,
+ NGTCP2_CONNECTION_CLOSE_ERROR_CODE_TYPE_TRANSPORT,
+ error_code, reason, reasonlen);
+}
+
+void ngtcp2_connection_close_error_set_transport_error_liberr(
+ ngtcp2_connection_close_error *ccerr, int liberr, const uint8_t *reason,
+ size_t reasonlen) {
+ switch (liberr) {
+ case NGTCP2_ERR_RECV_VERSION_NEGOTIATION:
+ connection_close_error_init(
+ ccerr,
+ NGTCP2_CONNECTION_CLOSE_ERROR_CODE_TYPE_TRANSPORT_VERSION_NEGOTIATION,
+ NGTCP2_NO_ERROR, reason, reasonlen);
+
+ return;
+ case NGTCP2_ERR_IDLE_CLOSE:
+ connection_close_error_init(
+ ccerr, NGTCP2_CONNECTION_CLOSE_ERROR_CODE_TYPE_TRANSPORT_IDLE_CLOSE,
+ NGTCP2_NO_ERROR, reason, reasonlen);
+
+ return;
+ };
+
+ ngtcp2_connection_close_error_set_transport_error(
+ ccerr, ngtcp2_err_infer_quic_transport_error_code(liberr), reason,
+ reasonlen);
+}
+
+void ngtcp2_connection_close_error_set_transport_error_tls_alert(
+ ngtcp2_connection_close_error *ccerr, uint8_t tls_alert,
+ const uint8_t *reason, size_t reasonlen) {
+ ngtcp2_connection_close_error_set_transport_error(
+ ccerr, NGTCP2_CRYPTO_ERROR | tls_alert, reason, reasonlen);
+}
+
+void ngtcp2_connection_close_error_set_application_error(
+ ngtcp2_connection_close_error *ccerr, uint64_t error_code,
+ const uint8_t *reason, size_t reasonlen) {
+ connection_close_error_init(
+ ccerr, NGTCP2_CONNECTION_CLOSE_ERROR_CODE_TYPE_APPLICATION, error_code,
+ reason, reasonlen);
+}
+
+ngtcp2_ssize ngtcp2_conn_write_connection_close_versioned(
+ ngtcp2_conn *conn, ngtcp2_path *path, int pkt_info_version,
+ ngtcp2_pkt_info *pi, uint8_t *dest, size_t destlen,
+ const ngtcp2_connection_close_error *ccerr, ngtcp2_tstamp ts) {
+ (void)pkt_info_version;
+
+ switch (ccerr->type) {
+ case NGTCP2_CONNECTION_CLOSE_ERROR_CODE_TYPE_TRANSPORT:
+ return ngtcp2_conn_write_connection_close_pkt(
+ conn, path, pi, dest, destlen, ccerr->error_code, ccerr->reason,
+ ccerr->reasonlen, ts);
+ case NGTCP2_CONNECTION_CLOSE_ERROR_CODE_TYPE_APPLICATION:
+ return ngtcp2_conn_write_application_close_pkt(
+ conn, path, pi, dest, destlen, ccerr->error_code, ccerr->reason,
+ ccerr->reasonlen, ts);
+ default:
+ return 0;
+ }
+}
+
+int ngtcp2_conn_is_in_closing_period(ngtcp2_conn *conn) {
+ return conn->state == NGTCP2_CS_CLOSING;
+}
+
+int ngtcp2_conn_is_in_draining_period(ngtcp2_conn *conn) {
+ return conn->state == NGTCP2_CS_DRAINING;
+}
+
+int ngtcp2_conn_close_stream(ngtcp2_conn *conn, ngtcp2_strm *strm) {
+ int rv;
+
+ rv = conn_call_stream_close(conn, strm);
+ if (rv != 0) {
+ return rv;
+ }
+
+ rv = ngtcp2_map_remove(&conn->strms, (ngtcp2_map_key_type)strm->stream_id);
+ if (rv != 0) {
+ assert(rv != NGTCP2_ERR_INVALID_ARGUMENT);
+ return rv;
+ }
+
+ if (ngtcp2_strm_is_tx_queued(strm)) {
+ ngtcp2_pq_remove(&conn->tx.strmq, &strm->pe);
+ if (!ngtcp2_strm_streamfrq_empty(strm)) {
+ assert(conn->tx.strmq_nretrans);
+ --conn->tx.strmq_nretrans;
+ }
+ }
+
+ ngtcp2_strm_free(strm);
+ ngtcp2_objalloc_strm_release(&conn->strm_objalloc, strm);
+
+ return 0;
+}
+
+int ngtcp2_conn_close_stream_if_shut_rdwr(ngtcp2_conn *conn,
+ ngtcp2_strm *strm) {
+ if ((strm->flags & NGTCP2_STRM_FLAG_SHUT_RDWR) ==
+ NGTCP2_STRM_FLAG_SHUT_RDWR &&
+ ((strm->flags & NGTCP2_STRM_FLAG_RECV_RST) ||
+ ngtcp2_strm_rx_offset(strm) == strm->rx.last_offset) &&
+ (((strm->flags & NGTCP2_STRM_FLAG_SENT_RST) &&
+ (strm->flags & NGTCP2_STRM_FLAG_RST_ACKED)) ||
+ ngtcp2_strm_is_all_tx_data_fin_acked(strm))) {
+ return ngtcp2_conn_close_stream(conn, strm);
+ }
+ return 0;
+}
+
+/*
+ * conn_shutdown_stream_write closes send stream with error code
+ * |app_error_code|. RESET_STREAM frame is scheduled.
+ *
+ * This function returns 0 if it succeeds, or one of the following
+ * negative error codes:
+ *
+ * NGTCP2_ERR_NOMEM
+ * Out of memory.
+ */
+static int conn_shutdown_stream_write(ngtcp2_conn *conn, ngtcp2_strm *strm,
+ uint64_t app_error_code) {
+ ngtcp2_strm_set_app_error_code(strm, app_error_code);
+
+ if ((strm->flags & NGTCP2_STRM_FLAG_SENT_RST) ||
+ ngtcp2_strm_is_all_tx_data_fin_acked(strm)) {
+ return 0;
+ }
+
+ /* Set this flag so that we don't accidentally send DATA to this
+ stream. */
+ strm->flags |= NGTCP2_STRM_FLAG_SHUT_WR | NGTCP2_STRM_FLAG_SENT_RST;
+
+ ngtcp2_strm_streamfrq_clear(strm);
+
+ return conn_reset_stream(conn, strm, app_error_code);
+}
+
+/*
+ * conn_shutdown_stream_read closes read stream with error code
+ * |app_error_code|. STOP_SENDING frame is scheduled.
+ *
+ * This function returns 0 if it succeeds, or one of the following
+ * negative error codes:
+ *
+ * NGTCP2_ERR_NOMEM
+ * Out of memory.
+ */
+static int conn_shutdown_stream_read(ngtcp2_conn *conn, ngtcp2_strm *strm,
+ uint64_t app_error_code) {
+ ngtcp2_strm_set_app_error_code(strm, app_error_code);
+
+ if (strm->flags & NGTCP2_STRM_FLAG_STOP_SENDING) {
+ return 0;
+ }
+ if ((strm->flags & NGTCP2_STRM_FLAG_SHUT_RD) &&
+ ngtcp2_strm_rx_offset(strm) == strm->rx.last_offset) {
+ return 0;
+ }
+
+ /* Extend connection flow control window for the amount of data
+ which are not passed to application. */
+ if (!(strm->flags &
+ (NGTCP2_STRM_FLAG_STOP_SENDING | NGTCP2_STRM_FLAG_RECV_RST))) {
+ ngtcp2_conn_extend_max_offset(conn, strm->rx.last_offset -
+ ngtcp2_strm_rx_offset(strm));
+ }
+
+ strm->flags |= NGTCP2_STRM_FLAG_STOP_SENDING;
+
+ return conn_stop_sending(conn, strm, app_error_code);
+}
+
+int ngtcp2_conn_shutdown_stream(ngtcp2_conn *conn, int64_t stream_id,
+ uint64_t app_error_code) {
+ int rv;
+ ngtcp2_strm *strm;
+
+ strm = ngtcp2_conn_find_stream(conn, stream_id);
+ if (strm == NULL) {
+ return 0;
+ }
+
+ if (bidi_stream(stream_id) || !conn_local_stream(conn, stream_id)) {
+ rv = conn_shutdown_stream_read(conn, strm, app_error_code);
+ if (rv != 0) {
+ return rv;
+ }
+ }
+
+ if (bidi_stream(stream_id) || conn_local_stream(conn, stream_id)) {
+ rv = conn_shutdown_stream_write(conn, strm, app_error_code);
+ if (rv != 0) {
+ return rv;
+ }
+ }
+
+ return 0;
+}
+
+int ngtcp2_conn_shutdown_stream_write(ngtcp2_conn *conn, int64_t stream_id,
+ uint64_t app_error_code) {
+ ngtcp2_strm *strm;
+
+ if (!bidi_stream(stream_id) && !conn_local_stream(conn, stream_id)) {
+ return NGTCP2_ERR_INVALID_ARGUMENT;
+ }
+
+ strm = ngtcp2_conn_find_stream(conn, stream_id);
+ if (strm == NULL) {
+ return 0;
+ }
+
+ return conn_shutdown_stream_write(conn, strm, app_error_code);
+}
+
+int ngtcp2_conn_shutdown_stream_read(ngtcp2_conn *conn, int64_t stream_id,
+ uint64_t app_error_code) {
+ ngtcp2_strm *strm;
+
+ if (!bidi_stream(stream_id) && conn_local_stream(conn, stream_id)) {
+ return NGTCP2_ERR_INVALID_ARGUMENT;
+ }
+
+ strm = ngtcp2_conn_find_stream(conn, stream_id);
+ if (strm == NULL) {
+ return 0;
+ }
+
+ return conn_shutdown_stream_read(conn, strm, app_error_code);
+}
+
+/*
+ * conn_extend_max_stream_offset extends stream level flow control
+ * window by |datalen| of the stream denoted by |strm|.
+ *
+ * This function returns 0 if it succeeds, or one of the following
+ * negative error codes:
+ *
+ * NGTCP2_ERR_NOMEM
+ * Out of memory.
+ */
+static int conn_extend_max_stream_offset(ngtcp2_conn *conn, ngtcp2_strm *strm,
+ uint64_t datalen) {
+ ngtcp2_strm *top;
+
+ if (datalen > NGTCP2_MAX_VARINT ||
+ strm->rx.unsent_max_offset > NGTCP2_MAX_VARINT - datalen) {
+ strm->rx.unsent_max_offset = NGTCP2_MAX_VARINT;
+ } else {
+ strm->rx.unsent_max_offset += datalen;
+ }
+
+ if (!(strm->flags &
+ (NGTCP2_STRM_FLAG_SHUT_RD | NGTCP2_STRM_FLAG_STOP_SENDING)) &&
+ !ngtcp2_strm_is_tx_queued(strm) &&
+ conn_should_send_max_stream_data(conn, strm)) {
+ if (!ngtcp2_pq_empty(&conn->tx.strmq)) {
+ top = ngtcp2_conn_tx_strmq_top(conn);
+ strm->cycle = top->cycle;
+ }
+ strm->cycle = conn_tx_strmq_first_cycle(conn);
+ return ngtcp2_conn_tx_strmq_push(conn, strm);
+ }
+
+ return 0;
+}
+
+int ngtcp2_conn_extend_max_stream_offset(ngtcp2_conn *conn, int64_t stream_id,
+ uint64_t datalen) {
+ ngtcp2_strm *strm;
+
+ strm = ngtcp2_conn_find_stream(conn, stream_id);
+ if (strm == NULL) {
+ return 0;
+ }
+
+ return conn_extend_max_stream_offset(conn, strm, datalen);
+}
+
+void ngtcp2_conn_extend_max_offset(ngtcp2_conn *conn, uint64_t datalen) {
+ if (NGTCP2_MAX_VARINT < datalen ||
+ conn->rx.unsent_max_offset > NGTCP2_MAX_VARINT - datalen) {
+ conn->rx.unsent_max_offset = NGTCP2_MAX_VARINT;
+ return;
+ }
+
+ conn->rx.unsent_max_offset += datalen;
+}
+
+void ngtcp2_conn_extend_max_streams_bidi(ngtcp2_conn *conn, size_t n) {
+ handle_max_remote_streams_extension(&conn->remote.bidi.unsent_max_streams, n);
+}
+
+void ngtcp2_conn_extend_max_streams_uni(ngtcp2_conn *conn, size_t n) {
+ handle_max_remote_streams_extension(&conn->remote.uni.unsent_max_streams, n);
+}
+
+const ngtcp2_cid *ngtcp2_conn_get_dcid(ngtcp2_conn *conn) {
+ return &conn->dcid.current.cid;
+}
+
+const ngtcp2_cid *ngtcp2_conn_get_client_initial_dcid(ngtcp2_conn *conn) {
+ return &conn->rcid;
+}
+
+uint32_t ngtcp2_conn_get_client_chosen_version(ngtcp2_conn *conn) {
+ return conn->client_chosen_version;
+}
+
+uint32_t ngtcp2_conn_get_negotiated_version(ngtcp2_conn *conn) {
+ return conn->negotiated_version;
+}
+
+static int delete_strms_pq_each(void *data, void *ptr) {
+ ngtcp2_conn *conn = ptr;
+ ngtcp2_strm *s = data;
+
+ if (ngtcp2_strm_is_tx_queued(s)) {
+ ngtcp2_pq_remove(&conn->tx.strmq, &s->pe);
+ if (!ngtcp2_strm_streamfrq_empty(s)) {
+ assert(conn->tx.strmq_nretrans);
+ --conn->tx.strmq_nretrans;
+ }
+ }
+
+ ngtcp2_strm_free(s);
+ ngtcp2_objalloc_strm_release(&conn->strm_objalloc, s);
+
+ return 0;
+}
+
+/*
+ * conn_discard_early_data_state discards any connection states which
+ * are altered by any operations during early data transfer.
+ */
+static void conn_discard_early_data_state(ngtcp2_conn *conn) {
+ ngtcp2_frame_chain **pfrc, *frc;
+
+ ngtcp2_rtb_remove_early_data(&conn->pktns.rtb, &conn->cstat);
+
+ ngtcp2_map_each_free(&conn->strms, delete_strms_pq_each, conn);
+ ngtcp2_map_clear(&conn->strms);
+
+ conn->tx.offset = 0;
+
+ conn->rx.unsent_max_offset = conn->rx.max_offset =
+ conn->local.transport_params.initial_max_data;
+
+ conn->remote.bidi.unsent_max_streams = conn->remote.bidi.max_streams =
+ conn->local.transport_params.initial_max_streams_bidi;
+
+ conn->remote.uni.unsent_max_streams = conn->remote.uni.max_streams =
+ conn->local.transport_params.initial_max_streams_uni;
+
+ if (conn->server) {
+ conn->local.bidi.next_stream_id = 1;
+ conn->local.uni.next_stream_id = 3;
+ } else {
+ conn->local.bidi.next_stream_id = 0;
+ conn->local.uni.next_stream_id = 2;
+ }
+
+ for (pfrc = &conn->pktns.tx.frq; *pfrc;) {
+ frc = *pfrc;
+ *pfrc = (*pfrc)->next;
+ ngtcp2_frame_chain_objalloc_del(frc, &conn->frc_objalloc, conn->mem);
+ }
+}
+
+int ngtcp2_conn_early_data_rejected(ngtcp2_conn *conn) {
+ if (conn->flags & NGTCP2_CONN_FLAG_EARLY_DATA_REJECTED) {
+ return 0;
+ }
+
+ conn->flags |= NGTCP2_CONN_FLAG_EARLY_DATA_REJECTED;
+
+ conn_discard_early_data_state(conn);
+
+ if (conn->callbacks.early_data_rejected) {
+ return conn->callbacks.early_data_rejected(conn, conn->user_data);
+ }
+
+ return 0;
+}
+
+int ngtcp2_conn_get_early_data_rejected(ngtcp2_conn *conn) {
+ return (conn->flags & NGTCP2_CONN_FLAG_EARLY_DATA_REJECTED) != 0;
+}
+
+int ngtcp2_conn_update_rtt(ngtcp2_conn *conn, ngtcp2_duration rtt,
+ ngtcp2_duration ack_delay, ngtcp2_tstamp ts) {
+ ngtcp2_conn_stat *cstat = &conn->cstat;
+
+ if (cstat->min_rtt == UINT64_MAX) {
+ cstat->latest_rtt = rtt;
+ cstat->min_rtt = rtt;
+ cstat->smoothed_rtt = rtt;
+ cstat->rttvar = rtt / 2;
+ cstat->first_rtt_sample_ts = ts;
+ } else {
+ if (conn->flags & NGTCP2_CONN_FLAG_HANDSHAKE_CONFIRMED) {
+ assert(conn->remote.transport_params);
+
+ ack_delay =
+ ngtcp2_min(ack_delay, conn->remote.transport_params->max_ack_delay);
+ } else if (ack_delay > 0 && rtt >= cstat->min_rtt &&
+ rtt < cstat->min_rtt + ack_delay) {
+ /* Ignore RTT sample if adjusting ack_delay causes the sample
+ less than min_rtt before handshake confirmation. */
+ ngtcp2_log_info(
+ &conn->log, NGTCP2_LOG_EVENT_RCV,
+ "ignore rtt sample because ack_delay is too large latest_rtt=%" PRIu64
+ " min_rtt=%" PRIu64 " ack_delay=%" PRIu64,
+ rtt / NGTCP2_MILLISECONDS, cstat->min_rtt / NGTCP2_MILLISECONDS,
+ ack_delay / NGTCP2_MILLISECONDS);
+ return NGTCP2_ERR_INVALID_ARGUMENT;
+ }
+
+ cstat->latest_rtt = rtt;
+ cstat->min_rtt = ngtcp2_min(cstat->min_rtt, rtt);
+
+ if (rtt >= cstat->min_rtt + ack_delay) {
+ rtt -= ack_delay;
+ }
+
+ cstat->rttvar = (cstat->rttvar * 3 + (cstat->smoothed_rtt < rtt
+ ? rtt - cstat->smoothed_rtt
+ : cstat->smoothed_rtt - rtt)) /
+ 4;
+ cstat->smoothed_rtt = (cstat->smoothed_rtt * 7 + rtt) / 8;
+ }
+
+ ngtcp2_log_info(
+ &conn->log, NGTCP2_LOG_EVENT_RCV,
+ "latest_rtt=%" PRIu64 " min_rtt=%" PRIu64 " smoothed_rtt=%" PRIu64
+ " rttvar=%" PRIu64 " ack_delay=%" PRIu64,
+ cstat->latest_rtt / NGTCP2_MILLISECONDS,
+ cstat->min_rtt / NGTCP2_MILLISECONDS,
+ cstat->smoothed_rtt / NGTCP2_MILLISECONDS,
+ cstat->rttvar / NGTCP2_MILLISECONDS, ack_delay / NGTCP2_MILLISECONDS);
+
+ return 0;
+}
+
+void ngtcp2_conn_get_conn_stat_versioned(ngtcp2_conn *conn,
+ int conn_stat_version,
+ ngtcp2_conn_stat *cstat) {
+ (void)conn_stat_version;
+
+ *cstat = conn->cstat;
+}
+
+static void conn_get_loss_time_and_pktns(ngtcp2_conn *conn,
+ ngtcp2_tstamp *ploss_time,
+ ngtcp2_pktns **ppktns) {
+ ngtcp2_pktns *const ns[] = {conn->hs_pktns, &conn->pktns};
+ ngtcp2_conn_stat *cstat = &conn->cstat;
+ ngtcp2_duration *loss_time = cstat->loss_time + 1;
+ ngtcp2_tstamp earliest_loss_time = cstat->loss_time[NGTCP2_PKTNS_ID_INITIAL];
+ ngtcp2_pktns *pktns = conn->in_pktns;
+ size_t i;
+
+ for (i = 0; i < ngtcp2_arraylen(ns); ++i) {
+ if (ns[i] == NULL || loss_time[i] >= earliest_loss_time) {
+ continue;
+ }
+
+ earliest_loss_time = loss_time[i];
+ pktns = ns[i];
+ }
+
+ if (ploss_time) {
+ *ploss_time = earliest_loss_time;
+ }
+ if (ppktns) {
+ *ppktns = pktns;
+ }
+}
+
+static ngtcp2_tstamp conn_get_earliest_pto_expiry(ngtcp2_conn *conn,
+ ngtcp2_tstamp ts) {
+ ngtcp2_pktns *ns[] = {conn->in_pktns, conn->hs_pktns, &conn->pktns};
+ size_t i;
+ ngtcp2_tstamp earliest_ts = UINT64_MAX, t;
+ ngtcp2_conn_stat *cstat = &conn->cstat;
+ ngtcp2_tstamp *times = cstat->last_tx_pkt_ts;
+ ngtcp2_duration duration =
+ compute_pto(cstat->smoothed_rtt, cstat->rttvar, /* max_ack_delay = */ 0) *
+ (1ULL << cstat->pto_count);
+
+ for (i = NGTCP2_PKTNS_ID_INITIAL; i < NGTCP2_PKTNS_ID_MAX; ++i) {
+ if (ns[i] == NULL || ns[i]->rtb.num_pto_eliciting == 0 ||
+ (times[i] == UINT64_MAX ||
+ (i == NGTCP2_PKTNS_ID_APPLICATION &&
+ !(conn->flags & NGTCP2_CONN_FLAG_HANDSHAKE_CONFIRMED)))) {
+ continue;
+ }
+
+ t = times[i] + duration;
+
+ if (i == NGTCP2_PKTNS_ID_APPLICATION) {
+ assert(conn->remote.transport_params);
+ t += conn->remote.transport_params->max_ack_delay *
+ (1ULL << cstat->pto_count);
+ }
+
+ if (t < earliest_ts) {
+ earliest_ts = t;
+ }
+ }
+
+ if (earliest_ts == UINT64_MAX) {
+ return ts + duration;
+ }
+
+ return earliest_ts;
+}
+
+void ngtcp2_conn_set_loss_detection_timer(ngtcp2_conn *conn, ngtcp2_tstamp ts) {
+ ngtcp2_conn_stat *cstat = &conn->cstat;
+ ngtcp2_duration timeout;
+ ngtcp2_pktns *in_pktns = conn->in_pktns;
+ ngtcp2_pktns *hs_pktns = conn->hs_pktns;
+ ngtcp2_pktns *pktns = &conn->pktns;
+ ngtcp2_tstamp earliest_loss_time;
+
+ conn_get_loss_time_and_pktns(conn, &earliest_loss_time, NULL);
+
+ if (earliest_loss_time != UINT64_MAX) {
+ cstat->loss_detection_timer = earliest_loss_time;
+
+ ngtcp2_log_info(&conn->log, NGTCP2_LOG_EVENT_RCV,
+ "loss_detection_timer=%" PRIu64 " nonzero crypto loss time",
+ cstat->loss_detection_timer);
+ return;
+ }
+
+ if ((!in_pktns || in_pktns->rtb.num_pto_eliciting == 0) &&
+ (!hs_pktns || hs_pktns->rtb.num_pto_eliciting == 0) &&
+ (pktns->rtb.num_pto_eliciting == 0 ||
+ !(conn->flags & NGTCP2_CONN_FLAG_HANDSHAKE_CONFIRMED)) &&
+ (conn->server ||
+ (conn->flags & (NGTCP2_CONN_FLAG_SERVER_ADDR_VERIFIED |
+ NGTCP2_CONN_FLAG_HANDSHAKE_CONFIRMED)))) {
+ if (cstat->loss_detection_timer != UINT64_MAX) {
+ ngtcp2_log_info(&conn->log, NGTCP2_LOG_EVENT_RCV,
+ "loss detection timer canceled");
+ cstat->loss_detection_timer = UINT64_MAX;
+ cstat->pto_count = 0;
+ }
+ return;
+ }
+
+ cstat->loss_detection_timer = conn_get_earliest_pto_expiry(conn, ts);
+
+ timeout =
+ cstat->loss_detection_timer > ts ? cstat->loss_detection_timer - ts : 0;
+
+ ngtcp2_log_info(&conn->log, NGTCP2_LOG_EVENT_RCV,
+ "loss_detection_timer=%" PRIu64 " timeout=%" PRIu64,
+ cstat->loss_detection_timer, timeout / NGTCP2_MILLISECONDS);
+}
+
+int ngtcp2_conn_on_loss_detection_timer(ngtcp2_conn *conn, ngtcp2_tstamp ts) {
+ ngtcp2_conn_stat *cstat = &conn->cstat;
+ int rv;
+ ngtcp2_pktns *in_pktns = conn->in_pktns;
+ ngtcp2_pktns *hs_pktns = conn->hs_pktns;
+ ngtcp2_tstamp earliest_loss_time;
+ ngtcp2_pktns *loss_pktns = NULL;
+
+ conn->log.last_ts = ts;
+ conn->qlog.last_ts = ts;
+
+ switch (conn->state) {
+ case NGTCP2_CS_CLOSING:
+ case NGTCP2_CS_DRAINING:
+ cstat->loss_detection_timer = UINT64_MAX;
+ cstat->pto_count = 0;
+ return 0;
+ default:
+ break;
+ }
+
+ if (cstat->loss_detection_timer == UINT64_MAX) {
+ return 0;
+ }
+
+ conn_get_loss_time_and_pktns(conn, &earliest_loss_time, &loss_pktns);
+
+ ngtcp2_log_info(&conn->log, NGTCP2_LOG_EVENT_RCV,
+ "loss detection timer fired");
+
+ if (earliest_loss_time != UINT64_MAX) {
+ assert(loss_pktns);
+
+ rv = ngtcp2_conn_detect_lost_pkt(conn, loss_pktns, cstat, ts);
+ if (rv != 0) {
+ return rv;
+ }
+ ngtcp2_conn_set_loss_detection_timer(conn, ts);
+ return 0;
+ }
+
+ if (!conn->server && !conn_is_handshake_completed(conn)) {
+ if (hs_pktns->crypto.tx.ckm) {
+ hs_pktns->rtb.probe_pkt_left = 1;
+ } else {
+ in_pktns->rtb.probe_pkt_left = 1;
+ }
+ } else {
+ if (in_pktns && in_pktns->rtb.num_pto_eliciting) {
+ in_pktns->rtb.probe_pkt_left = 1;
+
+ assert(hs_pktns);
+
+ if (conn->server && hs_pktns->rtb.num_pto_eliciting) {
+ /* let server coalesce packets */
+ hs_pktns->rtb.probe_pkt_left = 1;
+ }
+ } else if (hs_pktns && hs_pktns->rtb.num_pto_eliciting) {
+ hs_pktns->rtb.probe_pkt_left = 1;
+ } else {
+ conn->pktns.rtb.probe_pkt_left = 2;
+ }
+ }
+
+ ++cstat->pto_count;
+
+ ngtcp2_log_info(&conn->log, NGTCP2_LOG_EVENT_RCV, "pto_count=%zu",
+ cstat->pto_count);
+
+ ngtcp2_conn_set_loss_detection_timer(conn, ts);
+
+ return 0;
+}
+
+static int conn_buffer_crypto_data(ngtcp2_conn *conn, const uint8_t **pdata,
+ ngtcp2_pktns *pktns, const uint8_t *data,
+ size_t datalen) {
+ int rv;
+ ngtcp2_buf_chain **pbufchain = &pktns->crypto.tx.data;
+
+ if (*pbufchain) {
+ for (; (*pbufchain)->next; pbufchain = &(*pbufchain)->next)
+ ;
+
+ if (ngtcp2_buf_left(&(*pbufchain)->buf) < datalen) {
+ pbufchain = &(*pbufchain)->next;
+ }
+ }
+
+ if (!*pbufchain) {
+ rv = ngtcp2_buf_chain_new(pbufchain, ngtcp2_max(1024, datalen), conn->mem);
+ if (rv != 0) {
+ return rv;
+ }
+ }
+
+ *pdata = (*pbufchain)->buf.last;
+ (*pbufchain)->buf.last = ngtcp2_cpymem((*pbufchain)->buf.last, data, datalen);
+
+ return 0;
+}
+
+int ngtcp2_conn_submit_crypto_data(ngtcp2_conn *conn,
+ ngtcp2_crypto_level crypto_level,
+ const uint8_t *data, const size_t datalen) {
+ ngtcp2_pktns *pktns;
+ ngtcp2_frame_chain *frc;
+ ngtcp2_crypto *fr;
+ int rv;
+
+ if (datalen == 0) {
+ return 0;
+ }
+
+ switch (crypto_level) {
+ case NGTCP2_CRYPTO_LEVEL_INITIAL:
+ assert(conn->in_pktns);
+ pktns = conn->in_pktns;
+ break;
+ case NGTCP2_CRYPTO_LEVEL_HANDSHAKE:
+ assert(conn->hs_pktns);
+ pktns = conn->hs_pktns;
+ break;
+ case NGTCP2_CRYPTO_LEVEL_APPLICATION:
+ pktns = &conn->pktns;
+ break;
+ default:
+ return NGTCP2_ERR_INVALID_ARGUMENT;
+ }
+
+ rv = conn_buffer_crypto_data(conn, &data, pktns, data, datalen);
+ if (rv != 0) {
+ return rv;
+ }
+
+ rv = ngtcp2_frame_chain_objalloc_new(&frc, &conn->frc_objalloc);
+ if (rv != 0) {
+ return rv;
+ }
+
+ fr = &frc->fr.crypto;
+
+ fr->type = NGTCP2_FRAME_CRYPTO;
+ fr->offset = pktns->crypto.tx.offset;
+ fr->datacnt = 1;
+ fr->data[0].len = datalen;
+ fr->data[0].base = (uint8_t *)data;
+
+ rv = ngtcp2_ksl_insert(&pktns->crypto.tx.frq, NULL, &fr->offset, frc);
+ if (rv != 0) {
+ ngtcp2_frame_chain_objalloc_del(frc, &conn->frc_objalloc, conn->mem);
+ return rv;
+ }
+
+ pktns->crypto.strm.tx.offset += datalen;
+ pktns->crypto.tx.offset += datalen;
+
+ return 0;
+}
+
+int ngtcp2_conn_submit_new_token(ngtcp2_conn *conn, const uint8_t *token,
+ size_t tokenlen) {
+ int rv;
+ ngtcp2_frame_chain *nfrc;
+
+ assert(conn->server);
+ assert(token);
+ assert(tokenlen);
+
+ rv = ngtcp2_frame_chain_new_token_objalloc_new(
+ &nfrc, token, tokenlen, &conn->frc_objalloc, conn->mem);
+ if (rv != 0) {
+ return rv;
+ }
+
+ nfrc->next = conn->pktns.tx.frq;
+ conn->pktns.tx.frq = nfrc;
+
+ return 0;
+}
+
+ngtcp2_strm *ngtcp2_conn_tx_strmq_top(ngtcp2_conn *conn) {
+ assert(!ngtcp2_pq_empty(&conn->tx.strmq));
+ return ngtcp2_struct_of(ngtcp2_pq_top(&conn->tx.strmq), ngtcp2_strm, pe);
+}
+
+void ngtcp2_conn_tx_strmq_pop(ngtcp2_conn *conn) {
+ ngtcp2_strm *strm = ngtcp2_conn_tx_strmq_top(conn);
+ assert(strm);
+ ngtcp2_pq_pop(&conn->tx.strmq);
+ strm->pe.index = NGTCP2_PQ_BAD_INDEX;
+}
+
+int ngtcp2_conn_tx_strmq_push(ngtcp2_conn *conn, ngtcp2_strm *strm) {
+ return ngtcp2_pq_push(&conn->tx.strmq, &strm->pe);
+}
+
+static int conn_has_uncommited_preferred_address_cid(ngtcp2_conn *conn) {
+ return conn->server &&
+ !(conn->flags & NGTCP2_CONN_FLAG_LOCAL_TRANSPORT_PARAMS_COMMITTED) &&
+ conn->oscid.datalen &&
+ conn->local.transport_params.preferred_address_present;
+}
+
+size_t ngtcp2_conn_get_num_scid(ngtcp2_conn *conn) {
+ return ngtcp2_ksl_len(&conn->scid.set) +
+ (size_t)conn_has_uncommited_preferred_address_cid(conn);
+}
+
+size_t ngtcp2_conn_get_scid(ngtcp2_conn *conn, ngtcp2_cid *dest) {
+ ngtcp2_cid *origdest = dest;
+ ngtcp2_ksl_it it;
+ ngtcp2_scid *scid;
+
+ for (it = ngtcp2_ksl_begin(&conn->scid.set); !ngtcp2_ksl_it_end(&it);
+ ngtcp2_ksl_it_next(&it)) {
+ scid = ngtcp2_ksl_it_get(&it);
+ *dest++ = scid->cid;
+ }
+
+ if (conn_has_uncommited_preferred_address_cid(conn)) {
+ *dest++ = conn->local.transport_params.preferred_address.cid;
+ }
+
+ return (size_t)(dest - origdest);
+}
+
+size_t ngtcp2_conn_get_num_active_dcid(ngtcp2_conn *conn) {
+ size_t n = 1; /* for conn->dcid.current */
+ ngtcp2_pv *pv = conn->pv;
+
+ if (!(conn->flags & NGTCP2_CONN_FLAG_HANDSHAKE_COMPLETED_HANDLED)) {
+ return 0;
+ }
+
+ if (pv) {
+ if (pv->dcid.seq != conn->dcid.current.seq) {
+ ++n;
+ }
+ if ((pv->flags & NGTCP2_PV_FLAG_FALLBACK_ON_FAILURE) &&
+ pv->fallback_dcid.seq != conn->dcid.current.seq &&
+ pv->fallback_dcid.seq != pv->dcid.seq) {
+ ++n;
+ }
+ }
+
+ n += ngtcp2_ringbuf_len(&conn->dcid.retired.rb);
+
+ return n;
+}
+
+static void copy_dcid_to_cid_token(ngtcp2_cid_token *dest,
+ const ngtcp2_dcid *src) {
+ dest->seq = src->seq;
+ dest->cid = src->cid;
+ ngtcp2_path_storage_init2(&dest->ps, &src->ps.path);
+ if ((dest->token_present =
+ (src->flags & NGTCP2_DCID_FLAG_TOKEN_PRESENT) != 0)) {
+ memcpy(dest->token, src->token, NGTCP2_STATELESS_RESET_TOKENLEN);
+ }
+}
+
+size_t ngtcp2_conn_get_active_dcid(ngtcp2_conn *conn, ngtcp2_cid_token *dest) {
+ ngtcp2_pv *pv = conn->pv;
+ ngtcp2_cid_token *orig = dest;
+ ngtcp2_dcid *dcid;
+ size_t len, i;
+
+ if (!(conn->flags & NGTCP2_CONN_FLAG_HANDSHAKE_COMPLETED_HANDLED)) {
+ return 0;
+ }
+
+ copy_dcid_to_cid_token(dest, &conn->dcid.current);
+ ++dest;
+
+ if (pv) {
+ if (pv->dcid.seq != conn->dcid.current.seq) {
+ copy_dcid_to_cid_token(dest, &pv->dcid);
+ ++dest;
+ }
+ if ((pv->flags & NGTCP2_PV_FLAG_FALLBACK_ON_FAILURE) &&
+ pv->fallback_dcid.seq != conn->dcid.current.seq &&
+ pv->fallback_dcid.seq != pv->dcid.seq) {
+ copy_dcid_to_cid_token(dest, &pv->fallback_dcid);
+ ++dest;
+ }
+ }
+
+ len = ngtcp2_ringbuf_len(&conn->dcid.retired.rb);
+ for (i = 0; i < len; ++i) {
+ dcid = ngtcp2_ringbuf_get(&conn->dcid.retired.rb, i);
+ copy_dcid_to_cid_token(dest, dcid);
+ ++dest;
+ }
+
+ return (size_t)(dest - orig);
+}
+
+void ngtcp2_conn_set_local_addr(ngtcp2_conn *conn, const ngtcp2_addr *addr) {
+ ngtcp2_addr *dest = &conn->dcid.current.ps.path.local;
+
+ assert(addr->addrlen <=
+ (ngtcp2_socklen)sizeof(conn->dcid.current.ps.local_addrbuf));
+ ngtcp2_addr_copy(dest, addr);
+}
+
+void ngtcp2_conn_set_path_user_data(ngtcp2_conn *conn, void *path_user_data) {
+ conn->dcid.current.ps.path.user_data = path_user_data;
+}
+
+const ngtcp2_path *ngtcp2_conn_get_path(ngtcp2_conn *conn) {
+ return &conn->dcid.current.ps.path;
+}
+
+size_t ngtcp2_conn_get_max_tx_udp_payload_size(ngtcp2_conn *conn) {
+ return conn->local.settings.max_tx_udp_payload_size;
+}
+
+size_t ngtcp2_conn_get_path_max_tx_udp_payload_size(ngtcp2_conn *conn) {
+ if (conn->local.settings.no_tx_udp_payload_size_shaping) {
+ return ngtcp2_conn_get_max_tx_udp_payload_size(conn);
+ }
+
+ return conn->dcid.current.max_udp_payload_size;
+}
+
+static int conn_initiate_migration_precheck(ngtcp2_conn *conn,
+ const ngtcp2_addr *local_addr) {
+ if (!(conn->flags & NGTCP2_CONN_FLAG_HANDSHAKE_CONFIRMED) ||
+ conn->remote.transport_params->disable_active_migration ||
+ conn->dcid.current.cid.datalen == 0 ||
+ (conn->pv && (conn->pv->flags & NGTCP2_PV_FLAG_PREFERRED_ADDR))) {
+ return NGTCP2_ERR_INVALID_STATE;
+ }
+
+ if (ngtcp2_ringbuf_len(&conn->dcid.unused.rb) == 0) {
+ return NGTCP2_ERR_CONN_ID_BLOCKED;
+ }
+
+ if (ngtcp2_addr_eq(&conn->dcid.current.ps.path.local, local_addr)) {
+ return NGTCP2_ERR_INVALID_ARGUMENT;
+ }
+
+ return 0;
+}
+
+int ngtcp2_conn_initiate_immediate_migration(ngtcp2_conn *conn,
+ const ngtcp2_path *path,
+ ngtcp2_tstamp ts) {
+ int rv;
+ ngtcp2_dcid *dcid;
+ ngtcp2_duration pto, initial_pto, timeout;
+ ngtcp2_pv *pv;
+
+ assert(!conn->server);
+
+ conn->log.last_ts = ts;
+ conn->qlog.last_ts = ts;
+
+ rv = conn_initiate_migration_precheck(conn, &path->local);
+ if (rv != 0) {
+ return rv;
+ }
+
+ ngtcp2_conn_stop_pmtud(conn);
+
+ if (conn->pv) {
+ rv = conn_abort_pv(conn, ts);
+ if (rv != 0) {
+ return rv;
+ }
+ }
+
+ rv = conn_retire_dcid(conn, &conn->dcid.current, ts);
+ if (rv != 0) {
+ return rv;
+ }
+
+ dcid = ngtcp2_ringbuf_get(&conn->dcid.unused.rb, 0);
+ ngtcp2_dcid_set_path(dcid, path);
+
+ ngtcp2_dcid_copy(&conn->dcid.current, dcid);
+ ngtcp2_ringbuf_pop_front(&conn->dcid.unused.rb);
+
+ conn_reset_congestion_state(conn, ts);
+ conn_reset_ecn_validation_state(conn);
+
+ pto = conn_compute_pto(conn, &conn->pktns);
+ initial_pto = conn_compute_initial_pto(conn, &conn->pktns);
+ timeout = 3 * ngtcp2_max(pto, initial_pto);
+
+ /* TODO It might be better to add a new flag which indicates that a
+ connection should be closed if this path validation failed. The
+ current design allows an application to continue, by migrating
+ into yet another path. */
+ rv = ngtcp2_pv_new(&pv, dcid, timeout, NGTCP2_PV_FLAG_NONE, &conn->log,
+ conn->mem);
+ if (rv != 0) {
+ return rv;
+ }
+
+ conn->pv = pv;
+
+ return conn_call_activate_dcid(conn, &conn->dcid.current);
+}
+
+int ngtcp2_conn_initiate_migration(ngtcp2_conn *conn, const ngtcp2_path *path,
+ ngtcp2_tstamp ts) {
+ int rv;
+ ngtcp2_dcid *dcid;
+ ngtcp2_duration pto, initial_pto, timeout;
+ ngtcp2_pv *pv;
+
+ assert(!conn->server);
+
+ conn->log.last_ts = ts;
+ conn->qlog.last_ts = ts;
+
+ rv = conn_initiate_migration_precheck(conn, &path->local);
+ if (rv != 0) {
+ return rv;
+ }
+
+ if (conn->pv) {
+ rv = conn_abort_pv(conn, ts);
+ if (rv != 0) {
+ return rv;
+ }
+ }
+
+ dcid = ngtcp2_ringbuf_get(&conn->dcid.unused.rb, 0);
+ ngtcp2_dcid_set_path(dcid, path);
+
+ pto = conn_compute_pto(conn, &conn->pktns);
+ initial_pto = conn_compute_initial_pto(conn, &conn->pktns);
+ timeout = 3 * ngtcp2_max(pto, initial_pto);
+
+ rv = ngtcp2_pv_new(&pv, dcid, timeout, NGTCP2_PV_FLAG_NONE, &conn->log,
+ conn->mem);
+ if (rv != 0) {
+ return rv;
+ }
+
+ ngtcp2_ringbuf_pop_front(&conn->dcid.unused.rb);
+ conn->pv = pv;
+
+ return conn_call_activate_dcid(conn, &pv->dcid);
+}
+
+uint64_t ngtcp2_conn_get_max_local_streams_uni(ngtcp2_conn *conn) {
+ return conn->local.uni.max_streams;
+}
+
+uint64_t ngtcp2_conn_get_max_data_left(ngtcp2_conn *conn) {
+ return conn->tx.max_offset - conn->tx.offset;
+}
+
+uint64_t ngtcp2_conn_get_max_stream_data_left(ngtcp2_conn *conn,
+ int64_t stream_id) {
+ ngtcp2_strm *strm = ngtcp2_conn_find_stream(conn, stream_id);
+
+ if (strm == NULL) {
+ return 0;
+ }
+
+ return strm->tx.max_offset - strm->tx.offset;
+}
+
+uint64_t ngtcp2_conn_get_streams_bidi_left(ngtcp2_conn *conn) {
+ uint64_t n = ngtcp2_ord_stream_id(conn->local.bidi.next_stream_id);
+
+ return n > conn->local.bidi.max_streams
+ ? 0
+ : conn->local.bidi.max_streams - n + 1;
+}
+
+uint64_t ngtcp2_conn_get_streams_uni_left(ngtcp2_conn *conn) {
+ uint64_t n = ngtcp2_ord_stream_id(conn->local.uni.next_stream_id);
+
+ return n > conn->local.uni.max_streams ? 0
+ : conn->local.uni.max_streams - n + 1;
+}
+
+uint64_t ngtcp2_conn_get_cwnd_left(ngtcp2_conn *conn) {
+ uint64_t bytes_in_flight = conn->cstat.bytes_in_flight;
+ uint64_t cwnd = conn_get_cwnd(conn);
+
+ if (cwnd > bytes_in_flight) {
+ return cwnd - bytes_in_flight;
+ }
+
+ return 0;
+}
+
+ngtcp2_tstamp ngtcp2_conn_get_idle_expiry(ngtcp2_conn *conn) {
+ ngtcp2_duration trpto;
+ ngtcp2_duration idle_timeout;
+
+ /* TODO Remote max_idle_timeout becomes effective after handshake
+ completion. */
+
+ if (!conn_is_handshake_completed(conn) ||
+ conn->remote.transport_params->max_idle_timeout == 0 ||
+ (conn->local.transport_params.max_idle_timeout &&
+ conn->local.transport_params.max_idle_timeout <
+ conn->remote.transport_params->max_idle_timeout)) {
+ idle_timeout = conn->local.transport_params.max_idle_timeout;
+ } else {
+ idle_timeout = conn->remote.transport_params->max_idle_timeout;
+ }
+
+ if (idle_timeout == 0) {
+ return UINT64_MAX;
+ }
+
+ trpto = 3 * conn_compute_pto(conn, conn_is_handshake_completed(conn)
+ ? &conn->pktns
+ : conn->hs_pktns);
+
+ return conn->idle_ts + ngtcp2_max(idle_timeout, trpto);
+}
+
+ngtcp2_duration ngtcp2_conn_get_pto(ngtcp2_conn *conn) {
+ return conn_compute_pto(
+ conn, conn_is_handshake_completed(conn) ? &conn->pktns : conn->hs_pktns);
+}
+
+void ngtcp2_conn_set_initial_crypto_ctx(ngtcp2_conn *conn,
+ const ngtcp2_crypto_ctx *ctx) {
+ assert(conn->in_pktns);
+ conn->in_pktns->crypto.ctx = *ctx;
+}
+
+const ngtcp2_crypto_ctx *ngtcp2_conn_get_initial_crypto_ctx(ngtcp2_conn *conn) {
+ assert(conn->in_pktns);
+ return &conn->in_pktns->crypto.ctx;
+}
+
+void ngtcp2_conn_set_retry_aead(ngtcp2_conn *conn,
+ const ngtcp2_crypto_aead *aead,
+ const ngtcp2_crypto_aead_ctx *aead_ctx) {
+ assert(!conn->crypto.retry_aead_ctx.native_handle);
+
+ conn->crypto.retry_aead = *aead;
+ conn->crypto.retry_aead_ctx = *aead_ctx;
+}
+
+void ngtcp2_conn_set_crypto_ctx(ngtcp2_conn *conn,
+ const ngtcp2_crypto_ctx *ctx) {
+ assert(conn->hs_pktns);
+ conn->hs_pktns->crypto.ctx = *ctx;
+ conn->pktns.crypto.ctx = *ctx;
+}
+
+const ngtcp2_crypto_ctx *ngtcp2_conn_get_crypto_ctx(ngtcp2_conn *conn) {
+ return &conn->pktns.crypto.ctx;
+}
+
+void ngtcp2_conn_set_early_crypto_ctx(ngtcp2_conn *conn,
+ const ngtcp2_crypto_ctx *ctx) {
+ conn->early.ctx = *ctx;
+}
+
+const ngtcp2_crypto_ctx *ngtcp2_conn_get_early_crypto_ctx(ngtcp2_conn *conn) {
+ return &conn->early.ctx;
+}
+
+void *ngtcp2_conn_get_tls_native_handle(ngtcp2_conn *conn) {
+ return conn->crypto.tls_native_handle;
+}
+
+void ngtcp2_conn_set_tls_native_handle(ngtcp2_conn *conn,
+ void *tls_native_handle) {
+ conn->crypto.tls_native_handle = tls_native_handle;
+}
+
+void ngtcp2_conn_get_connection_close_error(
+ ngtcp2_conn *conn, ngtcp2_connection_close_error *ccerr) {
+ *ccerr = conn->rx.ccerr;
+}
+
+void ngtcp2_conn_set_tls_error(ngtcp2_conn *conn, int liberr) {
+ conn->crypto.tls_error = liberr;
+}
+
+int ngtcp2_conn_get_tls_error(ngtcp2_conn *conn) {
+ return conn->crypto.tls_error;
+}
+
+void ngtcp2_conn_set_tls_alert(ngtcp2_conn *conn, uint8_t alert) {
+ conn->crypto.tls_alert = alert;
+}
+
+uint8_t ngtcp2_conn_get_tls_alert(ngtcp2_conn *conn) {
+ return conn->crypto.tls_alert;
+}
+
+int ngtcp2_conn_is_local_stream(ngtcp2_conn *conn, int64_t stream_id) {
+ return conn_local_stream(conn, stream_id);
+}
+
+int ngtcp2_conn_is_server(ngtcp2_conn *conn) { return conn->server; }
+
+int ngtcp2_conn_after_retry(ngtcp2_conn *conn) {
+ return (conn->flags & NGTCP2_CONN_FLAG_RECV_RETRY) != 0;
+}
+
+int ngtcp2_conn_set_stream_user_data(ngtcp2_conn *conn, int64_t stream_id,
+ void *stream_user_data) {
+ ngtcp2_strm *strm = ngtcp2_conn_find_stream(conn, stream_id);
+
+ if (strm == NULL) {
+ return NGTCP2_ERR_STREAM_NOT_FOUND;
+ }
+
+ strm->stream_user_data = stream_user_data;
+
+ return 0;
+}
+
+void ngtcp2_conn_update_pkt_tx_time(ngtcp2_conn *conn, ngtcp2_tstamp ts) {
+ double pacing_rate;
+ ngtcp2_duration interval;
+
+ if (conn->tx.pacing.pktlen == 0) {
+ return;
+ }
+
+ if (conn->cstat.pacing_rate > 0) {
+ pacing_rate = conn->cstat.pacing_rate;
+ } else {
+ /* 1.25 is the under-utilization avoidance factor described in
+ https://datatracker.ietf.org/doc/html/rfc9002#section-7.7 */
+ pacing_rate =
+ (double)conn->cstat.cwnd / (double)conn->cstat.smoothed_rtt * 1.25;
+ }
+
+ interval = (ngtcp2_duration)((double)conn->tx.pacing.pktlen / pacing_rate);
+
+ conn->tx.pacing.next_ts = ts + interval;
+ conn->tx.pacing.pktlen = 0;
+}
+
+size_t ngtcp2_conn_get_send_quantum(ngtcp2_conn *conn) {
+ return conn->cstat.send_quantum;
+}
+
+int ngtcp2_conn_track_retired_dcid_seq(ngtcp2_conn *conn, uint64_t seq) {
+ size_t i;
+
+ if (conn->dcid.retire_unacked.len >=
+ ngtcp2_arraylen(conn->dcid.retire_unacked.seqs)) {
+ return NGTCP2_ERR_CONNECTION_ID_LIMIT;
+ }
+
+ /* Make sure that we do not have a duplicate */
+ for (i = 0; i < conn->dcid.retire_unacked.len; ++i) {
+ if (conn->dcid.retire_unacked.seqs[i] == seq) {
+ ngtcp2_unreachable();
+ }
+ }
+
+ conn->dcid.retire_unacked.seqs[conn->dcid.retire_unacked.len++] = seq;
+
+ return 0;
+}
+
+void ngtcp2_conn_untrack_retired_dcid_seq(ngtcp2_conn *conn, uint64_t seq) {
+ size_t i;
+
+ for (i = 0; i < conn->dcid.retire_unacked.len; ++i) {
+ if (conn->dcid.retire_unacked.seqs[i] != seq) {
+ continue;
+ }
+
+ if (i != conn->dcid.retire_unacked.len - 1) {
+ conn->dcid.retire_unacked.seqs[i] =
+ conn->dcid.retire_unacked.seqs[conn->dcid.retire_unacked.len - 1];
+ }
+
+ --conn->dcid.retire_unacked.len;
+
+ return;
+ }
+}
+
+size_t ngtcp2_conn_get_stream_loss_count(ngtcp2_conn *conn, int64_t stream_id) {
+ ngtcp2_strm *strm = ngtcp2_conn_find_stream(conn, stream_id);
+
+ if (strm == NULL) {
+ return 0;
+ }
+
+ return strm->tx.loss_count;
+}
+
+void ngtcp2_path_challenge_entry_init(ngtcp2_path_challenge_entry *pcent,
+ const ngtcp2_path *path,
+ const uint8_t *data) {
+ ngtcp2_path_storage_init2(&pcent->ps, path);
+ memcpy(pcent->data, data, sizeof(pcent->data));
+}
+
+void ngtcp2_settings_default_versioned(int settings_version,
+ ngtcp2_settings *settings) {
+ (void)settings_version;
+
+ memset(settings, 0, sizeof(*settings));
+ settings->cc_algo = NGTCP2_CC_ALGO_CUBIC;
+ settings->initial_rtt = NGTCP2_DEFAULT_INITIAL_RTT;
+ settings->ack_thresh = 2;
+ settings->max_tx_udp_payload_size = 1500 - 48;
+ settings->handshake_timeout = NGTCP2_DEFAULT_HANDSHAKE_TIMEOUT;
+}
+
+void ngtcp2_transport_params_default_versioned(
+ int transport_params_version, ngtcp2_transport_params *params) {
+ (void)transport_params_version;
+
+ memset(params, 0, sizeof(*params));
+ params->max_udp_payload_size = NGTCP2_DEFAULT_MAX_RECV_UDP_PAYLOAD_SIZE;
+ params->ack_delay_exponent = NGTCP2_DEFAULT_ACK_DELAY_EXPONENT;
+ params->max_ack_delay = NGTCP2_DEFAULT_MAX_ACK_DELAY;
+ params->active_connection_id_limit =
+ NGTCP2_DEFAULT_ACTIVE_CONNECTION_ID_LIMIT;
+}
+
+/* The functions prefixed with ngtcp2_pkt_ are usually put inside
+ ngtcp2_pkt.c. This function uses encryption construct and uses
+ test data defined only in ngtcp2_conn_test.c, so it is written
+ here. */
+ngtcp2_ssize ngtcp2_pkt_write_connection_close(
+ uint8_t *dest, size_t destlen, uint32_t version, const ngtcp2_cid *dcid,
+ const ngtcp2_cid *scid, uint64_t error_code, const uint8_t *reason,
+ size_t reasonlen, ngtcp2_encrypt encrypt, const ngtcp2_crypto_aead *aead,
+ const ngtcp2_crypto_aead_ctx *aead_ctx, const uint8_t *iv,
+ ngtcp2_hp_mask hp_mask, const ngtcp2_crypto_cipher *hp,
+ const ngtcp2_crypto_cipher_ctx *hp_ctx) {
+ ngtcp2_pkt_hd hd;
+ ngtcp2_crypto_km ckm;
+ ngtcp2_crypto_cc cc;
+ ngtcp2_ppe ppe;
+ ngtcp2_frame fr = {0};
+ int rv;
+
+ ngtcp2_pkt_hd_init(&hd, NGTCP2_PKT_FLAG_LONG_FORM, NGTCP2_PKT_INITIAL, dcid,
+ scid, /* pkt_num = */ 0, /* pkt_numlen = */ 1, version,
+ /* len = */ 0);
+
+ ngtcp2_vec_init(&ckm.secret, NULL, 0);
+ ngtcp2_vec_init(&ckm.iv, iv, 12);
+ ckm.aead_ctx = *aead_ctx;
+ ckm.pkt_num = 0;
+ ckm.flags = NGTCP2_CRYPTO_KM_FLAG_NONE;
+
+ cc.aead = *aead;
+ cc.hp = *hp;
+ cc.ckm = &ckm;
+ cc.hp_ctx = *hp_ctx;
+ cc.encrypt = encrypt;
+ cc.hp_mask = hp_mask;
+
+ ngtcp2_ppe_init(&ppe, dest, destlen, &cc);
+
+ rv = ngtcp2_ppe_encode_hd(&ppe, &hd);
+ if (rv != 0) {
+ assert(NGTCP2_ERR_NOBUF == rv);
+ return rv;
+ }
+
+ if (!ngtcp2_ppe_ensure_hp_sample(&ppe)) {
+ return NGTCP2_ERR_NOBUF;
+ }
+
+ fr.type = NGTCP2_FRAME_CONNECTION_CLOSE;
+ fr.connection_close.error_code = error_code;
+ fr.connection_close.reasonlen = reasonlen;
+ fr.connection_close.reason = (uint8_t *)reason;
+
+ rv = ngtcp2_ppe_encode_frame(&ppe, &fr);
+ if (rv != 0) {
+ assert(NGTCP2_ERR_NOBUF == rv);
+ return rv;
+ }
+
+ return ngtcp2_ppe_final(&ppe, NULL);
+}
+
+int ngtcp2_is_bidi_stream(int64_t stream_id) { return bidi_stream(stream_id); }
+
+uint32_t ngtcp2_select_version(const uint32_t *preferred_versions,
+ size_t preferred_versionslen,
+ const uint32_t *offered_versions,
+ size_t offered_versionslen) {
+ size_t i, j;
+
+ if (!preferred_versionslen || !offered_versionslen) {
+ return 0;
+ }
+
+ for (i = 0; i < preferred_versionslen; ++i) {
+ assert(ngtcp2_is_supported_version(preferred_versions[i]));
+
+ for (j = 0; j < offered_versionslen; ++j) {
+ if (preferred_versions[i] == offered_versions[j]) {
+ return preferred_versions[i];
+ }
+ }
+ }
+
+ return 0;
+}
diff --git a/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_conn.h b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_conn.h
new file mode 100644
index 0000000..aace7d9
--- /dev/null
+++ b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_conn.h
@@ -0,0 +1,1115 @@
+/*
+ * ngtcp2
+ *
+ * Copyright (c) 2017 ngtcp2 contributors
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#ifndef NGTCP2_CONN_H
+#define NGTCP2_CONN_H
+
+#ifdef HAVE_CONFIG_H
+# include <config.h>
+#endif /* HAVE_CONFIG_H */
+
+#include <ngtcp2/ngtcp2.h>
+
+#include "ngtcp2_mem.h"
+#include "ngtcp2_crypto.h"
+#include "ngtcp2_acktr.h"
+#include "ngtcp2_rtb.h"
+#include "ngtcp2_strm.h"
+#include "ngtcp2_idtr.h"
+#include "ngtcp2_str.h"
+#include "ngtcp2_pkt.h"
+#include "ngtcp2_log.h"
+#include "ngtcp2_pq.h"
+#include "ngtcp2_cc.h"
+#include "ngtcp2_bbr.h"
+#include "ngtcp2_bbr2.h"
+#include "ngtcp2_pv.h"
+#include "ngtcp2_pmtud.h"
+#include "ngtcp2_cid.h"
+#include "ngtcp2_buf.h"
+#include "ngtcp2_ppe.h"
+#include "ngtcp2_qlog.h"
+#include "ngtcp2_rst.h"
+
+typedef enum {
+ /* Client specific handshake states */
+ NGTCP2_CS_CLIENT_INITIAL,
+ NGTCP2_CS_CLIENT_WAIT_HANDSHAKE,
+ NGTCP2_CS_CLIENT_TLS_HANDSHAKE_FAILED,
+ /* Server specific handshake states */
+ NGTCP2_CS_SERVER_INITIAL,
+ NGTCP2_CS_SERVER_WAIT_HANDSHAKE,
+ NGTCP2_CS_SERVER_TLS_HANDSHAKE_FAILED,
+ /* Shared by both client and server */
+ NGTCP2_CS_POST_HANDSHAKE,
+ NGTCP2_CS_CLOSING,
+ NGTCP2_CS_DRAINING,
+} ngtcp2_conn_state;
+
+/* NGTCP2_MAX_STREAMS is the maximum number of streams. */
+#define NGTCP2_MAX_STREAMS (1LL << 60)
+
+/* NGTCP2_MAX_NUM_BUFFED_RX_PKTS is the maximum number of buffered
+ reordered packets. */
+#define NGTCP2_MAX_NUM_BUFFED_RX_PKTS 4
+
+/* NGTCP2_MAX_REORDERED_CRYPTO_DATA is the maximum offset of crypto
+ data which is not continuous. In other words, there is a gap of
+ unreceived data. */
+#define NGTCP2_MAX_REORDERED_CRYPTO_DATA 65536
+
+/* NGTCP2_MAX_RX_INITIAL_CRYPTO_DATA is the maximum offset of received
+ crypto stream in Initial packet. We set this hard limit here
+ because crypto stream is unbounded. */
+#define NGTCP2_MAX_RX_INITIAL_CRYPTO_DATA 65536
+/* NGTCP2_MAX_RX_HANDSHAKE_CRYPTO_DATA is the maximum offset of
+ received crypto stream in Handshake packet. We set this hard limit
+ here because crypto stream is unbounded. */
+#define NGTCP2_MAX_RX_HANDSHAKE_CRYPTO_DATA 65536
+
+/* NGTCP2_MAX_RETRIES is the number of Retry packet which client can
+ accept. */
+#define NGTCP2_MAX_RETRIES 3
+
+/* NGTCP2_MAX_BOUND_DCID_POOL_SIZE is the maximum number of
+ destination connection ID which have been bound to a particular
+ path, but not yet used as primary path and path validation is not
+ performed from the local endpoint. */
+#define NGTCP2_MAX_BOUND_DCID_POOL_SIZE 4
+/* NGTCP2_MAX_DCID_POOL_SIZE is the maximum number of destination
+ connection ID the remote endpoint provides to store. It must be
+ the power of 2. */
+#define NGTCP2_MAX_DCID_POOL_SIZE 8
+/* NGTCP2_MAX_DCID_RETIRED_SIZE is the maximum number of retired DCID
+ kept to catch in-flight packet on retired path. */
+#define NGTCP2_MAX_DCID_RETIRED_SIZE 2
+/* NGTCP2_MAX_SCID_POOL_SIZE is the maximum number of source
+ connection ID the local endpoint provides to the remote endpoint.
+ The chosen value was described in old draft. Now a remote endpoint
+ tells the maximum value. The value can be quite large, and we have
+ to put the sane limit.*/
+#define NGTCP2_MAX_SCID_POOL_SIZE 8
+
+/* NGTCP2_MAX_NON_ACK_TX_PKT is the maximum number of continuous non
+ ACK-eliciting packets. */
+#define NGTCP2_MAX_NON_ACK_TX_PKT 3
+
+/* NGTCP2_ECN_MAX_NUM_VALIDATION_PKTS is the maximum number of ECN marked
+ packets sent in NGTCP2_ECN_STATE_TESTING period. */
+#define NGTCP2_ECN_MAX_NUM_VALIDATION_PKTS 10
+
+/* NGTCP2_CONNECTION_CLOSE_ERROR_MAX_REASONLEN is the maximum length
+ of reason phrase to remember. If the received reason phrase is
+ longer than this value, it is truncated. */
+#define NGTCP2_CONNECTION_CLOSE_ERROR_MAX_REASONLEN 1024
+
+/* NGTCP2_WRITE_PKT_FLAG_NONE indicates that no flag is set. */
+#define NGTCP2_WRITE_PKT_FLAG_NONE 0x00u
+/* NGTCP2_WRITE_PKT_FLAG_REQUIRE_PADDING indicates that packet other
+ than Initial packet should be padded. Initial packet might be
+ padded based on QUIC requirement regardless of this flag. */
+#define NGTCP2_WRITE_PKT_FLAG_REQUIRE_PADDING 0x01u
+/* NGTCP2_WRITE_PKT_FLAG_MORE indicates that more frames might come
+ and it should be encoded into the current packet. */
+#define NGTCP2_WRITE_PKT_FLAG_MORE 0x02u
+
+/*
+ * ngtcp2_max_frame is defined so that it covers the largest ACK
+ * frame.
+ */
+typedef union ngtcp2_max_frame {
+ ngtcp2_frame fr;
+ struct {
+ ngtcp2_ack ack;
+ /* ack includes 1 ngtcp2_ack_range. */
+ ngtcp2_ack_range ranges[NGTCP2_MAX_ACK_RANGES - 1];
+ } ackfr;
+} ngtcp2_max_frame;
+
+typedef struct ngtcp2_path_challenge_entry {
+ ngtcp2_path_storage ps;
+ uint8_t data[8];
+} ngtcp2_path_challenge_entry;
+
+void ngtcp2_path_challenge_entry_init(ngtcp2_path_challenge_entry *pcent,
+ const ngtcp2_path *path,
+ const uint8_t *data);
+
+/* NGTCP2_CONN_FLAG_NONE indicates that no flag is set. */
+#define NGTCP2_CONN_FLAG_NONE 0x00u
+/* NGTCP2_CONN_FLAG_HANDSHAKE_COMPLETED is set when TLS stack declares
+ that TLS handshake has completed. The condition of this
+ declaration varies between TLS implementations and this flag does
+ not indicate the completion of QUIC handshake. Some
+ implementations declare TLS handshake completion as server when
+ they write off Server Finished and before deriving application rx
+ secret. */
+#define NGTCP2_CONN_FLAG_HANDSHAKE_COMPLETED 0x01u
+/* NGTCP2_CONN_FLAG_CONN_ID_NEGOTIATED is set if connection ID is
+ negotiated. This is only used for client. */
+#define NGTCP2_CONN_FLAG_CONN_ID_NEGOTIATED 0x02u
+/* NGTCP2_CONN_FLAG_TRANSPORT_PARAM_RECVED is set if transport
+ parameters are received. */
+#define NGTCP2_CONN_FLAG_TRANSPORT_PARAM_RECVED 0x04u
+/* NGTCP2_CONN_FLAG_LOCAL_TRANSPORT_PARAMS_COMMITTED is set when a
+ local transport parameters are applied. */
+#define NGTCP2_CONN_FLAG_LOCAL_TRANSPORT_PARAMS_COMMITTED 0x08u
+/* NGTCP2_CONN_FLAG_RECV_RETRY is set when a client receives Retry
+ packet. */
+#define NGTCP2_CONN_FLAG_RECV_RETRY 0x10u
+/* NGTCP2_CONN_FLAG_EARLY_DATA_REJECTED is set when 0-RTT packet is
+ rejected by a peer. */
+#define NGTCP2_CONN_FLAG_EARLY_DATA_REJECTED 0x20u
+/* NGTCP2_CONN_FLAG_KEEP_ALIVE_CANCELLED is set when the expired
+ keep-alive timer has been cancelled. */
+#define NGTCP2_CONN_FLAG_KEEP_ALIVE_CANCELLED 0x40u
+/* NGTCP2_CONN_FLAG_HANDSHAKE_CONFIRMED is set when an endpoint
+ confirmed completion of handshake. */
+#define NGTCP2_CONN_FLAG_HANDSHAKE_CONFIRMED 0x80u
+/* NGTCP2_CONN_FLAG_HANDSHAKE_COMPLETED_HANDLED is set when the
+ library transitions its state to "post handshake". */
+#define NGTCP2_CONN_FLAG_HANDSHAKE_COMPLETED_HANDLED 0x0100u
+/* NGTCP2_CONN_FLAG_HANDSHAKE_EARLY_RETRANSMIT is set when the early
+ handshake retransmission has done when server receives overlapping
+ Initial crypto data. */
+#define NGTCP2_CONN_FLAG_HANDSHAKE_EARLY_RETRANSMIT 0x0200u
+/* NGTCP2_CONN_FLAG_CLEAR_FIXED_BIT indicates that the local endpoint
+ sends a QUIC packet without Fixed Bit set if a remote endpoint
+ supports Greasing QUIC Bit extension. */
+#define NGTCP2_CONN_FLAG_CLEAR_FIXED_BIT 0x0400u
+/* NGTCP2_CONN_FLAG_KEY_UPDATE_NOT_CONFIRMED is set when key update is
+ not confirmed by the local endpoint. That is, it has not received
+ ACK frame which acknowledges packet which is encrypted with new
+ key. */
+#define NGTCP2_CONN_FLAG_KEY_UPDATE_NOT_CONFIRMED 0x0800u
+/* NGTCP2_CONN_FLAG_PPE_PENDING is set when
+ NGTCP2_WRITE_STREAM_FLAG_MORE is used and the intermediate state of
+ ngtcp2_ppe is stored in pkt struct of ngtcp2_conn. */
+#define NGTCP2_CONN_FLAG_PPE_PENDING 0x1000u
+/* NGTCP2_CONN_FLAG_RESTART_IDLE_TIMER_ON_WRITE is set when idle timer
+ should be restarted on next write. */
+#define NGTCP2_CONN_FLAG_RESTART_IDLE_TIMER_ON_WRITE 0x2000u
+/* NGTCP2_CONN_FLAG_SERVER_ADDR_VERIFIED indicates that server as peer
+ verified client address. This flag is only used by client. */
+#define NGTCP2_CONN_FLAG_SERVER_ADDR_VERIFIED 0x4000u
+/* NGTCP2_CONN_FLAG_EARLY_KEY_INSTALLED indicates that an early key is
+ installed. conn->early.ckm cannot be used for this purpose because
+ it might be discarded when a certain condition is met. */
+#define NGTCP2_CONN_FLAG_EARLY_KEY_INSTALLED 0x8000u
+/* NGTCP2_CONN_FLAG_KEY_UPDATE_INITIATOR is set when the local
+ endpoint has initiated key update. */
+#define NGTCP2_CONN_FLAG_KEY_UPDATE_INITIATOR 0x10000u
+
+typedef struct ngtcp2_crypto_data {
+ ngtcp2_buf buf;
+ /* pkt_type is the type of packet to send data in buf. If it is 0,
+ it must be sent in Short packet. Otherwise, it is sent the long
+ packet type denoted by pkt_type. */
+ uint8_t pkt_type;
+} ngtcp2_crypto_data;
+
+typedef struct ngtcp2_pktns {
+ struct {
+ /* last_pkt_num is the packet number which the local endpoint sent
+ last time.*/
+ int64_t last_pkt_num;
+ ngtcp2_frame_chain *frq;
+ /* num_non_ack_pkt is the number of continuous non ACK-eliciting
+ packets. */
+ size_t num_non_ack_pkt;
+
+ struct {
+ /* ect0 is the number of QUIC packets, not UDP datagram, which
+ are sent in UDP datagram with ECT0 marking. */
+ size_t ect0;
+ /* start_pkt_num is the lowest packet number that are sent
+ during ECN validation period. */
+ int64_t start_pkt_num;
+ /* validation_pkt_sent is the number of QUIC packets sent during
+ validation period. */
+ size_t validation_pkt_sent;
+ /* validation_pkt_lost is the number of QUIC packets lost during
+ validation period. */
+ size_t validation_pkt_lost;
+ } ecn;
+ } tx;
+
+ struct {
+ /* pngap tracks received packet number in order to suppress
+ duplicated packet number. */
+ ngtcp2_gaptr pngap;
+ /* max_pkt_num is the largest packet number received so far. */
+ int64_t max_pkt_num;
+ /* max_pkt_ts is the timestamp when max_pkt_num packet is
+ received. */
+ ngtcp2_tstamp max_pkt_ts;
+ /* max_ack_eliciting_pkt_num is the largest ack-eliciting packet
+ number received so far. */
+ int64_t max_ack_eliciting_pkt_num;
+ /*
+ * buffed_pkts is buffered packets which cannot be decrypted with
+ * the current encryption level.
+ *
+ * In server Initial encryption level, 0-RTT packet may be buffered.
+ * In server Handshake encryption level, Short packet may be buffered.
+ *
+ * In client Initial encryption level, Handshake or Short packet may
+ * be buffered. In client Handshake encryption level, Short packet
+ * may be buffered.
+ *
+ * - 0-RTT packet is only buffered in server Initial encryption
+ * level ngtcp2_pktns.
+ *
+ * - Handshake packet is only buffered in client Handshake
+ * encryption level ngtcp2_pktns.
+ *
+ * - Short packet is only buffered in Short encryption level
+ * ngtcp2_pktns.
+ */
+ ngtcp2_pkt_chain *buffed_pkts;
+
+ struct {
+ /* ect0, ect1, and ce are the number of QUIC packets received
+ with those markings. */
+ size_t ect0;
+ size_t ect1;
+ size_t ce;
+ struct {
+ /* ect0, ect1, ce are the ECN counts received in the latest
+ ACK frame. */
+ uint64_t ect0;
+ uint64_t ect1;
+ uint64_t ce;
+ } ack;
+ } ecn;
+ } rx;
+
+ struct {
+ struct {
+ /* frq contains crypto data sorted by their offset. */
+ ngtcp2_ksl frq;
+ /* offset is the offset of crypto stream in this packet number
+ space. */
+ uint64_t offset;
+ /* ckm is a cryptographic key, and iv to encrypt outgoing
+ packets. */
+ ngtcp2_crypto_km *ckm;
+ /* hp_ctx is cipher context for packet header protection. */
+ ngtcp2_crypto_cipher_ctx hp_ctx;
+ /* data is the submitted crypto data. */
+ ngtcp2_buf_chain *data;
+ } tx;
+
+ struct {
+ /* ckm is a cryptographic key, and iv to decrypt incoming
+ packets. */
+ ngtcp2_crypto_km *ckm;
+ /* hp_ctx is cipher context for packet header protection. */
+ ngtcp2_crypto_cipher_ctx hp_ctx;
+ } rx;
+
+ ngtcp2_strm strm;
+ ngtcp2_crypto_ctx ctx;
+ } crypto;
+
+ ngtcp2_acktr acktr;
+ ngtcp2_rtb rtb;
+} ngtcp2_pktns;
+
+typedef enum ngtcp2_ecn_state {
+ NGTCP2_ECN_STATE_TESTING,
+ NGTCP2_ECN_STATE_UNKNOWN,
+ NGTCP2_ECN_STATE_FAILED,
+ NGTCP2_ECN_STATE_CAPABLE,
+} ngtcp2_ecn_state;
+
+ngtcp2_static_ringbuf_def(dcid_bound, NGTCP2_MAX_BOUND_DCID_POOL_SIZE,
+ sizeof(ngtcp2_dcid));
+ngtcp2_static_ringbuf_def(dcid_unused, NGTCP2_MAX_DCID_POOL_SIZE,
+ sizeof(ngtcp2_dcid));
+ngtcp2_static_ringbuf_def(dcid_retired, NGTCP2_MAX_DCID_RETIRED_SIZE,
+ sizeof(ngtcp2_dcid));
+ngtcp2_static_ringbuf_def(path_challenge, 4,
+ sizeof(ngtcp2_path_challenge_entry));
+
+ngtcp2_objalloc_def(strm, ngtcp2_strm, oplent);
+
+struct ngtcp2_conn {
+ ngtcp2_objalloc frc_objalloc;
+ ngtcp2_objalloc rtb_entry_objalloc;
+ ngtcp2_objalloc strm_objalloc;
+ ngtcp2_conn_state state;
+ ngtcp2_callbacks callbacks;
+ /* rcid is a connection ID present in Initial or 0-RTT packet from
+ client as destination connection ID. Server uses this field to
+ check that duplicated Initial or 0-RTT packet are indeed sent to
+ this connection. Client uses this field to validate
+ original_destination_connection_id transport parameter. */
+ ngtcp2_cid rcid;
+ /* oscid is the source connection ID initially used by the local
+ endpoint. */
+ ngtcp2_cid oscid;
+ /* retry_scid is the source connection ID from Retry packet. Client
+ records it in order to verify retry_source_connection_id
+ transport parameter. Server does not use this field. */
+ ngtcp2_cid retry_scid;
+ ngtcp2_pktns *in_pktns;
+ ngtcp2_pktns *hs_pktns;
+ ngtcp2_pktns pktns;
+
+ struct {
+ /* current is the current destination connection ID. */
+ ngtcp2_dcid current;
+ /* bound is a set of destination connection IDs which are bound to
+ particular paths. These paths are not validated yet. */
+ ngtcp2_static_ringbuf_dcid_bound bound;
+ /* unused is a set of unused CID received from peer. */
+ ngtcp2_static_ringbuf_dcid_unused unused;
+ /* retired is a set of CID retired by local endpoint. Keep them
+ in 3*PTO to catch packets in flight along the old path. */
+ ngtcp2_static_ringbuf_dcid_retired retired;
+ /* seqgap tracks received sequence numbers in order to ignore
+ retransmitted duplicated NEW_CONNECTION_ID frame. */
+ ngtcp2_gaptr seqgap;
+ /* retire_prior_to is the largest retire_prior_to received so
+ far. */
+ uint64_t retire_prior_to;
+ struct {
+ /* seqs contains sequence number of Connection ID whose
+ retirement is not acknowledged by the remote endpoint yet. */
+ uint64_t seqs[NGTCP2_MAX_DCID_POOL_SIZE * 2];
+ /* len is the number of sequence numbers that seq contains. */
+ size_t len;
+ } retire_unacked;
+ /* zerolen_seq is a pseudo sequence number of zero-length
+ Destination Connection ID in order to distinguish between
+ them. */
+ uint64_t zerolen_seq;
+ } dcid;
+
+ struct {
+ /* set is a set of CID sent to peer. The peer can use any CIDs in
+ this set. This includes used CID as well as unused ones. */
+ ngtcp2_ksl set;
+ /* used is a set of CID used by peer. The sort function of this
+ priority queue takes timestamp when CID is retired and sorts
+ them in ascending order. */
+ ngtcp2_pq used;
+ /* last_seq is the last sequence number of connection ID. */
+ uint64_t last_seq;
+ /* num_retired is the number of retired Connection ID still
+ included in set. */
+ size_t num_retired;
+ } scid;
+
+ struct {
+ /* strmq contains ngtcp2_strm which has frames to send. */
+ ngtcp2_pq strmq;
+ /* strmq_nretrans is the number of entries in strmq which has
+ stream data to resent. */
+ size_t strmq_nretrans;
+ /* ack is ACK frame. The underlying buffer is reused. */
+ ngtcp2_frame *ack;
+ /* max_ack_ranges is the number of additional ngtcp2_ack_range
+ which ack can contain. */
+ size_t max_ack_ranges;
+ /* offset is the offset the local endpoint has sent to the remote
+ endpoint. */
+ uint64_t offset;
+ /* max_offset is the maximum offset that local endpoint can
+ send. */
+ uint64_t max_offset;
+ /* last_max_data_ts is the timestamp when last MAX_DATA frame is
+ sent. */
+ ngtcp2_tstamp last_max_data_ts;
+
+ struct {
+ /* state is the state of ECN validation */
+ ngtcp2_ecn_state state;
+ /* validation_start_ts is the timestamp when ECN validation is
+ started. It is UINT64_MAX if it has not started yet. */
+ ngtcp2_tstamp validation_start_ts;
+ /* dgram_sent is the number of UDP datagram sent during ECN
+ validation period. */
+ size_t dgram_sent;
+ } ecn;
+
+ struct {
+ /* pktlen is the number of bytes written before calling
+ ngtcp2_conn_update_pkt_tx_time which resets this field to
+ 0. */
+ size_t pktlen;
+ /* next_ts is the time to send next packet. It is UINT64_MAX if
+ packet pacing is disabled or expired.*/
+ ngtcp2_tstamp next_ts;
+ } pacing;
+ } tx;
+
+ struct {
+ /* unsent_max_offset is the maximum offset that remote endpoint
+ can send without extending MAX_DATA. This limit is not yet
+ notified to the remote endpoint. */
+ uint64_t unsent_max_offset;
+ /* offset is the cumulative sum of stream data received for this
+ connection. */
+ uint64_t offset;
+ /* max_offset is the maximum offset that remote endpoint can
+ send. */
+ uint64_t max_offset;
+ /* window is the connection-level flow control window size. */
+ uint64_t window;
+ /* path_challenge stores received PATH_CHALLENGE data. */
+ ngtcp2_static_ringbuf_path_challenge path_challenge;
+ /* ccerr is the received connection close error. */
+ ngtcp2_connection_close_error ccerr;
+ } rx;
+
+ struct {
+ ngtcp2_crypto_km *ckm;
+ ngtcp2_crypto_cipher_ctx hp_ctx;
+ ngtcp2_crypto_ctx ctx;
+ /* discard_started_ts is the timestamp when the timer to discard
+ early key has started. Used by server only. */
+ ngtcp2_tstamp discard_started_ts;
+ /* transport_params is the values remembered by client from the
+ previous session. These are set by
+ ngtcp2_conn_set_early_remote_transport_params(). Server does
+ not use this field. Server must not set values for these
+ parameters that are smaller than the remembered values. */
+ struct {
+ uint64_t initial_max_streams_bidi;
+ uint64_t initial_max_streams_uni;
+ uint64_t initial_max_stream_data_bidi_local;
+ uint64_t initial_max_stream_data_bidi_remote;
+ uint64_t initial_max_stream_data_uni;
+ uint64_t initial_max_data;
+ uint64_t active_connection_id_limit;
+ uint64_t max_datagram_frame_size;
+ } transport_params;
+ } early;
+
+ struct {
+ ngtcp2_settings settings;
+ /* transport_params is the local transport parameters. It is used
+ for Short packet only. */
+ ngtcp2_transport_params transport_params;
+ struct {
+ /* max_streams is the maximum number of bidirectional streams which
+ the local endpoint can open. */
+ uint64_t max_streams;
+ /* next_stream_id is the bidirectional stream ID which the local
+ endpoint opens next. */
+ int64_t next_stream_id;
+ } bidi;
+
+ struct {
+ /* max_streams is the maximum number of unidirectional streams
+ which the local endpoint can open. */
+ uint64_t max_streams;
+ /* next_stream_id is the unidirectional stream ID which the
+ local endpoint opens next. */
+ int64_t next_stream_id;
+ } uni;
+ } local;
+
+ struct {
+ /* transport_params is the received transport parameters during
+ handshake. It is used for Short packet only. */
+ ngtcp2_transport_params *transport_params;
+ /* pending_transport_params is received transport parameters
+ during handshake. It is copied to transport_params when 1RTT
+ key is available. */
+ ngtcp2_transport_params *pending_transport_params;
+ struct {
+ ngtcp2_idtr idtr;
+ /* unsent_max_streams is the maximum number of streams of peer
+ initiated bidirectional stream which the local endpoint can
+ accept. This limit is not yet notified to the remote
+ endpoint. */
+ uint64_t unsent_max_streams;
+ /* max_streams is the maximum number of streams of peer
+ initiated bidirectional stream which the local endpoint can
+ accept. */
+ uint64_t max_streams;
+ } bidi;
+
+ struct {
+ ngtcp2_idtr idtr;
+ /* unsent_max_streams is the maximum number of streams of peer
+ initiated unidirectional stream which the local endpoint can
+ accept. This limit is not yet notified to the remote
+ endpoint. */
+ uint64_t unsent_max_streams;
+ /* max_streams is the maximum number of streams of peer
+ initiated unidirectional stream which the local endpoint can
+ accept. */
+ uint64_t max_streams;
+ } uni;
+ } remote;
+
+ struct {
+ struct {
+ /* new_tx_ckm is a new sender 1RTT key which has not been
+ used. */
+ ngtcp2_crypto_km *new_tx_ckm;
+ /* new_rx_ckm is a new receiver 1RTT key which has not
+ successfully decrypted incoming packet yet. */
+ ngtcp2_crypto_km *new_rx_ckm;
+ /* old_rx_ckm is an old receiver 1RTT key. */
+ ngtcp2_crypto_km *old_rx_ckm;
+ /* confirmed_ts is the time instant when the key update is
+ confirmed by the local endpoint last time. UINT64_MAX means
+ undefined value. */
+ ngtcp2_tstamp confirmed_ts;
+ } key_update;
+
+ /* tls_native_handle is a native handle to TLS session object. */
+ void *tls_native_handle;
+ /* decrypt_hp_buf is a buffer which is used to write unprotected
+ packet header. */
+ ngtcp2_vec decrypt_hp_buf;
+ /* decrypt_buf is a buffer which is used to write decrypted data. */
+ ngtcp2_vec decrypt_buf;
+ /* retry_aead is AEAD to verify Retry packet integrity. It is
+ used by client only. */
+ ngtcp2_crypto_aead retry_aead;
+ /* retry_aead_ctx is AEAD cipher context to verify Retry packet
+ integrity. It is used by client only. */
+ ngtcp2_crypto_aead_ctx retry_aead_ctx;
+ /* tls_error is TLS related error. */
+ int tls_error;
+ /* tls_alert is TLS alert generated by the local endpoint. */
+ uint8_t tls_alert;
+ /* decryption_failure_count is the number of received packets that
+ fail authentication. */
+ uint64_t decryption_failure_count;
+ } crypto;
+
+ /* pkt contains the packet intermediate construction data to support
+ NGTCP2_WRITE_STREAM_FLAG_MORE */
+ struct {
+ ngtcp2_crypto_cc cc;
+ ngtcp2_pkt_hd hd;
+ ngtcp2_ppe ppe;
+ ngtcp2_frame_chain **pfrc;
+ int pkt_empty;
+ int hd_logged;
+ /* flags is bitwise OR of zero or more of
+ NGTCP2_RTB_ENTRY_FLAG_*. */
+ uint16_t rtb_entry_flags;
+ ngtcp2_ssize hs_spktlen;
+ int require_padding;
+ } pkt;
+
+ struct {
+ /* last_ts is a timestamp when a last packet is sent or received
+ on a current path. */
+ ngtcp2_tstamp last_ts;
+ /* timeout is keep-alive timeout. When it expires, a packet
+ should be sent to a current path to keep connection alive. It
+ might be used to keep NAT binding intact. If 0 is set,
+ keep-alive timer is disabled. */
+ ngtcp2_duration timeout;
+ } keep_alive;
+
+ struct {
+ /* Initial keys for negotiated version. If original version ==
+ negotiated version, these fields are not used. */
+ struct {
+ ngtcp2_crypto_km *ckm;
+ ngtcp2_crypto_cipher_ctx hp_ctx;
+ } rx;
+ struct {
+ ngtcp2_crypto_km *ckm;
+ ngtcp2_crypto_cipher_ctx hp_ctx;
+ } tx;
+ /* version is QUIC version that the above Initial keys are created
+ for. */
+ uint32_t version;
+ /* preferred_versions is the array of versions that are preferred
+ by the local endpoint. Server negotiates one of those versions
+ in this array if a client initially selects a less preferred
+ version. Client uses this field and original_version field to
+ prevent version downgrade attack if it reacted upon Version
+ Negotiation packet. */
+ uint32_t *preferred_versions;
+ /* preferred_versionslen is the number of versions stored in the
+ array pointed by preferred_versions. This field is only used
+ by server. */
+ size_t preferred_versionslen;
+ /* available_versions is the versions that the local endpoint
+ sends in version_information transport parameter. This is the
+ wire image of available_versions field of version_information
+ transport parameter. */
+ uint8_t *available_versions;
+ /* available_versionslen is the length of data pointed by
+ available_versions field. */
+ size_t available_versionslen;
+ } vneg;
+
+ ngtcp2_map strms;
+ ngtcp2_conn_stat cstat;
+ ngtcp2_pv *pv;
+ ngtcp2_pmtud *pmtud;
+ ngtcp2_log log;
+ ngtcp2_qlog qlog;
+ ngtcp2_rst rst;
+ ngtcp2_cc_algo cc_algo;
+ ngtcp2_cc cc;
+ const ngtcp2_mem *mem;
+ /* idle_ts is the time instant when idle timer started. */
+ ngtcp2_tstamp idle_ts;
+ void *user_data;
+ uint32_t client_chosen_version;
+ uint32_t negotiated_version;
+ /* flags is bitwise OR of zero or more of NGTCP2_CONN_FLAG_*. */
+ uint32_t flags;
+ int server;
+};
+
+typedef enum ngtcp2_vmsg_type {
+ NGTCP2_VMSG_TYPE_STREAM,
+ NGTCP2_VMSG_TYPE_DATAGRAM,
+} ngtcp2_vmsg_type;
+
+typedef struct ngtcp2_vmsg_stream {
+ /* strm is a stream that data is sent to. */
+ ngtcp2_strm *strm;
+ /* flags is bitwise OR of zero or more of
+ NGTCP2_WRITE_STREAM_FLAG_*. */
+ uint32_t flags;
+ /* data is the pointer to ngtcp2_vec array which contains the stream
+ data to send. */
+ const ngtcp2_vec *data;
+ /* datacnt is the number of ngtcp2_vec pointed by data. */
+ size_t datacnt;
+ /* pdatalen is the pointer to the variable which the number of bytes
+ written is assigned to if pdatalen is not NULL. */
+ ngtcp2_ssize *pdatalen;
+} ngtcp2_vmsg_stream;
+
+typedef struct ngtcp2_vmsg_datagram {
+ /* data is the pointer to ngtcp2_vec array which contains the data
+ to send. */
+ const ngtcp2_vec *data;
+ /* datacnt is the number of ngtcp2_vec pointed by data. */
+ size_t datacnt;
+ /* dgram_id is an opaque identifier chosen by an application. */
+ uint64_t dgram_id;
+ /* flags is bitwise OR of zero or more of
+ NGTCP2_WRITE_DATAGRAM_FLAG_*. */
+ uint32_t flags;
+ /* paccepted is the pointer to the variable which, if it is not
+ NULL, is assigned nonzero if data is written to a packet. */
+ int *paccepted;
+} ngtcp2_vmsg_datagram;
+
+typedef struct ngtcp2_vmsg {
+ ngtcp2_vmsg_type type;
+ union {
+ ngtcp2_vmsg_stream stream;
+ ngtcp2_vmsg_datagram datagram;
+ };
+} ngtcp2_vmsg;
+
+/*
+ * ngtcp2_conn_sched_ack stores packet number |pkt_num| and its
+ * reception timestamp |ts| in order to send its ACK.
+ *
+ * It returns 0 if it succeeds, or one of the following negative error
+ * codes:
+ *
+ * NGTCP2_ERR_NOMEM
+ * Out of memory
+ * NGTCP2_ERR_PROTO
+ * Same packet number has already been added.
+ */
+int ngtcp2_conn_sched_ack(ngtcp2_conn *conn, ngtcp2_acktr *acktr,
+ int64_t pkt_num, int active_ack, ngtcp2_tstamp ts);
+
+/*
+ * ngtcp2_conn_find_stream returns a stream whose stream ID is
+ * |stream_id|. If no such stream is found, it returns NULL.
+ */
+ngtcp2_strm *ngtcp2_conn_find_stream(ngtcp2_conn *conn, int64_t stream_id);
+
+/*
+ * conn_init_stream initializes |strm|. Its stream ID is |stream_id|.
+ * This function adds |strm| to conn->strms. |strm| must be allocated
+ * by the caller.
+ *
+ * This function returns 0 if it succeeds, or one of the following
+ * negative error codes:
+ *
+ * NGTCP2_ERR_NOMEM
+ * Out of memory
+ * NGTCP2_ERR_CALLBACK_FAILURE
+ * User-callback function failed.
+ */
+int ngtcp2_conn_init_stream(ngtcp2_conn *conn, ngtcp2_strm *strm,
+ int64_t stream_id, void *stream_user_data);
+
+/*
+ * ngtcp2_conn_close_stream closes stream |strm|.
+ *
+ * This function returns 0 if it succeeds, or one of the following
+ * negative error codes:
+ *
+ * NGTCP2_ERR_INVALID_ARGUMENT
+ * Stream is not found.
+ * NGTCP2_ERR_CALLBACK_FAILURE
+ * User-defined callback function failed.
+ */
+int ngtcp2_conn_close_stream(ngtcp2_conn *conn, ngtcp2_strm *strm);
+
+/*
+ * ngtcp2_conn_close_stream closes stream |strm| if no further
+ * transmission and reception are allowed, and all reordered incoming
+ * data are emitted to the application, and the transmitted data are
+ * acked.
+ *
+ * This function returns 0 if it succeeds, or one of the following
+ * negative error codes:
+ *
+ * NGTCP2_ERR_INVALID_ARGUMENT
+ * Stream is not found.
+ * NGTCP2_ERR_CALLBACK_FAILURE
+ * User-defined callback function failed.
+ */
+int ngtcp2_conn_close_stream_if_shut_rdwr(ngtcp2_conn *conn, ngtcp2_strm *strm);
+
+/*
+ * ngtcp2_conn_update_rtt updates RTT measurements. |rtt| is a latest
+ * RTT which is not adjusted by ack delay. |ack_delay| is unscaled
+ * ack_delay included in ACK frame. |ack_delay| is actually tainted
+ * (sent by peer), so don't assume that |ack_delay| is always smaller
+ * than, or equals to |rtt|.
+ *
+ * This function returns 0 if it succeeds, or one of the following
+ * negative error codes:
+ *
+ * NGTCP2_ERR_INVALID_ARGUMENT
+ * RTT sample is ignored.
+ */
+int ngtcp2_conn_update_rtt(ngtcp2_conn *conn, ngtcp2_duration rtt,
+ ngtcp2_duration ack_delay, ngtcp2_tstamp ts);
+
+void ngtcp2_conn_set_loss_detection_timer(ngtcp2_conn *conn, ngtcp2_tstamp ts);
+
+int ngtcp2_conn_on_loss_detection_timer(ngtcp2_conn *conn, ngtcp2_tstamp ts);
+
+/*
+ * ngtcp2_conn_detect_lost_pkt detects lost packets.
+ *
+ * This function returns 0 if it succeeds, or one of the following
+ * negative error codes:
+ *
+ * NGTCP2_ERR_NOMEM
+ * Out of memory.
+ */
+int ngtcp2_conn_detect_lost_pkt(ngtcp2_conn *conn, ngtcp2_pktns *pktns,
+ ngtcp2_conn_stat *cstat, ngtcp2_tstamp ts);
+
+/*
+ * ngtcp2_conn_tx_strmq_top returns the ngtcp2_strm which sits on the
+ * top of queue. tx_strmq must not be empty.
+ */
+ngtcp2_strm *ngtcp2_conn_tx_strmq_top(ngtcp2_conn *conn);
+
+/*
+ * ngtcp2_conn_tx_strmq_pop pops the ngtcp2_strm from the queue.
+ * tx_strmq must not be empty.
+ */
+void ngtcp2_conn_tx_strmq_pop(ngtcp2_conn *conn);
+
+/*
+ * ngtcp2_conn_tx_strmq_push pushes |strm| into tx_strmq.
+ *
+ * This function returns 0 if it succeeds, or one of the following
+ * negative error codes:
+ *
+ * NGTCP2_ERR_NOMEM
+ * Out of memory.
+ */
+int ngtcp2_conn_tx_strmq_push(ngtcp2_conn *conn, ngtcp2_strm *strm);
+
+/*
+ * ngtcp2_conn_internal_expiry returns the minimum expiry time among
+ * all timers in |conn|.
+ */
+ngtcp2_tstamp ngtcp2_conn_internal_expiry(ngtcp2_conn *conn);
+
+ngtcp2_ssize ngtcp2_conn_write_vmsg(ngtcp2_conn *conn, ngtcp2_path *path,
+ int pkt_info_version, ngtcp2_pkt_info *pi,
+ uint8_t *dest, size_t destlen,
+ ngtcp2_vmsg *vmsg, ngtcp2_tstamp ts);
+
+/*
+ * ngtcp2_conn_write_single_frame_pkt writes a packet which contains
+ * |fr| frame only in the buffer pointed by |dest| whose length if
+ * |destlen|. |type| is a long packet type to send. If |type| is 0,
+ * Short packet is used. |dcid| is used as a destination connection
+ * ID. |flags| is zero or more of NGTCP2_WRITE_PKT_FLAG_*. Only
+ * NGTCP2_WRITE_PKT_FLAG_REQUIRE_PADDING is recognized.
+ *
+ * The packet written by this function will not be retransmitted.
+ *
+ * This function returns the number of bytes written in |dest| if it
+ * succeeds, or one of the following negative error codes:
+ *
+ * NGTCP2_ERR_CALLBACK_FAILURE
+ * User-defined callback function failed.
+ */
+ngtcp2_ssize ngtcp2_conn_write_single_frame_pkt(
+ ngtcp2_conn *conn, ngtcp2_pkt_info *pi, uint8_t *dest, size_t destlen,
+ uint8_t type, uint8_t flags, const ngtcp2_cid *dcid, ngtcp2_frame *fr,
+ uint16_t rtb_entry_flags, const ngtcp2_path *path, ngtcp2_tstamp ts);
+
+/*
+ * ngtcp2_conn_commit_local_transport_params commits the local
+ * transport parameters, which is currently set to
+ * conn->local.settings.transport_params. This function will do some
+ * amends on transport parameters for adjusting default values.
+ *
+ * This function returns 0 if it succeeds, or one of the following
+ * negative error codes:
+ *
+ * NGTCP2_ERR_NOMEM
+ * Out of memory.
+ * NGTCP2_ERR_INVALID_ARGUMENT
+ * CID in preferred address equals to the original SCID.
+ */
+int ngtcp2_conn_commit_local_transport_params(ngtcp2_conn *conn);
+
+/*
+ * ngtcp2_conn_lost_pkt_expiry returns the earliest expiry time of
+ * lost packet.
+ */
+ngtcp2_tstamp ngtcp2_conn_lost_pkt_expiry(ngtcp2_conn *conn);
+
+/*
+ * ngtcp2_conn_remove_lost_pkt removes the expired lost packet.
+ */
+void ngtcp2_conn_remove_lost_pkt(ngtcp2_conn *conn, ngtcp2_tstamp ts);
+
+/*
+ * ngtcp2_conn_resched_frames reschedules frames linked from |*pfrc|
+ * for retransmission.
+ *
+ * This function returns 0 if it succeeds, or one of the following
+ * negative error codes:
+ *
+ * NGTCP2_ERR_NOMEM
+ * Out of memory.
+ */
+int ngtcp2_conn_resched_frames(ngtcp2_conn *conn, ngtcp2_pktns *pktns,
+ ngtcp2_frame_chain **pfrc);
+
+uint64_t ngtcp2_conn_tx_strmq_first_cycle(ngtcp2_conn *conn);
+
+/**
+ * @function
+ *
+ * `ngtcp2_conn_ack_delay_expiry` returns the expiry time point of
+ * delayed protected ACK. One should call
+ * `ngtcp2_conn_cancel_expired_ack_delay_timer` and
+ * `ngtcp2_conn_write_pkt` (or `ngtcp2_conn_writev_stream`) when it
+ * expires. It returns UINT64_MAX if there is no expiry.
+ */
+ngtcp2_tstamp ngtcp2_conn_ack_delay_expiry(ngtcp2_conn *conn);
+
+/**
+ * @function
+ *
+ * `ngtcp2_conn_cancel_expired_ack_delay_timer` stops expired ACK
+ * delay timer. |ts| is the current time. This function must be
+ * called when `ngtcp2_conn_ack_delay_expiry` <= ts.
+ */
+void ngtcp2_conn_cancel_expired_ack_delay_timer(ngtcp2_conn *conn,
+ ngtcp2_tstamp ts);
+
+/**
+ * @function
+ *
+ * `ngtcp2_conn_loss_detection_expiry` returns the expiry time point
+ * of loss detection timer. One should call
+ * `ngtcp2_conn_on_loss_detection_timer` and `ngtcp2_conn_write_pkt`
+ * (or `ngtcp2_conn_writev_stream`) when it expires. It returns
+ * UINT64_MAX if loss detection timer is not armed.
+ */
+ngtcp2_tstamp ngtcp2_conn_loss_detection_expiry(ngtcp2_conn *conn);
+
+/**
+ * @function
+ *
+ * `ngtcp2_conn_get_idle_expiry` returns the time when a connection
+ * should be closed if it continues to be idle. If idle timeout is
+ * disabled, this function returns ``UINT64_MAX``.
+ */
+ngtcp2_tstamp ngtcp2_conn_get_idle_expiry(ngtcp2_conn *conn);
+
+ngtcp2_duration ngtcp2_conn_compute_pto(ngtcp2_conn *conn, ngtcp2_pktns *pktns);
+
+/*
+ * ngtcp2_conn_track_retired_dcid_seq tracks the sequence number |seq|
+ * of unacknowledged retiring Destination Connection ID.
+ *
+ * This function returns 0 if it succeeds, or one of the following
+ * negative error codes:
+ *
+ * NGTCP2_ERR_CONNECTION_ID_LIMIT
+ * The number of unacknowledged retirement exceeds the limit.
+ */
+int ngtcp2_conn_track_retired_dcid_seq(ngtcp2_conn *conn, uint64_t seq);
+
+/*
+ * ngtcp2_conn_untrack_retired_dcid_seq deletes the sequence number
+ * |seq| of unacknowledged retiring Destination Connection ID. It is
+ * fine if such sequence number is not found.
+ */
+void ngtcp2_conn_untrack_retired_dcid_seq(ngtcp2_conn *conn, uint64_t seq);
+
+/*
+ * ngtcp2_conn_server_negotiate_version negotiates QUIC version. It
+ * is compatible version negotiation. It returns the negotiated QUIC
+ * version. This function must not be called by client.
+ */
+uint32_t
+ngtcp2_conn_server_negotiate_version(ngtcp2_conn *conn,
+ const ngtcp2_version_info *version_info);
+
+/**
+ * @function
+ *
+ * `ngtcp2_conn_write_connection_close_pkt` writes a packet which
+ * contains a CONNECTION_CLOSE frame (type 0x1c) in the buffer pointed
+ * by |dest| whose capacity is |datalen|.
+ *
+ * If |path| is not ``NULL``, this function stores the network path
+ * with which the packet should be sent. Each addr field must point
+ * to the buffer which should be at least ``sizeof(struct
+ * sockaddr_storage)`` bytes long. The assignment might not be done
+ * if nothing is written to |dest|.
+ *
+ * If |pi| is not ``NULL``, this function stores packet metadata in it
+ * if it succeeds. The metadata includes ECN markings.
+ *
+ * This function must not be called from inside the callback
+ * functions.
+ *
+ * At the moment, successful call to this function makes connection
+ * close. We may change this behaviour in the future to allow
+ * graceful shutdown.
+ *
+ * This function returns the number of bytes written in |dest| if it
+ * succeeds, or one of the following negative error codes:
+ *
+ * :macro:`NGTCP2_ERR_NOMEM`
+ * Out of memory
+ * :macro:`NGTCP2_ERR_NOBUF`
+ * Buffer is too small
+ * :macro:`NGTCP2_ERR_INVALID_STATE`
+ * The current state does not allow sending CONNECTION_CLOSE.
+ * :macro:`NGTCP2_ERR_PKT_NUM_EXHAUSTED`
+ * Packet number is exhausted, and cannot send any more packet.
+ * :macro:`NGTCP2_ERR_CALLBACK_FAILURE`
+ * User callback failed
+ */
+ngtcp2_ssize ngtcp2_conn_write_connection_close_pkt(
+ ngtcp2_conn *conn, ngtcp2_path *path, ngtcp2_pkt_info *pi, uint8_t *dest,
+ size_t destlen, uint64_t error_code, const uint8_t *reason,
+ size_t reasonlen, ngtcp2_tstamp ts);
+
+/**
+ * @function
+ *
+ * `ngtcp2_conn_write_application_close_pkt` writes a packet which
+ * contains a CONNECTION_CLOSE frame (type 0x1d) in the buffer pointed
+ * by |dest| whose capacity is |datalen|.
+ *
+ * If |path| is not ``NULL``, this function stores the network path
+ * with which the packet should be sent. Each addr field must point
+ * to the buffer which should be at least ``sizeof(struct
+ * sockaddr_storage)`` bytes long. The assignment might not be done
+ * if nothing is written to |dest|.
+ *
+ * If |pi| is not ``NULL``, this function stores packet metadata in it
+ * if it succeeds. The metadata includes ECN markings.
+ *
+ * If handshake has not been confirmed yet, CONNECTION_CLOSE (type
+ * 0x1c) with error code :macro:`NGTCP2_APPLICATION_ERROR` is written
+ * instead.
+ *
+ * This function must not be called from inside the callback
+ * functions.
+ *
+ * At the moment, successful call to this function makes connection
+ * close. We may change this behaviour in the future to allow
+ * graceful shutdown.
+ *
+ * This function returns the number of bytes written in |dest| if it
+ * succeeds, or one of the following negative error codes:
+ *
+ * :macro:`NGTCP2_ERR_NOMEM`
+ * Out of memory
+ * :macro:`NGTCP2_ERR_NOBUF`
+ * Buffer is too small
+ * :macro:`NGTCP2_ERR_INVALID_STATE`
+ * The current state does not allow sending CONNECTION_CLOSE.
+ * :macro:`NGTCP2_ERR_PKT_NUM_EXHAUSTED`
+ * Packet number is exhausted, and cannot send any more packet.
+ * :macro:`NGTCP2_ERR_CALLBACK_FAILURE`
+ * User callback failed
+ */
+ngtcp2_ssize ngtcp2_conn_write_application_close_pkt(
+ ngtcp2_conn *conn, ngtcp2_path *path, ngtcp2_pkt_info *pi, uint8_t *dest,
+ size_t destlen, uint64_t app_error_code, const uint8_t *reason,
+ size_t reasonlen, ngtcp2_tstamp ts);
+
+int ngtcp2_conn_start_pmtud(ngtcp2_conn *conn);
+
+void ngtcp2_conn_stop_pmtud(ngtcp2_conn *conn);
+
+/**
+ * @function
+ *
+ * `ngtcp2_conn_set_remote_transport_params` sets transport parameter
+ * |params| from a remote endpoint to |conn|.
+ *
+ * This function returns 0 if it succeeds, or one of the following
+ * negative error codes:
+ *
+ * :macro:`NGTCP2_ERR_TRANSPORT_PARAM`
+ * Failed to validate a remote transport parameters.
+ * :macro:`NGTCP2_ERR_VERSION_NEGOTIATION_FAILURE`
+ * Version negotiation failure.
+ * :macro:`NGTCP2_ERR_CALLBACK_FAILURE`
+ * User callback failed
+ * :macro:`NGTCP2_ERR_NOMEM`
+ * Out of memory.
+ */
+int ngtcp2_conn_set_remote_transport_params(
+ ngtcp2_conn *conn, const ngtcp2_transport_params *params);
+
+#endif /* NGTCP2_CONN_H */
diff --git a/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_conv.c b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_conv.c
new file mode 100644
index 0000000..3367217
--- /dev/null
+++ b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_conv.c
@@ -0,0 +1,291 @@
+/*
+ * ngtcp2
+ *
+ * Copyright (c) 2017 ngtcp2 contributors
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "ngtcp2_conv.h"
+
+#include <string.h>
+#include <assert.h>
+
+#include "ngtcp2_str.h"
+#include "ngtcp2_pkt.h"
+#include "ngtcp2_net.h"
+#include "ngtcp2_unreachable.h"
+
+const uint8_t *ngtcp2_get_uint64(uint64_t *dest, const uint8_t *p) {
+ uint64_t n;
+ memcpy(&n, p, sizeof(n));
+ *dest = ngtcp2_ntohl64(n);
+ return p + sizeof(n);
+}
+
+const uint8_t *ngtcp2_get_uint48(uint64_t *dest, const uint8_t *p) {
+ uint64_t n = 0;
+ memcpy(((uint8_t *)&n) + 2, p, 6);
+ *dest = ngtcp2_ntohl64(n);
+ return p + 6;
+}
+
+const uint8_t *ngtcp2_get_uint32(uint32_t *dest, const uint8_t *p) {
+ uint32_t n;
+ memcpy(&n, p, sizeof(n));
+ *dest = ngtcp2_ntohl(n);
+ return p + sizeof(n);
+}
+
+const uint8_t *ngtcp2_get_uint24(uint32_t *dest, const uint8_t *p) {
+ uint32_t n = 0;
+ memcpy(((uint8_t *)&n) + 1, p, 3);
+ *dest = ngtcp2_ntohl(n);
+ return p + 3;
+}
+
+const uint8_t *ngtcp2_get_uint16(uint16_t *dest, const uint8_t *p) {
+ uint16_t n;
+ memcpy(&n, p, sizeof(n));
+ *dest = ngtcp2_ntohs(n);
+ return p + sizeof(n);
+}
+
+const uint8_t *ngtcp2_get_uint16be(uint16_t *dest, const uint8_t *p) {
+ memcpy(dest, p, sizeof(*dest));
+ return p + sizeof(*dest);
+}
+
+static uint64_t get_uvarint(size_t *plen, const uint8_t *p) {
+ union {
+ uint8_t n8;
+ uint16_t n16;
+ uint32_t n32;
+ uint64_t n64;
+ } n;
+
+ *plen = (size_t)(1u << (*p >> 6));
+
+ switch (*plen) {
+ case 1:
+ return *p;
+ case 2:
+ memcpy(&n, p, 2);
+ n.n8 &= 0x3f;
+ return ngtcp2_ntohs(n.n16);
+ case 4:
+ memcpy(&n, p, 4);
+ n.n8 &= 0x3f;
+ return ngtcp2_ntohl(n.n32);
+ case 8:
+ memcpy(&n, p, 8);
+ n.n8 &= 0x3f;
+ return ngtcp2_ntohl64(n.n64);
+ default:
+ ngtcp2_unreachable();
+ }
+}
+
+const uint8_t *ngtcp2_get_uvarint(uint64_t *dest, const uint8_t *p) {
+ size_t len;
+
+ *dest = get_uvarint(&len, p);
+
+ return p + len;
+}
+
+const uint8_t *ngtcp2_get_varint(int64_t *dest, const uint8_t *p) {
+ size_t len;
+
+ *dest = (int64_t)get_uvarint(&len, p);
+
+ return p + len;
+}
+
+int64_t ngtcp2_get_pkt_num(const uint8_t *p, size_t pkt_numlen) {
+ uint32_t l;
+ uint16_t s;
+
+ switch (pkt_numlen) {
+ case 1:
+ return *p;
+ case 2:
+ ngtcp2_get_uint16(&s, p);
+ return (int64_t)s;
+ case 3:
+ ngtcp2_get_uint24(&l, p);
+ return (int64_t)l;
+ case 4:
+ ngtcp2_get_uint32(&l, p);
+ return (int64_t)l;
+ default:
+ ngtcp2_unreachable();
+ }
+}
+
+uint8_t *ngtcp2_put_uint64be(uint8_t *p, uint64_t n) {
+ n = ngtcp2_htonl64(n);
+ return ngtcp2_cpymem(p, (const uint8_t *)&n, sizeof(n));
+}
+
+uint8_t *ngtcp2_put_uint48be(uint8_t *p, uint64_t n) {
+ n = ngtcp2_htonl64(n);
+ return ngtcp2_cpymem(p, ((const uint8_t *)&n) + 2, 6);
+}
+
+uint8_t *ngtcp2_put_uint32be(uint8_t *p, uint32_t n) {
+ n = ngtcp2_htonl(n);
+ return ngtcp2_cpymem(p, (const uint8_t *)&n, sizeof(n));
+}
+
+uint8_t *ngtcp2_put_uint24be(uint8_t *p, uint32_t n) {
+ n = ngtcp2_htonl(n);
+ return ngtcp2_cpymem(p, ((const uint8_t *)&n) + 1, 3);
+}
+
+uint8_t *ngtcp2_put_uint16be(uint8_t *p, uint16_t n) {
+ n = ngtcp2_htons(n);
+ return ngtcp2_cpymem(p, (const uint8_t *)&n, sizeof(n));
+}
+
+uint8_t *ngtcp2_put_uint16(uint8_t *p, uint16_t n) {
+ return ngtcp2_cpymem(p, (const uint8_t *)&n, sizeof(n));
+}
+
+uint8_t *ngtcp2_put_uvarint(uint8_t *p, uint64_t n) {
+ uint8_t *rv;
+ if (n < 64) {
+ *p++ = (uint8_t)n;
+ return p;
+ }
+ if (n < 16384) {
+ rv = ngtcp2_put_uint16be(p, (uint16_t)n);
+ *p |= 0x40;
+ return rv;
+ }
+ if (n < 1073741824) {
+ rv = ngtcp2_put_uint32be(p, (uint32_t)n);
+ *p |= 0x80;
+ return rv;
+ }
+ assert(n < 4611686018427387904ULL);
+ rv = ngtcp2_put_uint64be(p, n);
+ *p |= 0xc0;
+ return rv;
+}
+
+uint8_t *ngtcp2_put_uvarint30(uint8_t *p, uint32_t n) {
+ uint8_t *rv;
+
+ assert(n < 1073741824);
+
+ rv = ngtcp2_put_uint32be(p, n);
+ *p |= 0x80;
+
+ return rv;
+}
+
+uint8_t *ngtcp2_put_pkt_num(uint8_t *p, int64_t pkt_num, size_t len) {
+ switch (len) {
+ case 1:
+ *p++ = (uint8_t)pkt_num;
+ return p;
+ case 2:
+ ngtcp2_put_uint16be(p, (uint16_t)pkt_num);
+ return p + 2;
+ case 3:
+ ngtcp2_put_uint24be(p, (uint32_t)pkt_num);
+ return p + 3;
+ case 4:
+ ngtcp2_put_uint32be(p, (uint32_t)pkt_num);
+ return p + 4;
+ default:
+ ngtcp2_unreachable();
+ }
+}
+
+size_t ngtcp2_get_uvarintlen(const uint8_t *p) {
+ return (size_t)(1u << (*p >> 6));
+}
+
+size_t ngtcp2_put_uvarintlen(uint64_t n) {
+ if (n < 64) {
+ return 1;
+ }
+ if (n < 16384) {
+ return 2;
+ }
+ if (n < 1073741824) {
+ return 4;
+ }
+ assert(n < 4611686018427387904ULL);
+ return 8;
+}
+
+int64_t ngtcp2_nth_server_bidi_id(uint64_t n) {
+ if (n == 0) {
+ return 0;
+ }
+
+ if ((NGTCP2_MAX_VARINT >> 2) < n - 1) {
+ return NGTCP2_MAX_SERVER_STREAM_ID_BIDI;
+ }
+
+ return (int64_t)(((n - 1) << 2) | 0x01);
+}
+
+int64_t ngtcp2_nth_client_bidi_id(uint64_t n) {
+ if (n == 0) {
+ return 0;
+ }
+
+ if ((NGTCP2_MAX_VARINT >> 2) < n - 1) {
+ return NGTCP2_MAX_CLIENT_STREAM_ID_BIDI;
+ }
+
+ return (int64_t)((n - 1) << 2);
+}
+
+int64_t ngtcp2_nth_server_uni_id(uint64_t n) {
+ if (n == 0) {
+ return 0;
+ }
+
+ if ((NGTCP2_MAX_VARINT >> 2) < n - 1) {
+ return NGTCP2_MAX_SERVER_STREAM_ID_UNI;
+ }
+
+ return (int64_t)(((n - 1) << 2) | 0x03);
+}
+
+int64_t ngtcp2_nth_client_uni_id(uint64_t n) {
+ if (n == 0) {
+ return 0;
+ }
+
+ if ((NGTCP2_MAX_VARINT >> 2) < n - 1) {
+ return NGTCP2_MAX_CLIENT_STREAM_ID_UNI;
+ }
+
+ return (int64_t)(((n - 1) << 2) | 0x02);
+}
+
+uint64_t ngtcp2_ord_stream_id(int64_t stream_id) {
+ return (uint64_t)(stream_id >> 2) + 1;
+}
diff --git a/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_conv.h b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_conv.h
new file mode 100644
index 0000000..ef089a9
--- /dev/null
+++ b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_conv.h
@@ -0,0 +1,208 @@
+/*
+ * ngtcp2
+ *
+ * Copyright (c) 2017 ngtcp2 contributors
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#ifndef NGTCP2_CONV_H
+#define NGTCP2_CONV_H
+
+#ifdef HAVE_CONFIG_H
+# include <config.h>
+#endif /* HAVE_CONFIG_H */
+
+#include <ngtcp2/ngtcp2.h>
+
+/*
+ * ngtcp2_get_uint64 reads 8 bytes from |p| as 64 bits unsigned
+ * integer encoded as network byte order, and stores it in the buffer
+ * pointed by |dest| in host byte order. It returns |p| + 8.
+ */
+const uint8_t *ngtcp2_get_uint64(uint64_t *dest, const uint8_t *p);
+
+/*
+ * ngtcp2_get_uint48 reads 6 bytes from |p| as 48 bits unsigned
+ * integer encoded as network byte order, and stores it in the buffer
+ * pointed by |dest| in host byte order. It returns |p| + 6.
+ */
+const uint8_t *ngtcp2_get_uint48(uint64_t *dest, const uint8_t *p);
+
+/*
+ * ngtcp2_get_uint32 reads 4 bytes from |p| as 32 bits unsigned
+ * integer encoded as network byte order, and stores it in the buffer
+ * pointed by |dest| in host byte order. It returns |p| + 4.
+ */
+const uint8_t *ngtcp2_get_uint32(uint32_t *dest, const uint8_t *p);
+
+/*
+ * ngtcp2_get_uint24 reads 3 bytes from |p| as 24 bits unsigned
+ * integer encoded as network byte order, and stores it in the buffer
+ * pointed by |dest| in host byte order. It returns |p| + 3.
+ */
+const uint8_t *ngtcp2_get_uint24(uint32_t *dest, const uint8_t *p);
+
+/*
+ * ngtcp2_get_uint16 reads 2 bytes from |p| as 16 bits unsigned
+ * integer encoded as network byte order, and stores it in the buffer
+ * pointed by |dest| in host byte order. It returns |p| + 2.
+ */
+const uint8_t *ngtcp2_get_uint16(uint16_t *dest, const uint8_t *p);
+
+/*
+ * ngtcp2_get_uint16be reads 2 bytes from |p| as 16 bits unsigned
+ * integer encoded as network byte order, and stores it in the buffer
+ * pointed by |dest| as is. It returns |p| + 2.
+ */
+const uint8_t *ngtcp2_get_uint16be(uint16_t *dest, const uint8_t *p);
+
+/*
+ * ngtcp2_get_uvarint reads variable-length unsigned integer from |p|,
+ * and stores it in the buffer pointed by |dest| in host byte order.
+ * It returns |p| plus the number of bytes read from |p|.
+ */
+const uint8_t *ngtcp2_get_uvarint(uint64_t *dest, const uint8_t *p);
+
+/*
+ * ngtcp2_get_varint reads variable-length unsigned integer from |p|,
+ * and casts it to the signed integer, and stores it in the buffer
+ * pointed by |dest| in host byte order. No information should be
+ * lost in this cast, because the variable-length integer is 62
+ * bits. It returns |p| plus the number of bytes read from |p|.
+ */
+const uint8_t *ngtcp2_get_varint(int64_t *dest, const uint8_t *p);
+
+/*
+ * ngtcp2_get_pkt_num reads encoded packet number from |p|. The
+ * packet number is encoed in |pkt_numlen| bytes.
+ */
+int64_t ngtcp2_get_pkt_num(const uint8_t *p, size_t pkt_numlen);
+
+/*
+ * ngtcp2_put_uint64be writes |n| in host byte order in |p| in network
+ * byte order. It returns the one beyond of the last written
+ * position.
+ */
+uint8_t *ngtcp2_put_uint64be(uint8_t *p, uint64_t n);
+
+/*
+ * ngtcp2_put_uint48be writes |n| in host byte order in |p| in network
+ * byte order. It writes only least significant 48 bits. It returns
+ * the one beyond of the last written position.
+ */
+uint8_t *ngtcp2_put_uint48be(uint8_t *p, uint64_t n);
+
+/*
+ * ngtcp2_put_uint32be writes |n| in host byte order in |p| in network
+ * byte order. It returns the one beyond of the last written
+ * position.
+ */
+uint8_t *ngtcp2_put_uint32be(uint8_t *p, uint32_t n);
+
+/*
+ * ngtcp2_put_uint24be writes |n| in host byte order in |p| in network
+ * byte order. It writes only least significant 24 bits. It returns
+ * the one beyond of the last written position.
+ */
+uint8_t *ngtcp2_put_uint24be(uint8_t *p, uint32_t n);
+
+/*
+ * ngtcp2_put_uint16be writes |n| in host byte order in |p| in network
+ * byte order. It returns the one beyond of the last written
+ * position.
+ */
+uint8_t *ngtcp2_put_uint16be(uint8_t *p, uint16_t n);
+
+/*
+ * ngtcp2_put_uint16 writes |n| as is in |p|. It returns the one
+ * beyond of the last written position.
+ */
+uint8_t *ngtcp2_put_uint16(uint8_t *p, uint16_t n);
+
+/*
+ * ngtcp2_put_uvarint writes |n| in |p| using variable-length integer
+ * encoding. It returns the one beyond of the last written position.
+ */
+uint8_t *ngtcp2_put_uvarint(uint8_t *p, uint64_t n);
+
+/*
+ * ngtcp2_put_uvarint30 writes |n| in |p| using variable-length
+ * integer encoding. |n| must be strictly less than 1073741824. The
+ * function always encodes |n| in 4 bytes. It returns the one beyond
+ * of the last written position.
+ */
+uint8_t *ngtcp2_put_uvarint30(uint8_t *p, uint32_t n);
+
+/*
+ * ngtcp2_put_pkt_num encodes |pkt_num| using |len| bytes. It
+ * returns the one beyond of the last written position.
+ */
+uint8_t *ngtcp2_put_pkt_num(uint8_t *p, int64_t pkt_num, size_t len);
+
+/*
+ * ngtcp2_get_uvarintlen returns the required number of bytes to read
+ * variable-length integer starting at |p|.
+ */
+size_t ngtcp2_get_uvarintlen(const uint8_t *p);
+
+/*
+ * ngtcp2_put_uvarintlen returns the required number of bytes to
+ * encode |n|.
+ */
+size_t ngtcp2_put_uvarintlen(uint64_t n);
+
+/*
+ * ngtcp2_nth_server_bidi_id returns |n|-th server bidirectional
+ * stream ID. If |n| is 0, it returns 0. If the |n|-th stream ID is
+ * larger than NGTCP2_MAX_SERVER_STREAM_ID_BIDI, this function returns
+ * NGTCP2_MAX_SERVER_STREAM_ID_BIDI.
+ */
+int64_t ngtcp2_nth_server_bidi_id(uint64_t n);
+
+/*
+ * ngtcp2_nth_client_bidi_id returns |n|-th client bidirectional
+ * stream ID. If |n| is 0, it returns 0. If the |n|-th stream ID is
+ * larger than NGTCP2_MAX_CLIENT_STREAM_ID_BIDI, this function returns
+ * NGTCP2_MAX_CLIENT_STREAM_ID_BIDI.
+ */
+int64_t ngtcp2_nth_client_bidi_id(uint64_t n);
+
+/*
+ * ngtcp2_nth_server_uni_id returns |n|-th server unidirectional
+ * stream ID. If |n| is 0, it returns 0. If the |n|-th stream ID is
+ * larger than NGTCP2_MAX_SERVER_STREAM_ID_UNI, this function returns
+ * NGTCP2_MAX_SERVER_STREAM_ID_UNI.
+ */
+int64_t ngtcp2_nth_server_uni_id(uint64_t n);
+
+/*
+ * ngtcp2_nth_client_uni_id returns |n|-th client unidirectional
+ * stream ID. If |n| is 0, it returns 0. If the |n|-th stream ID is
+ * larger than NGTCP2_MAX_CLIENT_STREAM_ID_UNI, this function returns
+ * NGTCP2_MAX_CLIENT_STREAM_ID_UNI.
+ */
+int64_t ngtcp2_nth_client_uni_id(uint64_t n);
+
+/*
+ * ngtcp2_ord_stream_id returns the ordinal number of |stream_id|.
+ */
+uint64_t ngtcp2_ord_stream_id(int64_t stream_id);
+
+#endif /* NGTCP2_CONV_H */
diff --git a/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_crypto.c b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_crypto.c
new file mode 100644
index 0000000..24210a2
--- /dev/null
+++ b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_crypto.c
@@ -0,0 +1,895 @@
+/*
+ * ngtcp2
+ *
+ * Copyright (c) 2017 ngtcp2 contributors
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "ngtcp2_crypto.h"
+
+#include <string.h>
+#include <assert.h>
+
+#include "ngtcp2_str.h"
+#include "ngtcp2_conv.h"
+#include "ngtcp2_conn.h"
+#include "ngtcp2_net.h"
+
+int ngtcp2_crypto_km_new(ngtcp2_crypto_km **pckm, const uint8_t *secret,
+ size_t secretlen,
+ const ngtcp2_crypto_aead_ctx *aead_ctx,
+ const uint8_t *iv, size_t ivlen,
+ const ngtcp2_mem *mem) {
+ int rv = ngtcp2_crypto_km_nocopy_new(pckm, secretlen, ivlen, mem);
+ if (rv != 0) {
+ return rv;
+ }
+
+ if (secretlen) {
+ memcpy((*pckm)->secret.base, secret, secretlen);
+ }
+ if (aead_ctx) {
+ (*pckm)->aead_ctx = *aead_ctx;
+ }
+ memcpy((*pckm)->iv.base, iv, ivlen);
+
+ return 0;
+}
+
+int ngtcp2_crypto_km_nocopy_new(ngtcp2_crypto_km **pckm, size_t secretlen,
+ size_t ivlen, const ngtcp2_mem *mem) {
+ size_t len;
+ uint8_t *p;
+
+ len = sizeof(ngtcp2_crypto_km) + secretlen + ivlen;
+
+ *pckm = ngtcp2_mem_malloc(mem, len);
+ if (*pckm == NULL) {
+ return NGTCP2_ERR_NOMEM;
+ }
+
+ p = (uint8_t *)(*pckm) + sizeof(ngtcp2_crypto_km);
+ (*pckm)->secret.base = p;
+ (*pckm)->secret.len = secretlen;
+ p += secretlen;
+ (*pckm)->iv.base = p;
+ (*pckm)->iv.len = ivlen;
+ (*pckm)->aead_ctx.native_handle = NULL;
+ (*pckm)->pkt_num = -1;
+ (*pckm)->use_count = 0;
+ (*pckm)->flags = NGTCP2_CRYPTO_KM_FLAG_NONE;
+
+ return 0;
+}
+
+void ngtcp2_crypto_km_del(ngtcp2_crypto_km *ckm, const ngtcp2_mem *mem) {
+ if (ckm == NULL) {
+ return;
+ }
+
+ ngtcp2_mem_free(mem, ckm);
+}
+
+void ngtcp2_crypto_create_nonce(uint8_t *dest, const uint8_t *iv, size_t ivlen,
+ int64_t pkt_num) {
+ size_t i;
+ uint64_t n;
+
+ assert(ivlen >= 8);
+
+ memcpy(dest, iv, ivlen);
+ n = ngtcp2_htonl64((uint64_t)pkt_num);
+
+ for (i = 0; i < 8; ++i) {
+ dest[ivlen - 8 + i] ^= ((uint8_t *)&n)[i];
+ }
+}
+
+/*
+ * varint_paramlen returns the length of a single transport parameter
+ * which has variable integer in its parameter.
+ */
+static size_t varint_paramlen(ngtcp2_transport_param_id id, uint64_t param) {
+ size_t valuelen = ngtcp2_put_uvarintlen(param);
+ return ngtcp2_put_uvarintlen(id) + ngtcp2_put_uvarintlen(valuelen) + valuelen;
+}
+
+/*
+ * write_varint_param writes parameter |id| of the given |value| in
+ * varint encoding. It returns p + the number of bytes written.
+ */
+static uint8_t *write_varint_param(uint8_t *p, ngtcp2_transport_param_id id,
+ uint64_t value) {
+ p = ngtcp2_put_uvarint(p, id);
+ p = ngtcp2_put_uvarint(p, ngtcp2_put_uvarintlen(value));
+ return ngtcp2_put_uvarint(p, value);
+}
+
+/*
+ * cid_paramlen returns the length of a single transport parameter
+ * which has |cid| as value.
+ */
+static size_t cid_paramlen(ngtcp2_transport_param_id id,
+ const ngtcp2_cid *cid) {
+ return ngtcp2_put_uvarintlen(id) + ngtcp2_put_uvarintlen(cid->datalen) +
+ cid->datalen;
+}
+
+/*
+ * write_cid_param writes parameter |id| of the given |cid|. It
+ * returns p + the number of bytes written.
+ */
+static uint8_t *write_cid_param(uint8_t *p, ngtcp2_transport_param_id id,
+ const ngtcp2_cid *cid) {
+ assert(cid->datalen == 0 || cid->datalen >= NGTCP2_MIN_CIDLEN);
+ assert(cid->datalen <= NGTCP2_MAX_CIDLEN);
+
+ p = ngtcp2_put_uvarint(p, id);
+ p = ngtcp2_put_uvarint(p, cid->datalen);
+ if (cid->datalen) {
+ p = ngtcp2_cpymem(p, cid->data, cid->datalen);
+ }
+ return p;
+}
+
+static const uint8_t empty_address[16];
+
+ngtcp2_ssize ngtcp2_encode_transport_params_versioned(
+ uint8_t *dest, size_t destlen, ngtcp2_transport_params_type exttype,
+ int transport_params_version, const ngtcp2_transport_params *params) {
+ uint8_t *p;
+ size_t len = 0;
+ /* For some reason, gcc 7.3.0 requires this initialization. */
+ size_t preferred_addrlen = 0;
+ size_t version_infolen = 0;
+ const ngtcp2_sockaddr_in *sa_in;
+ const ngtcp2_sockaddr_in6 *sa_in6;
+ (void)transport_params_version;
+
+ switch (exttype) {
+ case NGTCP2_TRANSPORT_PARAMS_TYPE_CLIENT_HELLO:
+ break;
+ case NGTCP2_TRANSPORT_PARAMS_TYPE_ENCRYPTED_EXTENSIONS:
+ len +=
+ cid_paramlen(NGTCP2_TRANSPORT_PARAM_ORIGINAL_DESTINATION_CONNECTION_ID,
+ &params->original_dcid);
+
+ if (params->stateless_reset_token_present) {
+ len +=
+ ngtcp2_put_uvarintlen(NGTCP2_TRANSPORT_PARAM_STATELESS_RESET_TOKEN) +
+ ngtcp2_put_uvarintlen(NGTCP2_STATELESS_RESET_TOKENLEN) +
+ NGTCP2_STATELESS_RESET_TOKENLEN;
+ }
+ if (params->preferred_address_present) {
+ assert(params->preferred_address.cid.datalen >= NGTCP2_MIN_CIDLEN);
+ assert(params->preferred_address.cid.datalen <= NGTCP2_MAX_CIDLEN);
+ preferred_addrlen = 4 /* ipv4Address */ + 2 /* ipv4Port */ +
+ 16 /* ipv6Address */ + 2 /* ipv6Port */
+ + 1 +
+ params->preferred_address.cid.datalen /* CID */ +
+ NGTCP2_STATELESS_RESET_TOKENLEN;
+ len += ngtcp2_put_uvarintlen(NGTCP2_TRANSPORT_PARAM_PREFERRED_ADDRESS) +
+ ngtcp2_put_uvarintlen(preferred_addrlen) + preferred_addrlen;
+ }
+ if (params->retry_scid_present) {
+ len += cid_paramlen(NGTCP2_TRANSPORT_PARAM_RETRY_SOURCE_CONNECTION_ID,
+ &params->retry_scid);
+ }
+ break;
+ default:
+ return NGTCP2_ERR_INVALID_ARGUMENT;
+ }
+
+ len += cid_paramlen(NGTCP2_TRANSPORT_PARAM_INITIAL_SOURCE_CONNECTION_ID,
+ &params->initial_scid);
+
+ if (params->initial_max_stream_data_bidi_local) {
+ len += varint_paramlen(
+ NGTCP2_TRANSPORT_PARAM_INITIAL_MAX_STREAM_DATA_BIDI_LOCAL,
+ params->initial_max_stream_data_bidi_local);
+ }
+ if (params->initial_max_stream_data_bidi_remote) {
+ len += varint_paramlen(
+ NGTCP2_TRANSPORT_PARAM_INITIAL_MAX_STREAM_DATA_BIDI_REMOTE,
+ params->initial_max_stream_data_bidi_remote);
+ }
+ if (params->initial_max_stream_data_uni) {
+ len += varint_paramlen(NGTCP2_TRANSPORT_PARAM_INITIAL_MAX_STREAM_DATA_UNI,
+ params->initial_max_stream_data_uni);
+ }
+ if (params->initial_max_data) {
+ len += varint_paramlen(NGTCP2_TRANSPORT_PARAM_INITIAL_MAX_DATA,
+ params->initial_max_data);
+ }
+ if (params->initial_max_streams_bidi) {
+ len += varint_paramlen(NGTCP2_TRANSPORT_PARAM_INITIAL_MAX_STREAMS_BIDI,
+ params->initial_max_streams_bidi);
+ }
+ if (params->initial_max_streams_uni) {
+ len += varint_paramlen(NGTCP2_TRANSPORT_PARAM_INITIAL_MAX_STREAMS_UNI,
+ params->initial_max_streams_uni);
+ }
+ if (params->max_udp_payload_size !=
+ NGTCP2_DEFAULT_MAX_RECV_UDP_PAYLOAD_SIZE) {
+ len += varint_paramlen(NGTCP2_TRANSPORT_PARAM_MAX_UDP_PAYLOAD_SIZE,
+ params->max_udp_payload_size);
+ }
+ if (params->ack_delay_exponent != NGTCP2_DEFAULT_ACK_DELAY_EXPONENT) {
+ len += varint_paramlen(NGTCP2_TRANSPORT_PARAM_ACK_DELAY_EXPONENT,
+ params->ack_delay_exponent);
+ }
+ if (params->disable_active_migration) {
+ len +=
+ ngtcp2_put_uvarintlen(NGTCP2_TRANSPORT_PARAM_DISABLE_ACTIVE_MIGRATION) +
+ ngtcp2_put_uvarintlen(0);
+ }
+ if (params->max_ack_delay != NGTCP2_DEFAULT_MAX_ACK_DELAY) {
+ len += varint_paramlen(NGTCP2_TRANSPORT_PARAM_MAX_ACK_DELAY,
+ params->max_ack_delay / NGTCP2_MILLISECONDS);
+ }
+ if (params->max_idle_timeout) {
+ len += varint_paramlen(NGTCP2_TRANSPORT_PARAM_MAX_IDLE_TIMEOUT,
+ params->max_idle_timeout / NGTCP2_MILLISECONDS);
+ }
+ if (params->active_connection_id_limit &&
+ params->active_connection_id_limit !=
+ NGTCP2_DEFAULT_ACTIVE_CONNECTION_ID_LIMIT) {
+ len += varint_paramlen(NGTCP2_TRANSPORT_PARAM_ACTIVE_CONNECTION_ID_LIMIT,
+ params->active_connection_id_limit);
+ }
+ if (params->max_datagram_frame_size) {
+ len += varint_paramlen(NGTCP2_TRANSPORT_PARAM_MAX_DATAGRAM_FRAME_SIZE,
+ params->max_datagram_frame_size);
+ }
+ if (params->grease_quic_bit) {
+ len += ngtcp2_put_uvarintlen(NGTCP2_TRANSPORT_PARAM_GREASE_QUIC_BIT) +
+ ngtcp2_put_uvarintlen(0);
+ }
+ if (params->version_info_present) {
+ version_infolen =
+ sizeof(uint32_t) + params->version_info.available_versionslen;
+ len += ngtcp2_put_uvarintlen(NGTCP2_TRANSPORT_PARAM_VERSION_INFORMATION) +
+ ngtcp2_put_uvarintlen(version_infolen) + version_infolen;
+ }
+
+ if (dest == NULL && destlen == 0) {
+ return (ngtcp2_ssize)len;
+ }
+
+ if (destlen < len) {
+ return NGTCP2_ERR_NOBUF;
+ }
+
+ p = dest;
+
+ if (exttype == NGTCP2_TRANSPORT_PARAMS_TYPE_ENCRYPTED_EXTENSIONS) {
+ p = write_cid_param(
+ p, NGTCP2_TRANSPORT_PARAM_ORIGINAL_DESTINATION_CONNECTION_ID,
+ &params->original_dcid);
+
+ if (params->stateless_reset_token_present) {
+ p = ngtcp2_put_uvarint(p, NGTCP2_TRANSPORT_PARAM_STATELESS_RESET_TOKEN);
+ p = ngtcp2_put_uvarint(p, sizeof(params->stateless_reset_token));
+ p = ngtcp2_cpymem(p, params->stateless_reset_token,
+ sizeof(params->stateless_reset_token));
+ }
+ if (params->preferred_address_present) {
+ p = ngtcp2_put_uvarint(p, NGTCP2_TRANSPORT_PARAM_PREFERRED_ADDRESS);
+ p = ngtcp2_put_uvarint(p, preferred_addrlen);
+
+ if (params->preferred_address.ipv4_present) {
+ sa_in = &params->preferred_address.ipv4;
+ p = ngtcp2_cpymem(p, &sa_in->sin_addr, sizeof(sa_in->sin_addr));
+ p = ngtcp2_put_uint16(p, sa_in->sin_port);
+ } else {
+ p = ngtcp2_cpymem(p, empty_address, sizeof(sa_in->sin_addr));
+ p = ngtcp2_put_uint16(p, 0);
+ }
+
+ if (params->preferred_address.ipv6_present) {
+ sa_in6 = &params->preferred_address.ipv6;
+ p = ngtcp2_cpymem(p, &sa_in6->sin6_addr, sizeof(sa_in6->sin6_addr));
+ p = ngtcp2_put_uint16(p, sa_in6->sin6_port);
+ } else {
+ p = ngtcp2_cpymem(p, empty_address, sizeof(sa_in6->sin6_addr));
+ p = ngtcp2_put_uint16(p, 0);
+ }
+
+ *p++ = (uint8_t)params->preferred_address.cid.datalen;
+ if (params->preferred_address.cid.datalen) {
+ p = ngtcp2_cpymem(p, params->preferred_address.cid.data,
+ params->preferred_address.cid.datalen);
+ }
+ p = ngtcp2_cpymem(
+ p, params->preferred_address.stateless_reset_token,
+ sizeof(params->preferred_address.stateless_reset_token));
+ }
+ if (params->retry_scid_present) {
+ p = write_cid_param(p, NGTCP2_TRANSPORT_PARAM_RETRY_SOURCE_CONNECTION_ID,
+ &params->retry_scid);
+ }
+ }
+
+ p = write_cid_param(p, NGTCP2_TRANSPORT_PARAM_INITIAL_SOURCE_CONNECTION_ID,
+ &params->initial_scid);
+
+ if (params->initial_max_stream_data_bidi_local) {
+ p = write_varint_param(
+ p, NGTCP2_TRANSPORT_PARAM_INITIAL_MAX_STREAM_DATA_BIDI_LOCAL,
+ params->initial_max_stream_data_bidi_local);
+ }
+
+ if (params->initial_max_stream_data_bidi_remote) {
+ p = write_varint_param(
+ p, NGTCP2_TRANSPORT_PARAM_INITIAL_MAX_STREAM_DATA_BIDI_REMOTE,
+ params->initial_max_stream_data_bidi_remote);
+ }
+
+ if (params->initial_max_stream_data_uni) {
+ p = write_varint_param(p,
+ NGTCP2_TRANSPORT_PARAM_INITIAL_MAX_STREAM_DATA_UNI,
+ params->initial_max_stream_data_uni);
+ }
+
+ if (params->initial_max_data) {
+ p = write_varint_param(p, NGTCP2_TRANSPORT_PARAM_INITIAL_MAX_DATA,
+ params->initial_max_data);
+ }
+
+ if (params->initial_max_streams_bidi) {
+ p = write_varint_param(p, NGTCP2_TRANSPORT_PARAM_INITIAL_MAX_STREAMS_BIDI,
+ params->initial_max_streams_bidi);
+ }
+
+ if (params->initial_max_streams_uni) {
+ p = write_varint_param(p, NGTCP2_TRANSPORT_PARAM_INITIAL_MAX_STREAMS_UNI,
+ params->initial_max_streams_uni);
+ }
+
+ if (params->max_udp_payload_size !=
+ NGTCP2_DEFAULT_MAX_RECV_UDP_PAYLOAD_SIZE) {
+ p = write_varint_param(p, NGTCP2_TRANSPORT_PARAM_MAX_UDP_PAYLOAD_SIZE,
+ params->max_udp_payload_size);
+ }
+
+ if (params->ack_delay_exponent != NGTCP2_DEFAULT_ACK_DELAY_EXPONENT) {
+ p = write_varint_param(p, NGTCP2_TRANSPORT_PARAM_ACK_DELAY_EXPONENT,
+ params->ack_delay_exponent);
+ }
+
+ if (params->disable_active_migration) {
+ p = ngtcp2_put_uvarint(p, NGTCP2_TRANSPORT_PARAM_DISABLE_ACTIVE_MIGRATION);
+ p = ngtcp2_put_uvarint(p, 0);
+ }
+
+ if (params->max_ack_delay != NGTCP2_DEFAULT_MAX_ACK_DELAY) {
+ p = write_varint_param(p, NGTCP2_TRANSPORT_PARAM_MAX_ACK_DELAY,
+ params->max_ack_delay / NGTCP2_MILLISECONDS);
+ }
+
+ if (params->max_idle_timeout) {
+ p = write_varint_param(p, NGTCP2_TRANSPORT_PARAM_MAX_IDLE_TIMEOUT,
+ params->max_idle_timeout / NGTCP2_MILLISECONDS);
+ }
+
+ if (params->active_connection_id_limit &&
+ params->active_connection_id_limit !=
+ NGTCP2_DEFAULT_ACTIVE_CONNECTION_ID_LIMIT) {
+ p = write_varint_param(p, NGTCP2_TRANSPORT_PARAM_ACTIVE_CONNECTION_ID_LIMIT,
+ params->active_connection_id_limit);
+ }
+
+ if (params->max_datagram_frame_size) {
+ p = write_varint_param(p, NGTCP2_TRANSPORT_PARAM_MAX_DATAGRAM_FRAME_SIZE,
+ params->max_datagram_frame_size);
+ }
+
+ if (params->grease_quic_bit) {
+ p = ngtcp2_put_uvarint(p, NGTCP2_TRANSPORT_PARAM_GREASE_QUIC_BIT);
+ p = ngtcp2_put_uvarint(p, 0);
+ }
+
+ if (params->version_info_present) {
+ p = ngtcp2_put_uvarint(p, NGTCP2_TRANSPORT_PARAM_VERSION_INFORMATION);
+ p = ngtcp2_put_uvarint(p, version_infolen);
+ p = ngtcp2_put_uint32be(p, params->version_info.chosen_version);
+ if (params->version_info.available_versionslen) {
+ p = ngtcp2_cpymem(p, params->version_info.available_versions,
+ params->version_info.available_versionslen);
+ }
+ }
+
+ assert((size_t)(p - dest) == len);
+
+ return (ngtcp2_ssize)len;
+}
+
+/*
+ * decode_varint decodes a single varint from the buffer pointed by
+ * |*pp| of length |end - *pp|. If it decodes an integer
+ * successfully, it stores the integer in |*pdest|, increment |*pp| by
+ * the number of bytes read from |*pp|, and returns 0. Otherwise it
+ * returns -1.
+ */
+static int decode_varint(uint64_t *pdest, const uint8_t **pp,
+ const uint8_t *end) {
+ const uint8_t *p = *pp;
+ size_t len;
+
+ if (p == end) {
+ return -1;
+ }
+
+ len = ngtcp2_get_uvarintlen(p);
+ if ((uint64_t)(end - p) < len) {
+ return -1;
+ }
+
+ *pp = ngtcp2_get_uvarint(pdest, p);
+
+ return 0;
+}
+
+/*
+ * decode_varint_param decodes length prefixed value from the buffer
+ * pointed by |*pp| of length |end - *pp|. The length and value are
+ * encoded in varint form. If it decodes a value successfully, it
+ * stores the value in |*pdest|, increment |*pp| by the number of
+ * bytes read from |*pp|, and returns 0. Otherwise it returns -1.
+ */
+static int decode_varint_param(uint64_t *pdest, const uint8_t **pp,
+ const uint8_t *end) {
+ const uint8_t *p = *pp;
+ uint64_t valuelen;
+
+ if (decode_varint(&valuelen, &p, end) != 0) {
+ return -1;
+ }
+
+ if (p == end) {
+ return -1;
+ }
+
+ if ((uint64_t)(end - p) < valuelen) {
+ return -1;
+ }
+
+ if (ngtcp2_get_uvarintlen(p) != valuelen) {
+ return -1;
+ }
+
+ *pp = ngtcp2_get_uvarint(pdest, p);
+
+ return 0;
+}
+
+/*
+ * decode_cid_param decodes length prefixed ngtcp2_cid from the buffer
+ * pointed by |*pp| of length |end - *pp|. The length is encoded in
+ * varint form. If it decodes a value successfully, it stores the
+ * value in |*pdest|, increment |*pp| by the number of read from
+ * |*pp|, and returns the number of bytes read. Otherwise it returns
+ * the one of the negative error code:
+ *
+ * NGTCP2_ERR_MALFORMED_TRANSPORT_PARAM
+ * Could not decode Connection ID.
+ */
+static int decode_cid_param(ngtcp2_cid *pdest, const uint8_t **pp,
+ const uint8_t *end) {
+ const uint8_t *p = *pp;
+ uint64_t valuelen;
+
+ if (decode_varint(&valuelen, &p, end) != 0) {
+ return NGTCP2_ERR_MALFORMED_TRANSPORT_PARAM;
+ }
+
+ if ((valuelen != 0 && valuelen < NGTCP2_MIN_CIDLEN) ||
+ valuelen > NGTCP2_MAX_CIDLEN || (size_t)(end - p) < valuelen) {
+ return NGTCP2_ERR_MALFORMED_TRANSPORT_PARAM;
+ }
+
+ ngtcp2_cid_init(pdest, p, (size_t)valuelen);
+
+ p += valuelen;
+
+ *pp = p;
+
+ return 0;
+}
+
+int ngtcp2_decode_transport_params_versioned(
+ int transport_params_version, ngtcp2_transport_params *params,
+ ngtcp2_transport_params_type exttype, const uint8_t *data, size_t datalen) {
+ const uint8_t *p, *end, *lend;
+ size_t len;
+ uint64_t param_type;
+ uint64_t valuelen;
+ int rv;
+ int initial_scid_present = 0;
+ int original_dcid_present = 0;
+ ngtcp2_sockaddr_in *sa_in;
+ ngtcp2_sockaddr_in6 *sa_in6;
+ uint32_t version;
+
+ (void)transport_params_version;
+
+ if (datalen == 0) {
+ return NGTCP2_ERR_REQUIRED_TRANSPORT_PARAM;
+ }
+
+ /* Set default values */
+ memset(params, 0, sizeof(*params));
+ params->initial_max_streams_bidi = 0;
+ params->initial_max_streams_uni = 0;
+ params->initial_max_stream_data_bidi_local = 0;
+ params->initial_max_stream_data_bidi_remote = 0;
+ params->initial_max_stream_data_uni = 0;
+ params->max_udp_payload_size = NGTCP2_DEFAULT_MAX_RECV_UDP_PAYLOAD_SIZE;
+ params->ack_delay_exponent = NGTCP2_DEFAULT_ACK_DELAY_EXPONENT;
+ params->stateless_reset_token_present = 0;
+ params->preferred_address_present = 0;
+ params->disable_active_migration = 0;
+ params->max_ack_delay = NGTCP2_DEFAULT_MAX_ACK_DELAY;
+ params->max_idle_timeout = 0;
+ params->active_connection_id_limit =
+ NGTCP2_DEFAULT_ACTIVE_CONNECTION_ID_LIMIT;
+ params->retry_scid_present = 0;
+ params->max_datagram_frame_size = 0;
+ memset(&params->retry_scid, 0, sizeof(params->retry_scid));
+ memset(&params->initial_scid, 0, sizeof(params->initial_scid));
+ memset(&params->original_dcid, 0, sizeof(params->original_dcid));
+ params->version_info_present = 0;
+
+ p = data;
+ end = data + datalen;
+
+ for (; (size_t)(end - p) >= 2;) {
+ if (decode_varint(&param_type, &p, end) != 0) {
+ return NGTCP2_ERR_MALFORMED_TRANSPORT_PARAM;
+ }
+
+ switch (param_type) {
+ case NGTCP2_TRANSPORT_PARAM_INITIAL_MAX_STREAM_DATA_BIDI_LOCAL:
+ if (decode_varint_param(&params->initial_max_stream_data_bidi_local, &p,
+ end) != 0) {
+ return NGTCP2_ERR_MALFORMED_TRANSPORT_PARAM;
+ }
+ break;
+ case NGTCP2_TRANSPORT_PARAM_INITIAL_MAX_STREAM_DATA_BIDI_REMOTE:
+ if (decode_varint_param(&params->initial_max_stream_data_bidi_remote, &p,
+ end) != 0) {
+ return NGTCP2_ERR_MALFORMED_TRANSPORT_PARAM;
+ }
+ break;
+ case NGTCP2_TRANSPORT_PARAM_INITIAL_MAX_STREAM_DATA_UNI:
+ if (decode_varint_param(&params->initial_max_stream_data_uni, &p, end) !=
+ 0) {
+ return NGTCP2_ERR_MALFORMED_TRANSPORT_PARAM;
+ }
+ break;
+ case NGTCP2_TRANSPORT_PARAM_INITIAL_MAX_DATA:
+ if (decode_varint_param(&params->initial_max_data, &p, end) != 0) {
+ return NGTCP2_ERR_MALFORMED_TRANSPORT_PARAM;
+ }
+ break;
+ case NGTCP2_TRANSPORT_PARAM_INITIAL_MAX_STREAMS_BIDI:
+ if (decode_varint_param(&params->initial_max_streams_bidi, &p, end) !=
+ 0) {
+ return NGTCP2_ERR_MALFORMED_TRANSPORT_PARAM;
+ }
+ if (params->initial_max_streams_bidi > NGTCP2_MAX_STREAMS) {
+ return NGTCP2_ERR_MALFORMED_TRANSPORT_PARAM;
+ }
+ break;
+ case NGTCP2_TRANSPORT_PARAM_INITIAL_MAX_STREAMS_UNI:
+ if (decode_varint_param(&params->initial_max_streams_uni, &p, end) != 0) {
+ return NGTCP2_ERR_MALFORMED_TRANSPORT_PARAM;
+ }
+ if (params->initial_max_streams_uni > NGTCP2_MAX_STREAMS) {
+ return NGTCP2_ERR_MALFORMED_TRANSPORT_PARAM;
+ }
+ break;
+ case NGTCP2_TRANSPORT_PARAM_MAX_IDLE_TIMEOUT:
+ if (decode_varint_param(&params->max_idle_timeout, &p, end) != 0) {
+ return NGTCP2_ERR_MALFORMED_TRANSPORT_PARAM;
+ }
+ params->max_idle_timeout *= NGTCP2_MILLISECONDS;
+ break;
+ case NGTCP2_TRANSPORT_PARAM_MAX_UDP_PAYLOAD_SIZE:
+ if (decode_varint_param(&params->max_udp_payload_size, &p, end) != 0) {
+ return NGTCP2_ERR_MALFORMED_TRANSPORT_PARAM;
+ }
+ break;
+ case NGTCP2_TRANSPORT_PARAM_STATELESS_RESET_TOKEN:
+ if (exttype != NGTCP2_TRANSPORT_PARAMS_TYPE_ENCRYPTED_EXTENSIONS) {
+ return NGTCP2_ERR_MALFORMED_TRANSPORT_PARAM;
+ }
+ if (decode_varint(&valuelen, &p, end) != 0) {
+ return NGTCP2_ERR_MALFORMED_TRANSPORT_PARAM;
+ }
+ if ((size_t)valuelen != sizeof(params->stateless_reset_token)) {
+ return NGTCP2_ERR_MALFORMED_TRANSPORT_PARAM;
+ }
+ if ((size_t)(end - p) < sizeof(params->stateless_reset_token)) {
+ return NGTCP2_ERR_MALFORMED_TRANSPORT_PARAM;
+ }
+
+ p = ngtcp2_get_bytes(params->stateless_reset_token, p,
+ sizeof(params->stateless_reset_token));
+ params->stateless_reset_token_present = 1;
+
+ break;
+ case NGTCP2_TRANSPORT_PARAM_ACK_DELAY_EXPONENT:
+ if (decode_varint_param(&params->ack_delay_exponent, &p, end) != 0) {
+ return NGTCP2_ERR_MALFORMED_TRANSPORT_PARAM;
+ }
+ if (params->ack_delay_exponent > 20) {
+ return NGTCP2_ERR_MALFORMED_TRANSPORT_PARAM;
+ }
+ break;
+ case NGTCP2_TRANSPORT_PARAM_PREFERRED_ADDRESS:
+ if (exttype != NGTCP2_TRANSPORT_PARAMS_TYPE_ENCRYPTED_EXTENSIONS) {
+ return NGTCP2_ERR_MALFORMED_TRANSPORT_PARAM;
+ }
+ if (decode_varint(&valuelen, &p, end) != 0) {
+ return NGTCP2_ERR_MALFORMED_TRANSPORT_PARAM;
+ }
+ if ((size_t)(end - p) < valuelen) {
+ return NGTCP2_ERR_MALFORMED_TRANSPORT_PARAM;
+ }
+ len = 4 /* ipv4Address */ + 2 /* ipv4Port */ + 16 /* ipv6Address */ +
+ 2 /* ipv6Port */
+ + 1 /* cid length */ + NGTCP2_STATELESS_RESET_TOKENLEN;
+ if (valuelen < len) {
+ return NGTCP2_ERR_MALFORMED_TRANSPORT_PARAM;
+ }
+
+ sa_in = &params->preferred_address.ipv4;
+
+ p = ngtcp2_get_bytes(&sa_in->sin_addr, p, sizeof(sa_in->sin_addr));
+ p = ngtcp2_get_uint16be(&sa_in->sin_port, p);
+
+ if (sa_in->sin_port || memcmp(empty_address, &sa_in->sin_addr,
+ sizeof(sa_in->sin_addr)) != 0) {
+ sa_in->sin_family = AF_INET;
+ params->preferred_address.ipv4_present = 1;
+ }
+
+ sa_in6 = &params->preferred_address.ipv6;
+
+ p = ngtcp2_get_bytes(&sa_in6->sin6_addr, p, sizeof(sa_in6->sin6_addr));
+ p = ngtcp2_get_uint16be(&sa_in6->sin6_port, p);
+
+ if (sa_in6->sin6_port || memcmp(empty_address, &sa_in6->sin6_addr,
+ sizeof(sa_in6->sin6_addr)) != 0) {
+ sa_in6->sin6_family = AF_INET6;
+ params->preferred_address.ipv6_present = 1;
+ }
+
+ /* cid */
+ params->preferred_address.cid.datalen = *p++;
+ len += params->preferred_address.cid.datalen;
+ if (valuelen != len ||
+ params->preferred_address.cid.datalen > NGTCP2_MAX_CIDLEN ||
+ params->preferred_address.cid.datalen < NGTCP2_MIN_CIDLEN) {
+ return NGTCP2_ERR_MALFORMED_TRANSPORT_PARAM;
+ }
+ if (params->preferred_address.cid.datalen) {
+ p = ngtcp2_get_bytes(params->preferred_address.cid.data, p,
+ params->preferred_address.cid.datalen);
+ }
+
+ /* stateless reset token */
+ p = ngtcp2_get_bytes(
+ params->preferred_address.stateless_reset_token, p,
+ sizeof(params->preferred_address.stateless_reset_token));
+ params->preferred_address_present = 1;
+ break;
+ case NGTCP2_TRANSPORT_PARAM_DISABLE_ACTIVE_MIGRATION:
+ if (decode_varint(&valuelen, &p, end) != 0) {
+ return NGTCP2_ERR_MALFORMED_TRANSPORT_PARAM;
+ }
+ if (valuelen != 0) {
+ return NGTCP2_ERR_MALFORMED_TRANSPORT_PARAM;
+ }
+ params->disable_active_migration = 1;
+ break;
+ case NGTCP2_TRANSPORT_PARAM_ORIGINAL_DESTINATION_CONNECTION_ID:
+ if (exttype != NGTCP2_TRANSPORT_PARAMS_TYPE_ENCRYPTED_EXTENSIONS) {
+ return NGTCP2_ERR_MALFORMED_TRANSPORT_PARAM;
+ }
+ rv = decode_cid_param(&params->original_dcid, &p, end);
+ if (rv != 0) {
+ return rv;
+ }
+ original_dcid_present = 1;
+ break;
+ case NGTCP2_TRANSPORT_PARAM_RETRY_SOURCE_CONNECTION_ID:
+ if (exttype != NGTCP2_TRANSPORT_PARAMS_TYPE_ENCRYPTED_EXTENSIONS) {
+ return NGTCP2_ERR_MALFORMED_TRANSPORT_PARAM;
+ }
+ rv = decode_cid_param(&params->retry_scid, &p, end);
+ if (rv != 0) {
+ return rv;
+ }
+ params->retry_scid_present = 1;
+ break;
+ case NGTCP2_TRANSPORT_PARAM_INITIAL_SOURCE_CONNECTION_ID:
+ rv = decode_cid_param(&params->initial_scid, &p, end);
+ if (rv != 0) {
+ return rv;
+ }
+ initial_scid_present = 1;
+ break;
+ case NGTCP2_TRANSPORT_PARAM_MAX_ACK_DELAY:
+ if (decode_varint_param(&params->max_ack_delay, &p, end) != 0) {
+ return NGTCP2_ERR_MALFORMED_TRANSPORT_PARAM;
+ }
+ if (params->max_ack_delay >= 16384) {
+ return NGTCP2_ERR_MALFORMED_TRANSPORT_PARAM;
+ }
+ params->max_ack_delay *= NGTCP2_MILLISECONDS;
+ break;
+ case NGTCP2_TRANSPORT_PARAM_ACTIVE_CONNECTION_ID_LIMIT:
+ if (decode_varint_param(&params->active_connection_id_limit, &p, end) !=
+ 0) {
+ return NGTCP2_ERR_MALFORMED_TRANSPORT_PARAM;
+ }
+ break;
+ case NGTCP2_TRANSPORT_PARAM_MAX_DATAGRAM_FRAME_SIZE:
+ if (decode_varint_param(&params->max_datagram_frame_size, &p, end) != 0) {
+ return NGTCP2_ERR_MALFORMED_TRANSPORT_PARAM;
+ }
+ break;
+ case NGTCP2_TRANSPORT_PARAM_GREASE_QUIC_BIT:
+ if (decode_varint(&valuelen, &p, end) != 0) {
+ return NGTCP2_ERR_MALFORMED_TRANSPORT_PARAM;
+ }
+ if (valuelen != 0) {
+ return NGTCP2_ERR_MALFORMED_TRANSPORT_PARAM;
+ }
+ params->grease_quic_bit = 1;
+ break;
+ case NGTCP2_TRANSPORT_PARAM_VERSION_INFORMATION:
+ if (decode_varint(&valuelen, &p, end) != 0) {
+ return NGTCP2_ERR_MALFORMED_TRANSPORT_PARAM;
+ }
+ if ((size_t)(end - p) < valuelen) {
+ return NGTCP2_ERR_MALFORMED_TRANSPORT_PARAM;
+ }
+ if (valuelen < sizeof(uint32_t) || (valuelen & 0x3)) {
+ return NGTCP2_ERR_MALFORMED_TRANSPORT_PARAM;
+ }
+ p = ngtcp2_get_uint32(&params->version_info.chosen_version, p);
+ if (params->version_info.chosen_version == 0) {
+ return NGTCP2_ERR_MALFORMED_TRANSPORT_PARAM;
+ }
+ if (valuelen > sizeof(uint32_t)) {
+ params->version_info.available_versions = (uint8_t *)p;
+ params->version_info.available_versionslen =
+ (size_t)valuelen - sizeof(uint32_t);
+
+ for (lend = p + (valuelen - sizeof(uint32_t)); p != lend;) {
+ p = ngtcp2_get_uint32(&version, p);
+ if (version == 0) {
+ return NGTCP2_ERR_MALFORMED_TRANSPORT_PARAM;
+ }
+ }
+ }
+ params->version_info_present = 1;
+ break;
+ default:
+ /* Ignore unknown parameter */
+ if (decode_varint(&valuelen, &p, end) != 0) {
+ return NGTCP2_ERR_MALFORMED_TRANSPORT_PARAM;
+ }
+ if ((size_t)(end - p) < valuelen) {
+ return NGTCP2_ERR_MALFORMED_TRANSPORT_PARAM;
+ }
+ p += valuelen;
+ break;
+ }
+ }
+
+ if (end - p != 0) {
+ return NGTCP2_ERR_MALFORMED_TRANSPORT_PARAM;
+ }
+
+ if (!initial_scid_present ||
+ (exttype == NGTCP2_TRANSPORT_PARAMS_TYPE_ENCRYPTED_EXTENSIONS &&
+ !original_dcid_present)) {
+ return NGTCP2_ERR_REQUIRED_TRANSPORT_PARAM;
+ }
+
+ return 0;
+}
+
+static int transport_params_copy_new(ngtcp2_transport_params **pdest,
+ const ngtcp2_transport_params *src,
+ const ngtcp2_mem *mem) {
+ size_t len = sizeof(**pdest);
+ ngtcp2_transport_params *dest;
+ uint8_t *p;
+
+ if (src->version_info_present) {
+ len += src->version_info.available_versionslen;
+ }
+
+ dest = ngtcp2_mem_malloc(mem, len);
+ if (dest == NULL) {
+ return NGTCP2_ERR_NOMEM;
+ }
+
+ *dest = *src;
+
+ if (src->version_info_present && src->version_info.available_versionslen) {
+ p = (uint8_t *)dest + sizeof(*dest);
+ memcpy(p, src->version_info.available_versions,
+ src->version_info.available_versionslen);
+ dest->version_info.available_versions = p;
+ }
+
+ *pdest = dest;
+
+ return 0;
+}
+
+int ngtcp2_decode_transport_params_new(ngtcp2_transport_params **pparams,
+ ngtcp2_transport_params_type exttype,
+ const uint8_t *data, size_t datalen,
+ const ngtcp2_mem *mem) {
+ int rv;
+ ngtcp2_transport_params params;
+
+ rv = ngtcp2_decode_transport_params(&params, exttype, data, datalen);
+ if (rv < 0) {
+ return rv;
+ }
+
+ if (mem == NULL) {
+ mem = ngtcp2_mem_default();
+ }
+
+ return transport_params_copy_new(pparams, &params, mem);
+}
+
+void ngtcp2_transport_params_del(ngtcp2_transport_params *params,
+ const ngtcp2_mem *mem) {
+ if (params == NULL) {
+ return;
+ }
+
+ if (mem == NULL) {
+ mem = ngtcp2_mem_default();
+ }
+
+ ngtcp2_mem_free(mem, params);
+}
+
+int ngtcp2_transport_params_copy_new(ngtcp2_transport_params **pdest,
+ const ngtcp2_transport_params *src,
+ const ngtcp2_mem *mem) {
+ if (src == NULL) {
+ *pdest = NULL;
+ return 0;
+ }
+
+ return transport_params_copy_new(pdest, src, mem);
+}
diff --git a/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_crypto.h b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_crypto.h
new file mode 100644
index 0000000..3b91ce9
--- /dev/null
+++ b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_crypto.h
@@ -0,0 +1,148 @@
+/*
+ * ngtcp2
+ *
+ * Copyright (c) 2017 ngtcp2 contributors
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#ifndef NGTCP2_CRYPTO_H
+#define NGTCP2_CRYPTO_H
+
+#ifdef HAVE_CONFIG_H
+# include <config.h>
+#endif /* HAVE_CONFIG_H */
+
+#include <ngtcp2/ngtcp2.h>
+
+#include "ngtcp2_mem.h"
+
+/* NGTCP2_INITIAL_AEAD_OVERHEAD is an overhead of AEAD used by Initial
+ packets. Because QUIC uses AEAD_AES_128_GCM, the overhead is 16
+ bytes. */
+#define NGTCP2_INITIAL_AEAD_OVERHEAD 16
+
+/* NGTCP2_MAX_AEAD_OVERHEAD is expected maximum AEAD overhead. */
+#define NGTCP2_MAX_AEAD_OVERHEAD 16
+
+/* ngtcp2_transport_param_id is the registry of QUIC transport
+ parameter ID. */
+typedef enum ngtcp2_transport_param_id {
+ NGTCP2_TRANSPORT_PARAM_ORIGINAL_DESTINATION_CONNECTION_ID = 0x0000,
+ NGTCP2_TRANSPORT_PARAM_MAX_IDLE_TIMEOUT = 0x0001,
+ NGTCP2_TRANSPORT_PARAM_STATELESS_RESET_TOKEN = 0x0002,
+ NGTCP2_TRANSPORT_PARAM_MAX_UDP_PAYLOAD_SIZE = 0x0003,
+ NGTCP2_TRANSPORT_PARAM_INITIAL_MAX_DATA = 0x0004,
+ NGTCP2_TRANSPORT_PARAM_INITIAL_MAX_STREAM_DATA_BIDI_LOCAL = 0x0005,
+ NGTCP2_TRANSPORT_PARAM_INITIAL_MAX_STREAM_DATA_BIDI_REMOTE = 0x0006,
+ NGTCP2_TRANSPORT_PARAM_INITIAL_MAX_STREAM_DATA_UNI = 0x0007,
+ NGTCP2_TRANSPORT_PARAM_INITIAL_MAX_STREAMS_BIDI = 0x0008,
+ NGTCP2_TRANSPORT_PARAM_INITIAL_MAX_STREAMS_UNI = 0x0009,
+ NGTCP2_TRANSPORT_PARAM_ACK_DELAY_EXPONENT = 0x000a,
+ NGTCP2_TRANSPORT_PARAM_MAX_ACK_DELAY = 0x000b,
+ NGTCP2_TRANSPORT_PARAM_DISABLE_ACTIVE_MIGRATION = 0x000c,
+ NGTCP2_TRANSPORT_PARAM_PREFERRED_ADDRESS = 0x000d,
+ NGTCP2_TRANSPORT_PARAM_ACTIVE_CONNECTION_ID_LIMIT = 0x000e,
+ NGTCP2_TRANSPORT_PARAM_INITIAL_SOURCE_CONNECTION_ID = 0x000f,
+ NGTCP2_TRANSPORT_PARAM_RETRY_SOURCE_CONNECTION_ID = 0x0010,
+ /* https://datatracker.ietf.org/doc/html/rfc9221 */
+ NGTCP2_TRANSPORT_PARAM_MAX_DATAGRAM_FRAME_SIZE = 0x0020,
+ NGTCP2_TRANSPORT_PARAM_GREASE_QUIC_BIT = 0x2ab2,
+ /* https://datatracker.ietf.org/doc/html/draft-ietf-quic-version-negotiation-14
+ */
+ NGTCP2_TRANSPORT_PARAM_VERSION_INFORMATION = 0x11,
+} ngtcp2_transport_param_id;
+
+/* NGTCP2_CRYPTO_KM_FLAG_NONE indicates that no flag is set. */
+#define NGTCP2_CRYPTO_KM_FLAG_NONE 0x00u
+/* NGTCP2_CRYPTO_KM_FLAG_KEY_PHASE_ONE is set if key phase bit is
+ set. */
+#define NGTCP2_CRYPTO_KM_FLAG_KEY_PHASE_ONE 0x01u
+
+typedef struct ngtcp2_crypto_km {
+ ngtcp2_vec secret;
+ ngtcp2_crypto_aead_ctx aead_ctx;
+ ngtcp2_vec iv;
+ /* pkt_num is a packet number of a packet which uses this keying
+ material. For encryption key, it is the lowest packet number of
+ a packet. For decryption key, it is the lowest packet number of
+ a packet which can be decrypted with this keying material. */
+ int64_t pkt_num;
+ /* use_count is the number of encryption applied with this key.
+ This field is only used for tx key. */
+ uint64_t use_count;
+ /* flags is the bitwise OR of zero or more of
+ NGTCP2_CRYPTO_KM_FLAG_*. */
+ uint8_t flags;
+} ngtcp2_crypto_km;
+
+/*
+ * ngtcp2_crypto_km_new creates new ngtcp2_crypto_km object and
+ * assigns its pointer to |*pckm|. The |secret| of length
+ * |secretlen|, the |key| of length |keylen| and the |iv| of length
+ * |ivlen| are copied to |*pckm|. If |secretlen| == 0, the function
+ * assumes no secret is given which is acceptable. The sole reason to
+ * store secret is update keys. Only 1RTT key can be updated.
+ */
+int ngtcp2_crypto_km_new(ngtcp2_crypto_km **pckm, const uint8_t *secret,
+ size_t secretlen,
+ const ngtcp2_crypto_aead_ctx *aead_ctx,
+ const uint8_t *iv, size_t ivlen,
+ const ngtcp2_mem *mem);
+
+/*
+ * ngtcp2_crypto_km_nocopy_new is similar to ngtcp2_crypto_km_new, but
+ * it does not copy secret, key and IV.
+ */
+int ngtcp2_crypto_km_nocopy_new(ngtcp2_crypto_km **pckm, size_t secretlen,
+ size_t ivlen, const ngtcp2_mem *mem);
+
+void ngtcp2_crypto_km_del(ngtcp2_crypto_km *ckm, const ngtcp2_mem *mem);
+
+typedef struct ngtcp2_crypto_cc {
+ ngtcp2_crypto_aead aead;
+ ngtcp2_crypto_cipher hp;
+ ngtcp2_crypto_km *ckm;
+ ngtcp2_crypto_cipher_ctx hp_ctx;
+ ngtcp2_encrypt encrypt;
+ ngtcp2_decrypt decrypt;
+ ngtcp2_hp_mask hp_mask;
+} ngtcp2_crypto_cc;
+
+void ngtcp2_crypto_create_nonce(uint8_t *dest, const uint8_t *iv, size_t ivlen,
+ int64_t pkt_num);
+
+/*
+ * ngtcp2_transport_params_copy_new makes a copy of |src|, and assigns
+ * it to |*pdest|. If |src| is NULL, NULL is assigned to |*pdest|.
+ *
+ * Caller is responsible to call ngtcp2_transport_params_del to free
+ * the memory assigned to |*pdest|.
+ *
+ * This function returns 0 if it succeeds, or one of the following
+ * negative error codes:
+ *
+ * NGTCP2_ERR_NOMEM
+ * Out of memory.
+ */
+int ngtcp2_transport_params_copy_new(ngtcp2_transport_params **pdest,
+ const ngtcp2_transport_params *src,
+ const ngtcp2_mem *mem);
+
+#endif /* NGTCP2_CRYPTO_H */
diff --git a/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_err.c b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_err.c
new file mode 100644
index 0000000..ab932bd
--- /dev/null
+++ b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_err.c
@@ -0,0 +1,154 @@
+/*
+ * ngtcp2
+ *
+ * Copyright (c) 2017 ngtcp2 contributors
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "ngtcp2_err.h"
+
+const char *ngtcp2_strerror(int liberr) {
+ switch (liberr) {
+ case 0:
+ return "NO_ERROR";
+ case NGTCP2_ERR_INVALID_ARGUMENT:
+ return "ERR_INVALID_ARGUMENT";
+ case NGTCP2_ERR_NOBUF:
+ return "ERR_NOBUF";
+ case NGTCP2_ERR_PROTO:
+ return "ERR_PROTO";
+ case NGTCP2_ERR_INVALID_STATE:
+ return "ERR_INVALID_STATE";
+ case NGTCP2_ERR_ACK_FRAME:
+ return "ERR_ACK_FRAME";
+ case NGTCP2_ERR_STREAM_ID_BLOCKED:
+ return "ERR_STREAM_ID_BLOCKED";
+ case NGTCP2_ERR_STREAM_IN_USE:
+ return "ERR_STREAM_IN_USE";
+ case NGTCP2_ERR_STREAM_DATA_BLOCKED:
+ return "ERR_STREAM_DATA_BLOCKED";
+ case NGTCP2_ERR_FLOW_CONTROL:
+ return "ERR_FLOW_CONTROL";
+ case NGTCP2_ERR_CONNECTION_ID_LIMIT:
+ return "ERR_CONNECTION_ID_LIMIT";
+ case NGTCP2_ERR_STREAM_LIMIT:
+ return "ERR_STREAM_LIMIT";
+ case NGTCP2_ERR_FINAL_SIZE:
+ return "ERR_FINAL_SIZE";
+ case NGTCP2_ERR_CRYPTO:
+ return "ERR_CRYPTO";
+ case NGTCP2_ERR_PKT_NUM_EXHAUSTED:
+ return "ERR_PKT_NUM_EXHAUSTED";
+ case NGTCP2_ERR_NOMEM:
+ return "ERR_NOMEM";
+ case NGTCP2_ERR_REQUIRED_TRANSPORT_PARAM:
+ return "ERR_REQUIRED_TRANSPORT_PARAM";
+ case NGTCP2_ERR_MALFORMED_TRANSPORT_PARAM:
+ return "ERR_MALFORMED_TRANSPORT_PARAM";
+ case NGTCP2_ERR_FRAME_ENCODING:
+ return "ERR_FRAME_ENCODING";
+ case NGTCP2_ERR_DECRYPT:
+ return "ERR_DECRYPT";
+ case NGTCP2_ERR_STREAM_SHUT_WR:
+ return "ERR_STREAM_SHUT_WR";
+ case NGTCP2_ERR_STREAM_NOT_FOUND:
+ return "ERR_STREAM_NOT_FOUND";
+ case NGTCP2_ERR_STREAM_STATE:
+ return "ERR_STREAM_STATE";
+ case NGTCP2_ERR_RECV_VERSION_NEGOTIATION:
+ return "ERR_RECV_VERSION_NEGOTIATION";
+ case NGTCP2_ERR_CLOSING:
+ return "ERR_CLOSING";
+ case NGTCP2_ERR_DRAINING:
+ return "ERR_DRAINING";
+ case NGTCP2_ERR_TRANSPORT_PARAM:
+ return "ERR_TRANSPORT_PARAM";
+ case NGTCP2_ERR_DISCARD_PKT:
+ return "ERR_DISCARD_PKT";
+ case NGTCP2_ERR_CONN_ID_BLOCKED:
+ return "ERR_CONN_ID_BLOCKED";
+ case NGTCP2_ERR_CALLBACK_FAILURE:
+ return "ERR_CALLBACK_FAILURE";
+ case NGTCP2_ERR_INTERNAL:
+ return "ERR_INTERNAL";
+ case NGTCP2_ERR_CRYPTO_BUFFER_EXCEEDED:
+ return "ERR_CRYPTO_BUFFER_EXCEEDED";
+ case NGTCP2_ERR_WRITE_MORE:
+ return "ERR_WRITE_MORE";
+ case NGTCP2_ERR_RETRY:
+ return "ERR_RETRY";
+ case NGTCP2_ERR_DROP_CONN:
+ return "ERR_DROP_CONN";
+ case NGTCP2_ERR_AEAD_LIMIT_REACHED:
+ return "ERR_AEAD_LIMIT_REACHED";
+ case NGTCP2_ERR_NO_VIABLE_PATH:
+ return "ERR_NO_VIABLE_PATH";
+ case NGTCP2_ERR_VERSION_NEGOTIATION:
+ return "ERR_VERSION_NEGOTIATION";
+ case NGTCP2_ERR_HANDSHAKE_TIMEOUT:
+ return "ERR_HANDSHAKE_TIMEOUT";
+ case NGTCP2_ERR_VERSION_NEGOTIATION_FAILURE:
+ return "ERR_VERSION_NEGOTIATION_FAILURE";
+ case NGTCP2_ERR_IDLE_CLOSE:
+ return "ERR_IDLE_CLOSE";
+ default:
+ return "(unknown)";
+ }
+}
+
+int ngtcp2_err_is_fatal(int liberr) { return liberr < NGTCP2_ERR_FATAL; }
+
+uint64_t ngtcp2_err_infer_quic_transport_error_code(int liberr) {
+ switch (liberr) {
+ case 0:
+ return NGTCP2_NO_ERROR;
+ case NGTCP2_ERR_ACK_FRAME:
+ case NGTCP2_ERR_FRAME_ENCODING:
+ return NGTCP2_FRAME_ENCODING_ERROR;
+ case NGTCP2_ERR_FLOW_CONTROL:
+ return NGTCP2_FLOW_CONTROL_ERROR;
+ case NGTCP2_ERR_CONNECTION_ID_LIMIT:
+ return NGTCP2_CONNECTION_ID_LIMIT_ERROR;
+ case NGTCP2_ERR_STREAM_LIMIT:
+ return NGTCP2_STREAM_LIMIT_ERROR;
+ case NGTCP2_ERR_FINAL_SIZE:
+ return NGTCP2_FINAL_SIZE_ERROR;
+ case NGTCP2_ERR_REQUIRED_TRANSPORT_PARAM:
+ case NGTCP2_ERR_MALFORMED_TRANSPORT_PARAM:
+ case NGTCP2_ERR_TRANSPORT_PARAM:
+ return NGTCP2_TRANSPORT_PARAMETER_ERROR;
+ case NGTCP2_ERR_INVALID_ARGUMENT:
+ case NGTCP2_ERR_NOMEM:
+ case NGTCP2_ERR_CALLBACK_FAILURE:
+ return NGTCP2_INTERNAL_ERROR;
+ case NGTCP2_ERR_STREAM_STATE:
+ return NGTCP2_STREAM_STATE_ERROR;
+ case NGTCP2_ERR_CRYPTO_BUFFER_EXCEEDED:
+ return NGTCP2_CRYPTO_BUFFER_EXCEEDED;
+ case NGTCP2_ERR_AEAD_LIMIT_REACHED:
+ return NGTCP2_AEAD_LIMIT_REACHED;
+ case NGTCP2_ERR_NO_VIABLE_PATH:
+ return NGTCP2_NO_VIABLE_PATH;
+ case NGTCP2_ERR_VERSION_NEGOTIATION_FAILURE:
+ return NGTCP2_VERSION_NEGOTIATION_ERROR;
+ default:
+ return NGTCP2_PROTOCOL_VIOLATION;
+ }
+}
diff --git a/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_err.h b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_err.h
new file mode 100644
index 0000000..9229f54
--- /dev/null
+++ b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_err.h
@@ -0,0 +1,34 @@
+/*
+ * ngtcp2
+ *
+ * Copyright (c) 2017 ngtcp2 contributors
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#ifndef NGTCP2_ERR_H
+#define NGTCP2_ERR_H
+
+#ifdef HAVE_CONFIG_H
+# include <config.h>
+#endif /* HAVE_CONFIG_H */
+
+#include <ngtcp2/ngtcp2.h>
+
+#endif /* NGTCP2_ERR_H */
diff --git a/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_gaptr.c b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_gaptr.c
new file mode 100644
index 0000000..87c2389
--- /dev/null
+++ b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_gaptr.c
@@ -0,0 +1,167 @@
+/*
+ * ngtcp2
+ *
+ * Copyright (c) 2017 ngtcp2 contributors
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "ngtcp2_gaptr.h"
+
+#include <string.h>
+#include <assert.h>
+
+void ngtcp2_gaptr_init(ngtcp2_gaptr *gaptr, const ngtcp2_mem *mem) {
+ ngtcp2_ksl_init(&gaptr->gap, ngtcp2_ksl_range_compar, sizeof(ngtcp2_range),
+ mem);
+
+ gaptr->mem = mem;
+}
+
+static int gaptr_gap_init(ngtcp2_gaptr *gaptr) {
+ ngtcp2_range range = {0, UINT64_MAX};
+ int rv;
+
+ rv = ngtcp2_ksl_insert(&gaptr->gap, NULL, &range, NULL);
+ if (rv != 0) {
+ return rv;
+ }
+
+ return 0;
+}
+
+void ngtcp2_gaptr_free(ngtcp2_gaptr *gaptr) {
+ if (gaptr == NULL) {
+ return;
+ }
+
+ ngtcp2_ksl_free(&gaptr->gap);
+}
+
+int ngtcp2_gaptr_push(ngtcp2_gaptr *gaptr, uint64_t offset, uint64_t datalen) {
+ int rv;
+ ngtcp2_range k, m, l, r, q = {offset, offset + datalen};
+ ngtcp2_ksl_it it;
+
+ if (ngtcp2_ksl_len(&gaptr->gap) == 0) {
+ rv = gaptr_gap_init(gaptr);
+ if (rv != 0) {
+ return rv;
+ }
+ }
+
+ it = ngtcp2_ksl_lower_bound_compar(&gaptr->gap, &q,
+ ngtcp2_ksl_range_exclusive_compar);
+
+ for (; !ngtcp2_ksl_it_end(&it);) {
+ k = *(ngtcp2_range *)ngtcp2_ksl_it_key(&it);
+ m = ngtcp2_range_intersect(&q, &k);
+ if (!ngtcp2_range_len(&m)) {
+ break;
+ }
+
+ if (ngtcp2_range_eq(&k, &m)) {
+ ngtcp2_ksl_remove_hint(&gaptr->gap, &it, &it, &k);
+ continue;
+ }
+ ngtcp2_range_cut(&l, &r, &k, &m);
+ if (ngtcp2_range_len(&l)) {
+ ngtcp2_ksl_update_key(&gaptr->gap, &k, &l);
+
+ if (ngtcp2_range_len(&r)) {
+ rv = ngtcp2_ksl_insert(&gaptr->gap, &it, &r, NULL);
+ if (rv != 0) {
+ return rv;
+ }
+ }
+ } else if (ngtcp2_range_len(&r)) {
+ ngtcp2_ksl_update_key(&gaptr->gap, &k, &r);
+ }
+ ngtcp2_ksl_it_next(&it);
+ }
+ return 0;
+}
+
+uint64_t ngtcp2_gaptr_first_gap_offset(ngtcp2_gaptr *gaptr) {
+ ngtcp2_ksl_it it;
+ ngtcp2_range r;
+
+ if (ngtcp2_ksl_len(&gaptr->gap) == 0) {
+ return 0;
+ }
+
+ it = ngtcp2_ksl_begin(&gaptr->gap);
+ r = *(ngtcp2_range *)ngtcp2_ksl_it_key(&it);
+
+ return r.begin;
+}
+
+ngtcp2_range ngtcp2_gaptr_get_first_gap_after(ngtcp2_gaptr *gaptr,
+ uint64_t offset) {
+ ngtcp2_range q = {offset, offset + 1};
+ ngtcp2_ksl_it it;
+
+ if (ngtcp2_ksl_len(&gaptr->gap) == 0) {
+ ngtcp2_range r = {0, UINT64_MAX};
+ return r;
+ }
+
+ it = ngtcp2_ksl_lower_bound_compar(&gaptr->gap, &q,
+ ngtcp2_ksl_range_exclusive_compar);
+
+ assert(!ngtcp2_ksl_it_end(&it));
+
+ return *(ngtcp2_range *)ngtcp2_ksl_it_key(&it);
+}
+
+int ngtcp2_gaptr_is_pushed(ngtcp2_gaptr *gaptr, uint64_t offset,
+ uint64_t datalen) {
+ ngtcp2_range q = {offset, offset + datalen};
+ ngtcp2_ksl_it it;
+ ngtcp2_range k;
+ ngtcp2_range m;
+
+ if (ngtcp2_ksl_len(&gaptr->gap) == 0) {
+ return 0;
+ }
+
+ it = ngtcp2_ksl_lower_bound_compar(&gaptr->gap, &q,
+ ngtcp2_ksl_range_exclusive_compar);
+ k = *(ngtcp2_range *)ngtcp2_ksl_it_key(&it);
+ m = ngtcp2_range_intersect(&q, &k);
+
+ return ngtcp2_range_len(&m) == 0;
+}
+
+void ngtcp2_gaptr_drop_first_gap(ngtcp2_gaptr *gaptr) {
+ ngtcp2_ksl_it it;
+ ngtcp2_range r;
+
+ if (ngtcp2_ksl_len(&gaptr->gap) == 0) {
+ return;
+ }
+
+ it = ngtcp2_ksl_begin(&gaptr->gap);
+
+ assert(!ngtcp2_ksl_it_end(&it));
+
+ r = *(ngtcp2_range *)ngtcp2_ksl_it_key(&it);
+
+ ngtcp2_ksl_remove_hint(&gaptr->gap, NULL, &it, &r);
+}
diff --git a/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_gaptr.h b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_gaptr.h
new file mode 100644
index 0000000..0f100a8
--- /dev/null
+++ b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_gaptr.h
@@ -0,0 +1,98 @@
+/*
+ * ngtcp2
+ *
+ * Copyright (c) 2017 ngtcp2 contributors
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#ifndef NGTCP2_GAPTR_H
+#define NGTCP2_GAPTR_H
+
+#ifdef HAVE_CONFIG_H
+# include <config.h>
+#endif /* HAVE_CONFIG_H */
+
+#include <ngtcp2/ngtcp2.h>
+
+#include "ngtcp2_mem.h"
+#include "ngtcp2_ksl.h"
+#include "ngtcp2_range.h"
+
+/*
+ * ngtcp2_gaptr maintains the gap in the range [0, UINT64_MAX).
+ */
+typedef struct ngtcp2_gaptr {
+ /* gap maintains the range of offset which is not received
+ yet. Initially, its range is [0, UINT64_MAX). */
+ ngtcp2_ksl gap;
+ /* mem is custom memory allocator */
+ const ngtcp2_mem *mem;
+} ngtcp2_gaptr;
+
+/*
+ * ngtcp2_gaptr_init initializes |gaptr|.
+ */
+void ngtcp2_gaptr_init(ngtcp2_gaptr *gaptr, const ngtcp2_mem *mem);
+
+/*
+ * ngtcp2_gaptr_free frees resources allocated for |gaptr|.
+ */
+void ngtcp2_gaptr_free(ngtcp2_gaptr *gaptr);
+
+/*
+ * ngtcp2_gaptr_push adds new data of length |datalen| at the stream
+ * offset |offset|.
+ *
+ * This function returns 0 if it succeeds, or one of the following
+ * negative error codes:
+ *
+ * NGTCP2_ERR_NOMEM
+ * Out of memory
+ */
+int ngtcp2_gaptr_push(ngtcp2_gaptr *gaptr, uint64_t offset, uint64_t datalen);
+
+/*
+ * ngtcp2_gaptr_first_gap_offset returns the offset to the first gap.
+ * If there is no gap, it returns UINT64_MAX.
+ */
+uint64_t ngtcp2_gaptr_first_gap_offset(ngtcp2_gaptr *gaptr);
+
+/*
+ * ngtcp2_gaptr_get_first_gap_after returns the first gap which
+ * overlaps or comes after |offset|.
+ */
+ngtcp2_range ngtcp2_gaptr_get_first_gap_after(ngtcp2_gaptr *gaptr,
+ uint64_t offset);
+
+/*
+ * ngtcp2_gaptr_is_pushed returns nonzero if range [offset, offset +
+ * datalen) is completely pushed into this object.
+ */
+int ngtcp2_gaptr_is_pushed(ngtcp2_gaptr *gaptr, uint64_t offset,
+ uint64_t datalen);
+
+/*
+ * ngtcp2_gaptr_drop_first_gap deletes the first gap entirely as if
+ * the range is pushed. This function assumes that at least one gap
+ * exists.
+ */
+void ngtcp2_gaptr_drop_first_gap(ngtcp2_gaptr *gaptr);
+
+#endif /* NGTCP2_GAPTR_H */
diff --git a/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_idtr.c b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_idtr.c
new file mode 100644
index 0000000..d988022
--- /dev/null
+++ b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_idtr.c
@@ -0,0 +1,79 @@
+/*
+ * ngtcp2
+ *
+ * Copyright (c) 2017 ngtcp2 contributors
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "ngtcp2_idtr.h"
+
+#include <assert.h>
+
+void ngtcp2_idtr_init(ngtcp2_idtr *idtr, int server, const ngtcp2_mem *mem) {
+ ngtcp2_gaptr_init(&idtr->gap, mem);
+
+ idtr->server = server;
+}
+
+void ngtcp2_idtr_free(ngtcp2_idtr *idtr) {
+ if (idtr == NULL) {
+ return;
+ }
+
+ ngtcp2_gaptr_free(&idtr->gap);
+}
+
+/*
+ * id_from_stream_id translates |stream_id| to id space used by
+ * ngtcp2_idtr.
+ */
+static uint64_t id_from_stream_id(int64_t stream_id) {
+ return (uint64_t)(stream_id >> 2);
+}
+
+int ngtcp2_idtr_open(ngtcp2_idtr *idtr, int64_t stream_id) {
+ uint64_t q;
+
+ assert((idtr->server && (stream_id % 2)) ||
+ (!idtr->server && (stream_id % 2)) == 0);
+
+ q = id_from_stream_id(stream_id);
+
+ if (ngtcp2_gaptr_is_pushed(&idtr->gap, q, 1)) {
+ return NGTCP2_ERR_STREAM_IN_USE;
+ }
+
+ return ngtcp2_gaptr_push(&idtr->gap, q, 1);
+}
+
+int ngtcp2_idtr_is_open(ngtcp2_idtr *idtr, int64_t stream_id) {
+ uint64_t q;
+
+ assert((idtr->server && (stream_id % 2)) ||
+ (!idtr->server && (stream_id % 2)) == 0);
+
+ q = id_from_stream_id(stream_id);
+
+ return ngtcp2_gaptr_is_pushed(&idtr->gap, q, 1);
+}
+
+uint64_t ngtcp2_idtr_first_gap(ngtcp2_idtr *idtr) {
+ return ngtcp2_gaptr_first_gap_offset(&idtr->gap);
+}
diff --git a/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_idtr.h b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_idtr.h
new file mode 100644
index 0000000..edb8c68
--- /dev/null
+++ b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_idtr.h
@@ -0,0 +1,89 @@
+/*
+ * ngtcp2
+ *
+ * Copyright (c) 2017 ngtcp2 contributors
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#ifndef NGTCP2_IDTR_H
+#define NGTCP2_IDTR_H
+
+#ifdef HAVE_CONFIG_H
+# include <config.h>
+#endif /* HAVE_CONFIG_H */
+
+#include <ngtcp2/ngtcp2.h>
+
+#include "ngtcp2_mem.h"
+#include "ngtcp2_gaptr.h"
+
+/*
+ * ngtcp2_idtr tracks the usage of stream ID.
+ */
+typedef struct ngtcp2_idtr {
+ /* gap maintains the range of ID which is not used yet. Initially,
+ its range is [0, UINT64_MAX). */
+ ngtcp2_gaptr gap;
+ /* server is nonzero if this object records server initiated stream
+ ID. */
+ int server;
+} ngtcp2_idtr;
+
+/*
+ * ngtcp2_idtr_init initializes |idtr|.
+ *
+ * If this object records server initiated ID (even number), set
+ * |server| to nonzero.
+ */
+void ngtcp2_idtr_init(ngtcp2_idtr *idtr, int server, const ngtcp2_mem *mem);
+
+/*
+ * ngtcp2_idtr_free frees resources allocated for |idtr|.
+ */
+void ngtcp2_idtr_free(ngtcp2_idtr *idtr);
+
+/*
+ * ngtcp2_idtr_open claims that |stream_id| is in used.
+ *
+ * It returns 0 if it succeeds, or one of the following negative error
+ * codes:
+ *
+ * NGTCP2_ERR_STREAM_IN_USE
+ * ID has already been used.
+ * NGTCP2_ERR_NOMEM
+ * Out of memory.
+ */
+int ngtcp2_idtr_open(ngtcp2_idtr *idtr, int64_t stream_id);
+
+/*
+ * ngtcp2_idtr_open tells whether ID |stream_id| is in used or not.
+ *
+ * It returns nonzero if |stream_id| is used.
+ */
+int ngtcp2_idtr_is_open(ngtcp2_idtr *idtr, int64_t stream_id);
+
+/*
+ * ngtcp2_idtr_first_gap returns the first id of first gap. If there
+ * is no gap, it returns UINT64_MAX. The returned id is an id space
+ * used in this object internally, and not stream ID.
+ */
+uint64_t ngtcp2_idtr_first_gap(ngtcp2_idtr *idtr);
+
+#endif /* NGTCP2_IDTR_H */
diff --git a/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_ksl.c b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_ksl.c
new file mode 100644
index 0000000..0bd424c
--- /dev/null
+++ b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_ksl.c
@@ -0,0 +1,819 @@
+/*
+ * ngtcp2
+ *
+ * Copyright (c) 2018 ngtcp2 contributors
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "ngtcp2_ksl.h"
+
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+#include <stdio.h>
+
+#include "ngtcp2_macro.h"
+#include "ngtcp2_mem.h"
+#include "ngtcp2_range.h"
+
+static ngtcp2_ksl_blk null_blk = {{{NULL, NULL, 0, 0, {0}}}};
+
+static size_t ksl_nodelen(size_t keylen) {
+ return (sizeof(ngtcp2_ksl_node) + keylen - sizeof(uint64_t) + 0xfu) &
+ ~(uintptr_t)0xfu;
+}
+
+static size_t ksl_blklen(size_t nodelen) {
+ return sizeof(ngtcp2_ksl_blk) + nodelen * NGTCP2_KSL_MAX_NBLK -
+ sizeof(uint64_t);
+}
+
+/*
+ * ksl_node_set_key sets |key| to |node|.
+ */
+static void ksl_node_set_key(ngtcp2_ksl *ksl, ngtcp2_ksl_node *node,
+ const void *key) {
+ memcpy(node->key, key, ksl->keylen);
+}
+
+void ngtcp2_ksl_init(ngtcp2_ksl *ksl, ngtcp2_ksl_compar compar, size_t keylen,
+ const ngtcp2_mem *mem) {
+ size_t nodelen = ksl_nodelen(keylen);
+
+ ngtcp2_objalloc_init(&ksl->blkalloc,
+ ((ksl_blklen(nodelen) + 0xfu) & ~(uintptr_t)0xfu) * 8,
+ mem);
+
+ ksl->head = NULL;
+ ksl->front = ksl->back = NULL;
+ ksl->compar = compar;
+ ksl->keylen = keylen;
+ ksl->nodelen = nodelen;
+ ksl->n = 0;
+}
+
+static ngtcp2_ksl_blk *ksl_blk_objalloc_new(ngtcp2_ksl *ksl) {
+ return ngtcp2_objalloc_ksl_blk_len_get(&ksl->blkalloc,
+ ksl_blklen(ksl->nodelen));
+}
+
+static void ksl_blk_objalloc_del(ngtcp2_ksl *ksl, ngtcp2_ksl_blk *blk) {
+ ngtcp2_objalloc_ksl_blk_release(&ksl->blkalloc, blk);
+}
+
+static int ksl_head_init(ngtcp2_ksl *ksl) {
+ ngtcp2_ksl_blk *head = ksl_blk_objalloc_new(ksl);
+ if (!head) {
+ return NGTCP2_ERR_NOMEM;
+ }
+
+ head->next = head->prev = NULL;
+ head->n = 0;
+ head->leaf = 1;
+
+ ksl->head = head;
+ ksl->front = ksl->back = head;
+
+ return 0;
+}
+
+#ifdef NOMEMPOOL
+/*
+ * ksl_free_blk frees |blk| recursively.
+ */
+static void ksl_free_blk(ngtcp2_ksl *ksl, ngtcp2_ksl_blk *blk) {
+ size_t i;
+
+ if (!blk->leaf) {
+ for (i = 0; i < blk->n; ++i) {
+ ksl_free_blk(ksl, ngtcp2_ksl_nth_node(ksl, blk, i)->blk);
+ }
+ }
+
+ ksl_blk_objalloc_del(ksl, blk);
+}
+#endif /* NOMEMPOOL */
+
+void ngtcp2_ksl_free(ngtcp2_ksl *ksl) {
+ if (!ksl || !ksl->head) {
+ return;
+ }
+
+#ifdef NOMEMPOOL
+ ksl_free_blk(ksl, ksl->head);
+#endif /* NOMEMPOOL */
+
+ ngtcp2_objalloc_free(&ksl->blkalloc);
+}
+
+/*
+ * ksl_split_blk splits |blk| into 2 ngtcp2_ksl_blk objects. The new
+ * ngtcp2_ksl_blk is always the "right" block.
+ *
+ * It returns the pointer to the ngtcp2_ksl_blk created which is the
+ * located at the right of |blk|, or NULL which indicates out of
+ * memory error.
+ */
+static ngtcp2_ksl_blk *ksl_split_blk(ngtcp2_ksl *ksl, ngtcp2_ksl_blk *blk) {
+ ngtcp2_ksl_blk *rblk;
+
+ rblk = ksl_blk_objalloc_new(ksl);
+ if (rblk == NULL) {
+ return NULL;
+ }
+
+ rblk->next = blk->next;
+ blk->next = rblk;
+ if (rblk->next) {
+ rblk->next->prev = rblk;
+ } else if (ksl->back == blk) {
+ ksl->back = rblk;
+ }
+ rblk->prev = blk;
+ rblk->leaf = blk->leaf;
+
+ rblk->n = blk->n / 2;
+
+ memcpy(rblk->nodes, blk->nodes + ksl->nodelen * (blk->n - rblk->n),
+ ksl->nodelen * rblk->n);
+
+ blk->n -= rblk->n;
+
+ assert(blk->n >= NGTCP2_KSL_MIN_NBLK);
+ assert(rblk->n >= NGTCP2_KSL_MIN_NBLK);
+
+ return rblk;
+}
+
+/*
+ * ksl_split_node splits a node included in |blk| at the position |i|
+ * into 2 adjacent nodes. The new node is always inserted at the
+ * position |i+1|.
+ *
+ * It returns 0 if it succeeds, or one of the following negative error
+ * codes:
+ *
+ * NGTCP2_ERR_NOMEM
+ * Out of memory.
+ */
+static int ksl_split_node(ngtcp2_ksl *ksl, ngtcp2_ksl_blk *blk, size_t i) {
+ ngtcp2_ksl_node *node;
+ ngtcp2_ksl_blk *lblk = ngtcp2_ksl_nth_node(ksl, blk, i)->blk, *rblk;
+
+ rblk = ksl_split_blk(ksl, lblk);
+ if (rblk == NULL) {
+ return NGTCP2_ERR_NOMEM;
+ }
+
+ memmove(blk->nodes + (i + 2) * ksl->nodelen,
+ blk->nodes + (i + 1) * ksl->nodelen,
+ ksl->nodelen * (blk->n - (i + 1)));
+
+ node = ngtcp2_ksl_nth_node(ksl, blk, i + 1);
+ node->blk = rblk;
+ ++blk->n;
+ ksl_node_set_key(ksl, node, ngtcp2_ksl_nth_node(ksl, rblk, rblk->n - 1)->key);
+
+ node = ngtcp2_ksl_nth_node(ksl, blk, i);
+ ksl_node_set_key(ksl, node, ngtcp2_ksl_nth_node(ksl, lblk, lblk->n - 1)->key);
+
+ return 0;
+}
+
+/*
+ * ksl_split_head splits a head (root) block. It increases the height
+ * of skip list by 1.
+ *
+ * It returns 0 if it succeeds, or one of the following negative error
+ * codes:
+ *
+ * NGTCP2_ERR_NOMEM
+ * Out of memory.
+ */
+static int ksl_split_head(ngtcp2_ksl *ksl) {
+ ngtcp2_ksl_blk *rblk = NULL, *lblk, *nhead = NULL;
+ ngtcp2_ksl_node *node;
+
+ rblk = ksl_split_blk(ksl, ksl->head);
+ if (rblk == NULL) {
+ return NGTCP2_ERR_NOMEM;
+ }
+
+ lblk = ksl->head;
+
+ nhead = ksl_blk_objalloc_new(ksl);
+ if (nhead == NULL) {
+ ksl_blk_objalloc_del(ksl, rblk);
+ return NGTCP2_ERR_NOMEM;
+ }
+ nhead->next = nhead->prev = NULL;
+ nhead->n = 2;
+ nhead->leaf = 0;
+
+ node = ngtcp2_ksl_nth_node(ksl, nhead, 0);
+ ksl_node_set_key(ksl, node, ngtcp2_ksl_nth_node(ksl, lblk, lblk->n - 1)->key);
+ node->blk = lblk;
+
+ node = ngtcp2_ksl_nth_node(ksl, nhead, 1);
+ ksl_node_set_key(ksl, node, ngtcp2_ksl_nth_node(ksl, rblk, rblk->n - 1)->key);
+ node->blk = rblk;
+
+ ksl->head = nhead;
+
+ return 0;
+}
+
+/*
+ * insert_node inserts a node whose key is |key| with the associated
+ * |data| at the index of |i|. This function assumes that the number
+ * of nodes contained by |blk| is strictly less than
+ * NGTCP2_KSL_MAX_NBLK.
+ */
+static void ksl_insert_node(ngtcp2_ksl *ksl, ngtcp2_ksl_blk *blk, size_t i,
+ const ngtcp2_ksl_key *key, void *data) {
+ ngtcp2_ksl_node *node;
+
+ assert(blk->n < NGTCP2_KSL_MAX_NBLK);
+
+ memmove(blk->nodes + (i + 1) * ksl->nodelen, blk->nodes + i * ksl->nodelen,
+ ksl->nodelen * (blk->n - i));
+
+ node = ngtcp2_ksl_nth_node(ksl, blk, i);
+ ksl_node_set_key(ksl, node, key);
+ node->data = data;
+
+ ++blk->n;
+}
+
+static size_t ksl_bsearch(ngtcp2_ksl *ksl, ngtcp2_ksl_blk *blk,
+ const ngtcp2_ksl_key *key, ngtcp2_ksl_compar compar) {
+ size_t i;
+ ngtcp2_ksl_node *node;
+
+ for (i = 0, node = (ngtcp2_ksl_node *)(void *)blk->nodes;
+ i < blk->n && compar((ngtcp2_ksl_key *)node->key, key);
+ ++i, node = (ngtcp2_ksl_node *)(void *)((uint8_t *)node + ksl->nodelen))
+ ;
+
+ return i;
+}
+
+int ngtcp2_ksl_insert(ngtcp2_ksl *ksl, ngtcp2_ksl_it *it,
+ const ngtcp2_ksl_key *key, void *data) {
+ ngtcp2_ksl_blk *blk;
+ ngtcp2_ksl_node *node;
+ size_t i;
+ int rv;
+
+ if (!ksl->head) {
+ rv = ksl_head_init(ksl);
+ if (rv != 0) {
+ return rv;
+ }
+ }
+
+ blk = ksl->head;
+
+ if (blk->n == NGTCP2_KSL_MAX_NBLK) {
+ rv = ksl_split_head(ksl);
+ if (rv != 0) {
+ return rv;
+ }
+ blk = ksl->head;
+ }
+
+ for (;;) {
+ i = ksl_bsearch(ksl, blk, key, ksl->compar);
+
+ if (blk->leaf) {
+ if (i < blk->n &&
+ !ksl->compar(key, ngtcp2_ksl_nth_node(ksl, blk, i)->key)) {
+ if (it) {
+ *it = ngtcp2_ksl_end(ksl);
+ }
+ return NGTCP2_ERR_INVALID_ARGUMENT;
+ }
+ ksl_insert_node(ksl, blk, i, key, data);
+ ++ksl->n;
+ if (it) {
+ ngtcp2_ksl_it_init(it, ksl, blk, i);
+ }
+ return 0;
+ }
+
+ if (i == blk->n) {
+ /* This insertion extends the largest key in this subtree. */
+ for (; !blk->leaf;) {
+ node = ngtcp2_ksl_nth_node(ksl, blk, blk->n - 1);
+ if (node->blk->n == NGTCP2_KSL_MAX_NBLK) {
+ rv = ksl_split_node(ksl, blk, blk->n - 1);
+ if (rv != 0) {
+ return rv;
+ }
+ node = ngtcp2_ksl_nth_node(ksl, blk, blk->n - 1);
+ }
+ ksl_node_set_key(ksl, node, key);
+ blk = node->blk;
+ }
+ ksl_insert_node(ksl, blk, blk->n, key, data);
+ ++ksl->n;
+ if (it) {
+ ngtcp2_ksl_it_init(it, ksl, blk, blk->n - 1);
+ }
+ return 0;
+ }
+
+ node = ngtcp2_ksl_nth_node(ksl, blk, i);
+
+ if (node->blk->n == NGTCP2_KSL_MAX_NBLK) {
+ rv = ksl_split_node(ksl, blk, i);
+ if (rv != 0) {
+ return rv;
+ }
+ if (ksl->compar((ngtcp2_ksl_key *)node->key, key)) {
+ node = ngtcp2_ksl_nth_node(ksl, blk, i + 1);
+ if (ksl->compar((ngtcp2_ksl_key *)node->key, key)) {
+ ksl_node_set_key(ksl, node, key);
+ }
+ }
+ }
+
+ blk = node->blk;
+ }
+}
+
+/*
+ * ksl_remove_node removes the node included in |blk| at the index of
+ * |i|.
+ */
+static void ksl_remove_node(ngtcp2_ksl *ksl, ngtcp2_ksl_blk *blk, size_t i) {
+ memmove(blk->nodes + i * ksl->nodelen, blk->nodes + (i + 1) * ksl->nodelen,
+ ksl->nodelen * (blk->n - (i + 1)));
+
+ --blk->n;
+}
+
+/*
+ * ksl_merge_node merges 2 nodes which are the nodes at the index of
+ * |i| and |i + 1|.
+ *
+ * If |blk| is the direct descendant of head (root) block and the head
+ * block contains just 2 nodes, the merged block becomes head block,
+ * which decreases the height of |ksl| by 1.
+ *
+ * This function returns the pointer to the merged block.
+ */
+static ngtcp2_ksl_blk *ksl_merge_node(ngtcp2_ksl *ksl, ngtcp2_ksl_blk *blk,
+ size_t i) {
+ ngtcp2_ksl_blk *lblk, *rblk;
+
+ assert(i + 1 < blk->n);
+
+ lblk = ngtcp2_ksl_nth_node(ksl, blk, i)->blk;
+ rblk = ngtcp2_ksl_nth_node(ksl, blk, i + 1)->blk;
+
+ assert(lblk->n + rblk->n < NGTCP2_KSL_MAX_NBLK);
+
+ memcpy(lblk->nodes + ksl->nodelen * lblk->n, rblk->nodes,
+ ksl->nodelen * rblk->n);
+
+ lblk->n += rblk->n;
+ lblk->next = rblk->next;
+ if (lblk->next) {
+ lblk->next->prev = lblk;
+ } else if (ksl->back == rblk) {
+ ksl->back = lblk;
+ }
+
+ ksl_blk_objalloc_del(ksl, rblk);
+
+ if (ksl->head == blk && blk->n == 2) {
+ ksl_blk_objalloc_del(ksl, ksl->head);
+ ksl->head = lblk;
+ } else {
+ ksl_remove_node(ksl, blk, i + 1);
+ ksl_node_set_key(ksl, ngtcp2_ksl_nth_node(ksl, blk, i),
+ ngtcp2_ksl_nth_node(ksl, lblk, lblk->n - 1)->key);
+ }
+
+ return lblk;
+}
+
+/*
+ * ksl_shift_left moves the first nodes in blk->nodes[i]->blk->nodes
+ * to blk->nodes[i - 1]->blk->nodes in a manner that they have the
+ * same amount of nodes as much as possible.
+ */
+static void ksl_shift_left(ngtcp2_ksl *ksl, ngtcp2_ksl_blk *blk, size_t i) {
+ ngtcp2_ksl_node *lnode, *rnode;
+ size_t n;
+
+ assert(i > 0);
+
+ lnode = ngtcp2_ksl_nth_node(ksl, blk, i - 1);
+ rnode = ngtcp2_ksl_nth_node(ksl, blk, i);
+
+ assert(lnode->blk->n < NGTCP2_KSL_MAX_NBLK);
+ assert(rnode->blk->n > NGTCP2_KSL_MIN_NBLK);
+
+ n = (lnode->blk->n + rnode->blk->n + 1) / 2 - lnode->blk->n;
+
+ assert(n > 0);
+ assert(lnode->blk->n <= NGTCP2_KSL_MAX_NBLK - n);
+ assert(rnode->blk->n >= NGTCP2_KSL_MIN_NBLK + n);
+
+ memcpy(lnode->blk->nodes + ksl->nodelen * lnode->blk->n, rnode->blk->nodes,
+ ksl->nodelen * n);
+
+ lnode->blk->n += (uint32_t)n;
+ rnode->blk->n -= (uint32_t)n;
+
+ ksl_node_set_key(
+ ksl, lnode, ngtcp2_ksl_nth_node(ksl, lnode->blk, lnode->blk->n - 1)->key);
+
+ memmove(rnode->blk->nodes, rnode->blk->nodes + ksl->nodelen * n,
+ ksl->nodelen * rnode->blk->n);
+}
+
+/*
+ * ksl_shift_right moves the last nodes in blk->nodes[i]->blk->nodes
+ * to blk->nodes[i + 1]->blk->nodes in a manner that they have the
+ * same amount of nodes as much as possible..
+ */
+static void ksl_shift_right(ngtcp2_ksl *ksl, ngtcp2_ksl_blk *blk, size_t i) {
+ ngtcp2_ksl_node *lnode, *rnode;
+ size_t n;
+
+ assert(i < blk->n - 1);
+
+ lnode = ngtcp2_ksl_nth_node(ksl, blk, i);
+ rnode = ngtcp2_ksl_nth_node(ksl, blk, i + 1);
+
+ assert(lnode->blk->n > NGTCP2_KSL_MIN_NBLK);
+ assert(rnode->blk->n < NGTCP2_KSL_MAX_NBLK);
+
+ n = (lnode->blk->n + rnode->blk->n + 1) / 2 - rnode->blk->n;
+
+ assert(n > 0);
+ assert(lnode->blk->n >= NGTCP2_KSL_MIN_NBLK + n);
+ assert(rnode->blk->n <= NGTCP2_KSL_MAX_NBLK - n);
+
+ memmove(rnode->blk->nodes + ksl->nodelen * n, rnode->blk->nodes,
+ ksl->nodelen * rnode->blk->n);
+
+ rnode->blk->n += (uint32_t)n;
+ lnode->blk->n -= (uint32_t)n;
+
+ memcpy(rnode->blk->nodes, lnode->blk->nodes + ksl->nodelen * lnode->blk->n,
+ ksl->nodelen * n);
+
+ ksl_node_set_key(
+ ksl, lnode, ngtcp2_ksl_nth_node(ksl, lnode->blk, lnode->blk->n - 1)->key);
+}
+
+/*
+ * key_equal returns nonzero if |lhs| and |rhs| are equal using the
+ * function |compar|.
+ */
+static int key_equal(ngtcp2_ksl_compar compar, const ngtcp2_ksl_key *lhs,
+ const ngtcp2_ksl_key *rhs) {
+ return !compar(lhs, rhs) && !compar(rhs, lhs);
+}
+
+int ngtcp2_ksl_remove_hint(ngtcp2_ksl *ksl, ngtcp2_ksl_it *it,
+ const ngtcp2_ksl_it *hint,
+ const ngtcp2_ksl_key *key) {
+ ngtcp2_ksl_blk *blk = hint->blk;
+
+ assert(ksl->head);
+
+ if (blk->n <= NGTCP2_KSL_MIN_NBLK) {
+ return ngtcp2_ksl_remove(ksl, it, key);
+ }
+
+ ksl_remove_node(ksl, blk, hint->i);
+
+ --ksl->n;
+
+ if (it) {
+ if (hint->i == blk->n && blk->next) {
+ ngtcp2_ksl_it_init(it, ksl, blk->next, 0);
+ } else {
+ ngtcp2_ksl_it_init(it, ksl, blk, hint->i);
+ }
+ }
+
+ return 0;
+}
+
+int ngtcp2_ksl_remove(ngtcp2_ksl *ksl, ngtcp2_ksl_it *it,
+ const ngtcp2_ksl_key *key) {
+ ngtcp2_ksl_blk *blk = ksl->head;
+ ngtcp2_ksl_node *node;
+ size_t i;
+
+ if (!ksl->head) {
+ return NGTCP2_ERR_INVALID_ARGUMENT;
+ }
+
+ if (!blk->leaf && blk->n == 2 &&
+ ngtcp2_ksl_nth_node(ksl, blk, 0)->blk->n == NGTCP2_KSL_MIN_NBLK &&
+ ngtcp2_ksl_nth_node(ksl, blk, 1)->blk->n == NGTCP2_KSL_MIN_NBLK) {
+ blk = ksl_merge_node(ksl, ksl->head, 0);
+ }
+
+ for (;;) {
+ i = ksl_bsearch(ksl, blk, key, ksl->compar);
+
+ if (i == blk->n) {
+ if (it) {
+ *it = ngtcp2_ksl_end(ksl);
+ }
+ return NGTCP2_ERR_INVALID_ARGUMENT;
+ }
+
+ if (blk->leaf) {
+ if (ksl->compar(key, ngtcp2_ksl_nth_node(ksl, blk, i)->key)) {
+ if (it) {
+ *it = ngtcp2_ksl_end(ksl);
+ }
+ return NGTCP2_ERR_INVALID_ARGUMENT;
+ }
+ ksl_remove_node(ksl, blk, i);
+ --ksl->n;
+ if (it) {
+ if (blk->n == i && blk->next) {
+ ngtcp2_ksl_it_init(it, ksl, blk->next, 0);
+ } else {
+ ngtcp2_ksl_it_init(it, ksl, blk, i);
+ }
+ }
+ return 0;
+ }
+
+ node = ngtcp2_ksl_nth_node(ksl, blk, i);
+
+ if (node->blk->n > NGTCP2_KSL_MIN_NBLK) {
+ blk = node->blk;
+ continue;
+ }
+
+ assert(node->blk->n == NGTCP2_KSL_MIN_NBLK);
+
+ if (i + 1 < blk->n &&
+ ngtcp2_ksl_nth_node(ksl, blk, i + 1)->blk->n > NGTCP2_KSL_MIN_NBLK) {
+ ksl_shift_left(ksl, blk, i + 1);
+ blk = node->blk;
+ continue;
+ }
+
+ if (i > 0 &&
+ ngtcp2_ksl_nth_node(ksl, blk, i - 1)->blk->n > NGTCP2_KSL_MIN_NBLK) {
+ ksl_shift_right(ksl, blk, i - 1);
+ blk = node->blk;
+ continue;
+ }
+
+ if (i + 1 < blk->n) {
+ blk = ksl_merge_node(ksl, blk, i);
+ continue;
+ }
+
+ assert(i > 0);
+
+ blk = ksl_merge_node(ksl, blk, i - 1);
+ }
+}
+
+ngtcp2_ksl_it ngtcp2_ksl_lower_bound(ngtcp2_ksl *ksl,
+ const ngtcp2_ksl_key *key) {
+ ngtcp2_ksl_blk *blk = ksl->head;
+ ngtcp2_ksl_it it;
+ size_t i;
+
+ if (!blk) {
+ ngtcp2_ksl_it_init(&it, ksl, &null_blk, 0);
+ return it;
+ }
+
+ for (;;) {
+ i = ksl_bsearch(ksl, blk, key, ksl->compar);
+
+ if (blk->leaf) {
+ if (i == blk->n && blk->next) {
+ blk = blk->next;
+ i = 0;
+ }
+ ngtcp2_ksl_it_init(&it, ksl, blk, i);
+ return it;
+ }
+
+ if (i == blk->n) {
+ /* This happens if descendant has smaller key. Fast forward to
+ find last node in this subtree. */
+ for (; !blk->leaf; blk = ngtcp2_ksl_nth_node(ksl, blk, blk->n - 1)->blk)
+ ;
+ if (blk->next) {
+ blk = blk->next;
+ i = 0;
+ } else {
+ i = blk->n;
+ }
+ ngtcp2_ksl_it_init(&it, ksl, blk, i);
+ return it;
+ }
+ blk = ngtcp2_ksl_nth_node(ksl, blk, i)->blk;
+ }
+}
+
+ngtcp2_ksl_it ngtcp2_ksl_lower_bound_compar(ngtcp2_ksl *ksl,
+ const ngtcp2_ksl_key *key,
+ ngtcp2_ksl_compar compar) {
+ ngtcp2_ksl_blk *blk = ksl->head;
+ ngtcp2_ksl_it it;
+ size_t i;
+
+ if (!blk) {
+ ngtcp2_ksl_it_init(&it, ksl, &null_blk, 0);
+ return it;
+ }
+
+ for (;;) {
+ i = ksl_bsearch(ksl, blk, key, compar);
+
+ if (blk->leaf) {
+ if (i == blk->n && blk->next) {
+ blk = blk->next;
+ i = 0;
+ }
+ ngtcp2_ksl_it_init(&it, ksl, blk, i);
+ return it;
+ }
+
+ if (i == blk->n) {
+ /* This happens if descendant has smaller key. Fast forward to
+ find last node in this subtree. */
+ for (; !blk->leaf; blk = ngtcp2_ksl_nth_node(ksl, blk, blk->n - 1)->blk)
+ ;
+ if (blk->next) {
+ blk = blk->next;
+ i = 0;
+ } else {
+ i = blk->n;
+ }
+ ngtcp2_ksl_it_init(&it, ksl, blk, i);
+ return it;
+ }
+ blk = ngtcp2_ksl_nth_node(ksl, blk, i)->blk;
+ }
+}
+
+void ngtcp2_ksl_update_key(ngtcp2_ksl *ksl, const ngtcp2_ksl_key *old_key,
+ const ngtcp2_ksl_key *new_key) {
+ ngtcp2_ksl_blk *blk = ksl->head;
+ ngtcp2_ksl_node *node;
+ size_t i;
+
+ assert(ksl->head);
+
+ for (;;) {
+ i = ksl_bsearch(ksl, blk, old_key, ksl->compar);
+
+ assert(i < blk->n);
+ node = ngtcp2_ksl_nth_node(ksl, blk, i);
+
+ if (blk->leaf) {
+ assert(key_equal(ksl->compar, (ngtcp2_ksl_key *)node->key, old_key));
+ ksl_node_set_key(ksl, node, new_key);
+ return;
+ }
+
+ if (key_equal(ksl->compar, (ngtcp2_ksl_key *)node->key, old_key) ||
+ ksl->compar((ngtcp2_ksl_key *)node->key, new_key)) {
+ ksl_node_set_key(ksl, node, new_key);
+ }
+
+ blk = node->blk;
+ }
+}
+
+static void ksl_print(ngtcp2_ksl *ksl, ngtcp2_ksl_blk *blk, size_t level) {
+ size_t i;
+ ngtcp2_ksl_node *node;
+
+ fprintf(stderr, "LV=%zu n=%u\n", level, blk->n);
+
+ if (blk->leaf) {
+ for (i = 0; i < blk->n; ++i) {
+ node = ngtcp2_ksl_nth_node(ksl, blk, i);
+ fprintf(stderr, " %" PRId64, *(int64_t *)(void *)node->key);
+ }
+ fprintf(stderr, "\n");
+ return;
+ }
+
+ for (i = 0; i < blk->n; ++i) {
+ ksl_print(ksl, ngtcp2_ksl_nth_node(ksl, blk, i)->blk, level + 1);
+ }
+}
+
+size_t ngtcp2_ksl_len(ngtcp2_ksl *ksl) { return ksl->n; }
+
+void ngtcp2_ksl_clear(ngtcp2_ksl *ksl) {
+ if (!ksl->head) {
+ return;
+ }
+
+#ifdef NOMEMPOOL
+ ksl_free_blk(ksl, ksl->head);
+#endif /* NOMEMPOOL */
+
+ ksl->front = ksl->back = ksl->head = NULL;
+ ksl->n = 0;
+
+ ngtcp2_objalloc_clear(&ksl->blkalloc);
+}
+
+void ngtcp2_ksl_print(ngtcp2_ksl *ksl) {
+ if (!ksl->head) {
+ return;
+ }
+
+ ksl_print(ksl, ksl->head, 0);
+}
+
+ngtcp2_ksl_it ngtcp2_ksl_begin(const ngtcp2_ksl *ksl) {
+ ngtcp2_ksl_it it;
+
+ if (ksl->head) {
+ ngtcp2_ksl_it_init(&it, ksl, ksl->front, 0);
+ } else {
+ ngtcp2_ksl_it_init(&it, ksl, &null_blk, 0);
+ }
+
+ return it;
+}
+
+ngtcp2_ksl_it ngtcp2_ksl_end(const ngtcp2_ksl *ksl) {
+ ngtcp2_ksl_it it;
+
+ if (ksl->head) {
+ ngtcp2_ksl_it_init(&it, ksl, ksl->back, ksl->back->n);
+ } else {
+ ngtcp2_ksl_it_init(&it, ksl, &null_blk, 0);
+ }
+
+ return it;
+}
+
+void ngtcp2_ksl_it_init(ngtcp2_ksl_it *it, const ngtcp2_ksl *ksl,
+ ngtcp2_ksl_blk *blk, size_t i) {
+ it->ksl = ksl;
+ it->blk = blk;
+ it->i = i;
+}
+
+void ngtcp2_ksl_it_prev(ngtcp2_ksl_it *it) {
+ assert(!ngtcp2_ksl_it_begin(it));
+
+ if (it->i == 0) {
+ it->blk = it->blk->prev;
+ it->i = it->blk->n - 1;
+ } else {
+ --it->i;
+ }
+}
+
+int ngtcp2_ksl_it_begin(const ngtcp2_ksl_it *it) {
+ return it->i == 0 && it->blk->prev == NULL;
+}
+
+int ngtcp2_ksl_range_compar(const ngtcp2_ksl_key *lhs,
+ const ngtcp2_ksl_key *rhs) {
+ const ngtcp2_range *a = lhs, *b = rhs;
+ return a->begin < b->begin;
+}
+
+int ngtcp2_ksl_range_exclusive_compar(const ngtcp2_ksl_key *lhs,
+ const ngtcp2_ksl_key *rhs) {
+ const ngtcp2_range *a = lhs, *b = rhs;
+ return a->begin < b->begin &&
+ !(ngtcp2_max(a->begin, b->begin) < ngtcp2_min(a->end, b->end));
+}
diff --git a/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_ksl.h b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_ksl.h
new file mode 100644
index 0000000..312a151
--- /dev/null
+++ b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_ksl.h
@@ -0,0 +1,345 @@
+/*
+ * ngtcp2
+ *
+ * Copyright (c) 2018 ngtcp2 contributors
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#ifndef NGTCP2_KSL_H
+#define NGTCP2_KSL_H
+
+#ifdef HAVE_CONFIG_H
+# include <config.h>
+#endif /* HAVE_CONFIG_H */
+
+#include <stdlib.h>
+
+#include <ngtcp2/ngtcp2.h>
+
+#include "ngtcp2_objalloc.h"
+
+/*
+ * Skip List using single key instead of range.
+ */
+
+#define NGTCP2_KSL_DEGR 16
+/* NGTCP2_KSL_MAX_NBLK is the maximum number of nodes which a single
+ block can contain. */
+#define NGTCP2_KSL_MAX_NBLK (2 * NGTCP2_KSL_DEGR - 1)
+/* NGTCP2_KSL_MIN_NBLK is the minimum number of nodes which a single
+ block other than root must contains. */
+#define NGTCP2_KSL_MIN_NBLK (NGTCP2_KSL_DEGR - 1)
+
+/*
+ * ngtcp2_ksl_key represents key in ngtcp2_ksl.
+ */
+typedef void ngtcp2_ksl_key;
+
+typedef struct ngtcp2_ksl_node ngtcp2_ksl_node;
+
+typedef struct ngtcp2_ksl_blk ngtcp2_ksl_blk;
+
+/*
+ * ngtcp2_ksl_node is a node which contains either ngtcp2_ksl_blk or
+ * opaque data. If a node is an internal node, it contains
+ * ngtcp2_ksl_blk. Otherwise, it has data. The key is stored at the
+ * location starting at key.
+ */
+struct ngtcp2_ksl_node {
+ union {
+ ngtcp2_ksl_blk *blk;
+ void *data;
+ };
+ union {
+ uint64_t align;
+ /* key is a buffer to include key associated to this node.
+ Because the length of key is unknown until ngtcp2_ksl_init is
+ called, the actual buffer will be allocated after this
+ field. */
+ uint8_t key[1];
+ };
+};
+
+/*
+ * ngtcp2_ksl_blk contains ngtcp2_ksl_node objects.
+ */
+struct ngtcp2_ksl_blk {
+ union {
+ struct {
+ /* next points to the next block if leaf field is nonzero. */
+ ngtcp2_ksl_blk *next;
+ /* prev points to the previous block if leaf field is nonzero. */
+ ngtcp2_ksl_blk *prev;
+ /* n is the number of nodes this object contains in nodes. */
+ uint32_t n;
+ /* leaf is nonzero if this block contains leaf nodes. */
+ uint32_t leaf;
+ union {
+ uint64_t align;
+ /* nodes is a buffer to contain NGTCP2_KSL_MAX_NBLK
+ ngtcp2_ksl_node objects. Because ngtcp2_ksl_node object is
+ allocated along with the additional variable length key
+ storage, the size of buffer is unknown until ngtcp2_ksl_init is
+ called. */
+ uint8_t nodes[1];
+ };
+ };
+
+ ngtcp2_opl_entry oplent;
+ };
+};
+
+ngtcp2_objalloc_def(ksl_blk, ngtcp2_ksl_blk, oplent);
+
+/*
+ * ngtcp2_ksl_compar is a function type which returns nonzero if key
+ * |lhs| should be placed before |rhs|. It returns 0 otherwise.
+ */
+typedef int (*ngtcp2_ksl_compar)(const ngtcp2_ksl_key *lhs,
+ const ngtcp2_ksl_key *rhs);
+
+typedef struct ngtcp2_ksl ngtcp2_ksl;
+
+typedef struct ngtcp2_ksl_it ngtcp2_ksl_it;
+
+/*
+ * ngtcp2_ksl_it is a forward iterator to iterate nodes.
+ */
+struct ngtcp2_ksl_it {
+ const ngtcp2_ksl *ksl;
+ ngtcp2_ksl_blk *blk;
+ size_t i;
+};
+
+/*
+ * ngtcp2_ksl is a deterministic paged skip list.
+ */
+struct ngtcp2_ksl {
+ ngtcp2_objalloc blkalloc;
+ /* head points to the root block. */
+ ngtcp2_ksl_blk *head;
+ /* front points to the first leaf block. */
+ ngtcp2_ksl_blk *front;
+ /* back points to the last leaf block. */
+ ngtcp2_ksl_blk *back;
+ ngtcp2_ksl_compar compar;
+ size_t n;
+ /* keylen is the size of key */
+ size_t keylen;
+ /* nodelen is the actual size of ngtcp2_ksl_node including key
+ storage. */
+ size_t nodelen;
+};
+
+/*
+ * ngtcp2_ksl_init initializes |ksl|. |compar| specifies compare
+ * function. |keylen| is the length of key.
+ */
+void ngtcp2_ksl_init(ngtcp2_ksl *ksl, ngtcp2_ksl_compar compar, size_t keylen,
+ const ngtcp2_mem *mem);
+
+/*
+ * ngtcp2_ksl_free frees resources allocated for |ksl|. If |ksl| is
+ * NULL, this function does nothing. It does not free the memory
+ * region pointed by |ksl| itself.
+ */
+void ngtcp2_ksl_free(ngtcp2_ksl *ksl);
+
+/*
+ * ngtcp2_ksl_insert inserts |key| with its associated |data|. On
+ * successful insertion, the iterator points to the inserted node is
+ * stored in |*it|.
+ *
+ * This function returns 0 if it succeeds, or one of the following
+ * negative error codes:
+ *
+ * NGTCP2_ERR_NOMEM
+ * Out of memory.
+ * NGTCP2_ERR_INVALID_ARGUMENT
+ * |key| already exists.
+ */
+int ngtcp2_ksl_insert(ngtcp2_ksl *ksl, ngtcp2_ksl_it *it,
+ const ngtcp2_ksl_key *key, void *data);
+
+/*
+ * ngtcp2_ksl_remove removes the |key| from |ksl|.
+ *
+ * This function assigns the iterator to |*it|, which points to the
+ * node which is located at the right next of the removed node if |it|
+ * is not NULL. If |key| is not found, no deletion takes place and
+ * the return value of ngtcp2_ksl_end(ksl) is assigned to |*it|.
+ *
+ * This function returns 0 if it succeeds, or one of the following
+ * negative error codes:
+ *
+ * NGTCP2_ERR_INVALID_ARGUMENT
+ * |key| does not exist.
+ */
+int ngtcp2_ksl_remove(ngtcp2_ksl *ksl, ngtcp2_ksl_it *it,
+ const ngtcp2_ksl_key *key);
+
+/*
+ * ngtcp2_ksl_remove_hint removes the |key| from |ksl|. |hint| must
+ * point to the same node denoted by |key|. |hint| is used to remove
+ * a node efficiently in some cases. Other than that, it behaves
+ * exactly like ngtcp2_ksl_remove. |it| and |hint| can point to the
+ * same object.
+ */
+int ngtcp2_ksl_remove_hint(ngtcp2_ksl *ksl, ngtcp2_ksl_it *it,
+ const ngtcp2_ksl_it *hint,
+ const ngtcp2_ksl_key *key);
+
+/*
+ * ngtcp2_ksl_lower_bound returns the iterator which points to the
+ * first node which has the key which is equal to |key| or the last
+ * node which satisfies !compar(&node->key, key). If there is no such
+ * node, it returns the iterator which satisfies ngtcp2_ksl_it_end(it)
+ * != 0.
+ */
+ngtcp2_ksl_it ngtcp2_ksl_lower_bound(ngtcp2_ksl *ksl,
+ const ngtcp2_ksl_key *key);
+
+/*
+ * ngtcp2_ksl_lower_bound_compar works like ngtcp2_ksl_lower_bound,
+ * but it takes custom function |compar| to do lower bound search.
+ */
+ngtcp2_ksl_it ngtcp2_ksl_lower_bound_compar(ngtcp2_ksl *ksl,
+ const ngtcp2_ksl_key *key,
+ ngtcp2_ksl_compar compar);
+
+/*
+ * ngtcp2_ksl_update_key replaces the key of nodes which has |old_key|
+ * with |new_key|. |new_key| must be strictly greater than the
+ * previous node and strictly smaller than the next node.
+ */
+void ngtcp2_ksl_update_key(ngtcp2_ksl *ksl, const ngtcp2_ksl_key *old_key,
+ const ngtcp2_ksl_key *new_key);
+
+/*
+ * ngtcp2_ksl_begin returns the iterator which points to the first
+ * node. If there is no node in |ksl|, it returns the iterator which
+ * satisfies ngtcp2_ksl_it_end(it) != 0.
+ */
+ngtcp2_ksl_it ngtcp2_ksl_begin(const ngtcp2_ksl *ksl);
+
+/*
+ * ngtcp2_ksl_end returns the iterator which points to the node
+ * following the last node. The returned object satisfies
+ * ngtcp2_ksl_it_end(). If there is no node in |ksl|, it returns the
+ * iterator which satisfies ngtcp2_ksl_it_begin(it) != 0.
+ */
+ngtcp2_ksl_it ngtcp2_ksl_end(const ngtcp2_ksl *ksl);
+
+/*
+ * ngtcp2_ksl_len returns the number of elements stored in |ksl|.
+ */
+size_t ngtcp2_ksl_len(ngtcp2_ksl *ksl);
+
+/*
+ * ngtcp2_ksl_clear removes all elements stored in |ksl|.
+ */
+void ngtcp2_ksl_clear(ngtcp2_ksl *ksl);
+
+/*
+ * ngtcp2_ksl_nth_node returns the |n|th node under |blk|.
+ */
+#define ngtcp2_ksl_nth_node(KSL, BLK, N) \
+ ((ngtcp2_ksl_node *)(void *)((BLK)->nodes + (KSL)->nodelen * (N)))
+
+/*
+ * ngtcp2_ksl_print prints its internal state in stderr. It assumes
+ * that the key is of type int64_t. This function should be used for
+ * the debugging purpose only.
+ */
+void ngtcp2_ksl_print(ngtcp2_ksl *ksl);
+
+/*
+ * ngtcp2_ksl_it_init initializes |it|.
+ */
+void ngtcp2_ksl_it_init(ngtcp2_ksl_it *it, const ngtcp2_ksl *ksl,
+ ngtcp2_ksl_blk *blk, size_t i);
+
+/*
+ * ngtcp2_ksl_it_get returns the data associated to the node which
+ * |it| points to. It is undefined to call this function when
+ * ngtcp2_ksl_it_end(it) returns nonzero.
+ */
+#define ngtcp2_ksl_it_get(IT) \
+ ngtcp2_ksl_nth_node((IT)->ksl, (IT)->blk, (IT)->i)->data
+
+/*
+ * ngtcp2_ksl_it_next advances the iterator by one. It is undefined
+ * if this function is called when ngtcp2_ksl_it_end(it) returns
+ * nonzero.
+ */
+#define ngtcp2_ksl_it_next(IT) \
+ (++(IT)->i == (IT)->blk->n && (IT)->blk->next \
+ ? ((IT)->blk = (IT)->blk->next, (IT)->i = 0) \
+ : 0)
+
+/*
+ * ngtcp2_ksl_it_prev moves backward the iterator by one. It is
+ * undefined if this function is called when ngtcp2_ksl_it_begin(it)
+ * returns nonzero.
+ */
+void ngtcp2_ksl_it_prev(ngtcp2_ksl_it *it);
+
+/*
+ * ngtcp2_ksl_it_end returns nonzero if |it| points to the beyond the
+ * last node.
+ */
+#define ngtcp2_ksl_it_end(IT) \
+ ((IT)->blk->n == (IT)->i && (IT)->blk->next == NULL)
+
+/*
+ * ngtcp2_ksl_it_begin returns nonzero if |it| points to the first
+ * node. |it| might satisfy both ngtcp2_ksl_it_begin(&it) and
+ * ngtcp2_ksl_it_end(&it) if the skip list has no node.
+ */
+int ngtcp2_ksl_it_begin(const ngtcp2_ksl_it *it);
+
+/*
+ * ngtcp2_ksl_key returns the key of the node which |it| points to.
+ * It is undefined to call this function when ngtcp2_ksl_it_end(it)
+ * returns nonzero.
+ */
+#define ngtcp2_ksl_it_key(IT) \
+ ((ngtcp2_ksl_key *)ngtcp2_ksl_nth_node((IT)->ksl, (IT)->blk, (IT)->i)->key)
+
+/*
+ * ngtcp2_ksl_range_compar is an implementation of ngtcp2_ksl_compar.
+ * lhs->ptr and rhs->ptr must point to ngtcp2_range object and the
+ * function returns nonzero if (const ngtcp2_range *)(lhs->ptr)->begin
+ * < (const ngtcp2_range *)(rhs->ptr)->begin.
+ */
+int ngtcp2_ksl_range_compar(const ngtcp2_ksl_key *lhs,
+ const ngtcp2_ksl_key *rhs);
+
+/*
+ * ngtcp2_ksl_range_exclusive_compar is an implementation of
+ * ngtcp2_ksl_compar. lhs->ptr and rhs->ptr must point to
+ * ngtcp2_range object and the function returns nonzero if (const
+ * ngtcp2_range *)(lhs->ptr)->begin < (const ngtcp2_range
+ * *)(rhs->ptr)->begin and the 2 ranges do not intersect.
+ */
+int ngtcp2_ksl_range_exclusive_compar(const ngtcp2_ksl_key *lhs,
+ const ngtcp2_ksl_key *rhs);
+
+#endif /* NGTCP2_KSL_H */
diff --git a/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_log.c b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_log.c
new file mode 100644
index 0000000..790adeb
--- /dev/null
+++ b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_log.c
@@ -0,0 +1,822 @@
+/*
+ * ngtcp2
+ *
+ * Copyright (c) 2018 ngtcp2 contributors
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "ngtcp2_log.h"
+
+#include <stdio.h>
+#ifdef HAVE_UNISTD_H
+# include <unistd.h>
+#endif
+#include <assert.h>
+#include <string.h>
+
+#include "ngtcp2_str.h"
+#include "ngtcp2_vec.h"
+#include "ngtcp2_macro.h"
+#include "ngtcp2_conv.h"
+#include "ngtcp2_unreachable.h"
+#include "ngtcp2_net.h"
+
+void ngtcp2_log_init(ngtcp2_log *log, const ngtcp2_cid *scid,
+ ngtcp2_printf log_printf, ngtcp2_tstamp ts,
+ void *user_data) {
+ if (scid) {
+ ngtcp2_encode_hex(log->scid, scid->data, scid->datalen);
+ } else {
+ log->scid[0] = '\0';
+ }
+ log->log_printf = log_printf;
+ log->ts = log->last_ts = ts;
+ log->user_data = user_data;
+}
+
+/*
+ * # Log header
+ *
+ * <LEVEL><TIMESTAMP> <SCID> <EVENT>
+ *
+ * <LEVEL>:
+ * Log level. I=Info, W=Warning, E=Error
+ *
+ * <TIMESTAMP>:
+ * Timestamp relative to ngtcp2_log.ts field in milliseconds
+ * resolution.
+ *
+ * <SCID>:
+ * Source Connection ID in hex string.
+ *
+ * <EVENT>:
+ * Event. pkt=packet, frm=frame, rcv=recovery, cry=crypto,
+ * con=connection(catch all)
+ *
+ * # Frame event
+ *
+ * <DIR> <PKN> <PKTNAME> <FRAMENAME>(<FRAMETYPE>)
+ *
+ * <DIR>:
+ * Flow direction. tx=transmission, rx=reception
+ *
+ * <PKN>:
+ * Packet number.
+ *
+ * <PKTNAME>:
+ * Packet name. (e.g., Initial, Handshake, 1RTT)
+ *
+ * <FRAMENAME>:
+ * Frame name. (e.g., STREAM, ACK, PING)
+ *
+ * <FRAMETYPE>:
+ * Frame type in hex string.
+ */
+
+#define NGTCP2_LOG_BUFLEN 4096
+
+/* TODO Split second and remaining fraction with comma */
+#define NGTCP2_LOG_HD "I%08" PRIu64 " 0x%s %s"
+#define NGTCP2_LOG_PKT NGTCP2_LOG_HD " %s %" PRId64 " %s"
+#define NGTCP2_LOG_TP NGTCP2_LOG_HD " remote transport_parameters"
+
+#define NGTCP2_LOG_FRM_HD_FIELDS(DIR) \
+ timestamp_cast(log->last_ts - log->ts), (const char *)log->scid, "frm", \
+ (DIR), hd->pkt_num, strpkttype(hd)
+
+#define NGTCP2_LOG_PKT_HD_FIELDS(DIR) \
+ timestamp_cast(log->last_ts - log->ts), (const char *)log->scid, "pkt", \
+ (DIR), hd->pkt_num, strpkttype(hd)
+
+#define NGTCP2_LOG_TP_HD_FIELDS \
+ timestamp_cast(log->last_ts - log->ts), (const char *)log->scid, "cry"
+
+static const char *strerrorcode(uint64_t error_code) {
+ switch (error_code) {
+ case NGTCP2_NO_ERROR:
+ return "NO_ERROR";
+ case NGTCP2_INTERNAL_ERROR:
+ return "INTERNAL_ERROR";
+ case NGTCP2_CONNECTION_REFUSED:
+ return "CONNECTION_REFUSED";
+ case NGTCP2_FLOW_CONTROL_ERROR:
+ return "FLOW_CONTROL_ERROR";
+ case NGTCP2_STREAM_LIMIT_ERROR:
+ return "STREAM_LIMIT_ERROR";
+ case NGTCP2_STREAM_STATE_ERROR:
+ return "STREAM_STATE_ERROR";
+ case NGTCP2_FINAL_SIZE_ERROR:
+ return "FINAL_SIZE_ERROR";
+ case NGTCP2_FRAME_ENCODING_ERROR:
+ return "FRAME_ENCODING_ERROR";
+ case NGTCP2_TRANSPORT_PARAMETER_ERROR:
+ return "TRANSPORT_PARAMETER_ERROR";
+ case NGTCP2_CONNECTION_ID_LIMIT_ERROR:
+ return "CONNECTION_ID_LIMIT_ERROR";
+ case NGTCP2_PROTOCOL_VIOLATION:
+ return "PROTOCOL_VIOLATION";
+ case NGTCP2_INVALID_TOKEN:
+ return "INVALID_TOKEN";
+ case NGTCP2_APPLICATION_ERROR:
+ return "APPLICATION_ERROR";
+ case NGTCP2_CRYPTO_BUFFER_EXCEEDED:
+ return "CRYPTO_BUFFER_EXCEEDED";
+ case NGTCP2_KEY_UPDATE_ERROR:
+ return "KEY_UPDATE_ERROR";
+ case NGTCP2_VERSION_NEGOTIATION_ERROR:
+ return "VERSION_NEGOTIATION_ERROR";
+ default:
+ if (0x100u <= error_code && error_code <= 0x1ffu) {
+ return "CRYPTO_ERROR";
+ }
+ return "(unknown)";
+ }
+}
+
+static const char *strapperrorcode(uint64_t app_error_code) {
+ (void)app_error_code;
+ return "(unknown)";
+}
+
+static const char *strpkttype_long(uint8_t type) {
+ switch (type) {
+ case NGTCP2_PKT_INITIAL:
+ return "Initial";
+ case NGTCP2_PKT_RETRY:
+ return "Retry";
+ case NGTCP2_PKT_HANDSHAKE:
+ return "Handshake";
+ case NGTCP2_PKT_0RTT:
+ return "0RTT";
+ default:
+ return "(unknown)";
+ }
+}
+
+static const char *strpkttype(const ngtcp2_pkt_hd *hd) {
+ if (hd->flags & NGTCP2_PKT_FLAG_LONG_FORM) {
+ return strpkttype_long(hd->type);
+ }
+
+ switch (hd->type) {
+ case NGTCP2_PKT_VERSION_NEGOTIATION:
+ return "VN";
+ case NGTCP2_PKT_STATELESS_RESET:
+ return "SR";
+ case NGTCP2_PKT_1RTT:
+ return "1RTT";
+ default:
+ return "(unknown)";
+ }
+}
+
+static const char *strpkttype_type_flags(uint8_t type, uint8_t flags) {
+ ngtcp2_pkt_hd hd = {0};
+
+ hd.type = type;
+ hd.flags = flags;
+
+ return strpkttype(&hd);
+}
+
+static const char *strevent(ngtcp2_log_event ev) {
+ switch (ev) {
+ case NGTCP2_LOG_EVENT_CON:
+ return "con";
+ case NGTCP2_LOG_EVENT_PKT:
+ return "pkt";
+ case NGTCP2_LOG_EVENT_FRM:
+ return "frm";
+ case NGTCP2_LOG_EVENT_RCV:
+ return "rcv";
+ case NGTCP2_LOG_EVENT_CRY:
+ return "cry";
+ case NGTCP2_LOG_EVENT_PTV:
+ return "ptv";
+ case NGTCP2_LOG_EVENT_NONE:
+ default:
+ return "non";
+ }
+}
+
+static uint64_t timestamp_cast(uint64_t ns) { return ns / NGTCP2_MILLISECONDS; }
+
+static void log_fr_stream(ngtcp2_log *log, const ngtcp2_pkt_hd *hd,
+ const ngtcp2_stream *fr, const char *dir) {
+ log->log_printf(
+ log->user_data,
+ (NGTCP2_LOG_PKT " STREAM(0x%02x) id=0x%" PRIx64 " fin=%d offset=%" PRIu64
+ " len=%" PRIu64 " uni=%d"),
+ NGTCP2_LOG_FRM_HD_FIELDS(dir), fr->type | fr->flags, fr->stream_id,
+ fr->fin, fr->offset, ngtcp2_vec_len(fr->data, fr->datacnt),
+ (fr->stream_id & 0x2) != 0);
+}
+
+static void log_fr_ack(ngtcp2_log *log, const ngtcp2_pkt_hd *hd,
+ const ngtcp2_ack *fr, const char *dir) {
+ int64_t largest_ack, min_ack;
+ size_t i;
+
+ log->log_printf(log->user_data,
+ (NGTCP2_LOG_PKT " ACK(0x%02x) largest_ack=%" PRId64
+ " ack_delay=%" PRIu64 "(%" PRIu64
+ ") ack_range_count=%zu"),
+ NGTCP2_LOG_FRM_HD_FIELDS(dir), fr->type, fr->largest_ack,
+ fr->ack_delay_unscaled / NGTCP2_MILLISECONDS, fr->ack_delay,
+ fr->rangecnt);
+
+ largest_ack = fr->largest_ack;
+ min_ack = fr->largest_ack - (int64_t)fr->first_ack_range;
+
+ log->log_printf(log->user_data,
+ (NGTCP2_LOG_PKT " ACK(0x%02x) range=[%" PRId64 "..%" PRId64
+ "] len=%" PRIu64),
+ NGTCP2_LOG_FRM_HD_FIELDS(dir), fr->type, largest_ack, min_ack,
+ fr->first_ack_range);
+
+ for (i = 0; i < fr->rangecnt; ++i) {
+ const ngtcp2_ack_range *range = &fr->ranges[i];
+ largest_ack = min_ack - (int64_t)range->gap - 2;
+ min_ack = largest_ack - (int64_t)range->len;
+ log->log_printf(log->user_data,
+ (NGTCP2_LOG_PKT " ACK(0x%02x) range=[%" PRId64 "..%" PRId64
+ "] gap=%" PRIu64 " len=%" PRIu64),
+ NGTCP2_LOG_FRM_HD_FIELDS(dir), fr->type, largest_ack,
+ min_ack, range->gap, range->len);
+ }
+
+ if (fr->type == NGTCP2_FRAME_ACK_ECN) {
+ log->log_printf(log->user_data,
+ (NGTCP2_LOG_PKT " ACK(0x%02x) ect0=%" PRIu64
+ " ect1=%" PRIu64 " ce=%" PRIu64),
+ NGTCP2_LOG_FRM_HD_FIELDS(dir), fr->type, fr->ecn.ect0,
+ fr->ecn.ect1, fr->ecn.ce);
+ }
+}
+
+static void log_fr_padding(ngtcp2_log *log, const ngtcp2_pkt_hd *hd,
+ const ngtcp2_padding *fr, const char *dir) {
+ log->log_printf(log->user_data, (NGTCP2_LOG_PKT " PADDING(0x%02x) len=%zu"),
+ NGTCP2_LOG_FRM_HD_FIELDS(dir), fr->type, fr->len);
+}
+
+static void log_fr_reset_stream(ngtcp2_log *log, const ngtcp2_pkt_hd *hd,
+ const ngtcp2_reset_stream *fr,
+ const char *dir) {
+ log->log_printf(
+ log->user_data,
+ (NGTCP2_LOG_PKT " RESET_STREAM(0x%02x) id=0x%" PRIx64
+ " app_error_code=%s(0x%" PRIx64 ") final_size=%" PRIu64),
+ NGTCP2_LOG_FRM_HD_FIELDS(dir), fr->type, fr->stream_id,
+ strapperrorcode(fr->app_error_code), fr->app_error_code, fr->final_size);
+}
+
+static void log_fr_connection_close(ngtcp2_log *log, const ngtcp2_pkt_hd *hd,
+ const ngtcp2_connection_close *fr,
+ const char *dir) {
+ char reason[256];
+ size_t reasonlen = ngtcp2_min(sizeof(reason) - 1, fr->reasonlen);
+
+ log->log_printf(log->user_data,
+ (NGTCP2_LOG_PKT
+ " CONNECTION_CLOSE(0x%02x) error_code=%s(0x%" PRIx64 ") "
+ "frame_type=%" PRIx64 " reason_len=%zu reason=[%s]"),
+ NGTCP2_LOG_FRM_HD_FIELDS(dir), fr->type,
+ fr->type == NGTCP2_FRAME_CONNECTION_CLOSE
+ ? strerrorcode(fr->error_code)
+ : strapperrorcode(fr->error_code),
+ fr->error_code, fr->frame_type, fr->reasonlen,
+ ngtcp2_encode_printable_ascii(reason, fr->reason, reasonlen));
+}
+
+static void log_fr_max_data(ngtcp2_log *log, const ngtcp2_pkt_hd *hd,
+ const ngtcp2_max_data *fr, const char *dir) {
+ log->log_printf(log->user_data,
+ (NGTCP2_LOG_PKT " MAX_DATA(0x%02x) max_data=%" PRIu64),
+ NGTCP2_LOG_FRM_HD_FIELDS(dir), fr->type, fr->max_data);
+}
+
+static void log_fr_max_stream_data(ngtcp2_log *log, const ngtcp2_pkt_hd *hd,
+ const ngtcp2_max_stream_data *fr,
+ const char *dir) {
+ log->log_printf(log->user_data,
+ (NGTCP2_LOG_PKT " MAX_STREAM_DATA(0x%02x) id=0x%" PRIx64
+ " max_stream_data=%" PRIu64),
+ NGTCP2_LOG_FRM_HD_FIELDS(dir), fr->type, fr->stream_id,
+ fr->max_stream_data);
+}
+
+static void log_fr_max_streams(ngtcp2_log *log, const ngtcp2_pkt_hd *hd,
+ const ngtcp2_max_streams *fr, const char *dir) {
+ log->log_printf(log->user_data,
+ (NGTCP2_LOG_PKT " MAX_STREAMS(0x%02x) max_streams=%" PRIu64),
+ NGTCP2_LOG_FRM_HD_FIELDS(dir), fr->type, fr->max_streams);
+}
+
+static void log_fr_ping(ngtcp2_log *log, const ngtcp2_pkt_hd *hd,
+ const ngtcp2_ping *fr, const char *dir) {
+ log->log_printf(log->user_data, (NGTCP2_LOG_PKT " PING(0x%02x)"),
+ NGTCP2_LOG_FRM_HD_FIELDS(dir), fr->type);
+}
+
+static void log_fr_data_blocked(ngtcp2_log *log, const ngtcp2_pkt_hd *hd,
+ const ngtcp2_data_blocked *fr,
+ const char *dir) {
+ log->log_printf(log->user_data,
+ (NGTCP2_LOG_PKT " DATA_BLOCKED(0x%02x) offset=%" PRIu64),
+ NGTCP2_LOG_FRM_HD_FIELDS(dir), fr->type, fr->offset);
+}
+
+static void log_fr_stream_data_blocked(ngtcp2_log *log, const ngtcp2_pkt_hd *hd,
+ const ngtcp2_stream_data_blocked *fr,
+ const char *dir) {
+ log->log_printf(log->user_data,
+ (NGTCP2_LOG_PKT " STREAM_DATA_BLOCKED(0x%02x) id=0x%" PRIx64
+ " offset=%" PRIu64),
+ NGTCP2_LOG_FRM_HD_FIELDS(dir), fr->type, fr->stream_id,
+ fr->offset);
+}
+
+static void log_fr_streams_blocked(ngtcp2_log *log, const ngtcp2_pkt_hd *hd,
+ const ngtcp2_streams_blocked *fr,
+ const char *dir) {
+ log->log_printf(
+ log->user_data,
+ (NGTCP2_LOG_PKT " STREAMS_BLOCKED(0x%02x) max_streams=%" PRIu64),
+ NGTCP2_LOG_FRM_HD_FIELDS(dir), fr->type, fr->max_streams);
+}
+
+static void log_fr_new_connection_id(ngtcp2_log *log, const ngtcp2_pkt_hd *hd,
+ const ngtcp2_new_connection_id *fr,
+ const char *dir) {
+ uint8_t buf[sizeof(fr->stateless_reset_token) * 2 + 1];
+ uint8_t cid[sizeof(fr->cid.data) * 2 + 1];
+
+ log->log_printf(
+ log->user_data,
+ (NGTCP2_LOG_PKT " NEW_CONNECTION_ID(0x%02x) seq=%" PRIu64
+ " cid=0x%s retire_prior_to=%" PRIu64
+ " stateless_reset_token=0x%s"),
+ NGTCP2_LOG_FRM_HD_FIELDS(dir), fr->type, fr->seq,
+ (const char *)ngtcp2_encode_hex(cid, fr->cid.data, fr->cid.datalen),
+ fr->retire_prior_to,
+ (const char *)ngtcp2_encode_hex(buf, fr->stateless_reset_token,
+ sizeof(fr->stateless_reset_token)));
+}
+
+static void log_fr_stop_sending(ngtcp2_log *log, const ngtcp2_pkt_hd *hd,
+ const ngtcp2_stop_sending *fr,
+ const char *dir) {
+ log->log_printf(log->user_data,
+ (NGTCP2_LOG_PKT " STOP_SENDING(0x%02x) id=0x%" PRIx64
+ " app_error_code=%s(0x%" PRIx64 ")"),
+ NGTCP2_LOG_FRM_HD_FIELDS(dir), fr->type, fr->stream_id,
+ strapperrorcode(fr->app_error_code), fr->app_error_code);
+}
+
+static void log_fr_path_challenge(ngtcp2_log *log, const ngtcp2_pkt_hd *hd,
+ const ngtcp2_path_challenge *fr,
+ const char *dir) {
+ uint8_t buf[sizeof(fr->data) * 2 + 1];
+
+ log->log_printf(
+ log->user_data, (NGTCP2_LOG_PKT " PATH_CHALLENGE(0x%02x) data=0x%s"),
+ NGTCP2_LOG_FRM_HD_FIELDS(dir), fr->type,
+ (const char *)ngtcp2_encode_hex(buf, fr->data, sizeof(fr->data)));
+}
+
+static void log_fr_path_response(ngtcp2_log *log, const ngtcp2_pkt_hd *hd,
+ const ngtcp2_path_response *fr,
+ const char *dir) {
+ uint8_t buf[sizeof(fr->data) * 2 + 1];
+
+ log->log_printf(
+ log->user_data, (NGTCP2_LOG_PKT " PATH_RESPONSE(0x%02x) data=0x%s"),
+ NGTCP2_LOG_FRM_HD_FIELDS(dir), fr->type,
+ (const char *)ngtcp2_encode_hex(buf, fr->data, sizeof(fr->data)));
+}
+
+static void log_fr_crypto(ngtcp2_log *log, const ngtcp2_pkt_hd *hd,
+ const ngtcp2_crypto *fr, const char *dir) {
+ log->log_printf(
+ log->user_data,
+ (NGTCP2_LOG_PKT " CRYPTO(0x%02x) offset=%" PRIu64 " len=%" PRIu64),
+ NGTCP2_LOG_FRM_HD_FIELDS(dir), fr->type, fr->offset,
+ ngtcp2_vec_len(fr->data, fr->datacnt));
+}
+
+static void log_fr_new_token(ngtcp2_log *log, const ngtcp2_pkt_hd *hd,
+ const ngtcp2_new_token *fr, const char *dir) {
+ /* Show at most first 64 bytes of token. If token is longer than 64
+ bytes, log first 64 bytes and then append "*" */
+ uint8_t buf[128 + 1 + 1];
+ uint8_t *p;
+
+ if (fr->tokenlen > 64) {
+ p = ngtcp2_encode_hex(buf, fr->token, 64);
+ p[128] = '*';
+ p[129] = '\0';
+ } else {
+ p = ngtcp2_encode_hex(buf, fr->token, fr->tokenlen);
+ }
+ log->log_printf(
+ log->user_data, (NGTCP2_LOG_PKT " NEW_TOKEN(0x%02x) token=0x%s len=%zu"),
+ NGTCP2_LOG_FRM_HD_FIELDS(dir), fr->type, (const char *)p, fr->tokenlen);
+}
+
+static void log_fr_retire_connection_id(ngtcp2_log *log,
+ const ngtcp2_pkt_hd *hd,
+ const ngtcp2_retire_connection_id *fr,
+ const char *dir) {
+ log->log_printf(log->user_data,
+ (NGTCP2_LOG_PKT " RETIRE_CONNECTION_ID(0x%02x) seq=%" PRIu64),
+ NGTCP2_LOG_FRM_HD_FIELDS(dir), fr->type, fr->seq);
+}
+
+static void log_fr_handshake_done(ngtcp2_log *log, const ngtcp2_pkt_hd *hd,
+ const ngtcp2_handshake_done *fr,
+ const char *dir) {
+ log->log_printf(log->user_data, (NGTCP2_LOG_PKT " HANDSHAKE_DONE(0x%02x)"),
+ NGTCP2_LOG_FRM_HD_FIELDS(dir), fr->type);
+}
+
+static void log_fr_datagram(ngtcp2_log *log, const ngtcp2_pkt_hd *hd,
+ const ngtcp2_datagram *fr, const char *dir) {
+ log->log_printf(log->user_data,
+ (NGTCP2_LOG_PKT " DATAGRAM(0x%02x) len=%" PRIu64),
+ NGTCP2_LOG_FRM_HD_FIELDS(dir), fr->type,
+ ngtcp2_vec_len(fr->data, fr->datacnt));
+}
+
+static void log_fr(ngtcp2_log *log, const ngtcp2_pkt_hd *hd,
+ const ngtcp2_frame *fr, const char *dir) {
+ switch (fr->type) {
+ case NGTCP2_FRAME_STREAM:
+ log_fr_stream(log, hd, &fr->stream, dir);
+ break;
+ case NGTCP2_FRAME_ACK:
+ case NGTCP2_FRAME_ACK_ECN:
+ log_fr_ack(log, hd, &fr->ack, dir);
+ break;
+ case NGTCP2_FRAME_PADDING:
+ log_fr_padding(log, hd, &fr->padding, dir);
+ break;
+ case NGTCP2_FRAME_RESET_STREAM:
+ log_fr_reset_stream(log, hd, &fr->reset_stream, dir);
+ break;
+ case NGTCP2_FRAME_CONNECTION_CLOSE:
+ case NGTCP2_FRAME_CONNECTION_CLOSE_APP:
+ log_fr_connection_close(log, hd, &fr->connection_close, dir);
+ break;
+ case NGTCP2_FRAME_MAX_DATA:
+ log_fr_max_data(log, hd, &fr->max_data, dir);
+ break;
+ case NGTCP2_FRAME_MAX_STREAM_DATA:
+ log_fr_max_stream_data(log, hd, &fr->max_stream_data, dir);
+ break;
+ case NGTCP2_FRAME_MAX_STREAMS_BIDI:
+ case NGTCP2_FRAME_MAX_STREAMS_UNI:
+ log_fr_max_streams(log, hd, &fr->max_streams, dir);
+ break;
+ case NGTCP2_FRAME_PING:
+ log_fr_ping(log, hd, &fr->ping, dir);
+ break;
+ case NGTCP2_FRAME_DATA_BLOCKED:
+ log_fr_data_blocked(log, hd, &fr->data_blocked, dir);
+ break;
+ case NGTCP2_FRAME_STREAM_DATA_BLOCKED:
+ log_fr_stream_data_blocked(log, hd, &fr->stream_data_blocked, dir);
+ break;
+ case NGTCP2_FRAME_STREAMS_BLOCKED_BIDI:
+ case NGTCP2_FRAME_STREAMS_BLOCKED_UNI:
+ log_fr_streams_blocked(log, hd, &fr->streams_blocked, dir);
+ break;
+ case NGTCP2_FRAME_NEW_CONNECTION_ID:
+ log_fr_new_connection_id(log, hd, &fr->new_connection_id, dir);
+ break;
+ case NGTCP2_FRAME_STOP_SENDING:
+ log_fr_stop_sending(log, hd, &fr->stop_sending, dir);
+ break;
+ case NGTCP2_FRAME_PATH_CHALLENGE:
+ log_fr_path_challenge(log, hd, &fr->path_challenge, dir);
+ break;
+ case NGTCP2_FRAME_PATH_RESPONSE:
+ log_fr_path_response(log, hd, &fr->path_response, dir);
+ break;
+ case NGTCP2_FRAME_CRYPTO:
+ log_fr_crypto(log, hd, &fr->crypto, dir);
+ break;
+ case NGTCP2_FRAME_NEW_TOKEN:
+ log_fr_new_token(log, hd, &fr->new_token, dir);
+ break;
+ case NGTCP2_FRAME_RETIRE_CONNECTION_ID:
+ log_fr_retire_connection_id(log, hd, &fr->retire_connection_id, dir);
+ break;
+ case NGTCP2_FRAME_HANDSHAKE_DONE:
+ log_fr_handshake_done(log, hd, &fr->handshake_done, dir);
+ break;
+ case NGTCP2_FRAME_DATAGRAM:
+ case NGTCP2_FRAME_DATAGRAM_LEN:
+ log_fr_datagram(log, hd, &fr->datagram, dir);
+ break;
+ default:
+ ngtcp2_unreachable();
+ }
+}
+
+void ngtcp2_log_rx_fr(ngtcp2_log *log, const ngtcp2_pkt_hd *hd,
+ const ngtcp2_frame *fr) {
+ if (!log->log_printf) {
+ return;
+ }
+
+ log_fr(log, hd, fr, "rx");
+}
+
+void ngtcp2_log_tx_fr(ngtcp2_log *log, const ngtcp2_pkt_hd *hd,
+ const ngtcp2_frame *fr) {
+ if (!log->log_printf) {
+ return;
+ }
+
+ log_fr(log, hd, fr, "tx");
+}
+
+void ngtcp2_log_rx_vn(ngtcp2_log *log, const ngtcp2_pkt_hd *hd,
+ const uint32_t *sv, size_t nsv) {
+ size_t i;
+
+ if (!log->log_printf) {
+ return;
+ }
+
+ for (i = 0; i < nsv; ++i) {
+ log->log_printf(log->user_data, (NGTCP2_LOG_PKT " v=0x%08x"),
+ NGTCP2_LOG_PKT_HD_FIELDS("rx"), sv[i]);
+ }
+}
+
+void ngtcp2_log_rx_sr(ngtcp2_log *log, const ngtcp2_pkt_stateless_reset *sr) {
+ uint8_t buf[sizeof(sr->stateless_reset_token) * 2 + 1];
+ ngtcp2_pkt_hd shd;
+ ngtcp2_pkt_hd *hd = &shd;
+
+ if (!log->log_printf) {
+ return;
+ }
+
+ memset(&shd, 0, sizeof(shd));
+
+ shd.type = NGTCP2_PKT_STATELESS_RESET;
+
+ log->log_printf(
+ log->user_data, (NGTCP2_LOG_PKT " token=0x%s randlen=%zu"),
+ NGTCP2_LOG_PKT_HD_FIELDS("rx"),
+ (const char *)ngtcp2_encode_hex(buf, sr->stateless_reset_token,
+ sizeof(sr->stateless_reset_token)),
+ sr->randlen);
+}
+
+void ngtcp2_log_remote_tp(ngtcp2_log *log, uint8_t exttype,
+ const ngtcp2_transport_params *params) {
+ uint8_t token[NGTCP2_STATELESS_RESET_TOKENLEN * 2 + 1];
+ uint8_t addr[16 * 2 + 7 + 1];
+ uint8_t cid[NGTCP2_MAX_CIDLEN * 2 + 1];
+ size_t i;
+ const ngtcp2_sockaddr_in *sa_in;
+ const ngtcp2_sockaddr_in6 *sa_in6;
+ const uint8_t *p;
+ uint32_t version;
+
+ if (!log->log_printf) {
+ return;
+ }
+
+ if (exttype == NGTCP2_TRANSPORT_PARAMS_TYPE_ENCRYPTED_EXTENSIONS) {
+ if (params->stateless_reset_token_present) {
+ log->log_printf(log->user_data,
+ (NGTCP2_LOG_TP " stateless_reset_token=0x%s"),
+ NGTCP2_LOG_TP_HD_FIELDS,
+ (const char *)ngtcp2_encode_hex(
+ token, params->stateless_reset_token,
+ sizeof(params->stateless_reset_token)));
+ }
+
+ if (params->preferred_address_present) {
+ if (params->preferred_address.ipv4_present) {
+ sa_in = &params->preferred_address.ipv4;
+
+ log->log_printf(log->user_data,
+ (NGTCP2_LOG_TP " preferred_address.ipv4_addr=%s"),
+ NGTCP2_LOG_TP_HD_FIELDS,
+ (const char *)ngtcp2_encode_ipv4(
+ addr, (const uint8_t *)&sa_in->sin_addr));
+ log->log_printf(log->user_data,
+ (NGTCP2_LOG_TP " preferred_address.ipv4_port=%u"),
+ NGTCP2_LOG_TP_HD_FIELDS, ngtcp2_ntohs(sa_in->sin_port));
+ }
+
+ if (params->preferred_address.ipv6_present) {
+ sa_in6 = &params->preferred_address.ipv6;
+
+ log->log_printf(log->user_data,
+ (NGTCP2_LOG_TP " preferred_address.ipv6_addr=%s"),
+ NGTCP2_LOG_TP_HD_FIELDS,
+ (const char *)ngtcp2_encode_ipv6(
+ addr, (const uint8_t *)&sa_in6->sin6_addr));
+ log->log_printf(
+ log->user_data, (NGTCP2_LOG_TP " preferred_address.ipv6_port=%u"),
+ NGTCP2_LOG_TP_HD_FIELDS, ngtcp2_ntohs(sa_in6->sin6_port));
+ }
+
+ log->log_printf(log->user_data,
+ (NGTCP2_LOG_TP " preferred_address.cid=0x%s"),
+ NGTCP2_LOG_TP_HD_FIELDS,
+ (const char *)ngtcp2_encode_hex(
+ cid, params->preferred_address.cid.data,
+ params->preferred_address.cid.datalen));
+ log->log_printf(
+ log->user_data,
+ (NGTCP2_LOG_TP " preferred_address.stateless_reset_token=0x%s"),
+ NGTCP2_LOG_TP_HD_FIELDS,
+ (const char *)ngtcp2_encode_hex(
+ token, params->preferred_address.stateless_reset_token,
+ sizeof(params->preferred_address.stateless_reset_token)));
+ }
+
+ log->log_printf(
+ log->user_data,
+ (NGTCP2_LOG_TP " original_destination_connection_id=0x%s"),
+ NGTCP2_LOG_TP_HD_FIELDS,
+ (const char *)ngtcp2_encode_hex(cid, params->original_dcid.data,
+ params->original_dcid.datalen));
+
+ if (params->retry_scid_present) {
+ log->log_printf(
+ log->user_data, (NGTCP2_LOG_TP " retry_source_connection_id=0x%s"),
+ NGTCP2_LOG_TP_HD_FIELDS,
+ (const char *)ngtcp2_encode_hex(cid, params->retry_scid.data,
+ params->retry_scid.datalen));
+ }
+ }
+
+ log->log_printf(
+ log->user_data, (NGTCP2_LOG_TP " initial_source_connection_id=0x%s"),
+ NGTCP2_LOG_TP_HD_FIELDS,
+ (const char *)ngtcp2_encode_hex(cid, params->initial_scid.data,
+ params->initial_scid.datalen));
+
+ log->log_printf(
+ log->user_data,
+ (NGTCP2_LOG_TP " initial_max_stream_data_bidi_local=%" PRIu64),
+ NGTCP2_LOG_TP_HD_FIELDS, params->initial_max_stream_data_bidi_local);
+ log->log_printf(
+ log->user_data,
+ (NGTCP2_LOG_TP " initial_max_stream_data_bidi_remote=%" PRIu64),
+ NGTCP2_LOG_TP_HD_FIELDS, params->initial_max_stream_data_bidi_remote);
+ log->log_printf(log->user_data,
+ (NGTCP2_LOG_TP " initial_max_stream_data_uni=%" PRIu64),
+ NGTCP2_LOG_TP_HD_FIELDS, params->initial_max_stream_data_uni);
+ log->log_printf(log->user_data, (NGTCP2_LOG_TP " initial_max_data=%" PRIu64),
+ NGTCP2_LOG_TP_HD_FIELDS, params->initial_max_data);
+ log->log_printf(log->user_data,
+ (NGTCP2_LOG_TP " initial_max_streams_bidi=%" PRIu64),
+ NGTCP2_LOG_TP_HD_FIELDS, params->initial_max_streams_bidi);
+ log->log_printf(log->user_data,
+ (NGTCP2_LOG_TP " initial_max_streams_uni=%" PRIu64),
+ NGTCP2_LOG_TP_HD_FIELDS, params->initial_max_streams_uni);
+ log->log_printf(log->user_data, (NGTCP2_LOG_TP " max_idle_timeout=%" PRIu64),
+ NGTCP2_LOG_TP_HD_FIELDS,
+ params->max_idle_timeout / NGTCP2_MILLISECONDS);
+ log->log_printf(log->user_data,
+ (NGTCP2_LOG_TP " max_udp_payload_size=%" PRIu64),
+ NGTCP2_LOG_TP_HD_FIELDS, params->max_udp_payload_size);
+ log->log_printf(log->user_data,
+ (NGTCP2_LOG_TP " ack_delay_exponent=%" PRIu64),
+ NGTCP2_LOG_TP_HD_FIELDS, params->ack_delay_exponent);
+ log->log_printf(log->user_data, (NGTCP2_LOG_TP " max_ack_delay=%" PRIu64),
+ NGTCP2_LOG_TP_HD_FIELDS,
+ params->max_ack_delay / NGTCP2_MILLISECONDS);
+ log->log_printf(log->user_data,
+ (NGTCP2_LOG_TP " active_connection_id_limit=%" PRIu64),
+ NGTCP2_LOG_TP_HD_FIELDS, params->active_connection_id_limit);
+ log->log_printf(log->user_data,
+ (NGTCP2_LOG_TP " disable_active_migration=%d"),
+ NGTCP2_LOG_TP_HD_FIELDS, params->disable_active_migration);
+ log->log_printf(log->user_data,
+ (NGTCP2_LOG_TP " max_datagram_frame_size=%" PRIu64),
+ NGTCP2_LOG_TP_HD_FIELDS, params->max_datagram_frame_size);
+ log->log_printf(log->user_data, (NGTCP2_LOG_TP " grease_quic_bit=%d"),
+ NGTCP2_LOG_TP_HD_FIELDS, params->grease_quic_bit);
+
+ if (params->version_info_present) {
+ log->log_printf(
+ log->user_data,
+ (NGTCP2_LOG_TP " version_information.chosen_version=0x%08x"),
+ NGTCP2_LOG_TP_HD_FIELDS, params->version_info.chosen_version);
+
+ assert(!(params->version_info.available_versionslen & 0x3));
+
+ for (i = 0, p = params->version_info.available_versions;
+ i < params->version_info.available_versionslen;
+ i += sizeof(uint32_t)) {
+ p = ngtcp2_get_uint32(&version, p);
+
+ log->log_printf(
+ log->user_data,
+ (NGTCP2_LOG_TP " version_information.available_versions[%zu]=0x%08x"),
+ NGTCP2_LOG_TP_HD_FIELDS, i >> 2, version);
+ }
+ }
+}
+
+void ngtcp2_log_pkt_lost(ngtcp2_log *log, int64_t pkt_num, uint8_t type,
+ uint8_t flags, ngtcp2_tstamp sent_ts) {
+ if (!log->log_printf) {
+ return;
+ }
+
+ ngtcp2_log_info(log, NGTCP2_LOG_EVENT_RCV,
+ "pkn=%" PRId64 " lost type=%s sent_ts=%" PRIu64, pkt_num,
+ strpkttype_type_flags(type, flags), sent_ts);
+}
+
+static void log_pkt_hd(ngtcp2_log *log, const ngtcp2_pkt_hd *hd,
+ const char *dir) {
+ uint8_t dcid[sizeof(hd->dcid.data) * 2 + 1];
+ uint8_t scid[sizeof(hd->scid.data) * 2 + 1];
+
+ if (!log->log_printf) {
+ return;
+ }
+
+ if (hd->type == NGTCP2_PKT_1RTT) {
+ ngtcp2_log_info(
+ log, NGTCP2_LOG_EVENT_PKT, "%s pkn=%" PRId64 " dcid=0x%s type=%s k=%d",
+ dir, hd->pkt_num,
+ (const char *)ngtcp2_encode_hex(dcid, hd->dcid.data, hd->dcid.datalen),
+ strpkttype(hd), (hd->flags & NGTCP2_PKT_FLAG_KEY_PHASE) != 0);
+ } else {
+ ngtcp2_log_info(
+ log, NGTCP2_LOG_EVENT_PKT,
+ "%s pkn=%" PRId64 " dcid=0x%s scid=0x%s version=0x%08x type=%s len=%zu",
+ dir, hd->pkt_num,
+ (const char *)ngtcp2_encode_hex(dcid, hd->dcid.data, hd->dcid.datalen),
+ (const char *)ngtcp2_encode_hex(scid, hd->scid.data, hd->scid.datalen),
+ hd->version, strpkttype(hd), hd->len);
+ }
+}
+
+void ngtcp2_log_rx_pkt_hd(ngtcp2_log *log, const ngtcp2_pkt_hd *hd) {
+ log_pkt_hd(log, hd, "rx");
+}
+
+void ngtcp2_log_tx_pkt_hd(ngtcp2_log *log, const ngtcp2_pkt_hd *hd) {
+ log_pkt_hd(log, hd, "tx");
+}
+
+void ngtcp2_log_info(ngtcp2_log *log, ngtcp2_log_event ev, const char *fmt,
+ ...) {
+ va_list ap;
+ int n;
+ char buf[NGTCP2_LOG_BUFLEN];
+
+ if (!log->log_printf) {
+ return;
+ }
+
+ va_start(ap, fmt);
+ n = vsnprintf(buf, sizeof(buf), fmt, ap);
+ va_end(ap);
+
+ if (n < 0 || (size_t)n >= sizeof(buf)) {
+ return;
+ }
+
+ log->log_printf(log->user_data, (NGTCP2_LOG_HD " %s"),
+ timestamp_cast(log->last_ts - log->ts), log->scid,
+ strevent(ev), buf);
+}
+
+void ngtcp2_log_tx_cancel(ngtcp2_log *log, const ngtcp2_pkt_hd *hd) {
+ ngtcp2_log_info(log, NGTCP2_LOG_EVENT_PKT,
+ "cancel tx pkn=%" PRId64 " type=%s", hd->pkt_num,
+ strpkttype(hd));
+}
diff --git a/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_log.h b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_log.h
new file mode 100644
index 0000000..029ef1b
--- /dev/null
+++ b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_log.h
@@ -0,0 +1,123 @@
+/*
+ * ngtcp2
+ *
+ * Copyright (c) 2018 ngtcp2 contributors
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#ifndef NGTCP2_LOG_H
+#define NGTCP2_LOG_H
+
+#ifdef HAVE_CONFIG_H
+# include <config.h>
+#endif /* HAVE_CONFIG_H */
+
+#include <ngtcp2/ngtcp2.h>
+
+#include "ngtcp2_pkt.h"
+
+typedef struct ngtcp2_log {
+ /* log_printf is a sink to write log. NULL means no logging
+ output. */
+ ngtcp2_printf log_printf;
+ /* ts is the time point used to write time delta in the log. */
+ ngtcp2_tstamp ts;
+ /* last_ts is the most recent time point that this object is
+ told. */
+ ngtcp2_tstamp last_ts;
+ /* user_data is user-defined opaque data which is passed to
+ log_pritnf. */
+ void *user_data;
+ /* scid is SCID encoded as NULL-terminated hex string. */
+ uint8_t scid[NGTCP2_MAX_CIDLEN * 2 + 1];
+} ngtcp2_log;
+
+/**
+ * @enum
+ *
+ * :type:`ngtcp2_log_event` defines an event of ngtcp2 library
+ * internal logger.
+ */
+typedef enum ngtcp2_log_event {
+ /**
+ * :enum:`NGTCP2_LOG_EVENT_NONE` represents no event.
+ */
+ NGTCP2_LOG_EVENT_NONE,
+ /**
+ * :enum:`NGTCP2_LOG_EVENT_CON` is a connection (catch-all) event
+ */
+ NGTCP2_LOG_EVENT_CON,
+ /**
+ * :enum:`NGTCP2_LOG_EVENT_PKT` is a packet event.
+ */
+ NGTCP2_LOG_EVENT_PKT,
+ /**
+ * :enum:`NGTCP2_LOG_EVENT_FRM` is a QUIC frame event.
+ */
+ NGTCP2_LOG_EVENT_FRM,
+ /**
+ * :enum:`NGTCP2_LOG_EVENT_RCV` is a congestion and recovery event.
+ */
+ NGTCP2_LOG_EVENT_RCV,
+ /**
+ * :enum:`NGTCP2_LOG_EVENT_CRY` is a crypto event.
+ */
+ NGTCP2_LOG_EVENT_CRY,
+ /**
+ * :enum:`NGTCP2_LOG_EVENT_PTV` is a path validation event.
+ */
+ NGTCP2_LOG_EVENT_PTV,
+} ngtcp2_log_event;
+
+void ngtcp2_log_init(ngtcp2_log *log, const ngtcp2_cid *scid,
+ ngtcp2_printf log_printf, ngtcp2_tstamp ts,
+ void *user_data);
+
+void ngtcp2_log_rx_fr(ngtcp2_log *log, const ngtcp2_pkt_hd *hd,
+ const ngtcp2_frame *fr);
+void ngtcp2_log_tx_fr(ngtcp2_log *log, const ngtcp2_pkt_hd *hd,
+ const ngtcp2_frame *fr);
+
+void ngtcp2_log_rx_vn(ngtcp2_log *log, const ngtcp2_pkt_hd *hd,
+ const uint32_t *sv, size_t nsv);
+
+void ngtcp2_log_rx_sr(ngtcp2_log *log, const ngtcp2_pkt_stateless_reset *sr);
+
+void ngtcp2_log_remote_tp(ngtcp2_log *log, uint8_t exttype,
+ const ngtcp2_transport_params *params);
+
+void ngtcp2_log_pkt_lost(ngtcp2_log *log, int64_t pkt_num, uint8_t type,
+ uint8_t flags, ngtcp2_tstamp sent_ts);
+
+void ngtcp2_log_rx_pkt_hd(ngtcp2_log *log, const ngtcp2_pkt_hd *hd);
+
+void ngtcp2_log_tx_pkt_hd(ngtcp2_log *log, const ngtcp2_pkt_hd *hd);
+
+void ngtcp2_log_tx_cancel(ngtcp2_log *log, const ngtcp2_pkt_hd *hd);
+
+/**
+ * @function
+ *
+ * `ngtcp2_log_info` writes info level log.
+ */
+void ngtcp2_log_info(ngtcp2_log *log, ngtcp2_log_event ev, const char *fmt,
+ ...);
+
+#endif /* NGTCP2_LOG_H */
diff --git a/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_macro.h b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_macro.h
new file mode 100644
index 0000000..28d3461
--- /dev/null
+++ b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_macro.h
@@ -0,0 +1,58 @@
+/*
+ * ngtcp2
+ *
+ * Copyright (c) 2017 ngtcp2 contributors
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#ifndef NGTCP2_MACRO_H
+#define NGTCP2_MACRO_H
+
+#ifdef HAVE_CONFIG_H
+# include <config.h>
+#endif /* HAVE_CONFIG_H */
+
+#include <stddef.h>
+
+#include <ngtcp2/ngtcp2.h>
+
+#define ngtcp2_min(A, B) ((A) < (B) ? (A) : (B))
+#define ngtcp2_max(A, B) ((A) > (B) ? (A) : (B))
+
+#define ngtcp2_struct_of(ptr, type, member) \
+ ((type *)(void *)((char *)(ptr)-offsetof(type, member)))
+
+/* ngtcp2_list_insert inserts |T| before |*PD|. The contract is that
+ this is singly linked list, and the next element is pointed by next
+ field of the previous element. |PD| must be a pointer to the
+ pointer to the next field of the previous element of |*PD|: if C is
+ the previous element of |PD|, PD = &C->next. */
+#define ngtcp2_list_insert(T, PD) \
+ do { \
+ (T)->next = *(PD); \
+ *(PD) = (T); \
+ } while (0)
+
+/*
+ * ngtcp2_arraylen returns the number of elements in array |A|.
+ */
+#define ngtcp2_arraylen(A) (sizeof(A) / sizeof(A[0]))
+
+#endif /* NGTCP2_MACRO_H */
diff --git a/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_map.c b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_map.c
new file mode 100644
index 0000000..12bc6e8
--- /dev/null
+++ b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_map.c
@@ -0,0 +1,336 @@
+/*
+ * ngtcp2
+ *
+ * Copyright (c) 2017 ngtcp2 contributors
+ * Copyright (c) 2012 nghttp2 contributors
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "ngtcp2_map.h"
+
+#include <string.h>
+#include <assert.h>
+#include <stdio.h>
+
+#include "ngtcp2_conv.h"
+
+#define NGTCP2_INITIAL_TABLE_LENBITS 4
+
+void ngtcp2_map_init(ngtcp2_map *map, const ngtcp2_mem *mem) {
+ map->mem = mem;
+ map->tablelen = 0;
+ map->tablelenbits = 0;
+ map->table = NULL;
+ map->size = 0;
+}
+
+void ngtcp2_map_free(ngtcp2_map *map) {
+ if (!map) {
+ return;
+ }
+
+ ngtcp2_mem_free(map->mem, map->table);
+}
+
+void ngtcp2_map_each_free(ngtcp2_map *map, int (*func)(void *data, void *ptr),
+ void *ptr) {
+ uint32_t i;
+ ngtcp2_map_bucket *bkt;
+
+ for (i = 0; i < map->tablelen; ++i) {
+ bkt = &map->table[i];
+
+ if (bkt->data == NULL) {
+ continue;
+ }
+
+ func(bkt->data, ptr);
+ }
+}
+
+int ngtcp2_map_each(ngtcp2_map *map, int (*func)(void *data, void *ptr),
+ void *ptr) {
+ int rv;
+ uint32_t i;
+ ngtcp2_map_bucket *bkt;
+
+ if (map->size == 0) {
+ return 0;
+ }
+
+ for (i = 0; i < map->tablelen; ++i) {
+ bkt = &map->table[i];
+
+ if (bkt->data == NULL) {
+ continue;
+ }
+
+ rv = func(bkt->data, ptr);
+ if (rv != 0) {
+ return rv;
+ }
+ }
+
+ return 0;
+}
+
+static uint32_t hash(ngtcp2_map_key_type key) {
+ return (uint32_t)((key * 11400714819323198485llu) >> 32);
+}
+
+static size_t h2idx(uint32_t hash, uint32_t bits) {
+ return hash >> (32 - bits);
+}
+
+static size_t distance(uint32_t tablelen, uint32_t tablelenbits,
+ ngtcp2_map_bucket *bkt, size_t idx) {
+ return (idx - h2idx(bkt->hash, tablelenbits)) & (tablelen - 1);
+}
+
+static void map_bucket_swap(ngtcp2_map_bucket *bkt, uint32_t *phash,
+ ngtcp2_map_key_type *pkey, void **pdata) {
+ uint32_t h = bkt->hash;
+ ngtcp2_map_key_type key = bkt->key;
+ void *data = bkt->data;
+
+ bkt->hash = *phash;
+ bkt->key = *pkey;
+ bkt->data = *pdata;
+
+ *phash = h;
+ *pkey = key;
+ *pdata = data;
+}
+
+static void map_bucket_set_data(ngtcp2_map_bucket *bkt, uint32_t hash,
+ ngtcp2_map_key_type key, void *data) {
+ bkt->hash = hash;
+ bkt->key = key;
+ bkt->data = data;
+}
+
+void ngtcp2_map_print_distance(ngtcp2_map *map) {
+ uint32_t i;
+ size_t idx;
+ ngtcp2_map_bucket *bkt;
+
+ for (i = 0; i < map->tablelen; ++i) {
+ bkt = &map->table[i];
+
+ if (bkt->data == NULL) {
+ fprintf(stderr, "@%u <EMPTY>\n", i);
+ continue;
+ }
+
+ idx = h2idx(bkt->hash, map->tablelenbits);
+ fprintf(stderr, "@%u hash=%08x key=%" PRIu64 " base=%zu distance=%zu\n", i,
+ bkt->hash, bkt->key, idx,
+ distance(map->tablelen, map->tablelenbits, bkt, idx));
+ }
+}
+
+static int insert(ngtcp2_map_bucket *table, uint32_t tablelen,
+ uint32_t tablelenbits, uint32_t hash, ngtcp2_map_key_type key,
+ void *data) {
+ size_t idx = h2idx(hash, tablelenbits);
+ size_t d = 0, dd;
+ ngtcp2_map_bucket *bkt;
+
+ for (;;) {
+ bkt = &table[idx];
+
+ if (bkt->data == NULL) {
+ map_bucket_set_data(bkt, hash, key, data);
+ return 0;
+ }
+
+ dd = distance(tablelen, tablelenbits, bkt, idx);
+ if (d > dd) {
+ map_bucket_swap(bkt, &hash, &key, &data);
+ d = dd;
+ } else if (bkt->key == key) {
+ /* TODO This check is just a waste after first swap or if this
+ function is called from map_resize. That said, there is no
+ difference with or without this conditional in performance
+ wise. */
+ return NGTCP2_ERR_INVALID_ARGUMENT;
+ }
+
+ ++d;
+ idx = (idx + 1) & (tablelen - 1);
+ }
+}
+
+/* new_tablelen must be power of 2 and new_tablelen == (1 <<
+ new_tablelenbits) must hold. */
+static int map_resize(ngtcp2_map *map, uint32_t new_tablelen,
+ uint32_t new_tablelenbits) {
+ uint32_t i;
+ ngtcp2_map_bucket *new_table;
+ ngtcp2_map_bucket *bkt;
+ int rv;
+ (void)rv;
+
+ new_table =
+ ngtcp2_mem_calloc(map->mem, new_tablelen, sizeof(ngtcp2_map_bucket));
+ if (new_table == NULL) {
+ return NGTCP2_ERR_NOMEM;
+ }
+
+ for (i = 0; i < map->tablelen; ++i) {
+ bkt = &map->table[i];
+ if (bkt->data == NULL) {
+ continue;
+ }
+ rv = insert(new_table, new_tablelen, new_tablelenbits, bkt->hash, bkt->key,
+ bkt->data);
+
+ assert(0 == rv);
+ }
+
+ ngtcp2_mem_free(map->mem, map->table);
+ map->tablelen = new_tablelen;
+ map->tablelenbits = new_tablelenbits;
+ map->table = new_table;
+
+ return 0;
+}
+
+int ngtcp2_map_insert(ngtcp2_map *map, ngtcp2_map_key_type key, void *data) {
+ int rv;
+
+ assert(data);
+
+ /* Load factor is 0.75 */
+ if ((map->size + 1) * 4 > map->tablelen * 3) {
+ if (map->tablelen) {
+ rv = map_resize(map, map->tablelen * 2, map->tablelenbits + 1);
+ if (rv != 0) {
+ return rv;
+ }
+ } else {
+ rv = map_resize(map, 1 << NGTCP2_INITIAL_TABLE_LENBITS,
+ NGTCP2_INITIAL_TABLE_LENBITS);
+ if (rv != 0) {
+ return rv;
+ }
+ }
+ }
+
+ rv = insert(map->table, map->tablelen, map->tablelenbits, hash(key), key,
+ data);
+ if (rv != 0) {
+ return rv;
+ }
+ ++map->size;
+ return 0;
+}
+
+void *ngtcp2_map_find(ngtcp2_map *map, ngtcp2_map_key_type key) {
+ uint32_t h;
+ size_t idx;
+ ngtcp2_map_bucket *bkt;
+ size_t d = 0;
+
+ if (map->size == 0) {
+ return NULL;
+ }
+
+ h = hash(key);
+ idx = h2idx(h, map->tablelenbits);
+
+ for (;;) {
+ bkt = &map->table[idx];
+
+ if (bkt->data == NULL ||
+ d > distance(map->tablelen, map->tablelenbits, bkt, idx)) {
+ return NULL;
+ }
+
+ if (bkt->key == key) {
+ return bkt->data;
+ }
+
+ ++d;
+ idx = (idx + 1) & (map->tablelen - 1);
+ }
+}
+
+int ngtcp2_map_remove(ngtcp2_map *map, ngtcp2_map_key_type key) {
+ uint32_t h;
+ size_t idx, didx;
+ ngtcp2_map_bucket *bkt;
+ size_t d = 0;
+
+ if (map->size == 0) {
+ return NGTCP2_ERR_INVALID_ARGUMENT;
+ }
+
+ h = hash(key);
+ idx = h2idx(h, map->tablelenbits);
+
+ for (;;) {
+ bkt = &map->table[idx];
+
+ if (bkt->data == NULL ||
+ d > distance(map->tablelen, map->tablelenbits, bkt, idx)) {
+ return NGTCP2_ERR_INVALID_ARGUMENT;
+ }
+
+ if (bkt->key == key) {
+ map_bucket_set_data(bkt, 0, 0, NULL);
+
+ didx = idx;
+ idx = (idx + 1) & (map->tablelen - 1);
+
+ for (;;) {
+ bkt = &map->table[idx];
+ if (bkt->data == NULL ||
+ distance(map->tablelen, map->tablelenbits, bkt, idx) == 0) {
+ break;
+ }
+
+ map->table[didx] = *bkt;
+ map_bucket_set_data(bkt, 0, 0, NULL);
+ didx = idx;
+
+ idx = (idx + 1) & (map->tablelen - 1);
+ }
+
+ --map->size;
+
+ return 0;
+ }
+
+ ++d;
+ idx = (idx + 1) & (map->tablelen - 1);
+ }
+}
+
+void ngtcp2_map_clear(ngtcp2_map *map) {
+ if (map->tablelen == 0) {
+ return;
+ }
+
+ memset(map->table, 0, sizeof(*map->table) * map->tablelen);
+ map->size = 0;
+}
+
+size_t ngtcp2_map_size(ngtcp2_map *map) { return map->size; }
diff --git a/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_map.h b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_map.h
new file mode 100644
index 0000000..a64344a
--- /dev/null
+++ b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_map.h
@@ -0,0 +1,136 @@
+/*
+ * ngtcp2
+ *
+ * Copyright (c) 2017 ngtcp2 contributors
+ * Copyright (c) 2012 nghttp2 contributors
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#ifndef NGTCP2_MAP_H
+#define NGTCP2_MAP_H
+
+#ifdef HAVE_CONFIG_H
+# include <config.h>
+#endif /* HAVE_CONFIG_H */
+
+#include <ngtcp2/ngtcp2.h>
+
+#include "ngtcp2_mem.h"
+
+/* Implementation of unordered map */
+
+typedef uint64_t ngtcp2_map_key_type;
+
+typedef struct ngtcp2_map_bucket {
+ uint32_t hash;
+ ngtcp2_map_key_type key;
+ void *data;
+} ngtcp2_map_bucket;
+
+typedef struct ngtcp2_map {
+ ngtcp2_map_bucket *table;
+ const ngtcp2_mem *mem;
+ size_t size;
+ uint32_t tablelen;
+ uint32_t tablelenbits;
+} ngtcp2_map;
+
+/*
+ * Initializes the map |map|.
+ */
+void ngtcp2_map_init(ngtcp2_map *map, const ngtcp2_mem *mem);
+
+/*
+ * Deallocates any resources allocated for |map|. The stored entries
+ * are not freed by this function. Use ngtcp2_map_each_free() to free
+ * each entries.
+ */
+void ngtcp2_map_free(ngtcp2_map *map);
+
+/*
+ * Deallocates each entries using |func| function and any resources
+ * allocated for |map|. The |func| function is responsible for freeing
+ * given the |data| object. The |ptr| will be passed to the |func| as
+ * send argument. The return value of the |func| will be ignored.
+ */
+void ngtcp2_map_each_free(ngtcp2_map *map, int (*func)(void *data, void *ptr),
+ void *ptr);
+
+/*
+ * Inserts the new |data| with the |key| to the map |map|.
+ *
+ * This function returns 0 if it succeeds, or one of the following
+ * negative error codes:
+ *
+ * NGTCP2_ERR_INVALID_ARGUMENT
+ * The item associated by |key| already exists.
+ * NGTCP2_ERR_NOMEM
+ * Out of memory
+ */
+int ngtcp2_map_insert(ngtcp2_map *map, ngtcp2_map_key_type key, void *data);
+
+/*
+ * Returns the data associated by the key |key|. If there is no such
+ * data, this function returns NULL.
+ */
+void *ngtcp2_map_find(ngtcp2_map *map, ngtcp2_map_key_type key);
+
+/*
+ * Removes the data associated by the key |key| from the |map|. The
+ * removed data is not freed by this function.
+ *
+ * This function returns 0 if it succeeds, or one of the following
+ * negative error codes:
+ *
+ * NGTCP2_ERR_INVALID_ARGUMENT
+ * The data associated by |key| does not exist.
+ */
+int ngtcp2_map_remove(ngtcp2_map *map, ngtcp2_map_key_type key);
+
+/*
+ * Removes all entries from |map|.
+ */
+void ngtcp2_map_clear(ngtcp2_map *map);
+
+/*
+ * Returns the number of items stored in the map |map|.
+ */
+size_t ngtcp2_map_size(ngtcp2_map *map);
+
+/*
+ * Applies the function |func| to each data in the |map| with the
+ * optional user supplied pointer |ptr|.
+ *
+ * If the |func| returns 0, this function calls the |func| with the
+ * next data. If the |func| returns nonzero, it will not call the
+ * |func| for further entries and return the return value of the
+ * |func| immediately. Thus, this function returns 0 if all the
+ * invocations of the |func| return 0, or nonzero value which the last
+ * invocation of |func| returns.
+ *
+ * Don't use this function to free each data. Use
+ * ngtcp2_map_each_free() instead.
+ */
+int ngtcp2_map_each(ngtcp2_map *map, int (*func)(void *data, void *ptr),
+ void *ptr);
+
+void ngtcp2_map_print_distance(ngtcp2_map *map);
+
+#endif /* NGTCP2_MAP_H */
diff --git a/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_mem.c b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_mem.c
new file mode 100644
index 0000000..bcce0b5
--- /dev/null
+++ b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_mem.c
@@ -0,0 +1,113 @@
+/*
+ * ngtcp2
+ *
+ * Copyright (c) 2017 ngtcp2 contributors
+ * Copyright (c) 2014 nghttp2 contributors
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "ngtcp2_mem.h"
+
+#include <stdio.h>
+
+static void *default_malloc(size_t size, void *user_data) {
+ (void)user_data;
+
+ return malloc(size);
+}
+
+static void default_free(void *ptr, void *user_data) {
+ (void)user_data;
+
+ free(ptr);
+}
+
+static void *default_calloc(size_t nmemb, size_t size, void *user_data) {
+ (void)user_data;
+
+ return calloc(nmemb, size);
+}
+
+static void *default_realloc(void *ptr, size_t size, void *user_data) {
+ (void)user_data;
+
+ return realloc(ptr, size);
+}
+
+static const ngtcp2_mem mem_default = {NULL, default_malloc, default_free,
+ default_calloc, default_realloc};
+
+const ngtcp2_mem *ngtcp2_mem_default(void) { return &mem_default; }
+
+#ifndef MEMDEBUG
+void *ngtcp2_mem_malloc(const ngtcp2_mem *mem, size_t size) {
+ return mem->malloc(size, mem->user_data);
+}
+
+void ngtcp2_mem_free(const ngtcp2_mem *mem, void *ptr) {
+ mem->free(ptr, mem->user_data);
+}
+
+void *ngtcp2_mem_calloc(const ngtcp2_mem *mem, size_t nmemb, size_t size) {
+ return mem->calloc(nmemb, size, mem->user_data);
+}
+
+void *ngtcp2_mem_realloc(const ngtcp2_mem *mem, void *ptr, size_t size) {
+ return mem->realloc(ptr, size, mem->user_data);
+}
+#else /* MEMDEBUG */
+void *ngtcp2_mem_malloc_debug(const ngtcp2_mem *mem, size_t size,
+ const char *func, const char *file, size_t line) {
+ void *nptr = mem->malloc(size, mem->user_data);
+
+ fprintf(stderr, "malloc %p size=%zu in %s at %s:%zu\n", nptr, size, func,
+ file, line);
+
+ return nptr;
+}
+
+void ngtcp2_mem_free_debug(const ngtcp2_mem *mem, void *ptr, const char *func,
+ const char *file, size_t line) {
+ fprintf(stderr, "free ptr=%p in %s at %s:%zu\n", ptr, func, file, line);
+
+ mem->free(ptr, mem->user_data);
+}
+
+void *ngtcp2_mem_calloc_debug(const ngtcp2_mem *mem, size_t nmemb, size_t size,
+ const char *func, const char *file, size_t line) {
+ void *nptr = mem->calloc(nmemb, size, mem->user_data);
+
+ fprintf(stderr, "calloc %p nmemb=%zu size=%zu in %s at %s:%zu\n", nptr, nmemb,
+ size, func, file, line);
+
+ return nptr;
+}
+
+void *ngtcp2_mem_realloc_debug(const ngtcp2_mem *mem, void *ptr, size_t size,
+ const char *func, const char *file,
+ size_t line) {
+ void *nptr = mem->realloc(ptr, size, mem->user_data);
+
+ fprintf(stderr, "realloc %p ptr=%p size=%zu in %s at %s:%zu\n", nptr, ptr,
+ size, func, file, line);
+
+ return nptr;
+}
+#endif /* MEMDEBUG */
diff --git a/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_mem.h b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_mem.h
new file mode 100644
index 0000000..c99b6c5
--- /dev/null
+++ b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_mem.h
@@ -0,0 +1,72 @@
+/*
+ * ngtcp2
+ *
+ * Copyright (c) 2017 ngtcp2 contributors
+ * Copyright (c) 2014 nghttp2 contributors
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#ifndef NGTCP2_MEM_H
+#define NGTCP2_MEM_H
+
+#ifdef HAVE_CONFIG_H
+# include <config.h>
+#endif /* HAVE_CONFIG_H */
+
+#include <ngtcp2/ngtcp2.h>
+
+/* Convenient wrapper functions to call allocator function in
+ |mem|. */
+#ifndef MEMDEBUG
+void *ngtcp2_mem_malloc(const ngtcp2_mem *mem, size_t size);
+
+void ngtcp2_mem_free(const ngtcp2_mem *mem, void *ptr);
+
+void *ngtcp2_mem_calloc(const ngtcp2_mem *mem, size_t nmemb, size_t size);
+
+void *ngtcp2_mem_realloc(const ngtcp2_mem *mem, void *ptr, size_t size);
+#else /* MEMDEBUG */
+void *ngtcp2_mem_malloc_debug(const ngtcp2_mem *mem, size_t size,
+ const char *func, const char *file, size_t line);
+
+# define ngtcp2_mem_malloc(MEM, SIZE) \
+ ngtcp2_mem_malloc_debug((MEM), (SIZE), __func__, __FILE__, __LINE__)
+
+void ngtcp2_mem_free_debug(const ngtcp2_mem *mem, void *ptr, const char *func,
+ const char *file, size_t line);
+
+# define ngtcp2_mem_free(MEM, PTR) \
+ ngtcp2_mem_free_debug((MEM), (PTR), __func__, __FILE__, __LINE__)
+
+void *ngtcp2_mem_calloc_debug(const ngtcp2_mem *mem, size_t nmemb, size_t size,
+ const char *func, const char *file, size_t line);
+
+# define ngtcp2_mem_calloc(MEM, NMEMB, SIZE) \
+ ngtcp2_mem_calloc_debug((MEM), (NMEMB), (SIZE), __func__, __FILE__, \
+ __LINE__)
+
+void *ngtcp2_mem_realloc_debug(const ngtcp2_mem *mem, void *ptr, size_t size,
+ const char *func, const char *file, size_t line);
+
+# define ngtcp2_mem_realloc(MEM, PTR, SIZE) \
+ ngtcp2_mem_realloc_debug((MEM), (PTR), (SIZE), __func__, __FILE__, __LINE__)
+#endif /* MEMDEBUG */
+
+#endif /* NGTCP2_MEM_H */
diff --git a/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_net.h b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_net.h
new file mode 100644
index 0000000..cd73d2b
--- /dev/null
+++ b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_net.h
@@ -0,0 +1,136 @@
+/*
+ * ngtcp2
+ *
+ * Copyright (c) 2022 ngtcp2 contributors
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#ifndef NGTCP2_NET_H
+#define NGTCP2_NET_H
+
+/* This header file is explicitly allowed to be shared with
+ ngtcp2_crypto library. */
+
+#ifdef HAVE_CONFIG_H
+# include <config.h>
+#endif /* HAVE_CONFIG_H */
+
+#ifdef HAVE_ARPA_INET_H
+# include <arpa/inet.h>
+#endif /* HAVE_ARPA_INET_H */
+
+#ifdef HAVE_NETINET_IN_H
+# include <netinet/in.h>
+#endif /* HAVE_NETINET_IN_H */
+
+#ifdef HAVE_BYTESWAP_H
+# include <byteswap.h>
+#endif /* HAVE_BYTESWAP_H */
+
+#ifdef HAVE_ENDIAN_H
+# include <endian.h>
+#endif /* HAVE_ENDIAN_H */
+
+#ifdef HAVE_SYS_ENDIAN_H
+# include <sys/endian.h>
+#endif /* HAVE_SYS_ENDIAN_H */
+
+#include <ngtcp2/ngtcp2.h>
+
+#if defined(HAVE_BSWAP_64) || \
+ (defined(HAVE_DECL_BSWAP_64) && HAVE_DECL_BSWAP_64 > 0)
+# define ngtcp2_bswap64 bswap_64
+#else /* !HAVE_BSWAP_64 */
+# define ngtcp2_bswap64(N) \
+ ((uint64_t)(ngtcp2_ntohl((uint32_t)(N))) << 32 | \
+ ngtcp2_ntohl((uint32_t)((N) >> 32)))
+#endif /* !HAVE_BSWAP_64 */
+
+#if defined(HAVE_BE64TOH) || \
+ (defined(HAVE_DECL_BE64TOH) && HAVE_DECL_BE64TOH > 0)
+# define ngtcp2_ntohl64(N) be64toh(N)
+# define ngtcp2_htonl64(N) htobe64(N)
+#else /* !HAVE_BE64TOH */
+# if defined(WORDS_BIGENDIAN)
+# define ngtcp2_ntohl64(N) (N)
+# define ngtcp2_htonl64(N) (N)
+# else /* !WORDS_BIGENDIAN */
+# define ngtcp2_ntohl64(N) ngtcp2_bswap64(N)
+# define ngtcp2_htonl64(N) ngtcp2_bswap64(N)
+# endif /* !WORDS_BIGENDIAN */
+#endif /* !HAVE_BE64TOH */
+
+#if defined(WIN32)
+/* Windows requires ws2_32 library for ntonl family functions. We
+ define inline functions for those function so that we don't have
+ dependeny on that lib. */
+
+# ifdef _MSC_VER
+# define STIN static __inline
+# else
+# define STIN static inline
+# endif
+
+STIN uint32_t ngtcp2_htonl(uint32_t hostlong) {
+ uint32_t res;
+ unsigned char *p = (unsigned char *)&res;
+ *p++ = (unsigned char)(hostlong >> 24);
+ *p++ = (hostlong >> 16) & 0xffu;
+ *p++ = (hostlong >> 8) & 0xffu;
+ *p = hostlong & 0xffu;
+ return res;
+}
+
+STIN uint16_t ngtcp2_htons(uint16_t hostshort) {
+ uint16_t res;
+ unsigned char *p = (unsigned char *)&res;
+ *p++ = (unsigned char)(hostshort >> 8);
+ *p = hostshort & 0xffu;
+ return res;
+}
+
+STIN uint32_t ngtcp2_ntohl(uint32_t netlong) {
+ uint32_t res;
+ unsigned char *p = (unsigned char *)&netlong;
+ res = (uint32_t)(*p++ << 24);
+ res += (uint32_t)(*p++ << 16);
+ res += (uint32_t)(*p++ << 8);
+ res += *p;
+ return res;
+}
+
+STIN uint16_t ngtcp2_ntohs(uint16_t netshort) {
+ uint16_t res;
+ unsigned char *p = (unsigned char *)&netshort;
+ res = (uint16_t)(*p++ << 8);
+ res += *p;
+ return res;
+}
+
+#else /* !WIN32 */
+
+# define ngtcp2_htonl htonl
+# define ngtcp2_htons htons
+# define ngtcp2_ntohl ntohl
+# define ngtcp2_ntohs ntohs
+
+#endif /* !WIN32 */
+
+#endif /* NGTCP2_NET_H */
diff --git a/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_objalloc.c b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_objalloc.c
new file mode 100644
index 0000000..8b06cdd
--- /dev/null
+++ b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_objalloc.c
@@ -0,0 +1,40 @@
+/*
+ * ngtcp2
+ *
+ * Copyright (c) 2022 ngtcp2 contributors
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "ngtcp2_objalloc.h"
+
+void ngtcp2_objalloc_init(ngtcp2_objalloc *objalloc, size_t blklen,
+ const ngtcp2_mem *mem) {
+ ngtcp2_balloc_init(&objalloc->balloc, blklen, mem);
+ ngtcp2_opl_init(&objalloc->opl);
+}
+
+void ngtcp2_objalloc_free(ngtcp2_objalloc *objalloc) {
+ ngtcp2_balloc_free(&objalloc->balloc);
+}
+
+void ngtcp2_objalloc_clear(ngtcp2_objalloc *objalloc) {
+ ngtcp2_opl_clear(&objalloc->opl);
+ ngtcp2_balloc_clear(&objalloc->balloc);
+}
diff --git a/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_objalloc.h b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_objalloc.h
new file mode 100644
index 0000000..f1bbd3a
--- /dev/null
+++ b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_objalloc.h
@@ -0,0 +1,140 @@
+/*
+ * ngtcp2
+ *
+ * Copyright (c) 2022 ngtcp2 contributors
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#ifndef NGTCP2_OBJALLOC_H
+#define NGTCP2_OBJALLOC_H
+
+#ifdef HAVE_CONFIG_H
+# include <config.h>
+#endif /* HAVE_CONFIG_H */
+
+#include <ngtcp2/ngtcp2.h>
+
+#include "ngtcp2_balloc.h"
+#include "ngtcp2_opl.h"
+#include "ngtcp2_macro.h"
+#include "ngtcp2_mem.h"
+
+/*
+ * ngtcp2_objalloc combines ngtcp2_balloc and ngtcp2_opl, and provides
+ * an object pool with the custom allocator to reduce the allocation
+ * and deallocation overheads for small objects.
+ */
+typedef struct ngtcp2_objalloc {
+ ngtcp2_balloc balloc;
+ ngtcp2_opl opl;
+} ngtcp2_objalloc;
+
+/*
+ * ngtcp2_objalloc_init initializes |objalloc|. |blklen| is directly
+ * passed to ngtcp2_balloc_init.
+ */
+void ngtcp2_objalloc_init(ngtcp2_objalloc *objalloc, size_t blklen,
+ const ngtcp2_mem *mem);
+
+/*
+ * ngtcp2_objalloc_free releases all allocated resources.
+ */
+void ngtcp2_objalloc_free(ngtcp2_objalloc *objalloc);
+
+/*
+ * ngtcp2_objalloc_clear releases all allocated resources and
+ * initializes its state.
+ */
+void ngtcp2_objalloc_clear(ngtcp2_objalloc *objalloc);
+
+#ifndef NOMEMPOOL
+# define ngtcp2_objalloc_def(NAME, TYPE, OPLENTFIELD) \
+ inline static void ngtcp2_objalloc_##NAME##_init( \
+ ngtcp2_objalloc *objalloc, size_t nmemb, const ngtcp2_mem *mem) { \
+ ngtcp2_objalloc_init( \
+ objalloc, ((sizeof(TYPE) + 0xfu) & ~(uintptr_t)0xfu) * nmemb, mem); \
+ } \
+ \
+ inline static TYPE *ngtcp2_objalloc_##NAME##_get( \
+ ngtcp2_objalloc *objalloc) { \
+ ngtcp2_opl_entry *oplent = ngtcp2_opl_pop(&objalloc->opl); \
+ TYPE *obj; \
+ int rv; \
+ \
+ if (!oplent) { \
+ rv = \
+ ngtcp2_balloc_get(&objalloc->balloc, (void **)&obj, sizeof(TYPE)); \
+ if (rv != 0) { \
+ return NULL; \
+ } \
+ \
+ return obj; \
+ } \
+ \
+ return ngtcp2_struct_of(oplent, TYPE, OPLENTFIELD); \
+ } \
+ \
+ inline static TYPE *ngtcp2_objalloc_##NAME##_len_get( \
+ ngtcp2_objalloc *objalloc, size_t len) { \
+ ngtcp2_opl_entry *oplent = ngtcp2_opl_pop(&objalloc->opl); \
+ TYPE *obj; \
+ int rv; \
+ \
+ if (!oplent) { \
+ rv = ngtcp2_balloc_get(&objalloc->balloc, (void **)&obj, len); \
+ if (rv != 0) { \
+ return NULL; \
+ } \
+ \
+ return obj; \
+ } \
+ \
+ return ngtcp2_struct_of(oplent, TYPE, OPLENTFIELD); \
+ } \
+ \
+ inline static void ngtcp2_objalloc_##NAME##_release( \
+ ngtcp2_objalloc *objalloc, TYPE *obj) { \
+ ngtcp2_opl_push(&objalloc->opl, &obj->OPLENTFIELD); \
+ }
+#else /* NOMEMPOOL */
+# define ngtcp2_objalloc_def(NAME, TYPE, OPLENTFIELD) \
+ inline static void ngtcp2_objalloc_##NAME##_init( \
+ ngtcp2_objalloc *objalloc, size_t nmemb, const ngtcp2_mem *mem) { \
+ ngtcp2_objalloc_init( \
+ objalloc, ((sizeof(TYPE) + 0xfu) & ~(uintptr_t)0xfu) * nmemb, mem); \
+ } \
+ \
+ inline static TYPE *ngtcp2_objalloc_##NAME##_get( \
+ ngtcp2_objalloc *objalloc) { \
+ return ngtcp2_mem_malloc(objalloc->balloc.mem, sizeof(TYPE)); \
+ } \
+ \
+ inline static TYPE *ngtcp2_objalloc_##NAME##_len_get( \
+ ngtcp2_objalloc *objalloc, size_t len) { \
+ return ngtcp2_mem_malloc(objalloc->balloc.mem, len); \
+ } \
+ \
+ inline static void ngtcp2_objalloc_##NAME##_release( \
+ ngtcp2_objalloc *objalloc, TYPE *obj) { \
+ ngtcp2_mem_free(objalloc->balloc.mem, obj); \
+ }
+#endif /* NOMEMPOOL */
+
+#endif /* NGTCP2_OBJALLOC_H */
diff --git a/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_opl.c b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_opl.c
new file mode 100644
index 0000000..a29361c
--- /dev/null
+++ b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_opl.c
@@ -0,0 +1,46 @@
+/*
+ * ngtcp2
+ *
+ * Copyright (c) 2022 ngtcp2 contributors
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "ngtcp2_opl.h"
+
+void ngtcp2_opl_init(ngtcp2_opl *opl) { opl->head = NULL; }
+
+void ngtcp2_opl_push(ngtcp2_opl *opl, ngtcp2_opl_entry *ent) {
+ ent->next = opl->head;
+ opl->head = ent;
+}
+
+ngtcp2_opl_entry *ngtcp2_opl_pop(ngtcp2_opl *opl) {
+ ngtcp2_opl_entry *ent = opl->head;
+
+ if (!ent) {
+ return NULL;
+ }
+
+ opl->head = ent->next;
+
+ return ent;
+}
+
+void ngtcp2_opl_clear(ngtcp2_opl *opl) { opl->head = NULL; }
diff --git a/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_opl.h b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_opl.h
new file mode 100644
index 0000000..714aa36
--- /dev/null
+++ b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_opl.h
@@ -0,0 +1,65 @@
+/*
+ * ngtcp2
+ *
+ * Copyright (c) 2022 ngtcp2 contributors
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#ifndef NGTCP2_OPL_H
+#define NGTCP2_OPL_H
+
+#ifdef HAVE_CONFIG_H
+# include <config.h>
+#endif /* HAVE_CONFIG_H */
+
+#include <ngtcp2/ngtcp2.h>
+
+typedef struct ngtcp2_opl_entry ngtcp2_opl_entry;
+
+struct ngtcp2_opl_entry {
+ ngtcp2_opl_entry *next;
+};
+
+/*
+ * ngtcp2_opl is an object memory pool.
+ */
+typedef struct ngtcp2_opl {
+ ngtcp2_opl_entry *head;
+} ngtcp2_opl;
+
+/*
+ * ngtcp2_opl_init initializes |opl|.
+ */
+void ngtcp2_opl_init(ngtcp2_opl *opl);
+
+/*
+ * ngtcp2_opl_push inserts |ent| to |opl| head.
+ */
+void ngtcp2_opl_push(ngtcp2_opl *opl, ngtcp2_opl_entry *ent);
+
+/*
+ * ngtcp2_opl_pop removes the first ngtcp2_opl_entry from |opl| and
+ * returns it. If |opl| does not have any entry, it returns NULL.
+ */
+ngtcp2_opl_entry *ngtcp2_opl_pop(ngtcp2_opl *opl);
+
+void ngtcp2_opl_clear(ngtcp2_opl *opl);
+
+#endif /* NGTCP2_OPL_H */
diff --git a/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_path.c b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_path.c
new file mode 100644
index 0000000..8323873
--- /dev/null
+++ b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_path.c
@@ -0,0 +1,77 @@
+/*
+ * ngtcp2
+ *
+ * Copyright (c) 2019 ngtcp2 contributors
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "ngtcp2_path.h"
+
+#include <string.h>
+
+#include "ngtcp2_addr.h"
+
+void ngtcp2_path_init(ngtcp2_path *path, const ngtcp2_addr *local,
+ const ngtcp2_addr *remote) {
+ path->local = *local;
+ path->remote = *remote;
+}
+
+void ngtcp2_path_copy(ngtcp2_path *dest, const ngtcp2_path *src) {
+ ngtcp2_addr_copy(&dest->local, &src->local);
+ ngtcp2_addr_copy(&dest->remote, &src->remote);
+ dest->user_data = src->user_data;
+}
+
+int ngtcp2_path_eq(const ngtcp2_path *a, const ngtcp2_path *b) {
+ return ngtcp2_addr_eq(&a->local, &b->local) &&
+ ngtcp2_addr_eq(&a->remote, &b->remote);
+}
+
+void ngtcp2_path_storage_init(ngtcp2_path_storage *ps,
+ const ngtcp2_sockaddr *local_addr,
+ ngtcp2_socklen local_addrlen,
+ const ngtcp2_sockaddr *remote_addr,
+ ngtcp2_socklen remote_addrlen, void *user_data) {
+ ngtcp2_addr_init(&ps->path.local, (const ngtcp2_sockaddr *)&ps->local_addrbuf,
+ 0);
+ ngtcp2_addr_init(&ps->path.remote,
+ (const ngtcp2_sockaddr *)&ps->remote_addrbuf, 0);
+
+ ngtcp2_addr_copy_byte(&ps->path.local, local_addr, local_addrlen);
+ ngtcp2_addr_copy_byte(&ps->path.remote, remote_addr, remote_addrlen);
+
+ ps->path.user_data = user_data;
+}
+
+void ngtcp2_path_storage_init2(ngtcp2_path_storage *ps,
+ const ngtcp2_path *path) {
+ ngtcp2_path_storage_init(ps, path->local.addr, path->local.addrlen,
+ path->remote.addr, path->remote.addrlen,
+ path->user_data);
+}
+
+void ngtcp2_path_storage_zero(ngtcp2_path_storage *ps) {
+ ngtcp2_addr_init(&ps->path.local, (const ngtcp2_sockaddr *)&ps->local_addrbuf,
+ 0);
+ ngtcp2_addr_init(&ps->path.remote,
+ (const ngtcp2_sockaddr *)&ps->remote_addrbuf, 0);
+ ps->path.user_data = NULL;
+}
diff --git a/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_path.h b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_path.h
new file mode 100644
index 0000000..0c360e9
--- /dev/null
+++ b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_path.h
@@ -0,0 +1,49 @@
+/*
+ * ngtcp2
+ *
+ * Copyright (c) 2019 ngtcp2 contributors
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#ifndef NGTCP2_PATH_H
+#define NGTCP2_PATH_H
+
+#ifdef HAVE_CONFIG_H
+# include <config.h>
+#endif /* HAVE_CONFIG_H */
+
+#include <ngtcp2/ngtcp2.h>
+
+/*
+ * ngtcp2_path_init initializes |path| with the given addresses. Note
+ * that the buffer pointed by local->addr and remote->addr are not
+ * copied. Their pointer values are assigned instead.
+ */
+void ngtcp2_path_init(ngtcp2_path *path, const ngtcp2_addr *local,
+ const ngtcp2_addr *remote);
+
+/*
+ * ngtcp2_path_storage_init2 initializes |ps| using |path| as initial
+ * data.
+ */
+void ngtcp2_path_storage_init2(ngtcp2_path_storage *ps,
+ const ngtcp2_path *path);
+
+#endif /* NGTCP2_PATH_H */
diff --git a/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_pkt.c b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_pkt.c
new file mode 100644
index 0000000..7ca63b3
--- /dev/null
+++ b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_pkt.c
@@ -0,0 +1,2527 @@
+/*
+ * ngtcp2
+ *
+ * Copyright (c) 2017 ngtcp2 contributors
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "ngtcp2_pkt.h"
+
+#include <assert.h>
+#include <string.h>
+
+#include "ngtcp2_conv.h"
+#include "ngtcp2_str.h"
+#include "ngtcp2_macro.h"
+#include "ngtcp2_cid.h"
+#include "ngtcp2_mem.h"
+#include "ngtcp2_vec.h"
+#include "ngtcp2_unreachable.h"
+#include "ngtcp2_str.h"
+
+int ngtcp2_pkt_chain_new(ngtcp2_pkt_chain **ppc, const ngtcp2_path *path,
+ const ngtcp2_pkt_info *pi, const uint8_t *pkt,
+ size_t pktlen, size_t dgramlen, ngtcp2_tstamp ts,
+ const ngtcp2_mem *mem) {
+ *ppc = ngtcp2_mem_malloc(mem, sizeof(ngtcp2_pkt_chain) + pktlen);
+ if (*ppc == NULL) {
+ return NGTCP2_ERR_NOMEM;
+ }
+
+ ngtcp2_path_storage_init2(&(*ppc)->path, path);
+ (*ppc)->pi = *pi;
+ (*ppc)->next = NULL;
+ (*ppc)->pkt = (uint8_t *)(*ppc) + sizeof(ngtcp2_pkt_chain);
+ (*ppc)->pktlen = pktlen;
+ (*ppc)->dgramlen = dgramlen;
+ (*ppc)->ts = ts;
+
+ memcpy((*ppc)->pkt, pkt, pktlen);
+
+ return 0;
+}
+
+void ngtcp2_pkt_chain_del(ngtcp2_pkt_chain *pc, const ngtcp2_mem *mem) {
+ ngtcp2_mem_free(mem, pc);
+}
+
+int ngtcp2_pkt_decode_version_cid(ngtcp2_version_cid *dest, const uint8_t *data,
+ size_t datalen, size_t short_dcidlen) {
+ size_t len;
+ uint32_t version;
+ size_t dcidlen, scidlen;
+ int supported_version;
+
+ assert(datalen);
+
+ if (data[0] & NGTCP2_HEADER_FORM_BIT) {
+ /* 1 byte (Header Form, Fixed Bit, Long Packet Type, Type-Specific bits)
+ * 4 bytes Version
+ * 1 byte DCID Length
+ * 1 byte SCID Length
+ */
+ len = 1 + 4 + 1 + 1;
+ if (datalen < len) {
+ return NGTCP2_ERR_INVALID_ARGUMENT;
+ }
+
+ dcidlen = data[5];
+ len += dcidlen;
+ if (datalen < len) {
+ return NGTCP2_ERR_INVALID_ARGUMENT;
+ }
+ scidlen = data[5 + 1 + dcidlen];
+ len += scidlen;
+ if (datalen < len) {
+ return NGTCP2_ERR_INVALID_ARGUMENT;
+ }
+
+ ngtcp2_get_uint32(&version, &data[1]);
+
+ supported_version = ngtcp2_is_supported_version(version);
+
+ if (supported_version &&
+ (dcidlen > NGTCP2_MAX_CIDLEN || scidlen > NGTCP2_MAX_CIDLEN)) {
+ return NGTCP2_ERR_INVALID_ARGUMENT;
+ }
+
+ if (version && !supported_version &&
+ datalen < NGTCP2_MAX_UDP_PAYLOAD_SIZE) {
+ return NGTCP2_ERR_INVALID_ARGUMENT;
+ }
+
+ dest->version = version;
+ dest->dcid = &data[6];
+ dest->dcidlen = dcidlen;
+ dest->scid = &data[6 + dcidlen + 1];
+ dest->scidlen = scidlen;
+
+ if (!version) {
+ /* VN */
+ return 0;
+ }
+
+ if (!supported_version) {
+ return NGTCP2_ERR_VERSION_NEGOTIATION;
+ }
+ return 0;
+ }
+
+ assert(short_dcidlen <= NGTCP2_MAX_CIDLEN);
+
+ len = 1 + short_dcidlen;
+ if (datalen < len) {
+ return NGTCP2_ERR_INVALID_ARGUMENT;
+ }
+
+ dest->version = 0;
+ dest->dcid = &data[1];
+ dest->dcidlen = short_dcidlen;
+ dest->scid = NULL;
+ dest->scidlen = 0;
+
+ return 0;
+}
+
+void ngtcp2_pkt_hd_init(ngtcp2_pkt_hd *hd, uint8_t flags, uint8_t type,
+ const ngtcp2_cid *dcid, const ngtcp2_cid *scid,
+ int64_t pkt_num, size_t pkt_numlen, uint32_t version,
+ size_t len) {
+ hd->flags = flags;
+ hd->type = type;
+ if (dcid) {
+ hd->dcid = *dcid;
+ } else {
+ ngtcp2_cid_zero(&hd->dcid);
+ }
+ if (scid) {
+ hd->scid = *scid;
+ } else {
+ ngtcp2_cid_zero(&hd->scid);
+ }
+ hd->pkt_num = pkt_num;
+ hd->token = NULL;
+ hd->tokenlen = 0;
+ hd->pkt_numlen = pkt_numlen;
+ hd->version = version;
+ hd->len = len;
+}
+
+static int has_mask(uint8_t b, uint8_t mask) { return (b & mask) == mask; }
+
+ngtcp2_ssize ngtcp2_pkt_decode_hd_long(ngtcp2_pkt_hd *dest, const uint8_t *pkt,
+ size_t pktlen) {
+ uint8_t type;
+ uint32_t version;
+ size_t dcil, scil;
+ const uint8_t *p;
+ size_t len = 0;
+ size_t n;
+ size_t ntokenlen = 0;
+ const uint8_t *token = NULL;
+ size_t tokenlen = 0;
+ uint64_t vi;
+ uint8_t flags = NGTCP2_PKT_FLAG_LONG_FORM;
+
+ if (pktlen < 5) {
+ return NGTCP2_ERR_INVALID_ARGUMENT;
+ }
+
+ if (!(pkt[0] & NGTCP2_HEADER_FORM_BIT)) {
+ return NGTCP2_ERR_INVALID_ARGUMENT;
+ }
+
+ ngtcp2_get_uint32(&version, &pkt[1]);
+
+ if (version == 0) {
+ type = NGTCP2_PKT_VERSION_NEGOTIATION;
+ /* Version Negotiation is not a long header packet. */
+ flags = NGTCP2_PKT_FLAG_NONE;
+ /* This must be Version Negotiation packet which lacks packet
+ number and payload length fields. */
+ len = 5 + 2;
+ } else {
+ if (!(pkt[0] & NGTCP2_FIXED_BIT_MASK)) {
+ flags |= NGTCP2_PKT_FLAG_FIXED_BIT_CLEAR;
+ }
+
+ type = ngtcp2_pkt_get_type_long(version, pkt[0]);
+ switch (type) {
+ case 0:
+ return NGTCP2_ERR_INVALID_ARGUMENT;
+ case NGTCP2_PKT_INITIAL:
+ len = 1 /* Token Length */ + NGTCP2_MIN_LONG_HEADERLEN -
+ 1; /* Cut packet number field */
+ break;
+ case NGTCP2_PKT_RETRY:
+ /* Retry packet does not have packet number and length fields */
+ len = 5 + 2;
+ break;
+ case NGTCP2_PKT_HANDSHAKE:
+ case NGTCP2_PKT_0RTT:
+ len = NGTCP2_MIN_LONG_HEADERLEN - 1; /* Cut packet number field */
+ break;
+ default:
+ /* Unreachable */
+ ngtcp2_unreachable();
+ }
+ }
+
+ if (pktlen < len) {
+ return NGTCP2_ERR_INVALID_ARGUMENT;
+ }
+
+ p = &pkt[5];
+ dcil = *p;
+ if (dcil > NGTCP2_MAX_CIDLEN) {
+ /* QUIC v1 implementation never expect to receive CID length more
+ than NGTCP2_MAX_CIDLEN. */
+ return NGTCP2_ERR_INVALID_ARGUMENT;
+ }
+ len += dcil;
+
+ if (pktlen < len) {
+ return NGTCP2_ERR_INVALID_ARGUMENT;
+ }
+
+ p += 1 + dcil;
+ scil = *p;
+ if (scil > NGTCP2_MAX_CIDLEN) {
+ return NGTCP2_ERR_INVALID_ARGUMENT;
+ }
+ len += scil;
+
+ if (pktlen < len) {
+ return NGTCP2_ERR_INVALID_ARGUMENT;
+ }
+
+ p += 1 + scil;
+
+ if (type == NGTCP2_PKT_INITIAL) {
+ /* Token Length */
+ ntokenlen = ngtcp2_get_uvarintlen(p);
+ len += ntokenlen - 1;
+
+ if (pktlen < len) {
+ return NGTCP2_ERR_INVALID_ARGUMENT;
+ }
+
+ p = ngtcp2_get_uvarint(&vi, p);
+ if (pktlen - len < vi) {
+ return NGTCP2_ERR_INVALID_ARGUMENT;
+ }
+ tokenlen = (size_t)vi;
+ len += tokenlen;
+
+ if (tokenlen) {
+ token = p;
+ }
+
+ p += tokenlen;
+ }
+
+ switch (type) {
+ case NGTCP2_PKT_RETRY:
+ break;
+ default:
+ if (!(flags & NGTCP2_PKT_FLAG_LONG_FORM)) {
+ assert(type == NGTCP2_PKT_VERSION_NEGOTIATION);
+ /* Version Negotiation is not a long header packet. */
+ break;
+ }
+
+ /* Length */
+ n = ngtcp2_get_uvarintlen(p);
+ len += n - 1;
+
+ if (pktlen < len) {
+ return NGTCP2_ERR_INVALID_ARGUMENT;
+ }
+ }
+
+ dest->flags = flags;
+ dest->type = type;
+ dest->version = version;
+ dest->pkt_num = 0;
+ dest->pkt_numlen = 0;
+
+ p = &pkt[6];
+ ngtcp2_cid_init(&dest->dcid, p, dcil);
+ p += dcil + 1;
+ ngtcp2_cid_init(&dest->scid, p, scil);
+ p += scil;
+
+ dest->token = token;
+ dest->tokenlen = tokenlen;
+ p += ntokenlen + tokenlen;
+
+ switch (type) {
+ case NGTCP2_PKT_RETRY:
+ dest->len = 0;
+ break;
+ default:
+ if (!(flags & NGTCP2_PKT_FLAG_LONG_FORM)) {
+ assert(type == NGTCP2_PKT_VERSION_NEGOTIATION);
+ /* Version Negotiation is not a long header packet. */
+ dest->len = 0;
+ break;
+ }
+
+ p = ngtcp2_get_uvarint(&vi, p);
+ if (vi > SIZE_MAX) {
+ return NGTCP2_ERR_INVALID_ARGUMENT;
+ }
+ dest->len = (size_t)vi;
+ }
+
+ assert((size_t)(p - pkt) == len);
+
+ return (ngtcp2_ssize)len;
+}
+
+ngtcp2_ssize ngtcp2_pkt_decode_hd_short(ngtcp2_pkt_hd *dest, const uint8_t *pkt,
+ size_t pktlen, size_t dcidlen) {
+ size_t len = 1 + dcidlen;
+ const uint8_t *p = pkt;
+ uint8_t flags = NGTCP2_PKT_FLAG_NONE;
+
+ assert(dcidlen <= NGTCP2_MAX_CIDLEN);
+
+ if (pktlen < len) {
+ return NGTCP2_ERR_INVALID_ARGUMENT;
+ }
+
+ if (pkt[0] & NGTCP2_HEADER_FORM_BIT) {
+ return NGTCP2_ERR_INVALID_ARGUMENT;
+ }
+
+ if (!(pkt[0] & NGTCP2_FIXED_BIT_MASK)) {
+ flags |= NGTCP2_PKT_FLAG_FIXED_BIT_CLEAR;
+ }
+
+ p = &pkt[1];
+
+ dest->type = NGTCP2_PKT_1RTT;
+
+ ngtcp2_cid_init(&dest->dcid, p, dcidlen);
+ p += dcidlen;
+
+ /* Set 0 to SCID so that we don't accidentally reference it and gets
+ garbage. */
+ ngtcp2_cid_zero(&dest->scid);
+
+ dest->flags = flags;
+ dest->version = 0;
+ dest->len = 0;
+ dest->pkt_num = 0;
+ dest->pkt_numlen = 0;
+ dest->token = NULL;
+ dest->tokenlen = 0;
+
+ assert((size_t)(p - pkt) == len);
+
+ return (ngtcp2_ssize)len;
+}
+
+ngtcp2_ssize ngtcp2_pkt_encode_hd_long(uint8_t *out, size_t outlen,
+ const ngtcp2_pkt_hd *hd) {
+ uint8_t *p;
+ size_t len = NGTCP2_MIN_LONG_HEADERLEN + hd->dcid.datalen + hd->scid.datalen -
+ 2; /* NGTCP2_MIN_LONG_HEADERLEN includes 1 byte for
+ len and 1 byte for packet number. */
+
+ if (hd->type != NGTCP2_PKT_RETRY) {
+ len += NGTCP2_PKT_LENGTHLEN /* Length */ + hd->pkt_numlen;
+ }
+
+ if (hd->type == NGTCP2_PKT_INITIAL) {
+ len += ngtcp2_put_uvarintlen(hd->tokenlen) + hd->tokenlen;
+ }
+
+ if (outlen < len) {
+ return NGTCP2_ERR_NOBUF;
+ }
+
+ p = out;
+
+ *p = (uint8_t)(NGTCP2_HEADER_FORM_BIT |
+ (ngtcp2_pkt_versioned_type(hd->version, hd->type) << 4) |
+ (uint8_t)(hd->pkt_numlen - 1));
+ if (!(hd->flags & NGTCP2_PKT_FLAG_FIXED_BIT_CLEAR)) {
+ *p |= NGTCP2_FIXED_BIT_MASK;
+ }
+
+ ++p;
+
+ p = ngtcp2_put_uint32be(p, hd->version);
+ *p++ = (uint8_t)hd->dcid.datalen;
+ if (hd->dcid.datalen) {
+ p = ngtcp2_cpymem(p, hd->dcid.data, hd->dcid.datalen);
+ }
+ *p++ = (uint8_t)hd->scid.datalen;
+ if (hd->scid.datalen) {
+ p = ngtcp2_cpymem(p, hd->scid.data, hd->scid.datalen);
+ }
+
+ if (hd->type == NGTCP2_PKT_INITIAL) {
+ p = ngtcp2_put_uvarint(p, hd->tokenlen);
+ if (hd->tokenlen) {
+ p = ngtcp2_cpymem(p, hd->token, hd->tokenlen);
+ }
+ }
+
+ if (hd->type != NGTCP2_PKT_RETRY) {
+ p = ngtcp2_put_uvarint30(p, (uint32_t)hd->len);
+ p = ngtcp2_put_pkt_num(p, hd->pkt_num, hd->pkt_numlen);
+ }
+
+ assert((size_t)(p - out) == len);
+
+ return (ngtcp2_ssize)len;
+}
+
+ngtcp2_ssize ngtcp2_pkt_encode_hd_short(uint8_t *out, size_t outlen,
+ const ngtcp2_pkt_hd *hd) {
+ uint8_t *p;
+ size_t len = 1 + hd->dcid.datalen + hd->pkt_numlen;
+
+ if (outlen < len) {
+ return NGTCP2_ERR_NOBUF;
+ }
+
+ p = out;
+
+ *p = (uint8_t)(hd->pkt_numlen - 1);
+ if (!(hd->flags & NGTCP2_PKT_FLAG_FIXED_BIT_CLEAR)) {
+ *p |= NGTCP2_FIXED_BIT_MASK;
+ }
+ if (hd->flags & NGTCP2_PKT_FLAG_KEY_PHASE) {
+ *p |= NGTCP2_SHORT_KEY_PHASE_BIT;
+ }
+
+ ++p;
+
+ if (hd->dcid.datalen) {
+ p = ngtcp2_cpymem(p, hd->dcid.data, hd->dcid.datalen);
+ }
+
+ p = ngtcp2_put_pkt_num(p, hd->pkt_num, hd->pkt_numlen);
+
+ assert((size_t)(p - out) == len);
+
+ return (ngtcp2_ssize)len;
+}
+
+ngtcp2_ssize ngtcp2_pkt_decode_frame(ngtcp2_frame *dest, const uint8_t *payload,
+ size_t payloadlen) {
+ uint8_t type;
+
+ if (payloadlen == 0) {
+ return NGTCP2_ERR_FRAME_ENCODING;
+ }
+
+ type = payload[0];
+
+ switch (type) {
+ case NGTCP2_FRAME_PADDING:
+ return ngtcp2_pkt_decode_padding_frame(&dest->padding, payload, payloadlen);
+ case NGTCP2_FRAME_RESET_STREAM:
+ return ngtcp2_pkt_decode_reset_stream_frame(&dest->reset_stream, payload,
+ payloadlen);
+ case NGTCP2_FRAME_CONNECTION_CLOSE:
+ case NGTCP2_FRAME_CONNECTION_CLOSE_APP:
+ return ngtcp2_pkt_decode_connection_close_frame(&dest->connection_close,
+ payload, payloadlen);
+ case NGTCP2_FRAME_MAX_DATA:
+ return ngtcp2_pkt_decode_max_data_frame(&dest->max_data, payload,
+ payloadlen);
+ case NGTCP2_FRAME_MAX_STREAM_DATA:
+ return ngtcp2_pkt_decode_max_stream_data_frame(&dest->max_stream_data,
+ payload, payloadlen);
+ case NGTCP2_FRAME_MAX_STREAMS_BIDI:
+ case NGTCP2_FRAME_MAX_STREAMS_UNI:
+ return ngtcp2_pkt_decode_max_streams_frame(&dest->max_streams, payload,
+ payloadlen);
+ case NGTCP2_FRAME_PING:
+ return ngtcp2_pkt_decode_ping_frame(&dest->ping, payload, payloadlen);
+ case NGTCP2_FRAME_DATA_BLOCKED:
+ return ngtcp2_pkt_decode_data_blocked_frame(&dest->data_blocked, payload,
+ payloadlen);
+ case NGTCP2_FRAME_STREAM_DATA_BLOCKED:
+ return ngtcp2_pkt_decode_stream_data_blocked_frame(
+ &dest->stream_data_blocked, payload, payloadlen);
+ case NGTCP2_FRAME_STREAMS_BLOCKED_BIDI:
+ case NGTCP2_FRAME_STREAMS_BLOCKED_UNI:
+ return ngtcp2_pkt_decode_streams_blocked_frame(&dest->streams_blocked,
+ payload, payloadlen);
+ case NGTCP2_FRAME_NEW_CONNECTION_ID:
+ return ngtcp2_pkt_decode_new_connection_id_frame(&dest->new_connection_id,
+ payload, payloadlen);
+ case NGTCP2_FRAME_STOP_SENDING:
+ return ngtcp2_pkt_decode_stop_sending_frame(&dest->stop_sending, payload,
+ payloadlen);
+ case NGTCP2_FRAME_ACK:
+ case NGTCP2_FRAME_ACK_ECN:
+ return ngtcp2_pkt_decode_ack_frame(&dest->ack, payload, payloadlen);
+ case NGTCP2_FRAME_PATH_CHALLENGE:
+ return ngtcp2_pkt_decode_path_challenge_frame(&dest->path_challenge,
+ payload, payloadlen);
+ case NGTCP2_FRAME_PATH_RESPONSE:
+ return ngtcp2_pkt_decode_path_response_frame(&dest->path_response, payload,
+ payloadlen);
+ case NGTCP2_FRAME_CRYPTO:
+ return ngtcp2_pkt_decode_crypto_frame(&dest->crypto, payload, payloadlen);
+ case NGTCP2_FRAME_NEW_TOKEN:
+ return ngtcp2_pkt_decode_new_token_frame(&dest->new_token, payload,
+ payloadlen);
+ case NGTCP2_FRAME_RETIRE_CONNECTION_ID:
+ return ngtcp2_pkt_decode_retire_connection_id_frame(
+ &dest->retire_connection_id, payload, payloadlen);
+ case NGTCP2_FRAME_HANDSHAKE_DONE:
+ return ngtcp2_pkt_decode_handshake_done_frame(&dest->handshake_done,
+ payload, payloadlen);
+ case NGTCP2_FRAME_DATAGRAM:
+ case NGTCP2_FRAME_DATAGRAM_LEN:
+ return ngtcp2_pkt_decode_datagram_frame(&dest->datagram, payload,
+ payloadlen);
+ default:
+ if (has_mask(type, NGTCP2_FRAME_STREAM)) {
+ return ngtcp2_pkt_decode_stream_frame(&dest->stream, payload, payloadlen);
+ }
+ return NGTCP2_ERR_FRAME_ENCODING;
+ }
+}
+
+ngtcp2_ssize ngtcp2_pkt_decode_stream_frame(ngtcp2_stream *dest,
+ const uint8_t *payload,
+ size_t payloadlen) {
+ uint8_t type;
+ size_t len = 1 + 1;
+ const uint8_t *p;
+ size_t datalen;
+ size_t ndatalen = 0;
+ size_t n;
+ uint64_t vi;
+
+ if (payloadlen < len) {
+ return NGTCP2_ERR_FRAME_ENCODING;
+ }
+
+ type = payload[0];
+
+ p = payload + 1;
+
+ n = ngtcp2_get_uvarintlen(p);
+ len += n - 1;
+
+ if (payloadlen < len) {
+ return NGTCP2_ERR_FRAME_ENCODING;
+ }
+
+ p += n;
+
+ if (type & NGTCP2_STREAM_OFF_BIT) {
+ ++len;
+ if (payloadlen < len) {
+ return NGTCP2_ERR_FRAME_ENCODING;
+ }
+
+ n = ngtcp2_get_uvarintlen(p);
+ len += n - 1;
+
+ if (payloadlen < len) {
+ return NGTCP2_ERR_FRAME_ENCODING;
+ }
+
+ p += n;
+ }
+
+ if (type & NGTCP2_STREAM_LEN_BIT) {
+ ++len;
+ if (payloadlen < len) {
+ return NGTCP2_ERR_FRAME_ENCODING;
+ }
+
+ ndatalen = ngtcp2_get_uvarintlen(p);
+ len += ndatalen - 1;
+
+ if (payloadlen < len) {
+ return NGTCP2_ERR_FRAME_ENCODING;
+ }
+
+ /* p = */ ngtcp2_get_uvarint(&vi, p);
+ if (payloadlen - len < vi) {
+ return NGTCP2_ERR_FRAME_ENCODING;
+ }
+ datalen = (size_t)vi;
+ len += datalen;
+ } else {
+ len = payloadlen;
+ }
+
+ p = payload + 1;
+
+ dest->type = NGTCP2_FRAME_STREAM;
+ dest->flags = (uint8_t)(type & ~NGTCP2_FRAME_STREAM);
+ dest->fin = (type & NGTCP2_STREAM_FIN_BIT) != 0;
+ p = ngtcp2_get_varint(&dest->stream_id, p);
+
+ if (type & NGTCP2_STREAM_OFF_BIT) {
+ p = ngtcp2_get_uvarint(&dest->offset, p);
+ } else {
+ dest->offset = 0;
+ }
+
+ if (type & NGTCP2_STREAM_LEN_BIT) {
+ p += ndatalen;
+ } else {
+ datalen = payloadlen - (size_t)(p - payload);
+ }
+
+ if (datalen) {
+ dest->data[0].len = datalen;
+ dest->data[0].base = (uint8_t *)p;
+ dest->datacnt = 1;
+ p += datalen;
+ } else {
+ dest->datacnt = 0;
+ }
+
+ assert((size_t)(p - payload) == len);
+
+ return (ngtcp2_ssize)len;
+}
+
+ngtcp2_ssize ngtcp2_pkt_decode_ack_frame(ngtcp2_ack *dest,
+ const uint8_t *payload,
+ size_t payloadlen) {
+ size_t rangecnt, max_rangecnt;
+ size_t nrangecnt;
+ size_t len = 1 + 1 + 1 + 1 + 1;
+ const uint8_t *p;
+ size_t i, j;
+ ngtcp2_ack_range *range;
+ size_t n;
+ uint8_t type;
+ uint64_t vi;
+
+ if (payloadlen < len) {
+ return NGTCP2_ERR_FRAME_ENCODING;
+ }
+
+ type = payload[0];
+
+ p = payload + 1;
+
+ /* Largest Acknowledged */
+ n = ngtcp2_get_uvarintlen(p);
+ len += n - 1;
+
+ if (payloadlen < len) {
+ return NGTCP2_ERR_FRAME_ENCODING;
+ }
+
+ p += n;
+
+ /* ACK Delay */
+ n = ngtcp2_get_uvarintlen(p);
+ len += n - 1;
+
+ if (payloadlen < len) {
+ return NGTCP2_ERR_FRAME_ENCODING;
+ }
+
+ p += n;
+
+ /* ACK Range Count */
+ nrangecnt = ngtcp2_get_uvarintlen(p);
+ len += nrangecnt - 1;
+
+ if (payloadlen < len) {
+ return NGTCP2_ERR_FRAME_ENCODING;
+ }
+
+ p = ngtcp2_get_uvarint(&vi, p);
+ if (vi > SIZE_MAX / (1 + 1) || payloadlen - len < vi * (1 + 1)) {
+ return NGTCP2_ERR_FRAME_ENCODING;
+ }
+
+ rangecnt = (size_t)vi;
+ len += rangecnt * (1 + 1);
+
+ /* First ACK Range */
+ n = ngtcp2_get_uvarintlen(p);
+ len += n - 1;
+
+ if (payloadlen < len) {
+ return NGTCP2_ERR_FRAME_ENCODING;
+ }
+
+ p += n;
+
+ for (i = 0; i < rangecnt; ++i) {
+ /* Gap, and Additional ACK Range */
+ for (j = 0; j < 2; ++j) {
+ n = ngtcp2_get_uvarintlen(p);
+ len += n - 1;
+
+ if (payloadlen < len) {
+ return NGTCP2_ERR_FRAME_ENCODING;
+ }
+
+ p += n;
+ }
+ }
+
+ if (type == NGTCP2_FRAME_ACK_ECN) {
+ len += 3;
+ if (payloadlen < len) {
+ return NGTCP2_ERR_FRAME_ENCODING;
+ }
+
+ for (i = 0; i < 3; ++i) {
+ n = ngtcp2_get_uvarintlen(p);
+ len += n - 1;
+
+ if (payloadlen < len) {
+ return NGTCP2_ERR_FRAME_ENCODING;
+ }
+
+ p += n;
+ }
+ }
+
+ /* TODO We might not decode all ranges. It could be very large. */
+ max_rangecnt = ngtcp2_min(NGTCP2_MAX_ACK_RANGES, rangecnt);
+
+ p = payload + 1;
+
+ dest->type = type;
+ p = ngtcp2_get_varint(&dest->largest_ack, p);
+ p = ngtcp2_get_uvarint(&dest->ack_delay, p);
+ /* This value will be assigned in the upper layer. */
+ dest->ack_delay_unscaled = 0;
+ dest->rangecnt = max_rangecnt;
+ p += nrangecnt;
+ p = ngtcp2_get_uvarint(&dest->first_ack_range, p);
+
+ for (i = 0; i < max_rangecnt; ++i) {
+ range = &dest->ranges[i];
+ p = ngtcp2_get_uvarint(&range->gap, p);
+ p = ngtcp2_get_uvarint(&range->len, p);
+ }
+ for (i = max_rangecnt; i < rangecnt; ++i) {
+ p += ngtcp2_get_uvarintlen(p);
+ p += ngtcp2_get_uvarintlen(p);
+ }
+
+ if (type == NGTCP2_FRAME_ACK_ECN) {
+ p = ngtcp2_get_uvarint(&dest->ecn.ect0, p);
+ p = ngtcp2_get_uvarint(&dest->ecn.ect1, p);
+ p = ngtcp2_get_uvarint(&dest->ecn.ce, p);
+ }
+
+ assert((size_t)(p - payload) == len);
+
+ return (ngtcp2_ssize)len;
+}
+
+ngtcp2_ssize ngtcp2_pkt_decode_padding_frame(ngtcp2_padding *dest,
+ const uint8_t *payload,
+ size_t payloadlen) {
+ const uint8_t *p, *ep;
+
+ assert(payloadlen > 0);
+
+ p = payload + 1;
+ ep = payload + payloadlen;
+
+ for (; p != ep && *p == NGTCP2_FRAME_PADDING; ++p)
+ ;
+
+ dest->type = NGTCP2_FRAME_PADDING;
+ dest->len = (size_t)(p - payload);
+
+ return (ngtcp2_ssize)dest->len;
+}
+
+ngtcp2_ssize ngtcp2_pkt_decode_reset_stream_frame(ngtcp2_reset_stream *dest,
+ const uint8_t *payload,
+ size_t payloadlen) {
+ size_t len = 1 + 1 + 1 + 1;
+ const uint8_t *p;
+ size_t n;
+
+ if (payloadlen < len) {
+ return NGTCP2_ERR_FRAME_ENCODING;
+ }
+
+ p = payload + 1;
+
+ n = ngtcp2_get_uvarintlen(p);
+ len += n - 1;
+ if (payloadlen < len) {
+ return NGTCP2_ERR_FRAME_ENCODING;
+ }
+ p += n;
+ n = ngtcp2_get_uvarintlen(p);
+ len += n - 1;
+ if (payloadlen < len) {
+ return NGTCP2_ERR_FRAME_ENCODING;
+ }
+ p += n;
+ n = ngtcp2_get_uvarintlen(p);
+ len += n - 1;
+ if (payloadlen < len) {
+ return NGTCP2_ERR_FRAME_ENCODING;
+ }
+
+ p = payload + 1;
+
+ dest->type = NGTCP2_FRAME_RESET_STREAM;
+ p = ngtcp2_get_varint(&dest->stream_id, p);
+ p = ngtcp2_get_uvarint(&dest->app_error_code, p);
+ p = ngtcp2_get_uvarint(&dest->final_size, p);
+
+ assert((size_t)(p - payload) == len);
+
+ return (ngtcp2_ssize)len;
+}
+
+ngtcp2_ssize ngtcp2_pkt_decode_connection_close_frame(
+ ngtcp2_connection_close *dest, const uint8_t *payload, size_t payloadlen) {
+ size_t len = 1 + 1 + 1;
+ const uint8_t *p;
+ size_t reasonlen;
+ size_t nreasonlen;
+ size_t n;
+ uint8_t type;
+ uint64_t vi;
+
+ if (payloadlen < len) {
+ return NGTCP2_ERR_FRAME_ENCODING;
+ }
+
+ type = payload[0];
+
+ p = payload + 1;
+
+ n = ngtcp2_get_uvarintlen(p);
+ len += n - 1;
+ if (payloadlen < len) {
+ return NGTCP2_ERR_FRAME_ENCODING;
+ }
+
+ p += n;
+
+ if (type == NGTCP2_FRAME_CONNECTION_CLOSE) {
+ ++len;
+
+ n = ngtcp2_get_uvarintlen(p);
+ len += n - 1;
+ if (payloadlen < len) {
+ return NGTCP2_ERR_FRAME_ENCODING;
+ }
+
+ p += n;
+ }
+
+ nreasonlen = ngtcp2_get_uvarintlen(p);
+ len += nreasonlen - 1;
+ if (payloadlen < len) {
+ return NGTCP2_ERR_FRAME_ENCODING;
+ }
+
+ p = ngtcp2_get_uvarint(&vi, p);
+ if (payloadlen - len < vi) {
+ return NGTCP2_ERR_FRAME_ENCODING;
+ }
+ reasonlen = (size_t)vi;
+ len += reasonlen;
+
+ p = payload + 1;
+
+ dest->type = type;
+ p = ngtcp2_get_uvarint(&dest->error_code, p);
+ if (type == NGTCP2_FRAME_CONNECTION_CLOSE) {
+ p = ngtcp2_get_uvarint(&dest->frame_type, p);
+ } else {
+ dest->frame_type = 0;
+ }
+ dest->reasonlen = reasonlen;
+ p += nreasonlen;
+ if (reasonlen == 0) {
+ dest->reason = NULL;
+ } else {
+ dest->reason = (uint8_t *)p;
+ p += reasonlen;
+ }
+
+ assert((size_t)(p - payload) == len);
+
+ return (ngtcp2_ssize)len;
+}
+
+ngtcp2_ssize ngtcp2_pkt_decode_max_data_frame(ngtcp2_max_data *dest,
+ const uint8_t *payload,
+ size_t payloadlen) {
+ size_t len = 1 + 1;
+ const uint8_t *p;
+ size_t n;
+
+ if (payloadlen < len) {
+ return NGTCP2_ERR_FRAME_ENCODING;
+ }
+
+ p = payload + 1;
+
+ n = ngtcp2_get_uvarintlen(p);
+ len += n - 1;
+
+ if (payloadlen < len) {
+ return NGTCP2_ERR_FRAME_ENCODING;
+ }
+
+ dest->type = NGTCP2_FRAME_MAX_DATA;
+ p = ngtcp2_get_uvarint(&dest->max_data, p);
+
+ assert((size_t)(p - payload) == len);
+
+ return (ngtcp2_ssize)len;
+}
+
+ngtcp2_ssize ngtcp2_pkt_decode_max_stream_data_frame(
+ ngtcp2_max_stream_data *dest, const uint8_t *payload, size_t payloadlen) {
+ size_t len = 1 + 1 + 1;
+ const uint8_t *p;
+ size_t n;
+
+ if (payloadlen < len) {
+ return NGTCP2_ERR_FRAME_ENCODING;
+ }
+
+ p = payload + 1;
+
+ n = ngtcp2_get_uvarintlen(p);
+ len += n - 1;
+
+ if (payloadlen < len) {
+ return NGTCP2_ERR_FRAME_ENCODING;
+ }
+
+ p += n;
+
+ n = ngtcp2_get_uvarintlen(p);
+ len += n - 1;
+
+ if (payloadlen < len) {
+ return NGTCP2_ERR_FRAME_ENCODING;
+ }
+
+ p = payload + 1;
+
+ dest->type = NGTCP2_FRAME_MAX_STREAM_DATA;
+ p = ngtcp2_get_varint(&dest->stream_id, p);
+ p = ngtcp2_get_uvarint(&dest->max_stream_data, p);
+
+ assert((size_t)(p - payload) == len);
+
+ return (ngtcp2_ssize)len;
+}
+
+ngtcp2_ssize ngtcp2_pkt_decode_max_streams_frame(ngtcp2_max_streams *dest,
+ const uint8_t *payload,
+ size_t payloadlen) {
+ size_t len = 1 + 1;
+ const uint8_t *p;
+ size_t n;
+
+ if (payloadlen < len) {
+ return NGTCP2_ERR_FRAME_ENCODING;
+ }
+
+ p = payload + 1;
+
+ n = ngtcp2_get_uvarintlen(p);
+ len += n - 1;
+
+ if (payloadlen < len) {
+ return NGTCP2_ERR_FRAME_ENCODING;
+ }
+
+ dest->type = payload[0];
+ p = ngtcp2_get_uvarint(&dest->max_streams, p);
+
+ assert((size_t)(p - payload) == len);
+
+ return (ngtcp2_ssize)len;
+}
+
+ngtcp2_ssize ngtcp2_pkt_decode_ping_frame(ngtcp2_ping *dest,
+ const uint8_t *payload,
+ size_t payloadlen) {
+ (void)payload;
+ (void)payloadlen;
+
+ dest->type = NGTCP2_FRAME_PING;
+ return 1;
+}
+
+ngtcp2_ssize ngtcp2_pkt_decode_data_blocked_frame(ngtcp2_data_blocked *dest,
+ const uint8_t *payload,
+ size_t payloadlen) {
+ size_t len = 1 + 1;
+ const uint8_t *p;
+ size_t n;
+
+ if (payloadlen < len) {
+ return NGTCP2_ERR_FRAME_ENCODING;
+ }
+
+ p = payload + 1;
+
+ n = ngtcp2_get_uvarintlen(p);
+ len += n - 1;
+
+ if (payloadlen < len) {
+ return NGTCP2_ERR_FRAME_ENCODING;
+ }
+
+ dest->type = NGTCP2_FRAME_DATA_BLOCKED;
+ p = ngtcp2_get_uvarint(&dest->offset, p);
+
+ assert((size_t)(p - payload) == len);
+
+ return (ngtcp2_ssize)len;
+}
+
+ngtcp2_ssize
+ngtcp2_pkt_decode_stream_data_blocked_frame(ngtcp2_stream_data_blocked *dest,
+ const uint8_t *payload,
+ size_t payloadlen) {
+ size_t len = 1 + 1 + 1;
+ const uint8_t *p;
+ size_t n;
+
+ if (payloadlen < len) {
+ return NGTCP2_ERR_FRAME_ENCODING;
+ }
+
+ p = payload + 1;
+
+ n = ngtcp2_get_uvarintlen(p);
+ len += n - 1;
+
+ if (payloadlen < len) {
+ return NGTCP2_ERR_FRAME_ENCODING;
+ }
+
+ p += n;
+
+ n = ngtcp2_get_uvarintlen(p);
+ len += n - 1;
+
+ if (payloadlen < len) {
+ return NGTCP2_ERR_FRAME_ENCODING;
+ }
+
+ p = payload + 1;
+
+ dest->type = NGTCP2_FRAME_STREAM_DATA_BLOCKED;
+ p = ngtcp2_get_varint(&dest->stream_id, p);
+ p = ngtcp2_get_uvarint(&dest->offset, p);
+
+ assert((size_t)(p - payload) == len);
+
+ return (ngtcp2_ssize)len;
+}
+
+ngtcp2_ssize ngtcp2_pkt_decode_streams_blocked_frame(
+ ngtcp2_streams_blocked *dest, const uint8_t *payload, size_t payloadlen) {
+ size_t len = 1 + 1;
+ const uint8_t *p;
+ size_t n;
+
+ if (payloadlen < len) {
+ return NGTCP2_ERR_FRAME_ENCODING;
+ }
+
+ p = payload + 1;
+
+ n = ngtcp2_get_uvarintlen(p);
+ len += n - 1;
+
+ if (payloadlen < len) {
+ return NGTCP2_ERR_FRAME_ENCODING;
+ }
+
+ dest->type = payload[0];
+ p = ngtcp2_get_uvarint(&dest->max_streams, p);
+
+ assert((size_t)(p - payload) == len);
+
+ return (ngtcp2_ssize)len;
+}
+
+ngtcp2_ssize ngtcp2_pkt_decode_new_connection_id_frame(
+ ngtcp2_new_connection_id *dest, const uint8_t *payload, size_t payloadlen) {
+ size_t len = 1 + 1 + 1 + 1 + 16;
+ const uint8_t *p;
+ size_t n;
+ size_t cil;
+
+ if (payloadlen < len) {
+ return NGTCP2_ERR_FRAME_ENCODING;
+ }
+
+ p = payload + 1;
+
+ n = ngtcp2_get_uvarintlen(p);
+ len += n - 1;
+ if (payloadlen < len) {
+ return NGTCP2_ERR_FRAME_ENCODING;
+ }
+
+ p += n;
+
+ n = ngtcp2_get_uvarintlen(p);
+ len += n - 1;
+ if (payloadlen < len) {
+ return NGTCP2_ERR_FRAME_ENCODING;
+ }
+
+ p += n;
+
+ cil = *p;
+ if (cil < NGTCP2_MIN_CIDLEN || cil > NGTCP2_MAX_CIDLEN) {
+ return NGTCP2_ERR_FRAME_ENCODING;
+ }
+
+ len += cil;
+ if (payloadlen < len) {
+ return NGTCP2_ERR_FRAME_ENCODING;
+ }
+
+ p = payload + 1;
+
+ dest->type = NGTCP2_FRAME_NEW_CONNECTION_ID;
+ p = ngtcp2_get_uvarint(&dest->seq, p);
+ p = ngtcp2_get_uvarint(&dest->retire_prior_to, p);
+ ++p;
+ ngtcp2_cid_init(&dest->cid, p, cil);
+ p += cil;
+ p = ngtcp2_get_bytes(dest->stateless_reset_token, p,
+ NGTCP2_STATELESS_RESET_TOKENLEN);
+
+ assert((size_t)(p - payload) == len);
+
+ return (ngtcp2_ssize)len;
+}
+
+ngtcp2_ssize ngtcp2_pkt_decode_stop_sending_frame(ngtcp2_stop_sending *dest,
+ const uint8_t *payload,
+ size_t payloadlen) {
+ size_t len = 1 + 1 + 1;
+ const uint8_t *p;
+ size_t n;
+
+ if (payloadlen < len) {
+ return NGTCP2_ERR_FRAME_ENCODING;
+ }
+
+ p = payload + 1;
+
+ n = ngtcp2_get_uvarintlen(p);
+ len += n - 1;
+
+ if (payloadlen < len) {
+ return NGTCP2_ERR_FRAME_ENCODING;
+ }
+ p += n;
+ n = ngtcp2_get_uvarintlen(p);
+ len += n - 1;
+
+ if (payloadlen < len) {
+ return NGTCP2_ERR_FRAME_ENCODING;
+ }
+
+ p = payload + 1;
+
+ dest->type = NGTCP2_FRAME_STOP_SENDING;
+ p = ngtcp2_get_varint(&dest->stream_id, p);
+ p = ngtcp2_get_uvarint(&dest->app_error_code, p);
+
+ assert((size_t)(p - payload) == len);
+
+ return (ngtcp2_ssize)len;
+}
+
+ngtcp2_ssize ngtcp2_pkt_decode_path_challenge_frame(ngtcp2_path_challenge *dest,
+ const uint8_t *payload,
+ size_t payloadlen) {
+ size_t len = 1 + 8;
+ const uint8_t *p;
+
+ if (payloadlen < len) {
+ return NGTCP2_ERR_FRAME_ENCODING;
+ }
+
+ p = payload + 1;
+
+ dest->type = NGTCP2_FRAME_PATH_CHALLENGE;
+ ngtcp2_cpymem(dest->data, p, sizeof(dest->data));
+ p += sizeof(dest->data);
+
+ assert((size_t)(p - payload) == len);
+
+ return (ngtcp2_ssize)len;
+}
+
+ngtcp2_ssize ngtcp2_pkt_decode_path_response_frame(ngtcp2_path_response *dest,
+ const uint8_t *payload,
+ size_t payloadlen) {
+ size_t len = 1 + 8;
+ const uint8_t *p;
+
+ if (payloadlen < len) {
+ return NGTCP2_ERR_FRAME_ENCODING;
+ }
+
+ p = payload + 1;
+
+ dest->type = NGTCP2_FRAME_PATH_RESPONSE;
+ ngtcp2_cpymem(dest->data, p, sizeof(dest->data));
+ p += sizeof(dest->data);
+
+ assert((size_t)(p - payload) == len);
+
+ return (ngtcp2_ssize)len;
+}
+
+ngtcp2_ssize ngtcp2_pkt_decode_crypto_frame(ngtcp2_crypto *dest,
+ const uint8_t *payload,
+ size_t payloadlen) {
+ size_t len = 1 + 1 + 1;
+ const uint8_t *p;
+ size_t datalen;
+ size_t ndatalen;
+ size_t n;
+ uint64_t vi;
+
+ if (payloadlen < len) {
+ return NGTCP2_ERR_FRAME_ENCODING;
+ }
+
+ p = payload + 1;
+
+ n = ngtcp2_get_uvarintlen(p);
+ len += n - 1;
+
+ if (payloadlen < len) {
+ return NGTCP2_ERR_FRAME_ENCODING;
+ }
+
+ p += n;
+
+ ndatalen = ngtcp2_get_uvarintlen(p);
+ len += ndatalen - 1;
+
+ if (payloadlen < len) {
+ return NGTCP2_ERR_FRAME_ENCODING;
+ }
+
+ p = ngtcp2_get_uvarint(&vi, p);
+ if (payloadlen - len < vi) {
+ return NGTCP2_ERR_FRAME_ENCODING;
+ }
+
+ datalen = (size_t)vi;
+ len += datalen;
+
+ p = payload + 1;
+
+ dest->type = NGTCP2_FRAME_CRYPTO;
+ p = ngtcp2_get_uvarint(&dest->offset, p);
+ dest->data[0].len = datalen;
+ p += ndatalen;
+ if (dest->data[0].len) {
+ dest->data[0].base = (uint8_t *)p;
+ p += dest->data[0].len;
+ dest->datacnt = 1;
+ } else {
+ dest->data[0].base = NULL;
+ dest->datacnt = 0;
+ }
+
+ assert((size_t)(p - payload) == len);
+
+ return (ngtcp2_ssize)len;
+}
+
+ngtcp2_ssize ngtcp2_pkt_decode_new_token_frame(ngtcp2_new_token *dest,
+ const uint8_t *payload,
+ size_t payloadlen) {
+ size_t len = 1 + 1;
+ const uint8_t *p;
+ size_t n;
+ size_t datalen;
+ uint64_t vi;
+
+ if (payloadlen < len) {
+ return NGTCP2_ERR_FRAME_ENCODING;
+ }
+
+ p = payload + 1;
+
+ n = ngtcp2_get_uvarintlen(p);
+ len += n - 1;
+
+ if (payloadlen < len) {
+ return NGTCP2_ERR_FRAME_ENCODING;
+ }
+
+ p = ngtcp2_get_uvarint(&vi, p);
+ if (payloadlen - len < vi) {
+ return NGTCP2_ERR_FRAME_ENCODING;
+ }
+ datalen = (size_t)vi;
+ len += datalen;
+
+ dest->type = NGTCP2_FRAME_NEW_TOKEN;
+ dest->tokenlen = datalen;
+ dest->token = (uint8_t *)p;
+ p += dest->tokenlen;
+
+ assert((size_t)(p - payload) == len);
+
+ return (ngtcp2_ssize)len;
+}
+
+ngtcp2_ssize
+ngtcp2_pkt_decode_retire_connection_id_frame(ngtcp2_retire_connection_id *dest,
+ const uint8_t *payload,
+ size_t payloadlen) {
+ size_t len = 1 + 1;
+ const uint8_t *p;
+ size_t n;
+
+ if (payloadlen < len) {
+ return NGTCP2_ERR_FRAME_ENCODING;
+ }
+
+ p = payload + 1;
+
+ n = ngtcp2_get_uvarintlen(p);
+ len += n - 1;
+
+ if (payloadlen < len) {
+ return NGTCP2_ERR_FRAME_ENCODING;
+ }
+
+ dest->type = NGTCP2_FRAME_RETIRE_CONNECTION_ID;
+ p = ngtcp2_get_uvarint(&dest->seq, p);
+
+ assert((size_t)(p - payload) == len);
+
+ return (ngtcp2_ssize)len;
+}
+
+ngtcp2_ssize ngtcp2_pkt_decode_handshake_done_frame(ngtcp2_handshake_done *dest,
+ const uint8_t *payload,
+ size_t payloadlen) {
+ (void)payload;
+ (void)payloadlen;
+
+ dest->type = NGTCP2_FRAME_HANDSHAKE_DONE;
+ return 1;
+}
+
+ngtcp2_ssize ngtcp2_pkt_decode_datagram_frame(ngtcp2_datagram *dest,
+ const uint8_t *payload,
+ size_t payloadlen) {
+ size_t len = 1;
+ const uint8_t *p;
+ uint8_t type;
+ size_t datalen;
+ size_t n;
+ uint64_t vi;
+
+ if (payloadlen < len) {
+ return NGTCP2_ERR_FRAME_ENCODING;
+ }
+
+ type = payload[0];
+
+ p = payload + 1;
+
+ switch (type) {
+ case NGTCP2_FRAME_DATAGRAM:
+ datalen = payloadlen - 1;
+ len = payloadlen;
+ break;
+ case NGTCP2_FRAME_DATAGRAM_LEN:
+ ++len;
+ if (payloadlen < len) {
+ return NGTCP2_ERR_FRAME_ENCODING;
+ }
+
+ n = ngtcp2_get_uvarintlen(p);
+ len += n - 1;
+
+ if (payloadlen < len) {
+ return NGTCP2_ERR_FRAME_ENCODING;
+ }
+
+ p = ngtcp2_get_uvarint(&vi, p);
+ if (payloadlen - len < vi) {
+ return NGTCP2_ERR_FRAME_ENCODING;
+ }
+
+ datalen = (size_t)vi;
+ len += datalen;
+ break;
+ default:
+ ngtcp2_unreachable();
+ }
+
+ dest->type = type;
+
+ if (datalen == 0) {
+ dest->datacnt = 0;
+ dest->data = NULL;
+ } else {
+ dest->datacnt = 1;
+ dest->data = dest->rdata;
+ dest->rdata[0].len = datalen;
+
+ dest->rdata[0].base = (uint8_t *)p;
+ p += datalen;
+ }
+
+ assert((size_t)(p - payload) == len);
+
+ return (ngtcp2_ssize)len;
+}
+
+ngtcp2_ssize ngtcp2_pkt_encode_frame(uint8_t *out, size_t outlen,
+ ngtcp2_frame *fr) {
+ switch (fr->type) {
+ case NGTCP2_FRAME_STREAM:
+ return ngtcp2_pkt_encode_stream_frame(out, outlen, &fr->stream);
+ case NGTCP2_FRAME_ACK:
+ case NGTCP2_FRAME_ACK_ECN:
+ return ngtcp2_pkt_encode_ack_frame(out, outlen, &fr->ack);
+ case NGTCP2_FRAME_PADDING:
+ return ngtcp2_pkt_encode_padding_frame(out, outlen, &fr->padding);
+ case NGTCP2_FRAME_RESET_STREAM:
+ return ngtcp2_pkt_encode_reset_stream_frame(out, outlen, &fr->reset_stream);
+ case NGTCP2_FRAME_CONNECTION_CLOSE:
+ case NGTCP2_FRAME_CONNECTION_CLOSE_APP:
+ return ngtcp2_pkt_encode_connection_close_frame(out, outlen,
+ &fr->connection_close);
+ case NGTCP2_FRAME_MAX_DATA:
+ return ngtcp2_pkt_encode_max_data_frame(out, outlen, &fr->max_data);
+ case NGTCP2_FRAME_MAX_STREAM_DATA:
+ return ngtcp2_pkt_encode_max_stream_data_frame(out, outlen,
+ &fr->max_stream_data);
+ case NGTCP2_FRAME_MAX_STREAMS_BIDI:
+ case NGTCP2_FRAME_MAX_STREAMS_UNI:
+ return ngtcp2_pkt_encode_max_streams_frame(out, outlen, &fr->max_streams);
+ case NGTCP2_FRAME_PING:
+ return ngtcp2_pkt_encode_ping_frame(out, outlen, &fr->ping);
+ case NGTCP2_FRAME_DATA_BLOCKED:
+ return ngtcp2_pkt_encode_data_blocked_frame(out, outlen, &fr->data_blocked);
+ case NGTCP2_FRAME_STREAM_DATA_BLOCKED:
+ return ngtcp2_pkt_encode_stream_data_blocked_frame(
+ out, outlen, &fr->stream_data_blocked);
+ case NGTCP2_FRAME_STREAMS_BLOCKED_BIDI:
+ case NGTCP2_FRAME_STREAMS_BLOCKED_UNI:
+ return ngtcp2_pkt_encode_streams_blocked_frame(out, outlen,
+ &fr->streams_blocked);
+ case NGTCP2_FRAME_NEW_CONNECTION_ID:
+ return ngtcp2_pkt_encode_new_connection_id_frame(out, outlen,
+ &fr->new_connection_id);
+ case NGTCP2_FRAME_STOP_SENDING:
+ return ngtcp2_pkt_encode_stop_sending_frame(out, outlen, &fr->stop_sending);
+ case NGTCP2_FRAME_PATH_CHALLENGE:
+ return ngtcp2_pkt_encode_path_challenge_frame(out, outlen,
+ &fr->path_challenge);
+ case NGTCP2_FRAME_PATH_RESPONSE:
+ return ngtcp2_pkt_encode_path_response_frame(out, outlen,
+ &fr->path_response);
+ case NGTCP2_FRAME_CRYPTO:
+ return ngtcp2_pkt_encode_crypto_frame(out, outlen, &fr->crypto);
+ case NGTCP2_FRAME_NEW_TOKEN:
+ return ngtcp2_pkt_encode_new_token_frame(out, outlen, &fr->new_token);
+ case NGTCP2_FRAME_RETIRE_CONNECTION_ID:
+ return ngtcp2_pkt_encode_retire_connection_id_frame(
+ out, outlen, &fr->retire_connection_id);
+ case NGTCP2_FRAME_HANDSHAKE_DONE:
+ return ngtcp2_pkt_encode_handshake_done_frame(out, outlen,
+ &fr->handshake_done);
+ case NGTCP2_FRAME_DATAGRAM:
+ case NGTCP2_FRAME_DATAGRAM_LEN:
+ return ngtcp2_pkt_encode_datagram_frame(out, outlen, &fr->datagram);
+ default:
+ return NGTCP2_ERR_INVALID_ARGUMENT;
+ }
+}
+
+ngtcp2_ssize ngtcp2_pkt_encode_stream_frame(uint8_t *out, size_t outlen,
+ ngtcp2_stream *fr) {
+ size_t len = 1;
+ uint8_t flags = NGTCP2_STREAM_LEN_BIT;
+ uint8_t *p;
+ size_t i;
+ size_t datalen = 0;
+
+ if (fr->fin) {
+ flags |= NGTCP2_STREAM_FIN_BIT;
+ }
+
+ if (fr->offset) {
+ flags |= NGTCP2_STREAM_OFF_BIT;
+ len += ngtcp2_put_uvarintlen(fr->offset);
+ }
+
+ len += ngtcp2_put_uvarintlen((uint64_t)fr->stream_id);
+
+ for (i = 0; i < fr->datacnt; ++i) {
+ datalen += fr->data[i].len;
+ }
+
+ len += ngtcp2_put_uvarintlen(datalen);
+ len += datalen;
+
+ if (outlen < len) {
+ return NGTCP2_ERR_NOBUF;
+ }
+
+ p = out;
+
+ *p++ = flags | NGTCP2_FRAME_STREAM;
+
+ fr->flags = flags;
+
+ p = ngtcp2_put_uvarint(p, (uint64_t)fr->stream_id);
+
+ if (fr->offset) {
+ p = ngtcp2_put_uvarint(p, fr->offset);
+ }
+
+ p = ngtcp2_put_uvarint(p, datalen);
+
+ for (i = 0; i < fr->datacnt; ++i) {
+ assert(fr->data[i].len);
+ assert(fr->data[i].base);
+ p = ngtcp2_cpymem(p, fr->data[i].base, fr->data[i].len);
+ }
+
+ assert((size_t)(p - out) == len);
+
+ return (ngtcp2_ssize)len;
+}
+
+ngtcp2_ssize ngtcp2_pkt_encode_ack_frame(uint8_t *out, size_t outlen,
+ ngtcp2_ack *fr) {
+ size_t len = 1 + ngtcp2_put_uvarintlen((uint64_t)fr->largest_ack) +
+ ngtcp2_put_uvarintlen(fr->ack_delay) +
+ ngtcp2_put_uvarintlen(fr->rangecnt) +
+ ngtcp2_put_uvarintlen(fr->first_ack_range);
+ uint8_t *p;
+ size_t i;
+ const ngtcp2_ack_range *range;
+
+ for (i = 0; i < fr->rangecnt; ++i) {
+ range = &fr->ranges[i];
+ len += ngtcp2_put_uvarintlen(range->gap);
+ len += ngtcp2_put_uvarintlen(range->len);
+ }
+
+ if (fr->type == NGTCP2_FRAME_ACK_ECN) {
+ len += ngtcp2_put_uvarintlen(fr->ecn.ect0) +
+ ngtcp2_put_uvarintlen(fr->ecn.ect1) +
+ ngtcp2_put_uvarintlen(fr->ecn.ce);
+ }
+
+ if (outlen < len) {
+ return NGTCP2_ERR_NOBUF;
+ }
+
+ p = out;
+
+ *p++ = fr->type;
+ p = ngtcp2_put_uvarint(p, (uint64_t)fr->largest_ack);
+ p = ngtcp2_put_uvarint(p, fr->ack_delay);
+ p = ngtcp2_put_uvarint(p, fr->rangecnt);
+ p = ngtcp2_put_uvarint(p, fr->first_ack_range);
+
+ for (i = 0; i < fr->rangecnt; ++i) {
+ range = &fr->ranges[i];
+ p = ngtcp2_put_uvarint(p, range->gap);
+ p = ngtcp2_put_uvarint(p, range->len);
+ }
+
+ if (fr->type == NGTCP2_FRAME_ACK_ECN) {
+ p = ngtcp2_put_uvarint(p, fr->ecn.ect0);
+ p = ngtcp2_put_uvarint(p, fr->ecn.ect1);
+ p = ngtcp2_put_uvarint(p, fr->ecn.ce);
+ }
+
+ assert((size_t)(p - out) == len);
+
+ return (ngtcp2_ssize)len;
+}
+
+ngtcp2_ssize ngtcp2_pkt_encode_padding_frame(uint8_t *out, size_t outlen,
+ const ngtcp2_padding *fr) {
+ if (outlen < fr->len) {
+ return NGTCP2_ERR_NOBUF;
+ }
+
+ memset(out, 0, fr->len);
+
+ return (ngtcp2_ssize)fr->len;
+}
+
+ngtcp2_ssize
+ngtcp2_pkt_encode_reset_stream_frame(uint8_t *out, size_t outlen,
+ const ngtcp2_reset_stream *fr) {
+ size_t len = 1 + ngtcp2_put_uvarintlen((uint64_t)fr->stream_id) +
+ ngtcp2_put_uvarintlen(fr->app_error_code) +
+ ngtcp2_put_uvarintlen(fr->final_size);
+ uint8_t *p;
+
+ if (outlen < len) {
+ return NGTCP2_ERR_NOBUF;
+ }
+
+ p = out;
+
+ *p++ = NGTCP2_FRAME_RESET_STREAM;
+ p = ngtcp2_put_uvarint(p, (uint64_t)fr->stream_id);
+ p = ngtcp2_put_uvarint(p, fr->app_error_code);
+ p = ngtcp2_put_uvarint(p, fr->final_size);
+
+ assert((size_t)(p - out) == len);
+
+ return (ngtcp2_ssize)len;
+}
+
+ngtcp2_ssize
+ngtcp2_pkt_encode_connection_close_frame(uint8_t *out, size_t outlen,
+ const ngtcp2_connection_close *fr) {
+ size_t len = 1 + ngtcp2_put_uvarintlen(fr->error_code) +
+ (fr->type == NGTCP2_FRAME_CONNECTION_CLOSE
+ ? ngtcp2_put_uvarintlen(fr->frame_type)
+ : 0) +
+ ngtcp2_put_uvarintlen(fr->reasonlen) + fr->reasonlen;
+ uint8_t *p;
+
+ if (outlen < len) {
+ return NGTCP2_ERR_NOBUF;
+ }
+
+ p = out;
+
+ *p++ = fr->type;
+ p = ngtcp2_put_uvarint(p, fr->error_code);
+ if (fr->type == NGTCP2_FRAME_CONNECTION_CLOSE) {
+ p = ngtcp2_put_uvarint(p, fr->frame_type);
+ }
+ p = ngtcp2_put_uvarint(p, fr->reasonlen);
+ if (fr->reasonlen) {
+ p = ngtcp2_cpymem(p, fr->reason, fr->reasonlen);
+ }
+
+ assert((size_t)(p - out) == len);
+
+ return (ngtcp2_ssize)len;
+}
+
+ngtcp2_ssize ngtcp2_pkt_encode_max_data_frame(uint8_t *out, size_t outlen,
+ const ngtcp2_max_data *fr) {
+ size_t len = 1 + ngtcp2_put_uvarintlen(fr->max_data);
+ uint8_t *p;
+
+ if (outlen < len) {
+ return NGTCP2_ERR_NOBUF;
+ }
+
+ p = out;
+
+ *p++ = NGTCP2_FRAME_MAX_DATA;
+ p = ngtcp2_put_uvarint(p, fr->max_data);
+
+ assert((size_t)(p - out) == len);
+
+ return (ngtcp2_ssize)len;
+}
+
+ngtcp2_ssize
+ngtcp2_pkt_encode_max_stream_data_frame(uint8_t *out, size_t outlen,
+ const ngtcp2_max_stream_data *fr) {
+ size_t len = 1 + ngtcp2_put_uvarintlen((uint64_t)fr->stream_id) +
+ ngtcp2_put_uvarintlen(fr->max_stream_data);
+ uint8_t *p;
+
+ if (outlen < len) {
+ return NGTCP2_ERR_NOBUF;
+ }
+
+ p = out;
+
+ *p++ = NGTCP2_FRAME_MAX_STREAM_DATA;
+ p = ngtcp2_put_uvarint(p, (uint64_t)fr->stream_id);
+ p = ngtcp2_put_uvarint(p, fr->max_stream_data);
+
+ assert((size_t)(p - out) == len);
+
+ return (ngtcp2_ssize)len;
+}
+
+ngtcp2_ssize ngtcp2_pkt_encode_max_streams_frame(uint8_t *out, size_t outlen,
+ const ngtcp2_max_streams *fr) {
+ size_t len = 1 + ngtcp2_put_uvarintlen(fr->max_streams);
+ uint8_t *p;
+
+ if (outlen < len) {
+ return NGTCP2_ERR_NOBUF;
+ }
+
+ p = out;
+
+ *p++ = fr->type;
+ p = ngtcp2_put_uvarint(p, fr->max_streams);
+
+ assert((size_t)(p - out) == len);
+
+ return (ngtcp2_ssize)len;
+}
+
+ngtcp2_ssize ngtcp2_pkt_encode_ping_frame(uint8_t *out, size_t outlen,
+ const ngtcp2_ping *fr) {
+ (void)fr;
+
+ if (outlen < 1) {
+ return NGTCP2_ERR_NOBUF;
+ }
+
+ *out++ = NGTCP2_FRAME_PING;
+
+ return 1;
+}
+
+ngtcp2_ssize
+ngtcp2_pkt_encode_data_blocked_frame(uint8_t *out, size_t outlen,
+ const ngtcp2_data_blocked *fr) {
+ size_t len = 1 + ngtcp2_put_uvarintlen(fr->offset);
+ uint8_t *p;
+
+ if (outlen < len) {
+ return NGTCP2_ERR_NOBUF;
+ }
+
+ p = out;
+
+ *p++ = NGTCP2_FRAME_DATA_BLOCKED;
+ p = ngtcp2_put_uvarint(p, fr->offset);
+
+ assert((size_t)(p - out) == len);
+
+ return (ngtcp2_ssize)len;
+}
+
+ngtcp2_ssize ngtcp2_pkt_encode_stream_data_blocked_frame(
+ uint8_t *out, size_t outlen, const ngtcp2_stream_data_blocked *fr) {
+ size_t len = 1 + ngtcp2_put_uvarintlen((uint64_t)fr->stream_id) +
+ ngtcp2_put_uvarintlen(fr->offset);
+ uint8_t *p;
+
+ if (outlen < len) {
+ return NGTCP2_ERR_NOBUF;
+ }
+
+ p = out;
+
+ *p++ = NGTCP2_FRAME_STREAM_DATA_BLOCKED;
+ p = ngtcp2_put_uvarint(p, (uint64_t)fr->stream_id);
+ p = ngtcp2_put_uvarint(p, fr->offset);
+
+ assert((size_t)(p - out) == len);
+
+ return (ngtcp2_ssize)len;
+}
+
+ngtcp2_ssize
+ngtcp2_pkt_encode_streams_blocked_frame(uint8_t *out, size_t outlen,
+ const ngtcp2_streams_blocked *fr) {
+ size_t len = 1 + ngtcp2_put_uvarintlen(fr->max_streams);
+ uint8_t *p;
+
+ if (outlen < len) {
+ return NGTCP2_ERR_NOBUF;
+ }
+
+ p = out;
+
+ *p++ = fr->type;
+ p = ngtcp2_put_uvarint(p, fr->max_streams);
+
+ assert((size_t)(p - out) == len);
+
+ return (ngtcp2_ssize)len;
+}
+
+ngtcp2_ssize
+ngtcp2_pkt_encode_new_connection_id_frame(uint8_t *out, size_t outlen,
+ const ngtcp2_new_connection_id *fr) {
+ size_t len = 1 + ngtcp2_put_uvarintlen(fr->seq) +
+ ngtcp2_put_uvarintlen(fr->retire_prior_to) + 1 +
+ fr->cid.datalen + NGTCP2_STATELESS_RESET_TOKENLEN;
+ uint8_t *p;
+
+ if (outlen < len) {
+ return NGTCP2_ERR_NOBUF;
+ }
+
+ p = out;
+
+ *p++ = NGTCP2_FRAME_NEW_CONNECTION_ID;
+ p = ngtcp2_put_uvarint(p, fr->seq);
+ p = ngtcp2_put_uvarint(p, fr->retire_prior_to);
+ *p++ = (uint8_t)fr->cid.datalen;
+ p = ngtcp2_cpymem(p, fr->cid.data, fr->cid.datalen);
+ p = ngtcp2_cpymem(p, fr->stateless_reset_token,
+ NGTCP2_STATELESS_RESET_TOKENLEN);
+
+ assert((size_t)(p - out) == len);
+
+ return (ngtcp2_ssize)len;
+}
+
+ngtcp2_ssize
+ngtcp2_pkt_encode_stop_sending_frame(uint8_t *out, size_t outlen,
+ const ngtcp2_stop_sending *fr) {
+ size_t len = 1 + ngtcp2_put_uvarintlen((uint64_t)fr->stream_id) +
+ ngtcp2_put_uvarintlen(fr->app_error_code);
+ uint8_t *p;
+
+ if (outlen < len) {
+ return NGTCP2_ERR_NOBUF;
+ }
+
+ p = out;
+
+ *p++ = NGTCP2_FRAME_STOP_SENDING;
+ p = ngtcp2_put_uvarint(p, (uint64_t)fr->stream_id);
+ p = ngtcp2_put_uvarint(p, fr->app_error_code);
+
+ assert((size_t)(p - out) == len);
+
+ return (ngtcp2_ssize)len;
+}
+
+ngtcp2_ssize
+ngtcp2_pkt_encode_path_challenge_frame(uint8_t *out, size_t outlen,
+ const ngtcp2_path_challenge *fr) {
+ size_t len = 1 + 8;
+ uint8_t *p;
+
+ if (outlen < len) {
+ return NGTCP2_ERR_NOBUF;
+ }
+
+ p = out;
+
+ *p++ = NGTCP2_FRAME_PATH_CHALLENGE;
+ p = ngtcp2_cpymem(p, fr->data, sizeof(fr->data));
+
+ assert((size_t)(p - out) == len);
+
+ return (ngtcp2_ssize)len;
+}
+
+ngtcp2_ssize
+ngtcp2_pkt_encode_path_response_frame(uint8_t *out, size_t outlen,
+ const ngtcp2_path_response *fr) {
+ size_t len = 1 + 8;
+ uint8_t *p;
+
+ if (outlen < len) {
+ return NGTCP2_ERR_NOBUF;
+ }
+
+ p = out;
+
+ *p++ = NGTCP2_FRAME_PATH_RESPONSE;
+ p = ngtcp2_cpymem(p, fr->data, sizeof(fr->data));
+
+ assert((size_t)(p - out) == len);
+
+ return (ngtcp2_ssize)len;
+}
+
+ngtcp2_ssize ngtcp2_pkt_encode_crypto_frame(uint8_t *out, size_t outlen,
+ const ngtcp2_crypto *fr) {
+ size_t len = 1;
+ uint8_t *p;
+ size_t i;
+ size_t datalen = 0;
+
+ len += ngtcp2_put_uvarintlen(fr->offset);
+
+ for (i = 0; i < fr->datacnt; ++i) {
+ datalen += fr->data[i].len;
+ }
+
+ len += ngtcp2_put_uvarintlen(datalen);
+ len += datalen;
+
+ if (outlen < len) {
+ return NGTCP2_ERR_NOBUF;
+ }
+
+ p = out;
+
+ *p++ = NGTCP2_FRAME_CRYPTO;
+
+ p = ngtcp2_put_uvarint(p, fr->offset);
+ p = ngtcp2_put_uvarint(p, datalen);
+
+ for (i = 0; i < fr->datacnt; ++i) {
+ assert(fr->data[i].base);
+ p = ngtcp2_cpymem(p, fr->data[i].base, fr->data[i].len);
+ }
+
+ assert((size_t)(p - out) == len);
+
+ return (ngtcp2_ssize)len;
+}
+
+ngtcp2_ssize ngtcp2_pkt_encode_new_token_frame(uint8_t *out, size_t outlen,
+ const ngtcp2_new_token *fr) {
+ size_t len = 1 + ngtcp2_put_uvarintlen(fr->tokenlen) + fr->tokenlen;
+ uint8_t *p;
+
+ assert(fr->tokenlen);
+
+ if (outlen < len) {
+ return NGTCP2_ERR_NOBUF;
+ }
+
+ p = out;
+
+ *p++ = NGTCP2_FRAME_NEW_TOKEN;
+
+ p = ngtcp2_put_uvarint(p, fr->tokenlen);
+ p = ngtcp2_cpymem(p, fr->token, fr->tokenlen);
+
+ assert((size_t)(p - out) == len);
+
+ return (ngtcp2_ssize)len;
+}
+
+ngtcp2_ssize ngtcp2_pkt_encode_retire_connection_id_frame(
+ uint8_t *out, size_t outlen, const ngtcp2_retire_connection_id *fr) {
+ size_t len = 1 + ngtcp2_put_uvarintlen(fr->seq);
+ uint8_t *p;
+
+ if (outlen < len) {
+ return NGTCP2_ERR_NOBUF;
+ }
+
+ p = out;
+
+ *p++ = NGTCP2_FRAME_RETIRE_CONNECTION_ID;
+
+ p = ngtcp2_put_uvarint(p, fr->seq);
+
+ assert((size_t)(p - out) == len);
+
+ return (ngtcp2_ssize)len;
+}
+
+ngtcp2_ssize
+ngtcp2_pkt_encode_handshake_done_frame(uint8_t *out, size_t outlen,
+ const ngtcp2_handshake_done *fr) {
+ (void)fr;
+
+ if (outlen < 1) {
+ return NGTCP2_ERR_NOBUF;
+ }
+
+ *out++ = NGTCP2_FRAME_HANDSHAKE_DONE;
+
+ return 1;
+}
+
+ngtcp2_ssize ngtcp2_pkt_encode_datagram_frame(uint8_t *out, size_t outlen,
+ const ngtcp2_datagram *fr) {
+ uint64_t datalen = ngtcp2_vec_len(fr->data, fr->datacnt);
+ uint64_t len =
+ 1 +
+ (fr->type == NGTCP2_FRAME_DATAGRAM ? 0 : ngtcp2_put_uvarintlen(datalen)) +
+ datalen;
+ uint8_t *p;
+ size_t i;
+
+ assert(fr->type == NGTCP2_FRAME_DATAGRAM ||
+ fr->type == NGTCP2_FRAME_DATAGRAM_LEN);
+
+ if (outlen < len) {
+ return NGTCP2_ERR_NOBUF;
+ }
+
+ p = out;
+
+ *p++ = fr->type;
+ if (fr->type == NGTCP2_FRAME_DATAGRAM_LEN) {
+ p = ngtcp2_put_uvarint(p, datalen);
+ }
+
+ for (i = 0; i < fr->datacnt; ++i) {
+ assert(fr->data[i].len);
+ assert(fr->data[i].base);
+ p = ngtcp2_cpymem(p, fr->data[i].base, fr->data[i].len);
+ }
+
+ assert((size_t)(p - out) == len);
+
+ return (ngtcp2_ssize)len;
+}
+
+ngtcp2_ssize ngtcp2_pkt_write_version_negotiation(
+ uint8_t *dest, size_t destlen, uint8_t unused_random, const uint8_t *dcid,
+ size_t dcidlen, const uint8_t *scid, size_t scidlen, const uint32_t *sv,
+ size_t nsv) {
+ size_t len = 1 + 4 + 1 + dcidlen + 1 + scidlen + nsv * 4;
+ uint8_t *p;
+ size_t i;
+
+ assert(dcidlen < 256);
+ assert(scidlen < 256);
+
+ if (destlen < len) {
+ return NGTCP2_ERR_NOBUF;
+ }
+
+ p = dest;
+
+ *p++ = 0x80 | unused_random;
+ p = ngtcp2_put_uint32be(p, 0);
+ *p++ = (uint8_t)dcidlen;
+ if (dcidlen) {
+ p = ngtcp2_cpymem(p, dcid, dcidlen);
+ }
+ *p++ = (uint8_t)scidlen;
+ if (scidlen) {
+ p = ngtcp2_cpymem(p, scid, scidlen);
+ }
+
+ for (i = 0; i < nsv; ++i) {
+ p = ngtcp2_put_uint32be(p, sv[i]);
+ }
+
+ assert((size_t)(p - dest) == len);
+
+ return (ngtcp2_ssize)len;
+}
+
+size_t ngtcp2_pkt_decode_version_negotiation(uint32_t *dest,
+ const uint8_t *payload,
+ size_t payloadlen) {
+ const uint8_t *end = payload + payloadlen;
+
+ assert((payloadlen % sizeof(uint32_t)) == 0);
+
+ for (; payload != end;) {
+ payload = ngtcp2_get_uint32(dest++, payload);
+ }
+
+ return payloadlen / sizeof(uint32_t);
+}
+
+int ngtcp2_pkt_decode_stateless_reset(ngtcp2_pkt_stateless_reset *sr,
+ const uint8_t *payload,
+ size_t payloadlen) {
+ const uint8_t *p = payload;
+
+ if (payloadlen <
+ NGTCP2_MIN_STATELESS_RESET_RANDLEN + NGTCP2_STATELESS_RESET_TOKENLEN) {
+ return NGTCP2_ERR_INVALID_ARGUMENT;
+ }
+
+ sr->rand = p;
+ sr->randlen = payloadlen - NGTCP2_STATELESS_RESET_TOKENLEN;
+ p += sr->randlen;
+ memcpy(sr->stateless_reset_token, p, NGTCP2_STATELESS_RESET_TOKENLEN);
+
+ return 0;
+}
+
+int ngtcp2_pkt_decode_retry(ngtcp2_pkt_retry *dest, const uint8_t *payload,
+ size_t payloadlen) {
+ size_t len = /* token */ 1 + NGTCP2_RETRY_TAGLEN;
+
+ if (payloadlen < len) {
+ return NGTCP2_ERR_INVALID_ARGUMENT;
+ }
+
+ dest->token = (uint8_t *)payload;
+ dest->tokenlen = (size_t)(payloadlen - NGTCP2_RETRY_TAGLEN);
+ ngtcp2_cpymem(dest->tag, payload + dest->tokenlen, NGTCP2_RETRY_TAGLEN);
+
+ return 0;
+}
+
+int64_t ngtcp2_pkt_adjust_pkt_num(int64_t max_pkt_num, int64_t pkt_num,
+ size_t n) {
+ int64_t expected = max_pkt_num + 1;
+ int64_t win = (int64_t)1 << n;
+ int64_t hwin = win / 2;
+ int64_t mask = win - 1;
+ int64_t cand = (expected & ~mask) | pkt_num;
+
+ if (cand <= expected - hwin) {
+ assert(cand <= (int64_t)NGTCP2_MAX_VARINT - win);
+ return cand + win;
+ }
+ if (cand > expected + hwin && cand >= win) {
+ return cand - win;
+ }
+ return cand;
+}
+
+int ngtcp2_pkt_validate_ack(ngtcp2_ack *fr) {
+ int64_t largest_ack = fr->largest_ack;
+ size_t i;
+
+ if (largest_ack < (int64_t)fr->first_ack_range) {
+ return NGTCP2_ERR_ACK_FRAME;
+ }
+
+ largest_ack -= (int64_t)fr->first_ack_range;
+
+ for (i = 0; i < fr->rangecnt; ++i) {
+ if (largest_ack < (int64_t)fr->ranges[i].gap + 2) {
+ return NGTCP2_ERR_ACK_FRAME;
+ }
+
+ largest_ack -= (int64_t)fr->ranges[i].gap + 2;
+
+ if (largest_ack < (int64_t)fr->ranges[i].len) {
+ return NGTCP2_ERR_ACK_FRAME;
+ }
+
+ largest_ack -= (int64_t)fr->ranges[i].len;
+ }
+
+ return 0;
+}
+
+ngtcp2_ssize
+ngtcp2_pkt_write_stateless_reset(uint8_t *dest, size_t destlen,
+ const uint8_t *stateless_reset_token,
+ const uint8_t *rand, size_t randlen) {
+ uint8_t *p;
+
+ if (destlen <
+ NGTCP2_MIN_STATELESS_RESET_RANDLEN + NGTCP2_STATELESS_RESET_TOKENLEN) {
+ return NGTCP2_ERR_NOBUF;
+ }
+
+ if (randlen < NGTCP2_MIN_STATELESS_RESET_RANDLEN) {
+ return NGTCP2_ERR_INVALID_ARGUMENT;
+ }
+
+ p = dest;
+
+ randlen = ngtcp2_min(destlen - NGTCP2_STATELESS_RESET_TOKENLEN, randlen);
+
+ p = ngtcp2_cpymem(p, rand, randlen);
+ p = ngtcp2_cpymem(p, stateless_reset_token, NGTCP2_STATELESS_RESET_TOKENLEN);
+ *dest = (uint8_t)((*dest & 0x7fu) | 0x40u);
+
+ return p - dest;
+}
+
+ngtcp2_ssize ngtcp2_pkt_write_retry(
+ uint8_t *dest, size_t destlen, uint32_t version, const ngtcp2_cid *dcid,
+ const ngtcp2_cid *scid, const ngtcp2_cid *odcid, const uint8_t *token,
+ size_t tokenlen, ngtcp2_encrypt encrypt, const ngtcp2_crypto_aead *aead,
+ const ngtcp2_crypto_aead_ctx *aead_ctx) {
+ ngtcp2_pkt_hd hd;
+ uint8_t pseudo_retry[1500];
+ ngtcp2_ssize pseudo_retrylen;
+ uint8_t tag[NGTCP2_RETRY_TAGLEN];
+ int rv;
+ uint8_t *p;
+ size_t offset;
+ const uint8_t *nonce;
+ size_t noncelen;
+
+ assert(tokenlen > 0);
+ assert(!ngtcp2_cid_eq(scid, odcid));
+
+ /* Retry packet is sent at most once per one connection attempt. In
+ the first connection attempt, client has to send random DCID
+ which is at least NGTCP2_MIN_INITIAL_DCIDLEN bytes long. */
+ if (odcid->datalen < NGTCP2_MIN_INITIAL_DCIDLEN) {
+ return NGTCP2_ERR_INVALID_ARGUMENT;
+ }
+
+ ngtcp2_pkt_hd_init(&hd, NGTCP2_PKT_FLAG_LONG_FORM, NGTCP2_PKT_RETRY, dcid,
+ scid, /* pkt_num = */ 0, /* pkt_numlen = */ 1, version,
+ /* len = */ 0);
+
+ pseudo_retrylen =
+ ngtcp2_pkt_encode_pseudo_retry(pseudo_retry, sizeof(pseudo_retry), &hd,
+ /* unused = */ 0, odcid, token, tokenlen);
+ if (pseudo_retrylen < 0) {
+ return pseudo_retrylen;
+ }
+
+ switch (version) {
+ case NGTCP2_PROTO_VER_V1:
+ nonce = (const uint8_t *)NGTCP2_RETRY_NONCE_V1;
+ noncelen = sizeof(NGTCP2_RETRY_NONCE_V1) - 1;
+ break;
+ case NGTCP2_PROTO_VER_V2:
+ nonce = (const uint8_t *)NGTCP2_RETRY_NONCE_V2;
+ noncelen = sizeof(NGTCP2_RETRY_NONCE_V2) - 1;
+ break;
+ default:
+ nonce = (const uint8_t *)NGTCP2_RETRY_NONCE_DRAFT;
+ noncelen = sizeof(NGTCP2_RETRY_NONCE_DRAFT) - 1;
+ }
+
+ /* OpenSSL does not like NULL plaintext. */
+ rv = encrypt(tag, aead, aead_ctx, (const uint8_t *)"", 0, nonce, noncelen,
+ pseudo_retry, (size_t)pseudo_retrylen);
+ if (rv != 0) {
+ return rv;
+ }
+
+ offset = 1 + odcid->datalen;
+ if (destlen < (size_t)pseudo_retrylen + sizeof(tag) - offset) {
+ return NGTCP2_ERR_NOBUF;
+ }
+
+ p = ngtcp2_cpymem(dest, pseudo_retry + offset,
+ (size_t)pseudo_retrylen - offset);
+ p = ngtcp2_cpymem(p, tag, sizeof(tag));
+
+ return p - dest;
+}
+
+ngtcp2_ssize ngtcp2_pkt_encode_pseudo_retry(
+ uint8_t *dest, size_t destlen, const ngtcp2_pkt_hd *hd, uint8_t unused,
+ const ngtcp2_cid *odcid, const uint8_t *token, size_t tokenlen) {
+ uint8_t *p = dest;
+ ngtcp2_ssize nwrite;
+
+ if (destlen < 1 + odcid->datalen) {
+ return NGTCP2_ERR_NOBUF;
+ }
+
+ *p++ = (uint8_t)odcid->datalen;
+ p = ngtcp2_cpymem(p, odcid->data, odcid->datalen);
+ destlen -= (size_t)(p - dest);
+
+ nwrite = ngtcp2_pkt_encode_hd_long(p, destlen, hd);
+ if (nwrite < 0) {
+ return nwrite;
+ }
+
+ if (destlen < (size_t)nwrite + tokenlen) {
+ return NGTCP2_ERR_NOBUF;
+ }
+
+ *p &= 0xf0;
+ *p |= unused;
+
+ p += nwrite;
+
+ p = ngtcp2_cpymem(p, token, tokenlen);
+
+ return p - dest;
+}
+
+int ngtcp2_pkt_verify_retry_tag(uint32_t version, const ngtcp2_pkt_retry *retry,
+ const uint8_t *pkt, size_t pktlen,
+ ngtcp2_encrypt encrypt,
+ const ngtcp2_crypto_aead *aead,
+ const ngtcp2_crypto_aead_ctx *aead_ctx) {
+ uint8_t pseudo_retry[1500];
+ size_t pseudo_retrylen;
+ uint8_t *p = pseudo_retry;
+ int rv;
+ uint8_t tag[NGTCP2_RETRY_TAGLEN];
+ const uint8_t *nonce;
+ size_t noncelen;
+
+ assert(pktlen >= sizeof(retry->tag));
+
+ if (sizeof(pseudo_retry) <
+ 1 + retry->odcid.datalen + pktlen - sizeof(retry->tag)) {
+ return NGTCP2_ERR_PROTO;
+ }
+
+ *p++ = (uint8_t)retry->odcid.datalen;
+ p = ngtcp2_cpymem(p, retry->odcid.data, retry->odcid.datalen);
+ p = ngtcp2_cpymem(p, pkt, pktlen - sizeof(retry->tag));
+
+ pseudo_retrylen = (size_t)(p - pseudo_retry);
+
+ switch (version) {
+ case NGTCP2_PROTO_VER_V1:
+ nonce = (const uint8_t *)NGTCP2_RETRY_NONCE_V1;
+ noncelen = sizeof(NGTCP2_RETRY_NONCE_V1) - 1;
+ break;
+ case NGTCP2_PROTO_VER_V2:
+ nonce = (const uint8_t *)NGTCP2_RETRY_NONCE_V2;
+ noncelen = sizeof(NGTCP2_RETRY_NONCE_V2) - 1;
+ break;
+ default:
+ nonce = (const uint8_t *)NGTCP2_RETRY_NONCE_DRAFT;
+ noncelen = sizeof(NGTCP2_RETRY_NONCE_DRAFT) - 1;
+ }
+
+ /* OpenSSL does not like NULL plaintext. */
+ rv = encrypt(tag, aead, aead_ctx, (const uint8_t *)"", 0, nonce, noncelen,
+ pseudo_retry, pseudo_retrylen);
+ if (rv != 0) {
+ return rv;
+ }
+
+ if (0 != memcmp(retry->tag, tag, sizeof(retry->tag))) {
+ return NGTCP2_ERR_PROTO;
+ }
+
+ return 0;
+}
+
+size_t ngtcp2_pkt_stream_max_datalen(int64_t stream_id, uint64_t offset,
+ uint64_t len, size_t left) {
+ size_t n = 1 /* type */ + ngtcp2_put_uvarintlen((uint64_t)stream_id) +
+ (offset ? ngtcp2_put_uvarintlen(offset) : 0);
+
+ if (left <= n) {
+ return (size_t)-1;
+ }
+
+ left -= n;
+
+ if (left > 8 + 1073741823 && len > 1073741823) {
+#if SIZE_MAX > UINT32_MAX
+ len = ngtcp2_min(len, 4611686018427387903lu);
+#endif /* SIZE_MAX > UINT32_MAX */
+ return (size_t)ngtcp2_min(len, (uint64_t)(left - 8));
+ }
+
+ if (left > 4 + 16383 && len > 16383) {
+ len = ngtcp2_min(len, 1073741823);
+ return (size_t)ngtcp2_min(len, (uint64_t)(left - 4));
+ }
+
+ if (left > 2 + 63 && len > 63) {
+ len = ngtcp2_min(len, 16383);
+ return (size_t)ngtcp2_min(len, (uint64_t)(left - 2));
+ }
+
+ len = ngtcp2_min(len, 63);
+ return (size_t)ngtcp2_min(len, (uint64_t)(left - 1));
+}
+
+size_t ngtcp2_pkt_crypto_max_datalen(uint64_t offset, size_t len, size_t left) {
+ size_t n = 1 /* type */ + ngtcp2_put_uvarintlen(offset);
+
+ /* CRYPTO frame must contain nonzero length data. Return -1 if
+ there is no space to write crypto data. */
+ if (left <= n + 1) {
+ return (size_t)-1;
+ }
+
+ left -= n;
+
+ if (left > 8 + 1073741823 && len > 1073741823) {
+#if SIZE_MAX > UINT32_MAX
+ len = ngtcp2_min(len, 4611686018427387903lu);
+#endif /* SIZE_MAX > UINT32_MAX */
+ return ngtcp2_min(len, left - 8);
+ }
+
+ if (left > 4 + 16383 && len > 16383) {
+ len = ngtcp2_min(len, 1073741823);
+ return ngtcp2_min(len, left - 4);
+ }
+
+ if (left > 2 + 63 && len > 63) {
+ len = ngtcp2_min(len, 16383);
+ return ngtcp2_min(len, left - 2);
+ }
+
+ len = ngtcp2_min(len, 63);
+ return ngtcp2_min(len, left - 1);
+}
+
+size_t ngtcp2_pkt_datagram_framelen(size_t len) {
+ return 1 /* type */ + ngtcp2_put_uvarintlen(len) + len;
+}
+
+int ngtcp2_is_supported_version(uint32_t version) {
+ switch (version) {
+ case NGTCP2_PROTO_VER_V1:
+ case NGTCP2_PROTO_VER_V2:
+ return 1;
+ default:
+ return NGTCP2_PROTO_VER_DRAFT_MIN <= version &&
+ version <= NGTCP2_PROTO_VER_DRAFT_MAX;
+ }
+}
+
+int ngtcp2_is_reserved_version(uint32_t version) {
+ return (version & NGTCP2_RESERVED_VERSION_MASK) ==
+ NGTCP2_RESERVED_VERSION_MASK;
+}
+
+uint8_t ngtcp2_pkt_get_type_long(uint32_t version, uint8_t c) {
+ uint8_t pkt_type = (uint8_t)((c & NGTCP2_LONG_TYPE_MASK) >> 4);
+
+ switch (version) {
+ case NGTCP2_PROTO_VER_V2:
+ switch (pkt_type) {
+ case NGTCP2_PKT_TYPE_INITIAL_V2:
+ return NGTCP2_PKT_INITIAL;
+ case NGTCP2_PKT_TYPE_0RTT_V2:
+ return NGTCP2_PKT_0RTT;
+ case NGTCP2_PKT_TYPE_HANDSHAKE_V2:
+ return NGTCP2_PKT_HANDSHAKE;
+ case NGTCP2_PKT_TYPE_RETRY_V2:
+ return NGTCP2_PKT_RETRY;
+ default:
+ return 0;
+ }
+ default:
+ if (!ngtcp2_is_supported_version(version)) {
+ return 0;
+ }
+
+ /* QUIC v1 and draft versions share the same numeric packet
+ types. */
+ switch (pkt_type) {
+ case NGTCP2_PKT_TYPE_INITIAL_V1:
+ return NGTCP2_PKT_INITIAL;
+ case NGTCP2_PKT_TYPE_0RTT_V1:
+ return NGTCP2_PKT_0RTT;
+ case NGTCP2_PKT_TYPE_HANDSHAKE_V1:
+ return NGTCP2_PKT_HANDSHAKE;
+ case NGTCP2_PKT_TYPE_RETRY_V1:
+ return NGTCP2_PKT_RETRY;
+ default:
+ return 0;
+ }
+ }
+}
+
+uint8_t ngtcp2_pkt_versioned_type(uint32_t version, uint32_t pkt_type) {
+ switch (version) {
+ case NGTCP2_PROTO_VER_V2:
+ switch (pkt_type) {
+ case NGTCP2_PKT_INITIAL:
+ return NGTCP2_PKT_TYPE_INITIAL_V2;
+ case NGTCP2_PKT_0RTT:
+ return NGTCP2_PKT_TYPE_0RTT_V2;
+ case NGTCP2_PKT_HANDSHAKE:
+ return NGTCP2_PKT_TYPE_HANDSHAKE_V2;
+ case NGTCP2_PKT_RETRY:
+ return NGTCP2_PKT_TYPE_RETRY_V2;
+ default:
+ ngtcp2_unreachable();
+ }
+ default:
+ /* Assume that unsupported versions share the numeric long packet
+ types with QUIC v1 in order to send a packet to elicit Version
+ Negotiation packet. */
+
+ /* QUIC v1 and draft versions share the same numeric packet
+ types. */
+ switch (pkt_type) {
+ case NGTCP2_PKT_INITIAL:
+ return NGTCP2_PKT_TYPE_INITIAL_V1;
+ case NGTCP2_PKT_0RTT:
+ return NGTCP2_PKT_TYPE_0RTT_V1;
+ case NGTCP2_PKT_HANDSHAKE:
+ return NGTCP2_PKT_TYPE_HANDSHAKE_V1;
+ case NGTCP2_PKT_RETRY:
+ return NGTCP2_PKT_TYPE_RETRY_V1;
+ default:
+ ngtcp2_unreachable();
+ }
+ }
+}
+
+int ngtcp2_pkt_verify_reserved_bits(uint8_t c) {
+ if (c & NGTCP2_HEADER_FORM_BIT) {
+ return (c & NGTCP2_LONG_RESERVED_BIT_MASK) == 0 ? 0 : NGTCP2_ERR_PROTO;
+ }
+ return (c & NGTCP2_SHORT_RESERVED_BIT_MASK) == 0 ? 0 : NGTCP2_ERR_PROTO;
+}
diff --git a/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_pkt.h b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_pkt.h
new file mode 100644
index 0000000..9db62b0
--- /dev/null
+++ b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_pkt.h
@@ -0,0 +1,1235 @@
+/*
+ * ngtcp2
+ *
+ * Copyright (c) 2017 ngtcp2 contributors
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#ifndef NGTCP2_PKT_H
+#define NGTCP2_PKT_H
+
+#ifdef HAVE_CONFIG_H
+# include <config.h>
+#endif /* HAVE_CONFIG_H */
+
+#include <ngtcp2/ngtcp2.h>
+
+/* QUIC header macros */
+#define NGTCP2_HEADER_FORM_BIT 0x80
+#define NGTCP2_FIXED_BIT_MASK 0x40
+#define NGTCP2_PKT_NUMLEN_MASK 0x03
+
+/* Long header specific macros */
+#define NGTCP2_LONG_TYPE_MASK 0x30
+#define NGTCP2_LONG_RESERVED_BIT_MASK 0x0c
+
+/* Short header specific macros */
+#define NGTCP2_SHORT_SPIN_BIT_MASK 0x20
+#define NGTCP2_SHORT_RESERVED_BIT_MASK 0x18
+#define NGTCP2_SHORT_KEY_PHASE_BIT 0x04
+
+/* NGTCP2_SR_TYPE is a Type field of Stateless Reset. */
+#define NGTCP2_SR_TYPE 0x1f
+
+/* NGTCP2_MIN_LONG_HEADERLEN is the minimum length of long header.
+ That is (1|1|TT|RR|PP)<1> + VERSION<4> + DCIL<1> + SCIL<1> +
+ LENGTH<1> + PKN<1> */
+#define NGTCP2_MIN_LONG_HEADERLEN (1 + 4 + 1 + 1 + 1 + 1)
+
+#define NGTCP2_STREAM_FIN_BIT 0x01
+#define NGTCP2_STREAM_LEN_BIT 0x02
+#define NGTCP2_STREAM_OFF_BIT 0x04
+
+/* NGTCP2_STREAM_OVERHEAD is the maximum number of bytes required
+ other than payload for STREAM frame. That is from type field to
+ the beginning of the payload. */
+#define NGTCP2_STREAM_OVERHEAD (1 + 8 + 8 + 8)
+
+/* NGTCP2_CRYPTO_OVERHEAD is the maximum number of bytes required
+ other than payload for CRYPTO frame. That is from type field to
+ the beginning of the payload. */
+#define NGTCP2_CRYPTO_OVERHEAD (1 + 8 + 8)
+
+/* NGTCP2_DATAGRAM_OVERHEAD is the maximum number of bytes required
+ other than payload for DATAGRAM frame. That is from type field to
+ the beginning of the payload. */
+#define NGTCP2_DATAGRAM_OVERHEAD (1 + 8)
+
+/* NGTCP2_MIN_FRAME_PAYLOADLEN is the minimum frame payload length. */
+#define NGTCP2_MIN_FRAME_PAYLOADLEN 16
+
+/* NGTCP2_MAX_SERVER_STREAM_ID_BIDI is the maximum bidirectional
+ server stream ID. */
+#define NGTCP2_MAX_SERVER_STREAM_ID_BIDI ((int64_t)0x3ffffffffffffffdll)
+/* NGTCP2_MAX_CLIENT_STREAM_ID_BIDI is the maximum bidirectional
+ client stream ID. */
+#define NGTCP2_MAX_CLIENT_STREAM_ID_BIDI ((int64_t)0x3ffffffffffffffcll)
+/* NGTCP2_MAX_SERVER_STREAM_ID_UNI is the maximum unidirectional
+ server stream ID. */
+#define NGTCP2_MAX_SERVER_STREAM_ID_UNI ((int64_t)0x3fffffffffffffffll)
+/* NGTCP2_MAX_CLIENT_STREAM_ID_UNI is the maximum unidirectional
+ client stream ID. */
+#define NGTCP2_MAX_CLIENT_STREAM_ID_UNI ((int64_t)0x3ffffffffffffffell)
+
+/* NGTCP2_MAX_NUM_ACK_RANGES is the maximum number of Additional ACK
+ ranges which this library can create, or decode. */
+#define NGTCP2_MAX_ACK_RANGES 32
+
+/* NGTCP2_MAX_PKT_NUM is the maximum packet number. */
+#define NGTCP2_MAX_PKT_NUM ((int64_t)((1ll << 62) - 1))
+
+/* NGTCP2_MIN_PKT_EXPANDLEN is the minimum packet size expansion in
+ addition to the minimum DCID length to hide/trigger Stateless
+ Reset. */
+#define NGTCP2_MIN_PKT_EXPANDLEN 22
+
+/* NGTCP2_RETRY_TAGLEN is the length of Retry packet integrity tag. */
+#define NGTCP2_RETRY_TAGLEN 16
+
+/* NGTCP2_HARD_MAX_UDP_PAYLOAD_SIZE is the maximum UDP payload size
+ that this library can write. */
+#define NGTCP2_HARD_MAX_UDP_PAYLOAD_SIZE ((1 << 24) - 1)
+
+/* NGTCP2_PKT_LENGTHLEN is the number of bytes that is occupied by
+ Length field in Long packet header. */
+#define NGTCP2_PKT_LENGTHLEN 4
+
+/* NGTCP2_PKT_TYPE_INITIAL_V1 is Initial long header packet type for
+ QUIC v1. */
+#define NGTCP2_PKT_TYPE_INITIAL_V1 0x0
+/* NGTCP2_PKT_TYPE_0RTT_V1 is 0RTT long header packet type for QUIC
+ v1. */
+#define NGTCP2_PKT_TYPE_0RTT_V1 0x1
+/* NGTCP2_PKT_TYPE_HANDSHAKE_V1 is Handshake long header packet type
+ for QUIC v1. */
+#define NGTCP2_PKT_TYPE_HANDSHAKE_V1 0x2
+/* NGTCP2_PKT_TYPE_RETRY_V1 is Retry long header packet type for QUIC
+ v1. */
+#define NGTCP2_PKT_TYPE_RETRY_V1 0x3
+
+/* NGTCP2_PKT_TYPE_INITIAL_V2 is Initial long header packet type for
+ QUIC v2. */
+#define NGTCP2_PKT_TYPE_INITIAL_V2 0x1
+/* NGTCP2_PKT_TYPE_0RTT_V2 is 0RTT long header packet type for QUIC
+ v2. */
+#define NGTCP2_PKT_TYPE_0RTT_V2 0x2
+/* NGTCP2_PKT_TYPE_HANDSHAKE_V2 is Handshake long header packet type
+ for QUIC v2. */
+#define NGTCP2_PKT_TYPE_HANDSHAKE_V2 0x3
+/* NGTCP2_PKT_TYPE_RETRY_V2 is Retry long header packet type for QUIC
+ v2. */
+#define NGTCP2_PKT_TYPE_RETRY_V2 0x0
+
+typedef struct ngtcp2_pkt_retry {
+ ngtcp2_cid odcid;
+ uint8_t *token;
+ size_t tokenlen;
+ uint8_t tag[NGTCP2_RETRY_TAGLEN];
+} ngtcp2_pkt_retry;
+
+typedef enum {
+ NGTCP2_FRAME_PADDING = 0x00,
+ NGTCP2_FRAME_PING = 0x01,
+ NGTCP2_FRAME_ACK = 0x02,
+ NGTCP2_FRAME_ACK_ECN = 0x03,
+ NGTCP2_FRAME_RESET_STREAM = 0x04,
+ NGTCP2_FRAME_STOP_SENDING = 0x05,
+ NGTCP2_FRAME_CRYPTO = 0x06,
+ NGTCP2_FRAME_NEW_TOKEN = 0x07,
+ NGTCP2_FRAME_STREAM = 0x08,
+ NGTCP2_FRAME_MAX_DATA = 0x10,
+ NGTCP2_FRAME_MAX_STREAM_DATA = 0x11,
+ NGTCP2_FRAME_MAX_STREAMS_BIDI = 0x12,
+ NGTCP2_FRAME_MAX_STREAMS_UNI = 0x13,
+ NGTCP2_FRAME_DATA_BLOCKED = 0x14,
+ NGTCP2_FRAME_STREAM_DATA_BLOCKED = 0x15,
+ NGTCP2_FRAME_STREAMS_BLOCKED_BIDI = 0x16,
+ NGTCP2_FRAME_STREAMS_BLOCKED_UNI = 0x17,
+ NGTCP2_FRAME_NEW_CONNECTION_ID = 0x18,
+ NGTCP2_FRAME_RETIRE_CONNECTION_ID = 0x19,
+ NGTCP2_FRAME_PATH_CHALLENGE = 0x1a,
+ NGTCP2_FRAME_PATH_RESPONSE = 0x1b,
+ NGTCP2_FRAME_CONNECTION_CLOSE = 0x1c,
+ NGTCP2_FRAME_CONNECTION_CLOSE_APP = 0x1d,
+ NGTCP2_FRAME_HANDSHAKE_DONE = 0x1e,
+ NGTCP2_FRAME_DATAGRAM = 0x30,
+ NGTCP2_FRAME_DATAGRAM_LEN = 0x31,
+} ngtcp2_frame_type;
+
+typedef struct ngtcp2_stream {
+ uint8_t type;
+ /**
+ * flags of decoded STREAM frame. This gets ignored when encoding
+ * STREAM frame.
+ */
+ uint8_t flags;
+ uint8_t fin;
+ int64_t stream_id;
+ uint64_t offset;
+ /* datacnt is the number of elements that data contains. Although
+ the length of data is 1 in this definition, the library may
+ allocate extra bytes to hold more elements. */
+ size_t datacnt;
+ /* data is the array of ngtcp2_vec which references data. */
+ ngtcp2_vec data[1];
+} ngtcp2_stream;
+
+typedef struct ngtcp2_ack_range {
+ uint64_t gap;
+ uint64_t len;
+} ngtcp2_ack_range;
+
+typedef struct ngtcp2_ack {
+ uint8_t type;
+ int64_t largest_ack;
+ uint64_t ack_delay;
+ /**
+ * ack_delay_unscaled is an ack_delay multiplied by
+ * 2**ack_delay_component * NGTCP2_MICROSECONDS.
+ */
+ ngtcp2_duration ack_delay_unscaled;
+ struct {
+ uint64_t ect0;
+ uint64_t ect1;
+ uint64_t ce;
+ } ecn;
+ uint64_t first_ack_range;
+ size_t rangecnt;
+ ngtcp2_ack_range ranges[1];
+} ngtcp2_ack;
+
+typedef struct ngtcp2_padding {
+ uint8_t type;
+ /**
+ * The length of contiguous PADDING frames.
+ */
+ size_t len;
+} ngtcp2_padding;
+
+typedef struct ngtcp2_reset_stream {
+ uint8_t type;
+ int64_t stream_id;
+ uint64_t app_error_code;
+ uint64_t final_size;
+} ngtcp2_reset_stream;
+
+typedef struct ngtcp2_connection_close {
+ uint8_t type;
+ uint64_t error_code;
+ uint64_t frame_type;
+ size_t reasonlen;
+ uint8_t *reason;
+} ngtcp2_connection_close;
+
+typedef struct ngtcp2_max_data {
+ uint8_t type;
+ /**
+ * max_data is Maximum Data.
+ */
+ uint64_t max_data;
+} ngtcp2_max_data;
+
+typedef struct ngtcp2_max_stream_data {
+ uint8_t type;
+ int64_t stream_id;
+ uint64_t max_stream_data;
+} ngtcp2_max_stream_data;
+
+typedef struct ngtcp2_max_streams {
+ uint8_t type;
+ uint64_t max_streams;
+} ngtcp2_max_streams;
+
+typedef struct ngtcp2_ping {
+ uint8_t type;
+} ngtcp2_ping;
+
+typedef struct ngtcp2_data_blocked {
+ uint8_t type;
+ uint64_t offset;
+} ngtcp2_data_blocked;
+
+typedef struct ngtcp2_stream_data_blocked {
+ uint8_t type;
+ int64_t stream_id;
+ uint64_t offset;
+} ngtcp2_stream_data_blocked;
+
+typedef struct ngtcp2_streams_blocked {
+ uint8_t type;
+ uint64_t max_streams;
+} ngtcp2_streams_blocked;
+
+typedef struct ngtcp2_new_connection_id {
+ uint8_t type;
+ uint64_t seq;
+ uint64_t retire_prior_to;
+ ngtcp2_cid cid;
+ uint8_t stateless_reset_token[NGTCP2_STATELESS_RESET_TOKENLEN];
+} ngtcp2_new_connection_id;
+
+typedef struct ngtcp2_stop_sending {
+ uint8_t type;
+ int64_t stream_id;
+ uint64_t app_error_code;
+} ngtcp2_stop_sending;
+
+typedef struct ngtcp2_path_challenge {
+ uint8_t type;
+ uint8_t data[NGTCP2_PATH_CHALLENGE_DATALEN];
+} ngtcp2_path_challenge;
+
+typedef struct ngtcp2_path_response {
+ uint8_t type;
+ uint8_t data[NGTCP2_PATH_CHALLENGE_DATALEN];
+} ngtcp2_path_response;
+
+typedef struct ngtcp2_crypto {
+ uint8_t type;
+ uint64_t offset;
+ /* datacnt is the number of elements that data contains. Although
+ the length of data is 1 in this definition, the library may
+ allocate extra bytes to hold more elements. */
+ size_t datacnt;
+ /* data is the array of ngtcp2_vec which references data. */
+ ngtcp2_vec data[1];
+} ngtcp2_crypto;
+
+typedef struct ngtcp2_new_token {
+ uint8_t type;
+ uint8_t *token;
+ size_t tokenlen;
+} ngtcp2_new_token;
+
+typedef struct ngtcp2_retire_connection_id {
+ uint8_t type;
+ uint64_t seq;
+} ngtcp2_retire_connection_id;
+
+typedef struct ngtcp2_handshake_done {
+ uint8_t type;
+} ngtcp2_handshake_done;
+
+typedef struct ngtcp2_datagram {
+ uint8_t type;
+ /* dgram_id is an opaque identifier chosen by an application. */
+ uint64_t dgram_id;
+ /* datacnt is the number of elements that data contains. */
+ size_t datacnt;
+ /* data is a pointer to ngtcp2_vec array that stores data. */
+ ngtcp2_vec *data;
+ /* rdata is conveniently embedded to ngtcp2_datagram, so that data
+ field can just point to the address of this field to store a
+ single vector which is the case when DATAGRAM is received from a
+ remote endpoint. */
+ ngtcp2_vec rdata[1];
+} ngtcp2_datagram;
+
+typedef union ngtcp2_frame {
+ uint8_t type;
+ ngtcp2_stream stream;
+ ngtcp2_ack ack;
+ ngtcp2_padding padding;
+ ngtcp2_reset_stream reset_stream;
+ ngtcp2_connection_close connection_close;
+ ngtcp2_max_data max_data;
+ ngtcp2_max_stream_data max_stream_data;
+ ngtcp2_max_streams max_streams;
+ ngtcp2_ping ping;
+ ngtcp2_data_blocked data_blocked;
+ ngtcp2_stream_data_blocked stream_data_blocked;
+ ngtcp2_streams_blocked streams_blocked;
+ ngtcp2_new_connection_id new_connection_id;
+ ngtcp2_stop_sending stop_sending;
+ ngtcp2_path_challenge path_challenge;
+ ngtcp2_path_response path_response;
+ ngtcp2_crypto crypto;
+ ngtcp2_new_token new_token;
+ ngtcp2_retire_connection_id retire_connection_id;
+ ngtcp2_handshake_done handshake_done;
+ ngtcp2_datagram datagram;
+} ngtcp2_frame;
+
+typedef struct ngtcp2_pkt_chain ngtcp2_pkt_chain;
+
+/*
+ * ngtcp2_pkt_chain is the chain of incoming packets buffered.
+ */
+struct ngtcp2_pkt_chain {
+ ngtcp2_path_storage path;
+ ngtcp2_pkt_info pi;
+ ngtcp2_pkt_chain *next;
+ uint8_t *pkt;
+ /* pktlen is length of a QUIC packet. */
+ size_t pktlen;
+ /* dgramlen is length of UDP datagram that a QUIC packet is
+ included. */
+ size_t dgramlen;
+ ngtcp2_tstamp ts;
+};
+
+/*
+ * ngtcp2_pkt_chain_new allocates ngtcp2_pkt_chain objects, and
+ * assigns its pointer to |*ppc|. The content of buffer pointed by
+ * |pkt| of length |pktlen| is copied into |*ppc|. The packet is
+ * obtained via the network |path|. The values of path->local and
+ * path->remote are copied into |*ppc|.
+ *
+ * This function returns 0 if it succeeds, or one of the following
+ * negative error codes:
+ *
+ * NGTCP2_ERR_NOMEM
+ * Out of memory.
+ */
+int ngtcp2_pkt_chain_new(ngtcp2_pkt_chain **ppc, const ngtcp2_path *path,
+ const ngtcp2_pkt_info *pi, const uint8_t *pkt,
+ size_t pktlen, size_t dgramlen, ngtcp2_tstamp ts,
+ const ngtcp2_mem *mem);
+
+/*
+ * ngtcp2_pkt_chain_del deallocates |pc|. It also frees the memory
+ * pointed by |pc|.
+ */
+void ngtcp2_pkt_chain_del(ngtcp2_pkt_chain *pc, const ngtcp2_mem *mem);
+
+/*
+ * ngtcp2_pkt_hd_init initializes |hd| with the given values. If
+ * |dcid| and/or |scid| is NULL, DCID and SCID of |hd| is empty
+ * respectively. |pkt_numlen| is the number of bytes used to encode
+ * |pkt_num| and either 1, 2, or 4. |version| is QUIC version for
+ * long header. |len| is the length field of Initial, 0RTT, and
+ * Handshake packets.
+ */
+void ngtcp2_pkt_hd_init(ngtcp2_pkt_hd *hd, uint8_t flags, uint8_t type,
+ const ngtcp2_cid *dcid, const ngtcp2_cid *scid,
+ int64_t pkt_num, size_t pkt_numlen, uint32_t version,
+ size_t len);
+
+/*
+ * ngtcp2_pkt_encode_hd_long encodes |hd| as QUIC long header into
+ * |out| which has length |outlen|. It returns the number of bytes
+ * written into |outlen| if it succeeds, or one of the following
+ * negative error codes:
+ *
+ * NGTCP2_ERR_NOBUF
+ * Buffer is too short
+ */
+ngtcp2_ssize ngtcp2_pkt_encode_hd_long(uint8_t *out, size_t outlen,
+ const ngtcp2_pkt_hd *hd);
+
+/*
+ * ngtcp2_pkt_encode_hd_short encodes |hd| as QUIC short header into
+ * |out| which has length |outlen|. It returns the number of bytes
+ * written into |outlen| if it succeeds, or one of the following
+ * negative error codes:
+ *
+ * NGTCP2_ERR_NOBUF
+ * Buffer is too short
+ */
+ngtcp2_ssize ngtcp2_pkt_encode_hd_short(uint8_t *out, size_t outlen,
+ const ngtcp2_pkt_hd *hd);
+
+/**
+ * @function
+ *
+ * `ngtcp2_pkt_decode_frame` decodes a QUIC frame from the buffer
+ * pointed by |payload| whose length is |payloadlen|.
+ *
+ * This function returns the number of bytes read to decode a single
+ * frame if it succeeds, or one of the following negative error codes:
+ *
+ * :enum:`NGTCP2_ERR_FRAME_ENCODING`
+ * Frame is badly formatted; or frame type is unknown; or
+ * |payloadlen| is 0.
+ */
+ngtcp2_ssize ngtcp2_pkt_decode_frame(ngtcp2_frame *dest, const uint8_t *payload,
+ size_t payloadlen);
+
+/**
+ * @function
+ *
+ * `ngtcp2_pkt_encode_frame` encodes a frame |fm| into the buffer
+ * pointed by |out| of length |outlen|.
+ *
+ * This function returns the number of bytes written to the buffer, or
+ * one of the following negative error codes:
+ *
+ * :enum:`NGTCP2_ERR_NOBUF`
+ * Buffer does not have enough capacity to write a frame.
+ */
+ngtcp2_ssize ngtcp2_pkt_encode_frame(uint8_t *out, size_t outlen,
+ ngtcp2_frame *fr);
+
+/*
+ * ngtcp2_pkt_decode_version_negotiation decodes Version Negotiation
+ * packet payload |payload| of length |payloadlen|, and stores the
+ * result in |dest|. |dest| must have enough capacity to store the
+ * result. |payloadlen| also must be a multiple of sizeof(uint32_t).
+ *
+ * This function returns the number of versions written in |dest|.
+ */
+size_t ngtcp2_pkt_decode_version_negotiation(uint32_t *dest,
+ const uint8_t *payload,
+ size_t payloadlen);
+
+/*
+ * ngtcp2_pkt_decode_stateless_reset decodes Stateless Reset payload
+ * |payload| of length |payloadlen|. The |payload| must start with
+ * Stateless Reset Token.
+ *
+ * This function returns 0 if it succeeds, or one of the following
+ * negative error codes:
+ *
+ * NGTCP2_ERR_INVALID_ARGUMENT
+ * Payloadlen is too short.
+ */
+int ngtcp2_pkt_decode_stateless_reset(ngtcp2_pkt_stateless_reset *sr,
+ const uint8_t *payload,
+ size_t payloadlen);
+
+/*
+ * ngtcp2_pkt_decode_retry decodes Retry packet payload |payload| of
+ * length |payloadlen|. The |payload| must start with Retry token
+ * field.
+ *
+ * This function returns 0 if it succeeds, or one of the following
+ * negative error codes:
+ *
+ * NGTCP2_ERR_INVALID_ARGUMENT
+ * Payloadlen is too short.
+ */
+int ngtcp2_pkt_decode_retry(ngtcp2_pkt_retry *dest, const uint8_t *payload,
+ size_t payloadlen);
+
+/*
+ * ngtcp2_pkt_decode_stream_frame decodes STREAM frame from |payload|
+ * of length |payloadlen|. The result is stored in the object pointed
+ * by |dest|. STREAM frame must start at payload[0]. This function
+ * finishes when it decodes one STREAM frame, and returns the exact
+ * number of bytes read to decode a frame if it succeeds, or one of
+ * the following negative error codes:
+ *
+ * NGTCP2_ERR_FRAME_ENCODING
+ * Payload is too short to include STREAM frame.
+ */
+ngtcp2_ssize ngtcp2_pkt_decode_stream_frame(ngtcp2_stream *dest,
+ const uint8_t *payload,
+ size_t payloadlen);
+
+/*
+ * ngtcp2_pkt_decode_ack_frame decodes ACK frame from |payload| of
+ * length |payloadlen|. The result is stored in the object pointed by
+ * |dest|. ACK frame must start at payload[0]. This function
+ * finishes when it decodes one ACK frame, and returns the exact
+ * number of bytes read to decode a frame if it succeeds, or one of
+ * the following negative error codes:
+ *
+ * NGTCP2_ERR_FRAME_ENCODING
+ * Payload is too short to include ACK frame.
+ */
+ngtcp2_ssize ngtcp2_pkt_decode_ack_frame(ngtcp2_ack *dest,
+ const uint8_t *payload,
+ size_t payloadlen);
+
+/*
+ * ngtcp2_pkt_decode_padding_frame decodes contiguous PADDING frames
+ * from |payload| of length |payloadlen|. It continues to parse
+ * frames as long as the frame type is PADDING. This finishes when it
+ * encounters the frame type which is not PADDING, or all input data
+ * is read. The first byte (payload[0]) must be NGTCP2_FRAME_PADDING.
+ * This function returns the exact number of bytes read to decode
+ * PADDING frames.
+ */
+ngtcp2_ssize ngtcp2_pkt_decode_padding_frame(ngtcp2_padding *dest,
+ const uint8_t *payload,
+ size_t payloadlen);
+
+/*
+ * ngtcp2_pkt_decode_reset_stream_frame decodes RESET_STREAM frame
+ * from |payload| of length |payloadlen|. The result is stored in the
+ * object pointed by |dest|. RESET_STREAM frame must start at
+ * payload[0]. This function finishes when it decodes one
+ * RESET_STREAM frame, and returns the exact number of bytes read to
+ * decode a frame if it succeeds, or one of the following negative
+ * error codes:
+ *
+ * NGTCP2_ERR_FRAME_ENCODING
+ * Payload is too short to include RESET_STREAM frame.
+ */
+ngtcp2_ssize ngtcp2_pkt_decode_reset_stream_frame(ngtcp2_reset_stream *dest,
+ const uint8_t *payload,
+ size_t payloadlen);
+
+/*
+ * ngtcp2_pkt_decode_connection_close_frame decodes CONNECTION_CLOSE
+ * frame from |payload| of length |payloadlen|. The result is stored
+ * in the object pointed by |dest|. CONNECTION_CLOSE frame must start
+ * at payload[0]. This function finishes it decodes one
+ * CONNECTION_CLOSE frame, and returns the exact number of bytes read
+ * to decode a frame if it succeeds, or one of the following negative
+ * error codes:
+ *
+ * NGTCP2_ERR_FRAME_ENCODING
+ * Payload is too short to include CONNECTION_CLOSE frame.
+ */
+ngtcp2_ssize ngtcp2_pkt_decode_connection_close_frame(
+ ngtcp2_connection_close *dest, const uint8_t *payload, size_t payloadlen);
+
+/*
+ * ngtcp2_pkt_decode_max_data_frame decodes MAX_DATA frame from
+ * |payload| of length |payloadlen|. The result is stored in the
+ * object pointed by |dest|. MAX_DATA frame must start at payload[0].
+ * This function finishes when it decodes one MAX_DATA frame, and
+ * returns the exact number of bytes read to decode a frame if it
+ * succeeds, or one of the following negative error codes:
+ *
+ * NGTCP2_ERR_FRAME_ENCODING
+ * Payload is too short to include MAX_DATA frame.
+ */
+ngtcp2_ssize ngtcp2_pkt_decode_max_data_frame(ngtcp2_max_data *dest,
+ const uint8_t *payload,
+ size_t payloadlen);
+
+/*
+ * ngtcp2_pkt_decode_max_stream_data_frame decodes MAX_STREAM_DATA
+ * frame from |payload| of length |payloadlen|. The result is stored
+ * in the object pointed by |dest|. MAX_STREAM_DATA frame must start
+ * at payload[0]. This function finishes when it decodes one
+ * MAX_STREAM_DATA frame, and returns the exact number of bytes read
+ * to decode a frame if it succeeds, or one of the following negative
+ * error codes:
+ *
+ * NGTCP2_ERR_FRAME_ENCODING
+ * Payload is too short to include MAX_STREAM_DATA frame.
+ */
+ngtcp2_ssize ngtcp2_pkt_decode_max_stream_data_frame(
+ ngtcp2_max_stream_data *dest, const uint8_t *payload, size_t payloadlen);
+
+/*
+ * ngtcp2_pkt_decode_max_streams_frame decodes MAX_STREAMS frame from
+ * |payload| of length |payloadlen|. The result is stored in the
+ * object pointed by |dest|. MAX_STREAMS frame must start at
+ * payload[0]. This function finishes when it decodes one MAX_STREAMS
+ * frame, and returns the exact number of bytes read to decode a frame
+ * if it succeeds, or one of the following negative error codes:
+ *
+ * NGTCP2_ERR_FRAME_ENCODING
+ * Payload is too short to include MAX_STREAMS frame.
+ */
+ngtcp2_ssize ngtcp2_pkt_decode_max_streams_frame(ngtcp2_max_streams *dest,
+ const uint8_t *payload,
+ size_t payloadlen);
+
+/*
+ * ngtcp2_pkt_decode_ping_frame decodes PING frame from |payload| of
+ * length |payloadlen|. The result is stored in the object pointed by
+ * |dest|. PING frame must start at payload[0]. This function
+ * finishes when it decodes one PING frame, and returns the exact
+ * number of bytes read to decode a frame.
+ */
+ngtcp2_ssize ngtcp2_pkt_decode_ping_frame(ngtcp2_ping *dest,
+ const uint8_t *payload,
+ size_t payloadlen);
+
+/*
+ * ngtcp2_pkt_decode_data_blocked_frame decodes DATA_BLOCKED frame
+ * from |payload| of length |payloadlen|. The result is stored in the
+ * object pointed by |dest|. DATA_BLOCKED frame must start at
+ * payload[0]. This function finishes when it decodes one
+ * DATA_BLOCKED frame, and returns the exact number of bytes read to
+ * decode a frame if it succeeds, or one of the following negative
+ * error codes:
+ *
+ * NGTCP2_ERR_FRAME_ENCODING
+ * Payload is too short to include DATA_BLOCKED frame.
+ */
+ngtcp2_ssize ngtcp2_pkt_decode_data_blocked_frame(ngtcp2_data_blocked *dest,
+ const uint8_t *payload,
+ size_t payloadlen);
+
+/*
+ * ngtcp2_pkt_decode_stream_data_blocked_frame decodes
+ * STREAM_DATA_BLOCKED frame from |payload| of length |payloadlen|.
+ * The result is stored in the object pointed by |dest|.
+ * STREAM_DATA_BLOCKED frame must start at payload[0]. This function
+ * finishes when it decodes one STREAM_DATA_BLOCKED frame, and returns
+ * the exact number of bytes read to decode a frame if it succeeds, or
+ * one of the following negative error codes:
+ *
+ * NGTCP2_ERR_FRAME_ENCODING
+ * Payload is too short to include STREAM_DATA_BLOCKED frame.
+ */
+ngtcp2_ssize
+ngtcp2_pkt_decode_stream_data_blocked_frame(ngtcp2_stream_data_blocked *dest,
+ const uint8_t *payload,
+ size_t payloadlen);
+
+/*
+ * ngtcp2_pkt_decode_streams_blocked_frame decodes STREAMS_BLOCKED
+ * frame from |payload| of length |payloadlen|. The result is stored
+ * in the object pointed by |dest|. STREAMS_BLOCKED frame must start
+ * at payload[0]. This function finishes when it decodes one
+ * STREAMS_BLOCKED frame, and returns the exact number of bytes read
+ * to decode a frame if it succeeds, or one of the following negative
+ * error codes:
+ *
+ * NGTCP2_ERR_FRAME_ENCODING
+ * Payload is too short to include STREAMS_BLOCKED frame.
+ */
+ngtcp2_ssize ngtcp2_pkt_decode_streams_blocked_frame(
+ ngtcp2_streams_blocked *dest, const uint8_t *payload, size_t payloadlen);
+
+/*
+ * ngtcp2_pkt_decode_new_connection_id_frame decodes NEW_CONNECTION_ID
+ * frame from |payload| of length |payloadlen|. The result is stored
+ * in the object pointed by |dest|. NEW_CONNECTION_ID frame must
+ * start at payload[0]. This function finishes when it decodes one
+ * NEW_CONNECTION_ID frame, and returns the exact number of bytes read
+ * to decode a frame if it succeeds, or one of the following negative
+ * error codes:
+ *
+ * NGTCP2_ERR_FRAME_ENCODING
+ * Payload is too short to include NEW_CONNECTION_ID frame; or the
+ * length of CID is strictly less than NGTCP2_MIN_CIDLEN or
+ * greater than NGTCP2_MAX_CIDLEN.
+ */
+ngtcp2_ssize ngtcp2_pkt_decode_new_connection_id_frame(
+ ngtcp2_new_connection_id *dest, const uint8_t *payload, size_t payloadlen);
+
+/*
+ * ngtcp2_pkt_decode_stop_sending_frame decodes STOP_SENDING frame
+ * from |payload| of length |payloadlen|. The result is stored in the
+ * object pointed by |dest|. STOP_SENDING frame must start at
+ * payload[0]. This function finishes when it decodes one
+ * STOP_SENDING frame, and returns the exact number of bytes read to
+ * decode a frame if it succeeds, or one of the following negative
+ * error codes:
+ *
+ * NGTCP2_ERR_FRAME_ENCODING
+ * Payload is too short to include STOP_SENDING frame.
+ */
+ngtcp2_ssize ngtcp2_pkt_decode_stop_sending_frame(ngtcp2_stop_sending *dest,
+ const uint8_t *payload,
+ size_t payloadlen);
+
+/*
+ * ngtcp2_pkt_decode_path_challenge_frame decodes PATH_CHALLENGE frame
+ * from |payload| of length |payloadlen|. The result is stored in the
+ * object pointed by |dest|. PATH_CHALLENGE frame must start at
+ * payload[0]. This function finishes when it decodes one
+ * PATH_CHALLENGE frame, and returns the exact number of bytes read to
+ * decode a frame if it succeeds, or one of the following negative
+ * error codes:
+ *
+ * NGTCP2_ERR_FRAME_ENCODING
+ * Payload is too short to include PATH_CHALLENGE frame.
+ */
+ngtcp2_ssize ngtcp2_pkt_decode_path_challenge_frame(ngtcp2_path_challenge *dest,
+ const uint8_t *payload,
+ size_t payloadlen);
+
+/*
+ * ngtcp2_pkt_decode_path_response_frame decodes PATH_RESPONSE frame
+ * from |payload| of length |payloadlen|. The result is stored in the
+ * object pointed by |dest|. PATH_RESPONSE frame must start at
+ * payload[0]. This function finishes when it decodes one
+ * PATH_RESPONSE frame, and returns the exact number of bytes read to
+ * decode a frame if it succeeds, or one of the following negative
+ * error codes:
+ *
+ * NGTCP2_ERR_FRAME_ENCODING
+ * Payload is too short to include PATH_RESPONSE frame.
+ */
+ngtcp2_ssize ngtcp2_pkt_decode_path_response_frame(ngtcp2_path_response *dest,
+ const uint8_t *payload,
+ size_t payloadlen);
+
+/*
+ * ngtcp2_pkt_decode_crypto_frame decodes CRYPTO frame from |payload|
+ * of length |payloadlen|. The result is stored in the object pointed
+ * by |dest|. CRYPTO frame must start at payload[0]. This function
+ * finishes when it decodes one CRYPTO frame, and returns the exact
+ * number of bytes read to decode a frame if it succeeds, or one of
+ * the following negative error codes:
+ *
+ * NGTCP2_ERR_FRAME_ENCODING
+ * Payload is too short to include CRYPTO frame.
+ */
+ngtcp2_ssize ngtcp2_pkt_decode_crypto_frame(ngtcp2_crypto *dest,
+ const uint8_t *payload,
+ size_t payloadlen);
+
+/*
+ * ngtcp2_pkt_decode_new_token_frame decodes NEW_TOKEN frame from
+ * |payload| of length |payloadlen|. The result is stored in the
+ * object pointed by |dest|. NEW_TOKEN frame must start at
+ * payload[0]. This function finishes when it decodes one NEW_TOKEN
+ * frame, and returns the exact number of bytes read to decode a frame
+ * if it succeeds, or one of the following negative error codes:
+ *
+ * NGTCP2_ERR_FRAME_ENCODING
+ * Payload is too short to include NEW_TOKEN frame.
+ */
+ngtcp2_ssize ngtcp2_pkt_decode_new_token_frame(ngtcp2_new_token *dest,
+ const uint8_t *payload,
+ size_t payloadlen);
+
+/*
+ * ngtcp2_pkt_decode_retire_connection_id_frame decodes RETIRE_CONNECTION_ID
+ * frame from |payload| of length |payloadlen|. The result is stored in the
+ * object pointed by |dest|. RETIRE_CONNECTION_ID frame must start at
+ * payload[0]. This function finishes when it decodes one RETIRE_CONNECTION_ID
+ * frame, and returns the exact number of bytes read to decode a frame
+ * if it succeeds, or one of the following negative error codes:
+ *
+ * NGTCP2_ERR_FRAME_ENCODING
+ * Payload is too short to include RETIRE_CONNECTION_ID frame.
+ */
+ngtcp2_ssize
+ngtcp2_pkt_decode_retire_connection_id_frame(ngtcp2_retire_connection_id *dest,
+ const uint8_t *payload,
+ size_t payloadlen);
+
+/*
+ * ngtcp2_pkt_decode_handshake_done_frame decodes HANDSHAKE_DONE frame
+ * from |payload| of length |payloadlen|. The result is stored in the
+ * object pointed by |dest|. HANDSHAKE_DONE frame must start at
+ * payload[0]. This function finishes when it decodes one
+ * HANDSHAKE_DONE frame, and returns the exact number of bytes read to
+ * decode a frame.
+ */
+ngtcp2_ssize ngtcp2_pkt_decode_handshake_done_frame(ngtcp2_handshake_done *dest,
+ const uint8_t *payload,
+ size_t payloadlen);
+
+/*
+ * ngtcp2_pkt_decode_datagram_frame decodes DATAGRAM frame from
+ * |payload| of length |payloadlen|. The result is stored in the
+ * object pointed by |dest|. DATAGRAM frame must start at payload[0].
+ * This function finishes when it decodes one DATAGRAM frame, and
+ * returns the exact number of bytes read to decode a frame if it
+ * succeeds, or one of the following negative error codes:
+ *
+ * NGTCP2_ERR_FRAME_ENCODING
+ * Payload is too short to include DATAGRAM frame.
+ */
+ngtcp2_ssize ngtcp2_pkt_decode_datagram_frame(ngtcp2_datagram *dest,
+ const uint8_t *payload,
+ size_t payloadlen);
+
+/*
+ * ngtcp2_pkt_encode_stream_frame encodes STREAM frame |fr| into the
+ * buffer pointed by |out| of length |outlen|.
+ *
+ * This function assigns <the serialized frame type> &
+ * ~NGTCP2_FRAME_STREAM to fr->flags.
+ *
+ * This function returns the number of bytes written if it succeeds,
+ * or one of the following negative error codes:
+ *
+ * NGTCP2_ERR_NOBUF
+ * Buffer does not have enough capacity to write a frame.
+ */
+ngtcp2_ssize ngtcp2_pkt_encode_stream_frame(uint8_t *out, size_t outlen,
+ ngtcp2_stream *fr);
+
+/*
+ * ngtcp2_pkt_encode_ack_frame encodes ACK frame |fr| into the buffer
+ * pointed by |out| of length |outlen|.
+ *
+ * This function assigns <the serialized frame type> &
+ * ~NGTCP2_FRAME_ACK to fr->flags.
+ *
+ * This function returns the number of bytes written if it succeeds,
+ * or one of the following negative error codes:
+ *
+ * NGTCP2_ERR_NOBUF
+ * Buffer does not have enough capacity to write a frame.
+ */
+ngtcp2_ssize ngtcp2_pkt_encode_ack_frame(uint8_t *out, size_t outlen,
+ ngtcp2_ack *fr);
+
+/*
+ * ngtcp2_pkt_encode_padding_frame encodes PADDING frame |fr| into the
+ * buffer pointed by |out| of length |outlen|.
+ *
+ * This function encodes consecutive fr->len PADDING frames.
+ *
+ * This function returns the number of bytes written if it succeeds,
+ * or one of the following negative error codes:
+ *
+ * NGTCP2_ERR_NOBUF
+ * Buffer does not have enough capacity to write frame(s).
+ */
+ngtcp2_ssize ngtcp2_pkt_encode_padding_frame(uint8_t *out, size_t outlen,
+ const ngtcp2_padding *fr);
+
+/*
+ * ngtcp2_pkt_encode_reset_stream_frame encodes RESET_STREAM frame
+ * |fr| into the buffer pointed by |out| of length |buflen|.
+ *
+ * This function returns the number of bytes written if it succeeds,
+ * or one of the following negative error codes:
+ *
+ * NGTCP2_ERR_NOBUF
+ * Buffer does not have enough capacity to write a frame.
+ */
+ngtcp2_ssize
+ngtcp2_pkt_encode_reset_stream_frame(uint8_t *out, size_t outlen,
+ const ngtcp2_reset_stream *fr);
+
+/*
+ * ngtcp2_pkt_encode_connection_close_frame encodes CONNECTION_CLOSE
+ * frame |fr| into the buffer pointed by |out| of length |outlen|.
+ *
+ * This function returns the number of bytes written if it succeeds,
+ * or one of the following negative error codes:
+ *
+ * NGTCP2_ERR_NOBUF
+ * Buffer does not have enough capacity to write a frame.
+ */
+ngtcp2_ssize
+ngtcp2_pkt_encode_connection_close_frame(uint8_t *out, size_t outlen,
+ const ngtcp2_connection_close *fr);
+
+/*
+ * ngtcp2_pkt_encode_max_data_frame encodes MAX_DATA frame |fr| into
+ * the buffer pointed by |out| of length |outlen|.
+ *
+ * This function returns the number of bytes written if it succeeds,
+ * or one of the following negative error codes:
+ *
+ * NGTCP2_ERR_NOBUF
+ * Buffer does not have enough capacity to write a frame.
+ */
+ngtcp2_ssize ngtcp2_pkt_encode_max_data_frame(uint8_t *out, size_t outlen,
+ const ngtcp2_max_data *fr);
+
+/*
+ * ngtcp2_pkt_encode_max_stream_data_frame encodes MAX_STREAM_DATA
+ * frame |fr| into the buffer pointed by |out| of length |outlen|.
+ *
+ * This function returns the number of bytes written if it succeeds,
+ * or one of the following negative error codes:
+ *
+ * NGTCP2_ERR_NOBUF
+ * Buffer does not have enough capacity to write a frame.
+ */
+ngtcp2_ssize
+ngtcp2_pkt_encode_max_stream_data_frame(uint8_t *out, size_t outlen,
+ const ngtcp2_max_stream_data *fr);
+
+/*
+ * ngtcp2_pkt_encode_max_streams_frame encodes MAX_STREAMS
+ * frame |fr| into the buffer pointed by |out| of length |outlen|.
+ *
+ * This function returns the number of bytes written if it succeeds,
+ * or one of the following negative error codes:
+ *
+ * NGTCP2_ERR_NOBUF
+ * Buffer does not have enough capacity to write a frame.
+ */
+ngtcp2_ssize ngtcp2_pkt_encode_max_streams_frame(uint8_t *out, size_t outlen,
+ const ngtcp2_max_streams *fr);
+
+/*
+ * ngtcp2_pkt_encode_ping_frame encodes PING frame |fr| into the
+ * buffer pointed by |out| of length |outlen|.
+ *
+ * This function returns the number of bytes written if it succeeds,
+ * or one of the following negative error codes:
+ *
+ * NGTCP2_ERR_NOBUF
+ * Buffer does not have enough capacity to write a frame.
+ */
+ngtcp2_ssize ngtcp2_pkt_encode_ping_frame(uint8_t *out, size_t outlen,
+ const ngtcp2_ping *fr);
+
+/*
+ * ngtcp2_pkt_encode_data_blocked_frame encodes DATA_BLOCKED frame
+ * |fr| into the buffer pointed by |out| of length |outlen|.
+ *
+ * This function returns the number of bytes written if it succeeds,
+ * or one of the following negative error codes:
+ *
+ * NGTCP2_ERR_NOBUF
+ * Buffer does not have enough capacity to write a frame.
+ */
+ngtcp2_ssize
+ngtcp2_pkt_encode_data_blocked_frame(uint8_t *out, size_t outlen,
+ const ngtcp2_data_blocked *fr);
+
+/*
+ * ngtcp2_pkt_encode_stream_data_blocked_frame encodes
+ * STREAM_DATA_BLOCKED frame |fr| into the buffer pointed by |out| of
+ * length |outlen|.
+ *
+ * This function returns the number of bytes written if it succeeds,
+ * or one of the following negative error codes:
+ *
+ * NGTCP2_ERR_NOBUF
+ * Buffer does not have enough capacity to write a frame.
+ */
+ngtcp2_ssize ngtcp2_pkt_encode_stream_data_blocked_frame(
+ uint8_t *out, size_t outlen, const ngtcp2_stream_data_blocked *fr);
+
+/*
+ * ngtcp2_pkt_encode_streams_blocked_frame encodes STREAMS_BLOCKED
+ * frame |fr| into the buffer pointed by |out| of length |outlen|.
+ *
+ * This function returns the number of bytes written if it succeeds,
+ * or one of the following negative error codes:
+ *
+ * NGTCP2_ERR_NOBUF
+ * Buffer does not have enough capacity to write a frame.
+ */
+ngtcp2_ssize
+ngtcp2_pkt_encode_streams_blocked_frame(uint8_t *out, size_t outlen,
+ const ngtcp2_streams_blocked *fr);
+
+/*
+ * ngtcp2_pkt_encode_new_connection_id_frame encodes NEW_CONNECTION_ID
+ * frame |fr| into the buffer pointed by |out| of length |outlen|.
+ *
+ * This function returns the number of bytes written if it succeeds,
+ * or one of the following negative error codes:
+ *
+ * NGTCP2_ERR_NOBUF
+ * Buffer does not have enough capacity to write a frame.
+ */
+ngtcp2_ssize
+ngtcp2_pkt_encode_new_connection_id_frame(uint8_t *out, size_t outlen,
+ const ngtcp2_new_connection_id *fr);
+
+/*
+ * ngtcp2_pkt_encode_stop_sending_frame encodes STOP_SENDING frame
+ * |fr| into the buffer pointed by |out| of length |outlen|.
+ *
+ * This function returns the number of bytes written if it succeeds,
+ * or one of the following negative error codes:
+ *
+ * NGTCP2_ERR_NOBUF
+ * Buffer does not have enough capacity to write a frame.
+ */
+ngtcp2_ssize
+ngtcp2_pkt_encode_stop_sending_frame(uint8_t *out, size_t outlen,
+ const ngtcp2_stop_sending *fr);
+
+/*
+ * ngtcp2_pkt_encode_path_challenge_frame encodes PATH_CHALLENGE frame
+ * |fr| into the buffer pointed by |out| of length |outlen|.
+ *
+ * This function returns the number of bytes written if it succeeds,
+ * or one of the following negative error codes:
+ *
+ * NGTCP2_ERR_NOBUF
+ * Buffer does not have enough capacity to write a frame.
+ */
+ngtcp2_ssize
+ngtcp2_pkt_encode_path_challenge_frame(uint8_t *out, size_t outlen,
+ const ngtcp2_path_challenge *fr);
+
+/*
+ * ngtcp2_pkt_encode_path_response_frame encodes PATH_RESPONSE frame
+ * |fr| into the buffer pointed by |out| of length |outlen|.
+ *
+ * This function returns the number of bytes written if it succeeds,
+ * or one of the following negative error codes:
+ *
+ * NGTCP2_ERR_NOBUF
+ * Buffer does not have enough capacity to write a frame.
+ */
+ngtcp2_ssize
+ngtcp2_pkt_encode_path_response_frame(uint8_t *out, size_t outlen,
+ const ngtcp2_path_response *fr);
+
+/*
+ * ngtcp2_pkt_encode_crypto_frame encodes CRYPTO frame |fr| into the
+ * buffer pointed by |out| of length |outlen|.
+ *
+ * This function returns the number of bytes written if it succeeds,
+ * or one of the following negative error codes:
+ *
+ * NGTCP2_ERR_NOBUF
+ * Buffer does not have enough capacity to write a frame.
+ */
+ngtcp2_ssize ngtcp2_pkt_encode_crypto_frame(uint8_t *out, size_t outlen,
+ const ngtcp2_crypto *fr);
+
+/*
+ * ngtcp2_pkt_encode_new_token_frame encodes NEW_TOKEN frame |fr| into
+ * the buffer pointed by |out| of length |outlen|.
+ *
+ * This function returns the number of bytes written if it succeeds,
+ * or one of the following negative error codes:
+ *
+ * NGTCP2_ERR_NOBUF
+ * Buffer does not have enough capacity to write a frame.
+ */
+ngtcp2_ssize ngtcp2_pkt_encode_new_token_frame(uint8_t *out, size_t outlen,
+ const ngtcp2_new_token *fr);
+
+/*
+ * ngtcp2_pkt_encode_retire_connection_id_frame encodes RETIRE_CONNECTION_ID
+ * frame |fr| into the buffer pointed by |out| of length |outlen|.
+ *
+ * This function returns the number of bytes written if it succeeds,
+ * or one of the following negative error codes:
+ *
+ * NGTCP2_ERR_NOBUF
+ * Buffer does not have enough capacity to write a frame.
+ */
+ngtcp2_ssize ngtcp2_pkt_encode_retire_connection_id_frame(
+ uint8_t *out, size_t outlen, const ngtcp2_retire_connection_id *fr);
+
+/*
+ * ngtcp2_pkt_encode_handshake_done_frame encodes HANDSHAKE_DONE frame
+ * |fr| into the buffer pointed by |out| of length |outlen|.
+ *
+ * This function returns the number of bytes written if it succeeds,
+ * or one of the following negative error codes:
+ *
+ * NGTCP2_ERR_NOBUF
+ * Buffer does not have enough capacity to write a frame.
+ */
+ngtcp2_ssize
+ngtcp2_pkt_encode_handshake_done_frame(uint8_t *out, size_t outlen,
+ const ngtcp2_handshake_done *fr);
+
+/*
+ * ngtcp2_pkt_encode_datagram_frame encodes DATAGRAM frame |fr| into
+ * the buffer pointed by |out| of length |outlen|.
+ *
+ * This function returns the number of bytes written if it succeeds,
+ * or one of the following negative error codes:
+ *
+ * NGTCP2_ERR_NOBUF
+ * Buffer does not have enough capacity to write a frame.
+ */
+ngtcp2_ssize ngtcp2_pkt_encode_datagram_frame(uint8_t *out, size_t outlen,
+ const ngtcp2_datagram *fr);
+
+/*
+ * ngtcp2_pkt_adjust_pkt_num find the full 64 bits packet number for
+ * |pkt_num|, which is expected to be least significant |n| bits. The
+ * |max_pkt_num| is the highest successfully authenticated packet
+ * number.
+ */
+int64_t ngtcp2_pkt_adjust_pkt_num(int64_t max_pkt_num, int64_t pkt_num,
+ size_t n);
+
+/*
+ * ngtcp2_pkt_validate_ack checks that ack is malformed or not.
+ *
+ * This function returns 0 if it succeeds, or one of the following
+ * negative error codes:
+ *
+ * NGTCP2_ERR_ACK_FRAME
+ * ACK frame is malformed
+ */
+int ngtcp2_pkt_validate_ack(ngtcp2_ack *fr);
+
+/*
+ * ngtcp2_pkt_stream_max_datalen returns the maximum number of bytes
+ * which can be sent for stream denoted by |stream_id|. |offset| is
+ * an offset of within the stream. |len| is the estimated number of
+ * bytes to be sent. |left| is the size of buffer. If |left| is too
+ * small to write STREAM frame, this function returns (size_t)-1.
+ */
+size_t ngtcp2_pkt_stream_max_datalen(int64_t stream_id, uint64_t offset,
+ uint64_t len, size_t left);
+
+/*
+ * ngtcp2_pkt_crypto_max_datalen returns the maximum number of bytes
+ * which can be sent for crypto stream. |offset| is an offset of
+ * within the crypto stream. |len| is the estimated number of bytes
+ * to be sent. |left| is the size of buffer. If |left| is too small
+ * to write CRYPTO frame, this function returns (size_t)-1.
+ */
+size_t ngtcp2_pkt_crypto_max_datalen(uint64_t offset, size_t len, size_t left);
+
+/*
+ * ngtcp2_pkt_datagram_framelen returns the length of DATAGRAM frame
+ * to encode |len| bytes of data.
+ */
+size_t ngtcp2_pkt_datagram_framelen(size_t len);
+
+/*
+ * ngtcp2_pkt_verify_reserved_bits verifies that the first byte |c| of
+ * the packet header has the correct reserved bits.
+ *
+ * This function returns 0 if it succeeds, or one of the following
+ * negative error codes:
+ *
+ * NGTCP2_ERR_PROTO
+ * Reserved bits has wrong value.
+ */
+int ngtcp2_pkt_verify_reserved_bits(uint8_t c);
+
+/*
+ * ngtcp2_pkt_encode_pseudo_retry encodes Retry pseudo-packet in the
+ * buffer pointed by |dest| of length |destlen|.
+ *
+ * This function returns 0 if it succeeds, or one of the following
+ * negative error codes:
+ *
+ * NGTCP2_ERR_BUF
+ * Buffer is too short.
+ */
+ngtcp2_ssize ngtcp2_pkt_encode_pseudo_retry(
+ uint8_t *dest, size_t destlen, const ngtcp2_pkt_hd *hd, uint8_t unused,
+ const ngtcp2_cid *odcid, const uint8_t *token, size_t tokenlen);
+
+/*
+ * ngtcp2_pkt_verify_retry_tag verifies Retry packet. The buffer
+ * pointed by |pkt| of length |pktlen| must contain Retry packet
+ * including packet header. The odcid and tag fields of |retry| must
+ * be specified. |aead| must be AEAD_AES_128_GCM.
+ *
+ * This function returns 0 if it succeeds, or one of the following
+ * negative error codes:
+ *
+ * NGTCP2_ERR_PROTO
+ * Verification failed.
+ */
+int ngtcp2_pkt_verify_retry_tag(uint32_t version, const ngtcp2_pkt_retry *retry,
+ const uint8_t *pkt, size_t pktlen,
+ ngtcp2_encrypt encrypt,
+ const ngtcp2_crypto_aead *aead,
+ const ngtcp2_crypto_aead_ctx *aead_ctx);
+
+/*
+ * ngtcp2_pkt_versioned_type returns versioned packet type for a
+ * version |version| that corresponds to the version-independent
+ * |pkt_type|.
+ */
+uint8_t ngtcp2_pkt_versioned_type(uint32_t version, uint32_t pkt_type);
+
+/**
+ * @function
+ *
+ * `ngtcp2_pkt_get_type_long` returns the version-independent long
+ * packet type. |version| is the QUIC version. |c| is the first byte
+ * of Long packet header. If |version| is not supported by the
+ * library, it returns 0.
+ */
+uint8_t ngtcp2_pkt_get_type_long(uint32_t version, uint8_t c);
+
+#endif /* NGTCP2_PKT_H */
diff --git a/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_pmtud.c b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_pmtud.c
new file mode 100644
index 0000000..771ef5e
--- /dev/null
+++ b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_pmtud.c
@@ -0,0 +1,160 @@
+/*
+ * ngtcp2
+ *
+ * Copyright (c) 2022 ngtcp2 contributors
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "ngtcp2_pmtud.h"
+
+#include <assert.h>
+
+#include "ngtcp2_mem.h"
+#include "ngtcp2_macro.h"
+
+/* NGTCP2_PMTUD_PROBE_NUM_MAX is the maximum number of packets sent
+ for each probe. */
+#define NGTCP2_PMTUD_PROBE_NUM_MAX 3
+
+static size_t mtu_probes[] = {
+ 1454 - 48, /* The well known MTU used by a domestic optic fiber
+ service in Japan. */
+ 1390 - 48, /* Typical Tunneled MTU */
+ 1280 - 48, /* IPv6 minimum MTU */
+ 1492 - 48, /* PPPoE */
+};
+
+#define NGTCP2_MTU_PROBESLEN ngtcp2_arraylen(mtu_probes)
+
+int ngtcp2_pmtud_new(ngtcp2_pmtud **ppmtud, size_t max_udp_payload_size,
+ size_t hard_max_udp_payload_size, int64_t tx_pkt_num,
+ const ngtcp2_mem *mem) {
+ ngtcp2_pmtud *pmtud = ngtcp2_mem_malloc(mem, sizeof(ngtcp2_pmtud));
+
+ if (pmtud == NULL) {
+ return NGTCP2_ERR_NOMEM;
+ }
+
+ pmtud->mem = mem;
+ pmtud->mtu_idx = 0;
+ pmtud->num_pkts_sent = 0;
+ pmtud->expiry = UINT64_MAX;
+ pmtud->tx_pkt_num = tx_pkt_num;
+ pmtud->max_udp_payload_size = max_udp_payload_size;
+ pmtud->hard_max_udp_payload_size = hard_max_udp_payload_size;
+ pmtud->min_fail_udp_payload_size = SIZE_MAX;
+
+ for (; pmtud->mtu_idx < NGTCP2_MTU_PROBESLEN; ++pmtud->mtu_idx) {
+ if (mtu_probes[pmtud->mtu_idx] > pmtud->hard_max_udp_payload_size) {
+ continue;
+ }
+ if (mtu_probes[pmtud->mtu_idx] > pmtud->max_udp_payload_size) {
+ break;
+ }
+ }
+
+ *ppmtud = pmtud;
+
+ return 0;
+}
+
+void ngtcp2_pmtud_del(ngtcp2_pmtud *pmtud) {
+ if (!pmtud) {
+ return;
+ }
+
+ ngtcp2_mem_free(pmtud->mem, pmtud);
+}
+
+size_t ngtcp2_pmtud_probelen(ngtcp2_pmtud *pmtud) {
+ assert(pmtud->mtu_idx < NGTCP2_MTU_PROBESLEN);
+
+ return mtu_probes[pmtud->mtu_idx];
+}
+
+void ngtcp2_pmtud_probe_sent(ngtcp2_pmtud *pmtud, ngtcp2_duration pto,
+ ngtcp2_tstamp ts) {
+ ngtcp2_tstamp timeout;
+
+ if (++pmtud->num_pkts_sent < NGTCP2_PMTUD_PROBE_NUM_MAX) {
+ timeout = pto;
+ } else {
+ timeout = 3 * pto;
+ }
+
+ pmtud->expiry = ts + timeout;
+}
+
+int ngtcp2_pmtud_require_probe(ngtcp2_pmtud *pmtud) {
+ return pmtud->expiry == UINT64_MAX;
+}
+
+static void pmtud_next_probe(ngtcp2_pmtud *pmtud) {
+ assert(pmtud->mtu_idx < NGTCP2_MTU_PROBESLEN);
+
+ ++pmtud->mtu_idx;
+ pmtud->num_pkts_sent = 0;
+ pmtud->expiry = UINT64_MAX;
+
+ for (; pmtud->mtu_idx < NGTCP2_MTU_PROBESLEN; ++pmtud->mtu_idx) {
+ if (mtu_probes[pmtud->mtu_idx] <= pmtud->max_udp_payload_size ||
+ mtu_probes[pmtud->mtu_idx] > pmtud->hard_max_udp_payload_size) {
+ continue;
+ }
+
+ if (mtu_probes[pmtud->mtu_idx] < pmtud->min_fail_udp_payload_size) {
+ break;
+ }
+ }
+}
+
+void ngtcp2_pmtud_probe_success(ngtcp2_pmtud *pmtud, size_t payloadlen) {
+ pmtud->max_udp_payload_size =
+ ngtcp2_max(pmtud->max_udp_payload_size, payloadlen);
+
+ assert(pmtud->mtu_idx < NGTCP2_MTU_PROBESLEN);
+
+ if (mtu_probes[pmtud->mtu_idx] > pmtud->max_udp_payload_size) {
+ return;
+ }
+
+ pmtud_next_probe(pmtud);
+}
+
+void ngtcp2_pmtud_handle_expiry(ngtcp2_pmtud *pmtud, ngtcp2_tstamp ts) {
+ if (ts < pmtud->expiry) {
+ return;
+ }
+
+ pmtud->expiry = UINT64_MAX;
+
+ if (pmtud->num_pkts_sent < NGTCP2_PMTUD_PROBE_NUM_MAX) {
+ return;
+ }
+
+ pmtud->min_fail_udp_payload_size =
+ ngtcp2_min(pmtud->min_fail_udp_payload_size, mtu_probes[pmtud->mtu_idx]);
+
+ pmtud_next_probe(pmtud);
+}
+
+int ngtcp2_pmtud_finished(ngtcp2_pmtud *pmtud) {
+ return pmtud->mtu_idx >= NGTCP2_MTU_PROBESLEN;
+}
diff --git a/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_pmtud.h b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_pmtud.h
new file mode 100644
index 0000000..6b2e691
--- /dev/null
+++ b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_pmtud.h
@@ -0,0 +1,123 @@
+/*
+ * ngtcp2
+ *
+ * Copyright (c) 2022 ngtcp2 contributors
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#ifndef NGTCP2_PMTUD_H
+#define NGTCP2_PMTUD_H
+
+#ifdef HAVE_CONFIG_H
+# include <config.h>
+#endif /* HAVE_CONFIG_H */
+
+#include <ngtcp2/ngtcp2.h>
+
+typedef struct ngtcp2_pmtud {
+ const ngtcp2_mem *mem;
+ /* mtu_idx is the index of UDP payload size candidates to try
+ out. */
+ size_t mtu_idx;
+ /* num_pkts_sent is the number of mtu_idx sized UDP datagram payload
+ sent */
+ size_t num_pkts_sent;
+ /* expiry is the expired, if it is reached, send out the next UDP
+ datagram. UINT64_MAX means no expiry, or expiration is canceled.
+ In either case, new probe packet should be sent unless we have
+ done all attempts. */
+ ngtcp2_tstamp expiry;
+ /* tx_pkt_num is the smallest outgoing packet number where the
+ current discovery is performed. In other words, acknowledging
+ packet whose packet number lower than that does not indicate the
+ success of Path MTU Discovery. */
+ int64_t tx_pkt_num;
+ /* max_udp_payload_size is the maximum UDP payload size which is
+ known to work. */
+ size_t max_udp_payload_size;
+ /* hard_max_udp_payload_size is the maximum UDP payload size that is
+ going to be probed. */
+ size_t hard_max_udp_payload_size;
+ /* min_fail_udp_payload_size is the minimum UDP payload size that is
+ known to fail. */
+ size_t min_fail_udp_payload_size;
+} ngtcp2_pmtud;
+
+/*
+ * ngtcp2_pmtud_new creates new ngtcp2_pmtud object, and assigns its
+ * pointer to |*ppmtud|. |max_udp_payload_size| is the maximum UDP
+ * payload size that is known to work for the current path.
+ * |tx_pkt_num| should be the next packet number to send, which is
+ * used to differentiate the PMTUD probe packet sent by the previous
+ * PMTUD. PMTUD might finish immediately if |max_udp_payload_size| is
+ * larger than or equal to all UDP payload probe candidates.
+ * Therefore, call ngtcp2_pmtud_finished to check this situation.
+ *
+ * This function returns 0 if it succeeds, or one of the following
+ * negative error codes:
+ *
+ * NGTCP2_ERR_NOMEM
+ * Out of memory.
+ */
+int ngtcp2_pmtud_new(ngtcp2_pmtud **ppmtud, size_t max_udp_payload_size,
+ size_t hard_max_udp_payload_size, int64_t tx_pkt_num,
+ const ngtcp2_mem *mem);
+
+/*
+ * ngtcp2_pmtud_del deletes |pmtud|.
+ */
+void ngtcp2_pmtud_del(ngtcp2_pmtud *pmtud);
+
+/*
+ * ngtcp2_pmtud_probelen returns the length of UDP payload size for a
+ * PMTUD probe packet.
+ */
+size_t ngtcp2_pmtud_probelen(ngtcp2_pmtud *pmtud);
+
+/*
+ * ngtcp2_pmtud_probe_sent should be invoked when a PMTUD probe packet is
+ * sent.
+ */
+void ngtcp2_pmtud_probe_sent(ngtcp2_pmtud *pmtud, ngtcp2_duration pto,
+ ngtcp2_tstamp ts);
+
+/*
+ * ngtcp2_pmtud_require_probe returns nonzero if a PMTUD probe packet
+ * should be sent.
+ */
+int ngtcp2_pmtud_require_probe(ngtcp2_pmtud *pmtud);
+
+/*
+ * ngtcp2_pmtud_probe_success should be invoked when a PMTUD probe
+ * UDP datagram sized |payloadlen| is acknowledged.
+ */
+void ngtcp2_pmtud_probe_success(ngtcp2_pmtud *pmtud, size_t payloadlen);
+
+/*
+ * ngtcp2_pmtud_handle_expiry handles expiry.
+ */
+void ngtcp2_pmtud_handle_expiry(ngtcp2_pmtud *pmtud, ngtcp2_tstamp ts);
+
+/*
+ * ngtcp2_pmtud_finished returns nonzero if PMTUD has finished.
+ */
+int ngtcp2_pmtud_finished(ngtcp2_pmtud *pmtud);
+
+#endif /* NGTCP2_PMTUD_H */
diff --git a/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_ppe.c b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_ppe.c
new file mode 100644
index 0000000..ffba131
--- /dev/null
+++ b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_ppe.c
@@ -0,0 +1,230 @@
+/*
+ * ngtcp2
+ *
+ * Copyright (c) 2017 ngtcp2 contributors
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "ngtcp2_ppe.h"
+
+#include <string.h>
+#include <assert.h>
+
+#include "ngtcp2_str.h"
+#include "ngtcp2_conv.h"
+
+void ngtcp2_ppe_init(ngtcp2_ppe *ppe, uint8_t *out, size_t outlen,
+ ngtcp2_crypto_cc *cc) {
+ ngtcp2_buf_init(&ppe->buf, out, outlen);
+
+ ppe->hdlen = 0;
+ ppe->len_offset = 0;
+ ppe->pkt_num_offset = 0;
+ ppe->pkt_numlen = 0;
+ ppe->pkt_num = 0;
+ ppe->sample_offset = 0;
+ ppe->cc = cc;
+}
+
+int ngtcp2_ppe_encode_hd(ngtcp2_ppe *ppe, const ngtcp2_pkt_hd *hd) {
+ ngtcp2_ssize rv;
+ ngtcp2_buf *buf = &ppe->buf;
+ ngtcp2_crypto_cc *cc = ppe->cc;
+
+ if (ngtcp2_buf_left(buf) < cc->aead.max_overhead) {
+ return NGTCP2_ERR_NOBUF;
+ }
+
+ if (hd->flags & NGTCP2_PKT_FLAG_LONG_FORM) {
+ ppe->len_offset = 1 + 4 + 1 + hd->dcid.datalen + 1 + hd->scid.datalen;
+ if (hd->type == NGTCP2_PKT_INITIAL) {
+ ppe->len_offset += ngtcp2_put_uvarintlen(hd->tokenlen) + hd->tokenlen;
+ }
+ ppe->pkt_num_offset = ppe->len_offset + NGTCP2_PKT_LENGTHLEN;
+ rv = ngtcp2_pkt_encode_hd_long(
+ buf->last, ngtcp2_buf_left(buf) - cc->aead.max_overhead, hd);
+ } else {
+ ppe->pkt_num_offset = 1 + hd->dcid.datalen;
+ rv = ngtcp2_pkt_encode_hd_short(
+ buf->last, ngtcp2_buf_left(buf) - cc->aead.max_overhead, hd);
+ }
+ if (rv < 0) {
+ return (int)rv;
+ }
+
+ ppe->sample_offset = ppe->pkt_num_offset + 4;
+
+ buf->last += rv;
+
+ ppe->pkt_numlen = hd->pkt_numlen;
+ ppe->hdlen = (size_t)rv;
+
+ ppe->pkt_num = hd->pkt_num;
+
+ return 0;
+}
+
+int ngtcp2_ppe_encode_frame(ngtcp2_ppe *ppe, ngtcp2_frame *fr) {
+ ngtcp2_ssize rv;
+ ngtcp2_buf *buf = &ppe->buf;
+ ngtcp2_crypto_cc *cc = ppe->cc;
+
+ if (ngtcp2_buf_left(buf) < cc->aead.max_overhead) {
+ return NGTCP2_ERR_NOBUF;
+ }
+
+ rv = ngtcp2_pkt_encode_frame(
+ buf->last, ngtcp2_buf_left(buf) - cc->aead.max_overhead, fr);
+ if (rv < 0) {
+ return (int)rv;
+ }
+
+ buf->last += rv;
+
+ return 0;
+}
+
+ngtcp2_ssize ngtcp2_ppe_final(ngtcp2_ppe *ppe, const uint8_t **ppkt) {
+ ngtcp2_buf *buf = &ppe->buf;
+ ngtcp2_crypto_cc *cc = ppe->cc;
+ uint8_t *payload = buf->begin + ppe->hdlen;
+ size_t payloadlen = ngtcp2_buf_len(buf) - ppe->hdlen;
+ uint8_t mask[NGTCP2_HP_SAMPLELEN];
+ uint8_t *p;
+ size_t i;
+ int rv;
+
+ assert(cc->encrypt);
+ assert(cc->hp_mask);
+
+ if (ppe->len_offset) {
+ ngtcp2_put_uvarint30(
+ buf->begin + ppe->len_offset,
+ (uint16_t)(payloadlen + ppe->pkt_numlen + cc->aead.max_overhead));
+ }
+
+ ngtcp2_crypto_create_nonce(ppe->nonce, cc->ckm->iv.base, cc->ckm->iv.len,
+ ppe->pkt_num);
+
+ rv = cc->encrypt(payload, &cc->aead, &cc->ckm->aead_ctx, payload, payloadlen,
+ ppe->nonce, cc->ckm->iv.len, buf->begin, ppe->hdlen);
+ if (rv != 0) {
+ return NGTCP2_ERR_CALLBACK_FAILURE;
+ }
+
+ buf->last = payload + payloadlen + cc->aead.max_overhead;
+
+ /* TODO Check that we have enough space to get sample */
+ assert(ppe->sample_offset + NGTCP2_HP_SAMPLELEN <= ngtcp2_buf_len(buf));
+
+ rv = cc->hp_mask(mask, &cc->hp, &cc->hp_ctx, buf->begin + ppe->sample_offset);
+ if (rv != 0) {
+ return NGTCP2_ERR_CALLBACK_FAILURE;
+ }
+
+ p = buf->begin;
+ if (*p & NGTCP2_HEADER_FORM_BIT) {
+ *p = (uint8_t)(*p ^ (mask[0] & 0x0f));
+ } else {
+ *p = (uint8_t)(*p ^ (mask[0] & 0x1f));
+ }
+
+ p = buf->begin + ppe->pkt_num_offset;
+ for (i = 0; i < ppe->pkt_numlen; ++i) {
+ *(p + i) ^= mask[i + 1];
+ }
+
+ if (ppkt != NULL) {
+ *ppkt = buf->begin;
+ }
+
+ return (ngtcp2_ssize)ngtcp2_buf_len(buf);
+}
+
+size_t ngtcp2_ppe_left(ngtcp2_ppe *ppe) {
+ ngtcp2_crypto_cc *cc = ppe->cc;
+
+ if (ngtcp2_buf_left(&ppe->buf) < cc->aead.max_overhead) {
+ return 0;
+ }
+
+ return ngtcp2_buf_left(&ppe->buf) - cc->aead.max_overhead;
+}
+
+size_t ngtcp2_ppe_pktlen(ngtcp2_ppe *ppe) {
+ ngtcp2_crypto_cc *cc = ppe->cc;
+
+ return ngtcp2_buf_len(&ppe->buf) + cc->aead.max_overhead;
+}
+
+size_t ngtcp2_ppe_padding(ngtcp2_ppe *ppe) {
+ ngtcp2_crypto_cc *cc = ppe->cc;
+ ngtcp2_buf *buf = &ppe->buf;
+ size_t len;
+
+ assert(ngtcp2_buf_left(buf) >= cc->aead.max_overhead);
+
+ len = ngtcp2_buf_left(buf) - cc->aead.max_overhead;
+ memset(buf->last, 0, len);
+ buf->last += len;
+
+ return len;
+}
+
+size_t ngtcp2_ppe_padding_hp_sample(ngtcp2_ppe *ppe) {
+ ngtcp2_crypto_cc *cc = ppe->cc;
+ ngtcp2_buf *buf = &ppe->buf;
+ size_t max_samplelen;
+ size_t len = 0;
+
+ assert(cc->aead.max_overhead);
+
+ max_samplelen =
+ ngtcp2_buf_len(buf) + cc->aead.max_overhead - ppe->sample_offset;
+ if (max_samplelen < NGTCP2_HP_SAMPLELEN) {
+ len = NGTCP2_HP_SAMPLELEN - max_samplelen;
+ assert(ngtcp2_ppe_left(ppe) >= len);
+ memset(buf->last, 0, len);
+ buf->last += len;
+ }
+
+ return len;
+}
+
+size_t ngtcp2_ppe_padding_size(ngtcp2_ppe *ppe, size_t n) {
+ ngtcp2_crypto_cc *cc = ppe->cc;
+ ngtcp2_buf *buf = &ppe->buf;
+ size_t pktlen = ngtcp2_buf_len(buf) + cc->aead.max_overhead;
+ size_t len;
+
+ if (pktlen >= n) {
+ return 0;
+ }
+
+ len = n - pktlen;
+ buf->last = ngtcp2_setmem(buf->last, 0, len);
+
+ return len;
+}
+
+int ngtcp2_ppe_ensure_hp_sample(ngtcp2_ppe *ppe) {
+ ngtcp2_buf *buf = &ppe->buf;
+ return ngtcp2_buf_left(buf) >= (4 - ppe->pkt_numlen) + NGTCP2_HP_SAMPLELEN;
+}
diff --git a/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_ppe.h b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_ppe.h
new file mode 100644
index 0000000..bf220df
--- /dev/null
+++ b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_ppe.h
@@ -0,0 +1,153 @@
+/*
+ * ngtcp2
+ *
+ * Copyright (c) 2017 ngtcp2 contributors
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#ifndef NGTCP2_PPE_H
+#define NGTCP2_PPE_H
+
+#ifdef HAVE_CONFIG_H
+# include <config.h>
+#endif /* HAVE_CONFIG_H */
+
+#include <ngtcp2/ngtcp2.h>
+
+#include "ngtcp2_pkt.h"
+#include "ngtcp2_buf.h"
+#include "ngtcp2_crypto.h"
+
+/*
+ * ngtcp2_ppe is the Protected Packet Encoder.
+ */
+typedef struct ngtcp2_ppe {
+ ngtcp2_buf buf;
+ ngtcp2_crypto_cc *cc;
+ /* hdlen is the number of bytes for packet header written in buf. */
+ size_t hdlen;
+ /* len_offset is the offset to Length field. */
+ size_t len_offset;
+ /* pkt_num_offset is the offset to packet number field. */
+ size_t pkt_num_offset;
+ /* pkt_numlen is the number of bytes used to encode a packet
+ number */
+ size_t pkt_numlen;
+ /* sample_offset is the offset to sample for packet number
+ encryption. */
+ size_t sample_offset;
+ /* pkt_num is the packet number written in buf. */
+ int64_t pkt_num;
+ /* nonce is the buffer to store nonce. It should be equal or longer
+ than then length of IV. */
+ uint8_t nonce[32];
+} ngtcp2_ppe;
+
+/*
+ * ngtcp2_ppe_init initializes |ppe| with the given buffer.
+ */
+void ngtcp2_ppe_init(ngtcp2_ppe *ppe, uint8_t *out, size_t outlen,
+ ngtcp2_crypto_cc *cc);
+
+/*
+ * ngtcp2_ppe_encode_hd encodes |hd|.
+ *
+ * This function returns 0 if it succeeds, or one of the following
+ * negative error codes:
+ *
+ * NGTCP2_ERR_NOBUF
+ * The buffer is too small.
+ */
+int ngtcp2_ppe_encode_hd(ngtcp2_ppe *ppe, const ngtcp2_pkt_hd *hd);
+
+/*
+ * ngtcp2_ppe_encode_frame encodes |fr|.
+ *
+ * This function returns 0 if it succeeds, or one of the following
+ * negative error codes:
+ *
+ * NGTCP2_ERR_NOBUF
+ * The buffer is too small.
+ */
+int ngtcp2_ppe_encode_frame(ngtcp2_ppe *ppe, ngtcp2_frame *fr);
+
+/*
+ * ngtcp2_ppe_final encrypts QUIC packet payload. If |**ppkt| is not
+ * NULL, the pointer to the packet is assigned to it.
+ *
+ * This function returns the length of QUIC packet, including header,
+ * and payload if it succeeds, or one of the following negative error
+ * codes:
+ *
+ * NGTCP2_ERR_CALLBACK_FAILURE
+ * User-defined callback function failed.
+ */
+ngtcp2_ssize ngtcp2_ppe_final(ngtcp2_ppe *ppe, const uint8_t **ppkt);
+
+/*
+ * ngtcp2_ppe_left returns the number of bytes left to write
+ * additional frames. This does not count AEAD overhead.
+ */
+size_t ngtcp2_ppe_left(ngtcp2_ppe *ppe);
+
+/*
+ * ngtcp2_ppe_pktlen returns the provisional packet length. It
+ * includes AEAD overhead.
+ */
+size_t ngtcp2_ppe_pktlen(ngtcp2_ppe *ppe);
+
+/**
+ * @function
+ *
+ * `ngtcp2_ppe_padding` encodes PADDING frames to the end of the
+ * buffer. This function returns the number of bytes padded.
+ */
+size_t ngtcp2_ppe_padding(ngtcp2_ppe *ppe);
+
+/*
+ * ngtcp2_ppe_padding_hp_sample adds PADDING frame if the current
+ * payload does not have enough space for header protection sample.
+ * This function should be called just before calling
+ * ngtcp2_ppe_final().
+ *
+ * This function returns the number of bytes added as padding.
+ */
+size_t ngtcp2_ppe_padding_hp_sample(ngtcp2_ppe *ppe);
+
+/*
+ * ngtcp2_ppe_padding_size adds PADDING frame so that the size of QUIC
+ * packet is at least |n| bytes long. If it is unable to add PADDING
+ * in that way, this function still adds PADDING frame as much as
+ * possible. This function should be called just before calling
+ * ngtcp2_ppe_final(). For Short packet, this function should be
+ * called instead of ngtcp2_ppe_padding_hp_sample.
+ *
+ * This function returns the number of bytes added as padding.
+ */
+size_t ngtcp2_ppe_padding_size(ngtcp2_ppe *ppe, size_t n);
+
+/*
+ * ngtcp2_ppe_ensure_hp_sample returns nonzero if the buffer has
+ * enough space for header protection sample. This should be called
+ * right after packet header is written.
+ */
+int ngtcp2_ppe_ensure_hp_sample(ngtcp2_ppe *ppe);
+
+#endif /* NGTCP2_PPE_H */
diff --git a/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_pq.c b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_pq.c
new file mode 100644
index 0000000..5e1003d
--- /dev/null
+++ b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_pq.c
@@ -0,0 +1,164 @@
+/*
+ * ngtcp2
+ *
+ * Copyright (c) 2017 ngtcp2 contributors
+ * Copyright (c) 2012 nghttp2 contributors
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "ngtcp2_pq.h"
+
+#include <assert.h>
+
+#include "ngtcp2_macro.h"
+
+void ngtcp2_pq_init(ngtcp2_pq *pq, ngtcp2_less less, const ngtcp2_mem *mem) {
+ pq->mem = mem;
+ pq->capacity = 0;
+ pq->q = NULL;
+ pq->length = 0;
+ pq->less = less;
+}
+
+void ngtcp2_pq_free(ngtcp2_pq *pq) {
+ ngtcp2_mem_free(pq->mem, pq->q);
+ pq->q = NULL;
+}
+
+static void swap(ngtcp2_pq *pq, size_t i, size_t j) {
+ ngtcp2_pq_entry *a = pq->q[i];
+ ngtcp2_pq_entry *b = pq->q[j];
+
+ pq->q[i] = b;
+ b->index = i;
+ pq->q[j] = a;
+ a->index = j;
+}
+
+static void bubble_up(ngtcp2_pq *pq, size_t index) {
+ size_t parent;
+ while (index != 0) {
+ parent = (index - 1) / 2;
+ if (!pq->less(pq->q[index], pq->q[parent])) {
+ return;
+ }
+ swap(pq, parent, index);
+ index = parent;
+ }
+}
+
+int ngtcp2_pq_push(ngtcp2_pq *pq, ngtcp2_pq_entry *item) {
+ if (pq->capacity <= pq->length) {
+ void *nq;
+ size_t ncapacity;
+
+ ncapacity = ngtcp2_max(4, (pq->capacity * 2));
+
+ nq = ngtcp2_mem_realloc(pq->mem, pq->q,
+ ncapacity * sizeof(ngtcp2_pq_entry *));
+ if (nq == NULL) {
+ return NGTCP2_ERR_NOMEM;
+ }
+ pq->capacity = ncapacity;
+ pq->q = nq;
+ }
+ pq->q[pq->length] = item;
+ item->index = pq->length;
+ ++pq->length;
+ bubble_up(pq, pq->length - 1);
+ return 0;
+}
+
+ngtcp2_pq_entry *ngtcp2_pq_top(ngtcp2_pq *pq) {
+ assert(pq->length);
+ return pq->q[0];
+}
+
+static void bubble_down(ngtcp2_pq *pq, size_t index) {
+ size_t i, j, minindex;
+ for (;;) {
+ j = index * 2 + 1;
+ minindex = index;
+ for (i = 0; i < 2; ++i, ++j) {
+ if (j >= pq->length) {
+ break;
+ }
+ if (pq->less(pq->q[j], pq->q[minindex])) {
+ minindex = j;
+ }
+ }
+ if (minindex == index) {
+ return;
+ }
+ swap(pq, index, minindex);
+ index = minindex;
+ }
+}
+
+void ngtcp2_pq_pop(ngtcp2_pq *pq) {
+ if (pq->length > 0) {
+ pq->q[0] = pq->q[pq->length - 1];
+ pq->q[0]->index = 0;
+ --pq->length;
+ bubble_down(pq, 0);
+ }
+}
+
+void ngtcp2_pq_remove(ngtcp2_pq *pq, ngtcp2_pq_entry *item) {
+ assert(pq->q[item->index] == item);
+
+ if (item->index == 0) {
+ ngtcp2_pq_pop(pq);
+ return;
+ }
+
+ if (item->index == pq->length - 1) {
+ --pq->length;
+ return;
+ }
+
+ pq->q[item->index] = pq->q[pq->length - 1];
+ pq->q[item->index]->index = item->index;
+ --pq->length;
+
+ if (pq->less(item, pq->q[item->index])) {
+ bubble_down(pq, item->index);
+ } else {
+ bubble_up(pq, item->index);
+ }
+}
+
+int ngtcp2_pq_empty(ngtcp2_pq *pq) { return pq->length == 0; }
+
+size_t ngtcp2_pq_size(ngtcp2_pq *pq) { return pq->length; }
+
+int ngtcp2_pq_each(ngtcp2_pq *pq, ngtcp2_pq_item_cb fun, void *arg) {
+ size_t i;
+
+ if (pq->length == 0) {
+ return 0;
+ }
+ for (i = 0; i < pq->length; ++i) {
+ if ((*fun)(pq->q[i], arg)) {
+ return 1;
+ }
+ }
+ return 0;
+}
diff --git a/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_pq.h b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_pq.h
new file mode 100644
index 0000000..720c309
--- /dev/null
+++ b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_pq.h
@@ -0,0 +1,126 @@
+/*
+ * ngtcp2
+ *
+ * Copyright (c) 2017 ngtcp2 contributors
+ * Copyright (c) 2012 nghttp2 contributors
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#ifndef NGTCP2_PQ_H
+#define NGTCP2_PQ_H
+
+#ifdef HAVE_CONFIG_H
+# include <config.h>
+#endif /* HAVE_CONFIG_H */
+
+#include <ngtcp2/ngtcp2.h>
+
+#include "ngtcp2_mem.h"
+
+/* Implementation of priority queue */
+
+/* NGTCP2_PQ_BAD_INDEX is the priority queue index which indicates
+ that an entry is not queued. Assigning this value to
+ ngtcp2_pq_entry.index can check that the entry is queued or not. */
+#define NGTCP2_PQ_BAD_INDEX SIZE_MAX
+
+typedef struct ngtcp2_pq_entry {
+ size_t index;
+} ngtcp2_pq_entry;
+
+/* "less" function, return nonzero if |lhs| is less than |rhs|. */
+typedef int (*ngtcp2_less)(const ngtcp2_pq_entry *lhs,
+ const ngtcp2_pq_entry *rhs);
+
+typedef struct ngtcp2_pq {
+ /* The pointer to the pointer to the item stored */
+ ngtcp2_pq_entry **q;
+ /* Memory allocator */
+ const ngtcp2_mem *mem;
+ /* The number of items stored */
+ size_t length;
+ /* The maximum number of items this pq can store. This is
+ automatically extended when length is reached to this value. */
+ size_t capacity;
+ /* The less function between items */
+ ngtcp2_less less;
+} ngtcp2_pq;
+
+/*
+ * Initializes priority queue |pq| with compare function |cmp|.
+ */
+void ngtcp2_pq_init(ngtcp2_pq *pq, ngtcp2_less less, const ngtcp2_mem *mem);
+
+/*
+ * Deallocates any resources allocated for |pq|. The stored items are
+ * not freed by this function.
+ */
+void ngtcp2_pq_free(ngtcp2_pq *pq);
+
+/*
+ * Adds |item| to the priority queue |pq|.
+ *
+ * This function returns 0 if it succeeds, or one of the following
+ * negative error codes:
+ *
+ * NGTCP2_ERR_NOMEM
+ * Out of memory.
+ */
+int ngtcp2_pq_push(ngtcp2_pq *pq, ngtcp2_pq_entry *item);
+
+/*
+ * Returns item at the top of the queue |pq|. It is undefined if the
+ * queue is empty.
+ */
+ngtcp2_pq_entry *ngtcp2_pq_top(ngtcp2_pq *pq);
+
+/*
+ * Pops item at the top of the queue |pq|. The popped item is not
+ * freed by this function.
+ */
+void ngtcp2_pq_pop(ngtcp2_pq *pq);
+
+/*
+ * Returns nonzero if the queue |pq| is empty.
+ */
+int ngtcp2_pq_empty(ngtcp2_pq *pq);
+
+/*
+ * Returns the number of items in the queue |pq|.
+ */
+size_t ngtcp2_pq_size(ngtcp2_pq *pq);
+
+typedef int (*ngtcp2_pq_item_cb)(ngtcp2_pq_entry *item, void *arg);
+
+/*
+ * Applys |fun| to each item in |pq|. The |arg| is passed as arg
+ * parameter to callback function. This function must not change the
+ * ordering key. If the return value from callback is nonzero, this
+ * function returns 1 immediately without iterating remaining items.
+ * Otherwise this function returns 0.
+ */
+int ngtcp2_pq_each(ngtcp2_pq *pq, ngtcp2_pq_item_cb fun, void *arg);
+
+/*
+ * Removes |item| from priority queue.
+ */
+void ngtcp2_pq_remove(ngtcp2_pq *pq, ngtcp2_pq_entry *item);
+
+#endif /* NGTCP2_PQ_H */
diff --git a/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_pv.c b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_pv.c
new file mode 100644
index 0000000..314e005
--- /dev/null
+++ b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_pv.c
@@ -0,0 +1,172 @@
+/*
+ * ngtcp2
+ *
+ * Copyright (c) 2019 ngtcp2 contributors
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "ngtcp2_pv.h"
+
+#include <string.h>
+#include <assert.h>
+
+#include "ngtcp2_mem.h"
+#include "ngtcp2_log.h"
+#include "ngtcp2_macro.h"
+#include "ngtcp2_addr.h"
+
+void ngtcp2_pv_entry_init(ngtcp2_pv_entry *pvent, const uint8_t *data,
+ ngtcp2_tstamp expiry, uint8_t flags) {
+ memcpy(pvent->data, data, sizeof(pvent->data));
+ pvent->expiry = expiry;
+ pvent->flags = flags;
+}
+
+int ngtcp2_pv_new(ngtcp2_pv **ppv, const ngtcp2_dcid *dcid,
+ ngtcp2_duration timeout, uint8_t flags, ngtcp2_log *log,
+ const ngtcp2_mem *mem) {
+ (*ppv) = ngtcp2_mem_malloc(mem, sizeof(ngtcp2_pv));
+ if (*ppv == NULL) {
+ return NGTCP2_ERR_NOMEM;
+ }
+
+ ngtcp2_static_ringbuf_pv_ents_init(&(*ppv)->ents);
+
+ ngtcp2_dcid_copy(&(*ppv)->dcid, dcid);
+
+ (*ppv)->mem = mem;
+ (*ppv)->log = log;
+ (*ppv)->timeout = timeout;
+ (*ppv)->fallback_pto = 0;
+ (*ppv)->started_ts = UINT64_MAX;
+ (*ppv)->probe_pkt_left = NGTCP2_PV_NUM_PROBE_PKT;
+ (*ppv)->round = 0;
+ (*ppv)->flags = flags;
+
+ return 0;
+}
+
+void ngtcp2_pv_del(ngtcp2_pv *pv) {
+ if (pv == NULL) {
+ return;
+ }
+ ngtcp2_mem_free(pv->mem, pv);
+}
+
+void ngtcp2_pv_add_entry(ngtcp2_pv *pv, const uint8_t *data,
+ ngtcp2_tstamp expiry, uint8_t flags,
+ ngtcp2_tstamp ts) {
+ ngtcp2_pv_entry *ent;
+
+ assert(pv->probe_pkt_left);
+
+ if (ngtcp2_ringbuf_len(&pv->ents.rb) == 0) {
+ pv->started_ts = ts;
+ }
+
+ ent = ngtcp2_ringbuf_push_back(&pv->ents.rb);
+ ngtcp2_pv_entry_init(ent, data, expiry, flags);
+
+ pv->flags &= (uint8_t)~NGTCP2_PV_FLAG_CANCEL_TIMER;
+ --pv->probe_pkt_left;
+}
+
+int ngtcp2_pv_validate(ngtcp2_pv *pv, uint8_t *pflags, const uint8_t *data) {
+ size_t len = ngtcp2_ringbuf_len(&pv->ents.rb);
+ size_t i;
+ ngtcp2_pv_entry *ent;
+
+ if (len == 0) {
+ return NGTCP2_ERR_INVALID_STATE;
+ }
+
+ for (i = 0; i < len; ++i) {
+ ent = ngtcp2_ringbuf_get(&pv->ents.rb, i);
+ if (memcmp(ent->data, data, sizeof(ent->data)) == 0) {
+ *pflags = ent->flags;
+ ngtcp2_log_info(pv->log, NGTCP2_LOG_EVENT_PTV, "path has been validated");
+ return 0;
+ }
+ }
+
+ return NGTCP2_ERR_INVALID_ARGUMENT;
+}
+
+void ngtcp2_pv_handle_entry_expiry(ngtcp2_pv *pv, ngtcp2_tstamp ts) {
+ ngtcp2_pv_entry *ent;
+
+ if (ngtcp2_ringbuf_len(&pv->ents.rb) == 0) {
+ return;
+ }
+
+ ent = ngtcp2_ringbuf_get(&pv->ents.rb, ngtcp2_ringbuf_len(&pv->ents.rb) - 1);
+
+ if (ent->expiry > ts) {
+ return;
+ }
+
+ ++pv->round;
+ pv->probe_pkt_left = NGTCP2_PV_NUM_PROBE_PKT;
+}
+
+int ngtcp2_pv_should_send_probe(ngtcp2_pv *pv) {
+ return pv->probe_pkt_left > 0;
+}
+
+int ngtcp2_pv_validation_timed_out(ngtcp2_pv *pv, ngtcp2_tstamp ts) {
+ ngtcp2_tstamp t;
+ ngtcp2_pv_entry *ent;
+
+ if (pv->started_ts == UINT64_MAX) {
+ return 0;
+ }
+
+ assert(ngtcp2_ringbuf_len(&pv->ents.rb));
+
+ ent = ngtcp2_ringbuf_get(&pv->ents.rb, ngtcp2_ringbuf_len(&pv->ents.rb) - 1);
+
+ t = pv->started_ts + pv->timeout;
+ t = ngtcp2_max(t, ent->expiry);
+
+ return t <= ts;
+}
+
+ngtcp2_tstamp ngtcp2_pv_next_expiry(ngtcp2_pv *pv) {
+ ngtcp2_pv_entry *ent;
+
+ if ((pv->flags & NGTCP2_PV_FLAG_CANCEL_TIMER) ||
+ ngtcp2_ringbuf_len(&pv->ents.rb) == 0) {
+ return UINT64_MAX;
+ }
+
+ ent = ngtcp2_ringbuf_get(&pv->ents.rb, ngtcp2_ringbuf_len(&pv->ents.rb) - 1);
+
+ return ent->expiry;
+}
+
+void ngtcp2_pv_cancel_expired_timer(ngtcp2_pv *pv, ngtcp2_tstamp ts) {
+ ngtcp2_tstamp expiry = ngtcp2_pv_next_expiry(pv);
+
+ if (expiry > ts) {
+ return;
+ }
+
+ pv->flags |= NGTCP2_PV_FLAG_CANCEL_TIMER;
+}
diff --git a/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_pv.h b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_pv.h
new file mode 100644
index 0000000..293cbca
--- /dev/null
+++ b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_pv.h
@@ -0,0 +1,198 @@
+/*
+ * ngtcp2
+ *
+ * Copyright (c) 2019 ngtcp2 contributors
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#ifndef NGTCP2_PV_H
+#define NGTCP2_PV_H
+
+#ifdef HAVE_CONFIG_H
+# include <config.h>
+#endif /* HAVE_CONFIG_H */
+
+#include <ngtcp2/ngtcp2.h>
+
+#include "ngtcp2_cid.h"
+#include "ngtcp2_ringbuf.h"
+
+/* NGTCP2_PV_MAX_ENTRIES is the maximum number of entries that
+ ngtcp2_pv can contain. It must be power of 2. */
+#define NGTCP2_PV_MAX_ENTRIES 8
+/* NGTCP2_PV_NUM_PROBE_PKT is the number of probe packets containing
+ PATH_CHALLENGE sent at a time. */
+#define NGTCP2_PV_NUM_PROBE_PKT 2
+
+typedef struct ngtcp2_log ngtcp2_log;
+
+typedef struct ngtcp2_frame_chain ngtcp2_frame_chain;
+
+/* NGTCP2_PV_ENTRY_FLAG_NONE indicates that no flag is set. */
+#define NGTCP2_PV_ENTRY_FLAG_NONE 0x00u
+/* NGTCP2_PV_ENTRY_FLAG_UNDERSIZED indicates that UDP datagram which
+ contains PATH_CHALLENGE is undersized (< 1200 bytes) */
+#define NGTCP2_PV_ENTRY_FLAG_UNDERSIZED 0x01u
+
+typedef struct ngtcp2_pv_entry {
+ /* expiry is the timestamp when this PATH_CHALLENGE expires. */
+ ngtcp2_tstamp expiry;
+ /* flags is zero or more of NGTCP2_PV_ENTRY_FLAG_*. */
+ uint8_t flags;
+ /* data is a byte string included in PATH_CHALLENGE. */
+ uint8_t data[8];
+} ngtcp2_pv_entry;
+
+void ngtcp2_pv_entry_init(ngtcp2_pv_entry *pvent, const uint8_t *data,
+ ngtcp2_tstamp expiry, uint8_t flags);
+
+/* NGTCP2_PV_FLAG_NONE indicates no flag is set. */
+#define NGTCP2_PV_FLAG_NONE 0x00u
+/* NGTCP2_PV_FLAG_DONT_CARE indicates that the outcome of path
+ validation should be ignored entirely. */
+#define NGTCP2_PV_FLAG_DONT_CARE 0x01u
+/* NGTCP2_PV_FLAG_CANCEL_TIMER indicates that the expiry timer is
+ cancelled. */
+#define NGTCP2_PV_FLAG_CANCEL_TIMER 0x02u
+/* NGTCP2_PV_FLAG_FALLBACK_ON_FAILURE indicates that fallback DCID is
+ available in ngtcp2_pv. If path validation fails, fallback to the
+ fallback DCID. If path validation succeeds, fallback DCID is
+ retired if it does not equal to the current DCID. */
+#define NGTCP2_PV_FLAG_FALLBACK_ON_FAILURE 0x04u
+/* NGTCP2_PV_FLAG_MTU_PROBE indicates that a validation must probe
+ least MTU that QUIC requires, which is 1200 bytes. If it fails, a
+ path is not viable. */
+#define NGTCP2_PV_FLAG_MTU_PROBE 0x08u
+/* NGTCP2_PV_FLAG_PREFERRED_ADDR indicates that client is migrating to
+ server's preferred address. This flag is only used by client. */
+#define NGTCP2_PV_FLAG_PREFERRED_ADDR 0x10u
+
+typedef struct ngtcp2_pv ngtcp2_pv;
+
+ngtcp2_static_ringbuf_def(pv_ents, NGTCP2_PV_MAX_ENTRIES,
+ sizeof(ngtcp2_pv_entry));
+/*
+ * ngtcp2_pv is the context of a single path validation.
+ */
+struct ngtcp2_pv {
+ const ngtcp2_mem *mem;
+ ngtcp2_log *log;
+ /* dcid is DCID and path this path validation uses. */
+ ngtcp2_dcid dcid;
+ /* fallback_dcid is the usually validated DCID and used as a
+ fallback if this path validation fails. */
+ ngtcp2_dcid fallback_dcid;
+ /* ents is the ring buffer of ngtcp2_pv_entry */
+ ngtcp2_static_ringbuf_pv_ents ents;
+ /* timeout is the duration within which this path validation should
+ succeed. */
+ ngtcp2_duration timeout;
+ /* fallback_pto is PTO of fallback connection. */
+ ngtcp2_duration fallback_pto;
+ /* started_ts is the timestamp this path validation starts. */
+ ngtcp2_tstamp started_ts;
+ /* round is the number of times that probe_pkt_left is reset. */
+ size_t round;
+ /* probe_pkt_left is the number of probe packets containing
+ PATH_CHALLENGE which can be send without waiting for an
+ expiration of a previous flight. */
+ size_t probe_pkt_left;
+ /* flags is bitwise-OR of zero or more of NGTCP2_PV_FLAG_*. */
+ uint8_t flags;
+};
+
+/*
+ * ngtcp2_pv_new creates new ngtcp2_pv object and assigns its pointer
+ * to |*ppv|. This function makes a copy of |dcid|. |timeout| is a
+ * duration within which this path validation must succeed.
+ *
+ * This function returns 0 if it succeeds, or one of the following
+ * negative error codes:
+ *
+ * NGTCP2_ERR_NOMEM
+ * Out of memory
+ */
+int ngtcp2_pv_new(ngtcp2_pv **ppv, const ngtcp2_dcid *dcid,
+ ngtcp2_duration timeout, uint8_t flags, ngtcp2_log *log,
+ const ngtcp2_mem *mem);
+
+/*
+ * ngtcp2_pv_del deallocates |pv|. This function frees memory |pv|
+ * points too.
+ */
+void ngtcp2_pv_del(ngtcp2_pv *pv);
+
+/*
+ * ngtcp2_pv_add_entry adds new entry with |data|. |expiry| is the
+ * expiry time of the entry.
+ */
+void ngtcp2_pv_add_entry(ngtcp2_pv *pv, const uint8_t *data,
+ ngtcp2_tstamp expiry, uint8_t flags, ngtcp2_tstamp ts);
+
+/*
+ * ngtcp2_pv_full returns nonzero if |pv| is full of ngtcp2_pv_entry.
+ */
+int ngtcp2_pv_full(ngtcp2_pv *pv);
+
+/*
+ * ngtcp2_pv_validate validates that the received |data| matches the
+ * one of the existing entry. The flag of ngtcp2_pv_entry that
+ * matches |data| is assigned to |*pflags| if this function succeeds.
+ *
+ * This function returns 0 if it succeeds, or one of the following
+ * negative error codes:
+ *
+ * NGTCP2_ERR_PATH_VALIDATION_FAILED
+ * path validation has failed and must be abandoned
+ * NGTCP2_ERR_INVALID_STATE
+ * |pv| includes no entry
+ * NGTCP2_ERR_INVALID_ARGUMENT
+ * |pv| does not have an entry which has |data| and |path|
+ */
+int ngtcp2_pv_validate(ngtcp2_pv *pv, uint8_t *pflags, const uint8_t *data);
+
+/*
+ * ngtcp2_pv_handle_entry_expiry checks expiry of existing entries.
+ */
+void ngtcp2_pv_handle_entry_expiry(ngtcp2_pv *pv, ngtcp2_tstamp ts);
+
+/*
+ * ngtcp2_pv_should_send_probe returns nonzero if new entry can be
+ * added by ngtcp2_pv_add_entry.
+ */
+int ngtcp2_pv_should_send_probe(ngtcp2_pv *pv);
+
+/*
+ * ngtcp2_pv_validation_timed_out returns nonzero if the path
+ * validation fails because of timeout.
+ */
+int ngtcp2_pv_validation_timed_out(ngtcp2_pv *pv, ngtcp2_tstamp ts);
+
+/*
+ * ngtcp2_pv_next_expiry returns the earliest expiry.
+ */
+ngtcp2_tstamp ngtcp2_pv_next_expiry(ngtcp2_pv *pv);
+
+/*
+ * ngtcp2_pv_cancel_expired_timer cancels the expired timer.
+ */
+void ngtcp2_pv_cancel_expired_timer(ngtcp2_pv *pv, ngtcp2_tstamp ts);
+
+#endif /* NGTCP2_PV_H */
diff --git a/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_qlog.c b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_qlog.c
new file mode 100644
index 0000000..5107f44
--- /dev/null
+++ b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_qlog.c
@@ -0,0 +1,1218 @@
+/*
+ * ngtcp2
+ *
+ * Copyright (c) 2019 ngtcp2 contributors
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "ngtcp2_qlog.h"
+
+#include <assert.h>
+
+#include "ngtcp2_str.h"
+#include "ngtcp2_vec.h"
+#include "ngtcp2_conv.h"
+#include "ngtcp2_net.h"
+#include "ngtcp2_unreachable.h"
+
+void ngtcp2_qlog_init(ngtcp2_qlog *qlog, ngtcp2_qlog_write write,
+ ngtcp2_tstamp ts, void *user_data) {
+ qlog->write = write;
+ qlog->ts = qlog->last_ts = ts;
+ qlog->user_data = user_data;
+}
+
+#define write_verbatim(DEST, S) ngtcp2_cpymem((DEST), (S), sizeof(S) - 1)
+
+static uint8_t *write_string_impl(uint8_t *p, const uint8_t *data,
+ size_t datalen) {
+ *p++ = '"';
+ if (datalen) {
+ p = ngtcp2_cpymem(p, data, datalen);
+ }
+ *p++ = '"';
+ return p;
+}
+
+#define write_string(DEST, S) \
+ write_string_impl((DEST), (const uint8_t *)(S), sizeof(S) - 1)
+
+#define NGTCP2_LOWER_XDIGITS "0123456789abcdef"
+
+static uint8_t *write_hex(uint8_t *p, const uint8_t *data, size_t datalen) {
+ const uint8_t *b = data, *end = data + datalen;
+ *p++ = '"';
+ for (; b != end; ++b) {
+ *p++ = (uint8_t)NGTCP2_LOWER_XDIGITS[*b >> 4];
+ *p++ = (uint8_t)NGTCP2_LOWER_XDIGITS[*b & 0xf];
+ }
+ *p++ = '"';
+ return p;
+}
+
+static uint8_t *write_cid(uint8_t *p, const ngtcp2_cid *cid) {
+ return write_hex(p, cid->data, cid->datalen);
+}
+
+static uint8_t *write_number(uint8_t *p, uint64_t n) {
+ size_t nlen = 0;
+ uint64_t t;
+ uint8_t *res;
+
+ if (n == 0) {
+ *p++ = '0';
+ return p;
+ }
+ for (t = n; t; t /= 10, ++nlen)
+ ;
+ p += nlen;
+ res = p;
+ for (; n; n /= 10) {
+ *--p = (uint8_t)((n % 10) + '0');
+ }
+ return res;
+}
+
+static uint8_t *write_tstamp(uint8_t *p, ngtcp2_tstamp ts) {
+ return write_number(p, ts / NGTCP2_MILLISECONDS);
+}
+
+static uint8_t *write_duration(uint8_t *p, ngtcp2_duration duration) {
+ return write_number(p, duration / NGTCP2_MILLISECONDS);
+}
+
+static uint8_t *write_bool(uint8_t *p, int b) {
+ if (b) {
+ return ngtcp2_cpymem(p, "true", sizeof("true") - 1);
+ }
+ return ngtcp2_cpymem(p, "false", sizeof("false") - 1);
+}
+
+static uint8_t *write_pair_impl(uint8_t *p, const uint8_t *name, size_t namelen,
+ const ngtcp2_vec *value) {
+ p = write_string_impl(p, name, namelen);
+ *p++ = ':';
+ return write_string_impl(p, value->base, value->len);
+}
+
+#define write_pair(DEST, NAME, VALUE) \
+ write_pair_impl((DEST), (const uint8_t *)(NAME), sizeof(NAME) - 1, (VALUE))
+
+static uint8_t *write_pair_hex_impl(uint8_t *p, const uint8_t *name,
+ size_t namelen, const uint8_t *value,
+ size_t valuelen) {
+ p = write_string_impl(p, name, namelen);
+ *p++ = ':';
+ return write_hex(p, value, valuelen);
+}
+
+#define write_pair_hex(DEST, NAME, VALUE, VALUELEN) \
+ write_pair_hex_impl((DEST), (const uint8_t *)(NAME), sizeof(NAME) - 1, \
+ (VALUE), (VALUELEN))
+
+static uint8_t *write_pair_number_impl(uint8_t *p, const uint8_t *name,
+ size_t namelen, uint64_t value) {
+ p = write_string_impl(p, name, namelen);
+ *p++ = ':';
+ return write_number(p, value);
+}
+
+#define write_pair_number(DEST, NAME, VALUE) \
+ write_pair_number_impl((DEST), (const uint8_t *)(NAME), sizeof(NAME) - 1, \
+ (VALUE))
+
+static uint8_t *write_pair_duration_impl(uint8_t *p, const uint8_t *name,
+ size_t namelen,
+ ngtcp2_duration duration) {
+ p = write_string_impl(p, name, namelen);
+ *p++ = ':';
+ return write_duration(p, duration);
+}
+
+#define write_pair_duration(DEST, NAME, VALUE) \
+ write_pair_duration_impl((DEST), (const uint8_t *)(NAME), sizeof(NAME) - 1, \
+ (VALUE))
+
+static uint8_t *write_pair_tstamp_impl(uint8_t *p, const uint8_t *name,
+ size_t namelen, ngtcp2_tstamp ts) {
+ p = write_string_impl(p, name, namelen);
+ *p++ = ':';
+ return write_tstamp(p, ts);
+}
+
+#define write_pair_tstamp(DEST, NAME, VALUE) \
+ write_pair_tstamp_impl((DEST), (const uint8_t *)(NAME), sizeof(NAME) - 1, \
+ (VALUE))
+
+static uint8_t *write_pair_bool_impl(uint8_t *p, const uint8_t *name,
+ size_t namelen, int b) {
+ p = write_string_impl(p, name, namelen);
+ *p++ = ':';
+ return write_bool(p, b);
+}
+
+#define write_pair_bool(DEST, NAME, VALUE) \
+ write_pair_bool_impl((DEST), (const uint8_t *)(NAME), sizeof(NAME) - 1, \
+ (VALUE))
+
+static uint8_t *write_pair_cid_impl(uint8_t *p, const uint8_t *name,
+ size_t namelen, const ngtcp2_cid *cid) {
+ p = write_string_impl(p, name, namelen);
+ *p++ = ':';
+ return write_cid(p, cid);
+}
+
+#define write_pair_cid(DEST, NAME, VALUE) \
+ write_pair_cid_impl((DEST), (const uint8_t *)(NAME), sizeof(NAME) - 1, \
+ (VALUE))
+
+#define ngtcp2_make_vec_lit(S) \
+ { (uint8_t *)(S), sizeof((S)) - 1 }
+
+static uint8_t *write_common_fields(uint8_t *p, const ngtcp2_cid *odcid) {
+ p = write_verbatim(
+ p, "\"common_fields\":{\"protocol_type\":[\"QUIC\"],\"time_format\":"
+ "\"relative\",\"reference_time\":0,\"group_id\":");
+ p = write_cid(p, odcid);
+ *p++ = '}';
+ return p;
+}
+
+static uint8_t *write_trace(uint8_t *p, int server, const ngtcp2_cid *odcid) {
+ p = write_verbatim(
+ p, "\"trace\":{\"vantage_point\":{\"name\":\"ngtcp2\",\"type\":");
+ if (server) {
+ p = write_string(p, "server");
+ } else {
+ p = write_string(p, "client");
+ }
+ p = write_verbatim(p, "},");
+ p = write_common_fields(p, odcid);
+ *p++ = '}';
+ return p;
+}
+
+void ngtcp2_qlog_start(ngtcp2_qlog *qlog, const ngtcp2_cid *odcid, int server) {
+ uint8_t buf[1024];
+ uint8_t *p = buf;
+
+ if (!qlog->write) {
+ return;
+ }
+
+ p = write_verbatim(
+ p, "\x1e{\"qlog_format\":\"JSON-SEQ\",\"qlog_version\":\"0.3\",");
+ p = write_trace(p, server, odcid);
+ p = write_verbatim(p, "}\n");
+
+ qlog->write(qlog->user_data, NGTCP2_QLOG_WRITE_FLAG_NONE, buf,
+ (size_t)(p - buf));
+}
+
+void ngtcp2_qlog_end(ngtcp2_qlog *qlog) {
+ uint8_t buf[1] = {0};
+
+ if (!qlog->write) {
+ return;
+ }
+
+ qlog->write(qlog->user_data, NGTCP2_QLOG_WRITE_FLAG_FIN, &buf, 0);
+}
+
+static ngtcp2_vec vec_pkt_type_initial = ngtcp2_make_vec_lit("initial");
+static ngtcp2_vec vec_pkt_type_handshake = ngtcp2_make_vec_lit("handshake");
+static ngtcp2_vec vec_pkt_type_0rtt = ngtcp2_make_vec_lit("0RTT");
+static ngtcp2_vec vec_pkt_type_1rtt = ngtcp2_make_vec_lit("1RTT");
+static ngtcp2_vec vec_pkt_type_retry = ngtcp2_make_vec_lit("retry");
+static ngtcp2_vec vec_pkt_type_version_negotiation =
+ ngtcp2_make_vec_lit("version_negotiation");
+static ngtcp2_vec vec_pkt_type_stateless_reset =
+ ngtcp2_make_vec_lit("stateless_reset");
+static ngtcp2_vec vec_pkt_type_unknown = ngtcp2_make_vec_lit("unknown");
+
+static const ngtcp2_vec *qlog_pkt_type(const ngtcp2_pkt_hd *hd) {
+ if (hd->flags & NGTCP2_PKT_FLAG_LONG_FORM) {
+ switch (hd->type) {
+ case NGTCP2_PKT_INITIAL:
+ return &vec_pkt_type_initial;
+ case NGTCP2_PKT_HANDSHAKE:
+ return &vec_pkt_type_handshake;
+ case NGTCP2_PKT_0RTT:
+ return &vec_pkt_type_0rtt;
+ case NGTCP2_PKT_RETRY:
+ return &vec_pkt_type_retry;
+ default:
+ return &vec_pkt_type_unknown;
+ }
+ }
+
+ switch (hd->type) {
+ case NGTCP2_PKT_VERSION_NEGOTIATION:
+ return &vec_pkt_type_version_negotiation;
+ case NGTCP2_PKT_STATELESS_RESET:
+ return &vec_pkt_type_stateless_reset;
+ case NGTCP2_PKT_1RTT:
+ return &vec_pkt_type_1rtt;
+ default:
+ return &vec_pkt_type_unknown;
+ }
+}
+
+static uint8_t *write_pkt_hd(uint8_t *p, const ngtcp2_pkt_hd *hd) {
+ /*
+ * {"packet_type":"version_negotiation","packet_number":"0000000000000000000","token":{"data":""}}
+ */
+#define NGTCP2_QLOG_PKT_HD_OVERHEAD 95
+
+ *p++ = '{';
+ p = write_pair(p, "packet_type", qlog_pkt_type(hd));
+ *p++ = ',';
+ p = write_pair_number(p, "packet_number", (uint64_t)hd->pkt_num);
+ if (hd->type == NGTCP2_PKT_INITIAL && hd->tokenlen) {
+ p = write_verbatim(p, ",\"token\":{");
+ p = write_pair_hex(p, "data", hd->token, hd->tokenlen);
+ *p++ = '}';
+ }
+ /* TODO Write DCIL and DCID */
+ /* TODO Write SCIL and SCID */
+ *p++ = '}';
+ return p;
+}
+
+static uint8_t *write_padding_frame(uint8_t *p, const ngtcp2_padding *fr) {
+ (void)fr;
+
+ /* {"frame_type":"padding"} */
+#define NGTCP2_QLOG_PADDING_FRAME_OVERHEAD 24
+
+ return write_verbatim(p, "{\"frame_type\":\"padding\"}");
+}
+
+static uint8_t *write_ping_frame(uint8_t *p, const ngtcp2_ping *fr) {
+ (void)fr;
+
+ /* {"frame_type":"ping"} */
+#define NGTCP2_QLOG_PING_FRAME_OVERHEAD 21
+
+ return write_verbatim(p, "{\"frame_type\":\"ping\"}");
+}
+
+static uint8_t *write_ack_frame(uint8_t *p, const ngtcp2_ack *fr) {
+ int64_t largest_ack, min_ack;
+ size_t i;
+ const ngtcp2_ack_range *range;
+
+ /*
+ * {"frame_type":"ack","ack_delay":0000000000000000000,"acked_ranges":[]}
+ *
+ * each range:
+ * [0000000000000000000,0000000000000000000],
+ *
+ * ecn:
+ * ,"ect1":0000000000000000000,"ect0":0000000000000000000,"ce":0000000000000000000
+ */
+#define NGTCP2_QLOG_ACK_FRAME_BASE_OVERHEAD 70
+#define NGTCP2_QLOG_ACK_FRAME_RANGE_OVERHEAD 42
+#define NGTCP2_QLOG_ACK_FRAME_ECN_OVERHEAD 79
+
+ p = write_verbatim(p, "{\"frame_type\":\"ack\",");
+ p = write_pair_duration(p, "ack_delay", fr->ack_delay_unscaled);
+ p = write_verbatim(p, ",\"acked_ranges\":[");
+
+ largest_ack = fr->largest_ack;
+ min_ack = fr->largest_ack - (int64_t)fr->first_ack_range;
+
+ *p++ = '[';
+ p = write_number(p, (uint64_t)min_ack);
+ if (largest_ack != min_ack) {
+ *p++ = ',';
+ p = write_number(p, (uint64_t)largest_ack);
+ }
+ *p++ = ']';
+
+ for (i = 0; i < fr->rangecnt; ++i) {
+ range = &fr->ranges[i];
+ largest_ack = min_ack - (int64_t)range->gap - 2;
+ min_ack = largest_ack - (int64_t)range->len;
+ *p++ = ',';
+ *p++ = '[';
+ p = write_number(p, (uint64_t)min_ack);
+ if (largest_ack != min_ack) {
+ *p++ = ',';
+ p = write_number(p, (uint64_t)largest_ack);
+ }
+ *p++ = ']';
+ }
+
+ *p++ = ']';
+
+ if (fr->type == NGTCP2_FRAME_ACK_ECN) {
+ *p++ = ',';
+ p = write_pair_number(p, "ect1", fr->ecn.ect1);
+ *p++ = ',';
+ p = write_pair_number(p, "ect0", fr->ecn.ect0);
+ *p++ = ',';
+ p = write_pair_number(p, "ce", fr->ecn.ce);
+ }
+
+ *p++ = '}';
+
+ return p;
+}
+
+static uint8_t *write_reset_stream_frame(uint8_t *p,
+ const ngtcp2_reset_stream *fr) {
+ /*
+ * {"frame_type":"reset_stream","stream_id":0000000000000000000,"error_code":0000000000000000000,"final_size":0000000000000000000}
+ */
+#define NGTCP2_QLOG_RESET_STREAM_FRAME_OVERHEAD 127
+
+ p = write_verbatim(p, "{\"frame_type\":\"reset_stream\",");
+ p = write_pair_number(p, "stream_id", (uint64_t)fr->stream_id);
+ *p++ = ',';
+ p = write_pair_number(p, "error_code", fr->app_error_code);
+ *p++ = ',';
+ p = write_pair_number(p, "final_size", fr->final_size);
+ *p++ = '}';
+
+ return p;
+}
+
+static uint8_t *write_stop_sending_frame(uint8_t *p,
+ const ngtcp2_stop_sending *fr) {
+ /*
+ * {"frame_type":"stop_sending","stream_id":0000000000000000000,"error_code":0000000000000000000}
+ */
+#define NGTCP2_QLOG_STOP_SENDING_FRAME_OVERHEAD 94
+
+ p = write_verbatim(p, "{\"frame_type\":\"stop_sending\",");
+ p = write_pair_number(p, "stream_id", (uint64_t)fr->stream_id);
+ *p++ = ',';
+ p = write_pair_number(p, "error_code", fr->app_error_code);
+ *p++ = '}';
+
+ return p;
+}
+
+static uint8_t *write_crypto_frame(uint8_t *p, const ngtcp2_crypto *fr) {
+ /*
+ * {"frame_type":"crypto","offset":0000000000000000000,"length":0000000000000000000}
+ */
+#define NGTCP2_QLOG_CRYPTO_FRAME_OVERHEAD 81
+
+ p = write_verbatim(p, "{\"frame_type\":\"crypto\",");
+ p = write_pair_number(p, "offset", fr->offset);
+ *p++ = ',';
+ p = write_pair_number(p, "length", ngtcp2_vec_len(fr->data, fr->datacnt));
+ *p++ = '}';
+
+ return p;
+}
+
+static uint8_t *write_new_token_frame(uint8_t *p, const ngtcp2_new_token *fr) {
+ /*
+ * {"frame_type":"new_token","length":0000000000000000000,"token":{"data":""}}
+ */
+#define NGTCP2_QLOG_NEW_TOKEN_FRAME_OVERHEAD 75
+
+ p = write_verbatim(p, "{\"frame_type\":\"new_token\",");
+ p = write_pair_number(p, "length", fr->tokenlen);
+ p = write_verbatim(p, ",\"token\":{");
+ p = write_pair_hex(p, "data", fr->token, fr->tokenlen);
+ *p++ = '}';
+ *p++ = '}';
+
+ return p;
+}
+
+static uint8_t *write_stream_frame(uint8_t *p, const ngtcp2_stream *fr) {
+ /*
+ * {"frame_type":"stream","stream_id":0000000000000000000,"offset":0000000000000000000,"length":0000000000000000000,"fin":true}
+ */
+#define NGTCP2_QLOG_STREAM_FRAME_OVERHEAD 124
+
+ p = write_verbatim(p, "{\"frame_type\":\"stream\",");
+ p = write_pair_number(p, "stream_id", (uint64_t)fr->stream_id);
+ *p++ = ',';
+ p = write_pair_number(p, "offset", fr->offset);
+ *p++ = ',';
+ p = write_pair_number(p, "length", ngtcp2_vec_len(fr->data, fr->datacnt));
+ if (fr->fin) {
+ *p++ = ',';
+ p = write_pair_bool(p, "fin", 1);
+ }
+ *p++ = '}';
+
+ return p;
+}
+
+static uint8_t *write_max_data_frame(uint8_t *p, const ngtcp2_max_data *fr) {
+ /*
+ * {"frame_type":"max_data","maximum":0000000000000000000}
+ */
+#define NGTCP2_QLOG_MAX_DATA_FRAME_OVERHEAD 55
+
+ p = write_verbatim(p, "{\"frame_type\":\"max_data\",");
+ p = write_pair_number(p, "maximum", fr->max_data);
+ *p++ = '}';
+
+ return p;
+}
+
+static uint8_t *write_max_stream_data_frame(uint8_t *p,
+ const ngtcp2_max_stream_data *fr) {
+ /*
+ * {"frame_type":"max_stream_data","stream_id":0000000000000000000,"maximum":0000000000000000000}
+ */
+#define NGTCP2_QLOG_MAX_STREAM_DATA_FRAME_OVERHEAD 94
+
+ p = write_verbatim(p, "{\"frame_type\":\"max_stream_data\",");
+ p = write_pair_number(p, "stream_id", (uint64_t)fr->stream_id);
+ *p++ = ',';
+ p = write_pair_number(p, "maximum", fr->max_stream_data);
+ *p++ = '}';
+
+ return p;
+}
+
+static uint8_t *write_max_streams_frame(uint8_t *p,
+ const ngtcp2_max_streams *fr) {
+ /*
+ * {"frame_type":"max_streams","stream_type":"unidirectional","maximum":0000000000000000000}
+ */
+#define NGTCP2_QLOG_MAX_STREAMS_FRAME_OVERHEAD 89
+
+ p = write_verbatim(p, "{\"frame_type\":\"max_streams\",\"stream_type\":");
+ if (fr->type == NGTCP2_FRAME_MAX_STREAMS_BIDI) {
+ p = write_string(p, "bidirectional");
+ } else {
+ p = write_string(p, "unidirectional");
+ }
+ *p++ = ',';
+ p = write_pair_number(p, "maximum", fr->max_streams);
+ *p++ = '}';
+
+ return p;
+}
+
+static uint8_t *write_data_blocked_frame(uint8_t *p,
+ const ngtcp2_data_blocked *fr) {
+ (void)fr;
+
+ /*
+ * {"frame_type":"data_blocked"}
+ */
+#define NGTCP2_QLOG_DATA_BLOCKED_FRAME_OVERHEAD 29
+
+ /* TODO log limit */
+
+ return write_verbatim(p, "{\"frame_type\":\"data_blocked\"}");
+}
+
+static uint8_t *
+write_stream_data_blocked_frame(uint8_t *p,
+ const ngtcp2_stream_data_blocked *fr) {
+ (void)fr;
+
+ /*
+ * {"frame_type":"stream_data_blocked"}
+ */
+#define NGTCP2_QLOG_STREAM_DATA_BLOCKED_FRAME_OVERHEAD 36
+
+ /* TODO log limit */
+
+ return write_verbatim(p, "{\"frame_type\":\"stream_data_blocked\"}");
+}
+
+static uint8_t *write_streams_blocked_frame(uint8_t *p,
+ const ngtcp2_streams_blocked *fr) {
+ (void)fr;
+
+ /*
+ * {"frame_type":"streams_blocked"}
+ */
+#define NGTCP2_QLOG_STREAMS_BLOCKED_FRAME_OVERHEAD 32
+
+ /* TODO Log stream_type and limit */
+
+ return write_verbatim(p, "{\"frame_type\":\"streams_blocked\"}");
+}
+
+static uint8_t *
+write_new_connection_id_frame(uint8_t *p, const ngtcp2_new_connection_id *fr) {
+ /*
+ * {"frame_type":"new_connection_id","sequence_number":0000000000000000000,"retire_prior_to":0000000000000000000,"connection_id_length":0000000000000000000,"connection_id":"xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx","stateless_reset_token":{"data":"xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"}}
+ */
+#define NGTCP2_QLOG_NEW_CONNECTION_ID_FRAME_OVERHEAD 280
+
+ p = write_verbatim(p, "{\"frame_type\":\"new_connection_id\",");
+ p = write_pair_number(p, "sequence_number", fr->seq);
+ *p++ = ',';
+ p = write_pair_number(p, "retire_prior_to", fr->retire_prior_to);
+ *p++ = ',';
+ p = write_pair_number(p, "connection_id_length", fr->cid.datalen);
+ *p++ = ',';
+ p = write_pair_cid(p, "connection_id", &fr->cid);
+ p = write_verbatim(p, ",\"stateless_reset_token\":{");
+ p = write_pair_hex(p, "data", fr->stateless_reset_token,
+ sizeof(fr->stateless_reset_token));
+ *p++ = '}';
+ *p++ = '}';
+
+ return p;
+}
+
+static uint8_t *
+write_retire_connection_id_frame(uint8_t *p,
+ const ngtcp2_retire_connection_id *fr) {
+ /*
+ * {"frame_type":"retire_connection_id","sequence_number":0000000000000000000}
+ */
+#define NGTCP2_QLOG_RETIRE_CONNECTION_ID_FRAME_OVERHEAD 75
+
+ p = write_verbatim(p, "{\"frame_type\":\"retire_connection_id\",");
+ p = write_pair_number(p, "sequence_number", fr->seq);
+ *p++ = '}';
+
+ return p;
+}
+
+static uint8_t *write_path_challenge_frame(uint8_t *p,
+ const ngtcp2_path_challenge *fr) {
+ /*
+ * {"frame_type":"path_challenge","data":"xxxxxxxxxxxxxxxx"}
+ */
+#define NGTCP2_QLOG_PATH_CHALLENGE_FRAME_OVERHEAD 57
+
+ p = write_verbatim(p, "{\"frame_type\":\"path_challenge\",");
+ p = write_pair_hex(p, "data", fr->data, sizeof(fr->data));
+ *p++ = '}';
+
+ return p;
+}
+
+static uint8_t *write_path_response_frame(uint8_t *p,
+ const ngtcp2_path_response *fr) {
+ /*
+ * {"frame_type":"path_response","data":"xxxxxxxxxxxxxxxx"}
+ */
+#define NGTCP2_QLOG_PATH_RESPONSE_FRAME_OVERHEAD 56
+
+ p = write_verbatim(p, "{\"frame_type\":\"path_response\",");
+ p = write_pair_hex(p, "data", fr->data, sizeof(fr->data));
+ *p++ = '}';
+
+ return p;
+}
+
+static uint8_t *
+write_connection_close_frame(uint8_t *p, const ngtcp2_connection_close *fr) {
+ /*
+ * {"frame_type":"connection_close","error_space":"application","error_code":0000000000000000000,"raw_error_code":0000000000000000000}
+ */
+#define NGTCP2_QLOG_CONNECTION_CLOSE_FRAME_OVERHEAD 131
+
+ p = write_verbatim(p,
+ "{\"frame_type\":\"connection_close\",\"error_space\":");
+ if (fr->type == NGTCP2_FRAME_CONNECTION_CLOSE) {
+ p = write_string(p, "transport");
+ } else {
+ p = write_string(p, "application");
+ }
+ *p++ = ',';
+ p = write_pair_number(p, "error_code", fr->error_code);
+ *p++ = ',';
+ p = write_pair_number(p, "raw_error_code", fr->error_code);
+ /* TODO Write reason by escaping non-printables */
+ /* TODO Write trigger_frame_type */
+ *p++ = '}';
+
+ return p;
+}
+
+static uint8_t *write_handshake_done_frame(uint8_t *p,
+ const ngtcp2_handshake_done *fr) {
+ (void)fr;
+
+ /*
+ * {"frame_type":"handshake_done"}
+ */
+#define NGTCP2_QLOG_HANDSHAKE_DONE_FRAME_OVERHEAD 31
+
+ return write_verbatim(p, "{\"frame_type\":\"handshake_done\"}");
+}
+
+static uint8_t *write_datagram_frame(uint8_t *p, const ngtcp2_datagram *fr) {
+ /*
+ * {"frame_type":"datagram","length":0000000000000000000}
+ */
+#define NGTCP2_QLOG_DATAGRAM_FRAME_OVERHEAD 54
+
+ p = write_verbatim(p, "{\"frame_type\":\"datagram\",");
+ p = write_pair_number(p, "length", ngtcp2_vec_len(fr->data, fr->datacnt));
+ *p++ = '}';
+
+ return p;
+}
+
+static uint8_t *qlog_write_time(ngtcp2_qlog *qlog, uint8_t *p) {
+ return write_pair_tstamp(p, "time", qlog->last_ts - qlog->ts);
+}
+
+static void qlog_pkt_write_start(ngtcp2_qlog *qlog, int sent) {
+ uint8_t *p;
+
+ if (!qlog->write) {
+ return;
+ }
+
+ ngtcp2_buf_reset(&qlog->buf);
+ p = qlog->buf.last;
+
+ *p++ = '\x1e';
+ *p++ = '{';
+ p = qlog_write_time(qlog, p);
+ p = write_verbatim(p, ",\"name\":");
+ if (sent) {
+ p = write_string(p, "transport:packet_sent");
+ } else {
+ p = write_string(p, "transport:packet_received");
+ }
+ p = write_verbatim(p, ",\"data\":{\"frames\":[");
+ qlog->buf.last = p;
+}
+
+static void qlog_pkt_write_end(ngtcp2_qlog *qlog, const ngtcp2_pkt_hd *hd,
+ size_t pktlen) {
+ uint8_t *p = qlog->buf.last;
+
+ if (!qlog->write) {
+ return;
+ }
+
+ /*
+ * ],"header":,"raw":{"length":0000000000000000000}}}
+ *
+ * plus, terminating LF
+ */
+#define NGTCP2_QLOG_PKT_WRITE_END_OVERHEAD \
+ (1 + 50 + NGTCP2_QLOG_PKT_HD_OVERHEAD)
+
+ if (ngtcp2_buf_left(&qlog->buf) <
+ NGTCP2_QLOG_PKT_WRITE_END_OVERHEAD + hd->tokenlen * 2) {
+ return;
+ }
+
+ assert(ngtcp2_buf_len(&qlog->buf));
+
+ /* Eat last ',' */
+ if (*(p - 1) == ',') {
+ --p;
+ }
+
+ p = write_verbatim(p, "],\"header\":");
+ p = write_pkt_hd(p, hd);
+ p = write_verbatim(p, ",\"raw\":{\"length\":");
+ p = write_number(p, pktlen);
+ p = write_verbatim(p, "}}}\n");
+
+ qlog->buf.last = p;
+
+ qlog->write(qlog->user_data, NGTCP2_QLOG_WRITE_FLAG_NONE, qlog->buf.pos,
+ ngtcp2_buf_len(&qlog->buf));
+}
+
+void ngtcp2_qlog_write_frame(ngtcp2_qlog *qlog, const ngtcp2_frame *fr) {
+ uint8_t *p = qlog->buf.last;
+
+ if (!qlog->write) {
+ return;
+ }
+
+ switch (fr->type) {
+ case NGTCP2_FRAME_PADDING:
+ if (ngtcp2_buf_left(&qlog->buf) < NGTCP2_QLOG_PADDING_FRAME_OVERHEAD + 1) {
+ return;
+ }
+ p = write_padding_frame(p, &fr->padding);
+ break;
+ case NGTCP2_FRAME_PING:
+ if (ngtcp2_buf_left(&qlog->buf) < NGTCP2_QLOG_PING_FRAME_OVERHEAD + 1) {
+ return;
+ }
+ p = write_ping_frame(p, &fr->ping);
+ break;
+ case NGTCP2_FRAME_ACK:
+ case NGTCP2_FRAME_ACK_ECN:
+ if (ngtcp2_buf_left(&qlog->buf) <
+ NGTCP2_QLOG_ACK_FRAME_BASE_OVERHEAD +
+ (size_t)(fr->type == NGTCP2_FRAME_ACK_ECN
+ ? NGTCP2_QLOG_ACK_FRAME_ECN_OVERHEAD
+ : 0) +
+ NGTCP2_QLOG_ACK_FRAME_RANGE_OVERHEAD * (1 + fr->ack.rangecnt) + 1) {
+ return;
+ }
+ p = write_ack_frame(p, &fr->ack);
+ break;
+ case NGTCP2_FRAME_RESET_STREAM:
+ if (ngtcp2_buf_left(&qlog->buf) <
+ NGTCP2_QLOG_RESET_STREAM_FRAME_OVERHEAD + 1) {
+ return;
+ }
+ p = write_reset_stream_frame(p, &fr->reset_stream);
+ break;
+ case NGTCP2_FRAME_STOP_SENDING:
+ if (ngtcp2_buf_left(&qlog->buf) <
+ NGTCP2_QLOG_STOP_SENDING_FRAME_OVERHEAD + 1) {
+ return;
+ }
+ p = write_stop_sending_frame(p, &fr->stop_sending);
+ break;
+ case NGTCP2_FRAME_CRYPTO:
+ if (ngtcp2_buf_left(&qlog->buf) < NGTCP2_QLOG_CRYPTO_FRAME_OVERHEAD + 1) {
+ return;
+ }
+ p = write_crypto_frame(p, &fr->crypto);
+ break;
+ case NGTCP2_FRAME_NEW_TOKEN:
+ if (ngtcp2_buf_left(&qlog->buf) <
+ NGTCP2_QLOG_NEW_TOKEN_FRAME_OVERHEAD + fr->new_token.tokenlen * 2 + 1) {
+ return;
+ }
+ p = write_new_token_frame(p, &fr->new_token);
+ break;
+ case NGTCP2_FRAME_STREAM:
+ if (ngtcp2_buf_left(&qlog->buf) < NGTCP2_QLOG_STREAM_FRAME_OVERHEAD + 1) {
+ return;
+ }
+ p = write_stream_frame(p, &fr->stream);
+ break;
+ case NGTCP2_FRAME_MAX_DATA:
+ if (ngtcp2_buf_left(&qlog->buf) < NGTCP2_QLOG_MAX_DATA_FRAME_OVERHEAD + 1) {
+ return;
+ }
+ p = write_max_data_frame(p, &fr->max_data);
+ break;
+ case NGTCP2_FRAME_MAX_STREAM_DATA:
+ if (ngtcp2_buf_left(&qlog->buf) <
+ NGTCP2_QLOG_MAX_STREAM_DATA_FRAME_OVERHEAD + 1) {
+ return;
+ }
+ p = write_max_stream_data_frame(p, &fr->max_stream_data);
+ break;
+ case NGTCP2_FRAME_MAX_STREAMS_BIDI:
+ case NGTCP2_FRAME_MAX_STREAMS_UNI:
+ if (ngtcp2_buf_left(&qlog->buf) <
+ NGTCP2_QLOG_MAX_STREAMS_FRAME_OVERHEAD + 1) {
+ return;
+ }
+ p = write_max_streams_frame(p, &fr->max_streams);
+ break;
+ case NGTCP2_FRAME_DATA_BLOCKED:
+ if (ngtcp2_buf_left(&qlog->buf) <
+ NGTCP2_QLOG_DATA_BLOCKED_FRAME_OVERHEAD + 1) {
+ return;
+ }
+ p = write_data_blocked_frame(p, &fr->data_blocked);
+ break;
+ case NGTCP2_FRAME_STREAM_DATA_BLOCKED:
+ if (ngtcp2_buf_left(&qlog->buf) <
+ NGTCP2_QLOG_STREAM_DATA_BLOCKED_FRAME_OVERHEAD + 1) {
+ return;
+ }
+ p = write_stream_data_blocked_frame(p, &fr->stream_data_blocked);
+ break;
+ case NGTCP2_FRAME_STREAMS_BLOCKED_BIDI:
+ case NGTCP2_FRAME_STREAMS_BLOCKED_UNI:
+ if (ngtcp2_buf_left(&qlog->buf) <
+ NGTCP2_QLOG_STREAMS_BLOCKED_FRAME_OVERHEAD + 1) {
+ return;
+ }
+ p = write_streams_blocked_frame(p, &fr->streams_blocked);
+ break;
+ case NGTCP2_FRAME_NEW_CONNECTION_ID:
+ if (ngtcp2_buf_left(&qlog->buf) <
+ NGTCP2_QLOG_NEW_CONNECTION_ID_FRAME_OVERHEAD + 1) {
+ return;
+ }
+ p = write_new_connection_id_frame(p, &fr->new_connection_id);
+ break;
+ case NGTCP2_FRAME_RETIRE_CONNECTION_ID:
+ if (ngtcp2_buf_left(&qlog->buf) <
+ NGTCP2_QLOG_RETIRE_CONNECTION_ID_FRAME_OVERHEAD + 1) {
+ return;
+ }
+ p = write_retire_connection_id_frame(p, &fr->retire_connection_id);
+ break;
+ case NGTCP2_FRAME_PATH_CHALLENGE:
+ if (ngtcp2_buf_left(&qlog->buf) <
+ NGTCP2_QLOG_PATH_CHALLENGE_FRAME_OVERHEAD + 1) {
+ return;
+ }
+ p = write_path_challenge_frame(p, &fr->path_challenge);
+ break;
+ case NGTCP2_FRAME_PATH_RESPONSE:
+ if (ngtcp2_buf_left(&qlog->buf) <
+ NGTCP2_QLOG_PATH_RESPONSE_FRAME_OVERHEAD + 1) {
+ return;
+ }
+ p = write_path_response_frame(p, &fr->path_response);
+ break;
+ case NGTCP2_FRAME_CONNECTION_CLOSE:
+ case NGTCP2_FRAME_CONNECTION_CLOSE_APP:
+ if (ngtcp2_buf_left(&qlog->buf) <
+ NGTCP2_QLOG_CONNECTION_CLOSE_FRAME_OVERHEAD + 1) {
+ return;
+ }
+ p = write_connection_close_frame(p, &fr->connection_close);
+ break;
+ case NGTCP2_FRAME_HANDSHAKE_DONE:
+ if (ngtcp2_buf_left(&qlog->buf) <
+ NGTCP2_QLOG_HANDSHAKE_DONE_FRAME_OVERHEAD + 1) {
+ return;
+ }
+ p = write_handshake_done_frame(p, &fr->handshake_done);
+ break;
+ case NGTCP2_FRAME_DATAGRAM:
+ case NGTCP2_FRAME_DATAGRAM_LEN:
+ if (ngtcp2_buf_left(&qlog->buf) < NGTCP2_QLOG_DATAGRAM_FRAME_OVERHEAD + 1) {
+ return;
+ }
+ p = write_datagram_frame(p, &fr->datagram);
+ break;
+ default:
+ ngtcp2_unreachable();
+ }
+
+ *p++ = ',';
+
+ qlog->buf.last = p;
+}
+
+void ngtcp2_qlog_pkt_received_start(ngtcp2_qlog *qlog) {
+ qlog_pkt_write_start(qlog, /* sent = */ 0);
+}
+
+void ngtcp2_qlog_pkt_received_end(ngtcp2_qlog *qlog, const ngtcp2_pkt_hd *hd,
+ size_t pktlen) {
+ qlog_pkt_write_end(qlog, hd, pktlen);
+}
+
+void ngtcp2_qlog_pkt_sent_start(ngtcp2_qlog *qlog) {
+ qlog_pkt_write_start(qlog, /* sent = */ 1);
+}
+
+void ngtcp2_qlog_pkt_sent_end(ngtcp2_qlog *qlog, const ngtcp2_pkt_hd *hd,
+ size_t pktlen) {
+ qlog_pkt_write_end(qlog, hd, pktlen);
+}
+
+void ngtcp2_qlog_parameters_set_transport_params(
+ ngtcp2_qlog *qlog, const ngtcp2_transport_params *params, int server,
+ ngtcp2_qlog_side side) {
+ uint8_t buf[1024];
+ uint8_t *p = buf;
+ const ngtcp2_preferred_addr *paddr;
+ const ngtcp2_sockaddr_in *sa_in;
+ const ngtcp2_sockaddr_in6 *sa_in6;
+
+ if (!qlog->write) {
+ return;
+ }
+
+ *p++ = '\x1e';
+ *p++ = '{';
+ p = qlog_write_time(qlog, p);
+ p = write_verbatim(
+ p, ",\"name\":\"transport:parameters_set\",\"data\":{\"owner\":");
+
+ if (side == NGTCP2_QLOG_SIDE_LOCAL) {
+ p = write_string(p, "local");
+ } else {
+ p = write_string(p, "remote");
+ }
+
+ *p++ = ',';
+ p = write_pair_cid(p, "initial_source_connection_id", &params->initial_scid);
+ *p++ = ',';
+ if (side == (server ? NGTCP2_QLOG_SIDE_LOCAL : NGTCP2_QLOG_SIDE_REMOTE)) {
+ p = write_pair_cid(p, "original_destination_connection_id",
+ &params->original_dcid);
+ *p++ = ',';
+ }
+ if (params->retry_scid_present) {
+ p = write_pair_cid(p, "retry_source_connection_id", &params->retry_scid);
+ *p++ = ',';
+ }
+ if (params->stateless_reset_token_present) {
+ p = write_verbatim(p, "\"stateless_reset_token\":{");
+ p = write_pair_hex(p, "data", params->stateless_reset_token,
+ sizeof(params->stateless_reset_token));
+ *p++ = '}';
+ *p++ = ',';
+ }
+ p = write_pair_bool(p, "disable_active_migration",
+ params->disable_active_migration);
+ *p++ = ',';
+ p = write_pair_duration(p, "max_idle_timeout", params->max_idle_timeout);
+ *p++ = ',';
+ p = write_pair_number(p, "max_udp_payload_size",
+ params->max_udp_payload_size);
+ *p++ = ',';
+ p = write_pair_number(p, "ack_delay_exponent", params->ack_delay_exponent);
+ *p++ = ',';
+ p = write_pair_duration(p, "max_ack_delay", params->max_ack_delay);
+ *p++ = ',';
+ p = write_pair_number(p, "active_connection_id_limit",
+ params->active_connection_id_limit);
+ *p++ = ',';
+ p = write_pair_number(p, "initial_max_data", params->initial_max_data);
+ *p++ = ',';
+ p = write_pair_number(p, "initial_max_stream_data_bidi_local",
+ params->initial_max_stream_data_bidi_local);
+ *p++ = ',';
+ p = write_pair_number(p, "initial_max_stream_data_bidi_remote",
+ params->initial_max_stream_data_bidi_remote);
+ *p++ = ',';
+ p = write_pair_number(p, "initial_max_stream_data_uni",
+ params->initial_max_stream_data_uni);
+ *p++ = ',';
+ p = write_pair_number(p, "initial_max_streams_bidi",
+ params->initial_max_streams_bidi);
+ *p++ = ',';
+ p = write_pair_number(p, "initial_max_streams_uni",
+ params->initial_max_streams_uni);
+ if (params->preferred_address_present) {
+ *p++ = ',';
+ paddr = &params->preferred_address;
+ p = write_string(p, "preferred_address");
+ *p++ = ':';
+ *p++ = '{';
+
+ if (paddr->ipv4_present) {
+ sa_in = &paddr->ipv4;
+
+ p = write_pair_hex(p, "ip_v4", (const uint8_t *)&sa_in->sin_addr,
+ sizeof(sa_in->sin_addr));
+ *p++ = ',';
+ p = write_pair_number(p, "port_v4", ngtcp2_ntohs(sa_in->sin_port));
+ *p++ = ',';
+ }
+
+ if (paddr->ipv6_present) {
+ sa_in6 = &paddr->ipv6;
+
+ p = write_pair_hex(p, "ip_v6", (const uint8_t *)&sa_in6->sin6_addr,
+ sizeof(sa_in6->sin6_addr));
+ *p++ = ',';
+ p = write_pair_number(p, "port_v6", ngtcp2_ntohs(sa_in6->sin6_port));
+ *p++ = ',';
+ }
+
+ p = write_pair_cid(p, "connection_id", &paddr->cid);
+ p = write_verbatim(p, ",\"stateless_reset_token\":{");
+ p = write_pair_hex(p, "data", paddr->stateless_reset_token,
+ sizeof(paddr->stateless_reset_token));
+ *p++ = '}';
+ *p++ = '}';
+ }
+ *p++ = ',';
+ p = write_pair_number(p, "max_datagram_frame_size",
+ params->max_datagram_frame_size);
+ *p++ = ',';
+ p = write_pair_bool(p, "grease_quic_bit", params->grease_quic_bit);
+ p = write_verbatim(p, "}}\n");
+
+ qlog->write(qlog->user_data, NGTCP2_QLOG_WRITE_FLAG_NONE, buf,
+ (size_t)(p - buf));
+}
+
+void ngtcp2_qlog_metrics_updated(ngtcp2_qlog *qlog,
+ const ngtcp2_conn_stat *cstat) {
+ uint8_t buf[1024];
+ uint8_t *p = buf;
+
+ if (!qlog->write) {
+ return;
+ }
+
+ *p++ = '\x1e';
+ *p++ = '{';
+ p = qlog_write_time(qlog, p);
+ p = write_verbatim(p, ",\"name\":\"recovery:metrics_updated\",\"data\":{");
+
+ if (cstat->min_rtt != UINT64_MAX) {
+ p = write_pair_duration(p, "min_rtt", cstat->min_rtt);
+ *p++ = ',';
+ }
+ p = write_pair_duration(p, "smoothed_rtt", cstat->smoothed_rtt);
+ *p++ = ',';
+ p = write_pair_duration(p, "latest_rtt", cstat->latest_rtt);
+ *p++ = ',';
+ p = write_pair_duration(p, "rtt_variance", cstat->rttvar);
+ *p++ = ',';
+ p = write_pair_number(p, "pto_count", cstat->pto_count);
+ *p++ = ',';
+ p = write_pair_number(p, "congestion_window", cstat->cwnd);
+ *p++ = ',';
+ p = write_pair_number(p, "bytes_in_flight", cstat->bytes_in_flight);
+ if (cstat->ssthresh != UINT64_MAX) {
+ *p++ = ',';
+ p = write_pair_number(p, "ssthresh", cstat->ssthresh);
+ }
+
+ p = write_verbatim(p, "}}\n");
+
+ qlog->write(qlog->user_data, NGTCP2_QLOG_WRITE_FLAG_NONE, buf,
+ (size_t)(p - buf));
+}
+
+void ngtcp2_qlog_pkt_lost(ngtcp2_qlog *qlog, ngtcp2_rtb_entry *ent) {
+ uint8_t buf[256];
+ uint8_t *p = buf;
+ ngtcp2_pkt_hd hd = {0};
+
+ if (!qlog->write) {
+ return;
+ }
+
+ *p++ = '\x1e';
+ *p++ = '{';
+ p = qlog_write_time(qlog, p);
+ p = write_verbatim(
+ p, ",\"name\":\"recovery:packet_lost\",\"data\":{\"header\":");
+
+ hd.type = ent->hd.type;
+ hd.flags = ent->hd.flags;
+ hd.pkt_num = ent->hd.pkt_num;
+
+ p = write_pkt_hd(p, &hd);
+ p = write_verbatim(p, "}}\n");
+
+ qlog->write(qlog->user_data, NGTCP2_QLOG_WRITE_FLAG_NONE, buf,
+ (size_t)(p - buf));
+}
+
+void ngtcp2_qlog_retry_pkt_received(ngtcp2_qlog *qlog, const ngtcp2_pkt_hd *hd,
+ const ngtcp2_pkt_retry *retry) {
+ uint8_t rawbuf[1024];
+ ngtcp2_buf buf;
+
+ if (!qlog->write) {
+ return;
+ }
+
+ ngtcp2_buf_init(&buf, rawbuf, sizeof(rawbuf));
+
+ *buf.last++ = '\x1e';
+ *buf.last++ = '{';
+ buf.last = qlog_write_time(qlog, buf.last);
+ buf.last = write_verbatim(
+ buf.last,
+ ",\"name\":\"transport:packet_received\",\"data\":{\"header\":");
+
+ if (ngtcp2_buf_left(&buf) <
+ NGTCP2_QLOG_PKT_HD_OVERHEAD + hd->tokenlen * 2 +
+ sizeof(",\"retry_token\":{\"data\":\"\"}}}\n") - 1 +
+ retry->tokenlen * 2) {
+ return;
+ }
+
+ buf.last = write_pkt_hd(buf.last, hd);
+ buf.last = write_verbatim(buf.last, ",\"retry_token\":{");
+ buf.last = write_pair_hex(buf.last, "data", retry->token, retry->tokenlen);
+ buf.last = write_verbatim(buf.last, "}}}\n");
+
+ qlog->write(qlog->user_data, NGTCP2_QLOG_WRITE_FLAG_NONE, buf.pos,
+ ngtcp2_buf_len(&buf));
+}
+
+void ngtcp2_qlog_stateless_reset_pkt_received(
+ ngtcp2_qlog *qlog, const ngtcp2_pkt_stateless_reset *sr) {
+ uint8_t buf[256];
+ uint8_t *p = buf;
+ ngtcp2_pkt_hd hd = {0};
+
+ if (!qlog->write) {
+ return;
+ }
+
+ hd.type = NGTCP2_PKT_STATELESS_RESET;
+
+ *p++ = '\x1e';
+ *p++ = '{';
+ p = qlog_write_time(qlog, p);
+ p = write_verbatim(
+ p, ",\"name\":\"transport:packet_received\",\"data\":{\"header\":");
+ p = write_pkt_hd(p, &hd);
+ *p++ = ',';
+ p = write_pair_hex(p, "stateless_reset_token", sr->stateless_reset_token,
+ NGTCP2_STATELESS_RESET_TOKENLEN);
+ p = write_verbatim(p, "}}\n");
+
+ qlog->write(qlog->user_data, NGTCP2_QLOG_WRITE_FLAG_NONE, buf,
+ (size_t)(p - buf));
+}
+
+void ngtcp2_qlog_version_negotiation_pkt_received(ngtcp2_qlog *qlog,
+ const ngtcp2_pkt_hd *hd,
+ const uint32_t *sv,
+ size_t nsv) {
+ uint8_t rawbuf[512];
+ ngtcp2_buf buf;
+ size_t i;
+ uint32_t v;
+
+ if (!qlog->write) {
+ return;
+ }
+
+ ngtcp2_buf_init(&buf, rawbuf, sizeof(rawbuf));
+
+ *buf.last++ = '\x1e';
+ *buf.last++ = '{';
+ buf.last = qlog_write_time(qlog, buf.last);
+ buf.last = write_verbatim(
+ buf.last,
+ ",\"name\":\"transport:packet_received\",\"data\":{\"header\":");
+ buf.last = write_pkt_hd(buf.last, hd);
+ buf.last = write_verbatim(buf.last, ",\"supported_versions\":[");
+
+ if (nsv) {
+ if (ngtcp2_buf_left(&buf) <
+ (sizeof("\"xxxxxxxx\",") - 1) * nsv - 1 + sizeof("]}}\n") - 1) {
+ return;
+ }
+
+ v = ngtcp2_htonl(sv[0]);
+ buf.last = write_hex(buf.last, (const uint8_t *)&v, sizeof(v));
+
+ for (i = 1; i < nsv; ++i) {
+ *buf.last++ = ',';
+ v = ngtcp2_htonl(sv[i]);
+ buf.last = write_hex(buf.last, (const uint8_t *)&v, sizeof(v));
+ }
+ }
+
+ buf.last = write_verbatim(buf.last, "]}}\n");
+
+ qlog->write(qlog->user_data, NGTCP2_QLOG_WRITE_FLAG_NONE, buf.pos,
+ ngtcp2_buf_len(&buf));
+}
diff --git a/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_qlog.h b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_qlog.h
new file mode 100644
index 0000000..b9107c0
--- /dev/null
+++ b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_qlog.h
@@ -0,0 +1,161 @@
+/*
+ * ngtcp2
+ *
+ * Copyright (c) 2019 ngtcp2 contributors
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#ifndef NGTCP2_QLOG_H
+#define NGTCP2_QLOG_H
+
+#ifdef HAVE_CONFIG_H
+# include <config.h>
+#endif /* HAVE_CONFIG_H */
+
+#include <ngtcp2/ngtcp2.h>
+
+#include "ngtcp2_pkt.h"
+#include "ngtcp2_cc.h"
+#include "ngtcp2_buf.h"
+#include "ngtcp2_rtb.h"
+
+/* NGTCP2_QLOG_BUFLEN is the length of heap allocated buffer for
+ qlog. */
+#define NGTCP2_QLOG_BUFLEN 4096
+
+typedef enum ngtcp2_qlog_side {
+ NGTCP2_QLOG_SIDE_LOCAL,
+ NGTCP2_QLOG_SIDE_REMOTE,
+} ngtcp2_qlog_side;
+
+typedef struct ngtcp2_qlog {
+ /* write is a callback function to write qlog. */
+ ngtcp2_qlog_write write;
+ /* ts is the initial timestamp */
+ ngtcp2_tstamp ts;
+ /* last_ts is the timestamp observed last time. */
+ ngtcp2_tstamp last_ts;
+ /* buf is a heap allocated buffer to write exclusively
+ packet_received and packet_sent. */
+ ngtcp2_buf buf;
+ /* user_data is an opaque pointer which is passed to write
+ callback. */
+ void *user_data;
+} ngtcp2_qlog;
+
+/*
+ * ngtcp2_qlog_init initializes |qlog|.
+ */
+void ngtcp2_qlog_init(ngtcp2_qlog *qlog, ngtcp2_qlog_write write,
+ ngtcp2_tstamp ts, void *user_data);
+
+/*
+ * ngtcp2_qlog_start writes qlog preamble.
+ */
+void ngtcp2_qlog_start(ngtcp2_qlog *qlog, const ngtcp2_cid *odcid, int server);
+
+/*
+ * ngtcp2_qlog_end writes closing part of qlog.
+ */
+void ngtcp2_qlog_end(ngtcp2_qlog *qlog);
+
+/*
+ * ngtcp2_qlog_write_frame writes |fr| to qlog->buf.
+ * ngtcp2_qlog_pkt_received_start or ngtcp2_qlog_pkt_sent_start must
+ * be called before calling this function.
+ */
+void ngtcp2_qlog_write_frame(ngtcp2_qlog *qlog, const ngtcp2_frame *fr);
+
+/*
+ * ngtcp2_qlog_pkt_received_start starts to write packet_received
+ * event. It initializes qlog->buf. It writes qlog to qlog->buf.
+ * ngtcp2_qlog_pkt_received_end will flush the content of qlog->buf to
+ * write callback.
+ */
+void ngtcp2_qlog_pkt_received_start(ngtcp2_qlog *qlog);
+
+/*
+ * ngtcp2_qlog_pkt_received_end ends packet_received event and sends
+ * the content of qlog->buf to qlog->write callback.
+ */
+void ngtcp2_qlog_pkt_received_end(ngtcp2_qlog *qlog, const ngtcp2_pkt_hd *hd,
+ size_t pktlen);
+
+/*
+ * ngtcp2_qlog_pkt_sent_start starts to write packet_sent event. It
+ * initializes qlog->buf. It writes qlog to qlog->buf.
+ * ngtcp2_qlog_pkt_sent_end will flush the content of qlog->buf to
+ * write callback.
+ */
+void ngtcp2_qlog_pkt_sent_start(ngtcp2_qlog *qlog);
+
+/*
+ * ngtcp2_qlog_pkt_sent_end ends packet_sent event and sends the
+ * content of qlog->buf to qlog->write callback.
+ */
+void ngtcp2_qlog_pkt_sent_end(ngtcp2_qlog *qlog, const ngtcp2_pkt_hd *hd,
+ size_t pktlen);
+
+/*
+ * ngtcp2_qlog_parameters_set_transport_params writes |params| to qlog
+ * as parameters_set event. |server| is nonzero if the local endpoint
+ * is server. If |local| is nonzero, it is "owner" field becomes
+ * "local", otherwise "remote".
+ */
+void ngtcp2_qlog_parameters_set_transport_params(
+ ngtcp2_qlog *qlog, const ngtcp2_transport_params *params, int server,
+ ngtcp2_qlog_side side);
+
+/*
+ * ngtcp2_qlog_metrics_updated writes metrics_updated event of
+ * recovery category.
+ */
+void ngtcp2_qlog_metrics_updated(ngtcp2_qlog *qlog,
+ const ngtcp2_conn_stat *cstat);
+
+/*
+ * ngtcp2_qlog_pkt_lost writes packet_lost event.
+ */
+void ngtcp2_qlog_pkt_lost(ngtcp2_qlog *qlog, ngtcp2_rtb_entry *ent);
+
+/*
+ * ngtcp2_qlog_retry_pkt_received writes packet_received event for a
+ * received Retry packet.
+ */
+void ngtcp2_qlog_retry_pkt_received(ngtcp2_qlog *qlog, const ngtcp2_pkt_hd *hd,
+ const ngtcp2_pkt_retry *retry);
+
+/*
+ * ngtcp2_qlog_stateless_reset_pkt_received writes packet_received
+ * event for a received Stateless Reset packet.
+ */
+void ngtcp2_qlog_stateless_reset_pkt_received(
+ ngtcp2_qlog *qlog, const ngtcp2_pkt_stateless_reset *sr);
+
+/*
+ * ngtcp2_qlog_version_negotiation_pkt_received writes packet_received
+ * event for a received Version Negotiation packet.
+ */
+void ngtcp2_qlog_version_negotiation_pkt_received(ngtcp2_qlog *qlog,
+ const ngtcp2_pkt_hd *hd,
+ const uint32_t *sv,
+ size_t nsv);
+
+#endif /* NGTCP2_QLOG_H */
diff --git a/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_range.c b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_range.c
new file mode 100644
index 0000000..9379496
--- /dev/null
+++ b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_range.c
@@ -0,0 +1,61 @@
+/*
+ * ngtcp2
+ *
+ * Copyright (c) 2017 ngtcp2 contributors
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "ngtcp2_range.h"
+#include "ngtcp2_macro.h"
+
+void ngtcp2_range_init(ngtcp2_range *r, uint64_t begin, uint64_t end) {
+ r->begin = begin;
+ r->end = end;
+}
+
+ngtcp2_range ngtcp2_range_intersect(const ngtcp2_range *a,
+ const ngtcp2_range *b) {
+ ngtcp2_range r = {0, 0};
+ uint64_t begin = ngtcp2_max(a->begin, b->begin);
+ uint64_t end = ngtcp2_min(a->end, b->end);
+ if (begin < end) {
+ ngtcp2_range_init(&r, begin, end);
+ }
+ return r;
+}
+
+uint64_t ngtcp2_range_len(const ngtcp2_range *r) { return r->end - r->begin; }
+
+int ngtcp2_range_eq(const ngtcp2_range *a, const ngtcp2_range *b) {
+ return a->begin == b->begin && a->end == b->end;
+}
+
+void ngtcp2_range_cut(ngtcp2_range *left, ngtcp2_range *right,
+ const ngtcp2_range *a, const ngtcp2_range *b) {
+ /* Assume that b is included in a */
+ left->begin = a->begin;
+ left->end = b->begin;
+ right->begin = b->end;
+ right->end = a->end;
+}
+
+int ngtcp2_range_not_after(const ngtcp2_range *a, const ngtcp2_range *b) {
+ return a->end <= b->end;
+}
diff --git a/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_range.h b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_range.h
new file mode 100644
index 0000000..a776c4e
--- /dev/null
+++ b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_range.h
@@ -0,0 +1,80 @@
+/*
+ * ngtcp2
+ *
+ * Copyright (c) 2017 ngtcp2 contributors
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#ifndef NGTCP2_RANGE_H
+#define NGTCP2_RANGE_H
+
+#ifdef HAVE_CONFIG_H
+# include <config.h>
+#endif /* HAVE_CONFIG_H */
+
+#include <ngtcp2/ngtcp2.h>
+
+/*
+ * ngtcp2_range represents half-closed range [begin, end).
+ */
+typedef struct ngtcp2_range {
+ uint64_t begin;
+ uint64_t end;
+} ngtcp2_range;
+
+/*
+ * ngtcp2_range_init initializes |r| with the range [|begin|, |end|).
+ */
+void ngtcp2_range_init(ngtcp2_range *r, uint64_t begin, uint64_t end);
+
+/*
+ * ngtcp2_range_intersect returns the intersection of |a| and |b|. If
+ * they do not overlap, it returns empty range.
+ */
+ngtcp2_range ngtcp2_range_intersect(const ngtcp2_range *a,
+ const ngtcp2_range *b);
+
+/*
+ * ngtcp2_range_len returns the length of |r|.
+ */
+uint64_t ngtcp2_range_len(const ngtcp2_range *r);
+
+/*
+ * ngtcp2_range_eq returns nonzero if |a| equals |b|, such that
+ * a->begin == b->begin, and a->end == b->end hold.
+ */
+int ngtcp2_range_eq(const ngtcp2_range *a, const ngtcp2_range *b);
+
+/*
+ * ngtcp2_range_cut returns the left and right range after removing
+ * |b| from |a|. This function assumes that |a| completely includes
+ * |b|. In other words, a->begin <= b->begin and b->end <= a->end
+ * hold.
+ */
+void ngtcp2_range_cut(ngtcp2_range *left, ngtcp2_range *right,
+ const ngtcp2_range *a, const ngtcp2_range *b);
+
+/*
+ * ngtcp2_range_not_after returns nonzero if the right edge of |a|
+ * does not go beyond of the right edge of |b|.
+ */
+int ngtcp2_range_not_after(const ngtcp2_range *a, const ngtcp2_range *b);
+
+#endif /* NGTCP2_RANGE_H */
diff --git a/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_rcvry.h b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_rcvry.h
new file mode 100644
index 0000000..4cb4088
--- /dev/null
+++ b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_rcvry.h
@@ -0,0 +1,40 @@
+/*
+ * ngtcp2
+ *
+ * Copyright (c) 2019 ngtcp2 contributors
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#ifndef NGTCP2_RCVRY_H
+#define NGTCP2_RCVRY_H
+
+#ifdef HAVE_CONFIG_H
+# include <config.h>
+#endif /* HAVE_CONFIG_H */
+
+#include <ngtcp2/ngtcp2.h>
+
+/* NGTCP2_PKT_THRESHOLD is kPacketThreshold described in RFC 9002. */
+#define NGTCP2_PKT_THRESHOLD 3
+
+/* NGTCP2_GRANULARITY is kGranularity described in RFC 9002. */
+#define NGTCP2_GRANULARITY NGTCP2_MILLISECONDS
+
+#endif /* NGTCP2_RCVRY_H */
diff --git a/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_ringbuf.c b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_ringbuf.c
new file mode 100644
index 0000000..a6b3f73
--- /dev/null
+++ b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_ringbuf.c
@@ -0,0 +1,120 @@
+/*
+ * ngtcp2
+ *
+ * Copyright (c) 2017 ngtcp2 contributors
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "ngtcp2_ringbuf.h"
+
+#include <assert.h>
+#ifdef WIN32
+# include <intrin.h>
+#endif
+
+#include "ngtcp2_macro.h"
+
+#if defined(_MSC_VER) && defined(_M_ARM64)
+unsigned int __popcnt(unsigned int x) {
+ unsigned int c = 0;
+ for (; x; ++c) {
+ x &= x - 1;
+ }
+ return c;
+}
+#endif
+
+int ngtcp2_ringbuf_init(ngtcp2_ringbuf *rb, size_t nmemb, size_t size,
+ const ngtcp2_mem *mem) {
+ uint8_t *buf = ngtcp2_mem_malloc(mem, nmemb * size);
+ if (buf == NULL) {
+ return NGTCP2_ERR_NOMEM;
+ }
+
+ ngtcp2_ringbuf_buf_init(rb, nmemb, size, buf, mem);
+
+ return 0;
+}
+
+void ngtcp2_ringbuf_buf_init(ngtcp2_ringbuf *rb, size_t nmemb, size_t size,
+ uint8_t *buf, const ngtcp2_mem *mem) {
+#ifdef WIN32
+ assert(1 == __popcnt((unsigned int)nmemb));
+#else
+ assert(1 == __builtin_popcount((unsigned int)nmemb));
+#endif
+
+ rb->buf = buf;
+ rb->mem = mem;
+ rb->nmemb = nmemb;
+ rb->size = size;
+ rb->first = 0;
+ rb->len = 0;
+}
+
+void ngtcp2_ringbuf_free(ngtcp2_ringbuf *rb) {
+ if (rb == NULL) {
+ return;
+ }
+
+ ngtcp2_mem_free(rb->mem, rb->buf);
+}
+
+void *ngtcp2_ringbuf_push_front(ngtcp2_ringbuf *rb) {
+ rb->first = (rb->first - 1) & (rb->nmemb - 1);
+ rb->len = ngtcp2_min(rb->nmemb, rb->len + 1);
+
+ return (void *)&rb->buf[rb->first * rb->size];
+}
+
+void *ngtcp2_ringbuf_push_back(ngtcp2_ringbuf *rb) {
+ size_t offset = (rb->first + rb->len) & (rb->nmemb - 1);
+
+ if (rb->len == rb->nmemb) {
+ rb->first = (rb->first + 1) & (rb->nmemb - 1);
+ } else {
+ ++rb->len;
+ }
+
+ return (void *)&rb->buf[offset * rb->size];
+}
+
+void ngtcp2_ringbuf_pop_front(ngtcp2_ringbuf *rb) {
+ rb->first = (rb->first + 1) & (rb->nmemb - 1);
+ --rb->len;
+}
+
+void ngtcp2_ringbuf_pop_back(ngtcp2_ringbuf *rb) {
+ assert(rb->len);
+ --rb->len;
+}
+
+void ngtcp2_ringbuf_resize(ngtcp2_ringbuf *rb, size_t len) {
+ assert(len <= rb->nmemb);
+ rb->len = len;
+}
+
+void *ngtcp2_ringbuf_get(ngtcp2_ringbuf *rb, size_t offset) {
+ assert(offset < rb->len);
+ offset = (rb->first + offset) & (rb->nmemb - 1);
+ return &rb->buf[offset * rb->size];
+}
+
+int ngtcp2_ringbuf_full(ngtcp2_ringbuf *rb) { return rb->len == rb->nmemb; }
diff --git a/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_ringbuf.h b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_ringbuf.h
new file mode 100644
index 0000000..16635c9
--- /dev/null
+++ b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_ringbuf.h
@@ -0,0 +1,132 @@
+/*
+ * ngtcp2
+ *
+ * Copyright (c) 2017 ngtcp2 contributors
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#ifndef NGTCP2_RINGBUF_H
+#define NGTCP2_RINGBUF_H
+
+#ifdef HAVE_CONFIG_H
+# include <config.h>
+#endif /* HAVE_CONFIG_H */
+
+#include <ngtcp2/ngtcp2.h>
+
+#include "ngtcp2_mem.h"
+
+typedef struct ngtcp2_ringbuf {
+ /* buf points to the underlying buffer. */
+ uint8_t *buf;
+ const ngtcp2_mem *mem;
+ /* nmemb is the number of elements that can be stored in this ring
+ buffer. */
+ size_t nmemb;
+ /* size is the size of each element. */
+ size_t size;
+ /* first is the offset to the first element. */
+ size_t first;
+ /* len is the number of elements actually stored. */
+ size_t len;
+} ngtcp2_ringbuf;
+
+/*
+ * ngtcp2_ringbuf_init initializes |rb|. |nmemb| is the number of
+ * elements that can be stored in this buffer. |size| is the size of
+ * each element. |size| must be power of 2.
+ *
+ * This function returns 0 if it succeeds, or one of the following
+ * negative error codes:
+ *
+ * NGTCP2_ERR_NOMEM
+ * Out of memory.
+ */
+int ngtcp2_ringbuf_init(ngtcp2_ringbuf *rb, size_t nmemb, size_t size,
+ const ngtcp2_mem *mem);
+
+/*
+ * ngtcp2_ringbuf_buf_init initializes |rb| with given buffer and
+ * size.
+ */
+void ngtcp2_ringbuf_buf_init(ngtcp2_ringbuf *rb, size_t nmemb, size_t size,
+ uint8_t *buf, const ngtcp2_mem *mem);
+
+/*
+ * ngtcp2_ringbuf_free frees resources allocated for |rb|. This
+ * function does not free the memory pointed by |rb|.
+ */
+void ngtcp2_ringbuf_free(ngtcp2_ringbuf *rb);
+
+/* ngtcp2_ringbuf_push_front moves the offset to the first element in
+ the buffer backward, and returns the pointer to the element.
+ Caller can store data to the buffer pointed by the returned
+ pointer. If this action exceeds the capacity of the ring buffer,
+ the last element is silently overwritten, and rb->len remains
+ unchanged. */
+void *ngtcp2_ringbuf_push_front(ngtcp2_ringbuf *rb);
+
+/* ngtcp2_ringbuf_push_back moves the offset to the last element in
+ the buffer forward, and returns the pointer to the element. Caller
+ can store data to the buffer pointed by the returned pointer. If
+ this action exceeds the capacity of the ring buffer, the first
+ element is silently overwritten, and rb->len remains unchanged. */
+void *ngtcp2_ringbuf_push_back(ngtcp2_ringbuf *rb);
+
+/*
+ * ngtcp2_ringbuf_pop_front removes first element in |rb|.
+ */
+void ngtcp2_ringbuf_pop_front(ngtcp2_ringbuf *rb);
+
+/*
+ * ngtcp2_ringbuf_pop_back removes the last element in |rb|.
+ */
+void ngtcp2_ringbuf_pop_back(ngtcp2_ringbuf *rb);
+
+/* ngtcp2_ringbuf_resize changes the number of elements stored. This
+ does not change the capacity of the underlying buffer. */
+void ngtcp2_ringbuf_resize(ngtcp2_ringbuf *rb, size_t len);
+
+/* ngtcp2_ringbuf_get returns the pointer to the element at
+ |offset|. */
+void *ngtcp2_ringbuf_get(ngtcp2_ringbuf *rb, size_t offset);
+
+/* ngtcp2_ringbuf_len returns the number of elements stored. */
+#define ngtcp2_ringbuf_len(RB) ((RB)->len)
+
+/* ngtcp2_ringbuf_full returns nonzero if |rb| is full. */
+int ngtcp2_ringbuf_full(ngtcp2_ringbuf *rb);
+
+/* ngtcp2_static_ringbuf_def defines ngtcp2_ringbuf struct wrapper
+ which uses a statically allocated buffer that is suitable for a
+ usage that does not change buffer size with ngtcp2_ringbuf_resize.
+ ngtcp2_ringbuf_free should never be called for rb field. */
+#define ngtcp2_static_ringbuf_def(NAME, NMEMB, SIZE) \
+ typedef struct ngtcp2_static_ringbuf_##NAME { \
+ ngtcp2_ringbuf rb; \
+ uint8_t buf[(NMEMB) * (SIZE)]; \
+ } ngtcp2_static_ringbuf_##NAME; \
+ \
+ static inline void ngtcp2_static_ringbuf_##NAME##_init( \
+ ngtcp2_static_ringbuf_##NAME *srb) { \
+ ngtcp2_ringbuf_buf_init(&srb->rb, (NMEMB), (SIZE), srb->buf, NULL); \
+ }
+
+#endif /* NGTCP2_RINGBUF_H */
diff --git a/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_rob.c b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_rob.c
new file mode 100644
index 0000000..9c3d75d
--- /dev/null
+++ b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_rob.c
@@ -0,0 +1,319 @@
+/*
+ * ngtcp2
+ *
+ * Copyright (c) 2017 ngtcp2 contributors
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "ngtcp2_rob.h"
+
+#include <string.h>
+#include <assert.h>
+
+#include "ngtcp2_macro.h"
+
+int ngtcp2_rob_gap_new(ngtcp2_rob_gap **pg, uint64_t begin, uint64_t end,
+ const ngtcp2_mem *mem) {
+ *pg = ngtcp2_mem_malloc(mem, sizeof(ngtcp2_rob_gap));
+ if (*pg == NULL) {
+ return NGTCP2_ERR_NOMEM;
+ }
+
+ (*pg)->range.begin = begin;
+ (*pg)->range.end = end;
+
+ return 0;
+}
+
+void ngtcp2_rob_gap_del(ngtcp2_rob_gap *g, const ngtcp2_mem *mem) {
+ ngtcp2_mem_free(mem, g);
+}
+
+int ngtcp2_rob_data_new(ngtcp2_rob_data **pd, uint64_t offset, size_t chunk,
+ const ngtcp2_mem *mem) {
+ *pd = ngtcp2_mem_malloc(mem, sizeof(ngtcp2_rob_data) + chunk);
+ if (*pd == NULL) {
+ return NGTCP2_ERR_NOMEM;
+ }
+
+ (*pd)->range.begin = offset;
+ (*pd)->range.end = offset + chunk;
+ (*pd)->begin = (uint8_t *)(*pd) + sizeof(ngtcp2_rob_data);
+ (*pd)->end = (*pd)->begin + chunk;
+
+ return 0;
+}
+
+void ngtcp2_rob_data_del(ngtcp2_rob_data *d, const ngtcp2_mem *mem) {
+ ngtcp2_mem_free(mem, d);
+}
+
+int ngtcp2_rob_init(ngtcp2_rob *rob, size_t chunk, const ngtcp2_mem *mem) {
+ int rv;
+ ngtcp2_rob_gap *g;
+
+ ngtcp2_ksl_init(&rob->gapksl, ngtcp2_ksl_range_compar, sizeof(ngtcp2_range),
+ mem);
+
+ rv = ngtcp2_rob_gap_new(&g, 0, UINT64_MAX, mem);
+ if (rv != 0) {
+ goto fail_rob_gap_new;
+ }
+
+ rv = ngtcp2_ksl_insert(&rob->gapksl, NULL, &g->range, g);
+ if (rv != 0) {
+ goto fail_gapksl_ksl_insert;
+ }
+
+ ngtcp2_ksl_init(&rob->dataksl, ngtcp2_ksl_range_compar, sizeof(ngtcp2_range),
+ mem);
+
+ rob->chunk = chunk;
+ rob->mem = mem;
+
+ return 0;
+
+fail_gapksl_ksl_insert:
+ ngtcp2_rob_gap_del(g, mem);
+fail_rob_gap_new:
+ ngtcp2_ksl_free(&rob->gapksl);
+ return rv;
+}
+
+void ngtcp2_rob_free(ngtcp2_rob *rob) {
+ ngtcp2_ksl_it it;
+
+ if (rob == NULL) {
+ return;
+ }
+
+ for (it = ngtcp2_ksl_begin(&rob->dataksl); !ngtcp2_ksl_it_end(&it);
+ ngtcp2_ksl_it_next(&it)) {
+ ngtcp2_rob_data_del(ngtcp2_ksl_it_get(&it), rob->mem);
+ }
+
+ for (it = ngtcp2_ksl_begin(&rob->gapksl); !ngtcp2_ksl_it_end(&it);
+ ngtcp2_ksl_it_next(&it)) {
+ ngtcp2_rob_gap_del(ngtcp2_ksl_it_get(&it), rob->mem);
+ }
+
+ ngtcp2_ksl_free(&rob->dataksl);
+ ngtcp2_ksl_free(&rob->gapksl);
+}
+
+static int rob_write_data(ngtcp2_rob *rob, uint64_t offset, const uint8_t *data,
+ size_t len) {
+ size_t n;
+ int rv;
+ ngtcp2_rob_data *d;
+ ngtcp2_range range = {offset, offset + len};
+ ngtcp2_ksl_it it;
+
+ for (it = ngtcp2_ksl_lower_bound_compar(&rob->dataksl, &range,
+ ngtcp2_ksl_range_exclusive_compar);
+ len; ngtcp2_ksl_it_next(&it)) {
+ if (ngtcp2_ksl_it_end(&it)) {
+ d = NULL;
+ } else {
+ d = ngtcp2_ksl_it_get(&it);
+ }
+
+ if (d == NULL || offset < d->range.begin) {
+ rv = ngtcp2_rob_data_new(&d, (offset / rob->chunk) * rob->chunk,
+ rob->chunk, rob->mem);
+ if (rv != 0) {
+ return rv;
+ }
+
+ rv = ngtcp2_ksl_insert(&rob->dataksl, &it, &d->range, d);
+ if (rv != 0) {
+ ngtcp2_rob_data_del(d, rob->mem);
+ return rv;
+ }
+ }
+
+ n = (size_t)ngtcp2_min((uint64_t)len, d->range.begin + rob->chunk - offset);
+ memcpy(d->begin + (offset - d->range.begin), data, n);
+ offset += n;
+ data += n;
+ len -= n;
+ }
+
+ return 0;
+}
+
+int ngtcp2_rob_push(ngtcp2_rob *rob, uint64_t offset, const uint8_t *data,
+ size_t datalen) {
+ int rv;
+ ngtcp2_rob_gap *g;
+ ngtcp2_range m, l, r, q = {offset, offset + datalen};
+ ngtcp2_ksl_it it;
+
+ it = ngtcp2_ksl_lower_bound_compar(&rob->gapksl, &q,
+ ngtcp2_ksl_range_exclusive_compar);
+
+ for (; !ngtcp2_ksl_it_end(&it);) {
+ g = ngtcp2_ksl_it_get(&it);
+
+ m = ngtcp2_range_intersect(&q, &g->range);
+ if (!ngtcp2_range_len(&m)) {
+ break;
+ }
+ if (ngtcp2_range_eq(&g->range, &m)) {
+ ngtcp2_ksl_remove_hint(&rob->gapksl, &it, &it, &g->range);
+ ngtcp2_rob_gap_del(g, rob->mem);
+ rv = rob_write_data(rob, m.begin, data + (m.begin - offset),
+ (size_t)ngtcp2_range_len(&m));
+ if (rv != 0) {
+ return rv;
+ }
+
+ continue;
+ }
+ ngtcp2_range_cut(&l, &r, &g->range, &m);
+ if (ngtcp2_range_len(&l)) {
+ ngtcp2_ksl_update_key(&rob->gapksl, &g->range, &l);
+ g->range = l;
+
+ if (ngtcp2_range_len(&r)) {
+ ngtcp2_rob_gap *ng;
+ rv = ngtcp2_rob_gap_new(&ng, r.begin, r.end, rob->mem);
+ if (rv != 0) {
+ return rv;
+ }
+ rv = ngtcp2_ksl_insert(&rob->gapksl, &it, &ng->range, ng);
+ if (rv != 0) {
+ ngtcp2_rob_gap_del(ng, rob->mem);
+ return rv;
+ }
+ }
+ } else if (ngtcp2_range_len(&r)) {
+ ngtcp2_ksl_update_key(&rob->gapksl, &g->range, &r);
+ g->range = r;
+ }
+ rv = rob_write_data(rob, m.begin, data + (m.begin - offset),
+ (size_t)ngtcp2_range_len(&m));
+ if (rv != 0) {
+ return rv;
+ }
+ ngtcp2_ksl_it_next(&it);
+ }
+ return 0;
+}
+
+int ngtcp2_rob_remove_prefix(ngtcp2_rob *rob, uint64_t offset) {
+ ngtcp2_rob_gap *g;
+ ngtcp2_rob_data *d;
+ ngtcp2_ksl_it it;
+
+ it = ngtcp2_ksl_begin(&rob->gapksl);
+
+ for (; !ngtcp2_ksl_it_end(&it);) {
+ g = ngtcp2_ksl_it_get(&it);
+ if (offset <= g->range.begin) {
+ break;
+ }
+ if (offset < g->range.end) {
+ ngtcp2_range r = {offset, g->range.end};
+ ngtcp2_ksl_update_key(&rob->gapksl, &g->range, &r);
+ g->range.begin = offset;
+ break;
+ }
+ ngtcp2_ksl_remove_hint(&rob->gapksl, &it, &it, &g->range);
+ ngtcp2_rob_gap_del(g, rob->mem);
+ }
+
+ it = ngtcp2_ksl_begin(&rob->dataksl);
+
+ for (; !ngtcp2_ksl_it_end(&it);) {
+ d = ngtcp2_ksl_it_get(&it);
+ if (offset < d->range.begin + rob->chunk) {
+ return 0;
+ }
+ ngtcp2_ksl_remove_hint(&rob->dataksl, &it, &it, &d->range);
+ ngtcp2_rob_data_del(d, rob->mem);
+ }
+
+ return 0;
+}
+
+size_t ngtcp2_rob_data_at(ngtcp2_rob *rob, const uint8_t **pdest,
+ uint64_t offset) {
+ ngtcp2_rob_gap *g;
+ ngtcp2_rob_data *d;
+ ngtcp2_ksl_it it;
+
+ it = ngtcp2_ksl_begin(&rob->gapksl);
+ if (ngtcp2_ksl_it_end(&it)) {
+ return 0;
+ }
+
+ g = ngtcp2_ksl_it_get(&it);
+
+ if (g->range.begin <= offset) {
+ return 0;
+ }
+
+ it = ngtcp2_ksl_begin(&rob->dataksl);
+ d = ngtcp2_ksl_it_get(&it);
+
+ assert(d);
+ assert(d->range.begin <= offset);
+ assert(offset < d->range.begin + rob->chunk);
+
+ *pdest = d->begin + (offset - d->range.begin);
+
+ return (size_t)(ngtcp2_min(g->range.begin, d->range.begin + rob->chunk) -
+ offset);
+}
+
+void ngtcp2_rob_pop(ngtcp2_rob *rob, uint64_t offset, size_t len) {
+ ngtcp2_ksl_it it;
+ ngtcp2_rob_data *d;
+
+ it = ngtcp2_ksl_begin(&rob->dataksl);
+ d = ngtcp2_ksl_it_get(&it);
+
+ assert(d);
+
+ if (offset + len < d->range.begin + rob->chunk) {
+ return;
+ }
+
+ ngtcp2_ksl_remove_hint(&rob->dataksl, NULL, &it, &d->range);
+ ngtcp2_rob_data_del(d, rob->mem);
+}
+
+uint64_t ngtcp2_rob_first_gap_offset(ngtcp2_rob *rob) {
+ ngtcp2_ksl_it it = ngtcp2_ksl_begin(&rob->gapksl);
+ ngtcp2_rob_gap *g;
+
+ if (ngtcp2_ksl_it_end(&it)) {
+ return UINT64_MAX;
+ }
+
+ g = ngtcp2_ksl_it_get(&it);
+
+ return g->range.begin;
+}
+
+int ngtcp2_rob_data_buffered(ngtcp2_rob *rob) {
+ return ngtcp2_ksl_len(&rob->dataksl) != 0;
+}
diff --git a/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_rob.h b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_rob.h
new file mode 100644
index 0000000..c7688df
--- /dev/null
+++ b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_rob.h
@@ -0,0 +1,197 @@
+/*
+ * ngtcp2
+ *
+ * Copyright (c) 2017 ngtcp2 contributors
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#ifndef NGTCP2_ROB_H
+#define NGTCP2_ROB_H
+
+#ifdef HAVE_CONFIG_H
+# include <config.h>
+#endif /* HAVE_CONFIG_H */
+
+#include <ngtcp2/ngtcp2.h>
+
+#include "ngtcp2_mem.h"
+#include "ngtcp2_range.h"
+#include "ngtcp2_ksl.h"
+
+/*
+ * ngtcp2_rob_gap represents the gap, which is the range of stream
+ * data that is not received yet.
+ */
+typedef struct ngtcp2_rob_gap {
+ /* range is the range of this gap. */
+ ngtcp2_range range;
+} ngtcp2_rob_gap;
+
+/*
+ * ngtcp2_rob_gap_new allocates new ngtcp2_rob_gap object, and assigns
+ * its pointer to |*pg|. The caller should call ngtcp2_rob_gap_del to
+ * delete it when it is no longer used. The range of the gap is
+ * [begin, end). |mem| is custom memory allocator to allocate memory.
+ *
+ * This function returns 0 if it succeeds, or one of the following
+ * negative error codes:
+ *
+ * NGTCP2_ERR_NOMEM
+ * Out of memory.
+ */
+int ngtcp2_rob_gap_new(ngtcp2_rob_gap **pg, uint64_t begin, uint64_t end,
+ const ngtcp2_mem *mem);
+
+/*
+ * ngtcp2_rob_gap_del deallocates |g|. It deallocates the memory
+ * pointed by |g| it self. |mem| is custom memory allocator to
+ * deallocate memory.
+ */
+void ngtcp2_rob_gap_del(ngtcp2_rob_gap *g, const ngtcp2_mem *mem);
+
+/*
+ * ngtcp2_rob_data holds the buffered stream data.
+ */
+typedef struct ngtcp2_rob_data {
+ /* range is the range of this gap. */
+ ngtcp2_range range;
+ /* begin points to the buffer. */
+ uint8_t *begin;
+ /* end points to the one beyond of the last byte of the buffer */
+ uint8_t *end;
+} ngtcp2_rob_data;
+
+/*
+ * ngtcp2_rob_data_new allocates new ngtcp2_rob_data object, and
+ * assigns its pointer to |*pd|. The caller should call
+ * ngtcp2_rob_data_del to delete it when it is no longer used.
+ * |offset| is the stream offset of the first byte of this data.
+ * |chunk| is the size of the buffer. |offset| must be multiple of
+ * |chunk|. |mem| is custom memory allocator to allocate memory.
+ *
+ * This function returns 0 if it succeeds, or one of the following
+ * negative error codes:
+ *
+ * NGTCP2_ERR_NOMEM
+ * Out of memory.
+ */
+int ngtcp2_rob_data_new(ngtcp2_rob_data **pd, uint64_t offset, size_t chunk,
+ const ngtcp2_mem *mem);
+
+/*
+ * ngtcp2_rob_data_del deallocates |d|. It deallocates the memory
+ * pointed by |d| itself. |mem| is custom memory allocator to
+ * deallocate memory.
+ */
+void ngtcp2_rob_data_del(ngtcp2_rob_data *d, const ngtcp2_mem *mem);
+
+/*
+ * ngtcp2_rob is the reorder buffer which reassembles stream data
+ * received in out of order.
+ */
+typedef struct ngtcp2_rob {
+ /* gapksl maintains the range of offset which is not received
+ yet. Initially, its range is [0, UINT64_MAX). */
+ ngtcp2_ksl gapksl;
+ /* dataksl maintains the list of buffers which store received data
+ ordered by stream offset. */
+ ngtcp2_ksl dataksl;
+ /* mem is custom memory allocator */
+ const ngtcp2_mem *mem;
+ /* chunk is the size of each buffer in data field */
+ size_t chunk;
+} ngtcp2_rob;
+
+/*
+ * ngtcp2_rob_init initializes |rob|. |chunk| is the size of buffer
+ * per chunk.
+ *
+ * This function returns 0 if it succeeds, or one of the following
+ * negative error codes:
+ *
+ * NGTCP2_ERR_NOMEM
+ * Out of memory.
+ */
+int ngtcp2_rob_init(ngtcp2_rob *rob, size_t chunk, const ngtcp2_mem *mem);
+
+/*
+ * ngtcp2_rob_free frees resources allocated for |rob|.
+ */
+void ngtcp2_rob_free(ngtcp2_rob *rob);
+
+/*
+ * ngtcp2_rob_push adds new data of length |datalen| at the stream
+ * offset |offset|.
+ *
+ * This function returns 0 if it succeeds, or one of the following
+ * negative error codes:
+ *
+ * NGTCP2_ERR_NOMEM
+ * Out of memory
+ */
+int ngtcp2_rob_push(ngtcp2_rob *rob, uint64_t offset, const uint8_t *data,
+ size_t datalen);
+
+/*
+ * ngtcp2_rob_remove_prefix removes gap up to |offset|, exclusive. It
+ * also removes data buffer if it is completely included in |offset|.
+ *
+ * This function returns 0 if it succeeds, or one of the following
+ * negative error codes:
+ *
+ * NGTCP2_ERR_NOMEM
+ * Out of memory
+ */
+int ngtcp2_rob_remove_prefix(ngtcp2_rob *rob, uint64_t offset);
+
+/*
+ * ngtcp2_rob_data_at stores the pointer to the buffer of stream
+ * offset |offset| to |*pdest| if it is available, and returns the
+ * valid length of available data. If no data is available, it
+ * returns 0.
+ */
+size_t ngtcp2_rob_data_at(ngtcp2_rob *rob, const uint8_t **pdest,
+ uint64_t offset);
+
+/*
+ * ngtcp2_rob_pop clears data at stream offset |offset| of length
+ * |len|.
+ *
+ * |offset| must be the offset given in ngtcp2_rob_data_at. |len|
+ * must be the return value of ngtcp2_rob_data_at when |offset| is
+ * passed.
+ *
+ * Caller should call this function from offset 0 in non-decreasing
+ * order.
+ */
+void ngtcp2_rob_pop(ngtcp2_rob *rob, uint64_t offset, size_t len);
+
+/*
+ * ngtcp2_rob_first_gap_offset returns the offset to the first gap.
+ * If there is no gap, it returns UINT64_MAX.
+ */
+uint64_t ngtcp2_rob_first_gap_offset(ngtcp2_rob *rob);
+
+/*
+ * ngtcp2_rob_data_buffered returns nonzero if any data is buffered.
+ */
+int ngtcp2_rob_data_buffered(ngtcp2_rob *rob);
+
+#endif /* NGTCP2_ROB_H */
diff --git a/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_rst.c b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_rst.c
new file mode 100644
index 0000000..7b50f98
--- /dev/null
+++ b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_rst.c
@@ -0,0 +1,137 @@
+/*
+ * ngtcp2
+ *
+ * Copyright (c) 2019 ngtcp2 contributors
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "ngtcp2_rst.h"
+
+#include <assert.h>
+
+#include "ngtcp2_rtb.h"
+#include "ngtcp2_cc.h"
+#include "ngtcp2_macro.h"
+
+void ngtcp2_rs_init(ngtcp2_rs *rs) {
+ rs->interval = UINT64_MAX;
+ rs->delivered = 0;
+ rs->prior_delivered = 0;
+ rs->prior_ts = 0;
+ rs->tx_in_flight = 0;
+ rs->lost = 0;
+ rs->prior_lost = 0;
+ rs->send_elapsed = 0;
+ rs->ack_elapsed = 0;
+ rs->is_app_limited = 0;
+}
+
+void ngtcp2_rst_init(ngtcp2_rst *rst) {
+ ngtcp2_rs_init(&rst->rs);
+ ngtcp2_window_filter_init(&rst->wf, 12);
+ rst->delivered = 0;
+ rst->delivered_ts = 0;
+ rst->first_sent_ts = 0;
+ rst->app_limited = 0;
+ rst->next_round_delivered = 0;
+ rst->round_count = 0;
+ rst->is_cwnd_limited = 0;
+ rst->lost = 0;
+}
+
+void ngtcp2_rst_on_pkt_sent(ngtcp2_rst *rst, ngtcp2_rtb_entry *ent,
+ const ngtcp2_conn_stat *cstat) {
+ if (cstat->bytes_in_flight == 0) {
+ rst->first_sent_ts = rst->delivered_ts = ent->ts;
+ }
+ ent->rst.first_sent_ts = rst->first_sent_ts;
+ ent->rst.delivered_ts = rst->delivered_ts;
+ ent->rst.delivered = rst->delivered;
+ ent->rst.is_app_limited = rst->app_limited != 0;
+ ent->rst.tx_in_flight = cstat->bytes_in_flight + ent->pktlen;
+ ent->rst.lost = rst->lost;
+}
+
+int ngtcp2_rst_on_ack_recv(ngtcp2_rst *rst, ngtcp2_conn_stat *cstat,
+ uint64_t pkt_delivered) {
+ ngtcp2_rs *rs = &rst->rs;
+ uint64_t rate;
+
+ if (rst->app_limited && rst->delivered > rst->app_limited) {
+ rst->app_limited = 0;
+ }
+
+ if (pkt_delivered >= rst->next_round_delivered) {
+ rst->next_round_delivered = pkt_delivered;
+ ++rst->round_count;
+ }
+
+ if (rs->prior_ts == 0) {
+ return 0;
+ }
+
+ rs->interval = ngtcp2_max(rs->send_elapsed, rs->ack_elapsed);
+
+ rs->delivered = rst->delivered - rs->prior_delivered;
+ rs->lost = rst->lost - rs->prior_lost;
+
+ if (rs->interval < cstat->min_rtt) {
+ rs->interval = UINT64_MAX;
+ return 0;
+ }
+
+ if (!rs->interval) {
+ return 0;
+ }
+
+ rate = rs->delivered * NGTCP2_SECONDS / rs->interval;
+
+ if (rate > ngtcp2_window_filter_get_best(&rst->wf) || !rst->app_limited) {
+ ngtcp2_window_filter_update(&rst->wf, rate, rst->round_count);
+ cstat->delivery_rate_sec = ngtcp2_window_filter_get_best(&rst->wf);
+ }
+
+ return 0;
+}
+
+void ngtcp2_rst_update_rate_sample(ngtcp2_rst *rst, const ngtcp2_rtb_entry *ent,
+ ngtcp2_tstamp ts) {
+ ngtcp2_rs *rs = &rst->rs;
+
+ rst->delivered += ent->pktlen;
+ rst->delivered_ts = ts;
+
+ if (ent->rst.delivered > rs->prior_delivered) {
+ rs->prior_delivered = ent->rst.delivered;
+ rs->prior_ts = ent->rst.delivered_ts;
+ rs->is_app_limited = ent->rst.is_app_limited;
+ rs->send_elapsed = ent->ts - ent->rst.first_sent_ts;
+ rs->ack_elapsed = rst->delivered_ts - ent->rst.delivered_ts;
+ rs->tx_in_flight = ent->rst.tx_in_flight;
+ rs->prior_lost = ent->rst.lost;
+ rst->first_sent_ts = ent->ts;
+ }
+}
+
+void ngtcp2_rst_update_app_limited(ngtcp2_rst *rst, ngtcp2_conn_stat *cstat) {
+ (void)rst;
+ (void)cstat;
+ /* TODO Not implemented */
+}
diff --git a/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_rst.h b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_rst.h
new file mode 100644
index 0000000..488c655
--- /dev/null
+++ b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_rst.h
@@ -0,0 +1,85 @@
+/*
+ * ngtcp2
+ *
+ * Copyright (c) 2019 ngtcp2 contributors
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#ifndef NGTCP2_RST_H
+#define NGTCP2_RST_H
+
+#ifdef HAVE_CONFIG_H
+# include <config.h>
+#endif /* HAVE_CONFIG_H */
+
+#include <ngtcp2/ngtcp2.h>
+
+#include "ngtcp2_window_filter.h"
+
+typedef struct ngtcp2_rtb_entry ngtcp2_rtb_entry;
+
+/**
+ * @struct
+ *
+ * ngtcp2_rs contains connection state for delivery rate estimation.
+ */
+typedef struct ngtcp2_rs {
+ ngtcp2_duration interval;
+ uint64_t delivered;
+ uint64_t prior_delivered;
+ ngtcp2_tstamp prior_ts;
+ uint64_t tx_in_flight;
+ uint64_t lost;
+ uint64_t prior_lost;
+ ngtcp2_duration send_elapsed;
+ ngtcp2_duration ack_elapsed;
+ int is_app_limited;
+} ngtcp2_rs;
+
+void ngtcp2_rs_init(ngtcp2_rs *rs);
+
+/*
+ * ngtcp2_rst implements delivery rate estimation described in
+ * https://tools.ietf.org/html/draft-cheng-iccrg-delivery-rate-estimation-00
+ */
+typedef struct ngtcp2_rst {
+ ngtcp2_rs rs;
+ ngtcp2_window_filter wf;
+ uint64_t delivered;
+ ngtcp2_tstamp delivered_ts;
+ ngtcp2_tstamp first_sent_ts;
+ uint64_t app_limited;
+ uint64_t next_round_delivered;
+ uint64_t round_count;
+ uint64_t lost;
+ int is_cwnd_limited;
+} ngtcp2_rst;
+
+void ngtcp2_rst_init(ngtcp2_rst *rst);
+
+void ngtcp2_rst_on_pkt_sent(ngtcp2_rst *rst, ngtcp2_rtb_entry *ent,
+ const ngtcp2_conn_stat *cstat);
+int ngtcp2_rst_on_ack_recv(ngtcp2_rst *rst, ngtcp2_conn_stat *cstat,
+ uint64_t pkt_delivered);
+void ngtcp2_rst_update_rate_sample(ngtcp2_rst *rst, const ngtcp2_rtb_entry *ent,
+ ngtcp2_tstamp ts);
+void ngtcp2_rst_update_app_limited(ngtcp2_rst *rst, ngtcp2_conn_stat *cstat);
+
+#endif /* NGTCP2_RST_H */
diff --git a/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_rtb.c b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_rtb.c
new file mode 100644
index 0000000..7fb0cc7
--- /dev/null
+++ b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_rtb.c
@@ -0,0 +1,1676 @@
+/*
+ * ngtcp2
+ *
+ * Copyright (c) 2017 ngtcp2 contributors
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "ngtcp2_rtb.h"
+
+#include <assert.h>
+#include <string.h>
+
+#include "ngtcp2_macro.h"
+#include "ngtcp2_conn.h"
+#include "ngtcp2_log.h"
+#include "ngtcp2_vec.h"
+#include "ngtcp2_cc.h"
+#include "ngtcp2_rcvry.h"
+#include "ngtcp2_rst.h"
+#include "ngtcp2_unreachable.h"
+
+int ngtcp2_frame_chain_new(ngtcp2_frame_chain **pfrc, const ngtcp2_mem *mem) {
+ *pfrc = ngtcp2_mem_malloc(mem, sizeof(ngtcp2_frame_chain));
+ if (*pfrc == NULL) {
+ return NGTCP2_ERR_NOMEM;
+ }
+
+ ngtcp2_frame_chain_init(*pfrc);
+
+ return 0;
+}
+
+int ngtcp2_frame_chain_objalloc_new(ngtcp2_frame_chain **pfrc,
+ ngtcp2_objalloc *objalloc) {
+ *pfrc = ngtcp2_objalloc_frame_chain_get(objalloc);
+ if (*pfrc == NULL) {
+ return NGTCP2_ERR_NOMEM;
+ }
+
+ ngtcp2_frame_chain_init(*pfrc);
+
+ return 0;
+}
+
+int ngtcp2_frame_chain_extralen_new(ngtcp2_frame_chain **pfrc, size_t extralen,
+ const ngtcp2_mem *mem) {
+ *pfrc = ngtcp2_mem_malloc(mem, sizeof(ngtcp2_frame_chain) + extralen);
+ if (*pfrc == NULL) {
+ return NGTCP2_ERR_NOMEM;
+ }
+
+ ngtcp2_frame_chain_init(*pfrc);
+
+ return 0;
+}
+
+int ngtcp2_frame_chain_stream_datacnt_objalloc_new(ngtcp2_frame_chain **pfrc,
+ size_t datacnt,
+ ngtcp2_objalloc *objalloc,
+ const ngtcp2_mem *mem) {
+ size_t need, avail = sizeof(ngtcp2_frame) - sizeof(ngtcp2_stream);
+
+ if (datacnt > 1) {
+ need = sizeof(ngtcp2_vec) * (datacnt - 1);
+
+ if (need > avail) {
+ return ngtcp2_frame_chain_extralen_new(pfrc, need - avail, mem);
+ }
+ }
+
+ return ngtcp2_frame_chain_objalloc_new(pfrc, objalloc);
+}
+
+int ngtcp2_frame_chain_crypto_datacnt_objalloc_new(ngtcp2_frame_chain **pfrc,
+ size_t datacnt,
+ ngtcp2_objalloc *objalloc,
+ const ngtcp2_mem *mem) {
+ size_t need, avail = sizeof(ngtcp2_frame) - sizeof(ngtcp2_crypto);
+
+ if (datacnt > 1) {
+ need = sizeof(ngtcp2_vec) * (datacnt - 1);
+
+ if (need > avail) {
+ return ngtcp2_frame_chain_extralen_new(pfrc, need - avail, mem);
+ }
+ }
+
+ return ngtcp2_frame_chain_objalloc_new(pfrc, objalloc);
+}
+
+int ngtcp2_frame_chain_new_token_objalloc_new(ngtcp2_frame_chain **pfrc,
+ const uint8_t *token,
+ size_t tokenlen,
+ ngtcp2_objalloc *objalloc,
+ const ngtcp2_mem *mem) {
+ size_t avail = sizeof(ngtcp2_frame) - sizeof(ngtcp2_new_token);
+ int rv;
+ uint8_t *p;
+ ngtcp2_frame *fr;
+
+ if (tokenlen > avail) {
+ rv = ngtcp2_frame_chain_extralen_new(pfrc, tokenlen - avail, mem);
+ } else {
+ rv = ngtcp2_frame_chain_objalloc_new(pfrc, objalloc);
+ }
+ if (rv != 0) {
+ return rv;
+ }
+
+ fr = &(*pfrc)->fr;
+ fr->type = NGTCP2_FRAME_NEW_TOKEN;
+
+ p = (uint8_t *)fr + sizeof(ngtcp2_new_token);
+ memcpy(p, token, tokenlen);
+
+ fr->new_token.token = p;
+ fr->new_token.tokenlen = tokenlen;
+
+ return 0;
+}
+
+void ngtcp2_frame_chain_del(ngtcp2_frame_chain *frc, const ngtcp2_mem *mem) {
+ ngtcp2_frame_chain_binder *binder;
+
+ if (frc == NULL) {
+ return;
+ }
+
+ binder = frc->binder;
+ if (binder && --binder->refcount == 0) {
+ ngtcp2_mem_free(mem, binder);
+ }
+
+ ngtcp2_mem_free(mem, frc);
+}
+
+void ngtcp2_frame_chain_objalloc_del(ngtcp2_frame_chain *frc,
+ ngtcp2_objalloc *objalloc,
+ const ngtcp2_mem *mem) {
+ ngtcp2_frame_chain_binder *binder;
+
+ if (frc == NULL) {
+ return;
+ }
+
+ switch (frc->fr.type) {
+ case NGTCP2_FRAME_STREAM:
+ if (frc->fr.stream.datacnt &&
+ sizeof(ngtcp2_vec) * (frc->fr.stream.datacnt - 1) >
+ sizeof(ngtcp2_frame) - sizeof(ngtcp2_stream)) {
+ ngtcp2_frame_chain_del(frc, mem);
+
+ return;
+ }
+
+ break;
+ case NGTCP2_FRAME_CRYPTO:
+ if (frc->fr.crypto.datacnt &&
+ sizeof(ngtcp2_vec) * (frc->fr.crypto.datacnt - 1) >
+ sizeof(ngtcp2_frame) - sizeof(ngtcp2_crypto)) {
+ ngtcp2_frame_chain_del(frc, mem);
+
+ return;
+ }
+
+ break;
+ case NGTCP2_FRAME_NEW_TOKEN:
+ if (frc->fr.new_token.tokenlen >
+ sizeof(ngtcp2_frame) - sizeof(ngtcp2_new_token)) {
+ ngtcp2_frame_chain_del(frc, mem);
+
+ return;
+ }
+
+ break;
+ }
+
+ binder = frc->binder;
+ if (binder && --binder->refcount == 0) {
+ ngtcp2_mem_free(mem, binder);
+ }
+
+ frc->binder = NULL;
+
+ ngtcp2_objalloc_frame_chain_release(objalloc, frc);
+}
+
+void ngtcp2_frame_chain_init(ngtcp2_frame_chain *frc) {
+ frc->next = NULL;
+ frc->binder = NULL;
+}
+
+void ngtcp2_frame_chain_list_objalloc_del(ngtcp2_frame_chain *frc,
+ ngtcp2_objalloc *objalloc,
+ const ngtcp2_mem *mem) {
+ ngtcp2_frame_chain *next;
+
+ for (; frc; frc = next) {
+ next = frc->next;
+
+ ngtcp2_frame_chain_objalloc_del(frc, objalloc, mem);
+ }
+}
+
+int ngtcp2_frame_chain_binder_new(ngtcp2_frame_chain_binder **pbinder,
+ const ngtcp2_mem *mem) {
+ *pbinder = ngtcp2_mem_calloc(mem, 1, sizeof(ngtcp2_frame_chain_binder));
+ if (*pbinder == NULL) {
+ return NGTCP2_ERR_NOMEM;
+ }
+
+ return 0;
+}
+
+int ngtcp2_bind_frame_chains(ngtcp2_frame_chain *a, ngtcp2_frame_chain *b,
+ const ngtcp2_mem *mem) {
+ ngtcp2_frame_chain_binder *binder;
+ int rv;
+
+ assert(b->binder == NULL);
+
+ if (a->binder == NULL) {
+ rv = ngtcp2_frame_chain_binder_new(&binder, mem);
+ if (rv != 0) {
+ return rv;
+ }
+
+ a->binder = binder;
+ ++a->binder->refcount;
+ }
+
+ b->binder = a->binder;
+ ++b->binder->refcount;
+
+ return 0;
+}
+
+static void rtb_entry_init(ngtcp2_rtb_entry *ent, const ngtcp2_pkt_hd *hd,
+ ngtcp2_frame_chain *frc, ngtcp2_tstamp ts,
+ size_t pktlen, uint16_t flags) {
+ memset(ent, 0, sizeof(*ent));
+
+ ent->hd.pkt_num = hd->pkt_num;
+ ent->hd.type = hd->type;
+ ent->hd.flags = hd->flags;
+ ent->frc = frc;
+ ent->ts = ts;
+ ent->lost_ts = UINT64_MAX;
+ ent->pktlen = pktlen;
+ ent->flags = flags;
+ ent->next = NULL;
+}
+
+int ngtcp2_rtb_entry_objalloc_new(ngtcp2_rtb_entry **pent,
+ const ngtcp2_pkt_hd *hd,
+ ngtcp2_frame_chain *frc, ngtcp2_tstamp ts,
+ size_t pktlen, uint16_t flags,
+ ngtcp2_objalloc *objalloc) {
+ *pent = ngtcp2_objalloc_rtb_entry_get(objalloc);
+ if (*pent == NULL) {
+ return NGTCP2_ERR_NOMEM;
+ }
+
+ rtb_entry_init(*pent, hd, frc, ts, pktlen, flags);
+
+ return 0;
+}
+
+void ngtcp2_rtb_entry_objalloc_del(ngtcp2_rtb_entry *ent,
+ ngtcp2_objalloc *objalloc,
+ ngtcp2_objalloc *frc_objalloc,
+ const ngtcp2_mem *mem) {
+ ngtcp2_frame_chain_list_objalloc_del(ent->frc, frc_objalloc, mem);
+
+ ent->frc = NULL;
+
+ ngtcp2_objalloc_rtb_entry_release(objalloc, ent);
+}
+
+static int greater(const ngtcp2_ksl_key *lhs, const ngtcp2_ksl_key *rhs) {
+ return *(int64_t *)lhs > *(int64_t *)rhs;
+}
+
+void ngtcp2_rtb_init(ngtcp2_rtb *rtb, ngtcp2_pktns_id pktns_id,
+ ngtcp2_strm *crypto, ngtcp2_rst *rst, ngtcp2_cc *cc,
+ ngtcp2_log *log, ngtcp2_qlog *qlog,
+ ngtcp2_objalloc *rtb_entry_objalloc,
+ ngtcp2_objalloc *frc_objalloc, const ngtcp2_mem *mem) {
+ rtb->rtb_entry_objalloc = rtb_entry_objalloc;
+ rtb->frc_objalloc = frc_objalloc;
+ ngtcp2_ksl_init(&rtb->ents, greater, sizeof(int64_t), mem);
+ rtb->crypto = crypto;
+ rtb->rst = rst;
+ rtb->cc = cc;
+ rtb->log = log;
+ rtb->qlog = qlog;
+ rtb->mem = mem;
+ rtb->largest_acked_tx_pkt_num = -1;
+ rtb->num_ack_eliciting = 0;
+ rtb->num_retransmittable = 0;
+ rtb->num_pto_eliciting = 0;
+ rtb->probe_pkt_left = 0;
+ rtb->pktns_id = pktns_id;
+ rtb->cc_pkt_num = 0;
+ rtb->cc_bytes_in_flight = 0;
+ rtb->persistent_congestion_start_ts = UINT64_MAX;
+ rtb->num_lost_pkts = 0;
+ rtb->num_lost_pmtud_pkts = 0;
+}
+
+void ngtcp2_rtb_free(ngtcp2_rtb *rtb) {
+ ngtcp2_ksl_it it;
+
+ if (rtb == NULL) {
+ return;
+ }
+
+ it = ngtcp2_ksl_begin(&rtb->ents);
+
+ for (; !ngtcp2_ksl_it_end(&it); ngtcp2_ksl_it_next(&it)) {
+ ngtcp2_rtb_entry_objalloc_del(ngtcp2_ksl_it_get(&it),
+ rtb->rtb_entry_objalloc, rtb->frc_objalloc,
+ rtb->mem);
+ }
+
+ ngtcp2_ksl_free(&rtb->ents);
+}
+
+static void rtb_on_add(ngtcp2_rtb *rtb, ngtcp2_rtb_entry *ent,
+ ngtcp2_conn_stat *cstat) {
+ ngtcp2_rst_on_pkt_sent(rtb->rst, ent, cstat);
+
+ assert(rtb->cc_pkt_num <= ent->hd.pkt_num);
+
+ cstat->bytes_in_flight += ent->pktlen;
+ rtb->cc_bytes_in_flight += ent->pktlen;
+
+ ngtcp2_rst_update_app_limited(rtb->rst, cstat);
+
+ if (ent->flags & NGTCP2_RTB_ENTRY_FLAG_ACK_ELICITING) {
+ ++rtb->num_ack_eliciting;
+ }
+ if (ent->flags & NGTCP2_RTB_ENTRY_FLAG_RETRANSMITTABLE) {
+ ++rtb->num_retransmittable;
+ }
+ if (ent->flags & NGTCP2_RTB_ENTRY_FLAG_PTO_ELICITING) {
+ ++rtb->num_pto_eliciting;
+ }
+}
+
+static size_t rtb_on_remove(ngtcp2_rtb *rtb, ngtcp2_rtb_entry *ent,
+ ngtcp2_conn_stat *cstat) {
+ if (ent->flags & NGTCP2_RTB_ENTRY_FLAG_LOST_RETRANSMITTED) {
+ assert(rtb->num_lost_pkts);
+ --rtb->num_lost_pkts;
+
+ if (ent->flags & NGTCP2_RTB_ENTRY_FLAG_PMTUD_PROBE) {
+ assert(rtb->num_lost_pmtud_pkts);
+ --rtb->num_lost_pmtud_pkts;
+ }
+
+ return 0;
+ }
+
+ if (ent->flags & NGTCP2_RTB_ENTRY_FLAG_ACK_ELICITING) {
+ assert(rtb->num_ack_eliciting);
+ --rtb->num_ack_eliciting;
+ }
+
+ if ((ent->flags & NGTCP2_RTB_ENTRY_FLAG_RETRANSMITTABLE) &&
+ !(ent->flags & NGTCP2_RTB_ENTRY_FLAG_PTO_RECLAIMED)) {
+ assert(rtb->num_retransmittable);
+ --rtb->num_retransmittable;
+ }
+
+ if (ent->flags & NGTCP2_RTB_ENTRY_FLAG_PTO_ELICITING) {
+ assert(rtb->num_pto_eliciting);
+ --rtb->num_pto_eliciting;
+ }
+
+ if (rtb->cc_pkt_num <= ent->hd.pkt_num) {
+ assert(cstat->bytes_in_flight >= ent->pktlen);
+ cstat->bytes_in_flight -= ent->pktlen;
+
+ assert(rtb->cc_bytes_in_flight >= ent->pktlen);
+ rtb->cc_bytes_in_flight -= ent->pktlen;
+
+ /* If PMTUD packet is lost, we do not report the lost bytes to the
+ caller in order to ignore loss of PMTUD packet. */
+ if (ent->flags & NGTCP2_RTB_ENTRY_FLAG_PMTUD_PROBE) {
+ return 0;
+ }
+
+ return ent->pktlen;
+ }
+
+ return 0;
+}
+
+/* NGTCP2_RECLAIM_FLAG_NONE indicates that no flag is set. */
+#define NGTCP2_RECLAIM_FLAG_NONE 0x00u
+/* NGTCP2_RECLAIM_FLAG_ON_LOSS indicates that frames are reclaimed
+ because of the packet loss.*/
+#define NGTCP2_RECLAIM_FLAG_ON_LOSS 0x01u
+
+/*
+ * rtb_reclaim_frame queues unacknowledged frames included in |ent|
+ * for retransmission. The re-queued frames are not deleted from
+ * |ent|. It returns the number of frames queued. |flags| is bitwise
+ * OR of 0 or more of NGTCP2_RECLAIM_FLAG_*.
+ */
+static ngtcp2_ssize rtb_reclaim_frame(ngtcp2_rtb *rtb, uint8_t flags,
+ ngtcp2_conn *conn, ngtcp2_pktns *pktns,
+ ngtcp2_rtb_entry *ent) {
+ ngtcp2_frame_chain *frc, *nfrc, **pfrc = &pktns->tx.frq;
+ ngtcp2_frame *fr;
+ ngtcp2_strm *strm;
+ ngtcp2_range gap, range;
+ size_t num_reclaimed = 0;
+ int rv;
+ int streamfrq_empty;
+
+ assert(ent->flags & NGTCP2_RTB_ENTRY_FLAG_RETRANSMITTABLE);
+
+ /* TODO Reconsider the order of pfrc */
+ for (frc = ent->frc; frc; frc = frc->next) {
+ fr = &frc->fr;
+ /* Check that a late ACK acknowledged this frame. */
+ if (frc->binder &&
+ (frc->binder->flags & NGTCP2_FRAME_CHAIN_BINDER_FLAG_ACK)) {
+ continue;
+ }
+ switch (frc->fr.type) {
+ case NGTCP2_FRAME_STREAM:
+ strm = ngtcp2_conn_find_stream(conn, fr->stream.stream_id);
+ if (strm == NULL) {
+ continue;
+ }
+
+ gap = ngtcp2_strm_get_unacked_range_after(strm, fr->stream.offset);
+
+ range.begin = fr->stream.offset;
+ range.end = fr->stream.offset +
+ ngtcp2_vec_len(fr->stream.data, fr->stream.datacnt);
+ range = ngtcp2_range_intersect(&range, &gap);
+ if (ngtcp2_range_len(&range) == 0) {
+ if (!fr->stream.fin) {
+ /* 0 length STREAM frame with offset == 0 must be
+ retransmitted if no non-empty data is sent to this stream
+ and no data in this stream is acknowledged. */
+ if (fr->stream.offset != 0 || fr->stream.datacnt != 0 ||
+ strm->tx.offset || (strm->flags & NGTCP2_STRM_FLAG_ANY_ACKED)) {
+ continue;
+ }
+ } else if (strm->flags & NGTCP2_STRM_FLAG_FIN_ACKED) {
+ continue;
+ }
+ }
+
+ if ((flags & NGTCP2_RECLAIM_FLAG_ON_LOSS) &&
+ ent->hd.pkt_num != strm->tx.last_lost_pkt_num) {
+ strm->tx.last_lost_pkt_num = ent->hd.pkt_num;
+ ++strm->tx.loss_count;
+ }
+
+ rv = ngtcp2_frame_chain_stream_datacnt_objalloc_new(
+ &nfrc, fr->stream.datacnt, rtb->frc_objalloc, rtb->mem);
+ if (rv != 0) {
+ return rv;
+ }
+
+ nfrc->fr = *fr;
+ ngtcp2_vec_copy(nfrc->fr.stream.data, fr->stream.data,
+ fr->stream.datacnt);
+
+ streamfrq_empty = ngtcp2_strm_streamfrq_empty(strm);
+ rv = ngtcp2_strm_streamfrq_push(strm, nfrc);
+ if (rv != 0) {
+ ngtcp2_frame_chain_objalloc_del(nfrc, rtb->frc_objalloc, rtb->mem);
+ return rv;
+ }
+ if (!ngtcp2_strm_is_tx_queued(strm)) {
+ strm->cycle = ngtcp2_conn_tx_strmq_first_cycle(conn);
+ rv = ngtcp2_conn_tx_strmq_push(conn, strm);
+ if (rv != 0) {
+ return rv;
+ }
+ }
+ if (streamfrq_empty) {
+ ++conn->tx.strmq_nretrans;
+ }
+
+ ++num_reclaimed;
+
+ continue;
+ case NGTCP2_FRAME_CRYPTO:
+ /* Don't resend CRYPTO frame if the whole region it contains has
+ been acknowledged */
+ gap = ngtcp2_strm_get_unacked_range_after(rtb->crypto, fr->crypto.offset);
+
+ range.begin = fr->crypto.offset;
+ range.end = fr->crypto.offset +
+ ngtcp2_vec_len(fr->crypto.data, fr->crypto.datacnt);
+ range = ngtcp2_range_intersect(&range, &gap);
+ if (ngtcp2_range_len(&range) == 0) {
+ continue;
+ }
+
+ rv = ngtcp2_frame_chain_crypto_datacnt_objalloc_new(
+ &nfrc, fr->crypto.datacnt, rtb->frc_objalloc, rtb->mem);
+ if (rv != 0) {
+ return rv;
+ }
+
+ nfrc->fr = *fr;
+ ngtcp2_vec_copy(nfrc->fr.crypto.data, fr->crypto.data,
+ fr->crypto.datacnt);
+
+ rv = ngtcp2_ksl_insert(&pktns->crypto.tx.frq, NULL,
+ &nfrc->fr.crypto.offset, nfrc);
+ if (rv != 0) {
+ assert(ngtcp2_err_is_fatal(rv));
+ ngtcp2_frame_chain_objalloc_del(nfrc, rtb->frc_objalloc, rtb->mem);
+ return rv;
+ }
+
+ ++num_reclaimed;
+
+ continue;
+ case NGTCP2_FRAME_NEW_TOKEN:
+ rv = ngtcp2_frame_chain_new_token_objalloc_new(
+ &nfrc, fr->new_token.token, fr->new_token.tokenlen, rtb->frc_objalloc,
+ rtb->mem);
+ if (rv != 0) {
+ return rv;
+ }
+
+ rv = ngtcp2_bind_frame_chains(frc, nfrc, rtb->mem);
+ if (rv != 0) {
+ return rv;
+ }
+
+ break;
+ case NGTCP2_FRAME_DATAGRAM:
+ case NGTCP2_FRAME_DATAGRAM_LEN:
+ continue;
+ default:
+ rv = ngtcp2_frame_chain_objalloc_new(&nfrc, rtb->frc_objalloc);
+ if (rv != 0) {
+ return rv;
+ }
+
+ nfrc->fr = *fr;
+
+ rv = ngtcp2_bind_frame_chains(frc, nfrc, rtb->mem);
+ if (rv != 0) {
+ return rv;
+ }
+
+ break;
+ }
+
+ ++num_reclaimed;
+
+ nfrc->next = *pfrc;
+ *pfrc = nfrc;
+ pfrc = &nfrc->next;
+ }
+
+ return (ngtcp2_ssize)num_reclaimed;
+}
+
+/*
+ * conn_process_lost_datagram calls ngtcp2_lost_datagram callback for
+ * lost DATAGRAM frames.
+ */
+static int conn_process_lost_datagram(ngtcp2_conn *conn,
+ ngtcp2_rtb_entry *ent) {
+ ngtcp2_frame_chain *frc;
+ int rv;
+
+ for (frc = ent->frc; frc; frc = frc->next) {
+ switch (frc->fr.type) {
+ case NGTCP2_FRAME_DATAGRAM:
+ case NGTCP2_FRAME_DATAGRAM_LEN:
+ assert(conn->callbacks.lost_datagram);
+
+ rv = conn->callbacks.lost_datagram(conn, frc->fr.datagram.dgram_id,
+ conn->user_data);
+ if (rv != 0) {
+ return NGTCP2_ERR_CALLBACK_FAILURE;
+ }
+ break;
+ }
+ }
+
+ return 0;
+}
+
+static int rtb_on_pkt_lost(ngtcp2_rtb *rtb, ngtcp2_ksl_it *it,
+ ngtcp2_rtb_entry *ent, ngtcp2_conn_stat *cstat,
+ ngtcp2_conn *conn, ngtcp2_pktns *pktns,
+ ngtcp2_tstamp ts) {
+ int rv;
+ ngtcp2_ssize reclaimed;
+ ngtcp2_cc *cc = rtb->cc;
+ ngtcp2_cc_pkt pkt;
+
+ ngtcp2_log_pkt_lost(rtb->log, ent->hd.pkt_num, ent->hd.type, ent->hd.flags,
+ ent->ts);
+
+ if (rtb->qlog) {
+ ngtcp2_qlog_pkt_lost(rtb->qlog, ent);
+ }
+
+ if (ent->flags & NGTCP2_RTB_ENTRY_FLAG_PMTUD_PROBE) {
+ ++rtb->num_lost_pmtud_pkts;
+ } else if (rtb->cc->on_pkt_lost) {
+ cc->on_pkt_lost(cc, cstat,
+ ngtcp2_cc_pkt_init(&pkt, ent->hd.pkt_num, ent->pktlen,
+ rtb->pktns_id, ent->ts, ent->rst.lost,
+ ent->rst.tx_in_flight,
+ ent->rst.is_app_limited),
+ ts);
+ }
+
+ if (ent->flags & NGTCP2_RTB_ENTRY_FLAG_PTO_RECLAIMED) {
+ ngtcp2_log_info(rtb->log, NGTCP2_LOG_EVENT_RCV,
+ "pkn=%" PRId64 " has already been reclaimed on PTO",
+ ent->hd.pkt_num);
+ assert(!(ent->flags & NGTCP2_RTB_ENTRY_FLAG_LOST_RETRANSMITTED));
+ assert(UINT64_MAX == ent->lost_ts);
+
+ ent->flags |= NGTCP2_RTB_ENTRY_FLAG_LOST_RETRANSMITTED;
+ ent->lost_ts = ts;
+
+ ++rtb->num_lost_pkts;
+
+ ngtcp2_ksl_it_next(it);
+
+ return 0;
+ }
+
+ if (conn->callbacks.lost_datagram &&
+ (ent->flags & NGTCP2_RTB_ENTRY_FLAG_DATAGRAM)) {
+ rv = conn_process_lost_datagram(conn, ent);
+ if (rv != 0) {
+ return rv;
+ }
+ }
+
+ if (ent->flags & NGTCP2_RTB_ENTRY_FLAG_RETRANSMITTABLE) {
+ assert(ent->frc);
+ assert(!(ent->flags & NGTCP2_RTB_ENTRY_FLAG_LOST_RETRANSMITTED));
+ assert(UINT64_MAX == ent->lost_ts);
+
+ reclaimed =
+ rtb_reclaim_frame(rtb, NGTCP2_RECLAIM_FLAG_ON_LOSS, conn, pktns, ent);
+ if (reclaimed < 0) {
+ return (int)reclaimed;
+ }
+ }
+
+ ent->flags |= NGTCP2_RTB_ENTRY_FLAG_LOST_RETRANSMITTED;
+ ent->lost_ts = ts;
+
+ ++rtb->num_lost_pkts;
+
+ ngtcp2_ksl_it_next(it);
+
+ return 0;
+}
+
+int ngtcp2_rtb_add(ngtcp2_rtb *rtb, ngtcp2_rtb_entry *ent,
+ ngtcp2_conn_stat *cstat) {
+ int rv;
+
+ rv = ngtcp2_ksl_insert(&rtb->ents, NULL, &ent->hd.pkt_num, ent);
+ if (rv != 0) {
+ return rv;
+ }
+
+ rtb_on_add(rtb, ent, cstat);
+
+ return 0;
+}
+
+ngtcp2_ksl_it ngtcp2_rtb_head(ngtcp2_rtb *rtb) {
+ return ngtcp2_ksl_begin(&rtb->ents);
+}
+
+static void rtb_remove(ngtcp2_rtb *rtb, ngtcp2_ksl_it *it,
+ ngtcp2_rtb_entry **pent, ngtcp2_rtb_entry *ent,
+ ngtcp2_conn_stat *cstat) {
+ int rv;
+ (void)rv;
+
+ rv = ngtcp2_ksl_remove_hint(&rtb->ents, it, it, &ent->hd.pkt_num);
+ assert(0 == rv);
+ rtb_on_remove(rtb, ent, cstat);
+
+ assert(ent->next == NULL);
+
+ ngtcp2_list_insert(ent, pent);
+}
+
+static void conn_ack_crypto_data(ngtcp2_conn *conn, ngtcp2_pktns *pktns,
+ uint64_t datalen) {
+ ngtcp2_buf_chain **pbufchain, *bufchain;
+ size_t left;
+
+ for (pbufchain = &pktns->crypto.tx.data; *pbufchain;) {
+ left = ngtcp2_buf_len(&(*pbufchain)->buf);
+ if (left > datalen) {
+ (*pbufchain)->buf.pos += datalen;
+ return;
+ }
+
+ bufchain = *pbufchain;
+ *pbufchain = bufchain->next;
+
+ ngtcp2_mem_free(conn->mem, bufchain);
+
+ datalen -= left;
+
+ if (datalen == 0) {
+ return;
+ }
+ }
+
+ assert(datalen == 0);
+
+ return;
+}
+
+static int rtb_process_acked_pkt(ngtcp2_rtb *rtb, ngtcp2_rtb_entry *ent,
+ ngtcp2_conn *conn) {
+ ngtcp2_frame_chain *frc;
+ uint64_t prev_stream_offset, stream_offset;
+ ngtcp2_strm *strm;
+ int rv;
+ uint64_t datalen;
+ ngtcp2_strm *crypto = rtb->crypto;
+ ngtcp2_pktns *pktns = NULL;
+
+ if ((ent->flags & NGTCP2_RTB_ENTRY_FLAG_PMTUD_PROBE) && conn->pmtud &&
+ conn->pmtud->tx_pkt_num <= ent->hd.pkt_num) {
+ ngtcp2_pmtud_probe_success(conn->pmtud, ent->pktlen);
+
+ conn->dcid.current.max_udp_payload_size =
+ ngtcp2_max(conn->dcid.current.max_udp_payload_size, ent->pktlen);
+
+ if (ngtcp2_pmtud_finished(conn->pmtud)) {
+ ngtcp2_conn_stop_pmtud(conn);
+ }
+ }
+
+ for (frc = ent->frc; frc; frc = frc->next) {
+ if (frc->binder) {
+ frc->binder->flags |= NGTCP2_FRAME_CHAIN_BINDER_FLAG_ACK;
+ }
+
+ switch (frc->fr.type) {
+ case NGTCP2_FRAME_STREAM:
+ strm = ngtcp2_conn_find_stream(conn, frc->fr.stream.stream_id);
+ if (strm == NULL) {
+ break;
+ }
+
+ strm->flags |= NGTCP2_STRM_FLAG_ANY_ACKED;
+
+ if (frc->fr.stream.fin) {
+ strm->flags |= NGTCP2_STRM_FLAG_FIN_ACKED;
+ }
+
+ prev_stream_offset = ngtcp2_strm_get_acked_offset(strm);
+ rv = ngtcp2_strm_ack_data(
+ strm, frc->fr.stream.offset,
+ ngtcp2_vec_len(frc->fr.stream.data, frc->fr.stream.datacnt));
+ if (rv != 0) {
+ return rv;
+ }
+
+ if (conn->callbacks.acked_stream_data_offset) {
+ stream_offset = ngtcp2_strm_get_acked_offset(strm);
+ datalen = stream_offset - prev_stream_offset;
+ if (datalen == 0 && !frc->fr.stream.fin) {
+ break;
+ }
+
+ rv = conn->callbacks.acked_stream_data_offset(
+ conn, strm->stream_id, prev_stream_offset, datalen, conn->user_data,
+ strm->stream_user_data);
+ if (rv != 0) {
+ return NGTCP2_ERR_CALLBACK_FAILURE;
+ }
+ }
+
+ rv = ngtcp2_conn_close_stream_if_shut_rdwr(conn, strm);
+ if (rv != 0) {
+ return rv;
+ }
+ break;
+ case NGTCP2_FRAME_CRYPTO:
+ prev_stream_offset = ngtcp2_strm_get_acked_offset(crypto);
+ rv = ngtcp2_strm_ack_data(
+ crypto, frc->fr.crypto.offset,
+ ngtcp2_vec_len(frc->fr.crypto.data, frc->fr.crypto.datacnt));
+ if (rv != 0) {
+ return rv;
+ }
+
+ stream_offset = ngtcp2_strm_get_acked_offset(crypto);
+ datalen = stream_offset - prev_stream_offset;
+ if (datalen == 0) {
+ break;
+ }
+
+ switch (rtb->pktns_id) {
+ case NGTCP2_PKTNS_ID_INITIAL:
+ pktns = conn->in_pktns;
+ break;
+ case NGTCP2_PKTNS_ID_HANDSHAKE:
+ pktns = conn->hs_pktns;
+ break;
+ case NGTCP2_PKTNS_ID_APPLICATION:
+ pktns = &conn->pktns;
+ break;
+ default:
+ ngtcp2_unreachable();
+ }
+
+ conn_ack_crypto_data(conn, pktns, datalen);
+
+ break;
+ case NGTCP2_FRAME_RESET_STREAM:
+ strm = ngtcp2_conn_find_stream(conn, frc->fr.reset_stream.stream_id);
+ if (strm == NULL) {
+ break;
+ }
+ strm->flags |= NGTCP2_STRM_FLAG_RST_ACKED;
+ rv = ngtcp2_conn_close_stream_if_shut_rdwr(conn, strm);
+ if (rv != 0) {
+ return rv;
+ }
+ break;
+ case NGTCP2_FRAME_RETIRE_CONNECTION_ID:
+ ngtcp2_conn_untrack_retired_dcid_seq(conn,
+ frc->fr.retire_connection_id.seq);
+ break;
+ case NGTCP2_FRAME_DATAGRAM:
+ case NGTCP2_FRAME_DATAGRAM_LEN:
+ if (!conn->callbacks.ack_datagram) {
+ break;
+ }
+
+ rv = conn->callbacks.ack_datagram(conn, frc->fr.datagram.dgram_id,
+ conn->user_data);
+ if (rv != 0) {
+ return NGTCP2_ERR_CALLBACK_FAILURE;
+ }
+ break;
+ }
+ }
+ return 0;
+}
+
+static void rtb_on_pkt_acked(ngtcp2_rtb *rtb, ngtcp2_rtb_entry *ent,
+ ngtcp2_conn_stat *cstat, ngtcp2_tstamp ts) {
+ ngtcp2_cc *cc = rtb->cc;
+ ngtcp2_cc_pkt pkt;
+
+ ngtcp2_rst_update_rate_sample(rtb->rst, ent, ts);
+
+ cc->on_pkt_acked(cc, cstat,
+ ngtcp2_cc_pkt_init(&pkt, ent->hd.pkt_num, ent->pktlen,
+ rtb->pktns_id, ent->ts, ent->rst.lost,
+ ent->rst.tx_in_flight,
+ ent->rst.is_app_limited),
+ ts);
+
+ if (!(ent->flags & NGTCP2_RTB_ENTRY_FLAG_PROBE) &&
+ (ent->flags & NGTCP2_RTB_ENTRY_FLAG_ACK_ELICITING)) {
+ cstat->pto_count = 0;
+ }
+}
+
+static void conn_verify_ecn(ngtcp2_conn *conn, ngtcp2_pktns *pktns,
+ ngtcp2_cc *cc, ngtcp2_conn_stat *cstat,
+ const ngtcp2_ack *fr, size_t ecn_acked,
+ ngtcp2_tstamp largest_acked_sent_ts,
+ ngtcp2_tstamp ts) {
+ if (conn->tx.ecn.state == NGTCP2_ECN_STATE_FAILED) {
+ return;
+ }
+
+ if ((ecn_acked && fr->type == NGTCP2_FRAME_ACK) ||
+ (fr->type == NGTCP2_FRAME_ACK_ECN &&
+ (pktns->rx.ecn.ack.ect0 > fr->ecn.ect0 ||
+ pktns->rx.ecn.ack.ect1 > fr->ecn.ect1 ||
+ pktns->rx.ecn.ack.ce > fr->ecn.ce ||
+ (fr->ecn.ect0 - pktns->rx.ecn.ack.ect0) +
+ (fr->ecn.ce - pktns->rx.ecn.ack.ce) <
+ ecn_acked ||
+ fr->ecn.ect0 > pktns->tx.ecn.ect0 || fr->ecn.ect1))) {
+ ngtcp2_log_info(&conn->log, NGTCP2_LOG_EVENT_CON,
+ "path is not ECN capable");
+ conn->tx.ecn.state = NGTCP2_ECN_STATE_FAILED;
+ return;
+ }
+
+ if (conn->tx.ecn.state != NGTCP2_ECN_STATE_CAPABLE && ecn_acked) {
+ ngtcp2_log_info(&conn->log, NGTCP2_LOG_EVENT_CON, "path is ECN capable");
+ conn->tx.ecn.state = NGTCP2_ECN_STATE_CAPABLE;
+ }
+
+ if (fr->type == NGTCP2_FRAME_ACK_ECN) {
+ if (largest_acked_sent_ts != UINT64_MAX &&
+ fr->ecn.ce > pktns->rx.ecn.ack.ce) {
+ cc->congestion_event(cc, cstat, largest_acked_sent_ts, ts);
+ }
+
+ pktns->rx.ecn.ack.ect0 = fr->ecn.ect0;
+ pktns->rx.ecn.ack.ect1 = fr->ecn.ect1;
+ pktns->rx.ecn.ack.ce = fr->ecn.ce;
+ }
+}
+
+static int rtb_detect_lost_pkt(ngtcp2_rtb *rtb, uint64_t *ppkt_lost,
+ ngtcp2_conn *conn, ngtcp2_pktns *pktns,
+ ngtcp2_conn_stat *cstat, ngtcp2_tstamp ts);
+
+ngtcp2_ssize ngtcp2_rtb_recv_ack(ngtcp2_rtb *rtb, const ngtcp2_ack *fr,
+ ngtcp2_conn_stat *cstat, ngtcp2_conn *conn,
+ ngtcp2_pktns *pktns, ngtcp2_tstamp pkt_ts,
+ ngtcp2_tstamp ts) {
+ ngtcp2_rtb_entry *ent;
+ int64_t largest_ack = fr->largest_ack, min_ack;
+ size_t i;
+ int rv;
+ ngtcp2_ksl_it it;
+ ngtcp2_ssize num_acked = 0;
+ ngtcp2_tstamp largest_pkt_sent_ts = UINT64_MAX;
+ ngtcp2_tstamp largest_acked_sent_ts = UINT64_MAX;
+ int64_t pkt_num;
+ ngtcp2_cc *cc = rtb->cc;
+ ngtcp2_rtb_entry *acked_ent = NULL;
+ int ack_eliciting_pkt_acked = 0;
+ size_t ecn_acked = 0;
+ int verify_ecn = 0;
+ ngtcp2_cc_ack cc_ack = {0};
+ size_t num_lost_pkts = rtb->num_lost_pkts - rtb->num_lost_pmtud_pkts;
+
+ cc_ack.prior_bytes_in_flight = cstat->bytes_in_flight;
+ cc_ack.rtt = UINT64_MAX;
+
+ if (conn && (conn->flags & NGTCP2_CONN_FLAG_KEY_UPDATE_NOT_CONFIRMED) &&
+ (conn->flags & NGTCP2_CONN_FLAG_KEY_UPDATE_INITIATOR) &&
+ largest_ack >= conn->pktns.crypto.tx.ckm->pkt_num) {
+ conn->flags &= (uint32_t) ~(NGTCP2_CONN_FLAG_KEY_UPDATE_NOT_CONFIRMED |
+ NGTCP2_CONN_FLAG_KEY_UPDATE_INITIATOR);
+ conn->crypto.key_update.confirmed_ts = ts;
+
+ ngtcp2_log_info(rtb->log, NGTCP2_LOG_EVENT_CRY, "key update confirmed");
+ }
+
+ if (rtb->largest_acked_tx_pkt_num < largest_ack) {
+ rtb->largest_acked_tx_pkt_num = largest_ack;
+ verify_ecn = 1;
+ }
+
+ /* Assume that ngtcp2_pkt_validate_ack(fr) returns 0 */
+ it = ngtcp2_ksl_lower_bound(&rtb->ents, &largest_ack);
+ if (ngtcp2_ksl_it_end(&it)) {
+ if (conn && verify_ecn) {
+ conn_verify_ecn(conn, pktns, rtb->cc, cstat, fr, ecn_acked,
+ largest_acked_sent_ts, ts);
+ }
+ return 0;
+ }
+
+ min_ack = largest_ack - (int64_t)fr->first_ack_range;
+
+ for (; !ngtcp2_ksl_it_end(&it);) {
+ pkt_num = *(int64_t *)ngtcp2_ksl_it_key(&it);
+
+ assert(pkt_num <= largest_ack);
+
+ if (pkt_num < min_ack) {
+ break;
+ }
+
+ ent = ngtcp2_ksl_it_get(&it);
+
+ if (largest_ack == pkt_num) {
+ largest_pkt_sent_ts = ent->ts;
+ }
+
+ if (ent->flags & NGTCP2_RTB_ENTRY_FLAG_ACK_ELICITING) {
+ ack_eliciting_pkt_acked = 1;
+ }
+
+ rtb_remove(rtb, &it, &acked_ent, ent, cstat);
+ ++num_acked;
+ }
+
+ for (i = 0; i < fr->rangecnt;) {
+ largest_ack = min_ack - (int64_t)fr->ranges[i].gap - 2;
+ min_ack = largest_ack - (int64_t)fr->ranges[i].len;
+
+ it = ngtcp2_ksl_lower_bound(&rtb->ents, &largest_ack);
+ if (ngtcp2_ksl_it_end(&it)) {
+ break;
+ }
+
+ for (; !ngtcp2_ksl_it_end(&it);) {
+ pkt_num = *(int64_t *)ngtcp2_ksl_it_key(&it);
+ if (pkt_num < min_ack) {
+ break;
+ }
+ ent = ngtcp2_ksl_it_get(&it);
+
+ if (ent->flags & NGTCP2_RTB_ENTRY_FLAG_ACK_ELICITING) {
+ ack_eliciting_pkt_acked = 1;
+ }
+
+ rtb_remove(rtb, &it, &acked_ent, ent, cstat);
+ ++num_acked;
+ }
+
+ ++i;
+ }
+
+ if (largest_pkt_sent_ts != UINT64_MAX && ack_eliciting_pkt_acked) {
+ cc_ack.rtt = pkt_ts - largest_pkt_sent_ts;
+
+ rv = ngtcp2_conn_update_rtt(conn, cc_ack.rtt, fr->ack_delay_unscaled, ts);
+ if (rv == 0 && cc->new_rtt_sample) {
+ cc->new_rtt_sample(cc, cstat, ts);
+ }
+ }
+
+ if (conn) {
+ for (ent = acked_ent; ent; ent = acked_ent) {
+ if (ent->hd.pkt_num >= pktns->tx.ecn.start_pkt_num &&
+ (ent->flags & NGTCP2_RTB_ENTRY_FLAG_ECN)) {
+ ++ecn_acked;
+ }
+
+ assert(largest_acked_sent_ts == UINT64_MAX ||
+ largest_acked_sent_ts <= ent->ts);
+
+ largest_acked_sent_ts = ent->ts;
+
+ rv = rtb_process_acked_pkt(rtb, ent, conn);
+ if (rv != 0) {
+ goto fail;
+ }
+
+ if (ent->hd.pkt_num >= rtb->cc_pkt_num) {
+ assert(cc_ack.pkt_delivered <= ent->rst.delivered);
+
+ cc_ack.bytes_delivered += ent->pktlen;
+ cc_ack.pkt_delivered = ent->rst.delivered;
+ }
+
+ rtb_on_pkt_acked(rtb, ent, cstat, ts);
+ acked_ent = ent->next;
+ ngtcp2_rtb_entry_objalloc_del(ent, rtb->rtb_entry_objalloc,
+ rtb->frc_objalloc, rtb->mem);
+ }
+
+ if (verify_ecn) {
+ conn_verify_ecn(conn, pktns, rtb->cc, cstat, fr, ecn_acked,
+ largest_acked_sent_ts, ts);
+ }
+ } else {
+ /* For unit tests */
+ for (ent = acked_ent; ent; ent = acked_ent) {
+ rtb_on_pkt_acked(rtb, ent, cstat, ts);
+ acked_ent = ent->next;
+ ngtcp2_rtb_entry_objalloc_del(ent, rtb->rtb_entry_objalloc,
+ rtb->frc_objalloc, rtb->mem);
+ }
+ }
+
+ if (rtb->cc->on_spurious_congestion && num_lost_pkts &&
+ rtb->num_lost_pkts - rtb->num_lost_pmtud_pkts == 0) {
+ rtb->cc->on_spurious_congestion(cc, cstat, ts);
+ }
+
+ ngtcp2_rst_on_ack_recv(rtb->rst, cstat, cc_ack.pkt_delivered);
+
+ if (conn && num_acked > 0) {
+ rv = rtb_detect_lost_pkt(rtb, &cc_ack.bytes_lost, conn, pktns, cstat, ts);
+ if (rv != 0) {
+ return rv;
+ }
+ }
+
+ rtb->rst->lost += cc_ack.bytes_lost;
+
+ cc_ack.largest_acked_sent_ts = largest_acked_sent_ts;
+ cc->on_ack_recv(cc, cstat, &cc_ack, ts);
+
+ return num_acked;
+
+fail:
+ for (ent = acked_ent; ent; ent = acked_ent) {
+ acked_ent = ent->next;
+ ngtcp2_rtb_entry_objalloc_del(ent, rtb->rtb_entry_objalloc,
+ rtb->frc_objalloc, rtb->mem);
+ }
+
+ return rv;
+}
+
+static int rtb_pkt_lost(ngtcp2_rtb *rtb, ngtcp2_conn_stat *cstat,
+ const ngtcp2_rtb_entry *ent, ngtcp2_duration loss_delay,
+ size_t pkt_thres, ngtcp2_tstamp ts) {
+ ngtcp2_tstamp loss_time;
+
+ if (ent->ts + loss_delay <= ts ||
+ rtb->largest_acked_tx_pkt_num >= ent->hd.pkt_num + (int64_t)pkt_thres) {
+ return 1;
+ }
+
+ loss_time = cstat->loss_time[rtb->pktns_id];
+
+ if (loss_time == UINT64_MAX) {
+ loss_time = ent->ts + loss_delay;
+ } else {
+ loss_time = ngtcp2_min(loss_time, ent->ts + loss_delay);
+ }
+
+ cstat->loss_time[rtb->pktns_id] = loss_time;
+
+ return 0;
+}
+
+/*
+ * rtb_compute_pkt_loss_delay computes loss delay.
+ */
+static ngtcp2_duration compute_pkt_loss_delay(const ngtcp2_conn_stat *cstat) {
+ /* 9/8 is kTimeThreshold */
+ ngtcp2_duration loss_delay =
+ ngtcp2_max(cstat->latest_rtt, cstat->smoothed_rtt) * 9 / 8;
+ return ngtcp2_max(loss_delay, NGTCP2_GRANULARITY);
+}
+
+/*
+ * conn_all_ecn_pkt_lost returns nonzero if all ECN QUIC packets are
+ * lost during validation period.
+ */
+static int conn_all_ecn_pkt_lost(ngtcp2_conn *conn) {
+ ngtcp2_pktns *in_pktns = conn->in_pktns;
+ ngtcp2_pktns *hs_pktns = conn->hs_pktns;
+ ngtcp2_pktns *pktns = &conn->pktns;
+
+ return (!in_pktns || in_pktns->tx.ecn.validation_pkt_sent ==
+ in_pktns->tx.ecn.validation_pkt_lost) &&
+ (!hs_pktns || hs_pktns->tx.ecn.validation_pkt_sent ==
+ hs_pktns->tx.ecn.validation_pkt_lost) &&
+ pktns->tx.ecn.validation_pkt_sent == pktns->tx.ecn.validation_pkt_lost;
+}
+
+static int rtb_detect_lost_pkt(ngtcp2_rtb *rtb, uint64_t *ppkt_lost,
+ ngtcp2_conn *conn, ngtcp2_pktns *pktns,
+ ngtcp2_conn_stat *cstat, ngtcp2_tstamp ts) {
+ ngtcp2_rtb_entry *ent;
+ ngtcp2_duration loss_delay;
+ ngtcp2_ksl_it it;
+ ngtcp2_tstamp latest_ts, oldest_ts;
+ int64_t last_lost_pkt_num;
+ ngtcp2_duration loss_window, congestion_period;
+ ngtcp2_cc *cc = rtb->cc;
+ int rv;
+ uint64_t pkt_thres =
+ rtb->cc_bytes_in_flight / cstat->max_tx_udp_payload_size / 2;
+ size_t ecn_pkt_lost = 0;
+ ngtcp2_tstamp start_ts;
+ ngtcp2_duration pto = ngtcp2_conn_compute_pto(conn, pktns);
+ uint64_t bytes_lost = 0;
+ ngtcp2_duration max_ack_delay;
+
+ pkt_thres = ngtcp2_max(pkt_thres, NGTCP2_PKT_THRESHOLD);
+ pkt_thres = ngtcp2_min(pkt_thres, 256);
+ cstat->loss_time[rtb->pktns_id] = UINT64_MAX;
+ loss_delay = compute_pkt_loss_delay(cstat);
+
+ it = ngtcp2_ksl_lower_bound(&rtb->ents, &rtb->largest_acked_tx_pkt_num);
+ for (; !ngtcp2_ksl_it_end(&it); ngtcp2_ksl_it_next(&it)) {
+ ent = ngtcp2_ksl_it_get(&it);
+
+ if (ent->flags & NGTCP2_RTB_ENTRY_FLAG_LOST_RETRANSMITTED) {
+ break;
+ }
+
+ if (rtb_pkt_lost(rtb, cstat, ent, loss_delay, (size_t)pkt_thres, ts)) {
+ /* All entries from ent are considered to be lost. */
+ latest_ts = oldest_ts = ent->ts;
+ last_lost_pkt_num = ent->hd.pkt_num;
+ max_ack_delay = conn->remote.transport_params
+ ? conn->remote.transport_params->max_ack_delay
+ : 0;
+
+ congestion_period =
+ (cstat->smoothed_rtt +
+ ngtcp2_max(4 * cstat->rttvar, NGTCP2_GRANULARITY) + max_ack_delay) *
+ NGTCP2_PERSISTENT_CONGESTION_THRESHOLD;
+
+ start_ts = ngtcp2_max(rtb->persistent_congestion_start_ts,
+ cstat->first_rtt_sample_ts);
+
+ for (; !ngtcp2_ksl_it_end(&it);) {
+ ent = ngtcp2_ksl_it_get(&it);
+
+ if (last_lost_pkt_num == ent->hd.pkt_num + 1 && ent->ts >= start_ts) {
+ last_lost_pkt_num = ent->hd.pkt_num;
+ oldest_ts = ent->ts;
+ } else {
+ last_lost_pkt_num = -1;
+ }
+
+ if ((ent->flags & NGTCP2_RTB_ENTRY_FLAG_LOST_RETRANSMITTED)) {
+ if (rtb->pktns_id != NGTCP2_PKTNS_ID_APPLICATION ||
+ last_lost_pkt_num == -1 ||
+ latest_ts - oldest_ts >= congestion_period) {
+ break;
+ }
+ ngtcp2_ksl_it_next(&it);
+ continue;
+ }
+
+ if (ent->hd.pkt_num >= pktns->tx.ecn.start_pkt_num &&
+ (ent->flags & NGTCP2_RTB_ENTRY_FLAG_ECN)) {
+ ++ecn_pkt_lost;
+ }
+
+ bytes_lost += rtb_on_remove(rtb, ent, cstat);
+ rv = rtb_on_pkt_lost(rtb, &it, ent, cstat, conn, pktns, ts);
+ if (rv != 0) {
+ return rv;
+ }
+ }
+
+ /* If only PMTUD packets are lost, do not trigger congestion
+ event. */
+ if (bytes_lost == 0) {
+ break;
+ }
+
+ switch (conn->tx.ecn.state) {
+ case NGTCP2_ECN_STATE_TESTING:
+ if (conn->tx.ecn.validation_start_ts == UINT64_MAX) {
+ break;
+ }
+ if (ts - conn->tx.ecn.validation_start_ts < 3 * pto) {
+ pktns->tx.ecn.validation_pkt_lost += ecn_pkt_lost;
+ assert(pktns->tx.ecn.validation_pkt_sent >=
+ pktns->tx.ecn.validation_pkt_lost);
+ break;
+ }
+ conn->tx.ecn.state = NGTCP2_ECN_STATE_UNKNOWN;
+ /* fall through */
+ case NGTCP2_ECN_STATE_UNKNOWN:
+ pktns->tx.ecn.validation_pkt_lost += ecn_pkt_lost;
+ assert(pktns->tx.ecn.validation_pkt_sent >=
+ pktns->tx.ecn.validation_pkt_lost);
+ if (conn_all_ecn_pkt_lost(conn)) {
+ conn->tx.ecn.state = NGTCP2_ECN_STATE_FAILED;
+ }
+ break;
+ default:
+ break;
+ }
+
+ cc->congestion_event(cc, cstat, latest_ts, ts);
+
+ loss_window = latest_ts - oldest_ts;
+ /* Persistent congestion situation is only evaluated for app
+ * packet number space and for the packets sent after handshake
+ * is confirmed. During handshake, there is not much packets
+ * sent and also people seem to do lots of effort not to trigger
+ * persistent congestion there, then it is a lot easier to just
+ * not enable it during handshake.
+ */
+ if (rtb->pktns_id == NGTCP2_PKTNS_ID_APPLICATION && loss_window > 0) {
+ if (loss_window >= congestion_period) {
+ ngtcp2_log_info(rtb->log, NGTCP2_LOG_EVENT_RCV,
+ "persistent congestion loss_window=%" PRIu64
+ " congestion_period=%" PRIu64,
+ loss_window, congestion_period);
+
+ /* Reset min_rtt, srtt, and rttvar here. Next new RTT
+ sample will be used to recalculate these values. */
+ cstat->min_rtt = UINT64_MAX;
+ cstat->smoothed_rtt = conn->local.settings.initial_rtt;
+ cstat->rttvar = conn->local.settings.initial_rtt / 2;
+ cstat->first_rtt_sample_ts = UINT64_MAX;
+
+ cc->on_persistent_congestion(cc, cstat, ts);
+ }
+ }
+
+ break;
+ }
+ }
+
+ ngtcp2_rtb_remove_excessive_lost_pkt(rtb, (size_t)pkt_thres);
+
+ if (ppkt_lost) {
+ *ppkt_lost = bytes_lost;
+ }
+
+ return 0;
+}
+
+int ngtcp2_rtb_detect_lost_pkt(ngtcp2_rtb *rtb, ngtcp2_conn *conn,
+ ngtcp2_pktns *pktns, ngtcp2_conn_stat *cstat,
+ ngtcp2_tstamp ts) {
+ return rtb_detect_lost_pkt(rtb, /* ppkt_lost = */ NULL, conn, pktns, cstat,
+ ts);
+}
+
+void ngtcp2_rtb_remove_excessive_lost_pkt(ngtcp2_rtb *rtb, size_t n) {
+ ngtcp2_ksl_it it = ngtcp2_ksl_end(&rtb->ents);
+ ngtcp2_rtb_entry *ent;
+ int rv;
+ (void)rv;
+
+ for (; rtb->num_lost_pkts > n;) {
+ assert(ngtcp2_ksl_it_end(&it));
+ ngtcp2_ksl_it_prev(&it);
+ ent = ngtcp2_ksl_it_get(&it);
+
+ assert(ent->flags & NGTCP2_RTB_ENTRY_FLAG_LOST_RETRANSMITTED);
+
+ ngtcp2_log_info(rtb->log, NGTCP2_LOG_EVENT_RCV,
+ "removing stale lost pkn=%" PRId64, ent->hd.pkt_num);
+
+ --rtb->num_lost_pkts;
+
+ if (ent->flags & NGTCP2_RTB_ENTRY_FLAG_PMTUD_PROBE) {
+ --rtb->num_lost_pmtud_pkts;
+ }
+
+ rv = ngtcp2_ksl_remove_hint(&rtb->ents, &it, &it, &ent->hd.pkt_num);
+ assert(0 == rv);
+ ngtcp2_rtb_entry_objalloc_del(ent, rtb->rtb_entry_objalloc,
+ rtb->frc_objalloc, rtb->mem);
+ }
+}
+
+void ngtcp2_rtb_remove_expired_lost_pkt(ngtcp2_rtb *rtb, ngtcp2_duration pto,
+ ngtcp2_tstamp ts) {
+ ngtcp2_ksl_it it;
+ ngtcp2_rtb_entry *ent;
+ int rv;
+ (void)rv;
+
+ if (ngtcp2_ksl_len(&rtb->ents) == 0) {
+ return;
+ }
+
+ it = ngtcp2_ksl_end(&rtb->ents);
+
+ for (;;) {
+ assert(ngtcp2_ksl_it_end(&it));
+
+ ngtcp2_ksl_it_prev(&it);
+ ent = ngtcp2_ksl_it_get(&it);
+
+ if (!(ent->flags & NGTCP2_RTB_ENTRY_FLAG_LOST_RETRANSMITTED) ||
+ ts - ent->lost_ts < pto) {
+ return;
+ }
+
+ ngtcp2_log_info(rtb->log, NGTCP2_LOG_EVENT_RCV,
+ "removing stale lost pkn=%" PRId64, ent->hd.pkt_num);
+
+ --rtb->num_lost_pkts;
+
+ if (ent->flags & NGTCP2_RTB_ENTRY_FLAG_PMTUD_PROBE) {
+ --rtb->num_lost_pmtud_pkts;
+ }
+
+ rv = ngtcp2_ksl_remove_hint(&rtb->ents, &it, &it, &ent->hd.pkt_num);
+ assert(0 == rv);
+ ngtcp2_rtb_entry_objalloc_del(ent, rtb->rtb_entry_objalloc,
+ rtb->frc_objalloc, rtb->mem);
+
+ if (ngtcp2_ksl_len(&rtb->ents) == 0) {
+ return;
+ }
+ }
+}
+
+ngtcp2_tstamp ngtcp2_rtb_lost_pkt_ts(ngtcp2_rtb *rtb) {
+ ngtcp2_ksl_it it;
+ ngtcp2_rtb_entry *ent;
+
+ if (ngtcp2_ksl_len(&rtb->ents) == 0) {
+ return UINT64_MAX;
+ }
+
+ it = ngtcp2_ksl_end(&rtb->ents);
+ ngtcp2_ksl_it_prev(&it);
+ ent = ngtcp2_ksl_it_get(&it);
+
+ if (!(ent->flags & NGTCP2_RTB_ENTRY_FLAG_LOST_RETRANSMITTED)) {
+ return UINT64_MAX;
+ }
+
+ return ent->lost_ts;
+}
+
+static int rtb_on_pkt_lost_resched_move(ngtcp2_rtb *rtb, ngtcp2_conn *conn,
+ ngtcp2_pktns *pktns,
+ ngtcp2_rtb_entry *ent) {
+ ngtcp2_frame_chain **pfrc, *frc;
+ ngtcp2_stream *sfr;
+ ngtcp2_strm *strm;
+ int rv;
+ int streamfrq_empty;
+
+ ngtcp2_log_pkt_lost(rtb->log, ent->hd.pkt_num, ent->hd.type, ent->hd.flags,
+ ent->ts);
+
+ if (rtb->qlog) {
+ ngtcp2_qlog_pkt_lost(rtb->qlog, ent);
+ }
+
+ if (ent->flags & NGTCP2_RTB_ENTRY_FLAG_PROBE) {
+ ngtcp2_log_info(rtb->log, NGTCP2_LOG_EVENT_RCV,
+ "pkn=%" PRId64
+ " is a probe packet, no retransmission is necessary",
+ ent->hd.pkt_num);
+ return 0;
+ }
+
+ if (ent->flags & NGTCP2_RTB_ENTRY_FLAG_PMTUD_PROBE) {
+ ngtcp2_log_info(rtb->log, NGTCP2_LOG_EVENT_RCV,
+ "pkn=%" PRId64
+ " is a PMTUD probe packet, no retransmission is necessary",
+ ent->hd.pkt_num);
+ return 0;
+ }
+
+ if (ent->flags & NGTCP2_RTB_ENTRY_FLAG_LOST_RETRANSMITTED) {
+ --rtb->num_lost_pkts;
+
+ if (ent->flags & NGTCP2_RTB_ENTRY_FLAG_PMTUD_PROBE) {
+ --rtb->num_lost_pmtud_pkts;
+ }
+
+ ngtcp2_log_info(rtb->log, NGTCP2_LOG_EVENT_RCV,
+ "pkn=%" PRId64
+ " was declared lost and has already been retransmitted",
+ ent->hd.pkt_num);
+ return 0;
+ }
+
+ if (ent->flags & NGTCP2_RTB_ENTRY_FLAG_PTO_RECLAIMED) {
+ ngtcp2_log_info(rtb->log, NGTCP2_LOG_EVENT_RCV,
+ "pkn=%" PRId64 " has already been reclaimed on PTO",
+ ent->hd.pkt_num);
+ return 0;
+ }
+
+ if (!(ent->flags & NGTCP2_RTB_ENTRY_FLAG_RETRANSMITTABLE) &&
+ (!(ent->flags & NGTCP2_RTB_ENTRY_FLAG_DATAGRAM) ||
+ !conn->callbacks.lost_datagram)) {
+ /* PADDING only (or PADDING + ACK ) packets will have NULL
+ ent->frc. */
+ return 0;
+ }
+
+ pfrc = &ent->frc;
+
+ for (; *pfrc;) {
+ switch ((*pfrc)->fr.type) {
+ case NGTCP2_FRAME_STREAM:
+ frc = *pfrc;
+
+ *pfrc = frc->next;
+ frc->next = NULL;
+ sfr = &frc->fr.stream;
+
+ strm = ngtcp2_conn_find_stream(conn, sfr->stream_id);
+ if (!strm) {
+ ngtcp2_frame_chain_objalloc_del(frc, rtb->frc_objalloc, rtb->mem);
+ break;
+ }
+ streamfrq_empty = ngtcp2_strm_streamfrq_empty(strm);
+ rv = ngtcp2_strm_streamfrq_push(strm, frc);
+ if (rv != 0) {
+ ngtcp2_frame_chain_objalloc_del(frc, rtb->frc_objalloc, rtb->mem);
+ return rv;
+ }
+ if (!ngtcp2_strm_is_tx_queued(strm)) {
+ strm->cycle = ngtcp2_conn_tx_strmq_first_cycle(conn);
+ rv = ngtcp2_conn_tx_strmq_push(conn, strm);
+ if (rv != 0) {
+ return rv;
+ }
+ }
+ if (streamfrq_empty) {
+ ++conn->tx.strmq_nretrans;
+ }
+ break;
+ case NGTCP2_FRAME_CRYPTO:
+ frc = *pfrc;
+
+ *pfrc = frc->next;
+ frc->next = NULL;
+
+ rv = ngtcp2_ksl_insert(&pktns->crypto.tx.frq, NULL,
+ &frc->fr.crypto.offset, frc);
+ if (rv != 0) {
+ assert(ngtcp2_err_is_fatal(rv));
+ ngtcp2_frame_chain_objalloc_del(frc, rtb->frc_objalloc, rtb->mem);
+ return rv;
+ }
+ break;
+ case NGTCP2_FRAME_DATAGRAM:
+ case NGTCP2_FRAME_DATAGRAM_LEN:
+ frc = *pfrc;
+
+ if (conn->callbacks.lost_datagram) {
+ rv = conn->callbacks.lost_datagram(conn, frc->fr.datagram.dgram_id,
+ conn->user_data);
+ if (rv != 0) {
+ return NGTCP2_ERR_CALLBACK_FAILURE;
+ }
+ }
+
+ *pfrc = (*pfrc)->next;
+
+ ngtcp2_frame_chain_objalloc_del(frc, rtb->frc_objalloc, rtb->mem);
+ break;
+ default:
+ pfrc = &(*pfrc)->next;
+ }
+ }
+
+ *pfrc = pktns->tx.frq;
+ pktns->tx.frq = ent->frc;
+ ent->frc = NULL;
+
+ return 0;
+}
+
+int ngtcp2_rtb_remove_all(ngtcp2_rtb *rtb, ngtcp2_conn *conn,
+ ngtcp2_pktns *pktns, ngtcp2_conn_stat *cstat) {
+ ngtcp2_rtb_entry *ent;
+ ngtcp2_ksl_it it;
+ int rv;
+
+ it = ngtcp2_ksl_begin(&rtb->ents);
+
+ for (; !ngtcp2_ksl_it_end(&it);) {
+ ent = ngtcp2_ksl_it_get(&it);
+
+ rtb_on_remove(rtb, ent, cstat);
+ rv = ngtcp2_ksl_remove_hint(&rtb->ents, &it, &it, &ent->hd.pkt_num);
+ assert(0 == rv);
+
+ rv = rtb_on_pkt_lost_resched_move(rtb, conn, pktns, ent);
+ ngtcp2_rtb_entry_objalloc_del(ent, rtb->rtb_entry_objalloc,
+ rtb->frc_objalloc, rtb->mem);
+ if (rv != 0) {
+ return rv;
+ }
+ }
+
+ return 0;
+}
+
+void ngtcp2_rtb_remove_early_data(ngtcp2_rtb *rtb, ngtcp2_conn_stat *cstat) {
+ ngtcp2_rtb_entry *ent;
+ ngtcp2_ksl_it it;
+ int rv;
+ (void)rv;
+
+ it = ngtcp2_ksl_begin(&rtb->ents);
+
+ for (; !ngtcp2_ksl_it_end(&it);) {
+ ent = ngtcp2_ksl_it_get(&it);
+
+ if (ent->hd.type != NGTCP2_PKT_0RTT) {
+ ngtcp2_ksl_it_next(&it);
+ continue;
+ }
+
+ rtb_on_remove(rtb, ent, cstat);
+ rv = ngtcp2_ksl_remove_hint(&rtb->ents, &it, &it, &ent->hd.pkt_num);
+ assert(0 == rv);
+
+ ngtcp2_rtb_entry_objalloc_del(ent, rtb->rtb_entry_objalloc,
+ rtb->frc_objalloc, rtb->mem);
+ }
+}
+
+int ngtcp2_rtb_empty(ngtcp2_rtb *rtb) {
+ return ngtcp2_ksl_len(&rtb->ents) == 0;
+}
+
+void ngtcp2_rtb_reset_cc_state(ngtcp2_rtb *rtb, int64_t cc_pkt_num) {
+ rtb->cc_pkt_num = cc_pkt_num;
+ rtb->cc_bytes_in_flight = 0;
+}
+
+ngtcp2_ssize ngtcp2_rtb_reclaim_on_pto(ngtcp2_rtb *rtb, ngtcp2_conn *conn,
+ ngtcp2_pktns *pktns, size_t num_pkts) {
+ ngtcp2_ksl_it it;
+ ngtcp2_rtb_entry *ent;
+ ngtcp2_ssize reclaimed;
+ size_t atmost = num_pkts;
+
+ it = ngtcp2_ksl_end(&rtb->ents);
+ for (; !ngtcp2_ksl_it_begin(&it) && num_pkts >= 1;) {
+ ngtcp2_ksl_it_prev(&it);
+ ent = ngtcp2_ksl_it_get(&it);
+
+ if ((ent->flags & (NGTCP2_RTB_ENTRY_FLAG_LOST_RETRANSMITTED |
+ NGTCP2_RTB_ENTRY_FLAG_PTO_RECLAIMED)) ||
+ !(ent->flags & NGTCP2_RTB_ENTRY_FLAG_RETRANSMITTABLE)) {
+ continue;
+ }
+
+ assert(ent->frc);
+
+ reclaimed =
+ rtb_reclaim_frame(rtb, NGTCP2_RECLAIM_FLAG_NONE, conn, pktns, ent);
+ if (reclaimed < 0) {
+ return reclaimed;
+ }
+
+ /* Mark reclaimed even if reclaimed == 0 so that we can skip it in
+ the next run. */
+ ent->flags |= NGTCP2_RTB_ENTRY_FLAG_PTO_RECLAIMED;
+
+ assert(rtb->num_retransmittable);
+ --rtb->num_retransmittable;
+
+ if (ent->flags & NGTCP2_RTB_ENTRY_FLAG_PTO_ELICITING) {
+ ent->flags &= (uint16_t)~NGTCP2_RTB_ENTRY_FLAG_PTO_ELICITING;
+ assert(rtb->num_pto_eliciting);
+ --rtb->num_pto_eliciting;
+ }
+
+ if (reclaimed) {
+ --num_pkts;
+ }
+ }
+
+ return (ngtcp2_ssize)(atmost - num_pkts);
+}
diff --git a/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_rtb.h b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_rtb.h
new file mode 100644
index 0000000..5183aed
--- /dev/null
+++ b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_rtb.h
@@ -0,0 +1,467 @@
+/*
+ * ngtcp2
+ *
+ * Copyright (c) 2017 ngtcp2 contributors
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#ifndef NGTCP2_RTB_H
+#define NGTCP2_RTB_H
+
+#ifdef HAVE_CONFIG_H
+# include <config.h>
+#endif /* HAVE_CONFIG_H */
+
+#include <ngtcp2/ngtcp2.h>
+
+#include "ngtcp2_pkt.h"
+#include "ngtcp2_ksl.h"
+#include "ngtcp2_pq.h"
+#include "ngtcp2_objalloc.h"
+
+typedef struct ngtcp2_conn ngtcp2_conn;
+typedef struct ngtcp2_pktns ngtcp2_pktns;
+typedef struct ngtcp2_log ngtcp2_log;
+typedef struct ngtcp2_qlog ngtcp2_qlog;
+typedef struct ngtcp2_strm ngtcp2_strm;
+typedef struct ngtcp2_rst ngtcp2_rst;
+typedef struct ngtcp2_cc ngtcp2_cc;
+
+/* NGTCP2_FRAME_CHAIN_BINDER_FLAG_NONE indicates that no flag is
+ set. */
+#define NGTCP2_FRAME_CHAIN_BINDER_FLAG_NONE 0x00u
+/* NGTCP2_FRAME_CHAIN_BINDER_FLAG_ACK indicates that an information
+ which a frame carries has been acknowledged. */
+#define NGTCP2_FRAME_CHAIN_BINDER_FLAG_ACK 0x01u
+
+/*
+ * ngtcp2_frame_chain_binder binds 2 or more of ngtcp2_frame_chain to
+ * share the acknowledgement state. In general, all
+ * ngtcp2_frame_chains bound to the same binder must have the same
+ * information.
+ */
+typedef struct ngtcp2_frame_chain_binder {
+ size_t refcount;
+ /* flags is bitwise OR of zero or more of
+ NGTCP2_FRAME_CHAIN_BINDER_FLAG_*. */
+ uint32_t flags;
+} ngtcp2_frame_chain_binder;
+
+int ngtcp2_frame_chain_binder_new(ngtcp2_frame_chain_binder **pbinder,
+ const ngtcp2_mem *mem);
+
+typedef struct ngtcp2_frame_chain ngtcp2_frame_chain;
+
+/*
+ * ngtcp2_frame_chain chains frames in a single packet.
+ */
+struct ngtcp2_frame_chain {
+ union {
+ struct {
+ ngtcp2_frame_chain *next;
+ ngtcp2_frame_chain_binder *binder;
+ ngtcp2_frame fr;
+ };
+
+ ngtcp2_opl_entry oplent;
+ };
+};
+
+ngtcp2_objalloc_def(frame_chain, ngtcp2_frame_chain, oplent);
+
+/*
+ * ngtcp2_bind_frame_chains binds two frame chains |a| and |b| using
+ * new or existing ngtcp2_frame_chain_binder. |a| might have non-NULL
+ * a->binder. |b| must not have non-NULL b->binder.
+ *
+ * This function returns 0 if it succeeds, or one of the following
+ * negative error codes:
+ *
+ * NGTCP2_ERR_NOMEM
+ * Out of memory
+ */
+int ngtcp2_bind_frame_chains(ngtcp2_frame_chain *a, ngtcp2_frame_chain *b,
+ const ngtcp2_mem *mem);
+
+/* NGTCP2_MAX_STREAM_DATACNT is the maximum number of ngtcp2_vec that
+ a ngtcp2_stream can include. */
+#define NGTCP2_MAX_STREAM_DATACNT 256
+
+/* NGTCP2_MAX_CRYPTO_DATACNT is the maximum number of ngtcp2_vec that
+ a ngtcp2_crypto can include. */
+#define NGTCP2_MAX_CRYPTO_DATACNT 8
+
+/*
+ * ngtcp2_frame_chain_new allocates ngtcp2_frame_chain object and
+ * assigns its pointer to |*pfrc|.
+ *
+ * This function returns 0 if it succeeds, or one of the following
+ * negative error codes:
+ *
+ * NGTCP2_ERR_NOMEM
+ * Out of memory.
+ */
+int ngtcp2_frame_chain_new(ngtcp2_frame_chain **pfrc, const ngtcp2_mem *mem);
+
+/*
+ * ngtcp2_frame_chain_objalloc_new behaves like
+ * ngtcp2_frame_chain_new, but it uses |objalloc| to allocate the object.
+ */
+int ngtcp2_frame_chain_objalloc_new(ngtcp2_frame_chain **pfrc,
+ ngtcp2_objalloc *objalloc);
+
+/*
+ * ngtcp2_frame_chain_extralen_new works like ngtcp2_frame_chain_new,
+ * but it allocates extra memory |extralen| in order to extend
+ * ngtcp2_frame.
+ */
+int ngtcp2_frame_chain_extralen_new(ngtcp2_frame_chain **pfrc, size_t extralen,
+ const ngtcp2_mem *mem);
+
+/*
+ * ngtcp2_frame_chain_stream_datacnt_objalloc_new works like
+ * ngtcp2_frame_chain_new, but it allocates enough data to store
+ * additional |datacnt| - 1 ngtcp2_vec object after ngtcp2_stream
+ * object. If no additional space is required,
+ * ngtcp2_frame_chain_objalloc_new is called internally.
+ */
+int ngtcp2_frame_chain_stream_datacnt_objalloc_new(ngtcp2_frame_chain **pfrc,
+ size_t datacnt,
+ ngtcp2_objalloc *objalloc,
+ const ngtcp2_mem *mem);
+
+/*
+ * ngtcp2_frame_chain_crypto_datacnt_objalloc_new works like
+ * ngtcp2_frame_chain_new, but it allocates enough data to store
+ * additional |datacnt| - 1 ngtcp2_vec object after ngtcp2_crypto
+ * object. If no additional space is required,
+ * ngtcp2_frame_chain_objalloc_new is called internally.
+ */
+int ngtcp2_frame_chain_crypto_datacnt_objalloc_new(ngtcp2_frame_chain **pfrc,
+ size_t datacnt,
+ ngtcp2_objalloc *objalloc,
+ const ngtcp2_mem *mem);
+
+int ngtcp2_frame_chain_new_token_objalloc_new(ngtcp2_frame_chain **pfrc,
+ const uint8_t *token,
+ size_t tokenlen,
+ ngtcp2_objalloc *objalloc,
+ const ngtcp2_mem *mem);
+
+/*
+ * ngtcp2_frame_chain_del deallocates |frc|. It also deallocates the
+ * memory pointed by |frc|.
+ */
+void ngtcp2_frame_chain_del(ngtcp2_frame_chain *frc, const ngtcp2_mem *mem);
+
+/*
+ * ngtcp2_frame_chain_objalloc_del adds |frc| to |objalloc| for reuse.
+ * It might just delete |frc| depending on the frame type and the size
+ * of |frc|.
+ */
+void ngtcp2_frame_chain_objalloc_del(ngtcp2_frame_chain *frc,
+ ngtcp2_objalloc *objalloc,
+ const ngtcp2_mem *mem);
+
+/*
+ * ngtcp2_frame_chain_init initializes |frc|.
+ */
+void ngtcp2_frame_chain_init(ngtcp2_frame_chain *frc);
+
+/*
+ * ngtcp2_frame_chain_list_objalloc_del adds all ngtcp2_frame_chain
+ * linked from |frc| to |objalloc| for reuse. Depending on the frame type
+ * and its size, ngtcp2_frame_chain might be deleted instead.
+ */
+void ngtcp2_frame_chain_list_objalloc_del(ngtcp2_frame_chain *frc,
+ ngtcp2_objalloc *objalloc,
+ const ngtcp2_mem *mem);
+
+/* NGTCP2_RTB_ENTRY_FLAG_NONE indicates that no flag is set. */
+#define NGTCP2_RTB_ENTRY_FLAG_NONE 0x00u
+/* NGTCP2_RTB_ENTRY_FLAG_PROBE indicates that the entry includes a
+ probe packet. */
+#define NGTCP2_RTB_ENTRY_FLAG_PROBE 0x01u
+/* NGTCP2_RTB_ENTRY_FLAG_RETRANSMITTABLE indicates that the entry
+ includes a frame which must be retransmitted until it is
+ acknowledged. In most cases, this flag is used along with
+ NGTCP2_RTB_ENTRY_FLAG_ACK_ELICITING and
+ NGTCP2_RTB_ENTRY_FLAG_PTO_ELICITING. */
+#define NGTCP2_RTB_ENTRY_FLAG_RETRANSMITTABLE 0x02u
+/* NGTCP2_RTB_ENTRY_FLAG_ACK_ELICITING indicates that the entry
+ elicits acknowledgement. */
+#define NGTCP2_RTB_ENTRY_FLAG_ACK_ELICITING 0x04u
+/* NGTCP2_RTB_ENTRY_FLAG_PTO_RECLAIMED indicates that the packet has
+ been reclaimed on PTO. It is not marked lost yet and still
+ consumes congestion window. */
+#define NGTCP2_RTB_ENTRY_FLAG_PTO_RECLAIMED 0x08u
+/* NGTCP2_RTB_ENTRY_FLAG_LOST_RETRANSMITTED indicates that the entry
+ has been marked lost and, optionally, scheduled to retransmit. */
+#define NGTCP2_RTB_ENTRY_FLAG_LOST_RETRANSMITTED 0x10u
+/* NGTCP2_RTB_ENTRY_FLAG_ECN indicates that the entry is included in a
+ UDP datagram with ECN marking. */
+#define NGTCP2_RTB_ENTRY_FLAG_ECN 0x20u
+/* NGTCP2_RTB_ENTRY_FLAG_DATAGRAM indicates that the entry includes
+ DATAGRAM frame. */
+#define NGTCP2_RTB_ENTRY_FLAG_DATAGRAM 0x40u
+/* NGTCP2_RTB_ENTRY_FLAG_PMTUD_PROBE indicates that the entry includes
+ a PMTUD probe packet. */
+#define NGTCP2_RTB_ENTRY_FLAG_PMTUD_PROBE 0x80u
+/* NGTCP2_RTB_ENTRY_FLAG_PTO_ELICITING indicates that the entry
+ includes a packet which elicits PTO probe packets. */
+#define NGTCP2_RTB_ENTRY_FLAG_PTO_ELICITING 0x100u
+
+typedef struct ngtcp2_rtb_entry ngtcp2_rtb_entry;
+
+/*
+ * ngtcp2_rtb_entry is an object stored in ngtcp2_rtb. It corresponds
+ * to the one packet which is waiting for its ACK.
+ */
+struct ngtcp2_rtb_entry {
+ union {
+ struct {
+ ngtcp2_rtb_entry *next;
+
+ struct {
+ int64_t pkt_num;
+ uint8_t type;
+ uint8_t flags;
+ } hd;
+ ngtcp2_frame_chain *frc;
+ /* ts is the time point when a packet included in this entry is sent
+ to a peer. */
+ ngtcp2_tstamp ts;
+ /* lost_ts is the time when this entry is marked lost. */
+ ngtcp2_tstamp lost_ts;
+ /* pktlen is the length of QUIC packet */
+ size_t pktlen;
+ struct {
+ uint64_t delivered;
+ ngtcp2_tstamp delivered_ts;
+ ngtcp2_tstamp first_sent_ts;
+ uint64_t tx_in_flight;
+ uint64_t lost;
+ int is_app_limited;
+ } rst;
+ /* flags is bitwise-OR of zero or more of
+ NGTCP2_RTB_ENTRY_FLAG_*. */
+ uint16_t flags;
+ };
+
+ ngtcp2_opl_entry oplent;
+ };
+};
+
+ngtcp2_objalloc_def(rtb_entry, ngtcp2_rtb_entry, oplent);
+
+/*
+ * ngtcp2_rtb_entry_new allocates ngtcp2_rtb_entry object, and assigns
+ * its pointer to |*pent|.
+ */
+int ngtcp2_rtb_entry_objalloc_new(ngtcp2_rtb_entry **pent,
+ const ngtcp2_pkt_hd *hd,
+ ngtcp2_frame_chain *frc, ngtcp2_tstamp ts,
+ size_t pktlen, uint16_t flags,
+ ngtcp2_objalloc *objalloc);
+
+/*
+ * ngtcp2_rtb_entry_objalloc_del adds |ent| to |objalloc| for reuse.
+ * ngtcp2_frame_chain linked from ent->frc are also added to
+ * |frc_objalloc| depending on their frame type and size.
+ */
+void ngtcp2_rtb_entry_objalloc_del(ngtcp2_rtb_entry *ent,
+ ngtcp2_objalloc *objalloc,
+ ngtcp2_objalloc *frc_objalloc,
+ const ngtcp2_mem *mem);
+
+/*
+ * ngtcp2_rtb tracks sent packets, and its ACK timeout for
+ * retransmission.
+ */
+typedef struct ngtcp2_rtb {
+ ngtcp2_objalloc *frc_objalloc;
+ ngtcp2_objalloc *rtb_entry_objalloc;
+ /* ents includes ngtcp2_rtb_entry sorted by decreasing order of
+ packet number. */
+ ngtcp2_ksl ents;
+ /* crypto is CRYPTO stream. */
+ ngtcp2_strm *crypto;
+ ngtcp2_rst *rst;
+ ngtcp2_cc *cc;
+ ngtcp2_log *log;
+ ngtcp2_qlog *qlog;
+ const ngtcp2_mem *mem;
+ /* largest_acked_tx_pkt_num is the largest packet number
+ acknowledged by the peer. */
+ int64_t largest_acked_tx_pkt_num;
+ /* num_ack_eliciting is the number of ACK eliciting entries. */
+ size_t num_ack_eliciting;
+ /* num_retransmittable is the number of packets which contain frames
+ that must be retransmitted on loss. */
+ size_t num_retransmittable;
+ /* num_pto_eliciting is the number of packets that elicit PTO probe
+ packets. */
+ size_t num_pto_eliciting;
+ /* probe_pkt_left is the number of probe packet to send */
+ size_t probe_pkt_left;
+ /* pktns_id is the identifier of packet number space. */
+ ngtcp2_pktns_id pktns_id;
+ /* cc_pkt_num is the smallest packet number that is contributed to
+ ngtcp2_conn_stat.bytes_in_flight. */
+ int64_t cc_pkt_num;
+ /* cc_bytes_in_flight is the number of in-flight bytes that is
+ contributed to ngtcp2_conn_stat.bytes_in_flight. It only
+ includes the bytes after congestion state is reset. */
+ uint64_t cc_bytes_in_flight;
+ /* persistent_congestion_start_ts is the time when persistent
+ congestion evaluation is started. It happens roughly after
+ handshake is confirmed. */
+ ngtcp2_tstamp persistent_congestion_start_ts;
+ /* num_lost_pkts is the number entries in ents which has
+ NGTCP2_RTB_ENTRY_FLAG_LOST_RETRANSMITTED flag set. */
+ size_t num_lost_pkts;
+ /* num_lost_pmtud_pkts is the number of entries in ents which have
+ both NGTCP2_RTB_ENTRY_FLAG_LOST_RETRANSMITTED and
+ NGTCP2_RTB_ENTRY_FLAG_PMTUD_PROBE flags set. */
+ size_t num_lost_pmtud_pkts;
+} ngtcp2_rtb;
+
+/*
+ * ngtcp2_rtb_init initializes |rtb|.
+ */
+void ngtcp2_rtb_init(ngtcp2_rtb *rtb, ngtcp2_pktns_id pktns_id,
+ ngtcp2_strm *crypto, ngtcp2_rst *rst, ngtcp2_cc *cc,
+ ngtcp2_log *log, ngtcp2_qlog *qlog,
+ ngtcp2_objalloc *rtb_entry_objalloc,
+ ngtcp2_objalloc *frc_objalloc, const ngtcp2_mem *mem);
+
+/*
+ * ngtcp2_rtb_free deallocates resources allocated for |rtb|.
+ */
+void ngtcp2_rtb_free(ngtcp2_rtb *rtb);
+
+/*
+ * ngtcp2_rtb_add adds |ent| to |rtb|.
+ *
+ * This function returns 0 if it succeeds, or one of the following
+ * negative error codes:
+ *
+ * NGTCP2_ERR_NOMEM
+ * Out of memory
+ */
+int ngtcp2_rtb_add(ngtcp2_rtb *rtb, ngtcp2_rtb_entry *ent,
+ ngtcp2_conn_stat *cstat);
+
+/*
+ * ngtcp2_rtb_head returns the iterator which points to the entry
+ * which has the largest packet number. If there is no entry,
+ * returned value satisfies ngtcp2_ksl_it_end(&it) != 0.
+ */
+ngtcp2_ksl_it ngtcp2_rtb_head(ngtcp2_rtb *rtb);
+
+/*
+ * ngtcp2_rtb_recv_ack removes acked ngtcp2_rtb_entry from |rtb|.
+ * |pkt_num| is a packet number which includes |fr|. |pkt_ts| is the
+ * timestamp when packet is received. |ts| should be the current
+ * time. Usually they are the same, but for buffered packets,
+ * |pkt_ts| would be earlier than |ts|.
+ *
+ * This function returns the number of newly acknowledged packets if
+ * it succeeds, or one of the following negative error codes:
+ *
+ * NGTCP2_ERR_CALLBACK_FAILURE
+ * User callback failed
+ * NGTCP2_ERR_NOMEM
+ * Out of memory
+ */
+ngtcp2_ssize ngtcp2_rtb_recv_ack(ngtcp2_rtb *rtb, const ngtcp2_ack *fr,
+ ngtcp2_conn_stat *cstat, ngtcp2_conn *conn,
+ ngtcp2_pktns *pktns, ngtcp2_tstamp pkt_ts,
+ ngtcp2_tstamp ts);
+
+/*
+ * ngtcp2_rtb_detect_lost_pkt detects lost packets and prepends the
+ * frames contained them to |*pfrc|. Even when this function fails,
+ * some frames might be prepended to |*pfrc| and the caller should
+ * handle them.
+ */
+int ngtcp2_rtb_detect_lost_pkt(ngtcp2_rtb *rtb, ngtcp2_conn *conn,
+ ngtcp2_pktns *pktns, ngtcp2_conn_stat *cstat,
+ ngtcp2_tstamp ts);
+
+/*
+ * ngtcp2_rtb_remove_expired_lost_pkt removes expired lost packet.
+ */
+void ngtcp2_rtb_remove_expired_lost_pkt(ngtcp2_rtb *rtb, ngtcp2_duration pto,
+ ngtcp2_tstamp ts);
+
+/*
+ * ngtcp2_rtb_lost_pkt_ts returns the earliest time when the still
+ * retained packet was lost. It returns UINT64_MAX if no such packet
+ * exists.
+ */
+ngtcp2_tstamp ngtcp2_rtb_lost_pkt_ts(ngtcp2_rtb *rtb);
+
+/*
+ * ngtcp2_rtb_remove_all removes all packets from |rtb| and prepends
+ * all frames to |*pfrc|. Even when this function fails, some frames
+ * might be prepended to |*pfrc| and the caller should handle them.
+ */
+int ngtcp2_rtb_remove_all(ngtcp2_rtb *rtb, ngtcp2_conn *conn,
+ ngtcp2_pktns *pktns, ngtcp2_conn_stat *cstat);
+
+/*
+ * ngtcp2_rtb_remove_early_data removes all entries for 0RTT packets.
+ */
+void ngtcp2_rtb_remove_early_data(ngtcp2_rtb *rtb, ngtcp2_conn_stat *cstat);
+
+/*
+ * ngtcp2_rtb_empty returns nonzero if |rtb| have no entry.
+ */
+int ngtcp2_rtb_empty(ngtcp2_rtb *rtb);
+
+/*
+ * ngtcp2_rtb_reset_cc_state resets congestion state in |rtb|.
+ * |cc_pkt_num| is the next outbound packet number which is sent under
+ * new congestion state.
+ */
+void ngtcp2_rtb_reset_cc_state(ngtcp2_rtb *rtb, int64_t cc_pkt_num);
+
+/*
+ * ngtcp2_rtb_remove_expired_lost_pkt ensures that the number of lost
+ * packets at most |n|.
+ */
+void ngtcp2_rtb_remove_excessive_lost_pkt(ngtcp2_rtb *rtb, size_t n);
+
+/*
+ * ngtcp2_rtb_reclaim_on_pto reclaims up to |num_pkts| packets which
+ * are in-flight and not marked lost to send them in PTO probe. The
+ * reclaimed frames are chained to |*pfrc|.
+ *
+ * This function returns the number of packets reclaimed if it
+ * succeeds, or one of the following negative error codes:
+ *
+ * NGTCP2_ERR_NOMEM
+ * Out of memory
+ */
+ngtcp2_ssize ngtcp2_rtb_reclaim_on_pto(ngtcp2_rtb *rtb, ngtcp2_conn *conn,
+ ngtcp2_pktns *pktns, size_t num_pkts);
+
+#endif /* NGTCP2_RTB_H */
diff --git a/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_str.c b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_str.c
new file mode 100644
index 0000000..a61636d
--- /dev/null
+++ b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_str.c
@@ -0,0 +1,233 @@
+/*
+ * ngtcp2
+ *
+ * Copyright (c) 2017 ngtcp2 contributors
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "ngtcp2_str.h"
+
+#include <string.h>
+
+#include "ngtcp2_macro.h"
+
+void *ngtcp2_cpymem(void *dest, const void *src, size_t n) {
+ memcpy(dest, src, n);
+ return (uint8_t *)dest + n;
+}
+
+uint8_t *ngtcp2_setmem(uint8_t *dest, uint8_t b, size_t n) {
+ memset(dest, b, n);
+ return dest + n;
+}
+
+const void *ngtcp2_get_bytes(void *dest, const void *src, size_t n) {
+ memcpy(dest, src, n);
+ return (uint8_t *)src + n;
+}
+
+#define LOWER_XDIGITS "0123456789abcdef"
+
+uint8_t *ngtcp2_encode_hex(uint8_t *dest, const uint8_t *data, size_t len) {
+ size_t i;
+ uint8_t *p = dest;
+
+ for (i = 0; i < len; ++i) {
+ *p++ = (uint8_t)LOWER_XDIGITS[data[i] >> 4];
+ *p++ = (uint8_t)LOWER_XDIGITS[data[i] & 0xf];
+ }
+
+ *p = '\0';
+
+ return dest;
+}
+
+char *ngtcp2_encode_printable_ascii(char *dest, const uint8_t *data,
+ size_t len) {
+ size_t i;
+ char *p = dest;
+ uint8_t c;
+
+ for (i = 0; i < len; ++i) {
+ c = data[i];
+ if (0x20 <= c && c <= 0x7e) {
+ *p++ = (char)c;
+ } else {
+ *p++ = '.';
+ }
+ }
+
+ *p = '\0';
+
+ return dest;
+}
+
+/*
+ * write_uint writes |n| to the buffer pointed by |p| in decimal
+ * representation. It returns |p| plus the number of bytes written.
+ * The function assumes that the buffer has enough capacity to contain
+ * a string.
+ */
+static uint8_t *write_uint(uint8_t *p, uint64_t n) {
+ size_t nlen = 0;
+ uint64_t t;
+ uint8_t *res;
+
+ if (n == 0) {
+ *p++ = '0';
+ return p;
+ }
+ for (t = n; t; t /= 10, ++nlen)
+ ;
+ p += nlen;
+ res = p;
+ for (; n; n /= 10) {
+ *--p = (uint8_t)((n % 10) + '0');
+ }
+ return res;
+}
+
+uint8_t *ngtcp2_encode_ipv4(uint8_t *dest, const uint8_t *addr) {
+ size_t i;
+ uint8_t *p = dest;
+
+ p = write_uint(p, addr[0]);
+
+ for (i = 1; i < 4; ++i) {
+ *p++ = '.';
+ p = write_uint(p, addr[i]);
+ }
+
+ *p = '\0';
+
+ return dest;
+}
+
+/*
+ * write_hex_zsup writes the content of buffer pointed by |data| of
+ * length |len| to |dest| in hex string. Any leading zeros are
+ * suppressed. It returns |dest| plus the number of bytes written.
+ */
+static uint8_t *write_hex_zsup(uint8_t *dest, const uint8_t *data, size_t len) {
+ size_t i;
+ uint8_t *p = dest;
+ uint8_t d;
+
+ for (i = 0; i < len; ++i) {
+ d = data[i];
+ if (d >> 4) {
+ break;
+ }
+
+ d &= 0xf;
+
+ if (d) {
+ *p++ = (uint8_t)LOWER_XDIGITS[d];
+ ++i;
+ break;
+ }
+ }
+
+ if (p == dest && i == len) {
+ *p++ = '0';
+ return p;
+ }
+
+ for (; i < len; ++i) {
+ d = data[i];
+ *p++ = (uint8_t)LOWER_XDIGITS[d >> 4];
+ *p++ = (uint8_t)LOWER_XDIGITS[d & 0xf];
+ }
+
+ return p;
+}
+
+uint8_t *ngtcp2_encode_ipv6(uint8_t *dest, const uint8_t *addr) {
+ uint16_t blks[8];
+ size_t i;
+ size_t zlen, zoff;
+ size_t max_zlen = 0, max_zoff = 8;
+ uint8_t *p = dest;
+
+ for (i = 0; i < 16; i += sizeof(uint16_t)) {
+ /* Copy in network byte order. */
+ memcpy(&blks[i / sizeof(uint16_t)], addr + i, sizeof(uint16_t));
+ }
+
+ for (i = 0; i < 8;) {
+ if (blks[i]) {
+ ++i;
+ continue;
+ }
+
+ zlen = 1;
+ zoff = i;
+
+ ++i;
+ for (; i < 8 && blks[i] == 0; ++i, ++zlen)
+ ;
+ if (zlen > max_zlen) {
+ max_zlen = zlen;
+ max_zoff = zoff;
+ }
+ }
+
+ /* Do not suppress a single '0' block */
+ if (max_zlen == 1) {
+ max_zoff = 8;
+ }
+
+ if (max_zoff != 0) {
+ p = write_hex_zsup(p, (const uint8_t *)blks, sizeof(uint16_t));
+
+ for (i = 1; i < max_zoff; ++i) {
+ *p++ = ':';
+ p = write_hex_zsup(p, (const uint8_t *)(blks + i), sizeof(uint16_t));
+ }
+ }
+
+ if (max_zoff != 8) {
+ *p++ = ':';
+
+ if (max_zoff + max_zlen == 8) {
+ *p++ = ':';
+ } else {
+ for (i = max_zoff + max_zlen; i < 8; ++i) {
+ *p++ = ':';
+ p = write_hex_zsup(p, (const uint8_t *)(blks + i), sizeof(uint16_t));
+ }
+ }
+ }
+
+ *p = '\0';
+
+ return dest;
+}
+
+int ngtcp2_cmemeq(const uint8_t *a, const uint8_t *b, size_t n) {
+ size_t i;
+ int rv = 0;
+
+ for (i = 0; i < n; ++i) {
+ rv |= a[i] ^ b[i];
+ }
+
+ return rv == 0;
+}
diff --git a/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_str.h b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_str.h
new file mode 100644
index 0000000..deb75e3
--- /dev/null
+++ b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_str.h
@@ -0,0 +1,94 @@
+/*
+ * ngtcp2
+ *
+ * Copyright (c) 2017 ngtcp2 contributors
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#ifndef NGTCP2_STR_H
+#define NGTCP2_STR_H
+
+#ifdef HAVE_CONFIG_H
+# include <config.h>
+#endif /* HAVE_CONFIG_H */
+
+#include <ngtcp2/ngtcp2.h>
+
+void *ngtcp2_cpymem(void *dest, const void *src, size_t n);
+
+/*
+ * ngtcp2_setmem writes a string of length |n| consisting only |b| to
+ * the buffer pointed by |dest|. It returns dest + n;
+ */
+uint8_t *ngtcp2_setmem(uint8_t *dest, uint8_t b, size_t n);
+
+/*
+ * ngtcp2_get_bytes copies |n| bytes from |src| to |dest|, and returns
+ * |src| + |n|.
+ */
+const void *ngtcp2_get_bytes(void *dest, const void *src, size_t n);
+
+/*
+ * ngtcp2_encode_hex encodes |data| of length |len| in hex string. It
+ * writes additional NULL bytes at the end of the buffer. The buffer
+ * pointed by |dest| must have at least |len| * 2 + 1 bytes space.
+ * This function returns |dest|.
+ */
+uint8_t *ngtcp2_encode_hex(uint8_t *dest, const uint8_t *data, size_t len);
+
+/*
+ * ngtcp2_encode_ipv4 encodes binary form IPv4 address stored in
+ * |addr| to human readable text form in the buffer pointed by |dest|.
+ * The capacity of buffer must have enough length to store a text form
+ * plus a terminating NULL byte. The resulting text form ends with
+ * NULL byte. The function returns |dest|.
+ */
+uint8_t *ngtcp2_encode_ipv4(uint8_t *dest, const uint8_t *addr);
+
+/*
+ * ngtcp2_encode_ipv6 encodes binary form IPv6 address stored in
+ * |addr| to human readable text form in the buffer pointed by |dest|.
+ * The capacity of buffer must have enough length to store a text form
+ * plus a terminating NULL byte. The resulting text form ends with
+ * NULL byte. The function produces the canonical form of IPv6 text
+ * representation described in
+ * https://tools.ietf.org/html/rfc5952#section-4. The function
+ * returns |dest|.
+ */
+uint8_t *ngtcp2_encode_ipv6(uint8_t *dest, const uint8_t *addr);
+
+/*
+ * ngtcp2_encode_printable_ascii encodes |data| of length |len| in
+ * |dest| in the following manner: printable ascii characters are
+ * copied as is. The other characters are converted to ".". It
+ * writes additional NULL bytes at the end of the buffer. |dest| must
+ * have at least |len| + 1 bytes. This function returns |dest|.
+ */
+char *ngtcp2_encode_printable_ascii(char *dest, const uint8_t *data,
+ size_t len);
+
+/*
+ * ngtcp2_cmemeq returns nonzero if the first |n| bytes of the buffers
+ * pointed by |a| and |b| are equal. The comparison is done in a
+ * constant time manner.
+ */
+int ngtcp2_cmemeq(const uint8_t *a, const uint8_t *b, size_t n);
+
+#endif /* NGTCP2_STR_H */
diff --git a/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_strm.c b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_strm.c
new file mode 100644
index 0000000..6f20e86
--- /dev/null
+++ b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_strm.c
@@ -0,0 +1,698 @@
+/*
+ * ngtcp2
+ *
+ * Copyright (c) 2017 ngtcp2 contributors
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "ngtcp2_strm.h"
+
+#include <string.h>
+#include <assert.h>
+
+#include "ngtcp2_rtb.h"
+#include "ngtcp2_pkt.h"
+#include "ngtcp2_vec.h"
+
+static int offset_less(const ngtcp2_ksl_key *lhs, const ngtcp2_ksl_key *rhs) {
+ return *(int64_t *)lhs < *(int64_t *)rhs;
+}
+
+void ngtcp2_strm_init(ngtcp2_strm *strm, int64_t stream_id, uint32_t flags,
+ uint64_t max_rx_offset, uint64_t max_tx_offset,
+ void *stream_user_data, ngtcp2_objalloc *frc_objalloc,
+ const ngtcp2_mem *mem) {
+ strm->frc_objalloc = frc_objalloc;
+ strm->cycle = 0;
+ strm->tx.acked_offset = NULL;
+ strm->tx.cont_acked_offset = 0;
+ strm->tx.streamfrq = NULL;
+ strm->tx.offset = 0;
+ strm->tx.max_offset = max_tx_offset;
+ strm->tx.last_max_stream_data_ts = UINT64_MAX;
+ strm->tx.loss_count = 0;
+ strm->tx.last_lost_pkt_num = -1;
+ strm->rx.rob = NULL;
+ strm->rx.cont_offset = 0;
+ strm->rx.last_offset = 0;
+ strm->stream_id = stream_id;
+ strm->flags = flags;
+ strm->stream_user_data = stream_user_data;
+ strm->rx.window = strm->rx.max_offset = strm->rx.unsent_max_offset =
+ max_rx_offset;
+ strm->pe.index = NGTCP2_PQ_BAD_INDEX;
+ strm->mem = mem;
+ strm->app_error_code = 0;
+}
+
+void ngtcp2_strm_free(ngtcp2_strm *strm) {
+ ngtcp2_ksl_it it;
+
+ if (strm == NULL) {
+ return;
+ }
+
+ if (strm->tx.streamfrq) {
+ for (it = ngtcp2_ksl_begin(strm->tx.streamfrq); !ngtcp2_ksl_it_end(&it);
+ ngtcp2_ksl_it_next(&it)) {
+ ngtcp2_frame_chain_objalloc_del(ngtcp2_ksl_it_get(&it),
+ strm->frc_objalloc, strm->mem);
+ }
+
+ ngtcp2_ksl_free(strm->tx.streamfrq);
+ ngtcp2_mem_free(strm->mem, strm->tx.streamfrq);
+ }
+
+ if (strm->rx.rob) {
+ ngtcp2_rob_free(strm->rx.rob);
+ ngtcp2_mem_free(strm->mem, strm->rx.rob);
+ }
+
+ if (strm->tx.acked_offset) {
+ ngtcp2_gaptr_free(strm->tx.acked_offset);
+ ngtcp2_mem_free(strm->mem, strm->tx.acked_offset);
+ }
+}
+
+static int strm_rob_init(ngtcp2_strm *strm) {
+ int rv;
+ ngtcp2_rob *rob = ngtcp2_mem_malloc(strm->mem, sizeof(*rob));
+
+ if (rob == NULL) {
+ return NGTCP2_ERR_NOMEM;
+ }
+
+ rv = ngtcp2_rob_init(rob, 8 * 1024, strm->mem);
+ if (rv != 0) {
+ ngtcp2_mem_free(strm->mem, rob);
+ return rv;
+ }
+
+ strm->rx.rob = rob;
+
+ return 0;
+}
+
+uint64_t ngtcp2_strm_rx_offset(ngtcp2_strm *strm) {
+ if (strm->rx.rob == NULL) {
+ return strm->rx.cont_offset;
+ }
+ return ngtcp2_rob_first_gap_offset(strm->rx.rob);
+}
+
+/* strm_rob_heavily_fragmented returns nonzero if the number of gaps
+ in |rob| exceeds the limit. */
+static int strm_rob_heavily_fragmented(ngtcp2_rob *rob) {
+ return ngtcp2_ksl_len(&rob->gapksl) >= 1000;
+}
+
+int ngtcp2_strm_recv_reordering(ngtcp2_strm *strm, const uint8_t *data,
+ size_t datalen, uint64_t offset) {
+ int rv;
+
+ if (strm->rx.rob == NULL) {
+ rv = strm_rob_init(strm);
+ if (rv != 0) {
+ return rv;
+ }
+
+ if (strm->rx.cont_offset) {
+ rv = ngtcp2_rob_remove_prefix(strm->rx.rob, strm->rx.cont_offset);
+ if (rv != 0) {
+ return rv;
+ }
+ }
+ }
+
+ if (strm_rob_heavily_fragmented(strm->rx.rob)) {
+ return NGTCP2_ERR_INTERNAL;
+ }
+
+ return ngtcp2_rob_push(strm->rx.rob, offset, data, datalen);
+}
+
+int ngtcp2_strm_update_rx_offset(ngtcp2_strm *strm, uint64_t offset) {
+ if (strm->rx.rob == NULL) {
+ strm->rx.cont_offset = offset;
+ return 0;
+ }
+
+ return ngtcp2_rob_remove_prefix(strm->rx.rob, offset);
+}
+
+void ngtcp2_strm_shutdown(ngtcp2_strm *strm, uint32_t flags) {
+ strm->flags |= flags & NGTCP2_STRM_FLAG_SHUT_RDWR;
+}
+
+static int strm_streamfrq_init(ngtcp2_strm *strm) {
+ ngtcp2_ksl *streamfrq = ngtcp2_mem_malloc(strm->mem, sizeof(*streamfrq));
+ if (streamfrq == NULL) {
+ return NGTCP2_ERR_NOMEM;
+ }
+
+ ngtcp2_ksl_init(streamfrq, offset_less, sizeof(uint64_t), strm->mem);
+
+ strm->tx.streamfrq = streamfrq;
+
+ return 0;
+}
+
+int ngtcp2_strm_streamfrq_push(ngtcp2_strm *strm, ngtcp2_frame_chain *frc) {
+ int rv;
+
+ assert(frc->fr.type == NGTCP2_FRAME_STREAM);
+ assert(frc->next == NULL);
+
+ if (strm->tx.streamfrq == NULL) {
+ rv = strm_streamfrq_init(strm);
+ if (rv != 0) {
+ return rv;
+ }
+ }
+
+ return ngtcp2_ksl_insert(strm->tx.streamfrq, NULL, &frc->fr.stream.offset,
+ frc);
+}
+
+static int strm_streamfrq_unacked_pop(ngtcp2_strm *strm,
+ ngtcp2_frame_chain **pfrc) {
+ ngtcp2_frame_chain *frc, *nfrc;
+ ngtcp2_stream *fr, *nfr;
+ uint64_t offset, end_offset;
+ size_t idx, end_idx;
+ uint64_t base_offset, end_base_offset;
+ ngtcp2_range gap;
+ ngtcp2_vec *v;
+ int rv;
+ ngtcp2_ksl_it it;
+
+ *pfrc = NULL;
+
+ assert(strm->tx.streamfrq);
+ assert(ngtcp2_ksl_len(strm->tx.streamfrq));
+
+ for (it = ngtcp2_ksl_begin(strm->tx.streamfrq); !ngtcp2_ksl_it_end(&it);) {
+ frc = ngtcp2_ksl_it_get(&it);
+ fr = &frc->fr.stream;
+
+ ngtcp2_ksl_remove_hint(strm->tx.streamfrq, &it, &it, &fr->offset);
+
+ idx = 0;
+ offset = fr->offset;
+ base_offset = 0;
+
+ gap = ngtcp2_strm_get_unacked_range_after(strm, offset);
+ if (gap.begin < offset) {
+ gap.begin = offset;
+ }
+
+ for (; idx < fr->datacnt && offset < gap.begin; ++idx) {
+ v = &fr->data[idx];
+ if (offset + v->len > gap.begin) {
+ base_offset = gap.begin - offset;
+ break;
+ }
+
+ offset += v->len;
+ }
+
+ if (idx == fr->datacnt) {
+ if (fr->fin) {
+ if (strm->flags & NGTCP2_STRM_FLAG_FIN_ACKED) {
+ ngtcp2_frame_chain_objalloc_del(frc, strm->frc_objalloc, strm->mem);
+ assert(ngtcp2_ksl_len(strm->tx.streamfrq) == 0);
+ return 0;
+ }
+
+ fr->offset += ngtcp2_vec_len(fr->data, fr->datacnt);
+ fr->datacnt = 0;
+
+ *pfrc = frc;
+
+ return 0;
+ }
+
+ if (fr->offset == 0 && fr->datacnt == 0 && strm->tx.offset == 0 &&
+ !(strm->flags & NGTCP2_STRM_FLAG_ANY_ACKED)) {
+ *pfrc = frc;
+
+ return 0;
+ }
+
+ ngtcp2_frame_chain_objalloc_del(frc, strm->frc_objalloc, strm->mem);
+ continue;
+ }
+
+ assert(gap.begin == offset + base_offset);
+
+ end_idx = idx;
+ end_offset = offset;
+ end_base_offset = 0;
+
+ for (; end_idx < fr->datacnt; ++end_idx) {
+ v = &fr->data[end_idx];
+ if (end_offset + v->len > gap.end) {
+ end_base_offset = gap.end - end_offset;
+ break;
+ }
+
+ end_offset += v->len;
+ }
+
+ if (fr->offset == offset && base_offset == 0 && fr->datacnt == end_idx) {
+ *pfrc = frc;
+ return 0;
+ }
+
+ if (fr->datacnt == end_idx) {
+ memmove(fr->data, fr->data + idx, sizeof(fr->data[0]) * (end_idx - idx));
+
+ assert(fr->data[0].len > base_offset);
+
+ fr->offset = offset + base_offset;
+ fr->datacnt = end_idx - idx;
+ fr->data[0].base += base_offset;
+ fr->data[0].len -= (size_t)base_offset;
+
+ *pfrc = frc;
+ return 0;
+ }
+
+ rv = ngtcp2_frame_chain_stream_datacnt_objalloc_new(
+ &nfrc, fr->datacnt - end_idx, strm->frc_objalloc, strm->mem);
+ if (rv != 0) {
+ ngtcp2_frame_chain_objalloc_del(frc, strm->frc_objalloc, strm->mem);
+ return rv;
+ }
+
+ nfr = &nfrc->fr.stream;
+ memcpy(nfr->data, fr->data + end_idx,
+ sizeof(nfr->data[0]) * (fr->datacnt - end_idx));
+
+ assert(nfr->data[0].len > end_base_offset);
+
+ nfr->type = NGTCP2_FRAME_STREAM;
+ nfr->flags = 0;
+ nfr->fin = fr->fin;
+ nfr->stream_id = fr->stream_id;
+ nfr->offset = end_offset + end_base_offset;
+ nfr->datacnt = fr->datacnt - end_idx;
+ nfr->data[0].base += end_base_offset;
+ nfr->data[0].len -= (size_t)end_base_offset;
+
+ rv = ngtcp2_ksl_insert(strm->tx.streamfrq, NULL, &nfr->offset, nfrc);
+ if (rv != 0) {
+ assert(ngtcp2_err_is_fatal(rv));
+ ngtcp2_frame_chain_objalloc_del(nfrc, strm->frc_objalloc, strm->mem);
+ ngtcp2_frame_chain_objalloc_del(frc, strm->frc_objalloc, strm->mem);
+ return rv;
+ }
+
+ if (end_base_offset) {
+ ++end_idx;
+ }
+
+ memmove(fr->data, fr->data + idx, sizeof(fr->data[0]) * (end_idx - idx));
+
+ assert(fr->data[0].len > base_offset);
+
+ fr->fin = 0;
+ fr->offset = offset + base_offset;
+ fr->datacnt = end_idx - idx;
+ if (end_base_offset) {
+ assert(fr->data[fr->datacnt - 1].len > end_base_offset);
+ fr->data[fr->datacnt - 1].len = (size_t)end_base_offset;
+ }
+ fr->data[0].base += base_offset;
+ fr->data[0].len -= (size_t)base_offset;
+
+ *pfrc = frc;
+ return 0;
+ }
+
+ return 0;
+}
+
+int ngtcp2_strm_streamfrq_pop(ngtcp2_strm *strm, ngtcp2_frame_chain **pfrc,
+ size_t left) {
+ ngtcp2_stream *fr, *nfr;
+ ngtcp2_frame_chain *frc, *nfrc;
+ int rv;
+ size_t nmerged;
+ uint64_t datalen;
+ ngtcp2_vec a[NGTCP2_MAX_STREAM_DATACNT];
+ ngtcp2_vec b[NGTCP2_MAX_STREAM_DATACNT];
+ size_t acnt, bcnt;
+ uint64_t unacked_offset;
+
+ if (strm->tx.streamfrq == NULL || ngtcp2_ksl_len(strm->tx.streamfrq) == 0) {
+ *pfrc = NULL;
+ return 0;
+ }
+
+ rv = strm_streamfrq_unacked_pop(strm, &frc);
+ if (rv != 0) {
+ return rv;
+ }
+ if (frc == NULL) {
+ *pfrc = NULL;
+ return 0;
+ }
+
+ fr = &frc->fr.stream;
+ datalen = ngtcp2_vec_len(fr->data, fr->datacnt);
+
+ if (left == 0) {
+ /* datalen could be zero if 0 length STREAM has been sent */
+ if (datalen || ngtcp2_ksl_len(strm->tx.streamfrq) > 1) {
+ rv = ngtcp2_ksl_insert(strm->tx.streamfrq, NULL, &fr->offset, frc);
+ if (rv != 0) {
+ assert(ngtcp2_err_is_fatal(rv));
+ ngtcp2_frame_chain_objalloc_del(frc, strm->frc_objalloc, strm->mem);
+ return rv;
+ }
+ *pfrc = NULL;
+ return 0;
+ }
+ }
+
+ if (datalen > left) {
+ ngtcp2_vec_copy(a, fr->data, fr->datacnt);
+ acnt = fr->datacnt;
+
+ bcnt = 0;
+ ngtcp2_vec_split(a, &acnt, b, &bcnt, left, NGTCP2_MAX_STREAM_DATACNT);
+
+ assert(acnt > 0);
+ assert(bcnt > 0);
+
+ rv = ngtcp2_frame_chain_stream_datacnt_objalloc_new(
+ &nfrc, bcnt, strm->frc_objalloc, strm->mem);
+ if (rv != 0) {
+ assert(ngtcp2_err_is_fatal(rv));
+ ngtcp2_frame_chain_objalloc_del(frc, strm->frc_objalloc, strm->mem);
+ return rv;
+ }
+
+ nfr = &nfrc->fr.stream;
+ nfr->type = NGTCP2_FRAME_STREAM;
+ nfr->flags = 0;
+ nfr->fin = fr->fin;
+ nfr->stream_id = fr->stream_id;
+ nfr->offset = fr->offset + left;
+ nfr->datacnt = bcnt;
+ ngtcp2_vec_copy(nfr->data, b, bcnt);
+
+ rv = ngtcp2_ksl_insert(strm->tx.streamfrq, NULL, &nfr->offset, nfrc);
+ if (rv != 0) {
+ assert(ngtcp2_err_is_fatal(rv));
+ ngtcp2_frame_chain_objalloc_del(nfrc, strm->frc_objalloc, strm->mem);
+ ngtcp2_frame_chain_objalloc_del(frc, strm->frc_objalloc, strm->mem);
+ return rv;
+ }
+
+ rv = ngtcp2_frame_chain_stream_datacnt_objalloc_new(
+ &nfrc, acnt, strm->frc_objalloc, strm->mem);
+ if (rv != 0) {
+ assert(ngtcp2_err_is_fatal(rv));
+ ngtcp2_frame_chain_objalloc_del(frc, strm->frc_objalloc, strm->mem);
+ return rv;
+ }
+
+ nfr = &nfrc->fr.stream;
+ *nfr = *fr;
+ nfr->fin = 0;
+ nfr->datacnt = acnt;
+ ngtcp2_vec_copy(nfr->data, a, acnt);
+
+ ngtcp2_frame_chain_objalloc_del(frc, strm->frc_objalloc, strm->mem);
+
+ *pfrc = nfrc;
+
+ return 0;
+ }
+
+ left -= (size_t)datalen;
+
+ ngtcp2_vec_copy(a, fr->data, fr->datacnt);
+ acnt = fr->datacnt;
+
+ for (; left && ngtcp2_ksl_len(strm->tx.streamfrq);) {
+ unacked_offset = ngtcp2_strm_streamfrq_unacked_offset(strm);
+ if (unacked_offset != fr->offset + datalen) {
+ assert(fr->offset + datalen < unacked_offset);
+ break;
+ }
+
+ rv = strm_streamfrq_unacked_pop(strm, &nfrc);
+ if (rv != 0) {
+ assert(ngtcp2_err_is_fatal(rv));
+ ngtcp2_frame_chain_objalloc_del(frc, strm->frc_objalloc, strm->mem);
+ return rv;
+ }
+ if (nfrc == NULL) {
+ break;
+ }
+
+ nfr = &nfrc->fr.stream;
+
+ if (nfr->fin && nfr->datacnt == 0) {
+ fr->fin = 1;
+ ngtcp2_frame_chain_objalloc_del(nfrc, strm->frc_objalloc, strm->mem);
+ break;
+ }
+
+ nmerged = ngtcp2_vec_merge(a, &acnt, nfr->data, &nfr->datacnt, left,
+ NGTCP2_MAX_STREAM_DATACNT);
+ if (nmerged == 0) {
+ rv = ngtcp2_ksl_insert(strm->tx.streamfrq, NULL, &nfr->offset, nfrc);
+ if (rv != 0) {
+ assert(ngtcp2_err_is_fatal(rv));
+ ngtcp2_frame_chain_objalloc_del(nfrc, strm->frc_objalloc, strm->mem);
+ ngtcp2_frame_chain_objalloc_del(frc, strm->frc_objalloc, strm->mem);
+ return rv;
+ }
+ break;
+ }
+
+ datalen += nmerged;
+ left -= nmerged;
+
+ if (nfr->datacnt == 0) {
+ fr->fin = nfr->fin;
+ ngtcp2_frame_chain_objalloc_del(nfrc, strm->frc_objalloc, strm->mem);
+ continue;
+ }
+
+ nfr->offset += nmerged;
+
+ rv = ngtcp2_ksl_insert(strm->tx.streamfrq, NULL, &nfr->offset, nfrc);
+ if (rv != 0) {
+ ngtcp2_frame_chain_objalloc_del(nfrc, strm->frc_objalloc, strm->mem);
+ ngtcp2_frame_chain_objalloc_del(frc, strm->frc_objalloc, strm->mem);
+ return rv;
+ }
+
+ break;
+ }
+
+ if (acnt == fr->datacnt) {
+ if (acnt > 0) {
+ fr->data[acnt - 1] = a[acnt - 1];
+ }
+
+ *pfrc = frc;
+ return 0;
+ }
+
+ assert(acnt > fr->datacnt);
+
+ rv = ngtcp2_frame_chain_stream_datacnt_objalloc_new(
+ &nfrc, acnt, strm->frc_objalloc, strm->mem);
+ if (rv != 0) {
+ ngtcp2_frame_chain_objalloc_del(frc, strm->frc_objalloc, strm->mem);
+ return rv;
+ }
+
+ nfr = &nfrc->fr.stream;
+ *nfr = *fr;
+ nfr->datacnt = acnt;
+ ngtcp2_vec_copy(nfr->data, a, acnt);
+
+ ngtcp2_frame_chain_objalloc_del(frc, strm->frc_objalloc, strm->mem);
+
+ *pfrc = nfrc;
+
+ return 0;
+}
+
+uint64_t ngtcp2_strm_streamfrq_unacked_offset(ngtcp2_strm *strm) {
+ ngtcp2_frame_chain *frc;
+ ngtcp2_stream *fr;
+ ngtcp2_range gap;
+ ngtcp2_ksl_it it;
+ uint64_t datalen;
+
+ assert(strm->tx.streamfrq);
+ assert(ngtcp2_ksl_len(strm->tx.streamfrq));
+
+ for (it = ngtcp2_ksl_begin(strm->tx.streamfrq); !ngtcp2_ksl_it_end(&it);
+ ngtcp2_ksl_it_next(&it)) {
+ frc = ngtcp2_ksl_it_get(&it);
+ fr = &frc->fr.stream;
+
+ gap = ngtcp2_strm_get_unacked_range_after(strm, fr->offset);
+
+ datalen = ngtcp2_vec_len(fr->data, fr->datacnt);
+
+ if (gap.begin <= fr->offset) {
+ return fr->offset;
+ }
+ if (gap.begin < fr->offset + datalen) {
+ return gap.begin;
+ }
+ if (fr->offset + datalen == gap.begin && fr->fin &&
+ !(strm->flags & NGTCP2_STRM_FLAG_FIN_ACKED)) {
+ return fr->offset + datalen;
+ }
+ }
+
+ return (uint64_t)-1;
+}
+
+ngtcp2_frame_chain *ngtcp2_strm_streamfrq_top(ngtcp2_strm *strm) {
+ ngtcp2_ksl_it it;
+
+ assert(strm->tx.streamfrq);
+ assert(ngtcp2_ksl_len(strm->tx.streamfrq));
+
+ it = ngtcp2_ksl_begin(strm->tx.streamfrq);
+ return ngtcp2_ksl_it_get(&it);
+}
+
+int ngtcp2_strm_streamfrq_empty(ngtcp2_strm *strm) {
+ return strm->tx.streamfrq == NULL || ngtcp2_ksl_len(strm->tx.streamfrq) == 0;
+}
+
+void ngtcp2_strm_streamfrq_clear(ngtcp2_strm *strm) {
+ ngtcp2_frame_chain *frc;
+ ngtcp2_ksl_it it;
+
+ if (strm->tx.streamfrq == NULL) {
+ return;
+ }
+
+ for (it = ngtcp2_ksl_begin(strm->tx.streamfrq); !ngtcp2_ksl_it_end(&it);
+ ngtcp2_ksl_it_next(&it)) {
+ frc = ngtcp2_ksl_it_get(&it);
+ ngtcp2_frame_chain_objalloc_del(frc, strm->frc_objalloc, strm->mem);
+ }
+ ngtcp2_ksl_clear(strm->tx.streamfrq);
+}
+
+int ngtcp2_strm_is_tx_queued(ngtcp2_strm *strm) {
+ return strm->pe.index != NGTCP2_PQ_BAD_INDEX;
+}
+
+int ngtcp2_strm_is_all_tx_data_acked(ngtcp2_strm *strm) {
+ if (strm->tx.acked_offset == NULL) {
+ return strm->tx.cont_acked_offset == strm->tx.offset;
+ }
+
+ return ngtcp2_gaptr_first_gap_offset(strm->tx.acked_offset) ==
+ strm->tx.offset;
+}
+
+int ngtcp2_strm_is_all_tx_data_fin_acked(ngtcp2_strm *strm) {
+ return (strm->flags & NGTCP2_STRM_FLAG_FIN_ACKED) &&
+ ngtcp2_strm_is_all_tx_data_acked(strm);
+}
+
+ngtcp2_range ngtcp2_strm_get_unacked_range_after(ngtcp2_strm *strm,
+ uint64_t offset) {
+ ngtcp2_range gap;
+
+ if (strm->tx.acked_offset == NULL) {
+ gap.begin = strm->tx.cont_acked_offset;
+ gap.end = UINT64_MAX;
+ return gap;
+ }
+
+ return ngtcp2_gaptr_get_first_gap_after(strm->tx.acked_offset, offset);
+}
+
+uint64_t ngtcp2_strm_get_acked_offset(ngtcp2_strm *strm) {
+ if (strm->tx.acked_offset == NULL) {
+ return strm->tx.cont_acked_offset;
+ }
+
+ return ngtcp2_gaptr_first_gap_offset(strm->tx.acked_offset);
+}
+
+static int strm_acked_offset_init(ngtcp2_strm *strm) {
+ ngtcp2_gaptr *acked_offset =
+ ngtcp2_mem_malloc(strm->mem, sizeof(*acked_offset));
+
+ if (acked_offset == NULL) {
+ return NGTCP2_ERR_NOMEM;
+ }
+
+ ngtcp2_gaptr_init(acked_offset, strm->mem);
+
+ strm->tx.acked_offset = acked_offset;
+
+ return 0;
+}
+
+int ngtcp2_strm_ack_data(ngtcp2_strm *strm, uint64_t offset, uint64_t len) {
+ int rv;
+
+ if (strm->tx.acked_offset == NULL) {
+ if (strm->tx.cont_acked_offset == offset) {
+ strm->tx.cont_acked_offset += len;
+ return 0;
+ }
+
+ rv = strm_acked_offset_init(strm);
+ if (rv != 0) {
+ return rv;
+ }
+
+ rv =
+ ngtcp2_gaptr_push(strm->tx.acked_offset, 0, strm->tx.cont_acked_offset);
+ if (rv != 0) {
+ return rv;
+ }
+ }
+
+ return ngtcp2_gaptr_push(strm->tx.acked_offset, offset, len);
+}
+
+void ngtcp2_strm_set_app_error_code(ngtcp2_strm *strm,
+ uint64_t app_error_code) {
+ if (strm->flags & NGTCP2_STRM_FLAG_APP_ERROR_CODE_SET) {
+ return;
+ }
+
+ assert(0 == strm->app_error_code);
+
+ strm->flags |= NGTCP2_STRM_FLAG_APP_ERROR_CODE_SET;
+ strm->app_error_code = app_error_code;
+}
diff --git a/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_strm.h b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_strm.h
new file mode 100644
index 0000000..8e3cfe8
--- /dev/null
+++ b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_strm.h
@@ -0,0 +1,310 @@
+/*
+ * ngtcp2
+ *
+ * Copyright (c) 2017 ngtcp2 contributors
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#ifndef NGTCP2_STRM_H
+#define NGTCP2_STRM_H
+
+#ifdef HAVE_CONFIG_H
+# include <config.h>
+#endif /* HAVE_CONFIG_H */
+
+#include <ngtcp2/ngtcp2.h>
+
+#include "ngtcp2_rob.h"
+#include "ngtcp2_map.h"
+#include "ngtcp2_gaptr.h"
+#include "ngtcp2_ksl.h"
+#include "ngtcp2_pq.h"
+
+typedef struct ngtcp2_frame_chain ngtcp2_frame_chain;
+
+/* NGTCP2_STRM_FLAG_NONE indicates that no flag is set. */
+#define NGTCP2_STRM_FLAG_NONE 0x00u
+/* NGTCP2_STRM_FLAG_SHUT_RD indicates that further reception of stream
+ data is not allowed. */
+#define NGTCP2_STRM_FLAG_SHUT_RD 0x01u
+/* NGTCP2_STRM_FLAG_SHUT_WR indicates that further transmission of
+ stream data is not allowed. */
+#define NGTCP2_STRM_FLAG_SHUT_WR 0x02u
+#define NGTCP2_STRM_FLAG_SHUT_RDWR \
+ (NGTCP2_STRM_FLAG_SHUT_RD | NGTCP2_STRM_FLAG_SHUT_WR)
+/* NGTCP2_STRM_FLAG_SENT_RST indicates that RST_STREAM is sent from
+ the local endpoint. In this case, NGTCP2_STRM_FLAG_SHUT_WR is also
+ set. */
+#define NGTCP2_STRM_FLAG_SENT_RST 0x04u
+/* NGTCP2_STRM_FLAG_SENT_RST indicates that RST_STREAM is received
+ from the remote endpoint. In this case, NGTCP2_STRM_FLAG_SHUT_RD
+ is also set. */
+#define NGTCP2_STRM_FLAG_RECV_RST 0x08u
+/* NGTCP2_STRM_FLAG_STOP_SENDING indicates that STOP_SENDING is sent
+ from the local endpoint. */
+#define NGTCP2_STRM_FLAG_STOP_SENDING 0x10u
+/* NGTCP2_STRM_FLAG_RST_ACKED indicates that the outgoing RST_STREAM
+ is acknowledged by peer. */
+#define NGTCP2_STRM_FLAG_RST_ACKED 0x20u
+/* NGTCP2_STRM_FLAG_FIN_ACKED indicates that a STREAM with FIN bit set
+ is acknowledged by a remote endpoint. */
+#define NGTCP2_STRM_FLAG_FIN_ACKED 0x40u
+/* NGTCP2_STRM_FLAG_ANY_ACKED indicates that any portion of stream
+ data, including 0 length segment, is acknowledged. */
+#define NGTCP2_STRM_FLAG_ANY_ACKED 0x80u
+/* NGTCP2_STRM_FLAG_APP_ERROR_CODE_SET indicates that app_error_code
+ field is set. This resolves the ambiguity that the initial
+ app_error_code value 0 might be a proper application error code.
+ In this case, without this flag, we are unable to distinguish
+ assigned value from unassigned one. */
+#define NGTCP2_STRM_FLAG_APP_ERROR_CODE_SET 0x100u
+/* NGTCP2_STRM_FLAG_STREAM_STOP_SENDING_CALLED is set when
+ stream_stop_sending callback is called. */
+#define NGTCP2_STRM_FLAG_STREAM_STOP_SENDING_CALLED 0x200u
+
+typedef struct ngtcp2_strm ngtcp2_strm;
+
+struct ngtcp2_strm {
+ union {
+ struct {
+ ngtcp2_pq_entry pe;
+ uint64_t cycle;
+ ngtcp2_objalloc *frc_objalloc;
+
+ struct {
+ /* acked_offset tracks acknowledged outgoing data. */
+ ngtcp2_gaptr *acked_offset;
+ /* cont_acked_offset is the offset that all data up to this offset
+ is acknowledged by a remote endpoint. It is used until the
+ remote endpoint acknowledges data in out-of-order. After that,
+ acked_offset is used instead. */
+ uint64_t cont_acked_offset;
+ /* streamfrq contains STREAM frame for retransmission. The flow
+ control credits have been paid when they are transmitted first
+ time. There are no restriction regarding flow control for
+ retransmission. */
+ ngtcp2_ksl *streamfrq;
+ /* offset is the next offset of outgoing data. In other words, it
+ is the number of bytes sent in this stream without
+ duplication. */
+ uint64_t offset;
+ /* max_tx_offset is the maximum offset that local endpoint can
+ send for this stream. */
+ uint64_t max_offset;
+ /* last_max_stream_data_ts is the timestamp when last
+ MAX_STREAM_DATA frame is sent. */
+ ngtcp2_tstamp last_max_stream_data_ts;
+ /* loss_count is the number of packets that contain STREAM
+ frame for this stream and are declared to be lost. It may
+ include the spurious losses. It does not include a packet
+ whose contents have been reclaimed for PTO and which is
+ later declared to be lost. Those data are not blocked by
+ the flow control and will be sent immediately if no other
+ restrictions are applied. */
+ size_t loss_count;
+ /* last_lost_pkt_num is the packet number of the packet that
+ is counted to loss_count. It is used to avoid to count
+ multiple STREAM frames in one lost packet. */
+ int64_t last_lost_pkt_num;
+ } tx;
+
+ struct {
+ /* rob is the reorder buffer for incoming stream data. The data
+ received in out of order is buffered and sorted by its offset
+ in this object. */
+ ngtcp2_rob *rob;
+ /* cont_offset is the largest offset of consecutive data. It is
+ used until the endpoint receives out-of-order data. After
+ that, rob is used to track the offset and data. */
+ uint64_t cont_offset;
+ /* last_offset is the largest offset of stream data received for
+ this stream. */
+ uint64_t last_offset;
+ /* max_offset is the maximum offset that remote endpoint can send
+ to this stream. */
+ uint64_t max_offset;
+ /* unsent_max_offset is the maximum offset that remote endpoint
+ can send to this stream, and it is not notified to the remote
+ endpoint. unsent_max_offset >= max_offset must be hold. */
+ uint64_t unsent_max_offset;
+ /* window is the stream-level flow control window size. */
+ uint64_t window;
+ } rx;
+
+ const ngtcp2_mem *mem;
+ int64_t stream_id;
+ void *stream_user_data;
+ /* flags is bit-wise OR of zero or more of NGTCP2_STRM_FLAG_*. */
+ uint32_t flags;
+ /* app_error_code is an error code the local endpoint sent in
+ RESET_STREAM or STOP_SENDING, or received from a remote endpoint
+ in RESET_STREAM or STOP_SENDING. First application error code is
+ chosen and when set, NGTCP2_STRM_FLAG_APP_ERROR_CODE_SET flag is
+ set in flags field. */
+ uint64_t app_error_code;
+ };
+
+ ngtcp2_opl_entry oplent;
+ };
+};
+
+/*
+ * ngtcp2_strm_init initializes |strm|.
+ */
+void ngtcp2_strm_init(ngtcp2_strm *strm, int64_t stream_id, uint32_t flags,
+ uint64_t max_rx_offset, uint64_t max_tx_offset,
+ void *stream_user_data, ngtcp2_objalloc *frc_objalloc,
+ const ngtcp2_mem *mem);
+
+/*
+ * ngtcp2_strm_free deallocates memory allocated for |strm|. This
+ * function does not free the memory pointed by |strm| itself.
+ */
+void ngtcp2_strm_free(ngtcp2_strm *strm);
+
+/*
+ * ngtcp2_strm_rx_offset returns the minimum offset of stream data
+ * which is not received yet.
+ */
+uint64_t ngtcp2_strm_rx_offset(ngtcp2_strm *strm);
+
+/*
+ * ngtcp2_strm_recv_reordering handles reordered data.
+ *
+ * It returns 0 if it succeeds, or one of the following negative error
+ * codes:
+ *
+ * NGTCP2_ERR_NOMEM
+ * Out of memory
+ */
+int ngtcp2_strm_recv_reordering(ngtcp2_strm *strm, const uint8_t *data,
+ size_t datalen, uint64_t offset);
+
+/*
+ * ngtcp2_strm_update_rx_offset tells that data up to offset bytes are
+ * received in order.
+ *
+ * NGTCP2_ERR_NOMEM
+ * Out of memory
+ */
+int ngtcp2_strm_update_rx_offset(ngtcp2_strm *strm, uint64_t offset);
+
+/*
+ * ngtcp2_strm_shutdown shutdowns |strm|. |flags| should be
+ * NGTCP2_STRM_FLAG_SHUT_RD, and/or NGTCP2_STRM_FLAG_SHUT_WR.
+ */
+void ngtcp2_strm_shutdown(ngtcp2_strm *strm, uint32_t flags);
+
+/*
+ * ngtcp2_strm_streamfrq_push pushes |frc| to streamfrq for
+ * retransmission.
+ *
+ * This function returns 0 if it succeeds, or one of the following
+ * negative error codes:
+ *
+ * NGTCP2_ERR_NOMEM
+ * Out of memory
+ */
+int ngtcp2_strm_streamfrq_push(ngtcp2_strm *strm, ngtcp2_frame_chain *frc);
+
+/*
+ * ngtcp2_strm_streamfrq_pop pops the first ngtcp2_frame_chain and
+ * assigns it to |*pfrc|. This function splits into or merges several
+ * ngtcp2_frame_chain objects so that the returned ngtcp2_frame_chain
+ * has at most |left| data length. If there is no frames to send,
+ * this function returns 0 and |*pfrc| is NULL.
+ *
+ * This function returns 0 if it succeeds, or one of the following
+ * negative error codes:
+ *
+ * NGTCP2_ERR_NOMEM
+ * Out of memory
+ */
+int ngtcp2_strm_streamfrq_pop(ngtcp2_strm *strm, ngtcp2_frame_chain **pfrc,
+ size_t left);
+
+/*
+ * ngtcp2_strm_streamfrq_unacked_offset returns the smallest offset of
+ * unacknowledged stream data held in strm->tx.streamfrq.
+ */
+uint64_t ngtcp2_strm_streamfrq_unacked_offset(ngtcp2_strm *strm);
+
+/*
+ * ngtcp2_strm_streamfrq_top returns the first ngtcp2_frame_chain.
+ * The queue must not be empty.
+ */
+ngtcp2_frame_chain *ngtcp2_strm_streamfrq_top(ngtcp2_strm *strm);
+
+/*
+ * ngtcp2_strm_streamfrq_empty returns nonzero if streamfrq is empty.
+ */
+int ngtcp2_strm_streamfrq_empty(ngtcp2_strm *strm);
+
+/*
+ * ngtcp2_strm_streamfrq_clear removes all frames from streamfrq.
+ */
+void ngtcp2_strm_streamfrq_clear(ngtcp2_strm *strm);
+
+/*
+ * ngtcp2_strm_is_tx_queued returns nonzero if |strm| is queued.
+ */
+int ngtcp2_strm_is_tx_queued(ngtcp2_strm *strm);
+
+/*
+ * ngtcp2_strm_is_all_tx_data_acked returns nonzero if all outgoing
+ * data for |strm| which have sent so far have been acknowledged.
+ */
+int ngtcp2_strm_is_all_tx_data_acked(ngtcp2_strm *strm);
+
+/*
+ * ngtcp2_strm_is_all_tx_data_fin_acked behaves like
+ * ngtcp2_strm_is_all_tx_data_acked, but it also requires that STREAM
+ * frame with fin bit set is acknowledged.
+ */
+int ngtcp2_strm_is_all_tx_data_fin_acked(ngtcp2_strm *strm);
+
+/*
+ * ngtcp2_strm_get_unacked_range_after returns the range that is not
+ * acknowledged yet and intersects or comes after |offset|.
+ */
+ngtcp2_range ngtcp2_strm_get_unacked_range_after(ngtcp2_strm *strm,
+ uint64_t offset);
+
+/*
+ * ngtcp2_strm_get_acked_offset returns offset, that is the data up to
+ * this offset have been acknowledged by a remote endpoint. It
+ * returns 0 if no data is acknowledged.
+ */
+uint64_t ngtcp2_strm_get_acked_offset(ngtcp2_strm *strm);
+
+/*
+ * ngtcp2_strm_ack_data tells |strm| that the data [offset,
+ * offset+len) is acknowledged by a remote endpoint.
+ */
+int ngtcp2_strm_ack_data(ngtcp2_strm *strm, uint64_t offset, uint64_t len);
+
+/*
+ * ngtcp2_strm_set_app_error_code sets |app_error_code| to |strm| and
+ * set NGTCP2_STRM_FLAG_APP_ERROR_CODE_SET flag. If the flag is
+ * already set, this function does nothing.
+ */
+void ngtcp2_strm_set_app_error_code(ngtcp2_strm *strm, uint64_t app_error_code);
+
+#endif /* NGTCP2_STRM_H */
diff --git a/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_unreachable.c b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_unreachable.c
new file mode 100644
index 0000000..7c7d9ae
--- /dev/null
+++ b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_unreachable.c
@@ -0,0 +1,71 @@
+/*
+ * ngtcp2
+ *
+ * Copyright (c) 2022 ngtcp2 contributors
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "ngtcp2_unreachable.h"
+
+#include <stdio.h>
+#include <errno.h>
+#ifdef HAVE_UNISTD_H
+# include <unistd.h>
+#endif /* HAVE_UNISTD_H */
+#include <stdlib.h>
+#ifdef WIN32
+# include <io.h>
+#endif /* WIN32 */
+
+void ngtcp2_unreachable_fail(const char *file, int line, const char *func) {
+ char *buf;
+ size_t buflen;
+ int rv;
+
+#define NGTCP2_UNREACHABLE_TEMPLATE "%s:%d %s: Unreachable.\n"
+
+ rv = snprintf(NULL, 0, NGTCP2_UNREACHABLE_TEMPLATE, file, line, func);
+ if (rv < 0) {
+ abort();
+ }
+
+ /* here we explicitly use system malloc */
+ buflen = (size_t)rv + 1;
+ buf = malloc(buflen);
+ if (buf == NULL) {
+ abort();
+ }
+
+ rv = snprintf(buf, buflen, NGTCP2_UNREACHABLE_TEMPLATE, file, line, func);
+ if (rv < 0) {
+ abort();
+ }
+
+#ifndef WIN32
+ while (write(STDERR_FILENO, buf, (size_t)rv) == -1 && errno == EINTR)
+ ;
+#else /* WIN32 */
+ _write(_fileno(stderr), buf, (unsigned int)rv);
+#endif /* WIN32 */
+
+ free(buf);
+
+ abort();
+}
diff --git a/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_unreachable.h b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_unreachable.h
new file mode 100644
index 0000000..11a6aaa
--- /dev/null
+++ b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_unreachable.h
@@ -0,0 +1,46 @@
+/*
+ * ngtcp2
+ *
+ * Copyright (c) 2022 ngtcp2 contributors
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#ifndef NGTCP2_UNREACHABLE_H
+#define NGTCP2_UNREACHABLE_H
+
+#ifdef HAVE_CONFIG_H
+# include <config.h>
+#endif /* HAVE_CONFIG_H */
+
+#include <ngtcp2/ngtcp2.h>
+
+#define ngtcp2_unreachable() \
+ ngtcp2_unreachable_fail(__FILE__, __LINE__, __func__)
+
+#ifdef _MSC_VER
+__declspec(noreturn)
+#endif /* _MSC_VER */
+ void ngtcp2_unreachable_fail(const char *file, int line, const char *func)
+#ifndef _MSC_VER
+ __attribute__((noreturn))
+#endif /* !_MSC_VER */
+ ;
+
+#endif /* NGTCP2_UNREACHABLE_H */
diff --git a/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_vec.c b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_vec.c
new file mode 100644
index 0000000..257332e
--- /dev/null
+++ b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_vec.c
@@ -0,0 +1,243 @@
+/*
+ * ngtcp2
+ *
+ * Copyright (c) 2018 ngtcp2 contributors
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "ngtcp2_vec.h"
+
+#include <string.h>
+#include <assert.h>
+
+#include "ngtcp2_str.h"
+
+ngtcp2_vec *ngtcp2_vec_init(ngtcp2_vec *vec, const uint8_t *base, size_t len) {
+ vec->base = (uint8_t *)base;
+ vec->len = len;
+ return vec;
+}
+
+int ngtcp2_vec_new(ngtcp2_vec **pvec, const uint8_t *data, size_t datalen,
+ const ngtcp2_mem *mem) {
+ size_t len;
+ uint8_t *p;
+
+ len = sizeof(ngtcp2_vec) + datalen;
+
+ *pvec = ngtcp2_mem_malloc(mem, len);
+ if (*pvec == NULL) {
+ return NGTCP2_ERR_NOMEM;
+ }
+
+ p = (uint8_t *)(*pvec) + sizeof(ngtcp2_vec);
+ (*pvec)->base = p;
+ (*pvec)->len = datalen;
+ if (datalen) {
+ /* p = */ ngtcp2_cpymem(p, data, datalen);
+ }
+
+ return 0;
+}
+
+void ngtcp2_vec_del(ngtcp2_vec *vec, const ngtcp2_mem *mem) {
+ ngtcp2_mem_free(mem, vec);
+}
+
+uint64_t ngtcp2_vec_len(const ngtcp2_vec *vec, size_t n) {
+ size_t i;
+ size_t res = 0;
+
+ for (i = 0; i < n; ++i) {
+ res += vec[i].len;
+ }
+
+ return res;
+}
+
+int64_t ngtcp2_vec_len_varint(const ngtcp2_vec *vec, size_t n) {
+ uint64_t res = 0;
+ size_t len;
+ size_t i;
+
+ for (i = 0; i < n; ++i) {
+ len = vec[i].len;
+ if (len > NGTCP2_MAX_VARINT - res) {
+ return -1;
+ }
+
+ res += len;
+ }
+
+ return (int64_t)res;
+}
+
+ngtcp2_ssize ngtcp2_vec_split(ngtcp2_vec *src, size_t *psrccnt, ngtcp2_vec *dst,
+ size_t *pdstcnt, size_t left, size_t maxcnt) {
+ size_t i;
+ size_t srccnt = *psrccnt;
+ size_t nmove;
+ size_t extra = 0;
+
+ for (i = 0; i < srccnt; ++i) {
+ if (left >= src[i].len) {
+ left -= src[i].len;
+ continue;
+ }
+
+ if (*pdstcnt && src[srccnt - 1].base + src[srccnt - 1].len == dst[0].base) {
+ if (*pdstcnt + srccnt - i - 1 > maxcnt) {
+ return -1;
+ }
+
+ dst[0].len += src[srccnt - 1].len;
+ dst[0].base = src[srccnt - 1].base;
+ extra = src[srccnt - 1].len;
+ --srccnt;
+ } else if (*pdstcnt + srccnt - i > maxcnt) {
+ return -1;
+ }
+
+ if (left == 0) {
+ *psrccnt = i;
+ } else {
+ *psrccnt = i + 1;
+ }
+
+ nmove = srccnt - i;
+ if (nmove) {
+ memmove(dst + nmove, dst, sizeof(ngtcp2_vec) * (*pdstcnt));
+ *pdstcnt += nmove;
+ memcpy(dst, src + i, sizeof(ngtcp2_vec) * nmove);
+ }
+
+ dst[0].len -= left;
+ dst[0].base += left;
+ src[i].len = left;
+
+ if (nmove == 0) {
+ extra -= left;
+ }
+
+ return (ngtcp2_ssize)(ngtcp2_vec_len(dst, nmove) + extra);
+ }
+
+ return 0;
+}
+
+size_t ngtcp2_vec_merge(ngtcp2_vec *dst, size_t *pdstcnt, ngtcp2_vec *src,
+ size_t *psrccnt, size_t left, size_t maxcnt) {
+ size_t orig_left = left;
+ size_t i;
+ ngtcp2_vec *a, *b;
+
+ assert(maxcnt);
+
+ if (*pdstcnt == 0) {
+ if (*psrccnt == 0) {
+ return 0;
+ }
+
+ a = &dst[0];
+ b = &src[0];
+
+ if (left >= b->len) {
+ *a = *b;
+ ++*pdstcnt;
+ left -= b->len;
+ i = 1;
+ } else {
+ a->len = left;
+ a->base = b->base;
+
+ b->len -= left;
+ b->base += left;
+
+ return left;
+ }
+ } else {
+ i = 0;
+ }
+
+ for (; left && i < *psrccnt; ++i) {
+ a = &dst[*pdstcnt - 1];
+ b = &src[i];
+
+ if (left >= b->len) {
+ if (a->base + a->len == b->base) {
+ a->len += b->len;
+ } else if (*pdstcnt == maxcnt) {
+ break;
+ } else {
+ dst[(*pdstcnt)++] = *b;
+ }
+ left -= b->len;
+ continue;
+ }
+
+ if (a->base + a->len == b->base) {
+ a->len += left;
+ } else if (*pdstcnt == maxcnt) {
+ break;
+ } else {
+ dst[*pdstcnt].len = left;
+ dst[*pdstcnt].base = b->base;
+ ++*pdstcnt;
+ }
+
+ b->len -= left;
+ b->base += left;
+ left = 0;
+
+ break;
+ }
+
+ memmove(src, src + i, sizeof(ngtcp2_vec) * (*psrccnt - i));
+ *psrccnt -= i;
+
+ return orig_left - left;
+}
+
+size_t ngtcp2_vec_copy_at_most(ngtcp2_vec *dst, size_t dstcnt,
+ const ngtcp2_vec *src, size_t srccnt,
+ size_t left) {
+ size_t i, j;
+
+ for (i = 0, j = 0; left > 0 && i < srccnt && j < dstcnt;) {
+ if (src[i].len == 0) {
+ ++i;
+ continue;
+ }
+ dst[j] = src[i];
+ if (dst[j].len > left) {
+ dst[j].len = left;
+ return j + 1;
+ }
+ left -= dst[j].len;
+ ++i;
+ ++j;
+ }
+
+ return j;
+}
+
+void ngtcp2_vec_copy(ngtcp2_vec *dst, const ngtcp2_vec *src, size_t cnt) {
+ memcpy(dst, src, sizeof(ngtcp2_vec) * cnt);
+}
diff --git a/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_vec.h b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_vec.h
new file mode 100644
index 0000000..a39c439
--- /dev/null
+++ b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_vec.h
@@ -0,0 +1,120 @@
+/*
+ * ngtcp2
+ *
+ * Copyright (c) 2018 ngtcp2 contributors
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#ifndef NGTCP2_VEC_H
+#define NGTCP2_VEC_H
+
+#ifdef HAVE_CONFIG_H
+# include <config.h>
+#endif /* HAVE_CONFIG_H */
+
+#include <ngtcp2/ngtcp2.h>
+
+#include "ngtcp2_mem.h"
+
+/*
+ * ngtcp2_vec_lit is a convenient macro to fill the object pointed by
+ * |DEST| with the literal string |LIT|.
+ */
+#define ngtcp2_vec_lit(DEST, LIT) \
+ ((DEST)->base = (uint8_t *)(LIT), (DEST)->len = sizeof(LIT) - 1, (DEST))
+
+/*
+ * ngtcp2_vec_init initializes |vec| with the given parameters. It
+ * returns |vec|.
+ */
+ngtcp2_vec *ngtcp2_vec_init(ngtcp2_vec *vec, const uint8_t *base, size_t len);
+
+/*
+ * ngtcp2_vec_new allocates and initializes |*pvec| with given |data|
+ * of length |datalen|. This function allocates memory for |*pvec|
+ * and the given data with a single allocation, and the contents
+ * pointed by |data| is copied into the allocated memory space. To
+ * free the allocated memory, call ngtcp2_vec_del.
+ */
+int ngtcp2_vec_new(ngtcp2_vec **pvec, const uint8_t *data, size_t datalen,
+ const ngtcp2_mem *mem);
+
+/*
+ * ngtcp2_vec_del frees the memory allocated by |vec| which is
+ * allocated and initialized by ngtcp2_vec_new.
+ */
+void ngtcp2_vec_del(ngtcp2_vec *vec, const ngtcp2_mem *mem);
+
+/*
+ * ngtcp2_vec_len returns the sum of length in |vec| of |n| elements.
+ */
+uint64_t ngtcp2_vec_len(const ngtcp2_vec *vec, size_t n);
+
+/*
+ * ngtcp2_vec_len_varint is similar to ngtcp2_vec_len, but it returns
+ * -1 if the sum of the length exceeds NGTCP2_MAX_VARINT.
+ */
+int64_t ngtcp2_vec_len_varint(const ngtcp2_vec *vec, size_t n);
+
+/*
+ * ngtcp2_vec_split splits |src| to |dst| so that the sum of the
+ * length in |src| does not exceed |left| bytes. The |maxcnt| is the
+ * maximum number of elements which |dst| array can contain. The
+ * caller must set |*psrccnt| to the number of elements of |src|.
+ * Similarly, the caller must set |*pdstcnt| to the number of elements
+ * of |dst|. The split does not necessarily occur at the boundary of
+ * ngtcp2_vec object. After split has done, this function updates
+ * |*psrccnt| and |*pdstcnt|. This function returns the number of
+ * bytes moved from |src| to |dst|. If split cannot be made because
+ * doing so exceeds |maxcnt|, this function returns -1.
+ */
+ngtcp2_ssize ngtcp2_vec_split(ngtcp2_vec *src, size_t *psrccnt, ngtcp2_vec *dst,
+ size_t *pdstcnt, size_t left, size_t maxcnt);
+
+/*
+ * ngtcp2_vec_merge merges |src| into |dst| by moving at most |left|
+ * bytes from |src|. The |maxcnt| is the maximum number of elements
+ * which |dst| array can contain. The caller must set |*pdstcnt| to
+ * the number of elements of |dst|. Similarly, the caller must set
+ * |*psrccnt| to the number of elements of |src|. After merge has
+ * done, this function updates |*psrccnt| and |*pdstcnt|. This
+ * function returns the number of bytes moved from |src| to |dst|.
+ */
+size_t ngtcp2_vec_merge(ngtcp2_vec *dst, size_t *pdstcnt, ngtcp2_vec *src,
+ size_t *psrccnt, size_t left, size_t maxcnt);
+
+/*
+ * ngtcp2_vec_copy_at_most copies |src| of length |srccnt| to |dst| of
+ * length |dstcnt|. The total number of bytes which the copied
+ * ngtcp2_vec refers to is at most |left|. The empty elements in
+ * |src| are ignored. This function returns the number of elements
+ * copied.
+ */
+size_t ngtcp2_vec_copy_at_most(ngtcp2_vec *dst, size_t dstcnt,
+ const ngtcp2_vec *src, size_t srccnt,
+ size_t left);
+
+/*
+ * ngtcp2_vec_copy copies |src| of length |cnt| to |dst|. |dst| must
+ * have sufficient capacity.
+ */
+void ngtcp2_vec_copy(ngtcp2_vec *dst, const ngtcp2_vec *src, size_t cnt);
+
+#endif /* NGTCP2_VEC_H */
diff --git a/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_version.c b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_version.c
new file mode 100644
index 0000000..b31162c
--- /dev/null
+++ b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_version.c
@@ -0,0 +1,39 @@
+/*
+ * ngtcp2
+ *
+ * Copyright (c) 2019 ngtcp2 contributors
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#ifdef HAVE_CONFIG_H
+# include <config.h>
+#endif /* HAVE_CONFIG_H */
+
+#include <ngtcp2/ngtcp2.h>
+
+static ngtcp2_info version = {NGTCP2_VERSION_AGE, NGTCP2_VERSION_NUM,
+ NGTCP2_VERSION};
+
+const ngtcp2_info *ngtcp2_version(int least_version) {
+ if (least_version > NGTCP2_VERSION_NUM) {
+ return NULL;
+ }
+ return &version;
+}
diff --git a/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_window_filter.c b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_window_filter.c
new file mode 100644
index 0000000..71c816e
--- /dev/null
+++ b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_window_filter.c
@@ -0,0 +1,99 @@
+/*
+ * ngtcp2
+ *
+ * Copyright (c) 2021 ngtcp2 contributors
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Translated to C from the original C++ code
+ * https://quiche.googlesource.com/quiche/+/5be974e29f7e71a196e726d6e2272676d33ab77d/quic/core/congestion_control/windowed_filter.h
+ * with the following license:
+ *
+ * // Copyright (c) 2016 The Chromium Authors. All rights reserved.
+ * // Use of this source code is governed by a BSD-style license that can be
+ * // found in the LICENSE file.
+ */
+#include "ngtcp2_window_filter.h"
+
+#include <string.h>
+
+void ngtcp2_window_filter_init(ngtcp2_window_filter *wf,
+ uint64_t window_length) {
+ wf->window_length = window_length;
+ memset(wf->estimates, 0, sizeof(wf->estimates));
+}
+
+void ngtcp2_window_filter_update(ngtcp2_window_filter *wf, uint64_t new_sample,
+ uint64_t new_time) {
+ if (wf->estimates[0].sample == 0 || new_sample > wf->estimates[0].sample ||
+ new_time - wf->estimates[2].time > wf->window_length) {
+ ngtcp2_window_filter_reset(wf, new_sample, new_time);
+ return;
+ }
+
+ if (new_sample > wf->estimates[1].sample) {
+ wf->estimates[1].sample = new_sample;
+ wf->estimates[1].time = new_time;
+ wf->estimates[2] = wf->estimates[1];
+ } else if (new_sample > wf->estimates[2].sample) {
+ wf->estimates[2].sample = new_sample;
+ wf->estimates[2].time = new_time;
+ }
+
+ if (new_time - wf->estimates[0].time > wf->window_length) {
+ wf->estimates[0] = wf->estimates[1];
+ wf->estimates[1] = wf->estimates[2];
+ wf->estimates[2].sample = new_sample;
+ wf->estimates[2].time = new_time;
+
+ if (new_time - wf->estimates[0].time > wf->window_length) {
+ wf->estimates[0] = wf->estimates[1];
+ wf->estimates[1] = wf->estimates[2];
+ }
+ return;
+ }
+
+ if (wf->estimates[1].sample == wf->estimates[0].sample &&
+ new_time - wf->estimates[1].time > wf->window_length >> 2) {
+ wf->estimates[2].sample = new_sample;
+ wf->estimates[2].time = new_time;
+ wf->estimates[1] = wf->estimates[2];
+ return;
+ }
+
+ if (wf->estimates[2].sample == wf->estimates[1].sample &&
+ new_time - wf->estimates[2].time > wf->window_length >> 1) {
+ wf->estimates[2].sample = new_sample;
+ wf->estimates[2].time = new_time;
+ }
+}
+
+void ngtcp2_window_filter_reset(ngtcp2_window_filter *wf, uint64_t new_sample,
+ uint64_t new_time) {
+ wf->estimates[0].sample = new_sample;
+ wf->estimates[0].time = new_time;
+ wf->estimates[1] = wf->estimates[2] = wf->estimates[0];
+}
+
+uint64_t ngtcp2_window_filter_get_best(ngtcp2_window_filter *wf) {
+ return wf->estimates[0].sample;
+}
diff --git a/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_window_filter.h b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_window_filter.h
new file mode 100644
index 0000000..50415f1
--- /dev/null
+++ b/src/contrib/libngtcp2/ngtcp2/lib/ngtcp2_window_filter.h
@@ -0,0 +1,65 @@
+/*
+ * ngtcp2
+ *
+ * Copyright (c) 2021 ngtcp2 contributors
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Translated to C from the original C++ code
+ * https://quiche.googlesource.com/quiche/+/5be974e29f7e71a196e726d6e2272676d33ab77d/quic/core/congestion_control/windowed_filter.h
+ * with the following license:
+ *
+ * // Copyright (c) 2016 The Chromium Authors. All rights reserved.
+ * // Use of this source code is governed by a BSD-style license that can be
+ * // found in the LICENSE file.
+ */
+#ifndef NGTCP2_WINDOW_FILTER_H
+#define NGTCP2_WINDOW_FILTER_H
+
+#ifdef HAVE_CONFIG_H
+# include <config.h>
+#endif /* HAVE_CONFIG_H */
+
+#include <ngtcp2/ngtcp2.h>
+
+typedef struct ngtcp2_window_filter_sample {
+ uint64_t sample;
+ uint64_t time;
+} ngtcp2_window_filter_sample;
+
+typedef struct ngtcp2_window_filter {
+ uint64_t window_length;
+ ngtcp2_window_filter_sample estimates[3];
+} ngtcp2_window_filter;
+
+void ngtcp2_window_filter_init(ngtcp2_window_filter *wf,
+ uint64_t window_length);
+
+void ngtcp2_window_filter_update(ngtcp2_window_filter *wf, uint64_t new_sample,
+ uint64_t new_time);
+
+void ngtcp2_window_filter_reset(ngtcp2_window_filter *wf, uint64_t new_sample,
+ uint64_t new_time);
+
+uint64_t ngtcp2_window_filter_get_best(ngtcp2_window_filter *wf);
+
+#endif /* NGTCP2_WINDOW_FILTER_H */
diff --git a/src/contrib/libngtcp2/ngtcp2/ngtcp2.h b/src/contrib/libngtcp2/ngtcp2/ngtcp2.h
new file mode 100644
index 0000000..d4ba09d
--- /dev/null
+++ b/src/contrib/libngtcp2/ngtcp2/ngtcp2.h
@@ -0,0 +1,5906 @@
+/*
+ * ngtcp2
+ *
+ * Copyright (c) 2017 ngtcp2 contributors
+ * Copyright (c) 2017 nghttp2 contributors
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#ifndef NGTCP2_H
+#define NGTCP2_H
+
+/* Define WIN32 when build target is Win32 API (borrowed from
+ libcurl) */
+#if (defined(_WIN32) || defined(__WIN32__)) && !defined(WIN32)
+# define WIN32
+#endif
+
+#ifdef _MSC_VER
+# pragma warning(push)
+# pragma warning(disable : 4324)
+#endif
+
+#include <stdlib.h>
+#if defined(_MSC_VER) && (_MSC_VER < 1800)
+/* MSVC < 2013 does not have inttypes.h because it is not C99
+ compliant. See compiler macros and version number in
+ https://sourceforge.net/p/predef/wiki/Compilers/ */
+# include <stdint.h>
+#else /* !defined(_MSC_VER) || (_MSC_VER >= 1800) */
+# include <inttypes.h>
+#endif /* !defined(_MSC_VER) || (_MSC_VER >= 1800) */
+#include <sys/types.h>
+#include <stdarg.h>
+#include <stddef.h>
+
+#ifndef NGTCP2_USE_GENERIC_SOCKADDR
+# ifdef WIN32
+# ifndef WIN32_LEAN_AND_MEAN
+# define WIN32_LEAN_AND_MEAN
+# endif
+# include <ws2tcpip.h>
+# else
+# include <sys/socket.h>
+# include <netinet/in.h>
+# endif
+#endif
+
+#ifdef AF_INET
+# define NGTCP2_AF_INET AF_INET
+#else
+# define NGTCP2_AF_INET 2
+#endif
+
+#ifdef AF_INET6
+# define NGTCP2_AF_INET6 AF_INET6
+#else
+# define NGTCP2_AF_INET6 23
+# define NGTCP2_USE_GENERIC_IPV6_SOCKADDR
+#endif
+
+#include <ngtcp2/version.h>
+
+#ifdef NGTCP2_STATICLIB
+# define NGTCP2_EXTERN
+#elif defined(WIN32)
+# ifdef BUILDING_NGTCP2
+# define NGTCP2_EXTERN __declspec(dllexport)
+# else /* !BUILDING_NGTCP2 */
+# define NGTCP2_EXTERN __declspec(dllimport)
+# endif /* !BUILDING_NGTCP2 */
+#else /* !defined(WIN32) */
+# ifdef BUILDING_NGTCP2
+# define NGTCP2_EXTERN __attribute__((visibility("default")))
+# else /* !BUILDING_NGTCP2 */
+# define NGTCP2_EXTERN
+# endif /* !BUILDING_NGTCP2 */
+#endif /* !defined(WIN32) */
+
+#ifdef _MSC_VER
+# define NGTCP2_ALIGN(N) __declspec(align(N))
+#else /* !_MSC_VER */
+# define NGTCP2_ALIGN(N) __attribute__((aligned(N)))
+#endif /* !_MSC_VER */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * @typedef
+ *
+ * :type:`ngtcp2_ssize` is signed counterpart of size_t.
+ */
+typedef ptrdiff_t ngtcp2_ssize;
+
+/**
+ * @functypedef
+ *
+ * :type:`ngtcp2_malloc` is a custom memory allocator to replace
+ * :manpage:`malloc(3)`. The |user_data| is
+ * :member:`ngtcp2_mem.user_data`.
+ */
+typedef void *(*ngtcp2_malloc)(size_t size, void *user_data);
+
+/**
+ * @functypedef
+ *
+ * :type:`ngtcp2_free` is a custom memory allocator to replace
+ * :manpage:`free(3)`. The |user_data| is
+ * :member:`ngtcp2_mem.user_data`.
+ */
+typedef void (*ngtcp2_free)(void *ptr, void *user_data);
+
+/**
+ * @functypedef
+ *
+ * :type:`ngtcp2_calloc` is a custom memory allocator to replace
+ * :manpage:`calloc(3)`. The |user_data| is the
+ * :member:`ngtcp2_mem.user_data`.
+ */
+typedef void *(*ngtcp2_calloc)(size_t nmemb, size_t size, void *user_data);
+
+/**
+ * @functypedef
+ *
+ * :type:`ngtcp2_realloc` is a custom memory allocator to replace
+ * :manpage:`realloc(3)`. The |user_data| is the
+ * :member:`ngtcp2_mem.user_data`.
+ */
+typedef void *(*ngtcp2_realloc)(void *ptr, size_t size, void *user_data);
+
+/**
+ * @struct
+ *
+ * :type:`ngtcp2_mem` is a custom memory allocator. The
+ * :member:`user_data` field is passed to each allocator function.
+ * This can be used, for example, to achieve per-connection memory
+ * pool.
+ *
+ * In the following example code, ``my_malloc``, ``my_free``,
+ * ``my_calloc`` and ``my_realloc`` are the replacement of the
+ * standard allocators :manpage:`malloc(3)`, :manpage:`free(3)`,
+ * :manpage:`calloc(3)` and :manpage:`realloc(3)` respectively::
+ *
+ * void *my_malloc_cb(size_t size, void *user_data) {
+ * (void)user_data;
+ * return my_malloc(size);
+ * }
+ *
+ * void my_free_cb(void *ptr, void *user_data) {
+ * (void)user_data;
+ * my_free(ptr);
+ * }
+ *
+ * void *my_calloc_cb(size_t nmemb, size_t size, void *user_data) {
+ * (void)user_data;
+ * return my_calloc(nmemb, size);
+ * }
+ *
+ * void *my_realloc_cb(void *ptr, size_t size, void *user_data) {
+ * (void)user_data;
+ * return my_realloc(ptr, size);
+ * }
+ *
+ * void conn_new() {
+ * ngtcp2_mem mem = {NULL, my_malloc_cb, my_free_cb, my_calloc_cb,
+ * my_realloc_cb};
+ *
+ * ...
+ * }
+ */
+typedef struct ngtcp2_mem {
+ /**
+ * :member:`user_data` is an arbitrary user supplied data. This
+ * is passed to each allocator function.
+ */
+ void *user_data;
+ /**
+ * :member:`malloc` is a custom allocator function to replace
+ * :manpage:`malloc(3)`.
+ */
+ ngtcp2_malloc malloc;
+ /**
+ * :member:`free` is a custom allocator function to replace
+ * :manpage:`free(3)`.
+ */
+ ngtcp2_free free;
+ /**
+ * :member:`calloc` is a custom allocator function to replace
+ * :manpage:`calloc(3)`.
+ */
+ ngtcp2_calloc calloc;
+ /**
+ * :member:`realloc` is a custom allocator function to replace
+ * :manpage:`realloc(3)`.
+ */
+ ngtcp2_realloc realloc;
+} ngtcp2_mem;
+
+/**
+ * @macrosection
+ *
+ * Time related macros
+ */
+
+/**
+ * @macro
+ *
+ * :macro:`NGTCP2_SECONDS` is a count of tick which corresponds to 1 second.
+ */
+#define NGTCP2_SECONDS ((ngtcp2_duration)1000000000ULL)
+
+/**
+ * @macro
+ *
+ * :macro:`NGTCP2_MILLISECONDS` is a count of tick which corresponds
+ * to 1 millisecond.
+ */
+#define NGTCP2_MILLISECONDS ((ngtcp2_duration)1000000ULL)
+
+/**
+ * @macro
+ *
+ * :macro:`NGTCP2_MICROSECONDS` is a count of tick which corresponds
+ * to 1 microsecond.
+ */
+#define NGTCP2_MICROSECONDS ((ngtcp2_duration)1000ULL)
+
+/**
+ * @macro
+ *
+ * :macro:`NGTCP2_NANOSECONDS` is a count of tick which corresponds to
+ * 1 nanosecond.
+ */
+#define NGTCP2_NANOSECONDS ((ngtcp2_duration)1ULL)
+
+/**
+ * @macrosection
+ *
+ * QUIC protocol version macros
+ */
+
+/**
+ * @macro
+ *
+ * :macro:`NGTCP2_PROTO_VER_V1` is the QUIC version 1.
+ */
+#define NGTCP2_PROTO_VER_V1 ((uint32_t)0x00000001u)
+
+/**
+ * @macro
+ *
+ * :macro:`NGTCP2_PROTO_VER_V2` is the QUIC version 2.
+ *
+ * https://quicwg.org/quic-v2/draft-ietf-quic-v2.html
+ */
+#define NGTCP2_PROTO_VER_V2 ((uint32_t)0x6b3343cfu)
+
+/**
+ * @macro
+ *
+ * :macro:`NGTCP2_PROTO_VER_DRAFT_MAX` is the maximum QUIC draft
+ * version that this library supports.
+ */
+#define NGTCP2_PROTO_VER_DRAFT_MAX 0xff000020u
+
+/**
+ * @macro
+ *
+ * :macro:`NGTCP2_PROTO_VER_DRAFT_MIN` is the minimum QUIC draft
+ * version that this library supports.
+ */
+#define NGTCP2_PROTO_VER_DRAFT_MIN 0xff00001du
+
+/**
+ * @macro
+ *
+ * :macro:`NGTCP2_PROTO_VER_MAX` is the highest QUIC version that this
+ * library supports.
+ */
+#define NGTCP2_PROTO_VER_MAX NGTCP2_PROTO_VER_V1
+
+/**
+ * @macro
+ *
+ * :macro:`NGTCP2_PROTO_VER_MIN` is the lowest QUIC version that this
+ * library supports.
+ */
+#define NGTCP2_PROTO_VER_MIN NGTCP2_PROTO_VER_DRAFT_MIN
+
+/**
+ * @macro
+ *
+ * :macro:`NGTCP2_RESERVED_VERSION_MASK` is the bit mask of reserved
+ * version.
+ */
+#define NGTCP2_RESERVED_VERSION_MASK 0x0a0a0a0au
+
+/**
+ * @macrosection
+ *
+ * UDP datagram related macros
+ */
+
+/**
+ * @macro
+ *
+ * :macro:`NGTCP2_MAX_UDP_PAYLOAD_SIZE` is the default maximum UDP
+ * datagram payload size that this endpoint transmits.
+ */
+#define NGTCP2_MAX_UDP_PAYLOAD_SIZE 1200
+
+/**
+ * @macro
+ *
+ * :macro:`NGTCP2_MAX_PMTUD_UDP_PAYLOAD_SIZE` is the maximum UDP
+ * datagram payload size that Path MTU Discovery can discover.
+ */
+#define NGTCP2_MAX_PMTUD_UDP_PAYLOAD_SIZE 1452
+
+/**
+ * @macrosection
+ *
+ * QUIC specific macros
+ */
+
+/**
+ * @macro
+ *
+ * :macro:`NGTCP2_MAX_VARINT` is the maximum value which can be
+ * encoded in variable-length integer encoding.
+ */
+#define NGTCP2_MAX_VARINT ((1ULL << 62) - 1)
+
+/**
+ * @macro
+ *
+ * :macro:`NGTCP2_STATELESS_RESET_TOKENLEN` is the length of Stateless
+ * Reset Token.
+ */
+#define NGTCP2_STATELESS_RESET_TOKENLEN 16
+
+/**
+ * @macro
+ *
+ * :macro:`NGTCP2_MIN_STATELESS_RESET_RANDLEN` is the minimum length
+ * of random bytes (Unpredictable Bits) in Stateless Reset packet
+ */
+#define NGTCP2_MIN_STATELESS_RESET_RANDLEN 5
+
+/**
+ * @macro
+ *
+ * :macro:`NGTCP2_PATH_CHALLENGE_DATALEN` is the length of
+ * PATH_CHALLENGE data.
+ */
+#define NGTCP2_PATH_CHALLENGE_DATALEN 8
+
+/**
+ * @macro
+ *
+ * :macro:`NGTCP2_RETRY_KEY_DRAFT` is an encryption key to create
+ * integrity tag of Retry packet. It is used for QUIC draft versions.
+ */
+#define NGTCP2_RETRY_KEY_DRAFT \
+ "\xcc\xce\x18\x7e\xd0\x9a\x09\xd0\x57\x28\x15\x5a\x6c\xb9\x6b\xe1"
+
+/**
+ * @macro
+ *
+ * :macro:`NGTCP2_RETRY_NONCE_DRAFT` is nonce used when generating
+ * integrity tag of Retry packet. It is used for QUIC draft versions.
+ */
+#define NGTCP2_RETRY_NONCE_DRAFT \
+ "\xe5\x49\x30\xf9\x7f\x21\x36\xf0\x53\x0a\x8c\x1c"
+
+/**
+ * @macro
+ *
+ * :macro:`NGTCP2_RETRY_KEY_V1` is an encryption key to create
+ * integrity tag of Retry packet. It is used for QUIC v1.
+ */
+#define NGTCP2_RETRY_KEY_V1 \
+ "\xbe\x0c\x69\x0b\x9f\x66\x57\x5a\x1d\x76\x6b\x54\xe3\x68\xc8\x4e"
+
+/**
+ * @macro
+ *
+ * :macro:`NGTCP2_RETRY_NONCE_V1` is nonce used when generating integrity
+ * tag of Retry packet. It is used for QUIC v1.
+ */
+#define NGTCP2_RETRY_NONCE_V1 "\x46\x15\x99\xd3\x5d\x63\x2b\xf2\x23\x98\x25\xbb"
+
+/**
+ * @macro
+ *
+ * :macro:`NGTCP2_RETRY_KEY_V2` is an encryption key to create
+ * integrity tag of Retry packet. It is used for QUIC v2.
+ *
+ * https://quicwg.org/quic-v2/draft-ietf-quic-v2.html
+ */
+#define NGTCP2_RETRY_KEY_V2 \
+ "\x8f\xb4\xb0\x1b\x56\xac\x48\xe2\x60\xfb\xcb\xce\xad\x7c\xcc\x92"
+
+/**
+ * @macro
+ *
+ * :macro:`NGTCP2_RETRY_NONCE_V2` is nonce used when generating
+ * integrity tag of Retry packet. It is used for QUIC v2.
+ *
+ * https://quicwg.org/quic-v2/draft-ietf-quic-v2.html
+ */
+#define NGTCP2_RETRY_NONCE_V2 "\xd8\x69\x69\xbc\x2d\x7c\x6d\x99\x90\xef\xb0\x4a"
+
+/**
+ * @macro
+ *
+ * :macro:`NGTCP2_HP_MASKLEN` is the length of header protection mask.
+ */
+#define NGTCP2_HP_MASKLEN 5
+
+/**
+ * @macro
+ *
+ * :macro:`NGTCP2_HP_SAMPLELEN` is the number bytes sampled when
+ * encrypting a packet header.
+ */
+#define NGTCP2_HP_SAMPLELEN 16
+
+/**
+ * @macro
+ *
+ * :macro:`NGTCP2_DEFAULT_INITIAL_RTT` is a default initial RTT.
+ */
+#define NGTCP2_DEFAULT_INITIAL_RTT (333 * NGTCP2_MILLISECONDS)
+
+/**
+ * @macro
+ *
+ * :macro:`NGTCP2_MAX_CIDLEN` is the maximum length of Connection ID.
+ */
+#define NGTCP2_MAX_CIDLEN 20
+
+/**
+ * @macro
+ *
+ * :macro:`NGTCP2_MIN_CIDLEN` is the minimum length of Connection ID.
+ */
+#define NGTCP2_MIN_CIDLEN 1
+
+/**
+ * @macro
+ *
+ * :macro:`NGTCP2_MIN_INITIAL_DCIDLEN` is the minimum length of
+ * Destination Connection ID in Client Initial packet if it does not
+ * bear token from Retry packet.
+ */
+#define NGTCP2_MIN_INITIAL_DCIDLEN 8
+
+/**
+ * @macro
+ *
+ * :macro:`NGTCP2_DEFAULT_HANDSHAKE_TIMEOUT` is the default handshake
+ * timeout.
+ */
+#define NGTCP2_DEFAULT_HANDSHAKE_TIMEOUT (10 * NGTCP2_SECONDS)
+
+/**
+ * @macrosection
+ *
+ * ECN related macros
+ */
+
+/**
+ * @macro
+ *
+ * :macro:`NGTCP2_ECN_NOT_ECT` indicates no ECN marking.
+ */
+#define NGTCP2_ECN_NOT_ECT 0x0
+
+/**
+ * @macro
+ *
+ * :macro:`NGTCP2_ECN_ECT_1` is ECT(1) codepoint.
+ */
+#define NGTCP2_ECN_ECT_1 0x1
+
+/**
+ * @macro
+ *
+ * :macro:`NGTCP2_ECN_ECT_0` is ECT(0) codepoint.
+ */
+#define NGTCP2_ECN_ECT_0 0x2
+
+/**
+ * @macro
+ *
+ * :macro:`NGTCP2_ECN_CE` is CE codepoint.
+ */
+#define NGTCP2_ECN_CE 0x3
+
+/**
+ * @macro
+ *
+ * :macro:`NGTCP2_ECN_MASK` is a bit mask to get ECN marking.
+ */
+#define NGTCP2_ECN_MASK 0x3
+
+#define NGTCP2_PKT_INFO_VERSION_V1 1
+#define NGTCP2_PKT_INFO_VERSION NGTCP2_PKT_INFO_VERSION_V1
+
+/**
+ * @struct
+ *
+ * :type:`ngtcp2_pkt_info` is a packet metadata.
+ */
+typedef struct NGTCP2_ALIGN(8) ngtcp2_pkt_info {
+ /**
+ * :member:`ecn` is ECN marking and when passing
+ * `ngtcp2_conn_read_pkt()`, and it should be either
+ * :macro:`NGTCP2_ECN_NOT_ECT`, :macro:`NGTCP2_ECN_ECT_1`,
+ * :macro:`NGTCP2_ECN_ECT_0`, or :macro:`NGTCP2_ECN_CE`.
+ */
+ uint32_t ecn;
+} ngtcp2_pkt_info;
+
+/**
+ * @macrosection
+ *
+ * ngtcp2 library error codes
+ */
+
+/**
+ * @macro
+ *
+ * :macro:`NGTCP2_ERR_INVALID_ARGUMENT` indicates that a passed
+ * argument is invalid.
+ */
+#define NGTCP2_ERR_INVALID_ARGUMENT -201
+/**
+ * @macro
+ *
+ * :macro:`NGTCP2_ERR_NOBUF` indicates that a provided buffer does not
+ * have enough space to store data.
+ */
+#define NGTCP2_ERR_NOBUF -203
+/**
+ * @macro
+ *
+ * :macro:`NGTCP2_ERR_PROTO` indicates a general protocol error.
+ */
+#define NGTCP2_ERR_PROTO -205
+/**
+ * @macro
+ *
+ * :macro:`NGTCP2_ERR_INVALID_STATE` indicates that a requested
+ * operation is not allowed at the current connection state.
+ */
+#define NGTCP2_ERR_INVALID_STATE -206
+/**
+ * @macro
+ *
+ * :macro:`NGTCP2_ERR_ACK_FRAME` indicates that an invalid ACK frame
+ * is received.
+ */
+#define NGTCP2_ERR_ACK_FRAME -207
+/**
+ * @macro
+ *
+ * :macro:`NGTCP2_ERR_STREAM_ID_BLOCKED` indicates that there is no
+ * spare stream ID available.
+ */
+#define NGTCP2_ERR_STREAM_ID_BLOCKED -208
+/**
+ * @macro
+ *
+ * :macro:`NGTCP2_ERR_STREAM_IN_USE` indicates that a stream ID is
+ * already in use.
+ */
+#define NGTCP2_ERR_STREAM_IN_USE -209
+/**
+ * @macro
+ *
+ * :macro:`NGTCP2_ERR_STREAM_DATA_BLOCKED` indicates that stream data
+ * cannot be sent because of flow control.
+ */
+#define NGTCP2_ERR_STREAM_DATA_BLOCKED -210
+/**
+ * @macro
+ *
+ * :macro:`NGTCP2_ERR_FLOW_CONTROL` indicates flow control error.
+ */
+#define NGTCP2_ERR_FLOW_CONTROL -211
+/**
+ * @macro
+ *
+ * :macro:`NGTCP2_ERR_CONNECTION_ID_LIMIT` indicates that the number
+ * of received Connection ID exceeds acceptable limit.
+ */
+#define NGTCP2_ERR_CONNECTION_ID_LIMIT -212
+/**
+ * @macro
+ *
+ * :macro:`NGTCP2_ERR_STREAM_LIMIT` indicates that a remote endpoint
+ * opens more streams that is permitted.
+ */
+#define NGTCP2_ERR_STREAM_LIMIT -213
+/**
+ * @macro
+ *
+ * :macro:`NGTCP2_ERR_FINAL_SIZE` indicates that inconsistent final
+ * size of a stream.
+ */
+#define NGTCP2_ERR_FINAL_SIZE -214
+/**
+ * @macro
+ *
+ * :macro:`NGTCP2_ERR_CRYPTO` indicates crypto (TLS) related error.
+ */
+#define NGTCP2_ERR_CRYPTO -215
+/**
+ * @macro
+ *
+ * :macro:`NGTCP2_ERR_PKT_NUM_EXHAUSTED` indicates that packet number
+ * is exhausted.
+ */
+#define NGTCP2_ERR_PKT_NUM_EXHAUSTED -216
+/**
+ * @macro
+ *
+ * :macro:`NGTCP2_ERR_REQUIRED_TRANSPORT_PARAM` indicates that a
+ * required transport parameter is missing.
+ */
+#define NGTCP2_ERR_REQUIRED_TRANSPORT_PARAM -217
+/**
+ * @macro
+ *
+ * :macro:`NGTCP2_ERR_MALFORMED_TRANSPORT_PARAM` indicates that a
+ * transport parameter is malformed.
+ */
+#define NGTCP2_ERR_MALFORMED_TRANSPORT_PARAM -218
+/**
+ * @macro
+ *
+ * :macro:`NGTCP2_ERR_FRAME_ENCODING` indicates there is an error in
+ * frame encoding.
+ */
+#define NGTCP2_ERR_FRAME_ENCODING -219
+/**
+ * @macro
+ *
+ * :macro:`NGTCP2_ERR_DECRYPT` indicates a decryption failure.
+ */
+#define NGTCP2_ERR_DECRYPT -220
+/**
+ * @macro
+ *
+ * :macro:`NGTCP2_ERR_STREAM_SHUT_WR` indicates no more data can be
+ * sent to a stream.
+ */
+#define NGTCP2_ERR_STREAM_SHUT_WR -221
+/**
+ * @macro
+ *
+ * :macro:`NGTCP2_ERR_STREAM_NOT_FOUND` indicates that a stream was not
+ * found.
+ */
+#define NGTCP2_ERR_STREAM_NOT_FOUND -222
+/**
+ * @macro
+ *
+ * :macro:`NGTCP2_ERR_STREAM_STATE` indicates that a requested
+ * operation is not allowed at the current stream state.
+ */
+#define NGTCP2_ERR_STREAM_STATE -226
+/**
+ * @macro
+ *
+ * :macro:`NGTCP2_ERR_RECV_VERSION_NEGOTIATION` indicates that Version
+ * Negotiation packet was received.
+ */
+#define NGTCP2_ERR_RECV_VERSION_NEGOTIATION -229
+/**
+ * @macro
+ *
+ * :macro:`NGTCP2_ERR_CLOSING` indicates that connection is in closing
+ * state.
+ */
+#define NGTCP2_ERR_CLOSING -230
+/**
+ * @macro
+ *
+ * :macro:`NGTCP2_ERR_DRAINING` indicates that connection is in
+ * draining state.
+ */
+#define NGTCP2_ERR_DRAINING -231
+/**
+ * @macro
+ *
+ * :macro:`NGTCP2_ERR_TRANSPORT_PARAM` indicates a general transport
+ * parameter error.
+ */
+#define NGTCP2_ERR_TRANSPORT_PARAM -234
+/**
+ * @macro
+ *
+ * :macro:`NGTCP2_ERR_DISCARD_PKT` indicates a packet was discarded.
+ */
+#define NGTCP2_ERR_DISCARD_PKT -235
+/**
+ * @macro
+ *
+ * :macro:`NGTCP2_ERR_CONN_ID_BLOCKED` indicates that there is no
+ * spare Connection ID available.
+ */
+#define NGTCP2_ERR_CONN_ID_BLOCKED -237
+/**
+ * @macro
+ *
+ * :macro:`NGTCP2_ERR_INTERNAL` indicates an internal error.
+ */
+#define NGTCP2_ERR_INTERNAL -238
+/**
+ * @macro
+ *
+ * :macro:`NGTCP2_ERR_CRYPTO_BUFFER_EXCEEDED` indicates that a crypto
+ * buffer exceeded.
+ */
+#define NGTCP2_ERR_CRYPTO_BUFFER_EXCEEDED -239
+/**
+ * @macro
+ *
+ * :macro:`NGTCP2_ERR_WRITE_MORE` indicates
+ * :macro:`NGTCP2_WRITE_STREAM_FLAG_MORE` is used and a function call
+ * succeeded.
+ */
+#define NGTCP2_ERR_WRITE_MORE -240
+/**
+ * @macro
+ *
+ * :macro:`NGTCP2_ERR_RETRY` indicates that server should send Retry
+ * packet.
+ */
+#define NGTCP2_ERR_RETRY -241
+/**
+ * @macro
+ *
+ * :macro:`NGTCP2_ERR_DROP_CONN` indicates that an endpoint should
+ * drop connection immediately.
+ */
+#define NGTCP2_ERR_DROP_CONN -242
+/**
+ * @macro
+ *
+ * :macro:`NGTCP2_ERR_AEAD_LIMIT_REACHED` indicates AEAD encryption
+ * limit is reached and key update is not available. An endpoint
+ * should drop connection immediately.
+ */
+#define NGTCP2_ERR_AEAD_LIMIT_REACHED -243
+/**
+ * @macro
+ *
+ * :macro:`NGTCP2_ERR_NO_VIABLE_PATH` indicates that path validation
+ * could not probe that a path is capable of sending UDP datagram
+ * payload of size at least 1200 bytes.
+ */
+#define NGTCP2_ERR_NO_VIABLE_PATH -244
+/**
+ * @macro
+ *
+ * :macro:`NGTCP2_ERR_VERSION_NEGOTIATION` indicates that server
+ * should send Version Negotiation packet.
+ */
+#define NGTCP2_ERR_VERSION_NEGOTIATION -245
+/**
+ * @macro
+ *
+ * :macro:`NGTCP2_ERR_HANDSHAKE_TIMEOUT` indicates that QUIC
+ * connection is not established before the specified deadline.
+ */
+#define NGTCP2_ERR_HANDSHAKE_TIMEOUT -246
+/**
+ * @macro
+ *
+ * :macro:`NGTCP2_ERR_VERSION_NEGOTIATION_FAILURE` indicates the
+ * version negotiation failed.
+ */
+#define NGTCP2_ERR_VERSION_NEGOTIATION_FAILURE -247
+/**
+ * @macro
+ *
+ * :macro:`NGTCP2_ERR_IDLE_CLOSE` indicates the connection should be
+ * closed silently because of idle timeout.
+ */
+#define NGTCP2_ERR_IDLE_CLOSE -248
+/**
+ * @macro
+ *
+ * :macro:`NGTCP2_ERR_FATAL` indicates that error codes less than this
+ * value is fatal error. When this error is returned, an endpoint
+ * should drop connection immediately.
+ */
+#define NGTCP2_ERR_FATAL -500
+/**
+ * @macro
+ *
+ * :macro:`NGTCP2_ERR_NOMEM` indicates out of memory.
+ */
+#define NGTCP2_ERR_NOMEM -501
+/**
+ * @macro
+ *
+ * :macro:`NGTCP2_ERR_CALLBACK_FAILURE` indicates that user defined
+ * callback function failed.
+ */
+#define NGTCP2_ERR_CALLBACK_FAILURE -502
+
+/**
+ * @macrosection
+ *
+ * QUIC packet header flags
+ */
+
+/**
+ * @macro
+ *
+ * :macro:`NGTCP2_PKT_FLAG_NONE` indicates no flag set.
+ */
+#define NGTCP2_PKT_FLAG_NONE 0x00u
+
+/**
+ * @macro
+ *
+ * :macro:`NGTCP2_PKT_FLAG_LONG_FORM` indicates the Long header packet
+ * header.
+ */
+#define NGTCP2_PKT_FLAG_LONG_FORM 0x01u
+
+/**
+ * @macro
+ *
+ * :macro:`NGTCP2_PKT_FLAG_FIXED_BIT_CLEAR` indicates that Fixed Bit
+ * (aka QUIC bit) is not set.
+ */
+#define NGTCP2_PKT_FLAG_FIXED_BIT_CLEAR 0x02u
+
+/**
+ * @macro
+ *
+ * :macro:`NGTCP2_PKT_FLAG_KEY_PHASE` indicates Key Phase bit set.
+ */
+#define NGTCP2_PKT_FLAG_KEY_PHASE 0x04u
+
+/**
+ * @enum
+ *
+ * :type:`ngtcp2_pkt_type` defines QUIC version-independent QUIC
+ * packet types.
+ */
+typedef enum ngtcp2_pkt_type {
+ /**
+ * :enum:`NGTCP2_PKT_VERSION_NEGOTIATION` is defined by libngtcp2
+ * for convenience.
+ */
+ NGTCP2_PKT_VERSION_NEGOTIATION = 0x80,
+ /**
+ * :enum:`NGTCP2_PKT_STATELESS_RESET` is defined by libngtcp2 for
+ * convenience.
+ */
+ NGTCP2_PKT_STATELESS_RESET = 0x81,
+ /**
+ * :enum:`NGTCP2_PKT_INITIAL` indicates Initial packet.
+ */
+ NGTCP2_PKT_INITIAL = 0x10,
+ /**
+ * :enum:`NGTCP2_PKT_0RTT` indicates 0RTT packet.
+ */
+ NGTCP2_PKT_0RTT = 0x11,
+ /**
+ * :enum:`NGTCP2_PKT_HANDSHAKE` indicates Handshake packet.
+ */
+ NGTCP2_PKT_HANDSHAKE = 0x12,
+ /**
+ * :enum:`NGTCP2_PKT_RETRY` indicates Retry packet.
+ */
+ NGTCP2_PKT_RETRY = 0x13,
+ /**
+ * :enum:`NGTCP2_PKT_1RTT` is defined by libngtcp2 for convenience.
+ */
+ NGTCP2_PKT_1RTT = 0x40
+} ngtcp2_pkt_type;
+
+/**
+ * @macrosection
+ *
+ * QUIC transport error code
+ */
+
+/**
+ * @macro
+ *
+ * :macro:`NGTCP2_NO_ERROR` is QUIC transport error code ``NO_ERROR``.
+ */
+#define NGTCP2_NO_ERROR 0x0u
+
+/**
+ * @macro
+ *
+ * :macro:`NGTCP2_INTERNAL_ERROR` is QUIC transport error code
+ * ``INTERNAL_ERROR``.
+ */
+#define NGTCP2_INTERNAL_ERROR 0x1u
+
+/**
+ * @macro
+ *
+ * :macro:`NGTCP2_CONNECTION_REFUSED` is QUIC transport error code
+ * ``CONNECTION_REFUSED``.
+ */
+#define NGTCP2_CONNECTION_REFUSED 0x2u
+
+/**
+ * @macro
+ *
+ * :macro:`NGTCP2_FLOW_CONTROL_ERROR` is QUIC transport error code
+ * ``FLOW_CONTROL_ERROR``.
+ */
+#define NGTCP2_FLOW_CONTROL_ERROR 0x3u
+
+/**
+ * @macro
+ *
+ * :macro:`NGTCP2_STREAM_LIMIT_ERROR` is QUIC transport error code
+ * ``STREAM_LIMIT_ERROR``.
+ */
+#define NGTCP2_STREAM_LIMIT_ERROR 0x4u
+
+/**
+ * @macro
+ *
+ * :macro:`NGTCP2_STREAM_STATE_ERROR` is QUIC transport error code
+ * ``STREAM_STATE_ERROR``.
+ */
+#define NGTCP2_STREAM_STATE_ERROR 0x5u
+
+/**
+ * @macro
+ *
+ * :macro:`NGTCP2_FINAL_SIZE_ERROR` is QUIC transport error code
+ * ``FINAL_SIZE_ERROR``.
+ */
+#define NGTCP2_FINAL_SIZE_ERROR 0x6u
+
+/**
+ * @macro
+ *
+ * :macro:`NGTCP2_FRAME_ENCODING_ERROR` is QUIC transport error code
+ * ``FRAME_ENCODING_ERROR``.
+ */
+#define NGTCP2_FRAME_ENCODING_ERROR 0x7u
+
+/**
+ * @macro
+ *
+ * :macro:`NGTCP2_TRANSPORT_PARAMETER_ERROR` is QUIC transport error
+ * code ``TRANSPORT_PARAMETER_ERROR``.
+ */
+#define NGTCP2_TRANSPORT_PARAMETER_ERROR 0x8u
+
+/**
+ * @macro
+ *
+ * :macro:`NGTCP2_CONNECTION_ID_LIMIT_ERROR` is QUIC transport error
+ * code ``CONNECTION_ID_LIMIT_ERROR``.
+ */
+#define NGTCP2_CONNECTION_ID_LIMIT_ERROR 0x9u
+
+/**
+ * @macro
+ *
+ * :macro:`NGTCP2_PROTOCOL_VIOLATION` is QUIC transport error code
+ * ``PROTOCOL_VIOLATION``.
+ */
+#define NGTCP2_PROTOCOL_VIOLATION 0xau
+
+/**
+ * @macro
+ *
+ * :macro:`NGTCP2_INVALID_TOKEN` is QUIC transport error code
+ * ``INVALID_TOKEN``.
+ */
+#define NGTCP2_INVALID_TOKEN 0xbu
+
+/**
+ * @macro
+ *
+ * :macro:`NGTCP2_APPLICATION_ERROR` is QUIC transport error code
+ * ``APPLICATION_ERROR``.
+ */
+#define NGTCP2_APPLICATION_ERROR 0xcu
+
+/**
+ * @macro
+ *
+ * :macro:`NGTCP2_CRYPTO_BUFFER_EXCEEDED` is QUIC transport error code
+ * ``CRYPTO_BUFFER_EXCEEDED``.
+ */
+#define NGTCP2_CRYPTO_BUFFER_EXCEEDED 0xdu
+
+/**
+ * @macro
+ *
+ * :macro:`NGTCP2_KEY_UPDATE_ERROR` is QUIC transport error code
+ * ``KEY_UPDATE_ERROR``.
+ */
+#define NGTCP2_KEY_UPDATE_ERROR 0xeu
+
+/**
+ * @macro
+ *
+ * :macro:`NGTCP2_AEAD_LIMIT_REACHED` is QUIC transport error code
+ * ``AEAD_LIMIT_REACHED``.
+ */
+#define NGTCP2_AEAD_LIMIT_REACHED 0xfu
+
+/**
+ * @macro
+ *
+ * :macro:`NGTCP2_NO_VIABLE_PATH` is QUIC transport error code
+ * ``NO_VIABLE_PATH``.
+ */
+#define NGTCP2_NO_VIABLE_PATH 0x10u
+
+/**
+ * @macro
+ *
+ * :macro:`NGTCP2_CRYPTO_ERROR` is QUIC transport error code
+ * ``CRYPTO_ERROR``.
+ */
+#define NGTCP2_CRYPTO_ERROR 0x100u
+
+/**
+ * @macro
+ *
+ * :macro:`NGTCP2_VERSION_NEGOTIATION_ERROR` is QUIC transport error
+ * code ``VERSION_NEGOTIATION_ERROR``.
+ *
+ * https://datatracker.ietf.org/doc/html/draft-ietf-quic-version-negotiation-14
+ */
+#define NGTCP2_VERSION_NEGOTIATION_ERROR 0x11
+
+/**
+ * @enum
+ *
+ * :type:`ngtcp2_path_validation_result` defines path validation
+ * result code.
+ */
+typedef enum ngtcp2_path_validation_result {
+ /**
+ * :enum:`NGTCP2_PATH_VALIDATION_RESULT_SUCCESS` indicates
+ * successful validation.
+ */
+ NGTCP2_PATH_VALIDATION_RESULT_SUCCESS,
+ /**
+ * :enum:`NGTCP2_PATH_VALIDATION_RESULT_FAILURE` indicates
+ * validation failure.
+ */
+ NGTCP2_PATH_VALIDATION_RESULT_FAILURE,
+ /**
+ * :enum:`NGTCP2_PATH_VALIDATION_RESULT_ABORTED` indicates that path
+ * validation was aborted.
+ */
+ NGTCP2_PATH_VALIDATION_RESULT_ABORTED
+} ngtcp2_path_validation_result;
+
+/**
+ * @typedef
+ *
+ * :type:`ngtcp2_tstamp` is a timestamp with nanosecond resolution.
+ * ``UINT64_MAX`` is an invalid value.
+ */
+typedef uint64_t ngtcp2_tstamp;
+
+/**
+ * @typedef
+ *
+ * :type:`ngtcp2_duration` is a period of time in nanosecond
+ * resolution. ``UINT64_MAX`` is an invalid value.
+ */
+typedef uint64_t ngtcp2_duration;
+
+/**
+ * @struct
+ *
+ * :type:`ngtcp2_cid` holds a Connection ID.
+ */
+typedef struct ngtcp2_cid {
+ /**
+ * :member:`datalen` is the length of Connection ID.
+ */
+ size_t datalen;
+ /**
+ * :member:`data` is the buffer to store Connection ID.
+ */
+ uint8_t data[NGTCP2_MAX_CIDLEN];
+} ngtcp2_cid;
+
+/**
+ * @struct
+ *
+ * :type:`ngtcp2_vec` is struct iovec compatible structure to
+ * reference arbitrary array of bytes.
+ */
+typedef struct ngtcp2_vec {
+ /**
+ * :member:`base` points to the data.
+ */
+ uint8_t *base;
+ /**
+ * :member:`len` is the number of bytes which the buffer pointed by
+ * base contains.
+ */
+ size_t len;
+} ngtcp2_vec;
+
+/**
+ * @function
+ *
+ * `ngtcp2_cid_init` initializes Connection ID |cid| with the byte
+ * string pointed by |data| and its length is |datalen|. |datalen|
+ * must be at most :macro:`NGTCP2_MAX_CIDLEN`.
+ */
+NGTCP2_EXTERN void ngtcp2_cid_init(ngtcp2_cid *cid, const uint8_t *data,
+ size_t datalen);
+
+/**
+ * @function
+ *
+ * `ngtcp2_cid_eq` returns nonzero if |a| and |b| share the same
+ * Connection ID.
+ */
+NGTCP2_EXTERN int ngtcp2_cid_eq(const ngtcp2_cid *a, const ngtcp2_cid *b);
+
+/**
+ * @struct
+ *
+ * :type:`ngtcp2_pkt_hd` represents QUIC packet header.
+ */
+typedef struct ngtcp2_pkt_hd {
+ /**
+ * :member:`dcid` is Destination Connection ID.
+ */
+ ngtcp2_cid dcid;
+ /**
+ * :member:`scid` is Source Connection ID.
+ */
+ ngtcp2_cid scid;
+ /**
+ * :member:`pkt_num` is a packet number.
+ */
+ int64_t pkt_num;
+ /**
+ * :member:`token` contains token for Initial
+ * packet.
+ */
+ const uint8_t *token;
+ /**
+ * :member:`tokenlen` is the length of :member:`token`.
+ */
+ size_t tokenlen;
+ /**
+ * :member:`pkt_numlen` is the number of bytes spent to encode
+ * :member:`pkt_num`.
+ */
+ size_t pkt_numlen;
+ /**
+ * :member:`len` is the sum of :member:`pkt_numlen` and the length
+ * of QUIC packet payload.
+ */
+ size_t len;
+ /**
+ * :member:`version` is QUIC version.
+ */
+ uint32_t version;
+ /**
+ * :member:`type` is a type of QUIC packet. See
+ * :type:`ngtcp2_pkt_type`.
+ */
+ uint8_t type;
+ /**
+ * :member:`flags` is zero or more of :macro:`NGTCP2_PKT_FLAG_*
+ * <NGTCP2_PKT_FLAG_NONE>`.
+ */
+ uint8_t flags;
+} ngtcp2_pkt_hd;
+
+/**
+ * @struct
+ *
+ * :type:`ngtcp2_pkt_stateless_reset` represents Stateless Reset.
+ */
+typedef struct ngtcp2_pkt_stateless_reset {
+ /**
+ * :member:`stateless_reset_token` contains stateless reset token.
+ */
+ uint8_t stateless_reset_token[NGTCP2_STATELESS_RESET_TOKENLEN];
+ /**
+ * :member:`rand` points a buffer which contains random bytes
+ * section.
+ */
+ const uint8_t *rand;
+ /**
+ * :member:`randlen` is the number of random bytes.
+ */
+ size_t randlen;
+} ngtcp2_pkt_stateless_reset;
+
+/**
+ * @enum
+ *
+ * :type:`ngtcp2_transport_params_type` defines TLS message type which
+ * carries transport parameters.
+ */
+typedef enum ngtcp2_transport_params_type {
+ /**
+ * :enum:`NGTCP2_TRANSPORT_PARAMS_TYPE_CLIENT_HELLO` is Client Hello
+ * TLS message.
+ */
+ NGTCP2_TRANSPORT_PARAMS_TYPE_CLIENT_HELLO,
+ /**
+ * :enum:`NGTCP2_TRANSPORT_PARAMS_TYPE_ENCRYPTED_EXTENSIONS` is
+ * Encrypted Extensions TLS message.
+ */
+ NGTCP2_TRANSPORT_PARAMS_TYPE_ENCRYPTED_EXTENSIONS
+} ngtcp2_transport_params_type;
+
+/**
+ * @macrosection
+ *
+ * QUIC transport parameters related macros
+ */
+
+/**
+ * @macro
+ *
+ * :macro:`NGTCP2_DEFAULT_MAX_RECV_UDP_PAYLOAD_SIZE` is the default
+ * value of max_udp_payload_size transport parameter.
+ */
+#define NGTCP2_DEFAULT_MAX_RECV_UDP_PAYLOAD_SIZE 65527
+
+/**
+ * @macro
+ *
+ * :macro:`NGTCP2_DEFAULT_ACK_DELAY_EXPONENT` is a default value of
+ * scaling factor of ACK Delay field in ACK frame.
+ */
+#define NGTCP2_DEFAULT_ACK_DELAY_EXPONENT 3
+
+/**
+ * @macro
+ *
+ * :macro:`NGTCP2_DEFAULT_MAX_ACK_DELAY` is a default value of the
+ * maximum amount of time in nanoseconds by which endpoint delays
+ * sending acknowledgement.
+ */
+#define NGTCP2_DEFAULT_MAX_ACK_DELAY (25 * NGTCP2_MILLISECONDS)
+
+/**
+ * @macro
+ *
+ * :macro:`NGTCP2_DEFAULT_ACTIVE_CONNECTION_ID_LIMIT` is the default
+ * value of active_connection_id_limit transport parameter value if
+ * omitted.
+ */
+#define NGTCP2_DEFAULT_ACTIVE_CONNECTION_ID_LIMIT 2
+
+/**
+ * @macro
+ *
+ * :macro:`NGTCP2_TLSEXT_QUIC_TRANSPORT_PARAMETERS_V1` is TLS
+ * extension type of quic_transport_parameters.
+ */
+#define NGTCP2_TLSEXT_QUIC_TRANSPORT_PARAMETERS_V1 0x39u
+
+/**
+ * @macro
+ *
+ * :macro:`NGTCP2_TLSEXT_QUIC_TRANSPORT_PARAMETERS_DRAFT` is TLS
+ * extension type of quic_transport_parameters used during draft
+ * development.
+ */
+#define NGTCP2_TLSEXT_QUIC_TRANSPORT_PARAMETERS_DRAFT 0xffa5u
+
+#ifdef NGTCP2_USE_GENERIC_SOCKADDR
+typedef struct ngtcp2_sockaddr {
+ uint16_t sa_family;
+ uint8_t sa_data[14];
+} ngtcp2_sockaddr;
+
+typedef struct ngtcp2_in_addr {
+ uint32_t s_addr;
+} ngtcp2_in_addr;
+
+typedef struct ngtcp2_sockaddr_in {
+ uint16_t sin_family;
+ uint16_t sin_port;
+ ngtcp2_in_addr sin_addr;
+ uint8_t sin_zero[8];
+} ngtcp2_sockaddr_in;
+
+# define NGTCP2_SS_MAXSIZE 128
+# define NGTCP2_SS_ALIGNSIZE (sizeof(uint64_t))
+# define NGTCP2_SS_PAD1SIZE (NGTCP2_SS_ALIGNSIZE - sizeof(uint16_t))
+# define NGTCP2_SS_PAD2SIZE \
+ (NGTCP2_SS_MAXSIZE - \
+ (sizeof(uint16_t) + NGTCP2_SS_PAD1SIZE + NGTCP2_SS_ALIGNSIZE))
+
+typedef struct ngtcp2_sockaddr_storage {
+ uint16_t ss_family;
+ uint8_t _ss_pad1[NGTCP2_SS_PAD1SIZE];
+ uint64_t _ss_align;
+ uint8_t _ss_pad2[NGTCP2_SS_PAD2SIZE];
+} ngtcp2_sockaddr_storage;
+
+# undef NGTCP2_SS_PAD2SIZE
+# undef NGTCP2_SS_PAD1SIZE
+# undef NGTCP2_SS_ALIGNSIZE
+# undef NGTCP2_SS_MAXSIZE
+
+typedef uint32_t ngtcp2_socklen;
+#else
+/**
+ * @typedef
+ *
+ * :type:`ngtcp2_sockaddr` is typedefed to struct sockaddr. If
+ * :macro:`NGTCP2_USE_GENERIC_SOCKADDR` is defined, it is typedefed to
+ * the generic struct sockaddr defined in ngtcp2.h.
+ */
+typedef struct sockaddr ngtcp2_sockaddr;
+/**
+ * @typedef
+ *
+ * :type:`ngtcp2_sockaddr_storage` is typedefed to struct
+ * sockaddr_storage. If :macro:`NGTCP2_USE_GENERIC_SOCKADDR` is
+ * defined, it is typedefed to the generic struct sockaddr_storage
+ * defined in ngtcp2.h.
+ */
+typedef struct sockaddr_storage ngtcp2_sockaddr_storage;
+/**
+ * @typedef
+ *
+ * :type:`ngtcp2_sockaddr_in` is typedefed to struct sockaddr_in. If
+ * :macro:`NGTCP2_USE_GENERIC_SOCKADDR` is defined, it is typedefed to
+ * the generic struct sockaddr_in defined in ngtcp2.h.
+ */
+typedef struct sockaddr_in ngtcp2_sockaddr_in;
+/**
+ * @typedef
+ *
+ * :type:`ngtcp2_socklen` is typedefed to socklen_t. If
+ * :macro:`NGTCP2_USE_GENERIC_SOCKADDR` is defined, it is typedefed to
+ * uint32_t.
+ */
+typedef socklen_t ngtcp2_socklen;
+#endif
+
+#if defined(NGTCP2_USE_GENERIC_SOCKADDR) || \
+ defined(NGTCP2_USE_GENERIC_IPV6_SOCKADDR)
+typedef struct ngtcp2_in6_addr {
+ uint8_t in6_addr[16];
+} ngtcp2_in6_addr;
+
+typedef struct ngtcp2_sockaddr_in6 {
+ uint16_t sin6_family;
+ uint16_t sin6_port;
+ uint32_t sin6_flowinfo;
+ ngtcp2_in6_addr sin6_addr;
+ uint32_t sin6_scope_id;
+} ngtcp2_sockaddr_in6;
+#else
+/**
+ * @typedef
+ *
+ * :type:`ngtcp2_sockaddr_in6` is typedefed to struct sockaddr_in6.
+ * If :macro:`NGTCP2_USE_GENERIC_SOCKADDR` is defined, it is typedefed
+ * to the generic struct sockaddr_in6 defined in ngtcp2.h.
+ */
+typedef struct sockaddr_in6 ngtcp2_sockaddr_in6;
+#endif
+
+/**
+ * @struct
+ *
+ * :type:`ngtcp2_sockaddr_union` conveniently includes all supported
+ * address types.
+ */
+typedef union ngtcp2_sockaddr_union {
+ ngtcp2_sockaddr sa;
+ ngtcp2_sockaddr_in in;
+ ngtcp2_sockaddr_in6 in6;
+} ngtcp2_sockaddr_union;
+
+/**
+ * @struct
+ *
+ * :type:`ngtcp2_preferred_addr` represents preferred address
+ * structure.
+ */
+typedef struct ngtcp2_preferred_addr {
+ /**
+ * :member:`cid` is a Connection ID.
+ */
+ ngtcp2_cid cid;
+ /**
+ * :member:`ipv4` contains IPv4 address and port.
+ */
+ ngtcp2_sockaddr_in ipv4;
+ /**
+ * :member:`ipv6` contains IPv4 address and port.
+ */
+ ngtcp2_sockaddr_in6 ipv6;
+ /**
+ * :member:`ipv4_present` indicates that :member:`ipv4` contains
+ * IPv4 address and port.
+ */
+ uint8_t ipv4_present;
+ /**
+ * :member:`ipv6_present` indicates that :member:`ipv6` contains
+ * IPv6 address and port.
+ */
+ uint8_t ipv6_present;
+ /**
+ * :member:`stateless_reset_token` contains stateless reset token.
+ */
+ uint8_t stateless_reset_token[NGTCP2_STATELESS_RESET_TOKENLEN];
+} ngtcp2_preferred_addr;
+
+/**
+ * @struct
+ *
+ * :type:`ngtcp2_version_info` represents version_information
+ * structure.
+ */
+typedef struct ngtcp2_version_info {
+ /**
+ * :member:`chosen_version` is the version chosen by the sender.
+ */
+ uint32_t chosen_version;
+ /**
+ * :member:`available_versions` points the wire image of
+ * available_versions field. The each version is therefore in
+ * network byte order.
+ */
+ const uint8_t *available_versions;
+ /**
+ * :member:`available_versionslen` is the number of bytes pointed by
+ * :member:`available_versions`, not the number of versions
+ * included.
+ */
+ size_t available_versionslen;
+} ngtcp2_version_info;
+
+#define NGTCP2_TRANSPORT_PARAMS_VERSION_V1 1
+#define NGTCP2_TRANSPORT_PARAMS_VERSION NGTCP2_TRANSPORT_PARAMS_VERSION_V1
+
+/**
+ * @struct
+ *
+ * :type:`ngtcp2_transport_params` represents QUIC transport
+ * parameters.
+ */
+typedef struct ngtcp2_transport_params {
+ /**
+ * :member:`preferred_address` contains preferred address if
+ * :member:`preferred_address_present` is nonzero.
+ */
+ ngtcp2_preferred_addr preferred_address;
+ /**
+ * :member:`original_dcid` is the Destination Connection ID field
+ * from the first Initial packet from client. Server must specify
+ * this field. It is expected that application knows the original
+ * Destination Connection ID even if it sends Retry packet, for
+ * example, by including it in retry token. Otherwise, application
+ * should not specify this field.
+ */
+ ngtcp2_cid original_dcid;
+ /**
+ * :member:`initial_scid` is the Source Connection ID field from the
+ * first Initial packet the endpoint sends. Application should not
+ * specify this field.
+ */
+ ngtcp2_cid initial_scid;
+ /**
+ * :member:`retry_scid` is the Source Connection ID field from Retry
+ * packet. Only server uses this field. If server application
+ * received Initial packet with retry token from client and server
+ * verified its token, server application must set Destination
+ * Connection ID field from the Initial packet to this field and set
+ * :member:`retry_scid_present` to nonzero. Server application must
+ * verify that the Destination Connection ID from Initial packet was
+ * sent in Retry packet by, for example, including the Connection ID
+ * in a token, or including it in AAD when encrypting a token.
+ */
+ ngtcp2_cid retry_scid;
+ /**
+ * :member:`initial_max_stream_data_bidi_local` is the size of flow
+ * control window of locally initiated stream. This is the number
+ * of bytes that the remote endpoint can send and the local endpoint
+ * must ensure that it has enough buffer to receive them.
+ */
+ uint64_t initial_max_stream_data_bidi_local;
+ /**
+ * :member:`initial_max_stream_data_bidi_remote` is the size of flow
+ * control window of remotely initiated stream. This is the number
+ * of bytes that the remote endpoint can send and the local endpoint
+ * must ensure that it has enough buffer to receive them.
+ */
+ uint64_t initial_max_stream_data_bidi_remote;
+ /**
+ * :member:`initial_max_stream_data_uni` is the size of flow control
+ * window of remotely initiated unidirectional stream. This is the
+ * number of bytes that the remote endpoint can send and the local
+ * endpoint must ensure that it has enough buffer to receive them.
+ */
+ uint64_t initial_max_stream_data_uni;
+ /**
+ * :member:`initial_max_data` is the connection level flow control
+ * window.
+ */
+ uint64_t initial_max_data;
+ /**
+ * :member:`initial_max_streams_bidi` is the number of concurrent
+ * streams that the remote endpoint can create.
+ */
+ uint64_t initial_max_streams_bidi;
+ /**
+ * :member:`initial_max_streams_uni` is the number of concurrent
+ * unidirectional streams that the remote endpoint can create.
+ */
+ uint64_t initial_max_streams_uni;
+ /**
+ * :member:`max_idle_timeout` is a duration during which sender
+ * allows quiescent.
+ */
+ ngtcp2_duration max_idle_timeout;
+ /**
+ * :member:`max_udp_payload_size` is the maximum datagram size that
+ * the endpoint can receive.
+ */
+ uint64_t max_udp_payload_size;
+ /**
+ * :member:`active_connection_id_limit` is the maximum number of
+ * Connection ID that sender can store.
+ */
+ uint64_t active_connection_id_limit;
+ /**
+ * :member:`ack_delay_exponent` is the exponent used in ACK Delay
+ * field in ACK frame.
+ */
+ uint64_t ack_delay_exponent;
+ /**
+ * :member:`max_ack_delay` is the maximum acknowledgement delay by
+ * which the endpoint will delay sending acknowledgements.
+ */
+ ngtcp2_duration max_ack_delay;
+ /**
+ * :member:`max_datagram_frame_size` is the maximum size of DATAGRAM
+ * frame that this endpoint willingly receives. Specifying 0
+ * disables DATAGRAM support. See
+ * https://datatracker.ietf.org/doc/html/rfc9221
+ */
+ uint64_t max_datagram_frame_size;
+ /**
+ * :member:`stateless_reset_token_present` is nonzero if
+ * :member:`stateless_reset_token` field is set.
+ */
+ uint8_t stateless_reset_token_present;
+ /**
+ * :member:`disable_active_migration` is nonzero if the endpoint
+ * does not support active connection migration.
+ */
+ uint8_t disable_active_migration;
+ /**
+ * :member:`retry_scid_present` is nonzero if :member:`retry_scid`
+ * field is set.
+ */
+ uint8_t retry_scid_present;
+ /**
+ * :member:`preferred_address_present` is nonzero if
+ * :member:`preferred_address` is set.
+ */
+ uint8_t preferred_address_present;
+ /**
+ * :member:`stateless_reset_token` contains stateless reset token.
+ */
+ uint8_t stateless_reset_token[NGTCP2_STATELESS_RESET_TOKENLEN];
+ /**
+ * :member:`grease_quic_bit` is nonzero if sender supports "Greasing
+ * the QUIC Bit" extension. See :rfc:`9287`. Note that the local
+ * endpoint always enables greasing QUIC bit regardless of this
+ * field value.
+ */
+ uint8_t grease_quic_bit;
+ /**
+ * :member:`version_info` contains version_information field if
+ * :member:`version_info_present` is nonzero. Application should
+ * not specify this field.
+ */
+ ngtcp2_version_info version_info;
+ /**
+ * :member:`version_info_present` is nonzero if
+ * :member:`version_info` is set. Application should not specify
+ * this field.
+ */
+ uint8_t version_info_present;
+} ngtcp2_transport_params;
+
+/**
+ * @enum
+ *
+ * :type:`ngtcp2_pktns_id` defines packet number space identifier.
+ */
+typedef enum ngtcp2_pktns_id {
+ /**
+ * :enum:`NGTCP2_PKTNS_ID_INITIAL` is the Initial packet number
+ * space.
+ */
+ NGTCP2_PKTNS_ID_INITIAL,
+ /**
+ * :enum:`NGTCP2_PKTNS_ID_HANDSHAKE` is the Handshake packet number
+ * space.
+ */
+ NGTCP2_PKTNS_ID_HANDSHAKE,
+ /**
+ * :enum:`NGTCP2_PKTNS_ID_APPLICATION` is the Application data
+ * packet number space.
+ */
+ NGTCP2_PKTNS_ID_APPLICATION,
+ /**
+ * :enum:`NGTCP2_PKTNS_ID_MAX` is defined to get the number of
+ * packet number spaces.
+ */
+ NGTCP2_PKTNS_ID_MAX
+} ngtcp2_pktns_id;
+
+#define NGTCP2_CONN_STAT_VERSION_V1 1
+#define NGTCP2_CONN_STAT_VERSION NGTCP2_CONN_STAT_VERSION_V1
+
+/**
+ * @struct
+ *
+ * :type:`ngtcp2_conn_stat` holds various connection statistics, and
+ * computed data for recovery and congestion controller.
+ */
+typedef struct ngtcp2_conn_stat {
+ /**
+ * :member:`latest_rtt` is the latest RTT sample which is not
+ * adjusted by acknowledgement delay.
+ */
+ ngtcp2_duration latest_rtt;
+ /**
+ * :member:`min_rtt` is the minimum RTT seen so far. It is not
+ * adjusted by acknowledgement delay.
+ */
+ ngtcp2_duration min_rtt;
+ /**
+ * :member:`smoothed_rtt` is the smoothed RTT.
+ */
+ ngtcp2_duration smoothed_rtt;
+ /**
+ * :member:`rttvar` is a mean deviation of observed RTT.
+ */
+ ngtcp2_duration rttvar;
+ /**
+ * :member:`initial_rtt` is the initial RTT which is used when no
+ * RTT sample is available.
+ */
+ ngtcp2_duration initial_rtt;
+ /**
+ * :member:`first_rtt_sample_ts` is the timestamp when the first RTT
+ * sample is obtained.
+ */
+ ngtcp2_tstamp first_rtt_sample_ts;
+ /**
+ * :member:`pto_count` is the count of successive PTO timer
+ * expiration.
+ */
+ size_t pto_count;
+ /**
+ * :member:`loss_detection_timer` is the deadline of the current
+ * loss detection timer.
+ */
+ ngtcp2_tstamp loss_detection_timer;
+ /**
+ * :member:`last_tx_pkt_ts` corresponds to
+ * time_of_last_ack_eliciting_packet in :rfc:`9002`.
+ */
+ ngtcp2_tstamp last_tx_pkt_ts[NGTCP2_PKTNS_ID_MAX];
+ /**
+ * :member:`loss_time` corresponds to loss_time in :rfc:`9002`.
+ */
+ ngtcp2_tstamp loss_time[NGTCP2_PKTNS_ID_MAX];
+ /**
+ * :member:`cwnd` is the size of congestion window.
+ */
+ uint64_t cwnd;
+ /**
+ * :member:`ssthresh` is slow start threshold.
+ */
+ uint64_t ssthresh;
+ /**
+ * :member:`congestion_recovery_start_ts` is the timestamp when
+ * congestion recovery started.
+ */
+ ngtcp2_tstamp congestion_recovery_start_ts;
+ /**
+ * :member:`bytes_in_flight` is the number in bytes of all sent
+ * packets which have not been acknowledged.
+ */
+ uint64_t bytes_in_flight;
+ /**
+ * :member:`max_tx_udp_payload_size` is the maximum size of UDP
+ * datagram payload that this endpoint transmits. It is used by
+ * congestion controller to compute congestion window.
+ */
+ size_t max_tx_udp_payload_size;
+ /**
+ * :member:`delivery_rate_sec` is the current sending rate measured
+ * in byte per second.
+ */
+ uint64_t delivery_rate_sec;
+ /**
+ * :member:`pacing_rate` is the current packet sending rate computed
+ * by a congestion controller. 0 if a congestion controller does
+ * not set pacing rate. Even if this value is set to 0, the library
+ * paces packets.
+ */
+ double pacing_rate;
+ /**
+ * :member:`send_quantum` is the maximum size of a data aggregate
+ * scheduled and transmitted together.
+ */
+ size_t send_quantum;
+} ngtcp2_conn_stat;
+
+/**
+ * @enum
+ *
+ * :type:`ngtcp2_cc_algo` defines congestion control algorithms.
+ */
+typedef enum ngtcp2_cc_algo {
+ /**
+ * :enum:`NGTCP2_CC_ALGO_RENO` represents Reno.
+ */
+ NGTCP2_CC_ALGO_RENO = 0x00,
+ /**
+ * :enum:`NGTCP2_CC_ALGO_CUBIC` represents Cubic.
+ */
+ NGTCP2_CC_ALGO_CUBIC = 0x01,
+ /**
+ * :enum:`NGTCP2_CC_ALGO_BBR` represents BBR.
+ */
+ NGTCP2_CC_ALGO_BBR = 0x02,
+ /**
+ * :enum:`NGTCP2_CC_ALGO_BBR2` represents BBR v2.
+ */
+ NGTCP2_CC_ALGO_BBR2 = 0x03
+} ngtcp2_cc_algo;
+
+/**
+ * @functypedef
+ *
+ * :type:`ngtcp2_printf` is a callback function for logging.
+ * |user_data| is the same object passed to `ngtcp2_conn_client_new`
+ * or `ngtcp2_conn_server_new`.
+ */
+typedef void (*ngtcp2_printf)(void *user_data, const char *format, ...);
+
+/**
+ * @macrosection
+ *
+ * QLog related macros
+ */
+
+/**
+ * @macro
+ *
+ * :macro:`NGTCP2_QLOG_WRITE_FLAG_NONE` indicates no flag set.
+ */
+#define NGTCP2_QLOG_WRITE_FLAG_NONE 0x00u
+/**
+ * @macro
+ *
+ * :macro:`NGTCP2_QLOG_WRITE_FLAG_FIN` indicates that this is the
+ * final call to :type:`ngtcp2_qlog_write` in the current connection.
+ */
+#define NGTCP2_QLOG_WRITE_FLAG_FIN 0x01u
+
+/**
+ * @struct
+ *
+ * :type:`ngtcp2_rand_ctx` is a wrapper around native random number
+ * generator. It is opaque to the ngtcp2 library. This might be
+ * useful if application needs to specify random number generator per
+ * thread or per connection.
+ */
+typedef struct ngtcp2_rand_ctx {
+ /**
+ * :member:`native_handle` is a pointer to an underlying random
+ * number generator.
+ */
+ void *native_handle;
+} ngtcp2_rand_ctx;
+
+/**
+ * @functypedef
+ *
+ * :type:`ngtcp2_qlog_write` is a callback function which is called to
+ * write qlog |data| of length |datalen| bytes. |flags| is bitwise OR
+ * of zero or more of :macro:`NGTCP2_QLOG_WRITE_FLAG_*
+ * <NGTCP2_QLOG_WRITE_FLAG_NONE>`. If
+ * :macro:`NGTCP2_QLOG_WRITE_FLAG_FIN` is set, |datalen| may be 0.
+ */
+typedef void (*ngtcp2_qlog_write)(void *user_data, uint32_t flags,
+ const void *data, size_t datalen);
+
+/**
+ * @struct
+ *
+ * :type:`ngtcp2_qlog_settings` is a set of settings for qlog.
+ */
+typedef struct ngtcp2_qlog_settings {
+ /**
+ * :member:`odcid` is Original Destination Connection ID sent by
+ * client. It is used as group_id and ODCID fields. Client ignores
+ * this field and uses dcid parameter passed to
+ * `ngtcp2_conn_client_new()`.
+ */
+ ngtcp2_cid odcid;
+ /**
+ * :member:`write` is a callback function to write qlog. Setting
+ * ``NULL`` disables qlog.
+ */
+ ngtcp2_qlog_write write;
+} ngtcp2_qlog_settings;
+
+#define NGTCP2_SETTINGS_VERSION_V1 1
+#define NGTCP2_SETTINGS_VERSION NGTCP2_SETTINGS_VERSION_V1
+
+/**
+ * @struct
+ *
+ * :type:`ngtcp2_settings` defines QUIC connection settings.
+ */
+typedef struct ngtcp2_settings {
+ /**
+ * :member:`qlog` is qlog settings.
+ */
+ ngtcp2_qlog_settings qlog;
+ /**
+ * :member:`cc_algo` specifies congestion control algorithm.
+ */
+ ngtcp2_cc_algo cc_algo;
+ /**
+ * :member:`initial_ts` is an initial timestamp given to the
+ * library.
+ */
+ ngtcp2_tstamp initial_ts;
+ /**
+ * :member:`initial_rtt` is an initial RTT.
+ */
+ ngtcp2_duration initial_rtt;
+ /**
+ * :member:`log_printf` is a function that the library uses to write
+ * logs. ``NULL`` means no logging output. It is nothing to do
+ * with qlog.
+ */
+ ngtcp2_printf log_printf;
+ /**
+ * :member:`max_tx_udp_payload_size` is the maximum size of UDP
+ * datagram payload that this endpoint transmits. It is used by
+ * congestion controller to compute congestion window.
+ */
+ size_t max_tx_udp_payload_size;
+ /**
+ * :member:`token` is a token from Retry packet or NEW_TOKEN frame.
+ *
+ * Server sets this field if it received the token in Client Initial
+ * packet and successfully validated.
+ *
+ * Client sets this field if it intends to send token in its Initial
+ * packet.
+ *
+ * `ngtcp2_conn_server_new` and `ngtcp2_conn_client_new` make a copy
+ * of token.
+ */
+ const uint8_t *token;
+ /**
+ * :member:`tokenlen` is the length of :member:`token`.
+ */
+ size_t tokenlen;
+ /**
+ * :member:`rand_ctx` is an optional random number generator to be
+ * passed to :type:`ngtcp2_rand` callback.
+ */
+ ngtcp2_rand_ctx rand_ctx;
+ /**
+ * :member:`max_window` is the maximum connection-level flow control
+ * window if connection-level window auto-tuning is enabled. The
+ * connection-level window auto tuning is enabled if nonzero value
+ * is specified in this field. The initial value of window size is
+ * :member:`ngtcp2_transport_params.initial_max_data`. The window
+ * size is scaled up to the value specified in this field.
+ */
+ uint64_t max_window;
+ /**
+ * :member:`max_stream_window` is the maximum stream-level flow
+ * control window if stream-level window auto-tuning is enabled.
+ * The stream-level window auto-tuning is enabled if nonzero value
+ * is specified in this field. The initial value of window size is
+ * :member:`ngtcp2_transport_params.initial_max_stream_data_bidi_remote`,
+ * :member:`ngtcp2_transport_params.initial_max_stream_data_bidi_local`,
+ * or :member:`ngtcp2_transport_params.initial_max_stream_data_uni`,
+ * depending on the type of stream. The window size is scaled up to
+ * the value specified in this field.
+ */
+ uint64_t max_stream_window;
+ /**
+ * :member:`ack_thresh` is the minimum number of the received ACK
+ * eliciting packets that triggers the immediate acknowledgement.
+ */
+ size_t ack_thresh;
+ /**
+ * :member:`no_tx_udp_payload_size_shaping`, if set to nonzero,
+ * instructs the library not to limit the UDP payload size to
+ * :macro:`NGTCP2_MAX_UDP_PAYLOAD_SIZE` (which can be extended by
+ * Path MTU Discovery) and instead use the mininum size among the
+ * given buffer size, :member:`max_tx_udp_payload_size`, and the
+ * received max_udp_payload QUIC transport parameter.
+ */
+ int no_tx_udp_payload_size_shaping;
+ /**
+ * :member:`handshake_timeout` is the period of time before giving
+ * up QUIC connection establishment. If QUIC handshake is not
+ * complete within this period, `ngtcp2_conn_handle_expiry` returns
+ * :macro:`NGTCP2_ERR_HANDSHAKE_TIMEOUT` error. The deadline is
+ * :member:`initial_ts` + :member:`handshake_timeout`. If this
+ * field is set to ``UINT64_MAX``, no handshake timeout is set.
+ */
+ ngtcp2_duration handshake_timeout;
+ /**
+ * :member:`preferred_versions` is the array of versions that are
+ * preferred by the local endpoint. All versions set in this array
+ * must be supported by the library, and compatible to QUIC v1. The
+ * reserved versions are not allowed. They are sorted in the order
+ * of preference.
+ *
+ * On compatible version negotiation, server will negotiate one of
+ * those versions contained in this array if there is some overlap
+ * between these versions and the versions offered by the client.
+ * If there is no overlap, but the client chosen version is
+ * supported by the library, the server chooses the client chosen
+ * version as the negotiated version. This version set corresponds
+ * to Offered Versions in QUIC Version Negotiation draft, and it
+ * should be included in Version Negotiation packet.
+ *
+ * Client uses this field and :member:`original_version` to prevent
+ * version downgrade attack if it reacted upon Version Negotiation
+ * packet. If this field is specified, client must include
+ * |client_chosen_version| passed to `ngtcp2_conn_client_new` unless
+ * |client_chosen_version| is a reserved version.
+ */
+ const uint32_t *preferred_versions;
+ /**
+ * :member:`preferred_versionslen` is the number of versions that
+ * are contained in the array pointed by
+ * :member:`preferred_versions`.
+ */
+ size_t preferred_versionslen;
+ /**
+ * :member:`available_versions` is the array of versions that are
+ * going to be set in :member:`available_versions
+ * <ngtcp2_version_info.available_versions>` field of outgoing
+ * version_information QUIC transport parameter.
+ *
+ * For server, this corresponds to Fully-Deployed Versions in QUIC
+ * Version Negotiation draft. If this field is set not, it is set
+ * to :member:`preferred_versions` internally if
+ * :member:`preferred_versionslen` is not zero. If this field is
+ * not set, and :member:`preferred_versionslen` is zero, this field
+ * is set to :macro:`NGTCP2_PROTO_VER_V1` internally.
+ *
+ * Client must include |client_chosen_version| passed to
+ * `ngtcp2_conn_client_new` in this array if this field is set and
+ * |client_chosen_version| is not a reserved version. If this field
+ * is not set, |client_chosen_version| passed to
+ * `ngtcp2_conn_client_new` will be set in this field internally
+ * unless |client_chosen_version| is a reserved version.
+ */
+ const uint32_t *available_versions;
+ /**
+ * :member:`available_versionslen` is the number of versions that
+ * are contained in the array pointed by
+ * :member:`available_versions`.
+ */
+ size_t available_versionslen;
+ /**
+ * :member:`original_version` is the original version that client
+ * initially used to make a connection attempt. If it is set, and
+ * it differs from |client_chosen_version| passed to
+ * `ngtcp2_conn_client_new`, the library assumes that client reacted
+ * upon Version Negotiation packet. Server does not use this field.
+ */
+ uint32_t original_version;
+ /**
+ * :member:`no_pmtud`, if set to nonzero, disables Path MTU
+ * Discovery.
+ */
+ int no_pmtud;
+} ngtcp2_settings;
+
+/**
+ * @struct
+ *
+ * :type:`ngtcp2_addr` is the endpoint address.
+ */
+typedef struct ngtcp2_addr {
+ /**
+ * :member:`addr` points to the buffer which contains endpoint
+ * address. It must not be ``NULL``.
+ */
+ ngtcp2_sockaddr *addr;
+ /**
+ * :member:`addrlen` is the length of addr.
+ */
+ ngtcp2_socklen addrlen;
+} ngtcp2_addr;
+
+/**
+ * @struct
+ *
+ * :type:`ngtcp2_path` is the network endpoints where a packet is sent
+ * and received.
+ */
+typedef struct ngtcp2_path {
+ /**
+ * :member:`local` is the address of local endpoint.
+ */
+ ngtcp2_addr local;
+ /**
+ * :member:`remote` is the address of remote endpoint.
+ */
+ ngtcp2_addr remote;
+ /**
+ * :member:`user_data` is an arbitrary data and opaque to the
+ * library.
+ *
+ * Note that :type:`ngtcp2_path` is generally passed to
+ * :type:`ngtcp2_conn` by an application, and :type:`ngtcp2_conn`
+ * stores their copies. Unfortunately, there is no way for the
+ * application to know when :type:`ngtcp2_conn` finishes using a
+ * specific :type:`ngtcp2_path` object in mid connection, which
+ * means that the application cannot free the data pointed by this
+ * field. Therefore, it is advised to use this field only when the
+ * data pointed by this field persists in an entire lifetime of the
+ * connection.
+ */
+ void *user_data;
+} ngtcp2_path;
+
+/**
+ * @struct
+ *
+ * :type:`ngtcp2_path_storage` is a convenient struct to have buffers
+ * to store the longest addresses.
+ */
+typedef struct ngtcp2_path_storage {
+ /**
+ * :member:`path` stores network path.
+ */
+ ngtcp2_path path;
+ /**
+ * :member:`local_addrbuf` is a buffer to store local address.
+ */
+ ngtcp2_sockaddr_union local_addrbuf;
+ /**
+ * :member:`remote_addrbuf` is a buffer to store remote address.
+ */
+ ngtcp2_sockaddr_union remote_addrbuf;
+} ngtcp2_path_storage;
+
+/**
+ * @struct
+ *
+ * :type:`ngtcp2_crypto_md` is a wrapper around native message digest
+ * object.
+ */
+typedef struct ngtcp2_crypto_md {
+ /**
+ * :member:`native_handle` is a pointer to an underlying message
+ * digest object.
+ */
+ void *native_handle;
+} ngtcp2_crypto_md;
+
+/**
+ * @struct
+ *
+ * :type:`ngtcp2_crypto_aead` is a wrapper around native AEAD object.
+ */
+typedef struct ngtcp2_crypto_aead {
+ /**
+ * :member:`native_handle` is a pointer to an underlying AEAD
+ * object.
+ */
+ void *native_handle;
+ /**
+ * :member:`max_overhead` is the number of additional bytes which
+ * AEAD encryption needs on encryption.
+ */
+ size_t max_overhead;
+} ngtcp2_crypto_aead;
+
+/**
+ * @struct
+ *
+ * :type:`ngtcp2_crypto_cipher` is a wrapper around native cipher
+ * object.
+ */
+typedef struct ngtcp2_crypto_cipher {
+ /**
+ * :member:`native_handle` is a pointer to an underlying cipher
+ * object.
+ */
+ void *native_handle;
+} ngtcp2_crypto_cipher;
+
+/**
+ * @struct
+ *
+ * :type:`ngtcp2_crypto_aead_ctx` is a wrapper around native AEAD
+ * cipher context object. It should be initialized with a specific
+ * key. ngtcp2 library reuses this context object to encrypt or
+ * decrypt multiple packets.
+ */
+typedef struct ngtcp2_crypto_aead_ctx {
+ /**
+ * :member:`native_handle` is a pointer to an underlying AEAD
+ * context object.
+ */
+ void *native_handle;
+} ngtcp2_crypto_aead_ctx;
+
+/**
+ * @struct
+ *
+ * :type:`ngtcp2_crypto_cipher_ctx` is a wrapper around native cipher
+ * context object. It should be initialized with a specific key.
+ * ngtcp2 library reuses this context object to encrypt or decrypt
+ * multiple packet headers.
+ */
+typedef struct ngtcp2_crypto_cipher_ctx {
+ /**
+ * :member:`native_handle` is a pointer to an underlying cipher
+ * context object.
+ */
+ void *native_handle;
+} ngtcp2_crypto_cipher_ctx;
+
+/**
+ * @struct
+ *
+ * :type:`ngtcp2_crypto_ctx` is a convenient structure to bind all
+ * crypto related objects in one place. Use
+ * `ngtcp2_crypto_ctx_initial` to initialize this struct for Initial
+ * packet encryption. For Handshake and 1RTT packets, use
+ * `ngtcp2_crypto_ctx_tls`.
+ */
+typedef struct ngtcp2_crypto_ctx {
+ /**
+ * :member:`aead` is AEAD object.
+ */
+ ngtcp2_crypto_aead aead;
+ /**
+ * :member:`md` is message digest object.
+ */
+ ngtcp2_crypto_md md;
+ /**
+ * :member:`hp` is header protection cipher.
+ */
+ ngtcp2_crypto_cipher hp;
+ /**
+ * :member:`max_encryption` is the number of encryption which this
+ * key can be used with.
+ */
+ uint64_t max_encryption;
+ /**
+ * :member:`max_decryption_failure` is the number of decryption
+ * failure with this key.
+ */
+ uint64_t max_decryption_failure;
+} ngtcp2_crypto_ctx;
+
+/**
+ * @function
+ *
+ * `ngtcp2_encode_transport_params` encodes |params| in |dest| of
+ * length |destlen|.
+ *
+ * If |dest| is NULL, and |destlen| is zero, this function just
+ * returns the number of bytes required to store the encoded transport
+ * parameters.
+ *
+ * This function returns the number of written, or one of the
+ * following negative error codes:
+ *
+ * :macro:`NGTCP2_ERR_NOBUF`
+ * Buffer is too small.
+ * :macro:`NGTCP2_ERR_INVALID_ARGUMENT`
+ * |exttype| is invalid.
+ */
+NGTCP2_EXTERN ngtcp2_ssize ngtcp2_encode_transport_params_versioned(
+ uint8_t *dest, size_t destlen, ngtcp2_transport_params_type exttype,
+ int transport_params_version, const ngtcp2_transport_params *params);
+
+/**
+ * @function
+ *
+ * `ngtcp2_decode_transport_params` decodes transport parameters in
+ * |data| of length |datalen|, and stores the result in the object
+ * pointed by |params|.
+ *
+ * If the optional parameters are missing, the default value is
+ * assigned.
+ *
+ * The following fields may point to somewhere inside the buffer
+ * pointed by |data| of length |datalen|:
+ *
+ * - :member:`ngtcp2_transport_params.version_info.available_versions
+ * <ngtcp2_version_info.available_versions>`
+ *
+ * This function returns 0 if it succeeds, or one of the following
+ * negative error codes:
+ *
+ * :macro:`NGTCP2_ERR_REQUIRED_TRANSPORT_PARAM`
+ * The required parameter is missing.
+ * :macro:`NGTCP2_ERR_MALFORMED_TRANSPORT_PARAM`
+ * The input is malformed.
+ */
+NGTCP2_EXTERN int ngtcp2_decode_transport_params_versioned(
+ int transport_params_version, ngtcp2_transport_params *params,
+ ngtcp2_transport_params_type exttype, const uint8_t *data, size_t datalen);
+
+/**
+ * @function
+ *
+ * `ngtcp2_decode_transport_params_new` decodes transport parameters
+ * in |data| of length |datalen|, and stores the result in the object
+ * allocated dynamically. The pointer to the allocated object is
+ * assigned to |*pparams|. Unlike `ngtcp2_decode_transport_params`,
+ * all direct and indirect fields are also allocated dynamically if
+ * needed.
+ *
+ * |mem| is a memory allocator to allocate memory. If |mem| is
+ * ``NULL``, the memory allocator returned by `ngtcp2_mem_default()`
+ * is used.
+ *
+ * If the optional parameters are missing, the default value is
+ * assigned.
+ *
+ * `ngtcp2_transport_params_del` frees the memory allocated by this
+ * function.
+ *
+ * This function returns 0 if it succeeds, or one of the following
+ * negative error codes:
+ *
+ * :macro:`NGTCP2_ERR_REQUIRED_TRANSPORT_PARAM`
+ * The required parameter is missing.
+ * :macro:`NGTCP2_ERR_MALFORMED_TRANSPORT_PARAM`
+ * The input is malformed.
+ * :macro:`NGTCP2_ERR_NOMEM`
+ * Out of memory.
+ */
+NGTCP2_EXTERN int ngtcp2_decode_transport_params_new(
+ ngtcp2_transport_params **pparams, ngtcp2_transport_params_type exttype,
+ const uint8_t *data, size_t datalen, const ngtcp2_mem *mem);
+
+/**
+ * @function
+ *
+ * `ngtcp2_transport_params_del` frees the |params| which must be
+ * dynamically allocated by `ngtcp2_decode_transport_params_new`.
+ *
+ * |mem| is a memory allocator that allocated |params|. If |mem| is
+ * ``NULL``, the memory allocator returned by `ngtcp2_mem_default()`
+ * is used.
+ *
+ * If |params| is ``NULL``, this function does nothing.
+ */
+NGTCP2_EXTERN void ngtcp2_transport_params_del(ngtcp2_transport_params *params,
+ const ngtcp2_mem *mem);
+
+/**
+ * @struct
+ *
+ * :type:`ngtcp2_version_cid` is a convenient struct to store the
+ * result of `ngtcp2_pkt_decode_version_cid`.
+ */
+typedef struct ngtcp2_version_cid {
+ /**
+ * :member:`version` stores QUIC version.
+ */
+ uint32_t version;
+ /**
+ * :member:`dcid` points to the Destination Connection ID.
+ */
+ const uint8_t *dcid;
+ /**
+ * :member:`dcidlen` is the length of the Destination Connection ID
+ * pointed by :member:`dcid`.
+ */
+ size_t dcidlen;
+ /**
+ * :member:`scid` points to the Source Connection ID.
+ */
+ const uint8_t *scid;
+ /**
+ * :member:`scidlen` is the length of the Source Connection ID
+ * pointed by :member:`scid`.
+ */
+ size_t scidlen;
+} ngtcp2_version_cid;
+
+/**
+ * @function
+ *
+ * `ngtcp2_pkt_decode_version_cid` extracts QUIC version, Destination
+ * Connection ID and Source Connection ID from the packet pointed by
+ * |data| of length |datalen|. This function can handle Connection ID
+ * up to 255 bytes unlike `ngtcp2_pkt_decode_hd_long` or
+ * `ngtcp2_pkt_decode_hd_short` which are only capable of handling
+ * Connection ID less than or equal to :macro:`NGTCP2_MAX_CIDLEN`.
+ * Longer Connection ID is only valid if the version is unsupported
+ * QUIC version.
+ *
+ * If the given packet is Long header packet, this function extracts
+ * the version from the packet and assigns it to
+ * :member:`dest->version <ngtcp2_version_cid.version>`. It also
+ * extracts the pointer to the Destination Connection ID and its
+ * length and assigns them to :member:`dest->dcid
+ * <ngtcp2_version_cid.dcid>` and :member:`dest->dcidlen
+ * <ngtcp2_version_cid.dcidlen>` respectively. Similarly, it extracts
+ * the pointer to the Source Connection ID and its length and assigns
+ * them to :member:`dest->scid <ngtcp2_version_cid.scid>` and
+ * :member:`dest->scidlen <ngtcp2_version_cid.scidlen>` respectively.
+ *
+ * If the given packet is Short header packet, :member:`dest->version
+ * <ngtcp2_version_cid.version>` will be 0, :member:`dest->scid
+ * <ngtcp2_version_cid.scid>` will be ``NULL``, and
+ * :member:`dest->scidlen <ngtcp2_version_cid.scidlen>` will be 0.
+ * Because the Short header packet does not have the length of
+ * Destination Connection ID, the caller has to pass the length in
+ * |short_dcidlen|. This function extracts the pointer to the
+ * Destination Connection ID and assigns it to :member:`dest->dcid
+ * <ngtcp2_version_cid.dcid>`. |short_dcidlen| is assigned to
+ * :member:`dest->dcidlen <ngtcp2_version_cid.dcidlen>`.
+ *
+ * If Version Negotiation is required, this function returns
+ * :macro:`NGTCP2_ERR_VERSION_NEGOTIATION`. Unlike the other error
+ * cases, all fields of |dest| are assigned as described above.
+ *
+ * This function returns 0 if it succeeds. Otherwise, one of the
+ * following negative error code:
+ *
+ * :macro:`NGTCP2_ERR_INVALID_ARGUMENT`
+ * The function could not decode the packet header.
+ * :macro:`NGTCP2_ERR_VERSION_NEGOTIATION`
+ * Version Negotiation packet should be sent.
+ */
+NGTCP2_EXTERN int ngtcp2_pkt_decode_version_cid(ngtcp2_version_cid *dest,
+ const uint8_t *data,
+ size_t datalen,
+ size_t short_dcidlen);
+
+/**
+ * @function
+ *
+ * `ngtcp2_pkt_decode_hd_long` decodes QUIC long packet header in
+ * |pkt| of length |pktlen|. This function only parses the input just
+ * before packet number field.
+ *
+ * This function does not verify that length field is correct. In
+ * other words, this function succeeds even if length > |pktlen|.
+ *
+ * This function can handle Connection ID up to
+ * :macro:`NGTCP2_MAX_CIDLEN`. Consider to use
+ * `ngtcp2_pkt_decode_version_cid` to get longer Connection ID.
+ *
+ * This function handles Version Negotiation specially. If version
+ * field is 0, |pkt| must contain Version Negotiation packet. Version
+ * Negotiation packet has random type in wire format. For
+ * convenience, this function sets
+ * :enum:`ngtcp2_pkt_type.NGTCP2_PKT_VERSION_NEGOTIATION` to
+ * :member:`dest->type <ngtcp2_pkt_hd.type>`, clears
+ * :macro:`NGTCP2_PKT_FLAG_LONG_FORM` flag from :member:`dest->flags
+ * <ngtcp2_pkt_hd.flags>`, and sets 0 to :member:`dest->len
+ * <ngtcp2_pkt_hd.len>`. Version Negotiation packet occupies a single
+ * packet.
+ *
+ * It stores the result in the object pointed by |dest|, and returns
+ * the number of bytes decoded to read the packet header if it
+ * succeeds, or one of the following error codes:
+ *
+ * :macro:`NGTCP2_ERR_INVALID_ARGUMENT`
+ * Packet is too short; or it is not a long header
+ */
+NGTCP2_EXTERN ngtcp2_ssize ngtcp2_pkt_decode_hd_long(ngtcp2_pkt_hd *dest,
+ const uint8_t *pkt,
+ size_t pktlen);
+
+/**
+ * @function
+ *
+ * `ngtcp2_pkt_decode_hd_short` decodes QUIC short header packet
+ * header in |pkt| of length |pktlen|. |dcidlen| is the length of
+ * DCID in packet header. Short header packet does not encode the
+ * length of connection ID, thus we need the input from the outside.
+ * This function only parses the input just before packet number
+ * field. This function can handle Connection ID up to
+ * :macro:`NGTCP2_MAX_CIDLEN`. Consider to use
+ * `ngtcp2_pkt_decode_version_cid` to get longer Connection ID. It
+ * stores the result in the object pointed by |dest|, and returns the
+ * number of bytes decoded to read the packet header if it succeeds,
+ * or one of the following error codes:
+ *
+ * :macro:`NGTCP2_ERR_INVALID_ARGUMENT`
+ * Packet is too short; or it is not a short header
+ */
+NGTCP2_EXTERN ngtcp2_ssize ngtcp2_pkt_decode_hd_short(ngtcp2_pkt_hd *dest,
+ const uint8_t *pkt,
+ size_t pktlen,
+ size_t dcidlen);
+
+/**
+ * @function
+ *
+ * `ngtcp2_pkt_write_stateless_reset` writes Stateless Reset packet in
+ * the buffer pointed by |dest| whose length is |destlen|.
+ * |stateless_reset_token| is a pointer to the Stateless Reset Token,
+ * and its length must be :macro:`NGTCP2_STATELESS_RESET_TOKENLEN`
+ * bytes long. |rand| specifies the random octets preceding Stateless
+ * Reset Token. The length of |rand| is specified by |randlen| which
+ * must be at least :macro:`NGTCP2_MIN_STATELESS_RESET_RANDLEN` bytes
+ * long.
+ *
+ * If |randlen| is too long to write them all in the buffer, |rand| is
+ * written to the buffer as much as possible, and is truncated.
+ *
+ * This function returns the number of bytes written to the buffer, or
+ * one of the following negative error codes:
+ *
+ * :macro:`NGTCP2_ERR_NOBUF`
+ * Buffer is too small.
+ * :macro:`NGTCP2_ERR_INVALID_ARGUMENT`
+ * |randlen| is strictly less than
+ * :macro:`NGTCP2_MIN_STATELESS_RESET_RANDLEN`.
+ */
+NGTCP2_EXTERN ngtcp2_ssize ngtcp2_pkt_write_stateless_reset(
+ uint8_t *dest, size_t destlen, const uint8_t *stateless_reset_token,
+ const uint8_t *rand, size_t randlen);
+
+/**
+ * @function
+ *
+ * `ngtcp2_pkt_write_version_negotiation` writes Version Negotiation
+ * packet in the buffer pointed by |dest| whose length is |destlen|.
+ * |unused_random| should be generated randomly. |dcid| is the
+ * destination connection ID which appears in a packet as a source
+ * connection ID sent by client which caused version negotiation.
+ * Similarly, |scid| is the source connection ID which appears in a
+ * packet as a destination connection ID sent by client. |sv| is a
+ * list of supported versions, and |nsv| specifies the number of
+ * supported versions included in |sv|.
+ *
+ * This function returns the number of bytes written to the buffer, or
+ * one of the following negative error codes:
+ *
+ * :macro:`NGTCP2_ERR_NOBUF`
+ * Buffer is too small.
+ */
+NGTCP2_EXTERN ngtcp2_ssize ngtcp2_pkt_write_version_negotiation(
+ uint8_t *dest, size_t destlen, uint8_t unused_random, const uint8_t *dcid,
+ size_t dcidlen, const uint8_t *scid, size_t scidlen, const uint32_t *sv,
+ size_t nsv);
+
+/**
+ * @struct
+ *
+ * :type:`ngtcp2_conn` represents a single QUIC connection.
+ */
+typedef struct ngtcp2_conn ngtcp2_conn;
+
+/**
+ * @functypedef
+ *
+ * :type:`ngtcp2_client_initial` is invoked when client application
+ * asks TLS stack to produce first TLS cryptographic handshake data.
+ *
+ * This implementation of this callback must get the first handshake
+ * data from TLS stack and pass it to ngtcp2 library using
+ * `ngtcp2_conn_submit_crypto_data` function. Make sure that before
+ * calling `ngtcp2_conn_submit_crypto_data` function, client
+ * application must create initial packet protection keys and IVs, and
+ * provide them to ngtcp2 library using
+ * `ngtcp2_conn_install_initial_key`.
+ *
+ * This callback function must return 0 if it succeeds, or
+ * :macro:`NGTCP2_ERR_CALLBACK_FAILURE` which makes the library call
+ * return immediately.
+ */
+typedef int (*ngtcp2_client_initial)(ngtcp2_conn *conn, void *user_data);
+
+/**
+ * @functypedef
+ *
+ * :type:`ngtcp2_recv_client_initial` is invoked when server receives
+ * Initial packet from client. An server application must implement
+ * this callback, and generate initial keys and IVs for both
+ * transmission and reception. Install them using
+ * `ngtcp2_conn_install_initial_key`. |dcid| is the destination
+ * connection ID which client generated randomly. It is used to
+ * derive initial packet protection keys.
+ *
+ * The callback function must return 0 if it succeeds. If an error
+ * occurs, return :macro:`NGTCP2_ERR_CALLBACK_FAILURE` which makes the
+ * library call return immediately.
+ */
+typedef int (*ngtcp2_recv_client_initial)(ngtcp2_conn *conn,
+ const ngtcp2_cid *dcid,
+ void *user_data);
+
+/**
+ * @enum
+ *
+ * :type:`ngtcp2_crypto_level` is encryption level.
+ */
+typedef enum ngtcp2_crypto_level {
+ /**
+ * :enum:`NGTCP2_CRYPTO_LEVEL_INITIAL` is Initial Keys encryption
+ * level.
+ */
+ NGTCP2_CRYPTO_LEVEL_INITIAL,
+ /**
+ * :enum:`NGTCP2_CRYPTO_LEVEL_HANDSHAKE` is Handshake Keys
+ * encryption level.
+ */
+ NGTCP2_CRYPTO_LEVEL_HANDSHAKE,
+ /**
+ * :enum:`NGTCP2_CRYPTO_LEVEL_APPLICATION` is Application Data
+ * (1-RTT) Keys encryption level.
+ */
+ NGTCP2_CRYPTO_LEVEL_APPLICATION,
+ /**
+ * :enum:`NGTCP2_CRYPTO_LEVEL_EARLY` is Early Data (0-RTT) Keys
+ * encryption level.
+ */
+ NGTCP2_CRYPTO_LEVEL_EARLY
+} ngtcp2_crypto_level;
+
+/**
+ * @functypedef
+ *
+ * :type`ngtcp2_recv_crypto_data` is invoked when crypto data is
+ * received. The received data is pointed to by |data|, and its
+ * length is |datalen|. The |offset| specifies the offset where
+ * |data| is positioned. |user_data| is the arbitrary pointer passed
+ * to `ngtcp2_conn_client_new` or `ngtcp2_conn_server_new`. The
+ * ngtcp2 library ensures that the crypto data is passed to the
+ * application in the increasing order of |offset|. |datalen| is
+ * always strictly greater than 0. |crypto_level| indicates the
+ * encryption level where this data is received. Crypto data can
+ * never be received in
+ * :enum:`ngtcp2_crypto_level.NGTCP2_CRYPTO_LEVEL_EARLY`.
+ *
+ * The application should provide the given data to TLS stack.
+ *
+ * The callback function must return 0 if it succeeds, or one of the
+ * following negative error codes:
+ *
+ * - :macro:`NGTCP2_ERR_CRYPTO`
+ * - :macro:`NGTCP2_ERR_REQUIRED_TRANSPORT_PARAM`
+ * - :macro:`NGTCP2_ERR_MALFORMED_TRANSPORT_PARAM`
+ * - :macro:`NGTCP2_ERR_TRANSPORT_PARAM`
+ * - :macro:`NGTCP2_ERR_PROTO`
+ * - :macro:`NGTCP2_ERR_VERSION_NEGOTIATION_FAILURE`
+ * - :macro:`NGTCP2_ERR_NOMEM`
+ * - :macro:`NGTCP2_ERR_CALLBACK_FAILURE`
+ *
+ * If the other value is returned, it is treated as
+ * :macro:`NGTCP2_ERR_CALLBACK_FAILURE`.
+ *
+ * If application encounters fatal error, return
+ * :macro:`NGTCP2_ERR_CALLBACK_FAILURE` which makes the library call
+ * return immediately.
+ */
+typedef int (*ngtcp2_recv_crypto_data)(ngtcp2_conn *conn,
+ ngtcp2_crypto_level crypto_level,
+ uint64_t offset, const uint8_t *data,
+ size_t datalen, void *user_data);
+
+/**
+ * @functypedef
+ *
+ * :type:`ngtcp2_handshake_completed` is invoked when QUIC
+ * cryptographic handshake has completed.
+ *
+ * The callback function must return 0 if it succeeds. Returning
+ * :macro:`NGTCP2_ERR_CALLBACK_FAILURE` makes the library call return
+ * immediately.
+ */
+typedef int (*ngtcp2_handshake_completed)(ngtcp2_conn *conn, void *user_data);
+
+/**
+ * @functypedef
+ *
+ * :type:`ngtcp2_handshake_confirmed` is invoked when QUIC
+ * cryptographic handshake is confirmed. The handshake confirmation
+ * means that both endpoints agree that handshake has finished.
+ *
+ * The callback function must return 0 if it succeeds. Returning
+ * :macro:`NGTCP2_ERR_CALLBACK_FAILURE` makes the library call return
+ * immediately.
+ */
+typedef int (*ngtcp2_handshake_confirmed)(ngtcp2_conn *conn, void *user_data);
+
+/**
+ * @functypedef
+ *
+ * :type:`ngtcp2_recv_version_negotiation` is invoked when Version
+ * Negotiation packet is received. |hd| is the pointer to the QUIC
+ * packet header object. The vector |sv| of |nsv| elements contains
+ * the QUIC version the server supports. Since Version Negotiation is
+ * only sent by server, this callback function is used by client only.
+ *
+ * The callback function must return 0 if it succeeds, or
+ * :macro:`NGTCP2_ERR_CALLBACK_FAILURE` which makes the library call
+ * return immediately.
+ */
+typedef int (*ngtcp2_recv_version_negotiation)(ngtcp2_conn *conn,
+ const ngtcp2_pkt_hd *hd,
+ const uint32_t *sv, size_t nsv,
+ void *user_data);
+
+/**
+ * @functypedef
+ *
+ * :type:`ngtcp2_recv_retry` is invoked when Retry packet is received.
+ * This callback is client use only.
+ *
+ * Application must regenerate packet protection key, IV, and header
+ * protection key for Initial packets using the destination connection
+ * ID obtained by :member:`hd->scid <ngtcp2_pkt_hd.scid>` and install
+ * them by calling `ngtcp2_conn_install_initial_key()`.
+ *
+ * 0-RTT data accepted by the ngtcp2 library will be automatically
+ * retransmitted as 0-RTT data by the library.
+ *
+ * The callback function must return 0 if it succeeds. Returning
+ * :macro:`NGTCP2_ERR_CALLBACK_FAILURE` makes the library call return
+ * immediately.
+ */
+typedef int (*ngtcp2_recv_retry)(ngtcp2_conn *conn, const ngtcp2_pkt_hd *hd,
+ void *user_data);
+
+/**
+ * @functypedef
+ *
+ * :type:`ngtcp2_encrypt` is invoked when the ngtcp2 library asks the
+ * application to encrypt packet payload. The packet payload to
+ * encrypt is passed as |plaintext| of length |plaintextlen|. The
+ * AEAD cipher is |aead|. |aead_ctx| is the AEAD cipher context
+ * object which is initialized with encryption key. The nonce is
+ * passed as |nonce| of length |noncelen|. The Additional
+ * Authenticated Data is passed as |aad| of length |aadlen|.
+ *
+ * The implementation of this callback must encrypt |plaintext| using
+ * the negotiated cipher suite and write the ciphertext into the
+ * buffer pointed by |dest|. |dest| has enough capacity to store the
+ * ciphertext and any additional AEAD tag data.
+ *
+ * |dest| and |plaintext| may point to the same buffer.
+ *
+ * The callback function must return 0 if it succeeds, or
+ * :macro:`NGTCP2_ERR_CALLBACK_FAILURE` which makes the library call
+ * return immediately.
+ */
+typedef int (*ngtcp2_encrypt)(uint8_t *dest, const ngtcp2_crypto_aead *aead,
+ const ngtcp2_crypto_aead_ctx *aead_ctx,
+ const uint8_t *plaintext, size_t plaintextlen,
+ const uint8_t *nonce, size_t noncelen,
+ const uint8_t *aad, size_t aadlen);
+
+/**
+ * @functypedef
+ *
+ * :type:`ngtcp2_decrypt` is invoked when the ngtcp2 library asks the
+ * application to decrypt packet payload. The packet payload to
+ * decrypt is passed as |ciphertext| of length |ciphertextlen|. The
+ * AEAD cipher is |aead|. |aead_ctx| is the AEAD cipher context
+ * object which is initialized with decryption key. The nonce is
+ * passed as |nonce| of length |noncelen|. The Additional
+ * Authenticated Data is passed as |aad| of length |aadlen|.
+ *
+ * The implementation of this callback must decrypt |ciphertext| using
+ * the negotiated cipher suite and write the ciphertext into the
+ * buffer pointed by |dest|. |dest| has enough capacity to store the
+ * cleartext.
+ *
+ * |dest| and |ciphertext| may point to the same buffer.
+ *
+ * The callback function must return 0 if it succeeds. If TLS stack
+ * fails to decrypt data, return :macro:`NGTCP2_ERR_DECRYPT`. For any
+ * other errors, return :macro:`NGTCP2_ERR_CALLBACK_FAILURE` which
+ * makes the library call return immediately.
+ */
+typedef int (*ngtcp2_decrypt)(uint8_t *dest, const ngtcp2_crypto_aead *aead,
+ const ngtcp2_crypto_aead_ctx *aead_ctx,
+ const uint8_t *ciphertext, size_t ciphertextlen,
+ const uint8_t *nonce, size_t noncelen,
+ const uint8_t *aad, size_t aadlen);
+
+/**
+ * @functypedef
+ *
+ * :type:`ngtcp2_hp_mask` is invoked when the ngtcp2 library asks the
+ * application to produce a mask to encrypt or decrypt packet header.
+ * The encryption cipher is |hp|. |hp_ctx| is the cipher context
+ * object which is initialized with header protection key. The sample
+ * is passed as |sample| which is :macro:`NGTCP2_HP_SAMPLELEN` bytes
+ * long.
+ *
+ * The implementation of this callback must produce a mask using the
+ * header protection cipher suite specified by QUIC specification and
+ * write the result into the buffer pointed by |dest|. The length of
+ * the mask must be at least :macro:`NGTCP2_HP_MASKLEN`. The library
+ * only uses the first :macro:`NGTCP2_HP_MASKLEN` bytes of the
+ * produced mask. The buffer pointed by |dest| is guaranteed to have
+ * at least :macro:`NGTCP2_HP_SAMPLELEN` bytes available for
+ * convenience.
+ *
+ * The callback function must return 0 if it succeeds, or
+ * :macro:`NGTCP2_ERR_CALLBACK_FAILURE` which makes the library call
+ * return immediately.
+ */
+typedef int (*ngtcp2_hp_mask)(uint8_t *dest, const ngtcp2_crypto_cipher *hp,
+ const ngtcp2_crypto_cipher_ctx *hp_ctx,
+ const uint8_t *sample);
+
+/**
+ * @macrosection
+ *
+ * Stream data flags
+ */
+
+/**
+ * @macro
+ *
+ * :macro:`NGTCP2_STREAM_DATA_FLAG_NONE` indicates no flag set.
+ */
+#define NGTCP2_STREAM_DATA_FLAG_NONE 0x00u
+
+/**
+ * @macro
+ *
+ * :macro:`NGTCP2_STREAM_DATA_FLAG_FIN` indicates that this chunk of
+ * data is final piece of an incoming stream.
+ */
+#define NGTCP2_STREAM_DATA_FLAG_FIN 0x01u
+
+/**
+ * @macro
+ *
+ * :macro:`NGTCP2_STREAM_DATA_FLAG_EARLY` indicates that this chunk of
+ * data contains data received in 0RTT packet and the handshake has
+ * not completed yet, which means that the data might be replayed.
+ */
+#define NGTCP2_STREAM_DATA_FLAG_EARLY 0x02u
+
+/**
+ * @functypedef
+ *
+ * :type:`ngtcp2_recv_stream_data` is invoked when stream data is
+ * received. The stream is specified by |stream_id|. |flags| is the
+ * bitwise-OR of zero or more of :macro:`NGTCP2_STREAM_DATA_FLAG_*
+ * <NGTCP2_STREAM_DATA_FLAG_NONE>`. If |flags| &
+ * :macro:`NGTCP2_STREAM_DATA_FLAG_FIN` is nonzero, this portion of
+ * the data is the last data in this stream. |offset| is the offset
+ * where this data begins. The library ensures that data is passed to
+ * the application in the non-decreasing order of |offset| without any
+ * overlap. The data is passed as |data| of length |datalen|.
+ * |datalen| may be 0 if and only if |fin| is nonzero.
+ *
+ * If :macro:`NGTCP2_STREAM_DATA_FLAG_EARLY` is set in |flags|, it
+ * indicates that a part of or whole data was received in 0RTT packet
+ * and a handshake has not completed yet.
+ *
+ * The callback function must return 0 if it succeeds, or
+ * :macro:`NGTCP2_ERR_CALLBACK_FAILURE` which makes the library return
+ * immediately.
+ */
+typedef int (*ngtcp2_recv_stream_data)(ngtcp2_conn *conn, uint32_t flags,
+ int64_t stream_id, uint64_t offset,
+ const uint8_t *data, size_t datalen,
+ void *user_data, void *stream_user_data);
+
+/**
+ * @functypedef
+ *
+ * :type:`ngtcp2_stream_open` is a callback function which is called
+ * when remote stream is opened by peer. This function is not called
+ * if stream is opened by implicitly (we might reconsider this
+ * behaviour).
+ *
+ * The implementation of this callback should return 0 if it succeeds.
+ * Returning :macro:`NGTCP2_ERR_CALLBACK_FAILURE` makes the library
+ * call return immediately.
+ */
+typedef int (*ngtcp2_stream_open)(ngtcp2_conn *conn, int64_t stream_id,
+ void *user_data);
+
+/**
+ * @macrosection
+ *
+ * Stream close flags
+ */
+
+/**
+ * @macro
+ *
+ * :macro:`NGTCP2_STREAM_CLOSE_FLAG_NONE` indicates no flag set.
+ */
+#define NGTCP2_STREAM_CLOSE_FLAG_NONE 0x00u
+
+/**
+ * @macro
+ *
+ * :macro:`NGTCP2_STREAM_CLOSE_FLAG_APP_ERROR_CODE_SET` indicates that
+ * app_error_code parameter is set.
+ */
+#define NGTCP2_STREAM_CLOSE_FLAG_APP_ERROR_CODE_SET 0x01u
+
+/**
+ * @functypedef
+ *
+ * :type:`ngtcp2_stream_close` is invoked when a stream is closed.
+ * This callback is not called when QUIC connection is closed before
+ * existing streams are closed. |flags| is the bitwise-OR of zero or
+ * more of :macro:`NGTCP2_STREAM_CLOSE_FLAG_*
+ * <NGTCP2_STREAM_CLOSE_FLAG_NONE>`. |app_error_code| indicates the
+ * error code of this closure if
+ * :macro:`NGTCP2_STREAM_CLOSE_FLAG_APP_ERROR_CODE_SET` is set in
+ * |flags|. If it is not set, the stream was closed without any error
+ * code, which generally means success.
+ *
+ * |app_error_code| is the first application error code sent by a
+ * local endpoint, or received from a remote endpoint. If a stream is
+ * closed cleanly, no application error code is exchanged. Since QUIC
+ * stack does not know the application error code which indicates "no
+ * errors", |app_error_code| is set to 0 and
+ * :macro:`NGTCP2_STREAM_CLOSE_FLAG_APP_ERROR_CODE_SET` is not set in
+ * |flags| in this case.
+ *
+ * The implementation of this callback should return 0 if it succeeds.
+ * Returning :macro:`NGTCP2_ERR_CALLBACK_FAILURE` makes the library
+ * call return immediately.
+ */
+typedef int (*ngtcp2_stream_close)(ngtcp2_conn *conn, uint32_t flags,
+ int64_t stream_id, uint64_t app_error_code,
+ void *user_data, void *stream_user_data);
+
+/**
+ * @functypedef
+ *
+ * :type:`ngtcp2_stream_reset` is invoked when a stream identified by
+ * |stream_id| is reset by a remote endpoint.
+ *
+ * The implementation of this callback should return 0 if it succeeds.
+ * Returning :macro:`NGTCP2_ERR_CALLBACK_FAILURE` makes the library
+ * call return immediately.
+ */
+typedef int (*ngtcp2_stream_reset)(ngtcp2_conn *conn, int64_t stream_id,
+ uint64_t final_size, uint64_t app_error_code,
+ void *user_data, void *stream_user_data);
+
+/**
+ * @functypedef
+ *
+ * :type:`ngtcp2_acked_stream_data_offset` is a callback function
+ * which is called when stream data is acked, and application can free
+ * the data. The acked range of data is [offset, offset + datalen).
+ * For a given stream_id, this callback is called sequentially in
+ * increasing order of |offset| without any overlap. |datalen| is
+ * normally strictly greater than 0. One exception is that when a
+ * packet which includes STREAM frame which has fin flag set, and 0
+ * length data, this callback is invoked with 0 passed as |datalen|.
+ *
+ * If a stream is closed prematurely and stream data is still
+ * in-flight, this callback function is not called for those data.
+ *
+ * The implementation of this callback should return 0 if it succeeds.
+ * Returning :macro:`NGTCP2_ERR_CALLBACK_FAILURE` makes the library
+ * call return immediately.
+ */
+typedef int (*ngtcp2_acked_stream_data_offset)(
+ ngtcp2_conn *conn, int64_t stream_id, uint64_t offset, uint64_t datalen,
+ void *user_data, void *stream_user_data);
+
+/**
+ * @functypedef
+ *
+ * :type:`ngtcp2_recv_stateless_reset` is a callback function which is
+ * called when Stateless Reset packet is received. The stateless
+ * reset details are given in |sr|.
+ *
+ * The implementation of this callback should return 0 if it succeeds.
+ * Returning :macro:`NGTCP2_ERR_CALLBACK_FAILURE` makes the library
+ * call return immediately.
+ */
+typedef int (*ngtcp2_recv_stateless_reset)(ngtcp2_conn *conn,
+ const ngtcp2_pkt_stateless_reset *sr,
+ void *user_data);
+
+/**
+ * @functypedef
+ *
+ * :type:`ngtcp2_extend_max_streams` is a callback function which is
+ * called every time max stream ID is strictly extended.
+ * |max_streams| is the cumulative number of streams which an endpoint
+ * can open.
+ *
+ * The callback function must return 0 if it succeeds. Returning
+ * :macro:`NGTCP2_ERR_CALLBACK_FAILURE` makes the library call return
+ * immediately.
+ */
+typedef int (*ngtcp2_extend_max_streams)(ngtcp2_conn *conn,
+ uint64_t max_streams, void *user_data);
+
+/**
+ * @functypedef
+ *
+ * :type:`ngtcp2_extend_max_stream_data` is a callback function which
+ * is invoked when max stream data is extended. |stream_id|
+ * identifies the stream. |max_data| is a cumulative number of bytes
+ * the endpoint can send on this stream.
+ *
+ * The callback function must return 0 if it succeeds. Returning
+ * :macro:`NGTCP2_ERR_CALLBACK_FAILURE` makes the library call return
+ * immediately.
+ */
+typedef int (*ngtcp2_extend_max_stream_data)(ngtcp2_conn *conn,
+ int64_t stream_id,
+ uint64_t max_data, void *user_data,
+ void *stream_user_data);
+
+/**
+ * @functypedef
+ *
+ * :type:`ngtcp2_rand` is a callback function to get randomized byte
+ * string from application. Application must fill random |destlen|
+ * bytes to the buffer pointed by |dest|. The generated bytes are
+ * used only in non-cryptographic context.
+ */
+typedef void (*ngtcp2_rand)(uint8_t *dest, size_t destlen,
+ const ngtcp2_rand_ctx *rand_ctx);
+
+/**
+ * @functypedef
+ *
+ * :type:`ngtcp2_get_new_connection_id` is a callback function to ask
+ * an application for new connection ID. Application must generate
+ * new unused connection ID with the exact |cidlen| bytes and store it
+ * in |cid|. It also has to generate stateless reset token into
+ * |token|. The length of stateless reset token is
+ * :macro:`NGTCP2_STATELESS_RESET_TOKENLEN` and it is guaranteed that
+ * the buffer pointed by |cid| has the sufficient space to store the
+ * token.
+ *
+ * The callback function must return 0 if it succeeds. Returning
+ * :macro:`NGTCP2_ERR_CALLBACK_FAILURE` makes the library call return
+ * immediately.
+ */
+typedef int (*ngtcp2_get_new_connection_id)(ngtcp2_conn *conn, ngtcp2_cid *cid,
+ uint8_t *token, size_t cidlen,
+ void *user_data);
+
+/**
+ * @functypedef
+ *
+ * :type:`ngtcp2_remove_connection_id` is a callback function which
+ * notifies the application that connection ID |cid| is no longer used
+ * by remote endpoint.
+ *
+ * The callback function must return 0 if it succeeds. Returning
+ * :macro:`NGTCP2_ERR_CALLBACK_FAILURE` makes the library call return
+ * immediately.
+ */
+typedef int (*ngtcp2_remove_connection_id)(ngtcp2_conn *conn,
+ const ngtcp2_cid *cid,
+ void *user_data);
+
+/**
+ * @functypedef
+ *
+ * :type:`ngtcp2_update_key` is a callback function which tells the
+ * application that it must generate new packet protection keying
+ * materials and AEAD cipher context objects with new keys. The
+ * current set of secrets are given as |current_rx_secret| and
+ * |current_tx_secret| of length |secretlen|. They are decryption and
+ * encryption secrets respectively.
+ *
+ * The application has to generate new secrets and keys for both
+ * encryption and decryption, and write decryption secret and IV to
+ * the buffer pointed by |rx_secret| and |rx_iv| respectively. It
+ * also has to create new AEAD cipher context object with new
+ * decryption key and initialize |rx_aead_ctx| with it. Similarly,
+ * write encryption secret and IV to the buffer pointed by |tx_secret|
+ * and |tx_iv|. Create new AEAD cipher context object with new
+ * encryption key and initialize |tx_aead_ctx| with it. All given
+ * buffers have the enough capacity to store secret, key and IV.
+ *
+ * The callback function must return 0 if it succeeds. Returning
+ * :macro:`NGTCP2_ERR_CALLBACK_FAILURE` makes the library call return
+ * immediately.
+ */
+typedef int (*ngtcp2_update_key)(
+ ngtcp2_conn *conn, uint8_t *rx_secret, uint8_t *tx_secret,
+ ngtcp2_crypto_aead_ctx *rx_aead_ctx, uint8_t *rx_iv,
+ ngtcp2_crypto_aead_ctx *tx_aead_ctx, uint8_t *tx_iv,
+ const uint8_t *current_rx_secret, const uint8_t *current_tx_secret,
+ size_t secretlen, void *user_data);
+
+/**
+ * @macrosection
+ *
+ * Path validation related macros
+ */
+
+/**
+ * @macro
+ *
+ * :macro:`NGTCP2_PATH_VALIDATION_FLAG_NONE` indicates no flag set.
+ */
+#define NGTCP2_PATH_VALIDATION_FLAG_NONE 0x00u
+
+/**
+ * @macro
+ *
+ * :macro:`NGTCP2_PATH_VALIDATION_FLAG_PREFERRED_ADDR` indicates the
+ * validation involving server preferred address. This flag is only
+ * set for client.
+ */
+#define NGTCP2_PATH_VALIDATION_FLAG_PREFERRED_ADDR 0x01u
+
+/**
+ * @functypedef
+ *
+ * :type:`ngtcp2_path_validation` is a callback function which tells
+ * the application the outcome of path validation. |flags| is zero or
+ * more of :macro:`NGTCP2_PATH_VALIDATION_FLAG_*
+ * <NGTCP2_PATH_VALIDATION_FLAG_NONE>`. |path| is the path that was
+ * validated. If |res| is
+ * :enum:`ngtcp2_path_validation_result.NGTCP2_PATH_VALIDATION_RESULT_SUCCESS`,
+ * the path validation succeeded. If |res| is
+ * :enum:`ngtcp2_path_validation_result.NGTCP2_PATH_VALIDATION_RESULT_FAILURE`,
+ * the path validation failed.
+ *
+ * The callback function must return 0 if it succeeds. Returning
+ * :macro:`NGTCP2_ERR_CALLBACK_FAILURE` makes the library call return
+ * immediately.
+ */
+typedef int (*ngtcp2_path_validation)(ngtcp2_conn *conn, uint32_t flags,
+ const ngtcp2_path *path,
+ ngtcp2_path_validation_result res,
+ void *user_data);
+
+/**
+ * @functypedef
+ *
+ * :type:`ngtcp2_select_preferred_addr` is a callback function which
+ * asks a client application to choose server address from preferred
+ * addresses |paddr| received from server. An application should
+ * write a network path for a selected preferred address in |dest|.
+ * More specifically, the selected preferred address must be set to
+ * :member:`dest->remote <ngtcp2_path.remote>`, a client source
+ * address must be set to :member:`dest->local <ngtcp2_path.local>`.
+ * If a client source address does not change for the new server
+ * address, leave :member:`dest->local <ngtcp2_path.local>`
+ * unmodified, or copy the value of :member:`local
+ * <ngtcp2_path.local>` field of the current network path obtained
+ * from `ngtcp2_conn_get_path()`. Both :member:`dest->local.addr
+ * <ngtcp2_addr.addr>` and :member:`dest->remote.addr
+ * <ngtcp2_addr.addr>` point to buffers which are at least
+ * ``sizeof(struct sockaddr_storage)`` bytes long, respectively. If
+ * an application denies the preferred addresses, just leave |dest|
+ * unmodified (or set :member:`dest->remote.addrlen
+ * <ngtcp2_addr.addrlen>` to 0) and return 0.
+ *
+ * The callback function must return 0 if it succeeds. Returning
+ * :macro:`NGTCP2_ERR_CALLBACK_FAILURE` makes the library call return
+ * immediately.
+ */
+typedef int (*ngtcp2_select_preferred_addr)(ngtcp2_conn *conn,
+ ngtcp2_path *dest,
+ const ngtcp2_preferred_addr *paddr,
+ void *user_data);
+
+/**
+ * @enum
+ *
+ * :type:`ngtcp2_connection_id_status_type` defines a set of status
+ * for Destination Connection ID.
+ */
+typedef enum ngtcp2_connection_id_status_type {
+ /**
+ * :enum:`NGTCP2_CONNECTION_ID_STATUS_TYPE_ACTIVATE` indicates that
+ * a local endpoint starts using new destination Connection ID.
+ */
+ NGTCP2_CONNECTION_ID_STATUS_TYPE_ACTIVATE,
+ /**
+ * :enum:`NGTCP2_CONNECTION_ID_STATUS_TYPE_DEACTIVATE` indicates
+ * that a local endpoint stops using a given destination Connection
+ * ID.
+ */
+ NGTCP2_CONNECTION_ID_STATUS_TYPE_DEACTIVATE
+} ngtcp2_connection_id_status_type;
+
+/**
+ * @functypedef
+ *
+ * :type:`ngtcp2_connection_id_status` is a callback function which is
+ * called when the status of Connection ID changes.
+ *
+ * |token| is the associated stateless reset token and it is ``NULL``
+ * if no token is present.
+ *
+ * |type| is the one of the value defined in
+ * :type:`ngtcp2_connection_id_status_type`. The new value might be
+ * added in the future release.
+ *
+ * The callback function must return 0 if it succeeds. Returning
+ * :macro:`NGTCP2_ERR_CALLBACK_FAILURE` makes the library call return
+ * immediately.
+ */
+typedef int (*ngtcp2_connection_id_status)(ngtcp2_conn *conn, int type,
+ uint64_t seq, const ngtcp2_cid *cid,
+ const uint8_t *token,
+ void *user_data);
+
+/**
+ * @functypedef
+ *
+ * :type:`ngtcp2_recv_new_token` is a callback function which is
+ * called when new token is received from server.
+ *
+ * |token| is the received token of length |tokenlen| bytes long.
+ *
+ * The callback function must return 0 if it succeeds. Returning
+ * :macro:`NGTCP2_ERR_CALLBACK_FAILURE` makes the library call return
+ * immediately.
+ */
+typedef int (*ngtcp2_recv_new_token)(ngtcp2_conn *conn, const uint8_t *token,
+ size_t tokenlen, void *user_data);
+
+/**
+ * @functypedef
+ *
+ * :type:`ngtcp2_delete_crypto_aead_ctx` is a callback function which
+ * must delete the native object pointed by
+ * :member:`aead_ctx->native_handle
+ * <ngtcp2_crypto_aead_ctx.native_handle>`.
+ */
+typedef void (*ngtcp2_delete_crypto_aead_ctx)(ngtcp2_conn *conn,
+ ngtcp2_crypto_aead_ctx *aead_ctx,
+ void *user_data);
+
+/**
+ * @functypedef
+ *
+ * :type:`ngtcp2_delete_crypto_cipher_ctx` is a callback function
+ * which must delete the native object pointed by
+ * :member:`cipher_ctx->native_handle
+ * <ngtcp2_crypto_cipher_ctx.native_handle>`.
+ */
+typedef void (*ngtcp2_delete_crypto_cipher_ctx)(
+ ngtcp2_conn *conn, ngtcp2_crypto_cipher_ctx *cipher_ctx, void *user_data);
+
+/**
+ * @macrosection
+ *
+ * Datagram flags
+ */
+
+/**
+ * @macro
+ *
+ * :macro:`NGTCP2_DATAGRAM_FLAG_NONE` indicates no flag set.
+ */
+#define NGTCP2_DATAGRAM_FLAG_NONE 0x00u
+
+/**
+ * @macro
+ *
+ * :macro:`NGTCP2_DATAGRAM_FLAG_EARLY` indicates that DATAGRAM frame
+ * is received in 0RTT packet and the handshake has not completed yet,
+ * which means that the data might be replayed.
+ */
+#define NGTCP2_DATAGRAM_FLAG_EARLY 0x01u
+
+/**
+ * @functypedef
+ *
+ * :type:`ngtcp2_recv_datagram` is invoked when DATAGRAM frame is
+ * received. |flags| is bitwise-OR of zero or more of
+ * :macro:`NGTCP2_DATAGRAM_FLAG_* <NGTCP2_DATAGRAM_FLAG_NONE>`.
+ *
+ * If :macro:`NGTCP2_DATAGRAM_FLAG_EARLY` is set in |flags|, it
+ * indicates that DATAGRAM frame was received in 0RTT packet and a
+ * handshake has not completed yet.
+ *
+ * The callback function must return 0 if it succeeds, or
+ * :macro:`NGTCP2_ERR_CALLBACK_FAILURE` which makes the library return
+ * immediately.
+ */
+typedef int (*ngtcp2_recv_datagram)(ngtcp2_conn *conn, uint32_t flags,
+ const uint8_t *data, size_t datalen,
+ void *user_data);
+
+/**
+ * @functypedef
+ *
+ * :type:`ngtcp2_ack_datagram` is invoked when a packet which contains
+ * DATAGRAM frame which is identified by |dgram_id| is acknowledged.
+ * |dgram_id| is the valued passed to `ngtcp2_conn_writev_datagram`.
+ *
+ * The callback function must return 0 if it succeeds, or
+ * :macro:`NGTCP2_ERR_CALLBACK_FAILURE` which makes the library return
+ * immediately.
+ */
+typedef int (*ngtcp2_ack_datagram)(ngtcp2_conn *conn, uint64_t dgram_id,
+ void *user_data);
+
+/**
+ * @functypedef
+ *
+ * :type:`ngtcp2_lost_datagram` is invoked when a packet which
+ * contains DATAGRAM frame which is identified by |dgram_id| is
+ * declared lost. |dgram_id| is the valued passed to
+ * `ngtcp2_conn_writev_datagram`. Note that the loss might be
+ * spurious, and DATAGRAM frame might be acknowledged later.
+ *
+ * The callback function must return 0 if it succeeds, or
+ * :macro:`NGTCP2_ERR_CALLBACK_FAILURE` which makes the library return
+ * immediately.
+ */
+typedef int (*ngtcp2_lost_datagram)(ngtcp2_conn *conn, uint64_t dgram_id,
+ void *user_data);
+
+/**
+ * @functypedef
+ *
+ * :type:`ngtcp2_get_path_challenge_data` is a callback function to
+ * ask an application for new data that is sent in PATH_CHALLENGE
+ * frame. Application must generate new unpredictable exactly
+ * :macro:`NGTCP2_PATH_CHALLENGE_DATALEN` bytes of random data and
+ * store them into the buffer pointed by |data|.
+ *
+ * The callback function must return 0 if it succeeds. Returning
+ * :macro:`NGTCP2_ERR_CALLBACK_FAILURE` makes the library call return
+ * immediately.
+ */
+typedef int (*ngtcp2_get_path_challenge_data)(ngtcp2_conn *conn, uint8_t *data,
+ void *user_data);
+
+/**
+ * @functypedef
+ *
+ * :type:`ngtcp2_stream_stop_sending` is invoked when a stream is no
+ * longer read by a local endpoint before it receives all stream data.
+ * This function is called at most once per stream. |app_error_code|
+ * is the error code passed to `ngtcp2_conn_shutdown_stream_read` or
+ * `ngtcp2_conn_shutdown_stream`.
+ *
+ * The callback function must return 0 if it succeeds. Returning
+ * :macro:`NGTCP2_ERR_CALLBACK_FAILURE` makes the library call return
+ * immediately.
+ */
+typedef int (*ngtcp2_stream_stop_sending)(ngtcp2_conn *conn, int64_t stream_id,
+ uint64_t app_error_code,
+ void *user_data,
+ void *stream_user_data);
+
+/**
+ * @functypedef
+ *
+ * :type:`ngtcp2_version_negotiation` is invoked when the compatible
+ * version negotiation takes place. For client, it is called when it
+ * sees a change in version field of a long header packet. This
+ * callback function might be called multiple times for client. For
+ * server, it is called once when the version is negotiated.
+ *
+ * The implementation of this callback must install new Initial keys
+ * for |version|. Use `ngtcp2_conn_install_vneg_initial_key` to
+ * install keys.
+ *
+ * The callback function must return 0 if it succeeds. Returning
+ * :macro:`NGTCP2_ERR_CALLBACK_FAILURE` makes the library call return
+ * immediately.
+ */
+typedef int (*ngtcp2_version_negotiation)(ngtcp2_conn *conn, uint32_t version,
+ const ngtcp2_cid *client_dcid,
+ void *user_data);
+
+/**
+ * @functypedef
+ *
+ * :type:`ngtcp2_recv_key` is invoked when new key is installed to
+ * |conn| during QUIC cryptographic handshake.
+ *
+ * The callback function must return 0 if it succeeds. Returning
+ * :macro:`NGTCP2_ERR_CALLBACK_FAILURE` makes the library call return
+ * immediately.
+ */
+typedef int (*ngtcp2_recv_key)(ngtcp2_conn *conn, ngtcp2_crypto_level level,
+ void *user_data);
+
+/**
+ * @functypedef
+ *
+ * :type:`ngtcp2_early_data_rejected` is invoked when early data was
+ * rejected by server, or client decided not to attempt early data.
+ *
+ * The callback function must return 0 if it succeeds. Returning
+ * :macro:`NGTCP2_ERR_CALLBACK_FAILURE` makes the library call return
+ * immediately.
+ */
+typedef int (*ngtcp2_early_data_rejected)(ngtcp2_conn *conn, void *user_data);
+
+#define NGTCP2_CALLBACKS_VERSION_V1 1
+#define NGTCP2_CALLBACKS_VERSION NGTCP2_CALLBACKS_VERSION_V1
+
+/**
+ * @struct
+ *
+ * :type:`ngtcp2_callbacks` holds a set of callback functions.
+ */
+typedef struct ngtcp2_callbacks {
+ /**
+ * :member:`client_initial` is a callback function which is invoked
+ * when client asks TLS stack to produce first TLS cryptographic
+ * handshake message. This callback function must be specified for
+ * a client application.
+ */
+ ngtcp2_client_initial client_initial;
+ /**
+ * :member:`recv_client_initial` is a callback function which is
+ * invoked when a server receives the first packet from client.
+ * This callback function must be specified for a server application.
+ */
+ ngtcp2_recv_client_initial recv_client_initial;
+ /**
+ * :member:`recv_crypto_data` is a callback function which is
+ * invoked when cryptographic data (CRYPTO frame, in other words,
+ * TLS message) is received. This callback function must be
+ * specified.
+ */
+ ngtcp2_recv_crypto_data recv_crypto_data;
+ /**
+ * :member:`handshake_completed` is a callback function which is
+ * invoked when QUIC cryptographic handshake has completed. This
+ * callback function is optional.
+ */
+ ngtcp2_handshake_completed handshake_completed;
+ /**
+ * :member:`recv_version_negotiation` is a callback function which
+ * is invoked when Version Negotiation packet is received by a
+ * client. This callback function is optional.
+ */
+ ngtcp2_recv_version_negotiation recv_version_negotiation;
+ /**
+ * :member:`encrypt` is a callback function which is invoked to
+ * encrypt a QUIC packet. This callback function must be specified.
+ */
+ ngtcp2_encrypt encrypt;
+ /**
+ * :member:`decrypt` is a callback function which is invoked to
+ * decrypt a QUIC packet. This callback function must be specified.
+ */
+ ngtcp2_decrypt decrypt;
+ /**
+ * :member:`hp_mask` is a callback function which is invoked to get
+ * a mask to encrypt or decrypt packet header. This callback
+ * function must be specified.
+ */
+ ngtcp2_hp_mask hp_mask;
+ /**
+ * :member:`recv_stream_data` is a callback function which is
+ * invoked when STREAM data, which includes application data, is
+ * received. This callback function is optional.
+ */
+ ngtcp2_recv_stream_data recv_stream_data;
+ /**
+ * :member:`acked_stream_data_offset` is a callback function which
+ * is invoked when STREAM data, which includes application data, is
+ * acknowledged by a remote endpoint. It tells an application the
+ * largest offset of acknowledged STREAM data without a gap so that
+ * application can free memory for the data. This callback function
+ * is optional.
+ */
+ ngtcp2_acked_stream_data_offset acked_stream_data_offset;
+ /**
+ * :member:`stream_open` is a callback function which is invoked
+ * when new remote stream is opened by a remote endpoint. This
+ * callback function is optional.
+ */
+ ngtcp2_stream_open stream_open;
+ /**
+ * :member:`stream_close` is a callback function which is invoked
+ * when a stream is closed. This callback function is optional.
+ */
+ ngtcp2_stream_close stream_close;
+ /**
+ * :member:`recv_stateless_reset` is a callback function which is
+ * invoked when Stateless Reset packet is received. This callback
+ * function is optional.
+ */
+ ngtcp2_recv_stateless_reset recv_stateless_reset;
+ /**
+ * :member:`recv_retry` is a callback function which is invoked when
+ * a client receives Retry packet. For client, this callback
+ * function must be specified. Server never receive Retry packet.
+ */
+ ngtcp2_recv_retry recv_retry;
+ /**
+ * :member:`extend_max_local_streams_bidi` is a callback function
+ * which is invoked when the number of bidirectional stream which a
+ * local endpoint can open is increased. This callback function is
+ * optional.
+ */
+ ngtcp2_extend_max_streams extend_max_local_streams_bidi;
+ /**
+ * :member:`extend_max_local_streams_uni` is a callback function
+ * which is invoked when the number of unidirectional stream which a
+ * local endpoint can open is increased. This callback function is
+ * optional.
+ */
+ ngtcp2_extend_max_streams extend_max_local_streams_uni;
+ /**
+ * :member:`rand` is a callback function which is invoked when the
+ * library needs sequence of random data. This callback function
+ * must be specified.
+ */
+ ngtcp2_rand rand;
+ /**
+ * :member:`get_new_connection_id` is a callback function which is
+ * invoked when the library needs new connection ID. This callback
+ * function must be specified.
+ */
+ ngtcp2_get_new_connection_id get_new_connection_id;
+ /**
+ * :member:`remove_connection_id` is a callback function which
+ * notifies an application that connection ID is no longer used by a
+ * remote endpoint. This callback function is optional.
+ */
+ ngtcp2_remove_connection_id remove_connection_id;
+ /**
+ * :member:`update_key` is a callback function which is invoked when
+ * the library tells an application that it must update keying
+ * materials and install new keys. This callback function must be
+ * specified.
+ */
+ ngtcp2_update_key update_key;
+ /**
+ * :member:`path_validation` is a callback function which is invoked
+ * when path validation completed. This callback function is
+ * optional.
+ */
+ ngtcp2_path_validation path_validation;
+ /**
+ * :member:`select_preferred_addr` is a callback function which is
+ * invoked when the library asks a client to select preferred
+ * address presented by a server. This callback function is
+ * optional.
+ */
+ ngtcp2_select_preferred_addr select_preferred_addr;
+ /**
+ * :member:`stream_reset` is a callback function which is invoked
+ * when a stream is reset by a remote endpoint. This callback
+ * function is optional.
+ */
+ ngtcp2_stream_reset stream_reset;
+ /**
+ * :member:`extend_max_remote_streams_bidi` is a callback function
+ * which is invoked when the number of bidirectional streams which a
+ * remote endpoint can open is increased. This callback function is
+ * optional.
+ */
+ ngtcp2_extend_max_streams extend_max_remote_streams_bidi;
+ /**
+ * :member:`extend_max_remote_streams_uni` is a callback function
+ * which is invoked when the number of unidirectional streams which
+ * a remote endpoint can open is increased. This callback function
+ * is optional.
+ */
+ ngtcp2_extend_max_streams extend_max_remote_streams_uni;
+ /**
+ * :member:`extend_max_stream_data` is callback function which is
+ * invoked when the maximum offset of STREAM data that a local
+ * endpoint can send is increased. This callback function is
+ * optional.
+ */
+ ngtcp2_extend_max_stream_data extend_max_stream_data;
+ /**
+ * :member:`dcid_status` is a callback function which is invoked
+ * when the new destination Connection ID is activated or the
+ * activated destination Connection ID is now deactivated. This
+ * callback function is optional.
+ */
+ ngtcp2_connection_id_status dcid_status;
+ /**
+ * :member:`handshake_confirmed` is a callback function which is
+ * invoked when both endpoints agree that handshake has finished.
+ * This field is ignored by server because handshake_completed
+ * indicates the handshake confirmation for server. This callback
+ * function is optional.
+ */
+ ngtcp2_handshake_confirmed handshake_confirmed;
+ /**
+ * :member:`recv_new_token` is a callback function which is invoked
+ * when new token is received from server. This field is ignored by
+ * server. This callback function is optional.
+ */
+ ngtcp2_recv_new_token recv_new_token;
+ /**
+ * :member:`delete_crypto_aead_ctx` is a callback function which
+ * deletes a given AEAD cipher context object. This callback
+ * function must be specified.
+ */
+ ngtcp2_delete_crypto_aead_ctx delete_crypto_aead_ctx;
+ /**
+ * :member:`delete_crypto_cipher_ctx` is a callback function which
+ * deletes a given cipher context object. This callback function
+ * must be specified.
+ */
+ ngtcp2_delete_crypto_cipher_ctx delete_crypto_cipher_ctx;
+ /**
+ * :member:`recv_datagram` is a callback function which is invoked
+ * when DATAGRAM frame is received. This callback function is
+ * optional.
+ */
+ ngtcp2_recv_datagram recv_datagram;
+ /**
+ * :member:`ack_datagram` is a callback function which is invoked
+ * when a packet containing DATAGRAM frame is acknowledged. This
+ * callback function is optional.
+ */
+ ngtcp2_ack_datagram ack_datagram;
+ /**
+ * :member:`lost_datagram` is a callback function which is invoked
+ * when a packet containing DATAGRAM frame is declared lost. This
+ * callback function is optional.
+ */
+ ngtcp2_lost_datagram lost_datagram;
+ /**
+ * :member:`get_path_challenge_data` is a callback function which is
+ * invoked when the library needs new PATH_CHALLENGE data. This
+ * callback must be specified.
+ */
+ ngtcp2_get_path_challenge_data get_path_challenge_data;
+ /**
+ * :member:`stream_stop_sending` is a callback function which is
+ * invoked when a local endpoint no longer reads from a stream
+ * before it receives all stream data. This callback function is
+ * optional.
+ */
+ ngtcp2_stream_stop_sending stream_stop_sending;
+ /**
+ * :member:`version_negotiation` is a callback function which is
+ * invoked when the compatible version negotiation takes place.
+ * This callback function must be specified.
+ */
+ ngtcp2_version_negotiation version_negotiation;
+ /**
+ * :member:`recv_rx_key` is a callback function which is invoked
+ * when a new key for decrypting packets is installed during QUIC
+ * cryptographic handshake. It is not called for
+ * :enum:`ngtcp2_crypto_level.NGTCP2_CRYPTO_LEVEL_INITIAL`.
+ */
+ ngtcp2_recv_key recv_rx_key;
+ /**
+ * :member:`recv_tx_key` is a callback function which is invoked
+ * when a new key for encrypting packets is installed during QUIC
+ * cryptographic handshake. It is not called for
+ * :enum:`ngtcp2_crypto_level.NGTCP2_CRYPTO_LEVEL_INITIAL`.
+ */
+ ngtcp2_recv_key recv_tx_key;
+ /**
+ * :member:`ngtcp2_early_data_rejected` is a callback function which
+ * is invoked when an attempt to send early data by client was
+ * rejected by server, or client decided not to attempt early data.
+ * This callback function is only used by client.
+ */
+ ngtcp2_early_data_rejected early_data_rejected;
+} ngtcp2_callbacks;
+
+/**
+ * @function
+ *
+ * `ngtcp2_pkt_write_connection_close` writes Initial packet
+ * containing CONNECTION_CLOSE frame with the given |error_code| and
+ * the optional |reason| of length |reasonlen| to the buffer pointed
+ * by |dest| of length |destlen|. All encryption parameters are for
+ * Initial packet encryption. The packet number is always 0.
+ *
+ * The primary use case of this function is for server to send
+ * CONNECTION_CLOSE frame in Initial packet to close connection
+ * without committing the state when validating Retry token fails.
+ *
+ * This function returns the number of bytes written if it succeeds,
+ * or one of the following negative error codes:
+ *
+ * :macro:`NGTCP2_ERR_NOBUF`
+ * Buffer is too small.
+ * :macro:`NGTCP2_ERR_CALLBACK_FAILURE`
+ * Callback function failed.
+ */
+NGTCP2_EXTERN ngtcp2_ssize ngtcp2_pkt_write_connection_close(
+ uint8_t *dest, size_t destlen, uint32_t version, const ngtcp2_cid *dcid,
+ const ngtcp2_cid *scid, uint64_t error_code, const uint8_t *reason,
+ size_t reasonlen, ngtcp2_encrypt encrypt, const ngtcp2_crypto_aead *aead,
+ const ngtcp2_crypto_aead_ctx *aead_ctx, const uint8_t *iv,
+ ngtcp2_hp_mask hp_mask, const ngtcp2_crypto_cipher *hp,
+ const ngtcp2_crypto_cipher_ctx *hp_ctx);
+
+/**
+ * @function
+ *
+ * `ngtcp2_pkt_write_retry` writes Retry packet in the buffer pointed
+ * by |dest| whose length is |destlen|. |dcid| is the destination
+ * connection ID which appeared in a packet as a source connection ID
+ * sent by client. |scid| is a server chosen source connection ID.
+ * |odcid| specifies Original Destination Connection ID which appeared
+ * in a packet as a destination connection ID sent by client. |token|
+ * specifies Retry Token, and |tokenlen| specifies its length. |aead|
+ * must be AEAD_AES_128_GCM. |aead_ctx| must be initialized with
+ * :macro:`NGTCP2_RETRY_KEY` as an encryption key.
+ *
+ * This function returns the number of bytes written to the buffer, or
+ * one of the following negative error codes:
+ *
+ * :macro:`NGTCP2_ERR_NOBUF`
+ * Buffer is too small.
+ * :macro:`NGTCP2_ERR_CALLBACK_FAILURE`
+ * Callback function failed.
+ * :macro:`NGTCP2_ERR_INVALID_ARGUMENT`
+ * :member:`odcid->datalen <ngtcp2_cid.datalen>` is less than
+ * :macro:`NGTCP2_MIN_INITIAL_DCIDLEN`.
+ */
+NGTCP2_EXTERN ngtcp2_ssize ngtcp2_pkt_write_retry(
+ uint8_t *dest, size_t destlen, uint32_t version, const ngtcp2_cid *dcid,
+ const ngtcp2_cid *scid, const ngtcp2_cid *odcid, const uint8_t *token,
+ size_t tokenlen, ngtcp2_encrypt encrypt, const ngtcp2_crypto_aead *aead,
+ const ngtcp2_crypto_aead_ctx *aead_ctx);
+
+/**
+ * @function
+ *
+ * `ngtcp2_accept` is used by server implementation, and decides
+ * whether packet |pkt| of length |pktlen| from client is acceptable
+ * for the very initial packet to a connection.
+ *
+ * If |dest| is not ``NULL`` and the function returns 0, or
+ * :macro:`NGTCP2_ERR_RETRY`, the decoded packet header is stored to
+ * the object pointed by |dest|.
+ *
+ * This function returns 0 if it succeeds, or one of the following
+ * negative error codes:
+ *
+ * :macro:`NGTCP2_ERR_RETRY`
+ * Retry packet should be sent.
+ * :macro:`NGTCP2_ERR_INVALID_ARGUMENT`
+ * The packet is not acceptable for the very first packet to a new
+ * connection; or the function failed to parse the packet header.
+ */
+NGTCP2_EXTERN int ngtcp2_accept(ngtcp2_pkt_hd *dest, const uint8_t *pkt,
+ size_t pktlen);
+
+/**
+ * @function
+ *
+ * `ngtcp2_conn_client_new` creates new :type:`ngtcp2_conn`, and
+ * initializes it as client. |dcid| is randomized destination
+ * connection ID. |scid| is source connection ID.
+ * |client_chosen_version| is a QUIC version that a client chooses.
+ * |path| is the network path where this QUIC connection is being
+ * established and must not be ``NULL``. |callbacks|, |settings|, and
+ * |params| must not be ``NULL``, and the function make a copy of each
+ * of them. |params| is local QUIC transport parameters and sent to a
+ * remote endpoint during handshake. |user_data| is the arbitrary
+ * pointer which is passed to the user-defined callback functions. If
+ * |mem| is ``NULL``, the memory allocator returned by
+ * `ngtcp2_mem_default()` is used.
+ *
+ * This function returns 0 if it succeeds, or one of the following
+ * negative error codes:
+ *
+ * :macro:`NGTCP2_ERR_NOMEM`
+ * Out of memory.
+ */
+NGTCP2_EXTERN int ngtcp2_conn_client_new_versioned(
+ ngtcp2_conn **pconn, const ngtcp2_cid *dcid, const ngtcp2_cid *scid,
+ const ngtcp2_path *path, uint32_t client_chosen_version,
+ int callbacks_version, const ngtcp2_callbacks *callbacks,
+ int settings_version, const ngtcp2_settings *settings,
+ int transport_params_version, const ngtcp2_transport_params *params,
+ const ngtcp2_mem *mem, void *user_data);
+
+/**
+ * @function
+ *
+ * `ngtcp2_conn_server_new` creates new :type:`ngtcp2_conn`, and
+ * initializes it as server. |dcid| is a destination connection ID.
+ * |scid| is a source connection ID. |path| is the network path where
+ * this QUIC connection is being established and must not be ``NULL``.
+ * |client_chosen_version| is a QUIC version that a client chooses.
+ * |callbacks|, |settings|, and |params| must not be ``NULL``, and the
+ * function make a copy of each of them. |params| is local QUIC
+ * transport parameters and sent to a remote endpoint during
+ * handshake. |user_data| is the arbitrary pointer which is passed to
+ * the user-defined callback functions. If |mem| is ``NULL``, the
+ * memory allocator returned by `ngtcp2_mem_default()` is used.
+ *
+ * This function returns 0 if it succeeds, or one of the following
+ * negative error codes:
+ *
+ * :macro:`NGTCP2_ERR_NOMEM`
+ * Out of memory.
+ */
+NGTCP2_EXTERN int ngtcp2_conn_server_new_versioned(
+ ngtcp2_conn **pconn, const ngtcp2_cid *dcid, const ngtcp2_cid *scid,
+ const ngtcp2_path *path, uint32_t client_chosen_version,
+ int callbacks_version, const ngtcp2_callbacks *callbacks,
+ int settings_version, const ngtcp2_settings *settings,
+ int transport_params_version, const ngtcp2_transport_params *params,
+ const ngtcp2_mem *mem, void *user_data);
+
+/**
+ * @function
+ *
+ * `ngtcp2_conn_del` frees resources allocated for |conn|. It also
+ * frees memory pointed by |conn|.
+ */
+NGTCP2_EXTERN void ngtcp2_conn_del(ngtcp2_conn *conn);
+
+/**
+ * @function
+ *
+ * `ngtcp2_conn_read_pkt` decrypts QUIC packet given in |pkt| of
+ * length |pktlen| and processes it. |path| is the network path the
+ * packet is delivered and must not be ``NULL``. |pi| is packet
+ * metadata and may be ``NULL``. This function performs QUIC handshake
+ * as well.
+ *
+ * This function must not be called from inside the callback
+ * functions.
+ *
+ * This function returns 0 if it succeeds, or negative error codes.
+ * If :macro:`NGTCP2_ERR_RETRY` is returned, application must be a
+ * server and it must perform address validation by sending Retry
+ * packet and discard the connection state. If
+ * :macro:`NGTCP2_ERR_DROP_CONN` is returned, server application must
+ * drop the connection silently (without sending any CONNECTION_CLOSE
+ * frame) and discard connection state. If
+ * :macro:`NGTCP2_ERR_DRAINING` is returned, a connection has entered
+ * the draining state, and no further packet transmission is allowed.
+ * If :macro:`NGTCP2_ERR_CRYPTO` is returned, the error happened in
+ * TLS stack and `ngtcp2_conn_get_tls_alert` returns TLS alert if set.
+ *
+ * If any other negative errors are returned, call
+ * `ngtcp2_conn_write_connection_close` to get terminal packet, and
+ * sending it makes QUIC connection enter the closing state.
+ */
+NGTCP2_EXTERN int
+ngtcp2_conn_read_pkt_versioned(ngtcp2_conn *conn, const ngtcp2_path *path,
+ int pkt_info_version, const ngtcp2_pkt_info *pi,
+ const uint8_t *pkt, size_t pktlen,
+ ngtcp2_tstamp ts);
+
+/**
+ * @function
+ *
+ * `ngtcp2_conn_write_pkt` is equivalent to calling
+ * `ngtcp2_conn_writev_stream` with -1 as stream_id, no stream data, and
+ * :macro:`NGTCP2_WRITE_STREAM_FLAG_NONE` as flags.
+ */
+NGTCP2_EXTERN ngtcp2_ssize ngtcp2_conn_write_pkt_versioned(
+ ngtcp2_conn *conn, ngtcp2_path *path, int pkt_info_version,
+ ngtcp2_pkt_info *pi, uint8_t *dest, size_t destlen, ngtcp2_tstamp ts);
+
+/**
+ * @function
+ *
+ * `ngtcp2_conn_handshake_completed` tells |conn| that the TLS stack
+ * declares TLS handshake completion. This does not mean QUIC
+ * handshake has completed. The library needs extra conditions to be
+ * met.
+ */
+NGTCP2_EXTERN void ngtcp2_conn_handshake_completed(ngtcp2_conn *conn);
+
+/**
+ * @function
+ *
+ * `ngtcp2_conn_get_handshake_completed` returns nonzero if QUIC handshake
+ * has completed.
+ */
+NGTCP2_EXTERN int ngtcp2_conn_get_handshake_completed(ngtcp2_conn *conn);
+
+/**
+ * @function
+ *
+ * `ngtcp2_conn_install_initial_key` installs packet protection keying
+ * materials for Initial packets. |rx_aead_ctx| is AEAD cipher
+ * context object and must be initialized with a decryption key.
+ * |rx_iv| is IV of length |rx_ivlen| for decryption. |rx_hp_ctx| is
+ * a packet header protection cipher context object for decryption.
+ * Similarly, |tx_aead_ctx|, |tx_iv| and |tx_hp_ctx| are for
+ * encrypting outgoing packets and are the same length with the
+ * decryption counterpart . If they have already been set, they are
+ * overwritten.
+ *
+ * |ivlen| must be the minimum length of AEAD nonce, or 8 bytes if
+ * that is larger.
+ *
+ * If this function succeeds, |conn| takes ownership of |rx_aead_ctx|,
+ * |rx_hp_ctx|, |tx_aead_ctx|, and |tx_hp_ctx|.
+ * :type:`ngtcp2_delete_crypto_aead_ctx` and
+ * :type:`ngtcp2_delete_crypto_cipher_ctx` will be called to delete
+ * these objects when they are no longer used. If this function
+ * fails, the caller is responsible to delete them.
+ *
+ * After receiving Retry packet, the DCID most likely changes. In
+ * that case, client application must generate these keying materials
+ * again based on new DCID and install them again.
+ *
+ * This function returns 0 if it succeeds, or one of the following
+ * negative error codes:
+ *
+ * :macro:`NGTCP2_ERR_NOMEM`
+ * Out of memory.
+ */
+NGTCP2_EXTERN int ngtcp2_conn_install_initial_key(
+ ngtcp2_conn *conn, const ngtcp2_crypto_aead_ctx *rx_aead_ctx,
+ const uint8_t *rx_iv, const ngtcp2_crypto_cipher_ctx *rx_hp_ctx,
+ const ngtcp2_crypto_aead_ctx *tx_aead_ctx, const uint8_t *tx_iv,
+ const ngtcp2_crypto_cipher_ctx *tx_hp_ctx, size_t ivlen);
+
+/**
+ * @function
+ *
+ * `ngtcp2_conn_install_vneg_initial_key` installs packet protection
+ * keying materials for Initial packets on compatible version
+ * negotiation for |version|. |rx_aead_ctx| is AEAD cipher context
+ * object and must be initialized with a decryption key. |rx_iv| is
+ * IV of length |rx_ivlen| for decryption. |rx_hp_ctx| is a packet
+ * header protection cipher context object for decryption. Similarly,
+ * |tx_aead_ctx|, |tx_iv| and |tx_hp_ctx| are for encrypting outgoing
+ * packets and are the same length with the decryption counterpart .
+ * If they have already been set, they are overwritten.
+ *
+ * |ivlen| must be the minimum length of AEAD nonce, or 8 bytes if
+ * that is larger.
+ *
+ * If this function succeeds, |conn| takes ownership of |rx_aead_ctx|,
+ * |rx_hp_ctx|, |tx_aead_ctx|, and |tx_hp_ctx|.
+ * :type:`ngtcp2_delete_crypto_aead_ctx` and
+ * :type:`ngtcp2_delete_crypto_cipher_ctx` will be called to delete
+ * these objects when they are no longer used. If this function
+ * fails, the caller is responsible to delete them.
+ *
+ * This function returns 0 if it succeeds, or one of the following
+ * negative error codes:
+ *
+ * :macro:`NGTCP2_ERR_NOMEM`
+ * Out of memory.
+ */
+NGTCP2_EXTERN int ngtcp2_conn_install_vneg_initial_key(
+ ngtcp2_conn *conn, uint32_t version,
+ const ngtcp2_crypto_aead_ctx *rx_aead_ctx, const uint8_t *rx_iv,
+ const ngtcp2_crypto_cipher_ctx *rx_hp_ctx,
+ const ngtcp2_crypto_aead_ctx *tx_aead_ctx, const uint8_t *tx_iv,
+ const ngtcp2_crypto_cipher_ctx *tx_hp_ctx, size_t ivlen);
+
+/**
+ * @function
+ *
+ * `ngtcp2_conn_install_rx_handshake_key` installs packet protection
+ * keying materials for decrypting incoming Handshake packets.
+ * |aead_ctx| is AEAD cipher context object which must be initialized
+ * with a decryption key. |iv| is IV of length |ivlen|. |hp_ctx| is
+ * a packet header protection cipher context object.
+ *
+ * |ivlen| must be the minimum length of AEAD nonce, or 8 bytes if
+ * that is larger.
+ *
+ * If this function succeeds, |conn| takes ownership of |aead_ctx|,
+ * and |hp_ctx|. :type:`ngtcp2_delete_crypto_aead_ctx` and
+ * :type:`ngtcp2_delete_crypto_cipher_ctx` will be called to delete
+ * these objects when they are no longer used. If this function
+ * fails, the caller is responsible to delete them.
+ *
+ * This function returns 0 if it succeeds, or one of the following
+ * negative error codes:
+ *
+ * :macro:`NGTCP2_ERR_NOMEM`
+ * Out of memory.
+ */
+NGTCP2_EXTERN int ngtcp2_conn_install_rx_handshake_key(
+ ngtcp2_conn *conn, const ngtcp2_crypto_aead_ctx *aead_ctx,
+ const uint8_t *iv, size_t ivlen, const ngtcp2_crypto_cipher_ctx *hp_ctx);
+
+/**
+ * @function
+ *
+ * `ngtcp2_conn_install_tx_handshake_key` installs packet protection
+ * keying materials for encrypting outgoing Handshake packets.
+ * |aead_ctx| is AEAD cipher context object which must be initialized
+ * with an encryption key. |iv| is IV of length |ivlen|. |hp_ctx| is
+ * a packet header protection cipher context object.
+ *
+ * |ivlen| must be the minimum length of AEAD nonce, or 8 bytes if
+ * that is larger.
+ *
+ * If this function succeeds, |conn| takes ownership of |aead_ctx| and
+ * |hp_ctx|. :type:`ngtcp2_delete_crypto_aead_ctx` and
+ * :type:`ngtcp2_delete_crypto_cipher_ctx` will be called to delete
+ * these objects when they are no longer used. If this function
+ * fails, the caller is responsible to delete them.
+ *
+ * This function returns 0 if it succeeds, or one of the following
+ * negative error codes:
+ *
+ * :macro:`NGTCP2_ERR_NOMEM`
+ * Out of memory.
+ */
+NGTCP2_EXTERN int ngtcp2_conn_install_tx_handshake_key(
+ ngtcp2_conn *conn, const ngtcp2_crypto_aead_ctx *aead_ctx,
+ const uint8_t *iv, size_t ivlen, const ngtcp2_crypto_cipher_ctx *hp_ctx);
+
+/**
+ * @function
+ *
+ * `ngtcp2_conn_install_early_key` installs packet protection AEAD
+ * cipher context object |aead_ctx|, IV |iv| of length |ivlen|, and
+ * packet header protection cipher context object |hp_ctx| to encrypt
+ * (for client) or decrypt (for server) 0RTT packets.
+ *
+ * |ivlen| must be the minimum length of AEAD nonce, or 8 bytes if
+ * that is larger.
+ *
+ * If this function succeeds, |conn| takes ownership of |aead_ctx| and
+ * |hp_ctx|. :type:`ngtcp2_delete_crypto_aead_ctx` and
+ * :type:`ngtcp2_delete_crypto_cipher_ctx` will be called to delete
+ * these objects when they are no longer used. If this function
+ * fails, the caller is responsible to delete them.
+ *
+ * This function returns 0 if it succeeds, or one of the following
+ * negative error codes:
+ *
+ * :macro:`NGTCP2_ERR_NOMEM`
+ * Out of memory.
+ */
+NGTCP2_EXTERN int ngtcp2_conn_install_early_key(
+ ngtcp2_conn *conn, const ngtcp2_crypto_aead_ctx *aead_ctx,
+ const uint8_t *iv, size_t ivlen, const ngtcp2_crypto_cipher_ctx *hp_ctx);
+
+/**
+ * @function
+ *
+ * `ngtcp2_conn_install_rx_key` installs packet protection keying
+ * materials for decrypting Short header packets. |secret| of length
+ * |secretlen| is the decryption secret which is used to derive keying
+ * materials passed to this function. |aead_ctx| is AEAD cipher
+ * context object which must be initialized with a decryption key.
+ * |iv| is IV of length |ivlen|. |hp_ctx| is a packet header
+ * protection cipher context object.
+ *
+ * |ivlen| must be the minimum length of AEAD nonce, or 8 bytes if
+ * that is larger.
+ *
+ * If this function succeeds, |conn| takes ownership of |aead_ctx| and
+ * |hp_ctx|. :type:`ngtcp2_delete_crypto_aead_ctx` and
+ * :type:`ngtcp2_delete_crypto_cipher_ctx` will be called to delete
+ * these objects when they are no longer used. If this function
+ * fails, the caller is responsible to delete them.
+ *
+ * This function returns 0 if it succeeds, or one of the following
+ * negative error codes:
+ *
+ * :macro:`NGTCP2_ERR_NOMEM`
+ * Out of memory.
+ */
+NGTCP2_EXTERN int ngtcp2_conn_install_rx_key(
+ ngtcp2_conn *conn, const uint8_t *secret, size_t secretlen,
+ const ngtcp2_crypto_aead_ctx *aead_ctx, const uint8_t *iv, size_t ivlen,
+ const ngtcp2_crypto_cipher_ctx *hp_ctx);
+
+/**
+ * @function
+ *
+ * `ngtcp2_conn_install_tx_key` installs packet protection keying
+ * materials for encrypting Short header packets. |secret| of length
+ * |secretlen| is the encryption secret which is used to derive keying
+ * materials passed to this function. |aead_ctx| is AEAD cipher
+ * context object which must be initialized with an encryption key.
+ * |iv| is IV of length |ivlen|. |hp_ctx| is a packet header
+ * protection cipher context object.
+ *
+ * |ivlen| must be the minimum length of AEAD nonce, or 8 bytes if
+ * that is larger.
+ *
+ * If this function succeeds, |conn| takes ownership of |aead_ctx| and
+ * |hp_ctx|. :type:`ngtcp2_delete_crypto_aead_ctx` and
+ * :type:`ngtcp2_delete_crypto_cipher_ctx` will be called to delete
+ * these objects when they are no longer used. If this function
+ * fails, the caller is responsible to delete them.
+ *
+ * This function returns 0 if it succeeds, or one of the following
+ * negative error codes:
+ *
+ * :macro:`NGTCP2_ERR_NOMEM`
+ * Out of memory.
+ */
+NGTCP2_EXTERN int ngtcp2_conn_install_tx_key(
+ ngtcp2_conn *conn, const uint8_t *secret, size_t secretlen,
+ const ngtcp2_crypto_aead_ctx *aead_ctx, const uint8_t *iv, size_t ivlen,
+ const ngtcp2_crypto_cipher_ctx *hp_ctx);
+
+/**
+ * @function
+ *
+ * `ngtcp2_conn_initiate_key_update` initiates the key update.
+ *
+ * This function returns 0 if it succeeds, or one of the following
+ * negative error codes:
+ *
+ * :macro:`NGTCP2_ERR_INVALID_STATE`
+ * The previous key update has not been confirmed yet; or key
+ * update is too frequent; or new keys are not available yet.
+ */
+NGTCP2_EXTERN int ngtcp2_conn_initiate_key_update(ngtcp2_conn *conn,
+ ngtcp2_tstamp ts);
+
+/**
+ * @function
+ *
+ * `ngtcp2_conn_set_tls_error` sets the TLS related error |liberr| in
+ * |conn|. |liberr| must be one of ngtcp2 library error codes (which
+ * is defined as NGTCP2_ERR_* macro, such as
+ * :macro:`NGTCP2_ERR_DECRYPT`). In general, error code should be
+ * propagated via return value, but sometimes ngtcp2 API is called
+ * inside callback function of TLS stack and it does not allow to
+ * return ngtcp2 error code directly. In this case, implementation
+ * can set the error code (e.g.,
+ * :macro:`NGTCP2_ERR_MALFORMED_TRANSPORT_PARAM`) using this function.
+ */
+NGTCP2_EXTERN void ngtcp2_conn_set_tls_error(ngtcp2_conn *conn, int liberr);
+
+/**
+ * @function
+ *
+ * `ngtcp2_conn_get_tls_error` returns the value set by
+ * `ngtcp2_conn_set_tls_error`. If no value is set, this function
+ * returns 0.
+ */
+NGTCP2_EXTERN int ngtcp2_conn_get_tls_error(ngtcp2_conn *conn);
+
+/**
+ * @function
+ *
+ * `ngtcp2_conn_set_tls_alert` sets a TLS alert |alert| generated by a
+ * local endpoint to |conn|.
+ */
+NGTCP2_EXTERN void ngtcp2_conn_set_tls_alert(ngtcp2_conn *conn, uint8_t alert);
+
+/**
+ * @function
+ *
+ * `ngtcp2_conn_get_tls_alert` returns the value set by
+ * `ngtcp2_conn_set_tls_alert`. If no value is set, this function
+ * returns 0.
+ */
+NGTCP2_EXTERN uint8_t ngtcp2_conn_get_tls_alert(ngtcp2_conn *conn);
+
+/**
+ * @function
+ *
+ * `ngtcp2_conn_set_keep_alive_timeout` sets keep-alive timeout. If
+ * nonzero value is given, after a connection is idle at least in a
+ * given amount of time, a keep-alive packet is sent. If 0 is set,
+ * keep-alive functionality is disabled and this is the default.
+ */
+NGTCP2_EXTERN void ngtcp2_conn_set_keep_alive_timeout(ngtcp2_conn *conn,
+ ngtcp2_duration timeout);
+
+/**
+ * @function
+ *
+ * `ngtcp2_conn_get_expiry` returns the next expiry time. It returns
+ * ``UINT64_MAX`` if there is no next expiry.
+ *
+ * Call `ngtcp2_conn_handle_expiry()` and `ngtcp2_conn_write_pkt` (or
+ * `ngtcp2_conn_writev_stream`) if expiry time is passed.
+ */
+NGTCP2_EXTERN ngtcp2_tstamp ngtcp2_conn_get_expiry(ngtcp2_conn *conn);
+
+/**
+ * @function
+ *
+ * `ngtcp2_conn_handle_expiry` handles expired timer. It does nothing
+ * if timer is not expired.
+ */
+NGTCP2_EXTERN int ngtcp2_conn_handle_expiry(ngtcp2_conn *conn,
+ ngtcp2_tstamp ts);
+
+/**
+ * @function
+ *
+ * `ngtcp2_conn_get_pto` returns Probe Timeout (PTO).
+ */
+NGTCP2_EXTERN ngtcp2_duration ngtcp2_conn_get_pto(ngtcp2_conn *conn);
+
+/**
+ * @function
+ *
+ * `ngtcp2_conn_decode_remote_transport_params` decodes QUIC transport
+ * parameters from the buffer pointed by |data| of length |datalen|,
+ * and sets the result to |conn|.
+ *
+ * This function returns 0 if it succeeds, or one of the following
+ * negative error codes:
+ *
+ * :macro:`NGTCP2_ERR_REQUIRED_TRANSPORT_PARAM`
+ * The required parameter is missing.
+ * :macro:`NGTCP2_ERR_MALFORMED_TRANSPORT_PARAM`
+ * The input is malformed.
+ * :macro:`NGTCP2_ERR_TRANSPORT_PARAM`
+ * Failed to validate the remote QUIC transport parameters.
+ * :macro:`NGTCP2_ERR_VERSION_NEGOTIATION_FAILURE`
+ * Version negotiation failure.
+ * :macro:`NGTCP2_ERR_CALLBACK_FAILURE`
+ * User callback failed
+ */
+NGTCP2_EXTERN int
+ngtcp2_conn_decode_remote_transport_params(ngtcp2_conn *conn,
+ const uint8_t *data, size_t datalen);
+
+/**
+ * @function
+ *
+ * `ngtcp2_conn_get_remote_transport_params` returns a pointer to the
+ * remote QUIC transport parameters. If no remote transport
+ * parameters are set, it returns NULL.
+ */
+NGTCP2_EXTERN const ngtcp2_transport_params *
+ngtcp2_conn_get_remote_transport_params(ngtcp2_conn *conn);
+
+/**
+ * @function
+ *
+ * `ngtcp2_conn_set_early_remote_transport_params` sets |params| as
+ * transport parameters previously received from a server. The
+ * parameters are used to send early data. QUIC requires that client
+ * application should remember transport parameters along with a
+ * session ticket.
+ *
+ * At least following fields should be set:
+ *
+ * - initial_max_stream_id_bidi
+ * - initial_max_stream_id_uni
+ * - initial_max_stream_data_bidi_local
+ * - initial_max_stream_data_bidi_remote
+ * - initial_max_stream_data_uni
+ * - initial_max_data
+ * - active_connection_id_limit
+ * - max_datagram_frame_size (if DATAGRAM extension was negotiated)
+ *
+ * The following fields are ignored:
+ *
+ * - ack_delay_exponent
+ * - max_ack_delay
+ * - initial_scid
+ * - original_dcid
+ * - preferred_address and preferred_address_present
+ * - retry_scid and retry_scid_present
+ * - stateless_reset_token and stateless_reset_token_present
+ */
+NGTCP2_EXTERN void ngtcp2_conn_set_early_remote_transport_params_versioned(
+ ngtcp2_conn *conn, int transport_params_version,
+ const ngtcp2_transport_params *params);
+
+/**
+ * @function
+ *
+ * `ngtcp2_conn_set_local_transport_params` sets the local transport
+ * parameters |params|. This function can only be called by server.
+ * Although the local transport parameters are passed to
+ * `ngtcp2_conn_server_new`, server might want to update them after
+ * ALPN is chosen. In that case, server can update the transport
+ * parameter with this function. Server must call this function
+ * before calling `ngtcp2_conn_install_tx_handshake_key`.
+ *
+ * This function returns 0 if it succeeds, or one of the following
+ * negative error codes:
+ *
+ * :macro:`NGTCP2_ERR_INVALID_STATE`
+ * `ngtcp2_conn_install_tx_handshake_key` has been called.
+ */
+NGTCP2_EXTERN int ngtcp2_conn_set_local_transport_params_versioned(
+ ngtcp2_conn *conn, int transport_params_version,
+ const ngtcp2_transport_params *params);
+
+/**
+ * @function
+ *
+ * `ngtcp2_conn_get_local_transport_params` returns a pointer to the
+ * local QUIC transport parameters.
+ */
+NGTCP2_EXTERN const ngtcp2_transport_params *
+ngtcp2_conn_get_local_transport_params(ngtcp2_conn *conn);
+
+/**
+ * @function
+ *
+ * `ngtcp2_conn_encode_local_transport_params` encodes the local QUIC
+ * transport parameters in |dest| of length |destlen|. This is
+ * equivalent to calling `ngtcp2_conn_get_local_transport_params` and
+ * then `ngtcp2_encode_transport_params`.
+ *
+ * This function returns the number of written, or one of the
+ * following negative error codes:
+ *
+ * :macro:`NGTCP2_ERR_NOBUF`
+ * Buffer is too small.
+ */
+NGTCP2_EXTERN ngtcp2_ssize ngtcp2_conn_encode_local_transport_params(
+ ngtcp2_conn *conn, uint8_t *dest, size_t destlen);
+
+/**
+ * @function
+ *
+ * `ngtcp2_conn_open_bidi_stream` opens new bidirectional stream. The
+ * |stream_user_data| is the user data specific to the stream. The
+ * open stream ID is stored in |*pstream_id|.
+ *
+ * Application can call this function before handshake completes. For
+ * 0RTT packet, application can call this function after calling
+ * `ngtcp2_conn_set_early_remote_transport_params`. For 1RTT packet,
+ * application can call this function after calling
+ * `ngtcp2_conn_decode_remote_transport_params` and
+ * `ngtcp2_conn_install_tx_key`. If ngtcp2 crypto support library is
+ * used, application can call this function after calling
+ * `ngtcp2_crypto_derive_and_install_tx_key` for 1RTT packet.
+ *
+ * This function returns 0 if it succeeds, or one of the following
+ * negative error codes:
+ *
+ * :macro:`NGTCP2_ERR_NOMEM`
+ * Out of memory
+ * :macro:`NGTCP2_ERR_STREAM_ID_BLOCKED`
+ * The remote peer does not allow |stream_id| yet.
+ */
+NGTCP2_EXTERN int ngtcp2_conn_open_bidi_stream(ngtcp2_conn *conn,
+ int64_t *pstream_id,
+ void *stream_user_data);
+
+/**
+ * @function
+ *
+ * `ngtcp2_conn_open_uni_stream` opens new unidirectional stream. The
+ * |stream_user_data| is the user data specific to the stream. The
+ * open stream ID is stored in |*pstream_id|.
+ *
+ * Application can call this function before handshake completes. For
+ * 0RTT packet, application can call this function after calling
+ * `ngtcp2_conn_set_early_remote_transport_params`. For 1RTT packet,
+ * application can call this function after calling
+ * `ngtcp2_conn_decode_remote_transport_params` and
+ * `ngtcp2_conn_install_tx_key`. If ngtcp2 crypto support library is
+ * used, application can call this function after calling
+ * `ngtcp2_crypto_derive_and_install_tx_key` for 1RTT packet.
+ *
+ * This function returns 0 if it succeeds, or one of the following
+ * negative error codes:
+ *
+ * :macro:`NGTCP2_ERR_NOMEM`
+ * Out of memory
+ * :macro:`NGTCP2_ERR_STREAM_ID_BLOCKED`
+ * The remote peer does not allow |stream_id| yet.
+ */
+NGTCP2_EXTERN int ngtcp2_conn_open_uni_stream(ngtcp2_conn *conn,
+ int64_t *pstream_id,
+ void *stream_user_data);
+
+/**
+ * @function
+ *
+ * `ngtcp2_conn_shutdown_stream` closes stream denoted by |stream_id|
+ * abruptly. |app_error_code| is one of application error codes, and
+ * indicates the reason of shutdown. Successful call of this function
+ * does not immediately erase the state of the stream. The actual
+ * deletion is done when the remote endpoint sends acknowledgement.
+ * Calling this function is equivalent to call
+ * `ngtcp2_conn_shutdown_stream_read`, and
+ * `ngtcp2_conn_shutdown_stream_write` sequentially with the following
+ * differences. If |stream_id| refers to a local unidirectional
+ * stream, this function only shutdowns write side of the stream. If
+ * |stream_id| refers to a remote unidirectional stream, this function
+ * only shutdowns read side of the stream.
+ *
+ * This function returns 0 if it succeeds, or one of the following
+ * negative error codes:
+ *
+ * :macro:`NGTCP2_ERR_NOMEM`
+ * Out of memory
+ */
+NGTCP2_EXTERN int ngtcp2_conn_shutdown_stream(ngtcp2_conn *conn,
+ int64_t stream_id,
+ uint64_t app_error_code);
+
+/**
+ * @function
+ *
+ * `ngtcp2_conn_shutdown_stream_write` closes write-side of stream
+ * denoted by |stream_id| abruptly. |app_error_code| is one of
+ * application error codes, and indicates the reason of shutdown. If
+ * this function succeeds, no application data is sent to the remote
+ * endpoint. It discards all data which has not been acknowledged
+ * yet.
+ *
+ * This function returns 0 if it succeeds, or one of the following
+ * negative error codes:
+ *
+ * :macro:`NGTCP2_ERR_NOMEM`
+ * Out of memory
+ * :macro:`NGTCP2_ERR_INVALID_ARGUMENT`
+ * |stream_id| refers to a remote unidirectional stream.
+ */
+NGTCP2_EXTERN int ngtcp2_conn_shutdown_stream_write(ngtcp2_conn *conn,
+ int64_t stream_id,
+ uint64_t app_error_code);
+
+/**
+ * @function
+ *
+ * `ngtcp2_conn_shutdown_stream_read` closes read-side of stream
+ * denoted by |stream_id| abruptly. |app_error_code| is one of
+ * application error codes, and indicates the reason of shutdown. If
+ * this function succeeds, no application data is forwarded to an
+ * application layer.
+ *
+ * This function returns 0 if it succeeds, or one of the following
+ * negative error codes:
+ *
+ * :macro:`NGTCP2_ERR_NOMEM`
+ * Out of memory
+ * :macro:`NGTCP2_ERR_INVALID_ARGUMENT`
+ * |stream_id| refers to a local unidirectional stream.
+ */
+NGTCP2_EXTERN int ngtcp2_conn_shutdown_stream_read(ngtcp2_conn *conn,
+ int64_t stream_id,
+ uint64_t app_error_code);
+
+/**
+ * @macrosection
+ *
+ * Write stream data flags
+ */
+
+/**
+ * @macro
+ *
+ * :macro:`NGTCP2_WRITE_STREAM_FLAG_NONE` indicates no flag set.
+ */
+#define NGTCP2_WRITE_STREAM_FLAG_NONE 0x00u
+
+/**
+ * @macro
+ *
+ * :macro:`NGTCP2_WRITE_STREAM_FLAG_MORE` indicates that more data may
+ * come and should be coalesced into the same packet if possible.
+ */
+#define NGTCP2_WRITE_STREAM_FLAG_MORE 0x01u
+
+/**
+ * @macro
+ *
+ * :macro:`NGTCP2_WRITE_STREAM_FLAG_FIN` indicates that the passed
+ * data is the final part of a stream.
+ */
+#define NGTCP2_WRITE_STREAM_FLAG_FIN 0x02u
+
+/**
+ * @function
+ *
+ * `ngtcp2_conn_write_stream` is just like
+ * `ngtcp2_conn_writev_stream`. The only difference is that it
+ * conveniently accepts a single buffer.
+ */
+NGTCP2_EXTERN ngtcp2_ssize ngtcp2_conn_write_stream_versioned(
+ ngtcp2_conn *conn, ngtcp2_path *path, int pkt_info_version,
+ ngtcp2_pkt_info *pi, uint8_t *dest, size_t destlen, ngtcp2_ssize *pdatalen,
+ uint32_t flags, int64_t stream_id, const uint8_t *data, size_t datalen,
+ ngtcp2_tstamp ts);
+
+/**
+ * @function
+ *
+ * `ngtcp2_conn_writev_stream` writes a packet containing stream data
+ * of stream denoted by |stream_id|. The buffer of the packet is
+ * pointed by |dest| of length |destlen|. This function performs QUIC
+ * handshake as well.
+ *
+ * |destlen| should be at least
+ * :member:`ngtcp2_settings.max_tx_udp_payload_size`.
+ *
+ * Specifying -1 to |stream_id| means no new stream data to send.
+ *
+ * If |path| is not ``NULL``, this function stores the network path
+ * with which the packet should be sent. Each addr field must point
+ * to the buffer which should be at least ``sizeof(struct
+ * sockaddr_storage)`` bytes long. The assignment might not be done
+ * if nothing is written to |dest|.
+ *
+ * If |pi| is not ``NULL``, this function stores packet metadata in it
+ * if it succeeds. The metadata includes ECN markings. When calling
+ * this function again after it returns
+ * :macro:`NGTCP2_ERR_WRITE_MORE`, caller must pass the same |pi| to
+ * this function.
+ *
+ * If the all given data is encoded as STREAM frame in |dest|, and if
+ * |flags| & :macro:`NGTCP2_WRITE_STREAM_FLAG_FIN` is nonzero, fin
+ * flag is set to outgoing STREAM frame. Otherwise, fin flag in
+ * STREAM frame is not set.
+ *
+ * This packet may contain frames other than STREAM frame. The packet
+ * might not contain STREAM frame if other frames occupy the packet.
+ * In that case, |*pdatalen| would be -1 if |pdatalen| is not
+ * ``NULL``.
+ *
+ * If |flags| & :macro:`NGTCP2_WRITE_STREAM_FLAG_FIN` is nonzero, and
+ * 0 length STREAM frame is successfully serialized, |*pdatalen| would
+ * be 0.
+ *
+ * The number of data encoded in STREAM frame is stored in |*pdatalen|
+ * if it is not ``NULL``. The caller must keep the portion of data
+ * covered by |*pdatalen| bytes in tact until
+ * :type:`ngtcp2_acked_stream_data_offset` indicates that they are
+ * acknowledged by a remote endpoint or the stream is closed.
+ *
+ * If |flags| equals to :macro:`NGTCP2_WRITE_STREAM_FLAG_NONE`, this
+ * function produces a single payload of UDP packet. If the given
+ * stream data is small (e.g., few bytes), the packet might be
+ * severely under filled. Too many small packet might increase
+ * overall packet processing costs. Unless there are retransmissions,
+ * by default, application can only send 1 STREAM frame in one QUIC
+ * packet. In order to include more than 1 STREAM frame in one QUIC
+ * packet, specify :macro:`NGTCP2_WRITE_STREAM_FLAG_MORE` in |flags|.
+ * This is analogous to ``MSG_MORE`` flag in :manpage:`send(2)`. If
+ * the :macro:`NGTCP2_WRITE_STREAM_FLAG_MORE` is used, there are 4
+ * outcomes:
+ *
+ * - The function returns the written length of packet just like
+ * without :macro:`NGTCP2_WRITE_STREAM_FLAG_MORE`. This is because
+ * packet is nearly full and the library decided to make a complete
+ * packet. |*pdatalen| might be -1 or >= 0. It may return 0 which
+ * indicates that no packet transmission is possible at the moment
+ * for some reason.
+ *
+ * - The function returns :macro:`NGTCP2_ERR_WRITE_MORE`. In this
+ * case, |*pdatalen| >= 0 is asserted. It indicates that
+ * application can still call this function with different stream
+ * data (or `ngtcp2_conn_writev_datagram` if it has data to send in
+ * unreliable datagram) to pack them into the same packet.
+ * Application has to specify the same |conn|, |path|, |pi|, |dest|,
+ * |destlen|, and |ts| parameters, otherwise the behaviour is
+ * undefined. The application can change |flags|.
+ *
+ * - The function returns one of the following negative error codes:
+ * :macro:`NGTCP2_ERR_STREAM_DATA_BLOCKED`,
+ * :macro:`NGTCP2_ERR_STREAM_NOT_FOUND`,
+ * :macro:`NGTCP2_ERR_STREAM_SHUT_WR`. In this case, |*pdatalen| ==
+ * -1 is asserted. Application can still write the stream data of
+ * the other streams by calling this function (or
+ * `ngtcp2_conn_writev_datagram` if it has data to send in
+ * unreliable datagram) to pack them into the same packet.
+ * Application has to specify the same |conn|, |path|, |pi|, |dest|,
+ * |destlen|, and |ts| parameters, otherwise the behaviour is
+ * undefined. The application can change |flags|.
+ *
+ * - The other negative error codes might be returned just like
+ * without :macro:`NGTCP2_WRITE_STREAM_FLAG_MORE`. These errors
+ * should be treated as a connection error.
+ *
+ * When application uses :macro:`NGTCP2_WRITE_STREAM_FLAG_MORE` at
+ * least once, it must not call other ngtcp2 API functions
+ * (application can still call `ngtcp2_conn_write_connection_close` to
+ * handle error from this function), just keep calling this function
+ * (or `ngtcp2_conn_write_pkt`, or `ngtcp2_conn_writev_datagram`)
+ * until it returns 0, a positive number (which indicates a complete
+ * packet is ready), or the error codes other than
+ * :macro:`NGTCP2_ERR_WRITE_MORE`,
+ * :macro:`NGTCP2_ERR_STREAM_DATA_BLOCKED`,
+ * :macro:`NGTCP2_ERR_STREAM_NOT_FOUND`, and
+ * :macro:`NGTCP2_ERR_STREAM_SHUT_WR`. If there is no stream data to
+ * include, call this function with |stream_id| as -1 to stop
+ * coalescing and write a packet.
+ *
+ * This function returns 0 if it cannot write any frame because buffer
+ * is too small, or packet is congestion limited. Application should
+ * keep reading and wait for congestion window to grow.
+ *
+ * This function must not be called from inside the callback
+ * functions.
+ *
+ * `ngtcp2_conn_update_pkt_tx_time` must be called after this
+ * function. Application may call this function multiple times before
+ * calling `ngtcp2_conn_update_pkt_tx_time`.
+ *
+ * This function returns the number of bytes written in |dest| if it
+ * succeeds, or one of the following negative error codes:
+ *
+ * :macro:`NGTCP2_ERR_NOMEM`
+ * Out of memory
+ * :macro:`NGTCP2_ERR_STREAM_NOT_FOUND`
+ * Stream does not exist
+ * :macro:`NGTCP2_ERR_STREAM_SHUT_WR`
+ * Stream is half closed (local); or stream is being reset.
+ * :macro:`NGTCP2_ERR_PKT_NUM_EXHAUSTED`
+ * Packet number is exhausted, and cannot send any more packet.
+ * :macro:`NGTCP2_ERR_CALLBACK_FAILURE`
+ * User callback failed
+ * :macro:`NGTCP2_ERR_INVALID_ARGUMENT`
+ * The total length of stream data is too large.
+ * :macro:`NGTCP2_ERR_STREAM_DATA_BLOCKED`
+ * Stream is blocked because of flow control.
+ * :macro:`NGTCP2_ERR_WRITE_MORE`
+ * (Only when :macro:`NGTCP2_WRITE_STREAM_FLAG_MORE` is specified)
+ * Application can call this function to pack more stream data
+ * into the same packet. See above to know how it works.
+ *
+ * In general, if the error code which satisfies
+ * `ngtcp2_err_is_fatal(err) <ngtcp2_err_is_fatal>` != 0 is returned,
+ * the application should just close the connection by calling
+ * `ngtcp2_conn_write_connection_close` or just delete the QUIC
+ * connection using `ngtcp2_conn_del`. It is undefined to call the
+ * other library functions.
+ */
+NGTCP2_EXTERN ngtcp2_ssize ngtcp2_conn_writev_stream_versioned(
+ ngtcp2_conn *conn, ngtcp2_path *path, int pkt_info_version,
+ ngtcp2_pkt_info *pi, uint8_t *dest, size_t destlen, ngtcp2_ssize *pdatalen,
+ uint32_t flags, int64_t stream_id, const ngtcp2_vec *datav, size_t datavcnt,
+ ngtcp2_tstamp ts);
+
+/**
+ * @macrosection
+ *
+ * Write datagram flags
+ */
+
+/**
+ * @macro
+ *
+ * :macro:`NGTCP2_WRITE_DATAGRAM_FLAG_NONE` indicates no flag set.
+ */
+#define NGTCP2_WRITE_DATAGRAM_FLAG_NONE 0x00u
+
+/**
+ * @macro
+ *
+ * :macro:`NGTCP2_WRITE_DATAGRAM_FLAG_MORE` indicates that more data
+ * may come and should be coalesced into the same packet if possible.
+ */
+#define NGTCP2_WRITE_DATAGRAM_FLAG_MORE 0x01u
+
+/**
+ * @function
+ *
+ * `ngtcp2_conn_writev_datagram` writes a packet containing unreliable
+ * data in DATAGRAM frame. The buffer of the packet is pointed by
+ * |dest| of length |destlen|. This function performs QUIC handshake
+ * as well.
+ *
+ * |destlen| should be at least
+ * :member:`ngtcp2_settings.max_tx_udp_payload_size`.
+ *
+ * For |path| and |pi| parameters, refer to
+ * `ngtcp2_conn_writev_stream`.
+ *
+ * If the given data is written to the buffer, nonzero value is
+ * assigned to |*paccepted| if it is not NULL. The data in DATAGRAM
+ * frame cannot be fragmented; writing partial data is not possible.
+ *
+ * |dgram_id| is an opaque identifier which should uniquely identify
+ * the given DATAGRAM. It is passed to :type:`ngtcp2_ack_datagram`
+ * callback when a packet that contains DATAGRAM frame is
+ * acknowledged. It is passed to :type:`ngtcp2_lost_datagram`
+ * callback when a packet that contains DATAGRAM frame is declared
+ * lost. If an application uses neither of those callbacks, it can
+ * sets 0 to this parameter.
+ *
+ * This function might write other frames other than DATAGRAM, just
+ * like `ngtcp2_conn_writev_stream`.
+ *
+ * If the function returns 0, it means that no more data cannot be
+ * sent because of congestion control limit; or, data does not fit
+ * into the provided buffer; or, a local endpoint, as a server, is
+ * unable to send data because of its amplification limit. In this
+ * case, |*paccepted| is assigned zero if it is not NULL.
+ *
+ * If :macro:`NGTCP2_WRITE_DATAGRAM_FLAG_MORE` is set in |flags|,
+ * there are 3 outcomes:
+ *
+ * - The function returns the written length of packet just like
+ * without :macro:`NGTCP2_WRITE_DATAGRAM_FLAG_MORE`. This is
+ * because packet is nearly full and the library decided to make a
+ * complete packet. |*paccepted| might be zero or nonzero.
+ *
+ * - The function returns :macro:`NGTCP2_ERR_WRITE_MORE`. In this
+ * case, |*paccepted| != 0 is asserted. This indicates that
+ * application can call this function with another unreliable data
+ * (or `ngtcp2_conn_writev_stream` if it has stream data to send) to
+ * pack them into the same packet. Application has to specify the
+ * same |conn|, |path|, |pi|, |dest|, |destlen|, and |ts|
+ * parameters, otherwise the behaviour is undefined. The
+ * application can change |flags|.
+ *
+ * - The other error might be returned just like without
+ * :macro:`NGTCP2_WRITE_DATAGRAM_FLAG_MORE`.
+ *
+ * When application sees :macro:`NGTCP2_ERR_WRITE_MORE`, it must not
+ * call other ngtcp2 API functions (application can still call
+ * `ngtcp2_conn_write_connection_close` to handle error from this
+ * function). Just keep calling `ngtcp2_conn_writev_datagram`,
+ * `ngtcp2_conn_writev_stream` or `ngtcp2_conn_write_pkt` until it
+ * returns a positive number (which indicates a complete packet is
+ * ready).
+ *
+ * This function returns the number of bytes written in |dest| if it
+ * succeeds, or one of the following negative error codes:
+ *
+ * :macro:`NGTCP2_ERR_NOMEM`
+ * Out of memory
+ * :macro:`NGTCP2_ERR_PKT_NUM_EXHAUSTED`
+ * Packet number is exhausted, and cannot send any more packet.
+ * :macro:`NGTCP2_ERR_CALLBACK_FAILURE`
+ * User callback failed
+ * :macro:`NGTCP2_ERR_WRITE_MORE`
+ * (Only when :macro:`NGTCP2_WRITE_DATAGRAM_FLAG_MORE` is
+ * specified) Application can call this function to pack more data
+ * into the same packet. See above to know how it works.
+ * :macro:`NGTCP2_ERR_INVALID_STATE`
+ * A remote endpoint did not express the DATAGRAM frame support.
+ * :macro:`NGTCP2_ERR_INVALID_ARGUMENT`
+ * The provisional DATAGRAM frame size exceeds the maximum
+ * DATAGRAM frame size that a remote endpoint can receive.
+ *
+ * In general, if the error code which satisfies
+ * `ngtcp2_err_is_fatal(err) <ngtcp2_err_is_fatal>` != 0 is returned,
+ * the application should just close the connection by calling
+ * `ngtcp2_conn_write_connection_close` or just delete the QUIC
+ * connection using `ngtcp2_conn_del`. It is undefined to call the
+ * other library functions.
+ */
+NGTCP2_EXTERN ngtcp2_ssize ngtcp2_conn_writev_datagram_versioned(
+ ngtcp2_conn *conn, ngtcp2_path *path, int pkt_info_version,
+ ngtcp2_pkt_info *pi, uint8_t *dest, size_t destlen, int *paccepted,
+ uint32_t flags, uint64_t dgram_id, const ngtcp2_vec *datav, size_t datavcnt,
+ ngtcp2_tstamp ts);
+
+/**
+ * @function
+ *
+ * `ngtcp2_conn_is_in_closing_period` returns nonzero if |conn| is in
+ * the closing period.
+ */
+NGTCP2_EXTERN int ngtcp2_conn_is_in_closing_period(ngtcp2_conn *conn);
+
+/**
+ * @function
+ *
+ * `ngtcp2_conn_is_in_draining_period` returns nonzero if |conn| is in
+ * the draining period.
+ */
+NGTCP2_EXTERN int ngtcp2_conn_is_in_draining_period(ngtcp2_conn *conn);
+
+/**
+ * @function
+ *
+ * `ngtcp2_conn_extend_max_stream_offset` extends stream's max stream
+ * data value by |datalen|.
+ *
+ * This function returns 0 if it succeeds, or one of the following
+ * negative error codes:
+ *
+ * :macro:`NGTCP2_ERR_NOMEM`
+ * Out of memory.
+ */
+NGTCP2_EXTERN int ngtcp2_conn_extend_max_stream_offset(ngtcp2_conn *conn,
+ int64_t stream_id,
+ uint64_t datalen);
+
+/**
+ * @function
+ *
+ * `ngtcp2_conn_extend_max_offset` extends max data offset by
+ * |datalen|.
+ */
+NGTCP2_EXTERN void ngtcp2_conn_extend_max_offset(ngtcp2_conn *conn,
+ uint64_t datalen);
+
+/**
+ * @function
+ *
+ * `ngtcp2_conn_extend_max_streams_bidi` extends the number of maximum
+ * local bidirectional streams that a remote endpoint can open by |n|.
+ *
+ * The library does not increase maximum stream limit automatically.
+ * The exception is when a stream is closed without
+ * :type:`ngtcp2_stream_open` callback being called. In this case,
+ * stream limit is increased automatically.
+ */
+NGTCP2_EXTERN void ngtcp2_conn_extend_max_streams_bidi(ngtcp2_conn *conn,
+ size_t n);
+
+/**
+ * @function
+ *
+ * `ngtcp2_conn_extend_max_streams_uni` extends the number of maximum
+ * local unidirectional streams that a remote endpoint can open by
+ * |n|.
+ *
+ * The library does not increase maximum stream limit automatically.
+ * The exception is when a stream is closed without
+ * :type:`ngtcp2_stream_open` callback being called. In this case,
+ * stream limit is increased automatically.
+ */
+NGTCP2_EXTERN void ngtcp2_conn_extend_max_streams_uni(ngtcp2_conn *conn,
+ size_t n);
+
+/**
+ * @function
+ *
+ * `ngtcp2_conn_get_dcid` returns the non-NULL pointer to destination
+ * connection ID. If no destination connection ID is present, the
+ * return value is not ``NULL``, and its datalen field is 0.
+ */
+NGTCP2_EXTERN const ngtcp2_cid *ngtcp2_conn_get_dcid(ngtcp2_conn *conn);
+
+/**
+ * @function
+ *
+ * `ngtcp2_conn_get_client_initial_dcid` returns the non-NULL pointer
+ * to the Destination Connection ID that client sent in its Initial
+ * packet.
+ */
+NGTCP2_EXTERN const ngtcp2_cid *
+ngtcp2_conn_get_client_initial_dcid(ngtcp2_conn *conn);
+
+/**
+ * @function
+ *
+ * `ngtcp2_conn_get_num_scid` returns the number of source connection
+ * IDs which the local endpoint has provided to the peer and have not
+ * retired.
+ */
+NGTCP2_EXTERN size_t ngtcp2_conn_get_num_scid(ngtcp2_conn *conn);
+
+/**
+ * @function
+ *
+ * `ngtcp2_conn_get_scid` writes the all source connection IDs which
+ * the local endpoint has provided to the peer and have not retired in
+ * |dest|. The buffer pointed by |dest| must have
+ * ``sizeof(ngtcp2_cid) * n`` bytes available, where n is the return
+ * value of `ngtcp2_conn_get_num_scid()`.
+ */
+NGTCP2_EXTERN size_t ngtcp2_conn_get_scid(ngtcp2_conn *conn, ngtcp2_cid *dest);
+
+/**
+ * @function
+ *
+ * `ngtcp2_conn_get_num_active_dcid` returns the number of the active
+ * destination connection ID.
+ */
+NGTCP2_EXTERN size_t ngtcp2_conn_get_num_active_dcid(ngtcp2_conn *conn);
+
+/**
+ * @struct
+ *
+ * :type:`ngtcp2_cid_token` is the convenient struct to store
+ * Connection ID, its associated path, and stateless reset token.
+ */
+typedef struct ngtcp2_cid_token {
+ /**
+ * :member:`seq` is the sequence number of this Connection ID.
+ */
+ uint64_t seq;
+ /**
+ * :member:`cid` is Connection ID.
+ */
+ ngtcp2_cid cid;
+ /**
+ * :member:`ps` is the path which is associated to this Connection
+ * ID.
+ */
+ ngtcp2_path_storage ps;
+ /**
+ * :member:`token` is the stateless reset token for this Connection
+ * ID.
+ */
+ uint8_t token[NGTCP2_STATELESS_RESET_TOKENLEN];
+ /**
+ * :member:`token_present` is nonzero if token contains stateless
+ * reset token.
+ */
+ uint8_t token_present;
+} ngtcp2_cid_token;
+
+/**
+ * @function
+ *
+ * `ngtcp2_conn_get_active_dcid` writes the all active destination
+ * connection IDs and tokens to |dest|. The buffer pointed by |dest|
+ * must have ``sizeof(ngtcp2_cid_token) * n`` bytes available, where n
+ * is the return value of `ngtcp2_conn_get_num_active_dcid()`.
+ */
+NGTCP2_EXTERN size_t ngtcp2_conn_get_active_dcid(ngtcp2_conn *conn,
+ ngtcp2_cid_token *dest);
+
+/**
+ * @function
+ *
+ * `ngtcp2_conn_get_client_chosen_version` returns the client chosen
+ * version.
+ */
+NGTCP2_EXTERN uint32_t ngtcp2_conn_get_client_chosen_version(ngtcp2_conn *conn);
+
+/**
+ * @function
+ *
+ * `ngtcp2_conn_get_negotiated_version` returns the negotiated version.
+ *
+ * Until the version is negotiated, this function returns 0.
+ */
+NGTCP2_EXTERN uint32_t ngtcp2_conn_get_negotiated_version(ngtcp2_conn *conn);
+
+/**
+ * @function
+ *
+ * `ngtcp2_conn_early_data_rejected` tells |conn| that early data was
+ * rejected by a server, or client decided not to attempt early data
+ * for some reason. |conn| discards the following connection states:
+ *
+ * - Any opended streams.
+ * - Stream identifier allocations.
+ * - Max data extended by `ngtcp2_conn_extend_max_offset`.
+ * - Max bidi streams extended by `ngtcp2_conn_extend_max_streams_bidi`.
+ * - Max uni streams extended by `ngtcp2_conn_extend_max_streams_uni`.
+ *
+ * Application which wishes to retransmit early data, it has to open
+ * streams and send stream data again.
+ *
+ * This function returns 0 if it succeeds, or one of the following
+ * negative error codes:
+ *
+ * :macro:`NGTCP2_ERR_CALLBACK_FAILURE`
+ * User callback failed
+ */
+NGTCP2_EXTERN int ngtcp2_conn_early_data_rejected(ngtcp2_conn *conn);
+
+/**
+ * @function
+ *
+ * `ngtcp2_conn_get_early_data_rejected` returns nonzero if
+ * `ngtcp2_conn_early_data_rejected` has been called.
+ */
+NGTCP2_EXTERN int ngtcp2_conn_get_early_data_rejected(ngtcp2_conn *conn);
+
+/**
+ * @function
+ *
+ * `ngtcp2_conn_get_conn_stat` assigns connection statistics data to
+ * |*cstat|.
+ */
+NGTCP2_EXTERN void ngtcp2_conn_get_conn_stat_versioned(ngtcp2_conn *conn,
+ int conn_stat_version,
+ ngtcp2_conn_stat *cstat);
+
+/**
+ * @function
+ *
+ * `ngtcp2_conn_submit_crypto_data` submits crypto stream data |data|
+ * of length |datalen| to the library for transmission. The
+ * encryption level is given in |crypto_level|.
+ *
+ * The library makes a copy of the buffer pointed by |data| of length
+ * |datalen|. Application can discard |data|.
+ */
+NGTCP2_EXTERN int
+ngtcp2_conn_submit_crypto_data(ngtcp2_conn *conn,
+ ngtcp2_crypto_level crypto_level,
+ const uint8_t *data, const size_t datalen);
+
+/**
+ * @function
+ *
+ * `ngtcp2_conn_submit_new_token` submits address validation token.
+ * It is sent in NEW_TOKEN frame. Only server can call this function.
+ * |tokenlen| must not be 0.
+ *
+ * This function makes a copy of the buffer pointed by |token| of
+ * length |tokenlen|.
+ *
+ * This function returns 0 if it succeeds, or one of the following
+ * negative error codes:
+ *
+ * :macro:`NGTCP2_ERR_NOMEM`
+ * Out of memory.
+ */
+NGTCP2_EXTERN int ngtcp2_conn_submit_new_token(ngtcp2_conn *conn,
+ const uint8_t *token,
+ size_t tokenlen);
+
+/**
+ * @function
+ *
+ * `ngtcp2_conn_set_local_addr` sets local endpoint address |addr| to
+ * the current path of |conn|.
+ */
+NGTCP2_EXTERN void ngtcp2_conn_set_local_addr(ngtcp2_conn *conn,
+ const ngtcp2_addr *addr);
+
+/**
+ * @function
+ *
+ * `ngtcp2_conn_set_path_user_data` sets the |path_user_data| to the
+ * current path (see :member:`ngtcp2_path.user_data`).
+ */
+NGTCP2_EXTERN void ngtcp2_conn_set_path_user_data(ngtcp2_conn *conn,
+ void *path_user_data);
+
+/**
+ * @function
+ *
+ * `ngtcp2_conn_get_path` returns the current path.
+ */
+NGTCP2_EXTERN const ngtcp2_path *ngtcp2_conn_get_path(ngtcp2_conn *conn);
+
+/**
+ * @function
+ *
+ * `ngtcp2_conn_get_max_tx_udp_payload_size` returns the maximum UDP
+ * payload size that this local endpoint would send. This is the
+ * value of :member:`ngtcp2_settings.max_tx_udp_payload_size` that is
+ * passed to `ngtcp2_conn_client_new` or `ngtcp2_conn_server_new`.
+ */
+NGTCP2_EXTERN size_t ngtcp2_conn_get_max_tx_udp_payload_size(ngtcp2_conn *conn);
+
+/**
+ * @function
+ *
+ * `ngtcp2_conn_get_path_max_tx_udp_payload_size` returns the maximum
+ * UDP payload size for the current path. If
+ * :member:`ngtcp2_settings.no_tx_udp_payload_size_shaping` is set to
+ * nonzero, this function is equivalent to
+ * `ngtcp2_conn_get_max_tx_udp_payload_size`. Otherwise, it returns
+ * the maximum UDP payload size that is probed for the current path.
+ */
+NGTCP2_EXTERN size_t
+ngtcp2_conn_get_path_max_tx_udp_payload_size(ngtcp2_conn *conn);
+
+/**
+ * @function
+ *
+ * `ngtcp2_conn_initiate_immediate_migration` starts connection
+ * migration to the given |path|. Only client can initiate migration.
+ * This function does immediate migration; while the path validation
+ * is nonetheless performed, this function does not wait for it to
+ * succeed.
+ *
+ * This function returns 0 if it succeeds, or one of the following
+ * negative error codes:
+ *
+ * :macro:`NGTCP2_ERR_INVALID_STATE`
+ * Migration is disabled; or handshake is not yet confirmed; or
+ * client is migrating to server's preferred address.
+ * :macro:`NGTCP2_ERR_CONN_ID_BLOCKED`
+ * No unused connection ID is available.
+ * :macro:`NGTCP2_ERR_INVALID_ARGUMENT`
+ * |local_addr| equals the current local address.
+ * :macro:`NGTCP2_ERR_NOMEM`
+ * Out of memory
+ */
+NGTCP2_EXTERN int ngtcp2_conn_initiate_immediate_migration(
+ ngtcp2_conn *conn, const ngtcp2_path *path, ngtcp2_tstamp ts);
+
+/**
+ * @function
+ *
+ * `ngtcp2_conn_initiate_migration` starts connection migration to the
+ * given |path|. Only client can initiate migration. Unlike
+ * `ngtcp2_conn_initiate_immediate_migration`, this function starts a
+ * path validation with a new path and migrate to the new path after
+ * successful path validation.
+ *
+ * This function returns 0 if it succeeds, or one of the following
+ * negative error codes:
+ *
+ * :macro:`NGTCP2_ERR_INVALID_STATE`
+ * Migration is disabled; or handshake is not yet confirmed; or
+ * client is migrating to server's preferred address.
+ * :macro:`NGTCP2_ERR_CONN_ID_BLOCKED`
+ * No unused connection ID is available.
+ * :macro:`NGTCP2_ERR_INVALID_ARGUMENT`
+ * |local_addr| equals the current local address.
+ * :macro:`NGTCP2_ERR_NOMEM`
+ * Out of memory
+ */
+NGTCP2_EXTERN int ngtcp2_conn_initiate_migration(ngtcp2_conn *conn,
+ const ngtcp2_path *path,
+ ngtcp2_tstamp ts);
+
+/**
+ * @function
+ *
+ * `ngtcp2_conn_get_max_local_streams_uni` returns the cumulative
+ * number of streams which local endpoint can open.
+ */
+NGTCP2_EXTERN uint64_t ngtcp2_conn_get_max_local_streams_uni(ngtcp2_conn *conn);
+
+/**
+ * @function
+ *
+ * `ngtcp2_conn_get_max_data_left` returns the number of bytes that
+ * this local endpoint can send in this connection.
+ */
+NGTCP2_EXTERN uint64_t ngtcp2_conn_get_max_data_left(ngtcp2_conn *conn);
+
+/**
+ * @function
+ *
+ * `ngtcp2_conn_get_max_stream_data_left` returns the number of bytes
+ * that this local endpoint can send to a stream identified by
+ * |stream_id|. If no such stream is found, this function returns 0.
+ */
+NGTCP2_EXTERN uint64_t ngtcp2_conn_get_max_stream_data_left(ngtcp2_conn *conn,
+ int64_t stream_id);
+
+/**
+ * @function
+ *
+ * `ngtcp2_conn_get_streams_bidi_left` returns the number of
+ * bidirectional streams which the local endpoint can open without
+ * violating stream concurrency limit.
+ */
+NGTCP2_EXTERN uint64_t ngtcp2_conn_get_streams_bidi_left(ngtcp2_conn *conn);
+
+/**
+ * @function
+ *
+ * `ngtcp2_conn_get_streams_uni_left` returns the number of
+ * unidirectional streams which the local endpoint can open without
+ * violating stream concurrency limit.
+ */
+NGTCP2_EXTERN uint64_t ngtcp2_conn_get_streams_uni_left(ngtcp2_conn *conn);
+
+/**
+ * @function
+ *
+ * `ngtcp2_conn_get_cwnd_left` returns the cwnd minus the number of
+ * bytes in flight on the current path. If the former is smaller than
+ * the latter, this function returns 0.
+ */
+NGTCP2_EXTERN uint64_t ngtcp2_conn_get_cwnd_left(ngtcp2_conn *conn);
+
+/**
+ * @function
+ *
+ * `ngtcp2_conn_set_initial_crypto_ctx` sets |ctx| for Initial packet
+ * encryption. The passed data will be passed to
+ * :type:`ngtcp2_encrypt`, :type:`ngtcp2_decrypt` and
+ * :type:`ngtcp2_hp_mask` callbacks.
+ */
+NGTCP2_EXTERN void
+ngtcp2_conn_set_initial_crypto_ctx(ngtcp2_conn *conn,
+ const ngtcp2_crypto_ctx *ctx);
+
+/**
+ * @function
+ *
+ * `ngtcp2_conn_get_initial_crypto_ctx` returns
+ * :type:`ngtcp2_crypto_ctx` object for Initial packet encryption.
+ */
+NGTCP2_EXTERN const ngtcp2_crypto_ctx *
+ngtcp2_conn_get_initial_crypto_ctx(ngtcp2_conn *conn);
+
+/**
+ * @function
+ *
+ * `ngtcp2_conn_set_crypto_ctx` sets |ctx| for Handshake/1RTT packet
+ * encryption. The passed data will be passed to
+ * :type:`ngtcp2_encrypt`, :type:`ngtcp2_decrypt` and
+ * :type:`ngtcp2_hp_mask` callbacks.
+ */
+NGTCP2_EXTERN void ngtcp2_conn_set_crypto_ctx(ngtcp2_conn *conn,
+ const ngtcp2_crypto_ctx *ctx);
+
+/**
+ * @function
+ *
+ * `ngtcp2_conn_get_tls_native_handle` returns TLS native handle set by
+ * `ngtcp2_conn_set_tls_native_handle()`.
+ */
+NGTCP2_EXTERN void *ngtcp2_conn_get_tls_native_handle(ngtcp2_conn *conn);
+
+/**
+ * @function
+ *
+ * `ngtcp2_conn_set_tls_native_handle` sets TLS native handle
+ * |tls_native_handle| to |conn|. Internally, it is used as an opaque
+ * pointer.
+ */
+NGTCP2_EXTERN void ngtcp2_conn_set_tls_native_handle(ngtcp2_conn *conn,
+ void *tls_native_handle);
+
+/**
+ * @function
+ *
+ * `ngtcp2_conn_set_retry_aead` sets |aead| and |aead_ctx| for Retry
+ * integrity tag verification. |aead| must be AEAD_AES_128_GCM.
+ * |aead_ctx| must be initialized with :macro:`NGTCP2_RETRY_KEY` as
+ * encryption key. This function must be called if |conn| is
+ * initialized as client. Server does not verify the tag and has no
+ * need to call this function.
+ *
+ * If this function succeeds, |conn| takes ownership of |aead_ctx|.
+ * :type:`ngtcp2_delete_crypto_aead_ctx` will be called to delete this
+ * object when it is no longer used. If this function fails, the
+ * caller is responsible to delete it.
+ */
+NGTCP2_EXTERN void
+ngtcp2_conn_set_retry_aead(ngtcp2_conn *conn, const ngtcp2_crypto_aead *aead,
+ const ngtcp2_crypto_aead_ctx *aead_ctx);
+
+/**
+ * @function
+ *
+ * `ngtcp2_conn_get_crypto_ctx` returns :type:`ngtcp2_crypto_ctx`
+ * object for Handshake/1RTT packet encryption.
+ */
+NGTCP2_EXTERN const ngtcp2_crypto_ctx *
+ngtcp2_conn_get_crypto_ctx(ngtcp2_conn *conn);
+
+/**
+ * @function
+ *
+ * `ngtcp2_conn_set_early_crypto_ctx` sets |ctx| for 0RTT packet
+ * encryption. The passed data will be passed to
+ * :type:`ngtcp2_encrypt`, :type:`ngtcp2_decrypt` and
+ * :type:`ngtcp2_hp_mask` callbacks.
+ */
+NGTCP2_EXTERN void
+ngtcp2_conn_set_early_crypto_ctx(ngtcp2_conn *conn,
+ const ngtcp2_crypto_ctx *ctx);
+
+/**
+ * @function
+ *
+ * `ngtcp2_conn_get_early_crypto_ctx` returns
+ * :type:`ngtcp2_crypto_ctx` object for 0RTT packet encryption.
+ */
+NGTCP2_EXTERN const ngtcp2_crypto_ctx *
+ngtcp2_conn_get_early_crypto_ctx(ngtcp2_conn *conn);
+
+/**
+ * @enum
+ *
+ * :type:`ngtcp2_connection_close_error_code_type` defines connection
+ * error code type.
+ */
+typedef enum ngtcp2_connection_close_error_code_type {
+ /**
+ * :enum:`NGTCP2_CONNECTION_CLOSE_ERROR_CODE_TYPE_TRANSPORT`
+ * indicates the error code is QUIC transport error code.
+ */
+ NGTCP2_CONNECTION_CLOSE_ERROR_CODE_TYPE_TRANSPORT,
+ /**
+ * :enum:`NGTCP2_CONNECTION_CLOSE_ERROR_CODE_TYPE_APPLICATION`
+ * indicates the error code is application error code.
+ */
+ NGTCP2_CONNECTION_CLOSE_ERROR_CODE_TYPE_APPLICATION,
+ /**
+ * :enum:`NGTCP2_CONNECTION_CLOSE_ERROR_CODE_TYPE_TRANSPORT_VERSION_NEGOTIATION`
+ * is a special case of QUIC transport error, and it indicates that
+ * client receives Version Negotiation packet.
+ */
+ NGTCP2_CONNECTION_CLOSE_ERROR_CODE_TYPE_TRANSPORT_VERSION_NEGOTIATION,
+ /**
+ * :enum:`NGTCP2_CONNECTION_CLOSE_ERROR_CODE_TYPE_TRANSPORT_IDLE_CLOSE`
+ * is a special case of QUIC transport error, and it indicates that
+ * connection is closed because of idle timeout.
+ */
+ NGTCP2_CONNECTION_CLOSE_ERROR_CODE_TYPE_TRANSPORT_IDLE_CLOSE
+} ngtcp2_connection_close_error_code_type;
+
+/**
+ * @struct
+ *
+ * :type:`ngtcp2_connection_close_error` contains connection
+ * error code, its type, and the optional reason phrase.
+ */
+typedef struct ngtcp2_connection_close_error {
+ /**
+ * :member:`type` is the type of :member:`error_code`.
+ */
+ ngtcp2_connection_close_error_code_type type;
+ /**
+ * :member:`error_code` is the error code for connection closure.
+ */
+ uint64_t error_code;
+ /**
+ * :member:`frame_type` is the type of QUIC frame which triggers
+ * this connection error. This field is set to 0 if the frame type
+ * is unknown.
+ */
+ uint64_t frame_type;
+ /**
+ * :member:`reason` points to the buffer which contains a reason
+ * phrase. It may be NULL if there is no reason phrase. If it is
+ * received from a remote endpoint, it is truncated to at most 1024
+ * bytes.
+ */
+ const uint8_t *reason;
+ /**
+ * :member:`reasonlen` is the length of data pointed by
+ * :member:`reason`.
+ */
+ size_t reasonlen;
+} ngtcp2_connection_close_error;
+
+/**
+ * @function
+ *
+ * `ngtcp2_connection_close_error_default` initializes |ccerr| with
+ * the default values. It sets the following fields:
+ *
+ * - :member:`type <ngtcp2_connection_close_error.type>` =
+ * :enum:`ngtcp2_connection_close_error_code_type.NGTCP2_CONNECTION_CLOSE_ERROR_CODE_TYPE_TRANSPORT`
+ * - :member:`error_code <ngtcp2_connection_close_error.error_code>` =
+ * :macro:`NGTCP2_NO_ERROR`.
+ * - :member:`frame_type <ngtcp2_connection_close_error.frame_type>` =
+ * 0
+ * - :member:`reason <ngtcp2_connection_close_error.reason>` = NULL
+ * - :member:`reasonlen <ngtcp2_connection_close_error.reasonlen>` = 0
+ */
+NGTCP2_EXTERN void
+ngtcp2_connection_close_error_default(ngtcp2_connection_close_error *ccerr);
+
+/**
+ * @function
+ *
+ * `ngtcp2_connection_close_error_set_transport_error` sets
+ * :member:`ccerr->type <ngtcp2_connection_close_error.type>` to
+ * :enum:`ngtcp2_connection_close_error_code_type.NGTCP2_CONNECTION_CLOSE_ERROR_CODE_TYPE_TRANSPORT`,
+ * and :member:`ccerr->error_code
+ * <ngtcp2_connection_close_error.error_code>` to |error_code|.
+ * |reason| is the reason phrase of length |reasonlen|. This function
+ * does not make a copy of the reason phrase.
+ */
+NGTCP2_EXTERN void ngtcp2_connection_close_error_set_transport_error(
+ ngtcp2_connection_close_error *ccerr, uint64_t error_code,
+ const uint8_t *reason, size_t reasonlen);
+
+/**
+ * @function
+ *
+ * `ngtcp2_connection_close_error_set_transport_error_liberr` sets
+ * type and error_code based on |liberr|.
+ *
+ * If |liberr| is :macro:`NGTCP2_ERR_RECV_VERSION_NEGOTIATION`,
+ * :member:`ccerr->type <ngtcp2_connection_close_error.type>` is set
+ * to
+ * :enum:`ngtcp2_connection_close_error_code_type.NGTCP2_CONNECTION_CLOSE_ERROR_CODE_TYPE_TRANSPORT_VERSION_NEGOTIATION`,
+ * and :member:`ccerr->error_code
+ * <ngtcp2_connection_close_error.error_code>` to
+ * :macro:`NGTCP2_NO_ERROR`. If |liberr| is
+ * :macro:`NGTCP2_ERR_IDLE_CLOSE`, :member:`ccerr->type
+ * <ngtcp2_connection_close_error.type>` is set to
+ * :enum:`ngtcp2_connection_close_error_code_type.NGTCP2_CONNECTION_CLOSE_ERROR_CODE_TYPE_TRANSPORT_IDLE_CLOSE`,
+ * and :member:`ccerr->error_code
+ * <ngtcp2_connection_close_error.error_code>` to
+ * :macro:`NGTCP2_NO_ERROR`. Otherwise, :member:`ccerr->type
+ * <ngtcp2_connection_close_error.type>` is set to
+ * :enum:`ngtcp2_connection_close_error_code_type.NGTCP2_CONNECTION_CLOSE_ERROR_CODE_TYPE_TRANSPORT`,
+ * and :member:`ccerr->error_code
+ * <ngtcp2_connection_close_error.error_code>` is set to an error code
+ * inferred by |liberr| (see
+ * `ngtcp2_err_infer_quic_transport_error_code`). |reason| is the
+ * reason phrase of length |reasonlen|. This function does not make a
+ * copy of the reason phrase.
+ */
+NGTCP2_EXTERN void ngtcp2_connection_close_error_set_transport_error_liberr(
+ ngtcp2_connection_close_error *ccerr, int liberr, const uint8_t *reason,
+ size_t reasonlen);
+
+/**
+ * @function
+ *
+ * `ngtcp2_connection_close_error_set_transport_error_tls_alert` sets
+ * :member:`ccerr->type <ngtcp2_connection_close_error.type>` to
+ * :enum:`ngtcp2_connection_close_error_code_type.NGTCP2_CONNECTION_CLOSE_ERROR_CODE_TYPE_TRANSPORT`,
+ * and :member:`ccerr->error_code
+ * <ngtcp2_connection_close_error.error_code>` to bitwise-OR of
+ * :macro:`NGTCP2_CRYPTO_ERROR` and |tls_alert|. |reason| is the
+ * reason phrase of length |reasonlen|. This function does not make a
+ * copy of the reason phrase.
+ */
+NGTCP2_EXTERN void ngtcp2_connection_close_error_set_transport_error_tls_alert(
+ ngtcp2_connection_close_error *ccerr, uint8_t tls_alert,
+ const uint8_t *reason, size_t reasonlen);
+
+/**
+ * @function
+ *
+ * `ngtcp2_connection_close_error_set_application_error` sets
+ * :member:`ccerr->type <ngtcp2_connection_close_error.type>` to
+ * :enum:`ngtcp2_connection_close_error_code_type.NGTCP2_CONNECTION_CLOSE_ERROR_CODE_TYPE_APPLICATION`,
+ * and :member:`ccerr->error_code
+ * <ngtcp2_connection_close_error.error_code>` to |error_code|.
+ * |reason| is the reason phrase of length |reasonlen|. This function
+ * does not make a copy of the reason phrase.
+ */
+NGTCP2_EXTERN void ngtcp2_connection_close_error_set_application_error(
+ ngtcp2_connection_close_error *ccerr, uint64_t error_code,
+ const uint8_t *reason, size_t reasonlen);
+
+/**
+ * @function
+ *
+ * `ngtcp2_conn_write_connection_close` writes a packet which contains
+ * CONNECTION_CLOSE frame(s) (type 0x1c or 0x1d) in the buffer pointed
+ * by |dest| whose capacity is |destlen|.
+ *
+ * For client, |destlen| should be at least
+ * :macro:`NGTCP2_MAX_UDP_PAYLOAD_SIZE`.
+ *
+ * If |path| is not ``NULL``, this function stores the network path
+ * with which the packet should be sent. Each addr field must point
+ * to the buffer which should be at least ``sizeof(struct
+ * sockaddr_storage)`` bytes long. The assignment might not be done
+ * if nothing is written to |dest|.
+ *
+ * If |pi| is not ``NULL``, this function stores packet metadata in it
+ * if it succeeds. The metadata includes ECN markings.
+ *
+ * If :member:`ccerr->type <ngtcp2_connection_close_error.type>` ==
+ * :enum:`ngtcp2_connection_close_error_code_type.NGTCP2_CONNECTION_CLOSE_ERROR_CODE_TYPE_TRANSPORT`,
+ * this function sends CONNECTION_CLOSE (type 0x1c) frame. If
+ * :member:`ccerr->type <ngtcp2_connection_close_error.type>` ==
+ * :enum:`ngtcp2_connection_close_error_code_type.NGTCP2_CONNECTION_CLOSE_ERROR_CODE_TYPE_APPLICATION`,
+ * it sends CONNECTION_CLOSE (type 0x1d) frame. Otherwise, it does
+ * not produce any data, and returns 0.
+ *
+ * This function must not be called from inside the callback
+ * functions.
+ *
+ * At the moment, successful call to this function makes connection
+ * close. We may change this behaviour in the future to allow
+ * graceful shutdown.
+ *
+ * This function returns the number of bytes written in |dest| if it
+ * succeeds, or one of the following negative error codes:
+ *
+ * :macro:`NGTCP2_ERR_NOMEM`
+ * Out of memory
+ * :macro:`NGTCP2_ERR_NOBUF`
+ * Buffer is too small
+ * :macro:`NGTCP2_ERR_INVALID_STATE`
+ * The current state does not allow sending CONNECTION_CLOSE.
+ * :macro:`NGTCP2_ERR_PKT_NUM_EXHAUSTED`
+ * Packet number is exhausted, and cannot send any more packet.
+ * :macro:`NGTCP2_ERR_CALLBACK_FAILURE`
+ * User callback failed
+ */
+NGTCP2_EXTERN ngtcp2_ssize ngtcp2_conn_write_connection_close_versioned(
+ ngtcp2_conn *conn, ngtcp2_path *path, int pkt_info_version,
+ ngtcp2_pkt_info *pi, uint8_t *dest, size_t destlen,
+ const ngtcp2_connection_close_error *ccerr, ngtcp2_tstamp ts);
+
+/**
+ * @function
+ *
+ * `ngtcp2_conn_get_connection_close_error` stores the received
+ * connection close error in |ccerr|.
+ */
+NGTCP2_EXTERN void
+ngtcp2_conn_get_connection_close_error(ngtcp2_conn *conn,
+ ngtcp2_connection_close_error *ccerr);
+
+/**
+ * @function
+ *
+ * `ngtcp2_conn_is_local_stream` returns nonzero if |stream_id| denotes the
+ * stream which a local endpoint issues.
+ */
+NGTCP2_EXTERN int ngtcp2_conn_is_local_stream(ngtcp2_conn *conn,
+ int64_t stream_id);
+
+/**
+ * @function
+ *
+ * `ngtcp2_conn_is_server` returns nonzero if |conn| is initialized as
+ * server.
+ */
+NGTCP2_EXTERN int ngtcp2_conn_is_server(ngtcp2_conn *conn);
+
+/**
+ * @function
+ *
+ * `ngtcp2_conn_after_retry` returns nonzero if |conn| as a client has
+ * received Retry packet from server and successfully validated it.
+ */
+NGTCP2_EXTERN int ngtcp2_conn_after_retry(ngtcp2_conn *conn);
+
+/**
+ * @function
+ *
+ * `ngtcp2_conn_set_stream_user_data` sets |stream_user_data| to the
+ * stream identified by |stream_id|.
+ *
+ * This function returns 0 if it succeeds, or one of the following
+ * negative error codes:
+ *
+ * :macro:`NGTCP2_ERR_STREAM_NOT_FOUND`
+ * Stream does not exist
+ */
+NGTCP2_EXTERN int ngtcp2_conn_set_stream_user_data(ngtcp2_conn *conn,
+ int64_t stream_id,
+ void *stream_user_data);
+
+/**
+ * @function
+ *
+ * `ngtcp2_conn_update_pkt_tx_time` sets the time instant of the next
+ * packet transmission. This function must be called after (multiple
+ * invocation of) `ngtcp2_conn_writev_stream`. If packet aggregation
+ * (e.g., packet batching, GSO) is used, call this function after all
+ * aggregated datagrams are sent, which indicates multiple invocation
+ * of `ngtcp2_conn_writev_stream`.
+ */
+NGTCP2_EXTERN void ngtcp2_conn_update_pkt_tx_time(ngtcp2_conn *conn,
+ ngtcp2_tstamp ts);
+
+/**
+ * @function
+ *
+ * `ngtcp2_conn_get_send_quantum` returns the maximum number of bytes
+ * that can be sent in one go without packet spacing.
+ */
+NGTCP2_EXTERN size_t ngtcp2_conn_get_send_quantum(ngtcp2_conn *conn);
+
+/**
+ * @function
+ *
+ * `ngtcp2_conn_get_stream_loss_count` returns the number of packets
+ * that contain STREAM frame for a stream identified by |stream_id|
+ * and are declared to be lost. The number may include the spurious
+ * losses. If no stream identified by |stream_id| is found, this
+ * function returns 0.
+ */
+NGTCP2_EXTERN size_t ngtcp2_conn_get_stream_loss_count(ngtcp2_conn *conn,
+ int64_t stream_id);
+
+/**
+ * @function
+ *
+ * `ngtcp2_strerror` returns the text representation of |liberr|.
+ * |liberr| must be one of ngtcp2 library error codes (which is
+ * defined as NGTCP2_ERR_* macro, such as
+ * :macro:`NGTCP2_ERR_DECRYPT`).
+ */
+NGTCP2_EXTERN const char *ngtcp2_strerror(int liberr);
+
+/**
+ * @function
+ *
+ * `ngtcp2_err_is_fatal` returns nonzero if |liberr| is a fatal error.
+ * |liberr| must be one of ngtcp2 library error codes (which is
+ * defined as NGTCP2_ERR_* macro, such as
+ * :macro:`NGTCP2_ERR_DECRYPT`).
+ */
+NGTCP2_EXTERN int ngtcp2_err_is_fatal(int liberr);
+
+/**
+ * @function
+ *
+ * `ngtcp2_err_infer_quic_transport_error_code` returns a QUIC
+ * transport error code which corresponds to |liberr|. |liberr| must
+ * be one of ngtcp2 library error codes (which is defined as
+ * NGTCP2_ERR_* macro, such as :macro:`NGTCP2_ERR_DECRYPT`).
+ */
+NGTCP2_EXTERN uint64_t ngtcp2_err_infer_quic_transport_error_code(int liberr);
+
+/**
+ * @function
+ *
+ * `ngtcp2_addr_init` initializes |dest| with the given arguments and
+ * returns |dest|.
+ */
+NGTCP2_EXTERN ngtcp2_addr *ngtcp2_addr_init(ngtcp2_addr *dest,
+ const ngtcp2_sockaddr *addr,
+ ngtcp2_socklen addrlen);
+
+/**
+ * @function
+ *
+ * `ngtcp2_addr_copy_byte` copies |addr| of length |addrlen| into the
+ * buffer pointed by :member:`dest->addr <ngtcp2_addr.addr>`.
+ * :member:`dest->addrlen <ngtcp2_addr.addrlen>` is updated to have
+ * |addrlen|. This function assumes that :member:`dest->addr
+ * <ngtcp2_addr.addr>` points to a buffer which has a sufficient
+ * capacity to store the copy.
+ */
+NGTCP2_EXTERN void ngtcp2_addr_copy_byte(ngtcp2_addr *dest,
+ const ngtcp2_sockaddr *addr,
+ ngtcp2_socklen addrlen);
+
+/**
+ * @function
+ *
+ * `ngtcp2_path_storage_init` initializes |ps| with the given
+ * arguments. This function copies |local_addr| and |remote_addr|.
+ */
+NGTCP2_EXTERN void ngtcp2_path_storage_init(ngtcp2_path_storage *ps,
+ const ngtcp2_sockaddr *local_addr,
+ ngtcp2_socklen local_addrlen,
+ const ngtcp2_sockaddr *remote_addr,
+ ngtcp2_socklen remote_addrlen,
+ void *user_data);
+
+/**
+ * @function
+ *
+ * `ngtcp2_path_storage_zero` initializes |ps| with the zero length
+ * addresses.
+ */
+NGTCP2_EXTERN void ngtcp2_path_storage_zero(ngtcp2_path_storage *ps);
+
+/**
+ * @function
+ *
+ * `ngtcp2_settings_default` initializes |settings| with the default
+ * values. First this function fills |settings| with 0 and set the
+ * default value to the following fields:
+ *
+ * * :type:`cc_algo <ngtcp2_settings.cc_algo>` =
+ * :enum:`ngtcp2_cc_algo.NGTCP2_CC_ALGO_CUBIC`
+ * * :type:`initial_rtt <ngtcp2_settings.initial_rtt>` =
+ * :macro:`NGTCP2_DEFAULT_INITIAL_RTT`
+ * * :type:`ack_thresh <ngtcp2_settings.ack_thresh>` = 2
+ * * :type:`max_tx_udp_payload_size
+ * <ngtcp2_settings.max_tx_udp_payload_size>` = 1452
+ * * :type:`handshake_timeout <ngtcp2_settings.handshake_timeout>` =
+ * :macro:`NGTCP2_DEFAULT_HANDSHAKE_TIMEOUT`.
+ */
+NGTCP2_EXTERN void ngtcp2_settings_default_versioned(int settings_version,
+ ngtcp2_settings *settings);
+
+/**
+ * @function
+ *
+ * `ngtcp2_transport_params_default` initializes |params| with the
+ * default values. First this function fills |params| with 0 and set
+ * the default value to the following fields:
+ *
+ * * :type:`max_udp_payload_size
+ * <ngtcp2_transport_params.max_udp_payload_size>` =
+ * :macro:`NGTCP2_DEFAULT_MAX_RECV_UDP_PAYLOAD_SIZE`
+ * * :type:`ack_delay_exponent
+ * <ngtcp2_transport_params.ack_delay_exponent>` =
+ * :macro:`NGTCP2_DEFAULT_ACK_DELAY_EXPONENT`
+ * * :type:`max_ack_delay <ngtcp2_transport_params.max_ack_delay>` =
+ * :macro:`NGTCP2_DEFAULT_MAX_ACK_DELAY`
+ * * :type:`active_connection_id_limit
+ * <ngtcp2_transport_params.active_connection_id_limit>` =
+ * :macro:`NGTCP2_DEFAULT_ACTIVE_CONNECTION_ID_LIMIT`
+ */
+NGTCP2_EXTERN void
+ngtcp2_transport_params_default_versioned(int transport_params_version,
+ ngtcp2_transport_params *params);
+
+/**
+ * @function
+ *
+ * `ngtcp2_mem_default` returns the default, system standard memory
+ * allocator.
+ */
+NGTCP2_EXTERN const ngtcp2_mem *ngtcp2_mem_default(void);
+
+/**
+ * @macrosection
+ *
+ * ngtcp2_info macros
+ */
+
+/**
+ * @macro
+ *
+ * :macro:`NGTCP2_VERSION_AGE` is the age of :type:`ngtcp2_info`
+ */
+#define NGTCP2_VERSION_AGE 1
+
+/**
+ * @struct
+ *
+ * :type:`ngtcp2_info` is what `ngtcp2_version()` returns. It holds
+ * information about the particular ngtcp2 version.
+ */
+typedef struct ngtcp2_info {
+ /**
+ * :member:`age` is the age of this struct. This instance of ngtcp2
+ * sets it to :macro:`NGTCP2_VERSION_AGE` but a future version may
+ * bump it and add more struct fields at the bottom
+ */
+ int age;
+ /**
+ * :member:`version_num` is the :macro:`NGTCP2_VERSION_NUM` number
+ * (since age ==1)
+ */
+ int version_num;
+ /**
+ * :member:`version_str` points to the :macro:`NGTCP2_VERSION`
+ * string (since age ==1)
+ */
+ const char *version_str;
+ /* -------- the above fields all exist when age == 1 */
+} ngtcp2_info;
+
+/**
+ * @function
+ *
+ * `ngtcp2_version` returns a pointer to a ngtcp2_info struct with
+ * version information about the run-time library in use. The
+ * |least_version| argument can be set to a 24 bit numerical value for
+ * the least accepted version number and if the condition is not met,
+ * this function will return a ``NULL``. Pass in 0 to skip the
+ * version checking.
+ */
+NGTCP2_EXTERN const ngtcp2_info *ngtcp2_version(int least_version);
+
+/**
+ * @function
+ *
+ * `ngtcp2_is_bidi_stream` returns nonzero if |stream_id| denotes
+ * bidirectional stream.
+ */
+NGTCP2_EXTERN int ngtcp2_is_bidi_stream(int64_t stream_id);
+
+/**
+ * @function
+ *
+ * `ngtcp2_path_copy` copies |src| into |dest|. This function assumes
+ * that |dest| has enough buffer to store the deep copy of
+ * :member:`src->local <ngtcp2_path.local>` and :member:`src->remote
+ * <ngtcp2_path.remote>`.
+ */
+NGTCP2_EXTERN void ngtcp2_path_copy(ngtcp2_path *dest, const ngtcp2_path *src);
+
+/**
+ * @function
+ *
+ * `ngtcp2_path_eq` returns nonzero if |a| and |b| shares the same
+ * local and remote addresses.
+ */
+NGTCP2_EXTERN int ngtcp2_path_eq(const ngtcp2_path *a, const ngtcp2_path *b);
+
+/**
+ * @function
+ *
+ * `ngtcp2_is_supported_version` returns nonzero if the library supports
+ * QUIC version |version|.
+ */
+NGTCP2_EXTERN int ngtcp2_is_supported_version(uint32_t version);
+
+/*
+ * @function
+ *
+ * `ngtcp2_is_reserved_version` returns nonzero if |version| is a
+ * reserved version.
+ */
+NGTCP2_EXTERN int ngtcp2_is_reserved_version(uint32_t version);
+
+/**
+ * @function
+ *
+ * `ngtcp2_select_version` selects and returns a version from the
+ * version set |offered_versions| of |offered_versionslen| elements.
+ * |preferred_versions| of |preferred_versionslen| elements specifies
+ * the preference of versions, which is sorted in the order of
+ * preference. All versions included in |preferred_versions| must be
+ * supported by the library, that is, passing a version to
+ * `ngtcp2_is_supported_version` must return nonzero. This function
+ * is intended to be used by client when it receives Version
+ * Negotiation packet. If no version is selected, this function
+ * returns 0.
+ */
+NGTCP2_EXTERN uint32_t ngtcp2_select_version(const uint32_t *preferred_versions,
+ size_t preferred_versionslen,
+ const uint32_t *offered_versions,
+ size_t offered_versionslen);
+
+/*
+ * Versioned function wrappers
+ */
+
+/*
+ * `ngtcp2_conn_read_pkt` is a wrapper around
+ * `ngtcp2_conn_read_pkt_versioned` to set the correct struct version.
+ */
+#define ngtcp2_conn_read_pkt(CONN, PATH, PI, PKT, PKTLEN, TS) \
+ ngtcp2_conn_read_pkt_versioned((CONN), (PATH), NGTCP2_PKT_INFO_VERSION, \
+ (PI), (PKT), (PKTLEN), (TS))
+
+/*
+ * `ngtcp2_conn_write_pkt` is a wrapper around
+ * `ngtcp2_conn_write_pkt_versioned` to set the correct struct
+ * version.
+ */
+#define ngtcp2_conn_write_pkt(CONN, PATH, PI, DEST, DESTLEN, TS) \
+ ngtcp2_conn_write_pkt_versioned((CONN), (PATH), NGTCP2_PKT_INFO_VERSION, \
+ (PI), (DEST), (DESTLEN), (TS))
+
+/*
+ * `ngtcp2_conn_write_stream` is a wrapper around
+ * `ngtcp2_conn_write_stream_versioned` to set the correct struct
+ * version.
+ */
+#define ngtcp2_conn_write_stream(CONN, PATH, PI, DEST, DESTLEN, PDATALEN, \
+ FLAGS, STREAM_ID, DATA, DATALEN, TS) \
+ ngtcp2_conn_write_stream_versioned( \
+ (CONN), (PATH), NGTCP2_PKT_INFO_VERSION, (PI), (DEST), (DESTLEN), \
+ (PDATALEN), (FLAGS), (STREAM_ID), (DATA), (DATALEN), (TS))
+
+/*
+ * `ngtcp2_conn_writev_stream` is a wrapper around
+ * `ngtcp2_conn_writev_stream_versioned` to set the correct struct
+ * version.
+ */
+#define ngtcp2_conn_writev_stream(CONN, PATH, PI, DEST, DESTLEN, PDATALEN, \
+ FLAGS, STREAM_ID, DATAV, DATAVCNT, TS) \
+ ngtcp2_conn_writev_stream_versioned( \
+ (CONN), (PATH), NGTCP2_PKT_INFO_VERSION, (PI), (DEST), (DESTLEN), \
+ (PDATALEN), (FLAGS), (STREAM_ID), (DATAV), (DATAVCNT), (TS))
+
+/*
+ * `ngtcp2_conn_writev_datagram` is a wrapper around
+ * `ngtcp2_conn_writev_datagram_versioned` to set the correct struct
+ * version.
+ */
+#define ngtcp2_conn_writev_datagram(CONN, PATH, PI, DEST, DESTLEN, PACCEPTED, \
+ FLAGS, DGRAM_ID, DATAV, DATAVCNT, TS) \
+ ngtcp2_conn_writev_datagram_versioned( \
+ (CONN), (PATH), NGTCP2_PKT_INFO_VERSION, (PI), (DEST), (DESTLEN), \
+ (PACCEPTED), (FLAGS), (DGRAM_ID), (DATAV), (DATAVCNT), (TS))
+
+/*
+ * `ngtcp2_conn_write_connection_close` is a wrapper around
+ * `ngtcp2_conn_write_connection_close_versioned` to set the correct
+ * struct version.
+ */
+#define ngtcp2_conn_write_connection_close(CONN, PATH, PI, DEST, DESTLEN, \
+ CCERR, TS) \
+ ngtcp2_conn_write_connection_close_versioned( \
+ (CONN), (PATH), NGTCP2_PKT_INFO_VERSION, (PI), (DEST), (DESTLEN), \
+ (CCERR), (TS))
+
+/*
+ * `ngtcp2_encode_transport_params` is a wrapper around
+ * `ngtcp2_encode_transport_params_versioned` to set the correct
+ * struct version.
+ */
+#define ngtcp2_encode_transport_params(DEST, DESTLEN, EXTTYPE, PARAMS) \
+ ngtcp2_encode_transport_params_versioned( \
+ (DEST), (DESTLEN), (EXTTYPE), NGTCP2_TRANSPORT_PARAMS_VERSION, (PARAMS))
+
+/*
+ * `ngtcp2_decode_transport_params` is a wrapper around
+ * `ngtcp2_decode_transport_params_versioned` to set the correct
+ * struct version.
+ */
+#define ngtcp2_decode_transport_params(PARAMS, EXTTYPE, DATA, DATALEN) \
+ ngtcp2_decode_transport_params_versioned( \
+ NGTCP2_TRANSPORT_PARAMS_VERSION, (PARAMS), (EXTTYPE), (DATA), (DATALEN))
+
+/*
+ * `ngtcp2_conn_client_new` is a wrapper around
+ * `ngtcp2_conn_client_new_versioned` to set the correct struct
+ * version.
+ */
+#define ngtcp2_conn_client_new(PCONN, DCID, SCID, PATH, VERSION, CALLBACKS, \
+ SETTINGS, PARAMS, MEM, USER_DATA) \
+ ngtcp2_conn_client_new_versioned( \
+ (PCONN), (DCID), (SCID), (PATH), (VERSION), NGTCP2_CALLBACKS_VERSION, \
+ (CALLBACKS), NGTCP2_SETTINGS_VERSION, (SETTINGS), \
+ NGTCP2_TRANSPORT_PARAMS_VERSION, (PARAMS), (MEM), (USER_DATA))
+
+/*
+ * `ngtcp2_conn_server_new` is a wrapper around
+ * `ngtcp2_conn_server_new_versioned` to set the correct struct
+ * version.
+ */
+#define ngtcp2_conn_server_new(PCONN, DCID, SCID, PATH, VERSION, CALLBACKS, \
+ SETTINGS, PARAMS, MEM, USER_DATA) \
+ ngtcp2_conn_server_new_versioned( \
+ (PCONN), (DCID), (SCID), (PATH), (VERSION), NGTCP2_CALLBACKS_VERSION, \
+ (CALLBACKS), NGTCP2_SETTINGS_VERSION, (SETTINGS), \
+ NGTCP2_TRANSPORT_PARAMS_VERSION, (PARAMS), (MEM), (USER_DATA))
+
+/*
+ * `ngtcp2_conn_set_early_remote_transport_params` is a wrapper around
+ * `ngtcp2_conn_set_early_remote_transport_params_versioned` to set
+ * the correct struct version.
+ */
+#define ngtcp2_conn_set_early_remote_transport_params(CONN, PARAMS) \
+ ngtcp2_conn_set_early_remote_transport_params_versioned( \
+ (CONN), NGTCP2_TRANSPORT_PARAMS_VERSION, (PARAMS))
+
+/*
+ * `ngtcp2_conn_set_local_transport_params` is a wrapper around
+ * `ngtcp2_conn_set_local_transport_params_versioned` to set the
+ * correct struct version.
+ */
+#define ngtcp2_conn_set_local_transport_params(CONN, PARAMS) \
+ ngtcp2_conn_set_local_transport_params_versioned( \
+ (CONN), NGTCP2_TRANSPORT_PARAMS_VERSION, (PARAMS))
+
+/*
+ * `ngtcp2_transport_params_default` is a wrapper around
+ * `ngtcp2_transport_params_default_versioned` to set the correct
+ * struct version.
+ */
+#define ngtcp2_transport_params_default(PARAMS) \
+ ngtcp2_transport_params_default_versioned(NGTCP2_TRANSPORT_PARAMS_VERSION, \
+ (PARAMS))
+
+/*
+ * `ngtcp2_conn_get_conn_stat` is a wrapper around
+ * `ngtcp2_conn_get_conn_stat_versioned` to set the correct struct
+ * version.
+ */
+#define ngtcp2_conn_get_conn_stat(CONN, CSTAT) \
+ ngtcp2_conn_get_conn_stat_versioned((CONN), NGTCP2_CONN_STAT_VERSION, (CSTAT))
+
+/*
+ * `ngtcp2_settings_default` is a wrapper around
+ * `ngtcp2_settings_default_versioned` to set the correct struct
+ * version.
+ */
+#define ngtcp2_settings_default(SETTINGS) \
+ ngtcp2_settings_default_versioned(NGTCP2_SETTINGS_VERSION, (SETTINGS))
+
+#ifdef _MSC_VER
+# pragma warning(pop)
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* NGTCP2_H */
diff --git a/src/contrib/libngtcp2/ngtcp2/ngtcp2_crypto.h b/src/contrib/libngtcp2/ngtcp2/ngtcp2_crypto.h
new file mode 100644
index 0000000..4736b51
--- /dev/null
+++ b/src/contrib/libngtcp2/ngtcp2/ngtcp2_crypto.h
@@ -0,0 +1,893 @@
+/*
+ * ngtcp2
+ *
+ * Copyright (c) 2019 ngtcp2 contributors
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#ifndef NGTCP2_CRYPTO_H
+#define NGTCP2_CRYPTO_H
+
+#include <ngtcp2/ngtcp2.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifdef WIN32
+# ifndef WIN32_LEAN_AND_MEAN
+# define WIN32_LEAN_AND_MEAN
+# endif
+# include <ws2tcpip.h>
+#endif /* WIN32 */
+
+/**
+ * @macro
+ *
+ * :macro:`NGTCP2_CRYPTO_INITIAL_SECRETLEN` is the length of secret
+ * for Initial packets.
+ */
+#define NGTCP2_CRYPTO_INITIAL_SECRETLEN 32
+
+/**
+ * @macro
+ *
+ * :macro:`NGTCP2_CRYPTO_INITIAL_KEYLEN` is the length of key for
+ * Initial packets.
+ */
+#define NGTCP2_CRYPTO_INITIAL_KEYLEN 16
+
+/**
+ * @macro
+ *
+ * :macro:`NGTCP2_CRYPTO_INITIAL_IVLEN` is the length of IV for
+ * Initial packets.
+ */
+#define NGTCP2_CRYPTO_INITIAL_IVLEN 12
+
+/**
+ * @function
+ *
+ * `ngtcp2_crypto_ctx_tls` initializes |ctx| by extracting negotiated
+ * ciphers and message digests from native TLS session
+ * |tls_native_handle|. This is used for encrypting/decrypting
+ * Handshake and Short header packets.
+ *
+ * If libngtcp2_crypto_openssl is linked, |tls_native_handle| must be
+ * a pointer to SSL object.
+ */
+NGTCP2_EXTERN ngtcp2_crypto_ctx *ngtcp2_crypto_ctx_tls(ngtcp2_crypto_ctx *ctx,
+ void *tls_native_handle);
+
+/**
+ * @function
+ *
+ * `ngtcp2_crypto_ctx_tls_early` initializes |ctx| by extracting early
+ * ciphers and message digests from native TLS session
+ * |tls_native_handle|. This is used for encrypting/decrypting 0RTT
+ * packets.
+ *
+ * If libngtcp2_crypto_openssl is linked, |tls_native_handle| must be
+ * a pointer to SSL object.
+ */
+NGTCP2_EXTERN ngtcp2_crypto_ctx *
+ngtcp2_crypto_ctx_tls_early(ngtcp2_crypto_ctx *ctx, void *tls_native_handle);
+
+/**
+ * @function
+ *
+ * `ngtcp2_crypto_md_init` initializes |md| with the provided
+ * |md_native_handle| which is an underlying message digest object.
+ *
+ * If libngtcp2_crypto_openssl is linked, |md_native_handle| must be a
+ * pointer to EVP_MD.
+ *
+ * If libngtcp2_crypto_gnutls is linked, |md_native_handle| must be
+ * gnutls_mac_algorithm_t casted to ``void *``.
+ *
+ * If libngtcp2_crypto_boringssl is linked, |md_native_handle| must be
+ * a pointer to EVP_MD.
+ */
+NGTCP2_EXTERN ngtcp2_crypto_md *ngtcp2_crypto_md_init(ngtcp2_crypto_md *md,
+ void *md_native_handle);
+
+/**
+ * @function
+ *
+ * `ngtcp2_crypto_md_hashlen` returns the length of |md| output.
+ */
+NGTCP2_EXTERN size_t ngtcp2_crypto_md_hashlen(const ngtcp2_crypto_md *md);
+
+/**
+ * @function
+ *
+ * `ngtcp2_crypto_aead_keylen` returns the length of key for |aead|.
+ */
+NGTCP2_EXTERN size_t ngtcp2_crypto_aead_keylen(const ngtcp2_crypto_aead *aead);
+
+/**
+ * @function
+ *
+ * `ngtcp2_crypto_aead_noncelen` returns the length of nonce for
+ * |aead|.
+ */
+NGTCP2_EXTERN size_t
+ngtcp2_crypto_aead_noncelen(const ngtcp2_crypto_aead *aead);
+
+/**
+ * @function
+ *
+ * `ngtcp2_crypto_hkdf_extract` performs HKDF extract operation. The
+ * result is the length of |md| and is stored to the buffer pointed by
+ * |dest|. The caller is responsible to specify the buffer that can
+ * store the output.
+ *
+ * This function returns 0 if it succeeds, or -1.
+ */
+NGTCP2_EXTERN int
+ngtcp2_crypto_hkdf_extract(uint8_t *dest, const ngtcp2_crypto_md *md,
+ const uint8_t *secret, size_t secretlen,
+ const uint8_t *salt, size_t saltlen);
+
+/**
+ * @function
+ *
+ * `ngtcp2_crypto_hkdf_expand` performs HKDF expand operation. The
+ * result is |destlen| bytes long and is stored to the buffer pointed
+ * by |dest|.
+ *
+ * This function returns 0 if it succeeds, or -1.
+ */
+NGTCP2_EXTERN int ngtcp2_crypto_hkdf_expand(uint8_t *dest, size_t destlen,
+ const ngtcp2_crypto_md *md,
+ const uint8_t *secret,
+ size_t secretlen,
+ const uint8_t *info,
+ size_t infolen);
+
+/**
+ * @function
+ *
+ * `ngtcp2_crypto_hkdf` performs HKDF operation. The result is
+ * |destlen| bytes long and is stored to the buffer pointed by |dest|.
+ *
+ * This function returns 0 if it succeeds, or -1.
+ */
+NGTCP2_EXTERN int ngtcp2_crypto_hkdf(uint8_t *dest, size_t destlen,
+ const ngtcp2_crypto_md *md,
+ const uint8_t *secret, size_t secretlen,
+ const uint8_t *salt, size_t saltlen,
+ const uint8_t *info, size_t infolen);
+
+/**
+ * @function
+ *
+ * `ngtcp2_crypto_hkdf_expand_label` performs HKDF expand label. The
+ * result is |destlen| bytes long and is stored to the buffer pointed
+ * by |dest|.
+ *
+ * This function returns 0 if it succeeds, or -1.
+ */
+NGTCP2_EXTERN int ngtcp2_crypto_hkdf_expand_label(uint8_t *dest, size_t destlen,
+ const ngtcp2_crypto_md *md,
+ const uint8_t *secret,
+ size_t secretlen,
+ const uint8_t *label,
+ size_t labellen);
+
+/**
+ * @enum
+ *
+ * :type:`ngtcp2_crypto_side` indicates which side the application
+ * implements; client or server.
+ */
+typedef enum ngtcp2_crypto_side {
+ /**
+ * :enum:`NGTCP2_CRYPTO_SIDE_CLIENT` indicates that the application
+ * is client.
+ */
+ NGTCP2_CRYPTO_SIDE_CLIENT,
+ /**
+ * :enum:`NGTCP2_CRYPTO_SIDE_SERVER` indicates that the application
+ * is server.
+ */
+ NGTCP2_CRYPTO_SIDE_SERVER
+} ngtcp2_crypto_side;
+
+/**
+ * @function
+ *
+ * `ngtcp2_crypto_packet_protection_ivlen` returns the length of IV
+ * used to encrypt QUIC packet.
+ */
+NGTCP2_EXTERN size_t
+ngtcp2_crypto_packet_protection_ivlen(const ngtcp2_crypto_aead *aead);
+
+/**
+ * @function
+ *
+ * `ngtcp2_crypto_encrypt` encrypts |plaintext| of length
+ * |plaintextlen| and writes the ciphertext into the buffer pointed by
+ * |dest|. The length of ciphertext is plaintextlen +
+ * :member:`aead->max_overhead <ngtcp2_crypto_aead.max_overhead>`
+ * bytes long. |dest| must have enough capacity to store the
+ * ciphertext. It is allowed to specify the same value to |dest| and
+ * |plaintext|.
+ *
+ * This function returns 0 if it succeeds, or -1.
+ */
+NGTCP2_EXTERN int ngtcp2_crypto_encrypt(uint8_t *dest,
+ const ngtcp2_crypto_aead *aead,
+ const ngtcp2_crypto_aead_ctx *aead_ctx,
+ const uint8_t *plaintext,
+ size_t plaintextlen,
+ const uint8_t *nonce, size_t noncelen,
+ const uint8_t *aad, size_t aadlen);
+
+/**
+ * @function
+ *
+ * `ngtcp2_crypto_encrypt_cb` is a wrapper function around
+ * `ngtcp2_crypto_encrypt`. It can be directly passed to
+ * :member:`ngtcp2_callbacks.encrypt` field.
+ *
+ * This function returns 0 if it succeeds, or
+ * :macro:`NGTCP2_ERR_CALLBACK_FAILURE`.
+ */
+NGTCP2_EXTERN int
+ngtcp2_crypto_encrypt_cb(uint8_t *dest, const ngtcp2_crypto_aead *aead,
+ const ngtcp2_crypto_aead_ctx *aead_ctx,
+ const uint8_t *plaintext, size_t plaintextlen,
+ const uint8_t *nonce, size_t noncelen,
+ const uint8_t *aad, size_t aadlen);
+
+/**
+ * @function
+ *
+ * `ngtcp2_crypto_decrypt` decrypts |ciphertext| of length
+ * |ciphertextlen| and writes the plaintext into the buffer pointed by
+ * |dest|. The length of plaintext is ciphertextlen -
+ * :member:`aead->max_overhead <ngtcp2_crypto_aead.max_overhead>`
+ * bytes long. |dest| must have enough capacity to store the
+ * plaintext. It is allowed to specify the same value to |dest| and
+ * |ciphertext|.
+ *
+ * This function returns 0 if it succeeds, or -1.
+ */
+NGTCP2_EXTERN int ngtcp2_crypto_decrypt(uint8_t *dest,
+ const ngtcp2_crypto_aead *aead,
+ const ngtcp2_crypto_aead_ctx *aead_ctx,
+ const uint8_t *ciphertext,
+ size_t ciphertextlen,
+ const uint8_t *nonce, size_t noncelen,
+ const uint8_t *aad, size_t aadlen);
+
+/**
+ * @function
+ *
+ * `ngtcp2_crypto_decrypt_cb` is a wrapper function around
+ * `ngtcp2_crypto_decrypt`. It can be directly passed to
+ * :member:`ngtcp2_callbacks.decrypt` field.
+ *
+ * This function returns 0 if it succeeds, or
+ * :macro:`NGTCP2_ERR_TLS_DECRYPT`.
+ */
+NGTCP2_EXTERN int
+ngtcp2_crypto_decrypt_cb(uint8_t *dest, const ngtcp2_crypto_aead *aead,
+ const ngtcp2_crypto_aead_ctx *aead_ctx,
+ const uint8_t *ciphertext, size_t ciphertextlen,
+ const uint8_t *nonce, size_t noncelen,
+ const uint8_t *aad, size_t aadlen);
+
+/**
+ * @function
+ *
+ * `ngtcp2_crypto_hp_mask` generates mask which is used in packet
+ * header encryption. The mask is written to the buffer pointed by
+ * |dest|. The sample is passed as |sample| which is
+ * :macro:`NGTCP2_HP_SAMPLELEN` bytes long. The length of mask must
+ * be at least :macro:`NGTCP2_HP_MASKLEN`. The library only uses the
+ * first :macro:`NGTCP2_HP_MASKLEN` bytes of the produced mask. The
+ * buffer pointed by |dest| must have at least
+ * :macro:`NGTCP2_HP_SAMPLELEN` bytes available.
+ *
+ * This function returns 0 if it succeeds, or -1.
+ */
+NGTCP2_EXTERN int ngtcp2_crypto_hp_mask(uint8_t *dest,
+ const ngtcp2_crypto_cipher *hp,
+ const ngtcp2_crypto_cipher_ctx *hp_ctx,
+ const uint8_t *sample);
+
+/**
+ * @function
+ *
+ * `ngtcp2_crypto_hp_mask_cb` is a wrapper function around
+ * `ngtcp2_crypto_hp_mask`. It can be directly passed to
+ * :member:`ngtcp2_callbacks.hp_mask` field.
+ *
+ * This function returns 0 if it succeeds, or
+ * :macro:`NGTCP2_ERR_CALLBACK_FAILURE`.
+ */
+NGTCP2_EXTERN int
+ngtcp2_crypto_hp_mask_cb(uint8_t *dest, const ngtcp2_crypto_cipher *hp,
+ const ngtcp2_crypto_cipher_ctx *hp_ctx,
+ const uint8_t *sample);
+
+/**
+ * @function
+ *
+ * `ngtcp2_crypto_derive_and_install_rx_key` derives the rx keys from
+ * |secret| and installs new keys to |conn|.
+ *
+ * If |key| is not NULL, the derived packet protection key for
+ * decryption is written to the buffer pointed by |key|. If |iv| is
+ * not NULL, the derived packet protection IV for decryption is
+ * written to the buffer pointed by |iv|. If |hp| is not NULL, the
+ * derived header protection key for decryption is written to the
+ * buffer pointed by |hp|.
+ *
+ * |secretlen| specifies the length of |secret|.
+ *
+ * The length of packet protection key and header protection key is
+ * `ngtcp2_crypto_aead_keylen(ctx->aead) <ngtcp2_crypto_aead_keylen>`,
+ * and the length of packet protection IV is
+ * `ngtcp2_crypto_packet_protection_ivlen(ctx->aead)
+ * <ngtcp2_crypto_packet_protection_ivlen>` where ctx is obtained by
+ * `ngtcp2_crypto_ctx_tls` (or `ngtcp2_crypto_ctx_tls_early` if
+ * |level| == :enum:`ngtcp2_crypto_level.NGTCP2_CRYPTO_LEVEL_EARLY`).
+ *
+ * In the first call of this function, it calls
+ * `ngtcp2_conn_set_crypto_ctx` (or `ngtcp2_conn_set_early_crypto_ctx`
+ * if |level| ==
+ * :enum:`ngtcp2_crypto_level.NGTCP2_CRYPTO_LEVEL_EARLY`) to set
+ * negotiated AEAD and message digest algorithm. After the successful
+ * call of this function, application can use
+ * `ngtcp2_conn_get_crypto_ctx` (or `ngtcp2_conn_get_early_crypto_ctx`
+ * if |level| ==
+ * :enum:`ngtcp2_crypto_level.NGTCP2_CRYPTO_LEVEL_EARLY`) to get
+ * :type:`ngtcp2_crypto_ctx`.
+ *
+ * If |conn| is initialized as client, and |level| is
+ * :enum:`ngtcp2_crypto_level.NGTCP2_CRYPTO_LEVEL_APPLICATION`, this
+ * function retrieves a remote QUIC transport parameters extension
+ * from an object obtained by `ngtcp2_conn_get_tls_native_handle` and
+ * sets it to |conn| by calling
+ * `ngtcp2_conn_decode_remote_transport_params`.
+ *
+ * This function returns 0 if it succeeds, or -1.
+ */
+NGTCP2_EXTERN int ngtcp2_crypto_derive_and_install_rx_key(
+ ngtcp2_conn *conn, uint8_t *key, uint8_t *iv, uint8_t *hp,
+ ngtcp2_crypto_level level, const uint8_t *secret, size_t secretlen);
+
+/**
+ * @function
+ *
+ * `ngtcp2_crypto_derive_and_install_tx_key` derives the tx keys from
+ * |secret| and installs new keys to |conn|.
+ *
+ * If |key| is not NULL, the derived packet protection key for
+ * encryption is written to the buffer pointed by |key|. If |iv| is
+ * not NULL, the derived packet protection IV for encryption is
+ * written to the buffer pointed by |iv|. If |hp| is not NULL, the
+ * derived header protection key for encryption is written to the
+ * buffer pointed by |hp|.
+ *
+ * |secretlen| specifies the length of |secret|.
+ *
+ * The length of packet protection key and header protection key is
+ * `ngtcp2_crypto_aead_keylen(ctx->aead) <ngtcp2_crypto_aead_keylen>`,
+ * and the length of packet protection IV is
+ * `ngtcp2_crypto_packet_protection_ivlen(ctx->aead)
+ * <ngtcp2_crypto_packet_protection_ivlen>` where ctx is obtained by
+ * `ngtcp2_crypto_ctx_tls` (or `ngtcp2_crypto_ctx_tls_early` if
+ * |level| == :enum:`ngtcp2_crypto_level.NGTCP2_CRYPTO_LEVEL_EARLY`).
+ *
+ * In the first call of this function, it calls
+ * `ngtcp2_conn_set_crypto_ctx` (or `ngtcp2_conn_set_early_crypto_ctx`
+ * if |level| ==
+ * :enum:`ngtcp2_crypto_level.NGTCP2_CRYPTO_LEVEL_EARLY`) to set
+ * negotiated AEAD and message digest algorithm. After the successful
+ * call of this function, application can use
+ * `ngtcp2_conn_get_crypto_ctx` (or `ngtcp2_conn_get_early_crypto_ctx`
+ * if |level| ==
+ * :enum:`ngtcp2_crypto_level.NGTCP2_CRYPTO_LEVEL_EARLY`) to get
+ * :type:`ngtcp2_crypto_ctx`.
+ *
+ * If |conn| is initialized as server, and |level| is
+ * :enum:`ngtcp2_crypto_level.NGTCP2_CRYPTO_LEVEL_APPLICATION`, this
+ * function retrieves a remote QUIC transport parameters extension
+ * from an object obtained by `ngtcp2_conn_get_tls_native_handle` and
+ * sets it to |conn| by calling
+ * `ngtcp2_conn_decode_remote_transport_params`.
+ *
+ * This function returns 0 if it succeeds, or -1.
+ */
+NGTCP2_EXTERN int ngtcp2_crypto_derive_and_install_tx_key(
+ ngtcp2_conn *conn, uint8_t *key, uint8_t *iv, uint8_t *hp,
+ ngtcp2_crypto_level level, const uint8_t *secret, size_t secretlen);
+
+/**
+ * @function
+ *
+ * `ngtcp2_crypto_update_key` updates traffic keying materials.
+ *
+ * The new traffic secret for decryption is written to the buffer
+ * pointed by |rx_secret|. The length of secret is |secretlen| bytes,
+ * and |rx_secret| must point to the buffer which has enough capacity.
+ *
+ * The new traffic secret for encryption is written to the buffer
+ * pointed by |tx_secret|. The length of secret is |secretlen| bytes,
+ * and |tx_secret| must point to the buffer which has enough capacity.
+ *
+ * The derived packet protection key for decryption is written to the
+ * buffer pointed by |rx_key|. The derived packet protection IV for
+ * decryption is written to the buffer pointed by |rx_iv|.
+ * |rx_aead_ctx| must be constructed with |rx_key|.
+ *
+ * The derived packet protection key for encryption is written to the
+ * buffer pointed by |tx_key|. The derived packet protection IV for
+ * encryption is written to the buffer pointed by |tx_iv|.
+ * |tx_aead_ctx| must be constructed with |rx_key|.
+ *
+ * |current_rx_secret| and |current_tx_secret| are the current traffic
+ * secrets for decryption and encryption. |secretlen| specifies the
+ * length of |rx_secret| and |tx_secret|.
+ *
+ * The length of packet protection key and header protection key is
+ * `ngtcp2_crypto_aead_keylen(ctx->aead) <ngtcp2_crypto_aead_keylen>`,
+ * and the length of packet protection IV is
+ * `ngtcp2_crypto_packet_protection_ivlen(ctx->aead)
+ * <ngtcp2_crypto_packet_protection_ivlen>` where ctx is obtained by
+ * `ngtcp2_crypto_ctx_tls`.
+ *
+ * This function returns 0 if it succeeds, or -1.
+ */
+NGTCP2_EXTERN int ngtcp2_crypto_update_key(
+ ngtcp2_conn *conn, uint8_t *rx_secret, uint8_t *tx_secret,
+ ngtcp2_crypto_aead_ctx *rx_aead_ctx, uint8_t *rx_key, uint8_t *rx_iv,
+ ngtcp2_crypto_aead_ctx *tx_aead_ctx, uint8_t *tx_key, uint8_t *tx_iv,
+ const uint8_t *current_rx_secret, const uint8_t *current_tx_secret,
+ size_t secretlen);
+
+/**
+ * @function
+ *
+ * `ngtcp2_crypto_update_key_cb` is a wrapper function around
+ * `ngtcp2_crypto_update_key`. It can be directly passed to
+ * :member:`ngtcp2_callbacks.update_key` field.
+ *
+ * This function returns 0 if it succeeds, or
+ * :macro:`NGTCP2_ERR_CALLBACK_FAILURE`.
+ */
+NGTCP2_EXTERN int ngtcp2_crypto_update_key_cb(
+ ngtcp2_conn *conn, uint8_t *rx_secret, uint8_t *tx_secret,
+ ngtcp2_crypto_aead_ctx *rx_aead_ctx, uint8_t *rx_iv,
+ ngtcp2_crypto_aead_ctx *tx_aead_ctx, uint8_t *tx_iv,
+ const uint8_t *current_rx_secret, const uint8_t *current_tx_secret,
+ size_t secretlen, void *user_data);
+
+/**
+ * @function
+ *
+ * `ngtcp2_crypto_client_initial_cb` installs initial secrets and
+ * encryption keys and sets QUIC transport parameters.
+ *
+ * This function can be directly passed to
+ * :member:`ngtcp2_callbacks.client_initial` field. It is only used
+ * by client.
+ *
+ * This function returns 0 if it succeeds, or
+ * :macro:`NGTCP2_ERR_CALLBACK_FAILURE`.
+ */
+NGTCP2_EXTERN int ngtcp2_crypto_client_initial_cb(ngtcp2_conn *conn,
+ void *user_data);
+
+/**
+ * @function
+ *
+ * `ngtcp2_crypto_recv_retry_cb` re-installs initial secrets in
+ * response to incoming Retry packet.
+ *
+ * This function can be directly passed to
+ * :member:`ngtcp2_callbacks.recv_retry` field. It is only used
+ * by client.
+ *
+ * This function returns 0 if it succeeds, or
+ * :macro:`NGTCP2_ERR_CALLBACK_FAILURE`.
+ */
+NGTCP2_EXTERN int ngtcp2_crypto_recv_retry_cb(ngtcp2_conn *conn,
+ const ngtcp2_pkt_hd *hd,
+ void *user_data);
+
+/**
+ * @function
+ *
+ * `ngtcp2_crypto_recv_client_initial_cb` installs initial secrets in
+ * response to an incoming Initial packet from client, and sets QUIC
+ * transport parameters.
+ *
+ * This function can be directly passed to
+ * :member:`ngtcp2_callbacks.recv_client_initial` field. It is
+ * only used by server.
+ *
+ * This function returns 0 if it succeeds, or
+ * :macro:`NGTCP2_ERR_CALLBACK_FAILURE`.
+ */
+NGTCP2_EXTERN int ngtcp2_crypto_recv_client_initial_cb(ngtcp2_conn *conn,
+ const ngtcp2_cid *dcid,
+ void *user_data);
+
+/**
+ * @function
+ *
+ * `ngtcp2_crypto_read_write_crypto_data` reads CRYPTO data |data| of
+ * length |datalen| in encryption level |crypto_level| and may feed
+ * outgoing CRYPTO data to |conn|. This function can drive handshake.
+ * This function can be also used after handshake completes. It is
+ * allowed to call this function with |datalen| == 0. In this case,
+ * no additional read operation is done.
+ *
+ * This function returns 0 if it succeeds, or a negative error code.
+ * The generic error code is -1 if a specific error code is not
+ * suitable. The error codes less than -10000 are specific to
+ * underlying TLS implementation. For OpenSSL, the error codes are
+ * defined in *ngtcp2_crypto_openssl.h*.
+ */
+NGTCP2_EXTERN int
+ngtcp2_crypto_read_write_crypto_data(ngtcp2_conn *conn,
+ ngtcp2_crypto_level crypto_level,
+ const uint8_t *data, size_t datalen);
+
+/**
+ * @function
+ *
+ * `ngtcp2_crypto_recv_crypto_data_cb` is a wrapper function around
+ * `ngtcp2_crypto_read_write_crypto_data`. It can be directly passed
+ * to :member:`ngtcp2_callbacks.recv_crypto_data` field.
+ *
+ * If this function is used, the TLS implementation specific error
+ * codes described in `ngtcp2_crypto_read_write_crypto_data` are
+ * treated as if it returns -1. Do not use this function if an
+ * application wishes to use the TLS implementation specific error
+ * codes.
+ */
+NGTCP2_EXTERN int ngtcp2_crypto_recv_crypto_data_cb(
+ ngtcp2_conn *conn, ngtcp2_crypto_level crypto_level, uint64_t offset,
+ const uint8_t *data, size_t datalen, void *user_data);
+
+/**
+ * @function
+ *
+ * `ngtcp2_crypto_generate_stateless_reset_token` generates a
+ * stateless reset token using HKDF extraction using the given |cid|
+ * and static key |secret| as input. The token will be written to
+ * the buffer pointed by |token| and it must have a capacity of at
+ * least :macro:`NGTCP2_STATELESS_RESET_TOKENLEN` bytes.
+ *
+ * This function returns 0 if it succeeds, or -1.
+ */
+NGTCP2_EXTERN int ngtcp2_crypto_generate_stateless_reset_token(
+ uint8_t *token, const uint8_t *secret, size_t secretlen,
+ const ngtcp2_cid *cid);
+
+/**
+ * @macro
+ *
+ * :macro:`NGTCP2_CRYPTO_TOKEN_RAND_DATALEN` is the length of random
+ * data added to a token generated by
+ * `ngtcp2_crypto_generate_retry_token` or
+ * `ngtcp2_crypto_generate_regular_token`.
+ */
+#define NGTCP2_CRYPTO_TOKEN_RAND_DATALEN 32
+
+/**
+ * @macro
+ *
+ * :macro:`NGTCP2_CRYPTO_TOKEN_MAGIC_RETRY` is the magic byte for
+ * Retry token generated by `ngtcp2_crypto_generate_retry_token`.
+ */
+#define NGTCP2_CRYPTO_TOKEN_MAGIC_RETRY 0xb6
+
+/**
+ * @macro
+ *
+ * :macro:`NGTCP2_CRYPTO_TOKEN_MAGIC_REGULAR` is the magic byte for a
+ * token generated by `ngtcp2_crypto_generate_regular_token`.
+ */
+#define NGTCP2_CRYPTO_TOKEN_MAGIC_REGULAR 0x36
+
+/**
+ * @macro
+ *
+ * :macro:`NGTCP2_CRYPTO_MAX_RETRY_TOKENLEN` is the maximum length of
+ * a token generated by `ngtcp2_crypto_generate_retry_token`.
+ */
+#define NGTCP2_CRYPTO_MAX_RETRY_TOKENLEN \
+ (/* magic = */ 1 + /* cid len = */ 1 + NGTCP2_MAX_CIDLEN + \
+ sizeof(ngtcp2_tstamp) + /* aead tag = */ 16 + \
+ NGTCP2_CRYPTO_TOKEN_RAND_DATALEN)
+
+/**
+ * @macro
+ *
+ * :macro:`NGTCP2_CRYPTO_MAX_REGULAR_TOKENLEN` is the maximum length
+ * of a token generated by `ngtcp2_crypto_generate_regular_token`.
+ */
+#define NGTCP2_CRYPTO_MAX_REGULAR_TOKENLEN \
+ (/* magic = */ 1 + sizeof(ngtcp2_tstamp) + /* aead tag = */ 16 + \
+ NGTCP2_CRYPTO_TOKEN_RAND_DATALEN)
+
+/**
+ * @function
+ *
+ * `ngtcp2_crypto_generate_retry_token` generates a token in the
+ * buffer pointed by |token| that is sent with Retry packet. The
+ * buffer pointed by |token| must have at least
+ * :macro:`NGTCP2_CRYPTO_MAX_RETRY_TOKENLEN` bytes long. The
+ * successfully generated token starts with
+ * :macro:`NGTCP2_CRYPTO_TOKEN_MAGIC_RETRY`. |secret| of length
+ * |secretlen| is an initial keying material to generate keys to
+ * encrypt the token. |version| is QUIC version. |remote_addr| of
+ * length |remote_addrlen| is an address of client. |retry_scid| is a
+ * Source Connection ID chosen by server and set in Retry packet.
+ * |odcid| is a Destination Connection ID in Initial packet sent by
+ * client. |ts| is the timestamp when the token is generated.
+ *
+ * This function returns the length of generated token if it succeeds,
+ * or -1.
+ */
+NGTCP2_EXTERN ngtcp2_ssize ngtcp2_crypto_generate_retry_token(
+ uint8_t *token, const uint8_t *secret, size_t secretlen, uint32_t version,
+ const ngtcp2_sockaddr *remote_addr, ngtcp2_socklen remote_addrlen,
+ const ngtcp2_cid *retry_scid, const ngtcp2_cid *odcid, ngtcp2_tstamp ts);
+
+/**
+ * @function
+ *
+ * `ngtcp2_crypto_verify_retry_token` verifies Retry token stored in
+ * the buffer pointed by |token| of length |tokenlen|. |secret| of
+ * length |secretlen| is an initial keying material to generate keys
+ * to decrypt the token. |version| is QUIC version of the Initial
+ * packet that contains this token. |remote_addr| of length
+ * |remote_addrlen| is an address of client. |dcid| is a Destination
+ * Connection ID in Initial packet sent by client. |timeout| is the
+ * period during which the token is valid. |ts| is the current
+ * timestamp. When validation succeeds, the extracted Destination
+ * Connection ID (which is the Destination Connection ID in Initial
+ * packet sent by client that triggered Retry packet) is stored to the
+ * buffer pointed by |odcid|.
+ *
+ * This function returns 0 if it succeeds, or -1.
+ */
+NGTCP2_EXTERN int ngtcp2_crypto_verify_retry_token(
+ ngtcp2_cid *odcid, const uint8_t *token, size_t tokenlen,
+ const uint8_t *secret, size_t secretlen, uint32_t version,
+ const ngtcp2_sockaddr *remote_addr, ngtcp2_socklen remote_addrlen,
+ const ngtcp2_cid *dcid, ngtcp2_duration timeout, ngtcp2_tstamp ts);
+
+/**
+ * @function
+ *
+ * `ngtcp2_crypto_generate_regular_token` generates a token in the
+ * buffer pointed by |token| that is sent with NEW_TOKEN frame. The
+ * buffer pointed by |token| must have at least
+ * :macro:`NGTCP2_CRYPTO_MAX_REGULAR_TOKENLEN` bytes long. The
+ * successfully generated token starts with
+ * :macro:`NGTCP2_CRYPTO_TOKEN_MAGIC_REGULAR`. |secret| of length
+ * |secretlen| is an initial keying material to generate keys to
+ * encrypt the token. |remote_addr| of length |remote_addrlen| is an
+ * address of client. |ts| is the timestamp when the token is
+ * generated.
+ *
+ * This function returns the length of generated token if it succeeds,
+ * or -1.
+ */
+NGTCP2_EXTERN ngtcp2_ssize ngtcp2_crypto_generate_regular_token(
+ uint8_t *token, const uint8_t *secret, size_t secretlen,
+ const ngtcp2_sockaddr *remote_addr, ngtcp2_socklen remote_addrlen,
+ ngtcp2_tstamp ts);
+
+/**
+ * @function
+ *
+ * `ngtcp2_crypto_verify_regular_token` verifies a regular token
+ * stored in the buffer pointed by |token| of length |tokenlen|.
+ * |secret| of length |secretlen| is an initial keying material to
+ * generate keys to decrypt the token. |remote_addr| of length
+ * |remote_addrlen| is an address of client. |timeout| is the period
+ * during which the token is valid. |ts| is the current timestamp.
+ *
+ * This function returns 0 if it succeeds, or -1.
+ */
+NGTCP2_EXTERN int ngtcp2_crypto_verify_regular_token(
+ const uint8_t *token, size_t tokenlen, const uint8_t *secret,
+ size_t secretlen, const ngtcp2_sockaddr *remote_addr,
+ ngtcp2_socklen remote_addrlen, ngtcp2_duration timeout, ngtcp2_tstamp ts);
+
+/**
+ * @function
+ *
+ * `ngtcp2_crypto_write_connection_close` writes Initial packet
+ * containing CONNECTION_CLOSE with the given |error_code| and the
+ * optional |reason| of length |reasonlen| to the buffer pointed by
+ * |dest| of length |destlen|. This function is designed for server
+ * to close connection without committing the state when validating
+ * Retry token fails. This function must not be used by client. The
+ * |dcid| must be the Source Connection ID in Initial packet from
+ * client. The |scid| must be the Destination Connection ID in
+ * Initial packet from client. |scid| is used to derive initial
+ * keying materials.
+ *
+ * This function wraps around `ngtcp2_pkt_write_connection_close` for
+ * easier use.
+ *
+ * This function returns 0 if it succeeds, or -1.
+ */
+NGTCP2_EXTERN ngtcp2_ssize ngtcp2_crypto_write_connection_close(
+ uint8_t *dest, size_t destlen, uint32_t version, const ngtcp2_cid *dcid,
+ const ngtcp2_cid *scid, uint64_t error_code, const uint8_t *reason,
+ size_t reasonlen);
+
+/**
+ * @function
+ *
+ * `ngtcp2_crypto_write_retry` writes Retry packet to the buffer
+ * pointed by |dest| of length |destlen|. |odcid| specifies Original
+ * Destination Connection ID. |token| specifies Retry Token, and
+ * |tokenlen| specifies its length.
+ *
+ * This function wraps around `ngtcp2_pkt_write_retry` for easier use.
+ *
+ * This function returns 0 if it succeeds, or -1.
+ */
+NGTCP2_EXTERN ngtcp2_ssize ngtcp2_crypto_write_retry(
+ uint8_t *dest, size_t destlen, uint32_t version, const ngtcp2_cid *dcid,
+ const ngtcp2_cid *scid, const ngtcp2_cid *odcid, const uint8_t *token,
+ size_t tokenlen);
+
+/**
+ * @function
+ *
+ * `ngtcp2_crypto_aead_ctx_encrypt_init` initializes |aead_ctx| with
+ * new AEAD cipher context object for encryption which is constructed
+ * to use |key| as encryption key. |aead| specifies AEAD cipher to
+ * use. |noncelen| is the length of nonce.
+ *
+ * This function returns 0 if it succeeds, or -1.
+ */
+NGTCP2_EXTERN int
+ngtcp2_crypto_aead_ctx_encrypt_init(ngtcp2_crypto_aead_ctx *aead_ctx,
+ const ngtcp2_crypto_aead *aead,
+ const uint8_t *key, size_t noncelen);
+
+/**
+ * @function
+ *
+ * `ngtcp2_crypto_aead_ctx_decrypt_init` initializes |aead_ctx| with
+ * new AEAD cipher context object for decryption which is constructed
+ * to use |key| as encryption key. |aead| specifies AEAD cipher to
+ * use. |noncelen| is the length of nonce.
+ *
+ * This function returns 0 if it succeeds, or -1.
+ */
+NGTCP2_EXTERN int
+ngtcp2_crypto_aead_ctx_decrypt_init(ngtcp2_crypto_aead_ctx *aead_ctx,
+ const ngtcp2_crypto_aead *aead,
+ const uint8_t *key, size_t noncelen);
+
+/**
+ * @function
+ *
+ * `ngtcp2_crypto_aead_ctx_free` frees up resources used by
+ * |aead_ctx|. This function does not free the memory pointed by
+ * |aead_ctx| itself.
+ */
+NGTCP2_EXTERN void
+ngtcp2_crypto_aead_ctx_free(ngtcp2_crypto_aead_ctx *aead_ctx);
+
+/**
+ * @function
+ *
+ * `ngtcp2_crypto_delete_crypto_aead_ctx_cb` deletes the given |aead_ctx|.
+ *
+ * This function can be directly passed to
+ * :member:`ngtcp2_callbacks.delete_crypto_aead_ctx` field.
+ */
+NGTCP2_EXTERN void ngtcp2_crypto_delete_crypto_aead_ctx_cb(
+ ngtcp2_conn *conn, ngtcp2_crypto_aead_ctx *aead_ctx, void *user_data);
+
+/**
+ * @function
+ *
+ * `ngtcp2_crypto_delete_crypto_cipher_ctx_cb` deletes the given
+ * |cipher_ctx|.
+ *
+ * This function can be directly passed to
+ * :member:`ngtcp2_callbacks.delete_crypto_cipher_ctx` field.
+ */
+NGTCP2_EXTERN void ngtcp2_crypto_delete_crypto_cipher_ctx_cb(
+ ngtcp2_conn *conn, ngtcp2_crypto_cipher_ctx *cipher_ctx, void *user_data);
+
+/**
+ * @function
+ *
+ * `ngtcp2_crypto_get_path_challenge_data_cb` writes unpredictable
+ * sequence of :macro:`NGTCP2_PATH_CHALLENGE_DATALEN` bytes to |data|
+ * which is sent with PATH_CHALLENGE frame.
+ *
+ * This function can be directly passed to
+ * :member:`ngtcp2_callbacks.get_path_challenge_data` field.
+ */
+NGTCP2_EXTERN int ngtcp2_crypto_get_path_challenge_data_cb(ngtcp2_conn *conn,
+ uint8_t *data,
+ void *user_data);
+
+/**
+ * @function
+ *
+ * `ngtcp2_crypto_version_negotiation_cb` installs Initial keys for
+ * |version| which is negotiated or being negotiated. |client_dcid|
+ * is the destination connection ID in first Initial packet of client.
+ *
+ * This function can be directly passed to
+ * :member:`ngtcp2_callbacks.version_negotiation` field.
+ */
+NGTCP2_EXTERN int
+ngtcp2_crypto_version_negotiation_cb(ngtcp2_conn *conn, uint32_t version,
+ const ngtcp2_cid *client_dcid,
+ void *user_data);
+
+typedef struct ngtcp2_crypto_conn_ref ngtcp2_crypto_conn_ref;
+
+/**
+ * @functypedef
+ *
+ * :type:`ngtcp2_crypto_get_conn` is a callback function to get a
+ * pointer to :type:`ngtcp2_conn` from |conn_ref|. The implementation
+ * must return non-NULL :type:`ngtcp2_conn` object.
+ */
+typedef ngtcp2_conn *(*ngtcp2_crypto_get_conn)(
+ ngtcp2_crypto_conn_ref *conn_ref);
+
+/**
+ * @struct
+ *
+ * :type:`ngtcp2_crypto_conn_ref` is a structure to get a pointer to
+ * :type:`ngtcp2_conn`. It is meant to be set to TLS native handle as
+ * an application specific data (e.g. SSL_set_app_data in OpenSSL).
+ */
+typedef struct ngtcp2_crypto_conn_ref {
+ /**
+ * :member:`get_conn` is a callback function to get a pointer to
+ * :type:`ngtcp2_conn` object.
+ */
+ ngtcp2_crypto_get_conn get_conn;
+ /**
+ * :member:`user_data` is a pointer to arbitrary user data.
+ */
+ void *user_data;
+} ngtcp2_crypto_conn_ref;
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* NGTCP2_CRYPTO_H */
diff --git a/src/contrib/libngtcp2/ngtcp2/ngtcp2_crypto_gnutls.h b/src/contrib/libngtcp2/ngtcp2/ngtcp2_crypto_gnutls.h
new file mode 100644
index 0000000..af5503f
--- /dev/null
+++ b/src/contrib/libngtcp2/ngtcp2/ngtcp2_crypto_gnutls.h
@@ -0,0 +1,107 @@
+/*
+ * ngtcp2
+ *
+ * Copyright (c) 2020 ngtcp2 contributors
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#ifndef NGTCP2_CRYPTO_GNUTLS_H
+#define NGTCP2_CRYPTO_GNUTLS_H
+
+#include <ngtcp2/ngtcp2.h>
+
+#include <gnutls/gnutls.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * @function
+ *
+ * `ngtcp2_crypto_gnutls_from_gnutls_record_encryption_level`
+ * translates |gtls_level| to :type:`ngtcp2_crypto_level`. This
+ * function is only available for GnuTLS backend.
+ */
+NGTCP2_EXTERN ngtcp2_crypto_level
+ngtcp2_crypto_gnutls_from_gnutls_record_encryption_level(
+ gnutls_record_encryption_level_t gtls_level);
+
+/**
+ * @function
+ *
+ * `ngtcp2_crypto_gnutls_from_ngtcp2_crypto_level` translates
+ * |crypto_level| to gnutls_record_encryption_level_t. This function
+ * is only available for GnuTLS backend.
+ */
+NGTCP2_EXTERN gnutls_record_encryption_level_t
+ngtcp2_crypto_gnutls_from_ngtcp2_level(ngtcp2_crypto_level crypto_level);
+
+/**
+ * @function
+ *
+ * `ngtcp2_crypto_gnutls_configure_server_session` configures
+ * |session| for server side QUIC connection. It performs the
+ * following modifications:
+ *
+ * - Set gnutls_handshake_set_secret_function.
+ * - Set gnutls_handshake_set_read_function.
+ * - Set gnutls_alert_set_read_function.
+ * - Register a TLS extension handler for QUIC Transport Parameters.
+ *
+ * Application must set a pointer to :type:`ngtcp2_crypto_conn_ref` to
+ * gnutls_session_t object by calling gnutls_session_set_ptr, and
+ * :type:`ngtcp2_crypto_conn_ref` object must have
+ * :member:`ngtcp2_crypto_conn_ref.get_conn` field assigned to get
+ * :type:`ngtcp2_conn`.
+ *
+ * It returns 0 if it succeeds, or -1.
+ */
+NGTCP2_EXTERN int
+ngtcp2_crypto_gnutls_configure_server_session(gnutls_session_t session);
+
+/**
+ * @function
+ *
+ * `ngtcp2_crypto_gnutls_configure_client_session` configures
+ * |session| for client side QUIC connection. It performs the
+ * following modifications:
+ *
+ * - Set gnutls_handshake_set_secret_function.
+ * - Set gnutls_handshake_set_read_function.
+ * - Set gnutls_alert_set_read_function.
+ * - Register a TLS extension handler for QUIC Transport Parameters.
+ *
+ * Application must set a pointer to :type:`ngtcp2_crypto_conn_ref` to
+ * gnutls_session_t object by calling gnutls_session_set_ptr, and
+ * :type:`ngtcp2_crypto_conn_ref` object must have
+ * :member:`ngtcp2_crypto_conn_ref.get_conn` field assigned to get
+ * :type:`ngtcp2_conn`.
+ *
+ * It returns 0 if it succeeds, or -1.
+ */
+NGTCP2_EXTERN int
+ngtcp2_crypto_gnutls_configure_client_session(gnutls_session_t session);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* NGTCP2_CRYPTO_GNUTLS_H */
diff --git a/src/contrib/libngtcp2/ngtcp2/version.h b/src/contrib/libngtcp2/ngtcp2/version.h
new file mode 100644
index 0000000..54b2035
--- /dev/null
+++ b/src/contrib/libngtcp2/ngtcp2/version.h
@@ -0,0 +1,51 @@
+/*
+ * ngtcp2
+ *
+ * Copyright (c) 2016 ngtcp2 contributors
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#ifndef VERSION_H
+#define VERSION_H
+
+/**
+ * @macrosection
+ *
+ * Library version macros
+ */
+
+/**
+ * @macro
+ *
+ * Version number of the ngtcp2 library release.
+ */
+#define NGTCP2_VERSION "0.13.1"
+
+/**
+ * @macro
+ *
+ * Numerical representation of the version number of the ngtcp2
+ * library release. This is a 24 bit number with 8 bits for major
+ * number, 8 bits for minor and 8 bits for patch. Version 1.2.3
+ * becomes 0x010203.
+ */
+#define NGTCP2_VERSION_NUM 0x000d01
+
+#endif /* VERSION_H */
diff --git a/src/contrib/licenses/0BSD b/src/contrib/licenses/0BSD
new file mode 100644
index 0000000..56c5528
--- /dev/null
+++ b/src/contrib/licenses/0BSD
@@ -0,0 +1,12 @@
+Permission to use, copy, modify, and distribute this software for any
+purpose with or without fee is hereby granted, provided that the above
+copyright notice and this permission notice appear in all copies.
+
+THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+
diff --git a/src/contrib/licenses/BSD-3-Clause b/src/contrib/licenses/BSD-3-Clause
new file mode 100644
index 0000000..8041f21
--- /dev/null
+++ b/src/contrib/licenses/BSD-3-Clause
@@ -0,0 +1,21 @@
+Redistribution and use in source and binary forms, with or without modification,
+are permitted provided that the following conditions are met:
+1. Redistributions of source code must retain the above copyright notice, this
+ list of conditions and the following disclaimer.
+2. Redistributions in binary form must reproduce the above copyright notice,
+ this list of conditions and the following disclaimer in the documentation
+ and/or other materials provided with the distribution.
+3. Neither the name of the copyright holder nor the names of its contributors
+ may be used to endorse or promote products derived from this software without
+ specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/src/contrib/licenses/LGPL-2.0 b/src/contrib/licenses/LGPL-2.0
new file mode 100644
index 0000000..d159169
--- /dev/null
+++ b/src/contrib/licenses/LGPL-2.0
@@ -0,0 +1,339 @@
+ GNU GENERAL PUBLIC LICENSE
+ Version 2, June 1991
+
+ Copyright (C) 1989, 1991 Free Software Foundation, Inc.,
+ 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+ Preamble
+
+ The licenses for most software are designed to take away your
+freedom to share and change it. By contrast, the GNU General Public
+License is intended to guarantee your freedom to share and change free
+software--to make sure the software is free for all its users. This
+General Public License applies to most of the Free Software
+Foundation's software and to any other program whose authors commit to
+using it. (Some other Free Software Foundation software is covered by
+the GNU Lesser General Public License instead.) You can apply it to
+your programs, too.
+
+ When we speak of free software, we are referring to freedom, not
+price. Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+this service if you wish), that you receive source code or can get it
+if you want it, that you can change the software or use pieces of it
+in new free programs; and that you know you can do these things.
+
+ To protect your rights, we need to make restrictions that forbid
+anyone to deny you these rights or to ask you to surrender the rights.
+These restrictions translate to certain responsibilities for you if you
+distribute copies of the software, or if you modify it.
+
+ For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must give the recipients all the rights that
+you have. You must make sure that they, too, receive or can get the
+source code. And you must show them these terms so they know their
+rights.
+
+ We protect your rights with two steps: (1) copyright the software, and
+(2) offer you this license which gives you legal permission to copy,
+distribute and/or modify the software.
+
+ Also, for each author's protection and ours, we want to make certain
+that everyone understands that there is no warranty for this free
+software. If the software is modified by someone else and passed on, we
+want its recipients to know that what they have is not the original, so
+that any problems introduced by others will not reflect on the original
+authors' reputations.
+
+ Finally, any free program is threatened constantly by software
+patents. We wish to avoid the danger that redistributors of a free
+program will individually obtain patent licenses, in effect making the
+program proprietary. To prevent this, we have made it clear that any
+patent must be licensed for everyone's free use or not licensed at all.
+
+ The precise terms and conditions for copying, distribution and
+modification follow.
+
+ GNU GENERAL PUBLIC LICENSE
+ TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+ 0. This License applies to any program or other work which contains
+a notice placed by the copyright holder saying it may be distributed
+under the terms of this General Public License. The "Program", below,
+refers to any such program or work, and a "work based on the Program"
+means either the Program or any derivative work under copyright law:
+that is to say, a work containing the Program or a portion of it,
+either verbatim or with modifications and/or translated into another
+language. (Hereinafter, translation is included without limitation in
+the term "modification".) Each licensee is addressed as "you".
+
+Activities other than copying, distribution and modification are not
+covered by this License; they are outside its scope. The act of
+running the Program is not restricted, and the output from the Program
+is covered only if its contents constitute a work based on the
+Program (independent of having been made by running the Program).
+Whether that is true depends on what the Program does.
+
+ 1. You may copy and distribute verbatim copies of the Program's
+source code as you receive it, in any medium, provided that you
+conspicuously and appropriately publish on each copy an appropriate
+copyright notice and disclaimer of warranty; keep intact all the
+notices that refer to this License and to the absence of any warranty;
+and give any other recipients of the Program a copy of this License
+along with the Program.
+
+You may charge a fee for the physical act of transferring a copy, and
+you may at your option offer warranty protection in exchange for a fee.
+
+ 2. You may modify your copy or copies of the Program or any portion
+of it, thus forming a work based on the Program, and copy and
+distribute such modifications or work under the terms of Section 1
+above, provided that you also meet all of these conditions:
+
+ a) You must cause the modified files to carry prominent notices
+ stating that you changed the files and the date of any change.
+
+ b) You must cause any work that you distribute or publish, that in
+ whole or in part contains or is derived from the Program or any
+ part thereof, to be licensed as a whole at no charge to all third
+ parties under the terms of this License.
+
+ c) If the modified program normally reads commands interactively
+ when run, you must cause it, when started running for such
+ interactive use in the most ordinary way, to print or display an
+ announcement including an appropriate copyright notice and a
+ notice that there is no warranty (or else, saying that you provide
+ a warranty) and that users may redistribute the program under
+ these conditions, and telling the user how to view a copy of this
+ License. (Exception: if the Program itself is interactive but
+ does not normally print such an announcement, your work based on
+ the Program is not required to print an announcement.)
+
+These requirements apply to the modified work as a whole. If
+identifiable sections of that work are not derived from the Program,
+and can be reasonably considered independent and separate works in
+themselves, then this License, and its terms, do not apply to those
+sections when you distribute them as separate works. But when you
+distribute the same sections as part of a whole which is a work based
+on the Program, the distribution of the whole must be on the terms of
+this License, whose permissions for other licensees extend to the
+entire whole, and thus to each and every part regardless of who wrote it.
+
+Thus, it is not the intent of this section to claim rights or contest
+your rights to work written entirely by you; rather, the intent is to
+exercise the right to control the distribution of derivative or
+collective works based on the Program.
+
+In addition, mere aggregation of another work not based on the Program
+with the Program (or with a work based on the Program) on a volume of
+a storage or distribution medium does not bring the other work under
+the scope of this License.
+
+ 3. You may copy and distribute the Program (or a work based on it,
+under Section 2) in object code or executable form under the terms of
+Sections 1 and 2 above provided that you also do one of the following:
+
+ a) Accompany it with the complete corresponding machine-readable
+ source code, which must be distributed under the terms of Sections
+ 1 and 2 above on a medium customarily used for software interchange; or,
+
+ b) Accompany it with a written offer, valid for at least three
+ years, to give any third party, for a charge no more than your
+ cost of physically performing source distribution, a complete
+ machine-readable copy of the corresponding source code, to be
+ distributed under the terms of Sections 1 and 2 above on a medium
+ customarily used for software interchange; or,
+
+ c) Accompany it with the information you received as to the offer
+ to distribute corresponding source code. (This alternative is
+ allowed only for noncommercial distribution and only if you
+ received the program in object code or executable form with such
+ an offer, in accord with Subsection b above.)
+
+The source code for a work means the preferred form of the work for
+making modifications to it. For an executable work, complete source
+code means all the source code for all modules it contains, plus any
+associated interface definition files, plus the scripts used to
+control compilation and installation of the executable. However, as a
+special exception, the source code distributed need not include
+anything that is normally distributed (in either source or binary
+form) with the major components (compiler, kernel, and so on) of the
+operating system on which the executable runs, unless that component
+itself accompanies the executable.
+
+If distribution of executable or object code is made by offering
+access to copy from a designated place, then offering equivalent
+access to copy the source code from the same place counts as
+distribution of the source code, even though third parties are not
+compelled to copy the source along with the object code.
+
+ 4. You may not copy, modify, sublicense, or distribute the Program
+except as expressly provided under this License. Any attempt
+otherwise to copy, modify, sublicense or distribute the Program is
+void, and will automatically terminate your rights under this License.
+However, parties who have received copies, or rights, from you under
+this License will not have their licenses terminated so long as such
+parties remain in full compliance.
+
+ 5. You are not required to accept this License, since you have not
+signed it. However, nothing else grants you permission to modify or
+distribute the Program or its derivative works. These actions are
+prohibited by law if you do not accept this License. Therefore, by
+modifying or distributing the Program (or any work based on the
+Program), you indicate your acceptance of this License to do so, and
+all its terms and conditions for copying, distributing or modifying
+the Program or works based on it.
+
+ 6. Each time you redistribute the Program (or any work based on the
+Program), the recipient automatically receives a license from the
+original licensor to copy, distribute or modify the Program subject to
+these terms and conditions. You may not impose any further
+restrictions on the recipients' exercise of the rights granted herein.
+You are not responsible for enforcing compliance by third parties to
+this License.
+
+ 7. If, as a consequence of a court judgment or allegation of patent
+infringement or for any other reason (not limited to patent issues),
+conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License. If you cannot
+distribute so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you
+may not distribute the Program at all. For example, if a patent
+license would not permit royalty-free redistribution of the Program by
+all those who receive copies directly or indirectly through you, then
+the only way you could satisfy both it and this License would be to
+refrain entirely from distribution of the Program.
+
+If any portion of this section is held invalid or unenforceable under
+any particular circumstance, the balance of the section is intended to
+apply and the section as a whole is intended to apply in other
+circumstances.
+
+It is not the purpose of this section to induce you to infringe any
+patents or other property right claims or to contest validity of any
+such claims; this section has the sole purpose of protecting the
+integrity of the free software distribution system, which is
+implemented by public license practices. Many people have made
+generous contributions to the wide range of software distributed
+through that system in reliance on consistent application of that
+system; it is up to the author/donor to decide if he or she is willing
+to distribute software through any other system and a licensee cannot
+impose that choice.
+
+This section is intended to make thoroughly clear what is believed to
+be a consequence of the rest of this License.
+
+ 8. If the distribution and/or use of the Program is restricted in
+certain countries either by patents or by copyrighted interfaces, the
+original copyright holder who places the Program under this License
+may add an explicit geographical distribution limitation excluding
+those countries, so that distribution is permitted only in or among
+countries not thus excluded. In such case, this License incorporates
+the limitation as if written in the body of this License.
+
+ 9. The Free Software Foundation may publish revised and/or new versions
+of the General Public License from time to time. Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+Each version is given a distinguishing version number. If the Program
+specifies a version number of this License which applies to it and "any
+later version", you have the option of following the terms and conditions
+either of that version or of any later version published by the Free
+Software Foundation. If the Program does not specify a version number of
+this License, you may choose any version ever published by the Free Software
+Foundation.
+
+ 10. If you wish to incorporate parts of the Program into other free
+programs whose distribution conditions are different, write to the author
+to ask for permission. For software which is copyrighted by the Free
+Software Foundation, write to the Free Software Foundation; we sometimes
+make exceptions for this. Our decision will be guided by the two goals
+of preserving the free status of all derivatives of our free software and
+of promoting the sharing and reuse of software generally.
+
+ NO WARRANTY
+
+ 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
+FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
+OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
+PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
+OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
+TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
+PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
+REPAIR OR CORRECTION.
+
+ 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
+REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
+INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
+OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
+TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
+YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
+PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGES.
+
+ END OF TERMS AND CONDITIONS
+
+ How to Apply These Terms to Your New Programs
+
+ If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+
+ To do so, attach the following notices to the program. It is safest
+to attach them to the start of each source file to most effectively
+convey the exclusion of warranty; and each file should have at least
+the "copyright" line and a pointer to where the full notice is found.
+
+ <one line to give the program's name and a brief idea of what it does.>
+ Copyright (C) <year> <name of author>
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License along
+ with this program; if not, write to the Free Software Foundation, Inc.,
+ 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+Also add information on how to contact you by electronic and paper mail.
+
+If the program is interactive, make it output a short notice like this
+when it starts in an interactive mode:
+
+ Gnomovision version 69, Copyright (C) year name of author
+ Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
+ This is free software, and you are welcome to redistribute it
+ under certain conditions; type `show c' for details.
+
+The hypothetical commands `show w' and `show c' should show the appropriate
+parts of the General Public License. Of course, the commands you use may
+be called something other than `show w' and `show c'; they could even be
+mouse-clicks or menu items--whatever suits your program.
+
+You should also get your employer (if you work as a programmer) or your
+school, if any, to sign a "copyright disclaimer" for the program, if
+necessary. Here is a sample; alter the names:
+
+ Yoyodyne, Inc., hereby disclaims all copyright interest in the program
+ `Gnomovision' (which makes passes at compilers) written by James Hacker.
+
+ <signature of Ty Coon>, 1 April 1989
+ Ty Coon, President of Vice
+
+This General Public License does not permit incorporating your program into
+proprietary programs. If your program is a subroutine library, you may
+consider it more useful to permit linking proprietary applications with the
+library. If this is what you want to do, use the GNU Lesser General
+Public License instead of this License.
diff --git a/src/contrib/licenses/LGPL-2.1 b/src/contrib/licenses/LGPL-2.1
new file mode 100644
index 0000000..27bb434
--- /dev/null
+++ b/src/contrib/licenses/LGPL-2.1
@@ -0,0 +1,503 @@
+Valid-License-Identifier: LGPL-2.1
+Valid-License-Identifier: LGPL-2.1+
+SPDX-URL: https://spdx.org/licenses/LGPL-2.1.html
+Usage-Guide:
+ To use this license in source code, put one of the following SPDX
+ tag/value pairs into a comment according to the placement
+ guidelines in the licensing rules documentation.
+ For 'GNU Lesser General Public License (LGPL) version 2.1 only' use:
+ SPDX-License-Identifier: LGPL-2.1
+ For 'GNU Lesser General Public License (LGPL) version 2.1 or any later
+ version' use:
+ SPDX-License-Identifier: LGPL-2.1+
+License-Text:
+
+GNU LESSER GENERAL PUBLIC LICENSE
+Version 2.1, February 1999
+
+Copyright (C) 1991, 1999 Free Software Foundation, Inc.
+51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+
+Everyone is permitted to copy and distribute verbatim copies of this
+license document, but changing it is not allowed.
+
+[This is the first released version of the Lesser GPL. It also counts as
+the successor of the GNU Library Public License, version 2, hence the
+version number 2.1.]
+
+Preamble
+
+The licenses for most software are designed to take away your freedom to
+share and change it. By contrast, the GNU General Public Licenses are
+intended to guarantee your freedom to share and change free software--to
+make sure the software is free for all its users.
+
+This license, the Lesser General Public License, applies to some specially
+designated software packages--typically libraries--of the Free Software
+Foundation and other authors who decide to use it. You can use it too, but
+we suggest you first think carefully about whether this license or the
+ordinary General Public License is the better strategy to use in any
+particular case, based on the explanations below.
+
+When we speak of free software, we are referring to freedom of use, not
+price. Our General Public Licenses are designed to make sure that you have
+the freedom to distribute copies of free software (and charge for this
+service if you wish); that you receive source code or can get it if you
+want it; that you can change the software and use pieces of it in new free
+programs; and that you are informed that you can do these things.
+
+To protect your rights, we need to make restrictions that forbid
+distributors to deny you these rights or to ask you to surrender these
+rights. These restrictions translate to certain responsibilities for you if
+you distribute copies of the library or if you modify it.
+
+For example, if you distribute copies of the library, whether gratis or for
+a fee, you must give the recipients all the rights that we gave you. You
+must make sure that they, too, receive or can get the source code. If you
+link other code with the library, you must provide complete object files to
+the recipients, so that they can relink them with the library after making
+changes to the library and recompiling it. And you must show them these
+terms so they know their rights.
+
+We protect your rights with a two-step method: (1) we copyright the
+library, and (2) we offer you this license, which gives you legal
+permission to copy, distribute and/or modify the library.
+
+To protect each distributor, we want to make it very clear that there is no
+warranty for the free library. Also, if the library is modified by someone
+else and passed on, the recipients should know that what they have is not
+the original version, so that the original author's reputation will not be
+affected by problems that might be introduced by others.
+
+Finally, software patents pose a constant threat to the existence of any
+free program. We wish to make sure that a company cannot effectively
+restrict the users of a free program by obtaining a restrictive license
+from a patent holder. Therefore, we insist that any patent license obtained
+for a version of the library must be consistent with the full freedom of
+use specified in this license.
+
+Most GNU software, including some libraries, is covered by the ordinary GNU
+General Public License. This license, the GNU Lesser General Public
+License, applies to certain designated libraries, and is quite different
+from the ordinary General Public License. We use this license for certain
+libraries in order to permit linking those libraries into non-free
+programs.
+
+When a program is linked with a library, whether statically or using a
+shared library, the combination of the two is legally speaking a combined
+work, a derivative of the original library. The ordinary General Public
+License therefore permits such linking only if the entire combination fits
+its criteria of freedom. The Lesser General Public License permits more lax
+criteria for linking other code with the library.
+
+We call this license the "Lesser" General Public License because it does
+Less to protect the user's freedom than the ordinary General Public
+License. It also provides other free software developers Less of an
+advantage over competing non-free programs. These disadvantages are the
+reason we use the ordinary General Public License for many
+libraries. However, the Lesser license provides advantages in certain
+special circumstances.
+
+For example, on rare occasions, there may be a special need to encourage
+the widest possible use of a certain library, so that it becomes a de-facto
+standard. To achieve this, non-free programs must be allowed to use the
+library. A more frequent case is that a free library does the same job as
+widely used non-free libraries. In this case, there is little to gain by
+limiting the free library to free software only, so we use the Lesser
+General Public License.
+
+In other cases, permission to use a particular library in non-free programs
+enables a greater number of people to use a large body of free
+software. For example, permission to use the GNU C Library in non-free
+programs enables many more people to use the whole GNU operating system, as
+well as its variant, the GNU/Linux operating system.
+
+Although the Lesser General Public License is Less protective of the users'
+freedom, it does ensure that the user of a program that is linked with the
+Library has the freedom and the wherewithal to run that program using a
+modified version of the Library.
+
+The precise terms and conditions for copying, distribution and modification
+follow. Pay close attention to the difference between a "work based on the
+library" and a "work that uses the library". The former contains code
+derived from the library, whereas the latter must be combined with the
+library in order to run.
+
+TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+0. This License Agreement applies to any software library or other program
+ which contains a notice placed by the copyright holder or other
+ authorized party saying it may be distributed under the terms of this
+ Lesser General Public License (also called "this License"). Each
+ licensee is addressed as "you".
+
+ A "library" means a collection of software functions and/or data
+ prepared so as to be conveniently linked with application programs
+ (which use some of those functions and data) to form executables.
+
+ The "Library", below, refers to any such software library or work which
+ has been distributed under these terms. A "work based on the Library"
+ means either the Library or any derivative work under copyright law:
+ that is to say, a work containing the Library or a portion of it, either
+ verbatim or with modifications and/or translated straightforwardly into
+ another language. (Hereinafter, translation is included without
+ limitation in the term "modification".)
+
+ "Source code" for a work means the preferred form of the work for making
+ modifications to it. For a library, complete source code means all the
+ source code for all modules it contains, plus any associated interface
+ definition files, plus the scripts used to control compilation and
+ installation of the library.
+
+ Activities other than copying, distribution and modification are not
+ covered by this License; they are outside its scope. The act of running
+ a program using the Library is not restricted, and output from such a
+ program is covered only if its contents constitute a work based on the
+ Library (independent of the use of the Library in a tool for writing
+ it). Whether that is true depends on what the Library does and what the
+ program that uses the Library does.
+
+1. You may copy and distribute verbatim copies of the Library's complete
+ source code as you receive it, in any medium, provided that you
+ conspicuously and appropriately publish on each copy an appropriate
+ copyright notice and disclaimer of warranty; keep intact all the notices
+ that refer to this License and to the absence of any warranty; and
+ distribute a copy of this License along with the Library.
+
+ You may charge a fee for the physical act of transferring a copy, and
+ you may at your option offer warranty protection in exchange for a fee.
+
+2. You may modify your copy or copies of the Library or any portion of it,
+ thus forming a work based on the Library, and copy and distribute such
+ modifications or work under the terms of Section 1 above, provided that
+ you also meet all of these conditions:
+
+ a) The modified work must itself be a software library.
+
+ b) You must cause the files modified to carry prominent notices stating
+ that you changed the files and the date of any change.
+
+ c) You must cause the whole of the work to be licensed at no charge to
+ all third parties under the terms of this License.
+
+ d) If a facility in the modified Library refers to a function or a table
+ of data to be supplied by an application program that uses the
+ facility, other than as an argument passed when the facility is
+ invoked, then you must make a good faith effort to ensure that, in
+ the event an application does not supply such function or table, the
+ facility still operates, and performs whatever part of its purpose
+ remains meaningful.
+
+ (For example, a function in a library to compute square roots has a
+ purpose that is entirely well-defined independent of the
+ application. Therefore, Subsection 2d requires that any
+ application-supplied function or table used by this function must be
+ optional: if the application does not supply it, the square root
+ function must still compute square roots.)
+
+ These requirements apply to the modified work as a whole. If
+ identifiable sections of that work are not derived from the Library, and
+ can be reasonably considered independent and separate works in
+ themselves, then this License, and its terms, do not apply to those
+ sections when you distribute them as separate works. But when you
+ distribute the same sections as part of a whole which is a work based on
+ the Library, the distribution of the whole must be on the terms of this
+ License, whose permissions for other licensees extend to the entire
+ whole, and thus to each and every part regardless of who wrote it.
+
+ Thus, it is not the intent of this section to claim rights or contest
+ your rights to work written entirely by you; rather, the intent is to
+ exercise the right to control the distribution of derivative or
+ collective works based on the Library.
+
+ In addition, mere aggregation of another work not based on the Library
+ with the Library (or with a work based on the Library) on a volume of a
+ storage or distribution medium does not bring the other work under the
+ scope of this License.
+
+3. You may opt to apply the terms of the ordinary GNU General Public
+ License instead of this License to a given copy of the Library. To do
+ this, you must alter all the notices that refer to this License, so that
+ they refer to the ordinary GNU General Public License, version 2,
+ instead of to this License. (If a newer version than version 2 of the
+ ordinary GNU General Public License has appeared, then you can specify
+ that version instead if you wish.) Do not make any other change in these
+ notices.
+
+ Once this change is made in a given copy, it is irreversible for that
+ copy, so the ordinary GNU General Public License applies to all
+ subsequent copies and derivative works made from that copy.
+
+ This option is useful when you wish to copy part of the code of the
+ Library into a program that is not a library.
+
+4. You may copy and distribute the Library (or a portion or derivative of
+ it, under Section 2) in object code or executable form under the terms
+ of Sections 1 and 2 above provided that you accompany it with the
+ complete corresponding machine-readable source code, which must be
+ distributed under the terms of Sections 1 and 2 above on a medium
+ customarily used for software interchange.
+
+ If distribution of object code is made by offering access to copy from a
+ designated place, then offering equivalent access to copy the source
+ code from the same place satisfies the requirement to distribute the
+ source code, even though third parties are not compelled to copy the
+ source along with the object code.
+
+5. A program that contains no derivative of any portion of the Library, but
+ is designed to work with the Library by being compiled or linked with
+ it, is called a "work that uses the Library". Such a work, in isolation,
+ is not a derivative work of the Library, and therefore falls outside the
+ scope of this License.
+
+ However, linking a "work that uses the Library" with the Library creates
+ an executable that is a derivative of the Library (because it contains
+ portions of the Library), rather than a "work that uses the
+ library". The executable is therefore covered by this License. Section 6
+ states terms for distribution of such executables.
+
+ When a "work that uses the Library" uses material from a header file
+ that is part of the Library, the object code for the work may be a
+ derivative work of the Library even though the source code is
+ not. Whether this is true is especially significant if the work can be
+ linked without the Library, or if the work is itself a library. The
+ threshold for this to be true is not precisely defined by law.
+
+ If such an object file uses only numerical parameters, data structure
+ layouts and accessors, and small macros and small inline functions (ten
+ lines or less in length), then the use of the object file is
+ unrestricted, regardless of whether it is legally a derivative
+ work. (Executables containing this object code plus portions of the
+ Library will still fall under Section 6.)
+
+ Otherwise, if the work is a derivative of the Library, you may
+ distribute the object code for the work under the terms of Section
+ 6. Any executables containing that work also fall under Section 6,
+ whether or not they are linked directly with the Library itself.
+
+6. As an exception to the Sections above, you may also combine or link a
+ "work that uses the Library" with the Library to produce a work
+ containing portions of the Library, and distribute that work under terms
+ of your choice, provided that the terms permit modification of the work
+ for the customer's own use and reverse engineering for debugging such
+ modifications.
+
+ You must give prominent notice with each copy of the work that the
+ Library is used in it and that the Library and its use are covered by
+ this License. You must supply a copy of this License. If the work during
+ execution displays copyright notices, you must include the copyright
+ notice for the Library among them, as well as a reference directing the
+ user to the copy of this License. Also, you must do one of these things:
+
+ a) Accompany the work with the complete corresponding machine-readable
+ source code for the Library including whatever changes were used in
+ the work (which must be distributed under Sections 1 and 2 above);
+ and, if the work is an executable linked with the Library, with the
+ complete machine-readable "work that uses the Library", as object
+ code and/or source code, so that the user can modify the Library and
+ then relink to produce a modified executable containing the modified
+ Library. (It is understood that the user who changes the contents of
+ definitions files in the Library will not necessarily be able to
+ recompile the application to use the modified definitions.)
+
+ b) Use a suitable shared library mechanism for linking with the
+ Library. A suitable mechanism is one that (1) uses at run time a copy
+ of the library already present on the user's computer system, rather
+ than copying library functions into the executable, and (2) will
+ operate properly with a modified version of the library, if the user
+ installs one, as long as the modified version is interface-compatible
+ with the version that the work was made with.
+
+ c) Accompany the work with a written offer, valid for at least three
+ years, to give the same user the materials specified in Subsection
+ 6a, above, for a charge no more than the cost of performing this
+ distribution.
+
+ d) If distribution of the work is made by offering access to copy from a
+ designated place, offer equivalent access to copy the above specified
+ materials from the same place.
+
+ e) Verify that the user has already received a copy of these materials
+ or that you have already sent this user a copy.
+
+ For an executable, the required form of the "work that uses the Library"
+ must include any data and utility programs needed for reproducing the
+ executable from it. However, as a special exception, the materials to be
+ distributed need not include anything that is normally distributed (in
+ either source or binary form) with the major components (compiler,
+ kernel, and so on) of the operating system on which the executable runs,
+ unless that component itself accompanies the executable.
+
+ It may happen that this requirement contradicts the license restrictions
+ of other proprietary libraries that do not normally accompany the
+ operating system. Such a contradiction means you cannot use both them
+ and the Library together in an executable that you distribute.
+
+7. You may place library facilities that are a work based on the Library
+ side-by-side in a single library together with other library facilities
+ not covered by this License, and distribute such a combined library,
+ provided that the separate distribution of the work based on the Library
+ and of the other library facilities is otherwise permitted, and provided
+ that you do these two things:
+
+ a) Accompany the combined library with a copy of the same work based on
+ the Library, uncombined with any other library facilities. This must
+ be distributed under the terms of the Sections above.
+
+ b) Give prominent notice with the combined library of the fact that part
+ of it is a work based on the Library, and explaining where to find
+ the accompanying uncombined form of the same work.
+
+8. You may not copy, modify, sublicense, link with, or distribute the
+ Library except as expressly provided under this License. Any attempt
+ otherwise to copy, modify, sublicense, link with, or distribute the
+ Library is void, and will automatically terminate your rights under this
+ License. However, parties who have received copies, or rights, from you
+ under this License will not have their licenses terminated so long as
+ such parties remain in full compliance.
+
+9. You are not required to accept this License, since you have not signed
+ it. However, nothing else grants you permission to modify or distribute
+ the Library or its derivative works. These actions are prohibited by law
+ if you do not accept this License. Therefore, by modifying or
+ distributing the Library (or any work based on the Library), you
+ indicate your acceptance of this License to do so, and all its terms and
+ conditions for copying, distributing or modifying the Library or works
+ based on it.
+
+10. Each time you redistribute the Library (or any work based on the
+ Library), the recipient automatically receives a license from the
+ original licensor to copy, distribute, link with or modify the Library
+ subject to these terms and conditions. You may not impose any further
+ restrictions on the recipients' exercise of the rights granted
+ herein. You are not responsible for enforcing compliance by third
+ parties with this License.
+
+11. If, as a consequence of a court judgment or allegation of patent
+ infringement or for any other reason (not limited to patent issues),
+ conditions are imposed on you (whether by court order, agreement or
+ otherwise) that contradict the conditions of this License, they do not
+ excuse you from the conditions of this License. If you cannot
+ distribute so as to satisfy simultaneously your obligations under this
+ License and any other pertinent obligations, then as a consequence you
+ may not distribute the Library at all. For example, if a patent license
+ would not permit royalty-free redistribution of the Library by all
+ those who receive copies directly or indirectly through you, then the
+ only way you could satisfy both it and this License would be to refrain
+ entirely from distribution of the Library.
+
+ If any portion of this section is held invalid or unenforceable under
+ any particular circumstance, the balance of the section is intended to
+ apply, and the section as a whole is intended to apply in other
+ circumstances.
+
+ It is not the purpose of this section to induce you to infringe any
+ patents or other property right claims or to contest validity of any
+ such claims; this section has the sole purpose of protecting the
+ integrity of the free software distribution system which is implemented
+ by public license practices. Many people have made generous
+ contributions to the wide range of software distributed through that
+ system in reliance on consistent application of that system; it is up
+ to the author/donor to decide if he or she is willing to distribute
+ software through any other system and a licensee cannot impose that
+ choice.
+
+ This section is intended to make thoroughly clear what is believed to
+ be a consequence of the rest of this License.
+
+12. If the distribution and/or use of the Library is restricted in certain
+ countries either by patents or by copyrighted interfaces, the original
+ copyright holder who places the Library under this License may add an
+ explicit geographical distribution limitation excluding those
+ countries, so that distribution is permitted only in or among countries
+ not thus excluded. In such case, this License incorporates the
+ limitation as if written in the body of this License.
+
+13. The Free Software Foundation may publish revised and/or new versions of
+ the Lesser General Public License from time to time. Such new versions
+ will be similar in spirit to the present version, but may differ in
+ detail to address new problems or concerns.
+
+ Each version is given a distinguishing version number. If the Library
+ specifies a version number of this License which applies to it and "any
+ later version", you have the option of following the terms and
+ conditions either of that version or of any later version published by
+ the Free Software Foundation. If the Library does not specify a license
+ version number, you may choose any version ever published by the Free
+ Software Foundation.
+
+14. If you wish to incorporate parts of the Library into other free
+ programs whose distribution conditions are incompatible with these,
+ write to the author to ask for permission. For software which is
+ copyrighted by the Free Software Foundation, write to the Free Software
+ Foundation; we sometimes make exceptions for this. Our decision will be
+ guided by the two goals of preserving the free status of all
+ derivatives of our free software and of promoting the sharing and reuse
+ of software generally.
+
+NO WARRANTY
+
+15. BECAUSE THE LIBRARY IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
+ FOR THE LIBRARY, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
+ OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
+ PROVIDE THE LIBRARY "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER
+ EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE
+ ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE LIBRARY IS WITH
+ YOU. SHOULD THE LIBRARY PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL
+ NECESSARY SERVICING, REPAIR OR CORRECTION.
+
+16. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+ WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
+ REDISTRIBUTE THE LIBRARY AS PERMITTED ABOVE, BE LIABLE TO YOU FOR
+ DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL
+ DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE LIBRARY
+ (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED
+ INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF
+ THE LIBRARY TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF SUCH HOLDER OR
+ OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
+
+END OF TERMS AND CONDITIONS
+
+How to Apply These Terms to Your New Libraries
+
+If you develop a new library, and you want it to be of the greatest
+possible use to the public, we recommend making it free software that
+everyone can redistribute and change. You can do so by permitting
+redistribution under these terms (or, alternatively, under the terms of the
+ordinary General Public License).
+
+To apply these terms, attach the following notices to the library. It is
+safest to attach them to the start of each source file to most effectively
+convey the exclusion of warranty; and each file should have at least the
+"copyright" line and a pointer to where the full notice is found.
+
+one line to give the library's name and an idea of what it does.
+Copyright (C) year name of author
+
+This library is free software; you can redistribute it and/or modify it
+under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 2.1 of the License, or (at
+your option) any later version.
+
+This library is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
+for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with this library; if not, write to the Free Software Foundation,
+Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA Also add
+information on how to contact you by electronic and paper mail.
+
+You should also get your employer (if you work as a programmer) or your
+school, if any, to sign a "copyright disclaimer" for the library, if
+necessary. Here is a sample; alter the names:
+
+Yoyodyne, Inc., hereby disclaims all copyright interest in
+the library `Frob' (a library for tweaking knobs) written
+by James Random Hacker.
+
+signature of Ty Coon, 1 April 1990
+Ty Coon, President of Vice
+That's all there is to it!
diff --git a/src/contrib/licenses/MIT b/src/contrib/licenses/MIT
new file mode 100644
index 0000000..fb122a0
--- /dev/null
+++ b/src/contrib/licenses/MIT
@@ -0,0 +1,19 @@
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/src/contrib/macros.h b/src/contrib/macros.h
new file mode 100644
index 0000000..78df47e
--- /dev/null
+++ b/src/contrib/macros.h
@@ -0,0 +1,41 @@
+/* Copyright (C) 2021 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+/*!
+ * \brief Common macros.
+ */
+
+#pragma once
+
+#ifndef MIN
+/*! \brief Type-safe minimum macro. */
+#define MIN(a, b) \
+ ({ __typeof__ (a) _a = (a); __typeof__ (b) _b = (b); _a < _b ? _a : _b; })
+
+/*! \brief Type-safe maximum macro. */
+#define MAX(a, b) \
+ ({ __typeof__ (a) _a = (a); __typeof__ (b) _b = (b); _a > _b ? _a : _b; })
+#endif
+
+#ifndef likely
+/*! \brief Optimize for x to be true value. */
+#define likely(x) __builtin_expect((x), 1)
+#endif
+
+#ifndef unlikely
+/*! \brief Optimize for x to be false value. */
+#define unlikely(x) __builtin_expect((x), 0)
+#endif
diff --git a/src/contrib/mempattern.c b/src/contrib/mempattern.c
new file mode 100644
index 0000000..f57139d
--- /dev/null
+++ b/src/contrib/mempattern.c
@@ -0,0 +1,122 @@
+/* Copyright (C) 2017 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+#include <stdlib.h>
+
+#include "contrib/mempattern.h"
+#include "contrib/string.h"
+#include "contrib/ucw/mempool.h"
+
+static void mm_nofree(void *p)
+{
+ /* nop */
+}
+
+static void *mm_malloc(void *ctx, size_t n)
+{
+ (void)ctx;
+ return malloc(n);
+}
+
+void *mm_alloc(knot_mm_t *mm, size_t size)
+{
+ if (mm) {
+ return mm->alloc(mm->ctx, size);
+ } else {
+ return malloc(size);
+ }
+}
+
+void *mm_calloc(knot_mm_t *mm, size_t nmemb, size_t size)
+{
+ if (nmemb == 0 || size == 0) {
+ return NULL;
+ }
+ if (mm) {
+ size_t total_size = nmemb * size;
+ if (total_size / nmemb != size) { // Overflow check
+ return NULL;
+ }
+ void *mem = mm_alloc(mm, total_size);
+ if (mem == NULL) {
+ return NULL;
+ }
+ return memzero(mem, total_size);
+ } else {
+ return calloc(nmemb, size);
+ }
+}
+
+void *mm_realloc(knot_mm_t *mm, void *what, size_t size, size_t prev_size)
+{
+ if (mm) {
+ void *p = mm->alloc(mm->ctx, size);
+ if (p == NULL) {
+ return NULL;
+ } else {
+ if (what) {
+ memcpy(p, what,
+ prev_size < size ? prev_size : size);
+ }
+ mm_free(mm, what);
+ return p;
+ }
+ } else {
+ return realloc(what, size);
+ }
+}
+
+char *mm_strdup(knot_mm_t *mm, const char *s)
+{
+ if (s == NULL) {
+ return NULL;
+ }
+ if (mm) {
+ size_t len = strlen(s) + 1;
+ void *mem = mm_alloc(mm, len);
+ if (mem == NULL) {
+ return NULL;
+ }
+ return memcpy(mem, s, len);
+ } else {
+ return strdup(s);
+ }
+}
+
+void mm_free(knot_mm_t *mm, void *what)
+{
+ if (mm) {
+ if (mm->free) {
+ mm->free(what);
+ }
+ } else {
+ free(what);
+ }
+}
+
+void mm_ctx_init(knot_mm_t *mm)
+{
+ mm->ctx = NULL;
+ mm->alloc = mm_malloc;
+ mm->free = free;
+}
+
+void mm_ctx_mempool(knot_mm_t *mm, size_t chunk_size)
+{
+ mm->ctx = mp_new(chunk_size);
+ mm->alloc = (knot_mm_alloc_t)mp_alloc;
+ mm->free = mm_nofree;
+}
diff --git a/src/contrib/mempattern.h b/src/contrib/mempattern.h
new file mode 100644
index 0000000..c74f983
--- /dev/null
+++ b/src/contrib/mempattern.h
@@ -0,0 +1,47 @@
+/* Copyright (C) 2018 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+/*!
+ * \brief Memory allocation related functions.
+ */
+
+#pragma once
+
+#include "libknot/mm_ctx.h"
+
+/*! \brief Default memory block size. */
+#define MM_DEFAULT_BLKSIZE 4096
+
+/*! \brief Allocs using 'mm' if any, uses system malloc() otherwise. */
+void *mm_alloc(knot_mm_t *mm, size_t size);
+
+/*! \brief Callocs using 'mm' if any, uses system calloc() otherwise. */
+void *mm_calloc(knot_mm_t *mm, size_t nmemb, size_t size);
+
+/*! \brief Reallocs using 'mm' if any, uses system realloc() otherwise. */
+void *mm_realloc(knot_mm_t *mm, void *what, size_t size, size_t prev_size);
+
+/*! \brief Strdups using 'mm' if any, uses system strdup() otherwise. */
+char *mm_strdup(knot_mm_t *mm, const char *s);
+
+/*! \brief Free using 'mm' if any, uses system free() otherwise. */
+void mm_free(knot_mm_t *mm, void *what);
+
+/*! \brief Initialize default memory allocation context. */
+void mm_ctx_init(knot_mm_t *mm);
+
+/*! \brief Memory pool context. */
+void mm_ctx_mempool(knot_mm_t *mm, size_t chunk_size);
diff --git a/src/contrib/musl/inet_ntop.c b/src/contrib/musl/inet_ntop.c
new file mode 100644
index 0000000..1b72a61
--- /dev/null
+++ b/src/contrib/musl/inet_ntop.c
@@ -0,0 +1,79 @@
+/*
+ * Copyright © 2005-2020 Rich Felker, et al.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <arpa/inet.h>
+#include <errno.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "contrib/musl/inet_ntop.h"
+#include "contrib/openbsd/strlcpy.h"
+
+const char *knot_inet_ntop(int af, const void *restrict a0, char *restrict s, socklen_t l)
+{
+ const unsigned char *a = a0;
+ int i, j, max, best;
+ char buf[100];
+
+ switch (af) {
+ case AF_INET:
+ if (snprintf(s, l, "%d.%d.%d.%d", a[0],a[1],a[2],a[3]) < l)
+ return s;
+ break;
+ case AF_INET6:
+ if (memcmp(a, "\0\0\0\0\0\0\0\0\0\0\377\377", 12))
+ (void)snprintf(buf, sizeof buf,
+ "%x:%x:%x:%x:%x:%x:%x:%x",
+ 256*a[0]+a[1],256*a[2]+a[3],
+ 256*a[4]+a[5],256*a[6]+a[7],
+ 256*a[8]+a[9],256*a[10]+a[11],
+ 256*a[12]+a[13],256*a[14]+a[15]);
+ else
+ (void)snprintf(buf, sizeof buf,
+ "%x:%x:%x:%x:%x:%x:%d.%d.%d.%d",
+ 256*a[0]+a[1],256*a[2]+a[3],
+ 256*a[4]+a[5],256*a[6]+a[7],
+ 256*a[8]+a[9],256*a[10]+a[11],
+ a[12],a[13],a[14],a[15]);
+ /* Replace longest /(^0|:)[:0]{2,}/ with "::" */
+ for (i=best=0, max=2; buf[i]; i++) {
+ if (i && buf[i] != ':') continue;
+ j = strspn(buf+i, ":0");
+ if (j>max) best=i, max=j;
+ }
+ if (max>3) {
+ buf[best] = buf[best+1] = ':';
+ memmove(buf+best+2, buf+best+max, i-best-max+1);
+ }
+ if (strlen(buf) < l) {
+ strlcpy(s, buf, l);
+ return s;
+ }
+ break;
+ default:
+ errno = EAFNOSUPPORT;
+ return 0;
+ }
+ errno = ENOSPC;
+ return 0;
+}
diff --git a/src/contrib/musl/inet_ntop.h b/src/contrib/musl/inet_ntop.h
new file mode 100644
index 0000000..73faaee
--- /dev/null
+++ b/src/contrib/musl/inet_ntop.h
@@ -0,0 +1,28 @@
+/*
+ * Copyright © 2005-2020 Rich Felker, et al.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#pragma once
+
+#include <sys/socket.h>
+
+const char *knot_inet_ntop(int af, const void *restrict a0, char *restrict s, socklen_t l);
diff --git a/src/contrib/net.c b/src/contrib/net.c
new file mode 100644
index 0000000..3de4a71
--- /dev/null
+++ b/src/contrib/net.c
@@ -0,0 +1,747 @@
+/* Copyright (C) 2023 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+#include <assert.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <sys/types.h> // OpenBSD
+#include <netinet/tcp.h> // TCP_FASTOPEN
+#include <netinet/in.h>
+#include <poll.h>
+#include <stdbool.h>
+#include <sys/socket.h>
+#include <sys/uio.h>
+#include <unistd.h>
+
+#include "libknot/errcode.h"
+#include "contrib/macros.h"
+#include "contrib/net.h"
+#include "contrib/sockaddr.h"
+#include "contrib/time.h"
+
+/*!
+ * \brief Enable socket option.
+ */
+static int sockopt_enable(int sock, int level, int optname)
+{
+ const int enable = 1;
+ if (setsockopt(sock, level, optname, &enable, sizeof(enable)) != 0) {
+ return knot_map_errno();
+ }
+
+ return KNOT_EOK;
+}
+
+/*!
+ * \brief Create a non-blocking socket.
+ *
+ * Prefer SOCK_NONBLOCK if available to save one fcntl() syscall.
+ *
+ */
+static int socket_create(int family, int type, int proto)
+{
+#ifdef SOCK_NONBLOCK
+ type |= SOCK_NONBLOCK;
+#endif
+ int sock = socket(family, type, proto);
+ if (sock < 0) {
+ return knot_map_errno();
+ }
+
+#ifndef SOCK_NONBLOCK
+ if (fcntl(sock, F_SETFL, O_NONBLOCK) != 0) {
+ int ret = knot_map_errno();
+ close(sock);
+ return ret;
+ }
+#endif
+
+/*
+ * OS X doesn't support MSG_NOSIGNAL. Use SO_NOSIGPIPE socket option instead.
+ */
+#if defined(__APPLE__) && !defined(MSG_NOSIGNAL)
+#define MSG_NOSIGNAL 0
+ int ret = sockopt_enable(sock, SOL_SOCKET, SO_NOSIGPIPE);
+ if (ret != KNOT_EOK) {
+ return ret;
+ }
+#endif
+
+ return sock;
+}
+
+int net_unbound_socket(int type, const struct sockaddr_storage *addr)
+{
+ if (addr == NULL) {
+ return KNOT_EINVAL;
+ }
+
+ /* Create socket. */
+ return socket_create(addr->ss_family, type, 0);
+}
+
+struct option {
+ int level;
+ int name;
+};
+
+/*!
+ * \brief Get setsock option for binding non-local address.
+ */
+static const struct option *nonlocal_option(int family)
+{
+ static const struct option ipv4 = {
+ #if defined(IP_FREEBIND)
+ IPPROTO_IP, IP_FREEBIND
+ #elif defined(IP_BINDANY)
+ IPPROTO_IP, IP_BINDANY
+ #else
+ 0, 0
+ #endif
+ };
+
+ static const struct option ipv6 = {
+ #if defined(IP_FREEBIND)
+ IPPROTO_IP, IP_FREEBIND
+ #elif defined(IPV6_BINDANY)
+ IPPROTO_IPV6, IPV6_BINDANY
+ #else
+ 0, 0
+ #endif
+
+ };
+
+ switch (family) {
+ case AF_INET: return &ipv4;
+ case AF_INET6: return &ipv6;
+ default:
+ return NULL;
+ }
+}
+
+static int enable_nonlocal(int sock, int family)
+{
+ const struct option *opt = nonlocal_option(family);
+ if (opt == NULL || opt->name == 0) {
+ return KNOT_ENOTSUP;
+ }
+
+ return sockopt_enable(sock, opt->level, opt->name);
+}
+
+static int enable_reuseport(int sock)
+{
+#ifdef ENABLE_REUSEPORT
+# if defined(__FreeBSD__)
+ return sockopt_enable(sock, SOL_SOCKET, SO_REUSEPORT_LB);
+# else
+ return sockopt_enable(sock, SOL_SOCKET, SO_REUSEPORT);
+# endif
+#else
+ return KNOT_ENOTSUP;
+#endif
+}
+
+static void unlink_unix_socket(const struct sockaddr_storage *addr)
+{
+ char path[SOCKADDR_STRLEN] = { 0 };
+ sockaddr_tostr(path, sizeof(path), addr);
+ unlink(path);
+}
+
+int net_bound_socket(int type, const struct sockaddr_storage *addr,
+ net_bind_flag_t flags, mode_t unix_mode)
+{
+ /* Create socket. */
+ int sock = net_unbound_socket(type, addr);
+ if (sock < 0) {
+ return sock;
+ }
+
+ /* Unlink UNIX sock if exists. */
+ if (addr->ss_family == AF_UNIX) {
+ unlink_unix_socket(addr);
+ }
+
+ /* Reuse old address if taken. */
+ int ret = sockopt_enable(sock, SOL_SOCKET, SO_REUSEADDR);
+ if (ret != KNOT_EOK) {
+ close(sock);
+ return ret;
+ }
+
+#if defined(__linux__)
+ /* Set MSS (Maximum Segment Size) limit. */
+ if (addr->ss_family != AF_UNIX && type == SOCK_STREAM) {
+ const int mss = KNOT_TCP_MSS;
+ if (setsockopt(sock, IPPROTO_TCP, TCP_MAXSEG, &mss, sizeof(mss)) != 0) {
+ ret = knot_map_errno();
+ close(sock);
+ return ret;
+ }
+ }
+#endif
+
+ /* Don't bind IPv4 for IPv6 any address. */
+ if (addr->ss_family == AF_INET6) {
+ ret = sockopt_enable(sock, IPPROTO_IPV6, IPV6_V6ONLY);
+ if (ret != KNOT_EOK) {
+ close(sock);
+ return ret;
+ }
+ }
+
+ /* Allow bind to non-local address. */
+ if (flags & NET_BIND_NONLOCAL) {
+ ret = enable_nonlocal(sock, addr->ss_family);
+ if (ret != KNOT_EOK) {
+ close(sock);
+ return ret;
+ }
+ }
+
+ /* Allow to bind the same address by multiple threads. */
+ if (flags & NET_BIND_MULTIPLE) {
+ ret = enable_reuseport(sock);
+ if (ret != KNOT_EOK) {
+ close(sock);
+ return ret;
+ }
+ }
+
+ /* Bind to specified address. */
+ ret = bind(sock, (const struct sockaddr *)addr, sockaddr_len(addr));
+ if (ret < 0) {
+ ret = knot_map_errno();
+ close(sock);
+ return ret;
+ }
+
+ if (addr->ss_family == AF_UNIX && unix_mode != 0) {
+ const char *path = ((const struct sockaddr_un *)addr)->sun_path;
+ if (chmod(path, unix_mode) != 0) {
+ ret = knot_map_errno();
+ close(sock);
+ return ret;
+ }
+ }
+
+ return sock;
+}
+
+static int tfo_connect(int sock, const struct sockaddr_storage *addr)
+{
+#if defined(__linux__)
+ /* connect() will be called implicitly with sendmsg(). */
+ return KNOT_EOK;
+#elif defined(__FreeBSD__)
+ return sockopt_enable(sock, IPPROTO_TCP, TCP_FASTOPEN);
+#elif defined(__APPLE__)
+ /* Connection is performed lazily when first data is sent. */
+ sa_endpoints_t ep = {
+ .sae_dstaddr = (const struct sockaddr *)addr,
+ .sae_dstaddrlen = sockaddr_len(addr)
+ };
+ int flags = CONNECT_DATA_IDEMPOTENT | CONNECT_RESUME_ON_READ_WRITE;
+
+ int ret = connectx(sock, &ep, SAE_ASSOCID_ANY, flags, NULL, 0, NULL, NULL);
+ return (ret == 0 ? KNOT_EOK : knot_map_errno());
+#else
+ return KNOT_ENOTSUP;
+#endif
+}
+
+int net_connected_socket(int type, const struct sockaddr_storage *dst_addr,
+ const struct sockaddr_storage *src_addr, bool tfo)
+{
+ if (dst_addr == NULL) {
+ return KNOT_EINVAL;
+ }
+
+ /* Check port. */
+ if (sockaddr_port(dst_addr) == 0) {
+ return KNOT_NET_EADDR;
+ }
+
+ /* Bind to specific source address - if set. */
+ int sock = -1;
+ if (src_addr && src_addr->ss_family != AF_UNSPEC) {
+ sock = net_bound_socket(type, src_addr, 0, 0);
+ } else {
+ sock = net_unbound_socket(type, dst_addr);
+ }
+ if (sock < 0) {
+ return sock;
+ }
+
+ /* Connect to destination. */
+ if (tfo && net_is_stream(sock)) {
+ int ret = tfo_connect(sock, dst_addr);
+ if (ret != KNOT_EOK) {
+ close(sock);
+ return ret;
+ }
+ } else {
+ int ret = connect(sock, (const struct sockaddr *)dst_addr,
+ sockaddr_len(dst_addr));
+ if (ret != 0 && errno != EINPROGRESS) {
+ ret = knot_map_errno();
+ close(sock);
+ return ret;
+ }
+ }
+
+ return sock;
+}
+
+int net_bound_tfo(int sock, int backlog)
+{
+#if defined(TCP_FASTOPEN)
+#if defined(__APPLE__)
+ if (backlog > 0) {
+ backlog = 1; // just on-off switch on macOS
+ }
+#endif
+ if (setsockopt(sock, IPPROTO_TCP, TCP_FASTOPEN, &backlog, sizeof(backlog)) != 0) {
+ return knot_map_errno();
+ }
+
+ return KNOT_EOK;
+#endif
+ return KNOT_ENOTSUP;
+}
+
+bool net_is_connected(int sock)
+{
+ struct sockaddr_storage addr;
+ socklen_t len = sizeof(addr);
+ return (getpeername(sock, (struct sockaddr *)&addr, &len) == 0);
+}
+
+int net_socktype(int sock)
+{
+ int type;
+ socklen_t size = sizeof(type);
+
+ if (getsockopt(sock, SOL_SOCKET, SO_TYPE, &type, &size) == 0) {
+ return type;
+ } else {
+ return AF_UNSPEC;
+ }
+}
+
+bool net_is_stream(int sock)
+{
+ return net_socktype(sock) == SOCK_STREAM;
+}
+
+int net_accept(int sock, struct sockaddr_storage *addr)
+{
+ socklen_t len = sizeof(*addr);
+ socklen_t *addr_len = (addr != NULL) ? &len : NULL;
+
+ int remote = -1;
+
+#if defined(HAVE_ACCEPT4) && defined(SOCK_NONBLOCK)
+ remote = accept4(sock, (struct sockaddr *)addr, addr_len, SOCK_NONBLOCK);
+ if (remote < 0) {
+ return knot_map_errno();
+ }
+#else
+ remote = accept(sock, (struct sockaddr *)addr, addr_len);
+ if (fcntl(remote, F_SETFL, O_NONBLOCK) != 0) {
+ int error = knot_map_errno();
+ close(remote);
+ return error;
+ }
+#endif
+
+ return remote;
+}
+
+void net_reset(int sock)
+{
+ struct sockaddr unspec = { .sa_family = AF_UNSPEC };
+ (void)connect(sock, &unspec, sizeof(unspec));
+}
+
+/* -- I/O interface handling partial -------------------------------------- */
+
+/*!
+ * \brief Perform \a poll() on one socket.
+ */
+static int poll_one(int fd, int events, int timeout_ms)
+{
+ struct pollfd pfd = {
+ .fd = fd,
+ .events = events
+ };
+
+ return poll(&pfd, 1, timeout_ms);
+}
+
+/*!
+ * \brief Check if we should wait for I/O readiness.
+ *
+ * \param error \a errno set by the failed I/O operation.
+ */
+static bool io_should_wait(int error)
+{
+ if (error == EAGAIN || error == EWOULDBLOCK || /* Socket data not ready. */
+ error == ENOMEM || error == ENOBUFS) { /* Insufficient resources. */
+ return true;
+ }
+
+#ifndef __linux__
+ /* FreeBSD: connection in progress. */
+ if (error == ENOTCONN) {
+ return true;
+ }
+#endif
+
+ return false;
+}
+
+/*!
+ * \brief Check if we should wait again.
+ *
+ * \param error \a errno set by the failed wait operation.
+ */
+static bool wait_should_retry(int error)
+{
+ if (error == EINTR || /* System call interrupted. */
+ error == EAGAIN || error == ENOMEM) { /* Insufficient resources. */
+ return true;
+ }
+ return false;
+}
+
+/*!
+ * \brief I/O operation callbacks.
+ */
+struct io {
+ ssize_t (*process)(int sockfd, struct msghdr *msg, int timeout_ms);
+ int (*wait)(int sockfd, int timeout_ms);
+};
+
+/*!
+ * \brief Get total size of I/O vector in a message.
+ */
+static size_t msg_iov_len(const struct msghdr *msg)
+{
+ size_t total = 0;
+
+ for (int i = 0; i < msg->msg_iovlen; i++) {
+ total += msg->msg_iov[i].iov_len;
+ }
+
+ return total;
+}
+
+/*!
+ * \brief Shift processed data out of message IO vectors.
+ */
+static void msg_iov_shift(struct msghdr *msg, size_t done)
+{
+ struct iovec *iov = msg->msg_iov;
+ int iovlen = msg->msg_iovlen;
+
+ for (int i = 0; i < iovlen && done > 0; i++) {
+ if (iov[i].iov_len > done) {
+ iov[i].iov_base += done;
+ iov[i].iov_len -= done;
+ done = 0;
+ } else {
+ done -= iov[i].iov_len;
+ msg->msg_iov += 1;
+ msg->msg_iovlen -= 1;
+ }
+ }
+
+ assert(done == 0);
+}
+
+#define TIMEOUT_CTX_INIT \
+ struct timespec begin, end; \
+ if (*timeout_ptr > 0) { \
+ clock_gettime(CLOCK_MONOTONIC, &begin); \
+ }
+
+#define TIMEOUT_CTX_UPDATE \
+ if (*timeout_ptr > 0) { \
+ clock_gettime(CLOCK_MONOTONIC, &end); \
+ int running_ms = time_diff_ms(&begin, &end); \
+ *timeout_ptr = MAX(*timeout_ptr - running_ms, 0); \
+ }
+
+/*!
+ * \brief Perform an I/O operation with a socket with waiting.
+ *
+ * \param oneshot If set, doesn't wait until the buffer is fully processed.
+ */
+static ssize_t io_exec(const struct io *io, int fd, struct msghdr *msg,
+ bool oneshot, int *timeout_ptr)
+{
+ size_t done = 0;
+ size_t total = msg_iov_len(msg);
+
+ for (;;) {
+ /* Perform I/O. */
+ ssize_t ret = io->process(fd, msg, *timeout_ptr);
+ if (ret == -1 && errno == EINTR) {
+ continue;
+ }
+ if (ret > 0) {
+ done += ret;
+ if (oneshot || done == total) {
+ break;
+ }
+ msg_iov_shift(msg, ret);
+ }
+
+ /* Wait for data readiness. */
+ if (ret > 0 || (ret == -1 && io_should_wait(errno))) {
+ for (;;) {
+ TIMEOUT_CTX_INIT
+
+ ret = io->wait(fd, *timeout_ptr);
+
+ if (ret == 1) {
+ TIMEOUT_CTX_UPDATE
+ /* Ready, retry process. */
+ break;
+ } else if (ret == -1 && wait_should_retry(errno)) {
+ TIMEOUT_CTX_UPDATE
+ /* Interrupted or transient error, continue waiting. */
+ continue;
+ } else if (ret == 0) {
+ /* Timed out, exit. */
+ return KNOT_ETIMEOUT;
+ } else {
+ /* In specific circumstances with Valgrind,
+ * poll() returns wrong value.
+ */
+ assert(ret <= 1);
+ assert(ret >= -1);
+ /* Other error, exit. */
+ return KNOT_ECONN;
+ }
+ }
+ } else {
+ /* Disconnect or error. */
+ return KNOT_ECONN;
+ }
+ }
+
+ return done;
+}
+
+static ssize_t recv_process(int fd, struct msghdr *msg, int timeout_ms)
+{
+ return recvmsg(fd, msg, MSG_DONTWAIT | MSG_NOSIGNAL);
+}
+
+static int recv_wait(int fd, int timeout_ms)
+{
+ return poll_one(fd, POLLIN, timeout_ms);
+}
+
+static ssize_t recv_data(int sock, struct msghdr *msg, bool oneshot, int *timeout_ptr)
+{
+ static const struct io RECV_IO = {
+ .process = recv_process,
+ .wait = recv_wait
+ };
+
+ return io_exec(&RECV_IO, sock, msg, oneshot, timeout_ptr);
+}
+
+static ssize_t send_process_tfo(int fd, struct msghdr *msg, int timeout_ms)
+{
+#if defined(__linux__)
+ int ret = sendmsg(fd, msg, MSG_FASTOPEN);
+ if (ret != 0 && errno == EINPROGRESS) {
+ if (poll_one(fd, POLLOUT, timeout_ms) != 1) {
+ errno = ETIMEDOUT;
+ return -1;
+ }
+ ret = sendmsg(fd, msg, MSG_NOSIGNAL);
+ }
+ return ret;
+#else
+ return sendmsg(fd, msg, MSG_NOSIGNAL);
+#endif
+}
+
+static ssize_t send_process(int fd, struct msghdr *msg, int timeout_ms)
+{
+ return sendmsg(fd, msg, MSG_NOSIGNAL);
+}
+
+static int send_wait(int fd, int timeout_ms)
+{
+ return poll_one(fd, POLLOUT, timeout_ms);
+}
+
+static ssize_t send_data(int sock, struct msghdr *msg, int *timeout_ptr, bool tfo)
+{
+ static const struct io SEND_IO = {
+ .process = send_process,
+ .wait = send_wait
+ };
+ static const struct io SEND_IO_TFO = {
+ .process = send_process_tfo,
+ .wait = send_wait
+ };
+
+ return io_exec(tfo ? &SEND_IO_TFO : &SEND_IO, sock, msg, false, timeout_ptr);
+}
+
+/* -- generic stream and datagram I/O -------------------------------------- */
+
+ssize_t net_base_send(int sock, const uint8_t *buffer, size_t size,
+ const struct sockaddr_storage *addr, int timeout_ms)
+{
+ if (sock < 0 || buffer == NULL) {
+ return KNOT_EINVAL;
+ }
+
+ struct iovec iov = { 0 };
+ iov.iov_base = (void *)buffer;
+ iov.iov_len = size;
+
+ struct msghdr msg = { 0 };
+ msg.msg_name = (void *)addr;
+ msg.msg_namelen = sockaddr_len(addr);
+ msg.msg_iov = &iov;
+ msg.msg_iovlen = 1;
+
+ int ret = send_data(sock, &msg, &timeout_ms, false);
+ if (ret < 0) {
+ return ret;
+ } else if (ret != size) {
+ return KNOT_ECONN;
+ }
+
+ return ret;
+}
+
+ssize_t net_base_recv(int sock, uint8_t *buffer, size_t size,
+ struct sockaddr_storage *addr, int timeout_ms)
+{
+ if (sock < 0 || buffer == NULL) {
+ return KNOT_EINVAL;
+ }
+
+ struct iovec iov = { 0 };
+ iov.iov_base = buffer;
+ iov.iov_len = size;
+
+ struct msghdr msg = { 0 };
+ msg.msg_name = (void *)addr;
+ msg.msg_namelen = addr ? sizeof(*addr) : 0;
+ msg.msg_iov = &iov;
+ msg.msg_iovlen = 1;
+
+ return recv_data(sock, &msg, true, &timeout_ms);
+}
+
+ssize_t net_dgram_send(int sock, const uint8_t *buffer, size_t size,
+ const struct sockaddr_storage *addr)
+{
+ return net_base_send(sock, buffer, size, addr, 0);
+}
+
+ssize_t net_dgram_recv(int sock, uint8_t *buffer, size_t size, int timeout_ms)
+{
+ return net_base_recv(sock, buffer, size, NULL, timeout_ms);
+}
+
+ssize_t net_stream_send(int sock, const uint8_t *buffer, size_t size, int timeout_ms)
+{
+ return net_base_send(sock, buffer, size, NULL, timeout_ms);
+}
+
+ssize_t net_stream_recv(int sock, uint8_t *buffer, size_t size, int timeout_ms)
+{
+ return net_base_recv(sock, buffer, size, NULL, timeout_ms);
+}
+
+/* -- DNS specific I/O ----------------------------------------------------- */
+
+ssize_t net_dns_tcp_send(int sock, const uint8_t *buffer, size_t size, int timeout_ms,
+ struct sockaddr_storage *tfo_addr)
+{
+ if (sock < 0 || buffer == NULL || size > UINT16_MAX) {
+ return KNOT_EINVAL;
+ }
+
+ struct iovec iov[2];
+ uint16_t pktsize = htons(size);
+ iov[0].iov_base = &pktsize;
+ iov[0].iov_len = sizeof(uint16_t);
+ iov[1].iov_base = (void *)buffer;
+ iov[1].iov_len = size;
+
+ struct msghdr msg = { 0 };
+ msg.msg_iov = iov;
+ msg.msg_iovlen = 2;
+ msg.msg_name = (void *)tfo_addr;
+ msg.msg_namelen = tfo_addr ? sizeof(*tfo_addr) : 0;
+
+ ssize_t ret = send_data(sock, &msg, &timeout_ms, tfo_addr != NULL);
+ if (ret < 0) {
+ return ret;
+ }
+
+ return size; /* Do not count the size prefix. */
+}
+
+ssize_t net_dns_tcp_recv(int sock, uint8_t *buffer, size_t size, int timeout_ms)
+{
+ if (sock < 0 || buffer == NULL) {
+ return KNOT_EINVAL;
+ }
+
+ uint16_t pktsize = 0;
+
+ struct iovec iov = {
+ .iov_base = &pktsize,
+ .iov_len = sizeof(pktsize)
+ };
+ struct msghdr msg = {
+ .msg_iov = &iov,
+ .msg_iovlen = 1
+ };
+
+ /* Receive size. */
+ int ret = recv_data(sock, &msg, false, &timeout_ms);
+ if (ret != sizeof(pktsize)) {
+ return ret;
+ }
+ pktsize = ntohs(pktsize);
+
+ /* Check packet size */
+ if (size < pktsize) {
+ return KNOT_ESPACE;
+ }
+
+ /* Receive payload. */
+ iov.iov_base = buffer;
+ iov.iov_len = pktsize;
+
+ return recv_data(sock, &msg, false, &timeout_ms);
+}
diff --git a/src/contrib/net.h b/src/contrib/net.h
new file mode 100644
index 0000000..85a8bd6
--- /dev/null
+++ b/src/contrib/net.h
@@ -0,0 +1,209 @@
+/* Copyright (C) 2023 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include <stdbool.h>
+#include <stdint.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+
+// 1280 (IPv6 minimum link MTU) - 40 (IPv6 fixed header) - 20 (TCP fixed header)
+#define KNOT_TCP_MSS 1220
+
+/*!
+ * \brief Network interface flags.
+ */
+typedef enum {
+ NET_BIND_NONLOCAL = (1 << 0), //!< Allow to bind unavailable address.
+ NET_BIND_MULTIPLE = (1 << 1), //!< Allow to bind address multiple times.
+} net_bind_flag_t;
+
+/*!
+ * \brief Create unbound socket of given family and type.
+ *
+ * \note The socket is set to non-blocking mode.
+ *
+ * \param type Socket transport type (SOCK_STREAM, SOCK_DGRAM).
+ * \param addr Socket address.
+ *
+ * \return socket or error code
+ */
+int net_unbound_socket(int type, const struct sockaddr_storage *addr);
+
+/*!
+ * \brief Create socket bound to given address.
+ *
+ * The socket is set to non-blocking mode.
+ *
+ * \param type Socket transport type (SOCK_STREAM, SOCK_DGRAM).
+ * \param addr Socket address.
+ * \param flags Socket binding options.
+ * \param unix_mode Socket file mode (UNIX socket only). 0 means don't set mode.
+ *
+ * \return socket or error code
+ */
+int net_bound_socket(int type, const struct sockaddr_storage *addr,
+ net_bind_flag_t flags, mode_t unix_mode);
+
+/*!
+ * \brief Create socket connected (asynchronously) to destination address.
+ *
+ * \note The socket is set to non-blocking mode.
+ *
+ * \param type Socket transport type (SOCK_STREAM, SOCK_DGRAM).
+ * \param dst_addr Destination address.
+ * \param src_addr Source address (can be NULL).
+ * \param tfo Enable TCP Fast Open.
+ *
+ * \return socket or error code
+ */
+int net_connected_socket(int type, const struct sockaddr_storage *dst_addr,
+ const struct sockaddr_storage *src_addr, bool tfo);
+
+/*!
+ * \brief Enables TCP Fast Open on a bound socket.
+ *
+ * \param sock Socket.
+ *
+ * \return KNOT_EOK or error code
+ */
+int net_bound_tfo(int sock, int backlog);
+
+/*!
+ * \brief Return true if the socket is fully connected.
+ *
+ * \param sock Socket.
+ *
+ * \return true if connected
+ */
+bool net_is_connected(int sock);
+
+/*!
+ * \brief Get socket type (e.g. \a SOCK_STREAM).
+ *
+ * \param sock Socket.
+ */
+int net_socktype(int sock);
+
+/*!
+ * \brief Check if socket is a SOCK_STREAM socket.
+ */
+bool net_is_stream(int sock);
+
+/*!
+ * \brief Accept a connection on a listening socket.
+ *
+ * \brief The socket is set to non-blocking mode.
+ *
+ * \param sock Socket
+ * \param addr Remote address (can be NULL).
+ *
+ * \return socket or error code
+ */
+int net_accept(int sock, struct sockaddr_storage *addr);
+
+/*!
+ * \brief Reset a TCP connection (with a RST packet).
+ */
+void net_reset(int sock);
+
+/*!
+ * \brief Send a message on a socket.
+ *
+ * The socket can be SOCK_STREAM or SOCK_DGRAM.
+ *
+ * The implementation handles partial-writes automatically.
+ *
+ * \param[in] sock Socket.
+ * \param[in] buffer Message buffer.
+ * \param[in] size Size of the message.
+ * \param[in] addr Remote address (ignored for SOCK_STREAM).
+ * \param[in] timeout_ms Write timeout in milliseconds (-1 for infinity,
+ * not valid for SOCK_DGRAM).
+ *
+ * \return Number of bytes sent or negative error code.
+ */
+ssize_t net_base_send(int sock, const uint8_t *buffer, size_t size,
+ const struct sockaddr_storage *addr, int timeout_ms);
+
+/*!
+ * \brief Receive a message from a socket.
+ *
+ * \param[in] sock Socket.
+ * \param[out] buffer Receiving buffer.
+ * \param[in] size Capacity of the receiving buffer.
+ * \param[out] addr Remote address (can be NULL).
+ * \param[in] timeout_ms Read timeout in milliseconds (-1 for infinity).
+ *
+ * \return Number of bytes read or negative error code.
+ */
+ssize_t net_base_recv(int sock, uint8_t *buffer, size_t size,
+ struct sockaddr_storage *addr, int timeout_ms);
+
+/*!
+ * \brief Send a message on a SOCK_DGRAM socket.
+ *
+ * \see net_base_send
+ */
+ssize_t net_dgram_send(int sock, const uint8_t *buffer, size_t size,
+ const struct sockaddr_storage *addr);
+
+/*!
+ * \brief Receive a message from a SOCK_DGRAM socket.
+ *
+ * \see net_base_recv
+ */
+ssize_t net_dgram_recv(int sock, uint8_t *buffer, size_t size, int timeout_ms);
+
+/*!
+ * \brief Send a message on a SOCK_STREAM socket.
+ *
+ * \see net_base_send
+ */
+ssize_t net_stream_send(int sock, const uint8_t *buffer, size_t size, int timeout_ms);
+
+/*!
+ * \brief Receive a message from a SOCK_STREAM socket.
+ *
+ * \see net_base_recv
+ */
+ssize_t net_stream_recv(int sock, uint8_t *buffer, size_t size, int timeout_ms);
+
+/*!
+ * \brief Send a DNS message on a TCP socket.
+ *
+ * The outgoing message is prefixed with a two-byte value carrying the DNS
+ * message size according to the specification. These two bytes are not
+ * reflected in the return value.
+ *
+ * \param[in] tfo_addr If not NULL, send using TCP Fast Open to this address.
+ *
+ * \see net_base_send
+ */
+ssize_t net_dns_tcp_send(int sock, const uint8_t *buffer, size_t size, int timeout_ms,
+ struct sockaddr_storage *tfo_addr);
+
+/*!
+ * \brief Receive a DNS message from a TCP socket.
+ *
+ * The first two bytes of the incoming message are interpreted as a DNS message
+ * size according to the specification. These two bytes are not included in
+ * the returned size. Only a complete DNS message is retrieved.
+ *
+ * \see net_base_recv
+ */
+ssize_t net_dns_tcp_recv(int sock, uint8_t *buffer, size_t size, int timeout_ms);
diff --git a/src/contrib/openbsd/LICENSE b/src/contrib/openbsd/LICENSE
new file mode 100644
index 0000000..e9a1aaa
--- /dev/null
+++ b/src/contrib/openbsd/LICENSE
@@ -0,0 +1,2 @@
+../licenses/0BSD
+../licenses/BSD-3-Clause \ No newline at end of file
diff --git a/src/contrib/openbsd/siphash.c b/src/contrib/openbsd/siphash.c
new file mode 100644
index 0000000..26b8cfc
--- /dev/null
+++ b/src/contrib/openbsd/siphash.c
@@ -0,0 +1,176 @@
+/* $OpenBSD: siphash.c,v 1.6 2017/04/12 17:41:49 deraadt Exp $ */
+
+/*-
+ * Copyright (c) 2013 Andre Oppermann <andre@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote
+ * products derived from this software without specific prior written
+ * permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * SipHash is a family of PRFs SipHash-c-d where the integer parameters c and d
+ * are the number of compression rounds and the number of finalization rounds.
+ * A compression round is identical to a finalization round and this round
+ * function is called SipRound. Given a 128-bit key k and a (possibly empty)
+ * byte string m, SipHash-c-d returns a 64-bit value SipHash-c-d(k; m).
+ *
+ * Implemented from the paper "SipHash: a fast short-input PRF", 2012.09.18,
+ * by Jean-Philippe Aumasson and Daniel J. Bernstein,
+ * Permanent Document ID b9a943a805fbfc6fde808af9fc0ecdfa
+ * https://131002.net/siphash/siphash.pdf
+ * https://131002.net/siphash/
+ */
+
+#include <string.h>
+
+#include "libknot/endian.h"
+#include "contrib/string.h"
+#include "contrib/openbsd/siphash.h"
+
+static void SipHash_CRounds(SIPHASH_CTX *, int);
+static void SipHash_Rounds(SIPHASH_CTX *, int);
+
+void
+SipHash_Init(SIPHASH_CTX *ctx, const SIPHASH_KEY *key)
+{
+ uint64_t k0, k1;
+
+ k0 = le64toh(key->k0);
+ k1 = le64toh(key->k1);
+
+ ctx->v[0] = 0x736f6d6570736575ULL ^ k0;
+ ctx->v[1] = 0x646f72616e646f6dULL ^ k1;
+ ctx->v[2] = 0x6c7967656e657261ULL ^ k0;
+ ctx->v[3] = 0x7465646279746573ULL ^ k1;
+
+ memset(ctx->buf, 0, sizeof(ctx->buf));
+ ctx->bytes = 0;
+}
+
+void
+SipHash_Update(SIPHASH_CTX *ctx, int rc, int rf, const void *src, size_t len)
+{
+ const uint8_t *ptr = src;
+ size_t left, used;
+
+ if (len == 0)
+ return;
+
+ used = ctx->bytes % sizeof(ctx->buf);
+ ctx->bytes += len;
+
+ if (used > 0) {
+ left = sizeof(ctx->buf) - used;
+
+ if (len >= left) {
+ memcpy(&ctx->buf[used], ptr, left);
+ SipHash_CRounds(ctx, rc);
+ len -= left;
+ ptr += left;
+ } else {
+ memcpy(&ctx->buf[used], ptr, len);
+ return;
+ }
+ }
+
+ while (len >= sizeof(ctx->buf)) {
+ memcpy(ctx->buf, ptr, sizeof(ctx->buf));
+ SipHash_CRounds(ctx, rc);
+ len -= sizeof(ctx->buf);
+ ptr += sizeof(ctx->buf);
+ }
+
+ if (len > 0)
+ memcpy(&ctx->buf, ptr, len);
+}
+
+uint64_t
+SipHash_End(SIPHASH_CTX *ctx, int rc, int rf)
+{
+ uint64_t r;
+ size_t left, used;
+
+ used = ctx->bytes % sizeof(ctx->buf);
+ left = sizeof(ctx->buf) - used;
+ memset(&ctx->buf[used], 0, left - 1);
+ ctx->buf[7] = ctx->bytes;
+
+ SipHash_CRounds(ctx, rc);
+ ctx->v[2] ^= 0xff;
+ SipHash_Rounds(ctx, rf);
+
+ r = (ctx->v[0] ^ ctx->v[1]) ^ (ctx->v[2] ^ ctx->v[3]);
+ memzero(ctx, sizeof(*ctx));
+ return htole64(r);
+}
+
+uint64_t
+SipHash(const SIPHASH_KEY *key, int rc, int rf, const void *src, size_t len)
+{
+ SIPHASH_CTX ctx;
+
+ SipHash_Init(&ctx, key);
+ SipHash_Update(&ctx, rc, rf, src, len);
+ return (SipHash_End(&ctx, rc, rf));
+}
+
+#define SIP_ROTL(x, b) ((x) << (b)) | ( (x) >> (64 - (b)))
+
+static void
+SipHash_Rounds(SIPHASH_CTX *ctx, int rounds)
+{
+ while (rounds--) {
+ ctx->v[0] += ctx->v[1];
+ ctx->v[2] += ctx->v[3];
+ ctx->v[1] = SIP_ROTL(ctx->v[1], 13);
+ ctx->v[3] = SIP_ROTL(ctx->v[3], 16);
+
+ ctx->v[1] ^= ctx->v[0];
+ ctx->v[3] ^= ctx->v[2];
+ ctx->v[0] = SIP_ROTL(ctx->v[0], 32);
+
+ ctx->v[2] += ctx->v[1];
+ ctx->v[0] += ctx->v[3];
+ ctx->v[1] = SIP_ROTL(ctx->v[1], 17);
+ ctx->v[3] = SIP_ROTL(ctx->v[3], 21);
+
+ ctx->v[1] ^= ctx->v[2];
+ ctx->v[3] ^= ctx->v[0];
+ ctx->v[2] = SIP_ROTL(ctx->v[2], 32);
+ }
+}
+
+static void
+SipHash_CRounds(SIPHASH_CTX *ctx, int rounds)
+{
+ uint64_t tmp;
+
+ memcpy(&tmp, ctx->buf, sizeof(tmp));
+ uint64_t m = le64toh(tmp);
+
+ ctx->v[3] ^= m;
+ SipHash_Rounds(ctx, rounds);
+ ctx->v[0] ^= m;
+}
diff --git a/src/contrib/openbsd/siphash.h b/src/contrib/openbsd/siphash.h
new file mode 100644
index 0000000..59d952d
--- /dev/null
+++ b/src/contrib/openbsd/siphash.h
@@ -0,0 +1,83 @@
+/*-
+ * Copyright (c) 2013 Andre Oppermann <andre@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote
+ * products derived from this software without specific prior written
+ * permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $OpenBSD: siphash.h,v 1.3 2015/02/20 11:51:03 tedu Exp $
+ */
+
+/*
+ * SipHash is a family of pseudorandom functions (a.k.a. keyed hash functions)
+ * optimized for speed on short messages returning a 64bit hash/digest value.
+ *
+ * The number of rounds is defined during the initialization:
+ * SipHash24_Init() for the fast and reasonable strong version
+ * SipHash48_Init() for the strong version (half as fast)
+ *
+ * struct SIPHASH_CTX ctx;
+ * SipHash24_Init(&ctx);
+ * SipHash_SetKey(&ctx, "16bytes long key");
+ * SipHash_Update(&ctx, pointer_to_string, length_of_string);
+ * SipHash_End(&ctx);
+ */
+
+#ifndef _SIPHASH_H_
+#define _SIPHASH_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#define SIPHASH_BLOCK_LENGTH 8
+#define SIPHASH_KEY_LENGTH 16
+#define SIPHASH_DIGEST_LENGTH 8
+
+typedef struct _SIPHASH_CTX {
+ uint64_t v[4];
+ uint8_t buf[SIPHASH_BLOCK_LENGTH];
+ uint32_t bytes;
+} SIPHASH_CTX;
+
+typedef struct {
+ uint64_t k0;
+ uint64_t k1;
+} SIPHASH_KEY;
+
+void SipHash_Init(SIPHASH_CTX *, const SIPHASH_KEY *);
+void SipHash_Update(SIPHASH_CTX *, int, int, const void *, size_t);
+uint64_t SipHash_End(SIPHASH_CTX *, int, int);
+uint64_t SipHash(const SIPHASH_KEY *, int, int, const void *, size_t);
+
+#define SipHash24_Init(_c, _k) SipHash_Init((_c), (_k))
+#define SipHash24_Update(_c, _p, _l) SipHash_Update((_c), 2, 4, (_p), (_l))
+#define SipHash24_End(_d) SipHash_End((_d), 2, 4)
+#define SipHash24(_k, _p, _l) SipHash((_k), 2, 4, (_p), (_l))
+
+#define SipHash48_Init(_c, _k) SipHash_Init((_c), (_k))
+#define SipHash48_Update(_c, _p, _l) SipHash_Update((_c), 4, 8, (_p), (_l))
+#define SipHash48_End(_d) SipHash_End((_d), 4, 8)
+#define SipHash48(_k, _p, _l) SipHash((_k), 4, 8, (_p), (_l))
+
+#endif /* _SIPHASH_H_ */
diff --git a/src/contrib/openbsd/strlcat.c b/src/contrib/openbsd/strlcat.c
new file mode 100644
index 0000000..1409062
--- /dev/null
+++ b/src/contrib/openbsd/strlcat.c
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 1998 Todd C. Miller <Todd.Miller@courtesan.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <sys/types.h>
+#include <string.h>
+
+#include "contrib/openbsd/strlcat.h"
+
+size_t
+knot_strlcat(char *dst, const char *src, size_t siz)
+{
+ char *d = dst;
+ const char *s = src;
+ size_t n = siz;
+ size_t dlen;
+
+ /* Find the end of dst and adjust bytes left but don't go past end */
+ while (n-- != 0 && *d != '\0')
+ d++;
+ dlen = d - dst;
+ n = siz - dlen;
+
+ if (n == 0)
+ return(dlen + strlen(s));
+ while (*s != '\0') {
+ if (n != 1) {
+ *d++ = *s;
+ n--;
+ }
+ s++;
+ }
+ *d = '\0';
+
+ return(dlen + (s - src)); /* count does not include NUL */
+}
diff --git a/src/contrib/openbsd/strlcat.h b/src/contrib/openbsd/strlcat.h
new file mode 100644
index 0000000..7016069
--- /dev/null
+++ b/src/contrib/openbsd/strlcat.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 1998 Todd C. Miller <Todd.Miller@courtesan.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#pragma once
+
+#ifndef HAVE_STRLCAT
+#define strlcat(dst, src, size) knot_strlcat(dst, src, size)
+#endif
+
+/*
+ * Appends src to string dst of size siz (unlike strncat, siz is the
+ * full size of dst, not space left). At most siz-1 characters
+ * will be copied. Always NUL terminates (unless siz <= strlen(dst)).
+ * Returns strlen(src) + MIN(siz, strlen(initial dst)).
+ * If retval >= siz, truncation occurred.
+ */
+size_t
+knot_strlcat(char *dst, const char *src, size_t siz);
diff --git a/src/contrib/openbsd/strlcpy.c b/src/contrib/openbsd/strlcpy.c
new file mode 100644
index 0000000..eafc0e4
--- /dev/null
+++ b/src/contrib/openbsd/strlcpy.c
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 1998 Todd C. Miller <Todd.Miller@courtesan.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <sys/types.h>
+#include <string.h>
+
+#include "contrib/openbsd/strlcpy.h"
+
+size_t
+knot_strlcpy(char *dst, const char *src, size_t siz)
+{
+ char *d = dst;
+ const char *s = src;
+ size_t n = siz;
+
+ /* Copy as many bytes as will fit */
+ if (n != 0) {
+ while (--n != 0) {
+ if ((*d++ = *s++) == '\0')
+ break;
+ }
+ }
+
+ /* Not enough room in dst, add NUL and traverse rest of src */
+ if (n == 0) {
+ if (siz != 0)
+ *d = '\0'; /* NUL-terminate dst */
+ while (*s++)
+ ;
+ }
+
+ return(s - src - 1); /* count does not include NUL */
+}
diff --git a/src/contrib/openbsd/strlcpy.h b/src/contrib/openbsd/strlcpy.h
new file mode 100644
index 0000000..6421068
--- /dev/null
+++ b/src/contrib/openbsd/strlcpy.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 1998 Todd C. Miller <Todd.Miller@courtesan.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#pragma once
+
+#ifndef HAVE_STRLCPY
+#define strlcpy(dst, src, size) knot_strlcpy(dst, src, size)
+#endif
+
+/*
+ * Copy src to string dst of size siz. At most siz-1 characters
+ * will be copied. Always NUL terminates (unless siz == 0).
+ * Returns strlen(src); if retval >= siz, truncation occurred.
+ */
+size_t
+knot_strlcpy(char *dst, const char *src, size_t siz);
diff --git a/src/contrib/os.h b/src/contrib/os.h
new file mode 100644
index 0000000..8d4a2e2
--- /dev/null
+++ b/src/contrib/os.h
@@ -0,0 +1,37 @@
+/* Copyright (C) 2021 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include <stdbool.h>
+#include <stdio.h>
+#include <sys/utsname.h>
+
+inline static bool linux_at_least(unsigned version_first, unsigned version_second)
+{
+#if defined(__linux__)
+ struct utsname info;
+ unsigned first, second;
+ if (uname(&info) != 0 || sscanf(info.release, "%u.%u.", &first, &second) != 2) {
+ return false;
+ } else {
+ return first > version_first ||
+ (first = version_first && second >= version_second);
+ }
+#else
+ return false;
+#endif
+}
diff --git a/src/contrib/proxyv2/proxyv2.c b/src/contrib/proxyv2/proxyv2.c
new file mode 100644
index 0000000..0e5c90b
--- /dev/null
+++ b/src/contrib/proxyv2/proxyv2.c
@@ -0,0 +1,281 @@
+/* Copyright (C) 2022 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>
+ Copyright (C) 2021 Fastly, Inc.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+#include <arpa/inet.h>
+#include <stdint.h>
+#include <string.h>
+
+#include "contrib/proxyv2/proxyv2.h"
+#include "contrib/sockaddr.h"
+#include "libknot/errcode.h"
+
+/*
+ * Minimal implementation of the haproxy PROXY v2 protocol.
+ *
+ * Supports extracting the original client address and client port number from
+ * the haproxy PROXY v2 protocol's address block.
+ *
+ * See https://www.haproxy.org/download/2.5/doc/proxy-protocol.txt for the
+ * protocol specification.
+ */
+
+static const char PROXYV2_SIG[12] = "\x0D\x0A\x0D\x0A\x00\x0D\x0A\x51\x55\x49\x54\x0A";
+
+/*
+ * The part of the PROXY v2 payload following the signature.
+ */
+struct proxyv2_hdr {
+ /*
+ * The protocol version and command.
+ *
+ * The upper four bits contain the version which must be \x2 and the
+ * receiver must only accept this value.
+ *
+ * The lower four bits represent the command, which is \x0 for LOCAL
+ * and \x1 for PROXY.
+ */
+ uint8_t ver_cmd;
+
+ /*
+ * The transport protocol and address family. The upper four bits
+ * contain the address family and the lower four bits contain the
+ * protocol.
+ *
+ * The relevant values for DNS are:
+ * \x11: TCP over IPv4
+ * \x12: UDP over IPv4
+ * \x21: TCP over IPv6
+ * \x22: UDP over IPv6
+ */
+ uint8_t fam_addr;
+
+ /*
+ * The number of PROXY v2 payload bytes following this header to skip
+ * to reach the proxied packet (i.e., start of the original DNS message).
+ */
+ uint16_t len;
+};
+
+/*
+ * The PROXY v2 address block for IPv4.
+ */
+struct proxyv2_addr_ipv4 {
+ uint8_t src_addr[4];
+ uint8_t dst_addr[4];
+ uint16_t src_port;
+ uint16_t dst_port;
+};
+
+/*
+ * The PROXY v2 address block for IPv6.
+ */
+struct proxyv2_addr_ipv6 {
+ uint8_t src_addr[16];
+ uint8_t dst_addr[16];
+ uint16_t src_port;
+ uint16_t dst_port;
+};
+
+const size_t PROXYV2_HEADER_MAXLEN = sizeof(PROXYV2_SIG) +
+ sizeof(struct proxyv2_hdr) +
+ sizeof(struct proxyv2_addr_ipv6);
+
+/*
+ * Make sure the C compiler lays out the PROXY v2 address block structs so that
+ * they can be memcpy()'d off the wire.
+ */
+#if (__STDC_VERSION__ >= 201112L)
+_Static_assert(sizeof(struct proxyv2_hdr) == 4,
+ "struct proxyv2_hdr is correct size");
+_Static_assert(sizeof(struct proxyv2_addr_ipv4) == 12,
+ "struct proxyv2_addr_ipv4 is correct size");
+_Static_assert(sizeof(struct proxyv2_addr_ipv6) == 36,
+ "struct proxyv2_addr_ipv6 is correct size");
+#endif
+
+int proxyv2_header_offset(void *base, size_t len_base)
+{
+ /*
+ * Check that 'base' has enough bytes to read the PROXY v2 signature
+ * and header, and if so whether the PROXY v2 signature is present.
+ */
+ if (len_base < (sizeof(PROXYV2_SIG) + sizeof(struct proxyv2_hdr)) ||
+ memcmp(base, PROXYV2_SIG, sizeof(PROXYV2_SIG)) != 0)
+ {
+ /* Failure. */
+ return KNOT_EMALF;
+ }
+
+ /* Read the PROXY v2 header. */
+ struct proxyv2_hdr *hdr = base + sizeof(PROXYV2_SIG);
+
+ /*
+ * Check that this is a version 2, command "PROXY" payload.
+ *
+ * XXX: The PROXY v2 spec mandates support for the "LOCAL" command
+ * (byte 0x20).
+ */
+ if (hdr->ver_cmd != 0x21) {
+ /* Failure. */
+ return KNOT_EMALF;
+ }
+
+ /*
+ * Calculate the offset of the original DNS message inside the packet.
+ * This needs to account for the length of the PROXY v2 signature,
+ * PROXY v2 header, and the bytes of variable length PROXY v2 data
+ * following the PROXY v2 header.
+ */
+ const size_t offset_dns = sizeof(PROXYV2_SIG) +
+ sizeof(struct proxyv2_hdr) + ntohs(hdr->len);
+ if (offset_dns < len_base) {
+ return offset_dns;
+ }
+
+ return KNOT_EMALF;
+}
+
+int proxyv2_addr_store(void *base, size_t len_base, struct sockaddr_storage *ss)
+{
+ /*
+ * Calculate the offset of the PROXY v2 address block. This is the data
+ * immediately following the PROXY v2 header.
+ */
+ const size_t offset_proxy_addr = sizeof(PROXYV2_SIG) +
+ sizeof(struct proxyv2_hdr);
+ struct proxyv2_hdr *hdr = base + sizeof(PROXYV2_SIG);
+
+ /*
+ * Handle proxied UDP-over-IPv4 and UDP-over-IPv6 packets.
+ */
+ //TODO What about TCP?
+ if (hdr->fam_addr == 0x12) {
+ /* This is a proxied UDP-over-IPv4 packet. */
+ struct proxyv2_addr_ipv4 *addr;
+
+ /*
+ * Check that the packet is large enough to contain the IPv4
+ * address block.
+ */
+ if (offset_proxy_addr + sizeof(*addr) < len_base) {
+ /* Read the PROXY v2 address block. */
+ addr = base + offset_proxy_addr;
+
+ /* Copy the client's IPv4 address to the caller. */
+ sockaddr_set_raw(ss, AF_INET, addr->src_addr,
+ sizeof(addr->src_addr));
+
+ /* Copy the client's port to the caller. */
+ sockaddr_port_set(ss, ntohs(addr->src_port));
+
+ /* Success. */
+ return KNOT_EOK;
+ }
+ } else if (hdr->fam_addr == 0x22) {
+ /* This is a proxied UDP-over-IPv6 packet. */
+ struct proxyv2_addr_ipv6 *addr;
+
+ /*
+ * Check that the packet is large enough to contain the IPv6
+ * address block.
+ */
+ if (offset_proxy_addr + sizeof(*addr) < len_base) {
+ /* Read the PROXY v2 address block. */
+ addr = base + offset_proxy_addr;
+
+ /* Copy the client's IPv6 address to the caller. */
+ sockaddr_set_raw(ss, AF_INET6, addr->src_addr,
+ sizeof(addr->src_addr));
+
+ /* Copy the client's port to the caller. */
+ sockaddr_port_set(ss, ntohs(addr->src_port));
+
+ /* Success. */
+ return KNOT_EOK;
+ }
+ }
+
+ /* Failure. */
+ return KNOT_EMALF;
+}
+
+int proxyv2_write_header(char *buf, size_t buflen, int socktype, const struct sockaddr *src,
+ const struct sockaddr *dst)
+{
+ if (buflen < PROXYV2_HEADER_MAXLEN) {
+ return KNOT_EINVAL;
+ }
+
+ uint8_t fam_addr = 0;
+ int family = src->sa_family;
+ if (socktype == SOCK_DGRAM) {
+ fam_addr += 0x2;
+ } else if (socktype == SOCK_STREAM) {
+ fam_addr += 0x1;
+ } else {
+ return KNOT_EINVAL;
+ }
+ if (family == AF_INET) {
+ fam_addr += 0x10;
+ } else if (family == AF_INET6) {
+ fam_addr += 0x20;
+ } else {
+ return KNOT_EINVAL;
+ }
+
+ struct proxyv2_hdr hdr = {
+ .ver_cmd = 0x21,
+ .fam_addr = fam_addr,
+ .len = (family == AF_INET)
+ ? htons(sizeof(struct proxyv2_addr_ipv4))
+ : htons(sizeof(struct proxyv2_addr_ipv6))
+ };
+
+ size_t offset = 0;
+ memcpy(buf, PROXYV2_SIG, sizeof(PROXYV2_SIG));
+ offset += sizeof(PROXYV2_SIG);
+ memcpy(buf + offset, &hdr, sizeof(hdr));
+ offset += sizeof(hdr);
+
+ if (family == AF_INET) {
+ struct proxyv2_addr_ipv4 ipv4 = { 0 };
+ struct sockaddr_in *p_src = (struct sockaddr_in *)src;
+ struct sockaddr_in *p_dst = (struct sockaddr_in *)dst;
+ memcpy(ipv4.src_addr, &p_src->sin_addr, sizeof(p_src->sin_addr));
+ memcpy(ipv4.dst_addr, &p_dst->sin_addr, sizeof(p_dst->sin_addr));
+ ipv4.src_port = p_src->sin_port;
+ ipv4.dst_port = p_dst->sin_port;
+
+ // Store in buffer
+ memcpy(buf + offset, &ipv4, sizeof(ipv4));
+ offset += sizeof(ipv4);
+ } else {
+ struct proxyv2_addr_ipv6 ipv6 = { 0 };
+ struct sockaddr_in6 *p_src = (struct sockaddr_in6 *)src;
+ struct sockaddr_in6 *p_dst = (struct sockaddr_in6 *)dst;
+ memcpy(ipv6.src_addr, &p_src->sin6_addr, sizeof(p_src->sin6_addr));
+ memcpy(ipv6.dst_addr, &p_dst->sin6_addr, sizeof(p_dst->sin6_addr));
+ ipv6.src_port = p_src->sin6_port;
+ ipv6.dst_port = p_dst->sin6_port;
+
+ // Store in buffer
+ memcpy(buf + offset, &ipv6, sizeof(ipv6));
+ offset += sizeof(ipv6);
+ }
+
+ return offset;
+}
diff --git a/src/contrib/proxyv2/proxyv2.h b/src/contrib/proxyv2/proxyv2.h
new file mode 100644
index 0000000..8fd8299
--- /dev/null
+++ b/src/contrib/proxyv2/proxyv2.h
@@ -0,0 +1,29 @@
+/* Copyright (C) 2022 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include <stddef.h>
+#include <sys/socket.h>
+
+extern const size_t PROXYV2_HEADER_MAXLEN;
+
+int proxyv2_header_offset(void *base, size_t len_base);
+
+int proxyv2_addr_store(void *base, size_t len_base, struct sockaddr_storage *ss);
+
+int proxyv2_write_header(char *buf, size_t buflen, int socktype, const struct sockaddr *src,
+ const struct sockaddr *dst);
diff --git a/src/contrib/qp-trie/trie.c b/src/contrib/qp-trie/trie.c
new file mode 100644
index 0000000..30dcb42
--- /dev/null
+++ b/src/contrib/qp-trie/trie.c
@@ -0,0 +1,1451 @@
+/* Copyright (C) 2019 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>
+ Copyright (C) 2018 Tony Finch <dot@dotat.at>
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+ The code originated from https://github.com/fanf2/qp/blob/master/qp.c
+ at revision 5f6d93753.
+ */
+
+#include <assert.h>
+#include <limits.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "contrib/qp-trie/trie.h"
+#include "contrib/macros.h"
+#include "contrib/mempattern.h"
+#include "libknot/errcode.h"
+
+typedef unsigned int uint;
+typedef uint64_t trie_index_t; /*!< nibble index into a key */
+typedef uint64_t word; /*!< A type-punned word */
+typedef uint bitmap_t; /*!< Bit-maps, using the range of 1<<0 to 1<<16 (inclusive). */
+
+typedef char static_assert_pointer_fits_in_word
+ [sizeof(word) >= sizeof(uintptr_t) ? 1 : -1];
+
+#define KEYLENBITS 31
+
+/*! \brief trie keys have lengths
+ *
+ * 32 bits are enough for key lengths; probably even 16 bits would be.
+ * However, a 32 bit length means the alignment will be a multiple of
+ * 4, allowing us to stash the COW and BRANCH flags in the bottom bits
+ * of a pointer to a key.
+ *
+ * We need to steal a couple of bits from the length to keep the COW
+ * state of key allocations.
+ */
+typedef struct {
+ uint32_t cow:1, len:KEYLENBITS;
+ trie_key_t chars[];
+} tkey_t;
+
+/*! \brief A trie node is a pair of words.
+ *
+ * Each word is type-punned, depending on whether this is a branch
+ * node or a leaf node. We'll define some accessor functions to wrap
+ * this up into something reasonably safe.
+ *
+ * We aren't using a union to avoid problems with strict aliasing, and
+ * we aren't using bitfields because we want to control exactly which
+ * bits in the word are used by each field (in particular the flags).
+ *
+ * Branch nodes are never allocated individually: they are always part
+ * of either the root node or the twigs array of their parent branch.
+ *
+ * In a branch:
+ *
+ * `i` contains flags, bitmap, and index, explained in more detail below.
+ *
+ * `p` is a pointer to the "twigs", an array of child nodes.
+ *
+ * In a leaf:
+ *
+ * `i` is cast from a pointer to a tkey_t, with flags in the bottom bits.
+ *
+ * `p` is a trie_val_t.
+ */
+typedef struct node {
+ word i;
+ void *p;
+} node_t;
+
+struct trie {
+ node_t root; // undefined when weight == 0, see empty_root()
+ size_t weight;
+ knot_mm_t mm;
+};
+
+/*! \brief size (in bits) of nibble (half-byte) indexes into keys
+ *
+ * The bottom bit is clear for the upper nibble, and set for the lower
+ * nibble, big-endian style, since the tree has to be in lexicographic
+ * order. The index increases from one branch node to the next as you
+ * go deeper into the trie. All the keys below a branch are identical
+ * up to the nibble identified by the branch.
+ *
+ * (see also tkey_t.len above)
+ */
+#define TWIDTH_INDEX 33
+
+/*! \brief exclusive limit on indexes */
+#define TMAX_INDEX (BIG1 << TWIDTH_INDEX)
+
+/*! \brief size (in bits) of branch bitmap
+ *
+ * The bitmap indicates which subtries are present. The present child
+ * nodes are stored in the twigs array (with no holes between them).
+ *
+ * To simplify storing keys that are prefixes of each other, the
+ * end-of-string position is treated as an extra nibble value, ordered
+ * before all others. So there are 16 possible real nibble values,
+ * plus one value for nibbles past the end of the key.
+ */
+#define TWIDTH_BMP 17
+
+/*
+ * We're constructing the layout of the branch `i` field in a careful
+ * way to avoid mistakes, getting the compiler to calculate values
+ * rather than typing them in by hand.
+ */
+enum {
+ TSHIFT_BRANCH = 0,
+ TSHIFT_COW,
+ TSHIFT_BMP,
+ TOP_BMP = TSHIFT_BMP + TWIDTH_BMP,
+ TSHIFT_INDEX = TOP_BMP,
+ TOP_INDEX = TSHIFT_INDEX + TWIDTH_INDEX,
+};
+
+typedef char static_assert_fields_fit_in_word
+ [TOP_INDEX <= sizeof(word) * CHAR_BIT ? 1 : -1];
+
+typedef char static_assert_bmp_fits
+ [TOP_BMP <= sizeof(bitmap_t) * CHAR_BIT ? 1 : -1];
+
+#define BIG1 ((word)1)
+#define TMASK(width, shift) (((BIG1 << (width)) - BIG1) << (shift))
+
+/*! \brief is this node a branch or a leaf? */
+#define TFLAG_BRANCH (BIG1 << TSHIFT_BRANCH)
+
+/*! \brief copy-on-write flag, used in both leaves and branches */
+#define TFLAG_COW (BIG1 << TSHIFT_COW)
+
+/*! \brief for extracting pointer to key */
+#define TMASK_LEAF (~(word)(TFLAG_BRANCH | TFLAG_COW))
+
+/*! \brief mask for extracting nibble index */
+#define TMASK_INDEX TMASK(TWIDTH_INDEX, TSHIFT_INDEX)
+
+/*! \brief mask for extracting bitmap */
+#define TMASK_BMP TMASK(TWIDTH_BMP, TSHIFT_BMP)
+
+/*! \brief bitmap entry for NOBYTE */
+#define BMP_NOBYTE (BIG1 << TSHIFT_BMP)
+
+/*! \brief Initialize a new leaf, copying the key, and returning failure code. */
+static int mkleaf(node_t *leaf, const trie_key_t *key, uint32_t len, knot_mm_t *mm)
+{
+ if (unlikely((word)len > (BIG1 << KEYLENBITS)))
+ return KNOT_ENOMEM;
+ tkey_t *lkey = mm_alloc(mm, sizeof(tkey_t) + len);
+ if (unlikely(!lkey))
+ return KNOT_ENOMEM;
+ lkey->cow = 0;
+ lkey->len = len;
+ memcpy(lkey->chars, key, len);
+ word i = (uintptr_t)lkey;
+ assert((i & TFLAG_BRANCH) == 0);
+ *leaf = (node_t){ .i = i, .p = NULL };
+ return KNOT_EOK;
+}
+
+/*! \brief construct a branch node */
+static node_t mkbranch(trie_index_t index, bitmap_t bmp, node_t *twigs)
+{
+ word i = TFLAG_BRANCH | bmp
+ | (index << TSHIFT_INDEX);
+ assert(index < TMAX_INDEX);
+ assert((bmp & ~TMASK_BMP) == 0);
+ return (node_t){ .i = i, .p = twigs };
+}
+
+/*! \brief Make an empty root node. */
+static node_t empty_root(void)
+{
+ return mkbranch(TMAX_INDEX-1, 0, NULL);
+}
+
+/*! \brief Propagate error codes. */
+#define ERR_RETURN(x) \
+ do { \
+ int err_code_ = x; \
+ if (unlikely(err_code_ != KNOT_EOK)) \
+ return err_code_; \
+ } while (false)
+
+
+/*! \brief Test flags to determine type of this node. */
+static bool isbranch(const node_t *t)
+{
+ return t->i & TFLAG_BRANCH;
+}
+
+static tkey_t *tkey(const node_t *t)
+{
+ assert(!isbranch(t));
+ return (tkey_t *)(uintptr_t)(t->i & TMASK_LEAF);
+}
+
+static trie_val_t *tvalp(node_t *t)
+{
+ assert(!isbranch(t));
+ return &t->p;
+}
+
+/*! \brief Given a branch node, return the index of the corresponding nibble in the key. */
+static trie_index_t branch_index(const node_t *t)
+{
+ assert(isbranch(t));
+ return (t->i & TMASK_INDEX) >> TSHIFT_INDEX;
+}
+
+static bitmap_t branch_bmp(const node_t *t)
+{
+ assert(isbranch(t));
+ return (t->i & TMASK_BMP);
+}
+
+/*!
+ * \brief Count the number of set bits.
+ *
+ * \TODO This implementation may be relatively slow on some HW.
+ */
+static uint branch_weight(const node_t *t)
+{
+ assert(isbranch(t));
+ uint n = __builtin_popcount(t->i & TMASK_BMP);
+ assert(n > 1 && n <= TWIDTH_BMP);
+ return n;
+}
+
+/*! \brief Compute offset of an existing child in a branch node. */
+static uint twigoff(const node_t *t, bitmap_t bit)
+{
+ assert(isbranch(t));
+ assert(__builtin_popcount(bit) == 1);
+ return __builtin_popcount(t->i & TMASK_BMP & (bit - 1));
+}
+
+/*! \brief Extract a nibble from a key and turn it into a bitmask. */
+static bitmap_t keybit(trie_index_t ni, const trie_key_t *key, uint32_t len)
+{
+ trie_index_t bytei = ni >> 1;
+
+ if (bytei >= len)
+ return BMP_NOBYTE;
+
+ uint8_t ki = (uint8_t)key[bytei];
+ uint nibble = (ni & 1) ? (ki & 0xf) : (ki >> 4);
+
+ // skip one for NOBYTE nibbles after the end of the key
+ return BIG1 << (nibble + 1 + TSHIFT_BMP);
+}
+
+/*! \brief Extract a nibble from a key and turn it into a bitmask. */
+static bitmap_t twigbit(const node_t *t, const trie_key_t *key, uint32_t len)
+{
+ assert(isbranch(t));
+ return keybit(branch_index(t), key, len);
+}
+
+/*! \brief Test if a branch node has a child indicated by a bitmask. */
+static bool hastwig(const node_t *t, bitmap_t bit)
+{
+ assert(isbranch(t));
+ assert((bit & ~TMASK_BMP) == 0);
+ assert(__builtin_popcount(bit) == 1);
+ return t->i & bit;
+}
+
+/*! \brief Get pointer to packed array of child nodes. */
+static node_t* twigs(node_t *t)
+{
+ assert(isbranch(t));
+ return t->p;
+}
+
+/*! \brief Get pointer to a particular child of a branch node. */
+static node_t* twig(node_t *t, uint i)
+{
+ assert(i < branch_weight(t));
+ return twigs(t) + i;
+}
+
+/*! \brief Get twig number of a child node TODO: better description. */
+static uint twig_number(node_t *child, node_t *parent)
+{
+ // twig array index using pointer arithmetic
+ ptrdiff_t num = child - twigs(parent);
+ assert(num >= 0 && num < branch_weight(parent));
+ return (uint)num;
+}
+
+/*! \brief Simple string comparator. */
+static int key_cmp(const trie_key_t *k1, uint32_t k1_len,
+ const trie_key_t *k2, uint32_t k2_len)
+{
+ int ret = memcmp(k1, k2, MIN(k1_len, k2_len));
+ if (ret != 0) {
+ return ret;
+ }
+
+ /* Key string is equal, compare lengths. */
+ if (k1_len == k2_len) {
+ return 0;
+ } else if (k1_len < k2_len) {
+ return -1;
+ } else {
+ return 1;
+ }
+}
+
+trie_t* trie_create(knot_mm_t *mm)
+{
+ trie_t *trie = mm_alloc(mm, sizeof(trie_t));
+ if (trie != NULL) {
+ trie->root = empty_root();
+ trie->weight = 0;
+ if (mm != NULL)
+ trie->mm = *mm;
+ else
+ mm_ctx_init(&trie->mm);
+ }
+ return trie;
+}
+
+/*! \brief Free anything under the trie node, except for the passed pointer itself. */
+static void clear_trie(node_t *trie, knot_mm_t *mm)
+{
+ if (!isbranch(trie)) {
+ mm_free(mm, tkey(trie));
+ } else {
+ uint n = branch_weight(trie);
+ for (uint i = 0; i < n; ++i)
+ clear_trie(twig(trie, i), mm);
+ mm_free(mm, twigs(trie));
+ }
+}
+
+void trie_free(trie_t *tbl)
+{
+ if (tbl == NULL)
+ return;
+ if (tbl->weight)
+ clear_trie(&tbl->root, &tbl->mm);
+ mm_free(&tbl->mm, tbl);
+}
+
+void trie_clear(trie_t *tbl)
+{
+ assert(tbl);
+ if (!tbl->weight)
+ return;
+ clear_trie(&tbl->root, &tbl->mm);
+ tbl->root = empty_root();
+ tbl->weight = 0;
+}
+
+static bool dup_trie(node_t *copy, const node_t *orig, trie_dup_cb dup_cb, knot_mm_t *mm)
+{
+ if (isbranch(orig)) {
+ uint n = branch_weight(orig);
+ node_t *cotw = mm_alloc(mm, n * sizeof(*cotw));
+ if (cotw == NULL) {
+ return NULL;
+ }
+ const node_t *ortw = twigs((node_t *)orig);
+ for (uint i = 0; i < n; ++i) {
+ if (!dup_trie(cotw + i, ortw + i, dup_cb, mm)) {
+ while (i-- > 0) {
+ clear_trie(cotw + i, mm);
+ }
+ mm_free(mm, cotw);
+ return false;
+ }
+ }
+ *copy = mkbranch(branch_index(orig), branch_bmp(orig), cotw);
+ } else {
+ tkey_t *key = tkey(orig);
+ if (mkleaf(copy, key->chars, key->len, mm) != KNOT_EOK) {
+ return false;
+ }
+ if ((copy->p = dup_cb(orig->p, mm)) == NULL) {
+ mm_free(mm, tkey(copy));
+ return false;
+ }
+ }
+ return true;
+}
+
+trie_t* trie_dup(const trie_t *orig, trie_dup_cb dup_cb, knot_mm_t *mm)
+{
+ if (orig == NULL) {
+ return NULL;
+ }
+ trie_t *copy = mm_alloc(mm, sizeof(*copy));
+ if (copy == NULL) {
+ return NULL;
+ }
+ copy->weight = orig->weight;
+ if (mm != NULL) {
+ copy->mm = *mm;
+ } else {
+ mm_ctx_init(&copy->mm);
+ }
+ if (copy->weight) {
+ if (!dup_trie(&copy->root, &orig->root, dup_cb, mm)) {
+ mm_free(mm, copy);
+ return NULL;
+ }
+ }
+ return copy;
+}
+
+size_t trie_weight(const trie_t *tbl)
+{
+ assert(tbl);
+ return tbl->weight;
+}
+
+trie_val_t* trie_get_try(trie_t *tbl, const trie_key_t *key, uint32_t len)
+{
+ assert(tbl);
+ if (!tbl->weight)
+ return NULL;
+ node_t *t = &tbl->root;
+ while (isbranch(t)) {
+ __builtin_prefetch(twigs(t));
+ bitmap_t b = twigbit(t, key, len);
+ if (!hastwig(t, b))
+ return NULL;
+ t = twig(t, twigoff(t, b));
+ }
+ tkey_t *lkey = tkey(t);
+ if (key_cmp(key, len, lkey->chars, lkey->len) != 0)
+ return NULL;
+ return tvalp(t);
+}
+
+/* Optimization: the approach isn't ideal, as e.g. walking through the prefix
+ * is duplicated and we explicitly construct the wildcard key. Still, it's close
+ * to optimum which would be significantly more complicated and error-prone to write. */
+trie_val_t* trie_get_try_wildcard(trie_t *tbl, const trie_key_t *key, uint32_t len)
+{
+ assert(tbl);
+ if (!tbl->weight)
+ return NULL;
+ // Find leaf sharing the longest common prefix; see ns_find_branch() for explanation.
+ node_t *t = &tbl->root;
+ while (isbranch(t)) {
+ __builtin_prefetch(twigs(t));
+ bitmap_t b = twigbit(t, key, len);
+ uint i = hastwig(t, b) ? twigoff(t, b) : 0;
+ t = twig(t, i);
+ }
+ const tkey_t * const lcp_key = tkey(t);
+
+ // Find the last matching zero byte or -1 (source of synthesis)
+ int i_lmz = -1;
+ for (int i = 0; i < len && i < lcp_key->len && key[i] == lcp_key->chars[i]; ++i) {
+ if (key[i] == '\0' && i < len - 1) // do not count the terminating zero
+ i_lmz = i;
+ // Shortcut: we may have found an exact match.
+ if (i == len - 1 && len == lcp_key->len)
+ return tvalp(t);
+ }
+ if (len == 0) // The empty name needs separate handling.
+ return lcp_key->len == 0 ? tvalp(t) : NULL;
+
+ // Construct the key of the wildcard we need and look it up.
+ const int wild_len = i_lmz + 3;
+ uint8_t wild_key[wild_len];
+ memcpy(wild_key, key, wild_len - 2);
+ wild_key[wild_len - 2] = '*';
+ wild_key[wild_len - 1] = '\0'; // LF is always 0-terminated ATM
+ return trie_get_try(tbl, wild_key, wild_len);
+}
+
+/*! \brief Delete leaf t with parent p; b is the bit for t under p.
+ * Optionally return the deleted value via val. The function can't fail. */
+static void del_found(trie_t *tbl, node_t *t, node_t *p, bitmap_t b, trie_val_t *val)
+{
+ assert(!tkey(t)->cow);
+ mm_free(&tbl->mm, tkey(t));
+ if (val != NULL)
+ *val = *tvalp(t); // we return trie_val_t directly when deleting
+ --tbl->weight;
+ if (unlikely(!p)) { // whole trie was a single leaf
+ assert(tbl->weight == 0);
+ tbl->root = empty_root();
+ return;
+ }
+ // remove leaf t as child of p
+ node_t *tp = twigs(p);
+ uint ci = twig_number(t, p);
+ uint cc = branch_weight(p); // child count
+
+ if (cc == 2) {
+ // collapse binary node p: move the other child to the parent
+ *p = tp[1 - ci];
+ mm_free(&tbl->mm, tp);
+ return;
+ }
+ memmove(tp + ci, tp + ci + 1, sizeof(node_t) * (cc - ci - 1));
+ p->i &= ~b;
+ node_t *newt = mm_realloc(&tbl->mm, tp, sizeof(node_t) * (cc - 1),
+ sizeof(node_t) * cc);
+ if (likely(newt != NULL))
+ p->p = newt;
+ // We can ignore mm_realloc failure because an oversized twig
+ // array is OK - only beware that next time the prev_size
+ // passed to mm_realloc will not be correct; TODO?
+}
+
+int trie_del(trie_t *tbl, const trie_key_t *key, uint32_t len, trie_val_t *val)
+{
+ assert(tbl);
+ if (!tbl->weight)
+ return KNOT_ENOENT;
+ node_t *t = &tbl->root; // current and parent node
+ node_t *p = NULL;
+ bitmap_t b = 0;
+ while (isbranch(t)) {
+ __builtin_prefetch(twigs(t));
+ b = twigbit(t, key, len);
+ if (!hastwig(t, b))
+ return KNOT_ENOENT;
+ p = t;
+ t = twig(t, twigoff(t, b));
+ }
+ tkey_t *lkey = tkey(t);
+ if (key_cmp(key, len, lkey->chars, lkey->len) != 0)
+ return KNOT_ENOENT;
+ del_found(tbl, t, p, b, val);
+ return KNOT_EOK;
+}
+
+/*!
+ * \brief Stack of nodes, storing a path down a trie.
+ *
+ * The structure also serves directly as the public trie_it_t type,
+ * in which case it always points to the current leaf, unless we've finished
+ * (i.e. it->len == 0).
+ * stack[0] is always a valid pointer to the root -> ns_gettrie()
+ */
+typedef struct trie_it {
+ node_t* *stack; /*!< The stack; malloc is used directly instead of mm. */
+ uint32_t len; /*!< Current length of the stack. */
+ uint32_t alen; /*!< Allocated/available length of the stack. */
+ /*! \brief Initial storage for \a stack; it should fit in most use cases. */
+ node_t* stack_init[250];
+} nstack_t;
+
+/*! \brief Create a node stack containing just the root (or empty). */
+static void ns_init(nstack_t *ns, trie_t *tbl)
+{
+ assert(tbl);
+ ns->stack = ns->stack_init;
+ ns->alen = sizeof(ns->stack_init) / sizeof(ns->stack_init[0]);
+ ns->stack[0] = &tbl->root;
+ ns->len = (tbl->weight > 0);
+}
+
+static inline trie_t * ns_gettrie(nstack_t *ns)
+{
+ assert(ns && ns->stack && ns->stack[0]);
+ return (struct trie *)ns->stack[0];
+}
+
+/*! \brief Free inside of the stack, i.e. not the passed pointer itself. */
+static void ns_cleanup(nstack_t *ns)
+{
+ assert(ns && ns->stack);
+ if (likely(ns->stack == ns->stack_init))
+ return;
+ free(ns->stack);
+ #ifndef NDEBUG
+ ns->stack = NULL;
+ ns->alen = 0;
+ #endif
+}
+
+/*! \brief Allocate more space for the stack. */
+static int ns_longer_alloc(nstack_t *ns)
+{
+ ns->alen *= 2;
+ size_t new_size = ns->alen * sizeof(node_t *);
+ node_t **st;
+ if (ns->stack == ns->stack_init) {
+ st = malloc(new_size);
+ if (st != NULL)
+ memcpy(st, ns->stack, ns->len * sizeof(node_t *));
+ } else {
+ st = realloc(ns->stack, new_size);
+ }
+ if (st == NULL)
+ return KNOT_ENOMEM;
+ ns->stack = st;
+ return KNOT_EOK;
+}
+
+/*! \brief Ensure the node stack can be extended by one. */
+static inline int ns_longer(nstack_t *ns)
+{
+ // get a longer stack if needed
+ if (likely(ns->len < ns->alen))
+ return KNOT_EOK;
+ return ns_longer_alloc(ns); // hand-split the part suitable for inlining
+}
+
+/*!
+ * \brief Find the "branching point" as if searching for a key.
+ *
+ * The whole path to the point is kept on the passed stack;
+ * always at least the root will remain on the top of it.
+ * Beware: the precise semantics of this function is rather tricky.
+ * The top of the stack will contain: the corresponding leaf if exact
+ * match is found; or the immediate node below a
+ * branching-point-on-edge or the branching-point itself.
+ *
+ * \param idiff Set the index of first differing nibble, or TMAX_INDEX for an exact match
+ * \param tbit Set the bit of the closest leaf's nibble at index idiff
+ * \param kbit Set the bit of the key's nibble at index idiff
+ *
+ * \return KNOT_EOK or KNOT_ENOMEM.
+ */
+static int ns_find_branch(nstack_t *ns, const trie_key_t *key, uint32_t len,
+ trie_index_t *idiff, bitmap_t *tbit, bitmap_t *kbit)
+{
+ assert(ns && ns->len && idiff);
+ // First find some leaf with longest matching prefix.
+ while (isbranch(ns->stack[ns->len - 1])) {
+ ERR_RETURN(ns_longer(ns));
+ node_t *t = ns->stack[ns->len - 1];
+ __builtin_prefetch(twigs(t));
+ bitmap_t b = twigbit(t, key, len);
+ // Even if our key is missing from this branch we need to
+ // keep iterating down to a leaf. It doesn't matter which
+ // twig we choose since the keys are all the same up to this
+ // index. Note that blindly using twigoff(t, b) can cause
+ // an out-of-bounds index if it equals twigmax(t).
+ uint i = hastwig(t, b) ? twigoff(t, b) : 0;
+ ns->stack[ns->len++] = twig(t, i);
+ }
+ tkey_t *lkey = tkey(ns->stack[ns->len-1]);
+ // Find index of the first char that differs.
+ size_t bytei = 0;
+ uint32_t klen = lkey->len;
+ for (bytei = 0; bytei < MIN(len,klen); bytei++) {
+ if (key[bytei] != lkey->chars[bytei])
+ break;
+ }
+ // Find which half-byte has matched.
+ trie_index_t index = bytei << 1;
+ if (bytei == len && len == lkey->len) { // found equivalent key
+ index = TMAX_INDEX;
+ goto success;
+ }
+ if (likely(bytei < MIN(len,klen))) {
+ uint8_t k2 = (uint8_t)lkey->chars[bytei];
+ uint8_t k1 = (uint8_t)key[bytei];
+ if (((k1 ^ k2) & 0xf0) == 0)
+ index += 1;
+ }
+ // now go up the trie from the current leaf
+ node_t *t;
+ do {
+ if (unlikely(ns->len == 1))
+ goto success; // only the root stays on the stack
+ t = ns->stack[ns->len - 2];
+ if (branch_index(t) < index)
+ goto success;
+ --ns->len;
+ } while (true);
+success:
+ #ifndef NDEBUG // invariants on successful return
+ assert(ns->len);
+ if (isbranch(ns->stack[ns->len - 1])) {
+ t = ns->stack[ns->len - 1];
+ assert(branch_index(t) >= index);
+ }
+ if (ns->len > 1) {
+ t = ns->stack[ns->len - 2];
+ assert(branch_index(t) < index || index == TMAX_INDEX);
+ }
+ #endif
+ *idiff = index;
+ *tbit = keybit(index, lkey->chars, lkey->len);
+ *kbit = keybit(index, key, len);
+ return KNOT_EOK;
+}
+
+/*!
+ * \brief Advance the node stack to the last leaf in the subtree.
+ *
+ * \return KNOT_EOK or KNOT_ENOMEM.
+ */
+static int ns_last_leaf(nstack_t *ns)
+{
+ assert(ns);
+ do {
+ ERR_RETURN(ns_longer(ns));
+ node_t *t = ns->stack[ns->len - 1];
+ if (!isbranch(t))
+ return KNOT_EOK;
+ uint lasti = branch_weight(t) - 1;
+ ns->stack[ns->len++] = twig(t, lasti);
+ } while (true);
+}
+
+/*!
+ * \brief Advance the node stack to the first leaf in the subtree.
+ *
+ * \return KNOT_EOK or KNOT_ENOMEM.
+ */
+static int ns_first_leaf(nstack_t *ns)
+{
+ assert(ns && ns->len);
+ do {
+ ERR_RETURN(ns_longer(ns));
+ node_t *t = ns->stack[ns->len - 1];
+ if (!isbranch(t))
+ return KNOT_EOK;
+ ns->stack[ns->len++] = twig(t, 0);
+ } while (true);
+}
+
+/*!
+ * \brief Advance the node stack to the leaf that is previous to the current node.
+ *
+ * \note Prefix leaf under the current node DOES count (if present; perhaps questionable).
+ * \return KNOT_EOK on success, KNOT_ENOENT on not-found, or possibly KNOT_ENOMEM.
+ */
+static int ns_prev_leaf(nstack_t *ns)
+{
+ assert(ns && ns->len > 0);
+
+ node_t *t = ns->stack[ns->len - 1];
+ // Beware: BMP_NOBYTE child is ordered *before* its parent.
+ if (isbranch(t) && hastwig(t, BMP_NOBYTE)) {
+ ERR_RETURN(ns_longer(ns));
+ ns->stack[ns->len++] = twig(t, 0);
+ return KNOT_EOK;
+ }
+
+ for (; ns->len >= 2; --ns->len) {
+ t = ns->stack[ns->len - 1];
+ node_t *p = ns->stack[ns->len - 2];
+ uint ci = twig_number(t, p);
+ if (ci == 0) // we've got to go up again
+ continue;
+ // t isn't the first child -> go down the previous one
+ ns->stack[ns->len - 1] = twig(p, ci - 1);
+ return ns_last_leaf(ns);
+ }
+ return KNOT_ENOENT; // root without empty key has no previous leaf
+}
+
+/*!
+ * \brief Advance the node stack to the leaf that is successor to the current node.
+ *
+ * \param skip_prefixed skip any nodes whose key is a prefix of the current one.
+ * If false, prefix leaf or anything else under the current node DOES count.
+ * \return KNOT_EOK on success, KNOT_ENOENT on not-found, or possibly KNOT_ENOMEM.
+ */
+static int ns_next_leaf(nstack_t *ns, const bool skip_pefixed)
+{
+ assert(ns && ns->len > 0);
+
+ node_t *t = ns->stack[ns->len - 1];
+ if (!skip_pefixed && isbranch(t))
+ return ns_first_leaf(ns);
+ for (; ns->len >= 2; --ns->len) {
+ t = ns->stack[ns->len - 1];
+ node_t *p = ns->stack[ns->len - 2];
+ uint ci = twig_number(t, p);
+ if (skip_pefixed && ci == 0 && hastwig(t, BMP_NOBYTE)) {
+ // Keys in the subtree of p are suffixes of the key of t,
+ // so we've got to go one level higher
+ // (this can't happen more than once)
+ continue;
+ }
+ uint cc = branch_weight(p);
+ assert(ci + 1 <= cc);
+ if (ci + 1 == cc) {
+ // t is the last child of p, so we need to keep climbing
+ continue;
+ }
+ // go down the next child of p
+ ns->stack[ns->len - 1] = twig(p, ci + 1);
+ return ns_first_leaf(ns);
+ }
+ return KNOT_ENOENT; // not found, as no more parent is available
+}
+
+/*! \brief Advance the node stack to leaf with longest prefix of the current key. */
+static int ns_prefix(nstack_t *ns)
+{
+ assert(ns && ns->len > 0);
+ const node_t *start = ns->stack[ns->len - 1];
+ // Walk up the trie until we find a BMP_NOBYTE child.
+ while (--ns->len > 0) {
+ node_t *p = ns->stack[ns->len - 1];
+ if (!hastwig(p, BMP_NOBYTE))
+ continue;
+ node_t *end = twig(p, 0);
+ // In case we started in a BMP_NOBYTE leaf, the first step up
+ // did NOT shorten the key and we would get back into the same
+ // node again.
+ if (end == start)
+ continue;
+ ns->stack[ns->len++] = end;
+ return KNOT_EOK;
+ }
+ return KNOT_ENOENT; // not found, as no more parent is available
+}
+
+/*! \brief less-or-equal search.
+ *
+ * \return KNOT_EOK for exact match, 1 for previous, KNOT_ENOENT for not-found,
+ * or KNOT_E*.
+ */
+static int ns_get_leq(nstack_t *ns, const trie_key_t *key, uint32_t len)
+{
+ // First find the key with longest-matching prefix
+ trie_index_t idiff;
+ bitmap_t tbit, kbit;
+ ERR_RETURN(ns_find_branch(ns, key, len, &idiff, &tbit, &kbit));
+ node_t *t = ns->stack[ns->len - 1];
+ if (idiff == TMAX_INDEX) // found exact match
+ return KNOT_EOK;
+ // Get t: the last node on matching path
+ bitmap_t b;
+ if (isbranch(t) && branch_index(t) == idiff) {
+ // t is OK
+ b = kbit;
+ } else {
+ // the top of the stack was the first unmatched node -> step up
+ if (ns->len == 1) {
+ // root was unmatched already
+ if (kbit < tbit)
+ return KNOT_ENOENT;
+ ERR_RETURN(ns_last_leaf(ns));
+ return 1;
+ }
+ --ns->len;
+ t = ns->stack[ns->len - 1];
+ b = twigbit(t, key, len);
+ }
+ // Now we re-do the first "non-matching" step in the trie
+ // but try the previous child if key was less (it may not exist)
+ int i = hastwig(t, b)
+ ? (int)twigoff(t, b) - (kbit < tbit)
+ : (int)twigoff(t, b) - 1 /* twigoff returns successor when !hastwig */;
+ if (i >= 0) {
+ ERR_RETURN(ns_longer(ns));
+ ns->stack[ns->len++] = twig(t, i);
+ ERR_RETURN(ns_last_leaf(ns));
+ } else {
+ ERR_RETURN(ns_prev_leaf(ns));
+ }
+ return 1;
+}
+
+int trie_get_leq(trie_t *tbl, const trie_key_t *key, uint32_t len, trie_val_t **val)
+{
+ assert(tbl && val);
+ if (tbl->weight == 0) {
+ if (val) *val = NULL;
+ return KNOT_ENOENT;
+ }
+ // We try to do without malloc.
+ nstack_t ns_local;
+ ns_init(&ns_local, tbl);
+ nstack_t *ns = &ns_local;
+
+ int ret = ns_get_leq(ns, key, len);
+ if (ret == KNOT_EOK || ret == 1) {
+ assert(!isbranch(ns->stack[ns->len - 1]));
+ if (val) *val = tvalp(ns->stack[ns->len - 1]);
+ } else {
+ if (val) *val = NULL;
+ }
+ ns_cleanup(ns);
+ return ret;
+}
+
+int trie_it_get_leq(trie_it_t *it, const trie_key_t *key, uint32_t len)
+{
+ assert(it && it->stack[0] && it->alen);
+ const trie_t *tbl = ns_gettrie(it);
+ if (tbl->weight == 0) {
+ it->len = 0;
+ return KNOT_ENOENT;
+ }
+ it->len = 1;
+ int ret = ns_get_leq(it, key, len);
+ if (ret == KNOT_EOK || ret == 1) {
+ assert(trie_it_key(it, NULL));
+ } else {
+ it->len = 0;
+ }
+ return ret;
+}
+
+/* see below */
+static int cow_pushdown(trie_cow_t *cow, nstack_t *ns);
+
+/*! \brief implementation of trie_get_ins() and trie_get_cow() */
+static trie_val_t* cow_get_ins(trie_cow_t *cow, trie_t *tbl,
+ const trie_key_t *key, uint32_t len)
+{
+ assert(tbl);
+ // First leaf in an empty tbl?
+ if (unlikely(!tbl->weight)) {
+ if (unlikely(mkleaf(&tbl->root, key, len, &tbl->mm)))
+ return NULL;
+ ++tbl->weight;
+ return tvalp(&tbl->root);
+ }
+ { // Intentionally un-indented; until end of function, to bound cleanup attr.
+ // Find the branching-point
+ __attribute__((cleanup(ns_cleanup)))
+ nstack_t ns_local;
+ ns_init(&ns_local, tbl);
+ nstack_t *ns = &ns_local;
+ trie_index_t idiff;
+ bitmap_t tbit, kbit;
+ if (unlikely(ns_find_branch(ns, key, len, &idiff, &tbit, &kbit)))
+ return NULL;
+ if (unlikely(cow && cow_pushdown(cow, ns) != KNOT_EOK))
+ return NULL;
+ node_t *t = ns->stack[ns->len - 1];
+ if (idiff == TMAX_INDEX) // the same key was already present
+ return tvalp(t);
+ node_t leaf, *leafp;
+ if (unlikely(mkleaf(&leaf, key, len, &tbl->mm)))
+ return NULL;
+
+ if (isbranch(t) && branch_index(t) == idiff) {
+ // The node t needs a new leaf child.
+ assert(!hastwig(t, kbit));
+ // new child position and original child count
+ uint s = twigoff(t, kbit);
+ uint m = branch_weight(t);
+ node_t *nt = mm_realloc(&tbl->mm, twigs(t),
+ sizeof(node_t) * (m + 1), sizeof(node_t) * m);
+ if (unlikely(!nt))
+ goto err_leaf;
+ memmove(nt + s + 1, nt + s, sizeof(node_t) * (m - s));
+ leafp = nt + s;
+ *t = mkbranch(idiff, branch_bmp(t) | kbit, nt);
+ } else {
+ // We need to insert a new binary branch with leaf at *t.
+ // Note: it works the same for the case where we insert above root t.
+ #ifndef NDEBUG
+ if (ns->len > 1) {
+ node_t *pt = ns->stack[ns->len - 2];
+ assert(hastwig(pt, twigbit(pt, key, len)));
+ }
+ #endif
+ node_t *nt = mm_alloc(&tbl->mm, sizeof(node_t) * 2);
+ if (unlikely(!nt))
+ goto err_leaf;
+ node_t t2 = *t; // Save before overwriting t.
+ *t = mkbranch(idiff, tbit | kbit, nt);
+ *twig(t, twigoff(t, tbit)) = t2;
+ leafp = twig(t, twigoff(t, kbit));
+ };
+ *leafp = leaf;
+ ++tbl->weight;
+ return tvalp(leafp);
+err_leaf:
+ mm_free(&tbl->mm, tkey(&leaf));
+ return NULL;
+ }
+}
+
+trie_val_t* trie_get_ins(trie_t *tbl, const trie_key_t *key, uint32_t len)
+{
+ return cow_get_ins(NULL, tbl, key, len);
+}
+
+/*! \brief Apply a function to every trie_val_t*, in order; a recursive solution. */
+static int apply_nodes(node_t *t, int (*f)(trie_val_t *, void *), void *d)
+{
+ assert(t);
+ if (!isbranch(t))
+ return f(tvalp(t), d);
+ uint n = branch_weight(t);
+ for (uint i = 0; i < n; ++i)
+ ERR_RETURN(apply_nodes(twig(t, i), f, d));
+ return KNOT_EOK;
+}
+
+int trie_apply(trie_t *tbl, int (*f)(trie_val_t *, void *), void *d)
+{
+ assert(tbl && f);
+ if (!tbl->weight)
+ return KNOT_EOK;
+ return apply_nodes(&tbl->root, f, d);
+}
+
+/* These are all thin wrappers around static Tns* functions. */
+trie_it_t* trie_it_begin(trie_t *tbl)
+{
+ assert(tbl);
+ trie_it_t *it = malloc(sizeof(nstack_t));
+ if (!it)
+ return NULL;
+ ns_init(it, tbl);
+ if (it->len == 0) // empty tbl
+ return it;
+ if (ns_first_leaf(it)) {
+ ns_cleanup(it);
+ free(it);
+ return NULL;
+ }
+ return it;
+}
+
+bool trie_it_finished(trie_it_t *it)
+{
+ assert(it);
+ return it->len == 0;
+}
+
+void trie_it_free(trie_it_t *it)
+{
+ if (!it)
+ return;
+ ns_cleanup(it);
+ free(it);
+}
+
+trie_it_t *trie_it_clone(const trie_it_t *it)
+{
+ if (!it) // TODO: or should that be an assertion?
+ return NULL;
+ trie_it_t *it2 = malloc(sizeof(nstack_t));
+ if (!it2)
+ return NULL;
+ it2->len = it->len;
+ it2->alen = it->alen; // we _might_ change it in the rare malloc case, but...
+ if (likely(it->stack == it->stack_init)) {
+ it2->stack = it2->stack_init;
+ assert(it->alen == sizeof(it->stack_init) / sizeof(it->stack_init[0]));
+ } else {
+ it2->stack = malloc(it2->alen * sizeof(it2->stack[0]));
+ if (!it2->stack) {
+ free(it2);
+ return NULL;
+ }
+ }
+ memcpy(it2->stack, it->stack, it->len * sizeof(it->stack[0]));
+ return it2;
+}
+
+const trie_key_t* trie_it_key(trie_it_t *it, size_t *len)
+{
+ assert(it && it->len);
+ node_t *t = it->stack[it->len - 1];
+ assert(!isbranch(t));
+ tkey_t *key = tkey(t);
+ if (len)
+ *len = key->len;
+ return key->chars;
+}
+
+trie_val_t* trie_it_val(trie_it_t *it)
+{
+ assert(it && it->len);
+ node_t *t = it->stack[it->len - 1];
+ assert(!isbranch(t));
+ return tvalp(t);
+}
+
+void trie_it_next(trie_it_t *it)
+{
+ assert(it && it->len);
+ if (ns_next_leaf(it, false) != KNOT_EOK)
+ it->len = 0;
+}
+
+void trie_it_next_loop(trie_it_t *it)
+{
+ assert(it && it->len);
+ int ret = ns_next_leaf(it, false);
+ if (ret == KNOT_ENOENT) {
+ it->len = 1;
+ ret = ns_first_leaf(it);
+ }
+ if (ret)
+ it->len = 0;
+}
+
+void trie_it_next_nosuffix(trie_it_t *it)
+{
+ assert(it && it->len);
+ if (ns_next_leaf(it, true) != KNOT_EOK)
+ it->len = 0;
+}
+
+void trie_it_prev(trie_it_t *it)
+{
+ assert(it && it->len);
+ if (ns_prev_leaf(it) != KNOT_EOK)
+ it->len = 0;
+}
+
+void trie_it_prev_loop(trie_it_t *it)
+{
+ assert(it && it->len);
+ int ret = ns_prev_leaf(it);
+ if (ret == KNOT_ENOENT) {
+ it->len = 1;
+ ret = ns_last_leaf(it);
+ }
+ if (ret)
+ it->len = 0;
+}
+
+void trie_it_parent(trie_it_t *it)
+{
+ assert(it && it->len);
+ if (ns_prefix(it))
+ it->len = 0;
+}
+
+void trie_it_del(trie_it_t *it)
+{
+ assert(it && it->len);
+ if (it->len == 0)
+ return;
+ node_t *t = it->stack[it->len - 1];
+ assert(!isbranch(t));
+ bitmap_t b; // del_found() needs to know which bit to zero in the bitmap
+ node_t *p;
+ if (it->len == 1) { // deleting the root
+ p = NULL;
+ b = 0; // unused
+ } else {
+ p = it->stack[it->len - 2];
+ assert(isbranch(p));
+ size_t len;
+ const trie_key_t *key = trie_it_key(it, &len);
+ b = twigbit(p, key, len);
+ }
+ // We could trie_it_{next,prev,...}(it) now, in case we wanted that semantics.
+ it->len = 0;
+ del_found(ns_gettrie(it), t, p, b, NULL);
+}
+
+
+/*!\file
+ *
+ * \section About copy-on-write
+ *
+ * In these notes I'll use the term "object" to refer to either the
+ * twig array of a branch, or the application's data that is referred
+ * to by a leaf's trie_val_t pointer. Note that for COW we don't care
+ * about trie node_t structs themselves, but the objects that they
+ * point to.
+ *
+ * \subsection COW states
+ *
+ * During a COW transaction an object can be in one of three states:
+ * shared, only in the old trie, or only in the new trie. When a
+ * transaction is rolled back, the only-new objects are freed; when a
+ * transaction is committed the new trie takes the place of the old
+ * one and only-old objects are freed.
+ *
+ * \subsection branch marks and regions
+ *
+ * A branch object can be marked by setting the COW flag in the first
+ * element of its twig array. Marked branches partition the trie into
+ * regions; an object's state depends on its region.
+ *
+ * The unmarked branch objects between a trie's root and the marked
+ * branches (excluding the marked branches themselves) is exclusively
+ * owned: either old-only (if you started from the old root) or
+ * new-only (if you started from the new root).
+ *
+ * Marked branch objects, and all objects reachable from marked branch
+ * objects, are in the shared region accessible from both old and new
+ * roots. All branch objects below a marked branch must be unmarked.
+ * (That is, there is at most one marked branch object on any path
+ * from the root of a trie.)
+ *
+ * Branch nodes in the new-only region can be modified in place, in
+ * the same way as an original qp trie. Branch nodes in the old-only
+ * or shared regions must not be modified.
+ *
+ * \subsection app object states
+ *
+ * The app objects reachable from the new-only and old-only regions
+ * explicitly record their state in a way determined by the
+ * application. (These app objects are reachable from the old and new
+ * roots by traversing only unmarked branch objects.)
+ *
+ * The app objects reachable from marked branch objects are implicitly
+ * shared, but their state field has an indeterminate value. If an app
+ * object was previously touched by a rolled-back transaction it may
+ * be marked shared or old-only; if it was previously touched by a
+ * committed transaction it may be marked shared or new-only.
+ *
+ * \subsection key states
+ *
+ * The memory allocated for tkey_t objects also needs to track its
+ * sharing state. They have a "cow" flag to mark when they are shared.
+ * Keys are relatively lazily copied (to make them exclusive) when
+ * their leaf node is touched by a COW mutation.
+ *
+ * [An alternative technique might be to copy them more eagerly, in
+ * cow_pushdown(), which would avoid the need for a flag bit at the
+ * cost of more allocator churn in a transaction.]
+ *
+ * \subsection outside COW
+ *
+ * When a COW transaction is not in progress, there are no marked
+ * branch objects, so everything is exclusively owned. When a COW
+ * transaction is finished (committed or rolled back), the branch
+ * marks are removed. Since they are in the shared region, this branch
+ * cleanup is visible to both old and new tries.
+ *
+ * However the state of app objects is not clean between COW
+ * transactions. When a COW transaction is committed, we traverse the
+ * old-only region to find old-only app objects that should be freed
+ * (and vice versa for rollback). In general, there will be app
+ * objects that are only reachable from the new-only region, and that
+ * have a mixture of shared and new states.
+ */
+
+/*! \brief Trie copy-on-write state */
+struct trie_cow {
+ trie_t *old;
+ trie_t *new;
+ trie_cb *mark_shared;
+ void *d;
+};
+
+/*! \brief is this a marked branch object */
+static bool cow_marked(node_t *t)
+{
+ return isbranch(t) && (twigs(t)->i & TFLAG_COW);
+}
+
+/*! \brief is this a leaf with a marked key */
+static bool cow_key(node_t *t)
+{
+ return !isbranch(t) && tkey(t)->cow;
+}
+
+/*! \brief remove mark from a branch object */
+static void clear_cow(node_t *t)
+{
+ assert(isbranch(t));
+ twigs(t)->i &= ~TFLAG_COW;
+}
+
+/*! \brief mark a node as shared
+ *
+ * For branches this marks the twig array (in COW terminology, the
+ * branch object); for leaves it uses the callback to mark the app
+ * object.
+ */
+static void mark_cow(trie_cow_t *cow, node_t *t)
+{
+ if (isbranch(t)) {
+ node_t *object = twigs(t);
+ object->i |= TFLAG_COW;
+ } else {
+ tkey_t *lkey = tkey(t);
+ lkey->cow = 1;
+ if (cow->mark_shared != NULL) {
+ trie_val_t *valp = tvalp(t);
+ cow->mark_shared(*valp, lkey->chars, lkey->len, cow->d);
+ }
+ }
+}
+
+/*! \brief push exclusive COW region down one node */
+static int cow_pushdown_one(trie_cow_t *cow, node_t *t)
+{
+ uint cc = branch_weight(t);
+ node_t *nt = mm_alloc(&cow->new->mm, sizeof(node_t) * cc);
+ if (nt == NULL)
+ return KNOT_ENOMEM;
+ /* mark all the children */
+ for (uint ci = 0; ci < cc; ++ci)
+ mark_cow(cow, twig(t, ci));
+ /* this node must be unmarked in both old and new versions */
+ clear_cow(t);
+ t->p = memcpy(nt, twigs(t), sizeof(node_t) * cc);
+ return KNOT_EOK;
+}
+
+/*! \brief push exclusive COW region to cover a whole node stack */
+static int cow_pushdown(trie_cow_t *cow, nstack_t *ns)
+{
+ node_t *new_twigs = NULL;
+ node_t *old_twigs = NULL;
+ for (uint i = 0; i < ns->len; i++) {
+ /* if we did a pushdown on the previous iteration, we
+ need to update this stack entry so it points into
+ the parent's new twigs instead of the old ones */
+ if (new_twigs != old_twigs)
+ ns->stack[i] = new_twigs + (ns->stack[i] - old_twigs);
+ if (cow_marked(ns->stack[i])) {
+ old_twigs = twigs(ns->stack[i]);
+ if (cow_pushdown_one(cow, ns->stack[i]))
+ return KNOT_ENOMEM;
+ new_twigs = twigs(ns->stack[i]);
+ } else {
+ new_twigs = NULL;
+ old_twigs = NULL;
+ /* ensure key is exclusively owned */
+ if (cow_key(ns->stack[i])) {
+ node_t oleaf = *ns->stack[i];
+ tkey_t *okey = tkey(&oleaf);
+ if(mkleaf(ns->stack[i], okey->chars, okey->len,
+ &cow->new->mm))
+ return KNOT_ENOMEM;
+ ns->stack[i]->p = oleaf.p;
+ okey->cow = 0;
+ }
+ }
+ }
+ return KNOT_EOK;
+}
+
+trie_cow_t* trie_cow(trie_t *old, trie_cb *mark_shared, void *d)
+{
+ knot_mm_t *mm = &old->mm;
+ trie_t *new = mm_alloc(mm, sizeof(trie_t));
+ trie_cow_t *cow = mm_alloc(mm, sizeof(trie_cow_t));
+ if (new == NULL || cow == NULL) {
+ mm_free(mm, new);
+ mm_free(mm, cow);
+ return NULL;
+ }
+ new->mm = old->mm;
+ new->root = old->root;
+ new->weight = old->weight;
+ cow->old = old;
+ cow->new = new;
+ cow->mark_shared = mark_shared;
+ cow->d = d;
+ if (old->weight)
+ mark_cow(cow, &old->root);
+ return cow;
+}
+
+trie_t* trie_cow_new(trie_cow_t *cow)
+{
+ assert(cow != NULL);
+ return cow->new;
+}
+
+trie_val_t* trie_get_cow(trie_cow_t *cow, const trie_key_t *key, uint32_t len)
+{
+ return cow_get_ins(cow, cow->new, key, len);
+}
+
+int trie_del_cow(trie_cow_t *cow, const trie_key_t *key, uint32_t len, trie_val_t *val)
+{
+ trie_t *tbl = cow->new;
+ if (unlikely(!tbl->weight))
+ return KNOT_ENOENT;
+ { // Intentionally un-indented; until end of function, to bound cleanup attr.
+ // Find the branching-point
+ __attribute__((cleanup(ns_cleanup)))
+ nstack_t ns_local;
+ ns_init(&ns_local, tbl);
+ nstack_t *ns = &ns_local;
+ trie_index_t idiff;
+ bitmap_t tbit, kbit;
+ ERR_RETURN(ns_find_branch(ns, key, len, &idiff, &tbit, &kbit));
+ if (idiff != TMAX_INDEX)
+ return KNOT_ENOENT;
+ ERR_RETURN(cow_pushdown(cow, ns));
+ node_t *t = ns->stack[ns->len - 1];
+ node_t *p = ns->len >= 2 ? ns->stack[ns->len - 2] : NULL;
+ del_found(tbl, t, p, p ? twigbit(p, key, len) : 0, val);
+ }
+ return KNOT_EOK;
+}
+
+/*! \brief clean up after a COW transaction, recursively */
+static void cow_cleanup(trie_cow_t *cow, node_t *t, trie_cb *cb, void *d)
+{
+ if (cow_marked(t)) {
+ // we have hit the shared region, so just reset the mark
+ clear_cow(t);
+ return;
+ } else if (isbranch(t)) {
+ // traverse and free the exclusive region
+ uint cc = branch_weight(t);
+ for (uint ci = 0; ci < cc; ++ci)
+ cow_cleanup(cow, twig(t, ci), cb, d);
+ mm_free(&cow->new->mm, twigs(t));
+ return;
+ } else {
+ // application must decide how to clean up its values
+ tkey_t *lkey = tkey(t);
+ if (cb != NULL) {
+ trie_val_t *valp = tvalp(t);
+ cb(*valp, lkey->chars, lkey->len, d);
+ }
+ // clean up exclusively-owned keys
+ if (lkey->cow)
+ lkey->cow = 0;
+ else
+ mm_free(&cow->new->mm, lkey);
+ return;
+ }
+}
+
+trie_t* trie_cow_commit(trie_cow_t *cow, trie_cb *cb, void *d)
+{
+ trie_t *ret = cow->new;
+ if (cow->old->weight)
+ cow_cleanup(cow, &cow->old->root, cb, d);
+ mm_free(&ret->mm, cow->old);
+ mm_free(&ret->mm, cow);
+ return ret;
+}
+
+trie_t* trie_cow_rollback(trie_cow_t *cow, trie_cb *cb, void *d)
+{
+ trie_t *ret = cow->old;
+ if (cow->new->weight)
+ cow_cleanup(cow, &cow->new->root, cb, d);
+ mm_free(&ret->mm, cow->new);
+ mm_free(&ret->mm, cow);
+ return ret;
+}
diff --git a/src/contrib/qp-trie/trie.h b/src/contrib/qp-trie/trie.h
new file mode 100644
index 0000000..d479d3e
--- /dev/null
+++ b/src/contrib/qp-trie/trie.h
@@ -0,0 +1,280 @@
+/* Copyright (C) 2019 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>
+ Copyright (C) 2018 Tony Finch <dot@dotat.at>
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include <stdbool.h>
+#include <stdint.h>
+
+#include "libknot/mm_ctx.h"
+
+/*!
+ * \brief Native API of QP-tries:
+ *
+ * - keys are uint8_t strings, not necessarily zero-terminated,
+ * the structure copies the contents of the passed keys
+ * - values are void* pointers, typically you get an ephemeral pointer to it
+ * - key lengths are limited by 2^32-1 ATM
+ */
+
+/*! \brief Element value. */
+typedef void* trie_val_t;
+/*! \brief Key for indexing tries. Sign could be flipped easily. */
+typedef uint8_t trie_key_t;
+
+/*! \brief Opaque structure holding a QP-trie. */
+typedef struct trie trie_t;
+
+/*! \brief Opaque type for holding a QP-trie iterator. */
+typedef struct trie_it trie_it_t;
+
+/*! \brief Callback for cloning trie values. */
+typedef trie_val_t (*trie_dup_cb)(const trie_val_t val, knot_mm_t *mm);
+
+/*! \brief Callback for performing actions on a trie leaf
+ *
+ * Used during copy-on-write transactions
+ *
+ * \param val The value of the element to be altered
+ * \param key The key of the element to be altered
+ * \param len The length of key
+ * \param d Additional user data
+ */
+typedef void trie_cb(trie_val_t val, const trie_key_t *key, size_t len, void *d);
+
+/*! \brief Opaque type for holding the copy-on-write state for a QP-trie. */
+typedef struct trie_cow trie_cow_t;
+
+/*! \brief Create a trie instance. */
+trie_t* trie_create(knot_mm_t *mm);
+
+/*! \brief Free a trie instance. */
+void trie_free(trie_t *tbl);
+
+/*! \brief Clear a trie instance (make it empty). */
+void trie_clear(trie_t *tbl);
+
+/*! \brief Create a clone of existing trie. */
+trie_t* trie_dup(const trie_t *orig, trie_dup_cb dup_cb, knot_mm_t *mm);
+
+/*! \brief Return the number of keys in the trie. */
+size_t trie_weight(const trie_t *tbl);
+
+/*! \brief Search the trie, returning NULL on failure. */
+trie_val_t* trie_get_try(trie_t *tbl, const trie_key_t *key, uint32_t len);
+
+/*! \brief Search the trie including DNS wildcard semantics, returning NULL on failure.
+ *
+ * \note We assume the key is in knot_dname_lf() format, i.e. labels are ordered
+ * from root to leaf and separated by zero bytes (and no other zeros are allowed).
+ * \note Beware that DNS wildcard matching is not exactly what normal people would expect.
+ */
+trie_val_t* trie_get_try_wildcard(trie_t *tbl, const trie_key_t *key, uint32_t len);
+
+/*! \brief Search the trie, inserting NULL trie_val_t on failure. */
+trie_val_t* trie_get_ins(trie_t *tbl, const trie_key_t *key, uint32_t len);
+
+/*!
+ * \brief Search for less-or-equal element.
+ *
+ * \param tbl Trie.
+ * \param key Searched key.
+ * \param len Key length.
+ * \param val (optional) Value found; it will be set to NULL if not found or errored.
+ * \return KNOT_EOK for exact match, 1 for previous, KNOT_ENOENT for not-found,
+ * or KNOT_E*.
+ */
+int trie_get_leq(trie_t *tbl, const trie_key_t *key, uint32_t len, trie_val_t **val);
+
+/*!
+ * \brief Apply a function to every trie_val_t, in order.
+ *
+ * \return KNOT_EOK if success or KNOT_E* if error.
+ */
+int trie_apply(trie_t *tbl, int (*f)(trie_val_t *, void *), void *d);
+
+/*!
+ * \brief Remove an item, returning KNOT_EOK if succeeded or KNOT_ENOENT if not found.
+ *
+ * If val!=NULL and deletion succeeded, the deleted value is set.
+ */
+int trie_del(trie_t *tbl, const trie_key_t *key, uint32_t len, trie_val_t *val);
+
+
+/*! \brief Create a new iterator pointing to the first element (if any).
+ *
+ * trie_it_* functions deal with these iterators capable of walking and jumping
+ * over the trie. Note that any modification to key-set stored by the trie
+ * will in general invalidate all iterators and you will need to begin anew.
+ * (It won't be detected - you may end up reading freed memory, etc.)
+ */
+trie_it_t* trie_it_begin(trie_t *tbl);
+
+/*! \brief Test if the iterator has gone "past the end" (and points nowhere). */
+bool trie_it_finished(trie_it_t *it);
+
+/*! \brief Free any resources of the iterator. It's OK to call it on NULL. */
+void trie_it_free(trie_it_t *it);
+
+/*! \brief Copy the iterator. See the warning in trie_it_begin(). */
+trie_it_t *trie_it_clone(const trie_it_t *it);
+
+/*!
+ * \brief Return pointer to the key of the current element.
+ *
+ * \note The len is uint32_t internally but size_t is better for our usage
+ * as it is without an additional type conversion.
+ */
+const trie_key_t* trie_it_key(trie_it_t *it, size_t *len);
+
+/*! \brief Return pointer to the value of the current element (writable). */
+trie_val_t* trie_it_val(trie_it_t *it);
+
+/*!
+ * \brief Advance the iterator to the next element.
+ *
+ * Iteration is in ascending lexicographical order.
+ * In particular, the empty string would be considered as the very first.
+ *
+ * \TODO: in most iterator operations, ENOMEM is very unlikely
+ * but it leads to a _finished() iterator (silently).
+ * Perhaps the functions should simply return KNOT_E*
+ */
+void trie_it_next(trie_it_t *it);
+/*! \brief Advance the iterator to the previous element. See trie_it_next(). */
+void trie_it_prev(trie_it_t *it);
+
+/*! \brief Advance iterator to the next element, looping to first after last. */
+void trie_it_next_loop(trie_it_t *it);
+/*! \brief Advance iterator to the previous element, looping to last after first. */
+void trie_it_prev_loop(trie_it_t *it);
+
+/*! \brief Advance iterator to the next element while ignoring the subtree.
+ *
+ * \note Another formulation: skip keys that are prefixed by the current key.
+ * \TODO: name, maybe _unprefixed? The thing is that in the "subtree" meaning
+ * doesn't correspond to how the pointers go in the implementation,
+ * but we may not care much for implementation in the API...
+ */
+void trie_it_next_nosub(trie_it_t *it);
+
+/*! \brief Advance iterator to the longest prefix of the current key.
+ *
+ * \TODO: name, maybe _prefix? Arguments similar to _nosub vs. _unprefixed.
+ */
+void trie_it_parent(trie_it_t *it);
+
+/*! \brief trie_get_leq() but with an iterator. */
+int trie_it_get_leq(trie_it_t *it, const trie_key_t *key, uint32_t len);
+
+/*! \brief Remove the current element. The iterator will get trie_it_finished() */
+void trie_it_del(trie_it_t *it);
+
+
+/*! \brief Start a COW transaction
+ *
+ * A copy-on-write transaction starts by obtaining a write lock (in
+ * your application code) followed by a call to trie_cow(). This
+ * creates a shared clone of the trie and saves both old and new roots
+ * in the COW context.
+ *
+ * During the COW transaction, you call trie_cow_ins() or
+ * trie_cow_del() as necessary. These calls ensure that the relevant
+ * parts of the (new) trie are copied so that they can be modified
+ * freely.
+ *
+ * Your trie_val_t objects must be able to distinguish their
+ * reachability, either shared, or old-only, or new-only. Before a COW
+ * transaction the reachability of your objects is indeterminate.
+ * During a transaction, any trie_val_t objects that might be affected
+ * (because they are adjacent to a trie_get_cow() or trie_del_cow())
+ * are first marked as shared using the callback you pass to
+ * trie_cow().
+ *
+ * When the transaction is complete, to commit, call trie_cow_new() to
+ * get the new root, swap the old and new trie roots (e.g. with
+ * rcu_xchg_pointer()), wait for readers to finish with the old trie
+ * (e.g. using synchronize_rcu()), then call trie_cow_commit(). For a
+ * rollback, you can just call trie_cow_rollback() without waiting
+ * since that doesn't conflict with readers. After trie_cow_commit()
+ * or trie_cow_rollback() have finished, you can release your write
+ * lock.
+ *
+ * Concurrent reading of the old trie is allowed during a transaction
+ * provided that it is known when all readers have finished with the
+ * old version, e.g. using rcu_read_lock() and rcu_read_unlock().
+ * There must be only one write transaction at a time.
+ *
+ * \param old the old trie
+ * \param mark_shared callback to mark a leaf as shared (can be NULL)
+ * \param d extra data for the callback
+ * \return a pointer to a COW context,
+ * or NULL if there was a failure
+ */
+trie_cow_t* trie_cow(trie_t *old, trie_cb *mark_shared, void *d);
+
+/*! \brief get the new trie from a COW context */
+trie_t* trie_cow_new(trie_cow_t *cow);
+
+/*! \brief variant of trie_get_ins() for use during COW transactions
+ *
+ * As necessary, this copies path from the root of the trie to the
+ * leaf, so that it is no longer shared. Any leaves adjacent to this
+ * path are marked as shared using the mark_shared callback passed to
+ * trie_cow().
+ *
+ * It is your responsibility to COW your trie_val_t objects. If you copy an
+ * object you must change the original's reachability from shared to old-only.
+ * New objects (including copies) must have new-only reachability.
+ */
+trie_val_t* trie_get_cow(trie_cow_t *cow, const trie_key_t *key, uint32_t len);
+
+/*!
+ * \brief variant of trie_del() for use during COW transactions
+ *
+ * The mark_shared callback is invoked as necessary, in the same way
+ * as trie_get_cow().
+ *
+ * Returns KNOT_EOK if the key was removed or KNOT_ENOENT if not found.
+ * If val!=NULL and deletion succeeded, the *val is set to the deleted
+ * value pointer.
+ */
+int trie_del_cow(trie_cow_t *cow, const trie_key_t *key, uint32_t len, trie_val_t *val);
+
+/*! \brief clean up the old trie after committing a COW transaction
+ *
+ * Your callback is invoked for any trie_val_t objects that might need
+ * cleaning up; you must free any objects you have marked as old-only
+ * and retain objects with shared reachability.
+ *
+ * \note The callback can be NULL.
+ *
+ * The cow object is free()d, and the new trie root is returned.
+ */
+trie_t* trie_cow_commit(trie_cow_t *cow, trie_cb *cb, void *d);
+
+/*! \brief clean up the new trie after rolling back a COW transaction
+ *
+ * Your callback is invoked for any trie_val_t objects that might need
+ * cleaning up; you must free any objects you have marked as new-only
+ * and retain objects with shared reachability.
+ *
+ * \note The callback can be NULL.
+ *
+ * The cow object is free()d, and the old trie root is returned.
+ */
+trie_t* trie_cow_rollback(trie_cow_t *cow, trie_cb *cb, void *d);
diff --git a/src/contrib/semaphore.c b/src/contrib/semaphore.c
new file mode 100644
index 0000000..ad50dcc
--- /dev/null
+++ b/src/contrib/semaphore.c
@@ -0,0 +1,80 @@
+/* Copyright (C) 2019 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+#include "semaphore.h"
+
+#include <assert.h>
+#include <stdlib.h>
+
+#if defined(__APPLE__)
+#pragma clang diagnostic ignored "-Wdeprecated-declarations"
+#endif
+
+void knot_sem_init(knot_sem_t *sem, unsigned int value)
+{
+ int ret = sem_init(&sem->semaphore, 1, value);
+ if (ret == 0) {
+ sem->status = -1;
+ } else {
+ sem->status = value;
+ sem->status_lock = malloc(sizeof(*sem->status_lock));
+ pthread_mutex_init(&sem->status_lock->mutex, NULL);
+ pthread_cond_init(&sem->status_lock->cond, NULL);
+ }
+}
+
+void knot_sem_wait(knot_sem_t *sem)
+{
+ if (sem->status < 0) {
+ int semret;
+ do {
+ semret = sem_wait(&sem->semaphore);
+ } while (semret != 0); // repeat wait as it might be interrupted by a signal
+ } else {
+ pthread_mutex_lock(&sem->status_lock->mutex);
+ while (sem->status == 0) {
+ pthread_cond_wait(&sem->status_lock->cond, &sem->status_lock->mutex);
+ }
+ sem->status--;
+ pthread_mutex_unlock(&sem->status_lock->mutex);
+ }
+}
+
+void knot_sem_post(knot_sem_t *sem)
+{
+ if (sem->status < 0) {
+ int semret = sem_post(&sem->semaphore);
+ (void)semret;
+ assert(semret == 0);
+ } else {
+ pthread_mutex_lock(&sem->status_lock->mutex);
+ sem->status++;
+ pthread_cond_signal(&sem->status_lock->cond);
+ pthread_mutex_unlock(&sem->status_lock->mutex);
+ }
+}
+
+void knot_sem_destroy(knot_sem_t *sem)
+{
+ knot_sem_wait(sem);
+ if (sem->status < 0) {
+ sem_destroy(&sem->semaphore);
+ } else {
+ pthread_cond_destroy(&sem->status_lock->cond);
+ pthread_mutex_destroy(&sem->status_lock->mutex);
+ free(sem->status_lock);
+ }
+}
diff --git a/src/contrib/semaphore.h b/src/contrib/semaphore.h
new file mode 100644
index 0000000..be49b7f
--- /dev/null
+++ b/src/contrib/semaphore.h
@@ -0,0 +1,41 @@
+/* Copyright (C) 2019 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+#include <pthread.h>
+#include <semaphore.h>
+
+#pragma once
+
+typedef struct {
+ pthread_mutex_t mutex;
+ pthread_cond_t cond;
+} knot_sem_mutex_t;
+
+typedef struct {
+ int status;
+ union {
+ sem_t semaphore;
+ knot_sem_mutex_t *status_lock;
+ };
+} knot_sem_t;
+
+void knot_sem_init(knot_sem_t *sem, unsigned int value);
+
+void knot_sem_wait(knot_sem_t *sem);
+
+void knot_sem_post(knot_sem_t *sem);
+
+void knot_sem_destroy(knot_sem_t *sem);
diff --git a/src/contrib/sockaddr.c b/src/contrib/sockaddr.c
new file mode 100644
index 0000000..2b3218b
--- /dev/null
+++ b/src/contrib/sockaddr.c
@@ -0,0 +1,368 @@
+/* Copyright (C) 2023 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+#include <assert.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <netdb.h>
+
+#include "libknot/errcode.h"
+#include "contrib/sockaddr.h"
+#include "contrib/openbsd/strlcpy.h"
+#include "contrib/macros.h"
+#include "contrib/musl/inet_ntop.h"
+
+int sockaddr_len(const struct sockaddr_storage *ss)
+{
+ if (ss == NULL) {
+ return 0;
+ }
+
+ switch(ss->ss_family) {
+ case AF_INET:
+ return sizeof(struct sockaddr_in);
+ case AF_INET6:
+ return sizeof(struct sockaddr_in6);
+ case AF_UNIX:
+ return sizeof(struct sockaddr_un);
+ default:
+ return 0;
+ }
+}
+
+static int cmp_ipv4(const struct sockaddr_in *a, const struct sockaddr_in *b,
+ bool ignore_port)
+{
+ if (a->sin_addr.s_addr < b->sin_addr.s_addr) {
+ return -1;
+ } else if (a->sin_addr.s_addr > b->sin_addr.s_addr) {
+ return 1;
+ } else {
+ return ignore_port ? 0 : a->sin_port - b->sin_port;
+ }
+}
+
+static int cmp_ipv6(const struct sockaddr_in6 *a, const struct sockaddr_in6 *b,
+ bool ignore_port)
+{
+ int ret = memcmp(&a->sin6_addr, &b->sin6_addr, sizeof(struct in6_addr));
+ if (ret == 0) {
+ ret = ignore_port ? 0 : a->sin6_port - b->sin6_port;
+ }
+
+ return ret;
+}
+
+static int cmp_unix(const struct sockaddr_un *a, const struct sockaddr_un *b)
+{
+ int len_a = strnlen(a->sun_path, sizeof(a->sun_path));
+ int len_b = strnlen(b->sun_path, sizeof(b->sun_path));
+ int len_min = len_a <= len_b ? len_a : len_b;
+
+ int ret = strncmp(a->sun_path, b->sun_path, len_min);
+ if (ret == 0) {
+ ret = len_a - len_b;
+ }
+
+ return ret;
+}
+
+int sockaddr_cmp(const struct sockaddr_storage *a, const struct sockaddr_storage *b,
+ bool ignore_port)
+{
+ assert(a);
+ assert(b);
+ if (a->ss_family != b->ss_family) {
+ return (int)a->ss_family - (int)b->ss_family;
+ }
+
+ switch (a->ss_family) {
+ case AF_UNSPEC:
+ return 0;
+ case AF_INET:
+ return cmp_ipv4((struct sockaddr_in *)a, (struct sockaddr_in *)b,
+ ignore_port);
+ case AF_INET6:
+ return cmp_ipv6((struct sockaddr_in6 *)a, (struct sockaddr_in6 *)b,
+ ignore_port);
+ case AF_UNIX:
+ return cmp_unix((struct sockaddr_un *)a, (struct sockaddr_un *)b);
+ default:
+ return 1;
+ }
+}
+
+int sockaddr_set(struct sockaddr_storage *ss, int family, const char *straddr, int port)
+{
+ if (ss == NULL || straddr == NULL) {
+ return KNOT_EINVAL;
+ }
+
+ /* Set family and port. */
+ memset(ss, 0, sizeof(*ss));
+ ss->ss_family = family;
+ sockaddr_port_set(ss, port);
+
+ /* Initialize address depending on address family. */
+ if (family == AF_INET6) {
+ struct sockaddr_in6 *ipv6 = (struct sockaddr_in6 *)ss;
+ if (inet_pton(family, straddr, &ipv6->sin6_addr) < 1) {
+ return KNOT_ERROR;
+ }
+ return KNOT_EOK;
+ } else if (family == AF_INET) {
+ struct sockaddr_in *ipv4 = (struct sockaddr_in *)ss;
+ if (inet_pton(family, straddr, &ipv4->sin_addr) < 1) {
+ return KNOT_ERROR;
+ }
+ return KNOT_EOK;
+ } else if (family == AF_UNIX) {
+ struct sockaddr_un *un = (struct sockaddr_un *)ss;
+ size_t ret = strlcpy(un->sun_path, straddr, sizeof(un->sun_path));
+ if (ret >= sizeof(un->sun_path)) {
+ return KNOT_ESPACE;
+ }
+ return KNOT_EOK;
+ }
+
+ return KNOT_EINVAL;
+}
+
+void *sockaddr_raw(const struct sockaddr_storage *ss, size_t *addr_size)
+{
+ if (ss == NULL || addr_size == NULL) {
+ return NULL;
+ }
+
+ if (ss->ss_family == AF_INET) {
+ struct sockaddr_in *ipv4 = (struct sockaddr_in *)ss;
+ *addr_size = sizeof(ipv4->sin_addr);
+ return &ipv4->sin_addr;
+ } else if (ss->ss_family == AF_INET6) {
+ struct sockaddr_in6 *ipv6 = (struct sockaddr_in6 *)ss;
+ *addr_size = sizeof(ipv6->sin6_addr);
+ return &ipv6->sin6_addr;
+ } else if (ss->ss_family == AF_UNIX) {
+ struct sockaddr_un *un = (struct sockaddr_un *)ss;
+ *addr_size = sizeof(un->sun_path);
+ return un->sun_path;
+ } else {
+ return NULL;
+ }
+}
+
+int sockaddr_set_raw(struct sockaddr_storage *ss, int family,
+ const uint8_t *raw_addr, size_t raw_addr_size)
+{
+ if (ss == NULL || raw_addr == NULL) {
+ return KNOT_EINVAL;
+ }
+
+ memset(ss, 0, sizeof(*ss));
+ ss->ss_family = family;
+
+ size_t ss_size = 0;
+ void *ss_data = sockaddr_raw(ss, &ss_size);
+ if (ss_data == NULL ||
+ (family != AF_UNIX && ss_size != raw_addr_size) ||
+ (family == AF_UNIX && ss_size <= raw_addr_size)) {
+ return KNOT_EINVAL;
+ }
+
+ memcpy(ss_data, raw_addr, raw_addr_size);
+
+ return KNOT_EOK;
+}
+
+int sockaddr_tostr(char *buf, size_t maxlen, const struct sockaddr_storage *ss)
+{
+ if (ss == NULL || buf == NULL) {
+ return KNOT_EINVAL;
+ }
+
+ const char *out = NULL;
+
+ /* Convert network address string. */
+ if (ss->ss_family == AF_INET6) {
+ const struct sockaddr_in6 *s = (const struct sockaddr_in6 *)ss;
+ out = knot_inet_ntop(ss->ss_family, &s->sin6_addr, buf, maxlen);
+ } else if (ss->ss_family == AF_INET) {
+ const struct sockaddr_in *s = (const struct sockaddr_in *)ss;
+ out = knot_inet_ntop(ss->ss_family, &s->sin_addr, buf, maxlen);
+ } else if (ss->ss_family == AF_UNIX) {
+ const struct sockaddr_un *s = (const struct sockaddr_un *)ss;
+ const char *path = (s->sun_path[0] != '\0' ? s->sun_path : "UNIX socket");
+ size_t ret = strlcpy(buf, path, maxlen);
+ out = (ret < maxlen) ? buf : NULL;
+ } else {
+ *buf = '\0';
+ return KNOT_EINVAL;
+ }
+
+ if (out == NULL) {
+ *buf = '\0';
+ return KNOT_ESPACE;
+ }
+
+ /* Write separator and port. */
+ int written = strlen(buf);
+ int port = sockaddr_port(ss);
+ if (port > 0) {
+ int ret = snprintf(&buf[written], maxlen - written, "@%d", port);
+ if (ret <= 0 || (size_t)ret >= maxlen - written) {
+ *buf = '\0';
+ return KNOT_ESPACE;
+ }
+
+ written += ret;
+ }
+
+ return written;
+}
+
+int sockaddr_port(const struct sockaddr_storage *ss)
+{
+ if (ss == NULL) {
+ return KNOT_EINVAL;
+ }
+
+ if (ss->ss_family == AF_INET6) {
+ return ntohs(((struct sockaddr_in6 *)ss)->sin6_port);
+ } else if (ss->ss_family == AF_INET) {
+ return ntohs(((struct sockaddr_in *)ss)->sin_port);
+ } else {
+ return KNOT_EINVAL;
+ }
+}
+
+void sockaddr_port_set(struct sockaddr_storage *ss, uint16_t port)
+{
+ if (ss == NULL) {
+ return;
+ }
+
+ if (ss->ss_family == AF_INET6) {
+ ((struct sockaddr_in6 *)ss)->sin6_port = htons(port);
+ } else if (ss->ss_family == AF_INET) {
+ ((struct sockaddr_in *)ss)->sin_port = htons(port);
+ }
+}
+
+char *sockaddr_hostname(void)
+{
+ /* Fetch hostname. */
+ char host[256] = "";
+ if (gethostname(host, sizeof(host)) != 0) {
+ return NULL;
+ }
+ /* Just to be sure. */
+ host[sizeof(host) - 1] = '\0';
+
+ /* Fetch canonical name for this address/DNS. */
+ struct addrinfo hints, *info = NULL;
+ memset(&hints, 0, sizeof hints);
+ hints.ai_family = AF_UNSPEC;
+ hints.ai_socktype = SOCK_DGRAM;
+ hints.ai_flags = AI_CANONNAME;
+ if (getaddrinfo(host, "domain", &hints, &info) != 0) {
+ return NULL;
+ }
+
+ /* Fetch first valid hostname. */
+ char *hname = NULL;
+ struct addrinfo *p = NULL;
+ for (p = info; p != NULL; p = p->ai_next) {
+ if (p->ai_canonname) {
+ hname = strdup(p->ai_canonname);
+ break;
+ }
+ }
+
+ /* No valid hostname found, resort to gethostname() result */
+ if (hname == NULL) {
+ hname = strdup(host);
+ }
+
+ freeaddrinfo(info);
+ return hname;
+}
+
+bool sockaddr_is_any(const struct sockaddr_storage *ss)
+{
+ if (ss == NULL) {
+ return false;
+ }
+
+ if (ss->ss_family == AF_INET) {
+ const struct sockaddr_in *ipv4 = (struct sockaddr_in *)ss;
+ return ipv4->sin_addr.s_addr == INADDR_ANY;
+ }
+
+ if (ss->ss_family == AF_INET6) {
+ const struct sockaddr_in6 *ipv6 = (struct sockaddr_in6 *)ss;
+ return memcmp(&ipv6->sin6_addr, &in6addr_any, sizeof(ipv6->sin6_addr)) == 0;
+ }
+
+ return false;
+}
+
+bool sockaddr_net_match(const struct sockaddr_storage *ss1,
+ const struct sockaddr_storage *ss2,
+ unsigned prefix)
+{
+ if (ss1 == NULL || ss2 == NULL) {
+ return false;
+ }
+
+ if (ss1->ss_family != ss2->ss_family) {
+ return false;
+ }
+
+ size_t raw_len = 0;
+ const uint8_t *raw_1 = sockaddr_raw(ss1, &raw_len);
+ const uint8_t *raw_2 = sockaddr_raw(ss2, &raw_len);
+
+ prefix = MIN(prefix, raw_len * 8);
+ unsigned bytes = prefix / 8;
+ unsigned bits = prefix % 8;
+
+ /* Compare full bytes. */
+ if (memcmp(raw_1, raw_2, bytes) != 0) {
+ return false;
+ }
+
+ /* Compare last partial byte. */
+ return bits == 0 ||
+ (raw_1[bytes] >> (8 - bits) == raw_2[bytes] >> (8 - bits));
+}
+
+bool sockaddr_range_match(const struct sockaddr_storage *ss,
+ const struct sockaddr_storage *ss_min,
+ const struct sockaddr_storage *ss_max)
+{
+ if (ss == NULL || ss_min == NULL || ss_max == NULL) {
+ return false;
+ }
+
+ if (ss_min->ss_family != ss_max->ss_family ||
+ ss_min->ss_family != ss->ss_family) {
+ return false;
+ }
+
+ return sockaddr_cmp(ss, ss_min, true) >= 0 &&
+ sockaddr_cmp(ss, ss_max, true) <= 0;
+}
diff --git a/src/contrib/sockaddr.h b/src/contrib/sockaddr.h
new file mode 100644
index 0000000..f9228d7
--- /dev/null
+++ b/src/contrib/sockaddr.h
@@ -0,0 +1,160 @@
+/* Copyright (C) 2020 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include <stdbool.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <sys/un.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <stdint.h>
+#include <unistd.h>
+
+/* Subnet maximum prefix length. */
+#define IPV4_PREFIXLEN 32
+#define IPV6_PREFIXLEN 128
+
+/* Address string "address[@port]" maximum length. */
+#define SOCKADDR_STRLEN_EXT (1 + 6) /* '@', 5 digits number, \0 */
+#define SOCKADDR_STRLEN (sizeof(struct sockaddr_un) + SOCKADDR_STRLEN_EXT)
+
+/*!
+ * \brief Calculate current structure length based on address family.
+ *
+ * \param ss Socket address.
+ *
+ * \return Number of bytes or error code.
+ */
+int sockaddr_len(const struct sockaddr_storage *ss);
+
+/*!
+ * \brief Compare addresses.
+ *
+ * \param a First address.
+ * \param b Second address.
+ * \param ignore_port Ignore port indication.
+ *
+ * \return like memcmp(3)
+ */
+int sockaddr_cmp(const struct sockaddr_storage *a, const struct sockaddr_storage *b,
+ bool ignore_port);
+
+/*!
+ * \brief Set address and port.
+ *
+ * \param ss Socket address.
+ * \param family Address family.
+ * \param straddr IP address in string format.
+ * \param port Port.
+ *
+ * \return KNOT_EOK on success or an error code.
+ */
+int sockaddr_set(struct sockaddr_storage *ss, int family, const char *straddr, int port);
+
+/*!
+ * \brief Return raw network address in network byte order.
+ *
+ * \param[in] ss Socket address.
+ * \param[out] addr_size Address length.
+ *
+ * \return Pointer to binary buffer of size addr_size.
+ */
+void *sockaddr_raw(const struct sockaddr_storage *ss, size_t *addr_size);
+
+/*!
+ * \brief Set raw address.
+ *
+ * \param ss Socket address.
+ * \param family Address family.
+ * \param raw_addr IP address in binary format.
+ * \param raw_addr_size Size of the binary address.
+ *
+ * \return KNOT_EOK on success or an error code.
+ */
+int sockaddr_set_raw(struct sockaddr_storage *ss, int family,
+ const uint8_t *raw_addr, size_t raw_addr_size);
+
+/*!
+ * \brief Return string representation of socket address.
+ *
+ * \note String format: \<address>[@<port>], f.e. '127.0.0.1@53'
+ *
+ * \param buf Destination for string representation.
+ * \param maxlen Maximum number of written bytes.
+ * \param ss Socket address.
+ *
+ * \return Number of bytes written on success, error code on failure.
+ */
+int sockaddr_tostr(char *buf, size_t maxlen, const struct sockaddr_storage *ss);
+
+/*!
+ * \brief Return port number from address.
+ *
+ * \param ss Socket address.
+ *
+ * \return Port number or error code.
+ */
+int sockaddr_port(const struct sockaddr_storage *ss);
+
+/*!
+ * \brief Set port number.
+ *
+ * \param ss Socket address.
+ * \param port Port to set.
+ */
+void sockaddr_port_set(struct sockaddr_storage *ss, uint16_t port);
+
+/*!
+ * \brief Get host FQDN address.
+ *
+ * \return Hostname string or NULL.
+ */
+char *sockaddr_hostname(void);
+
+/*!
+ * \brief Check if address is ANY address.
+ *
+ * \param ss Socket address.
+ */
+bool sockaddr_is_any(const struct sockaddr_storage *ss);
+
+/*!
+ * \brief Check if two addresses match the given network prefix.
+ *
+ * \param ss1 First address.
+ * \param ss2 Second address.
+ * \param prefix Prefix length.
+ *
+ * \return True on match.
+ */
+bool sockaddr_net_match(const struct sockaddr_storage *ss1,
+ const struct sockaddr_storage *ss2,
+ unsigned prefix);
+
+/*!
+ * \brief Check if the address is within the given address range (inclusive).
+ *
+ * \param ss Address to check.
+ * \param ss_min Minimum address.
+ * \param ss_max Maximum address.
+ *
+ * \return True on match.
+ */
+bool sockaddr_range_match(const struct sockaddr_storage *ss,
+ const struct sockaddr_storage *ss_min,
+ const struct sockaddr_storage *ss_max);
diff --git a/src/contrib/spinlock.h b/src/contrib/spinlock.h
new file mode 100644
index 0000000..837f500
--- /dev/null
+++ b/src/contrib/spinlock.h
@@ -0,0 +1,133 @@
+/* Copyright (C) 2021 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+/*!
+ * \brief Multiplatform spinlock.
+ */
+
+#pragma once
+
+#if (__STDC_VERSION__ >= 201112L) && !defined(__STDC_NO_ATOMICS__)
+/* Not tested and activated yet. */
+/* #define HAVE_STDATOMIC */
+#endif
+
+#if defined(__APPLE__)
+# if defined(MAC_OS_X_VERSION_10_12) || \
+ MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_12
+# define APPLE_SPIN_NEW
+# else
+# define APPLE_SPIN_OLD
+# endif /* MAC_OS_X_VERSION_10_12 ... */
+#endif /* __APPLE__ */
+
+#if defined(HAVE_SYNC_ATOMIC) || defined(HAVE_ATOMIC)
+# include <stdbool.h>
+#elif defined(HAVE_STDATOMIC)
+# include <stdbool.h>
+# include <stdatomic.h>
+#elif defined(APPLE_SPIN_NEW)
+# include <os/lock.h>
+#elif defined(APPLE_SPIN_OLD)
+# include <libkern/OSAtomic.h>
+#else /* POSIX pthread spinlock. */
+# include <pthread.h>
+#endif
+
+/*! \brief Spinlock lock variable type. */
+typedef
+#if defined(HAVE_SYNC_ATOMIC) || defined(HAVE_ATOMIC)
+ bool /*!< Spinlock lock - a simple & fast atomic version. */
+#elif defined(HAVE_STDATOMIC)
+ atomic_bool /*!< Spinlock lock - a simple & fast atomic version, C11 */
+#elif defined(APPLE_SPIN_NEW)
+ os_unfair_lock /*!< Spinlock lock - a newer macOS version (macOS >= 10.12). */
+#elif defined(APPLE_SPIN_OLD)
+ OSSpinLock /*!< Spinlock lock - an older macOS version (macOS < 10.12). */
+#else /* POSIX */
+ pthread_spinlock_t /*!< Spinlock lock - a POSIX pthread version. */
+#endif
+ knot_spin_t;
+
+/*! \brief Initialize the spinlock pointed to by the parameter "lock". */
+static inline void knot_spin_init(knot_spin_t *lock)
+{
+#if defined(HAVE_SYNC_ATOMIC) || defined(HAVE_ATOMIC)
+ *lock = false;
+#elif defined(HAVE_STDATOMIC)
+ atomic_init(lock, false);
+#elif defined(APPLE_SPIN_NEW)
+ *lock = OS_UNFAIR_LOCK_INIT;
+#elif defined(APPLE_SPIN_OLD)
+ *lock = OS_SPINLOCK_INIT;
+#else /* POSIX */
+ pthread_spin_init(lock, PTHREAD_PROCESS_PRIVATE);
+#endif
+}
+
+/*! \brief Destroy the spinlock pointed to by the parameter "lock". */
+static inline void knot_spin_destroy(knot_spin_t *lock)
+{
+#if defined(HAVE_SYNC_ATOMIC) || defined(HAVE_ATOMIC) || defined(HAVE_STDATOMIC) || \
+ defined(APPLE_SPIN_NEW) || defined(APPLE_SPIN_OLD)
+ /* Nothing needed here. */
+#else /* POSIX */
+ pthread_spin_destroy(lock);
+#endif
+}
+
+/*! \brief Lock the spinlock pointed to by the parameter "lock". */
+static inline void knot_spin_lock(knot_spin_t *lock)
+{
+#if defined(HAVE_SYNC_ATOMIC)
+ while (__sync_lock_test_and_set(lock, 1)) {
+ }
+#elif defined(HAVE_ATOMIC)
+ int expected = 0;
+ while (!__atomic_compare_exchange_n(lock, &expected, 1, false, __ATOMIC_RELAXED, __ATOMIC_RELAXED)) {
+ expected = 0;
+ }
+#elif defined(HAVE_STDATOMIC)
+ int expected = 0;
+ while (!atomic_compare_exchange_strong(lock, &expected, false)) {
+ expected = 0;
+ }
+#elif defined(APPLE_SPIN_NEW)
+ os_unfair_lock_lock(lock);
+#elif defined(APPLE_SPIN_OLD)
+ OSSpinLockLock(lock);
+#else /* POSIX */
+ pthread_spin_lock(lock);
+#endif
+}
+
+/*! \brief Unlock the spinlock pointed to by the parameter "lock". */
+static inline void knot_spin_unlock(knot_spin_t *lock)
+{
+#if defined(HAVE_SYNC_ATOMIC)
+ __sync_lock_release(lock);
+#elif defined(HAVE_ATOMIC)
+ __atomic_clear(lock, __ATOMIC_RELAXED);
+#elif defined(HAVE_STDATOMIC)
+ atomic_store(lock, false);
+#elif defined(APPLE_SPIN_NEW)
+ os_unfair_lock_unlock(lock);
+#elif defined(APPLE_SPIN_OLD)
+ OSSpinLockUnlock(lock);
+#else /* POSIX */
+ pthread_spin_unlock(lock);
+#endif
+}
diff --git a/src/contrib/string.c b/src/contrib/string.c
new file mode 100644
index 0000000..272116e
--- /dev/null
+++ b/src/contrib/string.c
@@ -0,0 +1,247 @@
+/* Copyright (C) 2021 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+#include <assert.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#if defined(HAVE_EXPLICIT_BZERO)
+ #if defined(HAVE_BSD_STRING_H)
+ #include <bsd/string.h>
+ #endif
+ /* #include <string.h> is needed. */
+#elif defined(HAVE_EXPLICIT_MEMSET)
+ /* #include <string.h> is needed. */
+#elif defined(HAVE_GNUTLS_MEMSET)
+ #include <gnutls/gnutls.h>
+#else
+ #define USE_CUSTOM_MEMSET
+#endif
+
+#include "contrib/string.h"
+#include "contrib/ctype.h"
+#include "contrib/tolower.h"
+
+uint8_t *memdup(const uint8_t *data, size_t data_size)
+{
+ uint8_t *result = (uint8_t *)malloc(data_size);
+ if (!result) {
+ return NULL;
+ }
+
+ return memcpy(result, data, data_size);
+}
+
+int strmemcmp(const char *str, const uint8_t *mem, size_t mem_size)
+{
+ if (mem_size == 0) {
+ return 1;
+ }
+ size_t cmp_len = strnlen(str, mem_size - 1) + 1;
+ return memcmp(str, mem, cmp_len);
+}
+
+char *sprintf_alloc(const char *fmt, ...)
+{
+ char *strp = NULL;
+ va_list ap;
+
+ va_start(ap, fmt);
+ int ret = vasprintf(&strp, fmt, ap);
+ va_end(ap);
+
+ if (ret < 0) {
+ return NULL;
+ }
+ return strp;
+}
+
+char *strcdup(const char *s1, const char *s2)
+{
+ if (!s1 || !s2) {
+ return NULL;
+ }
+
+ size_t s1len = strlen(s1);
+ size_t s2len = strlen(s2);
+ size_t nlen = s1len + s2len + 1;
+
+ char* dst = malloc(nlen);
+ if (dst == NULL) {
+ return NULL;
+ }
+
+ memcpy(dst, s1, s1len);
+ memcpy(dst + s1len, s2, s2len + 1);
+ return dst;
+}
+
+char *strstrip(const char *str)
+{
+ // leading white-spaces
+ const char *scan = str;
+ while (is_space(scan[0])) {
+ scan += 1;
+ }
+
+ // trailing white-spaces
+ size_t len = strlen(scan);
+ while (len > 0 && is_space(scan[len - 1])) {
+ len -= 1;
+ }
+
+ char *trimmed = malloc(len + 1);
+ if (!trimmed) {
+ return NULL;
+ }
+
+ memcpy(trimmed, scan, len);
+ trimmed[len] = '\0';
+
+ return trimmed;
+}
+
+void strtolower(char *str)
+{
+ if (str == NULL) {
+ return;
+ }
+
+ for (char *it = str; *it != '\0'; ++it) {
+ *it = knot_tolower(*it);
+ }
+}
+
+int const_time_memcmp(const void *s1, const void *s2, size_t n)
+{
+ volatile uint8_t equal = 0;
+
+ for (size_t i = 0; i < n; i++) {
+ equal |= ((uint8_t *)s1)[i] ^ ((uint8_t *)s2)[i];
+ }
+
+ return equal;
+}
+
+#if defined(USE_CUSTOM_MEMSET)
+typedef void *(*memset_t)(void *, int, size_t);
+static volatile memset_t volatile_memset = memset;
+#endif
+
+void *memzero(void *s, size_t n)
+{
+#if defined(HAVE_EXPLICIT_BZERO) /* In OpenBSD since 5.5. */
+ /* In FreeBSD since 11.0. */
+ /* In glibc since 2.25. */
+ /* In DragonFly BSD since 5.5. */
+# if defined(__has_feature)
+# if __has_feature(memory_sanitizer)
+ #warning "Memory sanitizer detected. Using bzero() instead of explicit_bzero()."
+ bzero(s, n);
+# else
+ explicit_bzero(s, n);
+# endif
+# else
+ explicit_bzero(s, n);
+# endif
+ return s;
+#elif defined(HAVE_EXPLICIT_MEMSET) /* In NetBSD since 7.0. */
+ return explicit_memset(s, 0, n);
+#elif defined(HAVE_GNUTLS_MEMSET) /* In GnuTLS since 3.4.0. */
+ gnutls_memset(s, 0, n);
+ return s;
+#else /* Knot custom solution as a fallback. */
+ /* Warning: the use of the return value is *probably* needed
+ * so as to avoid the volatile_memset() to be optimized out.
+ */
+ return volatile_memset(s, 0, n);
+#endif
+}
+
+static const char BIN_TO_HEX[] = {
+ '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f',
+ '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F'
+};
+
+char *bin_to_hex(const uint8_t *bin, size_t bin_len, bool upper_case)
+{
+ if (bin == NULL) {
+ return NULL;
+ }
+
+ size_t hex_size = bin_len * 2;
+ char *hex = malloc(hex_size + 1);
+ if (hex == NULL) {
+ return NULL;
+ }
+
+ unsigned offset = upper_case ? 16 : 0;
+ for (size_t i = 0; i < bin_len; i++) {
+ hex[2 * i] = BIN_TO_HEX[offset + (bin[i] >> 4)];
+ hex[2 * i + 1] = BIN_TO_HEX[offset + (bin[i] & 0x0f)];
+ }
+ hex[hex_size] = '\0';
+
+ return hex;
+}
+
+/*!
+ * Convert HEX character to numeric value (assumes valid input).
+ */
+static uint8_t hex_to_number(const char hex)
+{
+ if (hex >= '0' && hex <= '9') {
+ return hex - '0';
+ } else if (hex >= 'a' && hex <= 'f') {
+ return hex - 'a' + 10;
+ } else {
+ assert(hex >= 'A' && hex <= 'F');
+ return hex - 'A' + 10;
+ }
+}
+
+uint8_t *hex_to_bin(const char *hex, size_t *out_len)
+{
+ if (hex == NULL || out_len == NULL) {
+ return NULL;
+ }
+
+ size_t hex_len = strlen(hex);
+ if (hex_len % 2 != 0) {
+ return NULL;
+ }
+
+ size_t bin_len = hex_len / 2;
+ uint8_t *bin = malloc(bin_len + 1);
+ if (bin == NULL) {
+ return NULL;
+ }
+
+ for (size_t i = 0; i < bin_len; i++) {
+ if (!is_xdigit(hex[2 * i]) || !is_xdigit(hex[2 * i + 1])) {
+ free(bin);
+ return NULL;
+ }
+ uint8_t high = hex_to_number(hex[2 * i]);
+ uint8_t low = hex_to_number(hex[2 * i + 1]);
+ bin[i] = high << 4 | low;
+ }
+
+ *out_len = bin_len;
+
+ return bin;
+}
diff --git a/src/contrib/string.h b/src/contrib/string.h
new file mode 100644
index 0000000..ad3c990
--- /dev/null
+++ b/src/contrib/string.h
@@ -0,0 +1,104 @@
+/* Copyright (C) 2022 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+/*!
+ * \brief String manipulations.
+ */
+
+#pragma once
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+
+/*!
+ * \brief Create a copy of a binary buffer.
+ *
+ * Like \c strdup, but for binary data.
+ */
+uint8_t *memdup(const uint8_t *data, size_t data_size);
+
+/*!
+ * \brief Compare a zero-terminated string with fixed-size memory.
+ */
+int strmemcmp(const char *str, const uint8_t *mem, size_t mem_size);
+
+/*!
+ * \brief Format string and take care of allocating memory.
+ *
+ * \note sprintf(3) manual page reference implementation.
+ *
+ * \param fmt Message format.
+ * \return formatted message or NULL.
+ */
+char *sprintf_alloc(const char *fmt, ...);
+
+/*!
+ * \brief Create new string from a concatenation of s1 and s2.
+ *
+ * \param s1 First string.
+ * \param s2 Second string.
+ *
+ * \retval Newly allocated string on success.
+ * \retval NULL on error.
+ */
+char *strcdup(const char *s1, const char *s2);
+
+/*!
+ * \brief Create a copy of a string skipping leading and trailing white spaces.
+ *
+ * \return Newly allocated string, NULL in case of error.
+ */
+char *strstrip(const char *str);
+
+/*!
+ * \brief Convert upper-case letters to lower-case in a string.
+ */
+void strtolower(char *str);
+
+/*!
+ * \brief Compare data in time based on string length.
+ * This function just checks for (in)equality not for relation
+ *
+ * \param s1 The first address to compare.
+ * \param s2 The second address to compare.
+ * \param n The size of memory to compare.
+ *
+ * \return Non zero on difference and zero if the buffers are identical.
+ */
+int const_time_memcmp(const void *s1, const void *s2, size_t n);
+
+/*!
+ * \brief Fill memory with zeroes.
+ *
+ * Inspired by OPENSSL_cleanse. Such a memset shouldn't be optimized out.
+ *
+ * \param s The address to fill.
+ * \param n The size of memory to fill.
+ *
+ * \return Pointer to the memory.
+ */
+void *memzero(void *s, size_t n);
+
+/*!
+ * \brief Convert binary data to hexadecimal string.
+ */
+char *bin_to_hex(const uint8_t *bin, size_t bin_len, bool upper_case);
+
+/*!
+ * \brief Convert hex encoded string to binary data.
+ */
+uint8_t *hex_to_bin(const char *hex, size_t *out_len);
diff --git a/src/contrib/strtonum.h b/src/contrib/strtonum.h
new file mode 100644
index 0000000..849f66f
--- /dev/null
+++ b/src/contrib/strtonum.h
@@ -0,0 +1,125 @@
+/* Copyright (C) 2018 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include <assert.h>
+#include <errno.h>
+#include <inttypes.h>
+#include <limits.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#include "libknot/errcode.h"
+#include "contrib/ctype.h"
+
+inline static int intmax_from_str(const char *src, intmax_t *dst,
+ intmax_t min, intmax_t max)
+{
+ if (!is_digit(*src) && *src != '-' && *src != '+') {
+ return KNOT_EINVAL;
+ }
+
+ errno = 0;
+ char *end = NULL;
+ intmax_t result = strtoimax(src, &end, 10);
+
+ if (errno == ERANGE) {
+ return KNOT_ERANGE;
+ }
+
+ if (src == end || *end != '\0') {
+ return KNOT_EINVAL;
+ }
+
+ if (result < min || result > max) {
+ return KNOT_ERANGE;
+ }
+
+ *dst = result;
+ return KNOT_EOK;
+}
+
+inline static int uintmax_from_str(const char *src, uintmax_t *dst,
+ uintmax_t min, uintmax_t max)
+{
+ if (!is_digit(*src) && *src != '+') {
+ return KNOT_EINVAL;
+ }
+
+ errno = 0;
+ char *end = NULL;
+ uintmax_t result = strtoumax(src, &end, 10);
+
+ if (errno == ERANGE) {
+ return KNOT_ERANGE;
+ }
+
+ if (src == end || *end != '\0') {
+ return KNOT_EINVAL;
+ }
+
+ if (result < min || result > max) {
+ return KNOT_ERANGE;
+ }
+
+ *dst = result;
+ return KNOT_EOK;
+}
+
+#define CONVERT(prefix, type, min, max, src, dst) \
+{ \
+ assert(src && dst); \
+ prefix##max_t value; \
+ int result = prefix##max_from_str(src, &value, min, max); \
+ if (result != KNOT_EOK) { \
+ return result; \
+ } \
+ *dst = (type)value; \
+ return KNOT_EOK; \
+}
+
+inline static int str_to_int(const char *src, int *dst, int min, int max)
+{
+ CONVERT(int, int, min, max, src, dst);
+}
+
+inline static int str_to_u8(const char *src, uint8_t *dst)
+{
+ CONVERT(uint, uint8_t, 0, UINT8_MAX, src, dst);
+}
+
+inline static int str_to_u16(const char *src, uint16_t *dst)
+{
+ CONVERT(uint, uint16_t, 0, UINT16_MAX, src, dst);
+}
+
+inline static int str_to_u32(const char *src, uint32_t *dst)
+{
+ CONVERT(uint, uint32_t, 0, UINT32_MAX, src, dst);
+}
+
+inline static int str_to_u64(const char *src, uint64_t *dst)
+{
+ CONVERT(uint, uint64_t, 0, UINT64_MAX, src, dst);
+}
+
+inline static int str_to_size(const char *src, size_t *dst, size_t min, size_t max)
+{
+ CONVERT(uint, size_t, min, max, src, dst);
+}
+
+#undef CONVERT
diff --git a/src/contrib/time.c b/src/contrib/time.c
new file mode 100644
index 0000000..a3a4cdf
--- /dev/null
+++ b/src/contrib/time.c
@@ -0,0 +1,441 @@
+/* Copyright (C) 2017 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+#include <assert.h>
+#include <inttypes.h>
+#include <limits.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "contrib/time.h"
+#include "contrib/ctype.h"
+#ifndef HAVE_CLOCK_GETTIME
+ #include <sys/time.h>
+#endif
+
+struct timespec time_now(void)
+{
+ struct timespec result = { 0 };
+
+#ifdef HAVE_CLOCK_GETTIME
+ clock_gettime(CLOCK_MONOTONIC, &result);
+#else // OS X < Sierra fallback.
+ struct timeval tmp = { 0 };
+ gettimeofday(&tmp, NULL);
+ result.tv_sec = tmp.tv_sec;
+ result.tv_nsec = 1000 * tmp.tv_usec;
+#endif
+
+ return result;
+}
+
+struct timespec time_diff(const struct timespec *begin, const struct timespec *end)
+{
+ struct timespec result = { 0 };
+
+ if (end->tv_nsec >= begin->tv_nsec) {
+ result.tv_sec = end->tv_sec - begin->tv_sec;
+ result.tv_nsec = end->tv_nsec - begin->tv_nsec;
+ } else {
+ result.tv_sec = end->tv_sec - begin->tv_sec - 1;
+ result.tv_nsec = 1000000000 - begin->tv_nsec + end->tv_nsec;
+ }
+
+ return result;
+}
+
+double time_diff_ms(const struct timespec *begin, const struct timespec *end)
+{
+ struct timespec result = time_diff(begin, end);
+
+ return (result.tv_sec * 1e3) + (result.tv_nsec / 1e6);
+}
+
+typedef struct {
+ const char *format;
+ const char *timespec;
+ const char *parsed;
+ knot_timediff_t offset;
+ char offset_sign;
+ char offset_unit;
+ struct tm calendar;
+ int error;
+} time_ctx_t;
+
+// After casting (struct tm) to (int []), we can use indexes...
+static int calendar_index(char ind)
+{
+ switch (ind) {
+ case 'Y': return 5;
+ case 'M': return 4;
+ case 'D': return 3;
+ case 'h': return 2;
+ case 'm': return 1;
+ case 's': return 0;
+ default: assert(0); return 6;
+ }
+}
+
+static size_t calendar_digits(int index)
+{
+ return index == 5 ? 4 : 2;
+}
+
+static size_t unit_value(char unit)
+{
+ size_t val = 1;
+ switch (unit) {
+ case 'M':
+ return 3600 * 24 * 30;
+ case 'Y':
+ val *= 365;
+ // FALLTHROUGH
+ case 'D':
+ val *= 24;
+ // FALLTHROUGH
+ case 'h':
+ val *= 60;
+ // FALLTHROUGH
+ case 'm':
+ val *= 60;
+ // FALLTHROUGH
+ case 's':
+ default:
+ return val;
+ }
+}
+
+static knot_time_t time_ctx_finalize(time_ctx_t *ctx)
+{
+ if (ctx->offset_sign) {
+ ctx->offset *= unit_value(ctx->offset_unit);
+ return knot_time_add(knot_time(), (ctx->offset_sign == '-' ? -1 : 1) * ctx->offset);
+ } else if (ctx->offset) {
+ return (knot_time_t)ctx->offset;
+ } else if (ctx->calendar.tm_year != 0) {
+ ctx->calendar.tm_isdst = -1;
+ ctx->calendar.tm_year -= 1900;
+ ctx->calendar.tm_mon -= 1;
+ // Set UTC timezone before using mktime
+ putenv("TZ=UTC");
+ tzset();
+ return (knot_time_t)mktime(&ctx->calendar);
+ } else {
+ return (knot_time_t)0;
+ }
+}
+
+static void time_ctx_reset(time_ctx_t *ctx)
+{
+ ctx->parsed = ctx->timespec;
+ ctx->offset = 0;
+ ctx->offset_sign = 0;
+ memset(&ctx->calendar, 0, sizeof(ctx->calendar));
+ ctx->error = 0;
+}
+
+static void parse_quote(time_ctx_t *ctx)
+{
+ while (*ctx->format != '|' && *ctx->format != '\0') {
+ if (*ctx->format == '\'') {
+ ctx->format++;
+ return;
+ }
+ if (*ctx->format++ != *ctx->parsed++) {
+ ctx->error = -1;
+ return;
+ }
+ }
+ ctx->error = -2;
+ return;
+}
+
+static void parse_offset(time_ctx_t *ctx)
+{
+ ctx->offset = 0;
+ ctx->error = -1;
+ while (is_digit(*ctx->parsed)) {
+ ctx->offset *= 10;
+ ctx->offset += *ctx->parsed++ - '0';
+ ctx->error = 0;
+ }
+}
+
+static void parse_calendar(time_ctx_t *ctx, int index)
+{
+ int *cal_arr = (int *)&ctx->calendar;
+ cal_arr[index] = 0;
+ for (size_t i = 0; i < calendar_digits(index); i++) {
+ if (!is_digit(*ctx->parsed)) {
+ ctx->error = -1;
+ return;
+ }
+ cal_arr[index] *= 10;
+ cal_arr[index] += *ctx->parsed++ - '0';
+ }
+}
+
+static void parse_sign(time_ctx_t *ctx)
+{
+ char sign1 = *(ctx->format - 1), sign2 = *ctx->format;
+
+ bool use_sign2 = (sign2 == '+' || sign2 == '-');
+
+ bool allow_plus = (sign1 == '+' || (sign1 == '-' && sign2 == '+'));
+ bool allow_minus = (sign1 == '-' || (sign1 == '+' && sign2 == '-'));
+ assert(sign1 == '+' || sign1 == '-');
+
+ if ((*ctx->parsed == '+' && allow_plus) || (*ctx->parsed == '-' && allow_minus)) {
+ ctx->offset_sign = *ctx->parsed++;
+ ctx->format += (use_sign2 ? 1 : 0);
+ } else {
+ ctx->error = -11;
+ }
+}
+
+static void parse_unit1(time_ctx_t *ctx)
+{
+ char u = *ctx->parsed++;
+ switch (u) {
+ case 'Y':
+ case 'M':
+ case 'D':
+ case 'h':
+ case 'm':
+ case 's':
+ ctx->offset_unit = u;
+ break;
+ default:
+ ctx->error = -1;
+ }
+}
+
+static void parse_unit2(time_ctx_t *ctx)
+{
+ char u = *ctx->parsed++;
+ switch (u) {
+ case 'y':
+ case 'd':
+ ctx->offset_unit = toupper((unsigned char)u);
+ break;
+ case 'h':
+ case 's':
+ ctx->offset_unit = u;
+ break;
+ case 'm':
+ switch (*ctx->parsed++) {
+ case 'o':
+ ctx->offset_unit = 'M';
+ break;
+ case 'i':
+ ctx->offset_unit = 'm';
+ break;
+ default:
+ ctx->error = -1;
+ }
+ break;
+ default:
+ ctx->error = -1;
+ }
+}
+
+int knot_time_parse(const char *format, const char *timespec, knot_time_t *time)
+{
+ if (format == NULL || timespec == NULL || time == NULL) {
+ return -1;
+ }
+
+ time_ctx_t ctx = {
+ .format = format,
+ .timespec = timespec,
+ .parsed = timespec,
+ .offset = 0,
+ .offset_sign = 0,
+ // we hope that .calendar is zeroed by default
+ .error = 0,
+ };
+
+ while (ctx.error == 0 && *ctx.format != '\0') {
+ switch (*ctx.format++) {
+ case '|':
+ if (*ctx.parsed == '\0') {
+ *time = time_ctx_finalize(&ctx);
+ return 0;
+ } else {
+ time_ctx_reset(&ctx);
+ }
+ break;
+ case '\'':
+ parse_quote(&ctx);
+ break;
+ case '#':
+ parse_offset(&ctx);
+ break;
+ case 'Y':
+ case 'M':
+ case 'D':
+ case 'h':
+ case 'm':
+ case 's':
+ parse_calendar(&ctx, calendar_index(*(ctx.format - 1)));
+ break;
+ case '+':
+ case '-':
+ parse_sign(&ctx);
+ break;
+ case 'U':
+ parse_unit1(&ctx);
+ break;
+ case 'u':
+ parse_unit2(&ctx);
+ break;
+ default:
+ return -1;
+ }
+
+ if (ctx.error < 0) {
+ while (*ctx.format != '|' && *ctx.format != '\0') {
+ ctx.format++;
+ }
+ time_ctx_reset(&ctx);
+ ctx.error = (*ctx.format == '\0' ? -1 : 0);
+ }
+ }
+
+ if (ctx.error == 0 && *ctx.parsed == '\0') {
+ *time = time_ctx_finalize(&ctx);
+ return 0;
+ }
+ return -1;
+}
+
+static char *unit_names_mixed[] = { "Y", "M", "D", "h", "m", "s" };
+static char *unit_names_lower[] = { "y", "mo", "d", "h", "mi", "s" };
+static size_t unit_sizes[] = { 3600*24*365, 3600*24*30, 3600*24, 3600, 60, 1 };
+static const size_t unit_count = 6;
+
+static int print_unit(char *dst, size_t dst_len, char *unit_names[unit_count],
+ size_t max_units, knot_time_t time)
+{
+ int ret;
+ if (time == 0) {
+ ret = snprintf(dst, dst_len, "0");
+ return (ret < 0 || ret >= dst_len ? -1 : 0);
+ }
+ knot_timediff_t diff = knot_time_diff(time, knot_time());
+ if (dst_len-- < 1) {
+ return -1;
+ }
+ *dst++ = (diff < 0 ? '-' : '+');
+ if (diff < 0) {
+ diff = -diff;
+ } else if (diff == 0) {
+ ret = snprintf(dst, dst_len, "0%s", unit_names[unit_count - 1]);
+ return (ret < 0 || ret >= dst_len ? -1 : 0);
+ }
+ size_t curr_unit = 0, used_units = 0;
+ while (curr_unit < unit_count && used_units < max_units) {
+ if (diff >= unit_sizes[curr_unit]) {
+ ret = snprintf(dst, dst_len, "%"KNOT_TIMEDIFF_PRINTF"%s",
+ diff / unit_sizes[curr_unit],
+ unit_names[curr_unit]);
+ if (ret < 0 || ret >= dst_len) {
+ return -1;
+ }
+ dst += ret;
+ dst_len -= ret;
+ used_units++;
+ diff %= unit_sizes[curr_unit];
+ }
+ curr_unit++;
+ }
+ return 0;
+}
+
+int knot_time_print(knot_time_print_t format, knot_time_t time, char *dst, size_t dst_len)
+{
+ if (dst == NULL) {
+ return -1;
+ }
+
+ int ret;
+ switch (format) {
+ case TIME_PRINT_UNIX:
+ ret = snprintf(dst, dst_len, "%"KNOT_TIME_PRINTF, time);
+ return ((ret >= 0 && ret < dst_len) ? 0 : -1);
+ case TIME_PRINT_ISO8601:
+ if (time > LONG_MAX) {
+ return -1;
+ }
+
+ // Set timezone to UTC before using timezone dependent functions
+ putenv("TZ=UTC");
+ tzset();
+
+ struct tm lt;
+ time_t tt = (time_t)time;
+ ret = (localtime_r(&tt, &lt) == NULL ? -1 :
+ strftime(dst, dst_len, "%Y-%m-%dT%H:%M:%SZ", &lt));
+ return (ret > 0 ? 0 : -1);
+ case TIME_PRINT_RELSEC:
+ ret = snprintf(dst, dst_len, "%+"KNOT_TIMEDIFF_PRINTF,
+ knot_time_diff(time, knot_time()));
+ return ((ret >= 0 && ret < dst_len) ? 0 : -1);
+ case TIME_PRINT_HUMAN_MIXED:
+ return print_unit(dst, dst_len, unit_names_mixed, unit_count, time);
+ case TIME_PRINT_HUMAN_LOWER:
+ return print_unit(dst, dst_len, unit_names_lower, unit_count, time);
+ default:
+ return -1;
+ }
+}
+
+int knot_time_print_human(knot_time_t time, char *dst, size_t dst_len, bool condensed)
+{
+ int ret;
+ knot_time_t num;
+ bool empty = true;
+ size_t total_len = 0;
+
+#define tths_process(unit, unit_name, unit_size) \
+ num = time / (unit_size); \
+ if (num > 0) { \
+ ret = snprintf(dst + total_len, dst_len - total_len, \
+ "%s%"PRIu64"%s%s", \
+ (!empty && !condensed ? " " : ""), \
+ num, \
+ (condensed ? unit : unit_name), \
+ (num > 1 && !condensed ? "s" : "")); \
+ if (ret <= 0 || (size_t)ret >= dst_len - total_len) { \
+ return -1; \
+ } \
+ empty = false; \
+ total_len += ret; \
+ time -= num * (unit_size); \
+ }
+
+ tths_process("w", " week", 604800);
+ tths_process("d", " day", 86400);
+ tths_process("h", " hour", 3600);
+ tths_process("m", " minute", 60);
+ tths_process("s", " second", 1);
+
+#undef tths_process
+
+ return total_len > 0 ? total_len : -1;
+}
diff --git a/src/contrib/time.h b/src/contrib/time.h
new file mode 100644
index 0000000..20d241e
--- /dev/null
+++ b/src/contrib/time.h
@@ -0,0 +1,211 @@
+/* Copyright (C) 2023 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include <inttypes.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <time.h>
+
+#ifdef __APPLE__
+ #define st_mtim st_mtimespec
+#endif
+
+/*!
+ * \brief Specify output format for knot_time_print().
+ */
+typedef enum {
+ TIME_PRINT_UNIX, // numeric UNIX time
+ TIME_PRINT_ISO8601, // 2016-12-31T23:59:00
+ TIME_PRINT_RELSEC, // relative +6523
+ TIME_PRINT_HUMAN_MIXED, // relative with mixed-case units
+ TIME_PRINT_HUMAN_LOWER, // relative with lower-case units
+} knot_time_print_t;
+
+/*!
+ * \brief Get current time.
+ */
+struct timespec time_now(void);
+
+/*!
+ * \brief Get time elapsed between two events.
+ */
+struct timespec time_diff(const struct timespec *begin, const struct timespec *end);
+
+/*!
+ * \brief Get time elapsed between two events in milliseconds.
+ */
+double time_diff_ms(const struct timespec *begin, const struct timespec *end);
+
+/*!
+ * \brief Data type for keeping UNIX timestamps.
+ *
+ * This is because time_t can be 32-bit on some systems, which is bad.
+ * Zero value represents infinity.
+ */
+typedef uint64_t knot_time_t;
+
+/*!
+ * \brief Data type for keeping time differences.
+ */
+typedef int64_t knot_timediff_t;
+
+#define KNOT_TIMEDIFF_MIN INT64_MIN
+#define KNOT_TIMEDIFF_MAX INT64_MAX
+
+#define KNOT_TIME_PRINTF PRIu64
+#define KNOT_TIMEDIFF_PRINTF PRId64
+
+/*!
+ * \brief Returns current time sice epoch.
+ */
+inline static knot_time_t knot_time(void)
+{
+ return (knot_time_t)time(NULL);
+}
+
+/*!
+ * \brief Compare two timestamps.
+ *
+ * \return 0 if equal, -1 if the former is smaller (=earlier), 1 else.
+ */
+inline static int knot_time_cmp(knot_time_t a, knot_time_t b)
+{
+ return (a == b ? 0 : 1) * ((a && b) == 0 ? -1 : 1) * (a < b ? -1 : 1);
+}
+
+/*!
+ * \brief Return the smaller (=earlier) from given two timestamps.
+ */
+inline static knot_time_t knot_time_min(knot_time_t a, knot_time_t b)
+{
+ if ((a && b) == 0) {
+ return a + b;
+ } else {
+ return (a < b ? a : b);
+ }
+}
+
+/*!
+ * \brief Return the difference between two timestamps (to "minus" from).
+ *
+ * \note If both are zero (=infinity), KNOT_TIMEDIFF_MAX is returned.
+ */
+inline static knot_timediff_t knot_time_diff(knot_time_t to, knot_time_t from)
+{
+ if ((to && from) == 0) {
+ return (to > from ? KNOT_TIMEDIFF_MIN : KNOT_TIMEDIFF_MAX);
+ } else {
+ return (knot_timediff_t)to - (knot_timediff_t)from;
+ }
+}
+
+/*!
+ * \brief Add a time difference to timestamp.
+ */
+inline static knot_time_t knot_time_add(knot_time_t since, knot_timediff_t howlong)
+{
+ return (since != 0 ? since + howlong : since);
+}
+
+inline static knot_time_t knot_time_plus(knot_time_t a, knot_time_t b)
+{
+ return ((a && b) ? a + b : 0);
+}
+
+/*!
+ * \brief Convert uint32_t-encoded timestamp to knot_time_t.
+ *
+ * In RRSIG rdata, there are inception and expiration timestamps in uint32_t format.
+ * One shall use 'serial arithmetics' to decode them.
+ *
+ * The result of this function is a timestamp that equals to
+ * given 32-bit time in lower 32 bits, and does not differ from
+ * now by more than 2^31.
+ */
+inline static knot_time_t knot_time_from_u32(uint32_t u32time, knot_time_t now)
+{
+ if (now == 0) {
+ now = knot_time();
+ }
+
+ uint32_t now_lower32 = (uint32_t)now;
+ uint64_t now_upper32 = now >> 32;
+ if (now_lower32 > u32time && now_lower32 - u32time > INT32_MAX) {
+ now_upper32++;
+ } else if (now_lower32 < u32time && u32time - now_lower32 > INT32_MAX) {
+ now_upper32--;
+ }
+
+ return (now_upper32 << 32) + u32time;
+}
+
+/*!
+ * \brief Parse a text-formatted timestamp to knot_time_t using format specification.
+ *
+ * \param format The timestamp text format specification.
+ * \param timespec Text-formatted timestamp.
+ * \param time The parsed timestamp.
+ *
+ * The format specification basics:
+ * <format 1>|<format 2> - The pipe sign separates two time format specifications. Leftmost
+ * specification matching the timespec is used.
+ * '<a string>' - Matches exactly <a string> (not containing apostrophes) in timespec.
+ * # - Hashtag matches for a number in timespec, stands for either a UNIX timestamp,
+ * or, within a context of an unit, as a number of such units.
+ * Y, M, D, h, m, s - Matches a number, stands for a number of years, months, days, hours,
+ * minutes and seconds, respectively.
+ * +, - - The + and - signs declaring that following timespec is relative to "now".
+ * A single sign can be used to limit the timestamp being in future or in past,
+ * or both +- allow the timestamp to select any (just one) of them.
+ * U - Matches one of Y, M, D, h, m, s in the timespec standing for a time unit.
+ * u - Like U, but the unit in the timestamp is from: y, mo, d, h, mi, s.
+ *
+ * \retval -1 An error occurred, out_time has no sense.
+ * \return 0 OK, timestamp parsed successfully.
+ */
+int knot_time_parse(const char *format, const char *timespec, knot_time_t *time);
+
+/*!
+ * \brief Print the timestamp in specified format into a string buffer.
+ *
+ * \param format The timestamp text format specification.
+ * \param time The timestamp to be printed.
+ * \param dst The destination buffer pointer with text-formatted timestamp.
+ * \param dst_len The destination buffer length.
+ *
+ * \retval -1 An error occurred, the buffer may be filled with nonsense.
+ * \return 0 OK, timestamp printed successfully.
+ */
+int knot_time_print(knot_time_print_t format, knot_time_t time, char *dst, size_t dst_len);
+
+/*!
+ * \brief Print the timestamp in a predefined human format.
+ *
+ * Condensed format (zone file compatible): 1w2d3h4m5s
+ * Normal format: 1 week 2 days 3 hours 4 minutes 5 seconds
+ *
+ * \param time The timestamp to be printed.
+ * \param dst The destination buffer pointer with text-formatted timestamp.
+ * \param dst_len The destination buffer length.
+ * \param condensed Condensed format indication.
+ *
+ * \retval -1 An error occurred, the buffer may be filled with nonsense.
+ * \return >0 OK, timestamp printed successfully.
+ */
+int knot_time_print_human(knot_time_t time, char *dst, size_t dst_len, bool condensed);
diff --git a/src/contrib/toeplitz.h b/src/contrib/toeplitz.h
new file mode 100644
index 0000000..02974d2
--- /dev/null
+++ b/src/contrib/toeplitz.h
@@ -0,0 +1,122 @@
+/* Copyright (C) 2022 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+/*!
+ * \brief Microsoft Toeplitz-based hash implementation
+ */
+
+#pragma once
+
+#include <assert.h>
+#include <stdint.h>
+
+#include "libknot/endian.h"
+
+/*!
+ * \brief Computes a Toeplitz hash value for given key and data.
+ *
+ * \param key Key vector
+ * \param key_len Length of the key vector in bytes.
+ * \param data Input data to compute hash from.
+ * \param data_len Lenght of the input data in bytes.
+ *
+ * \return A Toeplitz hash value.
+ */
+inline static uint32_t toeplitz_hash(const uint8_t *key, const size_t key_len,
+ const uint8_t *data, const size_t data_len)
+{
+ assert(key_len >= 4 + 2 * (16 + 2));
+
+ uint32_t key32 = be32toh(*(const uint32_t *)key);
+ key += sizeof(uint32_t);
+
+ int ret = 0;
+
+ for (int i = 0; i < data_len; i++) {
+ for (int bit = 7; bit >= 0; bit--) {
+ if (data[i] & (1 << bit)) {
+ ret ^= key32;
+ }
+
+ key32 <<= 1;
+ key32 |= !!(key[0] & (1 << bit));
+ }
+ key++;
+ }
+
+ return ret;
+}
+
+/*!
+ * \brief Toeplitz hash context for divided processing.
+ */
+typedef struct {
+ const uint8_t *data;
+ const uint8_t *data_end;
+ const uint8_t *key;
+ uint32_t hash;
+ uint32_t key32;
+} toeplitz_ctx_t;
+
+inline static void toeplitz_init(toeplitz_ctx_t *ctx, uint8_t count,
+ const uint8_t *key, const uint8_t key_len,
+ const uint8_t *data, const uint8_t data_len)
+{
+ assert(key_len >= 40);
+
+ ctx->data = data;
+ ctx->data_end = data + data_len;
+ ctx->key = key + sizeof(uint32_t);
+ ctx->hash = 0;
+ ctx->key32 = be32toh(*(const uint32_t *)key);
+
+ const uint8_t *stop = ctx->data + count;
+ assert(stop <= ctx->data_end);
+
+ while (ctx->data < stop) {
+ for (int bit = 7; bit >= 0; bit--) {
+ if (*ctx->data & (1 << bit)) {
+ ctx->hash ^= ctx->key32;
+ }
+
+ ctx->key32 <<= 1;
+ ctx->key32 |= !!(*ctx->key & (1 << bit));
+ }
+ ctx->data++;
+ ctx->key++;
+ }
+}
+
+inline static uint32_t toeplitz_finish(toeplitz_ctx_t *ctx)
+{
+ uint32_t hash = ctx->hash;
+ uint32_t key32 = ctx->key32;
+ const uint8_t *key = ctx->key;
+
+ for (const uint8_t *in = ctx->data; in < ctx->data_end; in++) {
+ for (int bit = 7; bit >= 0; bit--) {
+ if (*in & (1 << bit)) {
+ hash ^= key32;
+ }
+
+ key32 <<= 1;
+ key32 |= !!(*key & (1 << bit));
+ }
+ key++;
+ }
+
+ return hash;
+}
diff --git a/src/contrib/tolower.h b/src/contrib/tolower.h
new file mode 100644
index 0000000..c8b4897
--- /dev/null
+++ b/src/contrib/tolower.h
@@ -0,0 +1,52 @@
+/* Copyright (C) 2018 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+/*!
+ * \brief Table for converting ASCII characters to lowercase.
+ */
+
+#pragma once
+
+#include <stdint.h>
+
+/*!
+ * \brief Converts binary character to lowercase.
+ *
+ * \param c Character code.
+ *
+ * \return \a c converted to lowercase (or \a c if not applicable).
+ */
+static inline uint8_t knot_tolower(uint8_t c) {
+ const uint8_t *tolower_table = (uint8_t *)
+ "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F"
+ "\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F"
+ "\x20\x21\x22\x23\x24\x25\x26\x27\x28\x29\x2A\x2B\x2C\x2D\x2E\x2F"
+ "\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39\x3A\x3B\x3C\x3D\x3E\x3F"
+ "\x40\x61\x62\x63\x64\x65\x66\x67\x68\x69\x6A\x6B\x6C\x6D\x6E\x6F"
+ "\x70\x71\x72\x73\x74\x75\x76\x77\x78\x79\x7A\x5B\x5C\x5D\x5E\x5F"
+ "\x60\x61\x62\x63\x64\x65\x66\x67\x68\x69\x6A\x6B\x6C\x6D\x6E\x6F"
+ "\x70\x71\x72\x73\x74\x75\x76\x77\x78\x79\x7A\x7B\x7C\x7D\x7E\x7F"
+ "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8A\x8B\x8C\x8D\x8E\x8F"
+ "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9A\x9B\x9C\x9D\x9E\x9F"
+ "\xA0\xA1\xA2\xA3\xA4\xA5\xA6\xA7\xA8\xA9\xAA\xAB\xAC\xAD\xAE\xAF"
+ "\xB0\xB1\xB2\xB3\xB4\xB5\xB6\xB7\xB8\xB9\xBA\xBB\xBC\xBD\xBE\xBF"
+ "\xC0\xC1\xC2\xC3\xC4\xC5\xC6\xC7\xC8\xC9\xCA\xCB\xCC\xCD\xCE\xCF"
+ "\xD0\xD1\xD2\xD3\xD4\xD5\xD6\xD7\xD8\xD9\xDA\xDB\xDC\xDD\xDE\xDF"
+ "\xE0\xE1\xE2\xE3\xE4\xE5\xE6\xE7\xE8\xE9\xEA\xEB\xEC\xED\xEE\xEF"
+ "\xF0\xF1\xF2\xF3\xF4\xF5\xF6\xF7\xF8\xF9\xFA\xFB\xFC\xFD\xFE\xFF";
+
+ return tolower_table[c];
+}
diff --git a/src/contrib/trim.h b/src/contrib/trim.h
new file mode 100644
index 0000000..2b89249
--- /dev/null
+++ b/src/contrib/trim.h
@@ -0,0 +1,36 @@
+/* Copyright (C) 2018 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+/*!
+ * \brief Heap memory trimmer.
+ */
+
+#pragma once
+
+#ifdef HAVE_MALLOC_TRIM
+#include <malloc.h>
+#endif
+
+/*!
+ * \brief Trim excess heap memory.
+ */
+static inline void mem_trim(void)
+{
+#ifdef HAVE_MALLOC_TRIM
+ malloc_trim(0);
+#endif
+ return;
+}
diff --git a/src/contrib/ucw/LICENSE b/src/contrib/ucw/LICENSE
new file mode 100644
index 0000000..b463d57
--- /dev/null
+++ b/src/contrib/ucw/LICENSE
@@ -0,0 +1 @@
+../licenses/LGPL-2.0 \ No newline at end of file
diff --git a/src/contrib/ucw/array-sort.h b/src/contrib/ucw/array-sort.h
new file mode 100644
index 0000000..1ff1377
--- /dev/null
+++ b/src/contrib/ucw/array-sort.h
@@ -0,0 +1,195 @@
+/*
+ * UCW Library -- Universal Simple Array Sorter
+ *
+ * (c) 2003--2008 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#pragma once
+
+#include "contrib/macros.h"
+
+/*
+ * This is not a normal header file, it's a generator of sorting
+ * routines. Each time you include it with parameters set in the
+ * corresponding preprocessor macros, it generates an array sorter
+ * with the parameters given.
+ *
+ * You might wonder why the heck do we implement our own array sorter
+ * instead of using qsort(). The primary reason is that qsort handles
+ * only continuous arrays, but we need to sort array-like data structures
+ * where the only way to access elements is by using an indexing macro.
+ * Besides that, we are more than 2 times faster.
+ *
+ * So much for advocacy, there are the parameters (those marked with [*]
+ * are mandatory):
+ *
+ * ASORT_PREFIX(x) [*] add a name prefix (used on all global names
+ * defined by the sorter)
+ * ASORT_KEY_TYPE [*] data type of a single array entry key
+ * ASORT_ELT(i) returns the key of i-th element; if this macro is not
+ * defined, the function gets a pointer to an array to be sorted
+ * ASORT_LT(x,y) x < y for ASORT_KEY_TYPE (default: "x<y")
+ * ASORT_SWAP(i,j) swap i-th and j-th element (default: assume _ELT
+ * is an l-value and swap just the keys)
+ * ASORT_THRESHOLD threshold for switching between quicksort and insertsort
+ * ASORT_EXTRA_ARGS extra arguments for the sort function (they are always
+ * visible in all the macros supplied above), starts with comma
+ *
+ * After including this file, a function ASORT_PREFIX(sort)(unsigned array_size)
+ * or ASORT_PREFIX(sort)(ASORT_KEY_TYPE *array, unsigned array_size) [if ASORT_ELT
+ * is not defined] is declared and all parameter macros are automatically
+ * undef'd.
+ */
+
+#ifndef ASORT_LT
+#define ASORT_LT(x,y) ((x) < (y))
+#endif
+
+#ifndef ASORT_SWAP
+#define ASORT_SWAP(i,j) do { ASORT_KEY_TYPE tmp = ASORT_ELT(i); ASORT_ELT(i)=ASORT_ELT(j); ASORT_ELT(j)=tmp; } while (0)
+#endif
+
+#ifndef ASORT_THRESHOLD
+#define ASORT_THRESHOLD 8 /* Guesswork and experimentation */
+#endif
+
+#ifndef ASORT_EXTRA_ARGS
+#define ASORT_EXTRA_ARGS
+#endif
+
+#ifndef ASORT_ELT
+#define ASORT_ARRAY_ARG ASORT_KEY_TYPE *array,
+#define ASORT_ELT(i) array[i]
+#else
+#define ASORT_ARRAY_ARG
+#endif
+
+/**
+ * The generated sorting function. If `ASORT_ELT` macro is not provided, the
+ * @ASORT_ARRAY_ARG is equal to `ASORT_KEY_TYPE *array` and is the array to be
+ * sorted. If the macro is provided, this parameter is omitted. In that case,
+ * you can sort global variables or pass your structure by @ASORT_EXTRA_ARGS.
+ **/
+static void ASORT_PREFIX(sort)(ASORT_ARRAY_ARG unsigned array_size ASORT_EXTRA_ARGS)
+{
+ struct stk { int l, r; } stack[8*sizeof(unsigned)];
+ int l, r, left, right, m;
+ unsigned sp = 0;
+ ASORT_KEY_TYPE pivot;
+
+ if (array_size <= 1)
+ return;
+
+ /* QuickSort with optimizations a'la Sedgewick, but stop at ASORT_THRESHOLD */
+
+ left = 0;
+ right = array_size - 1;
+ for(;;)
+ {
+ l = left;
+ r = right;
+ m = (l+r)/2;
+ if (ASORT_LT(ASORT_ELT(m), ASORT_ELT(l)))
+ ASORT_SWAP(l,m);
+ if (ASORT_LT(ASORT_ELT(r), ASORT_ELT(m)))
+ {
+ ASORT_SWAP(m,r);
+ if (ASORT_LT(ASORT_ELT(m), ASORT_ELT(l)))
+ ASORT_SWAP(l,m);
+ }
+ pivot = ASORT_ELT(m);
+ do
+ {
+ while (ASORT_LT(ASORT_ELT(l), pivot))
+ l++;
+ while (ASORT_LT(pivot, ASORT_ELT(r)))
+ r--;
+ if (l < r)
+ {
+ ASORT_SWAP(l,r);
+ l++;
+ r--;
+ }
+ else if (l == r)
+ {
+ l++;
+ r--;
+ }
+ }
+ while (l <= r);
+ if ((r - left) >= ASORT_THRESHOLD && (right - l) >= ASORT_THRESHOLD)
+ {
+ /* Both partitions ok => push the larger one */
+ if ((r - left) > (right - l))
+ {
+ stack[sp].l = left;
+ stack[sp].r = r;
+ left = l;
+ }
+ else
+ {
+ stack[sp].l = l;
+ stack[sp].r = right;
+ right = r;
+ }
+ sp++;
+ }
+ else if ((r - left) >= ASORT_THRESHOLD)
+ {
+ /* Left partition OK, right undersize */
+ right = r;
+ }
+ else if ((right - l) >= ASORT_THRESHOLD)
+ {
+ /* Right partition OK, left undersize */
+ left = l;
+ }
+ else
+ {
+ /* Both partitions undersize => pop */
+ if (!sp)
+ break;
+ sp--;
+ left = stack[sp].l;
+ right = stack[sp].r;
+ }
+ }
+
+ /*
+ * We have a partially sorted array, finish by insertsort. Inspired
+ * by qsort() in GNU libc.
+ */
+
+ /* Find minimal element which will serve as a barrier */
+ r = MIN(array_size, ASORT_THRESHOLD);
+ m = 0;
+ for (l=1; l<r; l++)
+ if (ASORT_LT(ASORT_ELT(l),ASORT_ELT(m)))
+ m = l;
+ ASORT_SWAP(0,m);
+
+ /* Insertion sort */
+ for (m=1; m<(int)array_size; m++)
+ {
+ l=m;
+ while (ASORT_LT(ASORT_ELT(m),ASORT_ELT(l-1)))
+ l--;
+ while (l < m)
+ {
+ ASORT_SWAP(l,m);
+ l++;
+ }
+ }
+}
+
+#undef ASORT_PREFIX
+#undef ASORT_KEY_TYPE
+#undef ASORT_ELT
+#undef ASORT_LT
+#undef ASORT_SWAP
+#undef ASORT_THRESHOLD
+#undef ASORT_EXTRA_ARGS
+#undef ASORT_ARRAY_ARG
diff --git a/src/contrib/ucw/binsearch.h b/src/contrib/ucw/binsearch.h
new file mode 100644
index 0000000..b791d39
--- /dev/null
+++ b/src/contrib/ucw/binsearch.h
@@ -0,0 +1,50 @@
+/*
+ * UCW Library -- Generic Binary Search
+ *
+ * (c) 2005 Martin Mares <mj@ucw.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#pragma once
+
+/***
+ * [[defs]]
+ * Definitions
+ * -----------
+ ***/
+
+/**
+ * Find the first element not lower than \p x in the sorted array \p ary of \p N elements (non-decreasing order).
+ * Returns the index of the found element or \p N if no exists. Uses `ary_lt_x(ary,i,x)` to compare the i'th element with \p x.
+ * The time complexity is `O(log(N))`.
+ **/
+#define BIN_SEARCH_FIRST_GE_CMP(ary, N, ary_lt_x, x, ...) ({ \
+ unsigned l = 0, r = (N); \
+ while (l < r) \
+ { \
+ unsigned m = (l+r)/2; \
+ if (ary_lt_x(ary, m, x, __VA_ARGS__)) \
+ l = m+1; \
+ else \
+ r = m; \
+ } \
+ l; \
+})
+
+/**
+ * The default comparison macro for \ref BIN_SEARCH_FIRST_GE_CMP().
+ **/
+#define ARY_LT_NUM(ary,i,x) (ary)[i] < (x)
+
+/**
+ * Same as \ref BIN_SEARCH_FIRST_GE_CMP(), but uses the default `<` operator for comparisons.
+ **/
+#define BIN_SEARCH_FIRST_GE(ary,N,x) BIN_SEARCH_FIRST_GE_CMP(ary,N,ARY_LT_NUM,x)
+
+/**
+ * Search the sorted array \p ary of \p N elements (non-decreasing) for the first occurrence of \p x.
+ * Returns the index or -1 if no such element exists. Uses the `<` operator for comparisons.
+ **/
+#define BIN_SEARCH_EQ(ary,N,x) ({ int i = BIN_SEARCH_FIRST_GE(ary,N,x); if (i >= (N) || (ary)[i] != (x)) i=-1; i; })
diff --git a/src/contrib/ucw/heap.c b/src/contrib/ucw/heap.c
new file mode 100644
index 0000000..7d74aeb
--- /dev/null
+++ b/src/contrib/ucw/heap.c
@@ -0,0 +1,153 @@
+/*
+ * Binary heap
+ *
+ * (c) 2012 Ondrej Filip <feela@network.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+/***
+ * Introduction
+ * ------------
+ *
+ * Binary heap is a simple data structure, which for example supports efficient insertions, deletions
+ * and access to the minimal inserted item. We define several macros for such operations.
+ * Note that because of simplicity of heaps, we have decided to define direct macros instead
+ * of a <<generic:,macro generator>> as for several other data structures in the Libucw.
+ *
+ * A heap is represented by a number of elements and by an array of values. Beware that we
+ * index this array from one, not from zero as do the standard C arrays.
+ *
+ * Most macros use these parameters:
+ *
+ * - @num - a variable (signed or unsigned integer) with the number of elements
+ * - @heap - a C array of type @type; the heap is stored in `heap[1] .. heap[num]`; `heap[0]` is unused
+ *
+ * A valid heap must follow these rules:
+ *
+ * - `num >= 0`
+ * - `heap[i] >= heap[i / 2]` for each `i` in `[2, num]`
+ *
+ * The first element `heap[1]` is always lower or equal to all other elements.
+ ***/
+
+#include <string.h>
+#include <stdlib.h>
+#include "contrib/ucw/heap.h"
+
+static inline void heap_swap(heap_val_t **e1, heap_val_t **e2)
+{
+ if (e1 == e2) return; /* Stack tmp should be faster than tmpelem. */
+ heap_val_t *tmp = *e1; /* Even faster than 2-XOR nowadays. */
+ *e1 = *e2;
+ *e2 = tmp;
+ int pos = (*e1)->pos;
+ (*e1)->pos = (*e2)->pos;
+ (*e2)->pos = pos;
+}
+
+int heap_init(struct heap *h, int (*cmp)(void *, void *), int init_size)
+{
+ int isize = init_size ? init_size : INITIAL_HEAP_SIZE;
+
+ h->num = 0;
+ h->max_size = isize;
+ h->cmp = cmp;
+ h->data = malloc((isize + 1) * sizeof(heap_val_t*)); /* Temp element unused. */
+
+ return h->data ? 1 : 0;
+}
+
+void heap_deinit(struct heap *h)
+{
+ free(h->data);
+ memset(h, 0, sizeof(*h));
+}
+
+static inline void _heap_bubble_down(struct heap *h, int e)
+{
+ int e1;
+ for (;;) {
+ e1 = 2 * e;
+ if (e1 > h->num) break;
+ if ((h->cmp(*HELEMENT(h, e), *HELEMENT(h, e1)) < 0) &&
+ (e1 == h->num || (h->cmp(*HELEMENT(h, e), *HELEMENT(h, e1 + 1)) < 0))) break;
+ if ((e1 != h->num) && (h->cmp(*HELEMENT(h, e1 + 1), *HELEMENT(h, e1)) < 0)) e1++;
+ heap_swap(HELEMENT(h, e), HELEMENT(h, e1));
+ e = e1;
+ }
+}
+
+static inline void _heap_bubble_up(struct heap *h, int e)
+{
+ int e1;
+ while (e > 1) {
+ e1 = e / 2;
+ if (h->cmp(*HELEMENT(h, e1), *HELEMENT(h, e)) < 0) break;
+ heap_swap(HELEMENT(h, e), HELEMENT(h, e1));
+ e = e1;
+ }
+}
+
+void heap_replace(struct heap *h, int pos, heap_val_t *e)
+{
+ *HELEMENT(h, pos) = e;
+ e->pos = pos;
+
+ if (pos == 1 || h->cmp(*HELEMENT(h, pos / 2), e) < 0) {
+ _heap_bubble_down(h, pos);
+ } else {
+ _heap_bubble_up(h, pos);
+ }
+}
+
+void heap_delmin(struct heap *h)
+{
+ if (h->num == 0) return;
+ if (h->num > 1) {
+ heap_swap(HHEAD(h), HELEMENT(h, h->num));
+ }
+ (*HELEMENT(h, h->num))->pos = 0;
+ h->num--;
+ _heap_bubble_down(h, 1);
+}
+
+int heap_insert(struct heap *h, heap_val_t *e)
+{
+ if (h->num == h->max_size) {
+ h->max_size = h->max_size * HEAP_INCREASE_STEP;
+ h->data = realloc(h->data, (h->max_size + 1) * sizeof(heap_val_t*));
+ if (!h->data) {
+ return 0;
+ }
+ }
+
+ h->num++;
+ *HELEMENT(h, h->num) = e;
+ e->pos = h->num;
+ _heap_bubble_up(h, h->num);
+ return 1;
+}
+
+int heap_find(struct heap *h, heap_val_t *elm)
+{
+ return ((struct heap_val *) elm)->pos;
+}
+
+void heap_delete(struct heap *h, int e)
+{
+ heap_swap(HELEMENT(h, e), HELEMENT(h, h->num));
+ (*HELEMENT(h, h->num))->pos = 0;
+ h->num--;
+ if (h->cmp(*HELEMENT(h, e), *HELEMENT(h, h->num + 1)) < 0) {
+ _heap_bubble_up(h, e);
+ } else {
+ _heap_bubble_down(h, e);
+ }
+
+ if ((h->num > INITIAL_HEAP_SIZE) && (h->num < h->max_size / HEAP_DECREASE_THRESHOLD)) {
+ h->max_size = h->max_size / HEAP_INCREASE_STEP;
+ h->data = realloc(h->data, (h->max_size + 1) * sizeof(heap_val_t*));
+ }
+}
diff --git a/src/contrib/ucw/heap.h b/src/contrib/ucw/heap.h
new file mode 100644
index 0000000..f85bb82
--- /dev/null
+++ b/src/contrib/ucw/heap.h
@@ -0,0 +1,46 @@
+/* Copyright (C) 2022 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+struct heap_val {
+ int pos;
+};
+
+typedef struct heap_val heap_val_t;
+
+struct heap {
+ int num; /* Number of elements */
+ int max_size; /* Size of allocated memory */
+ int (*cmp)(void *, void *);
+ heap_val_t **data;
+}; /* Array follows */
+
+#define INITIAL_HEAP_SIZE 512 /* initial heap size */
+#define HEAP_INCREASE_STEP 2 /* multiplier for each inflation, keep conservative */
+#define HEAP_DECREASE_THRESHOLD 2 /* threshold for deflation, keep conservative */
+#define HELEMENT(h,num) ((h)->data + (num))
+#define HHEAD(h) HELEMENT((h), 1)
+#define EMPTY_HEAP(h) ((h)->num == 0)
+
+int heap_init(struct heap *, int (*cmp)(void *, void *), int);
+void heap_deinit(struct heap *);
+
+void heap_delmin(struct heap *);
+int heap_insert(struct heap *, heap_val_t *);
+int heap_find(struct heap *, heap_val_t *);
+void heap_delete(struct heap *, int);
+void heap_replace(struct heap *, int, heap_val_t *);
diff --git a/src/contrib/ucw/lists.c b/src/contrib/ucw/lists.c
new file mode 100644
index 0000000..01af28f
--- /dev/null
+++ b/src/contrib/ucw/lists.c
@@ -0,0 +1,264 @@
+/*
+ * BIRD Library -- Linked Lists
+ *
+ * (c) 1998 Martin Mares <mj@ucw.cz>
+ * (c) 2015, 2020-2022 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>
+ *
+ * Can be freely distributed and used under the terms of the GNU GPL.
+ */
+
+/**
+ * DOC: Linked lists
+ *
+ * The BIRD library provides a set of functions for operating on linked
+ * lists. The lists are internally represented as standard doubly linked
+ * lists with synthetic head and tail which makes all the basic operations
+ * run in constant time and contain no extra end-of-list checks. Each list
+ * is described by a &list structure, nodes can have any format as long
+ * as they start with a &node structure. If you want your nodes to belong
+ * to multiple lists at once, you can embed multiple &node structures in them
+ * and use the SKIP_BACK() macro to calculate a pointer to the start of the
+ * structure from a &node pointer, but beware of obscurity.
+ *
+ * There also exist safe linked lists (&slist, &snode and all functions
+ * being prefixed with |s_|) which support asynchronous walking very
+ * similar to that used in the &fib structure.
+ */
+
+#include <assert.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "contrib/ucw/lists.h"
+#include "contrib/mempattern.h"
+
+/**
+ * add_tail - append a node to a list
+ * \p l: linked list
+ * \p n: list node
+ *
+ * add_tail() takes a node \p n and appends it at the end of the list \p l.
+ */
+void
+add_tail(list_t *l, node_t *n)
+{
+ node_t *z = &l->tail;
+
+ n->next = z;
+ n->prev = z->prev;
+ z->prev->next = n;
+ z->prev = n;
+ assert(z->next == NULL);
+}
+
+/**
+ * add_head - prepend a node to a list
+ * \p l: linked list
+ * \p n: list node
+ *
+ * add_head() takes a node \p n and prepends it at the start of the list \p l.
+ */
+void
+add_head(list_t *l, node_t *n)
+{
+ node_t *z = &l->head;
+
+ n->next = z->next;
+ n->prev = z;
+ z->next->prev = n;
+ z->next = n;
+ assert(z->prev == NULL);
+}
+
+/**
+ * insert_node - insert a node to a list
+ * \p n: a new list node
+ * \p after: a node of a list
+ *
+ * Inserts a node \p n to a linked list after an already inserted
+ * node \p after.
+ */
+void
+insert_node(node_t *n, node_t *after)
+{
+ node_t *z = after->next;
+
+ n->next = z;
+ n->prev = after;
+ after->next = n;
+ z->prev = n;
+}
+
+/**
+ * rem_node - remove a node from a list
+ * \p n: node to be removed
+ *
+ * Removes a node \p n from the list it's linked in.
+ */
+void
+rem_node(node_t *n)
+{
+ node_t *z = n->prev;
+ node_t *x = n->next;
+
+ z->next = x;
+ x->prev = z;
+ n->prev = 0;
+ n->next = 0;
+}
+
+/**
+ * init_list - create an empty list
+ * \p l: list
+ *
+ * init_list() takes a &list structure and initializes its
+ * fields, so that it represents an empty list.
+ */
+void
+init_list(list_t *l)
+{
+ l->head.next = &l->tail;
+ l->head.prev = NULL;
+ l->tail.next = NULL;
+ l->tail.prev = &l->head;
+}
+
+/**
+ * add_tail_list - concatenate two lists
+ * \p to: destination list
+ * \p l: source list
+ *
+ * This function appends all elements of the list \p l to
+ * the list \p to in constant time.
+ */
+void
+add_tail_list(list_t *to, list_t *l)
+{
+ node_t *p = to->tail.prev;
+ node_t *q = l->head.next;
+
+ p->next = q;
+ q->prev = p;
+ to->tail.prev = l->tail.prev;
+}
+
+/**
+ * list_dup - duplicate list
+ * \p to: destination list
+ * \p l: source list
+ *
+ * This function duplicates all elements of the list \p l to
+ * the list \p to in linear time.
+ *
+ * This function only works with a homogenous item size.
+ */
+void list_dup(list_t *dst, list_t *src, size_t itemsz)
+{
+ node_t *n;
+ WALK_LIST(n, *src) {
+ node_t *i = malloc(itemsz);
+ memcpy(i, n, itemsz);
+ add_tail(dst, i);
+ }
+}
+
+/**
+ * list_size - gets number of nodes
+ * \p l: list
+ *
+ * This function counts nodes in list \p l and returns this number.
+ */
+size_t list_size(const list_t *l)
+{
+ size_t count = 0;
+
+ node_t *n;
+ WALK_LIST(n, *l) {
+ count++;
+ }
+
+ return count;
+}
+
+/**
+ * fix_list - correction of head/tail pointers when list had been memmove'd
+ * \p l: list
+ *
+ * WARNING: must not be called on empty list
+ */
+void fix_list(list_t *l)
+{
+ node_t *n = HEAD(*l);
+ assert(n->next != NULL);
+ n->prev = &l->head;
+
+ n = TAIL(*l);
+ assert(n->prev != NULL);
+ n->next = &l->tail;
+}
+
+/**
+ * ptrlist_add - add pointer to pointer list
+ * \p to: destination list
+ * \p val: added pointer
+ * \p mm: memory context
+ */
+ptrnode_t *ptrlist_add(list_t *to, void *val, knot_mm_t *mm)
+{
+ ptrnode_t *node = mm_alloc(mm , sizeof(ptrnode_t));
+ if (node == NULL) {
+ return NULL;
+ } else {
+ node->d = val;
+ }
+ add_tail(to, &node->n);
+ return node;
+}
+
+/**
+ * ptrlist_free - free all nodes in pointer list
+ * \p list: list nodes
+ * \p mm: memory context
+ */
+void ptrlist_free(list_t *list, knot_mm_t *mm)
+{
+ node_t *n, *nxt;
+ WALK_LIST_DELSAFE(n, nxt, *list) {
+ mm_free(mm, n);
+ }
+ init_list(list);
+}
+
+/**
+ * ptrlist_rem - remove pointer node
+ * \p val: pointer to remove
+ * \p mm: memory context
+ */
+void ptrlist_rem(ptrnode_t *node, knot_mm_t *mm)
+{
+ rem_node(&node->n);
+ mm_free(mm, node);
+}
+
+/**
+ * ptrlist_deep_free - free all nodes incl referenced data
+ * \p list: list nodes
+ * \p mm: memory context
+ */
+void ptrlist_deep_free(list_t *l, knot_mm_t *mm)
+{
+ ptrnode_t *n;
+ WALK_LIST(n, *l) {
+ mm_free(mm, n->d);
+ }
+ ptrlist_free(l, mm);
+}
+
+void ptrlist_free_custom(list_t *l, knot_mm_t *mm, ptrlist_free_cb free_cb)
+{
+ ptrnode_t *n;
+ WALK_LIST(n, *l) {
+ free_cb(n->d);
+ }
+ ptrlist_free(l, mm);
+}
diff --git a/src/contrib/ucw/lists.h b/src/contrib/ucw/lists.h
new file mode 100644
index 0000000..1a3ca95
--- /dev/null
+++ b/src/contrib/ucw/lists.h
@@ -0,0 +1,74 @@
+/*
+ * BIRD Library -- Linked Lists
+ *
+ * (c) 1998 Martin Mares <mj@ucw.cz>
+ * (c) 2015, 2020-2022 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>
+ *
+ * Can be freely distributed and used under the terms of the GNU GPL.
+ */
+
+#pragma once
+
+#include <string.h>
+#include "libknot/mm_ctx.h"
+
+typedef struct node {
+ struct node *next, *prev;
+} node_t;
+
+typedef struct list {
+ node_t head, tail;
+} list_t;
+
+#define NODE (node_t *)
+#define HEAD(list) ((void *)((list).head.next))
+#define TAIL(list) ((void *)((list).tail.prev))
+#define WALK_LIST(n,list) for(n=HEAD(list);(NODE (n))->next; \
+ n=(void *)((NODE (n))->next))
+#define WALK_LIST_DELSAFE(n,nxt,list) \
+ for(n=HEAD(list); (nxt=(void *)((NODE (n))->next)); n=(void *) nxt)
+/* WALK_LIST_FIRST supposes that called code removes each processed node */
+#define WALK_LIST_FIRST(n,list) \
+ while(n=HEAD(list), (NODE (n))->next)
+#define WALK_LIST_BACKWARDS(n,list) for(n=TAIL(list);(NODE (n))->prev; \
+ n=(void *)((NODE (n))->prev))
+#define WALK_LIST_BACKWARDS_DELSAFE(n,prv,list) \
+ for(n=TAIL(list); prv=(void *)((NODE (n))->prev); n=(void *) prv)
+
+#define EMPTY_LIST(list) (!(NODE HEAD(list))->next)
+
+/*! \brief Free every node in the list. */
+#define WALK_LIST_FREE(list) \
+ do { \
+ node_t *n=0,*nxt=0; \
+ WALK_LIST_DELSAFE(n,nxt,list) { \
+ free(n); \
+ } \
+ init_list(&list); \
+ } while(0)
+
+void add_tail(list_t *, node_t *);
+void add_head(list_t *, node_t *);
+void rem_node(node_t *);
+void add_tail_list(list_t *, list_t *);
+void init_list(list_t *);
+void insert_node(node_t *, node_t *);
+void list_dup(list_t *dst, list_t *src, size_t itemsz);
+size_t list_size(const list_t *);
+void fix_list(list_t *);
+
+/*!
+ * \brief Generic pointer list implementation.
+ */
+typedef struct ptrnode {
+ node_t n;
+ void *d;
+} ptrnode_t;
+
+ptrnode_t *ptrlist_add(list_t *, void *, knot_mm_t *);
+void ptrlist_free(list_t *, knot_mm_t *);
+void ptrlist_rem(ptrnode_t *node, knot_mm_t *mm);
+void ptrlist_deep_free(list_t *, knot_mm_t *);
+
+typedef void (*ptrlist_free_cb)(void *);
+void ptrlist_free_custom(list_t *l, knot_mm_t *mm, ptrlist_free_cb free_cb);
diff --git a/src/contrib/ucw/mempool.c b/src/contrib/ucw/mempool.c
new file mode 100644
index 0000000..8e835c1
--- /dev/null
+++ b/src/contrib/ucw/mempool.c
@@ -0,0 +1,323 @@
+/*
+ * UCW Library -- Memory Pools (One-Time Allocation)
+ *
+ * (c) 1997--2001 Martin Mares <mj@ucw.cz>
+ * (c) 2007 Pavel Charvat <pchar@ucw.cz>
+ * (c) 2015, 2017 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#undef LOCAL_DEBUG
+
+#include <string.h>
+#include <strings.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <assert.h>
+#include "contrib/asan.h"
+#include "contrib/macros.h"
+#include "contrib/ucw/mempool.h"
+
+/** \todo This shouldn't be precalculated, but computed on load. */
+#define CPU_PAGE_SIZE 4096
+
+/** Align an integer \p s to the nearest higher multiple of \p a (which should be a power of two) **/
+#define ALIGN_TO(s, a) (((s)+a-1)&~(a-1))
+#define MP_CHUNK_TAIL ALIGN_TO(sizeof(struct mempool_chunk), CPU_STRUCT_ALIGN)
+#define MP_SIZE_MAX (~0U - MP_CHUNK_TAIL - CPU_PAGE_SIZE)
+#define DBG(s, ...)
+
+/** \note Imported MMAP backend from bigalloc.c */
+#define CONFIG_UCW_POOL_IS_MMAP
+#ifdef CONFIG_UCW_POOL_IS_MMAP
+#include <sys/mman.h>
+static void *
+page_alloc(uint64_t len)
+{
+ if (!len) {
+ return NULL;
+ }
+ if (len > SIZE_MAX) {
+ return NULL;
+ }
+ assert(!(len & (CPU_PAGE_SIZE-1)));
+ uint8_t *p = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0);
+ if (p == (uint8_t*) MAP_FAILED) {
+ return NULL;
+ }
+ return p;
+}
+
+static void
+page_free(void *start, uint64_t len)
+{
+ assert(!(len & (CPU_PAGE_SIZE-1)));
+ assert(!((uintptr_t) start & (CPU_PAGE_SIZE-1)));
+ munmap(start, len);
+}
+#endif
+
+struct mempool_chunk {
+ struct mempool_chunk *next;
+ unsigned size;
+};
+
+static unsigned
+mp_align_size(unsigned size)
+{
+#ifdef CONFIG_UCW_POOL_IS_MMAP
+ return ALIGN_TO(size + MP_CHUNK_TAIL, CPU_PAGE_SIZE) - MP_CHUNK_TAIL;
+#else
+ return ALIGN_TO(size, CPU_STRUCT_ALIGN);
+#endif
+}
+
+void
+mp_init(struct mempool *pool, unsigned chunk_size)
+{
+ chunk_size = mp_align_size(MAX(sizeof(struct mempool), chunk_size));
+ *pool = (struct mempool) {
+ .chunk_size = chunk_size,
+ .threshold = chunk_size >> 1,
+ .last_big = &pool->last_big
+ };
+}
+
+static void *
+mp_new_big_chunk(unsigned size)
+{
+ uint8_t *data = malloc(size + MP_CHUNK_TAIL);
+ if (!data) {
+ return NULL;
+ }
+ ASAN_POISON_MEMORY_REGION(data, size);
+ struct mempool_chunk *chunk = (struct mempool_chunk *)(data + size);
+ chunk->size = size;
+ return chunk;
+}
+
+static void
+mp_free_big_chunk(struct mempool_chunk *chunk)
+{
+ void *ptr = (uint8_t *)chunk - chunk->size;
+ ASAN_UNPOISON_MEMORY_REGION(ptr, chunk->size);
+ free(ptr);
+}
+
+static void *
+mp_new_chunk(unsigned size)
+{
+#ifdef CONFIG_UCW_POOL_IS_MMAP
+ uint8_t *data = page_alloc(size + MP_CHUNK_TAIL);
+ if (!data) {
+ return NULL;
+ }
+ ASAN_POISON_MEMORY_REGION(data, size);
+ struct mempool_chunk *chunk = (struct mempool_chunk *)(data + size);
+ chunk->size = size;
+ return chunk;
+#else
+ return mp_new_big_chunk(size);
+#endif
+}
+
+static void
+mp_free_chunk(struct mempool_chunk *chunk)
+{
+#ifdef CONFIG_UCW_POOL_IS_MMAP
+ uint8_t *data = (uint8_t *)chunk - chunk->size;
+ ASAN_UNPOISON_MEMORY_REGION(data, chunk->size);
+ page_free(data, chunk->size + MP_CHUNK_TAIL);
+#else
+ mp_free_big_chunk(chunk);
+#endif
+}
+
+struct mempool *
+mp_new(unsigned chunk_size)
+{
+ chunk_size = mp_align_size(MAX(sizeof(struct mempool), chunk_size));
+ struct mempool_chunk *chunk = mp_new_chunk(chunk_size);
+ struct mempool *pool = (void *)chunk - chunk_size;
+ ASAN_UNPOISON_MEMORY_REGION(pool, sizeof(*pool));
+ DBG("Creating mempool %p with %u bytes long chunks", pool, chunk_size);
+ chunk->next = NULL;
+ ASAN_POISON_MEMORY_REGION(chunk, sizeof(struct mempool_chunk));
+ *pool = (struct mempool) {
+ .state = { .free = { chunk_size - sizeof(*pool) }, .last = { chunk } },
+ .chunk_size = chunk_size,
+ .threshold = chunk_size >> 1,
+ .last_big = &pool->last_big
+ };
+ return pool;
+}
+
+static void
+mp_free_chain(struct mempool_chunk *chunk)
+{
+ while (chunk) {
+ ASAN_UNPOISON_MEMORY_REGION(chunk, sizeof(struct mempool_chunk));
+ struct mempool_chunk *next = chunk->next;
+ mp_free_chunk(chunk);
+ chunk = next;
+ }
+}
+
+static void
+mp_free_big_chain(struct mempool_chunk *chunk)
+{
+ while (chunk) {
+ ASAN_UNPOISON_MEMORY_REGION(chunk, sizeof(struct mempool_chunk));
+ struct mempool_chunk *next = chunk->next;
+ mp_free_big_chunk(chunk);
+ chunk = next;
+ }
+}
+
+void
+mp_delete(struct mempool *pool)
+{
+ if (pool == NULL) {
+ return;
+ }
+ DBG("Deleting mempool %p", pool);
+ mp_free_big_chain(pool->state.last[1]);
+ mp_free_chain(pool->unused);
+ mp_free_chain(pool->state.last[0]); // can contain the mempool structure
+}
+
+void
+mp_flush(struct mempool *pool)
+{
+ mp_free_big_chain(pool->state.last[1]);
+ struct mempool_chunk *chunk = pool->state.last[0], *next;
+ while (chunk) {
+ ASAN_UNPOISON_MEMORY_REGION(chunk, sizeof(struct mempool_chunk));
+ if ((uint8_t *)chunk - chunk->size == (uint8_t *)pool) {
+ break;
+ }
+ next = chunk->next;
+ chunk->next = pool->unused;
+ ASAN_POISON_MEMORY_REGION(chunk, sizeof(struct mempool_chunk));
+ pool->unused = chunk;
+ chunk = next;
+ }
+ pool->state.last[0] = chunk;
+ if (chunk) {
+ pool->state.free[0] = chunk->size - sizeof(*pool);
+ ASAN_POISON_MEMORY_REGION(chunk, sizeof(struct mempool_chunk));
+ } else {
+ pool->state.free[0] = 0;
+ }
+ pool->state.last[1] = NULL;
+ pool->state.free[1] = 0;
+ pool->last_big = &pool->last_big;
+}
+
+static void
+mp_stats_chain(struct mempool_chunk *chunk, struct mempool_stats *stats, unsigned idx)
+{
+ struct mempool_chunk *next;
+ while (chunk) {
+ ASAN_UNPOISON_MEMORY_REGION(chunk, sizeof(struct mempool_chunk));
+ stats->chain_size[idx] += chunk->size + sizeof(*chunk);
+ stats->chain_count[idx]++;
+ next = chunk->next;
+ ASAN_POISON_MEMORY_REGION(chunk, sizeof(struct mempool_chunk));
+ chunk = next;
+ }
+ stats->total_size += stats->chain_size[idx];
+}
+
+void
+mp_stats(struct mempool *pool, struct mempool_stats *stats)
+{
+ bzero(stats, sizeof(*stats));
+ mp_stats_chain(pool->state.last[0], stats, 0);
+ mp_stats_chain(pool->state.last[1], stats, 1);
+ mp_stats_chain(pool->unused, stats, 2);
+}
+
+uint64_t
+mp_total_size(struct mempool *pool)
+{
+ struct mempool_stats stats;
+ mp_stats(pool, &stats);
+ return stats.total_size;
+}
+
+static void *
+mp_alloc_internal(struct mempool *pool, unsigned size)
+{
+ struct mempool_chunk *chunk;
+ if (size <= pool->threshold) {
+ pool->idx = 0;
+ if (pool->unused) {
+ chunk = pool->unused;
+ ASAN_UNPOISON_MEMORY_REGION(chunk, sizeof(struct mempool_chunk));
+ pool->unused = chunk->next;
+ } else {
+ chunk = mp_new_chunk(pool->chunk_size);
+ }
+ chunk->next = pool->state.last[0];
+ ASAN_POISON_MEMORY_REGION(chunk, sizeof(struct mempool_chunk));
+ pool->state.last[0] = chunk;
+ pool->state.free[0] = pool->chunk_size - size;
+ return (uint8_t *)chunk - pool->chunk_size;
+ } else if (size <= MP_SIZE_MAX) {
+ pool->idx = 1;
+ unsigned aligned = ALIGN_TO(size, CPU_STRUCT_ALIGN);
+ chunk = mp_new_big_chunk(aligned);
+ if (!chunk) {
+ return NULL;
+ }
+ chunk->next = pool->state.last[1];
+ ASAN_POISON_MEMORY_REGION(chunk, sizeof(struct mempool_chunk));
+ pool->state.last[1] = chunk;
+ pool->state.free[1] = aligned - size;
+ return pool->last_big = (uint8_t *)chunk - aligned;
+ } else {
+ fprintf(stderr, "Cannot allocate %u bytes from a mempool", size);
+ assert(0);
+ return NULL;
+ }
+}
+
+void *
+mp_alloc(struct mempool *pool, unsigned size)
+{
+ unsigned avail = pool->state.free[0] & ~(CPU_STRUCT_ALIGN - 1);
+ void *ptr = NULL;
+ if (size <= avail) {
+ pool->state.free[0] = avail - size;
+ ptr = (uint8_t*)pool->state.last[0] - avail;
+ } else {
+ ptr = mp_alloc_internal(pool, size);
+ }
+ ASAN_UNPOISON_MEMORY_REGION(ptr, size);
+ return ptr;
+}
+
+void *
+mp_alloc_noalign(struct mempool *pool, unsigned size)
+{
+ void *ptr = NULL;
+ if (size <= pool->state.free[0]) {
+ ptr = (uint8_t*)pool->state.last[0] - pool->state.free[0];
+ pool->state.free[0] -= size;
+ } else {
+ ptr = mp_alloc_internal(pool, size);
+ }
+ ASAN_UNPOISON_MEMORY_REGION(ptr, size);
+ return ptr;
+}
+
+void *
+mp_alloc_zero(struct mempool *pool, unsigned size)
+{
+ void *ptr = mp_alloc(pool, size);
+ bzero(ptr, size);
+ return ptr;
+}
diff --git a/src/contrib/ucw/mempool.h b/src/contrib/ucw/mempool.h
new file mode 100644
index 0000000..c5a4fa8
--- /dev/null
+++ b/src/contrib/ucw/mempool.h
@@ -0,0 +1,124 @@
+/*
+ * UCW Library -- Memory Pools
+ *
+ * (c) 1997--2005 Martin Mares <mj@ucw.cz>
+ * (c) 2007 Pavel Charvat <pchar@ucw.cz>
+ * (c) 2015, 2017 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>
+ *
+ * This software may be freely distributed and used according to the terms
+ * of the GNU Lesser General Public License.
+ */
+
+#pragma once
+
+#include <string.h>
+#include <stdint.h>
+
+#define CPU_STRUCT_ALIGN (sizeof(void*))
+
+/***
+ * [[defs]]
+ * Definitions
+ * -----------
+ ***/
+
+/**
+ * Memory pool state (see mp_push(), ...).
+ * You should use this one as an opaque handle only, the insides are internal.
+ **/
+struct mempool_state {
+ unsigned free[2];
+ void *last[2];
+};
+
+/**
+ * Memory pool.
+ * You should use this one as an opaque handle only, the insides are internal.
+ **/
+struct mempool {
+ struct mempool_state state;
+ void *unused, *last_big;
+ unsigned chunk_size, threshold, idx;
+};
+
+struct mempool_stats { /** Mempool statistics. See mp_stats(). **/
+ uint64_t total_size; /** Real allocated size in bytes. */
+ unsigned chain_count[3]; /** Number of allocated chunks in small/big/unused chains. */
+ unsigned chain_size[3]; /** Size of allocated chunks in small/big/unused chains. */
+};
+
+/***
+ * [[basic]]
+ * Basic manipulation
+ * ------------------
+ ***/
+
+/**
+ * Initialize a given mempool structure.
+ * \p chunk_size must be in the interval `[1, UINT_MAX / 2]`.
+ * It will allocate memory by this large chunks and take
+ * memory to satisfy requests from them.
+ *
+ * Memory pools can be treated as <<trans:respools,resources>>, see <<trans:res_mempool()>>.
+ **/
+void mp_init(struct mempool *pool, unsigned chunk_size);
+
+/**
+ * Allocate and initialize a new memory pool.
+ * See \ref mp_init() for \p chunk_size limitations.
+ *
+ * The new mempool structure is allocated on the new mempool.
+ *
+ * Memory pools can be treated as <<trans:respools,resources>>, see <<trans:res_mempool()>>.
+ **/
+struct mempool *mp_new(unsigned chunk_size);
+
+/**
+ * Cleanup mempool initialized by mp_init or mp_new.
+ * Frees all the memory allocated by this mempool and,
+ * if created by \ref mp_new(), the \p pool itself.
+ **/
+void mp_delete(struct mempool *pool);
+
+/**
+ * Frees all data on a memory pool, but leaves it working.
+ * It can keep some of the chunks allocated to serve
+ * further allocation requests. Leaves the \p pool alive,
+ * even if it was created with \ref mp_new().
+ **/
+void mp_flush(struct mempool *pool);
+
+/**
+ * Compute some statistics for debug purposes.
+ * See the definition of the <<struct_mempool_stats,mempool_stats structure>>.
+ **/
+void mp_stats(struct mempool *pool, struct mempool_stats *stats);
+uint64_t mp_total_size(struct mempool *pool); /** How many bytes were allocated by the pool. **/
+
+/***
+ * [[alloc]]
+ * Allocation routines
+ * -------------------
+ ***/
+
+/**
+ * The function allocates new \p size bytes on a given memory pool.
+ * If the \p size is zero, the resulting pointer is undefined,
+ * but it may be safely reallocated or used as the parameter
+ * to other functions below.
+ *
+ * The resulting pointer is always aligned to a multiple of
+ * `CPU_STRUCT_ALIGN` bytes and this condition remains true also
+ * after future reallocations.
+ **/
+void *mp_alloc(struct mempool *pool, unsigned size);
+
+/**
+ * The same as \ref mp_alloc(), but the result may be unaligned.
+ **/
+void *mp_alloc_noalign(struct mempool *pool, unsigned size);
+
+/**
+ * The same as \ref mp_alloc(), but fills the newly allocated memory with zeroes.
+ **/
+void *mp_alloc_zero(struct mempool *pool, unsigned size);
diff --git a/src/contrib/url-parser/LICENSE b/src/contrib/url-parser/LICENSE
new file mode 100644
index 0000000..0161703
--- /dev/null
+++ b/src/contrib/url-parser/LICENSE
@@ -0,0 +1 @@
+../licenses/MIT \ No newline at end of file
diff --git a/src/contrib/url-parser/README.md b/src/contrib/url-parser/README.md
new file mode 100644
index 0000000..c3b1919
--- /dev/null
+++ b/src/contrib/url-parser/README.md
@@ -0,0 +1,14 @@
+url-parser
+==========
+
+This is Joyent's [URL parser][original-source] from their
+[http-parser][joyent-http-parser] library.
+
+## LICENSE
+
+This derivative is licensed under the same terms as NGINX and copyright Joyent,
+Inc. and other Node contributors as outlined in their [LICENSE][].
+
+[original-source]: https://github.com/joyent/http-parser/blob/e01811e7f4894d7f0f7f4bd8492cccec6f6b4038/http_parser.c#L2343
+[joyent-http-parser]: https://github.com/joyent/http-parser
+[LICENSE]: https://github.com/joyent/http-parser/blob/master/LICENSE-MIT
diff --git a/src/contrib/url-parser/url_parser.c b/src/contrib/url-parser/url_parser.c
new file mode 100644
index 0000000..5084740
--- /dev/null
+++ b/src/contrib/url-parser/url_parser.c
@@ -0,0 +1,635 @@
+/* Based on src/http/ngx_http_parse.c from NGINX copyright Igor Sysoev
+ *
+ * Additional changes are licensed under the same terms as NGINX and
+ * copyright Joyent, Inc. and other Node contributors. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+/**
+ * This is the http_parser_parse_url function extracted from joyent's
+ * http-parser library.
+ */
+
+#include <assert.h>
+#include <stdint.h>
+#include <stdlib.h>
+
+#include "url_parser.h"
+
+/* Macros for character classes; depends on strict-mode */
+#define CR '\r'
+#define LF '\n'
+#define LOWER(c) (unsigned char)(c | 0x20)
+#define IS_ALPHA(c) (LOWER(c) >= 'a' && LOWER(c) <= 'z')
+#define IS_NUM(c) ((c) >= '0' && (c) <= '9')
+#define IS_ALPHANUM(c) (IS_ALPHA(c) || IS_NUM(c))
+#define IS_HEX(c) (IS_NUM(c) || (LOWER(c) >= 'a' && LOWER(c) <= 'f'))
+#define IS_MARK(c) ((c) == '-' || (c) == '_' || (c) == '.' || \
+ (c) == '!' || (c) == '~' || (c) == '*' || (c) == '\'' || (c) == '(' || \
+ (c) == ')')
+#define IS_USERINFO_CHAR(c) (IS_ALPHANUM(c) || IS_MARK(c) || (c) == '%' || \
+ (c) == ';' || (c) == ':' || (c) == '&' || (c) == '=' || (c) == '+' || \
+ (c) == '$' || (c) == ',')
+
+#define STRICT_TOKEN(c) (tokens[(unsigned char)c])
+
+#ifndef BIT_AT
+# define BIT_AT(a, i) \
+ (!!((unsigned int) (a)[(unsigned int) (i) >> 3] & \
+ (1 << ((unsigned int) (i) & 7))))
+#endif
+
+#if HTTP_PARSER_STRICT
+# define T(v) 0
+#else
+# define T(v) v
+#endif
+
+static const uint8_t normal_url_char[32] = {
+/* 0 nul 1 soh 2 stx 3 etx 4 eot 5 enq 6 ack 7 bel */
+ 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0,
+/* 8 bs 9 ht 10 nl 11 vt 12 np 13 cr 14 so 15 si */
+ 0 | T(2) | 0 | 0 | T(16) | 0 | 0 | 0,
+/* 16 dle 17 dc1 18 dc2 19 dc3 20 dc4 21 nak 22 syn 23 etb */
+ 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0,
+/* 24 can 25 em 26 sub 27 esc 28 fs 29 gs 30 rs 31 us */
+ 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0,
+/* 32 sp 33 ! 34 " 35 # 36 $ 37 % 38 & 39 ' */
+ 0 | 2 | 4 | 0 | 16 | 32 | 64 | 128,
+/* 40 ( 41 ) 42 * 43 + 44 , 45 - 46 . 47 / */
+ 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
+/* 48 0 49 1 50 2 51 3 52 4 53 5 54 6 55 7 */
+ 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
+/* 56 8 57 9 58 : 59 ; 60 < 61 = 62 > 63 ? */
+ 1 | 2 | 4 | 8 | 16 | 32 | 64 | 0,
+/* 64 @ 65 A 66 B 67 C 68 D 69 E 70 F 71 G */
+ 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
+/* 72 H 73 I 74 J 75 K 76 L 77 M 78 N 79 O */
+ 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
+/* 80 P 81 Q 82 R 83 S 84 T 85 U 86 V 87 W */
+ 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
+/* 88 X 89 Y 90 Z 91 [ 92 \ 93 ] 94 ^ 95 _ */
+ 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
+/* 96 ` 97 a 98 b 99 c 100 d 101 e 102 f 103 g */
+ 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
+/* 104 h 105 i 106 j 107 k 108 l 109 m 110 n 111 o */
+ 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
+/* 112 p 113 q 114 r 115 s 116 t 117 u 118 v 119 w */
+ 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128,
+/* 120 x 121 y 122 z 123 { 124 | 125 } 126 ~ 127 del */
+ 1 | 2 | 4 | 8 | 16 | 32 | 64 | 0, };
+
+#if HTTP_PARSER_STRICT
+#define TOKEN(c) (tokens[(unsigned char)c])
+#define IS_URL_CHAR(c) (BIT_AT(normal_url_char, (unsigned char)c))
+#define IS_HOST_CHAR(c) (IS_ALPHANUM(c) || (c) == '.' || (c) == '-')
+#else
+#define TOKEN(c) ((c == ' ') ? ' ' : tokens[(unsigned char)c])
+#define IS_URL_CHAR(c) \
+ (BIT_AT(normal_url_char, (unsigned char)c) || ((c) & 0x80))
+#define IS_HOST_CHAR(c) \
+ (IS_ALPHANUM(c) || (c) == '.' || (c) == '-' || (c) == '_')
+#endif
+
+
+
+enum state
+ { s_dead = 1 /* important that this is > 0 */
+
+ , s_start_req_or_res
+ , s_res_or_resp_H
+ , s_start_res
+ , s_res_H
+ , s_res_HT
+ , s_res_HTT
+ , s_res_HTTP
+ , s_res_first_http_major
+ , s_res_http_major
+ , s_res_first_http_minor
+ , s_res_http_minor
+ , s_res_first_status_code
+ , s_res_status_code
+ , s_res_status_start
+ , s_res_status
+ , s_res_line_almost_done
+
+ , s_start_req
+
+ , s_req_method
+ , s_req_spaces_before_url
+ , s_req_schema
+ , s_req_schema_slash
+ , s_req_schema_slash_slash
+ , s_req_server_start
+ , s_req_server
+ , s_req_server_with_at
+ , s_req_path
+ , s_req_query_string_start
+ , s_req_query_string
+ , s_req_fragment_start
+ , s_req_fragment
+ , s_req_http_start
+ , s_req_http_H
+ , s_req_http_HT
+ , s_req_http_HTT
+ , s_req_http_HTTP
+ , s_req_first_http_major
+ , s_req_http_major
+ , s_req_first_http_minor
+ , s_req_http_minor
+ , s_req_line_almost_done
+
+ , s_header_field_start
+ , s_header_field
+ , s_header_value_discard_ws
+ , s_header_value_discard_ws_almost_done
+ , s_header_value_discard_lws
+ , s_header_value_start
+ , s_header_value
+ , s_header_value_lws
+
+ , s_header_almost_done
+
+ , s_chunk_size_start
+ , s_chunk_size
+ , s_chunk_parameters
+ , s_chunk_size_almost_done
+
+ , s_headers_almost_done
+ , s_headers_done
+
+ /* Important: 's_headers_done' must be the last 'header' state. All
+ * states beyond this must be 'body' states. It is used for overflow
+ * checking. See the PARSING_HEADER() macro.
+ */
+
+ , s_chunk_data
+ , s_chunk_data_almost_done
+ , s_chunk_data_done
+
+ , s_body_identity
+ , s_body_identity_eof
+
+ , s_message_done
+ };
+
+
+enum http_host_state
+ {
+ s_http_host_dead = 1
+ , s_http_userinfo_start
+ , s_http_userinfo
+ , s_http_host_start
+ , s_http_host_v6_start
+ , s_http_host
+ , s_http_host_v6
+ , s_http_host_v6_end
+ , s_http_host_v6_zone_start
+ , s_http_host_v6_zone
+ , s_http_host_port_start
+ , s_http_host_port
+};
+
+/* Our URL parser.
+ *
+ * This is designed to be shared by http_parser_execute() for URL validation,
+ * hence it has a state transition + byte-for-byte interface. In addition, it
+ * is meant to be embedded in http_parser_parse_url(), which does the dirty
+ * work of turning state transitions URL components for its API.
+ *
+ * This function should only be invoked with non-space characters. It is
+ * assumed that the caller cares about (and can detect) the transition between
+ * URL and non-URL states by looking for these.
+ */
+static enum state
+parse_url_char(enum state s, const char ch)
+{
+ if (ch == ' ' || ch == '\r' || ch == '\n') {
+ return s_dead;
+ }
+
+#if HTTP_PARSER_STRICT
+ if (ch == '\t' || ch == '\f') {
+ return s_dead;
+ }
+#endif
+
+ switch (s) {
+ case s_req_spaces_before_url:
+ /* Proxied requests are followed by scheme of an absolute URI (alpha).
+ * All methods except CONNECT are followed by '/' or '*'.
+ */
+
+ if (ch == '/' || ch == '*') {
+ return s_req_path;
+ }
+
+ if (IS_ALPHA(ch)) {
+ return s_req_schema;
+ }
+
+ break;
+
+ case s_req_schema:
+ if (IS_ALPHA(ch)) {
+ return s;
+ }
+
+ if (ch == ':') {
+ return s_req_schema_slash;
+ }
+
+ break;
+
+ case s_req_schema_slash:
+ if (ch == '/') {
+ return s_req_schema_slash_slash;
+ }
+
+ break;
+
+ case s_req_schema_slash_slash:
+ if (ch == '/') {
+ return s_req_server_start;
+ }
+
+ break;
+
+ case s_req_server_with_at:
+ if (ch == '@') {
+ return s_dead;
+ }
+
+ /* FALLTHROUGH */
+ case s_req_server_start:
+ case s_req_server:
+ if (ch == '/') {
+ return s_req_path;
+ }
+
+ if (ch == '?') {
+ return s_req_query_string_start;
+ }
+
+ if (ch == '@') {
+ return s_req_server_with_at;
+ }
+
+ if (IS_USERINFO_CHAR(ch) || ch == '[' || ch == ']') {
+ return s_req_server;
+ }
+
+ break;
+
+ case s_req_path:
+ if (IS_URL_CHAR(ch)) {
+ return s;
+ }
+
+ switch (ch) {
+ case '?':
+ return s_req_query_string_start;
+
+ case '#':
+ return s_req_fragment_start;
+ }
+
+ break;
+
+ case s_req_query_string_start:
+ case s_req_query_string:
+ if (IS_URL_CHAR(ch)) {
+ return s_req_query_string;
+ }
+
+ switch (ch) {
+ case '?':
+ /* allow extra '?' in query string */
+ return s_req_query_string;
+
+ case '#':
+ return s_req_fragment_start;
+ }
+
+ break;
+
+ case s_req_fragment_start:
+ if (IS_URL_CHAR(ch)) {
+ return s_req_fragment;
+ }
+
+ switch (ch) {
+ case '?':
+ return s_req_fragment;
+
+ case '#':
+ return s;
+ }
+
+ break;
+
+ case s_req_fragment:
+ if (IS_URL_CHAR(ch)) {
+ return s;
+ }
+
+ switch (ch) {
+ case '?':
+ case '#':
+ return s;
+ }
+
+ break;
+
+ default:
+ break;
+ }
+
+ /* We should never fall out of the switch above unless there's an error */
+ return s_dead;
+}
+
+static enum http_host_state
+http_parse_host_char(enum http_host_state s, const char ch) {
+ switch(s) {
+ case s_http_userinfo:
+ case s_http_userinfo_start:
+ if (ch == '@') {
+ return s_http_host_start;
+ }
+
+ if (IS_USERINFO_CHAR(ch)) {
+ return s_http_userinfo;
+ }
+ break;
+
+ case s_http_host_start:
+ if (ch == '[') {
+ return s_http_host_v6_start;
+ }
+
+ if (IS_HOST_CHAR(ch)) {
+ return s_http_host;
+ }
+
+ break;
+
+ case s_http_host:
+ if (IS_HOST_CHAR(ch)) {
+ return s_http_host;
+ }
+
+ /* FALLTHROUGH */
+ case s_http_host_v6_end:
+ if (ch == ':') {
+ return s_http_host_port_start;
+ }
+
+ break;
+
+ case s_http_host_v6:
+ if (ch == ']') {
+ return s_http_host_v6_end;
+ }
+
+ /* FALLTHROUGH */
+ case s_http_host_v6_start:
+ if (IS_HEX(ch) || ch == ':' || ch == '.') {
+ return s_http_host_v6;
+ }
+
+ if (s == s_http_host_v6 && ch == '%') {
+ return s_http_host_v6_zone_start;
+ }
+ break;
+
+ case s_http_host_v6_zone:
+ if (ch == ']') {
+ return s_http_host_v6_end;
+ }
+
+ /* FALLTHROUGH */
+ case s_http_host_v6_zone_start:
+ /* RFC 6874 Zone ID consists of 1*( unreserved / pct-encoded) */
+ if (IS_ALPHANUM(ch) || ch == '%' || ch == '.' || ch == '-' || ch == '_' ||
+ ch == '~') {
+ return s_http_host_v6_zone;
+ }
+ break;
+
+ case s_http_host_port:
+ case s_http_host_port_start:
+ if (IS_NUM(ch)) {
+ return s_http_host_port;
+ }
+
+ break;
+
+ default:
+ break;
+ }
+ return s_http_host_dead;
+}
+
+static int
+http_parse_host(const char * buf, struct http_parser_url *u, int found_at) {
+ assert(u->field_set & (1 << UF_HOST));
+ enum http_host_state s;
+
+ const char *p;
+ size_t buflen = u->field_data[UF_HOST].off + u->field_data[UF_HOST].len;
+
+ u->field_data[UF_HOST].len = 0;
+
+ s = found_at ? s_http_userinfo_start : s_http_host_start;
+
+ for (p = buf + u->field_data[UF_HOST].off; p < buf + buflen; p++) {
+ enum http_host_state new_s = http_parse_host_char(s, *p);
+
+ if (new_s == s_http_host_dead) {
+ return 1;
+ }
+
+ switch(new_s) {
+ case s_http_host:
+ if (s != s_http_host) {
+ u->field_data[UF_HOST].off = p - buf;
+ }
+ u->field_data[UF_HOST].len++;
+ break;
+
+ case s_http_host_v6:
+ if (s != s_http_host_v6) {
+ u->field_data[UF_HOST].off = p - buf;
+ }
+ u->field_data[UF_HOST].len++;
+ break;
+
+ case s_http_host_v6_zone_start:
+ case s_http_host_v6_zone:
+ u->field_data[UF_HOST].len++;
+ break;
+
+ case s_http_host_port:
+ if (s != s_http_host_port) {
+ u->field_data[UF_PORT].off = p - buf;
+ u->field_data[UF_PORT].len = 0;
+ u->field_set |= (1 << UF_PORT);
+ }
+ u->field_data[UF_PORT].len++;
+ break;
+
+ case s_http_userinfo:
+ if (s != s_http_userinfo) {
+ u->field_data[UF_USERINFO].off = p - buf ;
+ u->field_data[UF_USERINFO].len = 0;
+ u->field_set |= (1 << UF_USERINFO);
+ }
+ u->field_data[UF_USERINFO].len++;
+ break;
+
+ default:
+ break;
+ }
+ s = new_s;
+ }
+
+ /* Make sure we don't end somewhere unexpected */
+ switch (s) {
+ case s_http_host_start:
+ case s_http_host_v6_start:
+ case s_http_host_v6:
+ case s_http_host_v6_zone_start:
+ case s_http_host_v6_zone:
+ case s_http_host_port_start:
+ case s_http_userinfo:
+ case s_http_userinfo_start:
+ return 1;
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+
+int
+http_parser_parse_url(const char *buf, size_t buflen, int is_connect,
+ struct http_parser_url *u)
+{
+ enum state s;
+ const char *p;
+ enum http_parser_url_fields uf, old_uf;
+ int found_at = 0;
+
+ u->port = u->field_set = 0;
+ s = is_connect ? s_req_server_start : s_req_spaces_before_url;
+ old_uf = UF_MAX;
+
+ for (p = buf; p < buf + buflen; p++) {
+ s = parse_url_char(s, *p);
+
+ /* Figure out the next field that we're operating on */
+ switch (s) {
+ case s_dead:
+ return 1;
+
+ /* Skip delimiters */
+ case s_req_schema_slash:
+ case s_req_schema_slash_slash:
+ case s_req_server_start:
+ case s_req_query_string_start:
+ case s_req_fragment_start:
+ continue;
+
+ case s_req_schema:
+ uf = UF_SCHEMA;
+ break;
+
+ case s_req_server_with_at:
+ found_at = 1;
+
+ /* FALLTHROUGH */
+ case s_req_server:
+ uf = UF_HOST;
+ break;
+
+ case s_req_path:
+ uf = UF_PATH;
+ break;
+
+ case s_req_query_string:
+ uf = UF_QUERY;
+ break;
+
+ case s_req_fragment:
+ uf = UF_FRAGMENT;
+ break;
+
+ default:
+ assert(!"Unexpected state");
+ return 1;
+ }
+
+ /* Nothing's changed; soldier on */
+ if (uf == old_uf) {
+ u->field_data[uf].len++;
+ continue;
+ }
+
+ u->field_data[uf].off = p - buf;
+ u->field_data[uf].len = 1;
+
+ u->field_set |= (1 << uf);
+ old_uf = uf;
+ }
+
+ /* host must be present if there is a schema */
+ /* parsing http:///toto will fail */
+ if ((u->field_set & (1 << UF_SCHEMA)) &&
+ (u->field_set & (1 << UF_HOST)) == 0) {
+ return 1;
+ }
+
+ if (u->field_set & (1 << UF_HOST)) {
+ if (http_parse_host(buf, u, found_at) != 0) {
+ return 1;
+ }
+ }
+
+ /* CONNECT requests can only contain "hostname:port" */
+ if (is_connect && u->field_set != ((1 << UF_HOST)|(1 << UF_PORT))) {
+ return 1;
+ }
+
+ if (u->field_set & (1 << UF_PORT)) {
+ /* Don't bother with endp; we've already validated the string */
+ unsigned long v = strtoul(buf + u->field_data[UF_PORT].off, NULL, 10);
+
+ /* Ports have a max value of 2^16 */
+ if (v > 0xffff) {
+ return 1;
+ }
+
+ u->port = (uint16_t) v;
+ }
+
+ return 0;
+}
diff --git a/src/contrib/url-parser/url_parser.h b/src/contrib/url-parser/url_parser.h
new file mode 100644
index 0000000..0f916e0
--- /dev/null
+++ b/src/contrib/url-parser/url_parser.h
@@ -0,0 +1,64 @@
+/* Based on src/http/ngx_http_parse.c from NGINX copyright Igor Sysoev
+ *
+ * Additional changes are licensed under the same terms as NGINX and
+ * copyright Joyent, Inc. and other Node contributors. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+/**
+ * This is the http_parser_parse_url function extracted from joyent's
+ * http-parser library.
+ */
+
+#pragma once
+
+#include <stdint.h>
+
+enum http_parser_url_fields
+ { UF_SCHEMA = 0
+ , UF_HOST = 1
+ , UF_PORT = 2
+ , UF_PATH = 3
+ , UF_QUERY = 4
+ , UF_FRAGMENT = 5
+ , UF_USERINFO = 6
+ , UF_MAX = 7
+ };
+
+/* Result structure for http_parser_parse_url().
+ *
+ * Callers should index into field_data[] with UF_* values iff field_set
+ * has the relevant (1 << UF_*) bit set. As a courtesy to clients (and
+ * because we probably have padding left over), we convert any port to
+ * a uint16_t.
+ */
+struct http_parser_url {
+ uint16_t field_set; /* Bitmask of (1 << UF_*) values */
+ uint16_t port; /* Converted UF_PORT string */
+
+ struct {
+ uint16_t off; /* Offset into buffer in which field starts */
+ uint16_t len; /* Length of run in buffer */
+ } field_data[UF_MAX];
+};
+
+int
+http_parser_parse_url(const char *buf, size_t buflen, int is_connect,
+ struct http_parser_url *u);
diff --git a/src/contrib/vpool/vpool.c b/src/contrib/vpool/vpool.c
new file mode 100644
index 0000000..f130a47
--- /dev/null
+++ b/src/contrib/vpool/vpool.c
@@ -0,0 +1,243 @@
+/*
+ * Copyright (c) 2006, 2008 Alexey Vatchenko <av@bsdua.org>
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <sys/types.h>
+
+#include <errno.h>
+#include <limits.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "contrib/vpool/vpool.h"
+
+static void vpool_shift(struct vpool *pool);
+static int vpool_new_size(struct vpool *pool, size_t datsize,
+ size_t *size);
+static int vpool_resize(struct vpool *pool, size_t datsize);
+
+static void
+vpool_shift(struct vpool *pool)
+{
+ if (pool->v_buf != pool->v_basebuf) {
+ memmove(pool->v_basebuf, pool->v_buf, pool->v_off);
+ pool->v_buf = pool->v_basebuf;
+ }
+}
+
+static int
+vpool_new_size(struct vpool *pool, size_t datsize, size_t *size)
+{
+ size_t need;
+ size_t rem;
+
+ if (datsize <= pool->v_size - pool->v_off) {
+ *size = pool->v_size;
+ return (0);
+ }
+
+ /* Check limit of new requested size */
+ if (pool->v_limit - pool->v_off < datsize) {
+ return (EFBIG);
+ }
+ need = pool->v_off + datsize;
+
+ /* Check limit of new size aligned to block size */
+ rem = need % pool->v_blksize;
+ if (rem != 0) {
+ if (pool->v_limit - pool->v_off >=
+ datsize + (pool->v_blksize - rem)) {
+ need += pool->v_blksize - rem;
+ } else {
+ need = pool->v_limit;
+ }
+ }
+
+ *size = need;
+ return (0);
+}
+
+static int
+vpool_resize(struct vpool *pool, size_t datsize)
+{
+ void *ret;
+ size_t size;
+ int error;
+
+ error = vpool_new_size(pool, datsize, &size);
+ if (error != 0) {
+ return (error);
+ }
+
+ if (size > pool->v_size) {
+ ret = malloc(size);
+ if (ret == NULL) {
+ return (ENOMEM);
+ }
+
+ if (pool->v_off > 0) {
+ memcpy(ret, pool->v_buf, pool->v_off);
+ }
+ free(pool->v_basebuf);
+ pool->v_basebuf = pool->v_buf = ret;
+ pool->v_size = size;
+ } else if ((pool->v_size - pool->v_off) -
+ (pool->v_buf - pool->v_basebuf) < datsize) {
+ vpool_shift(pool);
+ }
+
+ return (0);
+}
+
+void
+vpool_init(struct vpool *pool, size_t blksize, size_t limit)
+{
+
+ pool->v_basebuf = pool->v_buf = NULL;
+ pool->v_off = pool->v_size = 0;
+
+ pool->v_blksize = (blksize == 0) ? 4096 : blksize; /* XXX */
+ pool->v_limit = (limit == 0) ? SIZE_MAX : limit;
+
+ pool->v_lasterr = 0;
+}
+
+void
+vpool_final(struct vpool *pool)
+{
+ free(pool->v_basebuf);
+}
+
+void
+vpool_reset(struct vpool *pool)
+{
+ free(pool->v_basebuf);
+ pool->v_basebuf = pool->v_buf = NULL;
+ pool->v_off = pool->v_size = 0;
+ pool->v_lasterr = 0;
+}
+
+void
+vpool_wipe(struct vpool *pool)
+{
+ pool->v_off = 0;
+ pool->v_lasterr = 0;
+}
+
+void *
+vpool_insert(struct vpool *pool, size_t where, void *data, size_t datsize)
+{
+ void *ret;
+ int error;
+
+ error = vpool_resize(pool, datsize);
+ if (error != 0) {
+ pool->v_lasterr = error;
+ return (NULL);
+ }
+
+ /*
+ * If ``where'' is greater than or equal to offset then
+ * we are appending data to the end of the buffer.
+ */
+ if (where > pool->v_off) {
+ where = pool->v_off;
+ }
+
+ ret = (uint8_t *)pool->v_buf + where;
+ if (pool->v_off - where > 0) {
+ memmove(ret + datsize, ret, pool->v_off - where);
+ }
+ memcpy(ret, data, datsize);
+ pool->v_off += datsize;
+ pool->v_lasterr = 0;
+
+ return (ret);
+}
+
+void *
+vpool_expand(struct vpool *pool, size_t where, size_t size)
+{
+ void *ret;
+ int error;
+
+ error = vpool_resize(pool, size);
+ if (error != 0) {
+ pool->v_lasterr = error;
+ return (NULL);
+ }
+
+ /*
+ * If ``where'' is greater than or equal to offset then
+ * we are appending data to the end of the buffer.
+ */
+ if (where > pool->v_off) {
+ where = pool->v_off;
+ }
+
+ ret = (uint8_t *)pool->v_buf + where;
+ if (pool->v_off - where > 0) {
+ memmove(ret + size, ret, pool->v_off - where);
+ }
+ pool->v_off += size;
+ pool->v_lasterr = 0;
+
+ return (ret);
+}
+
+int
+vpool_truncate(struct vpool *pool,
+ size_t where, size_t size, enum vpool_trunc how)
+{
+ /* Check if caller wants to remove more data than we have */
+ if (where >= pool->v_off ||
+ size > pool->v_off || pool->v_off - size < where) {
+ pool->v_lasterr = ERANGE;
+ return (pool->v_lasterr);
+ }
+
+ if (how == VPOOL_EXCLUDE) {
+ if (where == 0) {
+ /*
+ * Optimization.
+ * Don't move data, just adjust pointer.
+ */
+ pool->v_buf = (uint8_t *)pool->v_buf + size;
+ } else {
+ memmove((uint8_t *)pool->v_buf + where,
+ (uint8_t *)pool->v_buf + where + size,
+ pool->v_off - size - where);
+ }
+ pool->v_off -= size;
+ } else {
+ pool->v_buf = (uint8_t *)pool->v_buf + where;
+ pool->v_off = size;
+ }
+
+ pool->v_lasterr = 0;
+ return (0);
+}
+
+void
+vpool_export(struct vpool *pool, void **buf, size_t *size)
+{
+ vpool_shift(pool);
+ *buf = pool->v_buf;
+ *size = pool->v_off;
+ pool->v_basebuf = pool->v_buf = NULL;
+ pool->v_off = pool->v_size = 0;
+ pool->v_lasterr = 0;
+}
diff --git a/src/contrib/vpool/vpool.h b/src/contrib/vpool/vpool.h
new file mode 100644
index 0000000..82e3d66
--- /dev/null
+++ b/src/contrib/vpool/vpool.h
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2006, 2008 Alexey Vatchenko <av@bsdua.org>
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/*
+ * VPool: implementation of pool of data with a variable size.
+ */
+#ifndef _VPOOL_H_
+#define _VPOOL_H_
+
+#include <stddef.h>
+#include <limits.h>
+
+struct vpool {
+ void *v_basebuf; /* pointer returned by (re|m)alloc() */
+ void *v_buf; /* actual data starts here */
+ size_t v_off;
+ size_t v_size;
+
+ size_t v_blksize;
+ size_t v_limit;
+ int v_lasterr;
+};
+
+enum vpool_trunc {VPOOL_EXCLUDE, VPOOL_INCLUDE};
+#define VPOOL_TAIL UINT_MAX
+
+void vpool_init(struct vpool *pool, size_t blksize, size_t limit);
+void vpool_final(struct vpool *pool);
+
+void vpool_reset(struct vpool *pool);
+void vpool_wipe(struct vpool *pool);
+
+void * vpool_insert(struct vpool *pool,
+ size_t where, void *data, size_t datsize);
+void * vpool_expand(struct vpool *pool, size_t where, size_t size);
+
+int vpool_truncate(struct vpool *pool,
+ size_t where, size_t size, enum vpool_trunc how);
+
+#define vpool_is_empty(pool) ((pool)->v_off == 0)
+#define vpool_get_buf(pool) ((pool)->v_buf)
+#define vpool_get_length(pool) ((pool)->v_off)
+#define vpool_get_error(pool) ((pool)->v_lasterr)
+
+void vpool_export(struct vpool *pool, void **buf, size_t *size);
+
+#endif /* !_VPOOL_H_ */
diff --git a/src/contrib/wire_ctx.h b/src/contrib/wire_ctx.h
new file mode 100644
index 0000000..f4f1789
--- /dev/null
+++ b/src/contrib/wire_ctx.h
@@ -0,0 +1,355 @@
+/* Copyright (C) 2019 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include <assert.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <string.h>
+#include <sys/types.h>
+
+#include "contrib/string.h"
+#include "libknot/endian.h"
+#include "libknot/errcode.h"
+
+/*!
+ * \brief Struct to keep the wire context.
+ */
+typedef struct wire_ctx {
+ size_t size;
+ uint8_t *wire;
+ uint8_t *position;
+ int error;
+ bool readonly;
+} wire_ctx_t;
+
+/*!
+ * \brief Initialize wire context.
+ */
+static inline wire_ctx_t wire_ctx_init(uint8_t *data, size_t size)
+{
+ assert(data);
+
+ wire_ctx_t result = {
+ .size = size,
+ .wire = data,
+ .position = data,
+ .error = KNOT_EOK,
+ .readonly = false
+ };
+
+ return result;
+}
+
+/*!
+ * \brief Initialize read only wire context.
+ *
+ * \note No write is performed, and error is set to KNOT_EACCES.
+ *
+ */
+static inline wire_ctx_t wire_ctx_init_const(const uint8_t *data, size_t size)
+{
+ assert(data);
+
+ wire_ctx_t result = wire_ctx_init((uint8_t *)data, size);
+ result.readonly = true;
+
+ return result;
+}
+
+/*!
+ * \brief Gets actual position.
+ *
+ * \return position from the begin.
+ */
+static inline size_t wire_ctx_offset(wire_ctx_t *ctx)
+{
+ assert(ctx);
+
+ return ctx->position - ctx->wire;
+}
+
+/*!
+ * \brief Set position offset from the begin.
+ *
+ * \param offset Wire offset (starts from 0).
+ *
+ * \note Noop if previous error.
+ */
+static inline void wire_ctx_set_offset(wire_ctx_t *ctx, size_t offset)
+{
+ assert(ctx);
+
+ if (ctx->error != KNOT_EOK) {
+ return;
+ }
+
+ if (offset > ctx->size) {
+ ctx->error = KNOT_ERANGE;
+ return;
+ }
+
+ ctx->position = ctx->wire + offset;
+}
+
+/*!
+ * \brief Gets available bytes.
+ *
+ * \return Number of bytes to end.
+ */
+static inline size_t wire_ctx_available(wire_ctx_t *ctx)
+{
+ assert(ctx);
+
+ return ctx->size - wire_ctx_offset(ctx);
+}
+
+/*!
+ * \brief Add offset to the current position.
+ *
+ * \note Noop if previous error.
+ */
+static inline void wire_ctx_skip(wire_ctx_t *ctx, ssize_t offset)
+{
+ assert(ctx);
+
+ if (ctx->error != KNOT_EOK) {
+ return;
+ }
+
+ // Check for out of scope skip.
+ if (offset >= 0) {
+ if (offset > wire_ctx_available(ctx)) {
+ ctx->error = KNOT_ERANGE;
+ return;
+ }
+ } else {
+ if (-offset > wire_ctx_offset(ctx)) {
+ ctx->error = KNOT_ERANGE;
+ return;
+ }
+ }
+
+ ctx->position += offset;
+}
+
+/*!
+ * \brief Check the context if reading is possible.
+ */
+static inline int wire_ctx_can_read(wire_ctx_t *ctx, size_t size)
+{
+ assert(ctx);
+
+ if (ctx->error != KNOT_EOK) {
+ return ctx->error;
+ }
+
+ if (wire_ctx_available(ctx) < size) {
+ return KNOT_EFEWDATA;
+ }
+
+ return KNOT_EOK;
+}
+
+/*!
+ * \brief Check the context if writing is possible.
+ */
+static inline int wire_ctx_can_write(wire_ctx_t *ctx, size_t size)
+{
+ assert(ctx);
+
+ if (ctx->error != KNOT_EOK) {
+ return ctx->error;
+ }
+
+ if (ctx->readonly) {
+ return KNOT_EACCES;
+ }
+
+ if (wire_ctx_available(ctx) < size) {
+ return KNOT_ESPACE;
+ }
+
+ return KNOT_EOK;
+}
+
+
+static inline void wire_ctx_read(wire_ctx_t *ctx, void *data, size_t size)
+{
+ assert(ctx);
+ assert(data);
+
+ if (ctx->error != KNOT_EOK) {
+ /* Avoid leaving data uninitialized. */
+ memzero(data, size);
+ return;
+ }
+
+ int ret = wire_ctx_can_read(ctx, size);
+ if (ret != KNOT_EOK) {
+ ctx->error = ret;
+ memzero(data, size);
+ return;
+ }
+
+ memcpy(data, ctx->position, size);
+ ctx->position += size;
+}
+
+static inline uint8_t wire_ctx_read_u8(wire_ctx_t *ctx)
+{
+ uint8_t result;
+ wire_ctx_read(ctx, &result, sizeof(result));
+
+ return result;
+}
+
+static inline uint16_t wire_ctx_read_u16(wire_ctx_t *ctx)
+{
+ uint16_t result;
+ wire_ctx_read(ctx, &result, sizeof(result));
+
+ return be16toh(result);
+}
+
+static inline uint32_t wire_ctx_read_u32(wire_ctx_t *ctx)
+{
+ uint32_t result;
+ wire_ctx_read(ctx, &result, sizeof(result));
+
+ return be32toh(result);
+}
+
+static inline uint64_t wire_ctx_read_u48(wire_ctx_t *ctx)
+{
+ /* This case is slightly tricky. */
+ uint64_t result = 0;
+ wire_ctx_read(ctx, (uint8_t *)&result + 1, 6);
+
+ return be64toh(result) >> 8;
+}
+
+static inline uint64_t wire_ctx_read_u64(wire_ctx_t *ctx)
+{
+ uint64_t result;
+ wire_ctx_read(ctx, &result, sizeof(result));
+
+ return be64toh(result);
+}
+
+
+static inline void wire_ctx_write(wire_ctx_t *ctx, const void *data, size_t size)
+{
+ assert(ctx);
+
+ if (ctx->error != KNOT_EOK) {
+ return;
+ }
+
+ if (size == 0) {
+ return;
+ }
+
+ assert(data);
+
+ int ret = wire_ctx_can_write(ctx, size);
+ if (ret != KNOT_EOK) {
+ ctx->error = ret;
+ return;
+ }
+
+ memcpy(ctx->position, data, size);
+ ctx->position += size;
+}
+
+static inline void wire_ctx_write_u8(wire_ctx_t *ctx, uint8_t value)
+{
+ wire_ctx_write(ctx, &value, sizeof(value));
+}
+
+static inline void wire_ctx_write_u16(wire_ctx_t *ctx, uint16_t value)
+{
+ uint16_t beval = htobe16(value);
+ wire_ctx_write(ctx, &beval, sizeof(beval));
+}
+
+static inline void wire_ctx_write_u32(wire_ctx_t *ctx, uint32_t value)
+{
+ uint32_t beval = htobe32(value);
+ wire_ctx_write(ctx, &beval, sizeof(beval));
+}
+
+static inline void wire_ctx_write_u48(wire_ctx_t *ctx, uint64_t value)
+{
+ /* This case is slightly tricky. */
+ uint64_t swapped = htobe64(value << 8);
+ wire_ctx_write(ctx, (uint8_t *)&swapped + 1, 6);
+}
+
+static inline void wire_ctx_write_u64(wire_ctx_t *ctx, uint64_t value)
+{
+ uint64_t beval = htobe64(value);
+ wire_ctx_write(ctx, &beval, sizeof(beval));
+}
+
+static inline void wire_ctx_clear(wire_ctx_t *ctx, size_t size)
+{
+ assert(ctx);
+
+ if (ctx->error != KNOT_EOK) {
+ return;
+ }
+
+ if (size == 0) {
+ return;
+ }
+
+ int ret = wire_ctx_can_write(ctx, size);
+ if (ret != KNOT_EOK) {
+ ctx->error = ret;
+ return;
+ }
+
+ memzero(ctx->position, size);
+ ctx->position += size;
+}
+
+static inline void wire_ctx_copy(wire_ctx_t *dst, wire_ctx_t *src, size_t size)
+{
+ assert(dst);
+ assert(src);
+
+ if (size == 0 || dst->error != KNOT_EOK) {
+ return;
+ }
+
+ if (wire_ctx_can_read(src, size) != KNOT_EOK) {
+ dst->error = KNOT_EFEWDATA;
+ return;
+ }
+
+ int ret = wire_ctx_can_write(dst, size);
+ if (ret != KNOT_EOK) {
+ dst->error = ret;
+ return;
+ }
+
+ memcpy(dst->position, src->position, size);
+ dst->position += size;
+ src->position += size;
+}