diff options
Diffstat (limited to '')
349 files changed, 32907 insertions, 15289 deletions
diff --git a/.github/matrix.py b/.github/matrix.py index 53279c4..d3ab890 100755 --- a/.github/matrix.py +++ b/.github/matrix.py @@ -86,14 +86,6 @@ def clean_compression(compression): return compression.replace("USE_", "").lower() -def get_asan_flags(cc): - return [ - "USE_OBSOLETE_LINKER=1", - 'DEBUG_CFLAGS="-g -fsanitize=address"', - 'LDFLAGS="-fsanitize=address"', - 'CPU_CFLAGS.generic="-O1"', - ] - def main(ref_name): print("Generating matrix for branch '{}'.".format(ref_name)) @@ -125,7 +117,7 @@ def main(ref_name): "TARGET": TARGET, "CC": CC, "FLAGS": [ - 'DEBUG_CFLAGS="-DDEBUG_LIST"', + 'DEBUG="-DDEBUG_LIST"', "USE_ZLIB=1", "USE_OT=1", "OT_INC=${HOME}/opt-ot/include", @@ -156,8 +148,10 @@ def main(ref_name): "os": os, "TARGET": TARGET, "CC": CC, - "FLAGS": get_asan_flags(CC) - + [ + "FLAGS": [ + "USE_OBSOLETE_LINKER=1", + 'ARCH_FLAGS="-g -fsanitize=address"', + 'OPT_CFLAGS="-O1"', "USE_ZLIB=1", "USE_OT=1", "OT_INC=${HOME}/opt-ot/include", @@ -196,7 +190,7 @@ def main(ref_name): "OPENSSL_VERSION=1.0.2u", "OPENSSL_VERSION=1.1.1s", "QUICTLS=yes", - "WOLFSSL_VERSION=5.6.4", + "WOLFSSL_VERSION=5.7.0", "AWS_LC_VERSION=1.16.0", # "BORINGSSL=yes", ] @@ -237,9 +231,9 @@ def main(ref_name): # macOS if "haproxy-" in ref_name: - os = "macos-12" # stable branch + os = "macos-13" # stable branch else: - os = "macos-latest" # development branch + os = "macos-14" # development branch TARGET = "osx" for CC in ["clang"]: diff --git a/.github/workflows/aws-lc.yml b/.github/workflows/aws-lc.yml index e590000..4384116 100644 --- a/.github/workflows/aws-lc.yml +++ b/.github/workflows/aws-lc.yml @@ -36,7 +36,7 @@ jobs: make -j$(nproc) CC=gcc TARGET=linux-glibc \ USE_OPENSSL_AWSLC=1 USE_QUIC=1 \ SSL_LIB=${HOME}/opt/lib SSL_INC=${HOME}/opt/include \ - DEBUG="-DDEBUG_STRICT -DDEBUG_MEMORY_POOLS -DDEBUG_POOL_INTEGRITY" \ + DEBUG="-DDEBUG_POOL_INTEGRITY" \ ADDLIB="-Wl,-rpath,/usr/local/lib/ -Wl,-rpath,$HOME/opt/lib/" sudo make install - name: Show HAProxy version diff --git 
a/.github/workflows/codespell.yml b/.github/workflows/codespell.yml index 3d66f29..385b698 100644 --- a/.github/workflows/codespell.yml +++ b/.github/workflows/codespell.yml @@ -17,4 +17,5 @@ jobs: - uses: codespell-project/actions-codespell@master with: skip: CHANGELOG,Makefile,*.fig,*.pem,./doc/design-thoughts,./doc/internals - ignore_words_list: ist,ists,hist,wan,ca,cas,que,ans,te,nd,referer,ot,uint,iif,fo,keep-alives,dosen,ifset,thrid,strack,ba,chck,hel,unx,mor,clen + ignore_words_list: ist,ists,hist,wan,ca,cas,que,ans,te,nd,referer,ot,uint,iif,fo,keep-alives,dosen,ifset,thrid,strack,ba,chck,hel,unx,mor,clen,collet,bu,htmp,siz,experim + uri_ignore_words_list: trafic,ressources diff --git a/.github/workflows/compliance.yml b/.github/workflows/compliance.yml index caf9624..29af0a9 100644 --- a/.github/workflows/compliance.yml +++ b/.github/workflows/compliance.yml @@ -34,7 +34,7 @@ jobs: ERR=1 \ TARGET=${{ matrix.TARGET }} \ CC=${{ matrix.CC }} \ - DEBUG="-DDEBUG_STRICT -DDEBUG_MEMORY_POOLS -DDEBUG_POOL_INTEGRITY" \ + DEBUG="-DDEBUG_POOL_INTEGRITY" \ USE_OPENSSL=1 sudo make install - name: Show HAProxy version diff --git a/.github/workflows/fedora-rawhide.yml b/.github/workflows/fedora-rawhide.yml index 1bb2745..3035219 100644 --- a/.github/workflows/fedora-rawhide.yml +++ b/.github/workflows/fedora-rawhide.yml @@ -11,8 +11,13 @@ jobs: build_and_test: strategy: matrix: - cc: [ gcc, clang ] - name: ${{ matrix.cc }} + platform: [ + { name: x64, cc: gcc, QUICTLS_EXTRA_ARGS: "", ADDLIB_ATOMIC: "", ARCH_FLAGS: "" }, + { name: x64, cc: clang, QUICTLS_EXTRA_ARGS: "", ADDLIB_ATOMIC: "", ARCH_FLAGS: "" }, + { name: x86, cc: gcc, QUICTLS_EXTRA_ARGS: "-m32 linux-generic32", ADDLIB_ATOMIC: "-latomic", ARCH_FLAGS: "-m32" }, + { name: x86, cc: clang, QUICTLS_EXTRA_ARGS: "-m32 linux-generic32", ADDLIB_ATOMIC: "-latomic", ARCH_FLAGS: "-m32" } + ] + name: ${{ matrix.platform.cc }}.${{ matrix.platform.name }} runs-on: ubuntu-latest if: ${{ github.repository_owner == 'haproxy' }} 
container: @@ -21,20 +26,21 @@ jobs: - uses: actions/checkout@v4 - name: Install dependencies run: | - dnf -y install git pcre-devel zlib-devel pcre2-devel 'perl(FindBin)' perl-IPC-Cmd 'perl(File::Copy)' 'perl(File::Compare)' lua-devel socat findutils systemd-devel clang + dnf -y install diffutils git pcre-devel zlib-devel pcre2-devel 'perl(FindBin)' perl-IPC-Cmd 'perl(File::Copy)' 'perl(File::Compare)' lua-devel socat findutils systemd-devel clang + dnf -y install 'perl(FindBin)' 'perl(File::Compare)' perl-IPC-Cmd 'perl(File::Copy)' glibc-devel.i686 lua-devel.i686 lua-devel.x86_64 systemd-devel.i686 zlib-ng-compat-devel.i686 pcre-devel.i686 libatomic.i686 - name: Install VTest run: scripts/build-vtest.sh - name: Install QuicTLS - run: QUICTLS=yes scripts/build-ssl.sh + run: QUICTLS=yes QUICTLS_EXTRA_ARGS="${{ matrix.platform.QUICTLS_EXTRA_ARGS }}" scripts/build-ssl.sh - name: Build contrib tools run: | make admin/halog/halog make dev/flags/flags make dev/poll/poll make dev/hpack/decode dev/hpack/gen-enc dev/hpack/gen-rht - - name: Compile HAProxy with ${{ matrix.cc }} + - name: Compile HAProxy with ${{ matrix.platform.cc }} run: | - make -j3 CC=${{ matrix.cc }} V=1 ERR=1 TARGET=linux-glibc USE_OPENSSL=1 USE_QUIC=1 USE_ZLIB=1 USE_PCRE=1 USE_PCRE_JIT=1 USE_LUA=1 USE_SYSTEMD=1 ADDLIB="-Wl,-rpath,${HOME}/opt/lib" SSL_LIB=${HOME}/opt/lib SSL_INC=${HOME}/opt/include + make -j3 CC=${{ matrix.platform.cc }} V=1 ERR=1 TARGET=linux-glibc USE_OPENSSL=1 USE_QUIC=1 USE_ZLIB=1 USE_PCRE=1 USE_PCRE_JIT=1 USE_LUA=1 USE_SYSTEMD=1 ADDLIB="${{ matrix.platform.ADDLIB_ATOMIC }} -Wl,-rpath,${HOME}/opt/lib" SSL_LIB=${HOME}/opt/lib SSL_INC=${HOME}/opt/include ARCH_FLAGS="${{ matrix.platform.ARCH_FLAGS }}" make install - name: Show HAProxy version id: show-version diff --git a/.github/workflows/illumos.yml b/.github/workflows/illumos.yml new file mode 100644 index 0000000..0259bf1 --- /dev/null +++ b/.github/workflows/illumos.yml @@ -0,0 +1,23 @@ +name: Illumos + +on: + schedule: + - cron: 
"0 0 25 * *" + +jobs: + gcc: + runs-on: ubuntu-latest + if: ${{ github.repository_owner == 'haproxy' }} + permissions: + contents: read + steps: + - name: "Checkout repository" + uses: actions/checkout@v4 + + - name: "Build on VM" + uses: vmactions/solaris-vm@v1 + with: + prepare: | + pkg install gcc make + run: | + gmake CC=gcc TARGET=solaris USE_OPENSSL=1 USE_PROMEX=1 diff --git a/.github/workflows/musl.yml b/.github/workflows/musl.yml index 930a22e..a54414f 100644 --- a/.github/workflows/musl.yml +++ b/.github/workflows/musl.yml @@ -26,7 +26,7 @@ jobs: - name: Install VTest run: scripts/build-vtest.sh - name: Build - run: make -j$(nproc) TARGET=linux-musl DEBUG_CFLAGS='-ggdb3' CC=cc V=1 USE_LUA=1 LUA_INC=/usr/include/lua5.3 LUA_LIB=/usr/lib/lua5.3 USE_OPENSSL=1 USE_PCRE2=1 USE_PCRE2_JIT=1 USE_PROMEX=1 + run: make -j$(nproc) TARGET=linux-musl ARCH_FLAGS='-ggdb3' CC=cc V=1 USE_LUA=1 LUA_INC=/usr/include/lua5.3 LUA_LIB=/usr/lib/lua5.3 USE_OPENSSL=1 USE_PCRE2=1 USE_PCRE2_JIT=1 USE_PROMEX=1 - name: Show version run: ./haproxy -vv - name: Show linked libraries diff --git a/.github/workflows/netbsd.yml b/.github/workflows/netbsd.yml new file mode 100644 index 0000000..6514725 --- /dev/null +++ b/.github/workflows/netbsd.yml @@ -0,0 +1,23 @@ +name: NetBSD + +on: + schedule: + - cron: "0 0 25 * *" + +jobs: + gcc: + runs-on: ubuntu-latest + if: ${{ github.repository_owner == 'haproxy' }} + permissions: + contents: read + steps: + - name: "Checkout repository" + uses: actions/checkout@v4 + + - name: "Build on VM" + uses: vmactions/netbsd-vm@v1 + with: + prepare: | + /usr/sbin/pkg_add gmake curl + run: | + gmake CC=gcc TARGET=netbsd USE_OPENSSL=1 USE_LUA=1 USE_PCRE2=1 USE_PCRE2_JIT=1 USE_PROMEX=1 USE_ZLIB=1 diff --git a/.github/workflows/vtest.yml b/.github/workflows/vtest.yml index d0e4ec3..284bff7 100644 --- a/.github/workflows/vtest.yml +++ b/.github/workflows/vtest.yml @@ -42,13 +42,18 @@ jobs: # Configure a short TMPDIR to prevent failures due to long unix socket # 
paths. TMPDIR: /tmp - # Force ASAN output into asan.log to make the output more readable. - ASAN_OPTIONS: log_path=asan.log OT_CPP_VERSION: 1.6.0 steps: - uses: actions/checkout@v4 with: fetch-depth: 100 + + - name: Setup coredumps + if: ${{ startsWith(matrix.os, 'ubuntu-') }} + run: | + sudo sysctl -w fs.suid_dumpable=1 + sudo sysctl kernel.core_pattern=/tmp/core.%h.%e.%t + # # Github Action cache key cannot contain comma, so we calculate it based on job name # @@ -57,17 +62,6 @@ jobs: run: | echo "key=$(echo ${{ matrix.name }} | sha256sum | awk '{print $1}')" >> $GITHUB_OUTPUT - -# -# temporary hack -# should be revisited after https://github.com/actions/runner-images/issues/9491 is resolved -# - - - name: Setup enthropy - if: ${{ startsWith(matrix.os, 'ubuntu-') }} - run: | - sudo sysctl vm.mmap_rnd_bits=28 - - name: Cache SSL libs if: ${{ matrix.ssl && matrix.ssl != 'stock' && matrix.ssl != 'BORINGSSL=yes' && matrix.ssl != 'QUICTLS=yes' }} id: cache_ssl @@ -92,7 +86,8 @@ jobs: libpcre2-dev \ libsystemd-dev \ ninja-build \ - socat + socat \ + gdb - name: Install brew dependencies if: ${{ startsWith(matrix.os, 'macos-') }} run: | @@ -123,7 +118,7 @@ jobs: ERR=1 \ TARGET=${{ matrix.TARGET }} \ CC=${{ matrix.CC }} \ - DEBUG="-DDEBUG_STRICT -DDEBUG_MEMORY_POOLS -DDEBUG_POOL_INTEGRITY" \ + DEBUG="-DDEBUG_POOL_INTEGRITY" \ ${{ join(matrix.FLAGS, ' ') }} \ ADDLIB="-Wl,-rpath,/usr/local/lib/ -Wl,-rpath,$HOME/opt/lib/" sudo make install @@ -150,11 +145,16 @@ jobs: # This is required for macOS which does not actually allow to increase # the '-n' soft limit to the hard limit, thus failing to run. 
ulimit -n 65536 + ulimit -c unlimited make reg-tests VTEST_PROGRAM=../vtest/vtest REGTESTS_TYPES=default,bug,devel - name: Config syntax check memleak smoke testing if: ${{ contains(matrix.name, 'ASAN') }} run: | - ./haproxy -f .github/h2spec.config -c + ./haproxy -dI -f .github/h2spec.config -c + ./haproxy -dI -f examples/content-sw-sample.cfg -c + ./haproxy -dI -f examples/option-http_proxy.cfg -c + ./haproxy -dI -f examples/quick-test.cfg -c + ./haproxy -dI -f examples/transparent_proxy.cfg -c - name: Show VTest results if: ${{ failure() && steps.vtest.outcome == 'failure' }} run: | @@ -164,10 +164,19 @@ jobs: cat $folder/LOG echo "::endgroup::" done + exit 1 + + - name: Show coredumps + if: ${{ failure() && steps.vtest.outcome == 'failure' }} + run: | + failed=false shopt -s nullglob - for asan in asan.log*; do - echo "::group::$asan" - cat $asan + for file in /tmp/core.*; do + failed=true + printf "::group::" + gdb -ex 'thread apply all bt full' ./haproxy $file echo "::endgroup::" done - exit 1 + if [ "$failed" = true ]; then + exit 1; + fi diff --git a/.github/workflows/windows.yml b/.github/workflows/windows.yml index b020d7c..29ad16f 100644 --- a/.github/workflows/windows.yml +++ b/.github/workflows/windows.yml @@ -58,7 +58,7 @@ jobs: ERR=1 \ TARGET=${{ matrix.TARGET }} \ CC=${{ matrix.CC }} \ - DEBUG="-DDEBUG_STRICT -DDEBUG_MEMORY_POOLS -DDEBUG_POOL_INTEGRITY" \ + DEBUG="-DDEBUG_POOL_INTEGRITY" \ ${{ join(matrix.FLAGS, ' ') }} - name: Show HAProxy version id: show-version @@ -1,38 +1,658 @@ ChangeLog : =========== -2024/04/05 : 2.9.7 - - MINOR: mux-h2: add a counter of "glitches" on a connection - - BUG/MINOR: mux-h2: count rejected DATA frames against the connection's flow control - - MINOR: mux-h2: count excess of CONTINUATION frames as a glitch - - MINOR: mux-h2: count late reduction of INITIAL_WINDOW_SIZE as a glitch +2024/05/29 : 3.0.0 + - MINOR: sample: implement the uptime sample fetch + - CI: scripts: fix build of vtest regarding option -C + - CI: 
scripts: build vtest using multiple CPUs + - MINOR: log: rename 'log-format tag' to 'log-format alias' + - DOC: config: document logformat item naming and typecasting features + - BUILD: makefile: yearly reordering of objects by build time + - BUILD: fd: errno is also needed without poll() + - DOC: config: fix two typos "RST_STEAM" vs "RST_STREAM" + - DOC: config: refer to the non-deprecated keywords in ocsp-update on/off + - DOC: streamline http-reuse and connection naming definition + - REGTESTS: complete http-reuse test with pool-conn-name + - DOC: config: add %ID logformat alias alternative + - CLEANUP: ssl/ocsp: readable ifdef in ssl_sock_load_ocsp + - BUG/MINOR: ssl/ocsp: init callback func ptr as NULL + - CLEANUP: ssl_sock: move dirty openssl-1.0.2 wrapper to openssl-compat + - BUG/MINOR: activity: fix Delta_calls and Delta_bytes count + - CI: github: upgrade the WolfSSL job to 5.7.0 + - DOC: install: update quick build reminders with some missing options + - DOC: install: update the range of tested openssl version to cover 3.3 + - DEV: patchbot: prepare for new version 3.1-dev + - MINOR: version: mention that it's 3.0 LTS now. 
+ +2024/05/24 : 3.0-dev13 + - CLEANUP: ssl/cli: remove unused code in dump_crtlist_conf + - MINOR: ssl: check parameter in ckch_conf_cmp() + - BUG/MINOR: ring: free ring's allocated area not ring's usable area when using maps + - DOC: configuration: rework the crt-store load documentation + - DEBUG: tools: add vma_set_name() helper + - DEBUG: shctx: name shared memory using vma_set_name() + - DEBUG: sink: add name hint for memory area used by memory-backed sinks + - DEBUG: pollers: add name hint for large memory areas used by pollers + - DEBUG: errors: add name hint for startup-logs memory area + - DEBUG: fd: add name hint for large memory areas + - MEDIUM: ssl: don't load file by discovering them in crt-store + - DOC: configuration: update the crt-list documentation + - DOC: configuration: add the supported crt-store options in crt-list + - BUG/MEDIUM: proto: fix fd leak in <proto>_connect_server + - MINOR: sock: set conn->err_code in case of EPERM + - BUG/MINOR: http-ana: Don't crush stream termination condition on internal error + - MAJOR: spoe: Let the SPOE back into the game + - BUG/MINOR: connection: parse PROXY TLV for LOCAL mode + - BUG/MINOR: server: free PROXY v2 TLVs on srv drop + - MINOR: rhttp: add log on connection allocation failure + - BUG/MEDIUM: rhttp: fix preconnect on single-thread + - BUG/MINOR: rhttp: prevent listener suspend + - BUG/MINOR: rhttp: fix task_wakeup state + - MINOR: session: define flag to explicitely release listener on free + - MEDIUM: rhttp: create session for active preconnect + - MINOR: rhttp: support PROXY emission on preconnect + - MINOR: connection: support PROXY v2 TLV emission without stream + - MINOR: traces: enumerate the list of levels/verbosities when not found + - BUG/MINOR: sock: fix sock_create_server_socket + - MINOR: proto: fix coding style + - BUG/MAJOR: quic: Crash with TLS_AES_128_CCM_SHA256 (libressl only) + - REGTESTS: scripts: allow to change the vtest timeout + - BUG/MEDIUM: quic_tls: prevent LibreSSL < 
4.0 from negotiating CHACHA20_POLY1305 + - CI: scripts/build-ssl.sh: loudly fail on unsupported platforms + - BUG/MEDIUM: mux-quic: Create sedesc in same time of the QUIC stream + - MINOR: mux-quic: Set abort info for SC-less QCS on STOP_SENDING frame + - CI: scripts/build-ssl: add a DESTDIR and TMPDIR variable + - CI: scripts/buil-ssl: cleanup the boringssl and quictls build + - MINOR: config: add thread-hard-limit to set an upper bound to nbthread + - BUILD: quic: fix unused variable warning when threads are disabled + - BUG/MEDIUM: stick-tables: Fix race with peers when trashing oldest entries + - BUG/MEDIUM: stick-tables: Fix race with peers when killing a sticky session + - BUG/MEDIUM: stick-tables: make sure never to create two same remote entries + - CLEANUP: stick-tables: remove a few unneeded tests for use_wrlock + - MINOR: stick-tables: remove the uneeded read lock in stksess_free() + - CLEANUP: tools: fix vma_set_name() function comment + - DEBUG: tools: add vma_set_name_id() helper + - DEBUG: pollers/fd: add thread id suffix to per-thread memory areas name hints + - DOC: config: fix aes_gcm_enc() description text + - BUILD: trace: fix warning on null dereference + - MEDIUM: config: prevent communication with privileged ports + - MAJOR: config: prevent QUIC with clients privileged port by default + - BUG/MINOR: quic: adjust restriction for stateless reset emission + - MINOR: quic: clarify doc for quic_recv() + - MINOR: server: generalize sni expr parsing + - MINOR: server: define pool-conn-name keyword + - MEDIUM: connection: use pool-conn-name instead of sni on reuse + - BUG/MINOR: rhttp: initialize session origin after preconnect reversal + - BUG/MEDIUM: server/dns: preserve server's port upon resolution timeout or error + - BUG/MINOR: http-htx: Support default path during scheme based normalization + - BUG/MINOR: server: Don't reset resolver options on a new default-server line + - DOC: quic: specify that connection migration is not supported + - DOC: 
config: fix incorrect section reference about custom log format + - DOC: config: uniformize the naming and description of custom log format args + - DOC: config: clarify the fact that custom log format is not just for logging + - REGTESTS: acl_cli_spaces: avoid a warning caused by undefined logs + +2024/05/18 : 3.0-dev12 + - CI: drop asan.log umbrella completely + - BUG/MINOR: log: fix leak in add_sample_to_logformat_list() error path + - BUG/MINOR: log: smp_rgs array issues with inherited global log directives + - MINOR: rhttp: Don't require SSL when attach-srv name parsing + - REGTESTS: ssl: be more verbose with ocsp_compat_check.vtc + - DOC: Update UUID references to RFC 9562 + - MINOR: hlua: add hlua_nb_instruction getter + - MEDIUM: hlua: take nbthread into account in hlua_get_nb_instruction() + - BUG/MEDIUM: server: clear purgeable conns before server deletion + - BUG/MINOR: mux-quic: fix error code on shutdown for non HTTP/3 + - BUG/MINOR: qpack: fix error code reported on QPACK decoding failure + - BUG/MEDIUM: htx: mark htx_sl as packed since it may be realigned + - BUG/MEDIUM: stick-tables: properly mark stktable_data as packed + - SCRIPTS: run-regtests: fix a few occurrences of extended regexes + - BUG/MINOR: ssl_sock: fix xprt_set_used() to properly clear the TASK_F_USR1 bit + - MINOR: dynbuf: provide a b_dequeue() variant for multi-thread + - BUG/MEDIUM: muxes: enforce buf_wait check in takeover() + - BUG/MINOR: h1: Check authority for non-CONNECT methods only if a scheme is found + - BUG/MEDIUM: h1: Reject CONNECT request if the target has a scheme + - BUG/MAJOR: h1: Be stricter on request target validation during message parsing + - MINOR: qpack: prepare error renaming + - MINOR: h3/qpack: adjust naming for errors + - MINOR: h3: adjust error reporting on sending + - MINOR: h3: adjust error reporting on receive + - MINOR: mux-quic: support glitches + - MINOR: h3: report glitch on RFC violation + - BUILD: stick-tables: better mark the stktable_data as 
32-bit aligned + - MINOR: ssl: rename tune.ssl.ocsp-update.mode in ocsp-update.mode + - REGTESTS: update the ocsp-update tests + - BUILD: stats: remove non portable getline() usage + - MEDIUM: ssl: add ocsp-update.mindelay and ocsp-update.maxdelay + - BUILD: log: get rid of non-portable strnlen() func + - BUG/MEDIUM: fd: prevent memory waste in fdtab array + - CLEANUP: compat: make the MIN/MAX macros more reliable + - Revert: MEDIUM: evports: permit to report multiple events at once" + - BUG/MINOR: stats: Don't state the 303 redirect response is chunked + - MINOR: mux-h1: Add a flag to ignore the request payload + - REORG: mux-h1: Group H1S_F_BODYLESS_* flags + - CLEANUP: mux-h1: Remove unused H1S_F_ERROR_MASK mask value + - MEDIUM: mux-h1: Support C-L/T-E header suppressions when sending messages + - MINOR: ssl: ckch_store_new_load_files_conf() loads filenames from ckch_conf + - MEDIUM: ssl/crtlist: loading crt-store keywords from a crt-list + - CLEANUP: ssl/ocsp: remove the deprecated parsing code for "ocsp-update" + - MINOR: ssl: pass ckch_store instead of ckch_data to ssl_sock_load_ocsp() + - MEDIUM: ssl: ckch_conf_parse() uses -1/0/1 for off/default/on + - MINOR: ssl: handle PARSE_TYPE_INT and PARSE_TYPE_ONOFF in ckch_store_load_files() + - MINOR: ssl/ocsp: use 'ocsp-update' in crt-store + - MINOR: ssl: ckch_conf_clean() utility function for ckch_conf + - MEDIUM: ssl: add ocsp-update.disable global option + - MEDIUM: ssl/cli: handle crt-store keywords in crt-list over the CLI + - MINOR: ssl: ckch_conf_cmp() compare multiple ckch_conf structures + - MEDIUM: ssl: temporarily load files by detecting their presence in crt-store + - REGTESTS: ocsp-update: change the reg-test to support the new crt-store mode + - DOC: capabilities: fix chapter header rendering + +2024/05/10 : 3.0-dev11 + - BUILD: clock: improve check for pthread_getcpuclockid() + - CI: add Illumos scheduled workflow + - CI: netbsd: limit scheduled workflow to parent repo only + - OPTIM: log: resolve 
logformat options during postparsing + - BUG/MINOR: haproxy: only tid 0 must not sleep if got signal + - REGTEST: add tests for acl() sample fetch + - BUG/MINOR: acl: support built-in ACLs with acl() sample + - BUG/MINOR: cfgparse: use curproxy global var from config post validation + - MEDIUM: stconn/muxes: Add an abort reason for SE shutdowns on muxes + - MINOR: mux-h2: Set the SE abort reason when a RST_STREAM frame is received + - MEDIUM: mux-h2: Forward h2 client cancellations to h2 servers + - MINOR: mux-quic: Set tha SE abort reason when a STOP_SENDING frame is received + - MINOR: stconn: Add samples to retrieve about stream aborts + - MINOR: mux-quic: Add .ctl callback function to get info about a mux connection + - MINOR: muxes: Add ctl commands to get info on streams for a connection + - MINOR: connection: Add samples to retrieve info on streams for a connection + - BUG/MEDIUM: log/ring: broken syslog octet counting + - BUG/MEDIUM: mux-quic: fix crash on STOP_SENDING received without SD + - DOC: lua: fix filters.txt file location + - MINOR: dynbuf: pass a criticality argument to b_alloc() + - MINOR: dynbuf: add functions to help queue/requeue buffer_wait fields + - MINOR: dynbuf: use the b_queue()/b_requeue() functions everywhere + - MEDIUM: dynbuf: make the buffer_wq an array of list heads + - CLEANUP: tinfo: better align fields in thread_ctx + - MINOR: dynbuf: provide a b_dequeue() function to detach a bw from the queue + - MEDIUM: dynbuf: generalize the use of b_dequeue() to detach buffer_wait + - MEDIUM: dynbuf/stream: re-enable queueing upon failed buffer allocation + - MEDIUM: dynbuf/stream: do not allocate the buffers in the callback + - MEDIUM: applet: make appctx_buf_available() only wake the applet up, not allocate + - MINOR: applet: set the blocking flag in the buffer allocation function + - MINOR: applet: adjust the allocation criticity based on the requested buffer + - MINOR: dynbuf/mux-h1: use different criticalities for buffer allocations + 
- MEDIUM: dynbuf/mux-h1: do not allocate the buffers in the callback + - MEDIUM: dynbuf: refrain from offering a buffer if more critical ones are waiting + - MINOR: stconn: report that a buffer allocation succeeded + - MINOR: stream: report that a buffer allocation succeeded + - MINOR: applet: report about buffer allocation success + - MINOR: mux-h1: report that a buffer allocation succeeded + - MEDIUM: stream: allocate without queuing when retrying + - MEDIUM: channel: allocate without queuing when retrying + - MEDIUM: mux-h1: allocate without queuing when retrying + - MEDIUM: dynbuf: implement emergency buffers + - MEDIUM: dynbuf: use emergency buffers upon failed memory allocations + +2024/05/04 : 3.0-dev10 + - BUG/MEDIUM: cache: Vary not working properly on anything other than accept-encoding + - REGTESTS: cache: Add test on 'vary' other than accept-encoding + - BUG/MINOR: stats: replace objt_* by __objt_* macros + - CLEANUP: tools/cbor: rename cbor_encode_ctx struct members + - MINOR: log/cbor: _lf_cbor_encode_byte() explicitly requires non-NULL ctx + - BUG/MINOR: log: fix global lf_expr node options behavior + - CLEANUP: log: add a macro to know if a lf_node is configurable + - MINOR: httpclient: allow to use absolute URI with new flag HC_F_HTTPROXY + - MINOR: ssl: introduce ocsp_update.http_proxy for ocsp-update keyword + - BUG/MINOR: log/encode: consider global options for key encoding + - BUG/MINOR: log/encode: fix potential NULL-dereference in LOGCHAR() + - BUG/MINOR: log: fix global lf_expr node options behavior (2nd try) + - MINOR: log/cbor: _lf_cbor_encode_byte() explicitly requires non-NULL ctx (again) + - BUG/MEDIUM: log: don't ignore disabled node's options + - BUG/MINOR: stconn: don't wake up an applet waiting on buffer allocation + - MINOR: sock: rename sock to sock_fd in sock_create_server_socket + - MEDIUM: proto_uxst: take in account server namespace + - MEIDUM: unix sock: use my_socketat to create bind socket + - MINOR: sock_set_mark: take 
sock family in account + - MEDIUM: proto: make common fd checks in sock_create_server_socket + - MINOR: sock: add EPERM case in sock_handle_system_err + - MINOR: capabilities: add cap_sys_admin support + - CLEANUP: ssl: clean the includes in ssl_ocsp.c + - CLEANUP: ssl: move the global ocsp-update options parsing to ssl_ocsp.c + - MINOR: stats: fix visual alignment for stat_cols_px definition + - MINOR: stats: convert req_tot as generic column + - MINOR: stats: prepare stats-file support for values other than FN_COUNTER + - MINOR: counters: move freq-ctr from proxy/server into counters struct + - MINOR: stats: support rate in stats-file + - MINOR: stats: convert rate as generic column for proxy stats + - MINOR: counters: move last_change into counters struct + - MINOR: stats: support age in stats-file + - MINOR: stats: convert age as generic column for proxy stat + - CLEANUP: ssl: rename new_ckch_store_load_files_path() to ckch_store_new_load_files_path() + - MINOR: ssl: rename ocsp_update.http_proxy into ocsp-update.httpproxy + - REORG: stats: define stats-proxy source module + - MINOR: stats: extract proxy clear-counter in a dedicated function + - REGTESTS: stats: add test stats-file counters preload + - CI: netbsd: adjust packages after NetBSD-10 released + - CLEANUP: assorted typo fixes in the code and comments + - REGTESTS: replace REQUIRE_VERSION by version_atleast + - MEDIUM: log: optimizing tmp->type handling in sess_build_logline() + - BUG/MINOR: log: prevent double spaces emission in sess_build_logline() + - OPTIM: log: declare empty buffer as global variable + - OPTIM: log: use thread local lf_buildctx to stop pushing it on the stack + - OPTIM: log: use lf_buildctx's buffer instead of temporary stack buffers + - OPTIM: log: speedup date printing in sess_build_logline() when no encoding is used + +2024/04/27 : 3.0-dev9 + - BUILD: ssl: use %zd for sizeof() in ssl_ckch.c + - MINOR: backend: use be_counters for health down accounting + - BUG/MINOR: backend: 
use cum_sess counters instead of cum_conn + - BUG/MINOR: stats: fix stot metric for listeners + - REGTESTS: use -dI for insecure fork by default in the regtest scripts + - MINOR: stats: rename proxy stats + - MINOR: stats: rename ambiguous stat_l and stat_count + - MINOR: stats: rename info stats + - MINOR: stats: use stricter naming stats/field/line + - MINOR: stats: use STAT_F_* prefix for flags + - BUG/MEDIUM: applet: Let's applets decide if they have more data to deliver + - BUILD: stick-tables: silence build warnings when threads are disabled + - MINOR: tools: Rename `ha_generate_uuid` to `ha_generate_uuid_v4` + - MINOR: Add `ha_generate_uuid_v7` + - MINOR: Add support for UUIDv7 to the `uuid` sample fetch + - MEDIUM: shctx: Naming shared memory context + - BUG/MINOR: h1: fix detection of upper bytes in the URI + - MINOR: intops: add a pair of functions to check multi-byte ranges + - TESTS: add a unit test for the multi-byte range checks + - CLEANUP: h1: make use of the multi-byte matching functions + - REGTESTS: ssl: Remove "sleep" calls from ocsp auto update test + - BUG/MEDIUM: peers: Automatically start to learn on local peer + - BUG/MEDIUM: peers: Reprocess peer state after all session shutdowns + - MINOR: peers: Remove unused PEERS_F_RESYNC_REQUESTED flag + - MINOR: peers: Don't set TEACH flags on a peer from the sync task + - MINOR: peers: Use a peer flag to block the applet waiting ack of the sync task + - BUG/MEDIUM: peers: Wait for sync task ack when a resynchro is finished + - MINOR: peers: Remove unused PEERS_F_RESYNC_PROCESS flag + - MINOR: applet: Add a function to know the side where an applet was created + - MEDIUM: peers: Simplify the peer flags dealing with the connection state + - MEDIUM: peers: Use true states for the peer applets as seen from outside + - MEDIUM: peers: Use true states for the learn state of a peer + - MINOR: peers: Start learning for local peer before receiving messages + - MINOR: peers: Rename PEERS_F_TEACH_COMPLETE to 
PEERS_F_LOCAL_TEACH_COMPLETE + - MINOR: peers: Reorder and slightly rename PEER flags + - MINOR: peers: Reorder and rename PEERS flags + - REORG: peers: Move peer and peers flags in the corresponding header file + - DEV: flags/peers: Decode PEER and PEERS flags + - MINOR: peers: Add comment on processing functions of the sync task + - MINOR: peers: Use a static variable to wait a resync on reload + - BUG/MEDIUM: peers: Use atomic operations on peers flags when necessary + - REORG: peers: Rename all occurrences to 'ps' variable + - BUG/MINOR: peers: Don't wait for a remote resync if there no remote peer + - MINOR: stats: update ambiguous "metrics" naming to "stat_cols" + - MINOR: stats: introduce a more expressive stat definition method + - MINOR: stats: implement automatic metric generation from stat_col + - MINOR: stats: hide some columns in output + - MEDIUM: stats: convert counters to new column definition + - MINOR: stats: define stats-file output format support + - MEDIUM: stats: implement dump stats-file CLI + - MINOR: ist: define iststrip() new function + - MINOR: guid: define guid_is_valid_fmt() + - MINOR: stats: apply stats-file on process startup + - MINOR: stats: parse header lines from stats-file + - MINOR: stats: parse values from stats-file + - MEDIUM: stats: define stats-file keyword + - BUG/MINOR: mworker: reintroduce way to disable seamless reload with -x /dev/null + - CLEANUP: log: remove unused checks for encode_{chunk,string} + - MINOR: log: store lf_expr nodes inside substruct + - MINOR: log: global lf_expr node options + - CLEANUP: log: simplify complex values usages in sess_build_logline() + - MINOR: log: skip custom logformat_node name if empty + - MINOR: log: add lf_int() wrapper to print integers + - MINOR: log: add lf_rawtext{_len}() functions + - MEDIUM: log: pass date strings to lf_rawtext() + - MEDIUM: log: write raw strings using lf_rawtext() + - MEDIUM: log: use lf_rawtext for lf_ip() and lf_port() hex strings + - MINOR: log: 
explicitly handle %ts and %tsc as text strings + - MINOR: log: use LOG_VARTEXT_{START,END} to enclose text strings + - MINOR: log: make all lf_* sess build helper static + - MINOR: log: merge lf_encode_string() and lf_encode_chunk() logic + - MEDIUM: log: lf_* build helpers now take a ctx argument + - MINOR: log: expose node typecast in lf_buildctx struct + - MINOR: log: postpone conversion for sample expressions in sess_build_logline() + - MINOR: log: add LOG_OPT_NONE flag + - MINOR: log: add no_escape_map to bypass escape with _lf_encode_bytes() + - MINOR: log: add +bin logformat node option + - MINOR: log: add +json encoding option + - MINOR: tools: add cbor encode helpers + - MINOR: log: add +cbor encoding option + - MINOR: log: support true cbor binary encoding + - CLEANUP: dynbuf: move the reserve and limit parsers to dynbuf.c + - MINOR: list: add a macro to detect that a list contains at most one element + - MINOR: cli/wait: rename the condition "srv-unused" to "srv-removable" + +2024/04/19 : 3.0-dev8 + - BUG/MINOR: cli: Don't warn about a too big command for incomplete commands + - BUG/MINOR: listener: always assign distinct IDs to shards + - BUG/MINOR: log: fix lf_text_len() truncate inconsistency + - BUG/MINOR: tools/log: invalid encode_{chunk,string} usage + - BUG/MINOR: log: invalid snprintf() usage in sess_build_logline() + - CLEANUP: log: lf_text_len() returns a pointer not an integer + - MINOR: quic: simplify qc_send_hdshk_pkts() return + - MINOR: quic: uniformize sending methods for handshake + - MINOR: quic: improve sending API on retransmit + - MINOR: quic: use qc_send_hdshk_pkts() in handshake IO cb + - MEDIUM: quic: remove duplicate hdshk/app send functions + - OPTIM: quic: do not call qc_send() if nothing to emit + - OPTIM: quic: do not call qc_prep_pkts() if everything sent + - BUG/MEDIUM: http-ana: Deliver 502 on keep-alive for fresh server connection + - BUG/MINOR: http-ana: Fix TX_L7_RETRY and TX_D_L7_RETRY values + - BUILD: makefile: warn 
about unknown USE_* variables + - BUILD: makefile: support USE_xxx=0 as well + - BUG/MINOR: guid: fix crash on invalid guid name + - BUILD: atomic: fix peers build regression on gcc < 4.7 after recent changes + - BUG/MINOR: debug: make sure DEBUG_STRICT=0 does work as documented + - BUILD: cache: fix non-inline vs inline declaration mismatch to silence a warning + - BUILD: debug: make DEBUG_STRICT=1 the default + - BUILD: pools: make DEBUG_MEMORY_POOLS=1 the default option + - CI: update the build options to get rid of unneeded DEBUG options + - BUILD: makefile: get rid of the config CFLAGS variable + - BUILD: makefile: allow to use CFLAGS to append build options + - BUILD: makefile: drop the SMALL_OPTS settings + - BUILD: makefile: move -O2 from CPU_CFLAGS to OPT_CFLAGS + - BUILD: makefile: get rid of the CPU variable + - BUILD: makefile: drop the ARCH variable and better document ARCH_FLAGS + - BUILD: makefile: extract ARCH_FLAGS out of LDFLAGS + - BUILD: makefile: move the fwrapv option to STD_CFLAGS + - BUILD: makefile: make the ERR variable also support 0 + - BUILD: makefile: add FAILFAST to select the -Wfatal-errors behavior + - BUILD: makefile: extract -Werror/-Wfatal-errors from automatic CFLAGS + - BUILD: makefile: split WARN_CFLAGS from SPEC_CFLAGS + - BUILD: makefile: rename SPEC_CFLAGS to NOWARN_CFLAGS + - BUILD: makefile: do not pass warnings to VERBOSE_CFLAGS + - BUILD: makefile: also drop DEBUG_CFLAGS + - CLEANUP: makefile: make the output of the "opts" target more readable + - DOC: install: clarify the build process by splitting it into subsections + - BUG/MINOR: server: fix slowstart behavior + - BUG/MEDIUM: cache/stats: Handle inbuf allocation failure in the I/O handler + - MINOR: ssl: add the section parser for 'crt-store' + - DOC: configuration: Add 3.12 Certificate Storage + - REGTESTS: ssl: test simple case of crt-store + - MINOR: ssl: rename ckchs_load_cert_file to new_ckch_store_load_files_path + - MINOR: ssl/crtlist: alloc ssl_conf only 
when a valid keyword is found + - BUG/MEDIUM: stick-tables: fix the task's next expiration date + - CLEANUP: stick-tables: always respect the to_batch limit when trashing + - BUG/MEDIUM: peers/trace: fix crash when listing event types + - BUG/MAJOR: stick-tables: fix race with peers in entry expiration + - DEBUG: pool: improve decoding of corrupted pools + - REORG: pool: move the area dump with symbol resolution to tools.c + - DEBUG: pools: report the data around the offending area in case of mismatch + - MINOR: listener/protocol: add proto name in alerts + - MINOR: proto_quic: add proto name in alert + - BUG/MINOR: lru: fix the standalone test case for invalid revision + - DOC: management: fix typos + - CI: revert kernel addr randomization introduced in 3a0fc864 + - MINOR: ring: clarify the usage of ring_size() and add ring_allocated_size() + - BUG/MAJOR: ring: use the correct size to reallocate startup_logs + - MINOR: ring: always check that the old ring fits in the new one in ring_dup() + - CLEANUP: ssl: remove dead code in cfg_parse_crtstore() + - MINOR: ssl: supports crt-base in crt-store + - MINOR: ssl: 'key-base' allows to load a 'key' from a specific path + - MINOR: net_helper: Add support for floats/doubles. 
+ - BUG/MEDIUM: grpc: Fix several unaligned 32/64 bits accesses + - MINOR: peers: Split resync process function to separate running/stopping states + - MINOR: peers: Add 2 peer flags about the peer learn status + - MINOR: peers: Add flags to report the peer state to the resync task + - MINOR: peers: slightly adapt part processing the stopping signal + - MINOR: peers: Add functions to commit peer changes from the resync task + - BUG/MINOR: peers: Report a resync was explicitly requested from a thread-safe manner + - BUG/MAJOR: peers: Update peers section state from a thread-safe manner + - MEDIUM: peers: Only lock one peer at a time in the sync process function + - MINOR: peer: Restore previous peer flags value to ease debugging + - BUG/MEDIUM: stconn: Don't forward channel data if input data must be filtered + - BUILD: cache: fix a build warning with gcc < 7 + - BUILD: xxhash: silence a build warning on Solaris + gcc-5.5 + - CI: reduce ASAN log redirection umbrella size + - CLEANUP: assorted typo fixes in the code and comments + - BUG/MEDIUM: evports: do not clear returned events list on signal + - MEDIUM: evports: permit to report multiple events at once + - MEDIUM: ssl: support aliases in crt-store + - BUG/MINOR: ssl: check on forbidden character on wrong value + - BUG/MINOR: ssl: fix crt-store load parsing + - BUG/MEDIUM: applet: Fix applet API to put input data in a buffer + - BUG/MEDIUM: spoe: Always retry when an applet fails to send a frame + - BUG/MEDIUM: peers: Fix exit condition when max-updates-at-once is reached + - BUILD: linuxcap: Properly declare prepare_caps_from_permitted_set() + - BUG/MEDIUM: peers: fix localpeer regression with 'bind+server' config style + - MINOR: peers: stop relying on srv->addr to find peer port + - MEDIUM: ssl: support a named crt-store section + - MINOR: stats: remove implicit static trash_chunk usage + - REORG: stats: extract HTML related functions + - REORG: stats: extract JSON related functions + - MEDIUM: ssl: crt-base 
and key-base local keywords for crt-store + - MINOR: stats: Get the right prototype for stats_dump_html_end(). + - MAJOR: ssl: use the msg callback mechanism for backend connections + - MINOR: ssl: implement keylog fetches for backend connections + - BUG/MINOR: stconn: Fix sc_mux_strm() return value + - MINOR: mux-pt: Test conn flags instead of sedesc ones to perform a full close + - MINOR: stconn/connection: Move shut modes at the SE descriptor level + - MINOR: stconn: Rewrite shutdown functions to simplify the switch statements + - MEDIUM: stconn: Use only one SC function to shut connection endpoints + - MEDIUM: stconn: Explicitly pass shut modes to shut applet endpoints + - MEDIUM: stconn: Use one function to shut connection and applet endpoints + - MEDIUM: muxes: Use one callback function to shut a mux stream + - BUG/MINOR: sock: handle a weird condition with connect() + - BUG/MINOR: fd: my_closefrom() on Linux could skip contiguous series of sockets + - BUG/MEDIUM: peers: Don't set PEERS_F_RESYNC_PROCESS flag on a peer + - BUG/MEDIUM: peers: Fix state transitions of a peer + - MINOR: init: use RLIMIT_DATA instead of RLIMIT_AS + - CI: modernize macos matrix + +2024/04/06 : 3.0-dev7 + - BUG/MINOR: ssl: Wrong ocsp-update "incompatibility" error message + - BUG/MINOR: ssl: Detect more 'ocsp-update' incompatibilities + - MEDIUM: ssl: Add 'tune.ssl.ocsp-update.mode' global option + - REGTESTS: ssl: Add OCSP update compatibility tests + - REGTESTS: ssl: Add functional test for global ocsp-update option + - BUG/MINOR: server: reject enabled for dynamic server + - BUG/MINOR: server: fix persistence cookie for dynamic servers + - MINOR: server: allow cookie for dynamic servers + - REGTESTS: Fix script about OCSP update compatibility tests + - BUG/MINOR: cli: Report an error to user if command or payload is too big + - MINOR: sc_strm: Add generic version to perform sync receives and sends + - MEDIUM: stream: Use generic version to perform sync receives and sends + - 
MEDIUM: buf: Add b_getline() and b_getdelim() functions + - MEDIUM: applet: Handle applets with their own buffers in put functions + - MEDIUM: cli/applet: Stop to test opposite SC in I/O handler of CLI commands + - MINOR: applet: Always use applet API to set appctx flags + - BUG/MEDIUM: applet: State appctx have more data if its EOI/EOS/ERROR flag is set + - MAJOR: cli: Update the CLI applet to handle its own buffers + - MINOR: applet: Let's applets .snd_buf function deal with full input buffers + - MINOR: stconn: Add a connection flag to notify sending data are the last ones + - MAJOR: cli: Use a custom .snd_buf function to only copy the current command + - DOC: config: balance 'first' not usable in LOG mode + - BUG/MINOR: log/balance: detect if user tries to use unsupported algo + - MINOR: lbprm: implement true "sticky" balance algo + - MEDIUM: log/balance: leverage lbprm api for log load-balancing + - BUG/BUILD: debug: fix unused variable error + - MEDIUM: lb-chash: Deterministic node hashes based on server address + - BUG/MEDIUM: stick-tables: fix a small remaining race in expiration task + - REGTESTS: Do not use REQUIRE_VERSION for HAProxy 2.5+ (4) + - REGTESTS: Remove REQUIRE_VERSION=1.9 from all tests (2) + - CLEANUP: Reapply ist.cocci (3) + - CLEANUP: Reapply strcmp.cocci (2) + - CLEANUP: Reapply xalloc_cast.cocci + - CLEANUP: Reapply ha_free.cocci + - CI: vtest: show coredumps if any + - REGTESTS: ssl: disable ssl/ocsp_auto_update.vtc + - BUG/MINOR: backend: properly handle redispatch 0 + - MINOR: quic: HyStart++ implementation (RFC 9406) + - BUG/MEDIUM: stconn: Don't forward shutdown to SE if iobuf is not empty + - BUG/MEDIUM: stick-table: use the update lock when reading tables from peers + - BUG/MAJOR: applet: fix a MIN vs MAX usage in appctx_raw_rcv_buf() + - OPTIM: peers: avoid the locking dance around peer_send_teach_process_msgs() + - BUILD: quic: 32 bits compilation issue (QUIC_MIN() usage) + - BUG/MEDIUM: server/lbprm: fix crash in 
_srv_set_inetaddr_port() + - MEDIUM: mworker: get rid of libsystemd + - BUILD: systemd: fix build error on non-systemd systems with USE_SYSTEMD=1 + - BUG/MINOR: bwlim/config: fix missing '\n' after error messages + - MINOR: stick-tables: mark the seen stksess with a flag "seen" + - OPTIM: stick-tables: check the stksess without taking the read lock + - MAJOR: stktable: split the keys across multiple shards to reduce contention + - CI: extend Fedora Rawhide, add m32 mode + - BUG/MINOR: stick-tables: Missing stick-table key nullity check + - BUILD: systemd: enable USE_SYSTEMD by default with TARGET=linux-glibc + - MINOR: systemd: Include MONOTONIC_USEC field in RELOADING=1 message + - BUG/MINOR: proxy: fix logformat expression leak in use_backend rules + - MEDIUM: log: rename logformat var to logformat tag + - MINOR: log: expose logformat_tag struct + - MEDIUM: log: carry tag context in logformat node + - MEDIUM: tree-wide: add logformat expressions wrapper + - MINOR: proxy: add PR_FL_CHECKED flag + - MAJOR: log: implement proper postparsing for logformat expressions + - MEDIUM: log: add compiling logic to logformat expressions + - MEDIUM: proxy/log: leverage lf_expr API for logformat preparsing + - MINOR: guid: introduce global UID module + - MINOR: guid: restrict guid format + - MINOR: proxy: implement GUID support + - MINOR: server: implement GUID support + - MINOR: listener: implement GUID support + - DOC: configuration: grammar fixes for strict-sni + - BUG/MINOR: init: relax LSTCHK_NETADM checks for non root + - MEDIUM: capabilities: check process capabilities sets + - CLEANUP: global: remove LSTCHK_CAP_BIND + - BUG/MEDIUM: quic: don't blindly rely on unaligned accesses + +2024/03/26 : 3.0-dev6 - MINOR: mux-h2: always use h2c_report_glitch() - MEDIUM: mux-h2: allow to set the glitches threshold to kill a connection - - MINOR: connection: add a new mux_ctl to report number of connection glitches - - MINOR: mux-h2: implement MUX_CTL_GET_GLITCHES - - MINOR: 
connection: add sample fetches to report per-connection glitches - - BUG/MINOR: quic: reject unknown frame type - - BUG/MINOR: quic: reject HANDSHAKE_DONE as server - - BUG/MINOR: qpack: reject invalid increment count decoding - - BUG/MINOR: qpack: reject invalid dynamic table capacity - - DOC: quic: Missing tuning setting in "Global parameters" - - BUG/MEDIUM: applet: Immediately free appctx on early error - - BUG/MEDIUM: hlua: Be able to garbage collect uninitialized lua sockets - - BUG/MEDIUM: hlua: Don't loop if a lua socket does not consume received data - - BUG/MEDIUM: quic: fix transient send error with listener socket - - DOC: quic: fix recommandation for bind on multiple address - - MINOR: quic: warn on bind on multiple addresses if no IP_PKTINFO support - - BUG/MINOR: ist: allocate nul byte on istdup - - BUG/MINOR: stats: drop srv refcount on early release - - BUG/MAJOR: server: fix stream crash due to deleted server - - MINOR: cli: Remove useless loop on commands to find unescaped semi-colon - - BUG/MEDIUM: cli: Warn if pipelined commands are delimited by a \n - - BUG/MINOR: quic: fix output of show quic - - BUG/MINOR: ist: only store NUL byte on succeeded alloc + - MINOR: quic: simplify rescheduling for handshake + - MINOR: quic: remove qc_treat_rx_crypto_frms() + - DOC: configuration: clarify ciphersuites usage (V2) + - MINOR: tools: use public interface for FreeBSD get_exec_path() + - BUG/MINOR: ssl: fix possible ctx memory leak in sample_conv_aes_gcm() + - BUG/MINOR: ssl: do not set the aead_tag flags in sample_conv_aes_gcm() + - BUG/MINOR: server: fix first server template not being indexed + - MEDIUM: ssl: initialize the SSL stack explicitely + - MEDIUM: ssl: allow to change the OpenSSL security level from global section + - CLEANUP: ssl: remove useless #ifdef in openssl-compat.h + - CI: github: add -DDEBUG_LIST to the default builds + - BUG/MINOR: hlua: segfault when loading the same filter from different contexts + - BUG/MINOR: hlua: missing lock 
in hlua_filter_new() + - BUG/MINOR: hlua: fix missing lock in hlua_filter_delete() + - DEBUG: lua: precisely identify if stream is stuck inside lua or not + - MINOR: hlua: use accessors for stream hlua ctx + - BUG/MEDIUM: hlua: streams don't support mixing lua-load with lua-load-per-thread (2nd try) + - MINOR: debug: enable insecure fork on the command line + - CI: github: add -dI to haproxy arguments + - BUG/MINOR: listener: Wake proxy's mngmt task up if necessary on session release + - BUG/MINOR: listener: Don't schedule frontend without task in listener_release() + - MINOR: session: rename private conns elements + - BUG/MAJOR: server: do not delete srv referenced by session + - BUG/MEDIUM: spoe: Don't rely on stream's expiration to detect processing timeout + - BUG/MINOR: spoe: Be sure to be able to quickly close IDLE applets on soft-stop + - MAJOR: spoe: Deprecate the SPOE filter + - MINOR: cfgparse: Add a global option to expose deprecated directives + - MINOR: spoe: Add SPOE filters in the exposed deprecated directives + - CLEANUP: assorted typo fixes in the code and comments + - CI: temporarily adjust kernel entropy to work with ASAN/clang + - BUG/MEDIUM: spoe: Return an invalid frame on recv if size is too small + - BUG/MINOR: session: ensure conn owner is set after insert into session + - BUG/MEDIUM: http_ana: ignore NTLM for reuse aggressive/always and no H1 + - BUG/MAJOR: connection: fix server used_conns with H2 + reuse safe + - BUG/MAJOR: ocsp: Separate refcount per instance and per store + - REGTESTS: ssl: Add OCSP related tests + - BUG/MEDIUM: ssl: Fix crash when calling "update ssl ocsp-response" when an update is ongoing + - BUG/MEDIUM: ssl: Fix crash in ocsp-update log function + - MEDIUM: ssl: Change output of ocsp-update log + - MINOR: ssl: Change level of ocsp-update logs + - CLEANUP: ssl: Remove undocumented ocsp fetches + - REGTESTS: ssl: Add checks on ocsp-update log format + - MINOR: connection: implement conn_release() + - MINOR: 
connection: extend takeover with release option + - MEDIUM: server: close idle conn on server deletion + - MEDIUM: mux: prepare for takeover on private connections + - MEDIUM: server: close private idle connection before server deletion + - BUG/MINOR: mux-quic: close all QCS before freeing QCC tasklet + - BUG/MEDIUM: mux-fcgi: Properly handle EOM flag on end-of-trailers HTX block + - BUILD: server: fix build regression on old compilers (<= gcc-4.4) + - OPTIM: http_ext: avoid useless copy in http_7239_extract_{ipv4,ipv6} + - MINOR: debug: add "debug dev trace" to flood with traces + - MINOR: atomic: add a read-specific variant of __ha_cpu_relax() + - MINOR: applet: add new function applet_append_line() + - MINOR: log/applet: add new function syslog_applet_append_event() + - MEDIUM: ring/sink: use applet_append_line()/syslog_applet_append_event() for readers + - REORG: dns/ring: split the ring between the generic one and the DNS one + - MEDIUM: ring: move the ring reader code to ring_dispatch_messages() + - MEDIUM: sink: move the generic ring forwarder code use ring_dispatch_messages() + - MEDIUM: log/sink: make the log forwarder code use ring_dispatch_messages() + - MINOR: buf: add b_add_ofs() to add a count to an absolute position + - MINOR: buf: add b_rel_ofs() to turn an absolute offset into a relative one + - MINOR: buf: add b_putblk_ofs() to copy a block at a specific position + - MINOR: buf: add b_getblk_ofs() that works relative to area and not head + - MINOR: ring: make the ring reader use only absolute offsets + - MINOR: ring: reserve one special value for the readers count + - MINOR: vecpair: add new vector pair based data manipulation mechanisms + - MINOR: vecpair: add necessary functions to use vecpairss from/to ring APIs + - MINOR: ring: rename totlen vs msglen in ring_write() + - MINOR: ring: add ring_data() to report the amount of data in a ring + - MINOR: ring: add ring_size() to return the ring's size + - MINOR: ring: add ring_dup() to copy a ring 
into another one + - MINOR: ring: also add ring_area(), ring_head(), ring_tail() + - MINOR: ring: make callers use ring_data() and ring_size(), not ring->buf + - MINOR: errors: use ring_dup() to duplicate the startup_logs + - MINOR: ring: use ring_size(), ring_area(), ring_head() and ring_tail() + - MINOR: ring: add a flag to indicate a mapped file + - MAJOR: ring: insert an intermediary ring_storage level + - MINOR: ring: resize only under thread isolation + - MINOR: ring: allow to reduce a ring size + - MEDIUM: ring: replace the buffer API in ring_write() with the vec<->ring API + - MEDIUM: ring: change the ring reader to use the new vector-based API now + - MEDIUM: ring: remove the struct buffer from the ring + - MEDIUM: ring: align the head and tail fields in the ring_storage structure + - MINOR: ring: make the reader check the readers count before inc/dec + - MEDIUM: ring: lock the tail's readers counters before proceeding with the changes + - MEDIUM: ring: protect the reader's positions against writers + - MEDIUM: ring: use the topmost bit of the tail as a lock + - MEDIUM: move the ring's lock to only protect the readers list + - MEDIUM: ring: unlock the ring's tail earlier + - MINOR: ring: don't take the readers lock if there are no readers + - MEDIUM: ring/applet: turn the wait_entry list to an mt_list instead + - MEDIUM: ring: protect the initialization of the initial reader offset + - MINOR: ring: make sure ring_dispatch waits when facing a changing message + - MAJOR: ring: drop the now unneeded lock + - OPTIM: ring: don't even try to update offset when failed to read + - OPTIM: ring: have only one thread at a time wake up all readers + - MINOR: ring: keep a few frequently used pointers in the local stack + - MINOR: ring: add the definition of a ring waiting cell + - MINOR: ring: make the number of queues configurable + - MAJOR: ring: implement a waiting queue in front of the ring + - MEDIUM: ring: significant boost in the loop by checking the ring queue 
ptr first + - MEDIUM: ring: improve speed in the queue waiting loop on x86_64 + - MINOR: ring: simplify the write loop a little bit + - CLEANUP: ring: further simplify the write loop + - MINOR: ring: it's not x86 but all non-ARMv8.1 which needs the read before OR + - MINOR: ring: avoid writes to cells during copy + - OPTIM: ring: use relaxed stores to release the threads + - CLEANUP: ring: use only curr_cell and not next_cell in the main write loop + - BUILD: ssl: fix build error on older compilers with openssl-3.2 + - BUG/MINOR: server: 'source' interface ignored from 'default-server' directive + - BUG/MAJOR: ring: free the ring storage not the ring itself when using maps + +2024/03/09 : 3.0-dev5 + - BUG/MEDIUM: applet: Fix HTX .rcv_buf callback function to release outbuf buffer + - BUG/MAJOR: ssl/ocsp: crash with ocsp when old process exit or using ocsp CLI + - BUG/MEDIUM: server: fix dynamic servers initial settings - BUG/MINOR: ssl/cli: duplicate cleaning code in cli_parse_del_crtlist - LICENSE: event_hdl: fix GPL license version - LICENSE: http_ext: fix GPL license version + - BUG/MEDIUM: mux-h1: Fix again 0-copy forwarding of chunks with an unknown size - BUG/MINOR: mux-h1: Properly report when mux is blocked during a nego + - MINOR: mux-h1: Move checks performed before a shutdown in a dedicated function + - MINOR: mux-h1: Move all stuff to detach a stream in an internal function + - MAJOR: mux-h1: Drain requests on client side before shut a stream down + - MEDIUM: htx/http-ana: No longer close connection on early HAProxy response + - MINOR: quic: filter show quic by address + - MINOR: quic: specify show quic output fields + - MINOR: quic: add MUX output for show quic + - CLEANUP: mux-h2: Fix h2s_make_data() comment about the return value - DOC: configuration: clarify ciphersuites usage - BUG/MINOR: config/quic: Alert about PROXY protocol use on a QUIC listener - BUG/MINOR: hlua: Fix log level to the right value when set via TXN:set_loglevel @@ -46,195 
+666,464 @@ ChangeLog : - BUG/MEDIUM: hlua: improper lock usage with SET_SAFE_LJMP() - BUG/MAJOR: hlua: improper lock usage with hlua_ctx_resume() - BUG/MINOR: hlua: don't call ha_alert() in hlua_event_subscribe() + - MINOR: hlua: use SEND_ERR to report errors in hlua_event_runner() + - CLEANUP: hlua: txn class functions may LJMP - BUG/MINOR: sink: fix a race condition in the TCP log forwarding code + - BUILD: thread: move lock label definitions to thread-t.h + - BUILD: tree-wide: fix a few missing includes in a few files + - BUILD: buf: make b_ncat() take a const for the source + - CLEANUP: assorted typo fixes in the code and comments + - CLEANUP: fix typo in naming for variable "unused" + - CI: run more smoke tests on config syntax to check memory related issues + - CI: enable monthly build only test on netbsd-9.3 - CI: skip scheduled builds on forks - BUG/MINOR: ssl/cli: typo in new ssl crl-file CLI description + - BUG/MEDIUM: quic: fix connection freeze on post handshake + - BUG/MINOR: mux-quic: fix crash on aborting uni remote stream + - CLEANUP: log: fix obsolete comment for add_sample_to_logformat_list() + - CLEANUP: tree-wide: use proper ERR_* return values for PRE_CHECK fcts - BUG/MINOR: cfgparse: report proper location for log-format-sd errors + - MINOR: vars: export var_set and var_unset functions + - MINOR: Add aes_gcm_enc converter - BUG/MEDIUM: quic: fix handshake freeze under high traffic - MINOR: quic: always use ncbuf for rx CRYPTO + - BUILD: ssl: define EVP_CTRL_AEAD_GET_TAG for older versions + - DOC: design: write first notes about ring-v2 + - OPTIM: sink: try to merge "dropped" messages faster + - OPTIM: sink: drop the sink lock used to count drops + - DEV: haring: make haring not depend on the struct ring itself + - DEV: haring: split the code between ring and buffer + - DEV: haring: automatically use the advertised ring header size - BUILD: solaris: fix compilation errors - - DOC: configuration: clarify ciphersuites usage (V2) - - BUG/MINOR: 
ssl: fix possible ctx memory leak in sample_conv_aes_gcm() - - CI: github: add -DDEBUG_LIST to the default builds - - BUG/MINOR: hlua: segfault when loading the same filter from different contexts - - BUG/MINOR: hlua: missing lock in hlua_filter_new() - - BUG/MINOR: hlua: fix missing lock in hlua_filter_delete() - - DEBUG: lua: precisely identify if stream is stuck inside lua or not - - MINOR: hlua: use accessors for stream hlua ctx - - BUG/MEDIUM: hlua: streams don't support mixing lua-load with lua-load-per-thread (2nd try) - - BUG/MINOR: listener: Wake proxy's mngmt task up if necessary on session release - - BUG/MINOR: listener: Don't schedule frontend without task in listener_release() - - BUG/MEDIUM: spoe: Don't rely on stream's expiration to detect processing timeout - - BUG/MINOR: spoe: Be sure to be able to quickly close IDLE applets on soft-stop - - CI: temporarily adjust kernel entropy to work with ASAN/clang - - BUG/MEDIUM: spoe: Return an invalid frame on recv if size is too small - - BUG/MINOR: session: ensure conn owner is set after insert into session - - BUG/MEDIUM: ssl: Fix crash in ocsp-update log function - - BUG/MINOR: mux-quic: close all QCS before freeing QCC tasklet - - BUG/MEDIUM: mux-fcgi: Properly handle EOM flag on end-of-trailers HTX block - - OPTIM: http_ext: avoid useless copy in http_7239_extract_{ipv4,ipv6} - - BUG/MINOR: server: 'source' interface ignored from 'default-server' directive - - BUILD: ssl: fix build error on older compilers with openssl-3.2 - - BUG/MINOR: ssl: Wrong ocsp-update "incompatibility" error message - - BUG/MINOR: ssl: Detect more 'ocsp-update' incompatibilities - - BUG/MINOR: server: fix persistence cookie for dynamic servers - - MINOR: server: allow cookie for dynamic servers - - BUG/MINOR: server: ignore 'enabled' for dynamic servers - - DOC: config: balance 'first' not usable in LOG mode - - BUG/MINOR: log/balance: detect if user tries to use unsupported algo - - BUG/MEDIUM: stick-tables: fix a small 
remaining race in expiration task - - BUG/MINOR: backend: properly handle redispatch 0 - - BUG/MEDIUM: stconn: Don't forward shutdown to SE if iobuf is not empty - - BUG/MEDIUM: stick-table: use the update lock when reading tables from peers - - BUG/MINOR: proxy: fix logformat expression leak in use_backend rules - - BUG/MINOR: init: relax LSTCHK_NETADM checks for non root -2024/02/26 : 2.9.6 +2024/02/23 : 3.0-dev4 + - BUG/MEDIUM: ssl: Fix crash when calling "update ssl ocsp-response" when an update is ongoing + - BUG/MEDIUM: quic: Wrong K CUBIC calculation. + - MINOR: quic: Update K CUBIC calculation (RFC 9438) + - MINOR: quic: Dynamic packet reordering threshold + - MINOR: quic: Add a counter for reordered packets + - BUG/MAJOR: mux-h1: Fix zero-copy forwarding when sending chunks of unknown size + - MINOR: stats: Use a dedicated function to check if output is almost full + - BUG/MEDIUM: applet: Add a flag to state an applet is using zero-copy forwarding + - BUG/MEDIUM: stconn/applet: Block 0-copy forwarding if producer needs more room + - MINOR: applet: Remove useless test on SE_FL_SHR/SHW flags + - MEDIUM: applet: Add notion of shutdown for write for applets + - MINOR: cli: No longer check SC for shutdown to interrupt wait command + - BUG/MEDIUM: stconn: Allow expiration update when READ/WRITE event is pending + - BUG/MEDIUM: stconn: Don't check pending shutdown to wake an applet up + - CLEANUP: stconn: Move SE flags set by app layer at the end of the bitfield + - MINOR: stconn: Rename SE_FL_MAY_FASTFWD and reorder bitfield + - MINOR: stconn: Add SE flag to announce zero-copy forwarding on consumer side + - MINOR: muxes: Announce support for zero-copy forwarding on consumer side + - BUG/MAJOR: stconn: Check support for zero-copy forwarding on both sides + - MINOR: muxes/applet: Simplify checks on options to disable zero-copy forwarding + - BUG/MINOR: quic: reject unknown frame type + - MINOR: quic: handle all frame types on reception + - BUG/MINOR: quic: 
reject HANDSHAKE_DONE as server + - BUG/MINOR: qpack: reject invalid increment count decoding + - BUG/MINOR: qpack: reject invalid dynamic table capacity + - DOC/MINOR: userlists: mention solutions to high cpu with hashes + - DOC: quic: Missing tuning setting in "Global parameters" + - BUG/MEDIUM: applet: Immediately free appctx on early error + - BUG/MEDIUM: hlua: Be able to garbage collect uninitialized lua sockets + - BUG/MEDIUM: hlua: Don't loop if a lua socket does not consume received data + - BUG/MEDIUM: quic: fix transient send error with listener socket + - MINOR: log: custom name for logformat node + - MINOR: sample: add type_to_smp() helper function + - MINOR: log: explicit typecasting for logformat nodes + - MINOR: log: simplify last_isspace in sess_build_logline() + - MINOR: log: simplify quotes handling in sess_build_logline() + - MINOR: log: print metadata prefixes separately in sess_build_logline() + - MINOR: log: automate string array construction in sess_build_logline() + - DOC: quic: fix recommandation for bind on multiple address + - MINOR: quic: warn on bind on multiple addresses if no IP_PKTINFO support + - OPTIM: quic: improve slightly qc_snd_buf() internal + - MINOR: quic: move IP_PKTINFO on send on a dedicated function + - MINOR: quic: remove sendto() usage variant + - MINOR: quic: only use sendmsg() syscall variant + - BUILD: applet: fix build on some 32-bit archs + - BUG/MINOR: quic: initialize msg_flags before sendmsg + - BUG/MEDIUM: mux-h1: Don't emit 0-CRLF chunk in h1_done_ff() when iobuf is empty + - CLEANUP: proxy/log: remove unused proxy flag + - CLEANUP: log: fix process_send_log() indentation + - CLEANUP: log: use free_logformat_list() in parse_logformat_string() + - MINOR: log: add free_logformat_node() helper function + - BUG/MINOR: log: fix potential lf->name memory leak + - BUG/MINOR: ist: allocate nul byte on istdup + - BUG/MINOR: stats: drop srv refcount on early release - BUG/MAJOR: promex: fix crash on deleted server - - 
BUG/MAJOR: ssl/ocsp: crash with ocsp when old process exit or using ocsp CLI + - BUG/MAJOR: server: fix stream crash due to deleted server + - BUG/MEDIUM: mux-quic: do not crash on qcs_destroy for connection error + - MINOR: cli: Remove useless loop on commands to find unescaped semi-colon + - BUG/MEDIUM: cli: Warn if pipelined commands are delimited by a \n + - BUG/MAJOR: cli: Restore non-interactive mode behavior with pipelined commands + - BUG/MINOR: quic: fix output of show quic + - MINOR: ssl: Call callback function after loading SSL CRL data + - BUG/MINOR: ist: only store NUL byte on succeeded alloc -2024/02/15 : 2.9.5 +2024/02/10 : 3.0-dev3 + - DOC: configuration: clarify http-request wait-for-body + - BUG/MAJOR: ssl_sock: Always clear retry flags in read/write functions + - MINOR: h3: add traces for stream sending function + - BUG/MEDIUM: h3: do not crash on invalid response status code + - BUG/MEDIUM: qpack: allow 6xx..9xx status codes + - BUG/MEDIUM: quic: fix crash on invalid qc_stream_buf_free() BUG_ON + - CLEANUP: log: deinitialization of the log buffer in one function + - BUG/MINOR: h1: Don't support LF only at the end of chunks + - BUG/MEDIUM: h1: Don't support LF only to mark the end of a chunk size + - MINOR: ssl: add HAVE_SSL_0RTT constant + - MINOR: ssl: rename HA_OPENSSL_HAVE_0RTT_SUPPORT constant to HAVE_SSL_0RTT_QUIC + - MEDIUM: ssl/quic: always compile the ssl_conf.early_data test + - DOC: httpclient: add dedicated httpclient section + - BUG/MINOR: h1-htx: properly initialize the err_pos field + - BUG/MEDIUM: h1: always reject the NUL character in header values + - CLEANUP: h1: remove unused function h1_measure_trailers() + - BUG/MINOR: ssl/quic: fix 0RTT define + - MINOR: mux-quic: prepare for earlier flow control update + - MINOR: mux-quic: define a flow control related type + - MEDIUM: mux-quic: limit stream flow control on snd_buf + - MEDIUM: mux-quic: limit conn flow control on snd_buf + - MINOR: mux-quic: remove unneeded sent-offset 
fields + - MINOR: mux-quic: check fctl during STREAM frame build + - MAJOR: mux-quic: remove intermediary Tx buffer + - MEDIUM: mux-quic: simplify sending API + - MEDIUM: mux-quic: release Tx buf on too small room + - MEDIUM: mux-quic: properly handle conn Tx buf exhaustion + - MINOR: mux-quic: realign Tx buffer if possible + - CLEANUP: connection: remove obsolete comment in header file + - OPTIM: connection: progressive hash for conn_calculate_hash() + - MINOR: tcp_act: fix alphabetical ordering of tcp request content actions + - MINOR: tcp-act: Rename "set-{mark,tos}" to "set-fc-{mark,tos}" + - MINOR: hlua: Rename set_{tos, mark} to set_fc_{tos, mark} + - MEDIUM: tcp-act: <expr> support for set-fc-{mark,tos} actions + - MEDIUM: tcp-act/backend: support for set-bc-{mark,tos} actions + - MINOR: stats: Be able to access to registered stats modules from anywhere + - MEDIUM: stats: Be able to access a specific field into a stats module + - MINOR: promex: Add a param to override the description when a metric is dumped + - MINOR: promex: Add info in the promex context to dump extra counters + - MEDIUM: promex: Dump frontends extra counters if requested + - MEDIUM: promex: Dump backends extra counters if requested + - MEDIUM: promex: Dump servers extra counters if requested + - MEDIUM: promex: Dump listeners extra counters if requested + - DOC: promex: Add documentation about extra-counters + - MINOR: promex: Always limit the number of labels dumped for each metric + - MEDIUM: promex: Simplify the context using generic pointers for restart points + - MINOR: promex: Remove unsued htx parameter when a metric is dumped + - MEDIUM: promex: Add a registration mechanism to support modules + - MEDIUM: promex: Dump metrics of registered modules with a way to filter them + - MEDIUM: promex/stick-table: Dump stick-table metrics via a promex module + - MEDIUM: promex/resolvers: Dump resolvers metrics via a promex module + - MINOR: promex: Rename dump functions to use the right 
wording + - MINOR: promex: Always pass the final name and description to promex_dmp_ts() + - MEDIUM: promex: Add support for filters on metric names + - REGTESTS: promex: Adapt script to be less verbose + - MINOR: compiler: add a new DO_NOT_FOLD() macro to prevent code folding + - MINOR: debug: make sure calls to ha_crash_now() are never merged + - MINOR: debug: make ABORT_NOW() store the caller's line number when using abort - BUG/MINOR: diag: always show the version before dumping a diag warning - BUG/MINOR: diag: run the final diags before quitting when using -c + - MINOR: acl: add extra diagnostics about suspicious string patterns - BUG/MINOR: quic: Wrong ack ranges handling when reaching the limit. - BUILD: quic: Variable name typo inside a BUG_ON(). + - DOC: config: fix typo for '%ms' log format alternative + - DOC: config: fix ordering for "txn.*" fetches + - MINOR: stream: add "txn.redispatch" fetch + - BUILD: debug: remove leftover parentheses in ABORT_NOW() + - MINOR: debug: make BUG_ON() catch build errors even without DEBUG_STRICT - BUG/MINOR: ssl: Fix error message after ssl_sock_load_ocsp call + - MINOR: debug: support passing an optional message in ABORT_NOW() + - MINOR: debug: add an optional message argument to the BUG_ON() family + - DEBUG: make the "debug dev {debug|warn|check}" command print a message - CLEANUP: quic: Code clarifications for QUIC CUBIC (RFC 9438) - BUG/MINOR: quic: fix possible integer wrap around in cubic window calculation - MINOR: quic: Stop using 1024th of a second. 
- - MINOR: compiler: add a new DO_NOT_FOLD() macro to prevent code folding - - MINOR: debug: make sure calls to ha_crash_now() are never merged - - MINOR: debug: make ABORT_NOW() store the caller's line number when using abort - - BUILD: debug: remove leftover parentheses in ABORT_NOW() - - MINOR: debug: make BUG_ON() catch build errors even without DEBUG_STRICT + - CI: github: abandon asan matrix.py helper + - CI: ssl: add yet another OpenSSL download fallback + - DOC: install: clarify WolfSSL chroot requirements + - MINOR: task: Move wait_event in the task header file + - MINOR: stconn: Be able to detect applets using HTX + - MINOR: stconn: Explicitly use an appctx to attach a stconn on it + - MINOR: stconn: Be prepared to handle error when a SC is attached to an applet + - MINOR: applet: Add dedicated IN/OUT buffers for appctx + - MINOR: applet: Add traces to debug receive/send and block/wake events + - MINOR: applet: Add support for callback functions to exchange data with channels + - MINOR: applet: Implement default functions to exchange data with channels + - MEDIUM: stconn: Add functions to handle applets I/O from the SC layer + - MEDIM: applet: Add the applet handler based on IN/OUT buffers + - MINOR: applet: Show IN/OUT buffers in trace messages when used + - MINOR: applet: Add flags on the appctx and stop abusing its state + - MINIOR: applet: Add flags to deal with ends of input, ends of stream and errors + - MINOR: applet: Remove appctx state field to only used the flags + - MINOR: applet: Add an appctx flag to report shutdown to applets + - MEDIUM: applet: Use appctx flags to report EOS/EOI/ERROR to SE + - MINOR: applet: Add callback function to deal with zero-copy forwarding + - MEDIUM: applet: Add support for zero-copy forwarding from an applet + - MINOR: applet: Automatically handle applets having more data for the stream + - MEDIUM: stats: Don't interrupt processing on partial post + - MAJOR: stats: Update HTTP stats applet to handle its own 
buffers + - MEDIUM: cache: Temporarily remove zero-copy forwarding support + - MAJOR: cache: Update HTTP cache applet to handle its own buffers + - MAJOR: cache: Send cached objects using zero-copy forwarding + - MINOR: stconn: Add support for flags during zero-copy forwarding negotiation + - MINOR: mux-h1: Be able to define the length of a chunk size when it is prepended + - MEDIUM: stconn: Nofify requested size during zero-copy forwarding nego is exact + - MINOR: mux-h1: Stop zero-copy forwarding during nego for too big requested size + - MEDIUM: mux-h1: Support zero-copy forwarding for chunks with an unknown size + - MAJOR: stats: Send stats dump over HTTP using zero-copy forwarding + - MEDIUM: applet: Simplify a bit API to exchange data with applets + - MINOR: cache: Remove unsed .data_sent field from the cache applet context + - MINOR: applet: Use an option to disable zero-copy forwarding for all applets + - MINOR: applet: Identify applets using their own buffers via a flag - BUG/MINOR: ssl: Duplicate ocsp update mode when dup'ing ckch - - BUG/MINOR: ssl: Clear the ckch instance when deleting a crt-list line - MINOR: ssl: Use OCSP_CERTID instead of ckch_store in ckch_store_build_certid + - BUG/MINOR: ssl: Clear the ckch instance when deleting a crt-list line - BUG/MEDIUM: ocsp: Separate refcount per instance and per store - BUG/MINOR: ssl: Destroy ckch instances before the store during deinit - BUG/MINOR: ssl: Reenable ocsp auto-update after an "add ssl crt-list" - - REGTESTS: ssl: Fix empty line in cli command input - REGTESTS: ssl: Add OCSP related tests + - REGTESTS: ssl: Fix empty line in cli command input - DOC: install: recommend pcre2 - DOC: config: fix misplaced "txn.conn_retries" - DOC: config: fix typos for "bytes_{in,out}" - DOC: config: fix misplaced "bytes_{in,out}" + - DOC: config: add more custom log format table alternatives + - MINOR: stream: rename "txn.redispatch" to "txn.redispatched" + - MINOR: sample: implement bc_{be,srv}_queue samples + 
- BUG/MINOR: mux-h2: count rejected DATA frames against the connection's flow control + - MINOR: mux-h2: count excess of CONTINUATION frames as a glitch + - MINOR: mux-h2: count late reduction of INITIAL_WINDOW_SIZE as a glitch - DOC: internal: update missing data types in peers-v2.0.txt + - MEDIUM: stick-tables: add a new stored type for glitch_cnt and glitch_rate + - MINOR: session: add the necessary functions to update the per-session glitches + - MEDIUM: mux-h2: update session trackers with number of glitches + - BUG/MINOR: server/cli: add missing LF at the end of certain notice/error lines - BUG/MINOR: vars/cli: fix missing LF after "get var" output - BUG/MEDIUM: cli: fix once for all the problem of missing trailing LFs + - MINOR: cli: make sure to always print a pending message after release() + - MINOR: cli: always reset the applet task's timeout + - MINOR: cli: add a new "wait" command to wait for a certain delay + - BUG/MINOR: applet: Always release empty appctx buffers after processing + - MINOR: server: split the server deletion code in two parts + - MINOR: cli/wait: make the wait command support a more detailed help message + - MINOR: cli/wait: also support an unrecoverable failure status + - MINOR: cli/wait: also pass up to 4 arguments to the external conditions + - MINOR: cli/wait: add a condition to wait on a server to become unused - CI: Update to actions/cache@v4 - BUILD: address a few remaining calloc(size, n) cases - BUG/MEDIUM: pool: fix rare risk of deadlock in pool_flush() - - BUG/MEDIUM: ssl: Fix crash when calling "update ssl ocsp-response" when an update is ongoing - - BUG/MEDIUM: quic: Wrong K CUBIC calculation. 
- - MINOR: quic: Update K CUBIC calculation (RFC 9438) - - MINOR: quic: Dynamic packet reordering threshold - - MINOR: quic: Add a counter for reordered packets - - BUG/MEDIUM: stconn: Allow expiration update when READ/WRITE event is pending - - BUG/MEDIUM: stconn: Don't check pending shutdown to wake an applet up - - CLEANUP: stconn: Move SE flags set by app layer at the end of the bitfield - - MINOR: stconn: Rename SE_FL_MAY_FASTFWD and reorder bitfield - - MINOR: stconn: Add SE flag to announce zero-copy forwarding on consumer side - - MINOR: muxes: Announce support for zero-copy forwarding on consumer side - - BUG/MAJOR: stconn: Check support for zero-copy forwarding on both sides - - MINOR: muxes/applet: Simplify checks on options to disable zero-copy forwarding - - BUG/MEDIUM: mux-h2: Switch pending error to error if demux buffer is empty - - BUG/MEDIUM: mux-h2: Only Report H2C error on read error if demux buffer is empty - - BUG/MEDIUM: mux-h2: Don't report error on SE if error is only pending on H2C - - BUG/MEDIUM: mux-h2: Don't report error on SE for closed H2 streams -2024/01/31 : 2.9.4 - - BUG/MINOR: h3: fix checking on NULL Tx buffer +2024/01/26 : 3.0-dev2 + - MINOR: ot: logsrv struct becomes logger + - MINOR: ssl: Update ssl_fc_curve/ssl_bc_curve to use SSL_get0_group_name + - CLEANUP: ssl: fix indentation in smp_fetch_ssl_fc_ec() + - DEV: patchbot: produce a verdict for too long commit messages + - CLEANUP: ssl: fix indentation in smp_fetch_ssl_fc_ec() (part 2) + - CLEANUP: quic: Double quic_dgram_parse() prototype declaration. 
+ - BUG/MINOR: map: list-based matching potential ordering regression + - REGTESTS: add a test to ensure map-ordering is preserved + - DOC: config: fix typo about map_*_key converters + - DOC: configuration: corrected description of keyword tune.ssl.ocsp-update.mindelay + - MINOR: map: mapfile ordering also matters for tree-based match types + - DEV: phash: add a trivial perfect hash generator for integers + - OPTIM: http: simplify http_get_status_idx() using a hash + - CLEANUP: http: avoid duplicating literals in find_http_meth() + - MINOR: http: add infrastructure to choose status codes for err / fail + - MEDIUM: http_act: check status codes against the bit fields for err/fail + - MEDIUM: http: add the ability to redefine http-err-codes and http-fail-codes + - CI: codespell: ignore some words in URLs + - CI: codespell: add more words to whitelist + - CLEANUP: fix spelling of "occured" in src/h3.c + - BUILD: quic: missing include for quic_tp + - BUG/MINOR: mux-quic: do not prevent non-STREAM sending on flow control + - MEDIUM: ssl: allow multiple fallback certificate to allow ECDSA/RSA selection + - MEDIUM: ssl: generate '*' SNI filters for default certificates + - MEDIUM: ssl: does not use default_ctx for 'generate-certificate' option + - REORG: ssl: move 'generate-certificates' code to ssl_gencert.c + - DOC: configuration: update configuration on how to have multiple default certs + - MEDIUM: ssl: implements 'default-crt' keyword for bind Lines + - CI: github: update wolfSSL to 5.6.6 + - DOC: INSTALL: require at least WolfSSL 5.6.6 + - DEV: h2: add support for multiple flags in mkhdr + - DEV: h2: support hex-encoded data sequences in mkhdr + - BUG/MINOR: mux-h2: also count streams for refused ones + - BUG/MEDIUM: quic: keylog callback not called (USE_OPENSSL_COMPAT) + - MINOR: vars: fix indentation in var_clear_buffer() - DOC: configuration: fix set-dst in actions keywords matrix - BUG/MEDIUM: mux-h2: refine connection vs stream error on headers - MINOR: 
mux-h2/traces: add a missing trace on connection WU with negative inc + - MINOR: mux-h2: add a counter of "glitches" on a connection + - MINOR: connection: add a new mux_ctl to report number of connection glitches + - MINOR: mux-h2: implement MUX_CTL_GET_GLITCHES + - MINOR: connection: add sample fetches to report per-connection glitches + - BUILD: stick-table: fix build error on 32-bit platforms + - MINOR: quic: Transport parameters encoding without version_information + - MINOR: quic: Enable early data at SSL session level (aws-lc) + - MINOR: ssl_sock: Early data disabled during SSL_CTX switching (aws-lc) + - MINOR: quic: Correctly wait for the completion of handshakes with early data (aws-lc) - BUG/MEDIUM: cli: some err/warn msg dumps add LR into CSV output on stat's CLI - BUG/MINOR: jwt: fix jwt_verify crash on 32-bit archs + - BUILD: quic: fix build error when using the compatibility layer + - BUILD: quic: Fix build error when building QUIC against wolfssl. + - BUILD: quic: Fix build error when building QUIC against libressl. - BUG/MINOR: hlua: fix uninitialized var in hlua_core_get_var() + - CLEANUP: hlua: fix indent, remove extra return in hlua_core_get_var() - BUG/MEDIUM: cache: Fix crash when deleting secondary entry - BUG/MINOR: quic: newreno QUIC congestion control algorithm no more available - CLEANUP: quic: Remove unused CUBIC_BETA_SCALE_FACTOR_SHIFT macro. 
- MINOR: quic: Stop hardcoding a scale shifting value (CUBIC_BETA_SCALE_FACTOR_SHIFT) - MINOR: quic: extract qc_stream_buf free in a dedicated function - BUG/MEDIUM: quic: remove unsent data from qc_stream_desc buf - - DOC: configuration: clarify http-request wait-for-body - - BUG/MAJOR: ssl_sock: Always clear retry flags in read/write functions - - MINOR: h3: add traces for stream sending function - - BUG/MEDIUM: h3: do not crash on invalid response status code - - BUG/MEDIUM: qpack: allow 6xx..9xx status codes - - BUG/MEDIUM: quic: fix crash on invalid qc_stream_buf_free() BUG_ON - - BUG/MINOR: h1: Don't support LF only at the end of chunks - - BUG/MEDIUM: h1: Don't support LF only to mark the end of a chunk size - - DOC: httpclient: add dedicated httpclient section - - BUG/MINOR: h1-htx: properly initialize the err_pos field - - BUG/MEDIUM: h1: always reject the NUL character in header values - -2024/01/18 : 2.9.3 - - BUILD: quic: missing include for quic_tp - - BUG/MINOR: mux-quic: do not prevent non-STREAM sending on flow control - - BUG/MINOR: mux-h2: also count streams for refused ones - - BUG/MEDIUM: quic: keylog callback not called (USE_OPENSSL_COMPAT) - -2024/01/11 : 2.9.2 + - CLEANUP: fix spelling of "elemt" + - CI: extend spell check white list + - CI: enable spell check on git push + - BUILD: makefile: also define cmd_CXX to pretty-print C++ build commands + - BUILD/MEDIUM: deviceatlas: addon build rework. + - DOC: deviceatlas: update to be in line with the v3 api. + - BUILD/MEDIUM: deviceatlas: updating the addon part. 
+ - BUILD: deviceatlas: remove unneeded depenency on libcurl / libzip + - BUILD: deviceatlas: fix empty "-I" left on CFLAGS + - Revert "CI: enable spell check on git push" + +2024/01/06 : 3.0-dev1 + - MINOR: channel: Use dedicated functions to deal with STREAMER flags + - MEDIUM: applet: Handle channel's STREAMER flags on applets size + - MINOR: applets: Use channel's field to compute amount of data received + - MEDIUM: cache: Save body size of cached objects and track it on delivery + - MEDIUM: cache: Add support for endp-to-endp fast-forwarding + - MINOR: cache: Add global option to enable/disable zero-copy forwarding + - MINOR: pattern: Use reference name as filename to read patterns from a file + - MEDIUM: pattern: Add support for virtual and optional files for patterns + - DOC: config: Add section about name format for maps and ACLs + - DOC: management/lua: Update commands about map and acl + - MINOR: promex: Add support for specialized front/back/li/srv metric names + - MINOR: promex: Export active/backup metrics per-server + - BUG/MINOR: ssl: Double free of OCSP Certificate ID + - MINOR: ssl/cli: Add ha_(warning|alert) msgs to CLI ckch callback + - BUG/MINOR: ssl: Wrong OCSP CID after modifying an SSL certficate + - BUG/MINOR: lua: Wrong OCSP CID after modifying an SSL certficate (LUA) + - DOC: configuration: typo req.ssl_hello_type + - MINOR: hq-interop: add fastfwd support + - CLEANUP: mux_quic: rename ffwd function with prefix qmux_strm_ + - MINOR: mux-quic: add traces for 0-copy/fast-forward + - BUG/MINOR: mworker/cli: fix set severity-output support + - CLEANUP: mworker/cli: add comments about pcli_find_and_exec_kw() + - BUG/MEDIUM: quic: Possible buffer overflow when building TLS records + - BUILD: ssl: update types in wolfssl cert selection callback + - MINOR: ssl: activate the certificate selection callback for WolfSSL + - CI: github: switch to wolfssl git-c4b77ad for new PR + - BUG/MEDIUM: map/acl: pat_ref_{set,delete}_by_id regressions + - 
BUG/MINOR: ext-check: cannot use without preserve-env + - CLEANUP: mux-quic: remove unused prototype + - MINOR: mux-quic: clean up qcs Rx buffer allocation API + - MINOR: mux-quic: clean up qcs Tx buffer allocation API + - CLEANUP: mux-quic: clean up app ops callback definitions + - MINOR: mux-quic: factorize QC_SF_UNKNOWN_PL_LENGTH set + - MINOR: h3: complete traces for sending + - MINOR: h3: adjust zero-copy sending related code + - MINOR: hq-interop: use zero-copy to transfer single HTX data block + - BUG/MEDIUM: quic: QUIC CID removed from tree without locking + - BUG/MEDIUM: stconn: Block zero-copy forwarding if EOS/ERROR on consumer side + - BUG/MEDIUM: mux-h1: Cound data from input buf during zero-copy forwarding + - BUG/MEDIUM: mux-h1: Explicitly skip request's C-L header if not set originally + - CLEANUP: mux-h1: Fix a trace message about C-L header addition + - BUG/MEDIUM: mux-h2: Report too large HEADERS frame only when rxbuf is empty + - BUG/MEDIUM: mux-quic: report early error on stream + - DOC: config: add arguments to sample fetch methods in the table + - DOC: config: also add arguments to the converters in the table - BUG/MINOR: resolvers: default resolvers fails when network not configured + - SCRIPTS: mk-patch-list: produce a list of patches + - DEV: patchbot: add the AI-based bot to pre-select candidate patches to backport + - BUG/MEDIUM: mux-h2: Switch pending error to error if demux buffer is empty + - BUG/MEDIUM: mux-h2: Only Report H2C error on read error if demux buffer is empty + - BUG/MEDIUM: mux-h2: Don't report error on SE if error is only pending on H2C + - BUG/MEDIUM: mux-h2: Don't report error on SE for closed H2 streams - DOC: config: Update documentation about local haproxy response + - DEV: patchbot: use checked buttons as reference instead of internal table + - DEV: patchbot: allow to show/hide backported patches + - MINOR: h3: remove quic_conn only reference - BUG/MINOR: server: Use the configured address family for the initial 
resolution + - MINOR: mux-quic: remove qcc_shutdown() from qcc_release() + - MINOR: mux-quic: use qcc_release in case of init failure + - MINOR: mux-quic: adjust error code in init failure + - MINOR: h3: add traces for connection init stage + - BUG/MINOR: h3: properly handle alloc failure on finalize + - MINOR: h3: use INTERNAL_ERROR code for init failure - BUG/MAJOR: stconn: Disable zero-copy forwarding if consumer is shut or in error - MINOR: stats: store the parent proxy in stats ctx (http) - BUG/MEDIUM: stats: unhandled switching rules with TCP frontend + - MEDIUM: proxy: set PR_O_HTTP_UPG on implicit upgrades + - MINOR: proxy: monitor-uri works with tcp->http upgrades + - OPTIM: server: eb lookup for server_find_by_name() + - OPTIM: server: ebtree lookups for findserver_unique_* functions - MINOR: server/event_hdl: add server_inetaddr struct to facilitate event data usage - MINOR: server/event_hdl: update _srv_event_hdl_prepare_inetaddr prototype - BUG/MINOR: server/event_hdl: propagate map port info through inetaddr event + - MINOR: server: ensure connection cleanup on server addr changes + - CLEANUP: server/event_hdl: remove purge_conn hint in INETADDR event + - MEDIUM: server: merge srv_update_addr() and srv_update_addr_port() logic + - CLEANUP: server: remove unused server_parse_addr_change_request() function + - CLEANUP: resolvers: remove duplicate func prototype + - MINOR: resolvers: add unique numeric id to nameservers + - MEDIUM: server: make server_set_inetaddr() updater serializable + - MINOR: server/event_hdl: expose updater info through INETADDR event + - MINOR: server: add dns hint in server_inetaddr_updater struct + - MEDIUM: server/dns: clear RMAINT when addr resolves again + - BUG/MINOR: server/dns: use server_set_inetaddr() to unset srv addr from DNS + - BUG/MEDIUM: server/dns: perform svc_port updates atomically from SRV records + - MEDIUM: peers: use server as stream target + - CLEANUP: peers: remove unused sock_init_arg struct member + - 
CLEANUP: peers: remove unused "proto" and "xprt" struct members + - MINOR: peers: rely on srv->addr and remove peer->addr + - DOC: config: add context hint for server keywords + - MINOR: stktable: add table_process_entry helper function + - MINOR: stktable: use {show,set,clear} table with ptr + - MINOR: map: add map_*_key converters to provide the matching key - DOC: fix typo for fastfwd QUIC option - BUG/MINOR: mux-quic: always report error to SC on RESET_STREAM emission + - MEDIUM: mux-quic: add BUG_ON if sending on locally closed QCS - BUG/MINOR: mux-quic: disable fast-fwd if connection on error - BUG/MINOR: quic: Wrong keylog callback setting. - BUG/MINOR: quic: Missing call to TLS message callbacks - MINOR: h3: check connection error during sending - BUG/MINOR: h3: close connection on header list too big - - MINOR: h3: add traces for connection init stage - - BUG/MINOR: h3: properly handle alloc failure on finalize - BUG/MINOR: h3: close connection on sending alloc errors - BUG/MINOR: h3: disable fast-forward on buffer alloc failure + - Revert "MINOR: mux-quic: Disable zero-copy forwarding for send by default" + - MINOR: stktable: stktable_data_ptr() cannot fail in table_process_entry() + - CLEANUP: assorted typo fixes in the code and comments - CI: use semantic version compare for determing "latest" OpenSSL + - CLEANUP: server: remove ambiguous check in srv_update_addr_port() + - CLEANUP: resolvers: remove unused RSLV_UPD_OBSOLETE_IP flag + - CLEANUP: resolvers: remove some more unused RSLV_UDP flags + - MEDIUM: server: simplify snr_set_srv_down() to prevent confusions + - MINOR: backend: export get_server_*() functions + - MINOR: tcpcheck: export proxy_parse_tcpcheck() + - MEDIUM: udp: allow to retrieve the frontend destination address - MINOR: global: export a way to list build options - MINOR: debug: add features and build options to "show dev" + - BUG/MINOR: server: fix server_find_by_name() usage during parsing - REGTESTS: check attach-srv out of order 
declaration - CLEANUP: quic: Remaining useless code into server part - BUILD: quic: Missing quic_ssl.h header protection - BUG/MEDIUM: h3: fix incorrect snd_buf return value + - MINOR: h3: do not consider missing buf room as error on trailers - BUG/MEDIUM: stconn: Forward shutdown on write timeout only if it is forwardable - BUG/MEDIUM: stconn: Set fsb date if zero-copy forwarding is blocked during nego - BUG/MEDIUM: spoe: Never create new spoe applet if there is no server up - MINOR: mux-h2: support limiting the total number of H2 streams per connection - - MINOR: ot: logsrv struct becomes logger - - MINOR: ssl: Update ssl_fc_curve/ssl_bc_curve to use SSL_get0_group_name - - CLEANUP: quic: Double quic_dgram_parse() prototype declaration. - - BUG/MINOR: map: list-based matching potential ordering regression - - REGTESTS: add a test to ensure map-ordering is preserved - - DOC: configuration: corrected description of keyword tune.ssl.ocsp-update.mindelay - -2023/12/15 : 2.9.1 - - BUG/MINOR: ssl: Double free of OCSP Certificate ID - - MINOR: ssl/cli: Add ha_(warning|alert) msgs to CLI ckch callback - - BUG/MINOR: ssl: Wrong OCSP CID after modifying an SSL certficate - - BUG/MINOR: lua: Wrong OCSP CID after modifying an SSL certficate (LUA) - - DOC: configuration: typo req.ssl_hello_type - - BUG/MINOR: mworker/cli: fix set severity-output support - - BUG/MEDIUM: quic: Possible buffer overflow when building TLS records - - BUILD: ssl: update types in wolfssl cert selection callback - - BUG/MEDIUM: map/acl: pat_ref_{set,delete}_by_id regressions - - BUG/MINOR: ext-check: cannot use without preserve-env - - MINOR: version: mention that it's stable now - - BUG/MEDIUM: quic: QUIC CID removed from tree without locking - - BUG/MEDIUM: stconn: Block zero-copy forwarding if EOS/ERROR on consumer side - - BUG/MEDIUM: mux-h1: Cound data from input buf during zero-copy forwarding - - BUG/MEDIUM: mux-h1: Explicitly skip request's C-L header if not set originally - - CLEANUP: 
mux-h1: Fix a trace message about C-L header addition - - BUG/MEDIUM: mux-h2: Report too large HEADERS frame only when rxbuf is empty - - BUG/MEDIUM: mux-quic: report early error on stream - - DOC: config: add arguments to sample fetch methods in the table - - DOC: config: also add arguments to the converters in the table + - CLEANUP: mux-h2: remove the printfs from previous commit on h2 streams limit. + - DEV: h2: add the ability to emit literals in mkhdr + - DEV: h2: add the preface as well in supported output types + - DEV: h2: support passing raw data for a frame + - IMPORT: ebtree: implement and use flsnz_long() to count bits + - IMPORT: ebtree: switch the sizes and offsets to size_t and ssize_t + - IMPORT: ebtree: rework the fls macros to better deal with arch-specific ones + - IMPORT: ebtree: make string_equal_bits turn back to unsigned char + - IMPORT: ebtree: use unsigned ints for flznz() + - IMPORT: ebtree: make string_equal_bits() return an unsigned + +2023/12/05 : 3.0-dev0 + - exact copy of 2.9.0 2023/12/05 : 2.9.0 - DOC: config: add missing colon to "bytes_out" sample fetch keyword (2) @@ -1,14 +1,19 @@ Installation instructions for HAProxy ===================================== -HAProxy 2.9 is a stable version, which means that it will get fixes for bugs as -they are discovered till around Q1 2025 and should not receive new features. -This version is mostly suited at experienced users who are willing to quickly -follow updates. New users are encouraged to use long term supported versions -such as the ones provided by their software vendor or Linux distribution, as -such versions require far less common updates. - -If for any reason you'd prefer to use a different version than the one packaged +HAProxy 3.0 is a long-term supported version, which means that it will get +fixes for bugs as they are discovered till around Q2 2029 and will not receive +new features. 
This version is suitable for general deployment as it is expected +to receive less frequent updates than regular stable branches which have an odd +digit in the minor version number. New users are encouraged to use long-term +supported versions such as the ones provided by their software vendor, Linux +distribution, or by a trusted package maintainer. Experienced users who manage +a fleet of load balancers are encouraged to deploy at least one node with the +latest weekly development version to get familiar with upcoming changes and +possibly detect unwelcome changes or bugs before the release. This is also a +great way to get new features implemented exactly as desired. + +If for any reason you would prefer a different version than the one packaged for your system, you want to be certain to have all the fixes or to get some commercial support, other choices are available at http://www.haproxy.com/. @@ -33,18 +38,26 @@ are a few build examples : - recent Linux system with all options, make and install : $ make clean $ make -j $(nproc) TARGET=linux-glibc \ - USE_OPENSSL=1 USE_LUA=1 USE_PCRE2=1 USE_SYSTEMD=1 + USE_OPENSSL=1 USE_QUIC=1 USE_QUIC_OPENSSL_COMPAT=1 \ + USE_LUA=1 USE_PCRE2=1 $ sudo make install - - FreeBSD and OpenBSD, build with all options : - $ gmake -j 4 TARGET=freebsd USE_OPENSSL=1 USE_LUA=1 USE_PCRE2=1 + - FreeBSD + OpenSSL, build with all options : + $ gmake -j $(sysctl -n hw.ncpu) TARGET=freebsd \ + USE_OPENSSL=1 USE_QUIC=1 USE_QUIC_OPENSSL_COMPAT=1 \ + USE_LUA=1 USE_PCRE2=1 + + - OpenBSD + LibreSSL, build with all options : + $ gmake -j $(sysctl -n hw.ncpu) TARGET=openbsd \ + USE_OPENSSL=1 USE_QUIC=1 USE_LUA=1 USE_PCRE2=1 - embedded Linux, build using a cross-compiler : $ make -j $(nproc) TARGET=linux-glibc USE_OPENSSL=1 USE_PCRE2=1 \ - CC=/opt/cross/gcc730-arm/bin/gcc ADDLIB=-latomic + CC=/opt/cross/gcc730-arm/bin/gcc CFLAGS="-mthumb" ADDLIB=-latomic - Build with static PCRE on Solaris / UltraSPARC : - $ make TARGET=solaris CPU=ultrasparc 
USE_STATIC_PCRE2=1 + $ make -j $(/usr/sbin/psrinfo -p) TARGET=solaris \ + CPU_CFLAGS="-mcpu=v9" USE_STATIC_PCRE2=1 For more advanced build options or if a command above reports an error, please read the following sections. @@ -72,10 +85,10 @@ can use a relatively similar one and adjust specific variables by hand. Most configuration variables are in fact booleans. Some options are detected and enabled by default if available on the target platform. This is the case for all those named "USE_<feature>". These booleans are enabled by "USE_<feature>=1" -and are disabled by "USE_<feature>=" (with no value). An exhaustive list of the -supported USE_* features is located at the top of the main Makefile. The last -occurrence of such an option on the command line overrides any previous one. -Example : +and are disabled by "USE_<feature>=" (with no value) or "USE_<feature>=0". An +exhaustive list of the supported USE_* features is located at the top of the +main Makefile. The last occurrence of such an option on the command line +overrides any previous one. Example : $ make TARGET=generic USE_THREAD= @@ -226,7 +239,7 @@ to forcefully enable it using "USE_LIBCRYPT=1". ----------------- For SSL/TLS, it is necessary to use a cryptography library. HAProxy currently supports the OpenSSL library, and is known to build and work with branches -1.0.0, 1.0.1, 1.0.2, 1.1.0, 1.1.1, 3.0, 3.1 and 3.2. It is recommended to use +1.0.0, 1.0.1, 1.0.2, 1.1.0, 1.1.1, and 3.0 to 3.3. It is recommended to use at least OpenSSL 1.1.1 to have support for all SSL keywords and configuration in HAProxy. OpenSSL follows a long-term support cycle similar to HAProxy's, and each of the branches above receives its own fixes, without forcing you to @@ -280,11 +293,11 @@ SSL library files using SSL_LIB. 
Example : USE_OPENSSL=1 SSL_INC=/opt/ssl-1.1.1/include SSL_LIB=/opt/ssl-1.1.1/lib To use HAProxy with WolfSSL, WolfSSL must be built with haproxy support, at -least WolfSSL 5.6.4 is needed, but a development version might be needed for +least WolfSSL 5.6.6 is needed, but a development version might be needed for some of the features: $ cd ~/build/wolfssl - $ ./configure --enable-haproxy --enable-quic --prefix=/opt/wolfssl-5.6.4/ + $ ./configure --enable-haproxy --enable-quic --prefix=/opt/wolfssl-5.6.6/ $ make -j $(nproc) $ make install @@ -292,12 +305,24 @@ Please also note that wolfSSL supports many platform-specific features that may affect performance, and that for production uses it might be a good idea to check them using "./configure --help". Please refer to the lib's documentation. +When running wolfSSL in chroot, either mount /dev/[u]random devices into the +chroot: + + $ mkdir -p /path/to/chrootdir/dev/ + $ mknod -m 444 /path/to/chrootdir/dev/random c 1 8 + $ mknod -m 444 /path/to/chrootdir/dev/urandom c 1 9 + +Or, if your OS supports it, enable the getrandom() syscall by appending the +following argument to the wolfSSL configure command: + + EXTRA_CFLAGS=-DWOLFSSL_GETRANDOM=1 + Building HAProxy with wolfSSL requires to specify the API variant on the "make" command line, for example: $ cd ~/build/haproxy $ make -j $(nproc) TARGET=generic USE_OPENSSL_WOLFSSL=1 USE_QUIC=1 \ - SSL_INC=/opt/wolfssl-5.6.4/include SSL_LIB=/opt/wolfssl-5.6.4/lib + SSL_INC=/opt/wolfssl-5.6.6/include SSL_LIB=/opt/wolfssl-5.6.6/lib To use HAProxy with AWS-LC you must have version v1.13.0 or newer of AWS-LC built and installed locally. @@ -524,7 +549,11 @@ bindings must be explicitly enabled with a specific QUIC tuning parameter. This section assumes that you have already read section 2 (basic principles) and section 3 (build environment). It often refers to section 4 (dependencies). +It goes into more details with the main options. 
+ +5.1) Configuring the TARGET +--------------------------- To build haproxy, you have to choose your target OS amongst the following ones and assign it to the TARGET variable : @@ -545,29 +574,64 @@ and assign it to the TARGET variable : - generic for any other OS or version. - custom to manually adjust every setting -You may also choose your CPU to benefit from some optimizations. This is -particularly important on UltraSparc machines. For this, you can assign -one of the following choices to the CPU variable : - - - i686 for intel PentiumPro, Pentium 2 and above, AMD Athlon (32 bits) - - i586 for intel Pentium, AMD K6, VIA C3. - - ultrasparc : Sun UltraSparc I/II/III/IV processor - - power8 : IBM POWER8 processor - - power9 : IBM POWER9 processor - - armv81 : modern ARM cores (Cortex A55/A75/A76/A78/X1, Neoverse, Graviton2) - - a72 : ARM Cortex-A72 or A73 (e.g. RPi4, Odroid N2, AWS Graviton) - - a53 : ARM Cortex-A53 or any of its successors in 64-bit mode (e.g. RPi3) - - armv8-auto : support both older and newer armv8 cores with a minor penalty, - thanks to gcc 10's outline atomics (default with gcc 10.2). - - native : use the build machine's specific processor optimizations. Use with - extreme care, and never in virtualized environments (known to break). - - generic : any other processor or no CPU-specific optimization. (default) - -Alternatively, you may just set the CPU_CFLAGS value to the optimal GCC options -for your platform. A second variable named SMALL_OPTS also supports passing a -number of defines and compiler options usually for small systems. For better -clarity it's recommended to pass the options which result in a smaller binary -(like memory limits or -Os) into this variable. +Example: + $ make -j $(nproc) TARGET=linux-glibc + +AIX 5.3 is known to work with the generic target. 
However, for the binary to +also run on 5.2 or earlier, you need to build with DEFINE="-D_MSGQSUPPORT", +otherwise __fd_select() will be used while not being present in the libc, but +this is easily addressed using the "aix52" target. If you get build errors +because of strange symbols or section mismatches, simply remove -g from +ARCH_FLAGS. + +Building on AIX 7.2 works fine using the "aix72-gcc" TARGET. It adds two +special CFLAGS to prevent the loading of AIX's xmem.h and var.h. This is done +by defining the corresponding include-guards _H_XMEM and _H_VAR. Without +excluding those header-files the build fails because of redefinition errors. +Furthermore, the atomic library is added to the LDFLAGS to allow for +multithreading via USE_THREAD. + +You can easily define your own target with the GNU Makefile. Unknown targets +are processed with no default option except USE_POLL=default. So you can very +well use that property to define your own set of options. USE_POLL and USE_SLZ +can even be disabled by setting them to an empty string or a zero. For +example : + + $ gmake TARGET=tiny USE_POLL="" USE_SLZ=0 TARGET_CFLAGS=-fomit-frame-pointer + + +5.2) Adding extra CFLAGS for compiling +-------------------------------------- +A generic CFLAGS variable may be set to append any option to pass to the C +compiler. These flags are passed last so the variable may be used to override +other options such as warnings, optimization levels, include paths etc. + +A default optimization level of -O2 is set by variable OPT_CFLAGS which may be +overridden if desired. It's used early in the list of CFLAGS so that any other +set of CFLAGS providing a different value may easily override it. + +Some platforms may benefit from some CPU-specific options that will enable +certain instruction sets, word size or endianness for example. One of them is +the common "-march=native" that indicates to modern compilers that they need to +optimize for the machine the compiler is running on. 
Such options may be either +passed in the CPU_CFLAGS or in the CFLAGS variable, either will work though +one may be more convenient for certain methods of packaging and the other one +for other methods. Among the many possible options, the following ones are +known for having successfully been used: + + - "-march=native" for a native build + - "-march=armv8-a+crc" for older ARM Cortex A53/A72/A73 (such as RPi 3B/4B) + - "-march=armv8.1-a" for modern ARM Cortex A55/A76, Graviton2+, RPi 5 + - "-march=armv8-a+crc -moutline-atomics" to support older ARM with better + support of modern cores with gcc-10+ + - "-mavx", "-mavx2", "-mavx512", to enable certain x86 SIMD instruction sets + - "-march=i586" to support almost all 32-bit x86 systems + - "-march=i686" to support only the latest 32-bit x86 systems + - "-march=i386" to support even the oldest 32-bit x86 systems + - "-mlittle-endian -march=armv5te" for some little-endian ARMv5 systems + - "-mcpu=v9 -mtune=ultrasparc -m64" for a 64-bit Solaris SPARC build + - "-march=1004kc -mtune=1004kc" for some multi-core 32-bit MIPS 1004Kc + - "-march=24kc -mtune=24kc" for some single-core 32-bit MIPS 24Kc If you are building for a different system than the one you're building on, this is called "cross-compiling". HAProxy supports cross-compilation pretty @@ -585,20 +649,49 @@ flags are passed to the compiler nor what compiler is involved. Simply append details again. It is recommended to use this option when cross-compiling to verify that the paths are correct and that /usr/include is never involved. -You may want to build specific target binaries which do not match your native -compiler's target. This is particularly true on 64-bit systems when you want -to build a 32-bit binary. Use the ARCH variable for this purpose. Right now -it only knows about a few x86 variants (i386,i486,i586,i686,x86_64), two -generic ones (32,64) and sets -m32/-m64 as well as -march=<arch> accordingly. 
-This variable is only used to set ARCH_FLAGS to preset values, so if you know -the arch-specific flags that your system needs, you may prefer to set -ARCH_FLAGS instead. Note that these flags are passed both to the compiler and -to the linker. For example, in order to build a 32-bit binary on an x86_64 -Linux system with SSL support without support for compression but when OpenSSL +If you need to pass some defines to the preprocessor or compiler, you may pass +them all in the DEFINE variable. Example: + + $ make TARGET=generic DEFINE="-DDEBUG_DONT_SHARE_POOLS" + +The ADDINC variable may be used to add some extra include paths; this is +sometimes needed when cross-compiling. Similarly the ADDLIB variable may be +used to specify extra paths to library files. Example : + + $ make TARGET=generic ADDINC=-I/opt/cross/include ADDLIB=-L/opt/cross/lib64 + + +5.3) Adding extra LDFLAGS for linking +------------------------------------- +If a particular target requires specific link-time flags, these can be passed +via the LDFLAGS variable. This variable is passed to the linker immediately +after ARCH_FLAGS. One of the common use cases is to add some run time search +paths for a dynamic library that's not part of the default system search path: + + $ make -j $(nproc) TARGET=generic USE_OPENSSL_AWSLC=1 USE_QUIC=1 \ + SSL_INC=/opt/aws-lc/include SSL_LIB=/opt/aws-lc/lib \ + LDFLAGS="-Wl,-rpath,/opt/aws-lc/lib" + +Some options require to be consistent between the compilation stage and the +linking stage. This is the case for options which enable debugging (e.g. "-g"), +profiling ("-pg"), link-time optimization ("-flto"), endianness ("-EB", "-EL"), +bit width ("-m32", "-m64"), or code analyzers ("-fsanitize=address"). These +options can be passed via the ARCH_FLAGS variable, which will be used at both +stages during the build process, thus avoiding the risk of inconsistencies. By +default, ARCH_FLAGS only contains "-g" to enable the generation of debug +symbols. 
For example, in order to build a 32-bit binary on an x86_64 Linux +system with SSL support without support for compression but when OpenSSL requires ZLIB anyway : - $ make TARGET=linux-glibc ARCH=i386 USE_OPENSSL=1 ADDLIB=-lz + $ make TARGET=linux-glibc ARCH_FLAGS="-m32 -g" USE_OPENSSL=1 ADDLIB=-lz + +and building with the address sanitizer (ASAN) simply requires: + $ make TARGET=linux-glibc ARCH_FLAGS="-fsanitize=address -g" + + +5.4) Other common OS-specific options +------------------------------------- Recent systems can resolve IPv6 host names using getaddrinfo(). This primitive is not present in all libcs and does not work in all of them either. Support in glibc was broken before 2.3. Some embedded libs may not properly work either, @@ -625,16 +718,63 @@ section 4 about dependencies for more information on how to build with OpenSSL. HAProxy can compress HTTP responses to save bandwidth. Please see section 4 about dependencies to see the available libraries and associated options. -By default, the DEBUG_CFLAGS variable is set to '-g' to enable debug symbols. -It is not wise to disable it on uncommon systems, because it's often the only -way to get a usable core when you need one. Otherwise, you can set DEBUG to -'-s' to strip the binary. +If you need to pass other defines, includes, libraries, etc... then please +check the Makefile to see which ones will be available in your case, and +use/override the USE_* variables from the Makefile. + -If the ERR variable is set to any non-empty value, then -Werror will be added -to the compiler so that any build warning will trigger an error. This is the -recommended way to build when developing, and it is expected that contributed -patches were tested with ERR=1. +5.5) Adjusting the build error / warning behavior +------------------------------------------------- +If the ERR variable is set to any non-empty value other than "0", then -Werror +will be added to the compiler so that any build warning will trigger an error. 
+This is the recommended way to build when developing, and it is expected that +contributed patches were tested with ERR=1. Similarly, for developers, another +variable, FAILFAST enables -Wfatal-errors when set to non-empty except 0, and +makes the compiler stop at the first error instead of scrolling pages. It's +essentially a matter of taste. + +Packagers who want to achieve the cleanest warning-free builds may be +interested in knowing that all enabled warnings are normally placed into +the WARN_CFLAGS variable. The variable contains a list of pre-established +warnings and a list of some that are dynamically detected on the compiler. +If the build environment or toolchain doesn't even support some of the basic +ones, it is then possible to just redefine them by passing the main ones in +WARN_CFLAGS (e.g. at the very least -W -Wall). Similarly, it may sometimes +be desirable not to disable certain warnings when porting to new platforms +or during code audits, or simply because the toolchain doesn't support some +of the most basic -Wno options. In this case, the list of automatic -Wno +variables is specified by variable NOWARN_CFLAGS, which is passed after +WARN_CFLAGS (i.e. it can undo some of the WARN_CFLAGS settings). Be careful +with it, as clearing this list can yield many warnings depending on the +compiler and options. +The DEP variable is automatically set to the list of include files and also +designates a file that contains the last build options used. It is used during +the build process to compute dependencies and decide whether or not to rebuild +everything (we do rebuild everything when .h files are touched or when build +options change). Sometimes when performing fast build iterations on inline +functions it may be desirable to avoid a full rebuild. Forcing this variable +to be empty will be sufficient to achieve this. 
This variable must never be +forced to produce final binaries, and must not be used during bisect sessions, +as it will often lead to the wrong commit. + +Examples: + # silence strict-aliasing warnings with old gcc-4.4: + $ make -j$(nproc) TARGET=linux-glibc CC=gcc-44 CFLAGS=-fno-strict-aliasing + + # disable all warning options: + $ make -j$(nproc) TARGET=linux-glibc CC=mycc WARN_CFLAGS= NOWARN_CFLAGS= + + # enable -Werror and -Wfatal-errors to immediately stop on error + $ make -j$(nproc) TARGET=linux-glibc ERR=1 FAILFAST=1 + + # try to restart the build where it was after hacking an include file, to + # check if that was sufficient or not: + $ make -j$(nproc) TARGET=linux-glibc ERR=1 DEP= + + +5.6) Enabling a DEBUG build +--------------------------- The DEBUG variable is used to extend the CFLAGS and is preset to a list of build-time options that are known for providing significant reliability improvements and a barely perceptible performance cost. Unless instructed to do @@ -645,8 +785,8 @@ these options should not be changed. Among the usable ones are: conditions are not met, and whose violation will result in a misbehaving process due to memory corruption or other significant trouble, possibly caused by an attempt to exploit a bug in the program or a library it relies - on. The option knows 3 values: 0 (disable all such assertions, the default - when the option is not set), 1 (enable all inexpensive assertions), and + on. The option knows 3 values: 0 (disable all such assertions, not + recommended), 1 (enable all inexpensive assertions, the default), and 2 (enable all assertions even in fast paths). Setting the option with no value corresponds to 1, which is the recommended value for production. @@ -678,7 +818,7 @@ these options should not be changed. Among the usable ones are: overflows, which may have security implications. The cost is extremely low (less than 1% increase in memory footprint). This is equivalent to adding "-dMtag" on the command line. 
This option is enabled in the default build - options. + options and may be disabled with -DDEBUG_MEMORY_POOLS=0. - -DDEBUG_DONT_SHARE_POOLS: this will keep separate pools for same-sized objects of different types. Using this increases the memory usage a little @@ -698,58 +838,34 @@ these options should not be changed. Among the usable ones are: are encouraged to use it, in combination with -DDEBUG_DONT_SHARE_POOLS and -DDEBUG_MEMORY_POOLS, as this could catch dangerous regressions. -As such, for regular production, "-DDEBUG_STRICT -DDEBUG_MEMORY_POOLS" is -recommended. For security sensitive environments, it is recommended to use -"-DDEBUG_STRICT -DDEBUG_STRICT_ACTION=2 -DDEBUG_MEMORY_POOLS \ --DDEBUG_DONT_SHARE_POOLS". For deployments dedicated to testing new versions or -when trying to nail a bug down, use "-DDEBUG_STRICT=2 -DDEBUG_STRICT_ACTION=2 \ --DDEBUG_MEMORY_POOLS -DDEBUG_DONT_SHARE_POOLS -DDEBUG_POOL_INTEGRITY". - -The DEP variable is automatically set to the list of include files and also -designates a file that contains the last build options used. It is used during -the build process to compute dependencies and decide whether or not to rebuild -everything (we do rebuild everything when .h files are touched or when build -options change). Sometimes when performing fast build iterations on inline -functions it may be desirable to avoid a full rebuild. Forcing this variable -to be empty will be sufficient to achieve this. This variable must never be -forced to produce final binaries, and must not be used during bisect sessions, -as it will often lead to the wrong commit. - -If you need to pass other defines, includes, libraries, etc... then please -check the Makefile to see which ones will be available in your case, and -use/override the USE_* variables from the Makefile. - -AIX 5.3 is known to work with the generic target. 
However, for the binary to -also run on 5.2 or earlier, you need to build with DEFINE="-D_MSGQSUPPORT", -otherwise __fd_select() will be used while not being present in the libc, but -this is easily addressed using the "aix52" target. If you get build errors -because of strange symbols or section mismatches, simply remove -g from -DEBUG_CFLAGS. - -Building on AIX 7.2 works fine using the "aix72-gcc" TARGET. It adds two -special CFLAGS to prevent the loading of AIX's xmem.h and var.h. This is done -by defining the corresponding include-guards _H_XMEM and _H_VAR. Without -excluding those header-files the build fails because of redefinition errors. -Furthermore, the atomic library is added to the LDFLAGS to allow for -multithreading via USE_THREAD. - -You can easily define your own target with the GNU Makefile. Unknown targets -are processed with no default option except USE_POLL=default. So you can very -well use that property to define your own set of options. USE_POLL and USE_SLZ -can even be disabled by setting them to an empty string. For example : - - $ gmake TARGET=tiny USE_POLL="" USE_SLZ="" TARGET_CFLAGS=-fomit-frame-pointer - -If you need to pass some defines to the preprocessor or compiler, you may pass -them all in the DEFINE variable. Example: - - $ make TARGET=generic DEFINE="-DDEBUG_DONT_SHARE_POOLS -DDEBUG_MEMORY_POOLS" - -The ADDINC variable may be used to add some extra include paths; this is -sometimes needed when cross-compiling. Similarly the ADDLIB variable may be -used to specify extra paths to library files. Example : - - $ make TARGET=generic ADDINC=-I/opt/cross/include ADDLIB=-L/opt/cross/lib64 +As such, "-DDEBUG_STRICT -DDEBUG_MEMORY_POOLS" is implicit and recommended for +production. For security sensitive environments, it is recommended to use +"-DDEBUG_STRICT_ACTION=2 -DDEBUG_DONT_SHARE_POOLS". 
When testing new versions +or trying to nail a bug down, use "-DDEBUG_STRICT=2 -DDEBUG_STRICT_ACTION=2 \ +-DDEBUG_DONT_SHARE_POOLS -DDEBUG_POOL_INTEGRITY". Finally in order to minimize +memory usage by disabling these integrity features, it is also possible to use +"-DDEBUG_STRICT=0 -DDEBUG_MEMORY_POOLS=0". + + +5.7) Summary of the Makefile's main variables +--------------------------------------------- + +The following variables are commonly used: + - TARGET platform name, empty by default, see help + - CC path to the C compiler, defaults to "cc" + - LD path to the linker, defaults to "$CC" + - CFLAGS CFLAGS to append at the end, empty by default + - LDFLAGS LDFLAGS to append at the end, empty by default + - ARCH_FLAGS flags common to CC and LD (-fsanitize, etc). Defaults to "-g" + - OPT_CFLAGS C compiler optimization level. Defaults to "-O2" + - WARN_CFLAGS list of autodetected C compiler warnings to enable + - NOWARN_CFLAGS list of autodetected C compiler warnings to disable + - ADDINC include directives to append at the end, empty by default + - ADDLIB lib directives to append at the end, empty by default + - DEFINE extra macros definitions for compiler, empty by default + - DEBUG extra DEBUG options for compiler, empty by default + - ERR enables -Werror if non-zero, empty by default + - FAILFAST enables -Wfatal-errors if non-zero, empty by default 6) How to install HAProxy @@ -1,7 +1,7 @@ # This GNU Makefile supports different OS and CPU combinations. # # You should use it this way : -# [g]make TARGET=os [ARCH=arch] [CPU=cpu] USE_xxx=1 ... +# [g]make TARGET=os [CFLAGS=...] USE_xxx=1 ... # # When in doubt, invoke help, possibly with a known target : # [g]make help @@ -75,13 +75,16 @@ # Variables useful for packagers : # CC is set to "cc" by default and is used for compilation only. # LD is set to "cc" by default and is used for linking only.
-# ARCH may be useful to force build of 32-bit binary on 64-bit systems -# CFLAGS is automatically set for the specified CPU and may be overridden. +# OPT_CFLAGS sets the default optimization level (-O2). +# CFLAGS may be used to append any flags for the C compiler. # LDFLAGS is automatically set to -g and may be overridden. +# ARCH_FLAGS for flags common to both CC and LD. Defaults to -g. # DEP may be cleared to ignore changes to include files during development -# SMALL_OPTS may be used to specify some options to shrink memory usage. # DEBUG may be used to set some internal debugging options. # ERR may be set to non-empty to pass -Werror to the compiler +# FAILFAST may be set to non-empty to pass -Wfatal-errors to the compiler +# WARN_CFLAGS overrides the default set of enabled warning options +# NOWARN_CFLAGS overrides the default set of disabled warning options # ADDINC may be used to complete the include path in the form -Ipath. # ADDLIB may be used to complete the library list in the form -Lpath -llib. # DEFINE may be used to specify any additional define, which will be reported @@ -152,27 +155,45 @@ DOCDIR = $(PREFIX)/doc/haproxy # custom TARGET = -#### TARGET CPU -# Use CPU=<cpu_name> to optimize for a particular CPU, among the following -# list : -# generic, native, i586, i686, ultrasparc, power8, power9, custom, -# a53, a72, armv81, armv8-auto -CPU = generic - -#### Architecture, used when not building for native architecture -# Use ARCH=<arch_name> to force build for a specific architecture. Known -# architectures will lead to "-m32" or "-m64" being added to CFLAGS and -# LDFLAGS. This can be required to build 32-bit binaries on 64-bit targets. -# Currently, only 32, 64, x86_64, i386, i486, i586 and i686 are understood. +#### No longer used +CPU = +ifneq ($(CPU),) +ifneq ($(CPU),generic) +$(warning Warning: the "CPU" variable was forced to "$(CPU)" but is no longer \ + used and will be ignored. 
For native builds, modern compilers generally \ + prefer that the string "-march=native" is passed in CPU_CFLAGS or CFLAGS. \ + For other CPU-specific options, please read suggestions in the INSTALL file.) +endif +endif + +#### No longer used ARCH = +ifneq ($(ARCH),) +$(warning Warning: the "ARCH" variable was forced to "$(ARCH)" but is no \ + longer used and will be ignored. Please check the INSTALL file for other \ + options, but usually in order to pass arch-specific options, ARCH_FLAGS, \ + CFLAGS or LDFLAGS are preferred.) +endif #### Toolchain options. CC = cc LD = $(CC) -#### Debug flags (typically "-g"). -# Those flags only feed CFLAGS so it is not mandatory to use this form. -DEBUG_CFLAGS = -g +#### Default optimizations +# Those are integrated early in the list of CFLAGS, and may be overridden by +# other CFLAGS options if needed. +OPT_CFLAGS = -O2 + +#### No longer used +DEBUG_CFLAGS = +ifneq ($(DEBUG_CFLAGS),) +$(warning Warning: DEBUG_CFLAGS was forced to "$(DEBUG_CFLAGS)" but is no \ + longer used and will be ignored. If you have ported this build setting from \ + an older version, it is likely that you just want to pass these options \ + to the CFLAGS variable. If you are passing some debugging-related options \ + such as -g/-ggdb3/-pg etc, they can now be passed in ARCH_FLAGS at once for \ + both the compilation and linking stages.) +endif #### Add -Werror when set to non-empty ERR = @@ -181,42 +202,55 @@ ERR = REG_TEST_FILES = REG_TEST_SCRIPT=./scripts/run-regtests.sh -#### Compiler-specific flags that may be used to disable some negative over- -# optimization or to silence some warnings. -# We rely on signed integer wraparound on overflow, however clang think it -# can do whatever it wants since it's an undefined behavior, so use -fwrapv -# to be sure we get the intended behavior.
-WARN_CFLAGS := -Wtype-limits -Wshift-negative-value -Wshift-overflow=2 \ - -Wduplicated-cond -Wnull-dereference -SPEC_CFLAGS := -Wall -Wextra -Wundef -Wdeclaration-after-statement -Wfatal-errors -SPEC_CFLAGS += $(call cc-all-fast,$(WARN_CFLAGS)) - -SPEC_CFLAGS += $(call cc-opt-alt,-fwrapv,-fno-strict-overflow) -SPEC_CFLAGS += $(cc-wnouwo) -SPEC_CFLAGS += $(call cc-nowarn,address-of-packed-member) -SPEC_CFLAGS += $(call cc-nowarn,unused-label) -SPEC_CFLAGS += $(call cc-nowarn,sign-compare) -SPEC_CFLAGS += $(call cc-nowarn,unused-parameter) -SPEC_CFLAGS += $(call cc-nowarn,clobbered) -SPEC_CFLAGS += $(call cc-nowarn,missing-field-initializers) -SPEC_CFLAGS += $(call cc-nowarn,cast-function-type) -SPEC_CFLAGS += $(call cc-nowarn,string-plus-int) -SPEC_CFLAGS += $(call cc-nowarn,atomic-alignment) - -ifneq ($(ERR),) - SPEC_CFLAGS += -Werror +#### Standard C definition +# Compiler-specific flags that may be used to set the standard behavior we +# rely on and to disable some negative over-optimization. More specifically, +# we rely on signed integer wraparound on overflow, however recently clang and +# gcc decided to change their code generation regarding this and abuse the +# undefined behavior to silently produce invalid code. For this reason we have +# to use -fwrapv or -fno-strict-overflow to guarantee the intended behavior. +# It is preferable not to change this option in order to avoid breakage. +STD_CFLAGS := $(call cc-opt-alt,-fwrapv,-fno-strict-overflow) + +#### Compiler-specific flags to enable certain classes of warnings. +# Some are hard-coded, others are enabled only if supported. +WARN_CFLAGS := -Wall -Wextra -Wundef -Wdeclaration-after-statement \ + $(call cc-all-fast, \ + -Wtype-limits -Wshift-negative-value -Wshift-overflow=2 \ + -Wduplicated-cond -Wnull-dereference) + +#### Compiler-specific flags to disable certain classes of warnings.
+NOWARN_CFLAGS := $(cc-wnouwo) +NOWARN_CFLAGS += $(call cc-nowarn,address-of-packed-member) +NOWARN_CFLAGS += $(call cc-nowarn,unused-label) +NOWARN_CFLAGS += $(call cc-nowarn,sign-compare) +NOWARN_CFLAGS += $(call cc-nowarn,unused-parameter) +NOWARN_CFLAGS += $(call cc-nowarn,clobbered) +NOWARN_CFLAGS += $(call cc-nowarn,missing-field-initializers) +NOWARN_CFLAGS += $(call cc-nowarn,cast-function-type) +NOWARN_CFLAGS += $(call cc-nowarn,string-plus-int) +NOWARN_CFLAGS += $(call cc-nowarn,atomic-alignment) + +#### CFLAGS defining error handling +# ERROR_CFLAGS are just accumulators for these variables, they're not meant +# to be exposed nor manipulated outside of this. They're not reported in +# VERBOSE_CFLAGS and don't cause a rebuild when changed. +ERROR_CFLAGS := +ifneq ($(ERR:0=),) + ERROR_CFLAGS += -Werror +endif + +ifneq ($(FAILFAST:0=),) + ERROR_CFLAGS += -Wfatal-errors endif -#### Memory usage tuning -# If small memory footprint is required, you can reduce the buffer size. There -# are 2 buffers per concurrent session, so 16 kB buffers will eat 32 MB memory -# with 1000 concurrent sessions. Putting it slightly lower than a page size -# will prevent the additional parameters to go beyond a page. 8030 bytes is -# exactly 5.5 TCP segments of 1460 bytes and is generally good. Useful tuning -# macros include : -# SYSTEM_MAXCONN, BUFSIZE, MAXREWRITE, REQURI_LEN, CAPTURE_LEN. -# Example: SMALL_OPTS = -DBUFSIZE=8030 -DMAXREWRITE=1030 -DSYSTEM_MAXCONN=1024 +#### No longer used SMALL_OPTS = +ifneq ($(SMALL_OPTS),) +$(warning Warning: SMALL_OPTS was forced to "$(SMALL_OPTS)" but is no longer \ + used and will be ignored. Please check if this setting is still relevant, \ + and move it either to DEFINE or to CFLAGS instead.) +endif #### Debug settings # You can enable debugging on specific code parts by setting DEBUG=-DDEBUG_xxx.
@@ -228,7 +262,7 @@ SMALL_OPTS = # DEBUG_NO_POOLS, DEBUG_FAIL_ALLOC, DEBUG_STRICT_ACTION=[0-3], DEBUG_HPACK, # DEBUG_AUTH, DEBUG_SPOE, DEBUG_UAF, DEBUG_THREAD, DEBUG_STRICT, DEBUG_DEV, # DEBUG_TASK, DEBUG_MEMORY_POOLS, DEBUG_POOL_TRACING, DEBUG_QPACK, DEBUG_LIST. -DEBUG = -DDEBUG_STRICT -DDEBUG_MEMORY_POOLS +DEBUG = #### Trace options # Use TRACE=1 to trace function calls to file "trace.out" or to stderr if not @@ -258,44 +292,36 @@ SILENT_DEFINE = EXTRA = #### CPU dependent optimizations -# Some CFLAGS are set by default depending on the target CPU. Those flags only -# feed CPU_CFLAGS, which in turn feed CFLAGS, so it is not mandatory to use -# them. You should not have to change these options. Better use CPU_CFLAGS or -# even CFLAGS instead. -CPU_CFLAGS.generic = -O2 -CPU_CFLAGS.native = -O2 -march=native -CPU_CFLAGS.i586 = -O2 -march=i586 -CPU_CFLAGS.i686 = -O2 -march=i686 -CPU_CFLAGS.ultrasparc = -O6 -mcpu=v9 -mtune=ultrasparc -CPU_CFLAGS.power8 = -O2 -mcpu=power8 -mtune=power8 -CPU_CFLAGS.power9 = -O2 -mcpu=power9 -mtune=power9 -CPU_CFLAGS.a53 = -O2 -mcpu=cortex-a53 -CPU_CFLAGS.a72 = -O2 -mcpu=cortex-a72 -CPU_CFLAGS.armv81 = -O2 -march=armv8.1-a -CPU_CFLAGS.armv8-auto = -O2 -march=armv8-a+crc -moutline-atomics -CPU_CFLAGS = $(CPU_CFLAGS.$(CPU)) - -#### ARCH dependent flags, may be overridden by CPU flags -ARCH_FLAGS.32 = -m32 -ARCH_FLAGS.64 = -m64 -ARCH_FLAGS.i386 = -m32 -march=i386 -ARCH_FLAGS.i486 = -m32 -march=i486 -ARCH_FLAGS.i586 = -m32 -march=i586 -ARCH_FLAGS.i686 = -m32 -march=i686 -ARCH_FLAGS.x86_64 = -m64 -march=x86-64 -ARCH_FLAGS = $(ARCH_FLAGS.$(ARCH)) - -#### Common CFLAGS -# These CFLAGS contain general optimization options, CPU-specific optimizations -# and debug flags. They may be overridden by some distributions which prefer to -# set all of them at once instead of playing with the CPU and DEBUG variables. 
-CFLAGS = $(ARCH_FLAGS) $(CPU_CFLAGS) $(DEBUG_CFLAGS) $(SPEC_CFLAGS) - -#### Common LDFLAGS -# These LDFLAGS are used as the first "ld" options, regardless of any library -# path or any other option. They may be changed to add any linker-specific -# option at the beginning of the ld command line. -LDFLAGS = $(ARCH_FLAGS) -g +# This may optionally be used to pass CPU-specific optimizations such as +# -march=native, -mcpu=something, -m64 etc independently of CFLAGS if it is +# considered more convenient. Historically, the optimization level was also +# passed there. This is still supported but not recommended though; OPT_CFLAGS +# is better suited. The default is empty. +CPU_CFLAGS = + +#### Architecture dependent flags. +# These flags are passed both to the compiler and to the linker. A number of +# settings may need to be passed to both tools, among which some arch-specific +# options such as -m32 or -m64, some debugging options (-g), some profiling +# options (-pg), some options affecting how the linkage is done (-flto), as +# well as some code analysers such as -fsanitize=address. All of these make +# sense here and will be consistently propagated to both stages. By default +# only the debugging is enabled (-g). +ARCH_FLAGS = -g + +#### Extra CFLAGS +# These CFLAGS are empty by default and are appended at the end of all the +# flags passed to the compiler, so that it is possible to use them to force +# some optimization levels, architecture types and/or disable certain warnings. +# Just set CFLAGS to the desired ones on the "make" command line. +CFLAGS = + +#### Extra LDFLAGS +# These LDFLAGS are used as the first "ld" options just after ARCH_FLAGS, +# regardless of any library path or any other option. They may be used to add +# any linker-specific option at the beginning of the ld command line. It may be +# convenient to set a run time search path (-rpath), see INSTALL for more info. +LDFLAGS = #### list of all "USE_*" options. 
These ones must be updated if new options are # added, so that the relevant options are properly added to the CFLAGS and to @@ -322,6 +348,9 @@ use_opts = USE_EPOLL USE_KQUEUE USE_NETFILTER USE_POLL \ # preset all variables for all supported build options among use_opts $(reset_opts_vars) +# Check that any USE_* variable that was forced actually exists. +$(warn_unknown_options) + #### Target system options # poll() is always supported, unless explicitly disabled by passing USE_POLL="" @@ -330,7 +359,7 @@ USE_POLL = default # SLZ is always supported unless explicitly disabled by passing USE_SLZ="" # or disabled by enabling ZLIB using USE_ZLIB=1 -ifeq ($(USE_ZLIB),) +ifeq ($(USE_ZLIB:0=),) USE_SLZ = default endif @@ -351,7 +380,7 @@ ifeq ($(TARGET),linux-glibc) USE_POLL USE_TPROXY USE_LIBCRYPT USE_DL USE_RT USE_CRYPT_H USE_NETFILTER \ USE_CPU_AFFINITY USE_THREAD USE_EPOLL USE_LINUX_TPROXY USE_LINUX_CAP \ USE_ACCEPT4 USE_LINUX_SPLICE USE_PRCTL USE_THREAD_DUMP USE_NS USE_TFO \ - USE_GETADDRINFO USE_BACKTRACE USE_SHM_OPEN) + USE_GETADDRINFO USE_BACKTRACE USE_SHM_OPEN USE_SYSTEMD) INSTALL = install -v endif @@ -431,7 +460,6 @@ ifeq ($(TARGET),aix51) set_target_defaults = $(call default_opts, \ USE_POLL USE_LIBCRYPT USE_OBSOLETE_LINKER) TARGET_CFLAGS = -Dss_family=__ss_family -Dip6_hdr=ip6hdr -DSTEVENS_API -D_LINUX_SOURCE_COMPAT -Dunsetenv=my_unsetenv - DEBUG_CFLAGS = endif # AIX 5.2 @@ -439,7 +467,6 @@ ifeq ($(TARGET),aix52) set_target_defaults = $(call default_opts, \ USE_POLL USE_LIBCRYPT USE_OBSOLETE_LINKER) TARGET_CFLAGS = -D_MSGQSUPPORT - DEBUG_CFLAGS = endif # AIX 7.2 and above @@ -467,8 +494,8 @@ $(set_target_defaults) # any occurrence of 1 indicates libatomic is necessary. It's better to avoid # linking with it by default as it's not always available nor deployed # (especially on archs which do not need it). 
-ifneq ($(USE_THREAD),) - ifneq ($(shell $(CC) $(CFLAGS) -dM -E -xc - </dev/null 2>/dev/null | grep -c 'LOCK_FREE.*1'),0) +ifneq ($(USE_THREAD:0=),) + ifneq ($(shell $(CC) $(OPT_CFLAGS) $(ARCH_FLAGS) $(CPU_CFLAGS) $(STD_CFLAGS) $(WARN_CFLAGS) $(NOWARN_CFLAGS) $(ERROR_CFLAGS) $(CFLAGS) -dM -E -xc - </dev/null 2>/dev/null | grep -c 'LOCK_FREE.*1'),0) USE_LIBATOMIC = implicit endif endif @@ -519,7 +546,7 @@ BUILD_OPTIONS := $(call build_options) # possibly be unused though) OPTIONS_CFLAGS += $(call opts_as_defines) -ifneq ($(USE_LIBCRYPT),) +ifneq ($(USE_LIBCRYPT:0=),) ifneq ($(TARGET),openbsd) ifneq ($(TARGET),osx) LIBCRYPT_LDFLAGS = -lcrypt @@ -527,45 +554,45 @@ ifneq ($(USE_LIBCRYPT),) endif endif -ifneq ($(USE_ZLIB),) +ifneq ($(USE_ZLIB:0=),) # Use ZLIB_INC and ZLIB_LIB to force path to zlib.h and libz.{a,so} if needed. ZLIB_CFLAGS = $(if $(ZLIB_INC),-I$(ZLIB_INC)) ZLIB_LDFLAGS = $(if $(ZLIB_LIB),-L$(ZLIB_LIB)) -lz endif -ifneq ($(USE_SLZ),) +ifneq ($(USE_SLZ:0=),) OPTIONS_OBJS += src/slz.o endif -ifneq ($(USE_POLL),) +ifneq ($(USE_POLL:0=),) OPTIONS_OBJS += src/ev_poll.o endif -ifneq ($(USE_EPOLL),) +ifneq ($(USE_EPOLL:0=),) OPTIONS_OBJS += src/ev_epoll.o endif -ifneq ($(USE_KQUEUE),) +ifneq ($(USE_KQUEUE:0=),) OPTIONS_OBJS += src/ev_kqueue.o endif -ifneq ($(USE_EVPORTS),) +ifneq ($(USE_EVPORTS:0=),) OPTIONS_OBJS += src/ev_evports.o endif -ifneq ($(USE_RT),) +ifneq ($(USE_RT:0=),) RT_LDFLAGS = -lrt endif -ifneq ($(USE_THREAD),) +ifneq ($(USE_THREAD:0=),) THREAD_LDFLAGS = -pthread endif -ifneq ($(USE_BACKTRACE),) +ifneq ($(USE_BACKTRACE:0=),) BACKTRACE_LDFLAGS = -Wl,$(if $(EXPORT_SYMBOL),$(EXPORT_SYMBOL),--export-dynamic) endif -ifneq ($(USE_CPU_AFFINITY),) +ifneq ($(USE_CPU_AFFINITY:0=),) OPTIONS_OBJS += src/cpuset.o endif @@ -577,32 +604,32 @@ endif # This is for the WolfSSL variant of the OpenSSL API. Setting it implies # OPENSSL so it's not necessary to set the latter. 
-ifneq ($(USE_OPENSSL_WOLFSSL),) +ifneq ($(USE_OPENSSL_WOLFSSL:0=),) SSL_CFLAGS := $(if $(SSL_INC),-I$(SSL_INC)/wolfssl -I$(SSL_INC)) SSL_LDFLAGS := $(if $(SSL_LIB),-L$(SSL_LIB)) -lwolfssl # always automatically set USE_OPENSSL - USE_OPENSSL := $(if $(USE_OPENSSL),$(USE_OPENSSL),implicit) + USE_OPENSSL := $(if $(USE_OPENSSL:0=),$(USE_OPENSSL:0=),implicit) endif # This is for the AWS-LC variant of the OpenSSL API. Setting it implies # OPENSSL so it's not necessary to set the latter. -ifneq ($(USE_OPENSSL_AWSLC),) +ifneq ($(USE_OPENSSL_AWSLC:0=),) # always automatically set USE_OPENSSL - USE_OPENSSL := $(if $(USE_OPENSSL),$(USE_OPENSSL),implicit) + USE_OPENSSL := $(if $(USE_OPENSSL:0=),$(USE_OPENSSL:0=),implicit) endif # This is for any variant of the OpenSSL API. By default it uses OpenSSL. -ifneq ($(USE_OPENSSL),) +ifneq ($(USE_OPENSSL:0=),) # only preset these for the regular openssl - ifeq ($(USE_OPENSSL_WOLFSSL),) + ifeq ($(USE_OPENSSL_WOLFSSL:0=),) SSL_CFLAGS := $(if $(SSL_INC),-I$(SSL_INC)) SSL_LDFLAGS := $(if $(SSL_LIB),-L$(SSL_LIB)) -lssl -lcrypto endif - USE_SSL := $(if $(USE_SSL),$(USE_SSL),implicit) - OPTIONS_OBJS += src/ssl_sock.o src/ssl_ckch.o src/ssl_sample.o src/ssl_crtlist.o src/cfgparse-ssl.o src/ssl_utils.o src/jwt.o src/ssl_ocsp.o + USE_SSL := $(if $(USE_SSL:0=),$(USE_SSL:0=),implicit) + OPTIONS_OBJS += src/ssl_sock.o src/ssl_ckch.o src/ssl_ocsp.o src/ssl_crtlist.o src/ssl_sample.o src/cfgparse-ssl.o src/ssl_gencert.o src/ssl_utils.o src/jwt.o endif -ifneq ($(USE_ENGINE),) +ifneq ($(USE_ENGINE:0=),) # OpenSSL 3.0 emits loud deprecation warnings by default when building with # engine support, and this option is made to silence them. 
Better use it # only when absolutely necessary, until there's a viable alternative to the @@ -610,25 +637,28 @@ ifneq ($(USE_ENGINE),) ENGINE_CFLAGS = -DOPENSSL_SUPPRESS_DEPRECATED endif -ifneq ($(USE_QUIC),) -OPTIONS_OBJS += src/quic_conn.o src/mux_quic.o src/h3.o src/xprt_quic.o \ - src/quic_frame.o src/quic_tls.o src/quic_tp.o \ - src/quic_stats.o src/quic_sock.o src/proto_quic.o \ - src/qmux_trace.o src/quic_loss.o src/qpack-enc.o \ - src/quic_cc_newreno.o src/quic_cc_cubic.o src/qpack-tbl.o \ - src/qpack-dec.o src/hq_interop.o src/quic_stream.o \ - src/h3_stats.o src/qmux_http.o src/cfgparse-quic.o \ - src/cbuf.o src/quic_cc.o src/quic_cc_nocc.o src/quic_ack.o \ - src/quic_trace.o src/quic_cli.o src/quic_ssl.o \ - src/quic_rx.o src/quic_tx.o src/quic_cid.o src/quic_retry.o\ - src/quic_retransmit.o +ifneq ($(USE_QUIC:0=),) + + +OPTIONS_OBJS += src/quic_rx.o src/mux_quic.o src/h3.o src/quic_tx.o \ + src/quic_conn.o src/quic_frame.o src/quic_sock.o \ + src/quic_ssl.o src/quic_tls.o src/proto_quic.o \ + src/quic_trace.o src/quic_cli.o src/quic_tp.o \ + src/quic_cid.o src/quic_retransmit.o src/quic_retry.o \ + src/quic_loss.o src/quic_cc_cubic.o src/quic_stream.o \ + src/xprt_quic.o src/quic_ack.o src/hq_interop.o \ + src/quic_cc_newreno.o src/qmux_http.o \ + src/quic_cc_nocc.o src/qpack-dec.o src/quic_cc.o \ + src/cfgparse-quic.o src/qmux_trace.o src/qpack-enc.o \ + src/qpack-tbl.o src/h3_stats.o src/quic_stats.o \ + src/quic_fctl.o src/cbuf.o endif -ifneq ($(USE_QUIC_OPENSSL_COMPAT),) +ifneq ($(USE_QUIC_OPENSSL_COMPAT:0=),) OPTIONS_OBJS += src/quic_openssl_compat.o endif -ifneq ($(USE_LUA),) +ifneq ($(USE_LUA:0=),) check_lua_inc = $(shell if [ -d $(2)$(1) ]; then echo $(2)$(1); fi;) LUA_INC := $(firstword $(foreach lib,lua5.4 lua54 lua5.3 lua53 lua,$(call check_lua_inc,$(lib),"/usr/include/"))) @@ -661,34 +691,20 @@ ifneq ($(USE_LUA),) OPTIONS_OBJS += src/hlua.o src/hlua_fcn.o endif # USE_LUA -ifneq ($(USE_PROMEX),) +ifneq ($(USE_PROMEX:0=),) OPTIONS_OBJS += 
addons/promex/service-prometheus.o + PROMEX_CFLAGS = -Iaddons/promex/include endif -ifneq ($(USE_DEVICEATLAS),) +ifneq ($(USE_DEVICEATLAS:0=),) # Use DEVICEATLAS_SRC and possibly DEVICEATLAS_INC and DEVICEATLAS_LIB to force path - # to DeviceAtlas headers and libraries if needed. + # to DeviceAtlas headers and libraries if needed. In this context, DEVICEATLAS_NOCACHE + # can be used to disable the cache support if needed (this also removes the necessity of having + # a C++ toolchain installed). DEVICEATLAS_INC = $(DEVICEATLAS_SRC) DEVICEATLAS_LIB = $(DEVICEATLAS_SRC) - ifeq ($(DEVICEATLAS_SRC),) - DEVICEATLAS_LDFLAGS += -lda - else - ifeq ($(USE_PCRE),) - ifeq ($(USE_PCRE2),) - $(error the DeviceAtlas module needs the PCRE or the PCRE2 library in order to compile) - endif - endif - ifneq ($(USE_PCRE2),) - DEVICEATLAS_CFLAGS += -DDA_REGEX_HDR=\"dac_pcre2.c\" -DDA_REGEX_TAG=2 - endif - OPTIONS_OBJS += $(DEVICEATLAS_LIB)/Os/daunix.o - OPTIONS_OBJS += $(DEVICEATLAS_LIB)/dadwcom.o - OPTIONS_OBJS += $(DEVICEATLAS_LIB)/dasch.o - OPTIONS_OBJS += $(DEVICEATLAS_LIB)/json.o - OPTIONS_OBJS += $(DEVICEATLAS_LIB)/dac.o - endif + include addons/deviceatlas/Makefile.inc OPTIONS_OBJS += addons/deviceatlas/da.o - DEVICEATLAS_CFLAGS += $(if $(DEVICEATLAS_INC),-I$(DEVICEATLAS_INC)) $(if $(DEVICEATLAS_SRC),-DDATLAS_DA_NOCACHE) endif # Use 51DEGREES_SRC and possibly 51DEGREES_INC and 51DEGREES_LIB to force path @@ -700,12 +716,12 @@ endif 51DEGREES_LIB = $(51DEGREES_SRC) 51DEGREES_VER = 3 -ifneq ($(USE_51DEGREES),) +ifneq ($(USE_51DEGREES:0=),) ifeq ($(51DEGREES_VER),4) # v4 here _51DEGREES_SRC = $(shell find $(51DEGREES_LIB) -maxdepth 2 -name '*.c') OPTIONS_OBJS += $(_51DEGREES_SRC:%.c=%.o) 51DEGREES_CFLAGS += -DUSE_51DEGREES_V4 - ifeq ($(USE_THREAD),) + ifeq ($(USE_THREAD:0=),) 51DEGREES_CFLAGS += -DFIFTYONEDEGREES_NO_THREADING -DFIFTYONE_DEGREES_NO_THREADING endif USE_LIBATOMIC = implicit @@ -714,7 +730,7 @@ ifneq ($(USE_51DEGREES),) ifeq ($(51DEGREES_VER),3) # v3 here OPTIONS_OBJS 
+= $(51DEGREES_LIB)/../cityhash/city.o OPTIONS_OBJS += $(51DEGREES_LIB)/51Degrees.o - ifeq ($(USE_THREAD),) + ifeq ($(USE_THREAD:0=),) 51DEGREES_CFLAGS += -DFIFTYONEDEGREES_NO_THREADING else OPTIONS_OBJS += $(51DEGREES_LIB)/../threading.o @@ -731,7 +747,7 @@ ifneq ($(USE_51DEGREES),) USE_MATH = implicit endif # USE_51DEGREES -ifneq ($(USE_WURFL),) +ifneq ($(USE_WURFL:0=),) # Use WURFL_SRC and possibly WURFL_INC and WURFL_LIB to force path # to WURFL headers and libraries if needed. WURFL_INC = $(WURFL_SRC) @@ -747,12 +763,12 @@ ifneq ($(USE_WURFL),) WURFL_LDFLAGS = $(if $(WURFL_LIB),-L$(WURFL_LIB)) -lwurfl endif -ifneq ($(USE_SYSTEMD),) - SYSTEMD_LDFLAGS = -lsystemd +ifneq ($(USE_SYSTEMD:0=),) + OPTIONS_OBJS += src/systemd.o endif -ifneq ($(USE_PCRE)$(USE_STATIC_PCRE)$(USE_PCRE_JIT),) - ifneq ($(USE_PCRE2)$(USE_STATIC_PCRE2)$(USE_PCRE2_JIT),) +ifneq ($(USE_PCRE:0=)$(USE_STATIC_PCRE:0=)$(USE_PCRE_JIT:0=),) + ifneq ($(USE_PCRE2:0=)$(USE_STATIC_PCRE2:0=)$(USE_PCRE2_JIT:0=),) $(error cannot compile both PCRE and PCRE2 support) endif # PCREDIR is used to automatically construct the PCRE_INC and PCRE_LIB paths, @@ -763,7 +779,7 @@ ifneq ($(USE_PCRE)$(USE_STATIC_PCRE)$(USE_PCRE_JIT),) # locations. 
# in case only USE_STATIC_PCRE/USE_PCRE_JIT were set - USE_PCRE := $(if $(USE_PCRE),$(USE_PCRE),implicit) + USE_PCRE := $(if $(USE_PCRE:0=),$(USE_PCRE:0=),implicit) PCRE_CONFIG := pcre-config PCREDIR := $(shell $(PCRE_CONFIG) --prefix 2>/dev/null || echo /usr/local) ifneq ($(PCREDIR),) @@ -772,16 +788,16 @@ ifneq ($(USE_PCRE)$(USE_STATIC_PCRE)$(USE_PCRE_JIT),) endif PCRE_CFLAGS := $(if $(PCRE_INC),-I$(PCRE_INC)) - ifeq ($(USE_STATIC_PCRE),) + ifeq ($(USE_STATIC_PCRE:0=),) PCRE_LDFLAGS := $(if $(PCRE_LIB),-L$(PCRE_LIB)) -lpcreposix -lpcre else PCRE_LDFLAGS := $(if $(PCRE_LIB),-L$(PCRE_LIB)) -Wl,-Bstatic -lpcreposix -lpcre -Wl,-Bdynamic endif endif # USE_PCRE -ifneq ($(USE_PCRE2)$(USE_STATIC_PCRE2)$(USE_PCRE2_JIT),) +ifneq ($(USE_PCRE2:0=)$(USE_STATIC_PCRE2:0=)$(USE_PCRE2_JIT:0=),) # in case only USE_STATIC_PCRE2/USE_PCRE2_JIT were set - USE_PCRE2 := $(if $(USE_PCRE2),$(USE_PCRE2),implicit) + USE_PCRE2 := $(if $(USE_PCRE2:0=),$(USE_PCRE2:0=),implicit) PCRE2_CONFIG := pcre2-config PCRE2DIR := $(shell $(PCRE2_CONFIG) --prefix 2>/dev/null || echo /usr/local) ifneq ($(PCRE2DIR),) @@ -811,7 +827,7 @@ ifneq ($(USE_PCRE2)$(USE_STATIC_PCRE2)$(USE_PCRE2_JIT),) endif endif - ifneq ($(USE_STATIC_PCRE2),) + ifneq ($(USE_STATIC_PCRE2:0=),) PCRE2_LDFLAGS := $(if $(PCRE2_LIB),-L$(PCRE2_LIB)) -Wl,-Bstatic -L$(PCRE2_LIB) $(PCRE2_LDFLAGS) -Wl,-Bdynamic else PCRE2_LDFLAGS := $(if $(PCRE2_LIB),-L$(PCRE2_LIB)) -L$(PCRE2_LIB) $(PCRE2_LDFLAGS) @@ -819,28 +835,28 @@ ifneq ($(USE_PCRE2)$(USE_STATIC_PCRE2)$(USE_PCRE2_JIT),) endif # PCRE2DIR endif # USE_PCRE2 -ifneq ($(USE_NS),) +ifneq ($(USE_NS:0=),) OPTIONS_OBJS += src/namespace.o endif -ifneq ($(USE_LINUX_CAP),) +ifneq ($(USE_LINUX_CAP:0=),) OPTIONS_OBJS += src/linuxcap.o endif -ifneq ($(USE_OT),) +ifneq ($(USE_OT:0=),) include addons/ot/Makefile endif # better keep this one close to the end, as several libs above may need it -ifneq ($(USE_DL),) +ifneq ($(USE_DL:0=),) DL_LDFLAGS = -ldl endif -ifneq ($(USE_MATH),) +ifneq ($(USE_MATH:0=),) 
MATH_LDFLAGS = -lm endif -ifneq ($(USE_LIBATOMIC),) +ifneq ($(USE_LIBATOMIC:0=),) LIBATOMIC_LDFLAGS = -latomic endif @@ -851,11 +867,11 @@ endif $(collect_opts_flags) #### Global compile options -VERBOSE_CFLAGS = $(CFLAGS) $(TARGET_CFLAGS) $(SMALL_OPTS) $(DEFINE) +VERBOSE_CFLAGS = $(OPT_CFLAGS) $(ARCH_FLAGS) $(CPU_CFLAGS) $(STD_CFLAGS) $(TARGET_CFLAGS) $(CFLAGS) $(DEFINE) COPTS = -Iinclude -COPTS += $(CFLAGS) $(TARGET_CFLAGS) $(SMALL_OPTS) $(DEFINE) $(SILENT_DEFINE) -COPTS += $(DEBUG) $(OPTIONS_CFLAGS) $(ADDINC) +COPTS += $(OPT_CFLAGS) $(ARCH_FLAGS) $(CPU_CFLAGS) $(STD_CFLAGS) $(WARN_CFLAGS) $(NOWARN_CFLAGS) $(ERROR_CFLAGS) $(TARGET_CFLAGS) $(DEFINE) $(SILENT_DEFINE) +COPTS += $(DEBUG) $(OPTIONS_CFLAGS) $(CFLAGS) $(ADDINC) ifneq ($(VERSION)$(SUBVERS)$(EXTRAVERSION),) COPTS += -DCONFIG_HAPROXY_VERSION=\"$(VERSION)$(SUBVERS)$(EXTRAVERSION)\" @@ -930,38 +946,43 @@ ifneq ($(EXTRA_OBJS),) OBJS += $(EXTRA_OBJS) endif -OBJS += src/mux_h2.o src/mux_fcgi.o src/mux_h1.o src/tcpcheck.o \ - src/stream.o src/stats.o src/http_ana.o src/server.o \ - src/stick_table.o src/sample.o src/flt_spoe.o src/tools.o \ - src/log.o src/cfgparse.o src/peers.o src/backend.o src/resolvers.o \ - src/cli.o src/connection.o src/proxy.o src/http_htx.o \ - src/cfgparse-listen.o src/pattern.o src/check.o src/haproxy.o \ - src/cache.o src/stconn.o src/http_act.o src/http_fetch.o \ - src/http_client.o src/listener.o src/dns.o src/vars.o src/debug.o \ - src/tcp_rules.o src/sink.o src/h1_htx.o src/task.o src/mjson.o \ - src/h2.o src/filters.o src/server_state.o src/payload.o \ - src/fcgi-app.o src/map.o src/htx.o src/h1.o src/pool.o \ - src/cfgparse-global.o src/trace.o src/tcp_sample.o src/http_ext.o \ - src/flt_http_comp.o src/mux_pt.o src/flt_trace.o src/mqtt.o \ - src/acl.o src/sock.o src/mworker.o src/tcp_act.o src/ring.o \ - src/session.o src/proto_tcp.o src/fd.o src/channel.o src/activity.o \ - src/queue.o src/lb_fas.o src/http_rules.o src/extcheck.o \ - src/flt_bwlim.o src/thread.o src/http.o 
src/lb_chash.o src/applet.o \ - src/compression.o src/raw_sock.o src/ncbuf.o src/frontend.o \ - src/errors.o src/uri_normalizer.o src/http_conv.o src/lb_fwrr.o \ - src/sha1.o src/proto_sockpair.o src/mailers.o src/lb_fwlc.o \ - src/ebmbtree.o src/cfgcond.o src/action.o src/xprt_handshake.o \ - src/protocol.o src/proto_uxst.o src/proto_udp.o src/lb_map.o \ - src/fix.o src/ev_select.o src/arg.o src/sock_inet.o src/event_hdl.o \ - src/mworker-prog.o src/hpack-dec.o src/cfgparse-tcp.o \ - src/sock_unix.o src/shctx.o src/proto_uxdg.o src/fcgi.o \ - src/eb64tree.o src/clock.o src/chunk.o src/cfgdiag.o src/signal.o \ - src/regex.o src/lru.o src/eb32tree.o src/eb32sctree.o \ - src/cfgparse-unix.o src/hpack-tbl.o src/ebsttree.o src/ebimtree.o \ - src/base64.o src/auth.o src/uri_auth.o src/time.o src/ebistree.o \ - src/dynbuf.o src/wdt.o src/pipe.o src/init.o src/http_acl.o \ - src/hpack-huff.o src/hpack-enc.o src/dict.o src/freq_ctr.o \ - src/ebtree.o src/hash.o src/dgram.o src/version.o src/proto_rhttp.o +OBJS += src/mux_h2.o src/mux_h1.o src/mux_fcgi.o src/stream.o \ + src/log.o src/server.o src/tcpcheck.o src/http_ana.o \ + src/stick_table.o src/tools.o src/sample.o src/flt_spoe.o \ + src/cfgparse.o src/peers.o src/cli.o src/resolvers.o \ + src/connection.o src/backend.o src/cache.o src/http_htx.o \ + src/proxy.o src/stconn.o src/check.o src/haproxy.o \ + src/stats-html.o src/listener.o src/pattern.o src/debug.o \ + src/cfgparse-listen.o src/http_client.o src/activity.o \ + src/applet.o src/http_act.o src/http_fetch.o src/http_ext.o \ + src/dns.o src/vars.o src/tcp_rules.o src/pool.o src/stats.o \ + src/stats-proxy.o src/sink.o src/filters.o src/mux_pt.o \ + src/event_hdl.o src/server_state.o src/h1_htx.o src/h1.o \ + src/flt_http_comp.o src/task.o src/payload.o src/fcgi-app.o \ + src/map.o src/trace.o src/tcp_sample.o src/tcp_act.o \ + src/session.o src/htx.o src/cfgparse-global.o src/mjson.o \ + src/h2.o src/ring.o src/fd.o src/sock.o src/mworker.o \ + src/flt_trace.o 
src/thread.o src/proto_rhttp.o src/acl.o \ + src/http.o src/flt_bwlim.o src/channel.o src/queue.o \ + src/mqtt.o src/proto_tcp.o src/lb_chash.o src/http_rules.o \ + src/errors.o src/extcheck.o src/dns_ring.o src/stats-json.o \ + src/http_conv.o src/frontend.o src/proto_sockpair.o \ + src/compression.o src/ncbuf.o src/stats-file.o src/raw_sock.o \ + src/lb_fwrr.o src/action.o src/uri_normalizer.o \ + src/proto_uxst.o src/ebmbtree.o src/xprt_handshake.o \ + src/protocol.o src/proto_udp.o src/lb_fwlc.o src/sha1.o \ + src/proto_uxdg.o src/mailers.o src/lb_fas.o src/cfgcond.o \ + src/cfgdiag.o src/sock_unix.o src/sock_inet.o \ + src/mworker-prog.o src/lb_map.o src/ev_select.o src/shctx.o \ + src/hpack-dec.o src/fix.o src/clock.o src/cfgparse-tcp.o \ + src/arg.o src/signal.o src/fcgi.o src/dynbuf.o src/regex.o \ + src/lru.o src/lb_ss.o src/eb64tree.o src/chunk.o \ + src/cfgparse-unix.o src/guid.o src/ebimtree.o src/eb32tree.o \ + src/eb32sctree.o src/base64.o src/uri_auth.o src/time.o \ + src/hpack-tbl.o src/ebsttree.o src/ebistree.o src/auth.o \ + src/hpack-huff.o src/freq_ctr.o src/dict.o src/wdt.o \ + src/pipe.o src/init.o src/http_acl.o src/hpack-enc.o \ + src/ebtree.o src/dgram.o src/hash.o src/version.o ifneq ($(TRACE),) OBJS += src/calltrace.o @@ -1001,7 +1022,7 @@ IGNORE_OPTS=help install install-man install-doc install-bin \ ifneq ($(TARGET),) ifeq ($(filter $(firstword $(MAKECMDGOALS)),$(IGNORE_OPTS)),) -build_opts = $(shell rm -f .build_opts.new; echo \'$(TARGET) $(BUILD_OPTIONS) $(VERBOSE_CFLAGS) $(DEBUG)\' > .build_opts.new; if cmp -s .build_opts .build_opts.new; then rm -f .build_opts.new; else mv -f .build_opts.new .build_opts; fi) +build_opts = $(shell rm -f .build_opts.new; echo \'$(TARGET) $(BUILD_OPTIONS) $(VERBOSE_CFLAGS) $(WARN_CFLAGS) $(NOWARN_CFLAGS) $(DEBUG)\' > .build_opts.new; if cmp -s .build_opts .build_opts.new; then rm -f .build_opts.new; else mv -f .build_opts.new .build_opts; fi) .build_opts: $(build_opts) else .build_opts: @@ -1011,7 
+1032,7 @@ else endif # non-empty target haproxy: $(OPTIONS_OBJS) $(OBJS) - $(cmd_LD) $(LDFLAGS) -o $@ $^ $(LDOPTS) + $(cmd_LD) $(ARCH_FLAGS) $(LDFLAGS) -o $@ $^ $(LDOPTS) objsize: haproxy $(Q)objdump -t $^|grep ' g '|grep -F '.text'|awk '{print $$5 FS $$6}'|sort @@ -1020,31 +1041,31 @@ objsize: haproxy $(cmd_CC) $(COPTS) -c -o $@ $< admin/halog/halog: admin/halog/halog.o admin/halog/fgets2.o src/ebtree.o src/eb32tree.o src/eb64tree.o src/ebmbtree.o src/ebsttree.o src/ebistree.o src/ebimtree.o - $(cmd_LD) $(LDFLAGS) -o $@ $^ $(LDOPTS) + $(cmd_LD) $(ARCH_FLAGS) $(LDFLAGS) -o $@ $^ $(LDOPTS) admin/dyncookie/dyncookie: admin/dyncookie/dyncookie.o - $(cmd_LD) $(LDFLAGS) -o $@ $^ $(LDOPTS) + $(cmd_LD) $(ARCH_FLAGS) $(LDFLAGS) -o $@ $^ $(LDOPTS) dev/flags/flags: dev/flags/flags.o - $(cmd_LD) $(LDFLAGS) -o $@ $^ $(LDOPTS) + $(cmd_LD) $(ARCH_FLAGS) $(LDFLAGS) -o $@ $^ $(LDOPTS) dev/haring/haring: dev/haring/haring.o - $(cmd_LD) $(LDFLAGS) -o $@ $^ $(LDOPTS) + $(cmd_LD) $(ARCH_FLAGS) $(LDFLAGS) -o $@ $^ $(LDOPTS) dev/hpack/%: dev/hpack/%.o - $(cmd_LD) $(LDFLAGS) -o $@ $^ $(LDOPTS) + $(cmd_LD) $(ARCH_FLAGS) $(LDFLAGS) -o $@ $^ $(LDOPTS) dev/poll/poll: $(cmd_MAKE) -C dev/poll poll CC='$(CC)' OPTIMIZE='$(COPTS)' V='$(V)' dev/qpack/decode: dev/qpack/decode.o - $(cmd_LD) $(LDFLAGS) -o $@ $^ $(LDOPTS) + $(cmd_LD) $(ARCH_FLAGS) $(LDFLAGS) -o $@ $^ $(LDOPTS) dev/tcploop/tcploop: $(cmd_MAKE) -C dev/tcploop tcploop CC='$(CC)' OPTIMIZE='$(COPTS)' V='$(V)' dev/udp/udp-perturb: dev/udp/udp-perturb.o - $(cmd_LD) $(LDFLAGS) -o $@ $^ $(LDOPTS) + $(cmd_LD) $(ARCH_FLAGS) $(LDFLAGS) -o $@ $^ $(LDOPTS) # rebuild it every time .PHONY: src/version.c dev/poll/poll dev/tcploop/tcploop @@ -1055,8 +1076,6 @@ src/calltrace.o: src/calltrace.c $(DEP) src/haproxy.o: src/haproxy.c $(DEP) $(cmd_CC) $(COPTS) \ -DBUILD_TARGET='"$(strip $(TARGET))"' \ - -DBUILD_ARCH='"$(strip $(ARCH))"' \ - -DBUILD_CPU='"$(strip $(CPU))"' \ -DBUILD_CC='"$(strip $(CC))"' \ -DBUILD_CFLAGS='"$(strip $(VERBOSE_CFLAGS))"' \ 
-DBUILD_OPTIONS='"$(strip $(BUILD_OPTIONS))"' \ @@ -1156,27 +1175,30 @@ update-version: # settings are also listed if they're explicitly set on the command line, or if # they are not empty. Implicit "USE_*" are not listed. opts: - @echo -n 'Using: ' - @echo -n 'TARGET="$(strip $(TARGET))" ' - @echo -n 'ARCH="$(strip $(ARCH))" ' - @echo -n 'CPU="$(strip $(CPU))" ' - @echo -n 'CC="$(strip $(CC))" ' - @echo -n 'ARCH_FLAGS="$(strip $(ARCH_FLAGS))" ' - @echo -n 'CPU_CFLAGS="$(strip $(CPU_CFLAGS))" ' - @echo -n 'DEBUG_CFLAGS="$(strip $(DEBUG_CFLAGS))" ' - @#echo "$(strip $(BUILD_OPTIONS))" + @echo 'Using the following variables (copy-pastable as make arguments):' + @echo ' TARGET="$(strip $(TARGET))" '\\ + @echo ' ARCH="$(strip $(ARCH))" '\\ + @echo ' CC="$(strip $(CC))" '\\ + @echo ' OPT_CFLAGS="$(strip $(OPT_CFLAGS))" '\\ + @echo ' ARCH_FLAGS="$(strip $(ARCH_FLAGS))" '\\ + @echo ' CPU_CFLAGS="$(strip $(CPU_CFLAGS))" '\\ + @echo ' STD_CFLAGS="$(strip $(STD_CFLAGS))" '\\ + @echo ' WARN_CFLAGS="$(strip $(WARN_CFLAGS))" '\\ + @echo ' NOWARN_CFLAGS="$(strip $(NOWARN_CFLAGS))" '\\ + @echo ' ERROR_CFLAGS="$(strip $(ERROR_CFLAGS))" '\\ + @echo ' CFLAGS="$(strip $(CFLAGS))" '\\ @$(foreach opt,$(enabled_opts),\ $(if $(subst command line,,$(origin USE_$(opt))),,\ - echo -n 'USE_$(opt)=$(USE_$(opt)) ';) \ + echo ' USE_$(opt)=$(USE_$(opt:0=)) '\\;) \ $(if $(subst command line,,$(origin $(opt)_CFLAGS)),\ - $(if $($(opt)_CFLAGS),echo -n '$(opt)_CFLAGS="$($(opt)_CFLAGS)" ';),\ - echo -n '$(opt)_CFLAGS="$($(opt)_CFLAGS)" ';) \ + $(if $($(opt)_CFLAGS),echo ' $(opt)_CFLAGS="$($(opt)_CFLAGS)" '\\;),\ + echo ' $(opt)_CFLAGS="$($(opt)_CFLAGS)" '\\;) \ $(if $(subst command line,,$(origin $(opt)_LDFLAGS)),\ - $(if $($(opt)_LDFLAGS),echo -n '$(opt)_LDFLAGS="$($(opt)_LDFLAGS)" ';),\ - echo -n '$(opt)_LDFLAGS="$($(opt)_LDFLAGS)" ';)) + $(if $($(opt)_LDFLAGS),echo ' $(opt)_LDFLAGS="$($(opt)_LDFLAGS)" '\\;),\ + echo ' $(opt)_LDFLAGS="$($(opt)_LDFLAGS)" '\\;)) + @echo ' LDFLAGS="$(strip 
$(LDFLAGS))"' @echo @echo 'COPTS="$(strip $(COPTS))"' - @echo 'LDFLAGS="$(strip $(LDFLAGS))"' @echo 'LDOPTS="$(strip $(LDOPTS))"' @echo 'OPTIONS_OBJS="$(strip $(OPTIONS_OBJS))"' @echo 'OBJS="$(strip $(OBJS))"' @@ -1,2 +1,2 @@ --5742051 +-5590ada @@ -1,2 +1,2 @@ -2024-04-05 20:18:55 +0200 -2024/04/05 +2024-05-29 14:43:38 +0200 +2024/05/29 @@ -1 +1 @@ -2.9.7 +3.0.0 diff --git a/addons/deviceatlas/Makefile b/addons/deviceatlas/Makefile deleted file mode 100644 index fbcffca..0000000 --- a/addons/deviceatlas/Makefile +++ /dev/null @@ -1,48 +0,0 @@ -# DEVICEATLAS_SRC : DeviceAtlas API source root path - - -OS := $(shell uname -s) -OBJS := dadwsch.o -CFLAGS := -g -O2 -LDFLAGS := - -CURL_CONFIG := curl-config -CURLDIR := $(shell $(CURL_CONFIG) --prefix 2>/dev/null || echo /usr/local) -CURL_INC := $(CURLDIR)/include -CURL_LIB := $(CURLDIR)/lib -CURL_LDFLAGS := $(shell $(CURL_CONFIG) --libs 2>/dev/null || echo -L /usr/local/lib -lcurl) - -PCRE2_CONFIG := pcre2-config -PCRE2DIR := $(shell $(PCRE2_CONFIG) --prefix 2>/dev/null || echo /usr/local) -PCRE2_INC := $(PCRE2DIR)/include -PCRE2_LIB := $(PCRE2DIR)/lib -PCRE2_LDFLAGS := $(shell $(PCRE2_CONFIG) --libs8 2>/dev/null || echo /usr/local) - -ifeq ($(DEVICEATLAS_SRC),) -dadwsch: dadwsch.c - $(CC) $(CFLAGS) -o $@ $^ $(LDFLAGS) - -LDFLAGS += -lda -else -DEVICEATLAS_INC = $(DEVICEATLAS_SRC) -DEVICEATLAS_LIB = $(DEVICEATLAS_SRC) -CFLAGS += -DDA_REGEX_HDR=\"dac_pcre2.c\" -DDA_REGEX_TAG=2 -CFLAGS += -DMOBI_CURL -DMOBI_CURLSSET -DMOBI_GZ -DMOBI_ZIP -CFLAGS += -I$(DEVICEATLAS_INC) -I$(CURL_INC) -I$(PCRE2DIR) -LDFLAGS += $(CURL_LDFLAGS) $(PCRE2_LDFLAGS) -lz -lzip -lpthread - -dadwsch: dadwsch.c $(DEVICEATLAS_SRC)/dac.c $(DEVICEATLAS_SRC)/dasch.c $(DEVICEATLAS_SRC)/dadwarc.c $(DEVICEATLAS_SRC)/dadwcom.c $(DEVICEATLAS_SRC)/dadwcurl.c $(DEVICEATLAS_SRC)/json.c $(DEVICEATLAS_SRC)/Os/daunix.c - $(CC) $(CFLAGS) -o $@ $^ $(LDFLAGS) -endif - -ifeq ($(OS), Linux) -LDFLAGS += -lrt -endif -ifeq ($(OS), SunOS) -LDFLAGS += -lrt -endif - -clean: - 
rm -f *.o - rm -f $(DEVICEATLAS_LIB)*.o - rm -f dadwsch diff --git a/addons/deviceatlas/Makefile.inc b/addons/deviceatlas/Makefile.inc new file mode 100644 index 0000000..07d950a --- /dev/null +++ b/addons/deviceatlas/Makefile.inc @@ -0,0 +1,31 @@ +# DEVICEATLAS_SRC : DeviceAtlas API source root path + + +CXX := c++ +CXXLIB := -lstdc++ + +ifeq ($(DEVICEATLAS_SRC),) +OPTIONS_LDFLAGS += -lda +else +DEVICEATLAS_INC = $(DEVICEATLAS_SRC) +DEVICEATLAS_LIB = $(DEVICEATLAS_SRC) +OPTIONS_LDFLAGS += -lpthread +OPTIONS_CFLAGS += -I$(DEVICEATLAS_INC) +ifeq ($(DEVICEATLAS_NOCACHE),) +CXXFLAGS := $(OPTIONS_CFLAGS) -std=gnu++11 +OPTIONS_OBJS += $(DEVICEATLAS_SRC)/dacache.o +OPTIONS_LDFLAGS += $(CXXLIB) +else +OPTIONS_CFLAGS += -DAPINOCACHE +endif +OPTIONS_OBJS += $(DEVICEATLAS_SRC)/dac.o +OPTIONS_OBJS += $(DEVICEATLAS_SRC)/json.o +OPTIONS_OBJS += $(DEVICEATLAS_SRC)/dasch.o +OPTIONS_OBJS += $(DEVICEATLAS_SRC)/dadwarc.o +OPTIONS_OBJS += $(DEVICEATLAS_SRC)/dadwcom.o +OPTIONS_OBJS += $(DEVICEATLAS_SRC)/dadwcurl.o +OPTIONS_OBJS += $(DEVICEATLAS_SRC)/Os/daunix.o +endif + +addons/deviceatlas/dummy/%.o: addons/deviceatlas/dummy/%.cpp + $(cmd_CXX) $(CXXFLAGS) -c -o $@ $< diff --git a/addons/deviceatlas/da.c b/addons/deviceatlas/da.c index 969dfaa..417fbf2 100644 --- a/addons/deviceatlas/da.c +++ b/addons/deviceatlas/da.c @@ -18,7 +18,7 @@ #include <dac.h> #define ATLASTOKSZ PATH_MAX -#define ATLASMAPNM "/hapdeviceatlas" +#define ATLASMAPNM "/da_map_sch_data" static struct { void *atlasimgptr; @@ -26,6 +26,7 @@ static struct { char *jsonpath; char *cookiename; size_t cookienamelen; + size_t cachesize; int atlasfd; da_atlas_t atlas; da_evidence_id_t useragentid; @@ -37,6 +38,7 @@ static struct { .jsonpath = 0, .cookiename = 0, .cookienamelen = 0, + .cachesize = 0, .atlasmap = NULL, .atlasfd = -1, .useragentid = 0, @@ -104,6 +106,29 @@ static int da_properties_cookie(char **args, int section_type, struct proxy *cur return 0; } +static int da_cache_size(char **args, int section_type, struct 
proxy *curpx, + const struct proxy *defpx, const char *file, int line, + char **err) +{ + int cachesize; + if (*(args[1]) == 0) { + memprintf(err, "deviceatlas cache size : expects an integer argument.\n"); + return -1; + } + + cachesize = atol(args[1]); + if (cachesize < 0 || cachesize > DA_CACHE_MAX) { + memprintf(err, "deviceatlas cache size : expects a cache size between 0 and %d, %s given.\n", DA_CACHE_MAX, args[1]); + } else { +#ifdef APINOCACHE + fprintf(stdout, "deviceatlas cache size : no-op, its support is disabled.\n"); +#endif + global_deviceatlas.cachesize = (size_t)cachesize; + } + + return 0; +} + static size_t da_haproxy_read(void *ctx, size_t len, char *buf) { return fread(buf, 1, len, ctx); @@ -168,6 +193,8 @@ static int init_deviceatlas(void) goto out; } + global_deviceatlas.atlas.config.cache_size = global_deviceatlas.cachesize; + if (global_deviceatlas.cookiename == 0) { global_deviceatlas.cookiename = strdup(DA_COOKIENAME_DEFAULT); global_deviceatlas.cookienamelen = strlen(global_deviceatlas.cookiename); @@ -222,48 +249,57 @@ static void da_haproxy_checkinst(void) base = (char *)global_deviceatlas.atlasmap; if (base[0] != 0) { - void *cnew; - size_t atlassz; - char atlasp[ATLASTOKSZ] = {0}; - da_atlas_t inst; - da_property_decl_t extraprops[1] = {{NULL, 0}}; + FILE *jsonp; + void *cnew; + da_status_t status; + size_t atlassz; + char atlasp[ATLASTOKSZ] = {0}; + da_atlas_t inst; + da_property_decl_t extraprops[1] = {{NULL, 0}}; #ifdef USE_THREAD - HA_SPIN_LOCK(OTHER_LOCK, &dadwsch_lock); + HA_SPIN_LOCK(OTHER_LOCK, &dadwsch_lock); #endif - strlcpy2(atlasp, base, sizeof(atlasp)); - if (da_atlas_read_mapped(atlasp, NULL, &cnew, &atlassz) == DA_OK) { - if (da_atlas_open(&inst, extraprops, cnew, atlassz) == DA_OK) { - char jsonbuf[26]; - time_t jsond; - - da_atlas_close(&global_deviceatlas.atlas); - free(global_deviceatlas.atlasimgptr); - global_deviceatlas.atlasimgptr = cnew; - global_deviceatlas.atlas = inst; - memset(base, 0, ATLASTOKSZ); - jsond 
= da_getdatacreation(&global_deviceatlas.atlas); - ctime_r(&jsond, jsonbuf); - jsonbuf[24] = 0; - printf("deviceatlas: new instance, data file date `%s`.\n", jsonbuf); - } else { - ha_warning("deviceatlas: instance update failed.\n"); - memset(base, 0, ATLASTOKSZ); - free(cnew); - } - } + strlcpy2(atlasp, base + sizeof(char), sizeof(atlasp)); + jsonp = fopen(atlasp, "r"); + if (jsonp == 0) { + ha_alert("deviceatlas : '%s' json file has invalid path or is not readable.\n", + atlasp); #ifdef USE_THREAD - HA_SPIN_UNLOCK(OTHER_LOCK, &dadwsch_lock); + HA_SPIN_UNLOCK(OTHER_LOCK, &dadwsch_lock); #endif - } - } + return; + } + + status = da_atlas_compile(jsonp, da_haproxy_read, da_haproxy_seek, + &cnew, &atlassz); + fclose(jsonp); + if (status == DA_OK) { + if (da_atlas_open(&inst, extraprops, cnew, atlassz) == DA_OK) { + da_atlas_close(&global_deviceatlas.atlas); + free(global_deviceatlas.atlasimgptr); + global_deviceatlas.atlasimgptr = cnew; + global_deviceatlas.atlas = inst; + base[0] = 0; + ha_notice("deviceatlas : new instance, data file date `%s`.\n", + da_getdatacreationiso8601(&global_deviceatlas.atlas)); + } else { + ha_alert("deviceatlas : instance update failed.\n"); + free(cnew); + } + } +#ifdef USE_THREAD + HA_SPIN_UNLOCK(OTHER_LOCK, &dadwsch_lock); +#endif + } + } } static int da_haproxy(const struct arg *args, struct sample *smp, da_deviceinfo_t *devinfo) { - struct buffer *tmp; - da_propid_t prop, *pprop; - da_status_t status; - da_type_t proptype; + struct buffer *tmp; + da_propid_t prop, *pprop; + da_status_t status; + da_type_t proptype; const char *propname; int i; @@ -463,6 +499,7 @@ static struct cfg_kw_list dacfg_kws = {{ }, { { CFG_GLOBAL, "deviceatlas-log-level", da_log_level }, { CFG_GLOBAL, "deviceatlas-property-separator", da_property_separator }, { CFG_GLOBAL, "deviceatlas-properties-cookie", da_properties_cookie }, + { CFG_GLOBAL, "deviceatlas-cache-size", da_cache_size }, { 0, NULL, NULL }, }}; @@ -486,10 +523,10 @@ static void 
da_haproxy_register_build_options() { char *ptr = NULL; -#ifdef MOBI_DA_DUMMY_LIBRARY +#ifdef DATLAS_DA_DUMMY_LIBRARY memprintf(&ptr, "Built with DeviceAtlas support (dummy library only)."); #else - memprintf(&ptr, "Built with DeviceAtlas support (library version %u.%u).", MOBI_DA_MAJOR, MOBI_DA_MINOR); + memprintf(&ptr, "Built with DeviceAtlas support (library version %u.%u).", DATLAS_DA_MAJOR, DATLAS_DA_MINOR); #endif hap_register_build_opts(ptr, 1); } diff --git a/addons/deviceatlas/dadwsch.c b/addons/deviceatlas/dadwsch.c deleted file mode 100644 index e35566a..0000000 --- a/addons/deviceatlas/dadwsch.c +++ /dev/null @@ -1,195 +0,0 @@ -#define _GNU_SOURCE -#include <dac.h> -#include <dadwcurl.h> -#include <dadwarc.h> -#include <getopt.h> -#include <stdlib.h> -#include <signal.h> -#include <errno.h> -#include <fcntl.h> -#include <sys/mman.h> - -#define ATLASTOKSZ PATH_MAX -#define ATLASMAPNM "/hapdeviceatlas" - -const char *__pgname; - -static struct { - da_dwatlas_t o; - int ofd; - void* atlasmap; -} global_deviceatlassch = { - .ofd = -1, - .atlasmap = NULL -}; - - -void usage(void) -{ - fprintf(stderr, "%s -u download URL [-d hour (in H:M:S format) current hour by default] [-p path for the downloaded file, /tmp by default]\n", __pgname); - exit(EXIT_FAILURE); -} - -static size_t jsonread(void *ctx, size_t count, char *buf) -{ - return fread(buf, 1, count, ctx); -} - -static da_status_t jsonseek(void *ctx, off_t pos) -{ - return fseek(ctx, pos, SEEK_SET) != -1 ? 
DA_OK : DA_SYS; -} - -static void dadwlog(dw_config_t cfg, const char* msg) -{ - time_t now = time(NULL); - char buf[26] = {0}; - ctime_r(&now, buf); - buf[24] = 0; - fprintf(stderr, "%s: %s\n", buf, msg); -} - -static dw_status_t dadwnot(void *a, dw_config_t *cfg) -{ - da_dwatlas_t *o = (da_dwatlas_t *)a; - if (!o) - return DW_ERR; - char *e; - char jsondbuf[26] = {0}, buf[26] = {0}, atlasp[ATLASTOKSZ] = {0}; - time_t now = time(NULL); - time_t jsond; - int fd = -1; - (void)a; - jsond = da_getdatacreation(&o->atlas); - dwgetfinalp(o->dcfg.info, atlasp, sizeof(atlasp)); - ctime_r(&jsond, jsondbuf); - ctime_r(&now, buf); - jsondbuf[24] = 0; - buf[24] = 0; - - printf("%s: data file generated on `%s`\n", buf, jsondbuf); - int val = 1; - unsigned char *ptr = (unsigned char *)global_deviceatlassch.atlasmap; - memset(ptr, 0, sizeof(atlasp)); - strcpy(ptr, atlasp); - return DW_OK; -} - -static da_status_t dadwinit(void) -{ - if ((global_deviceatlassch.ofd = shm_open(ATLASMAPNM, O_RDWR | O_CREAT, 0660)) == -1) { - fprintf(stderr, "%s\n", strerror(errno)); - return DA_SYS; - } - - if (ftruncate(global_deviceatlassch.ofd, ATLASTOKSZ) == -1) { - close(global_deviceatlassch.ofd); - return DA_SYS; - } - lseek(global_deviceatlassch.ofd, 0, SEEK_SET); - global_deviceatlassch.atlasmap = mmap(0, ATLASTOKSZ, PROT_READ | PROT_WRITE, MAP_SHARED, global_deviceatlassch.ofd, 0); - if (global_deviceatlassch.atlasmap == MAP_FAILED) { - fprintf(stderr, "%s\n", strerror(errno)); - return DA_SYS; - } else { - memset(global_deviceatlassch.atlasmap, 0, ATLASTOKSZ); - return DA_OK; - } -} - -static void dadwexit(int sig __attribute__((unused)), siginfo_t *s __attribute__((unused)), void *ctx __attribute__((unused))) -{ - ssize_t w; - - fprintf(stderr, "%s: exit\n", __pgname); - dw_daatlas_close(&global_deviceatlassch.o); - da_fini(); - munmap(global_deviceatlassch.atlasmap, ATLASTOKSZ); - close(global_deviceatlassch.ofd); - shm_unlink(ATLASMAPNM); - exit(EXIT_SUCCESS); -} - -int main(int argc, 
char **argv) -{ - const char *opts = "u:p:d:h"; - bool dset = false; - size_t i; - int ch; - - da_property_decl_t extraprops[1] = { - { 0, 0 } - }; - - __pgname = argv[0]; - - dw_df_dainit_fn = curldwinit; - dw_df_dacleanup_fn = curldwcleanup; - - da_init(); - memset(&global_deviceatlassch.o.dcfg, 0, sizeof(global_deviceatlassch.o.dcfg)); - while ((ch = getopt(argc, argv, opts)) != -1) { - switch (ch) { - case 'u': - global_deviceatlassch.o.dcfg.info.url = strdup(optarg); - break; - case 'p': - global_deviceatlassch.o.dcfg.info.path = strdup(optarg); - break; - case 'd': - if (strptime(optarg, "%H:%M:%S", &global_deviceatlassch.o.dcfg.info.rtm) != NULL) - dset = true; - else - usage(); - break; - case 'h': - default: - usage(); - } - } - - if (!dset) { - time_t now = time(NULL); - struct tm *cnow = gmtime(&now); - memcpy(&global_deviceatlassch.o.dcfg.info.rtm, cnow, offsetof(struct tm, tm_mday)); - } - - if (!global_deviceatlassch.o.dcfg.info.url) - usage(); - - struct sigaction sa; - memset(&sa, 0, sizeof(sa)); - sa.sa_flags = SA_SIGINFO | SA_RESTART; - sa.sa_sigaction = dadwexit; - - global_deviceatlassch.o.dcfg.info.datatm = 1; - global_deviceatlassch.o.dcfg.info.chksum = 1; - global_deviceatlassch.o.dcfg.info.reload = 1; - global_deviceatlassch.o.dcfg.info.tobin = 1; - global_deviceatlassch.o.dcfg.ep = extraprops; - global_deviceatlassch.o.dcfg.dwproc = curldwproc; - global_deviceatlassch.o.dcfg.dwextract = dadwextract; - global_deviceatlassch.o.dcfg.lptr = (void *)stderr; - global_deviceatlassch.o.dcfg.dwlog = &dadwlog; - global_deviceatlassch.o.dcfg.dwnotify_n = &dadwnot; - global_deviceatlassch.o.rfn = jsonread; - global_deviceatlassch.o.posfn = jsonseek; - - if (dadwinit() != DA_OK) { - fprintf(stderr, "%s init failed\n", __pgname); - exit(EXIT_FAILURE); - } - - if (da_atlas_open_schedule(&global_deviceatlassch.o) != DA_OK) { - fprintf(stderr, "%s scheduling failed\n", __pgname); - exit(EXIT_FAILURE); - } - - sigaction(SIGINT, &sa, NULL); - 
sigaction(SIGQUIT, &sa, NULL); - sigaction(SIGTERM, &sa, NULL); - - while (true) sleep(1); - - return 0; -} diff --git a/addons/deviceatlas/dummy/Makefile b/addons/deviceatlas/dummy/Makefile index 8bba840..0e87738 100644 --- a/addons/deviceatlas/dummy/Makefile +++ b/addons/deviceatlas/dummy/Makefile @@ -1,7 +1,7 @@ # makefile for dummy DeviceAtlas library # # To enable the DeviceAtlas module support, the following are needed -# make TARGET=<target> DEVICEATLAS_SRC=addons/deviceatlas/dummy USE_PCRE=1 USE_DEVICEATLAS=1 +# make TARGET=<target> DEVICEATLAS_SRC=addons/deviceatlas/dummy USE_DEVICEATLAS=1 build: libda.a diff --git a/addons/deviceatlas/dummy/dac.c b/addons/deviceatlas/dummy/dac.c index 720dc6a..c846ec0 100644 --- a/addons/deviceatlas/dummy/dac.c +++ b/addons/deviceatlas/dummy/dac.c @@ -37,21 +37,21 @@ da_typename(da_type_t fieldtype) } char * -da_getdataversion(da_atlas_t *atlas) +da_getdataversion(const da_atlas_t *atlas) { return "dummy library version 1.0"; } time_t -da_getdatacreation(da_atlas_t *atlas) +da_getdatacreation(const da_atlas_t *atlas) { return time(NULL); } -int -da_getdatarevision(da_atlas_t *atlas) +char * +da_getdatacreationiso8601(const da_atlas_t *atlas) { - return 1; + return "20000123T012345.678+0900"; } da_status_t @@ -118,11 +118,6 @@ da_atlas_getpropcount(const da_atlas_t *atlas) return 1; } -void -da_atlas_setconfig(da_atlas_t *atlas, da_config_t *config) -{ -} - da_status_t da_searchv(const da_atlas_t *atlas, da_deviceinfo_t *result, da_evidence_t *evidence, size_t count) { diff --git a/addons/deviceatlas/dummy/dac.h b/addons/deviceatlas/dummy/dac.h index bf166ae..836a9af 100644 --- a/addons/deviceatlas/dummy/dac.h +++ b/addons/deviceatlas/dummy/dac.h @@ -26,9 +26,8 @@ typedef int _Bool; #endif #endif -#define MOBI_DA_MAJOR 2 -#define MOBI_DA_MINOR 1 -#define MOBI_DA_DUMMY_LIBRARY 1 +#define DATLAS_DA_MAJOR 3 +#define DATLAS_DA_DUMMY_LIBRARY 1 /** @@ -134,6 +133,7 @@ typedef void (*da_errorfunc_t)(da_severity_t severity, 
da_status_t status, const /* Manifest constants. */ enum { + DA_CACHE_MAX = 50000, /* * used as the initial guess for the compiled size of an atlas. * If atlas sizes grow more beyond this, it can be expanded to avoid multiple scans of the data. @@ -142,9 +142,8 @@ enum { }; struct da_config { - unsigned int ua_props; - unsigned int lang_props; - unsigned int __reserved[14]; /* enough reserved keywords for future use */ + unsigned int cache_size; + unsigned int __reserved[15]; /* enough reserved keywords for future use */ }; /** @@ -451,21 +450,22 @@ const char *da_typename(da_type_t type); * @param atlas * @return version */ -char *da_getdataversion(da_atlas_t *atlas); +char *da_getdataversion(const da_atlas_t *atlas); /** * @brief returns the date creation's timestamp from the JSON in memory * @param atlas * @return version */ -time_t da_getdatacreation(da_atlas_t *atlas); +time_t da_getdatacreation(const da_atlas_t *atlas); +char *da_getdatacreationiso8601(const da_atlas_t *atlas); /** * @brief returns the revision's number from the JSON in memory * @param atlas * @return version */ -int da_getdatarevision(da_atlas_t *atlas); +int da_getdatarevision(const da_atlas_t *atlas); /** * @brief returns the name of a global property diff --git a/addons/deviceatlas/dummy/dacache.cpp b/addons/deviceatlas/dummy/dacache.cpp new file mode 100644 index 0000000..bab71cc --- /dev/null +++ b/addons/deviceatlas/dummy/dacache.cpp @@ -0,0 +1,26 @@ +#include "dac.h" + +extern "C" { +void da_atlas_cache_init(const da_atlas_t *atlas) { + (void)atlas; +} + +da_status_t da_atlas_cache_insert(const da_atlas_t *atlas, unsigned long long h, da_deviceinfo_t *info) { + (void)atlas; + (void)h; + (void)info; + return DA_OK; +} + +da_status_t da_atlas_cache_search(const da_atlas_t *atlas, unsigned long long h, da_deviceinfo_t **info) { + (void)atlas; + (void)h; + (void)info; + return DA_OK; +} + +void da_atlas_cache_close(da_atlas_t *atlas) { + (void)atlas; +} +} + diff --git 
a/addons/deviceatlas/dummy/dadwarc.c b/addons/deviceatlas/dummy/dadwarc.c new file mode 100644 index 0000000..53c5fdf --- /dev/null +++ b/addons/deviceatlas/dummy/dadwarc.c @@ -0,0 +1 @@ +#include <stdio.h> diff --git a/addons/deviceatlas/dummy/dadwcurl.c b/addons/deviceatlas/dummy/dadwcurl.c new file mode 100644 index 0000000..53c5fdf --- /dev/null +++ b/addons/deviceatlas/dummy/dadwcurl.c @@ -0,0 +1 @@ +#include <stdio.h> diff --git a/addons/ot/src/scope.c b/addons/ot/src/scope.c index efe8fe2..8a4c02f 100644 --- a/addons/ot/src/scope.c +++ b/addons/ot/src/scope.c @@ -113,7 +113,7 @@ struct flt_ot_runtime_context *flt_ot_runtime_context_init(struct stream *s, str LIST_INIT(&(retptr->contexts)); uuid = b_make(retptr->uuid, sizeof(retptr->uuid), 0, 0); - ha_generate_uuid(&uuid); + ha_generate_uuid_v4(&uuid); #ifdef USE_OT_VARS /* diff --git a/addons/promex/README b/addons/promex/README index 4e29e23..8c2266f 100644 --- a/addons/promex/README +++ b/addons/promex/README @@ -75,6 +75,40 @@ exported. Here are examples: /metrics?scope=&scope=global # ==> global metrics will be exported /metrics?scope=sticktable # ==> stick tables metrics will be exported +* Filtering on metrics name + +It is possible to filter metrics dumped by the exporter. To do so, multiple +"metrics" parameters may be passed to specify all metrics to include or exclude, +as a comma-separated list of filters. By default, there is no filter and all +metrics are dumped. By specifying at least one metric to be included in the +dump, this disables the default behavior and only explicitly mentioned metrics +are dumped. To include a metric, its name must be specified. To exclude it, its +name must be preceded by a minus character ('-'). 
Here are examples: + + # Dump all metrics, except "haproxy_server_check_status" + /metrics?metrics=-haproxy_server_check_status + + # Only dump frontends, backends and servers status + /metrics?metrics=haproxy_frontend_status,haproxy_backend_status,haproxy_server_status + + +* Dump extra counters + +Internally, some modules can register to frontends, backends, servers or +listeners to export extra counters. For instance, some multiplexers do so on +frontends or backends. To display extra counters for all registered modules, +"extra-counters" parameter must be passed. It can be combined with "scope" +parameters: + + /metrics?extra-counters # ==> export all extra counters in + # addition to main ones, for all + # scopes + /metrics?scope=frontend&extra-counters # ==> export extra counters for + # frontends + +Extra counters are only available for frontends, backends, servers and +listeners. + * How do I prevent my prometheus instance to explode? ** Filtering on servers state @@ -109,7 +143,8 @@ except the server_check_status, you may configure prometheus that way: Exported metrics ------------------ -See prometheus export for the description of each field. +See prometheus export for the description of each field. Only main metrics are +listed below. Metrics from extra counters are not listed. * Globals metrics @@ -310,6 +345,8 @@ See prometheus export for the description of each field. 
| haproxy_sticktable_size | | haproxy_sticktable_used | +----------------------------------------------------+ + +* Resolvers metrics + ++----------------------------------------------------+ +| Metric name | ++----------------------------------------------------+ +| haproxy_resolver_sent | +| haproxy_resolver_send_error | +| haproxy_resolver_valid | +| haproxy_resolver_update | +| haproxy_resolver_cname | +| haproxy_resolver_cname_error | +| haproxy_resolver_any_err | +| haproxy_resolver_nx | +| haproxy_resolver_timeout | +| haproxy_resolver_refused | +| haproxy_resolver_other | +| haproxy_resolver_invalid | +| haproxy_resolver_too_big | +| haproxy_resolver_outdated | ++----------------------------------------------------+ diff --git a/addons/promex/include/promex/promex.h b/addons/promex/include/promex/promex.h new file mode 100644 index 0000000..74ea2f1 --- /dev/null +++ b/addons/promex/include/promex/promex.h @@ -0,0 +1,127 @@ +/* + * include/promex/promex.h + * This file contains definitions, macros and inline functions dedicated to + * the prometheus exporter for HAProxy. + * + * Copyright 2024 Christopher Faulet <cfaulet@haproxy.com> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _PROMEX_PROMEX_H +#define _PROMEX_PROMEX_H + +#include <import/ist.h> + +#include <haproxy/api-t.h> +#include <haproxy/list-t.h> + +#include <haproxy/stats.h> + +/* Prometheus exporter flags (ctx->flags) */ +#define PROMEX_FL_METRIC_HDR 0x00000001 +#define PROMEX_FL_INFO_METRIC 0x00000002 +#define PROMEX_FL_FRONT_METRIC 0x00000004 +#define PROMEX_FL_BACK_METRIC 0x00000008 +#define PROMEX_FL_SRV_METRIC 0x00000010 +#define PROMEX_FL_LI_METRIC 0x00000020 +#define PROMEX_FL_MODULE_METRIC 0x00000040 +#define PROMEX_FL_SCOPE_GLOBAL 0x00000080 +#define PROMEX_FL_SCOPE_FRONT 0x00000100 +#define PROMEX_FL_SCOPE_BACK 0x00000200 +#define PROMEX_FL_SCOPE_SERVER 0x00000400 +#define PROMEX_FL_SCOPE_LI 0x00000800 +#define PROMEX_FL_SCOPE_MODULE 0x00001000 +#define PROMEX_FL_NO_MAINT_SRV 0x00002000 +#define PROMEX_FL_EXTRA_COUNTERS 0x00004000 +#define PROMEX_FL_INC_METRIC_BY_DEFAULT 0x00008000 + +#define PROMEX_FL_SCOPE_ALL (PROMEX_FL_SCOPE_GLOBAL | PROMEX_FL_SCOPE_FRONT | \ + PROMEX_FL_SCOPE_LI | PROMEX_FL_SCOPE_BACK | \ + PROMEX_FL_SCOPE_SERVER | PROMEX_FL_SCOPE_MODULE) + +/* The max number of labels per metric */ +#define PROMEX_MAX_LABELS 8 + +/* Prometheus metric type (gauge or counter) */ +enum promex_mt_type { + PROMEX_MT_GAUGE = 1, + PROMEX_MT_COUNTER = 2, +}; + +/* Describe a prometheus metric */ +struct promex_metric { + struct ist n; /* The metric name */ + enum promex_mt_type type; /* The metric type (gauge or counter) */ + unsigned int flags; /* PROMEX_FL_* flags */ +}; + +/* Describe a prometheus metric label. It is just a key/value pair */ +struct promex_label { + struct ist name; + struct ist value; +}; + +/* Entity used to expose custom metrics on HAProxy. + * + * * start_metrics_dump(): It is an optional callback function. 
If defined, it + * is responsible to initialize the dump context used + * as the first restart point. + * + * * stop_metrics_dump(): It is an optional callback function. If defined, it + * is responsible to deinit the dump context. + * + * * metric_info(): This one is mandatory. It returns the info about the + * metric: name, type, flags and description. + * + * * start_ts(): This one is mandatory, it initializes the context for a time + * series for a given metric. This context is the second + * restart point. + * + * * next_ts(): This one is mandatory. It iterates on time series for a + * given metric. It is also responsible to handle end of a + * time series and deinit the context. + * + * * fill_ts(): It fills info on the time series for a given metric: the + * labels and the value. + */ +struct promex_module { + struct list list; + struct ist name; /* The promex module name */ + int (*metric_info)(unsigned int id, /* Return info for the given id */ + struct promex_metric *metric, + struct ist *desc); + void *(*start_metrics_dump)(); /* Start a dump (may be NULL) */ + void (*stop_metrics_dump)(void *ctx); /* Stop a dump (may be NULL) */ + void *(*start_ts)(void *ctx, unsigned int id); /* Start a time series for the given metric */ + void *(*next_ts)(void *ctx, void *ts_ctx, unsigned int id); /* move to the next time series for the given metric */ + int (*fill_ts)(void *ctx, void *ts_ctx, unsigned int id, /* fill the time series for the given metric */ + struct promex_label *labels, struct field *field); + + size_t nb_metrics; /* # of metrics */ +}; + +extern struct list promex_module_list; + +void promex_register_module(struct promex_module *m); + +#endif /* _PROMEX_PROMEX_H */ + +/* + * Local variables: + * c-indent-level: 8 + * c-basic-offset: 8 + * End: + */ diff --git a/addons/promex/service-prometheus.c b/addons/promex/service-prometheus.c index e9ad44e..4e0bc68 100644 --- a/addons/promex/service-prometheus.c +++ b/addons/promex/service-prometheus.c @@ 
-28,6 +28,7 @@ #include <haproxy/list.h> #include <haproxy/listener.h> #include <haproxy/log.h> +#include <haproxy/pool.h> #include <haproxy/proxy.h> #include <haproxy/sample.h> #include <haproxy/sc_strm.h> @@ -38,6 +39,9 @@ #include <haproxy/task.h> #include <haproxy/tools.h> #include <haproxy/version.h> +#include <haproxy/xxhash.h> + +#include <promex/promex.h> /* Prometheus exporter applet states (appctx->st0) */ enum { @@ -56,45 +60,30 @@ enum { PROMEX_DUMPER_BACK, /* dump metrics of backend proxies */ PROMEX_DUMPER_LI, /* dump metrics of listeners */ PROMEX_DUMPER_SRV, /* dump metrics of servers */ - PROMEX_DUMPER_STICKTABLE, /* dump metrics of stick tables */ + PROMEX_DUMPER_MODULES, /* dump metrics of modules */ PROMEX_DUMPER_DONE, /* finished */ }; -/* Prometheus exporter flags (ctx->flags) */ -#define PROMEX_FL_METRIC_HDR 0x00000001 -#define PROMEX_FL_INFO_METRIC 0x00000002 -#define PROMEX_FL_FRONT_METRIC 0x00000004 -#define PROMEX_FL_BACK_METRIC 0x00000008 -#define PROMEX_FL_SRV_METRIC 0x00000010 -#define PROMEX_FL_LI_METRIC 0x00000020 -#define PROMEX_FL_STICKTABLE_METRIC 0x00000040 -#define PROMEX_FL_SCOPE_GLOBAL 0x00000080 -#define PROMEX_FL_SCOPE_FRONT 0x00000100 -#define PROMEX_FL_SCOPE_BACK 0x00000200 -#define PROMEX_FL_SCOPE_SERVER 0x00000400 -#define PROMEX_FL_SCOPE_LI 0x00000800 -#define PROMEX_FL_SCOPE_STICKTABLE 0x00001000 -#define PROMEX_FL_NO_MAINT_SRV 0x00002000 - -#define PROMEX_FL_SCOPE_ALL (PROMEX_FL_SCOPE_GLOBAL | PROMEX_FL_SCOPE_FRONT | \ - PROMEX_FL_SCOPE_LI | PROMEX_FL_SCOPE_BACK | \ - PROMEX_FL_SCOPE_SERVER | PROMEX_FL_SCOPE_STICKTABLE) +struct promex_module_ref { + struct promex_module *mod; + struct list list; +}; + +/* An entry in a headers map */ +struct promex_metric_filter { + int exclude; + struct eb32_node node; +}; /* the context of the applet */ struct promex_ctx { - struct proxy *px; /* current proxy */ - struct stktable *st; /* current table */ - struct listener *li; /* current listener */ - struct server *sv; /* current 
server */ + void *p[4]; /* generic pointers used to save context */ unsigned int flags; /* PROMEX_FL_* */ - unsigned field_num; /* current field number (ST_F_* etc) */ + unsigned field_num; /* current field number (ST_I_PX_* etc) */ + unsigned mod_field_num; /* first field number of the current module (ST_I_PX_* etc) */ int obj_state; /* current state among PROMEX_{FRONT|BACK|SRV|LI}_STATE_* */ -}; - -/* Promtheus metric type (gauge or counter) */ -enum promex_mt_type { - PROMEX_MT_GAUGE = 1, - PROMEX_MT_COUNTER = 2, + struct list modules; /* list of promex modules to export */ + struct eb_root filters; /* list of filters to apply on metrics name */ }; /* The max length for metrics name. It is a hard limit but it should be @@ -108,247 +97,230 @@ enum promex_mt_type { */ #define PROMEX_MAX_METRIC_LENGTH 512 -/* The max number of labels per metric */ -#define PROMEX_MAX_LABELS 8 - -/* Describe a prometheus metric */ -struct promex_metric { - const struct ist n; /* The metric name */ - enum promex_mt_type type; /* The metric type (gauge or counter) */ - unsigned int flags; /* PROMEX_FL_* flags */ -}; - -/* Describe a prometheus metric label. 
It is just a key/value pair */ -struct promex_label { - struct ist name; - struct ist value; -}; - /* Global metrics */ -const struct promex_metric promex_global_metrics[INF_TOTAL_FIELDS] = { - //[INF_NAME] ignored - //[INF_VERSION], ignored - //[INF_RELEASE_DATE] ignored - [INF_NBTHREAD] = { .n = IST("nbthread"), .type = PROMEX_MT_GAUGE, .flags = PROMEX_FL_INFO_METRIC }, - [INF_NBPROC] = { .n = IST("nbproc"), .type = PROMEX_MT_GAUGE, .flags = PROMEX_FL_INFO_METRIC }, - [INF_PROCESS_NUM] = { .n = IST("relative_process_id"), .type = PROMEX_MT_GAUGE, .flags = PROMEX_FL_INFO_METRIC }, - //[INF_PID] ignored - //[INF_UPTIME] ignored - [INF_UPTIME_SEC] = { .n = IST("uptime_seconds"), .type = PROMEX_MT_GAUGE, .flags = PROMEX_FL_INFO_METRIC }, - [INF_START_TIME_SEC] = { .n = IST("start_time_seconds"), .type = PROMEX_MT_GAUGE, .flags = PROMEX_FL_INFO_METRIC }, - //[INF_MEMMAX_MB] ignored - [INF_MEMMAX_BYTES] = { .n = IST("max_memory_bytes"), .type = PROMEX_MT_GAUGE, .flags = PROMEX_FL_INFO_METRIC }, - //[INF_POOL_ALLOC_MB] ignored - [INF_POOL_ALLOC_BYTES] = { .n = IST("pool_allocated_bytes"), .type = PROMEX_MT_GAUGE, .flags = PROMEX_FL_INFO_METRIC }, - //[INF_POOL_USED_MB] ignored - [INF_POOL_USED_BYTES] = { .n = IST("pool_used_bytes"), .type = PROMEX_MT_GAUGE, .flags = PROMEX_FL_INFO_METRIC }, - [INF_POOL_FAILED] = { .n = IST("pool_failures_total"), .type = PROMEX_MT_COUNTER, .flags = PROMEX_FL_INFO_METRIC }, - [INF_ULIMIT_N] = { .n = IST("max_fds"), .type = PROMEX_MT_GAUGE, .flags = PROMEX_FL_INFO_METRIC }, - [INF_MAXSOCK] = { .n = IST("max_sockets"), .type = PROMEX_MT_GAUGE, .flags = PROMEX_FL_INFO_METRIC }, - [INF_MAXCONN] = { .n = IST("max_connections"), .type = PROMEX_MT_GAUGE, .flags = PROMEX_FL_INFO_METRIC }, - [INF_HARD_MAXCONN] = { .n = IST("hard_max_connections"), .type = PROMEX_MT_GAUGE, .flags = PROMEX_FL_INFO_METRIC }, - [INF_CURR_CONN] = { .n = IST("current_connections"), .type = PROMEX_MT_GAUGE, .flags = PROMEX_FL_INFO_METRIC }, - [INF_CUM_CONN] = { .n = 
IST("connections_total"), .type = PROMEX_MT_COUNTER, .flags = PROMEX_FL_INFO_METRIC }, - [INF_CUM_REQ] = { .n = IST("requests_total"), .type = PROMEX_MT_COUNTER, .flags = PROMEX_FL_INFO_METRIC }, - [INF_MAX_SSL_CONNS] = { .n = IST("max_ssl_connections"), .type = PROMEX_MT_GAUGE, .flags = PROMEX_FL_INFO_METRIC }, - [INF_CURR_SSL_CONNS] = { .n = IST("current_ssl_connections"), .type = PROMEX_MT_GAUGE, .flags = PROMEX_FL_INFO_METRIC }, - [INF_CUM_SSL_CONNS] = { .n = IST("ssl_connections_total"), .type = PROMEX_MT_COUNTER, .flags = PROMEX_FL_INFO_METRIC }, - [INF_MAXPIPES] = { .n = IST("max_pipes"), .type = PROMEX_MT_GAUGE, .flags = PROMEX_FL_INFO_METRIC }, - [INF_PIPES_USED] = { .n = IST("pipes_used_total"), .type = PROMEX_MT_COUNTER, .flags = PROMEX_FL_INFO_METRIC }, - [INF_PIPES_FREE] = { .n = IST("pipes_free_total"), .type = PROMEX_MT_COUNTER, .flags = PROMEX_FL_INFO_METRIC }, - [INF_CONN_RATE] = { .n = IST("current_connection_rate"), .type = PROMEX_MT_GAUGE, .flags = PROMEX_FL_INFO_METRIC }, - [INF_CONN_RATE_LIMIT] = { .n = IST("limit_connection_rate"), .type = PROMEX_MT_GAUGE, .flags = PROMEX_FL_INFO_METRIC }, - [INF_MAX_CONN_RATE] = { .n = IST("max_connection_rate"), .type = PROMEX_MT_GAUGE, .flags = PROMEX_FL_INFO_METRIC }, - [INF_SESS_RATE] = { .n = IST("current_session_rate"), .type = PROMEX_MT_GAUGE, .flags = PROMEX_FL_INFO_METRIC }, - [INF_SESS_RATE_LIMIT] = { .n = IST("limit_session_rate"), .type = PROMEX_MT_GAUGE, .flags = PROMEX_FL_INFO_METRIC }, - [INF_MAX_SESS_RATE] = { .n = IST("max_session_rate"), .type = PROMEX_MT_GAUGE, .flags = PROMEX_FL_INFO_METRIC }, - [INF_SSL_RATE] = { .n = IST("current_ssl_rate"), .type = PROMEX_MT_GAUGE, .flags = PROMEX_FL_INFO_METRIC }, - [INF_SSL_RATE_LIMIT] = { .n = IST("limit_ssl_rate"), .type = PROMEX_MT_GAUGE, .flags = PROMEX_FL_INFO_METRIC }, - [INF_MAX_SSL_RATE] = { .n = IST("max_ssl_rate"), .type = PROMEX_MT_GAUGE, .flags = PROMEX_FL_INFO_METRIC }, - [INF_SSL_FRONTEND_KEY_RATE] = { .n = 
IST("current_frontend_ssl_key_rate"), .type = PROMEX_MT_GAUGE, .flags = PROMEX_FL_INFO_METRIC }, - [INF_SSL_FRONTEND_MAX_KEY_RATE] = { .n = IST("max_frontend_ssl_key_rate"), .type = PROMEX_MT_GAUGE, .flags = PROMEX_FL_INFO_METRIC }, - [INF_SSL_FRONTEND_SESSION_REUSE_PCT] = { .n = IST("frontend_ssl_reuse"), .type = PROMEX_MT_GAUGE, .flags = PROMEX_FL_INFO_METRIC }, - [INF_SSL_BACKEND_KEY_RATE] = { .n = IST("current_backend_ssl_key_rate"), .type = PROMEX_MT_GAUGE, .flags = PROMEX_FL_INFO_METRIC }, - [INF_SSL_BACKEND_MAX_KEY_RATE] = { .n = IST("max_backend_ssl_key_rate"), .type = PROMEX_MT_GAUGE, .flags = PROMEX_FL_INFO_METRIC }, - [INF_SSL_CACHE_LOOKUPS] = { .n = IST("ssl_cache_lookups_total"), .type = PROMEX_MT_COUNTER, .flags = PROMEX_FL_INFO_METRIC }, - [INF_SSL_CACHE_MISSES] = { .n = IST("ssl_cache_misses_total"), .type = PROMEX_MT_COUNTER, .flags = PROMEX_FL_INFO_METRIC }, - [INF_COMPRESS_BPS_IN] = { .n = IST("http_comp_bytes_in_total"), .type = PROMEX_MT_COUNTER, .flags = PROMEX_FL_INFO_METRIC }, - [INF_COMPRESS_BPS_OUT] = { .n = IST("http_comp_bytes_out_total"), .type = PROMEX_MT_COUNTER, .flags = PROMEX_FL_INFO_METRIC }, - [INF_COMPRESS_BPS_RATE_LIM] = { .n = IST("limit_http_comp"), .type = PROMEX_MT_GAUGE, .flags = PROMEX_FL_INFO_METRIC }, - [INF_ZLIB_MEM_USAGE] = { .n = IST("current_zlib_memory"), .type = PROMEX_MT_GAUGE, .flags = PROMEX_FL_INFO_METRIC }, - [INF_MAX_ZLIB_MEM_USAGE] = { .n = IST("max_zlib_memory"), .type = PROMEX_MT_GAUGE, .flags = PROMEX_FL_INFO_METRIC }, - [INF_TASKS] = { .n = IST("current_tasks"), .type = PROMEX_MT_GAUGE, .flags = PROMEX_FL_INFO_METRIC }, - [INF_RUN_QUEUE] = { .n = IST("current_run_queue"), .type = PROMEX_MT_GAUGE, .flags = PROMEX_FL_INFO_METRIC }, - [INF_IDLE_PCT] = { .n = IST("idle_time_percent"), .type = PROMEX_MT_GAUGE, .flags = PROMEX_FL_INFO_METRIC }, - //[INF_NODE] ignored - //[INF_DESCRIPTION] ignored - [INF_STOPPING] = { .n = IST("stopping"), .type = PROMEX_MT_GAUGE, .flags = PROMEX_FL_INFO_METRIC }, - [INF_JOBS] 
= { .n = IST("jobs"), .type = PROMEX_MT_GAUGE, .flags = PROMEX_FL_INFO_METRIC }, - [INF_UNSTOPPABLE_JOBS] = { .n = IST("unstoppable_jobs"), .type = PROMEX_MT_GAUGE, .flags = PROMEX_FL_INFO_METRIC }, - [INF_LISTENERS] = { .n = IST("listeners"), .type = PROMEX_MT_GAUGE, .flags = PROMEX_FL_INFO_METRIC }, - [INF_ACTIVE_PEERS] = { .n = IST("active_peers"), .type = PROMEX_MT_GAUGE, .flags = PROMEX_FL_INFO_METRIC }, - [INF_CONNECTED_PEERS] = { .n = IST("connected_peers"), .type = PROMEX_MT_GAUGE, .flags = PROMEX_FL_INFO_METRIC }, - [INF_DROPPED_LOGS] = { .n = IST("dropped_logs_total"), .type = PROMEX_MT_COUNTER, .flags = PROMEX_FL_INFO_METRIC }, - [INF_BUSY_POLLING] = { .n = IST("busy_polling_enabled"), .type = PROMEX_MT_GAUGE, .flags = PROMEX_FL_INFO_METRIC }, - [INF_FAILED_RESOLUTIONS] = { .n = IST("failed_resolutions"), .type = PROMEX_MT_COUNTER, .flags = PROMEX_FL_INFO_METRIC }, - [INF_TOTAL_BYTES_OUT] = { .n = IST("bytes_out_total"), .type = PROMEX_MT_COUNTER, .flags = PROMEX_FL_INFO_METRIC }, - [INF_TOTAL_SPLICED_BYTES_OUT] = { .n = IST("spliced_bytes_out_total"), .type = PROMEX_MT_COUNTER, .flags = PROMEX_FL_INFO_METRIC }, - [INF_BYTES_OUT_RATE] = { .n = IST("bytes_out_rate"), .type = PROMEX_MT_GAUGE, .flags = PROMEX_FL_INFO_METRIC }, - //[INF_DEBUG_COMMANDS_ISSUED] ignored - [INF_CUM_LOG_MSGS] = { .n = IST("recv_logs_total"), .type = PROMEX_MT_COUNTER, .flags = PROMEX_FL_INFO_METRIC }, - [INF_BUILD_INFO] = { .n = IST("build_info"), .type = PROMEX_MT_GAUGE, .flags = PROMEX_FL_INFO_METRIC }, +const struct promex_metric promex_global_metrics[ST_I_INF_MAX] = { + //[ST_I_INF_NAME] ignored + //[ST_I_INF_VERSION], ignored + //[ST_I_INF_RELEASE_DATE] ignored + [ST_I_INF_NBTHREAD] = { .n = IST("nbthread"), .type = PROMEX_MT_GAUGE, .flags = PROMEX_FL_INFO_METRIC }, + [ST_I_INF_NBPROC] = { .n = IST("nbproc"), .type = PROMEX_MT_GAUGE, .flags = PROMEX_FL_INFO_METRIC }, + [ST_I_INF_PROCESS_NUM] = { .n = IST("relative_process_id"), .type = PROMEX_MT_GAUGE, .flags = 
PROMEX_FL_INFO_METRIC }, + //[ST_I_INF_PID] ignored + //[ST_I_INF_UPTIME] ignored + [ST_I_INF_UPTIME_SEC] = { .n = IST("uptime_seconds"), .type = PROMEX_MT_GAUGE, .flags = PROMEX_FL_INFO_METRIC }, + [ST_I_INF_START_TIME_SEC] = { .n = IST("start_time_seconds"), .type = PROMEX_MT_GAUGE, .flags = PROMEX_FL_INFO_METRIC }, + //[ST_I_INF_MEMMAX_MB] ignored + [ST_I_INF_MEMMAX_BYTES] = { .n = IST("max_memory_bytes"), .type = PROMEX_MT_GAUGE, .flags = PROMEX_FL_INFO_METRIC }, + //[ST_I_INF_POOL_ALLOC_MB] ignored + [ST_I_INF_POOL_ALLOC_BYTES] = { .n = IST("pool_allocated_bytes"), .type = PROMEX_MT_GAUGE, .flags = PROMEX_FL_INFO_METRIC }, + //[ST_I_INF_POOL_USED_MB] ignored + [ST_I_INF_POOL_USED_BYTES] = { .n = IST("pool_used_bytes"), .type = PROMEX_MT_GAUGE, .flags = PROMEX_FL_INFO_METRIC }, + [ST_I_INF_POOL_FAILED] = { .n = IST("pool_failures_total"), .type = PROMEX_MT_COUNTER, .flags = PROMEX_FL_INFO_METRIC }, + [ST_I_INF_ULIMIT_N] = { .n = IST("max_fds"), .type = PROMEX_MT_GAUGE, .flags = PROMEX_FL_INFO_METRIC }, + [ST_I_INF_MAXSOCK] = { .n = IST("max_sockets"), .type = PROMEX_MT_GAUGE, .flags = PROMEX_FL_INFO_METRIC }, + [ST_I_INF_MAXCONN] = { .n = IST("max_connections"), .type = PROMEX_MT_GAUGE, .flags = PROMEX_FL_INFO_METRIC }, + [ST_I_INF_HARD_MAXCONN] = { .n = IST("hard_max_connections"), .type = PROMEX_MT_GAUGE, .flags = PROMEX_FL_INFO_METRIC }, + [ST_I_INF_CURR_CONN] = { .n = IST("current_connections"), .type = PROMEX_MT_GAUGE, .flags = PROMEX_FL_INFO_METRIC }, + [ST_I_INF_CUM_CONN] = { .n = IST("connections_total"), .type = PROMEX_MT_COUNTER, .flags = PROMEX_FL_INFO_METRIC }, + [ST_I_INF_CUM_REQ] = { .n = IST("requests_total"), .type = PROMEX_MT_COUNTER, .flags = PROMEX_FL_INFO_METRIC }, + [ST_I_INF_MAX_SSL_CONNS] = { .n = IST("max_ssl_connections"), .type = PROMEX_MT_GAUGE, .flags = PROMEX_FL_INFO_METRIC }, + [ST_I_INF_CURR_SSL_CONNS] = { .n = IST("current_ssl_connections"), .type = PROMEX_MT_GAUGE, .flags = PROMEX_FL_INFO_METRIC }, + [ST_I_INF_CUM_SSL_CONNS] = { 
.n = IST("ssl_connections_total"), .type = PROMEX_MT_COUNTER, .flags = PROMEX_FL_INFO_METRIC }, + [ST_I_INF_MAXPIPES] = { .n = IST("max_pipes"), .type = PROMEX_MT_GAUGE, .flags = PROMEX_FL_INFO_METRIC }, + [ST_I_INF_PIPES_USED] = { .n = IST("pipes_used_total"), .type = PROMEX_MT_COUNTER, .flags = PROMEX_FL_INFO_METRIC }, + [ST_I_INF_PIPES_FREE] = { .n = IST("pipes_free_total"), .type = PROMEX_MT_COUNTER, .flags = PROMEX_FL_INFO_METRIC }, + [ST_I_INF_CONN_RATE] = { .n = IST("current_connection_rate"), .type = PROMEX_MT_GAUGE, .flags = PROMEX_FL_INFO_METRIC }, + [ST_I_INF_CONN_RATE_LIMIT] = { .n = IST("limit_connection_rate"), .type = PROMEX_MT_GAUGE, .flags = PROMEX_FL_INFO_METRIC }, + [ST_I_INF_MAX_CONN_RATE] = { .n = IST("max_connection_rate"), .type = PROMEX_MT_GAUGE, .flags = PROMEX_FL_INFO_METRIC }, + [ST_I_INF_SESS_RATE] = { .n = IST("current_session_rate"), .type = PROMEX_MT_GAUGE, .flags = PROMEX_FL_INFO_METRIC }, + [ST_I_INF_SESS_RATE_LIMIT] = { .n = IST("limit_session_rate"), .type = PROMEX_MT_GAUGE, .flags = PROMEX_FL_INFO_METRIC }, + [ST_I_INF_MAX_SESS_RATE] = { .n = IST("max_session_rate"), .type = PROMEX_MT_GAUGE, .flags = PROMEX_FL_INFO_METRIC }, + [ST_I_INF_SSL_RATE] = { .n = IST("current_ssl_rate"), .type = PROMEX_MT_GAUGE, .flags = PROMEX_FL_INFO_METRIC }, + [ST_I_INF_SSL_RATE_LIMIT] = { .n = IST("limit_ssl_rate"), .type = PROMEX_MT_GAUGE, .flags = PROMEX_FL_INFO_METRIC }, + [ST_I_INF_MAX_SSL_RATE] = { .n = IST("max_ssl_rate"), .type = PROMEX_MT_GAUGE, .flags = PROMEX_FL_INFO_METRIC }, + [ST_I_INF_SSL_FRONTEND_KEY_RATE] = { .n = IST("current_frontend_ssl_key_rate"), .type = PROMEX_MT_GAUGE, .flags = PROMEX_FL_INFO_METRIC }, + [ST_I_INF_SSL_FRONTEND_MAX_KEY_RATE] = { .n = IST("max_frontend_ssl_key_rate"), .type = PROMEX_MT_GAUGE, .flags = PROMEX_FL_INFO_METRIC }, + [ST_I_INF_SSL_FRONTEND_SESSION_REUSE_PCT] = { .n = IST("frontend_ssl_reuse"), .type = PROMEX_MT_GAUGE, .flags = PROMEX_FL_INFO_METRIC }, + [ST_I_INF_SSL_BACKEND_KEY_RATE] = { .n = 
IST("current_backend_ssl_key_rate"), .type = PROMEX_MT_GAUGE, .flags = PROMEX_FL_INFO_METRIC }, + [ST_I_INF_SSL_BACKEND_MAX_KEY_RATE] = { .n = IST("max_backend_ssl_key_rate"), .type = PROMEX_MT_GAUGE, .flags = PROMEX_FL_INFO_METRIC }, + [ST_I_INF_SSL_CACHE_LOOKUPS] = { .n = IST("ssl_cache_lookups_total"), .type = PROMEX_MT_COUNTER, .flags = PROMEX_FL_INFO_METRIC }, + [ST_I_INF_SSL_CACHE_MISSES] = { .n = IST("ssl_cache_misses_total"), .type = PROMEX_MT_COUNTER, .flags = PROMEX_FL_INFO_METRIC }, + [ST_I_INF_COMPRESS_BPS_IN] = { .n = IST("http_comp_bytes_in_total"), .type = PROMEX_MT_COUNTER, .flags = PROMEX_FL_INFO_METRIC }, + [ST_I_INF_COMPRESS_BPS_OUT] = { .n = IST("http_comp_bytes_out_total"), .type = PROMEX_MT_COUNTER, .flags = PROMEX_FL_INFO_METRIC }, + [ST_I_INF_COMPRESS_BPS_RATE_LIM] = { .n = IST("limit_http_comp"), .type = PROMEX_MT_GAUGE, .flags = PROMEX_FL_INFO_METRIC }, + [ST_I_INF_ZLIB_MEM_USAGE] = { .n = IST("current_zlib_memory"), .type = PROMEX_MT_GAUGE, .flags = PROMEX_FL_INFO_METRIC }, + [ST_I_INF_MAX_ZLIB_MEM_USAGE] = { .n = IST("max_zlib_memory"), .type = PROMEX_MT_GAUGE, .flags = PROMEX_FL_INFO_METRIC }, + [ST_I_INF_TASKS] = { .n = IST("current_tasks"), .type = PROMEX_MT_GAUGE, .flags = PROMEX_FL_INFO_METRIC }, + [ST_I_INF_RUN_QUEUE] = { .n = IST("current_run_queue"), .type = PROMEX_MT_GAUGE, .flags = PROMEX_FL_INFO_METRIC }, + [ST_I_INF_IDLE_PCT] = { .n = IST("idle_time_percent"), .type = PROMEX_MT_GAUGE, .flags = PROMEX_FL_INFO_METRIC }, + //[ST_I_INF_NODE] ignored + //[ST_I_INF_DESCRIPTION] ignored + [ST_I_INF_STOPPING] = { .n = IST("stopping"), .type = PROMEX_MT_GAUGE, .flags = PROMEX_FL_INFO_METRIC }, + [ST_I_INF_JOBS] = { .n = IST("jobs"), .type = PROMEX_MT_GAUGE, .flags = PROMEX_FL_INFO_METRIC }, + [ST_I_INF_UNSTOPPABLE_JOBS] = { .n = IST("unstoppable_jobs"), .type = PROMEX_MT_GAUGE, .flags = PROMEX_FL_INFO_METRIC }, + [ST_I_INF_LISTENERS] = { .n = IST("listeners"), .type = PROMEX_MT_GAUGE, .flags = PROMEX_FL_INFO_METRIC }, + 
[ST_I_INF_ACTIVE_PEERS] = { .n = IST("active_peers"), .type = PROMEX_MT_GAUGE, .flags = PROMEX_FL_INFO_METRIC }, + [ST_I_INF_CONNECTED_PEERS] = { .n = IST("connected_peers"), .type = PROMEX_MT_GAUGE, .flags = PROMEX_FL_INFO_METRIC }, + [ST_I_INF_DROPPED_LOGS] = { .n = IST("dropped_logs_total"), .type = PROMEX_MT_COUNTER, .flags = PROMEX_FL_INFO_METRIC }, + [ST_I_INF_BUSY_POLLING] = { .n = IST("busy_polling_enabled"), .type = PROMEX_MT_GAUGE, .flags = PROMEX_FL_INFO_METRIC }, + [ST_I_INF_FAILED_RESOLUTIONS] = { .n = IST("failed_resolutions"), .type = PROMEX_MT_COUNTER, .flags = PROMEX_FL_INFO_METRIC }, + [ST_I_INF_TOTAL_BYTES_OUT] = { .n = IST("bytes_out_total"), .type = PROMEX_MT_COUNTER, .flags = PROMEX_FL_INFO_METRIC }, + [ST_I_INF_TOTAL_SPLICED_BYTES_OUT] = { .n = IST("spliced_bytes_out_total"), .type = PROMEX_MT_COUNTER, .flags = PROMEX_FL_INFO_METRIC }, + [ST_I_INF_BYTES_OUT_RATE] = { .n = IST("bytes_out_rate"), .type = PROMEX_MT_GAUGE, .flags = PROMEX_FL_INFO_METRIC }, + //[ST_I_INF_DEBUG_COMMANDS_ISSUED] ignored + [ST_I_INF_CUM_LOG_MSGS] = { .n = IST("recv_logs_total"), .type = PROMEX_MT_COUNTER, .flags = PROMEX_FL_INFO_METRIC }, + [ST_I_INF_BUILD_INFO] = { .n = IST("build_info"), .type = PROMEX_MT_GAUGE, .flags = PROMEX_FL_INFO_METRIC }, }; /* frontend/backend/server fields */ -const struct promex_metric promex_st_metrics[ST_F_TOTAL_FIELDS] = { - //[ST_F_PXNAME] ignored - //[ST_F_SVNAME] ignored - [ST_F_QCUR] = { .n = IST("current_queue"), .type = PROMEX_MT_GAUGE, .flags = ( PROMEX_FL_BACK_METRIC | PROMEX_FL_SRV_METRIC) }, - [ST_F_QMAX] = { .n = IST("max_queue"), .type = PROMEX_MT_GAUGE, .flags = ( PROMEX_FL_BACK_METRIC | PROMEX_FL_SRV_METRIC) }, - [ST_F_SCUR] = { .n = IST("current_sessions"), .type = PROMEX_MT_GAUGE, .flags = (PROMEX_FL_FRONT_METRIC | PROMEX_FL_LI_METRIC | PROMEX_FL_BACK_METRIC | PROMEX_FL_SRV_METRIC) }, - [ST_F_SMAX] = { .n = IST("max_sessions"), .type = PROMEX_MT_GAUGE, .flags = (PROMEX_FL_FRONT_METRIC | PROMEX_FL_LI_METRIC | 
PROMEX_FL_BACK_METRIC | PROMEX_FL_SRV_METRIC) }, - [ST_F_SLIM] = { .n = IST("limit_sessions"), .type = PROMEX_MT_GAUGE, .flags = (PROMEX_FL_FRONT_METRIC | PROMEX_FL_LI_METRIC | PROMEX_FL_BACK_METRIC | PROMEX_FL_SRV_METRIC) }, - [ST_F_STOT] = { .n = IST("sessions_total"), .type = PROMEX_MT_COUNTER, .flags = (PROMEX_FL_FRONT_METRIC | PROMEX_FL_LI_METRIC | PROMEX_FL_BACK_METRIC | PROMEX_FL_SRV_METRIC) }, - [ST_F_BIN] = { .n = IST("bytes_in_total"), .type = PROMEX_MT_COUNTER, .flags = (PROMEX_FL_FRONT_METRIC | PROMEX_FL_LI_METRIC | PROMEX_FL_BACK_METRIC | PROMEX_FL_SRV_METRIC) }, - [ST_F_BOUT] = { .n = IST("bytes_out_total"), .type = PROMEX_MT_COUNTER, .flags = (PROMEX_FL_FRONT_METRIC | PROMEX_FL_LI_METRIC | PROMEX_FL_BACK_METRIC | PROMEX_FL_SRV_METRIC) }, - [ST_F_DREQ] = { .n = IST("requests_denied_total"), .type = PROMEX_MT_COUNTER, .flags = (PROMEX_FL_FRONT_METRIC | PROMEX_FL_LI_METRIC | PROMEX_FL_BACK_METRIC ) }, - [ST_F_DRESP] = { .n = IST("responses_denied_total"), .type = PROMEX_MT_COUNTER, .flags = (PROMEX_FL_FRONT_METRIC | PROMEX_FL_LI_METRIC | PROMEX_FL_BACK_METRIC | PROMEX_FL_SRV_METRIC) }, - [ST_F_EREQ] = { .n = IST("request_errors_total"), .type = PROMEX_MT_COUNTER, .flags = (PROMEX_FL_FRONT_METRIC | PROMEX_FL_LI_METRIC ) }, - [ST_F_ECON] = { .n = IST("connection_errors_total"), .type = PROMEX_MT_COUNTER, .flags = ( PROMEX_FL_BACK_METRIC | PROMEX_FL_SRV_METRIC) }, - [ST_F_ERESP] = { .n = IST("response_errors_total"), .type = PROMEX_MT_COUNTER, .flags = ( PROMEX_FL_BACK_METRIC | PROMEX_FL_SRV_METRIC) }, - [ST_F_WRETR] = { .n = IST("retry_warnings_total"), .type = PROMEX_MT_COUNTER, .flags = ( PROMEX_FL_BACK_METRIC | PROMEX_FL_SRV_METRIC) }, - [ST_F_WREDIS] = { .n = IST("redispatch_warnings_total"), .type = PROMEX_MT_COUNTER, .flags = ( PROMEX_FL_BACK_METRIC | PROMEX_FL_SRV_METRIC) }, - [ST_F_STATUS] = { .n = IST("status"), .type = PROMEX_MT_GAUGE, .flags = (PROMEX_FL_FRONT_METRIC | PROMEX_FL_LI_METRIC | PROMEX_FL_BACK_METRIC | PROMEX_FL_SRV_METRIC) }, - 
[ST_F_WEIGHT] = { .n = IST("weight"), .type = PROMEX_MT_GAUGE, .flags = ( PROMEX_FL_BACK_METRIC | PROMEX_FL_SRV_METRIC) }, - [ST_F_ACT] = { .n = IST("active_servers"), .type = PROMEX_MT_GAUGE, .flags = ( PROMEX_FL_BACK_METRIC ) }, - [ST_F_BCK] = { .n = IST("backup_servers"), .type = PROMEX_MT_GAUGE, .flags = ( PROMEX_FL_BACK_METRIC ) }, - [ST_F_CHKFAIL] = { .n = IST("check_failures_total"), .type = PROMEX_MT_COUNTER, .flags = ( PROMEX_FL_SRV_METRIC) }, - [ST_F_CHKDOWN] = { .n = IST("check_up_down_total"), .type = PROMEX_MT_COUNTER, .flags = ( PROMEX_FL_BACK_METRIC | PROMEX_FL_SRV_METRIC) }, - [ST_F_LASTCHG] = { .n = IST("check_last_change_seconds"), .type = PROMEX_MT_GAUGE, .flags = ( PROMEX_FL_BACK_METRIC | PROMEX_FL_SRV_METRIC) }, - [ST_F_DOWNTIME] = { .n = IST("downtime_seconds_total"), .type = PROMEX_MT_COUNTER, .flags = ( PROMEX_FL_BACK_METRIC | PROMEX_FL_SRV_METRIC) }, - [ST_F_QLIMIT] = { .n = IST("queue_limit"), .type = PROMEX_MT_GAUGE, .flags = ( PROMEX_FL_SRV_METRIC) }, - //[ST_F_PID] ignored - //[ST_F_IID] ignored - //[ST_F_SID] ignored - [ST_F_THROTTLE] = { .n = IST("current_throttle"), .type = PROMEX_MT_GAUGE, .flags = ( PROMEX_FL_SRV_METRIC) }, - [ST_F_LBTOT] = { .n = IST("loadbalanced_total"), .type = PROMEX_MT_COUNTER, .flags = ( PROMEX_FL_BACK_METRIC | PROMEX_FL_SRV_METRIC) }, - //[ST_F_TRACKED] ignored - //[ST_F_TYPE] ignored - //[ST_F_RATE] ignored - [ST_F_RATE_LIM] = { .n = IST("limit_session_rate"), .type = PROMEX_MT_GAUGE, .flags = (PROMEX_FL_FRONT_METRIC ) }, - [ST_F_RATE_MAX] = { .n = IST("max_session_rate"), .type = PROMEX_MT_GAUGE, .flags = (PROMEX_FL_FRONT_METRIC | PROMEX_FL_BACK_METRIC | PROMEX_FL_SRV_METRIC) }, - [ST_F_CHECK_STATUS] = { .n = IST("check_status"), .type = PROMEX_MT_GAUGE, .flags = ( PROMEX_FL_SRV_METRIC) }, - [ST_F_CHECK_CODE] = { .n = IST("check_code"), .type = PROMEX_MT_GAUGE, .flags = ( PROMEX_FL_SRV_METRIC) }, - [ST_F_CHECK_DURATION] = { .n = IST("check_duration_seconds"), .type = PROMEX_MT_GAUGE, .flags = ( 
PROMEX_FL_SRV_METRIC) }, - [ST_F_HRSP_1XX] = { .n = IST("http_responses_total"), .type = PROMEX_MT_COUNTER, .flags = (PROMEX_FL_FRONT_METRIC | PROMEX_FL_BACK_METRIC | PROMEX_FL_SRV_METRIC) }, - [ST_F_HRSP_2XX] = { .n = IST("http_responses_total"), .type = PROMEX_MT_COUNTER, .flags = (PROMEX_FL_FRONT_METRIC | PROMEX_FL_BACK_METRIC | PROMEX_FL_SRV_METRIC) }, - [ST_F_HRSP_3XX] = { .n = IST("http_responses_total"), .type = PROMEX_MT_COUNTER, .flags = (PROMEX_FL_FRONT_METRIC | PROMEX_FL_BACK_METRIC | PROMEX_FL_SRV_METRIC) }, - [ST_F_HRSP_4XX] = { .n = IST("http_responses_total"), .type = PROMEX_MT_COUNTER, .flags = (PROMEX_FL_FRONT_METRIC | PROMEX_FL_BACK_METRIC | PROMEX_FL_SRV_METRIC) }, - [ST_F_HRSP_5XX] = { .n = IST("http_responses_total"), .type = PROMEX_MT_COUNTER, .flags = (PROMEX_FL_FRONT_METRIC | PROMEX_FL_BACK_METRIC | PROMEX_FL_SRV_METRIC) }, - [ST_F_HRSP_OTHER] = { .n = IST("http_responses_total"), .type = PROMEX_MT_COUNTER, .flags = (PROMEX_FL_FRONT_METRIC | PROMEX_FL_BACK_METRIC | PROMEX_FL_SRV_METRIC) }, - //[ST_F_HANAFAIL] ignored - //[ST_F_REQ_RATE] ignored - [ST_F_REQ_RATE_MAX] = { .n = IST("http_requests_rate_max"), .type = PROMEX_MT_GAUGE, .flags = (PROMEX_FL_FRONT_METRIC ) }, - [ST_F_REQ_TOT] = { .n = IST("http_requests_total"), .type = PROMEX_MT_COUNTER, .flags = (PROMEX_FL_FRONT_METRIC | PROMEX_FL_BACK_METRIC ) }, - [ST_F_CLI_ABRT] = { .n = IST("client_aborts_total"), .type = PROMEX_MT_COUNTER, .flags = ( PROMEX_FL_BACK_METRIC | PROMEX_FL_SRV_METRIC) }, - [ST_F_SRV_ABRT] = { .n = IST("server_aborts_total"), .type = PROMEX_MT_COUNTER, .flags = ( PROMEX_FL_BACK_METRIC | PROMEX_FL_SRV_METRIC) }, - [ST_F_COMP_IN] = { .n = IST("http_comp_bytes_in_total"), .type = PROMEX_MT_COUNTER, .flags = (PROMEX_FL_FRONT_METRIC | PROMEX_FL_BACK_METRIC ) }, - [ST_F_COMP_OUT] = { .n = IST("http_comp_bytes_out_total"), .type = PROMEX_MT_COUNTER, .flags = (PROMEX_FL_FRONT_METRIC | PROMEX_FL_BACK_METRIC ) }, - [ST_F_COMP_BYP] = { .n = 
IST("http_comp_bytes_bypassed_total"), .type = PROMEX_MT_COUNTER, .flags = (PROMEX_FL_FRONT_METRIC | PROMEX_FL_BACK_METRIC ) }, - [ST_F_COMP_RSP] = { .n = IST("http_comp_responses_total"), .type = PROMEX_MT_COUNTER, .flags = (PROMEX_FL_FRONT_METRIC | PROMEX_FL_BACK_METRIC ) }, - [ST_F_LASTSESS] = { .n = IST("last_session_seconds"), .type = PROMEX_MT_GAUGE, .flags = ( PROMEX_FL_BACK_METRIC | PROMEX_FL_SRV_METRIC) }, - //[ST_F_LAST_CHK] ignored - //[ST_F_LAST_AGT] ignored - [ST_F_QTIME] = { .n = IST("queue_time_average_seconds"), .type = PROMEX_MT_GAUGE, .flags = ( PROMEX_FL_BACK_METRIC | PROMEX_FL_SRV_METRIC) }, - [ST_F_CTIME] = { .n = IST("connect_time_average_seconds"), .type = PROMEX_MT_GAUGE, .flags = ( PROMEX_FL_BACK_METRIC | PROMEX_FL_SRV_METRIC) }, - [ST_F_RTIME] = { .n = IST("response_time_average_seconds"), .type = PROMEX_MT_GAUGE, .flags = ( PROMEX_FL_BACK_METRIC | PROMEX_FL_SRV_METRIC) }, - [ST_F_TTIME] = { .n = IST("total_time_average_seconds"), .type = PROMEX_MT_GAUGE, .flags = ( PROMEX_FL_BACK_METRIC | PROMEX_FL_SRV_METRIC) }, - //[ST_F_AGENT_STATUS] ignored - //[ST_F_AGENT_CODE] ignored - //[ST_F_AGENT_DURATION] ignored - //[ST_F_CHECK_DESC] ignored - //[ST_F_AGENT_DESC] ignored - //[ST_F_CHECK_RISE] ignored - //[ST_F_CHECK_FALL] ignored - //[ST_F_CHECK_HEALTH] ignored - //[ST_F_AGENT_RISE] ignored - //[ST_F_AGENT_FALL] ignored - //[ST_F_AGENT_HEALTH] ignored - //[ST_F_ADDR] ignored - //[ST_F_COOKIE] ignored - //[ST_F_MODE] ignored - //[ST_F_ALGO] ignored - //[ST_F_CONN_RATE] ignored - [ST_F_CONN_RATE_MAX] = { .n = IST("connections_rate_max"), .type = PROMEX_MT_GAUGE, .flags = (PROMEX_FL_FRONT_METRIC ) }, - [ST_F_CONN_TOT] = { .n = IST("connections_total"), .type = PROMEX_MT_COUNTER, .flags = (PROMEX_FL_FRONT_METRIC ) }, - [ST_F_INTERCEPTED] = { .n = IST("intercepted_requests_total"), .type = PROMEX_MT_COUNTER, .flags = (PROMEX_FL_FRONT_METRIC ) }, - [ST_F_DCON] = { .n = IST("denied_connections_total"), .type = PROMEX_MT_COUNTER, .flags = 
(PROMEX_FL_FRONT_METRIC | PROMEX_FL_LI_METRIC ) }, - [ST_F_DSES] = { .n = IST("denied_sessions_total"), .type = PROMEX_MT_COUNTER, .flags = (PROMEX_FL_FRONT_METRIC | PROMEX_FL_LI_METRIC ) }, - [ST_F_WREW] = { .n = IST("failed_header_rewriting_total"), .type = PROMEX_MT_COUNTER, .flags = (PROMEX_FL_FRONT_METRIC | PROMEX_FL_LI_METRIC | PROMEX_FL_BACK_METRIC | PROMEX_FL_SRV_METRIC) }, - [ST_F_CONNECT] = { .n = IST("connection_attempts_total"), .type = PROMEX_MT_COUNTER, .flags = ( PROMEX_FL_BACK_METRIC | PROMEX_FL_SRV_METRIC) }, - [ST_F_REUSE] = { .n = IST("connection_reuses_total"), .type = PROMEX_MT_COUNTER, .flags = ( PROMEX_FL_BACK_METRIC | PROMEX_FL_SRV_METRIC) }, - [ST_F_CACHE_LOOKUPS] = { .n = IST("http_cache_lookups_total"), .type = PROMEX_MT_COUNTER, .flags = (PROMEX_FL_FRONT_METRIC | PROMEX_FL_BACK_METRIC ) }, - [ST_F_CACHE_HITS] = { .n = IST("http_cache_hits_total"), .type = PROMEX_MT_COUNTER, .flags = (PROMEX_FL_FRONT_METRIC | PROMEX_FL_BACK_METRIC ) }, - [ST_F_SRV_ICUR] = { .n = IST("idle_connections_current"), .type = PROMEX_MT_GAUGE, .flags = ( PROMEX_FL_SRV_METRIC) }, - [ST_F_SRV_ILIM] = { .n = IST("idle_connections_limit"), .type = PROMEX_MT_GAUGE, .flags = ( PROMEX_FL_SRV_METRIC) }, - [ST_F_QT_MAX] = { .n = IST("max_queue_time_seconds"), .type = PROMEX_MT_GAUGE, .flags = ( PROMEX_FL_BACK_METRIC | PROMEX_FL_SRV_METRIC) }, - [ST_F_CT_MAX] = { .n = IST("max_connect_time_seconds"), .type = PROMEX_MT_GAUGE, .flags = ( PROMEX_FL_BACK_METRIC | PROMEX_FL_SRV_METRIC) }, - [ST_F_RT_MAX] = { .n = IST("max_response_time_seconds"), .type = PROMEX_MT_GAUGE, .flags = ( PROMEX_FL_BACK_METRIC | PROMEX_FL_SRV_METRIC) }, - [ST_F_TT_MAX] = { .n = IST("max_total_time_seconds"), .type = PROMEX_MT_GAUGE, .flags = ( PROMEX_FL_BACK_METRIC | PROMEX_FL_SRV_METRIC) }, - [ST_F_EINT] = { .n = IST("internal_errors_total"), .type = PROMEX_MT_COUNTER, .flags = (PROMEX_FL_FRONT_METRIC | PROMEX_FL_LI_METRIC | PROMEX_FL_BACK_METRIC | PROMEX_FL_SRV_METRIC) }, - [ST_F_IDLE_CONN_CUR] = { 
.n = IST("unsafe_idle_connections_current"), .type = PROMEX_MT_GAUGE, .flags = ( PROMEX_FL_SRV_METRIC) }, - [ST_F_SAFE_CONN_CUR] = { .n = IST("safe_idle_connections_current"), .type = PROMEX_MT_GAUGE, .flags = ( PROMEX_FL_SRV_METRIC) }, - [ST_F_USED_CONN_CUR] = { .n = IST("used_connections_current"), .type = PROMEX_MT_GAUGE, .flags = ( PROMEX_FL_SRV_METRIC) }, - [ST_F_NEED_CONN_EST] = { .n = IST("need_connections_current"), .type = PROMEX_MT_GAUGE, .flags = ( PROMEX_FL_SRV_METRIC) }, - [ST_F_UWEIGHT] = { .n = IST("uweight"), .type = PROMEX_MT_GAUGE, .flags = ( PROMEX_FL_BACK_METRIC | PROMEX_FL_SRV_METRIC) }, - [ST_F_AGG_SRV_CHECK_STATUS] = { .n = IST("agg_server_check_status"), .type = PROMEX_MT_GAUGE, .flags = ( PROMEX_FL_BACK_METRIC ) }, - [ST_F_AGG_SRV_STATUS ] = { .n = IST("agg_server_status"), .type = PROMEX_MT_GAUGE, .flags = ( PROMEX_FL_BACK_METRIC ) }, - [ST_F_AGG_CHECK_STATUS] = { .n = IST("agg_check_status"), .type = PROMEX_MT_GAUGE, .flags = ( PROMEX_FL_BACK_METRIC ) }, +const struct promex_metric promex_st_metrics[ST_I_PX_MAX] = { + //[ST_I_PX_PXNAME] ignored + //[ST_I_PX_SVNAME] ignored + [ST_I_PX_QCUR] = { .n = IST("current_queue"), .type = PROMEX_MT_GAUGE, .flags = ( PROMEX_FL_BACK_METRIC | PROMEX_FL_SRV_METRIC) }, + [ST_I_PX_QMAX] = { .n = IST("max_queue"), .type = PROMEX_MT_GAUGE, .flags = ( PROMEX_FL_BACK_METRIC | PROMEX_FL_SRV_METRIC) }, + [ST_I_PX_SCUR] = { .n = IST("current_sessions"), .type = PROMEX_MT_GAUGE, .flags = (PROMEX_FL_FRONT_METRIC | PROMEX_FL_LI_METRIC | PROMEX_FL_BACK_METRIC | PROMEX_FL_SRV_METRIC) }, + [ST_I_PX_SMAX] = { .n = IST("max_sessions"), .type = PROMEX_MT_GAUGE, .flags = (PROMEX_FL_FRONT_METRIC | PROMEX_FL_LI_METRIC | PROMEX_FL_BACK_METRIC | PROMEX_FL_SRV_METRIC) }, + [ST_I_PX_SLIM] = { .n = IST("limit_sessions"), .type = PROMEX_MT_GAUGE, .flags = (PROMEX_FL_FRONT_METRIC | PROMEX_FL_LI_METRIC | PROMEX_FL_BACK_METRIC | PROMEX_FL_SRV_METRIC) }, + [ST_I_PX_STOT] = { .n = IST("sessions_total"), .type = PROMEX_MT_COUNTER, 
.flags = (PROMEX_FL_FRONT_METRIC | PROMEX_FL_LI_METRIC | PROMEX_FL_BACK_METRIC | PROMEX_FL_SRV_METRIC) }, + [ST_I_PX_BIN] = { .n = IST("bytes_in_total"), .type = PROMEX_MT_COUNTER, .flags = (PROMEX_FL_FRONT_METRIC | PROMEX_FL_LI_METRIC | PROMEX_FL_BACK_METRIC | PROMEX_FL_SRV_METRIC) }, + [ST_I_PX_BOUT] = { .n = IST("bytes_out_total"), .type = PROMEX_MT_COUNTER, .flags = (PROMEX_FL_FRONT_METRIC | PROMEX_FL_LI_METRIC | PROMEX_FL_BACK_METRIC | PROMEX_FL_SRV_METRIC) }, + [ST_I_PX_DREQ] = { .n = IST("requests_denied_total"), .type = PROMEX_MT_COUNTER, .flags = (PROMEX_FL_FRONT_METRIC | PROMEX_FL_LI_METRIC | PROMEX_FL_BACK_METRIC ) }, + [ST_I_PX_DRESP] = { .n = IST("responses_denied_total"), .type = PROMEX_MT_COUNTER, .flags = (PROMEX_FL_FRONT_METRIC | PROMEX_FL_LI_METRIC | PROMEX_FL_BACK_METRIC | PROMEX_FL_SRV_METRIC) }, + [ST_I_PX_EREQ] = { .n = IST("request_errors_total"), .type = PROMEX_MT_COUNTER, .flags = (PROMEX_FL_FRONT_METRIC | PROMEX_FL_LI_METRIC ) }, + [ST_I_PX_ECON] = { .n = IST("connection_errors_total"), .type = PROMEX_MT_COUNTER, .flags = ( PROMEX_FL_BACK_METRIC | PROMEX_FL_SRV_METRIC) }, + [ST_I_PX_ERESP] = { .n = IST("response_errors_total"), .type = PROMEX_MT_COUNTER, .flags = ( PROMEX_FL_BACK_METRIC | PROMEX_FL_SRV_METRIC) }, + [ST_I_PX_WRETR] = { .n = IST("retry_warnings_total"), .type = PROMEX_MT_COUNTER, .flags = ( PROMEX_FL_BACK_METRIC | PROMEX_FL_SRV_METRIC) }, + [ST_I_PX_WREDIS] = { .n = IST("redispatch_warnings_total"), .type = PROMEX_MT_COUNTER, .flags = ( PROMEX_FL_BACK_METRIC | PROMEX_FL_SRV_METRIC) }, + [ST_I_PX_STATUS] = { .n = IST("status"), .type = PROMEX_MT_GAUGE, .flags = (PROMEX_FL_FRONT_METRIC | PROMEX_FL_LI_METRIC | PROMEX_FL_BACK_METRIC | PROMEX_FL_SRV_METRIC) }, + [ST_I_PX_WEIGHT] = { .n = IST("weight"), .type = PROMEX_MT_GAUGE, .flags = ( PROMEX_FL_BACK_METRIC | PROMEX_FL_SRV_METRIC) }, + [ST_I_PX_ACT] = { .n = IST("active_servers"), .type = PROMEX_MT_GAUGE, .flags = ( PROMEX_FL_BACK_METRIC | PROMEX_FL_SRV_METRIC) }, + 
[ST_I_PX_BCK] = { .n = IST("backup_servers"), .type = PROMEX_MT_GAUGE, .flags = ( PROMEX_FL_BACK_METRIC | PROMEX_FL_SRV_METRIC) }, + [ST_I_PX_CHKFAIL] = { .n = IST("check_failures_total"), .type = PROMEX_MT_COUNTER, .flags = ( PROMEX_FL_SRV_METRIC) }, + [ST_I_PX_CHKDOWN] = { .n = IST("check_up_down_total"), .type = PROMEX_MT_COUNTER, .flags = ( PROMEX_FL_BACK_METRIC | PROMEX_FL_SRV_METRIC) }, + [ST_I_PX_LASTCHG] = { .n = IST("check_last_change_seconds"), .type = PROMEX_MT_GAUGE, .flags = ( PROMEX_FL_BACK_METRIC | PROMEX_FL_SRV_METRIC) }, + [ST_I_PX_DOWNTIME] = { .n = IST("downtime_seconds_total"), .type = PROMEX_MT_COUNTER, .flags = ( PROMEX_FL_BACK_METRIC | PROMEX_FL_SRV_METRIC) }, + [ST_I_PX_QLIMIT] = { .n = IST("queue_limit"), .type = PROMEX_MT_GAUGE, .flags = ( PROMEX_FL_SRV_METRIC) }, + //[ST_I_PX_PID] ignored + //[ST_I_PX_IID] ignored + //[ST_I_PX_SID] ignored + [ST_I_PX_THROTTLE] = { .n = IST("current_throttle"), .type = PROMEX_MT_GAUGE, .flags = ( PROMEX_FL_SRV_METRIC) }, + [ST_I_PX_LBTOT] = { .n = IST("loadbalanced_total"), .type = PROMEX_MT_COUNTER, .flags = ( PROMEX_FL_BACK_METRIC | PROMEX_FL_SRV_METRIC) }, + //[ST_I_PX_TRACKED] ignored + //[ST_I_PX_TYPE] ignored + //[ST_I_PX_RATE] ignored + [ST_I_PX_RATE_LIM] = { .n = IST("limit_session_rate"), .type = PROMEX_MT_GAUGE, .flags = (PROMEX_FL_FRONT_METRIC ) }, + [ST_I_PX_RATE_MAX] = { .n = IST("max_session_rate"), .type = PROMEX_MT_GAUGE, .flags = (PROMEX_FL_FRONT_METRIC | PROMEX_FL_BACK_METRIC | PROMEX_FL_SRV_METRIC) }, + [ST_I_PX_CHECK_STATUS] = { .n = IST("check_status"), .type = PROMEX_MT_GAUGE, .flags = ( PROMEX_FL_SRV_METRIC) }, + [ST_I_PX_CHECK_CODE] = { .n = IST("check_code"), .type = PROMEX_MT_GAUGE, .flags = ( PROMEX_FL_SRV_METRIC) }, + [ST_I_PX_CHECK_DURATION] = { .n = IST("check_duration_seconds"), .type = PROMEX_MT_GAUGE, .flags = ( PROMEX_FL_SRV_METRIC) }, + [ST_I_PX_HRSP_1XX] = { .n = IST("http_responses_total"), .type = PROMEX_MT_COUNTER, .flags = (PROMEX_FL_FRONT_METRIC | 
PROMEX_FL_BACK_METRIC | PROMEX_FL_SRV_METRIC) }, + [ST_I_PX_HRSP_2XX] = { .n = IST("http_responses_total"), .type = PROMEX_MT_COUNTER, .flags = (PROMEX_FL_FRONT_METRIC | PROMEX_FL_BACK_METRIC | PROMEX_FL_SRV_METRIC) }, + [ST_I_PX_HRSP_3XX] = { .n = IST("http_responses_total"), .type = PROMEX_MT_COUNTER, .flags = (PROMEX_FL_FRONT_METRIC | PROMEX_FL_BACK_METRIC | PROMEX_FL_SRV_METRIC) }, + [ST_I_PX_HRSP_4XX] = { .n = IST("http_responses_total"), .type = PROMEX_MT_COUNTER, .flags = (PROMEX_FL_FRONT_METRIC | PROMEX_FL_BACK_METRIC | PROMEX_FL_SRV_METRIC) }, + [ST_I_PX_HRSP_5XX] = { .n = IST("http_responses_total"), .type = PROMEX_MT_COUNTER, .flags = (PROMEX_FL_FRONT_METRIC | PROMEX_FL_BACK_METRIC | PROMEX_FL_SRV_METRIC) }, + [ST_I_PX_HRSP_OTHER] = { .n = IST("http_responses_total"), .type = PROMEX_MT_COUNTER, .flags = (PROMEX_FL_FRONT_METRIC | PROMEX_FL_BACK_METRIC | PROMEX_FL_SRV_METRIC) }, + //[ST_I_PX_HANAFAIL] ignored + //[ST_I_PX_REQ_RATE] ignored + [ST_I_PX_REQ_RATE_MAX] = { .n = IST("http_requests_rate_max"), .type = PROMEX_MT_GAUGE, .flags = (PROMEX_FL_FRONT_METRIC ) }, + [ST_I_PX_REQ_TOT] = { .n = IST("http_requests_total"), .type = PROMEX_MT_COUNTER, .flags = (PROMEX_FL_FRONT_METRIC | PROMEX_FL_BACK_METRIC ) }, + [ST_I_PX_CLI_ABRT] = { .n = IST("client_aborts_total"), .type = PROMEX_MT_COUNTER, .flags = ( PROMEX_FL_BACK_METRIC | PROMEX_FL_SRV_METRIC) }, + [ST_I_PX_SRV_ABRT] = { .n = IST("server_aborts_total"), .type = PROMEX_MT_COUNTER, .flags = ( PROMEX_FL_BACK_METRIC | PROMEX_FL_SRV_METRIC) }, + [ST_I_PX_COMP_IN] = { .n = IST("http_comp_bytes_in_total"), .type = PROMEX_MT_COUNTER, .flags = (PROMEX_FL_FRONT_METRIC | PROMEX_FL_BACK_METRIC ) }, + [ST_I_PX_COMP_OUT] = { .n = IST("http_comp_bytes_out_total"), .type = PROMEX_MT_COUNTER, .flags = (PROMEX_FL_FRONT_METRIC | PROMEX_FL_BACK_METRIC ) }, + [ST_I_PX_COMP_BYP] = { .n = IST("http_comp_bytes_bypassed_total"), .type = PROMEX_MT_COUNTER, .flags = (PROMEX_FL_FRONT_METRIC | PROMEX_FL_BACK_METRIC ) }, + 
[ST_I_PX_COMP_RSP] = { .n = IST("http_comp_responses_total"), .type = PROMEX_MT_COUNTER, .flags = (PROMEX_FL_FRONT_METRIC | PROMEX_FL_BACK_METRIC ) }, + [ST_I_PX_LASTSESS] = { .n = IST("last_session_seconds"), .type = PROMEX_MT_GAUGE, .flags = ( PROMEX_FL_BACK_METRIC | PROMEX_FL_SRV_METRIC) }, + //[ST_I_PX_LAST_CHK] ignored + //[ST_I_PX_LAST_AGT] ignored + [ST_I_PX_QTIME] = { .n = IST("queue_time_average_seconds"), .type = PROMEX_MT_GAUGE, .flags = ( PROMEX_FL_BACK_METRIC | PROMEX_FL_SRV_METRIC) }, + [ST_I_PX_CTIME] = { .n = IST("connect_time_average_seconds"), .type = PROMEX_MT_GAUGE, .flags = ( PROMEX_FL_BACK_METRIC | PROMEX_FL_SRV_METRIC) }, + [ST_I_PX_RTIME] = { .n = IST("response_time_average_seconds"), .type = PROMEX_MT_GAUGE, .flags = ( PROMEX_FL_BACK_METRIC | PROMEX_FL_SRV_METRIC) }, + [ST_I_PX_TTIME] = { .n = IST("total_time_average_seconds"), .type = PROMEX_MT_GAUGE, .flags = ( PROMEX_FL_BACK_METRIC | PROMEX_FL_SRV_METRIC) }, + //[ST_I_PX_AGENT_STATUS] ignored + //[ST_I_PX_AGENT_CODE] ignored + //[ST_I_PX_AGENT_DURATION] ignored + //[ST_I_PX_CHECK_DESC] ignored + //[ST_I_PX_AGENT_DESC] ignored + //[ST_I_PX_CHECK_RISE] ignored + //[ST_I_PX_CHECK_FALL] ignored + //[ST_I_PX_CHECK_HEALTH] ignored + //[ST_I_PX_AGENT_RISE] ignored + //[ST_I_PX_AGENT_FALL] ignored + //[ST_I_PX_AGENT_HEALTH] ignored + //[ST_I_PX_ADDR] ignored + //[ST_I_PX_COOKIE] ignored + //[ST_I_PX_MODE] ignored + //[ST_I_PX_ALGO] ignored + //[ST_I_PX_CONN_RATE] ignored + [ST_I_PX_CONN_RATE_MAX] = { .n = IST("connections_rate_max"), .type = PROMEX_MT_GAUGE, .flags = (PROMEX_FL_FRONT_METRIC ) }, + [ST_I_PX_CONN_TOT] = { .n = IST("connections_total"), .type = PROMEX_MT_COUNTER, .flags = (PROMEX_FL_FRONT_METRIC ) }, + [ST_I_PX_INTERCEPTED] = { .n = IST("intercepted_requests_total"), .type = PROMEX_MT_COUNTER, .flags = (PROMEX_FL_FRONT_METRIC ) }, + [ST_I_PX_DCON] = { .n = IST("denied_connections_total"), .type = PROMEX_MT_COUNTER, .flags = (PROMEX_FL_FRONT_METRIC | PROMEX_FL_LI_METRIC ) }, + 
[ST_I_PX_DSES] = { .n = IST("denied_sessions_total"), .type = PROMEX_MT_COUNTER, .flags = (PROMEX_FL_FRONT_METRIC | PROMEX_FL_LI_METRIC ) }, + [ST_I_PX_WREW] = { .n = IST("failed_header_rewriting_total"), .type = PROMEX_MT_COUNTER, .flags = (PROMEX_FL_FRONT_METRIC | PROMEX_FL_LI_METRIC | PROMEX_FL_BACK_METRIC | PROMEX_FL_SRV_METRIC) }, + [ST_I_PX_CONNECT] = { .n = IST("connection_attempts_total"), .type = PROMEX_MT_COUNTER, .flags = ( PROMEX_FL_BACK_METRIC | PROMEX_FL_SRV_METRIC) }, + [ST_I_PX_REUSE] = { .n = IST("connection_reuses_total"), .type = PROMEX_MT_COUNTER, .flags = ( PROMEX_FL_BACK_METRIC | PROMEX_FL_SRV_METRIC) }, + [ST_I_PX_CACHE_LOOKUPS] = { .n = IST("http_cache_lookups_total"), .type = PROMEX_MT_COUNTER, .flags = (PROMEX_FL_FRONT_METRIC | PROMEX_FL_BACK_METRIC ) }, + [ST_I_PX_CACHE_HITS] = { .n = IST("http_cache_hits_total"), .type = PROMEX_MT_COUNTER, .flags = (PROMEX_FL_FRONT_METRIC | PROMEX_FL_BACK_METRIC ) }, + [ST_I_PX_SRV_ICUR] = { .n = IST("idle_connections_current"), .type = PROMEX_MT_GAUGE, .flags = ( PROMEX_FL_SRV_METRIC) }, + [ST_I_PX_SRV_ILIM] = { .n = IST("idle_connections_limit"), .type = PROMEX_MT_GAUGE, .flags = ( PROMEX_FL_SRV_METRIC) }, + [ST_I_PX_QT_MAX] = { .n = IST("max_queue_time_seconds"), .type = PROMEX_MT_GAUGE, .flags = ( PROMEX_FL_BACK_METRIC | PROMEX_FL_SRV_METRIC) }, + [ST_I_PX_CT_MAX] = { .n = IST("max_connect_time_seconds"), .type = PROMEX_MT_GAUGE, .flags = ( PROMEX_FL_BACK_METRIC | PROMEX_FL_SRV_METRIC) }, + [ST_I_PX_RT_MAX] = { .n = IST("max_response_time_seconds"), .type = PROMEX_MT_GAUGE, .flags = ( PROMEX_FL_BACK_METRIC | PROMEX_FL_SRV_METRIC) }, + [ST_I_PX_TT_MAX] = { .n = IST("max_total_time_seconds"), .type = PROMEX_MT_GAUGE, .flags = ( PROMEX_FL_BACK_METRIC | PROMEX_FL_SRV_METRIC) }, + [ST_I_PX_EINT] = { .n = IST("internal_errors_total"), .type = PROMEX_MT_COUNTER, .flags = (PROMEX_FL_FRONT_METRIC | PROMEX_FL_LI_METRIC | PROMEX_FL_BACK_METRIC | PROMEX_FL_SRV_METRIC) }, + [ST_I_PX_IDLE_CONN_CUR] = { .n = 
IST("unsafe_idle_connections_current"), .type = PROMEX_MT_GAUGE, .flags = ( PROMEX_FL_SRV_METRIC) }, + [ST_I_PX_SAFE_CONN_CUR] = { .n = IST("safe_idle_connections_current"), .type = PROMEX_MT_GAUGE, .flags = ( PROMEX_FL_SRV_METRIC) }, + [ST_I_PX_USED_CONN_CUR] = { .n = IST("used_connections_current"), .type = PROMEX_MT_GAUGE, .flags = ( PROMEX_FL_SRV_METRIC) }, + [ST_I_PX_NEED_CONN_EST] = { .n = IST("need_connections_current"), .type = PROMEX_MT_GAUGE, .flags = ( PROMEX_FL_SRV_METRIC) }, + [ST_I_PX_UWEIGHT] = { .n = IST("uweight"), .type = PROMEX_MT_GAUGE, .flags = ( PROMEX_FL_BACK_METRIC | PROMEX_FL_SRV_METRIC) }, + [ST_I_PX_AGG_SRV_CHECK_STATUS] = { .n = IST("agg_server_check_status"), .type = PROMEX_MT_GAUGE, .flags = ( PROMEX_FL_BACK_METRIC ) }, + [ST_I_PX_AGG_SRV_STATUS ] = { .n = IST("agg_server_status"), .type = PROMEX_MT_GAUGE, .flags = ( PROMEX_FL_BACK_METRIC ) }, + [ST_I_PX_AGG_CHECK_STATUS] = { .n = IST("agg_check_status"), .type = PROMEX_MT_GAUGE, .flags = ( PROMEX_FL_BACK_METRIC ) }, }; -/* Description of overridden stats fields */ -const struct ist promex_st_metric_desc[ST_F_TOTAL_FIELDS] = { - [ST_F_STATUS] = IST("Current status of the service, per state label value."), - [ST_F_CHECK_STATUS] = IST("Status of last health check, per state label value."), - [ST_F_CHECK_CODE] = IST("layer5-7 code, if available of the last health check."), - [ST_F_CHECK_DURATION] = IST("Total duration of the latest server health check, in seconds."), - [ST_F_QTIME] = IST("Avg. queue time for last 1024 successful connections."), - [ST_F_CTIME] = IST("Avg. connect time for last 1024 successful connections."), - [ST_F_RTIME] = IST("Avg. response time for last 1024 successful connections."), - [ST_F_TTIME] = IST("Avg. 
total time for last 1024 successful connections."), - [ST_F_QT_MAX] = IST("Maximum observed time spent in the queue"), - [ST_F_CT_MAX] = IST("Maximum observed time spent waiting for a connection to complete"), - [ST_F_RT_MAX] = IST("Maximum observed time spent waiting for a server response"), - [ST_F_TT_MAX] = IST("Maximum observed total request+response time (request+queue+connect+response+processing)"), +/* Specialized frontend metric names, to override default ones */ +const struct ist promex_st_front_metrics_names[ST_I_PX_MAX] = { }; -/* stick table base fields */ -enum sticktable_field { - STICKTABLE_SIZE = 0, - STICKTABLE_USED, - /* must always be the last one */ - STICKTABLE_TOTAL_FIELDS +/* Specialized backend metric names, to override default ones */ +const struct ist promex_st_back_metrics_names[ST_I_PX_MAX] = { }; -const struct promex_metric promex_sticktable_metrics[STICKTABLE_TOTAL_FIELDS] = { - [STICKTABLE_SIZE] = { .n = IST("size"), .type = PROMEX_MT_GAUGE, .flags = PROMEX_FL_STICKTABLE_METRIC }, - [STICKTABLE_USED] = { .n = IST("used"), .type = PROMEX_MT_GAUGE, .flags = PROMEX_FL_STICKTABLE_METRIC }, +/* Specialized listener metric names, to override default ones */ +const struct ist promex_st_li_metrics_names[ST_I_PX_MAX] = { }; -/* stick table base description */ -const struct ist promex_sticktable_metric_desc[STICKTABLE_TOTAL_FIELDS] = { - [STICKTABLE_SIZE] = IST("Stick table size."), - [STICKTABLE_USED] = IST("Number of entries used in this stick table."), +/* Specialized server metric names, to override default ones */ +const struct ist promex_st_srv_metrics_names[ST_I_PX_MAX] = { + [ST_I_PX_ACT] = IST("active"), + [ST_I_PX_BCK] = IST("backup"), +}; + +/* Description of overridden stats fields */ +const struct ist promex_st_metric_desc[ST_I_PX_MAX] = { + [ST_I_PX_STATUS] = IST("Current status of the service, per state label value."), + [ST_I_PX_CHECK_STATUS] = IST("Status of last health check, per state label value."), + [ST_I_PX_CHECK_CODE] = 
IST("layer5-7 code, if available of the last health check."), + [ST_I_PX_CHECK_DURATION] = IST("Total duration of the latest server health check, in seconds."), + [ST_I_PX_QTIME] = IST("Avg. queue time for last 1024 successful connections."), + [ST_I_PX_CTIME] = IST("Avg. connect time for last 1024 successful connections."), + [ST_I_PX_RTIME] = IST("Avg. response time for last 1024 successful connections."), + [ST_I_PX_TTIME] = IST("Avg. total time for last 1024 successful connections."), + [ST_I_PX_QT_MAX] = IST("Maximum observed time spent in the queue"), + [ST_I_PX_CT_MAX] = IST("Maximum observed time spent waiting for a connection to complete"), + [ST_I_PX_RT_MAX] = IST("Maximum observed time spent waiting for a server response"), + [ST_I_PX_TT_MAX] = IST("Maximum observed total request+response time (request+queue+connect+response+processing)"), }; -/* Specific labels for all ST_F_HRSP_* fields */ -const struct ist promex_hrsp_code[1 + ST_F_HRSP_OTHER - ST_F_HRSP_1XX] = { - [ST_F_HRSP_1XX - ST_F_HRSP_1XX] = IST("1xx"), - [ST_F_HRSP_2XX - ST_F_HRSP_1XX] = IST("2xx"), - [ST_F_HRSP_3XX - ST_F_HRSP_1XX] = IST("3xx"), - [ST_F_HRSP_4XX - ST_F_HRSP_1XX] = IST("4xx"), - [ST_F_HRSP_5XX - ST_F_HRSP_1XX] = IST("5xx"), - [ST_F_HRSP_OTHER - ST_F_HRSP_1XX] = IST("other"), +/* Specific labels for all ST_I_PX_HRSP_* fields */ +const struct ist promex_hrsp_code[1 + ST_I_PX_HRSP_OTHER - ST_I_PX_HRSP_1XX] = { + [ST_I_PX_HRSP_1XX - ST_I_PX_HRSP_1XX] = IST("1xx"), + [ST_I_PX_HRSP_2XX - ST_I_PX_HRSP_1XX] = IST("2xx"), + [ST_I_PX_HRSP_3XX - ST_I_PX_HRSP_1XX] = IST("3xx"), + [ST_I_PX_HRSP_4XX - ST_I_PX_HRSP_1XX] = IST("4xx"), + [ST_I_PX_HRSP_5XX - ST_I_PX_HRSP_1XX] = IST("5xx"), + [ST_I_PX_HRSP_OTHER - ST_I_PX_HRSP_1XX] = IST("other"), }; enum promex_front_state { @@ -393,6 +365,18 @@ const struct ist promex_srv_st[PROMEX_SRV_STATE_COUNT] = { [PROMEX_SRV_STATE_NOLB] = IST("NOLB"), }; +struct list promex_module_list = LIST_HEAD_INIT(promex_module_list); + + +void 
promex_register_module(struct promex_module *m) +{ + LIST_APPEND(&promex_module_list, &m->list); +} + +/* Pools used to allocate ref on Promex modules and filters */ +DECLARE_STATIC_POOL(pool_head_promex_mod_ref, "promex_module_ref", sizeof(struct promex_module_ref)); +DECLARE_STATIC_POOL(pool_head_promex_metric_flt, "promex_metric_filter", sizeof(struct promex_metric_filter)); + /* Return the server status. */ enum promex_srv_state promex_srv_status(struct server *sv) { @@ -412,22 +396,13 @@ enum promex_srv_state promex_srv_status(struct server *sv) return state; } -/* Store <sv> in <ctx> safely by using refcount to prevent server deletion. */ -static void promex_set_ctx_sv(struct promex_ctx *ctx, struct server *sv) -{ - srv_drop(ctx->sv); - ctx->sv = sv; - if (ctx->sv) - srv_take(ctx->sv); -} - /* Convert a field to its string representation and write it in <out>, followed * by a newline, if there is enough space. non-numeric value are converted in * "NaN" because Prometheus only support numerical values (but it is unexepceted * to process this kind of value). It returns 1 on success. Otherwise, it * returns 0. The buffer's length must not exceed <max> value. */ -static int promex_metric_to_str(struct buffer *out, struct field *f, size_t max) +static int promex_ts_val_to_str(struct buffer *out, struct field *f, size_t max) { int ret = 0; @@ -446,23 +421,21 @@ static int promex_metric_to_str(struct buffer *out, struct field *f, size_t max) return 1; } -/* Dump the header lines for <metric>. It is its #HELP and #TYPE strings. It - * returns 1 on success. Otherwise, if <out> length exceeds <max>, it returns 0. +/* Dump the time series header lines for the metric <name>. It is its #HELP and #TYPE + * strings. It returns 1 on success. Otherwise, if <out> length exceeds <max>, + * it returns 0. 
*/ -static int promex_dump_metric_header(struct appctx *appctx, struct htx *htx, - const struct promex_metric *metric, const struct ist name, - struct ist *out, size_t max) +static int promex_dump_ts_header(const struct ist name, const struct ist desc, enum promex_mt_type type, + struct ist *out, size_t max) { - struct promex_ctx *ctx = appctx->svcctx; - struct ist type; - struct ist desc; + struct ist t; - switch (metric->type) { + switch (type) { case PROMEX_MT_COUNTER: - type = ist("counter"); + t = ist("counter"); break; default: - type = ist("gauge"); + t = ist("gauge"); } if (istcat(out, ist("# HELP "), max) == -1 || @@ -470,20 +443,16 @@ static int promex_dump_metric_header(struct appctx *appctx, struct htx *htx, istcat(out, ist(" "), max) == -1) goto full; - if (metric->flags & PROMEX_FL_INFO_METRIC) - desc = ist(info_fields[ctx->field_num].desc); - else if (metric->flags & PROMEX_FL_STICKTABLE_METRIC) - desc = promex_sticktable_metric_desc[ctx->field_num]; - else if (!isttest(promex_st_metric_desc[ctx->field_num])) - desc = ist(stat_fields[ctx->field_num].desc); - else - desc = promex_st_metric_desc[ctx->field_num]; + if (istcat(out, ist("# HELP "), max) == -1 || + istcat(out, name, max) == -1 || + istcat(out, ist(" "), max) == -1 || + istcat(out, desc, max) == -1) + goto full; - if (istcat(out, desc, max) == -1 || - istcat(out, ist("\n# TYPE "), max) == -1 || + if (istcat(out, ist("\n# TYPE "), max) == -1 || istcat(out, name, max) == -1 || istcat(out, ist(" "), max) == -1 || - istcat(out, type, max) == -1 || + istcat(out, t, max) == -1 || istcat(out, ist("\n"), max) == -1) goto full; @@ -493,32 +462,32 @@ static int promex_dump_metric_header(struct appctx *appctx, struct htx *htx, return 0; } -/* Dump the line for <metric>. It starts by the metric name followed by its - * labels (proxy name, server name...) between braces and finally its value. If - * not already done, the header lines are dumped first. It returns 1 on - * success. 
Otherwise if <out> length exceeds <max>, it returns 0. +/* Dump the time series for the metric <name>. It starts by the metric name followed by + * its labels (proxy name, server name...) between braces and finally its + * value. If not already done, the header lines are dumped first. It returns 1 + * on success. Otherwise if <out> length exceeds <max>, it returns 0. */ -static int promex_dump_metric(struct appctx *appctx, struct htx *htx, struct ist prefix, - const struct promex_metric *metric, struct field *val, - struct promex_label *labels, struct ist *out, size_t max) +static int promex_dump_ts(struct appctx *appctx, struct ist prefix, + const struct ist name, const struct ist desc, enum promex_mt_type type, + struct field *val, struct promex_label *labels, struct ist *out, size_t max) { - struct ist name = { .ptr = (char[PROMEX_MAX_NAME_LEN]){ 0 }, .len = 0 }; + struct ist n = { .ptr = (char[PROMEX_MAX_NAME_LEN]){ 0 }, .len = 0 }; struct promex_ctx *ctx = appctx->svcctx; size_t len = out->len; if (out->len + PROMEX_MAX_METRIC_LENGTH > max) return 0; - /* Fill the metric name */ - istcat(&name, prefix, PROMEX_MAX_NAME_LEN); - istcat(&name, metric->n, PROMEX_MAX_NAME_LEN); + /* Fill the metric name */ + istcat(&n, prefix, PROMEX_MAX_NAME_LEN); + istcat(&n, name, PROMEX_MAX_NAME_LEN); if ((ctx->flags & PROMEX_FL_METRIC_HDR) && - !promex_dump_metric_header(appctx, htx, metric, name, out, max)) + !promex_dump_ts_header(n, desc, type, out, max)) goto full; - if (istcat(out, name, max) == -1) + if (istcat(out, n, max) == -1) goto full; if (isttest(labels[0].name)) { @@ -527,7 +496,7 @@ static int promex_dump_metric(struct appctx *appctx, struct htx *htx, struct ist if (istcat(out, ist("{"), max) == -1) goto full; - for (i = 0; isttest(labels[i].name); i++) { + for (i = 0; i < PROMEX_MAX_LABELS && isttest(labels[i].name); i++) { if (!isttest(labels[i].value)) continue; @@ -548,7 +517,7 @@ static int promex_dump_metric(struct appctx *appctx, struct htx *htx, struct 
ist goto full; trash.data = out->len; - if (!promex_metric_to_str(&trash, val, max)) + if (!promex_ts_val_to_str(&trash, val, max)) goto full; out->len = trash.data; @@ -561,6 +530,32 @@ static int promex_dump_metric(struct appctx *appctx, struct htx *htx, struct ist } +static int promex_filter_metric(struct appctx *appctx, struct ist prefix, struct ist name) +{ + struct promex_ctx *ctx = appctx->svcctx; + struct eb32_node *node; + struct promex_metric_filter *flt; + unsigned int hash; + XXH32_state_t state; + + if (!eb_is_empty(&ctx->filters)) { + XXH32_reset(&state, 0); + XXH32_update(&state, istptr(prefix), istlen(prefix)); + XXH32_update(&state, istptr(name), istlen(name)); + hash = XXH32_digest(&state); + + node = eb32_lookup(&ctx->filters, hash); + if (node) { + flt = container_of(node, typeof(*flt), node); + if (flt->exclude) + return 1; + } + else if (!(ctx->flags & PROMEX_FL_INC_METRIC_BY_DEFAULT)) + return 1; + } + + return 0; +} /* Dump global metrics (prefixed by "haproxy_process_"). It returns 1 on success, * 0 if <htx> is full and -1 in case of any error. 
*/ @@ -570,32 +565,39 @@ static int promex_dump_global_metrics(struct appctx *appctx, struct htx *htx) struct promex_ctx *ctx = appctx->svcctx; struct field val; struct channel *chn = sc_ic(appctx_sc(appctx)); - struct ist out = ist2(trash.area, 0); + struct ist name, desc, out = ist2(trash.area, 0); size_t max = htx_get_max_blksz(htx, channel_htx_recv_max(chn, htx)); int ret = 1; - if (!stats_fill_info(info, INF_TOTAL_FIELDS, 0)) + if (!stats_fill_info(stat_line_info, ST_I_INF_MAX, 0)) return -1; - for (; ctx->field_num < INF_TOTAL_FIELDS; ctx->field_num++) { + for (; ctx->field_num < ST_I_INF_MAX; ctx->field_num++) { struct promex_label labels[PROMEX_MAX_LABELS-1] = {}; if (!(promex_global_metrics[ctx->field_num].flags & ctx->flags)) continue; + name = promex_global_metrics[ctx->field_num].n; + desc = ist(stat_cols_info[ctx->field_num].desc); + + if (promex_filter_metric(appctx, prefix, name)) + continue; + switch (ctx->field_num) { - case INF_BUILD_INFO: + case ST_I_INF_BUILD_INFO: labels[0].name = ist("version"); labels[0].value = ist(HAPROXY_VERSION); val = mkf_u32(FN_GAUGE, 1); break; default: - val = info[ctx->field_num]; + val = stat_line_info[ctx->field_num]; } - if (!promex_dump_metric(appctx, htx, prefix, &promex_global_metrics[ctx->field_num], - &val, labels, &out, max)) + if (!promex_dump_ts(appctx, prefix, name, desc, + promex_global_metrics[ctx->field_num].type, + &val, labels, &out, max)) goto full; ctx->flags |= PROMEX_FL_METRIC_HDR; @@ -619,23 +621,36 @@ static int promex_dump_front_metrics(struct appctx *appctx, struct htx *htx) { static struct ist prefix = IST("haproxy_frontend_"); struct promex_ctx *ctx = appctx->svcctx; - struct proxy *px; + struct proxy *px = ctx->p[0]; + struct stats_module *mod = ctx->p[1]; struct field val; struct channel *chn = sc_ic(appctx_sc(appctx)); - struct ist out = ist2(trash.area, 0); + struct ist name, desc, out = ist2(trash.area, 0); size_t max = htx_get_max_blksz(htx, channel_htx_recv_max(chn, htx)); - struct 
field *stats = stat_l[STATS_DOMAIN_PROXY]; + struct field *stats = stat_lines[STATS_DOMAIN_PROXY]; int ret = 1; enum promex_front_state state; - for (;ctx->field_num < ST_F_TOTAL_FIELDS; ctx->field_num++) { + for (;ctx->field_num < ST_I_PX_MAX; ctx->field_num++) { if (!(promex_st_metrics[ctx->field_num].flags & ctx->flags)) continue; - while (ctx->px) { - struct promex_label labels[PROMEX_MAX_LABELS-1] = {}; + name = promex_st_front_metrics_names[ctx->field_num]; + desc = promex_st_metric_desc[ctx->field_num]; + + if (!isttest(name)) + name = promex_st_metrics[ctx->field_num].n; + if (!isttest(desc)) + desc = ist(stat_cols_px[ctx->field_num].desc); + + if (promex_filter_metric(appctx, prefix, name)) + continue; + + if (!px) + px = proxies_list; - px = ctx->px; + while (px) { + struct promex_label labels[PROMEX_MAX_LABELS-1] = {}; labels[0].name = ist("proxy"); labels[0].value = ist2(px->id, strlen(px->id)); @@ -644,47 +659,49 @@ static int promex_dump_front_metrics(struct appctx *appctx, struct htx *htx) if ((px->flags & PR_FL_DISABLED) || px->uuid <= 0 || !(px->cap & PR_CAP_FE)) goto next_px; - if (!stats_fill_fe_stats(px, stats, ST_F_TOTAL_FIELDS, &(ctx->field_num))) + if (!stats_fill_fe_line(px, 0, stats, ST_I_PX_MAX, &(ctx->field_num))) return -1; switch (ctx->field_num) { - case ST_F_STATUS: + case ST_I_PX_STATUS: state = !(px->flags & PR_FL_STOPPED); for (; ctx->obj_state < PROMEX_FRONT_STATE_COUNT; ctx->obj_state++) { labels[1].name = ist("state"); labels[1].value = promex_front_st[ctx->obj_state]; val = mkf_u32(FO_STATUS, state == ctx->obj_state); - if (!promex_dump_metric(appctx, htx, prefix, &promex_st_metrics[ctx->field_num], - &val, labels, &out, max)) + + if (!promex_dump_ts(appctx, prefix, name, desc, + promex_st_metrics[ctx->field_num].type, + &val, labels, &out, max)) goto full; } ctx->obj_state = 0; goto next_px; - case ST_F_REQ_RATE_MAX: - case ST_F_REQ_TOT: - case ST_F_INTERCEPTED: - case ST_F_CACHE_LOOKUPS: - case ST_F_CACHE_HITS: - case 
ST_F_COMP_IN: - case ST_F_COMP_OUT: - case ST_F_COMP_BYP: - case ST_F_COMP_RSP: + case ST_I_PX_REQ_RATE_MAX: + case ST_I_PX_REQ_TOT: + case ST_I_PX_INTERCEPTED: + case ST_I_PX_CACHE_LOOKUPS: + case ST_I_PX_CACHE_HITS: + case ST_I_PX_COMP_IN: + case ST_I_PX_COMP_OUT: + case ST_I_PX_COMP_BYP: + case ST_I_PX_COMP_RSP: if (px->mode != PR_MODE_HTTP) goto next_px; val = stats[ctx->field_num]; break; - case ST_F_HRSP_1XX: - case ST_F_HRSP_2XX: - case ST_F_HRSP_3XX: - case ST_F_HRSP_4XX: - case ST_F_HRSP_5XX: - case ST_F_HRSP_OTHER: + case ST_I_PX_HRSP_1XX: + case ST_I_PX_HRSP_2XX: + case ST_I_PX_HRSP_3XX: + case ST_I_PX_HRSP_4XX: + case ST_I_PX_HRSP_5XX: + case ST_I_PX_HRSP_OTHER: if (px->mode != PR_MODE_HTTP) goto next_px; - if (ctx->field_num != ST_F_HRSP_1XX) + if (ctx->field_num != ST_I_PX_HRSP_1XX) ctx->flags &= ~PROMEX_FL_METRIC_HDR; labels[1].name = ist("code"); - labels[1].value = promex_hrsp_code[ctx->field_num - ST_F_HRSP_1XX]; + labels[1].value = promex_hrsp_code[ctx->field_num - ST_I_PX_HRSP_1XX]; val = stats[ctx->field_num]; break; @@ -692,22 +709,92 @@ static int promex_dump_front_metrics(struct appctx *appctx, struct htx *htx) val = stats[ctx->field_num]; } - if (!promex_dump_metric(appctx, htx, prefix, &promex_st_metrics[ctx->field_num], - &val, labels, &out, max)) + if (!promex_dump_ts(appctx, prefix, name, desc, + promex_st_metrics[ctx->field_num].type, + &val, labels, &out, max)) goto full; next_px: - ctx->px = px->next; + px = px->next; } ctx->flags |= PROMEX_FL_METRIC_HDR; - ctx->px = proxies_list; } + /* Skip extra counters */ + if (!(ctx->flags & PROMEX_FL_EXTRA_COUNTERS)) + goto end; + + if (!mod) { + mod = LIST_NEXT(&stats_module_list[STATS_DOMAIN_PROXY], typeof(mod), list); + ctx->mod_field_num = 0; + } + + list_for_each_entry_from(mod, &stats_module_list[STATS_DOMAIN_PROXY], list) { + void *counters; + + if (!(stats_px_get_cap(mod->domain_flags) & STATS_PX_CAP_FE)) { + ctx->field_num += mod->stats_count; + ctx->mod_field_num = 0; + continue; + } 
+ + for (;ctx->mod_field_num < mod->stats_count; ctx->mod_field_num++) { + name = ist2(mod->stats[ctx->mod_field_num].name, strlen(mod->stats[ctx->mod_field_num].name)); + desc = ist2(mod->stats[ctx->mod_field_num].desc, strlen(mod->stats[ctx->mod_field_num].desc)); + + if (promex_filter_metric(appctx, prefix, name)) + continue; + + if (!px) + px = proxies_list; + + while (px) { + struct promex_label labels[PROMEX_MAX_LABELS-1] = {}; + struct promex_metric metric; + + labels[0].name = ist("proxy"); + labels[0].value = ist2(px->id, strlen(px->id)); + + labels[1].name = ist("mod"); + labels[1].value = ist2(mod->name, strlen(mod->name)); + + /* skip the disabled proxies, global frontend and non-networked ones */ + if ((px->flags & PR_FL_DISABLED) || px->uuid <= 0 || !(px->cap & PR_CAP_FE)) + goto next_px2; + + counters = EXTRA_COUNTERS_GET(px->extra_counters_fe, mod); + if (!mod->fill_stats(counters, stats + ctx->field_num, &ctx->mod_field_num)) + return -1; + + val = stats[ctx->field_num + ctx->mod_field_num]; + metric.type = ((val.type == FN_GAUGE) ? 
PROMEX_MT_GAUGE : PROMEX_MT_COUNTER); + + if (!promex_dump_ts(appctx, prefix, name, desc, metric.type, + &val, labels, &out, max)) + goto full; + + next_px2: + px = px->next; + } + ctx->flags |= PROMEX_FL_METRIC_HDR; + } + + ctx->field_num += mod->stats_count; + ctx->mod_field_num = 0; + } + + px = NULL; + mod = NULL; + end: if (out.len) { if (!htx_add_data_atonce(htx, out)) return -1; /* Unexpected and unrecoverable error */ channel_add_input(chn, out.len); } + + /* Save pointers (0=current proxy, 1=current stats module) of the current context */ + ctx->p[0] = px; + ctx->p[1] = mod; return ret; full: ret = 0; @@ -720,24 +807,37 @@ static int promex_dump_listener_metrics(struct appctx *appctx, struct htx *htx) { static struct ist prefix = IST("haproxy_listener_"); struct promex_ctx *ctx = appctx->svcctx; - struct proxy *px; + struct proxy *px = ctx->p[0]; + struct listener *li = ctx->p[1]; + struct stats_module *mod = ctx->p[2]; struct field val; struct channel *chn = sc_ic(appctx_sc(appctx)); - struct ist out = ist2(trash.area, 0); + struct ist name, desc, out = ist2(trash.area, 0); size_t max = htx_get_max_blksz(htx, channel_htx_recv_max(chn, htx)); - struct field *stats = stat_l[STATS_DOMAIN_PROXY]; - struct listener *li; + struct field *stats = stat_lines[STATS_DOMAIN_PROXY]; int ret = 1; enum li_status status; - for (;ctx->field_num < ST_F_TOTAL_FIELDS; ctx->field_num++) { + for (;ctx->field_num < ST_I_PX_MAX; ctx->field_num++) { if (!(promex_st_metrics[ctx->field_num].flags & ctx->flags)) continue; - while (ctx->px) { - struct promex_label labels[PROMEX_MAX_LABELS-1] = {}; + name = promex_st_li_metrics_names[ctx->field_num]; + desc = promex_st_metric_desc[ctx->field_num]; + + if (!isttest(name)) + name = promex_st_metrics[ctx->field_num].n; + if (!isttest(desc)) + desc = ist(stat_cols_px[ctx->field_num].desc); - px = ctx->px; + if (promex_filter_metric(appctx, prefix, name)) + continue; + + if (!px) + px = proxies_list; + + while (px) { + struct promex_label 
labels[PROMEX_MAX_LABELS-1] = {}; labels[0].name = ist("proxy"); labels[0].value = ist2(px->id, strlen(px->id)); @@ -746,28 +846,30 @@ static int promex_dump_listener_metrics(struct appctx *appctx, struct htx *htx) if ((px->flags & PR_FL_DISABLED) || px->uuid <= 0 || !(px->cap & PR_CAP_FE)) goto next_px; - li = ctx->li; - list_for_each_entry_from(li, &px->conf.listeners, by_fe) { + if (!li) + li = LIST_NEXT(&px->conf.listeners, struct listener *, by_fe); + list_for_each_entry_from(li, &px->conf.listeners, by_fe) { if (!li->counters) continue; labels[1].name = ist("listener"); labels[1].value = ist2(li->name, strlen(li->name)); - if (!stats_fill_li_stats(px, li, 0, stats, - ST_F_TOTAL_FIELDS, &(ctx->field_num))) + if (!stats_fill_li_line(px, li, 0, stats, + ST_I_PX_MAX, &(ctx->field_num))) return -1; switch (ctx->field_num) { - case ST_F_STATUS: + case ST_I_PX_STATUS: status = get_li_status(li); for (; ctx->obj_state < LI_STATE_COUNT; ctx->obj_state++) { val = mkf_u32(FO_STATUS, status == ctx->obj_state); labels[2].name = ist("state"); labels[2].value = ist(li_status_st[ctx->obj_state]); - if (!promex_dump_metric(appctx, htx, prefix, &promex_st_metrics[ctx->field_num], - &val, labels, &out, max)) + if (!promex_dump_ts(appctx, prefix, name, desc, + promex_st_metrics[ctx->field_num].type, + &val, labels, &out, max)) goto full; } ctx->obj_state = 0; @@ -776,31 +878,110 @@ static int promex_dump_listener_metrics(struct appctx *appctx, struct htx *htx) val = stats[ctx->field_num]; } - if (!promex_dump_metric(appctx, htx, prefix, - &promex_st_metrics[ctx->field_num], - &val, labels, &out, max)) + if (!promex_dump_ts(appctx, prefix, name, desc, + promex_st_metrics[ctx->field_num].type, + &val, labels, &out, max)) goto full; } + li = NULL; next_px: px = px->next; - ctx->px = px; - ctx->li = (px ? 
LIST_NEXT(&px->conf.listeners, struct listener *, by_fe) : NULL); } ctx->flags |= PROMEX_FL_METRIC_HDR; - ctx->px = proxies_list; - ctx->li = LIST_NEXT(&proxies_list->conf.listeners, struct listener *, by_fe); } + /* Skip extra counters */ + if (!(ctx->flags & PROMEX_FL_EXTRA_COUNTERS)) + goto end; + + if (!mod) { + mod = LIST_NEXT(&stats_module_list[STATS_DOMAIN_PROXY], typeof(mod), list); + ctx->mod_field_num = 0; + } + + list_for_each_entry_from(mod, &stats_module_list[STATS_DOMAIN_PROXY], list) { + void *counters; + + if (!(stats_px_get_cap(mod->domain_flags) & STATS_PX_CAP_LI)) { + ctx->field_num += mod->stats_count; + ctx->mod_field_num = 0; + continue; + } + + for (;ctx->mod_field_num < mod->stats_count; ctx->mod_field_num++) { + name = ist2(mod->stats[ctx->mod_field_num].name, strlen(mod->stats[ctx->mod_field_num].name)); + desc = ist2(mod->stats[ctx->mod_field_num].desc, strlen(mod->stats[ctx->mod_field_num].desc)); + + if (promex_filter_metric(appctx, prefix, name)) + continue; + + if (!px) + px = proxies_list; + + while (px) { + struct promex_label labels[PROMEX_MAX_LABELS-1] = {}; + struct promex_metric metric; + + labels[0].name = ist("proxy"); + labels[0].value = ist2(px->id, strlen(px->id)); + + /* skip the disabled proxies, global frontend and non-networked ones */ + if ((px->flags & PR_FL_DISABLED) || px->uuid <= 0 || !(px->cap & PR_CAP_FE)) + goto next_px2; + + if (!li) + li = LIST_NEXT(&px->conf.listeners, struct listener *, by_fe); + + list_for_each_entry_from(li, &px->conf.listeners, by_fe) { + if (!li->counters) + continue; + + labels[1].name = ist("listener"); + labels[1].value = ist2(li->name, strlen(li->name)); + + labels[2].name = ist("mod"); + labels[2].value = ist2(mod->name, strlen(mod->name)); + + counters = EXTRA_COUNTERS_GET(li->extra_counters, mod); + if (!mod->fill_stats(counters, stats + ctx->field_num, &ctx->mod_field_num)) + return -1; + + val = stats[ctx->field_num + ctx->mod_field_num]; + metric.type = ((val.type == FN_GAUGE) 
? PROMEX_MT_GAUGE : PROMEX_MT_COUNTER); + + if (!promex_dump_ts(appctx, prefix, name, desc, metric.type, + &val, labels, &out, max)) + goto full; + } + li = NULL; + + next_px2: + px = px->next; + } + ctx->flags |= PROMEX_FL_METRIC_HDR; + } + + ctx->field_num += mod->stats_count; + ctx->mod_field_num = 0; + } + + px = NULL; + li = NULL; + mod = NULL; + end: if (out.len) { if (!htx_add_data_atonce(htx, out)) return -1; /* Unexpected and unrecoverable error */ channel_add_input(chn, out.len); } + /* Save pointers (0=current proxy, 1=current listener, 2=current stats module) of the current context */ + ctx->p[0] = px; + ctx->p[1] = li; + ctx->p[2] = mod; return ret; full: - ctx->li = li; ret = 0; goto end; } @@ -811,31 +992,44 @@ static int promex_dump_back_metrics(struct appctx *appctx, struct htx *htx) { static struct ist prefix = IST("haproxy_backend_"); struct promex_ctx *ctx = appctx->svcctx; - struct proxy *px; + struct proxy *px = ctx->p[0]; + struct stats_module *mod = ctx->p[2]; struct server *sv; struct field val; struct channel *chn = sc_ic(appctx_sc(appctx)); - struct ist out = ist2(trash.area, 0); + struct ist name, desc, out = ist2(trash.area, 0); size_t max = htx_get_max_blksz(htx, channel_htx_recv_max(chn, htx)); - struct field *stats = stat_l[STATS_DOMAIN_PROXY]; + struct field *stats = stat_lines[STATS_DOMAIN_PROXY]; int ret = 1; double secs; enum promex_back_state bkd_state; enum promex_srv_state srv_state; enum healthcheck_status srv_check_status; - for (;ctx->field_num < ST_F_TOTAL_FIELDS; ctx->field_num++) { + for (;ctx->field_num < ST_I_PX_MAX; ctx->field_num++) { if (!(promex_st_metrics[ctx->field_num].flags & ctx->flags)) continue; - while (ctx->px) { + name = promex_st_back_metrics_names[ctx->field_num]; + desc = promex_st_metric_desc[ctx->field_num]; + + if (!isttest(name)) + name = promex_st_metrics[ctx->field_num].n; + if (!isttest(desc)) + desc = ist(stat_cols_px[ctx->field_num].desc); + + if (promex_filter_metric(appctx, prefix, name)) + 
continue; + + if (!px) + px = proxies_list; + + while (px) { struct promex_label labels[PROMEX_MAX_LABELS-1] = {}; unsigned int srv_state_count[PROMEX_SRV_STATE_COUNT] = { 0 }; unsigned int srv_check_count[HCHK_STATUS_SIZE] = { 0 }; const char *check_state; - px = ctx->px; - labels[0].name = ist("proxy"); labels[0].value = ist2(px->id, strlen(px->id)); @@ -843,12 +1037,12 @@ static int promex_dump_back_metrics(struct appctx *appctx, struct htx *htx) if ((px->flags & PR_FL_DISABLED) || px->uuid <= 0 || !(px->cap & PR_CAP_BE)) goto next_px; - if (!stats_fill_be_stats(px, 0, stats, ST_F_TOTAL_FIELDS, &(ctx->field_num))) + if (!stats_fill_be_line(px, 0, stats, ST_I_PX_MAX, &(ctx->field_num))) return -1; switch (ctx->field_num) { - case ST_F_AGG_SRV_CHECK_STATUS: // DEPRECATED - case ST_F_AGG_SRV_STATUS: + case ST_I_PX_AGG_SRV_CHECK_STATUS: // DEPRECATED + case ST_I_PX_AGG_SRV_STATUS: if (!px->srv) goto next_px; sv = px->srv; @@ -861,13 +1055,14 @@ static int promex_dump_back_metrics(struct appctx *appctx, struct htx *htx) val = mkf_u32(FN_GAUGE, srv_state_count[ctx->obj_state]); labels[1].name = ist("state"); labels[1].value = promex_srv_st[ctx->obj_state]; - if (!promex_dump_metric(appctx, htx, prefix, &promex_st_metrics[ctx->field_num], - &val, labels, &out, max)) + if (!promex_dump_ts(appctx, prefix, name, desc, + promex_st_metrics[ctx->field_num].type, + &val, labels, &out, max)) goto full; } ctx->obj_state = 0; goto next_px; - case ST_F_AGG_CHECK_STATUS: + case ST_I_PX_AGG_CHECK_STATUS: if (!px->srv) goto next_px; sv = px->srv; @@ -885,79 +1080,81 @@ static int promex_dump_back_metrics(struct appctx *appctx, struct htx *htx) check_state = get_check_status_info(ctx->obj_state); labels[1].name = ist("state"); labels[1].value = ist(check_state); - if (!promex_dump_metric(appctx, htx, prefix, &promex_st_metrics[ctx->field_num], - &val, labels, &out, max)) + if (!promex_dump_ts(appctx, prefix, name, desc, + promex_st_metrics[ctx->field_num].type, + &val, labels, &out, 
max)) goto full; } ctx->obj_state = 0; goto next_px; - case ST_F_STATUS: + case ST_I_PX_STATUS: bkd_state = ((px->lbprm.tot_weight > 0 || !px->srv) ? 1 : 0); for (; ctx->obj_state < PROMEX_BACK_STATE_COUNT; ctx->obj_state++) { labels[1].name = ist("state"); labels[1].value = promex_back_st[ctx->obj_state]; val = mkf_u32(FO_STATUS, bkd_state == ctx->obj_state); - if (!promex_dump_metric(appctx, htx, prefix, &promex_st_metrics[ctx->field_num], - &val, labels, &out, max)) + if (!promex_dump_ts(appctx, prefix, name, desc, + promex_st_metrics[ctx->field_num].type, + &val, labels, &out, max)) goto full; } ctx->obj_state = 0; goto next_px; - case ST_F_QTIME: + case ST_I_PX_QTIME: secs = (double)swrate_avg(px->be_counters.q_time, TIME_STATS_SAMPLES) / 1000.0; val = mkf_flt(FN_AVG, secs); break; - case ST_F_CTIME: + case ST_I_PX_CTIME: secs = (double)swrate_avg(px->be_counters.c_time, TIME_STATS_SAMPLES) / 1000.0; val = mkf_flt(FN_AVG, secs); break; - case ST_F_RTIME: + case ST_I_PX_RTIME: secs = (double)swrate_avg(px->be_counters.d_time, TIME_STATS_SAMPLES) / 1000.0; val = mkf_flt(FN_AVG, secs); break; - case ST_F_TTIME: + case ST_I_PX_TTIME: secs = (double)swrate_avg(px->be_counters.t_time, TIME_STATS_SAMPLES) / 1000.0; val = mkf_flt(FN_AVG, secs); break; - case ST_F_QT_MAX: + case ST_I_PX_QT_MAX: secs = (double)px->be_counters.qtime_max / 1000.0; val = mkf_flt(FN_MAX, secs); break; - case ST_F_CT_MAX: + case ST_I_PX_CT_MAX: secs = (double)px->be_counters.ctime_max / 1000.0; val = mkf_flt(FN_MAX, secs); break; - case ST_F_RT_MAX: + case ST_I_PX_RT_MAX: secs = (double)px->be_counters.dtime_max / 1000.0; val = mkf_flt(FN_MAX, secs); break; - case ST_F_TT_MAX: + case ST_I_PX_TT_MAX: secs = (double)px->be_counters.ttime_max / 1000.0; val = mkf_flt(FN_MAX, secs); break; - case ST_F_REQ_TOT: - case ST_F_CACHE_LOOKUPS: - case ST_F_CACHE_HITS: - case ST_F_COMP_IN: - case ST_F_COMP_OUT: - case ST_F_COMP_BYP: - case ST_F_COMP_RSP: + case ST_I_PX_REQ_TOT: + case 
ST_I_PX_CACHE_LOOKUPS: + case ST_I_PX_CACHE_HITS: + case ST_I_PX_COMP_IN: + case ST_I_PX_COMP_OUT: + case ST_I_PX_COMP_BYP: + case ST_I_PX_COMP_RSP: if (px->mode != PR_MODE_HTTP) goto next_px; val = stats[ctx->field_num]; break; - case ST_F_HRSP_1XX: - case ST_F_HRSP_2XX: - case ST_F_HRSP_3XX: - case ST_F_HRSP_4XX: - case ST_F_HRSP_5XX: - case ST_F_HRSP_OTHER: + case ST_I_PX_HRSP_1XX: + case ST_I_PX_HRSP_2XX: + case ST_I_PX_HRSP_3XX: + case ST_I_PX_HRSP_4XX: + case ST_I_PX_HRSP_5XX: + case ST_I_PX_HRSP_OTHER: if (px->mode != PR_MODE_HTTP) goto next_px; - if (ctx->field_num != ST_F_HRSP_1XX) + if (ctx->field_num != ST_I_PX_HRSP_1XX) ctx->flags &= ~PROMEX_FL_METRIC_HDR; labels[1].name = ist("code"); - labels[1].value = promex_hrsp_code[ctx->field_num - ST_F_HRSP_1XX]; + labels[1].value = promex_hrsp_code[ctx->field_num - ST_I_PX_HRSP_1XX]; val = stats[ctx->field_num]; break; @@ -965,22 +1162,91 @@ static int promex_dump_back_metrics(struct appctx *appctx, struct htx *htx) val = stats[ctx->field_num]; } - if (!promex_dump_metric(appctx, htx, prefix, &promex_st_metrics[ctx->field_num], - &val, labels, &out, max)) + if (!promex_dump_ts(appctx, prefix, name, desc, + promex_st_metrics[ctx->field_num].type, + &val, labels, &out, max)) goto full; next_px: - ctx->px = px->next; + px = px->next; } ctx->flags |= PROMEX_FL_METRIC_HDR; - ctx->px = proxies_list; } + /* Skip extra counters */ + if (!(ctx->flags & PROMEX_FL_EXTRA_COUNTERS)) + goto end; + + if (!mod) { + mod = LIST_NEXT(&stats_module_list[STATS_DOMAIN_PROXY], typeof(mod), list); + ctx->mod_field_num = 0; + } + + list_for_each_entry_from(mod, &stats_module_list[STATS_DOMAIN_PROXY], list) { + void *counters; + + if (!(stats_px_get_cap(mod->domain_flags) & STATS_PX_CAP_BE)) { + ctx->field_num += mod->stats_count; + ctx->mod_field_num = 0; + continue; + } + + for (;ctx->mod_field_num < mod->stats_count; ctx->mod_field_num++) { + name = ist2(mod->stats[ctx->mod_field_num].name, 
strlen(mod->stats[ctx->mod_field_num].name)); + desc = ist2(mod->stats[ctx->mod_field_num].desc, strlen(mod->stats[ctx->mod_field_num].desc)); + + if (promex_filter_metric(appctx, prefix, name)) + continue; + + if (!px) + px = proxies_list; + + while (px) { + struct promex_label labels[PROMEX_MAX_LABELS-1] = {}; + struct promex_metric metric; + + labels[0].name = ist("proxy"); + labels[0].value = ist2(px->id, strlen(px->id)); + + labels[1].name = ist("mod"); + labels[1].value = ist2(mod->name, strlen(mod->name)); + + /* skip the disabled proxies, global frontend and non-networked ones */ + if ((px->flags & PR_FL_DISABLED) || px->uuid <= 0 || !(px->cap & PR_CAP_BE)) + goto next_px2; + + counters = EXTRA_COUNTERS_GET(px->extra_counters_be, mod); + if (!mod->fill_stats(counters, stats + ctx->field_num, &ctx->mod_field_num)) + return -1; + + val = stats[ctx->field_num + ctx->mod_field_num]; + metric.type = ((val.type == FN_GAUGE) ? PROMEX_MT_GAUGE : PROMEX_MT_COUNTER); + + if (!promex_dump_ts(appctx, prefix, name, desc, metric.type, + &val, labels, &out, max)) + goto full; + + next_px2: + px = px->next; + } + ctx->flags |= PROMEX_FL_METRIC_HDR; + } + + ctx->field_num += mod->stats_count; + ctx->mod_field_num = 0; + } + + px = NULL; + mod = NULL; + end: if (out.len) { if (!htx_add_data_atonce(htx, out)) return -1; /* Unexpected and unrecoverable error */ channel_add_input(chn, out.len); } + /* Save pointers (0=current proxy, 1=current stats module) of the current context */ + ctx->p[0] = px; + ctx->p[1] = mod; return ret; full: ret = 0; @@ -993,26 +1259,39 @@ static int promex_dump_srv_metrics(struct appctx *appctx, struct htx *htx) { static struct ist prefix = IST("haproxy_server_"); struct promex_ctx *ctx = appctx->svcctx; - struct proxy *px; - struct server *sv; + struct proxy *px = ctx->p[0]; + struct server *sv = ctx->p[1]; + struct stats_module *mod = ctx->p[2]; struct field val; struct channel *chn = sc_ic(appctx_sc(appctx)); - struct ist out = ist2(trash.area, 
0); + struct ist name, desc, out = ist2(trash.area, 0); size_t max = htx_get_max_blksz(htx, channel_htx_recv_max(chn, htx)); - struct field *stats = stat_l[STATS_DOMAIN_PROXY]; + struct field *stats = stat_lines[STATS_DOMAIN_PROXY]; int ret = 1; double secs; enum promex_srv_state state; const char *check_state; - for (;ctx->field_num < ST_F_TOTAL_FIELDS; ctx->field_num++) { + for (;ctx->field_num < ST_I_PX_MAX; ctx->field_num++) { if (!(promex_st_metrics[ctx->field_num].flags & ctx->flags)) continue; - while (ctx->px) { - struct promex_label labels[PROMEX_MAX_LABELS-1] = {}; + name = promex_st_srv_metrics_names[ctx->field_num]; + desc = promex_st_metric_desc[ctx->field_num]; + + if (!isttest(name)) + name = promex_st_metrics[ctx->field_num].n; + if (!isttest(desc)) + desc = ist(stat_cols_px[ctx->field_num].desc); + + if (promex_filter_metric(appctx, prefix, name)) + continue; - px = ctx->px; + if (!px) + px = proxies_list; + + while (px) { + struct promex_label labels[PROMEX_MAX_LABELS-1] = {}; labels[0].name = ist("proxy"); labels[0].value = ist2(px->id, strlen(px->id)); @@ -1021,64 +1300,66 @@ static int promex_dump_srv_metrics(struct appctx *appctx, struct htx *htx) if ((px->flags & PR_FL_DISABLED) || px->uuid <= 0 || !(px->cap & PR_CAP_BE)) goto next_px; - while (ctx->sv) { - sv = ctx->sv; + if (!sv) + sv = px->srv; + while (sv) { labels[1].name = ist("server"); labels[1].value = ist2(sv->id, strlen(sv->id)); - if (!stats_fill_sv_stats(px, sv, 0, stats, ST_F_TOTAL_FIELDS, &(ctx->field_num))) + if (!stats_fill_sv_line(px, sv, 0, stats, ST_I_PX_MAX, &(ctx->field_num))) return -1; if ((ctx->flags & PROMEX_FL_NO_MAINT_SRV) && (sv->cur_admin & SRV_ADMF_MAINT)) goto next_sv; switch (ctx->field_num) { - case ST_F_STATUS: + case ST_I_PX_STATUS: state = promex_srv_status(sv); for (; ctx->obj_state < PROMEX_SRV_STATE_COUNT; ctx->obj_state++) { val = mkf_u32(FO_STATUS, state == ctx->obj_state); labels[2].name = ist("state"); labels[2].value = 
promex_srv_st[ctx->obj_state]; - if (!promex_dump_metric(appctx, htx, prefix, &promex_st_metrics[ctx->field_num], - &val, labels, &out, max)) + if (!promex_dump_ts(appctx, prefix, name, desc, + promex_st_metrics[ctx->field_num].type, + &val, labels, &out, max)) goto full; } ctx->obj_state = 0; goto next_sv; - case ST_F_QTIME: + case ST_I_PX_QTIME: secs = (double)swrate_avg(sv->counters.q_time, TIME_STATS_SAMPLES) / 1000.0; val = mkf_flt(FN_AVG, secs); break; - case ST_F_CTIME: + case ST_I_PX_CTIME: secs = (double)swrate_avg(sv->counters.c_time, TIME_STATS_SAMPLES) / 1000.0; val = mkf_flt(FN_AVG, secs); break; - case ST_F_RTIME: + case ST_I_PX_RTIME: secs = (double)swrate_avg(sv->counters.d_time, TIME_STATS_SAMPLES) / 1000.0; val = mkf_flt(FN_AVG, secs); break; - case ST_F_TTIME: + case ST_I_PX_TTIME: secs = (double)swrate_avg(sv->counters.t_time, TIME_STATS_SAMPLES) / 1000.0; val = mkf_flt(FN_AVG, secs); break; - case ST_F_QT_MAX: + case ST_I_PX_QT_MAX: secs = (double)sv->counters.qtime_max / 1000.0; val = mkf_flt(FN_MAX, secs); break; - case ST_F_CT_MAX: + case ST_I_PX_CT_MAX: secs = (double)sv->counters.ctime_max / 1000.0; val = mkf_flt(FN_MAX, secs); break; - case ST_F_RT_MAX: + case ST_I_PX_RT_MAX: secs = (double)sv->counters.dtime_max / 1000.0; val = mkf_flt(FN_MAX, secs); break; - case ST_F_TT_MAX: + case ST_I_PX_TT_MAX: secs = (double)sv->counters.ttime_max / 1000.0; val = mkf_flt(FN_MAX, secs); break; - case ST_F_CHECK_STATUS: + case ST_I_PX_CHECK_STATUS: if ((sv->check.state & (CHK_ST_ENABLED|CHK_ST_PAUSED)) != CHK_ST_ENABLED) goto next_sv; @@ -1089,40 +1370,45 @@ static int promex_dump_srv_metrics(struct appctx *appctx, struct htx *htx) check_state = get_check_status_info(ctx->obj_state); labels[2].name = ist("state"); labels[2].value = ist(check_state); - if (!promex_dump_metric(appctx, htx, prefix, &promex_st_metrics[ctx->field_num], - &val, labels, &out, max)) + if (!promex_dump_ts(appctx, prefix, name, desc, + promex_st_metrics[ctx->field_num].type, + 
&val, labels, &out, max)) goto full; } ctx->obj_state = 0; goto next_sv; - case ST_F_CHECK_CODE: + case ST_I_PX_CHECK_CODE: if ((sv->check.state & (CHK_ST_ENABLED|CHK_ST_PAUSED)) != CHK_ST_ENABLED) goto next_sv; val = mkf_u32(FN_OUTPUT, (sv->check.status < HCHK_STATUS_L57DATA) ? 0 : sv->check.code); break; - case ST_F_CHECK_DURATION: + case ST_I_PX_CHECK_DURATION: if (sv->check.status < HCHK_STATUS_CHECKED) goto next_sv; secs = (double)sv->check.duration / 1000.0; val = mkf_flt(FN_DURATION, secs); break; - case ST_F_REQ_TOT: - if (px->mode != PR_MODE_HTTP) + case ST_I_PX_REQ_TOT: + if (px->mode != PR_MODE_HTTP) { + sv = NULL; goto next_px; + } val = stats[ctx->field_num]; break; - case ST_F_HRSP_1XX: - case ST_F_HRSP_2XX: - case ST_F_HRSP_3XX: - case ST_F_HRSP_4XX: - case ST_F_HRSP_5XX: - case ST_F_HRSP_OTHER: - if (px->mode != PR_MODE_HTTP) + case ST_I_PX_HRSP_1XX: + case ST_I_PX_HRSP_2XX: + case ST_I_PX_HRSP_3XX: + case ST_I_PX_HRSP_4XX: + case ST_I_PX_HRSP_5XX: + case ST_I_PX_HRSP_OTHER: + if (px->mode != PR_MODE_HTTP) { + sv = NULL; goto next_px; - if (ctx->field_num != ST_F_HRSP_1XX) + } + if (ctx->field_num != ST_I_PX_HRSP_1XX) ctx->flags &= ~PROMEX_FL_METRIC_HDR; labels[2].name = ist("code"); - labels[2].value = promex_hrsp_code[ctx->field_num - ST_F_HRSP_1XX]; + labels[2].value = promex_hrsp_code[ctx->field_num - ST_I_PX_HRSP_1XX]; val = stats[ctx->field_num]; break; @@ -1130,22 +1416,101 @@ static int promex_dump_srv_metrics(struct appctx *appctx, struct htx *htx) val = stats[ctx->field_num]; } - if (!promex_dump_metric(appctx, htx, prefix, &promex_st_metrics[ctx->field_num], - &val, labels, &out, max)) + if (!promex_dump_ts(appctx, prefix, name, desc, + promex_st_metrics[ctx->field_num].type, + &val, labels, &out, max)) goto full; next_sv: - promex_set_ctx_sv(ctx, sv->next); + sv = sv->next; } next_px: - ctx->px = px->next; - promex_set_ctx_sv(ctx, ctx->px ? 
ctx->px->srv : NULL); + px = px->next; } ctx->flags |= PROMEX_FL_METRIC_HDR; - ctx->px = proxies_list; - promex_set_ctx_sv(ctx, ctx->px ? ctx->px->srv : NULL); } + /* Skip extra counters */ + if (!(ctx->flags & PROMEX_FL_EXTRA_COUNTERS)) + goto end; + + if (!mod) { + mod = LIST_NEXT(&stats_module_list[STATS_DOMAIN_PROXY], typeof(mod), list); + ctx->mod_field_num = 0; + } + + list_for_each_entry_from(mod, &stats_module_list[STATS_DOMAIN_PROXY], list) { + void *counters; + + if (!(stats_px_get_cap(mod->domain_flags) & STATS_PX_CAP_SRV)) { + ctx->field_num += mod->stats_count; + ctx->mod_field_num = 0; + continue; + } + + for (;ctx->mod_field_num < mod->stats_count; ctx->mod_field_num++) { + name = ist2(mod->stats[ctx->mod_field_num].name, strlen(mod->stats[ctx->mod_field_num].name)); + desc = ist2(mod->stats[ctx->mod_field_num].desc, strlen(mod->stats[ctx->mod_field_num].desc)); + + if (promex_filter_metric(appctx, prefix, name)) + continue; + + if (!px) + px = proxies_list; + + while (px) { + struct promex_label labels[PROMEX_MAX_LABELS-1] = {}; + struct promex_metric metric; + + labels[0].name = ist("proxy"); + labels[0].value = ist2(px->id, strlen(px->id)); + + /* skip the disabled proxies, global frontend and non-networked ones */ + if ((px->flags & PR_FL_DISABLED) || px->uuid <= 0 || !(px->cap & PR_CAP_BE)) + goto next_px2; + + if (!sv) + sv = px->srv; + + while (sv) { + labels[1].name = ist("server"); + labels[1].value = ist2(sv->id, strlen(sv->id)); + + labels[2].name = ist("mod"); + labels[2].value = ist2(mod->name, strlen(mod->name)); + + if ((ctx->flags & PROMEX_FL_NO_MAINT_SRV) && (sv->cur_admin & SRV_ADMF_MAINT)) + goto next_sv2; + + + counters = EXTRA_COUNTERS_GET(sv->extra_counters, mod); + if (!mod->fill_stats(counters, stats + ctx->field_num, &ctx->mod_field_num)) + return -1; + + val = stats[ctx->field_num + ctx->mod_field_num]; + metric.type = ((val.type == FN_GAUGE) ? 
PROMEX_MT_GAUGE : PROMEX_MT_COUNTER); + + if (!promex_dump_ts(appctx, prefix, name, desc, metric.type, + &val, labels, &out, max)) + goto full; + + next_sv2: + sv = sv->next; + } + + next_px2: + px = px->next; + } + ctx->flags |= PROMEX_FL_METRIC_HDR; + } + + ctx->field_num += mod->stats_count; + ctx->mod_field_num = 0; + } + + px = NULL; + sv = NULL; + mod = NULL; end: if (out.len) { @@ -1153,62 +1518,162 @@ static int promex_dump_srv_metrics(struct appctx *appctx, struct htx *htx) return -1; /* Unexpected and unrecoverable error */ channel_add_input(chn, out.len); } + + /* Decrement server refcount if it was saved through ctx.p[1]. */ + srv_drop(ctx->p[1]); + if (sv) + srv_take(sv); + + /* Save pointers (0=current proxy, 1=current server, 2=current stats module) of the current context */ + ctx->p[0] = px; + ctx->p[1] = sv; + ctx->p[2] = mod; return ret; full: ret = 0; goto end; } -/* Dump stick table metrics (prefixed by "haproxy_sticktable_"). It returns 1 on success, - * 0 if <htx> is full and -1 in case of any error. */ -static int promex_dump_sticktable_metrics(struct appctx *appctx, struct htx *htx) +/* Dump metrics of module <mod>. It returns 1 on success, 0 if <out> is full and + * -1 on error. 
*/ +static int promex_dump_module_metrics(struct appctx *appctx, struct promex_module *mod, + struct ist *out, size_t max) { - static struct ist prefix = IST("haproxy_sticktable_"); + struct ist prefix = { .ptr = (char[PROMEX_MAX_NAME_LEN]){ 0 }, .len = 0 }; struct promex_ctx *ctx = appctx->svcctx; - struct field val; - struct channel *chn = sc_ic(appctx_sc(appctx)); - struct ist out = ist2(trash.area, 0); - size_t max = htx_get_max_blksz(htx, channel_htx_recv_max(chn, htx)); int ret = 1; - struct stktable *t; - for (; ctx->field_num < STICKTABLE_TOTAL_FIELDS; ctx->field_num++) { - if (!(promex_sticktable_metrics[ctx->field_num].flags & ctx->flags)) + istcat(&prefix, ist("haproxy_"), PROMEX_MAX_NAME_LEN); + istcat(&prefix, mod->name, PROMEX_MAX_NAME_LEN); + istcat(&prefix, ist("_"), PROMEX_MAX_NAME_LEN); + + if (!ctx->p[1] && mod->start_metrics_dump) { + ctx->p[1] = mod->start_metrics_dump(); + if (!ctx->p[1]) + goto end; + } + + for (; ctx->mod_field_num < mod->nb_metrics; ctx->mod_field_num++) { + struct promex_metric metric; + struct ist desc; + + + ret = mod->metric_info(ctx->mod_field_num, &metric, &desc); + if (!ret) continue; + if (ret < 0) + goto error; - while (ctx->st) { - struct promex_label labels[PROMEX_MAX_LABELS - 1] = {}; + if (promex_filter_metric(appctx, prefix, metric.n)) + continue; - t = ctx->st; - if (!t->size) - goto next_px; + if (!ctx->p[2]) + ctx->p[2] = mod->start_ts(ctx->p[1], ctx->mod_field_num); - labels[0].name = ist("name"); - labels[0].value = ist2(t->id, strlen(t->id)); - labels[1].name = ist("type"); - labels[1].value = ist2(stktable_types[t->type].kw, strlen(stktable_types[t->type].kw)); - switch (ctx->field_num) { - case STICKTABLE_SIZE: - val = mkf_u32(FN_GAUGE, t->size); - break; - case STICKTABLE_USED: - val = mkf_u32(FN_GAUGE, t->current); - break; - default: - goto next_px; - } + while (ctx->p[2]) { + struct promex_label labels[PROMEX_MAX_LABELS - 1] = {}; + struct field val; + + ret = mod->fill_ts(ctx->p[1], ctx->p[2], 
ctx->mod_field_num, labels, &val); + if (!ret) + continue; + if (ret < 0) + goto error; - if (!promex_dump_metric(appctx, htx, prefix, - &promex_sticktable_metrics[ctx->field_num], - &val, labels, &out, max)) + if (!promex_dump_ts(appctx, prefix, metric.n, desc, metric.type, + &val, labels, out, max)) goto full; - next_px: - ctx->st = t->next; + next: + ctx->p[2] = mod->next_ts(ctx->p[1], ctx->p[2], ctx->mod_field_num); } ctx->flags |= PROMEX_FL_METRIC_HDR; - ctx->st = stktables_list; } + ret = 1; + + end: + if (ctx->p[1] && mod->stop_metrics_dump) + mod->stop_metrics_dump(ctx->p[1]); + ctx->p[1] = NULL; + ctx->p[2] = NULL; + return ret; + + full: + return 0; + error: + ret = -1; + goto end; + +} + +/* Dump metrics of referenced modules. It returns 1 on success, 0 if <htx> is + * full and -1 in case of any error. */ +static int promex_dump_ref_modules_metrics(struct appctx *appctx, struct htx *htx) +{ + struct promex_ctx *ctx = appctx->svcctx; + struct promex_module_ref *ref = ctx->p[0]; + struct channel *chn = sc_ic(appctx_sc(appctx)); + struct ist out = ist2(trash.area, 0); + size_t max = htx_get_max_blksz(htx, channel_htx_recv_max(chn, htx)); + int ret = 1; + + if (!ref) { + ref = LIST_NEXT(&ctx->modules, typeof(ref), list); + ctx->mod_field_num = 0; + } + + list_for_each_entry_from(ref, &ctx->modules, list) { + ret = promex_dump_module_metrics(appctx, ref->mod, &out, max); + if (ret <= 0) { + if (ret == -1) + return -1; + goto full; + } + ctx->mod_field_num = 0; + } + + ref = NULL; + + end: + if (out.len) { + if (!htx_add_data_atonce(htx, out)) + return -1; /* Unexpected and unrecoverable error */ + channel_add_input(chn, out.len); + } + ctx->p[0] = ref; + return ret; + full: + ret = 0; + goto end; +} + +/* Dump metrics of all registered modules. It returns 1 on success, 0 if <htx> is + * full and -1 in case of any error. 
*/ +static int promex_dump_all_modules_metrics(struct appctx *appctx, struct htx *htx) +{ + struct promex_ctx *ctx = appctx->svcctx; + struct promex_module *mod = ctx->p[0]; + struct channel *chn = sc_ic(appctx_sc(appctx)); + struct ist out = ist2(trash.area, 0); + size_t max = htx_get_max_blksz(htx, channel_htx_recv_max(chn, htx)); + int ret = 1; + + if (!mod) { + mod = LIST_NEXT(&promex_module_list, typeof(mod), list); + ctx->mod_field_num = 0; + } + + list_for_each_entry_from(mod, &promex_module_list, list) { + ret = promex_dump_module_metrics(appctx, mod, &out, max); + if (ret <= 0) { + if (ret == -1) + return -1; + goto full; + } + ctx->mod_field_num = 0; + } + + mod = NULL; end: if (out.len) { @@ -1216,6 +1681,7 @@ static int promex_dump_sticktable_metrics(struct appctx *appctx, struct htx *htx return -1; /* Unexpected and unrecoverable error */ channel_add_input(chn, out.len); } + ctx->p[0] = mod; return ret; full: ret = 0; @@ -1235,13 +1701,9 @@ static int promex_dump_metrics(struct appctx *appctx, struct stconn *sc, struct switch (appctx->st1) { case PROMEX_DUMPER_INIT: - ctx->px = NULL; - ctx->st = NULL; - ctx->li = NULL; - promex_set_ctx_sv(ctx, NULL); ctx->flags |= (PROMEX_FL_METRIC_HDR|PROMEX_FL_INFO_METRIC); ctx->obj_state = 0; - ctx->field_num = INF_NAME; + ctx->field_num = ST_I_INF_NAME; appctx->st1 = PROMEX_DUMPER_GLOBAL; __fallthrough; @@ -1255,14 +1717,11 @@ static int promex_dump_metrics(struct appctx *appctx, struct stconn *sc, struct } } - ctx->px = proxies_list; - ctx->st = NULL; - ctx->li = NULL; - promex_set_ctx_sv(ctx, NULL); ctx->flags &= ~PROMEX_FL_INFO_METRIC; ctx->flags |= (PROMEX_FL_METRIC_HDR|PROMEX_FL_FRONT_METRIC); ctx->obj_state = 0; - ctx->field_num = ST_F_PXNAME; + ctx->field_num = ST_I_PX_PXNAME; + ctx->mod_field_num = 0; appctx->st1 = PROMEX_DUMPER_FRONT; __fallthrough; @@ -1276,14 +1735,11 @@ static int promex_dump_metrics(struct appctx *appctx, struct stconn *sc, struct } } - ctx->px = proxies_list; - ctx->st = NULL; - 
ctx->li = LIST_NEXT(&proxies_list->conf.listeners, struct listener *, by_fe); - promex_set_ctx_sv(ctx, NULL); ctx->flags &= ~PROMEX_FL_FRONT_METRIC; ctx->flags |= (PROMEX_FL_METRIC_HDR|PROMEX_FL_LI_METRIC); ctx->obj_state = 0; - ctx->field_num = ST_F_PXNAME; + ctx->field_num = ST_I_PX_PXNAME; + ctx->mod_field_num = 0; appctx->st1 = PROMEX_DUMPER_LI; __fallthrough; @@ -1297,14 +1753,11 @@ static int promex_dump_metrics(struct appctx *appctx, struct stconn *sc, struct } } - ctx->px = proxies_list; - ctx->st = NULL; - ctx->li = NULL; - promex_set_ctx_sv(ctx, NULL); ctx->flags &= ~PROMEX_FL_LI_METRIC; ctx->flags |= (PROMEX_FL_METRIC_HDR|PROMEX_FL_BACK_METRIC); ctx->obj_state = 0; - ctx->field_num = ST_F_PXNAME; + ctx->field_num = ST_I_PX_PXNAME; + ctx->mod_field_num = 0; appctx->st1 = PROMEX_DUMPER_BACK; __fallthrough; @@ -1318,14 +1771,11 @@ static int promex_dump_metrics(struct appctx *appctx, struct stconn *sc, struct } } - ctx->px = proxies_list; - ctx->st = NULL; - ctx->li = NULL; - promex_set_ctx_sv(ctx, ctx->px ? 
ctx->px->srv : NULL); ctx->flags &= ~PROMEX_FL_BACK_METRIC; ctx->flags |= (PROMEX_FL_METRIC_HDR|PROMEX_FL_SRV_METRIC); ctx->obj_state = 0; - ctx->field_num = ST_F_PXNAME; + ctx->field_num = ST_I_PX_PXNAME; + ctx->mod_field_num = 0; appctx->st1 = PROMEX_DUMPER_SRV; __fallthrough; @@ -1339,19 +1789,19 @@ static int promex_dump_metrics(struct appctx *appctx, struct stconn *sc, struct } } - ctx->px = NULL; - ctx->st = stktables_list; - ctx->li = NULL; - promex_set_ctx_sv(ctx, NULL); ctx->flags &= ~(PROMEX_FL_METRIC_HDR|PROMEX_FL_SRV_METRIC); - ctx->flags |= (PROMEX_FL_METRIC_HDR|PROMEX_FL_STICKTABLE_METRIC); - ctx->field_num = STICKTABLE_SIZE; - appctx->st1 = PROMEX_DUMPER_STICKTABLE; + ctx->flags |= (PROMEX_FL_METRIC_HDR|PROMEX_FL_MODULE_METRIC); + ctx->field_num = 0; + ctx->mod_field_num = 0; + appctx->st1 = PROMEX_DUMPER_MODULES; __fallthrough; - case PROMEX_DUMPER_STICKTABLE: - if (ctx->flags & PROMEX_FL_SCOPE_STICKTABLE) { - ret = promex_dump_sticktable_metrics(appctx, htx); + case PROMEX_DUMPER_MODULES: + if (ctx->flags & PROMEX_FL_SCOPE_MODULE) { + if (LIST_ISEMPTY(&ctx->modules)) + ret = promex_dump_all_modules_metrics(appctx, htx); + else + ret = promex_dump_ref_modules_metrics(appctx, htx); if (ret <= 0) { if (ret == -1) goto error; @@ -1359,12 +1809,9 @@ static int promex_dump_metrics(struct appctx *appctx, struct stconn *sc, struct } } - ctx->px = NULL; - ctx->st = NULL; - ctx->li = NULL; - promex_set_ctx_sv(ctx, NULL); - ctx->flags &= ~(PROMEX_FL_METRIC_HDR|PROMEX_FL_STICKTABLE_METRIC); + ctx->flags &= ~(PROMEX_FL_METRIC_HDR|PROMEX_FL_MODULE_METRIC); ctx->field_num = 0; + ctx->mod_field_num = 0; appctx->st1 = PROMEX_DUMPER_DONE; __fallthrough; @@ -1380,12 +1827,9 @@ static int promex_dump_metrics(struct appctx *appctx, struct stconn *sc, struct return 0; error: /* unrecoverable error */ - ctx->px = NULL; - ctx->st = NULL; - ctx->li = NULL; - promex_set_ctx_sv(ctx, NULL); ctx->flags = 0; ctx->field_num = 0; + ctx->mod_field_num = 0; appctx->st1 = 
PROMEX_DUMPER_DONE; return -1; } @@ -1403,6 +1847,7 @@ static int promex_parse_uri(struct appctx *appctx, struct stconn *sc) const char *end; struct buffer *err; int default_scopes = PROMEX_FL_SCOPE_ALL; + int default_metrics_filter = PROMEX_FL_INC_METRIC_BY_DEFAULT; int len; /* Get the query-string */ @@ -1465,7 +1910,7 @@ static int promex_parse_uri(struct appctx *appctx, struct stconn *sc) goto error; else if (*value == 0) ctx->flags &= ~PROMEX_FL_SCOPE_ALL; - else if (*value == '*') + else if (*value == '*' && *(value+1) == 0) ctx->flags |= PROMEX_FL_SCOPE_ALL; else if (strcmp(value, "global") == 0) ctx->flags |= PROMEX_FL_SCOPE_GLOBAL; @@ -1477,17 +1922,76 @@ static int promex_parse_uri(struct appctx *appctx, struct stconn *sc) ctx->flags |= PROMEX_FL_SCOPE_FRONT; else if (strcmp(value, "listener") == 0) ctx->flags |= PROMEX_FL_SCOPE_LI; - else if (strcmp(value, "sticktable") == 0) - ctx->flags |= PROMEX_FL_SCOPE_STICKTABLE; - else + else { + struct promex_module *mod; + struct promex_module_ref *ref; + + list_for_each_entry(mod, &promex_module_list, list) { + if (strncmp(value, istptr(mod->name), istlen(mod->name)) == 0) { + ref = pool_alloc(pool_head_promex_mod_ref); + if (!ref) + goto internal_error; + ctx->flags |= PROMEX_FL_SCOPE_MODULE; + ref->mod = mod; + LIST_APPEND(&ctx->modules, &ref->list); + break; + } + } + if (!(ctx->flags & PROMEX_FL_SCOPE_MODULE)) + goto error; + } + } + else if (strcmp(key, "metrics") == 0) { + struct ist args; + + if (!value) goto error; + + for (args = ist(value); istlen(args); args = istadv(istfind(args, ','), 1)) { + struct eb32_node *node; + struct promex_metric_filter *flt; + struct ist m = iststop(args, ','); + unsigned int hash; + int exclude = 0; + + if (!istlen(m)) + continue; + + if (*istptr(m) == '-') { + m = istnext(m); + if (!istlen(m)) + continue; + exclude = 1; + } + else + default_metrics_filter &= ~PROMEX_FL_INC_METRIC_BY_DEFAULT; + + + hash = XXH32(istptr(m), istlen(m), 0); + node = 
eb32_lookup(&ctx->filters, hash); + if (node) { + flt = container_of(node, typeof(*flt), node); + flt->exclude = exclude; + continue; + } + + flt = pool_alloc(pool_head_promex_metric_flt); + if (!flt) + goto internal_error; + flt->node.key = hash; + flt->exclude = exclude; + eb32_insert(&ctx->filters, &flt->node); + } + } + else if (strcmp(key, "extra-counters") == 0) { + ctx->flags |= PROMEX_FL_EXTRA_COUNTERS; } else if (strcmp(key, "no-maint") == 0) ctx->flags |= PROMEX_FL_NO_MAINT_SRV; } end: - ctx->flags |= default_scopes; + ctx->flags |= (default_scopes | default_metrics_filter); return 1; error: @@ -1498,6 +2002,15 @@ static int promex_parse_uri(struct appctx *appctx, struct stconn *sc) res_htx = htx_from_buf(&res->buf); channel_add_input(res, res_htx->data); return -1; + + internal_error: + err = &http_err_chunks[HTTP_ERR_400]; + channel_erase(res); + res->buf.data = b_data(err); + memcpy(res->buf.area, b_head(err), b_data(err)); + res_htx = htx_from_buf(&res->buf); + channel_add_input(res, res_htx->data); + return -1; } /* Send HTTP headers of the response. It returns 1 on success and 0 if <htx> is @@ -1533,19 +2046,45 @@ static int promex_send_headers(struct appctx *appctx, struct stconn *sc, struct */ static int promex_appctx_init(struct appctx *appctx) { + struct promex_ctx *ctx; + applet_reserve_svcctx(appctx, sizeof(struct promex_ctx)); + ctx = appctx->svcctx; + memset(ctx->p, 0, sizeof(ctx->p)); + LIST_INIT(&ctx->modules); + ctx->filters = EB_ROOT; appctx->st0 = PROMEX_ST_INIT; return 0; } + /* Callback function that releases a promex applet. This happens when the * connection with the agent is closed. 
*/ static void promex_appctx_release(struct appctx *appctx) { struct promex_ctx *ctx = appctx->svcctx; + struct promex_module_ref *ref, *back; + struct promex_metric_filter *flt; + struct eb32_node *node, *next; - if (appctx->st1 == PROMEX_DUMPER_SRV) - srv_drop(ctx->sv); + if (appctx->st1 == PROMEX_DUMPER_SRV) { + struct server *srv = objt_server(ctx->p[1]); + srv_drop(srv); + } + + list_for_each_entry_safe(ref, back, &ctx->modules, list) { + LIST_DELETE(&ref->list); + pool_free(pool_head_promex_mod_ref, ref); + } + + node = eb32_first(&ctx->filters); + while (node) { + next = eb32_next(node); + eb32_delete(node); + flt = container_of(node, typeof(*flt), node); + pool_free(pool_head_promex_metric_flt, flt); + node = next; + } } /* The main I/O handler for the promex applet. */ @@ -1639,8 +2178,8 @@ struct applet promex_applet = { .obj_type = OBJ_TYPE_APPLET, .name = "<PROMEX>", /* used for logging */ .init = promex_appctx_init, - .fct = promex_appctx_handle_io, .release = promex_appctx_release, + .fct = promex_appctx_handle_io, }; static enum act_parse_ret service_parse_prometheus_exporter(const char **args, int *cur_arg, struct proxy *px, diff --git a/dev/flags/flags.c b/dev/flags/flags.c index 65af237..8da485b 100644 --- a/dev/flags/flags.c +++ b/dev/flags/flags.c @@ -12,6 +12,7 @@ #include <haproxy/mux_fcgi-t.h> #include <haproxy/mux_h2-t.h> #include <haproxy/mux_h1-t.h> +#include <haproxy/peers-t.h> #include <haproxy/stconn-t.h> #include <haproxy/stream-t.h> #include <haproxy/task-t.h> @@ -36,10 +37,13 @@ #define SHOW_AS_H1S 0x00010000 #define SHOW_AS_FCONN 0x00020000 #define SHOW_AS_FSTRM 0x00040000 +#define SHOW_AS_PEERS 0x00080000 +#define SHOW_AS_PEER 0x00100000 // command line names, must be in exact same order as the SHOW_AS_* flags above // so that show_as_words[i] matches flag 1U<<i. 
-const char *show_as_words[] = { "ana", "chn", "conn", "sc", "stet", "strm", "task", "txn", "sd", "hsl", "htx", "hmsg", "fd", "h2c", "h2s", "h1c", "h1s", "fconn", "fstrm"}; +const char *show_as_words[] = { "ana", "chn", "conn", "sc", "stet", "strm", "task", "txn", "sd", "hsl", "htx", "hmsg", "fd", "h2c", "h2s", "h1c", "h1s", "fconn", "fstrm", + "peers", "peer"}; /* will be sufficient for even largest flag names */ static char buf[4096]; @@ -152,6 +156,8 @@ int main(int argc, char **argv) if (show_as & SHOW_AS_H1S) printf("h1s->flags = %s\n", (h1s_show_flags (buf, bsz, " | ", flags), buf)); if (show_as & SHOW_AS_FCONN) printf("fconn->flags = %s\n",(fconn_show_flags (buf, bsz, " | ", flags), buf)); if (show_as & SHOW_AS_FSTRM) printf("fstrm->flags = %s\n",(fstrm_show_flags (buf, bsz, " | ", flags), buf)); + if (show_as & SHOW_AS_PEERS) printf("peers->flags = %s\n",(peers_show_flags (buf, bsz, " | ", flags), buf)); + if (show_as & SHOW_AS_PEER) printf("peer->flags = %s\n", (peer_show_flags (buf, bsz, " | ", flags), buf)); } return 0; } diff --git a/dev/h2/mkhdr.sh b/dev/h2/mkhdr.sh index 4d129fa..4ed1a07 100755 --- a/dev/h2/mkhdr.sh +++ b/dev/h2/mkhdr.sh @@ -4,9 +4,13 @@ # All fields are optional. 0 assumed when absent. USAGE=\ -"Usage: %s [-l <len> ] [-t <type>] [-f <flags>] [-i <sid>] [ -d <data> ] > hdr.bin +"Usage: %s [-l <len> ] [-t <type>] [-f <flags>[,...]] [-i <sid>] [ -d <data> ] + [ -e <name> <value> ]* [ -r|-R raw ] [ -h | --help ] > hdr.bin Numbers are decimal or 0xhex. Not set=0. If <data> is passed, it points - to a file that is read and chunked into frames of <len> bytes. + to a file that is read and chunked into frames of <len> bytes. -e + encodes a headers frame (by default) with all headers at once encoded + in literal. Use type 'p' for the preface. Use -r to pass raw data or + -R to pass raw hex codes (hex digit pairs, blanks ignored). 
Supported symbolic types (case insensitive prefix match): DATA (0x00) PUSH_PROMISE (0x05) @@ -25,6 +29,8 @@ LEN= TYPE= FLAGS= ID= +RAW= +HDR=( ) die() { [ "$#" -eq 0 ] || echo "$*" >&2 @@ -48,7 +54,7 @@ mkframe() { local T="${2:-0}" local F="${3:-0}" local I="${4:-0}" - local t f + local t f f2 f3 # get the first match in this order for t in DATA:0x00 HEADERS:0x01 RST_STREAM:0x03 SETTINGS:0x04 PING:0x06 \ @@ -66,17 +72,37 @@ mkframe() { die fi - # get the first match in this order - for f in ES:0x01 EH:0x04 PAD:0x08 PRIO:0x20; do - if [ -z "${f##${F^^*}*}" ]; then - F="${f##*:}" + # get the first match in this order, for each entry delimited by ','. + # E.g.: "-f ES,EH" + f2=${F^^*}; F=0 + + while [ -n "$f2" ]; do + f3="${f2%%,*}" + tmp="" + for f in ES:0x01 EH:0x04 PAD:0x08 PRIO:0x20; do + if [ -n "$f3" -a -z "${f##${f3}*}" ]; then + tmp="${f#*:}" + break + fi + done + + if [ -n "$tmp" ]; then + F=$(( F | tmp )) + f2="${f2#$f3}" + f2="${f2#,}" + elif [ -z "${f3##[X0-9A-F]*}" ]; then + F=$(( F | f3 )) + f2="${f2#$f3}" + f2="${f2#,}" + else + echo "Unknown flag(s) '$f3'" >&2 + usage "${0##*}" + die fi done - if [ -n "${F##[0-9]*}" ]; then - echo "Unknown type '$T'" >&2 - usage "${0##*}" - die + if [ -n "$f2" ]; then + F="${f2} | ${F}" fi L=$(( L )); T=$(( T )); F=$(( F )); I=$(( I )) @@ -110,6 +136,9 @@ while [ -n "$1" -a -z "${1##-*}" ]; do -f) FLAGS="$2" ; shift 2 ;; -i) ID="$2" ; shift 2 ;; -d) DATA="$2" ; shift 2 ;; + -r) RAW="$2" ; shift 2 ;; + -R) RAW="$(printf $(echo -n "${2// /}" | sed -e 's/\([^ ][^ ]\)/\\\\x\1/g'))" ; shift 2 ;; + -e) TYPE=1; HDR[${#HDR[@]}]="$2=$3"; shift 3 ;; -h|--help) usage "${0##*}"; quit;; *) usage "${0##*}"; die ;; esac @@ -135,8 +164,35 @@ if [ -n "${ID##[0-9]*}" ]; then die fi -if [ -z "$DATA" ]; then +if [ "$TYPE" = "p" ]; then + printf "PRI * HTTP/2.0\r\n\r\nSM\r\n\r\n" +elif [ -z "$DATA" ]; then + # If we're trying to emit literal headers, let's pre-build the raw data + # and measure their total length. 
+ if [ ${#HDR[@]} -gt 0 ]; then + # limited to 127 bytes for name and value + for h in "${HDR[@]}"; do + n=${h%%=*} + v=${h#*=} + nl=${#n} + vl=${#v} + nl7=$(printf "%02x" $((nl & 127))) + vl7=$(printf "%02x" $((vl & 127))) + RAW="${RAW}\x40\x${nl7}${n}\x${vl7}${v}" + done + fi + + # compute length if RAW set + if [ -n "$RAW" ]; then + LEN=$(printf "${RAW}" | wc -c) + fi + mkframe "$LEN" "$TYPE" "$FLAGS" "$ID" + + # now emit the literal data of advertised length + if [ -n "$RAW" ]; then + printf "${RAW}" + fi else # read file $DATA in <LEN> chunks and send it in multiple frames # advertising their respective lengths. diff --git a/dev/haring/haring.c b/dev/haring/haring.c index ee7e1aa..4dfdafa 100644 --- a/dev/haring/haring.c +++ b/dev/haring/haring.c @@ -35,12 +35,34 @@ #include <haproxy/api.h> #include <haproxy/buf.h> -#include <haproxy/ring.h> +#include <haproxy/ring-t.h> +#include <haproxy/thread.h> int force = 0; // force access to a different layout int lfremap = 0; // remap LF in traces int repair = 0; // repair file +struct ring_v1 { + struct buffer buf; // storage area +}; + +// ring v2 format (not aligned) +struct ring_v2 { + size_t size; // storage size + size_t rsvd; // header length (used for file-backed maps) + size_t tail; // storage tail + size_t head; // storage head + char area[0]; // storage area begins immediately here +}; + +// ring v2 format (thread aligned) +struct ring_v2a { + size_t size; // storage size + size_t rsvd; // header length (used for file-backed maps) + size_t tail __attribute__((aligned(64))); // storage tail + size_t head __attribute__((aligned(64))); // storage head + char area[0] __attribute__((aligned(64))); // storage area begins immediately here +}; /* display the message and exit with the code */ __attribute__((noreturn)) void die(int code, const char *format, ...) 
@@ -69,75 +91,21 @@ __attribute__((noreturn)) void usage(int code, const char *arg0) "", arg0); } -/* This function dumps all events from the ring whose pointer is in <p0> into - * the appctx's output buffer, and takes from <o0> the seek offset into the - * buffer's history (0 for oldest known event). It looks at <i0> for boolean - * options: bit0 means it must wait for new data or any key to be pressed. Bit1 - * means it must seek directly to the end to wait for new contents. It returns - * 0 if the output buffer or events are missing is full and it needs to be - * called again, otherwise non-zero. It is meant to be used with - * cli_release_show_ring() to clean up. +/* dump a ring represented in a pre-initialized buffer, starting from offset + * <ofs> and with flags <flags> */ -int dump_ring(struct ring *ring, size_t ofs, int flags) +int dump_ring_as_buf(struct buffer buf, size_t ofs, int flags) { - struct buffer buf; uint64_t msg_len = 0; size_t len, cnt; const char *blk1 = NULL, *blk2 = NULL, *p; size_t len1 = 0, len2 = 0, bl; - /* Explanation: the storage area in the writing process starts after - * the end of the structure. Since the whole area is mmapped(), we know - * it starts at 0 mod 4096, hence the buf->area pointer's 12 LSB point - * to the relative offset of the storage area. As there will always be - * users using the wrong version of the tool with a dump, we need to - * run a few checks first. After that we'll create our own buffer - * descriptor matching that area. - */ - if ((((long)ring->buf.area) & 4095) != sizeof(*ring)) { - if (!force) { - fprintf(stderr, "FATAL: header in file is %ld bytes long vs %ld expected!\n", - (((long)ring->buf.area) & 4095), - (long)sizeof(*ring)); - exit(1); - } - else { - fprintf(stderr, "WARNING: header in file is %ld bytes long vs %ld expected!\n", - (((long)ring->buf.area) & 4095), - (long)sizeof(*ring)); - } - /* maybe we could emit a warning at least ? 
*/ - } - - /* Now make our own buffer pointing to that area */ - buf = b_make(((void *)ring + (((long)ring->buf.area) & 4095)), - ring->buf.size, ring->buf.head, ring->buf.data); - - /* explanation for the initialization below: it would be better to do - * this in the parsing function but this would occasionally result in - * dropped events because we'd take a reference on the oldest message - * and keep it while being scheduled. Thus instead let's take it the - * first time we enter here so that we have a chance to pass many - * existing messages before grabbing a reference to a location. This - * value cannot be produced after initialization. - */ - if (unlikely(ofs == ~0)) { - ofs = 0; - - /* going to the end means looking at tail-1 */ - ofs = (flags & RING_WF_SEEK_NEW) ? buf.data - 1 : 0; - - //HA_ATOMIC_INC(b_peek(&buf, ofs)); - } - while (1) { - //HA_RWLOCK_RDLOCK(RING_LOCK, &ring->lock); - if (ofs >= buf.size) { fprintf(stderr, "FATAL error at %d\n", __LINE__); return 1; } - //HA_ATOMIC_DEC(b_peek(&buf, ofs)); /* in this loop, ofs always points to the counter byte that precedes * the message so that we can take our reference there if we have to @@ -198,9 +166,6 @@ int dump_ring(struct ring *ring, size_t ofs, int flags) ofs += cnt + msg_len; } - //HA_ATOMIC_INC(b_peek(&buf, ofs)); - //HA_RWLOCK_RDUNLOCK(RING_LOCK, &ring->lock); - if (!(flags & RING_WF_WAIT_MODE)) break; @@ -210,9 +175,84 @@ int dump_ring(struct ring *ring, size_t ofs, int flags) return 0; } +/* This function dumps all events from the ring <ring> from offset <ofs> and + * with flags <flags>. + */ +int dump_ring_v1(struct ring_v1 *ring, size_t ofs, int flags) +{ + struct buffer buf; + + /* Explanation: the storage area in the writing process starts after + * the end of the structure. Since the whole area is mmapped(), we know + * it starts at 0 mod 4096, hence the buf->area pointer's 12 LSB point + * to the relative offset of the storage area. 
As there will always be + * users using the wrong version of the tool with a dump, we need to + * run a few checks first. After that we'll create our own buffer + * descriptor matching that area. + */ + + /* Now make our own buffer pointing to that area */ + buf = b_make(((void *)ring + (((long)ring->buf.area) & 4095)), + ring->buf.size, ring->buf.head, ring->buf.data); + + return dump_ring_as_buf(buf, ofs, flags); +} + +/* This function dumps all events from the ring <ring> from offset <ofs> and + * with flags <flags>. + */ +int dump_ring_v2(struct ring_v2 *ring, size_t ofs, int flags) +{ + size_t size, head, tail, data; + struct buffer buf; + + /* In ring v2 format, we have in this order: + * - size + * - hdr len (reserved bytes) + * - tail + * - head + * We can rebuild an equivalent buffer from these info for the function + * to dump. + */ + + /* Now make our own buffer pointing to that area */ + size = ring->size; + head = ring->head; + tail = ring->tail & ~RING_TAIL_LOCK; + data = (head <= tail ? 0 : size) + tail - head; + buf = b_make((void *)ring + ring->rsvd, size, head, data); + return dump_ring_as_buf(buf, ofs, flags); +} + +/* This function dumps all events from the ring <ring> from offset <ofs> and + * with flags <flags>. + */ +int dump_ring_v2a(struct ring_v2a *ring, size_t ofs, int flags) +{ + size_t size, head, tail, data; + struct buffer buf; + + /* In ring v2 format, we have in this order: + * - size + * - hdr len (reserved bytes) + * - tail + * - head + * We can rebuild an equivalent buffer from these info for the function + * to dump. + */ + + /* Now make our own buffer pointing to that area */ + size = ring->size; + head = ring->head; + tail = ring->tail & ~RING_TAIL_LOCK; + data = (head <= tail ? 
0 : size) + tail - head; + buf = b_make((void *)ring + ring->rsvd, size, head, data); + return dump_ring_as_buf(buf, ofs, flags); +} + int main(int argc, char **argv) { - struct ring *ring; + void *ring; struct stat statbuf; const char *arg0; int fd; @@ -254,7 +294,15 @@ int main(int argc, char **argv) return 1; } - return dump_ring(ring, ~0, 0); + if (((struct ring_v2 *)ring)->rsvd < 4096 && // not a pointer (v1), must be ringv2's rsvd + ((struct ring_v2 *)ring)->rsvd + ((struct ring_v2 *)ring)->size == statbuf.st_size) { + if (((struct ring_v2 *)ring)->rsvd < 192) + return dump_ring_v2(ring, 0, 0); + else + return dump_ring_v2a(ring, 0, 0); // thread-aligned version + } + else + return dump_ring_v1(ring, 0, 0); } diff --git a/dev/patchbot/README b/dev/patchbot/README new file mode 100644 index 0000000..a645cc3 --- /dev/null +++ b/dev/patchbot/README @@ -0,0 +1,395 @@ +Patchbot: AI bot making use of Natural Language Processing to suggest backports +=============================================================== 2023-12-18 ==== + + +Background +---------- + +Selecting patches to backport from the development branch is a tedious task, in +part due to the abundance of patches and the fact that many bug fixes are for +that same version and not for backporting. The more it gets delayed, the harder +it becomes, and the harder it is to start, the less likely it gets started. The +urban legend along which one "just" has to do that periodically doesn't work +because certain patches need to be left hanging for a while under observation, +others need to be merged urgently, and for some, the person in charge of the +backport might simply need an opinion from the patch's author or the affected +subsystem maintainer, and this cannot make the whole backport process stall. 
+ +The information needed to figure if a patch needs to be backported is present +in the commit message, with varying nuances such as "may", "may not", "should", +"probably", "shouldn't unless", "keep under observation" etc. One particularity +that is specific to backports is that the opinion on a patch may change over +time, either because it was later found to be wrong or insufficient, or because +the former analysis mistakenly suggested to backport or not to. + +This means that the person in charge of the backports has to read the whole +commit message for each patch, to figure the backporting instructions, and this +takes a while. + +Several attempts were made over the years to try to partially automate this +task, including the cherry-pick mode of the "git-show-backports" utility that +eases navigation back-and-forth between commits. + +Lately, a lot of progress was made in the domain of Natural Language +Understanding (NLU) and more generally Natural Language Processing (NLP). Between +the first attempts in early 2023 involving successive layers of the Roberta +model, called from totally unreliable Python code, and December 2023, the +situation evolved from promising but unusable to mostly autonomous. + +For those interested in history, the first attempts in early 2023 involved +successive layers of the Roberta model, but these were relying on totally +unreliable Python code that broke all the time and could barely be transferred +to another machine without upgrading or downgrading the installed modules, and +it used to use huge amounts of resources for a somewhat disappointing result: +the verdicts were correct roughly 60-70% of the time, it was not possible to +get hints such as "wait" nor even "uncertain". It could just be qualified as +promising. Another big limitation was the limit to 256 tokens, forcing the +script to select only the last few lines of the commit message to take the +decision.
Roughly at the same time, in March 2023 Meta issued their much larger +LLaMa model, and Georgi Gerganov released "llama.cpp", an open-source C++ +engine that loads and runs such large models without all the usual problems +inherent to the Python ecosystem. New attempts were made with LLaMa and it was +already much better than Roberta, but the output was difficult to parse, and it +required to be combined with the final decision layer of Roberta. Then new +variants of LLaMa appeared such as Alpaca, which follows instructions, but +tends to forget them if given before the patch, then Vicuna which was pretty +reliable but very slow at 33B size and difficult to tune, then Airoboros, +which was the first one to give very satisfying results in a reasonable time, +following instructions reasonably closely with a stable output, but with +sometimes surprising analysis and contradictions. It was already about 90% +reliable and considered as a time saver in 13B size. Other models were later +tried as they appeared such as OpenChat-3.5, Juna, OpenInstruct, Orca-2, +Mistral-0.1 and its variants Neural and OpenHermes-2.5. Mistral showed an +unrivaled understanding despite being smaller and much faster than other ones, +but was a bit freewheeling regarding instructions. Dolphin-2.1 rebased on top +of it gave extremely satisfying results, with fewer variations in the output +format, but still the script had difficulties trying to catch its conclusion +from time to time, though it was pretty much readable for the human in charge +of the task. And finally just before releasing, Mistral-0.2 was released and +addressed all issues, with a human-like understanding and perfectly obeying +instructions, providing an extremely stable output format that is easy to parse +from simple scripts. The decisions now match the human's ones in close to 100% +of the patches, unless the human is aware of extra context, of course.
+ + +Architecture +------------ + +The current solution relies on the llama.cpp engine, which is a simple, fast, +reliable and portable engine to load models and run inference, and the +Mistral-0.2 LLM. + +A collection of patches is built from the development branch since the -dev0 +tag, and for each of them, the engine is called to evaluate the developer's +intent based on the commit message. A detailed context explaining the haproxy +maintenance model and what the user wants is passed, then the LLM is invited to +provide its opinion on the need for a backport and an explanation of the reason +for its choice. This often helps the user to find a quick summary about the +patch. All these outputs are then converted to a long HTML page with colors and +radio buttons, where patches are pre-selected based on this classification, +that the user can consult and adjust, read the commits if needed, and the +selected patches finally provide some copy-pastable commands in a text-area to +select commit IDs to work on, typically in a form that's suitable for a simple +"git cherry-pick -sx". + +The scripts are designed to be able to run on a headless machine, called from a +crontab and with the output served from a static HTTP server. + +The code is currently found from Georgi Gerganov's repository: + + https://github.com/ggerganov/llama.cpp + +Tag b1505 is known to work fine, and uses the GGUF file format. + +The model(s) can be found on Hugging Face user "TheBloke"'s collection of +models: + + https://huggingface.co/TheBloke + +Model Mistral-7B-Instruct-v0.2-GGUF quantized at Q5_K_M is known to work well +with the llama.cpp version above. + + +Deployment +---------- + +Note: it is a good idea to start to download the model(s) in the background as + such files are typically 5 GB or more and can take some time to download + depending on the internet bandwidth. + +It seems reasonable to create a dedicated user to periodically run this task. +Let's call it "patchbot".
Developers should be able to easily run a shell from +this user to perform some maintenance or testing (e.g. "sudo"). + +All paths are specified in the example "update-3.0.sh" script, and assume a +deployment in the user's home, so this is what is being described here. The +proposed deployment layout is the following: + + $HOME (e.g. /home/patchbot) + | + +- data + | | + | +-- models # GGUF files from TheBloke's collection + | | + | +-- prompts # prompt*-pfx*, prompt*-sfx*, cache + | | + | +-- in + | | | + | | +-- haproxy # haproxy Git repo + | | | + | | +-- patches-3.0 # patches from development branch 3.0 + | | + | +-- out # report directory (HTML) + | + +- prog + | | + | +-- bin # program(s) + | | + | +-- scripts # processing scripts + | | + | +-- llama.cpp # llama Git repository + + +- Let's first create the structure: + + mkdir -p ~/data/{in,models,prompts} ~/prog/{bin,scripts} + +- data/in/haproxy must contain a clone of the haproxy development tree that + will periodically be pulled from: + + cd ~/data/in + git clone https://github.com/haproxy/haproxy + cd ~ + +- The prompt files are a copy of haproxy's "dev/patchbot/prompts/" subdirectory. + The prompt files are per-version because they contain references to the + haproxy development version number. For each prompt, there is a prefix + ("-pfx"), that is loaded before the patch, and a suffix ("-sfx") that + specifies the user's expectations after reading the patch. For best efficiency + it's useful to place most of the explanation in the prefix and the least + possible in the suffix, because the prefix is cacheable. Different models + will use different instructions formats and different explanations, so it's + fine to keep a collection of prompts and use only one. Different instruction + formats are commonly used, "llama-2", "alpaca", "vicuna", "chatml" being + common. When experimenting with a new model, just copy-paste the closest one + and tune it for best results.
Since we already cloned haproxy above, we'll + take the files from there: + + cp ~/data/in/haproxy/dev/patchbot/prompts/*txt ~/data/prompts/ + + Upon first run, a cache file will be produced in this directory by parsing + an empty file and saving the current model's context. The cache file will + automatically be deleted and rebuilt if it is absent or older than the prefix + or suffix file. The cache files are specific to a model so when experimenting + with other models, be sure not to reuse the same cache file, or in doubt, + just delete them. Rebuilding the cache file typically takes around 2 minutes + of processing on an 8-core machine. + +- The model(s) from TheBloke's Hugging Face account have to be downloaded in + GGUF file format, quantized at Q5_K_M, and stored as-is into data/models/. + +- data/in/patches-3.0/ is where the "mk-patch-list.sh" script will emit the + patches corresponding to new commits in the development branch. Its suffix + must match the name of the current development branch for patches to be found + there. In addition, the classification of the patches will be emitted there + next to the input patches, with the same name as the original file with a + suffix indicating what model/prompt combination was used. + + mkdir -p ~/data/in/patches-3.0 + +- data/out is where the final report will be emitted. If running on a headless + machine, it is worth making sure that this directory is accessible from a + static web server. Thus either create a directory and place a symlink or + configuration somewhere in the web server's settings to reference this + location, or make it a symlink to another place already exported by the web + server and make sure the user has the permissions to write there. + + mkdir -p ~/data/out + + On Ubuntu-20.04 it was found that the package "micro-httpd" works out of the + box serving /var/www/html and follows symlinks.
As such this is sufficient to + expose the reports: + + sudo ln -s ~patchbot/data/out /var/www/html/patchbot + +- prog/bin will contain the executable(s) needed to operate, namely "main" from + llama.cpp: + + mkdir -p ~/prog/bin + +- prog/llama.cpp is a clone of the "llama.cpp" GitHub repository. As of + December 2023, the project has improved its forward compatibility and it's + generally both safe and recommended to stay on the last version, hence to + just clone the master branch. In case of difficulties, tag b1505 was proven + to work well with the aforementioned model. Building is done by default for + the local platform, optimised for speed with native CPU. + + mkdir -p ~/prog + cd ~/prog + git clone https://github.com/ggerganov/llama.cpp && cd llama.cpp + [ only in case of problems: git checkout b1505 ] + + make -j$(nproc) main LLAMA_FAST=1 + cp main ~/prog/bin/ + cd ~ + +- prog/scripts needs the following scripts: + - mk-patch-list.sh from haproxy's scripts/ subdirectory + - submit-ai.sh, process-*.sh, post-ai.sh, update-*.sh + + cp ~/data/in/haproxy/scripts/mk-patch-list.sh ~/prog/scripts/ + cp ~/data/in/haproxy/dev/patchbot/scripts/*.sh ~/prog/scripts/ + + - verify that the various paths in update-3.0.sh match your choices, or + adjust them: + + vi ~/prog/scripts/update-3.0.sh + + - the tool is memory-bound, so a machine with more memory channels and/or + very fast memory will usually be faster than a higher CPU count with a + lower memory bandwidth. In addition, the performance is not linear with + the number of cores and experimentation shows that efficiency drops above + 8 threads. For this reason the script integrates a "PARALLEL_RUNS" variable + indicating how many instances to run in parallel, each on its own patch. + This allows to make better use of the CPUs and memory bandwidth. Setting + 2 instances for 8 cores / 16 threads gives optimal results on dual memory + channel systems.
+ +From this point, executing this update script manually should work and produce +the result. Count around 0.5-2 mn per patch on an 8-core machine, so it can be +reasonably fast during the early development stages (before -dev1) but +unbearably long later, where it can make more sense to run it at night. It +should not report any error and should only report the total execution time. + +If interrupted (Ctrl-C, logout, out of memory etc), check for incomplete .txt +files in ~/data/in/patches*/ that can result from this interruption, and delete +them because they will not be reproduced: + + ls -lart ~/data/in/patches-3.0/*.txt + ls -lS ~/data/in/patches-3.0/*.txt + +Once the output is produced, visit ~/data/out/ using a web browser and check +that the table loads correctly. Note that after a new release or a series of +backports, the table may appear empty, it's just because all known patches are +already backported and collapsed by default. Clicking on "All" at the top left +will unhide them. + +Finally when satisfied, place it in a crontab, for example, run every hour: + + crontab -e + + # m h dom mon dow command + # run every hour at minute 02 + 2 * * * * /home/patchbot/prog/scripts/update-3.0.sh + + +Usage +----- + +Using the HTML output is a bit rustic but efficient. The interface is split in +5 columns from left to right: + + - first column: patch number from 1 to N, just to ease navigation. Below the + number appears a radio button which allows to mark this patch as the start + of the review. When clicked, all prior patches disappear and are not listed + anymore. This can be undone by clicking on the radio button under the "All" + word in this column's header. + + + - second column: commit ID (abbreviated "CID" in the header). It's an 8-digit + shortened representation of the commit ID. It's presented as a link, which, + if clicked, will directly show that commit from the haproxy public + repository.
Below the commit ID is the patch's author date in condensed + format "DD-MmmYY", e.g. "18-Dec23" for "18th December 2023". It was found + that having a date indication sometimes helps differentiate certain related + patches. + + - third column: "Subject", this is the subject of the patch, prefixed with + the 4-digit number matching the file name in the directory (e.g. helps to + remove or reprocess one if needed). This is also a link to the same commit + in the haproxy's public repository. At the lower right under the subject + is the shortened e-mail address (only user@domain keeping only the first + part of the domain, e.g. "foo@haproxy"). Just like with the date, it helps + figuring what to expect after a recent discussion with a developer. + + - fourth column: "Verdict". This column contains 4 radio buttons prefiguring + the choice for this patch between "N" for "No", represented in gray (this + patch should not be backported, let's drop it), "U" for "Uncertain" in + green (still unsure about it, most likely the author should be contacted), + "W" for "Wait" in blue (this patch should be backported but not + immediately, only after it has spent some time in the development branch), + and "Y" for "Yes" in red (this patch must be backported, let's pick it). + The choice is preselected by the scripts above, and since these are radio + buttons, the user is free to change this selection. Reloading will lose the + user's choices. When changing a selection, the line's background changes to + match a similar color tone, allowing to visually spot preselected patches. + + - fifth column: reason for the choice. The scripts try to provide an + explanation for the choice of the preselection, and try to always end with + a conclusion among "yes", "no", "wait", "uncertain". The explanation + usually fits in 2-4 lines and is faster to read than a whole commit message + and very often pretty accurate. 
It's also been noticed that Mistral-v0.2 + shows far fewer hallucinations than others (it doesn't seem to invent + information that was not part of its input), so seeing certain topics being + discussed there generally indicates that they were in the original commit + message. The scripts try to emphasize the sensitive parts of the commit + message such as risks, dependencies, referenced issues, oldest version to + backport to, etc. Elements that look like issue numbers and commit IDs are + turned to links to ease navigation. + +In addition, in order to improve readability, the top of the table shows 4 +buttons allowing to show/hide each category. For example, when trying to focus +only on "uncertain" and "wait", it can make sense to hide "N" and "Y" and click +"Y" or "N" on the displayed ones until there is none anymore. + +In order to reduce the risk of missing a misqualified patch, those marked "BUG" +or "DOC" are displayed in bold even if tagged "No". It has been shown to be +sufficient to catch the eye when scrolling and encouraging to re-visit them. + +More importantly, the script will try to also check which patches were already +backported to the previous stable version. Those that were backported will have +the first two columns colored gray, and by default, the review will start from +the first patch after the last backported one. This explains why just after a +backport, the table may appear empty with only the footer "New" checked. + +Finally, at the bottom of the table is an editable, copy-pastable text area +that is redrawn at each click. It contains a series of 4 shell commands that +can be copy-pasted at once and assign commit IDs to 4 variables, one per +category.
Most often only "y" will be of interest, so for example if the +review process ends with: + + cid_y=( 7dab3e82 456ba6e9 75f5977f 917f7c74 ) + +Then copy-pasting it in a terminal already in the haproxy-2.9 directory and +issuing: + + git cherry-pick -sx ${cid_y[@]} + +Will result in all these patches being backported to that version. + + +Criticisms +---------- + +The interface is absolutely ugly but gets the job done. Proposals to revamp it +are welcome, provided that they do not alter usability and portability (e.g. +the ability to open the locally produced file without requiring access to an +external server). + + +Thanks +------ + +This utility is the proof that boringly repetitive tasks that can be offloaded +from humans can save their time to do more productive things. This work which +started with extremely limited tools was made possible thanks to Meta, for +opening their models after leaking it, Georgi Gerganov and the community that +developed around llama.cpp, for creating the first really open engine that +builds out of the box and just works, contrary to the previous crippled Python- +only ecosystem, Tom Jobbins (aka TheBloke) for making it so easy to discover +new models every day by simply quantizing all of them and making them available +from a single location, MistralAI for producing an exceptionally good model +that surpasses all others, is the first one to feel as smart and accurate as a +real human on such tasks, is fast, and totally free, and of course, HAProxy +Technologies for investing some time on this and for the available hardware +that permits a lot of experimentation. diff --git a/dev/patchbot/prompts/prompt14-2.9-airo14-pfx.txt b/dev/patchbot/prompts/prompt14-2.9-airo14-pfx.txt new file mode 100644 index 0000000..2f3fde2 --- /dev/null +++ b/dev/patchbot/prompts/prompt14-2.9-airo14-pfx.txt @@ -0,0 +1,70 @@ +BEGININPUT +BEGINCONTEXT + +HAProxy's development cycle consists in one development branch, and multiple +maintenance branches.
+ +All the development is made into the development branch exclusively. This +includes mostly new features, doc updates, cleanups and or course, fixes. + +The maintenance branches, also called stable branches, never see any +development, and only receive ultra-safe fixes for bugs that affect them, +that are picked from the development branch. + +Branches are numbered in 0.1 increments. Every 6 months, upon a new major +release, the development branch enters maintenance and a new development branch +is created with a new, higher version. The current development branch is +2.9-dev, and maintenance branches are 2.8 and below. + +Fixes created in the development branch for issues that were introduced in an +earlier branch are applied in descending order to each and every version till +that branch that introduced the issue: 2.8 first, then 2.7, then 2.6 and so +on. This operation is called "backporting". A fix for an issue is never +backported beyond the branch that introduced the issue. An important point is +that the project maintainers really aim at zero regression in maintenance +branches, so they're never willing to take any risk backporting patches that +are not deemed strictly necessary. + +Fixes consist of patches managed using the Git version control tool and are +identified by a Git commit ID and a commit message. For this reason we +indistinctly talk about backporting fixes, commits, or patches; all mean the +same thing. When mentioning commit IDs, developers always use a short form +made of the first 8 characters only, and expect the AI assistant to do the +same. + +It seldom happens that some fixes depend on changes that were brought by other +patches that were not in some branches and that will need to be backported as +well for the fix to work. In this case, such information is explicitly provided +in the commit message by the patch's author in natural language. + +Developers are serious and always indicate if a patch needs to be backported. 
+Sometimes they omit the exact target branch, or they will say that the patch is +"needed" in some older branch, but it means the same. If a commit message +doesn't mention any backport instructions, it means that the commit does not +have to be backported. And patches that are not strictly bug fixes nor doc +improvements are normally not backported. For example, fixes for design +limitations, architectural improvements and performance optimizations are +considered too risky for a backport. Finally, all bug fixes are tagged as +"BUG" at the beginning of their subject line. Patches that are not tagged as +such are not bugs, and must never be backported unless their commit message +explicitly requests so. + +ENDCONTEXT + +A developer is reviewing the development branch, trying to spot which commits +need to be backported to maintenance branches. This person is already expert +on HAProxy and everything related to Git, patch management, and the risks +associated with backports, so he doesn't want to be told how to proceed nor to +review the contents of the patch. + +The goal for this developer is to get some help from the AI assistant to save +some precious time on this tedious review work. In order to do a better job, he +needs an accurate summary of the information and instructions found in each +commit message. Specifically he needs to figure if the patch fixes a problem +affecting an older branch or not, if it needs to be backported, if so to which +branches, and if other patches need to be backported along with it. + +The indented text block below after an "id" line and starting with a Subject line +is a commit message from the HAProxy development branch that describes a patch +applied to that branch, starting with its subject line, please read it carefully. 
+ diff --git a/dev/patchbot/prompts/prompt14-2.9-alpaca-pfx.txt b/dev/patchbot/prompts/prompt14-2.9-alpaca-pfx.txt new file mode 100644 index 0000000..cabe7f0 --- /dev/null +++ b/dev/patchbot/prompts/prompt14-2.9-alpaca-pfx.txt @@ -0,0 +1,68 @@ +### Instruction: + +HAProxy's development cycle consists in one development branch, and multiple +maintenance branches. + +All the development is made into the development branch exclusively. This +includes mostly new features, doc updates, cleanups and or course, fixes. + +The maintenance branches, also called stable branches, never see any +development, and only receive ultra-safe fixes for bugs that affect them, +that are picked from the development branch. + +Branches are numbered in 0.1 increments. Every 6 months, upon a new major +release, the development branch enters maintenance and a new development branch +is created with a new, higher version. The current development branch is +2.9-dev, and maintenance branches are 2.8 and below. + +Fixes created in the development branch for issues that were introduced in an +earlier branch are applied in descending order to each and every version till +that branch that introduced the issue: 2.8 first, then 2.7, then 2.6 and so +on. This operation is called "backporting". A fix for an issue is never +backported beyond the branch that introduced the issue. An important point is +that the project maintainers really aim at zero regression in maintenance +branches, so they're never willing to take any risk backporting patches that +are not deemed strictly necessary. + +Fixes consist of patches managed using the Git version control tool and are +identified by a Git commit ID and a commit message. For this reason we +indistinctly talk about backporting fixes, commits, or patches; all mean the +same thing. When mentioning commit IDs, developers always use a short form +made of the first 8 characters only, and expect the AI assistant to do the +same. 
+ +It seldom happens that some fixes depend on changes that were brought by other +patches that were not in some branches and that will need to be backported as +well for the fix to work. In this case, such information is explicitly provided +in the commit message by the patch's author in natural language. + +Developers are serious and always indicate if a patch needs to be backported. +Sometimes they omit the exact target branch, or they will say that the patch is +"needed" in some older branch, but it means the same. If a commit message +doesn't mention any backport instructions, it means that the commit does not +have to be backported. And patches that are not strictly bug fixes nor doc +improvements are normally not backported. For example, fixes for design +limitations, architectural improvements and performance optimizations are +considered too risky for a backport. Finally, all bug fixes are tagged as +"BUG" at the beginning of their subject line. Patches that are not tagged as +such are not bugs, and must never be backported unless their commit message +explicitly requests so. + +A developer is reviewing the development branch, trying to spot which commits +need to be backported to maintenance branches. This person is already expert +on HAProxy and everything related to Git, patch management, and the risks +associated with backports, so he doesn't want to be told how to proceed nor to +review the contents of the patch. + +The goal for this developer is to get some help from the AI assistant to save +some precious time on this tedious review work. In order to do a better job, he +needs an accurate summary of the information and instructions found in each +commit message. Specifically he needs to figure if the patch fixes a problem +affecting an older branch or not, if it needs to be backported, if so to which +branches, and if other patches need to be backported along with it. 
+ +The indented text block below after an "id" line and starting with a Subject line +is a commit message from the HAProxy development branch that describes a patch +applied to that branch, starting with its subject line, please read it carefully. + +### Input: diff --git a/dev/patchbot/prompts/prompt14-2.9-alpaca-sfx.txt b/dev/patchbot/prompts/prompt14-2.9-alpaca-sfx.txt new file mode 100644 index 0000000..9906132 --- /dev/null +++ b/dev/patchbot/prompts/prompt14-2.9-alpaca-sfx.txt @@ -0,0 +1,28 @@ + +### Instruction: + +You are an AI assistant that follows instruction extremely well. Help as much +as you can, responding to a single question using a single response. + +The developer wants to know if he needs to backport the patch above to fix +maintenance branches, for which branches, and what possible dependencies might +be mentioned in the commit message. Carefully study the commit message and its +backporting instructions if any (otherwise it should probably not be backported), +then provide a very concise and short summary that will help the developer decide +to backport it, or simply to skip it. + +Start by explaining in one or two sentences what you recommend for this one and why. +Finally, based on your analysis, give your general conclusion as "Conclusion: X" +where X is a single word among: + - "yes", if you recommend to backport the patch right now either because + it explicitly states this or because it's a fix for a bug that affects + a maintenance branch (2.8 or lower); + - "wait", if this patch explicitly mentions that it must be backported, but + only after waiting some time. + - "no", if nothing clearly indicates a necessity to backport this patch (e.g. 
+ lack of explicit backport instructions, or it's just an improvement); + - "uncertain" otherwise for cases not covered above + +### Response: + +Explanation: diff --git a/dev/patchbot/prompts/prompt14-2.9-chatml-pfx.txt b/dev/patchbot/prompts/prompt14-2.9-chatml-pfx.txt new file mode 100644 index 0000000..c35138e --- /dev/null +++ b/dev/patchbot/prompts/prompt14-2.9-chatml-pfx.txt @@ -0,0 +1,67 @@ +<|im_start|>system +HAProxy's development cycle consists in one development branch, and multiple +maintenance branches. + +All the development is made into the development branch exclusively. This +includes mostly new features, doc updates, cleanups and or course, fixes. + +The maintenance branches, also called stable branches, never see any +development, and only receive ultra-safe fixes for bugs that affect them, +that are picked from the development branch. + +Branches are numbered in 0.1 increments. Every 6 months, upon a new major +release, the development branch enters maintenance and a new development branch +is created with a new, higher version. The current development branch is +2.9-dev, and maintenance branches are 2.8 and below. + +Fixes created in the development branch for issues that were introduced in an +earlier branch are applied in descending order to each and every version till +that branch that introduced the issue: 2.8 first, then 2.7, then 2.6 and so +on. This operation is called "backporting". A fix for an issue is never +backported beyond the branch that introduced the issue. An important point is +that the project maintainers really aim at zero regression in maintenance +branches, so they're never willing to take any risk backporting patches that +are not deemed strictly necessary. + +Fixes consist of patches managed using the Git version control tool and are +identified by a Git commit ID and a commit message. For this reason we +indistinctly talk about backporting fixes, commits, or patches; all mean the +same thing. 
When mentioning commit IDs, developers always use a short form +made of the first 8 characters only, and expect the AI assistant to do the +same. + +It seldom happens that some fixes depend on changes that were brought by other +patches that were not in some branches and that will need to be backported as +well for the fix to work. In this case, such information is explicitly provided +in the commit message by the patch's author in natural language. + +Developers are serious and always indicate if a patch needs to be backported. +Sometimes they omit the exact target branch, or they will say that the patch is +"needed" in some older branch, but it means the same. If a commit message +doesn't mention any backport instructions, it means that the commit does not +have to be backported. And patches that are not strictly bug fixes nor doc +improvements are normally not backported. For example, fixes for design +limitations, architectural improvements and performance optimizations are +considered too risky for a backport. Finally, all bug fixes are tagged as +"BUG" at the beginning of their subject line. Patches that are not tagged as +such are not bugs, and must never be backported unless their commit message +explicitly requests so. + +A developer is reviewing the development branch, trying to spot which commits +need to be backported to maintenance branches. This person is already expert +on HAProxy and everything related to Git, patch management, and the risks +associated with backports, so he doesn't want to be told how to proceed nor to +review the contents of the patch. + +The goal for this developer is to get some help from the AI assistant to save +some precious time on this tedious review work. In order to do a better job, he +needs an accurate summary of the information and instructions found in each +commit message. 
Specifically he needs to figure if the patch fixes a problem +affecting an older branch or not, if it needs to be backported, if so to which +branches, and if other patches need to be backported along with it. + +The indented text block below after an "id" line and starting with a Subject line +is a commit message from the HAProxy development branch that describes a patch +applied to that branch, starting with its subject line, please read it carefully. +<|im_end|> +<|im_start|>user diff --git a/dev/patchbot/prompts/prompt14-2.9-chatml-sfx.txt b/dev/patchbot/prompts/prompt14-2.9-chatml-sfx.txt new file mode 100644 index 0000000..31e26d6 --- /dev/null +++ b/dev/patchbot/prompts/prompt14-2.9-chatml-sfx.txt @@ -0,0 +1,28 @@ +<|im_end|> +<|im_start|>system + +You are an AI assistant that follows instruction extremely well. Help as much +as you can, responding to a single question using a single response. + +The developer wants to know if he needs to backport the patch above to fix +maintenance branches, for which branches, and what possible dependencies might +be mentioned in the commit message. Carefully study the commit message and its +backporting instructions if any (otherwise it should probably not be backported), +then provide a very concise and short summary that will help the developer decide +to backport it, or simply to skip it. + +Start by explaining in one or two sentences what you recommend for this one and why. +Finally, based on your analysis, give your general conclusion as "Conclusion: X" +where X is a single word among: + - "yes", if you recommend to backport the patch right now either because + it explicitly states this or because it's a fix for a bug that affects + a maintenance branch (2.8 or lower); + - "wait", if this patch explicitly mentions that it must be backported, but + only after waiting some time. + - "no", if nothing clearly indicates a necessity to backport this patch (e.g. 
+ lack of explicit backport instructions, or it's just an improvement); + - "uncertain" otherwise for cases not covered above +<|im_end|> +<|im_start|>assistant + +Explanation: diff --git a/dev/patchbot/prompts/prompt14-2.9-mist7b-sfx.txt b/dev/patchbot/prompts/prompt14-2.9-mist7b-sfx.txt new file mode 100644 index 0000000..3d1b03b --- /dev/null +++ b/dev/patchbot/prompts/prompt14-2.9-mist7b-sfx.txt @@ -0,0 +1,29 @@ + +ENDINPUT +BEGININSTRUCTION + +You are an AI assistant that follows instruction extremely well. Help as much +as you can, responding to a single question using a single response. + +The developer wants to know if he needs to backport the patch above to fix +maintenance branches, for which branches, and what possible dependencies might +be mentioned in the commit message. Carefully study the commit message and its +backporting instructions if any (otherwise it should probably not be backported), +then provide a very concise and short summary that will help the developer decide +to backport it, or simply to skip it. + +Start by explaining in one or two sentences what you recommend for this one and why. +Finally, based on your analysis, give your general conclusion as "Conclusion: X" +where X is a single word among: + - "yes", if you recommend to backport the patch right now either because + it explicitly states this or because it's a fix for a bug that affects + a maintenance branch (2.8 or lower); + - "wait", if this patch explicitly mentions that it must be backported, but + only after waiting some time. + - "no", if nothing clearly indicates a necessity to backport this patch (e.g. 
+ lack of explicit backport instructions, or it's just an improvement); + - "uncertain" otherwise for cases not covered above + +ENDINSTRUCTION + +Explanation: diff --git a/dev/patchbot/prompts/prompt15-3.1-mist7bv2-pfx.txt b/dev/patchbot/prompts/prompt15-3.1-mist7bv2-pfx.txt new file mode 100644 index 0000000..3120167 --- /dev/null +++ b/dev/patchbot/prompts/prompt15-3.1-mist7bv2-pfx.txt @@ -0,0 +1,70 @@ +BEGININPUT +BEGINCONTEXT + +HAProxy's development cycle consists of one development branch, and multiple +maintenance branches. + +All the development is made into the development branch exclusively. This +includes mostly new features, doc updates, cleanups and of course, fixes. + +The maintenance branches, also called stable branches, never see any +development, and only receive ultra-safe fixes for bugs that affect them, +that are picked from the development branch. + +Branches are numbered in 0.1 increments. Every 6 months, upon a new major +release, the development branch enters maintenance and a new development branch +is created with a new, higher version. The current development branch is +3.1-dev, and maintenance branches are 3.0 and below. + +Fixes created in the development branch for issues that were introduced in an +earlier branch are applied in descending order to each and every version till +that branch that introduced the issue: 3.0 first, then 2.9, then 2.8 and so +on. This operation is called "backporting". A fix for an issue is never +backported beyond the branch that introduced the issue. An important point is +that the project maintainers really aim at zero regression in maintenance +branches, so they're never willing to take any risk backporting patches that +are not deemed strictly necessary. + +Fixes consist of patches managed using the Git version control tool and are +identified by a Git commit ID and a commit message. For this reason we +indistinctly talk about backporting fixes, commits, or patches; all mean the +same thing. 
When mentioning commit IDs, developers always use a short form +made of the first 8 characters only, and expect the AI assistant to do the +same. + +It seldom happens that some fixes depend on changes that were brought by other +patches that were not in some branches and that will need to be backported as +well for the fix to work. In this case, such information is explicitly provided +in the commit message by the patch's author in natural language. + +Developers are serious and always indicate if a patch needs to be backported. +Sometimes they omit the exact target branch, or they will say that the patch is +"needed" in some older branch, but it means the same. If a commit message +doesn't mention any backport instructions, it means that the commit does not +have to be backported. And patches that are not strictly bug fixes nor doc +improvements are normally not backported. For example, fixes for design +limitations, architectural improvements and performance optimizations are +considered too risky for a backport. Finally, all bug fixes are tagged as +"BUG" at the beginning of their subject line. Patches that are not tagged as +such are not bugs, and must never be backported unless their commit message +explicitly requests so. + +ENDCONTEXT + +A developer is reviewing the development branch, trying to spot which commits +need to be backported to maintenance branches. This person is already expert +on HAProxy and everything related to Git, patch management, and the risks +associated with backports, so he doesn't want to be told how to proceed nor to +review the contents of the patch. + +The goal for this developer is to get some help from the AI assistant to save +some precious time on this tedious review work. In order to do a better job, he +needs an accurate summary of the information and instructions found in each +commit message. 
Specifically he needs to figure if the patch fixes a problem +affecting an older branch or not, if it needs to be backported, if so to which +branches, and if other patches need to be backported along with it. + +The indented text block below after an "id" line and starting with a Subject line +is a commit message from the HAProxy development branch that describes a patch +applied to that branch, starting with its subject line, please read it carefully. + diff --git a/dev/patchbot/prompts/prompt15-3.1-mist7bv2-sfx.txt b/dev/patchbot/prompts/prompt15-3.1-mist7bv2-sfx.txt new file mode 100644 index 0000000..dd4280b --- /dev/null +++ b/dev/patchbot/prompts/prompt15-3.1-mist7bv2-sfx.txt @@ -0,0 +1,29 @@ + +ENDINPUT +BEGININSTRUCTION + +You are an AI assistant that follows instruction extremely well. Help as much +as you can, responding to a single question using a single response. + +The developer wants to know if he needs to backport the patch above to fix +maintenance branches, for which branches, and what possible dependencies might +be mentioned in the commit message. Carefully study the commit message and its +backporting instructions if any (otherwise it should probably not be backported), +then provide a very concise and short summary that will help the developer decide +to backport it, or simply to skip it. + +Start by explaining in one or two sentences what you recommend for this one and why. +Finally, based on your analysis, give your general conclusion as "Conclusion: X" +where X is a single word among: + - "yes", if you recommend to backport the patch right now either because + it explicitly states this or because it's a fix for a bug that affects + a maintenance branch (3.0 or lower); + - "wait", if this patch explicitly mentions that it must be backported, but + only after waiting some time. + - "no", if nothing clearly indicates a necessity to backport this patch (e.g. 
+ lack of explicit backport instructions, or it's just an improvement); + - "uncertain" otherwise for cases not covered above + +ENDINSTRUCTION + +Explanation: diff --git a/dev/patchbot/scripts/post-ai.sh b/dev/patchbot/scripts/post-ai.sh new file mode 100755 index 0000000..7dba63a --- /dev/null +++ b/dev/patchbot/scripts/post-ai.sh @@ -0,0 +1,372 @@ +#!/bin/bash + +#### +#### Todo: +#### - change line color based on the selected radio button +#### - support collapsing lines per color/category (show/hide for each) +#### - add category "next" and see if the prompt can handle that (eg: d3e379b3) +#### - produce multiple lists on output (per category) allowing to save batches +#### + +die() { + [ "$#" -eq 0 ] || echo "$*" >&2 + exit 1 +} + +err() { + echo "$*" >&2 +} + +quit() { + [ "$#" -eq 0 ] || echo "$*" + exit 0 +} + +#### Main + +USAGE="Usage: ${0##*/} [ -h ] [ -b 'bkp_list' ] patch..." +MYSELF="$0" +GITURL="http://git.haproxy.org/?p=haproxy.git;a=commitdiff;h=" +ISSUES="https://github.com/haproxy/haproxy/issues/" +BKP="" + +while [ -n "$1" -a -z "${1##-*}" ]; do + case "$1" in + -h|--help) quit "$USAGE" ;; + -b) BKP="$2"; shift 2 ;; + *) die "$USAGE" ;; + esac +done + +PATCHES=( "$@" ) + +if [ ${#PATCHES[@]} = 0 ]; then + die "$USAGE" +fi + +# BKP is a space-delimited list of 8-char commit IDs, we'll +# assign them to the local bkp[] associative array. 
+ +declare -A bkp + +for cid in $BKP; do + bkp[$cid]=1 +done + +# some colors +BG_B="#e0e0e0" +BT_N="gray"; BG_N="white" +BT_U="#00e000"; BG_U="#e0ffe0" +BT_W="#0060ff"; BG_W="#e0e0ff" +BT_Y="red"; BG_Y="#ffe0e0" + +echo "<HTML>" + +cat <<- EOF +<HEAD><style> +input.n[type="radio"] { + appearance: none; + width: 1.25em; + height: 1.25em; + border-radius: 50%; + border: 3px solid $BT_N; + background-color: transparent; +} +input.n[type="radio"]:checked { + appearance: none; + width: 1.25em; + height: 1.25em; + border-radius: 50%; + border: 2px solid black; + background-color: $BT_N; +} + +input.u[type="radio"] { + appearance: none; + width: 1.25em; + height: 1.25em; + border-radius: 50%; + border: 3px solid $BT_U; + background-color: transparent; +} +input.u[type="radio"]:checked { + appearance: none; + width: 1.25em; + height: 1.25em; + border-radius: 50%; + border: 2px solid black; + background-color: $BT_U; +} + +input.w[type="radio"] { + appearance: none; + width: 1.25em; + height: 1.25em; + border-radius: 50%; + border: 3px solid $BT_W; + background-color: transparent; +} +input.w[type="radio"]:checked { + appearance: none; + width: 1.25em; + height: 1.25em; + border-radius: 50%; + border: 2px solid black; + background-color: $BT_W; +} + +input.y[type="radio"] { + appearance: none; + width: 1.25em; + height: 1.25em; + border-radius: 50%; + border: 3px solid $BT_Y; + background-color: transparent; +} +input.y[type="radio"]:checked { + appearance: none; + width: 1.25em; + height: 1.25em; + border-radius: 50%; + border: 2px solid black; + background-color: $BT_Y; +} +</style> + +<script type="text/javascript"><!-- + +var nb_patches = 0; +var cid = []; +var bkp = []; + +// first line to review +var review = 0; + +// show/hide table lines and update their color +function updt_table(line) { + var b = document.getElementById("sh_b").checked; + var n = document.getElementById("sh_n").checked; + var u = document.getElementById("sh_u").checked; + var w = 
document.getElementById("sh_w").checked; + var y = document.getElementById("sh_y").checked; + var tn = 0, tu = 0, tw = 0, ty = 0; + var i, el; + + for (i = 1; i < nb_patches; i++) { + if (document.getElementById("bt_" + i + "_n").checked) { + tn++; + if (line && i != line) + continue; + el = document.getElementById("tr_" + i); + el.style.backgroundColor = "$BG_N"; + el.style.display = n && (b || !bkp[i]) && i >= review ? "" : "none"; + } + else if (document.getElementById("bt_" + i + "_u").checked) { + tu++; + if (line && i != line) + continue; + el = document.getElementById("tr_" + i); + el.style.backgroundColor = "$BG_U"; + el.style.display = u && (b || !bkp[i]) && i >= review ? "" : "none"; + } + else if (document.getElementById("bt_" + i + "_w").checked) { + tw++; + if (line && i != line) + continue; + el = document.getElementById("tr_" + i); + el.style.backgroundColor = "$BG_W"; + el.style.display = w && (b || !bkp[i]) && i >= review ? "" : "none"; + } + else if (document.getElementById("bt_" + i + "_y").checked) { + ty++; + if (line && i != line) + continue; + el = document.getElementById("tr_" + i); + el.style.backgroundColor = "$BG_Y"; + el.style.display = y && (b || !bkp[i]) && i >= review ? 
"" : "none"; + } + else { + // bug + if (line && i != line) + continue; + el = document.getElementById("tr_" + i); + el.style.backgroundColor = "red"; + el.style.display = ""; + } + } + document.getElementById("cnt_n").innerText = tn; + document.getElementById("cnt_u").innerText = tu; + document.getElementById("cnt_w").innerText = tw; + document.getElementById("cnt_y").innerText = ty; +} + +function updt_output() { + var b = document.getElementById("sh_b").checked; + var i, y = "", w = "", u = "", n = ""; + + for (i = 1; i < nb_patches; i++) { + if (i < review) + continue; + if (bkp[i]) + continue; + if (document.getElementById("bt_" + i + "_y").checked) + y = y + " " + cid[i]; + else if (document.getElementById("bt_" + i + "_w").checked) + w = w + " " + cid[i]; + else if (document.getElementById("bt_" + i + "_u").checked) + u = u + " " + cid[i]; + else if (document.getElementById("bt_" + i + "_n").checked) + n = n + " " + cid[i]; + } + + // update the textarea + document.getElementById("output").value = + "cid_y=(" + y + " )\n" + + "cid_w=(" + w + " )\n" + + "cid_u=(" + u + " )\n" + + "cid_n=(" + n + " )\n"; +} + +function updt(line,value) { + if (value == "r") { + review = line; + line = 0; // redraw everything + } + updt_table(line); + updt_output(); +} + +// --> +</script> +</HEAD> +EOF + +echo "<BODY>" +echo -n "<big><big>Show:" +echo -n " <span style='background-color:$BG_B'><input type='checkbox' onclick='updt_table(0);' id='sh_b' checked />B (${#bkp[*]})</span> " +echo -n " <span style='background-color:$BG_N'><input type='checkbox' onclick='updt_table(0);' id='sh_n' checked />N (<span id='cnt_n'>0</span>)</span> " +echo -n " <span style='background-color:$BG_U'><input type='checkbox' onclick='updt_table(0);' id='sh_u' checked />U (<span id='cnt_u'>0</span>)</span> " +echo -n " <span style='background-color:$BG_W'><input type='checkbox' onclick='updt_table(0);' id='sh_w' checked />W (<span id='cnt_w'>0</span>)</span> " +echo -n " <span 
style='background-color:$BG_Y'><input type='checkbox' onclick='updt_table(0);' id='sh_y' checked />Y (<span id='cnt_y'>0</span>)</span> " +echo -n "</big/></big> (B=show backported, N=no/drop, U=uncertain, W=wait/next, Y=yes/pick" +echo ")<P/>" + +echo "<TABLE COLS=5 BORDER=1 CELLSPACING=0 CELLPADDING=3>" +echo "<TR><TH>All<br/><input type='radio' name='review' onclick='updt(0,\"r\");' checked title='Start review here'/></TH><TH>CID</TH><TH>Subject</TH><TH>Verdict<BR>N U W Y</BR></TH><TH>Reason</TH></TR>" +seq_num=1; do_check=1; review=0; +for patch in "${PATCHES[@]}"; do + # try to retrieve the patch's numbering (0001-9999) + pnum="${patch##*/}" + pnum="${pnum%%[^0-9]*}" + + id=$(sed -ne 's/^#id: \(.*\)/\1/p' "$patch") + resp=$(grep -v ^llama "$patch" | sed -ne '/^Explanation:/,$p' | sed -z 's/\n[\n]*/\n/g' | sed -z 's/\([^. ]\)\n\([A-Z]\)/\1.\n\2/' | tr '\012' ' ') + resp="${resp#Explanation:}"; + while [ -n "$resp" -a -z "${resp##[ .]*}" ]; do + resp="${resp#[ .]}" + done + + respl=$(echo -- "$resp" | tr 'A-Z' 'a-z') + + if [[ "${respl}" =~ (conclusion|verdict)[:\ ][^.]*yes ]]; then + verdict=yes + elif [[ "${respl}" =~ (conclusion|verdict)[:\ ][^.]*wait ]]; then + verdict=wait + elif [[ "${respl}" =~ (conclusion|verdict)[:\ ][^.]*no ]]; then + verdict=no + elif [[ "${respl}" =~ (conclusion|verdict)[:\ ][^.]*uncertain ]]; then + verdict=uncertain + elif [[ "${respl}" =~ (\"wait\"|\"yes\"|\"no\"|\"uncertain\")[^\"]*$ ]]; then + # last word under quotes in the response, sometimes happens as + # in 'thus I would conclude "no"'. + verdict=${BASH_REMATCH[1]} + else + verdict=uncertain + fi + + verdict="${verdict//[\"\',;:. ]}" + verdict=$(echo -n "$verdict" | tr '[A-Z]' '[a-z]') + + # There are two formats for the ID line: + # - old: #id: cid subject + # - new: #id: cid author date subject + # We can detect the 2nd one as the date starts with a series of digits + # followed by "-" then an upper case letter (eg: "18-Dec23"). 
+ set -- $id + cid="$1" + author="" + date="" + if [ -n "$3" ] && [ -z "${3##[1-9]-[A-Z]*}" -o -z "${3##[0-3][0-9]-[A-Z]*}" ]; then + author="$2" + date="$3" + subj="${id#$cid $author $date }" + else + subj="${id#$cid }" + fi + + if [ -z "$cid" ]; then + echo "ERROR: commit ID not found in patch $pnum: $patch" >&2 + continue + fi + + echo "<script type='text/javascript'>cid[$seq_num]='$cid'; bkp[$seq_num]=${bkp[$cid]:+1}+0;</script>" + + echo -n "<TR id='tr_$seq_num' name='$cid'" + + # highlight unqualified docs and bugs + if [ "$verdict" != "no" ]; then + : # no special treatment for accepted/uncertain elements + elif [ -z "${subj##BUG*}" ] && ! [[ "${respl}" =~ (explicitly|specifically|clearly|also|commit\ message|does)[\ ]*(state|mention|say|request) ]]; then + # bold for BUG marked "no" with no "explicitly states that ..." + echo -n " style='font-weight:bold'" + elif [ -z "${subj##DOC*}" ]; then # && ! [[ "${respl}" =~ (explicitly|specifically|clearly|also|commit\ message|does)[\ ]*(state|mention|say|request) ]]; then + # gray for DOC marked "no" + echo -n " style='font-weight:bold'" + #echo -n " bgcolor=#E0E0E0" #"$BG_U" + fi + + echo -n ">" + + # HTMLify subject and summary + subj="${subj//&/&}"; subj="${subj//</<}"; subj="${subj//>/>}"; + resp="${resp//&/&}"; resp="${resp//</<}"; resp="${resp//>/>}"; + + # turn "#XXXX" to a link to an issue + resp=$(echo "$resp" | sed -e "s|#\([0-9]\{1,5\}\)|<a href='${ISSUES}\1'>#\1</a>|g") + + # put links to commit IDs + resp=$(echo "$resp" | sed -e "s|\([0-9a-f]\{8,40\}\)|<a href='${GITURL}\1'>\1</a>|g") + + echo -n "<TD nowrap align=center ${bkp[$cid]:+style='background-color:${BG_B}'}>$seq_num<BR/>" + echo -n "<input type='radio' name='review' onclick='updt($seq_num,\"r\");' ${do_check:+checked} title='Start review here'/></TD>" + echo -n "<TD nowrap ${bkp[$cid]:+style='background-color:${BG_B}'}><tt><a href='${GITURL}${cid}'>$cid</a></tt>${date:+<br/><small style='font-weight:normal'>$date</small>}</TD>" + echo -n "<TD 
nowrap><a href='${GITURL}${cid}'>${pnum:+$pnum }$subj</a>${author:+<br/><div align=right><small style='font-weight:normal'>$author</small></div>}</TD>" + echo -n "<TD nowrap align=center>" + echo -n "<input type='radio' onclick='updt($seq_num,\"n\");' id='bt_${seq_num}_n' class='n' name='$cid' value='n' title='Drop' $( [ "$verdict" != no ] || echo -n checked) />" + echo -n "<input type='radio' onclick='updt($seq_num,\"u\");' id='bt_${seq_num}_u' class='u' name='$cid' value='u' title='Uncertain' $( [ "$verdict" != uncertain ] || echo -n checked) />" + echo -n "<input type='radio' onclick='updt($seq_num,\"w\");' id='bt_${seq_num}_w' class='w' name='$cid' value='w' title='wait in -next' $([ "$verdict" != wait ] || echo -n checked) />" + echo -n "<input type='radio' onclick='updt($seq_num,\"y\");' id='bt_${seq_num}_y' class='y' name='$cid' value='y' title='Pick' $( [ "$verdict" != yes ] || echo -n checked) />" + echo -n "</TD>" + echo -n "<TD>$resp</TD>" + echo "</TR>" + echo + ((seq_num++)) + + # if this patch was already backported, make the review start on the next + if [ -n "${bkp[$cid]}" ]; then + review=$seq_num + do_check=1 + else + do_check= + fi +done + +echo "<TR><TH>New<br/><input type='radio' name='review' onclick='updt($seq_num,\"r\");' ${do_check:+checked} title='Nothing to backport'/></TH><TH>CID</TH><TH>Subject</TH><TH>Verdict<BR>N U W Y</BR></TH><TH>Reason</TH></TR>" + +echo "</TABLE>" +echo "<P/>" +echo "<H3>Output:</H3>" +echo "<textarea cols=120 rows=10 id='output'></textarea>" +echo "<P/>" +echo "<script type='text/javascript'>nb_patches=$seq_num; review=$review; updt_table(0); updt_output();</script>" +echo "</BODY></HTML>" diff --git a/dev/patchbot/scripts/process-patch-v15.sh b/dev/patchbot/scripts/process-patch-v15.sh new file mode 100755 index 0000000..e9f718a --- /dev/null +++ b/dev/patchbot/scripts/process-patch-v15.sh @@ -0,0 +1,63 @@ +#!/bin/bash + +# the patch itself +F="$1" +shift + +# if non-empty, force to redo the patch 
+FORCE="${FORCE:-}" + +CPU="${CPU:-$(nproc)}" +MODEL="${MODEL:-../models/airoboros-l2-13b-gpt4-1.4.1.Q5_K_M.gguf}" +PROMPT_PFX="${PROMPT_PFX:-prompt14-airo14-pfx.txt}" +PROMPT_SFX="${PROMPT_SFX:-prompt14-airo14-sfx.txt}" +CACHE="${CACHE:-prompt-airo14.cache}" +CACHE_RO="${CACHE_RO- --prompt-cache-ro}" +EXT="${EXT:-airo14.txt}" +OUTPUT="${OUTPUT:-$(set -- "$F"."$EXT"; echo $1)}" +MAINPROG="${MAINPROG:-./main}" + +# switch to interactive mode with this reverse-prompt at the end if set. +# Typically: INTERACTIVE="Developer". +INTERACTIVE=${INTERACTIVE:-""} + +# Compute the full prompt +# +# Input format for "$F": git-format-patch with lines in this order: +# 1: From cid ... +# 2: From: author user@... +# 3: Date: +# 4: Subject: +# ... +# n: ^---$ +# It will emit a preliminary line with the commit ID, the author, the date, +# the subject, then the whole commit message indented. The output can be +# searched using grep '^\(Bot:\|#id:\)' + +PROMPT="$(cat "$PROMPT_PFX"; cat "$F" | sed -e '/^---/,$d' -e '/^Signed-off-by:/d' -e '/^Cc:/d' -e '/^Reported-by:/d' -e '/^Acked-by:/d' -e '1s/From \([0-9a-f]\{8\}\)\([0-9a-f]\{32\}\).*/\1/' -e '2s/^From: .*<\([^<@>]*\)@\([^<.>]*\).*/\1@\2/' -e '3s/^Date:[^,]*, \([^ ]*\) \([^ ]*\) 20\([^ ]*\).*/\1-\2\3/' | sed -ne '1h;1d;2x;2G;2h;2d;3x;3G;3h;3d;4x;4G;4s/^\([^\n]*\)\n\([^\n]*\)\n\([^\n]*\)\nSubject: \(.*\)/#id: \1 \2 \3 \4\n\nSubject: \4/;p' | sed -e '3,$s/^/ \0/'; echo; cat "$PROMPT_SFX")" + +# already done: don't do it again. Note that /dev/null is OK +if [ -z "$FORCE" -a -s "$OUTPUT" ]; then + exit 0 +fi + +# In order to rebuild the prompt cache: +# OUTPUT=blah CACHE_RO= ./$0 /dev/null +# +# Note: airoboros is able to carefully isolate an entire context, tests show +# that it's possible to ask it to repeat the entire commit message and it does +# so correctly. 
However its logic is sometimes bizarre + + +if [ -z "$INTERACTIVE" ]; then + LANG=C "$MAINPROG" --log-disable --model "$MODEL" --threads "$CPU" --ctx_size 4096 --temp 0.36 --top_k 12 --top_p 1 --repeat_last_n 256 --batch_size 16384 --repeat_penalty 1.1 --n_predict 200 --multiline-input --prompt "$PROMPT" --prompt-cache "$CACHE" $CACHE_RO "$@" 2>&1 | grep -v ^llama_model_loader | grep -v ^llm_load_ > "${OUTPUT}" + if [ "$?" != 0 ]; then + # failed: this is likely because the text is too long + (echo "$PROMPT"; echo + echo "Explanation: the commit message was way too long, couldn't analyse it." + echo "Conclusion: uncertain" + echo) > "${OUTPUT}" + fi +else + LANG=C "$MAINPROG" --log-disable --model "$MODEL" --threads "$CPU" --ctx_size 4096 --temp 0.36 --repeat_penalty 1.1 --n_predict 200 --multiline-input --prompt "$PROMPT" --prompt-cache "$CACHE" $CACHE_RO -n -1 -i --color --in-prefix ' ' --reverse-prompt "$INTERACTIVE:" "$@" +fi diff --git a/dev/patchbot/scripts/submit-ai.sh b/dev/patchbot/scripts/submit-ai.sh new file mode 100755 index 0000000..d6c6710 --- /dev/null +++ b/dev/patchbot/scripts/submit-ai.sh @@ -0,0 +1,79 @@ +#!/bin/bash + +# note: the program may re-execute itself: when it has more than one patch to +# process, it will call itself with one patch only in argument. When called +# with a single patch in argument, it will always start the analysis directly. 
+ +# The program uses several environment variables: +# - EXT file name extension for the response +# - MODEL path to the model file (GGUF format) +# - FORCE force to re-process existing patches +# - PROGRAM path to the script to be called +# - CACHE path to the prompt cache (optional) +# - CACHE_RO force cache to remain read-only +# - PROMPT_PFX path to the prompt prefix (before the patch) +# - PROMPT_SFX path to the prompt suffix (after the patch) +# - TOT_CPUS total number of usable CPUs (def: nproc or 1) +# - SLOT_CPUS if defined, it's an array of CPU sets for each running slot +# - CPU_SLOT passed by the first level to the second one to allow binding +# to a specific CPU set based on the slot number from 0 to N-1. + +die() { + [ "$#" -eq 0 ] || echo "$*" >&2 + exit 1 +} + +err() { + echo "$*" >&2 +} + +quit() { + [ "$#" -eq 0 ] || echo "$*" + exit 0 +} + +#### Main + +# detect if running under -x, pass it down to sub-processes +#opt=; set -o | grep xtrace | grep -q on && opt=-x + +USAGE="Usage: ${0##*/} [ -s slots ] patch..." 
+MYSELF="$0" +TOT_CPUS=${TOT_CPUS:-$(nproc)} +TOT_CPUS=${TOT_CPUS:-1} +SLOTS=1 + + +while [ -n "$1" -a -z "${1##-*}" ]; do + case "$1" in + -s) SLOTS="$2" ; shift 2 ;; + -h|--help) quit "$USAGE" ;; + *) die "$USAGE" ;; + esac +done + +[ -n "$EXT" ] || die "Missing extension name (EXT)" +[ -n "$MODEL" ] || die "Missing model name (MODEL)" +[ -n "$PROGRAM" ] || die "Missing program name (PROGRAM)" +[ -n "$PROMPT_PFX" ] || die "Missing prompt prefix (PROMPT_PFX)" +[ -n "$PROMPT_SFX" ] || die "Missing prompt suffix (PROMPT_SFX)" + +PATCHES=( "$@" ) + +if [ ${#PATCHES[@]} = 0 ]; then + die "$USAGE" +elif [ ${#PATCHES[@]} = 1 ]; then + # really execute + taskset_cmd="" + if [ -n "$CPU_SLOT" ] && [ -n "${SLOT_CPUS[$CPU_SLOT]}" ]; then + taskset_cmd="taskset -c ${SLOT_CPUS[$CPU_SLOT]}" + fi + export CPU=$TOT_CPUS + ${taskset_cmd} ${PROGRAM} "${PATCHES[0]}" +else + # divide CPUs by number of slots + export TOT_CPUS=$(( (TOT_CPUS + SLOTS - 1) / SLOTS )) + # reexecute ourselves in parallel with a single patch each + xargs -n 1 -P "${SLOTS}" --process-slot-var=CPU_SLOT "${MYSELF}" -s 1 <<< "${PATCHES[@]}" +fi + diff --git a/dev/patchbot/scripts/update-3.0.sh b/dev/patchbot/scripts/update-3.0.sh new file mode 100755 index 0000000..5f8ac87 --- /dev/null +++ b/dev/patchbot/scripts/update-3.0.sh @@ -0,0 +1,66 @@ +#!/bin/bash + +SCRIPTS_DIR="$HOME/prog/scripts" +HAPROXY_DIR="$HOME/data/in/haproxy" +PATCHES_PFX="$HOME/data/in/patches" +VERDICT_DIR="$HOME/data/out" +PROMPTS_DIR="$HOME/data/prompts" +MODELS_DIR="$HOME/data/models" +MAINPROG="$HOME/prog/bin/main" + +PARALLEL_RUNS=2 + +BRANCH=$(cd "$HAPROXY_DIR" && git describe --tags HEAD|cut -f1 -d-|cut -f2- -dv) +if [ -z "$BRANCH" ]; then + echo "Couldn't guess current branch, aborting." 
+ exit 1 +fi + +# eg: for v3.0-dev0^ we should get v2.9.0 hence "2.9" +STABLE=$(cd "$HAPROXY_DIR" && git describe --tags "v${BRANCH}-dev0^" |cut -f1,2 -d.|cut -f2- -dv) + +PATCHES_DIR="$PATCHES_PFX"-"$BRANCH" + +(cd "$HAPROXY_DIR" + git pull + last_file=$(ls -1 "$PATCHES_DIR"/*.patch 2>/dev/null | tail -n1) + if [ -n "$last_file" ]; then + restart=$(head -n1 "$last_file" | cut -f2 -d' ') + else + restart="v${BRANCH}-dev0" + fi + "$SCRIPTS_DIR"/mk-patch-list.sh -o "$PATCHES_DIR" -b v${BRANCH}-dev0 $(git log $restart.. --oneline | cut -f1 -d' ') +) + +# List backported fixes (possibly none) +BKP=( + $( + cd "$HAPROXY_DIR" + if ! git remote update "$STABLE"; then + git remote add "$STABLE" "http://git.haproxy.org/git/haproxy-${STABLE}.git/" + git remote update "$STABLE" + fi >&2 + + git log --no-decorate --reverse "v${STABLE}.0..${STABLE}/master" | + sed -ne 's,(cherry picked from commit \(.\{8\}\).*,\1,p' + ) +) + +# by far the best model for now with little uncertain and few wait +echo "${BRANCH}: mistral-7b-v0.2" + +if [ ! -e "${PROMPTS_DIR}/prompt-${BRANCH}-m7bv02.cache" -o "${PROMPTS_DIR}/prompt15-${BRANCH}-mist7bv2-pfx.txt" -nt "${PROMPTS_DIR}/prompt-${BRANCH}-m7bv02.cache" ]; then + echo "Regenerating the prompt cache, may take 1-2 min" + rm -f "${PROMPTS_DIR}/prompt-${BRANCH}-m7bv02.cache" + rm -f empty + touch empty + time EXT=m7bv02.txt MODEL=${MODELS_DIR}/mistral-7b-instruct-v0.2.Q5_K_M.gguf CACHE=${PROMPTS_DIR}/prompt-${BRANCH}-m7bv02.cache CACHE_RO= PROMPT_PFX=${PROMPTS_DIR}/prompt15-${BRANCH}-mist7bv2-pfx.txt PROMPT_SFX=${PROMPTS_DIR}/prompt15-${BRANCH}-mist7bv2-sfx.txt MAINPROG=$MAINPROG PROGRAM="$SCRIPTS_DIR"/process-patch-v15.sh "$SCRIPTS_DIR"/submit-ai.sh empty + rm -f empty empty.m7bv02.txt + echo "Done!" 
+fi + +# Now process the patches, may take 1-2 hours +time EXT=m7bv02.txt MODEL=${MODELS_DIR}/mistral-7b-instruct-v0.2.Q5_K_M.gguf CACHE=${PROMPTS_DIR}/prompt-${BRANCH}-m7bv02.cache PROMPT_PFX=${PROMPTS_DIR}/prompt15-${BRANCH}-mist7bv2-pfx.txt PROMPT_SFX=${PROMPTS_DIR}/prompt15-${BRANCH}-mist7bv2-sfx.txt MAINPROG=$MAINPROG PROGRAM="$SCRIPTS_DIR"/process-patch-v15.sh "$SCRIPTS_DIR"/submit-ai.sh -s ${PARALLEL_RUNS} ${PATCHES_DIR}/*.patch + +# generate the output, takes 3-5 seconds +"$SCRIPTS_DIR"/post-ai.sh -b "${BKP[*]}" ${PATCHES_DIR}/*.m7bv02.txt > ${VERDICT_DIR}/verdict-${BRANCH}-m7bv02.html diff --git a/dev/phash/phash.c b/dev/phash/phash.c new file mode 100644 index 0000000..8a27405 --- /dev/null +++ b/dev/phash/phash.c @@ -0,0 +1,113 @@ +/* Brute-force based perfect hash generator for small sets of integers. Just + * fill the table below with the integer values, try to pad a little bit to + * avoid too complicated divides, experiment with a few operations in the + * hash function and reuse the output as-is to make your table. You may also + * want to experiment with the random generator to use either one or two + * distinct values for mul and key. + */ + +#include <stdio.h> +#include <stdlib.h> + +/* warning no more than 32 distinct values! */ + +//#define CODES 21 +//#define CODES 20 +//#define CODES 19 +//const int codes[CODES] = { 200,400,401,403,404,405,407,408,410,413,421,422,425,429,500,501,502,503,504}; + +#define CODES 32 +const int codes[CODES] = { 200,400,401,403,404,405,407,408,410,413,421,422,425,429,500,501,502,503,504, + /* padding entries below, which will fall back to the default code */ + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}; + +unsigned mul, xor; +unsigned bmul = 0, bxor = 0; + +static unsigned rnd32seed = 0x11111111U; +static unsigned rnd32() +{ + rnd32seed ^= rnd32seed << 13; + rnd32seed ^= rnd32seed >> 17; + rnd32seed ^= rnd32seed << 5; + return rnd32seed; +} + +/* the hash function to use in the target code. 
Try various combinations of + * multiplies and xor, always folded with a modulo, and try to spot the + * simplest operations if possible. Sometimes it may be worth adding a few + * dummy codes to get a better modulo code. In this case, just add dummy + * values at the end, but always distinct from the original ones. If the + * number of codes is even, it might be needed to rotate left the result + * before the modulo to compensate for lost LSBs. + */ +unsigned hash(unsigned i) +{ + //return ((i * mul) - (i ^ xor)) % CODES; // more solutions + //return ((i * mul) + (i ^ xor)) % CODES; // alternate + //return ((i ^ xor) * mul) % CODES; // less solutions but still OK for sequences up to 19 long + //return ((i * mul) ^ xor) % CODES; // less solutions but still OK for sequences up to 19 long + + i = i * mul; + i >>= 5; + //i = i ^ xor; + //i = (i << 30) | (i >> 2); // rotate 2 right + //i = (i << 2) | (i >> 30); // rotate 2 left + //i |= i >> 20; + //i += i >> 30; + //i |= i >> 16; + return i % CODES; + //return ((i * mul) ^ xor) % CODES; // less solutions but still OK for sequences up to 19 long +} + +int main(int argc, char **argv) +{ + unsigned h, i, flag, best, tests; + + if (argc > 2) { + mul = atol(argv[1]); + xor = atol(argv[2]); + for (i = 0; i < CODES && codes[i] >= 0; i++) + printf("hash(%4u) = %4u // [%4u] = %4u\n", codes[i], hash(codes[i]), hash(codes[i]), codes[i]); + return 0; + } + + tests = 0; + best = 0; + while (/*best < CODES &&*/ ++tests) { + mul = rnd32(); + xor = mul; // works for some sequences up to 21 long + //xor = rnd32(); // more solutions + + flag = 0; + for (i = 0; i < CODES && codes[i] >= 0; i++) { + h = hash(codes[i]); + if (flag & (1 << h)) + break; + flag |= 1 << h; + } + + if (i > best || + (i == best && mul <= bmul && xor <= bxor)) { + /* find the best code and try to find the smallest + * parameters among the best ones (need to disable + * best<CODES in the loop for this). 
Small values are + * interesting for some multipliers, and for some RISC + * architectures where literals can be loaded in fewer + * instructions.
Note that the DeviceAtlas C API version supported is from the 3.x +release series (3.2.1 minimum recommended).
Note that the JSON download scheduler is now part of the API's package; it is recommended +to read its documentation. Note that it needs to be started before HAProxy.
+other streams, and all of them can be accessed in parallel. QUIC also provides +connection migration support but currently haproxy does not support it. By default HAProxy operates in keep-alive mode with regards to persistent connections: for each connection it processes each request and response, and @@ -282,7 +286,7 @@ HAProxy essentially supports 3 connection modes : In addition to this, by default, the server-facing connection is reusable by any request from any client, as mandated by the HTTP protocol specification, so any information pertaining to a specific client has to be passed along with -each request if needed (e.g. client's source adress etc). When HTTP/2 is used +each request if needed (e.g. client's source address etc). When HTTP/2 is used with a server, by default HAProxy will dedicate this connection to the same client to avoid the risk of head of line blocking between clients. @@ -1148,7 +1152,43 @@ for every keyword. Supported units are case insensitive : Both time and size formats require integers, decimal notation is not allowed. -2.7. Examples +2.7. Name format for maps and ACLs +------------------------------------- + +It is possible to use a list of pattern for maps or ACLs. A list of pattern is +identified by its name and may be used at different places in the +configuration. List of pattern are split on three categories depending on +the name format: + + * Lists of pattern based on regular files: It is the default case. The + filename, absolute or relative, is used as name. The file must exist + otherwise an error is triggered. But it may be empty. The "file@" prefix + may also be specified but it is not part of the name identifying the + list. A filename, with or without the prefix, references the same list of + pattern. + + * Lists of pattern based on optional files: The filename must be preceded by + "opt@" prefix. The file existence is optional. If the file exists, its + content is loaded but no error is reported if not. 
The prefix is not part + of the name identifying the list. It means, for a given filename, Optional + files and regular files reference the same list of pattern. + + * Lists of pattern based on virtual files: The name is just an identified. It + is not a reference to any file. "virt@" prefix must be used. It is part of + the name. Thus it cannot be mixed with other kind of lists. + +Virtual files are useful when patterns are fully dynamically managed with no +patterns on startup and on reload. Optional files may be used under the same +conditions. But patterns can be dumped in the file, via an external script based +on the "show map" CLI command for instance. This way, it is possible to keep +patterns on reload. + +Note: Even if it is unlikely, it means no regular file starting with "file@", + "opt@" or "virt@" can be loaded, except by adding "./" explicitly in + front of the filename (for instance "file@./virt@map"). + + +2.8. Examples ------------- # Simple configuration for an HTTP proxy listening on port 80 on all @@ -1225,6 +1265,7 @@ The following keywords are supported in the "global" section : - deviceatlas-log-level - deviceatlas-properties-cookie - deviceatlas-separator + - expose-deprecated-directives - expose-experimental-directives - external-check - fd-hard-limit @@ -1236,9 +1277,12 @@ The following keywords are supported in the "global" section : - h1-case-adjust-file - h2-workaround-bogus-websocket-clients - hard-stop-after + - harden.reject-privileged-ports.tcp + - harden.reject-privileged-ports.quic - insecure-fork-wanted - insecure-setuid-wanted - issuers-chain-path + - key-base - localpeer - log - log-send-hostname @@ -1250,6 +1294,11 @@ The following keywords are supported in the "global" section : - nbthread - node - numa-cpu-mapping + - ocsp-update.disable + - ocsp-update.maxdelay + - ocsp-update.mindelay + - ocsp-update.httpproxy + - ocsp-update.mode - pidfile - pp2-never-send-local - presetenv @@ -1274,9 +1323,11 @@ The following keywords 
are supported in the "global" section : - ssl-propquery - ssl-provider - ssl-provider-path + - ssl-security-level - ssl-server-verify - ssl-skip-self-issued-ca - stats + - stats-file - strict-limits - uid - ulimit-n @@ -1314,6 +1365,7 @@ The following keywords are supported in the "global" section : - spread-checks - ssl-engine - ssl-mode-async + - tune.applet.zero-copy-forwarding - tune.buffers.limit - tune.buffers.reserve - tune.bufsize @@ -1360,7 +1412,9 @@ The following keywords are supported in the "global" section : - tune.pool-high-fd-ratio - tune.pool-low-fd-ratio - tune.pt.zero-copy-forwarding + - tune.quic.cc-hystart - tune.quic.frontend.conn-tx-buffers.limit + - tune.quic.frontend.glitches-threshold - tune.quic.frontend.max-idle-timeout - tune.quic.frontend.max-streams-bidi - tune.quic.max-frame-loss @@ -1373,6 +1427,7 @@ The following keywords are supported in the "global" section : - tune.rcvbuf.frontend - tune.rcvbuf.server - tune.recv_enough + - tune.ring.queues - tune.runqueue-depth - tune.sched.low-latency - tune.sndbuf.backend @@ -1390,8 +1445,8 @@ The following keywords are supported in the "global" section : - tune.ssl.lifetime - tune.ssl.maxrecord - tune.ssl.ssl-ctx-cache-size - - tune.ssl.ocsp-update.maxdelay - - tune.ssl.ocsp-update.mindelay + - tune.ssl.ocsp-update.maxdelay (deprecated) + - tune.ssl.ocsp-update.mindelay (deprecated) - tune.vars.global-max-size - tune.vars.proc-max-size - tune.vars.reqres-max-size @@ -1699,6 +1754,12 @@ deviceatlas-separator <char> Sets the character separator for the API properties results. This directive is optional and set to | by default if not set. +expose-deprecated-directives + This statement must appear before using some directives tagged as deprecated + to silent warnings and make sure the config file will not be rejected. Not + all deprecated directives are concerned, only those without any alternative + solution. 
+ expose-experimental-directives This statement must appear before using directives tagged as experimental or the config file will be rejected. @@ -1886,6 +1947,48 @@ hard-stop-after <time> See also: grace +harden.reject-privileged-ports.tcp { on | off } +harden.reject-privileged-ports.quic { on | off } + Toggle per protocol protection which forbid communication with clients which + use privileged ports as their source port. This range of ports is defined + according to RFC 6335. By default, protection is active for QUIC protocol as + this behavior is suspicious and may be used as a spoofing or DNS/NTP + amplification attack. + +http-err-codes [+-]<range>[,...] [...] + Replace, reduce or extend the list of status codes that define an error as + considered by the termination codes and the "http_err_cnt" counter in stick + tables. The default range for errors is 400 to 499, but in certain contexts + some users prefer to exclude specific codes, especially when tracking client + errors (e.g. 404 on systems with dynamically generated contents). See also + "http-fail-codes" and "http_err_cnt". + + A range specified without '+' nor '-' redefines the existing range to the new + one. A range starting with '+' extends the existing range to also include the + specified one, which may or may not overlap with the existing one. A range + starting with '-' removes the specified range from the existing one. A range + consists in a number from 100 to 599, optionally followed by "-" followed by + another number greater than or equal to the first one to indicate the high + boundary of the range. Multiple ranges may be delimited by commas for a same + add/del/ replace operation. + + Example: + http-err-codes 400,402-444,446-480,490 # sets exactly these codes + http-err-codes 400-499 -450 +500 # sets 400 to 500 except 450 + http-err-codes -450-459 # removes 450 to 459 from range + http-err-codes +501,505 # adds 501 and 505 to range + +http-fail-codes [+-]<range>[,...] [...] 
+ Replace, reduce or extend the list of status codes that define a failure as + considered by the termination codes and the "http_fail_cnt" counter in stick + tables. The default range for failures is 500 to 599 except 501 and 505 which + can be triggered by clients, and normally indicate a failure from the server + to process the request. Some users prefer to exclude certain codes in certain + contexts where it is known they're not relevant, such as 500 in certain SOAP + environments as it doesn't translate a server fault there. The syntax is + exactly the same as for http-err-codes above. See also "http-err-codes" and + "http_fail_cnt". + insecure-fork-wanted By default HAProxy tries hard to prevent any thread and process creation after it starts. Doing so is particularly important when using Lua files of @@ -1902,7 +2005,8 @@ insecure-fork-wanted highly recommended that this option is never used and that any workload requiring such a fork be reconsidered and moved to a safer solution (such as agents instead of external checks). This option supports the "no" prefix to - disable it. + disable it. This can also be activated with "-dI" on the haproxy command + line. insecure-setuid-wanted HAProxy doesn't need to call executables at run time (except when using @@ -1933,6 +2037,11 @@ issuers-chain-path <dir> "issuers-chain-path" directory. All other certificates with the same issuer will share the chain in memory. +key-base <dir> + Assigns a default directory to fetch SSL private keys from when a relative + path is used with "key" directives. Absolute locations specified prevail and + ignore "key-base". This option only works with a crt-store load line. + limited-quic This setting must be used to explicitly enable the QUIC listener bindings when haproxy is compiled against a TLS/SSL stack without QUIC support, typically @@ -2058,7 +2167,8 @@ nbthread <number> bound to upon startup. 
This means that the thread count can easily be adjusted from the calling process using commands like "taskset" or "cpuset". Otherwise, this value defaults to 1. The default value is reported in the - output of "haproxy -vv". + output of "haproxy -vv". Note that values set here or automatically detected + are subject to the limit set by "thread-hard-limit" (if set). no-quic Disable QUIC transport protocol. All the QUIC listeners will still be created. @@ -2077,6 +2187,40 @@ numa-cpu-mapping already specified, for example via the 'cpu-map' directive or the taskset utility. +ocsp-update.disable [ on | off ] + Disable completely the ocsp-update in HAProxy. Any ocsp-update configuration + will be ignored. Default is "off". + See option "ocsp-update" for more information about the auto update + mechanism. + +ocsp-update.httpproxy <address>[:port] + Allow to use an HTTP proxy for the OCSP updates. This only works with HTTP, + HTTPS is not supported. This option will allow the OCSP updater to send + absolute URI in the request to the proxy. + +ocsp-update.maxdelay <number> +tune.ssl.ocsp-update.maxdelay <number> (deprecated) + Sets the maximum interval between two automatic updates of the same OCSP + response. This time is expressed in seconds and defaults to 3600 (1 hour). It + must be set to a higher value than "ocsp-update.mindelay". See + option "ocsp-update" for more information about the auto update mechanism. + +ocsp-update.mindelay <number> +tune.ssl.ocsp-update.mindelay <number> (deprecated) + Sets the minimum interval between two automatic updates of the same OCSP + response. This time is expressed in seconds and defaults to 300 (5 minutes). + It is particularly useful for OCSP response that do not have explicit + expiration times. It must be set to a lower value than + "ocsp-update.maxdelay". See option "ocsp-update" for more + information about the auto update mechanism. 
+ +ocsp-update.mode [ on | off ] + Sets the default ocsp-update mode for all certificates used in the + configuration. This global option can be superseded by the crt-list + "ocsp-update" option. This option is set to "off" by default. + See option "ocsp-update" for more information about the auto update + mechanism. + pidfile <pidfile> Writes PIDs of all daemons into file <pidfile> when daemon mode or writes PID of master process into file <pidfile> when master-worker mode. This option is @@ -2182,7 +2326,7 @@ set-var-fmt <var-name> <fmt> are only those using internal data, typically 'int(value)' or 'str(value)'. It is possible to reference previously allocated variables as well. These variables will then be readable (and modifiable) from the regular rule sets. - Please see section 8.2.4 for details on the log-format syntax. + Please see section 8.2.6 for details on the Custom log format syntax. Example: global @@ -2190,20 +2334,29 @@ set-var-fmt <var-name> <fmt> set-var-fmt proc.bootid "%pid|%t" setcap <name>[,<name>...] - Sets a list of capabilities that must be preserved when starting with uid 0 - and switching to a non-zero uid. By default all permissions are lost by the - uid switch, but some are often needed when trying connecting to a server from - a foreign address during transparent proxying, or when binding to a port - below 1024, e.g. when using "tune.quic.socket-owner connection", resulting in - setups running entirely under uid 0. Setting capabilities generally is a - safer alternative, as only the required capabilities will be preserved. The - feature is OS-specific and only enabled on Linux when USE_LINUX_CAP=1 is set - at build time. The list of supported capabilities also depends on the OS and - is enumerated by the error message displayed when an invalid capability name - or an empty one is passed. Multiple capabilities may be passed, delimited by - commas. 
Among those commonly used, "cap_net_raw" allows to transparently bind - to a foreign address, and "cap_net_bind_service" allows to bind to a - privileged port and may be used by QUIC. + Sets a list of capabilities that must be preserved when starting and running + either as a non-root user (uid > 0), or when starting with uid 0 (root) + and switching then to a non-root. By default all permissions are + lost by the uid switch, but some are often needed when trying to connect to + a server from a foreign address during transparent proxying, or when binding + to a port below 1024, e.g. when using "tune.quic.socket-owner connection", + resulting in setups running entirely under uid 0. Setting capabilities + generally is a safer alternative, as only the required capabilities will be + preserved. The feature is OS-specific and only enabled on Linux when + USE_LINUX_CAP=1 is set at build time. The list of supported capabilities also + depends on the OS and is enumerated by the error message displayed when an + invalid capability name or an empty one is passed. Multiple capabilities may + be passed, delimited by commas. Among those commonly used, "cap_net_raw" + allows to transparently bind to a foreign address, and "cap_net_bind_service" + allows to bind to a privileged port and may be used by QUIC. If the process + is started and run under the same non-root user, needed capabilities should + be set on haproxy binary file with setcap along with this keyword. For more + details about setting capabilities on haproxy binary, please see chapter + 13.1 Linux capabilities support in the Management guide. + + Example: + global + setcap cap_net_bind_service,cap_net_admin setenv <name> <value> Sets environment variable <name> to value <value>. If the variable exists, it @@ -2516,6 +2669,17 @@ ssl-load-extra-files <none|all|bundle|sctl|ocsp|issuer|key>* See also: "crt", section 5.1 about bind options and section 5.2 about server options. 
+ssl-security-level <number> + This directive allows to chose the OpenSSL security level as described in + https://www.openssl.org/docs/man1.1.1/man3/SSL_CTX_set_security_level.html + The security level will be applied to every SSL contextes in HAProxy. + Only a value between 0 and 5 is supported. + + The default value depends on your OpenSSL version, distribution and how was + compiled the library. + + This directive requires at least OpenSSL 1.1.1. + ssl-server-verify [none|required] The default behavior for SSL verify on servers side. If specified to 'none', servers certificates are not verified. The default is 'required' except if @@ -2551,6 +2715,11 @@ stats timeout <timeout, in milliseconds> to change this value with "stats timeout". The value must be passed in milliseconds, or be suffixed by a time unit among { us, ms, s, m, h, d }. +stats-file <path> + Path to a generated haproxy stats-file. On startup haproxy will preload the + values to its internal counters. Use the CLI command "dump stats-file" to + produce such stats-file. See the management manual for more details. + strict-limits Makes process fail at startup when a setrlimit fails. HAProxy tries to set the best setrlimit according to what has been calculated. If it fails, it will @@ -2578,6 +2747,19 @@ thread-groups <number> since up to 64 threads per group may be configured. The maximum number of groups is configured at compile time and defaults to 16. See also "nbthread". +thread-hard-limit <number> + This setting is used to enforce a limit to the number of threads, either + detected, or configured. This is particularly useful on operating systems + where the number of threads is automatically detected, where a number of + threads lower than the number of CPUs is desired in generic and portable + configurations. 
Indeed, while "nbthread" enforces a number of threads that + will result in a warning and bad performance if higher than CPUs available, + thread-hard-limit will only cap the maximum value and automatically limit + the number of threads to no higher than this value, but will not raise lower + values. If "nbthread" is forced to a higher value, thread-hard-limit wins, + and a warning is emitted in so that the configuration anomaly can be + fixed. By default there is no limit. See also "nbthread". + trace <args...> This command configures one "trace" subsystem statement. Each of them can be found in the management manual, and follow the exact same syntax. Only one @@ -2974,28 +3156,32 @@ ssl-mode-async read/write operations (it is only enabled during initial and renegotiation handshakes). +tune.applet.zero-copy-forwarding { on | off } + Enables ('on') of disabled ('off') the zero-copy forwarding of data for the + applets. It is enabled by default. + + See also: tune.disable-zero-copy-forwarding. + tune.buffers.limit <number> Sets a hard limit on the number of buffers which may be allocated per process. - The default value is zero which means unlimited. The minimum non-zero value - will always be greater than "tune.buffers.reserve" and should ideally always - be about twice as large. Forcing this value can be particularly useful to - limit the amount of memory a process may take, while retaining a sane - behavior. When this limit is reached, streams which need a buffer wait for - another one to be released by another stream. Since buffers are dynamically - allocated and released, the waiting time is very short and not perceptible - provided that limits remain reasonable. In fact sometimes reducing the limit - may even increase performance by increasing the CPU cache's efficiency. Tests - have shown good results on average HTTP traffic with a limit to 1/10 of the - expected global maxconn setting, which also significantly reduces memory - usage. 
The memory savings come from the fact that a number of connections - will not allocate 2*tune.bufsize. It is best not to touch this value unless - advised to do so by an HAProxy core developer. + The default value is zero which means unlimited. The limit will automatically + be re-adjusted to satisfy the reserved buffers for emergency situations so + that the user doesn't have to perform complicated calculations. Forcing this + value can be particularly useful to limit the amount of memory a process may + take, while retaining a sane behavior. When this limit is reached, a task + that requests a buffer waits for another one to be released first. Most of + the time the waiting time is very short and not perceptible provided that + limits remain reasonable. However, some historical limitations have weakened + this mechanism over versions and it is known that in certain situations of + sustained shortage, some tasks may freeze until their timeout expires, so it + is safer to avoid using this when not strictly necessary. tune.buffers.reserve <number> - Sets the number of buffers which are pre-allocated and reserved for use only - during memory shortage conditions resulting in failed memory allocations. The - minimum value is 2 and is also the default. There is no reason a user would - want to change this value, it's mostly aimed at HAProxy core developers. + Sets the number of per-thread buffers which are pre-allocated and reserved + for use only during memory shortage conditions resulting in failed memory + allocations. The minimum value is 0 and the default is 4. There is no reason + a user would want to change this value, unless a core developer suggests to + change it for a very specific reason. tune.bufsize <number> Sets the buffer size to this size (in bytes). Lower values allow more @@ -3036,7 +3222,7 @@ tune.disable-zero-copy-forwarding Thanks to this directive, it is possible to disable this optimization. Note it also disable any kernel tcp splicing. 
- See also: tune.pt.zero-copy-forwarding, + See also: tune.pt.zero-copy-forwarding, tune.applet.zero-copy-forwarding, tune.h1.zero-copy-fwd-recv, tune.h1.zero-copy-fwd-send, tune.h2.zero-copy-fwd-send, tune.quic.zero-copy-fwd-send @@ -3330,10 +3516,17 @@ tune.lua.forced-yield <number> This directive forces the Lua engine to execute a yield each <number> of instructions executed. This permits interrupting a long script and allows the HAProxy scheduler to process other tasks like accepting connections or - forwarding traffic. The default value is 10000 instructions. If HAProxy often - executes some Lua code but more responsiveness is required, this value can be - lowered. If the Lua code is quite long and its result is absolutely required - to process the data, the <number> can be increased. + forwarding traffic. The default value is 10000 instructions for scripts loaded + using "lua-load-per-thread" and MAX(500, 10000 / nbthread) instructions for + scripts loaded using "lua-load" (it was found to be an optimal value for + performance while taking care of not creating thread contention with multiple + threads competing for the global lua lock). + + If HAProxy often executes some Lua code but more responsiveness is required, + this value can be lowered. If the Lua code is quite long and its result is + absolutely required to process the data, the <number> can be increased, but + the value should be set wisely as in multithreading context it could increase + contention. tune.lua.maxmem <number> Sets the maximum amount of RAM in megabytes per process usable by Lua. 
By @@ -3551,6 +3744,11 @@ tune.pt.zero-copy-forwarding { on | off } See also: tune.disable-zero-copy-forwarding, option splice-auto, option splice-request and option splice-response +tune.quic.cc-hystart { on | off } + Enables ('on') or disabled ('off') the HyStart++ (RFC 9406) algorithm for + QUIC connections used as a replacement for the slow start phase of congestion + control algorithms which may cause high packet loss. It is disabled by default. + tune.quic.frontend.conn-tx-buffers.limit <number> This settings defines the maximum number of buffers allocated for a QUIC connection on data emission. By default, it is set to 30. QUIC buffers are @@ -3558,6 +3756,18 @@ tune.quic.frontend.conn-tx-buffers.limit <number> and memory consumption and can be adjusted according to an estimated round time-trip. Each buffer is tune.bufsize. +tune.quic.frontend.glitches-threshold <number> + Sets the threshold for the number of glitches on a frontend connection, where + that connection will automatically be killed. This allows to automatically + kill misbehaving connections without having to write explicit rules for them. + The default value is zero, indicating that no threshold is set so that no + event will cause a connection to be closed. Beware that some QUIC clients may + occasionally cause a few glitches over long lasting connection, so any non- + zero value here should probably be in the hundreds or thousands to be + effective without affecting slightly bogus clients. + + See also: fc_glitches + tune.quic.frontend.max-idle-timeout <timeout> Sets the QUIC max_idle_timeout transport parameters in milliseconds for frontends which determines the period of time after which a connection silently @@ -3634,7 +3844,7 @@ tune.quic.socket-owner { connection | listener } tune.quic.zero-copy-fwd-send { on | off } Enables ('on') of disabled ('off') the zero-copy sends of data for the QUIC - multiplexer. It is disabled by default. + multiplexer. It is enabled by default. 
See also: tune.disable-zero-copy-forwarding @@ -3671,6 +3881,15 @@ tune.recv_enough <number> may be changed by this setting to better deal with workloads involving lots of short messages such as telnet or SSH sessions. +tune.ring.queues <number> + Sets the number of write queues in front of ring buffers. This can have an + effect on the CPU usage of traces during debugging sessions, and both too + low or too large a value can have an important effect. The good value was + determined experimentally by developers and there should be no reason to + try to change it unless instructed to do so in order to try to address + specific issues. Such a setting should not be left in the configuration + across version upgrades because its optimal value may evolve over time. + tune.runqueue-depth <number> Sets the maximum amount of task that can be processed at once when running tasks. The default value depends on the number of threads but sits between 35 @@ -3793,13 +4012,16 @@ tune.ssl.keylog { on | off } SSLKEYLOGFILE Label | Sample fetches for the Secrets --------------------------------|----------------------------------------- - CLIENT_EARLY_TRAFFIC_SECRET | %[ssl_fc_client_early_traffic_secret] - CLIENT_HANDSHAKE_TRAFFIC_SECRET | %[ssl_fc_client_handshake_traffic_secret] - SERVER_HANDSHAKE_TRAFFIC_SECRET | %[ssl_fc_server_handshake_traffic_secret] - CLIENT_TRAFFIC_SECRET_0 | %[ssl_fc_client_traffic_secret_0] - SERVER_TRAFFIC_SECRET_0 | %[ssl_fc_server_traffic_secret_0] - EXPORTER_SECRET | %[ssl_fc_exporter_secret] - EARLY_EXPORTER_SECRET | %[ssl_fc_early_exporter_secret] + CLIENT_EARLY_TRAFFIC_SECRET | %[ssl_xx_client_early_traffic_secret] + CLIENT_HANDSHAKE_TRAFFIC_SECRET | %[ssl_xx_client_handshake_traffic_secret] + SERVER_HANDSHAKE_TRAFFIC_SECRET | %[ssl_xx_server_handshake_traffic_secret] + CLIENT_TRAFFIC_SECRET_0 | %[ssl_xx_client_traffic_secret_0] + SERVER_TRAFFIC_SECRET_0 | %[ssl_xx_server_traffic_secret_0] + EXPORTER_SECRET | %[ssl_xx_exporter_secret] + 
EARLY_EXPORTER_SECRET | %[ssl_xx_early_exporter_secret] + + These fetches exist for frontend (fc) or backend (bc) sides; replace "xx" by + "fc" or "bc" to use the right side. This is only available with OpenSSL 1.1.1, and useful with TLS1.3 session. @@ -3808,6 +4030,17 @@ tune.ssl.keylog { on | off } "CLIENT_RANDOM %[ssl_fc_client_random,hex] %[ssl_fc_session_key,hex]" + A complete keylog could be generated with a log-format this way, even though + this is not ideal for syslog: + + log-format "CLIENT_EARLY_TRAFFIC_SECRET %[ssl_bc_client_random,hex] %[ssl_bc_client_early_traffic_secret]\n + CLIENT_HANDSHAKE_TRAFFIC_SECRET %[ssl_bc_client_random,hex] %[ssl_bc_client_handshake_traffic_secret]\n + SERVER_HANDSHAKE_TRAFFIC_SECRET %[ssl_bc_client_random,hex] %[ssl_bc_server_handshake_traffic_secret]\n + CLIENT_TRAFFIC_SECRET_0 %[ssl_bc_client_random,hex] %[ssl_bc_client_traffic_secret_0]\n + SERVER_TRAFFIC_SECRET_0 %[ssl_bc_client_random,hex] %[ssl_bc_server_traffic_secret_0]\n + EXPORTER_SECRET %[ssl_bc_client_random,hex] %[ssl_bc_exporter_secret]\n + EARLY_EXPORTER_SECRET %[ssl_bc_client_random,hex] %[ssl_bc_early_exporter_secret]" + tune.ssl.lifetime <timeout> Sets how long a cached SSL session may remain valid. This time is expressed in seconds and defaults to 300 (5 min). It is important to understand that it @@ -3837,20 +4070,6 @@ tune.ssl.ssl-ctx-cache-size <number> dynamically is expensive, they are cached. The default cache size is set to 1000 entries. -tune.ssl.ocsp-update.maxdelay <number> - Sets the maximum interval between two automatic updates of the same OCSP - response. This time is expressed in seconds and defaults to 3600 (1 hour). It - must be set to a higher value than "tune.ssl.ocsp-update.mindelay". See - option "ocsp-update" for more information about the auto update mechanism. - -tune.ssl.ocsp-update.mindelay <number> - Sets the minimum interval between two automatic updates of the same OCSP - response.
This time is expressed in seconds and defaults to 300 (5 minutes). - It is particularly useful for OCSP response that do not have explicit - expiration times. It must be set to a lower value than - "tune.ssl.ocsp-update.maxdelay". See option "ocsp-update" for more - information about the auto update mechanism. - tune.stick-counters <number> Sets the number of stick-counters that may be tracked at the same time by a connection or a request via "track-sc*" actions in "tcp-request" or @@ -3965,7 +4184,15 @@ user <username> [password|insecure-password <password>] designed to be expensive to compute to achieve resistance against brute force attacks. They do not simply salt/hash the clear text password once, but thousands of times. This can quickly become a major factor in HAProxy's - overall CPU consumption! + overall CPU consumption, and can even lead to application crashes! + + To address the high CPU usage of hash functions, one approach is to reduce + the number of rounds of the hash function (SHA family algorithms) or decrease + the "cost" of the function, if the algorithm supports it. + + As a side note, musl (e.g. Alpine Linux) implementations are known to be + slower than their glibc counterparts when calculating hashes, so you might + want to consider this aspect too. Example: userlist L1 @@ -4577,6 +4804,196 @@ httpclient.timeout.connect <timeout> The default value is 5000ms. + +3.12. Certificate Storage +------------------------- + +HAProxy uses an internal storage mechanism to load and store certificates used +in the configuration. This storage can be configured by using a "crt-store" +section. It allows to configure certificate definitions and which files should +be loaded in it. A certificate definition must be written before it is used +elsewhere in the configuration. + +The "crt-store" takes an optional name in argument. If a name is specified, +every certificate of this store must be referenced using "@<name>/<crt>" or +"@<name>/<alias>". 
+ +Files in the certificate storage can also be updated dynamically with the CLI. +See "set ssl cert" in the section 9.3 of the management guide. + + +The following keywords are supported in the "crt-store" section : + - crt-base + - key-base + - load + +crt-base <dir> + Assigns a default directory to fetch SSL certificates from when a relative + path is used with "crt" directives. Absolute locations specified prevail and + ignore "crt-base". When used in a crt-store, the crt-base of the global + section is ignored. + +key-base <dir> + Assigns a default directory to fetch SSL private keys from when a relative + path is used with "key" directives. Absolute locations specified prevail and + ignore "key-base". When used in a crt-store, the key-base of the global + section is ignored. + +load [crt <filename>] [param*] + Load SSL files in the certificate storage. For the parameter list, see section + "3.12.1. Load options" + +Example: + + crt-store + load crt "site1.crt" key "site1.key" ocsp "site1.ocsp" alias "site1" + load crt "site2.crt" key "site2.key" + + frontend in2 + bind *:443 ssl crt "@/site1" crt "site2.crt" + + crt-store web + crt-base /etc/ssl/certs/ + key-base /etc/ssl/private/ + load crt "site3.crt" alias "site3" + load crt "site4.crt" key "site4.key" + + frontend in2 + bind *:443 ssl crt "@web/site1" crt "site2.crt" crt "@web/site3" crt "@web/site4.crt" + +3.12.1. Load options +-------------------- + +Load SSL files in the certificate storage. The load keyword can take multiple +parameters which are listed below. These keywords are also usable in a +crt-list. + +crt <filename> + This argument is mandatory, it loads a PEM which must contain the public + certificate but could also contain the intermediate certificates and the + private key. If no private key is provided in this file, a key can be provided + with the "key" keyword. + +alias <string> + Optional argument. 
Allow to name the certificate with an alias, so it can be + referenced with it in the configuration. An alias must be prefixed with '@/' + when called elsewhere in the configuration. + +key <filename> + This argument is optional. Load a private key in PEM format. If a private key + was already defined in "crt", it will overwrite it. + +ocsp <filename> + This argument is optional, it loads an OCSP response in DER format. It can + be updated with the CLI. + +issuer <filename> + This argument is optional. Load the OCSP issuer in PEM format. In order to + identify which certificate an OCSP Response applies to, the issuer's + certificate is necessary. If the issuer's certificate is not found in the + "crt" file, it could be loaded from a file with this argument. + +sctl <filename> + This argument is optional. Support for Certificate Transparency (RFC6962) TLS + extension is enabled. The file must contain a valid Signed Certificate + Timestamp List, as described in RFC. File is parsed to check basic syntax, + but no signatures are verified. + +ocsp-update [ off | on ] + Enable automatic OCSP response update when set to 'on', disable it otherwise. + Its value defaults to 'off'. + To enable the OCSP auto update on a bind line, you can use this option in a + crt-store or you can use the global option "tune.ocsp-update.mode". + If a given certificate is used in multiple crt-lists with different values of + the 'ocsp-update' set, an error will be raised. Likewise, if a certificate + inherits from the global option on a bind line and has an incompatible + explicit 'ocsp-update' option set in a crt-list, the same error will be + raised. 
+ + Examples: + + Here is an example configuration enabling it with a crt-list: + + haproxy.cfg: + frontend fe + bind :443 ssl crt-list haproxy.list + + haproxy.list: + server_cert.pem [ocsp-update on] foo.bar + + Here is an example configuration enabling it with a crt-store: + + haproxy.cfg: + + crt-store + load crt foobar.pem ocsp-update on + + frontend fe + bind :443 ssl crt foobar.pem + + When the option is set to 'on', we will try to get an ocsp response whenever + an ocsp uri is found in the frontend's certificate. The only limitation of + this mode is that the certificate's issuer will have to be known in order for + the OCSP certid to be built. + Each OCSP response will be updated at least once an hour, and even more + frequently if a given OCSP response has an expire date earlier than this one + hour limit. A minimum update interval of 5 minutes will still exist in order + to avoid updating too often responses that have a really short expire time or + even no 'Next Update' at all. Because of this hard limit, please note that + when auto update is set to 'on', any OCSP response loaded during init will + not be updated until at least 5 minutes, even if its expire time ends before + now+5m. This should not be too much of a hassle since an OCSP response must + be valid when it gets loaded during init (its expire time must be in the + future) so it is unlikely that this response expires in such a short time + after init. + On the other hand, if a certificate has an OCSP uri specified and no OCSP + response, setting this option to 'on' for the given certificate will ensure + that the OCSP response gets fetched automatically right after init. + The default minimum and maximum delays (5 minutes and 1 hour respectively) + can be configured by the "ocsp-update.maxdelay" and "ocsp-update.mindelay" + global options. 
+ + Whenever an OCSP response is updated by the auto update task or following a + call to the "update ssl ocsp-response" CLI command, a dedicated log line is + emitted. It follows a dedicated format that contains the following header + "<OCSP-UPDATE>" and is followed by specific OCSP-related information: + - the path of the corresponding frontend certificate + - a numerical update status + - a textual update status + - the number of update failures for the given response + - the number of update successes for the given response + See "show ssl ocsp-updates" CLI command for a full list of error codes and + error messages. This line is emitted regardless of the success or failure of + the concerned OCSP response update. + The OCSP request/response is sent and received through an http_client + instance that has the dontlog-normal option set and that uses the regular + HTTP log format in case of error (unreachable OCSP responder for instance). + If such an error occurs, another log line that contains HTTP-related + information will then be emitted alongside the "regular" OCSP one (which will + likely have "HTTP error" as text status). But if a purely HTTP error happens + (unreachable OCSP responder for instance), an extra log line that follows the + regular HTTP log-format will be emitted.
+ Here are two examples of such log lines, with a successful OCSP update log + line first and then an example of an HTTP error with the two different lines + (lines were split and the URL was shortened for readability): + <133>Mar 6 11:16:53 haproxy[14872]: <OCSP-UPDATE> /path_to_cert/foo.pem 1 \ + "Update successful" 0 1 + + <133>Mar 6 11:18:55 haproxy[14872]: <OCSP-UPDATE> /path_to_cert/bar.pem 2 \ + "HTTP error" 1 0 + <133>Mar 6 11:18:55 haproxy[14872]: -:- [06/Mar/2023:11:18:52.200] \ + <OCSP-UPDATE> -/- 2/0/-1/-1/3009 503 217 - - SC-- 0/0/0/0/3 0/0 {} \ + "GET http://127.0.0.1:12345/MEMwQT HTTP/1.1" + + Troubleshooting: + A common error that can happen with Let's Encrypt certificates is if the DNS + resolution provides an IPv6 address and your system does not have a valid + outgoing IPv6 route. In such a case, you can either create the appropriate + route or set the "httpclient.resolvers.prefer ipv4" option in the global + section. + In case of "OCSP response check failure" error, you might want to check that + the issuer certificate that you provided is valid. + 4. Proxies ---------- @@ -4771,6 +5188,9 @@ error-log-format X X X - force-persist - - X X filter - X X X fullconn X - X X +guid - X X X +hash-balance-factor X - X X +hash-key X - X X hash-type X - X X http-after-response X (!) X X X http-check comment X - X X @@ -5241,8 +5661,7 @@ balance url_param <param> [check_post] the log messages. When the server goes DOWN, the next server in the list takes its place. When a previously DOWN server goes back UP it is added at the end of the list so that the - sticky server doesn't change until it becomes DOWN. This - algorithm is only usable for backends in LOG mode. + sticky server doesn't change until it becomes DOWN. <arguments> is an optional list of arguments which may be needed by some algorithms.
Right now, only "url_param", "uri" and "log-hash" @@ -5250,7 +5669,7 @@ balance url_param <param> [check_post] The load balancing algorithm of a backend is set to roundrobin when no other algorithm, mode nor option have been set. The algorithm may only be set once - for each backend. In backends in LOG mode, server "weight" is always ignored. + for each backend. With authentication schemes that require the same connection like NTLM, URI based algorithms must not be used, as they would cause subsequent requests @@ -6404,7 +6823,7 @@ email-alert to <emailaddr> "email-alert myhostname", section 3.6 about mailers. -error-log-format <string> +error-log-format <fmt> Specifies the log format string to use in case of connection error on the frontend side. May be used in the following contexts: tcp, http @@ -6419,8 +6838,8 @@ error-log-format <string> connection errors described in section 8.2.5. If the directive is used in a defaults section, all subsequent frontends will - use the same log format. Please see section 8.2.4 which covers the log format - string in depth. + use the same log format. Please see section 8.2.6 which covers the custom log + format string in depth. "error-log-format" directive overrides previous "error-log-format" directives. @@ -6534,6 +6953,14 @@ fullconn <conns> See also : "maxconn", "server" +guid <string> + Specify a case-sensitive global unique ID for this proxy. This must be unique + across all haproxy configuration on every object types. Format is left + unspecified to allow the user to select its naming policy. The only + restriction is its length which cannot be greater than 127 characters. All + alphanumerical values and '.', ':', '-' and '_' characters are valid. + + hash-balance-factor <factor> Specify the balancing factor for bounded-load consistent hashing @@ -6567,6 +6994,29 @@ hash-balance-factor <factor> See also : "balance" and "hash-type". 
+hash-key <key> + Specify how "hash-type consistent" node keys are computed + + Arguments : + <key> <key> may be one of the following : + + id The node keys will be derived from the server's numeric + identifier as set from "id" or which defaults to its position + in the server list. + + addr The node keys will be derived from the server's address, when + available, or else fall back on "id". + + addr-port The node keys will be derived from the server's address and + port, when available, or else fall back on "id". + + The "addr" and "addr-port" options may be useful in scenarios where multiple + HAProxy processes are balancing traffic to the same set of servers. If the + server order of each process is different (because, for example, DNS records + were resolved in different orders) then this will allow each independent + HAProxy processes to agree on routing decisions. + + hash-type <method> <function> <modifier> Specify a method to use for mapping hashes to servers @@ -6897,12 +7347,13 @@ http-check expect [min-recv <int>] [comment <msg>] on-success <fmt> is optional and can be used to customize the informational message reported in logs if the expect rule is successfully evaluated and if it is the last rule - in the tcp-check ruleset. <fmt> is a log-format string. + in the tcp-check ruleset. <fmt> is a Custom log format + string (see section 8.2.6). on-error <fmt> is optional and can be used to customize the informational message reported in logs if an error occurred during the expect rule evaluation. <fmt> is a - log-format string. + Custom log format string (see section 8.2.6). <match> is a keyword indicating how to look for a specific pattern in the response. The keyword may be one of "status", "rstatus", "hdr", @@ -6948,17 +7399,18 @@ http-check expect [min-recv <int>] [comment <msg>] match), "end" (suffix match), "sub" (substring match) or "reg" (regex match). If not specified, exact matching method is used. 
If the "name-lf" parameter is used, - <name> is evaluated as a log-format string. If "value-lf" - parameter is used, <value> is evaluated as a log-format - string. These parameters cannot be used with the regex - matching method. Finally, the header value is considered - as comma-separated list. Note that matchings are case - insensitive on the header names. + <name> is evaluated as a Custom log format string (see + section 8.2.6). If "value-lf" parameter is used, <value> + is evaluated as a log-format string. These parameters + cannot be used with the regex matching method. Finally, + the header value is considered as comma-separated + list. Note that matchings are case insensitive on the + header names. fhdr { name | name-lf } [ -m <meth> ] <name> [ { value | value-lf } [ -m <meth> ] <value> : test the specified full header pattern on the HTTP - response headers. It does exactly the same than "hdr" + response headers. It does exactly the same as the "hdr" keyword, except the full header value is tested, commas are not considered as delimiters. @@ -6981,12 +7433,13 @@ http-check expect [min-recv <int>] [comment <msg>] of a dynamic page, or to detect a failure when a specific error appears on the check page (e.g. a stack trace). - string-lf <fmt> : test a log-format string match in the HTTP response body. - A health check response will be considered valid if the - response's body contains the string resulting of the - evaluation of <fmt>, which follows the log-format rules. - If prefixed with "!", then the response will be - considered invalid if the body contains the string. + string-lf <fmt> : test a Custom log format string (see section 8.2.6) match + in the HTTP response body. A health check response will + be considered valid if the response's body contains the + string resulting of the evaluation of <fmt>, which + follows the log-format rules. If prefixed with "!", then + the response will be considered invalid if the body + contains the string. 
It is important to note that the responses will be limited to a certain size defined by the global "tune.bufsize" option, which defaults to 16384 bytes. @@ -7052,9 +7505,10 @@ http-check send [meth <method>] [{ uri <uri> | uri-lf <fmt> }>] [ver <version>] other URI. Query strings are permitted. uri-lf <fmt> is optional and set the URI referenced in the HTTP requests - using the log-format string <fmt>. It defaults to "/" which - is accessible by default on almost any server, but may be - changed to any other URI. Query strings are permitted. + using the Custom log format <fmt> (see section 8.2.6). It + defaults to "/" which is accessible by default on almost any + server, but may be changed to any other URI. Query strings + are permitted. ver <version> is the optional HTTP version string. It defaults to "HTTP/1.0" but some servers might behave incorrectly in HTTP @@ -7064,16 +7518,16 @@ http-check send [meth <method>] [{ uri <uri> | uri-lf <fmt> }>] [ver <version>] hdr <name> <fmt> adds the HTTP header field whose name is specified in <name> and whose value is defined by <fmt>, which follows - to the log-format rules. + the Custom log format rules described in section 8.2.6. body <string> add the body defined by <string> to the request sent during HTTP health checks. If defined, the "Content-Length" header is thus automatically added to the request. - body-lf <fmt> add the body defined by the log-format string <fmt> to the - request sent during HTTP health checks. If defined, the - "Content-Length" header is thus automatically added to the - request. + body-lf <fmt> add the body defined by the Custom log format <fmt> (see + section 8.2.6) to the request sent during HTTP health + checks. If defined, the "Content-Length" header is thus + automatically added to the request. 
In addition to the request line defined by the "option httpchk" directive, this one is the valid way to add some headers and optionally a body to the @@ -7182,8 +7636,8 @@ http-check set-var-fmt(<var-name>[,<cond>...]) <fmt> <expr> Is a sample-fetch expression potentially followed by converters. - <fmt> This is the value expressed using log-format rules (see Custom - Log Format in section 8.2.4). + <fmt> This is the value expressed using Custom log format (see Custom + Log Format in section 8.2.6). Examples : http-check set-var(check.port) int(1234) @@ -7265,7 +7719,7 @@ http-error status <code> [content-type <type>] file is not empty, its content-type must be set as argument to "content-type", otherwise, any "content-type" argument is ignored. <file> is - evaluated as a log-format string. + evaluated as a Custom log format (see section 8.2.6). lf-string <str> specifies the log-format string to use as response payload. The content-type must always be set as @@ -7273,8 +7727,9 @@ http-error status <code> [content-type <type>] hdr <name> <fmt> adds to the response the HTTP header field whose name is specified in <name> and whose value is defined by - <fmt>, which follows to the log-format rules. - This parameter is ignored if an errorfile is used. + <fmt>, which follows the Custom log format rules (see + section 8.2.6). This parameter is ignored if an + errorfile is used. This directive may be used instead of "errorfile", to define a custom error message. As "errorfile" directive, it is used for errors detected and @@ -7430,12 +7885,29 @@ http-reuse { never | safe | aggressive | always } May be used in sections: defaults | frontend | listen | backend yes | no | yes | yes - By default, a connection established between HAProxy and the backend server - which is considered safe for reuse is moved back to the server's idle - connections pool so that any other request can make use of it. This is the - "safe" strategy below. 
- - The argument indicates the desired connection reuse strategy : + In order to avoid the cost of setting up new connections to backend servers + for each HTTP request, HAProxy tries to keep such idle connections opened + after being used. These connections are specific to a server and are stored + in a list called a pool, and are grouped together by a set of common key + properties. Subsequent HTTP requests will cause a lookup of a compatible + connection sharing identical properties in the associated pool and result in + this connection being reused instead of establishing a new one. + + A limit on the number of idle connections to keep on a server can be + specified via the "pool-max-conn" server keyword. Unused connections are + periodically purged according to the "pool-purge-delay" interval. + + The following connection properties are used to determine if an idle + connection is eligible for reuse on a given request: + - source and destination addresses + - proxy protocol + - TOS and mark socket options + - connection name, determined either by the result of the evaluation of the + "pool-conn-name" expression if present, otherwise by the "sni" expression + + In some occasions, connection lookup or reuse is not performed due to extra + restrictions. This is determined by the reuse strategy specified via the + keyword argument: - "never" : idle connections are never shared between sessions. This mode may be enforced to cancel a different strategy inherited from @@ -7486,20 +7958,12 @@ http-reuse { never | safe | aggressive | always } gains as "aggressive" but with more risks. It should only be used when it improves the situation over "aggressive". - When http connection sharing is enabled, a great care is taken to respect the - connection properties and compatibility. Indeed, some properties are specific - and it is not possibly to reuse it blindly. Those are the SSL SNI, source - and destination address and proxy protocol block. 
A connection is reused only - if it shares the same set of properties with the request. - Also note that connections with certain bogus authentication schemes (relying - on the connection) like NTLM are marked private and never shared. - - A connection pool is involved and configurable with "pool-max-conn". - - Note: connection reuse improves the accuracy of the "server maxconn" setting, - because almost no new connection will be established while idle connections - remain available. This is particularly true with the "always" strategy. + on the connection) like NTLM are marked private if possible and never shared. + This won't be the case however when using a protocol with multiplexing + abilities and using reuse mode level value greater than the default "safe" + strategy as in this case nothing prevents the connection from being already + shared. The rules to decide to keep an idle connection opened or to close it after processing are also governed by the "tune.pool-low-fd-ratio" (default: 20%) @@ -7513,13 +7977,14 @@ http-reuse { never | safe | aggressive | always } too few connections are kept open. It may be desirable in this case to adjust such thresholds or simply to increase the global "maxconn" value. - Similarly, when thread groups are explicitly enabled, it is important to - understand that idle connections are only usable between threads from a same - group. As such it may happen that unfair load between groups leads to more - idle connections being needed, causing a lower reuse rate. The same solution - may then be applied (increase global "maxconn" or increase pool ratios). + When thread groups are explicitly enabled, it is important to understand that + idle connections are only usable between threads from a same group. As such + it may happen that unfair load between groups leads to more idle connections + being needed, causing a lower reuse rate. The same solution may then be + applied (increase global "maxconn" or increase pool ratios). 
- See also : "option http-keep-alive", "server maxconn", "thread-groups", + See also : "option http-keep-alive", "pool-conn-name", "pool-max-conn", + "pool-purge-delay", "server maxconn", "sni", "thread-groups", "tune.pool-high-fd-ratio", "tune.pool-low-fd-ratio" @@ -7883,8 +8348,8 @@ no log # level and send in tcp log "${LOCAL_SYSLOG}:514" local0 notice # send to local server -log-format <string> - Specifies the log format string to use for traffic logs +log-format <fmt> + Specifies the custom log format string to use for traffic logs May be used in the following contexts: tcp, http @@ -7894,16 +8359,17 @@ log-format <string> This directive specifies the log format string that will be used for all logs resulting from traffic passing through the frontend using this line. If the directive is used in a defaults section, all subsequent frontends will use - the same log format. Please see section 8.2.4 which covers the log format - string in depth. + the same log format. Please see section 8.2.6 which covers the custom log + format string in depth. + A specific log-format used only in case of connection error can also be defined, see the "error-log-format" option. "log-format" directive overrides previous "option tcplog", "log-format", "option httplog" and "option httpslog" directives. -log-format-sd <string> - Specifies the RFC5424 structured-data log format string +log-format-sd <fmt> + Specifies the Custom log format string used to produce RFC5424 structured-data May be used in the following contexts: tcp, http @@ -7913,7 +8379,7 @@ log-format-sd <string> This directive specifies the RFC5424 structured-data log format string that will be used for all logs resulting from traffic passing through the frontend using this line. If the directive is used in a defaults section, all - subsequent frontends will use the same log format. Please see section 8.2.4 + subsequent frontends will use the same log format. 
Please see section 8.2.6 which covers the log format string in depth. See https://tools.ietf.org/html/rfc5424#section-6.3 for more information @@ -9421,7 +9887,7 @@ no option logasap Arguments : none - By default, logs are emitted when all the log format variables and sample + By default, logs are emitted when all the log format aliases and sample fetches used in the definition of the log-format string return a value, or when the stream is terminated. This allows the built in log-format strings to account for the transfer time, or the number of bytes in log messages. @@ -10467,8 +10933,8 @@ redirect scheme <sch> [code <code>] <option> [{if | unless} <condition>] Arguments : <loc> With "redirect location", the exact value in <loc> is placed into the HTTP "Location" header. When used in an "http-request" rule, - <loc> value follows the log-format rules and can include some - dynamic values (see Custom Log Format in section 8.2.4). + <loc> value follows the Custom log format rules and can include + some dynamic values (see Custom log format in section 8.2.6). <pfx> With "redirect prefix", the "Location" header is built from the concatenation of <pfx> and the complete URI path, including the @@ -10476,9 +10942,9 @@ redirect scheme <sch> [code <code>] <option> [{if | unless} <condition>] below). As a special case, if <pfx> equals exactly "/", then nothing is inserted before the original URI. It allows one to redirect to the same URL (for instance, to insert a cookie). When - used in an "http-request" rule, <pfx> value follows the log-format - rules and can include some dynamic values (see Custom Log Format - in section 8.2.4). + used in an "http-request" rule, <pfx> value follows the Custom + Log Format rules and can include some dynamic values (see Custom + Log Format in section 8.2.6). 
<sch> With "redirect scheme", then the "Location" header is built by concatenating <sch> with "://" then the first occurrence of the @@ -10489,8 +10955,8 @@ redirect scheme <sch> [code <code>] <option> [{if | unless} <condition>] returned, which most recent browsers interpret as redirecting to the same host. This directive is mostly used to redirect HTTP to HTTPS. When used in an "http-request" rule, <sch> value follows - the log-format rules and can include some dynamic values (see - Custom Log Format in section 8.2.4). + the Custom log format rules and can include some dynamic values + (see Custom log format in section 8.2.6). <code> The code is optional. It indicates which type of HTTP redirection is desired. Only codes 301, 302, 303, 307 and 308 are supported, @@ -12367,12 +12833,13 @@ tcp-check expect [min-recv <int>] [comment <msg>] on-success <fmt> is optional and can be used to customize the informational message reported in logs if the expect rule is successfully evaluated and if it is the last rule - in the tcp-check ruleset. <fmt> is a log-format string. + in the tcp-check ruleset. <fmt> is a Custom log format + (see section 8.2.6). on-error <fmt> is optional and can be used to customize the informational message reported in logs if an error occurred during the expect rule evaluation. <fmt> is a - log-format string. + Custom log format (see section 8.2.6). status-code <expr> is optional and can be used to set the check status code reported in logs, on success or on error. <expr> is a @@ -12405,12 +12872,13 @@ tcp-check expect [min-recv <int>] [comment <msg>] will be considered invalid if the body matches the expression. - string-lf <fmt> : test a log-format string match in the response's buffer. + string-lf <fmt> : test a Custom log format match in the response's buffer. A health check response will be considered valid if the response's buffer contains the string resulting of the - evaluation of <fmt>, which follows the log-format rules. 
- If prefixed with "!", then the response will be - considered invalid if the buffer contains the string. + evaluation of <fmt>, which follows the Custom log format + rules described in section 8.2.6. If prefixed with "!", + then the response will be considered invalid if the + buffer contains the string. binary <hexstring> : test the exact string in its hexadecimal form matches in the response buffer. A health check response will @@ -12427,16 +12895,16 @@ tcp-check expect [min-recv <int>] [comment <msg>] pattern should work on at-most half the response buffer size. - binary-lf <hexfmt> : test a log-format string in its hexadecimal form - match in the response's buffer. A health check response - will be considered valid if the response's buffer - contains the hexadecimal string resulting of the - evaluation of <fmt>, which follows the log-format - rules. If prefixed with "!", then the response will be - considered invalid if the buffer contains the - hexadecimal string. The hexadecimal string is converted - in a binary string before matching the response's - buffer. + binary-lf <hexfmt> : test a Custom log format in its hexadecimal form match + in the response's buffer. A health check response will + be considered valid if the response's buffer contains + the hexadecimal string resulting of the evaluation of + <fmt>, which follows the Custom log format rules (see + section 8.2.6). If prefixed with "!", then the + response will be considered invalid if the buffer + contains the hexadecimal string. The hexadecimal + string is converted in a binary string before matching + the response's buffer. It is important to note that the responses will be limited to a certain size defined by the global "tune.bufsize" option, which defaults to 16384 bytes. 
@@ -12475,7 +12943,7 @@ tcp-check expect [min-recv <int>] [comment <msg>] tcp-check send <data> [comment <msg>] tcp-check send-lf <fmt> [comment <msg>] - Specify a string or a log-format string to be sent as a question during a + Specify a string or a Custom log format to be sent as a question during a generic health check May be used in the following contexts: tcp, http, log @@ -12489,8 +12957,8 @@ tcp-check send-lf <fmt> [comment <msg>] <data> is the string that will be sent during a generic health check session. - <fmt> is the log-format string that will be sent, once evaluated, - during a generic health check session. + <fmt> is the Custom log format that will be sent, once evaluated, + during a generic health check session (see section 8.2.6). Examples : # look for the redis master server @@ -12504,7 +12972,7 @@ tcp-check send-lf <fmt> [comment <msg>] tcp-check send-binary <hexstring> [comment <msg>] tcp-check send-binary-lf <hexfmt> [comment <msg>] - Specify an hex digits string or an hex digits log-format string to be sent as + Specify an hex digits string or an hex digits Custom log format to be sent as a binary question during a raw tcp health check May be used in the following contexts: tcp, http, log @@ -12518,9 +12986,9 @@ tcp-check send-binary-lf <hexfmt> [comment <msg>] <hexstring> is the hexadecimal string that will be send, once converted to binary, during a generic health check session. - <hexfmt> is the hexadecimal log-format string that will be send, once + <hexfmt> is the hexadecimal Custom log format that will be send, once evaluated and converted to binary, during a generic health - check session. + check session (see section 8.2.6). Examples : # redis check in binary @@ -12559,8 +13027,8 @@ tcp-check set-var-fmt(<var-name>[,<cond>...]) <fmt> <expr> Is a sample-fetch expression potentially followed by converters. - <fmt> This is the value expressed using log-format rules (see Custom - Log Format in section 8.2.4). 
+ <fmt> This is the value expressed using Custom log format rules (see + Custom log format in section 8.2.6). Examples : tcp-check set-var(check.port) int(1234) @@ -13499,7 +13967,7 @@ transparent (deprecated) See also: "option transparent" -unique-id-format <string> +unique-id-format <fmt> Generate a unique ID for each request. May be used in the following contexts: tcp, http @@ -13508,12 +13976,12 @@ unique-id-format <string> yes | yes | yes | no Arguments : - <string> is a log-format string. + <fmt> is a Custom log format string (see section 8.2.6). This keyword creates a ID for each request using the custom log format. A unique ID is useful to trace a request passing through many components of a complex infrastructure. The newly created ID may also be logged using the - %ID tag the log-format string. + %ID alias in the Custom log format string. The format should be composed from elements that are guaranteed to be unique when combined together. For instance, if multiple HAProxy instances @@ -13572,7 +14040,8 @@ use_backend <backend> [{if | unless} <condition>] Arguments : <backend> is the name of a valid backend or "listen" section, or a - "log-format" string resolving to a backend name. + Custom log format resolving to a backend name (see Custom + Log Format in section 8.2.6). <condition> is a condition composed of ACLs, as described in section 7. If it is omitted, the rule is unconditionally applied. @@ -13604,7 +14073,7 @@ use_backend <backend> [{if | unless} <condition>] When <backend> is a simple name, it is resolved at configuration time, and an error is reported if the specified backend does not exist. If <backend> is - a log-format string instead, no check may be done at configuration time, so + a Custom log format instead, no check may be done at configuration time, so the backend name is resolved dynamically at run time. 
If the resulting backend name does not correspond to any valid backend, no other rule is evaluated, and the default_backend directive is applied instead. Note that @@ -13643,7 +14112,8 @@ use-server <server> unless <condition> Arguments : <server> is the name of a valid server in the same backend section - or a "log-format" string resolving to a server name. + or a Custom log format string resolving to a server name + (see section 8.2.6). <condition> is a condition composed of ACLs, as described in section 7. @@ -13691,10 +14161,10 @@ use-server <server> unless <condition> When <server> is a simple name, it is checked against existing servers in the configuration and an error is reported if the specified server does not exist. - If it is a log-format, no check is performed when parsing the configuration, - and if we can't resolve a valid server name at runtime but the use-server rule - was conditioned by an ACL returning true, no other use-server rule is applied - and we fall back to load balancing. + If it is a Custom log format, no check is performed when parsing the + configuration, and if we can't resolve a valid server name at runtime but the + use-server rule was conditioned by an ACL returning true, no other use-server + rule is applied and we fall back to load balancing. See also: "use_backend", section 5 about server and section 7 about ACLs. 
@@ -13766,12 +14236,16 @@ sc-set-gpt X X X X X X X sc-set-gpt0 X X X X X X X send-spoe-group - - X X X X - set-bandwidth-limit - - X X X X - +set-bc-mark - - X - X - - +set-bc-tos - - X - X - - set-dst X X X - X - - set-dst-port X X X - X - - +set-fc-mark X X X X X X - +set-fc-tos X X X X X X - set-header - - - - X X X set-log-level - - X X X X X set-map - - - - X X X -set-mark X X X X X X - +set-mark (deprecated) X X X X X X - set-method - - - - X - - set-nice - - X X X X - set-path - - - - X - - @@ -13784,7 +14258,7 @@ set-src X X X - X - - set-src-port X X X - X - - set-status - - - - - X X set-timeout - - - - X X - -set-tos X X X X X X - +set-tos (deprecated) X X X X X X - set-uri - - - - X - - set-var X X X X X X X set-var-fmt X X X X X X X @@ -13827,10 +14301,10 @@ add-acl(<file-name>) <key fmt> This is used to add a new entry into an ACL. The ACL must be loaded from a file (even a dummy empty file). The file name of the ACL to be updated is passed between parentheses. It takes one argument: <key fmt>, which follows - log-format rules, to collect content of the new entry. It performs a lookup - in the ACL before insertion, to avoid duplicated (or more) values. - It is the equivalent of the "add acl" command from the stats socket, but can - be triggered by an HTTP request. + Custom log format rules described in section 8.2.6, to collect content of the + new entry. It performs a lookup in the ACL before insertion, to avoid + duplicated (or more) values. It is the equivalent of the "add acl" command + from the stats socket, but can be triggered by an HTTP request. add-header <name> <fmt> @@ -13838,13 +14312,13 @@ add-header <name> <fmt> - | - | - | - | X | X | X This appends an HTTP header field whose name is specified in <name> and - whose value is defined by <fmt> which follows the log-format rules (see - Custom Log Format in section 8.2.4). 
This is particularly useful to pass + whose value is defined by <fmt> which follows the Custom log format rules + (see Custom log format in section 8.2.6). This is particularly useful to pass connection-specific information to the server (e.g. the client's SSL - certificate), or to combine several headers into one. This rule is not - final, so it is possible to add other similar rules. Note that header - addition is performed immediately, so one rule might reuse the resulting - header from a previous rule. + certificate), or to combine several headers into one. This rule is not final, + so it is possible to add other similar rules. Note that header addition is + performed immediately, so one rule might reuse the resulting header from a + previous rule. allow @@ -13868,20 +14342,19 @@ attach-srv <srv> [name <expr>] [ EXPERIMENTAL ] pool of server <srv>. This may only be used with servers having an 'rhttp@' address. - An extra parameter <expr> can be specified. Its value is interpreted as a - sample expression to name the connection inside the server idle pool. When - routing an outgoing request through this server, this name will be matched - against the 'sni' parameter of the server line. Otherwise, the connection - will have no name and will only match requests without SNI. - - This rule is only valid for frontend in HTTP mode. Also all listeners must - not require a protocol different from HTTP/2. + The connection is inserted into the server idle pool with a name defined by + the result of the <expr> evaluation. This is the name that will be matched + against by requests subject to "pool-conn-name" or "sni" parameter. See + "http-reuse" for more details. Reverse HTTP is currently still in active development. Configuration mechanism may change in the future. For this reason it is internally marked as experimental, meaning that "expose-experimental-directives" must appear on a line before this directive. 
+ Note that a very similar but independent protocol is under development. See + https://www.ietf.org/archive/id/draft-bt-httpbis-reverse-http-00.html. + auth [realm <realm>] Usable in: TCP RqCon| RqSes| RqCnt| RsCnt| HTTP Req| Res| Aft - | - | - | - | X | - | - @@ -13935,7 +14408,7 @@ capture <sample> [ len <length> | id <id> ] This captures sample expression <sample> from the request or response buffer, and converts it to a string of at most <len> characters. The resulting string - is stored into the next "capture" slot (either request or reponse), so it + is stored into the next "capture" slot (either request or response), so it will possibly appear next to some captured HTTP headers. It will then automatically appear in the logs, and it will be possible to extract it using sample fetch methods to feed it into headers or anything. The length should @@ -13974,9 +14447,9 @@ del-acl(<file-name>) <key fmt> This is used to delete an entry from an ACL. The ACL must be loaded from a file (even a dummy empty file). The file name of the ACL to be updated is passed between parentheses. It takes one argument: <key fmt>, which follows - log-format rules, to collect content of the entry to delete. - It is the equivalent of the "del acl" command from the stats socket, but can - be triggered by an HTTP request or response. + Custom log format rules of section 8.2.6, to collect content of the entry to + delete. It is the equivalent of the "del acl" command from the stats socket, + but can be triggered by an HTTP request or response. del-header <name> [ -m <meth> ] @@ -13990,17 +14463,17 @@ del-header <name> [ -m <meth> ] method is used. -del-map(<file-name>) <key fmt> +del-map(<map-name>) <key fmt> Usable in: TCP RqCon| RqSes| RqCnt| RsCnt| HTTP Req| Res| Aft - | - | - | - | X | X | X - This is used to delete an entry from a MAP. The MAP must be loaded from a - file (even a dummy empty file). The file name of the MAP to be updated is - passed between parentheses. 
It takes one argument: <key fmt>, which follows - log-format rules, to collect content of the entry to delete. - It takes one argument: "file name" It is the equivalent of the "del map" - command from the stats socket, but can be triggered by an HTTP request or - response. + This is used to delete an entry from a MAP. <map-name> must follow the format + described in 2.7. about name format for maps and ACLs. The name of the MAP to + be updated is passed between parentheses. It takes one argument: <key fmt>, + which follows Custom log format rules of section 8.2.6, to collect content of + the entry to delete. It takes one argument: "file name" It is the equivalent + of the "del map" command from the stats socket, but can be triggered by an + HTTP request or response. deny [ { status | deny_status } <code> ] [ content-type <type> ] @@ -14095,10 +14568,10 @@ early-hint <name> <fmt> This is used to build an HTTP 103 Early Hints response prior to any other one. This appends an HTTP header field to this response whose name is specified in - <name> and whose value is defined by <fmt> which follows the log-format rules - (see Custom Log Format in section 8.2.4). This is particularly useful to pass - to the client some Link headers to preload resources required to render the - HTML documents. + <name> and whose value is defined by <fmt> which follows the Custom Log + Format rules (see Custom log format in section 8.2.6). This is particularly + useful to pass to the client some Link headers to preload resources required + to render the HTML documents. See RFC 8297 for more information. @@ -14285,7 +14758,7 @@ redirect <rule> This performs an HTTP redirection based on a redirect rule. This is exactly the same as the "redirect" statement except that it inserts a redirect rule which is processed in the middle of other "http-request" or "http-response" - rules and that these rules use the "log-format" strings. 
For responses, only + rules and that these rules use the Custom log format. For responses, only the "location" type of redirect is permitted. In addition, when a redirect is performed during a response, the transfer from the server to HAProxy is interrupted so that no payload can be forwarded to the client. This may cause @@ -14511,19 +14984,20 @@ return [ status <code> ] [ content-type <type> ] used as the response payload. If the file is not empty, its content-type must be set as argument to "content-type". Otherwise, any "content-type" argument is ignored. With a "lf-file" argument, the file's content is - evaluated as a log-format string. With a "file" argument, it is considered - as a raw content. + evaluated as a Custom log format (see section 8.2.6). With a "file" + argument, it is considered as a raw content. * If a "string" or "lf-string" argument is specified, the defined string is used as the response payload. The content-type must always be set as argument to "content-type". With a "lf-string" argument, the string is - evaluated as a log-format string. With a "string" argument, it is - considered as a raw string. + evaluated as a Custom log format (see section 8.2.6). With a "string" + argument, it is considered as a raw string. When the response is not based on an errorfile, it is possible to append HTTP header fields to the response using "hdr" arguments. Otherwise, all "hdr" arguments are ignored. For each one, the header name is specified in <name> - and its value is defined by <fmt> which follows the log-format rules. + and its value is defined by <fmt> which follows the Custom log format rules + described in section 8.2.6. Note that the generated response must be smaller than a buffer. And to avoid any warning, when an errorfile or a raw file is loaded, the buffer space @@ -14674,6 +15148,42 @@ set-bandwidth-limit <name> [limit {<expr> | <size>}] [period {<expr> | <time>}] See section 9.7 about bandwidth limitation filter setup. 
+set-bc-mark { <mark> | <expr> } + Usable in: TCP RqCon| RqSes| RqCnt| RsCnt| HTTP Req| Res| Aft + - | - | X | - | X | - | - + + This is used to set the Netfilter/IPFW MARK on the backend connection (all + packets sent to the server) to the value passed in <mark> or <expr> on + platforms which support it. This value is an unsigned 32 bit value which can + be matched by netfilter/ipfw and by the routing table or monitoring the + packets through DTrace. <mark> can be expressed both in decimal or hexadecimal + format (prefixed by "0x"). Alternatively, <expr> can be used: it is a standard + HAProxy expression formed by a sample-fetch followed by some converters which + must resolve to integer type. This action can be useful to force certain + packets to take a different route (for example a cheaper network path for bulk + downloads). This works on Linux kernels 2.6.32 and above and requires admin + privileges, as well as on FreeBSD and OpenBSD. The mark will be set for the whole + duration of the backend/server connection (from connect to close). + + +set-bc-tos { <tos> | <expr> } + Usable in: TCP RqCon| RqSes| RqCnt| RsCnt| HTTP Req| Res| Aft + - | - | X | - | X | - | - + + This is used to set the TOS or DSCP field value on the backend connection + (all packets sent to the server) to the value passed in <tos> or <expr> on + platforms which support this. This value represents the whole 8 bits of the + IP TOS field. Note that only the 6 higher bits are used in DSCP or TOS, and + the two lower bits are always 0. Alternatively, <expr> can be used: it is a + standard HAProxy expression formed by a sample-fetch followed by some + converters which must resolve to integer type. This action can be used to + adjust some routing behavior on inner routers based on some information from + the request. The tos will be set for the whole duration of the backend/server + connection (from connect to close). + + See RFC 2474, 2597, 3260 and 4594 for more information.
+ + set-dst <expr> Usable in: TCP RqCon| RqSes| RqCnt| RsCnt| HTTP Req| Res| Aft X | X | X | - | X | - | - @@ -14717,6 +15227,39 @@ set-dst-port <expr> destination address to IPv4 "0.0.0.0" before rewriting the port. +set-fc-mark { <mark> | <expr> } + Usable in: TCP RqCon| RqSes| RqCnt| RsCnt| HTTP Req| Res| Aft + X | X | X | X | X | X | - + + This is used to set the Netfilter/IPFW MARK on all packets sent to the client + to the value passed in <mark> or <expr> on platforms which support it. This + value is an unsigned 32 bit value which can be matched by netfilter/ipfw and + by the routing table or monitoring the packets through DTrace. <mark> can be + expressed both in decimal or hexadecimal format (prefixed by "0x"). + Alternatively, <expr> can be used: it is a standard HAProxy expression formed + by a sample-fetch followed by some converters which must resolve to integer + type. This action can be useful to force certain packets to take a different + route (for example a cheaper network path for bulk downloads). This works on + Linux kernels 2.6.32 and above and requires admin privileges, as well as on + FreeBSD and OpenBSD. + + +set-fc-tos { <tos> | <expr> } + Usable in: TCP RqCon| RqSes| RqCnt| RsCnt| HTTP Req| Res| Aft + X | X | X | X | X | X | - + + This is used to set the TOS or DSCP field value of packets sent to the client + to the value passed in <tos> or <expr> on platforms which support this. This + value represents the whole 8 bits of the IP TOS field. Note that only the 6 + higher bits are used in DSCP or TOS, and the two lower bits are always 0. + Alternatively, <expr> can be used: it is a standard HAProxy expression formed + by a sample-fetch followed by some converters which must resolve to integer + type. This action can be used to adjust some routing behavior on border + routers based on some information from the request. + + See RFC 2474, 2597, 3260 and 4594 for more information.
+ + set-header <name> <fmt> Usable in: TCP RqCon| RqSes| RqCnt| RsCnt| HTTP Req| Res| Aft - | - | - | - | X | X | X @@ -14751,33 +15294,23 @@ set-log-level <level> can be useful to disable health checks coming from another equipment. -set-map(<file-name>) <key fmt> <value fmt> +set-map(<map-name>) <key fmt> <value fmt> Usable in: TCP RqCon| RqSes| RqCnt| RsCnt| HTTP Req| Res| Aft - | - | - | - | X | X | X - This is used to add a new entry into a map. The map must be loaded from a - file (even a dummy empty file). The file name of the map to be updated is - passed between parentheses. It takes 2 arguments: <key fmt>, which follows - log-format rules, used to collect map key, and <value fmt>, which follows - log-format rules, used to collect content for the new entry. - It performs a lookup in the map before insertion, to avoid duplicated (or - more) values. It is the equivalent of the "set map" command from the - stats socket, but can be triggered by an HTTP request. + This is used to add a new entry into a map. <map-name> must follow the format + described in 2.7. about name format for maps and ACLs. The name of the MAP to + be updated is passed between parentheses. It takes 2 arguments: <key fmt>, + which follows Custom log format rules described in section 8.2.6, used to + collect map key, and <value fmt>, which follows Custom log format rules, used + to collect content for the new entry. It performs a lookup in the map before + insertion, to avoid duplicated (or more) values. It is the equivalent of the + "set map" command from the stats socket, but can be triggered by an HTTP + request. -set-mark <mark> - Usable in: TCP RqCon| RqSes| RqCnt| RsCnt| HTTP Req| Res| Aft - X | X | X | X | X | X | - - - This is used to set the Netfilter/IPFW MARK on all packets sent to the client - to the value passed in <mark> on platforms which support it. 
This value is an - unsigned 32 bit value which can be matched by netfilter/ipfw and by the - routing table or monitoring the packets through DTrace. It can be expressed - both in decimal or hexadecimal format (prefixed by "0x"). - This can be useful to force certain packets to take a different route (for - example a cheaper network path for bulk downloads). This works on Linux - kernels 2.6.32 and above and requires admin privileges, as well on FreeBSD - and OpenBSD. +set-mark <mark> (deprecated) + This is an alias for "set-fc-mark" (which should be used instead). set-method <fmt> @@ -14964,19 +15497,8 @@ set-timeout { client | server | tunnel } { <timeout> | <expr> } http-response set-timeout server res.hdr(X-Refresh-Seconds),mul(1000) -set-tos <tos> - Usable in: TCP RqCon| RqSes| RqCnt| RsCnt| HTTP Req| Res| Aft - X | X | X | X | X | X | - - - This is used to set the TOS or DSCP field value of packets sent to the client - to the value passed in <tos> on platforms which support this. This value - represents the whole 8 bits of the IP TOS field, and can be expressed both in - decimal or hexadecimal format (prefixed by "0x"). Note that only the 6 higher - bits are used in DSCP or TOS, and the two lower bits are always 0. This can - be used to adjust some routing behavior on border routers based on some - information from the request. - - See RFC 2474, 2597, 3260 and 4594 for more information. +set-tos <tos> (deprecated) + This is an alias for "set-fc-tos" (which should be used instead). set-uri <fmt> @@ -15024,8 +15546,8 @@ set-var-fmt(<var-name>[,<cond>...]) <fmt> <expr> Is a standard HAProxy expression formed by a sample-fetch followed by some converters. - <fmt> This is the value expressed using log-format rules (see Custom - Log Format in section 8.2.4). + <fmt> This is the value expressed using Custom log format rules (see + Custom log format in section 8.2.6). 
All scopes are usable for HTTP rules, but scopes "proc" and "sess" are the only usable ones in rule sets which do not have access to contents such as @@ -15058,7 +15580,7 @@ silent-drop [ rst-ttl <ttl> ] the RST packet travels through the local infrastructure, deleting the connection in firewalls and other systems, but disappears before reaching the client. Future packets from the client will then be dropped already by - front equipments. These local RSTs protect local resources, but not the + front equipment. These local RSTs protect local resources, but not the client's. This must not be used unless the consequences of doing this are fully understood. @@ -15152,7 +15674,7 @@ track-sc2 <key> [table <table>] <key> is mandatory, and is a sample expression rule as described in section 7.3. It describes what elements of the incoming connection, - request or reponse will be analyzed, extracted, combined, and used + request or response will be analyzed, extracted, combined, and used to select which table entry to update the counters. <table> is an optional table to be used instead of the default one, which @@ -15224,7 +15746,7 @@ wait-for-body time <time> [ at-least <bytes> ] case HAProxy will respond with a 408 "Request Timeout" error to the client and stop processing the request. Note that if any of the other conditions happens first, this timeout will not occur even if the full body has - not yet been recieved. + not yet been received. This action may be used as a replacement for "option http-buffer-request". @@ -15480,7 +16002,13 @@ crt <cert> match any certificate, then the first loaded certificate will be presented. This means that when loading certificates from a directory, it is highly recommended to load the default one first as a file or to ensure that it will - always be the first one in the directory. + always be the first one in the directory. 
In order to choose multiple default certificates (1 rsa and 1 ecdsa), there are 3 options: + - A multi-cert bundle can be configured as the first certificate + (`crt foobar.pem` in the configuration where the existing files + are `foobar.pem.ecdsa` and `foobar.pem.rsa`). + - Or a '*' filter for each certificate in a crt-list line. + - The 'default-crt' keyword can be used. Note that the same cert may be loaded multiple times without side effects. @@ -15538,11 +16066,38 @@ crt-list <file> <crtfile> [\[<sslbindconf> ...\]] [[!]<snifilter> ...] - sslbindconf supports "allow-0rtt", "alpn", "ca-file", "ca-verify-file", - "ciphers", "ciphersuites", "crl-file", "curves", "ecdhe", "no-ca-names", - "npn", "verify" configuration. With BoringSSL and Openssl >= 1.1.1 - "ssl-min-ver" and "ssl-max-ver" are also supported. It overrides the - configuration set in bind line for the certificate. + sslbindconf supports the following keywords from the bind line + (see Section 5.1. Bind options): + + - allow-0rtt + - alpn + - ca-file + - ca-verify-file + - ciphers + - ciphersuites + - client-sigalgs + - crl-file + - curves + - ecdhe + - no-alpn + - no-ca-names + - npn + - sigalgs + - ssl-min-ver + - ssl-max-ver + - verify + + sslbindconf also supports the following keywords from the crt-store load + keyword (see Section 3.12.1. Load options): + + - crt + - key + - ocsp + - issuer + - sctl + - ocsp-update + + It overrides the configuration set in bind line for the certificate. Wildcards are supported in the SNI filter. Negative filter are also supported, useful in combination with a wildcard filter to exclude a particular SNI, or @@ -15567,7 +16122,10 @@ crt-list <file> filter is found on any crt-list. The SNI filter !* can be used after the first declared certificate to not include its CN and SAN in the SNI tree, so it will never match except if no other certificate matches. This way the first - declared certificate act as a fallback.
It is also possible to declare a '*' + filter, which will allow choosing this certificate as default. When multiple + default certificates are defined, HAProxy is able to choose the right ECDSA or + RSA one depending on what the client supports. When no ALPN is set, the "bind" line's default one is used. If a "bind" line has no "no-alpn", "alpn" nor "npn" set, a default value will be used @@ -15581,6 +16139,25 @@ crt-list <file> cert2.pem [alpn h2,http/1.1] certW.pem *.domain.tld !secure.domain.tld certS.pem [curves X25519:P-256 ciphers ECDHE-ECDSA-AES256-GCM-SHA384] secure.domain.tld + default.pem.rsa * + default.pem.ecdsa * + +default-crt <cert> + This option does the same as the "crt" option, with the difference that this + certificate will be used as a default one. It is possible to add multiple + default certificates to have an ECDSA and an RSA one, having more is not + really useful. + + A default certificate is used when no "strict-sni" option is used on the bind + line. A default certificate is provided when the servername extension was not + used by the client, or when the servername does not match any configured + certificate. + + Example: + + bind *:443 default-crt foobar.pem.rsa default-crt foobar.pem.ecdsa crt website.pem.rsa + + See also the "crt" keyword. defer-accept Is an optional keyword which is supported only on certain Linux kernels. It @@ -15660,6 +16237,12 @@ group <group> "gid" setting except that the group name is used instead of its gid. This setting is ignored by non UNIX sockets. +guid-prefix <string> + Generate case-sensitive global unique IDs for each listening socket + allocated on this bind line. Prefix will be concatenated to listeners + position index on the current bind line, with character '-' as separator. See + "guid" proxy keyword description for more information on its format. + id <id> Fixes the socket ID. By default, socket IDs are automatically assigned, but sometimes it is more convenient to fix them to ease monitoring.
This value @@ -15843,87 +16426,6 @@ npn <protocols> at the time of writing this. It is possible to enable both NPN and ALPN though it probably doesn't make any sense out of testing. -ocsp-update [ off | on ] (crt-list only) - Enable automatic OCSP response update when set to 'on', disable it otherwise. - Its value defaults to 'off'. - Please note that for now, this option can only be used in a crt-list line, it - cannot be used directly on a bind line. It lies in this "Bind options" - section because it is still a frontend option. This limitation was set so - that the option applies to only one certificate at a time. - If a given certificate is used in multiple crt-lists with different values of - the 'ocsp-update' set, an error will be raised. Here is an example - configuration enabling it: - - haproxy.cfg: - frontend fe - bind :443 ssl crt-list haproxy.list - - haproxy.list: - server_cert.pem [ocsp-update on] foo.bar - - When the option is set to 'on', we will try to get an ocsp response whenever - an ocsp uri is found in the frontend's certificate. The only limitation of - this mode is that the certificate's issuer will have to be known in order for - the OCSP certid to be built. - Each OCSP response will be updated at least once an hour, and even more - frequently if a given OCSP response has an expire date earlier than this one - hour limit. A minimum update interval of 5 minutes will still exist in order - to avoid updating too often responses that have a really short expire time or - even no 'Next Update' at all. Because of this hard limit, please note that - when auto update is set to 'on' or 'auto', any OCSP response loaded during - init will not be updated until at least 5 minutes, even if its expire time - ends before now+5m. This should not be too much of a hassle since an OCSP - response must be valid when it gets loaded during init (its expire time must - be in the future) so it is unlikely that this response expires in such a - short time after init. 
- On the other hand, if a certificate has an OCSP uri specified and no OCSP - response, setting this option to 'on' for the given certificate will ensure - that the OCSP response gets fetched automatically right after init. - The default minimum and maximum delays (5 minutes and 1 hour respectively) - can be configured by the "tune.ssl.ocsp-update.maxdelay" and - "tune.ssl.ocsp-update.mindelay" global options. - - Whenever an OCSP response is updated by the auto update task or following a - call to the "update ssl ocsp-response" CLI command, a dedicated log line is - emitted. It follows a dedicated log-format that contains the following header - "%ci:%cp [%tr] %ft" and is followed by specific OCSP-related information: - - the path of the corresponding frontend certificate - - a numerical update status - - a textual update status - - the number of update failures for the given response - - the number of update successes for the givan response - See "show ssl ocsp-updates" CLI command for a full list of error codes and - error messages. This line is emitted regardless of the success or failure of - the concerned OCSP response update. - The OCSP request/response is sent and received through an http_client - instance that has the dontlog-normal option set and that uses the regular - HTTP log format in case of error (unreachable OCSP responder for instance). - If such an error occurs, another log line that contains HTTP-related - information will then be emitted alongside the "regular" OCSP one (which will - likely have "HTTP error" as text status). But if a purely HTTP error happens - (unreachable OCSP responder for instance), an extra log line that follows the - regular HTTP log-format will be emitted. 
- Here are two examples of such log lines, with a successful OCSP update log - line first and then an example of an HTTP error with the two different lines - (lines were spit and the URL was shortened for readability): - <134>Mar 6 11:16:53 haproxy[14872]: -:- [06/Mar/2023:11:16:52.808] \ - <OCSP-UPDATE> /path_to_cert/foo.pem 1 "Update successful" 0 1 - - <134>Mar 6 11:18:55 haproxy[14872]: -:- [06/Mar/2023:11:18:54.207] \ - <OCSP-UPDATE> /path_to_cert/bar.pem 2 "HTTP error" 1 0 - <134>Mar 6 11:18:55 haproxy[14872]: -:- [06/Mar/2023:11:18:52.200] \ - <OCSP-UPDATE> -/- 2/0/-1/-1/3009 503 217 - - SC-- 0/0/0/0/3 0/0 {} \ - "GET http://127.0.0.1:12345/MEMwQT HTTP/1.1" - - Troubleshooting: - A common error that can happen with let's encrypt certificates is if the DNS - resolution provides an IPv6 address and your system does not have a valid - outgoing IPv6 route. In such a case, you can either create the appropriate - route or set the "httpclient.resolvers.prefer ipv4" option in the global - section. - In case of "OCSP response check failure" error, you might want to check that - the issuer certificate that you provided is valid. - prefer-client-ciphers Use the client's preference when selecting the cipher suite, by default the server's preference is enforced. This option is also available on @@ -16053,9 +16555,9 @@ ssl-min-ver [ SSLv3 | TLSv1.0 | TLSv1.1 | TLSv1.2 | TLSv1.3 ] strict-sni This setting is only available when support for OpenSSL was built in. The - SSL/TLS negotiation is allow only if the client provided an SNI which match + SSL/TLS negotiation is allowed only if the client provided an SNI that matches a certificate. The default certificate is not used. This option also allows - to start without any certificate on a bind line, so an empty directory could + starting without any certificate on a bind line, so an empty directory could be used and filled later from the stats socket. See the "crt" option for more information. 
See "add ssl crt-list" command in the management guide. @@ -16144,7 +16646,7 @@ thread [<thread-group>/]<thread-set>[,...] lines and their assignment to multiple groups of threads. This keyword is compatible with reverse HTTP binds. However, it is forbidden - to specify a thread set which spans accross several thread groups for such a + to specify a thread set which spans across several thread groups for such a listener as this may caused "nbconn" to not work as intended. tls-ticket-keys <keyfile> @@ -16233,6 +16735,8 @@ keywords, except "id" which is only supported by "server". The currently supported settings are the following ones. addr <ipv4|ipv6> + May be used in the following contexts: tcp, http, log + Using the "addr" parameter, it becomes possible to use a different IP address to send health-checks or to probe the agent-check. On some servers, it may be desirable to dedicate an IP address to specific component able to perform @@ -16241,6 +16745,8 @@ addr <ipv4|ipv6> "port" parameter. agent-check + May be used in the following contexts: tcp, http, log + Enable an auxiliary agent check which is run independently of a regular health check. An agent health check is performed by making a TCP connection to the port set by the "agent-port" parameter and reading an ASCII string @@ -16302,6 +16808,8 @@ agent-check and "no-agent-check" parameters. agent-send <string> + May be used in the following contexts: tcp, http, log + If this option is specified, HAProxy will send the given string (verbatim) to the agent server upon connection. You could, for example, encode the backend name into this string, which would enable your agent to send @@ -16309,6 +16817,8 @@ agent-send <string> you want to terminate your request with a newline. agent-inter <delay> + May be used in the following contexts: tcp, http, log + The "agent-inter" parameter sets the interval between two agent checks to <delay> milliseconds. If left unspecified, the delay defaults to 2000 ms. 
@@ -16325,6 +16835,8 @@ agent-inter <delay> See also the "agent-check" and "agent-port" parameters. agent-addr <addr> + May be used in the following contexts: tcp, http, log + The "agent-addr" parameter sets address for agent check. You can offload agent-check to another target, so you can make single place @@ -16333,16 +16845,22 @@ agent-addr <addr> hostname, it will be resolved. agent-port <port> + May be used in the following contexts: tcp, http, log + The "agent-port" parameter sets the TCP port used for agent checks. See also the "agent-check" and "agent-inter" parameters. allow-0rtt + May be used in the following contexts: tcp, http, log, peers, ring + Allow sending early data to the server when using TLS 1.3. Note that early data will be sent only if the client used early data, or if the backend uses "retry-on" with the "0rtt-rejected" keyword. alpn <protocols> + May be used in the following contexts: tcp, http + This enables the TLS ALPN extension and advertises the specified protocol list as supported on top of ALPN. The protocol list consists in a comma- delimited list of protocol names, for instance: "http/1.1,http/1.0" (without @@ -16359,6 +16877,8 @@ alpn <protocols> See also "ws" to use an alternative ALPN for websocket streams. backup + May be used in the following contexts: tcp, http, log + When "backup" is present on a server line, the server is only used in load balancing when all other non-backup servers are unavailable. Requests coming with a persistence cookie referencing the server will always be served @@ -16367,6 +16887,8 @@ backup "allbackups" options. ca-file <cafile> + May be used in the following contexts: tcp, http, log, peers, ring + This setting is only available when support for OpenSSL was built in. It designates a PEM file from which to load CA certificates used to verify server's certificate. 
It is possible to load a directory containing multiple @@ -16378,6 +16900,8 @@ ca-file <cafile> overwritten by setting the SSL_CERT_DIR environment variable. check + May be used in the following contexts: tcp, http, log + This option enables health checks on a server: - when not set, no health checking is performed, and the server is always considered available. @@ -16435,6 +16959,8 @@ check server s1 192.168.0.1:443 ssl check check-send-proxy + May be used in the following contexts: tcp, http + This option forces emission of a PROXY protocol line with outgoing health checks, regardless of whether the server uses send-proxy or not for the normal traffic. By default, the PROXY protocol is enabled for health checks @@ -16444,11 +16970,15 @@ check-send-proxy protocol. See also the "send-proxy" option for more information. check-alpn <protocols> + May be used in the following contexts: tcp, http + Defines which protocols to advertise with ALPN. The protocol list consists in a comma-delimited list of protocol names, for instance: "http/1.1,http/1.0" (without quotes). If it is not set, the server ALPN is used. check-proto <name> + May be used in the following contexts: tcp, http + Forces the multiplexer's protocol to use for the server's health-check connections. It must be compatible with the health-check type (TCP or HTTP). It must also be usable on the backend side. The list of available @@ -16472,11 +17002,15 @@ check-proto <name> If not defined, the server one will be used, if set. check-sni <sni> + May be used in the following contexts: tcp, http, log + This option allows you to specify the SNI to be used when doing health checks over SSL. It is only possible to use a string to set <sni>. If you want to set a SNI for proxied traffic, see "sni". check-ssl + May be used in the following contexts: tcp, http, log + This option forces encryption of all health checks over SSL, regardless of whether the server uses SSL or not for the normal traffic. 
This is generally used when an explicit "port" or "addr" directive is specified and SSL health @@ -16489,11 +17023,15 @@ check-ssl this option. check-via-socks4 + May be used in the following contexts: tcp, http, log + This option enables outgoing health checks using upstream socks4 proxy. By default, the health checks won't go through socks tunnel even it was enabled for normal traffic. ciphers <ciphers> + May be used in the following contexts: tcp, http, log, peers, ring + This setting is only available when support for OpenSSL was built in. This option sets the string describing the list of cipher algorithms that is negotiated during the SSL/TLS handshake with the server. The format of the @@ -16504,6 +17042,8 @@ ciphers <ciphers> cipher configuration, please check the "ciphersuites" keyword. ciphersuites <ciphersuites> + May be used in the following contexts: tcp, http, log, peers, ring + This setting is only available when support for OpenSSL was built in and OpenSSL 1.1.1 or later was used to build HAProxy. This option sets the string describing the list of cipher algorithms that is negotiated during the TLS @@ -16513,6 +17053,8 @@ ciphersuites <ciphersuites> keyword. client-sigalgs <sigalgs> + May be used in the following contexts: tcp, http, log, peers, ring + This setting is only available when support for OpenSSL was built in. It sets the string describing the list of signature algorithms related to client authentication that are negotiated . The format of the string is defined in @@ -16520,6 +17062,8 @@ client-sigalgs <sigalgs> recommended to use this setting if no specific usecase was identified. cookie <value> + May be used in the following contexts: http + The "cookie" parameter sets the cookie value assigned to the server to <value>. This value will be checked in incoming requests, and the first operational server possessing the same value will be selected. In return, in @@ -16529,11 +17073,15 @@ cookie <value> backup servers. 
See also the "cookie" keyword in backend section. crl-file <crlfile> + May be used in the following contexts: tcp, http, log, peers, ring + This setting is only available when support for OpenSSL was built in. It designates a PEM file from which to load certificate revocation list used to verify server's certificate. crt <cert> + May be used in the following contexts: tcp, http, log, peers, ring + This setting is only available when support for OpenSSL was built in. It designates a PEM file from which to load both a certificate and the associated private key. This file can be built by concatenating both PEM @@ -16545,6 +17093,8 @@ crt <cert> option is set accordingly). curves <curves> + May be used in the following contexts: tcp, http, log, peers, ring + This setting is only available when support for OpenSSL was built in. It sets the string describing the list of elliptic curves algorithms ("curve suite") that are negotiated during the SSL/TLS handshake with ECDHE. The format of the @@ -16552,6 +17102,8 @@ curves <curves> Example: "X25519:P-256" (without quote) disabled + May be used in the following contexts: tcp, http, log + The "disabled" keyword starts the server in the "disabled" state. That means that it is marked down in maintenance mode, and no connection other than the ones allowed by persist mode will reach it. It is very well suited to setup @@ -16560,6 +17112,8 @@ disabled See also "enabled" setting. enabled + May be used in the following contexts: tcp, http, log + This option may be used as 'server' setting to reset any 'disabled' setting which would have been inherited from 'default-server' directive as default value. @@ -16567,6 +17121,8 @@ enabled 'default-server' 'disabled' setting. error-limit <count> + May be used in the following contexts: tcp, http, log + If health observing is enabled, the "error-limit" parameter specifies the number of consecutive errors that triggers event selected by the "on-error" option. 
By default it is set to 10 consecutive errors. @@ -16574,42 +17130,63 @@ error-limit <count> See also the "check", "error-limit" and "on-error". fall <count> + May be used in the following contexts: tcp, http, log + The "fall" parameter states that a server will be considered as dead after <count> consecutive unsuccessful health checks. This value defaults to 3 if unspecified. See also the "check", "inter" and "rise" parameters. force-sslv3 + May be used in the following contexts: tcp, http, log, peers, ring + This option enforces use of SSLv3 only when SSL is used to communicate with the server. SSLv3 is generally less expensive than the TLS counterparts for high connection rates. This option is also available on global statement "ssl-default-server-options". See also "ssl-min-ver" and ssl-max-ver". force-tlsv10 + May be used in the following contexts: tcp, http, log, peers, ring + This option enforces use of TLSv1.0 only when SSL is used to communicate with the server. This option is also available on global statement "ssl-default-server-options". See also "ssl-min-ver" and ssl-max-ver". force-tlsv11 + May be used in the following contexts: tcp, http, log, peers, ring + This option enforces use of TLSv1.1 only when SSL is used to communicate with the server. This option is also available on global statement "ssl-default-server-options". See also "ssl-min-ver" and ssl-max-ver". force-tlsv12 + May be used in the following contexts: tcp, http, log, peers, ring + This option enforces use of TLSv1.2 only when SSL is used to communicate with the server. This option is also available on global statement "ssl-default-server-options". See also "ssl-min-ver" and ssl-max-ver". force-tlsv13 + May be used in the following contexts: tcp, http, log, peers, ring + This option enforces use of TLSv1.3 only when SSL is used to communicate with the server. This option is also available on global statement "ssl-default-server-options". See also "ssl-min-ver" and ssl-max-ver". 
+guid <string> + Specify a case-sensitive global unique ID for this server. This must be + unique across all haproxy configuration on every object type. See "guid" + proxy keyword description for more information on its format. + id <value> + May be used in the following contexts: tcp, http, log + Set a persistent ID for the server. This ID must be positive and unique for the proxy. An unused ID will automatically be assigned if unset. The first assigned value will be 1. This ID is currently only returned in statistics. init-addr {last | libc | none | <ip>},[...]* + May be used in the following contexts: tcp, http, log + Indicate in what order the server's address should be resolved upon startup if it uses an FQDN. Attempts are made to resolve the address by applying in turn each of the methods mentioned in the comma-delimited list. The first @@ -16639,6 +17216,8 @@ init-addr {last | libc | none | <ip>},[...]* inter <delay> fastinter <delay> downinter <delay> + May be used in the following contexts: tcp, http, log + The "inter" parameter sets the interval between two consecutive health checks to <delay> milliseconds. If left unspecified, the delay defaults to 2000 ms. It is also possible to use "fastinter" and "downinter" to optimize delays @@ -16674,6 +17253,8 @@ downinter <delay> reduce the time spent in the queue. log-bufsize <bufsize> + May be used in the following contexts: log + The "log-bufsize" specifies the ring bufsize to use for the implicit ring that will be associated to the log server in a log backend. When not specified, this defaults to BUFSIZE. Use of a greater value will increase @@ -16682,12 +17263,16 @@ log-bufsize <bufsize> This keyword may only be used in log backend sections (with "mode log") log-proto <logproto> + May be used in the following contexts: log, ring + The "log-proto" specifies the protocol used to forward event messages to a server configured in a log or ring section.
Possible values are "legacy" and "octet-count" corresponding respectively to "Non-transparent-framing" and "Octet counting" in rfc6587. "legacy" is the default. maxconn <maxconn> + May be used in the following contexts: tcp, http + The "maxconn" parameter specifies the maximal number of concurrent connections that will be sent to this server. If the number of incoming concurrent connections goes higher than this value, they will be queued, @@ -16704,6 +17289,8 @@ maxconn <maxconn> than 50 concurrent requests. maxqueue <maxqueue> + May be used in the following contexts: tcp, http + The "maxqueue" parameter specifies the maximal number of connections which will wait in the queue for this server. If this limit is reached, next requests will be redispatched to other servers instead of indefinitely @@ -16717,6 +17304,8 @@ maxqueue <maxqueue> and "balance leastconn". max-reuse <count> + May be used in the following contexts: http + The "max-reuse" argument indicates the HTTP connection processors that they should not reuse a server connection more than this number of times to send new requests. Permitted values are -1 (the default), which disables this @@ -16727,6 +17316,8 @@ max-reuse <count> enforce. At least HTTP/2 connections to servers will respect it. minconn <minconn> + May be used in the following contexts: tcp, http + When the "minconn" parameter is set, the maxconn limit becomes a dynamic limit following the backend's load. The server will always accept at least <minconn> connections, never more than <maxconn>, and the limit will be on @@ -16737,12 +17328,16 @@ minconn <minconn> and "maxqueue" parameters, as well as the "fullconn" backend keyword. namespace <name> + May be used in the following contexts: tcp, http, log, peers, ring + On Linux, it is possible to specify which network namespace a socket will belong to. This directive makes it possible to explicitly bind a server to a namespace different from the default one. 
Please refer to your operating system's documentation to find more details about network namespaces. no-agent-check + May be used in the following contexts: tcp, http, log + This option may be used as "server" setting to reset any "agent-check" setting which would have been inherited from "default-server" directive as default value. @@ -16750,6 +17345,8 @@ no-agent-check "default-server" "agent-check" setting. no-backup + May be used in the following contexts: tcp, http, log + This option may be used as "server" setting to reset any "backup" setting which would have been inherited from "default-server" directive as default value. @@ -16757,6 +17354,8 @@ no-backup "default-server" "backup" setting. no-check + May be used in the following contexts: tcp, http, log + This option may be used as "server" setting to reset any "check" setting which would have been inherited from "default-server" directive as default value. @@ -16764,6 +17363,8 @@ no-check "default-server" "check" setting. no-check-ssl + May be used in the following contexts: tcp, http, log + This option may be used as "server" setting to reset any "check-ssl" setting which would have been inherited from "default-server" directive as default value. @@ -16771,6 +17372,8 @@ no-check-ssl "default-server" "check-ssl" setting. no-send-proxy + May be used in the following contexts: tcp, http + This option may be used as "server" setting to reset any "send-proxy" setting which would have been inherited from "default-server" directive as default value. @@ -16778,6 +17381,8 @@ no-send-proxy "default-server" "send-proxy" setting. no-send-proxy-v2 + May be used in the following contexts: tcp, http + This option may be used as "server" setting to reset any "send-proxy-v2" setting which would have been inherited from "default-server" directive as default value. @@ -16785,6 +17390,8 @@ no-send-proxy-v2 "default-server" "send-proxy-v2" setting. 
no-send-proxy-v2-ssl + May be used in the following contexts: tcp, http + This option may be used as "server" setting to reset any "send-proxy-v2-ssl" setting which would have been inherited from "default-server" directive as default value. @@ -16792,6 +17399,8 @@ no-send-proxy-v2-ssl "default-server" "send-proxy-v2-ssl" setting. no-send-proxy-v2-ssl-cn + May be used in the following contexts: tcp, http + This option may be used as "server" setting to reset any "send-proxy-v2-ssl-cn" setting which would have been inherited from "default-server" directive as default value. @@ -16799,6 +17408,8 @@ no-send-proxy-v2-ssl-cn "default-server" "send-proxy-v2-ssl-cn" setting. no-ssl + May be used in the following contexts: tcp, http, log, peers, ring + This option may be used as "server" setting to reset any "ssl" setting which would have been inherited from "default-server" directive as default value. @@ -16810,12 +17421,16 @@ no-ssl runtime API: see `set server` commands in management doc. no-ssl-reuse + May be used in the following contexts: tcp, http, log, peers, ring + This option disables SSL session reuse when SSL is used to communicate with the server. It will force the server to perform a full handshake for every new connection. It's probably only useful for benchmarking, troubleshooting, and for paranoid users. no-sslv3 + May be used in the following contexts: tcp, http, log, peers, ring + This option disables support for SSLv3 when SSL is used to communicate with the server. Note that SSLv2 is disabled in the code and cannot be enabled using any configuration option. Use "ssl-min-ver" and "ssl-max-ver" instead. @@ -16823,6 +17438,8 @@ no-sslv3 Supported in default-server: No no-tls-tickets + May be used in the following contexts: tcp, http, log, peers, ring + This setting is only available when support for OpenSSL was built in. It disables the stateless session resumption (RFC 5077 TLS Ticket extension) and force to use stateful session resumption. 
Stateless @@ -16834,6 +17451,8 @@ no-tls-tickets See also "tls-tickets". no-tlsv10 + May be used in the following contexts: tcp, http, log, peers, ring + This option disables support for TLSv1.0 when SSL is used to communicate with the server. Note that SSLv2 is disabled in the code and cannot be enabled using any configuration option. TLSv1 is more expensive than SSLv3 so it @@ -16844,6 +17463,8 @@ no-tlsv10 Supported in default-server: No no-tlsv11 + May be used in the following contexts: tcp, http, log, peers, ring + This option disables support for TLSv1.1 when SSL is used to communicate with the server. Note that SSLv2 is disabled in the code and cannot be enabled using any configuration option. TLSv1 is more expensive than SSLv3 so it @@ -16854,6 +17475,8 @@ no-tlsv11 Supported in default-server: No no-tlsv12 + May be used in the following contexts: tcp, http, log, peers, ring + This option disables support for TLSv1.2 when SSL is used to communicate with the server. Note that SSLv2 is disabled in the code and cannot be enabled using any configuration option. TLSv1 is more expensive than SSLv3 so it @@ -16864,6 +17487,8 @@ no-tlsv12 Supported in default-server: No no-tlsv13 + May be used in the following contexts: tcp, http, log, peers, ring + This option disables support for TLSv1.3 when SSL is used to communicate with the server. Note that SSLv2 is disabled in the code and cannot be enabled using any configuration option. TLSv1 is more expensive than SSLv3 so it @@ -16874,6 +17499,8 @@ no-tlsv13 Supported in default-server: No no-verifyhost + May be used in the following contexts: tcp, http, log, peers, ring + This option may be used as "server" setting to reset any "verifyhost" setting which would have been inherited from "default-server" directive as default value. @@ -16881,6 +17508,8 @@ no-verifyhost "default-server" "verifyhost" setting. 
no-tfo + May be used in the following contexts: tcp, http, log, peers, ring + This option may be used as "server" setting to reset any "tfo" setting which would have been inherited from "default-server" directive as default value. @@ -16888,11 +17517,15 @@ no-tfo "default-server" "tfo" setting. non-stick + May be used in the following contexts: tcp, http + Never add connections allocated to this sever to a stick-table. This may be used in conjunction with backup to ensure that stick-table persistence is disabled for backup servers. npn <protocols> + May be used in the following contexts: tcp, http + This enables the NPN TLS extension and advertises the specified protocol list as supported on top of NPN. The protocol list consists in a comma-delimited list of protocol names, for instance: "http/1.1,http/1.0" (without quotes). @@ -16902,6 +17535,8 @@ npn <protocols> only available starting with OpenSSL 1.0.2. observe <mode> + May be used in the following contexts: tcp, http + This option enables health adjusting based on observing communication with the server. By default this functionality is disabled and enabling it also requires to enable health checks. There are two supported modes: "layer4" and @@ -16913,6 +17548,8 @@ observe <mode> See also the "check", "on-error" and "error-limit". on-error <mode> + May be used in the following contexts: tcp, http, log + Select what should happen when enough consecutive errors are detected. Currently, four modes are available: - fastinter: force fastinter @@ -16924,6 +17561,8 @@ on-error <mode> See also the "check", "observe" and "error-limit". on-marked-down <action> + May be used in the following contexts: tcp, http, log + Modify what occurs when a server is marked down. Currently one action is available: - shutdown-sessions: Shutdown peer streams. 
When this setting is enabled, @@ -16938,6 +17577,8 @@ on-marked-down <action> Actions are disabled by default on-marked-up <action> + May be used in the following contexts: tcp, http, log + Modify what occurs when a server is marked up. Currently one action is available: - shutdown-backup-sessions: Shutdown streams on all backup servers. This is @@ -16951,7 +17592,25 @@ on-marked-up <action> Actions are disabled by default +pool-conn-name <expr> + May be used in the following contexts: http + + When a backend connection is established, this expression is evaluated to + generate the connection name. This name is one of the key properties of the + connection in the idle server pool. See the "http-reuse" keyword. When a + request looks up an existing idle connection, this expression is evaluated to + match an identical connection. + + In contexts where SSL SNI is used for backend connections, the connection name + is automatically assigned to the result of the "sni" expression. This suits + the most common usage. For more advanced setups, "pool-conn-name" may be used + to override this. + + See also: "http-reuse", "sni" + pool-low-conn <max> + May be used in the following contexts: http + Set a low threshold on the number of idling connections for a server, below which a thread will not try to steal a connection from another thread. This can be useful to improve CPU usage patterns in scenarios involving many very @@ -16968,6 +17627,8 @@ pool-low-conn <max> connection reuse rate will decrease as thread count increases. pool-max-conn <max> + May be used in the following contexts: http + Set the maximum number of idling connections for a server. -1 means unlimited connections, 0 means no idle connections. The default is -1. When idle connections are enabled, orphaned idle connections which do not belong to any @@ -16976,11 +17637,15 @@ pool-max-conn <max> according to the same principles as those applying to "http-reuse".
pool-purge-delay <delay> + May be used in the following contexts: http + Sets the delay to start purging idle connections. Each <delay> interval, half of the idle connections are closed. 0 means we don't keep any idle connection. The default is 5s. port <port> + May be used in the following contexts: tcp, http, log + Using the "port" parameter, it becomes possible to use a different port to send health-checks or to probe the agent-check. On some servers, it may be desirable to dedicate a port to a specific component able to perform complex @@ -16989,6 +17654,8 @@ port <port> ignored if the "check" parameter is not set. See also the "addr" parameter. proto <name> + May be used in the following contexts: tcp, http + Forces the multiplexer's protocol to use for the outgoing connections to this server. It must be compatible with the mode of the backend (TCP or HTTP). It must also be usable on the backend side. The list of available protocols is @@ -17013,6 +17680,8 @@ proto <name> See also "ws" to use an alternative protocol for websocket streams. redir <prefix> + May be used in the following contexts: http + The "redir" parameter enables the redirection mode for all GET and HEAD requests addressing this server. This means that instead of having HAProxy forward the request to the server, it will send an "HTTP 302" response with @@ -17031,11 +17700,15 @@ redir <prefix> Example : server srv1 192.168.1.1:80 redir http://image1.mydomain.com check rise <count> + May be used in the following contexts: tcp, http, log + The "rise" parameter states that a server will be considered as operational after <count> consecutive successful health checks. This value defaults to 2 if unspecified. See also the "check", "inter" and "fall" parameters. resolve-opts <option>,<option>,... + May be used in the following contexts: tcp, http, log + Comma separated list of options to apply to DNS resolution linked to this server. @@ -17075,6 +17748,8 @@ resolve-opts <option>,<option>,... 
Default value: not set resolve-prefer <family> + May be used in the following contexts: tcp, http, log + When DNS resolution is enabled for a server and multiple IP addresses from different families are returned, HAProxy will prefer using an IP address from the family mentioned in the "resolve-prefer" parameter. @@ -17087,6 +17762,8 @@ resolve-prefer <family> server s1 app1.domain.com:80 resolvers mydns resolve-prefer ipv6 resolve-net <network>[,<network[,...]] + May be used in the following contexts: tcp, http, log + This option prioritizes the choice of an ip address matching a network. This is useful with clouds to prefer a local ip. In some cases, a cloud high availability service can be announced with many ip addresses on many @@ -17099,6 +17776,8 @@ resolve-net <network>[,<network[,...]] server s1 app1.domain.com:80 resolvers mydns resolve-net 10.0.0.0/8 resolvers <id> + May be used in the following contexts: tcp, http, log + Points to an existing "resolvers" section to resolve current server's hostname. @@ -17109,6 +17788,8 @@ resolvers <id> See also section 5.3 send-proxy + May be used in the following contexts: tcp, http + The "send-proxy" parameter enforces use of the PROXY protocol over any connection established to this server. The PROXY protocol informs the other end about the layer 3/4 addresses of the incoming connection, so that it can @@ -17127,6 +17808,8 @@ send-proxy "accept-netscaler-cip" option of the "bind" keyword. send-proxy-v2 + May be used in the following contexts: tcp, http + The "send-proxy-v2" parameter enforces use of the PROXY protocol version 2 over any connection established to this server. The PROXY protocol informs the other end about the layer 3/4 addresses of the incoming connection, so @@ -17137,6 +17820,8 @@ send-proxy-v2 this section and send-proxy" option of the "bind" keyword. 
set-proxy-v2-tlv-fmt(<id>) <fmt> + May be used in the following contexts: tcp, http + The "set-proxy-v2-tlv-fmt" parameter is used to send arbitrary PROXY protocol version 2 TLVs. For the type (<id>) range of the defined TLV type please refer to section 2.2.8. of the proxy protocol specification. However, the value can @@ -17153,6 +17838,8 @@ set-proxy-v2-tlv-fmt(<id>) <fmt> of a newly created TLV that also has the type 0x20. proxy-v2-options <option>[,<option>]* + May be used in the following contexts: tcp, http + The "proxy-v2-options" parameter add options to send in PROXY protocol version 2 when "send-proxy-v2" is used. Options available are: @@ -17172,6 +17859,8 @@ proxy-v2-options <option>[,<option>]* within a Keep-Alive connection. send-proxy-v2-ssl + May be used in the following contexts: tcp, http + The "send-proxy-v2-ssl" parameter enforces use of the PROXY protocol version 2 over any connection established to this server. The PROXY protocol informs the other end about the layer 3/4 addresses of the incoming connection, so @@ -17183,6 +17872,8 @@ send-proxy-v2-ssl "send-proxy-v2" option of the "bind" keyword. send-proxy-v2-ssl-cn + May be used in the following contexts: tcp, http + The "send-proxy-v2-ssl" parameter enforces use of the PROXY protocol version 2 over any connection established to this server. The PROXY protocol informs the other end about the layer 3/4 addresses of the incoming connection, so @@ -17195,6 +17886,8 @@ send-proxy-v2-ssl-cn the "send-proxy-v2" option of the "bind" keyword. shard <shard> + May be used in the following contexts: peers + This parameter in used only in the context of stick-tables synchronisation with peers protocol. 
The "shard" parameter identifies the peers which will receive all the stick-table updates for keys with this shard as distribution @@ -17213,6 +17906,8 @@ shard <shard> peer D 127.0.0.1:40004 shard 3 sigalgs <sigalgs> + May be used in the following contexts: tcp, http, log, peers, ring + This setting is only available when support for OpenSSL was built in. It sets the string describing the list of signature algorithms that are negotiated during the TLSv1.2 and TLSv1.3 handshake. The format of the string is defined @@ -17221,6 +17916,8 @@ sigalgs <sigalgs> required. slowstart <start_time_in_ms> + May be used in the following contexts: tcp, http + The "slowstart" parameter for a server accepts a value in milliseconds which indicates after how long a server which has just come back up will run at full speed. Just as with every other time-based parameter, it can be entered @@ -17241,6 +17938,8 @@ slowstart <start_time_in_ms> seen as failed. sni <expression> + May be used in the following contexts: tcp, http, log, peers, ring + The "sni" parameter evaluates the sample fetch expression, converts it to a string and uses the result as the host name sent in the SNI TLS extension to the server. A typical use case is to send the SNI received from the client in @@ -17253,9 +17952,14 @@ sni <expression> "verify" directive for more details. If you want to set a SNI for health checks, see the "check-sni" directive for more details. + By default, the SNI is assigned to the connection name for "http-reuse", + unless overridden by the "pool-conn-name" server keyword. + source <addr>[:<pl>[-<ph>]] [usesrc { <addr2>[:<port2>] | client | clientip } ] source <addr>[:<port>] [usesrc { <addr2>[:<port2>] | hdr_ip(<hdr>[,<occ>]) } ] source <addr>[:<pl>[-<ph>]] [interface <name>] ... + May be used in the following contexts: tcp, http, log, peers, ring + The "source" parameter sets the source address which will be used when connecting to the server.
It follows the exact same parameters and principle as the backend "source" keyword, except that it only applies to the server @@ -17273,6 +17977,8 @@ source <addr>[:<pl>[-<ph>]] [interface <name>] ... specifying the source address without port(s). ssl + May be used in the following contexts: tcp, http, log, peers, ring + This option enables SSL ciphering on outgoing connections to the server. It is critical to verify server certificates using "verify" when using SSL to connect to servers, otherwise the communication is prone to trivial man in @@ -17283,16 +17989,22 @@ ssl SSL health checks. ssl-max-ver [ SSLv3 | TLSv1.0 | TLSv1.1 | TLSv1.2 | TLSv1.3 ] + May be used in the following contexts: tcp, http, log, peers, ring + This option enforces use of <version> or lower when SSL is used to communicate with the server. This option is also available on global statement "ssl-default-server-options". See also "ssl-min-ver". ssl-min-ver [ SSLv3 | TLSv1.0 | TLSv1.1 | TLSv1.2 | TLSv1.3 ] + May be used in the following contexts: tcp, http, log, peers, ring + This option enforces use of <version> or upper when SSL is used to communicate with the server. This option is also available on global statement "ssl-default-server-options". See also "ssl-max-ver". ssl-reuse + May be used in the following contexts: tcp, http, log, peers, ring + This option may be used as "server" setting to reset any "no-ssl-reuse" setting which would have been inherited from "default-server" directive as default value. @@ -17300,6 +18012,8 @@ ssl-reuse "default-server" "no-ssl-reuse" setting. stick + May be used in the following contexts: tcp, http + This option may be used as "server" setting to reset any "non-stick" setting which would have been inherited from "default-server" directive as default value. @@ -17307,11 +18021,15 @@ stick "default-server" "non-stick" setting. 
socks4 <addr>:<port> + May be used in the following contexts: tcp, http, log, peers, ring + This option enables upstream socks4 tunnel for outgoing connections to the server. Using this option won't force the health check to go via socks4 by default. You will have to use the keyword "check-via-socks4" to enable it. tcp-ut <delay> + May be used in the following contexts: tcp, http, log, peers, ring + Sets the TCP User Timeout for all outgoing connections to this server. This option is available on Linux since version 2.6.37. It allows HAProxy to configure a timeout for sockets which contain data not receiving an @@ -17327,6 +18045,8 @@ tcp-ut <delay> regular TCP connections, and is ignored for other protocols. tfo + May be used in the following contexts: tcp, http, log, peers, ring + This option enables using TCP fast open when connecting to servers, on systems that support it (currently only the Linux kernel >= 4.11). See the "tfo" bind option for more information about TCP fast open. @@ -17335,6 +18055,8 @@ tfo won't be able to retry the connection on failure. See also "no-tfo". track [<backend>/]<server> + May be used in the following contexts: tcp, http, log + This option enables ability to set the current state of the server by tracking another one. It is possible to track a server which itself tracks another server, provided that at the end of the chain, a server has health checks @@ -17342,6 +18064,8 @@ track [<backend>/]<server> used, it has to be enabled on both proxies. tls-tickets + May be used in the following contexts: tcp, http, log, peers, ring + This option may be used as "server" setting to reset any "no-tls-tickets" setting which would have been inherited from "default-server" directive as default value. @@ -17352,6 +18076,8 @@ tls-tickets "default-server" "no-tls-tickets" setting. verify [none|required] + May be used in the following contexts: tcp, http, log, peers, ring + This setting is only available when support for OpenSSL was built in. 
If set to 'none', server certificate is not verified. In the other case, The certificate provided by the server is verified using CAs from 'ca-file' and @@ -17367,6 +18093,8 @@ verify [none|required] the global section, "verify" is set to "required" by default. verifyhost <hostname> + May be used in the following contexts: tcp, http, log, peers, ring + This setting is only available when support for OpenSSL was built in, and only takes effect if 'verify required' is also specified. This directive sets a default static hostname to check the server's certificate against when no @@ -17378,6 +18106,8 @@ verifyhost <hostname> include wildcards. See also "verify", "sni" and "no-verifyhost" options. weight <weight> + May be used in the following contexts: tcp, http + The "weight" parameter is used to adjust the server's weight relative to other servers. All servers will receive a load proportional to their weight relative to the sum of all weights, so the higher the weight, the higher the @@ -17389,6 +18119,8 @@ weight <weight> room above and below for later adjustments. ws { auto | h1 | h2 } + May be used in the following contexts: http + This option allows to configure the protocol used when relaying websocket streams. This is most notably useful when using an HTTP/2 backend without the support for H2 websockets through the RFC8441. @@ -17856,25 +18588,27 @@ The ACL engine can match these types against patterns of the following types : The following ACL flags are currently supported : -i : ignore case during matching of all subsequent patterns. - -f : load patterns from a file. + -f : load patterns from a list. -m : use a specific pattern matching method -n : forbid the DNS resolutions - -M : load the file pointed by -f like a map file. + -M : load the file pointed by -f like a map. -u : force the unique id of the ACL -- : force end of flags. Useful when a string looks like one of the flags. 
-The "-f" flag is followed by the name of a file from which all lines will be -read as individual values. It is even possible to pass multiple "-f" arguments -if the patterns are to be loaded from multiple files. Empty lines as well as -lines beginning with a sharp ('#') will be ignored. All leading spaces and tabs -will be stripped. If it is absolutely necessary to insert a valid pattern -beginning with a sharp, just prefix it with a space so that it is not taken for -a comment. Depending on the data type and match method, HAProxy may load the -lines into a binary tree, allowing very fast lookups. This is true for IPv4 and -exact string matching. In this case, duplicates will automatically be removed. - -The "-M" flag allows an ACL to use a map file. If this flag is set, the file is -parsed as two column file. The first column contains the patterns used by the +The "-f" flag is followed by the name that must follow the format described in +2.7. about name format for maps and ACLs. It is even possible to pass multiple +"-f" arguments if the patterns are to be loaded from multiple lists. If an +existing file is referenced, all lines will be read as individual values. Empty +lines as well as lines beginning with a sharp ('#') will be ignored. All +leading spaces and tabs will be stripped. If it is absolutely necessary to +insert a valid pattern beginning with a sharp, just prefix it with a space so +that it is not taken for a comment. Depending on the data type and match +method, HAProxy may load the lines into a binary tree, allowing very fast +lookups. This is true for IPv4 and exact string matching. In this case, +duplicates will automatically be removed. + +The "-M" flag allows an ACL to use a map. If this flag is set, the list is +parsed as two column entries. The first column contains the patterns used by the ACL, and the second column contain the samples. The sample can be used later by a map.
This can be useful in some rare cases where an ACL would just be used to check for the existence of a pattern in a map before a mapping is applied. @@ -18362,6 +19096,7 @@ The following keywords are supported: add(value) integer integer add_item(delim,[var][,suff]]) string string aes_gcm_dec(bits,nonce,key,aead_tag) binary binary +aes_gcm_enc(bits,nonce,key,aead_tag) binary binary and(value) integer integer b64dec string binary base64 binary string @@ -18560,6 +19295,18 @@ aes_gcm_dec(<bits>,<nonce>,<key>,<aead_tag>) http-response set-header X-Decrypted-Text %[var(txn.enc),\ aes_gcm_dec(128,txn.nonce,Zm9vb2Zvb29mb29wZm9vbw==,txn.aead_tag)] +aes_gcm_enc(<bits>,<nonce>,<key>,<aead_tag>) + Encrypts the raw byte input using the AES128-GCM, AES192-GCM or + AES256-GCM algorithm, depending on the <bits> parameter. <nonce> and <key> + parameters must be base64 encoded. Last parameter, <aead_tag>, must be a + variable. The AEAD tag will be stored base64 encoded into that variable. + The returned result is in raw byte format. The <nonce> and <key> can either + be strings or variables. This converter requires at least OpenSSL 1.0.1. + + Example: + http-response set-header X-Encrypted-Text %[var(txn.plain),\ + aes_gcm_enc(128,txn.nonce,Zm9vb2Zvb29mb29wZm9vbw==,txn.aead_tag)] + and(<value>) Performs a bitwise "AND" between <value> and the input value of type signed integer, and returns the result as an signed integer. <value> can be a @@ -19109,17 +19856,18 @@ ltrim(<chars>) Skips any characters from <chars> from the beginning of the string representation of the input sample. -map(<map_file>[,<default_value>]) -map_<match_type>(<map_file>[,<default_value>]) -map_<match_type>_<output_type>(<map_file>[,<default_value>]) - Search the input value from <map_file> using the <match_type> matching method, - and return the associated value converted to the type <output_type>. If the - input value cannot be found in the <map_file>, the converter returns the - <default_value>. 
If the <default_value> is not set, the converter fails and - acts as if no input value could be fetched. If the <match_type> is not set, it - defaults to "str". Likewise, if the <output_type> is not set, it defaults to - "str". For convenience, the "map" keyword is an alias for "map_str" and maps a - string to another string. +map(<map_name>[,<default_value>]) +map_<match_type>(<map_name>[,<default_value>]) +map_<match_type>_<output_type>(<map_name>[,<default_value>]) + Search the input value from <map_name> using the <match_type> matching + method, and return the associated value converted to the type <output_type>. + If the input value cannot be found in the <map_name>, the converter returns + the <default_value>. If the <default_value> is not set, the converter fails + and acts as if no input value could be fetched. If the <match_type> is not + set, it defaults to "str". Likewise, if the <output_type> is not set, it + defaults to "str". For convenience, the "map" keyword is an alias for + "map_str" and maps a string to another string. <map_name> must follow the + format described in 2.7. about name format for maps and ACLs. It is important to avoid overlapping between the keys : IP addresses and strings are stored in trees, so the first of the finest match will be used. @@ -19128,38 +19876,43 @@ map_<match_type>_<output_type>(<map_file>[,<default_value>]) The following array contains the list of all map functions available sorted by input type, match type and output type.
- input type | match method | output type str | output type int | output type ip - -----------+--------------+-----------------+-----------------+--------------- - str | str | map_str | map_str_int | map_str_ip - -----------+--------------+-----------------+-----------------+--------------- - str | beg | map_beg | map_beg_int | map_end_ip - -----------+--------------+-----------------+-----------------+--------------- - str | sub | map_sub | map_sub_int | map_sub_ip - -----------+--------------+-----------------+-----------------+--------------- - str | dir | map_dir | map_dir_int | map_dir_ip - -----------+--------------+-----------------+-----------------+--------------- - str | dom | map_dom | map_dom_int | map_dom_ip - -----------+--------------+-----------------+-----------------+--------------- - str | end | map_end | map_end_int | map_end_ip - -----------+--------------+-----------------+-----------------+--------------- - str | reg | map_reg | map_reg_int | map_reg_ip - -----------+--------------+-----------------+-----------------+--------------- - str | reg | map_regm | map_reg_int | map_reg_ip - -----------+--------------+-----------------+-----------------+--------------- - int | int | map_int | map_int_int | map_int_ip - -----------+--------------+-----------------+-----------------+--------------- - ip | ip | map_ip | map_ip_int | map_ip_ip - -----------+--------------+-----------------+-----------------+--------------- + input type | match method | output type str | output type int | output type ip | output type key + -----------+--------------+-----------------+-----------------+----------------+---------------- + str | str | map_str | map_str_int | map_str_ip | map_str_key + -----------+--------------+-----------------+-----------------+----------------+---------------- + str | beg | map_beg | map_beg_int | map_beg_ip | map_beg_key + -----------+--------------+-----------------+-----------------+----------------+---------------- + str | sub | 
map_sub | map_sub_int | map_sub_ip | map_sub_key + -----------+--------------+-----------------+-----------------+----------------+---------------- + str | dir | map_dir | map_dir_int | map_dir_ip | map_dir_key + -----------+--------------+-----------------+-----------------+----------------+---------------- + str | dom | map_dom | map_dom_int | map_dom_ip | map_dom_key + -----------+--------------+-----------------+-----------------+----------------+---------------- + str | end | map_end | map_end_int | map_end_ip | map_end_key + -----------+--------------+-----------------+-----------------+----------------+---------------- + str | reg | map_reg | map_reg_int | map_reg_ip | map_reg_key + -----------+--------------+-----------------+-----------------+----------------+---------------- + str | reg | map_regm | map_reg_int | map_reg_ip | map_reg_key + -----------+--------------+-----------------+-----------------+----------------+---------------- + int | int | map_int | map_int_int | map_int_ip | map_int_key + -----------+--------------+-----------------+-----------------+----------------+---------------- + ip | ip | map_ip | map_ip_int | map_ip_ip | map_ip_key + -----------+--------------+-----------------+-----------------+----------------+---------------- The special map called "map_regm" expect matching zone in the regular expression and modify the output replacing back reference (like "\1") by the corresponding match text. - The file contains one key + value per line. Lines which start with '#' are - ignored, just like empty lines. Leading tabs and spaces are stripped. The key - is then the first "word" (series of non-space/tabs characters), and the value - is what follows this series of space/tab till the end of the line excluding - trailing spaces/tabs. + Output type "key" means that it is the matched entry's key (as found in the + map file) that will be returned as a string instead of the value. 
Note that + optional <default_value> argument is not supported when "key" output type is + used. + + Files referenced by <map_name> contain one key + value per line. Lines which + start with '#' are ignored, just like empty lines. Leading tabs and spaces + are stripped. The key is then the first "word" (series of non-space/tabs + characters), and the value is what follows this series of space/tab till the + end of the line excluding trailing spaces/tabs. Example : @@ -19699,6 +20452,21 @@ table_expire(<table>[,<default_value>]) input sample in the designated table. See also the table_idle sample fetch keyword. +table_glitch_cnt(<table>) + Uses the string representation of the input sample to perform a look up in + the specified table. If the key is not found in the table, integer value zero + is returned. Otherwise the converter returns the cumulative number of front + connection glitches associated with the input sample in the designated table. + See also the sc_glitch_cnt sample fetch keyword and fc_glitches for the value + measured on the current front connection. + +table_glitch_rate(<table>) + Uses the string representation of the input sample to perform a look up in + the specified table. If the key is not found in the table, integer value zero + is returned. Otherwise the converter returns the average front connection + glitch rate associated with the input sample in the designated table. See + also the sc_glitch_rate sample fetch keyword. + table_gpc(<idx>,<table>) Uses the string representation of the input sample to perform a lookup in the specified table. If the key is not found in the table, integer value zero @@ -20212,7 +20980,6 @@ table_avl([<table>]) integer table_cnt([<table>]) integer thread integer txn.id32 integer -txn.conn_retries integer txn.sess_term_state string uuid([<version>]) string var(<var-name>[,<default>]) undefined @@ -20669,14 +21436,8 @@ txn.id32 : integer depends on the request rate. In practice, it should not be an issue.
For a true unique ID, see "unique-id-format" directive. -txn.conn_retries : integer - Returns the the number of connection retries experienced by this stream when - trying to connect to the server. This value is subject to change while the - connection is not fully established. For HTTP connections, the value may be - affected by L7 retries. - txn.sess_term_state : string - Retruns the TCP or HTTP stream termination state, as reported in the log. It + Returns the TCP or HTTP stream termination state, as reported in the log. It is a 2-characters string, The final stream state followed by the event which caused its to terminate. See section 8.5 about stream state at disconnection for the list of possible events. The current value at time the sample fetch @@ -20687,10 +21448,14 @@ txn.sess_term_state : string # Return a 429-Too-Many-Requests if stream timed out in queue http-after-response set-status 429 if { txn.sess_term_state "sQ" } +uptime : integer + Returns the uptime of the current HAProxy worker in seconds. + uuid([<version>]) : string - Returns a UUID following the RFC4122 standard. If the version is not + Returns a UUID following the RFC 9562 standard. If the version is not specified, a UUID version 4 (fully random) is returned. - Currently, only version 4 is supported. + + Versions 4 and 7 are supported. var(<var-name>[,<default>]) : undefined Returns a variable with the stored type. 
If the variable is not set, the @@ -20730,14 +21495,18 @@ Summary of sample fetch methods in this section and their respective types: -------------------------------------------------+------------- accept_date([<unit>]) integer bc.timer.connect integer +bc_be_queue integer bc_dst ip bc_dst_port integer bc_err integer bc_err_str string bc_glitches integer bc_http_major integer +bc_nb_streams integer bc_src ip bc_src_port integer +bc_srv_queue integer +bc_settings_streams_limit integer be_id integer be_name string bc_rtt(<unit>) integer @@ -20764,6 +21533,7 @@ fc_fackets integer fc_glitches integer fc_http_major integer fc_lost integer +fc_nb_streams integer fc_pp_authority string fc_pp_unique_id string fc_pp_tlv(<id>) string @@ -20776,6 +21546,7 @@ fc_sacked integer fc_src ip fc_src_is_local boolean fc_src_port integer +fc_settings_streams_limit integer fc_unacked integer fe_defbe string fe_id integer @@ -20825,6 +21596,14 @@ sc_get_gpt0(<ctr>[,<table>]) integer sc0_get_gpt0([<table>]) integer sc1_get_gpt0([<table>]) integer sc2_get_gpt0([<table>]) integer +sc_glitch_cnt(<ctr>[,<table>]) integer +sc0_glitch_cnt([<table>]) integer +sc1_glitch_cnt([<table>]) integer +sc2_glitch_cnt([<table>]) integer +sc_glitch_rate(<ctr>[,<table>]) integer +sc0_glitch_rate([<table>]) integer +sc1_glitch_rate([<table>]) integer +sc2_glitch_rate([<table>]) integer sc_gpc_rate(<idx>,<ctr>[,<table>]) integer sc_gpc0_rate(<ctr>[,<table>]) integer sc0_gpc0_rate([<table>]) integer @@ -20929,6 +21708,7 @@ src_updt_conn_cnt([<table>]) integer srv_id integer srv_name string txn.conn_retries integer +txn.redispatched boolean -------------------------------------------------+------------- Detailed list: @@ -20955,6 +21735,10 @@ bc.timer.connect : integer equivalent of %Tc in the log-format. This is reported in milliseconds (ms). For more information see Section 8.4 "Timing events" +bc_be_queue : integer + Number of streams de-queued while waiting for a connection slot on the + target backend. 
This is the equivalent of %bq in the log-format. + bc_dst : ip This is the destination ip address of the connection on the server side, which is the server address HAProxy connected to. It is of type IP and works @@ -20995,6 +21779,9 @@ bc_http_major : integer for HTTP/0.9 to HTTP/1.1 or 2 for HTTP/2. Note, this is based on the on-wire encoding and not the version present in the request header. +bc_nb_streams : integer + Returns the number of streams opened on the backend connection. + bc_src : ip This is the source ip address of the connection on the server side, which is the server address HAProxy connected from. It is of type IP and works on both @@ -21005,6 +21792,15 @@ bc_src_port : integer Returns an integer value corresponding to the TCP source port of the connection on the server side, which is the port HAProxy connected from. +bc_srv_queue : integer + Number of streams de-queued while waiting for a connection slot on the + target server. This is the equivalent of %sq in the log-format. + +bc_settings_streams_limit : integer + Returns the maximum number of streams allowed on the backend connection. For + TCP and HTTP/1.1 connections, it is always 1. For other protocols, it depends + on the settings negotiated with the server. + be_id : integer Returns an integer containing the current backend's id. It can be used in frontends with responses to check which backend processed the request. If @@ -21137,13 +21933,13 @@ fc_err : integer Returns the ID of the error that might have occurred on the current connection. Any strictly positive value of this fetch indicates that the connection did not succeed and would result in an error log being output (as - described in section 8.2.6). See the "fc_err_str" fetch for a full list of + described in section 8.2.5). See the "fc_err_str" fetch for a full list of error codes and their corresponding error message.
fc_err_str : string Returns an error message describing what problem happened on the current connection, resulting in a connection failure. This string corresponds to the - "message" part of the error log format (see section 8.2.6). See below for a + "message" part of the error log format (see section 8.2.5). See below for a full list of error codes and their corresponding error messages : +----+---------------------------------------------------------------------------+ @@ -21229,6 +22025,9 @@ fc_lost : integer not TCP or if the operating system does not support TCP_INFO, for example Linux kernels before 2.4, the sample fetch fails. +fc_nb_streams : integer + Returns the number of streams opened on the frontend connection. + fc_pp_authority : string Returns the first authority TLV sent by the client in the PROXY protocol header, if any. @@ -21314,6 +22113,10 @@ fc_src_port : integer connection on the client side. Only "tcp-request connection" rules may alter this address. See "src-port" for details. +fc_settings_streams_limit : integer + Returns the maximum number of streams allowed on the frontend connection. For + TCP and HTTP/1.1 connections, it is always 1. For other protocols, it depends + on the settings negotiated with the client. fc_unacked : integer Returns the unacked counter measured by the kernel for the client connection. @@ -21464,6 +22267,34 @@ sc2_get_gpt0([<table>]) : integer Returns the value of the first General Purpose Tag associated to the currently tracked counters. See also src_get_gpt0. +sc_glitch_cnt(<ctr>[,<table>]) : integer +sc0_glitch_cnt([<table>]) : integer +sc1_glitch_cnt([<table>]) : integer +sc2_glitch_cnt([<table>]) : integer + Returns the cumulative number of front connection glitches that were observed + on connections associated with the currently tracked counters. Usually these + result in requests or connections to be aborted so the returned value will + often correspond to past connections.
There is no good nor bad value, but a + poor quality client may occasionally cause a few glitches per connection, + while a very bogus or malevolent client may quickly cause thousands of events + to be added on a connection. See also fc_glitches for the number affecting + the current connection, src_glitch_cnt to look them up per source, and + sc_glitch_rate for the event rate measurements. + +sc_glitch_rate(<ctr>[,<table>]) : integer +sc0_glitch_rate([<table>]) : integer +sc1_glitch_rate([<table>]) : integer +sc2_glitch_rate([<table>]) : integer + Returns the average rate at which front connection glitches were observed for + the currently tracked counters, measured in amount of events over the period + configured in the table. Usually these glitches result in requests or + connections to be aborted so the returned value will often be related to past + connections. There is no good nor bad value, but a poor quality client may + occasionally cause a few glitches per connection, hence a low rate is + generally expected. However, a very bogus or malevolent client may quickly + cause thousands of events to be added per connection, and maintain a high + rate here. See also src_glitch_rate and sc_glitch_cnt. + sc_gpc_rate(<idx>,<ctr>[,<table>]) : integer Returns the average increment rate of the General Purpose Counter at the index <idx> of the array associated to the tracked counter of ID <ctr> from @@ -21778,6 +22609,29 @@ src_get_gpt0([<table>]) : integer the designated stick-table. If the address is not found, zero is returned. See also sc/sc0/sc1/sc2_get_gpt0. +src_glitch_cnt([<table>]) : integer + Returns the cumulative number of front connection glitches that were observed + on connections from the current connection's source address. Usually these + result in requests or connections to be aborted so the returned value will + often correspond to past connections. 
There is no good nor bad value, but a + poor quality client may occasionally cause a few glitches per connection, + while a very bogus or malevolent client may quickly cause thousands of events + to be added on a connection. See also fc_glitches for the number affecting + the current connection, sc_glitch_cnt to look them up in currently tracked + counters, and src_glitch_rate for the event rate measurements. + +src_glitch_rate([<table>]) : integer + Returns the average rate at which front connection glitches were observed + on connections from the current connection's source address, measured in + amount of events over the period configured in the table. Usually these + glitches result in requests or connections to be aborted so the returned + value will often be related to past connections. There is no good nor bad + value, but a poor quality client may occasionally cause a few glitches per + connection, hence a low rate is generally expected. However, a very bogus or + malevolent client may quickly cause thousands of events to be added per + connection, and maintain a high rate here. See also sc_glitch_rate and + src_glitch_cnt. + src_gpc_rate(<idx>[,<table>]) : integer Returns the average increment rate of the General Purpose Counter at the index <idx> of the array associated to the incoming connection's @@ -21963,6 +22817,12 @@ txn.conn_retries : integer connection is not fully established. For HTTP connections, the value may be affected by L7 retries. +txn.redispatched : boolean + Returns true if the connection has experienced redispatch upon retry according + to "option redispatch" configuration. This value is subject to change while + the connection is not fully established. For HTTP connections, the value may + be affected by L7 retries. + 7.3.4.
Fetching samples at Layer 5 ---------------------------------- @@ -21982,6 +22842,11 @@ ssl_bc_alg_keysize integer ssl_bc_alpn string ssl_bc_cipher string ssl_bc_client_random binary +ssl_bc_client_early_traffic_secret string +ssl_bc_client_handshake_traffic_secret string +ssl_bc_client_traffic_secret_0 string +ssl_bc_exporter_secret string +ssl_bc_early_exporter_secret string ssl_bc_curve string ssl_bc_err integer ssl_bc_err_str string @@ -21989,6 +22854,8 @@ ssl_bc_is_resumed boolean ssl_bc_npn string ssl_bc_protocol string ssl_bc_unique_id binary +ssl_bc_server_handshake_traffic_secret string +ssl_bc_server_traffic_secret_0 string ssl_bc_server_random binary ssl_bc_session_id binary ssl_bc_session_key binary @@ -22122,6 +22989,51 @@ ssl_bc_client_random : binary sent using ephemeral ciphers. This requires OpenSSL >= 1.1.0, or BoringSSL. It can be used in a tcp-check or an http-check ruleset. +ssl_bc_client_early_traffic_secret : string + Return the CLIENT_EARLY_TRAFFIC_SECRET as an hexadecimal string for the + back connection when the outgoing connection was made over a TLS 1.3 + transport layer. + Require OpenSSL >= 1.1.1. This is one of the keys dumped by the OpenSSL + keylog callback to generate the SSLKEYLOGFILE. The SSL Key logging must be + activated with "tune.ssl.keylog on" in the global section. See also + "tune.ssl.keylog" + +ssl_bc_client_handshake_traffic_secret : string + Return the CLIENT_HANDSHAKE_TRAFFIC_SECRET as an hexadecimal string for the + back connection when the outgoing connection was made over a TLS 1.3 + transport layer. + Require OpenSSL >= 1.1.1. This is one of the keys dumped by the OpenSSL + keylog callback to generate the SSLKEYLOGFILE. The SSL Key logging must be + activated with "tune.ssl.keylog on" in the global section.
See also + "tune.ssl.keylog" + +ssl_bc_client_traffic_secret_0 : string + Return the CLIENT_TRAFFIC_SECRET_0 as an hexadecimal string for the + back connection when the outgoing connection was made over a TLS 1.3 + transport layer. + Require OpenSSL >= 1.1.1. This is one of the keys dumped by the OpenSSL + keylog callback to generate the SSLKEYLOGFILE. The SSL Key logging must be + activated with "tune.ssl.keylog on" in the global section. See also + "tune.ssl.keylog" + +ssl_bc_exporter_secret : string + Return the EXPORTER_SECRET as an hexadecimal string for the + back connection when the outgoing connection was made over a TLS 1.3 + transport layer. + Require OpenSSL >= 1.1.1. This is one of the keys dumped by the OpenSSL + keylog callback to generate the SSLKEYLOGFILE. The SSL Key logging must be + activated with "tune.ssl.keylog on" in the global section. See also + "tune.ssl.keylog" + +ssl_bc_early_exporter_secret : string + Return the EARLY_EXPORTER_SECRET as an hexadecimal string for the + back connection when the outgoing connection was made over a TLS 1.3 + transport layer. + Require OpenSSL >= 1.1.1. This is one of the keys dumped by the OpenSSL + keylog callback to generate the SSLKEYLOGFILE. The SSL Key logging must be + activated with "tune.ssl.keylog on" in the global section. See also + "tune.ssl.keylog" + ssl_bc_curve : string Returns the name of the curve used in the key agreement when the outgoing connection was made over an SSL/TLS transport layer. This requires @@ -22171,6 +23083,24 @@ ssl_bc_unique_id : binary can be encoded to base64 using the converter: "ssl_bc_unique_id,base64". It can be used in a tcp-check or an http-check ruleset. +ssl_bc_server_handshake_traffic_secret : string + Return the SERVER_HANDSHAKE_TRAFFIC_SECRET as an hexadecimal string for the + back connection when the outgoing connection was made over a TLS 1.3 + transport layer. + Require OpenSSL >= 1.1.1.
This is one of the keys dumped by the OpenSSL + keylog callback to generate the SSLKEYLOGFILE. The SSL Key logging must be + activated with "tune.ssl.keylog on" in the global section. See also + "tune.ssl.keylog" + +ssl_bc_server_traffic_secret_0 : string + Return the SERVER_TRAFFIC_SECRET_0 as an hexadecimal string for the + back connection when the outgoing connection was made over an TLS 1.3 + transport layer. + Require OpenSSL >= 1.1.1. This is one of the keys dumped by the OpenSSL + keylog callback to generate the SSLKEYLOGFILE. The SSL Key logging must be + activated with "tune.ssl.keylog on" in the global section. See also + "tune.ssl.keylog" + ssl_bc_server_random : binary Returns the server random of the back connection when the incoming connection was made over an SSL/TLS transport layer. It is useful to to decrypt traffic @@ -22821,7 +23751,7 @@ Warning : Following sample fetches are ignored if used from HTTP proxies. They HTTP proxies use structured content. Thus raw representation of these data are meaningless. A warning is emitted if an ACL relies on one of the following sample fetches. But it is not possible to detect - all invalid usage (for instance inside a log-format string or a + all invalid usage (for instance inside a Custom log format or a sample expression). So be careful. Summary of sample fetch methods in this section and their respective types: @@ -22829,9 +23759,13 @@ Summary of sample fetch methods in this section and their respective types: keyword output type ----------------------------------------------------+------------- bs.id integer +bs.aborted boolean +bs.rst_code integer distcc_body(<token>[,<occ>]) binary distcc_param(<token>[,<occ>]) integer fs.id integer +fs.aborted boolean +fs.rst_code integer payload(<offset>,<length>) binary payload_lv(<offset1>,<length>[,<offset2>]) binary req.len integer @@ -22867,6 +23801,16 @@ bs.id : integer Returns the multiplexer's stream ID on the server side. 
It is the multiplexer's responsibility to return the appropriate information. +bs.aborted: boolean + Returns true if an abort was received from the server for the current + stream. Otherwise false is returned. + +bs.rst_code: integer + Returns the reset code received from the server for the current stream. The + code of the H2 RST_STREAM frame or the QUIC STOP_SENDING frame received from + the server is returned. The sample fetch fails if no abort was received or if + the server stream is not an H2/QUIC stream. + distcc_body(<token>[,<occ>]) : binary Parses a distcc message and returns the body associated to occurrence #<occ> of the token <token>. Occurrences start at 1, and when unspecified, any may @@ -22898,6 +23842,16 @@ fs.id : integer multiplexer's responsibility to return the appropriate information. For instance, on a raw TCP, 0 is always returned because there is no stream. +fs.aborted: boolean + Returns true if an abort was received from the client for the current + stream. Otherwise false is returned. + +fs.rst_code: integer + Returns the reset code received from the client for the current stream. The + code of the H2 RST_STREAM frame or the QUIC STOP_SENDING frame received from + the client is returned. The sample fetch fails if no abort was received or + if the client stream is not an H2/QUIC stream. + payload(<offset>,<length>) : binary (deprecated) This is an alias for "req.payload" when used in the context of a request (e.g. "stick on", "stick match"), and for "res.payload" when used in the context of @@ -23438,7 +24392,7 @@ hdr([<name>[,<occ>]]) : string request_date([<unit>]) : integer This is the exact date when the first byte of the HTTP request was received - by HAProxy (log-format tag %tr). This is computed from accept_date + + by HAProxy (log-format alias %tr). This is computed from accept_date + handshake time (%Th) + idle time (%Ti). Returns a value in number of seconds since epoch. 
@@ -23705,8 +24659,8 @@ req.hdr_names([<delim>]) : string req.ver : string req_ver : string (deprecated) Returns the version string from the HTTP request, for example "1.1". This can - be useful for ACL. For logs use the "%HV" log variable. Some predefined ACL - already check for versions 1.0 and 1.1. + be useful for ACL. For logs use the "%HV" logformat alias. Some predefined + ACL already check for versions 1.0 and 1.1. Common values are "1.0", "1.1", "2.0" or "3.0". @@ -24934,42 +25888,116 @@ regular traffic log (see option httplog or option httpslog). 8.2.6. Custom log format ------------------------ -When the default log formats are not sufficient, it is possible to define new -ones in very fine details. As creating a log-format from scratch is not always -a trivial task, it is strongly recommended to first have a look at the existing -formats ("option tcplog", "option httplog", "option httpslog"), pick the one -looking the closest to the expectation, copy its "log-format" equivalent string -and adjust it. - -HAProxy understands some log format variables. % precedes log format variables. -Variables can take arguments using braces ('{}'), and multiple arguments are -separated by commas within the braces. Flags may be added or removed by -prefixing them with a '+' or '-' sign. - -Special variable "%o" may be used to propagate its flags to all other -variables on the same format string. This is particularly handy with quoted -("Q") and escaped ("E") string formats. - -If a variable is named between square brackets ('[' .. ']') then it is used +Historically, custom log formats were only used to produce logs. But their +convenience when used to produce a string by assembling multiple complex +expressions has got them adopted by many directives which used to take only +a string in argument and which may now also take such a Custom log format +definition. 
Such arguments, which are commonly designated by "<fmt>" in this +document, are defined exactly the same way as the argument to the "log-format" +directive, described here. + +When it comes to logs and when the default log formats are not sufficient, it +is possible to define new ones in very fine details. As creating a log-format +from scratch is not always a trivial task, it is strongly recommended to first +have a look at the existing formats ("option tcplog", "option httplog", "option +httpslog"), pick the one looking the closest to the expectation, copy its +"log-format" equivalent string and adjust it. + +A Custom log format definition is a single argument from a configuration +perspective. This means that it may not contain blanks (spaces or tabs), unless +these blanks are escaped using the backslash character ('\'), or the whole +definition is enclosed between quotes (which is the recommended way to use +them). The use of unquoted format strings is not recommended anymore as history +has shown that it was very error prone since a single missing backslash +character could result in silent truncation of the format. Such configurations +are still commonly encountered due to the massive adoption of log formats after +version 1.5-dev9, 3 years before quotes were usable, but it is recommended to +convert them to quoted strings and to drop the backslashes now. + +A log format definition is made of any number of log format items separated +by text and spaces. A log format item starts with character '%'. In order to +emit a verbatim '%', it must be preceded by another '%' resulting in '%%'. + +Logformat items may either be aliases or sample expressions: + +If an item is named between square brackets ('[' .. ']') then it is used as a sample expression rule (see section 7.3). This it useful to add some less common information such as the client's SSL certificate's DN, or to log -the key that would be used to store an entry into a stick table. 
+the key that would be used to store an entry into a stick table. It is also +commonly used with non-log actions (header manipulation, variables etc). + +Else if the item is named using an alpha-numerical name, it is an alias. +(Refer to the table below for the list of available aliases) -Note: spaces must be escaped. In configuration directives "log-format", -"log-format-sd" and "unique-id-format", spaces are considered as -delimiters and are merged. In order to emit a verbatim '%', it must be -preceded by another '%' resulting in '%%'. +Items can take arguments using braces ('{}'), and multiple arguments are +separated by commas within the braces. Flags may be added or removed by +prefixing them with a '+' or '-' sign (see below for the list of available +flags). + +Special alias "%o" may be used to propagate its flags to all other +logformat items on the same format string. This is particularly handy with +quoted ("Q") and escaped ("E") string formats. + +Items can optionally be named using ('()'). The name must be provided right +after '%' (before arguments). It will automatically be used as key name when +encoding flag such as "json" or "cbor" is set. When no encoding flag is +specified (default), item name will be ignored. It is also possible to force +the item's output to a given type by appending ':type' after the name, like +this: %(itemname:itemtype)aliasname or %(itemname:itemtype)[expr] where +itemtype may be 'str', 'sint' or 'bool'. Specifying the type is only relevant +when an encoding method is used. Also, it is supported to provide an empty name +to force the output type on an anonymous item: %(:itemtype), ie: when encoding +is not set globally, see flags definitions below for more information. + +Due to the original goal of custom log formats to be used for logging only, +there is a special case made of non-printable and unsafe characters (those +outside ASCII codes 32 to 126 plus a few other ones) depending where they are +used. 
Section 8.6 describes what's done exactly for logs in order to make sure +one will not send unsafe codes that alter the readability of the output in a +terminal. When used to form header fields, health checks or payload responses, +the rules are less strict and only characters forbidden in HTTP header fields +are replaced by their hexadecimal encoding preceded by character '%'. This is +normally not a problem, but it might affect the output when the character was +expected to be reproduced verbatim (e.g. when building an error page or a full +response payload, where line feeds could appear as "%0A"). + +Note: in configuration directives "log-format", "log-format-sd" and +"unique-id-format", spaces are considered as delimiters and are merged. Note: when using the RFC5424 syslog message format, the characters '"', '\' and ']' inside PARAM-VALUE should be escaped with '\' as prefix (see https://tools.ietf.org/html/rfc5424#section-6.3.3 for more details). In such cases, the use of the flag "E" should be considered. -Flags are : +Supported item flags are (may be enabled/disabled from item's arguments): * Q: quote a string * X: hexadecimal representation (IPs, Ports, %Ts, %rt, %pid) * E: escape characters '"', '\' and ']' in a string with '\' as prefix (intended purpose is for the RFC5424 structured-data log formats) + * bin: try to preserve binary data, this can be useful with sample + expressions that output binary data in order to preserve the original + data. Be careful however, because it can obviously generate non- + printable chars, including NULL-byte, which most syslog endpoints + don't expect. Thus it is mainly intended for use with set-var-fmt, + rings and binary-capable log endpoints. + This option can only be set globally (with %o), it will be ignored + if set on an individual item's options. 
+ * json: automatically encode value in JSON format + (when set globally, only named logformat items are considered) + Incomplete numerical values (e.g.: '%B' when logasap is used), + which are normally prefixed with '+' without encoding, will be + encoded as-is. Also, '+E' option will be ignored. + * cbor: automatically encode value in CBOR format + (when set globally, only named logformat items are considered) + By default, cbor encoded data is represented in HEX form so + that it remains printable on stdout and can be used with usual + syslog endpoints. + As with json encoding, incomplete numerical values will be encoded + as-is and '+E' option will be ignored. + When combined with '+bin' option, it will directly generate raw + binary CBOR payload. Be careful, because it will obviously generate + non-printable chars, thus it is mainly intended for use with + set-var-fmt, rings and binary-capable log endpoints. Example: @@ -24978,13 +26006,16 @@ Flags are : log-format-sd %{+Q,+E}o\ [exampleSDID@1234\ header=%[capture.req.hdr(0)]] -Please refer to the table below for currently defined variables : + log-format "%{+json}o %(request)r %(custom_expr)[str(custom)]" + log-format "%{+cbor}o %(request)r %(custom_expr)[str(custom)]" + +Please refer to the table below for currently defined aliases : +---+------+------------------------------------------------------+---------+ - | R | var | field name (8.2.2 and 8.2.3 for description) | type | + | R | alias| field name (8.2.2 and 8.2.3 for description) | type | | | | sample fetch alternative | | +===+======+======================================================+=========+ - | | %o | special variable, apply flags on all next var | | + | | %o | special, apply flags on all following items | | +---+------+------------------------------------------------------+---------+ | date formats | +---+------+------------------------------------------------------+---------+ @@ -24995,12 +26026,13 @@ Please refer to the table below for 
currently defined variables : | | | %[accept_date,ltime("%d/%b/%Y:%H:%M:%S %z")] | date | +---+------+------------------------------------------------------+---------+ | | %Ts | Accept date as a UNIX timestamp | numeric | + | | | %[accept_date] | | +---+------+------------------------------------------------------+---------+ | | %t | Accept date local (with millisecond resolution) | | | | | %[accept_date(ms),ms_ltime("%d/%b/%Y:%H:%M:%S.%3N")] | date | +---+------+------------------------------------------------------+---------+ | | %ms | Accept date milliseconds | | - | | | %[accept_date(ms),ms_utime("%3N") | numeric | + | | | %[accept_date(ms),ms_utime("%3N")] | numeric | +---+------+------------------------------------------------------+---------+ | H | %tr | Request date local (with millisecond resolution) | | | | | %[request_date(ms),ms_ltime("%d/%b/%Y:%H:%M:%S.%3N")]| date | @@ -25056,8 +26088,10 @@ Please refer to the table below for currently defined variables : | H | %CS | captured_response_cookie | string | +---+------+------------------------------------------------------+---------+ | | %H | hostname | string | + | | | %[hostname] | | +---+------+------------------------------------------------------+---------+ | H | %HM | HTTP method (ex: POST) | string | + | | | %[method] +---+------+------------------------------------------------------+---------+ | H | %HP | HTTP request URI without query string | string | +---+------+------------------------------------------------------+---------+ @@ -25072,6 +26106,7 @@ Please refer to the table below for currently defined variables : | | | HTTP/%[req.ver] | | +---+------+------------------------------------------------------+---------+ | | %ID | unique-id | string | + | | | %[unique-id] | | +---+------+------------------------------------------------------+---------+ | | %ST | status_code | numeric | | | | %[txn.status] | | @@ -25086,6 +26121,7 @@ Please refer to the table below for currently defined variables : | 
| | %[be_name] | string | +---+------+------------------------------------------------------+---------+ | | %bc | beconn (backend concurrent connections) | numeric | + | | | %[be_conn] | | +---+------+------------------------------------------------------+---------+ | | %bi | backend_source_ip (connecting address) | | | | | %[bc_src] | IP | @@ -25094,6 +26130,7 @@ Please refer to the table below for currently defined variables : | | | %[bc_src_port] | numeric | +---+------+------------------------------------------------------+---------+ | | %bq | backend_queue | numeric | + | | | %[bc_be_queue] | | +---+------+------------------------------------------------------+---------+ | | %ci | client_ip (accepted address) | | | | | %[src] | IP | @@ -25102,8 +26139,10 @@ Please refer to the table below for currently defined variables : | | | %[src_port] | numeric | +---+------+------------------------------------------------------+---------+ | | %f | frontend_name | string | + | | | %[fe_name] | | +---+------+------------------------------------------------------+---------+ | | %fc | feconn (frontend concurrent connections) | numeric | + | | | %[fe_conn] | | +---+------+------------------------------------------------------+---------+ | | %fi | frontend_ip (accepting address) | | | | | %[dst] | IP | @@ -25131,12 +26170,13 @@ Please refer to the table below for currently defined variables : | H | %r | http_request | string | +---+------+------------------------------------------------------+---------+ | | %rc | retries | numeric | - | | | %[txn.conn_retries] | | + | | | %[txn.redispatched,iif(+,)]%[txn.conn_retries] | | +---+------+------------------------------------------------------+---------+ | | %rt | request_counter (HTTP req or TCP session) | numeric | | | | %[txn.id32] | | +---+------+------------------------------------------------------+---------+ | | %s | server_name | string | + | | | %[srv_name] | | 
+---+------+------------------------------------------------------+---------+ | | %sc | srv_conn (server concurrent connections) | numeric | +---+------+------------------------------------------------------+---------+ @@ -25147,6 +26187,7 @@ Please refer to the table below for currently defined variables : | | | %[bc_dst_port] | numeric | +---+------+------------------------------------------------------+---------+ | | %sq | srv_queue | numeric | + | | | %[bc_srv_queue] | | +---+------+------------------------------------------------------+---------+ | S | %sslc| ssl_ciphers (ex: AES-SHA) | | | | | %[ssl_fc_cipher] | string | @@ -25280,7 +26321,7 @@ Timings events in TCP mode: all request to calculate the amortized value. The second and subsequent request will always report zero here. - This timer is named %Th as a log-format tag, and fc.timer.handshake as a + This timer is named %Th as a log-format alias, and fc.timer.handshake as a sample fetch. - Ti: is the idle time before the HTTP request (HTTP mode only). This timer @@ -25293,7 +26334,7 @@ Timings events in TCP mode: pending until they need it. This delay will be reported as the idle time. A value of -1 indicates that nothing was received on the connection. - This timer is named %Ti as a log-format tag, and req.timer.idle as a + This timer is named %Ti as a log-format alias, and req.timer.idle as a sample fetch. - TR: total time to get the client request (HTTP mode only). It's the time @@ -25304,7 +26345,7 @@ Timings events in TCP mode: since most requests fit in a single packet. A large time may indicate a request typed by hand during a test. - This timer is named %TR as a log-format tag, and req.timer.hdr as a + This timer is named %TR as a log-format alias, and req.timer.hdr as a sample fetch. - Tq: total time to get the client request from the accept date or since the @@ -25315,7 +26356,7 @@ Timings events in TCP mode: it in favor of TR nowadays, as the idle time adds a lot of noise to the reports. 
- This timer is named %Tq as a log-format tag, and req.timer.tq as a + This timer is named %Tq as a log-format alias, and req.timer.tq as a sample fetch. - Tw: total time spent in the queues waiting for a connection slot. It @@ -25324,7 +26365,7 @@ Timings events in TCP mode: requests. The value "-1" means that the request was killed before reaching the queue, which is generally what happens with invalid or denied requests. - This timer is named %Tw as a log-format tag, and req.timer.queue as a + This timer is named %Tw as a log-format alias, and req.timer.queue as a sample fetch. - Tc: total time to establish the TCP connection to the server. It's the time @@ -25333,7 +26374,7 @@ Timings events in TCP mode: the matching SYN/ACK packet in return. The value "-1" means that the connection never established. - This timer is named %Tc as a log-format tag, and bc.timer.connect as a + This timer is named %Tc as a log-format alias, and bc.timer.connect as a sample fetch. - Tr: server response time (HTTP mode only). It's the time elapsed between @@ -25348,7 +26389,7 @@ Timings events in TCP mode: header (empty line) was never seen, most likely because the server timeout stroke before the server managed to process the request. - This timer is named %Tr as a log-format tag, and res.timer.hdr as a + This timer is named %Tr as a log-format alias, and res.timer.hdr as a sample fetch. - Td: this is the total transfer time of the response payload till the last @@ -25358,7 +26399,7 @@ Timings events in TCP mode: The data sent are not guaranteed to be received by the client, they can be stuck in either the kernel or the network. - This timer is named %Td as a log-format tag, and res.timer.data as a + This timer is named %Td as a log-format alias, and res.timer.data as a sample fetch. - Ta: total active time for the HTTP request, between the moment the proxy @@ -25373,7 +26414,7 @@ Timings events in TCP mode: Timers with "-1" values have to be excluded from this equation. 
Note that "Ta" can never be negative. - This timer is named %Ta as a log-format tag, and txn.timer.total as a + This timer is named %Ta as a log-format alias, and txn.timer.total as a sample fetch. - Tt: total stream duration time, between the moment the proxy accepted it @@ -25388,7 +26429,7 @@ Timings events in TCP mode: mode, "Ti", "Tq" and "Tr" have to be excluded too. Note that "Tt" can never be negative and that for HTTP, Tt is simply equal to (Th+Ti+Ta). - This timer is named %Tt as a log-format tag, and fc.timer.total as a + This timer is named %Tt as a log-format alias, and fc.timer.total as a sample fetch. - Tu: total estimated time as seen from client, between the moment the proxy @@ -25400,7 +26441,7 @@ Timings events in TCP mode: option is specified. In this case, it only equals (Th+TR+Tw+Tc+Tr), and is prefixed with a '+' sign. - This timer is named %Tu as a log-format tag, and txn.timer.user as a + This timer is named %Tu as a log-format alias, and txn.timer.user as a sample fetch. These timers provide precious indications on trouble causes. Since the TCP @@ -26462,8 +27503,8 @@ no option mpxs-conns set-param <name> <fmt> [ { if | unless } <condition> ] Set a FastCGI parameter that should be passed to this application. Its - value, defined by <fmt> must follows the log-format rules (see section 8.2.4 - "Custom Log format"). It may optionally be followed by an ACL-based + value, defined by <fmt> must follows the Custom log format rules (see section + 8.2.6 "Custom Log format"). It may optionally be followed by an ACL-based condition, in which case it will only be evaluated if the condition is true. 
With this directive, it is possible to overwrite the value of default FastCGI diff --git a/doc/design-thoughts/ring-v2.txt b/doc/design-thoughts/ring-v2.txt new file mode 100644 index 0000000..48c539a --- /dev/null +++ b/doc/design-thoughts/ring-v2.txt @@ -0,0 +1,312 @@ +2024-02-20 - Ring buffer v2 +=========================== + +Goals: + - improve the multi-thread performance of rings so that traces can be written + from all threads in parallel without the huge bottleneck of the lock that + is currently necessary to protect the buffer. This is important for mmapped + areas that are left as a file when the process crashes. + + - keep traces synchronous within a given thread, i.e. when the TRACE() call + returns, the trace is either written into the ring or lost due to slow + readers. + + - try hard to limit the cache line bounces between threads due to the use of + a shared work area. + + - make waiting threads not disturb working ones + + - continue to work on all supported platforms, with a particular focus on + performance for modern platforms (memory ordering, DWCAS etc can be used if + they provide any benefit), with a fallback for inferior platforms. + + - do not reorder traces within a given thread. + + - do not break existing features + + - do not significantly increase memory usage + + +Analysis of the current situation +================================= + +Currently, there is a read lock around the call to __sink_write() in order to +make sure that an attempt to write the number of lost messages is delivered +with highest priority and is consistent with the lost counter. This doesn't +seem to pose any problem at this point though if it were, it could possibly +be revisited. 
+ +__sink_write() calls ring_write() which first measures the input string length +from the multiple segments, and locks the ring: + - while trying to free space + - while copying the message, due to the buffer's API + +Because of this, there is a huge serialization and threads wait in queue. Tests +involving a split of the lock and a release around the message copy have shown +a +60% performance increase, which is still not acceptable. + + +First proposed approach +======================= + +The first approach would have consisted in writing messages in small parts: + 1) write 0xFF in the tag to mean "size not filled yet" + 2) write the message's length and write a zero tag after the message's + location + 3) replace the first tag to 0xFE to indicate the size is known, but the + message is not filled yet. + 4) memcpy() of the message to the area + 5) replace the first tag to 0 to mark the entry as valid. + +It's worth noting that doing that without any lock will allow a second thread +looping on the first tag to jump to the second tag after step 3. But the cost +is high: in a 64-thread scenario where each of them wants to send one message, +the work would look like this: + - 64 threads try to CAS the tag. One gets it, 63 fail. They loop on the byte + in question in read-only mode, waiting for the byte to change. This loop + constantly forces the cache line to switch from MODIFIED to SHARED in the + writer thread, and makes it a pain for it to write the message's length + just after it. + + - once the first writer thread finally manages to write the length (step 2), + it writes 0xFE on the tag to release the waiting threads, and starts with + step 4. At this point, 63 threads try a CAS on the same entry, and this + hammering further complicates the memcpy() of step 4 for the first 63 bytes + of the message (well, 32 on avg since the tag is not necessarily aligned). + One thread wins, 62 fail. 
All read the size field and jump to the next tag, + waiting in read loops there. The second thread starts to write its size and + faces the same difficulty as described above, facing 62 competitors when + writing its size and the beginning of its message. + + - when the first writer thread writes the end of its message, it gets close + to the final tag where the 62 waiting threads are still reading, causing + a slow down again with the loss of exclusivity on the cache line. This is + the same for the second thread etc. + +Thus, on average, a writing thread is hindered by N-1 threads at the beginning +of its message area (in the first 32 bytes on avg) and by N-2 threads at the +end of its area (in the last 32 bytes on avg). Given that messages are roughly +218 bytes on avg for HTTP/1, this means that roughly 1/3 of the message is +written under severe cache contention. + +In addition to this, the buffer's tail needs to be updated once all threads are +ready, something that adds the need for synchronization so that the last writing +threads (the most likely to complete fast due to less perturbations) needs to +wait for all previous ones. This also means N atomic writes to the tail. + + +New proposal +============ + +In order to address the contention scenarios above, let's try to factor the +work as much as possible. The principle is that threads that want to write will +either do it themselves or declare their intent and wait for a writing thread +to do it for them. This aims at ensuring a maximum usage of read-only data +between threads, and to leave the work area read-write between very few +threads, and exclusive for multiple messages at once, avoiding the bounces. + +First, the buffer will have 2 indexes: + - head: where the valid data start + - tail: where new data need to be appended + +When a thread starts to work, it will keep a copy of $tail and push it forward +by as many bytes as needed to write all the messages it has to. 
In order to +guarantee that neither the previous nor the new $tail point to an outdated or +overwritten location but that there is always a tag there, $tail contains a +lock bit in its highest bit that will guarantee that only one at a time will +update it. The goal here is to perform as few atomic ops as possible in the +contended path so as to later amortize the costs and make sure to limit the +number of atomic ops on the wait path to the strict minimum so that waiting +threads do not hinder the workers: + + Fast path: + 1 load($tail) to check the topmost bit + 1 CAS($tail,$tail|BIT63) to set the bit (atomic_fetch_or / atomic_bts also work) + 1 store(1 byte tag=0xFF) at the beginning to mark the area busy + 1 store($tail) to update the new value + 1 copy of the whole message + 1 store(1 byte tag=0) at the beginning to release the message + + Contended path: + N load($tail) while waiting for the bit to be zero + M CAS($tail,$tail|BIT63) to try to set the bit on tail, competing with others + 1 store(1 byte tag=0xFF) at the beginning to mark the area busy + 1 store($tail) to update the new value + 1 copy of the whole message + 1 store(1 byte tag=0) at the beginning to release the message + +Queue +----- + +In order to limit the contention, writers will not start to write but will wait +in a queue, announcing their message pointers/lengths and total lengths. The +queue is made of a (ptr, len) pair that points to one such descriptor, located +in the waiter thread's stack, that itself points to the next pair. In fact +messages are ordered in a LIFO fashion but that isn't important since intra- +thread ordering is preserved (and in the worst case it will also be possible +to write them from end to beginning). + +The approach is the following: a writer loads $tail and sees it's busy, there's +no point continuing, it will add itself to the queue, announcing (ptr, len + +next->len) so that by just reading the first entry, one knows the total size +of the queue. 
And it will wait there as long as $tail has its topmost bit set +and the queue points to itself (meaning it's the queue's leader), so that only +one thread in the queue watches $tail, limiting the number of cache line +bounces. If the queue doesn't point anymore to the current thread, it means +another thread has taken it over so there's no point continuing, this thread +just becomes passive. If the lock bit is dropped from $tail, the watching +thread needs to re-check that it's still the queue's leader before trying to +grab the lock, so that only the leading thread will attempt it. Indeed, a few +of the last leading threads might still be looping, unaware that they're no +longer leaders. A CAS(&queue, self, self) will do it. Upon failure, the thread +just becomes a passive thread. Upon success, the thread is a confirmed leader, +it must then try to grab the tail lock. Only this thread and a few potential +newcomers will compete on this one. If the leading thread wins, it brings all +the queue with it and the newcomers will queue again. If the leading thread +loses, it needs to loop back to the point above, watching $tail and the +queue. In this case a newcomer might have grabbed the lock. It will notice +the non-empty queue and will take it with it. Thus in both cases the winner +thread does a CAS(queue, queue, NULL) to reset the queue, keeping the previous +pointer. + +At this point the winner thread considers its own message size plus the +retrieved queue's size as the total required size and advances $tail by as +much, and will iterate over all messages to copy them in turn. The passive +threads are released by doing XCHG(&ptr->next, ptr) for each message, that +is normally impossible otherwise. As such, a passive thread just has to +loop over its own value, stored in its own stack, reading from its L1 cache +in loops without any risk of disturbing others, hence no need for EBO. 
+ +During the time it took to update $tail, more messages will have been +accumulating in the queue from various other threads, and once $tail is +written, one thread can pick them up again. + +The benefit here is that the longer it takes one thread to free some space, +the more messages add up in the queue and the larger the next batch, so that +there are always very few contenders on the ring area and on the tail index. +At worst, the queue pointer is hammered but it's not on the fast path, since +wasting time here means all waiters will be queued. + +Also, if we keep the first tag unchanged after it's set to 0xFF, it allows to +avoid atomic ops inside all the message. Indeed there's no reader in the area +as long as the tag is 0xFF, so we can just write all contents at once including +the varints and subsequent message tags without ever using atomic ops, hence +not forcing ordered writes. So maybe in the end there is some value in writing +the messages backwards from end to beginning, and just writing the first tag +atomically but not the rest. + +The scenario would look like this: + + (without queue) + + - before starting to work: + do { + while (ret=(load(&tail) & BIT63)) + ; + } while (!cas(&tail, &ret, ret | BIT63)); + + - at this point, alone on it and guaranteed not to change + - after new size is calculated, write it and drop the lock: + + store(&tail, new_tail & ~BIT63); + + - that's sufficient to unlock other waiters. + + (with queue) + + in_queue = 0; + do { + ret = load(&tail); + if (ret & BIT63) { + if (!in_queue) { + queue_this_node(); + in_queue = 1; + } + while (ret & BIT63) + ; + } + } while (!cas(&tail, &ret, ret | BIT63)); + + dequeue(in_queue) etc. 
+ + Fast path: + 1 load($tail) to check the topmost bit + 1 CAS($tail,$tail|BIT63) to set the bit (atomic_fetch_or / atomic_bts also work) + 1 load of the queue to see that it's empty + 1 store(1 byte tag=0xFF) at the beginning to mark the area busy + 1 store($tail) to update the new value + 1 copy of the whole message + 1 store(1 byte tag=0) at the beginning to release the message + + Contended path: + 1 load($tail) to see the tail is changing + M CAS(queue,queue,self) to try to add the thread to the queue (avgmax nbthr/2) + N load($tail) while waiting for the lock bit to become zero + 1 CAS(queue,self,self) to check the thread is still the leader + M CAS($tail,$tail|BIT63) to try to set the bit on tail, competing with others + 1 CAS(queue,queue,NULL) to reset the queue + 1 store(1 byte tag=0xFF) at the beginning to mark the area busy + 1 store($tail) to update the new value + 1 copy of the whole message + P copies of individual messages + P stores of individual pointers to release writers + 1 store(1 byte tag=0) at the beginning to release the message + +Optimal approach (later if needed?): multiple queues. Each thread has one queue +assigned, either from a thread group, or using a modulo from the thread ID. +Same as above then. + + +Steps +----- + +It looks like the queue is what allows the process to scale by amortizing a +single lock for every N messages, but that it's not a prerequisite to start: +without a queue threads can just wait on $tail. + + +Options +------- + +It is possible to avoid the extra check on CAS(queue,self,self) by forcing +writers into the queue all the time.
It would slow down the fast path but +may improve the slow path, both of which would become the same: + + Contended path: + 1 XCHG(queue,self) to try to add the thread to the queue + N load($tail) while waiting for the lock bit to become zero + M CAS($tail,$tail|BIT63) to try to set the bit on tail, competing with others + 1 CAS(queue,self,NULL) to reset the queue + 1 store(1 byte tag=0xFF) at the beginning to mark the area busy + 1 store($tail) to update the new value + 1 copy of the whole message + P copies of individual messages + P stores of individual pointers to release writers + 1 store(1 byte tag=0) at the beginning to release the message + +There seems to remain a race when resetting the queue, where a newcomer thread +would queue itself while not being the leader. It seems it can be addressed by +deciding that whoever gets the bit is not important, what matters is the thread +that manages to reset the queue. This can then be done using another XCHG: + + 1 XCHG(queue,self) to try to add the thread to the queue + N load($tail) while waiting for the lock bit to become zero + M CAS($tail,$tail|BIT63) to try to set the bit on tail, competing with others + 1 XCHG(queue,NULL) to reset the queue + 1 store(1 byte tag=0xFF) at the beginning to mark the area busy + 1 store($tail) to update the new value + 1 copy of the whole message + P copies of individual messages + P stores of individual pointers to release writers + 1 store(1 byte tag=0) at the beginning to release the message + +However this time this can cause fragmentation of multiple sub-queues that will +need to be reassembled. So finally the CAS is better, the leader thread should +recognize itself. + +It seems tricky to reliably store the next pointer in each element, and a DWCAS +wouldn't help here either. Maybe uninitialized elements should just have a +special value (e.g. 0x1) for their next pointer, meaning "not initialized yet", +which the thread will then replace with the previous queue pointer.
A reader +would have to wait on this value when meeting it, knowing the pointer is not +filled yet but is coming. diff --git a/doc/internals/api/buffer-api.txt b/doc/internals/api/buffer-api.txt index ac35300..1e09ff9 100644 --- a/doc/internals/api/buffer-api.txt +++ b/doc/internals/api/buffer-api.txt @@ -548,11 +548,15 @@ buffer_almost_full | const buffer *buf| returns true if the buffer is not null | | are used. A waiting buffer will match. --------------------+------------------+--------------------------------------- b_alloc | buffer *buf | ensures that <buf> is allocated or - | ret: buffer * | allocates a buffer and assigns it to - | | *buf. If no memory is available, (1) - | | is assigned instead with a zero size. + | enum dynbuf_crit | allocates a buffer and assigns it to + | criticality | *buf. If no memory is available, (1) + | ret: buffer * | is assigned instead with a zero size. | | The allocated buffer is returned, or - | | NULL in case no memory is available + | | NULL in case no memory is available. + | | The criticality indicates how the + | | buffer might be used and how likely it + | | is that the allocated memory will be + | | quickly released. --------------------+------------------+--------------------------------------- __b_free | buffer *buf | releases <buf> which must be allocated | ret: void | and marks it empty diff --git a/doc/intro.txt b/doc/intro.txt index f4133a1..c3f6cda 100644 --- a/doc/intro.txt +++ b/doc/intro.txt @@ -1,7 +1,7 @@ ----------------------- HAProxy Starter Guide ----------------------- - version 2.9 + version 3.0 This document is an introduction to HAProxy for all those who don't know it, as diff --git a/doc/lua-api/index.rst b/doc/lua-api/index.rst index 17927f3..0d69a2f 100644 --- a/doc/lua-api/index.rst +++ b/doc/lua-api/index.rst @@ -348,33 +348,33 @@ Core class end .. -.. js:function:: core.add_acl(filename, key) +..
js:function:: core.add_acl(name, key) **context**: init, task, action, sample-fetch, converter - Add the ACL *key* in the ACLs list referenced by the file *filename*. + Add the ACL *key* in the ACLs list referenced by *name*. - :param string filename: the filename that reference the ACL entries. + :param string name: the name that reference the ACL entries. :param string key: the key which will be added. -.. js:function:: core.del_acl(filename, key) +.. js:function:: core.del_acl(name, key) **context**: init, task, action, sample-fetch, converter Delete the ACL entry referenced by the key *key* in the list of ACLs - referenced by *filename*. + referenced by *name*. - :param string filename: the filename that reference the ACL entries. + :param string name: the name that reference the ACL entries. :param string key: the key which will be deleted. -.. js:function:: core.del_map(filename, key) +.. js:function:: core.del_map(name, key) **context**: init, task, action, sample-fetch, converter Delete the map entry indexed with the specified key in the list of maps - referenced by his filename. + referenced by his name. - :param string filename: the filename that reference the map entries. + :param string name: the name that reference the map entries. :param string key: the key which will be deleted. .. js:function:: core.get_info() @@ -828,14 +828,14 @@ Core class :param integer nice: the nice value, it must be between -1024 and 1024. -.. js:function:: core.set_map(filename, key, value) +.. js:function:: core.set_map(name, key, value) **context**: init, task, action, sample-fetch, converter Set the value *value* associated to the key *key* in the map referenced by - *filename*. + *name*. - :param string filename: the Map reference + :param string name: the Map reference :param string key: the key to set or replace :param string value: the associated value @@ -2877,6 +2877,22 @@ TXN class :see: :js:func:`TXN.reply`, :js:class:`Reply` +.. 
js:function:: TXN.set_fc_tos(txn, tos) + + Is used to set the TOS or DSCP field value of packets sent to the client to + the value passed in "tos" on platforms which support this. + + :param class_txn txn: The class txn object containing the data. + :param integer tos: The new TOS or DSCP. + +.. js:function:: TXN.set_fc_mark(txn, mark) + + Is used to set the Netfilter MARK on all packets sent to the client to the + value passed in "mark" on platforms which support it. + + :param class_txn txn: The class txn object containing the data. + :param integer mark: The mark value. + .. js:function:: TXN.set_loglevel(txn, loglevel) Is used to change the log level of the current request. The "loglevel" must @@ -2888,21 +2904,21 @@ TXN class :js:attr:`core.crit`, :js:attr:`core.err`, :js:attr:`core.warning`, :js:attr:`core.notice`, :js:attr:`core.info`, :js:attr:`core.debug` (log level definitions) -.. js:function:: TXN.set_tos(txn, tos) +.. js:function:: TXN.set_mark(txn, mark) - Is used to set the TOS or DSCP field value of packets sent to the client to - the value passed in "tos" on platforms which support this. + Alias for :js:func:`TXN.set_fc_mark()`. - :param class_txn txn: The class txn object containing the data. - :param integer tos: The new TOS os DSCP. + .. warning:: + This function is deprecated. :js:func:`TXN.set_fc_mark()` must be used + instead. -.. js:function:: TXN.set_mark(txn, mark) +.. js:function:: TXN.set_tos(txn, tos) - Is used to set the Netfilter MARK on all packets sent to the client to the - value passed in "mark" on platforms which support it. + Alias for :js:func:`TXN.set_fc_tos()`. - :param class_txn txn: The class txn object containing the data. - :param integer mark: The mark value. + .. warning:: + This function is deprecated. :js:func:`TXN.set_fc_tos()` must be used + instead. .. js:function:: TXN.set_priority_class(txn, prio) @@ -3367,11 +3383,11 @@ Map class Note that :js:attr:`Map.reg` is also available for compatibility. -..
js:function:: Map.new(file, method) +.. js:function:: Map.new(name, method) Creates and load a map. - :param string file: Is the file containing the map. + :param string name: Is the name referencing the map. :param integer method: Is the map pattern matching method. See the attributes of the Map class. :returns: a class Map object. @@ -3913,7 +3929,7 @@ Filter class This class contains return codes some filter callback functions may return. It also contains configuration flags and some helper functions. To understand how - the filter API works, see `doc/internal/filters.txt` documentation. + the filter API works, see `doc/internals/api/filters.txt` documentation. .. js:attribute:: filter.CONTINUE diff --git a/doc/management.txt b/doc/management.txt index 9cbc772..d036018 100644 --- a/doc/management.txt +++ b/doc/management.txt @@ -1,7 +1,7 @@ ------------------------ HAProxy Management Guide ------------------------ - version 2.9 + version 3.0 This document describes how to start, stop, manage, and troubleshoot HAProxy, @@ -32,10 +32,12 @@ Summary 9.3. Unix Socket commands 9.4. Master CLI 9.4.1. Master CLI commands +9.5. Stats-file 10. Tricks for easier configuration management 11. Well-known traps to avoid 12. Debugging and performance issues 13. Security considerations +13.1. Linux capabilities support 1. Prerequisites @@ -49,7 +51,7 @@ familiar with troubleshooting utilities such as strace and tcpdump. 2. Quick reminder about HAProxy's architecture ---------------------------------------------- -HAProxy is a multi-threaded, event-driven, non-blocking daemon. This means is +HAProxy is a multi-threaded, event-driven, non-blocking daemon. This means it uses event multiplexing to schedule all of its activities instead of relying on the system to schedule between multiple activities. 
Most of the time it runs as a single process, so the output of "ps aux" on a system will report only one @@ -128,7 +130,7 @@ followed by one of more letters, and possibly followed by one or multiple extra arguments. Without any option, HAProxy displays the help page with a reminder about supported options. Available options may vary slightly based on the operating system. A fair number of these options overlap with an equivalent one -if the "global" section. In this case, the command line always has precedence +in the "global" section. In this case, the command line always has precedence over the configuration file, so that the command line can be used to quickly enforce some settings without touching the configuration files. The current list of options is : @@ -230,6 +232,11 @@ list of options is : getaddrinfo() exist on various systems and cause anomalies that are difficult to troubleshoot. + -dI : enable the insecure fork. This is the equivalent of the + "insecure-fork-wanted" in the global section. It can be useful when running + all the reg-tests with ASAN which need to fork addr2line to resolve the + addresses. + -dK<class[,class]*> : dumps the list of registered keywords in each class. The list of classes is available with "-dKhelp". All classes may be dumped using "-dKall", otherwise a selection of those shown in the help can be @@ -407,16 +414,20 @@ list of options is : detect protocol violations from clients or servers. An optional argument can be used to specify a list of various trace configurations using ',' as separator. Each element activates one or all trace sources. Additionally, - level and verbosity can be optionally specified on each element using ':' as - inner separator with trace name. - - -m <limit> : limit the total allocatable memory to <limit> megabytes across - all processes. This may cause some connection refusals or some slowdowns - depending on the amount of memory needed for normal operations. 
This is - mostly used to force the processes to work in a constrained resource usage - scenario. It is important to note that the memory is not shared between - processes, so in a multi-process scenario, this value is first divided by - global.nbproc before forking. + level and verbosity can be optionally specified on each element using ':' + as inner separator with trace name. When entering an invalid verbosity or + level name, the list of available keywords is presented. For example it can + be convenient to pass 'help' for each field to consult the list first. + + -m <limit> : limit allocatable memory, which is used to keep process's data, + to <limit> megabytes. This may cause some connection refusals or some + slowdowns depending on the amount of memory needed for normal operations. + This is mostly used to force haproxy process to work in a constrained + resource consumption scenario. It is important to note that the memory is + not shared between haproxy processes and a child process created via fork() + system call inherits its parent's resource limits. So, in a master-worker + mode this memory limit is separately applied to the master and its forked + worker process. -n <limit> : limits the per-process connection limit to <limit>. This is equivalent to the global section's keyword "maxconn". It has precedence @@ -450,7 +461,7 @@ list of options is : -st <pid>* : send the "terminate" signal (SIGTERM) to older processes after boot completion to terminate them immediately without finishing what they were doing. <pid> is a list of pids to signal (one per argument). The list - is ends on any option starting with a "-". It is not a problem if the list + ends on any option starting with a "-". It is not a problem if the list of pids is empty, so that it can be built on the fly based on the result of a command like "pidof" or "pgrep". 
@@ -462,11 +473,16 @@ list of options is : -x <unix_socket> : connect to the specified socket and try to retrieve any listening sockets from the old process, and use them instead of trying to bind new ones. This is useful to avoid missing any new connection when - reloading the configuration on Linux. The capability must be enable on the - stats socket using "expose-fd listeners" in your configuration. - In master-worker mode, the master will use this option upon a reload with - the "sockpair@" syntax, which allows the master to connect directly to a - worker without using stats socket declared in the configuration. + reloading the configuration on Linux. + + Without master-worker mode, the capability must be enabled on the stats + socket using "expose-fd listeners" in your configuration. + + In master-worker mode, it does not need "expose-fd listeners": the master + will automatically use this option upon a reload with the "sockpair@" + syntax, which allows the master to connect directly to a worker without using + any stats socket declared in the configuration. If you want to disable this, + you can pass -x /dev/null. A safe way to start HAProxy from an init file consists in forcing the daemon mode, storing existing pids to a pid file and using this pid file to notify @@ -1553,7 +1569,7 @@ Limitations do exist: the length of the whole buffer passed to the CLI must not be greater than tune.bfsize and the pattern "<<" must not be glued to the last word of the line. -When entering a paylod while in interactive mode, the prompt will change from +When entering a payload while in interactive mode, the prompt will change from "> " to "+ ". It is important to understand that when multiple haproxy processes are started @@ -1586,7 +1602,7 @@ abort ssl crl-file <crlfile> See also "set ssl crl-file" and "commit ssl crl-file". add acl [@<ver>] <acl> <pattern> - Add an entry into the acl <acl>. <acl> is the #<id> or the <file> returned by + Add an entry into the acl <acl>.
<acl> is the #<id> or the <name> returned by "show acl". This command does not verify if the entry already exists. Entries are added to the current version of the ACL, unless a specific version is specified with "@<ver>". This version number must have preliminary been @@ -1595,7 +1611,7 @@ add acl [@<ver>] <acl> <pattern> added with a specific version number will not match until a "commit acl" operation is performed on them. They may however be consulted using the "show acl @<ver>" command, and cleared using a "clear acl @<ver>" command. - This command cannot be used if the reference <acl> is a file also used with + This command cannot be used if the reference <acl> is a name also used with a map. In this case, the "add map" command must be used instead. add map [@<ver>] <map> <key> <value> @@ -1692,7 +1708,6 @@ add server <backend>/<server> [args]* - crt - disabled - downinter - - enabled - error-limit - fall - fastinter @@ -1788,15 +1803,15 @@ clear counters all and can only be issued on sockets configured for level "admin". clear acl [@<ver>] <acl> - Remove all entries from the acl <acl>. <acl> is the #<id> or the <file> - returned by "show acl". Note that if the reference <acl> is a file and is + Remove all entries from the acl <acl>. <acl> is the #<id> or the <name> + returned by "show acl". Note that if the reference <acl> is a name and is shared with a map, this map will be also cleared. By default only the current version of the ACL is cleared (the one being matched against). However it is possible to specify another version using '@' followed by this version. clear map [@<ver>] <map> - Remove all entries from the map <map>. <map> is the #<id> or the <file> - returned by "show map". Note that if the reference <map> is a file and is + Remove all entries from the map <map>. <map> is the #<id> or the <name> + returned by "show map". Note that if the reference <map> is a name and is shared with a acl, this acl will be also cleared. 
By default only the current version of the map is cleared (the one being matched against). However it is possible to specify another version using '@' followed by this version. @@ -1851,7 +1866,7 @@ clear table <table> [ data.<type> <operator> <value> ] | [ key <key> ] commit acl @<ver> <acl> Commit all changes made to version <ver> of ACL <acl>, and deletes all past - versions. <acl> is the #<id> or the <file> returned by "show acl". The + versions. <acl> is the #<id> or the <name> returned by "show acl". The version number must be between "curr_ver"+1 and "next_ver" as reported in "show acl". The contents to be committed to the ACL can be consulted with "show acl @<ver> <acl>" if desired. The specified version number has normally @@ -1861,12 +1876,12 @@ commit acl @<ver> <acl> and all entries in the new version to become visible. It is also possible to use this command to perform an atomic removal of all visible entries of an ACL by calling "prepare acl" first then committing without adding any - entries. This command cannot be used if the reference <acl> is a file also + entries. This command cannot be used if the reference <acl> is a name also used as a map. In this case, the "commit map" command must be used instead. commit map @<ver> <map> Commit all changes made to version <ver> of map <map>, and deletes all past - versions. <map> is the #<id> or the <file> returned by "show map". The + versions. <map> is the #<id> or the <name> returned by "show map". The version number must be between "curr_ver"+1 and "next_ver" as reported in "show map". The contents to be committed to the map can be consulted with "show map @<ver> <map>" if desired. The specified version number has normally @@ -1903,7 +1918,7 @@ commit ssl cert <filename> Commit a temporary SSL certificate update transaction. 
In the case of an existing certificate (in a "Used" state in "show ssl - cert"), generate every SSL contextes and SNIs it need, insert them, and + cert"), generate every SSL contexts and SNIs it needs, insert them, and remove the previous ones. Replace in memory the previous SSL certificates everywhere the <filename> was used in the configuration. Upon failure it doesn't remove or insert anything. Once the temporary transaction is @@ -1952,16 +1967,16 @@ debug dev <command> [args]* del acl <acl> [<key>|#<ref>] Delete all the acl entries from the acl <acl> corresponding to the key <key>. - <acl> is the #<id> or the <file> returned by "show acl". If the <ref> is used, + <acl> is the #<id> or the <name> returned by "show acl". If the <ref> is used, this command delete only the listed reference. The reference can be found with - listing the content of the acl. Note that if the reference <acl> is a file and + listing the content of the acl. Note that if the reference <acl> is a name and is shared with a map, the entry will be also deleted in the map. del map <map> [<key>|#<ref>] Delete all the map entries from the map <map> corresponding to the key <key>. - <map> is the #<id> or the <file> returned by "show map". If the <ref> is used, + <map> is the #<id> or the <name> returned by "show map". If the <ref> is used, this command delete only the listed reference. The reference can be found with - listing the content of the map. Note that if the reference <map> is a file and + listing the content of the map. Note that if the reference <map> is a name and is shared with a acl, the entry will be also deleted in the map. del ssl ca-file <cafile> @@ -1992,7 +2007,7 @@ del server <backend>/<server> Remove a server attached to the backend <backend>. All servers are eligible, except servers which are referenced by other configuration elements. The server must be put in maintenance mode prior to its deletion. 
The operation - is cancelled if the serveur still has active or idle connection or its + is cancelled if the server still has active or idle connection or its connection queue is not empty. disable agent <backend>/<server> @@ -2060,6 +2075,10 @@ disable server <backend>/<server> This command is restricted and can only be issued on sockets configured for level "admin". +dump stats-file + Generate a stats-file which can be used to preload haproxy counters values on + startup. See "Stats-file" section for more detail. + enable agent <backend>/<server> Resume auxiliary agent check that was temporarily stopped. @@ -2142,7 +2161,7 @@ expert-mode [on|off] get map <map> <value> get acl <acl> <value> Lookup the value <value> in the map <map> or in the ACL <acl>. <map> or <acl> - are the #<id> or the <file> returned by "show map" or "show acl". This command + are the #<id> or the <name> returned by "show map" or "show acl". This command returns all the matching patterns associated with this map. This is useful for debugging maps and ACLs. The output format is composed by one line par matching type. Each line is composed by space-delimited series of words. @@ -2219,7 +2238,7 @@ new ssl crl-file <crlfile> prepare acl <acl> Allocate a new version number in ACL <acl> for atomic replacement. <acl> is - the #<id> or the <file> returned by "show acl". The new version number is + the #<id> or the <name> returned by "show acl". The new version number is shown in response after "New version created:". This number will then be usable to prepare additions of new entries into the ACL which will then atomically replace the current ones once committed. It is reported as @@ -2227,12 +2246,12 @@ prepare acl <acl> unused versions will automatically be removed once a more recent version is committed. Version numbers are unsigned 32-bit values which wrap at the end, so care must be taken when comparing them in an external program. 
This - command cannot be used if the reference <acl> is a file also used as a map. + command cannot be used if the reference <acl> is a name also used as a map. In this case, the "prepare map" command must be used instead. prepare map <map> Allocate a new version number in map <map> for atomic replacement. <map> is - the #<id> or the <file> returned by "show map". The new version number is + the #<id> or the <name> returned by "show map". The new version number is shown in response after "New version created:". This number will then be usable to prepare additions of new entries into the map which will then atomically replace the current ones once committed. It is reported as @@ -2281,7 +2300,7 @@ set anon global-key <key> set map <map> [<key>|#<ref>] <value> Modify the value corresponding to each key <key> in a map <map>. <map> is the - #<id> or <file> returned by "show map". If the <ref> is used in place of + #<id> or <name> returned by "show map". If the <ref> is used in place of <key>, only the entry pointed by <ref> is changed. The new value is <value>. set maxconn frontend <frontend> <value> @@ -2547,7 +2566,7 @@ set weight <backend>/<server> <weight>[%] show acl [[@<ver>] <acl>] Dump info about acl converters. Without argument, the list of all available acls is returned. If a <acl> is specified, its contents are dumped. <acl> is - the #<id> or <file>. By default the current version of the ACL is shown (the + the #<id> or <name>. By default the current version of the ACL is shown (the version currently being matched against and reported as 'curr_ver' in the ACL list). It is possible to instead dump other versions by prepending '@<ver>' before the ACL's identifier. The version works as a filter and non-existing @@ -2930,7 +2949,7 @@ show libs show map [[@<ver>] <map>] Dump info about map converters. Without argument, the list of all available maps is returned. If a <map> is specified, its contents are dumped. <map> is - the #<id> or <file>. 
By default the current version of the map is shown (the + the #<id> or <name>. By default the current version of the map is shown (the version currently being matched against and reported as 'curr_ver' in the map list). It is possible to instead dump other versions by prepending '@<ver>' before the map's identifier. The version works as a filter and non-existing @@ -3068,14 +3087,22 @@ show resolvers [<resolvers section id>] too_big: too big response outdated: number of response arrived too late (after another name server) -show quic [oneline|full] [all] +show quic [<format>] [<filter>] Dump information on all active QUIC frontend connections. This command is restricted and can only be issued on sockets configured for levels "operator" - or "admin". An optional format can be specified as first argument to control - the verbosity. Currently supported values are "oneline" which is the default - if format is unspecified or "full". By default, connections on closing or - draining state are not displayed. Use the extra argument "all" to include - them in the output. + or "admin". + + An optional argument can be specified to control the verbosity. Its value can + be interpreted in different ways. The first possibility is to use predefined + values, "oneline" for the default format and "full" to display all + information. Alternatively, a list of comma-delimited fields can be specified + to restrict output. Currently supported values are "tp", "sock", "pktns", + "cc" and "mux". + + The final argument is used to restrict or extend the connection list. By + default, connections in closing or draining state are not displayed. Use the + extra argument "all" to include them in the output. It's also possible to + restrict to a single connection by specifying its hexadecimal address.
show servers conn [<backend>] Dump the current and idle connections state of the servers belonging to the @@ -3998,6 +4025,37 @@ update ssl ocsp-response <certfile> local tree, its contents will be displayed on the standard output. The format is the same as the one described in "show ssl ocsp-response". +wait { -h | <delay> } [<condition> [<args>...]] + In its simplest form without any condition, this simply waits for the + requested delay before continuing. This can be used to collect metrics around + a specific interval. + + With a condition and optional arguments, the command will wait for the + specified condition to be satisfied, to unrecoverably fail, or to remain + unsatisfied for the whole <delay> duration. The supported conditions are: + + - srv-removable <proxy>/<server> : this will wait for the specified server to + be removable, i.e. be in maintenance and no longer have any connection on + it. Some conditions will never be accepted (e.g. not in maintenance) and + will cause the report of a specific error message indicating what condition + is not met. The server might even have been removed in parallel and no + longer exist. If everything is OK before the delay, a success is returned + and the operation is terminated. + + The default unit for the delay is milliseconds, though other units are + accepted if suffixed with the usual timer units (us, ms, s, m, h, d). When + used with the 'socat' utility, do not forget to extend socat's close timeout + to cover the wait time. Passing "-h" as the first or second argument provides + the command's usage. + Example: + $ socat -t20 /path/to/socket - <<< "show activity; wait 10s; show activity" + + $ socat -t5 /path/to/socket - <<< " + disable server px/srv1 + shutdown sessions server px/srv1 + wait 2s srv-removable px/srv1 + del server px/srv1" + 9.4. Master CLI --------------- @@ -4122,7 +4180,7 @@ reload return a reload status, once the reload was performed.
Be careful with the timeout if a tool is used to parse it, it is only returned once the configuration is parsed and the new worker is forked. The "socat" command uses - a timeout of 0.5s by default so it will quits before showing the message if + a timeout of 0.5s by default so it will quit before showing the message if the reload is too long. "ncat" does not have a timeout by default. When compiled with USE_SHM_OPEN=1, the reload command is also able to dump the startup-logs of the master. @@ -4189,6 +4247,29 @@ show startup-logs Those messages are also dumped with the "reload" command. + +9.5. Stats-file +-------------- + +A so-called stats-file can be used to preload internal haproxy counters on +process startup with non-null values. Its main purpose is to preserve +statistics for worker processes across reloads. Only an excerpt of all the +exposed haproxy statistics is present in a stats-file as it only makes sense to +preload metric-type values. + +For the moment, only proxy counters are supported in stats-file. This allows to +preload values for frontends, backends, servers and listeners. However only +object instances with a non-empty GUID are stored in a stats-file. This +guarantees that values will be preloaded for objects with matching type and GUID, +even if other parameters differ. + +The purpose of the CLI command "dump stats-file" is to generate a stats-file. Format +of the stats-file is internally defined and freely subject to future changes +and extension. It is designed to be compatible at least across adjacent +haproxy stable branch releases, but may require optional extra configuration +when loading a stats-file to a process running on an older version. + + 10. Tricks for easier configuration management ---------------------------------------------- @@ -4208,7 +4289,7 @@ using regular expressions involving the dollar symbol). 
Environment variables also make it convenient to write configurations which are expected to work on various sites where only the address changes. It can also -permit to remove passwords from some configs. Example below where the the file +permit to remove passwords from some configs. Example below where the file "site1.env" file is sourced by the init script upon startup : $ cat site1.env @@ -4520,3 +4601,73 @@ A safe configuration will have : stats socket /var/run/haproxy.stat uid hatop gid hatop mode 600 +13.1. Linux capabilities support +------------------------------ + +Since version v2.9 haproxy supports Linux capabilities. If the binary is +compiled with USE_LINUX_CAP=1, it is able to preserve capabilities given in +'setcap' keyword during switching from root user to a non-root. + +Since version v3.1 haproxy also checks if capabilities given in 'setcap' +keyword were set in its binary file Permitted set by administrator +(capget syscall). If this is the case, it performs transition of these capabilities +in its process Effective set (capset syscall), while running as a non-root +user. + +This was done to avoid all potential use cases when haproxy starts and runs as +root: transparent proxy mode, binding to privileged ports. + +'setcap' keyword supports following network capabilities: +- cap_net_admin: transparent proxying, binding socket to a specific network + interface, using set-mark action; +- cap_net_raw (subset of cap_net_admin): transparent proxying; +- cap_net_bind_service: binding socket to a privileged port (below 1024); +- cap_sys_admin: creating socket in a specific network namespace. + +Haproxy never does the transition of these capabilities from its Permitted set +to the Effective, if they are not listed as 'setcap' argument. See more +information about 'setcap' keyword and supported capabilities in the chapter +3.1 Process management and security in the Configuration guide. 
+ +Administrator may add needed capabilities in the haproxy binary file Permitted +set with the following command: + +Example: + # setcap cap_net_admin,cap_net_bind_service=p /usr/local/sbin/haproxy + +Added capabilities will be seen in process Permitted set after its start. +If the same capabilities are the arguments of 'setcap' keyword, they could be +also seen in the process Effective set. This can be checked with the following +command: + +Example: + # grep Cap /proc/<haproxy PID>/status + CapInh: 0000000000000000 + CapPrm: 0000000000001400 + CapEff: 0000000000001400 + CapBnd: 000001ffffffffff + CapAmb: 0000000000000000 + +See more details about setcap and capabilities sets in Linux man pages +(capabilities(7)). + +In some use cases like transparent proxying or creating socket in a specific +network namespace, configuration file parser detects that cap_net_raw or +cap_sys_admin or some other supported capabilities are needed. Then, during +the initialization stage, haproxy process checks, if these capabilities could +be put in its Effective set. If it's not possible due to capget or capset +syscall failure (restrictions set on syscalls by some security modules like +SELinux, Seccomp, etc), process emits diagnostic warnings (start with -dD). + +Due to support of many different platforms with different system settings, +it's impossible for the parser to deduce from the configuration file, if +binding to privileged ports will be done. So, in the case of insufficient +privileges (run as non-root) process will terminate only with an alert +message like below. It's up to a user to recheck its configuration and haproxy +binary capabilities set. + +Example: + $ haproxy -dD -f haproxy.cfg + ... + [ALERT] (96797) : Binding [haproxy.cfg:36] for frontend fe: cannot bind socket (Permission denied) for [0.0.0.0:80] + [ALERT] (96797) : [haproxy.main()] Some protocols failed to start their listeners! Exiting. 
diff --git a/doc/peers-v2.0.txt b/doc/peers-v2.0.txt index 711c949..3b82369 100644 --- a/doc/peers-v2.0.txt +++ b/doc/peers-v2.0.txt @@ -227,6 +227,8 @@ bit 22: gpt array 23: gpc array 24: gpc rate array + 25: glitch counter + 26: glitch rate d) Table Switch Message diff --git a/include/haproxy/action-t.h b/include/haproxy/action-t.h index f77bdce..eee16a3 100644 --- a/include/haproxy/action-t.h +++ b/include/haproxy/action-t.h @@ -25,6 +25,7 @@ #include <haproxy/applet-t.h> #include <haproxy/stick_table-t.h> #include <haproxy/vars-t.h> +#include <haproxy/log-t.h> struct session; struct stream; @@ -141,15 +142,15 @@ struct act_rule { struct { int i; /* integer param (status, nice, loglevel, ..) */ struct ist str; /* string param (reason, header name, ...) */ - struct list fmt; /* log-format compatible expression */ + struct lf_expr fmt; /* log-format compatible expression */ struct my_regex *re; /* used by replace-header/value/uri/path */ } http; /* args used by some HTTP rules */ struct http_reply *http_reply; /* HTTP response to be used by return/deny/tarpit rules */ struct redirect_rule *redir; /* redirect rule or "http-request redirect" */ struct { char *ref; /* MAP or ACL file name to update */ - struct list key; /* pattern to retrieve MAP or ACL key */ - struct list value; /* pattern to retrieve MAP value */ + struct lf_expr key; /* pattern to retrieve MAP or ACL key */ + struct lf_expr value; /* pattern to retrieve MAP value */ } map; struct sample_expr *expr; struct { @@ -167,7 +168,7 @@ struct act_rule { } timeout; struct hlua_rule *hlua_rule; struct { - struct list fmt; /* log-format compatible expression */ + struct lf_expr fmt; /* log-format compatible expression */ struct sample_expr *expr; uint64_t name_hash; enum vars_scope scope; @@ -192,6 +193,10 @@ struct act_rule { struct sample_expr *name; /* used to differentiate idle connections */ } attach_srv; /* 'attach-srv' rule */ struct { + int value; + struct sample_expr *expr; + } expr_int; /* expr or 
int value (when expr is NULL)*/ + struct { void *p[4]; } act; /* generic pointers to be used by custom actions */ } arg; /* arguments used by some actions */ diff --git a/include/haproxy/applet-t.h b/include/haproxy/applet-t.h index bd96403..a305da6 100644 --- a/include/haproxy/applet-t.h +++ b/include/haproxy/applet-t.h @@ -27,13 +27,29 @@ #include <haproxy/dynbuf-t.h> #include <haproxy/freq_ctr-t.h> #include <haproxy/obj_type-t.h> +#include <haproxy/task-t.h> #include <haproxy/xref-t.h> /* flags for appctx->state */ -#define APPLET_WANT_DIE 0x01 /* applet was running and requested to die */ /* Room for per-command context (mostly CLI commands but not only) */ -#define APPLET_MAX_SVCCTX 88 +#define APPLET_MAX_SVCCTX 128 + +/* Appctx Flags */ +#define APPCTX_FL_INBLK_ALLOC 0x00000001 +#define APPCTX_FL_INBLK_FULL 0x00000002 +#define APPCTX_FL_OUTBLK_ALLOC 0x00000004 +#define APPCTX_FL_OUTBLK_FULL 0x00000008 +#define APPCTX_FL_EOI 0x00000010 +#define APPCTX_FL_EOS 0x00000020 +#define APPCTX_FL_ERR_PENDING 0x00000040 +#define APPCTX_FL_ERROR 0x00000080 +#define APPCTX_FL_SHUTDOWN 0x00000100 /* applet was shut down (->release() called if any). No more data exchange with SCs */ +#define APPCTX_FL_WANT_DIE 0x00000200 /* applet was running and requested to die */ +#define APPCTX_FL_INOUT_BUFS 0x00000400 /* applet uses its own buffers */ +#define APPCTX_FL_FASTFWD 0x00000800 /* zero-copy forwarding is in-use, don't fill the outbuf */ +#define APPCTX_FL_IN_MAYALLOC 0x00001000 /* applet may try again to allocate its inbuf */ +#define APPCTX_FL_OUT_MAYALLOC 0x00002000 /* applet may try again to allocate its outbuf */ struct appctx; struct proxy; @@ -49,6 +65,9 @@ struct applet { int (*init)(struct appctx *); /* callback to init resources, may be NULL. expect 0 if ok, -1 if an error occurs. 
*/ void (*fct)(struct appctx *); /* internal I/O handler, may never be NULL */ + size_t (*rcv_buf)(struct appctx *appctx, struct buffer *buf, size_t count, unsigned int flags); /* called from the upper layer to get data */ + size_t (*snd_buf)(struct appctx *appctx, struct buffer *buf, size_t count, unsigned int flags); /* Called from the upper layet to put data */ + size_t (*fastfwd)(struct appctx *appctx, struct buffer *buf, size_t count, unsigned int flags); /* Callback to fast-forward data */ void (*release)(struct appctx *); /* callback to release resources, may be NULL */ unsigned int timeout; /* execution timeout. */ }; @@ -57,9 +76,14 @@ struct applet { struct appctx { enum obj_type obj_type; /* OBJ_TYPE_APPCTX */ /* 3 unused bytes here */ - unsigned short state; /* Internal appctx state */ unsigned int st0; /* CLI state for stats, session state for peers */ unsigned int st1; /* prompt/payload (bitwise OR of APPCTX_CLI_ST1_*) for stats, session error for peers */ + + unsigned int flags; /* APPCTX_FL_* */ + struct buffer inbuf; + struct buffer outbuf; + size_t to_forward; + struct buffer *chunk; /* used to store unfinished commands */ struct applet *applet; /* applet this context refers to */ struct session *sess; /* session for frontend applets (NULL for backend applets) */ @@ -75,7 +99,7 @@ struct appctx { struct buffer_wait buffer_wait; /* position in the list of objects waiting for a buffer */ struct task *t; /* task associated to the applet */ struct freq_ctr call_rate; /* appctx call rate */ - struct list wait_entry; /* entry in a list of waiters for an event (e.g. ring events) */ + struct mt_list wait_entry; /* entry in a list of waiters for an event (e.g. ring events) */ /* The pointer seen by application code is appctx->svcctx. 
In 2.7 the * anonymous union and the "ctx" struct disappeared, and the struct diff --git a/include/haproxy/applet.h b/include/haproxy/applet.h index b04ffd9..1c9721d 100644 --- a/include/haproxy/applet.h +++ b/include/haproxy/applet.h @@ -38,6 +38,7 @@ extern unsigned int nb_applets; extern struct pool_head *pool_head_appctx; struct task *task_run_applet(struct task *t, void *context, unsigned int state); +struct task *task_process_applet(struct task *t, void *context, unsigned int state); int appctx_buf_available(void *arg); void *applet_reserve_svcctx(struct appctx *appctx, size_t size); void applet_reset_svcctx(struct appctx *appctx); @@ -48,6 +49,19 @@ int appctx_finalize_startup(struct appctx *appctx, struct proxy *px, struct buff void appctx_free_on_early_error(struct appctx *appctx); void appctx_free(struct appctx *appctx); +size_t appctx_htx_rcv_buf(struct appctx *appctx, struct buffer *buf, size_t count, unsigned int flags); +size_t appctx_raw_rcv_buf(struct appctx *appctx, struct buffer *buf, size_t count, unsigned int flags); +size_t appctx_rcv_buf(struct stconn *sc, struct buffer *buf, size_t count, unsigned int flags); + +size_t appctx_htx_snd_buf(struct appctx *appctx, struct buffer *buf, size_t count, unsigned int flags); +size_t appctx_raw_snd_buf(struct appctx *appctx, struct buffer *buf, size_t count, unsigned int flags); +size_t appctx_snd_buf(struct stconn *sc, struct buffer *buf, size_t count, unsigned int flags); + +int appctx_fastfwd(struct stconn *sc, unsigned int count, unsigned int flags); +ssize_t applet_append_line(void *ctx, struct ist v1, struct ist v2, size_t ofs, size_t len); +static forceinline void applet_fl_set(struct appctx *appctx, uint on); +static forceinline void applet_fl_clr(struct appctx *appctx, uint off); + static inline struct appctx *appctx_new_here(struct applet *applet, struct sedesc *sedesc) { return appctx_new_on(applet, sedesc, tid); @@ -58,6 +72,41 @@ static inline struct appctx *appctx_new_anywhere(struct applet 
*applet, struct s return appctx_new_on(applet, sedesc, -1); } + +/* + * Release a buffer, if any, and try to wake up entities waiting in the buffer + * wait queue. + */ +static inline void appctx_release_buf(struct appctx *appctx, struct buffer *bptr) +{ + if (bptr->size) { + b_free(bptr); + offer_buffers(appctx->buffer_wait.target, 1); + } +} + +/* + * Allocate a buffer. If if fails, it adds the appctx in buffer wait queue and + * sets the relevant blocking flag depending on the side (assuming that bptr is + * either &appctx->inbuf or &appctx->outbuf). Upon success it will also clear + * the equivalent MAYALLOC flags. + */ +static inline struct buffer *appctx_get_buf(struct appctx *appctx, struct buffer *bptr) +{ + struct buffer *buf = NULL; + int is_inbuf = (bptr == &appctx->inbuf); + + if (likely(!LIST_INLIST(&appctx->buffer_wait.list))) { + if (unlikely((buf = b_alloc(bptr, is_inbuf ? DB_MUX_TX : DB_SE_RX)) == NULL)) { + b_queue(is_inbuf ? DB_MUX_TX : DB_SE_RX, &appctx->buffer_wait, appctx, appctx_buf_available); + applet_fl_set(appctx, is_inbuf ? APPCTX_FL_INBLK_ALLOC : APPCTX_FL_OUTBLK_ALLOC); + } else { + applet_fl_clr(appctx, is_inbuf ? APPCTX_FL_IN_MAYALLOC : APPCTX_FL_OUT_MAYALLOC); + } + } + return buf; +} + /* Helper function to call .init applet callback function, if it exists. Returns 0 * on success and -1 on error. */ @@ -78,9 +127,11 @@ static inline int appctx_init(struct appctx *appctx) /* Releases an appctx previously allocated by appctx_new(). 
*/ static inline void __appctx_free(struct appctx *appctx) { + appctx_release_buf(appctx, &appctx->inbuf); + appctx_release_buf(appctx, &appctx->outbuf); + task_destroy(appctx->t); - if (LIST_INLIST(&appctx->buffer_wait.list)) - LIST_DEL_INIT(&appctx->buffer_wait.list); + b_dequeue(&appctx->buffer_wait); if (appctx->sess) session_free(appctx->sess); BUG_ON(appctx->sedesc && !se_fl_test(appctx->sedesc, SE_FL_ORPHAN)); @@ -109,6 +160,67 @@ static inline struct stream *appctx_strm(const struct appctx *appctx) return __sc_strm(appctx->sedesc->sc); } +/* returns 1 if the appctx is attached on the backend side or 0 if it is + * attached on the frontend side. Note that only frontend appctx may have no SC. + */ +static inline int appctx_is_back(const struct appctx *appctx) +{ + struct stconn *sc = appctx_sc(appctx); + + return !!(sc && (sc->flags & SC_FL_ISBACK)); +} + +static forceinline void applet_fl_zero(struct appctx *appctx) +{ + appctx->flags = 0; +} + +static forceinline void applet_fl_setall(struct appctx *appctx, uint all) +{ + appctx->flags = all; +} + +static forceinline void applet_fl_set(struct appctx *appctx, uint on) +{ + if (((on & (APPCTX_FL_EOS|APPCTX_FL_EOI)) && appctx->flags & APPCTX_FL_ERR_PENDING) || + ((on & APPCTX_FL_ERR_PENDING) && appctx->flags & (APPCTX_FL_EOI|APPCTX_FL_EOS))) + on |= APPCTX_FL_ERROR; + appctx->flags |= on; +} + +static forceinline void applet_fl_clr(struct appctx *appctx, uint off) +{ + appctx->flags &= ~off; +} + +static forceinline uint applet_fl_test(const struct appctx *appctx, uint test) +{ + return !!(appctx->flags & test); +} + +static forceinline uint applet_fl_get(const struct appctx *appctx) +{ + return appctx->flags; +} + +static inline void applet_set_eoi(struct appctx *appctx) +{ + applet_fl_set(appctx, APPCTX_FL_EOI); +} + +static inline void applet_set_eos(struct appctx *appctx) +{ + applet_fl_set(appctx, APPCTX_FL_EOS); +} + +static inline void applet_set_error(struct appctx *appctx) +{ + if 
(applet_fl_test(appctx, (APPCTX_FL_EOS|APPCTX_FL_EOI))) + applet_fl_set(appctx, APPCTX_FL_ERROR); + else + applet_fl_set(appctx, APPCTX_FL_ERR_PENDING); +} + /* The applet announces it has more data to deliver to the stream's input * buffer. */ @@ -173,20 +285,32 @@ static inline void applet_expect_data(struct appctx *appctx) */ static inline int applet_putchk(struct appctx *appctx, struct buffer *chunk) { - struct sedesc *se = appctx->sedesc; int ret; - ret = ci_putchk(sc_ic(se->sc), chunk); - if (ret < 0) { - /* XXX: Handle all errors as a lack of space because callers - * don't handles other cases for now. So applets must be - * careful to handles shutdown (-2) and invalid calls (-3) by - * themselves. - */ - sc_need_room(se->sc, chunk->data); - ret = -1; + if (appctx->flags & APPCTX_FL_INOUT_BUFS) { + if (b_data(chunk) > b_room(&appctx->outbuf)) { + applet_fl_set(appctx, APPCTX_FL_OUTBLK_FULL); + ret = -1; + } + else { + ret = b_putblk(&appctx->outbuf, b_head(chunk), b_data(chunk)); + chunk->data -= ret; + } + } + else { + struct sedesc *se = appctx->sedesc; + + ret = ci_putchk(sc_ic(se->sc), chunk); + if (ret < 0) { + /* XXX: Handle all errors as a lack of space because callers + * don't handles other cases for now. So applets must be + * careful to handles shutdown (-2) and invalid calls (-3) by + * themselves. + */ + sc_need_room(se->sc, chunk->data); + ret = -1; + } } - return ret; } @@ -196,18 +320,29 @@ static inline int applet_putchk(struct appctx *appctx, struct buffer *chunk) */ static inline int applet_putblk(struct appctx *appctx, const char *blk, int len) { - struct sedesc *se = appctx->sedesc; int ret; - ret = ci_putblk(sc_ic(se->sc), blk, len); - if (ret < -1) { - /* XXX: Handle all errors as a lack of space because callers - * don't handles other cases for now. So applets must be - * careful to handles shutdown (-2) and invalid calls (-3) by - * themselves. 
- */ - sc_need_room(se->sc, len); - ret = -1; + if (appctx->flags & APPCTX_FL_INOUT_BUFS) { + if (len > b_room(&appctx->outbuf)) { + applet_fl_set(appctx, APPCTX_FL_OUTBLK_FULL); + ret = -1; + } + else + ret = b_putblk(&appctx->outbuf, blk, len); + } + else { + struct sedesc *se = appctx->sedesc; + + ret = ci_putblk(sc_ic(se->sc), blk, len); + if (ret < 0) { + /* XXX: Handle all errors as a lack of space because callers + * don't handles other cases for now. So applets must be + * careful to handles shutdown (-2) and invalid calls (-3) by + * themselves. + */ + sc_need_room(se->sc, len); + ret = -1; + } } return ret; @@ -220,20 +355,32 @@ static inline int applet_putblk(struct appctx *appctx, const char *blk, int len) */ static inline int applet_putstr(struct appctx *appctx, const char *str) { - struct sedesc *se = appctx->sedesc; int ret; - ret = ci_putstr(sc_ic(se->sc), str); - if (ret == -1) { - /* XXX: Handle all errors as a lack of space because callers - * don't handles other cases for now. So applets must be - * careful to handles shutdown (-2) and invalid calls (-3) by - * themselves. - */ - sc_need_room(se->sc, strlen(str)); - ret = -1; - } + if (appctx->flags & APPCTX_FL_INOUT_BUFS) { + int len = strlen(str); + if (len > b_room(&appctx->outbuf)) { + applet_fl_set(appctx, APPCTX_FL_OUTBLK_FULL); + ret = -1; + } + else + ret = b_putblk(&appctx->outbuf, str, len); + } + else { + struct sedesc *se = appctx->sedesc; + + ret = ci_putstr(sc_ic(se->sc), str); + if (ret < 0) { + /* XXX: Handle all errors as a lack of space because callers + * don't handles other cases for now. So applets must be + * careful to handles shutdown (-2) and invalid calls (-3) by + * themselves. 
+ */ + sc_need_room(se->sc, strlen(str)); + ret = -1; + } + } return ret; } @@ -243,20 +390,32 @@ static inline int applet_putstr(struct appctx *appctx, const char *str) */ static inline int applet_putchr(struct appctx *appctx, char chr) { - struct sedesc *se = appctx->sedesc; int ret; - ret = ci_putchr(sc_ic(se->sc), chr); - if (ret == -1) { - /* XXX: Handle all errors as a lack of space because callers - * don't handles other cases for now. So applets must be - * careful to handles shutdown (-2) and invalid calls (-3) by - * themselves. - */ - sc_need_room(se->sc, 1); - ret = -1; + if (appctx->flags & APPCTX_FL_INOUT_BUFS) { + if (b_full(&appctx->outbuf)) { + applet_fl_set(appctx, APPCTX_FL_OUTBLK_FULL); + ret = -1; + } + else { + b_putchr(&appctx->outbuf, chr); + ret = 1; + } + } + else { + struct sedesc *se = appctx->sedesc; + + ret = ci_putchr(sc_ic(se->sc), chr); + if (ret < 0) { + /* XXX: Handle all errors as a lack of space because callers + * don't handles other cases for now. So applets must be + * careful to handles shutdown (-2) and invalid calls (-3) by + * themselves. 
+ */ + sc_need_room(se->sc, 1); + ret = -1; + } } - return ret; } diff --git a/include/haproxy/atomic.h b/include/haproxy/atomic.h index d64e192..146f2ad 100644 --- a/include/haproxy/atomic.h +++ b/include/haproxy/atomic.h @@ -185,6 +185,7 @@ #define __ha_barrier_full() do { } while (0) #define __ha_compiler_barrier() do { } while (0) #define __ha_cpu_relax() ({ 1; }) +#define __ha_cpu_relax_for_read() ({ 1; }) #else /* !USE_THREAD */ @@ -198,10 +199,11 @@ #define HA_ATOMIC_LOAD(val) \ ({ \ - typeof(*(val)) ret = \ - ({ __sync_synchronize(); *(volatile typeof(val))val; }); \ + typeof((val)) __val_load = (val); \ + typeof(*(val)) __ret_val = \ + ({ __sync_synchronize(); *(volatile typeof(__val_load))__val_load; }); \ __sync_synchronize(); \ - ret; \ + __ret_val; \ }) #define HA_ATOMIC_STORE(val, new) \ @@ -586,6 +588,9 @@ __ha_cas_dw(void *target, void *compare, const void *set) /* short-lived CPU relaxation */ #define __ha_cpu_relax() ({ asm volatile("rep;nop\n"); 1; }) +/* dummy relaxation: x86 prefers not to wait at all in read loops */ +#define __ha_cpu_relax_for_read() ({ 1; }) + #elif defined(__arm__) && (defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__)) static __inline void @@ -651,6 +656,9 @@ static __inline int __ha_cas_dw(void *target, void *compare, const void *set) /* short-lived CPU relaxation */ #define __ha_cpu_relax() ({ asm volatile(""); 1; }) +/* short wait in read loops */ +#define __ha_cpu_relax_for_read() ({ asm volatile(""); 1; }) + #elif defined (__aarch64__) static __inline void @@ -697,6 +705,9 @@ __ha_barrier_atomic_full(void) */ #define __ha_cpu_relax() ({ asm volatile("isb" ::: "memory"); 1; }) +/* aarch64 prefers to wait for real in read loops */ +#define __ha_cpu_relax_for_read() ({ asm volatile("isb" ::: "memory"); 1; }) + #if defined(__ARM_FEATURE_ATOMICS) && !defined(__clang__) // ARMv8.1-A atomics /* returns 0 on failure, non-zero on success */ @@ -799,6 +810,9 @@ static __inline int __ha_cas_dw(void *target, void *compare, void 
*set) /* short-lived CPU relaxation */ #define __ha_cpu_relax() ({ asm volatile(""); 1; }) +/* default wait in read loops */ +#define __ha_cpu_relax_for_read() ({ asm volatile(""); 1; }) + #endif /* end of arch-specific barrier/dwcas */ static inline void __ha_compiler_barrier(void) diff --git a/include/haproxy/backend-t.h b/include/haproxy/backend-t.h index 02a2cc5..bc21fd1 100644 --- a/include/haproxy/backend-t.h +++ b/include/haproxy/backend-t.h @@ -28,6 +28,7 @@ #include <haproxy/lb_fwlc-t.h> #include <haproxy/lb_fwrr-t.h> #include <haproxy/lb_map-t.h> +#include <haproxy/lb_ss-t.h> #include <haproxy/server-t.h> #include <haproxy/thread-t.h> @@ -58,6 +59,9 @@ #define BE_LB_CB_LC 0x00000000 /* least-connections */ #define BE_LB_CB_FAS 0x00000001 /* first available server (opposite of leastconn) */ +/* BE_LB_SA_* is used with BE_LB_KIND_SA */ +#define BE_LB_SA_SS 0x00000000 /* stick to server as long as it is available */ + #define BE_LB_PARM 0x000000FF /* mask to get/clear the LB param */ /* Required input(s) */ @@ -73,6 +77,7 @@ #define BE_LB_KIND_RR 0x00010000 /* round-robin */ #define BE_LB_KIND_CB 0x00020000 /* connection-based */ #define BE_LB_KIND_HI 0x00030000 /* hash of input (see hash inputs above) */ +#define BE_LB_KIND_SA 0x00040000 /* standalone (specific algorithms, cannot be grouped) */ #define BE_LB_KIND 0x00070000 /* mask to get/clear LB algorithm */ /* All known variants of load balancing algorithms. 
These can be cleared using @@ -83,6 +88,7 @@ #define BE_LB_ALGO_RND (BE_LB_KIND_RR | BE_LB_NEED_NONE | BE_LB_RR_RANDOM) /* random value */ #define BE_LB_ALGO_LC (BE_LB_KIND_CB | BE_LB_NEED_NONE | BE_LB_CB_LC) /* least connections */ #define BE_LB_ALGO_FAS (BE_LB_KIND_CB | BE_LB_NEED_NONE | BE_LB_CB_FAS) /* first available server */ +#define BE_LB_ALGO_SS (BE_LB_KIND_SA | BE_LB_NEED_NONE | BE_LB_SA_SS) /* sticky */ #define BE_LB_ALGO_SRR (BE_LB_KIND_RR | BE_LB_NEED_NONE | BE_LB_RR_STATIC) /* static round robin */ #define BE_LB_ALGO_SH (BE_LB_KIND_HI | BE_LB_NEED_ADDR | BE_LB_HASH_SRC) /* hash: source IP */ #define BE_LB_ALGO_UH (BE_LB_KIND_HI | BE_LB_NEED_HTTP | BE_LB_HASH_URI) /* hash: HTTP URI */ @@ -91,7 +97,6 @@ #define BE_LB_ALGO_RCH (BE_LB_KIND_HI | BE_LB_NEED_DATA | BE_LB_HASH_RDP) /* hash: RDP cookie value */ #define BE_LB_ALGO_SMP (BE_LB_KIND_HI | BE_LB_NEED_DATA | BE_LB_HASH_SMP) /* hash: sample expression */ #define BE_LB_ALGO_LH (BE_LB_KIND_HI | BE_LB_NEED_LOG | BE_LB_HASH_SMP) /* log hash: sample expression */ -#define BE_LB_ALGO_LS (BE_LB_KIND_CB | BE_LB_NEED_LOG | BE_LB_CB_FAS) /* log sticky */ #define BE_LB_ALGO (BE_LB_KIND | BE_LB_NEED | BE_LB_PARM ) /* mask to clear algo */ /* Higher bits define how a given criterion is mapped to a server. 
In fact it @@ -147,11 +152,7 @@ struct lbprm { struct lb_fwlc fwlc; struct lb_chash chash; struct lb_fas fas; - struct { - struct server **srv; /* array containing in-use log servers */ - struct list avail; /* servers available for lb are registered in this list */ - uint32_t lastid; /* last relative id used */ - } log; /* used in log-balancing context (PR_MODE_SYSLOG backend) */ + struct lb_ss ss; }; uint32_t algo; /* load balancing algorithm and variants: BE_LB_* */ int tot_wact, tot_wbck; /* total effective weights of active and backup servers */ @@ -161,7 +162,7 @@ struct lbprm { int wmult; /* ratio between user weight and effective weight */ int wdiv; /* ratio between effective weight and user weight */ int hash_balance_factor; /* load balancing factor * 100, 0 if disabled */ - struct sample_expr *expr; /* sample expression for "balance hash" */ + struct sample_expr *expr; /* sample expression for "balance (log-)hash" */ char *arg_str; /* name of the URL parameter/header/cookie used for hashing */ int arg_len; /* strlen(arg_str), computed only once */ int arg_opt1; /* extra option 1 for the LB algo (algo-specific) */ diff --git a/include/haproxy/backend.h b/include/haproxy/backend.h index 4ab9170..93a4bee 100644 --- a/include/haproxy/backend.h +++ b/include/haproxy/backend.h @@ -30,6 +30,15 @@ #include <haproxy/stream-t.h> #include <haproxy/time.h> +struct server *get_server_sh(struct proxy *px, const char *addr, int len, const struct server *avoid); +struct server *get_server_uh(struct proxy *px, char *uri, int uri_len, const struct server *avoid); +struct server *get_server_ph(struct proxy *px, const char *uri, int uri_len, const struct server *avoid); +struct server *get_server_ph_post(struct stream *s, const struct server *avoid); +struct server *get_server_hh(struct stream *s, const struct server *avoid); +struct server *get_server_rch(struct stream *s, const struct server *avoid); +struct server *get_server_expr(struct stream *s, const struct server 
*avoid); +struct server *get_server_rnd(struct stream *s, const struct server *avoid); + int assign_server(struct stream *s); int assign_server_address(struct stream *s); int assign_server_and_queue(struct stream *s); @@ -50,7 +59,6 @@ int tcp_persist_rdp_cookie(struct stream *s, struct channel *req, int an_bit); int be_downtime(struct proxy *px); void recount_servers(struct proxy *px); void update_backend_weight(struct proxy *px); -int be_lastsession(const struct proxy *be); /* Returns number of usable servers in backend */ static inline int be_usable_srv(struct proxy *be) diff --git a/include/haproxy/buf.h b/include/haproxy/buf.h index e98161e..ad22e6f 100644 --- a/include/haproxy/buf.h +++ b/include/haproxy/buf.h @@ -91,6 +91,27 @@ static inline size_t b_full(const struct buffer *b) return !b_room(b); } +/* b_add_ofs() : return new offset within buffer after applying wrapping. Only + * offsets resulting from initial positions added to counts within buffer size + * limits are handled. + */ +static inline size_t b_add_ofs(const struct buffer *b, size_t ofs, size_t count) +{ + ofs += count; + if (ofs >= b->size) + ofs -= b->size; + return ofs; +} + +/* b_rel_ofs() : take an absolute offset in the buffer, and return it relative + * to the buffer's head for use with b_peek(). + */ +static inline size_t b_rel_ofs(const struct buffer *b, size_t ofs) +{ + if (ofs < b->head) + ofs += b->size; + return ofs - b->head; +} /* b_stop() : returns the pointer to the byte following the end of the buffer, * which may be out of the buffer if the buffer ends on the last byte of the @@ -314,6 +335,38 @@ static inline size_t b_contig_space(const struct buffer *b) return left; } +/* b_getblk_ofs() : gets one full block of data at once from a buffer, starting + * from offset <offset> after the buffer's area, and for exactly <len> bytes. 
+ * As a convenience to avoid complex checks in callers, the offset is allowed + * to exceed a valid one by no more than one buffer size, and will automatically + * be wrapped. The caller is responsible for ensuring that <len> doesn't exceed + * the known length of the available data at this position, otherwise undefined + * data will be returned. This is meant to be used on concurrently accessed + * buffers, so that a reader can read a known area while the buffer is being fed + * and trimmed. The function guarantees never to use ->head nor ->data. The + * buffer is left unaffected. It always returns the number of bytes copied. + */ +static inline size_t b_getblk_ofs(const struct buffer *buf, char *blk, size_t len, size_t offset) +{ + size_t firstblock; + + if (offset >= buf->size) + offset -= buf->size; + + BUG_ON(offset >= buf->size); + + firstblock = buf->size - offset; + + if (firstblock >= len) + firstblock = len; + + memcpy(blk, b_orig(buf) + offset, firstblock); + + if (len > firstblock) + memcpy(blk + firstblock, b_orig(buf), len - firstblock); + return len; +} + /* b_getblk() : gets one full block of data at once from a buffer, starting * from offset <offset> after the buffer's head, and limited to no more than * <len> bytes. The caller is responsible for ensuring that neither <offset> @@ -382,6 +435,121 @@ static inline size_t b_getblk_nc(const struct buffer *buf, const char **blk1, si return 1; } +/* Locates the longest part of the buffer that is composed exclusively of + * characters not in the <delim> set, and delimited by one of these characters, + * and returns the initial part and the first of such delimiters. A single + * escape character in <escape> may be specified so that when not 0 and found, + * the character that follows it is never taken as a delimiter. Note that + * <delim> cannot contain the zero byte, hence this function is not usable with + * byte zero as a delimiter. + * + * Return values : + * >0 : number of bytes read. 
Includes the sep if present before len or end. + * =0 : no sep before end found. <str> is left undefined. + * + * The buffer is left unaffected. Unused buffers are left in an undefined state. + */ +static inline size_t b_getdelim(const struct buffer *buf, size_t offset, size_t count, + char *str, size_t len, const char *delim, char escape) +{ + uchar delim_map[256 / 8]; + int found, escaped; + uint pos, bit; + size_t ret, max; + uchar b; + char *p; + + ret = 0; + p = b_peek(buf, offset); + + max = len; + if (!count || offset+count > b_data(buf)) + goto out; + if (max > count) { + max = count; + str[max-1] = 0; + } + + /* create the byte map */ + memset(delim_map, 0, sizeof(delim_map)); + while ((b = *delim)) { + pos = b >> 3; + bit = b & 7; + delim_map[pos] |= 1 << bit; + delim++; + } + + found = escaped = 0; + while (max) { + *str++ = b = *p; + ret++; + max--; + + if (escape && (escaped || *p == escape)) { + escaped = !escaped; + goto skip; + } + + pos = b >> 3; + bit = b & 7; + if (delim_map[pos] & (1 << bit)) { + found = 1; + break; + } + skip: + p = b_next(buf, p); + } + + if (ret > 0 && !found) + ret = 0; + out: + if (max) + *str = 0; + return ret; +} + +/* Gets one text line out of aa buffer. + * Return values : + * >0 : number of bytes read. Includes the \n if present before len or end. + * =0 : no '\n' before end found. <str> is left undefined. + * + * The buffer is left unaffected. Unused buffers are left in an undefined state. 
+ */ +static inline size_t b_getline(const struct buffer *buf, size_t offset, size_t count, + char *str, size_t len) +{ + size_t ret, max; + char *p; + + ret = 0; + p = b_peek(buf, offset); + + max = len; + if (!count || offset+count > b_data(buf)) + goto out; + if (max > count) { + max = count; + str[max-1] = 0; + } + + while (max) { + *str++ = *p; + ret++; + max--; + + if (*p == '\n') + break; + p = b_next(buf, p); + } + + if (ret > 0 && *(str-1) != '\n') + ret = 0; + out: + if (max) + *str = 0; + return ret; +} + /*********************************************/ /* Functions used to modify the buffer state */ @@ -536,6 +704,40 @@ static inline void b_putchr(struct buffer *b, char c) b->data++; } +/* b_putblk_ofs(): puts one full block of data of length <len> from <blk> into + * the buffer, starting from absolute offset <offset> after the buffer's area. + * As a convenience to avoid complex checks in callers, the offset is allowed + * to exceed a valid one by no more than one buffer size, and will automatically + * be wrapped. The caller is responsible for ensuring that <len> doesn't exceed + * the known length of the available room at this position, otherwise data may + * be overwritten. The buffer's length is *not* updated, so generally the caller + * will have updated it before calling this function. This is meant to be used + * on concurrently accessed buffers, so that a writer can append data while a + * reader is blocked by other means from reaching the current area The function + * guarantees never to use ->head nor ->data. It always returns the number of + * bytes copied. 
+ */ +static inline size_t b_putblk_ofs(struct buffer *buf, char *blk, size_t len, size_t offset) +{ + size_t firstblock; + + if (offset >= buf->size) + offset -= buf->size; + + BUG_ON(offset >= buf->size); + + firstblock = buf->size - offset; + + if (firstblock >= len) + firstblock = len; + + memcpy(b_orig(buf) + offset, blk, firstblock); + + if (len > firstblock) + memcpy(b_orig(buf), blk + firstblock, len - firstblock); + return len; +} + /* __b_putblk() : tries to append <len> bytes from block <blk> to the end of * buffer <b> without checking for free space (it's up to the caller to do it). * Supports wrapping. It must not be called with len == 0. @@ -619,7 +821,7 @@ static inline size_t b_xfer(struct buffer *dst, struct buffer *src, size_t count * b_room(dst). * Returns the number of bytes copied. */ -static inline size_t b_ncat(struct buffer *dst, struct buffer *src, size_t count) +static inline size_t b_ncat(struct buffer *dst, const struct buffer *src, size_t count) { size_t ret, block1, block2; diff --git a/include/haproxy/bug.h b/include/haproxy/bug.h index 1356acf..b89ed22 100644 --- a/include/haproxy/bug.h +++ b/include/haproxy/bug.h @@ -28,6 +28,7 @@ #ifndef _HAPROXY_BUG_H #define _HAPROXY_BUG_H +#include <stddef.h> #include <haproxy/atomic.h> #include <haproxy/compiler.h> @@ -85,6 +86,23 @@ static inline __attribute((always_inline)) void ha_crash_now(void) #endif // end of arch-specific ha_crash_now() definitions + +/* ABORT_NOW() usually takes no argument and will cause the program to abort + * exactly where it is. We prefer to emit an invalid instruction to preserve + * all registers, but it may fall back to a regular abort depending on the + * platform. An optional argument can be a message string that will cause + * the emission of a message saying "ABORT at" followed by the file and line + * number then that message followed by a final line feed. This can be helpful + * in situations where the core cannot be retrieved for example. 
However it + * will definitely cause the loss of some registers, so should be avoided when + * not strictly necessary. + */ +#define ABORT_NOW(...) \ + _ABORT_NOW(__FILE__, __LINE__, __VA_ARGS__) + +#define _ABORT_NOW(file, line, ...) \ + __ABORT_NOW(file, line, __VA_ARGS__) + #ifdef DEBUG_USE_ABORT /* abort() is better recognized by code analysis tools */ @@ -104,12 +122,22 @@ static __attribute__((noinline,noreturn,unused)) void abort_with_line(uint line) abort(); } -#define ABORT_NOW() do { DUMP_TRACE(); abort_with_line(__LINE__); } while (0) +#define __ABORT_NOW(file, line, ...) do { \ + if (sizeof("" __VA_ARGS__) > 1) \ + complain(NULL, "\nABORT at " file ":" #line ": " __VA_ARGS__ "\n", 1); \ + DUMP_TRACE(); \ + abort_with_line(__LINE__); \ + } while (0) #else /* More efficient than abort() because it does not mangle the * stack and stops at the exact location we need. */ -#define ABORT_NOW() do { DUMP_TRACE(); ha_crash_now(); } while (0) +#define __ABORT_NOW(file, line, ...) do { \ + if (sizeof("" __VA_ARGS__) > 1) \ + complain(NULL, "\nABORT at " file ":" #line ": " __VA_ARGS__ "\n", 1); \ + DUMP_TRACE(); \ + ha_crash_now(); \ + } while (0) #endif /* This is the generic low-level macro dealing with conditional warnings and @@ -118,13 +146,21 @@ static __attribute__((noinline,noreturn,unused)) void abort_with_line(uint line) * the case where it wouldn't die. The <crash> flag is made of: * - crash & 1: crash yes/no; * - crash & 2: taint as bug instead of warn + * The optional argument must be a single constant string that will be appended + * on a second line after the condition message, to give a bit more context + * about the problem. */ -#define _BUG_ON(cond, file, line, crash, pfx, sfx) \ - __BUG_ON(cond, file, line, crash, pfx, sfx) +#define _BUG_ON(cond, file, line, crash, pfx, sfx, ...) 
\ + __BUG_ON(cond, file, line, crash, pfx, sfx, __VA_ARGS__) -#define __BUG_ON(cond, file, line, crash, pfx, sfx) \ +#define __BUG_ON(cond, file, line, crash, pfx, sfx, ...) \ (void)(unlikely(cond) ? ({ \ - complain(NULL, "\n" pfx "condition \"" #cond "\" matched at " file ":" #line "" sfx "\n", crash); \ + const char *msg; \ + if (sizeof("" __VA_ARGS__) > 1) \ + msg ="\n" pfx "condition \"" #cond "\" matched at " file ":" #line "" sfx "\n" __VA_ARGS__ "\n"; \ + else \ + msg = "\n" pfx "condition \"" #cond "\" matched at " file ":" #line "" sfx "\n"; \ + complain(NULL, msg, crash); \ if (crash & 1) \ ABORT_NOW(); \ else \ @@ -137,13 +173,18 @@ static __attribute__((noinline,noreturn,unused)) void abort_with_line(uint line) * certain unexpected conditions in field. Later on, in cores it will be * possible to verify these counters. */ -#define _BUG_ON_ONCE(cond, file, line, crash, pfx, sfx) \ - __BUG_ON_ONCE(cond, file, line, crash, pfx, sfx) +#define _BUG_ON_ONCE(cond, file, line, crash, pfx, sfx, ...) \ + __BUG_ON_ONCE(cond, file, line, crash, pfx, sfx, __VA_ARGS__) -#define __BUG_ON_ONCE(cond, file, line, crash, pfx, sfx) \ +#define __BUG_ON_ONCE(cond, file, line, crash, pfx, sfx, ...) \ (void)(unlikely(cond) ? 
({ \ static int __match_count_##line; \ - complain(&__match_count_##line, "\n" pfx "condition \"" #cond "\" matched at " file ":" #line "" sfx "\n", crash); \ + const char *msg; \ + if (sizeof("" __VA_ARGS__) > 1) \ + msg ="\n" pfx "condition \"" #cond "\" matched at " file ":" #line "" sfx "\n" __VA_ARGS__ "\n"; \ + else \ + msg = "\n" pfx "condition \"" #cond "\" matched at " file ":" #line "" sfx "\n"; \ + complain(&__match_count_##line, msg, crash); \ if (crash & 1) \ ABORT_NOW(); \ else \ @@ -163,32 +204,32 @@ static __attribute__((noinline,noreturn,unused)) void abort_with_line(uint line) */ /* The macros below are for general use */ -#if defined(DEBUG_STRICT) +#if defined(DEBUG_STRICT) && (DEBUG_STRICT > 0) # if defined(DEBUG_STRICT_ACTION) && (DEBUG_STRICT_ACTION < 1) /* Lowest level: BUG_ON() warns, WARN_ON() warns, CHECK_IF() warns */ -# define BUG_ON(cond) _BUG_ON (cond, __FILE__, __LINE__, 2, "WARNING: bug ", " (not crashing but process is untrusted now, please report to developers)") -# define WARN_ON(cond) _BUG_ON (cond, __FILE__, __LINE__, 0, "WARNING: warn ", " (please report to developers)") -# define CHECK_IF(cond) _BUG_ON_ONCE(cond, __FILE__, __LINE__, 0, "WARNING: check ", " (please report to developers)") +# define BUG_ON(cond, ...) _BUG_ON (cond, __FILE__, __LINE__, 2, "WARNING: bug ", " (not crashing but process is untrusted now, please report to developers)", __VA_ARGS__) +# define WARN_ON(cond, ...) _BUG_ON (cond, __FILE__, __LINE__, 0, "WARNING: warn ", " (please report to developers)", __VA_ARGS__) +# define CHECK_IF(cond, ...) 
_BUG_ON_ONCE(cond, __FILE__, __LINE__, 0, "WARNING: check ", " (please report to developers)", __VA_ARGS__) # elif !defined(DEBUG_STRICT_ACTION) || (DEBUG_STRICT_ACTION == 1) /* default level: BUG_ON() crashes, WARN_ON() warns, CHECK_IF() warns */ -# define BUG_ON(cond) _BUG_ON (cond, __FILE__, __LINE__, 3, "FATAL: bug ", "") -# define WARN_ON(cond) _BUG_ON (cond, __FILE__, __LINE__, 0, "WARNING: warn ", " (please report to developers)") -# define CHECK_IF(cond) _BUG_ON_ONCE(cond, __FILE__, __LINE__, 0, "WARNING: check ", " (please report to developers)") +# define BUG_ON(cond, ...) _BUG_ON (cond, __FILE__, __LINE__, 3, "FATAL: bug ", "", __VA_ARGS__) +# define WARN_ON(cond, ...) _BUG_ON (cond, __FILE__, __LINE__, 0, "WARNING: warn ", " (please report to developers)", __VA_ARGS__) +# define CHECK_IF(cond, ...) _BUG_ON_ONCE(cond, __FILE__, __LINE__, 0, "WARNING: check ", " (please report to developers)", __VA_ARGS__) # elif defined(DEBUG_STRICT_ACTION) && (DEBUG_STRICT_ACTION == 2) /* Stricter level: BUG_ON() crashes, WARN_ON() crashes, CHECK_IF() warns */ -# define BUG_ON(cond) _BUG_ON (cond, __FILE__, __LINE__, 3, "FATAL: bug ", "") -# define WARN_ON(cond) _BUG_ON (cond, __FILE__, __LINE__, 1, "FATAL: warn ", "") -# define CHECK_IF(cond) _BUG_ON_ONCE(cond, __FILE__, __LINE__, 0, "WARNING: check ", " (please report to developers)") +# define BUG_ON(cond, ...) _BUG_ON (cond, __FILE__, __LINE__, 3, "FATAL: bug ", "", __VA_ARGS__) +# define WARN_ON(cond, ...) _BUG_ON (cond, __FILE__, __LINE__, 1, "FATAL: warn ", "", __VA_ARGS__) +# define CHECK_IF(cond, ...) 
_BUG_ON_ONCE(cond, __FILE__, __LINE__, 0, "WARNING: check ", " (please report to developers)", __VA_ARGS__) # elif defined(DEBUG_STRICT_ACTION) && (DEBUG_STRICT_ACTION >= 3) /* Developer/CI level: BUG_ON() crashes, WARN_ON() crashes, CHECK_IF() crashes */ -# define BUG_ON(cond) _BUG_ON (cond, __FILE__, __LINE__, 3, "FATAL: bug ", "") -# define WARN_ON(cond) _BUG_ON (cond, __FILE__, __LINE__, 1, "FATAL: warn ", "") -# define CHECK_IF(cond) _BUG_ON_ONCE(cond, __FILE__, __LINE__, 1, "FATAL: check ", "") +# define BUG_ON(cond, ...) _BUG_ON (cond, __FILE__, __LINE__, 3, "FATAL: bug ", "", __VA_ARGS__) +# define WARN_ON(cond, ...) _BUG_ON (cond, __FILE__, __LINE__, 1, "FATAL: warn ", "", __VA_ARGS__) +# define CHECK_IF(cond, ...) _BUG_ON_ONCE(cond, __FILE__, __LINE__, 1, "FATAL: check ", "", __VA_ARGS__) # endif #else -# define BUG_ON(cond) do { (void)sizeof(cond); } while (0) -# define WARN_ON(cond) do { (void)sizeof(cond); } while (0) -# define CHECK_IF(cond) do { (void)sizeof(cond); } while (0) +# define BUG_ON(cond, ...) do { (void)sizeof(cond); } while (0) +# define WARN_ON(cond, ...) do { (void)sizeof(cond); } while (0) +# define CHECK_IF(cond, ...) do { (void)sizeof(cond); } while (0) #endif /* These macros are only for hot paths and remain disabled unless DEBUG_STRICT is 2 or above. @@ -198,20 +239,20 @@ static __attribute__((noinline,noreturn,unused)) void abort_with_line(uint line) #if defined(DEBUG_STRICT) && (DEBUG_STRICT > 1) # if defined(DEBUG_STRICT_ACTION) && (DEBUG_STRICT_ACTION < 1) /* Lowest level: BUG_ON() warns, CHECK_IF() warns */ -# define BUG_ON_HOT(cond) _BUG_ON_ONCE(cond, __FILE__, __LINE__, 2, "WARNING: bug ", " (not crashing but process is untrusted now, please report to developers)") -# define CHECK_IF_HOT(cond) _BUG_ON_ONCE(cond, __FILE__, __LINE__, 0, "WARNING: check ", " (please report to developers)") +# define BUG_ON_HOT(cond, ...) 
_BUG_ON_ONCE(cond, __FILE__, __LINE__, 2, "WARNING: bug ", " (not crashing but process is untrusted now, please report to developers)", __VA_ARGS__) +# define CHECK_IF_HOT(cond, ...) _BUG_ON_ONCE(cond, __FILE__, __LINE__, 0, "WARNING: check ", " (please report to developers)", __VA_ARGS__) # elif !defined(DEBUG_STRICT_ACTION) || (DEBUG_STRICT_ACTION < 3) /* default level: BUG_ON() crashes, CHECK_IF() warns */ -# define BUG_ON_HOT(cond) _BUG_ON (cond, __FILE__, __LINE__, 3, "FATAL: bug ", "") -# define CHECK_IF_HOT(cond) _BUG_ON_ONCE(cond, __FILE__, __LINE__, 0, "WARNING: check ", " (please report to developers)") +# define BUG_ON_HOT(cond, ...) _BUG_ON (cond, __FILE__, __LINE__, 3, "FATAL: bug ", "", __VA_ARGS__) +# define CHECK_IF_HOT(cond, ...) _BUG_ON_ONCE(cond, __FILE__, __LINE__, 0, "WARNING: check ", " (please report to developers)", __VA_ARGS__) # elif defined(DEBUG_STRICT_ACTION) && (DEBUG_STRICT_ACTION >= 3) /* Developer/CI level: BUG_ON() crashes, CHECK_IF() crashes */ -# define BUG_ON_HOT(cond) _BUG_ON (cond, __FILE__, __LINE__, 3, "FATAL: bug ", "") -# define CHECK_IF_HOT(cond) _BUG_ON_ONCE(cond, __FILE__, __LINE__, 1, "FATAL: check ", "") +# define BUG_ON_HOT(cond, ...) _BUG_ON (cond, __FILE__, __LINE__, 3, "FATAL: bug ", "", __VA_ARGS__) +# define CHECK_IF_HOT(cond, ...) _BUG_ON_ONCE(cond, __FILE__, __LINE__, 1, "FATAL: check ", "", __VA_ARGS__) # endif #else -# define BUG_ON_HOT(cond) do { (void)sizeof(cond); } while (0) -# define CHECK_IF_HOT(cond) do { (void)sizeof(cond); } while (0) +# define BUG_ON_HOT(cond, ...) do { (void)sizeof(cond) ; } while (0) +# define CHECK_IF_HOT(cond, ...) 
do { (void)sizeof(cond) ; } while (0) #endif diff --git a/include/haproxy/cbuf-t.h b/include/haproxy/cbuf-t.h index 27d3bf1..fee97c3 100644 --- a/include/haproxy/cbuf-t.h +++ b/include/haproxy/cbuf-t.h @@ -27,6 +27,7 @@ #endif #endif +#include <stddef.h> #include <haproxy/list-t.h> extern struct pool_head *pool_head_cbuf; diff --git a/include/haproxy/cfgparse.h b/include/haproxy/cfgparse.h index adcabb3..3a769d5 100644 --- a/include/haproxy/cfgparse.h +++ b/include/haproxy/cfgparse.h @@ -36,6 +36,7 @@ struct acl_cond; #define CFG_USERLIST 3 #define CFG_PEERS 4 #define CFG_CRTLIST 5 +#define CFG_CRTSTORE 6 /* various keyword modifiers */ enum kw_mod { diff --git a/include/haproxy/channel.h b/include/haproxy/channel.h index 17dd75f..22949e1 100644 --- a/include/haproxy/channel.h +++ b/include/haproxy/channel.h @@ -818,6 +818,69 @@ static inline size_t channel_empty(const struct channel *chn) return (IS_HTX_STRM(chn) ? htx_is_empty(htxbuf(&chn->buf)) : c_empty(chn)); } +/* Check channel's last_read date against the idle timeer to verify the producer + * is still streaming data or not + */ +static inline void channel_check_idletimer(struct channel *chn) +{ + if ((chn->flags & (CF_STREAMER | CF_STREAMER_FAST)) && !co_data(chn) && + global.tune.idle_timer && + (unsigned short)(now_ms - chn->last_read) >= global.tune.idle_timer) { + /* The buffer was empty and nothing was transferred for more + * than one second. This was caused by a pause and not by + * congestion. Reset any streaming mode to reduce latency. + */ + chn->xfer_small = 0; + chn->xfer_large = 0; + chn->flags &= ~(CF_STREAMER | CF_STREAMER_FAST); + } +} + +/* Check amount of transferred data after a receive. If <xferred> is greater + * than 0, the <last_read> date is updated and STREAMER flags for the channels + * are verified. 
+ */ +static inline void channel_check_xfer(struct channel *chn, size_t xferred) +{ + if (!xferred) + return; + + if ((chn->flags & (CF_STREAMER | CF_STREAMER_FAST)) && + (xferred <= c_size(chn) / 2)) { + chn->xfer_large = 0; + chn->xfer_small++; + if (chn->xfer_small >= 3) { + /* we have read less than half of the buffer in + * one pass, and this happened at least 3 times. + * This is definitely not a streamer. + */ + chn->flags &= ~(CF_STREAMER | CF_STREAMER_FAST); + } + else if (chn->xfer_small >= 2) { + /* if the buffer has been at least half full twchne, + * we receive faster than we send, so at least it + * is not a "fast streamer". + */ + chn->flags &= ~CF_STREAMER_FAST; + } + } + else if (!(chn->flags & CF_STREAMER_FAST) && (xferred >= channel_data_limit(chn))) { + /* we read a full buffer at once */ + chn->xfer_small = 0; + chn->xfer_large++; + if (chn->xfer_large >= 3) { + /* we call this buffer a fast streamer if it manages + * to be filled in one call 3 consecutive times. + */ + chn->flags |= (CF_STREAMER | CF_STREAMER_FAST); + } + } + else { + chn->xfer_small = 0; + chn->xfer_large = 0; + } + chn->last_read = now_ms; +} /* Returns the amount of bytes that can be written over the input data at once, * including reserved space which may be overwritten. This is used by Lua to @@ -852,12 +915,17 @@ static inline int ci_space_for_replace(const struct channel *chn) */ static inline int channel_alloc_buffer(struct channel *chn, struct buffer_wait *wait) { - if (b_alloc(&chn->buf) != NULL) - return 1; + int force_noqueue; - if (!LIST_INLIST(&wait->list)) - LIST_APPEND(&th_ctx->buffer_wq, &wait->list); + /* If the producer has been notified of recent availability, we must + * not check the queue again. + */ + force_noqueue = !!(chn_prod(chn)->flags & SC_FL_HAVE_BUFF); + + if (b_alloc(&chn->buf, DB_CHANNEL | (force_noqueue ? 
DB_F_NOQUEUE : 0)) != NULL) + return 1; + b_requeue(DB_CHANNEL, wait); return 0; } diff --git a/include/haproxy/cli-t.h b/include/haproxy/cli-t.h index cad6728..8555ea8 100644 --- a/include/haproxy/cli-t.h +++ b/include/haproxy/cli-t.h @@ -45,7 +45,7 @@ #define APPCTX_CLI_ST1_PAYLOAD (1 << 1) #define APPCTX_CLI_ST1_NOLF (1 << 2) #define APPCTX_CLI_ST1_TIMED (1 << 3) -#define APPCTX_CLI_ST1_SHUT_EXPECTED (1 << 4) +#define APPCTX_CLI_ST1_LASTCMD (1 << 4) #define CLI_PREFIX_KW_NB 5 #define CLI_MAX_MATCHES 5 @@ -56,6 +56,7 @@ enum { CLI_ST_INIT = 0, /* initial state, must leave to zero ! */ CLI_ST_END, /* final state, let's close */ CLI_ST_GETREQ, /* wait for a request */ + CLI_ST_PARSEREQ, /* parse a request */ CLI_ST_OUTPUT, /* all states after this one are responses */ CLI_ST_PROMPT, /* display the prompt (first output, same code) */ CLI_ST_PRINT, /* display const message in cli->msg */ @@ -82,6 +83,31 @@ struct cli_print_ctx { int severity; /* severity of the message to be returned according to (syslog) rfc5424 */ }; +/* context for the "wait" command that's used to wait for some time on a + * condition. We store the start date and the expiration date. The error + * value is set by the I/O handler to be printed by the release handler at + * the end. + */ +enum cli_wait_err { + CLI_WAIT_ERR_DONE, // condition satisfied + CLI_WAIT_ERR_INTR, // interrupted + CLI_WAIT_ERR_EXP, // finished on wait expiration + CLI_WAIT_ERR_FAIL, // finished early (unrecoverable) +}; + +enum cli_wait_cond { + CLI_WAIT_COND_NONE, // no condition to wait on + CLI_WAIT_COND_SRV_UNUSED,// wait for server to become unused +}; + +struct cli_wait_ctx { + uint start, deadline; // both are in ticks. 
+ enum cli_wait_cond cond; // CLI_WAIT_COND_* + enum cli_wait_err error; // CLI_WAIT_ERR_* + char *args[4]; // up to 4 args taken at parse time, all strduped + const char *msg; // static error message for failures if not NULL +}; + struct cli_kw { const char *str_kw[CLI_PREFIX_KW_NB]; /* keywords ended by NULL, limited to CLI_PREFIX_KW_NB separated keywords combination */ diff --git a/include/haproxy/compat.h b/include/haproxy/compat.h index 0fe5a0b..3829060 100644 --- a/include/haproxy/compat.h +++ b/include/haproxy/compat.h @@ -94,11 +94,19 @@ typedef struct { } empty_t; #endif #ifndef MIN -#define MIN(a, b) (((a) < (b)) ? (a) : (b)) +#define MIN(a, b) ({ \ + typeof(a) _a = (a); \ + typeof(a) _b = (b); \ + ((_a < _b) ? _a : _b); \ +}) #endif #ifndef MAX -#define MAX(a, b) (((a) > (b)) ? (a) : (b)) +#define MAX(a, b) ({ \ + typeof(a) _a = (a); \ + typeof(a) _b = (b); \ + ((_a > _b) ? _a : _b); \ +}) #endif /* this is for libc5 for example */ diff --git a/include/haproxy/connection-t.h b/include/haproxy/connection-t.h index 71269c6..83969da 100644 --- a/include/haproxy/connection-t.h +++ b/include/haproxy/connection-t.h @@ -37,6 +37,8 @@ #include <haproxy/port_range-t.h> #include <haproxy/protocol-t.h> #include <haproxy/show_flags-t.h> +#include <haproxy/stconn-t.h> +#include <haproxy/task-t.h> #include <haproxy/thread-t.h> /* referenced below */ @@ -54,14 +56,6 @@ struct bind_conf; struct qcs; struct ssl_sock_ctx; -/* Note: subscribing to these events is only valid after the caller has really - * attempted to perform the operation, and failed to proceed or complete. - */ -enum sub_event_type { - SUB_RETRY_RECV = 0x00000001, /* Schedule the tasklet when we can attempt to recv again */ - SUB_RETRY_SEND = 0x00000002, /* Schedule the tasklet when we can attempt to send again */ -}; - /* For each direction, we have a CO_FL_XPRT_<DIR>_ENA flag, which * indicates if read or write is desired in that direction for the respective * layers. 
The current status corresponding to the current layer being used is @@ -87,10 +81,11 @@ enum { CO_FL_REVERSED = 0x00000004, /* connection has been reversed to backend / reversed and accepted on frontend */ CO_FL_ACT_REVERSING = 0x00000008, /* connection has been reversed to frontend but not yet accepted */ - /* unused : 0x00000008 */ - /* unused : 0x00000010 */ - /* unused : 0x00000020 */ + CO_FL_OPT_MARK = 0x00000010, /* connection has a special sockopt mark */ + + CO_FL_OPT_TOS = 0x00000020, /* connection has a special sockopt tos */ + /* unused : 0x00000040, 0x00000080 */ /* These flags indicate whether the Control and Transport layers are initialized */ @@ -173,13 +168,14 @@ static forceinline char *conn_show_flags(char *buf, size_t len, const char *deli _(0); /* flags */ _(CO_FL_SAFE_LIST, _(CO_FL_IDLE_LIST, _(CO_FL_CTRL_READY, - _(CO_FL_REVERSED, _(CO_FL_ACT_REVERSING, _(CO_FL_XPRT_READY, - _(CO_FL_WANT_DRAIN, _(CO_FL_WAIT_ROOM, _(CO_FL_EARLY_SSL_HS, _(CO_FL_EARLY_DATA, - _(CO_FL_SOCKS4_SEND, _(CO_FL_SOCKS4_RECV, _(CO_FL_SOCK_RD_SH, _(CO_FL_SOCK_WR_SH, - _(CO_FL_ERROR, _(CO_FL_FDLESS, _(CO_FL_WAIT_L4_CONN, _(CO_FL_WAIT_L6_CONN, - _(CO_FL_SEND_PROXY, _(CO_FL_ACCEPT_PROXY, _(CO_FL_ACCEPT_CIP, _(CO_FL_SSL_WAIT_HS, - _(CO_FL_PRIVATE, _(CO_FL_RCVD_PROXY, _(CO_FL_SESS_IDLE, _(CO_FL_XPRT_TRACKED - )))))))))))))))))))))))))); + _(CO_FL_REVERSED, _(CO_FL_ACT_REVERSING, _(CO_FL_OPT_MARK, _(CO_FL_OPT_TOS, + _(CO_FL_XPRT_READY, _(CO_FL_WANT_DRAIN, _(CO_FL_WAIT_ROOM, _(CO_FL_EARLY_SSL_HS, + _(CO_FL_EARLY_DATA, _(CO_FL_SOCKS4_SEND, _(CO_FL_SOCKS4_RECV, _(CO_FL_SOCK_RD_SH, + _(CO_FL_SOCK_WR_SH, _(CO_FL_ERROR, _(CO_FL_FDLESS, _(CO_FL_WAIT_L4_CONN, + _(CO_FL_WAIT_L6_CONN, _(CO_FL_SEND_PROXY, _(CO_FL_ACCEPT_PROXY, _(CO_FL_ACCEPT_CIP, + _(CO_FL_SSL_WAIT_HS, _(CO_FL_PRIVATE, _(CO_FL_RCVD_PROXY, _(CO_FL_SESS_IDLE, + _(CO_FL_XPRT_TRACKED + )))))))))))))))))))))))))))); /* epilogue */ _(~0U); return buf; @@ -283,18 +279,7 @@ enum { enum { CO_SFL_MSG_MORE = 0x0001, /* More data to 
come afterwards */ CO_SFL_STREAMER = 0x0002, /* Producer is continuously streaming data */ -}; - -/* mux->shutr() modes */ -enum co_shr_mode { - CO_SHR_DRAIN = 0, /* read shutdown, drain any extra stuff */ - CO_SHR_RESET = 1, /* read shutdown, reset any extra stuff */ -}; - -/* mux->shutw() modes */ -enum co_shw_mode { - CO_SHW_NORMAL = 0, /* regular write shutdown */ - CO_SHW_SILENT = 1, /* imminent close, don't notify peer */ + CO_SFL_LAST_DATA = 0x0003, /* Sent data are the last ones, shutdown is pending */ }; /* known transport layers (for ease of lookup) */ @@ -338,11 +323,13 @@ enum mux_ctl_type { MUX_CTL_REVERSE_CONN, /* Notify about an active reverse connection accepted. */ MUX_CTL_SUBS_RECV, /* Notify the mux it must wait for read events again */ MUX_CTL_GET_GLITCHES, /* returns number of glitches on the connection */ + MUX_CTL_GET_NBSTRM, /* Return the current number of streams on the connection */ + MUX_CTL_GET_MAXSTRM, /* Return the max number of streams supported by the connection */ }; /* sctl command used by mux->sctl() */ enum mux_sctl_type { - MUX_SCTL_SID, /* Return the mux stream ID as ouput, as a signed 64bits integer */ + MUX_SCTL_SID, /* Return the mux stream ID as output, as a signed 64bits integer */ }; /* response for ctl MUX_STATUS */ @@ -369,16 +356,6 @@ struct socks4_request { char user_id[8]; /* the user ID string, variable length, terminated with a null (0x00); Using "HAProxy\0" */ }; -/* Describes a set of subscriptions. Multiple events may be registered at the - * same time. The callee should assume everything not pending for completion is - * implicitly possible. It's illegal to change the tasklet if events are still - * registered. - */ -struct wait_event { - struct tasklet *tasklet; - int events; /* set of enum sub_event_type above */ -}; - /* A connection handle is how we differentiate two connections on the lower * layers. It usually is a file descriptor but can be a connection id. 
The * CO_FL_FDLESS flag indicates which one is relevant. @@ -408,7 +385,7 @@ struct xprt_ops { int (*prepare_srv)(struct server *srv); /* prepare a server context */ void (*destroy_srv)(struct server *srv); /* destroy a server context */ int (*get_alpn)(const struct connection *conn, void *xprt_ctx, const char **str, int *len); /* get application layer name */ - int (*takeover)(struct connection *conn, void *xprt_ctx, int orig_tid); /* Let the xprt know the fd have been taken over */ + int (*takeover)(struct connection *conn, void *xprt_ctx, int orig_tid, int release); /* Let the xprt know the fd have been taken over */ void (*set_idle)(struct connection *conn, void *xprt_ctx); /* notify the xprt that the connection becomes idle. implies set_used. */ void (*set_used)(struct connection *conn, void *xprt_ctx); /* notify the xprt that the connection leaves idle. implies set_idle. */ char name[8]; /* transport layer name, zero-terminated */ @@ -436,8 +413,7 @@ struct mux_ops { size_t (*done_fastfwd)(struct stconn *sc); /* Callback to terminate fast data forwarding */ int (*fastfwd)(struct stconn *sc, unsigned int count, unsigned int flags); /* Callback to init fast data forwarding */ int (*resume_fastfwd)(struct stconn *sc, unsigned int flags); /* Callback to resume fast data forwarding */ - void (*shutr)(struct stconn *sc, enum co_shr_mode); /* shutr function */ - void (*shutw)(struct stconn *sc, enum co_shw_mode); /* shutw function */ + void (*shut)(struct stconn *sc, enum se_shut_mode, struct se_abort_info *reason); /* shutdown function */ int (*attach)(struct connection *conn, struct sedesc *, struct session *sess); /* attach a stconn to an outgoing connection */ struct stconn *(*get_first_sc)(const struct connection *); /* retrieves any valid stconn from this connection */ @@ -453,7 +429,12 @@ struct mux_ops { int (*used_streams)(struct connection *conn); /* Returns the number of streams in use on a connection. 
*/ void (*destroy)(void *ctx); /* Let the mux know one of its users left, so it may have to disappear */ int (*ctl)(struct connection *conn, enum mux_ctl_type mux_ctl, void *arg); /* Provides information about the mux connection */ - int (*takeover)(struct connection *conn, int orig_tid); /* Attempts to migrate the connection to the current thread */ + + /* Attempts to migrate <conn> from <orig_tid> to the current thread. If + * <release> is true, it will be destroyed immediately after by caller. + */ + int (*takeover)(struct connection *conn, int orig_tid, int release); + unsigned int flags; /* some flags characterizing the mux's capabilities (MX_FL_*) */ char name[8]; /* mux layer name, zero-terminated */ }; @@ -492,14 +473,15 @@ struct conn_src { * CAUTION! Always update CONN_HASH_PARAMS_TYPE_COUNT when adding a new entry. */ enum conn_hash_params_t { - CONN_HASH_PARAMS_TYPE_SNI = 0x1, + CONN_HASH_PARAMS_TYPE_NAME = 0x1, CONN_HASH_PARAMS_TYPE_DST_ADDR = 0x2, CONN_HASH_PARAMS_TYPE_DST_PORT = 0x4, CONN_HASH_PARAMS_TYPE_SRC_ADDR = 0x8, CONN_HASH_PARAMS_TYPE_SRC_PORT = 0x10, CONN_HASH_PARAMS_TYPE_PROXY = 0x20, + CONN_HASH_PARAMS_TYPE_MARK_TOS = 0x40, }; -#define CONN_HASH_PARAMS_TYPE_COUNT 6 +#define CONN_HASH_PARAMS_TYPE_COUNT 7 #define CONN_HASH_PAYLOAD_LEN \ (((sizeof(((struct conn_hash_node *)0)->node.key)) * 8) - CONN_HASH_PARAMS_TYPE_COUNT) @@ -512,8 +494,9 @@ enum conn_hash_params_t { * connection hash. 
*/ struct conn_hash_params { - uint64_t sni_prehash; + uint64_t name_prehash; uint64_t proxy_prehash; + uint64_t mark_tos_prehash; void *target; struct sockaddr_storage *src_addr; struct sockaddr_storage *dst_addr; @@ -560,7 +543,7 @@ struct connection { struct mt_list toremove_list; /* list element when idle connection is ready to be purged */ }; union { - struct list session_list; /* used by backend conns, list of attached connections to a session */ + struct list sess_el; /* used by private backend conns, list elem into session */ struct list stopping_list; /* used by frontend conns, attach point in mux stopping list */ }; union conn_handle handle; /* connection handle at the socket layer */ @@ -582,6 +565,8 @@ struct connection { enum obj_type *target; /* Listener for active reverse, server for passive. */ struct buffer name; /* Only used for passive reverse. Used as SNI when connection added to server idle pool. */ } reverse; + uint32_t mark; /* set network mark, if CO_FL_OPT_MARK is set */ + uint8_t tos; /* set ip tos, if CO_FL_OPT_TOS is set */ }; /* node for backend connection in the idle trees for http-reuse diff --git a/include/haproxy/connection.h b/include/haproxy/connection.h index c7d9883..aa61cc7 100644 --- a/include/haproxy/connection.h +++ b/include/haproxy/connection.h @@ -26,6 +26,7 @@ #include <haproxy/api.h> #include <haproxy/buf.h> +#include <haproxy/sock.h> #include <haproxy/connection-t.h> #include <haproxy/stconn-t.h> #include <haproxy/fd.h> @@ -52,7 +53,7 @@ extern struct mux_stopping_data mux_stopping_data[MAX_THREADS]; /* receive a PROXY protocol header over a connection */ int conn_recv_proxy(struct connection *conn, int flag); int conn_send_proxy(struct connection *conn, unsigned int flag); -int make_proxy_line(char *buf, int buf_len, struct server *srv, struct connection *remote, struct stream *strm); +int make_proxy_line(char *buf, int buf_len, struct server *srv, struct connection *remote, struct stream *strm, struct session *sess); 
struct conn_tlv_list *conn_get_tlv(struct connection *conn, int type); int conn_append_debug_info(struct buffer *buf, const struct connection *conn, const char *pfx); @@ -88,6 +89,7 @@ void conn_delete_from_tree(struct connection *conn); void conn_init(struct connection *conn, void *target); struct connection *conn_new(void *target); void conn_free(struct connection *conn); +void conn_release(struct connection *conn); struct conn_hash_node *conn_alloc_hash_node(struct connection *conn); struct sockaddr_storage *sockaddr_alloc(struct sockaddr_storage **sap, const struct sockaddr_storage *orig, socklen_t len); void sockaddr_free(struct sockaddr_storage **sap); @@ -95,13 +97,7 @@ void sockaddr_free(struct sockaddr_storage **sap); /* connection hash stuff */ uint64_t conn_calculate_hash(const struct conn_hash_params *params); -uint64_t conn_hash_prehash(char *buf, size_t size); -void conn_hash_update(char *buf, size_t *idx, - const void *data, size_t size, - enum conn_hash_params_t *flags, - enum conn_hash_params_t type); -uint64_t conn_hash_digest(char *buf, size_t bufsize, - enum conn_hash_params_t flags); +uint64_t conn_hash_prehash(const char *buf, size_t size); int conn_reverse(struct connection *conn); @@ -426,19 +422,7 @@ static inline void conn_set_tos(const struct connection *conn, int tos) if (!conn || !conn_ctrl_ready(conn) || (conn->flags & CO_FL_FDLESS)) return; -#ifdef IP_TOS - if (conn->src->ss_family == AF_INET) - setsockopt(conn->handle.fd, IPPROTO_IP, IP_TOS, &tos, sizeof(tos)); -#endif -#ifdef IPV6_TCLASS - if (conn->src->ss_family == AF_INET6) { - if (IN6_IS_ADDR_V4MAPPED(&((struct sockaddr_in6 *)conn->src)->sin6_addr)) - /* v4-mapped addresses need IP_TOS */ - setsockopt(conn->handle.fd, IPPROTO_IP, IP_TOS, &tos, sizeof(tos)); - else - setsockopt(conn->handle.fd, IPPROTO_IPV6, IPV6_TCLASS, &tos, sizeof(tos)); - } -#endif + sock_set_tos(conn->handle.fd, conn->src, tos); } /* Sets the netfilter mark on the connection's socket. 
The connection is tested @@ -449,13 +433,7 @@ static inline void conn_set_mark(const struct connection *conn, int mark) if (!conn || !conn_ctrl_ready(conn) || (conn->flags & CO_FL_FDLESS)) return; -#if defined(SO_MARK) - setsockopt(conn->handle.fd, SOL_SOCKET, SO_MARK, &mark, sizeof(mark)); -#elif defined(SO_USER_COOKIE) - setsockopt(conn->handle.fd, SOL_SOCKET, SO_USER_COOKIE, &mark, sizeof(mark)); -#elif defined(SO_RTABLE) - setsockopt(conn->handle.fd, SOL_SOCKET, SO_RTABLE, &mark, sizeof(mark)); -#endif + sock_set_mark(conn->handle.fd, conn->ctrl->fam->sock_family, mark); } /* Sets adjust the TCP quick-ack feature on the connection's socket. The diff --git a/include/haproxy/counters-t.h b/include/haproxy/counters-t.h index 933c228..8539d6c 100644 --- a/include/haproxy/counters-t.h +++ b/include/haproxy/counters-t.h @@ -23,6 +23,8 @@ #ifndef _HAPROXY_COUNTERS_T_H #define _HAPROXY_COUNTERS_T_H +#include <haproxy/freq_ctr-t.h> + /* counters used by listeners and frontends */ struct fe_counters { unsigned int conn_max; /* max # of active sessions */ @@ -63,15 +65,19 @@ struct fe_counters { long long cache_hits; /* cache hits */ } http; } p; /* protocol-specific stats */ + + struct freq_ctr sess_per_sec; /* sessions per second on this server */ + struct freq_ctr req_per_sec; /* HTTP requests per second on the frontend */ + struct freq_ctr conn_per_sec; /* received connections per second on the frontend */ + + unsigned long last_change; /* last time, when the state was changed */ }; /* counters used by servers and backends */ struct be_counters { unsigned int conn_max; /* max # of active sessions */ - long long cum_conn; /* cumulated number of received connections */ long long cum_sess; /* cumulated number of accepted connections */ long long cum_lbconn; /* cumulated number of sessions processed by load balancing (BE only) */ - unsigned long last_sess; /* last session time */ unsigned int cps_max; /* maximum of new connections received per second */ unsigned int 
sps_max; /* maximum of new connections accepted per second (sessions) */ @@ -116,6 +122,11 @@ struct be_counters { long long cache_hits; /* cache hits */ } http; } p; /* protocol-specific stats */ + + struct freq_ctr sess_per_sec; /* sessions per second on this server */ + + unsigned long last_sess; /* last session time */ + unsigned long last_change; /* last time, when the state was changed */ }; #endif /* _HAPROXY_COUNTERS_T_H */ diff --git a/include/haproxy/defaults.h b/include/haproxy/defaults.h index 7430c61..eda346a 100644 --- a/include/haproxy/defaults.h +++ b/include/haproxy/defaults.h @@ -22,6 +22,8 @@ #ifndef _HAPROXY_DEFAULTS_H #define _HAPROXY_DEFAULTS_H +#include <haproxy/compat.h> + /* MAX_THREADS defines the highest limit for the global nbthread value. It * defaults to the number of bits in a long integer when threads are enabled * but may be lowered to save resources on embedded systems. @@ -69,18 +71,9 @@ #define BUFSIZE 16384 #endif -/* certain buffers may only be allocated for responses in order to avoid - * deadlocks caused by request queuing. 2 buffers is the absolute minimum - * acceptable to ensure that a request gaining access to a server can get - * a response buffer even if it doesn't completely flush the request buffer. - * The worst case is an applet making use of a request buffer that cannot - * completely be sent while the server starts to respond, and all unreserved - * buffers are allocated by request buffers from pending connections in the - * queue waiting for this one to flush. Both buffers reserved buffers may - * thus be used at the same time. 
- */ +// number of per-thread emergency buffers for low-memory conditions #ifndef RESERVED_BUFS -#define RESERVED_BUFS 2 +#define RESERVED_BUFS 4 #endif // reserved buffer space for header rewriting @@ -478,6 +471,10 @@ #define CONFIG_HAP_POOL_BUCKETS (1UL << (CONFIG_HAP_POOL_BUCKETS_BITS)) +#ifndef CONFIG_HAP_TBL_BUCKETS +# define CONFIG_HAP_TBL_BUCKETS CONFIG_HAP_POOL_BUCKETS +#endif + /* Number of samples used to compute the times reported in stats. A power of * two is highly recommended, and this value multiplied by the largest response * time must not overflow and unsigned int. See freq_ctr.h for more information. @@ -530,4 +527,33 @@ # endif #endif +/* number of ring wait queues depending on the number + * of threads. + */ +#ifndef RING_WAIT_QUEUES +# if defined(USE_THREAD) && MAX_THREADS >= 32 +# define RING_WAIT_QUEUES 16 +# elif defined(USE_THREAD) +# define RING_WAIT_QUEUES ((MAX_THREADS + 1) / 2) +# else +# define RING_WAIT_QUEUES 1 +# endif +#endif + +/* it has been found that 6 queues was optimal on various archs at various + * thread counts, so let's use that by default. 
+ */ +#ifndef RING_DFLT_QUEUES +# define RING_DFLT_QUEUES 6 +#endif + +/* Let's make DEBUG_STRICT default to 1 to get rid of it in the makefile */ +#ifndef DEBUG_STRICT +# define DEBUG_STRICT 1 +#endif + +#if !defined(DEBUG_MEMORY_POOLS) +# define DEBUG_MEMORY_POOLS 1 +#endif + #endif /* _HAPROXY_DEFAULTS_H */ diff --git a/include/haproxy/dgram-t.h b/include/haproxy/dgram-t.h index 4e4c2af..5ed24ef 100644 --- a/include/haproxy/dgram-t.h +++ b/include/haproxy/dgram-t.h @@ -22,6 +22,8 @@ #ifndef _HAPROXY_HAPROXY_DGRAM_T_H #define _HAPROXY_HAPROXY_DGRAM_T_H +#include <haproxy/api-t.h> +#include <haproxy/thread-t.h> #include <arpa/inet.h> /* diff --git a/include/haproxy/dns-t.h b/include/haproxy/dns-t.h index 1c876e3..175c7d1 100644 --- a/include/haproxy/dns-t.h +++ b/include/haproxy/dns-t.h @@ -27,8 +27,8 @@ #include <haproxy/connection-t.h> #include <haproxy/buf-t.h> #include <haproxy/dgram-t.h> +#include <haproxy/dns_ring-t.h> #include <haproxy/obj_type-t.h> -#include <haproxy/ring-t.h> #include <haproxy/stats-t.h> #include <haproxy/task-t.h> #include <haproxy/thread.h> @@ -78,7 +78,7 @@ struct dns_additional_record { */ struct dns_stream_server { struct server *srv; - struct ring *ring_req; + struct dns_ring *ring_req; int max_slots; int maxconn; int idle_conns; @@ -97,7 +97,7 @@ struct dns_stream_server { struct dns_dgram_server { struct dgram_conn conn; /* transport layer */ - struct ring *ring_req; + struct dns_ring *ring_req; size_t ofs_req; // ring buffer reader offset }; @@ -121,7 +121,7 @@ struct dns_session { struct task *task_exp; struct eb_root query_ids; /* tree to quickly lookup/retrieve query ids currently in use */ size_t ofs; // ring buffer reader offset - struct ring ring; + struct dns_ring ring; struct { uint16_t len; uint16_t offset; @@ -136,6 +136,7 @@ struct dns_session { struct dns_nameserver { char *id; /* nameserver unique identifier */ void *parent; + unsigned int puid; /* parent-unique numeric id */ struct { const char *file; /* file where 
the section appears */ int line; /* line where the section appears */ @@ -153,8 +154,9 @@ struct dns_nameserver { /* mixed dns and resolver counters, we will have to split them */ struct dns_counters { - char *id; - char *pid; + char *id; /* nameserver id */ + char *pid; /* parent resolver id */ + unsigned int ns_puid; /* nameserver numerical id (ns->puid) */ long long sent; /* - queries sent */ long long snd_error; /* - sending errors */ union { diff --git a/include/haproxy/dns_ring-t.h b/include/haproxy/dns_ring-t.h new file mode 100644 index 0000000..2c15784 --- /dev/null +++ b/include/haproxy/dns_ring-t.h @@ -0,0 +1,110 @@ +/* + * include/haproxy/dns_ring-t.h + * This file provides definitions for ring buffers used for disposable data. + * This is a fork of ring-t.h for DNS usages. + * + * Copyright (C) 2000-2019 Willy Tarreau - w@1wt.eu + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_DNS_RING_T_H +#define _HAPROXY_DNS_RING_T_H + +#include <haproxy/api-t.h> +#include <haproxy/buf-t.h> +#include <haproxy/thread.h> + +/* The code below handles circular buffers with single-producer and multiple + * readers (up to 255). The buffer storage area must remain always allocated. + * It's made of series of payload blocks followed by a readers count (RC). 
+ * There is always a readers count at the beginning of the buffer as well. Each + * payload block is composed of a varint-encoded size (VI) followed by the + * actual payload (PL). + * + * The readers count is encoded on a single byte. It indicates how many readers + * are still waiting at this position. The writer writes after the buffer's + * tail, which initially starts just past the first readers count. Then it + * knows by reading this count that it must wake up the readers to indicate + * data availability. When a reader reads the payload block, it increments the + * next readers count and decrements the current one. The area between the + * initial readers count and the next one is protected from overwriting for as + * long as the initial count is non-null. As such these readers count are + * effective barriers against data recycling. + * + * Only the writer is allowed to update the buffer's tail/head. This ensures + * that events can remain as long as possible so that late readers can get the + * maximum history available. It also helps dealing with multi-thread accesses + * using a simple RW lock during the buffer head's manipulation. The writer + * will have to delete some old records starting at the head until the new + * message can fit or a non-null readers count is encountered. If a message + * cannot fit due to insufficient room, the message is lost and the drop + * counted must be incremented. + * + * Like any buffer, this buffer naturally wraps at the end and continues at the + * beginning. The creation process consists in immediately adding a null + * readers count byte into the buffer. The write process consists in always + * writing a payload block followed by a new readers count. The delete process + * consists in removing a null readers count and payload block. 
As such, there + * is always at least one readers count byte in the buffer available at the + * head for new readers to attach to, and one before the tail, both of which + * may be the same when the buffer doesn't contain any event. It is thus safe + * for any reader to simply keep the absolute offset of the last visited + * position and to restart from there. The write will update the buffer's + * absolute offset when deleting entries. All this also has the benefit of + * allowing a buffer to be hot-resized without losing its contents. + * + * Thus we have this : + * - init of empty buffer: + * head-, ,-tail + * [ RC | xxxxxxxxxxxxxxxxxxxxxxxxxx ] + * + * - reader attached: + * head-, ,-tail + * [ RC | xxxxxxxxxxxxxxxxxxxxxxxxxx ] + * ^- +1 + * + * - append of one event: + * appended + * head-, <----------> ,-tail + * [ RC | VI | PL | RC | xxxxxxxxxxx ] + * + * - reader advancing: + * head-, ,-tail + * [ RC | VI | PL | RC | xxxxxxxxxxx ] + * ^- -1 ^- +1 + * + * - writer removing older message: + * head-, ,-tail + * [ xxxxxxxxxxxx | RC | xxxxxxxxxxx ] + * <----------> + * removed + */ + +struct dns_ring { + struct buffer buf; // storage area + struct mt_list waiters; // list of waiters, for now, CLI "show event" + __decl_thread(HA_RWLOCK_T lock); + int readers_count; +}; + +#endif /* _HAPROXY_DNS_RING_T_H */ + +/* + * Local variables: + * c-indent-level: 8 + * c-basic-offset: 8 + * End: + */ diff --git a/include/haproxy/dns_ring.h b/include/haproxy/dns_ring.h new file mode 100644 index 0000000..88bbb4a --- /dev/null +++ b/include/haproxy/dns_ring.h @@ -0,0 +1,46 @@ +/* + * include/haproxy/dns_ring.h + * Exported functions for ring buffers used for disposable data. + * This is a fork of ring.h for DNS usage. 
+ * + * Copyright (C) 2000-2019 Willy Tarreau - w@1wt.eu + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_DNS_RING_H +#define _HAPROXY_DNS_RING_H + +#include <stdlib.h> +#include <import/ist.h> +#include <haproxy/dns_ring-t.h> + +struct appctx; + +struct dns_ring *dns_ring_new(size_t size); +void dns_ring_init(struct dns_ring *ring, void* area, size_t size); +void dns_ring_free(struct dns_ring *ring); +ssize_t dns_ring_write(struct dns_ring *ring, size_t maxlen, const struct ist pfx[], size_t npfx, const struct ist msg[], size_t nmsg); +int dns_ring_attach(struct dns_ring *ring); +void dns_ring_detach_appctx(struct dns_ring *ring, struct appctx *appctx, size_t ofs); + +#endif /* _HAPROXY_DNS_RING_H */ + +/* + * Local variables: + * c-indent-level: 8 + * c-basic-offset: 8 + * End: + */ diff --git a/include/haproxy/dynbuf-t.h b/include/haproxy/dynbuf-t.h index b5545ab..f0ca187 100644 --- a/include/haproxy/dynbuf-t.h +++ b/include/haproxy/dynbuf-t.h @@ -22,6 +22,79 @@ #ifndef _HAPROXY_DYNBUF_T_H #define _HAPROXY_DYNBUF_T_H +#include <haproxy/list-t.h> + +/* Describe the levels of criticality of each allocation based on the expected + * use case. 
We distinguish multiple use cases, from the least important to the + * most important one: + * - allocate a buffer to grow a non-empty ring: this should be avoided when + * resources are becoming scarce. + * - allocate a buffer for very unlikely situations (e.g. L7 retries, early + * data). These may acceptably fail on low resources. + * - buffer used to receive data in the mux at the connection level. Please + * note that this level might later be resplit into two levels, one for + * initial data such as a new request, which may be rejected and postponed, + * and one for data continuation, which may be needed to complete a request + * or receive some control data allowing another buffer to be flushed. + * - buffer used to produce data at the endpoint for internal consumption, + * typically mux streams and applets. These buffers will be allocated until + * a channel picks them. Not processing them might sometimes lead to a mux + * being clogged and blocking other streams from progressing. + * - channel buffer: this one may be allocated to perform a synchronous recv, + * or just preparing for the possibility of an instant response. The + * response channel always allocates a buffer when entering process_stream, + * which is immediately released if unused when leaving. + * - buffer used by the mux sending side, often allocated by the mux's + * snd_buf() handler to encode the outgoing channel's data. + * - buffer permanently allocated at boot (e.g. temporary compression + * buffers). If these fail, we can't boot. + * + * Please DO NOT CHANGE THESE LEVELS without first getting a full understanding + * of how all this works and touching the DB_F_CRIT_MASK and DB_CRIT_TO_QUEUE() + * macros below! + */ +enum dynbuf_crit { + DB_GROW_RING = 0, // used to grow an existing buffer ring + DB_UNLIKELY, // unlikely to be needed (e.g. 
L7 retries) + /* The 4 levels below are subject to queueing */ + DB_MUX_RX, // buffer used to store incoming data from the system + DB_SE_RX, // buffer used to store incoming data for the channel + DB_CHANNEL, // buffer used by the channel for synchronous reads + DB_MUX_TX, // buffer used to store outgoing mux data + /* The one below may never fail */ + DB_PERMANENT, // buffers permanently allocated. +}; + +/* The values above are expected to be passed to b_alloc(). In addition, some + * Extra flags can be passed by oring the crit value above with one of these + * high-bit flags. + */ +#define DB_F_NOQUEUE 0x80000000U // ignore presence of others in queue +#define DB_F_CRIT_MASK 0x000000FFU // mask to keep the criticality bits + + +/* We'll deal with 4 queues, with indexes numbered from 0 to 3 based on the + * criticality of the allocation. All criticality levels are mapped to a 2-bit + * queue index. While some levels never use the queue (the first two), some of + * the others will share a same queue, and all levels will define a ratio of + * allocated emergency buffers below which we refrain from trying to allocate. + * In practice, for now the thresholds will just be the queue number times 33% + * so that queue 0 is allowed to deplete emergency buffers and queue 3 not at + * all. This gives us: queue idx=3 for DB_MUX_RX and below, 2 for DB_SE_RX, + * 1 for DB_CHANNEL, 0 for DB_MUX_TX and above. This must match the DYNBUF_NBQ + * in tinfo-t.h. + */ + +#define DB_CRIT_TO_QUEUE(crit) ((0x000001BF >> ((crit) * 2)) & 3) + +#define DB_GROW_RING_Q DB_CRIT_TO_QUEUE(DB_GROW_RING) +#define DB_UNLIKELY_Q DB_CRIT_TO_QUEUE(DB_UNLIKELY) +#define DB_MUX_RX_Q DB_CRIT_TO_QUEUE(DB_MUX_RX) +#define DB_SE_RX_Q DB_CRIT_TO_QUEUE(DB_SE_RX) +#define DB_CHANNEL_Q DB_CRIT_TO_QUEUE(DB_CHANNEL) +#define DB_MUX_TX_Q DB_CRIT_TO_QUEUE(DB_MUX_TX) +#define DB_PERMANENT_Q DB_CRIT_TO_QUEUE(DB_PERMANENT) + /* an element of the <buffer_wq> list. 
It represents an object that need to * acquire a buffer to continue its process. */ diff --git a/include/haproxy/dynbuf.h b/include/haproxy/dynbuf.h index a89800c..4a6595d 100644 --- a/include/haproxy/dynbuf.h +++ b/include/haproxy/dynbuf.h @@ -32,6 +32,7 @@ #include <haproxy/buf.h> #include <haproxy/chunk.h> #include <haproxy/dynbuf-t.h> +#include <haproxy/global.h> #include <haproxy/pool.h> extern struct pool_head *pool_head_buffer; @@ -56,21 +57,67 @@ static inline int buffer_almost_full(const struct buffer *buf) /* Functions below are used for buffer allocation */ /**************************************************/ +/* returns non-zero if one may try to allocate a buffer for criticality flags + * <crit> (made of a criticality and optional flags). + */ +static inline int b_may_alloc_for_crit(uint crit) +{ + int q = DB_CRIT_TO_QUEUE(crit & DB_F_CRIT_MASK); + + /* if this queue or any more critical ones have entries, we must wait */ + if (!(crit & DB_F_NOQUEUE) && th_ctx->bufq_map & ((2 << q) - 1)) + return 0; + + /* If the emergency buffers are too low, we won't try to allocate a + * buffer either so that we speed up their release. As a corrolary, it + * means that we're always allowed to try to fall back to an emergency + * buffer if pool_alloc() fails. The minimum number of available + * emergency buffers for an allocation depends on the queue: + * q == 0 -> 0% + * q == 1 -> 33% + * q == 2 -> 66% + * q == 3 -> 100% + */ + if (th_ctx->emergency_bufs_left * 3 < q * global.tune.reserved_bufs) + return 0; + return 1; +} + +/* Allocates one of the emergency buffers or returns NULL if there are none left */ +static inline char *__b_get_emergency_buf(void) +{ + char *ret; + + if (!th_ctx->emergency_bufs_left) + return NULL; + + th_ctx->emergency_bufs_left--; + ret = th_ctx->emergency_bufs[th_ctx->emergency_bufs_left]; + th_ctx->emergency_bufs[th_ctx->emergency_bufs_left] = NULL; + return ret; +} + /* Ensures that <buf> is allocated, or allocates it. 
If no memory is available, * ((char *)1) is assigned instead with a zero size. The allocated buffer is * returned, or NULL in case no memory is available. Since buffers only contain * user data, poisonning is always disabled as it brings no benefit and impacts * performance. Due to the difficult buffer_wait management, they are not - * subject to forced allocation failures either. + * subject to forced allocation failures either. If other waiters are present + * at higher criticality levels, we refrain from allocating. */ -#define b_alloc(_buf) \ -({ \ - char *_area; \ - struct buffer *_retbuf = _buf; \ - \ - if (!_retbuf->size) { \ +#define b_alloc(_buf, _crit) \ +({ \ + char *_area = NULL; \ + struct buffer *_retbuf = _buf; \ + uint _criticality = _crit; \ + \ + if (!_retbuf->size) { \ *_retbuf = BUF_WANTED; \ - _area = pool_alloc_flag(pool_head_buffer, POOL_F_NO_POISON | POOL_F_NO_FAIL); \ + if (b_may_alloc_for_crit(_criticality)) { \ + _area = pool_alloc_flag(pool_head_buffer, POOL_F_NO_POISON | POOL_F_NO_FAIL); \ + if (unlikely(!_area)) \ + _area = __b_get_emergency_buf(); \ + } \ if (unlikely(!_area)) { \ activity[tid].buf_wait++; \ _retbuf = NULL; \ @@ -95,7 +142,10 @@ static inline int buffer_almost_full(const struct buffer *buf) */ \ *(_buf) = BUF_NULL; \ __ha_barrier_store(); \ - pool_free(pool_head_buffer, area); \ + if (th_ctx->emergency_bufs_left < global.tune.reserved_bufs) \ + th_ctx->emergency_bufs[th_ctx->emergency_bufs_left++] = area; \ + else \ + pool_free(pool_head_buffer, area); \ } while (0) \ /* Releases buffer <buf> if allocated, and marks it empty. 
*/ @@ -116,10 +166,90 @@ void __offer_buffers(void *from, unsigned int count); static inline void offer_buffers(void *from, unsigned int count) { - if (!LIST_ISEMPTY(&th_ctx->buffer_wq)) + int q; + + if (likely(!th_ctx->bufq_map)) + return; + + for (q = 0; q < DYNBUF_NBQ; q++) { + if (!(th_ctx->bufq_map & (1 << q))) + continue; + + BUG_ON_HOT(LIST_ISEMPTY(&th_ctx->buffer_wq[q])); __offer_buffers(from, count); + break; + } +} + +/* Queues a buffer request for the current thread via <bw>, and returns + * non-zero if the criticality allows to queue a request, otherwise returns + * zero. If the <bw> was already queued, non-zero is returned so that the call + * is idempotent. It is assumed that the buffer_wait struct had already been + * preset with its context and callback, otherwise please use b_queue() + * instead. + */ +static inline int b_requeue(enum dynbuf_crit crit, struct buffer_wait *bw) +{ + int q = DB_CRIT_TO_QUEUE(crit); + + if (LIST_INLIST(&bw->list)) + return 1; + + /* these ones are never queued */ + if (crit < DB_MUX_RX) + return 0; + + th_ctx->bufq_map |= 1 << q; + LIST_APPEND(&th_ctx->buffer_wq[q], &bw->list); + return 1; } +/* Queues a buffer request for the current thread via <bw> with the given <ctx> + * and <cb>, and returns non-zero if the criticality allows to queue a request, + * otherwise returns zero. If the <bw> was already queued, non-zero is returned + * so that the call is idempotent. If the buffer_wait struct had already been + * preset with the ctx and cb, please use the lighter b_requeue() instead. + */ +static inline int b_queue(enum dynbuf_crit crit, struct buffer_wait *bw, void *ctx, int (*cb)(void *)) +{ + bw->target = ctx; + bw->wakeup_cb = cb; + return b_requeue(crit, bw); +} + +/* Dequeues bw element <bw> from its list at for thread <thr> and updates the + * thread's bufq_map if it was the last element. The element is assumed to be + * in a list (it's the caller's job to test it). 
This is only meant to really + * be used either by the owner thread or under thread isolation. You should + * use b_dequeue() instead. + */ +static inline void _b_dequeue(struct buffer_wait *bw, int thr) +{ + struct thread_ctx *ctx = &ha_thread_ctx[thr]; + uint q; + + /* trick: detect if we're the last one and pointing to a root, so we + * can figure the queue number since the root belongs to an array. + */ + if (LIST_ATMOST1(&bw->list)) { + /* OK then which root? */ + q = bw->list.n - &ctx->buffer_wq[0]; + BUG_ON_HOT(q >= DYNBUF_NBQ); + ctx->bufq_map &= ~(1 << q); + } + LIST_DEL_INIT(&bw->list); +} + +/* Dequeues bw element <bw> from its list and updates the bufq_map if if was + * the last element. All users of buffer_wait should use this to dequeue (e.g. + * when killing a pending request on timeout) so as to make sure that we keep + * consistency between the list heads and the bitmap. + */ +static inline void b_dequeue(struct buffer_wait *bw) +{ + if (unlikely(LIST_INLIST(&bw->list))) + _b_dequeue(bw, tid); +} #endif /* _HAPROXY_DYNBUF_H */ diff --git a/include/haproxy/fcgi-app-t.h b/include/haproxy/fcgi-app-t.h index fb6ab27..6233aef 100644 --- a/include/haproxy/fcgi-app-t.h +++ b/include/haproxy/fcgi-app-t.h @@ -28,6 +28,7 @@ #include <haproxy/acl-t.h> #include <haproxy/api-t.h> #include <haproxy/arg-t.h> +#include <haproxy/log-t.h> #include <haproxy/fcgi.h> #include <haproxy/filters-t.h> #include <haproxy/regex-t.h> @@ -59,7 +60,7 @@ struct fcgi_rule_conf { struct fcgi_rule { enum fcgi_rule_type type; struct ist name; /* name of the parameter/header */ - struct list value; /* log-format compatible expression, may be empty */ + struct lf_expr value; /* log-format compatible expression, may be empty */ struct acl_cond *cond; /* acl condition to set the param */ struct list list; }; @@ -67,7 +68,7 @@ struct fcgi_rule { /* parameter rule to set/unset a param at the end of the analyzis */ struct fcgi_param_rule { struct ist name; - struct list *value; /* if empty 
, unset the parameter */ + struct lf_expr *value; /* if empty , unset the parameter */ struct ebpt_node node; }; diff --git a/include/haproxy/filters-t.h b/include/haproxy/filters-t.h index c86ef6f..2acacd0 100644 --- a/include/haproxy/filters-t.h +++ b/include/haproxy/filters-t.h @@ -22,6 +22,7 @@ #define _HAPROXY_FILTERS_T_H #include <haproxy/api-t.h> +#include <haproxy/buf-t.h> /* Flags set on a filter config */ #define FLT_CFG_FL_HTX 0x00000001 /* The filter can filter HTX streams */ diff --git a/include/haproxy/freq_ctr.h b/include/haproxy/freq_ctr.h index f3f6903..f037cbb 100644 --- a/include/haproxy/freq_ctr.h +++ b/include/haproxy/freq_ctr.h @@ -32,6 +32,14 @@ ullong freq_ctr_total(const struct freq_ctr *ctr, uint period, int pend); int freq_ctr_overshoot_period(const struct freq_ctr *ctr, uint period, uint freq); uint update_freq_ctr_period_slow(struct freq_ctr *ctr, uint period, uint inc); +/* Only usable during single threaded startup phase. */ +static inline void preload_freq_ctr(struct freq_ctr *ctr, uint value) +{ + ctr->curr_ctr = 0; + ctr->prev_ctr = value; + ctr->curr_tick = now_ms & ~1; +} + /* Update a frequency counter by <inc> incremental units. It is automatically * rotated if the period is over. It is important that it correctly initializes * a null area. 
diff --git a/include/haproxy/global-t.h b/include/haproxy/global-t.h index 9b3cd78..7665ef2 100644 --- a/include/haproxy/global-t.h +++ b/include/haproxy/global-t.h @@ -46,7 +46,7 @@ #define MODE_DUMP_NB_L 0x10000 /* dump line numbers when the configuration file is dump */ /* list of last checks to perform, depending on config options */ -#define LSTCHK_CAP_BIND 0x00000001 /* check that we can bind to any port */ +#define LSTCHK_SYSADM 0x00000001 /* check that we have CAP_SYS_ADMIN */ #define LSTCHK_NETADM 0x00000002 /* check that we have CAP_NET_ADMIN */ /* Global tuning options */ @@ -84,6 +84,7 @@ #define GTUNE_LISTENER_MQ_FAIR (1<<27) #define GTUNE_LISTENER_MQ_OPT (1<<28) #define GTUNE_LISTENER_MQ_ANY (GTUNE_LISTENER_MQ_FAIR | GTUNE_LISTENER_MQ_OPT) +#define GTUNE_QUIC_CC_HYSTART (1<<29) #define NO_ZERO_COPY_FWD 0x0001 /* Globally disable zero-copy FF */ #define NO_ZERO_COPY_FWD_PT 0x0002 /* disable zero-copy FF for PT (recv & send are disabled automatically) */ @@ -95,6 +96,7 @@ #define NO_ZERO_COPY_FWD_QUIC_SND 0x0080 /* disable zero-copy FF for QUIC on send */ #define NO_ZERO_COPY_FWD_FCGI_RCV 0x0100 /* disable zero-copy FF for FCGI on received */ #define NO_ZERO_COPY_FWD_FCGI_SND 0x0200 /* disable zero-copy FF for FCGI on send */ +#define NO_ZERO_COPY_FWD_APPLET 0x0400 /* disable zero-copy FF for applets */ extern int cluster_secret_isset; /* non zero means a cluster secret was initialized */ @@ -153,6 +155,7 @@ struct global { char *log_send_hostname; /* set hostname in syslog header */ char *server_state_base; /* path to a directory where server state files can be found */ char *server_state_file; /* path to the file where server states are loaded from */ + char *stats_file; /* path to stats-file */ unsigned char cluster_secret[16]; /* 128 bits of an SHA1 digest of a secret defined as ASCII string */ struct { int maxpollevents; /* max number of poll events at once */ @@ -189,9 +192,11 @@ struct global { int nb_stk_ctr; /* number of stick counters, 
defaults to MAX_SESS_STKCTR */ int default_shards; /* default shards for listeners, or -1 (by-thread) or -2 (by-group) */ uint max_checks_per_thread; /* if >0, no more than this concurrent checks per thread */ + uint ring_queues; /* if >0, #ring queues, otherwise equals #thread groups */ #ifdef USE_QUIC unsigned int quic_backend_max_idle_timeout; unsigned int quic_frontend_max_idle_timeout; + unsigned int quic_frontend_glitches_threshold; unsigned int quic_frontend_max_streams_bidi; unsigned int quic_retry_threshold; unsigned int quic_reorder_ratio; @@ -209,7 +214,10 @@ struct global { } unix_bind; struct proxy *cli_fe; /* the frontend holding the stats settings */ int numa_cpu_mapping; + int thread_limit; /* hard limit on the number of threads */ int prealloc_fd; + uchar clt_privileged_ports; /* bitmask to allow client privileged ports exchanges per protocol */ + /* 3-bytes hole */ int cfg_curr_line; /* line number currently being parsed */ const char *cfg_curr_file; /* config file currently being parsed or NULL */ char *cfg_curr_section; /* config section name currently being parsed or NULL */ diff --git a/include/haproxy/global.h b/include/haproxy/global.h index 2e7fa6b..5553468 100644 --- a/include/haproxy/global.h +++ b/include/haproxy/global.h @@ -78,6 +78,7 @@ static inline int already_warned(unsigned int warning) } extern unsigned int experimental_directives_allowed; +extern unsigned int deprecated_directives_allowed; struct cfg_keyword; int check_kw_experimental(struct cfg_keyword *kw, const char *file, int linenum, diff --git a/include/haproxy/guid-t.h b/include/haproxy/guid-t.h new file mode 100644 index 0000000..9eea355 --- /dev/null +++ b/include/haproxy/guid-t.h @@ -0,0 +1,15 @@ +#ifndef _HAPROXY_GUID_T_H +#define _HAPROXY_GUID_T_H + +#include <import/ebtree-t.h> +#include <haproxy/obj_type-t.h> + +/* Maximum GUID size excluding final '\0' */ +#define GUID_MAX_LEN 127 + +struct guid_node { + struct ebpt_node node; /* attach point into GUID global tree 
*/ + enum obj_type *obj_type; /* pointer to GUID obj owner */ +}; + +#endif /* _HAPROXY_GUID_T_H */ diff --git a/include/haproxy/guid.h b/include/haproxy/guid.h new file mode 100644 index 0000000..ecfeb6a --- /dev/null +++ b/include/haproxy/guid.h @@ -0,0 +1,16 @@ +#ifndef _HAPROXY_GUID_H +#define _HAPROXY_GUID_H + +#include <haproxy/guid-t.h> + +extern struct eb_root guid_tree; + +void guid_init(struct guid_node *node); +int guid_insert(enum obj_type *obj_type, const char *uid, char **errmsg); +void guid_remove(struct guid_node *guid); +struct guid_node *guid_lookup(const char *uid); + +int guid_is_valid_fmt(const char *uid, char **errmsg); +char *guid_name(const struct guid_node *guid); + +#endif /* _HAPROXY_GUID_H */ diff --git a/include/haproxy/h1.h b/include/haproxy/h1.h index 7152c6e..0eb0395 100644 --- a/include/haproxy/h1.h +++ b/include/haproxy/h1.h @@ -153,7 +153,6 @@ union h1_sl { /* useful start line pointers, relative t int h1_headers_to_hdr_list(char *start, const char *stop, struct http_hdr *hdr, unsigned int hdr_num, struct h1m *h1m, union h1_sl *slp); -int h1_measure_trailers(const struct buffer *buf, unsigned int ofs, unsigned int max); int h1_parse_cont_len_header(struct h1m *h1m, struct ist *value); int h1_parse_xfer_enc_header(struct h1m *h1m, struct ist value); diff --git a/include/haproxy/h3.h b/include/haproxy/h3.h index 1bedf43..8b91061 100644 --- a/include/haproxy/h3.h +++ b/include/haproxy/h3.h @@ -48,29 +48,25 @@ #define H3_SETTINGS_MAX_FIELD_SECTION_SIZE 0x06 #define H3_SETTINGS_QPACK_BLOCKED_STREAMS 0x07 -/* Errors. */ +/* RFC 9114 8. 
Error Handling */ enum h3_err { - H3_NO_ERROR = 0x100, - H3_GENERAL_PROTOCOL_ERROR = 0x101, - H3_INTERNAL_ERROR = 0x102, - H3_STREAM_CREATION_ERROR = 0x103, - H3_CLOSED_CRITICAL_STREAM = 0x104, - H3_FRAME_UNEXPECTED = 0x105, - H3_FRAME_ERROR = 0x106, - H3_EXCESSIVE_LOAD = 0x107, - H3_ID_ERROR = 0x108, - H3_SETTINGS_ERROR = 0x109, - H3_MISSING_SETTINGS = 0x10a, - H3_REQUEST_REJECTED = 0x10b, - H3_REQUEST_CANCELLED = 0x10c, - H3_REQUEST_INCOMPLETE = 0x10d, - H3_MESSAGE_ERROR = 0x10e, - H3_CONNECT_ERROR = 0x10f, - H3_VERSION_FALLBACK = 0x110, - - QPACK_DECOMPRESSION_FAILED = 0x200, - QPACK_ENCODER_STREAM_ERROR = 0x201, - QPACK_DECODER_STREAM_ERROR = 0x202, + H3_ERR_NO_ERROR = 0x100, + H3_ERR_GENERAL_PROTOCOL_ERROR = 0x101, + H3_ERR_INTERNAL_ERROR = 0x102, + H3_ERR_STREAM_CREATION_ERROR = 0x103, + H3_ERR_CLOSED_CRITICAL_STREAM = 0x104, + H3_ERR_FRAME_UNEXPECTED = 0x105, + H3_ERR_FRAME_ERROR = 0x106, + H3_ERR_EXCESSIVE_LOAD = 0x107, + H3_ERR_ID_ERROR = 0x108, + H3_ERR_SETTINGS_ERROR = 0x109, + H3_ERR_MISSING_SETTINGS = 0x10a, + H3_ERR_REQUEST_REJECTED = 0x10b, + H3_ERR_REQUEST_CANCELLED = 0x10c, + H3_ERR_REQUEST_INCOMPLETE = 0x10d, + H3_ERR_MESSAGE_ERROR = 0x10e, + H3_ERR_CONNECT_ERROR = 0x10f, + H3_ERR_VERSION_FALLBACK = 0x110, }; /* Frame types. 
*/ diff --git a/include/haproxy/http.h b/include/haproxy/http.h index 2992640..e28f3cc 100644 --- a/include/haproxy/http.h +++ b/include/haproxy/http.h @@ -27,15 +27,20 @@ #include <import/ist.h> #include <haproxy/api.h> #include <haproxy/http-t.h> +#include <haproxy/intops.h> extern const int http_err_codes[HTTP_ERR_SIZE]; extern const char *http_err_msgs[HTTP_ERR_SIZE]; extern const struct ist http_known_methods[HTTP_METH_OTHER]; extern const uint8_t http_char_classes[256]; +extern long http_err_status_codes[512 / sizeof(long)]; +extern long http_fail_status_codes[512 / sizeof(long)]; enum http_meth_t find_http_meth(const char *str, const int len); int http_get_status_idx(unsigned int status); const char *http_get_reason(unsigned int status); +void http_status_add_range(long *array, uint low, uint high); +void http_status_del_range(long *array, uint low, uint high); struct ist http_get_host_port(const struct ist host); int http_is_default_port(const struct ist schm, const struct ist port); int http_validate_scheme(const struct ist schm); @@ -212,6 +217,18 @@ static inline int http_path_has_forbidden_char(const struct ist ist, const char return 0; } +/* Checks status code array <array> for the presence of status code <status>. + * Returns non-zero if the code is present, zero otherwise. Any status code is + * permitted. 
+ */ +static inline int http_status_matches(const long *array, uint status) +{ + if (status < 100 || status > 599) + return 0; + + return ha_bit_test(status - 100, array); +} + #endif /* _HAPROXY_HTTP_H */ /* diff --git a/include/haproxy/http_ana-t.h b/include/haproxy/http_ana-t.h index 5b7342f..f43aa32 100644 --- a/include/haproxy/http_ana-t.h +++ b/include/haproxy/http_ana-t.h @@ -73,8 +73,8 @@ /* used only for keep-alive purposes, to indicate we're on a second transaction */ #define TX_NOT_FIRST 0x00040000 /* the transaction is not the first one */ -#define TX_L7_RETRY 0x000800000 /* The transaction may attempt L7 retries */ -#define TX_D_L7_RETRY 0x001000000 /* Disable L7 retries on this transaction, even if configured to do it */ +#define TX_L7_RETRY 0x00080000 /* The transaction may attempt L7 retries */ +#define TX_D_L7_RETRY 0x00100000 /* Disable L7 retries on this transaction, even if configured to do it */ /* This function is used to report flags in debugging tools. Please reflect * below any single-bit flag addition above in the same order via the diff --git a/include/haproxy/http_client-t.h b/include/haproxy/http_client-t.h index 7ae0e61..2c07f77 100644 --- a/include/haproxy/http_client-t.h +++ b/include/haproxy/http_client-t.h @@ -64,6 +64,7 @@ enum { #define HC_F_RES_HDR 0x02 #define HC_F_RES_BODY 0x04 #define HC_F_RES_END 0x08 +#define HC_F_HTTPPROXY 0x10 #endif /* ! 
_HAPROXY_HTTCLIENT__T_H */ diff --git a/include/haproxy/http_client.h b/include/haproxy/http_client.h index 241ca24..93f3fc1 100644 --- a/include/haproxy/http_client.h +++ b/include/haproxy/http_client.h @@ -1,6 +1,7 @@ #ifndef _HAPROXY_HTTPCLIENT_H #define _HAPROXY_HTTPCLIENT_H +#include <haproxy/buf.h> #include <haproxy/http_client-t.h> void httpclient_destroy(struct httpclient *hc); diff --git a/include/haproxy/http_htx-t.h b/include/haproxy/http_htx-t.h index 8051925..1dd86aa 100644 --- a/include/haproxy/http_htx-t.h +++ b/include/haproxy/http_htx-t.h @@ -28,6 +28,7 @@ #include <haproxy/buf-t.h> #include <haproxy/http-t.h> +#include <haproxy/log-t.h> #include <haproxy/htx-t.h> /* Context used to find/remove an HTTP header. */ @@ -41,9 +42,9 @@ struct http_hdr_ctx { /* Structure used to build the header list of an HTTP reply */ struct http_reply_hdr { - struct ist name; /* the header name */ - struct list value; /* the log-format string value */ - struct list list; /* header chained list */ + struct ist name; /* the header name */ + struct lf_expr value; /* the log-format string value */ + struct list list; /* header linked list */ }; #define HTTP_REPLY_EMPTY 0x00 /* the reply has no payload */ @@ -60,7 +61,7 @@ struct http_reply { char *ctype; /* The response content-type, may be NULL */ struct list hdrs; /* A list of http_reply_hdr */ union { - struct list fmt; /* A log-format string (type = HTTP_REPLY_LOGFMT) */ + struct lf_expr fmt; /* A log-format string (type = HTTP_REPLY_LOGFMT) */ struct buffer obj; /* A raw string (type = HTTP_REPLY_RAW) */ struct buffer *errmsg; /* The error message to use as response (type = HTTP_REPLY_ERRMSG). 
* may be NULL, if so rely on the proxy error messages */ diff --git a/include/haproxy/htx-t.h b/include/haproxy/htx-t.h index 2ea6bc8..5312ae1 100644 --- a/include/haproxy/htx-t.h +++ b/include/haproxy/htx-t.h @@ -177,7 +177,7 @@ static forceinline char *hsl_show_flags(char *buf, size_t len, const char *delim #define HTX_FL_PARSING_ERROR 0x00000001 /* Set when a parsing error occurred */ #define HTX_FL_PROCESSING_ERROR 0x00000002 /* Set when a processing error occurred */ #define HTX_FL_FRAGMENTED 0x00000004 /* Set when the HTX buffer is fragmented */ -#define HTX_FL_PROXY_RESP 0x00000008 /* Set when the response was generated by HAProxy */ +/* 0x00000008 unused */ #define HTX_FL_EOM 0x00000010 /* Set when end-of-message is reached from the HTTP point of view * (at worst, on the EOM block is missing) */ @@ -192,7 +192,7 @@ static forceinline char *htx_show_flags(char *buf, size_t len, const char *delim _(0); /* flags */ _(HTX_FL_PARSING_ERROR, _(HTX_FL_PROCESSING_ERROR, - _(HTX_FL_FRAGMENTED, _(HTX_FL_PROXY_RESP, _(HTX_FL_EOM))))); + _(HTX_FL_FRAGMENTED, _(HTX_FL_EOM)))); /* epilogue */ _(~0U); return buf; @@ -225,7 +225,9 @@ struct htx_ret { struct htx_blk *blk; /* An HTX block */ }; -/* HTX start-line */ +/* HTX start-line. This is almost always aligned except in rare cases where + * parts of the URI are rewritten, hence the packed attribute. + */ struct htx_sl { unsigned int flags; /* HTX_SL_F_* */ union { @@ -237,11 +239,16 @@ struct htx_sl { } res; } info; - /* XXX 2 bytes unused */ + /* XXX 2 bytes unused, must be present to keep the rest aligned + * (check with "pahole -C htx_sl" that len[] is aligned in case + * of doubt). 
+ */ + char __pad_1; + char __pad_2; unsigned int len[3]; /* length of different parts of the start-line */ char l[VAR_ARRAY]; -}; +} __attribute__((packed)); /* Internal representation of an HTTP message */ struct htx { diff --git a/include/haproxy/intops.h b/include/haproxy/intops.h index 34010cc..589f90e 100644 --- a/include/haproxy/intops.h +++ b/include/haproxy/intops.h @@ -96,6 +96,132 @@ static inline uint64_t rotr64(uint64_t v, uint8_t bits) return v; } +/* Returns non-zero if any of the 4 bytes composing the u32 <x> is below the + * value <min8> or above <min8>+127. Please note that the result will be made + * of a 0x80 at positions corresponding to the offending bytes, and that as + * such the result is a u32 as well. It is designed like this so that the + * operation can be cascaded by ORing the results of multiple blocks. It is + * crucial for performance that <min8> is passed as a build-time constant so + * as to avoid an expensive multiply. A zero on output confirms that all four + * bytes are greater than or equal to <min8> and not greater than <min8>+127. + * This is essentially used to skip long sequences of text matching the rule + * when the cost of stopping on a false positive is low (i.e. parse multiple + * bytes at a time and continue one byte at a time at the end of the series). + */ +static inline __attribute__((always_inline)) +uint32_t is_char4_below_opt(uint32_t x, uint8_t min8) +{ + uint32_t min32 = min8 * 0x01010101U; + + return (x - min32) & 0x80808080U; +} + +/* Returns non-zero if any of the 4 bytes composing the u32 <x> is above the + * value <max8> or below <max8>-127. Please note that the result will be made + * of a 0x80 at positions corresponding to the offending bytes, and that as + * such the result is a u32 as well. It is designed like this so that the + * operation can be cascaded by ORing the results of multiple blocks.
It is + * crucial for performance that <max8> is passed as a build-time constant so + * as to avoid an expensive multiply. A zero on output confirms that all four + * bytes are lower than or equal to <max8> and not lower than <max8>-127. + * This is essentially used to skip long sequences of text matching the rule + * when the cost of stopping on a false positive is low (i.e. parse multiple + * bytes at a time and continue one byte at a time at the end of the series). + */ +static inline __attribute__((always_inline)) +uint32_t is_char4_above_opt(uint32_t x, uint8_t max8) +{ + uint32_t max32 = max8 * 0x01010101U; + + return (max32 - x) & 0x80808080U; +} + +/* Returns non-zero if any of the 4 bytes composing the u32 <x> is outside of + * the range defined by <min8> to <max8> included. Please note that the result + * will be made of a 0x80 at positions corresponding to the offending bytes, + * and that as such the result is a u32 as well. It is designed like this so + * that the operation can be cascaded by ORing the results of multiple blocks. + * There is one restriction in this simplified version, the distance between + * min8 and max8 must be lower than 0x80. It is crucial for performance that + * the bounds (min8 and max8) are passed as build-time constants so as to avoid + * an expensive multiply. A zero on output confirms that all four bytes are + * included in the defined range. + */ +static inline __attribute__((always_inline)) +uint32_t is_char4_outside(uint32_t x, uint8_t min8, uint8_t max8) +{ + uint32_t min32 = min8 * 0x01010101U; + uint32_t max32 = max8 * 0x01010101U; + + return (((x - min32) | (max32 - x)) & 0x80808080U); +} + +/* Returns non-zero if any of the 8 bytes composing the u64 <x> is below the + * value <min8> or above <min8>+127. Please note that the result will be made + * of a 0x80 at positions corresponding to the offending bytes, and that as + * such the result is a u64 as well.
It is designed like this so that the + * operation can be cascaded by ORing the results of multiple blocks. It is + * crucial for performance that <min8> is passed as a build-time constant so + * as to avoid an expensive multiply. A zero on output confirms that all eight + * bytes are greater than or equal to <min8> and not greater than <min8>+127. + * This is essentially used to skip long sequences of text matching the rule + * when the cost of stopping on a false positive is low (i.e. parse multiple + * bytes at a time and continue one byte at a time at the end of the series). + */ +static inline __attribute__((always_inline)) +uint64_t is_char8_below_opt(uint64_t x, uint8_t min8) +{ + uint64_t min64 = min8 * 0x0101010101010101ULL; + + return (x - min64) & 0x8080808080808080ULL; +} + +/* Returns non-zero if any of the 8 bytes composing the u64 <x> is above the + * value <max8> or below <max8>-127. Please note that the result will be made + * of a 0x80 at positions corresponding to the offending bytes, and that as + * such the result is a u64 as well. It is designed like this so that the + * operation can be cascaded by ORing the results of multiple blocks. It is + * crucial for performance that <max8> is passed as a build-time constant so + * as to avoid an expensive multiply. A zero on output confirms that all eight + * bytes are lower than or equal to <max8> and not lower than <max8>-127. + * This is essentially used to skip long sequences of text matching the rule + * when the cost of stopping on a false positive is low (i.e. parse multiple + * bytes at a time and continue one byte at a time at the end of the series). + */ +static inline __attribute__((always_inline)) +uint64_t is_char8_above_opt(uint64_t x, uint8_t max8) +{ + uint64_t max64 = max8 * 0x0101010101010101ULL; + + return (max64 - x) & 0x8080808080808080ULL; +} + +/* Returns non-zero if any of the 8 bytes composing the u64 <x> is outside of + * the range defined by <min8> to <max8> included.
Please note that the result + * will be made of a 0x80 at positions corresponding to some of the offending + * bytes, and that as such the result is a u64 as well. On 32-bit machines, the + * operation will be made of two adjacent 32-bit checks. It is designed like + * this so that the operation can be cascaded by ORing the results of multiple + * blocks. There is one restriction in this simplified version, the distance + * between min8 and max8 must be lower than 0x80. It is crucial for performance + * that the bounds (min8 and max8) are passed as build-time constants so as to + * avoid an expensive multiply. A zero on output confirms that all eight bytes + * are included in the defined range. + */ +static inline __attribute__((always_inline)) +uint64_t is_char8_outside(uint64_t x, uint8_t min8, uint8_t max8) +{ + if (sizeof(long) >= 8) { + uint64_t min64 = min8 * 0x0101010101010101ULL; + uint64_t max64 = max8 * 0x0101010101010101ULL; + + return (((x - min64) | (max64 - x)) & 0x8080808080808080ULL); + } + else + return is_char4_outside(x >> 0, min8, max8) | + is_char4_outside(x >> 32, min8, max8); +} + /* Simple popcountl implementation. It returns the number of ones in a word.
* Described here : https://graphics.stanford.edu/~seander/bithacks.html */ diff --git a/include/haproxy/jwt-t.h b/include/haproxy/jwt-t.h index e94607e..d4f9e69 100644 --- a/include/haproxy/jwt-t.h +++ b/include/haproxy/jwt-t.h @@ -22,6 +22,7 @@ #ifndef _HAPROXY_JWT_T_H #define _HAPROXY_JWT_T_H +#include <import/ebmbtree.h> #include <haproxy/openssl-compat.h> #ifdef USE_OPENSSL diff --git a/include/haproxy/lb_ss-t.h b/include/haproxy/lb_ss-t.h new file mode 100644 index 0000000..9014bce --- /dev/null +++ b/include/haproxy/lb_ss-t.h @@ -0,0 +1,32 @@ +/* + * include/haproxy/lb_ss-t.h + * Types for sticky load-balancing + * + * Copyright 2024 HAProxy Technologies + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_LB_LH_T_H +#define _HAPROXY_LB_LH_T_H + +#include <haproxy/api-t.h> +#include <haproxy/server-t.h> + +struct lb_ss { + struct server *srv; /* sticked server */ +}; + +#endif /* _HAPROXY_LB_LH_T_H */ diff --git a/include/haproxy/lb_ss.h b/include/haproxy/lb_ss.h new file mode 100644 index 0000000..6ec3153 --- /dev/null +++ b/include/haproxy/lb_ss.h @@ -0,0 +1,33 @@ +/* + * include/haproxy/lb_ss.h + * sticky load-balancing + * + * Copyright 2024 HAProxy Technologies + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_LB_SS_H +#define _HAPROXY_LB_SS_H + +#include <haproxy/api.h> +#include <haproxy/proxy-t.h> +#include <haproxy/server-t.h> + +void recalc_server_ss(struct proxy *px); +void init_server_ss(struct proxy *px); +struct server *ss_get_server(struct proxy *px); + +#endif /* _HAPROXY_LB_SS_H */ diff --git a/include/haproxy/linuxcap.h b/include/haproxy/linuxcap.h index 9c337a4..9395b7b 100644 --- a/include/haproxy/linuxcap.h +++ b/include/haproxy/linuxcap.h @@ -3,5 +3,6 @@ int prepare_caps_for_setuid(int from_uid, int to_uid); int finalize_caps_after_setuid(int from_uid, int to_uid); +int prepare_caps_from_permitted_set(int from_uid, int to_uid, const char *program_name); #endif /* _HAPROXY_LINUXCAP_H */ diff --git a/include/haproxy/list.h b/include/haproxy/list.h index 368e6d7..b922bc1 100644 --- a/include/haproxy/list.h +++ b/include/haproxy/list.h @@ -106,6 +106,13 @@ */ #define LIST_INLIST(el) ((el)->n != (el)) +/* checks if the list element <el> has the same prev and next, i.e. it's either + * detached or alone in a list since (it points to itself or to a single other + * node). One can check that an element is strictly attached and alone by + * combining this with LIST_INLIST(). + */ +#define LIST_ATMOST1(el) ((el)->n == (el)->p) + /* atomically checks if the list element's next pointer points to anything * different from itself, implying the element should be part of a list. 
This * usually is similar to LIST_INLIST() except that while that one might be diff --git a/include/haproxy/listener-t.h b/include/haproxy/listener-t.h index 7f5e52a..b9a8447 100644 --- a/include/haproxy/listener-t.h +++ b/include/haproxy/listener-t.h @@ -28,6 +28,7 @@ #include <import/ebtree-t.h> #include <haproxy/api-t.h> +#include <haproxy/guid-t.h> #include <haproxy/obj_type-t.h> #include <haproxy/quic_cc-t.h> #include <haproxy/quic_sock-t.h> @@ -138,7 +139,6 @@ struct ssl_bind_conf { unsigned int verify:3; /* verify method (set of SSL_VERIFY_* flags) */ unsigned int no_ca_names:1;/* do not send ca names to clients (ca_file related) */ unsigned int early_data:1; /* early data allowed */ - unsigned int ocsp_update:2;/* enable OCSP auto update */ char *ca_file; /* CAfile to use on verify and ca-names */ char *ca_verify_file; /* CAverify file to use on verify only */ char *crl_file; /* CRLfile to use on verify */ @@ -169,9 +169,6 @@ struct bind_conf { unsigned long long ca_ignerr_bitfield[IGNERR_BF_SIZE]; /* ignored verify errors in handshake if depth > 0 */ unsigned long long crt_ignerr_bitfield[IGNERR_BF_SIZE]; /* ignored verify errors in handshake if depth == 0 */ void *initial_ctx; /* SSL context for initial negotiation */ - void *default_ctx; /* SSL context of first/default certificate */ - struct ckch_inst *default_inst; - struct ssl_bind_conf *default_ssl_conf; /* custom SSL conf of default_ctx */ int strict_sni; /* refuse negotiation if sni doesn't match a certificate */ int ssl_options; /* ssl options */ struct eb_root sni_ctx; /* sni_ctx tree of all known certs full-names sorted by name */ @@ -210,6 +207,8 @@ struct bind_conf { char *arg; /* argument passed to "bind" for better error reporting */ char *file; /* file where the section appears */ int line; /* line where the section appears */ + char *guid_prefix; /* prefix for listeners GUID */ + size_t guid_idx; /* next index for listeners GUID generation */ char *rhttp_srvname; /* name of server when 
using "rhttp@" address */ int rhttp_nbconn; /* count of connections to initiate in parallel */ __decl_thread(HA_RWLOCK_T sni_lock); /* lock the SNI trees during add/del operations */ @@ -255,6 +254,8 @@ struct listener { struct eb32_node id; /* place in the tree of used IDs */ } conf; /* config information */ + struct guid_node guid; /* GUID global tree node */ + struct li_per_thread *per_thr; /* per-thread fields (one per thread in the group) */ EXTRA_COUNTERS(extra_counters); diff --git a/include/haproxy/listener.h b/include/haproxy/listener.h index 5b3dc18..3627a79 100644 --- a/include/haproxy/listener.h +++ b/include/haproxy/listener.h @@ -192,6 +192,13 @@ int default_resume_listener(struct listener *l); */ int bind_complete_thread_setup(struct bind_conf *bind_conf, int *err_code); +/* Generate and insert unique GUID for each listeners of <bind_conf> instance + * if GUID prefix is defined. + * + * Returns 0 on success else non-zero. + */ +int bind_generate_guid(struct bind_conf *bind_conf); + /* * Registers the bind keyword list <kwl> as a list of valid keywords for next * parsing sessions. diff --git a/include/haproxy/log-t.h b/include/haproxy/log-t.h index a0a25ac..8768e10 100644 --- a/include/haproxy/log-t.h +++ b/include/haproxy/log-t.h @@ -38,6 +38,7 @@ #define UNIQUEID_LEN 128 /* flags used in logformat_node->options */ +#define LOG_OPT_NONE 0x00000000 #define LOG_OPT_HEXA 0x00000001 #define LOG_OPT_MANDATORY 0x00000002 #define LOG_OPT_QUOTE 0x00000004 @@ -46,6 +47,11 @@ #define LOG_OPT_HTTP 0x00000020 #define LOG_OPT_ESC 0x00000040 #define LOG_OPT_MERGE_SPACES 0x00000080 +#define LOG_OPT_BIN 0x00000100 +/* unused: 0x00000200 ... 
0x00000800 */ +#define LOG_OPT_ENCODE_JSON 0x00001000 +#define LOG_OPT_ENCODE_CBOR 0x00002000 +#define LOG_OPT_ENCODE 0x00003000 /* Fields that need to be extracted from the incoming connection or request for @@ -122,75 +128,10 @@ enum log_tgt { /* lists of fields that can be logged, for logformat_node->type */ enum { - LOG_FMT_TEXT = 0, /* raw text */ - LOG_FMT_EXPR, /* sample expression */ + LOG_FMT_TEXT = 0, /* raw text */ + LOG_FMT_EXPR, /* sample expression */ LOG_FMT_SEPARATOR, /* separator replaced by one space */ - - /* information fields */ - LOG_FMT_GLOBAL, - LOG_FMT_CLIENTIP, - LOG_FMT_CLIENTPORT, - LOG_FMT_BACKENDIP, - LOG_FMT_BACKENDPORT, - LOG_FMT_FRONTENDIP, - LOG_FMT_FRONTENDPORT, - LOG_FMT_SERVERPORT, - LOG_FMT_SERVERIP, - LOG_FMT_COUNTER, - LOG_FMT_LOGCNT, - LOG_FMT_PID, - LOG_FMT_DATE, - LOG_FMT_DATEGMT, - LOG_FMT_DATELOCAL, - LOG_FMT_TS, - LOG_FMT_MS, - LOG_FMT_FRONTEND, - LOG_FMT_FRONTEND_XPRT, - LOG_FMT_BACKEND, - LOG_FMT_SERVER, - LOG_FMT_BYTES, - LOG_FMT_BYTES_UP, - LOG_FMT_Ta, - LOG_FMT_Th, - LOG_FMT_Ti, - LOG_FMT_TQ, - LOG_FMT_TW, - LOG_FMT_TC, - LOG_FMT_Tr, - LOG_FMT_tr, - LOG_FMT_trg, - LOG_FMT_trl, - LOG_FMT_TR, - LOG_FMT_TD, - LOG_FMT_TT, - LOG_FMT_TU, - LOG_FMT_STATUS, - LOG_FMT_CCLIENT, - LOG_FMT_CSERVER, - LOG_FMT_TERMSTATE, - LOG_FMT_TERMSTATE_CK, - LOG_FMT_ACTCONN, - LOG_FMT_FECONN, - LOG_FMT_BECONN, - LOG_FMT_SRVCONN, - LOG_FMT_RETRIES, - LOG_FMT_SRVQUEUE, - LOG_FMT_BCKQUEUE, - LOG_FMT_HDRREQUEST, - LOG_FMT_HDRRESPONS, - LOG_FMT_HDRREQUESTLIST, - LOG_FMT_HDRRESPONSLIST, - LOG_FMT_REQ, - LOG_FMT_HTTP_METHOD, - LOG_FMT_HTTP_URI, - LOG_FMT_HTTP_PATH, - LOG_FMT_HTTP_PATH_ONLY, - LOG_FMT_HTTP_QUERY, - LOG_FMT_HTTP_VERSION, - LOG_FMT_HOSTNAME, - LOG_FMT_UNIQUEID, - LOG_FMT_SSL_CIPHER, - LOG_FMT_SSL_VERSION, + LOG_FMT_ALIAS, /* reference to logformat_alias */ }; /* enum for parse_logformat_string */ @@ -198,8 +139,11 @@ enum { LF_INIT = 0, // before first character LF_TEXT, // normal text LF_SEPARATOR, // a single separator - LF_VAR, // 
variable name, after '%' or '%{..}' - LF_STARTVAR, // % in text + LF_ALIAS, // alias name, after '%' or '%{..}' + LF_STARTALIAS, // % in text + LF_STONAME, // after '%(' and before ')' + LF_STOTYPE, // after ':' while in STONAME + LF_EDONAME, // ')' after '%(' LF_STARG, // after '%{' and berore '}' LF_EDARG, // '}' after '%{' LF_STEXPR, // after '%[' or '%{..}[' and berore ']' @@ -207,13 +151,49 @@ enum { LF_END, // \0 found }; +/* log_format aliases (ie: %alias), see logformat_aliases table in log.c for + * available aliases definitions + */ +struct logformat_node; // forward-declaration +struct logformat_alias { + char *name; + int type; + int mode; + int lw; /* logwait bitsfield */ + int (*config_callback)(struct logformat_node *node, struct proxy *curproxy); +}; struct logformat_node { struct list list; int type; // LOG_FMT_* int options; // LOG_OPT_* + int typecast; // explicit typecasting for printing purposes (SMP_T_{SAME,BOOL,STR,SINT}) + char *name; // printable name for output types that require named fields (ie: json) char *arg; // text for LOG_FMT_TEXT, arg for others void *expr; // for use with LOG_FMT_EXPR + const struct logformat_alias *alias; // set if ->type == LOG_FMT_ALIAS +}; + +enum lf_expr_flags { + LF_FL_NONE = 0x00, + LF_FL_COMPILED = 0x01 +}; + +/* a full logformat expr made of one or multiple logformat nodes */ +struct lf_expr { + struct list list; /* to store lf_expr inside a list */ + union { + struct { + struct list list; /* logformat_node list */ + int options; /* global '%o' options (common to all nodes) */ + } nodes; + char *str; /* original string prior to parsing (NULL once compiled) */ + }; + struct { + char *file; /* file where the lft appears */ + int line; /* line where the lft appears */ + } conf; // parsing hints + uint8_t flags; /* LF_FL_* flags */ }; /* Range of indexes for log sampling. 
*/ diff --git a/include/haproxy/log.h b/include/haproxy/log.h index 68b8207..bc86552 100644 --- a/include/haproxy/log.h +++ b/include/haproxy/log.h @@ -64,8 +64,23 @@ void syslog_fd_handler(int fd); int init_log_buffers(void); void deinit_log_buffers(void); +void lf_expr_init(struct lf_expr *expr); +int lf_expr_dup(const struct lf_expr *orig, struct lf_expr *dest); +void lf_expr_xfer(struct lf_expr *src, struct lf_expr *dst); +void lf_expr_deinit(struct lf_expr *expr); +static inline int lf_expr_isempty(const struct lf_expr *expr) +{ + return !(expr->flags & LF_FL_COMPILED) || LIST_ISEMPTY(&expr->nodes.list); +} +int lf_expr_compile(struct lf_expr *lf_expr, struct arg_list *al, int options, int cap, char **err); +int lf_expr_postcheck(struct lf_expr *lf_expr, struct proxy *px, char **err); + +/* Deinitialize log buffers used for syslog messages */ +void free_logformat_list(struct list *fmt); +void free_logformat_node(struct logformat_node *node); + /* build a log line for the session and an optional stream */ -int sess_build_logline(struct session *sess, struct stream *s, char *dst, size_t maxsize, struct list *list_format); +int sess_build_logline(struct session *sess, struct stream *s, char *dst, size_t maxsize, struct lf_expr *lf_expr); /* * send a log for the stream when we have enough info about it. @@ -81,14 +96,15 @@ void app_log(struct list *loggers, struct buffer *tag, int level, const char *fo /* * add to the logformat linked list */ -int add_to_logformat_list(char *start, char *end, int type, struct list *list_format, char **err); +int add_to_logformat_list(char *start, char *end, int type, struct lf_expr *lf_expr, char **err); + +ssize_t syslog_applet_append_event(void *ctx, struct ist v1, struct ist v2, size_t ofs, size_t len); /* * Parse the log_format string and fill a linked list. 
- * Variable name are preceded by % and composed by characters [a-zA-Z0-9]* : %varname - * You can set arguments using { } : %{many arguments}varname + * Refer to source file for details */ -int parse_logformat_string(const char *str, struct proxy *curproxy, struct list *list_format, int options, int cap, char **err); +int parse_logformat_string(const char *str, struct proxy *curproxy, struct lf_expr *lf_expr, int options, int cap, char **err); int postresolve_logger_list(struct list *loggers, const char *section, const char *section_name); @@ -131,27 +147,6 @@ int get_log_level(const char *lev); int get_log_facility(const char *fac); /* - * Write a string in the log string - * Take cares of quote options - * - * Return the address of the \0 character, or NULL on error - */ -char *lf_text_len(char *dst, const char *src, size_t len, size_t size, const struct logformat_node *node); - -/* - * Write a IP address to the log string - * +X option write in hexadecimal notation, most significant byte on the left - */ -char *lf_ip(char *dst, const struct sockaddr *sockaddr, size_t size, const struct logformat_node *node); - -/* - * Write a port to the log - * +X option write in hexadecimal notation, most significant byte on the left - */ -char *lf_port(char *dst, const struct sockaddr *sockaddr, size_t size, const struct logformat_node *node); - - -/* * Function to handle log header building (exported for sinks) */ char *update_log_hdr_rfc5424(const time_t time, suseconds_t frac); @@ -162,9 +157,9 @@ char * get_format_pid_sep2(int format, size_t *len); /* * Builds a log line for the stream (must be valid). 
*/ -static inline int build_logline(struct stream *s, char *dst, size_t maxsize, struct list *list_format) +static inline int build_logline(struct stream *s, char *dst, size_t maxsize, struct lf_expr *lf_expr) { - return sess_build_logline(strm_sess(s), s, dst, maxsize, list_format); + return sess_build_logline(strm_sess(s), s, dst, maxsize, lf_expr); } struct ist *build_log_header(struct log_header hdr, size_t *nbelem); diff --git a/include/haproxy/mqtt-t.h b/include/haproxy/mqtt-t.h index 51f55ea..3af2d11 100644 --- a/include/haproxy/mqtt-t.h +++ b/include/haproxy/mqtt-t.h @@ -22,6 +22,7 @@ #ifndef _HAPROXY_MQTT_T_H #define _HAPROXY_MQTT_T_H +#include <inttypes.h> #include <import/ist.h> /* MQTT protocol version diff --git a/include/haproxy/mux_h1-t.h b/include/haproxy/mux_h1-t.h index 2f49a49..e0c29c2 100644 --- a/include/haproxy/mux_h1-t.h +++ b/include/haproxy/mux_h1-t.h @@ -31,13 +31,14 @@ /* Flags indicating why writing output data are blocked */ #define H1C_F_OUT_ALLOC 0x00000001 /* mux is blocked on lack of output buffer */ #define H1C_F_OUT_FULL 0x00000002 /* mux is blocked on output buffer full */ -/* 0x00000004 - 0x00000008 unused */ +#define H1C_F_OUT_MAYALLOC 0x00000004 /* mux was just unblocked and may try to alloc out again */ /* Flags indicating why reading input data are blocked. 
*/ +#define H1C_F_IN_MAYALLOC 0x00000008 /* mux was just unblocked and may try to alloc in again */ #define H1C_F_IN_ALLOC 0x00000010 /* mux is blocked on lack of input buffer */ #define H1C_F_IN_FULL 0x00000020 /* mux is blocked on input buffer full */ #define H1C_F_IN_SALLOC 0x00000040 /* mux is blocked on lack of stream's request buffer */ -/* 0x00000080 unused */ +#define H1C_F_IN_SMAYALLOC 0x00000080 /* mux was just unblocked and may try to alloc strm again */ #define H1C_F_EOS 0x00000100 /* End-of-stream seen on the H1 connection (read0 detected) */ #define H1C_F_ERR_PENDING 0x00000200 /* A write error was detected (block sends but not reads) */ @@ -66,12 +67,12 @@ static forceinline char *h1c_show_flags(char *buf, size_t len, const char *delim /* prologue */ _(0); /* flags */ - _(H1C_F_OUT_ALLOC, _(H1C_F_OUT_FULL, - _(H1C_F_IN_ALLOC, _(H1C_F_IN_FULL, _(H1C_F_IN_SALLOC, + _(H1C_F_OUT_ALLOC, _(H1C_F_OUT_FULL, _(H1C_F_OUT_MAYALLOC, + _(H1C_F_IN_MAYALLOC, _(H1C_F_IN_ALLOC, _(H1C_F_IN_FULL, _(H1C_F_IN_SALLOC, _(H1C_F_IN_SMAYALLOC, _(H1C_F_EOS, _(H1C_F_ERR_PENDING, _(H1C_F_ERROR, _(H1C_F_SILENT_SHUT, _(H1C_F_ABRT_PENDING, _(H1C_F_ABRTED, _(H1C_F_WANT_FASTFWD, _(H1C_F_WAIT_NEXT_REQ, _(H1C_F_UPG_H2C, _(H1C_F_CO_MSG_MORE, - _(H1C_F_CO_STREAMER, _(H1C_F_CANT_FASTFWD, _(H1C_F_IS_BACK)))))))))))))))))); + _(H1C_F_CO_STREAMER, _(H1C_F_CANT_FASTFWD, _(H1C_F_IS_BACK))))))))))))))))))))); /* epilogue */ _(~0U); return buf; @@ -92,13 +93,12 @@ static forceinline char *h1c_show_flags(char *buf, size_t len, const char *delim #define H1S_F_WANT_CLO 0x00000040 #define H1S_F_WANT_MSK 0x00000070 #define H1S_F_NOT_FIRST 0x00000080 /* The H1 stream is not the first one */ -#define H1S_F_BODYLESS_RESP 0x00000100 /* Bodyless response message */ +/* 0x00000100 unused */ #define H1S_F_INTERNAL_ERROR 0x00000200 /* Set when an internal error occurred during the message parsing */ #define H1S_F_NOT_IMPL_ERROR 0x00000400 /* Set when a feature is not implemented during the message parsing */ 
#define H1S_F_PARSING_ERROR 0x00000800 /* Set when an error occurred during the message parsing */ #define H1S_F_PROCESSING_ERROR 0x00001000 /* Set when an error occurred during the message xfer */ -#define H1S_F_ERROR_MASK 0x00003800 /* stream error mask */ #define H1S_F_HAVE_SRV_NAME 0x00002000 /* Set during output process if the server name header was added to the request */ #define H1S_F_HAVE_O_CONN 0x00004000 /* Set during output process to know connection mode was processed */ @@ -106,6 +106,9 @@ static forceinline char *h1c_show_flags(char *buf, size_t len, const char *delim #define H1S_F_HAVE_CLEN 0x00010000 /* Set during output process to know C*L header was found or generated */ #define H1S_F_HAVE_CHNK 0x00020000 /* Set during output process to know "T-E; chunk" header was found or generated */ +#define H1S_F_BODYLESS_REQ 0x00040000 /* Bodyless request message */ +#define H1S_F_BODYLESS_RESP 0x00080000 /* Bodyless response message */ + /* This function is used to report flags in debugging tools. Please reflect * below any single-bit flag addition above in the same order via the * __APPEND_FLAG macro. The new end of the buffer is returned. 
@@ -118,10 +121,10 @@ static forceinline char *h1s_show_flags(char *buf, size_t len, const char *delim /* flags */ _(H1S_F_RX_BLK, _(H1S_F_TX_BLK, _(H1S_F_RX_CONGESTED, _(H1S_F_WANT_KAL, _(H1S_F_WANT_TUN, _(H1S_F_WANT_CLO, - _(H1S_F_NOT_FIRST, _(H1S_F_BODYLESS_RESP, + _(H1S_F_NOT_FIRST, _(H1S_F_INTERNAL_ERROR, _(H1S_F_NOT_IMPL_ERROR, _(H1S_F_PARSING_ERROR, _(H1S_F_PROCESSING_ERROR, _(H1S_F_HAVE_SRV_NAME, _(H1S_F_HAVE_O_CONN, _(H1S_F_HAVE_WS_KEY, - _(H1S_F_HAVE_CLEN, _(H1S_F_HAVE_CHNK))))))))))))))))); + _(H1S_F_HAVE_CLEN, _(H1S_F_HAVE_CHNK, _(H1S_F_BODYLESS_REQ, _(H1S_F_BODYLESS_RESP)))))))))))))))))); /* epilogue */ _(~0U); return buf; @@ -134,6 +137,7 @@ enum h1_cs { H1_CS_EMBRYONIC, /* Connection is waiting for the message headers (H1S is not NULL, not attached to a SC - Frontend connection only) */ H1_CS_UPGRADING, /* TCP>H1 upgrade in-progress (H1S is not NULL and attached to a SC - Frontend connection only) */ H1_CS_RUNNING, /* Connection fully established and the H1S is processing data (H1S is not NULL and attached to a SC) */ + H1_CS_DRAINING, /* H1C is draining the message before destroying the H1S (H1S is not NULL but no SC attached) */ H1_CS_CLOSING, /* Send pending outgoing data and close the connection ASAP (H1S may be NULL) */ H1_CS_CLOSED, /* Connection must be closed now and H1C must be released (H1S is NULL) */ H1_CS_ENTRIES, @@ -150,6 +154,7 @@ static inline const char *h1c_st_to_str(enum h1_cs st) case H1_CS_EMBRYONIC: return "EMB"; case H1_CS_UPGRADING: return "UPG"; case H1_CS_RUNNING: return "RUN"; + case H1_CS_DRAINING: return "DRN"; case H1_CS_CLOSING: return "CLI"; case H1_CS_CLOSED: return "CLD"; default: return "???"; diff --git a/include/haproxy/mux_quic-t.h b/include/haproxy/mux_quic-t.h index abfc20a..02f8a72 100644 --- a/include/haproxy/mux_quic-t.h +++ b/include/haproxy/mux_quic-t.h @@ -13,6 +13,7 @@ #include <haproxy/htx-t.h> #include <haproxy/list-t.h> #include <haproxy/ncbuf-t.h> +#include <haproxy/quic_fctl-t.h> #include 
<haproxy/quic_frame-t.h> #include <haproxy/quic_stream-t.h> #include <haproxy/stconn-t.h> @@ -30,7 +31,7 @@ enum qcs_type { #define QC_CF_ERRL 0x00000001 /* fatal error detected locally, connection should be closed soon */ #define QC_CF_ERRL_DONE 0x00000002 /* local error properly handled, connection can be released */ -#define QC_CF_BLK_MFCTL 0x00000004 /* sending blocked due to connection flow-control */ +/* unused 0x00000004 */ #define QC_CF_CONN_FULL 0x00000008 /* no stream buffers available on connection */ #define QC_CF_APP_SHUT 0x00000010 /* Application layer shutdown done. */ #define QC_CF_ERR_CONN 0x00000020 /* fatal error reported by transport layer */ @@ -40,6 +41,7 @@ struct qcc { uint64_t nb_sc; /* number of attached stream connectors */ uint64_t nb_hreq; /* number of in-progress http requests */ uint32_t flags; /* QC_CF_* */ + int glitches; /* total number of glitches on this connection */ /* flow-control fields set by us enforced on our side. */ struct { @@ -70,8 +72,7 @@ struct qcc { } rfctl; struct { - uint64_t offsets; /* sum of all offsets prepared */ - uint64_t sent_offsets; /* sum of all offset sent */ + struct quic_fctl fc; /* stream flow control applied on sending */ } tx; uint64_t largest_bidi_r; /* largest remote bidi stream ID opened. 
*/ @@ -83,6 +84,8 @@ struct qcc { struct list send_retry_list; /* list of qcs eligible to send retry */ struct list send_list; /* list of qcs ready to send (STREAM, STOP_SENDING or RESET_STREAM emission) */ + struct list fctl_list; /* list of sending qcs blocked on conn flow control */ + struct list buf_wait_list; /* list of qcs blocked on stream desc buf */ struct wait_event wait_event; /* To be used if we're waiting for I/Os */ @@ -105,7 +108,7 @@ struct qcc { #define QC_SF_FIN_STREAM 0x00000002 /* FIN bit must be set for last frame of the stream */ #define QC_SF_BLK_MROOM 0x00000004 /* app layer is blocked waiting for room in the qcs.tx.buf */ #define QC_SF_DETACH 0x00000008 /* sc is detached but there is remaining data to send */ -#define QC_SF_BLK_SFCTL 0x00000010 /* stream blocked due to stream flow control limit */ +/* unused 0x00000010 */ #define QC_SF_DEM_FULL 0x00000020 /* demux blocked on request channel buffer full */ #define QC_SF_READ_ABORTED 0x00000040 /* Rx closed using STOP_SENDING*/ #define QC_SF_TO_RESET 0x00000080 /* a RESET_STREAM must be sent */ @@ -155,10 +158,7 @@ struct qcs { uint64_t msd_init; /* initial max-stream-data */ } rx; struct { - uint64_t offset; /* last offset of data ready to be sent */ - uint64_t sent_offset; /* last offset sent by transport layer */ - struct buffer buf; /* transmit buffer before sending via xprt */ - uint64_t msd; /* fctl bytes limit to respect on emission */ + struct quic_fctl fc; /* stream flow control applied on sending */ } tx; struct eb64_node by_id; @@ -168,6 +168,8 @@ struct qcs { struct list el; /* element of qcc.send_retry_list */ struct list el_send; /* element of qcc.send_list */ struct list el_opening; /* element of qcc.opening_list */ + struct list el_fctl; /* element of qcc.fctl_list */ + struct list el_buf; /* element of qcc.buf_wait_list */ struct wait_event wait_event; struct wait_event *subs; @@ -185,18 +187,38 @@ enum qcc_app_ops_close_side { /* QUIC application layer operations */ struct 
qcc_app_ops { + /* Initialize <qcc> connection app context. */ int (*init)(struct qcc *qcc); + /* Finish connection initialization if prelude required. */ + int (*finalize)(void *ctx); + + /* Initialize <qcs> stream app context or leave it to NULL if rejected. */ int (*attach)(struct qcs *qcs, void *conn_ctx); - ssize_t (*decode_qcs)(struct qcs *qcs, struct buffer *b, int fin); - size_t (*snd_buf)(struct qcs *qcs, struct buffer *buf, size_t count); + + /* Convert received HTTP payload to HTX. */ + ssize_t (*rcv_buf)(struct qcs *qcs, struct buffer *b, int fin); + + /* Convert HTX to HTTP payload for sending. */ + size_t (*snd_buf)(struct qcs *qcs, struct buffer *b, size_t count); + + /* Negotiate and commit fast-forward data from opposite MUX. */ size_t (*nego_ff)(struct qcs *qcs, size_t count); size_t (*done_ff)(struct qcs *qcs); + + /* Notify about <qcs> stream closure. */ int (*close)(struct qcs *qcs, enum qcc_app_ops_close_side side); + /* Free <qcs> stream app context. */ void (*detach)(struct qcs *qcs); - int (*finalize)(void *ctx); - void (*shutdown)(void *ctx); /* Close a connection. */ + + /* Perform graceful shutdown. */ + void (*shutdown)(void *ctx); + /* Free connection app context. */ void (*release)(void *ctx); + + /* Increment app counters on CONNECTION_CLOSE_APP reception. */ void (*inc_err_cnt)(void *ctx, int err_code); + /* Set QCC error code as suspicious activity has been detected. 
*/ + void (*report_susp)(void *ctx); }; #endif /* USE_QUIC */ diff --git a/include/haproxy/mux_quic.h b/include/haproxy/mux_quic.h index 872c5ea..1ed8ad1 100644 --- a/include/haproxy/mux_quic.h +++ b/include/haproxy/mux_quic.h @@ -13,19 +13,24 @@ #include <haproxy/stconn.h> void qcc_set_error(struct qcc *qcc, int err, int app); +int qcc_report_glitch(struct qcc *qcc, int inc); struct qcs *qcc_init_stream_local(struct qcc *qcc, int bidi); struct stconn *qcs_attach_sc(struct qcs *qcs, struct buffer *buf, char fin); int qcs_is_close_local(struct qcs *qcs); int qcs_is_close_remote(struct qcs *qcs); -struct buffer *qcs_get_buf(struct qcs *qcs, struct buffer *bptr); int qcs_subscribe(struct qcs *qcs, int event_type, struct wait_event *es); void qcs_notify_recv(struct qcs *qcs); void qcs_notify_send(struct qcs *qcs); +int qcc_notify_buf(struct qcc *qcc); -void qcc_emit_cc_app(struct qcc *qcc, int err, int immediate); +struct buffer *qcc_get_stream_rxbuf(struct qcs *qcs); +struct buffer *qcc_get_stream_txbuf(struct qcs *qcs, int *err); +int qcc_realign_stream_txbuf(const struct qcs *qcs, struct buffer *out); +int qcc_release_stream_txbuf(struct qcs *qcs); +int qcc_stream_can_send(const struct qcs *qcs); void qcc_reset_stream(struct qcs *qcs, int err); -void qcc_send_stream(struct qcs *qcs, int urg); +void qcc_send_stream(struct qcs *qcs, int urg, int count); void qcc_abort_stream_read(struct qcs *qcs); int qcc_recv(struct qcc *qcc, uint64_t id, uint64_t len, uint64_t offset, char fin, char *data); @@ -111,6 +116,8 @@ static inline void qcs_wait_http_req(struct qcs *qcs) LIST_APPEND(&qcc->opening_list, &qcs->el_opening); } +void qcc_show_quic(struct qcc *qcc); + #endif /* USE_QUIC */ #endif /* _HAPROXY_MUX_QUIC_H */ diff --git a/include/haproxy/net_helper.h b/include/haproxy/net_helper.h index f019d30..ee27ed4 100644 --- a/include/haproxy/net_helper.h +++ b/include/haproxy/net_helper.h @@ -91,6 +91,34 @@ static inline void write_ptr(void *p, const void *ptr) return 
write_u64(p, (uintptr_t)ptr); } +/* Read a float in native host order */ +static inline float read_flt(const void *p) +{ + const union { float flt; } __attribute__((packed))*u = p; + return u->flt; +} + +/* Write a float in native host order */ +static inline void write_flt(void *p, const float flt) +{ + union { float flt; } __attribute__((packed))*u = p; + u->flt = flt; +} + +/* Read a double in native host order */ +static inline double read_dbl(const void *p) +{ + const union { double dbl; } __attribute__((packed))*u = p; + return u->dbl; +} + +/* Write a double in native host order */ +static inline void write_dbl(void *p, const double dbl) +{ + union { double dbl; } __attribute__((packed))*u = p; + u->dbl = dbl; +} + /* Read a possibly wrapping number of bytes <bytes> into destination <dst>. The * first segment is composed of <s1> bytes at p1. The remaining byte(s), if any, * are read from <p2>. <s1> may be zero and may also be larger than <bytes>. The diff --git a/include/haproxy/openssl-compat.h b/include/haproxy/openssl-compat.h index 5639468..d145fb4 100644 --- a/include/haproxy/openssl-compat.h +++ b/include/haproxy/openssl-compat.h @@ -48,6 +48,7 @@ #include <haproxy/quic_openssl_compat.h> #endif + #if defined(LIBRESSL_VERSION_NUMBER) /* LibreSSL is a fork of OpenSSL 1.0.1g but pretends to be 2.0.0, thus * systematically breaking when some code is written for a specific version @@ -108,6 +109,24 @@ #define HAVE_SSL_get0_verified_chain #endif +#if defined(SSL_OP_NO_ANTI_REPLAY) +#define HAVE_SSL_0RTT +#endif + +/* At this time, wolfssl, libressl and the openssl QUIC compatibility do not support 0-RTT */ +#if defined(HAVE_SSL_0RTT) && !defined(USE_QUIC_OPENSSL_COMPAT) && !defined(LIBRESSL_VERSION_NUMBER) && !defined(USE_OPENSSL_WOLFSSL) +#define HAVE_SSL_0RTT_QUIC +#endif + + +#if defined(SSL_CTX_set_security_level) || HA_OPENSSL_VERSION_NUMBER >= 0x1010100fL +#define HAVE_SSL_SET_SECURITY_LEVEL +#endif + +#if !defined(HAVE_SSL_SET_SECURITY_LEVEL) +/* 
define a nope function for set_security_level */ +#define SSL_CTX_set_security_level(ctx, level) ({}) +#endif #if (HA_OPENSSL_VERSION_NUMBER >= 0x3000000fL) #define HAVE_OSSL_PARAM @@ -372,6 +391,10 @@ static inline unsigned long ERR_peek_error_func(const char **func) #define EVP_CTRL_AEAD_SET_TAG EVP_CTRL_GCM_SET_TAG #endif +#if !defined(EVP_CTRL_AEAD_GET_TAG) +#define EVP_CTRL_AEAD_GET_TAG EVP_CTRL_GCM_GET_TAG +#endif + /* Supported hash function for TLS tickets */ #ifdef OPENSSL_NO_SHA256 #define TLS_TICKET_HASH_FUNCT EVP_sha1 @@ -483,5 +506,10 @@ static inline unsigned long ERR_peek_error_func(const char **func) #define SSL_CTX_set1_sigalgs_list SSL_CTX_set1_sigalgs_list #endif +#ifndef SSL_CTX_get_tlsext_status_cb +# define SSL_CTX_get_tlsext_status_cb(ctx, cb) \ + *(cb) = (void (*) (void))ctx->tlsext_status_cb +#endif + #endif /* USE_OPENSSL */ #endif /* _HAPROXY_OPENSSL_COMPAT_H */ diff --git a/include/haproxy/pattern-t.h b/include/haproxy/pattern-t.h index 6c1ba24..aa3a178 100644 --- a/include/haproxy/pattern-t.h +++ b/include/haproxy/pattern-t.h @@ -92,9 +92,11 @@ enum { PAT_MATCH_NUM }; -#define PAT_REF_MAP 0x1 /* Set if the reference is used by at least one map. */ -#define PAT_REF_ACL 0x2 /* Set if the reference is used by at least one acl. */ -#define PAT_REF_SMP 0x4 /* Flag used if the reference contains a sample. */ +#define PAT_REF_MAP 0x01 /* Set if the reference is used by at least one map. */ +#define PAT_REF_ACL 0x02 /* Set if the reference is used by at least one acl. */ +#define PAT_REF_SMP 0x04 /* Flag used if the reference contains a sample. */ +#define PAT_REF_FILE 0x08 /* Set if the reference was loaded from a file */ +#define PAT_REF_ID 0x10 /* Set if the reference is only an ID (not loaded from a file) */ /* This struct contain a list of reference strings for dunamically * updatable patterns. 
diff --git a/include/haproxy/peers-t.h b/include/haproxy/peers-t.h index 124fac3..19619d0 100644 --- a/include/haproxy/peers-t.h +++ b/include/haproxy/peers-t.h @@ -34,6 +34,102 @@ #include <haproxy/stick_table-t.h> #include <haproxy/thread-t.h> +/* peer state with respects of its applet, as seen from outside */ +enum peer_app_state { + PEER_APP_ST_STOPPED = 0, /* The peer has no applet */ + PEER_APP_ST_STARTING, /* The peer has an applet with a validated connection but sync task must ack it first */ + PEER_APP_ST_RUNNING, /* The starting state was processed by the sync task and the peer can process messages */ + PEER_APP_ST_STOPPING, /* The peer applet was released but the sync task must ack it before switching the peer in STOPPED state */ +}; + +/* peer learn state */ +enum peer_learn_state { + PEER_LR_ST_NOTASSIGNED = 0,/* The peer is not assigned for a lesson */ + PEER_LR_ST_ASSIGNED, /* The peer is assigned for a lesson */ + PEER_LR_ST_PROCESSING, /* The peer has started the lesson and it is not finished */ + PEER_LR_ST_FINISHED, /* The peer has finished the lesson, this state must be ack by the sync task */ +}; + +/******************************/ +/* peers section resync flags */ +/******************************/ +#define PEERS_F_RESYNC_LOCAL_FINISHED 0x00000001 /* Learn from local peer finished or no more needed */ +#define PEERS_F_RESYNC_REMOTE_FINISHED 0x00000002 /* Learn from remote peer finished or no more needed */ +#define PEERS_F_RESYNC_ASSIGN 0x00000004 /* A peer was assigned to learn our lesson */ +/* unused 0x00000008..0x00080000 */ +#define PEERS_F_DBG_RESYNC_LOCALTIMEOUT 0x00100000 /* Timeout waiting for a full resync from a local node was experienced at least once (for debugging purpose) */ +#define PEERS_F_DBG_RESYNC_REMOTETIMEOUT 0x00200000 /* Timeout waiting for a full resync from a remote node was experienced at least once (for debugging purpose) */ +#define PEERS_F_DBG_RESYNC_LOCALABORT 0x00400000 /* Session aborted learning from a local node
was experienced at least once (for debugging purpose) */ +#define PEERS_F_DBG_RESYNC_REMOTEABORT 0x00800000 /* Session aborted learning from a remote node was experienced at least once (for debugging purpose) */ +#define PEERS_F_DBG_RESYNC_LOCALFINISHED 0x01000000 /* A fully up to date local node teach us at least once (for debugging purpose) */ +#define PEERS_F_DBG_RESYNC_REMOTEFINISHED 0x02000000 /* A fully up to date remote node teach us at least once (for debugging purpose) */ +#define PEERS_F_DBG_RESYNC_LOCALPARTIAL 0x04000000 /* A partially up to date local node teach us at least once (for debugging purpose) */ +#define PEERS_F_DBG_RESYNC_REMOTEPARTIAL 0x08000000 /* A partially up to date remote node teach us at least once (for debugging purpose) */ +#define PEERS_F_DBG_RESYNC_LOCALASSIGN 0x10000000 /* A local node was assigned for a full resync at least once (for debugging purpose) */ +#define PEERS_F_DBG_RESYNC_REMOTEASSIGN 0x20000000 /* A remote node was assigned for a full resync at least once (for debugging purpose) */ + +#define PEERS_RESYNC_FROMLOCAL 0x00000000 /* No resync finished, must be performed from local first */ +#define PEERS_RESYNC_FROMREMOTE PEERS_F_RESYNC_LOCAL_FINISHED /* Resync from local peer finished, must be performed from remote peer now */ +#define PEERS_RESYNC_STATEMASK (PEERS_F_RESYNC_LOCAL_FINISHED|PEERS_F_RESYNC_REMOTE_FINISHED) +#define PEERS_RESYNC_FINISHED (PEERS_F_RESYNC_LOCAL_FINISHED|PEERS_F_RESYNC_REMOTE_FINISHED) + +/* This function is used to report flags in debugging tools. Please reflect + * below any single-bit flag addition above in the same order via the + * __APPEND_FLAG macro. The new end of the buffer is returned. + */ +static forceinline char *peers_show_flags(char *buf, size_t len, const char *delim, uint flg) +{ +#define _(f, ...) 
__APPEND_FLAG(buf, len, delim, flg, f, #f, __VA_ARGS__) + /* prologue */ + _(0); + /* flags */ + _(PEERS_F_RESYNC_LOCAL_FINISHED, _(PEERS_F_RESYNC_REMOTE_FINISHED, _(PEERS_F_RESYNC_ASSIGN, + _(PEERS_F_DBG_RESYNC_LOCALTIMEOUT, _(PEERS_F_DBG_RESYNC_REMOTETIMEOUT, + _(PEERS_F_DBG_RESYNC_LOCALABORT, _(PEERS_F_DBG_RESYNC_REMOTEABORT, + _(PEERS_F_DBG_RESYNC_LOCALFINISHED, _(PEERS_F_DBG_RESYNC_REMOTEFINISHED, + _(PEERS_F_DBG_RESYNC_LOCALPARTIAL, _(PEERS_F_DBG_RESYNC_REMOTEPARTIAL, + _(PEERS_F_DBG_RESYNC_LOCALASSIGN, _(PEERS_F_DBG_RESYNC_REMOTEASSIGN))))))))))))); + /* epilogue */ + _(~0U); + return buf; +#undef _ +} + +/******************************/ +/* Peer flags */ +/******************************/ +#define PEER_F_TEACH_PROCESS 0x00000001 /* Teach a lesson to current peer */ +#define PEER_F_TEACH_FINISHED 0x00000002 /* Teach conclude, (wait for confirm) */ +#define PEER_F_LOCAL_TEACH_COMPLETE 0x00000004 /* The old local peer taught all that it known to new one */ +#define PEER_F_LEARN_NOTUP2DATE 0x00000008 /* Learn from peer finished but peer is not up to date */ +#define PEER_F_WAIT_SYNCTASK_ACK 0x00000010 /* Stop all processing waiting for the sync task acknowledgement when the applet state changes */ +#define PEER_F_ALIVE 0x00000020 /* Used to flag a peer as alive. */ +#define PEER_F_HEARTBEAT 0x00000040 /* Heartbeat message to send. */ +#define PEER_F_DWNGRD 0x00000080 /* When this flag is enabled, we must downgrade the supported version announced during peer sessions. */ +/* unused 0x00000100..0x00080000 */ +#define PEER_F_DBG_RESYNC_REQUESTED 0x00100000 /* A resync was explicitly requested at least once (for debugging purpose) */ + +#define PEER_TEACH_FLAGS (PEER_F_TEACH_PROCESS|PEER_F_TEACH_FINISHED) + +/* This function is used to report flags in debugging tools. Please reflect + * below any single-bit flag addition above in the same order via the + * __APPEND_FLAG macro. The new end of the buffer is returned. 
+ */ +static forceinline char *peer_show_flags(char *buf, size_t len, const char *delim, uint flg) +{ +#define _(f, ...) __APPEND_FLAG(buf, len, delim, flg, f, #f, __VA_ARGS__) + /* prologue */ + _(0); + /* flags */ + _(PEER_F_TEACH_PROCESS, _(PEER_F_TEACH_FINISHED, _(PEER_F_LOCAL_TEACH_COMPLETE, + _(PEER_F_LEARN_NOTUP2DATE, _(PEER_F_WAIT_SYNCTASK_ACK, + _(PEER_F_ALIVE, _(PEER_F_HEARTBEAT, _(PEER_F_DWNGRD, + _(PEER_F_DBG_RESYNC_REQUESTED))))))))); + /* epilogue */ + _(~0U); + return buf; +#undef _ +} struct shared_table { struct stktable *table; /* stick table to sync */ @@ -52,6 +148,8 @@ struct shared_table { struct peer { int local; /* proxy state */ + enum peer_app_state appstate; /* peer app state */ + enum peer_learn_state learnstate; /* peer learn state */ __decl_thread(HA_SPINLOCK_T lock); /* lock used to handle this peer section */ char *id; struct { @@ -59,10 +157,6 @@ struct peer { int line; /* line where the section appears */ } conf; /* config information */ time_t last_change; - struct sockaddr_storage addr; /* peer address */ - struct protocol *proto; /* peer address protocol */ - struct xprt_ops *xprt; /* peer socket operations at transport layer */ - void *sock_init_arg; /* socket operations's opaque init argument if needed */ unsigned int flags; /* peer session flags */ unsigned int statuscode; /* current/last session status code */ unsigned int reconnect; /* next connect timer */ diff --git a/include/haproxy/peers.h b/include/haproxy/peers.h index e3c5fd3..d100c0c 100644 --- a/include/haproxy/peers.h +++ b/include/haproxy/peers.h @@ -40,30 +40,10 @@ int peers_register_table(struct peers *, struct stktable *table); void peers_setup_frontend(struct proxy *fe); void peers_register_keywords(struct peers_kw_list *pkwl); -#if defined(USE_OPENSSL) static inline enum obj_type *peer_session_target(struct peer *p, struct stream *s) { - if (p->srv->use_ssl) - return &p->srv->obj_type; - else - return &s->be->obj_type; + return &p->srv->obj_type; } -static 
inline struct xprt_ops *peer_xprt(struct peer *p) -{ - return p->srv->use_ssl ? xprt_get(XPRT_SSL) : xprt_get(XPRT_RAW); -} -#else -static inline enum obj_type *peer_session_target(struct peer *p, struct stream *s) -{ - return &s->be->obj_type; -} - -static inline struct xprt_ops *peer_xprt(struct peer *p) -{ - return xprt_get(XPRT_RAW); -} -#endif - #endif /* _HAPROXY_PEERS_H */ diff --git a/include/haproxy/pool.h b/include/haproxy/pool.h index bf7cb8d..66ad292 100644 --- a/include/haproxy/pool.h +++ b/include/haproxy/pool.h @@ -77,7 +77,7 @@ if (likely(!(pool_debugging & POOL_DBG_TAG))) \ break; \ if (*(typeof(pool)*)(((char *)__i) + __p->size) != __p) { \ - pool_inspect_item("tag mismatch on free()", pool, item, caller); \ + pool_inspect_item("tag mismatch on free()", __p, __i, caller, -1); \ ABORT_NOW(); \ } \ } while (0) @@ -126,7 +126,7 @@ void *pool_destroy(struct pool_head *pool); void pool_destroy_all(void); void *__pool_alloc(struct pool_head *pool, unsigned int flags); void __pool_free(struct pool_head *pool, void *ptr); -void pool_inspect_item(const char *msg, struct pool_head *pool, const void *item, const void *caller); +void pool_inspect_item(const char *msg, struct pool_head *pool, const void *item, const void *caller, ssize_t ofs); /****************** Thread-local cache management ******************/ diff --git a/include/haproxy/proto_quic.h b/include/haproxy/proto_quic.h index a0e2b98..b420f35 100644 --- a/include/haproxy/proto_quic.h +++ b/include/haproxy/proto_quic.h @@ -21,6 +21,10 @@ #ifndef _HAPROXY_PROTO_QUIC_H #define _HAPROXY_PROTO_QUIC_H +#include <import/ebtree.h> +#include <haproxy/api-t.h> +#include <haproxy/thread-t.h> + extern struct protocol proto_quic4; extern struct protocol proto_quic6; diff --git a/include/haproxy/proto_rhttp.h b/include/haproxy/proto_rhttp.h index 421680f..6676e04 100644 --- a/include/haproxy/proto_rhttp.h +++ b/include/haproxy/proto_rhttp.h @@ -10,6 +10,7 @@ int rhttp_bind_receiver(struct receiver *rx, char 
**errmsg); int rhttp_bind_listener(struct listener *listener, char *errmsg, int errlen); void rhttp_enable_listener(struct listener *l); void rhttp_disable_listener(struct listener *l); +int rhttp_suspend_listener(struct listener *l); struct connection *rhttp_accept_conn(struct listener *l, int *status); void rhttp_unbind_receiver(struct listener *l); int rhttp_set_affinity(struct connection *conn, int new_tid); diff --git a/include/haproxy/proto_sockpair.h b/include/haproxy/proto_sockpair.h index bb0256e..e8cb2ac 100644 --- a/include/haproxy/proto_sockpair.h +++ b/include/haproxy/proto_sockpair.h @@ -21,6 +21,8 @@ #ifndef _HAPROXY_PROTO_SOCKPAIR_H #define _HAPROXY_PROTO_SOCKPAIR_H +#include <haproxy/receiver-t.h> + extern struct proto_fam proto_fam_sockpair; extern struct protocol proto_sockpair; diff --git a/include/haproxy/proto_udp.h b/include/haproxy/proto_udp.h index 1c4da77..6475bf9 100644 --- a/include/haproxy/proto_udp.h +++ b/include/haproxy/proto_udp.h @@ -24,6 +24,8 @@ #ifndef _PROTO_PROTO_UDP_H #define _PROTO_PROTO_UDP_H +#include <haproxy/receiver-t.h> + extern struct protocol proto_udp4; extern struct protocol proto_udp6; diff --git a/include/haproxy/protobuf.h b/include/haproxy/protobuf.h index 009bd13..512288b 100644 --- a/include/haproxy/protobuf.h +++ b/include/haproxy/protobuf.h @@ -365,13 +365,13 @@ int protobuf_smp_store_64bit(struct sample *smp, int type, case PBUF_T_64BIT_FIXED64: case PBUF_T_64BIT_SFIXED64: smp->data.type = SMP_T_SINT; - smp->data.u.sint = pbuf_le64toh(*(uint64_t *)pos); + smp->data.u.sint = pbuf_le64toh(read_u64(pos)); smp->flags = SMP_F_VOL_TEST; break; case PBUF_T_64BIT_DOUBLE: smp->data.type = SMP_T_SINT; - smp->data.u.sint = pbuf_le64toh(*(double *)pos); + smp->data.u.sint = pbuf_le64toh(read_dbl(pos)); smp->flags = SMP_F_VOL_TEST; break; @@ -455,19 +455,19 @@ int protobuf_smp_store_32bit(struct sample *smp, int type, case PBUF_T_32BIT_FIXED32: smp->data.type = SMP_T_SINT; - smp->data.u.sint = pbuf_le32toh(*(uint32_t 
*)pos); + smp->data.u.sint = pbuf_le32toh(read_u32(pos)); smp->flags = SMP_F_VOL_TEST; break; case PBUF_T_32BIT_SFIXED32: smp->data.type = SMP_T_SINT; - smp->data.u.sint = (int32_t)pbuf_le32toh(*(uint32_t *)pos); + smp->data.u.sint = (int32_t)pbuf_le32toh(read_u32(pos)); smp->flags = SMP_F_VOL_TEST; break; case PBUF_T_32BIT_FLOAT: smp->data.type = SMP_T_SINT; - smp->data.u.sint = pbuf_le32toh(*(float *)pos); + smp->data.u.sint = pbuf_le32toh(read_flt(pos)); smp->flags = SMP_F_VOL_TEST; break; diff --git a/include/haproxy/protocol-t.h b/include/haproxy/protocol-t.h index b85f29c..0c5bd9e 100644 --- a/include/haproxy/protocol-t.h +++ b/include/haproxy/protocol-t.h @@ -138,6 +138,17 @@ struct protocol { struct list list; /* list of registered protocols (under proto_lock) */ }; +/* Transport protocol identifiers which can be used as masked values. */ +enum ha_proto { + HA_PROTO_NONE = 0x00, + + HA_PROTO_TCP = 0x01, + HA_PROTO_UDP = 0x02, + HA_PROTO_QUIC = 0x04, + + HA_PROTO_ANY = 0xff, +}; + #endif /* _HAPROXY_PROTOCOL_T_H */ /* diff --git a/include/haproxy/proxy-t.h b/include/haproxy/proxy-t.h index 2f7bf7b..f6ed211 100644 --- a/include/haproxy/proxy-t.h +++ b/include/haproxy/proxy-t.h @@ -34,7 +34,7 @@ #include <haproxy/backend-t.h> #include <haproxy/compression-t.h> #include <haproxy/counters-t.h> -#include <haproxy/freq_ctr-t.h> +#include <haproxy/guid-t.h> #include <haproxy/obj_type-t.h> #include <haproxy/queue-t.h> #include <haproxy/server-t.h> @@ -92,11 +92,11 @@ enum PR_SRV_STATE_FILE { #define PR_O_IGNORE_PRB 0x00000200 /* ignore empty requests (aborts and timeouts) */ #define PR_O_NULLNOLOG 0x00000400 /* a connect without request will not be logged */ #define PR_O_WREQ_BODY 0x00000800 /* always wait for the HTTP request body */ -#define PR_O_HTTP_UPG 0x00001000 /* Contain a "switch-mode http" tcp-request rule */ +#define PR_O_HTTP_UPG 0x00001000 /* implicit (default/use backend) or explicit (switch-mode) http upgrade */ /* unused: 0x00002000 */ #define 
PR_O_PERSIST 0x00004000 /* server persistence stays effective even when server is down */ #define PR_O_LOGASAP 0x00008000 /* log as soon as possible, without waiting for the stream to complete */ -#define PR_O_ERR_LOGFMT 0x00010000 /* use log-format for connection error message */ +/* unused: 0x00010000 */ #define PR_O_CHK_CACHE 0x00020000 /* require examination of cacheability of the 'set-cookie' field */ #define PR_O_TCP_CLI_KA 0x00040000 /* enable TCP keep-alive on client-side streams */ #define PR_O_TCP_SRV_KA 0x00080000 /* enable TCP keep-alive on server-side streams */ @@ -214,6 +214,7 @@ enum PR_SRV_STATE_FILE { #define PR_FL_EXPLICIT_REF 0x08 /* The default proxy is explicitly referenced by another proxy */ #define PR_FL_IMPLICIT_REF 0x10 /* The default proxy is implicitly referenced by another proxy */ #define PR_FL_PAUSED 0x20 /* The proxy was paused at run time (reversible) */ +#define PR_FL_CHECKED 0x40 /* The proxy configuration was fully checked (including postparsing checks) */ struct stream; @@ -352,10 +353,6 @@ struct proxy { struct queue queue; /* queued requests (pendconns) */ int totpend; /* total number of pending connections on this instance (for stats) */ unsigned int feconn, beconn; /* # of active frontend and backends streams */ - struct freq_ctr fe_req_per_sec; /* HTTP requests per second on the frontend */ - struct freq_ctr fe_conn_per_sec; /* received connections per second on the frontend */ - struct freq_ctr fe_sess_per_sec; /* accepted sessions per second on the frontend (after tcp rules) */ - struct freq_ctr be_sess_per_sec; /* sessions per second on the backend */ unsigned int fe_sps_lim; /* limit on new sessions per second on the frontend */ unsigned int fullconn; /* #conns on backend above which servers are used at full load */ unsigned int tot_fe_maxconn; /* #maxconn of frontends linked to that backend, it is used to compute fullconn */ @@ -363,9 +360,7 @@ struct proxy { int conn_retries; /* maximum number of connect retries */ 
unsigned int retry_type; /* Type of retry allowed */ int redispatch_after; /* number of retries before redispatch */ - unsigned down_trans; /* up-down transitions */ unsigned down_time; /* total time the proxy was down */ - time_t last_change; /* last time, when the state was changed */ int (*accept)(struct stream *s); /* application layer's accept() */ struct conn_src conn_src; /* connection source settings */ enum obj_type *default_target; /* default target to use for accepted streams or NULL */ @@ -373,12 +368,12 @@ struct proxy { struct proxy *next_stkt_ref; /* Link to the list of proxies which refer to the same stick-table. */ struct list loggers; /* one per 'log' directive */ - struct list logformat; /* log_format linked list */ - struct list logformat_sd; /* log_format linked list for the RFC5424 structured-data part */ - struct list logformat_error; /* log_format linked list used in case of connection error on the frontend */ + struct lf_expr logformat; /* log_format linked list */ + struct lf_expr logformat_sd; /* log_format linked list for the RFC5424 structured-data part */ + struct lf_expr logformat_error; /* log_format linked list used in case of connection error on the frontend */ struct buffer log_tag; /* override default syslog tag */ struct ist header_unique_id; /* unique-id header */ - struct list format_unique_id; /* unique-id format */ + struct lf_expr format_unique_id; /* unique-id format */ int to_log; /* things to be logged (LW_*) */ int nb_req_cap, nb_rsp_cap; /* # of headers to be captured */ struct cap_hdr *req_cap; /* chained list of request headers to be captured */ @@ -426,18 +421,7 @@ struct proxy { struct arg_list args; /* sample arg list that need to be resolved */ unsigned int refcount; /* refcount on this proxy (only used for default proxy for now) */ struct ebpt_node by_name; /* proxies are stored sorted by name here */ - char *logformat_string; /* log format string */ - char *lfs_file; /* file name where the logformat string 
appears (strdup) */ - int lfs_line; /* file name where the logformat string appears */ - int uif_line; /* file name where the unique-id-format string appears */ - char *uif_file; /* file name where the unique-id-format string appears (strdup) */ - char *uniqueid_format_string; /* unique-id format string */ - char *logformat_sd_string; /* log format string for the RFC5424 structured-data part */ - char *lfsd_file; /* file name where the structured-data logformat string for RFC5424 appears (strdup) */ - int lfsd_line; /* file name where the structured-data logformat string for RFC5424 appears */ - char *error_logformat_string; - char *elfs_file; - int elfs_line; + struct list lf_checks; /* list of logformats found in the proxy section that needs to be checked during postparse */ } conf; /* config information */ struct http_ext *http_ext; /* http ext options */ struct eb_root used_server_addr; /* list of server addresses in use */ @@ -467,6 +451,8 @@ struct proxy { */ struct list filter_configs; /* list of the filters that are declared on this proxy */ + struct guid_node guid; /* GUID global tree node */ + EXTRA_COUNTERS(extra_counters_fe); EXTRA_COUNTERS(extra_counters_be); }; @@ -478,7 +464,7 @@ struct switching_rule { union { struct proxy *backend; /* target backend */ char *name; /* target backend name during config parsing */ - struct list expr; /* logformat expression to use for dynamic rules */ + struct lf_expr expr; /* logformat expression to use for dynamic rules */ } be; char *file; int line; @@ -492,7 +478,7 @@ struct server_rule { struct server *ptr; /* target server */ char *name; /* target server name during config parsing */ } srv; - struct list expr; /* logformat expression to use for dynamic rules */ + struct lf_expr expr; /* logformat expression to use for dynamic rules */ char *file; int line; }; @@ -521,7 +507,7 @@ struct redirect_rule { int type; int rdr_len; char *rdr_str; - struct list rdr_fmt; + struct lf_expr rdr_fmt; int code; unsigned int 
flags; int cookie_len; diff --git a/include/haproxy/proxy.h b/include/haproxy/proxy.h index efdfa21..974c78a 100644 --- a/include/haproxy/proxy.h +++ b/include/haproxy/proxy.h @@ -59,8 +59,6 @@ struct proxy *proxy_find_by_id(int id, int cap, int table); struct proxy *proxy_find_by_name(const char *name, int cap, int table); struct proxy *proxy_find_best_match(int cap, const char *name, int id, int *diff); struct server *findserver(const struct proxy *px, const char *name); -struct server *findserver_unique_id(const struct proxy *px, int puid, uint32_t rid); -struct server *findserver_unique_name(const struct proxy *px, const char *name, uint32_t rid); int proxy_cfg_ensure_no_http(struct proxy *curproxy); int proxy_cfg_ensure_no_log(struct proxy *curproxy); void init_new_proxy(struct proxy *p); @@ -136,7 +134,7 @@ static inline void proxy_inc_fe_conn_ctr(struct listener *l, struct proxy *fe) if (l && l->counters) _HA_ATOMIC_INC(&l->counters->cum_conn); HA_ATOMIC_UPDATE_MAX(&fe->fe_counters.cps_max, - update_freq_ctr(&fe->fe_conn_per_sec, 1)); + update_freq_ctr(&fe->fe_counters.conn_per_sec, 1)); } /* increase the number of cumulated connections accepted by the designated frontend */ @@ -147,7 +145,7 @@ static inline void proxy_inc_fe_sess_ctr(struct listener *l, struct proxy *fe) if (l && l->counters) _HA_ATOMIC_INC(&l->counters->cum_sess); HA_ATOMIC_UPDATE_MAX(&fe->fe_counters.sps_max, - update_freq_ctr(&fe->fe_sess_per_sec, 1)); + update_freq_ctr(&fe->fe_counters.sess_per_sec, 1)); } /* increase the number of cumulated HTTP sessions on the designated frontend. 
@@ -165,12 +163,12 @@ static inline void proxy_inc_fe_cum_sess_ver_ctr(struct listener *l, struct prox _HA_ATOMIC_INC(&l->counters->cum_sess_ver[http_ver - 1]); } -/* increase the number of cumulated connections on the designated backend */ +/* increase the number of cumulated streams on the designated backend */ static inline void proxy_inc_be_ctr(struct proxy *be) { - _HA_ATOMIC_INC(&be->be_counters.cum_conn); + _HA_ATOMIC_INC(&be->be_counters.cum_sess); HA_ATOMIC_UPDATE_MAX(&be->be_counters.sps_max, - update_freq_ctr(&be->be_sess_per_sec, 1)); + update_freq_ctr(&be->be_counters.sess_per_sec, 1)); } /* increase the number of cumulated requests on the designated frontend. @@ -187,7 +185,7 @@ static inline void proxy_inc_fe_req_ctr(struct listener *l, struct proxy *fe, if (l && l->counters) _HA_ATOMIC_INC(&l->counters->p.http.cum_req[http_ver]); HA_ATOMIC_UPDATE_MAX(&fe->fe_counters.p.http.rps_max, - update_freq_ctr(&fe->fe_req_per_sec, 1)); + update_freq_ctr(&fe->fe_counters.req_per_sec, 1)); } /* Returns non-zero if the proxy is configured to retry a request if we got that status, 0 otherwise */ diff --git a/include/haproxy/qmux_http.h b/include/haproxy/qmux_http.h index a7dbe7c..4a77114 100644 --- a/include/haproxy/qmux_http.h +++ b/include/haproxy/qmux_http.h @@ -10,7 +10,6 @@ size_t qcs_http_rcv_buf(struct qcs *qcs, struct buffer *buf, size_t count, char *fin); size_t qcs_http_snd_buf(struct qcs *qcs, struct buffer *buf, size_t count, char *fin); -size_t qcs_http_reset_buf(struct qcs *qcs, struct buffer *buf, size_t count); #endif /* USE_QUIC */ diff --git a/include/haproxy/qpack-dec.h b/include/haproxy/qpack-dec.h index 993f450..f0f531f 100644 --- a/include/haproxy/qpack-dec.h +++ b/include/haproxy/qpack-dec.h @@ -21,6 +21,8 @@ #ifndef _HAPROXY_QPACK_DEC_H #define _HAPROXY_QPACK_DEC_H +#include <inttypes.h> + struct buffer; struct http_hdr; @@ -28,12 +30,13 @@ struct http_hdr; *Nothing to see with the RFC. 
*/ enum { - QPACK_ERR_NONE = 0, /* no error */ - QPACK_ERR_RIC, /* cannot decode Required Insert Count prefix field */ - QPACK_ERR_DB, /* cannot decode Delta Base prefix field */ - QPACK_ERR_TRUNCATED, /* truncated stream */ - QPACK_ERR_HUFFMAN, /* huffman decoding error */ - QPACK_ERR_TOO_LARGE, /* decoded request/response is too large */ + QPACK_RET_NONE = 0, /* no error */ + QPACK_RET_DECOMP, /* corresponds to RFC 9204 decompression error */ + QPACK_RET_RIC, /* cannot decode Required Insert Count prefix field */ + QPACK_RET_DB, /* cannot decode Delta Base prefix field */ + QPACK_RET_TRUNCATED, /* truncated stream */ + QPACK_RET_HUFFMAN, /* huffman decoding error */ + QPACK_RET_TOO_LARGE, /* decoded request/response is too large */ }; struct qpack_dec { @@ -48,4 +51,6 @@ int qpack_decode_fs(const unsigned char *buf, uint64_t len, struct buffer *tmp, int qpack_decode_enc(struct buffer *buf, int fin, void *ctx); int qpack_decode_dec(struct buffer *buf, int fin, void *ctx); +int qpack_err_decode(const int value); + #endif /* _HAPROXY_QPACK_DEC_H */ diff --git a/include/haproxy/qpack-t.h b/include/haproxy/qpack-t.h index 0e1736a..1cc6dab 100644 --- a/include/haproxy/qpack-t.h +++ b/include/haproxy/qpack-t.h @@ -43,5 +43,12 @@ #define QPACK_DEC_INST_SCCL 0x40 // Stream Cancellation #define QPACK_DEC_INST_SACK 0x80 // Section Acknowledgment +/* RFC 9204 6. 
Error Handling */ +enum qpack_err { + QPACK_ERR_DECOMPRESSION_FAILED = 0x200, + QPACK_ERR_ENCODER_STREAM_ERROR = 0x201, + QPACK_ERR_DECODER_STREAM_ERROR = 0x202, +}; + #endif /* USE_QUIC */ #endif /* _HAPROXY_QPACK_T_H */ diff --git a/include/haproxy/qpack-tbl-t.h b/include/haproxy/qpack-tbl-t.h index c27c623..7b3e2f9 100644 --- a/include/haproxy/qpack-tbl-t.h +++ b/include/haproxy/qpack-tbl-t.h @@ -26,6 +26,8 @@ #ifndef _HAPROXY_QPACK_TBL_T_H #define _HAPROXY_QPACK_TBL_T_H +#include <inttypes.h> + /* * Gcc before 3.0 needs [0] to declare a variable-size array */ diff --git a/include/haproxy/queue-t.h b/include/haproxy/queue-t.h index 8f6a1ec..7d9c31c 100644 --- a/include/haproxy/queue-t.h +++ b/include/haproxy/queue-t.h @@ -24,6 +24,7 @@ #include <import/ebtree-t.h> #include <haproxy/api-t.h> +#include <haproxy/thread-t.h> struct proxy; struct server; diff --git a/include/haproxy/quic_ack-t.h b/include/haproxy/quic_ack-t.h index 95b77f1..64182e6 100644 --- a/include/haproxy/quic_ack-t.h +++ b/include/haproxy/quic_ack-t.h @@ -13,6 +13,10 @@ #ifndef _HAPROXY_QUIC_ACK_T_H #define _HAPROXY_QUIC_ACK_T_H +#include <inttypes.h> +#include <stddef.h> +#include <import/eb64tree.h> + /* The maximum number of ack ranges to be built in ACK frames */ #define QUIC_MAX_ACK_RANGES 32 diff --git a/include/haproxy/quic_ack.h b/include/haproxy/quic_ack.h index 540e2c0..baa27ac 100644 --- a/include/haproxy/quic_ack.h +++ b/include/haproxy/quic_ack.h @@ -13,6 +13,12 @@ #ifndef _HAPROXY_QUIC_ACK_H #define _HAPROXY_QUIC_ACK_H +#include <inttypes.h> + +struct quic_conn; +struct quic_arng; +struct quic_arngs; + void quic_free_arngs(struct quic_conn *qc, struct quic_arngs *arngs); int quic_update_ack_ranges_list(struct quic_conn *qc, struct quic_arngs *arngs, diff --git a/include/haproxy/quic_cc-t.h b/include/haproxy/quic_cc-t.h index 888efca..e678172 100644 --- a/include/haproxy/quic_cc-t.h +++ b/include/haproxy/quic_cc-t.h @@ -46,6 +46,8 @@ extern unsigned long long last_ts; enum 
quic_cc_algo_state_type { /* Slow start. */ QUIC_CC_ST_SS, + /* Conservative slow start (HyStart++ only) */ + QUIC_CC_ST_CS, /* Congestion avoidance. */ QUIC_CC_ST_CA, /* Recovery period. */ @@ -66,6 +68,7 @@ struct quic_cc_event { union { struct ack { uint64_t acked; + uint64_t pn; unsigned int time_sent; } ack; struct loss { @@ -84,7 +87,7 @@ struct quic_cc { /* <conn> is there only for debugging purpose. */ struct quic_conn *qc; struct quic_cc_algo *algo; - uint32_t priv[16]; + uint32_t priv[18]; }; struct quic_cc_path { @@ -117,6 +120,7 @@ struct quic_cc_algo { void (*event)(struct quic_cc *cc, struct quic_cc_event *ev); void (*slow_start)(struct quic_cc *cc); void (*state_trace)(struct buffer *buf, const struct quic_cc *cc); + void (*hystart_start_round)(struct quic_cc *cc, uint64_t pn); }; #endif /* USE_QUIC */ diff --git a/include/haproxy/quic_cc_hystart.h b/include/haproxy/quic_cc_hystart.h new file mode 100644 index 0000000..4ed122c --- /dev/null +++ b/include/haproxy/quic_cc_hystart.h @@ -0,0 +1,129 @@ +/* RFC 9406: HyStart++: Modified Slow Start for TCP. */ + +/* HyStart++ constants */ +#define HYSTART_MIN_RTT_THRESH 4U /* ms */ +#define HYSTART_MAX_RTT_THRESH 16U /* ms */ +#define HYSTART_MIN_RTT_DIVISOR 8 +#define HYSTART_N_RTT_SAMPLE 8 +#define HYSTART_CSS_GROWTH_DIVISOR 4 +#define HYSTART_CSS_ROUNDS 5 +#define HYSTART_LIMIT 8 /* Must be infinite if paced */ + +#define QUIC_CLAMP(a, b, c) ({ \ + typeof(a) _a = (a); \ + typeof(b) _b = (b); \ + typeof(c) _c = (c); \ + (void) (&_a == &_b); \ + (void) (&_b == &_c); \ + _b < _a ? _a : _b > _c ? _c : _b; }) + +struct quic_hystart { + /* Current round minimum RTT. */ + uint32_t curr_rnd_min_rtt; + /* Last round minimum RTT. */ + uint32_t last_rnd_min_rtt; + /* Conservative Slow State baseline minimum RTT */ + uint32_t css_baseline_min_rtt; + uint32_t rtt_sample_count; + uint32_t css_rnd_count; + uint64_t wnd_end; +}; + +/* Reset <h> Hystart++ algorithm state. + * Never fail. 
+ */ +static inline void quic_cc_hystart_reset(struct quic_hystart *h) +{ + h->curr_rnd_min_rtt = UINT32_MAX; + h->last_rnd_min_rtt = UINT32_MAX; + h->css_baseline_min_rtt = UINT32_MAX; + h->rtt_sample_count = 0; + h->css_rnd_count = 0; + h->wnd_end = UINT64_MAX; +} + +/* Track the minimum RTT. */ +static inline void quic_cc_hystart_track_min_rtt(struct quic_cc *cc, + struct quic_hystart *h, + unsigned int latest_rtt) +{ + if (h->wnd_end == UINT64_MAX) + return; + + h->curr_rnd_min_rtt = QUIC_MIN(h->curr_rnd_min_rtt, latest_rtt); + h->rtt_sample_count++; +} + +/* RFC 9406 4.2. Algorithm Details + * At the start of each round during standard slow start [RFC5681] and CSS, + * initialize the variables used to compute the last round's and current round's + * minimum RTT. + * + * Never fail. + */ +static inline void quic_cc_hystart_start_round(struct quic_hystart *h, uint64_t pn) +{ + if (h->wnd_end != UINT64_MAX) { + /* Round already started */ + return; + } + + h->wnd_end = pn; + h->last_rnd_min_rtt = h->curr_rnd_min_rtt; + h->rtt_sample_count = 0; +} + +/* RFC 9406 4.2. Algorithm Details + * For rounds where at least N_RTT_SAMPLE RTT samples have been obtained and + * currentRoundMinRTT and lastRoundMinRTT are valid, check to see if delay + * increase triggers slow start exit. + * + * Depending on <h> HyStart++ algorithm state, returns 1 if the underlying + * congestion control algorithm may enter the Conservative Slow Start (CSS) + * state, 0 if not.
+ */ +static inline int quic_cc_hystart_may_enter_cs(struct quic_hystart *h) +{ + uint32_t rtt_thresh; + + if (h->rtt_sample_count < HYSTART_N_RTT_SAMPLE || + h->curr_rnd_min_rtt == UINT32_MAX || h->last_rnd_min_rtt == UINT32_MAX) + return 0; + + rtt_thresh = QUIC_CLAMP(HYSTART_MIN_RTT_THRESH, + h->last_rnd_min_rtt / HYSTART_MIN_RTT_DIVISOR, + HYSTART_MAX_RTT_THRESH); + if (h->curr_rnd_min_rtt + rtt_thresh >= h->last_rnd_min_rtt) { + h->css_baseline_min_rtt = h->curr_rnd_min_rtt; + h->rtt_sample_count = 0; + return 1; + } + + return 0; +} + + +/* RFC 9406 4.2. Algorithm Details + * For CSS rounds where at least N_RTT_SAMPLE RTT samples have been obtained, + * check to see if the current round's minRTT drops below baseline (cssBaselineMinRtt) + * indicating that slow start exit was spurious. + * + * Return 1 if slow start exit was spurious, 0 if not. If the slow start + * exit was spurious, the caller must update the underlying congestion control + * algorithm to make it re-enter slow start state. + */ +static inline int quic_cc_hystart_may_reenter_ss(struct quic_hystart *h) +{ + if (h->rtt_sample_count < HYSTART_N_RTT_SAMPLE) + return 0; + + h->css_rnd_count++; + h->rtt_sample_count = 0; + + if (h->curr_rnd_min_rtt >= h->css_baseline_min_rtt) { + return 0; + } + + h->css_baseline_min_rtt = UINT32_MAX; + return 1; +} diff --git a/include/haproxy/quic_conn-t.h b/include/haproxy/quic_conn-t.h index 8aec6f0..a126e04 100644 --- a/include/haproxy/quic_conn-t.h +++ b/include/haproxy/quic_conn-t.h @@ -176,8 +176,15 @@ enum quic_pkt_type { */ #define QUIC_CONN_MAX_PACKET 64 -#define QUIC_STATELESS_RESET_PACKET_HEADER_LEN 5 -#define QUIC_STATELESS_RESET_PACKET_MINLEN (22 + QUIC_HAP_CID_LEN) +/* RFC 9000 10.3. Stateless Reset + * + * To entities other than its intended recipient, a Stateless Reset will + * appear to be a packet with a short header.
For the Stateless Reset to + * appear as a valid QUIC packet, the Unpredictable Bits field needs to + * include at least 38 bits of data (or 5 bytes, less the two fixed + * bits). + */ +#define QUIC_STATELESS_RESET_PACKET_MINLEN (5 + QUIC_STATELESS_RESET_TOKEN_LEN) /* Similar to kernel min()/max() definitions. */ #define QUIC_MIN(a, b) ({ \ diff --git a/include/haproxy/quic_conn.h b/include/haproxy/quic_conn.h index 92caed4..60bc407 100644 --- a/include/haproxy/quic_conn.h +++ b/include/haproxy/quic_conn.h @@ -138,7 +138,7 @@ static inline struct ncbuf *quic_get_ncbuf(struct ncbuf *ncbuf) if (!ncb_is_null(ncbuf)) return ncbuf; - b_alloc(&buf); + b_alloc(&buf, DB_MUX_RX); BUG_ON(b_is_null(&buf)); *ncbuf = ncb_make(buf.area, buf.size, 0); diff --git a/include/haproxy/quic_fctl-t.h b/include/haproxy/quic_fctl-t.h new file mode 100644 index 0000000..9331619 --- /dev/null +++ b/include/haproxy/quic_fctl-t.h @@ -0,0 +1,15 @@ +#ifndef _HAPROXY_QUIC_FCTL_T_H +#define _HAPROXY_QUIC_FCTL_T_H + +#include <stdint.h> + +struct quic_fctl { + /* Offset set by peer which must not be exceeded on send. */ + uint64_t limit; + /* Offset which must never exceed limit. */ + uint64_t off_real; + /* Offset which can go beyond limit one time before being blocked. 
*/ + uint64_t off_soft; +}; + +#endif /* _HAPROXY_QUIC_FCTL_T_H */ diff --git a/include/haproxy/quic_fctl.h b/include/haproxy/quic_fctl.h new file mode 100644 index 0000000..8818372 --- /dev/null +++ b/include/haproxy/quic_fctl.h @@ -0,0 +1,19 @@ +#ifndef _HAPROXY_QUIC_FCTL_H +#define _HAPROXY_QUIC_FCTL_H + +#include <haproxy/quic_fctl-t.h> + +void qfctl_init(struct quic_fctl *fctl, uint64_t limit); + +int qfctl_rblocked(const struct quic_fctl *fctl); +int qfctl_sblocked(const struct quic_fctl *fctl); + +int qfctl_set_max(struct quic_fctl *fctl, uint64_t val, + int *unblock_soft, int *unblock_real); + +int qfctl_rinc(struct quic_fctl *fctl, uint64_t diff); +int qfctl_sinc(struct quic_fctl *fctl, uint64_t diff); + +uint64_t qfctl_rcap(const struct quic_fctl *fctl); + +#endif /* _HAPROXY_QUIC_FCTL_H */ diff --git a/include/haproxy/quic_rx-t.h b/include/haproxy/quic_rx-t.h index 9ef8e7a..6b5a0c4 100644 --- a/include/haproxy/quic_rx-t.h +++ b/include/haproxy/quic_rx-t.h @@ -5,6 +5,13 @@ extern struct pool_head *pool_head_quic_conn_rxbuf; extern struct pool_head *pool_head_quic_dgram; extern struct pool_head *pool_head_quic_rx_packet; +#include <import/eb64tree.h> +#include <haproxy/api-t.h> +#include <haproxy/quic_cid-t.h> +#include <inttypes.h> +#include <sys/socket.h> + +struct quic_version; /* Maximum number of ack-eliciting received packets since the last * ACK frame was sent */ diff --git a/include/haproxy/quic_rx.h b/include/haproxy/quic_rx.h index 494bc4a..3e65acb 100644 --- a/include/haproxy/quic_rx.h +++ b/include/haproxy/quic_rx.h @@ -30,8 +30,6 @@ int quic_dgram_parse(struct quic_dgram *dgram, struct quic_conn *from_qc, int qc_treat_rx_pkts(struct quic_conn *qc); int qc_parse_hd_form(struct quic_rx_packet *pkt, unsigned char **pos, const unsigned char *end); -int qc_treat_rx_crypto_frms(struct quic_conn *qc, struct quic_enc_level *el, - struct ssl_sock_ctx *ctx); int qc_handle_frms_of_lost_pkt(struct quic_conn *qc, struct quic_tx_packet *pkt, struct list 
*pktns_frm_list); diff --git a/include/haproxy/quic_sock-t.h b/include/haproxy/quic_sock-t.h index 67a5749..0b9c18c 100644 --- a/include/haproxy/quic_sock-t.h +++ b/include/haproxy/quic_sock-t.h @@ -36,8 +36,8 @@ struct quic_dgram { struct sockaddr_storage daddr; struct quic_conn *qc; - struct list recv_list; /* elemt to quic_receiver_buf <dgram_list>. */ - struct mt_list handler_list; /* elem to quic_dghdlr <dgrams>. */ + struct list recv_list; /* element pointing to quic_receiver_buf <dgram_list>. */ + struct mt_list handler_list; /* element pointing to quic_dghdlr <dgrams>. */ }; /* QUIC datagram handler */ diff --git a/include/haproxy/quic_sock.h b/include/haproxy/quic_sock.h index 531cf62..7236147 100644 --- a/include/haproxy/quic_sock.h +++ b/include/haproxy/quic_sock.h @@ -72,6 +72,14 @@ static inline char qc_test_fd(struct quic_conn *qc) return qc->fd >= 0; } +/* Returns active socket for <qc> connection. This may be its owned connection + * socket or the listener one as a fallback. + */ +static inline int qc_fd(struct quic_conn *qc) +{ + return qc_test_fd(qc) ? qc->fd : qc->li->rx.fd; +} + /* Try to increment <l> handshake current counter. If listener limit is * reached, incrementation is rejected and 0 is returned. 
*/ diff --git a/include/haproxy/quic_ssl.h b/include/haproxy/quic_ssl.h index 8f7df47..a84f5ff 100644 --- a/include/haproxy/quic_ssl.h +++ b/include/haproxy/quic_ssl.h @@ -35,10 +35,6 @@ int ssl_quic_initial_ctx(struct bind_conf *bind_conf); int qc_alloc_ssl_sock_ctx(struct quic_conn *qc); -int qc_ssl_provide_quic_data(struct ncbuf *ncbuf, - enum ssl_encryption_level_t level, - struct ssl_sock_ctx *ctx, - const unsigned char *data, size_t len); int qc_ssl_provide_all_quic_data(struct quic_conn *qc, struct ssl_sock_ctx *ctx); static inline void qc_free_ssl_sock_ctx(struct ssl_sock_ctx **ctx) diff --git a/include/haproxy/quic_tls-t.h b/include/haproxy/quic_tls-t.h index 326e01b..7f90d9a 100644 --- a/include/haproxy/quic_tls-t.h +++ b/include/haproxy/quic_tls-t.h @@ -21,6 +21,7 @@ #include <import/ebtree.h> +#include <haproxy/buf-t.h> #include <haproxy/ncbuf-t.h> #include <haproxy/quic_ack-t.h> #include <haproxy/openssl-compat.h> @@ -237,10 +238,12 @@ struct quic_cstream { struct quic_enc_level { struct list list; - /* Attach point to enqueue this encryption level during retransmissions */ - struct list retrans; - /* pointer to list used only during retransmissions */ - struct list *retrans_frms; + + /* Attach point to register encryption level before sending. */ + struct list el_send; + /* Pointer to the frames used by sending functions */ + struct list *send_frms; + /* Encryption level, as defined by the TLS stack. 
*/ enum ssl_encryption_level_t level; /* TLS encryption context (AEAD only) */ diff --git a/include/haproxy/quic_tls.h b/include/haproxy/quic_tls.h index 86b8c1e..67c25aa 100644 --- a/include/haproxy/quic_tls.h +++ b/include/haproxy/quic_tls.h @@ -140,7 +140,15 @@ static inline const EVP_CIPHER *tls_aead(const SSL_CIPHER *cipher) return EVP_aes_128_gcm(); case TLS1_3_CK_AES_256_GCM_SHA384: return EVP_aes_256_gcm(); -#if !defined(OPENSSL_IS_AWSLC) +#if !defined(OPENSSL_IS_AWSLC) && (!defined(LIBRESSL_VERSION_NUMBER) || LIBRESSL_VERSION_NUMBER >= 0x4000000fL) + /* WT: LibreSSL has an issue with CHACHA20 running in-place till 3.9.2 + * included, but the fix is already identified and will be merged + * into next major version. Given that on machines without AES-NI + * CHACHA20 is selected by default, this makes connections freeze + * on non-x86 machines, so we prefer to break them so that the + * client falls back to TCP. See GH issue #2569 for the context. + * Thanks to Theo Buehler for his help! 
+ */ case TLS1_3_CK_CHACHA20_POLY1305_SHA256: return EVP_chacha20_poly1305(); #endif diff --git a/include/haproxy/quic_tx-t.h b/include/haproxy/quic_tx-t.h index 4653f04..6979204 100644 --- a/include/haproxy/quic_tx-t.h +++ b/include/haproxy/quic_tx-t.h @@ -5,6 +5,9 @@ #define QUIC_DGRAM_HEADLEN (sizeof(uint16_t) + sizeof(void *)) #define QUIC_MAX_CC_BUFSIZE (2 * (QUIC_MIN_CC_PKTSIZE + QUIC_DGRAM_HEADLEN)) +#include <import/eb64tree.h> +#include <haproxy/list-t.h> + extern struct pool_head *pool_head_quic_tx_packet; extern struct pool_head *pool_head_quic_cc_buf; diff --git a/include/haproxy/quic_tx.h b/include/haproxy/quic_tx.h index 0659c14..55530d9 100644 --- a/include/haproxy/quic_tx.h +++ b/include/haproxy/quic_tx.h @@ -33,9 +33,11 @@ void qc_txb_release(struct quic_conn *qc); int qc_purge_txbuf(struct quic_conn *qc, struct buffer *buf); struct buffer *qc_get_txb(struct quic_conn *qc); -int qc_prep_hpkts(struct quic_conn *qc, struct buffer *buf, struct list *qels); -int qc_send_ppkts(struct buffer *buf, struct ssl_sock_ctx *ctx); -int qc_send_app_pkts(struct quic_conn *qc, struct list *frms); +void qel_register_send(struct list *send_list, struct quic_enc_level *qel, + struct list *frms); +int qel_need_sending(struct quic_enc_level *qel, struct quic_conn *qc); +int qc_send(struct quic_conn *qc, int old_data, struct list *send_list); + int qc_dgrams_retransmit(struct quic_conn *qc); void qc_prep_hdshk_fast_retrans(struct quic_conn *qc, struct list *ifrms, struct list *hfrms); @@ -79,7 +81,7 @@ static inline void quic_tx_packet_refdec(struct quic_tx_packet *pkt) } /* Return the number of bytes which may be sent from <qc> connection when - * it has not already been validated. Note that this is the responsability + * it has not already been validated. Note that this is the responsibility * of the caller to check that the case with quic_peer_validated_addr(). * This latter BUG_ON() if 3 * qc->rx.bytes < qc->tx.prep_bytes. 
*/ diff --git a/include/haproxy/receiver-t.h b/include/haproxy/receiver-t.h index 0ae441e..90f52aa 100644 --- a/include/haproxy/receiver-t.h +++ b/include/haproxy/receiver-t.h @@ -37,6 +37,7 @@ #define RX_F_MWORKER 0x00000004 /* keep the FD open in the master but close it in the children */ #define RX_F_MUST_DUP 0x00000008 /* this receiver's fd must be dup() from a reference; ignore socket-level ops here */ #define RX_F_NON_SUSPENDABLE 0x00000010 /* this socket cannot be suspended hence must always be unbound */ +#define RX_F_PASS_PKTINFO 0x00000020 /* pass pktinfo in received messages */ /* Bit values for rx_settings->options */ #define RX_O_FOREIGN 0x00000001 /* receives on foreign addresses */ diff --git a/include/haproxy/resolvers-t.h b/include/haproxy/resolvers-t.h index b727463..e10c6fa 100644 --- a/include/haproxy/resolvers-t.h +++ b/include/haproxy/resolvers-t.h @@ -134,7 +134,7 @@ struct resolv_response { struct resolvers { __decl_thread(HA_SPINLOCK_T lock); unsigned int accepted_payload_size; /* maximum payload size we accept for responses */ - int nb_nameservers; /* total number of active nameservers in a resolvers section */ + int nb_nameservers; /* total number of nameservers in a resolvers section */ int resolve_retries; /* number of retries before giving up */ struct { /* time to: */ int resolve; /* wait between 2 queries for the same resolution */ @@ -274,10 +274,7 @@ enum { * OR provided IP found and preference is not match and an IP * matching preference was found. 
*/ - RSLV_UPD_CNAME, /* CNAME without any IP provided in the response */ - RSLV_UPD_NAME_ERROR, /* name in the response did not match the query */ RSLV_UPD_NO_IP_FOUND, /* no IP could be found in the response */ - RSLV_UPD_OBSOLETE_IP, /* The server IP was obsolete, and no other IP was found */ }; struct proxy; diff --git a/include/haproxy/resolvers.h b/include/haproxy/resolvers.h index 5d4c744..d4054b6 100644 --- a/include/haproxy/resolvers.h +++ b/include/haproxy/resolvers.h @@ -34,6 +34,7 @@ extern struct list sec_resolvers; extern unsigned int resolv_failed_resolutions; struct resolvers *find_resolvers_by_id(const char *id); +struct dns_nameserver *find_nameserver_by_resolvers_and_id(struct resolvers *parent, unsigned int id); struct resolv_srvrq *find_srvrq_by_name(const char *name, struct proxy *px); struct resolv_srvrq *new_resolv_srvrq(struct server *srv, char *fqdn); struct resolv_answer_item *find_srvrq_answer_record(const struct resolv_requester *requester); @@ -60,7 +61,6 @@ int stats_dump_resolvers(struct stconn *sc, struct list *stat_modules); void resolv_stats_clear_counters(int clrall, struct list *stat_modules); int resolv_allocate_counters(struct list *stat_modules); -int dns_dgram_init(struct dns_nameserver *ns, struct sockaddr_storage *sk); int resolvers_create_default(); #endif // _HAPROXY_RESOLVER_H diff --git a/include/haproxy/ring-t.h b/include/haproxy/ring-t.h index b89c886..4e091ee 100644 --- a/include/haproxy/ring-t.h +++ b/include/haproxy/ring-t.h @@ -27,7 +27,7 @@ #include <haproxy/thread.h> /* The code below handles circular buffers with single-producer and multiple - * readers (up to 255). The buffer storage area must remain always allocated. + * readers (up to 254). The buffer storage area must remain always allocated. * It's made of series of payload blocks followed by a readers count (RC). * There is always a readers count at the beginning of the buffer as well. 
Each * payload block is composed of a varint-encoded size (VI) followed by the @@ -96,11 +96,62 @@ #define RING_WF_WAIT_MODE 0x00000001 /* wait for new contents */ #define RING_WF_SEEK_NEW 0x00000002 /* seek to new contents */ +/* ring flags */ +#define RING_FL_MAPPED 0x00000001 /* mmapped area, must not free() */ + +/* keep values below in decimal, they may be dumped in error messages */ +#define RING_WRITING_SIZE 255 /* the next message's size is being written */ +#define RING_MAX_READERS 254 /* highest supported value for RC */ + +/* mask used to lock the tail */ +#define RING_TAIL_LOCK (1ULL << ((sizeof(size_t) * 8) - 1)) + +/* A cell describing a waiting thread. + * ->next is initialized to 0x1 before the pointer is set, so that any + * leader thread can see that the pointer is not set yet. This allows + * to enqueue all waiting threads very quickly using XCHG() on the head + * without having to rely on a flaky CAS, while threads finish their setup + * in parallel. The pointer will turn to NULL again once the thread is + * released. 
+ */ +struct ring_wait_cell { + size_t to_send_self; // size needed to serialize this msg + size_t needed_tot; // size needed to serialize pending msgs + size_t maxlen; // msg truncated to this size + const struct ist *pfx; // prefixes + size_t npfx; // #prefixes + const struct ist *msg; // message parts + size_t nmsg; // #message parts + struct ring_wait_cell *next; // next waiting thread +}; + +/* this is the mmapped part */ +struct ring_storage { + size_t size; // storage size + size_t rsvd; // header length (used for file-backed maps) + THREAD_PAD(64 - 2 * sizeof(size_t)); + size_t tail; // storage tail + THREAD_PAD(64 - sizeof(size_t)); + size_t head; // storage head + THREAD_PAD(64 - sizeof(size_t)); + char area[0]; // storage area begins immediately here +}; + +/* this is the ring definition, config, waiters etc */ struct ring { - struct buffer buf; // storage area - struct list waiters; // list of waiters, for now, CLI "show event" - __decl_thread(HA_RWLOCK_T lock); + struct ring_storage *storage; // the mapped part + struct mt_list waiters; // list of waiters, for now, CLI "show event" int readers_count; + uint flags; // RING_FL_* + uint pending; // new writes that have not yet been subject to a wakeup + uint waking; // indicates a thread is currently waking up readers + + /* keep the queue in a separate cache line below */ + THREAD_PAD(64 - 3*sizeof(void*) - 4*sizeof(int)); + struct { + struct ring_wait_cell *ptr; + THREAD_PAD(64 - sizeof(void*)); + } queue[RING_WAIT_QUEUES + 1]; // wait queue + 1 spacer }; #endif /* _HAPROXY_RING_T_H */ diff --git a/include/haproxy/ring.h b/include/haproxy/ring.h index 71217d5..201ede4 100644 --- a/include/haproxy/ring.h +++ b/include/haproxy/ring.h @@ -25,13 +25,13 @@ #include <stdlib.h> #include <import/ist.h> #include <haproxy/ring-t.h> +#include <haproxy/vecpair.h> struct appctx; struct ring *ring_new(size_t size); -struct ring *ring_make_from_area(void *area, size_t size); -struct ring *ring_cast_from_area(void 
*area); -void ring_init(struct ring *ring, void* area, size_t size); +struct ring *ring_make_from_area(void *area, size_t size, int reset); +void ring_init(struct ring *ring, void *area, size_t size, int reset); struct ring *ring_resize(struct ring *ring, size_t size); void ring_free(struct ring *ring); ssize_t ring_write(struct ring *ring, size_t maxlen, const struct ist pfx[], size_t npfx, const struct ist msg[], size_t nmsg); @@ -42,6 +42,81 @@ int cli_io_handler_show_ring(struct appctx *appctx); void cli_io_release_show_ring(struct appctx *appctx); size_t ring_max_payload(const struct ring *ring); +int ring_dispatch_messages(struct ring *ring, void *ctx, size_t *ofs_ptr, size_t *last_ofs_ptr, uint flags, + ssize_t (*msg_handler)(void *ctx, struct ist v1, struct ist v2, size_t ofs, size_t len)); + +/* returns the ring storage's usable area */ +static inline void *ring_area(const struct ring *ring) +{ + return ring->storage->area; +} + +/* returns the allocated area for the ring. It covers the whole + * area made of both the ring_storage and the usable area. + */ +static inline void *ring_allocated_area(const struct ring *ring) +{ + return ring->storage; +} + +/* returns the number of bytes in the ring */ +static inline size_t ring_data(const struct ring *ring) +{ + size_t tail = HA_ATOMIC_LOAD(&ring->storage->tail) & ~RING_TAIL_LOCK; + + return ((ring->storage->head <= tail) ? + 0 : ring->storage->size) + tail - ring->storage->head; +} + +/* returns the usable size in bytes for the ring. It is smaller than + * the allocate size by the size of the ring_storage header. + */ +static inline size_t ring_size(const struct ring *ring) +{ + return ring->storage->size; +} + +/* returns the allocated size in bytes for the ring. It covers the whole + * area made of both the ring_storage and the usable area. 
+ */ +static inline size_t ring_allocated_size(const struct ring *ring) +{ + return ring->storage->size + ring->storage->rsvd; +} + +/* returns the head offset of the ring */ +static inline size_t ring_head(const struct ring *ring) +{ + return ring->storage->head; +} + +/* returns the ring's tail offset without the lock bit */ +static inline size_t ring_tail(const struct ring *ring) +{ + return HA_ATOMIC_LOAD(&ring->storage->tail) & ~RING_TAIL_LOCK; +} + +/* duplicates ring <src> over ring <dst> for no more than <max> bytes or no + * more than the amount of data present in <src>. It's assumed that the + * destination ring is always large enough for <max>. The number of bytes + * copied (the min of src's size and max) is returned. + */ +static inline size_t ring_dup(struct ring *dst, const struct ring *src, size_t max) +{ + struct ist v1, v2; + + vp_ring_to_data(&v1, &v2, ring_area(src), ring_size(src), ring_head(src), ring_tail(src)); + + if (max > ring_data(src)) + max = ring_data(src); + + BUG_ON(max > ring_size(dst)); + + vp_peek_ofs(v1, v2, 0, ring_area(dst), max); + dst->storage->head = 0; + dst->storage->tail = max; + return max; +} #endif /* _HAPROXY_RING_H */ diff --git a/include/haproxy/sample.h b/include/haproxy/sample.h index 7e05e78..e8694c6 100644 --- a/include/haproxy/sample.h +++ b/include/haproxy/sample.h @@ -31,6 +31,7 @@ extern sample_cast_fct sample_casts[SMP_TYPES][SMP_TYPES]; extern const unsigned int fetch_cap[SMP_SRC_ENTRIES]; extern const char *smp_to_type[SMP_TYPES]; +int type_to_smp(const char *type); struct sample_expr *sample_parse_expr(char **str, int *idx, const char *file, int line, char **err, struct arg_list *al, char **endptr); int sample_parse_expr_cnv(char **str, int *idx, char **endptr, char **err_msg, struct arg_list *al, const char *file, int line, diff --git a/include/haproxy/sc_strm.h b/include/haproxy/sc_strm.h index 41f07e9..4eaef88 100644 --- a/include/haproxy/sc_strm.h +++ b/include/haproxy/sc_strm.h @@ -40,6 +40,12 @@ 
struct task *sc_conn_io_cb(struct task *t, void *ctx, unsigned int state); int sc_conn_sync_recv(struct stconn *sc); void sc_conn_sync_send(struct stconn *sc); +int sc_applet_sync_recv(struct stconn *sc); +void sc_applet_sync_send(struct stconn *sc); + +int sc_applet_sync_recv(struct stconn *sc); +void sc_applet_sync_send(struct stconn *sc); + /* returns the channel which receives data from this stream connector (input channel) */ static inline struct channel *sc_ic(const struct stconn *sc) @@ -149,8 +155,11 @@ static inline int sc_alloc_ibuf(struct stconn *sc, struct buffer_wait *wait) int ret; ret = channel_alloc_buffer(sc_ic(sc), wait); - if (!ret) + if (ret) + sc_used_buff(sc); + else sc_need_buff(sc); + return ret; } @@ -319,6 +328,30 @@ static inline void sc_chk_snd(struct stconn *sc) sc->app_ops->chk_snd(sc); } + +/* Perform a synchronous receive using the right version, depending on whether the endpoint + * is a connection or an applet. + */ +static inline int sc_sync_recv(struct stconn *sc) +{ + if (sc_ep_test(sc, SE_FL_T_MUX)) + return sc_conn_sync_recv(sc); + else if (sc_ep_test(sc, SE_FL_T_APPLET)) + return sc_applet_sync_recv(sc); + return 0; +} + +/* Perform a synchronous send using the right version, depending on whether the endpoint is + * a connection or an applet.
+ */ +static inline void sc_sync_send(struct stconn *sc) +{ + if (sc_ep_test(sc, SE_FL_T_MUX)) + sc_conn_sync_send(sc); + else if (sc_ep_test(sc, SE_FL_T_APPLET)) + sc_applet_sync_send(sc); +} + /* Combines both sc_update_rx() and sc_update_tx() at once */ static inline void sc_update(struct stconn *sc) { diff --git a/include/haproxy/server-t.h b/include/haproxy/server-t.h index 666d2cc..af58a56 100644 --- a/include/haproxy/server-t.h +++ b/include/haproxy/server-t.h @@ -31,7 +31,7 @@ #include <haproxy/check-t.h> #include <haproxy/connection-t.h> #include <haproxy/counters-t.h> -#include <haproxy/freq_ctr-t.h> +#include <haproxy/guid-t.h> #include <haproxy/listener-t.h> #include <haproxy/obj_type-t.h> #include <haproxy/queue-t.h> @@ -41,6 +41,7 @@ #include <haproxy/task-t.h> #include <haproxy/thread-t.h> #include <haproxy/event_hdl-t.h> +#include <haproxy/log-t.h> #include <haproxy/tools-t.h> @@ -223,6 +224,13 @@ struct pid_list { int exited; }; +/* srv methods of computing chash keys */ +enum srv_hash_key { + SRV_HASH_KEY_ID = 0, /* derived from server puid */ + SRV_HASH_KEY_ADDR, /* derived from server address */ + SRV_HASH_KEY_ADDR_PORT /* derived from server address and port */ +}; + /* A tree occurrence is a descriptor of a place in a tree, with a pointer back * to the server itself. 
*/ @@ -262,7 +270,7 @@ enum __attribute__((__packed__)) srv_ws_mode { */ struct srv_pp_tlv_list { struct list list; - struct list fmt; + struct lf_expr fmt; char *fmt_string; unsigned char type; }; @@ -293,6 +301,8 @@ struct server { struct srv_per_tgroup *per_tgrp; /* array of per-tgroup stuff such as idle conns */ unsigned int *curr_idle_thr; /* Current number of orphan idling connections per thread */ + char *pool_conn_name; + struct sample_expr *pool_conn_name_expr; unsigned int pool_purge_delay; /* Delay before starting to purge the idle conns pool */ unsigned int low_idle_conns; /* min idle connection count to start picking from other threads */ unsigned int max_idle_conns; /* Max number of connection allowed in the orphan connections list */ @@ -338,6 +348,7 @@ struct server { unsigned int est_need_conns; /* Estimate on the number of needed connections (max of curr and previous max_used) */ struct queue queue; /* pending connections */ + struct mt_list sess_conns; /* list of private conns managed by a session on this server */ /* Element below are usd by LB algorithms and must be doable in * parallel to other threads reusing connections above. 
@@ -356,7 +367,6 @@ struct server { int cur_sess; /* number of currently active sessions (including syn_sent) */ int served; /* # of active sessions currently being served (ie not pending) */ int consecutive_errors; /* current number of consecutive errors */ - struct freq_ctr sess_per_sec; /* sessions per second on this server */ struct be_counters counters; /* statistics counters */ /* Below are some relatively stable settings, only changed under the lock */ @@ -366,12 +376,13 @@ struct server { struct tree_occ *lb_nodes; /* lb_nodes_tot * struct tree_occ */ unsigned lb_nodes_tot; /* number of allocated lb_nodes (C-HASH) */ unsigned lb_nodes_now; /* number of lb_nodes placed in the tree (C-HASH) */ + enum srv_hash_key hash_key; /* method to compute node hash (C-HASH) */ + unsigned lb_server_key; /* hash of the values indicated by "hash_key" (C-HASH) */ const struct netns_entry *netns; /* contains network namespace name or NULL. Network namespace comes from configuration */ struct xprt_ops *xprt; /* transport-layer operations */ unsigned int svc_port; /* the port to connect to (for relevant families) */ unsigned down_time; /* total time the server was down */ - time_t last_change; /* last time, when the state was changed */ int puid; /* proxy-unique server ID, used for SNMP, and "first" LB algo */ int tcp_ut; /* for TCP, user timeout */ @@ -455,6 +466,8 @@ struct server { event_hdl_sub_list e_subs; /* event_hdl: server's subscribers list (atomically updated) */ + struct guid_node guid; /* GUID global tree node */ + /* warning, these structs are huge, keep them at the bottom */ struct conn_src conn_src; /* connection source settings */ struct sockaddr_storage addr; /* the address to connect to, doesn't include the port */ @@ -608,6 +621,56 @@ struct server_inetaddr { } port; }; +/* struct to store information about server's addr / port updater in + * INET context + */ +enum server_inetaddr_updater_by { + SERVER_INETADDR_UPDATER_BY_NONE = 0, + 
SERVER_INETADDR_UPDATER_BY_CLI, + SERVER_INETADDR_UPDATER_BY_LUA, + SERVER_INETADDR_UPDATER_BY_DNS_AR, + SERVER_INETADDR_UPDATER_BY_DNS_CACHE, + SERVER_INETADDR_UPDATER_BY_DNS_RESOLVER, + /* changes here must be reflected in SERVER_INETADDR_UPDATER_* + * helper macros and in server_inetaddr_updater_by_to_str() func + */ +}; +struct server_inetaddr_updater { + enum server_inetaddr_updater_by by; // by identifier (unique) + uint8_t dns; // is dns involved? + union { + struct { + unsigned int ns_id; // nameserver id responsible for the update + } dns_resolver; // SERVER_INETADDR_UPDATER_DNS_RESOLVER specific infos + } u; // per updater's additional ctx +}; +#define SERVER_INETADDR_UPDATER_NONE \ + (struct server_inetaddr_updater){ .by = SERVER_INETADDR_UPDATER_BY_NONE, \ + .dns = 0 } + +#define SERVER_INETADDR_UPDATER_CLI \ + (struct server_inetaddr_updater){ .by = SERVER_INETADDR_UPDATER_BY_CLI, \ + .dns = 0 } + +#define SERVER_INETADDR_UPDATER_LUA \ + (struct server_inetaddr_updater){ .by = SERVER_INETADDR_UPDATER_BY_LUA, \ + .dns = 0 } + +#define SERVER_INETADDR_UPDATER_DNS_AR \ + (struct server_inetaddr_updater){ .by = SERVER_INETADDR_UPDATER_BY_DNS_AR, \ + .dns = 1 } + +#define SERVER_INETADDR_UPDATER_DNS_CACHE \ + (struct server_inetaddr_updater){ .by = SERVER_INETADDR_UPDATER_BY_DNS_CACHE, \ + .dns = 1 } + +#define SERVER_INETADDR_UPDATER_DNS_RESOLVER(_ns_id) \ + (struct server_inetaddr_updater){ \ + .by = SERVER_INETADDR_UPDATER_BY_DNS_RESOLVER, \ + .dns = 1, \ + .u.dns_resolver.ns_id = _ns_id, \ + } + /* data provided to EVENT_HDL_SUB_SERVER_INETADDR handlers through * event_hdl facility * @@ -622,7 +685,7 @@ struct event_hdl_cb_data_server_inetaddr { struct { struct server_inetaddr prev; struct server_inetaddr next; - uint8_t purge_conn; /* set to 1 if the network change will force a connection cleanup */ + struct server_inetaddr_updater updater; } safe; /* no unsafe data */ }; diff --git a/include/haproxy/server.h b/include/haproxy/server.h index 
2ba6e45..b8f8c71 100644 --- a/include/haproxy/server.h +++ b/include/haproxy/server.h @@ -26,9 +26,11 @@ #include <haproxy/api.h> #include <haproxy/applet-t.h> +#include <haproxy/arg-t.h> #include <haproxy/freq_ctr.h> #include <haproxy/proxy-t.h> #include <haproxy/resolvers-t.h> +#include <haproxy/sample-t.h> #include <haproxy/server-t.h> #include <haproxy/task.h> #include <haproxy/thread-t.h> @@ -43,17 +45,24 @@ extern struct list servers_list; extern struct dict server_key_dict; int srv_downtime(const struct server *s); -int srv_lastsession(const struct server *s); int srv_getinter(const struct check *check); +void srv_settings_init(struct server *srv); void srv_settings_cpy(struct server *srv, const struct server *src, int srv_tmpl); int parse_server(const char *file, int linenum, char **args, struct proxy *curproxy, const struct proxy *defproxy, int parse_flags); -int srv_update_addr(struct server *s, void *ip, int ip_sin_family, const char *updater); -int server_parse_sni_expr(struct server *newsrv, struct proxy *px, char **err); -const char *srv_update_addr_port(struct server *s, const char *addr, const char *port, char *updater); +int srv_update_addr(struct server *s, void *ip, int ip_sin_family, struct server_inetaddr_updater updater); +struct sample_expr *_parse_srv_expr(char *expr, struct arg_list *args_px, + const char *file, int linenum, char **err); +int server_set_inetaddr(struct server *s, const struct server_inetaddr *inetaddr, struct server_inetaddr_updater updater, struct buffer *msg); +int server_set_inetaddr_warn(struct server *s, const struct server_inetaddr *inetaddr, struct server_inetaddr_updater updater); +void server_get_inetaddr(struct server *s, struct server_inetaddr *inetaddr); +const char *srv_update_addr_port(struct server *s, const char *addr, const char *port, struct server_inetaddr_updater updater); +const char *server_inetaddr_updater_by_to_str(enum server_inetaddr_updater_by by); const char *srv_update_check_addr_port(struct 
server *s, const char *addr, const char *port); const char *srv_update_agent_addr_port(struct server *s, const char *addr, const char *port); struct server *server_find_by_id(struct proxy *bk, int id); +struct server *server_find_by_id_unique(struct proxy *bk, int id, uint32_t rid); struct server *server_find_by_name(struct proxy *bk, const char *name); +struct server *server_find_by_name_unique(struct proxy *bk, const char *name, uint32_t rid); struct server *server_find_best_match(struct proxy *bk, char *name, int id, int *diff); void apply_server_state(void); void srv_compute_all_admin_states(struct proxy *px); @@ -69,11 +78,11 @@ void srv_set_ssl(struct server *s, int use_ssl); const char *srv_adm_st_chg_cause(enum srv_adm_st_chg_cause cause); const char *srv_op_st_chg_cause(enum srv_op_st_chg_cause cause); void srv_event_hdl_publish_check(struct server *srv, struct check *check); +int srv_check_for_deletion(const char *bename, const char *svname, struct proxy **pb, struct server **ps, const char **pm); /* functions related to server name resolution */ int srv_prepare_for_resolution(struct server *srv, const char *hostname); -int srvrq_update_srv_status(struct server *s, int has_no_ip); -int snr_update_srv_status(struct server *s, int has_no_ip); +int srvrq_set_srv_down(struct server *s); int srv_set_fqdn(struct server *srv, const char *fqdn, int resolv_locked); const char *srv_update_fqdn(struct server *server, const char *fqdn, const char *updater, int dns_locked); int snr_resolution_cb(struct resolv_requester *requester, struct dns_counters *counters); @@ -118,14 +127,6 @@ const char *server_parse_weight_change_request(struct server *sv, const char *weight_str); /* - * Parses addr_str and configures sv accordingly. updater precise - * the source of the change in the associated message log. - * Returns NULL on success, error message string otherwise. 
- */ -const char *server_parse_addr_change_request(struct server *sv, - const char *addr_str, const char *updater); - -/* * Parses maxconn_str and configures sv accordingly. * Returns NULL on success, error message string otherwise. */ @@ -177,12 +178,12 @@ void srv_set_dyncookie(struct server *s); int srv_check_reuse_ws(struct server *srv); const struct mux_ops *srv_get_ws_proto(struct server *srv); -/* increase the number of cumulated connections on the designated server */ +/* increase the number of cumulated streams on the designated server */ static inline void srv_inc_sess_ctr(struct server *s) { _HA_ATOMIC_INC(&s->counters.cum_sess); HA_ATOMIC_UPDATE_MAX(&s->counters.sps_max, - update_freq_ctr(&s->sess_per_sec, 1)); + update_freq_ctr(&s->counters.sess_per_sec, 1)); } /* set the time of last session on the designated server */ diff --git a/include/haproxy/session-t.h b/include/haproxy/session-t.h index dff167e..7f034dd 100644 --- a/include/haproxy/session-t.h +++ b/include/haproxy/session-t.h @@ -39,6 +39,7 @@ enum { SESS_FL_NONE = 0x00000000, /* nothing */ SESS_FL_PREFER_LAST = 0x00000001, /* NTML authent, we should reuse last conn */ + SESS_FL_RELEASE_LI = 0x00000002, /* session responsible to decrement listener counters on release */ }; /* max number of idle server connections kept attached to a session */ @@ -57,15 +58,23 @@ struct session { long t_idle; /* idle duration, -1 if never occurs */ int idle_conns; /* Number of connections we're currently responsible for that we are not using */ unsigned int flags; /* session flags, SESS_FL_* */ - struct list srv_list; /* List of servers and the connections the session is currently responsible for */ + struct list priv_conns; /* list of private conns */ struct sockaddr_storage *src; /* source address (pool), when known, otherwise NULL */ struct sockaddr_storage *dst; /* destination address (pool), when known, otherwise NULL */ }; -struct sess_srv_list { - void *target; +/* + * List of private conns managed by a 
session, indexed by server + * Stored both into the session and server instances + */ +struct sess_priv_conns { + void *target; /* Server or dispatch used for indexing */ struct list conn_list; /* Head of the connections list */ - struct list srv_list; /* Next element of the server list */ + + struct list sess_el; /* Element of session.priv_conns */ + struct mt_list srv_el; /* Element of server.sess_conns */ + + int tid; }; #endif /* _HAPROXY_SESSION_T_H */ diff --git a/include/haproxy/session.h b/include/haproxy/session.h index 8a62805..48d2fa0 100644 --- a/include/haproxy/session.h +++ b/include/haproxy/session.h @@ -32,13 +32,16 @@ #include <haproxy/stick_table.h> extern struct pool_head *pool_head_session; -extern struct pool_head *pool_head_sess_srv_list; +extern struct pool_head *pool_head_sess_priv_conns; struct session *session_new(struct proxy *fe, struct listener *li, enum obj_type *origin); void session_free(struct session *sess); +void conn_session_free(struct connection *conn); int session_accept_fd(struct connection *cli_conn); int conn_complete_session(struct connection *conn); struct task *session_expire_embryonic(struct task *t, void *context, unsigned int state); +void __session_add_glitch_ctr(struct session *sess, uint inc); + /* Remove the refcount from the session to the tracked counters, and clear the * pointer to ensure this is only performed once. The caller is responsible for @@ -123,11 +126,21 @@ static inline void session_inc_http_fail_ctr(struct session *sess) stkctr_inc_http_fail_ctr(&sess->stkctr[i]); } +/* Add <inc> to the number of cumulated glitches in the tracked counters, and + * implicitly update the rate if also tracked. 
+ */ +static inline void session_add_glitch_ctr(struct session *sess, uint inc) +{ + if (sess->stkctr && inc) + __session_add_glitch_ctr(sess, inc); +} -/* Remove the connection from the session list, and destroy the srv_list if it's now empty */ +/* Remove the connection from the session list, and destroy sess_priv_conns + * element if it's now empty. + */ static inline void session_unown_conn(struct session *sess, struct connection *conn) { - struct sess_srv_list *srv_list = NULL; + struct sess_priv_conns *pconns = NULL; BUG_ON(objt_listener(conn->target)); @@ -138,58 +151,66 @@ static inline void session_unown_conn(struct session *sess, struct connection *c * conn->owner that points to a dead session, but in this case the * element is not linked. */ - if (!LIST_INLIST(&conn->session_list)) + if (!LIST_INLIST(&conn->sess_el)) return; if (conn->flags & CO_FL_SESS_IDLE) sess->idle_conns--; - LIST_DEL_INIT(&conn->session_list); + LIST_DEL_INIT(&conn->sess_el); conn->owner = NULL; - list_for_each_entry(srv_list, &sess->srv_list, srv_list) { - if (srv_list->target == conn->target) { - if (LIST_ISEMPTY(&srv_list->conn_list)) { - LIST_DELETE(&srv_list->srv_list); - pool_free(pool_head_sess_srv_list, srv_list); + list_for_each_entry(pconns, &sess->priv_conns, sess_el) { + if (pconns->target == conn->target) { + if (LIST_ISEMPTY(&pconns->conn_list)) { + LIST_DELETE(&pconns->sess_el); + MT_LIST_DELETE(&pconns->srv_el); + pool_free(pool_head_sess_priv_conns, pconns); } break; } } } -/* Add the connection <conn> to the server list of the session <sess>. This - * function is called only if the connection is private. Nothing is performed if - * the connection is already in the session sever list or if the session does - * not own the connection. +/* Add the connection <conn> to the private conns list of session <sess>. This + * function is called only if the connection is private. 
Nothing is performed + * if the connection is already in the session list or if the session does not + * owned the connection. */ static inline int session_add_conn(struct session *sess, struct connection *conn, void *target) { - struct sess_srv_list *srv_list = NULL; + struct sess_priv_conns *pconns = NULL; + struct server *srv = objt_server(conn->target); int found = 0; BUG_ON(objt_listener(conn->target)); /* Already attach to the session or not the connection owner */ - if (!LIST_ISEMPTY(&conn->session_list) || (conn->owner && conn->owner != sess)) + if (!LIST_ISEMPTY(&conn->sess_el) || (conn->owner && conn->owner != sess)) return 1; - list_for_each_entry(srv_list, &sess->srv_list, srv_list) { - if (srv_list->target == target) { + list_for_each_entry(pconns, &sess->priv_conns, sess_el) { + if (pconns->target == target) { found = 1; break; } } if (!found) { /* The session has no connection for the server, create a new entry */ - srv_list = pool_alloc(pool_head_sess_srv_list); - if (!srv_list) + pconns = pool_alloc(pool_head_sess_priv_conns); + if (!pconns) return 0; - srv_list->target = target; - LIST_INIT(&srv_list->conn_list); - LIST_APPEND(&sess->srv_list, &srv_list->srv_list); + pconns->target = target; + LIST_INIT(&pconns->conn_list); + LIST_APPEND(&sess->priv_conns, &pconns->sess_el); + + MT_LIST_INIT(&pconns->srv_el); + if (srv) + MT_LIST_APPEND(&srv->sess_conns, &pconns->srv_el); + + pconns->tid = tid; } - LIST_APPEND(&srv_list->conn_list, &conn->session_list); + LIST_APPEND(&pconns->conn_list, &conn->sess_el); - /* Ensure owner is set for connection. It could have been resetted + /* Ensure owner is set for connection. It could have been reset * prior on after a session_add_conn() failure. 
*/ conn->owner = sess; @@ -226,11 +247,11 @@ static inline int session_check_idle_conn(struct session *sess, struct connectio static inline struct connection *session_get_conn(struct session *sess, void *target, int64_t hash) { struct connection *srv_conn = NULL; - struct sess_srv_list *srv_list; + struct sess_priv_conns *pconns; - list_for_each_entry(srv_list, &sess->srv_list, srv_list) { - if (srv_list->target == target) { - list_for_each_entry(srv_conn, &srv_list->conn_list, session_list) { + list_for_each_entry(pconns, &sess->priv_conns, sess_el) { + if (pconns->target == target) { + list_for_each_entry(srv_conn, &pconns->conn_list, sess_el) { if ((srv_conn->hash_node && srv_conn->hash_node->node.key == hash) && srv_conn->mux && (srv_conn->mux->avail_streams(srv_conn) > 0) && diff --git a/include/haproxy/shctx.h b/include/haproxy/shctx.h index a57cf15..01bb09d 100644 --- a/include/haproxy/shctx.h +++ b/include/haproxy/shctx.h @@ -21,7 +21,7 @@ int shctx_init(struct shared_context **orig_shctx, int maxblocks, int blocksize, unsigned int maxobjsz, - int extra); + int extra, __maybe_unused const char *name); struct shared_block *shctx_row_reserve_hot(struct shared_context *shctx, struct shared_block *last, int data_len); void shctx_row_detach(struct shared_context *shctx, struct shared_block *first); diff --git a/include/haproxy/sink-t.h b/include/haproxy/sink-t.h index 79a0dda..d5e1cec 100644 --- a/include/haproxy/sink-t.h +++ b/include/haproxy/sink-t.h @@ -60,9 +60,8 @@ struct sink { struct sig_handler *forward_sighandler; /* signal handler */ struct { struct ring *ring; // used by ring buffer and STRM sender - unsigned int dropped; // dropped events since last one. + unsigned int dropped; // 2*dropped events since last one + 1 for purge in progress. 
int fd; // fd num for FD type sink - __decl_thread(HA_RWLOCK_T lock); // shared/excl for dropped } ctx; }; diff --git a/include/haproxy/sink.h b/include/haproxy/sink.h index 3b428a1..bdc8447 100644 --- a/include/haproxy/sink.h +++ b/include/haproxy/sink.h @@ -52,18 +52,10 @@ int sink_announce_dropped(struct sink *sink, struct log_header hdr); static inline ssize_t sink_write(struct sink *sink, struct log_header hdr, size_t maxlen, const struct ist msg[], size_t nmsg) { - ssize_t sent; + ssize_t sent = 0; - if (unlikely(sink->ctx.dropped > 0)) { - /* We need to take an exclusive lock so that other producers - * don't do the same thing at the same time and above all we - * want to be sure others have finished sending their messages - * so that the dropped event arrives exactly at the right - * position. - */ - HA_RWLOCK_WRLOCK(RING_LOCK, &sink->ctx.lock); + if (unlikely(HA_ATOMIC_LOAD(&sink->ctx.dropped) > 0)) { sent = sink_announce_dropped(sink, hdr); - HA_RWLOCK_WRUNLOCK(RING_LOCK, &sink->ctx.lock); if (!sent) { /* we failed, we don't try to send our log as if it @@ -73,13 +65,11 @@ static inline ssize_t sink_write(struct sink *sink, struct log_header hdr, } } - HA_RWLOCK_RDLOCK(RING_LOCK, &sink->ctx.lock); sent = __sink_write(sink, hdr, maxlen, msg, nmsg); - HA_RWLOCK_RDUNLOCK(RING_LOCK, &sink->ctx.lock); fail: if (unlikely(sent <= 0)) - HA_ATOMIC_INC(&sink->ctx.dropped); + HA_ATOMIC_ADD(&sink->ctx.dropped, 2); return sent; } diff --git a/include/haproxy/sock.h b/include/haproxy/sock.h index 60e81ec..017e0ad 100644 --- a/include/haproxy/sock.h +++ b/include/haproxy/sock.h @@ -30,7 +30,7 @@ #include <haproxy/listener-t.h> #include <haproxy/sock-t.h> -int sock_create_server_socket(struct connection *conn); +int sock_create_server_socket(struct connection *conn, struct proxy *be, int *stream_err); void sock_enable(struct receiver *rx); void sock_disable(struct receiver *rx); void sock_unbind(struct receiver *rx); @@ -51,6 +51,39 @@ int sock_check_events(struct 
connection *conn, int event_type); void sock_ignore_events(struct connection *conn, int event_type); int _sock_supports_reuseport(const struct proto_fam *fam, int type, int protocol); +/* Sets tos sockopt on socket depending on addr target family */ +static inline void sock_set_tos(int fd, struct sockaddr_storage *addr, int tos) +{ +#ifdef IP_TOS + if (addr->ss_family == AF_INET) + setsockopt(fd, IPPROTO_IP, IP_TOS, &tos, sizeof(tos)); +#endif +#ifdef IPV6_TCLASS + if (addr->ss_family == AF_INET6) { + if (IN6_IS_ADDR_V4MAPPED(&((struct sockaddr_in6 *)addr)->sin6_addr)) + /* v4-mapped addresses need IP_TOS */ + setsockopt(fd, IPPROTO_IP, IP_TOS, &tos, sizeof(tos)); + else + setsockopt(fd, IPPROTO_IPV6, IPV6_TCLASS, &tos, sizeof(tos)); + } +#endif +} + +/* Sets mark sockopt on socket */ +static inline void sock_set_mark(int fd, sa_family_t sock_family, int mark) +{ + if ((sock_family == AF_INET) || (sock_family == AF_INET6)) { +#if defined(SO_MARK) + setsockopt(fd, SOL_SOCKET, SO_MARK, &mark, sizeof(mark)); +/* FreeBSD */ +#elif defined(SO_USER_COOKIE) + setsockopt(fd, SOL_SOCKET, SO_USER_COOKIE, &mark, sizeof(mark)); +/* OpenBSD */ +#elif defined(SO_RTABLE) + setsockopt(fd, SOL_SOCKET, SO_RTABLE, &mark, sizeof(mark)); +#endif + } +} #endif /* _HAPROXY_SOCK_H */ diff --git a/include/haproxy/ssl_ckch-t.h b/include/haproxy/ssl_ckch-t.h index 0002b84..0e501e5 100644 --- a/include/haproxy/ssl_ckch-t.h +++ b/include/haproxy/ssl_ckch-t.h @@ -55,6 +55,16 @@ struct ckch_data { struct buffer *ocsp_response; X509 *ocsp_issuer; OCSP_CERTID *ocsp_cid; +}; + +/* configuration for the ckch_store */ +struct ckch_conf { + int used; + char *crt; + char *key; + char *ocsp; + char *issuer; + char *sctl; int ocsp_update_mode; }; @@ -71,6 +81,7 @@ struct ckch_store { struct ckch_data *data; struct list ckch_inst; /* list of ckch_inst which uses this ckch_node */ struct list crtlist_entry; /* list of entries which use this store */ + struct ckch_conf conf; struct ebmb_node node; char 
path[VAR_ARRAY]; }; @@ -150,6 +161,16 @@ enum { CERT_TYPE_MAX, }; +/* + * When crt-store options are set from a crt-list, the crt-store options must be explicit everywhere. + * When crt-store options are set from a crt-store, the crt-store options can be empty, or the exact same + */ +enum { + CKCH_CONF_SET_EMPTY = 0, /* config is empty */ + CKCH_CONF_SET_CRTLIST = 1, /* config is set from a crt-list */ + CKCH_CONF_SET_CRTSTORE = 2, /* config is defined in a crt-store */ +}; + struct cert_exts { const char *ext; int type; @@ -157,5 +178,23 @@ struct cert_exts { /* add a parsing callback */ }; +/* argument types */ +enum parse_type_t { + PARSE_TYPE_NONE = 0, + PARSE_TYPE_INT, + PARSE_TYPE_STR, /* string which is strdup() */ + PARSE_TYPE_ONOFF, /* "on" or "off" keyword */ +}; + +struct ckch_conf_kws { + const char *name; + ssize_t offset; + enum parse_type_t type; + int (*func)(void *value, char *buf, struct ckch_data *d, int cli, char **err); + char **base; /* ptr to the base path */ +}; + +extern struct ckch_conf_kws ckch_conf_kws[]; + #endif /* USE_OPENSSL */ #endif /* _HAPROXY_SSL_CKCH_T_H */ diff --git a/include/haproxy/ssl_ckch.h b/include/haproxy/ssl_ckch.h index 64ac3df..e635663 100644 --- a/include/haproxy/ssl_ckch.h +++ b/include/haproxy/ssl_ckch.h @@ -37,18 +37,27 @@ int ssl_sock_load_sctl_from_file(const char *sctl_path, char *buf, struct ckch_d int ssl_sock_load_issuer_file_into_ckch(const char *path, char *buf, struct ckch_data *data, char **err); /* ckch_store functions */ -struct ckch_store *ckchs_load_cert_file(char *path, char **err); +struct ckch_store *ckch_store_new_load_files_path(char *path, char **err); +struct ckch_store *ckch_store_new_load_files_conf(char *name, struct ckch_conf *conf, char **err); struct ckch_store *ckchs_lookup(char *path); struct ckch_store *ckchs_dup(const struct ckch_store *src); struct ckch_store *ckch_store_new(const char *filename); void ckch_store_free(struct ckch_store *store); void ckch_store_replace(struct 
ckch_store *old_ckchs, struct ckch_store *new_ckchs); +int ckch_store_load_files(struct ckch_conf *f, struct ckch_store *c, int cli, char **err); + +/* ckch_conf functions */ + +int ckch_conf_parse(char **args, int cur_arg, struct ckch_conf *f, int *found, const char *file, int linenum, char **err); +void ckch_conf_clean(struct ckch_conf *conf); +int ckch_conf_cmp(struct ckch_conf *conf1, struct ckch_conf *conf2, char **err); +int ckch_conf_cmp_empty(struct ckch_conf *prev, char **err); /* ckch_inst functions */ void ckch_inst_free(struct ckch_inst *inst); struct ckch_inst *ckch_inst_new(); int ckch_inst_new_load_store(const char *path, struct ckch_store *ckchs, struct bind_conf *bind_conf, - struct ssl_bind_conf *ssl_conf, char **sni_filter, int fcount, struct ckch_inst **ckchi, char **err); + struct ssl_bind_conf *ssl_conf, char **sni_filter, int fcount, int is_default, struct ckch_inst **ckchi, char **err); int ckch_inst_new_load_srv_store(const char *path, struct ckch_store *ckchs, struct ckch_inst **ckchi, char **err); int ckch_inst_rebuild(struct ckch_store *ckch_store, struct ckch_inst *ckchi, @@ -70,6 +79,14 @@ int ssl_store_load_locations_file(char *path, int create_if_none, enum cafile_ty int __ssl_store_load_locations_file(char *path, int create_if_none, enum cafile_type type, int shuterror); extern struct cert_exts cert_exts[]; +extern int (*ssl_commit_crlfile_cb)(const char *path, X509_STORE *ctx, char **err); + +/* ckch_conf keyword loading */ +static inline int ckch_conf_load_pem(void *value, char *buf, struct ckch_data *d, int cli, char **err) { if (cli) return 0; return ssl_sock_load_pem_into_ckch(value, buf, d, err); } +static inline int ckch_conf_load_key(void *value, char *buf, struct ckch_data *d, int cli, char **err) { if (cli) return 0; return ssl_sock_load_key_into_ckch(value, buf, d, err); } +static inline int ckch_conf_load_ocsp_response(void *value, char *buf, struct ckch_data *d, int cli, char **err) { if (cli) return 0; return 
ssl_sock_load_ocsp_response_from_file(value, buf, d, err); } +static inline int ckch_conf_load_ocsp_issuer(void *value, char *buf, struct ckch_data *d, int cli, char **err) { if (cli) return 0; return ssl_sock_load_issuer_file_into_ckch(value, buf, d, err); } +static inline int ckch_conf_load_sctl(void *value, char *buf, struct ckch_data *d, int cli, char **err) { if (cli) return 0; return ssl_sock_load_sctl_from_file(value, buf, d, err); } #endif /* USE_OPENSSL */ #endif /* _HAPROXY_SSL_CRTLIST_H */ diff --git a/include/haproxy/ssl_crtlist.h b/include/haproxy/ssl_crtlist.h index 961cfc3..f81ee9e 100644 --- a/include/haproxy/ssl_crtlist.h +++ b/include/haproxy/ssl_crtlist.h @@ -38,7 +38,7 @@ void crtlist_free(struct crtlist *crtlist); struct crtlist *crtlist_new(const char *filename, int unique); /* file loading */ -int crtlist_parse_line(char *line, char **crt_path, struct crtlist_entry *entry, const char *file, int linenum, int from_cli, char **err); +int crtlist_parse_line(char *line, char **crt_path, struct crtlist_entry *entry, struct ckch_conf *conf, const char *file, int linenum, int from_cli, char **err); int crtlist_parse_file(char *file, struct bind_conf *bind_conf, struct proxy *curproxy, struct crtlist **crtlist, char **err); int crtlist_load_cert_dir(char *path, struct bind_conf *bind_conf, struct crtlist **crtlist, char **err); diff --git a/include/haproxy/ssl_gencert.h b/include/haproxy/ssl_gencert.h new file mode 100644 index 0000000..9065934 --- /dev/null +++ b/include/haproxy/ssl_gencert.h @@ -0,0 +1,35 @@ +/* + * include/haproxy/ssl_gencert.h + * This file contains definition for ssl 'generate-certificates' option. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. 
+ * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _HAPROXY_SSL_GENCERT_H +#define _HAPROXY_SSL_GENCERT_H +#ifdef USE_OPENSSL + +#include <haproxy/listener-t.h> +#include <haproxy/ssl_sock-t.h> + +int ssl_sock_generate_certificate(const char *servername, struct bind_conf *bind_conf, SSL *ssl); +int ssl_sock_generate_certificate_from_conn(struct bind_conf *bind_conf, SSL *ssl); +SSL_CTX *ssl_sock_assign_generated_cert(unsigned int key, struct bind_conf *bind_conf, SSL *ssl); +SSL_CTX *ssl_sock_get_generated_cert(unsigned int key, struct bind_conf *bind_conf); +int ssl_sock_set_generated_cert(SSL_CTX *ctx, unsigned int key, struct bind_conf *bind_conf); +unsigned int ssl_sock_generated_cert_key(const void *data, size_t len); + +#endif /* USE_OPENSSL */ +#endif /* _HAPROXY_SSL_GENCERT_H */ diff --git a/include/haproxy/ssl_ocsp-t.h b/include/haproxy/ssl_ocsp-t.h index 028d6fa..f9fef4d 100644 --- a/include/haproxy/ssl_ocsp-t.h +++ b/include/haproxy/ssl_ocsp-t.h @@ -47,7 +47,8 @@ struct certificate_ocsp { struct ebmb_node key; unsigned char key_data[OCSP_MAX_CERTID_ASN1_LENGTH]; unsigned int key_length; - int refcount; + int refcount_store; /* Number of ckch_store that reference this certificate_ocsp */ + int refcount; /* Number of actual references to this certificate_ocsp (SSL_CTXs mostly) */ struct buffer response; long expire; X509 *issuer; @@ -60,8 +61,9 @@ struct certificate_ocsp { unsigned int last_update_status;/* Status of the last OCSP update */ unsigned int num_success; /* Number of successful updates */ unsigned int 
num_failure; /* Number of failed updates */ - unsigned int fail_count:31; /* Number of successive failures */ + unsigned int fail_count:30; /* Number of successive failures */ unsigned int update_once:1; /* Set if an entry should not be reinserted into te tree after update */ + unsigned int updating:1; /* Set if an entry is already being updated */ char path[VAR_ARRAY]; }; diff --git a/include/haproxy/ssl_ocsp.h b/include/haproxy/ssl_ocsp.h index 54a1b88..f6a72b9 100644 --- a/include/haproxy/ssl_ocsp.h +++ b/include/haproxy/ssl_ocsp.h @@ -36,6 +36,7 @@ int ssl_sock_get_ocsp_arg_kt_index(int evp_keytype); int ssl_sock_ocsp_stapling_cbk(SSL *ssl, void *arg); void ssl_sock_free_ocsp(struct certificate_ocsp *ocsp); +void ssl_sock_free_ocsp_instance(struct certificate_ocsp *ocsp); int ssl_sock_load_ocsp_response(struct buffer *ocsp_response, struct certificate_ocsp *ocsp, @@ -54,7 +55,7 @@ void ssl_destroy_ocsp_update_task(void); int ssl_ocsp_update_insert(struct certificate_ocsp *ocsp); -int ocsp_update_check_cfg_consistency(struct ckch_store *store, struct crtlist_entry *entry, char *crt_path, char **err); +int ocsp_update_init(void *value, char *buf, struct ckch_data *d, int cli, char **err); #endif /* (defined SSL_CTRL_SET_TLSEXT_STATUS_REQ_CB && !defined OPENSSL_NO_OCSP) */ diff --git a/include/haproxy/ssl_sock-t.h b/include/haproxy/ssl_sock-t.h index fdf41a7..d111883 100644 --- a/include/haproxy/ssl_sock-t.h +++ b/include/haproxy/ssl_sock-t.h @@ -105,11 +105,9 @@ enum { }; /* bind ocsp update mode */ -enum { - SSL_SOCK_OCSP_UPDATE_DFLT = 0, - SSL_SOCK_OCSP_UPDATE_OFF = 1, - SSL_SOCK_OCSP_UPDATE_ON = 2, -}; +#define SSL_SOCK_OCSP_UPDATE_OFF -1 +#define SSL_SOCK_OCSP_UPDATE_DFLT 0 +#define SSL_SOCK_OCSP_UPDATE_ON 1 /* states of the CLI IO handler for 'set ssl cert' */ enum { @@ -264,6 +262,7 @@ struct ssl_sock_ctx { struct global_ssl { char *crt_base; /* base directory path for certificates */ + char *key_base; /* base directory path for private keys */ char 
*ca_base; /* base directory path for CAs and CRLs */ char *issuers_chain_path; /* from "issuers-chain-path" */ int skip_self_issued_ca; @@ -303,11 +302,14 @@ struct global_ssl { int keylog; /* activate keylog */ int extra_files; /* which files not defined in the configuration file are we looking for */ int extra_files_noext; /* whether we remove the extension when looking up a extra file */ + int security_level; /* configure the openssl security level */ #ifndef OPENSSL_NO_OCSP struct { unsigned int delay_max; unsigned int delay_min; + int mode; /* default mode used for ocsp auto-update (off, on) */ + int disable; } ocsp_update; #endif }; diff --git a/include/haproxy/ssl_sock.h b/include/haproxy/ssl_sock.h index 02d5b02..773bb32 100644 --- a/include/haproxy/ssl_sock.h +++ b/include/haproxy/ssl_sock.h @@ -114,18 +114,16 @@ int ssl_sock_switchctx_cbk(SSL *ssl, int *al, void *priv); #endif int increment_sslconn(); -SSL_CTX *ssl_sock_assign_generated_cert(unsigned int key, struct bind_conf *bind_conf, SSL *ssl); -SSL_CTX *ssl_sock_get_generated_cert(unsigned int key, struct bind_conf *bind_conf); -int ssl_sock_set_generated_cert(SSL_CTX *ctx, unsigned int key, struct bind_conf *bind_conf); -unsigned int ssl_sock_generated_cert_key(const void *data, size_t len); void ssl_sock_load_cert_sni(struct ckch_inst *ckch_inst, struct bind_conf *bind_conf); +struct sni_ctx *ssl_sock_chose_sni_ctx(struct bind_conf *s, const char *servername, + int have_rsa_sig, int have_ecdsa_sig); #ifdef SSL_MODE_ASYNC void ssl_async_fd_handler(int fd); void ssl_async_fd_free(int fd); #endif struct issuer_chain* ssl_get0_issuer_chain(X509 *cert); int ssl_load_global_issuer_from_BIO(BIO *in, char *fp, char **err); -int ssl_sock_load_cert(char *path, struct bind_conf *bind_conf, char **err); +int ssl_sock_load_cert(char *path, struct bind_conf *bind_conf, int is_default, char **err); int ssl_sock_load_srv_cert(char *path, struct server *server, int create_if_none, char **err); void 
ssl_free_global_issuers(void); int ssl_initialize_random(void); @@ -139,6 +137,12 @@ int ssl_get_ocspresponse_detail(unsigned char *ocsp_certid, struct buffer *out); int ssl_ocsp_response_print(struct buffer *ocsp_response, struct buffer *out); #endif +#if (HA_OPENSSL_VERSION_NUMBER < 0x3000000fL) +DH *ssl_get_tmp_dh_cbk(SSL *ssl, int export, int keylen); +#else +void ssl_sock_set_tmp_dh_from_pkey(SSL_CTX *ctx, EVP_PKEY *pkey); +#endif + /* ssl shctx macro */ #define sh_ssl_sess_tree_delete(s) ebmb_delete(&(s)->key); diff --git a/include/haproxy/stats-file-t.h b/include/haproxy/stats-file-t.h new file mode 100644 index 0000000..03813e0 --- /dev/null +++ b/include/haproxy/stats-file-t.h @@ -0,0 +1,12 @@ +#ifndef _HAPROXY_STATS_FILE_T_H +#define _HAPROXY_STATS_FILE_T_H + +/* Sections present in stats-file separated by header lines. */ +enum stfile_domain { + STFILE_DOMAIN_UNSET = 0, + + STFILE_DOMAIN_PX_FE, /* #fe headers */ + STFILE_DOMAIN_PX_BE, /* #be headers */ +}; + +#endif /* _HAPROXY_STATS_FILE_T_H */ diff --git a/include/haproxy/stats-file.h b/include/haproxy/stats-file.h new file mode 100644 index 0000000..d3853b4 --- /dev/null +++ b/include/haproxy/stats-file.h @@ -0,0 +1,25 @@ +#ifndef _HAPROXY_STATS_FILE_H +#define _HAPROXY_STATS_FILE_H + +#include <haproxy/stats-file-t.h> + +#include <sys/types.h> + +#include <haproxy/buf-t.h> +#include <haproxy/stats-t.h> + +int stats_dump_fields_file(struct buffer *out, + const struct field *stats, size_t stats_count, + struct show_stat_ctx *ctx); + +void stats_dump_file_header(int type, struct buffer *out); + +/* Maximum number of parsed stat column in a header line. + * Directly based on ST_I_PX_MAX, with value doubled to obtain compatibility + * between haproxy adjacent versions. 
+ */ +#define STAT_FILE_MAX_COL_COUNT (ST_I_PX_MAX*2) + +void apply_stats_file(void); + +#endif /* _HAPROXY_STATS_FILE_H */ diff --git a/include/haproxy/stats-html-t.h b/include/haproxy/stats-html-t.h new file mode 100644 index 0000000..1b77aea --- /dev/null +++ b/include/haproxy/stats-html-t.h @@ -0,0 +1,21 @@ +#ifndef _HAPROXY_STATS_HTML_T_H +#define _HAPROXY_STATS_HTML_T_H + +/* HTTP stats : applet.st0 */ +enum { + STAT_HTTP_INIT = 0, /* Initial state */ + STAT_HTTP_HEAD, /* send headers before dump */ + STAT_HTTP_DUMP, /* dumping stats */ + STAT_HTTP_POST, /* waiting post data */ + STAT_HTTP_LAST, /* sending last chunk of response */ + STAT_HTTP_DONE, /* dump is finished */ + STAT_HTTP_END, /* finished */ +}; + +/* HTML form to limit output scope */ +#define STAT_SCOPE_TXT_MAXLEN 20 /* max len for scope substring */ +#define STAT_SCOPE_INPUT_NAME "scope" /* pattern form scope name <input> in html form */ +#define STAT_SCOPE_PATTERN "?" STAT_SCOPE_INPUT_NAME "=" + + +#endif /* _HAPROXY_STATS_HTML_T_H */ diff --git a/include/haproxy/stats-html.h b/include/haproxy/stats-html.h new file mode 100644 index 0000000..912ec59 --- /dev/null +++ b/include/haproxy/stats-html.h @@ -0,0 +1,22 @@ +#ifndef _HAPROXY_STATS_HTML_H +#define _HAPROXY_STATS_HTML_H + +#include <haproxy/stats-html-t.h> + +#include <haproxy/applet-t.h> +#include <haproxy/buf-t.h> +#include <haproxy/proxy-t.h> +#include <haproxy/stats-t.h> +#include <haproxy/stconn-t.h> + +void stats_dump_html_head(struct appctx *appctx); +void stats_dump_html_info(struct stconn *sc); +int stats_dump_fields_html(struct buffer *out, const struct field *stats, + struct show_stat_ctx *ctx); +void stats_dump_html_px_hdr(struct stconn *sc, struct proxy *px); +void stats_dump_html_px_end(struct stconn *sc, struct proxy *px); +void stats_dump_html_end(struct buffer *out); + +extern struct applet http_stats_applet; + +#endif /* _HAPROXY_STATS_HTML_H */ diff --git a/include/haproxy/stats-json.h b/include/haproxy/stats-json.h new 
file mode 100644 index 0000000..d6c4382 --- /dev/null +++ b/include/haproxy/stats-json.h @@ -0,0 +1,24 @@ +#ifndef _HAPROXY_STATS_JSON_H +#define _HAPROXY_STATS_JSON_H + +#include <haproxy/applet-t.h> +#include <haproxy/buf-t.h> +#include <haproxy/stats-t.h> + +void stats_dump_json_header(struct buffer *out); + +int stats_dump_fields_json(struct buffer *out, + const struct field *stats, size_t stats_count, + struct show_stat_ctx *ctx); + +void stats_dump_json_end(struct buffer *out); + +int stats_dump_json_info_fields(struct buffer *out, + const struct field *info, + struct show_stat_ctx *ctx); + +void stats_dump_json_schema(struct buffer *out); + +int stats_dump_json_schema_to_buffer(struct appctx *appctx); + +#endif /* _HAPROXY_STATS_JSON_H */ diff --git a/include/haproxy/stats-proxy.h b/include/haproxy/stats-proxy.h new file mode 100644 index 0000000..81a60f0 --- /dev/null +++ b/include/haproxy/stats-proxy.h @@ -0,0 +1,14 @@ +#ifndef _HAPROXY_STATS_PROXY_H +#define _HAPROXY_STATS_PROXY_H + +#include <haproxy/api-t.h> + +struct buffer; +struct htx; +struct stconn; + +int stats_dump_proxies(struct stconn *sc, struct buffer *buf, struct htx *htx); + +void proxy_stats_clear_counters(int clrall, struct list *stat_modules); + +#endif /* _HAPROXY_STATS_PROXY_H */ diff --git a/include/haproxy/stats-t.h b/include/haproxy/stats-t.h index 34a4cc2..d4d01e9 100644 --- a/include/haproxy/stats-t.h +++ b/include/haproxy/stats-t.h @@ -22,32 +22,35 @@ #ifndef _HAPROXY_STATS_T_H #define _HAPROXY_STATS_T_H +#include <import/ebtree-t.h> #include <haproxy/api-t.h> +#include <haproxy/buf-t.h> /* Flags for applet.ctx.stats.flags */ -#define STAT_FMT_HTML 0x00000001 /* dump the stats in HTML format */ -#define STAT_FMT_TYPED 0x00000002 /* use the typed output format */ -#define STAT_FMT_JSON 0x00000004 /* dump the stats in JSON format */ -#define STAT_HIDE_DOWN 0x00000008 /* hide 'down' servers in the stats page */ -#define STAT_NO_REFRESH 0x00000010 /* do not automatically refresh the 
stats page */ -#define STAT_ADMIN 0x00000020 /* indicate a stats admin level */ -#define STAT_CHUNKED 0x00000040 /* use chunked encoding (HTTP/1.1) */ -#define STAT_JSON_SCHM 0x00000080 /* dump the json schema */ - -#define STAT_HIDEVER 0x00000100 /* conf: do not report the version and reldate */ -#define STAT_SHNODE 0x00000200 /* conf: show node name */ -#define STAT_SHDESC 0x00000400 /* conf: show description */ -#define STAT_SHLGNDS 0x00000800 /* conf: show legends */ -#define STAT_SHOW_FDESC 0x00001000 /* show the field descriptions when possible */ -#define STAT_SHMODULES 0x00002000 /* conf: show modules */ -#define STAT_HIDE_MAINT 0x00004000 /* hide maint/disabled servers */ -#define STAT_CONVDONE 0x00008000 /* conf: rules conversion done */ -#define STAT_USE_FLOAT 0x00010000 /* use floats where possible in the outputs */ - -#define STAT_BOUND 0x00800000 /* bound statistics to selected proxies/types/services */ -#define STAT_STARTED 0x01000000 /* some output has occurred */ - -#define STAT_FMT_MASK 0x00000007 +#define STAT_F_FMT_HTML 0x00000001 /* dump the stats in HTML format */ +#define STAT_F_FMT_TYPED 0x00000002 /* use the typed output format */ +#define STAT_F_FMT_JSON 0x00000004 /* dump the stats in JSON format */ +#define STAT_F_FMT_FILE 0x00000008 /* dump stats-file */ +#define STAT_F_NO_REFRESH 0x00000010 /* do not automatically refresh the stats page */ +#define STAT_F_ADMIN 0x00000020 /* indicate a stats admin level */ +#define STAT_F_CHUNKED 0x00000040 /* use chunked encoding (HTTP/1.1) */ +#define STAT_F_JSON_SCHM 0x00000080 /* dump the json schema */ + +#define STAT_F_HIDEVER 0x00000100 /* conf: do not report the version and reldate */ +#define STAT_F_SHNODE 0x00000200 /* conf: show node name */ +#define STAT_F_SHDESC 0x00000400 /* conf: show description */ +#define STAT_F_SHLGNDS 0x00000800 /* conf: show legends */ +#define STAT_F_SHOW_FDESC 0x00001000 /* show the column descriptions when possible */ +#define STAT_F_SHMODULES 0x00002000 /* 
conf: show modules */ +#define STAT_F_HIDE_MAINT 0x00004000 /* hide maint/disabled servers */ +#define STAT_F_CONVDONE 0x00008000 /* conf: rules conversion done */ +#define STAT_F_USE_FLOAT 0x00010000 /* use floats where possible in the outputs */ +#define STAT_F_HIDE_DOWN 0x00020000 /* hide 'down' servers in the stats page */ + +#define STAT_F_BOUND 0x00800000 /* bound statistics to selected proxies/types/services */ +#define STAT_F_STARTED 0x01000000 /* some output has occurred */ + +#define STAT_F_FMT_MASK 0x0000000f #define STATS_TYPE_FE 0 #define STATS_TYPE_BE 1 @@ -57,17 +60,6 @@ #define STATS_DOMAIN (0) /* used for bitshifting, type of statistics: proxy or dns */ #define STATS_PX_CAP (8) /* used for bitshifting, differentiate obj1 type for proxy statistics */ -/* HTTP stats : applet.st0 */ -enum { - STAT_HTTP_INIT = 0, /* Initial state */ - STAT_HTTP_HEAD, /* send headers before dump */ - STAT_HTTP_DUMP, /* dumping stats */ - STAT_HTTP_POST, /* waiting post data */ - STAT_HTTP_LAST, /* sending last chunk of response */ - STAT_HTTP_DONE, /* dump is finished */ - STAT_HTTP_END, /* finished */ -}; - /* status codes available for the stats admin page */ enum { STAT_STATUS_INIT = 0, @@ -82,11 +74,6 @@ enum { STAT_STATUS_SIZE }; -/* HTML form to limit output scope */ -#define STAT_SCOPE_TXT_MAXLEN 20 /* max len for scope substring */ -#define STAT_SCOPE_INPUT_NAME "scope" /* pattern form scope name <input> in html form */ -#define STAT_SCOPE_PATTERN "?" STAT_SCOPE_INPUT_NAME "=" - /* Actions available for the stats admin forms */ enum { ST_ADM_ACTION_NONE = 0, @@ -265,224 +252,240 @@ enum field_scope { FS_MASK = 0xFF000000, }; -/* Show info fields for CLI output. For any field added here, please add the - * text representation in the info_fields array. Please only append at the end, - * before the INF_TOTAL_FIELDS entry, and never insert anything in the middle +/* Show info columns for CLI output. 
For any column added here, please add the + * text representation in the metrics_info array. Please only append at the end, + * before the ST_I_INF_MAX entry, and never insert anything in the middle * nor at the beginning. */ -enum info_field { - INF_NAME, - INF_VERSION, - INF_RELEASE_DATE, - INF_NBTHREAD, - INF_NBPROC, - INF_PROCESS_NUM, - INF_PID, - INF_UPTIME, - INF_UPTIME_SEC, - INF_MEMMAX_MB, - INF_POOL_ALLOC_MB, - INF_POOL_USED_MB, - INF_POOL_FAILED, - INF_ULIMIT_N, - INF_MAXSOCK, - INF_MAXCONN, - INF_HARD_MAXCONN, - INF_CURR_CONN, - INF_CUM_CONN, - INF_CUM_REQ, - INF_MAX_SSL_CONNS, - INF_CURR_SSL_CONNS, - INF_CUM_SSL_CONNS, - INF_MAXPIPES, - INF_PIPES_USED, - INF_PIPES_FREE, - INF_CONN_RATE, - INF_CONN_RATE_LIMIT, - INF_MAX_CONN_RATE, - INF_SESS_RATE, - INF_SESS_RATE_LIMIT, - INF_MAX_SESS_RATE, - INF_SSL_RATE, - INF_SSL_RATE_LIMIT, - INF_MAX_SSL_RATE, - INF_SSL_FRONTEND_KEY_RATE, - INF_SSL_FRONTEND_MAX_KEY_RATE, - INF_SSL_FRONTEND_SESSION_REUSE_PCT, - INF_SSL_BACKEND_KEY_RATE, - INF_SSL_BACKEND_MAX_KEY_RATE, - INF_SSL_CACHE_LOOKUPS, - INF_SSL_CACHE_MISSES, - INF_COMPRESS_BPS_IN, - INF_COMPRESS_BPS_OUT, - INF_COMPRESS_BPS_RATE_LIM, - INF_ZLIB_MEM_USAGE, - INF_MAX_ZLIB_MEM_USAGE, - INF_TASKS, - INF_RUN_QUEUE, - INF_IDLE_PCT, - INF_NODE, - INF_DESCRIPTION, - INF_STOPPING, - INF_JOBS, - INF_UNSTOPPABLE_JOBS, - INF_LISTENERS, - INF_ACTIVE_PEERS, - INF_CONNECTED_PEERS, - INF_DROPPED_LOGS, - INF_BUSY_POLLING, - INF_FAILED_RESOLUTIONS, - INF_TOTAL_BYTES_OUT, - INF_TOTAL_SPLICED_BYTES_OUT, - INF_BYTES_OUT_RATE, - INF_DEBUG_COMMANDS_ISSUED, - INF_CUM_LOG_MSGS, - INF_BUILD_INFO, - INF_MEMMAX_BYTES, - INF_POOL_ALLOC_BYTES, - INF_POOL_USED_BYTES, - INF_START_TIME_SEC, - INF_TAINTED, - INF_WARNINGS, - INF_MAXCONN_REACHED, - INF_BOOTTIME_MS, - INF_NICED_TASKS, +enum stat_idx_info { + ST_I_INF_NAME, + ST_I_INF_VERSION, + ST_I_INF_RELEASE_DATE, + ST_I_INF_NBTHREAD, + ST_I_INF_NBPROC, + ST_I_INF_PROCESS_NUM, + ST_I_INF_PID, + ST_I_INF_UPTIME, + ST_I_INF_UPTIME_SEC, + 
ST_I_INF_MEMMAX_MB, + ST_I_INF_POOL_ALLOC_MB, + ST_I_INF_POOL_USED_MB, + ST_I_INF_POOL_FAILED, + ST_I_INF_ULIMIT_N, + ST_I_INF_MAXSOCK, + ST_I_INF_MAXCONN, + ST_I_INF_HARD_MAXCONN, + ST_I_INF_CURR_CONN, + ST_I_INF_CUM_CONN, + ST_I_INF_CUM_REQ, + ST_I_INF_MAX_SSL_CONNS, + ST_I_INF_CURR_SSL_CONNS, + ST_I_INF_CUM_SSL_CONNS, + ST_I_INF_MAXPIPES, + ST_I_INF_PIPES_USED, + ST_I_INF_PIPES_FREE, + ST_I_INF_CONN_RATE, + ST_I_INF_CONN_RATE_LIMIT, + ST_I_INF_MAX_CONN_RATE, + ST_I_INF_SESS_RATE, + ST_I_INF_SESS_RATE_LIMIT, + ST_I_INF_MAX_SESS_RATE, + ST_I_INF_SSL_RATE, + ST_I_INF_SSL_RATE_LIMIT, + ST_I_INF_MAX_SSL_RATE, + ST_I_INF_SSL_FRONTEND_KEY_RATE, + ST_I_INF_SSL_FRONTEND_MAX_KEY_RATE, + ST_I_INF_SSL_FRONTEND_SESSION_REUSE_PCT, + ST_I_INF_SSL_BACKEND_KEY_RATE, + ST_I_INF_SSL_BACKEND_MAX_KEY_RATE, + ST_I_INF_SSL_CACHE_LOOKUPS, + ST_I_INF_SSL_CACHE_MISSES, + ST_I_INF_COMPRESS_BPS_IN, + ST_I_INF_COMPRESS_BPS_OUT, + ST_I_INF_COMPRESS_BPS_RATE_LIM, + ST_I_INF_ZLIB_MEM_USAGE, + ST_I_INF_MAX_ZLIB_MEM_USAGE, + ST_I_INF_TASKS, + ST_I_INF_RUN_QUEUE, + ST_I_INF_IDLE_PCT, + ST_I_INF_NODE, + ST_I_INF_DESCRIPTION, + ST_I_INF_STOPPING, + ST_I_INF_JOBS, + ST_I_INF_UNSTOPPABLE_JOBS, + ST_I_INF_LISTENERS, + ST_I_INF_ACTIVE_PEERS, + ST_I_INF_CONNECTED_PEERS, + ST_I_INF_DROPPED_LOGS, + ST_I_INF_BUSY_POLLING, + ST_I_INF_FAILED_RESOLUTIONS, + ST_I_INF_TOTAL_BYTES_OUT, + ST_I_INF_TOTAL_SPLICED_BYTES_OUT, + ST_I_INF_BYTES_OUT_RATE, + ST_I_INF_DEBUG_COMMANDS_ISSUED, + ST_I_INF_CUM_LOG_MSGS, + ST_I_INF_BUILD_INFO, + ST_I_INF_MEMMAX_BYTES, + ST_I_INF_POOL_ALLOC_BYTES, + ST_I_INF_POOL_USED_BYTES, + ST_I_INF_START_TIME_SEC, + ST_I_INF_TAINTED, + ST_I_INF_WARNINGS, + ST_I_INF_MAXCONN_REACHED, + ST_I_INF_BOOTTIME_MS, + ST_I_INF_NICED_TASKS, /* must always be the last one */ - INF_TOTAL_FIELDS + ST_I_INF_MAX +}; + +/* Represent an exposed statistic. 
*/ +struct stat_col { + const char *name; /* short name, used notably in CSV headers */ + const char *desc; /* user-friendly description */ + + uint32_t type; /* combination of field_nature and field_format */ + uint8_t cap; /* mask of stats_domain_px_cap to restrain metrics to an object types subset */ + + /* used only for generic metrics */ + struct { + int offset[2]; /* offset in counters */ + } metric; }; -/* Stats fields for CSV output. For any field added here, please add the text - * representation in the stat_fields array. Please only append at the end, - * before the ST_F_TOTAL_FIELDS entry, and never insert anything in the middle +/* Stats columns for CSV output. For any column added here, please add the text + * representation in the metrics_px array. Please only append at the end, + * before the ST_I_PX_MAX entry, and never insert anything in the middle * nor at the beginning.When adding an entry here, one must always add a - * corresponding one in stat_fields[] otherwise Lua's get_stats() will break, + * corresponding one in metrics_px[] otherwise Lua's get_stats() will break, * and "show stats" will show a null. 
*/ -enum stat_field { - ST_F_PXNAME, - ST_F_SVNAME, - ST_F_QCUR, - ST_F_QMAX, - ST_F_SCUR, - ST_F_SMAX, - ST_F_SLIM, - ST_F_STOT, - ST_F_BIN , - ST_F_BOUT, - ST_F_DREQ, - ST_F_DRESP, - ST_F_EREQ, - ST_F_ECON, - ST_F_ERESP, - ST_F_WRETR, - ST_F_WREDIS, - ST_F_STATUS, - ST_F_WEIGHT, - ST_F_ACT, - ST_F_BCK, - ST_F_CHKFAIL, - ST_F_CHKDOWN, - ST_F_LASTCHG, - ST_F_DOWNTIME, - ST_F_QLIMIT, - ST_F_PID, - ST_F_IID, - ST_F_SID, - ST_F_THROTTLE, - ST_F_LBTOT, - ST_F_TRACKED, - ST_F_TYPE, - ST_F_RATE, - ST_F_RATE_LIM, - ST_F_RATE_MAX, - ST_F_CHECK_STATUS, - ST_F_CHECK_CODE, - ST_F_CHECK_DURATION, - ST_F_HRSP_1XX, - ST_F_HRSP_2XX, - ST_F_HRSP_3XX, - ST_F_HRSP_4XX, - ST_F_HRSP_5XX, - ST_F_HRSP_OTHER, - ST_F_HANAFAIL, - ST_F_REQ_RATE, - ST_F_REQ_RATE_MAX, - ST_F_REQ_TOT, - ST_F_CLI_ABRT, - ST_F_SRV_ABRT, - ST_F_COMP_IN, - ST_F_COMP_OUT, - ST_F_COMP_BYP, - ST_F_COMP_RSP, - ST_F_LASTSESS, - ST_F_LAST_CHK, - ST_F_LAST_AGT, - ST_F_QTIME, - ST_F_CTIME, - ST_F_RTIME, - ST_F_TTIME, - ST_F_AGENT_STATUS, - ST_F_AGENT_CODE, - ST_F_AGENT_DURATION, - ST_F_CHECK_DESC, - ST_F_AGENT_DESC, - ST_F_CHECK_RISE, - ST_F_CHECK_FALL, - ST_F_CHECK_HEALTH, - ST_F_AGENT_RISE, - ST_F_AGENT_FALL, - ST_F_AGENT_HEALTH, - ST_F_ADDR, - ST_F_COOKIE, - ST_F_MODE, - ST_F_ALGO, - ST_F_CONN_RATE, - ST_F_CONN_RATE_MAX, - ST_F_CONN_TOT, - ST_F_INTERCEPTED, - ST_F_DCON, - ST_F_DSES, - ST_F_WREW, - ST_F_CONNECT, - ST_F_REUSE, - ST_F_CACHE_LOOKUPS, - ST_F_CACHE_HITS, - ST_F_SRV_ICUR, - ST_F_SRV_ILIM, - ST_F_QT_MAX, - ST_F_CT_MAX, - ST_F_RT_MAX, - ST_F_TT_MAX, - ST_F_EINT, - ST_F_IDLE_CONN_CUR, - ST_F_SAFE_CONN_CUR, - ST_F_USED_CONN_CUR, - ST_F_NEED_CONN_EST, - ST_F_UWEIGHT, - ST_F_AGG_SRV_STATUS, - ST_F_AGG_SRV_CHECK_STATUS, - ST_F_AGG_CHECK_STATUS, - ST_F_SRID, - ST_F_SESS_OTHER, - ST_F_H1SESS, - ST_F_H2SESS, - ST_F_H3SESS, - ST_F_REQ_OTHER, - ST_F_H1REQ, - ST_F_H2REQ, - ST_F_H3REQ, - ST_F_PROTO, +enum stat_idx_px { + ST_I_PX_PXNAME, + ST_I_PX_SVNAME, + ST_I_PX_QCUR, + ST_I_PX_QMAX, + ST_I_PX_SCUR, + ST_I_PX_SMAX, + 
ST_I_PX_SLIM, + ST_I_PX_STOT, + ST_I_PX_BIN , + ST_I_PX_BOUT, + ST_I_PX_DREQ, + ST_I_PX_DRESP, + ST_I_PX_EREQ, + ST_I_PX_ECON, + ST_I_PX_ERESP, + ST_I_PX_WRETR, + ST_I_PX_WREDIS, + ST_I_PX_STATUS, + ST_I_PX_WEIGHT, + ST_I_PX_ACT, + ST_I_PX_BCK, + ST_I_PX_CHKFAIL, + ST_I_PX_CHKDOWN, + ST_I_PX_LASTCHG, + ST_I_PX_DOWNTIME, + ST_I_PX_QLIMIT, + ST_I_PX_PID, + ST_I_PX_IID, + ST_I_PX_SID, + ST_I_PX_THROTTLE, + ST_I_PX_LBTOT, + ST_I_PX_TRACKED, + ST_I_PX_TYPE, + ST_I_PX_RATE, + ST_I_PX_RATE_LIM, + ST_I_PX_RATE_MAX, + ST_I_PX_CHECK_STATUS, + ST_I_PX_CHECK_CODE, + ST_I_PX_CHECK_DURATION, + ST_I_PX_HRSP_1XX, + ST_I_PX_HRSP_2XX, + ST_I_PX_HRSP_3XX, + ST_I_PX_HRSP_4XX, + ST_I_PX_HRSP_5XX, + ST_I_PX_HRSP_OTHER, + ST_I_PX_HANAFAIL, + ST_I_PX_REQ_RATE, + ST_I_PX_REQ_RATE_MAX, + ST_I_PX_REQ_TOT, + ST_I_PX_CLI_ABRT, + ST_I_PX_SRV_ABRT, + ST_I_PX_COMP_IN, + ST_I_PX_COMP_OUT, + ST_I_PX_COMP_BYP, + ST_I_PX_COMP_RSP, + ST_I_PX_LASTSESS, + ST_I_PX_LAST_CHK, + ST_I_PX_LAST_AGT, + ST_I_PX_QTIME, + ST_I_PX_CTIME, + ST_I_PX_RTIME, + ST_I_PX_TTIME, + ST_I_PX_AGENT_STATUS, + ST_I_PX_AGENT_CODE, + ST_I_PX_AGENT_DURATION, + ST_I_PX_CHECK_DESC, + ST_I_PX_AGENT_DESC, + ST_I_PX_CHECK_RISE, + ST_I_PX_CHECK_FALL, + ST_I_PX_CHECK_HEALTH, + ST_I_PX_AGENT_RISE, + ST_I_PX_AGENT_FALL, + ST_I_PX_AGENT_HEALTH, + ST_I_PX_ADDR, + ST_I_PX_COOKIE, + ST_I_PX_MODE, + ST_I_PX_ALGO, + ST_I_PX_CONN_RATE, + ST_I_PX_CONN_RATE_MAX, + ST_I_PX_CONN_TOT, + ST_I_PX_INTERCEPTED, + ST_I_PX_DCON, + ST_I_PX_DSES, + ST_I_PX_WREW, + ST_I_PX_CONNECT, + ST_I_PX_REUSE, + ST_I_PX_CACHE_LOOKUPS, + ST_I_PX_CACHE_HITS, + ST_I_PX_SRV_ICUR, + ST_I_PX_SRV_ILIM, + ST_I_PX_QT_MAX, + ST_I_PX_CT_MAX, + ST_I_PX_RT_MAX, + ST_I_PX_TT_MAX, + ST_I_PX_EINT, + ST_I_PX_IDLE_CONN_CUR, + ST_I_PX_SAFE_CONN_CUR, + ST_I_PX_USED_CONN_CUR, + ST_I_PX_NEED_CONN_EST, + ST_I_PX_UWEIGHT, + ST_I_PX_AGG_SRV_STATUS, + ST_I_PX_AGG_SRV_CHECK_STATUS, + ST_I_PX_AGG_CHECK_STATUS, + ST_I_PX_SRID, + ST_I_PX_SESS_OTHER, + ST_I_PX_H1SESS, + ST_I_PX_H2SESS, + ST_I_PX_H3SESS, 
+ ST_I_PX_REQ_OTHER, + ST_I_PX_H1REQ, + ST_I_PX_H2REQ, + ST_I_PX_H3REQ, + ST_I_PX_PROTO, /* must always be the last one */ - ST_F_TOTAL_FIELDS + ST_I_PX_MAX +}; + +/* Node for name-indexed stat tree from generate_stat_tree(). */ +struct stcol_node { + const struct stat_col *col; + struct ebmb_node name; }; -/* Please consider updating stats_dump_fields_*(), - * stats_dump_.*_info_fields() and stats_*_schema() - * when modifying struct field or related enums. - */ struct field { uint32_t type; union { @@ -511,9 +514,9 @@ struct stats_module { const char *name; /* functor used to generate the stats module using counters provided through data parameter */ - void (*fill_stats)(void *data, struct field *); + int (*fill_stats)(void *data, struct field *, unsigned int *); - struct name_desc *stats; /* name/description of stats provided by the module */ + struct stat_col *stats; /* statistics provided by the module */ void *counters; /* initial values of allocated counters */ size_t counters_off[COUNTERS_OFF_END]; /* list of offsets of allocated counters in various objects */ size_t stats_count; /* count of stats provided */ @@ -548,6 +551,16 @@ enum stats_domain_px_cap { STATS_PX_CAP_MASK = 0xff }; +/* Shortcut names for enum stats_domain_px_cap only for declaration convenience */ +#define STATS_PX_CAP_LFBS (STATS_PX_CAP_MASK) +#define STATS_PX_CAP_LFB_ (STATS_PX_CAP_FE|STATS_PX_CAP_BE|STATS_PX_CAP_LI) +#define STATS_PX_CAP_LF__ (STATS_PX_CAP_FE|STATS_PX_CAP_LI) +#define STATS_PX_CAP__FBS (STATS_PX_CAP_FE|STATS_PX_CAP_BE|STATS_PX_CAP_SRV) +#define STATS_PX_CAP__FB_ (STATS_PX_CAP_FE|STATS_PX_CAP_BE) +#define STATS_PX_CAP__F__ (STATS_PX_CAP_FE) +#define STATS_PX_CAP___BS (STATS_PX_CAP_BE|STATS_PX_CAP_SRV) +#define STATS_PX_CAP____S (STATS_PX_CAP_SRV) + /* the context of a "show stat" command in progress on the CLI or the stats applet */ struct show_stat_ctx { struct proxy *http_px; /* parent proxy of the current applet (only relevant for HTTP applet) */ @@ -558,9 +571,10 @@ 
struct show_stat_ctx { int scope_len; /* length of the string above in the buffer */ int field; /* current field iterator when stat line is dumped through returning function */ int px_st; /* STAT_PX_ST* */ - unsigned int flags; /* STAT_* from stats-t.h */ + unsigned int flags; /* STAT_F_* from stats-t.h */ int iid, type, sid; /* proxy id, type and service id if bounding of stats is enabled */ int st_code; /* the status code returned by an action */ + struct buffer chunk; /* temporary buffer which holds a single-line output */ enum stat_state state; /* phase of output production */ }; diff --git a/include/haproxy/stats.h b/include/haproxy/stats.h index f9e6d97..e227f3f 100644 --- a/include/haproxy/stats.h +++ b/include/haproxy/stats.h @@ -33,31 +33,42 @@ struct buffer; struct proxy; struct appctx; struct htx; +struct stconn; -/* These two structs contains all field names and descriptions according to - * the the number of entries in "enum stat_field" and "enum info_field" +/* These two structs contains all column names and descriptions according to + * the the number of entries in "enum stat_idx_px" and "enum stat_idx_info" */ -extern const struct name_desc stat_fields[]; -extern const struct name_desc info_fields[]; +extern const struct stat_col stat_cols_px[]; +extern const struct name_desc stat_cols_info[]; extern const char *stat_status_codes[]; extern struct applet http_stats_applet; -extern THREAD_LOCAL struct field info[]; -extern THREAD_LOCAL struct field *stat_l[]; +extern struct list stats_module_list[]; +extern THREAD_LOCAL struct field stat_line_info[]; +extern THREAD_LOCAL struct field *stat_lines[]; +extern struct name_desc *stat_cols[STATS_DOMAIN_COUNT]; +extern size_t stat_cols_len[STATS_DOMAIN_COUNT]; + +int generate_stat_tree(struct eb_root *st_tree, const struct stat_col cols[]); struct htx; -int stats_putchk(struct appctx *appctx, struct htx *htx); +int stats_putchk(struct appctx *appctx, struct buffer *buf, struct htx *htx); +int 
stats_is_full(struct appctx *appctx, struct buffer *buf, struct htx *htx); + +const char *stats_scope_ptr(struct appctx *appctx); -int stats_dump_one_line(const struct field *stats, size_t stats_count, struct appctx *appctx); +int stats_dump_one_line(const struct field *line, size_t stats_count, struct appctx *appctx); int stats_fill_info(struct field *info, int len, uint flags); -int stats_fill_fe_stats(struct proxy *px, struct field *stats, int len, - enum stat_field *selected_field); -int stats_fill_li_stats(struct proxy *px, struct listener *l, int flags, - struct field *stats, int len, enum stat_field *selected_field); -int stats_fill_sv_stats(struct proxy *px, struct server *sv, int flags, - struct field *stats, int len, enum stat_field *selected_field); -int stats_fill_be_stats(struct proxy *px, int flags, struct field *stats, int len, - enum stat_field *selected_field); +int stats_fill_fe_line(struct proxy *px, int flags, struct field *line, int len, + enum stat_idx_px *index); +int stats_fill_li_line(struct proxy *px, struct listener *l, int flags, + struct field *line, int len, enum stat_idx_px *index); +int stats_fill_sv_line(struct proxy *px, struct server *sv, int flags, + struct field *line, int len, enum stat_idx_px *index); +int stats_fill_be_line(struct proxy *px, int flags, struct field *line, int len, + enum stat_idx_px *index); + +int stats_dump_stat_to_buffer(struct stconn *sc, struct buffer *buf, struct htx *htx); int stats_emit_raw_data_field(struct buffer *out, const struct field *f); int stats_emit_typed_data_field(struct buffer *out, const struct field *f); @@ -65,6 +76,22 @@ int stats_emit_field_tags(struct buffer *out, const struct field *f, char delim); +/* Returns true if <col> is fully defined, false if only used as name-desc. 
*/ +static inline int stcol_is_generic(const struct stat_col *col) +{ + return !!(col->cap); +} + +static inline enum field_format stcol_format(const struct stat_col *col) +{ + return col->type & FF_MASK; +} + +static inline enum field_nature stcol_nature(const struct stat_col *col) +{ + return col->type & FN_MASK; +} + static inline enum field_format field_format(const struct field *f, int e) { return f[e].type & FF_MASK; @@ -129,6 +156,16 @@ static inline struct field mkf_flt(uint32_t type, double value) #define MK_STATS_PROXY_DOMAIN(px_cap) \ ((px_cap) << STATS_PX_CAP | STATS_DOMAIN_PROXY) +static inline uint8_t stats_get_domain(uint32_t domain) +{ + return domain >> STATS_DOMAIN & STATS_DOMAIN_MASK; +} + +static inline enum stats_domain_px_cap stats_px_get_cap(uint32_t domain) +{ + return domain >> STATS_PX_CAP & STATS_PX_CAP_MASK; +} + int stats_allocate_proxy_counters_internal(struct extra_counters **counters, int type, int px_cap); int stats_allocate_proxy_counters(struct proxy *px); diff --git a/include/haproxy/stconn-t.h b/include/haproxy/stconn-t.h index 63bcb79..f418e95 100644 --- a/include/haproxy/stconn-t.h +++ b/include/haproxy/stconn-t.h @@ -26,6 +26,7 @@ #include <haproxy/connection-t.h> #include <haproxy/pipe-t.h> #include <haproxy/show_flags-t.h> +#include <haproxy/task-t.h> #include <haproxy/xref-t.h> enum iobuf_flags { @@ -40,6 +41,13 @@ enum iobuf_flags { IOBUF_FL_EOI = 0x00000010, /* A EOI was encountered on producer side */ }; +/* Flags used */ +enum nego_ff_flags { + NEGO_FF_FL_NONE = 0x00000000, /* For initialization purposes */ + NEGO_FF_FL_MAY_SPLICE = 0x00000001, /* Consumer may choose to use kernel splicing if it supports it */ + NEGO_FF_FL_EXACT_SIZE = 0x00000002, /* Size passed for the nego is the expected exact size to forwarded */ +}; + struct iobuf { struct pipe *pipe; /* non-NULL only when data present */ struct buffer *buf; @@ -107,6 +115,14 @@ enum se_flags { SE_FL_APPLET_NEED_CONN = 0x80000000, /* applet is waiting for the 
other side to (fail to) connect */ }; +/* Shutdown modes */ +enum se_shut_mode { + SE_SHR_DRAIN = 0x00000001, /* read shutdown, drain any extra stuff */ + SE_SHR_RESET = 0x00000002, /* read shutdown, reset any extra stuff */ + SE_SHW_NORMAL = 0x00000004, /* regular write shutdown */ + SE_SHW_SILENT = 0x00000008, /* imminent close, don't notify peer */ +}; + /* This function is used to report flags in debugging tools. Please reflect * below any single-bit flag addition above in the same order via the * __APPEND_FLAG macro. The new end of the buffer is returned. @@ -188,6 +204,7 @@ enum sc_flags { SC_FL_SHUT_DONE = 0x00020000, /* A shutdown was performed for the SC */ SC_FL_EOS = 0x00040000, /* End of stream was reached (from down side to up side) */ + SC_FL_HAVE_BUFF = 0x00080000, /* A buffer is ready, flag will be cleared once allocated */ }; /* This function is used to report flags in debugging tools. Please reflect @@ -205,7 +222,7 @@ static forceinline char *sc_show_flags(char *buf, size_t len, const char *delim, _(SC_FL_NEED_BUFF, _(SC_FL_NEED_ROOM, _(SC_FL_RCV_ONCE, _(SC_FL_SND_ASAP, _(SC_FL_SND_NEVERWAIT, _(SC_FL_SND_EXP_MORE, _(SC_FL_ABRT_WANTED, _(SC_FL_SHUT_WANTED, _(SC_FL_ABRT_DONE, _(SC_FL_SHUT_DONE, - _(SC_FL_EOS))))))))))))))))))); + _(SC_FL_EOS, _(SC_FL_HAVE_BUFF)))))))))))))))))))); /* epilogue */ _(~0U); return buf; @@ -250,6 +267,24 @@ enum sc_state_bit { struct stconn; +/* represent the abort code, enriched with contextual info: + * - First 5 bits are used for the source (31 possible sources) + * - other bits are reserved for now + */ +#define SE_ABRT_SRC_SHIFT 0 +#define SE_ABRT_SRC_MASK 0x0000001f + +#define SE_ABRT_SRC_MUX_PT 0x01 /* Code set by the PT mux */ +#define SE_ABRT_SRC_MUX_H1 0x02 /* Code set bu the H1 mux */ +#define SE_ABRT_SRC_MUX_H2 0x03 /* Code set bu the H2 mux */ +#define SE_ABRT_SRC_MUX_QUIC 0x04 /* Code set bu the QUIC/H3 mux */ +#define SE_ABRT_SRC_MUX_FCGI 0x05 /* Code set bu the FCGI mux */ + +struct se_abort_info { + 
uint32_t info; + uint64_t code; +}; + /* A Stream Endpoint Descriptor (sedesc) is the link between the stream * connector (ex. stconn) and the Stream Endpoint (mux or appctx). * It always exists for either of them, and binds them together. It also @@ -280,6 +315,7 @@ struct sedesc { struct stconn *sc; /* the stream connector we're attached to, or NULL */ struct iobuf iobuf; /* contains data forwarded by the other side and that must be sent by the stream endpoint */ unsigned int flags; /* SE_FL_* */ + struct se_abort_info abort_info; /* Info about abort, as reported by the endpoint and eventually enriched by the app level */ unsigned int lra; /* the last read activity */ unsigned int fsb; /* the first send blocked */ /* 4 bytes hole here */ diff --git a/include/haproxy/stconn.h b/include/haproxy/stconn.h index 7869fa3..f60eaa8 100644 --- a/include/haproxy/stconn.h +++ b/include/haproxy/stconn.h @@ -34,11 +34,13 @@ struct appctx; struct stream; struct check; -#define IS_HTX_SC(sc) (sc_conn(sc) && IS_HTX_CONN(__sc_conn(sc))) +#define IS_HTX_SC(sc) ((sc_conn(sc) && IS_HTX_CONN(__sc_conn(sc))) || (sc_appctx(sc) && IS_HTX_STRM(__sc_strm(sc)))) struct sedesc *sedesc_new(); void sedesc_free(struct sedesc *sedesc); +void se_shutdown(struct sedesc *sedesc, enum se_shut_mode mode); + struct stconn *sc_new_from_endp(struct sedesc *sedesc, struct session *sess, struct buffer *input); struct stconn *sc_new_from_strm(struct stream *strm, unsigned int flags); struct stconn *sc_new_from_check(struct check *check, unsigned int flags); @@ -255,7 +257,7 @@ static inline void *__sc_mux_strm(const struct stconn *sc) { return __sc_endp(sc); } -static inline struct appctx *sc_mux_strm(const struct stconn *sc) +static inline void *sc_mux_strm(const struct stconn *sc) { if (sc_ep_test(sc, SE_FL_T_MUX)) return __sc_mux_strm(sc); @@ -318,54 +320,6 @@ static inline const char *sc_get_data_name(const struct stconn *sc) return sc->app_ops->name; } -/* shut read */ -static inline void 
sc_conn_shutr(struct stconn *sc, enum co_shr_mode mode) -{ - const struct mux_ops *mux; - - BUG_ON(!sc_conn(sc)); - - if (sc_ep_test(sc, SE_FL_SHR)) - return; - - /* clean data-layer shutdown */ - mux = sc_mux_ops(sc); - if (mux && mux->shutr) - mux->shutr(sc, mode); - sc_ep_set(sc, (mode == CO_SHR_DRAIN) ? SE_FL_SHRD : SE_FL_SHRR); -} - -/* shut write */ -static inline void sc_conn_shutw(struct stconn *sc, enum co_shw_mode mode) -{ - const struct mux_ops *mux; - - BUG_ON(!sc_conn(sc)); - - if (sc_ep_test(sc, SE_FL_SHW)) - return; - - /* clean data-layer shutdown */ - mux = sc_mux_ops(sc); - if (mux && mux->shutw) - mux->shutw(sc, mode); - sc_ep_set(sc, (mode == CO_SHW_NORMAL) ? SE_FL_SHWN : SE_FL_SHWS); -} - -/* completely close a stream connector (but do not detach it) */ -static inline void sc_conn_shut(struct stconn *sc) -{ - sc_conn_shutw(sc, CO_SHW_SILENT); - sc_conn_shutr(sc, CO_SHR_RESET); -} - -/* completely close a stream connector after draining possibly pending data (but do not detach it) */ -static inline void sc_conn_drain_and_shut(struct stconn *sc) -{ - sc_conn_shutw(sc, CO_SHW_SILENT); - sc_conn_shutr(sc, CO_SHR_DRAIN); -} - /* Returns non-zero if the stream connector's Rx path is blocked because of * lack of room in the input buffer. This usually happens after applets failed * to deliver data into the channel's buffer and reported it via sc_need_room(). @@ -423,12 +377,15 @@ static inline void se_need_remote_conn(struct sedesc *se) } /* The application layer tells the stream connector that it just got the input - * buffer it was waiting for. A read activity is reported. + * buffer it was waiting for. A read activity is reported. The SC_FL_HAVE_BUFF + * flag is set and held until sc_used_buff() is called to indicatee it was + * used. 
*/ static inline void sc_have_buff(struct stconn *sc) { if (sc->flags & SC_FL_NEED_BUFF) { sc->flags &= ~SC_FL_NEED_BUFF; + sc->flags |= SC_FL_HAVE_BUFF; sc_ep_report_read_activity(sc); } } @@ -443,6 +400,14 @@ static inline void sc_need_buff(struct stconn *sc) sc->flags |= SC_FL_NEED_BUFF; } +/* The stream connector indicates that it has successfully allocated the buffer + * it was previously waiting for so it drops the SC_FL_HAVE_BUFF bit. + */ +static inline void sc_used_buff(struct stconn *sc) +{ + sc->flags &= ~SC_FL_HAVE_BUFF; +} + /* Tell a stream connector some room was made in the input buffer and any * failed attempt to inject data into it may be tried again. This is usually * called after a successful transfer of buffer contents to the other side. @@ -502,7 +467,7 @@ static inline void se_need_more_data(struct sedesc *se) } -static inline size_t se_nego_ff(struct sedesc *se, struct buffer *input, size_t count, unsigned int may_splice) +static inline size_t se_nego_ff(struct sedesc *se, struct buffer *input, size_t count, unsigned int flags) { size_t ret = 0; @@ -517,7 +482,7 @@ static inline size_t se_nego_ff(struct sedesc *se, struct buffer *input, size_t goto end; } - ret = mux->nego_fastfwd(se->sc, input, count, may_splice); + ret = mux->nego_fastfwd(se->sc, input, count, flags); if (se->iobuf.flags & IOBUF_FL_FF_BLOCKED) { sc_ep_report_blocked_send(se->sc, 0); @@ -537,21 +502,53 @@ static inline size_t se_nego_ff(struct sedesc *se, struct buffer *input, size_t return ret; } -static inline void se_done_ff(struct sedesc *se) +/* Returns the number of bytes forwarded. May be 0 if nothing is forwarded. It + * may also be 0 if there is nothing to forward. Note it is not dependent on + * data in the buffer but only on the amount of data to forward. 
+ */ +static inline size_t se_done_ff(struct sedesc *se) { + size_t ret = 0; + if (se_fl_test(se, SE_FL_T_MUX)) { const struct mux_ops *mux = se->conn->mux; - size_t sent, to_send = se_ff_data(se); + size_t to_send = se_ff_data(se); BUG_ON(!mux->done_fastfwd); - sent = mux->done_fastfwd(se->sc); - if (to_send) { - if (sent == to_send) + ret = mux->done_fastfwd(se->sc); + if (ret) { + /* Something was forwarded, unblock the zero-copy forwarding. + * If all data was sent, report and send activity. + * Otherwise report a conditional blocked send. + */ + se->iobuf.flags &= ~IOBUF_FL_FF_BLOCKED; + if (ret == to_send) sc_ep_report_send_activity(se->sc); else - sc_ep_report_blocked_send(se->sc, sent != 0); + sc_ep_report_blocked_send(se->sc, 1); + } + else { + /* Nothing was forwarded. If there was something to forward, + * it means the sends are blocked. + * In addition, if the zero-copy forwarding is blocked because the + * producer requests more room, we must subs for sends. + */ + if (to_send) + sc_ep_report_blocked_send(se->sc, 0); + if (se->iobuf.flags & IOBUF_FL_FF_BLOCKED) { + sc_ep_report_blocked_send(se->sc, 0); + + if (!(se->sc->wait_event.events & SUB_RETRY_SEND)) { + /* The SC must be subs for send to be notify when some + * space is made + */ + mux->subscribe(se->sc, SUB_RETRY_SEND, &se->sc->wait_event); + } + } } } + + return ret; } #endif /* _HAPROXY_STCONN_H */ diff --git a/include/haproxy/stick_table-t.h b/include/haproxy/stick_table-t.h index 749cb9a..4b98439 100644 --- a/include/haproxy/stick_table-t.h +++ b/include/haproxy/stick_table-t.h @@ -58,7 +58,8 @@ enum { STKTABLE_DT_GPT, /* array of gpt */ STKTABLE_DT_GPC, /* array of gpc */ STKTABLE_DT_GPC_RATE, /* array of gpc_rate */ - + STKTABLE_DT_GLITCH_CNT, /* cumulated number of front glitches */ + STKTABLE_DT_GLITCH_RATE, /* rate of front glitches */ STKTABLE_STATIC_DATA_TYPES,/* number of types above */ /* up to STKTABLE_EXTRA_DATA_TYPES types may be registered here, always @@ -118,7 +119,7 @@ union 
stktable_data { unsigned long long std_t_ull; struct freq_ctr std_t_frqp; struct dict_entry *std_t_dict; -}; +} __attribute__((packed, aligned(sizeof(int)))); /* known data types */ struct stktable_data_type { @@ -146,7 +147,8 @@ struct stksess { unsigned int expire; /* session expiration date */ unsigned int ref_cnt; /* reference count, can only purge when zero */ __decl_thread(HA_RWLOCK_T lock); /* lock related to the table entry */ - int shard; /* shard */ + int shard; /* shard number used by peers */ + int seen; /* 0 only when no peer has seen this entry yet */ struct eb32_node exp; /* ebtree node used to hold the session in expiration tree */ struct eb32_node upd; /* ebtree node used to hold the update sequence tree */ struct ebmb_node key; /* ebtree node used to hold the session in table */ @@ -196,8 +198,12 @@ struct stktable { THREAD_ALIGN(64); - struct eb_root keys; /* head of sticky session tree */ - struct eb_root exps; /* head of sticky session expiration tree */ + struct { + struct eb_root keys; /* head of sticky session tree */ + struct eb_root exps; /* head of sticky session expiration tree */ + __decl_thread(HA_RWLOCK_T sh_lock); /* for the trees above */ + } shards[CONFIG_HAP_TBL_BUCKETS]; + unsigned int refcnt; /* number of local peer over all peers sections attached to this table */ unsigned int current; /* number of sticky sessions currently in table */ diff --git a/include/haproxy/stick_table.h b/include/haproxy/stick_table.h index 3200437..2c5e7a2 100644 --- a/include/haproxy/stick_table.h +++ b/include/haproxy/stick_table.h @@ -29,7 +29,9 @@ #include <haproxy/freq_ctr.h> #include <haproxy/sample-t.h> #include <haproxy/stick_table-t.h> +#include <haproxy/thread.h> #include <haproxy/ticks.h> +#include <haproxy/xxhash.h> extern struct stktable *stktables_list; extern struct pool_head *pool_head_stk_ctr; @@ -191,6 +193,19 @@ static inline void *stktable_data_ptr_idx(struct stktable *t, struct stksess *ts return __stktable_data_ptr(t, ts, type) + 
idx*stktable_type_size(stktable_data_types[type].std_type); } +/* return a shard number for key <key> of len <len> present in table <t>, for + * use with the tree indexing. The value will be from 0 to + * CONFIG_HAP_TBL_BUCKETS-1. + */ +static inline uint stktable_calc_shard_num(const struct stktable *t, const void *key, size_t len) +{ +#if CONFIG_HAP_TBL_BUCKETS > 1 + return XXH32(key, len, t->hash_seed) % CONFIG_HAP_TBL_BUCKETS; +#else + return 0; +#endif +} + /* kill an entry if it's expired and its ref_cnt is zero */ static inline int __stksess_kill_if_expired(struct stktable *t, struct stksess *ts) { @@ -202,14 +217,26 @@ static inline int __stksess_kill_if_expired(struct stktable *t, struct stksess * static inline void stksess_kill_if_expired(struct stktable *t, struct stksess *ts, int decrefcnt) { + uint shard; + size_t len; if (decrefcnt && HA_ATOMIC_SUB_FETCH(&ts->ref_cnt, 1) != 0) return; if (t->expire != TICK_ETERNITY && tick_is_expired(ts->expire, now_ms)) { - HA_RWLOCK_WRLOCK(STK_TABLE_LOCK, &t->lock); + if (t->type == SMP_T_STR) + len = strlen((const char *)ts->key.key); + else + len = t->key_size; + + shard = stktable_calc_shard_num(t, ts->key.key, len); + + /* make the compiler happy when shard is not used without threads */ + ALREADY_CHECKED(shard); + + HA_RWLOCK_WRLOCK(STK_TABLE_LOCK, &t->shards[shard].sh_lock); __stksess_kill_if_expired(t, ts); - HA_RWLOCK_WRUNLOCK(STK_TABLE_LOCK, &t->lock); + HA_RWLOCK_WRUNLOCK(STK_TABLE_LOCK, &t->shards[shard].sh_lock); } } @@ -401,4 +428,36 @@ static inline int stkctr_inc_bytes_out_ctr(struct stkctr *stkctr, unsigned long return 1; } +/* Add <inc> to the number of cumulated front glitches in the tracked counter + * <stkctr>. It returns 0 if the entry pointer does not exist and nothing is + * performed. Otherwise it returns 1. 
+ */ +static inline int stkctr_add_glitch_ctr(struct stkctr *stkctr, uint inc) +{ + struct stksess *ts; + void *ptr1, *ptr2; + + ts = stkctr_entry(stkctr); + if (!ts) + return 0; + + HA_RWLOCK_WRLOCK(STK_SESS_LOCK, &ts->lock); + + ptr1 = stktable_data_ptr(stkctr->table, ts, STKTABLE_DT_GLITCH_CNT); + if (ptr1) + stktable_data_cast(ptr1, std_t_uint) += inc; + + ptr2 = stktable_data_ptr(stkctr->table, ts, STKTABLE_DT_GLITCH_RATE); + if (ptr2) + update_freq_ctr_period(&stktable_data_cast(ptr2, std_t_frqp), + stkctr->table->data_arg[STKTABLE_DT_GLITCH_RATE].u, inc); + + HA_RWLOCK_WRUNLOCK(STK_SESS_LOCK, &ts->lock); + + /* If data was modified, we need to touch to re-schedule sync */ + if (ptr1 || ptr2) + stktable_touch_local(stkctr->table, ts, 0); + return 1; +} + #endif /* _HAPROXY_STICK_TABLE_H */ diff --git a/include/haproxy/stream-t.h b/include/haproxy/stream-t.h index 4280692..b96512e 100644 --- a/include/haproxy/stream-t.h +++ b/include/haproxy/stream-t.h @@ -40,7 +40,7 @@ */ #define SF_DIRECT 0x00000001 /* connection made on the server matching the client cookie */ #define SF_ASSIGNED 0x00000002 /* no need to assign a server to this stream */ -/* unused: 0x00000004 */ +#define SF_MAYALLOC 0x00000004 /* we were notified that a work buffer might be available now */ #define SF_BE_ASSIGNED 0x00000008 /* a backend was assigned. Conns are accounted. */ #define SF_FORCE_PRST 0x00000010 /* force persistence here, even if server is down */ @@ -86,6 +86,8 @@ #define SF_SRV_REUSED_ANTICIPATED 0x00200000 /* the connection was reused but the mux is not ready yet */ #define SF_WEBSOCKET 0x00400000 /* websocket stream */ // TODO: must be removed #define SF_SRC_ADDR 0x00800000 /* get the source ip/port with getsockname */ +#define SF_BC_MARK 0x01000000 /* need to set specific mark on backend/srv conn upon connect */ +#define SF_BC_TOS 0x02000000 /* need to set specific tos on backend/srv conn upon connect */ /* This function is used to report flags in debugging tools. 
Please reflect * below any single-bit flag addition above in the same order via the @@ -100,7 +102,7 @@ static forceinline char *strm_show_flags(char *buf, size_t len, const char *deli _(0); /* flags & enums */ _(SF_IGNORE_PRST, _(SF_SRV_REUSED, _(SF_SRV_REUSED_ANTICIPATED, - _(SF_WEBSOCKET, _(SF_SRC_ADDR))))); + _(SF_WEBSOCKET, _(SF_SRC_ADDR, _(SF_BC_MARK, _(SF_BC_TOS))))))); _e(SF_FINST_MASK, SF_FINST_R, _e(SF_FINST_MASK, SF_FINST_C, _e(SF_FINST_MASK, SF_FINST_H, _e(SF_FINST_MASK, SF_FINST_D, @@ -114,9 +116,9 @@ static forceinline char *strm_show_flags(char *buf, size_t len, const char *deli _e(SF_ERR_MASK, SF_ERR_DOWN, _e(SF_ERR_MASK, SF_ERR_KILLED, _e(SF_ERR_MASK, SF_ERR_UP, _e(SF_ERR_MASK, SF_ERR_CHK_PORT)))))))))))); - _(SF_DIRECT, _(SF_ASSIGNED, _(SF_BE_ASSIGNED, _(SF_FORCE_PRST, + _(SF_DIRECT, _(SF_ASSIGNED, _(SF_MAYALLOC, _(SF_BE_ASSIGNED, _(SF_FORCE_PRST, _(SF_MONITOR, _(SF_CURR_SESS, _(SF_CONN_EXP, _(SF_REDISP, - _(SF_IGNORE, _(SF_REDIRECTABLE, _(SF_HTX))))))))))); + _(SF_IGNORE, _(SF_REDIRECTABLE, _(SF_HTX)))))))))))); /* epilogue */ _(~0U); @@ -209,6 +211,9 @@ struct stream { int flags; /* some flags describing the stream */ unsigned int uniq_id; /* unique ID used for the traces */ + uint32_t bc_mark; /* set mark on back conn if SF_BC_MARK is set */ + uint8_t bc_tos; /* set tos on back conn if SF_BC_TOS is set */ + /* 3 unused bytes here */ enum obj_type *target; /* target to use for this stream */ struct session *sess; /* the session this stream is attached to */ diff --git a/include/haproxy/stream.h b/include/haproxy/stream.h index a884007..12c58b8 100644 --- a/include/haproxy/stream.h +++ b/include/haproxy/stream.h @@ -69,7 +69,7 @@ void stream_shutdown(struct stream *stream, int why); void stream_dump_and_crash(enum obj_type *obj, int rate); void strm_dump_to_buffer(struct buffer *buf, const struct stream *strm, const char *pfx, uint32_t anon_key); -struct ist stream_generate_unique_id(struct stream *strm, struct list *format); +struct ist 
stream_generate_unique_id(struct stream *strm, struct lf_expr *format); void stream_process_counters(struct stream *s); void sess_change_server(struct stream *strm, struct server *newsrv); diff --git a/include/haproxy/systemd.h b/include/haproxy/systemd.h new file mode 100644 index 0000000..65b0ab6 --- /dev/null +++ b/include/haproxy/systemd.h @@ -0,0 +1,7 @@ +#ifndef _HAPROXY_SYSTEMD_H +#define _HAPROXY_SYSTEMD_H + +int sd_notify(int unset_environment, const char *message); +int sd_notifyf(int unset_environment, const char *format, ...); + +#endif diff --git a/include/haproxy/task-t.h b/include/haproxy/task-t.h index ea52de9..b525420 100644 --- a/include/haproxy/task-t.h +++ b/include/haproxy/task-t.h @@ -164,6 +164,24 @@ struct tasklet { */ }; +/* Note: subscribing to these events is only valid after the caller has really + * attempted to perform the operation, and failed to proceed or complete. + */ +enum sub_event_type { + SUB_RETRY_RECV = 0x00000001, /* Schedule the tasklet when we can attempt to recv again */ + SUB_RETRY_SEND = 0x00000002, /* Schedule the tasklet when we can attempt to send again */ +}; + +/* Describes a set of subscriptions. Multiple events may be registered at the + * same time. The callee should assume everything not pending for completion is + * implicitly possible. It's illegal to change the tasklet if events are still + * registered. + */ +struct wait_event { + struct tasklet *tasklet; + int events; /* set of enum sub_event_type above */ +}; + /* * The task callback (->process) is responsible for updating ->expire. 
It must * return a pointer to the task itself, except if the task has been deleted, in diff --git a/include/haproxy/tcpcheck-t.h b/include/haproxy/tcpcheck-t.h index 8878995..22310ee 100644 --- a/include/haproxy/tcpcheck-t.h +++ b/include/haproxy/tcpcheck-t.h @@ -134,9 +134,9 @@ struct tcpcheck_connect { }; struct tcpcheck_http_hdr { - struct ist name; /* the header name */ - struct list value; /* the log-format string value */ - struct list list; /* header chained list */ + struct ist name; /* the header name */ + struct lf_expr value; /* the log-format string value */ + struct list list; /* header linked list */ }; struct tcpcheck_codes { @@ -147,20 +147,20 @@ struct tcpcheck_codes { struct tcpcheck_send { enum tcpcheck_send_type type; union { - struct ist data; /* an ASCII string or a binary sequence */ - struct list fmt; /* an ASCII or hexa log-format string */ + struct ist data; /* an ASCII string or a binary sequence */ + struct lf_expr fmt; /* an ASCII or hexa log-format string */ struct { unsigned int flags; /* TCPCHK_SND_HTTP_FL_* */ struct http_meth meth; /* the HTTP request method */ union { struct ist uri; /* the HTTP request uri is a string */ - struct list uri_fmt; /* or a log-format string */ + struct lf_expr uri_fmt; /* or a log-format string */ }; struct ist vsn; /* the HTTP request version string */ struct list hdrs; /* the HTTP request header list */ union { struct ist body; /* the HTTP request payload is a string */ - struct list body_fmt; /* or a log-format string */ + struct lf_expr body_fmt;/* or a log-format string */ }; } http; /* Info about the HTTP request to send */ }; @@ -173,16 +173,16 @@ struct tcpcheck_expect { struct ist data; /* Matching a literal string / binary anywhere in the response. */ struct my_regex *regex; /* Matching a regex pattern. 
*/ struct tcpcheck_codes codes; /* Matching a list of codes */ - struct list fmt; /* Matching a log-format string / binary */ + struct lf_expr fmt; /* Matching a log-format string / binary */ struct { union { struct ist name; - struct list name_fmt; + struct lf_expr name_fmt; struct my_regex *name_re; }; union { struct ist value; - struct list value_fmt; + struct lf_expr value_fmt; struct my_regex *value_re; }; } hdr; /* Matching a header pattern */ @@ -196,9 +196,9 @@ struct tcpcheck_expect { enum healthcheck_status ok_status; /* The healthcheck status to use on success (default: L7OKD) */ enum healthcheck_status err_status; /* The healthcheck status to use on error (default: L7RSP) */ enum healthcheck_status tout_status; /* The healthcheck status to use on timeout (default: L7TOUT) */ - struct list onerror_fmt; /* log-format string to use as comment on error */ - struct list onsuccess_fmt; /* log-format string to use as comment on success (if last rule) */ - struct sample_expr *status_expr; /* sample expr to determine the check status code */ + struct lf_expr onerror_fmt; /* log-format string to use as comment on error */ + struct lf_expr onsuccess_fmt; /* log-format string to use as comment on success (if last rule) */ + struct sample_expr *status_expr; /* sample expr to determine the check status code */ }; struct tcpcheck_action_kw { diff --git a/include/haproxy/tcpcheck.h b/include/haproxy/tcpcheck.h index 3abd1ef..55c564a 100644 --- a/include/haproxy/tcpcheck.h +++ b/include/haproxy/tcpcheck.h @@ -83,6 +83,10 @@ struct tcpcheck_rule *parse_tcpcheck_expect(char **args, int cur_arg, struct pro struct list *rules, unsigned int proto, const char *file, int line, char **errmsg); +int proxy_parse_tcpcheck(char **args, int section, struct proxy *curpx, + const struct proxy *defpx, const char *file, int line, + char **errmsg); + int proxy_parse_tcp_check_opt(char **args, int cur_arg, struct proxy *curpx, const struct proxy *defpx, const char *file, int line); int 
proxy_parse_redis_check_opt(char **args, int cur_arg, struct proxy *curpx, const struct proxy *defpx, diff --git a/include/haproxy/thread-t.h b/include/haproxy/thread-t.h index f3552c2..5d36107 100644 --- a/include/haproxy/thread-t.h +++ b/include/haproxy/thread-t.h @@ -162,4 +162,59 @@ struct ha_rwlock { #endif /* DEBUG_THREAD */ +/* WARNING!!! if you update this enum, please also keep lock_label() up to date + * below. + */ +enum lock_label { + TASK_RQ_LOCK, + TASK_WQ_LOCK, + LISTENER_LOCK, + PROXY_LOCK, + SERVER_LOCK, + LBPRM_LOCK, + SIGNALS_LOCK, + STK_TABLE_LOCK, + STK_SESS_LOCK, + APPLETS_LOCK, + PEER_LOCK, + SHCTX_LOCK, + SSL_LOCK, + SSL_GEN_CERTS_LOCK, + PATREF_LOCK, + PATEXP_LOCK, + VARS_LOCK, + COMP_POOL_LOCK, + LUA_LOCK, + NOTIF_LOCK, + SPOE_APPLET_LOCK, + DNS_LOCK, + PID_LIST_LOCK, + EMAIL_ALERTS_LOCK, + PIPES_LOCK, + TLSKEYS_REF_LOCK, + AUTH_LOCK, + RING_LOCK, + DICT_LOCK, + PROTO_LOCK, + QUEUE_LOCK, + CKCH_LOCK, + SNI_LOCK, + SSL_SERVER_LOCK, + SFT_LOCK, /* sink forward target */ + IDLE_CONNS_LOCK, + OCSP_LOCK, + QC_CID_LOCK, + CACHE_LOCK, + OTHER_LOCK, + /* WT: make sure never to use these ones outside of development, + * we need them for lock profiling! + */ + DEBUG1_LOCK, + DEBUG2_LOCK, + DEBUG3_LOCK, + DEBUG4_LOCK, + DEBUG5_LOCK, + LOCK_LABELS +}; + #endif /* _HAPROXY_THREAD_T_H */ diff --git a/include/haproxy/thread.h b/include/haproxy/thread.h index 8c7520b..0984c67 100644 --- a/include/haproxy/thread.h +++ b/include/haproxy/thread.h @@ -386,62 +386,6 @@ int thread_cpu_mask_forced(void); #define HA_RWLOCK_TRYSKLOCK(lbl,l) __ha_rwlock_trysklock(lbl, l, __func__, __FILE__, __LINE__) #define HA_RWLOCK_TRYRDTOSK(lbl,l) __ha_rwlock_tryrdtosk(lbl, l, __func__, __FILE__, __LINE__) -/* WARNING!!! if you update this enum, please also keep lock_label() up to date - * below. 
- */ -enum lock_label { - TASK_RQ_LOCK, - TASK_WQ_LOCK, - LISTENER_LOCK, - PROXY_LOCK, - SERVER_LOCK, - LBPRM_LOCK, - SIGNALS_LOCK, - STK_TABLE_LOCK, - STK_SESS_LOCK, - APPLETS_LOCK, - PEER_LOCK, - SHCTX_LOCK, - SSL_LOCK, - SSL_GEN_CERTS_LOCK, - PATREF_LOCK, - PATEXP_LOCK, - VARS_LOCK, - COMP_POOL_LOCK, - LUA_LOCK, - NOTIF_LOCK, - SPOE_APPLET_LOCK, - DNS_LOCK, - PID_LIST_LOCK, - EMAIL_ALERTS_LOCK, - PIPES_LOCK, - TLSKEYS_REF_LOCK, - AUTH_LOCK, - RING_LOCK, - DICT_LOCK, - PROTO_LOCK, - QUEUE_LOCK, - CKCH_LOCK, - SNI_LOCK, - SSL_SERVER_LOCK, - SFT_LOCK, /* sink forward target */ - IDLE_CONNS_LOCK, - OCSP_LOCK, - QC_CID_LOCK, - CACHE_LOCK, - OTHER_LOCK, - /* WT: make sure never to use these ones outside of development, - * we need them for lock profiling! - */ - DEBUG1_LOCK, - DEBUG2_LOCK, - DEBUG3_LOCK, - DEBUG4_LOCK, - DEBUG5_LOCK, - LOCK_LABELS -}; - - /* Following functions are used to collect some stats about locks. We wrap * pthread functions to known how much time we wait in a lock. */ diff --git a/include/haproxy/tinfo-t.h b/include/haproxy/tinfo-t.h index 357c4c0..636d5b2 100644 --- a/include/haproxy/tinfo-t.h +++ b/include/haproxy/tinfo-t.h @@ -65,6 +65,8 @@ enum { #define TH_FL_STARTED 0x00000010 /* set once the thread starts */ #define TH_FL_IN_LOOP 0x00000020 /* set only inside the polling loop */ +/* we have 4 buffer-wait queues, in highest to lowest emergency order */ +#define DYNBUF_NBQ 4 /* Thread group information. This defines a base and a count of global thread * IDs which belong to it, and which can be looked up into thread_info/ctx. 
It @@ -110,7 +112,7 @@ struct thread_info { uint tid, ltid; /* process-wide and group-wide thread ID (start at 0) */ ulong ltid_bit; /* bit masks for the tid/ltid */ uint tgid; /* ID of the thread group this thread belongs to (starts at 1; 0=unset) */ - /* 32-bit hole here */ + uint ring_queue; /* queue number for the rings */ ullong pth_id; /* the pthread_t cast to a ullong */ void *stack_top; /* the top of the stack when entering the thread */ @@ -133,19 +135,25 @@ struct thread_ctx { int current_queue; /* points to current tasklet list being run, -1 if none */ unsigned int nb_tasks; /* number of tasks allocated on this thread */ uint8_t tl_class_mask; /* bit mask of non-empty tasklets classes */ + uint8_t bufq_map; /* one bit per non-empty buffer_wq */ - // 7 bytes hole here + // 2 bytes hole here + unsigned int nb_rhttp_conns; /* count of current conns used for active reverse HTTP */ + struct sched_activity *sched_profile_entry; /* profile entry in use by the current task/tasklet, only if sched_wake_date>0 */ + + ALWAYS_ALIGN(2*sizeof(void*)); + struct list buffer_wq[DYNBUF_NBQ]; /* buffer waiters, 4 criticality-based queues */ struct list pool_lru_head; /* oldest objects in thread-local pool caches */ - struct list buffer_wq; /* buffer waiters */ struct list streams; /* list of streams attached to this thread */ struct list quic_conns; /* list of active quic-conns attached to this thread */ struct list quic_conns_clo; /* list of closing quic-conns attached to this thread */ struct list queued_checks; /* checks waiting for a connection slot */ - unsigned int nb_rhttp_conns; /* count of current conns used for active reverse HTTP */ - - ALWAYS_ALIGN(2*sizeof(void*)); struct list tasklets[TL_CLASSES]; /* tasklets (and/or tasks) to run, by class */ + void **emergency_bufs; /* array of buffers allocated at boot. 
Next free one is [emergency_bufs_left-1] */ + uint emergency_bufs_left; /* number of emergency buffers left in magic_bufs[] */ + // around 36 bytes here for thread-local variables + // third cache line here on 64 bits: accessed mostly using atomic ops ALWAYS_ALIGN(64); struct mt_list shared_tasklet_list; /* Tasklet to be run, woken up by other threads */ @@ -158,7 +166,6 @@ struct thread_ctx { uint32_t sched_wake_date; /* current task/tasklet's wake date or 0 */ uint32_t sched_call_date; /* current task/tasklet's call date (valid if sched_wake_date > 0) */ - struct sched_activity *sched_profile_entry; /* profile entry in use by the current task/tasklet, only if sched_wake_date>0 */ uint64_t prev_cpu_time; /* previous per thread CPU time */ uint64_t prev_mono_time; /* previous system wide monotonic time */ @@ -172,6 +179,7 @@ struct thread_ctx { unsigned long long out_bytes; /* total #of bytes emitted */ unsigned long long spliced_out_bytes; /* total #of bytes emitted though a kernel pipe */ struct buffer *thread_dump_buffer; /* NULL out of dump, valid during a dump, 0x01 once done */ + // around 64 bytes here for shared variables ALWAYS_ALIGN(128); }; diff --git a/include/haproxy/tools-t.h b/include/haproxy/tools-t.h index 32d8193..a63e0f6 100644 --- a/include/haproxy/tools-t.h +++ b/include/haproxy/tools-t.h @@ -22,6 +22,8 @@ #ifndef _HAPROXY_TOOLS_T_H #define _HAPROXY_TOOLS_T_H +#include <netinet/in.h> + /* size used for max length of decimal representation of long long int. */ #define NB_LLMAX_STR (sizeof("-9223372036854775807")-1) @@ -163,4 +165,20 @@ struct net_addr_type { int xprt_type; // transport layer }; +/* To easily pass context to cbor encode functions + */ +struct cbor_encode_ctx { + /* function pointer that cbor encode functions will use to encode a + * single byte. + * + * The function needs to return the position of the last written byte + * on success and NULL on failure. 
The function cannot write past <stop> + */ + char *(*e_fct_byte)(struct cbor_encode_ctx *ctx, + char *start, char *stop, uint8_t byte); + + /* to provide some user-context to the encode_fct_* funcs */ + void *e_fct_ctx; +}; + #endif /* _HAPROXY_TOOLS_T_H */ diff --git a/include/haproxy/tools.h b/include/haproxy/tools.h index 3726f63..937adaa 100644 --- a/include/haproxy/tools.h +++ b/include/haproxy/tools.h @@ -42,6 +42,7 @@ #include <haproxy/api.h> #include <haproxy/chunk.h> #include <haproxy/intops.h> +#include <haproxy/global.h> #include <haproxy/namespace-t.h> #include <haproxy/protocol-t.h> #include <haproxy/tools-t.h> @@ -399,11 +400,11 @@ int addr_is_local(const struct netns_entry *ns, * <map> with the hexadecimal representation of their ASCII-code (2 digits) * prefixed by <escape>, and will store the result between <start> (included) * and <stop> (excluded), and will always terminate the string with a '\0' - * before <stop>. The position of the '\0' is returned if the conversion - * completes. If bytes are missing between <start> and <stop>, then the - * conversion will be incomplete and truncated. If <stop> <= <start>, the '\0' - * cannot even be stored so we return <start> without writing the 0. + * before <stop>. If bytes are missing between <start> and <stop>, then the + * conversion will be incomplete and truncated. * The input string must also be zero-terminated. + * + * Return the address of the \0 character, or NULL on error */ extern const char hextab[]; extern long query_encode_map[]; @@ -424,13 +425,33 @@ char *encode_chunk(char *start, char *stop, * is reached or NULL-byte is encountered. The result will * be stored between <start> (included) and <stop> (excluded). This * function will always try to terminate the resulting string with a '\0' - * before <stop>, and will return its position if the conversion - * completes. + * before <stop>. 
+ * + * Return the address of the \0 character, or NULL on error */ char *escape_string(char *start, char *stop, const char escape, const long *map, const char *string, const char *string_stop); +/* Below are RFC8949 compliant cbor encode helper functions, see source + * file for functions descriptions + */ +char *cbor_encode_uint64_prefix(struct cbor_encode_ctx *ctx, + char *start, char *stop, + uint64_t value, uint8_t prefix); +char *cbor_encode_int64(struct cbor_encode_ctx *ctx, + char *start, char *stop, int64_t value); +char *cbor_encode_bytes_prefix(struct cbor_encode_ctx *ctx, + char *start, char *stop, + const char *bytes, size_t len, + uint8_t prefix); +char *cbor_encode_bytes(struct cbor_encode_ctx *ctx, + char *start, char *stop, + const char *bytes, size_t len); +char *cbor_encode_text(struct cbor_encode_ctx *ctx, + char *start, char *stop, + const char *text, size_t len); + /* Check a string for using it in a CSV output format. If the string contains * one of the following four char <">, <,>, CR or LF, the string is * encapsulated between <"> and the <"> are escaped by a <""> sequence. @@ -761,6 +782,21 @@ static inline int set_host_port(struct sockaddr_storage *addr, int port) return 0; } +/* Returns true if <addr> port is forbidden as client source using <proto>. */ +static inline int port_is_restricted(const struct sockaddr_storage *addr, + enum ha_proto proto) +{ + const uint16_t port = get_host_port(addr); + + BUG_ON_HOT(proto != HA_PROTO_TCP && proto != HA_PROTO_QUIC); + + /* RFC 6335 6. Port Number Ranges */ + if (unlikely(port < 1024 && port > 0)) + return !(global.clt_privileged_ports & proto); + + return 0; +} + /* Convert mask from bit length form to in_addr form. * This function never fails. 
*/ @@ -1006,6 +1042,8 @@ int dump_binary(struct buffer *out, const char *buf, int bsize); int dump_text_line(struct buffer *out, const char *buf, int bsize, int len, int *line, int ptr); void dump_addr_and_bytes(struct buffer *buf, const char *pfx, const void *addr, int n); +void dump_area_with_syms(struct buffer *output, const void *base, const void *addr, + const void *special, const char *spec_type, const char *spec_name); void dump_hex(struct buffer *out, const char *pfx, const void *buf, int len, int unsafe); int may_access(const void *ptr); const void *resolve_sym_name(struct buffer *buf, const char *pfx, const void *addr); @@ -1053,7 +1091,8 @@ static inline void *my_realloc2(void *ptr, size_t size) int parse_dotted_uints(const char *s, unsigned int **nums, size_t *sz); /* PRNG */ -void ha_generate_uuid(struct buffer *output); +void ha_generate_uuid_v4(struct buffer *output); +void ha_generate_uuid_v7(struct buffer *output); void ha_random_seed(const unsigned char *seed, size_t len); void ha_random_jump96(uint32_t dist); uint64_t ha_random64(void); @@ -1176,4 +1215,8 @@ int openssl_compare_current_version(const char *version); /* compare the current OpenSSL name to a string */ int openssl_compare_current_name(const char *name); +/* vma helpers */ +void vma_set_name(void *addr, size_t size, const char *type, const char *name); +void vma_set_name_id(void *addr, size_t size, const char *type, const char *name, unsigned int id); + #endif /* _HAPROXY_TOOLS_H */ diff --git a/include/haproxy/vars.h b/include/haproxy/vars.h index ebd1f15..9fa351c 100644 --- a/include/haproxy/vars.h +++ b/include/haproxy/vars.h @@ -25,15 +25,20 @@ #include <haproxy/api-t.h> #include <haproxy/session-t.h> #include <haproxy/stream-t.h> +#include <haproxy/thread.h> #include <haproxy/vars-t.h> extern struct vars proc_vars; +struct sample; +struct arg; void vars_init_head(struct vars *vars, enum vars_scope scope); void var_accounting_diff(struct vars *vars, struct session *sess, struct 
stream *strm, int size); unsigned int var_clear(struct var *var, int force); void vars_prune(struct vars *vars, struct session *sess, struct stream *strm); void vars_prune_per_sess(struct vars *vars); +int var_set(uint64_t name_hash, enum vars_scope scope, struct sample *smp, uint flags); +int var_unset(uint64_t name_hash, enum vars_scope scope, struct sample *smp); int vars_get_by_name(const char *name, size_t len, struct sample *smp, const struct buffer *def); int vars_set_by_name_ifexist(const char *name, size_t len, struct sample *smp); int vars_set_by_name(const char *name, size_t len, struct sample *smp); diff --git a/include/haproxy/vecpair.h b/include/haproxy/vecpair.h new file mode 100644 index 0000000..e495706 --- /dev/null +++ b/include/haproxy/vecpair.h @@ -0,0 +1,588 @@ +/* + * include/haproxy/vecpair.h + * Vector pair handling - functions definitions. + * + * Copyright (C) 2000-2024 Willy Tarreau - w@1wt.eu + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef _HAPROXY_VECPAIR_H +#define _HAPROXY_VECPAIR_H + +#include <sys/types.h> +#include <string.h> +#include <import/ist.h> +#include <haproxy/api.h> + + +/* Principles of operation + * ----------------------- + * These functions take two vectors represented as ISTs, they're each the + * pointer to and the length of a work area. Functions operate over these + * two areas as if they were a contiguous area. It is up to the caller to + * use them to designate free space or data depending on whether it wants + * to write or read to the area. This allows to easily represent a wrapping + * buffer, both for data and free space. + * + * In order to ease sequencing of operations, most of the functions below + * will: + * - always consider v1 before v2 + * - always ignore any vector whose length is zero (the pointer is ignored) + * - automatically switch from v1 to v2 upon updates, including if their + * size is zero + * - end after both v1 and v2 are depleted (len==0) + * - update the affected vectors after operation (pointer, length) so that + * they can easily be chained without adding new tests + * - return the number of bytes processed after operation. + * + * These functions do not need to know the allocated size nor any such thing, + * it's the caller's job to know that and to build the relevant vector pair. + * See the vp_{ring,data,room}_to_{ring,data,room}() functions at the end for + * this. 
+ */ + +/* vp_isempty(): returns true if both areas are empty */ +static inline int vp_isempty(const struct ist v1, const struct ist v2) +{ + return !v1.len && !v2.len; +} + +/* vp_size(): returns the total size of the two vectors */ +static inline size_t vp_size(const struct ist v1, const struct ist v2) +{ + return v1.len + v2.len; +} + +/* _vp_head() : returns the pointer to the head (beginning) of the area, which is + * the address of the first byte of the first non-empty area. It must not be + * called with both areas empty. + */ +static inline char *_vp_head(const struct ist v1, const struct ist v2) +{ + return v1.len ? v1.ptr : v2.ptr; +} + +/* vp_head() : returns the pointer to the head (beginning) of the area, which is + * the address of the first byte of the first non-empty area. It may return + * NULL if both areas are empty. + */ +static inline char *vp_head(const struct ist v1, const struct ist v2) +{ + return v1.len ? v1.ptr : v2.len ? v2.ptr : NULL; +} + +/* _vp_addr() : return the address corresponding to applying an offset <ofs> + * after the head. It must not be called with an offset larger than the total + * area size. + */ +static inline char *_vp_addr(const struct ist v1, const struct ist v2, size_t ofs) +{ + if (ofs < v1.len) + return v1.ptr + ofs; + else { + ofs -= v1.len; + return v2.ptr + ofs; + } +} + +/* vp_addr() : return the address corresponding to applying an offset <ofs> + * after the head. It may return NULL if the length is beyond the total area + * size. + */ +static inline char *vp_addr(const struct ist v1, const struct ist v2, size_t ofs) +{ + if (ofs < v1.len) + return v1.ptr + ofs; + else { + ofs -= v1.len; + if (ofs >= v2.len) + return NULL; + return v2.ptr + ofs; + } +} + +/* vp_ofs() : return the offset corresponding to the pointer <p> within either + * v1 or v2, or a size equal to the sum of both lengths if <p> is outside both + * areas. 
+ */ +static inline size_t vp_ofs(const struct ist v1, const struct ist v2, const char *p) +{ + if (p >= v1.ptr && p < v1.ptr + v1.len) + return p - v1.ptr; + + if (p >= v2.ptr && p < v2.ptr + v2.len) + return v1.len + (p - v2.ptr); + + return v1.len + v2.len; +} + +/* vp_next() : return the address of the next character after <p> or NULL if it + * runs out of both v1 and v2. + */ +static inline char *vp_next(const struct ist v1, const struct ist v2, const char *p) +{ + size_t ofs = vp_ofs(v1, v2, p); + + return vp_addr(v1, v2, ofs + 1); +} + +/* vp_seek_addr() : return the pointer to the byte at relative offset <seek> in + * the area(s). The caller must ensure that seek is strictly smaller than the + * total amount of bytes in the vectors. + */ +static inline char *vp_seek_addr(struct ist v1, struct ist v2, size_t seek) +{ + if (seek < v1.len) + return v1.ptr + seek; + else + return v2.ptr + seek - v1.len; +} + +/*********************************************/ +/* Functions used to modify the buffer state */ +/*********************************************/ + +/* vp_skip() : skip the requested amount of bytes from the area(s) and update + * them accordingly. If the amount to skip exceeds the total size of the two + * areas, they're emptied and the total number of emptied bytes is returned. + * It is unspecified what area pointers point to after their len is emptied. + */ +static inline size_t vp_skip(struct ist *v1, struct ist *v2, size_t skip) +{ + if (skip <= v1->len) { + v1->ptr += skip; + v1->len -= skip; + } + else { + if (skip > v1->len + v2->len) + skip = v1->len + v2->len; + + v2->ptr += skip - v1->len; + v2->len -= skip - v1->len; + v1->ptr += v1->len; + v1->len = 0; + } + return skip; +} + +/* vp_getchr() : tries to retrieve the next char from the beginning of the area, and + * advance the beginning by one char on success. An int equal to the unsigned + * char is returned on success, otherwise a negative value if there is nothing + * left in the area.
+ */ +static inline int vp_getchr(struct ist *v1, struct ist *v2) +{ + int c = -1; + + if (v1->len) { + v1->len--; + c = (unsigned char)*(v1->ptr++); + } + else if (v2->len) { + v2->len--; + c = (unsigned char)*(v2->ptr++); + } + + return c; +} + +/* vp_getblk_ofs() : gets one full block of data at once from a pair of vectors, + * starting from offset <ofs> after the head, and for up to <len> bytes. The + * caller is responsible for ensuring that <ofs> does not exceed the total + * number of bytes available in the areas. The areas will then be updated so + * that the next head points to the first unread byte (i.e. skip <ofs> plus + * the number of bytes returned). The number of bytes copied is returned. This + * is meant to be used on concurrently accessed areas, so that a reader can + * read a known area while it is been concurrently fed and/or trimmed. Usually + * you'd prefer to use the more convenient vp_getblk() or vp_peek_ofs(). + */ +static inline size_t vp_getblk_ofs(struct ist *v1, struct ist *v2, size_t ofs, char *blk, size_t len) +{ + size_t ret = 0; + size_t block; + + BUG_ON_HOT(ofs >= v1->len + v2->len); + + vp_skip(v1, v2, ofs); + + block = v1->len; + if (block > len) + block = len; + + if (block) { + memcpy(blk + ret, v1->ptr, block); + v1->ptr += block; + v1->len -= block; + ret += block; + len -= block; + } + + block = v2->len; + if (block > len) + block = len; + + if (block) { + memcpy(blk + ret, v2->ptr, block); + v2->ptr += block; + v2->len -= block; + ret += block; + } + + return ret; +} + +/* vp_getblk() : gets one full block of data at once from a pair of vectors, + * starting from their head, and for up to <len> bytes. The areas will be + * updated so that the next head points to the first unread byte. The number + * of bytes copied is returned. This is meant to be used on concurrently + * accessed areas, so that a reader can read a known area while it is been + * concurrently fed and/or trimmed. See also vp_peek_ofs(). 
+ */ +static inline size_t vp_getblk(struct ist *v1, struct ist *v2, char *blk, size_t len) +{ + return vp_getblk_ofs(v1, v2, 0, blk, len); +} + +/* vp_peek_ofs() : gets one full block of data at once from a pair of vectors, + * starting from offset <ofs> after the head, and for up to <len> bytes. + * The caller is responsible for ensuring that <ofs> does not exceed the + * total number of bytes available in the areas. The areas are *not* updated. + * The number of bytes copied is returned. This is meant to be used on + * concurrently accessed areas, so that a reader can read a known area while + * it is being concurrently fed and/or trimmed. See also vp_getblk(). + */ +static inline size_t vp_peek_ofs(struct ist v1, struct ist v2, size_t ofs, char *blk, size_t len) +{ + return vp_getblk_ofs(&v1, &v2, ofs, blk, len); +} + +/* vp_putchr() : tries to append char <c> at the beginning of the area, and + * advance the beginning by one char. Data are truncated if there is no room + * left. + */ +static inline void vp_putchr(struct ist *v1, struct ist *v2, char c) +{ + if (v1->len) { + v1->len--; + *(v1->ptr++) = c; + } + else if (v2->len) { + v2->len--; + *(v2->ptr++) = c; + } +} + +/* vp_putblk_ofs() : put one full block of data at once into a pair of vectors, + * starting from offset <ofs> after the head, and for exactly <len> bytes. + * The caller is responsible for ensuring that <ofs> does not exceed the total + * number of bytes available in the areas. The function will check that it is + * indeed possible to put <len> bytes after <ofs> before proceeding. If the + * areas can accept such data, they will then be updated so that the next + * head points to the first untouched byte (i.e. skip <ofs> plus the number + * of bytes sent). The number of bytes copied is returned on success, or 0 is + * returned if it cannot be copied, in which case the areas are left + * untouched.
This is meant to be used on concurrently accessed areas, so that + * a reader can read a known area while it is being concurrently fed and/or + * trimmed. Usually you'd prefer to use the more convenient vp_putblk() or + * vp_poke_ofs(). + */ +static inline size_t vp_putblk_ofs(struct ist *v1, struct ist *v2, size_t ofs, const char *blk, size_t len) +{ + size_t ret = 0; + size_t block; + + BUG_ON_HOT(ofs >= v1->len + v2->len); + + if (len && ofs + len <= v1->len + v2->len) { + vp_skip(v1, v2, ofs); + + block = v1->len; + if (block > len) + block = len; + + if (block) { + memcpy(v1->ptr, blk + ret, block); + v1->ptr += block; + v1->len -= block; + ret += block; + len -= block; + } + + block = v2->len; + if (block > len) + block = len; + + if (block) { + memcpy(v2->ptr, blk + ret, block); + v2->ptr += block; + v2->len -= block; + ret += block; + } + } + return ret; +} + +/* vp_poke_ofs() : puts one full block of data at once into a pair of vectors, + * starting from offset <ofs> after the head, and for exactly <len> bytes. + * The caller is responsible for ensuring that neither <ofs> nor <ofs> + <len> + * exceed the total number of bytes available in the areas. This is meant to + * be used on concurrently accessed areas, so that a reader can read a known + * area while it is being concurrently fed and/or trimmed. The area pointers + * are left unaffected. The number of bytes copied is returned. + */ +static inline size_t vp_poke_ofs(struct ist v1, struct ist v2, size_t ofs, const char *blk, size_t len) +{ + return vp_putblk_ofs(&v1, &v2, ofs, blk, len); +} + +/* vp_putblk() : put one full block of data at once into a pair of vectors, + * starting at the head, and for exactly <len> bytes. The caller is + * responsible for ensuring that <len> does not exceed the total number of + * bytes available in the areas. This is meant to be used on concurrently + * accessed areas, so that a reader can read a known area while it is being + * concurrently fed and/or trimmed.
The area pointers are updated according to + * the amount of bytes copied. The number of bytes copied is returned. + */ +static inline size_t vp_putblk(struct ist *v1, struct ist *v2, const char *blk, size_t len) +{ + vp_putblk_ofs(v1, v2, 0, blk, len); + return len; +} + +/* vp_put_varint_ofs(): encode 64-bit value <v> as a varint into a pair of + * vectors, starting at an offset after the head. The code assumes that the + * caller has checked that the encoded value fits in the areas so that there + * are no length checks inside the loop. Vectors are updated and the number of + * written bytes is returned (excluding the offset). + */ +static inline size_t vp_put_varint_ofs(struct ist *v1, struct ist *v2, size_t ofs, uint64_t v) +{ + size_t data = 0; + + BUG_ON_HOT(ofs >= v1->len + v2->len); + + vp_skip(v1, v2, ofs); + + if (v >= 0xF0) { + /* more than one byte, first write the 4 least significant + * bits, then follow with 7 bits per byte. + */ + vp_putchr(v1, v2, v | 0xF0); + v = (v - 0xF0) >> 4; + + while (1) { + data++; + if (v < 0x80) + break; + vp_putchr(v1, v2, v | 0x80); + v = (v - 0x80) >> 7; + } + } + + /* last byte */ + vp_putchr(v1, v2, v); + data++; + return data; +} + +/* vp_put_varint(): encode 64-bit value <v> as a varint into a pair of vectors, + * starting at the head. The code assumes that the caller has checked that + * the encoded value fits in the areas so that there are no length checks + * inside the loop. Vectors are updated and the number of written bytes is + * returned. + */ +static inline size_t vp_put_varint(struct ist *v1, struct ist *v2, uint64_t v) +{ + return vp_put_varint_ofs(v1, v2, 0, v); +} + +/* vp_get_varint_ofs(): try to decode a varint from a pair of vectors, starting + * at offset <ofs> after the head, into value <vptr>. Returns the number of + * bytes parsed in case of success, or 0 if there were not enough bytes, in + * which case the contents of <vptr> are not updated. 
Vectors are updated to + * skip the offset and the number of bytes parsed if there are enough bytes, + * otherwise the parsing area is left untouched. The code assumes the caller + * has checked that the offset is smaller than or equal to the number of bytes + * in the vectors. + */ +static inline size_t vp_get_varint_ofs(struct ist *v1, struct ist *v2, size_t ofs, uint64_t *vptr) +{ + size_t data = v1->len + v2->len; + const char *head, *wrap; + uint64_t v = 0; + int bits = 0; + size_t ret; + + BUG_ON_HOT(ofs > data); + + vp_skip(v1, v2, ofs); + + /* let's see where we start from. The wrapping area only concerns the + * end of the first area, even if it's empty it does not overlap with + * the second one so we don't care about v1 being set or not. + */ + head = v1->len ? v1->ptr : v2->ptr; + wrap = v1->ptr + v1->len; + data -= ofs; + + if (data != 0 && ((uint8_t)*head >= 0xF0)) { + v = (uint8_t)*head; + bits += 4; + while (1) { + if (++head == wrap) + head = v2->ptr; + data--; + if (!data || !(*head & 0x80)) + break; + v += (uint64_t)(uint8_t)*head << bits; + bits += 7; + } + } + + /* last byte */ + if (!data) + return 0; + + v += (uint64_t)(uint8_t)*head << bits; + *vptr = v; + data--; + + ret = v1->len + v2->len - data; + vp_skip(v1, v2, ret); + return ret; +} + +/* vp_get_varint(): try to decode a varint from a pair of vectors, starting at + * the head, into value <vptr>. Returns the number of bytes parsed in case of + * success, or 0 if there were not enough bytes, in which case the contents of + * <vptr> are not updated. Vectors are updated to skip the bytes parsed if + * there are enough bytes, otherwise they're left untouched. + */ +static inline size_t vp_get_varint(struct ist *v1, struct ist *v2, uint64_t *vptr) +{ + return vp_get_varint_ofs(v1, v2, 0, vptr); +} + +/* vp_peek_varint_ofs(): try to decode a varint from a pair of vectors, starting at + * offset <ofs> after the head, into value <vptr>.
Returns the number of bytes parsed in case of + * success, or 0 if there were not enough bytes, in which case the contents of + * <vptr> are not updated. + */ +static inline size_t vp_peek_varint_ofs(struct ist v1, struct ist v2, size_t ofs, uint64_t *vptr) +{ + return vp_get_varint_ofs(&v1, &v2, ofs, vptr); +} + + +/************************************************************/ +/* ring-buffer API */ +/* This is used to manipulate rings made of (head,tail) */ +/* It creates vectors for reading (data) and writing (room) */ +/************************************************************/ + +/* build 2 vectors <v1> and <v2> corresponding to the available data in ring + * buffer of size <size>, starting at address <area>, with a head <head> and + * a tail <tail>. <v2> is non-empty only if the data wraps (i.e. tail<head). + */ +static inline void vp_ring_to_data(struct ist *v1, struct ist *v2, char *area, size_t size, size_t head, size_t tail) +{ + v1->ptr = area + head; + v1->len = ((head <= tail) ? tail : size) - head; + v2->ptr = area; + v2->len = (tail < head) ? tail : 0; +} + +/* build 2 vectors <v1> and <v2> corresponding to the available room in ring + * buffer of size <size>, starting at address <area>, with a head <head> and + * a tail <tail>. <v2> is non-empty only if the room wraps (i.e. head<tail). + */ +static inline void vp_ring_to_room(struct ist *v1, struct ist *v2, char *area, size_t size, size_t head, size_t tail) +{ + v1->ptr = area + tail; + v1->len = ((tail <= head) ? head : size) - tail; + v2->ptr = area; + v2->len = (head < tail) ? head : 0; +} + +/* Set a ring's <head> and <tail> according to the data area represented by the + * concatenation of <v1> and <v2> which must point to two adjacent areas within + * a ring buffer of <size> bytes starting at <area>. <v1>, if not empty, starts + * at the head and <v2>, if not empty, ends at the tail.
If both vectors are of + * length zero, the ring is considered empty and both its head and tail will be + * reset. + */ +static inline void vp_data_to_ring(const struct ist v1, const struct ist v2, char *area, size_t size, size_t *head, size_t *tail) +{ + size_t ofs; + + if (!v1.len && !v2.len) { + *head = *tail = 0; + return; + } + + ofs = (v1.len ? v1.ptr : v2.ptr) - area; + if (ofs >= size) + ofs -= size; + *head = ofs; + + ofs = (v2.len ? v2.ptr + v2.len : v1.ptr + v1.len) - area; + if (ofs >= size) + ofs -= size; + *tail = ofs; +} + +/* Set a ring's <head> and <tail> according to the room area represented by the + * concatenation of <v1> and <v2> which must point to two adjacent areas within + * a ring buffer of <size> bytes starting at <area>. <v1>, if not empty, starts + * at the tail and <v2>, if not empty, ends at the head. If both vectors are of + * length zero, the ring is considered full and both its head and tail will be + * reset (which cannot be distinguished from empty). The caller must make sure + * not to fill a ring with this API. + */ +static inline void vp_room_to_ring(const struct ist v1, const struct ist v2, char *area, size_t size, size_t *head, size_t *tail) +{ + size_t ofs; + + if (!v1.len && !v2.len) { + *head = *tail = 0; + return; + } + + ofs = (v1.len ? v1.ptr : v2.ptr) - area; + if (ofs >= size) + ofs -= size; + *tail = ofs; + + ofs = (v2.len ? 
v2.ptr + v2.len : v1.ptr + v1.len) - area; + if (ofs >= size) + ofs -= size; + *head = ofs; +} + +#endif /* _HAPROXY_VECPAIR_H */ + +/* + * Local variables: + * c-indent-level: 8 + * c-basic-offset: 8 + * End: + */ diff --git a/include/haproxy/version.h b/include/haproxy/version.h index 651a8de..5d9c886 100644 --- a/include/haproxy/version.h +++ b/include/haproxy/version.h @@ -33,13 +33,13 @@ #ifdef CONFIG_PRODUCT_BRANCH #define PRODUCT_BRANCH CONFIG_PRODUCT_BRANCH #else -#define PRODUCT_BRANCH "2.9" +#define PRODUCT_BRANCH "3.0" #endif #ifdef CONFIG_PRODUCT_STATUS #define PRODUCT_STATUS CONFIG_PRODUCT_STATUS #else -#define PRODUCT_STATUS "Status: stable branch - will stop receiving fixes around Q1 2025." +#define PRODUCT_STATUS "Status: long-term supported branch - will stop receiving fixes around Q2 2029." #endif #ifdef CONFIG_PRODUCT_URL_BUGS diff --git a/include/haproxy/xref.h b/include/haproxy/xref.h index 42eed58..25f9d3c 100644 --- a/include/haproxy/xref.h +++ b/include/haproxy/xref.h @@ -28,6 +28,7 @@ #ifndef __HAPROXY_XREF_H__ #define __HAPROXY_XREF_H__ +#include <haproxy/api.h> #include <haproxy/xref-t.h> /* xref is used to create relation between two elements. diff --git a/include/import/ebtree.h b/include/import/ebtree.h index d6e51d5..31a9cac 100644 --- a/include/import/ebtree.h +++ b/include/import/ebtree.h @@ -250,39 +250,84 @@ #include <import/ebtree-t.h> #include <haproxy/api.h> -static inline int flsnz8_generic(unsigned int x) +/* returns clz from 7 to 0 for 0x01 to 0xFF. Returns 7 for 0 as well. */ +static inline unsigned int clz8(unsigned char c) { - int ret = 0; - if (x >> 4) { x >>= 4; ret += 4; } - return ret + ((0xFFFFAA50U >> (x << 1)) & 3) + 1; + unsigned int r = 4; + + if (c & 0xf0) { + r = 0; + c >>= 4; + } + return r + ((0x000055afU >> (c * 2)) & 0x3); } -/* Note: we never need to run fls on null keys, so we can optimize the fls - * function by removing a conditional jump. +/* FLSNZ: find last set bit for non-zero value. 
"Last" here means the highest + * one. It returns a value from 1 to 32 for 1<<0 to 1<<31. */ -#if defined(__i386__) || defined(__x86_64__) -/* this code is similar on 32 and 64 bit */ -static inline int flsnz(int x) + +#if (defined(__i386__) || defined(__x86_64__)) && !defined(__atom__) +/* DO NOT USE ON ATOM! The instruction is emulated and is several times slower + * than doing the math by hand. + */ +static inline unsigned int flsnz32(unsigned int x) { - int r; + unsigned int r; __asm__("bsrl %1,%0\n" : "=r" (r) : "rm" (x)); - return r+1; + return r + 1; +} +#define flsnz32(x) flsnz32(x) + +# if defined(__x86_64__) +static inline unsigned int flsnz64(unsigned long long x) +{ + unsigned long long r; + __asm__("bsrq %1,%0\n" + : "=r" (r) : "rm" (x)); + return r + 1; +} +# define flsnz64(x) flsnz64(x) +# endif + +#elif !defined(__atom__) && defined(__GNUC__) && ((__GNUC__ > 4) || ((__GNUC__ == 4) && (__GNUC_MINOR__ >= 2))) +/* gcc >= 4.2 brings __builtin_clz() and __builtin_clzl(), usable for non-x86 */ + +static inline unsigned int flsnz32(unsigned int x) +{ + return 32 - __builtin_clz(x); +} +# define flsnz32(x) flsnz32(x) + +# if defined(__SIZEOF_LONG__) && (__SIZEOF_LONG__ > 4) +static inline unsigned int flsnz64(unsigned long x) +{ + return (__SIZEOF_LONG__ * 8) - __builtin_clzl(x); } +# define flsnz64(x) flsnz64(x) +# endif -static inline int flsnz8(unsigned char x) +#endif /* end of arch-specific implementations */ + +/*** Fallback versions below ***/ + +#ifndef flsnz8 +# if defined(flsnz32) +# define flsnz8(x) flsnz32((unsigned char)x) +# else +static inline unsigned int flsnz8(unsigned int x) { - int r; - __asm__("movzbl %%al, %%eax\n" - "bsrl %%eax,%0\n" - : "=r" (r) : "a" (x)); - return r+1; + unsigned int ret = 0; + if (x >> 4) { x >>= 4; ret += 4; } + return ret + ((0xFFFFAA50U >> (x << 1)) & 3) + 1; } +# define flsnz8(x) flsnz8(x) +# endif +#endif -#else -// returns 1 to 32 for 1<<0 to 1<<31. Undefined for 0. 
-#define flsnz(___a) ({ \ - register int ___x, ___bits = 0; \ +#ifndef flsnz32 +# define flsnz32(___a) ({ \ + register unsigned int ___x, ___bits = 0; \ ___x = (___a); \ if (___x & 0xffff0000) { ___x &= 0xffff0000; ___bits += 16;} \ if (___x & 0xff00ff00) { ___x &= 0xff00ff00; ___bits += 8;} \ @@ -291,16 +336,10 @@ static inline int flsnz8(unsigned char x) if (___x & 0xaaaaaaaa) { ___x &= 0xaaaaaaaa; ___bits += 1;} \ ___bits + 1; \ }) - -static inline int flsnz8(unsigned int x) -{ - return flsnz8_generic(x); -} - - #endif -static inline int fls64(unsigned long long x) +#ifndef flsnz64 +static inline unsigned int flsnz64(unsigned long long x) { unsigned int h; unsigned int bits = 32; @@ -310,10 +349,21 @@ static inline int fls64(unsigned long long x) h = x; bits = 0; } - return flsnz(h) + bits; + return flsnz32(h) + bits; } +# define flsnz64(x) flsnz64(x) +#endif + +#ifndef flsnz_long +# define flsnz_long(x) ((sizeof(long) > 4) ? flsnz64(x) : flsnz32(x)) +#endif -#define fls_auto(x) ((sizeof(x) > 4) ? fls64(x) : flsnz(x)) +#ifndef flsnz +# define flsnz(x) ((sizeof(x) > 4) ? flsnz64(x) : (sizeof(x) > 1) ? flsnz32(x) : flsnz8(x)) +#endif + +#define fls64(x) flsnz64(x) +#define fls_auto(x) ((x) ? flsnz(x) : 0) /* Linux-like "container_of". It returns a pointer to the structure of type * <type> which has its member <name> stored at address <ptr>. @@ -720,9 +770,9 @@ static forceinline void __eb_delete(struct eb_node *node) * bytes. Note that parts or all of <ignore> bits may be rechecked. It is only * passed here as a hint to speed up the check. */ -static forceinline int equal_bits(const unsigned char *a, - const unsigned char *b, - int ignore, int len) +static forceinline size_t equal_bits(const unsigned char *a, + const unsigned char *b, + size_t ignore, size_t len) { for (ignore >>= 3, a += ignore, b += ignore, ignore <<= 3; ignore < len; ) { @@ -738,7 +788,7 @@ static forceinline int equal_bits(const unsigned char *a, * it as the number of identical bits. 
Note that low bit numbers are * assigned to high positions in the byte, as we compare them as strings. */ - ignore -= flsnz8(c); + ignore -= flsnz_long(c); break; } } @@ -786,12 +836,12 @@ static forceinline int check_bits(const unsigned char *a, * permitted. Equal strings are reported as a negative number of bits, which * indicates the end was reached. */ -static forceinline int string_equal_bits(const unsigned char *a, - const unsigned char *b, - int ignore) +static forceinline size_t string_equal_bits(const unsigned char *a, + const unsigned char *b, + size_t ignore) { - int beg; - unsigned char c; + unsigned char c, d; + size_t beg; beg = ignore >> 3; @@ -799,8 +849,6 @@ static forceinline int string_equal_bits(const unsigned char *a, * or at the first zero we encounter on either side. */ while (1) { - unsigned char d; - c = a[beg]; d = b[beg]; beg++; @@ -809,14 +857,14 @@ static forceinline int string_equal_bits(const unsigned char *a, if (c) break; if (!d) - return -1; + return (size_t)-1; } /* OK now we know that a and b differ at byte <beg>, or that both are zero. * We have to find what bit is differing and report it as the number of * identical bits. Note that low bit numbers are assigned to high positions * in the byte, as we compare them as strings. */ - return (beg << 3) - flsnz8(c); + return (beg << 3) - flsnz(c); } static forceinline int cmp_bits(const unsigned char *a, const unsigned char *b, unsigned int pos) diff --git a/include/import/ist.h b/include/import/ist.h index e4e1425..962d63b 100644 --- a/include/import/ist.h +++ b/include/import/ist.h @@ -331,6 +331,25 @@ static inline struct ist istzero(const struct ist ist, size_t size) return ret; } +/* Remove trailing newline characters if present in <ist> by reducing its + * length. '\n', '\r' and '\r\n' all match. Return the modified ist.
+ */ +static inline struct ist iststrip(const struct ist ist) +{ + struct ist ret = ist; + + if (ret.len) { + if (ret.ptr[ret.len - 1] == '\n') + --ret.len; + } + if (ret.len) { + if (ret.ptr[ret.len - 1] == '\r') + --ret.len; + } + + return ret; +} + /* returns the ordinal difference between two strings : * < 0 if ist1 < ist2 * = 0 if ist1 == ist2 diff --git a/include/import/slz-tables.h b/include/import/slz-tables.h index 0b3a5b9..6e6d658 100644 --- a/include/import/slz-tables.h +++ b/include/import/slz-tables.h @@ -1,3 +1,5 @@ +#include <inttypes.h> + /* Fixed Huffman table as per RFC1951. * * Lit Value Bits Codes diff --git a/include/import/xxhash.h b/include/import/xxhash.h index a18e8c7..7c3c3fc 100644 --- a/include/import/xxhash.h +++ b/include/import/xxhash.h @@ -3387,7 +3387,7 @@ XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(XXH_NOESCAPE const XXH64_can /* === Compiler specifics === */ -#if ((defined(sun) || defined(__sun)) && __cplusplus) /* Solaris includes __STDC_VERSION__ with C++. Tested with GCC 5.5 */ +#if ((defined(sun) || defined(__sun)) && defined(__cplusplus) && __cplusplus) /* Solaris includes __STDC_VERSION__ with C++. Tested with GCC 5.5 */ # define XXH_RESTRICT /* disable */ #elif defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* >= C99 */ # define XXH_RESTRICT restrict diff --git a/include/make/options.mk b/include/make/options.mk index 022981c..d212586 100644 --- a/include/make/options.mk +++ b/include/make/options.mk @@ -23,14 +23,15 @@ build_options = $(foreach opt,$(use_opts),$(call ignore_implicit,$(opt))) # Make a list of all known features with +/- prepended depending on their # activation status. Must be a macro so that dynamically enabled ones are # evaluated with their current status. 
-build_features = $(foreach opt,$(patsubst USE_%,%,$(sort $(use_opts))),$(if $(USE_$(opt)),+$(opt),-$(opt))) +build_features = $(foreach opt,$(patsubst USE_%,%,$(sort $(use_opts))),$(if $(USE_$(opt):0=),+$(opt),-$(opt))) -# This returns a list of -DUSE_* for all known USE_* that are set -opts_as_defines = $(foreach opt,$(use_opts),$(if $($(opt)),-D$(opt),)) +# This returns a list of -DUSE_* for all known USE_* that are set to anything +# neither empty nor '0'. +opts_as_defines = $(foreach opt,$(use_opts),$(if $($(opt):0=),-D$(opt),)) # Lists all enabled or disabled options without the "USE_" prefix -enabled_opts = $(foreach opt,$(patsubst USE_%,%,$(use_opts)),$(if $(USE_$(opt)),$(opt),)) -disabled_opts = $(foreach opt,$(patsubst USE_%,%,$(use_opts)),$(if $(USE_$(opt)),,$(opt))) +enabled_opts = $(foreach opt,$(patsubst USE_%,%,$(use_opts)),$(if $(USE_$(opt):0=),$(opt),)) +disabled_opts = $(foreach opt,$(patsubst USE_%,%,$(use_opts)),$(if $(USE_$(opt):0=),,$(opt))) # preset all XXX_{INC,LIB,CFLAGS,LDFLAGS,SRC} variables to empty for $1=XXX reset_opt_vars = $(foreach name,INC LIB CFLAGS LDFLAGS SRC,$(eval $(1)_$(name)=)) @@ -50,3 +51,15 @@ endef # collect all enabled USE_foo's foo_{C,LD}FLAGS into OPTIONS_{C,LD}FLAGS collect_opts_flags = $(foreach opt,$(enabled_opts),$(eval $(call collect_opt_flags,$(opt)))) + +# Check that any USE_* variable that was forced actually exist. For this we'll +# build a list of the MAKEOVERRIDES variables that start with USE_*, and keep +# the ones that do not match any of the patterns built by appending '=%' to all +# use_opts. The outstanding ones are thus unknown and each of them produces a +# warning. 
+warn_unknown_options = \ + $(foreach unknown, \ + $(filter-out $(foreach opt,$(use_opts),$(opt:==%)), \ + $(foreach opt,$(MAKEOVERRIDES), \ + $(strip $(filter USE_%,$(opt))))), \ + $(warning Warning: ignoring unknown build option: $(unknown))) diff --git a/include/make/verbose.mk b/include/make/verbose.mk index c37d513..6ee10a0 100644 --- a/include/make/verbose.mk +++ b/include/make/verbose.mk @@ -10,6 +10,7 @@ endif # or to themselves depending on the verbosity level. ifeq ($V,1) cmd_CC = $(CC) +cmd_CXX = $(CXX) cmd_LD = $(LD) cmd_AR = $(AR) cmd_MAKE = +$(MAKE) @@ -17,12 +18,14 @@ else ifeq (3.81,$(firstword $(sort $(MAKE_VERSION) 3.81))) # 3.81 or above cmd_CC = $(info $ CC $@) $(Q)$(CC) +cmd_CXX = $(info $ CXX $@) $(Q)$(CXX) cmd_LD = $(info $ LD $@) $(Q)$(LD) cmd_AR = $(info $ AR $@) $(Q)$(AR) cmd_MAKE = $(info $ MAKE $@) $(Q)+$(MAKE) else # 3.80 or older cmd_CC = $(Q)echo " CC $@";$(CC) +cmd_CXX = $(Q)echo " CXX $@";$(CXX) cmd_LD = $(Q)echo " LD $@";$(LD) cmd_AR = $(Q)echo " AR $@";$(AR) cmd_MAKE = $(Q)echo " MAKE $@";$(MAKE) diff --git a/reg-tests/cache/vary.vtc b/reg-tests/cache/vary.vtc index 6c8cedf..782a76c 100644 --- a/reg-tests/cache/vary.vtc +++ b/reg-tests/cache/vary.vtc @@ -91,6 +91,20 @@ server s1 { -hdr "Content-Encoding: gzip" \ -bodylen 59 + rxreq + expect req.url == "/origin-referer" + txresp -hdr "Vary: origin,referer" \ + -hdr "Cache-Control: max-age=5" \ + -hdr "Content-Encoding: gzip" \ + -bodylen 60 + + rxreq + expect req.url == "/origin-referer" + txresp -hdr "Vary: origin,referer" \ + -hdr "Cache-Control: max-age=5" \ + -hdr "Content-Encoding: gzip" \ + -bodylen 61 + # Multiple Accept-Encoding headers rxreq expect req.url == "/multiple_headers" @@ -366,6 +380,43 @@ client c1 -connect ${h1_fe_sock} { expect resp.bodylen == 59 expect resp.http.X-Cache-Hit == 1 + # Mixed Vary (origin + Referer) + txreq -url "/origin-referer" \ + -hdr "Accept-Encoding: br, gzip" \ + -hdr "Referer: referer" \ + -hdr "Origin: origin" + rxresp + expect 
resp.status == 200 + expect resp.bodylen == 60 + expect resp.http.X-Cache-Hit == 0 + + txreq -url "/origin-referer" \ + -hdr "Accept-Encoding: br, gzip" \ + -hdr "Referer: referer" \ + -hdr "Origin: origin" + rxresp + expect resp.status == 200 + expect resp.bodylen == 60 + expect resp.http.X-Cache-Hit == 1 + + txreq -url "/origin-referer" \ + -hdr "Accept-Encoding: br, gzip" \ + -hdr "Referer: other-referer" \ + -hdr "Origin: other-origin" + rxresp + expect resp.status == 200 + expect resp.bodylen == 61 + expect resp.http.X-Cache-Hit == 0 + + txreq -url "/origin-referer" \ + -hdr "Accept-Encoding: br, gzip" \ + -hdr "Referer: other-referer" \ + -hdr "Origin: other-origin" + rxresp + expect resp.status == 200 + expect resp.bodylen == 61 + expect resp.http.X-Cache-Hit == 1 + # Multiple Accept-encoding headers txreq -url "/multiple_headers" \ -hdr "Accept-Encoding: gzip" \ diff --git a/reg-tests/connection/h2_glitches.vtc b/reg-tests/connection/h2_glitches.vtc index 39ec4d6..4f25164 100644 --- a/reg-tests/connection/h2_glitches.vtc +++ b/reg-tests/connection/h2_glitches.vtc @@ -11,7 +11,9 @@ haproxy hap -conf { listen fe1 bind "fd@${fe1}" proto h2 - http-request return status 200 hdr x-glitches %[fc_glitches] + tcp-request session track-sc0 src + http-request return status 200 hdr x-glitches %[fc_glitches] hdr x-glitch-cnt %[sc0_glitch_cnt] hdr x-glitch-rate %[sc0_glitch_rate] + stick-table type ip size 10 store glitch_cnt,glitch_rate(1m) } -start # valid request: no glitch @@ -73,6 +75,8 @@ client c2-path -connect ${hap_fe1_sock} { rxresp expect resp.status == 200 expect resp.http.x-glitches == 1 + expect resp.http.x-glitch-cnt == 1 + expect resp.http.x-glitch-rate == 1 } -run } -run @@ -104,5 +108,7 @@ client c3-scheme -connect ${hap_fe1_sock} { rxresp expect resp.status == 200 expect resp.http.x-glitches == 0 + expect resp.http.x-glitch-cnt == 1 + expect resp.http.x-glitch-rate == 1 } -run } -run diff --git a/reg-tests/connection/http_reuse_conn_hash.vtc 
b/reg-tests/connection/http_reuse_conn_hash.vtc index 991e86f..d77f759 100644 --- a/reg-tests/connection/http_reuse_conn_hash.vtc +++ b/reg-tests/connection/http_reuse_conn_hash.vtc @@ -13,6 +13,16 @@ haproxy h1 -conf { bind "fd@${feS_sni}" server srv2 ${h1_feR_ssl_addr}:${h1_feR_ssl_port} ssl sni "req.hdr(x-sni)" verify none pool-low-conn 2 + # pool-conn-name + listen sender-name + bind "fd@${feS_name}" + server srv2 ${h1_feR_addr}:${h1_feR_port} pool-conn-name "req.hdr(x-name)" pool-low-conn 2 + + # sni + pool-conn-name + listen sender-sni-name + bind "fd@${feS_sni_name}" + server srv2 ${h1_feR_ssl_addr}:${h1_feR_ssl_port} ssl sni "req.hdr(x-sni)" verify none pool-conn-name "req.hdr(x-name)" pool-low-conn 2 + # set-dst # specify dst1_addr for server, which should be identical to dst2_addr # port is specified by the client in header x-dst-port @@ -29,6 +39,7 @@ haproxy h1 -conf { server srv2 ${h1_feR_proxy_addr}:${h1_feR_proxy_port} send-proxy pool-low-conn 2 listen receiver + bind "fd@${feR}" bind "fd@${feR_ssl}" ssl crt ${testdir}/common.pem bind "fd@${feR_proxy}" accept-proxy http-request return status 200 @@ -72,6 +83,62 @@ client c_sni -connect ${h1_feS_sni_sock} { expect resp.http.http_first_request == "0" } -run +client c_name -connect ${h1_feS_name_sock} { + # first request + txreq \ + -hdr "x-name: www.custom.com" + rxresp + expect resp.http.http_first_request == "1" + + # second request with same name, connection must be reused + txreq \ + -hdr "x-name: www.custom.com" + rxresp + expect resp.http.http_first_request == "0" + + # third request with a different name, a new connection must be used + txreq \ + -hdr "x-name: www.custom2.com" + rxresp + expect resp.http.http_first_request == "1" + + # fourth request, reuse name2 + txreq \ + -hdr "x-name: www.custom2.com" + rxresp + expect resp.http.http_first_request == "0" +} -run + +client c_sni_name -connect ${h1_feS_sni_name_sock} { + # first request + txreq \ + -hdr "x-sni: www.custom.com" \ + -hdr 
"x-name: www.custom.com" + rxresp + expect resp.http.http_first_request == "1" + + # second request with same name but different sni, connection must be reused + txreq \ + -hdr "x-sni: www.custom2.com" \ + -hdr "x-name: www.custom.com" + rxresp + expect resp.http.http_first_request == "0" + + # third request with a different name, a new connection must be used + txreq \ + -hdr "x-sni: www.custom2.com" \ + -hdr "x-name: www.custom2.com" + rxresp + expect resp.http.http_first_request == "1" + + # fourth request, reuse name2 with a new sni + txreq \ + -hdr "x-sni: www.custom3.com" \ + -hdr "x-name: www.custom2.com" + rxresp + expect resp.http.http_first_request == "0" +} -run + # http-reuse with destination address client c_dst1 -connect ${h1_feS_dst_sock} { txreq \ diff --git a/reg-tests/connection/reverse_connect_full.vtc b/reg-tests/connection/reverse_connect_full.vtc index 238831f..cc88382 100644 --- a/reg-tests/connection/reverse_connect_full.vtc +++ b/reg-tests/connection/reverse_connect_full.vtc @@ -1,7 +1,7 @@ varnishtest "Reverse connect full test" feature ignore_unknown_macro -#REQUIRE_VERSION=2.9 +feature cmd "$HAPROXY_PROGRAM -cc 'version_atleast(2.9-dev0)'" server s1 { rxreq diff --git a/reg-tests/connection/reverse_server.vtc b/reg-tests/connection/reverse_server.vtc index 50fe8ce..5cd77ca 100644 --- a/reg-tests/connection/reverse_server.vtc +++ b/reg-tests/connection/reverse_server.vtc @@ -1,7 +1,7 @@ varnishtest "Reverse server test" feature ignore_unknown_macro -#REQUIRE_VERSION=2.9 +feature cmd "$HAPROXY_PROGRAM -cc 'version_atleast(2.9-dev0)'" barrier b1 cond 2 diff --git a/reg-tests/connection/reverse_server_name.vtc b/reg-tests/connection/reverse_server_name.vtc index 0fd850f..3a24601 100644 --- a/reg-tests/connection/reverse_server_name.vtc +++ b/reg-tests/connection/reverse_server_name.vtc @@ -2,7 +2,7 @@ varnishtest "Reverse server with a name parameter test" feature cmd "$HAPROXY_PROGRAM -cc 'feature(OPENSSL)'" feature ignore_unknown_macro 
-#REQUIRE_VERSION=2.9 +feature cmd "$HAPROXY_PROGRAM -cc 'version_atleast(2.9-dev0)'" barrier b1 cond 2 diff --git a/reg-tests/contrib/prometheus.vtc b/reg-tests/contrib/prometheus.vtc index a481240..89d65d7 100644 --- a/reg-tests/contrib/prometheus.vtc +++ b/reg-tests/contrib/prometheus.vtc @@ -1,6 +1,6 @@ varnishtest "prometheus exporter test" -#REQUIRE_VERSION=2.4 +feature cmd "$HAPROXY_PROGRAM -cc 'version_atleast(3.0-dev0)'" #REQUIRE_SERVICES=prometheus-exporter feature ignore_unknown_macro @@ -39,9 +39,9 @@ haproxy h1 -conf { } -start client c1 -connect ${h1_stats_sock} { + # test general metrics txreq -url "/metrics" rxresp - # test general metrics expect resp.status == 200 expect resp.body ~ ".*haproxy_process.*" expect resp.body ~ ".*haproxy_frontend.*" @@ -49,8 +49,20 @@ client c1 -connect ${h1_stats_sock} { expect resp.body ~ ".*haproxy_backend.*" expect resp.body ~ ".*haproxy_server.*" expect resp.body ~ ".*haproxy_sticktable.*" + expect resp.body ~ ".*haproxy_resolver.*" + + # test well known labels presence + expect resp.body ~ ".*haproxy_process_build_info{version=\".*\"} 1.*" + expect resp.body ~ ".*haproxy_frontend_http_responses_total{proxy=\"stats\",code=\"4xx\"} 0.*" + expect resp.body ~ ".*haproxy_frontend_status{proxy=\"fe\",state=\"UP\"} 1.*" + expect resp.body ~ ".*haproxy_listener_status{proxy=\"stats\",listener=\"sock-1\",state=\"WAITING\"} 0.*" + expect resp.body ~ ".*haproxy_backend_status{proxy=\"be\",state=\"UP\"} 1.*" + expect resp.body ~ ".*haproxy_server_status{proxy=\"be\",server=\"s1\",state=\"DOWN\"} 0.*" + expect resp.body ~ ".*haproxy_server_check_status{proxy=\"be\",server=\"s2\",state=\"HANA\"} 0.*" # test expected NaN values + txreq -url "/metrics?scope=backend&scope=server" + rxresp expect resp.body ~ ".*haproxy_server_check_failures_total{proxy=\"be\",server=\"s1\"} NaN.*" expect resp.body ~ ".*haproxy_server_check_up_down_total{proxy=\"be\",server=\"s1\"} NaN.*" expect resp.body ~ 
".*haproxy_server_check_failures_total{proxy=\"be\",server=\"s2\"} 0.*" @@ -72,15 +84,6 @@ client c1 -connect ${h1_stats_sock} { expect resp.body ~ ".*haproxy_server_idle_connections_limit{proxy=\"be\",server=\"s1\"} NaN.*" expect resp.body ~ ".*haproxy_server_idle_connections_limit{proxy=\"be\",server=\"s2\"} 42.*" - # test well known labels presence - expect resp.body ~ ".*haproxy_process_build_info{version=\".*\"} 1.*" - expect resp.body ~ ".*haproxy_frontend_http_responses_total{proxy=\"stats\",code=\"4xx\"} 0.*" - expect resp.body ~ ".*haproxy_frontend_status{proxy=\"fe\",state=\"UP\"} 1.*" - expect resp.body ~ ".*haproxy_listener_status{proxy=\"stats\",listener=\"sock-1\",state=\"WAITING\"} 0.*" - expect resp.body ~ ".*haproxy_backend_status{proxy=\"be\",state=\"UP\"} 1.*" - expect resp.body ~ ".*haproxy_server_status{proxy=\"be\",server=\"s1\",state=\"DOWN\"} 0.*" - expect resp.body ~ ".*haproxy_server_check_status{proxy=\"be\",server=\"s2\",state=\"HANA\"} 0.*" - # test scope txreq -url "/metrics?scope=" rxresp @@ -96,6 +99,7 @@ client c1 -connect ${h1_stats_sock} { expect resp.body !~ ".*haproxy_backend.*" expect resp.body ~ ".*haproxy_server.*" expect resp.body !~ ".*haproxy_sticktable.*" + expect resp.body !~ ".*haproxy_resolver.*" txreq -url "/metrics?scope=frontend&scope=backend" rxresp @@ -106,6 +110,7 @@ client c1 -connect ${h1_stats_sock} { expect resp.body ~ ".*haproxy_backend.*" expect resp.body !~ ".*haproxy_server.*" expect resp.body !~ ".*haproxy_sticktable.*" + expect resp.body !~ ".*haproxy_resolver.*" txreq -url "/metrics?scope" rxresp diff --git a/reg-tests/http-messaging/h1_host_normalization.vtc b/reg-tests/http-messaging/h1_host_normalization.vtc index 48174b8..909d525 100644 --- a/reg-tests/http-messaging/h1_host_normalization.vtc +++ b/reg-tests/http-messaging/h1_host_normalization.vtc @@ -175,22 +175,62 @@ syslog S1 -level info { # C32 recv - expect ~ "^.* uri: GET http:// HTTP/1.1; host: {}$" + expect ~ "^.* uri: GET http:/// 
HTTP/1.1; host: {}$" barrier b1 sync # C33 recv - expect ~ "^.* uri: GET https:// HTTP/1.1; host: {}$" + expect ~ "^.* uri: GET https:/// HTTP/1.1; host: {}$" barrier b1 sync # C34 recv - expect ~ "^.* uri: GET http:// HTTP/1.1; host: {}$" + expect ~ "^.* uri: GET http:/// HTTP/1.1; host: {}$" barrier b1 sync # C35 recv - expect ~ "^.* uri: GET https:// HTTP/1.1; host: {}$" + expect ~ "^.* uri: GET https:/// HTTP/1.1; host: {}$" + barrier b1 sync + + # C36 + recv + expect ~ "^.* uri: GET http://hostname/ HTTP/1.1; host: {hostname}$" + barrier b1 sync + + # C37 + recv + expect ~ "^.* uri: GET http://hostname/ HTTP/1.1; host: {hostname}$" + barrier b1 sync + + # C38 + recv + expect ~ "^.* uri: GET http://hostname/ HTTP/1.1; host: {hostname}$" + barrier b1 sync + + # C39 + recv + expect ~ "^.* uri: GET https://hostname/ HTTP/1.1; host: {hostname}$" + barrier b1 sync + + # C40 + recv + expect ~ "^.* uri: GET https://hostname/ HTTP/1.1; host: {hostname}$" + barrier b1 sync + + # C41 + recv + expect ~ "^.* uri: GET https://hostname/ HTTP/1.1; host: {hostname}$" + barrier b1 sync + + # C42 + recv + expect ~ "^.* uri: GET http://hostname:81/ HTTP/1.1; host: {hostname:81}$" + barrier b1 sync + + # C43 + recv + expect ~ "^.* uri: GET https://hostname:444/ HTTP/1.1; host: {hostname:444}$" } -start @@ -759,4 +799,108 @@ client c35 -connect ${h1_fe_sock} { expect resp.status == 200 } -run +# Wait matching on log message +barrier b1 sync + +client c36 -connect ${h1_fe_sock} { + txreq \ + -req "GET" \ + -url "http://hostname" \ + -hdr "host: hostname" + + rxresp + expect resp.status == 200 +} -run + +# Wait matching on log message +barrier b1 sync + +client c37 -connect ${h1_fe_sock} { + txreq \ + -req "GET" \ + -url "http://hostname:80" \ + -hdr "host: hostname" + + rxresp + expect resp.status == 200 +} -run + +# Wait matching on log message +barrier b1 sync + +client c38 -connect ${h1_fe_sock} { + txreq \ + -req "GET" \ + -url "http://hostname:" \ + -hdr "host: hostname" + + 
rxresp + expect resp.status == 200 +} -run + +# Wait matching on log message +barrier b1 sync + +client c39 -connect ${h1_fe_sock} { + txreq \ + -req "GET" \ + -url "https://hostname" \ + -hdr "host: hostname" + + rxresp + expect resp.status == 200 +} -run + +# Wait matching on log message +barrier b1 sync + +client c40 -connect ${h1_fe_sock} { + txreq \ + -req "GET" \ + -url "https://hostname:443" \ + -hdr "host: hostname" + + rxresp + expect resp.status == 200 +} -run + +# Wait matching on log message +barrier b1 sync + +client c41 -connect ${h1_fe_sock} { + txreq \ + -req "GET" \ + -url "https://hostname:" \ + -hdr "host: hostname" + + rxresp + expect resp.status == 200 +} -run + +# Wait matching on log message +barrier b1 sync + +client c42 -connect ${h1_fe_sock} { + txreq \ + -req "GET" \ + -url "http://hostname:81" \ + -hdr "host: hostname:81" + + rxresp + expect resp.status == 200 +} -run + +# Wait matching on log message +barrier b1 sync + +client c43 -connect ${h1_fe_sock} { + txreq \ + -req "GET" \ + -url "https://hostname:444" \ + -hdr "host: hostname:444" + + rxresp + expect resp.status == 200 +} -run + syslog S1 -wait diff --git a/reg-tests/http-messaging/h1_request_target_validation.vtc b/reg-tests/http-messaging/h1_request_target_validation.vtc new file mode 100644 index 0000000..63e194a --- /dev/null +++ b/reg-tests/http-messaging/h1_request_target_validation.vtc @@ -0,0 +1,111 @@ +varnishtest "HTTP request tests: H1 request target parsing" + +feature ignore_unknown_macro + +#REQUIRE_VERSION=3.0 + +haproxy h1 -conf { + global + # WT: limit false-positives causing "HTTP header incomplete" due to + # idle server connections being randomly used and randomly expiring + # under us. 
+ tune.idle-pool.shared off + + defaults + mode http + timeout connect "${HAPROXY_TEST_TIMEOUT-5s}" + timeout client "${HAPROXY_TEST_TIMEOUT-5s}" + timeout server "${HAPROXY_TEST_TIMEOUT-5s}" + + listen li1 + bind "fd@${li1}" + http-request return status 200 +} -start + +client c1 -connect ${h1_li1_sock} { + txreq -req "OPTIONS" -url "*" + rxresp + expect resp.status == 200 + +} -run + +client c2 -connect ${h1_li1_sock} { + txreq -req "OPTIONS" -url "/" + rxresp + expect resp.status == 200 + +} -run + +client c3 -connect ${h1_li1_sock} { + txreq -req "OPTIONS" -url "http://haproxy.org" \ + -hdr "Host: haproxy.org" + rxresp + expect resp.status == 200 + +} -run + +client c4 -connect ${h1_li1_sock} { + txreq -req "OPTIONS" -url "*/test" + rxresp + expect resp.status == 400 +} -run + +client c5 -connect ${h1_li1_sock} { + txreq -req "GET" -url "*" + rxresp + expect resp.status == 400 +} -run + +client c6 -connect ${h1_li1_sock} { + txreq -req "CONNECT" -url "haproxy.org:80" \ + -hdr "Host: haproxy.org" + rxresp + expect resp.status == 200 + +} -run + +client c7 -connect ${h1_li1_sock} { + txreq -req "CONNECT" -url "haproxy.org" \ + -hdr "Host: haproxy.org" + rxresp + expect resp.status == 400 +} -run + +client c8 -connect ${h1_li1_sock} { + txreq -req "CONNECT" -url "/" + rxresp + expect resp.status == 400 +} -run + +client c9 -connect ${h1_li1_sock} { + txreq -req "CONNECT" -url "http://haproxy.org:80" \ + -hdr "Host: haproxy.org" + rxresp + expect resp.status == 400 +} -run + +client c11 -connect ${h1_li1_sock} { + txreq -req "GET" -url "/" \ + -hdr "Host: haproxy.org" + rxresp + expect resp.status == 200 +} -run + +client c12 -connect ${h1_li1_sock} { + txreq -req "GET" -url "haproxy.org:80" \ + -hdr "Host: haproxy.org" + rxresp + expect resp.status == 400 +} -run + +client c13 -connect ${h1_li1_sock} { + txreq -req "GET" -url "admin" + rxresp + expect resp.status == 400 +} -run + +client c14 -connect ${h1_li1_sock} { + txreq -req "GET" -url "admin/a/b" + rxresp + 
expect resp.status == 400 +} -run diff --git a/reg-tests/http-messaging/truncated.vtc b/reg-tests/http-messaging/truncated.vtc index 7579f6d..7f262d7 100644 --- a/reg-tests/http-messaging/truncated.vtc +++ b/reg-tests/http-messaging/truncated.vtc @@ -1,5 +1,4 @@ varnishtest "HTTP response size tests: H2->H1 (HTX and legacy mode)" -#REQUIRE_VERSION=1.9 feature ignore_unknown_macro diff --git a/reg-tests/http-rules/acl_cli_spaces.vtc b/reg-tests/http-rules/acl_cli_spaces.vtc index a554977..334133d 100644 --- a/reg-tests/http-rules/acl_cli_spaces.vtc +++ b/reg-tests/http-rules/acl_cli_spaces.vtc @@ -10,8 +10,6 @@ server s1 { haproxy h1 -W -S -conf { defaults mode http - log global - option httplog timeout connect "${HAPROXY_TEST_TIMEOUT-5s}" timeout client "${HAPROXY_TEST_TIMEOUT-5s}" timeout server "${HAPROXY_TEST_TIMEOUT-5s}" diff --git a/reg-tests/http-rules/http-err-fail.vtc b/reg-tests/http-rules/http-err-fail.vtc new file mode 100644 index 0000000..6d8f9ea --- /dev/null +++ b/reg-tests/http-rules/http-err-fail.vtc @@ -0,0 +1,84 @@ +varnishtest "test for http-err-codes/http-fail-codes redefinitions" + +feature cmd "$HAPROXY_PROGRAM -cc 'version_atleast(3.0-dev1)'" +feature ignore_unknown_macro + +server s2 { + rxreq + txresp -status 220 +} -start + +server s3 { + rxreq + txresp -status 300 +} -start + +server s4 { + rxreq + txresp -status 400 +} -start + +server s5 { + rxreq + txresp -status 555 +} -start + + +haproxy h1 -conf { + global + http-err-codes 220 +300-499 -300-399 # only 220, 400-499 remain + http-fail-codes -550-580 +555,599,556-566 + + defaults + mode http + timeout connect "${HAPROXY_TEST_TIMEOUT-5s}" + timeout client "${HAPROXY_TEST_TIMEOUT-5s}" + timeout server "${HAPROXY_TEST_TIMEOUT-5s}" + option socket-stats + + frontend fe + bind "fd@${fe}" + http-request track-sc0 path + http-after-response add-header x-table err=%[sc0_http_err_cnt],fail=%[sc0_http_fail_cnt] + stick-table type string size 100 store http_err_cnt,http_fail_cnt + 
default_backend be + + backend be + use-server s2 if { path -m beg /2 } + use-server s3 if { path -m beg /3 } + use-server s4 if { path -m beg /4 } + use-server s5 if { path -m beg /5 } + + server s2 ${s2_addr}:${s2_port} + server s3 ${s3_addr}:${s3_port} + server s4 ${s4_addr}:${s4_port} + server s5 ${s5_addr}:${s5_port} +} -start + +client c2 -connect ${h1_fe_sock} { + txreq -url "/2" + rxresp + expect resp.status == 220 + expect resp.http.x-table ~ "err=1,fail=0" +} -run + +client c3 -connect ${h1_fe_sock} { + txreq -url "/3" + rxresp + expect resp.status == 300 + expect resp.http.x-table ~ "err=0,fail=0" +} -run + +client c4 -connect ${h1_fe_sock} { + txreq -url "/4" + rxresp + expect resp.status == 400 + expect resp.http.x-table ~ "err=1,fail=0" +} -run + +client c5 -connect ${h1_fe_sock} { + txreq -url "/5" + rxresp + expect resp.status == 555 + expect resp.http.x-table ~ "err=0,fail=1" +} -run diff --git a/reg-tests/http-rules/map_ordering.map b/reg-tests/http-rules/map_ordering.map index dcd9529..dc9ac71 100644 --- a/reg-tests/http-rules/map_ordering.map +++ b/reg-tests/http-rules/map_ordering.map @@ -2,3 +2,5 @@ first.domain.tld first domain.tld domain second.domain.tld second +# This entry is used to test duplicate behavior (ie: tree-based match) +first.domain.tld first_dup diff --git a/reg-tests/http-rules/map_ordering.vtc b/reg-tests/http-rules/map_ordering.vtc index 40da465..923d19f 100644 --- a/reg-tests/http-rules/map_ordering.vtc +++ b/reg-tests/http-rules/map_ordering.vtc @@ -1,4 +1,4 @@ -varnishtest "Test list-based matching types ordering" +varnishtest "Ensure mapfile ordering is preserved when loading the file" feature cmd "$HAPROXY_PROGRAM -cc 'version_atleast(2.5-dev0)'" feature ignore_unknown_macro @@ -14,9 +14,13 @@ haproxy h1 -conf { # check list ordering using map_dom (list-based match) http-request return hdr dom %[req.hdr(Host),lower,map_dom(${testdir}/map_ordering.map)] if { url_beg /dom } + + # check tree ordering using map_str 
(tree-based match) and duplicated keys + http-request return hdr str %[req.hdr(Host),lower,map_str(${testdir}/map_ordering.map)] if { url_beg /str } + } -start -# Check map ordering +# Check map ordering for list-based matching types client c1 -connect ${h1_fe1_sock} { # first.domain.tld is above domain.tld so it should match first txreq -url "/dom" -hdr "Host: first.domain.tld" @@ -30,3 +34,13 @@ client c1 -connect ${h1_fe1_sock} { expect resp.status == 200 expect resp.http.dom == "domain" } -run + +# Check map ordering for tree-based matching types (check that the matching +# key is the first one seen in the file) +client c2 -connect ${h1_fe1_sock} { + # first.domain.tld is first mapped to "first" in the mapfile + txreq -url "/str" -hdr "Host: first.domain.tld" + rxresp + expect resp.status == 200 + expect resp.http.str == "first" +} -run diff --git a/reg-tests/sample_fetches/acl.vtc b/reg-tests/sample_fetches/acl.vtc new file mode 100644 index 0000000..905ae3e --- /dev/null +++ b/reg-tests/sample_fetches/acl.vtc @@ -0,0 +1,35 @@ +varnishtest "Test acl() sample fetch" +feature cmd "$HAPROXY_PROGRAM -cc 'version_atleast(2.9-dev2)'" + +feature ignore_unknown_macro + +haproxy h1 -conf { + defaults + mode http + timeout connect "${HAPROXY_TEST_TIMEOUT-5s}" + timeout client "${HAPROXY_TEST_TIMEOUT-5s}" + timeout server "${HAPROXY_TEST_TIMEOUT-5s}" + + frontend fe1 + bind "fd@${fe1}" + + acl ACL1 always_true + acl ACL2 acl(ACL1) + acl ACL3 acl(!ACL2) + acl ACL4 acl(ACL2,!ACL3) + + http-request return status 200 hdr x-acl "ACL1=%[acl(ACL1)] ACL2=%[acl(ACL2)] ACL3=%[acl(ACL3)] ACL4=%[acl(ACL4)] TRUE=%[acl(TRUE)]" + + log-format ACL1=%[acl(ACL1)] +} -start + +client c1 -connect ${h1_fe1_sock} { + txreq -req GET -url / + rxresp + expect resp.status == 200 + expect resp.http.x-acl ~ "ACL1=1" + expect resp.http.x-acl ~ "ACL2=1" + expect resp.http.x-acl ~ "ACL3=0" + expect resp.http.x-acl ~ "ACL4=1" + expect resp.http.x-acl ~ "TRUE=1" +} -run diff --git 
a/reg-tests/ssl/crt_store.vtc b/reg-tests/ssl/crt_store.vtc new file mode 100644 index 0000000..685183e --- /dev/null +++ b/reg-tests/ssl/crt_store.vtc @@ -0,0 +1,31 @@ +#REGTEST_TYPE=devel +varnishtest "Test the crt-store section" +feature cmd "$HAPROXY_PROGRAM -cc 'version_atleast(3.0-dev7)'" +feature cmd "$HAPROXY_PROGRAM -cc 'feature(OPENSSL)'" +feature ignore_unknown_macro + +# +# Basic check for the crt-store, ensure that loading works and that we can't +# load a crt which was used before +# + + +haproxy h1 -arg -V -conf-OK { + crt-store + load crt "${testdir}/common.crt" key "${testdir}/common.key" + + listen ssl-lst + bind "${tmpdir}/ssl.sock" ssl crt ${testdir}/common.crt strict-sni + +} + +haproxy h2 -arg -V -conf-BAD {} { + + listen ssl-lst + bind "${tmpdir}/ssl.sock" ssl crt ${testdir}/common.pem strict-sni + + crt-store + load crt "${testdir}/common.pem" key "${testdir}/common.key" + +} + diff --git a/reg-tests/ssl/ocsp_auto_update.vtc b/reg-tests/ssl/ocsp_auto_update.vtc index a1d9a3c..0193953 100644 --- a/reg-tests/ssl/ocsp_auto_update.vtc +++ b/reg-tests/ssl/ocsp_auto_update.vtc @@ -1,4 +1,5 @@ #REGTEST_TYPE=slow +# reg-test is around ~2.5s # broken with BoringSSL. @@ -14,29 +15,20 @@ # soon as possible by the update task. # # The ocsp responder used in all the tests will be an openssl using the -# certificate database in ocsp_update/index.txt. It will listen on port 12346 -# which is not the same as the one specified in the certificates' OCSP URI -# which point to port 12345. The link from port 12345 to port 12346 will be -# ensured through HAProxy instances that will enable logs, later used as a -# synchronization mean. -# -# Unfortunately some arbitrary "sleep" calls are still needed to leave some -# time for the ocsp update task to actually process the ocsp responses and -# reinsert them into the tree. This explains why the test's mode is set to -# "slow". 
-# -# The fourth test case focuses on the "update ssl ocsp-response" CLI command -# and tests two certificates that have a known OCSP response loaded during init -# but no OCSP auto update. The only difference between the two certificates is -# that one has a separate .issuer file while the other one has the issuer -# certificate directly in the main .pem file. +# certificate database in ocsp_update/index.txt. It will listen on port 12345 +# which was specified explicitly in the certificates used in the tests. +# The synchronization will be based on the logs emitted by the OCSP update task +# directly. When this log is created, we will know that the update was +# effective and the updated OCSP response is loaded in the tree. So any +# following call to "show ssl ocsp-response" will display the latest response +# information. # # If this test does not work anymore: # - Check that you have openssl and socat varnishtest "Test the OCSP auto update feature" -feature cmd "$HAPROXY_PROGRAM -cc 'version_atleast(2.7-dev0)'" -feature cmd "$HAPROXY_PROGRAM -cc 'feature(OPENSSL) && !ssllib_name_startswith(BoringSSL) && openssl_version_atleast(1.1.1)'" +feature cmd "$HAPROXY_PROGRAM -cc 'version_atleast(3.0-dev0)'" +feature cmd "$HAPROXY_PROGRAM -cc 'feature(OPENSSL) && !ssllib_name_startswith(BoringSSL) && !ssllib_name_startswith(LibreSSL) && openssl_version_atleast(1.1.1)'" feature cmd "command -v openssl && command -v socat" feature ignore_unknown_macro @@ -102,26 +94,23 @@ haproxy h1 -wait # This test will focus on two separate certificates that have the same OCSP uri # (http://ocsp.haproxy.com:12345) but no OCSP response loaded at build time. # The update mode is set to 'on' in the two crt-lists used. The two ocsp -# responses should then be fetched automatically after init. We use an http -# listener as a rebound on which http log is enabled towards Syslog_http. 
This -# ensures that two requests are sent by the ocsp auto update task and it -# enables to use a barrier to synchronize the ocsp task and the subsequent cli -# calls. Thanks to the barrier we know that when calling "show ssl -# ocsp-response" on the cli, the two answers should already have been received -# and processed. +# responses should then be fetched automatically after init. +# We rely on the OCSP logs to ensure that the two updates are over before +# calling "show ssl ocsp-response". This is done through the Syslog_ocsp +# listener and a dedicated barrier. -process p1 "openssl ocsp -index ${testdir}/ocsp_update/index.txt -rsigner ${testdir}/ocsp_update/ocsp.haproxy.com.pem -CA ${testdir}/ocsp_update/ocsp_update_rootca.crt -nrequest 2 -ndays 1 -port 12346 -timeout 5" -start +process p2 "openssl ocsp -index ${testdir}/ocsp_update/index.txt -rsigner ${testdir}/ocsp_update/ocsp.haproxy.com.pem -CA ${testdir}/ocsp_update/ocsp_update_rootca.crt -nrequest 2 -ndays 1 -port 12345 -timeout 5" -start -barrier b1 cond 2 -cyclic +barrier b2 cond 2 -cyclic -syslog Syslog_http -level info { +syslog Syslog_ocsp -level notice { recv - expect ~ "GET /MEMwQTA%2FMD0wOzAJBgUrDgMCGgUABBSKg%2BAGD6%2F3Ccp%2Bm5VSKi6BY1%2FaCgQU9lKw5DXV6pI4UVCPCtvpLYXeAHoCAhAV HTTP/1.1" + expect ~ "<OCSP-UPDATE> ${testdir}/ocsp_update/multicert_no_ocsp/server_ocsp_rsa.pem 1 \"Update successful\" 0 1" recv - expect ~ "GET /MEMwQTA%2FMD0wOzAJBgUrDgMCGgUABBSKg%2BAGD6%2F3Ccp%2Bm5VSKi6BY1%2FaCgQU9lKw5DXV6pI4UVCPCtvpLYXeAHoCAhAW HTTP/1.1" + expect ~ "<OCSP-UPDATE> ${testdir}/ocsp_update/multicert_no_ocsp/server_ocsp_ecdsa.pem 1 \"Update successful\" 0 1" - barrier b1 sync + barrier b2 sync } -start haproxy h2 -conf { @@ -130,6 +119,7 @@ haproxy h2 -conf { tune.ssl.capture-buffer-size 1 stats socket "${tmpdir}/h2/stats" level admin crt-base ${testdir}/ocsp_update + log ${Syslog_ocsp_addr}:${Syslog_ocsp_port} local0 notice notice defaults mode http @@ -146,18 +136,9 @@ haproxy h2 -conf { frontend 
ssl-ecdsa-fe bind "${tmpdir}/ssl3.sock" ssl crt-list ${testdir}/ocsp_update/multicert_ecdsa.crt-list ca-file ${testdir}/set_cafile_rootCA.crt verify none crt-ignore-err all http-request return status 200 - - listen http_rebound_lst - mode http - option httplog - log ${Syslog_http_addr}:${Syslog_http_port} local0 - bind "127.0.0.1:12345" - server s1 "127.0.0.1:12346" } -start -barrier b1 sync - -shell "sleep 1" +barrier b2 sync # We should have two distinct ocsp IDs known that were loaded at build time and # the responses' contents should have been filled automatically by the ocsp @@ -176,7 +157,7 @@ haproxy h2 -cli { } haproxy h2 -wait -process p1 -wait -expect-exit 0 +process p2 -wait -expect-exit 0 ################### @@ -189,15 +170,14 @@ process p1 -wait -expect-exit 0 # will not enable ocsp-update on its certificate. Only one request should then # be sent. -process p2 "openssl ocsp -index ${testdir}/ocsp_update/index.txt -rsigner ${testdir}/ocsp_update/ocsp.haproxy.com.pem -CA ${testdir}/ocsp_update/ocsp_update_rootca.crt -nrequest 1 -ndays 1 -port 12346 -timeout 5" -start +process p3 "openssl ocsp -index ${testdir}/ocsp_update/index.txt -rsigner ${testdir}/ocsp_update/ocsp.haproxy.com.pem -CA ${testdir}/ocsp_update/ocsp_update_rootca.crt -nrequest 1 -ndays 1 -port 12345 -timeout 5" -start -barrier b2 cond 2 -cyclic +barrier b3 cond 2 -cyclic -syslog Syslog_http2 -level info { +syslog Syslog_ocsp3 -level notice { recv - expect ~ "GET /MEMwQTA%2FMD0wOzAJBgUrDgMCGgUABBSKg%2BAGD6%2F3Ccp%2Bm5VSKi6BY1%2FaCgQU9lKw5DXV6pI4UVCPCtvpLYXeAHoCAhAV HTTP/1.1" - - barrier b2 sync + expect ~ "<OCSP-UPDATE> ${testdir}/ocsp_update/multicert_no_ocsp/server_ocsp_rsa.pem 1 \"Update successful\" 0 1" + barrier b3 sync } -start haproxy h3 -conf { @@ -206,6 +186,7 @@ haproxy h3 -conf { tune.ssl.capture-buffer-size 1 stats socket "${tmpdir}/h3/stats" level admin crt-base ${testdir}/ocsp_update + log ${Syslog_ocsp3_addr}:${Syslog_ocsp3_port} local0 notice notice defaults mode http @@ 
-222,18 +203,9 @@ haproxy h3 -conf { frontend ssl-ecdsa-fe bind "${tmpdir}/ssl5.sock" ssl crt-list ${testdir}/ocsp_update/multicert_ecdsa_no_update.crt-list ca-file ${testdir}/set_cafile_rootCA.crt verify none crt-ignore-err all http-request return status 200 - - listen http_rebound_lst - mode http - option httplog - log ${Syslog_http2_addr}:${Syslog_http2_port} local0 - bind "127.0.0.1:12345" - server s1 "127.0.0.1:12346" } -start -barrier b2 sync - -shell "sleep 1" +barrier b3 sync # We should have a single ocsp ID known that was loaded at build time and the # response should be filled @@ -248,7 +220,7 @@ haproxy h3 -cli { } haproxy h3 -wait -process p2 -wait +process p3 -wait @@ -258,8 +230,27 @@ process p2 -wait # (CLI COMMAND) # # # #################### +# This test focuses on the "update ssl ocsp-response" CLI command and tests two +# certificates that have a known OCSP response loaded during init but no OCSP +# auto update. The only difference between the two certificates is that one has +# a separate .issuer file while the other one has the issuer certificate +# directly in the main .pem file. +# We store the original "Produced At" date of the responses loaded during init +# in haproxy proc variables in order to compare them to their new value after +# the update is performed. 
+ +process p4 "openssl ocsp -index ${testdir}/ocsp_update/index.txt -rsigner ${testdir}/ocsp_update/ocsp.haproxy.com.pem -CA ${testdir}/ocsp_update/ocsp_update_rootca.crt -nrequest 2 -ndays 1 -port 12345 -timeout 5" -start -process p3 "openssl ocsp -index ${testdir}/ocsp_update/index.txt -rsigner ${testdir}/ocsp_update/ocsp.haproxy.com.pem -CA ${testdir}/ocsp_update/ocsp_update_rootca.crt -nrequest 2 -ndays 1 -port 12346 -timeout 5" -start +barrier b4 cond 2 -cyclic + +syslog Syslog_ocsp4 -level notice { + recv + expect ~ "<OCSP-UPDATE> ${testdir}/ocsp_update/multicert/server_ocsp.pem.rsa 1 \"Update successful\" 0 1" + + recv + expect ~ "<OCSP-UPDATE> ${testdir}/ocsp_update/multicert/server_ocsp_ecdsa.pem 1 \"Update successful\" 0 1" + barrier b4 sync +} -start haproxy h4 -conf { global @@ -267,6 +258,7 @@ haproxy h4 -conf { tune.ssl.capture-buffer-size 1 stats socket "${tmpdir}/h4/stats" level admin crt-base ${testdir}/ocsp_update + log ${Syslog_ocsp4_addr}:${Syslog_ocsp4_port} local0 notice notice defaults mode http @@ -283,19 +275,12 @@ haproxy h4 -conf { frontend ssl-ecdsa-ocsp bind "${tmpdir}/ssl6.sock" ssl crt ${testdir}/ocsp_update/multicert/server_ocsp_ecdsa.pem ca-file ${testdir}/set_cafile_rootCA.crt verify none crt-ignore-err all http-request return status 200 - - listen http_rebound_lst - mode http - option httplog - bind "127.0.0.1:12345" - http-response set-var(proc.processed) int(1) - server s1 "127.0.0.1:12346" } -start # We need to "enable" the cli with a first cli call before using it only through socats haproxy h4 -cli { - send "show ssl ocsp-response" - expect ~ "Certificate ID key : 303b300906052b0e03021a050004148a83e0060faff709ca7e9b95522a2e81635fda0a0414f652b0e435d5ea923851508f0adbe92d85de007a02021016" + send "show ssl cert" + expect ~ "" } # We should have two OCSP responses loaded during init @@ -307,62 +292,53 @@ shell { echo "$responses" | grep "Serial Number: 1015" } -shell { - echo "show ssl ocsp-response 
303b300906052b0e03021a050004148a83e0060faff709ca7e9b95522a2e81635fda0a0414f652b0e435d5ea923851508f0adbe92d85de007a02021015" | socat "${tmpdir}/h4/stats" - | grep "Cert Status: revoked" -} +haproxy h4 -cli { + send "show ssl ocsp-response 303b300906052b0e03021a050004148a83e0060faff709ca7e9b95522a2e81635fda0a0414f652b0e435d5ea923851508f0adbe92d85de007a02021015" + expect ~ "Cert Status: revoked" -shell { - echo "show ssl ocsp-response 303b300906052b0e03021a050004148a83e0060faff709ca7e9b95522a2e81635fda0a0414f652b0e435d5ea923851508f0adbe92d85de007a02021016" | socat "${tmpdir}/h4/stats" - | grep "Cert Status: good" + send "show ssl ocsp-response 303b300906052b0e03021a050004148a83e0060faff709ca7e9b95522a2e81635fda0a0414f652b0e435d5ea923851508f0adbe92d85de007a02021016" + expect ~ "Cert Status: good" } # Update the first ocsp response (ckch_data has a non-NULL ocsp_issuer pointer) shell { # Store the current "Produced At" in order to ensure that after the update # the OCSP response actually changed - produced_at=$(echo "show ssl ocsp-response 303b300906052b0e03021a050004148a83e0060faff709ca7e9b95522a2e81635fda0a0414f652b0e435d5ea923851508f0adbe92d85de007a02021015" | socat "${tmpdir}/h4/stats" - | grep "Produced At") + produced_at1=$(echo "show ssl ocsp-response 303b300906052b0e03021a050004148a83e0060faff709ca7e9b95522a2e81635fda0a0414f652b0e435d5ea923851508f0adbe92d85de007a02021015" | socat "${tmpdir}/h4/stats" - | grep "Produced At" | tr -d ' ') echo "update ssl ocsp-response ${testdir}/ocsp_update/multicert/server_ocsp.pem.rsa" | socat "${tmpdir}/h4/stats" - - while ! 
echo "get var proc.processed" | socat "${tmpdir}/h4/stats" - | grep 'proc.processed: type=sint value=<1>' - do - echo "get var proc.processed" | socat "${tmpdir}/h4/stats" - >> /tmp/toto - sleep 0.5 - done - echo "experimental-mode on;set var proc.processed int(0)" | socat "${tmpdir}/h4/stats" - + # Update the second ocsp response (ckch_data has a NULL ocsp_issuer pointer) + # Store the current "Produced At" in order to ensure that after the update + # the OCSP response actually changed + produced_at2=$(echo "show ssl ocsp-response 303b300906052b0e03021a050004148a83e0060faff709ca7e9b95522a2e81635fda0a0414f652b0e435d5ea923851508f0adbe92d85de007a02021016" | socat "${tmpdir}/h4/stats" - | grep "Produced At" | tr -d ' ') - ocsp_response=$(echo "show ssl ocsp-response 303b300906052b0e03021a050004148a83e0060faff709ca7e9b95522a2e81635fda0a0414f652b0e435d5ea923851508f0adbe92d85de007a02021015" | socat "${tmpdir}/h4/stats" -) - new_produced_at=$(echo "$ocsp_response" | grep "Produced At") + echo "update ssl ocsp-response ${testdir}/ocsp_update/multicert/server_ocsp_ecdsa.pem" | socat "${tmpdir}/h4/stats" - - echo "$ocsp_response" | grep -q "Serial Number: 1015" && \ - echo "$ocsp_response" | grep -q "Cert Status: revoked" && \ - [ "$new_produced_at" != "$produced_at" ] + echo "experimental-mode on;set var proc.produced_at1 str($produced_at1)" | socat "${tmpdir}/h4/stats" - + echo "experimental-mode on;set var proc.produced_at2 str($produced_at2)" | socat "${tmpdir}/h4/stats" - } -# Update the second ocsp response (ckch_data has a NULL ocsp_issuer pointer) -shell { - # Store the current "Produced At" in order to ensure that after the update - # the OCSP response actually changed - produced_at=$(echo "show ssl ocsp-response 303b300906052b0e03021a050004148a83e0060faff709ca7e9b95522a2e81635fda0a0414f652b0e435d5ea923851508f0adbe92d85de007a02021016" | socat "${tmpdir}/h4/stats" - | grep "Produced At") +barrier b4 sync - echo "update ssl ocsp-response 
${testdir}/ocsp_update/multicert/server_ocsp_ecdsa.pem" | socat "${tmpdir}/h4/stats" - - while ! echo "get var proc.processed" | socat "${tmpdir}/h4/stats" - | grep 'proc.processed: type=sint value=<1>' - do - echo "get var proc.processed" | socat "${tmpdir}/h4/stats" - >> /tmp/toto - sleep 0.5 - done +shell { + produced_at1_after=$(echo "show ssl ocsp-response 303b300906052b0e03021a050004148a83e0060faff709ca7e9b95522a2e81635fda0a0414f652b0e435d5ea923851508f0adbe92d85de007a02021015" | socat "${tmpdir}/h4/stats" - | grep "Produced At" | tr -d ' ') + produced_at2_after=$(echo "show ssl ocsp-response 303b300906052b0e03021a050004148a83e0060faff709ca7e9b95522a2e81635fda0a0414f652b0e435d5ea923851508f0adbe92d85de007a02021016" | socat "${tmpdir}/h4/stats" - | grep "Produced At" | tr -d ' ') - echo "experimental-mode on;set var proc.processed int(0)" | socat "${tmpdir}/h4/stats" - + ocsp_response1=$(echo "show ssl ocsp-response 303b300906052b0e03021a050004148a83e0060faff709ca7e9b95522a2e81635fda0a0414f652b0e435d5ea923851508f0adbe92d85de007a02021015" | socat "${tmpdir}/h4/stats" -) + ocsp_response2=$(echo "show ssl ocsp-response 303b300906052b0e03021a050004148a83e0060faff709ca7e9b95522a2e81635fda0a0414f652b0e435d5ea923851508f0adbe92d85de007a02021016" | socat "${tmpdir}/h4/stats" -) - ocsp_response=$(echo "show ssl ocsp-response 303b300906052b0e03021a050004148a83e0060faff709ca7e9b95522a2e81635fda0a0414f652b0e435d5ea923851508f0adbe92d85de007a02021016" | socat "${tmpdir}/h4/stats" -) - new_produced_at=$(echo "$ocsp_response" | grep "Produced At") + echo "$ocsp_response1" | grep -q "Serial Number: 1015" && \ + echo "$ocsp_response1" | grep -q "Cert Status: revoked" && \ + echo "$ocsp_response2" | grep -q "Serial Number: 1016" && \ + echo "$ocsp_response2" | grep -q "Cert Status: revoked" && \ + [ "$produced_at1_after" != "$(echo \"experimental-mode on; get var proc.produced_at1\" | socat \"${tmpdir}/h4/stats\")" ] && \ + [ "$produced_at2_after" != "$(echo \"experimental-mode on; 
get var proc.produced_at2\" | socat \"${tmpdir}/h4/stats\")" ] - echo "$ocsp_response" | grep -q "Serial Number: 1016" && \ - echo "$ocsp_response" | grep -q "Cert Status: revoked" && \ - [ "$new_produced_at" != "$produced_at" ] } haproxy h4 -wait -process p3 -wait +process p4 -wait #################### @@ -376,16 +352,16 @@ process p3 -wait # to the "show ssl ocsp-response" command. -process p5 "openssl ocsp -index ${testdir}/ocsp_update/index.txt -rsigner ${testdir}/ocsp_update/ocsp.haproxy.com.pem -CA ${testdir}/ocsp_update/ocsp_update_rootca.crt -nrequest 2 -ndays 1 -port 12346 -timeout 5" -start +process p5 "openssl ocsp -index ${testdir}/ocsp_update/index.txt -rsigner ${testdir}/ocsp_update/ocsp.haproxy.com.pem -CA ${testdir}/ocsp_update/ocsp_update_rootca.crt -nrequest 2 -ndays 1 -port 12345 -timeout 5" -start barrier b5 cond 2 -cyclic -syslog Syslog_http5 -level info { +syslog Syslog_ocsp5 -level notice { recv - expect ~ "GET /MEMwQTA%2FMD0wOzAJBgUrDgMCGgUABBSKg%2BAGD6%2F3Ccp%2Bm5VSKi6BY1%2FaCgQU9lKw5DXV6pI4UVCPCtvpLYXeAHoCAhAV HTTP/1.1" + expect ~ "<OCSP-UPDATE> .*/ocsp_update/multicert_no_ocsp/server_ocsp_rsa.pem 1 \"Update successful\" 0 1" recv - expect ~ "GET /MEMwQTA%2FMD0wOzAJBgUrDgMCGgUABBSKg%2BAGD6%2F3Ccp%2Bm5VSKi6BY1%2FaCgQU9lKw5DXV6pI4UVCPCtvpLYXeAHoCAhAW HTTP/1.1" + expect ~ "<OCSP-UPDATE> .*/ocsp_update/multicert_no_ocsp/server_ocsp_ecdsa.pem 1 \"Update successful\" 0 1" barrier b5 sync } -start @@ -396,6 +372,7 @@ haproxy h5 -conf { tune.ssl.capture-buffer-size 1 stats socket "${tmpdir}/h5/stats" level admin crt-base ${testdir}/ocsp_update + log ${Syslog_ocsp5_addr}:${Syslog_ocsp5_port} local0 notice notice defaults mode http @@ -412,19 +389,10 @@ haproxy h5 -conf { frontend ssl-ecdsa-fe bind "${tmpdir}/ssl8.sock" ssl crt-list ${testdir}/ocsp_update/multicert_ecdsa.crt-list ca-file ${testdir}/set_cafile_rootCA.crt verify none crt-ignore-err all http-request return status 200 - - listen http_rebound_lst - mode http - option httplog - log 
${Syslog_http5_addr}:${Syslog_http5_port} local0 - bind "127.0.0.1:12345" - server s1 "127.0.0.1:12346" } -start barrier b5 sync -shell "sleep 1" - # Use "show ssl ocsp-updates" CLI command # We should have one line per OCSP response and each one of them should have been successfully updated once # The command's output follows this format: @@ -469,13 +437,13 @@ process p5 -wait # the 'ocsp-update on' option will be taken into account by the OCSP # auto update task # -process p6 "openssl ocsp -index ${testdir}/ocsp_update/index.txt -rsigner ${testdir}/ocsp_update/ocsp.haproxy.com.pem -CA ${testdir}/ocsp_update/ocsp_update_rootca.crt -nrequest 1 -ndays 1 -port 12346 -timeout 5" -start +process p6 "openssl ocsp -index ${testdir}/ocsp_update/index.txt -rsigner ${testdir}/ocsp_update/ocsp.haproxy.com.pem -CA ${testdir}/ocsp_update/ocsp_update_rootca.crt -nrequest 1 -ndays 1 -port 12345 -timeout 5" -start barrier b6 cond 2 -cyclic -syslog Syslog_http6 -level info { +syslog Syslog_ocsp6 -level notice { recv - expect ~ "GET /MEMwQTA%2FMD0wOzAJBgUrDgMCGgUABBSKg%2BAGD6%2F3Ccp%2Bm5VSKi6BY1%2FaCgQU9lKw5DXV6pI4UVCPCtvpLYXeAHoCAhAV HTTP/1.1" + expect ~ "<OCSP-UPDATE> ${testdir}/ocsp_update/multicert/server_ocsp.pem.rsa 1 \"Update successful\" 0 1" barrier b6 sync } -start @@ -486,6 +454,7 @@ haproxy h6 -conf { tune.ssl.capture-buffer-size 1 stats socket "${tmpdir}/h6/stats" level admin crt-base ${testdir} + log ${Syslog_ocsp6_addr}:${Syslog_ocsp6_port} local0 notice notice defaults mode http @@ -500,12 +469,6 @@ haproxy h6 -conf { bind "${tmpdir}/ssl9.sock" ssl crt-list ${testdir}/simple.crt-list ca-file ${testdir}/set_cafile_rootCA.crt verify none crt-ignore-err all http-request return status 200 - listen http_rebound_lst - mode http - option httplog - log ${Syslog_http6_addr}:${Syslog_http6_port} local0 - bind "127.0.0.1:12345" - server s1 "127.0.0.1:12346" } -start # We need to "enable" the cli with a first cli call before using it only through socats @@ -527,9 +490,258 @@ 
shell { barrier b6 sync -shell "sleep 1" - haproxy h6 -cli { send "show ssl ocsp-updates" expect ~ "303b300906052b0e03021a050004148a83e0060faff709ca7e9b95522a2e81635fda0a0414f652b0e435d5ea923851508f0adbe92d85de007a02021016 .*| 1 | 0 | 1 | Update successful" } + +haproxy h6 -wait +process p6 -wait + + +###################### +# # +# SEVENTH TEST CASE # +# # +###################### + +# Check that the global "tune.ocsp-update.mode" option works and that it +# applies to certificates added via the CLI as well. +# +process p7 "openssl ocsp -index ${testdir}/ocsp_update/index.txt -rsigner ${testdir}/ocsp_update/ocsp.haproxy.com.pem -CA ${testdir}/ocsp_update/ocsp_update_rootca.crt -nrequest 2 -ndays 1 -port 12345 -timeout 5" -start + +barrier b7 cond 2 -cyclic + +syslog Syslog_ocsp7 -level notice { + recv + expect ~ "<OCSP-UPDATE> ${testdir}/ocsp_update/multicert_no_ocsp/server_ocsp_ecdsa.pem 1 \"Update successful\" 0 1" + + barrier b7 sync + + recv + expect ~ "<OCSP-UPDATE> ${testdir}/server_ocsp_rsa.pem 1 \"Update successful\" 0 1" + + barrier b7 sync +} -start + +haproxy h7 -conf { + global + tune.ssl.default-dh-param 2048 + tune.ssl.capture-buffer-size 1 + stats socket "${tmpdir}/h7/stats" level admin + crt-base ${testdir} + ocsp-update.mode on + log ${Syslog_ocsp7_addr}:${Syslog_ocsp7_port} local0 notice notice + + defaults + mode http + option httplog + log stderr local0 debug err + option logasap + timeout connect "${HAPROXY_TEST_TIMEOUT-5s}" + timeout client "${HAPROXY_TEST_TIMEOUT-5s}" + timeout server "${HAPROXY_TEST_TIMEOUT-5s}" + + frontend ssl-fe + bind "${tmpdir}/ssl_h7.sock" ssl crt ${testdir}/ocsp_update/multicert_no_ocsp/server_ocsp_ecdsa.pem ca-file ${testdir}/set_cafile_rootCA.crt verify none crt-ignore-err all + bind "${tmpdir}/ssl_h7_2.sock" ssl crt-list ${testdir}/simple.crt-list ca-file ${testdir}/set_cafile_rootCA.crt verify none crt-ignore-err all + http-request return status 200 +} -start + +barrier b7 sync + +# Create a new certificate that 
has an OCSP uri and add it to the +# existing CLI with the 'ocsp-update on' command. +shell { + echo "new ssl cert ${testdir}/server_ocsp_rsa.pem" | socat "${tmpdir}/h7/stats" - + printf "set ssl cert ${testdir}/server_ocsp_rsa.pem <<\n$(cat ${testdir}/ocsp_update/multicert_no_ocsp/server_ocsp_rsa.pem)\n\n" | socat "${tmpdir}/h7/stats" - + echo "commit ssl cert ${testdir}/server_ocsp_rsa.pem" | socat "${tmpdir}/h7/stats" - + + # We should have ocsp-update enabled via the global option + printf "add ssl crt-list ${testdir}/simple.crt-list <<\n${testdir}/server_ocsp_rsa.pem foo.com\n\n" | socat "${tmpdir}/h7/stats" - +} + +barrier b7 sync + +haproxy h7 -cli { + send "show ssl ocsp-updates" + expect ~ "303b300906052b0e03021a050004148a83e0060faff709ca7e9b95522a2e81635fda0a0414f652b0e435d5ea923851508f0adbe92d85de007a02021016 | ${testdir}/ocsp_update/multicert_no_ocsp/server_ocsp_ecdsa.pem .*| 1 | 0 | 1 | Update successful" + + send "show ssl ocsp-updates" + expect ~ "303b300906052b0e03021a050004148a83e0060faff709ca7e9b95522a2e81635fda0a0414f652b0e435d5ea923851508f0adbe92d85de007a02021015 | ${testdir}/server_ocsp_rsa.pem .*| 1 | 0 | 1 | Update successful" +} + +haproxy h7 -wait +process p7 -wait + + +###################### +# # +# EIGHTH TEST CASE # +# # +###################### + +# +# Check that removing crt-list instances does not remove the OCSP responses +# from the tree but that they will not be auto updated anymore if the last +# instance is removed (via del ssl crt-list). 
+# + +haproxy h8 -conf { + global + tune.ssl.default-dh-param 2048 + tune.ssl.capture-buffer-size 1 + stats socket "${tmpdir}/h8/stats" level admin + crt-base ${testdir}/ocsp_update + + defaults + mode http + option httplog + log stderr local0 debug err + option logasap + timeout connect "${HAPROXY_TEST_TIMEOUT-5s}" + timeout client "${HAPROXY_TEST_TIMEOUT-5s}" + timeout server "${HAPROXY_TEST_TIMEOUT-5s}" + + frontend ssl-fe + bind "${tmpdir}/ssl-h8.sock" ssl crt-list ${testdir}/ocsp_update/multicert_both_certs.crt-list ca-file ${testdir}/set_cafile_rootCA.crt verify none crt-ignore-err all + http-request return status 200 + + listen http_rebound_lst + mode http + bind "127.0.0.1:12345" + server s1 "127.0.0.1:12346" +} -start + +# Check that the two certificates are taken into account in the auto update process +haproxy h8 -cli { + send "show ssl ocsp-updates" + expect ~ "303b300906052b0e03021a050004148a83e0060faff709ca7e9b95522a2e81635fda0a0414f652b0e435d5ea923851508f0adbe92d85de007a02021015 .*" + + send "show ssl ocsp-updates" + expect ~ "303b300906052b0e03021a050004148a83e0060faff709ca7e9b95522a2e81635fda0a0414f652b0e435d5ea923851508f0adbe92d85de007a02021016 .*" +} + +# Remove the second line from the crt-list and check that the corresponding +# ocsp response was removed from the auto update list but is still present in the +# system +haproxy h8 -cli { + send "del ssl crt-list ${testdir}/ocsp_update/multicert_both_certs.crt-list ${testdir}/ocsp_update/multicert/server_ocsp.pem.ecdsa" + expect ~ "Entry.*deleted in crtlist" + + send "show ssl ocsp-updates" + expect !~ "303b300906052b0e03021a050004148a83e0060faff709ca7e9b95522a2e81635fda0a0414f652b0e435d5ea923851508f0adbe92d85de007a02021016 .*" + + send "show ssl ocsp-response" + expect ~ "Certificate ID key : 303b300906052b0e03021a050004148a83e0060faff709ca7e9b95522a2e81635fda0a0414f652b0e435d5ea923851508f0adbe92d85de007a02021016" + + send "show ssl ocsp-response 
${testdir}/ocsp_update/multicert/server_ocsp.pem.ecdsa" + expect ~ ".* Cert Status: good.*" +} + +# Add the previously removed crt-list line with auto-update enabled and check that +# the ocsp response appears in the auto update list +shell { + printf "add ssl crt-list ${testdir}/ocsp_update/multicert_both_certs.crt-list <<\nmulticert/server_ocsp.pem.ecdsa [ocsp-update on] foo.bar\n\n" | socat "${tmpdir}/h8/stats" - | grep "Inserting certificate.*in crt-list" +} + +haproxy h8 -cli { + send "show ssl ocsp-updates" + expect ~ "303b300906052b0e03021a050004148a83e0060faff709ca7e9b95522a2e81635fda0a0414f652b0e435d5ea923851508f0adbe92d85de007a02021016 .*" +} + +# Check that the auto update option consistency check work even when crt-list +# lines are added through the cli +shell { + printf "add ssl crt-list ${testdir}/ocsp_update/multicert_both_certs.crt-list <<\nmulticert/server_ocsp.pem.ecdsa foo.foo\n\n" | socat "${tmpdir}/h8/stats" - | grep "different parameter 'ocsp-update'" +} + +haproxy h8 -wait + +#################### +# # +# NINTH TEST CASE # +# # +#################### + +# +# Check that a certificate created through the CLI and which does not have ocsp +# update enabled can be updated via "update ssl ocsp-response" command. 
+# + +process p9 "openssl ocsp -index ${testdir}/ocsp_update/index.txt -rsigner ${testdir}/ocsp_update/ocsp.haproxy.com.pem -CA ${testdir}/ocsp_update/ocsp_update_rootca.crt -nrequest 1 -ndays 1 -port 12345 -timeout 5" -start + +barrier b9 cond 2 -cyclic + +syslog Syslog_ocsp9 -level notice { + recv + expect ~ "<OCSP-UPDATE> ${testdir}/ocsp_update/rsa.pem 1 \"Update successful\" 0 1" + + barrier b9 sync +} -start + + +haproxy h9 -conf { + global + tune.ssl.default-dh-param 2048 + tune.ssl.capture-buffer-size 1 + stats socket "${tmpdir}/h9/stats" level admin + crt-base ${testdir}/ocsp_update + log ${Syslog_ocsp9_addr}:${Syslog_ocsp9_port} local0 notice notice + + defaults + mode http + option httplog + log stderr local0 debug err + option logasap + timeout connect "${HAPROXY_TEST_TIMEOUT-5s}" + timeout client "${HAPROXY_TEST_TIMEOUT-5s}" + timeout server "${HAPROXY_TEST_TIMEOUT-5s}" + + frontend ssl-fe + bind "${tmpdir}/ssl-h9.sock" ssl crt-list ${testdir}/ocsp_update/multicert_ecdsa_no_update.crt-list ca-file ${testdir}/set_cafile_rootCA.crt verify none crt-ignore-err all + http-request return status 200 +} -start + +# We need to "enable" the cli with a first cli call before using it only through socats +haproxy h9 -cli { + send "show ssl cert" + expect ~ "" +} + +# Create a new certificate and add it in the crt-list with ocsp auto-update enabled +shell { + echo "new ssl cert ${testdir}/ocsp_update/rsa.pem" | socat "${tmpdir}/h9/stats" - + printf "set ssl cert ${testdir}/ocsp_update/rsa.pem <<\n$(cat ${testdir}/ocsp_update/multicert/server_ocsp.pem.rsa)\n\n" | socat "${tmpdir}/h9/stats" - + printf "set ssl cert ${testdir}/ocsp_update/rsa.pem.issuer <<\n$(cat ${testdir}/ocsp_update/ocsp_update_rootca.crt)\n\n" | socat "${tmpdir}/h9/stats" - + printf "set ssl cert ${testdir}/ocsp_update/rsa.pem.ocsp <<\n$(base64 -w 1000 ${testdir}/ocsp_update/multicert/server_ocsp.pem.rsa.ocsp)\n\n" | socat "${tmpdir}/h9/stats" - + echo "commit ssl cert 
${testdir}/ocsp_update/rsa.pem" | socat "${tmpdir}/h9/stats" - + + printf "add ssl crt-list ${testdir}/ocsp_update/multicert_ecdsa_no_update.crt-list <<\nrsa.pem [ocsp-update off] foo.bar\n\n" | socat "${tmpdir}/h9/stats" - +} + +# Check that the line is in the crt-list +haproxy h9 -cli { + send "show ssl crt-list ${testdir}/ocsp_update/multicert_ecdsa_no_update.crt-list" + expect ~ "${testdir}/ocsp_update/rsa.pem.*ocsp-update off.*foo.bar" +} + +# Check that the new certificate is NOT in the auto update list +haproxy h9 -cli { + send "show ssl ocsp-updates" + expect !~ "303b300906052b0e03021a050004148a83e0060faff709ca7e9b95522a2e81635fda0a0414f652b0e435d5ea923851508f0adbe92d85de007a02021015.*" +} + +shell { + echo "update ssl ocsp-response ${testdir}/ocsp_update/rsa.pem" | socat "${tmpdir}/h9/stats" - +} + +barrier b9 sync + +haproxy h9 -cli { + send "show ssl ocsp-response ${testdir}/ocsp_update/rsa.pem" + expect ~ ".* Cert Status: revoked.*" +} + +haproxy h9 -wait +process p9 -wait diff --git a/reg-tests/ssl/ocsp_compat_check.vtc b/reg-tests/ssl/ocsp_compat_check.vtc new file mode 100644 index 0000000..7dbcdf9 --- /dev/null +++ b/reg-tests/ssl/ocsp_compat_check.vtc @@ -0,0 +1,401 @@ +#REGTEST_TYPE=devel + +# broken with BoringSSL. +# +# This reg-test tries loading multiple configurations that make use of the +# 'ocsp-update' crt-list option and the global 'ocsp-update.mode' +# option. It ensures that an error message is raised when the user provides an +# incoherent configuration. Any configuration in which a given certificate has +# the ocsp auto update mode set to 'on' as well as 'off' simultaneously should +# raise an ALERT type message and not start. +# The first batch of configurations should all raise errors and the second +# batch should all load properly. We do not focus on the actual auto update in +# this reg-test though so no actual proxy instance will be launched. 
+ +varnishtest "Test the OCSP auto update feature" +feature cmd "$HAPROXY_PROGRAM -cc 'version_atleast(3.0-dev0)'" +feature cmd "$HAPROXY_PROGRAM -cc 'feature(OPENSSL) && !ssllib_name_startswith(BoringSSL) && openssl_version_atleast(1.1.1)'" +feature ignore_unknown_macro + + +############################# +# # +# WRONG CONFIGURATIONS # +# # +############################# + + +# test1 +# global_option OFF +# bind line DFLT (OFF) (first) +# crt-list ON (second) +shell { + cat << EOF > ${tmpdir}/ocsp_compat_check.list +server_ocsp_ecdsa.pem [ocsp-update on] foo.com +EOF + + cat << EOF > ${tmpdir}/ocsp_compat_check.cfg +global + crt-base ${testdir}/ocsp_update/multicert +# ocsp-update.mode on + +defaults + log stderr local0 debug err + +listen ssl-lst + bind "${tmpdir}/ssl.sock" ssl crt server_ocsp_ecdsa.pem crt-list ${tmpdir}/ocsp_compat_check.list + server s1 127.0.0.1:80 +EOF + + haproxy_output="$($HAPROXY_PROGRAM -f ${tmpdir}/ocsp_compat_check.cfg -c 2>&1)" + haproxy_ret=$? + echo "==== test 1" + echo "$haproxy_output" + echo "HAProxy return code: $haproxy_ret" + [ $haproxy_ret -ne 0 ] && echo "$haproxy_output" | grep -q "different parameter 'ocsp-update'" +} + +# test2 +# global_option ON +# bind line DFLT/ON (first) +# crt-list OFF (second) +shell { + cat << EOF > ${tmpdir}/ocsp_compat_check.list +server_ocsp_ecdsa.pem [ocsp-update off] foo.com +EOF + + cat << EOF > ${tmpdir}/ocsp_compat_check.cfg +global + crt-base ${testdir}/ocsp_update/multicert + ocsp-update.mode on + +defaults + log stderr local0 debug err + +listen ssl-lst + bind "${tmpdir}/ssl.sock" ssl crt server_ocsp_ecdsa.pem crt-list ${tmpdir}/ocsp_compat_check.list + server s1 127.0.0.1:80 +EOF + + haproxy_output="$($HAPROXY_PROGRAM -f ${tmpdir}/ocsp_compat_check.cfg -c 2>&1)" + haproxy_ret=$? 
+ echo "==== test 2" + echo "$haproxy_output" + echo "HAProxy return code: $haproxy_ret" + [ $haproxy_ret -ne 0 ] && echo "$haproxy_output" | grep -q "different parameter 'ocsp-update'" +} + +# test3 +# global_option OFF +# bind line DFLT/OFF(first) +# crt-list ON (second) +shell { + cat << EOF > ${tmpdir}/ocsp_compat_check.list +server_ocsp_ecdsa.pem [ocsp-update on] foo.com +EOF + + cat << EOF > ${tmpdir}/ocsp_compat_check.cfg +global + crt-base ${testdir}/ocsp_update/multicert + ocsp-update.mode off + +defaults + log stderr local0 debug err + +listen ssl-lst + bind "${tmpdir}/ssl.sock" ssl crt server_ocsp_ecdsa.pem crt-list ${tmpdir}/ocsp_compat_check.list + server s1 127.0.0.1:80 +EOF + + haproxy_output="$($HAPROXY_PROGRAM -f ${tmpdir}/ocsp_compat_check.cfg -c 2>&1)" + haproxy_ret=$? + echo "==== test 3" + echo "$haproxy_output" + echo "HAProxy return code: $haproxy_ret" + [ $haproxy_ret -ne 0 ] && echo "$haproxy_output" | grep -q "different parameter 'ocsp-update'" +} + +# test4 +# global_option OFF +# bind line DFLT OFF (second) +# crt-list ON (first) +shell { + cat << EOF > ${tmpdir}/ocsp_compat_check.list +server_ocsp_ecdsa.pem [ocsp-update on] foo.com +EOF + + cat << EOF > ${tmpdir}/ocsp_compat_check.cfg +global + crt-base ${testdir}/ocsp_update/multicert +# ocsp-update.mode off + +defaults + log stderr local0 debug err + +listen ssl-lst + bind "${tmpdir}/ssl.sock" ssl crt-list ${tmpdir}/ocsp_compat_check.list + bind "${tmpdir}/ssl2.sock" ssl crt server_ocsp_ecdsa.pem + server s1 127.0.0.1:80 +EOF + + haproxy_output="$($HAPROXY_PROGRAM -f ${tmpdir}/ocsp_compat_check.cfg -c 2>&1)" + haproxy_ret=$? 
+ echo "==== test 4" + echo "$haproxy_output" + echo "HAProxy return code: $haproxy_ret" + [ $haproxy_ret -ne 0 ] && echo "$haproxy_output" | grep -q "different parameter 'ocsp-update'" +} + +# test5 +# global_option ON +# bind line DFLT (second) +# crt-list OFF (first) +shell { + cat << EOF > ${tmpdir}/ocsp_compat_check.list +server_ocsp_ecdsa.pem [ocsp-update off] foo.com +EOF + + cat << EOF > ${tmpdir}/ocsp_compat_check.cfg +global + crt-base ${testdir}/ocsp_update/multicert + ocsp-update.mode on + +defaults + log stderr local0 debug err + +listen ssl-lst + bind "${tmpdir}/ssl.sock" ssl crt-list ${tmpdir}/ocsp_compat_check.list + bind "${tmpdir}/ssl2.sock" ssl crt server_ocsp_ecdsa.pem + server s1 127.0.0.1:80 +EOF + + haproxy_output="$($HAPROXY_PROGRAM -f ${tmpdir}/ocsp_compat_check.cfg -c 2>&1)" + haproxy_ret=$? + echo "==== test 5" + echo "$haproxy_output" + echo "HAProxy return code: $haproxy_ret" + [ $haproxy_ret -ne 0 ] && echo "$haproxy_output" | grep -q "different parameter 'ocsp-update'" +} + +# test6 +# global_option OFF +# bind line DFLT (second) +# crt-list ON (first) +shell { + cat << EOF > ${tmpdir}/ocsp_compat_check.list +server_ocsp_ecdsa.pem [ocsp-update on] foo.com +EOF + + cat << EOF > ${tmpdir}/ocsp_compat_check.cfg +global + crt-base ${testdir}/ocsp_update/multicert + ocsp-update.mode off + +defaults + log stderr local0 debug err + +listen ssl-lst + bind "${tmpdir}/ssl.sock" ssl crt-list ${tmpdir}/ocsp_compat_check.list + bind "${tmpdir}/ssl2.sock" ssl crt server_ocsp_ecdsa.pem + server s1 127.0.0.1:80 +EOF + + haproxy_output="$($HAPROXY_PROGRAM -f ${tmpdir}/ocsp_compat_check.cfg -c 2>&1)" + haproxy_ret=$? 
+ echo "==== test 6" + echo "$haproxy_output" + echo "HAProxy return code: $haproxy_ret" + [ $haproxy_ret -ne 0 ] && echo "$haproxy_output" | grep -q "different parameter 'ocsp-update'" +} + +# test7 +# global_option DFLT +# bind line - +# crt-list ON +# crt-list DFLT +shell { + cat << EOF > ${tmpdir}/ocsp_compat_check.list +server_ocsp_ecdsa.pem [ocsp-update on] foo.com +server_ocsp_ecdsa.pem bar.com +EOF + + cat << EOF > ${tmpdir}/ocsp_compat_check.cfg +global + crt-base ${testdir}/ocsp_update/multicert +# ocsp-update.mode off + +defaults + log stderr local0 debug err + +listen ssl-lst + bind "${tmpdir}/ssl.sock" ssl crt-list ${tmpdir}/ocsp_compat_check.list + server s1 127.0.0.1:80 +EOF + + haproxy_output="$($HAPROXY_PROGRAM -f ${tmpdir}/ocsp_compat_check.cfg -c 2>&1)" + haproxy_ret=$? + echo "==== test 7" + echo "$haproxy_output" + [ $haproxy_ret -ne 0 ] && echo "$haproxy_output" | grep -q "different parameter 'ocsp-update'" +} + +# test8 +# global_option DFLT +# bind line - +# crt-list DFLT +# crt-list ON +shell { + cat << EOF > ${tmpdir}/ocsp_compat_check.list +server_ocsp_ecdsa.pem bar.com +server_ocsp_ecdsa.pem [ocsp-update on] foo.com +EOF + + cat << EOF > ${tmpdir}/ocsp_compat_check.cfg +global + crt-base ${testdir}/ocsp_update/multicert +# ocsp-update.mode off + +defaults + log stderr local0 debug err + +listen ssl-lst + bind "${tmpdir}/ssl.sock" ssl crt-list ${tmpdir}/ocsp_compat_check.list + server s1 127.0.0.1:80 +EOF + + haproxy_output="$($HAPROXY_PROGRAM -f ${tmpdir}/ocsp_compat_check.cfg -c 2>&1)" + haproxy_ret=$? 
+ echo "==== test 8" + echo "$haproxy_output" + echo "HAProxy return code: $haproxy_ret" + [ $haproxy_ret -ne 0 ] && echo "$haproxy_output" | grep -q "different parameter 'ocsp-update'" +} + +# test9 +# global_option ON +# bind line - +# crt-list OFF +# crt-list DFLT +shell { + cat << EOF > ${tmpdir}/ocsp_compat_check.list +server_ocsp_ecdsa.pem [ocsp-update off] foo.com +server_ocsp_ecdsa.pem bar.com +EOF + + cat << EOF > ${tmpdir}/ocsp_compat_check.cfg +global + crt-base ${testdir}/ocsp_update/multicert + ocsp-update.mode on + +defaults + log stderr local0 debug err + +listen ssl-lst + bind "${tmpdir}/ssl.sock" ssl crt-list ${tmpdir}/ocsp_compat_check.list + server s1 127.0.0.1:80 +EOF + + haproxy_output="$($HAPROXY_PROGRAM -f ${tmpdir}/ocsp_compat_check.cfg -c 2>&1)" + haproxy_ret=$? + echo "==== test 9" + echo "$haproxy_output" + echo "HAProxy return code: $haproxy_ret" + [ $haproxy_ret -ne 0 ] && echo "$haproxy_output" | grep -q "different parameter 'ocsp-update'" +} + +# test10 +# global_option ON +# bind line - +# crt-list DFLT +# crt-list OFF +shell { + cat << EOF > ${tmpdir}/ocsp_compat_check.list +server_ocsp_ecdsa.pem bar.com +server_ocsp_ecdsa.pem [ocsp-update off] foo.com +EOF + + cat << EOF > ${tmpdir}/ocsp_compat_check.cfg +global + crt-base ${testdir}/ocsp_update/multicert + ocsp-update.mode on + +defaults + log stderr local0 debug err + +listen ssl-lst + bind "${tmpdir}/ssl.sock" ssl crt-list ${tmpdir}/ocsp_compat_check.list + server s1 127.0.0.1:80 +EOF + + haproxy_output="$($HAPROXY_PROGRAM -f ${tmpdir}/ocsp_compat_check.cfg -c 2>&1)" + haproxy_ret=$? 
+ echo "==== test 10" + echo "$haproxy_output" + echo "HAProxy return code: $haproxy_ret" + [ $haproxy_ret -ne 0 ] && echo "$haproxy_output" | grep -q "different parameter 'ocsp-update'" +} + +# test11 +# global_option OFF +# bind line - +# crt-list ON +# crt-list DFLT +shell { + cat << EOF > ${tmpdir}/ocsp_compat_check.list +server_ocsp_ecdsa.pem [ocsp-update on] foo.com +server_ocsp_ecdsa.pem bar.com +EOF + + cat << EOF > ${tmpdir}/ocsp_compat_check.cfg +global + crt-base ${testdir}/ocsp_update/multicert + ocsp-update.mode off + +defaults + log stderr local0 debug err + +listen ssl-lst + bind "${tmpdir}/ssl.sock" ssl crt-list ${tmpdir}/ocsp_compat_check.list + server s1 127.0.0.1:80 +EOF + + haproxy_output="$($HAPROXY_PROGRAM -f ${tmpdir}/ocsp_compat_check.cfg -c 2>&1)" + haproxy_ret=$? + echo "==== test 11" + echo "$haproxy_output" + echo "HAProxy return code: $haproxy_ret" + [ $haproxy_ret -ne 0 ] && echo "$haproxy_output" | grep -q "different parameter 'ocsp-update'" +} + +# test12 +# global_option OFF +# bind line - +# crt-list DFLT +# crt-list ON +shell { + cat << EOF > ${tmpdir}/ocsp_compat_check.list +server_ocsp_ecdsa.pem bar.com +server_ocsp_ecdsa.pem [ocsp-update on] foo.com +EOF + + cat << EOF > ${tmpdir}/ocsp_compat_check.cfg +global + crt-base ${testdir}/ocsp_update/multicert + ocsp-update.mode off + +defaults + log stderr local0 debug err + +listen ssl-lst + bind "${tmpdir}/ssl.sock" ssl crt-list ${tmpdir}/ocsp_compat_check.list + server s1 127.0.0.1:80 +EOF + + haproxy_output="$($HAPROXY_PROGRAM -f ${tmpdir}/ocsp_compat_check.cfg -c 2>&1)" + haproxy_ret=$? 
+ echo "==== test 12" + echo "$haproxy_output" + echo "HAProxy return code: $haproxy_ret" + [ $haproxy_ret -ne 0 ] && echo "$haproxy_output" | grep -q "different parameter 'ocsp-update'" +} + diff --git a/reg-tests/ssl/ocsp_update/multicert_both_certs.crt-list b/reg-tests/ssl/ocsp_update/multicert_both_certs.crt-list new file mode 100644 index 0000000..0ec641f --- /dev/null +++ b/reg-tests/ssl/ocsp_update/multicert_both_certs.crt-list @@ -0,0 +1,2 @@ +multicert/server_ocsp.pem.rsa [ocsp-update on ssl-min-ver TLSv1.2] * +multicert/server_ocsp.pem.ecdsa [ocsp-update on ssl-min-ver TLSv1.2] * diff --git a/reg-tests/stats/sample-stats-file b/reg-tests/stats/sample-stats-file new file mode 100644 index 0000000..4748579 --- /dev/null +++ b/reg-tests/stats/sample-stats-file @@ -0,0 +1,26 @@ +#fe guid,stot, + +// valid line +guid-fe,1024, + +// invalid non numerical value must be silently ignored +guid-fe,abc, + +// listener counters not allocated if no option socket-stats +guid-feS-0,1024 +guid-fe2S-0,1024 + +// unknown GUID must be silently ignored +guid-unknown,1024, + +// invalid GUID side must be silently ignored +guid-be,1024 + +// unknown section line must be silently ignored +#inval guid,other, +guid-foo,0,0, + +// valid lines +#be guid,unknown,stot, +guid-be,512,1024, +guid-srv,512,1024, diff --git a/reg-tests/stats/stats-file.vtc b/reg-tests/stats/stats-file.vtc new file mode 100644 index 0000000..d7c501a --- /dev/null +++ b/reg-tests/stats/stats-file.vtc @@ -0,0 +1,35 @@ +varnishtest "Preload counters via stats-file" + +feature cmd "$HAPROXY_PROGRAM -cc 'version_atleast(3.0-dev9)'" +feature ignore_unknown_macro + +haproxy h1 -conf { + global + stats-file ${testdir}/sample-stats-file + + frontend fe + guid guid-fe + bind "fd@${feS}" guid-prefix guid-feS + + frontend fe2 + guid guid-fe2 + option socket-stats + bind "fd@${fe2S}" guid-prefix guid-fe2S + + backend be + guid guid-be + server srv ${s1_addr}:${s1_port} guid guid-srv +} -start + +haproxy h1 -cli { + send 
"show stat fe 15 -1 typed" + expect ~ "F.*.*.*.stot.1:MCP:u64:1024" + + send "show stat fe2 15 -1 typed" + expect ~ "L.*.*.*.stot.1:MCP:u64:1024" + + send "show stat be 15 -1 typed" + expect ~ "B.*.*.*.stot.1:MCP:u64:1024" + send "show stat be 15 -1 typed" + expect ~ "S.*.*.*.stot.1:MCP:u64:1024" +} diff --git a/reg-tests/webstats/missing-stats-fields.vtc b/reg-tests/webstats/missing-stats-fields.vtc index c85855d..8b292c3 100644 --- a/reg-tests/webstats/missing-stats-fields.vtc +++ b/reg-tests/webstats/missing-stats-fields.vtc @@ -1,6 +1,6 @@ varnishtest "Verifies the absence of (null) in 'show stats' header" -# This can happen if a new ST_F_xxx enum is added without updating +# This can happen if a new ST_I_PX_xxx enum is added without updating # stats_fields[]. feature ignore_unknown_macro diff --git a/scripts/build-ssl.sh b/scripts/build-ssl.sh index 1c17775..f1a6f8a 100755 --- a/scripts/build-ssl.sh +++ b/scripts/build-ssl.sh @@ -1,8 +1,11 @@ #!/bin/sh set -eux +BUILDSSL_DESTDIR=${BUILDSSL_DESTDIR:-${HOME}/opt} +BUILDSSL_TMPDIR=${BUILDSSL_TMPDIR:-/tmp/download-cache} + download_openssl () { - if [ ! -f "download-cache/openssl-${OPENSSL_VERSION}.tar.gz" ]; then + if [ ! 
-f "${BUILDSSL_TMPDIR}/openssl-${OPENSSL_VERSION}.tar.gz" ]; then # # OpenSSL has different links for latest and previous releases @@ -10,10 +13,12 @@ download_openssl () { # current version as latest, if it fails, follow with previous # - wget -P download-cache/ \ + wget -P ${BUILDSSL_TMPDIR}/ \ "https://www.openssl.org/source/openssl-${OPENSSL_VERSION}.tar.gz" || \ - wget -P download-cache/ \ - "https://www.openssl.org/source/old/${OPENSSL_VERSION%[a-z]}/openssl-${OPENSSL_VERSION}.tar.gz" + wget -P ${BUILDSSL_TMPDIR}/ \ + "https://www.openssl.org/source/old/${OPENSSL_VERSION%[a-z]}/openssl-${OPENSSL_VERSION}.tar.gz" || \ + wget -P ${BUILDSSL_TMPDIR}/ \ + "https://github.com/openssl/openssl/releases/download/openssl-${OPENSSL_VERSION}/openssl-${OPENSSL_VERSION}.tar.gz" fi } @@ -21,8 +26,8 @@ download_openssl () { # while older ones require to build everything sequentially. build_openssl_linux () { ( - cd "openssl-${OPENSSL_VERSION}/" - ./config shared --prefix="${HOME}/opt" --openssldir="${HOME}/opt" --libdir=lib -DPURIFY + cd "${BUILDSSL_TMPDIR}/openssl-${OPENSSL_VERSION}/" + ./config shared --prefix="${BUILDSSL_DESTDIR}" --openssldir="${BUILDSSL_DESTDIR}" --libdir=lib -DPURIFY if [ -z "${OPENSSL_VERSION##1.*}" ]; then make all else @@ -34,16 +39,18 @@ build_openssl_linux () { build_openssl_osx () { ( - cd "openssl-${OPENSSL_VERSION}/" + cd "${BUILDSSL_TMPDIR}/openssl-${OPENSSL_VERSION}/" ./Configure darwin64-x86_64-cc shared \ - --prefix="${HOME}/opt" --openssldir="${HOME}/opt" --libdir=lib -DPURIFY + --prefix="${BUILDSSL_DESTDIR}" --openssldir="${BUILDSSL_DESTDIR}" --libdir=lib -DPURIFY make depend build_sw install_sw ) } build_openssl () { - if [ "$(cat ${HOME}/opt/.openssl-version)" != "${OPENSSL_VERSION}" ]; then - tar zxf "download-cache/openssl-${OPENSSL_VERSION}.tar.gz" + if [ "$(cat ${BUILDSSL_DESTDIR}/.openssl-version)" != "${OPENSSL_VERSION}" ]; then + + mkdir -p "${BUILDSSL_TMPDIR}/openssl-${OPENSSL_VERSION}/" + tar zxf 
"${BUILDSSL_TMPDIR}/openssl-${OPENSSL_VERSION}.tar.gz" -C "${BUILDSSL_TMPDIR}/openssl-${OPENSSL_VERSION}/" --strip-components=1 case `uname` in 'Darwin') build_openssl_osx @@ -51,105 +58,143 @@ build_openssl () { 'Linux') build_openssl_linux ;; + *) + echo "not yet implemented" + exit 1 + ;; esac - echo "${OPENSSL_VERSION}" > "${HOME}/opt/.openssl-version" + echo "${OPENSSL_VERSION}" > "${BUILDSSL_DESTDIR}/.openssl-version" fi } download_libressl () { - if [ ! -f "download-cache/libressl-${LIBRESSL_VERSION}.tar.gz" ]; then - wget -P download-cache/ \ + if [ ! -f "${BUILDSSL_TMPDIR}/libressl-${LIBRESSL_VERSION}.tar.gz" ]; then + wget -P ${BUILDSSL_TMPDIR}/ \ "https://cdn.openbsd.org/pub/OpenBSD/LibreSSL/libressl-${LIBRESSL_VERSION}.tar.gz" fi } build_libressl () { - if [ "$(cat ${HOME}/opt/.libressl-version)" != "${LIBRESSL_VERSION}" ]; then - tar zxf "download-cache/libressl-${LIBRESSL_VERSION}.tar.gz" + if [ "$(cat ${BUILDSSL_DESTDIR}/.libressl-version)" != "${LIBRESSL_VERSION}" ]; then + mkdir -p "${BUILDSSL_TMPDIR}/libressl-${LIBRESSL_VERSION}/" + tar zxf "${BUILDSSL_TMPDIR}/libressl-${LIBRESSL_VERSION}.tar.gz" -C "${BUILDSSL_TMPDIR}/libressl-${LIBRESSL_VERSION}/" --strip-components=1 ( - cd "libressl-${LIBRESSL_VERSION}/" - ./configure --prefix="${HOME}/opt" + cd "${BUILDSSL_TMPDIR}/libressl-${LIBRESSL_VERSION}/" + ./configure --prefix="${BUILDSSL_DESTDIR}" make all install ) - echo "${LIBRESSL_VERSION}" > "${HOME}/opt/.libressl-version" + echo "${LIBRESSL_VERSION}" > "${BUILDSSL_DESTDIR}/.libressl-version" fi } download_boringssl () { - if [ ! -d "download-cache/boringssl" ]; then - git clone --depth=1 https://boringssl.googlesource.com/boringssl download-cache/boringssl + + # travis-ci comes with go-1.11, while boringssl requires go-1.13 + eval "$(curl -sL https://raw.githubusercontent.com/travis-ci/gimme/master/gimme | GIMME_GO_VERSION=1.13 bash)" + + if [ ! 
-d "${BUILDSSL_TMPDIR}/boringssl" ]; then + git clone --depth=1 https://boringssl.googlesource.com/boringssl ${BUILDSSL_TMPDIR}/boringssl else ( - cd download-cache/boringssl + cd ${BUILDSSL_TMPDIR}/boringssl git pull ) fi } +build_boringssl () { + cd ${BUILDSSL_TMPDIR}/boringssl + if [ -d build ]; then rm -rf build; fi + mkdir build + cd build + cmake -GNinja -DCMAKE_BUILD_TYPE=release -DBUILD_SHARED_LIBS=1 .. + ninja + + rm -rf ${BUILDSSL_DESTDIR}/lib || exit 0 + rm -rf ${BUILDSSL_DESTDIR}/include || exit 0 + + mkdir -p ${BUILDSSL_DESTDIR}/lib + cp crypto/libcrypto.so ssl/libssl.so ${BUILDSSL_DESTDIR}/lib + + mkdir -p ${BUILDSSL_DESTDIR}/include + cp -r ../include/* ${BUILDSSL_DESTDIR}/include +} + download_aws_lc () { - if [ ! -f "download-cache/aws-lc-${AWS_LC_VERSION}.tar.gz" ]; then - mkdir -p download-cache - wget -q -O "download-cache/aws-lc-${AWS_LC_VERSION}.tar.gz" \ + if [ ! -f "${BUILDSSL_TMPDIR}/aws-lc-${AWS_LC_VERSION}.tar.gz" ]; then + mkdir -p "${BUILDSSL_TMPDIR}" + wget -q -O "${BUILDSSL_TMPDIR}/aws-lc-${AWS_LC_VERSION}.tar.gz" \ "https://github.com/aws/aws-lc/archive/refs/tags/v${AWS_LC_VERSION}.tar.gz" fi } build_aws_lc () { - if [ "$(cat ${HOME}/opt/.aws_lc-version)" != "${AWS_LC_VERSION}" ]; then - tar zxf "download-cache/aws-lc-${AWS_LC_VERSION}.tar.gz" + if [ "$(cat ${BUILDSSL_DESTDIR}/.aws_lc-version)" != "${AWS_LC_VERSION}" ]; then + mkdir -p "${BUILDSSL_TMPDIR}/aws-lc-${AWS_LC_VERSION}/" + tar zxf "${BUILDSSL_TMPDIR}/aws-lc-${AWS_LC_VERSION}.tar.gz" -C "${BUILDSSL_TMPDIR}/aws-lc-${AWS_LC_VERSION}/" --strip-components=1 ( - cd "aws-lc-${AWS_LC_VERSION}/" + cd "${BUILDSSL_TMPDIR}/aws-lc-${AWS_LC_VERSION}/" mkdir -p build cd build cmake -version cmake -DCMAKE_BUILD_TYPE=Release -DBUILD_SHARED_LIBS=1 -DDISABLE_GO=1 -DDISABLE_PERL=1 \ - -DBUILD_TESTING=0 -DCMAKE_INSTALL_PREFIX=${HOME}/opt .. + -DBUILD_TESTING=0 -DCMAKE_INSTALL_PREFIX=${BUILDSSL_DESTDIR} .. 
make -j$(nproc) make install ) - echo "${AWS_LC_VERSION}" > "${HOME}/opt/.aws_lc-version" + echo "${AWS_LC_VERSION}" > "${BUILDSSL_DESTDIR}/.aws_lc-version" fi } download_quictls () { - if [ ! -d "download-cache/quictls" ]; then - git clone --depth=1 https://github.com/quictls/openssl download-cache/quictls + if [ ! -d "${BUILDSSL_TMPDIR}/quictls" ]; then + git clone --depth=1 https://github.com/quictls/openssl ${BUILDSSL_TMPDIR}/quictls else ( - cd download-cache/quictls + cd ${BUILDSSL_TMPDIR}/quictls git pull ) fi } +build_quictls () { + cd ${BUILDSSL_TMPDIR}/quictls + ./config shared no-tests ${QUICTLS_EXTRA_ARGS:-} --prefix="${BUILDSSL_DESTDIR}" --openssldir="${BUILDSSL_DESTDIR}" --libdir=lib -DPURIFY + make -j$(nproc) build_sw + make install_sw +} + download_wolfssl () { - if [ ! -f "download-cache/wolfssl-${WOLFSSL_VERSION}.tar.gz" ]; then - mkdir -p download-cache + if [ ! -f "${BUILDSSL_TMPDIR}/wolfssl-${WOLFSSL_VERSION}.tar.gz" ]; then + mkdir -p ${BUILDSSL_TMPDIR} if [ "${WOLFSSL_VERSION%%-*}" != "git" ]; then - wget -q -O "download-cache/wolfssl-${WOLFSSL_VERSION}.tar.gz" \ + wget -q -O "${BUILDSSL_TMPDIR}/wolfssl-${WOLFSSL_VERSION}.tar.gz" \ "https://github.com/wolfSSL/wolfssl/archive/refs/tags/v${WOLFSSL_VERSION}-stable.tar.gz" else - wget -q -O "download-cache/wolfssl-${WOLFSSL_VERSION}.tar.gz" \ + wget -q -O "${BUILDSSL_TMPDIR}/wolfssl-${WOLFSSL_VERSION}.tar.gz" \ "https://github.com/wolfSSL/wolfssl/archive/${WOLFSSL_VERSION##git-}.tar.gz" fi fi } build_wolfssl () { - if [ "$(cat ${HOME}/opt/.wolfssl-version)" != "${WOLFSSL_VERSION}" ]; then - mkdir "wolfssl-${WOLFSSL_VERSION}/" - tar zxf "download-cache/wolfssl-${WOLFSSL_VERSION}.tar.gz" -C "wolfssl-${WOLFSSL_VERSION}/" --strip-components=1 + if [ "$(cat ${BUILDSSL_DESTDIR}/.wolfssl-version)" != "${WOLFSSL_VERSION}" ]; then + mkdir -p "${BUILDSSL_TMPDIR}/wolfssl-${WOLFSSL_VERSION}/" + tar zxf "${BUILDSSL_TMPDIR}/wolfssl-${WOLFSSL_VERSION}.tar.gz" -C "${BUILDSSL_TMPDIR}/wolfssl-${WOLFSSL_VERSION}/" 
--strip-components=1 ( - cd "wolfssl-${WOLFSSL_VERSION}/" + cd "${BUILDSSL_TMPDIR}/wolfssl-${WOLFSSL_VERSION}/" autoreconf -i - ./configure --enable-haproxy --enable-quic --prefix="${HOME}/opt" + ./configure --enable-haproxy --enable-quic --prefix="${BUILDSSL_DESTDIR}" make -j$(nproc) make install ) - echo "${WOLFSSL_VERSION}" > "${HOME}/opt/.wolfssl-version" + echo "${WOLFSSL_VERSION}" > "${BUILDSSL_DESTDIR}/.wolfssl-version" fi } +mkdir -p "${BUILDSSL_DESTDIR}" + + if [ ! -z ${LIBRESSL_VERSION+x} ]; then download_libressl build_libressl @@ -161,28 +206,8 @@ if [ ! -z ${OPENSSL_VERSION+x} ]; then fi if [ ! -z ${BORINGSSL+x} ]; then - ( - - # travis-ci comes with go-1.11, while boringssl requires go-1.13 - eval "$(curl -sL https://raw.githubusercontent.com/travis-ci/gimme/master/gimme | GIMME_GO_VERSION=1.13 bash)" - - download_boringssl - cd download-cache/boringssl - if [ -d build ]; then rm -rf build; fi - mkdir build - cd build - cmake -GNinja -DCMAKE_BUILD_TYPE=release -DBUILD_SHARED_LIBS=1 .. - ninja - - rm -rf ${HOME}/opt/lib || exit 0 - rm -rf ${HOME}/opt/include || exit 0 - - mkdir -p ${HOME}/opt/lib - cp crypto/libcrypto.so ssl/libssl.so ${HOME}/opt/lib - - mkdir -p ${HOME}/opt/include - cp -r ../include/* ${HOME}/opt/include - ) + download_boringssl + build_boringssl fi if [ ! -z ${AWS_LC_VERSION+x} ]; then @@ -191,15 +216,8 @@ if [ ! -z ${AWS_LC_VERSION+x} ]; then fi if [ ! -z ${QUICTLS+x} ]; then - ( download_quictls - cd download-cache/quictls - - ./config shared no-tests ${QUICTLS_EXTRA_ARGS:-} --prefix="${HOME}/opt" --openssldir="${HOME}/opt" --libdir=lib -DPURIFY - make -j$(nproc) build_sw - make install_sw - - ) + build_quictls fi if [ ! 
-z ${WOLFSSL_VERSION+x} ]; then diff --git a/scripts/build-vtest.sh b/scripts/build-vtest.sh index 4db35d6..9ae4306 100755 --- a/scripts/build-vtest.sh +++ b/scripts/build-vtest.sh @@ -6,5 +6,25 @@ curl -fsSL https://github.com/vtest/VTest/archive/master.tar.gz -o VTest.tar.gz mkdir ../vtest tar xvf VTest.tar.gz -C ../vtest --strip-components=1 # Special flags due to: https://github.com/vtest/VTest/issues/12 -make -C ../vtest FLAGS="-O2 -s -Wall" +# Note: do not use "make -C ../vtest", otherwise MAKEFLAGS contains "w" +# and fails (see Options/Recursion in GNU Make doc, it contains the list +# of options without the leading '-'). +# MFLAGS works on BSD but misses variable definitions on GNU Make. +# Better just avoid the -C and do the cd ourselves then. + +cd ../vtest + +set +e +CPUS=${CPUS:-$(nproc 2>/dev/null)} +CPUS=${CPUS:-1} +set -e + +# +# temporarily detect Apple Silicon (it's using /opt/homebrew instead of /usr/local) +# +if test -f /opt/homebrew/include/pcre2.h; then + make -j${CPUS} FLAGS="-O2 -s -Wall" INCS="-Isrc -Ilib -I/usr/local/include -I/opt/homebrew/include -pthread" +else + make -j${CPUS} FLAGS="-O2 -s -Wall" +fi diff --git a/scripts/mk-patch-list.sh b/scripts/mk-patch-list.sh new file mode 100755 index 0000000..aa6aa6d --- /dev/null +++ b/scripts/mk-patch-list.sh @@ -0,0 +1,47 @@ +#!/bin/bash + +die() { + [ "$#" -eq 0 ] || echo "$*" >&2 + exit 1 +} + +err() { + echo "$*" >&2 +} + +quit() { + [ "$#" -eq 0 ] || echo "$*" + exit 0 +} + +#### Main + +USAGE="Usage: ${0##*/} [-o <output_dir>] [-s <start_num>] [-b <base>] commit_id..." +OUTPUT= +BASE= +NUM= + +while [ -n "$1" -a -z "${1##-*}" ]; do + case "$1" in + -b) BASE="$2" ; shift 2 ;; + -o) OUTPUT="$2" ; shift 2 ;; + -s) NUM="$2" ; shift 2 ;; + -h|--help) quit "$USAGE" ;; + *) die "$USAGE" ;; + esac +done + +PATCHES=( "$@" ) +NUM=${NUM:-1} + +for p in ${PATCHES[@]}; do + if [ -n "$BASE" ]; then + # find the patch number from the base. + # E.g. 
v2.9-dev0-774-gd710dfbac + NUM=$(git describe --match "$BASE" "$p") + NUM=${NUM#"$BASE"-} + NUM=${NUM%-*} + fi + git format-patch -k -1 --start-number=$NUM ${OUTPUT:+-o $OUTPUT} "$p" + ((NUM++)) +done diff --git a/scripts/run-regtests.sh b/scripts/run-regtests.sh index 85f1341..79dd8e9 100755 --- a/scripts/run-regtests.sh +++ b/scripts/run-regtests.sh @@ -312,8 +312,9 @@ _version() { HAPROXY_PROGRAM="${HAPROXY_PROGRAM:-${PWD}/haproxy}" -HAPROXY_ARGS="${HAPROXY_ARGS--dM}" +HAPROXY_ARGS="${HAPROXY_ARGS--dM -dI}" VTEST_PROGRAM="${VTEST_PROGRAM:-vtest}" +VTEST_TIMEOUT="${VTEST_TIMEOUT:-10}" TESTDIR="${TMPDIR:-/tmp}" REGTESTS="" LINEFEED=" @@ -344,16 +345,16 @@ if [ $preparefailed ]; then fi { read HAPROXY_VERSION; read TARGET; read FEATURES; read SERVICES; } << EOF -$($HAPROXY_PROGRAM $HAPROXY_ARGS -vv | grep 'HA-\?Proxy version\|TARGET.*=\|^Feature\|^Available services' | sed 's/.* [:=] //') +$($HAPROXY_PROGRAM $HAPROXY_ARGS -vv | grep -E 'HA-?Proxy version|TARGET.*=|^Feature|^Available services' | sed 's/.* [:=] //') EOF HAPROXY_VERSION=$(echo $HAPROXY_VERSION | cut -d " " -f 3) echo "Testing with haproxy version: $HAPROXY_VERSION" -PROJECT_VERSION=$(${MAKE:-make} version 2>&1 | grep '^VERSION:\|^SUBVERS:'|cut -f2 -d' '|tr -d '\012') +PROJECT_VERSION=$(${MAKE:-make} version 2>&1 | grep -E '^VERSION:|^SUBVERS:'|cut -f2 -d' '|tr -d '\012') if [ -z "${PROJECT_VERSION}${MAKE}" ]; then # try again with gmake, just in case - PROJECT_VERSION=$(gmake version 2>&1 | grep '^VERSION:\|^SUBVERS:'|cut -f2 -d' '|tr -d '\012') + PROJECT_VERSION=$(gmake version 2>&1 | grep -E '^VERSION:|^SUBVERS:'|cut -f2 -d' '|tr -d '\012') fi FEATURES_PATTERN=" $FEATURES " @@ -396,7 +397,7 @@ if [ -n "$testlist" ]; then if [ -n "$jobcount" ]; then jobcount="-j $jobcount" fi - cmd="$VTEST_PROGRAM -b $((2<<20)) -k -t 10 $keep_logs $verbose $debug $jobcount $vtestparams $testlist" + cmd="$VTEST_PROGRAM -b $((2<<20)) -k -t ${VTEST_TIMEOUT} $keep_logs $verbose $debug $jobcount $vtestparams $testlist" 
eval $cmd _vtresult=$? else @@ -546,6 +546,25 @@ struct acl_expr *parse_acl_expr(const char **args, char **err, struct arg_list * */ if (!pat_ref_add(ref, arg, NULL, err)) goto out_free_expr; + + if (global.mode & MODE_DIAG) { + if (strcmp(arg, "&&") == 0 || strcmp(arg, "and") == 0 || + strcmp(arg, "||") == 0 || strcmp(arg, "or") == 0) + ha_diag_warning("parsing [%s:%d] : pattern '%s' looks like a failed attempt at using an operator inside a pattern list\n", file, line, arg); + else if (strcmp(arg, "#") == 0 || strcmp(arg, "//") == 0) + ha_diag_warning("parsing [%s:%d] : pattern '%s' looks like a failed attempt at commenting an end of line\n", file, line, arg); + else if (find_acl_kw(arg)) + ha_diag_warning("parsing [%s:%d] : pattern '%s' suspiciously looks like a known acl keyword\n", file, line, arg); + else { + const char *begw = arg, *endw; + + for (endw = begw; is_idchar(*endw); endw++) + ; + + if (endw != begw && find_sample_fetch(begw, endw - begw)) + ha_diag_warning("parsing [%s:%d] : pattern '%s' suspiciously looks like a known sample fetch keyword\n", file, line, arg); + } + } args++; } @@ -1331,7 +1350,11 @@ int smp_fetch_acl_parse(struct arg *args, char **err_msg) name++; } - if (!(acl_sample->terms[i].acl = find_acl_by_name(name, &curproxy->acl))) { + + if ( + !(acl_sample->terms[i].acl = find_acl_by_name(name, &curproxy->acl)) && + !(acl_sample->terms[i].acl = find_acl_default(name, &curproxy->acl, err_msg, NULL, NULL, 0)) + ) { memprintf(err_msg, "ACL '%s' not found", name); goto err; } diff --git a/src/activity.c b/src/activity.c index 07a30e6..5417deb 100644 --- a/src/activity.c +++ b/src/activity.c @@ -647,17 +647,12 @@ static int cli_io_handler_show_profiling(struct appctx *appctx) unsigned long long tot_alloc_calls, tot_free_calls; unsigned long long tot_alloc_bytes, tot_free_bytes; #endif - struct stconn *sc = appctx_sc(appctx); struct buffer *name_buffer = get_trash_chunk(); const struct ha_caller *caller; const char *str; int max_lines; int 
i, j, max; - /* FIXME: Don't watch the other side ! */ - if (unlikely(sc_opposite(sc)->flags & SC_FL_SHUT_DONE)) - return 1; - chunk_reset(&trash); switch (profiling & HA_PROF_TASKS_MASK) { @@ -808,8 +803,14 @@ static int cli_io_handler_show_profiling(struct appctx *appctx) else chunk_appendf(&trash, "[other]"); - chunk_appendf(&trash," %s(%lld)", memprof_methods[entry->method], - (long long)(entry->alloc_tot - entry->free_tot) / (long long)(entry->alloc_calls + entry->free_calls)); + if ((tmp_memstats[i].method != MEMPROF_METH_P_ALLOC) && + (tmp_memstats[i].method != MEMPROF_METH_MALLOC) && + (tmp_memstats[i].method != MEMPROF_METH_CALLOC)) { + chunk_appendf(&trash," %s(%lld)", memprof_methods[entry->method], + (long long)(entry->alloc_tot - entry->free_tot) / (long long)(entry->alloc_calls + entry->free_calls)); + } else + chunk_appendf(&trash," %s(%lld)", memprof_methods[entry->method], + (long long)(entry->alloc_tot) / (long long)(entry->alloc_calls)); if (entry->alloc_tot && entry->free_tot) { /* that's a realloc, show the total diff to help spot leaks */ @@ -834,9 +835,13 @@ static int cli_io_handler_show_profiling(struct appctx *appctx) tot_alloc_calls = tot_free_calls = tot_alloc_bytes = tot_free_bytes = 0; for (i = 0; i < max_lines; i++) { tot_alloc_calls += tmp_memstats[i].alloc_calls; - tot_free_calls += tmp_memstats[i].free_calls; tot_alloc_bytes += tmp_memstats[i].alloc_tot; - tot_free_bytes += tmp_memstats[i].free_tot; + if ((tmp_memstats[i].method != MEMPROF_METH_P_ALLOC) && + (tmp_memstats[i].method != MEMPROF_METH_MALLOC) && + (tmp_memstats[i].method != MEMPROF_METH_CALLOC)) { + tot_free_calls += tmp_memstats[i].free_calls; + tot_free_bytes += tmp_memstats[i].free_tot; + } } chunk_appendf(&trash, @@ -911,7 +916,6 @@ static int cli_parse_show_profiling(char **args, char *payload, struct appctx *a static int cli_io_handler_show_tasks(struct appctx *appctx) { struct sched_activity tmp_activity[SCHED_ACT_HASH_BUCKETS] __attribute__((aligned(64))); - 
struct stconn *sc = appctx_sc(appctx); struct buffer *name_buffer = get_trash_chunk(); struct sched_activity *entry; const struct tasklet *tl; @@ -922,10 +926,6 @@ static int cli_io_handler_show_tasks(struct appctx *appctx) int thr, queue; int i, max; - /* FIXME: Don't watch the other side ! */ - if (unlikely(sc_opposite(sc)->flags & SC_FL_SHUT_DONE)) - return 1; - /* It's not possible to scan queues in small chunks and yield in the * middle of the dump and come back again. So what we're doing instead * is to freeze all threads and inspect their queues at once as fast as @@ -1057,17 +1057,12 @@ static int cli_io_handler_show_tasks(struct appctx *appctx) */ static int cli_io_handler_show_activity(struct appctx *appctx) { - struct stconn *sc = appctx_sc(appctx); struct show_activity_ctx *actctx = appctx->svcctx; int tgt = actctx->thr; // target thread, -1 for all, 0 for total only uint up_sec, up_usec; int base_line; ullong up; - /* FIXME: Don't watch the other side ! */ - if (unlikely(sc_opposite(sc)->flags & SC_FL_SHUT_DONE)) - return 1; - /* this macro is used below to dump values. The thread number is "thr", * and runs from 0 to nbt-1 when values are printed using the formula. 
* We normally try to dmup integral lines in order to keep counters diff --git a/src/applet.c b/src/applet.c index b695a9f..c528963 100644 --- a/src/applet.c +++ b/src/applet.c @@ -15,13 +15,17 @@ #include <haproxy/api.h> #include <haproxy/applet.h> +#include <haproxy/cfgparse.h> #include <haproxy/channel.h> +#include <haproxy/htx.h> #include <haproxy/list.h> #include <haproxy/sc_strm.h> #include <haproxy/stconn.h> #include <haproxy/stream.h> #include <haproxy/task.h> #include <haproxy/trace.h> +#include <haproxy/vecpair.h> +#include <haproxy/xref.h> unsigned int nb_applets = 0; @@ -50,6 +54,14 @@ static const struct trace_event applet_trace_events[] = { { .mask = APPLET_EV_ERR, .name = "app_err", .desc = "error on appctx" }, #define APPLET_EV_START (1ULL << 5) { .mask = APPLET_EV_START, .name = "app_start", .desc = "start appctx" }, +#define APPLET_EV_RECV (1ULL << 6) + { .mask = APPLET_EV_START, .name = "app_receive", .desc = "RX on appctx" }, +#define APPLET_EV_SEND (1ULL << 7) + { .mask = APPLET_EV_START, .name = "app_send", .desc = "TX on appctx" }, +#define APPLET_EV_BLK (1ULL << 8) + { .mask = APPLET_EV_START, .name = "app_blk", .desc = "appctx blocked" }, +#define APPLET_EV_WAKE (1ULL << 9) + { .mask = APPLET_EV_START, .name = "app_wake", .desc = "appctx woken up" }, {} }; @@ -129,9 +141,9 @@ static void applet_trace(enum trace_level level, uint64_t mask, const struct tra if (src->verbosity == STRM_VERB_CLEAN) return; - chunk_appendf(&trace_buf, " appctx=%p .t=%p .t.exp=%d .state=%d .st0=%d .st1=%d", + chunk_appendf(&trace_buf, " appctx=%p .t=%p .t.exp=%d .flags=0x%x .st0=%d .st1=%d to_fwd=%lu", appctx, appctx->t, tick_isset(appctx->t->expire) ? 
TICKS_TO_MS(appctx->t->expire - now_ms) : TICK_ETERNITY, - appctx->state, appctx->st0, appctx->st1); + appctx->flags, appctx->st0, appctx->st1, (ulong)appctx->to_forward); if (!sc || src->verbosity == STRM_VERB_MINIMAL) return; @@ -167,21 +179,41 @@ static void applet_trace(enum trace_level level, uint64_t mask, const struct tra (src->verbosity == STRM_VERB_ADVANCED && src->level < TRACE_LEVEL_DATA)) return; - /* channels' buffer info */ - if (s->flags & SF_HTX) { - struct htx *ichtx = htxbuf(&ic->buf); - struct htx *ochtx = htxbuf(&oc->buf); + if (appctx->t->process == task_run_applet) { + /* channels' buffer info */ + if (s->flags & SF_HTX) { + struct htx *ichtx = htxbuf(&ic->buf); + struct htx *ochtx = htxbuf(&oc->buf); - chunk_appendf(&trace_buf, " htx=(%u/%u#%u, %u/%u#%u)", - ichtx->data, ichtx->size, htx_nbblks(ichtx), - ochtx->data, ochtx->size, htx_nbblks(ochtx)); + chunk_appendf(&trace_buf, " htx=(%u/%u#%u, %u/%u#%u)", + ichtx->data, ichtx->size, htx_nbblks(ichtx), + ochtx->data, ochtx->size, htx_nbblks(ochtx)); + } + else { + chunk_appendf(&trace_buf, " buf=(%u@%p+%u/%u, %u@%p+%u/%u)", + (unsigned int)b_data(&ic->buf), b_orig(&ic->buf), + (unsigned int)b_head_ofs(&ic->buf), (unsigned int)b_size(&ic->buf), + (unsigned int)b_data(&oc->buf), b_orig(&oc->buf), + (unsigned int)b_head_ofs(&oc->buf), (unsigned int)b_size(&oc->buf)); + } } else { - chunk_appendf(&trace_buf, " buf=(%u@%p+%u/%u, %u@%p+%u/%u)", - (unsigned int)b_data(&ic->buf), b_orig(&ic->buf), - (unsigned int)b_head_ofs(&ic->buf), (unsigned int)b_size(&ic->buf), - (unsigned int)b_data(&oc->buf), b_orig(&oc->buf), - (unsigned int)b_head_ofs(&oc->buf), (unsigned int)b_size(&oc->buf)); + /* RX/TX buffer info */ + if (s->flags & SF_HTX) { + struct htx *rxhtx = htxbuf(&appctx->inbuf); + struct htx *txhtx = htxbuf(&appctx->outbuf); + + chunk_appendf(&trace_buf, " htx=(%u/%u#%u, %u/%u#%u)", + rxhtx->data, rxhtx->size, htx_nbblks(rxhtx), + txhtx->data, txhtx->size, htx_nbblks(txhtx)); + } + else { + 
chunk_appendf(&trace_buf, " buf=(%u@%p+%u/%u, %u@%p+%u/%u)", + (unsigned int)b_data(&appctx->inbuf), b_orig(&appctx->inbuf), + (unsigned int)b_head_ofs(&appctx->inbuf), (unsigned int)b_size(&appctx->inbuf), + (unsigned int)b_data(&appctx->outbuf), b_orig(&appctx->outbuf), + (unsigned int)b_head_ofs(&appctx->outbuf), (unsigned int)b_size(&appctx->outbuf)); + } } } @@ -207,7 +239,7 @@ struct appctx *appctx_new_on(struct applet *applet, struct sedesc *sedesc, int t goto fail_appctx; } - LIST_INIT(&appctx->wait_entry); + MT_LIST_INIT(&appctx->wait_entry); appctx->obj_type = OBJ_TYPE_APPCTX; appctx->applet = applet; appctx->sess = NULL; @@ -229,7 +261,18 @@ struct appctx *appctx_new_on(struct applet *applet, struct sedesc *sedesc, int t } appctx->sedesc = sedesc; - appctx->t->process = task_run_applet; + + appctx->flags = 0; + appctx->inbuf = BUF_NULL; + appctx->outbuf = BUF_NULL; + appctx->to_forward = 0; + + if (applet->rcv_buf != NULL && applet->snd_buf != NULL) { + appctx->t->process = task_process_applet; + applet_fl_set(appctx, APPCTX_FL_INOUT_BUFS); + } + else + appctx->t->process = task_run_applet; appctx->t->context = appctx; LIST_INIT(&appctx->buffer_wait.list); @@ -314,7 +357,7 @@ void appctx_free(struct appctx *appctx) /* if it's running, or about to run, defer the freeing * until the callback is called. */ - appctx->state |= APPLET_WANT_DIE; + applet_fl_set(appctx, APPCTX_FL_WANT_DIE); task_wakeup(appctx->t, TASK_WOKEN_OTHER); TRACE_DEVEL("Cannot release APPCTX now, wake it up", APPLET_EV_FREE, appctx); } @@ -348,55 +391,366 @@ void applet_reset_svcctx(struct appctx *appctx) appctx->svcctx = NULL; } -/* call the applet's release() function if any, and marks the sedesc as shut. - * Needs to be called upon close(). +/* call the applet's release() function if any, and marks the sedesc as shut + * once both read and write side are shut. Needs to be called upon close(). 
*/ void appctx_shut(struct appctx *appctx) { - if (se_fl_test(appctx->sedesc, SE_FL_SHR | SE_FL_SHW)) + if (applet_fl_test(appctx, APPCTX_FL_SHUTDOWN)) return; TRACE_ENTER(APPLET_EV_RELEASE, appctx); + if (appctx->applet->release) appctx->applet->release(appctx); + applet_fl_set(appctx, APPCTX_FL_SHUTDOWN); - if (LIST_INLIST(&appctx->buffer_wait.list)) - LIST_DEL_INIT(&appctx->buffer_wait.list); + b_dequeue(&appctx->buffer_wait); - se_fl_set(appctx->sedesc, SE_FL_SHRR | SE_FL_SHWN); TRACE_LEAVE(APPLET_EV_RELEASE, appctx); } +/* releases unused buffers after processing. It will try to wake up as many + * entities as the number of buffers that it releases. + */ +static void appctx_release_buffers(struct appctx * appctx) +{ + int offer = 0; + + if (b_size(&appctx->inbuf) && !b_data(&appctx->inbuf)) { + offer++; + b_free(&appctx->inbuf); + } + if (b_size(&appctx->outbuf) && !b_data(&appctx->outbuf)) { + offer++; + b_free(&appctx->outbuf); + } + + /* if we're certain to have at least 1 buffer available, and there is + * someone waiting, we can wake up a waiter and offer them. + */ + if (offer) + offer_buffers(appctx, offer); +} + /* Callback used to wake up an applet when a buffer is available. The applet * <appctx> is woken up if an input buffer was requested for the associated - * stream connector. In this case the buffer is immediately allocated and the - * function returns 1. Otherwise it returns 0. Note that this automatically - * covers multiple wake-up attempts by ensuring that the same buffer will not - * be accounted for multiple times. + * stream connector. In this case the buffer is expected to be allocated later, + * the applet is woken up, and the function returns 1 to mention this buffer is + * expected to be used. Otherwise it returns 0. 
*/ int appctx_buf_available(void *arg) { struct appctx *appctx = arg; struct stconn *sc = appctx_sc(appctx); + int ret = 0; + + if (applet_fl_test(appctx, APPCTX_FL_INBLK_ALLOC)) { + applet_fl_clr(appctx, APPCTX_FL_INBLK_ALLOC); + applet_fl_set(appctx, APPCTX_FL_IN_MAYALLOC); + TRACE_STATE("unblocking appctx on inbuf allocation", APPLET_EV_RECV|APPLET_EV_BLK|APPLET_EV_WAKE, appctx); + ret = 1; + } + + if (applet_fl_test(appctx, APPCTX_FL_OUTBLK_ALLOC)) { + applet_fl_clr(appctx, APPCTX_FL_OUTBLK_ALLOC); + applet_fl_set(appctx, APPCTX_FL_OUT_MAYALLOC); + TRACE_STATE("unblocking appctx on outbuf allocation", APPLET_EV_SEND|APPLET_EV_BLK|APPLET_EV_WAKE, appctx); + ret = 1; + } + + /* allocation requested ? if no, give up. */ + if (sc->flags & SC_FL_NEED_BUFF) { + sc_have_buff(sc); + ret = 1; + } + + /* The requested buffer might already have been allocated (channel, + * fast-forward etc), in which case we won't need to take that one. + * Otherwise we expect to take it. + */ + if (!c_size(sc_ic(sc)) && !sc_ep_have_ff_data(sc_opposite(sc))) + ret = 1; + leave: + if (ret) + task_wakeup(appctx->t, TASK_WOKEN_RES); + return ret; +} + +size_t appctx_htx_rcv_buf(struct appctx *appctx, struct buffer *buf, size_t count, unsigned int flags) +{ + struct htx *appctx_htx = htx_from_buf(&appctx->outbuf); + struct htx *buf_htx = NULL; + size_t ret = 0; + + if (htx_is_empty(appctx_htx)) { + htx_to_buf(appctx_htx, &appctx->outbuf); + goto out; + } + + ret = appctx_htx->data; + buf_htx = htx_from_buf(buf); + if (htx_is_empty(buf_htx) && htx_used_space(appctx_htx) <= count) { + htx_to_buf(buf_htx, buf); + htx_to_buf(appctx_htx, &appctx->outbuf); + b_xfer(buf, &appctx->outbuf, b_data(&appctx->outbuf)); + goto out; + } + + htx_xfer_blks(buf_htx, appctx_htx, count, HTX_BLK_UNUSED); + buf_htx->flags |= (appctx_htx->flags & (HTX_FL_PARSING_ERROR|HTX_FL_PROCESSING_ERROR)); + if (htx_is_empty(appctx_htx)) { + buf_htx->flags |= (appctx_htx->flags & HTX_FL_EOM); + } + buf_htx->extra = 
(appctx_htx->extra ? (appctx_htx->data + appctx_htx->extra) : 0); + htx_to_buf(buf_htx, buf); + htx_to_buf(appctx_htx, &appctx->outbuf); + ret -= appctx_htx->data; + + out: + return ret; +} + +size_t appctx_raw_rcv_buf(struct appctx *appctx, struct buffer *buf, size_t count, unsigned int flags) +{ + return b_xfer(buf, &appctx->outbuf, MIN(count, b_data(&appctx->outbuf))); +} + +size_t appctx_rcv_buf(struct stconn *sc, struct buffer *buf, size_t count, unsigned int flags) +{ + struct appctx *appctx = __sc_appctx(sc); + size_t ret = 0; + + TRACE_ENTER(APPLET_EV_RECV, appctx); + + if (applet_fl_test(appctx, APPCTX_FL_OUTBLK_ALLOC)) + goto end; + + if (!count) + goto end; + + if (!appctx_get_buf(appctx, &appctx->outbuf)) { + TRACE_STATE("waiting for appctx outbuf allocation", APPLET_EV_RECV|APPLET_EV_BLK, appctx); + goto end; + } + + if (flags & CO_RFL_BUF_FLUSH) + applet_fl_set(appctx, APPCTX_FL_FASTFWD); + + ret = appctx->applet->rcv_buf(appctx, buf, count, flags); + if (ret) + applet_fl_clr(appctx, APPCTX_FL_OUTBLK_FULL); + + if (b_data(&appctx->outbuf)) { + se_fl_set(appctx->sedesc, SE_FL_RCV_MORE | SE_FL_WANT_ROOM); + TRACE_STATE("waiting for more room", APPLET_EV_RECV|APPLET_EV_BLK, appctx); + } + else { + se_fl_clr(appctx->sedesc, SE_FL_RCV_MORE | SE_FL_WANT_ROOM); + if (applet_fl_test(appctx, APPCTX_FL_EOI)) { + se_fl_set(appctx->sedesc, SE_FL_EOI); + TRACE_STATE("report EOI to SE", APPLET_EV_RECV|APPLET_EV_BLK, appctx); + } + if (applet_fl_test(appctx, APPCTX_FL_EOS)) { + se_fl_set(appctx->sedesc, SE_FL_EOS); + TRACE_STATE("report EOS to SE", APPLET_EV_RECV|APPLET_EV_BLK, appctx); + } + if (applet_fl_test(appctx, APPCTX_FL_ERROR)) { + se_fl_set(appctx->sedesc, SE_FL_ERROR); + TRACE_STATE("report ERROR to SE", APPLET_EV_RECV|APPLET_EV_BLK, appctx); + } + } + + end: + TRACE_LEAVE(APPLET_EV_RECV, appctx); + return ret; +} + +size_t appctx_htx_snd_buf(struct appctx *appctx, struct buffer *buf, size_t count, unsigned int flags) +{ + struct htx *appctx_htx = 
htx_from_buf(&appctx->inbuf); + struct htx *buf_htx = htx_from_buf(buf); + size_t ret = 0; + + ret = buf_htx->data; + if (htx_is_empty(appctx_htx) && buf_htx->data == count) { + htx_to_buf(appctx_htx, &appctx->inbuf); + htx_to_buf(buf_htx, buf); + b_xfer(&appctx->inbuf, buf, b_data(buf)); + goto end; + } + + htx_xfer_blks(appctx_htx, buf_htx, count, HTX_BLK_UNUSED); + if (htx_is_empty(buf_htx)) { + appctx_htx->flags |= (buf_htx->flags & HTX_FL_EOM); + } + + appctx_htx->extra = (buf_htx->extra ? (buf_htx->data + buf_htx->extra) : 0); + htx_to_buf(appctx_htx, &appctx->outbuf); + htx_to_buf(buf_htx, buf); + ret -= buf_htx->data; +end: + if (ret < count) { + applet_fl_set(appctx, APPCTX_FL_INBLK_FULL); + TRACE_STATE("report appctx inbuf is full", APPLET_EV_SEND|APPLET_EV_BLK, appctx); + } + return ret; +} + +size_t appctx_raw_snd_buf(struct appctx *appctx, struct buffer *buf, size_t count, unsigned flags) +{ + size_t ret = 0; + + ret = b_xfer(&appctx->inbuf, buf, MIN(b_room(&appctx->inbuf), count)); + if (ret < count) { + applet_fl_set(appctx, APPCTX_FL_INBLK_FULL); + TRACE_STATE("report appctx inbuf is full", APPLET_EV_SEND|APPLET_EV_BLK, appctx); + } + end: + return ret; +} + +size_t appctx_snd_buf(struct stconn *sc, struct buffer *buf, size_t count, unsigned int flags) +{ + struct appctx *appctx = __sc_appctx(sc); + size_t ret = 0; + + TRACE_ENTER(APPLET_EV_SEND, appctx); + + if (applet_fl_test(appctx, (APPCTX_FL_ERROR|APPCTX_FL_ERR_PENDING))) + goto end; + + if (applet_fl_test(appctx, (APPCTX_FL_INBLK_FULL|APPCTX_FL_INBLK_ALLOC))) + goto end; + + if (!count) + goto end; + + if (!appctx_get_buf(appctx, &appctx->inbuf)) { + TRACE_STATE("waiting for appctx inbuf allocation", APPLET_EV_SEND|APPLET_EV_BLK, appctx); + goto end; + } + + ret = appctx->applet->snd_buf(appctx, buf, count, flags); + + end: + if (applet_fl_test(appctx, (APPCTX_FL_ERROR|APPCTX_FL_ERR_PENDING))) { + BUG_ON((applet_fl_get(appctx) & (APPCTX_FL_EOS|APPCTX_FL_ERROR|APPCTX_FL_ERR_PENDING)) == 
(APPCTX_FL_EOS|APPCTX_FL_ERR_PENDING)); + applet_set_error(appctx); + TRACE_STATE("report ERR_PENDING/ERROR to SE", APPLET_EV_SEND, appctx); + } + TRACE_LEAVE(APPLET_EV_SEND, appctx); + return ret; +} + +int appctx_fastfwd(struct stconn *sc, unsigned int count, unsigned int flags) +{ + struct appctx *appctx = __sc_appctx(sc); + struct xref *peer; + struct sedesc *sdo = NULL; + unsigned int len, nego_flags = NEGO_FF_FL_NONE; + int ret = 0; + + TRACE_ENTER(APPLET_EV_RECV, appctx); + + applet_fl_set(appctx, APPCTX_FL_FASTFWD); + + /* TODO: outbuf must be empty. Find a better way to handle that but for now just return -1 */ + if (b_data(&appctx->outbuf)) { + TRACE_STATE("Output buffer not empty, cannot fast-forward data", APPLET_EV_RECV, appctx); + return -1; + } + + peer = xref_get_peer_and_lock(&appctx->sedesc->xref); + if (!peer) { + TRACE_STATE("Opposite endpoint not available yet", APPLET_EV_RECV, appctx); + goto end; + } + sdo = container_of(peer, struct sedesc, xref); + xref_unlock(&appctx->sedesc->xref, peer); + + if (appctx->to_forward && count > appctx->to_forward) { + count = appctx->to_forward; + nego_flags |= NEGO_FF_FL_EXACT_SIZE; + } - /* allocation requested ? 
*/ - if (!(sc->flags & SC_FL_NEED_BUFF)) - return 0; + len = se_nego_ff(sdo, &BUF_NULL, count, nego_flags); + if (sdo->iobuf.flags & IOBUF_FL_NO_FF) { + sc_ep_clr(sc, SE_FL_MAY_FASTFWD_PROD); + applet_fl_clr(appctx, APPCTX_FL_FASTFWD); + TRACE_DEVEL("Fast-forwarding not supported by opposite endpoint, disable it", APPLET_EV_RECV, appctx); + goto end; + } + if (sdo->iobuf.flags & IOBUF_FL_FF_BLOCKED) { + sc_ep_set(sc, /* SE_FL_RCV_MORE | */SE_FL_WANT_ROOM); + TRACE_STATE("waiting for more room", APPLET_EV_RECV|APPLET_EV_BLK, appctx); + goto end; + } + + b_add(sdo->iobuf.buf, sdo->iobuf.offset); + ret = appctx->applet->fastfwd(appctx, sdo->iobuf.buf, len, 0); + b_sub(sdo->iobuf.buf, sdo->iobuf.offset); + sdo->iobuf.data += ret; + + if (se_fl_test(appctx->sedesc, SE_FL_WANT_ROOM)) { + /* The applet request more room, report the info at the iobuf level */ + sdo->iobuf.flags |= IOBUF_FL_FF_BLOCKED; + TRACE_STATE("waiting for more room", APPLET_EV_RECV|APPLET_EV_BLK, appctx); + } + + if (applet_fl_test(appctx, APPCTX_FL_EOI)) { + se_fl_set(appctx->sedesc, SE_FL_EOI); + sdo->iobuf.flags |= IOBUF_FL_EOI; /* TODO: it may be good to have a flag to be sure we can + * forward the EOI the to consumer side + */ + TRACE_STATE("report EOI to SE", APPLET_EV_RECV|APPLET_EV_BLK, appctx); + } + if (applet_fl_test(appctx, APPCTX_FL_EOS)) { + se_fl_set(appctx->sedesc, SE_FL_EOS); + TRACE_STATE("report EOS to SE", APPLET_EV_RECV|APPLET_EV_BLK, appctx); + } + if (applet_fl_test(appctx, APPCTX_FL_ERROR)) { + se_fl_set(appctx->sedesc, SE_FL_ERROR); + TRACE_STATE("report ERROR to SE", APPLET_EV_RECV|APPLET_EV_BLK, appctx); + } + /* else */ + /* applet_have_more_data(appctx); */ - sc_have_buff(sc); + if (se_done_ff(sdo) != 0) { + /* Something was forwarding, don't reclaim more room */ + se_fl_clr(appctx->sedesc, SE_FL_WANT_ROOM); + TRACE_STATE("more room available", APPLET_EV_RECV|APPLET_EV_BLK, appctx); + } + +end: + TRACE_LEAVE(APPLET_EV_RECV, appctx); + return ret; +} - /* was already 
allocated another way ? if so, don't take this one */ - if (c_size(sc_ic(sc)) || sc_ep_have_ff_data(sc_opposite(sc))) - return 0; +/* Atomically append a line to applet <ctx>'s output, appending a trailing LF. + * The line is read from vectors <v1> and <v2> at offset <ofs> relative to the + * area's origin, for <len> bytes. It returns the number of bytes consumed from + * the input vectors on success, -1 if it temporarily cannot (buffer full), -2 + * if it will never be able to (too large msg). The vectors are not modified. + * The caller is responsible for making sure that there are at least ofs+len + * bytes in the input vectors. + */ +ssize_t applet_append_line(void *ctx, struct ist v1, struct ist v2, size_t ofs, size_t len) +{ + struct appctx *appctx = ctx; - /* allocation possible now ? */ - if (!b_alloc(&sc_ic(sc)->buf)) { - sc_need_buff(sc); - return 0; + if (unlikely(len + 1 > b_size(&trash))) { + /* too large a message to ever fit, let's skip it */ + return -2; } - task_wakeup(appctx->t, TASK_WOKEN_RES); - return 1; + chunk_reset(&trash); + vp_peek_ofs(v1, v2, ofs, trash.area, len); + trash.data += len; + trash.area[trash.data++] = '\n'; + if (applet_putchk(appctx, &trash) == -1) + return -1; + return len; } /* Default applet handler */ @@ -404,13 +758,14 @@ struct task *task_run_applet(struct task *t, void *context, unsigned int state) { struct appctx *app = context; struct stconn *sc, *sco; + struct channel *ic, *oc; unsigned int rate; - size_t count; + size_t input, output; int did_send = 0; TRACE_ENTER(APPLET_EV_PROCESS, app); - if (app->state & APPLET_WANT_DIE) { + if (applet_fl_test(app, APPCTX_FL_WANT_DIE)) { TRACE_DEVEL("APPCTX want die, release it", APPLET_EV_FREE, app); __appctx_free(app); return NULL; @@ -434,6 +789,9 @@ struct task *task_run_applet(struct task *t, void *context, unsigned int state) sc = appctx_sc(app); sco = sc_opposite(sc); + ic = sc_ic(sc); + oc = sc_oc(sc); + /* We always pretend the applet can't get and doesn't want to * 
put, it's up to it to change this if needed. This ensures * that one applet which ignores any event will not spin. @@ -450,7 +808,10 @@ struct task *task_run_applet(struct task *t, void *context, unsigned int state) if (!sc_alloc_ibuf(sc, &app->buffer_wait)) applet_have_more_data(app); - count = co_data(sc_oc(sc)); + channel_check_idletimer(ic); + + input = ic->total; + output = co_data(oc); app->applet->fct(app); TRACE_POINT(APPLET_EV_PROCESS, app); @@ -458,9 +819,9 @@ struct task *task_run_applet(struct task *t, void *context, unsigned int state) /* now check if the applet has released some room and forgot to * notify the other side about it. */ - if (count != co_data(sc_oc(sc))) { - sc_oc(sc)->flags |= CF_WRITE_EVENT | CF_WROTE_DATA; - if (sco->room_needed < 0 || channel_recv_max(sc_oc(sc)) >= sco->room_needed) + if (output != co_data(oc)) { + oc->flags |= CF_WRITE_EVENT | CF_WROTE_DATA; + if (sco->room_needed < 0 || channel_recv_max(oc) >= sco->room_needed) sc_have_room(sco); did_send = 1; } @@ -469,14 +830,18 @@ struct task *task_run_applet(struct task *t, void *context, unsigned int state) sc_have_room(sco); } - if (sc_ic(sc)->flags & CF_READ_EVENT) + input = ic->total - input; + if (input) { + channel_check_xfer(ic, input); sc_ep_report_read_activity(sc); + } + /* TODO: May be move in appctx_rcv_buf or sc_applet_process ? */ if (sc_waiting_room(sc) && (sc->flags & SC_FL_ABRT_DONE)) { sc_ep_set(sc, SE_FL_EOS|SE_FL_ERROR); } - if (!co_data(sc_oc(sc))) { + if (!co_data(oc)) { if (did_send) sc_ep_report_send_activity(sc); } @@ -495,7 +860,109 @@ struct task *task_run_applet(struct task *t, void *context, unsigned int state) } sc->app_ops->wake(sc); - channel_release_buffer(sc_ic(sc), &app->buffer_wait); + channel_release_buffer(ic, &app->buffer_wait); + TRACE_LEAVE(APPLET_EV_PROCESS, app); + return t; +} + + +/* Default applet handler based on IN/OUT buffers. 
It is a true task here, no a tasklet */ +struct task *task_process_applet(struct task *t, void *context, unsigned int state) +{ + struct appctx *app = context; + struct stconn *sc; + unsigned int rate; + + TRACE_ENTER(APPLET_EV_PROCESS, app); + + if (applet_fl_test(app, APPCTX_FL_WANT_DIE)) { + TRACE_DEVEL("APPCTX want die, release it", APPLET_EV_FREE, app); + __appctx_free(app); + return NULL; + } + + if (se_fl_test(app->sedesc, SE_FL_ORPHAN)) { + /* Finalize init of orphan appctx. .init callback function must + * be defined and it must finalize appctx startup. + */ + BUG_ON(!app->applet->init); + + if (appctx_init(app) == -1) { + TRACE_DEVEL("APPCTX init failed", APPLET_EV_FREE|APPLET_EV_ERR, app); + appctx_free_on_early_error(app); + return NULL; + } + BUG_ON(!app->sess || !appctx_sc(app) || !appctx_strm(app)); + TRACE_DEVEL("APPCTX initialized", APPLET_EV_PROCESS, app); + } + + sc = appctx_sc(app); + + sc_applet_sync_send(sc); + + /* We always pretend the applet can't get and doesn't want to + * put, it's up to it to change this if needed. This ensures + * that one applet which ignores any event will not spin. + */ + applet_need_more_data(app); + applet_have_no_more_data(app); + + app->applet->fct(app); + + TRACE_POINT(APPLET_EV_PROCESS, app); + + if (b_data(&app->outbuf) || se_fl_test(app->sedesc, SE_FL_MAY_FASTFWD_PROD) || + applet_fl_test(app, APPCTX_FL_EOI|APPCTX_FL_EOS|APPCTX_FL_ERROR)) + applet_have_more_data(app); + + sc_applet_sync_recv(sc); + + /* TODO: May be move in appctx_rcv_buf or sc_applet_process ? 
*/ + if (sc_waiting_room(sc) && (sc->flags & SC_FL_ABRT_DONE)) { + sc_ep_set(sc, SE_FL_EOS|SE_FL_ERROR); + } + + /* measure the call rate and check for anomalies when too high */ + if (((b_size(sc_ib(sc)) && sc->flags & SC_FL_NEED_BUFF) || // asks for a buffer which is present + (b_size(sc_ib(sc)) && !b_data(sc_ib(sc)) && sc->flags & SC_FL_NEED_ROOM) || // asks for room in an empty buffer + (b_data(sc_ob(sc)) && sc_is_send_allowed(sc)) || // asks for data already present + (!b_data(sc_ib(sc)) && b_data(sc_ob(sc)) && // didn't return anything ... + (!(sc_oc(sc)->flags & CF_WRITE_EVENT) && (sc->flags & SC_FL_SHUT_WANTED))))) { // ... and left data pending after a shut + rate = update_freq_ctr(&app->call_rate, 1); + if (rate >= 100000 && app->call_rate.prev_ctr) // looped like this more than 100k times over last second + stream_dump_and_crash(&app->obj_type, read_freq_ctr(&app->call_rate)); + } + + sc->app_ops->wake(sc); + appctx_release_buffers(app); TRACE_LEAVE(APPLET_EV_PROCESS, app); return t; } + +/* config parser for global "tune.applet.zero-copy-forwarding" */ +static int cfg_parse_applet_zero_copy_fwd(char **args, int section_type, struct proxy *curpx, + const struct proxy *defpx, const char *file, int line, + char **err) +{ + if (too_many_args(1, args, err, NULL)) + return -1; + + if (strcmp(args[1], "on") == 0) + global.tune.no_zero_copy_fwd &= ~NO_ZERO_COPY_FWD_APPLET; + else if (strcmp(args[1], "off") == 0) + global.tune.no_zero_copy_fwd |= NO_ZERO_COPY_FWD_APPLET; + else { + memprintf(err, "'%s' expects 'on' or 'off'.", args[0]); + return -1; + } + return 0; +} + + +/* config keyword parsers */ +static struct cfg_kw_list cfg_kws = {ILH, { + { CFG_GLOBAL, "tune.applet.zero-copy-forwarding", cfg_parse_applet_zero_copy_fwd }, + { 0, NULL, NULL } +}}; + +INITCALL1(STG_REGISTER, cfg_register_keywords, &cfg_kws); diff --git a/src/backend.c b/src/backend.c index 39d2c75..d74ae40 100644 --- a/src/backend.c +++ b/src/backend.c @@ -39,6 +39,7 @@ #include 
<haproxy/lb_fwlc.h> #include <haproxy/lb_fwrr.h> #include <haproxy/lb_map.h> +#include <haproxy/lb_ss.h> #include <haproxy/log.h> #include <haproxy/namespace.h> #include <haproxy/obj_type.h> @@ -61,14 +62,6 @@ #define TRACE_SOURCE &trace_strm -int be_lastsession(const struct proxy *be) -{ - if (be->be_counters.last_sess) - return ns_to_sec(now_ns) - be->be_counters.last_sess; - - return -1; -} - /* helper function to invoke the correct hash method */ unsigned int gen_hash(const struct proxy* px, const char* key, unsigned long len) { @@ -176,7 +169,7 @@ void update_backend_weight(struct proxy *px) * If any server is found, it will be returned. If no valid server is found, * NULL is returned. */ -static struct server *get_server_sh(struct proxy *px, const char *addr, int len, const struct server *avoid) +struct server *get_server_sh(struct proxy *px, const char *addr, int len, const struct server *avoid) { unsigned int h, l; @@ -220,7 +213,7 @@ static struct server *get_server_sh(struct proxy *px, const char *addr, int len, * algorithm out of a tens because it gave him the best results. * */ -static struct server *get_server_uh(struct proxy *px, char *uri, int uri_len, const struct server *avoid) +struct server *get_server_uh(struct proxy *px, char *uri, int uri_len, const struct server *avoid) { unsigned int hash = 0; int c; @@ -268,7 +261,7 @@ static struct server *get_server_uh(struct proxy *px, char *uri, int uri_len, co * is returned. If any server is found, it will be returned. If no valid server * is found, NULL is returned. 
*/ -static struct server *get_server_ph(struct proxy *px, const char *uri, int uri_len, const struct server *avoid) +struct server *get_server_ph(struct proxy *px, const char *uri, int uri_len, const struct server *avoid) { unsigned int hash = 0; const char *start, *end; @@ -327,7 +320,7 @@ static struct server *get_server_ph(struct proxy *px, const char *uri, int uri_l /* * this does the same as the previous server_ph, but check the body contents */ -static struct server *get_server_ph_post(struct stream *s, const struct server *avoid) +struct server *get_server_ph_post(struct stream *s, const struct server *avoid) { unsigned int hash = 0; struct channel *req = &s->req; @@ -412,7 +405,7 @@ static struct server *get_server_ph_post(struct stream *s, const struct server * * is found, NULL is returned. When lbprm.arg_opt1 is set, the hash will only * apply to the middle part of a domain name ("use_domain_only" option). */ -static struct server *get_server_hh(struct stream *s, const struct server *avoid) +struct server *get_server_hh(struct stream *s, const struct server *avoid) { unsigned int hash = 0; struct proxy *px = s->be; @@ -485,7 +478,7 @@ static struct server *get_server_hh(struct stream *s, const struct server *avoid } /* RDP Cookie HASH. */ -static struct server *get_server_rch(struct stream *s, const struct server *avoid) +struct server *get_server_rch(struct stream *s, const struct server *avoid) { unsigned int hash = 0; struct proxy *px = s->be; @@ -530,7 +523,7 @@ static struct server *get_server_rch(struct stream *s, const struct server *avoi /* sample expression HASH. Returns NULL if the sample is not found or if there * are no server, relying on the caller to fall back to round robin instead. 
*/ -static struct server *get_server_expr(struct stream *s, const struct server *avoid) +struct server *get_server_expr(struct stream *s, const struct server *avoid) { struct proxy *px = s->be; struct sample *smp; @@ -560,7 +553,7 @@ static struct server *get_server_expr(struct stream *s, const struct server *avo } /* random value */ -static struct server *get_server_rnd(struct stream *s, const struct server *avoid) +struct server *get_server_rnd(struct stream *s, const struct server *avoid) { unsigned int hash = 0; struct proxy *px = s->be; @@ -653,9 +646,9 @@ int assign_server(struct stream *s) if ((s->be->lbprm.algo & BE_LB_KIND) != BE_LB_KIND_HI && ((s->sess->flags & SESS_FL_PREFER_LAST) || (s->be->options & PR_O_PREF_LAST))) { - struct sess_srv_list *srv_list; - list_for_each_entry(srv_list, &s->sess->srv_list, srv_list) { - struct server *tmpsrv = objt_server(srv_list->target); + struct sess_priv_conns *pconns; + list_for_each_entry(pconns, &s->sess->priv_conns, sess_el) { + struct server *tmpsrv = objt_server(pconns->target); if (tmpsrv && tmpsrv->proxy == s->be && ((s->sess->flags & SESS_FL_PREFER_LAST) || @@ -663,7 +656,7 @@ int assign_server(struct stream *s) server_has_room(tmpsrv) || ( tmpsrv->queue.length + 1 < s->be->max_ka_queue))) && srv_currently_usable(tmpsrv)) { - list_for_each_entry(conn, &srv_list->conn_list, session_list) { + list_for_each_entry(conn, &pconns->conn_list, sess_el) { if (!(conn->flags & CO_FL_WAIT_XPRT)) { srv = tmpsrv; s->target = &srv->obj_type; @@ -813,6 +806,14 @@ int assign_server(struct stream *s) break; default: + if ((s->be->lbprm.algo & BE_LB_KIND) == BE_LB_KIND_SA) { + /* some special algos that cannot be grouped together */ + + if ((s->be->lbprm.algo & BE_LB_PARM) == BE_LB_SA_SS) + srv = ss_get_server(s->be); + + break; + } /* unknown balancing algorithm */ err = SRV_STATUS_INTERNAL; goto out; @@ -1232,7 +1233,7 @@ struct connection *conn_backend_get(struct stream *s, struct server *srv, int is continue; conn = 
srv_lookup_conn(is_safe ? &srv->per_thr[i].safe_conns : &srv->per_thr[i].idle_conns, hash); while (conn) { - if (conn->mux->takeover && conn->mux->takeover(conn, i) == 0) { + if (conn->mux->takeover && conn->mux->takeover(conn, i, 0) == 0) { conn_delete_from_tree(conn); _HA_ATOMIC_INC(&activity[tid].fd_takeover); found = 1; @@ -1245,7 +1246,7 @@ struct connection *conn_backend_get(struct stream *s, struct server *srv, int is if (!found && !is_safe && srv->curr_safe_nb > 0) { conn = srv_lookup_conn(&srv->per_thr[i].safe_conns, hash); while (conn) { - if (conn->mux->takeover && conn->mux->takeover(conn, i) == 0) { + if (conn->mux->takeover && conn->mux->takeover(conn, i, 0) == 0) { conn_delete_from_tree(conn); _HA_ATOMIC_INC(&activity[tid].fd_takeover); found = 1; @@ -1348,9 +1349,7 @@ int connect_server(struct stream *s) int reuse = 0; int init_mux = 0; int err; -#ifdef USE_OPENSSL - struct sample *sni_smp = NULL; -#endif + struct sample *name_smp = NULL; struct sockaddr_storage *bind_addr = NULL; int proxy_line_ret; int64_t hash = 0; @@ -1372,13 +1371,11 @@ int connect_server(struct stream *s) if (err != SRV_STATUS_OK) return SF_ERR_INTERNAL; -#ifdef USE_OPENSSL - if (srv && srv->ssl_ctx.sni) { - sni_smp = sample_fetch_as_type(s->be, s->sess, s, - SMP_OPT_DIR_REQ | SMP_OPT_FINAL, - srv->ssl_ctx.sni, SMP_T_STR); + if (srv && srv->pool_conn_name_expr) { + name_smp = sample_fetch_as_type(s->be, s->sess, s, + SMP_OPT_DIR_REQ | SMP_OPT_FINAL, + srv->pool_conn_name_expr, SMP_T_STR); } -#endif /* do not reuse if mode is not http */ if (!IS_HTX_STRM(s)) { @@ -1402,17 +1399,12 @@ int connect_server(struct stream *s) /* 1. target */ hash_params.target = s->target; -#ifdef USE_OPENSSL - /* 2. sni - * only test if the sample is not null as smp_make_safe (called before - * ssl_sock_set_servername) can only fails if this is not the case - */ - if (sni_smp) { - hash_params.sni_prehash = - conn_hash_prehash(sni_smp->data.u.str.area, - sni_smp->data.u.str.data); + /* 2. 
pool-conn-name */ + if (name_smp) { + hash_params.name_prehash = + conn_hash_prehash(name_smp->data.u.str.area, + name_smp->data.u.str.data); } -#endif /* USE_OPENSSL */ /* 3. destination address */ if (srv && srv_is_transparent(srv)) @@ -1423,13 +1415,43 @@ int connect_server(struct stream *s) /* 5. proxy protocol */ if (srv && srv->pp_opts) { - proxy_line_ret = make_proxy_line(trash.area, trash.size, srv, cli_conn, s); + proxy_line_ret = make_proxy_line(trash.area, trash.size, srv, cli_conn, s, strm_sess(s)); if (proxy_line_ret) { hash_params.proxy_prehash = conn_hash_prehash(trash.area, proxy_line_ret); } } + /* 6. Custom mark, tos? */ + if (s->flags & (SF_BC_MARK | SF_BC_TOS)) { + /* mark: 32bits, tos: 8bits = 40bits + * last 2 bits are there to indicate if mark and/or tos are set + * total: 42bits: + * + * 63==== (unused) ====42 39----32 31-----------------------------0 + * 0000000000000000000000 11 00000111 00000000000000000000000000000011 + * ^^ ^^^^^^^^ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + * || | | + * / \ \ \ + * / \ \ \ + * tos? mark? \ mark value (32bits) + * tos value (8bits) + * ie: in the above example: + * - mark is set, mark = 3 + * - tos is set, tos = 7 + */ + if (s->flags & SF_BC_MARK) { + hash_params.mark_tos_prehash |= s->bc_mark; + /* 41th bit: mark set */ + hash_params.mark_tos_prehash |= 1ULL << 40; + } + if (s->flags & SF_BC_TOS) { + hash_params.mark_tos_prehash |= (uint64_t)s->bc_tos << 32; + /* 42th bit: tos set */ + hash_params.mark_tos_prehash |= 1ULL << 41; + } + } + hash = conn_calculate_hash(&hash_params); /* first, search for a matching connection in the session's idle conns */ @@ -1617,6 +1639,18 @@ skip_reuse: srv_conn->src = bind_addr; bind_addr = NULL; + /* mark? */ + if (s->flags & SF_BC_MARK) { + srv_conn->mark = s->bc_mark; + srv_conn->flags |= CO_FL_OPT_MARK; + } + + /* tos? 
*/ + if (s->flags & SF_BC_TOS) { + srv_conn->tos = s->bc_tos; + srv_conn->flags |= CO_FL_OPT_TOS; + } + srv_conn->hash_node->node.key = hash; } } @@ -1744,7 +1778,13 @@ skip_reuse: return err; #ifdef USE_OPENSSL - if (!(s->flags & SF_SRV_REUSED)) { + /* Set socket SNI unless connection is reused. */ + if (srv && srv->ssl_ctx.sni && !(s->flags & SF_SRV_REUSED)) { + struct sample *sni_smp = NULL; + + sni_smp = sample_fetch_as_type(s->be, s->sess, s, + SMP_OPT_DIR_REQ | SMP_OPT_FINAL, + srv->ssl_ctx.sni, SMP_T_STR); if (smp_make_safe(sni_smp)) ssl_sock_set_servername(srv_conn, sni_smp->data.u.str.area); } @@ -2515,8 +2555,8 @@ void back_handle_st_rdy(struct stream *s) */ void set_backend_down(struct proxy *be) { - be->last_change = ns_to_sec(now_ns); - _HA_ATOMIC_INC(&be->down_trans); + be->be_counters.last_change = ns_to_sec(now_ns); + _HA_ATOMIC_INC(&be->be_counters.down_trans); if (!(global.mode & MODE_STARTING)) { ha_alert("%s '%s' has no server available!\n", proxy_type_str(be), be->id); @@ -2588,10 +2628,10 @@ no_cookie: } int be_downtime(struct proxy *px) { - if (px->lbprm.tot_weight && px->last_change < ns_to_sec(now_ns)) // ignore negative time + if (px->lbprm.tot_weight && px->be_counters.last_change < ns_to_sec(now_ns)) // ignore negative time return px->down_time; - return ns_to_sec(now_ns) - px->last_change + px->down_time; + return ns_to_sec(now_ns) - px->be_counters.last_change + px->down_time; } /* @@ -2836,7 +2876,7 @@ int backend_parse_balance(const char **args, char **err, struct proxy *curproxy) } else if (strcmp(args[0], "sticky") == 0) { curproxy->lbprm.algo &= ~BE_LB_ALGO; - curproxy->lbprm.algo |= BE_LB_ALGO_LS; + curproxy->lbprm.algo |= BE_LB_ALGO_SS; } else { memprintf(err, "only supports 'roundrobin', 'static-rr', 'leastconn', 'source', 'uri', 'url_param', 'hash', 'hdr(name)', 'rdp-cookie(name)', 'log-hash' and 'sticky' options."); @@ -3029,7 +3069,7 @@ smp_fetch_be_sess_rate(const struct arg *args, struct sample *smp, const char *k 
smp->flags = SMP_F_VOL_TEST; smp->data.type = SMP_T_SINT; - smp->data.u.sint = read_freq_ctr(&px->be_sess_per_sec); + smp->data.u.sint = read_freq_ctr(&px->be_counters.sess_per_sec); return 1; } @@ -3212,7 +3252,7 @@ smp_fetch_srv_sess_rate(const struct arg *args, struct sample *smp, const char * { smp->flags = SMP_F_VOL_TEST; smp->data.type = SMP_T_SINT; - smp->data.u.sint = read_freq_ctr(&args->data.srv->sess_per_sec); + smp->data.u.sint = read_freq_ctr(&args->data.srv->counters.sess_per_sec); return 1; } diff --git a/src/cache.c b/src/cache.c index 9f12f10..32f2e47 100644 --- a/src/cache.c +++ b/src/cache.c @@ -77,6 +77,7 @@ struct cache_appctx { unsigned int rem_data; /* Remaining bytes for the last data block (HTX only, 0 means process next block) */ unsigned int send_notmodified:1; /* In case of conditional request, we might want to send a "304 Not Modified" response instead of the stored data. */ unsigned int unused:31; + /* 4 bytes hole here */ struct shared_block *next; /* The next block of data to be sent for this cache entry. 
*/ }; @@ -193,7 +194,7 @@ struct cache_entry { unsigned int latest_validation; /* latest validation date */ unsigned int expire; /* expiration date (wall clock time) */ unsigned int age; /* Origin server "Age" header value */ - + unsigned int body_size; /* Size of the body */ int refcount; struct eb32_node eb; /* ebtree node used to hold the cache object */ @@ -231,8 +232,8 @@ DECLARE_STATIC_POOL(pool_head_cache_st, "cache_st", sizeof(struct cache_st)); static struct eb32_node *insert_entry(struct cache *cache, struct cache_tree *tree, struct cache_entry *new_entry); static void delete_entry(struct cache_entry *del_entry); -static void release_entry_locked(struct cache_tree *cache, struct cache_entry *entry); -static void release_entry_unlocked(struct cache_tree *cache, struct cache_entry *entry); +static inline void release_entry_locked(struct cache_tree *cache, struct cache_entry *entry); +static inline void release_entry_unlocked(struct cache_tree *cache, struct cache_entry *entry); /* * Find a cache_entry in the <cache>'s tree that has the hash <hash>. 
@@ -753,6 +754,7 @@ cache_store_http_payload(struct stream *s, struct filter *filter, struct http_ms struct htx_blk *blk; struct shared_block *fb; struct htx_ret htxret; + size_t data_len = 0; unsigned int orig_len, to_forward; int ret; @@ -789,6 +791,7 @@ cache_store_http_payload(struct stream *s, struct filter *filter, struct http_ms chunk_memcat(&trash, (char *)&info, sizeof(info)); chunk_istcat(&trash, v); to_forward += v.len; + data_len += v.len; len -= v.len; break; @@ -817,6 +820,8 @@ cache_store_http_payload(struct stream *s, struct filter *filter, struct http_ms goto no_cache; } + /* disguise below to shut a warning on */ + DISGUISE((struct cache_entry *)st->first_block->data)->body_size += data_len; ret = shctx_row_data_append(shctx, st->first_block, (unsigned char *)b_head(&trash), b_data(&trash)); if (ret < 0) @@ -1133,7 +1138,7 @@ static int http_check_vary_header(struct htx *htx, unsigned int *vary_signature) * "vary" on the accept-encoding value. * Returns 0 if we found a known encoding in the response, -1 otherwise. */ -static int set_secondary_key_encoding(struct htx *htx, char *secondary_key) +static int set_secondary_key_encoding(struct htx *htx, unsigned int vary_signature, char *secondary_key) { unsigned int resp_encoding_bitmap = 0; const struct vary_hashing_information *info = vary_information; @@ -1143,6 +1148,11 @@ static int set_secondary_key_encoding(struct htx *htx, char *secondary_key) unsigned int encoding_value; struct http_hdr_ctx ctx = { .blk = NULL }; + /* We must not set the accept encoding part of the secondary signature + * if the response does not vary on 'Accept Encoding'. */ + if (!(vary_signature & VARY_ACCEPT_ENCODING)) + return 0; + /* Look for the accept-encoding part of the secondary_key. 
*/ while (count < hash_info_count && info->value != VARY_ACCEPT_ENCODING) { offset += info->hash_length; @@ -1404,7 +1414,7 @@ enum act_return http_action_store_cache(struct act_rule *rule, struct proxy *px, * We will not cache a response that has an unknown encoding (not * explicitly supported in parse_encoding_value function). */ if (cache->vary_processing_enabled && vary_signature) - if (set_secondary_key_encoding(htx, object->secondary_key)) + if (set_secondary_key_encoding(htx, vary_signature, object->secondary_key)) goto out; if (!shctx_row_reserve_hot(shctx, first, trash.data)) { @@ -1480,8 +1490,7 @@ static unsigned int htx_cache_dump_blk(struct appctx *appctx, struct htx *htx, e unsigned int max, total; uint32_t blksz; - max = htx_get_max_blksz(htx, - channel_htx_recv_max(sc_ic(appctx_sc(appctx)), htx)); + max = htx_free_data_space(htx); if (!max) return 0; blksz = ((type == HTX_BLK_HDR || type == HTX_BLK_TLR) @@ -1521,14 +1530,14 @@ static unsigned int htx_cache_dump_data_blk(struct appctx *appctx, struct htx *h struct cache_appctx *ctx = appctx->svcctx; struct cache_flt_conf *cconf = appctx->rule->arg.act.p[0]; struct shared_context *shctx = shctx_ptr(cconf->c.cache); - unsigned int max, total, rem_data; + unsigned int max, total, rem_data, data_len; uint32_t blksz; - max = htx_get_max_blksz(htx, - channel_htx_recv_max(sc_ic(appctx_sc(appctx)), htx)); + max = htx_free_data_space(htx); if (!max) return 0; + data_len = 0; rem_data = 0; if (ctx->rem_data) { blksz = ctx->rem_data; @@ -1551,6 +1560,7 @@ static unsigned int htx_cache_dump_data_blk(struct appctx *appctx, struct htx *h offset += sz; blksz -= sz; total += sz; + data_len += sz; if (sz < max) break; if (blksz || offset == shctx->block_size) { @@ -1563,6 +1573,7 @@ static unsigned int htx_cache_dump_data_blk(struct appctx *appctx, struct htx *h ctx->next = shblk; ctx->sent += total; ctx->rem_data = rem_data + blksz; + appctx->to_forward -= data_len; return total; } @@ -1619,6 +1630,108 @@ static 
size_t htx_cache_dump_msg(struct appctx *appctx, struct htx *htx, unsigne return total; } +static unsigned int ff_cache_dump_data_blk(struct appctx *appctx, struct buffer *buf, unsigned int len, + uint32_t info, struct shared_block *shblk, unsigned int offset) +{ + struct cache_appctx *ctx = appctx->svcctx; + struct cache_flt_conf *cconf = appctx->rule->arg.act.p[0]; + struct shared_context *shctx = shctx_ptr(cconf->c.cache); + unsigned int total, rem_data, data_len; + uint32_t blksz; + + total = 0; + data_len = 0; + rem_data = 0; + if (ctx->rem_data) + blksz = ctx->rem_data; + else { + blksz = (info & 0xfffffff); + ctx->sent += 4; + } + if (blksz > len) { + rem_data = blksz - len; + blksz = len; + } + + while (blksz) { + size_t sz; + + len = MIN(blksz, shctx->block_size - offset); + sz = b_putblk(buf, (char *)(shblk->data + offset), len); + offset += sz; + blksz -= sz; + total += sz; + data_len += sz; + if (sz < len) + break; + if (blksz || offset == shctx->block_size) { + shblk = LIST_NEXT(&shblk->list, typeof(shblk), list); + offset = 0; + } + } + + ctx->offset = offset; + ctx->next = shblk; + ctx->sent += total; + ctx->rem_data = rem_data + blksz; + appctx->to_forward -= data_len; + return total; +} + +static size_t ff_cache_dump_msg(struct appctx *appctx, struct buffer *buf, unsigned int len) +{ + struct cache_appctx *ctx = appctx->svcctx; + struct cache_entry *cache_ptr = ctx->entry; + struct shared_block *first = block_ptr(cache_ptr); + struct cache_flt_conf *cconf = appctx->rule->arg.act.p[0]; + struct shared_context *shctx = shctx_ptr(cconf->c.cache); + struct shared_block *shblk; + unsigned int offset, sz; + unsigned int ret, total = 0; + + while (len && (ctx->sent != first->len - sizeof(*cache_ptr))) { + enum htx_blk_type type; + uint32_t info; + + shblk = ctx->next; + offset = ctx->offset; + if (ctx->rem_data) { + type = HTX_BLK_DATA; + info = 0; + goto add_data_blk; + } + + /* Get info of the next HTX block. 
May be split on 2 shblk */ + sz = MIN(4, shctx->block_size - offset); + memcpy((char *)&info, (const char *)shblk->data + offset, sz); + offset += sz; + if (sz < 4) { + shblk = LIST_NEXT(&shblk->list, typeof(shblk), list); + memcpy(((char *)&info)+sz, (const char *)shblk->data, 4 - sz); + offset = (4 - sz); + } + + /* Get payload of the next HTX block and insert it. */ + type = (info >> 28); + if (type == HTX_BLK_DATA) { + add_data_blk: + ret = ff_cache_dump_data_blk(appctx, buf, len, info, shblk, offset); + } + else + ret = 0; + + if (!ret) + break; + total += ret; + len -= ret; + + if (ctx->rem_data) + break; + } + + return total; +} + static int htx_cache_add_age_hdr(struct appctx *appctx, struct htx *htx) { struct cache_appctx *ctx = appctx->svcctx; @@ -1637,31 +1750,58 @@ static int htx_cache_add_age_hdr(struct appctx *appctx, struct htx *htx) return 1; } +static size_t http_cache_fastfwd(struct appctx *appctx, struct buffer *buf, size_t count, unsigned int flags) +{ + struct cache_appctx *ctx = appctx->svcctx; + struct cache_entry *cache_ptr = ctx->entry; + struct shared_block *first = block_ptr(cache_ptr); + size_t ret; + + BUG_ON(!appctx->to_forward || count > appctx->to_forward); + + ret = ff_cache_dump_msg(appctx, buf, count); + + if (!appctx->to_forward) { + se_fl_clr(appctx->sedesc, SE_FL_MAY_FASTFWD_PROD); + applet_fl_clr(appctx, APPCTX_FL_FASTFWD); + if (ctx->sent == first->len - sizeof(*cache_ptr)) { + applet_set_eoi(appctx); + applet_set_eos(appctx); + appctx->st0 = HTX_CACHE_END; + } + } + return ret; +} + static void http_cache_io_handler(struct appctx *appctx) { struct cache_appctx *ctx = appctx->svcctx; struct cache_entry *cache_ptr = ctx->entry; struct shared_block *first = block_ptr(cache_ptr); - struct stconn *sc = appctx_sc(appctx); - struct channel *req = sc_oc(sc); - struct channel *res = sc_ic(sc); - struct htx *req_htx, *res_htx; + struct htx *res_htx = NULL; struct buffer *errmsg; unsigned int len; - size_t ret, total = 0; + size_t ret; 
- res_htx = htx_from_buf(&res->buf); - total = res_htx->data; + if (applet_fl_test(appctx, APPCTX_FL_OUTBLK_ALLOC|APPCTX_FL_OUTBLK_FULL)) + goto exit; - if (unlikely(se_fl_test(appctx->sedesc, (SE_FL_EOS|SE_FL_ERROR|SE_FL_SHR|SE_FL_SHW)))) - goto out; + if (applet_fl_test(appctx, APPCTX_FL_FASTFWD) && se_fl_test(appctx->sedesc, SE_FL_MAY_FASTFWD_PROD)) + goto exit; - /* Check if the input buffer is available. */ - if (!b_size(&res->buf)) { - sc_need_room(sc, 0); - goto out; + if (!appctx_get_buf(appctx, &appctx->outbuf)) { + goto exit; + } + + if (unlikely(applet_fl_test(appctx, APPCTX_FL_EOS|APPCTX_FL_ERROR))) { + goto exit; } + res_htx = htx_from_buf(&appctx->outbuf); + + len = first->len - sizeof(*cache_ptr) - ctx->sent; + res_htx = htx_from_buf(&appctx->outbuf); + if (appctx->st0 == HTX_CACHE_INIT) { ctx->next = block_ptr(cache_ptr); ctx->offset = sizeof(*cache_ptr); @@ -1671,8 +1811,13 @@ static void http_cache_io_handler(struct appctx *appctx) } if (appctx->st0 == HTX_CACHE_HEADER) { + struct ist meth; + + if (unlikely(applet_fl_test(appctx, APPCTX_FL_INBLK_ALLOC))) { + goto exit; + } + /* Headers must be dump at once. Otherwise it is an error */ - len = first->len - sizeof(*cache_ptr) - ctx->sent; ret = htx_cache_dump_msg(appctx, res_htx, len, HTX_BLK_EOH); if (!ret || (htx_get_tail_type(res_htx) != HTX_BLK_EOH) || !htx_cache_add_age_hdr(appctx, res_htx)) @@ -1689,60 +1834,66 @@ static void http_cache_io_handler(struct appctx *appctx) /* Skip response body for HEAD requests or in case of "304 Not * Modified" response. 
*/ - if (__sc_strm(sc)->txn->meth == HTTP_METH_HEAD || ctx->send_notmodified) + meth = htx_sl_req_meth(http_get_stline(htxbuf(&appctx->inbuf))); + if (find_http_meth(istptr(meth), istlen(meth)) == HTTP_METH_HEAD || ctx->send_notmodified) appctx->st0 = HTX_CACHE_EOM; - else + else { + if (!(global.tune.no_zero_copy_fwd & NO_ZERO_COPY_FWD_APPLET)) + se_fl_set(appctx->sedesc, SE_FL_MAY_FASTFWD_PROD); + + appctx->to_forward = cache_ptr->body_size; + len = first->len - sizeof(*cache_ptr) - ctx->sent; appctx->st0 = HTX_CACHE_DATA; + } } if (appctx->st0 == HTX_CACHE_DATA) { - len = first->len - sizeof(*cache_ptr) - ctx->sent; if (len) { ret = htx_cache_dump_msg(appctx, res_htx, len, HTX_BLK_UNUSED); if (ret < len) { - sc_need_room(sc, channel_htx_recv_max(res, res_htx) + 1); + applet_fl_set(appctx, APPCTX_FL_OUTBLK_FULL); goto out; } } + BUG_ON(appctx->to_forward); appctx->st0 = HTX_CACHE_EOM; } if (appctx->st0 == HTX_CACHE_EOM) { /* no more data are expected. */ res_htx->flags |= HTX_FL_EOM; - se_fl_set(appctx->sedesc, SE_FL_EOI); - + applet_set_eoi(appctx); + se_fl_clr(appctx->sedesc, SE_FL_MAY_FASTFWD_PROD); + applet_fl_clr(appctx, APPCTX_FL_FASTFWD); appctx->st0 = HTX_CACHE_END; } end: - if (appctx->st0 == HTX_CACHE_END) - se_fl_set(appctx->sedesc, SE_FL_EOS); + if (appctx->st0 == HTX_CACHE_END) { + applet_set_eos(appctx); + } out: - total = res_htx->data - total; - if (total) - channel_add_input(res, total); - htx_to_buf(res_htx, &res->buf); + if (res_htx) + htx_to_buf(res_htx, &appctx->outbuf); + exit: /* eat the whole request */ - if (co_data(req)) { - req_htx = htx_from_buf(&req->buf); - co_htx_skip(req, req_htx, co_data(req)); - htx_to_buf(req_htx, &req->buf); - } + b_reset(&appctx->inbuf); + applet_fl_clr(appctx, APPCTX_FL_INBLK_FULL); + appctx->sedesc->iobuf.flags &= ~IOBUF_FL_FF_BLOCKED; return; error: /* Sent and HTTP error 500 */ - b_reset(&res->buf); + b_reset(&appctx->outbuf); errmsg = &http_err_chunks[HTTP_ERR_500]; - res->buf.data = b_data(errmsg); - 
memcpy(res->buf.area, b_head(errmsg), b_data(errmsg)); - res_htx = htx_from_buf(&res->buf); + appctx->outbuf.data = b_data(errmsg); + memcpy(appctx->outbuf.area, b_head(errmsg), b_data(errmsg)); + res_htx = htx_from_buf(&appctx->outbuf); - total = 0; - se_fl_set(appctx->sedesc, SE_FL_ERROR); + applet_set_eos(appctx); + applet_set_error(appctx); appctx->st0 = HTX_CACHE_END; goto end; } @@ -2324,7 +2475,7 @@ int post_check_cache() list_for_each_entry_safe(cache_config, back, &caches_config, list) { ret_shctx = shctx_init(&shctx, cache_config->maxblocks, CACHE_BLOCKSIZE, - cache_config->maxobjsz, sizeof(struct cache)); + cache_config->maxobjsz, sizeof(struct cache), cache_config->id); if (ret_shctx <= 0) { if (ret_shctx == SHCTX_E_INIT_LOCK) @@ -2995,9 +3146,13 @@ struct applet http_cache_applet = { .obj_type = OBJ_TYPE_APPLET, .name = "<CACHE>", /* used for logging */ .fct = http_cache_io_handler, + .rcv_buf = appctx_htx_rcv_buf, + .snd_buf = appctx_htx_snd_buf, + .fastfwd = http_cache_fastfwd, .release = http_cache_applet_release, }; + /* config parsers for this section */ REGISTER_CONFIG_SECTION("cache", cfg_parse_cache, cfg_post_parse_section_cache); REGISTER_POST_CHECK(post_check_cache); diff --git a/src/cfgparse-global.c b/src/cfgparse-global.c index f31e7a0..452c0e5 100644 --- a/src/cfgparse-global.c +++ b/src/cfgparse-global.c @@ -36,8 +36,7 @@ static const char *common_kw_list[] = { "insecure-fork-wanted", "insecure-setuid-wanted", "nosplice", "nogetaddrinfo", "noreuseport", "quiet", "zero-warning", "tune.runqueue-depth", "tune.maxpollevents", "tune.maxaccept", - "tune.recv_enough", "tune.buffers.limit", - "tune.buffers.reserve", "tune.bufsize", "tune.maxrewrite", + "tune.recv_enough", "tune.bufsize", "tune.maxrewrite", "tune.idletimer", "tune.rcvbuf.client", "tune.rcvbuf.server", "tune.sndbuf.client", "tune.sndbuf.server", "tune.pipesize", "tune.http.cookielen", "tune.http.logurilen", "tune.http.maxhdr", @@ -52,6 +51,7 @@ static const char *common_kw_list[] 
= { "presetenv", "unsetenv", "resetenv", "strict-limits", "localpeer", "numa-cpu-mapping", "defaults", "listen", "frontend", "backend", "peers", "resolvers", "cluster-secret", "no-quic", "limited-quic", + "stats-file", NULL /* must be last */ }; @@ -75,6 +75,9 @@ int cfg_parse_global(const char *file, int linenum, char **args, int kwm) alertif_too_many_args(0, file, linenum, args, &err_code); goto out; } + else if (strcmp(args[0], "expose-deprecated-directives") == 0) { + deprecated_directives_allowed = 1; + } else if (strcmp(args[0], "expose-experimental-directives") == 0) { experimental_directives_allowed = 1; } @@ -263,36 +266,6 @@ int cfg_parse_global(const char *file, int linenum, char **args, int kwm) } global.tune.recv_enough = atol(args[1]); } - else if (strcmp(args[0], "tune.buffers.limit") == 0) { - if (alertif_too_many_args(1, file, linenum, args, &err_code)) - goto out; - if (*(args[1]) == 0) { - ha_alert("parsing [%s:%d] : '%s' expects an integer argument.\n", file, linenum, args[0]); - err_code |= ERR_ALERT | ERR_FATAL; - goto out; - } - global.tune.buf_limit = atol(args[1]); - if (global.tune.buf_limit) { - if (global.tune.buf_limit < 3) - global.tune.buf_limit = 3; - if (global.tune.buf_limit <= global.tune.reserved_bufs) - global.tune.buf_limit = global.tune.reserved_bufs + 1; - } - } - else if (strcmp(args[0], "tune.buffers.reserve") == 0) { - if (alertif_too_many_args(1, file, linenum, args, &err_code)) - goto out; - if (*(args[1]) == 0) { - ha_alert("parsing [%s:%d] : '%s' expects an integer argument.\n", file, linenum, args[0]); - err_code |= ERR_ALERT | ERR_FATAL; - goto out; - } - global.tune.reserved_bufs = atol(args[1]); - if (global.tune.reserved_bufs < 2) - global.tune.reserved_bufs = 2; - if (global.tune.buf_limit && global.tune.buf_limit <= global.tune.reserved_bufs) - global.tune.buf_limit = global.tune.reserved_bufs + 1; - } else if (strcmp(args[0], "tune.bufsize") == 0) { if (alertif_too_many_args(1, file, linenum, args, &err_code)) 
goto out; @@ -1028,6 +1001,21 @@ int cfg_parse_global(const char *file, int linenum, char **args, int kwm) global.server_state_file = strdup(args[1]); } + else if (strcmp(args[0], "stats-file") == 0) { /* path to the file where HAProxy can load the server states */ + if (global.stats_file != NULL) { + ha_alert("parsing [%s:%d] : '%s' already specified. Continuing.\n", file, linenum, args[0]); + err_code |= ERR_ALERT; + goto out; + } + + if (!*(args[1])) { + ha_alert("parsing [%s:%d] : '%s' expect one argument: a file path.\n", file, linenum, args[0]); + err_code |= ERR_FATAL; + goto out; + } + + global.stats_file = strdup(args[1]); + } else if (strcmp(args[0], "log-tag") == 0) { /* tag to report to syslog */ if (alertif_too_many_args(1, file, linenum, args, &err_code)) goto out; @@ -1388,8 +1376,59 @@ static int cfg_parse_prealloc_fd(char **args, int section_type, struct proxy *cu return 0; } +/* Parser for harden.reject-privileged-ports.{tcp|quic}. */ +static int cfg_parse_reject_privileged_ports(char **args, int section_type, + struct proxy *curpx, + const struct proxy *defpx, + const char *file, int line, char **err) +{ + struct ist proto; + char onoff; + + if (!*(args[1])) { + memprintf(err, "'%s' expects either 'on' or 'off'.", args[0]); + return -1; + } + + proto = ist(args[0]); + while (istlen(istfind(proto, '.'))) + proto = istadv(istfind(proto, '.'), 1); + + if (strcmp(args[1], "on") == 0) { + onoff = 1; + } + else if (strcmp(args[1], "off") == 0) { + onoff = 0; + } + else { + memprintf(err, "'%s' expects either 'on' or 'off'.", args[0]); + return -1; + } + + if (istmatch(proto, ist("tcp"))) { + if (!onoff) + global.clt_privileged_ports |= HA_PROTO_TCP; + else + global.clt_privileged_ports &= ~HA_PROTO_TCP; + } + else if (istmatch(proto, ist("quic"))) { + if (!onoff) + global.clt_privileged_ports |= HA_PROTO_QUIC; + else + global.clt_privileged_ports &= ~HA_PROTO_QUIC; + } + else { + memprintf(err, "invalid protocol for '%s'.", args[0]); + return -1; + } + 
+ return 0; +} + static struct cfg_kw_list cfg_kws = {ILH, { { CFG_GLOBAL, "prealloc-fd", cfg_parse_prealloc_fd }, + { CFG_GLOBAL, "harden.reject-privileged-ports.tcp", cfg_parse_reject_privileged_ports }, + { CFG_GLOBAL, "harden.reject-privileged-ports.quic", cfg_parse_reject_privileged_ports }, { 0, NULL, NULL }, }}; diff --git a/src/cfgparse-listen.c b/src/cfgparse-listen.c index a97b1e5..9ee8174 100644 --- a/src/cfgparse-listen.c +++ b/src/cfgparse-listen.c @@ -1819,13 +1819,13 @@ int cfg_parse_listen(const char *file, int linenum, char **args, int kwm) if (!stats_check_init_uri_auth(&curproxy->uri_auth)) goto alloc_error; } else if (strcmp(args[1], "hide-version") == 0) { - if (!stats_set_flag(&curproxy->uri_auth, STAT_HIDEVER)) + if (!stats_set_flag(&curproxy->uri_auth, STAT_F_HIDEVER)) goto alloc_error; } else if (strcmp(args[1], "show-legends") == 0) { - if (!stats_set_flag(&curproxy->uri_auth, STAT_SHLGNDS)) + if (!stats_set_flag(&curproxy->uri_auth, STAT_F_SHLGNDS)) goto alloc_error; } else if (strcmp(args[1], "show-modules") == 0) { - if (!stats_set_flag(&curproxy->uri_auth, STAT_SHMODULES)) + if (!stats_set_flag(&curproxy->uri_auth, STAT_F_SHMODULES)) goto alloc_error; } else if (strcmp(args[1], "show-node") == 0) { @@ -2096,33 +2096,27 @@ stats_error_parsing: if (alertif_too_many_args_idx(1, 1, file, linenum, args, &err_code)) goto out; } - if (curproxy->conf.logformat_string && curproxy->cap & PR_CAP_DEF) { + if (curproxy->logformat.str && curproxy->cap & PR_CAP_DEF) { char *oldlogformat = "log-format"; char *clflogformat = ""; - if (curproxy->conf.logformat_string == default_http_log_format) + if (curproxy->logformat.str == default_http_log_format) oldlogformat = "option httplog"; - else if (curproxy->conf.logformat_string == default_tcp_log_format) + else if (curproxy->logformat.str == default_tcp_log_format) oldlogformat = "option tcplog"; - else if (curproxy->conf.logformat_string == clf_http_log_format) + else if (curproxy->logformat.str == 
clf_http_log_format) oldlogformat = "option httplog clf"; - else if (curproxy->conf.logformat_string == default_https_log_format) + else if (curproxy->logformat.str == default_https_log_format) oldlogformat = "option httpslog"; if (logformat == clf_http_log_format) clflogformat = " clf"; ha_warning("parsing [%s:%d]: 'option httplog%s' overrides previous '%s' in 'defaults' section.\n", file, linenum, clflogformat, oldlogformat); } - if (curproxy->conf.logformat_string != default_http_log_format && - curproxy->conf.logformat_string != default_tcp_log_format && - curproxy->conf.logformat_string != clf_http_log_format && - curproxy->conf.logformat_string != default_https_log_format) - free(curproxy->conf.logformat_string); - curproxy->conf.logformat_string = logformat; - - free(curproxy->conf.lfs_file); - curproxy->conf.lfs_file = strdup(curproxy->conf.args.file); - curproxy->conf.lfs_line = curproxy->conf.args.line; + lf_expr_deinit(&curproxy->logformat); + curproxy->logformat.str = logformat; + curproxy->logformat.conf.file = strdup(curproxy->conf.args.file); + curproxy->logformat.conf.line = curproxy->conf.args.line; if (!(curproxy->cap & PR_CAP_DEF) && !(curproxy->cap & PR_CAP_FE)) { ha_warning("parsing [%s:%d] : backend '%s' : 'option httplog' directive is ignored in backends.\n", @@ -2131,31 +2125,25 @@ stats_error_parsing: } } else if (strcmp(args[1], "tcplog") == 0) { - if (curproxy->conf.logformat_string && curproxy->cap & PR_CAP_DEF) { + if (curproxy->logformat.str && curproxy->cap & PR_CAP_DEF) { char *oldlogformat = "log-format"; - if (curproxy->conf.logformat_string == default_http_log_format) + if (curproxy->logformat.str == default_http_log_format) oldlogformat = "option httplog"; - else if (curproxy->conf.logformat_string == default_tcp_log_format) + else if (curproxy->logformat.str == default_tcp_log_format) oldlogformat = "option tcplog"; - else if (curproxy->conf.logformat_string == clf_http_log_format) + else if (curproxy->logformat.str == 
clf_http_log_format) oldlogformat = "option httplog clf"; - else if (curproxy->conf.logformat_string == default_https_log_format) + else if (curproxy->logformat.str == default_https_log_format) oldlogformat = "option httpslog"; ha_warning("parsing [%s:%d]: 'option tcplog' overrides previous '%s' in 'defaults' section.\n", file, linenum, oldlogformat); } /* generate a detailed TCP log */ - if (curproxy->conf.logformat_string != default_http_log_format && - curproxy->conf.logformat_string != default_tcp_log_format && - curproxy->conf.logformat_string != clf_http_log_format && - curproxy->conf.logformat_string != default_https_log_format) - free(curproxy->conf.logformat_string); - curproxy->conf.logformat_string = default_tcp_log_format; - - free(curproxy->conf.lfs_file); - curproxy->conf.lfs_file = strdup(curproxy->conf.args.file); - curproxy->conf.lfs_line = curproxy->conf.args.line; + lf_expr_deinit(&curproxy->logformat); + curproxy->logformat.str = default_tcp_log_format; + curproxy->logformat.conf.file = strdup(curproxy->conf.args.file); + curproxy->logformat.conf.line = curproxy->conf.args.line; if (alertif_too_many_args_idx(0, 1, file, linenum, args, &err_code)) goto out; @@ -2170,30 +2158,24 @@ stats_error_parsing: char *logformat; /* generate a complete HTTP log */ logformat = default_https_log_format; - if (curproxy->conf.logformat_string && curproxy->cap & PR_CAP_DEF) { + if (curproxy->logformat.str && curproxy->cap & PR_CAP_DEF) { char *oldlogformat = "log-format"; - if (curproxy->conf.logformat_string == default_http_log_format) + if (curproxy->logformat.str == default_http_log_format) oldlogformat = "option httplog"; - else if (curproxy->conf.logformat_string == default_tcp_log_format) + else if (curproxy->logformat.str == default_tcp_log_format) oldlogformat = "option tcplog"; - else if (curproxy->conf.logformat_string == clf_http_log_format) + else if (curproxy->logformat.str == clf_http_log_format) oldlogformat = "option httplog clf"; - else if 
(curproxy->conf.logformat_string == default_https_log_format) + else if (curproxy->logformat.str == default_https_log_format) oldlogformat = "option httpslog"; ha_warning("parsing [%s:%d]: 'option httplog' overrides previous '%s' in 'defaults' section.\n", file, linenum, oldlogformat); } - if (curproxy->conf.logformat_string != default_http_log_format && - curproxy->conf.logformat_string != default_tcp_log_format && - curproxy->conf.logformat_string != clf_http_log_format && - curproxy->conf.logformat_string != default_https_log_format) - free(curproxy->conf.logformat_string); - curproxy->conf.logformat_string = logformat; - - free(curproxy->conf.lfs_file); - curproxy->conf.lfs_file = strdup(curproxy->conf.args.file); - curproxy->conf.lfs_line = curproxy->conf.args.line; + lf_expr_deinit(&curproxy->logformat); + curproxy->logformat.str = logformat; + curproxy->logformat.conf.file = strdup(curproxy->conf.args.file); + curproxy->logformat.conf.line = curproxy->conf.args.line; if (!(curproxy->cap & PR_CAP_DEF) && !(curproxy->cap & PR_CAP_FE)) { ha_warning("parsing [%s:%d] : backend '%s' : 'option httpslog' directive is ignored in backends.\n", @@ -2591,14 +2573,12 @@ stats_error_parsing: err_code |= ERR_ALERT | ERR_FATAL; goto out; } - free(curproxy->conf.uniqueid_format_string); - curproxy->conf.uniqueid_format_string = strdup(args[1]); - if (!curproxy->conf.uniqueid_format_string) + lf_expr_deinit(&curproxy->format_unique_id); + curproxy->format_unique_id.str = strdup(args[1]); + if (!curproxy->format_unique_id.str) goto alloc_error; - - free(curproxy->conf.uif_file); - curproxy->conf.uif_file = strdup(curproxy->conf.args.file); - curproxy->conf.uif_line = curproxy->conf.args.line; + curproxy->format_unique_id.conf.file = strdup(curproxy->conf.args.file); + curproxy->format_unique_id.conf.line = curproxy->conf.args.line; } else if (strcmp(args[0], "unique-id-header") == 0) { @@ -2630,32 +2610,26 @@ stats_error_parsing: err_code |= ERR_ALERT | ERR_FATAL; goto out; } 
- if (curproxy->conf.logformat_string && curproxy->cap & PR_CAP_DEF) { + if (curproxy->logformat.str && curproxy->cap & PR_CAP_DEF) { char *oldlogformat = "log-format"; - if (curproxy->conf.logformat_string == default_http_log_format) + if (curproxy->logformat.str == default_http_log_format) oldlogformat = "option httplog"; - else if (curproxy->conf.logformat_string == default_tcp_log_format) + else if (curproxy->logformat.str == default_tcp_log_format) oldlogformat = "option tcplog"; - else if (curproxy->conf.logformat_string == clf_http_log_format) + else if (curproxy->logformat.str == clf_http_log_format) oldlogformat = "option httplog clf"; - else if (curproxy->conf.logformat_string == default_https_log_format) + else if (curproxy->logformat.str == default_https_log_format) oldlogformat = "option httpslog"; ha_warning("parsing [%s:%d]: 'log-format' overrides previous '%s' in 'defaults' section.\n", file, linenum, oldlogformat); } - if (curproxy->conf.logformat_string != default_http_log_format && - curproxy->conf.logformat_string != default_tcp_log_format && - curproxy->conf.logformat_string != clf_http_log_format && - curproxy->conf.logformat_string != default_https_log_format) - free(curproxy->conf.logformat_string); - curproxy->conf.logformat_string = strdup(args[1]); - if (!curproxy->conf.logformat_string) + lf_expr_deinit(&curproxy->logformat); + curproxy->logformat.str = strdup(args[1]); + if (!curproxy->logformat.str) goto alloc_error; - - free(curproxy->conf.lfs_file); - curproxy->conf.lfs_file = strdup(curproxy->conf.args.file); - curproxy->conf.lfs_line = curproxy->conf.args.line; + curproxy->logformat.conf.file = strdup(curproxy->conf.args.file); + curproxy->logformat.conf.line = curproxy->conf.args.line; /* get a chance to improve log-format error reporting by * reporting the correct line-number when possible. 
@@ -2678,15 +2652,12 @@ stats_error_parsing: goto out; } - if (curproxy->conf.logformat_sd_string != default_rfc5424_sd_log_format) - free(curproxy->conf.logformat_sd_string); - curproxy->conf.logformat_sd_string = strdup(args[1]); - if (!curproxy->conf.logformat_sd_string) + lf_expr_deinit(&curproxy->logformat_sd); + curproxy->logformat_sd.str = strdup(args[1]); + if (!curproxy->logformat_sd.str) goto alloc_error; - - free(curproxy->conf.lfsd_file); - curproxy->conf.lfsd_file = strdup(curproxy->conf.args.file); - curproxy->conf.lfsd_line = curproxy->conf.args.line; + curproxy->logformat_sd.conf.file = strdup(curproxy->conf.args.file); + curproxy->logformat_sd.conf.line = curproxy->conf.args.line; /* get a chance to improve log-format-sd error reporting by * reporting the correct line-number when possible. @@ -2708,18 +2679,17 @@ stats_error_parsing: err_code |= ERR_ALERT | ERR_FATAL; goto out; } - if (curproxy->conf.error_logformat_string && curproxy->cap & PR_CAP_DEF) { + if (curproxy->logformat_error.str && curproxy->cap & PR_CAP_DEF) { ha_warning("parsing [%s:%d]: 'error-log-format' overrides previous 'error-log-format' in 'defaults' section.\n", file, linenum); } - free(curproxy->conf.error_logformat_string); - curproxy->conf.error_logformat_string = strdup(args[1]); - if (!curproxy->conf.error_logformat_string) + lf_expr_deinit(&curproxy->logformat_error); + curproxy->logformat_error.str = strdup(args[1]); + if (!curproxy->logformat_error.str) goto alloc_error; - free(curproxy->conf.elfs_file); - curproxy->conf.elfs_file = strdup(curproxy->conf.args.file); - curproxy->conf.elfs_line = curproxy->conf.args.line; + curproxy->logformat_error.conf.file = strdup(curproxy->conf.args.file); + curproxy->logformat_error.conf.line = curproxy->conf.args.line;; /* get a chance to improve log-format error reporting by * reporting the correct line-number when possible. 
diff --git a/src/cfgparse-quic.c b/src/cfgparse-quic.c index 3b38efa..4a23bf2 100644 --- a/src/cfgparse-quic.c +++ b/src/cfgparse-quic.c @@ -235,6 +235,8 @@ static int cfg_parse_quic_tune_setting(char **args, int section_type, suffix = args[0] + prefix_len; if (strcmp(suffix, "frontend.conn-tx-buffers.limit") == 0) global.tune.quic_streams_buf = arg; + else if (strcmp(suffix, "frontend.glitches-threshold") == 0) + global.tune.quic_frontend_glitches_threshold = arg; else if (strcmp(suffix, "frontend.max-streams-bidi") == 0) global.tune.quic_frontend_max_streams_bidi = arg; else if (strcmp(suffix, "max-frame-loss") == 0) @@ -257,35 +259,56 @@ static int cfg_parse_quic_tune_setting(char **args, int section_type, return 0; } -/* config parser for global "tune.quic.zero-copy-fwd-send" */ -static int cfg_parse_quic_zero_copy_fwd_snd(char **args, int section_type, struct proxy *curpx, - const struct proxy *defpx, const char *file, int line, - char **err) +/* config parser for global "tune.quic.* {on|off}" */ +static int cfg_parse_quic_tune_on_off(char **args, int section_type, struct proxy *curpx, + const struct proxy *defpx, const char *file, int line, + char **err) { + int on; + int prefix_len = strlen("tune.quic."); + const char *suffix; + if (too_many_args(1, args, err, NULL)) return -1; if (strcmp(args[1], "on") == 0) - global.tune.no_zero_copy_fwd &= ~NO_ZERO_COPY_FWD_QUIC_SND; + on = 1; else if (strcmp(args[1], "off") == 0) - global.tune.no_zero_copy_fwd |= NO_ZERO_COPY_FWD_QUIC_SND; + on = 0; else { memprintf(err, "'%s' expects 'on' or 'off'.", args[0]); return -1; } + + suffix = args[0] + prefix_len; + if (strcmp(suffix, "zero-copy-fwd-send") == 0 ) { + if (on) + global.tune.no_zero_copy_fwd &= ~NO_ZERO_COPY_FWD_QUIC_SND; + else + global.tune.no_zero_copy_fwd |= NO_ZERO_COPY_FWD_QUIC_SND; + } + else if (strcmp(suffix, "cc-hystart") == 0) { + if (on) + global.tune.options |= GTUNE_QUIC_CC_HYSTART; + else + global.tune.options &= ~GTUNE_QUIC_CC_HYSTART; + } + 
return 0; } static struct cfg_kw_list cfg_kws = {ILH, { { CFG_GLOBAL, "tune.quic.socket-owner", cfg_parse_quic_tune_socket_owner }, { CFG_GLOBAL, "tune.quic.backend.max-idle-timeou", cfg_parse_quic_time }, + { CFG_GLOBAL, "tune.quic.cc-hystart", cfg_parse_quic_tune_on_off }, { CFG_GLOBAL, "tune.quic.frontend.conn-tx-buffers.limit", cfg_parse_quic_tune_setting }, + { CFG_GLOBAL, "tune.quic.frontend.glitches-threshold", cfg_parse_quic_tune_setting }, { CFG_GLOBAL, "tune.quic.frontend.max-streams-bidi", cfg_parse_quic_tune_setting }, { CFG_GLOBAL, "tune.quic.frontend.max-idle-timeout", cfg_parse_quic_time }, { CFG_GLOBAL, "tune.quic.max-frame-loss", cfg_parse_quic_tune_setting }, { CFG_GLOBAL, "tune.quic.reorder-ratio", cfg_parse_quic_tune_setting }, { CFG_GLOBAL, "tune.quic.retry-threshold", cfg_parse_quic_tune_setting }, - { CFG_GLOBAL, "tune.quic.zero-copy-fwd-send", cfg_parse_quic_zero_copy_fwd_snd }, + { CFG_GLOBAL, "tune.quic.zero-copy-fwd-send", cfg_parse_quic_tune_on_off }, { 0, NULL, NULL } }}; diff --git a/src/cfgparse-ssl.c b/src/cfgparse-ssl.c index 5666336..e7a7d47 100644 --- a/src/cfgparse-ssl.c +++ b/src/cfgparse-ssl.c @@ -777,22 +777,23 @@ static int bind_parse_ciphersuites(char **args, int cur_arg, struct proxy *px, s static int bind_parse_crt(char **args, int cur_arg, struct proxy *px, struct bind_conf *conf, char **err) { char path[MAXPATHLEN]; + int default_crt = *args[cur_arg] == 'd' ? 
1 : 0; if (!*args[cur_arg + 1]) { memprintf(err, "'%s' : missing certificate location", args[cur_arg]); return ERR_ALERT | ERR_FATAL; } - if ((*args[cur_arg + 1] != '/' ) && global_ssl.crt_base) { + if ((*args[cur_arg + 1] != '@') && (*args[cur_arg + 1] != '/' ) && global_ssl.crt_base) { if ((strlen(global_ssl.crt_base) + 1 + strlen(args[cur_arg + 1]) + 1) > sizeof(path) || snprintf(path, sizeof(path), "%s/%s", global_ssl.crt_base, args[cur_arg + 1]) > sizeof(path)) { memprintf(err, "'%s' : path too long", args[cur_arg]); return ERR_ALERT | ERR_FATAL; } - return ssl_sock_load_cert(path, conf, err); + return ssl_sock_load_cert(path, conf, default_crt, err); } - return ssl_sock_load_cert(args[cur_arg + 1], conf, err); + return ssl_sock_load_cert(args[cur_arg + 1], conf, default_crt, err); } /* parse the "crt-list" bind keyword. Returns a set of ERR_* flags possibly with an error in <err>. */ @@ -1472,35 +1473,6 @@ static int bind_parse_no_ca_names(char **args, int cur_arg, struct proxy *px, st return ssl_bind_parse_no_ca_names(args, cur_arg, px, &conf->ssl_conf, 0, err); } - -static int ssl_bind_parse_ocsp_update(char **args, int cur_arg, struct proxy *px, - struct ssl_bind_conf *ssl_conf, int from_cli, char **err) -{ - if (!*args[cur_arg + 1]) { - memprintf(err, "'%s' : expecting <on|off>", args[cur_arg]); - return ERR_ALERT | ERR_FATAL; - } - - if (strcmp(args[cur_arg + 1], "on") == 0) - ssl_conf->ocsp_update = SSL_SOCK_OCSP_UPDATE_ON; - else if (strcmp(args[cur_arg + 1], "off") == 0) - ssl_conf->ocsp_update = SSL_SOCK_OCSP_UPDATE_OFF; - else { - memprintf(err, "'%s' : expecting <on|off>", args[cur_arg]); - return ERR_ALERT | ERR_FATAL; - } - - if (ssl_conf->ocsp_update == SSL_SOCK_OCSP_UPDATE_ON) { - /* We might need to create the main ocsp update task */ - int ret = ssl_create_ocsp_update_task(err); - if (ret) - return ret; - } - - return 0; -} - - /***************************** "server" keywords Parsing ********************************************/ /* parse the 
"npn" bind keyword */ @@ -1827,7 +1799,7 @@ static int srv_parse_crt(char **args, int *cur_arg, struct proxy *px, struct ser return ERR_ALERT | ERR_FATAL; } - if ((*args[*cur_arg + 1] != '/') && global_ssl.crt_base) + if ((*args[*cur_arg + 1] != '@') && (*args[*cur_arg + 1] != '/') && global_ssl.crt_base) memprintf(&newsrv->ssl_ctx.client_crt, "%s/%s", global_ssl.crt_base, args[*cur_arg + 1]); else memprintf(&newsrv->ssl_ctx.client_crt, "%s", args[*cur_arg + 1]); @@ -2092,16 +2064,23 @@ static int ssl_parse_default_server_options(char **args, int section_type, struc return 0; } -/* parse the "ca-base" / "crt-base" keywords in global section. +/* parse the "ca-base" / "crt-base" / "key-base" keywords in global section. * Returns <0 on alert, >0 on warning, 0 on success. */ -static int ssl_parse_global_ca_crt_base(char **args, int section_type, struct proxy *curpx, +static int ssl_parse_global_path_base(char **args, int section_type, struct proxy *curpx, const struct proxy *defpx, const char *file, int line, char **err) { char **target; - target = (args[0][1] == 'a') ? &global_ssl.ca_base : &global_ssl.crt_base; + if (args[0][1] == 'a') + target = &global_ssl.ca_base; + else if (args[0][1] == 'r') + target = &global_ssl.crt_base; + else if (args[0][1] == 'e') + target = &global_ssl.key_base; + else + return -1; if (too_many_args(1, args, err, NULL)) return -1; @@ -2119,77 +2098,56 @@ static int ssl_parse_global_ca_crt_base(char **args, int section_type, struct pr return 0; } -/* parse the "ssl-skip-self-issued-ca" keyword in global section. */ -static int ssl_parse_skip_self_issued_ca(char **args, int section_type, struct proxy *curpx, - const struct proxy *defpx, const char *file, int line, +/* parse the "ssl-security-level" keyword in global section. 
*/ +static int ssl_parse_security_level(char **args, int section_type, struct proxy *curpx, + const struct proxy *defpx, const char *file, int linenum, char **err) { -#ifdef SSL_CTX_build_cert_chain - global_ssl.skip_self_issued_ca = 1; - return 0; -#else - memprintf(err, "global statement '%s' requires at least OpenSSL 1.0.2.", args[0]); +#ifndef HAVE_SSL_SET_SECURITY_LEVEL + memprintf(err, "global statement '%s' requires at least OpenSSL 1.1.1.", args[0]); return -1; -#endif -} - - -static int ssl_parse_global_ocsp_maxdelay(char **args, int section_type, struct proxy *curpx, - const struct proxy *defpx, const char *file, int line, - char **err) -{ - int value = 0; +#else + char *endptr; - if (*(args[1]) == 0) { - memprintf(err, "'%s' expects an integer argument.", args[0]); + if (!*args[1]) { + ha_alert("parsing [%s:%d] : '%s' : missing value\n", file, linenum, args[0]); return -1; } - value = atoi(args[1]); - if (value < 0) { - memprintf(err, "'%s' expects a positive numeric value.", args[0]); + global_ssl.security_level = strtol(args[1], &endptr, 10); + if (*endptr != '\0') { + ha_alert("parsing [%s:%d] : '%s' : expects an integer argument, found '%s'\n", + file, linenum, args[0], args[1]); return -1; } - if (global_ssl.ocsp_update.delay_min > value) { - memprintf(err, "'%s' can not be lower than tune.ssl.ocsp-update.mindelay.", args[0]); + if (global_ssl.security_level < 0 || global_ssl.security_level > 5) { + ha_alert("parsing [%s:%d] : '%s' : expects a value between 0 and 5\n", + file, linenum, args[0]); return -1; } - - global_ssl.ocsp_update.delay_max = value; +#endif return 0; } -static int ssl_parse_global_ocsp_mindelay(char **args, int section_type, struct proxy *curpx, - const struct proxy *defpx, const char *file, int line, - char **err) +/* parse the "ssl-skip-self-issued-ca" keyword in global section. 
*/ +static int ssl_parse_skip_self_issued_ca(char **args, int section_type, struct proxy *curpx, + const struct proxy *defpx, const char *file, int line, + char **err) { - int value = 0; - - if (*(args[1]) == 0) { - memprintf(err, "'%s' expects an integer argument.", args[0]); - return -1; - } - - value = atoi(args[1]); - if (value < 0) { - memprintf(err, "'%s' expects a positive numeric value.", args[0]); - return -1; - } - - if (value > global_ssl.ocsp_update.delay_max) { - memprintf(err, "'%s' can not be higher than tune.ssl.ocsp-update.maxdelay.", args[0]); - return -1; - } - - global_ssl.ocsp_update.delay_min = value; - +#ifdef SSL_CTX_build_cert_chain + global_ssl.skip_self_issued_ca = 1; return 0; +#else + memprintf(err, "global statement '%s' requires at least OpenSSL 1.0.2.", args[0]); + return -1; +#endif } + /* Note: must not be declared <const> as its list will be overwritten. * Please take care of keeping this list alphabetically sorted, doing so helps * all code contributors. @@ -2199,7 +2157,12 @@ static int ssl_parse_global_ocsp_mindelay(char **args, int section_type, struct */ /* the <ssl_crtlist_kws> keywords are used for crt-list parsing, they *MUST* be safe - * with their proxy argument NULL and must only fill the ssl_bind_conf */ + * with their proxy argument NULL and must only fill the ssl_bind_conf + * + * /!\ Please update configuration.txt at the crt-list option of the Bind options + * section when adding a keyword in ssl_crtlist_kws. 
/!\ + * + */ struct ssl_crtlist_kw ssl_crtlist_kws[] = { { "allow-0rtt", ssl_bind_parse_allow_0rtt, 0 }, /* allow 0-RTT */ { "alpn", ssl_bind_parse_alpn, 1 }, /* set ALPN supported protocols */ @@ -2218,7 +2181,6 @@ struct ssl_crtlist_kw ssl_crtlist_kws[] = { { "ssl-min-ver", ssl_bind_parse_tls_method_minmax,1 }, /* minimum version */ { "ssl-max-ver", ssl_bind_parse_tls_method_minmax,1 }, /* maximum version */ { "verify", ssl_bind_parse_verify, 1 }, /* set SSL verify method */ - { "ocsp-update", ssl_bind_parse_ocsp_update, 1 }, /* ocsp update mode (on or off) */ { NULL, NULL, 0 }, }; @@ -2240,6 +2202,7 @@ static struct bind_kw_list bind_kws = { "SSL", { }, { { "crt-ignore-err", bind_parse_ignore_err, 1 }, /* set error IDs to ignore on verify depth == 0 */ { "crt-list", bind_parse_crt_list, 1 }, /* load a list of crt from this location */ { "curves", bind_parse_curves, 1 }, /* set SSL curve suite */ + { "default-crt", bind_parse_crt, 1 }, /* load SSL certificates from this location */ { "ecdhe", bind_parse_ecdhe, 1 }, /* defines named curve for elliptic curve Diffie-Hellman */ { "force-sslv3", bind_parse_tls_method_options, 0 }, /* force SSLv3 */ { "force-tlsv10", bind_parse_tls_method_options, 0 }, /* force TLSv10 */ @@ -2323,8 +2286,9 @@ static struct srv_kw_list srv_kws = { "SSL", { }, { INITCALL1(STG_REGISTER, srv_register_keywords, &srv_kws); static struct cfg_kw_list cfg_kws = {ILH, { - { CFG_GLOBAL, "ca-base", ssl_parse_global_ca_crt_base }, - { CFG_GLOBAL, "crt-base", ssl_parse_global_ca_crt_base }, + { CFG_GLOBAL, "ca-base", ssl_parse_global_path_base }, + { CFG_GLOBAL, "crt-base", ssl_parse_global_path_base }, + { CFG_GLOBAL, "key-base", ssl_parse_global_path_base }, { CFG_GLOBAL, "issuers-chain-path", ssl_load_global_issuers_from_path }, { CFG_GLOBAL, "maxsslconn", ssl_parse_global_int }, { CFG_GLOBAL, "ssl-default-bind-options", ssl_parse_default_bind_options }, @@ -2341,6 +2305,7 @@ static struct cfg_kw_list cfg_kws = {ILH, { { CFG_GLOBAL, 
"ssl-provider", ssl_parse_global_ssl_provider }, { CFG_GLOBAL, "ssl-provider-path", ssl_parse_global_ssl_provider_path }, #endif + { CFG_GLOBAL, "ssl-security-level", ssl_parse_security_level }, { CFG_GLOBAL, "ssl-skip-self-issued-ca", ssl_parse_skip_self_issued_ca }, { CFG_GLOBAL, "tune.ssl.cachesize", ssl_parse_global_int }, #ifndef OPENSSL_NO_DH @@ -2372,10 +2337,6 @@ static struct cfg_kw_list cfg_kws = {ILH, { { CFG_GLOBAL, "ssl-default-server-ciphersuites", ssl_parse_global_ciphersuites }, { CFG_GLOBAL, "ssl-load-extra-files", ssl_parse_global_extra_files }, { CFG_GLOBAL, "ssl-load-extra-del-ext", ssl_parse_global_extra_noext }, -#ifndef OPENSSL_NO_OCSP - { CFG_GLOBAL, "tune.ssl.ocsp-update.maxdelay", ssl_parse_global_ocsp_maxdelay }, - { CFG_GLOBAL, "tune.ssl.ocsp-update.mindelay", ssl_parse_global_ocsp_mindelay }, -#endif { 0, NULL, NULL }, }}; diff --git a/src/cfgparse-tcp.c b/src/cfgparse-tcp.c index a4f6f29..2f68daf 100644 --- a/src/cfgparse-tcp.c +++ b/src/cfgparse-tcp.c @@ -169,6 +169,8 @@ static int bind_parse_namespace(char **args, int cur_arg, struct proxy *px, stru ha_alert("Cannot open namespace '%s'.\n", args[cur_arg + 1]); return ERR_ALERT | ERR_FATAL; } + global.last_checks |= LSTCHK_SYSADM; + return 0; } #endif diff --git a/src/cfgparse.c b/src/cfgparse.c index bee3040..f5cde50 100644 --- a/src/cfgparse.c +++ b/src/cfgparse.c @@ -64,6 +64,7 @@ #include <haproxy/lb_fwlc.h> #include <haproxy/lb_fwrr.h> #include <haproxy/lb_map.h> +#include <haproxy/lb_ss.h> #include <haproxy/listener.h> #include <haproxy/log.h> #include <haproxy/sink.h> @@ -633,8 +634,6 @@ static struct peer *cfg_peers_add_peer(struct peers *peers, p->conf.file = strdup(file); p->conf.line = linenum; p->last_change = ns_to_sec(now_ns); - p->xprt = xprt_get(XPRT_RAW); - p->sock_init_arg = NULL; HA_SPIN_INIT(&p->lock); if (id) p->id = strdup(id); @@ -659,6 +658,7 @@ static struct peer *cfg_peers_add_peer(struct peers *peers, int cfg_parse_peers(const char *file, int linenum, char 
**args, int kwm) { static struct peers *curpeers = NULL; + static struct sockaddr_storage *bind_addr = NULL; static int nb_shards = 0; struct peer *newpeer = NULL; const char *err; @@ -729,12 +729,20 @@ int cfg_parse_peers(const char *file, int linenum, char **args, int kwm) * Newly allocated listener is at the end of the list */ l = LIST_ELEM(bind_conf->listeners.p, typeof(l), by_bind); + bind_addr = &l->rx.addr; global.maxsock++; /* for the listening socket */ bind_line = 1; if (cfg_peers->local) { + /* Local peer already defined using "server" line has no + * address yet, we should update its server's addr:port + * settings + */ newpeer = cfg_peers->local; + BUG_ON(!newpeer->srv); + newpeer->srv->addr = *bind_addr; + newpeer->srv->svc_port = get_host_port(bind_addr); } else { /* This peer is local. @@ -747,8 +755,6 @@ int cfg_parse_peers(const char *file, int linenum, char **args, int kwm) goto out; } } - newpeer->addr = l->rx.addr; - newpeer->proto = l->rx.proto; cur_arg++; } @@ -779,6 +785,7 @@ int cfg_parse_peers(const char *file, int linenum, char **args, int kwm) else if (strcmp(args[0], "peers") == 0) { /* new peers section */ /* Initialize these static variables when entering a new "peers" section*/ bind_line = peer_line = 0; + bind_addr = NULL; if (!*args[1]) { ha_alert("parsing [%s:%d] : missing name for peers section.\n", file, linenum); err_code |= ERR_ALERT | ERR_ABORT; @@ -889,6 +896,15 @@ int cfg_parse_peers(const char *file, int linenum, char **args, int kwm) goto out; } + if (!parse_addr && bind_addr) { + /* local peer declared using "server": has name but no + * address: we use the known "bind" line addr settings + * as implicit server's addr and port. 
+ */ + curpeers->peers_fe->srv->addr = *bind_addr; + curpeers->peers_fe->srv->svc_port = get_host_port(bind_addr); + } + if (nb_shards && curpeers->peers_fe->srv->shard > nb_shards) { ha_warning("parsing [%s:%d] : '%s %s' : %d peer shard greater value than %d shards value is ignored.\n", file, linenum, args[0], args[1], curpeers->peers_fe->srv->shard, nb_shards); @@ -902,16 +918,6 @@ int cfg_parse_peers(const char *file, int linenum, char **args, int kwm) err_code |= ERR_WARN; } - /* If the peer address has just been parsed, let's copy it to <newpeer> - * and initializes ->proto. - */ - if (peer || !local_peer) { - newpeer->addr = curpeers->peers_fe->srv->addr; - newpeer->proto = protocol_lookup(newpeer->addr.ss_family, PROTO_TYPE_STREAM, 0); - } - - newpeer->xprt = xprt_get(XPRT_RAW); - newpeer->sock_init_arg = NULL; HA_SPIN_INIT(&newpeer->lock); newpeer->srv = curpeers->peers_fe->srv; @@ -2699,7 +2705,6 @@ static int numa_detect_topology() int check_config_validity() { int cfgerr = 0; - struct proxy *curproxy = NULL; struct proxy *init_proxies_list = NULL; struct stktable *t; struct server *newsrv = NULL; @@ -2725,6 +2730,13 @@ int check_config_validity() if (!global.tune.requri_len) global.tune.requri_len = REQURI_LEN; + if (!global.thread_limit) + global.thread_limit = MAX_THREADS; + +#if defined(USE_THREAD) + if (thread_cpus_enabled_at_boot > global.thread_limit) + thread_cpus_enabled_at_boot = global.thread_limit; +#endif if (!global.nbthread) { /* nbthread not set, thus automatic. In this case, and only if * running on a single process, we enable the same number of @@ -2748,13 +2760,24 @@ int check_config_validity() global.nbtgroups = 1; if (global.nbthread > MAX_THREADS_PER_GROUP * global.nbtgroups) { - ha_diag_warning("nbthread not set, found %d CPUs, limiting to %d threads (maximum is %d per thread group). 
Please set nbthreads and/or increase thread-groups in the global section to silence this warning.\n", - global.nbthread, MAX_THREADS_PER_GROUP * global.nbtgroups, MAX_THREADS_PER_GROUP); + if (global.nbthread <= global.thread_limit) + ha_diag_warning("nbthread not set, found %d CPUs, limiting to %d threads (maximum is %d per thread group). " + "Please set nbthreads and/or increase thread-groups in the global section to silence this warning.\n", + global.nbthread, MAX_THREADS_PER_GROUP * global.nbtgroups, MAX_THREADS_PER_GROUP); global.nbthread = MAX_THREADS_PER_GROUP * global.nbtgroups; } + + if (global.nbthread > global.thread_limit) + global.nbthread = global.thread_limit; } #endif } + else if (global.nbthread > global.thread_limit) { + ha_warning("nbthread forced to a higher value (%d) than the configured thread-hard-limit (%d), enforcing the limit. " + "Please fix either value to remove this warning.\n", + global.nbthread, global.thread_limit); + global.nbthread = global.thread_limit; + } if (!global.nbtgroups) global.nbtgroups = 1; @@ -2879,8 +2902,7 @@ init_proxies_list_stage1: #ifdef USE_OPENSSL /* no-alpn ? 
If so, it's the right moment to remove it */ if (bind_conf->ssl_conf.alpn_str && !bind_conf->ssl_conf.alpn_len) { - free(bind_conf->ssl_conf.alpn_str); - bind_conf->ssl_conf.alpn_str = NULL; + ha_free(&bind_conf->ssl_conf.alpn_str); } #ifdef TLSEXT_TYPE_application_layer_protocol_negotiation else if (!bind_conf->ssl_conf.alpn_str && !bind_conf->ssl_conf.npn_str && @@ -2934,6 +2956,12 @@ init_proxies_list_stage1: if (err_code & ERR_FATAL) goto out; } + + if (bind_generate_guid(bind_conf)) { + cfgerr++; + err_code |= ERR_FATAL | ERR_ALERT; + goto out; + } } switch (curproxy->mode) { @@ -3120,6 +3148,12 @@ init_proxies_list_stage1: curproxy->id); err_code |= ERR_WARN; } + if (target->mode == PR_MODE_HTTP) { + /* at least one of the used backends will provoke an + * HTTP upgrade + */ + curproxy->options |= PR_O_HTTP_UPG; + } } } @@ -3135,7 +3169,7 @@ init_proxies_list_stage1: * parsing is cancelled and be.name is restored to be resolved. */ pxname = rule->be.name; - LIST_INIT(&rule->be.expr); + lf_expr_init(&rule->be.expr); curproxy->conf.args.ctx = ARGC_UBK; curproxy->conf.args.file = rule->file; curproxy->conf.args.line = rule->line; @@ -3147,20 +3181,23 @@ init_proxies_list_stage1: cfgerr++; continue; } - node = LIST_NEXT(&rule->be.expr, struct logformat_node *, list); + node = LIST_NEXT(&rule->be.expr.nodes.list, struct logformat_node *, list); - if (!LIST_ISEMPTY(&rule->be.expr)) { - if (node->type != LOG_FMT_TEXT || node->list.n != &rule->be.expr) { + if (!lf_expr_isempty(&rule->be.expr)) { + if (node->type != LOG_FMT_TEXT || node->list.n != &rule->be.expr.nodes.list) { rule->dynamic = 1; free(pxname); + /* backend is not yet known so we cannot assume its type, + * thus we should consider that at least one of the used + * backends may provoke HTTP upgrade + */ + curproxy->options |= PR_O_HTTP_UPG; continue; } /* Only one element in the list, a simple string: free the expression and * fall back to static rule */ - LIST_DELETE(&node->list); - free(node->arg); - 
free(node); + lf_expr_deinit(&rule->be.expr); } rule->dynamic = 0; @@ -3187,6 +3224,12 @@ init_proxies_list_stage1: } else { ha_free(&rule->be.name); rule->be.backend = target; + if (target->mode == PR_MODE_HTTP) { + /* at least one of the used backends will provoke an + * HTTP upgrade + */ + curproxy->options |= PR_O_HTTP_UPG; + } } err_code |= warnif_tcp_http_cond(curproxy, rule->cond); } @@ -3202,7 +3245,7 @@ init_proxies_list_stage1: * to a static rule, thus the parsing is cancelled and we fall back to setting srv.ptr. */ server_name = srule->srv.name; - LIST_INIT(&srule->expr); + lf_expr_init(&srule->expr); curproxy->conf.args.ctx = ARGC_USRV; err = NULL; if (!parse_logformat_string(server_name, curproxy, &srule->expr, 0, SMP_VAL_FE_HRQ_HDR, &err)) { @@ -3212,10 +3255,10 @@ init_proxies_list_stage1: cfgerr++; continue; } - node = LIST_NEXT(&srule->expr, struct logformat_node *, list); + node = LIST_NEXT(&srule->expr.nodes.list, struct logformat_node *, list); - if (!LIST_ISEMPTY(&srule->expr)) { - if (node->type != LOG_FMT_TEXT || node->list.n != &srule->expr) { + if (!lf_expr_isempty(&srule->expr)) { + if (node->type != LOG_FMT_TEXT || node->list.n != &srule->expr.nodes.list) { srule->dynamic = 1; free(server_name); continue; @@ -3223,9 +3266,7 @@ init_proxies_list_stage1: /* Only one element in the list, a simple string: free the expression and * fall back to static rule */ - LIST_DELETE(&node->list); - free(node->arg); - free(node); + lf_expr_deinit(&srule->expr); } srule->dynamic = 0; @@ -3335,7 +3376,7 @@ init_proxies_list_stage1: } } - if (curproxy->uri_auth && !(curproxy->uri_auth->flags & STAT_CONVDONE) && + if (curproxy->uri_auth && !(curproxy->uri_auth->flags & STAT_F_CONVDONE) && !LIST_ISEMPTY(&curproxy->uri_auth->http_req_rules) && (curproxy->uri_auth->userlist || curproxy->uri_auth->auth_realm )) { ha_alert("%s '%s': stats 'auth'/'realm' and 'http-request' can't be used at the same time.\n", @@ -3345,7 +3386,7 @@ init_proxies_list_stage1: } if 
(curproxy->uri_auth && curproxy->uri_auth->userlist && - (!(curproxy->uri_auth->flags & STAT_CONVDONE) || + (!(curproxy->uri_auth->flags & STAT_F_CONVDONE) || LIST_ISEMPTY(&curproxy->uri_auth->http_req_rules))) { const char *uri_auth_compat_req[10]; struct act_rule *rule; @@ -3376,16 +3417,16 @@ init_proxies_list_stage1: if (curproxy->uri_auth->auth_realm) { ha_free(&curproxy->uri_auth->auth_realm); } - curproxy->uri_auth->flags |= STAT_CONVDONE; + curproxy->uri_auth->flags |= STAT_F_CONVDONE; } out_uri_auth_compat: /* check whether we have a logger that uses RFC5424 log format */ list_for_each_entry(tmplogger, &curproxy->loggers, list) { if (tmplogger->format == LOG_FORMAT_RFC5424) { - if (!curproxy->conf.logformat_sd_string) { + if (!curproxy->logformat_sd.str) { /* set the default logformat_sd_string */ - curproxy->conf.logformat_sd_string = default_rfc5424_sd_log_format; + curproxy->logformat_sd.str = default_rfc5424_sd_log_format; } break; } @@ -3393,31 +3434,21 @@ out_uri_auth_compat: /* compile the log format */ if (!(curproxy->cap & PR_CAP_FE)) { - if (curproxy->conf.logformat_string != default_http_log_format && - curproxy->conf.logformat_string != default_tcp_log_format && - curproxy->conf.logformat_string != clf_http_log_format) - free(curproxy->conf.logformat_string); - curproxy->conf.logformat_string = NULL; - ha_free(&curproxy->conf.lfs_file); - curproxy->conf.lfs_line = 0; - - if (curproxy->conf.logformat_sd_string != default_rfc5424_sd_log_format) - free(curproxy->conf.logformat_sd_string); - curproxy->conf.logformat_sd_string = NULL; - ha_free(&curproxy->conf.lfsd_file); - curproxy->conf.lfsd_line = 0; - } - - if (curproxy->conf.logformat_string) { + lf_expr_deinit(&curproxy->logformat); + lf_expr_deinit(&curproxy->logformat_sd); + } + + if (curproxy->logformat.str) { curproxy->conf.args.ctx = ARGC_LOG; - curproxy->conf.args.file = curproxy->conf.lfs_file; - curproxy->conf.args.line = curproxy->conf.lfs_line; + curproxy->conf.args.file = 
curproxy->logformat.conf.file; + curproxy->conf.args.line = curproxy->logformat.conf.line; err = NULL; - if (!parse_logformat_string(curproxy->conf.logformat_string, curproxy, &curproxy->logformat, + if (!lf_expr_compile(&curproxy->logformat, &curproxy->conf.args, LOG_OPT_MANDATORY|LOG_OPT_MERGE_SPACES, - SMP_VAL_FE_LOG_END, &err)) { + SMP_VAL_FE_LOG_END, &err) || + !lf_expr_postcheck(&curproxy->logformat, curproxy, &err)) { ha_alert("Parsing [%s:%d]: failed to parse log-format : %s.\n", - curproxy->conf.lfs_file, curproxy->conf.lfs_line, err); + curproxy->logformat.conf.file, curproxy->logformat.conf.line, err); free(err); cfgerr++; } @@ -3425,21 +3456,18 @@ out_uri_auth_compat: curproxy->conf.args.line = 0; } - if (curproxy->conf.logformat_sd_string) { + if (curproxy->logformat_sd.str) { curproxy->conf.args.ctx = ARGC_LOGSD; - curproxy->conf.args.file = curproxy->conf.lfsd_file; - curproxy->conf.args.line = curproxy->conf.lfsd_line; + curproxy->conf.args.file = curproxy->logformat_sd.conf.file; + curproxy->conf.args.line = curproxy->logformat_sd.conf.line; err = NULL; - if (!parse_logformat_string(curproxy->conf.logformat_sd_string, curproxy, &curproxy->logformat_sd, + if (!lf_expr_compile(&curproxy->logformat_sd, &curproxy->conf.args, LOG_OPT_MANDATORY|LOG_OPT_MERGE_SPACES, - SMP_VAL_FE_LOG_END, &err)) { - ha_alert("Parsing [%s:%d]: failed to parse log-format-sd : %s.\n", - curproxy->conf.lfsd_file, curproxy->conf.lfsd_line, err); - free(err); - cfgerr++; - } else if (!add_to_logformat_list(NULL, NULL, LF_SEPARATOR, &curproxy->logformat_sd, &err)) { + SMP_VAL_FE_LOG_END, &err) || + !add_to_logformat_list(NULL, NULL, LF_SEPARATOR, &curproxy->logformat_sd, &err) || + !lf_expr_postcheck(&curproxy->logformat_sd, curproxy, &err)) { ha_alert("Parsing [%s:%d]: failed to parse log-format-sd : %s.\n", - curproxy->conf.lfsd_file, curproxy->conf.lfsd_line, err); + curproxy->logformat_sd.conf.file, curproxy->logformat_sd.conf.line, err); free(err); cfgerr++; } @@ -3447,21 
+3475,22 @@ out_uri_auth_compat: curproxy->conf.args.line = 0; } - if (curproxy->conf.uniqueid_format_string) { + if (curproxy->format_unique_id.str) { int where = 0; curproxy->conf.args.ctx = ARGC_UIF; - curproxy->conf.args.file = curproxy->conf.uif_file; - curproxy->conf.args.line = curproxy->conf.uif_line; + curproxy->conf.args.file = curproxy->format_unique_id.conf.file; + curproxy->conf.args.line = curproxy->format_unique_id.conf.line; err = NULL; if (curproxy->cap & PR_CAP_FE) where |= SMP_VAL_FE_HRQ_HDR; if (curproxy->cap & PR_CAP_BE) where |= SMP_VAL_BE_HRQ_HDR; - if (!parse_logformat_string(curproxy->conf.uniqueid_format_string, curproxy, &curproxy->format_unique_id, - LOG_OPT_HTTP|LOG_OPT_MERGE_SPACES, where, &err)) { + if (!lf_expr_compile(&curproxy->format_unique_id, &curproxy->conf.args, + LOG_OPT_HTTP|LOG_OPT_MERGE_SPACES, where, &err) || + !lf_expr_postcheck(&curproxy->format_unique_id, curproxy, &err)) { ha_alert("Parsing [%s:%d]: failed to parse unique-id : %s.\n", - curproxy->conf.uif_file, curproxy->conf.uif_line, err); + curproxy->format_unique_id.conf.file, curproxy->format_unique_id.conf.line, err); free(err); cfgerr++; } @@ -3469,16 +3498,17 @@ out_uri_auth_compat: curproxy->conf.args.line = 0; } - if (curproxy->conf.error_logformat_string) { + if (curproxy->logformat_error.str) { curproxy->conf.args.ctx = ARGC_LOG; - curproxy->conf.args.file = curproxy->conf.elfs_file; - curproxy->conf.args.line = curproxy->conf.elfs_line; + curproxy->conf.args.file = curproxy->logformat_error.conf.file; + curproxy->conf.args.line = curproxy->logformat_error.conf.line; err = NULL; - if (!parse_logformat_string(curproxy->conf.error_logformat_string, curproxy, &curproxy->logformat_error, + if (!lf_expr_compile(&curproxy->logformat_error, &curproxy->conf.args, LOG_OPT_MANDATORY|LOG_OPT_MERGE_SPACES, - SMP_VAL_FE_LOG_END, &err)) { + SMP_VAL_FE_LOG_END, &err) || + !lf_expr_postcheck(&curproxy->logformat_error, curproxy, &err)) { ha_alert("Parsing [%s:%d]: failed 
to parse error-log-format : %s.\n", - curproxy->conf.elfs_file, curproxy->conf.elfs_line, err); + curproxy->logformat_error.conf.file, curproxy->logformat_error.conf.line, err); free(err); cfgerr++; } @@ -3655,8 +3685,6 @@ out_uri_auth_compat: newsrv->conf.id.key = newsrv->puid = next_id; eb32_insert(&curproxy->conf.used_server_id, &newsrv->conf.id); } - newsrv->conf.name.key = newsrv->id; - ebis_insert(&curproxy->conf.used_server_name, &newsrv->conf.name); next_id++; newsrv = newsrv->next; @@ -3723,12 +3751,6 @@ out_uri_auth_compat: * on what LB algorithm was chosen. */ - if (curproxy->mode == PR_MODE_SYSLOG) { - /* log load-balancing requires special init that is performed - * during log-postparsing step - */ - goto skip_server_lb_init; - } curproxy->lbprm.algo &= ~(BE_LB_LKUP | BE_LB_PROP_DYN); switch (curproxy->lbprm.algo & BE_LB_KIND) { case BE_LB_KIND_RR: @@ -3767,8 +3789,13 @@ out_uri_auth_compat: init_server_map(curproxy); } break; + case BE_LB_KIND_SA: + if ((curproxy->lbprm.algo & BE_LB_PARM) == BE_LB_SA_SS) { + curproxy->lbprm.algo |= BE_LB_PROP_DYN; + init_server_ss(curproxy); + } + break; } - skip_server_lb_init: HA_RWLOCK_INIT(&curproxy->lbprm.lock); if (curproxy->options & PR_O_LOGASAP) @@ -3776,7 +3803,7 @@ out_uri_auth_compat: if (!(curproxy->cap & PR_CAP_INT) && (curproxy->mode == PR_MODE_TCP || curproxy->mode == PR_MODE_HTTP) && (curproxy->cap & PR_CAP_FE) && LIST_ISEMPTY(&curproxy->loggers) && - (!LIST_ISEMPTY(&curproxy->logformat) || !LIST_ISEMPTY(&curproxy->logformat_sd))) { + (!lf_expr_isempty(&curproxy->logformat) || !lf_expr_isempty(&curproxy->logformat_sd))) { ha_warning("log format ignored for %s '%s' since it has no log address.\n", proxy_type_str(curproxy), curproxy->id); err_code |= ERR_WARN; @@ -3798,6 +3825,12 @@ out_uri_auth_compat: err_code |= ERR_WARN; } + if (isttest(curproxy->monitor_uri)) { + ha_warning("'monitor-uri' statement ignored for %s '%s' as it requires HTTP mode.\n", + proxy_type_str(curproxy), curproxy->id); + 
err_code |= ERR_WARN; + } + if (!LIST_ISEMPTY(&curproxy->http_req_rules)) { ha_warning("'http-request' rules ignored for %s '%s' as they require HTTP mode.\n", proxy_type_str(curproxy), curproxy->id); @@ -4181,6 +4214,11 @@ init_proxies_list_stage2: /* listener ID not set, use automatic numbering with first * spare entry starting with next_luid. */ + if (listener->by_fe.p != &curproxy->conf.listeners) { + struct listener *prev_li = LIST_PREV(&listener->by_fe, typeof(prev_li), by_fe); + if (prev_li->luid) + next_id = prev_li->luid + 1; + } next_id = get_next_id(&curproxy->conf.used_listener_id, next_id); listener->conf.id.key = listener->luid = next_id; eb32_insert(&curproxy->conf.used_listener_id, &listener->conf.id); diff --git a/src/check.c b/src/check.c index 2753c93..64464c4 100644 --- a/src/check.c +++ b/src/check.c @@ -1031,9 +1031,9 @@ int httpchk_build_status_header(struct server *s, struct buffer *buf) s->queue.length); if ((s->cur_state == SRV_ST_STARTING) && - ns_to_sec(now_ns) < s->last_change + s->slowstart && - ns_to_sec(now_ns) >= s->last_change) { - ratio = MAX(1, 100 * (ns_to_sec(now_ns) - s->last_change) / s->slowstart); + ns_to_sec(now_ns) < s->counters.last_change + s->slowstart && + ns_to_sec(now_ns) >= s->counters.last_change) { + ratio = MAX(1, 100 * (ns_to_sec(now_ns) - s->counters.last_change) / s->slowstart); chunk_appendf(buf, "; throttle=%d%%", ratio); } @@ -1382,7 +1382,7 @@ struct task *process_chk_conn(struct task *t, void *context, unsigned int state) * as a failed response coupled with "observe layer7" caused the * server state to be suddenly changed. 
*/ - sc_conn_drain_and_shut(sc); + se_shutdown(sc->sedesc, SE_SHR_DRAIN|SE_SHW_SILENT); } if (sc) { @@ -1415,8 +1415,7 @@ struct task *process_chk_conn(struct task *t, void *context, unsigned int state) } } - if (LIST_INLIST(&check->buf_wait.list)) - LIST_DEL_INIT(&check->buf_wait.list); + b_dequeue(&check->buf_wait); check_release_buf(check, &check->bi); check_release_buf(check, &check->bo); @@ -1505,13 +1504,13 @@ int check_buf_available(void *target) BUG_ON(!check->sc); - if ((check->state & CHK_ST_IN_ALLOC) && b_alloc(&check->bi)) { + if ((check->state & CHK_ST_IN_ALLOC) && b_alloc(&check->bi, DB_CHANNEL)) { TRACE_STATE("unblocking check, input buffer allocated", CHK_EV_TCPCHK_EXP|CHK_EV_RX_BLK, check); check->state &= ~CHK_ST_IN_ALLOC; tasklet_wakeup(check->sc->wait_event.tasklet); return 1; } - if ((check->state & CHK_ST_OUT_ALLOC) && b_alloc(&check->bo)) { + if ((check->state & CHK_ST_OUT_ALLOC) && b_alloc(&check->bo, DB_CHANNEL)) { TRACE_STATE("unblocking check, output buffer allocated", CHK_EV_TCPCHK_SND|CHK_EV_TX_BLK, check); check->state &= ~CHK_ST_OUT_ALLOC; tasklet_wakeup(check->sc->wait_event.tasklet); @@ -1529,10 +1528,8 @@ struct buffer *check_get_buf(struct check *check, struct buffer *bptr) struct buffer *buf = NULL; if (likely(!LIST_INLIST(&check->buf_wait.list)) && - unlikely((buf = b_alloc(bptr)) == NULL)) { - check->buf_wait.target = check; - check->buf_wait.wakeup_cb = check_buf_available; - LIST_APPEND(&th_ctx->buffer_wq, &check->buf_wait.list); + unlikely((buf = b_alloc(bptr, DB_CHANNEL)) == NULL)) { + b_queue(DB_CHANNEL, &check->buf_wait, check, check_buf_available); } return buf; } @@ -455,7 +455,7 @@ static struct proxy *cli_alloc_fe(const char *name, const char *file, int line) init_new_proxy(fe); fe->next = proxies_list; proxies_list = fe; - fe->last_change = ns_to_sec(now_ns); + fe->fe_counters.last_change = ns_to_sec(now_ns); fe->id = strdup("GLOBAL"); fe->cap = PR_CAP_FE|PR_CAP_INT; fe->maxconn = 10; /* default to 10 concurrent 
connections */ @@ -742,9 +742,8 @@ static int cli_parse_request(struct appctx *appctx) int i = 0; struct cli_kw *kw; - p = appctx->chunk->area; - end = p + appctx->chunk->data; - + p = b_head(&appctx->inbuf); + end = b_tail(&appctx->inbuf); /* * Get pointers on words. * One extra slot is reserved to store a pointer on a null byte. @@ -806,29 +805,13 @@ static int cli_parse_request(struct appctx *appctx) i++; } /* fill unused slots */ - p = appctx->chunk->area + appctx->chunk->data; + p = b_tail(&appctx->inbuf); for (; i < MAX_CLI_ARGS + 1; i++) args[i] = p; if (!**args) return 0; - if (appctx->st1 & APPCTX_CLI_ST1_SHUT_EXPECTED) { - /* The previous command line was finished by a \n in non-interactive mode. - * It should not be followed by another command line. In non-interactive mode, - * only one line should be processed. Because of a bug, it is not respected. - * So emit a warning, only once in the process life, to warn users their script - * must be updated. - */ - appctx->st1 &= ~APPCTX_CLI_ST1_SHUT_EXPECTED; - if (ONLY_ONCE()) { - ha_warning("Commands sent to the CLI were chained using a new line character while in non-interactive mode." - " This is not reliable, not officially supported and will not be supported anymore in future versions. " - "Please use ';' to delimit commands instead."); - } - } - - kw = cli_find_kw(args); if (!kw || (kw->level & ~appctx->cli_level & ACCESS_MASTER_ONLY) || @@ -916,6 +899,151 @@ static int cli_output_msg(struct appctx *appctx, const char *msg, int severity, return applet_putchk(appctx, tmp); } +int cli_init(struct appctx *appctx) +{ + struct stconn *sc = appctx_sc(appctx); + struct bind_conf *bind_conf = strm_li(__sc_strm(sc))->bind_conf; + + appctx->cli_severity_output = bind_conf->severity_output; + applet_reset_svcctx(appctx); + appctx->st0 = CLI_ST_GETREQ; + appctx->cli_level = bind_conf->level; + + /* Wakeup the applet ASAP. 
*/ + applet_need_more_data(appctx); + return 0; + +} + +size_t cli_snd_buf(struct appctx *appctx, struct buffer *buf, size_t count, unsigned flags) +{ + char *str; + size_t len, ret = 0; + int lf = 0; + + if (appctx->st0 == CLI_ST_INIT) + cli_init(appctx); + else if (appctx->st0 != CLI_ST_GETREQ) + goto end; + + if (b_space_wraps(&appctx->inbuf)) + b_slow_realign(&appctx->inbuf, trash.area, b_data(&appctx->inbuf)); + + while (1) { + /* payload doesn't take escapes nor does it end on semi-colons, + * so we use the regular getline. Normal mode however must stop + * on LFs and semi-colons that are not prefixed by a backslash. + * Note we reserve one byte at the end to insert a trailing nul + * byte. + */ + str = b_tail(&appctx->inbuf); + if (!(appctx->st1 & APPCTX_CLI_ST1_PAYLOAD)) + len = b_getdelim(buf, ret, count, str, b_room(&appctx->inbuf) - 1, "\n;", '\\'); + else + len = b_getline(buf, ret, count, str, b_room(&appctx->inbuf) - 1); + + if (!len) { + if (!b_room(buf) || (count > b_room(&appctx->inbuf) - 1)) { + cli_err(appctx, "The command is too big for the buffer size. Please change tune.bufsize in the configuration to use a bigger command.\n"); + applet_set_error(appctx); + b_reset(&appctx->inbuf); + } + else if (flags & CO_SFL_LAST_DATA) { + applet_set_eos(appctx); + applet_set_error(appctx); + b_reset(&appctx->inbuf); + } + break; + } + + ret += len; + count -= len; + + if (str[len-1] == '\n') + lf = 1; + + /* Remove the trailing \r, if any and add a null byte at the + * end. For normal mode, the trailing \n is removed, but we + * conserve if for payload mode. 
+ */ + len--; + if (len && str[len-1] == '\r') + len--; + if (appctx->st1 & APPCTX_CLI_ST1_PAYLOAD) { + str[len+1] = '\0'; + b_add(&appctx->inbuf, len+1); + } + else { + str[len] = '\0'; + b_add(&appctx->inbuf, len); + } + + if (appctx->st1 & APPCTX_CLI_ST1_PAYLOAD) { + /* look for a pattern */ + if (len == strlen(appctx->cli_payload_pat)) { + /* here use 'len' because str still contains the \n */ + if (strncmp(str, appctx->cli_payload_pat, len) == 0) { + /* remove the last two \n */ + b_sub(&appctx->inbuf, strlen(appctx->cli_payload_pat) + 2); + *b_tail(&appctx->inbuf) = '\0'; + appctx->st1 &= ~APPCTX_CLI_ST1_PAYLOAD; + if (!(appctx->st1 & APPCTX_CLI_ST1_PROMPT) && lf) + appctx->st1 |= APPCTX_CLI_ST1_LASTCMD; + } + } + } + else { + char *last_arg; + + /* + * Look for the "payload start" pattern at the end of a + * line Its location is not remembered here, this is + * just to switch to a gathering mode. + * + * The pattern must start by << followed by 0 to 7 + * characters, and finished by the end of the command + * (\n or ;). 
+ */ + + /* look for the first space starting by the end of the line */ + for (last_arg = b_tail(&appctx->inbuf); last_arg != b_head(&appctx->inbuf); last_arg--) { + if (*last_arg == ' ' || *last_arg == '\t') { + last_arg++; + break; + } + } + + if (strncmp(last_arg, PAYLOAD_PATTERN, strlen(PAYLOAD_PATTERN)) == 0) { + ssize_t pat_len = strlen(last_arg + strlen(PAYLOAD_PATTERN)); + + /* A customized pattern can't be more than 7 characters + * if it's more, don't make it a payload + */ + if (pat_len < sizeof(appctx->cli_payload_pat)) { + appctx->st1 |= APPCTX_CLI_ST1_PAYLOAD; + /* copy the customized pattern, don't store the << */ + strncpy(appctx->cli_payload_pat, last_arg + strlen(PAYLOAD_PATTERN), sizeof(appctx->cli_payload_pat)-1); + appctx->cli_payload_pat[sizeof(appctx->cli_payload_pat)-1] = '\0'; + b_add(&appctx->inbuf, 1); // keep the trailing \0 after the pattern + } + } + else { + if (!(appctx->st1 & APPCTX_CLI_ST1_PROMPT) && lf) + appctx->st1 |= APPCTX_CLI_ST1_LASTCMD; + } + } + + if (!(appctx->st1 & APPCTX_CLI_ST1_PAYLOAD) || (appctx->st1 & APPCTX_CLI_ST1_PROMPT)) { + appctx->st0 = CLI_ST_PARSEREQ; + break; + } + } + b_del(buf, ret); + + end: + return ret; +} + /* This I/O handler runs as an applet embedded in a stream connector. It is * used to processes I/O from/to the stats unix socket. The system relies on a * state machine handling requests and various responses. 
We read a request, @@ -926,181 +1054,62 @@ static int cli_output_msg(struct appctx *appctx, const char *msg, int severity, */ static void cli_io_handler(struct appctx *appctx) { - struct stconn *sc = appctx_sc(appctx); - struct channel *req = sc_oc(sc); - struct channel *res = sc_ic(sc); - struct bind_conf *bind_conf = strm_li(__sc_strm(sc))->bind_conf; - int reql; - int len; - int lf = 0; + if (applet_fl_test(appctx, APPCTX_FL_OUTBLK_ALLOC|APPCTX_FL_OUTBLK_FULL)) + goto out; - if (unlikely(se_fl_test(appctx->sedesc, (SE_FL_EOS|SE_FL_ERROR|SE_FL_SHR|SE_FL_SHW)))) { - co_skip(sc_oc(sc), co_data(sc_oc(sc))); + if (!appctx_get_buf(appctx, &appctx->outbuf)) { goto out; } - /* Check if the input buffer is available. */ - if (!b_size(&res->buf)) { - sc_need_room(sc, 0); + if (unlikely(applet_fl_test(appctx, APPCTX_FL_EOS|APPCTX_FL_ERROR))) { + appctx->st0 = CLI_ST_END; goto out; } while (1) { if (appctx->st0 == CLI_ST_INIT) { /* reset severity to default at init */ - appctx->cli_severity_output = bind_conf->severity_output; - applet_reset_svcctx(appctx); - appctx->st0 = CLI_ST_GETREQ; - appctx->cli_level = bind_conf->level; + cli_init(appctx); + break; } else if (appctx->st0 == CLI_ST_END) { - se_fl_set(appctx->sedesc, SE_FL_EOS); - free_trash_chunk(appctx->chunk); - appctx->chunk = NULL; + applet_set_eos(appctx); break; } else if (appctx->st0 == CLI_ST_GETREQ) { - char *str; - - /* use a trash chunk to store received data */ - if (!appctx->chunk) { - appctx->chunk = alloc_trash_chunk(); - if (!appctx->chunk) { - se_fl_set(appctx->sedesc, SE_FL_ERROR); - appctx->st0 = CLI_ST_END; - continue; - } - } - - str = appctx->chunk->area + appctx->chunk->data; - - /* ensure we have some output room left in the event we - * would want to return some info right after parsing. - */ - if (buffer_almost_full(sc_ib(sc))) { - sc_need_room(sc, b_size(&res->buf) / 2); - break; - } - - /* payload doesn't take escapes nor does it end on semi-colons, so - * we use the regular getline. 
Normal mode however must stop on - * LFs and semi-colons that are not prefixed by a backslash. Note - * that we reserve one byte at the end to insert a trailing nul byte. + /* Now we close the output if we're not in interactive + * mode and the request buffer is empty. This still + * allows pipelined requests to be sent in + * non-interactive mode. */ - - if (appctx->st1 & APPCTX_CLI_ST1_PAYLOAD) - reql = co_getline(sc_oc(sc), str, - appctx->chunk->size - appctx->chunk->data - 1); - else - reql = co_getdelim(sc_oc(sc), str, - appctx->chunk->size - appctx->chunk->data - 1, - "\n;", '\\'); - - if (reql <= 0) { /* closed or EOL not found */ - if (reql == 0) - break; - se_fl_set(appctx->sedesc, SE_FL_ERROR); + if (se_fl_test(appctx->sedesc, SE_FL_SHW)) { appctx->st0 = CLI_ST_END; continue; } - - if (str[reql-1] == '\n') - lf = 1; - - /* now it is time to check that we have a full line, - * remove the trailing \n and possibly \r, then cut the - * line. + break; + } + else if (appctx->st0 == CLI_ST_PARSEREQ) { + /* ensure we have some output room left in the event we + * would want to return some info right after parsing. 
*/ - len = reql - 1; - if (str[len] != '\n' && str[len] != ';') { - se_fl_set(appctx->sedesc, SE_FL_ERROR); - appctx->st0 = CLI_ST_END; - continue; - } - - if (len && str[len-1] == '\r') - len--; - - str[len] = '\0'; - appctx->chunk->data += len; - - if (appctx->st1 & APPCTX_CLI_ST1_PAYLOAD) { - appctx->chunk->area[appctx->chunk->data] = '\n'; - appctx->chunk->area[appctx->chunk->data + 1] = 0; - appctx->chunk->data++; + if (buffer_almost_full(&appctx->outbuf)) { + applet_fl_set(appctx, APPCTX_FL_OUTBLK_FULL); + break; } + appctx->t->expire = TICK_ETERNITY; appctx->st0 = CLI_ST_PROMPT; - if (appctx->st1 & APPCTX_CLI_ST1_PAYLOAD) { - /* look for a pattern */ - if (len == strlen(appctx->cli_payload_pat)) { - /* here use 'len' because str still contains the \n */ - if (strncmp(str, appctx->cli_payload_pat, len) == 0) { - /* remove the last two \n */ - appctx->chunk->data -= strlen(appctx->cli_payload_pat) + 2; - appctx->chunk->area[appctx->chunk->data] = 0; - cli_parse_request(appctx); - chunk_reset(appctx->chunk); - /* NB: cli_sock_parse_request() may have put - * another CLI_ST_O_* into appctx->st0. - */ - - appctx->st1 &= ~APPCTX_CLI_ST1_PAYLOAD; - if (!(appctx->st1 & APPCTX_CLI_ST1_PROMPT) && lf) - appctx->st1 |= APPCTX_CLI_ST1_SHUT_EXPECTED; - } - } - } - else { - char *last_arg; - /* - * Look for the "payload start" pattern at the end of a line - * Its location is not remembered here, this is just to switch - * to a gathering mode. - * The pattern must start by << followed by 0 - * to 7 characters, and finished by the end of - * the command (\n or ;). 
- */ - /* look for the first space starting by the end of the line */ - for (last_arg = appctx->chunk->area + appctx->chunk->data; last_arg != appctx->chunk->area; last_arg--) { - if (*last_arg == ' ' || *last_arg == '\t') { - last_arg++; - break; - } - } - if (strncmp(last_arg, PAYLOAD_PATTERN, strlen(PAYLOAD_PATTERN)) == 0) { - ssize_t pat_len = strlen(last_arg + strlen(PAYLOAD_PATTERN)); - - /* A customized pattern can't be more than 7 characters - * if it's more, don't make it a payload - */ - if (pat_len < sizeof(appctx->cli_payload_pat)) { - appctx->st1 |= APPCTX_CLI_ST1_PAYLOAD; - /* copy the customized pattern, don't store the << */ - strncpy(appctx->cli_payload_pat, last_arg + strlen(PAYLOAD_PATTERN), sizeof(appctx->cli_payload_pat)-1); - appctx->cli_payload_pat[sizeof(appctx->cli_payload_pat)-1] = '\0'; - appctx->chunk->data++; // keep the trailing \0 after the pattern - } - } - else { - /* no payload, the command is complete: parse the request */ - cli_parse_request(appctx); - chunk_reset(appctx->chunk); - if (!(appctx->st1 & APPCTX_CLI_ST1_PROMPT) && lf) - appctx->st1 |= APPCTX_CLI_ST1_SHUT_EXPECTED; - } + if (!(appctx->st1 & APPCTX_CLI_ST1_PAYLOAD)) { + cli_parse_request(appctx); + b_reset(&appctx->inbuf); } - - /* re-adjust req buffer */ - co_skip(sc_oc(sc), reql); - sc_opposite(sc)->flags |= SC_FL_RCV_ONCE; /* we plan to read small requests */ } else { /* output functions */ struct cli_print_ctx *ctx; const char *msg; int sev; - + cli_output: switch (appctx->st0) { case CLI_ST_PROMPT: break; @@ -1146,17 +1155,28 @@ static void cli_io_handler(struct appctx *appctx) appctx->st0 == CLI_ST_PRINT_UMSGERR) { usermsgs_clr(NULL); } + appctx->t->expire = TICK_ETERNITY; appctx->st0 = CLI_ST_PROMPT; } + if (applet_fl_test(appctx, APPCTX_FL_ERR_PENDING)) { + appctx->st0 = CLI_ST_END; + continue; + } + break; case CLI_ST_CALLBACK: /* use custom pointer */ if (appctx->io_handler) if (appctx->io_handler(appctx)) { + appctx->t->expire = TICK_ETERNITY; appctx->st0 = 
CLI_ST_PROMPT; if (appctx->io_release) { appctx->io_release(appctx); appctx->io_release = NULL; + /* some release handlers might have + * pending output to print. + */ + continue; } } break; @@ -1175,7 +1195,7 @@ static void cli_io_handler(struct appctx *appctx) * when entering a payload with interactive mode, change the prompt * to emphasize that more data can still be sent */ - if (appctx->chunk->data && appctx->st1 & APPCTX_CLI_ST1_PAYLOAD) + if (b_data(&appctx->inbuf) && appctx->st1 & APPCTX_CLI_ST1_PAYLOAD) prompt = "+ "; else if (appctx->st1 & APPCTX_CLI_ST1_TIMED) { uint up = ns_to_sec(now_ns - start_time_ns); @@ -1209,8 +1229,8 @@ static void cli_io_handler(struct appctx *appctx) * allows pipelined requests to be sent in * non-interactive mode. */ - if (!(appctx->st1 & APPCTX_CLI_ST1_PROMPT) && !co_data(req) && (!(appctx->st1 & APPCTX_CLI_ST1_PAYLOAD))) { - se_fl_set(appctx->sedesc, SE_FL_EOI); + if ((appctx->st1 & (APPCTX_CLI_ST1_PROMPT|APPCTX_CLI_ST1_PAYLOAD|APPCTX_CLI_ST1_LASTCMD)) == APPCTX_CLI_ST1_LASTCMD) { + applet_set_eoi(appctx); appctx->st0 = CLI_ST_END; continue; } @@ -1230,14 +1250,16 @@ static void cli_io_handler(struct appctx *appctx) * refills the buffer with new bytes in non-interactive * mode, avoiding to close on apparently empty commands. 
*/ - if (co_data(sc_oc(sc))) { - appctx_wakeup(appctx); - goto out; - } + break; } } out: + if (appctx->st0 == CLI_ST_END) { + /* eat the whole request */ + b_reset(&appctx->inbuf); + applet_fl_clr(appctx, APPCTX_FL_INBLK_FULL); + } return; } @@ -1247,9 +1269,6 @@ static void cli_io_handler(struct appctx *appctx) */ static void cli_release_handler(struct appctx *appctx) { - free_trash_chunk(appctx->chunk); - appctx->chunk = NULL; - if (appctx->io_release) { appctx->io_release(appctx); appctx->io_release = NULL; @@ -1272,13 +1291,8 @@ static void cli_release_handler(struct appctx *appctx) static int cli_io_handler_show_env(struct appctx *appctx) { struct show_env_ctx *ctx = appctx->svcctx; - struct stconn *sc = appctx_sc(appctx); char **var = ctx->var; - /* FIXME: Don't watch the other side !*/ - if (unlikely(sc_opposite(sc)->flags & SC_FL_SHUT_DONE)) - return 1; - chunk_reset(&trash); /* we have two inner loops here, one for the proxy, the other one for @@ -1308,16 +1322,11 @@ static int cli_io_handler_show_env(struct appctx *appctx) */ static int cli_io_handler_show_fd(struct appctx *appctx) { - struct stconn *sc = appctx_sc(appctx); struct show_fd_ctx *fdctx = appctx->svcctx; uint match = fdctx->show_mask; int fd = fdctx->fd; int ret = 1; - /* FIXME: Don't watch the other side !*/ - if (unlikely(sc_opposite(sc)->flags & SC_FL_SHUT_DONE)) - goto end; - chunk_reset(&trash); /* isolate the threads once per round. We're limited to a buffer worth @@ -2010,6 +2019,174 @@ static int cli_parse_set_ratelimit(char **args, char *payload, struct appctx *ap return 1; } +/* Parse a "wait <time>" command. + * It uses a "cli_wait_ctx" struct for its context. + * Returns 0 if the server deletion has been successfully scheduled, 1 on failure. 
+ */ +static int cli_parse_wait(char **args, char *payload, struct appctx *appctx, void *private) +{ + struct cli_wait_ctx *ctx = applet_reserve_svcctx(appctx, sizeof(*ctx)); + uint wait_ms; + const char *err; + + if (!cli_has_level(appctx, ACCESS_LVL_ADMIN)) + return 1; + + if (!*args[1]) + return cli_err(appctx, "Expects a duration in milliseconds.\n"); + + err = parse_time_err(args[1], &wait_ms, TIME_UNIT_MS); + if (err || wait_ms < 1) { + /* in case -h is passed as the first option, continue to the next test */ + if (strcmp(args[1], "-h") == 0) + args--; + else + return cli_err(appctx, "Invalid duration.\n"); + } + + if (strcmp(args[2], "srv-removable") == 0) { + struct ist be_name, sv_name; + + if (!*args[3]) + return cli_err(appctx, "Missing server name (<backend>/<server>).\n"); + + sv_name = ist(args[3]); + be_name = istsplit(&sv_name, '/'); + if (!istlen(sv_name)) + return cli_err(appctx, "Require 'backend/server'.\n"); + + be_name = istdup(be_name); + sv_name = istdup(sv_name); + if (!isttest(be_name) || !isttest(sv_name)) { + free(istptr(be_name)); + free(istptr(sv_name)); + return cli_err(appctx, "Out of memory trying to clone the server name.\n"); + } + + ctx->args[0] = ist0(be_name); + ctx->args[1] = ist0(sv_name); + ctx->cond = CLI_WAIT_COND_SRV_UNUSED; + } + else if (*args[2]) { + /* show the command's help either upon request (-h) or error */ + err = "Usage: wait {-h|<duration>} [condition [args...]]\n" + " - '-h' displays this help\n" + " - <duration> is the maximum wait time, optionally suffixed by the unit among\n" + " 'us', 'ms', 's', 'm', 'h', and 'd'. ; the default unit is milliseconds.\n" + " - <condition> indicates what to wait for, no longer than the specified\n" + " duration. 
Supported conditions are:\n" + " - <none> : by default, just sleep for the specified duration.\n" + " - srv-removable <px>/<sv> : wait for this server to become removable.\n" + ""; + + if (strcmp(args[2], "-h") == 0) + return cli_msg(appctx, LOG_INFO, err); + else + return cli_err(appctx, err); + } + + ctx->start = now_ms; + ctx->deadline = tick_add(now_ms, wait_ms); + + /* proceed with the I/O handler */ + return 0; +} + +/* Execute a "wait" condition. The delay is exponentially incremented between + * now_ms and ctx->deadline in powers of 1.5 and with a bound set to 10% of the + * programmed wait time, so that in a few wakeups we can later check a condition + * with reasonable accuracy. Shutdowns and other errors are handled as well and + * terminate the operation, but not new inputs so that it remains possible to + * chain other commands after it. Returns 0 if not finished, 1 if finished. + */ +static int cli_io_handler_wait(struct appctx *appctx) +{ + struct cli_wait_ctx *ctx = appctx->svcctx; + uint total, elapsed, left, wait; + int ret; + + /* note: upon first invocation, the timeout is not set */ + if (tick_isset(appctx->t->expire) && + !tick_is_expired(appctx->t->expire, now_ms)) + goto wait; + + /* here we should evaluate our waiting conditions, if any */ + + if (ctx->cond == CLI_WAIT_COND_SRV_UNUSED) { + /* check if the server in args[0]/args[1] can be released now */ + thread_isolate(); + ret = srv_check_for_deletion(ctx->args[0], ctx->args[1], NULL, NULL, NULL); + thread_release(); + + if (ret < 0) { + /* unrecoverable failure */ + ctx->error = CLI_WAIT_ERR_FAIL; + return 1; + } else if (ret > 0) { + /* immediate success */ + ctx->error = CLI_WAIT_ERR_DONE; + return 1; + } + /* let's check the timer */ + } + + /* and here we recalculate the new wait time or abort */ + left = tick_remain(now_ms, ctx->deadline); + if (!left) { + /* let the release handler know we've expired. 
When there is no + * wait condition, it's a simple sleep so we declare we're done. + */ + if (ctx->cond == CLI_WAIT_COND_NONE) + ctx->error = CLI_WAIT_ERR_DONE; + else + ctx->error = CLI_WAIT_ERR_EXP; + return 1; + } + + total = tick_remain(ctx->start, ctx->deadline); + elapsed = total - left; + wait = elapsed / 2 + 1; + if (wait > left) + wait = left; + else if (wait > total / 10) + wait = total / 10; + + appctx->t->expire = tick_add(now_ms, wait); + + wait: + /* Stop waiting upon close/abort/error */ + if (unlikely(se_fl_test(appctx->sedesc, SE_FL_SHW)) && !b_data(&appctx->inbuf)) { + ctx->error = CLI_WAIT_ERR_INTR; + return 1; + } + + return 0; +} + + +/* release structs allocated by "delete server" */ +static void cli_release_wait(struct appctx *appctx) +{ + struct cli_wait_ctx *ctx = appctx->svcctx; + const char *msg; + int i; + + switch (ctx->error) { + case CLI_WAIT_ERR_EXP: msg = "Wait delay expired.\n"; break; + case CLI_WAIT_ERR_INTR: msg = "Interrupted.\n"; break; + case CLI_WAIT_ERR_FAIL: msg = ctx->msg ? ctx->msg : "Failed.\n"; break; + default: msg = "Done.\n"; break; + } + + for (i = 0; i < sizeof(ctx->args) / sizeof(ctx->args[0]); i++) + ha_free(&ctx->args[i]); + + if (ctx->error == CLI_WAIT_ERR_DONE) + cli_msg(appctx, LOG_INFO, msg); + else + cli_err(appctx, msg); +} + /* parse the "expose-fd" argument on the bind lines */ static int bind_parse_expose_fd(char **args, int cur_arg, struct proxy *px, struct bind_conf *conf, char **err) { @@ -2471,8 +2648,13 @@ static int pcli_prefix_to_pid(const char *prefix) return -1; } -/* Return:: - * >= 0 : number of words to escape +/* + * pcli_find_and_exec_kw() parses a command for the master CLI. It looks for a + * prefix or a command that is handled directly by the proxy and never sent to + * a worker. 
+ * + * Return: + * >= 0 : number of words that were parsed and need to be skipped * = -1 : error */ int pcli_find_and_exec_kw(struct stream *s, char **args, int argl, char **errmsg, int *next_pid) @@ -2959,7 +3141,7 @@ int pcli_wait_for_response(struct stream *s, struct channel *rep, int an_bit) pendconn_free(s); /* let's do a final log if we need it */ - if (!LIST_ISEMPTY(&fe->logformat) && s->logs.logwait && + if (!lf_expr_isempty(&fe->logformat) && s->logs.logwait && !(s->flags & SF_MONITOR) && (!(fe->options & PR_O_NULLNOLOG) || s->req.total)) { s->do_log(s); @@ -3366,6 +3548,8 @@ static struct applet cli_applet = { .obj_type = OBJ_TYPE_APPLET, .name = "<CLI>", /* used for logging */ .fct = cli_io_handler, + .rcv_buf = appctx_raw_rcv_buf, + .snd_buf = cli_snd_buf, .release = cli_release_handler, }; @@ -3374,6 +3558,8 @@ static struct applet mcli_applet = { .obj_type = OBJ_TYPE_APPLET, .name = "<MCLI>", /* used for logging */ .fct = cli_io_handler, + .rcv_buf = appctx_raw_rcv_buf, + .snd_buf = cli_snd_buf, .release = cli_release_handler, }; @@ -3401,6 +3587,7 @@ static struct cli_kw_list cli_kws = {{ },{ { { "show", "version", NULL }, "show version : show version of the current process", cli_parse_show_version, NULL, NULL, NULL, ACCESS_MASTER }, { { "operator", NULL }, "operator : lower the level of the current CLI session to operator", cli_parse_set_lvl, NULL, NULL, NULL, ACCESS_MASTER}, { { "user", NULL }, "user : lower the level of the current CLI session to user", cli_parse_set_lvl, NULL, NULL, NULL, ACCESS_MASTER}, + { { "wait", NULL }, "wait {-h|<delay_ms>} cond [args...] 
: wait the specified delay or condition (-h to see list)", cli_parse_wait, cli_io_handler_wait, cli_release_wait, NULL }, {{},} }}; diff --git a/src/clock.c b/src/clock.c index ec2133c..7734389 100644 --- a/src/clock.c +++ b/src/clock.c @@ -135,7 +135,7 @@ uint64_t now_cpu_time_thread(int thr) /* set the clock source for the local thread */ void clock_set_local_source(void) { -#if defined(_POSIX_TIMERS) && (_POSIX_TIMERS > 0) && defined(_POSIX_THREAD_CPUTIME) +#if defined(_POSIX_TIMERS) && (_POSIX_TIMERS > 0) && defined(_POSIX_THREAD_CPUTIME) && (_POSIX_THREAD_CPUTIME >= 0) #ifdef USE_THREAD pthread_getcpuclockid(pthread_self(), &per_thread_clock_id[tid]); #else diff --git a/src/compression.c b/src/compression.c index 7b75461..a4464e0 100644 --- a/src/compression.c +++ b/src/compression.c @@ -300,7 +300,7 @@ static int rfc195x_add_data(struct comp_ctx *comp_ctx, const char *in_data, int * data and need a buffer now. We reuse the same buffer, as it's * not used out of the scope of a series of add_data()*, end(). */ - if (b_alloc(&tmpbuf) == NULL) + if (b_alloc(&tmpbuf, DB_PERMANENT) == NULL) return -1; /* no memory */ b_reset(&tmpbuf); memcpy(b_tail(&tmpbuf), comp_ctx->direct_ptr, comp_ctx->direct_len); diff --git a/src/connection.c b/src/connection.c index ed6beb7..3fedad9 100644 --- a/src/connection.c +++ b/src/connection.c @@ -134,7 +134,7 @@ fail: /* If connection is interrupted without CO_FL_ERROR, receiver task won't free it. 
*/ BUG_ON(!(conn->flags & CO_FL_ERROR)); - task_wakeup(l->rx.rhttp.task, TASK_WOKEN_ANY); + task_wakeup(l->rx.rhttp.task, TASK_WOKEN_RES); } return -1; } else @@ -474,7 +474,7 @@ void conn_init(struct connection *conn, void *target) conn->proxy_netns = NULL; MT_LIST_INIT(&conn->toremove_list); if (conn_is_back(conn)) - LIST_INIT(&conn->session_list); + LIST_INIT(&conn->sess_el); else LIST_INIT(&conn->stopping_list); LIST_INIT(&conn->tlv_list); @@ -511,12 +511,12 @@ static int conn_backend_init(struct connection *conn) */ static void conn_backend_deinit(struct connection *conn) { - /* If the connection is owned by the session, remove it from its list - */ - if (conn_is_back(conn) && LIST_INLIST(&conn->session_list)) { + /* If the connection is owned by the session, remove it from its list. */ + if (LIST_INLIST(&conn->sess_el)) session_unown_conn(conn->owner, conn); - } - else if (!(conn->flags & CO_FL_PRIVATE)) { + + /* If the connection is not private, it is accounted by the server. */ + if (!(conn->flags & CO_FL_PRIVATE)) { if (obj_type(conn->target) == OBJ_TYPE_SERVER) srv_release_conn(__objt_server(conn->target), conn); } @@ -603,6 +603,21 @@ void conn_free(struct connection *conn) pool_free(pool_head_connection, conn); } +/* Close all <conn> internal layers accordingly prior to freeing it. 
*/ +void conn_release(struct connection *conn) +{ + if (conn->mux) { + conn->mux->destroy(conn->ctx); + } + else { + conn_stop_tracking(conn); + conn_full_close(conn); + if (conn->destroy_cb) + conn->destroy_cb(conn); + conn_free(conn); + } +} + struct conn_hash_node *conn_alloc_hash_node(struct connection *conn) { struct conn_hash_node *hash_node = NULL; @@ -1114,111 +1129,112 @@ int conn_recv_proxy(struct connection *conn, int flag) break; } - /* TLV parsing */ - while (tlv_offset < total_v2_len) { - struct ist tlv; - struct tlv *tlv_packet = NULL; - struct conn_tlv_list *new_tlv = NULL; - size_t data_len = 0; - - /* Verify that we have at least TLV_HEADER_SIZE bytes left */ - if (tlv_offset + TLV_HEADER_SIZE > total_v2_len) - goto bad_header; + /* unsupported protocol, keep local connection address */ + break; + case 0x00: /* LOCAL command */ + /* keep local connection address for LOCAL */ - tlv_packet = (struct tlv *) &trash.area[tlv_offset]; - tlv = ist2((const char *)tlv_packet->value, get_tlv_length(tlv_packet)); - tlv_offset += istlen(tlv) + TLV_HEADER_SIZE; + tlv_offset = PP2_HEADER_LEN; + break; + default: + goto bad_header; /* not a supported command */ + } - /* Verify that the TLV length does not exceed the total PROXYv2 length */ - if (tlv_offset > total_v2_len) - goto bad_header; + /* TLV parsing */ + while (tlv_offset < total_v2_len) { + struct ist tlv; + struct tlv *tlv_packet = NULL; + struct conn_tlv_list *new_tlv = NULL; + size_t data_len = 0; - /* Prepare known TLV types */ - switch (tlv_packet->type) { - case PP2_TYPE_CRC32C: { - uint32_t n_crc32c; + /* Verify that we have at least TLV_HEADER_SIZE bytes left */ + if (tlv_offset + TLV_HEADER_SIZE > total_v2_len) + goto bad_header; - /* Verify that this TLV is exactly 4 bytes long */ - if (istlen(tlv) != PP2_CRC32C_LEN) - goto bad_header; + tlv_packet = (struct tlv *) &trash.area[tlv_offset]; + tlv = ist2((const char *)tlv_packet->value, get_tlv_length(tlv_packet)); + tlv_offset += istlen(tlv) + 
TLV_HEADER_SIZE; - n_crc32c = read_n32(istptr(tlv)); - write_n32(istptr(tlv), 0); // compute with CRC==0 + /* Verify that the TLV length does not exceed the total PROXYv2 length */ + if (tlv_offset > total_v2_len) + goto bad_header; - if (hash_crc32c(trash.area, total_v2_len) != n_crc32c) - goto bad_header; - break; - } -#ifdef USE_NS - case PP2_TYPE_NETNS: { - const struct netns_entry *ns; + /* Prepare known TLV types */ + switch (tlv_packet->type) { + case PP2_TYPE_CRC32C: { + uint32_t n_crc32c; - ns = netns_store_lookup(istptr(tlv), istlen(tlv)); - if (ns) - conn->proxy_netns = ns; - break; - } -#endif - case PP2_TYPE_AUTHORITY: { - /* For now, keep the length restriction by HAProxy */ - if (istlen(tlv) > HA_PP2_AUTHORITY_MAX) - goto bad_header; + /* Verify that this TLV is exactly 4 bytes long */ + if (istlen(tlv) != PP2_CRC32C_LEN) + goto bad_header; - break; - } - case PP2_TYPE_UNIQUE_ID: { - if (istlen(tlv) > UNIQUEID_LEN) - goto bad_header; - break; - } - default: - break; - } + n_crc32c = read_n32(istptr(tlv)); + write_n32(istptr(tlv), 0); // compute with CRC==0 - /* If we did not find a known TLV type that we can optimize for, we generically allocate it */ - data_len = get_tlv_length(tlv_packet); + if (hash_crc32c(trash.area, total_v2_len) != n_crc32c) + goto bad_header; + break; + } +#ifdef USE_NS + case PP2_TYPE_NETNS: { + const struct netns_entry *ns; - /* Prevent attackers from allocating too much memory */ - if (unlikely(data_len > HA_PP2_MAX_ALLOC)) - goto fail; + ns = netns_store_lookup(istptr(tlv), istlen(tlv)); + if (ns) + conn->proxy_netns = ns; + break; + } +#endif + case PP2_TYPE_AUTHORITY: { + /* For now, keep the length restriction by HAProxy */ + if (istlen(tlv) > HA_PP2_AUTHORITY_MAX) + goto bad_header; - /* Alloc memory based on data_len */ - if (data_len > HA_PP2_TLV_VALUE_256) - new_tlv = malloc(get_tlv_length(tlv_packet) + sizeof(struct conn_tlv_list)); - else if (data_len <= HA_PP2_TLV_VALUE_128) - new_tlv = 
pool_alloc(pool_head_pp_tlv_128); - else - new_tlv = pool_alloc(pool_head_pp_tlv_256); + break; + } + case PP2_TYPE_UNIQUE_ID: { + if (istlen(tlv) > UNIQUEID_LEN) + goto bad_header; + break; + } + default: + break; + } - if (unlikely(!new_tlv)) - goto fail; + /* If we did not find a known TLV type that we can optimize for, we generically allocate it */ + data_len = get_tlv_length(tlv_packet); - new_tlv->type = tlv_packet->type; + /* Prevent attackers from allocating too much memory */ + if (unlikely(data_len > HA_PP2_MAX_ALLOC)) + goto fail; - /* Save TLV to make it accessible via sample fetch */ - memcpy(new_tlv->value, tlv.ptr, data_len); - new_tlv->len = data_len; + /* Alloc memory based on data_len */ + if (data_len > HA_PP2_TLV_VALUE_256) + new_tlv = malloc(get_tlv_length(tlv_packet) + sizeof(struct conn_tlv_list)); + else if (data_len <= HA_PP2_TLV_VALUE_128) + new_tlv = pool_alloc(pool_head_pp_tlv_128); + else + new_tlv = pool_alloc(pool_head_pp_tlv_256); - LIST_APPEND(&conn->tlv_list, &new_tlv->list); - } + if (unlikely(!new_tlv)) + goto fail; + new_tlv->type = tlv_packet->type; - /* Verify that the PROXYv2 header ends at a TLV boundary. - * This is can not be true, because the TLV parsing already - * verifies that a TLV does not exceed the total length and - * also that there is space for a TLV header. - */ - BUG_ON(tlv_offset != total_v2_len); + /* Save TLV to make it accessible via sample fetch */ + memcpy(new_tlv->value, tlv.ptr, data_len); + new_tlv->len = data_len; - /* unsupported protocol, keep local connection address */ - break; - case 0x00: /* LOCAL command */ - /* keep local connection address for LOCAL */ - break; - default: - goto bad_header; /* not a supported command */ + LIST_APPEND(&conn->tlv_list, &new_tlv->list); } + /* Verify that the PROXYv2 header ends at a TLV boundary. 
+ * This is can not be true, because the TLV parsing already + * verifies that a TLV does not exceed the total length and + * also that there is space for a TLV header. + */ + BUG_ON(tlv_offset != total_v2_len); + trash.data = total_v2_len; goto eat_header; @@ -1305,10 +1321,11 @@ int conn_send_proxy(struct connection *conn, unsigned int flag) */ if (sc && sc_strm(sc)) { + struct stream *strm = __sc_strm(sc); ret = make_proxy_line(trash.area, trash.size, objt_server(conn->target), sc_conn(sc_opposite(sc)), - __sc_strm(sc)); + strm, strm_sess(strm)); } else { /* The target server expects a LOCAL line to be sent first. Retrieving @@ -1319,7 +1336,7 @@ int conn_send_proxy(struct connection *conn, unsigned int flag) ret = make_proxy_line(trash.area, trash.size, objt_server(conn->target), conn, - NULL); + NULL, conn->owner); } if (!ret) @@ -1925,7 +1942,7 @@ static int make_tlv(char *dest, int dest_len, char type, uint16_t length, const } /* Note: <remote> is explicitly allowed to be NULL */ -static int make_proxy_line_v2(char *buf, int buf_len, struct server *srv, struct connection *remote, struct stream *strm) +static int make_proxy_line_v2(char *buf, int buf_len, struct server *srv, struct connection *remote, struct stream *strm, struct session *sess) { const char pp2_signature[] = PP2_SIGNATURE; void *tlv_crc32c_p = NULL; @@ -2006,7 +2023,7 @@ static int make_proxy_line_v2(char *buf, int buf_len, struct server *srv, struct } } - if (strm) { + if (sess) { struct buffer *replace = NULL; list_for_each_entry(srv_tlv, &srv->pp_tlvs, list) { @@ -2015,12 +2032,12 @@ static int make_proxy_line_v2(char *buf, int buf_len, struct server *srv, struct /* Users will always need to provide a value, in case of forwarding, they should use fc_pp_tlv. * for generic types. Otherwise, we will send an empty TLV. 
*/ - if (!LIST_ISEMPTY(&srv_tlv->fmt)) { + if (!lf_expr_isempty(&srv_tlv->fmt)) { replace = alloc_trash_chunk(); if (unlikely(!replace)) return 0; - replace->data = build_logline(strm, replace->area, replace->size, &srv_tlv->fmt); + replace->data = sess_build_logline(sess, strm, replace->area, replace->size, &srv_tlv->fmt); if (unlikely((buf_len - ret) < sizeof(struct tlv))) { free_trash_chunk(replace); @@ -2163,12 +2180,12 @@ static int make_proxy_line_v2(char *buf, int buf_len, struct server *srv, struct } /* Note: <remote> is explicitly allowed to be NULL */ -int make_proxy_line(char *buf, int buf_len, struct server *srv, struct connection *remote, struct stream *strm) +int make_proxy_line(char *buf, int buf_len, struct server *srv, struct connection *remote, struct stream *strm, struct session *sess) { int ret = 0; if (srv && (srv->pp_opts & SRV_PP_V2)) { - ret = make_proxy_line_v2(buf, buf_len, srv, remote, strm); + ret = make_proxy_line_v2(buf, buf_len, srv, remote, strm, sess); } else { const struct sockaddr_storage *src = NULL; @@ -2514,6 +2531,59 @@ int smp_fetch_fc_err_str(const struct arg *args, struct sample *smp, const char return 1; } + +/* fetch the current number of streams opened for a connection */ +int smp_fetch_fc_nb_streams(const struct arg *args, struct sample *smp, const char *kw, void *private) +{ + struct connection *conn; + unsigned int nb_strm; + + conn = (kw[0] != 'b') ? objt_conn(smp->sess->origin) : smp->strm ? 
sc_conn(smp->strm->scb) : NULL; + + if (!conn) + return 0; + + if (!conn->mux || !conn->mux->ctl) { + if (!conn->mux) + smp->flags |= SMP_F_MAY_CHANGE; + return 0; + } + + nb_strm = conn->mux->ctl(conn, MUX_CTL_GET_NBSTRM, NULL); + + smp->flags = SMP_F_VOL_TEST; + smp->data.type = SMP_T_SINT; + smp->data.u.sint = nb_strm; + + return 1; +} + +/* fetch the maximum number of streams supported by a connection */ +int smp_fetch_fc_streams_limit(const struct arg *args, struct sample *smp, const char *kw, void *private) +{ + struct connection *conn; + unsigned int strm_limit; + + conn = (kw[0] != 'b') ? objt_conn(smp->sess->origin) : smp->strm ? sc_conn(smp->strm->scb) : NULL; + + if (!conn) + return 0; + + if (!conn->mux || !conn->mux->ctl) { + if (!conn->mux) + smp->flags |= SMP_F_MAY_CHANGE; + return 0; + } + + strm_limit = conn->mux->ctl(conn, MUX_CTL_GET_MAXSTRM, NULL); + + smp->flags = 0; + smp->data.type = SMP_T_SINT; + smp->data.u.sint = strm_limit; + + return 1; +} + /* Note: must not be declared <const> as its list will be overwritten. * Note: fetches that may return multiple types should be declared using the * appropriate pseudo-type. 
If not available it must be declared as the lowest @@ -2524,14 +2594,18 @@ static struct sample_fetch_kw_list sample_fetch_keywords = {ILH, { { "bc_err_str", smp_fetch_fc_err_str, 0, NULL, SMP_T_STR, SMP_USE_L4SRV }, { "bc_glitches", smp_fetch_fc_glitches, 0, NULL, SMP_T_SINT, SMP_USE_L4SRV }, { "bc_http_major", smp_fetch_fc_http_major, 0, NULL, SMP_T_SINT, SMP_USE_L4SRV }, + { "bc_nb_streams", smp_fetch_fc_nb_streams, 0, NULL, SMP_T_SINT, SMP_USE_L5SRV }, + { "bc_setting_streams_limit", smp_fetch_fc_streams_limit, 0, NULL, SMP_T_SINT, SMP_USE_L5SRV }, { "fc_err", smp_fetch_fc_err, 0, NULL, SMP_T_SINT, SMP_USE_L4CLI }, { "fc_err_str", smp_fetch_fc_err_str, 0, NULL, SMP_T_STR, SMP_USE_L4CLI }, { "fc_glitches", smp_fetch_fc_glitches, 0, NULL, SMP_T_SINT, SMP_USE_L4CLI }, { "fc_http_major", smp_fetch_fc_http_major, 0, NULL, SMP_T_SINT, SMP_USE_L4CLI }, { "fc_rcvd_proxy", smp_fetch_fc_rcvd_proxy, 0, NULL, SMP_T_BOOL, SMP_USE_L4CLI }, + { "fc_nb_streams", smp_fetch_fc_nb_streams, 0, NULL, SMP_T_SINT, SMP_USE_L4CLI }, { "fc_pp_authority", smp_fetch_fc_pp_authority, 0, NULL, SMP_T_STR, SMP_USE_L4CLI }, { "fc_pp_unique_id", smp_fetch_fc_pp_unique_id, 0, NULL, SMP_T_STR, SMP_USE_L4CLI }, - { "fc_pp_tlv", smp_fetch_fc_pp_tlv, ARG1(1, STR), smp_check_tlv_type, SMP_T_STR, SMP_USE_L4CLI }, + { "fc_pp_tlv", smp_fetch_fc_pp_tlv, ARG1(1, STR), smp_check_tlv_type, SMP_T_STR, SMP_USE_L5CLI }, + { "fc_settings_streams_limit", smp_fetch_fc_streams_limit, 0, NULL, SMP_T_SINT, SMP_USE_L5CLI }, { /* END */ }, }}; @@ -2544,9 +2618,38 @@ static struct cfg_kw_list cfg_kws = {ILH, { INITCALL1(STG_REGISTER, cfg_register_keywords, &cfg_kws); +/* Generate the hash of a connection with params as input + * Each non-null field of params is taken into account for the hash calcul. + */ +uint64_t conn_hash_prehash(const char *buf, size_t size) +{ + return XXH64(buf, size, 0); +} + +/* Computes <data> hash into <hash>. In the same time, <flags> + * are updated with <type> for the hash header. 
+ */ +static void conn_hash_update(XXH64_state_t *hash, + const void *data, size_t size, + enum conn_hash_params_t *flags, + enum conn_hash_params_t type) +{ + XXH64_update(hash, data, size); + *flags |= type; +} + +static uint64_t conn_hash_digest(XXH64_state_t *hash, + enum conn_hash_params_t flags) +{ + const uint64_t flags_u64 = (uint64_t)flags; + const uint64_t f_hash = XXH64_digest(hash); + + return (flags_u64 << CONN_HASH_PAYLOAD_LEN) | CONN_HASH_GET_PAYLOAD(f_hash); +} + /* private function to handle sockaddr as input for connection hash */ static void conn_calculate_hash_sockaddr(const struct sockaddr_storage *ss, - char *buf, size_t *idx, + XXH64_state_t *hash, enum conn_hash_params_t *hash_flags, enum conn_hash_params_t param_type_addr, enum conn_hash_params_t param_type_port) @@ -2558,12 +2661,12 @@ static void conn_calculate_hash_sockaddr(const struct sockaddr_storage *ss, case AF_INET: addr = (struct sockaddr_in *)ss; - conn_hash_update(buf, idx, + conn_hash_update(hash, &addr->sin_addr, sizeof(addr->sin_addr), hash_flags, param_type_addr); if (addr->sin_port) { - conn_hash_update(buf, idx, + conn_hash_update(hash, &addr->sin_port, sizeof(addr->sin_port), hash_flags, param_type_port); } @@ -2573,12 +2676,12 @@ static void conn_calculate_hash_sockaddr(const struct sockaddr_storage *ss, case AF_INET6: addr6 = (struct sockaddr_in6 *)ss; - conn_hash_update(buf, idx, + conn_hash_update(hash, &addr6->sin6_addr, sizeof(addr6->sin6_addr), hash_flags, param_type_addr); if (addr6->sin6_port) { - conn_hash_update(buf, idx, + conn_hash_update(hash, &addr6->sin6_port, sizeof(addr6->sin6_port), hash_flags, param_type_port); } @@ -2587,76 +2690,48 @@ static void conn_calculate_hash_sockaddr(const struct sockaddr_storage *ss, } } -/* Generate the hash of a connection with params as input - * Each non-null field of params is taken into account for the hash calcul. 
- */ -uint64_t conn_hash_prehash(char *buf, size_t size) -{ - return XXH64(buf, size, 0); -} - -/* Append <data> into <buf> at <idx> offset in preparation for connection hash - * calcul. <idx> is incremented beyond data <size>. In the same time, <flags> - * are updated with <type> for the hash header. - */ -void conn_hash_update(char *buf, size_t *idx, - const void *data, size_t size, - enum conn_hash_params_t *flags, - enum conn_hash_params_t type) -{ - memcpy(&buf[*idx], data, size); - *idx += size; - *flags |= type; -} - -uint64_t conn_hash_digest(char *buf, size_t bufsize, - enum conn_hash_params_t flags) -{ - const uint64_t flags_u64 = (uint64_t)flags; - const uint64_t hash = XXH64(buf, bufsize, 0); - - return (flags_u64 << CONN_HASH_PAYLOAD_LEN) | CONN_HASH_GET_PAYLOAD(hash); -} - uint64_t conn_calculate_hash(const struct conn_hash_params *params) { - char *buf; - size_t idx = 0; - uint64_t hash = 0; enum conn_hash_params_t hash_flags = 0; + XXH64_state_t hash; - buf = trash.area; + XXH64_reset(&hash, 0); - conn_hash_update(buf, &idx, ¶ms->target, sizeof(params->target), &hash_flags, 0); + conn_hash_update(&hash, ¶ms->target, sizeof(params->target), &hash_flags, 0); - if (params->sni_prehash) { - conn_hash_update(buf, &idx, - ¶ms->sni_prehash, sizeof(params->sni_prehash), - &hash_flags, CONN_HASH_PARAMS_TYPE_SNI); + if (params->name_prehash) { + conn_hash_update(&hash, + ¶ms->name_prehash, sizeof(params->name_prehash), + &hash_flags, CONN_HASH_PARAMS_TYPE_NAME); } if (params->dst_addr) { conn_calculate_hash_sockaddr(params->dst_addr, - buf, &idx, &hash_flags, + &hash, &hash_flags, CONN_HASH_PARAMS_TYPE_DST_ADDR, CONN_HASH_PARAMS_TYPE_DST_PORT); } if (params->src_addr) { conn_calculate_hash_sockaddr(params->src_addr, - buf, &idx, &hash_flags, + &hash, &hash_flags, CONN_HASH_PARAMS_TYPE_SRC_ADDR, CONN_HASH_PARAMS_TYPE_SRC_PORT); } if (params->proxy_prehash) { - conn_hash_update(buf, &idx, + conn_hash_update(&hash, ¶ms->proxy_prehash, 
sizeof(params->proxy_prehash), &hash_flags, CONN_HASH_PARAMS_TYPE_PROXY); } - hash = conn_hash_digest(buf, idx, hash_flags); - return hash; + if (params->mark_tos_prehash) { + conn_hash_update(&hash, + ¶ms->mark_tos_prehash, sizeof(params->mark_tos_prehash), + &hash_flags, CONN_HASH_PARAMS_TYPE_MARK_TOS); + } + + return conn_hash_digest(&hash, hash_flags); } /* Reverse a <conn> connection instance. This effectively moves the connection @@ -2695,7 +2770,7 @@ int conn_reverse(struct connection *conn) /* data cannot wrap else prehash usage is incorrect */ BUG_ON(b_data(&conn->reverse.name) != b_contig_data(&conn->reverse.name, 0)); - hash_params.sni_prehash = + hash_params.name_prehash = conn_hash_prehash(b_head(&conn->reverse.name), b_data(&conn->reverse.name)); } @@ -2722,7 +2797,10 @@ int conn_reverse(struct connection *conn) conn->target = &l->obj_type; conn->flags |= CO_FL_ACT_REVERSING; - task_wakeup(l->rx.rhttp.task, TASK_WOKEN_ANY); + task_wakeup(l->rx.rhttp.task, TASK_WOKEN_RES); + + /* Initialize session origin after reversal. Mandatory for several fetches. */ + sess->origin = &conn->obj_type; } /* Invert source and destination addresses if already set. 
*/ diff --git a/src/cpuset.c b/src/cpuset.c index 82e350f..a20b81a 100644 --- a/src/cpuset.c +++ b/src/cpuset.c @@ -280,7 +280,7 @@ int cpu_map_configured(void) static int cpuset_alloc(void) { /* allocate the structures used to store CPU topology info */ - cpu_map = (struct cpu_map*)calloc(MAX_TGROUPS, sizeof(*cpu_map)); + cpu_map = calloc(MAX_TGROUPS, sizeof(*cpu_map)); if (!cpu_map) return 0; diff --git a/src/debug.c b/src/debug.c index 756c194..5f21f02 100644 --- a/src/debug.c +++ b/src/debug.c @@ -46,6 +46,7 @@ #include <haproxy/thread.h> #include <haproxy/time.h> #include <haproxy/tools.h> +#include <haproxy/trace.h> #include <import/ist.h> @@ -112,7 +113,7 @@ struct post_mortem { uid_t boot_uid; gid_t boot_gid; struct rlimit limit_fd; // RLIMIT_NOFILE - struct rlimit limit_ram; // RLIMIT_AS or RLIMIT_DATA + struct rlimit limit_ram; // RLIMIT_DATA #if defined(USE_THREAD) struct { @@ -456,13 +457,8 @@ void ha_task_dump(struct buffer *buf, const struct task *task, const char *pfx) */ static int cli_io_handler_show_threads(struct appctx *appctx) { - struct stconn *sc = appctx_sc(appctx); int thr; - /* FIXME: Don't watch the other side !*/ - if (unlikely(sc_opposite(sc)->flags & SC_FL_SHUT_DONE)) - return 1; - if (appctx->st0) thr = appctx->st1; else @@ -666,7 +662,7 @@ int debug_parse_cli_bug(char **args, char *payload, struct appctx *appctx, void return 1; _HA_ATOMIC_INC(&debug_commands_issued); - BUG_ON(one > zero); + BUG_ON(one > zero, "This was triggered on purpose from the CLI 'debug dev bug' command."); return 1; } @@ -679,7 +675,7 @@ int debug_parse_cli_warn(char **args, char *payload, struct appctx *appctx, void return 1; _HA_ATOMIC_INC(&debug_commands_issued); - WARN_ON(one > zero); + WARN_ON(one > zero, "This was triggered on purpose from the CLI 'debug dev warn' command."); return 1; } @@ -692,7 +688,7 @@ int debug_parse_cli_check(char **args, char *payload, struct appctx *appctx, voi return 1; _HA_ATOMIC_INC(&debug_commands_issued); - CHECK_IF(one > 
zero); + CHECK_IF(one > zero, "This was triggered on purpose from the CLI 'debug dev check' command."); return 1; } @@ -1504,6 +1500,112 @@ static int debug_parse_cli_sched(char **args, char *payload, struct appctx *appc return cli_err(appctx, "Not enough memory"); } +#if defined(DEBUG_DEV) +/* All of this is for "trace dbg" */ + +static struct trace_source trace_dbg __read_mostly = { + .name = IST("dbg"), + .desc = "trace debugger", + .report_events = ~0, // report everything by default +}; + +#define TRACE_SOURCE &trace_dbg +INITCALL1(STG_REGISTER, trace_register_source, TRACE_SOURCE); + +/* This is the task handler used to send traces in loops. Note that the task's + * context contains the number of remaining calls to be done. The task sends 20 + * messages per wakeup. + */ +static struct task *debug_trace_task(struct task *t, void *ctx, unsigned int state) +{ + ulong count; + + /* send 2 traces enter/leave +18 devel = 20 traces total */ + TRACE_ENTER(1); + TRACE_DEVEL("msg01 has 20 bytes .", 1); + TRACE_DEVEL("msg02 has 20 bytes .", 1); + TRACE_DEVEL("msg03 has 20 bytes .", 1); + TRACE_DEVEL("msg04 has 70 bytes payload: 0123456789 0123456789 0123456789 012345678", 1); + TRACE_DEVEL("msg05 has 70 bytes payload: 0123456789 0123456789 0123456789 012345678", 1); + TRACE_DEVEL("msg06 has 70 bytes payload: 0123456789 0123456789 0123456789 012345678", 1); + TRACE_DEVEL("msg07 has 120 bytes payload: 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 012", 1); + TRACE_DEVEL("msg08 has 120 bytes payload: 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 012", 1); + TRACE_DEVEL("msg09 has 120 bytes payload: 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 012", 1); + TRACE_DEVEL("msg10 has 170 bytes payload: 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 012345678", 1); + 
TRACE_DEVEL("msg11 has 170 bytes payload: 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 012345678", 1); + TRACE_DEVEL("msg12 has 170 bytes payload: 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 012345678", 1); + TRACE_DEVEL("msg13 has 220 bytes payload: 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123", 1); + TRACE_DEVEL("msg14 has 220 bytes payload: 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123", 1); + TRACE_DEVEL("msg15 has 220 bytes payload: 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123", 1); + TRACE_DEVEL("msg16 has 270 bytes payload: 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789", 1); + TRACE_DEVEL("msg17 has 270 bytes payload: 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789", 1); + TRACE_DEVEL("msg18 has 270 bytes payload: 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789", 1); + 
TRACE_LEAVE(1); + + count = (ulong)t->context; + t->context = (void*)count - 1; + + if (count) + task_wakeup(t, TASK_WOKEN_MSG); + else { + task_destroy(t); + t = NULL; + } + return t; +} + +/* parse a "debug dev trace" command + * debug dev trace <nbthr>. + * It will create as many tasks (one per thread), starting from lowest threads. + * The traces will stop after 1M wakeups or 20M messages ~= 4GB of data. + */ +static int debug_parse_cli_trace(char **args, char *payload, struct appctx *appctx, void *private) +{ + unsigned long count = 1; + unsigned long i; + char *msg = NULL; + char *endarg; + + if (!cli_has_level(appctx, ACCESS_LVL_ADMIN)) + return 1; + + _HA_ATOMIC_INC(&debug_commands_issued); + + if (!args[3][0]) { + memprintf(&msg, "Need a thread count. Note that 20M msg will be sent per thread.\n"); + goto fail; + } + + /* parse the new value . */ + count = strtoll(args[3], &endarg, 0); + if (args[3][1] && *endarg) { + memprintf(&msg, "Ignoring unparsable thread number '%s'.\n", args[3]); + goto fail; + } + + if (count >= global.nbthread) + count = global.nbthread; + + for (i = 0; i < count; i++) { + struct task *task = task_new_on(i); + + if (!task) + goto fail; + + task->process = debug_trace_task; + task->context = (void*)(ulong)1000000; // 1M wakeups = 20M messages + task_wakeup(task, TASK_WOKEN_INIT); + } + + if (msg && *msg) + return cli_dynmsg(appctx, LOG_INFO, msg); + return 1; + + fail: + return cli_dynmsg(appctx, LOG_ERR, msg); +} +#endif /* DEBUG_DEV */ + /* CLI state for "debug dev fd" */ struct dev_fd_ctx { int start_fd; @@ -1531,7 +1633,6 @@ static int debug_parse_cli_fd(char **args, char *payload, struct appctx *appctx, static int debug_iohandler_fd(struct appctx *appctx) { struct dev_fd_ctx *ctx = appctx->svcctx; - struct stconn *sc = appctx_sc(appctx); struct sockaddr_storage sa; struct stat statbuf; socklen_t salen, vlen; @@ -1540,10 +1641,6 @@ static int debug_iohandler_fd(struct appctx *appctx) int ret = 1; int i, fd; - /* FIXME: Don't 
watch the other side !*/ - if (unlikely(sc_opposite(sc)->flags & SC_FL_SHUT_DONE)) - goto end; - chunk_reset(&trash); thread_isolate(); @@ -1693,7 +1790,6 @@ static int debug_iohandler_fd(struct appctx *appctx) } thread_release(); - end: return ret; } @@ -1763,15 +1859,10 @@ static int debug_parse_cli_memstats(char **args, char *payload, struct appctx *a static int debug_iohandler_memstats(struct appctx *appctx) { struct dev_mem_ctx *ctx = appctx->svcctx; - struct stconn *sc = appctx_sc(appctx); struct mem_stats *ptr; const char *pfx = ctx->match; int ret = 1; - /* FIXME: Don't watch the other side !*/ - if (unlikely(sc_opposite(sc)->flags & SC_FL_SHUT_DONE)) - goto end; - if (!ctx->width) { /* we don't know the first column's width, let's compute it * now based on a first pass on printable entries and their @@ -2182,11 +2273,7 @@ static int feed_post_mortem() post_mortem.process.boot_gid = getegid(); getrlimit(RLIMIT_NOFILE, &post_mortem.process.limit_fd); -#if defined(RLIMIT_AS) - getrlimit(RLIMIT_AS, &post_mortem.process.limit_ram); -#elif defined(RLIMIT_DATA) getrlimit(RLIMIT_DATA, &post_mortem.process.limit_ram); -#endif if (strcmp(post_mortem.platform.utsname.sysname, "Linux") == 0) feed_post_mortem_linux(); @@ -2295,6 +2382,9 @@ static struct cli_kw_list cli_kws = {{ },{ {{ "debug", "dev", "sym", NULL }, "debug dev sym <addr> : resolve symbol address", debug_parse_cli_sym, NULL, NULL, NULL, ACCESS_EXPERT }, {{ "debug", "dev", "task", NULL }, "debug dev task <ptr> [wake|expire|kill] : show/wake/expire/kill task/tasklet", debug_parse_cli_task, NULL, NULL, NULL, ACCESS_EXPERT }, {{ "debug", "dev", "tkill", NULL }, "debug dev tkill [thr] [sig] : send signal to thread", debug_parse_cli_tkill, NULL, NULL, NULL, ACCESS_EXPERT }, +#if defined(DEBUG_DEV) + {{ "debug", "dev", "trace", NULL }, "debug dev trace [nbthr] : flood traces from that many threads", debug_parse_cli_trace, NULL, NULL, NULL, ACCESS_EXPERT }, +#endif {{ "debug", "dev", "warn", NULL }, "debug dev 
warn : call WARN_ON() and possibly crash", debug_parse_cli_warn, NULL, NULL, NULL, ACCESS_EXPERT }, {{ "debug", "dev", "write", NULL }, "debug dev write [size] : write that many bytes in return", debug_parse_cli_write, NULL, NULL, NULL, ACCESS_EXPERT }, @@ -27,10 +27,10 @@ #include <haproxy/cli.h> #include <haproxy/dgram.h> #include <haproxy/dns.h> +#include <haproxy/dns_ring.h> #include <haproxy/errors.h> #include <haproxy/fd.h> #include <haproxy/log.h> -#include <haproxy/ring.h> #include <haproxy/sc_strm.h> #include <haproxy/stconn.h> #include <haproxy/stream.h> @@ -108,7 +108,7 @@ int dns_send_nameserver(struct dns_nameserver *ns, void *buf, size_t len) struct ist myist; myist = ist2(buf, len); - ret = ring_write(ns->dgram->ring_req, DNS_TCP_MSG_MAX_SIZE, NULL, 0, &myist, 1); + ret = dns_ring_write(ns->dgram->ring_req, DNS_TCP_MSG_MAX_SIZE, NULL, 0, &myist, 1); if (!ret) { ns->counters->snd_error++; HA_SPIN_UNLOCK(DNS_LOCK, &dgram->lock); @@ -131,7 +131,7 @@ int dns_send_nameserver(struct dns_nameserver *ns, void *buf, size_t len) struct ist myist; myist = ist2(buf, len); - ret = ring_write(ns->stream->ring_req, DNS_TCP_MSG_MAX_SIZE, NULL, 0, &myist, 1); + ret = dns_ring_write(ns->stream->ring_req, DNS_TCP_MSG_MAX_SIZE, NULL, 0, &myist, 1); if (!ret) { ns->counters->snd_error++; return -1; @@ -290,7 +290,7 @@ static void dns_resolve_send(struct dgram_conn *dgram) { int fd; struct dns_nameserver *ns; - struct ring *ring; + struct dns_ring *ring; struct buffer *buf; uint64_t msg_len; size_t len, cnt, ofs; @@ -407,21 +407,21 @@ int dns_dgram_init(struct dns_nameserver *ns, struct sockaddr_storage *sk) ns->dgram = dgram; dgram->ofs_req = ~0; /* init ring offset */ - dgram->ring_req = ring_new(2*DNS_TCP_MSG_RING_MAX_SIZE); + dgram->ring_req = dns_ring_new(2*DNS_TCP_MSG_RING_MAX_SIZE); if (!dgram->ring_req) { ha_alert("memory allocation error initializing the ring for nameserver.\n"); goto out; } /* attach the task as reader */ - if (!ring_attach(dgram->ring_req)) { + 
if (!dns_ring_attach(dgram->ring_req)) { /* mark server attached to the ring */ ha_alert("nameserver sets too many watchers > 255 on ring. This is a bug and should not happen.\n"); goto out; } return 0; out: - ring_free(dgram->ring_req); + dns_ring_free(dgram->ring_req); free(dgram); @@ -436,14 +436,14 @@ static void dns_session_io_handler(struct appctx *appctx) { struct stconn *sc = appctx_sc(appctx); struct dns_session *ds = appctx->svcctx; - struct ring *ring = &ds->ring; + struct dns_ring *ring = &ds->ring; struct buffer *buf = &ring->buf; uint64_t msg_len; int available_room; size_t len, cnt, ofs; int ret = 0; - if (unlikely(se_fl_test(appctx->sedesc, (SE_FL_EOS|SE_FL_ERROR|SE_FL_SHR|SE_FL_SHW)))) { + if (unlikely(se_fl_test(appctx->sedesc, (SE_FL_EOS|SE_FL_ERROR)))) { co_skip(sc_oc(sc), co_data(sc_oc(sc))); goto out; } @@ -471,7 +471,7 @@ static void dns_session_io_handler(struct appctx *appctx) } HA_RWLOCK_WRLOCK(DNS_LOCK, &ring->lock); - LIST_DEL_INIT(&appctx->wait_entry); + MT_LIST_DELETE(&appctx->wait_entry); HA_RWLOCK_WRUNLOCK(DNS_LOCK, &ring->lock); HA_RWLOCK_RDLOCK(DNS_LOCK, &ring->lock); @@ -633,8 +633,8 @@ static void dns_session_io_handler(struct appctx *appctx) if (ret) { /* let's be woken up once new request to write arrived */ HA_RWLOCK_WRLOCK(DNS_LOCK, &ring->lock); - BUG_ON(LIST_INLIST(&appctx->wait_entry)); - LIST_APPEND(&ring->waiters, &appctx->wait_entry); + BUG_ON(MT_LIST_INLIST(&appctx->wait_entry)); + MT_LIST_APPEND(&ring->waiters, &appctx->wait_entry); HA_RWLOCK_WRUNLOCK(DNS_LOCK, &ring->lock); applet_have_no_more_data(appctx); } @@ -797,7 +797,7 @@ void dns_session_free(struct dns_session *ds) BUG_ON(!LIST_ISEMPTY(&ds->list)); BUG_ON(!LIST_ISEMPTY(&ds->waiter)); BUG_ON(!LIST_ISEMPTY(&ds->queries)); - BUG_ON(!LIST_ISEMPTY(&ds->ring.waiters)); + BUG_ON(!MT_LIST_ISEMPTY(&ds->ring.waiters)); BUG_ON(!eb_is_empty(&ds->query_ids)); pool_free(dns_session_pool, ds); } @@ -844,12 +844,12 @@ static void dns_session_release(struct appctx *appctx) 
if (!ds) return; - /* We do not call ring_appctx_detach here + /* We do not call dns_ring_appctx_detach here * because we want to keep readers counters * to retry a conn with a different appctx. */ HA_RWLOCK_WRLOCK(DNS_LOCK, &ds->ring.lock); - LIST_DEL_INIT(&appctx->wait_entry); + MT_LIST_DELETE(&appctx->wait_entry); HA_RWLOCK_WRUNLOCK(DNS_LOCK, &ds->ring.lock); dss = ds->dss; @@ -1058,9 +1058,9 @@ struct dns_session *dns_session_new(struct dns_stream_server *dss) if (!ds->tx_ring_area) goto error; - ring_init(&ds->ring, ds->tx_ring_area, DNS_TCP_MSG_RING_MAX_SIZE); + dns_ring_init(&ds->ring, ds->tx_ring_area, DNS_TCP_MSG_RING_MAX_SIZE); /* never fail because it is the first watcher attached to the ring */ - DISGUISE(ring_attach(&ds->ring)); + DISGUISE(dns_ring_attach(&ds->ring)); if ((ds->task_exp = task_new_here()) == NULL) goto error; @@ -1095,7 +1095,7 @@ static struct task *dns_process_req(struct task *t, void *context, unsigned int { struct dns_nameserver *ns = (struct dns_nameserver *)context; struct dns_stream_server *dss = ns->stream; - struct ring *ring = dss->ring_req; + struct dns_ring *ring = dss->ring_req; struct buffer *buf = &ring->buf; uint64_t msg_len; size_t len, cnt, ofs; @@ -1151,7 +1151,7 @@ static struct task *dns_process_req(struct task *t, void *context, unsigned int if (!LIST_ISEMPTY(&dss->free_sess)) { ds = LIST_NEXT(&dss->free_sess, struct dns_session *, list); - if (ring_write(&ds->ring, DNS_TCP_MSG_MAX_SIZE, NULL, 0, &myist, 1) > 0) { + if (dns_ring_write(&ds->ring, DNS_TCP_MSG_MAX_SIZE, NULL, 0, &myist, 1) > 0) { ds->nb_queries++; if (ds->nb_queries >= DNS_STREAM_MAX_PIPELINED_REQ) LIST_DEL_INIT(&ds->list); @@ -1171,8 +1171,8 @@ static struct task *dns_process_req(struct task *t, void *context, unsigned int if (!LIST_ISEMPTY(&dss->idle_sess)) { ds = LIST_NEXT(&dss->idle_sess, struct dns_session *, list); - /* ring is empty so this ring_write should never fail */ - ring_write(&ds->ring, DNS_TCP_MSG_MAX_SIZE, NULL, 0, &myist, 1); + /* 
ring is empty so this dns_ring_write should never fail */ + dns_ring_write(&ds->ring, DNS_TCP_MSG_MAX_SIZE, NULL, 0, &myist, 1); ds->nb_queries++; LIST_DEL_INIT(&ds->list); @@ -1196,8 +1196,8 @@ static struct task *dns_process_req(struct task *t, void *context, unsigned int /* allocate a new session */ ads = dns_session_new(dss); if (ads) { - /* ring is empty so this ring_write should never fail */ - ring_write(&ads->ring, DNS_TCP_MSG_MAX_SIZE, NULL, 0, &myist, 1); + /* ring is empty so this dns_ring_write should never fail */ + dns_ring_write(&ads->ring, DNS_TCP_MSG_MAX_SIZE, NULL, 0, &myist, 1); ads->nb_queries++; LIST_INSERT(&dss->free_sess, &ads->list); } @@ -1248,7 +1248,7 @@ int dns_stream_init(struct dns_nameserver *ns, struct server *srv) dss->maxconn = srv->maxconn; dss->ofs_req = ~0; /* init ring offset */ - dss->ring_req = ring_new(2*DNS_TCP_MSG_RING_MAX_SIZE); + dss->ring_req = dns_ring_new(2*DNS_TCP_MSG_RING_MAX_SIZE); if (!dss->ring_req) { ha_alert("memory allocation error initializing the ring for dns tcp server '%s'.\n", srv->id); goto out; @@ -1264,7 +1264,7 @@ int dns_stream_init(struct dns_nameserver *ns, struct server *srv) dss->task_req->context = ns; /* attach the task as reader */ - if (!ring_attach(dss->ring_req)) { + if (!dns_ring_attach(dss->ring_req)) { /* mark server attached to the ring */ ha_alert("server '%s': too many watchers for ring. this should never happen.\n", srv->id); goto out; @@ -1306,7 +1306,7 @@ out: if (dss && dss->task_req) task_destroy(dss->task_req); if (dss && dss->ring_req) - ring_free(dss->ring_req); + dns_ring_free(dss->ring_req); free(dss); return -1; diff --git a/src/dns_ring.c b/src/dns_ring.c new file mode 100644 index 0000000..01ce593 --- /dev/null +++ b/src/dns_ring.c @@ -0,0 +1,225 @@ +/* + * Ring buffer management + * This is a fork of ring.c for DNS usage. 
+ * + * Copyright (C) 2000-2019 Willy Tarreau - w@1wt.eu + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation, version 2.1 + * exclusively. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include <stdlib.h> +#include <haproxy/api.h> +#include <haproxy/applet.h> +#include <haproxy/buf.h> +#include <haproxy/cli.h> +#include <haproxy/dns_ring.h> +#include <haproxy/sc_strm.h> +#include <haproxy/stconn.h> +#include <haproxy/thread.h> + +/* Initialize a pre-allocated ring with the buffer area + * of size */ +void dns_ring_init(struct dns_ring *ring, void *area, size_t size) +{ + HA_RWLOCK_INIT(&ring->lock); + MT_LIST_INIT(&ring->waiters); + ring->readers_count = 0; + ring->buf = b_make(area, size, 0, 0); + /* write the initial RC byte */ + b_putchr(&ring->buf, 0); +} + +/* Creates and returns a ring buffer of size <size> bytes. Returns NULL on + * allocation failure. 
+ */ +struct dns_ring *dns_ring_new(size_t size) +{ + struct dns_ring *ring = NULL; + void *area = NULL; + + if (size < 2) + goto fail; + + ring = malloc(sizeof(*ring)); + if (!ring) + goto fail; + + area = malloc(size); + if (!area) + goto fail; + + dns_ring_init(ring, area, size); + return ring; + fail: + free(area); + free(ring); + return NULL; +} + +/* destroys and frees ring <ring> */ +void dns_ring_free(struct dns_ring *ring) +{ + if (!ring) + return; + + free(ring->buf.area); + free(ring); +} + +/* Tries to send <npfx> parts from <prefix> followed by <nmsg> parts from <msg> + * to ring <ring>. The message is sent atomically. It may be truncated to + * <maxlen> bytes if <maxlen> is non-null. There is no distinction between the + * two lists, it's just a convenience to help the caller prepend some prefixes + * when necessary. It takes the ring's write lock to make sure no other thread + * will touch the buffer during the update. Returns the number of bytes sent, + * or <=0 on failure. + */ +ssize_t dns_ring_write(struct dns_ring *ring, size_t maxlen, const struct ist pfx[], size_t npfx, const struct ist msg[], size_t nmsg) +{ + struct buffer *buf = &ring->buf; + struct appctx *appctx; + size_t totlen = 0; + size_t lenlen; + uint64_t dellen; + int dellenlen; + struct mt_list *elt1, elt2; + ssize_t sent = 0; + int i; + + /* we have to find some room to add our message (the buffer is + * never empty and at least contains the previous counter) and + * to update both the buffer contents and heads at the same + * time (it's doable using atomic ops but not worth the + * trouble, let's just lock). For this we first need to know + * the total message's length. We cannot measure it while + * copying due to the varint encoding of the length. 
+ */ + for (i = 0; i < npfx; i++) + totlen += pfx[i].len; + for (i = 0; i < nmsg; i++) + totlen += msg[i].len; + + if (totlen > maxlen) + totlen = maxlen; + + lenlen = varint_bytes(totlen); + + HA_RWLOCK_WRLOCK(RING_LOCK, &ring->lock); + if (lenlen + totlen + 1 + 1 > b_size(buf)) + goto done_buf; + + while (b_room(buf) < lenlen + totlen + 1) { + /* we need to delete the oldest message (from the end), + * and we have to stop if there's a reader stuck there. + * Unless there's corruption in the buffer it's guaranteed + * that we have enough data to find 1 counter byte, a + * varint-encoded length (1 byte min) and the message + * payload (0 bytes min). + */ + if (*b_head(buf)) + goto done_buf; + dellenlen = b_peek_varint(buf, 1, &dellen); + if (!dellenlen) + goto done_buf; + BUG_ON(b_data(buf) < 1 + dellenlen + dellen); + + b_del(buf, 1 + dellenlen + dellen); + } + + /* OK now we do have room */ + __b_put_varint(buf, totlen); + + totlen = 0; + for (i = 0; i < npfx; i++) { + size_t len = pfx[i].len; + + if (len + totlen > maxlen) + len = maxlen - totlen; + if (len) + __b_putblk(buf, pfx[i].ptr, len); + totlen += len; + } + + for (i = 0; i < nmsg; i++) { + size_t len = msg[i].len; + + if (len + totlen > maxlen) + len = maxlen - totlen; + if (len) + __b_putblk(buf, msg[i].ptr, len); + totlen += len; + } + + *b_tail(buf) = 0; buf->data++; // new read counter + sent = lenlen + totlen + 1; + + /* notify potential readers */ + mt_list_for_each_entry_safe(appctx, &ring->waiters, wait_entry, elt1, elt2) + appctx_wakeup(appctx); + + done_buf: + HA_RWLOCK_WRUNLOCK(RING_LOCK, &ring->lock); + return sent; +} + +/* Tries to attach appctx <appctx> as a new reader on ring <ring>. This is + * meant to be used by low level appctx code such as CLI or ring forwarding. + * For higher level functions, please see the relevant parts in appctx or CLI. + * It returns non-zero on success or zero on failure if too many users are + * already attached. 
On success, the caller MUST call dns_ring_detach_appctx() + * to detach itself, even if it was never woken up. + */ +int dns_ring_attach(struct dns_ring *ring) +{ + int users = ring->readers_count; + + do { + if (users >= 255) + return 0; + } while (!_HA_ATOMIC_CAS(&ring->readers_count, &users, users + 1)); + return 1; +} + +/* detach an appctx from a ring. The appctx is expected to be waiting at offset + * <ofs> relative to the beginning of the storage, or ~0 if not waiting yet. + * Nothing is done if <ring> is NULL. + */ +void dns_ring_detach_appctx(struct dns_ring *ring, struct appctx *appctx, size_t ofs) +{ + if (!ring) + return; + + HA_RWLOCK_WRLOCK(RING_LOCK, &ring->lock); + if (ofs != ~0) { + /* reader was still attached */ + if (ofs < b_head_ofs(&ring->buf)) + ofs += b_size(&ring->buf) - b_head_ofs(&ring->buf); + else + ofs -= b_head_ofs(&ring->buf); + + BUG_ON(ofs >= b_size(&ring->buf)); + MT_LIST_DELETE(&appctx->wait_entry); + HA_ATOMIC_DEC(b_peek(&ring->buf, ofs)); + } + HA_ATOMIC_DEC(&ring->readers_count); + HA_RWLOCK_WRUNLOCK(RING_LOCK, &ring->lock); +} + +/* + * Local variables: + * c-indent-level: 8 + * c-basic-offset: 8 + * End: + */ diff --git a/src/dynbuf.c b/src/dynbuf.c index 712e334..aec9667 100644 --- a/src/dynbuf.c +++ b/src/dynbuf.c @@ -15,10 +15,12 @@ #include <string.h> #include <haproxy/api.h> +#include <haproxy/cfgparse.h> #include <haproxy/dynbuf.h> #include <haproxy/global.h> #include <haproxy/list.h> #include <haproxy/pool.h> +#include <haproxy/tools.h> struct pool_head *pool_head_buffer __read_mostly; @@ -28,13 +30,24 @@ int init_buffer() void *buffer; int thr; int done; + int i; pool_head_buffer = create_pool("buffer", global.tune.bufsize, MEM_F_SHARED|MEM_F_EXACT); if (!pool_head_buffer) return 0; - for (thr = 0; thr < MAX_THREADS; thr++) - LIST_INIT(&ha_thread_ctx[thr].buffer_wq); + /* make sure any change to the queues assignment isn't overlooked */ + BUG_ON(DB_PERMANENT - DB_UNLIKELY - 1 != DYNBUF_NBQ); + BUG_ON(DB_MUX_RX_Q < 
DB_SE_RX_Q || DB_MUX_RX_Q >= DYNBUF_NBQ); + BUG_ON(DB_SE_RX_Q < DB_CHANNEL_Q || DB_SE_RX_Q >= DYNBUF_NBQ); + BUG_ON(DB_CHANNEL_Q < DB_MUX_TX_Q || DB_CHANNEL_Q >= DYNBUF_NBQ); + BUG_ON(DB_MUX_TX_Q >= DYNBUF_NBQ); + + for (thr = 0; thr < MAX_THREADS; thr++) { + for (i = 0; i < DYNBUF_NBQ; i++) + LIST_INIT(&ha_thread_ctx[thr].buffer_wq[i]); + ha_thread_ctx[thr].bufq_map = 0; + } /* The reserved buffer is what we leave behind us. Thus we always need @@ -102,6 +115,7 @@ void buffer_dump(FILE *o, struct buffer *b, int from, int to) void __offer_buffers(void *from, unsigned int count) { struct buffer_wait *wait, *wait_back; + int q; /* For now, we consider that all objects need 1 buffer, so we can stop * waking up them once we have enough of them to eat all the available @@ -109,18 +123,117 @@ void __offer_buffers(void *from, unsigned int count) * other tasks, but that's a rough estimate. Similarly, for each cached * event we'll need 1 buffer. */ - list_for_each_entry_safe(wait, wait_back, &th_ctx->buffer_wq, list) { - if (!count) - break; - - if (wait->target == from || !wait->wakeup_cb(wait->target)) + for (q = 0; q < DYNBUF_NBQ; q++) { + if (!(th_ctx->bufq_map & (1 << q))) continue; + BUG_ON_HOT(LIST_ISEMPTY(&th_ctx->buffer_wq[q])); + + list_for_each_entry_safe(wait, wait_back, &th_ctx->buffer_wq[q], list) { + if (!count) + break; + + if (wait->target == from || !wait->wakeup_cb(wait->target)) + continue; + + LIST_DEL_INIT(&wait->list); + count--; + } + if (LIST_ISEMPTY(&th_ctx->buffer_wq[q])) + th_ctx->bufq_map &= ~(1 << q); + } +} + +/* config parser for global "tune.buffers.limit", accepts a number >= 0 */ +static int cfg_parse_tune_buffers_limit(char **args, int section_type, struct proxy *curpx, + const struct proxy *defpx, const char *file, int line, + char **err) +{ + int limit; - LIST_DEL_INIT(&wait->list); - count--; + if (too_many_args(1, args, err, NULL)) + return -1; + + limit = atoi(args[1]); + if (limit < 0) { + memprintf(err, "'%s' expects a non-negative 
number but got '%s'.", args[0], args[1]); + return -1; + } + + global.tune.buf_limit = limit; + if (global.tune.buf_limit) { + if (global.tune.buf_limit < 3) + global.tune.buf_limit = 3; } + + return 0; } +/* config parser for global "tune.buffers.reserve", accepts a number >= 0 */ +static int cfg_parse_tune_buffers_reserve(char **args, int section_type, struct proxy *curpx, + const struct proxy *defpx, const char *file, int line, + char **err) +{ + int reserve; + + if (too_many_args(1, args, err, NULL)) + return -1; + + reserve = atoi(args[1]); + if (reserve < 0) { + memprintf(err, "'%s' expects a non-negative number but got '%s'.", args[0], args[1]); + return -1; + } + + global.tune.reserved_bufs = reserve; + return 0; +} + +/* allocate emergency buffers for the thread */ +static int alloc_emergency_buffers_per_thread(void) +{ + int idx; + + th_ctx->emergency_bufs_left = global.tune.reserved_bufs; + th_ctx->emergency_bufs = calloc(global.tune.reserved_bufs, sizeof(*th_ctx->emergency_bufs)); + if (!th_ctx->emergency_bufs) + return 0; + + for (idx = 0; idx < global.tune.reserved_bufs; idx++) { + /* reserved bufs are not subject to the limit, so we must push it */ + if (_HA_ATOMIC_LOAD(&pool_head_buffer->limit)) + _HA_ATOMIC_INC(&pool_head_buffer->limit); + th_ctx->emergency_bufs[idx] = pool_alloc_flag(pool_head_buffer, POOL_F_NO_POISON | POOL_F_NO_FAIL); + if (!th_ctx->emergency_bufs[idx]) + return 0; + } + + return 1; +} + +/* frees the thread's emergency buffers */ +static void free_emergency_buffers_per_thread(void) +{ + int idx; + + if (th_ctx->emergency_bufs) { + for (idx = 0; idx < global.tune.reserved_bufs; idx++) + pool_free(pool_head_buffer, th_ctx->emergency_bufs[idx]); + } + + ha_free(&th_ctx->emergency_bufs); +} + +/* config keyword parsers */ +static struct cfg_kw_list cfg_kws = {ILH, { + { CFG_GLOBAL, "tune.buffers.limit", cfg_parse_tune_buffers_limit }, + { CFG_GLOBAL, "tune.buffers.reserve", cfg_parse_tune_buffers_reserve }, + { 0, NULL, NULL } +}}; 
+ +INITCALL1(STG_REGISTER, cfg_register_keywords, &cfg_kws); +REGISTER_PER_THREAD_ALLOC(alloc_emergency_buffers_per_thread); +REGISTER_PER_THREAD_FREE(free_emergency_buffers_per_thread); + /* * Local variables: * c-indent-level: 8 diff --git a/src/errors.c b/src/errors.c index 7a2d14a..197a0cd 100644 --- a/src/errors.c +++ b/src/errors.c @@ -90,11 +90,7 @@ static struct ring *startup_logs_from_fd(int fd, int new) if (area == MAP_FAILED || area == NULL) goto error; - if (new) - r = ring_make_from_area(area, STARTUP_LOG_SIZE); - else - r = ring_cast_from_area(area); - + r = ring_make_from_area(area, STARTUP_LOG_SIZE, new); if (r == NULL) goto error; @@ -116,7 +112,7 @@ error: * Once in wait mode, the shm must be copied and closed. * */ -void startup_logs_init() +void startup_logs_init_shm() { struct ring *r = NULL; char *str_fd, *endptr; @@ -180,24 +176,29 @@ error: } -#else /* ! USE_SHM_OPEN */ +#endif /* ! USE_SHM_OPEN */ void startup_logs_init() { +#ifdef USE_SHM_OPEN + startup_logs_init_shm(); +#else /* ! USE_SHM_OPEN */ startup_logs = ring_new(STARTUP_LOG_SIZE); -} - #endif + if (startup_logs) + vma_set_name(ring_allocated_area(startup_logs), + ring_allocated_size(startup_logs), + "errors", "startup_logs"); +} /* free the startup logs, unmap if it was an shm */ void startup_logs_free(struct ring *r) { #ifdef USE_SHM_OPEN if (r == shm_startup_logs) - munmap(r, STARTUP_LOG_SIZE); - else + munmap(ring_allocated_area(r), STARTUP_LOG_SIZE); #endif /* ! 
USE_SHM_OPEN */ - ring_free(r); + ring_free(r); } /* duplicate a startup logs which was previously allocated in a shm */ @@ -206,12 +207,11 @@ struct ring *startup_logs_dup(struct ring *src) struct ring *dst = NULL; /* must use the size of the previous buffer */ - dst = ring_new(b_size(&src->buf)); + dst = ring_new(ring_allocated_size(src)); if (!dst) goto error; - b_reset(&dst->buf); - b_ncat(&dst->buf, &src->buf, b_data(&src->buf)); + ring_dup(dst, src, ring_size(src)); error: return dst; } diff --git a/src/ev_epoll.c b/src/ev_epoll.c index c42cf2e..352620d 100644 --- a/src/ev_epoll.c +++ b/src/ev_epoll.c @@ -275,6 +275,8 @@ static int init_epoll_per_thread() epoll_events = calloc(1, sizeof(struct epoll_event) * global.tune.maxpollevents); if (epoll_events == NULL) goto fail_alloc; + vma_set_name_id(epoll_events, sizeof(struct epoll_event) * global.tune.maxpollevents, + "ev_epoll", "epoll_events", tid + 1); if (MAX_THREADS > 1 && tid) { epoll_fd[tid] = epoll_create(global.maxsock + 1); diff --git a/src/ev_evports.c b/src/ev_evports.c index 07676e6..ee357bc 100644 --- a/src/ev_evports.c +++ b/src/ev_evports.c @@ -185,6 +185,14 @@ static void _do_poll(struct poller *p, int exp, int wake) do { int timeout = (global.tune.options & GTUNE_BUSY_POLLING) ? 0 : wait_time; int interrupted = 0; + /* Note: normally we should probably expect to pass + * global.tune.maxpollevents here so as to process multiple + * events at once, but it appears unreliable in tests, even + * starting with value 2, and it seems basically nobody's + * using that anymore so it's probably not worth spending days + * investigating this poller more to improve its performance, + * let's switch back to 1. 
--WT + */ nevlist = 1; /* desired number of events to be retrieved */ timeout_ts.tv_sec = (timeout / 1000); timeout_ts.tv_nsec = (timeout % 1000) * 1000000; @@ -194,6 +202,12 @@ static void _do_poll(struct poller *p, int exp, int wake) evports_evlist_max, &nevlist, /* updated to the number of events retrieved */ &timeout_ts); + + /* Be careful, nevlist here is always updated by the syscall + * even on status == -1, so it must always be respected + * otherwise events are lost. Awkward API BTW, I wonder how + * they thought ENOSYS ought to be handled... -WT + */ if (status != 0) { int e = errno; switch (e) { @@ -206,7 +220,7 @@ static void _do_poll(struct poller *p, int exp, int wake) /* nevlist >= 0 */ break; default: - nevlist = 0; + /* signal or anything else */ interrupted = 1; break; } diff --git a/src/ev_poll.c b/src/ev_poll.c index e98630c..8051420 100644 --- a/src/ev_poll.c +++ b/src/ev_poll.c @@ -25,6 +25,7 @@ #include <haproxy/signal.h> #include <haproxy/task.h> #include <haproxy/ticks.h> +#include <haproxy/tools.h> #ifndef POLLRDHUP @@ -249,6 +250,8 @@ static int init_poll_per_thread() poll_events = calloc(1, sizeof(struct pollfd) * global.maxsock); if (poll_events == NULL) return 0; + vma_set_name_id(poll_events, sizeof(struct pollfd) * global.maxsock, + "ev_poll", "poll_events", tid + 1); return 1; } @@ -279,8 +282,10 @@ static int _do_init(struct poller *p) if ((fd_evts[DIR_RD] = calloc(1, fd_evts_bytes)) == NULL) goto fail_srevt; + vma_set_name(fd_evts[DIR_RD], fd_evts_bytes, "ev_poll", "fd_evts_rd"); if ((fd_evts[DIR_WR] = calloc(1, fd_evts_bytes)) == NULL) goto fail_swevt; + vma_set_name(fd_evts[DIR_WR], fd_evts_bytes, "ev_poll", "fd_evts_wr"); hap_register_per_thread_init(init_poll_per_thread); hap_register_per_thread_deinit(deinit_poll_per_thread); diff --git a/src/ev_select.c b/src/ev_select.c index eadd588..9588e8a 100644 --- a/src/ev_select.c +++ b/src/ev_select.c @@ -21,6 +21,7 @@ #include <haproxy/global.h> #include <haproxy/task.h> #include 
<haproxy/ticks.h> +#include <haproxy/tools.h> /* private data */ @@ -223,9 +224,11 @@ static int init_select_per_thread() tmp_evts[DIR_RD] = calloc(1, fd_set_bytes); if (tmp_evts[DIR_RD] == NULL) goto fail; + vma_set_name_id(tmp_evts[DIR_RD], fd_set_bytes, "ev_select", "tmp_evts_rd", tid + 1); tmp_evts[DIR_WR] = calloc(1, fd_set_bytes); if (tmp_evts[DIR_WR] == NULL) goto fail; + vma_set_name_id(tmp_evts[DIR_WR], fd_set_bytes, "ev_select", "tmp_evts_wr", tid + 1); return 1; fail: free(tmp_evts[DIR_RD]); @@ -263,8 +266,10 @@ static int _do_init(struct poller *p) if ((fd_evts[DIR_RD] = calloc(1, fd_set_bytes)) == NULL) goto fail_srevt; + vma_set_name(fd_evts[DIR_RD], fd_set_bytes, "ev_select", "fd_evts_rd"); if ((fd_evts[DIR_WR] = calloc(1, fd_set_bytes)) == NULL) goto fail_swevt; + vma_set_name(fd_evts[DIR_WR], fd_set_bytes, "ev_select", "fd_evts_wr"); hap_register_per_thread_init(init_select_per_thread); hap_register_per_thread_deinit(deinit_select_per_thread); diff --git a/src/event_hdl.c b/src/event_hdl.c index f5bb5b6..f4f7b19 100644 --- a/src/event_hdl.c +++ b/src/event_hdl.c @@ -138,7 +138,7 @@ struct event_hdl_sub_type event_hdl_string_to_sub_type(const char *name) int it; for (it = 0; it < (int)(sizeof(event_hdl_sub_type_map) / sizeof(event_hdl_sub_type_map[0])); it++) { - if (!strcmp(name, event_hdl_sub_type_map[it].name)) + if (strcmp(name, event_hdl_sub_type_map[it].name) == 0) return event_hdl_sub_type_map[it].type; } return EVENT_HDL_SUB_NONE; diff --git a/src/fcgi-app.c b/src/fcgi-app.c index 00562f8..e8117a3 100644 --- a/src/fcgi-app.c +++ b/src/fcgi-app.c @@ -134,16 +134,7 @@ static void fcgi_release_rule(struct fcgi_rule *rule) if (!rule) return; - if (!LIST_ISEMPTY(&rule->value)) { - struct logformat_node *lf, *lfb; - - list_for_each_entry_safe(lf, lfb, &rule->value, list) { - LIST_DELETE(&lf->list); - release_sample_expr(lf->expr); - free(lf->arg); - free(lf); - } - } + lf_expr_deinit(&rule->value); /* ->cond and ->name are not owned by the rule */ 
free(rule); } @@ -256,7 +247,7 @@ static int fcgi_flt_check(struct proxy *px, struct flt_conf *fconf) rule->type = crule->type; rule->name = ist(crule->name); rule->cond = crule->cond; - LIST_INIT(&rule->value); + lf_expr_init(&rule->value); if (crule->value) { if (!parse_logformat_string(crule->value, px, &rule->value, LOG_OPT_HTTP, @@ -84,8 +84,8 @@ #if defined(USE_POLL) #include <poll.h> -#include <errno.h> #endif +#include <errno.h> #include <haproxy/api.h> #include <haproxy/activity.h> @@ -981,8 +981,8 @@ void my_closefrom(int start) break; } while (errno == EAGAIN || errno == EWOULDBLOCK || errno == EINTR || errno == ENOMEM); - if (ret) - ret = fd - start; + /* always check the whole range */ + ret = fd - start; for (idx = 0; idx < ret; idx++) { if (poll_events[idx].revents & POLLNVAL) @@ -1108,6 +1108,7 @@ void poller_pipe_io_handler(int fd) static int alloc_pollers_per_thread() { fd_updt = calloc(global.maxsock, sizeof(*fd_updt)); + vma_set_name_id(fd_updt, global.maxsock * sizeof(*fd_updt), "fd", "fd_updt", tid + 1); return fd_updt != NULL; } @@ -1158,10 +1159,11 @@ int init_pollers() int p; struct poller *bp; - if ((fdtab_addr = calloc(global.maxsock, sizeof(*fdtab) + 64)) == NULL) { + if ((fdtab_addr = calloc(1, global.maxsock * sizeof(*fdtab) + 64)) == NULL) { ha_alert("Not enough memory to allocate %d entries for fdtab!\n", global.maxsock); goto fail_tab; } + vma_set_name(fdtab_addr, global.maxsock * sizeof(*fdtab) + 64, "fd", "fdtab_addr"); /* always provide an aligned fdtab */ fdtab = (struct fdtab*)((((size_t)fdtab_addr) + 63) & -(size_t)64); @@ -1170,11 +1172,13 @@ int init_pollers() ha_alert("Not enough memory to allocate %d entries for polled_mask!\n", global.maxsock); goto fail_polledmask; } + vma_set_name(polled_mask, global.maxsock * sizeof(*polled_mask), "fd", "polled_mask"); if ((fdinfo = calloc(global.maxsock, sizeof(*fdinfo))) == NULL) { ha_alert("Not enough memory to allocate %d entries for fdinfo!\n", global.maxsock); goto fail_info; } + 
vma_set_name(fdinfo, global.maxsock * sizeof(*fdinfo), "fd", "fdinfo"); for (p = 0; p < MAX_TGROUPS; p++) update_list[p].first = update_list[p].last = -1; diff --git a/src/flt_bwlim.c b/src/flt_bwlim.c index 66c2883..c5078c8 100644 --- a/src/flt_bwlim.c +++ b/src/flt_bwlim.c @@ -219,26 +219,26 @@ static int bwlim_check(struct proxy *px, struct flt_conf *fconf) target = px->table; if (!target) { - ha_alert("Proxy %s : unable to find table '%s' referenced by bwlim filter '%s'", + ha_alert("Proxy %s : unable to find table '%s' referenced by bwlim filter '%s'\n", px->id, conf->table.n ? conf->table.n : px->id, conf->name); return 1; } if ((conf->flags & BWLIM_FL_IN) && !target->data_ofs[STKTABLE_DT_BYTES_IN_RATE]) { ha_alert("Proxy %s : stick-table '%s' uses a data type incompatible with bwlim filter '%s'." - " It must be 'bytes_in_rate'", + " It must be 'bytes_in_rate'\n", px->id, conf->table.n ? conf->table.n : px->id, conf->name); return 1; } else if ((conf->flags & BWLIM_FL_OUT) && !target->data_ofs[STKTABLE_DT_BYTES_OUT_RATE]) { ha_alert("Proxy %s : stick-table '%s' uses a data type incompatible with bwlim filter '%s'." - " It must be 'bytes_out_rate'", + " It must be 'bytes_out_rate'\n", px->id, conf->table.n ? conf->table.n : px->id, conf->name); return 1; } if (!stktable_compatible_sample(conf->expr, target->type)) { - ha_alert("Proxy %s : stick-table '%s' uses a key type incompatible with bwlim filter '%s'", + ha_alert("Proxy %s : stick-table '%s' uses a key type incompatible with bwlim filter '%s'\n", px->id, conf->table.n ? 
conf->table.n : px->id, conf->name); return 1; } diff --git a/src/flt_http_comp.c b/src/flt_http_comp.c index 30f9d2a..e601ff6 100644 --- a/src/flt_http_comp.c +++ b/src/flt_http_comp.c @@ -73,9 +73,9 @@ comp_flt_init(struct proxy *px, struct flt_conf *fconf) static int comp_flt_init_per_thread(struct proxy *px, struct flt_conf *fconf) { - if (b_alloc(&tmpbuf) == NULL) + if (b_alloc(&tmpbuf, DB_PERMANENT) == NULL) return -1; - if (b_alloc(&zbuf) == NULL) + if (b_alloc(&zbuf, DB_PERMANENT) == NULL) return -1; return 0; } diff --git a/src/flt_spoe.c b/src/flt_spoe.c index 43f6bd9..95930f1 100644 --- a/src/flt_spoe.c +++ b/src/flt_spoe.c @@ -249,7 +249,7 @@ static const char *spoe_appctx_state_str[SPOE_APPCTX_ST_END+1] = { static char * generate_pseudo_uuid() { - ha_generate_uuid(&trash); + ha_generate_uuid_v4(&trash); return my_strndup(trash.area, trash.data); } @@ -1131,7 +1131,6 @@ spoe_handle_healthcheck_response(char *frame, size_t size, char *err, int errlen static int spoe_send_frame(struct appctx *appctx, char *buf, size_t framesz) { - struct stconn *sc = appctx_sc(appctx); int ret; uint32_t netint; @@ -1140,15 +1139,8 @@ spoe_send_frame(struct appctx *appctx, char *buf, size_t framesz) netint = htonl(framesz); memcpy(buf, (char *)&netint, 4); ret = applet_putblk(appctx, buf, framesz+4); - if (ret <= 0) { - if (ret == -3 && b_is_null(&sc_ic(sc)->buf)) { - /* WT: is this still needed for the case ret==-3 ? 
*/ - sc_need_room(sc, 0); - return 1; /* retry */ - } - SPOE_APPCTX(appctx)->status_code = SPOE_FRM_ERR_IO; - return -1; /* error */ - } + if (ret <= 0) + return 1; /* retry */ return framesz; } @@ -1934,7 +1926,7 @@ spoe_handle_appctx(struct appctx *appctx) if (SPOE_APPCTX(appctx) == NULL) return; - if (unlikely(se_fl_test(appctx->sedesc, (SE_FL_EOS|SE_FL_ERROR|SE_FL_SHR|SE_FL_SHW)))) { + if (unlikely(se_fl_test(appctx->sedesc, (SE_FL_EOS|SE_FL_ERROR)))) { co_skip(sc_oc(sc), co_data(sc_oc(sc))); goto out; } @@ -2860,21 +2852,19 @@ spoe_acquire_buffer(struct buffer *buf, struct buffer_wait *buffer_wait) if (buf->size) return 1; - if (LIST_INLIST(&buffer_wait->list)) - LIST_DEL_INIT(&buffer_wait->list); + b_dequeue(buffer_wait); - if (b_alloc(buf)) + if (b_alloc(buf, DB_CHANNEL)) return 1; - LIST_APPEND(&th_ctx->buffer_wq, &buffer_wait->list); + b_requeue(DB_CHANNEL, buffer_wait); return 0; } static void spoe_release_buffer(struct buffer *buf, struct buffer_wait *buffer_wait) { - if (LIST_INLIST(&buffer_wait->list)) - LIST_DEL_INIT(&buffer_wait->list); + b_dequeue(buffer_wait); /* Release the buffer if needed */ if (buf->size) { @@ -3022,7 +3012,7 @@ spoe_init(struct proxy *px, struct flt_conf *fconf) /* conf->agent_fe was already initialized during the config * parsing. Finish initialization. 
*/ - conf->agent_fe.last_change = ns_to_sec(now_ns); + conf->agent_fe.fe_counters.last_change = ns_to_sec(now_ns); conf->agent_fe.cap = PR_CAP_FE; conf->agent_fe.mode = PR_MODE_TCP; conf->agent_fe.maxconn = 0; diff --git a/src/frontend.c b/src/frontend.c index ad2e39e..3b3bcbb 100644 --- a/src/frontend.c +++ b/src/frontend.c @@ -55,7 +55,7 @@ int frontend_accept(struct stream *s) if ((fe->mode == PR_MODE_TCP || fe->mode == PR_MODE_HTTP) && (!LIST_ISEMPTY(&fe->loggers))) { - if (likely(!LIST_ISEMPTY(&fe->logformat))) { + if (likely(!lf_expr_isempty(&fe->logformat))) { /* we have the client ip */ if (s->logs.logwait & LW_CLIP) if (!(s->logs.logwait &= ~(LW_CLIP|LW_INIT))) @@ -252,7 +252,7 @@ smp_fetch_fe_req_rate(const struct arg *args, struct sample *smp, const char *kw smp->flags = SMP_F_VOL_TEST; smp->data.type = SMP_T_SINT; - smp->data.u.sint = read_freq_ctr(&px->fe_req_per_sec); + smp->data.u.sint = read_freq_ctr(&px->fe_counters.req_per_sec); return 1; } @@ -272,7 +272,7 @@ smp_fetch_fe_sess_rate(const struct arg *args, struct sample *smp, const char *k smp->flags = SMP_F_VOL_TEST; smp->data.type = SMP_T_SINT; - smp->data.u.sint = read_freq_ctr(&px->fe_sess_per_sec); + smp->data.u.sint = read_freq_ctr(&px->fe_counters.sess_per_sec); return 1; } diff --git a/src/guid.c b/src/guid.c new file mode 100644 index 0000000..f1365b6 --- /dev/null +++ b/src/guid.c @@ -0,0 +1,161 @@ +#include <haproxy/guid.h> + +#include <import/ebistree.h> +#include <haproxy/listener-t.h> +#include <haproxy/obj_type.h> +#include <haproxy/proxy.h> +#include <haproxy/server-t.h> +#include <haproxy/tools.h> + +/* GUID global tree */ +struct eb_root guid_tree = EB_ROOT_UNIQUE; + +/* Initialize <guid> members. */ +void guid_init(struct guid_node *guid) +{ + guid->node.key = NULL; + guid->node.node.leaf_p = NULL; +} + +/* Insert <objt> into GUID global tree with key <uid>. Must only be called on + * thread isolation. On failure, <errmsg> will be allocated with an error + * description. 
Caller is responsible to free it. + * + * Returns 0 on success else non-zero. + */ +int guid_insert(enum obj_type *objt, const char *uid, char **errmsg) +{ + struct guid_node *guid = NULL; + struct guid_node *dup; + struct ebpt_node *node; + char *key = NULL; + char *dup_name = NULL; + + if (!guid_is_valid_fmt(uid, errmsg)) + goto err; + + switch (obj_type(objt)) { + case OBJ_TYPE_PROXY: + guid = &__objt_proxy(objt)->guid; + break; + + case OBJ_TYPE_LISTENER: + guid = &__objt_listener(objt)->guid; + break; + + case OBJ_TYPE_SERVER: + guid = &__objt_server(objt)->guid; + break; + + default: + /* No guid support for this objtype. */ + ABORT_NOW(); + return 0; + } + + key = strdup(uid); + if (!key) { + memprintf(errmsg, "key alloc failure"); + goto err; + } + + guid->node.key = key; + node = ebis_insert(&guid_tree, &guid->node); + if (node != &guid->node) { + dup = ebpt_entry(node, struct guid_node, node); + dup_name = guid_name(dup); + memprintf(errmsg, "duplicate entry with %s", dup_name); + goto err; + } + + guid->obj_type = objt; + return 0; + + err: + ha_free(&key); + ha_free(&dup_name); + return 1; +} + +/* Remove <guid> node from GUID global tree. Must only be called on thread + * isolation. Safe to call even if node is not currently stored. + */ +void guid_remove(struct guid_node *guid) +{ + ebpt_delete(&guid->node); + ha_free(&guid->node.key); +} + +/* Retrieve an instance from GUID global tree with key <uid>. + * + * Returns the GUID instance or NULL if key not found. + */ +struct guid_node *guid_lookup(const char *uid) +{ + struct ebpt_node *node = NULL; + struct guid_node *guid = NULL; + + node = ebis_lookup(&guid_tree, uid); + if (node) + guid = ebpt_entry(node, struct guid_node, node); + + return guid; +} + +/* Returns a boolean checking if <uid> respects GUID format. If <errmsg> is not + * NULL, it will be allocated with an error description in case of invalid + * format. 
+ */ +int guid_is_valid_fmt(const char *uid, char **errmsg) +{ + const size_t len = strlen(uid); + const char *c; + + if (!len || len > GUID_MAX_LEN) { + memprintf(errmsg, "invalid length"); + return 0; + } + + c = invalid_char(uid); + if (c) { + memprintf(errmsg, "invalid character '%c'", c[0]); + return 0; + } + + return 1; +} + +/* Generate a user-friendly description for the instance attached via <guid> + * node. The string is dynamically allocated and the caller is responsible to + * free it. + * + * Returns a pointer to the dynamically allocated message. + */ +char *guid_name(const struct guid_node *guid) +{ + char *msg = NULL; + struct proxy *px; + struct listener *l; + struct server *srv; + + switch (obj_type(guid->obj_type)) { + case OBJ_TYPE_PROXY: + px = __objt_proxy(guid->obj_type); + return memprintf(&msg, "%s %s", proxy_cap_str(px->cap), px->id); + + case OBJ_TYPE_LISTENER: + l = __objt_listener(guid->obj_type); + return memprintf(&msg, "listener %s (%s:%d)", + l->bind_conf->frontend->id, + l->bind_conf->file, l->bind_conf->line); + + case OBJ_TYPE_SERVER: + srv = __objt_server(guid->obj_type); + return memprintf(&msg, "server %s/%s", srv->proxy->id, srv->id); + + default: + break; + } + + return NULL; +} @@ -183,11 +183,11 @@ int h1_parse_xfer_enc_header(struct h1m *h1m, struct ist value) * is hast header, its value is normalized. 0 is returned on success, -1 if the * authority is invalid and -2 if the host is invalid. 
*/ -static int h1_validate_connect_authority(struct ist authority, struct ist *host_hdr) +static int h1_validate_connect_authority(struct ist scheme, struct ist authority, struct ist *host_hdr) { struct ist uri_host, uri_port, host, host_port; - if (!isttest(authority)) + if (isttest(scheme) || !isttest(authority)) goto invalid_authority; uri_host = authority; uri_port = http_get_host_port(authority); @@ -575,12 +575,7 @@ int h1_headers_to_hdr_list(char *start, const char *stop, #ifdef HA_UNALIGNED_LE /* speedup: skip bytes not between 0x24 and 0x7e inclusive */ while (ptr <= end - sizeof(int)) { - int x = *(int *)ptr - 0x24242424; - if (x & 0x80808080) - break; - - x -= 0x5b5b5b5b; - if (!(x & 0x80808080)) + if (is_char4_outside(*(uint *)ptr, 0x24, 0x7e)) break; ptr += sizeof(int); @@ -930,14 +925,14 @@ int h1_headers_to_hdr_list(char *start, const char *stop, */ #ifdef HA_UNALIGNED_LE64 while (ptr <= end - sizeof(long)) { - if ((*(long *)ptr - 0x0e0e0e0e0e0e0e0eULL) & 0x8080808080808080ULL) + if (is_char8_below_opt(*(ulong *)ptr, 0x0e)) goto http_msg_hdr_val2; ptr += sizeof(long); } #endif #ifdef HA_UNALIGNED_LE while (ptr <= end - sizeof(int)) { - if ((*(int*)ptr - 0x0e0e0e0e) & 0x80808080) + if (is_char4_below_opt(*(uint *)ptr, 0x0e)) goto http_msg_hdr_val2; ptr += sizeof(int); } @@ -1105,46 +1100,88 @@ int h1_headers_to_hdr_list(char *start, const char *stop, if (!(h1m->flags & (H1_MF_HDRS_ONLY|H1_MF_RESP))) { struct http_uri_parser parser = http_uri_parser_init(sl.rq.u); - struct ist scheme, authority; + struct ist scheme, authority = IST_NULL; int ret; - scheme = http_parse_scheme(&parser); - authority = http_parse_authority(&parser, 1); - if (sl.rq.meth == HTTP_METH_CONNECT) { - struct ist *host = ((host_idx != -1) ? 
&hdr[host_idx].v : NULL); - - ret = h1_validate_connect_authority(authority, host); - if (ret < 0) { - if (h1m->err_pos < -1) { - state = H1_MSG_LAST_LF; - /* WT: gcc seems to see a path where sl.rq.u.ptr was used - * uninitialized, but it doesn't know that the function is - * called with initial states making this impossible. - */ - ALREADY_CHECKED(sl.rq.u.ptr); - ptr = ((ret == -1) ? sl.rq.u.ptr : host->ptr); /* Set ptr on the error */ - goto http_msg_invalid; - } - if (h1m->err_pos == -1) /* capture the error pointer */ - h1m->err_pos = ((ret == -1) ? sl.rq.u.ptr : host->ptr) - start + skip; /* >= 0 now */ + /* WT: gcc seems to see a path where sl.rq.u.ptr was used + * uninitialized, but it doesn't know that the function is + * called with initial states making this impossible. + */ + ALREADY_CHECKED(sl.rq.u.ptr); + switch (parser.format) { + case URI_PARSER_FORMAT_ASTERISK: + /* We must take care "PRI * HTTP/2.0" is supported here. check for OTHER methods here is enough */ + if ((sl.rq.meth != HTTP_METH_OTHER && sl.rq.meth != HTTP_METH_OPTIONS) || istlen(sl.rq.u) != 1) { + ptr = sl.rq.u.ptr; /* Set ptr on the error */ + goto http_msg_invalid; } - } - else if (host_idx != -1 && istlen(authority)) { - struct ist host = hdr[host_idx].v; + break; + + case URI_PARSER_FORMAT_ABSPATH: + if (sl.rq.meth == HTTP_METH_CONNECT) { + ptr = sl.rq.u.ptr; /* Set ptr on the error */ + goto http_msg_invalid; + } + break; - /* For non-CONNECT method, the authority must match the host header value */ - if (!isteqi(authority, host)) { - ret = h1_validate_mismatch_authority(scheme, authority, host); + case URI_PARSER_FORMAT_ABSURI_OR_AUTHORITY: + scheme = http_parse_scheme(&parser); + if (!isttest(scheme)) { /* scheme not found: MUST be an authority */ + struct ist *host = NULL; + + if (sl.rq.meth != HTTP_METH_CONNECT) { + ptr = sl.rq.u.ptr; /* Set ptr on the error */ + goto http_msg_invalid; + } + if (host_idx != -1) + host = &hdr[host_idx].v; + authority = 
http_parse_authority(&parser, 1); + ret = h1_validate_connect_authority(scheme, authority, host); if (ret < 0) { if (h1m->err_pos < -1) { state = H1_MSG_LAST_LF; - ptr = host.ptr; /* Set ptr on the error */ + /* WT: gcc seems to see a path where sl.rq.u.ptr was used + * uninitialized, but it doesn't know that the function is + * called with initial states making this impossible. + */ + ALREADY_CHECKED(sl.rq.u.ptr); + ptr = ((ret == -1) ? sl.rq.u.ptr : host->ptr); /* Set ptr on the error */ goto http_msg_invalid; } if (h1m->err_pos == -1) /* capture the error pointer */ - h1m->err_pos = v.ptr - start + skip; /* >= 0 now */ + h1m->err_pos = ((ret == -1) ? sl.rq.u.ptr : host->ptr) - start + skip; /* >= 0 now */ + } + } + else { /* Scheme found: MUST be an absolute-URI */ + struct ist host = IST_NULL; + + if (sl.rq.meth == HTTP_METH_CONNECT) { + ptr = sl.rq.u.ptr; /* Set ptr on the error */ + goto http_msg_invalid; + } + + if (host_idx != -1) + host = hdr[host_idx].v; + authority = http_parse_authority(&parser, 1); + /* For non-CONNECT method, the authority must match the host header value */ + if (isttest(host) && !isteqi(authority, host)) { + ret = h1_validate_mismatch_authority(scheme, authority, host); + if (ret < 0) { + if (h1m->err_pos < -1) { + state = H1_MSG_LAST_LF; + ptr = host.ptr; /* Set ptr on the error */ + goto http_msg_invalid; + } + if (h1m->err_pos == -1) /* capture the error pointer */ + h1m->err_pos = v.ptr - start + skip; /* >= 0 now */ + } } } + break; + + default: + ptr = sl.rq.u.ptr; /* Set ptr on the error */ + goto http_msg_invalid; } } @@ -1227,57 +1264,6 @@ int h1_headers_to_hdr_list(char *start, const char *stop, goto try_again; } -/* This function performs a very minimal parsing of the trailers block present - * at offset <ofs> in <buf> for up to <max> bytes, and returns the number of - * bytes to delete to skip the trailers. 
It may return 0 if it's missing some - * input data, or < 0 in case of parse error (in which case the caller may have - * to decide how to proceed, possibly eating everything). - */ -int h1_measure_trailers(const struct buffer *buf, unsigned int ofs, unsigned int max) -{ - const char *stop = b_peek(buf, ofs + max); - int count = ofs; - - while (1) { - const char *p1 = NULL, *p2 = NULL; - const char *start = b_peek(buf, count); - const char *ptr = start; - - /* scan current line and stop at LF or CRLF */ - while (1) { - if (ptr == stop) - return 0; - - if (*ptr == '\n') { - if (!p1) - p1 = ptr; - p2 = ptr; - break; - } - - if (*ptr == '\r') { - if (p1) - return -1; - p1 = ptr; - } - - ptr = b_next(buf, ptr); - } - - /* after LF; point to beginning of next line */ - p2 = b_next(buf, p2); - count += b_dist(buf, start, p2); - - /* LF/CRLF at beginning of line => end of trailers at p2. - * Everything was scheduled for forwarding, there's nothing left - * from this message. */ - if (p1 == start) - break; - /* OK, next line then */ - } - return count - ofs; -} - /* Generate a random key for a WebSocket Handshake in respect with rfc6455 * The key is 128-bits long encoded as a base64 string in <key_out> parameter * (25 bytes long). 
@@ -36,8 +36,8 @@ #include <haproxy/qmux_http.h> #include <haproxy/qpack-dec.h> #include <haproxy/qpack-enc.h> -#include <haproxy/quic_conn-t.h> #include <haproxy/quic_enc.h> +#include <haproxy/quic_fctl.h> #include <haproxy/quic_frame.h> #include <haproxy/stats-t.h> #include <haproxy/tools.h> @@ -58,19 +58,21 @@ static const struct trace_event h3_trace_events[] = { { .mask = H3_EV_RX_HDR, .name = "rx_hdr", .desc = "receipt of H3 HEADERS frame" }, #define H3_EV_RX_SETTINGS (1ULL << 3) { .mask = H3_EV_RX_SETTINGS, .name = "rx_settings", .desc = "receipt of H3 SETTINGS frame" }, -#define H3_EV_TX_DATA (1ULL << 4) +#define H3_EV_TX_FRAME (1ULL << 4) + { .mask = H3_EV_TX_FRAME, .name = "tx_frame", .desc = "transmission of any H3 frame" }, +#define H3_EV_TX_DATA (1ULL << 5) { .mask = H3_EV_TX_DATA, .name = "tx_data", .desc = "transmission of H3 DATA frame" }, -#define H3_EV_TX_HDR (1ULL << 5) +#define H3_EV_TX_HDR (1ULL << 6) { .mask = H3_EV_TX_HDR, .name = "tx_hdr", .desc = "transmission of H3 HEADERS frame" }, -#define H3_EV_TX_SETTINGS (1ULL << 6) +#define H3_EV_TX_SETTINGS (1ULL << 7) { .mask = H3_EV_TX_SETTINGS, .name = "tx_settings", .desc = "transmission of H3 SETTINGS frame" }, -#define H3_EV_H3S_NEW (1ULL << 7) +#define H3_EV_H3S_NEW (1ULL << 8) { .mask = H3_EV_H3S_NEW, .name = "h3s_new", .desc = "new H3 stream" }, -#define H3_EV_H3S_END (1ULL << 8) +#define H3_EV_H3S_END (1ULL << 9) { .mask = H3_EV_H3S_END, .name = "h3s_end", .desc = "H3 stream terminated" }, -#define H3_EV_H3C_NEW (1ULL << 9) +#define H3_EV_H3C_NEW (1ULL << 10) { .mask = H3_EV_H3C_NEW, .name = "h3c_new", .desc = "new H3 connection" }, -#define H3_EV_H3C_END (1ULL << 10) +#define H3_EV_H3C_END (1ULL << 11) { .mask = H3_EV_H3C_END, .name = "h3c_end", .desc = "H3 connection terminated" }, #define H3_EV_STRM_SEND (1ULL << 12) { .mask = H3_EV_STRM_SEND, .name = "strm_send", .desc = "sending data for stream" }, @@ -129,7 +131,7 @@ static uint64_t h3_settings_max_field_section_size = 
QUIC_VARINT_8_BYTE_MAX; /* struct h3c { struct qcc *qcc; struct qcs *ctrl_strm; /* Control stream */ - enum h3_err err; + int err; uint32_t flags; /* Settings */ @@ -195,7 +197,8 @@ static ssize_t h3_init_uni_stream(struct h3c *h3c, struct qcs *qcs, case H3_UNI_S_T_CTRL: if (h3c->flags & H3_CF_UNI_CTRL_SET) { TRACE_ERROR("duplicated control stream", H3_EV_H3S_NEW, qcs->qcc->conn, qcs); - qcc_set_error(qcs->qcc, H3_STREAM_CREATION_ERROR, 1); + qcc_set_error(qcs->qcc, H3_ERR_STREAM_CREATION_ERROR, 1); + qcc_report_glitch(qcs->qcc, 1); goto err; } h3c->flags |= H3_CF_UNI_CTRL_SET; @@ -210,7 +213,8 @@ static ssize_t h3_init_uni_stream(struct h3c *h3c, struct qcs *qcs, case H3_UNI_S_T_QPACK_DEC: if (h3c->flags & H3_CF_UNI_QPACK_DEC_SET) { TRACE_ERROR("duplicated qpack decoder stream", H3_EV_H3S_NEW, qcs->qcc->conn, qcs); - qcc_set_error(qcs->qcc, H3_STREAM_CREATION_ERROR, 1); + qcc_set_error(qcs->qcc, H3_ERR_STREAM_CREATION_ERROR, 1); + qcc_report_glitch(qcs->qcc, 1); goto err; } h3c->flags |= H3_CF_UNI_QPACK_DEC_SET; @@ -221,7 +225,8 @@ static ssize_t h3_init_uni_stream(struct h3c *h3c, struct qcs *qcs, case H3_UNI_S_T_QPACK_ENC: if (h3c->flags & H3_CF_UNI_QPACK_ENC_SET) { TRACE_ERROR("duplicated qpack encoder stream", H3_EV_H3S_NEW, qcs->qcc->conn, qcs); - qcc_set_error(qcs->qcc, H3_STREAM_CREATION_ERROR, 1); + qcc_set_error(qcs->qcc, H3_ERR_STREAM_CREATION_ERROR, 1); + qcc_report_glitch(qcs->qcc, 1); goto err; } h3c->flags |= H3_CF_UNI_QPACK_ENC_SET; @@ -320,7 +325,7 @@ static int h3_check_frame_valid(struct h3c *h3c, struct qcs *qcs, uint64_t ftype /* cf H3_FT_HEADERS case. */ if (h3s->type == H3S_T_CTRL || (h3s->st_req != H3S_ST_REQ_HEADERS && h3s->st_req != H3S_ST_REQ_DATA)) { - ret = H3_FRAME_UNEXPECTED; + ret = H3_ERR_FRAME_UNEXPECTED; } break; @@ -347,7 +352,7 @@ static int h3_check_frame_valid(struct h3c *h3c, struct qcs *qcs, uint64_t ftype * own rules; see Section 9. 
*/ if (h3s->type == H3S_T_CTRL || h3s->st_req == H3S_ST_REQ_TRAILERS) - ret = H3_FRAME_UNEXPECTED; + ret = H3_ERR_FRAME_UNEXPECTED; break; case H3_FT_CANCEL_PUSH: @@ -374,9 +379,9 @@ static int h3_check_frame_valid(struct h3c *h3c, struct qcs *qcs, uint64_t ftype */ if (h3s->type != H3S_T_CTRL) - ret = H3_FRAME_UNEXPECTED; + ret = H3_ERR_FRAME_UNEXPECTED; else if (!(h3c->flags & H3_CF_SETTINGS_RECV)) - ret = H3_MISSING_SETTINGS; + ret = H3_ERR_MISSING_SETTINGS; break; case H3_FT_SETTINGS: @@ -394,7 +399,7 @@ static int h3_check_frame_valid(struct h3c *h3c, struct qcs *qcs, uint64_t ftype * H3_FRAME_UNEXPECTED. */ if (h3s->type != H3S_T_CTRL || h3c->flags & H3_CF_SETTINGS_RECV) - ret = H3_FRAME_UNEXPECTED; + ret = H3_ERR_FRAME_UNEXPECTED; break; case H3_FT_PUSH_PROMISE: @@ -406,7 +411,7 @@ static int h3_check_frame_valid(struct h3c *h3c, struct qcs *qcs, uint64_t ftype */ /* TODO server-side only. */ - ret = H3_FRAME_UNEXPECTED; + ret = H3_ERR_FRAME_UNEXPECTED; break; default: @@ -420,7 +425,7 @@ static int h3_check_frame_valid(struct h3c *h3c, struct qcs *qcs, uint64_t ftype * not satisfy that requirement and SHOULD be treated as an error. */ if (h3s->type == H3S_T_CTRL && !(h3c->flags & H3_CF_SETTINGS_RECV)) - ret = H3_MISSING_SETTINGS; + ret = H3_ERR_MISSING_SETTINGS; break; } @@ -461,7 +466,8 @@ static int h3_check_body_size(struct qcs *qcs, int fin) if (h3s->data_len > h3s->body_len || (fin && h3s->data_len < h3s->body_len)) { TRACE_ERROR("Content-length does not match DATA frame size", H3_EV_RX_FRAME|H3_EV_RX_DATA, qcs->qcc->conn, qcs); - h3s->err = H3_MESSAGE_ERROR; + h3s->err = H3_ERR_MESSAGE_ERROR; + qcc_report_glitch(qcs->qcc, 1); ret = -1; } @@ -527,6 +533,7 @@ static ssize_t h3_headers_to_htx(struct qcs *qcs, const struct buffer *buf, int cookie = -1, last_cookie = -1, i; const char *ctl; int relaxed = !!(h3c->qcc->proxy->options2 & PR_O2_REQBUG_OK); + int qpack_err; /* RFC 9114 4.1.2. 
Malformed Requests and Responses * @@ -558,14 +565,16 @@ static ssize_t h3_headers_to_htx(struct qcs *qcs, const struct buffer *buf, list, sizeof(list) / sizeof(list[0])); if (ret < 0) { TRACE_ERROR("QPACK decoding error", H3_EV_RX_FRAME|H3_EV_RX_HDR, qcs->qcc->conn, qcs); - h3c->err = -ret; + if ((qpack_err = qpack_err_decode(ret)) >= 0) { + h3c->err = qpack_err; + qcc_report_glitch(qcs->qcc, 1); + } len = -1; goto out; } - if (!qcs_get_buf(qcs, &htx_buf)) { + if (!b_alloc(&htx_buf, DB_SE_RX)) { TRACE_ERROR("HTX buffer alloc failure", H3_EV_RX_FRAME|H3_EV_RX_HDR, qcs->qcc->conn, qcs); - h3c->err = H3_INTERNAL_ERROR; len = -1; goto out; } @@ -604,7 +613,8 @@ static ssize_t h3_headers_to_htx(struct qcs *qcs, const struct buffer *buf, ctl = ist_find_ctl(list[hdr_idx].v); if (unlikely(ctl) && http_header_has_forbidden_char(list[hdr_idx].v, ctl)) { TRACE_ERROR("control character present in pseudo-header value", H3_EV_RX_FRAME|H3_EV_RX_HDR, qcs->qcc->conn, qcs); - h3s->err = H3_MESSAGE_ERROR; + h3s->err = H3_ERR_MESSAGE_ERROR; + qcc_report_glitch(h3c->qcc, 1); len = -1; goto out; } @@ -615,7 +625,8 @@ static ssize_t h3_headers_to_htx(struct qcs *qcs, const struct buffer *buf, if (isteq(list[hdr_idx].n, ist(":method"))) { if (isttest(meth)) { TRACE_ERROR("duplicated method pseudo-header", H3_EV_RX_FRAME|H3_EV_RX_HDR, qcs->qcc->conn, qcs); - h3s->err = H3_MESSAGE_ERROR; + h3s->err = H3_ERR_MESSAGE_ERROR; + qcc_report_glitch(h3c->qcc, 1); len = -1; goto out; } @@ -624,7 +635,8 @@ static ssize_t h3_headers_to_htx(struct qcs *qcs, const struct buffer *buf, else if (isteq(list[hdr_idx].n, ist(":path"))) { if (isttest(path)) { TRACE_ERROR("duplicated path pseudo-header", H3_EV_RX_FRAME|H3_EV_RX_HDR, qcs->qcc->conn, qcs); - h3s->err = H3_MESSAGE_ERROR; + h3s->err = H3_ERR_MESSAGE_ERROR; + qcc_report_glitch(h3c->qcc, 1); len = -1; goto out; } @@ -636,7 +648,8 @@ static ssize_t h3_headers_to_htx(struct qcs *qcs, const struct buffer *buf, ctl = ist_find_range(list[hdr_idx].v, 0, 
'#'); if (unlikely(ctl) && http_path_has_forbidden_char(list[hdr_idx].v, ctl)) { TRACE_ERROR("forbidden character in ':path' pseudo-header", H3_EV_RX_FRAME|H3_EV_RX_HDR, qcs->qcc->conn, qcs); - h3s->err = H3_MESSAGE_ERROR; + h3s->err = H3_ERR_MESSAGE_ERROR; + qcc_report_glitch(h3c->qcc, 1); len = -1; goto out; } @@ -648,7 +661,8 @@ static ssize_t h3_headers_to_htx(struct qcs *qcs, const struct buffer *buf, if (isttest(scheme)) { /* duplicated pseudo-header */ TRACE_ERROR("duplicated scheme pseudo-header", H3_EV_RX_FRAME|H3_EV_RX_HDR, qcs->qcc->conn, qcs); - h3s->err = H3_MESSAGE_ERROR; + h3s->err = H3_ERR_MESSAGE_ERROR; + qcc_report_glitch(h3c->qcc, 1); len = -1; goto out; } @@ -657,20 +671,23 @@ static ssize_t h3_headers_to_htx(struct qcs *qcs, const struct buffer *buf, else if (isteq(list[hdr_idx].n, ist(":authority"))) { if (isttest(authority)) { TRACE_ERROR("duplicated authority pseudo-header", H3_EV_RX_FRAME|H3_EV_RX_HDR, qcs->qcc->conn, qcs); - h3s->err = H3_MESSAGE_ERROR; + h3s->err = H3_ERR_MESSAGE_ERROR; + qcc_report_glitch(h3c->qcc, 1); len = -1; goto out; } if (h3_set_authority(qcs, &authority, list[hdr_idx].v)) { - h3s->err = H3_MESSAGE_ERROR; + h3s->err = H3_ERR_MESSAGE_ERROR; + qcc_report_glitch(h3c->qcc, 1); len = -1; goto out; } } else { TRACE_ERROR("unknown pseudo-header", H3_EV_RX_FRAME|H3_EV_RX_HDR, qcs->qcc->conn, qcs); - h3s->err = H3_MESSAGE_ERROR; + h3s->err = H3_ERR_MESSAGE_ERROR; + qcc_report_glitch(h3c->qcc, 1); len = -1; goto out; } @@ -687,7 +704,8 @@ static ssize_t h3_headers_to_htx(struct qcs *qcs, const struct buffer *buf, */ if (!isttest(meth) || !isttest(scheme) || !isttest(path)) { TRACE_ERROR("missing mandatory pseudo-header", H3_EV_RX_FRAME|H3_EV_RX_HDR, qcs->qcc->conn, qcs); - h3s->err = H3_MESSAGE_ERROR; + h3s->err = H3_ERR_MESSAGE_ERROR; + qcc_report_glitch(h3c->qcc, 1); len = -1; goto out; } @@ -698,7 +716,6 @@ static ssize_t h3_headers_to_htx(struct qcs *qcs, const struct buffer *buf, sl = htx_add_stline(htx, HTX_BLK_REQ_SL, 
flags, meth, path, ist("HTTP/3.0")); if (!sl) { - h3c->err = H3_INTERNAL_ERROR; len = -1; goto out; } @@ -710,7 +727,6 @@ static ssize_t h3_headers_to_htx(struct qcs *qcs, const struct buffer *buf, if (isttest(authority)) { if (!htx_add_header(htx, ist("host"), authority)) { - h3c->err = H3_INTERNAL_ERROR; len = -1; goto out; } @@ -723,7 +739,8 @@ static ssize_t h3_headers_to_htx(struct qcs *qcs, const struct buffer *buf, if (istmatch(list[hdr_idx].n, ist(":"))) { TRACE_ERROR("pseudo-header field after fields", H3_EV_RX_FRAME|H3_EV_RX_HDR, qcs->qcc->conn, qcs); - h3s->err = H3_MESSAGE_ERROR; + h3s->err = H3_ERR_MESSAGE_ERROR; + qcc_report_glitch(h3c->qcc, 1); len = -1; goto out; } @@ -732,7 +749,8 @@ static ssize_t h3_headers_to_htx(struct qcs *qcs, const struct buffer *buf, const char c = list[hdr_idx].n.ptr[i]; if ((uint8_t)(c - 'A') < 'Z' - 'A' || !HTTP_IS_TOKEN(c)) { TRACE_ERROR("invalid characters in field name", H3_EV_RX_FRAME|H3_EV_RX_HDR, qcs->qcc->conn, qcs); - h3s->err = H3_MESSAGE_ERROR; + h3s->err = H3_ERR_MESSAGE_ERROR; + qcc_report_glitch(h3c->qcc, 1); len = -1; goto out; } @@ -753,14 +771,16 @@ static ssize_t h3_headers_to_htx(struct qcs *qcs, const struct buffer *buf, ctl = ist_find_ctl(list[hdr_idx].v); if (unlikely(ctl) && http_header_has_forbidden_char(list[hdr_idx].v, ctl)) { TRACE_ERROR("control character present in header value", H3_EV_RX_FRAME|H3_EV_RX_HDR, qcs->qcc->conn, qcs); - h3s->err = H3_MESSAGE_ERROR; + h3s->err = H3_ERR_MESSAGE_ERROR; + qcc_report_glitch(h3c->qcc, 1); len = -1; goto out; } if (isteq(list[hdr_idx].n, ist("host"))) { if (h3_set_authority(qcs, &authority, list[hdr_idx].v)) { - h3s->err = H3_MESSAGE_ERROR; + h3s->err = H3_ERR_MESSAGE_ERROR; + qcc_report_glitch(h3c->qcc, 1); len = -1; goto out; } @@ -776,7 +796,8 @@ static ssize_t h3_headers_to_htx(struct qcs *qcs, const struct buffer *buf, h3s->flags & H3_SF_HAVE_CLEN); if (ret < 0) { TRACE_ERROR("invalid content-length", H3_EV_RX_FRAME|H3_EV_RX_HDR, qcs->qcc->conn, 
qcs); - h3s->err = H3_MESSAGE_ERROR; + h3s->err = H3_ERR_MESSAGE_ERROR; + qcc_report_glitch(h3c->qcc, 1); len = -1; goto out; } @@ -810,7 +831,8 @@ static ssize_t h3_headers_to_htx(struct qcs *qcs, const struct buffer *buf, * connection-specific fields MUST be treated as malformed. */ TRACE_ERROR("invalid connection header", H3_EV_RX_FRAME|H3_EV_RX_HDR, qcs->qcc->conn, qcs); - h3s->err = H3_MESSAGE_ERROR; + h3s->err = H3_ERR_MESSAGE_ERROR; + qcc_report_glitch(h3c->qcc, 1); len = -1; goto out; } @@ -823,13 +845,13 @@ static ssize_t h3_headers_to_htx(struct qcs *qcs, const struct buffer *buf, * NOT contain any value other than "trailers". */ TRACE_ERROR("invalid te header", H3_EV_RX_FRAME|H3_EV_RX_HDR, qcs->qcc->conn, qcs); - h3s->err = H3_MESSAGE_ERROR; + h3s->err = H3_ERR_MESSAGE_ERROR; + qcc_report_glitch(h3c->qcc, 1); len = -1; goto out; } if (!htx_add_header(htx, list[hdr_idx].n, list[hdr_idx].v)) { - h3c->err = H3_INTERNAL_ERROR; len = -1; goto out; } @@ -845,21 +867,20 @@ static ssize_t h3_headers_to_htx(struct qcs *qcs, const struct buffer *buf, */ if (!isttest(authority)) { TRACE_ERROR("missing mandatory pseudo-header", H3_EV_RX_FRAME|H3_EV_RX_HDR, qcs->qcc->conn, qcs); - h3s->err = H3_MESSAGE_ERROR; + h3s->err = H3_ERR_MESSAGE_ERROR; + qcc_report_glitch(h3c->qcc, 1); len = -1; goto out; } if (cookie >= 0) { if (http_cookie_merge(htx, list, cookie)) { - h3c->err = H3_INTERNAL_ERROR; len = -1; goto out; } } if (!htx_add_endof(htx, HTX_BLK_EOH)) { - h3c->err = H3_INTERNAL_ERROR; len = -1; goto out; } @@ -871,7 +892,6 @@ static ssize_t h3_headers_to_htx(struct qcs *qcs, const struct buffer *buf, htx = NULL; if (!qcs_attach_sc(qcs, &htx_buf, fin)) { - h3c->err = H3_INTERNAL_ERROR; len = -1; goto out; } @@ -927,6 +947,7 @@ static ssize_t h3_trailers_to_htx(struct qcs *qcs, const struct buffer *buf, struct http_hdr list[global.tune.max_http_hdr]; int hdr_idx, ret; const char *ctl; + int qpack_err; int i; TRACE_ENTER(H3_EV_RX_FRAME|H3_EV_RX_HDR, qcs->qcc->conn, 
qcs); @@ -937,14 +958,16 @@ static ssize_t h3_trailers_to_htx(struct qcs *qcs, const struct buffer *buf, list, sizeof(list) / sizeof(list[0])); if (ret < 0) { TRACE_ERROR("QPACK decoding error", H3_EV_RX_FRAME|H3_EV_RX_HDR, qcs->qcc->conn, qcs); - h3c->err = -ret; + if ((qpack_err = qpack_err_decode(ret)) >= 0) { + h3c->err = qpack_err; + qcc_report_glitch(qcs->qcc, 1); + } len = -1; goto out; } - if (!(appbuf = qcs_get_buf(qcs, &qcs->rx.app_buf))) { + if (!(appbuf = qcc_get_stream_rxbuf(qcs))) { TRACE_ERROR("HTX buffer alloc failure", H3_EV_RX_FRAME|H3_EV_RX_HDR, qcs->qcc->conn, qcs); - h3c->err = H3_INTERNAL_ERROR; len = -1; goto out; } @@ -978,7 +1001,8 @@ static ssize_t h3_trailers_to_htx(struct qcs *qcs, const struct buffer *buf, */ if (istmatch(list[hdr_idx].n, ist(":"))) { TRACE_ERROR("pseudo-header field in trailers", H3_EV_RX_FRAME|H3_EV_RX_HDR, qcs->qcc->conn, qcs); - h3s->err = H3_MESSAGE_ERROR; + h3s->err = H3_ERR_MESSAGE_ERROR; + qcc_report_glitch(h3c->qcc, 1); len = -1; goto out; } @@ -987,7 +1011,8 @@ static ssize_t h3_trailers_to_htx(struct qcs *qcs, const struct buffer *buf, const char c = list[hdr_idx].n.ptr[i]; if ((uint8_t)(c - 'A') < 'Z' - 'A' || !HTTP_IS_TOKEN(c)) { TRACE_ERROR("invalid characters in field name", H3_EV_RX_FRAME|H3_EV_RX_HDR, qcs->qcc->conn, qcs); - h3s->err = H3_MESSAGE_ERROR; + h3s->err = H3_ERR_MESSAGE_ERROR; + qcc_report_glitch(h3c->qcc, 1); len = -1; goto out; } @@ -1002,7 +1027,8 @@ static ssize_t h3_trailers_to_htx(struct qcs *qcs, const struct buffer *buf, isteq(list[hdr_idx].n, ist("te")) || isteq(list[hdr_idx].n, ist("transfer-encoding"))) { TRACE_ERROR("forbidden HTTP/3 headers", H3_EV_RX_FRAME|H3_EV_RX_HDR, qcs->qcc->conn, qcs); - h3s->err = H3_MESSAGE_ERROR; + h3s->err = H3_ERR_MESSAGE_ERROR; + qcc_report_glitch(h3c->qcc, 1); len = -1; goto out; } @@ -1021,14 +1047,14 @@ static ssize_t h3_trailers_to_htx(struct qcs *qcs, const struct buffer *buf, ctl = ist_find_ctl(list[hdr_idx].v); if (unlikely(ctl) && 
http_header_has_forbidden_char(list[hdr_idx].v, ctl)) { TRACE_ERROR("control character present in trailer value", H3_EV_RX_FRAME|H3_EV_RX_HDR, qcs->qcc->conn, qcs); - h3s->err = H3_MESSAGE_ERROR; + h3s->err = H3_ERR_MESSAGE_ERROR; + qcc_report_glitch(h3c->qcc, 1); len = -1; goto out; } if (!htx_add_trailer(htx, list[hdr_idx].n, list[hdr_idx].v)) { TRACE_ERROR("cannot add trailer", H3_EV_RX_FRAME|H3_EV_RX_HDR, qcs->qcc->conn, qcs); - h3c->err = H3_INTERNAL_ERROR; len = -1; goto out; } @@ -1038,7 +1064,6 @@ static ssize_t h3_trailers_to_htx(struct qcs *qcs, const struct buffer *buf, if (!htx_add_endof(htx, HTX_BLK_EOT)) { TRACE_ERROR("cannot add trailer", H3_EV_RX_FRAME|H3_EV_RX_HDR, qcs->qcc->conn, qcs); - h3c->err = H3_INTERNAL_ERROR; len = -1; goto out; } @@ -1064,8 +1089,6 @@ static ssize_t h3_trailers_to_htx(struct qcs *qcs, const struct buffer *buf, static ssize_t h3_data_to_htx(struct qcs *qcs, const struct buffer *buf, uint64_t len, char fin) { - struct h3s *h3s = qcs->ctx; - struct h3c *h3c = h3s->h3c; struct buffer *appbuf; struct htx *htx = NULL; size_t htx_sent = 0; @@ -1074,11 +1097,9 @@ static ssize_t h3_data_to_htx(struct qcs *qcs, const struct buffer *buf, TRACE_ENTER(H3_EV_RX_FRAME|H3_EV_RX_DATA, qcs->qcc->conn, qcs); - if (!(appbuf = qcs_get_buf(qcs, &qcs->rx.app_buf))) { + if (!(appbuf = qcc_get_stream_rxbuf(qcs))) { TRACE_ERROR("data buffer alloc failure", H3_EV_RX_FRAME|H3_EV_RX_DATA, qcs->qcc->conn, qcs); - h3c->err = H3_INTERNAL_ERROR; - len = -1; - goto out; + goto err; } htx = htx_from_buf(appbuf); @@ -1129,6 +1150,10 @@ static ssize_t h3_data_to_htx(struct qcs *qcs, const struct buffer *buf, TRACE_LEAVE(H3_EV_RX_FRAME|H3_EV_RX_DATA, qcs->qcc->conn, qcs); return htx_sent; + + err: + TRACE_DEVEL("leaving on error", H3_EV_RX_FRAME|H3_EV_RX_DATA, qcs->qcc->conn, qcs); + return -1; } /* Parse a SETTINGS frame of length <len> of payload <buf>. 
@@ -1150,7 +1175,8 @@ static ssize_t h3_parse_settings_frm(struct h3c *h3c, const struct buffer *buf, while (b_data(&b)) { if (!b_quic_dec_int(&id, &b, &ret) || !b_quic_dec_int(&value, &b, &ret)) { - h3c->err = H3_FRAME_ERROR; + h3c->err = H3_ERR_FRAME_ERROR; + qcc_report_glitch(h3c->qcc, 1); return -1; } @@ -1167,7 +1193,8 @@ static ssize_t h3_parse_settings_frm(struct h3c *h3c, const struct buffer *buf, /* Ignore duplicate check for ID too big used for GREASE. */ if (id < sizeof(mask)) { if (ha_bit_test(id, &mask)) { - h3c->err = H3_SETTINGS_ERROR; + h3c->err = H3_ERR_SETTINGS_ERROR; + qcc_report_glitch(h3c->qcc, 1); return -1; } ha_bit_set(id, &mask); @@ -1197,7 +1224,8 @@ static ssize_t h3_parse_settings_frm(struct h3c *h3c, const struct buffer *buf, * their receipt MUST be treated as a connection error of type * H3_SETTINGS_ERROR. */ - h3c->err = H3_SETTINGS_ERROR; + h3c->err = H3_ERR_SETTINGS_ERROR; + qcc_report_glitch(h3c->qcc, 1); return -1; default: /* MUST be ignored */ @@ -1209,16 +1237,16 @@ static ssize_t h3_parse_settings_frm(struct h3c *h3c, const struct buffer *buf, return ret; } -/* Decode <qcs> remotely initiated bidi-stream. <fin> must be set to indicate - * that we received the last data of the stream. +/* Transcode HTTP/3 payload received in buffer <b> to HTX data for stream + * <qcs>. If <fin> is set, it indicates that no more data will arrive after. * * Returns 0 on success else non-zero. 
*/ -static ssize_t h3_decode_qcs(struct qcs *qcs, struct buffer *b, int fin) +static ssize_t h3_rcv_buf(struct qcs *qcs, struct buffer *b, int fin) { struct h3s *h3s = qcs->ctx; struct h3c *h3c = h3s->h3c; - ssize_t total = 0, ret; + ssize_t total = 0, ret = 0; TRACE_ENTER(H3_EV_RX_FRAME, qcs->qcc->conn, qcs); @@ -1256,31 +1284,36 @@ static ssize_t h3_decode_qcs(struct qcs *qcs, struct buffer *b, int fin) */ if (h3s->type == H3S_T_CTRL && fin) { TRACE_ERROR("control stream closed by remote peer", H3_EV_RX_FRAME, qcs->qcc->conn, qcs); - qcc_set_error(qcs->qcc, H3_CLOSED_CRITICAL_STREAM, 1); + qcc_set_error(qcs->qcc, H3_ERR_CLOSED_CRITICAL_STREAM, 1); + qcc_report_glitch(qcs->qcc, 1); goto err; } if (!b_data(b) && fin && quic_stream_is_bidi(qcs->id)) { struct buffer *appbuf; struct htx *htx; + int eom; TRACE_PROTO("received FIN without data", H3_EV_RX_FRAME, qcs->qcc->conn, qcs); - if (!(appbuf = qcs_get_buf(qcs, &qcs->rx.app_buf))) { + if (!(appbuf = qcc_get_stream_rxbuf(qcs))) { TRACE_ERROR("data buffer alloc failure", H3_EV_RX_FRAME, qcs->qcc->conn, qcs); - h3c->err = H3_INTERNAL_ERROR; + qcc_set_error(qcs->qcc, H3_ERR_INTERNAL_ERROR, 1); goto err; } htx = htx_from_buf(appbuf); - if (!htx_set_eom(htx)) { + eom = htx_set_eom(htx); + htx_to_buf(htx, appbuf); + if (!eom) { TRACE_ERROR("cannot set EOM", H3_EV_RX_FRAME, qcs->qcc->conn, qcs); - h3c->err = H3_INTERNAL_ERROR; + qcc_set_error(qcs->qcc, H3_ERR_INTERNAL_ERROR, 1); + goto err; } - htx_to_buf(htx, appbuf); + goto done; } - while (b_data(b) && !(qcs->flags & QC_SF_DEM_FULL) && !h3c->err && !h3s->err) { + while (b_data(b) && !(qcs->flags & QC_SF_DEM_FULL) && ret >= 0) { uint64_t ftype, flen; char last_stream_frame = 0; @@ -1307,6 +1340,7 @@ static ssize_t h3_decode_qcs(struct qcs *qcs, struct buffer *b, int fin) if ((ret = h3_check_frame_valid(h3c, qcs, ftype))) { TRACE_ERROR("received an invalid frame", H3_EV_RX_FRAME, qcs->qcc->conn, qcs); qcc_set_error(qcs->qcc, ret, 1); + qcc_report_glitch(qcs->qcc, 1); goto 
err; } @@ -1329,7 +1363,8 @@ static ssize_t h3_decode_qcs(struct qcs *qcs, struct buffer *b, int fin) */ if (flen > QC_S_RX_BUF_SZ) { TRACE_ERROR("received a too big frame", H3_EV_RX_FRAME, qcs->qcc->conn, qcs); - qcc_set_error(qcs->qcc, H3_EXCESSIVE_LOAD, 1); + qcc_set_error(qcs->qcc, H3_ERR_EXCESSIVE_LOAD, 1); + qcc_report_glitch(qcs->qcc, 1); goto err; } break; @@ -1405,6 +1440,10 @@ static ssize_t h3_decode_qcs(struct qcs *qcs, struct buffer *b, int fin) qcc_set_error(qcs->qcc, h3c->err, 1); return b_data(b); } + else if (unlikely(ret < 0)) { + qcc_set_error(qcs->qcc, H3_ERR_INTERNAL_ERROR, 1); + goto err; + } /* TODO may be useful to wakeup the MUX if blocked due to full buffer. * However, currently, io-cb of MUX does not handle Rx. @@ -1419,17 +1458,6 @@ static ssize_t h3_decode_qcs(struct qcs *qcs, struct buffer *b, int fin) return -1; } -/* Returns buffer for data sending. - * May be NULL if the allocation failed. - */ -static struct buffer *mux_get_buf(struct qcs *qcs) -{ - if (!b_size(&qcs->tx.buf)) - b_alloc(&qcs->tx.buf); - - return &qcs->tx.buf; -} - /* Function used to emit stream data from <qcs> control uni-stream. * * On success return the number of sent bytes. 
A negative code is used on @@ -1437,13 +1465,14 @@ static struct buffer *mux_get_buf(struct qcs *qcs) */ static int h3_control_send(struct qcs *qcs, void *ctx) { + int err; int ret; struct h3c *h3c = ctx; unsigned char data[(2 + 3) * 2 * QUIC_VARINT_MAX_SIZE]; /* enough for 3 settings */ struct buffer pos, *res; size_t frm_len; - TRACE_ENTER(H3_EV_TX_SETTINGS, qcs->qcc->conn, qcs); + TRACE_ENTER(H3_EV_TX_FRAME|H3_EV_TX_SETTINGS, qcs->qcc->conn, qcs); BUG_ON_HOT(h3c->flags & H3_CF_SETTINGS_SENT); @@ -1472,9 +1501,14 @@ static int h3_control_send(struct qcs *qcs, void *ctx) b_quic_enc_int(&pos, h3_settings_max_field_section_size, 0); } - res = mux_get_buf(qcs); - if (b_is_null(res)) { - TRACE_ERROR("cannot allocate Tx buffer", H3_EV_TX_SETTINGS, qcs->qcc->conn, qcs); + if (qfctl_sblocked(&qcs->tx.fc) || qfctl_sblocked(&qcs->qcc->tx.fc)) { + TRACE_ERROR("not enough initial credit for control stream", H3_EV_TX_FRAME|H3_EV_TX_SETTINGS, qcs->qcc->conn, qcs); + goto err; + } + + if (!(res = qcc_get_stream_txbuf(qcs, &err))) { + /* Consider alloc failure fatal for control stream even on conn buf limit. */ + TRACE_ERROR("cannot allocate Tx buffer", H3_EV_TX_FRAME|H3_EV_TX_SETTINGS, qcs->qcc->conn, qcs); goto err; } @@ -1487,22 +1521,21 @@ static int h3_control_send(struct qcs *qcs, void *ctx) ret = b_force_xfer(res, &pos, b_data(&pos)); if (ret > 0) { /* Register qcs for sending before other streams. 
*/ - qcc_send_stream(qcs, 1); + qcc_send_stream(qcs, 1, ret); h3c->flags |= H3_CF_SETTINGS_SENT; } - TRACE_LEAVE(H3_EV_TX_SETTINGS, qcs->qcc->conn, qcs); + TRACE_LEAVE(H3_EV_TX_FRAME|H3_EV_TX_SETTINGS, qcs->qcc->conn, qcs); return ret; err: - TRACE_DEVEL("leaving on error", H3_EV_TX_SETTINGS, qcs->qcc->conn, qcs); + TRACE_DEVEL("leaving on error", H3_EV_TX_FRAME|H3_EV_TX_SETTINGS, qcs->qcc->conn, qcs); return -1; } static int h3_resp_headers_send(struct qcs *qcs, struct htx *htx) { - struct h3s *h3s = qcs->ctx; - struct h3c *h3c = h3s->h3c; + int err; struct buffer outbuf; struct buffer headers_buf = BUF_NULL; struct buffer *res; @@ -1515,7 +1548,7 @@ static int h3_resp_headers_send(struct qcs *qcs, struct htx *htx) int hdr; int status = 0; - TRACE_ENTER(H3_EV_TX_HDR, qcs->qcc->conn, qcs); + TRACE_ENTER(H3_EV_TX_FRAME|H3_EV_TX_HDR, qcs->qcc->conn, qcs); sl = NULL; hdr = 0; @@ -1537,8 +1570,7 @@ static int h3_resp_headers_send(struct qcs *qcs, struct htx *htx) } else if (type == HTX_BLK_HDR) { if (unlikely(hdr >= sizeof(list) / sizeof(list[0]) - 1)) { - TRACE_ERROR("too many headers", H3_EV_TX_HDR, qcs->qcc->conn, qcs); - h3c->err = H3_INTERNAL_ERROR; + TRACE_ERROR("too many headers", H3_EV_TX_FRAME|H3_EV_TX_HDR, qcs->qcc->conn, qcs); goto err; } list[hdr].n = htx_get_blk_name(htx, blk); @@ -1555,11 +1587,14 @@ static int h3_resp_headers_send(struct qcs *qcs, struct htx *htx) list[hdr].n = ist(""); - res = mux_get_buf(qcs); - if (b_is_null(res)) { - TRACE_ERROR("cannot allocate Tx buffer", H3_EV_TX_HDR, qcs->qcc->conn, qcs); - h3c->err = H3_INTERNAL_ERROR; - goto err; + if (!(res = qcc_get_stream_txbuf(qcs, &err))) { + if (err) { + TRACE_ERROR("cannot allocate Tx buffer", H3_EV_TX_FRAME|H3_EV_TX_HDR, qcs->qcc->conn, qcs); + goto err; + } + + TRACE_STATE("conn buf limit reached", H3_EV_TX_FRAME|H3_EV_TX_HDR, qcs->qcc->conn, qcs); + goto end; } /* At least 5 bytes to store frame type + length as a varint max size */ @@ -1571,11 +1606,12 @@ static int 
h3_resp_headers_send(struct qcs *qcs, struct htx *htx) /* Start the headers after frame type + length */ headers_buf = b_make(b_head(res) + 5, b_size(res) - 5, 0, 0); + TRACE_DATA("encoding HEADERS frame", H3_EV_TX_FRAME|H3_EV_TX_HDR, + qcs->qcc->conn, qcs); if (qpack_encode_field_section_line(&headers_buf)) ABORT_NOW(); if (qpack_encode_int_status(&headers_buf, status)) { - TRACE_ERROR("invalid status code", H3_EV_TX_HDR, qcs->qcc->conn, qcs); - h3c->err = H3_INTERNAL_ERROR; + TRACE_ERROR("invalid status code", H3_EV_TX_FRAME|H3_EV_TX_HDR, qcs->qcc->conn, qcs); goto err; } @@ -1630,11 +1666,12 @@ static int h3_resp_headers_send(struct qcs *qcs, struct htx *htx) break; } - TRACE_LEAVE(H3_EV_TX_HDR, qcs->qcc->conn, qcs); + end: + TRACE_LEAVE(H3_EV_TX_FRAME|H3_EV_TX_HDR, qcs->qcc->conn, qcs); return ret; err: - TRACE_DEVEL("leaving on error", H3_EV_TX_HDR, qcs->qcc->conn, qcs); + TRACE_DEVEL("leaving on error", H3_EV_TX_FRAME|H3_EV_TX_HDR, qcs->qcc->conn, qcs); return -1; } @@ -1646,12 +1683,12 @@ static int h3_resp_headers_send(struct qcs *qcs, struct htx *htx) * Caller is responsible to emit an empty QUIC STREAM frame to signal the end * of the stream. * - * Returns the size of HTX blocks removed. + * Returns the size of HTX blocks removed. A negative error code is returned in + * case of a fatal error which should caused a connection closure. 
*/ static int h3_resp_trailers_send(struct qcs *qcs, struct htx *htx) { - struct h3s *h3s = qcs->ctx; - struct h3c *h3c = h3s->h3c; + int err; struct buffer headers_buf = BUF_NULL; struct buffer *res; struct http_hdr list[global.tune.max_http_hdr]; @@ -1661,7 +1698,7 @@ static int h3_resp_trailers_send(struct qcs *qcs, struct htx *htx) int ret = 0; int hdr; - TRACE_ENTER(H3_EV_TX_HDR, qcs->qcc->conn, qcs); + TRACE_ENTER(H3_EV_TX_FRAME|H3_EV_TX_HDR, qcs->qcc->conn, qcs); hdr = 0; for (blk = htx_get_head_blk(htx); blk; blk = htx_get_next_blk(htx, blk)) { @@ -1675,8 +1712,7 @@ static int h3_resp_trailers_send(struct qcs *qcs, struct htx *htx) if (type == HTX_BLK_TLR) { if (unlikely(hdr >= sizeof(list) / sizeof(list[0]) - 1)) { - TRACE_ERROR("too many headers", H3_EV_TX_HDR, qcs->qcc->conn, qcs); - h3c->err = H3_INTERNAL_ERROR; + TRACE_ERROR("too many headers", H3_EV_TX_FRAME|H3_EV_TX_HDR, qcs->qcc->conn, qcs); goto err; } list[hdr].n = htx_get_blk_name(htx, blk); @@ -1684,8 +1720,7 @@ static int h3_resp_trailers_send(struct qcs *qcs, struct htx *htx) hdr++; } else { - TRACE_ERROR("unexpected HTX block", H3_EV_TX_HDR, qcs->qcc->conn, qcs); - h3c->err = H3_INTERNAL_ERROR; + TRACE_ERROR("unexpected HTX block", H3_EV_TX_FRAME|H3_EV_TX_HDR, qcs->qcc->conn, qcs); goto err; } } @@ -1694,22 +1729,41 @@ static int h3_resp_trailers_send(struct qcs *qcs, struct htx *htx) /* No headers encoded here so no need to generate a H3 HEADERS * frame. Mux will send an empty QUIC STREAM frame with FIN. */ - TRACE_DATA("skipping trailer", H3_EV_TX_HDR, qcs->qcc->conn, qcs); + TRACE_DATA("skipping trailer", H3_EV_TX_FRAME|H3_EV_TX_HDR, qcs->qcc->conn, qcs); + + /* Truncate UNUSED / EOT HTX blocks. 
*/ + blk = htx_get_head_blk(htx); + while (blk) { + type = htx_get_blk_type(blk); + ret += htx_get_blksz(blk); + blk = htx_remove_blk(htx, blk); + if (type == HTX_BLK_EOT) + break; + } goto end; } + list[hdr].n = ist(""); - res = mux_get_buf(qcs); - if (b_is_null(res)) { - TRACE_ERROR("cannot allocate Tx buffer", H3_EV_TX_HDR, qcs->qcc->conn, qcs); - h3c->err = H3_INTERNAL_ERROR; - goto err; + start: + if (!(res = qcc_get_stream_txbuf(qcs, &err))) { + if (err) { + TRACE_ERROR("cannot allocate Tx buffer", H3_EV_TX_FRAME|H3_EV_TX_HDR, qcs->qcc->conn, qcs); + goto err; + } + + TRACE_STATE("conn buf limit reached", H3_EV_TX_FRAME|H3_EV_TX_HDR, qcs->qcc->conn, qcs); + goto end; } /* At least 9 bytes to store frame type + length as a varint max size */ if (b_room(res) < 9) { - qcs->flags |= QC_SF_BLK_MROOM; - goto err; + TRACE_STATE("not enough room for trailers frame", H3_EV_TX_FRAME|H3_EV_TX_HDR, qcs->qcc->conn, qcs); + if (qcc_release_stream_txbuf(qcs)) + goto end; + + /* Buffer released, restart processing. */ + goto start; } /* Force buffer realignment as size required to encode headers is unknown. */ @@ -1719,8 +1773,12 @@ static int h3_resp_trailers_send(struct qcs *qcs, struct htx *htx) headers_buf = b_make(b_peek(res, b_data(res) + 9), b_contig_space(res) - 9, 0, 0); if (qpack_encode_field_section_line(&headers_buf)) { - qcs->flags |= QC_SF_BLK_MROOM; - goto err; + TRACE_STATE("not enough room for trailers section line", H3_EV_TX_FRAME|H3_EV_TX_HDR, qcs->qcc->conn, qcs); + if (qcc_release_stream_txbuf(qcs)) + goto end; + + /* Buffer released, restart processing. 
*/ + goto start; } tail = b_tail(&headers_buf); @@ -1740,8 +1798,12 @@ static int h3_resp_trailers_send(struct qcs *qcs, struct htx *htx) } if (qpack_encode_header(&headers_buf, list[hdr].n, list[hdr].v)) { - qcs->flags |= QC_SF_BLK_MROOM; - goto err; + TRACE_STATE("not enough room for all trailers", H3_EV_TX_FRAME|H3_EV_TX_HDR, qcs->qcc->conn, qcs); + if (qcc_release_stream_txbuf(qcs)) + goto end; + + /* Buffer released, restart processing. */ + goto start; } } @@ -1750,20 +1812,21 @@ static int h3_resp_trailers_send(struct qcs *qcs, struct htx *htx) /* No headers encoded here so no need to generate a H3 HEADERS * frame. Mux will send an empty QUIC STREAM frame with FIN. */ - TRACE_DATA("skipping trailer", H3_EV_TX_HDR, qcs->qcc->conn, qcs); - goto end; + TRACE_DATA("skipping trailer", H3_EV_TX_FRAME|H3_EV_TX_HDR, qcs->qcc->conn, qcs); + } + else { + /* Now that all headers are encoded, we are certain that res + * buffer is big enough. + */ + TRACE_DATA("encoding TRAILERS frame", H3_EV_TX_FRAME|H3_EV_TX_HDR, + qcs->qcc->conn, qcs); + b_putchr(res, 0x01); /* h3 HEADERS frame type */ + if (!b_quic_enc_int(res, b_data(&headers_buf), 8)) + ABORT_NOW(); + b_add(res, b_data(&headers_buf)); } - /* Now that all headers are encoded, we are certain that res buffer is - * big enough. - */ - b_putchr(res, 0x01); /* h3 HEADERS frame type */ - if (!b_quic_enc_int(res, b_data(&headers_buf), 8)) - ABORT_NOW(); - b_add(res, b_data(&headers_buf)); - - end: - ret = 0; + /* Encoding success, truncate HTX blocks until EOT. 
*/ blk = htx_get_head_blk(htx); while (blk) { type = htx_get_blk_type(blk); @@ -1773,23 +1836,28 @@ static int h3_resp_trailers_send(struct qcs *qcs, struct htx *htx) break; } - TRACE_LEAVE(H3_EV_TX_HDR, qcs->qcc->conn, qcs); + end: + TRACE_LEAVE(H3_EV_TX_FRAME|H3_EV_TX_HDR, qcs->qcc->conn, qcs); return ret; err: - TRACE_DEVEL("leaving on error", H3_EV_TX_HDR, qcs->qcc->conn, qcs); + TRACE_DEVEL("leaving on error", H3_EV_TX_FRAME|H3_EV_TX_HDR, qcs->qcc->conn, qcs); return -1; } -/* Returns the total of bytes sent. This corresponds to the +/* Convert a series of HTX data blocks from <htx> buffer of size <count> into + * HTTP/3 frames encoded into <qcs> Tx buffer. The caller must also specify the + * underlying HTX area via <buf> as this will be used if zero-copy can be + * performed. + * + * Returns the total bytes of encoded HTTP/3 payload. This corresponds to the * total bytes of HTX block removed. A negative error code is returned in case * of a fatal error which should caused a connection closure. 
*/ -static int h3_resp_data_send(struct qcs *qcs, struct buffer *buf, size_t count) +static int h3_resp_data_send(struct qcs *qcs, struct htx *htx, + struct buffer *buf, size_t count) { - struct htx *htx; - struct h3s *h3s = qcs->ctx; - struct h3c *h3c = h3s->h3c; + int err; struct buffer outbuf; struct buffer *res; size_t total = 0; @@ -1797,9 +1865,7 @@ static int h3_resp_data_send(struct qcs *qcs, struct buffer *buf, size_t count) struct htx_blk *blk; enum htx_blk_type type; - TRACE_ENTER(H3_EV_TX_DATA, qcs->qcc->conn, qcs); - - htx = htx_from_buf(buf); + TRACE_ENTER(H3_EV_TX_FRAME|H3_EV_TX_DATA, qcs->qcc->conn, qcs); new_frame: if (!count || htx_is_empty(htx)) @@ -1815,31 +1881,43 @@ static int h3_resp_data_send(struct qcs *qcs, struct buffer *buf, size_t count) if (type != HTX_BLK_DATA) goto end; - res = mux_get_buf(qcs); - if (b_is_null(res)) { - TRACE_ERROR("cannot allocate Tx buffer", H3_EV_TX_DATA, qcs->qcc->conn, qcs); - h3c->err = H3_INTERNAL_ERROR; - goto err; + if (!(res = qcc_get_stream_txbuf(qcs, &err))) { + if (err) { + TRACE_ERROR("cannot allocate Tx buffer", H3_EV_TX_FRAME|H3_EV_TX_DATA, qcs->qcc->conn, qcs); + goto err; + } + + /* Connection buf limit reached, stconn will subscribe on SEND. */ + TRACE_STATE("conn buf limit reached", H3_EV_TX_FRAME|H3_EV_TX_HDR, qcs->qcc->conn, qcs); + goto end; } + /* If HTX contains only one DATA block, try to exchange it with MUX + * buffer to perform zero-copy. This is only achievable if MUX buffer + * is currently empty. + */ if (unlikely(fsize == count && - !b_data(res) && - htx_nbblks(htx) == 1 && type == HTX_BLK_DATA)) { + !b_data(res) && + htx_nbblks(htx) == 1 && type == HTX_BLK_DATA)) { void *old_area = res->area; - /* map an H2 frame to the HTX block so that we can put the - * frame header there. 
- */ - *res = b_make(buf->area, buf->size, sizeof(struct htx) + blk->addr - hsize, fsize + hsize); - outbuf = b_make(b_head(res), hsize, 0, 0); - b_putchr(&outbuf, 0x00); /* h3 frame type = DATA */ - b_quic_enc_int(&outbuf, fsize, QUIC_VARINT_MAX_SIZE); /* h3 frame length */ + TRACE_DATA("perform zero-copy DATA transfer", + H3_EV_TX_FRAME|H3_EV_TX_DATA, qcs->qcc->conn, qcs); + + /* remap MUX buffer to HTX area, keep an offset for H3 header. */ + *res = b_make(buf->area, buf->size, + sizeof(struct htx) + blk->addr - hsize, 0); + + /* write H3 header frame before old HTX block. */ + b_putchr(res, 0x00); /* h3 frame type = DATA */ + b_quic_enc_int(res, fsize, QUIC_VARINT_MAX_SIZE); /* h3 frame length */ + b_add(res, fsize); - /* and exchange with our old area */ + /* assign old MUX area to HTX buffer. */ buf->area = old_area; buf->data = buf->head = 0; total += fsize; - fsize = 0; + goto end; } @@ -1851,23 +1929,29 @@ static int h3_resp_data_send(struct qcs *qcs, struct buffer *buf, size_t count) outbuf = b_make(b_tail(res), b_contig_space(res), 0, 0); if (b_size(&outbuf) > hsize || !b_space_wraps(res)) break; - b_slow_realign(res, trash.area, b_data(res)); + if (qcc_realign_stream_txbuf(qcs, res)) + break; } - /* Not enough room for headers and at least one data byte, block the - * stream. It is expected that the stream connector layer will subscribe - * on SEND. + /* Not enough room for headers and at least one data byte, try to + * release the current buffer and allocate a new one. If not possible, + * stconn layer will subscribe on SEND. */ if (b_size(&outbuf) <= hsize) { - TRACE_STATE("not enough room for data frame", H3_EV_TX_DATA, qcs->qcc->conn, qcs); - qcs->flags |= QC_SF_BLK_MROOM; - goto end; + TRACE_STATE("not enough room for data frame", H3_EV_TX_FRAME|H3_EV_TX_DATA, qcs->qcc->conn, qcs); + if (qcc_release_stream_txbuf(qcs)) + goto end; + + /* Buffer released, restart processing. 
*/ + goto new_frame; } if (b_size(&outbuf) < hsize + fsize) fsize = b_size(&outbuf) - hsize; BUG_ON(fsize <= 0); + TRACE_DATA("encoding DATA frame", H3_EV_TX_FRAME|H3_EV_TX_DATA, + qcs->qcc->conn, qcs); b_putchr(&outbuf, 0x00); /* h3 frame type = DATA */ b_quic_enc_int(&outbuf, fsize, 0); /* h3 frame length */ @@ -1885,19 +1969,17 @@ static int h3_resp_data_send(struct qcs *qcs, struct buffer *buf, size_t count) goto new_frame; end: - TRACE_LEAVE(H3_EV_TX_DATA, qcs->qcc->conn, qcs); + TRACE_LEAVE(H3_EV_TX_FRAME|H3_EV_TX_DATA, qcs->qcc->conn, qcs); return total; err: BUG_ON(total); /* Must return HTX removed size if at least on frame encoded. */ - TRACE_DEVEL("leaving on error", H3_EV_TX_DATA, qcs->qcc->conn, qcs); + TRACE_DEVEL("leaving on error", H3_EV_TX_FRAME|H3_EV_TX_DATA, qcs->qcc->conn, qcs); return -1; } static size_t h3_snd_buf(struct qcs *qcs, struct buffer *buf, size_t count) { - struct h3s *h3s = qcs->ctx; - struct h3c *h3c = h3s->h3c; size_t total = 0; enum htx_blk_type btype; struct htx *htx; @@ -1910,12 +1992,7 @@ static size_t h3_snd_buf(struct qcs *qcs, struct buffer *buf, size_t count) htx = htx_from_buf(buf); - if (htx->extra && htx->extra == HTX_UNKOWN_PAYLOAD_LENGTH) - qcs->flags |= QC_SF_UNKNOWN_PL_LENGTH; - - while (count && !htx_is_empty(htx) && - !(qcs->flags & QC_SF_BLK_MROOM) && !h3c->err) { - + while (count && !htx_is_empty(htx) && qcc_stream_can_send(qcs) && ret >= 0) { idx = htx_get_head(htx); blk = htx_get_blk(htx, idx); btype = htx_get_blk_type(blk); @@ -1937,9 +2014,11 @@ static size_t h3_snd_buf(struct qcs *qcs, struct buffer *buf, size_t count) break; case HTX_BLK_DATA: - ret = h3_resp_data_send(qcs, buf, count); + ret = h3_resp_data_send(qcs, htx, buf, count); if (ret > 0) { + /* Reload HTX. This is necessary if 0-copy was performed. 
*/ htx = htx_from_buf(buf); + total += ret; count -= ret; if (ret < bsize) @@ -1964,16 +2043,11 @@ static size_t h3_snd_buf(struct qcs *qcs, struct buffer *buf, size_t count) count -= bsize; break; } - - /* If an error occured, either buffer space or connection error - * must be set to break current loop. - */ - BUG_ON(ret < 0 && !(qcs->flags & QC_SF_BLK_MROOM) && !h3c->err); } - /* Interrupt sending on connection error. */ - if (unlikely(h3c->err)) { - qcc_set_error(qcs->qcc, h3c->err, 1); + /* Interrupt sending on fatal error. */ + if (unlikely(ret < 0)) { + qcc_set_error(qcs->qcc, H3_ERR_INTERNAL_ERROR, 1); goto out; } @@ -1998,7 +2072,7 @@ static size_t h3_snd_buf(struct qcs *qcs, struct buffer *buf, size_t count) /* Generate a STOP_SENDING if full response transferred before * receiving the full request. */ - qcs->err = H3_NO_ERROR; + qcs->err = H3_ERR_NO_ERROR; qcc_abort_stream_read(qcs); } @@ -2011,15 +2085,21 @@ static size_t h3_snd_buf(struct qcs *qcs, struct buffer *buf, size_t count) static size_t h3_nego_ff(struct qcs *qcs, size_t count) { + int err; struct buffer *res; int hsize; size_t sz, ret = 0; TRACE_ENTER(H3_EV_STRM_SEND, qcs->qcc->conn, qcs); - res = mux_get_buf(qcs); - if (b_is_null(res)) { - qcs->sd->iobuf.flags |= IOBUF_FL_NO_FF; + start: + if (!(res = qcc_get_stream_txbuf(qcs, &err))) { + if (err) { + qcs->sd->iobuf.flags |= IOBUF_FL_NO_FF; + goto end; + } + + qcs->sd->iobuf.flags |= IOBUF_FL_FF_BLOCKED; goto end; } @@ -2028,7 +2108,8 @@ static size_t h3_nego_ff(struct qcs *qcs, size_t count) while (1) { if (b_contig_space(res) >= hsize || !b_space_wraps(res)) break; - b_slow_realign(res, trash.area, b_data(res)); + if (qcc_realign_stream_txbuf(qcs, res)) + break; } /* Not enough room for headers and at least one data byte, block the @@ -2036,9 +2117,13 @@ static size_t h3_nego_ff(struct qcs *qcs, size_t count) * on SEND. 
*/ if (b_contig_space(res) <= hsize) { - qcs->flags |= QC_SF_BLK_MROOM; - qcs->sd->iobuf.flags |= IOBUF_FL_FF_BLOCKED; - goto end; + if (qcc_release_stream_txbuf(qcs)) { + qcs->sd->iobuf.flags |= IOBUF_FL_FF_BLOCKED; + goto end; + } + + /* Buffer released, restart processing. */ + goto start; } /* Cannot forward more than available room in output buffer */ @@ -2064,6 +2149,8 @@ static size_t h3_done_ff(struct qcs *qcs) h3_debug_printf(stderr, "%s\n", __func__); if (qcs->sd->iobuf.data) { + TRACE_DATA("encoding DATA frame (fast forward)", + H3_EV_TX_FRAME|H3_EV_TX_DATA, qcs->qcc->conn, qcs); b_sub(qcs->sd->iobuf.buf, qcs->sd->iobuf.data); b_putchr(qcs->sd->iobuf.buf, 0x00); /* h3 frame type = DATA */ b_quic_enc_int(qcs->sd->iobuf.buf, qcs->sd->iobuf.data, QUIC_VARINT_MAX_SIZE); /* h3 frame length */ @@ -2105,7 +2192,8 @@ static int h3_close(struct qcs *qcs, enum qcc_app_ops_close_side side) */ if (qcs == h3c->ctrl_strm || h3s->type == H3S_T_CTRL) { TRACE_ERROR("closure detected on control stream", H3_EV_H3S_END, qcs->qcc->conn, qcs); - qcc_set_error(qcs->qcc, H3_CLOSED_CRITICAL_STREAM, 1); + qcc_set_error(qcs->qcc, H3_ERR_CLOSED_CRITICAL_STREAM, 1); + qcc_report_glitch(qcs->qcc, 1); return 1; } @@ -2136,7 +2224,7 @@ static int h3_attach(struct qcs *qcs, void *conn_ctx) */ TRACE_STATE("reject stream higher than goaway", H3_EV_H3S_NEW, qcs->qcc->conn, qcs); qcc_abort_stream_read(qcs); - qcc_reset_stream(qcs, H3_REQUEST_REJECTED); + qcc_reset_stream(qcs, H3_ERR_REQUEST_REJECTED); goto done; } @@ -2187,47 +2275,18 @@ static void h3_detach(struct qcs *qcs) TRACE_LEAVE(H3_EV_H3S_END, qcs->qcc->conn, qcs); } -/* Initialize H3 control stream and prepare SETTINGS emission. - * - * Returns 0 on success else non-zero. 
- */ -static int h3_finalize(void *ctx) -{ - struct h3c *h3c = ctx; - struct qcc *qcc = h3c->qcc; - struct qcs *qcs; - - TRACE_ENTER(H3_EV_H3C_NEW, qcc->conn); - - qcs = qcc_init_stream_local(h3c->qcc, 0); - if (!qcs) { - TRACE_ERROR("cannot init control stream", H3_EV_H3C_NEW, qcc->conn); - goto err; - } - - h3c->ctrl_strm = qcs; - - if (h3_control_send(qcs, h3c) < 0) - goto err; - - TRACE_LEAVE(H3_EV_H3C_NEW, qcc->conn); - return 0; - - err: - TRACE_DEVEL("leaving on error", H3_EV_H3C_NEW, qcc->conn); - return 1; -} - /* Generate a GOAWAY frame for <h3c> connection on the control stream. * * Returns 0 on success else non-zero. */ static int h3_send_goaway(struct h3c *h3c) { + int err; struct qcs *qcs = h3c->ctrl_strm; struct buffer pos, *res; unsigned char data[3 * QUIC_VARINT_MAX_SIZE]; size_t frm_len = quic_int_getsize(h3c->id_goaway); + size_t xfer; TRACE_ENTER(H3_EV_H3C_END, h3c->qcc->conn); @@ -2242,15 +2301,16 @@ static int h3_send_goaway(struct h3c *h3c) b_quic_enc_int(&pos, frm_len, 0); b_quic_enc_int(&pos, h3c->id_goaway, 0); - res = mux_get_buf(qcs); - if (b_is_null(res) || b_room(res) < b_data(&pos)) { - /* Do not try forcefully to emit GOAWAY if no space left. */ + res = qcc_get_stream_txbuf(qcs, &err); + if (!res || b_room(res) < b_data(&pos) || + qfctl_sblocked(&qcs->tx.fc) || qfctl_sblocked(&h3c->qcc->tx.fc)) { + /* Do not try forcefully to emit GOAWAY if no buffer available or not enough space left. 
*/ TRACE_ERROR("cannot send GOAWAY", H3_EV_H3C_END, h3c->qcc->conn, qcs); goto err; } - b_force_xfer(res, &pos, b_data(&pos)); - qcc_send_stream(qcs, 1); + xfer = b_force_xfer(res, &pos, b_data(&pos)); + qcc_send_stream(qcs, 1, xfer); h3c->flags |= H3_CF_GOAWAY_SENT; TRACE_LEAVE(H3_EV_H3C_END, h3c->qcc->conn); @@ -2271,7 +2331,7 @@ static int h3_send_goaway(struct h3c *h3c) static int h3_init(struct qcc *qcc) { struct h3c *h3c; - struct quic_conn *qc = qcc->conn->handle.qc; + const struct listener *li = __objt_listener(qcc->conn->target); TRACE_ENTER(H3_EV_H3C_NEW, qcc->conn); @@ -2288,9 +2348,8 @@ static int h3_init(struct qcc *qcc) h3c->id_goaway = 0; qcc->ctx = h3c; - /* TODO cleanup only ref to quic_conn */ h3c->prx_counters = - EXTRA_COUNTERS_GET(qc->li->bind_conf->frontend->extra_counters_fe, + EXTRA_COUNTERS_GET(li->bind_conf->frontend->extra_counters_fe, &h3_stats_module); LIST_INIT(&h3c->buf_wait.list); @@ -2298,10 +2357,43 @@ static int h3_init(struct qcc *qcc) return 1; fail_no_h3: + qcc_set_error(qcc, H3_ERR_INTERNAL_ERROR, 1); TRACE_DEVEL("leaving on error", H3_EV_H3C_NEW, qcc->conn); return 0; } +/* Initialize H3 control stream and prepare SETTINGS emission. + * + * Returns 0 on success else non-zero. + */ +static int h3_finalize(void *ctx) +{ + struct h3c *h3c = ctx; + struct qcc *qcc = h3c->qcc; + struct qcs *qcs; + + TRACE_ENTER(H3_EV_H3C_NEW, qcc->conn); + + qcs = qcc_init_stream_local(qcc, 0); + if (!qcs) { + TRACE_ERROR("cannot init control stream", H3_EV_H3C_NEW, qcc->conn); + goto err; + } + + h3c->ctrl_strm = qcs; + + if (h3_control_send(qcs, h3c) < 0) + goto err; + + TRACE_LEAVE(H3_EV_H3C_NEW, qcc->conn); + return 0; + + err: + qcc_set_error(qcc, H3_ERR_INTERNAL_ERROR, 1); + TRACE_DEVEL("leaving on error", H3_EV_H3C_NEW, qcc->conn); + return 1; +} + /* Send a HTTP/3 GOAWAY followed by a CONNECTION_CLOSE_APP. 
*/ static void h3_shutdown(void *ctx) { @@ -2324,7 +2416,7 @@ static void h3_shutdown(void *ctx) * graceful shutdown SHOULD use the H3_NO_ERROR error code when closing * the connection. */ - h3c->qcc->err = quic_err_app(H3_NO_ERROR); + h3c->qcc->err = quic_err_app(H3_ERR_NO_ERROR); TRACE_LEAVE(H3_EV_H3C_END, h3c->qcc->conn); } @@ -2343,6 +2435,12 @@ static void h3_stats_inc_err_cnt(void *ctx, int err_code) h3_inc_err_cnt(h3c->prx_counters, err_code); } +static void h3_report_susp(void *ctx) +{ + struct h3c *h3c = ctx; + h3c->qcc->err = quic_err_app(H3_ERR_EXCESSIVE_LOAD); +} + static inline const char *h3_ft_str(uint64_t type) { switch (type) { @@ -2389,15 +2487,16 @@ static void h3_trace(enum trace_level level, uint64_t mask, /* HTTP/3 application layer operations */ const struct qcc_app_ops h3_ops = { .init = h3_init, + .finalize = h3_finalize, .attach = h3_attach, - .decode_qcs = h3_decode_qcs, + .rcv_buf = h3_rcv_buf, .snd_buf = h3_snd_buf, .nego_ff = h3_nego_ff, .done_ff = h3_done_ff, .close = h3_close, .detach = h3_detach, - .finalize = h3_finalize, .shutdown = h3_shutdown, .inc_err_cnt = h3_stats_inc_err_cnt, + .report_susp = h3_report_susp, .release = h3_release, }; diff --git a/src/h3_stats.c b/src/h3_stats.c index c96093f..48dac33 100644 --- a/src/h3_stats.c +++ b/src/h3_stats.c @@ -1,4 +1,5 @@ #include <haproxy/h3.h> +#include <haproxy/qpack-t.h> #include <haproxy/stats.h> enum { @@ -35,7 +36,7 @@ enum { H3_STATS_COUNT /* must be the last */ }; -static struct name_desc h3_stats[] = { +static struct stat_col h3_stats[] = { /* h3 frame type counters */ [H3_ST_DATA] = { .name = "h3_data", .desc = "Total number of DATA frames received" }, @@ -128,40 +129,114 @@ static struct h3_counters { long long qpack_decoder_stream_error; /* total number of QPACK_DECODER_STREAM_ERROR errors received */ } h3_counters; -static void h3_fill_stats(void *data, struct field *stats) +static int h3_fill_stats(void *data, struct field *stats, unsigned int *selected_field) { 
struct h3_counters *counters = data; + unsigned int current_field = (selected_field != NULL ? *selected_field : 0); - /* h3 frame type counters */ - stats[H3_ST_DATA] = mkf_u64(FN_COUNTER, counters->h3_data); - stats[H3_ST_HEADERS] = mkf_u64(FN_COUNTER, counters->h3_headers); - stats[H3_ST_CANCEL_PUSH] = mkf_u64(FN_COUNTER, counters->h3_cancel_push); - stats[H3_ST_PUSH_PROMISE] = mkf_u64(FN_COUNTER, counters->h3_push_promise); - stats[H3_ST_MAX_PUSH_ID] = mkf_u64(FN_COUNTER, counters->h3_max_push_id); - stats[H3_ST_GOAWAY] = mkf_u64(FN_COUNTER, counters->h3_goaway); - stats[H3_ST_SETTINGS] = mkf_u64(FN_COUNTER, counters->h3_settings); - /* h3 error counters */ - stats[H3_ST_H3_NO_ERROR] = mkf_u64(FN_COUNTER, counters->h3_no_error); - stats[H3_ST_H3_GENERAL_PROTOCOL_ERROR] = mkf_u64(FN_COUNTER, counters->h3_general_protocol_error); - stats[H3_ST_H3_INTERNAL_ERROR] = mkf_u64(FN_COUNTER, counters->h3_internal_error); - stats[H3_ST_H3_STREAM_CREATION_ERROR] = mkf_u64(FN_COUNTER, counters->h3_stream_creation_error); - stats[H3_ST_H3_CLOSED_CRITICAL_STREAM] = mkf_u64(FN_COUNTER, counters->h3_closed_critical_stream); - stats[H3_ST_H3_FRAME_UNEXPECTED] = mkf_u64(FN_COUNTER, counters->h3_frame_unexpected); - stats[H3_ST_H3_FRAME_ERROR] = mkf_u64(FN_COUNTER, counters->h3_frame_error); - stats[H3_ST_H3_EXCESSIVE_LOAD] = mkf_u64(FN_COUNTER, counters->h3_excessive_load); - stats[H3_ST_H3_ID_ERROR] = mkf_u64(FN_COUNTER, counters->h3_id_error); - stats[H3_ST_H3_SETTINGS_ERROR] = mkf_u64(FN_COUNTER, counters->h3_settings_error); - stats[H3_ST_H3_MISSING_SETTINGS] = mkf_u64(FN_COUNTER, counters->h3_missing_settings); - stats[H3_ST_H3_REQUEST_REJECTED] = mkf_u64(FN_COUNTER, counters->h3_request_rejected); - stats[H3_ST_H3_REQUEST_CANCELLED] = mkf_u64(FN_COUNTER, counters->h3_request_cancelled); - stats[H3_ST_H3_REQUEST_INCOMPLETE] = mkf_u64(FN_COUNTER, counters->h3_request_incomplete); - stats[H3_ST_H3_MESSAGE_ERROR] = mkf_u64(FN_COUNTER, counters->h3_message_error); - 
stats[H3_ST_H3_CONNECT_ERROR] = mkf_u64(FN_COUNTER, counters->h3_connect_error); - stats[H3_ST_H3_VERSION_FALLBACK] = mkf_u64(FN_COUNTER, counters->h3_version_fallback); - /* QPACK error counters */ - stats[H3_ST_QPACK_DECOMPRESSION_FAILED] = mkf_u64(FN_COUNTER, counters->qpack_decompression_failed); - stats[H3_ST_QPACK_ENCODER_STREAM_ERROR] = mkf_u64(FN_COUNTER, counters->qpack_encoder_stream_error); - stats[H3_ST_QPACK_DECODER_STREAM_ERROR] = mkf_u64(FN_COUNTER, counters->qpack_decoder_stream_error); + for (; current_field < H3_STATS_COUNT; current_field++) { + struct field metric = { 0 }; + + switch (current_field) { + /* h3 frame type counters */ + case H3_ST_DATA: + metric = mkf_u64(FN_COUNTER, counters->h3_data); + break; + case H3_ST_HEADERS: + metric = mkf_u64(FN_COUNTER, counters->h3_headers); + break; + case H3_ST_CANCEL_PUSH: + metric = mkf_u64(FN_COUNTER, counters->h3_cancel_push); + break; + case H3_ST_PUSH_PROMISE: + metric = mkf_u64(FN_COUNTER, counters->h3_push_promise); + break; + case H3_ST_MAX_PUSH_ID: + metric = mkf_u64(FN_COUNTER, counters->h3_max_push_id); + break; + case H3_ST_GOAWAY: + metric = mkf_u64(FN_COUNTER, counters->h3_goaway); + break; + case H3_ST_SETTINGS: + metric = mkf_u64(FN_COUNTER, counters->h3_settings); + break; + + /* h3 error counters */ + case H3_ST_H3_NO_ERROR: + metric = mkf_u64(FN_COUNTER, counters->h3_no_error); + break; + case H3_ST_H3_GENERAL_PROTOCOL_ERROR: + metric = mkf_u64(FN_COUNTER, counters->h3_general_protocol_error); + break; + case H3_ST_H3_INTERNAL_ERROR: + metric = mkf_u64(FN_COUNTER, counters->h3_internal_error); + break; + case H3_ST_H3_STREAM_CREATION_ERROR: + metric = mkf_u64(FN_COUNTER, counters->h3_stream_creation_error); + break; + case H3_ST_H3_CLOSED_CRITICAL_STREAM: + metric = mkf_u64(FN_COUNTER, counters->h3_closed_critical_stream); + break; + case H3_ST_H3_FRAME_UNEXPECTED: + metric = mkf_u64(FN_COUNTER, counters->h3_frame_unexpected); + break; + case H3_ST_H3_FRAME_ERROR: + metric = 
mkf_u64(FN_COUNTER, counters->h3_frame_error); + break; + case H3_ST_H3_EXCESSIVE_LOAD: + metric = mkf_u64(FN_COUNTER, counters->h3_excessive_load); + break; + case H3_ST_H3_ID_ERROR: + metric = mkf_u64(FN_COUNTER, counters->h3_id_error); + break; + case H3_ST_H3_SETTINGS_ERROR: + metric = mkf_u64(FN_COUNTER, counters->h3_settings_error); + break; + case H3_ST_H3_MISSING_SETTINGS: + metric = mkf_u64(FN_COUNTER, counters->h3_missing_settings); + break; + case H3_ST_H3_REQUEST_REJECTED: + metric = mkf_u64(FN_COUNTER, counters->h3_request_rejected); + break; + case H3_ST_H3_REQUEST_CANCELLED: + metric = mkf_u64(FN_COUNTER, counters->h3_request_cancelled); + break; + case H3_ST_H3_REQUEST_INCOMPLETE: + metric = mkf_u64(FN_COUNTER, counters->h3_request_incomplete); + break; + case H3_ST_H3_MESSAGE_ERROR: + metric = mkf_u64(FN_COUNTER, counters->h3_message_error); + break; + case H3_ST_H3_CONNECT_ERROR: + metric = mkf_u64(FN_COUNTER, counters->h3_connect_error); + break; + case H3_ST_H3_VERSION_FALLBACK: + metric = mkf_u64(FN_COUNTER, counters->h3_version_fallback); + break; + + /* QPACK error counters */ + case H3_ST_QPACK_DECOMPRESSION_FAILED: + metric = mkf_u64(FN_COUNTER, counters->qpack_decompression_failed); + break; + case H3_ST_QPACK_ENCODER_STREAM_ERROR: + metric = mkf_u64(FN_COUNTER, counters->qpack_encoder_stream_error); + break; + case H3_ST_QPACK_DECODER_STREAM_ERROR: + metric = mkf_u64(FN_COUNTER, counters->qpack_decoder_stream_error); + break; + default: + /* not used for frontends. If a specific metric + * is requested, return an error. Otherwise continue. 
+ */ + if (selected_field != NULL) + return 0; + continue; + } + stats[current_field] = metric; + if (selected_field != NULL) + break; + } + return 1; } struct stats_module h3_stats_module = { @@ -180,64 +255,64 @@ INITCALL1(STG_REGISTER, stats_register_module, &h3_stats_module); void h3_inc_err_cnt(struct h3_counters *ctrs, int error_code) { switch (error_code) { - case H3_NO_ERROR: + case H3_ERR_NO_ERROR: HA_ATOMIC_INC(&ctrs->h3_no_error); break; - case H3_GENERAL_PROTOCOL_ERROR: + case H3_ERR_GENERAL_PROTOCOL_ERROR: HA_ATOMIC_INC(&ctrs->h3_general_protocol_error); break; - case H3_INTERNAL_ERROR: + case H3_ERR_INTERNAL_ERROR: HA_ATOMIC_INC(&ctrs->h3_internal_error); break; - case H3_STREAM_CREATION_ERROR: + case H3_ERR_STREAM_CREATION_ERROR: HA_ATOMIC_INC(&ctrs->h3_stream_creation_error); break; - case H3_CLOSED_CRITICAL_STREAM: + case H3_ERR_CLOSED_CRITICAL_STREAM: HA_ATOMIC_INC(&ctrs->h3_closed_critical_stream); break; - case H3_FRAME_UNEXPECTED: + case H3_ERR_FRAME_UNEXPECTED: HA_ATOMIC_INC(&ctrs->h3_frame_unexpected); break; - case H3_FRAME_ERROR: + case H3_ERR_FRAME_ERROR: HA_ATOMIC_INC(&ctrs->h3_frame_error); break; - case H3_EXCESSIVE_LOAD: + case H3_ERR_EXCESSIVE_LOAD: HA_ATOMIC_INC(&ctrs->h3_excessive_load); break; - case H3_ID_ERROR: + case H3_ERR_ID_ERROR: HA_ATOMIC_INC(&ctrs->h3_id_error); break; - case H3_SETTINGS_ERROR: + case H3_ERR_SETTINGS_ERROR: HA_ATOMIC_INC(&ctrs->h3_settings_error); break; - case H3_MISSING_SETTINGS: + case H3_ERR_MISSING_SETTINGS: HA_ATOMIC_INC(&ctrs->h3_missing_settings); break; - case H3_REQUEST_REJECTED: + case H3_ERR_REQUEST_REJECTED: HA_ATOMIC_INC(&ctrs->h3_request_rejected); break; - case H3_REQUEST_CANCELLED: + case H3_ERR_REQUEST_CANCELLED: HA_ATOMIC_INC(&ctrs->h3_request_cancelled); break; - case H3_REQUEST_INCOMPLETE: + case H3_ERR_REQUEST_INCOMPLETE: HA_ATOMIC_INC(&ctrs->h3_request_incomplete); break; - case H3_MESSAGE_ERROR: + case H3_ERR_MESSAGE_ERROR: HA_ATOMIC_INC(&ctrs->h3_message_error); break; - case 
H3_CONNECT_ERROR: + case H3_ERR_CONNECT_ERROR: HA_ATOMIC_INC(&ctrs->h3_connect_error); break; - case H3_VERSION_FALLBACK: + case H3_ERR_VERSION_FALLBACK: HA_ATOMIC_INC(&ctrs->h3_version_fallback); break; - case QPACK_DECOMPRESSION_FAILED: + case QPACK_ERR_DECOMPRESSION_FAILED: HA_ATOMIC_INC(&ctrs->qpack_decompression_failed); break; - case QPACK_ENCODER_STREAM_ERROR: + case QPACK_ERR_ENCODER_STREAM_ERROR: HA_ATOMIC_INC(&ctrs->qpack_encoder_stream_error); break; - case QPACK_DECODER_STREAM_ERROR: + case QPACK_ERR_DECODER_STREAM_ERROR: HA_ATOMIC_INC(&ctrs->qpack_decoder_stream_error); break; default: diff --git a/src/haproxy.c b/src/haproxy.c index 1659d3d..c987fdb 100644 --- a/src/haproxy.c +++ b/src/haproxy.c @@ -60,7 +60,7 @@ #include <assert.h> #endif #if defined(USE_SYSTEMD) -#include <systemd/sd-daemon.h> +#include <haproxy/systemd.h> #endif #include <import/sha1.h> @@ -118,6 +118,7 @@ #include <haproxy/sock.h> #include <haproxy/sock_inet.h> #include <haproxy/ssl_sock.h> +#include <haproxy/stats-file.h> #include <haproxy/stats-t.h> #include <haproxy/stream.h> #include <haproxy/task.h> @@ -209,6 +210,8 @@ struct global global = { .maxsslconn = DEFAULT_MAXSSLCONN, #endif #endif + /* by default allow clients which use a privileged port for TCP only */ + .clt_privileged_ports = HA_PROTO_TCP, /* others NULL OK */ }; @@ -267,6 +270,7 @@ unsigned int warned = 0; unsigned int tainted = 0; unsigned int experimental_directives_allowed = 0; +unsigned int deprecated_directives_allowed = 0; int check_kw_experimental(struct cfg_keyword *kw, const char *file, int linenum, char **errmsg) @@ -564,9 +568,6 @@ static void display_build_opts() #ifdef BUILD_TARGET "\n TARGET = " BUILD_TARGET #endif -#ifdef BUILD_CPU - "\n CPU = " BUILD_CPU -#endif #ifdef BUILD_CC "\n CC = " BUILD_CC #endif @@ -659,6 +660,7 @@ static void usage(char *name) " -dW fails if any warning is emitted\n" " -dD diagnostic mode : warn about suspicious configuration statements\n" " -dF disable fast-forward\n" 
+ " -dI enable insecure fork\n" " -dZ disable zero-copy forwarding\n" " -sf/-st [pid ]* finishes/terminates old pids.\n" " -x <unix_socket> get listening sockets from a unix socket\n" @@ -721,6 +723,7 @@ static void mworker_reexec(int hardreload) char *msg = NULL; struct rlimit limit; struct mworker_proc *current_child = NULL; + int x_off = 0; /* disable -x by putting -x /dev/null */ mworker_block_signals(); setenv("HAPROXY_MWORKER_REEXEC", "1", 1); @@ -768,6 +771,10 @@ static void mworker_reexec(int hardreload) /* copy the program name */ next_argv[next_argc++] = old_argv[0]; + /* we need to reintroduce /dev/null every time */ + if (old_unixsocket && strcmp(old_unixsocket, "/dev/null") == 0) + x_off = 1; + /* insert the new options just after argv[0] in case we have a -- */ if (getenv("HAPROXY_MWORKER_WAIT_ONLY") == NULL) { @@ -791,8 +798,7 @@ static void mworker_reexec(int hardreload) msg = NULL; } } - - if (current_child) { + if (!x_off && current_child) { /* add the -x option with the socketpair of the current worker */ next_argv[next_argc++] = "-x"; if ((next_argv[next_argc++] = memprintf(&msg, "sockpair@%d", current_child->ipc_fd[0])) == NULL) @@ -801,6 +807,12 @@ static void mworker_reexec(int hardreload) } } + if (x_off) { + /* if the cmdline contained a -x /dev/null, continue to use it */ + next_argv[next_argc++] = "-x"; + next_argv[next_argc++] = "/dev/null"; + } + /* copy the previous options */ for (i = 1; i < old_argc; i++) next_argv[next_argc++] = old_argv[i]; @@ -842,8 +854,17 @@ void mworker_reload(int hardreload) } #if defined(USE_SYSTEMD) - if (global.tune.options & GTUNE_USE_SYSTEMD) - sd_notify(0, "RELOADING=1\nSTATUS=Reloading Configuration.\n"); + if (global.tune.options & GTUNE_USE_SYSTEMD) { + struct timespec ts; + + (void)clock_gettime(CLOCK_MONOTONIC, &ts); + + sd_notifyf(0, + "RELOADING=1\n" + "STATUS=Reloading Configuration.\n" + "MONOTONIC_USEC=%" PRIu64 "\n", + (ts.tv_sec * 1000000ULL + ts.tv_nsec / 1000ULL)); + } #endif 
mworker_reexec(hardreload); } @@ -998,19 +1019,19 @@ static void sig_dump_state(struct sig_handler *sh) chunk_printf(&trash, "SIGHUP: Proxy %s has no servers. Conn: act(FE+BE): %d+%d, %d pend (%d unass), tot(FE+BE): %lld+%lld.", p->id, - p->feconn, p->beconn, p->totpend, p->queue.length, p->fe_counters.cum_conn, p->be_counters.cum_conn); + p->feconn, p->beconn, p->totpend, p->queue.length, p->fe_counters.cum_conn, p->be_counters.cum_sess); } else if (p->srv_act == 0) { chunk_printf(&trash, "SIGHUP: Proxy %s %s ! Conn: act(FE+BE): %d+%d, %d pend (%d unass), tot(FE+BE): %lld+%lld.", p->id, (p->srv_bck) ? "is running on backup servers" : "has no server available", - p->feconn, p->beconn, p->totpend, p->queue.length, p->fe_counters.cum_conn, p->be_counters.cum_conn); + p->feconn, p->beconn, p->totpend, p->queue.length, p->fe_counters.cum_conn, p->be_counters.cum_sess); } else { chunk_printf(&trash, "SIGHUP: Proxy %s has %d active servers and %d backup servers available." " Conn: act(FE+BE): %d+%d, %d pend (%d unass), tot(FE+BE): %lld+%lld.", p->id, p->srv_act, p->srv_bck, - p->feconn, p->beconn, p->totpend, p->queue.length, p->fe_counters.cum_conn, p->be_counters.cum_conn); + p->feconn, p->beconn, p->totpend, p->queue.length, p->fe_counters.cum_conn, p->be_counters.cum_sess); } ha_warning("%s\n", trash.area); send_log(p, LOG_NOTICE, "%s\n", trash.area); @@ -1625,7 +1646,7 @@ static void init_args(int argc, char **argv) global.tune.options |= GTUNE_USE_FAST_FWD; /* Use fast-forward by default */ /* Use zero-copy forwarding by default */ - global.tune.no_zero_copy_fwd = NO_ZERO_COPY_FWD_QUIC_SND; + global.tune.no_zero_copy_fwd = 0; /* keep a copy of original arguments for the master process */ old_argv = copy_argv(argc, argv); @@ -1679,6 +1700,8 @@ static void init_args(int argc, char **argv) #endif else if (*flag == 'd' && flag[1] == 'F') global.tune.options &= ~GTUNE_USE_FAST_FWD; + else if (*flag == 'd' && flag[1] == 'I') + global.tune.options |= GTUNE_INSECURE_FORK; 
else if (*flag == 'd' && flag[1] == 'V') global.ssl_server_verify = SSL_SERVER_VERIFY_NONE; else if (*flag == 'd' && flag[1] == 'Z') @@ -2325,6 +2348,7 @@ static void init(int argc, char **argv) } list_for_each_entry(ppcf, &post_proxy_check_list, list) err_code |= ppcf->fct(px); + px->flags |= PR_FL_CHECKED; } if (err_code & (ERR_ABORT|ERR_FATAL)) { ha_alert("Fatal errors found in configuration.\n"); @@ -2354,6 +2378,9 @@ static void init(int argc, char **argv) /* Apply server states */ apply_server_state(); + /* Preload internal counters. */ + apply_stats_file(); + for (px = proxies_list; px; px = px->next) srv_compute_all_admin_states(px); @@ -2794,9 +2821,6 @@ static void init(int argc, char **argv) #ifdef BUILD_TARGET chunk_appendf(&trash, "TARGET='%s'", BUILD_TARGET); #endif -#ifdef BUILD_CPU - chunk_appendf(&trash, " CPU='%s'", BUILD_CPU); -#endif #ifdef BUILD_OPTIONS chunk_appendf(&trash, " %s", BUILD_OPTIONS); #endif @@ -2935,6 +2959,7 @@ void deinit(void) ha_free(&localpeer); ha_free(&global.server_state_base); ha_free(&global.server_state_file); + ha_free(&global.stats_file); task_destroy(idle_conn_task); idle_conn_task = NULL; @@ -3064,7 +3089,7 @@ void run_poll_loop() if (thread_has_tasks()) { activity[tid].wake_tasks++; _HA_ATOMIC_AND(&th_ctx->flags, ~TH_FL_SLEEPING); - } else if (signal_queue_len) { + } else if (signal_queue_len && tid == 0) { /* this check is required after setting TH_FL_SLEEPING to avoid * a race with wakeup on signals using wake_threads() */ _HA_ATOMIC_AND(&th_ctx->flags, ~TH_FL_SLEEPING); @@ -3146,6 +3171,18 @@ static void *run_thread_poll_loop(void *data) #endif ha_thread_info[tid].stack_top = __builtin_frame_address(0); + /* Assign the ring queue. Contrary to an intuitive thought, this does + * not benefit from locality and it's counter-productive to group + * threads from a same group or range number in the same queue. 
In some + * sense it arranges us because it means we can use a modulo and ensure + * that even small numbers of threads are well spread. + */ + ha_thread_info[tid].ring_queue = + (tid % MIN(global.nbthread, + (global.tune.ring_queues ? + global.tune.ring_queues : + RING_DFLT_QUEUES))) % RING_WAIT_QUEUES; + /* thread is started, from now on it is not idle nor harmless */ thread_harmless_end(); thread_idle_end(); @@ -3341,9 +3378,6 @@ int main(int argc, char **argv) #ifdef BUILD_TARGET "\n TARGET = " BUILD_TARGET #endif -#ifdef BUILD_CPU - "\n CPU = " BUILD_CPU -#endif #ifdef BUILD_CC "\n CC = " BUILD_CC #endif @@ -3445,18 +3479,6 @@ int main(int argc, char **argv) if (global.rlimit_memmax) { limit.rlim_cur = limit.rlim_max = global.rlimit_memmax * 1048576ULL; -#ifdef RLIMIT_AS - if (setrlimit(RLIMIT_AS, &limit) == -1) { - if (global.tune.options & GTUNE_STRICT_LIMITS) { - ha_alert("[%s.main()] Cannot fix MEM limit to %d megs.\n", - argv[0], global.rlimit_memmax); - exit(1); - } - else - ha_warning("[%s.main()] Cannot fix MEM limit to %d megs.\n", - argv[0], global.rlimit_memmax); - } -#else if (setrlimit(RLIMIT_DATA, &limit) == -1) { if (global.tune.options & GTUNE_STRICT_LIMITS) { ha_alert("[%s.main()] Cannot fix MEM limit to %d megs.\n", @@ -3467,9 +3489,16 @@ int main(int argc, char **argv) ha_warning("[%s.main()] Cannot fix MEM limit to %d megs.\n", argv[0], global.rlimit_memmax); } -#endif } +#if defined(USE_LINUX_CAP) + /* If CAP_NET_BIND_SERVICE is in binary file permitted set and process + * is started and run under the same non-root user, this allows + * binding to privileged ports. 
+ */ + prepare_caps_from_permitted_set(geteuid(), global.uid, argv[0]); +#endif + /* Try to get the listeners FD from the previous process using * _getsocks on the stat socket, it must never been done in wait mode * and check mode @@ -3599,13 +3628,13 @@ int main(int argc, char **argv) if ((global.mode & (MODE_MWORKER | MODE_DAEMON)) == 0) set_identity(argv[0]); - /* set_identity() above might have dropped LSTCHK_NETADM if - * it changed to a new UID while preserving enough permissions - * to honnor LSTCHK_NETADM. + /* set_identity() above might have dropped LSTCHK_NETADM or/and + * LSTCHK_SYSADM if it changed to a new UID while preserving enough + * permissions to honnor LSTCHK_NETADM/LSTCHK_SYSADM. */ - if ((global.last_checks & LSTCHK_NETADM) && getuid()) { + if ((global.last_checks & (LSTCHK_NETADM|LSTCHK_SYSADM)) && getuid()) { /* If global.uid is present in config, it is already set as euid - * and ruid by set_identity() call just above, so it's better to + * and ruid by set_identity() just above, so it's better to * remind the user to fix uncoherent settings. */ if (global.uid) { @@ -516,7 +516,15 @@ static inline int hlua_timer_check(const struct hlua_timer *timer) /* Interrupts the Lua processing each "hlua_nb_instruction" instructions. * it is used for preventing infinite loops. + */ +static unsigned int hlua_nb_instruction = 0; + +/* Wrapper to retrieve the number of instructions between two interrupts + * depending on user settings and current hlua context. If not already + * explicitly set, we compute the ideal value using hard limits releaved + * by Thierry Fournier's work, whose original notes may be found below: * + * -- * I test the scheer with an infinite loop containing one incrementation * and one test. 
I run this loop between 10 seconds, I raise a ceil of * 710M loops from one interrupt each 9000 instructions, so I fix the value @@ -537,9 +545,42 @@ static inline int hlua_timer_check(const struct hlua_timer *timer) * 10000 | 710 * 100000 | 710 * 1000000 | 710 + * -- * + * Thanks to his work, we know we can safely use values between 500 and 10000 + * without a significant impact on performance. */ -static unsigned int hlua_nb_instruction = 10000; +static inline unsigned int hlua_get_nb_instruction(struct hlua *hlua) +{ + int ceil = 10000; /* above 10k, no significant performance gain */ + int floor = 500; /* below 500, significant performance loss */ + + if (hlua_nb_instruction) { + /* value enforced by user */ + return hlua_nb_instruction; + } + + /* not set, assign automatic value */ + if (hlua->state_id == 0) { + /* this function is expected to be called during runtime (after config + * parsing), thus global.nb_thread is expected to be set. + */ + BUG_ON(global.nbthread == 0); + + /* main lua stack (shared global lock), take number of threads into + * account in an attempt to reduce thread contention + */ + return MAX(floor, ceil / global.nbthread); + } + else { + /* per-thread lua stack, less contention is expected (no global lock), + * allow up to the maximum number of instructions and hope that the + * user manually yields after heavy (lock dependent) work from lua + * script (e.g.: map manipulation). + */ + return ceil; + } +} /* Descriptor for the memory allocation state. The limit is pre-initialised to * 0 until it is replaced by "tune.lua.maxmem" during the config parsing, or it @@ -1783,7 +1824,7 @@ void hlua_hook(lua_State *L, lua_Debug *ar) /* Try to interrupt the process at the end of the current * unyieldable function. 
*/ - lua_sethook(hlua->T, hlua_hook, LUA_MASKRET|LUA_MASKCOUNT, hlua_nb_instruction); + lua_sethook(hlua->T, hlua_hook, LUA_MASKRET|LUA_MASKCOUNT, hlua_get_nb_instruction(hlua)); } /* This function start or resumes the Lua stack execution. If the flag @@ -1823,10 +1864,10 @@ static enum hlua_exec hlua_ctx_resume(struct hlua *lua, int yield_allowed) resume_execution: - /* This hook interrupts the Lua processing each 'hlua_nb_instruction' + /* This hook interrupts the Lua processing each 'hlua_get_nb_instruction() * instructions. it is used for preventing infinite loops. */ - lua_sethook(lua->T, hlua_hook, LUA_MASKCOUNT, hlua_nb_instruction); + lua_sethook(lua->T, hlua_hook, LUA_MASKCOUNT, hlua_get_nb_instruction(lua)); /* Remove all flags except the running flags. */ HLUA_SET_RUN(lua); @@ -2113,7 +2154,7 @@ static int hlua_set_map(lua_State *L) /* This function is an LUA binding. It provides a function * for retrieving a var from the proc scope in core. */ - static int hlua_core_get_var(lua_State *L) +__LJMP static int hlua_core_get_var(lua_State *L) { const char *name; size_t len; @@ -2135,7 +2176,6 @@ static int hlua_set_map(lua_State *L) } return MAY_LJMP(hlua_smp2lua(L, &smp)); - return 1; } /* This function disables the sending of email through the @@ -2415,7 +2455,7 @@ static void hlua_socket_handler(struct appctx *appctx) struct hlua_csk_ctx *ctx = appctx->svcctx; struct stconn *sc = appctx_sc(appctx); - if (unlikely(se_fl_test(appctx->sedesc, (SE_FL_EOS|SE_FL_ERROR|SE_FL_SHR|SE_FL_SHW)))) { + if (unlikely(se_fl_test(appctx->sedesc, (SE_FL_EOS|SE_FL_ERROR)))) { co_skip(sc_oc(sc), co_data(sc_oc(sc))); notification_wake(&ctx->wake_on_read); notification_wake(&ctx->wake_on_write); @@ -3574,7 +3614,7 @@ __LJMP static struct channel *hlua_checkchannel(lua_State *L, int ud) /* Pushes the channel onto the top of the stack. 
If the stask does not have a * free slots, the function fails and returns 0; */ -static int hlua_channel_new(lua_State *L, struct channel *channel) +__LJMP static int hlua_channel_new(lua_State *L, struct channel *channel) { /* Check stack size. */ if (!lua_checkstack(L, 3)) @@ -4592,7 +4632,7 @@ __LJMP static struct hlua_smp *hlua_checkfetches(lua_State *L, int ud) /* This function creates and push in the stack a fetch object according * with a current TXN. */ -static int hlua_fetches_new(lua_State *L, struct hlua_txn *txn, unsigned int flags) +__LJMP static int hlua_fetches_new(lua_State *L, struct hlua_txn *txn, unsigned int flags) { struct hlua_smp *hsmp; @@ -4714,7 +4754,7 @@ __LJMP static struct hlua_smp *hlua_checkconverters(lua_State *L, int ud) /* This function creates and push in the stack a Converters object * according with a current TXN. */ -static int hlua_converters_new(lua_State *L, struct hlua_txn *txn, unsigned int flags) +__LJMP static int hlua_converters_new(lua_State *L, struct hlua_txn *txn, unsigned int flags) { struct hlua_smp *hsmp; @@ -6095,7 +6135,7 @@ __LJMP static struct hlua_txn *hlua_checkhttp(lua_State *L, int ud) /* This function creates and push in the stack a HTTP object * according with a current TXN. */ -static int hlua_http_new(lua_State *L, struct hlua_txn *txn) +__LJMP static int hlua_http_new(lua_State *L, struct hlua_txn *txn) { struct hlua_txn *htxn; @@ -8131,7 +8171,7 @@ __LJMP static int hlua_get_priv(lua_State *L) * return 0 if the stack does not contains free slots, * otherwise it returns 1. 
*/ -static int hlua_txn_new(lua_State *L, struct stream *s, struct proxy *p, int dir, int flags) +__LJMP static int hlua_txn_new(lua_State *L, struct stream *s, struct proxy *p, int dir, int flags) { struct hlua_txn *htxn; @@ -8311,30 +8351,25 @@ __LJMP static int hlua_txn_log_alert(lua_State *L) return 0; } -__LJMP static int hlua_txn_set_loglevel(lua_State *L) +__LJMP static int hlua_txn_set_fc_mark(lua_State *L) { struct hlua_txn *htxn; - int ll; + int mark; - MAY_LJMP(check_args(L, 2, "set_loglevel")); + MAY_LJMP(check_args(L, 2, "set_fc_mark")); htxn = MAY_LJMP(hlua_checktxn(L, 1)); - ll = MAY_LJMP(luaL_checkinteger(L, 2)); - - if (ll < -1 || ll > NB_LOG_LEVELS) - WILL_LJMP(luaL_argerror(L, 2, "Bad log level. It must be one of the following value:" - " core.silent(-1), core.emerg(0), core.alert(1), core.crit(2), core.error(3)," - " core.warning(4), core.notice(5), core.info(6) or core.debug(7)")); + mark = MAY_LJMP(luaL_checkinteger(L, 2)); - htxn->s->logs.level = (ll == -1) ? ll : ll + 1; + conn_set_mark(objt_conn(htxn->s->sess->origin), mark); return 0; } -__LJMP static int hlua_txn_set_tos(lua_State *L) +__LJMP static int hlua_txn_set_fc_tos(lua_State *L) { struct hlua_txn *htxn; int tos; - MAY_LJMP(check_args(L, 2, "set_tos")); + MAY_LJMP(check_args(L, 2, "set_fc_tos")); htxn = MAY_LJMP(hlua_checktxn(L, 1)); tos = MAY_LJMP(luaL_checkinteger(L, 2)); @@ -8342,16 +8377,21 @@ __LJMP static int hlua_txn_set_tos(lua_State *L) return 0; } -__LJMP static int hlua_txn_set_mark(lua_State *L) +__LJMP static int hlua_txn_set_loglevel(lua_State *L) { struct hlua_txn *htxn; - int mark; + int ll; - MAY_LJMP(check_args(L, 2, "set_mark")); + MAY_LJMP(check_args(L, 2, "set_loglevel")); htxn = MAY_LJMP(hlua_checktxn(L, 1)); - mark = MAY_LJMP(luaL_checkinteger(L, 2)); + ll = MAY_LJMP(luaL_checkinteger(L, 2)); - conn_set_mark(objt_conn(htxn->s->sess->origin), mark); + if (ll < -1 || ll > NB_LOG_LEVELS) + WILL_LJMP(luaL_argerror(L, 2, "Bad log level. 
It must be one of the following value:" + " core.silent(-1), core.emerg(0), core.alert(1), core.crit(2), core.error(3)," + " core.warning(4), core.notice(5), core.info(6) or core.debug(7)")); + + htxn->s->logs.level = (ll == -1) ? ll : ll + 1; return 0; } @@ -8617,7 +8657,7 @@ __LJMP static int hlua_txn_done(lua_State *L) /* Pushes the TXN reply onto the top of the stack. If the stask does not have a * free slots, the function fails and returns 0; */ -static int hlua_txn_reply_new(lua_State *L) +__LJMP static int hlua_txn_reply_new(lua_State *L) { struct hlua_txn *htxn; const char *reason, *body = NULL; @@ -9575,7 +9615,7 @@ __LJMP static void hlua_event_hdl_cb_push_args(struct hlua_event_sub *hlua_sub, */ px = proxy_find_by_id(e_server->safe.proxy_uuid, PR_CAP_BE, 0); BUG_ON(!px); - server = findserver_unique_id(px, e_server->safe.puid, e_server->safe.rid); + server = server_find_by_id_unique(px, e_server->safe.puid, e_server->safe.rid); if (server) { lua_pushstring(hlua->T, "reference"); hlua_fcn_new_server(hlua->T, server); @@ -9658,14 +9698,14 @@ static struct task *hlua_event_runner(struct task *task, void *context, unsigned error = hlua_tostring_safe(hlua_sub->hlua->T, -1); else error = "critical error"; - ha_alert("Lua event_hdl: %s.\n", error); + SEND_ERR(NULL, "Lua event_hdl: %s.\n", error); hlua_unlock(hlua_sub->hlua); goto skip_event; } /* Check stack available size. 
*/ if (!lua_checkstack(hlua_sub->hlua->T, 5)) { - ha_alert("Lua event_hdl: full stack.\n"); + SEND_ERR(NULL, "Lua event_hdl: full stack.\n"); RESET_SAFE_LJMP(hlua_sub->hlua); goto skip_event; } @@ -13939,9 +13979,11 @@ lua_State *hlua_init_state(int thread_num) hlua_class_function(L, "get_var", hlua_get_var); hlua_class_function(L, "done", hlua_txn_done); hlua_class_function(L, "reply", hlua_txn_reply_new); + hlua_class_function(L, "set_fc_mark", hlua_txn_set_fc_mark); + hlua_class_function(L, "set_fc_tos", hlua_txn_set_fc_tos); hlua_class_function(L, "set_loglevel", hlua_txn_set_loglevel); - hlua_class_function(L, "set_tos", hlua_txn_set_tos); - hlua_class_function(L, "set_mark", hlua_txn_set_mark); + hlua_class_function(L, "set_mark", hlua_txn_set_fc_mark); // DEPRECATED, use set_fc_mark + hlua_class_function(L, "set_tos", hlua_txn_set_fc_tos); // DEPRECATED, use set_fc_tos hlua_class_function(L, "set_priority_class", hlua_txn_set_priority_class); hlua_class_function(L, "set_priority_offset", hlua_txn_set_priority_offset); hlua_class_function(L, "deflog", hlua_txn_deflog); diff --git a/src/hlua_fcn.c b/src/hlua_fcn.c index d8dcdfd..0340ce1 100644 --- a/src/hlua_fcn.c +++ b/src/hlua_fcn.c @@ -53,7 +53,7 @@ static int class_stktable_ref; static int class_proxy_list_ref; static int class_server_list_ref; -#define STATS_LEN (MAX((int)ST_F_TOTAL_FIELDS, (int)INF_TOTAL_FIELDS)) +#define STATS_LEN (MAX((int)ST_I_PX_MAX, (int)ST_I_INF_MAX)) static THREAD_LOCAL struct field stats[STATS_LEN]; @@ -377,8 +377,8 @@ static int hlua_get_info(lua_State *L) stats_fill_info(stats, STATS_LEN, 0); lua_newtable(L); - for (i=0; i<INF_TOTAL_FIELDS; i++) { - lua_pushstring(L, info_fields[i].name); + for (i=0; i<ST_I_INF_MAX; i++) { + lua_pushstring(L, stat_cols_info[i].name); hlua_fcn_pushfield(L, &stats[i]); lua_settable(L, -3); } @@ -982,6 +982,7 @@ int hlua_stktable_dump(lua_State *L) int i; int skip_entry; void *ptr; + int shard = 0; // FIXME: this should be stored in the context 
and iterate to scan the table t = hlua_check_stktable(L, 1); type = lua_type(L, 2); @@ -1042,16 +1043,17 @@ int hlua_stktable_dump(lua_State *L) lua_newtable(L); - HA_RWLOCK_WRLOCK(STK_TABLE_LOCK, &t->lock); - eb = ebmb_first(&t->keys); + next_shard: + HA_RWLOCK_WRLOCK(STK_TABLE_LOCK, &t->shards[shard].sh_lock); + eb = ebmb_first(&t->shards[shard].keys); for (n = eb; n; n = ebmb_next(n)) { ts = ebmb_entry(n, struct stksess, key); if (!ts) { - HA_RWLOCK_WRUNLOCK(STK_TABLE_LOCK, &t->lock); - return 1; + HA_RWLOCK_WRUNLOCK(STK_TABLE_LOCK, &t->shards[shard].sh_lock); + goto done; } HA_ATOMIC_INC(&ts->ref_cnt); - HA_RWLOCK_WRUNLOCK(STK_TABLE_LOCK, &t->lock); + HA_RWLOCK_WRUNLOCK(STK_TABLE_LOCK, &t->shards[shard].sh_lock); /* multi condition/value filter */ skip_entry = 0; @@ -1090,7 +1092,7 @@ int hlua_stktable_dump(lua_State *L) } if (skip_entry) { - HA_RWLOCK_WRLOCK(STK_TABLE_LOCK, &t->lock); + HA_RWLOCK_WRLOCK(STK_TABLE_LOCK, &t->shards[shard].sh_lock); HA_ATOMIC_DEC(&ts->ref_cnt); continue; } @@ -1114,10 +1116,14 @@ int hlua_stktable_dump(lua_State *L) lua_newtable(L); hlua_stktable_entry(L, t, ts); lua_settable(L, -3); - HA_RWLOCK_WRLOCK(STK_TABLE_LOCK, &t->lock); + HA_RWLOCK_WRLOCK(STK_TABLE_LOCK, &t->shards[shard].sh_lock); HA_ATOMIC_DEC(&ts->ref_cnt); } - HA_RWLOCK_WRUNLOCK(STK_TABLE_LOCK, &t->lock); + HA_RWLOCK_WRUNLOCK(STK_TABLE_LOCK, &t->shards[shard].sh_lock); + done: + shard++; + if (shard < CONFIG_HAP_TBL_BUCKETS) + goto next_shard; return 1; } @@ -1152,12 +1158,12 @@ int hlua_listener_get_stats(lua_State *L) return 1; } - stats_fill_li_stats(li->bind_conf->frontend, li, STAT_SHLGNDS, stats, - STATS_LEN, NULL); + stats_fill_li_line(li->bind_conf->frontend, li, STAT_F_SHLGNDS, stats, + STATS_LEN, NULL); lua_newtable(L); - for (i=0; i<ST_F_TOTAL_FIELDS; i++) { - lua_pushstring(L, stat_fields[i].name); + for (i=0; i<ST_I_PX_MAX; i++) { + lua_pushstring(L, stat_cols_px[i].name); hlua_fcn_pushfield(L, &stats[i]); lua_settable(L, -3); } @@ -1198,12 +1204,12 @@ 
int hlua_server_get_stats(lua_State *L) return 1; } - stats_fill_sv_stats(srv->proxy, srv, STAT_SHLGNDS, stats, - STATS_LEN, NULL); + stats_fill_sv_line(srv->proxy, srv, STAT_F_SHLGNDS, stats, + STATS_LEN, NULL); lua_newtable(L); - for (i=0; i<ST_F_TOTAL_FIELDS; i++) { - lua_pushstring(L, stat_fields[i].name); + for (i=0; i<ST_I_PX_MAX; i++) { + lua_pushstring(L, stat_cols_px[i].name); hlua_fcn_pushfield(L, &stats[i]); lua_settable(L, -3); } @@ -1329,14 +1335,14 @@ static int hlua_server_index(struct lua_State *L) { const char *key = lua_tostring(L, 2); - if (!strcmp(key, "name")) { + if (strcmp(key, "name") == 0) { if (ONLY_ONCE()) ha_warning("hlua: use of server 'name' attribute is deprecated and will eventually be removed, please use get_name() function instead: %s\n", hlua_traceback(L, ", ")); lua_pushvalue(L, 1); hlua_server_get_name(L); return 1; } - if (!strcmp(key, "puid")) { + if (strcmp(key, "puid") == 0) { if (ONLY_ONCE()) ha_warning("hlua: use of server 'puid' attribute is deprecated and will eventually be removed, please use get_puid() function instead: %s\n", hlua_traceback(L, ", ")); lua_pushvalue(L, 1); @@ -1513,7 +1519,7 @@ int hlua_server_set_addr(lua_State *L) port = NULL; HA_SPIN_LOCK(SERVER_LOCK, &srv->lock); - err = srv_update_addr_port(srv, addr, port, "Lua script"); + err = srv_update_addr_port(srv, addr, port, SERVER_INETADDR_UPDATER_LUA); HA_SPIN_UNLOCK(SERVER_LOCK, &srv->lock); if (!err) lua_pushnil(L); @@ -1980,14 +1986,14 @@ static int hlua_proxy_index(struct lua_State *L) { const char *key = lua_tostring(L, 2); - if (!strcmp(key, "name")) { + if (strcmp(key, "name") == 0) { if (ONLY_ONCE()) ha_warning("hlua: use of proxy 'name' attribute is deprecated and will eventually be removed, please use get_name() function instead: %s\n", hlua_traceback(L, ", ")); lua_pushvalue(L, 1); hlua_proxy_get_name(L); return 1; } - if (!strcmp(key, "uuid")) { + if (strcmp(key, "uuid") == 0) { if (ONLY_ONCE()) ha_warning("hlua: use of proxy 'uuid' 
attribute is deprecated and will eventually be removed, please use get_uuid() function instead: %s\n", hlua_traceback(L, ", ")); lua_pushvalue(L, 1); @@ -2046,12 +2052,12 @@ int hlua_proxy_get_stats(lua_State *L) px = hlua_check_proxy(L, 1); if (px->cap & PR_CAP_BE) - stats_fill_be_stats(px, STAT_SHLGNDS, stats, STATS_LEN, NULL); + stats_fill_be_line(px, STAT_F_SHLGNDS, stats, STATS_LEN, NULL); else - stats_fill_fe_stats(px, stats, STATS_LEN, NULL); + stats_fill_fe_line(px, 0, stats, STATS_LEN, NULL); lua_newtable(L); - for (i=0; i<ST_F_TOTAL_FIELDS; i++) { - lua_pushstring(L, stat_fields[i].name); + for (i=0; i<ST_I_PX_MAX; i++) { + lua_pushstring(L, stat_cols_px[i].name); hlua_fcn_pushfield(L, &stats[i]); lua_settable(L, -3); } diff --git a/src/hq_interop.c b/src/hq_interop.c index 31c2101..c88f888 100644 --- a/src/hq_interop.c +++ b/src/hq_interop.c @@ -8,8 +8,10 @@ #include <haproxy/http.h> #include <haproxy/mux_quic.h> #include <haproxy/qmux_http.h> +#include <haproxy/qmux_trace.h> +#include <haproxy/trace.h> -static ssize_t hq_interop_decode_qcs(struct qcs *qcs, struct buffer *b, int fin) +static ssize_t hq_interop_rcv_buf(struct qcs *qcs, struct buffer *b, int fin) { struct htx *htx; struct htx_sl *sl; @@ -25,7 +27,7 @@ static ssize_t hq_interop_decode_qcs(struct qcs *qcs, struct buffer *b, int fin) if (!fin) return 0; - b_alloc(&htx_buf); + b_alloc(&htx_buf, DB_MUX_RX); htx = htx_from_buf(&htx_buf); /* skip method */ @@ -83,34 +85,21 @@ static ssize_t hq_interop_decode_qcs(struct qcs *qcs, struct buffer *b, int fin) return b_data(b); } -static struct buffer *mux_get_buf(struct qcs *qcs) -{ - if (!b_size(&qcs->tx.buf)) - b_alloc(&qcs->tx.buf); - - return &qcs->tx.buf; -} - static size_t hq_interop_snd_buf(struct qcs *qcs, struct buffer *buf, size_t count) { enum htx_blk_type btype; - struct htx *htx; + struct htx *htx = NULL; struct htx_blk *blk; int32_t idx; uint32_t bsize, fsize; - struct buffer *res, outbuf; + struct buffer *res = NULL; size_t total = 0; 
- - res = mux_get_buf(qcs); - outbuf = b_make(b_tail(res), b_contig_space(res), 0, 0); + int err; htx = htx_from_buf(buf); - if (htx->extra && htx->extra == HTX_UNKOWN_PAYLOAD_LENGTH) - qcs->flags |= QC_SF_UNKNOWN_PL_LENGTH; - - while (count && !htx_is_empty(htx) && !(qcs->flags & QC_SF_BLK_MROOM)) { + while (count && !htx_is_empty(htx) && qcc_stream_can_send(qcs)) { /* Not implemented : QUIC on backend side */ idx = htx_get_head(htx); blk = htx_get_blk(htx, idx); @@ -121,18 +110,48 @@ static size_t hq_interop_snd_buf(struct qcs *qcs, struct buffer *buf, switch (btype) { case HTX_BLK_DATA: + res = qcc_get_stream_txbuf(qcs, &err); + if (!res) { + if (err) + ABORT_NOW(); + goto end; + } + + if (unlikely(fsize == count && + !b_data(res) && + htx_nbblks(htx) == 1 && btype == HTX_BLK_DATA)) { + void *old_area = res->area; + + TRACE_DATA("perform zero-copy DATA transfer", QMUX_EV_STRM_SEND, + qcs->qcc->conn, qcs); + + /* remap MUX buffer to HTX area */ + *res = b_make(buf->area, buf->size, + sizeof(struct htx) + blk->addr, fsize); + + /* assign old MUX area to HTX buffer. */ + buf->area = old_area; + buf->data = buf->head = 0; + total += fsize; + + /* reload HTX with empty buffer. */ + *htx = *htx_from_buf(buf); + goto end; + } + if (fsize > count) fsize = count; - if (b_room(&outbuf) < fsize) - fsize = b_room(&outbuf); + if (b_contig_space(res) < fsize) + fsize = b_contig_space(res); if (!fsize) { - qcs->flags |= QC_SF_BLK_MROOM; - goto end; + /* Release buf and restart parsing if sending still possible. 
*/ + qcc_release_stream_txbuf(qcs); + continue; } - b_putblk(&outbuf, htx_get_blk_ptr(htx, blk), fsize); + b_putblk(res, htx_get_blk_ptr(htx, blk), fsize); total += fsize; count -= fsize; @@ -155,12 +174,56 @@ static size_t hq_interop_snd_buf(struct qcs *qcs, struct buffer *buf, } end: - b_add(res, b_data(&outbuf)); htx_to_buf(htx, buf); return total; } +static size_t hq_interop_nego_ff(struct qcs *qcs, size_t count) +{ + int err, ret = 0; + struct buffer *res; + + start: + res = qcc_get_stream_txbuf(qcs, &err); + if (!res) { + if (err) + ABORT_NOW(); + qcs->sd->iobuf.flags |= IOBUF_FL_FF_BLOCKED; + goto end; + } + + if (!b_room(res)) { + if (qcc_release_stream_txbuf(qcs)) { + qcs->sd->iobuf.flags |= IOBUF_FL_FF_BLOCKED; + goto end; + } + + goto start; + } + + /* No header required for HTTP/0.9, no need to reserve an offset. */ + qcs->sd->iobuf.buf = res; + qcs->sd->iobuf.offset = 0; + qcs->sd->iobuf.data = 0; + + ret = MIN(count, b_contig_space(res)); + end: + return ret; +} + +static size_t hq_interop_done_ff(struct qcs *qcs) +{ + const size_t ret = qcs->sd->iobuf.data; + + /* No header required for HTTP/0.9, simply mark ff as done. 
*/ + qcs->sd->iobuf.buf = NULL; + qcs->sd->iobuf.offset = 0; + qcs->sd->iobuf.data = 0; + + return ret; +} + static int hq_interop_attach(struct qcs *qcs, void *conn_ctx) { qcs_wait_http_req(qcs); @@ -168,7 +231,9 @@ static int hq_interop_attach(struct qcs *qcs, void *conn_ctx) } const struct qcc_app_ops hq_interop_ops = { - .decode_qcs = hq_interop_decode_qcs, + .rcv_buf = hq_interop_rcv_buf, .snd_buf = hq_interop_snd_buf, + .nego_ff = hq_interop_nego_ff, + .done_ff = hq_interop_done_ff, .attach = hq_interop_attach, }; @@ -12,6 +12,7 @@ #include <ctype.h> #include <haproxy/api.h> +#include <haproxy/cfgparse.h> #include <haproxy/http.h> #include <haproxy/tools.h> @@ -344,6 +345,14 @@ const struct ist http_known_methods[HTTP_METH_OTHER] = { [HTTP_METH_CONNECT] = IST("CONNECT"), }; +/* 500 bits to indicate for each status code from 100 to 599 if it participates + * to the error or failure class. The last 12 bits are not assigned for now. + * Not initialized, has to be done at boot. This is manipulated using + * http_status_{add,del}_range(). + */ +long http_err_status_codes[512 / sizeof(long)] = { }; +long http_fail_status_codes[512 / sizeof(long)] = { }; + /* * returns a known method among HTTP_METH_* or HTTP_METH_OTHER for all unknown * ones. 
@@ -352,15 +361,15 @@ enum http_meth_t find_http_meth(const char *str, const int len) { const struct ist m = ist2(str, len); - if (isteq(m, ist("GET"))) return HTTP_METH_GET; - else if (isteq(m, ist("HEAD"))) return HTTP_METH_HEAD; - else if (isteq(m, ist("POST"))) return HTTP_METH_POST; - else if (isteq(m, ist("CONNECT"))) return HTTP_METH_CONNECT; - else if (isteq(m, ist("PUT"))) return HTTP_METH_PUT; - else if (isteq(m, ist("OPTIONS"))) return HTTP_METH_OPTIONS; - else if (isteq(m, ist("DELETE"))) return HTTP_METH_DELETE; - else if (isteq(m, ist("TRACE"))) return HTTP_METH_TRACE; - else return HTTP_METH_OTHER; + if (isteq(m, http_known_methods[HTTP_METH_GET])) return HTTP_METH_GET; + else if (isteq(m, http_known_methods[HTTP_METH_PUT])) return HTTP_METH_PUT; + else if (isteq(m, http_known_methods[HTTP_METH_HEAD])) return HTTP_METH_HEAD; + else if (isteq(m, http_known_methods[HTTP_METH_POST])) return HTTP_METH_POST; + else if (isteq(m, http_known_methods[HTTP_METH_TRACE])) return HTTP_METH_TRACE; + else if (isteq(m, http_known_methods[HTTP_METH_DELETE])) return HTTP_METH_DELETE; + else if (isteq(m, http_known_methods[HTTP_METH_CONNECT])) return HTTP_METH_CONNECT; + else if (isteq(m, http_known_methods[HTTP_METH_OPTIONS])) return HTTP_METH_OPTIONS; + else return HTTP_METH_OTHER; } /* This function returns HTTP_ERR_<num> (enum) matching http status code. 
@@ -368,28 +377,27 @@ enum http_meth_t find_http_meth(const char *str, const int len) */ int http_get_status_idx(unsigned int status) { - switch (status) { - case 200: return HTTP_ERR_200; - case 400: return HTTP_ERR_400; - case 401: return HTTP_ERR_401; - case 403: return HTTP_ERR_403; - case 404: return HTTP_ERR_404; - case 405: return HTTP_ERR_405; - case 407: return HTTP_ERR_407; - case 408: return HTTP_ERR_408; - case 410: return HTTP_ERR_410; - case 413: return HTTP_ERR_413; - case 421: return HTTP_ERR_421; - case 422: return HTTP_ERR_422; - case 425: return HTTP_ERR_425; - case 429: return HTTP_ERR_429; - case 500: return HTTP_ERR_500; - case 501: return HTTP_ERR_501; - case 502: return HTTP_ERR_502; - case 503: return HTTP_ERR_503; - case 504: return HTTP_ERR_504; - default: return HTTP_ERR_500; - } + /* This table was built using dev/phash and easily finds solutions up + * to 21 different entries and produces much better code with 32 + * (padded with err 500 below as it's the default, though only [19] is + * the real one). + */ + const uchar codes[32] = { + HTTP_ERR_408, HTTP_ERR_200, HTTP_ERR_504, HTTP_ERR_400, + HTTP_ERR_500, HTTP_ERR_500, HTTP_ERR_401, HTTP_ERR_410, + HTTP_ERR_500, HTTP_ERR_500, HTTP_ERR_500, HTTP_ERR_500, + HTTP_ERR_500, HTTP_ERR_429, HTTP_ERR_403, HTTP_ERR_500, + HTTP_ERR_421, HTTP_ERR_404, HTTP_ERR_413, HTTP_ERR_500, + HTTP_ERR_422, HTTP_ERR_405, HTTP_ERR_500, HTTP_ERR_501, + HTTP_ERR_500, HTTP_ERR_500, HTTP_ERR_500, HTTP_ERR_502, + HTTP_ERR_407, HTTP_ERR_500, HTTP_ERR_503, HTTP_ERR_425, + }; + uint hash = ((status * 118) >> 5) % 32; + uint ret = codes[hash]; + + if (http_err_codes[ret] == status) + return ret; + return HTTP_ERR_500; } /* This function returns a reason associated with the HTTP status. @@ -478,6 +486,40 @@ const char *http_get_reason(unsigned int status) } } +/* add status codes from low to high included to status codes array <array> + * which must be compatible with http_err_codes and http_fail_codes (i.e. 
512 + * bits each). This is not thread save and is meant for being called during + * boot only. Only status codes 100-599 are permitted. + */ +void http_status_add_range(long *array, uint low, uint high) +{ + low -= 100; + high -= 100; + + BUG_ON(low > 499); + BUG_ON(high > 499); + + while (low <= high) + ha_bit_set(low++, array); +} + +/* remove status codes from low to high included to status codes array <array> + * which must be compatible with http_err_codes and http_fail_codes (i.e. 512 + * bits each). This is not thread save and is meant for being called during + * boot only. Only status codes 100-599 are permitted. + */ +void http_status_del_range(long *array, uint low, uint high) +{ + low -= 100; + high -= 100; + + BUG_ON(low > 499); + BUG_ON(high > 499); + + while (low <= high) + ha_bit_clr(low++, array); +} + /* Returns the ist string corresponding to port part (without ':') in the host * <host>, IST_NULL if no ':' is found or an empty IST if there is no digit. In * the last case, the result is the original ist trimmed to 0. So be sure to test @@ -1431,3 +1473,111 @@ struct ist http_trim_trailing_spht(struct ist value) return ret; } + +/* initialize the required structures and arrays */ +static void _http_init() +{ + /* preset the default status codes that count as errors and failures */ + http_status_add_range(http_err_status_codes, 400, 499); + http_status_add_range(http_fail_status_codes, 500, 599); + http_status_del_range(http_fail_status_codes, 501, 501); + http_status_del_range(http_fail_status_codes, 505, 505); +} +INITCALL0(STG_INIT, _http_init); + +/* + * registered keywords below + */ + +/* parses a global "http-err-codes" and "http-fail-codes" directive. 
*/ +static int http_parse_http_err_fail_codes(char **args, int section_type, struct proxy *curpx, + const struct proxy *defpx, const char *file, int line, + char **err) +{ + const char *cmd = args[0]; + const char *p, *b, *e; + int op, low, high; + long *bitfield; + int ret = -1; + + if (strcmp(cmd, "http-err-codes") == 0) + bitfield = http_err_status_codes; + else if (strcmp(cmd, "http-fail-codes") == 0) + bitfield = http_fail_status_codes; + else + ABORT_NOW(); + + if (!*args[1]) { + memprintf(err, "Missing status codes range for '%s'.", cmd); + goto end; + } + + /* operation: <0 = remove, 0 = replace, >0 = add. The operation is only + * reset for each new arg so that we can do +200,300,400 without + * changing the operation. + */ + for (; *(p = *(++args)); ) { + switch (*p) { + case '+': op = 1; p++; break; + case '-': op = -1; p++; break; + default: op = 0; break; + } + + if (!*p) + goto inval; + + while (1) { + b = p; + e = p + strlen(p); + low = read_uint(&p, e); + if (b == e || p == b) + goto inval; + + high = low; + if (*p == '-') { + p++; + b = p; + high = read_uint(&p, e); + if (b == e || p == b || (*p && *p != ',')) + goto inval; + } + else if (*p && *p != ',') + goto inval; + + if (high < low || low < 100 || high > 599) { + memprintf(err, "Invalid status codes range '%s' in '%s'.\n" + " Codes must be between 100 and 599 and ranges in ascending order.", + *args, cmd); + goto end; + } + + if (!op) + memset(bitfield, 0, sizeof(http_err_status_codes)); + if (op >= 0) + http_status_add_range(bitfield, low, high); + if (op < 0) + http_status_del_range(bitfield, low, high); + + if (!*p) + break; + /* skip ',' */ + p++; + } + } + ret = 0; + end: + return ret; + inval: + memprintf(err, "Invalid status codes range '%s' in '%s' at position %lu. 
Ranges must be in the form [+-]{low[-{high}]}[,...].", + *args, cmd, (ulong)(p - *args)); + goto end; + +} + +static struct cfg_kw_list cfg_kws = {{ },{ + { CFG_GLOBAL, "http-err-codes", http_parse_http_err_fail_codes }, + { CFG_GLOBAL, "http-fail-codes", http_parse_http_err_fail_codes }, + { /* END */ } +}}; + +INITCALL1(STG_REGISTER, cfg_register_keywords, &cfg_kws); diff --git a/src/http_act.c b/src/http_act.c index 7d45780..3a902ab 100644 --- a/src/http_act.c +++ b/src/http_act.c @@ -46,17 +46,10 @@ */ static void release_http_action(struct act_rule *rule) { - struct logformat_node *lf, *lfb; - istfree(&rule->arg.http.str); if (rule->arg.http.re) regex_free(rule->arg.http.re); - list_for_each_entry_safe(lf, lfb, &rule->arg.http.fmt, list) { - LIST_DELETE(&lf->list); - release_sample_expr(lf->expr); - free(lf->arg); - free(lf); - } + lf_expr_deinit(&rule->arg.http.fmt); } /* Release memory allocated by HTTP actions relying on an http reply. Concretly, @@ -179,7 +172,7 @@ static enum act_parse_ret parse_set_req_line(const char **args, int *orig_arg, s } rule->action_ptr = http_action_set_req_line; rule->release_ptr = release_http_action; - LIST_INIT(&rule->arg.http.fmt); + lf_expr_init(&rule->arg.http.fmt); if (!*args[cur_arg] || (*args[cur_arg + 1] && strcmp(args[cur_arg + 1], "if") != 0 && strcmp(args[cur_arg + 1], "unless") != 0)) { @@ -616,7 +609,7 @@ static enum act_parse_ret parse_replace_uri(const char **args, int *orig_arg, st rule->action_ptr = http_action_replace_uri; rule->release_ptr = release_http_action; - LIST_INIT(&rule->arg.http.fmt); + lf_expr_init(&rule->arg.http.fmt); if (!*args[cur_arg] || !*args[cur_arg+1] || (*args[cur_arg+2] && strcmp(args[cur_arg+2], "if") != 0 && strcmp(args[cur_arg+2], "unless") != 0)) { @@ -680,7 +673,7 @@ static enum act_parse_ret parse_http_set_status(const char **args, int *orig_arg rule->action = ACT_CUSTOM; rule->action_ptr = action_http_set_status; rule->release_ptr = release_http_action; - 
LIST_INIT(&rule->arg.http.fmt); + lf_expr_init(&rule->arg.http.fmt); /* Check if an argument is available */ if (!*args[*orig_arg]) { @@ -1317,7 +1310,7 @@ static enum act_parse_ret parse_http_auth(const char **args, int *orig_arg, stru rule->flags |= ACT_FLAG_FINAL; rule->action_ptr = http_action_auth; rule->release_ptr = release_http_action; - LIST_INIT(&rule->arg.http.fmt); + lf_expr_init(&rule->arg.http.fmt); cur_arg = *orig_arg; if (strcmp(args[cur_arg], "realm") == 0) { @@ -1497,7 +1490,7 @@ static enum act_parse_ret parse_http_set_header(const char **args, int *orig_arg rule->action_ptr = http_action_set_header; } rule->release_ptr = release_http_action; - LIST_INIT(&rule->arg.http.fmt); + lf_expr_init(&rule->arg.http.fmt); cur_arg = *orig_arg; if (!*args[cur_arg] || !*args[cur_arg+1]) { @@ -1529,10 +1522,6 @@ static enum act_parse_ret parse_http_set_header(const char **args, int *orig_arg return ACT_RET_PRS_ERR; } - free(px->conf.lfs_file); - px->conf.lfs_file = strdup(px->conf.args.file); - px->conf.lfs_line = px->conf.args.line; - /* some characters are totally forbidden in header names and * may happen by accident when writing configs, causing strange * failures in field. 
Better catch these ones early, nobody will @@ -1623,7 +1612,7 @@ static enum act_parse_ret parse_http_replace_header(const char **args, int *orig rule->action = 1; // replace-value rule->action_ptr = http_action_replace_header; rule->release_ptr = release_http_action; - LIST_INIT(&rule->arg.http.fmt); + lf_expr_init(&rule->arg.http.fmt); cur_arg = *orig_arg; if (!*args[cur_arg] || !*args[cur_arg+1] || !*args[cur_arg+2]) { @@ -1661,10 +1650,6 @@ static enum act_parse_ret parse_http_replace_header(const char **args, int *orig return ACT_RET_PRS_ERR; } - free(px->conf.lfs_file); - px->conf.lfs_file = strdup(px->conf.args.file); - px->conf.lfs_line = px->conf.args.line; - *orig_arg = cur_arg + 1; return ACT_RET_PRS_OK; } @@ -1726,7 +1711,7 @@ static enum act_parse_ret parse_http_del_header(const char **args, int *orig_arg rule->action = PAT_MATCH_STR; rule->action_ptr = http_action_del_header; rule->release_ptr = release_http_action; - LIST_INIT(&rule->arg.http.fmt); + lf_expr_init(&rule->arg.http.fmt); cur_arg = *orig_arg; if (!*args[cur_arg]) { @@ -1901,23 +1886,10 @@ static enum act_return http_action_set_map(struct act_rule *rule, struct proxy * /* Release memory allocated by an http map/acl action. */ static void release_http_map(struct act_rule *rule) { - struct logformat_node *lf, *lfb; - free(rule->arg.map.ref); - list_for_each_entry_safe(lf, lfb, &rule->arg.map.key, list) { - LIST_DELETE(&lf->list); - release_sample_expr(lf->expr); - free(lf->arg); - free(lf); - } - if (rule->action == 1) { - list_for_each_entry_safe(lf, lfb, &rule->arg.map.value, list) { - LIST_DELETE(&lf->list); - release_sample_expr(lf->expr); - free(lf->arg); - free(lf); - } - } + lf_expr_deinit(&rule->arg.map.key); + if (rule->action == 1) + lf_expr_deinit(&rule->arg.map.value); } /* Parse a "add-acl", "del-acl", "set-map" or "del-map" actions. 
It takes one or @@ -1979,7 +1951,7 @@ static enum act_parse_ret parse_http_set_map(const char **args, int *orig_arg, s } /* key pattern */ - LIST_INIT(&rule->arg.map.key); + lf_expr_init(&rule->arg.map.key); if (!parse_logformat_string(args[cur_arg], px, &rule->arg.map.key, LOG_OPT_HTTP, cap, err)) { free(rule->arg.map.ref); return ACT_RET_PRS_ERR; @@ -1988,17 +1960,13 @@ static enum act_parse_ret parse_http_set_map(const char **args, int *orig_arg, s if (rule->action == 1) { /* value pattern for set-map only */ cur_arg++; - LIST_INIT(&rule->arg.map.value); + lf_expr_init(&rule->arg.map.value); if (!parse_logformat_string(args[cur_arg], px, &rule->arg.map.value, LOG_OPT_HTTP, cap, err)) { free(rule->arg.map.ref); return ACT_RET_PRS_ERR; } } - free(px->conf.lfs_file); - px->conf.lfs_file = strdup(px->conf.args.file); - px->conf.lfs_line = px->conf.args.line; - *orig_arg = cur_arg + 1; return ACT_RET_PRS_OK; } @@ -2044,13 +2012,14 @@ static enum act_return http_action_track_sc(struct act_rule *rule, struct proxy * but here we're tracking after this ought to have been done so we have * to do it on purpose. 
*/ - if (rule->from == ACT_F_HTTP_RES && (unsigned)(s->txn->status - 400) < 100) { + if (rule->from == ACT_F_HTTP_RES && + http_status_matches(http_err_status_codes, s->txn->status)) { ptr3 = stktable_data_ptr(t, ts, STKTABLE_DT_HTTP_ERR_CNT); ptr4 = stktable_data_ptr(t, ts, STKTABLE_DT_HTTP_ERR_RATE); } - if (rule->from == ACT_F_HTTP_RES && (unsigned)(s->txn->status - 500) < 100 && - s->txn->status != 501 && s->txn->status != 505) { + if (rule->from == ACT_F_HTTP_RES && + http_status_matches(http_fail_status_codes, s->txn->status)) { ptr5 = stktable_data_ptr(t, ts, STKTABLE_DT_HTTP_FAIL_CNT); ptr6 = stktable_data_ptr(t, ts, STKTABLE_DT_HTTP_FAIL_RATE); } diff --git a/src/http_ana.c b/src/http_ana.c index 178f874..5196341 100644 --- a/src/http_ana.c +++ b/src/http_ana.c @@ -35,6 +35,7 @@ #include <haproxy/sc_strm.h> #include <haproxy/server-t.h> #include <haproxy/stats.h> +#include <haproxy/stats-html.h> #include <haproxy/stconn.h> #include <haproxy/stream.h> #include <haproxy/trace.h> @@ -328,7 +329,8 @@ int http_wait_for_request(struct stream *s, struct channel *req, int an_bit) return_int_err: txn->status = 500; - s->flags |= SF_ERR_INTERNAL; + if (!(s->flags & SF_ERR_MASK)) + s->flags |= SF_ERR_INTERNAL; _HA_ATOMIC_INC(&sess->fe->fe_counters.internal_errors); if (sess->listener && sess->listener->counters) _HA_ATOMIC_INC(&sess->listener->counters->internal_errors); @@ -584,7 +586,8 @@ int http_process_req_common(struct stream *s, struct channel *req, int an_bit, s return_int_err: txn->status = 500; - s->flags |= SF_ERR_INTERNAL; + if (!(s->flags & SF_ERR_MASK)) + s->flags |= SF_ERR_INTERNAL; _HA_ATOMIC_INC(&sess->fe->fe_counters.internal_errors); if (s->flags & SF_BE_ASSIGNED) _HA_ATOMIC_INC(&s->be->be_counters.internal_errors); @@ -657,7 +660,7 @@ int http_process_request(struct stream *s, struct channel *req, int an_bit) * A unique ID is generated even when it is not sent to ensure that the ID can make use of * fetches only available in the HTTP request 
processing stage. */ - if (!LIST_ISEMPTY(&sess->fe->format_unique_id)) { + if (!lf_expr_isempty(&sess->fe->format_unique_id)) { struct ist unique_id = stream_generate_unique_id(s, &sess->fe->format_unique_id); if (!isttest(unique_id)) { @@ -734,7 +737,8 @@ int http_process_request(struct stream *s, struct channel *req, int an_bit) return_int_err: txn->status = 500; - s->flags |= SF_ERR_INTERNAL; + if (!(s->flags & SF_ERR_MASK)) + s->flags |= SF_ERR_INTERNAL; _HA_ATOMIC_INC(&sess->fe->fe_counters.internal_errors); if (s->flags & SF_BE_ASSIGNED) _HA_ATOMIC_INC(&s->be->be_counters.internal_errors); @@ -836,7 +840,8 @@ int http_wait_for_request_body(struct stream *s, struct channel *req, int an_bit return_int_err: txn->status = 500; - s->flags |= SF_ERR_INTERNAL; + if (!(s->flags & SF_ERR_MASK)) + s->flags |= SF_ERR_INTERNAL; _HA_ATOMIC_INC(&sess->fe->fe_counters.internal_errors); if (s->flags & SF_BE_ASSIGNED) _HA_ATOMIC_INC(&s->be->be_counters.internal_errors); @@ -1084,7 +1089,8 @@ int http_request_forward_body(struct stream *s, struct channel *req, int an_bit) goto return_prx_cond; return_int_err: - s->flags |= SF_ERR_INTERNAL; + if (!(s->flags & SF_ERR_MASK)) + s->flags |= SF_ERR_INTERNAL; _HA_ATOMIC_INC(&sess->fe->fe_counters.internal_errors); _HA_ATOMIC_INC(&s->be->be_counters.internal_errors); if (sess->listener && sess->listener->counters) @@ -1241,7 +1247,7 @@ int http_wait_for_response(struct stream *s, struct channel *rep, int an_bit) return 0; } - if (txn->flags & TX_NOT_FIRST) + if (s->flags & SF_SRV_REUSED) goto abort_keep_alive; _HA_ATOMIC_INC(&s->be->be_counters.failed_resp); @@ -1335,7 +1341,7 @@ int http_wait_for_response(struct stream *s, struct channel *rep, int an_bit) } } - if (txn->flags & TX_NOT_FIRST) + if (s->flags & SF_SRV_REUSED) goto abort_keep_alive; _HA_ATOMIC_INC(&s->be->be_counters.failed_resp); @@ -1360,7 +1366,7 @@ int http_wait_for_response(struct stream *s, struct channel *rep, int an_bit) /* 5: write error to client (we don't send 
any message then) */ else if (sc_ep_test(s->scf, SE_FL_ERR_PENDING)) { - if (txn->flags & TX_NOT_FIRST) + if (s->flags & SF_SRV_REUSED) goto abort_keep_alive; _HA_ATOMIC_INC(&s->be->be_counters.failed_resp); @@ -1444,22 +1450,22 @@ int http_wait_for_response(struct stream *s, struct channel *rep, int an_bit) if (sl->flags & HTX_SL_F_CONN_UPG) msg->flags |= HTTP_MSGF_CONN_UPG; - n = txn->status / 100; - if (n < 1 || n > 5) - n = 0; - /* when the client triggers a 4xx from the server, it's most often due * to a missing object or permission. These events should be tracked * because if they happen often, it may indicate a brute force or a * vulnerability scan. */ - if (n == 4) + if (http_status_matches(http_err_status_codes, txn->status)) stream_inc_http_err_ctr(s); - if (n == 5 && txn->status != 501 && txn->status != 505) + if (http_status_matches(http_fail_status_codes, txn->status)) stream_inc_http_fail_ctr(s); if (objt_server(s->target)) { + n = txn->status / 100; + if (n < 1 || n > 5) + n = 0; + _HA_ATOMIC_INC(&__objt_server(s->target)->counters.p.http.rsp[n]); _HA_ATOMIC_INC(&__objt_server(s->target)->counters.p.http.cum_req); } @@ -1557,11 +1563,17 @@ int http_wait_for_response(struct stream *s, struct channel *rep, int an_bit) txn->flags |= TX_CON_WANT_TUN; } - /* check for NTML authentication headers in 401 (WWW-Authenticate) and - * 407 (Proxy-Authenticate) responses and set the connection to private + /* Check for NTML authentication headers in 401 (WWW-Authenticate) and + * 407 (Proxy-Authenticate) responses and set the connection to + * private. + * + * Note that this is not performed when using a true multiplexer unless + * connection is already attached to the session as nothing prevents it + * from being shared already by several sessions here. 
*/ srv_conn = sc_conn(s->scb); - if (srv_conn) { + if (srv_conn && + (LIST_INLIST(&srv_conn->sess_el) || strcmp(srv_conn->mux->name, "H1") == 0)) { struct ist hdr; struct http_hdr_ctx ctx; @@ -1611,7 +1623,8 @@ int http_wait_for_response(struct stream *s, struct channel *rep, int an_bit) if (objt_server(s->target)) _HA_ATOMIC_INC(&__objt_server(s->target)->counters.internal_errors); txn->status = 500; - s->flags |= SF_ERR_INTERNAL; + if (!(s->flags & SF_ERR_MASK)) + s->flags |= SF_ERR_INTERNAL; goto return_prx_cond; return_bad_res: @@ -1894,7 +1907,7 @@ int http_process_res_common(struct stream *s, struct channel *rep, int an_bit, s * bytes from the server, then this is the right moment. We have * to temporarily assign bytes_out to log what we currently have. */ - if (!LIST_ISEMPTY(&sess->fe->logformat) && !(s->logs.logwait & LW_BYTES)) { + if (!lf_expr_isempty(&sess->fe->logformat) && !(s->logs.logwait & LW_BYTES)) { s->logs.t_close = s->logs.t_data; /* to get a valid end date */ s->logs.bytes_out = htx->data; s->do_log(s); @@ -1930,7 +1943,8 @@ int http_process_res_common(struct stream *s, struct channel *rep, int an_bit, s return_int_err: txn->status = 500; - s->flags |= SF_ERR_INTERNAL; + if (!(s->flags & SF_ERR_MASK)) + s->flags |= SF_ERR_INTERNAL; _HA_ATOMIC_INC(&sess->fe->fe_counters.internal_errors); _HA_ATOMIC_INC(&s->be->be_counters.internal_errors); if (sess->listener && sess->listener->counters) @@ -2198,7 +2212,8 @@ int http_response_forward_body(struct stream *s, struct channel *res, int an_bit _HA_ATOMIC_INC(&sess->listener->counters->internal_errors); if (objt_server(s->target)) _HA_ATOMIC_INC(&__objt_server(s->target)->counters.internal_errors); - s->flags |= SF_ERR_INTERNAL; + if (!(s->flags & SF_ERR_MASK)) + s->flags |= SF_ERR_INTERNAL; goto return_error; return_bad_res: @@ -2236,7 +2251,7 @@ int http_apply_redirect_rule(struct redirect_rule *rule, struct stream *s, struc struct buffer *chunk; struct ist status, reason, location; unsigned int 
flags; - int ret = 1, close = 0; /* Try to keep the connection alive byt default */ + int ret = 1; chunk = alloc_trash_chunk(); if (!chunk) { @@ -2409,9 +2424,6 @@ int http_apply_redirect_rule(struct redirect_rule *rule, struct stream *s, struc break; } - if (!(txn->req.flags & HTTP_MSGF_BODYLESS) && txn->req.msg_state != HTTP_MSG_DONE) - close = 1; - htx = htx_from_buf(&res->buf); /* Trim any possible response */ channel_htx_truncate(&s->res, htx); @@ -2422,9 +2434,6 @@ int http_apply_redirect_rule(struct redirect_rule *rule, struct stream *s, struc sl->info.res.status = rule->code; s->txn->status = rule->code; - if (close && !htx_add_header(htx, ist("Connection"), ist("close"))) - goto fail; - if (!htx_add_header(htx, ist("Content-length"), ist("0")) || !htx_add_header(htx, ist("Location"), location)) goto fail; @@ -3877,9 +3886,9 @@ static int http_handle_stats(struct stream *s, struct channel *req, struct proxy ctx->st_code = STAT_STATUS_INIT; ctx->http_px = px; ctx->flags |= uri_auth->flags; - ctx->flags |= STAT_FMT_HTML; /* assume HTML mode by default */ + ctx->flags |= STAT_F_FMT_HTML; /* assume HTML mode by default */ if ((msg->flags & HTTP_MSGF_VER_11) && (txn->meth != HTTP_METH_HEAD)) - ctx->flags |= STAT_CHUNKED; + ctx->flags |= STAT_F_CHUNKED; htx = htxbuf(&req->buf); sl = http_get_stline(htx); @@ -3888,14 +3897,14 @@ static int http_handle_stats(struct stream *s, struct channel *req, struct proxy for (h = lookup; h <= end - 3; h++) { if (memcmp(h, ";up", 3) == 0) { - ctx->flags |= STAT_HIDE_DOWN; + ctx->flags |= STAT_F_HIDE_DOWN; break; } } for (h = lookup; h <= end - 9; h++) { if (memcmp(h, ";no-maint", 9) == 0) { - ctx->flags |= STAT_HIDE_MAINT; + ctx->flags |= STAT_F_HIDE_MAINT; break; } } @@ -3903,7 +3912,7 @@ static int http_handle_stats(struct stream *s, struct channel *req, struct proxy if (uri_auth->refresh) { for (h = lookup; h <= end - 10; h++) { if (memcmp(h, ";norefresh", 10) == 0) { - ctx->flags |= STAT_NO_REFRESH; + ctx->flags |= 
STAT_F_NO_REFRESH; break; } } @@ -3911,31 +3920,31 @@ static int http_handle_stats(struct stream *s, struct channel *req, struct proxy for (h = lookup; h <= end - 4; h++) { if (memcmp(h, ";csv", 4) == 0) { - ctx->flags &= ~(STAT_FMT_MASK|STAT_JSON_SCHM); + ctx->flags &= ~(STAT_F_FMT_MASK|STAT_F_JSON_SCHM); break; } } for (h = lookup; h <= end - 6; h++) { if (memcmp(h, ";typed", 6) == 0) { - ctx->flags &= ~(STAT_FMT_MASK|STAT_JSON_SCHM); - ctx->flags |= STAT_FMT_TYPED; + ctx->flags &= ~(STAT_F_FMT_MASK|STAT_F_JSON_SCHM); + ctx->flags |= STAT_F_FMT_TYPED; break; } } for (h = lookup; h <= end - 5; h++) { if (memcmp(h, ";json", 5) == 0) { - ctx->flags &= ~(STAT_FMT_MASK|STAT_JSON_SCHM); - ctx->flags |= STAT_FMT_JSON; + ctx->flags &= ~(STAT_F_FMT_MASK|STAT_F_JSON_SCHM); + ctx->flags |= STAT_F_FMT_JSON; break; } } for (h = lookup; h <= end - 12; h++) { if (memcmp(h, ";json-schema", 12) == 0) { - ctx->flags &= ~STAT_FMT_MASK; - ctx->flags |= STAT_JSON_SCHM; + ctx->flags &= ~STAT_F_FMT_MASK; + ctx->flags |= STAT_F_JSON_SCHM; break; } } @@ -4004,7 +4013,7 @@ static int http_handle_stats(struct stream *s, struct channel *req, struct proxy if (ret) { /* no rule, or the rule matches */ - ctx->flags |= STAT_ADMIN; + ctx->flags |= STAT_F_ADMIN; break; } } @@ -4012,21 +4021,21 @@ static int http_handle_stats(struct stream *s, struct channel *req, struct proxy if (txn->meth == HTTP_METH_GET || txn->meth == HTTP_METH_HEAD) appctx->st0 = STAT_HTTP_HEAD; else if (txn->meth == HTTP_METH_POST) { - if (ctx->flags & STAT_ADMIN) { + if (ctx->flags & STAT_F_ADMIN) { appctx->st0 = STAT_HTTP_POST; if (msg->msg_state < HTTP_MSG_DATA) req->analysers |= AN_REQ_HTTP_BODY; } else { /* POST without admin level */ - ctx->flags &= ~STAT_CHUNKED; + ctx->flags &= ~STAT_F_CHUNKED; ctx->st_code = STAT_STATUS_DENY; appctx->st0 = STAT_HTTP_LAST; } } else { /* Unsupported method */ - ctx->flags &= ~STAT_CHUNKED; + ctx->flags &= ~STAT_F_CHUNKED; ctx->st_code = STAT_STATUS_IVAL; appctx->st0 = STAT_HTTP_LAST; 
} @@ -4191,7 +4200,6 @@ void http_perform_server_redirect(struct stream *s, struct stconn *sc) s->txn->status = 302; if (!htx_add_header(htx, ist("Cache-Control"), ist("no-cache")) || - !htx_add_header(htx, ist("Connection"), ist("close")) || !htx_add_header(htx, ist("Content-length"), ist("0")) || !htx_add_header(htx, ist("Location"), location)) goto fail; @@ -4473,7 +4481,8 @@ int http_forward_proxy_resp(struct stream *s, int final) size_t data; if (final) { - htx->flags |= HTX_FL_PROXY_RESP; + if (s->txn->server_status == -1) + s->txn->server_status = 0; if (!htx_is_empty(htx) && !http_eval_after_res_rules(s)) return 0; diff --git a/src/http_client.c b/src/http_client.c index d7e50c0..6deff05 100644 --- a/src/http_client.c +++ b/src/http_client.c @@ -190,7 +190,6 @@ err: static int hc_cli_io_handler(struct appctx *appctx) { struct hcli_svc_ctx *ctx = appctx->svcctx; - struct stconn *sc = appctx_sc(appctx); struct httpclient *hc = ctx->hc; struct http_hdr *hdrs, *hdr; @@ -217,10 +216,7 @@ static int hc_cli_io_handler(struct appctx *appctx) } if (ctx->flags & HC_F_RES_BODY) { - int ret; - - ret = httpclient_res_xfer(hc, sc_ib(sc)); - channel_add_input(sc_ic(sc), ret); /* forward what we put in the buffer channel */ + httpclient_res_xfer(hc, &appctx->outbuf); /* remove the flag if the buffer was emptied */ if (httpclient_data(hc)) @@ -281,11 +277,14 @@ int httpclient_req_gen(struct httpclient *hc, const struct ist url, enum http_me struct htx *htx; int err_code = 0; struct ist meth_ist, vsn; - unsigned int flags = HTX_SL_F_VER_11 | HTX_SL_F_NORMALIZED_URI | HTX_SL_F_HAS_SCHM; + unsigned int flags = HTX_SL_F_VER_11 | HTX_SL_F_HAS_SCHM | HTX_SL_F_HAS_AUTHORITY; int i; int foundhost = 0, foundaccept = 0, foundua = 0; - if (!b_alloc(&hc->req.buf)) + if (!(hc->flags & HC_F_HTTPPROXY)) + flags |= HTX_SL_F_NORMALIZED_URI; + + if (!b_alloc(&hc->req.buf, DB_CHANNEL)) goto error; if (meth >= HTTP_METH_OTHER) @@ -403,7 +402,7 @@ int httpclient_req_xfer(struct httpclient *hc, 
struct ist src, int end) int ret = 0; struct htx *htx; - if (!b_alloc(&hc->req.buf)) + if (!b_alloc(&hc->req.buf, DB_CHANNEL)) goto error; htx = htx_from_buf(&hc->req.buf); @@ -704,7 +703,7 @@ void httpclient_applet_io_handler(struct appctx *appctx) uint32_t sz; int ret; - if (unlikely(se_fl_test(appctx->sedesc, (SE_FL_EOS|SE_FL_ERROR|SE_FL_SHR|SE_FL_SHW)))) { + if (unlikely(se_fl_test(appctx->sedesc, (SE_FL_EOS|SE_FL_ERROR)))) { if (co_data(res)) { htx = htx_from_buf(&res->buf); co_htx_skip(res, htx, co_data(res)); @@ -918,7 +917,7 @@ void httpclient_applet_io_handler(struct appctx *appctx) if (htx_is_empty(htx)) goto out; - if (!b_alloc(&hc->res.buf)) + if (!b_alloc(&hc->res.buf, DB_MUX_TX)) goto out; if (b_full(&hc->res.buf)) @@ -1223,7 +1222,8 @@ struct proxy *httpclient_create_proxy(const char *id) px->timeout.connect = httpclient_timeout_connect; px->timeout.client = TICK_ETERNITY; /* The HTTP Client use the "option httplog" with the global loggers */ - px->conf.logformat_string = httpclient_log_format; + px->logformat.str = httpclient_log_format; + px->logformat.conf.file = strdup("httpclient"); px->http_needed = 1; /* clear HTTP server */ @@ -1343,9 +1343,9 @@ static int httpclient_precheck() httpclient_proxy = httpclient_create_proxy("<HTTPCLIENT>"); if (!httpclient_proxy) - return 1; + return ERR_RETRYABLE; - return 0; + return ERR_NONE; } /* Initialize the logs for every proxy dedicated to the httpclient */ @@ -1376,18 +1376,6 @@ static int httpclient_postcheck_proxy(struct proxy *curproxy) } LIST_APPEND(&curproxy->loggers, &node->list); } - if (curproxy->conf.logformat_string) { - curproxy->conf.args.ctx = ARGC_LOG; - if (!parse_logformat_string(curproxy->conf.logformat_string, curproxy, &curproxy->logformat, - LOG_OPT_MANDATORY|LOG_OPT_MERGE_SPACES, - SMP_VAL_FE_LOG_END, &errmsg)) { - memprintf(&errmsg, "failed to parse log-format : %s.", errmsg); - err_code |= ERR_ALERT | ERR_FATAL; - goto err; - } - curproxy->conf.args.file = NULL; - 
curproxy->conf.args.line = 0; - } #ifdef USE_OPENSSL /* initialize the SNI for the SSL servers */ @@ -1401,9 +1389,22 @@ static int httpclient_postcheck_proxy(struct proxy *curproxy) /* init the SNI expression */ /* always use the host header as SNI, without the port */ srv_ssl->sni_expr = strdup("req.hdr(host),field(1,:)"); - err_code |= server_parse_sni_expr(srv_ssl, curproxy, &errmsg); - if (err_code & ERR_CODE) { - memprintf(&errmsg, "failed to configure sni: %s.", errmsg); + srv_ssl->ssl_ctx.sni = _parse_srv_expr(srv_ssl->sni_expr, + &curproxy->conf.args, + NULL, 0, NULL); + if (!srv_ssl->ssl_ctx.sni) { + memprintf(&errmsg, "failed to configure sni."); + err_code |= ERR_ALERT | ERR_FATAL; + goto err; + } + + srv_ssl->pool_conn_name = strdup(srv_ssl->sni_expr); + srv_ssl->pool_conn_name_expr = _parse_srv_expr(srv_ssl->pool_conn_name, + &curproxy->conf.args, + NULL, 0, NULL); + if (!srv_ssl->pool_conn_name_expr) { + memprintf(&errmsg, "failed to configure pool-conn-name."); + err_code |= ERR_ALERT | ERR_FATAL; goto err; } } diff --git a/src/http_fetch.c b/src/http_fetch.c index 1f3e4a0..ad1e8c5 100644 --- a/src/http_fetch.c +++ b/src/http_fetch.c @@ -36,6 +36,7 @@ #include <haproxy/sample.h> #include <haproxy/sc_strm.h> #include <haproxy/stream.h> +#include <haproxy/log.h> #include <haproxy/tools.h> #include <haproxy/version.h> @@ -314,7 +315,7 @@ struct htx *smp_prefetch_htx(struct sample *smp, struct channel *chn, struct che else { if (txn->status == -1) txn->status = sl->info.res.status; - if (!(htx->flags & HTX_FL_PROXY_RESP) && txn->server_status == -1) + if (txn->server_status == -1) txn->server_status = sl->info.res.status; } if (sl->flags & HTX_SL_F_VER_11) @@ -477,7 +478,7 @@ static int smp_fetch_uniqueid(const struct arg *args, struct sample *smp, const { struct ist unique_id; - if (LIST_ISEMPTY(&smp->sess->fe->format_unique_id)) + if (lf_expr_isempty(&smp->sess->fe->format_unique_id)) return 0; if (!smp->strm) diff --git a/src/http_htx.c 
b/src/http_htx.c index 004d343..36356ed 100644 --- a/src/http_htx.c +++ b/src/http_htx.c @@ -1117,7 +1117,6 @@ error: void release_http_reply(struct http_reply *http_reply) { - struct logformat_node *lf, *lfb; struct http_reply_hdr *hdr, *hdrb; if (!http_reply) @@ -1126,12 +1125,7 @@ void release_http_reply(struct http_reply *http_reply) ha_free(&http_reply->ctype); list_for_each_entry_safe(hdr, hdrb, &http_reply->hdrs, list) { LIST_DELETE(&hdr->list); - list_for_each_entry_safe(lf, lfb, &hdr->value, list) { - LIST_DELETE(&lf->list); - release_sample_expr(lf->expr); - free(lf->arg); - free(lf); - } + lf_expr_deinit(&hdr->value); istfree(&hdr->name); free(hdr); } @@ -1141,14 +1135,8 @@ void release_http_reply(struct http_reply *http_reply) } else if (http_reply->type == HTTP_REPLY_RAW) chunk_destroy(&http_reply->body.obj); - else if (http_reply->type == HTTP_REPLY_LOGFMT) { - list_for_each_entry_safe(lf, lfb, &http_reply->body.fmt, list) { - LIST_DELETE(&lf->list); - release_sample_expr(lf->expr); - free(lf->arg); - free(lf); - } - } + else if (http_reply->type == HTTP_REPLY_LOGFMT) + lf_expr_deinit(&http_reply->body.fmt); free(http_reply); } @@ -1497,7 +1485,6 @@ int http_check_http_reply(struct http_reply *reply, struct proxy *px, char **err struct http_reply *http_parse_http_reply(const char **args, int *orig_arg, struct proxy *px, int default_status, char **errmsg) { - struct logformat_node *lf, *lfb; struct http_reply *reply = NULL; struct http_reply_hdr *hdr, *hdrb; struct stat stat; @@ -1682,7 +1669,7 @@ struct http_reply *http_parse_http_reply(const char **args, int *orig_arg, struc fd = -1; obj[objlen] = '\0'; reply->type = HTTP_REPLY_LOGFMT; - LIST_INIT(&reply->body.fmt); + lf_expr_init(&reply->body.fmt); cur_arg++; } else if (strcmp(args[cur_arg], "lf-string") == 0) { @@ -1699,7 +1686,7 @@ struct http_reply *http_parse_http_reply(const char **args, int *orig_arg, struc obj = strdup(args[cur_arg]); objlen = strlen(args[cur_arg]); reply->type = 
HTTP_REPLY_LOGFMT; - LIST_INIT(&reply->body.fmt); + lf_expr_init(&reply->body.fmt); cur_arg++; } else if (strcmp(args[cur_arg], "hdr") == 0) { @@ -1722,7 +1709,7 @@ struct http_reply *http_parse_http_reply(const char **args, int *orig_arg, struc goto error; } LIST_APPEND(&reply->hdrs, &hdr->list); - LIST_INIT(&hdr->value); + lf_expr_init(&hdr->value); hdr->name = ist(strdup(args[cur_arg])); if (!isttest(hdr->name)) { memprintf(errmsg, "out of memory"); @@ -1731,9 +1718,6 @@ struct http_reply *http_parse_http_reply(const char **args, int *orig_arg, struc if (!parse_logformat_string(args[cur_arg+1], px, &hdr->value, LOG_OPT_HTTP, cap, errmsg)) goto error; - free(px->conf.lfs_file); - px->conf.lfs_file = strdup(px->conf.args.file); - px->conf.lfs_line = px->conf.args.line; cur_arg += 2; } else @@ -1778,12 +1762,7 @@ struct http_reply *http_parse_http_reply(const char **args, int *orig_arg, struc px->conf.args.file, px->conf.args.line); list_for_each_entry_safe(hdr, hdrb, &reply->hdrs, list) { LIST_DELETE(&hdr->list); - list_for_each_entry_safe(lf, lfb, &hdr->value, list) { - LIST_DELETE(&lf->list); - release_sample_expr(lf->expr); - free(lf->arg); - free(lf); - } + lf_expr_deinit(&hdr->value); istfree(&hdr->name); free(hdr); } @@ -1811,7 +1790,7 @@ struct http_reply *http_parse_http_reply(const char **args, int *orig_arg, struc } } else if (reply->type == HTTP_REPLY_LOGFMT) { /* log-format payload using 'lf-file' of 'lf-string' parameter */ - LIST_INIT(&reply->body.fmt); + lf_expr_init(&reply->body.fmt); if ((reply->status == 204 || reply->status == 304)) { memprintf(errmsg, "No body expected for %d responses", reply->status); goto error; @@ -1822,10 +1801,6 @@ struct http_reply *http_parse_http_reply(const char **args, int *orig_arg, struc } if (!parse_logformat_string(obj, px, &reply->body.fmt, LOG_OPT_HTTP, cap, errmsg)) goto error; - - free(px->conf.lfs_file); - px->conf.lfs_file = strdup(px->conf.args.file); - px->conf.lfs_line = px->conf.args.line; } free(obj); 
@@ -1853,8 +1828,9 @@ int http_scheme_based_normalize(struct htx *htx) { struct http_hdr_ctx ctx; struct htx_sl *sl; - struct ist uri, scheme, authority, host, port; + struct ist uri, scheme, authority, host, port, path; struct http_uri_parser parser; + int normalize = 0; sl = http_get_stline(htx); @@ -1871,14 +1847,21 @@ int http_scheme_based_normalize(struct htx *htx) /* Extract the port if present in authority */ authority = http_parse_authority(&parser, 1); + path = http_parse_path(&parser); port = http_get_host_port(authority); - if (!isttest(port)) { - /* if no port found, no normalization to proceed */ - return 0; + if (!isttest(port) || !http_is_default_port(scheme, port)) + host = authority; + else { + host = isttrim(authority, istlen(authority) - istlen(port) - 1); + normalize = 1; + } + + if (!isttest(path)) { + path = ist("/"); + normalize = 1; } - host = isttrim(authority, istlen(authority) - istlen(port) - 1); - if (http_is_default_port(scheme, port)) { + if (normalize) { /* reconstruct the uri with removal of the port */ struct buffer *temp = get_trash_chunk(); struct ist meth, vsn; @@ -1894,8 +1877,8 @@ int http_scheme_based_normalize(struct htx *htx) /* reconstruct uri without port */ chunk_memcat(temp, uri.ptr, authority.ptr - uri.ptr); chunk_istcat(temp, host); - chunk_memcat(temp, istend(authority), istend(uri) - istend(authority)); - uri = ist2(temp->area + meth.len + vsn.len, host.len + uri.len - authority.len); /* uri */ + chunk_istcat(temp, path); + uri = ist2(temp->area + meth.len + vsn.len, host.len + path.len + authority.ptr - uri.ptr); /* uri */ http_replace_stline(htx, meth, uri, vsn); diff --git a/src/http_rules.c b/src/http_rules.c index 192f0c7..6ceacdf 100644 --- a/src/http_rules.c +++ b/src/http_rules.c @@ -320,17 +320,10 @@ struct act_rule *parse_http_after_res_cond(const char **args, const char *file, /* completely free redirect rule */ void http_free_redirect_rule(struct redirect_rule *rdr) { - struct logformat_node *lf, *lfb; - 
free_acl_cond(rdr->cond); free(rdr->rdr_str); free(rdr->cookie_str); - list_for_each_entry_safe(lf, lfb, &rdr->rdr_fmt, list) { - LIST_DELETE(&lf->list); - release_sample_expr(lf->expr); - free(lf->arg); - free(lf); - } + lf_expr_deinit(&rdr->rdr_fmt); free(rdr); } @@ -447,7 +440,7 @@ struct redirect_rule *http_parse_redirect_rule(const char *file, int linenum, st if (!rule) goto out_of_memory; rule->cond = cond; - LIST_INIT(&rule->rdr_fmt); + lf_expr_init(&rule->rdr_fmt); if (!use_fmt) { /* old-style static redirect rule */ @@ -473,9 +466,6 @@ struct redirect_rule *http_parse_redirect_rule(const char *file, int linenum, st if (!parse_logformat_string(destination, curproxy, &rule->rdr_fmt, LOG_OPT_HTTP, cap, errmsg)) { goto err; } - free(curproxy->conf.lfs_file); - curproxy->conf.lfs_file = strdup(curproxy->conf.args.file); - curproxy->conf.lfs_line = curproxy->conf.args.line; } } diff --git a/src/lb_chash.c b/src/lb_chash.c index 4e8fb15..b3e472e 100644 --- a/src/lb_chash.c +++ b/src/lb_chash.c @@ -21,8 +21,9 @@ #include <haproxy/backend.h> #include <haproxy/errors.h> #include <haproxy/queue.h> -#include <haproxy/server-t.h> +#include <haproxy/server.h> #include <haproxy/tools.h> +#include <haproxy/xxhash.h> /* Return next tree node after <node> which must still be in the tree, or be * NULL. Lookup wraps around the end to the beginning. If the next node is the @@ -58,6 +59,77 @@ static inline void chash_dequeue_srv(struct server *s) } } +/* Compute a key that can be used to insert a node into the CHASH tree. Servers + * have a base key, which can be computed in several ways (see + * chash_compute_server_key) and this function uses that seed to generate hash + * keys for however many nodes need to be inserted into the tree. + */ +static inline u32 chash_compute_node_key(struct server *s, unsigned node_index) +{ + return full_hash(s->lb_server_key + node_index); +} + +/* Compute the base server key that will be used to compute node keys. 
Servers + * may be configured to determine their hashes either from their ID, address, or + * address+port; the latter options allow independent HAProxy instances to agree + * on routing decisions, regardless of their order in the server list (which may + * be arbitrary, since it could depend on factors such as the order of entries + * in a DNS SRV record). If an address is not known or if the server is + * configured with `hash-key id` (the default) then the key will be determined + * from the server's puid. + */ +static inline u32 chash_compute_server_key(struct server *s) +{ + enum srv_hash_key hash_key = s->hash_key; + struct server_inetaddr srv_addr; + u32 key; + + /* If hash-key is addr or addr-port then we need the address, but if we + * can't determine the address then we fall back on hashing the puid. + */ + switch (hash_key) { + case SRV_HASH_KEY_ADDR: + case SRV_HASH_KEY_ADDR_PORT: + server_get_inetaddr(s, &srv_addr); + if (srv_addr.family != AF_INET && srv_addr.family != AF_INET6) { + hash_key = SRV_HASH_KEY_ID; + } + break; + + default: + break; + } + + if (hash_key == SRV_HASH_KEY_ADDR_PORT) { + key = full_hash(srv_addr.port.svc); + } else { + key = 0; + } + + switch (hash_key) { + case SRV_HASH_KEY_ADDR_PORT: + case SRV_HASH_KEY_ADDR: + switch (srv_addr.family) { + case AF_INET: + key = full_hash(key + srv_addr.addr.v4.s_addr); + break; + case AF_INET6: + key = XXH32(srv_addr.addr.v6.s6_addr, 16, key); + break; + default: + break; + } + break; + + case SRV_HASH_KEY_ID: + default: + key = full_hash(s->puid); + break; + } + + return key; +} + /* Adjust the number of entries of a server in its tree. The server must appear * as many times as its weight indicates it. If it's there too often, we remove * the last occurrences. If it's not there enough, we add more occurrences. 
To @@ -67,6 +139,15 @@ static inline void chash_dequeue_srv(struct server *s) */ static inline void chash_queue_dequeue_srv(struct server *s) { + u32 server_key = chash_compute_server_key(s); + + /* If the server key changed then we must rehash all the nodes. */ + if (server_key != s->lb_server_key) { + chash_dequeue_srv(s); + s->lb_nodes_tot = 0; + s->lb_server_key = server_key; + } + while (s->lb_nodes_now > s->next_eweight) { if (s->lb_nodes_now >= s->lb_nodes_tot) // should always be false anyway s->lb_nodes_now = s->lb_nodes_tot; @@ -95,7 +176,7 @@ static inline void chash_queue_dequeue_srv(struct server *s) (s->next_eweight - s->lb_nodes_tot) * sizeof(*s->lb_nodes)); for (j = s->lb_nodes_tot; j < s->next_eweight; j++) { s->lb_nodes[j].server = s; - s->lb_nodes[j].node.key = full_hash(s->puid * SRV_EWGHT_RANGE + j); + s->lb_nodes[j].node.key = chash_compute_node_key(s, j); } s->lb_nodes_tot = s->next_eweight; } @@ -238,9 +319,6 @@ static void chash_update_server_weight(struct server *srv) int old_state, new_state; struct proxy *p = srv->proxy; - if (!srv_lb_status_changed(srv)) - return; - /* If changing the server's weight changes its state, we simply apply * the procedures we already have for status change. 
If the state * remains down, the server is not in any tree, so it's as easy as @@ -505,9 +583,10 @@ int chash_init_server_tree(struct proxy *p) ha_alert("failed to allocate lb_nodes for server %s.\n", srv->id); return -1; } + srv->lb_server_key = chash_compute_server_key(srv); for (node = 0; node < srv->lb_nodes_tot; node++) { srv->lb_nodes[node].server = srv; - srv->lb_nodes[node].node.key = full_hash(srv->puid * SRV_EWGHT_RANGE + node); + srv->lb_nodes[node].node.key = chash_compute_node_key(srv, node); } if (srv_currently_usable(srv)) diff --git a/src/lb_ss.c b/src/lb_ss.c new file mode 100644 index 0000000..4af031b --- /dev/null +++ b/src/lb_ss.c @@ -0,0 +1,183 @@ +/* + * sticky load-balancing + * + * Copyright 2024 HAProxy Technologies + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include <haproxy/api.h> +#include <haproxy/backend.h> +#include <haproxy/lb_ss.h> +#include <haproxy/server-t.h> + +/* this function updates the stick server according to server <srv>'s new state. + * + * The server's lock must be held. The lbprm's lock will be used. 
+ */ +static void ss_set_server_status_down(struct server *srv) +{ + struct proxy *p = srv->proxy; + + if (!srv_lb_status_changed(srv)) + return; + + if (srv_willbe_usable(srv)) + goto out_update_state; + + HA_RWLOCK_WRLOCK(LBPRM_LOCK, &p->lbprm.lock); + + if (!srv_currently_usable(srv)) + /* server was already down */ + goto out_update_backend; + + if (srv->flags & SRV_F_BACKUP) { + p->lbprm.tot_wbck -= srv->cur_eweight; + p->srv_bck--; + } else { + p->lbprm.tot_wact -= srv->cur_eweight; + p->srv_act--; + } + if (srv == p->lbprm.ss.srv) { + /* sticked server is down, elect a new server + * that we will be sticking on. + */ + recalc_server_ss(p); + } + + out_update_backend: + /* check/update tot_used, tot_weight */ + update_backend_weight(p); + HA_RWLOCK_WRUNLOCK(LBPRM_LOCK, &p->lbprm.lock); + + out_update_state: + srv_lb_commit_status(srv); +} + +/* This function updates the stick server according to server <srv>'s new state. + * + * The server's lock must be held. The lbprm's lock will be used. 
+ */ +static void ss_set_server_status_up(struct server *srv) +{ + struct proxy *p = srv->proxy; + + if (!srv_lb_status_changed(srv)) + return; + + if (!srv_willbe_usable(srv)) + goto out_update_state; + + HA_RWLOCK_WRLOCK(LBPRM_LOCK, &p->lbprm.lock); + + if (srv_currently_usable(srv)) + /* server was already up */ + goto out_update_backend; + + if (srv->flags & SRV_F_BACKUP) { + p->lbprm.tot_wbck += srv->next_eweight; + p->srv_bck++; + } else { + p->lbprm.tot_wact += srv->next_eweight; + p->srv_act++; + } + if (!p->lbprm.ss.srv || + ((p->lbprm.ss.srv->flags & SRV_F_BACKUP) && !(srv->flags & SRV_F_BACKUP))) { + /* we didn't have a server or were sticking on a backup server, + * but now we have an active server, let's switch to it + */ + p->lbprm.ss.srv = srv; + } + + out_update_backend: + /* check/update tot_used, tot_weight */ + update_backend_weight(p); + HA_RWLOCK_WRUNLOCK(LBPRM_LOCK, &p->lbprm.lock); + + out_update_state: + srv_lb_commit_status(srv); +} + +/* This function elects a new stick server for proxy px. + * + * The lbprm's lock must be held. + */ +void recalc_server_ss(struct proxy *px) +{ + struct server *cur, *first; + int flag; + + if (!px->lbprm.tot_used) + return; /* no server */ + + /* here we *know* that we have some servers */ + if (px->srv_act) + flag = 0; + else + flag = SRV_F_BACKUP; + + first = NULL; + + for (cur = px->srv; cur; cur = cur->next) { + if ((cur->flags & SRV_F_BACKUP) == flag && + srv_willbe_usable(cur)) { + first = cur; + break; + } + } + px->lbprm.ss.srv = first; +} + +/* This function is responsible for preparing sticky LB algorithm. + * It should be called only once per proxy, at config time. 
+ */ +void init_server_ss(struct proxy *p) +{ + struct server *srv; + + p->lbprm.set_server_status_up = ss_set_server_status_up; + p->lbprm.set_server_status_down = ss_set_server_status_down; + p->lbprm.update_server_eweight = NULL; + + if (!p->srv) + return; + + for (srv = p->srv; srv; srv = srv->next) { + srv->next_eweight = 1; /* ignore weights, all servers have the same weight */ + srv_lb_commit_status(srv); + } + + /* recounts servers and their weights */ + recount_servers(p); + update_backend_weight(p); + recalc_server_ss(p); +} + +/* + * This function returns the server that we're sticking on. If any server + * is found, it will be returned. If no valid server is found, NULL is + * returned. + * + * The lbprm's lock will be used. + */ +struct server *ss_get_server(struct proxy *px) +{ + struct server *srv = NULL; + + HA_RWLOCK_RDLOCK(LBPRM_LOCK, &px->lbprm.lock); + srv = px->lbprm.ss.srv; + HA_RWLOCK_RDUNLOCK(LBPRM_LOCK, &px->lbprm.lock); + return srv; +} diff --git a/src/linuxcap.c b/src/linuxcap.c index 4a2a3ab..63a510f 100644 --- a/src/linuxcap.c +++ b/src/linuxcap.c @@ -40,11 +40,20 @@ static const struct { #ifdef CAP_NET_BIND_SERVICE { CAP_NET_BIND_SERVICE, "cap_net_bind_service" }, #endif +#ifdef CAP_SYS_ADMIN + { CAP_SYS_ADMIN, "cap_sys_admin" }, +#endif /* must be last */ { 0, 0 } }; /* provided by sys/capability.h on some distros */ +static inline int capget(cap_user_header_t hdrp, const cap_user_data_t datap) +{ + return syscall(SYS_capget, hdrp, datap); +} + +/* provided by sys/capability.h on some distros */ static inline int capset(cap_user_header_t hdrp, const cap_user_data_t datap) { return syscall(SYS_capset, hdrp, datap); @@ -53,6 +62,86 @@ static inline int capset(cap_user_header_t hdrp, const cap_user_data_t datap) /* defaults to zero, i.e. we don't keep any cap after setuid() */ static uint32_t caplist; +/* try to check if CAP_NET_ADMIN, CAP_NET_RAW or CAP_SYS_ADMIN are in the + * process Effective set in the case when euid is non-root. 
If there is a + * match, LSTCHK_NETADM or LSTCHK_SYSADM is unset respectively from + * global.last_checks to avoid warning due to global.last_checks verifications + * later at the process init stage. + * If there is no any supported by haproxy capability in the process Effective + * set, try to check the process Permitted set. In this case we promote from + * Permitted set to Effective only the capabilities, that were marked by user + * via 'capset' keyword in the global section (caplist). If there is match with + * caplist and CAP_NET_ADMIN/CAP_NET_RAW or CAP_SYS_ADMIN are in this list, + * LSTCHK_NETADM or/and LSTCHK_SYSADM will be unset by the same reason. + * We do this only if the current euid is non-root and there is no global.uid. + * Otherwise, the process will continue either to run under root, or it will do + * a transition to unprivileged user later in prepare_caps_for_setuid(), + * which specially manages its capabilities in that case. + * Always returns 0. Diagnostic warnings will be emitted only, if + * LSTCHK_NETADM/LSTCHK_SYSADM is presented in global.last_checks and some + * failures are encountered. + */ +int prepare_caps_from_permitted_set(int from_uid, int to_uid, const char *program_name) +{ + struct __user_cap_data_struct start_cap_data = { }; + struct __user_cap_header_struct cap_hdr = { + .pid = 0, /* current process */ + .version = _LINUX_CAPABILITY_VERSION_1, + }; + + /* started as root */ + if (!from_uid) + return 0; + + /* will change ruid and euid later in set_identity() */ + if (to_uid) + return 0; + + /* first, let's check if CAP_NET_ADMIN or CAP_NET_RAW is already in + * the process effective set. This may happen, when administrator sets + * these capabilities and the file effective bit on haproxy binary via + * setcap, see capabilities man page for details. 
+ */ + if (capget(&cap_hdr, &start_cap_data) == -1) { + if (global.last_checks & (LSTCHK_NETADM | LSTCHK_SYSADM)) + ha_diag_warning("Failed to get process capabilities using capget(): %s. " + "Can't use capabilities that might be set on %s binary " + "by administrator.\n", strerror(errno), program_name); + return 0; + } + + if (start_cap_data.effective & ((1 << CAP_NET_ADMIN)|(1 << CAP_NET_RAW))) { + global.last_checks &= ~LSTCHK_NETADM; + return 0; + } + + if (start_cap_data.effective & ((1 << CAP_SYS_ADMIN))) { + global.last_checks &= ~LSTCHK_SYSADM; + return 0; + } + + /* second, try to check process permitted set, in this case caplist is + * necessary. Allows to put cap_net_bind_service in process effective + * set, if it is in the caplist and also presented in the binary + * permitted set. + */ + if (caplist && start_cap_data.permitted & caplist) { + start_cap_data.effective |= start_cap_data.permitted & caplist; + if (capset(&cap_hdr, &start_cap_data) == 0) { + if (caplist & ((1 << CAP_NET_ADMIN)|(1 << CAP_NET_RAW))) + global.last_checks &= ~LSTCHK_NETADM; + if (caplist & (1 << CAP_SYS_ADMIN)) + global.last_checks &= ~LSTCHK_SYSADM; + } else if (global.last_checks & (LSTCHK_NETADM|LSTCHK_SYSADM)) { + ha_diag_warning("Failed to put capabilities from caplist in %s " + "process Effective capabilities set using capset(): %s\n", + program_name, strerror(errno)); + } + } + + return 0; +} + /* try to apply capabilities before switching UID from <from_uid> to <to_uid>. * In practice we need to do this in 4 steps: * - set PR_SET_KEEPCAPS to preserve caps across the final setuid() @@ -61,7 +150,8 @@ static uint32_t caplist; * - set the effective and permitted caps again * - then the caller can safely call setuid() * On success LSTCHK_NETADM is unset from global.last_checks, if CAP_NET_ADMIN - * or CAP_NET_RAW was found in the caplist from config. + * or CAP_NET_RAW was found in the caplist from config. 
Same for + * LSTCHK_SYSADM, if CAP_SYS_ADMIN was found in the caplist from config. * We don't do this if the current euid is not zero or if the target uid * is zero. Returns 0 on success, negative on failure. Alerts may be emitted. */ @@ -107,6 +197,9 @@ int prepare_caps_for_setuid(int from_uid, int to_uid) if (caplist & ((1 << CAP_NET_ADMIN)|(1 << CAP_NET_RAW))) global.last_checks &= ~LSTCHK_NETADM; + if (caplist & (1 << CAP_SYS_ADMIN)) + global.last_checks &= ~LSTCHK_SYSADM; + /* all's good */ return 0; } diff --git a/src/listener.c b/src/listener.c index 75e164a..a348558 100644 --- a/src/listener.c +++ b/src/listener.c @@ -27,6 +27,7 @@ #include <haproxy/freq_ctr.h> #include <haproxy/frontend.h> #include <haproxy/global.h> +#include <haproxy/guid.h> #include <haproxy/list.h> #include <haproxy/listener.h> #include <haproxy/log.h> @@ -443,9 +444,9 @@ int default_resume_listener(struct listener *l) err = l->rx.proto->fam->bind(&l->rx, &errmsg); if (err != ERR_NONE) { if (err & ERR_WARN) - ha_warning("Resuming listener: %s\n", errmsg); + ha_warning("Resuming listener: protocol %s: %s.\n", l->rx.proto->name, errmsg); else if (err & ERR_ALERT) - ha_alert("Resuming listener: %s\n", errmsg); + ha_alert("Resuming listener: protocol %s: %s.\n", l->rx.proto->name, errmsg); ha_free(&errmsg); if (err & (ERR_FATAL | ERR_ABORT)) { ret = 0; @@ -460,9 +461,9 @@ int default_resume_listener(struct listener *l) BUG_ON(!l->rx.proto->listen); err = l->rx.proto->listen(l, msg, sizeof(msg)); if (err & ERR_ALERT) - ha_alert("Resuming listener: %s\n", msg); + ha_alert("Resuming listener: protocol %s: %s.\n", l->rx.proto->name, msg); else if (err & ERR_WARN) - ha_warning("Resuming listener: %s\n", msg); + ha_warning("Resuming listener: protocol %s: %s.\n", l->rx.proto->name, msg); if (err & (ERR_FATAL | ERR_ABORT)) { ret = 0; @@ -816,6 +817,8 @@ int create_listeners(struct bind_conf *bc, const struct sockaddr_storage *ss, if (fd != -1) l->rx.flags |= RX_F_INHERITED; + guid_init(&l->guid); 
+ l->extra_counters = NULL; HA_RWLOCK_INIT(&l->lock); @@ -913,6 +916,7 @@ struct listener *clone_listener(struct listener *src) goto oom1; memcpy(l, src, sizeof(*l)); + l->luid = 0; // don't dup the listener's ID! if (l->name) { l->name = strdup(l->name); if (!l->name) @@ -1066,11 +1070,11 @@ void listener_accept(struct listener *l) } #endif if (p && p->fe_sps_lim) { - int max = freq_ctr_remain(&p->fe_sess_per_sec, p->fe_sps_lim, 0); + int max = freq_ctr_remain(&p->fe_counters.sess_per_sec, p->fe_sps_lim, 0); if (unlikely(!max)) { /* frontend accept rate limit was reached */ - expire = tick_add(now_ms, next_event_delay(&p->fe_sess_per_sec, p->fe_sps_lim, 0)); + expire = tick_add(now_ms, next_event_delay(&p->fe_counters.sess_per_sec, p->fe_sps_lim, 0)); goto limit_proxy; } @@ -1541,7 +1545,7 @@ void listener_accept(struct listener *l) dequeue_all_listeners(); if (p && !MT_LIST_ISEMPTY(&p->listener_queue) && - (!p->fe_sps_lim || freq_ctr_remain(&p->fe_sess_per_sec, p->fe_sps_lim, 0) > 0)) + (!p->fe_sps_lim || freq_ctr_remain(&p->fe_counters.sess_per_sec, p->fe_sps_lim, 0) > 0)) dequeue_proxy_listeners(p); } return; @@ -1600,14 +1604,14 @@ void listener_release(struct listener *l) dequeue_all_listeners(); if (fe && !MT_LIST_ISEMPTY(&fe->listener_queue) && - (!fe->fe_sps_lim || freq_ctr_remain(&fe->fe_sess_per_sec, fe->fe_sps_lim, 0) > 0)) + (!fe->fe_sps_lim || freq_ctr_remain(&fe->fe_counters.sess_per_sec, fe->fe_sps_lim, 0) > 0)) dequeue_proxy_listeners(fe); else { unsigned int wait; int expire = TICK_ETERNITY; if (fe->task && fe->fe_sps_lim && - (wait = next_event_delay(&fe->fe_sess_per_sec,fe->fe_sps_lim, 0))) { + (wait = next_event_delay(&fe->fe_counters.sess_per_sec,fe->fe_sps_lim, 0))) { /* we're blocking because a limit was reached on the number of * requests/s on the frontend. 
We want to re-check ASAP, which * means in 1 ms before estimated expiration date, because the @@ -1713,8 +1717,8 @@ int bind_complete_thread_setup(struct bind_conf *bind_conf, int *err_code) else { if (fe != global.cli_fe) ha_diag_warning("[%s:%d]: Disabling per-thread sharding for listener in" - " %s '%s' because SO_REUSEPORT is disabled\n", - bind_conf->file, bind_conf->line, proxy_type_str(fe), fe->id); + " %s '%s' because SO_REUSEPORT is disabled for %s protocol.\n", + bind_conf->file, bind_conf->line, proxy_type_str(fe), fe->id, li->rx.proto->name); shards = 1; } } @@ -1727,8 +1731,8 @@ int bind_complete_thread_setup(struct bind_conf *bind_conf, int *err_code) /* We also need to check if an explicit shards count was set and cannot be honored */ if (shards > 1 && !protocol_supports_flag(li->rx.proto, PROTO_F_REUSEPORT_SUPPORTED)) { - ha_warning("[%s:%d]: Disabling sharding for listener in %s '%s' because SO_REUSEPORT is disabled\n", - bind_conf->file, bind_conf->line, proxy_type_str(fe), fe->id); + ha_warning("[%s:%d]: Disabling sharding for listener in %s '%s' because SO_REUSEPORT is disabled for %s protocol.\n", + bind_conf->file, bind_conf->line, proxy_type_str(fe), fe->id, li->rx.proto->name); shards = 1; } @@ -1807,6 +1811,12 @@ int bind_complete_thread_setup(struct bind_conf *bind_conf, int *err_code) *err_code |= ERR_FATAL | ERR_ALERT; return cfgerr; } + /* assign the ID to the first one only */ + new_li->luid = new_li->conf.id.key = tmp_li->luid; + tmp_li->luid = 0; + eb32_delete(&tmp_li->conf.id); + if (tmp_li->luid) + eb32_insert(&fe->conf.used_listener_id, &new_li->conf.id); new_li = tmp_li; } } @@ -1825,6 +1835,12 @@ int bind_complete_thread_setup(struct bind_conf *bind_conf, int *err_code) *err_code |= ERR_FATAL | ERR_ALERT; return cfgerr; } + /* assign the ID to the first one only */ + new_li->luid = new_li->conf.id.key = li->luid; + li->luid = 0; + eb32_delete(&li->conf.id); + if (li->luid) + eb32_insert(&fe->conf.used_listener_id, 
&new_li->conf.id); } } @@ -1832,6 +1848,43 @@ int bind_complete_thread_setup(struct bind_conf *bind_conf, int *err_code) return cfgerr; } +/* Generate and insert unique GUID for each listeners of <bind_conf> instance + * if GUID prefix is defined. + * + * Returns 0 on success else non-zero. + */ +int bind_generate_guid(struct bind_conf *bind_conf) +{ + struct listener *l; + char *guid_err = NULL; + + if (!bind_conf->guid_prefix) + return 0; + + list_for_each_entry(l, &bind_conf->listeners, by_bind) { + if (bind_conf->guid_idx == (size_t)-1) { + ha_alert("[%s:%d] : error on GUID generation : Too many listeners.\n", + bind_conf->file, bind_conf->line); + return 1; + } + + chunk_printf(&trash, "%s-%lld", bind_conf->guid_prefix, + (ullong)bind_conf->guid_idx); + + if (guid_insert(&l->obj_type, b_head(&trash), &guid_err)) { + ha_alert("[%s:%d] : error on GUID generation : %s. " + "You may fix it by adjusting guid-prefix.\n", + bind_conf->file, bind_conf->line, guid_err); + ha_free(&guid_err); + return 1; + } + + ++bind_conf->guid_idx; + } + + return 0; +} + /* * Registers the bind keyword list <kwl> as a list of valid keywords for next * parsing sessions. 
@@ -1975,6 +2028,9 @@ struct bind_conf *bind_conf_alloc(struct proxy *fe, const char *file, #endif LIST_INIT(&bind_conf->listeners); + bind_conf->guid_prefix = NULL; + bind_conf->guid_idx = 0; + bind_conf->rhttp_srvname = NULL; return bind_conf; @@ -2082,6 +2138,26 @@ static int bind_parse_backlog(char **args, int cur_arg, struct proxy *px, struct return 0; } +/* parse the "guid-prefix" bind keyword */ +static int bind_parse_guid_prefix(char **args, int cur_arg, struct proxy *px, struct bind_conf *conf, char **err) +{ + char *prefix = NULL; + + if (!*args[cur_arg + 1]) { + memprintf(err, "'%s' : expects an argument", args[cur_arg]); + return ERR_ALERT | ERR_FATAL; + } + + prefix = strdup(args[cur_arg + 1]); + if (!prefix) { + memprintf(err, "'%s' : out of memory", args[cur_arg]); + return ERR_ALERT | ERR_FATAL; + } + + conf->guid_prefix = prefix; + return 0; +} + /* parse the "id" bind keyword */ static int bind_parse_id(char **args, int cur_arg, struct proxy *px, struct bind_conf *conf, char **err) { @@ -2225,8 +2301,8 @@ int bind_parse_args_list(struct bind_conf *bind_conf, char **args, int cur_arg, #if (!defined(IP_PKTINFO) && !defined(IP_RECVDSTADDR)) || !defined(IPV6_RECVPKTINFO) list_for_each_entry(l, &bind_conf->listeners, by_bind) { if (++listener_count > 1 || !is_inet_addr(&l->rx.addr)) { - ha_diag_warning("parsing [%s:%d] : '%s %s' in section '%s' : UDP binding on multiple addresses without IP_PKTINFO or equivalent support may be unreliable.\n", - file, linenum, args[0], args[1], section); + ha_warning("parsing [%s:%d] : '%s %s' in section '%s' : UDP binding on multiple addresses without IP_PKTINFO or equivalent support may be unreliable.\n", + file, linenum, args[0], args[1], section); break; } } @@ -2486,6 +2562,7 @@ static struct bind_kw_list bind_kws = { "ALL", { }, { { "accept-netscaler-cip", bind_parse_accept_netscaler_cip, 1, 0 }, /* enable NetScaler Client IP insertion protocol */ { "accept-proxy", bind_parse_accept_proxy, 0, 0 }, /* enable PROXY 
protocol */ { "backlog", bind_parse_backlog, 1, 0 }, /* set backlog of listening socket */ + { "guid-prefix", bind_parse_guid_prefix, 1, 1 }, /* set guid of listening socket */ { "id", bind_parse_id, 1, 1 }, /* set id of listening socket */ { "maxconn", bind_parse_maxconn, 1, 0 }, /* set maxconn of listening socket */ { "name", bind_parse_name, 1, 1 }, /* set name of listening socket */ @@ -33,6 +33,10 @@ #include <haproxy/http.h> #include <haproxy/http_ana.h> #include <haproxy/listener.h> +#include <haproxy/lb_chash.h> +#include <haproxy/lb_fwrr.h> +#include <haproxy/lb_map.h> +#include <haproxy/lb_ss.h> #include <haproxy/log.h> #include <haproxy/proxy.h> #include <haproxy/sample.h> @@ -45,6 +49,7 @@ #include <haproxy/time.h> #include <haproxy/hash.h> #include <haproxy/tools.h> +#include <haproxy/vecpair.h> /* global recv logs counter */ int cum_log_messages; @@ -90,7 +95,9 @@ static const struct log_fmt_st log_formats[LOG_FORMATS] = { * that the byte should be escaped. Be careful to always pass bytes from 0 to * 255 exclusively to the macros. 
*/ +long no_escape_map[(256/8) / sizeof(long)]; long rfc5424_escape_map[(256/8) / sizeof(long)]; +long json_escape_map[(256/8) / sizeof(long)]; long hdr_encode_map[(256/8) / sizeof(long)]; long url_encode_map[(256/8) / sizeof(long)]; long http_encode_map[(256/8) / sizeof(long)]; @@ -112,21 +119,81 @@ const char *log_levels[NB_LOG_LEVELS] = { const char sess_term_cond[16] = "-LcCsSPRIDKUIIII"; /* normal, Local, CliTo, CliErr, SrvTo, SrvErr, PxErr, Resource, Internal, Down, Killed, Up, -- */ const char sess_fin_state[8] = "-RCHDLQT"; /* cliRequest, srvConnect, srvHeader, Data, Last, Queue, Tarpit */ +const struct buffer empty = { }; -/* log_format */ -struct logformat_type { - char *name; - int type; - int mode; - int lw; /* logwait bitsfield */ - int (*config_callback)(struct logformat_node *node, struct proxy *curproxy); -}; - int prepare_addrsource(struct logformat_node *node, struct proxy *curproxy); -/* log_format variable names */ -static const struct logformat_type logformat_keywords[] = { +/* logformat alias types (internal use) */ +enum logformat_alias_type { + LOG_FMT_GLOBAL, + LOG_FMT_CLIENTIP, + LOG_FMT_CLIENTPORT, + LOG_FMT_BACKENDIP, + LOG_FMT_BACKENDPORT, + LOG_FMT_FRONTENDIP, + LOG_FMT_FRONTENDPORT, + LOG_FMT_SERVERPORT, + LOG_FMT_SERVERIP, + LOG_FMT_COUNTER, + LOG_FMT_LOGCNT, + LOG_FMT_PID, + LOG_FMT_DATE, + LOG_FMT_DATEGMT, + LOG_FMT_DATELOCAL, + LOG_FMT_TS, + LOG_FMT_MS, + LOG_FMT_FRONTEND, + LOG_FMT_FRONTEND_XPRT, + LOG_FMT_BACKEND, + LOG_FMT_SERVER, + LOG_FMT_BYTES, + LOG_FMT_BYTES_UP, + LOG_FMT_Ta, + LOG_FMT_Th, + LOG_FMT_Ti, + LOG_FMT_TQ, + LOG_FMT_TW, + LOG_FMT_TC, + LOG_FMT_Tr, + LOG_FMT_tr, + LOG_FMT_trg, + LOG_FMT_trl, + LOG_FMT_TR, + LOG_FMT_TD, + LOG_FMT_TT, + LOG_FMT_TU, + LOG_FMT_STATUS, + LOG_FMT_CCLIENT, + LOG_FMT_CSERVER, + LOG_FMT_TERMSTATE, + LOG_FMT_TERMSTATE_CK, + LOG_FMT_ACTCONN, + LOG_FMT_FECONN, + LOG_FMT_BECONN, + LOG_FMT_SRVCONN, + LOG_FMT_RETRIES, + LOG_FMT_SRVQUEUE, + LOG_FMT_BCKQUEUE, + LOG_FMT_HDRREQUEST, + 
LOG_FMT_HDRRESPONS, + LOG_FMT_HDRREQUESTLIST, + LOG_FMT_HDRRESPONSLIST, + LOG_FMT_REQ, + LOG_FMT_HTTP_METHOD, + LOG_FMT_HTTP_URI, + LOG_FMT_HTTP_PATH, + LOG_FMT_HTTP_PATH_ONLY, + LOG_FMT_HTTP_QUERY, + LOG_FMT_HTTP_VERSION, + LOG_FMT_HOSTNAME, + LOG_FMT_UNIQUEID, + LOG_FMT_SSL_CIPHER, + LOG_FMT_SSL_VERSION, +}; + +/* log_format alias names */ +static const struct logformat_alias logformat_aliases[] = { { "o", LOG_FMT_GLOBAL, PR_MODE_TCP, 0, NULL }, /* global option */ /* please keep these lines sorted ! */ @@ -208,6 +275,36 @@ char *log_format = NULL; */ char default_rfc5424_sd_log_format[] = "- "; +/* returns true if the input logformat string is one of the default ones declared + * above + */ +static inline int logformat_str_isdefault(const char *str) +{ + return str == httpclient_log_format || + str == default_http_log_format || + str == default_https_log_format || + str == clf_http_log_format || + str == default_tcp_log_format || + str == default_rfc5424_sd_log_format; +} + +/* free logformat str if it is not a default (static) one */ +static inline void logformat_str_free(char **str) +{ + if (!logformat_str_isdefault(*str)) + ha_free(str); +} + +/* duplicate and return logformat str if it is not a default (static) + * one, else return the original one + */ +static inline char *logformat_str_dup(char *str) +{ + if (logformat_str_isdefault(str)) + return str; + return strdup(str); +} + /* total number of dropped logs */ unsigned int dropped_logs = 0; @@ -221,17 +318,20 @@ THREAD_LOCAL char *logline = NULL; */ THREAD_LOCAL char *logline_rfc5424 = NULL; -struct logformat_var_args { +struct logformat_node_args { char *name; int mask; }; -struct logformat_var_args var_args_list[] = { +struct logformat_node_args node_args_list[] = { // global { "M", LOG_OPT_MANDATORY }, { "Q", LOG_OPT_QUOTE }, { "X", LOG_OPT_HEXA }, { "E", LOG_OPT_ESC }, + { "bin", LOG_OPT_BIN }, + { "json", LOG_OPT_ENCODE_JSON }, + { "cbor", LOG_OPT_ENCODE_CBOR }, { 0, 0 } }; @@ -240,17 +340,19 @@ 
struct logformat_var_args var_args_list[] = { */ int prepare_addrsource(struct logformat_node *node, struct proxy *curproxy) { - curproxy->options2 |= PR_O2_SRC_ADDR; + if ((curproxy->flags & PR_FL_CHECKED)) + return 0; - return 0; + curproxy->options2 |= PR_O2_SRC_ADDR; + return 1; } /* - * Parse args in a logformat_var. Returns 0 in error + * Parse args in a logformat_node. Returns 0 in error * case, otherwise, it returns 1. */ -int parse_logformat_var_args(char *args, struct logformat_node *node, char **err) +int parse_logformat_node_args(char *args, struct logformat_node *node, char **err) { int i = 0; int end = 0; @@ -258,7 +360,7 @@ int parse_logformat_var_args(char *args, struct logformat_node *node, char **err char *sp = NULL; // start pointer if (args == NULL) { - memprintf(err, "internal error: parse_logformat_var_args() expects non null 'args'"); + memprintf(err, "internal error: parse_logformat_node_args() expects non null 'args'"); return 0; } @@ -279,13 +381,19 @@ int parse_logformat_var_args(char *args, struct logformat_node *node, char **err if (*args == '\0' || *args == ',') { *args = '\0'; - for (i = 0; sp && var_args_list[i].name; i++) { - if (strcmp(sp, var_args_list[i].name) == 0) { + for (i = 0; sp && node_args_list[i].name; i++) { + if (strcmp(sp, node_args_list[i].name) == 0) { if (flags == 1) { - node->options |= var_args_list[i].mask; + /* Ensure we don't mix encoding types, existing + * encoding type prevails over new ones + */ + if (node->options & LOG_OPT_ENCODE) + node->options |= (node_args_list[i].mask & ~LOG_OPT_ENCODE); + else + node->options |= node_args_list[i].mask; break; } else if (flags == 2) { - node->options &= ~var_args_list[i].mask; + node->options &= ~node_args_list[i].mask; break; } } @@ -300,64 +408,71 @@ int parse_logformat_var_args(char *args, struct logformat_node *node, char **err } /* - * Parse a variable '%varname' or '%{args}varname' in log-format. 
The caller + * Parse an alias '%aliasname' or '%{args}aliasname' in log-format. The caller * must pass the args part in the <arg> pointer with its length in <arg_len>, - * and varname with its length in <var> and <var_len> respectively. <arg> is - * ignored when arg_len is 0. Neither <var> nor <var_len> may be null. + * and aliasname with its length in <alias> and <alias_len> respectively. <arg> + * is ignored when arg_len is 0. Neither <alias> nor <alias_len> may be null. * Returns false in error case and err is filled, otherwise returns true. */ -int parse_logformat_var(char *arg, int arg_len, char *var, int var_len, struct proxy *curproxy, struct list *list_format, int *defoptions, char **err) +static int parse_logformat_alias(char *arg, int arg_len, char *name, int name_len, int typecast, + char *alias, int alias_len, struct lf_expr *lf_expr, + int *defoptions, char **err) { int j; + struct list *list_format= &lf_expr->nodes.list; struct logformat_node *node = NULL; - for (j = 0; logformat_keywords[j].name; j++) { // search a log type - if (strlen(logformat_keywords[j].name) == var_len && - strncmp(var, logformat_keywords[j].name, var_len) == 0) { - if (logformat_keywords[j].mode != PR_MODE_HTTP || curproxy->mode == PR_MODE_HTTP) { - node = calloc(1, sizeof(*node)); - if (!node) { - memprintf(err, "out of memory error"); + for (j = 0; logformat_aliases[j].name; j++) { // search a log type + if (strlen(logformat_aliases[j].name) == alias_len && + strncmp(alias, logformat_aliases[j].name, alias_len) == 0) { + node = calloc(1, sizeof(*node)); + if (!node) { + memprintf(err, "out of memory error"); + goto error_free; + } + node->type = LOG_FMT_ALIAS; + node->alias = &logformat_aliases[j]; + node->typecast = typecast; + if (name && name_len) + node->name = my_strndup(name, name_len); + node->options = *defoptions; + if (arg_len) { + node->arg = my_strndup(arg, arg_len); + if (!parse_logformat_node_args(node->arg, node, err)) goto error_free; + } + if 
(node->alias->type == LOG_FMT_GLOBAL) { + *defoptions = node->options; + if (lf_expr->nodes.options == LOG_OPT_NONE) + lf_expr->nodes.options = node->options; + else { + /* global options were previously set and were + * overwritten for nodes that appear after the + * current one. + * + * However, for lf_expr->nodes.options we must + * keep a track of options common to ALL nodes, + * thus we take previous global options into + * account to compute the new logformat + * expression wide (global) node options. + */ + lf_expr->nodes.options &= node->options; } - node->type = logformat_keywords[j].type; - node->options = *defoptions; - if (arg_len) { - node->arg = my_strndup(arg, arg_len); - if (!parse_logformat_var_args(node->arg, node, err)) - goto error_free; - } - if (node->type == LOG_FMT_GLOBAL) { - *defoptions = node->options; - free(node->arg); - free(node); - } else { - if (logformat_keywords[j].config_callback && - logformat_keywords[j].config_callback(node, curproxy) != 0) { - goto error_free; - } - curproxy->to_log |= logformat_keywords[j].lw; - LIST_APPEND(list_format, &node->list); - } - return 1; + free_logformat_node(node); } else { - memprintf(err, "format variable '%s' is reserved for HTTP mode", - logformat_keywords[j].name); - goto error_free; + LIST_APPEND(list_format, &node->list); } + return 1; } } - j = var[var_len]; - var[var_len] = 0; - memprintf(err, "no such format variable '%s'. If you wanted to emit the '%%' character verbatim, you need to use '%%%%'", var); - var[var_len] = j; + j = alias[alias_len]; + alias[alias_len] = 0; + memprintf(err, "no such format alias '%s'. 
If you wanted to emit the '%%' character verbatim, you need to use '%%%%'", alias); + alias[alias_len] = j; error_free: - if (node) { - free(node->arg); - free(node); - } + free_logformat_node(node); return 0; } @@ -367,13 +482,14 @@ int parse_logformat_var(char *arg, int arg_len, char *var, int var_len, struct p * start: start pointer * end: end text pointer * type: string type - * list_format: destination list + * lf_expr: destination logformat expr (list of fmt nodes) * * LOG_TEXT: copy chars from start to end excluding end. * */ -int add_to_logformat_list(char *start, char *end, int type, struct list *list_format, char **err) +int add_to_logformat_list(char *start, char *end, int type, struct lf_expr *lf_expr, char **err) { + struct list *list_format = &lf_expr->nodes.list; char *str; if (type == LF_TEXT) { /* type text */ @@ -401,17 +517,19 @@ int add_to_logformat_list(char *start, char *end, int type, struct list *list_fo } /* - * Parse the sample fetch expression <text> and add a node to <list_format> upon - * success. At the moment, sample converters are not yet supported but fetch arguments - * should work. The curpx->conf.args.ctx must be set by the caller. If an end pointer + * Parse the sample fetch expression <text> and add a node to <lf_expr> upon + * success. The curpx->conf.args.ctx must be set by the caller. If an end pointer * is passed in <endptr>, it will be updated with the pointer to the first character * not part of the sample expression. * * In error case, the function returns 0, otherwise it returns 1. 
*/ -int add_sample_to_logformat_list(char *text, char *arg, int arg_len, struct proxy *curpx, struct list *list_format, int options, int cap, char **err, char **endptr) +static int add_sample_to_logformat_list(char *text, char *name, int name_len, int typecast, + char *arg, int arg_len, struct lf_expr *lf_expr, + struct arg_list *al, int options, int cap, char **err, char **endptr) { char *cmd[2]; + struct list *list_format = &lf_expr->nodes.list; struct sample_expr *expr = NULL; struct logformat_node *node = NULL; int cmd_arg; @@ -420,8 +538,8 @@ int add_sample_to_logformat_list(char *text, char *arg, int arg_len, struct prox cmd[1] = ""; cmd_arg = 0; - expr = sample_parse_expr(cmd, &cmd_arg, curpx->conf.args.file, curpx->conf.args.line, err, - &curpx->conf.args, endptr); + expr = sample_parse_expr(cmd, &cmd_arg, lf_expr->conf.file, lf_expr->conf.line, err, + al, endptr); if (!expr) { memprintf(err, "failed to parse sample expression <%s> : %s", text, *err); goto error_free; @@ -429,16 +547,20 @@ int add_sample_to_logformat_list(char *text, char *arg, int arg_len, struct prox node = calloc(1, sizeof(*node)); if (!node) { + release_sample_expr(expr); memprintf(err, "out of memory error"); goto error_free; } + if (name && name_len) + node->name = my_strndup(name, name_len); node->type = LOG_FMT_EXPR; + node->typecast = typecast; node->expr = expr; node->options = options; if (arg_len) { node->arg = my_strndup(arg, arg_len); - if (!parse_logformat_var_args(node->arg, node, err)) + if (!parse_logformat_node_args(node->arg, node, err)) goto error_free; } if (expr->fetch->val & cap & SMP_VAL_REQUEST) @@ -455,71 +577,71 @@ int add_sample_to_logformat_list(char *text, char *arg, int arg_len, struct prox if ((options & LOG_OPT_HTTP) && (expr->fetch->use & (SMP_USE_L6REQ|SMP_USE_L6RES))) { ha_warning("parsing [%s:%d] : L6 sample fetch <%s> ignored in HTTP log-format string.\n", - curpx->conf.args.file, curpx->conf.args.line, text); + lf_expr->conf.file, lf_expr->conf.line, 
text); } - /* check if we need to allocate an http_txn struct for HTTP parsing */ - /* Note, we may also need to set curpx->to_log with certain fetches */ - curpx->http_needed |= !!(expr->fetch->use & SMP_USE_HTTP_ANY); - - /* FIXME: temporary workaround for missing LW_XPRT and LW_REQ flags - * needed with some sample fetches (eg: ssl*). We always set it for - * now on, but this will leave with sample capabilities soon. - */ - curpx->to_log |= LW_XPRT; - if (curpx->http_needed) - curpx->to_log |= LW_REQ; LIST_APPEND(list_format, &node->list); return 1; error_free: - release_sample_expr(expr); - if (node) { - free(node->arg); - free(node); - } + free_logformat_node(node); return 0; } /* - * Parse the log_format string and fill a linked list. - * Variable name are preceded by % and composed by characters [a-zA-Z0-9]* : %varname - * You can set arguments using { } : %{many arguments}varname. - * The curproxy->conf.args.ctx must be set by the caller. + * Compile logformat expression (from string to list of logformat nodes) + * + * Aliases are preceded by % and composed by characters [a-zA-Z0-9]* : %aliasname + * Expressions are preceded by % and enclosed in square brackets: %[expr] + * You can set arguments using { } : %{many arguments}aliasname + * %{many arguments}[expr] * - * fmt: the string to parse - * curproxy: the proxy affected - * list_format: the destination list + * lf_expr: the destination logformat expression (logformat_node list) + * which is supposed to be configured (str and conf set) but + * shouldn't be compiled (shouldn't contain any nodes) + * al: arg list where sample expr should store arg dependency (if the logformat + * expression involves sample expressions), may be NULL * options: LOG_OPT_* to force on every node * cap: all SMP_VAL_* flags supported by the consumer * * The function returns 1 in success case, otherwise, it returns 0 and err is filled. 
*/ -int parse_logformat_string(const char *fmt, struct proxy *curproxy, struct list *list_format, int options, int cap, char **err) +int lf_expr_compile(struct lf_expr *lf_expr, + struct arg_list *al, int options, int cap, char **err) { + char *fmt = lf_expr->str; /* will be freed unless default */ char *sp, *str, *backfmt; /* start pointer for text parts */ char *arg = NULL; /* start pointer for args */ - char *var = NULL; /* start pointer for vars */ + char *alias = NULL; /* start pointer for aliases */ + char *name = NULL; /* token name (optional) */ + char *typecast_str = NULL; /* token output type (if custom name is set) */ int arg_len = 0; - int var_len = 0; + int alias_len = 0; + int name_len = 0; + int typecast = SMP_T_SAME; /* relaxed by default */ int cformat; /* current token format */ int pformat; /* previous token format */ - struct logformat_node *tmplf, *back; + + BUG_ON((lf_expr->flags & LF_FL_COMPILED)); + + if (!fmt) + return 1; // nothing to do sp = str = backfmt = strdup(fmt); if (!str) { memprintf(err, "out of memory error"); return 0; } - curproxy->to_log |= LW_INIT; - /* flush the list first. */ - list_for_each_entry_safe(tmplf, back, list_format, list) { - LIST_DELETE(&tmplf->list); - release_sample_expr(tmplf->expr); - free(tmplf->arg); - free(tmplf); - } + /* Prepare lf_expr nodes, past this lf_expr doesn't know about ->str + * anymore as ->str and ->nodes are part of the same union. ->str has + * been saved as local 'fmt' string pointer, so we must free it before + * returning. + */ + LIST_INIT(&lf_expr->nodes.list); + lf_expr->nodes.options = LOG_OPT_NONE; + /* we must set the compiled flag now for proper deinit in case of failure */ + lf_expr->flags |= LF_FL_COMPILED; for (cformat = LF_INIT; cformat != LF_END; str++) { pformat = cformat; @@ -533,20 +655,62 @@ int parse_logformat_string(const char *fmt, struct proxy *curproxy, struct list * We use the common LF_INIT state to dispatch to the different final states. 
*/ switch (pformat) { - case LF_STARTVAR: // text immediately following a '%' - arg = NULL; var = NULL; - arg_len = var_len = 0; + case LF_STARTALIAS: // text immediately following a '%' + arg = NULL; alias = NULL; + name = NULL; + name_len = 0; + typecast = SMP_T_SAME; + arg_len = alias_len = 0; + if (*str == '(') { // custom output name + cformat = LF_STONAME; + name = str + 1; + } + else + goto startalias; + break; + + case LF_STONAME: // text immediately following '%(' + case LF_STOTYPE: + if (cformat == LF_STONAME && *str == ':') { // start custom output type + cformat = LF_STOTYPE; + name_len = str -name; + typecast_str = str + 1; + } + else if (*str == ')') { // end of custom output name + if (cformat == LF_STONAME) + name_len = str - name; + else { + /* custom type */ + *str = 0; // so that typecast_str is 0 terminated + typecast = type_to_smp(typecast_str); + if (typecast != SMP_T_STR && typecast != SMP_T_SINT && + typecast != SMP_T_BOOL) { + memprintf(err, "unexpected output type '%.*s' at position %d line : '%s'. 
Supported types are: str, sint, bool", (int)(str - typecast_str), typecast_str, (int)(typecast_str - backfmt), fmt); + goto fail; + } + } + cformat = LF_EDONAME; + } else if (!isalnum((unsigned char)*str) && *str != '_' && *str != '-') { + memprintf(err, "invalid character in custom name near '%c' at position %d line : '%s'", + *str, (int)(str - backfmt), fmt); + + goto fail; + } + break; + + case LF_EDONAME: // text immediately following %(name) + startalias: if (*str == '{') { // optional argument cformat = LF_STARG; arg = str + 1; } else if (*str == '[') { cformat = LF_STEXPR; - var = str + 1; // store expr in variable name + alias = str + 1; // store expr in alias name } - else if (isalpha((unsigned char)*str)) { // variable name - cformat = LF_VAR; - var = str; + else if (isalpha((unsigned char)*str)) { // alias name + cformat = LF_ALIAS; + alias = str; } else if (*str == '%') cformat = LF_TEXT; // convert this character to a literal (useful for '%') @@ -555,7 +719,7 @@ int parse_logformat_string(const char *fmt, struct proxy *curproxy, struct list cformat = LF_TEXT; pformat = LF_TEXT; /* finally we include the previous char as well */ sp = str - 1; /* send both the '%' and the current char */ - memprintf(err, "unexpected variable name near '%c' at position %d line : '%s'. Maybe you want to write a single '%%', use the syntax '%%%%'", + memprintf(err, "unexpected alias name near '%c' at position %d line : '%s'. 
Maybe you want to write a single '%%', use the syntax '%%%%'", *str, (int)(str - backfmt), fmt); goto fail; @@ -575,15 +739,15 @@ int parse_logformat_string(const char *fmt, struct proxy *curproxy, struct list case LF_EDARG: // text immediately following '%{arg}' if (*str == '[') { cformat = LF_STEXPR; - var = str + 1; // store expr in variable name + alias = str + 1; // store expr in alias name break; } - else if (isalnum((unsigned char)*str)) { // variable name - cformat = LF_VAR; - var = str; + else if (isalnum((unsigned char)*str)) { // alias name + cformat = LF_ALIAS; + alias = str; break; } - memprintf(err, "parse argument modifier without variable name near '%%{%s}'", arg); + memprintf(err, "parse argument modifier without alias name near '%%{%s}'", arg); goto fail; case LF_STEXPR: // text immediately following '%[' @@ -592,7 +756,7 @@ int parse_logformat_string(const char *fmt, struct proxy *curproxy, struct list * part of the expression, which MUST be the trailing * angle bracket. 
*/ - if (!add_sample_to_logformat_list(var, arg, arg_len, curproxy, list_format, options, cap, err, &str)) + if (!add_sample_to_logformat_list(alias, name, name_len, typecast, arg, arg_len, lf_expr, al, options, cap, err, &str)) goto fail; if (*str == ']') { @@ -604,26 +768,26 @@ int parse_logformat_string(const char *fmt, struct proxy *curproxy, struct list char c = *str; *str = 0; if (isprint((unsigned char)c)) - memprintf(err, "expected ']' after '%s', but found '%c'", var, c); + memprintf(err, "expected ']' after '%s', but found '%c'", alias, c); else - memprintf(err, "missing ']' after '%s'", var); + memprintf(err, "missing ']' after '%s'", alias); goto fail; } break; - case LF_VAR: // text part of a variable name - var_len = str - var; + case LF_ALIAS: // text part of a alias name + alias_len = str - alias; if (!isalnum((unsigned char)*str)) - cformat = LF_INIT; // not variable name anymore + cformat = LF_INIT; // not alias name anymore break; default: // LF_INIT, LF_TEXT, LF_SEPARATOR, LF_END, LF_EDEXPR cformat = LF_INIT; } - if (cformat == LF_INIT) { /* resynchronize state to text/sep/startvar */ + if (cformat == LF_INIT) { /* resynchronize state to text/sep/startalias */ switch (*str) { - case '%': cformat = LF_STARTVAR; break; + case '%': cformat = LF_STARTALIAS; break; case 0 : cformat = LF_END; break; case ' ': if (options & LOG_OPT_MERGE_SPACES) { @@ -637,13 +801,13 @@ int parse_logformat_string(const char *fmt, struct proxy *curproxy, struct list if (cformat != pformat || pformat == LF_SEPARATOR) { switch (pformat) { - case LF_VAR: - if (!parse_logformat_var(arg, arg_len, var, var_len, curproxy, list_format, &options, err)) + case LF_ALIAS: + if (!parse_logformat_alias(arg, arg_len, name, name_len, typecast, alias, alias_len, lf_expr, &options, err)) goto fail; break; case LF_TEXT: case LF_SEPARATOR: - if (!add_to_logformat_list(sp, str, pformat, list_format, err)) + if (!add_to_logformat_list(sp, str, pformat, lf_expr, err)) goto fail; break; } @@ 
-651,18 +815,236 @@ int parse_logformat_string(const char *fmt, struct proxy *curproxy, struct list } } - if (pformat == LF_STARTVAR || pformat == LF_STARG || pformat == LF_STEXPR) { - memprintf(err, "truncated line after '%s'", var ? var : arg ? arg : "%"); + if (pformat == LF_STARTALIAS || pformat == LF_STARG || pformat == LF_STEXPR || pformat == LF_STONAME || pformat == LF_STOTYPE || pformat == LF_EDONAME) { + memprintf(err, "truncated line after '%s'", alias ? alias : arg ? arg : "%"); + goto fail; + } + logformat_str_free(&fmt); + ha_free(&backfmt); + + return 1; + fail: + logformat_str_free(&fmt); + ha_free(&backfmt); + return 0; +} + +/* lf_expr_compile() helper: uses <curproxy> to deduce settings and + * simplify function usage, mostly for legacy purpose + * + * curproxy->conf.args.ctx must be set by the caller. + * + * The logformat expression will be scheduled for postcheck on the proxy unless + * the proxy was already checked, in which case all checks will be performed right + * away. + * + * Returns 1 on success and 0 on failure. On failure: <lf_expr> will be cleaned + * up and <err> will be set. 
+ */ +int parse_logformat_string(const char *fmt, struct proxy *curproxy, + struct lf_expr *lf_expr, + int options, int cap, char **err) +{ + int ret; + + + /* reinit lf_expr (if previously set) */ + lf_expr_deinit(lf_expr); + + lf_expr->str = strdup(fmt); + if (!lf_expr->str) { + memprintf(err, "out of memory error"); + goto fail; + } + + /* Save some parsing infos to raise relevant error messages during + * postparsing if needed + */ + if (curproxy->conf.args.file) { + lf_expr->conf.file = strdup(curproxy->conf.args.file); + lf_expr->conf.line = curproxy->conf.args.line; + } + + ret = lf_expr_compile(lf_expr, &curproxy->conf.args, options, cap, err); + + if (!ret) + goto fail; + + if (!(curproxy->flags & PR_FL_CHECKED)) { + /* add the lf_expr to the proxy checks to delay postparsing + * since config-related proxy properties are not stable yet + */ + LIST_APPEND(&curproxy->conf.lf_checks, &lf_expr->list); + } + else { + /* probably called during runtime or with proxy already checked, + * perform the postcheck right away + */ + if (!lf_expr_postcheck(lf_expr, curproxy, err)) + goto fail; + } + return 1; + + fail: + lf_expr_deinit(lf_expr); + return 0; +} + +/* automatically resolves incompatible LOG_OPT options by taking into + * account current options and global options + */ +static inline void _lf_expr_postcheck_node_opt(int *options, int g_options) +{ + /* encoding is incompatible with HTTP option, so it is ignored + * if HTTP option is set, unless HTTP option wasn't set globally + * and encoding was set globally, which means encoding takes the + * precedence> + */ + if (*options & LOG_OPT_HTTP) { + if ((g_options & (LOG_OPT_HTTP | LOG_OPT_ENCODE)) == LOG_OPT_ENCODE) { + /* global encoding enabled and http enabled individually */ + *options &= ~LOG_OPT_HTTP; + } + else + *options &= ~LOG_OPT_ENCODE; + } + + if (*options & LOG_OPT_ENCODE) { + /* when encoding is set, ignore +E option */ + *options &= ~LOG_OPT_ESC; + } +} + +/* Performs LOG_OPT postparsing check 
on logformat node <node> belonging to a + * given logformat expression <lf_expr> + * + * It returns 1 on success and 0 on error, <err> will be set in case of error + */ +static int lf_expr_postcheck_node_opt(struct lf_expr *lf_expr, struct logformat_node *node, char **err) +{ + /* per-node encoding options cannot be disabled if already + * enabled globally + * + * Also, ensure we don't mix encoding types, global setting + * prevails over per-node one. + * + * Finally, ignore LOG_OPT_BIN since it is a global-only option + */ + if (lf_expr->nodes.options & LOG_OPT_ENCODE) { + node->options &= ~(LOG_OPT_BIN | LOG_OPT_ENCODE); + node->options |= (lf_expr->nodes.options & LOG_OPT_ENCODE); + } + else + node->options &= ~LOG_OPT_BIN; + + _lf_expr_postcheck_node_opt(&node->options, lf_expr->nodes.options); + + return 1; +} + +/* Performs a postparsing check on logformat expression <expr> for a given <px> + * proxy. The function will behave differently depending on the proxy state + * (during parsing we will try to adapt proxy configuration to make it + * compatible with logformat expression, but once the proxy is checked, we fail + * as soon as we face incompatibilities) + * + * It returns 1 on success and 0 on error, <err> will be set in case of error. 
+ */ +int lf_expr_postcheck(struct lf_expr *lf_expr, struct proxy *px, char **err) +{ + struct logformat_node *lf; + + if (!(px->flags & PR_FL_CHECKED)) + px->to_log |= LW_INIT; + + /* postcheck global node options */ + _lf_expr_postcheck_node_opt(&lf_expr->nodes.options, LOG_OPT_NONE); + + list_for_each_entry(lf, &lf_expr->nodes.list, list) { + if (lf->type == LOG_FMT_EXPR) { + struct sample_expr *expr = lf->expr; + uint8_t http_needed = !!(expr->fetch->use & SMP_USE_HTTP_ANY); + + if ((px->flags & PR_FL_CHECKED)) { + /* fail as soon as proxy properties are not compatible */ + if (http_needed && !px->http_needed) { + memprintf(err, "sample fetch '%s' requires HTTP enabled proxy which is not available here", + expr->fetch->kw); + goto fail; + } + goto next_node; + } + /* check if we need to allocate an http_txn struct for HTTP parsing */ + /* Note, we may also need to set curpx->to_log with certain fetches */ + px->http_needed |= http_needed; + + /* FIXME: temporary workaround for missing LW_XPRT and LW_REQ flags + * needed with some sample fetches (eg: ssl*). We always set it for + * now on, but this will leave with sample capabilities soon. 
+ */ + px->to_log |= LW_XPRT; + if (px->http_needed) + px->to_log |= LW_REQ; + } + else if (lf->type == LOG_FMT_ALIAS) { + if (lf->alias->mode == PR_MODE_HTTP && px->mode != PR_MODE_HTTP) { + memprintf(err, "format alias '%s' is reserved for HTTP mode", + lf->alias->name); + goto fail; + } + if (lf->alias->config_callback && + !lf->alias->config_callback(lf, px)) { + memprintf(err, "cannot configure format alias '%s' in this context", + lf->alias->name); + goto fail; + } + if (!(px->flags & PR_FL_CHECKED)) + px->to_log |= lf->alias->lw; + } + next_node: + /* postcheck individual node's options */ + if (!lf_expr_postcheck_node_opt(lf_expr, lf, err)) + goto fail; + } + if ((px->to_log & (LW_REQ | LW_RESP)) && + (px->mode != PR_MODE_HTTP && !(px->options & PR_O_HTTP_UPG))) { + memprintf(err, "logformat expression not usable here (at least one node depends on HTTP mode)"); goto fail; } - free(backfmt); return 1; fail: - free(backfmt); return 0; } +/* postparse logformats defined at <px> level */ +static int postcheck_logformat_proxy(struct proxy *px) +{ + char *err = NULL; + struct lf_expr *lf_expr, *back_lf; + int err_code = ERR_NONE; + + list_for_each_entry_safe(lf_expr, back_lf, &px->conf.lf_checks, list) { + BUG_ON(!(lf_expr->flags & LF_FL_COMPILED)); + if (!lf_expr_postcheck(lf_expr, px, &err)) + err_code |= ERR_FATAL | ERR_ALERT; + /* check performed, ensure it doesn't get checked twice */ + LIST_DEL_INIT(&lf_expr->list); + if (err_code & ERR_CODE) + break; + } + + if (err) { + memprintf(&err, "error detected while postparsing logformat expression used by %s '%s' : %s", proxy_type_str(px), px->id, err); + if (lf_expr->conf.file) + memprintf(&err, "parsing [%s:%d] : %s.\n", lf_expr->conf.file, lf_expr->conf.line, err); + ha_alert("%s", err); + ha_free(&err); + } + + return err_code; +} + /* * Parse the first range of indexes from a string made of a list of comma separated * ranges of indexes. 
Note that an index may be considered as a particular range @@ -775,110 +1157,6 @@ static int dup_log_target(struct log_target *def, struct log_target *cpy) return 0; } -/* must be called under the lbprm lock */ -static void _log_backend_srv_queue(struct server *srv) -{ - struct proxy *p = srv->proxy; - - /* queue the server in the proxy lb array to make it easily searchable by - * log-balance algorithms. Here we use the srv array as a general server - * pool of in-use servers, lookup is done using a relative positional id - * (array is contiguous) - * - * We use the avail server list to get a quick hand on available servers - * (those that are UP) - */ - if (srv->flags & SRV_F_BACKUP) { - if (!p->srv_act) - p->lbprm.log.srv[p->srv_bck] = srv; - p->srv_bck++; - } - else { - if (!p->srv_act) { - /* we will be switching to act tree in LB logic, thus we need to - * reset the lastid - */ - HA_ATOMIC_STORE(&p->lbprm.log.lastid, 0); - } - p->lbprm.log.srv[p->srv_act] = srv; - p->srv_act++; - } - /* append the server to the list of available servers */ - LIST_APPEND(&p->lbprm.log.avail, &srv->lb_list); - - p->lbprm.tot_weight = (p->srv_act) ? 
p->srv_act : p->srv_bck; -} - -static void log_backend_srv_up(struct server *srv) -{ - struct proxy *p __maybe_unused = srv->proxy; - - if (!srv_lb_status_changed(srv)) - return; /* nothing to do */ - if (srv_currently_usable(srv) || !srv_willbe_usable(srv)) - return; /* false alarm */ - - HA_RWLOCK_WRLOCK(LBPRM_LOCK, &p->lbprm.lock); - _log_backend_srv_queue(srv); - HA_RWLOCK_WRUNLOCK(LBPRM_LOCK, &p->lbprm.lock); -} - -/* must be called under lbprm lock */ -static void _log_backend_srv_recalc(struct proxy *p) -{ - unsigned int it = 0; - struct server *cur_srv; - - list_for_each_entry(cur_srv, &p->lbprm.log.avail, lb_list) { - uint8_t backup = cur_srv->flags & SRV_F_BACKUP; - - if ((!p->srv_act && backup) || - (p->srv_act && !backup)) - p->lbprm.log.srv[it++] = cur_srv; - } -} - -/* must be called under the lbprm lock */ -static void _log_backend_srv_dequeue(struct server *srv) -{ - struct proxy *p = srv->proxy; - - if (srv->flags & SRV_F_BACKUP) { - p->srv_bck--; - } - else { - p->srv_act--; - if (!p->srv_act) { - /* we will be switching to bck tree in LB logic, thus we need to - * reset the lastid - */ - HA_ATOMIC_STORE(&p->lbprm.log.lastid, 0); - } - } - - /* remove the srv from the list of available (UP) servers */ - LIST_DELETE(&srv->lb_list); - - /* reconstruct the array of usable servers */ - _log_backend_srv_recalc(p); - - p->lbprm.tot_weight = (p->srv_act) ? 
p->srv_act : p->srv_bck; -} - -static void log_backend_srv_down(struct server *srv) -{ - struct proxy *p __maybe_unused = srv->proxy; - - if (!srv_lb_status_changed(srv)) - return; /* nothing to do */ - if (!srv_currently_usable(srv) || srv_willbe_usable(srv)) - return; /* false alarm */ - - HA_RWLOCK_WRLOCK(LBPRM_LOCK, &p->lbprm.lock); - _log_backend_srv_dequeue(srv); - HA_RWLOCK_WRUNLOCK(LBPRM_LOCK, &p->lbprm.lock); -} - /* check that current configuration is compatible with "mode log" */ static int _postcheck_log_backend_compat(struct proxy *be) { @@ -943,8 +1221,11 @@ static int _postcheck_log_backend_compat(struct proxy *be) } if (balance_algo != BE_LB_ALGO_RR && balance_algo != BE_LB_ALGO_RND && - balance_algo != BE_LB_ALGO_LS && + balance_algo != BE_LB_ALGO_SS && balance_algo != BE_LB_ALGO_LH) { + /* cannot correct the error since lbprm init was already performed + * in cfgparse.c, so fail loudly + */ ha_alert("in %s '%s': \"balance\" only supports 'roundrobin', 'random', 'sticky' and 'log-hash'.\n", proxy_type_str(be), be->id); err_code |= ERR_ALERT | ERR_FATAL; } @@ -966,30 +1247,6 @@ static int postcheck_log_backend(struct proxy *be) if (err_code & ERR_CODE) return err_code; - /* First time encountering this log backend, perform some init - */ - be->lbprm.set_server_status_up = log_backend_srv_up; - be->lbprm.set_server_status_down = log_backend_srv_down; - be->lbprm.log.lastid = 0; /* initial value */ - LIST_INIT(&be->lbprm.log.avail); - - /* alloc srv array (it will be used for active and backup server lists in turn, - * so we ensure that the longest list will fit - */ - be->lbprm.log.srv = calloc(MAX(be->srv_act, be->srv_bck), - sizeof(*be->lbprm.log.srv)); - - if (!be->lbprm.log.srv ) { - memprintf(&msg, "memory error when allocating server array (%d entries)", - MAX(be->srv_act, be->srv_bck)); - err_code |= ERR_ALERT | ERR_FATAL; - goto end; - } - - /* reinit srv counters, lbprm queueing will recount */ - be->srv_act = 0; - be->srv_bck = 0; - /* 
"log-balance hash" needs to compile its expression */ if ((be->lbprm.algo & BE_LB_ALGO) == BE_LB_ALGO_LH) { struct sample_expr *expr; @@ -1100,13 +1357,10 @@ static int postcheck_log_backend(struct proxy *be) goto end; } srv->log_target->flags |= LOG_TARGET_FL_RESOLVED; - srv->cur_eweight = 1; /* ignore weights, all servers have the same weight */ - _log_backend_srv_queue(srv); srv = srv->next; } end: if (err_code & ERR_CODE) { - ha_free(&be->lbprm.log.srv); /* free log servers array */ ha_alert("log backend '%s': failed to initialize: %s.\n", be->id, msg); ha_free(&msg); } @@ -1171,6 +1425,7 @@ struct logger *dup_logger(struct logger *def) /* default values */ cpy->conf.file = NULL; + cpy->lb.smp_rgs = NULL; LIST_INIT(&cpy->list); /* special members */ @@ -1181,6 +1436,13 @@ struct logger *dup_logger(struct logger *def) if (!cpy->conf.file) goto error; } + if (def->lb.smp_rgs) { + cpy->lb.smp_rgs = malloc(sizeof(*cpy->lb.smp_rgs) * def->lb.smp_rgs_sz); + if (!cpy->lb.smp_rgs) + goto error; + memcpy(cpy->lb.smp_rgs, def->lb.smp_rgs, + sizeof(*cpy->lb.smp_rgs) * def->lb.smp_rgs_sz); + } /* inherit from original reference if set */ cpy->ref = (def->ref) ? def->ref : def; @@ -1204,6 +1466,7 @@ void free_logger(struct logger *logger) BUG_ON(LIST_INLIST(&logger->list)); ha_free(&logger->conf.file); deinit_log_target(&logger->target); + free(logger->lb.smp_rgs); free(logger); } @@ -1564,127 +1827,440 @@ int get_log_facility(const char *fac) return facility; } -/* - * Encode the string. 
+struct lf_buildctx { + char _buf[256];/* fixed size buffer for building small strings */ + int options; /* LOG_OPT_* options */ + int typecast; /* same as logformat_node->typecast */ + int in_text; /* inside variable-length text */ + union { + struct cbor_encode_ctx cbor; /* cbor-encode specific ctx */ + } encode; +}; + +static THREAD_LOCAL struct lf_buildctx lf_buildctx; + +/* helper to encode a single byte in hex form * - * When using the +E log format option, it will try to escape '"\]' - * characters with '\' as prefix. The same prefix should not be used as - * <escape>. + * Returns the position of the last written byte on success and NULL on + * error. */ -static char *lf_encode_string(char *start, char *stop, - const char escape, const long *map, - const char *string, - struct logformat_node *node) +static char *_encode_byte_hex(char *start, char *stop, unsigned char byte) { - if (node->options & LOG_OPT_ESC) { - if (start < stop) { - stop--; /* reserve one byte for the final '\0' */ - while (start < stop && *string != '\0') { - if (!ha_bit_test((unsigned char)(*string), map)) { - if (!ha_bit_test((unsigned char)(*string), rfc5424_escape_map)) - *start++ = *string; - else { - if (start + 2 >= stop) - break; - *start++ = '\\'; - *start++ = *string; - } - } - else { - if (start + 3 >= stop) - break; - *start++ = escape; - *start++ = hextab[(*string >> 4) & 15]; - *start++ = hextab[*string & 15]; - } - string++; - } - *start = '\0'; - } + /* hex form requires 2 bytes */ + if ((stop - start) < 2) + return NULL; + *start++ = hextab[(byte >> 4) & 15]; + *start++ = hextab[byte & 15]; + return start; +} + +/* lf cbor function ptr used to encode a single byte according to RFC8949 + * + * for now only hex form is supported. + * + * The function may only be called under CBOR context (that is when + * LOG_OPT_ENCODE_CBOR option is set). + * + * Returns the position of the last written byte on success and NULL on + * error. 
+ */ +static char *_lf_cbor_encode_byte(struct cbor_encode_ctx *cbor_ctx, + char *start, char *stop, unsigned char byte) +{ + struct lf_buildctx *ctx; + + BUG_ON(!cbor_ctx || !cbor_ctx->e_fct_ctx); + ctx = cbor_ctx->e_fct_ctx; + + if (ctx->options & LOG_OPT_BIN) { + /* raw output */ + if ((stop - start) < 1) + return NULL; + *start++ = byte; + return start; + } + return _encode_byte_hex(start, stop, byte); +} + +/* helper function to prepare lf_buildctx struct based on global options + * and current node settings (may be NULL) + */ +static inline void lf_buildctx_prepare(struct lf_buildctx *ctx, + int g_options, + const struct logformat_node *node) +{ + if (node) { + /* consider node's options and typecast setting */ + ctx->options = node->options; + ctx->typecast = node->typecast; } else { - return encode_string(start, stop, escape, map, string); + ctx->options = g_options; + ctx->typecast = SMP_T_SAME; /* default */ + } + + if (ctx->options & LOG_OPT_ENCODE_CBOR) { + /* prepare cbor-specific encode ctx */ + ctx->encode.cbor.e_fct_byte = _lf_cbor_encode_byte; + ctx->encode.cbor.e_fct_ctx = ctx; + } +} + +/* helper function for _lf_encode_bytes() to escape a single byte + * with <escape> + */ +static inline char *_lf_escape_byte(char *start, char *stop, + char byte, const char escape) +{ + if (start + 3 >= stop) + return NULL; + *start++ = escape; + *start++ = hextab[(byte >> 4) & 15]; + *start++ = hextab[byte & 15]; + + return start; +} + +/* helper function for _lf_encode_bytes() to escape a single byte + * with <escape> and deal with cbor-specific encoding logic + */ +static inline char *_lf_cbor_escape_byte(char *start, char *stop, + char byte, const char escape, + uint8_t cbor_string_prefix, + struct lf_buildctx *ctx) +{ + char escaped_byte[3]; + + escaped_byte[0] = escape; + escaped_byte[1] = hextab[(byte >> 4) & 15]; + escaped_byte[2] = hextab[byte & 15]; + + start = cbor_encode_bytes_prefix(&ctx->encode.cbor, start, stop, + escaped_byte, 3, + 
cbor_string_prefix); + + return start; +} + +/* helper function for _lf_encode_bytes() to encode a single byte + * and escape it with <escape> if found in <map> + * + * The function assumes that at least 1 byte is available for writing + * + * Returns the address of the last written byte on success, or NULL + * on error + */ +static inline char *_lf_map_escape_byte(char *start, char *stop, + const char *byte, + const char escape, const long *map, + const char **pending, uint8_t cbor_string_prefix, + struct lf_buildctx *ctx) +{ + if (!ha_bit_test((unsigned char)(*byte), map)) + *start++ = *byte; + else + start = _lf_escape_byte(start, stop, *byte, escape); + + return start; +} + +/* helper function for _lf_encode_bytes() to encode a single byte + * and escape it with <escape> if found in <map> and deal with + * cbor-specific encoding logic. + * + * The function assumes that at least 1 byte is available for writing + * + * Returns the address of the last written byte on success, or NULL + * on error + */ +static inline char *_lf_cbor_map_escape_byte(char *start, char *stop, + const char *byte, + const char escape, const long *map, + const char **pending, uint8_t cbor_string_prefix, + struct lf_buildctx *ctx) +{ + /* We try our best to minimize the number of chunks produced for the + * indefinite-length byte string as each chunk has an extra overhead + * as per RFC8949. 
+ * + * To achieve that, we try to emit consecutive bytes together + */ + if (!ha_bit_test((unsigned char)(*byte), map)) { + /* do nothing and let the caller continue seeking data, + * pending data will be flushed later + */ + } else { + /* first, flush pending unescaped bytes */ + start = cbor_encode_bytes_prefix(&ctx->encode.cbor, start, stop, + *pending, (byte - *pending), + cbor_string_prefix); + if (start == NULL) + return NULL; + + *pending = byte + 1; + + /* escape current matching byte */ + start = _lf_cbor_escape_byte(start, stop, *byte, escape, + cbor_string_prefix, + ctx); + } + + return start; +} + +/* helper function for _lf_encode_bytes() to encode a single byte + * and escape it with <escape> if found in <map> or escape it with + * '\' if found in rfc5424_escape_map + * + * The function assumes that at least 1 byte is available for writing + * + * Returns the address of the last written byte on success, or NULL + * on error + */ +static inline char *_lf_rfc5424_escape_byte(char *start, char *stop, + const char *byte, + const char escape, const long *map, + const char **pending, uint8_t cbor_string_prefix, + struct lf_buildctx *ctx) +{ + if (!ha_bit_test((unsigned char)(*byte), map)) { + if (!ha_bit_test((unsigned char)(*byte), rfc5424_escape_map)) + *start++ = *byte; + else { + if (start + 2 >= stop) + return NULL; + *start++ = '\\'; + *start++ = *byte; + } + } + else + start = _lf_escape_byte(start, stop, *byte, escape); + + return start; +} + +/* helper function for _lf_encode_bytes() to encode a single byte + * and escape it with <escape> if found in <map> or escape it with + * '\' if found in json_escape_map + * + * The function assumes that at least 1 byte is available for writing + * + * Returns the address of the last written byte on success, or NULL + * on error + */ +static inline char *_lf_json_escape_byte(char *start, char *stop, + const char *byte, + const char escape, const long *map, + const char **pending, uint8_t cbor_string_prefix, + 
struct lf_buildctx *ctx) +{ + if (!ha_bit_test((unsigned char)(*byte), map)) { + if (!ha_bit_test((unsigned char)(*byte), json_escape_map)) + *start++ = *byte; + else { + if (start + 2 >= stop) + return NULL; + *start++ = '\\'; + *start++ = *byte; + } } + else + start = _lf_escape_byte(start, stop, *byte, escape); return start; } /* - * Encode the chunk. + * helper for lf_encode_{string,chunk}: + * encode the input bytes, input <bytes> is processed until <bytes_stop> + * is reached. If <bytes_stop> is NULL, <bytes> is expected to be NULL + * terminated. * * When using the +E log format option, it will try to escape '"\]' * characters with '\' as prefix. The same prefix should not be used as * <escape>. + * + * When using json encoding, string will be escaped according to + * json escape map + * + * When using cbor encoding, escape option is ignored. However bytes found + * in <map> will still be escaped with <escape>. + * + * Return the address of the \0 character, or NULL on error */ -static char *lf_encode_chunk(char *start, char *stop, - const char escape, const long *map, - const struct buffer *chunk, - struct logformat_node *node) +static char *_lf_encode_bytes(char *start, char *stop, + const char escape, const long *map, + const char *bytes, const char *bytes_stop, + struct lf_buildctx *ctx) { - char *str, *end; - - if (node->options & LOG_OPT_ESC) { - if (start < stop) { - str = chunk->area; - end = chunk->area + chunk->data; - - stop--; /* reserve one byte for the final '\0' */ - while (start < stop && str < end) { - if (!ha_bit_test((unsigned char)(*str), map)) { - if (!ha_bit_test((unsigned char)(*str), rfc5424_escape_map)) - *start++ = *str; - else { - if (start + 2 >= stop) - break; - *start++ = '\\'; - *start++ = *str; - } - } - else { - if (start + 3 >= stop) - break; - *start++ = escape; - *start++ = hextab[(*str >> 4) & 15]; - *start++ = hextab[*str & 15]; - } - str++; - } - *start = '\0'; + char *ret; + const char *pending; + uint8_t 
cbor_string_prefix = 0; + char *(*encode_byte)(char *start, char *stop, + const char *byte, + const char escape, const long *map, + const char **pending, uint8_t cbor_string_prefix, + struct lf_buildctx *ctx); + + if (ctx->options & LOG_OPT_ENCODE_JSON) + encode_byte = _lf_json_escape_byte; + else if (ctx->options & LOG_OPT_ENCODE_CBOR) + encode_byte = _lf_cbor_map_escape_byte; + else if (ctx->options & LOG_OPT_ESC) + encode_byte = _lf_rfc5424_escape_byte; + else + encode_byte = _lf_map_escape_byte; + + if (ctx->options & LOG_OPT_ENCODE_CBOR) { + if (!bytes_stop) { + /* printable chars: use cbor text */ + cbor_string_prefix = 0x60; + } + else { + /* non printable chars: use cbor byte string */ + cbor_string_prefix = 0x40; } } - else { - return encode_chunk(start, stop, escape, map, chunk); + + if (start < stop) { + stop--; /* reserve one byte for the final '\0' */ + + if ((ctx->options & LOG_OPT_ENCODE_CBOR) && !ctx->in_text) { + /* start indefinite-length cbor byte string or text */ + start = _lf_cbor_encode_byte(&ctx->encode.cbor, start, stop, + (cbor_string_prefix | 0x1F)); + if (start == NULL) + return NULL; + } + pending = bytes; + + /* we have 2 distinct loops to keep checks outside of the loop + * for better performance + */ + if (bytes && !bytes_stop) { + while (start < stop && *bytes != '\0') { + ret = encode_byte(start, stop, bytes, escape, map, + &pending, cbor_string_prefix, + ctx); + if (ret == NULL) + break; + start = ret; + bytes++; + } + } else if (bytes) { + while (start < stop && bytes < bytes_stop) { + ret = encode_byte(start, stop, bytes, escape, map, + &pending, cbor_string_prefix, + ctx); + if (ret == NULL) + break; + start = ret; + bytes++; + } + } + + if (ctx->options & LOG_OPT_ENCODE_CBOR) { + if (pending != bytes) { + /* flush pending unescaped bytes */ + start = cbor_encode_bytes_prefix(&ctx->encode.cbor, start, stop, + pending, (bytes - pending), + cbor_string_prefix); + if (start == NULL) + return NULL; + } + if (!ctx->in_text) { + /* 
cbor break (to end indefinite-length text or byte string) */ + start = _lf_cbor_encode_byte(&ctx->encode.cbor, start, stop, 0xFF); + if (start == NULL) + return NULL; + } + } + + *start = '\0'; + return start; } - return start; + return NULL; } /* - * Write a string in the log string - * Take cares of quote and escape options + * Encode the string. + */ +static char *lf_encode_string(char *start, char *stop, + const char escape, const long *map, + const char *string, + struct lf_buildctx *ctx) +{ + return _lf_encode_bytes(start, stop, escape, map, + string, NULL, ctx); +} + +/* + * Encode the chunk. + */ +static char *lf_encode_chunk(char *start, char *stop, + const char escape, const long *map, + const struct buffer *chunk, + struct lf_buildctx *ctx) +{ + return _lf_encode_bytes(start, stop, escape, map, + chunk->area, chunk->area + chunk->data, + ctx); +} + +/* + * Write a raw string in the log string + * Take care of escape option + * + * When using json encoding, string will be escaped according + * to json escape map + * + * When using cbor encoding, escape option is ignored. 
* * Return the address of the \0 character, or NULL on error */ -char *lf_text_len(char *dst, const char *src, size_t len, size_t size, const struct logformat_node *node) +static inline char *_lf_text_len(char *dst, const char *src, + size_t len, size_t size, struct lf_buildctx *ctx) { - if (size < 2) - return NULL; + const long *escape_map = NULL; + char *ret; - if (node->options & LOG_OPT_QUOTE) { - *(dst++) = '"'; - size--; - } + if (ctx->options & LOG_OPT_ENCODE_JSON) + escape_map = json_escape_map; + else if (ctx->options & LOG_OPT_ESC) + escape_map = rfc5424_escape_map; if (src && len) { + if (ctx->options & LOG_OPT_ENCODE_CBOR) { + /* it's actually less costly to compute the actual text size to + * write a single fixed length text at once rather than emitting + * indefinite length text in cbor, because indefinite-length text + * has to be made of multiple chunks of known size as per RFC8949... + */ + { + int _len; + + /* strnlen(src, len) portable equivalent: */ + for (_len = 0; _len < len && src[_len]; _len++) + ; + + len = _len; + } + + ret = cbor_encode_text(&ctx->encode.cbor, dst, dst + size, src, len); + if (ret == NULL) + return NULL; + len = ret - dst; + } + /* escape_string and strlcpy2 will both try to add terminating NULL-byte - * to dst, so we need to make sure that extra byte will fit into dst - * before calling them + * to dst */ - if (node->options & LOG_OPT_ESC) { + else if (escape_map) { char *ret; - ret = escape_string(dst, (dst + size - 1), '\\', rfc5424_escape_map, src, src + len); - if (ret == NULL || *ret != '\0') + ret = escape_string(dst, dst + size, '\\', escape_map, src, src + len); + if (ret == NULL) return NULL; len = ret - dst; } @@ -1692,90 +2268,276 @@ char *lf_text_len(char *dst, const char *src, size_t len, size_t size, const str if (++len > size) len = size; len = strlcpy2(dst, src, len); + if (len == 0) + return NULL; } - - size -= len; dst += len; + size -= len; } - else if ((node->options & 
(LOG_OPT_QUOTE|LOG_OPT_MANDATORY)) == LOG_OPT_MANDATORY) { - if (size < 2) - return NULL; - *(dst++) = '-'; - size -= 1; - } - if (node->options & LOG_OPT_QUOTE) { - if (size < 2) + if (size < 1) + return NULL; + *dst = '\0'; + + return dst; +} + +/* + * Quote a string, then leverage _lf_text_len() to write it + */ +static inline char *_lf_quotetext_len(char *dst, const char *src, + size_t len, size_t size, struct lf_buildctx *ctx) +{ + if (size < 2) + return NULL; + + *(dst++) = '"'; + size--; + + if (src && len) { + char *ret; + + ret = _lf_text_len(dst, src, len, size, ctx); + if (ret == NULL) return NULL; - *(dst++) = '"'; + size -= (ret - dst); + dst += (ret - dst); } + if (size < 2) + return NULL; + *(dst++) = '"'; + + *dst = '\0'; + return dst; +} + +/* + * Write a string in the log string + * Take care of quote, mandatory and escape and encoding options + * + * Return the address of the \0 character, or NULL on error + */ +static char *lf_text_len(char *dst, const char *src, size_t len, size_t size, struct lf_buildctx *ctx) +{ + if ((ctx->options & (LOG_OPT_QUOTE | LOG_OPT_ENCODE_JSON))) + return _lf_quotetext_len(dst, src, len, size, ctx); + else if ((ctx->options & LOG_OPT_ENCODE_CBOR) || + (src && len)) + return _lf_text_len(dst, src, len, size, ctx); + + if (size < 2) + return NULL; + + if ((ctx->options & LOG_OPT_MANDATORY)) + return _lf_text_len(dst, "-", 1, size, ctx); + *dst = '\0'; + return dst; } -static inline char *lf_text(char *dst, const char *src, size_t size, const struct logformat_node *node) +/* + * Same as lf_text_len() except that it ignores mandatory and quoting options. + * Quoting is only performed when strictly required by the encoding method. 
+ */ +static char *lf_rawtext_len(char *dst, const char *src, size_t len, size_t size, struct lf_buildctx *ctx) +{ + if (!ctx->in_text && + (ctx->options & LOG_OPT_ENCODE_JSON)) + return _lf_quotetext_len(dst, src, len, size, ctx); + return _lf_text_len(dst, src, len, size, ctx); +} + +/* lf_text_len() helper when <src> is null-byte terminated */ +static inline char *lf_text(char *dst, const char *src, size_t size, struct lf_buildctx *ctx) { - return lf_text_len(dst, src, size, size, node); + return lf_text_len(dst, src, size, size, ctx); +} + +/* lf_rawtext_len() helper when <src> is null-byte terminated */ +static inline char *lf_rawtext(char *dst, const char *src, size_t size, struct lf_buildctx *ctx) +{ + return lf_rawtext_len(dst, src, size, size, ctx); } /* * Write a IP address to the log string * +X option write in hexadecimal notation, most significant byte on the left */ -char *lf_ip(char *dst, const struct sockaddr *sockaddr, size_t size, const struct logformat_node *node) +static char *lf_ip(char *dst, const struct sockaddr *sockaddr, size_t size, struct lf_buildctx *ctx) { char *ret = dst; int iret; char pn[INET6_ADDRSTRLEN]; - if (node->options & LOG_OPT_HEXA) { + if (ctx->options & LOG_OPT_HEXA) { unsigned char *addr = NULL; switch (sockaddr->sa_family) { case AF_INET: + { addr = (unsigned char *)&((struct sockaddr_in *)sockaddr)->sin_addr.s_addr; - iret = snprintf(dst, size, "%02X%02X%02X%02X", addr[0], addr[1], addr[2], addr[3]); + iret = snprintf(ctx->_buf, sizeof(ctx->_buf), "%02X%02X%02X%02X", + addr[0], addr[1], addr[2], addr[3]); + if (iret < 0 || iret >= size) + return NULL; + ret = lf_rawtext(dst, ctx->_buf, size, ctx); + break; + } case AF_INET6: + { addr = (unsigned char *)&((struct sockaddr_in6 *)sockaddr)->sin6_addr.s6_addr; - iret = snprintf(dst, size, "%02X%02X%02X%02X%02X%02X%02X%02X%02X%02X%02X%02X%02X%02X%02X%02X", - addr[0], addr[1], addr[2], addr[3], addr[4], addr[5], addr[6], addr[7], - addr[8], addr[9], addr[10], addr[11], 
addr[12], addr[13], addr[14], addr[15]); + iret = snprintf(ctx->_buf, sizeof(ctx->_buf), + "%02X%02X%02X%02X%02X%02X%02X%02X%02X%02X%02X%02X%02X%02X%02X%02X", + addr[0], addr[1], addr[2], addr[3], + addr[4], addr[5], addr[6], addr[7], + addr[8], addr[9], addr[10], addr[11], + addr[12], addr[13], addr[14], addr[15]); + if (iret < 0 || iret >= size) + return NULL; + ret = lf_rawtext(dst, ctx->_buf, size, ctx); + break; + } default: return NULL; } - if (iret < 0 || iret > size) - return NULL; - ret += iret; } else { addr_to_str((struct sockaddr_storage *)sockaddr, pn, sizeof(pn)); - ret = lf_text(dst, pn, size, node); - if (ret == NULL) - return NULL; + ret = lf_text(dst, pn, size, ctx); } return ret; } +/* Logformat expr wrapper to write a boolean according to node + * encoding settings + */ +static char *lf_bool_encode(char *dst, size_t size, uint8_t value, + struct lf_buildctx *ctx) +{ + /* encode as a regular bool value */ + + if (ctx->options & LOG_OPT_ENCODE_JSON) { + char *ret = dst; + int iret; + + if (value) + iret = snprintf(dst, size, "true"); + else + iret = snprintf(dst, size, "false"); + + if (iret < 0 || iret >= size) + return NULL; + ret += iret; + return ret; + } + if (ctx->options & LOG_OPT_ENCODE_CBOR) { + if (value) + return _lf_cbor_encode_byte(&ctx->encode.cbor, dst, dst + size, 0xF5); + return _lf_cbor_encode_byte(&ctx->encode.cbor, dst, dst + size, 0xF4); + } + + return NULL; /* not supported */ +} + +/* Logformat expr wrapper to write an integer according to node + * encoding settings and typecast settings. 
+ */ +static char *lf_int_encode(char *dst, size_t size, int64_t value, + struct lf_buildctx *ctx) +{ + if (ctx->typecast == SMP_T_BOOL) { + /* either true or false */ + return lf_bool_encode(dst, size, !!value, ctx); + } + + if (ctx->options & LOG_OPT_ENCODE_JSON) { + char *ret = dst; + int iret = 0; + + if (ctx->typecast == SMP_T_STR) { + /* encode as a string number (base10 with "quotes"): + * may be useful to work around the limited resolution + * of JS number types for instance + */ + iret = snprintf(dst, size, "\"%lld\"", (long long int)value); + } + else { + /* encode as a regular int64 number (base10) */ + iret = snprintf(dst, size, "%lld", (long long int)value); + } + + if (iret < 0 || iret >= size) + return NULL; + ret += iret; + + return ret; + } + else if (ctx->options & LOG_OPT_ENCODE_CBOR) { + /* Always print as a regular int64 number (STR typecast isn't + * supported) + */ + return cbor_encode_int64(&ctx->encode.cbor, dst, dst + size, value); + } + + return NULL; /* not supported */ +} + +enum lf_int_hdl { + LF_INT_LTOA = 0, + LF_INT_LLTOA, + LF_INT_ULTOA, + LF_INT_UTOA_PAD_4, +}; + +/* + * Logformat expr wrapper to write an integer, uses <dft_hdl> to know + * how to encode the value by default (if no encoding is used) + */ +static inline char *lf_int(char *dst, size_t size, int64_t value, + struct lf_buildctx *ctx, + enum lf_int_hdl dft_hdl) +{ + if (ctx->options & LOG_OPT_ENCODE) + return lf_int_encode(dst, size, value, ctx); + + switch (dft_hdl) { + case LF_INT_LTOA: + return ltoa_o(value, dst, size); + case LF_INT_LLTOA: + return lltoa(value, dst, size); + case LF_INT_ULTOA: + return ultoa_o(value, dst, size); + case LF_INT_UTOA_PAD_4: + { + if (size < 4) + return NULL; + return utoa_pad(value, dst, 4); + } + } + return NULL; +} + /* * Write a port to the log * +X option write in hexadecimal notation, most significant byte on the left */ -char *lf_port(char *dst, const struct sockaddr *sockaddr, size_t size, const struct logformat_node *node) 
+static char *lf_port(char *dst, const struct sockaddr *sockaddr, size_t size, struct lf_buildctx *ctx) { char *ret = dst; int iret; - if (node->options & LOG_OPT_HEXA) { + if (ctx->options & LOG_OPT_HEXA) { const unsigned char *port = (const unsigned char *)&((struct sockaddr_in *)sockaddr)->sin_port; - iret = snprintf(dst, size, "%02X%02X", port[0], port[1]); - if (iret < 0 || iret > size) + + iret = snprintf(ctx->_buf, sizeof(ctx->_buf), "%02X%02X", port[0], port[1]); + if (iret < 0 || iret >= size) return NULL; - ret += iret; + ret = lf_rawtext(dst, ctx->_buf, size, ctx); } else { - ret = ltoa_o(get_host_port((struct sockaddr_storage *)sockaddr), dst, size); - if (ret == NULL) - return NULL; + ret = lf_int(dst, size, get_host_port((struct sockaddr_storage *)sockaddr), + ctx, LF_INT_LTOA); } return ret; } @@ -2255,51 +3017,25 @@ static inline void __do_send_log_backend(struct proxy *be, struct log_header hdr int nblogger, size_t maxlen, char *message, size_t size) { - struct server *srv; - uint32_t targetid = ~0; /* default value to check if it was explicitly assigned */ - uint32_t nb_srv; - - HA_RWLOCK_RDLOCK(LBPRM_LOCK, &be->lbprm.lock); - - if (be->srv_act) { - nb_srv = be->srv_act; - } - else if (be->srv_bck) { - /* no more active servers but backup ones are, switch to backup farm */ - nb_srv = be->srv_bck; - if (!(be->options & PR_O_USE_ALL_BK)) { - /* log balancing disabled on backup farm */ - targetid = 0; /* use first server */ - goto skip_lb; - } - } - else { - /* no srv available, can't log */ - goto drop; - } + struct server *srv = NULL; /* log-balancing logic: */ if ((be->lbprm.algo & BE_LB_ALGO) == BE_LB_ALGO_RR) { - /* Atomically load and update lastid since it's not protected - * by any write lock - * - * Wrapping is expected and could lead to unexpected ID reset in the - * middle of a cycle, but given that this only happens once in every - * 4 billions it is quite negligible - */ - targetid = HA_ATOMIC_FETCH_ADD(&be->lbprm.log.lastid, 1) % 
nb_srv; + srv = fwrr_get_next_server(be, NULL); } - else if ((be->lbprm.algo & BE_LB_ALGO) == BE_LB_ALGO_LS) { + else if ((be->lbprm.algo & BE_LB_ALGO) == BE_LB_ALGO_SS) { /* sticky mode: use first server in the pool, which will always stay * first during dequeuing and requeuing, unless it becomes unavailable * and will be replaced by another one */ - targetid = 0; + srv = ss_get_server(be); } else if ((be->lbprm.algo & BE_LB_ALGO) == BE_LB_ALGO_RND) { - /* random mode */ - targetid = statistical_prng() % nb_srv; + unsigned int hash; + + hash = statistical_prng(); /* random */ + srv = chash_get_server_hash(be, hash, NULL); } else if ((be->lbprm.algo & BE_LB_ALGO) == BE_LB_ALGO_LH) { struct sample result; @@ -2314,28 +3050,24 @@ static inline void __do_send_log_backend(struct proxy *be, struct log_header hdr if (sample_process_cnv(be->lbprm.expr, &result)) { /* gen_hash takes binary input, ensure that we provide such value to it */ if (result.data.type == SMP_T_BIN || sample_casts[result.data.type][SMP_T_BIN]) { + unsigned int hash; + sample_casts[result.data.type][SMP_T_BIN](&result); - targetid = gen_hash(be, result.data.u.str.area, result.data.u.str.data) % nb_srv; + hash = gen_hash(be, result.data.u.str.area, result.data.u.str.data); + srv = map_get_server_hash(be, hash); } } } - skip_lb: - - if (targetid == ~0) { - /* no target assigned, nothing to do */ + if (!srv) { + /* no srv available, can't log */ goto drop; } - /* find server based on targetid */ - srv = be->lbprm.log.srv[targetid]; - HA_RWLOCK_RDUNLOCK(LBPRM_LOCK, &be->lbprm.lock); - __do_send_log(srv->log_target, hdr, nblogger, maxlen, message, size); return; drop: - HA_RWLOCK_RDUNLOCK(LBPRM_LOCK, &be->lbprm.lock); _HA_ATOMIC_INC(&dropped_logs); } @@ -2347,7 +3079,7 @@ static inline void __do_send_log_backend(struct proxy *be, struct log_header hdr * data to build the header. 
*/ void process_send_log(struct list *loggers, int level, int facility, - struct ist *metadata, char *message, size_t size) + struct ist *metadata, char *message, size_t size) { struct logger *logger; int nblogger; @@ -2463,16 +3195,131 @@ const char sess_set_cookie[8] = "NPDIRU67"; /* No set-cookie, Set-cookie found a Set-cookie Updated, unknown, unknown */ /* + * try to write a cbor byte if there is enough space, or goto out + */ +#define LOG_CBOR_BYTE(x) do { \ + ret = _lf_cbor_encode_byte(&ctx->encode.cbor, \ + tmplog, \ + dst + maxsize, \ + (x)); \ + if (ret == NULL) \ + goto out; \ + tmplog = ret; \ + } while (0) + +/* * try to write a character if there is enough space, or goto out */ #define LOGCHAR(x) do { \ - if (tmplog < dst + maxsize - 1) { \ - *(tmplog++) = (x); \ - } else { \ - goto out; \ - } \ + if ((ctx->options & LOG_OPT_ENCODE_CBOR) && \ + ctx->in_text) { \ + char _x[1]; \ + /* encode the char as text chunk since we \ + * cannot just throw random bytes and expect \ + * cbor decoder to know how to handle them \ + */ \ + _x[0] = (x); \ + ret = cbor_encode_text(&ctx->encode.cbor, \ + tmplog, \ + dst + maxsize, \ + _x, sizeof(_x)); \ + if (ret == NULL) \ + goto out; \ + tmplog = ret; \ + break; \ + } \ + if (tmplog < dst + maxsize - 1) { \ + *(tmplog++) = (x); \ + } else { \ + goto out; \ + } \ } while(0) +/* indicate that a new variable-length text is starting, sets in_text + * variable to indicate that a var text was started and deals with + * encoding and options to know if some special treatment is needed. 
+ */ +#define LOG_VARTEXT_START() do { \ + ctx->in_text = 1; \ + if (ctx->options & LOG_OPT_ENCODE_CBOR) { \ + /* start indefinite-length cbor text */ \ + LOG_CBOR_BYTE(0x7F); \ + break; \ + } \ + /* put the text within quotes if JSON encoding \ + * is used or quoting is enabled \ + */ \ + if (ctx->options & \ + (LOG_OPT_QUOTE | LOG_OPT_ENCODE_JSON)) { \ + LOGCHAR('"'); \ + } \ + } while (0) + +/* properly finish a variable text that was started using LOG_VARTEXT_START + * checks the in_text variable to know if a text was started or not, and + * deals with encoding and options to know if some special treatment is + * needed. + */ +#define LOG_VARTEXT_END() do { \ + if (!ctx->in_text) \ + break; \ + ctx->in_text = 0; \ + if (ctx->options & LOG_OPT_ENCODE_CBOR) { \ + /* end indefinite-length cbor text with break*/\ + LOG_CBOR_BYTE(0xFF); \ + break; \ + } \ + /* add the ending quote if JSON encoding is \ + * used or quoting is enabled \ + */ \ + if (ctx->options & \ + (LOG_OPT_QUOTE | LOG_OPT_ENCODE_JSON)) { \ + LOGCHAR('"'); \ + } \ + } while (0) + +/* Prints additional logvalue hint represented by <chr>. + * It is useful to express that <chr> is not part of the "raw" value and + * should be considered as optional metadata instead. 
+ */ +#define LOGMETACHAR(chr) do { \ + /* ignored when encoding is used */ \ + if (ctx->options & LOG_OPT_ENCODE) \ + break; \ + LOGCHAR(chr); \ + } while (0) + +/* indicate the start of a string array */ +#define LOG_STRARRAY_START() do { \ + if (ctx->options & LOG_OPT_ENCODE_JSON) \ + LOGCHAR('['); \ + if (ctx->options & LOG_OPT_ENCODE_CBOR) { \ + /* start indefinite-length array */ \ + LOG_CBOR_BYTE(0x9F); \ + } \ + } while (0) + +/* indicate that a new element is added to the string array */ +#define LOG_STRARRAY_NEXT() do { \ + if (ctx->options & LOG_OPT_ENCODE_CBOR) \ + break; \ + if (ctx->options & LOG_OPT_ENCODE_JSON) { \ + LOGCHAR(','); \ + LOGCHAR(' '); \ + } \ + else \ + LOGCHAR(' '); \ + } while (0) + +/* indicate the end of a string array */ +#define LOG_STRARRAY_END() do { \ + if (ctx->options & LOG_OPT_ENCODE_JSON) \ + LOGCHAR(']'); \ + if (ctx->options & LOG_OPT_ENCODE_CBOR) { \ + /* cbor break */ \ + LOG_CBOR_BYTE(0xFF); \ + } \ + } while (0) /* Initializes some log data at boot */ static void init_log() @@ -2480,6 +3327,9 @@ static void init_log() char *tmp; int i; + /* Initialize the no escape map, which may be used to bypass escaping */ + memset(no_escape_map, 0, sizeof(no_escape_map)); + /* Initialize the escape map for the RFC5424 structured-data : '"\]' * inside PARAM-VALUE should be escaped with '\' as prefix. * See https://tools.ietf.org/html/rfc5424#section-6.3.3 for more @@ -2493,6 +3343,15 @@ static void init_log() tmp++; } + /* Initialize the escape map for JSON strings : '"\' */ + memset(json_escape_map, 0, sizeof(json_escape_map)); + + tmp = "\"\\"; + while (*tmp) { + ha_bit_set(*tmp, json_escape_map); + tmp++; + } + /* initialize the log header encoding map : '{|}"#' should be encoded with * '#' as prefix, as well as non-printable characters ( <32 or >= 127 ). 
* URL encoding only requires '"', '#' to be encoded as well as non- @@ -2583,19 +3442,133 @@ void deinit_log_forward() } } -/* Builds a log line in <dst> based on <list_format>, and stops before reaching +/* Releases memory for a single log-format node */ +void free_logformat_node(struct logformat_node *node) +{ + if (!node) + return; + + release_sample_expr(node->expr); + node->expr = NULL; + ha_free(&node->name); + ha_free(&node->arg); + ha_free(&node); +} + +/* Releases memory allocated for a log-format string */ +void free_logformat_list(struct list *fmt) +{ + struct logformat_node *lf, *lfb; + + if ((fmt == NULL) || LIST_ISEMPTY(fmt)) + return; + + list_for_each_entry_safe(lf, lfb, fmt, list) { + LIST_DELETE(&lf->list); + free_logformat_node(lf); + } +} + +/* Prepares log-format expression struct */ +void lf_expr_init(struct lf_expr *expr) +{ + LIST_INIT(&expr->list); + expr->flags = LF_FL_NONE; + expr->str = NULL; + expr->conf.file = NULL; + expr->conf.line = 0; +} + +/* Releases and resets a log-format expression */ +void lf_expr_deinit(struct lf_expr *expr) +{ + if ((expr->flags & LF_FL_COMPILED)) + free_logformat_list(&expr->nodes.list); + else + logformat_str_free(&expr->str); + free(expr->conf.file); + /* remove from parent list (if any) */ + LIST_DEL_INIT(&expr->list); + + lf_expr_init(expr); +} + +/* Transfer a compiled log-format expression from <src> to <dst> + * at the end of the operation, <src> is reset + */ +void lf_expr_xfer(struct lf_expr *src, struct lf_expr *dst) +{ + struct logformat_node *lf, *lfb; + + /* first, reset any existing expr */ + lf_expr_deinit(dst); + + BUG_ON(!(src->flags & LF_FL_COMPILED)); + + /* then proceed with transfer between <src> and <dst> */ + dst->conf.file = src->conf.file; + dst->conf.line = src->conf.line; + + dst->flags |= LF_FL_COMPILED; + LIST_INIT(&dst->nodes.list); + + list_for_each_entry_safe(lf, lfb, &src->nodes.list, list) { + LIST_DELETE(&lf->list); + LIST_APPEND(&dst->nodes.list, &lf->list); + } + + /* 
replace <src> with <dst> in <src>'s list by first adding + * <dst> after <src>, then removing <src>... + */ + LIST_INSERT(&src->list, &dst->list); + LIST_DEL_INIT(&src->list); + + /* src is now empty, perform an explicit reset */ + lf_expr_init(src); +} + +/* tries to duplicate an uncompiled logformat expression from <orig> to <dest> + * + * Returns 1 on success and 0 on failure. + */ +int lf_expr_dup(const struct lf_expr *orig, struct lf_expr *dest) +{ + BUG_ON((orig->flags & LF_FL_COMPILED)); + lf_expr_deinit(dest); + if (orig->str) { + dest->str = logformat_str_dup(orig->str); + if (!dest->str) + goto error; + } + if (orig->conf.file) { + dest->conf.file = strdup(orig->conf.file); + if (!dest->conf.file) + goto error; + } + dest->conf.line = orig->conf.line; + + return 1; + + error: + lf_expr_deinit(dest); + return 0; +} + +/* Builds a log line in <dst> based on <lf_expr>, and stops before reaching * <maxsize> characters. Returns the size of the output string in characters, * not counting the trailing zero which is always added if the resulting size * is not zero. It requires a valid session and optionally a stream. If the * stream is NULL, default values will be assumed for the stream part. 
*/ -int sess_build_logline(struct session *sess, struct stream *s, char *dst, size_t maxsize, struct list *list_format) +int sess_build_logline(struct session *sess, struct stream *s, char *dst, size_t maxsize, struct lf_expr *lf_expr) { + struct lf_buildctx *ctx = &lf_buildctx; struct proxy *fe = sess->fe; struct proxy *be; struct http_txn *txn; const struct strm_logs *logs; struct connection *fe_conn, *be_conn; + struct list *list_format = &lf_expr->nodes.list; unsigned int s_flags; unsigned int uniq_id; struct buffer chunk; @@ -2617,6 +3590,8 @@ int sess_build_logline(struct session *sess, struct stream *s, char *dst, size_t struct strm_logs tmp_strm_log; struct ist path; struct http_uri_parser parser; + int g_options = lf_expr->nodes.options; /* global */ + int first_node = 1; /* FIXME: let's limit ourselves to frontend logging for now. */ @@ -2698,73 +3673,203 @@ int sess_build_logline(struct session *sess, struct stream *s, char *dst, size_t tmplog = dst; + /* reset static ctx struct */ + ctx->in_text = 0; + + /* start with global ctx by default */ + lf_buildctx_prepare(ctx, g_options, NULL); + /* fill logbuffer */ - if (LIST_ISEMPTY(list_format)) + if (!(ctx->options & LOG_OPT_ENCODE) && lf_expr_isempty(lf_expr)) return 0; + if (ctx->options & LOG_OPT_ENCODE_JSON) + LOGCHAR('{'); + else if (ctx->options & LOG_OPT_ENCODE_CBOR) { + /* start indefinite-length map */ + LOG_CBOR_BYTE(0xBF); + } + list_for_each_entry(tmp, list_format, list) { #ifdef USE_OPENSSL struct connection *conn; #endif const struct sockaddr_storage *addr; const char *src = NULL; + const char *value_beg = NULL; struct sample *key; - const struct buffer empty = { }; - switch (tmp->type) { - case LOG_FMT_SEPARATOR: - if (!last_isspace) { + /* first start with basic types (use continue statement to skip + * the current node) + */ + if (tmp->type == LOG_FMT_SEPARATOR) { + if (g_options & LOG_OPT_ENCODE) { + /* ignored when global encoding is set */ + continue; + } + if (!last_isspace) { + 
LOGCHAR(' '); + last_isspace = 1; + } + continue; + } + else if (tmp->type == LOG_FMT_TEXT) { + /* text */ + if (g_options & LOG_OPT_ENCODE) { + /* ignored when global encoding is set */ + continue; + } + src = tmp->arg; + iret = strlcpy2(tmplog, src, dst + maxsize - tmplog); + if (iret == 0) + goto out; + tmplog += iret; + last_isspace = 0; /* data was written */ + continue; + } + + /* dynamic types handling (use "goto next_fmt" statement to skip + * the current node) + */ + + if (g_options & LOG_OPT_ENCODE) { + /* only consider global ctx for key encoding */ + lf_buildctx_prepare(ctx, g_options, NULL); + + if (!tmp->name) + goto next_fmt; /* cannot represent anonymous field, ignore */ + + if (!first_node) { + if (ctx->options & LOG_OPT_ENCODE_JSON) { + LOGCHAR(','); LOGCHAR(' '); - last_isspace = 1; } - break; + } - case LOG_FMT_TEXT: // text - src = tmp->arg; - iret = strlcpy2(tmplog, src, dst + maxsize - tmplog); + if (ctx->options & LOG_OPT_ENCODE_JSON) { + LOGCHAR('"'); + iret = strlcpy2(tmplog, tmp->name, dst + maxsize - tmplog); if (iret == 0) goto out; tmplog += iret; - last_isspace = 0; - break; + LOGCHAR('"'); + LOGCHAR(':'); + LOGCHAR(' '); + } + else if (ctx->options & LOG_OPT_ENCODE_CBOR) { + ret = cbor_encode_text(&ctx->encode.cbor, tmplog, + dst + maxsize, tmp->name, + strlen(tmp->name)); + if (ret == NULL) + goto out; + tmplog = ret; + } - case LOG_FMT_EXPR: // sample expression, may be request or response - key = NULL; - if (tmp->options & LOG_OPT_REQ_CAP) - key = sample_fetch_as_type(be, sess, s, SMP_OPT_DIR_REQ|SMP_OPT_FINAL, tmp->expr, SMP_T_STR); + first_node = 0; + } + value_beg = tmplog; + + /* get the chance to consider per-node options (if not already + * set globally) for printing the value + */ + lf_buildctx_prepare(ctx, g_options, tmp); - if (!key && (tmp->options & LOG_OPT_RES_CAP)) - key = sample_fetch_as_type(be, sess, s, SMP_OPT_DIR_RES|SMP_OPT_FINAL, tmp->expr, SMP_T_STR); + if (tmp->type == LOG_FMT_EXPR) { + /* sample expression, 
may be request or response */ + int type; - if (!key && !(tmp->options & (LOG_OPT_REQ_CAP|LOG_OPT_RES_CAP))) // cfg, cli - key = sample_fetch_as_type(be, sess, s, SMP_OPT_FINAL, tmp->expr, SMP_T_STR); + key = NULL; + if (ctx->options & LOG_OPT_REQ_CAP) + key = sample_process(be, sess, s, SMP_OPT_DIR_REQ|SMP_OPT_FINAL, tmp->expr, NULL); - if (tmp->options & LOG_OPT_HTTP) + if (!key && (ctx->options & LOG_OPT_RES_CAP)) + key = sample_process(be, sess, s, SMP_OPT_DIR_RES|SMP_OPT_FINAL, tmp->expr, NULL); + + if (!key && !(ctx->options & (LOG_OPT_REQ_CAP|LOG_OPT_RES_CAP))) // cfg, cli + key = sample_process(be, sess, s, SMP_OPT_FINAL, tmp->expr, NULL); + + type = SMP_T_STR; // default + + if (key && key->data.type == SMP_T_BIN && + (ctx->options & LOG_OPT_BIN)) { + /* output type is binary, and binary option is set: + * preserve output type unless typecast is set to + * force output type to string + */ + if (ctx->typecast != SMP_T_STR) + type = SMP_T_BIN; + } + + /* if encoding is set, try to preserve output type + * with respect to typecast settings + * (ie: str, sint, bool) + * + * Special case for cbor encoding: we also try to + * preserve bin output type since cbor encoders + * know how to deal with binary data. + */ + if (ctx->options & LOG_OPT_ENCODE) { + if (ctx->typecast == SMP_T_STR || + ctx->typecast == SMP_T_SINT || + ctx->typecast == SMP_T_BOOL) { + /* enforce type */ + type = ctx->typecast; + } + else if (key && + (key->data.type == SMP_T_SINT || + key->data.type == SMP_T_BOOL || + ((ctx->options & LOG_OPT_ENCODE_CBOR) && + key->data.type == SMP_T_BIN))) { + /* preserve type */ + type = key->data.type; + } + } + + if (key && !sample_convert(key, type)) + key = NULL; + if (ctx->options & LOG_OPT_HTTP) + ret = lf_encode_chunk(tmplog, dst + maxsize, + '%', http_encode_map, key ? &key->data.u.str : &empty, ctx); + else { + if (key && type == SMP_T_BIN) ret = lf_encode_chunk(tmplog, dst + maxsize, - '%', http_encode_map, key ? 
&key->data.u.str : &empty, tmp); + 0, no_escape_map, + &key->data.u.str, + ctx); + else if (key && type == SMP_T_SINT) + ret = lf_int_encode(tmplog, dst + maxsize - tmplog, + key->data.u.sint, ctx); + else if (key && type == SMP_T_BOOL) + ret = lf_bool_encode(tmplog, dst + maxsize - tmplog, + key->data.u.sint, ctx); else ret = lf_text_len(tmplog, - key ? key->data.u.str.area : NULL, - key ? key->data.u.str.data : 0, - dst + maxsize - tmplog, - tmp); - if (ret == 0) - goto out; - tmplog = ret; - last_isspace = 0; - break; + key ? key->data.u.str.area : NULL, + key ? key->data.u.str.data : 0, + dst + maxsize - tmplog, + ctx); + } + if (ret == NULL) + goto out; + tmplog = ret; + last_isspace = 0; /* consider that data was written */ + goto next_fmt; + } + + BUG_ON(tmp->type != LOG_FMT_ALIAS); + /* logformat alias */ + switch (tmp->alias->type) { case LOG_FMT_CLIENTIP: // %ci addr = (s ? sc_src(s->scf) : sess_src(sess)); if (addr) - ret = lf_ip(tmplog, (struct sockaddr *)addr, dst + maxsize - tmplog, tmp); + ret = lf_ip(tmplog, (struct sockaddr *)addr, dst + maxsize - tmplog, ctx); else - ret = lf_text_len(tmplog, NULL, 0, dst + maxsize - tmplog, tmp); + ret = lf_text_len(tmplog, NULL, 0, dst + maxsize - tmplog, ctx); if (ret == NULL) goto out; tmplog = ret; - last_isspace = 0; break; case LOG_FMT_CLIENTPORT: // %cp @@ -2772,30 +3877,29 @@ int sess_build_logline(struct session *sess, struct stream *s, char *dst, size_t if (addr) { /* sess->listener is always defined when the session's owner is an inbound connections */ if (addr->ss_family == AF_UNIX) - ret = ltoa_o(sess->listener->luid, tmplog, dst + maxsize - tmplog); + ret = lf_int(tmplog, dst + maxsize - tmplog, + sess->listener->luid, ctx, LF_INT_LTOA); else - ret = lf_port(tmplog, (struct sockaddr *)addr, dst + maxsize - tmplog, tmp); + ret = lf_port(tmplog, (struct sockaddr *)addr, dst + maxsize - tmplog, ctx); } else - ret = lf_text_len(tmplog, NULL, 0, dst + maxsize - tmplog, tmp); + ret = lf_text_len(tmplog, 
NULL, 0, dst + maxsize - tmplog, ctx); if (ret == NULL) goto out; tmplog = ret; - last_isspace = 0; break; case LOG_FMT_FRONTENDIP: // %fi addr = (s ? sc_dst(s->scf) : sess_dst(sess)); if (addr) - ret = lf_ip(tmplog, (struct sockaddr *)addr, dst + maxsize - tmplog, tmp); + ret = lf_ip(tmplog, (struct sockaddr *)addr, dst + maxsize - tmplog, ctx); else - ret = lf_text_len(tmplog, NULL, 0, dst + maxsize - tmplog, tmp); + ret = lf_text_len(tmplog, NULL, 0, dst + maxsize - tmplog, ctx); if (ret == NULL) goto out; tmplog = ret; - last_isspace = 0; break; case LOG_FMT_FRONTENDPORT: // %fp @@ -2803,184 +3907,233 @@ int sess_build_logline(struct session *sess, struct stream *s, char *dst, size_t if (addr) { /* sess->listener is always defined when the session's owner is an inbound connections */ if (addr->ss_family == AF_UNIX) - ret = ltoa_o(sess->listener->luid, tmplog, dst + maxsize - tmplog); + ret = lf_int(tmplog, dst + maxsize - tmplog, + sess->listener->luid, ctx, LF_INT_LTOA); else - ret = lf_port(tmplog, (struct sockaddr *)addr, dst + maxsize - tmplog, tmp); + ret = lf_port(tmplog, (struct sockaddr *)addr, dst + maxsize - tmplog, ctx); } else - ret = lf_text_len(tmplog, NULL, 0, dst + maxsize - tmplog, tmp); + ret = lf_text_len(tmplog, NULL, 0, dst + maxsize - tmplog, ctx); if (ret == NULL) goto out; tmplog = ret; - last_isspace = 0; break; case LOG_FMT_BACKENDIP: // %bi if (be_conn && conn_get_src(be_conn)) - ret = lf_ip(tmplog, (const struct sockaddr *)be_conn->src, dst + maxsize - tmplog, tmp); + ret = lf_ip(tmplog, (const struct sockaddr *)be_conn->src, dst + maxsize - tmplog, ctx); else - ret = lf_text_len(tmplog, NULL, 0, dst + maxsize - tmplog, tmp); + ret = lf_text_len(tmplog, NULL, 0, dst + maxsize - tmplog, ctx); if (ret == NULL) goto out; tmplog = ret; - last_isspace = 0; break; case LOG_FMT_BACKENDPORT: // %bp if (be_conn && conn_get_src(be_conn)) - ret = lf_port(tmplog, (struct sockaddr *)be_conn->src, dst + maxsize - tmplog, tmp); + ret = 
lf_port(tmplog, (struct sockaddr *)be_conn->src, dst + maxsize - tmplog, ctx); else - ret = lf_text_len(tmplog, NULL, 0, dst + maxsize - tmplog, tmp); + ret = lf_text_len(tmplog, NULL, 0, dst + maxsize - tmplog, ctx); if (ret == NULL) goto out; tmplog = ret; - last_isspace = 0; break; case LOG_FMT_SERVERIP: // %si if (be_conn && conn_get_dst(be_conn)) - ret = lf_ip(tmplog, (struct sockaddr *)be_conn->dst, dst + maxsize - tmplog, tmp); + ret = lf_ip(tmplog, (struct sockaddr *)be_conn->dst, dst + maxsize - tmplog, ctx); else - ret = lf_text_len(tmplog, NULL, 0, dst + maxsize - tmplog, tmp); + ret = lf_text_len(tmplog, NULL, 0, dst + maxsize - tmplog, ctx); if (ret == NULL) goto out; tmplog = ret; - last_isspace = 0; break; case LOG_FMT_SERVERPORT: // %sp if (be_conn && conn_get_dst(be_conn)) - ret = lf_port(tmplog, (struct sockaddr *)be_conn->dst, dst + maxsize - tmplog, tmp); + ret = lf_port(tmplog, (struct sockaddr *)be_conn->dst, dst + maxsize - tmplog, ctx); else - ret = lf_text_len(tmplog, NULL, 0, dst + maxsize - tmplog, tmp); + ret = lf_text_len(tmplog, NULL, 0, dst + maxsize - tmplog, ctx); if (ret == NULL) goto out; tmplog = ret; - last_isspace = 0; break; case LOG_FMT_DATE: // %t = accept date + { + // "26/Apr/2024:09:39:58.774" + get_localtime(logs->accept_date.tv_sec, &tm); - ret = date2str_log(tmplog, &tm, &logs->accept_date, dst + maxsize - tmplog); + if (ctx->options & LOG_OPT_ENCODE) { + if (!date2str_log(ctx->_buf, &tm, &logs->accept_date, sizeof(ctx->_buf))) + goto out; + ret = lf_rawtext(tmplog, ctx->_buf, dst + maxsize - tmplog, ctx); + } + else // speedup + ret = date2str_log(tmplog, &tm, &logs->accept_date, dst + maxsize - tmplog); if (ret == NULL) goto out; tmplog = ret; - last_isspace = 0; break; + } case LOG_FMT_tr: // %tr = start of request date + { + // "26/Apr/2024:09:39:58.774" + /* Note that the timers are valid if we get here */ tv_ms_add(&tv, &logs->accept_date, logs->t_idle >= 0 ? 
logs->t_idle + logs->t_handshake : 0); get_localtime(tv.tv_sec, &tm); - ret = date2str_log(tmplog, &tm, &tv, dst + maxsize - tmplog); + if (ctx->options & LOG_OPT_ENCODE) { + if (!date2str_log(ctx->_buf, &tm, &tv, sizeof(ctx->_buf))) + goto out; + ret = lf_rawtext(tmplog, ctx->_buf, dst + maxsize - tmplog, ctx); + } + else // speedup + ret = date2str_log(tmplog, &tm, &tv, dst + maxsize - tmplog); if (ret == NULL) goto out; tmplog = ret; - last_isspace = 0; break; + } case LOG_FMT_DATEGMT: // %T = accept date, GMT + { + // "26/Apr/2024:07:41:11 +0000" + get_gmtime(logs->accept_date.tv_sec, &tm); - ret = gmt2str_log(tmplog, &tm, dst + maxsize - tmplog); + if (ctx->options & LOG_OPT_ENCODE) { + if (!gmt2str_log(ctx->_buf, &tm, sizeof(ctx->_buf))) + goto out; + ret = lf_rawtext(tmplog, ctx->_buf, dst + maxsize - tmplog, ctx); + } + else // speedup + ret = gmt2str_log(tmplog, &tm, dst + maxsize - tmplog); if (ret == NULL) goto out; tmplog = ret; - last_isspace = 0; break; + } case LOG_FMT_trg: // %trg = start of request date, GMT + { + // "26/Apr/2024:07:41:11 +0000" + tv_ms_add(&tv, &logs->accept_date, logs->t_idle >= 0 ? 
logs->t_idle + logs->t_handshake : 0); get_gmtime(tv.tv_sec, &tm); - ret = gmt2str_log(tmplog, &tm, dst + maxsize - tmplog); + if (ctx->options & LOG_OPT_ENCODE) { + if (!gmt2str_log(ctx->_buf, &tm, sizeof(ctx->_buf))) + goto out; + ret = lf_rawtext(tmplog, ctx->_buf, dst + maxsize - tmplog, ctx); + } + else // speedup + ret = gmt2str_log(tmplog, &tm, dst + maxsize - tmplog); if (ret == NULL) goto out; tmplog = ret; - last_isspace = 0; break; + } case LOG_FMT_DATELOCAL: // %Tl = accept date, local + { + // "26/Apr/2024:09:42:32 +0200" + get_localtime(logs->accept_date.tv_sec, &tm); - ret = localdate2str_log(tmplog, logs->accept_date.tv_sec, &tm, dst + maxsize - tmplog); + if (ctx->options & LOG_OPT_ENCODE) { + if (!localdate2str_log(ctx->_buf, logs->accept_date.tv_sec, + &tm, sizeof(ctx->_buf))) + goto out; + ret = lf_rawtext(tmplog, ctx->_buf, dst + maxsize - tmplog, ctx); + } + else // speedup + ret = localdate2str_log(tmplog, logs->accept_date.tv_sec, + &tm, dst + maxsize - tmplog); if (ret == NULL) goto out; tmplog = ret; - last_isspace = 0; break; + } case LOG_FMT_trl: // %trl = start of request date, local + { + // "26/Apr/2024:09:42:32 +0200" + tv_ms_add(&tv, &logs->accept_date, logs->t_idle >= 0 ? 
logs->t_idle + logs->t_handshake : 0); get_localtime(tv.tv_sec, &tm); - ret = localdate2str_log(tmplog, tv.tv_sec, &tm, dst + maxsize - tmplog); + if (ctx->options & LOG_OPT_ENCODE) { + if (!localdate2str_log(ctx->_buf, tv.tv_sec, &tm, sizeof(ctx->_buf))) + goto out; + ret = lf_rawtext(tmplog, ctx->_buf, dst + maxsize - tmplog, ctx); + } + else // speedup + ret = localdate2str_log(tmplog, tv.tv_sec, &tm, dst + maxsize - tmplog); if (ret == NULL) goto out; tmplog = ret; - last_isspace = 0; break; + } case LOG_FMT_TS: // %Ts - if (tmp->options & LOG_OPT_HEXA) { - iret = snprintf(tmplog, dst + maxsize - tmplog, "%04X", (unsigned int)logs->accept_date.tv_sec); - if (iret < 0 || iret > dst + maxsize - tmplog) + { + unsigned long value = logs->accept_date.tv_sec; + + if (ctx->options & LOG_OPT_HEXA) { + iret = snprintf(ctx->_buf, sizeof(ctx->_buf), "%04X", (unsigned int)value); + if (iret < 0 || iret >= dst + maxsize - tmplog) goto out; - last_isspace = 0; - tmplog += iret; + ret = lf_rawtext(tmplog, ctx->_buf, dst + maxsize - tmplog, ctx); } else { - ret = ltoa_o(logs->accept_date.tv_sec, tmplog, dst + maxsize - tmplog); - if (ret == NULL) - goto out; - tmplog = ret; - last_isspace = 0; + ret = lf_int(tmplog, dst + maxsize - tmplog, value, ctx, LF_INT_LTOA); } - break; + if (ret == NULL) + goto out; + tmplog = ret; + break; + } case LOG_FMT_MS: // %ms - if (tmp->options & LOG_OPT_HEXA) { - iret = snprintf(tmplog, dst + maxsize - tmplog, "%02X",(unsigned int)logs->accept_date.tv_usec/1000); - if (iret < 0 || iret > dst + maxsize - tmplog) + { + unsigned int value = (unsigned int)logs->accept_date.tv_usec/1000; + + if (ctx->options & LOG_OPT_HEXA) { + iret = snprintf(ctx->_buf, sizeof(ctx->_buf), "%02X", value); + if (iret < 0 || iret >= dst + maxsize - tmplog) goto out; - last_isspace = 0; - tmplog += iret; - } else { - if ((dst + maxsize - tmplog) < 4) - goto out; - ret = utoa_pad((unsigned int)logs->accept_date.tv_usec/1000, - tmplog, 4); + ret = lf_rawtext(tmplog, 
ctx->_buf, dst + maxsize - tmplog, ctx); + } else { + ret = lf_int(tmplog, dst + maxsize - tmplog, value, + ctx, LF_INT_UTOA_PAD_4); + } if (ret == NULL) goto out; tmplog = ret; - last_isspace = 0; + break; } - break; case LOG_FMT_FRONTEND: // %f src = fe->id; - ret = lf_text(tmplog, src, dst + maxsize - tmplog, tmp); + ret = lf_text(tmplog, src, dst + maxsize - tmplog, ctx); if (ret == NULL) goto out; tmplog = ret; - last_isspace = 0; break; case LOG_FMT_FRONTEND_XPRT: // %ft src = fe->id; - if (tmp->options & LOG_OPT_QUOTE) - LOGCHAR('"'); - iret = strlcpy2(tmplog, src, dst + maxsize - tmplog); - if (iret == 0) + LOG_VARTEXT_START(); + ret = lf_rawtext(tmplog, src, dst + maxsize - tmplog, ctx); + if (ret == NULL) goto out; - tmplog += iret; + tmplog = ret; /* sess->listener may be undefined if the session's owner is a health-check */ if (sess->listener && sess->listener->bind_conf->xprt->get_ssl_sock_ctx) LOGCHAR('~'); - if (tmp->options & LOG_OPT_QUOTE) - LOGCHAR('"'); - last_isspace = 0; break; #ifdef USE_OPENSSL case LOG_FMT_SSL_CIPHER: // %sslc @@ -2989,11 +4142,10 @@ int sess_build_logline(struct session *sess, struct stream *s, char *dst, size_t if (conn) { src = ssl_sock_get_cipher_name(conn); } - ret = lf_text(tmplog, src, dst + maxsize - tmplog, tmp); + ret = lf_text(tmplog, src, dst + maxsize - tmplog, ctx); if (ret == NULL) goto out; tmplog = ret; - last_isspace = 0; break; case LOG_FMT_SSL_VERSION: // %sslv @@ -3002,20 +4154,18 @@ int sess_build_logline(struct session *sess, struct stream *s, char *dst, size_t if (conn) { src = ssl_sock_get_proto_version(conn); } - ret = lf_text(tmplog, src, dst + maxsize - tmplog, tmp); + ret = lf_text(tmplog, src, dst + maxsize - tmplog, ctx); if (ret == NULL) goto out; tmplog = ret; - last_isspace = 0; break; #endif case LOG_FMT_BACKEND: // %b src = be->id; - ret = lf_text(tmplog, src, dst + maxsize - tmplog, tmp); + ret = lf_text(tmplog, src, dst + maxsize - tmplog, ctx); if (ret == NULL) goto out; tmplog = ret; - 
last_isspace = 0; break; case LOG_FMT_SERVER: // %s @@ -3035,293 +4185,310 @@ int sess_build_logline(struct session *sess, struct stream *s, char *dst, size_t src = "<NOSRV>"; break; } - ret = lf_text(tmplog, src, dst + maxsize - tmplog, tmp); + ret = lf_text(tmplog, src, dst + maxsize - tmplog, ctx); if (ret == NULL) goto out; tmplog = ret; - last_isspace = 0; break; case LOG_FMT_Th: // %Th = handshake time - ret = ltoa_o(logs->t_handshake, tmplog, dst + maxsize - tmplog); + ret = lf_int(tmplog, dst + maxsize - tmplog, logs->t_handshake, ctx, LF_INT_LTOA); if (ret == NULL) goto out; tmplog = ret; - last_isspace = 0; break; case LOG_FMT_Ti: // %Ti = HTTP idle time - ret = ltoa_o(logs->t_idle, tmplog, dst + maxsize - tmplog); + ret = lf_int(tmplog, dst + maxsize - tmplog, logs->t_idle, ctx, LF_INT_LTOA); if (ret == NULL) goto out; tmplog = ret; - last_isspace = 0; break; case LOG_FMT_TR: // %TR = HTTP request time - ret = ltoa_o((t_request >= 0) ? t_request - logs->t_idle - logs->t_handshake : -1, - tmplog, dst + maxsize - tmplog); + { + long value = (t_request >= 0) ? t_request - logs->t_idle - logs->t_handshake : -1; + + ret = lf_int(tmplog, dst + maxsize - tmplog, value, ctx, LF_INT_LTOA); if (ret == NULL) goto out; tmplog = ret; - last_isspace = 0; break; + } case LOG_FMT_TQ: // %Tq = Th + Ti + TR - ret = ltoa_o(t_request, tmplog, dst + maxsize - tmplog); + ret = lf_int(tmplog, dst + maxsize - tmplog, t_request, ctx, LF_INT_LTOA); if (ret == NULL) goto out; tmplog = ret; - last_isspace = 0; break; case LOG_FMT_TW: // %Tw - ret = ltoa_o((logs->t_queue >= 0) ? logs->t_queue - t_request : -1, - tmplog, dst + maxsize - tmplog); + { + long value = (logs->t_queue >= 0) ? logs->t_queue - t_request : -1; + + ret = lf_int(tmplog, dst + maxsize - tmplog, value, ctx, LF_INT_LTOA); if (ret == NULL) goto out; tmplog = ret; - last_isspace = 0; break; + } case LOG_FMT_TC: // %Tc - ret = ltoa_o((logs->t_connect >= 0) ? 
logs->t_connect - logs->t_queue : -1, - tmplog, dst + maxsize - tmplog); + { + long value = (logs->t_connect >= 0) ? logs->t_connect - logs->t_queue : -1; + + ret = lf_int(tmplog, dst + maxsize - tmplog, value, ctx, LF_INT_LTOA); if (ret == NULL) goto out; tmplog = ret; - last_isspace = 0; break; + } case LOG_FMT_Tr: // %Tr - ret = ltoa_o((logs->t_data >= 0) ? logs->t_data - logs->t_connect : -1, - tmplog, dst + maxsize - tmplog); + { + long value = (logs->t_data >= 0) ? logs->t_data - logs->t_connect : -1; + + ret = lf_int(tmplog, dst + maxsize - tmplog, value, ctx, LF_INT_LTOA); if (ret == NULL) goto out; tmplog = ret; - last_isspace = 0; break; + } case LOG_FMT_TD: // %Td + { + long value; + if (be->mode == PR_MODE_HTTP) - ret = ltoa_o((logs->t_data >= 0) ? logs->t_close - logs->t_data : -1, - tmplog, dst + maxsize - tmplog); + value = (logs->t_data >= 0) ? logs->t_close - logs->t_data : -1; else - ret = ltoa_o((logs->t_connect >= 0) ? logs->t_close - logs->t_connect : -1, - tmplog, dst + maxsize - tmplog); + value = (logs->t_connect >= 0) ? logs->t_close - logs->t_connect : -1; + + ret = lf_int(tmplog, dst + maxsize - tmplog, value, ctx, LF_INT_LTOA); + if (ret == NULL) goto out; tmplog = ret; - last_isspace = 0; break; + } case LOG_FMT_Ta: // %Ta = active time = Tt - Th - Ti + { + long value = logs->t_close - (logs->t_idle >= 0 ? logs->t_idle + logs->t_handshake : 0); + if (!(fe->to_log & LW_BYTES)) - LOGCHAR('+'); - ret = ltoa_o(logs->t_close - (logs->t_idle >= 0 ? 
logs->t_idle + logs->t_handshake : 0), - tmplog, dst + maxsize - tmplog); + LOGMETACHAR('+'); + ret = lf_int(tmplog, dst + maxsize - tmplog, value, ctx, LF_INT_LTOA); if (ret == NULL) goto out; tmplog = ret; - last_isspace = 0; break; + } case LOG_FMT_TT: // %Tt = total time if (!(fe->to_log & LW_BYTES)) - LOGCHAR('+'); - ret = ltoa_o(logs->t_close, tmplog, dst + maxsize - tmplog); + LOGMETACHAR('+'); + ret = lf_int(tmplog, dst + maxsize - tmplog, logs->t_close, ctx, LF_INT_LTOA); if (ret == NULL) goto out; tmplog = ret; - last_isspace = 0; break; case LOG_FMT_TU: // %Tu = total time seen by user = Tt - Ti + { + long value = logs->t_close - (logs->t_idle >= 0 ? logs->t_idle : 0); + if (!(fe->to_log & LW_BYTES)) - LOGCHAR('+'); - ret = ltoa_o(logs->t_close - (logs->t_idle >= 0 ? logs->t_idle : 0), - tmplog, dst + maxsize - tmplog); + LOGMETACHAR('+'); + ret = lf_int(tmplog, dst + maxsize - tmplog, value, ctx, LF_INT_LTOA); if (ret == NULL) goto out; tmplog = ret; - last_isspace = 0; break; + } case LOG_FMT_STATUS: // %ST - ret = ltoa_o(status, tmplog, dst + maxsize - tmplog); + ret = lf_int(tmplog, dst + maxsize - tmplog, status, ctx, LF_INT_LTOA); if (ret == NULL) goto out; tmplog = ret; - last_isspace = 0; break; case LOG_FMT_BYTES: // %B if (!(fe->to_log & LW_BYTES)) - LOGCHAR('+'); - ret = lltoa(logs->bytes_out, tmplog, dst + maxsize - tmplog); + LOGMETACHAR('+'); + ret = lf_int(tmplog, dst + maxsize - tmplog, logs->bytes_out, ctx, LF_INT_LLTOA); if (ret == NULL) goto out; tmplog = ret; - last_isspace = 0; break; case LOG_FMT_BYTES_UP: // %U - ret = lltoa(logs->bytes_in, tmplog, dst + maxsize - tmplog); + ret = lf_int(tmplog, dst + maxsize - tmplog, logs->bytes_in, ctx, LF_INT_LLTOA); if (ret == NULL) goto out; tmplog = ret; - last_isspace = 0; break; case LOG_FMT_CCLIENT: // %CC src = txn ? 
txn->cli_cookie : NULL; - ret = lf_text(tmplog, src, dst + maxsize - tmplog, tmp); + ret = lf_text(tmplog, src, dst + maxsize - tmplog, ctx); if (ret == NULL) goto out; tmplog = ret; - last_isspace = 0; break; case LOG_FMT_CSERVER: // %CS src = txn ? txn->srv_cookie : NULL; - ret = lf_text(tmplog, src, dst + maxsize - tmplog, tmp); + ret = lf_text(tmplog, src, dst + maxsize - tmplog, ctx); if (ret == NULL) goto out; tmplog = ret; - last_isspace = 0; break; case LOG_FMT_TERMSTATE: // %ts - LOGCHAR(sess_term_cond[(s_flags & SF_ERR_MASK) >> SF_ERR_SHIFT]); - LOGCHAR(sess_fin_state[(s_flags & SF_FINST_MASK) >> SF_FINST_SHIFT]); - *tmplog = '\0'; - last_isspace = 0; + { + ctx->_buf[0] = sess_term_cond[(s_flags & SF_ERR_MASK) >> SF_ERR_SHIFT]; + ctx->_buf[1] = sess_fin_state[(s_flags & SF_FINST_MASK) >> SF_FINST_SHIFT]; + ret = lf_rawtext_len(tmplog, ctx->_buf, 2, maxsize - (tmplog - dst), ctx); + if (ret == NULL) + goto out; + tmplog = ret; break; + } case LOG_FMT_TERMSTATE_CK: // %tsc, same as TS with cookie state (for mode HTTP) - LOGCHAR(sess_term_cond[(s_flags & SF_ERR_MASK) >> SF_ERR_SHIFT]); - LOGCHAR(sess_fin_state[(s_flags & SF_FINST_MASK) >> SF_FINST_SHIFT]); - LOGCHAR((txn && (be->ck_opts & PR_CK_ANY)) ? sess_cookie[(txn->flags & TX_CK_MASK) >> TX_CK_SHIFT] : '-'); - LOGCHAR((txn && (be->ck_opts & PR_CK_ANY)) ? sess_set_cookie[(txn->flags & TX_SCK_MASK) >> TX_SCK_SHIFT] : '-'); - last_isspace = 0; + { + ctx->_buf[0] = sess_term_cond[(s_flags & SF_ERR_MASK) >> SF_ERR_SHIFT]; + ctx->_buf[1] = sess_fin_state[(s_flags & SF_FINST_MASK) >> SF_FINST_SHIFT]; + ctx->_buf[2] = (txn && (be->ck_opts & PR_CK_ANY)) ? sess_cookie[(txn->flags & TX_CK_MASK) >> TX_CK_SHIFT] : '-'; + ctx->_buf[3] = (txn && (be->ck_opts & PR_CK_ANY)) ? 
sess_set_cookie[(txn->flags & TX_SCK_MASK) >> TX_SCK_SHIFT] : '-'; + ret = lf_rawtext_len(tmplog, ctx->_buf, 4, maxsize - (tmplog - dst), ctx); + if (ret == NULL) + goto out; + tmplog = ret; break; + } case LOG_FMT_ACTCONN: // %ac - ret = ltoa_o(actconn, tmplog, dst + maxsize - tmplog); + ret = lf_int(tmplog, dst + maxsize - tmplog, actconn, ctx, LF_INT_LTOA); if (ret == NULL) goto out; tmplog = ret; - last_isspace = 0; break; case LOG_FMT_FECONN: // %fc - ret = ltoa_o(fe->feconn, tmplog, dst + maxsize - tmplog); + ret = lf_int(tmplog, dst + maxsize - tmplog, fe->feconn, ctx, LF_INT_LTOA); if (ret == NULL) goto out; tmplog = ret; - last_isspace = 0; break; case LOG_FMT_BECONN: // %bc - ret = ltoa_o(be->beconn, tmplog, dst + maxsize - tmplog); + ret = lf_int(tmplog, dst + maxsize - tmplog, be->beconn, ctx, LF_INT_LTOA); if (ret == NULL) goto out; tmplog = ret; - last_isspace = 0; break; case LOG_FMT_SRVCONN: // %sc + { + unsigned long value; + switch (obj_type(s ? s->target : sess->origin)) { case OBJ_TYPE_SERVER: - ret = ultoa_o(__objt_server(s->target)->cur_sess, - tmplog, dst + maxsize - tmplog); + value = __objt_server(s->target)->cur_sess; break; case OBJ_TYPE_CHECK: - ret = ultoa_o(__objt_check(sess->origin)->server - ? __objt_check(sess->origin)->server->cur_sess - : 0, tmplog, dst + maxsize - tmplog); + value = (__objt_check(sess->origin)->server + ? __objt_check(sess->origin)->server->cur_sess + : 0); break; default: - ret = ultoa_o(0, tmplog, dst + maxsize - tmplog); + value = 0; break; } + ret = lf_int(tmplog, dst + maxsize - tmplog, value, ctx, LF_INT_ULTOA); + if (ret == NULL) goto out; tmplog = ret; - last_isspace = 0; break; + } case LOG_FMT_RETRIES: // %rc + { + long int value = (s ? s->conn_retries : 0); + if (s_flags & SF_REDISP) - LOGCHAR('+'); - ret = ltoa_o((s ? 
s->conn_retries : 0), tmplog, dst + maxsize - tmplog); + LOGMETACHAR('+'); + ret = lf_int(tmplog, dst + maxsize - tmplog, value, ctx, LF_INT_LTOA); if (ret == NULL) goto out; tmplog = ret; - last_isspace = 0; break; + } case LOG_FMT_SRVQUEUE: // %sq - ret = ltoa_o(logs->srv_queue_pos, tmplog, dst + maxsize - tmplog); + ret = lf_int(tmplog, dst + maxsize - tmplog, logs->srv_queue_pos, + ctx, LF_INT_LTOA); if (ret == NULL) goto out; tmplog = ret; - last_isspace = 0; break; case LOG_FMT_BCKQUEUE: // %bq - ret = ltoa_o(logs->prx_queue_pos, tmplog, dst + maxsize - tmplog); + ret = lf_int(tmplog, dst + maxsize - tmplog, logs->prx_queue_pos, + ctx, LF_INT_LTOA); if (ret == NULL) goto out; tmplog = ret; - last_isspace = 0; break; case LOG_FMT_HDRREQUEST: // %hr /* request header */ if (fe->nb_req_cap && s && s->req_cap) { - if (tmp->options & LOG_OPT_QUOTE) - LOGCHAR('"'); + LOG_VARTEXT_START(); LOGCHAR('{'); for (hdr = 0; hdr < fe->nb_req_cap; hdr++) { if (hdr) LOGCHAR('|'); if (s->req_cap[hdr] != NULL) { ret = lf_encode_string(tmplog, dst + maxsize, - '#', hdr_encode_map, s->req_cap[hdr], tmp); - if (ret == NULL || *ret != '\0') + '#', hdr_encode_map, s->req_cap[hdr], ctx); + if (ret == NULL) goto out; tmplog = ret; } } LOGCHAR('}'); - if (tmp->options & LOG_OPT_QUOTE) - LOGCHAR('"'); - last_isspace = 0; } break; case LOG_FMT_HDRREQUESTLIST: // %hrl /* request header list */ if (fe->nb_req_cap && s && s->req_cap) { + LOG_STRARRAY_START(); for (hdr = 0; hdr < fe->nb_req_cap; hdr++) { if (hdr > 0) - LOGCHAR(' '); - if (tmp->options & LOG_OPT_QUOTE) - LOGCHAR('"'); + LOG_STRARRAY_NEXT(); + LOG_VARTEXT_START(); if (s->req_cap[hdr] != NULL) { ret = lf_encode_string(tmplog, dst + maxsize, - '#', hdr_encode_map, s->req_cap[hdr], tmp); - if (ret == NULL || *ret != '\0') + '#', hdr_encode_map, s->req_cap[hdr], ctx); + if (ret == NULL) goto out; tmplog = ret; - } else if (!(tmp->options & LOG_OPT_QUOTE)) + } else if (!(ctx->options & LOG_OPT_QUOTE)) LOGCHAR('-'); - if 
(tmp->options & LOG_OPT_QUOTE) - LOGCHAR('"'); - last_isspace = 0; + /* Manually end variable text as we're emitting multiple + * texts at once + */ + LOG_VARTEXT_END(); } + LOG_STRARRAY_END(); } break; @@ -3329,70 +4496,63 @@ int sess_build_logline(struct session *sess, struct stream *s, char *dst, size_t case LOG_FMT_HDRRESPONS: // %hs /* response header */ if (fe->nb_rsp_cap && s && s->res_cap) { - if (tmp->options & LOG_OPT_QUOTE) - LOGCHAR('"'); + LOG_VARTEXT_START(); LOGCHAR('{'); for (hdr = 0; hdr < fe->nb_rsp_cap; hdr++) { if (hdr) LOGCHAR('|'); if (s->res_cap[hdr] != NULL) { ret = lf_encode_string(tmplog, dst + maxsize, - '#', hdr_encode_map, s->res_cap[hdr], tmp); - if (ret == NULL || *ret != '\0') + '#', hdr_encode_map, s->res_cap[hdr], ctx); + if (ret == NULL) goto out; tmplog = ret; } } LOGCHAR('}'); - last_isspace = 0; - if (tmp->options & LOG_OPT_QUOTE) - LOGCHAR('"'); } break; case LOG_FMT_HDRRESPONSLIST: // %hsl /* response header list */ if (fe->nb_rsp_cap && s && s->res_cap) { + LOG_STRARRAY_START(); for (hdr = 0; hdr < fe->nb_rsp_cap; hdr++) { if (hdr > 0) - LOGCHAR(' '); - if (tmp->options & LOG_OPT_QUOTE) - LOGCHAR('"'); + LOG_STRARRAY_NEXT(); + LOG_VARTEXT_START(); if (s->res_cap[hdr] != NULL) { ret = lf_encode_string(tmplog, dst + maxsize, - '#', hdr_encode_map, s->res_cap[hdr], tmp); - if (ret == NULL || *ret != '\0') + '#', hdr_encode_map, s->res_cap[hdr], ctx); + if (ret == NULL) goto out; tmplog = ret; - } else if (!(tmp->options & LOG_OPT_QUOTE)) + } else if (!(ctx->options & LOG_OPT_QUOTE)) LOGCHAR('-'); - if (tmp->options & LOG_OPT_QUOTE) - LOGCHAR('"'); - last_isspace = 0; + /* Manually end variable text as we're emitting multiple + * texts at once + */ + LOG_VARTEXT_END(); } + LOG_STRARRAY_END(); } break; case LOG_FMT_REQ: // %r /* Request */ - if (tmp->options & LOG_OPT_QUOTE) - LOGCHAR('"'); + LOG_VARTEXT_START(); uri = txn && txn->uri ? 
txn->uri : "<BADREQ>"; ret = lf_encode_string(tmplog, dst + maxsize, - '#', url_encode_map, uri, tmp); - if (ret == NULL || *ret != '\0') + '#', url_encode_map, uri, ctx); + if (ret == NULL) goto out; tmplog = ret; - if (tmp->options & LOG_OPT_QUOTE) - LOGCHAR('"'); - last_isspace = 0; break; case LOG_FMT_HTTP_PATH: // %HP uri = txn && txn->uri ? txn->uri : "<BADREQ>"; - if (tmp->options & LOG_OPT_QUOTE) - LOGCHAR('"'); + LOG_VARTEXT_START(); end = uri + strlen(uri); // look for the first whitespace character @@ -3417,22 +4577,18 @@ int sess_build_logline(struct session *sess, struct stream *s, char *dst, size_t chunk.data = spc - uri; } - ret = lf_encode_chunk(tmplog, dst + maxsize, '#', url_encode_map, &chunk, tmp); - if (ret == NULL || *ret != '\0') + ret = lf_encode_chunk(tmplog, dst + maxsize, '#', url_encode_map, &chunk, ctx); + if (ret == NULL) goto out; tmplog = ret; - if (tmp->options & LOG_OPT_QUOTE) - LOGCHAR('"'); - last_isspace = 0; break; case LOG_FMT_HTTP_PATH_ONLY: // %HPO uri = txn && txn->uri ? 
txn->uri : "<BADREQ>"; - if (tmp->options & LOG_OPT_QUOTE) - LOGCHAR('"'); + LOG_VARTEXT_START(); end = uri + strlen(uri); @@ -3463,20 +4619,16 @@ int sess_build_logline(struct session *sess, struct stream *s, char *dst, size_t chunk.data = path.len; } - ret = lf_encode_chunk(tmplog, dst + maxsize, '#', url_encode_map, &chunk, tmp); - if (ret == NULL || *ret != '\0') + ret = lf_encode_chunk(tmplog, dst + maxsize, '#', url_encode_map, &chunk, ctx); + if (ret == NULL) goto out; tmplog = ret; - if (tmp->options & LOG_OPT_QUOTE) - LOGCHAR('"'); - last_isspace = 0; break; case LOG_FMT_HTTP_QUERY: // %HQ - if (tmp->options & LOG_OPT_QUOTE) - LOGCHAR('"'); + LOG_VARTEXT_START(); if (!txn || !txn->uri) { chunk.area = "<BADREQ>"; @@ -3497,22 +4649,18 @@ int sess_build_logline(struct session *sess, struct stream *s, char *dst, size_t chunk.data = uri - qmark; } - ret = lf_encode_chunk(tmplog, dst + maxsize, '#', url_encode_map, &chunk, tmp); - if (ret == NULL || *ret != '\0') + ret = lf_encode_chunk(tmplog, dst + maxsize, '#', url_encode_map, &chunk, ctx); + if (ret == NULL) goto out; tmplog = ret; - if (tmp->options & LOG_OPT_QUOTE) - LOGCHAR('"'); - last_isspace = 0; break; case LOG_FMT_HTTP_URI: // %HU uri = txn && txn->uri ? txn->uri : "<BADREQ>"; - if (tmp->options & LOG_OPT_QUOTE) - LOGCHAR('"'); + LOG_VARTEXT_START(); end = uri + strlen(uri); // look for the first whitespace character @@ -3537,21 +4685,17 @@ int sess_build_logline(struct session *sess, struct stream *s, char *dst, size_t chunk.data = spc - uri; } - ret = lf_encode_chunk(tmplog, dst + maxsize, '#', url_encode_map, &chunk, tmp); - if (ret == NULL || *ret != '\0') + ret = lf_encode_chunk(tmplog, dst + maxsize, '#', url_encode_map, &chunk, ctx); + if (ret == NULL) goto out; tmplog = ret; - if (tmp->options & LOG_OPT_QUOTE) - LOGCHAR('"'); - last_isspace = 0; break; case LOG_FMT_HTTP_METHOD: // %HM uri = txn && txn->uri ? 
txn->uri : "<BADREQ>"; - if (tmp->options & LOG_OPT_QUOTE) - LOGCHAR('"'); + LOG_VARTEXT_START(); end = uri + strlen(uri); // look for the first whitespace character @@ -3567,21 +4711,17 @@ int sess_build_logline(struct session *sess, struct stream *s, char *dst, size_t chunk.data = spc - uri; } - ret = lf_encode_chunk(tmplog, dst + maxsize, '#', url_encode_map, &chunk, tmp); - if (ret == NULL || *ret != '\0') + ret = lf_encode_chunk(tmplog, dst + maxsize, '#', url_encode_map, &chunk, ctx); + if (ret == NULL) goto out; tmplog = ret; - if (tmp->options & LOG_OPT_QUOTE) - LOGCHAR('"'); - last_isspace = 0; break; case LOG_FMT_HTTP_VERSION: // %HV uri = txn && txn->uri ? txn->uri : "<BADREQ>"; - if (tmp->options & LOG_OPT_QUOTE) - LOGCHAR('"'); + LOG_VARTEXT_START(); end = uri + strlen(uri); // look for the first whitespace character @@ -3612,87 +4752,121 @@ int sess_build_logline(struct session *sess, struct stream *s, char *dst, size_t chunk.data = end - uri; } - ret = lf_encode_chunk(tmplog, dst + maxsize, '#', url_encode_map, &chunk, tmp); - if (ret == NULL || *ret != '\0') + ret = lf_encode_chunk(tmplog, dst + maxsize, '#', url_encode_map, &chunk, ctx); + if (ret == NULL) goto out; tmplog = ret; - if (tmp->options & LOG_OPT_QUOTE) - LOGCHAR('"'); - last_isspace = 0; break; case LOG_FMT_COUNTER: // %rt - if (tmp->options & LOG_OPT_HEXA) { - iret = snprintf(tmplog, dst + maxsize - tmplog, "%04X", uniq_id); - if (iret < 0 || iret > dst + maxsize - tmplog) + if (ctx->options & LOG_OPT_HEXA) { + iret = snprintf(ctx->_buf, sizeof(ctx->_buf), "%04X", uniq_id); + if (iret < 0 || iret >= dst + maxsize - tmplog) goto out; - last_isspace = 0; - tmplog += iret; + ret = lf_rawtext(tmplog, ctx->_buf, dst + maxsize - tmplog, ctx); } else { - ret = ltoa_o(uniq_id, tmplog, dst + maxsize - tmplog); - if (ret == NULL) - goto out; - tmplog = ret; - last_isspace = 0; + ret = lf_int(tmplog, dst + maxsize - tmplog, uniq_id, ctx, LF_INT_LTOA); } + if (ret == NULL) + goto out; + tmplog = 
ret; break; case LOG_FMT_LOGCNT: // %lc - if (tmp->options & LOG_OPT_HEXA) { - iret = snprintf(tmplog, dst + maxsize - tmplog, "%04X", fe->log_count); - if (iret < 0 || iret > dst + maxsize - tmplog) + if (ctx->options & LOG_OPT_HEXA) { + iret = snprintf(ctx->_buf, sizeof(ctx->_buf), "%04X", fe->log_count); + if (iret < 0 || iret >= dst + maxsize - tmplog) goto out; - last_isspace = 0; - tmplog += iret; + ret = lf_rawtext(tmplog, ctx->_buf, dst + maxsize - tmplog, ctx); } else { - ret = ultoa_o(fe->log_count, tmplog, dst + maxsize - tmplog); - if (ret == NULL) - goto out; - tmplog = ret; - last_isspace = 0; + ret = lf_int(tmplog, dst + maxsize - tmplog, fe->log_count, + ctx, LF_INT_ULTOA); } + if (ret == NULL) + goto out; + tmplog = ret; break; case LOG_FMT_HOSTNAME: // %H src = hostname; - ret = lf_text(tmplog, src, dst + maxsize - tmplog, tmp); + ret = lf_text(tmplog, src, dst + maxsize - tmplog, ctx); if (ret == NULL) goto out; tmplog = ret; - last_isspace = 0; break; case LOG_FMT_PID: // %pid - if (tmp->options & LOG_OPT_HEXA) { - iret = snprintf(tmplog, dst + maxsize - tmplog, "%04X", pid); - if (iret < 0 || iret > dst + maxsize - tmplog) + if (ctx->options & LOG_OPT_HEXA) { + iret = snprintf(ctx->_buf, sizeof(ctx->_buf), "%04X", pid); + if (iret < 0 || iret >= dst + maxsize - tmplog) goto out; - last_isspace = 0; - tmplog += iret; + ret = lf_rawtext(tmplog, ctx->_buf, dst + maxsize - tmplog, ctx); } else { - ret = ltoa_o(pid, tmplog, dst + maxsize - tmplog); - if (ret == NULL) - goto out; - tmplog = ret; - last_isspace = 0; + ret = lf_int(tmplog, dst + maxsize - tmplog, pid, ctx, LF_INT_LTOA); } + if (ret == NULL) + goto out; + tmplog = ret; break; case LOG_FMT_UNIQUEID: // %ID ret = NULL; if (s) - ret = lf_text_len(tmplog, s->unique_id.ptr, s->unique_id.len, maxsize - (tmplog - dst), tmp); + ret = lf_text_len(tmplog, s->unique_id.ptr, s->unique_id.len, maxsize - (tmplog - dst), ctx); else - ret = lf_text_len(tmplog, NULL, 0, maxsize - (tmplog - dst), tmp); + 
ret = lf_text_len(tmplog, NULL, 0, maxsize - (tmplog - dst), ctx); if (ret == NULL) goto out; tmplog = ret; - last_isspace = 0; break; } + next_fmt: + if (value_beg == tmplog) { + /* handle the case where no data was generated for the value after + * the key was already announced + */ + if (ctx->options & LOG_OPT_ENCODE_JSON) { + /* for JSON, we simply output 'null' */ + iret = snprintf(tmplog, dst + maxsize - tmplog, "null"); + if (iret < 0 || iret >= dst + maxsize - tmplog) + goto out; + tmplog += iret; + } + if (ctx->options & LOG_OPT_ENCODE_CBOR) { + /* for CBOR, we have the '22' primitive which is known as + * NULL + */ + LOG_CBOR_BYTE(0xF6); + } + + } + + /* if variable text was started for the current node data, we need + * to end it + */ + LOG_VARTEXT_END(); + if (tmplog != value_beg) { + /* data was actually generated for the current dynamic + * node, reset the space hint so that a new space may + * now be emitted when relevant. + */ + last_isspace = 0; + } + } + + /* back to global ctx (some encoding types may need to output + * ending closure) + */ + lf_buildctx_prepare(ctx, g_options, NULL); + + if (ctx->options & LOG_OPT_ENCODE_JSON) + LOGCHAR('}'); + else if (ctx->options & LOG_OPT_ENCODE_CBOR) { + /* end indefinite-length map */ + LOG_CBOR_BYTE(0xFF); } out: @@ -3738,11 +4912,11 @@ void strm_log(struct stream *s) } /* if unique-id was not generated */ - if (!isttest(s->unique_id) && !LIST_ISEMPTY(&sess->fe->format_unique_id)) { + if (!isttest(s->unique_id) && !lf_expr_isempty(&sess->fe->format_unique_id)) { stream_generate_unique_id(s, &sess->fe->format_unique_id); } - if (!LIST_ISEMPTY(&sess->fe->logformat_sd)) { + if (!lf_expr_isempty(&sess->fe->logformat_sd)) { sd_size = build_logline(s, logline_rfc5424, global.max_syslog_len, &sess->fe->logformat_sd); } @@ -3780,13 +4954,13 @@ void sess_log(struct session *sess) if (sess->fe->options2 & PR_O2_LOGERRORS) level = LOG_ERR; - if (!LIST_ISEMPTY(&sess->fe->logformat_sd)) { + if 
(!lf_expr_isempty(&sess->fe->logformat_sd)) { sd_size = sess_build_logline(sess, NULL, logline_rfc5424, global.max_syslog_len, &sess->fe->logformat_sd); } - if (!LIST_ISEMPTY(&sess->fe->logformat_error)) + if (!lf_expr_isempty(&sess->fe->logformat_error)) size = sess_build_logline(sess, NULL, logline, global.max_syslog_len, &sess->fe->logformat_error); else size = sess_build_logline(sess, NULL, logline, global.max_syslog_len, &sess->fe->logformat); @@ -4212,7 +5386,7 @@ static void syslog_io_handler(struct appctx *appctx) char *message; size_t size; - if (unlikely(se_fl_test(appctx->sedesc, (SE_FL_EOS|SE_FL_ERROR|SE_FL_SHR|SE_FL_SHW)))) { + if (unlikely(se_fl_test(appctx->sedesc, (SE_FL_EOS|SE_FL_ERROR)))) { co_skip(sc_oc(sc), co_data(sc_oc(sc))); goto out; } @@ -4337,6 +5511,40 @@ static struct applet syslog_applet = { .release = NULL, }; +/* Atomically append an event to applet >ctx>'s output, prepending it with its + * size in decimal followed by a space. The line is read from vectors <v1> and + * <v2> at offset <ofs> relative to the area's origin, for <len> bytes. It + * returns the number of bytes consumed from the input vectors on success, -1 + * if it temporarily cannot (buffer full), -2 if it will never be able to (too + * large msg). The input vectors are not modified. The caller is responsible for + * making sure that there are at least ofs+len bytes in the input buffer. 
+ */ +ssize_t syslog_applet_append_event(void *ctx, struct ist v1, struct ist v2, size_t ofs, size_t len) +{ + struct appctx *appctx = ctx; + char *p; + + /* first, encode the message's size */ + chunk_reset(&trash); + p = ulltoa(len, trash.area, b_size(&trash)); + if (p) { + trash.data = p - trash.area; + trash.area[trash.data++] = ' '; + } + + /* check if the message has a chance to fit */ + if (unlikely(!p || trash.data + len > b_size(&trash))) + return -2; + + /* try to transfer it or report full */ + trash.data += vp_peek_ofs(v1, v2, ofs, trash.area + trash.data, len); + if (applet_putchk(appctx, &trash) == -1) + return -1; + + /* OK done */ + return len; +} + /* * Parse "log-forward" section and create corresponding sink buffer. * @@ -4397,7 +5605,7 @@ int cfg_parse_log_forward(const char *file, int linenum, char **args, int kwm) px->conf.file = strdup(file); px->conf.line = linenum; px->mode = PR_MODE_SYSLOG; - px->last_change = ns_to_sec(now_ns); + px->fe_counters.last_change = ns_to_sec(now_ns); px->cap = PR_CAP_FE; px->maxconn = 10; px->timeout.client = TICK_ETERNITY; @@ -4653,6 +5861,7 @@ static int postresolve_loggers() REGISTER_CONFIG_SECTION("log-forward", cfg_parse_log_forward, NULL); REGISTER_POST_CHECK(postresolve_loggers); REGISTER_POST_PROXY_CHECK(postcheck_log_backend); +REGISTER_POST_PROXY_CHECK(postcheck_logformat_proxy); REGISTER_PER_THREAD_ALLOC(init_log_buffers); REGISTER_PER_THREAD_FREE(deinit_log_buffers); @@ -258,7 +258,7 @@ static long get_value(struct lru64_head *lru, long a) /* do the painful work here */ a = sum(a); if (item) - lru64_commit(item, (void *)a, lru, 1, 0); + lru64_commit(item, (void *)a, lru, 0, 0); return a; } @@ -170,6 +170,34 @@ int sample_load_map(struct arg *arg, struct sample_conv *conv, return 1; } +/* try to match input sample against map entries, returns matched entry's key + * on success + */ +static int sample_conv_map_key(const struct arg *arg_p, struct sample *smp, void *private) +{ + struct map_descriptor 
*desc; + struct pattern *pat; + + /* get config */ + desc = arg_p[0].data.map; + + /* Execute the match function. */ + pat = pattern_exec_match(&desc->pat, smp, 1); + + /* Match case. */ + if (pat) { + smp->data.type = SMP_T_STR; + smp->flags |= SMP_F_CONST; + smp->data.u.str.area = (char *)pat->ref->pattern; + smp->data.u.str.data = strlen(pat->ref->pattern); + return 1; + } + return 0; +} + +/* try to match input sample against map entries, returns matched entry's value + * on success + */ static int sample_conv_map(const struct arg *arg_p, struct sample *smp, void *private) { struct map_descriptor *desc; @@ -345,22 +373,8 @@ struct show_map_ctx { static int cli_io_handler_pat_list(struct appctx *appctx) { struct show_map_ctx *ctx = appctx->svcctx; - struct stconn *sc = appctx_sc(appctx); struct pat_ref_elt *elt; - /* FIXME: Don't watch the other side !*/ - if (unlikely(sc_opposite(sc)->flags & SC_FL_SHUT_DONE)) { - /* If we're forced to shut down, we might have to remove our - * reference to the last ref_elt being dumped. 
- */ - if (!LIST_ISEMPTY(&ctx->bref.users)) { - HA_RWLOCK_WRLOCK(PATREF_LOCK, &ctx->ref->lock); - LIST_DEL_INIT(&ctx->bref.users); - HA_RWLOCK_WRUNLOCK(PATREF_LOCK, &ctx->ref->lock); - } - return 1; - } - switch (ctx->state) { case STATE_INIT: ctx->state = STATE_LIST; @@ -1226,6 +1240,16 @@ static struct sample_conv_kw_list sample_conv_kws = {ILH, { { "map_int_ip", sample_conv_map, ARG2(1,STR,STR), sample_load_map, SMP_T_SINT, SMP_T_ADDR, (void *)PAT_MATCH_INT }, { "map_ip_ip", sample_conv_map, ARG2(1,STR,STR), sample_load_map, SMP_T_ADDR, SMP_T_ADDR, (void *)PAT_MATCH_IP }, + { "map_str_key", sample_conv_map_key, ARG1(1,STR), sample_load_map, SMP_T_STR, SMP_T_STR, (void *)PAT_MATCH_STR }, + { "map_beg_key", sample_conv_map_key, ARG1(1,STR), sample_load_map, SMP_T_STR, SMP_T_STR, (void *)PAT_MATCH_BEG }, + { "map_sub_key", sample_conv_map_key, ARG1(1,STR), sample_load_map, SMP_T_STR, SMP_T_STR, (void *)PAT_MATCH_SUB }, + { "map_dir_key", sample_conv_map_key, ARG1(1,STR), sample_load_map, SMP_T_STR, SMP_T_STR, (void *)PAT_MATCH_DIR }, + { "map_dom_key", sample_conv_map_key, ARG1(1,STR), sample_load_map, SMP_T_STR, SMP_T_STR, (void *)PAT_MATCH_DOM }, + { "map_end_key", sample_conv_map_key, ARG1(1,STR), sample_load_map, SMP_T_STR, SMP_T_STR, (void *)PAT_MATCH_END }, + { "map_reg_key", sample_conv_map_key, ARG1(1,STR), sample_load_map, SMP_T_STR, SMP_T_STR, (void *)PAT_MATCH_REG }, + { "map_int_key", sample_conv_map_key, ARG1(1,STR), sample_load_map, SMP_T_SINT, SMP_T_STR, (void *)PAT_MATCH_INT }, + { "map_ip_key", sample_conv_map_key, ARG1(1,STR), sample_load_map, SMP_T_ADDR, SMP_T_STR, (void *)PAT_MATCH_IP }, + { /* END */ }, }}; diff --git a/src/mux_fcgi.c b/src/mux_fcgi.c index 448d8bb..102a4f0 100644 --- a/src/mux_fcgi.c +++ b/src/mux_fcgi.c @@ -488,14 +488,14 @@ static int fcgi_buf_available(void *target) struct fcgi_conn *fconn = target; struct fcgi_strm *fstrm; - if ((fconn->flags & FCGI_CF_DEM_DALLOC) && b_alloc(&fconn->dbuf)) { + if ((fconn->flags & 
FCGI_CF_DEM_DALLOC) && b_alloc(&fconn->dbuf, DB_MUX_RX)) { TRACE_STATE("unblocking fconn, dbuf allocated", FCGI_EV_FCONN_RECV|FCGI_EV_FCONN_BLK|FCGI_EV_FCONN_WAKE, fconn->conn); fconn->flags &= ~FCGI_CF_DEM_DALLOC; fcgi_conn_restart_reading(fconn, 1); return 1; } - if ((fconn->flags & FCGI_CF_MUX_MALLOC) && b_alloc(br_tail(fconn->mbuf))) { + if ((fconn->flags & FCGI_CF_MUX_MALLOC) && b_alloc(br_tail(fconn->mbuf), DB_MUX_TX)) { TRACE_STATE("unblocking fconn, mbuf allocated", FCGI_EV_FCONN_SEND|FCGI_EV_FCONN_BLK|FCGI_EV_FCONN_WAKE, fconn->conn); fconn->flags &= ~FCGI_CF_MUX_MALLOC; if (fconn->flags & FCGI_CF_DEM_MROOM) { @@ -507,7 +507,7 @@ static int fcgi_buf_available(void *target) if ((fconn->flags & FCGI_CF_DEM_SALLOC) && (fstrm = fcgi_conn_st_by_id(fconn, fconn->dsi)) && fcgi_strm_sc(fstrm) && - b_alloc(&fstrm->rxbuf)) { + b_alloc(&fstrm->rxbuf, DB_SE_RX)) { TRACE_STATE("unblocking fstrm, rxbuf allocated", FCGI_EV_STRM_RECV|FCGI_EV_FSTRM_BLK|FCGI_EV_STRM_WAKE, fconn->conn, fstrm); fconn->flags &= ~FCGI_CF_DEM_SALLOC; fcgi_conn_restart_reading(fconn, 1); @@ -523,10 +523,8 @@ static inline struct buffer *fcgi_get_buf(struct fcgi_conn *fconn, struct buffer struct buffer *buf = NULL; if (likely(!LIST_INLIST(&fconn->buf_wait.list)) && - unlikely((buf = b_alloc(bptr)) == NULL)) { - fconn->buf_wait.target = fconn; - fconn->buf_wait.wakeup_cb = fcgi_buf_available; - LIST_APPEND(&th_ctx->buffer_wq, &fconn->buf_wait.list); + unlikely((buf = b_alloc(bptr, DB_MUX_RX)) == NULL)) { + b_queue(DB_MUX_RX, &fconn->buf_wait, fconn, fcgi_buf_available); } return buf; } @@ -755,8 +753,7 @@ static void fcgi_release(struct fcgi_conn *fconn) TRACE_POINT(FCGI_EV_FCONN_END); - if (LIST_INLIST(&fconn->buf_wait.list)) - LIST_DEL_INIT(&fconn->buf_wait.list); + b_dequeue(&fconn->buf_wait); fcgi_release_buf(fconn, &fconn->dbuf); fcgi_release_mbuf(fconn); @@ -3089,7 +3086,9 @@ static int fcgi_wake(struct connection *conn) static int fcgi_ctl(struct connection *conn, enum mux_ctl_type mux_ctl, 
void *output) { + struct fcgi_conn *fconn = conn->ctx; int ret = 0; + switch (mux_ctl) { case MUX_CTL_STATUS: if (!(conn->flags & CO_FL_WAIT_XPRT)) @@ -3097,6 +3096,10 @@ static int fcgi_ctl(struct connection *conn, enum mux_ctl_type mux_ctl, void *ou return ret; case MUX_CTL_EXIT_STATUS: return MUX_ES_UNKNOWN; + case MUX_CTL_GET_NBSTRM: + return fconn->nb_streams; + case MUX_CTL_GET_MAXSTRM: + return fconn->streams_limit; default: return -1; } @@ -3581,6 +3584,10 @@ static void fcgi_detach(struct sedesc *sd) } } if (eb_is_empty(&fconn->streams_by_id)) { + /* mark that the tasklet may lose its context to another thread and + * that the handler needs to check it under the idle conns lock. + */ + HA_ATOMIC_OR(&fconn->wait_event.tasklet->state, TASK_F_USR1); if (session_check_idle_conn(fconn->conn->owner, fconn->conn) != 0) { /* The connection is destroyed, let's leave */ TRACE_DEVEL("outgoing connection killed", FCGI_EV_STRM_END|FCGI_EV_FCONN_ERR); @@ -3619,7 +3626,7 @@ static void fcgi_detach(struct sedesc *sd) } else if (!fconn->conn->hash_node->node.node.leaf_p && fcgi_avail_streams(fconn->conn) > 0 && objt_server(fconn->conn->target) && - !LIST_INLIST(&fconn->conn->session_list)) { + !LIST_INLIST(&fconn->conn->sess_el)) { srv_add_to_avail_list(__objt_server(fconn->conn->target), fconn->conn); } } @@ -3787,24 +3794,16 @@ struct task *fcgi_deferred_shut(struct task *t, void *ctx, unsigned int state) return NULL; } -/* shutr() called by the stream connector (mux_ops.shutr) */ -static void fcgi_shutr(struct stconn *sc, enum co_shr_mode mode) -{ - struct fcgi_strm *fstrm = __sc_mux_strm(sc); - - TRACE_POINT(FCGI_EV_STRM_SHUT, fstrm->fconn->conn, fstrm); - if (!mode) - return; - fcgi_do_shutr(fstrm); -} - -/* shutw() called by the stream connector (mux_ops.shutw) */ -static void fcgi_shutw(struct stconn *sc, enum co_shw_mode mode) +static void fcgi_shut(struct stconn *sc, enum se_shut_mode mode, struct se_abort_info *reason) { struct fcgi_strm *fstrm = 
__sc_mux_strm(sc); - TRACE_POINT(FCGI_EV_STRM_SHUT, fstrm->fconn->conn, fstrm); - fcgi_do_shutw(fstrm); + TRACE_ENTER(FCGI_EV_STRM_SHUT, fstrm->fconn->conn, fstrm); + if (mode & (SE_SHW_SILENT|SE_SHW_NORMAL)) + fcgi_do_shutw(fstrm); + if (mode & SE_SHR_RESET) + fcgi_do_shutr(fstrm); + TRACE_LEAVE(FCGI_EV_STRM_SHUT, fstrm->fconn->conn, fstrm); } /* Called from the upper layer, to subscribe <es> to events <event_type>. The @@ -4163,25 +4162,35 @@ static int fcgi_show_fd(struct buffer *msg, struct connection *conn) * Return 0 if successful, non-zero otherwise. * Expected to be called with the old thread lock held. */ -static int fcgi_takeover(struct connection *conn, int orig_tid) +static int fcgi_takeover(struct connection *conn, int orig_tid, int release) { struct fcgi_conn *fcgi = conn->ctx; struct task *task; - struct task *new_task; - struct tasklet *new_tasklet; + struct task *new_task = NULL; + struct tasklet *new_tasklet = NULL; /* Pre-allocate tasks so that we don't have to roll back after the xprt * has been migrated. */ - new_task = task_new_here(); - new_tasklet = tasklet_new(); - if (!new_task || !new_tasklet) - goto fail; + if (!release) { + /* If the connection is attached to a buffer_wait (extremely + * rare), it will be woken up at any instant by its own thread + * and we can't undo it anyway, so let's give up on this one. + * It's not interesting anyway since it's not usable right now. 
+ */ + if (LIST_INLIST(&fcgi->buf_wait.list)) + goto fail; + + new_task = task_new_here(); + new_tasklet = tasklet_new(); + if (!new_task || !new_tasklet) + goto fail; + } if (fd_takeover(conn->handle.fd, conn) != 0) goto fail; - if (conn->xprt->takeover && conn->xprt->takeover(conn, conn->xprt_ctx, orig_tid) != 0) { + if (conn->xprt->takeover && conn->xprt->takeover(conn, conn->xprt_ctx, orig_tid, release) != 0) { /* We failed to takeover the xprt, even if the connection may * still be valid, flag it as error'd, as we have already * taken over the fd, and wake the tasklet, so that it will @@ -4208,8 +4217,10 @@ static int fcgi_takeover(struct connection *conn, int orig_tid) fcgi->task = new_task; new_task = NULL; - fcgi->task->process = fcgi_timeout_task; - fcgi->task->context = fcgi; + if (!release) { + fcgi->task->process = fcgi_timeout_task; + fcgi->task->context = fcgi; + } } /* To let the tasklet know it should free itself, and do nothing else, @@ -4219,10 +4230,26 @@ static int fcgi_takeover(struct connection *conn, int orig_tid) tasklet_wakeup_on(fcgi->wait_event.tasklet, orig_tid); fcgi->wait_event.tasklet = new_tasklet; - fcgi->wait_event.tasklet->process = fcgi_io_cb; - fcgi->wait_event.tasklet->context = fcgi; - fcgi->conn->xprt->subscribe(fcgi->conn, fcgi->conn->xprt_ctx, - SUB_RETRY_RECV, &fcgi->wait_event); + if (!release) { + fcgi->wait_event.tasklet->process = fcgi_io_cb; + fcgi->wait_event.tasklet->context = fcgi; + fcgi->conn->xprt->subscribe(fcgi->conn, fcgi->conn->xprt_ctx, + SUB_RETRY_RECV, &fcgi->wait_event); + } + + if (release) { + /* we're being called for a server deletion and are running + * under thread isolation. That's the only way we can + * unregister a possible subscription of the original + * connection from its owner thread's queue, as this involves + * manipulating thread-unsafe areas. Note that it is not + * possible to just call b_dequeue() here as it would update + * the current thread's bufq_map and not the original one. 
+ */ + BUG_ON(!thread_isolated()); + if (LIST_INLIST(&fcgi->buf_wait.list)) + _b_dequeue(&fcgi->buf_wait, orig_tid); + } if (new_task) __task_free(new_task); @@ -4252,8 +4279,7 @@ static const struct mux_ops mux_fcgi_ops = { .snd_buf = fcgi_snd_buf, .subscribe = fcgi_subscribe, .unsubscribe = fcgi_unsubscribe, - .shutr = fcgi_shutr, - .shutw = fcgi_shutw, + .shut = fcgi_shut, .ctl = fcgi_ctl, .sctl = fcgi_sctl, .show_fd = fcgi_show_fd, diff --git a/src/mux_h1.c b/src/mux_h1.c index 6593661..6bdaf71 100644 --- a/src/mux_h1.c +++ b/src/mux_h1.c @@ -227,7 +227,7 @@ enum { }; -static struct name_desc h1_stats[] = { +static struct stat_col h1_stats[] = { [H1_ST_OPEN_CONN] = { .name = "h1_open_connections", .desc = "Count of currently open connections" }, [H1_ST_OPEN_STREAM] = { .name = "h1_open_streams", @@ -264,21 +264,54 @@ static struct h1_counters { #endif } h1_counters; -static void h1_fill_stats(void *data, struct field *stats) +static int h1_fill_stats(void *data, struct field *stats, unsigned int *selected_field) { struct h1_counters *counters = data; + unsigned int current_field = (selected_field != NULL ? 
*selected_field : 0); - stats[H1_ST_OPEN_CONN] = mkf_u64(FN_GAUGE, counters->open_conns); - stats[H1_ST_OPEN_STREAM] = mkf_u64(FN_GAUGE, counters->open_streams); - stats[H1_ST_TOTAL_CONN] = mkf_u64(FN_COUNTER, counters->total_conns); - stats[H1_ST_TOTAL_STREAM] = mkf_u64(FN_COUNTER, counters->total_streams); + for (; current_field < H1_STATS_COUNT; current_field++) { + struct field metric = { 0 }; - stats[H1_ST_BYTES_IN] = mkf_u64(FN_COUNTER, counters->bytes_in); - stats[H1_ST_BYTES_OUT] = mkf_u64(FN_COUNTER, counters->bytes_out); + switch (current_field) { + case H1_ST_OPEN_CONN: + metric = mkf_u64(FN_GAUGE, counters->open_conns); + break; + case H1_ST_OPEN_STREAM: + metric = mkf_u64(FN_GAUGE, counters->open_streams); + break; + case H1_ST_TOTAL_CONN: + metric = mkf_u64(FN_COUNTER, counters->total_conns); + break; + case H1_ST_TOTAL_STREAM: + metric = mkf_u64(FN_COUNTER, counters->total_streams); + break; + case H1_ST_BYTES_IN: + metric = mkf_u64(FN_COUNTER, counters->bytes_in); + break; + case H1_ST_BYTES_OUT: + metric = mkf_u64(FN_COUNTER, counters->bytes_out); + break; #if defined(USE_LINUX_SPLICE) - stats[H1_ST_SPLICED_BYTES_IN] = mkf_u64(FN_COUNTER, counters->spliced_bytes_in); - stats[H1_ST_SPLICED_BYTES_OUT] = mkf_u64(FN_COUNTER, counters->spliced_bytes_out); + case H1_ST_SPLICED_BYTES_IN: + metric = mkf_u64(FN_COUNTER, counters->spliced_bytes_in); + break; + case H1_ST_SPLICED_BYTES_OUT: + metric = mkf_u64(FN_COUNTER, counters->spliced_bytes_out); + break; #endif + default: + /* not used for frontends. If a specific metric + * is requested, return an error. Otherwise continue. 
+ */ + if (selected_field != NULL) + return 0; + continue; + } + stats[current_field] = metric; + if (selected_field != NULL) + break; + } + return 1; } static struct stats_module h1_stats_module = { @@ -302,6 +335,8 @@ DECLARE_STATIC_POOL(pool_head_h1s, "h1s", sizeof(struct h1s)); static int h1_recv(struct h1c *h1c); static int h1_send(struct h1c *h1c); static int h1_process(struct h1c *h1c); +static void h1_release(struct h1c *h1c); + /* h1_io_cb is exported to see it resolved in "show fd" */ struct task *h1_io_cb(struct task *t, void *ctx, unsigned int state); struct task *h1_timeout_task(struct task *t, void *context, unsigned int state); @@ -466,45 +501,91 @@ static int h1_buf_available(void *target) { struct h1c *h1c = target; - if ((h1c->flags & H1C_F_IN_ALLOC) && b_alloc(&h1c->ibuf)) { - TRACE_STATE("unblocking h1c, ibuf allocated", H1_EV_H1C_RECV|H1_EV_H1C_BLK|H1_EV_H1C_WAKE, h1c->conn); + if (h1c->flags & H1C_F_IN_ALLOC) { h1c->flags &= ~H1C_F_IN_ALLOC; - if (h1_recv_allowed(h1c)) - tasklet_wakeup(h1c->wait_event.tasklet); - return 1; + h1c->flags |= H1C_F_IN_MAYALLOC; } - if ((h1c->flags & H1C_F_OUT_ALLOC) && b_alloc(&h1c->obuf)) { - TRACE_STATE("unblocking h1s, obuf allocated", H1_EV_TX_DATA|H1_EV_H1S_BLK|H1_EV_STRM_WAKE, h1c->conn, h1c->h1s); + if ((h1c->flags & H1C_F_OUT_ALLOC) && h1c->h1s) { + TRACE_STATE("unblocking h1s, obuf allocatable", H1_EV_TX_DATA|H1_EV_H1S_BLK|H1_EV_STRM_WAKE, h1c->conn, h1c->h1s); h1c->flags &= ~H1C_F_OUT_ALLOC; - if (h1c->h1s) - h1_wake_stream_for_send(h1c->h1s); - return 1; + h1c->flags |= H1C_F_OUT_MAYALLOC; + h1_wake_stream_for_send(h1c->h1s); } - if ((h1c->flags & H1C_F_IN_SALLOC) && h1c->h1s && b_alloc(&h1c->h1s->rxbuf)) { - TRACE_STATE("unblocking h1c, stream rxbuf allocated", H1_EV_H1C_RECV|H1_EV_H1C_BLK|H1_EV_H1C_WAKE, h1c->conn); + if ((h1c->flags & H1C_F_IN_SALLOC) && h1c->h1s) { + TRACE_STATE("unblocking h1c, stream rxbuf allocatable", H1_EV_H1C_RECV|H1_EV_H1C_BLK|H1_EV_H1C_WAKE, h1c->conn); h1c->flags &= 
~H1C_F_IN_SALLOC; + h1c->flags |= H1C_F_IN_SMAYALLOC; tasklet_wakeup(h1c->wait_event.tasklet); - return 1; } - return 0; + if ((h1c->flags & H1C_F_IN_MAYALLOC) && h1_recv_allowed(h1c)) { + TRACE_STATE("unblocking h1c, ibuf allocatable", H1_EV_H1C_RECV|H1_EV_H1C_BLK|H1_EV_H1C_WAKE, h1c->conn); + tasklet_wakeup(h1c->wait_event.tasklet); + } + + return 1; +} + +/* + * Allocate the h1c's ibuf. If if fails, it adds the mux in buffer wait queue, + * and sets the H1C_F_IN_ALLOC flag on the connection. It will advertise a more + * urgent allocation when a stream is already present than when none is present + * since in one case a buffer might be needed to permit to release another one, + * while in the other case we've simply not started anything. + */ +static inline struct buffer *h1_get_ibuf(struct h1c *h1c) +{ + struct buffer *buf; + + if (unlikely((buf = b_alloc(&h1c->ibuf, DB_MUX_RX | + ((h1c->flags & H1C_F_IN_MAYALLOC) ? DB_F_NOQUEUE : 0))) == NULL)) { + b_queue(DB_MUX_RX, &h1c->buf_wait, h1c, h1_buf_available); + h1c->flags |= H1C_F_IN_ALLOC; + } + else + h1c->flags &= ~H1C_F_IN_MAYALLOC; + + return buf; } /* - * Allocate a buffer. If if fails, it adds the mux in buffer wait queue. + * Allocate the h1c's obuf. If if fails, it adds the mux in buffer wait queue, + * and sets the H1C_F_OUT_ALLOC flag on the connection. */ -static inline struct buffer *h1_get_buf(struct h1c *h1c, struct buffer *bptr) +static inline struct buffer *h1_get_obuf(struct h1c *h1c) { - struct buffer *buf = NULL; + struct buffer *buf; - if (likely(!LIST_INLIST(&h1c->buf_wait.list)) && - unlikely((buf = b_alloc(bptr)) == NULL)) { - h1c->buf_wait.target = h1c; - h1c->buf_wait.wakeup_cb = h1_buf_available; - LIST_APPEND(&th_ctx->buffer_wq, &h1c->buf_wait.list); + if (unlikely((buf = b_alloc(&h1c->obuf, DB_MUX_TX | + ((h1c->flags & H1C_F_OUT_MAYALLOC) ? 
DB_F_NOQUEUE : 0))) == NULL)) { + b_queue(DB_MUX_TX, &h1c->buf_wait, h1c, h1_buf_available); + h1c->flags |= H1C_F_OUT_ALLOC; } + else + h1c->flags &= ~H1C_F_OUT_MAYALLOC; + + return buf; +} + +/* + * Allocate the h1s's rxbuf. If if fails, it adds the mux in buffer wait queue, + * and sets the H1C_F_IN_SALLOC flag on the connection. + */ +static inline struct buffer *h1_get_rxbuf(struct h1s *h1s) +{ + struct h1c *h1c = h1s->h1c; + struct buffer *buf; + + if (unlikely((buf = b_alloc(&h1s->rxbuf, DB_SE_RX | + ((h1c->flags & H1C_F_IN_SMAYALLOC) ? DB_F_NOQUEUE : 0))) == NULL)) { + b_queue(DB_SE_RX, &h1c->buf_wait, h1c, h1_buf_available); + h1c->flags |= H1C_F_IN_SALLOC; + } + else + h1c->flags &= ~H1C_F_IN_SMAYALLOC; + return buf; } @@ -521,11 +602,11 @@ static inline void h1_release_buf(struct h1c *h1c, struct buffer *bptr) } /* Returns 1 if the H1 connection is alive (IDLE, EMBRYONIC, RUNNING or - * RUNNING). Ortherwise 0 is returned. + * DRAINING). Ortherwise 0 is returned. */ static inline int h1_is_alive(const struct h1c *h1c) { - return (h1c->state <= H1_CS_RUNNING); + return (h1c->state <= H1_CS_DRAINING); } /* Switch the H1 connection to CLOSING or CLOSED mode, depending on the output @@ -869,7 +950,8 @@ static void h1s_destroy(struct h1s *h1s) h1_release_buf(h1c, &h1s->rxbuf); h1c->flags &= ~(H1C_F_WANT_FASTFWD| - H1C_F_OUT_FULL|H1C_F_OUT_ALLOC|H1C_F_IN_SALLOC| + H1C_F_OUT_FULL|H1C_F_OUT_ALLOC|H1C_F_OUT_MAYALLOC| + H1C_F_IN_SALLOC|H1C_F_IN_SMAYALLOC| H1C_F_CO_MSG_MORE|H1C_F_CO_STREAMER); if (!(h1c->flags & (H1C_F_EOS|H1C_F_ERR_PENDING|H1C_F_ERROR|H1C_F_ABRT_PENDING|H1C_F_ABRTED)) && /* No error/read0/abort */ @@ -893,6 +975,162 @@ static void h1s_destroy(struct h1s *h1s) } } + +/* Check if shutdown performed of an an H1S must lead to a connection shutdown + * of if it can be kept alive. It returns 1 if the connection must be shut down + * and 0 it if can be kept alive. 
+ */ +static int h1s_must_shut_conn(struct h1s *h1s) +{ + struct h1c *h1c = h1s->h1c; + int ret; + + TRACE_ENTER(H1_EV_STRM_SHUT, h1c->conn, h1s); + + if (se_fl_test(h1s->sd, SE_FL_KILL_CONN)) { + TRACE_STATE("stream wants to kill the connection", H1_EV_STRM_SHUT, h1c->conn, h1s); + ret = 1; + } + else if (h1c->state == H1_CS_CLOSING || (h1c->flags & (H1C_F_EOS|H1C_F_ERR_PENDING|H1C_F_ERROR))) { + TRACE_STATE("shutdown on connection (EOS || CLOSING || ERROR)", H1_EV_STRM_SHUT, h1c->conn, h1s); + ret = 1; + } + else if (h1c->state == H1_CS_UPGRADING) { + TRACE_STATE("keep connection alive (UPGRADING)", H1_EV_STRM_SHUT, h1c->conn, h1s); + ret = 0; + } + else if (!(h1c->flags & H1C_F_IS_BACK) && h1s->req.state != H1_MSG_DONE && h1s->res.state == H1_MSG_DONE) { + TRACE_STATE("defer shutdown to drain request first", H1_EV_STRM_SHUT, h1c->conn, h1s); + ret = 0; + } + else if (((h1s->flags & H1S_F_WANT_KAL) && h1s->req.state == H1_MSG_DONE && h1s->res.state == H1_MSG_DONE)) { + TRACE_STATE("keep connection alive (want_kal)", H1_EV_STRM_SHUT, h1c->conn, h1s); + ret = 0; + } + else { + /* The default case, do the shutdown */ + ret = 1; + } + + TRACE_LEAVE(H1_EV_STRM_SHUT, h1c->conn, h1s); + return ret; +} + +/* Really detach the H1S. Most of time of it called from h1_detach() when the + * stream is detached from the connection. But if the request message must be + * drained first, the detach is deferred. + */ +static void h1s_finish_detach(struct h1s *h1s) +{ + struct h1c *h1c; + struct session *sess; + int is_not_first; + + TRACE_ENTER(H1_EV_STRM_END, h1s ? h1s->h1c->conn : NULL, h1s); + + sess = h1s->sess; + h1c = h1s->h1c; + + sess->accept_date = date; + sess->accept_ts = now_ns; + sess->t_handshake = 0; + sess->t_idle = -1; + + is_not_first = h1s->flags & H1S_F_NOT_FIRST; + h1s_destroy(h1s); + + if (h1c->state == H1_CS_IDLE && (h1c->flags & H1C_F_IS_BACK)) { + /* this connection may be killed at any moment, we want it to + * die "cleanly" (i.e. only an RST). 
+ */ + h1c->flags |= H1C_F_SILENT_SHUT; + + /* If there are any excess server data in the input buffer, + * release it and close the connection ASAP (some data may + * remain in the output buffer). This happens if a server sends + * invalid responses. So in such case, we don't want to reuse + * the connection + */ + if (b_data(&h1c->ibuf)) { + h1_release_buf(h1c, &h1c->ibuf); + h1_close(h1c); + TRACE_DEVEL("remaining data on detach, kill connection", H1_EV_STRM_END|H1_EV_H1C_END); + goto release; + } + + if (h1c->conn->flags & CO_FL_PRIVATE) { + /* Add the connection in the session server list, if not already done */ + if (!session_add_conn(sess, h1c->conn, h1c->conn->target)) { + h1c->conn->owner = NULL; + h1c->conn->mux->destroy(h1c); + goto end; + } + /* Always idle at this step */ + + /* mark that the tasklet may lose its context to another thread and + * that the handler needs to check it under the idle conns lock. + */ + HA_ATOMIC_OR(&h1c->wait_event.tasklet->state, TASK_F_USR1); + if (session_check_idle_conn(sess, h1c->conn)) { + /* The connection got destroyed, let's leave */ + TRACE_DEVEL("outgoing connection killed", H1_EV_STRM_END|H1_EV_H1C_END); + goto end; + } + } + else { + if (h1c->conn->owner == sess) + h1c->conn->owner = NULL; + + /* mark that the tasklet may lose its context to another thread and + * that the handler needs to check it under the idle conns lock. 
+ */ + HA_ATOMIC_OR(&h1c->wait_event.tasklet->state, TASK_F_USR1); + h1c->conn->xprt->subscribe(h1c->conn, h1c->conn->xprt_ctx, SUB_RETRY_RECV, &h1c->wait_event); + xprt_set_idle(h1c->conn, h1c->conn->xprt, h1c->conn->xprt_ctx); + + if (!srv_add_to_idle_list(objt_server(h1c->conn->target), h1c->conn, is_not_first)) { + /* The server doesn't want it, let's kill the connection right away */ + h1c->conn->mux->destroy(h1c); + TRACE_DEVEL("outgoing connection killed", H1_EV_STRM_END|H1_EV_H1C_END); + goto end; + } + /* At this point, the connection has been added to the + * server idle list, so another thread may already have + * hijacked it, so we can't do anything with it. + */ + return; + } + } + + release: + /* We don't want to close right now unless the connection is in error or shut down for writes */ + if ((h1c->flags & H1C_F_ERROR) || + (h1c->state == H1_CS_CLOSED) || + (h1c->state == H1_CS_CLOSING && !b_data(&h1c->obuf)) || + !h1c->conn->owner) { + TRACE_DEVEL("killing dead connection", H1_EV_STRM_END, h1c->conn); + h1_release(h1c); + } + else { + if (h1c->state == H1_CS_IDLE) { + /* If we have a new request, process it immediately or + * subscribe for reads waiting for new data + */ + if (unlikely(b_data(&h1c->ibuf))) { + if (h1_process(h1c) == -1) + goto end; + } + else + h1c->conn->xprt->subscribe(h1c->conn, h1c->conn->xprt_ctx, SUB_RETRY_RECV, &h1c->wait_event); + } + h1_set_idle_expiration(h1c); + h1_refresh_timeout(h1c); + } + end: + TRACE_LEAVE(H1_EV_STRM_END); +} + + /* * Initialize the mux once it's attached. 
It is expected that conn->ctx points * to the existing stream connector (for outgoing connections or for incoming @@ -1049,9 +1287,7 @@ static void h1_release(struct h1c *h1c) } - if (LIST_INLIST(&h1c->buf_wait.list)) - LIST_DEL_INIT(&h1c->buf_wait.list); - + b_dequeue(&h1c->buf_wait); h1_release_buf(h1c, &h1c->ibuf); h1_release_buf(h1c, &h1c->obuf); @@ -1416,21 +1652,33 @@ static void h1_capture_bad_message(struct h1c *h1c, struct h1s *h1s, &ctx, h1_show_error_snapshot); } -/* Emit the chunksize followed by a CRLF in front of data of the buffer +/* Emit the chunk size <chksz> followed by a CRLF in front of data of the buffer * <buf>. It goes backwards and starts with the byte before the buffer's * head. The caller is responsible for ensuring there is enough room left before - * the buffer's head for the string. + * the buffer's head for the string. if <length> is greater than 0, it + * represents the expected total length of the chunk size, including the + * CRLF. So it will be padded with 0 to resepct this length. It is the caller + * responsibility to pass the right value. if <length> is set to 0 (or less that + * the smallest size to represent the chunk size), it is ignored. 
*/ -static void h1_prepend_chunk_size(struct buffer *buf, size_t chksz) +static void h1_prepend_chunk_size(struct buffer *buf, size_t chksz, size_t length) { char *beg, *end; beg = end = b_head(buf); *--beg = '\n'; *--beg = '\r'; + if (length) + length -= 2; do { *--beg = hextab[chksz & 0xF]; + if (length) + --length; } while (chksz >>= 4); + while (length) { + *--beg = '0'; + --length; + } buf->head -= (end - beg); b_add(buf, end - beg); } @@ -2328,15 +2576,47 @@ static size_t h1_make_eoh(struct h1s *h1s, struct h1m *h1m, struct htx *htx, siz b_slow_realign(&h1c->obuf, trash.area, b_data(&h1c->obuf)); outbuf = b_make(b_tail(&h1c->obuf), b_contig_space(&h1c->obuf), 0, 0); + /* Deal with removed "Content-Length" or "Transfer-Encoding" headers during analysis */ + if (((h1m->flags & H1_MF_CLEN) && !(h1s->flags & H1S_F_HAVE_CLEN))|| + ((h1m->flags & H1_MF_CHNK) && !(h1s->flags & H1S_F_HAVE_CHNK))) { + TRACE_STATE("\"Content-Length\" or \"Transfer-Encoding\" header removed during analysis", H1_EV_TX_DATA|H1_EV_TX_HDRS, h1c->conn, h1s); + + if (h1s->flags & (H1S_F_HAVE_CLEN|H1S_F_HAVE_CHNK)) { + /* At least on header is present, we can continue */ + if (!(h1s->flags & H1S_F_HAVE_CLEN)) { + h1m->curr_len = h1m->body_len = 0; + h1m->flags &= ~H1_MF_CLEN; + } + else /* h1s->flags & H1S_F_HAVE_CHNK */ + h1m->flags &= ~(H1_MF_XFER_ENC|H1_MF_CHNK); + } + else { + /* Both headers are missing */ + if (h1m->flags & H1_MF_RESP) { + /* It is a esponse: Switch to unknown xfer length */ + h1m->flags &= ~(H1_MF_XFER_LEN|H1_MF_XFER_ENC|H1_MF_CLEN|H1_MF_CHNK); + h1s->flags &= ~(H1S_F_HAVE_CLEN|H1S_F_HAVE_CHNK); + TRACE_STATE("Switch response to unknown XFER length", H1_EV_TX_DATA|H1_EV_TX_HDRS, h1c->conn, h1s); + } + else { + /* It is the request: Add "Content-Length: 0" header and skip payload */ + struct ist n = ist("content-length"); + if (h1c->px->options2 & (PR_O2_H1_ADJ_BUGCLI|PR_O2_H1_ADJ_BUGSRV)) + h1_adjust_case_outgoing_hdr(h1s, h1m, &n); + if (!h1_format_htx_hdr(n, ist("0"), 
&outbuf)) + goto full; + + h1m->flags = (h1m->flags & ~(H1_MF_XFER_ENC|H1_MF_CHNK)) | H1_MF_CLEN; + h1s->flags = (h1s->flags & ~H1S_F_HAVE_CHNK) | (H1S_F_HAVE_CLEN|H1S_F_BODYLESS_REQ); + h1m->curr_len = h1m->body_len = 0; + TRACE_STATE("Set request content-length to 0 and skip payload", H1_EV_TX_DATA|H1_EV_TX_HDRS, h1c->conn, h1s); + } + } + } + /* Deal with "Connection" header */ if (!(h1s->flags & H1S_F_HAVE_O_CONN)) { - if ((htx->flags & HTX_FL_PROXY_RESP) && h1s->req.state != H1_MSG_DONE) { - /* If the reply comes from haproxy while the request is - * not finished, we force the connection close. */ - h1s->flags = (h1s->flags & ~H1S_F_WANT_MSK) | H1S_F_WANT_CLO; - TRACE_STATE("force close mode (resp)", H1_EV_TX_DATA|H1_EV_TX_HDRS, h1s->h1c->conn, h1s); - } - else if ((h1m->flags & (H1_MF_XFER_ENC|H1_MF_CLEN)) == (H1_MF_XFER_ENC|H1_MF_CLEN)) { + if ((h1m->flags & (H1_MF_XFER_ENC|H1_MF_CLEN)) == (H1_MF_XFER_ENC|H1_MF_CLEN)) { /* T-E + C-L: force close */ h1s->flags = (h1s->flags & ~H1S_F_WANT_MSK) | H1S_F_WANT_CLO; h1m->flags &= ~H1_MF_CLEN; @@ -2384,23 +2664,6 @@ static size_t h1_make_eoh(struct h1s *h1s, struct h1m *h1m, struct htx *htx, siz h1s->flags |= H1S_F_HAVE_CHNK; } - /* Deal with "Content-Length header */ - if ((h1m->flags & H1_MF_CLEN) && !(h1s->flags & H1S_F_HAVE_CLEN)) { - char *end; - - h1m->curr_len = h1m->body_len = htx->data + htx->extra - sz; - end = DISGUISE(ulltoa(h1m->body_len, trash.area, b_size(&trash))); - - n = ist("content-length"); - v = ist2(trash.area, end-trash.area); - if (h1c->px->options2 & (PR_O2_H1_ADJ_BUGCLI|PR_O2_H1_ADJ_BUGSRV)) - h1_adjust_case_outgoing_hdr(h1s, h1m, &n); - if (!h1_format_htx_hdr(n, v, &outbuf)) - goto full; - TRACE_STATE("add \"Content-Length: <LEN>\"", H1_EV_TX_DATA|H1_EV_TX_HDRS, h1c->conn, h1s); - h1s->flags |= H1S_F_HAVE_CLEN; - } - /* Add the server name to a header (if requested) */ if (!(h1s->flags & H1S_F_HAVE_SRV_NAME) && !(h1m->flags & H1_MF_RESP) && isttest(h1c->px->server_id_hdr_name)) { @@ 
-2555,7 +2818,8 @@ static size_t h1_make_data(struct h1s *h1s, struct h1m *h1m, struct buffer *buf, * end-to-end. This is the situation that happens all the time with * large files. */ - if ((!(h1m->flags & H1_MF_RESP) || !(h1s->flags & H1S_F_BODYLESS_RESP)) && + if (((!(h1m->flags & H1_MF_RESP) && !(h1s->flags & H1S_F_BODYLESS_REQ)) || + ((h1m->flags & H1_MF_RESP) && !(h1s->flags & H1S_F_BODYLESS_RESP))) && !b_data(&h1c->obuf) && (!(h1m->flags & H1_MF_CHNK) || ((h1m->flags & H1_MF_CHNK) && (!h1m->curr_len || count == h1m->curr_len))) && htx_nbblks(htx) == 1 && @@ -2612,7 +2876,7 @@ static size_t h1_make_data(struct h1s *h1s, struct h1m *h1m, struct buffer *buf, /* Because chunk meta-data are prepended, the chunk size of the current chunk * must be handled before the end of the previous chunk. */ - h1_prepend_chunk_size(&h1c->obuf, h1m->curr_len); + h1_prepend_chunk_size(&h1c->obuf, h1m->curr_len, 0); if (h1m->state == H1_MSG_CHUNK_CRLF) h1_prepend_chunk_crlf(&h1c->obuf); @@ -2682,8 +2946,9 @@ static size_t h1_make_data(struct h1s *h1s, struct h1m *h1m, struct buffer *buf, last_data = 1; } - if ((h1m->flags & H1_MF_RESP) && (h1s->flags & H1S_F_BODYLESS_RESP)) { - TRACE_PROTO("Skip data for bodyless response", H1_EV_TX_DATA|H1_EV_TX_BODY, h1c->conn, h1s, htx); + if ((!(h1m->flags & H1_MF_RESP) && (h1s->flags & H1S_F_BODYLESS_REQ)) || + ((h1m->flags & H1_MF_RESP) && (h1s->flags & H1S_F_BODYLESS_RESP))) { + TRACE_PROTO("Skip data for bodyless message", H1_EV_TX_DATA|H1_EV_TX_BODY, h1c->conn, h1s, htx); goto nextblk; } @@ -2754,7 +3019,8 @@ static size_t h1_make_data(struct h1s *h1s, struct h1m *h1m, struct buffer *buf, } else if (type == HTX_BLK_EOT || type == HTX_BLK_TLR) { - if ((h1m->flags & H1_MF_RESP) && (h1s->flags & H1S_F_BODYLESS_RESP)) { + if ((!(h1m->flags & H1_MF_RESP) && (h1s->flags & H1S_F_BODYLESS_REQ)) || + ((h1m->flags & H1_MF_RESP) && (h1s->flags & H1S_F_BODYLESS_RESP))) { /* Do nothing the payload must be skipped * because it is a bodyless response 
*/ @@ -2954,7 +3220,9 @@ static size_t h1_make_trailers(struct h1s *h1s, struct h1m *h1m, struct htx *htx if (sz > count) goto error; - if (!(h1m->flags & H1_MF_CHNK) || ((h1m->flags & H1_MF_RESP) && (h1s->flags & H1S_F_BODYLESS_RESP))) + if (!(h1m->flags & H1_MF_CHNK) || + (!(h1m->flags & H1_MF_RESP) && (h1s->flags & H1S_F_BODYLESS_REQ)) || + ((h1m->flags & H1_MF_RESP) && (h1s->flags & H1S_F_BODYLESS_RESP))) goto nextblk; n = htx_get_blk_name(htx, blk); @@ -2967,7 +3235,9 @@ static size_t h1_make_trailers(struct h1s *h1s, struct h1m *h1m, struct htx *htx goto full; } else if (type == HTX_BLK_EOT) { - if (!(h1m->flags & H1_MF_CHNK) || ((h1m->flags & H1_MF_RESP) && (h1s->flags & H1S_F_BODYLESS_RESP))) { + if (!(h1m->flags & H1_MF_CHNK) || + (!(h1m->flags & H1_MF_RESP) && (h1s->flags & H1S_F_BODYLESS_REQ)) || + ((h1m->flags & H1_MF_RESP) && (h1s->flags & H1S_F_BODYLESS_RESP))) { TRACE_PROTO((!(h1m->flags & H1_MF_RESP) ? "H1 request trailers skipped" : "H1 response trailers skipped"), H1_EV_TX_DATA|H1_EV_TX_TLRS, h1c->conn, h1s); } @@ -3023,8 +3293,7 @@ static size_t h1_make_chunk(struct h1s *h1s, struct h1m * h1m, size_t len) TRACE_ENTER(H1_EV_TX_DATA|H1_EV_TX_BODY, h1c->conn, h1s); - if (!h1_get_buf(h1c, &h1c->obuf)) { - h1c->flags |= H1C_F_OUT_ALLOC; + if (!h1_get_obuf(h1c)) { TRACE_STATE("waiting for h1c obuf allocation", H1_EV_TX_DATA|H1_EV_H1S_BLK, h1c->conn, h1s); goto end; } @@ -3077,8 +3346,7 @@ static size_t h1_process_mux(struct h1c *h1c, struct buffer *buf, size_t count) if (h1s->flags & (H1S_F_INTERNAL_ERROR|H1S_F_PROCESSING_ERROR|H1S_F_TX_BLK)) goto end; - if (!h1_get_buf(h1c, &h1c->obuf)) { - h1c->flags |= H1C_F_OUT_ALLOC; + if (!h1_get_obuf(h1c)) { TRACE_STATE("waiting for h1c obuf allocation", H1_EV_TX_DATA|H1_EV_H1S_BLK, h1c->conn, h1s); goto end; } @@ -3252,8 +3520,8 @@ static int h1_send_error(struct h1c *h1c) goto out; } - if (!h1_get_buf(h1c, &h1c->obuf)) { - h1c->flags |= (H1C_F_OUT_ALLOC|H1C_F_ABRT_PENDING); + if (!h1_get_obuf(h1c)) { + 
h1c->flags |= H1C_F_ABRT_PENDING; TRACE_STATE("waiting for h1c obuf allocation", H1_EV_H1C_ERR|H1_EV_H1C_BLK, h1c->conn); goto out; } @@ -3291,6 +3559,11 @@ static int h1_handle_internal_err(struct h1c *h1c) struct session *sess = h1c->conn->owner; int ret = 0; + if (h1c->state == H1_CS_DRAINING) { + h1c->flags = (h1c->flags & ~H1C_F_WAIT_NEXT_REQ) | H1C_F_ABRTED; + h1s_destroy(h1c->h1s); + goto end; + } session_inc_http_req_ctr(sess); proxy_inc_fe_req_ctr(sess->listener, sess->fe, 1); _HA_ATOMIC_INC(&sess->fe->fe_counters.p.http.rsp[5]); @@ -3301,6 +3574,7 @@ static int h1_handle_internal_err(struct h1c *h1c) h1c->errcode = 500; ret = h1_send_error(h1c); sess_log(sess); + end: return ret; } @@ -3314,6 +3588,11 @@ static int h1_handle_parsing_error(struct h1c *h1c) struct session *sess = h1c->conn->owner; int ret = 0; + if (h1c->state == H1_CS_DRAINING) { + h1c->flags = (h1c->flags & ~H1C_F_WAIT_NEXT_REQ) | H1C_F_ABRTED; + h1s_destroy(h1c->h1s); + goto end; + } if (!b_data(&h1c->ibuf) && ((h1c->flags & H1C_F_WAIT_NEXT_REQ) || (sess->fe->options & PR_O_IGNORE_PRB))) { h1c->flags = (h1c->flags & ~H1C_F_WAIT_NEXT_REQ) | H1C_F_ABRTED; h1_close(h1c); @@ -3347,6 +3626,11 @@ static int h1_handle_not_impl_err(struct h1c *h1c) struct session *sess = h1c->conn->owner; int ret = 0; + if (h1c->state == H1_CS_DRAINING) { + h1c->flags = (h1c->flags & ~H1C_F_WAIT_NEXT_REQ) | H1C_F_ABRTED; + h1s_destroy(h1c->h1s); + goto end; + } if (!b_data(&h1c->ibuf) && ((h1c->flags & H1C_F_WAIT_NEXT_REQ) || (sess->fe->options & PR_O_IGNORE_PRB))) { h1c->flags = (h1c->flags & ~H1C_F_WAIT_NEXT_REQ) | H1C_F_ABRTED; h1_close(h1c); @@ -3377,6 +3661,11 @@ static int h1_handle_req_tout(struct h1c *h1c) struct session *sess = h1c->conn->owner; int ret = 0; + if (h1c->state == H1_CS_DRAINING) { + h1c->flags = (h1c->flags & ~H1C_F_WAIT_NEXT_REQ) | H1C_F_ABRTED; + h1s_destroy(h1c->h1s); + goto end; + } if (!b_data(&h1c->ibuf) && ((h1c->flags & H1C_F_WAIT_NEXT_REQ) || (sess->fe->options & 
PR_O_IGNORE_PRB))) { h1c->flags = (h1c->flags & ~H1C_F_WAIT_NEXT_REQ) | H1C_F_ABRTED; h1_close(h1c); @@ -3421,8 +3710,7 @@ static int h1_recv(struct h1c *h1c) return 1; } - if (!h1_get_buf(h1c, &h1c->ibuf)) { - h1c->flags |= H1C_F_IN_ALLOC; + if (!h1_get_ibuf(h1c)) { TRACE_STATE("waiting for h1c ibuf allocation", H1_EV_H1C_RECV|H1_EV_H1C_BLK, h1c->conn); return 0; } @@ -3594,7 +3882,7 @@ static int h1_process(struct h1c * h1c) /* Try to parse now the first block of a request, creating the H1 stream if necessary */ if (b_data(&h1c->ibuf) && /* Input data to be processed */ - (h1c->state < H1_CS_RUNNING) && /* IDLE, EMBRYONIC or UPGRADING */ + ((h1c->state < H1_CS_RUNNING) || (h1c->state == H1_CS_DRAINING)) && /* IDLE, EMBRYONIC, UPGRADING or DRAINING */ !(h1c->flags & (H1C_F_IN_SALLOC|H1C_F_ABRT_PENDING))) { /* No allocation failure on the stream rxbuf and no ERROR on the H1C */ struct h1s *h1s = h1c->h1s; struct buffer *buf; @@ -3605,7 +3893,8 @@ static int h1_process(struct h1c * h1c) goto release; /* First of all handle H1 to H2 upgrade (no need to create the H1 stream) */ - if (!(h1c->flags & H1C_F_WAIT_NEXT_REQ) && /* First request */ + if (h1c->state != H1_CS_DRAINING && /* Not draining message */ + !(h1c->flags & H1C_F_WAIT_NEXT_REQ) && /* First request */ !(h1c->px->options2 & PR_O2_NO_H2_UPGRADE) && /* H2 upgrade supported by the proxy */ !(conn->mux->flags & MX_FL_NO_UPG)) { /* the current mux supports upgrades */ /* Try to match H2 preface before parsing the request headers. 
*/ @@ -3635,9 +3924,8 @@ static int h1_process(struct h1c * h1c) h1s->sess->t_idle = ns_to_ms(now_ns - h1s->sess->accept_ts) - h1s->sess->t_handshake; /* Get the stream rxbuf */ - buf = h1_get_buf(h1c, &h1s->rxbuf); + buf = h1_get_rxbuf(h1s); if (!buf) { - h1c->flags |= H1C_F_IN_SALLOC; TRACE_STATE("waiting for stream rxbuf allocation", H1_EV_H1C_WAKE|H1_EV_H1C_BLK, h1c->conn); return 0; } @@ -3646,7 +3934,7 @@ static int h1_process(struct h1c * h1c) h1_process_demux(h1c, buf, count); h1_release_buf(h1c, &h1s->rxbuf); h1_set_idle_expiration(h1c); - if (h1c->state < H1_CS_RUNNING) { + if (h1c->state != H1_CS_RUNNING) { // TODO: be sure state cannot change in h1_process_demux if (h1s->flags & H1S_F_INTERNAL_ERROR) { h1_handle_internal_err(h1c); TRACE_ERROR("internal error detected", H1_EV_H1C_WAKE|H1_EV_H1C_ERR); @@ -3689,6 +3977,11 @@ static int h1_process(struct h1c * h1c) if (h1_send_error(h1c)) h1_send(h1c); } + else if (h1c->state == H1_CS_DRAINING) { + BUG_ON(h1c->h1s->sd && !se_fl_test(h1c->h1s->sd, SE_FL_ORPHAN)); + h1s_destroy(h1c->h1s); + TRACE_STATE("abort/error when draining message. destroy h1s and close h1c", H1_EV_H1S_END, h1c->conn); + } else { h1_close(h1c); TRACE_STATE("close h1c", H1_EV_H1S_END, h1c->conn); @@ -3717,6 +4010,17 @@ static int h1_process(struct h1c * h1c) h1_alert(h1s); } } + else if (h1c->state == H1_CS_DRAINING) { + BUG_ON(!h1c->h1s); + if (se_fl_test(h1c->h1s->sd, SE_FL_EOI)) { + if (h1s_must_shut_conn(h1c->h1s)) { + h1_shutw_conn(conn); + goto release; + } + h1s_finish_detach(h1c->h1s); + goto end; + } + } if (!b_data(&h1c->ibuf)) h1_release_buf(h1c, &h1c->ibuf); @@ -4025,8 +4329,6 @@ static void h1_detach(struct sedesc *sd) { struct h1s *h1s = sd->se; struct h1c *h1c; - struct session *sess; - int is_not_first; TRACE_ENTER(H1_EV_STRM_END, h1s ? 
h1s->h1c->conn : NULL, h1s); @@ -4034,149 +4336,47 @@ static void h1_detach(struct sedesc *sd) TRACE_LEAVE(H1_EV_STRM_END); return; } - - sess = h1s->sess; h1c = h1s->h1c; - sess->accept_date = date; - sess->accept_ts = now_ns; - sess->t_handshake = 0; - sess->t_idle = -1; - - is_not_first = h1s->flags & H1S_F_NOT_FIRST; - h1s_destroy(h1s); - - if (h1c->state == H1_CS_IDLE && (h1c->flags & H1C_F_IS_BACK)) { - /* this connection may be killed at any moment, we want it to - * die "cleanly" (i.e. only an RST). + if (h1c->state == H1_CS_RUNNING && !(h1c->flags & H1C_F_IS_BACK) && h1s->req.state != H1_MSG_DONE) { + h1c->state = H1_CS_DRAINING; + TRACE_DEVEL("Deferring H1S destroy to drain message", H1_EV_STRM_END, h1s->h1c->conn, h1s); + /* If we have a pending data, process it immediately or + * subscribe for reads waiting for new data */ - h1c->flags |= H1C_F_SILENT_SHUT; - - /* If there are any excess server data in the input buffer, - * release it and close the connection ASAP (some data may - * remain in the output buffer). This happens if a server sends - * invalid responses. 
So in such case, we don't want to reuse - * the connection - */ - if (b_data(&h1c->ibuf)) { - h1_release_buf(h1c, &h1c->ibuf); - h1_close(h1c); - TRACE_DEVEL("remaining data on detach, kill connection", H1_EV_STRM_END|H1_EV_H1C_END); - goto release; - } - - if (h1c->conn->flags & CO_FL_PRIVATE) { - /* Add the connection in the session server list, if not already done */ - if (!session_add_conn(sess, h1c->conn, h1c->conn->target)) { - h1c->conn->owner = NULL; - h1c->conn->mux->destroy(h1c); + if (unlikely(b_data(&h1c->ibuf))) { + if (h1_process(h1c) == -1) goto end; - } - /* Always idle at this step */ - if (session_check_idle_conn(sess, h1c->conn)) { - /* The connection got destroyed, let's leave */ - TRACE_DEVEL("outgoing connection killed", H1_EV_STRM_END|H1_EV_H1C_END); - goto end; - } } - else { - if (h1c->conn->owner == sess) - h1c->conn->owner = NULL; - - /* mark that the tasklet may lose its context to another thread and - * that the handler needs to check it under the idle conns lock. - */ - HA_ATOMIC_OR(&h1c->wait_event.tasklet->state, TASK_F_USR1); + else h1c->conn->xprt->subscribe(h1c->conn, h1c->conn->xprt_ctx, SUB_RETRY_RECV, &h1c->wait_event); - xprt_set_idle(h1c->conn, h1c->conn->xprt, h1c->conn->xprt_ctx); - - if (!srv_add_to_idle_list(objt_server(h1c->conn->target), h1c->conn, is_not_first)) { - /* The server doesn't want it, let's kill the connection right away */ - h1c->conn->mux->destroy(h1c); - TRACE_DEVEL("outgoing connection killed", H1_EV_STRM_END|H1_EV_H1C_END); - goto end; - } - /* At this point, the connection has been added to the - * server idle list, so another thread may already have - * hijacked it, so we can't do anything with it. 
- */ - return; - } - } - - release: - /* We don't want to close right now unless the connection is in error or shut down for writes */ - if ((h1c->flags & H1C_F_ERROR) || - (h1c->state == H1_CS_CLOSED) || - (h1c->state == H1_CS_CLOSING && !b_data(&h1c->obuf)) || - !h1c->conn->owner) { - TRACE_DEVEL("killing dead connection", H1_EV_STRM_END, h1c->conn); - h1_release(h1c); - } - else { - if (h1c->state == H1_CS_IDLE) { - /* If we have a new request, process it immediately or - * subscribe for reads waiting for new data - */ - if (unlikely(b_data(&h1c->ibuf))) { - if (h1_process(h1c) == -1) - goto end; - } - else - h1c->conn->xprt->subscribe(h1c->conn, h1c->conn->xprt_ctx, SUB_RETRY_RECV, &h1c->wait_event); - } h1_set_idle_expiration(h1c); h1_refresh_timeout(h1c); } + else + h1s_finish_detach(h1s); + end: TRACE_LEAVE(H1_EV_STRM_END); } - -static void h1_shutr(struct stconn *sc, enum co_shr_mode mode) +static void h1_shut(struct stconn *sc, enum se_shut_mode mode, struct se_abort_info *reason) { struct h1s *h1s = __sc_mux_strm(sc); struct h1c *h1c; - if (!h1s) - return; - h1c = h1s->h1c; - - TRACE_POINT(H1_EV_STRM_SHUT, h1c->conn, h1s, 0, (size_t[]){mode}); -} - -static void h1_shutw(struct stconn *sc, enum co_shw_mode mode) -{ - struct h1s *h1s = __sc_mux_strm(sc); - struct h1c *h1c; - - if (!h1s) + if (!h1s || !(mode & (SE_SHW_SILENT|SE_SHW_NORMAL))) return; h1c = h1s->h1c; TRACE_ENTER(H1_EV_STRM_SHUT, h1c->conn, h1s, 0, (size_t[]){mode}); - if (se_fl_test(h1s->sd, SE_FL_KILL_CONN)) { - TRACE_STATE("stream wants to kill the connection", H1_EV_STRM_SHUT, h1c->conn, h1s); - goto do_shutw; - } - if (h1c->state == H1_CS_CLOSING || (h1c->flags & (H1C_F_EOS|H1C_F_ERR_PENDING|H1C_F_ERROR))) { - TRACE_STATE("shutdown on connection (EOS || CLOSING || ERROR)", H1_EV_STRM_SHUT, h1c->conn, h1s); - goto do_shutw; - } - - if (h1c->state == H1_CS_UPGRADING) { - TRACE_STATE("keep connection alive (UPGRADING)", H1_EV_STRM_SHUT, h1c->conn, h1s); + if (!h1s_must_shut_conn(h1s)) goto 
end; - } - if (((h1s->flags & H1S_F_WANT_KAL) && h1s->req.state == H1_MSG_DONE && h1s->res.state == H1_MSG_DONE)) { - TRACE_STATE("keep connection alive (want_kal)", H1_EV_STRM_SHUT, h1c->conn, h1s); - goto end; - } do_shutw: h1_close(h1c); - if (mode != CO_SHW_NORMAL) + if (mode & SE_SHW_NORMAL) h1c->flags |= H1C_F_SILENT_SHUT; if (!b_data(&h1c->obuf)) @@ -4405,12 +4605,12 @@ static inline struct sedesc *h1s_opposite_sd(struct h1s *h1s) return sdo; } -static size_t h1_nego_ff(struct stconn *sc, struct buffer *input, size_t count, unsigned int may_splice) +static size_t h1_nego_ff(struct stconn *sc, struct buffer *input, size_t count, unsigned int flags) { struct h1s *h1s = __sc_mux_strm(sc); struct h1c *h1c = h1s->h1c; struct h1m *h1m = (!(h1c->flags & H1C_F_IS_BACK) ? &h1s->res : &h1s->req); - size_t ret = 0; + size_t sz, offset = 0, ret = 0; TRACE_ENTER(H1_EV_STRM_SEND, h1c->conn, h1s, 0, (size_t[]){count}); @@ -4420,21 +4620,55 @@ static size_t h1_nego_ff(struct stconn *sc, struct buffer *input, size_t count, goto out; } - /* TODO: add check on curr_len if CLEN */ + if ((!(h1m->flags & H1_MF_RESP) && (h1s->flags & H1S_F_BODYLESS_REQ)) || + ((h1m->flags & H1_MF_RESP) && (h1s->flags & H1S_F_BODYLESS_RESP))) { + TRACE_STATE("Bodyless message, disable fastfwd", H1_EV_STRM_SEND|H1_EV_STRM_ERR, h1c->conn, h1s); + h1s->sd->iobuf.flags |= IOBUF_FL_NO_FF; + goto out; + } - if (h1m->flags & H1_MF_CHNK) { + if (h1m->flags & H1_MF_CLEN) { + if ((flags & NEGO_FF_FL_EXACT_SIZE) && count > h1m->curr_len) { + TRACE_ERROR("more payload than announced", H1_EV_STRM_SEND|H1_EV_STRM_ERR, h1c->conn, h1s); + h1s->sd->iobuf.flags |= IOBUF_FL_NO_FF; + goto out; + } + } + else if (h1m->flags & H1_MF_CHNK) { if (h1m->curr_len) { BUG_ON(h1m->state != H1_MSG_DATA); - if (count > h1m->curr_len) + if (count > h1m->curr_len) { + if ((flags & NEGO_FF_FL_EXACT_SIZE) && count > h1m->curr_len) { + TRACE_ERROR("chunk bigger than announced", H1_EV_STRM_SEND|H1_EV_STRM_ERR, h1c->conn, h1s); + 
h1s->sd->iobuf.flags |= IOBUF_FL_NO_FF; + goto out; + } count = h1m->curr_len; + } } else { BUG_ON(h1m->state != H1_MSG_CHUNK_CRLF && h1m->state != H1_MSG_CHUNK_SIZE); - if (!h1_make_chunk(h1s, h1m, count)) { + if (flags & NEGO_FF_FL_EXACT_SIZE) { + if (!h1_make_chunk(h1s, h1m, count)) { h1s->sd->iobuf.flags |= IOBUF_FL_FF_BLOCKED; - goto out; + goto out; + } + h1m->curr_len = count; + } + else { + /* The producer does not know the chunk size, thus this will be emitted at the + * end, in done_ff(). So splicing cannot be used (see TODO below). + * We will reserve 10 bytes to handle at most 4Go chunk ! + * (<8-bytes SIZE><CRLF><CHUNK-DATA>) + */ + if (count > MAX_RANGE(unsigned int)) + count = MAX_RANGE(unsigned int); + offset = 10; + /* Add 2 more bytes to finish the previous chunk */ + if (h1m->state == H1_MSG_CHUNK_CRLF) + offset += 2; + goto no_splicing; } - h1m->curr_len = count; } } @@ -4445,7 +4679,7 @@ static size_t h1_nego_ff(struct stconn *sc, struct buffer *input, size_t count, * and then data in pipe, or the opposite. For now, it is not * supported to mix data. 
*/ - if (!b_data(input) && !b_data(&h1c->obuf) && may_splice) { + if (!b_data(input) && !b_data(&h1c->obuf) && (flags & NEGO_FF_FL_MAY_SPLICE)) { #if defined(USE_LINUX_SPLICE) if (h1c->conn->xprt->snd_pipe && (h1s->sd->iobuf.pipe || (pipes_used < global.maxpipes && (h1s->sd->iobuf.pipe = get_pipe())))) { h1s->sd->iobuf.offset = 0; @@ -4458,8 +4692,8 @@ static size_t h1_nego_ff(struct stconn *sc, struct buffer *input, size_t count, TRACE_DEVEL("Unable to allocate pipe for splicing, fallback to buffer", H1_EV_STRM_SEND, h1c->conn, h1s); } - if (!h1_get_buf(h1c, &h1c->obuf)) { - h1c->flags |= H1C_F_OUT_ALLOC; + no_splicing: + if (!h1_get_obuf(h1c)) { h1s->sd->iobuf.flags |= IOBUF_FL_FF_BLOCKED; TRACE_STATE("waiting for opposite h1c obuf allocation", H1_EV_STRM_SEND|H1_EV_H1S_BLK, h1c->conn, h1s); goto out; @@ -4468,21 +4702,22 @@ static size_t h1_nego_ff(struct stconn *sc, struct buffer *input, size_t count, if (b_space_wraps(&h1c->obuf)) b_slow_realign(&h1c->obuf, trash.area, b_data(&h1c->obuf)); - h1s->sd->iobuf.buf = &h1c->obuf; - h1s->sd->iobuf.offset = 0; - h1s->sd->iobuf.data = 0; - - /* Cannot forward more than available room in output buffer */ - if (count > b_room(&h1c->obuf)) - count = b_room(&h1c->obuf); - - if (!count) { + if (b_contig_space(&h1c->obuf) <= offset) { h1c->flags |= H1C_F_OUT_FULL; h1s->sd->iobuf.flags |= IOBUF_FL_FF_BLOCKED; TRACE_STATE("output buffer full", H1_EV_STRM_SEND|H1_EV_H1S_BLK, h1c->conn, h1s); goto out; } + /* Cannot forward more than available room in output buffer */ + sz = b_contig_space(&h1c->obuf) - offset; + if (count > sz) + count = sz; + + h1s->sd->iobuf.buf = &h1c->obuf; + h1s->sd->iobuf.offset = offset; + h1s->sd->iobuf.data = 0; + /* forward remaining input data */ if (b_data(input)) { size_t xfer = count; @@ -4529,6 +4764,17 @@ static size_t h1_done_ff(struct stconn *sc) if (b_room(&h1c->obuf) == sd->iobuf.offset) h1c->flags |= H1C_F_OUT_FULL; + if (sd->iobuf.data && sd->iobuf.offset) { + struct buffer buf = 
b_make(b_orig(&h1c->obuf), b_size(&h1c->obuf), + b_peek_ofs(&h1c->obuf, b_data(&h1c->obuf) - sd->iobuf.data + sd->iobuf.offset), + sd->iobuf.data); + h1_prepend_chunk_size(&buf, sd->iobuf.data, sd->iobuf.offset - ((h1m->state == H1_MSG_CHUNK_CRLF) ? 2 : 0)); + if (h1m->state == H1_MSG_CHUNK_CRLF) + h1_prepend_chunk_crlf(&buf); + b_add(&h1c->obuf, sd->iobuf.offset); + h1m->state = H1_MSG_CHUNK_CRLF; + } + total = sd->iobuf.data; sd->iobuf.buf = NULL; sd->iobuf.offset = 0; @@ -4583,6 +4829,7 @@ static int h1_fastfwd(struct stconn *sc, unsigned int count, unsigned int flags) struct h1m *h1m = (!(h1c->flags & H1C_F_IS_BACK) ? &h1s->req : &h1s->res); struct sedesc *sdo = NULL; size_t total = 0, try = 0; + unsigned int nego_flags = NEGO_FF_FL_NONE; int ret = 0; TRACE_ENTER(H1_EV_STRM_RECV, h1c->conn, h1s, 0, (size_t[]){count}); @@ -4612,10 +4859,15 @@ static int h1_fastfwd(struct stconn *sc, unsigned int count, unsigned int flags) retry: ret = 0; - if (h1m->state == H1_MSG_DATA && (h1m->flags & (H1_MF_CHNK|H1_MF_CLEN)) && count > h1m->curr_len) + if (h1m->state == H1_MSG_DATA && (h1m->flags & (H1_MF_CHNK|H1_MF_CLEN)) && count > h1m->curr_len) { + flags |= NEGO_FF_FL_EXACT_SIZE; count = h1m->curr_len; + } + + if (h1c->conn->xprt->rcv_pipe && !!(flags & CO_RFL_MAY_SPLICE) && !(sdo->iobuf.flags & IOBUF_FL_NO_SPLICING)) + nego_flags |= NEGO_FF_FL_MAY_SPLICE; - try = se_nego_ff(sdo, &h1c->ibuf, count, h1c->conn->xprt->rcv_pipe && !!(flags & CO_RFL_MAY_SPLICE) && !(sdo->iobuf.flags & IOBUF_FL_NO_SPLICING)); + try = se_nego_ff(sdo, &h1c->ibuf, count, nego_flags); if (b_room(&h1c->ibuf) && (h1c->flags & H1C_F_IN_FULL)) { h1c->flags &= ~H1C_F_IN_FULL; TRACE_STATE("h1c ibuf not full anymore", H1_EV_STRM_RECV|H1_EV_H1C_BLK); @@ -4848,6 +5100,10 @@ static int h1_ctl(struct connection *conn, enum mux_ctl_type mux_ctl, void *outp if (!(h1c->wait_event.events & SUB_RETRY_RECV)) h1c->conn->xprt->subscribe(h1c->conn, h1c->conn->xprt_ctx, SUB_RETRY_RECV, &h1c->wait_event); return 0; + 
case MUX_CTL_GET_NBSTRM: + return h1_used_streams(conn); + case MUX_CTL_GET_MAXSTRM: + return 1; default: return -1; } @@ -5032,25 +5288,35 @@ static int add_hdr_case_adjust(const char *from, const char *to, char **err) * Return 0 if successful, non-zero otherwise. * Expected to be called with the old thread lock held. */ -static int h1_takeover(struct connection *conn, int orig_tid) +static int h1_takeover(struct connection *conn, int orig_tid, int release) { struct h1c *h1c = conn->ctx; struct task *task; - struct task *new_task; - struct tasklet *new_tasklet; + struct task *new_task = NULL; + struct tasklet *new_tasklet = NULL; /* Pre-allocate tasks so that we don't have to roll back after the xprt * has been migrated. */ - new_task = task_new_here(); - new_tasklet = tasklet_new(); - if (!new_task || !new_tasklet) - goto fail; + if (!release) { + /* If the connection is attached to a buffer_wait (extremely + * rare), it will be woken up at any instant by its own thread + * and we can't undo it anyway, so let's give up on this one. + * It's not interesting anyway since it's not usable right now. 
+ */ + if (LIST_INLIST(&h1c->buf_wait.list)) + goto fail; + + new_task = task_new_here(); + new_tasklet = tasklet_new(); + if (!new_task || !new_tasklet) + goto fail; + } if (fd_takeover(conn->handle.fd, conn) != 0) goto fail; - if (conn->xprt->takeover && conn->xprt->takeover(conn, conn->xprt_ctx, orig_tid) != 0) { + if (conn->xprt->takeover && conn->xprt->takeover(conn, conn->xprt_ctx, orig_tid, release) != 0) { /* We failed to takeover the xprt, even if the connection may * still be valid, flag it as error'd, as we have already * taken over the fd, and wake the tasklet, so that it will @@ -5077,8 +5343,10 @@ static int h1_takeover(struct connection *conn, int orig_tid) h1c->task = new_task; new_task = NULL; - h1c->task->process = h1_timeout_task; - h1c->task->context = h1c; + if (!release) { + h1c->task->process = h1_timeout_task; + h1c->task->context = h1c; + } } /* To let the tasklet know it should free itself, and do nothing else, @@ -5088,10 +5356,26 @@ static int h1_takeover(struct connection *conn, int orig_tid) tasklet_wakeup_on(h1c->wait_event.tasklet, orig_tid); h1c->wait_event.tasklet = new_tasklet; - h1c->wait_event.tasklet->process = h1_io_cb; - h1c->wait_event.tasklet->context = h1c; - h1c->conn->xprt->subscribe(h1c->conn, h1c->conn->xprt_ctx, - SUB_RETRY_RECV, &h1c->wait_event); + if (!release) { + h1c->wait_event.tasklet->process = h1_io_cb; + h1c->wait_event.tasklet->context = h1c; + h1c->conn->xprt->subscribe(h1c->conn, h1c->conn->xprt_ctx, + SUB_RETRY_RECV, &h1c->wait_event); + } + + if (release) { + /* we're being called for a server deletion and are running + * under thread isolation. That's the only way we can + * unregister a possible subscription of the original + * connection from its owner thread's queue, as this involves + * manipulating thread-unsafe areas. Note that it is not + * possible to just call b_dequeue() here as it would update + * the current thread's bufq_map and not the original one. 
+ */ + BUG_ON(!thread_isolated()); + if (LIST_INLIST(&h1c->buf_wait.list)) + _b_dequeue(&h1c->buf_wait, orig_tid); + } if (new_task) __task_free(new_task); @@ -5321,8 +5605,7 @@ static const struct mux_ops mux_http_ops = { .resume_fastfwd = h1_resume_fastfwd, .subscribe = h1_subscribe, .unsubscribe = h1_unsubscribe, - .shutr = h1_shutr, - .shutw = h1_shutw, + .shut = h1_shut, .show_fd = h1_show_fd, .show_sd = h1_show_sd, .ctl = h1_ctl, @@ -5349,8 +5632,7 @@ static const struct mux_ops mux_h1_ops = { .resume_fastfwd = h1_resume_fastfwd, .subscribe = h1_subscribe, .unsubscribe = h1_unsubscribe, - .shutr = h1_shutr, - .shutw = h1_shutw, + .shut = h1_shut, .show_fd = h1_show_fd, .show_sd = h1_show_sd, .ctl = h1_ctl, diff --git a/src/mux_h2.c b/src/mux_h2.c index 7ce0e6e..c28c5e1 100644 --- a/src/mux_h2.c +++ b/src/mux_h2.c @@ -306,7 +306,7 @@ enum { H2_STATS_COUNT /* must be the last member of the enum */ }; -static struct name_desc h2_stats[] = { +static struct stat_col h2_stats[] = { [H2_ST_HEADERS_RCVD] = { .name = "h2_headers_rcvd", .desc = "Total number of received HEADERS frames" }, [H2_ST_DATA_RCVD] = { .name = "h2_data_rcvd", @@ -355,25 +355,67 @@ static struct h2_counters { long long total_streams; /* total number of streams */ } h2_counters; -static void h2_fill_stats(void *data, struct field *stats) +static int h2_fill_stats(void *data, struct field *stats, unsigned int *selected_field) { struct h2_counters *counters = data; + unsigned int current_field = (selected_field != NULL ? 
*selected_field : 0); - stats[H2_ST_HEADERS_RCVD] = mkf_u64(FN_COUNTER, counters->headers_rcvd); - stats[H2_ST_DATA_RCVD] = mkf_u64(FN_COUNTER, counters->data_rcvd); - stats[H2_ST_SETTINGS_RCVD] = mkf_u64(FN_COUNTER, counters->settings_rcvd); - stats[H2_ST_RST_STREAM_RCVD] = mkf_u64(FN_COUNTER, counters->rst_stream_rcvd); - stats[H2_ST_GOAWAY_RCVD] = mkf_u64(FN_COUNTER, counters->goaway_rcvd); - - stats[H2_ST_CONN_PROTO_ERR] = mkf_u64(FN_COUNTER, counters->conn_proto_err); - stats[H2_ST_STRM_PROTO_ERR] = mkf_u64(FN_COUNTER, counters->strm_proto_err); - stats[H2_ST_RST_STREAM_RESP] = mkf_u64(FN_COUNTER, counters->rst_stream_resp); - stats[H2_ST_GOAWAY_RESP] = mkf_u64(FN_COUNTER, counters->goaway_resp); - - stats[H2_ST_OPEN_CONN] = mkf_u64(FN_GAUGE, counters->open_conns); - stats[H2_ST_OPEN_STREAM] = mkf_u64(FN_GAUGE, counters->open_streams); - stats[H2_ST_TOTAL_CONN] = mkf_u64(FN_COUNTER, counters->total_conns); - stats[H2_ST_TOTAL_STREAM] = mkf_u64(FN_COUNTER, counters->total_streams); + for (; current_field < H2_STATS_COUNT; current_field++) { + struct field metric = { 0 }; + + switch (current_field) { + case H2_ST_HEADERS_RCVD: + metric = mkf_u64(FN_COUNTER, counters->headers_rcvd); + break; + case H2_ST_DATA_RCVD: + metric = mkf_u64(FN_COUNTER, counters->data_rcvd); + break; + case H2_ST_SETTINGS_RCVD: + metric = mkf_u64(FN_COUNTER, counters->settings_rcvd); + break; + case H2_ST_RST_STREAM_RCVD: + metric = mkf_u64(FN_COUNTER, counters->rst_stream_rcvd); + break; + case H2_ST_GOAWAY_RCVD: + metric = mkf_u64(FN_COUNTER, counters->goaway_rcvd); + break; + case H2_ST_CONN_PROTO_ERR: + metric = mkf_u64(FN_COUNTER, counters->conn_proto_err); + break; + case H2_ST_STRM_PROTO_ERR: + metric = mkf_u64(FN_COUNTER, counters->strm_proto_err); + break; + case H2_ST_RST_STREAM_RESP: + metric = mkf_u64(FN_COUNTER, counters->rst_stream_resp); + break; + case H2_ST_GOAWAY_RESP: + metric = mkf_u64(FN_COUNTER, counters->goaway_resp); + break; + case H2_ST_OPEN_CONN: + metric = 
mkf_u64(FN_GAUGE, counters->open_conns); + break; + case H2_ST_OPEN_STREAM: + metric = mkf_u64(FN_GAUGE, counters->open_streams); + break; + case H2_ST_TOTAL_CONN: + metric = mkf_u64(FN_COUNTER, counters->total_conns); + break; + case H2_ST_TOTAL_STREAM: + metric = mkf_u64(FN_COUNTER, counters->total_streams); + break; + default: + /* not used for frontends. If a specific metric + * is requested, return an error. Otherwise continue. + */ + if (selected_field != NULL) + return 0; + continue; + } + stats[current_field] = metric; + if (selected_field != NULL) + break; + } + return 1; } static struct stats_module h2_stats_module = { @@ -770,13 +812,13 @@ static int h2_buf_available(void *target) struct h2c *h2c = target; struct h2s *h2s; - if ((h2c->flags & H2_CF_DEM_DALLOC) && b_alloc(&h2c->dbuf)) { + if ((h2c->flags & H2_CF_DEM_DALLOC) && b_alloc(&h2c->dbuf, DB_MUX_RX)) { h2c->flags &= ~H2_CF_DEM_DALLOC; h2c_restart_reading(h2c, 1); return 1; } - if ((h2c->flags & H2_CF_MUX_MALLOC) && b_alloc(br_tail(h2c->mbuf))) { + if ((h2c->flags & H2_CF_MUX_MALLOC) && b_alloc(br_tail(h2c->mbuf), DB_MUX_TX)) { h2c->flags &= ~H2_CF_MUX_MALLOC; if (h2c->flags & H2_CF_DEM_MROOM) { @@ -788,7 +830,7 @@ static int h2_buf_available(void *target) if ((h2c->flags & H2_CF_DEM_SALLOC) && (h2s = h2c_st_by_id(h2c, h2c->dsi)) && h2s_sc(h2s) && - b_alloc(&h2s->rxbuf)) { + b_alloc(&h2s->rxbuf, DB_SE_RX)) { h2c->flags &= ~H2_CF_DEM_SALLOC; h2c_restart_reading(h2c, 1); return 1; @@ -802,10 +844,8 @@ static inline struct buffer *h2_get_buf(struct h2c *h2c, struct buffer *bptr) struct buffer *buf = NULL; if (likely(!LIST_INLIST(&h2c->buf_wait.list)) && - unlikely((buf = b_alloc(bptr)) == NULL)) { - h2c->buf_wait.target = h2c; - h2c->buf_wait.wakeup_cb = h2_buf_available; - LIST_APPEND(&th_ctx->buffer_wq, &h2c->buf_wait.list); + unlikely((buf = b_alloc(bptr, DB_MUX_RX)) == NULL)) { + b_queue(DB_MUX_RX, &h2c->buf_wait, h2c, h2_buf_available); } return buf; } @@ -1153,8 +1193,7 @@ static void 
h2_release(struct h2c *h2c) hpack_dht_free(h2c->ddht); - if (LIST_INLIST(&h2c->buf_wait.list)) - LIST_DEL_INIT(&h2c->buf_wait.list); + b_dequeue(&h2c->buf_wait); h2_release_buf(h2c, &h2c->dbuf); h2_release_mbuf(h2c); @@ -1222,6 +1261,20 @@ static inline int h2s_mws(const struct h2s *h2s) return h2s->sws + h2s->h2c->miw; } +/* Returns 1 if the H2 error of the opposite side is forwardable to the peer. + * Otherwise 0 is returned. + * For now, only CANCEL from the client is forwardable to the server. + */ +static inline int h2s_is_forwardable_abort(struct h2s *h2s, struct se_abort_info *reason) +{ + enum h2_err err = H2_ERR_NO_ERROR; + + if (reason && ((reason->info & SE_ABRT_SRC_MASK) >> SE_ABRT_SRC_SHIFT) == SE_ABRT_SRC_MUX_H2) + err = reason->code; + + return ((h2s->h2c->flags & H2_CF_IS_BACK) && (err == H2_ERR_CANCEL)); +} + /* marks an error on the connection. Before settings are sent, we must not send * a GOAWAY frame, and the error state will prevent h2c_send_goaway_error() * from verifying this so we set H2_CF_GOAWAY_FAILED to make sure it will not @@ -2770,6 +2823,10 @@ static int h2c_handle_rst_stream(struct h2c *h2c, struct h2s *h2s) if (h2s_sc(h2s)) { se_fl_set_error(h2s->sd); + if (!h2s->sd->abort_info.info) { + h2s->sd->abort_info.info = (SE_ABRT_SRC_MUX_H2 << SE_ABRT_SRC_SHIFT); + h2s->sd->abort_info.code = h2s->errcode; + } h2s_alert(h2s); } @@ -4344,8 +4401,13 @@ static int h2_process(struct h2c *h2c) if (!(h2c->flags & H2_CF_DEM_BLOCK_ANY) && (b_data(&h2c->dbuf) || (h2c->flags & H2_CF_RCVD_SHUT))) { + int prev_glitches = h2c->glitches; + h2_process_demux(h2c); + if (h2c->glitches != prev_glitches && !(h2c->flags & H2_CF_IS_BACK)) + session_add_glitch_ctr(h2c->conn->owner, h2c->glitches - prev_glitches); + if (h2c->st0 >= H2_CS_ERROR || (h2c->flags & H2_CF_ERROR)) b_reset(&h2c->dbuf); @@ -4664,6 +4726,12 @@ static int h2_ctl(struct connection *conn, enum mux_ctl_type mux_ctl, void *outp case MUX_CTL_GET_GLITCHES: return h2c->glitches; + case 
MUX_CTL_GET_NBSTRM: + return h2c->nb_streams; + + case MUX_CTL_GET_MAXSTRM: + return h2c->streams_limit; + default: return -1; } @@ -4772,6 +4840,10 @@ static void h2_detach(struct sedesc *sd) } } if (eb_is_empty(&h2c->streams_by_id)) { + /* mark that the tasklet may lose its context to another thread and + * that the handler needs to check it under the idle conns lock. + */ + HA_ATOMIC_OR(&h2c->wait_event.tasklet->state, TASK_F_USR1); if (session_check_idle_conn(h2c->conn->owner, h2c->conn) != 0) { /* At this point either the connection is destroyed, or it's been added to the server idle list, just stop */ TRACE_DEVEL("leaving without reusable idle connection", H2_EV_STRM_END); @@ -4811,7 +4883,7 @@ static void h2_detach(struct sedesc *sd) } else if (!h2c->conn->hash_node->node.node.leaf_p && h2_avail_streams(h2c->conn) > 0 && objt_server(h2c->conn->target) && - !LIST_INLIST(&h2c->conn->session_list)) { + !LIST_INLIST(&h2c->conn->sess_el)) { srv_add_to_avail_list(__objt_server(h2c->conn->target), h2c->conn); } } @@ -4837,7 +4909,7 @@ static void h2_detach(struct sedesc *sd) } /* Performs a synchronous or asynchronous shutr(). */ -static void h2_do_shutr(struct h2s *h2s) +static void h2_do_shutr(struct h2s *h2s, struct se_abort_info *reason) { struct h2c *h2c = h2s->h2c; @@ -4860,6 +4932,10 @@ static void h2_do_shutr(struct h2s *h2s) h2c_error(h2c, H2_ERR_ENHANCE_YOUR_CALM); h2s_error(h2s, H2_ERR_ENHANCE_YOUR_CALM); } + else if (h2s_is_forwardable_abort(h2s, reason)) { + TRACE_STATE("shutr using opposite endp code", H2_EV_STRM_SHUT, h2c->conn, h2s); + h2s_error(h2s, reason->code); + } else if (!(h2s->flags & H2_SF_HEADERS_SENT)) { /* Nothing was never sent for this stream, so reset with * REFUSED_STREAM error to let the client retry the @@ -4905,8 +4981,9 @@ add_to_list: return; } + /* Performs a synchronous or asynchronous shutw(). 
*/ -static void h2_do_shutw(struct h2s *h2s) +static void h2_do_shutw(struct h2s *h2s, struct se_abort_info *reason) { struct h2c *h2c = h2s->h2c; @@ -4916,6 +4993,7 @@ static void h2_do_shutw(struct h2s *h2s) TRACE_ENTER(H2_EV_STRM_SHUT, h2c->conn, h2s); if (h2s->st != H2_SS_ERROR && + !h2s_is_forwardable_abort(h2s, reason) && (h2s->flags & (H2_SF_HEADERS_SENT | H2_SF_MORE_HTX_DATA)) == H2_SF_HEADERS_SENT) { /* we can cleanly close using an empty data frame only after headers * and if no more data is expected to be sent. @@ -4940,6 +5018,10 @@ static void h2_do_shutw(struct h2s *h2s) h2c_error(h2c, H2_ERR_ENHANCE_YOUR_CALM); h2s_error(h2s, H2_ERR_ENHANCE_YOUR_CALM); } + else if (h2s_is_forwardable_abort(h2s, reason)) { + TRACE_STATE("shutw using opposite endp code", H2_EV_STRM_SHUT, h2c->conn, h2s); + h2s_error(h2s, reason->code); + } else if (h2s->flags & H2_SF_MORE_HTX_DATA) { /* some unsent data were pending (e.g. abort during an upload), * let's send a CANCEL. @@ -5006,10 +5088,10 @@ struct task *h2_deferred_shut(struct task *t, void *ctx, unsigned int state) } if (h2s->flags & H2_SF_WANT_SHUTW) - h2_do_shutw(h2s); + h2_do_shutw(h2s, NULL); if (h2s->flags & H2_SF_WANT_SHUTR) - h2_do_shutr(h2s); + h2_do_shutr(h2s, NULL); if (!(h2s->flags & (H2_SF_WANT_SHUTR|H2_SF_WANT_SHUTW))) { /* We're done trying to send, remove ourself from the send_list */ @@ -5028,24 +5110,17 @@ struct task *h2_deferred_shut(struct task *t, void *ctx, unsigned int state) return t; } -/* shutr() called by the stream connector (mux_ops.shutr) */ -static void h2_shutr(struct stconn *sc, enum co_shr_mode mode) -{ - struct h2s *h2s = __sc_mux_strm(sc); - - TRACE_ENTER(H2_EV_STRM_SHUT, h2s->h2c->conn, h2s); - if (mode) - h2_do_shutr(h2s); - TRACE_LEAVE(H2_EV_STRM_SHUT, h2s->h2c->conn, h2s); -} - -/* shutw() called by the stream connector (mux_ops.shutw) */ -static void h2_shutw(struct stconn *sc, enum co_shw_mode mode) +static void h2_shut(struct stconn *sc, enum se_shut_mode mode, struct 
se_abort_info *reason) { struct h2s *h2s = __sc_mux_strm(sc); TRACE_ENTER(H2_EV_STRM_SHUT, h2s->h2c->conn, h2s); - h2_do_shutw(h2s); + if (mode & (SE_SHW_SILENT|SE_SHW_NORMAL)) { + /* Pass the reason for silent shutw only (abort) */ + h2_do_shutw(h2s, (mode & SE_SHW_SILENT) ? reason : NULL); + } + if (mode & SE_SHR_RESET) + h2_do_shutr(h2s, reason); TRACE_LEAVE(H2_EV_STRM_SHUT, h2s->h2c->conn, h2s); } @@ -6197,10 +6272,9 @@ static size_t h2s_snd_bhdrs(struct h2s *h2s, struct htx *htx) } /* Try to send a DATA frame matching HTTP response present in HTX structure - * present in <buf>, for stream <h2s>. Returns the number of bytes sent. The - * caller must check the stream's status to detect any error which might have - * happened subsequently to a successful send. Returns the number of data bytes - * consumed, or zero if nothing done. + * present in <buf>, for stream <h2s>. The caller must check the stream's status + * to detect any error which might have happened subsequently to a successful + * send. Returns the number of data bytes consumed, or zero if nothing done. */ static size_t h2s_make_data(struct h2s *h2s, struct buffer *buf, size_t count) { @@ -7095,7 +7169,7 @@ static size_t h2_snd_buf(struct stconn *sc, struct buffer *buf, size_t count, in return total; } -static size_t h2_nego_ff(struct stconn *sc, struct buffer *input, size_t count, unsigned int may_splice) +static size_t h2_nego_ff(struct stconn *sc, struct buffer *input, size_t count, unsigned int flags) { struct h2s *h2s = __sc_mux_strm(sc); struct h2c *h2c = h2s->h2c; @@ -7454,25 +7528,35 @@ static int h2_show_sd(struct buffer *msg, struct sedesc *sd, const char *pfx) * Return 0 if successful, non-zero otherwise. * Expected to be called with the old thread lock held. 
*/ -static int h2_takeover(struct connection *conn, int orig_tid) +static int h2_takeover(struct connection *conn, int orig_tid, int release) { struct h2c *h2c = conn->ctx; struct task *task; - struct task *new_task; - struct tasklet *new_tasklet; + struct task *new_task = NULL; + struct tasklet *new_tasklet = NULL; /* Pre-allocate tasks so that we don't have to roll back after the xprt * has been migrated. */ - new_task = task_new_here(); - new_tasklet = tasklet_new(); - if (!new_task || !new_tasklet) - goto fail; + if (!release) { + /* If the connection is attached to a buffer_wait (extremely + * rare), it will be woken up at any instant by its own thread + * and we can't undo it anyway, so let's give up on this one. + * It's not interesting anyway since it's not usable right now. + */ + if (LIST_INLIST(&h2c->buf_wait.list)) + goto fail; + + new_task = task_new_here(); + new_tasklet = tasklet_new(); + if (!new_task || !new_tasklet) + goto fail; + } if (fd_takeover(conn->handle.fd, conn) != 0) goto fail; - if (conn->xprt->takeover && conn->xprt->takeover(conn, conn->xprt_ctx, orig_tid) != 0) { + if (conn->xprt->takeover && conn->xprt->takeover(conn, conn->xprt_ctx, orig_tid, release) != 0) { /* We failed to takeover the xprt, even if the connection may * still be valid, flag it as error'd, as we have already * taken over the fd, and wake the tasklet, so that it will @@ -7499,8 +7583,10 @@ static int h2_takeover(struct connection *conn, int orig_tid) h2c->task = new_task; new_task = NULL; - h2c->task->process = h2_timeout_task; - h2c->task->context = h2c; + if (!release) { + h2c->task->process = h2_timeout_task; + h2c->task->context = h2c; + } } /* To let the tasklet know it should free itself, and do nothing else, @@ -7510,10 +7596,26 @@ static int h2_takeover(struct connection *conn, int orig_tid) tasklet_wakeup_on(h2c->wait_event.tasklet, orig_tid); h2c->wait_event.tasklet = new_tasklet; - h2c->wait_event.tasklet->process = h2_io_cb; - 
h2c->wait_event.tasklet->context = h2c; - h2c->conn->xprt->subscribe(h2c->conn, h2c->conn->xprt_ctx, - SUB_RETRY_RECV, &h2c->wait_event); + if (!release) { + h2c->wait_event.tasklet->process = h2_io_cb; + h2c->wait_event.tasklet->context = h2c; + h2c->conn->xprt->subscribe(h2c->conn, h2c->conn->xprt_ctx, + SUB_RETRY_RECV, &h2c->wait_event); + } + + if (release) { + /* we're being called for a server deletion and are running + * under thread isolation. That's the only way we can + * unregister a possible subscription of the original + * connection from its owner thread's queue, as this involves + * manipulating thread-unsafe areas. Note that it is not + * possible to just call b_dequeue() here as it would update + * the current thread's bufq_map and not the original one. + */ + BUG_ON(!thread_isolated()); + if (LIST_INLIST(&h2c->buf_wait.list)) + _b_dequeue(&h2c->buf_wait, orig_tid); + } if (new_task) __task_free(new_task); @@ -7690,8 +7792,7 @@ static const struct mux_ops h2_ops = { .destroy = h2_destroy, .avail_streams = h2_avail_streams, .used_streams = h2_used_streams, - .shutr = h2_shutr, - .shutw = h2_shutw, + .shut = h2_shut, .ctl = h2_ctl, .sctl = h2_sctl, .show_fd = h2_show_fd, diff --git a/src/mux_pt.c b/src/mux_pt.c index 3cca6a1..6dbbe04 100644 --- a/src/mux_pt.c +++ b/src/mux_pt.c @@ -462,39 +462,30 @@ static int mux_pt_avail_streams(struct connection *conn) return 1 - mux_pt_used_streams(conn); } -static void mux_pt_shutr(struct stconn *sc, enum co_shr_mode mode) -{ - struct connection *conn = __sc_conn(sc); - struct mux_pt_ctx *ctx = conn->ctx; - - TRACE_ENTER(PT_EV_STRM_SHUT, conn, sc); - - se_fl_clr(ctx->sd, SE_FL_RCV_MORE | SE_FL_WANT_ROOM); - if (conn_xprt_ready(conn) && conn->xprt->shutr) - conn->xprt->shutr(conn, conn->xprt_ctx, - (mode == CO_SHR_DRAIN)); - else if (mode == CO_SHR_DRAIN) - conn_ctrl_drain(conn); - if (se_fl_test(ctx->sd, SE_FL_SHW)) - conn_full_close(conn); - - TRACE_LEAVE(PT_EV_STRM_SHUT, conn, sc); -} - -static void 
mux_pt_shutw(struct stconn *sc, enum co_shw_mode mode) +static void mux_pt_shut(struct stconn *sc, enum se_shut_mode mode, struct se_abort_info *reason) { struct connection *conn = __sc_conn(sc); struct mux_pt_ctx *ctx = conn->ctx; TRACE_ENTER(PT_EV_STRM_SHUT, conn, sc); + if (mode & (SE_SHW_SILENT|SE_SHW_NORMAL)) { + if (conn_xprt_ready(conn) && conn->xprt->shutw) + conn->xprt->shutw(conn, conn->xprt_ctx, (mode & SE_SHW_NORMAL)); + if (conn->flags & CO_FL_SOCK_RD_SH) + conn_full_close(conn); + else + conn_sock_shutw(conn, (mode & SE_SHW_NORMAL)); + } - if (conn_xprt_ready(conn) && conn->xprt->shutw) - conn->xprt->shutw(conn, conn->xprt_ctx, - (mode == CO_SHW_NORMAL)); - if (!se_fl_test(ctx->sd, SE_FL_SHR)) - conn_sock_shutw(conn, (mode == CO_SHW_NORMAL)); - else - conn_full_close(conn); + if (mode & (SE_SHR_RESET|SE_SHR_DRAIN)) { + se_fl_clr(ctx->sd, SE_FL_RCV_MORE | SE_FL_WANT_ROOM); + if (conn_xprt_ready(conn) && conn->xprt->shutr) + conn->xprt->shutr(conn, conn->xprt_ctx, (mode & SE_SHR_DRAIN)); + else if (mode & SE_SHR_DRAIN) + conn_ctrl_drain(conn); + if (conn->flags & CO_FL_SOCK_WR_SH) + conn_full_close(conn); + } TRACE_LEAVE(PT_EV_STRM_SHUT, conn, sc); } @@ -582,7 +573,7 @@ static inline struct sedesc *mux_pt_opposite_sd(struct mux_pt_ctx *ctx) return sdo; } -static size_t mux_pt_nego_ff(struct stconn *sc, struct buffer *input, size_t count, unsigned int may_splice) +static size_t mux_pt_nego_ff(struct stconn *sc, struct buffer *input, size_t count, unsigned int flags) { struct connection *conn = __sc_conn(sc); struct mux_pt_ctx *ctx = conn->ctx; @@ -597,7 +588,7 @@ static size_t mux_pt_nego_ff(struct stconn *sc, struct buffer *input, size_t cou * and then data in pipe, or the opposite. For now, it is not * supported to mix data. 
*/ - if (!b_data(input) && may_splice) { + if (!b_data(input) && (flags & NEGO_FF_FL_MAY_SPLICE)) { if (conn->xprt->snd_pipe && (ctx->sd->iobuf.pipe || (pipes_used < global.maxpipes && (ctx->sd->iobuf.pipe = get_pipe())))) { ctx->sd->iobuf.offset = 0; ctx->sd->iobuf.data = 0; @@ -653,6 +644,7 @@ static int mux_pt_fastfwd(struct stconn *sc, unsigned int count, unsigned int fl struct mux_pt_ctx *ctx = conn->ctx; struct sedesc *sdo = NULL; size_t total = 0, try = 0; + unsigned int nego_flags = NEGO_FF_FL_NONE; int ret = 0; TRACE_ENTER(PT_EV_RX_DATA, conn, sc, 0, (size_t[]){count}); @@ -665,7 +657,10 @@ static int mux_pt_fastfwd(struct stconn *sc, unsigned int count, unsigned int fl goto out; } - try = se_nego_ff(sdo, &BUF_NULL, count, conn->xprt->rcv_pipe && !!(flags & CO_RFL_MAY_SPLICE) && !(sdo->iobuf.flags & IOBUF_FL_NO_SPLICING)); + if (conn->xprt->rcv_pipe && !!(flags & CO_RFL_MAY_SPLICE) && !(sdo->iobuf.flags & IOBUF_FL_NO_SPLICING)) + nego_flags |= NEGO_FF_FL_MAY_SPLICE; + + try = se_nego_ff(sdo, &BUF_NULL, count, nego_flags); if (sdo->iobuf.flags & IOBUF_FL_NO_FF) { /* Fast forwarding is not supported by the consumer */ se_fl_clr(ctx->sd, SE_FL_MAY_FASTFWD_PROD); @@ -786,6 +781,7 @@ static int mux_pt_unsubscribe(struct stconn *sc, int event_type, struct wait_eve static int mux_pt_ctl(struct connection *conn, enum mux_ctl_type mux_ctl, void *output) { int ret = 0; + switch (mux_ctl) { case MUX_CTL_STATUS: if (!(conn->flags & CO_FL_WAIT_XPRT)) @@ -793,6 +789,10 @@ static int mux_pt_ctl(struct connection *conn, enum mux_ctl_type mux_ctl, void * return ret; case MUX_CTL_EXIT_STATUS: return MUX_ES_UNKNOWN; + case MUX_CTL_GET_NBSTRM: + return mux_pt_used_streams(conn); + case MUX_CTL_GET_MAXSTRM: + return 1; default: return -1; } @@ -862,8 +862,7 @@ const struct mux_ops mux_tcp_ops = { .destroy = mux_pt_destroy_meth, .ctl = mux_pt_ctl, .sctl = mux_pt_sctl, - .shutr = mux_pt_shutr, - .shutw = mux_pt_shutw, + .shut = mux_pt_shut, .flags = MX_FL_NONE, .name = "PASS", 
}; @@ -888,8 +887,7 @@ const struct mux_ops mux_pt_ops = { .destroy = mux_pt_destroy_meth, .ctl = mux_pt_ctl, .sctl = mux_pt_sctl, - .shutr = mux_pt_shutr, - .shutw = mux_pt_shutw, + .shut = mux_pt_shut, .flags = MX_FL_NONE|MX_FL_NO_UPG, .name = "PASS", }; diff --git a/src/mux_quic.c b/src/mux_quic.c index 05c92fa..ae504ee 100644 --- a/src/mux_quic.c +++ b/src/mux_quic.c @@ -3,6 +3,7 @@ #include <import/eb64tree.h> #include <haproxy/api.h> +#include <haproxy/chunk.h> #include <haproxy/connection.h> #include <haproxy/dynbuf.h> #include <haproxy/h3.h> @@ -13,6 +14,7 @@ #include <haproxy/qmux_http.h> #include <haproxy/qmux_trace.h> #include <haproxy/quic_conn.h> +#include <haproxy/quic_fctl.h> #include <haproxy/quic_frame.h> #include <haproxy/quic_sock.h> #include <haproxy/quic_stream.h> @@ -58,6 +60,8 @@ static void qcs_free(struct qcs *qcs) /* Safe to use even if already removed from the list. */ LIST_DEL_INIT(&qcs->el_opening); LIST_DEL_INIT(&qcs->el_send); + LIST_DEL_INIT(&qcs->el_fctl); + LIST_DEL_INIT(&qcs->el_buf); /* Release stream endpoint descriptor. */ BUG_ON(qcs->sd && !se_fl_test(qcs->sd, SE_FL_ORPHAN)); @@ -68,11 +72,10 @@ static void qcs_free(struct qcs *qcs) qcc->app_ops->detach(qcs); /* Release qc_stream_desc buffer from quic-conn layer. */ - qc_stream_desc_release(qcs->stream, qcs->tx.sent_offset); + qc_stream_desc_release(qcs->stream, qcs->tx.fc.off_real); - /* Free Rx/Tx buffers. */ + /* Free Rx buffer. */ qcs_free_ncbuf(qcs, &qcs->rx.ncbuf); - b_free(&qcs->tx.buf); /* Remove qcs from qcc tree. 
*/ eb64_delete(&qcs->by_id); @@ -97,34 +100,45 @@ static struct qcs *qcs_new(struct qcc *qcc, uint64_t id, enum qcs_type type) qcs->stream = NULL; qcs->qcc = qcc; - qcs->sd = NULL; qcs->flags = QC_SF_NONE; qcs->st = QC_SS_IDLE; qcs->ctx = NULL; + qcs->sd = sedesc_new(); + if (!qcs->sd) + goto err; + qcs->sd->se = qcs; + qcs->sd->conn = qcc->conn; + se_fl_set(qcs->sd, SE_FL_T_MUX | SE_FL_ORPHAN | SE_FL_NOT_FIRST); + se_expect_no_data(qcs->sd); + + if (!(global.tune.no_zero_copy_fwd & NO_ZERO_COPY_FWD_QUIC_SND)) + se_fl_set(qcs->sd, SE_FL_MAY_FASTFWD_CONS); + /* App callback attach may register the stream for http-request wait. * These fields must be initialed before. */ LIST_INIT(&qcs->el_opening); LIST_INIT(&qcs->el_send); + LIST_INIT(&qcs->el_fctl); + LIST_INIT(&qcs->el_buf); qcs->start = TICK_ETERNITY; /* store transport layer stream descriptor in qcc tree */ qcs->id = qcs->by_id.key = id; eb64_insert(&qcc->streams_by_id, &qcs->by_id); - /* If stream is local, use peer remote-limit, or else the opposite. */ + /* Different limits can be set by the peer for local and remote bidi streams. */ if (quic_stream_is_bidi(id)) { - qcs->tx.msd = quic_stream_is_local(qcc, id) ? qcc->rfctl.msd_bidi_r : - qcc->rfctl.msd_bidi_l; + qfctl_init(&qcs->tx.fc, quic_stream_is_local(qcc, id) ? + qcc->rfctl.msd_bidi_r : qcc->rfctl.msd_bidi_l); } else if (quic_stream_is_local(qcc, id)) { - qcs->tx.msd = qcc->rfctl.msd_uni_l; + qfctl_init(&qcs->tx.fc, qcc->rfctl.msd_uni_l); + } + else { + qfctl_init(&qcs->tx.fc, 0); } - - /* Properly set flow-control blocking if initial MSD is nul. 
*/ - if (!qcs->tx.msd) - qcs->flags |= QC_SF_BLK_SFCTL; qcs->rx.ncbuf = NCBUF_NULL; qcs->rx.app_buf = BUF_NULL; @@ -139,10 +153,6 @@ static struct qcs *qcs_new(struct qcc *qcc, uint64_t id, enum qcs_type type) } qcs->rx.msd_init = qcs->rx.msd; - qcs->tx.buf = BUF_NULL; - qcs->tx.offset = 0; - qcs->tx.sent_offset = 0; - qcs->wait_event.tasklet = NULL; qcs->wait_event.events = 0; qcs->subs = NULL; @@ -423,15 +433,6 @@ int qcs_is_close_remote(struct qcs *qcs) return qcs->st == QC_SS_HREM || qcs->st == QC_SS_CLO; } -/* Allocate if needed buffer <bptr> for stream <qcs>. - * - * Returns the buffer instance or NULL on allocation failure. - */ -struct buffer *qcs_get_buf(struct qcs *qcs, struct buffer *bptr) -{ - return b_alloc(bptr); -} - /* Allocate if needed buffer <ncbuf> for stream <qcs>. * * Returns the buffer instance or NULL on allocation failure. @@ -441,7 +442,7 @@ static struct ncbuf *qcs_get_ncbuf(struct qcs *qcs, struct ncbuf *ncbuf) struct buffer buf = BUF_NULL; if (ncb_is_null(ncbuf)) { - if (!b_alloc(&buf)) + if (!b_alloc(&buf, DB_MUX_RX)) return NULL; *ncbuf = ncb_make(buf.area, buf.size, 0); @@ -511,6 +512,35 @@ void qcs_notify_send(struct qcs *qcs) } } +/* Notify on a new stream-desc buffer available for <qcc> connection. + * + * Returns true if a stream was woken up. If false is returned, this indicates + * to the caller that it's currently unnecessary to notify for the rest of the + * available buffers. 
+ */ +int qcc_notify_buf(struct qcc *qcc) +{ + struct qcs *qcs; + int ret = 0; + + TRACE_ENTER(QMUX_EV_QCC_WAKE, qcc->conn); + + if (qcc->flags & QC_CF_CONN_FULL) { + TRACE_STATE("new stream desc buffer available", QMUX_EV_QCC_WAKE, qcc->conn); + qcc->flags &= ~QC_CF_CONN_FULL; + } + + if (!LIST_ISEMPTY(&qcc->buf_wait_list)) { + qcs = LIST_ELEM(qcc->buf_wait_list.n, struct qcs *, el_buf); + LIST_DEL_INIT(&qcs->el_buf); + qcs_notify_send(qcs); + ret = 1; + } + + TRACE_LEAVE(QMUX_EV_QCC_WAKE, qcc->conn); + return ret; +} + /* A fatal error is detected locally for <qcc> connection. It should be closed * with a CONNECTION_CLOSE using <err> code. Set <app> to true to indicate that * the code must be considered as an application level error. This function @@ -536,6 +566,28 @@ void qcc_set_error(struct qcc *qcc, int err, int app) tasklet_wakeup(qcc->wait_event.tasklet); } +/* Increment glitch counter for <qcc> connection by <inc> steps. If configured + * threshold reached, close the connection with an error code. + */ +int qcc_report_glitch(struct qcc *qcc, int inc) +{ + const int max = global.tune.quic_frontend_glitches_threshold; + + qcc->glitches += inc; + if (max && qcc->glitches >= max && !(qcc->flags & QC_CF_ERRL)) { + if (qcc->app_ops->report_susp) { + qcc->app_ops->report_susp(qcc->ctx); + qcc_set_error(qcc, qcc->err.code, 1); + } + else { + qcc_set_error(qcc, QC_ERR_INTERNAL_ERROR, 0); + } + return 1; + } + + return 0; +} + /* Open a locally initiated stream for the connection <qcc>. Set <bidi> for a * bidirectional stream, else an unidirectional stream is opened. The next * available ID on the connection will be used according to the stream type. 
@@ -650,17 +702,6 @@ struct stconn *qcs_attach_sc(struct qcs *qcs, struct buffer *buf, char fin) struct qcc *qcc = qcs->qcc; struct session *sess = qcc->conn->owner; - qcs->sd = sedesc_new(); - if (!qcs->sd) - return NULL; - - qcs->sd->se = qcs; - qcs->sd->conn = qcc->conn; - se_fl_set(qcs->sd, SE_FL_T_MUX | SE_FL_ORPHAN | SE_FL_NOT_FIRST); - se_expect_no_data(qcs->sd); - - if (!(global.tune.no_zero_copy_fwd & NO_ZERO_COPY_FWD_QUIC_SND)) - se_fl_set(qcs->sd, SE_FL_MAY_FASTFWD_CONS); /* TODO duplicated from mux_h2 */ sess->t_idle = ns_to_ms(now_ns - sess->accept_ts) - sess->t_handshake; @@ -899,7 +940,7 @@ static int qcc_decode_qcs(struct qcc *qcc, struct qcs *qcs) fin = 1; if (!(qcs->flags & QC_SF_READ_ABORTED)) { - ret = qcc->app_ops->decode_qcs(qcs, &b, fin); + ret = qcc->app_ops->rcv_buf(qcs, &b, fin); if (ret < 0) { TRACE_ERROR("decoding error", QMUX_EV_QCS_RECV, qcc->conn, qcs); goto err; @@ -930,25 +971,170 @@ static int qcc_decode_qcs(struct qcc *qcc, struct qcs *qcs) return 1; } +/* Allocate if needed and retrieve <qcs> stream buffer for data reception. + * + * Returns buffer pointer. May be NULL on allocation failure. + */ +struct buffer *qcc_get_stream_rxbuf(struct qcs *qcs) +{ + return b_alloc(&qcs->rx.app_buf, DB_MUX_RX); +} + +/* Allocate if needed and retrieve <qcs> stream buffer for data emission. + * + * <err> is an output argument which is useful to differentiate the failure + * cause when the buffer cannot be allocated. It is set to 0 if the connection + * buffer limit is reached. For fatal errors, its value is non-zero. + * + * Returns buffer pointer. May be NULL on allocation failure, in which case + * <err> will refer to the cause. + */ +struct buffer *qcc_get_stream_txbuf(struct qcs *qcs, int *err) +{ + struct qcc *qcc = qcs->qcc; + int buf_avail; + struct buffer *out = qc_stream_buf_get(qcs->stream); + + /* Stream must not try to reallocate a buffer if currently waiting for one. 
*/ + BUG_ON(LIST_INLIST(&qcs->el_buf)); + + *err = 0; + + if (!out) { + if (qcc->flags & QC_CF_CONN_FULL) { + LIST_APPEND(&qcc->buf_wait_list, &qcs->el_buf); + goto out; + } + + out = qc_stream_buf_alloc(qcs->stream, qcs->tx.fc.off_real, + &buf_avail); + if (!out) { + if (buf_avail) { + TRACE_ERROR("stream desc alloc failure", QMUX_EV_QCS_SEND, qcc->conn, qcs); + *err = 1; + goto out; + } + + TRACE_STATE("hitting stream desc buffer limit", QMUX_EV_QCS_SEND, qcc->conn, qcs); + LIST_APPEND(&qcc->buf_wait_list, &qcs->el_buf); + qcc->flags |= QC_CF_CONN_FULL; + goto out; + } + + if (!b_alloc(out, DB_MUX_TX)) { + TRACE_ERROR("buffer alloc failure", QMUX_EV_QCS_SEND, qcc->conn, qcs); + *err = 1; + goto out; + } + } + + out: + return out; +} + +/* Returns total number of bytes not already sent to quic-conn layer. */ +static uint64_t qcs_prep_bytes(const struct qcs *qcs) +{ + struct buffer *out = qc_stream_buf_get(qcs->stream); + uint64_t diff, base_off; + + if (!out) + return 0; + + /* if ack_offset < buf_offset, it points to an older buffer. */ + base_off = MAX(qcs->stream->buf_offset, qcs->stream->ack_offset); + diff = qcs->tx.fc.off_real - base_off; + return b_data(out) - diff; +} + +/* Try to realign <out> buffer for <qcs> stream. This is done only if there is + * no data waiting for ACK. + * + * Returns 0 if realign was performed else non-zero. + */ +int qcc_realign_stream_txbuf(const struct qcs *qcs, struct buffer *out) +{ + if (qcs_prep_bytes(qcs) == b_data(out)) { + b_slow_realign(out, trash.area, b_data(out)); + return 0; + } + + return 1; +} + +/* Release the current <qcs> Tx buffer. This is useful if space left is not + * enough anymore. A new instance can then be allocated to continue sending. + * + * This operation fails if there is not yet sent bytes in the buffer. In this + * case, stream layer should interrupt sending until further notification. 
+ * + * Returns 0 if buffer is released and a new one can be allocated or non-zero + * if there is still remaining data. + */ +int qcc_release_stream_txbuf(struct qcs *qcs) +{ + const uint64_t bytes = qcs_prep_bytes(qcs); + + /* Cannot release buffer if prepared data is not fully sent. */ + if (bytes) { + qcs->flags |= QC_SF_BLK_MROOM; + return 1; + } + + qc_stream_buf_release(qcs->stream); + return 0; +} + +/* Returns true if stream layer can proceed to emission via <qcs>. */ +int qcc_stream_can_send(const struct qcs *qcs) +{ + return !(qcs->flags & QC_SF_BLK_MROOM) && !LIST_INLIST(&qcs->el_buf); +} + +/* Wakes up every streams of <qcc> which are currently waiting for sending but + * are blocked on connection flow control. + */ +static void qcc_notify_fctl(struct qcc *qcc) +{ + struct qcs *qcs; + + while (!LIST_ISEMPTY(&qcc->fctl_list)) { + qcs = LIST_ELEM(qcc->fctl_list.n, struct qcs *, el_fctl); + LIST_DEL_INIT(&qcs->el_fctl); + qcs_notify_send(qcs); + } +} + /* Prepare for the emission of RESET_STREAM on <qcs> with error code <err>. */ void qcc_reset_stream(struct qcs *qcs, int err) { struct qcc *qcc = qcs->qcc; + const uint64_t diff = qcs_prep_bytes(qcs); if ((qcs->flags & QC_SF_TO_RESET) || qcs_is_close_local(qcs)) return; + /* TODO if QCS waiting for buffer, it could be removed from + * <qcc.buf_wait_list> if sending is closed now. + */ + TRACE_STATE("reset stream", QMUX_EV_QCS_END, qcc->conn, qcs); qcs->flags |= QC_SF_TO_RESET; qcs->err = err; - /* Remove prepared stream data from connection flow-control calcul. */ - if (qcs->tx.offset > qcs->tx.sent_offset) { - const uint64_t diff = qcs->tx.offset - qcs->tx.sent_offset; - BUG_ON(qcc->tx.offsets - diff < qcc->tx.sent_offsets); - qcc->tx.offsets -= diff; - /* Reset qcs offset to prevent BUG_ON() on qcs_destroy(). */ - qcs->tx.offset = qcs->tx.sent_offset; + if (diff) { + const int soft_blocked = qfctl_sblocked(&qcc->tx.fc); + + /* Soft offset cannot be inferior to real one. 
*/ + BUG_ON(qcc->tx.fc.off_soft - diff < qcc->tx.fc.off_real); + + /* Subtract to conn flow control data amount prepared on stream not yet sent. */ + qcc->tx.fc.off_soft -= diff; + if (soft_blocked && !qfctl_sblocked(&qcc->tx.fc)) + qcc_notify_fctl(qcc); + + /* Reset QCS soft off to prevent BUG_ON() on qcs_destroy(). */ + qcs->tx.fc.off_soft = qcs->tx.fc.off_real; } /* Report send error to stream-endpoint layer. */ @@ -957,15 +1143,16 @@ void qcc_reset_stream(struct qcs *qcs, int err) qcs_alert(qcs); } - qcc_send_stream(qcs, 1); + qcc_send_stream(qcs, 1, 0); tasklet_wakeup(qcc->wait_event.tasklet); } /* Register <qcs> stream for emission of STREAM, STOP_SENDING or RESET_STREAM. * Set <urg> to 1 if stream content should be treated in priority compared to - * other streams. + * other streams. For STREAM emission, <count> must contains the size of the + * frame payload. This is used for flow control accounting. */ -void qcc_send_stream(struct qcs *qcs, int urg) +void qcc_send_stream(struct qcs *qcs, int urg, int count) { struct qcc *qcc = qcs->qcc; @@ -983,6 +1170,11 @@ void qcc_send_stream(struct qcs *qcs, int urg) LIST_APPEND(&qcs->qcc->send_list, &qcs->el_send); } + if (count) { + qfctl_sinc(&qcc->tx.fc, count); + qfctl_sinc(&qcs->tx.fc, count); + } + TRACE_LEAVE(QMUX_EV_QCS_SEND, qcc->conn, qcs); } @@ -999,7 +1191,7 @@ void qcc_abort_stream_read(struct qcs *qcs) TRACE_STATE("abort stream read", QMUX_EV_QCS_END, qcc->conn, qcs); qcs->flags |= (QC_SF_TO_STOP_SENDING|QC_SF_READ_ABORTED); - qcc_send_stream(qcs, 1); + qcc_send_stream(qcs, 1, 0); tasklet_wakeup(qcc->wait_event.tasklet); end: @@ -1203,17 +1395,19 @@ int qcc_recv(struct qcc *qcc, uint64_t id, uint64_t len, uint64_t offset, */ int qcc_recv_max_data(struct qcc *qcc, uint64_t max) { + int unblock_soft = 0, unblock_real = 0; + TRACE_ENTER(QMUX_EV_QCC_RECV, qcc->conn); TRACE_PROTO("receiving MAX_DATA", QMUX_EV_QCC_RECV, qcc->conn); - if (qcc->rfctl.md < max) { - qcc->rfctl.md = max; + if 
(qfctl_set_max(&qcc->tx.fc, max, &unblock_soft, &unblock_real)) { TRACE_DATA("increase remote max-data", QMUX_EV_QCC_RECV, qcc->conn); - if (qcc->flags & QC_CF_BLK_MFCTL) { - qcc->flags &= ~QC_CF_BLK_MFCTL; + if (unblock_real) tasklet_wakeup(qcc->wait_event.tasklet); - } + + if (unblock_soft) + qcc_notify_fctl(qcc); } TRACE_LEAVE(QMUX_EV_QCC_RECV, qcc->conn); @@ -1249,16 +1443,18 @@ int qcc_recv_max_stream_data(struct qcc *qcc, uint64_t id, uint64_t max) goto err; if (qcs) { + int unblock_soft = 0, unblock_real = 0; + TRACE_PROTO("receiving MAX_STREAM_DATA", QMUX_EV_QCC_RECV|QMUX_EV_QCS_RECV, qcc->conn, qcs); - if (max > qcs->tx.msd) { - qcs->tx.msd = max; + if (qfctl_set_max(&qcs->tx.fc, max, &unblock_soft, &unblock_real)) { TRACE_DATA("increase remote max-stream-data", QMUX_EV_QCC_RECV|QMUX_EV_QCS_RECV, qcc->conn, qcs); - - if (qcs->flags & QC_SF_BLK_SFCTL) { - qcs->flags &= ~QC_SF_BLK_SFCTL; + if (unblock_real) { /* TODO optim: only wakeup IO-CB if stream has data to sent. */ tasklet_wakeup(qcc->wait_event.tasklet); } + + if (unblock_soft) + qcs_notify_send(qcs); } } @@ -1410,14 +1606,18 @@ int qcc_recv_stop_sending(struct qcc *qcc, uint64_t id, uint64_t err) } } - /* If FIN already reached, future RESET_STREAMS will be ignored. - * Manually set EOS in this case. - */ + /* Manually set EOS if FIN already reached as futures RESET_STREAM will be ignored in this case. */ if (qcs_sc(qcs) && se_fl_test(qcs->sd, SE_FL_EOI)) { se_fl_set(qcs->sd, SE_FL_EOS); qcs_alert(qcs); } + /* If not defined yet, set abort info for the sedesc */ + if (!qcs->sd->abort_info.info) { + qcs->sd->abort_info.info = (SE_ABRT_SRC_MUX_QUIC << SE_ABRT_SRC_SHIFT); + qcs->sd->abort_info.code = err; + } + /* RFC 9000 3.5. Solicited State Transitions * * An endpoint that receives a STOP_SENDING frame @@ -1500,12 +1700,12 @@ static void qcs_destroy(struct qcs *qcs) TRACE_ENTER(QMUX_EV_QCS_END, conn, qcs); - /* MUST not removed a stream with sending prepared data left. 
This is - * to ensure consistency on connection flow-control calculation. - */ - BUG_ON(qcs->tx.offset < qcs->tx.sent_offset); + if (!(qcc->flags & (QC_CF_ERR_CONN|QC_CF_ERRL))) { + /* MUST not removed a stream with sending prepared data left. This is + * to ensure consistency on connection flow-control calculation. + */ + BUG_ON(qcs->tx.fc.off_soft != qcs->tx.fc.off_real); - if (!(qcc->flags & QC_CF_ERRL)) { if (quic_stream_is_remote(qcc, id)) qcc_release_remote_stream(qcc, id); } @@ -1515,114 +1715,52 @@ static void qcs_destroy(struct qcs *qcs) TRACE_LEAVE(QMUX_EV_QCS_END, conn); } -/* Transfer as much as possible data on <qcs> from <in> to <out>. This is done - * in respect with available flow-control at stream and connection level. +/* Prepare a STREAM frame for <qcs> instance using <out> as payload. The frame + * is appended in <frm_list>. Set <fin> if this is supposed to be the last + * stream frame. If <out> is NULL an empty STREAM frame is built : this may be + * useful if FIN needs to be sent without any data left. Frame length will be + * truncated if greater than <fc_conn_wnd>. This allows to prepare several + * frames in a loop while respecting connection flow control window. * - * Returns the total bytes of transferred data or a negative error code. + * Returns the payload length of the STREAM frame or a negative error code. 
*/ -static int qcs_xfer_data(struct qcs *qcs, struct buffer *out, struct buffer *in) +static int qcs_build_stream_frm(struct qcs *qcs, struct buffer *out, char fin, + struct list *frm_list, uint64_t window_conn) { struct qcc *qcc = qcs->qcc; - int left, to_xfer; - int total = 0; + struct quic_frame *frm; + const uint64_t window_stream = qfctl_rcap(&qcs->tx.fc); + const uint64_t bytes = qcs_prep_bytes(qcs); + uint64_t total; TRACE_ENTER(QMUX_EV_QCS_SEND, qcc->conn, qcs); - if (!qcs_get_buf(qcs, out)) { - TRACE_ERROR("buffer alloc failure", QMUX_EV_QCS_SEND, qcc->conn, qcs); - goto err; - } - - /* - * QCS out buffer diagram - * head left to_xfer - * -------------> ----------> -----> - * -------------------------------------------------- - * |...............|xxxxxxxxxxx|<<<<< - * -------------------------------------------------- - * ^ ack-off ^ sent-off ^ off - * - * STREAM frame - * ^ ^ - * |xxxxxxxxxxxxxxxxx| - */ - - BUG_ON_HOT(qcs->tx.sent_offset < qcs->stream->ack_offset); - BUG_ON_HOT(qcs->tx.offset < qcs->tx.sent_offset); - BUG_ON_HOT(qcc->tx.offsets < qcc->tx.sent_offsets); + /* This must only be called if there is data left, or at least a standalone FIN. 
*/ + BUG_ON((!out || !b_data(out)) && !fin); - left = qcs->tx.offset - qcs->tx.sent_offset; - to_xfer = QUIC_MIN(b_data(in), b_room(out)); + total = bytes; - BUG_ON_HOT(qcs->tx.offset > qcs->tx.msd); - /* do not exceed flow control limit */ - if (qcs->tx.offset + to_xfer > qcs->tx.msd) { + /* do not exceed stream flow control limit */ + if (total > window_stream) { TRACE_DATA("do not exceed stream flow control", QMUX_EV_QCS_SEND, qcc->conn, qcs); - to_xfer = qcs->tx.msd - qcs->tx.offset; + total = window_stream; } - BUG_ON_HOT(qcc->tx.offsets > qcc->rfctl.md); - /* do not overcome flow control limit on connection */ - if (qcc->tx.offsets + to_xfer > qcc->rfctl.md) { + /* do not exceed connection flow control limit */ + if (total > window_conn) { TRACE_DATA("do not exceed conn flow control", QMUX_EV_QCS_SEND, qcc->conn, qcs); - to_xfer = qcc->rfctl.md - qcc->tx.offsets; + total = window_conn; } - if (!left && !to_xfer) - goto out; - - total = b_force_xfer(out, in, to_xfer); - - out: - { - struct qcs_xfer_data_trace_arg arg = { - .prep = b_data(out), .xfer = total, - }; - TRACE_LEAVE(QMUX_EV_QCS_SEND|QMUX_EV_QCS_XFER_DATA, - qcc->conn, qcs, &arg); - } - - return total; - - err: - TRACE_DEVEL("leaving on error", QMUX_EV_QCS_SEND, qcc->conn, qcs); - return -1; -} - -/* Prepare a STREAM frame for <qcs> instance using <out> as payload. The frame - * is appended in <frm_list>. Set <fin> if this is supposed to be the last - * stream frame. If <out> is NULL an empty STREAM frame is built : this may be - * useful if FIN needs to be sent without any data left. - * - * Returns the payload length of the STREAM frame or a negative error code. - */ -static int qcs_build_stream_frm(struct qcs *qcs, struct buffer *out, char fin, - struct list *frm_list) -{ - struct qcc *qcc = qcs->qcc; - struct quic_frame *frm; - int head, total; - uint64_t base_off; - - TRACE_ENTER(QMUX_EV_QCS_SEND, qcc->conn, qcs); - - /* if ack_offset < buf_offset, it points to an older buffer. 
*/ - base_off = MAX(qcs->stream->buf_offset, qcs->stream->ack_offset); - BUG_ON(qcs->tx.sent_offset < base_off); - - head = qcs->tx.sent_offset - base_off; - total = out ? b_data(out) - head : 0; - BUG_ON(total < 0); + /* Reset FIN if bytes to send is capped by flow control. */ + if (total < bytes) + fin = 0; if (!total && !fin) { /* No need to send anything if total is NULL and no FIN to signal. */ TRACE_LEAVE(QMUX_EV_QCS_SEND, qcc->conn, qcs); return 0; } - BUG_ON((!total && qcs->tx.sent_offset > qcs->tx.offset) || - (total && qcs->tx.sent_offset >= qcs->tx.offset)); - BUG_ON(qcs->tx.sent_offset + total > qcs->tx.offset); - BUG_ON(qcc->tx.sent_offsets + total > qcc->rfctl.md); TRACE_PROTO("sending STREAM frame", QMUX_EV_QCS_SEND, qcc->conn, qcs); frm = qc_frm_alloc(QUIC_FT_STREAM_8); @@ -1638,7 +1776,7 @@ static int qcs_build_stream_frm(struct qcs *qcs, struct buffer *out, char fin, if (total) { frm->stream.buf = out; - frm->stream.data = (unsigned char *)b_peek(out, head); + frm->stream.data = (unsigned char *)b_peek(out, b_data(out) - bytes); } else { /* Empty STREAM frame. */ @@ -1650,9 +1788,9 @@ static int qcs_build_stream_frm(struct qcs *qcs, struct buffer *out, char fin, if (fin) frm->type |= QUIC_STREAM_FRAME_TYPE_FIN_BIT; - if (qcs->tx.sent_offset) { + if (qcs->tx.fc.off_real) { frm->type |= QUIC_STREAM_FRAME_TYPE_OFF_BIT; - frm->stream.offset.key = qcs->tx.sent_offset; + frm->stream.offset.key = qcs->tx.fc.off_real; } /* Always set length bit as we do not know if there is remaining frames @@ -1680,23 +1818,6 @@ static int qcs_build_stream_frm(struct qcs *qcs, struct buffer *out, char fin, return -1; } -/* Check after transferring data from qcs.tx.buf if FIN must be set on the next - * STREAM frame for <qcs>. - * - * Returns true if FIN must be set else false. - */ -static int qcs_stream_fin(struct qcs *qcs) -{ - return qcs->flags & QC_SF_FIN_STREAM && !b_data(&qcs->tx.buf); -} - -/* Return true if <qcs> has data to send in new STREAM frames. 
*/ -static forceinline int qcs_need_sending(struct qcs *qcs) -{ - return b_data(&qcs->tx.buf) || qcs->tx.sent_offset < qcs->tx.offset || - qcs_stream_fin(qcs); -} - /* This function must be called by the upper layer to inform about the sending * of a STREAM frame for <qcs> instance. The frame is of <data> length and on * <offset>. @@ -1708,42 +1829,45 @@ void qcc_streams_sent_done(struct qcs *qcs, uint64_t data, uint64_t offset) TRACE_ENTER(QMUX_EV_QCS_SEND, qcc->conn, qcs); - BUG_ON(offset > qcs->tx.sent_offset); - BUG_ON(offset + data > qcs->tx.offset); + /* Real off MUST always be the greatest offset sent. */ + BUG_ON(offset > qcs->tx.fc.off_real); /* check if the STREAM frame has already been notified. It can happen * for retransmission. */ - if (offset + data < qcs->tx.sent_offset) { + if (offset + data < qcs->tx.fc.off_real) { TRACE_DEVEL("offset already notified", QMUX_EV_QCS_SEND, qcc->conn, qcs); goto out; } qcs_idle_open(qcs); - diff = offset + data - qcs->tx.sent_offset; + diff = offset + data - qcs->tx.fc.off_real; if (diff) { + struct quic_fctl *fc_conn = &qcc->tx.fc; + struct quic_fctl *fc_strm = &qcs->tx.fc; + + /* Ensure real offset never exceeds soft value. 
*/ + BUG_ON(fc_conn->off_real + diff > fc_conn->off_soft); + BUG_ON(fc_strm->off_real + diff > fc_strm->off_soft); + /* increase offset sum on connection */ - qcc->tx.sent_offsets += diff; - BUG_ON_HOT(qcc->tx.sent_offsets > qcc->rfctl.md); - if (qcc->tx.sent_offsets == qcc->rfctl.md) { - qcc->flags |= QC_CF_BLK_MFCTL; - TRACE_STATE("connection flow-control reached", QMUX_EV_QCS_SEND, qcc->conn); + if (qfctl_rinc(fc_conn, diff)) { + TRACE_STATE("connection flow-control reached", + QMUX_EV_QCS_SEND, qcc->conn); } /* increase offset on stream */ - qcs->tx.sent_offset += diff; - BUG_ON_HOT(qcs->tx.sent_offset > qcs->tx.msd); - BUG_ON_HOT(qcs->tx.sent_offset > qcs->tx.offset); - if (qcs->tx.sent_offset == qcs->tx.msd) { - qcs->flags |= QC_SF_BLK_SFCTL; - TRACE_STATE("stream flow-control reached", QMUX_EV_QCS_SEND, qcc->conn, qcs); + if (qfctl_rinc(fc_strm, diff)) { + TRACE_STATE("stream flow-control reached", + QMUX_EV_QCS_SEND, qcc->conn, qcs); } - - /* If qcs.stream.buf is full, release it to the lower layer. */ - if (qcs->tx.offset == qcs->tx.sent_offset && - b_full(&qcs->stream->buf->buf)) { + /* Release buffer if everything sent and buf is full or stream is waiting for room. */ + if (!qcs_prep_bytes(qcs) && + (b_full(&qcs->stream->buf->buf) || qcs->flags & QC_SF_BLK_MROOM)) { qc_stream_buf_release(qcs->stream); + qcs->flags &= ~QC_SF_BLK_MROOM; + qcs_notify_send(qcs); } /* Add measurement for send rate. This is done at the MUX layer @@ -1752,7 +1876,7 @@ void qcc_streams_sent_done(struct qcs *qcs, uint64_t data, uint64_t offset) increment_send_rate(diff, 0); } - if (qcs->tx.offset == qcs->tx.sent_offset && !b_data(&qcs->tx.buf)) { + if (!qc_stream_buf_get(qcs->stream) || !qcs_prep_bytes(qcs)) { /* Remove stream from send_list if all was sent. 
*/ LIST_DEL_INIT(&qcs->el_send); TRACE_STATE("stream sent done", QMUX_EV_QCS_SEND, qcc->conn, qcs); @@ -1842,7 +1966,7 @@ static int qcs_send_reset(struct qcs *qcs) frm->reset_stream.id = qcs->id; frm->reset_stream.app_error_code = qcs->err; - frm->reset_stream.final_size = qcs->tx.sent_offset; + frm->reset_stream.final_size = qcs->tx.fc.off_real; LIST_APPEND(&frms, &frm->list); if (qcc_send_frames(qcs->qcc, &frms)) { @@ -1910,87 +2034,46 @@ static int qcs_send_stop_sending(struct qcs *qcs) return 0; } -/* Used internally by qcc_io_send function. Proceed to send for <qcs>. This will - * transfer data from qcs buffer to its quic_stream counterpart. A STREAM frame - * is then generated and inserted in <frms> list. +/* Used internally by qcc_io_send function. Proceed to send for <qcs>. A STREAM + * frame is generated pointing to QCS stream descriptor content and inserted in + * <frms> list. Frame length will be truncated if greater than <window_conn>. + * This allows to prepare several frames in a loop while respecting connection + * flow control window. * - * Returns the total bytes transferred between qcs and quic_stream buffers. Can - * be null if out buffer cannot be allocated. On error a negative error code is - * used. + * Returns the payload length of the STREAM frame or a negative error code. */ -static int qcs_send(struct qcs *qcs, struct list *frms) +static int qcs_send(struct qcs *qcs, struct list *frms, uint64_t window_conn) { struct qcc *qcc = qcs->qcc; - struct buffer *buf = &qcs->tx.buf; struct buffer *out = qc_stream_buf_get(qcs->stream); - int xfer = 0, buf_avail; - char fin = 0; + int flen = 0; + const char fin = qcs->flags & QC_SF_FIN_STREAM; TRACE_ENTER(QMUX_EV_QCS_SEND, qcc->conn, qcs); /* Cannot send STREAM on remote unidirectional streams. */ BUG_ON(quic_stream_is_uni(qcs->id) && quic_stream_is_remote(qcc, qcs->id)); - if (b_data(buf)) { - /* Allocate <out> buffer if not already done. 
*/ - if (!out) { - if (qcc->flags & QC_CF_CONN_FULL) - goto out; - - out = qc_stream_buf_alloc(qcs->stream, qcs->tx.offset, - &buf_avail); - if (!out) { - if (buf_avail) { - TRACE_ERROR("stream desc alloc failure", QMUX_EV_QCS_SEND, qcc->conn, qcs); - goto err; - } - - TRACE_STATE("hitting stream desc buffer limit", QMUX_EV_QCS_SEND, qcc->conn, qcs); - qcc->flags |= QC_CF_CONN_FULL; - goto out; - } - } - - /* Transfer data from <buf> to <out>. */ - xfer = qcs_xfer_data(qcs, out, buf); - if (xfer < 0) - goto err; - - if (xfer > 0) { - qcs_notify_send(qcs); - qcs->flags &= ~QC_SF_BLK_MROOM; - } + /* This function must not be called if there is nothing to send. */ + BUG_ON(!fin && !qcs_prep_bytes(qcs)); - qcs->tx.offset += xfer; - BUG_ON_HOT(qcs->tx.offset > qcs->tx.msd); - qcc->tx.offsets += xfer; - BUG_ON_HOT(qcc->tx.offsets > qcc->rfctl.md); - - /* out buffer cannot be emptied if qcs offsets differ. */ - BUG_ON(!b_data(out) && qcs->tx.sent_offset != qcs->tx.offset); + /* Skip STREAM frame allocation if already subscribed for send. + * Happens on sendto transient error or network congestion. + */ + if (qcc->wait_event.events & SUB_RETRY_SEND) { + TRACE_DEVEL("already subscribed for sending", + QMUX_EV_QCS_SEND, qcc->conn, qcs); + goto err; } - /* FIN is set if all incoming data were transferred. */ - fin = qcs_stream_fin(qcs); - /* Build a new STREAM frame with <out> buffer. */ - if (qcs->tx.sent_offset != qcs->tx.offset || fin) { - /* Skip STREAM frame allocation if already subscribed for send. - * Happens on sendto transient error or network congestion. 
- */ - if (qcc->wait_event.events & SUB_RETRY_SEND) { - TRACE_DEVEL("already subscribed for sending", - QMUX_EV_QCS_SEND, qcc->conn, qcs); - goto err; - } - - if (qcs_build_stream_frm(qcs, out, fin, frms) < 0) - goto err; - } + flen = qcs_build_stream_frm(qcs, out, fin, frms, window_conn); + if (flen < 0) + goto err; out: TRACE_LEAVE(QMUX_EV_QCS_SEND, qcc->conn, qcs); - return xfer; + return flen; err: TRACE_DEVEL("leaving on error", QMUX_EV_QCS_SEND, qcc->conn, qcs); @@ -2008,7 +2091,8 @@ static int qcc_io_send(struct qcc *qcc) /* Temporary list for QCS on error. */ struct list qcs_failed = LIST_HEAD_INIT(qcs_failed); struct qcs *qcs, *qcs_tmp, *first_qcs = NULL; - int ret, total = 0; + uint64_t window_conn = qfctl_rcap(&qcc->tx.fc); + int ret, total = 0, resent; TRACE_ENTER(QMUX_EV_QCC_SEND, qcc->conn); @@ -2055,8 +2139,8 @@ static int qcc_io_send(struct qcc *qcc) break; /* Stream must not be present in send_list if it has nothing to send. */ - BUG_ON(!(qcs->flags & (QC_SF_TO_STOP_SENDING|QC_SF_TO_RESET)) && - !qcs_need_sending(qcs)); + BUG_ON(!(qcs->flags & (QC_SF_FIN_STREAM|QC_SF_TO_STOP_SENDING|QC_SF_TO_RESET)) && + (!qcs->stream || !qcs_prep_bytes(qcs))); /* Each STOP_SENDING/RESET_STREAM frame is sent individually to * guarantee its emission. @@ -2070,7 +2154,8 @@ static int qcc_io_send(struct qcc *qcc) /* Remove stream from send_list if it had only STOP_SENDING * to send. */ - if (!(qcs->flags & QC_SF_TO_RESET) && !qcs_need_sending(qcs)) { + if (!(qcs->flags & (QC_SF_FIN_STREAM|QC_SF_TO_RESET)) && + (!qcs->stream || !qcs_prep_bytes(qcs))) { LIST_DEL_INIT(&qcs->el_send); continue; } @@ -2091,9 +2176,12 @@ static int qcc_io_send(struct qcc *qcc) continue; } - if (!(qcc->flags & QC_CF_BLK_MFCTL) && - !(qcs->flags & QC_SF_BLK_SFCTL)) { - if ((ret = qcs_send(qcs, &frms)) < 0) { + /* Total sent bytes must not exceed connection window. 
*/ + BUG_ON(total > window_conn); + + if (!qfctl_rblocked(&qcc->tx.fc) && + !qfctl_rblocked(&qcs->tx.fc) && window_conn > total) { + if ((ret = qcs_send(qcs, &frms, window_conn - total)) < 0) { /* Temporarily remove QCS from send-list. */ LIST_DEL_INIT(&qcs->el_send); LIST_APPEND(&qcs_failed, &qcs->el_send); @@ -2117,7 +2205,10 @@ static int qcc_io_send(struct qcc *qcc) /* Retry sending until no frame to send, data rejected or connection * flow-control limit reached. */ - while (qcc_send_frames(qcc, &frms) == 0 && !(qcc->flags & QC_CF_BLK_MFCTL)) { + while (qcc_send_frames(qcc, &frms) == 0 && !qfctl_rblocked(&qcc->tx.fc)) { + window_conn = qfctl_rcap(&qcc->tx.fc); + resent = 0; + /* Reloop over <qcc.send_list>. Useful for streams which have * fulfilled their qc_stream_desc buf and have now release it. */ @@ -2126,16 +2217,20 @@ static int qcc_io_send(struct qcc *qcc) * new qc_stream_desc should be present in send_list as * long as transport layer can handle all data. */ - BUG_ON(qcs->stream->buf && !(qcs->flags & QC_SF_BLK_SFCTL)); + BUG_ON(qcs->stream->buf && !qfctl_rblocked(&qcs->tx.fc)); + + /* Total sent bytes must not exceed connection window. */ + BUG_ON(resent > window_conn); - if (!(qcs->flags & QC_SF_BLK_SFCTL)) { - if ((ret = qcs_send(qcs, &frms)) < 0) { + if (!qfctl_rblocked(&qcs->tx.fc) && window_conn > resent) { + if ((ret = qcs_send(qcs, &frms, window_conn - resent)) < 0) { LIST_DEL_INIT(&qcs->el_send); LIST_APPEND(&qcs_failed, &qcs->el_send); continue; } total += ret; + resent += ret; } } } @@ -2156,7 +2251,7 @@ static int qcc_io_send(struct qcc *qcc) LIST_APPEND(&qcc->send_list, &qcs->el_send); } - if (!(qcc->flags & QC_CF_BLK_MFCTL)) + if (!qfctl_rblocked(&qcc->tx.fc)) tasklet_wakeup(qcc->wait_event.tasklet); } @@ -2276,7 +2371,7 @@ static void qcc_shutdown(struct qcc *qcc) qcc_io_send(qcc); } else { - qcc->err = quic_err_app(QC_ERR_NO_ERROR); + qcc->err = quic_err_transport(QC_ERR_NO_ERROR); } /* Register "no error" code at transport layer. 
Do not use @@ -2381,9 +2476,7 @@ static int qcc_io_process(struct qcc *qcc) return 0; } -/* release function. This one should be called to free all resources allocated - * to the mux. - */ +/* Free all resources allocated for <qcc> connection. */ static void qcc_release(struct qcc *qcc) { struct connection *conn = qcc->conn; @@ -2391,8 +2484,6 @@ static void qcc_release(struct qcc *qcc) TRACE_ENTER(QMUX_EV_QCC_END, conn); - qcc_shutdown(qcc); - if (qcc->task) { task_destroy(qcc->task); qcc->task = NULL; @@ -2465,6 +2556,7 @@ struct task *qcc_io_cb(struct task *t, void *ctx, unsigned int status) return NULL; release: + qcc_shutdown(qcc); qcc_release(qcc); TRACE_LEAVE(QMUX_EV_QCC_WAKE); return NULL; @@ -2507,6 +2599,7 @@ static struct task *qcc_timeout_task(struct task *t, void *ctx, unsigned int sta */ if (qcc_is_dead(qcc)) { TRACE_STATE("releasing dead connection", QMUX_EV_QCC_WAKE, qcc->conn); + qcc_shutdown(qcc); qcc_release(qcc); } @@ -2519,6 +2612,17 @@ static struct task *qcc_timeout_task(struct task *t, void *ctx, unsigned int sta return t; } +/* Minimal initialization of <qcc> members to use qcc_release() safely. */ +static void _qcc_init(struct qcc *qcc) +{ + qcc->conn = NULL; + qcc->task = NULL; + qcc->wait_event.tasklet = NULL; + qcc->app_ops = NULL; + qcc->streams_by_id = EB_ROOT_UNIQUE; + LIST_INIT(&qcc->lfctl.frms); +} + static int qmux_init(struct connection *conn, struct proxy *prx, struct session *sess, struct buffer *input) { @@ -2530,24 +2634,19 @@ static int qmux_init(struct connection *conn, struct proxy *prx, qcc = pool_alloc(pool_head_qcc); if (!qcc) { TRACE_ERROR("alloc failure", QMUX_EV_QCC_NEW); - goto fail_no_qcc; + goto err; } - qcc->conn = conn; + _qcc_init(qcc); conn->ctx = qcc; qcc->nb_hreq = qcc->nb_sc = 0; qcc->flags = 0; - - qcc->app_ops = NULL; - - qcc->streams_by_id = EB_ROOT_UNIQUE; + qcc->glitches = 0; + qcc->err = quic_err_transport(QC_ERR_NO_ERROR); /* Server parameters, params used for RX flow control. 
*/ lparams = &conn->handle.qc->rx.params; - qcc->tx.sent_offsets = qcc->tx.offsets = 0; - - LIST_INIT(&qcc->lfctl.frms); qcc->lfctl.ms_bidi = qcc->lfctl.ms_bidi_init = lparams->initial_max_streams_bidi; qcc->lfctl.ms_uni = lparams->initial_max_streams_uni; qcc->lfctl.msd_bidi_l = lparams->initial_max_stream_data_bidi_local; @@ -2559,7 +2658,7 @@ static int qmux_init(struct connection *conn, struct proxy *prx, qcc->lfctl.offsets_recv = qcc->lfctl.offsets_consume = 0; rparams = &conn->handle.qc->tx.params; - qcc->rfctl.md = rparams->initial_max_data; + qfctl_init(&qcc->tx.fc, rparams->initial_max_data); qcc->rfctl.msd_bidi_l = rparams->initial_max_stream_data_bidi_local; qcc->rfctl.msd_bidi_r = rparams->initial_max_stream_data_bidi_remote; qcc->rfctl.msd_uni_l = rparams->initial_max_stream_data_uni; @@ -2580,10 +2679,12 @@ static int qmux_init(struct connection *conn, struct proxy *prx, qcc->wait_event.tasklet = tasklet_new(); if (!qcc->wait_event.tasklet) { TRACE_ERROR("taslket alloc failure", QMUX_EV_QCC_NEW); - goto fail_no_tasklet; + goto err; } LIST_INIT(&qcc->send_list); + LIST_INIT(&qcc->fctl_list); + LIST_INIT(&qcc->buf_wait_list); qcc->wait_event.tasklet->process = qcc_io_cb; qcc->wait_event.tasklet->context = qcc; @@ -2591,7 +2692,7 @@ static int qmux_init(struct connection *conn, struct proxy *prx, qcc->proxy = prx; /* haproxy timeouts */ - if (conn_is_back(qcc->conn)) { + if (conn_is_back(conn)) { qcc->timeout = prx->timeout.server; qcc->shut_timeout = tick_isset(prx->timeout.serverfin) ? 
prx->timeout.serverfin : prx->timeout.server; @@ -2608,7 +2709,7 @@ static int qmux_init(struct connection *conn, struct proxy *prx, qcc->task = task_new_here(); if (!qcc->task) { TRACE_ERROR("timeout task alloc failure", QMUX_EV_QCC_NEW); - goto fail_no_timeout_task; + goto err; } qcc->task->process = qcc_timeout_task; qcc->task->context = qcc; @@ -2619,11 +2720,12 @@ static int qmux_init(struct connection *conn, struct proxy *prx, HA_ATOMIC_STORE(&conn->handle.qc->qcc, qcc); + /* Register conn as app_ops may use it. */ + qcc->conn = conn; + if (qcc_install_app_ops(qcc, conn->handle.qc->app_ops)) { - TRACE_PROTO("Cannot install app layer", QMUX_EV_QCC_NEW|QMUX_EV_QCC_ERR, qcc->conn); - /* prepare a CONNECTION_CLOSE frame */ - quic_set_connection_close(conn->handle.qc, quic_err_transport(QC_ERR_APPLICATION_ERROR)); - goto fail_install_app_ops; + TRACE_PROTO("Cannot install app layer", QMUX_EV_QCC_NEW|QMUX_EV_QCC_ERR, conn); + goto err; } if (qcc->app_ops == &h3_ops) @@ -2636,19 +2738,24 @@ static int qmux_init(struct connection *conn, struct proxy *prx, /* init read cycle */ tasklet_wakeup(qcc->wait_event.tasklet); - TRACE_LEAVE(QMUX_EV_QCC_NEW, qcc->conn); + TRACE_LEAVE(QMUX_EV_QCC_NEW, conn); return 0; - fail_install_app_ops: - if (qcc->app_ops && qcc->app_ops->release) - qcc->app_ops->release(qcc->ctx); - task_destroy(qcc->task); - fail_no_timeout_task: - tasklet_free(qcc->wait_event.tasklet); - fail_no_tasklet: - pool_free(pool_head_qcc, qcc); - fail_no_qcc: - TRACE_LEAVE(QMUX_EV_QCC_NEW); + err: + /* Prepare CONNECTION_CLOSE, using INTERNAL_ERROR as fallback code if unset. */ + if (!(conn->handle.qc->flags & QUIC_FL_CONN_IMMEDIATE_CLOSE)) { + struct quic_err err = qcc && qcc->err.code ? + qcc->err : quic_err_transport(QC_ERR_INTERNAL_ERROR); + quic_set_connection_close(conn->handle.qc, err); + } + + if (qcc) { + /* In case of MUX init failure, session will ensure connection is freed. 
*/ + qcc->conn = NULL; + qcc_release(qcc); + } + + TRACE_DEVEL("leaving on error", QMUX_EV_QCC_NEW, conn); return -1; } @@ -2704,6 +2811,7 @@ static void qmux_strm_detach(struct sedesc *sd) return; release: + qcc_shutdown(qcc); qcc_release(qcc); TRACE_LEAVE(QMUX_EV_STRM_END); return; @@ -2786,11 +2894,18 @@ static size_t qmux_strm_snd_buf(struct stconn *sc, struct buffer *buf, size_t count, int flags) { struct qcs *qcs = __sc_mux_strm(sc); + const size_t old_data = qcs_prep_bytes(qcs); size_t ret = 0; char fin; TRACE_ENTER(QMUX_EV_STRM_SEND, qcs->qcc->conn, qcs); + /* Stream must not be woken up if already waiting for conn buffer. */ + BUG_ON(LIST_INLIST(&qcs->el_buf)); + + /* Sending forbidden if QCS is locally closed (FIN or RESET_STREAM sent). */ + BUG_ON(qcs_is_close_local(qcs) || (qcs->flags & QC_SF_TO_RESET)); + /* stream layer has been detached so no transfer must occur after. */ BUG_ON_HOT(qcs->flags & QC_SF_DETACH); @@ -2801,8 +2916,20 @@ static size_t qmux_strm_snd_buf(struct stconn *sc, struct buffer *buf, goto end; } - if (qcs_is_close_local(qcs) || (qcs->flags & QC_SF_TO_RESET)) { - ret = qcs_http_reset_buf(qcs, buf, count); + if (qfctl_sblocked(&qcs->qcc->tx.fc)) { + TRACE_DEVEL("leaving on connection flow control", + QMUX_EV_STRM_SEND, qcs->qcc->conn, qcs); + if (!LIST_INLIST(&qcs->el_fctl)) { + TRACE_DEVEL("append to fctl-list", + QMUX_EV_STRM_SEND, qcs->qcc->conn, qcs); + LIST_APPEND(&qcs->qcc->fctl_list, &qcs->el_fctl); + } + goto end; + } + + if (qfctl_sblocked(&qcs->tx.fc)) { + TRACE_DEVEL("leaving on flow-control reached", + QMUX_EV_STRM_SEND, qcs->qcc->conn, qcs); goto end; } @@ -2813,7 +2940,9 @@ static size_t qmux_strm_snd_buf(struct stconn *sc, struct buffer *buf, } if (ret || fin) { - qcc_send_stream(qcs, 0); + const size_t data = qcs_prep_bytes(qcs) - old_data; + if (data || fin) + qcc_send_stream(qcs, 0, data); if (!(qcs->qcc->wait_event.events & SUB_RETRY_SEND)) tasklet_wakeup(qcs->qcc->wait_event.tasklet); } @@ -2825,18 +2954,25 @@ 
static size_t qmux_strm_snd_buf(struct stconn *sc, struct buffer *buf, } -static size_t qmux_nego_ff(struct stconn *sc, struct buffer *input, size_t count, unsigned int may_splice) +static size_t qmux_strm_nego_ff(struct stconn *sc, struct buffer *input, + size_t count, unsigned int flags) { struct qcs *qcs = __sc_mux_strm(sc); size_t ret = 0; TRACE_ENTER(QMUX_EV_STRM_SEND, qcs->qcc->conn, qcs); + /* Stream must not be woken up if already waiting for conn buffer. */ + BUG_ON(LIST_INLIST(&qcs->el_buf)); + + /* Sending forbidden if QCS is locally closed (FIN or RESET_STREAM sent). */ + BUG_ON(qcs_is_close_local(qcs) || (qcs->flags & QC_SF_TO_RESET)); + /* stream layer has been detached so no transfer must occur after. */ BUG_ON_HOT(qcs->flags & QC_SF_DETACH); if (!qcs->qcc->app_ops->nego_ff || !qcs->qcc->app_ops->done_ff) { - /* Fast forwading is not supported by the QUIC application layer */ + /* Fast forwarding is not supported by the QUIC application layer */ qcs->sd->iobuf.flags |= IOBUF_FL_NO_FF; goto end; } @@ -2850,6 +2986,22 @@ static size_t qmux_nego_ff(struct stconn *sc, struct buffer *input, size_t count goto end; } + if (qfctl_sblocked(&qcs->qcc->tx.fc)) { + TRACE_DEVEL("leaving on connection flow control", QMUX_EV_STRM_SEND, qcs->qcc->conn, qcs); + if (!LIST_INLIST(&qcs->el_fctl)) { + TRACE_DEVEL("append to fctl-list", QMUX_EV_STRM_SEND, qcs->qcc->conn, qcs); + LIST_APPEND(&qcs->qcc->fctl_list, &qcs->el_fctl); + } + qcs->sd->iobuf.flags |= IOBUF_FL_FF_BLOCKED; + goto end; + } + + if (qfctl_sblocked(&qcs->tx.fc)) { + TRACE_DEVEL("leaving on flow-control reached", QMUX_EV_STRM_SEND, qcs->qcc->conn, qcs); + qcs->sd->iobuf.flags |= IOBUF_FL_FF_BLOCKED; + goto end; + } + /* Alawys disable splicing */ qcs->sd->iobuf.flags |= IOBUF_FL_NO_SPLICING; @@ -2880,36 +3032,37 @@ static size_t qmux_nego_ff(struct stconn *sc, struct buffer *input, size_t count return ret; } -static size_t qmux_done_ff(struct stconn *sc) +static size_t qmux_strm_done_ff(struct stconn *sc) 
{ struct qcs *qcs = __sc_mux_strm(sc); struct qcc *qcc = qcs->qcc; struct sedesc *sd = qcs->sd; - size_t total = 0; + size_t total = 0, data = sd->iobuf.data; TRACE_ENTER(QMUX_EV_STRM_SEND, qcs->qcc->conn, qcs); - if (sd->iobuf.flags & IOBUF_FL_EOI) + if (sd->iobuf.flags & IOBUF_FL_EOI) { + TRACE_STATE("reached stream fin", QMUX_EV_STRM_SEND, qcs->qcc->conn, qcs); qcs->flags |= QC_SF_FIN_STREAM; + } if (!(qcs->flags & QC_SF_FIN_STREAM) && !sd->iobuf.data) goto end; + data += sd->iobuf.offset; total = qcs->qcc->app_ops->done_ff(qcs); - qcc_send_stream(qcs, 0); + if (data || qcs->flags & QC_SF_FIN_STREAM) + qcc_send_stream(qcs, 0, data); if (!(qcs->qcc->wait_event.events & SUB_RETRY_SEND)) tasklet_wakeup(qcc->wait_event.tasklet); end: - if (!b_data(&qcs->tx.buf)) - b_free(&qcs->tx.buf); - TRACE_LEAVE(QMUX_EV_STRM_SEND, qcs->qcc->conn, qcs); return total; } -static int qmux_resume_ff(struct stconn *sc, unsigned int flags) +static int qmux_strm_resume_ff(struct stconn *sc, unsigned int flags) { return 0; } @@ -2962,16 +3115,20 @@ static int qmux_wake(struct connection *conn) return 0; release: + qcc_shutdown(qcc); qcc_release(qcc); TRACE_LEAVE(QMUX_EV_QCC_WAKE); return 1; } -static void qmux_strm_shutw(struct stconn *sc, enum co_shw_mode mode) +static void qmux_strm_shut(struct stconn *sc, enum se_shut_mode mode, struct se_abort_info *reason) { struct qcs *qcs = __sc_mux_strm(sc); struct qcc *qcc = qcs->qcc; + if (!(mode & (SE_SHW_SILENT|SE_SHW_NORMAL))) + return; + TRACE_ENTER(QMUX_EV_STRM_SHUT, qcc->conn, qcs); /* Early closure reported if QC_SF_FIN_STREAM not yet set. 
*/ @@ -2984,7 +3141,7 @@ static void qmux_strm_shutw(struct stconn *sc, enum co_shw_mode mode) TRACE_STATE("set FIN STREAM", QMUX_EV_STRM_SHUT, qcc->conn, qcs); qcs->flags |= QC_SF_FIN_STREAM; - qcc_send_stream(qcs, 0); + qcc_send_stream(qcs, 0, 0); } } else { @@ -2999,6 +3156,34 @@ static void qmux_strm_shutw(struct stconn *sc, enum co_shw_mode mode) TRACE_LEAVE(QMUX_EV_STRM_SHUT, qcc->conn, qcs); } +static int qmux_ctl(struct connection *conn, enum mux_ctl_type mux_ctl, void *output) +{ + struct qcc *qcc = conn->ctx; + + switch (mux_ctl) { + case MUX_CTL_EXIT_STATUS: + return MUX_ES_UNKNOWN; + + case MUX_CTL_GET_GLITCHES: + return qcc->glitches; + + case MUX_CTL_GET_NBSTRM: { + struct qcs *qcs; + unsigned int nb_strm = qcc->nb_sc; + + list_for_each_entry(qcs, &qcc->opening_list, el_opening) + nb_strm++; + return nb_strm; + } + + case MUX_CTL_GET_MAXSTRM: + return qcc->lfctl.ms_bidi_init; + + default: + return -1; + } +} + static int qmux_sctl(struct stconn *sc, enum mux_sctl_type mux_sctl, void *output) { int ret = 0; @@ -3048,19 +3233,41 @@ static const struct mux_ops qmux_ops = { .detach = qmux_strm_detach, .rcv_buf = qmux_strm_rcv_buf, .snd_buf = qmux_strm_snd_buf, - .nego_fastfwd = qmux_nego_ff, - .done_fastfwd = qmux_done_ff, - .resume_fastfwd = qmux_resume_ff, + .nego_fastfwd = qmux_strm_nego_ff, + .done_fastfwd = qmux_strm_done_ff, + .resume_fastfwd = qmux_strm_resume_ff, .subscribe = qmux_strm_subscribe, .unsubscribe = qmux_strm_unsubscribe, .wake = qmux_wake, - .shutw = qmux_strm_shutw, + .shut = qmux_strm_shut, + .ctl = qmux_ctl, .sctl = qmux_sctl, .show_sd = qmux_strm_show_sd, .flags = MX_FL_HTX|MX_FL_NO_UPG|MX_FL_FRAMED, .name = "QUIC", }; +void qcc_show_quic(struct qcc *qcc) +{ + struct eb64_node *node; + chunk_appendf(&trash, " qcc=0x%p flags=0x%x sc=%llu hreq=%llu\n", + qcc, qcc->flags, (ullong)qcc->nb_sc, (ullong)qcc->nb_hreq); + + node = eb64_first(&qcc->streams_by_id); + while (node) { + struct qcs *qcs = eb64_entry(node, struct qcs, by_id); + 
chunk_appendf(&trash, " qcs=0x%p id=%llu flags=0x%x st=%s", + qcs, (ullong)qcs->id, qcs->flags, + qcs_st_to_str(qcs->st)); + if (!quic_stream_is_uni(qcs->id) || !quic_stream_is_local(qcc, qcs->id)) + chunk_appendf(&trash, " rxoff=%llu", (ullong)qcs->rx.offset); + if (!quic_stream_is_uni(qcs->id) || !quic_stream_is_remote(qcc, qcs->id)) + chunk_appendf(&trash, " txoff=%llu", (ullong)qcs->tx.fc.off_real); + chunk_appendf(&trash, "\n"); + node = eb64_next(node); + } +} + static struct mux_proto_list mux_proto_quic = { .token = IST("quic"), .mode = PROTO_MODE_HTTP, .side = PROTO_SIDE_FE, .mux = &qmux_ops }; diff --git a/src/mworker.c b/src/mworker.c index c71446a..c4461cc 100644 --- a/src/mworker.c +++ b/src/mworker.c @@ -20,10 +20,6 @@ #include <sys/wait.h> #include <unistd.h> -#if defined(USE_SYSTEMD) -#include <systemd/sd-daemon.h> -#endif - #include <haproxy/api.h> #include <haproxy/cfgparse.h> #include <haproxy/cli.h> @@ -45,6 +41,9 @@ #include <haproxy/tools.h> #include <haproxy/version.h> +#if defined(USE_SYSTEMD) +#include <haproxy/systemd.h> +#endif static int exitcode = -1; static int max_reloads = -1; /* number max of reloads a worker can have until they are killed */ @@ -565,17 +564,12 @@ void mworker_cleanup_proc() /* Displays workers and processes */ static int cli_io_handler_show_proc(struct appctx *appctx) { - struct stconn *sc = appctx_sc(appctx); struct mworker_proc *child; int old = 0; int up = date.tv_sec - proc_self->timestamp; char *uptime = NULL; char *reloadtxt = NULL; - /* FIXME: Don't watch the other side !*/ - if (unlikely(sc_opposite(sc)->flags & SC_FL_SHUT_DONE)) - return 1; - if (up < 0) /* must never be negative because of clock drift */ up = 0; @@ -719,15 +713,10 @@ static int cli_parse_reload(char **args, char *payload, struct appctx *appctx, v static int cli_io_handler_show_loadstatus(struct appctx *appctx) { char *env; - struct stconn *sc = appctx_sc(appctx); if (!cli_has_level(appctx, ACCESS_LVL_OPER)) return 1; - /* FIXME: Don't 
watch the other side !*/ - if (unlikely(sc_opposite(sc)->flags & SC_FL_SHUT_DONE)) - return 1; - env = getenv("HAPROXY_LOAD_SUCCESS"); if (!env) return 1; @@ -738,7 +727,7 @@ static int cli_io_handler_show_loadstatus(struct appctx *appctx) chunk_printf(&trash, "Success=1\n"); } #ifdef USE_SHM_OPEN - if (startup_logs && b_data(&startup_logs->buf) > 1) + if (startup_logs && ring_data(startup_logs) > 1) chunk_appendf(&trash, "--\n"); if (applet_putchk(appctx, &trash) == -1) diff --git a/src/pattern.c b/src/pattern.c index 52dda5e..f07223f 100644 --- a/src/pattern.c +++ b/src/pattern.c @@ -1547,6 +1547,10 @@ struct pat_ref *pat_ref_lookup(const char *reference) { struct pat_ref *ref; + /* Skip file@ prefix, it is the default case. Can be mixed with ref omitting the prefix */ + if (strlen(reference) > 5 && strncmp(reference, "file@", 5) == 0) + reference += 5; + list_for_each_entry(ref, &pattern_reference, list) if (ref->reference && strcmp(reference, ref->reference) == 0) return ref; @@ -1834,6 +1838,22 @@ struct pat_ref *pat_ref_new(const char *reference, const char *display, unsigned } } + + if (strlen(reference) > 5 && strncmp(reference, "virt@", 5) == 0) + flags |= PAT_REF_ID; + else if (strlen(reference) > 4 && strncmp(reference, "opt@", 4) == 0) { + flags |= (PAT_REF_ID|PAT_REF_FILE); // Will be decided later + reference += 4; + } + else { + /* A file by default */ + flags |= PAT_REF_FILE; + /* Skip file@ prefix to be mixed with ref omitting the prefix */ + if (strlen(reference) > 5 && strncmp(reference, "file@", 5) == 0) + reference += 5; + } + + ref->reference = strdup(reference); if (!ref->reference) { free(ref->display); @@ -2239,7 +2259,7 @@ struct pattern_expr *pattern_new_expr(struct pattern_head *head, struct pat_ref * * Return non-zero in case of success, otherwise 0. 
*/ -int pat_ref_read_from_file_smp(struct pat_ref *ref, const char *filename, char **err) +int pat_ref_read_from_file_smp(struct pat_ref *ref, char **err) { FILE *file; char *c; @@ -2250,11 +2270,17 @@ int pat_ref_read_from_file_smp(struct pat_ref *ref, const char *filename, char * char *value_beg; char *value_end; - file = fopen(filename, "r"); + file = fopen(ref->reference, "r"); if (!file) { - memprintf(err, "failed to open pattern file <%s>", filename); + if (ref->flags & PAT_REF_ID) { + /* file not found for an optional file, switch it to a virtual list of patterns */ + ref->flags &= ~PAT_REF_FILE; + return 1; + } + memprintf(err, "failed to open pattern file <%s>", ref->reference); return 0; } + ref->flags |= PAT_REF_FILE; /* now parse all patterns. The file may contain only one pattern * followed by one value per line. The start spaces, separator spaces @@ -2310,7 +2336,7 @@ int pat_ref_read_from_file_smp(struct pat_ref *ref, const char *filename, char * if (ferror(file)) { memprintf(err, "error encountered while reading <%s> : %s", - filename, strerror(errno)); + ref->reference, strerror(errno)); goto out_close; } /* success */ @@ -2324,7 +2350,7 @@ int pat_ref_read_from_file_smp(struct pat_ref *ref, const char *filename, char * /* Reads patterns from a file. If <err_msg> is non-NULL, an error message will * be returned there on errors and the caller will have to free it. 
*/ -int pat_ref_read_from_file(struct pat_ref *ref, const char *filename, char **err) +int pat_ref_read_from_file(struct pat_ref *ref, char **err) { FILE *file; char *c; @@ -2332,9 +2358,14 @@ int pat_ref_read_from_file(struct pat_ref *ref, const char *filename, char **err int ret = 0; int line = 0; - file = fopen(filename, "r"); + file = fopen(ref->reference, "r"); if (!file) { - memprintf(err, "failed to open pattern file <%s>", filename); + if (ref->flags & PAT_REF_ID) { + /* file not found for an optional file, switch it to a virtual list of patterns */ + ref->flags &= ~PAT_REF_FILE; + return 1; + } + memprintf(err, "failed to open pattern file <%s>", ref->reference); return 0; } @@ -2365,14 +2396,14 @@ int pat_ref_read_from_file(struct pat_ref *ref, const char *filename, char **err continue; if (!pat_ref_append(ref, arg, NULL, line)) { - memprintf(err, "out of memory when loading patterns from file <%s>", filename); + memprintf(err, "out of memory when loading patterns from file <%s>", ref->reference); goto out_close; } } if (ferror(file)) { memprintf(err, "error encountered while reading <%s> : %s", - filename, strerror(errno)); + ref->reference, strerror(errno)); goto out_close; } ret = 1; /* success */ @@ -2406,14 +2437,16 @@ int pattern_read_from_file(struct pattern_head *head, unsigned int refflags, return 0; } - if (load_smp) { - ref->flags |= PAT_REF_SMP; - if (!pat_ref_read_from_file_smp(ref, filename, err)) - return 0; - } - else { - if (!pat_ref_read_from_file(ref, filename, err)) - return 0; + if (ref->flags & PAT_REF_FILE) { + if (load_smp) { + ref->flags |= PAT_REF_SMP; + if (!pat_ref_read_from_file_smp(ref, err)) + return 0; + } + else { + if (!pat_ref_read_from_file(ref, err)) + return 0; + } } } else { @@ -2479,7 +2512,10 @@ int pattern_read_from_file(struct pattern_head *head, unsigned int refflags, /* Load reference content in the pattern expression. 
* We need to load elements in the same order they were seen in the - * file as list-based matching types may rely on it. + * file. Indeed, some list-based matching types may rely on it as the + * list is positional, and for tree-based matching, even if the tree is + * content-based in case of duplicated keys we only want the first key + * in the file to be considered. */ list_for_each_entry(elt, &ref->head, list) { if (!pat_ref_push(elt, expr, patflags, err)) { diff --git a/src/peers.c b/src/peers.c index 9ba3d9b..4ec981c 100644 --- a/src/peers.c +++ b/src/peers.c @@ -49,57 +49,12 @@ #include <haproxy/tools.h> #include <haproxy/trace.h> - -/*******************************/ -/* Current peer learning state */ -/*******************************/ - -/******************************/ -/* Current peers section resync state */ -/******************************/ -#define PEERS_F_RESYNC_LOCAL 0x00000001 /* Learn from local finished or no more needed */ -#define PEERS_F_RESYNC_REMOTE 0x00000002 /* Learn from remote finished or no more needed */ -#define PEERS_F_RESYNC_ASSIGN 0x00000004 /* A peer was assigned to learn our lesson */ -#define PEERS_F_RESYNC_PROCESS 0x00000008 /* The assigned peer was requested for resync */ -#define PEERS_F_RESYNC_LOCALTIMEOUT 0x00000010 /* Timeout waiting for a full resync from a local node */ -#define PEERS_F_RESYNC_REMOTETIMEOUT 0x00000020 /* Timeout waiting for a full resync from a remote node */ -#define PEERS_F_RESYNC_LOCALABORT 0x00000040 /* Session aborted learning from a local node */ -#define PEERS_F_RESYNC_REMOTEABORT 0x00000080 /* Session aborted learning from a remote node */ -#define PEERS_F_RESYNC_LOCALFINISHED 0x00000100 /* A local node teach us and was fully up to date */ -#define PEERS_F_RESYNC_REMOTEFINISHED 0x00000200 /* A remote node teach us and was fully up to date */ -#define PEERS_F_RESYNC_LOCALPARTIAL 0x00000400 /* A local node teach us but was partially up to date */ -#define PEERS_F_RESYNC_REMOTEPARTIAL 0x00000800 /* A 
remote node teach us but was partially up to date */ -#define PEERS_F_RESYNC_LOCALASSIGN 0x00001000 /* A local node was assigned for a full resync */ -#define PEERS_F_RESYNC_REMOTEASSIGN 0x00002000 /* A remote node was assigned for a full resync */ -#define PEERS_F_RESYNC_REQUESTED 0x00004000 /* A resync was explicitly requested */ -#define PEERS_F_DONOTSTOP 0x00010000 /* Main table sync task block process during soft stop - to push data to new process */ - -#define PEERS_RESYNC_STATEMASK (PEERS_F_RESYNC_LOCAL|PEERS_F_RESYNC_REMOTE) -#define PEERS_RESYNC_FROMLOCAL 0x00000000 -#define PEERS_RESYNC_FROMREMOTE PEERS_F_RESYNC_LOCAL -#define PEERS_RESYNC_FINISHED (PEERS_F_RESYNC_LOCAL|PEERS_F_RESYNC_REMOTE) - /***********************************/ /* Current shared table sync state */ /***********************************/ #define SHTABLE_F_TEACH_STAGE1 0x00000001 /* Teach state 1 complete */ #define SHTABLE_F_TEACH_STAGE2 0x00000002 /* Teach state 2 complete */ -/******************************/ -/* Remote peer teaching state */ -/******************************/ -#define PEER_F_TEACH_PROCESS 0x00000001 /* Teach a lesson to current peer */ -#define PEER_F_TEACH_FINISHED 0x00000008 /* Teach conclude, (wait for confirm) */ -#define PEER_F_TEACH_COMPLETE 0x00000010 /* All that we know already taught to current peer, used only for a local peer */ -#define PEER_F_LEARN_ASSIGN 0x00000100 /* Current peer was assigned for a lesson */ -#define PEER_F_LEARN_NOTUP2DATE 0x00000200 /* Learn from peer finished but peer is not up to date */ -#define PEER_F_ALIVE 0x20000000 /* Used to flag a peer a alive. */ -#define PEER_F_HEARTBEAT 0x40000000 /* Heartbeat message to send. */ -#define PEER_F_DWNGRD 0x80000000 /* When this flag is enabled, we must downgrade the supported version announced during peer sessions. 
*/ - -#define PEER_TEACH_RESET ~(PEER_F_TEACH_PROCESS|PEER_F_TEACH_FINISHED) /* PEER_F_TEACH_COMPLETE should never be reset */ -#define PEER_LEARN_RESET ~(PEER_F_LEARN_ASSIGN|PEER_F_LEARN_NOTUP2DATE) #define PEER_RESYNC_TIMEOUT 5000 /* 5 seconds */ #define PEER_RECONNECT_TIMEOUT 5000 /* 5 seconds */ @@ -334,6 +289,7 @@ static const struct trace_event peers_trace_events[] = { { .mask = PEERS_EV_SESSREL, .name = "sessrl", .desc = "peer session releasing" }, #define PEERS_EV_PROTOERR (1 << 6) { .mask = PEERS_EV_PROTOERR, .name = "protoerr", .desc = "protocol error" }, + { } }; static const struct name_desc peers_trace_lockon_args[4] = { @@ -489,6 +445,38 @@ static const char *statuscode_str(int statuscode) } } +static const char *peer_app_state_str(enum peer_app_state appstate) +{ + switch (appstate) { + case PEER_APP_ST_STOPPED: + return "STOPPED"; + case PEER_APP_ST_STARTING: + return "STARTING"; + case PEER_APP_ST_RUNNING: + return "RUNNING"; + case PEER_APP_ST_STOPPING: + return "STOPPING"; + default: + return "UNKNOWN"; + } +} + +static const char *peer_learn_state_str(enum peer_learn_state learnstate) +{ + switch (learnstate) { + case PEER_LR_ST_NOTASSIGNED: + return "NOTASSIGNED"; + case PEER_LR_ST_ASSIGNED: + return "ASSIGNED"; + case PEER_LR_ST_PROCESSING: + return "PROCESSING"; + case PEER_LR_ST_FINISHED: + return "FINISHED"; + default: + return "UNKNOWN"; + } +} + /* This function encode an uint64 to 'dynamic' length format. The encoded value is written at address *str, and the caller must assure that size after *str is large enough. 
@@ -1059,21 +1047,14 @@ void __peer_session_deinit(struct peer *peer) /* Re-init current table pointers to force announcement on re-connect */ peer->remote_table = peer->last_local_table = peer->stop_local_table = NULL; peer->appctx = NULL; - if (peer->flags & PEER_F_LEARN_ASSIGN) { - /* unassign current peer for learning */ - peer->flags &= ~(PEER_F_LEARN_ASSIGN); - peers->flags &= ~(PEERS_F_RESYNC_ASSIGN|PEERS_F_RESYNC_PROCESS); - if (peer->local) - peers->flags |= PEERS_F_RESYNC_LOCALABORT; - else - peers->flags |= PEERS_F_RESYNC_REMOTEABORT; - /* reschedule a resync */ - peers->resync_timeout = tick_add(now_ms, MS_TO_TICKS(5000)); - } - /* reset teaching and learning flags to 0 */ - peer->flags &= PEER_TEACH_RESET; - peer->flags &= PEER_LEARN_RESET; + /* reset teaching flags to 0 */ + peer->flags &= ~PEER_TEACH_FLAGS; + + /* Mark the peer as stopping and wait for the sync task */ + peer->flags |= PEER_F_WAIT_SYNCTASK_ACK; + peer->appstate = PEER_APP_ST_STOPPING; + task_wakeup(peers->sync_task, TASK_WOKEN_MSG); } @@ -1083,8 +1064,9 @@ static int peer_session_init(struct appctx *appctx) struct stream *s; struct sockaddr_storage *addr = NULL; - if (!sockaddr_alloc(&addr, &peer->addr, sizeof(peer->addr))) + if (!sockaddr_alloc(&addr, &peer->srv->addr, sizeof(peer->srv->addr))) goto out_error; + set_host_port(addr, peer->srv->svc_port); if (appctx_finalize_startup(appctx, peer->peers->peers_fe, &BUF_NULL) == -1) goto out_free_addr; @@ -1393,7 +1375,7 @@ static inline int peer_send_resync_finishedmsg(struct appctx *appctx, .control.head = { PEER_MSG_CLASS_CONTROL, }, }; - p.control.head[1] = (peers->flags & PEERS_RESYNC_STATEMASK) == PEERS_RESYNC_FINISHED ? + p.control.head[1] = (HA_ATOMIC_LOAD(&peers->flags) & PEERS_RESYNC_STATEMASK) == PEERS_RESYNC_FINISHED ? 
PEER_MSG_CTRL_RESYNCFINISHED : PEER_MSG_CTRL_RESYNCPARTIAL; TRACE_PROTO("send control message", PEERS_EV_CTRLMSG, @@ -1472,11 +1454,12 @@ static inline int peer_send_error_protomsg(struct appctx *appctx) /* * Function used to lookup for recent stick-table updates associated with - * <st> shared stick-table when a lesson must be taught a peer (PEER_F_LEARN_ASSIGN flag set). + * <st> shared stick-table when a lesson must be taught a peer (learn state is not PEER_LR_ST_NOTASSIGNED). */ static inline struct stksess *peer_teach_process_stksess_lookup(struct shared_table *st) { struct eb32_node *eb; + struct stksess *ret; eb = eb32_lookup_ge(&st->table->updates, st->last_pushed+1); if (!eb) { @@ -1496,7 +1479,10 @@ static inline struct stksess *peer_teach_process_stksess_lookup(struct shared_ta return NULL; } - return eb32_entry(eb, struct stksess, upd); + ret = eb32_entry(eb, struct stksess, upd); + if (!_HA_ATOMIC_LOAD(&ret->seen)) + _HA_ATOMIC_STORE(&ret->seen, 1); + return ret; } /* @@ -1506,6 +1492,7 @@ static inline struct stksess *peer_teach_process_stksess_lookup(struct shared_ta static inline struct stksess *peer_teach_stage1_stksess_lookup(struct shared_table *st) { struct eb32_node *eb; + struct stksess *ret; eb = eb32_lookup_ge(&st->table->updates, st->last_pushed+1); if (!eb) { @@ -1516,7 +1503,10 @@ static inline struct stksess *peer_teach_stage1_stksess_lookup(struct shared_tab return NULL; } - return eb32_entry(eb, struct stksess, upd); + ret = eb32_entry(eb, struct stksess, upd); + if (!_HA_ATOMIC_LOAD(&ret->seen)) + _HA_ATOMIC_STORE(&ret->seen, 1); + return ret; } /* @@ -1526,6 +1516,7 @@ static inline struct stksess *peer_teach_stage1_stksess_lookup(struct shared_tab static inline struct stksess *peer_teach_stage2_stksess_lookup(struct shared_table *st) { struct eb32_node *eb; + struct stksess *ret; eb = eb32_lookup_ge(&st->table->updates, st->last_pushed+1); if (!eb || eb->key > st->teaching_origin) { @@ -1533,7 +1524,10 @@ static inline struct 
stksess *peer_teach_stage2_stksess_lookup(struct shared_tab return NULL; } - return eb32_entry(eb, struct stksess, upd); + ret = eb32_entry(eb, struct stksess, upd); + if (!_HA_ATOMIC_LOAD(&ret->seen)) + _HA_ATOMIC_STORE(&ret->seen, 1); + return ret; } /* @@ -1621,10 +1615,7 @@ static inline int peer_send_teachmsgs(struct appctx *appctx, struct peer *p, updates_sent++; if (updates_sent >= peers_max_updates_at_once) { - /* pretend we're full so that we get back ASAP */ - struct stconn *sc = appctx_sc(appctx); - - sc_need_room(sc, 0); + applet_have_more_data(appctx); ret = -1; break; } @@ -1637,7 +1628,7 @@ static inline int peer_send_teachmsgs(struct appctx *appctx, struct peer *p, /* * Function to emit update messages for <st> stick-table when a lesson must - * be taught to the peer <p> (PEER_F_LEARN_ASSIGN flag set). + * be taught to the peer <p> (learn state is not PEER_LR_ST_NOTASSIGNED). * * Note that <st> shared stick-table is locked when calling this function, and * the lock is dropped then re-acquired. 
@@ -1650,13 +1641,7 @@ static inline int peer_send_teachmsgs(struct appctx *appctx, struct peer *p, static inline int peer_send_teach_process_msgs(struct appctx *appctx, struct peer *p, struct shared_table *st) { - int ret; - - HA_RWLOCK_WRUNLOCK(STK_TABLE_LOCK, &st->table->updt_lock); - ret = peer_send_teachmsgs(appctx, p, peer_teach_process_stksess_lookup, st); - HA_RWLOCK_WRLOCK(STK_TABLE_LOCK, &st->table->updt_lock); - - return ret; + return peer_send_teachmsgs(appctx, p, peer_teach_process_stksess_lookup, st); } /* @@ -2487,73 +2472,27 @@ static inline int peer_treat_awaited_msg(struct appctx *appctx, struct peer *pee } /* reset teaching flags to 0 */ - peer->flags &= PEER_TEACH_RESET; + peer->flags &= ~PEER_TEACH_FLAGS; /* flag to start to teach lesson */ - peer->flags |= PEER_F_TEACH_PROCESS; - peers->flags |= PEERS_F_RESYNC_REQUESTED; + peer->flags |= (PEER_F_TEACH_PROCESS|PEER_F_DBG_RESYNC_REQUESTED); } else if (msg_head[1] == PEER_MSG_CTRL_RESYNCFINISHED) { TRACE_PROTO("received control message", PEERS_EV_CTRLMSG, NULL, &msg_head[1], peers->local->id, peer->id); - if (peer->flags & PEER_F_LEARN_ASSIGN) { - int commit_a_finish = 1; - - peer->flags &= ~PEER_F_LEARN_ASSIGN; - peers->flags &= ~(PEERS_F_RESYNC_ASSIGN|PEERS_F_RESYNC_PROCESS); - if (peer->srv->shard) { - struct peer *ps; - - peers->flags |= PEERS_F_RESYNC_REMOTEPARTIAL; - peer->flags |= PEER_F_LEARN_NOTUP2DATE; - for (ps = peers->remote; ps; ps = ps->next) { - if (ps->srv->shard == peer->srv->shard) { - /* flag all peers from same shard - * notup2date to disable request - * of a resync frm them - */ - ps->flags |= PEER_F_LEARN_NOTUP2DATE; - } - else if (ps->srv->shard && !(ps->flags & PEER_F_LEARN_NOTUP2DATE)) { - /* it remains some other shards not requested - * we don't commit a resync finish to request - * the other shards - */ - commit_a_finish = 0; - } - } - - if (!commit_a_finish) { - /* it remains some shard to request, we schedule a new request - */ - peers->resync_timeout = 
tick_add(now_ms, MS_TO_TICKS(PEER_RESYNC_TIMEOUT)); - task_wakeup(peers->sync_task, TASK_WOKEN_MSG); - } - } - - if (commit_a_finish) { - peers->flags |= (PEERS_F_RESYNC_LOCAL|PEERS_F_RESYNC_REMOTE); - if (peer->local) - peers->flags |= PEERS_F_RESYNC_LOCALFINISHED; - else - peers->flags |= PEERS_F_RESYNC_REMOTEFINISHED; - } + if (peer->learnstate == PEER_LR_ST_PROCESSING) { + peer->learnstate = PEER_LR_ST_FINISHED; + peer->flags |= PEER_F_WAIT_SYNCTASK_ACK; + task_wakeup(peers->sync_task, TASK_WOKEN_MSG); } peer->confirm++; } else if (msg_head[1] == PEER_MSG_CTRL_RESYNCPARTIAL) { TRACE_PROTO("received control message", PEERS_EV_CTRLMSG, NULL, &msg_head[1], peers->local->id, peer->id); - if (peer->flags & PEER_F_LEARN_ASSIGN) { - peer->flags &= ~PEER_F_LEARN_ASSIGN; - peers->flags &= ~(PEERS_F_RESYNC_ASSIGN|PEERS_F_RESYNC_PROCESS); - - if (peer->local) - peers->flags |= PEERS_F_RESYNC_LOCALPARTIAL; - else - peers->flags |= PEERS_F_RESYNC_REMOTEPARTIAL; - peer->flags |= PEER_F_LEARN_NOTUP2DATE; - peers->resync_timeout = tick_add(now_ms, MS_TO_TICKS(PEER_RESYNC_TIMEOUT)); + if (peer->learnstate == PEER_LR_ST_PROCESSING) { + peer->learnstate = PEER_LR_ST_FINISHED; + peer->flags |= (PEER_F_LEARN_NOTUP2DATE|PEER_F_WAIT_SYNCTASK_ACK); task_wakeup(peers->sync_task, TASK_WOKEN_MSG); } peer->confirm++; @@ -2566,7 +2505,7 @@ static inline int peer_treat_awaited_msg(struct appctx *appctx, struct peer *pee /* If stopping state */ if (stopping) { /* Close session, push resync no more needed */ - peer->flags |= PEER_F_TEACH_COMPLETE; + peer->flags |= PEER_F_LOCAL_TEACH_COMPLETE; appctx->st0 = PEER_SESS_ST_END; return 0; } @@ -2576,7 +2515,7 @@ static inline int peer_treat_awaited_msg(struct appctx *appctx, struct peer *pee } /* reset teaching flags to 0 */ - peer->flags &= PEER_TEACH_RESET; + peer->flags &= ~PEER_TEACH_FLAGS; } else if (msg_head[1] == PEER_MSG_CTRL_HEARTBEAT) { TRACE_PROTO("received control message", PEERS_EV_CTRLMSG, @@ -2650,16 +2589,13 @@ static inline int 
peer_send_msgs(struct appctx *appctx, { int repl; - /* Need to request a resync */ - if ((peer->flags & PEER_F_LEARN_ASSIGN) && - (peers->flags & PEERS_F_RESYNC_ASSIGN) && - !(peers->flags & PEERS_F_RESYNC_PROCESS)) { - + /* Need to request a resync (only possible for a remote peer at this stage) */ + if (peer->learnstate == PEER_LR_ST_ASSIGNED) { + BUG_ON(peer->local); repl = peer_send_resync_reqmsg(appctx, peer, peers); if (repl <= 0) return repl; - - peers->flags |= PEERS_F_RESYNC_PROCESS; + peer->learnstate = PEER_LR_ST_PROCESSING; } /* Nothing to read, now we start to write */ @@ -2688,18 +2624,19 @@ static inline int peer_send_msgs(struct appctx *appctx, } if (!(peer->flags & PEER_F_TEACH_PROCESS)) { - HA_RWLOCK_WRLOCK(STK_TABLE_LOCK, &st->table->updt_lock); - if (!(peer->flags & PEER_F_LEARN_ASSIGN) && - (st->last_pushed != st->table->localupdate)) { + int must_send; + HA_RWLOCK_RDLOCK(STK_TABLE_LOCK, &st->table->updt_lock); + must_send = (peer->learnstate == PEER_LR_ST_NOTASSIGNED) && (st->last_pushed != st->table->localupdate); + HA_RWLOCK_RDUNLOCK(STK_TABLE_LOCK, &st->table->updt_lock); + + if (must_send) { repl = peer_send_teach_process_msgs(appctx, peer, st); if (repl <= 0) { - HA_RWLOCK_WRUNLOCK(STK_TABLE_LOCK, &st->table->updt_lock); peer->stop_local_table = peer->last_local_table; return repl; } } - HA_RWLOCK_WRUNLOCK(STK_TABLE_LOCK, &st->table->updt_lock); } else if (!(peer->flags & PEER_F_TEACH_FINISHED)) { if (!(st->flags & SHTABLE_F_TEACH_STAGE1)) { @@ -2733,10 +2670,7 @@ static inline int peer_send_msgs(struct appctx *appctx, updates++; if (updates >= peers_max_updates_at_once) { - /* pretend we're full so that we get back ASAP */ - struct stconn *sc = appctx_sc(appctx); - - sc_need_room(sc, 0); + applet_have_more_data(appctx); return -1; } @@ -2872,88 +2806,16 @@ static inline int peer_getline_last(struct appctx *appctx, struct peer **curpeer } /* - * Init <peer> peer after having accepted it at peer protocol level. 
- */ -static inline void init_accepted_peer(struct peer *peer, struct peers *peers) -{ - struct shared_table *st; - - peer->heartbeat = tick_add(now_ms, MS_TO_TICKS(PEER_HEARTBEAT_TIMEOUT)); - /* Register status code */ - peer->statuscode = PEER_SESS_SC_SUCCESSCODE; - peer->last_hdshk = now_ms; - - /* Awake main task */ - task_wakeup(peers->sync_task, TASK_WOKEN_MSG); - - /* Init confirm counter */ - peer->confirm = 0; - - /* Init cursors */ - for (st = peer->tables; st ; st = st->next) { - uint commitid, updateid; - - st->last_get = st->last_acked = 0; - HA_RWLOCK_WRLOCK(STK_TABLE_LOCK, &st->table->updt_lock); - /* if st->update appears to be in future it means - * that the last acked value is very old and we - * remain unconnected a too long time to use this - * acknowledgement as a reset. - * We should update the protocol to be able to - * signal the remote peer that it needs a full resync. - * Here a partial fix consist to set st->update at - * the max past value - */ - if ((int)(st->table->localupdate - st->update) < 0) - st->update = st->table->localupdate + (2147483648U); - st->teaching_origin = st->last_pushed = st->update; - st->flags = 0; - - updateid = st->last_pushed; - commitid = _HA_ATOMIC_LOAD(&st->table->commitupdate); - - while ((int)(updateid - commitid) > 0) { - if (_HA_ATOMIC_CAS(&st->table->commitupdate, &commitid, updateid)) - break; - __ha_cpu_relax(); - } - - HA_RWLOCK_WRUNLOCK(STK_TABLE_LOCK, &st->table->updt_lock); - } - - /* reset teaching and learning flags to 0 */ - peer->flags &= PEER_TEACH_RESET; - peer->flags &= PEER_LEARN_RESET; - - /* if current peer is local */ - if (peer->local) { - /* if current host need resyncfrom local and no process assigned */ - if ((peers->flags & PEERS_RESYNC_STATEMASK) == PEERS_RESYNC_FROMLOCAL && - !(peers->flags & PEERS_F_RESYNC_ASSIGN)) { - /* assign local peer for a lesson, consider lesson already requested */ - peer->flags |= PEER_F_LEARN_ASSIGN; - peers->flags |= 
(PEERS_F_RESYNC_ASSIGN|PEERS_F_RESYNC_PROCESS); - peers->flags |= PEERS_F_RESYNC_LOCALASSIGN; - } - - } - else if ((peers->flags & PEERS_RESYNC_STATEMASK) == PEERS_RESYNC_FROMREMOTE && - !(peers->flags & PEERS_F_RESYNC_ASSIGN)) { - /* assign peer for a lesson */ - peer->flags |= PEER_F_LEARN_ASSIGN; - peers->flags |= PEERS_F_RESYNC_ASSIGN; - peers->flags |= PEERS_F_RESYNC_REMOTEASSIGN; - } -} - -/* - * Init <peer> peer after having connected it at peer protocol level. + * Init <peer> peer after validating a connection at peer protocol level. It may + * a incoming or outgoing connection. The peer init must be acknowledge by the + * sync task. Message processing is blocked in the meanwhile. */ static inline void init_connected_peer(struct peer *peer, struct peers *peers) { struct shared_table *st; peer->heartbeat = tick_add(now_ms, MS_TO_TICKS(PEER_HEARTBEAT_TIMEOUT)); + /* Init cursors */ for (st = peer->tables; st ; st = st->next) { uint updateid, commitid; @@ -2986,28 +2848,25 @@ static inline void init_connected_peer(struct peer *peer, struct peers *peers) HA_RWLOCK_WRUNLOCK(STK_TABLE_LOCK, &st->table->updt_lock); } + /* Awake main task to ack the new peer state */ + task_wakeup(peers->sync_task, TASK_WOKEN_MSG); + /* Init confirm counter */ peer->confirm = 0; - /* reset teaching and learning flags to 0 */ - peer->flags &= PEER_TEACH_RESET; - peer->flags &= PEER_LEARN_RESET; + /* reset teaching flags to 0 */ + peer->flags &= ~PEER_TEACH_FLAGS; - /* If current peer is local */ - if (peer->local) { - /* flag to start to teach lesson */ - peer->flags |= PEER_F_TEACH_PROCESS; - } - else if ((peers->flags & PEERS_RESYNC_STATEMASK) == PEERS_RESYNC_FROMREMOTE && - !(peers->flags & PEERS_F_RESYNC_ASSIGN)) { - /* If peer is remote and resync from remote is needed, - and no peer currently assigned */ - - /* assign peer for a lesson */ - peer->flags |= PEER_F_LEARN_ASSIGN; - peers->flags |= PEERS_F_RESYNC_ASSIGN; - peers->flags |= PEERS_F_RESYNC_REMOTEASSIGN; + if 
(peer->local && !(appctx_is_back(peer->appctx))) { + /* If the local peer has established the connection (appctx is + * on the frontend side), flag it to start to teach lesson. + */ + peer->flags |= PEER_F_TEACH_PROCESS; } + + /* Mark the peer as starting and wait the sync task */ + peer->flags |= PEER_F_WAIT_SYNCTASK_ACK; + peer->appstate = PEER_APP_ST_STARTING; } /* @@ -3024,7 +2883,7 @@ static void peer_io_handler(struct appctx *appctx) unsigned int maj_ver, min_ver; int prev_state; - if (unlikely(se_fl_test(appctx->sedesc, (SE_FL_EOS|SE_FL_ERROR|SE_FL_SHR|SE_FL_SHW)))) { + if (unlikely(se_fl_test(appctx->sedesc, (SE_FL_EOS|SE_FL_ERROR)))) { co_skip(sc_oc(sc), co_data(sc_oc(sc))); goto out; } @@ -3091,6 +2950,7 @@ switchstate: */ curpeer->reconnect = tick_add(now_ms, MS_TO_TICKS(50 + ha_random() % 2000)); peer_session_forceshutdown(curpeer); + curpeer->heartbeat = TICK_ETERNITY; curpeer->coll++; } @@ -3127,7 +2987,11 @@ switchstate: goto switchstate; } - init_accepted_peer(curpeer, curpeers); + /* Register status code */ + curpeer->statuscode = PEER_SESS_SC_SUCCESSCODE; + curpeer->last_hdshk = now_ms; + + init_connected_peer(curpeer, curpeers); /* switch to waiting message state */ _HA_ATOMIC_INC(&connected_peers); @@ -3216,6 +3080,13 @@ switchstate: } } + if (curpeer->flags & PEER_F_WAIT_SYNCTASK_ACK) + goto out; + + /* local peer is assigned of a lesson, start it */ + if (curpeer->learnstate == PEER_LR_ST_ASSIGNED && curpeer->local) + curpeer->learnstate = PEER_LR_ST_PROCESSING; + reql = peer_recv_msg(appctx, (char *)msg_head, sizeof msg_head, &msg_len, &totl); if (reql <= 0) { if (reql == -1) @@ -3348,7 +3219,7 @@ static void peer_session_forceshutdown(struct peer *peer) /* Pre-configures a peers frontend to accept incoming connections */ void peers_setup_frontend(struct proxy *fe) { - fe->last_change = ns_to_sec(now_ns); + fe->fe_counters.last_change = ns_to_sec(now_ns); fe->cap = PR_CAP_FE | PR_CAP_BE; fe->mode = PR_MODE_PEERS; fe->maxconn = 0; @@ -3394,274 
+3265,432 @@ static struct appctx *peer_session_create(struct peers *peers, struct peer *peer return NULL; } -/* - * Task processing function to manage re-connect, peer session - * tasks wakeup on local update and heartbeat. Let's keep it exported so that it - * resolves in stack traces and "show tasks". +/* Clear LEARN flags to a given peer, dealing with aborts if it was assigned for + * learning. In this case, the resync timeout is re-armed. */ -struct task *process_peer_sync(struct task * task, void *context, unsigned int state) +static void clear_peer_learning_status(struct peer *peer) { - struct peers *peers = context; - struct peer *ps; - struct shared_table *st; + if (peer->learnstate != PEER_LR_ST_NOTASSIGNED) { + struct peers *peers = peer->peers; - task->expire = TICK_ETERNITY; + /* unassign current peer for learning */ + HA_ATOMIC_AND(&peers->flags, ~PEERS_F_RESYNC_ASSIGN); + HA_ATOMIC_OR(&peers->flags, (peer->local ? PEERS_F_DBG_RESYNC_LOCALABORT : PEERS_F_DBG_RESYNC_REMOTEABORT)); - /* Acquire lock for all peers of the section */ - for (ps = peers->remote; ps; ps = ps->next) - HA_SPIN_LOCK(PEER_LOCK, &ps->lock); + /* reschedule a resync */ + peer->peers->resync_timeout = tick_add(now_ms, MS_TO_TICKS(5000)); + peer->learnstate = PEER_LR_ST_NOTASSIGNED; + } + peer->flags &= ~PEER_F_LEARN_NOTUP2DATE; +} - if (!stopping) { - /* Normal case (not soft stop)*/ +static void sync_peer_learn_state(struct peers *peers, struct peer *peer) +{ + unsigned int flags = 0; - /* resync timeout set to TICK_ETERNITY means we just start - * a new process and timer was not initialized. - * We must arm this timer to switch to a request to a remote - * node if incoming connection from old local process never - * comes. 
- */ - if (peers->resync_timeout == TICK_ETERNITY) - peers->resync_timeout = tick_add(now_ms, MS_TO_TICKS(PEER_RESYNC_TIMEOUT)); + if (peer->learnstate != PEER_LR_ST_FINISHED) + return; + + /* The learning process is now finished */ + if (peer->flags & PEER_F_LEARN_NOTUP2DATE) { + /* Partial resync */ + flags |= (peer->local ? PEERS_F_DBG_RESYNC_LOCALPARTIAL : PEERS_F_DBG_RESYNC_REMOTEPARTIAL); + peers->resync_timeout = tick_add(now_ms, MS_TO_TICKS(PEER_RESYNC_TIMEOUT)); + } + else { + /* Full resync */ + struct peer *rem_peer; + int commit_a_finish = 1; + + if (peer->srv->shard) { + flags |= PEERS_F_DBG_RESYNC_REMOTEPARTIAL; + peer->flags |= PEER_F_LEARN_NOTUP2DATE; + for (rem_peer = peers->remote; rem_peer; rem_peer = rem_peer->next) { + if (rem_peer->srv->shard && rem_peer != peer) { + HA_SPIN_LOCK(PEER_LOCK, &rem_peer->lock); + if (rem_peer->srv->shard == peer->srv->shard) { + /* flag all peers from same shard + * notup2date to disable request + * of a resync frm them + */ + rem_peer->flags |= PEER_F_LEARN_NOTUP2DATE; + } + else if (!(rem_peer->flags & PEER_F_LEARN_NOTUP2DATE)) { + /* it remains some other shards not requested + * we don't commit a resync finish to request + * the other shards + */ + commit_a_finish = 0; + } + HA_SPIN_UNLOCK(PEER_LOCK, &rem_peer->lock); + } + } - if (((peers->flags & PEERS_RESYNC_STATEMASK) == PEERS_RESYNC_FROMLOCAL) && - (!nb_oldpids || tick_is_expired(peers->resync_timeout, now_ms)) && - !(peers->flags & PEERS_F_RESYNC_ASSIGN)) { - /* Resync from local peer needed - no peer was assigned for the lesson - and no old local peer found - or resync timeout expire */ + if (!commit_a_finish) { + /* it remains some shard to request, we schedule a new request */ + peers->resync_timeout = tick_add(now_ms, MS_TO_TICKS(PEER_RESYNC_TIMEOUT)); + } + } - /* flag no more resync from local, to try resync from remotes */ - peers->flags |= PEERS_F_RESYNC_LOCAL; - peers->flags |= PEERS_F_RESYNC_LOCALTIMEOUT; + if (commit_a_finish) { + flags |= 
(PEERS_F_RESYNC_LOCAL_FINISHED|PEERS_F_RESYNC_REMOTE_FINISHED); + flags |= (peer->local ? PEERS_F_DBG_RESYNC_LOCALFINISHED : PEERS_F_DBG_RESYNC_REMOTEFINISHED); + } + } + peer->learnstate = PEER_LR_ST_NOTASSIGNED; + HA_ATOMIC_AND(&peers->flags, ~PEERS_F_RESYNC_ASSIGN); + HA_ATOMIC_OR(&peers->flags, flags); - /* reschedule a resync */ - peers->resync_timeout = tick_add(now_ms, MS_TO_TICKS(PEER_RESYNC_TIMEOUT)); + appctx_wakeup(peer->appctx); +} + +/* Synchronise the peer applet state with its associated peers section. This + * function handles STARTING->RUNNING and STOPPING->STOPPED transitions. + */ +static void sync_peer_app_state(struct peers *peers, struct peer *peer) +{ + if (peer->appstate == PEER_APP_ST_STOPPING) { + clear_peer_learning_status(peer); + peer->appstate = PEER_APP_ST_STOPPED; + } + else if (peer->appstate == PEER_APP_ST_STARTING) { + clear_peer_learning_status(peer); + if (peer->local & appctx_is_back(peer->appctx)) { + /* if local peer has accepted the connection (appctx is + * on the backend side), flag it to learn a lesson and + * be sure it will start immediately. This only happens + * if no resync is in progress and if the lacal resync + * was not already performed. + */ + if ((peers->flags & PEERS_RESYNC_STATEMASK) == PEERS_RESYNC_FROMLOCAL && + !(peers->flags & PEERS_F_RESYNC_ASSIGN)) { + /* assign local peer for a lesson */ + peer->learnstate = PEER_LR_ST_ASSIGNED; + HA_ATOMIC_OR(&peers->flags, PEERS_F_RESYNC_ASSIGN|PEERS_F_DBG_RESYNC_LOCALASSIGN); + } + } + else if (!peer->local) { + /* If a connection was validated for a remote peer, flag + * it to learn a lesson but don't start it yet. The peer + * must request it explicitly. This only happens if no + * resync is in progress and if the remote resync was + * not already performed. 
+ */ + if ((peers->flags & PEERS_RESYNC_STATEMASK) == PEERS_RESYNC_FROMREMOTE && + !(peers->flags & PEERS_F_RESYNC_ASSIGN)) { + /* assign remote peer for a lesson */ + peer->learnstate = PEER_LR_ST_ASSIGNED; + HA_ATOMIC_OR(&peers->flags, PEERS_F_RESYNC_ASSIGN|PEERS_F_DBG_RESYNC_REMOTEASSIGN); + } } + peer->appstate = PEER_APP_ST_RUNNING; + appctx_wakeup(peer->appctx); + } +} - /* For each session */ - for (ps = peers->remote; ps; ps = ps->next) { - /* For each remote peers */ - if (!ps->local) { - if (!ps->appctx) { - /* no active peer connection */ - if (ps->statuscode == 0 || - ((ps->statuscode == PEER_SESS_SC_CONNECTCODE || - ps->statuscode == PEER_SESS_SC_SUCCESSCODE || - ps->statuscode == PEER_SESS_SC_CONNECTEDCODE) && - tick_is_expired(ps->reconnect, now_ms))) { - /* connection never tried - * or previous peer connection established with success - * or previous peer connection failed while connecting - * and reconnection timer is expired */ - - /* retry a connect */ - ps->appctx = peer_session_create(peers, ps); - } - else if (!tick_is_expired(ps->reconnect, now_ms)) { - /* If previous session failed during connection - * but reconnection timer is not expired */ +/* Process the sync task for a running process. 
It is called from process_peer_sync() only */ +static void __process_running_peer_sync(struct task *task, struct peers *peers, unsigned int state) +{ + struct peer *peer; + struct shared_table *st; - /* reschedule task for reconnect */ - task->expire = tick_first(task->expire, ps->reconnect); - } - /* else do nothing */ - } /* !ps->appctx */ - else if (ps->statuscode == PEER_SESS_SC_SUCCESSCODE) { - /* current peer connection is active and established */ - if (((peers->flags & PEERS_RESYNC_STATEMASK) == PEERS_RESYNC_FROMREMOTE) && - !(peers->flags & PEERS_F_RESYNC_ASSIGN) && - !(ps->flags & PEER_F_LEARN_NOTUP2DATE)) { - /* Resync from a remote is needed - * and no peer was assigned for lesson - * and current peer may be up2date */ - - /* assign peer for the lesson */ - ps->flags |= PEER_F_LEARN_ASSIGN; - peers->flags |= PEERS_F_RESYNC_ASSIGN; - peers->flags |= PEERS_F_RESYNC_REMOTEASSIGN; - - /* wake up peer handler to handle a request of resync */ - appctx_wakeup(ps->appctx); + /* resync timeout set to TICK_ETERNITY means we just start + * a new process and timer was not initialized. + * We must arm this timer to switch to a request to a remote + * node if incoming connection from old local process never + * comes. 
+ */ + if (peers->resync_timeout == TICK_ETERNITY) + peers->resync_timeout = tick_add(now_ms, MS_TO_TICKS(PEER_RESYNC_TIMEOUT)); + + if (((peers->flags & PEERS_RESYNC_STATEMASK) == PEERS_RESYNC_FROMLOCAL) && + (!nb_oldpids || tick_is_expired(peers->resync_timeout, now_ms)) && + !(peers->flags & PEERS_F_RESYNC_ASSIGN)) { + /* Resync from local peer needed + no peer was assigned for the lesson + and no old local peer found + or resync timeout expire */ + + /* flag no more resync from local, to try resync from remotes */ + HA_ATOMIC_OR(&peers->flags, PEERS_F_RESYNC_LOCAL_FINISHED|PEERS_F_DBG_RESYNC_LOCALTIMEOUT); + + /* reschedule a resync */ + peers->resync_timeout = tick_add(now_ms, MS_TO_TICKS(PEER_RESYNC_TIMEOUT)); + } + + /* For each session */ + for (peer = peers->remote; peer; peer = peer->next) { + HA_SPIN_LOCK(PEER_LOCK, &peer->lock); + + sync_peer_learn_state(peers, peer); + sync_peer_app_state(peers, peer); + + /* Peer changes, if any, were now ack by the sync task. Unblock + * the peer (any wakeup should already be performed, no need to + * do it here) + */ + peer->flags &= ~PEER_F_WAIT_SYNCTASK_ACK; + + /* For each remote peers */ + if (!peer->local) { + if (!peer->appctx) { + /* no active peer connection */ + if (peer->statuscode == 0 || + ((peer->statuscode == PEER_SESS_SC_CONNECTCODE || + peer->statuscode == PEER_SESS_SC_SUCCESSCODE || + peer->statuscode == PEER_SESS_SC_CONNECTEDCODE) && + tick_is_expired(peer->reconnect, now_ms))) { + /* connection never tried + * or previous peer connection established with success + * or previous peer connection failed while connecting + * and reconnection timer is expired */ + + /* retry a connect */ + peer->appctx = peer_session_create(peers, peer); + } + else if (!tick_is_expired(peer->reconnect, now_ms)) { + /* If previous session failed during connection + * but reconnection timer is not expired */ + + /* reschedule task for reconnect */ + task->expire = tick_first(task->expire, peer->reconnect); + } + /* else 
do nothing */ + } /* !peer->appctx */ + else if (peer->statuscode == PEER_SESS_SC_SUCCESSCODE) { + /* current peer connection is active and established */ + if (((peers->flags & PEERS_RESYNC_STATEMASK) == PEERS_RESYNC_FROMREMOTE) && + !(peers->flags & PEERS_F_RESYNC_ASSIGN) && + !(peer->flags & PEER_F_LEARN_NOTUP2DATE)) { + /* Resync from a remote is needed + * and no peer was assigned for lesson + * and current peer may be up2date */ + + /* assign peer for the lesson */ + peer->learnstate = PEER_LR_ST_ASSIGNED; + HA_ATOMIC_OR(&peers->flags, PEERS_F_RESYNC_ASSIGN|PEERS_F_DBG_RESYNC_REMOTEASSIGN); + + /* wake up peer handler to handle a request of resync */ + appctx_wakeup(peer->appctx); + } + else { + int update_to_push = 0; + + /* Awake session if there is data to push */ + for (st = peer->tables; st ; st = st->next) { + if (st->last_pushed != st->table->localupdate) { + /* wake up the peer handler to push local updates */ + update_to_push = 1; + /* There is no need to send a heartbeat message + * when some updates must be pushed. The remote + * peer will consider <peer> peer as alive when it will + * receive these updates. + */ + peer->flags &= ~PEER_F_HEARTBEAT; + /* Re-schedule another one later. */ + peer->heartbeat = tick_add(now_ms, MS_TO_TICKS(PEER_HEARTBEAT_TIMEOUT)); + /* Refresh reconnect if necessary */ + if (tick_is_expired(peer->reconnect, now_ms)) + peer->reconnect = tick_add(now_ms, MS_TO_TICKS(PEER_RECONNECT_TIMEOUT)); + /* We are going to send updates, let's ensure we will + * come back to send heartbeat messages or to reconnect. 
+ */ + task->expire = tick_first(peer->reconnect, peer->heartbeat); + appctx_wakeup(peer->appctx); + break; + } } - else { - int update_to_push = 0; - - /* Awake session if there is data to push */ - for (st = ps->tables; st ; st = st->next) { - if (st->last_pushed != st->table->localupdate) { - /* wake up the peer handler to push local updates */ - update_to_push = 1; - /* There is no need to send a heartbeat message - * when some updates must be pushed. The remote - * peer will consider <ps> peer as alive when it will - * receive these updates. - */ - ps->flags &= ~PEER_F_HEARTBEAT; - /* Re-schedule another one later. */ - ps->heartbeat = tick_add(now_ms, MS_TO_TICKS(PEER_HEARTBEAT_TIMEOUT)); - /* Refresh reconnect if necessary */ - if (tick_is_expired(ps->reconnect, now_ms)) - ps->reconnect = tick_add(now_ms, MS_TO_TICKS(PEER_RECONNECT_TIMEOUT)); - /* We are going to send updates, let's ensure we will - * come back to send heartbeat messages or to reconnect. + /* When there are updates to send we do not reconnect + * and do not send heartbeat message either. + */ + if (!update_to_push) { + if (tick_is_expired(peer->reconnect, now_ms)) { + if (peer->flags & PEER_F_ALIVE) { + /* This peer was alive during a 'reconnect' period. + * Flag it as not alive again for the next period. */ - task->expire = tick_first(ps->reconnect, ps->heartbeat); - appctx_wakeup(ps->appctx); - break; - } - } - /* When there are updates to send we do not reconnect - * and do not send heartbeat message either. - */ - if (!update_to_push) { - if (tick_is_expired(ps->reconnect, now_ms)) { - if (ps->flags & PEER_F_ALIVE) { - /* This peer was alive during a 'reconnect' period. - * Flag it as not alive again for the next period. 
- */ - ps->flags &= ~PEER_F_ALIVE; - ps->reconnect = tick_add(now_ms, MS_TO_TICKS(PEER_RECONNECT_TIMEOUT)); - } - else { - ps->reconnect = tick_add(now_ms, MS_TO_TICKS(50 + ha_random() % 2000)); - ps->heartbeat = TICK_ETERNITY; - peer_session_forceshutdown(ps); - ps->no_hbt++; - } + peer->flags &= ~PEER_F_ALIVE; + peer->reconnect = tick_add(now_ms, MS_TO_TICKS(PEER_RECONNECT_TIMEOUT)); } - else if (tick_is_expired(ps->heartbeat, now_ms)) { - ps->heartbeat = tick_add(now_ms, MS_TO_TICKS(PEER_HEARTBEAT_TIMEOUT)); - ps->flags |= PEER_F_HEARTBEAT; - appctx_wakeup(ps->appctx); + else { + peer->reconnect = tick_add(now_ms, MS_TO_TICKS(50 + ha_random() % 2000)); + peer->heartbeat = TICK_ETERNITY; + peer_session_forceshutdown(peer); + sync_peer_app_state(peers, peer); + peer->no_hbt++; } - task->expire = tick_first(ps->reconnect, ps->heartbeat); } - } - /* else do nothing */ - } /* SUCCESSCODE */ - } /* !ps->peer->local */ - } /* for */ - - /* Resync from remotes expired: consider resync is finished */ - if (((peers->flags & PEERS_RESYNC_STATEMASK) == PEERS_RESYNC_FROMREMOTE) && - !(peers->flags & PEERS_F_RESYNC_ASSIGN) && - tick_is_expired(peers->resync_timeout, now_ms)) { - /* Resync from remote peer needed - * no peer was assigned for the lesson - * and resync timeout expire */ - - /* flag no more resync from remote, consider resync is finished */ - peers->flags |= PEERS_F_RESYNC_REMOTE; - peers->flags |= PEERS_F_RESYNC_REMOTETIMEOUT; - } - - if ((peers->flags & PEERS_RESYNC_STATEMASK) != PEERS_RESYNC_FINISHED) { - /* Resync not finished*/ - /* reschedule task to resync timeout if not expired, to ended resync if needed */ - if (!tick_is_expired(peers->resync_timeout, now_ms)) - task->expire = tick_first(task->expire, peers->resync_timeout); - } - } /* !stopping */ - else { - /* soft stop case */ - if (state & TASK_WOKEN_SIGNAL) { - /* We've just received the signal */ - if (!(peers->flags & PEERS_F_DONOTSTOP)) { - /* add DO NOT STOP flag if not present */ - 
_HA_ATOMIC_INC(&jobs); - peers->flags |= PEERS_F_DONOTSTOP; - - /* disconnect all connected peers to process a local sync - * this must be done only the first time we are switching - * in stopping state - */ - for (ps = peers->remote; ps; ps = ps->next) { - /* we're killing a connection, we must apply a random delay before - * retrying otherwise the other end will do the same and we can loop - * for a while. - */ - ps->reconnect = tick_add(now_ms, MS_TO_TICKS(50 + ha_random() % 2000)); - if (ps->appctx) { - peer_session_forceshutdown(ps); + else if (tick_is_expired(peer->heartbeat, now_ms)) { + peer->heartbeat = tick_add(now_ms, MS_TO_TICKS(PEER_HEARTBEAT_TIMEOUT)); + peer->flags |= PEER_F_HEARTBEAT; + appctx_wakeup(peer->appctx); + } + task->expire = tick_first(peer->reconnect, peer->heartbeat); } } + /* else do nothing */ + } /* SUCCESSCODE */ + } /* !peer->peer->local */ - /* Set resync timeout for the local peer and request a immediate reconnect */ - peers->resync_timeout = tick_add(now_ms, MS_TO_TICKS(PEER_RESYNC_TIMEOUT)); - peers->local->reconnect = now_ms; + HA_SPIN_UNLOCK(PEER_LOCK, &peer->lock); + } /* for */ + + /* Resync from remotes expired or no remote peer: consider resync is finished */ + if (((peers->flags & PEERS_RESYNC_STATEMASK) == PEERS_RESYNC_FROMREMOTE) && + !(peers->flags & PEERS_F_RESYNC_ASSIGN) && + (tick_is_expired(peers->resync_timeout, now_ms) || !peers->remote->next)) { + /* Resync from remote peer needed + * no peer was assigned for the lesson + * and resync timeout expire */ + + /* flag no more resync from remote, consider resync is finished */ + HA_ATOMIC_OR(&peers->flags, PEERS_F_RESYNC_REMOTE_FINISHED|PEERS_F_DBG_RESYNC_REMOTETIMEOUT); + } + + if ((peers->flags & PEERS_RESYNC_STATEMASK) != PEERS_RESYNC_FINISHED) { + /* Resync not finished*/ + /* reschedule task to resync timeout if not expired, to ended resync if needed */ + if (!tick_is_expired(peers->resync_timeout, now_ms)) + task->expire = tick_first(task->expire, 
peers->resync_timeout); + } +} + +/* Process the sync task for a stopping process. It is called from process_peer_sync() only */ +static void __process_stopping_peer_sync(struct task *task, struct peers *peers, unsigned int state) +{ + struct peer *peer; + struct shared_table *st; + static int dont_stop = 0; + + /* For each peer */ + for (peer = peers->remote; peer; peer = peer->next) { + HA_SPIN_LOCK(PEER_LOCK, &peer->lock); + + sync_peer_learn_state(peers, peer); + sync_peer_app_state(peers, peer); + + /* Peer changes, if any, were now ack by the sync task. Unblock + * the peer (any wakeup should already be performed, no need to + * do it here) + */ + peer->flags &= ~PEER_F_WAIT_SYNCTASK_ACK; + + if ((state & TASK_WOKEN_SIGNAL) && !dont_stop) { + /* we're killing a connection, we must apply a random delay before + * retrying otherwise the other end will do the same and we can loop + * for a while. + */ + peer->reconnect = tick_add(now_ms, MS_TO_TICKS(50 + ha_random() % 2000)); + if (peer->appctx) { + peer_session_forceshutdown(peer); + sync_peer_app_state(peers, peer); } } - ps = peers->local; - if (ps->flags & PEER_F_TEACH_COMPLETE) { - if (peers->flags & PEERS_F_DONOTSTOP) { - /* resync of new process was complete, current process can die now */ - _HA_ATOMIC_DEC(&jobs); - peers->flags &= ~PEERS_F_DONOTSTOP; - for (st = ps->tables; st ; st = st->next) - HA_ATOMIC_DEC(&st->table->refcnt); - } + HA_SPIN_UNLOCK(PEER_LOCK, &peer->lock); + } + + /* We've just received the signal */ + if (state & TASK_WOKEN_SIGNAL) { + if (!dont_stop) { + /* add DO NOT STOP flag if not present */ + _HA_ATOMIC_INC(&jobs); + dont_stop = 1; + + /* Set resync timeout for the local peer and request a immediate reconnect */ + peers->resync_timeout = tick_add(now_ms, MS_TO_TICKS(PEER_RESYNC_TIMEOUT)); + peers->local->reconnect = now_ms; } - else if (!ps->appctx) { - /* Re-arm resync timeout if necessary */ - if (!tick_isset(peers->resync_timeout)) - peers->resync_timeout = tick_add(now_ms, 
MS_TO_TICKS(PEER_RESYNC_TIMEOUT)); + } - /* If there's no active peer connection */ - if ((peers->flags & PEERS_RESYNC_STATEMASK) == PEERS_RESYNC_FINISHED && - !tick_is_expired(peers->resync_timeout, now_ms) && - (ps->statuscode == 0 || - ps->statuscode == PEER_SESS_SC_SUCCESSCODE || - ps->statuscode == PEER_SESS_SC_CONNECTEDCODE || - ps->statuscode == PEER_SESS_SC_TRYAGAIN)) { - /* The resync is finished for the local peer and - * the resync timeout is not expired and - * connection never tried - * or previous peer connection was successfully established - * or previous tcp connect succeeded but init state incomplete - * or during previous connect, peer replies a try again statuscode */ - - if (!tick_is_expired(ps->reconnect, now_ms)) { - /* reconnection timer is not expired. reschedule task for reconnect */ - task->expire = tick_first(task->expire, ps->reconnect); - } - else { - /* connect to the local peer if we must push a local sync */ - if (peers->flags & PEERS_F_DONOTSTOP) { - peer_session_create(peers, ps); - } - } + peer = peers->local; + HA_SPIN_LOCK(PEER_LOCK, &peer->lock); + if (peer->flags & PEER_F_LOCAL_TEACH_COMPLETE) { + if (dont_stop) { + /* resync of new process was complete, current process can die now */ + _HA_ATOMIC_DEC(&jobs); + dont_stop = 0; + for (st = peer->tables; st ; st = st->next) + HA_ATOMIC_DEC(&st->table->refcnt); + } + } + else if (!peer->appctx) { + /* Re-arm resync timeout if necessary */ + if (!tick_isset(peers->resync_timeout)) + peers->resync_timeout = tick_add(now_ms, MS_TO_TICKS(PEER_RESYNC_TIMEOUT)); + + /* If there's no active peer connection */ + if ((peers->flags & PEERS_RESYNC_STATEMASK) == PEERS_RESYNC_FINISHED && + !tick_is_expired(peers->resync_timeout, now_ms) && + (peer->statuscode == 0 || + peer->statuscode == PEER_SESS_SC_SUCCESSCODE || + peer->statuscode == PEER_SESS_SC_CONNECTEDCODE || + peer->statuscode == PEER_SESS_SC_TRYAGAIN)) { + /* The resync is finished for the local peer and + * the resync timeout is 
not expired and + * connection never tried + * or previous peer connection was successfully established + * or previous tcp connect succeeded but init state incomplete + * or during previous connect, peer replies a try again statuscode */ + + if (!tick_is_expired(peer->reconnect, now_ms)) { + /* reconnection timer is not expired. reschedule task for reconnect */ + task->expire = tick_first(task->expire, peer->reconnect); } - else { - /* Other error cases */ - if (peers->flags & PEERS_F_DONOTSTOP) { - /* unable to resync new process, current process can die now */ - _HA_ATOMIC_DEC(&jobs); - peers->flags &= ~PEERS_F_DONOTSTOP; - for (st = ps->tables; st ; st = st->next) - HA_ATOMIC_DEC(&st->table->refcnt); + else { + /* connect to the local peer if we must push a local sync */ + if (dont_stop) { + peer_session_create(peers, peer); } } } - else if (ps->statuscode == PEER_SESS_SC_SUCCESSCODE ) { - /* Reset resync timeout during a resync */ - peers->resync_timeout = TICK_ETERNITY; - - /* current peer connection is active and established - * wake up all peer handlers to push remaining local updates */ - for (st = ps->tables; st ; st = st->next) { - if (st->last_pushed != st->table->localupdate) { - appctx_wakeup(ps->appctx); - break; - } + else { + /* Other error cases */ + if (dont_stop) { + /* unable to resync new process, current process can die now */ + _HA_ATOMIC_DEC(&jobs); + dont_stop = 0; + for (st = peer->tables; st ; st = st->next) + HA_ATOMIC_DEC(&st->table->refcnt); } } - } /* stopping */ + } + else if (peer->statuscode == PEER_SESS_SC_SUCCESSCODE ) { + /* Reset resync timeout during a resync */ + peers->resync_timeout = TICK_ETERNITY; + + /* current peer connection is active and established + * wake up all peer handlers to push remaining local updates */ + for (st = peer->tables; st ; st = st->next) { + if (st->last_pushed != st->table->localupdate) { + appctx_wakeup(peer->appctx); + break; + } + } + } + HA_SPIN_UNLOCK(PEER_LOCK, &peer->lock); +} - /* 
Release lock for all peers of the section */ - for (ps = peers->remote; ps; ps = ps->next) - HA_SPIN_UNLOCK(PEER_LOCK, &ps->lock); +/* + * Task processing function to manage re-connect, peer session + * tasks wakeup on local update and heartbeat. Let's keep it exported so that it + * resolves in stack traces and "show tasks". + */ +struct task *process_peer_sync(struct task * task, void *context, unsigned int state) +{ + struct peers *peers = context; + + task->expire = TICK_ETERNITY; + + if (!stopping) { + /* Normal case (not soft stop)*/ + __process_running_peer_sync(task, peers, state); + + } + else { + /* soft stop case */ + __process_stopping_peer_sync(task, peers, state); + } /* stopping */ /* Wakeup for re-connect */ return task; @@ -3940,7 +3969,7 @@ static int peers_dump_head(struct buffer *msg, struct appctx *appctx, struct pee peers, tm.tm_mday, monthname[tm.tm_mon], tm.tm_year+1900, tm.tm_hour, tm.tm_min, tm.tm_sec, - peers->id, peers->disabled, peers->flags, + peers->id, peers->disabled, HA_ATOMIC_LOAD(&peers->flags), peers->resync_timeout ? tick_is_expired(peers->resync_timeout, now_ms) ? "<PAST>" : human_time(TICKS_TO_MS(peers->resync_timeout - now_ms), @@ -3966,12 +3995,14 @@ static int peers_dump_peer(struct buffer *msg, struct appctx *appctx, struct pee struct stream *peer_s; struct shared_table *st; - addr_to_str(&peer->addr, pn, sizeof pn); - chunk_appendf(msg, " %p: id=%s(%s,%s) addr=%s:%d last_status=%s", + addr_to_str(&peer->srv->addr, pn, sizeof pn); + chunk_appendf(msg, " %p: id=%s(%s,%s) addr=%s:%d app_state=%s learn_state=%s last_status=%s", peer, peer->id, peer->local ? "local" : "remote", peer->appctx ? 
"active" : "inactive", - pn, get_host_port(&peer->addr), + pn, peer->srv->svc_port, + peer_app_state_str(peer->appstate), + peer_learn_state_str(peer->learnstate), statuscode_str(peer->statuscode)); chunk_appendf(msg, " last_hdshk=%s\n", @@ -40,31 +40,30 @@ static struct list pools __read_mostly = LIST_HEAD_INIT(pools); int mem_poison_byte __read_mostly = 'P'; int pool_trim_in_progress = 0; uint pool_debugging __read_mostly = /* set of POOL_DBG_* flags */ -#ifdef DEBUG_FAIL_ALLOC +#if defined(DEBUG_FAIL_ALLOC) && (DEBUG_FAIL_ALLOC > 0) POOL_DBG_FAIL_ALLOC | #endif -#ifdef DEBUG_DONT_SHARE_POOLS +#if defined(DEBUG_DONT_SHARE_POOLS) && (DEBUG_DONT_SHARE_POOLS > 0) POOL_DBG_DONT_MERGE | #endif -#ifdef DEBUG_POOL_INTEGRITY +#if defined(DEBUG_POOL_INTEGRITY) && (DEBUG_POOL_INTEGRITY > 0) POOL_DBG_COLD_FIRST | -#endif -#ifdef DEBUG_POOL_INTEGRITY POOL_DBG_INTEGRITY | #endif -#ifdef CONFIG_HAP_NO_GLOBAL_POOLS +#if defined(CONFIG_HAP_NO_GLOBAL_POOLS) POOL_DBG_NO_GLOBAL | #endif -#if defined(DEBUG_NO_POOLS) || defined(DEBUG_UAF) +#if defined(DEBUG_NO_POOLS) && (DEBUG_NO_POOLS > 0) POOL_DBG_NO_CACHE | #endif -#if defined(DEBUG_POOL_TRACING) +#if defined(DEBUG_POOL_TRACING) && (DEBUG_POOL_TRACING > 0) POOL_DBG_CALLER | #endif -#if defined(DEBUG_MEMORY_POOLS) +#if defined(DEBUG_MEMORY_POOLS) && (DEBUG_MEMORY_POOLS > 0) POOL_DBG_TAG | #endif -#if defined(DEBUG_UAF) +#if defined(DEBUG_UAF) && (DEBUG_UAF > 0) + POOL_DBG_NO_CACHE | POOL_DBG_UAF | #endif 0; @@ -497,7 +496,7 @@ void pool_check_pattern(struct pool_cache_head *pch, struct pool_head *pool, str u = ptr[ofs++]; while (ofs < size / sizeof(*ptr)) { if (unlikely(ptr[ofs] != u)) { - pool_inspect_item("cache corruption detected", pool, item, caller); + pool_inspect_item("cache corruption detected", pool, item, caller, ofs * sizeof(*ptr)); ABORT_NOW(); } ofs++; @@ -962,8 +961,12 @@ void pool_destroy_all() } } -/* carefully inspects an item upon fatal error and emit diagnostics */ -void pool_inspect_item(const char *msg, struct 
pool_head *pool, const void *item, const void *caller) +/* carefully inspects an item upon fatal error and emit diagnostics. + * If ofs < 0, no hint is provided regarding the content location. However if + * ofs >= 0, then we also try to inspect around that place where corruption + * was detected. + */ +void pool_inspect_item(const char *msg, struct pool_head *pool, const void *item, const void *caller, ssize_t ofs) { const struct pool_head *the_pool = NULL; @@ -980,6 +983,11 @@ void pool_inspect_item(const char *msg, struct pool_head *pool, const void *item " pool: %p ('%s', size %u, real %u, users %u)\n", item, pool, pool->name, pool->size, pool->alloc_sz, pool->users); + if (ofs >= 0) { + chunk_printf(&trash, "Contents around first corrupted address relative to pool item:.\n"); + dump_area_with_syms(&trash, item, item + ofs, NULL, NULL, NULL); + } + if (pool_debugging & POOL_DBG_TAG) { const void **pool_mark; struct pool_head *ph; @@ -1015,51 +1023,16 @@ void pool_inspect_item(const char *msg, struct pool_head *pool, const void *item } if (!the_pool) { - const char *start, *end, *p; + chunk_appendf(&trash, + "Tag does not match any other pool.\n"); pool_mark = (const void **)(((char *)item) + pool->size); - chunk_appendf(&trash, - "Tag does not match any other pool.\n" - "Contents around address %p+%lu=%p:\n", - item, (ulong)((const void*)pool_mark - (const void*)item), - pool_mark); - - /* dump in word-sized blocks */ - start = (const void *)(((uintptr_t)pool_mark - 32) & -sizeof(void*)); - end = (const void *)(((uintptr_t)pool_mark + 32 + sizeof(void*) - 1) & -sizeof(void*)); - - while (start < end) { - dump_addr_and_bytes(&trash, " ", start, sizeof(void*)); - chunk_strcat(&trash, " ["); - for (p = start; p < start + sizeof(void*); p++) { - if (!may_access(p)) - chunk_strcat(&trash, "*"); - else if (isprint((unsigned char)*p)) - chunk_appendf(&trash, "%c", *p); - else - chunk_strcat(&trash, "."); - } - - if (may_access(start)) - tag = *(const void **)start; - 
else - tag = NULL; - - if (tag == pool) { - /* the pool can often be there so let's detect it */ - chunk_appendf(&trash, "] [pool:%s", pool->name); - } - else if (tag) { - /* print pointers that resolve to a symbol */ - size_t back_data = trash.data; - chunk_strcat(&trash, "] ["); - if (!resolve_sym_name(&trash, NULL, tag)) - trash.data = back_data; - } - - chunk_strcat(&trash, "]\n"); - start = p; - } + if (resolve_sym_name(&trash, "Resolving the tag as a pool_free() location: ", *pool_mark)) + chunk_appendf(&trash, "\n"); + else + chunk_appendf(&trash, " (no match).\n"); + + dump_area_with_syms(&trash, item, pool_mark, pool, "pool", pool->name); } } } diff --git a/src/proto_quic.c b/src/proto_quic.c index 899cffe..93a24af 100644 --- a/src/proto_quic.c +++ b/src/proto_quic.c @@ -277,7 +277,7 @@ int quic_bind_socket(int fd, int flags, struct sockaddr_storage *local, struct s int quic_connect_server(struct connection *conn, int flags) { - int fd; + int fd, stream_err; struct server *srv; struct proxy *be; struct conn_src *src; @@ -301,67 +301,12 @@ int quic_connect_server(struct connection *conn, int flags) return SF_ERR_INTERNAL; } - fd = conn->handle.fd = sock_create_server_socket(conn); - - if (fd == -1) { - qfprintf(stderr, "Cannot get a server socket.\n"); - - if (errno == ENFILE) { - conn->err_code = CO_ER_SYS_FDLIM; - send_log(be, LOG_EMERG, - "Proxy %s reached system FD limit (maxsock=%d). Please check system tunables.\n", - be->id, global.maxsock); - } - else if (errno == EMFILE) { - conn->err_code = CO_ER_PROC_FDLIM; - send_log(be, LOG_EMERG, - "Proxy %s reached process FD limit (maxsock=%d). Please check 'ulimit-n' and restart.\n", - be->id, global.maxsock); - } - else if (errno == ENOBUFS || errno == ENOMEM) { - conn->err_code = CO_ER_SYS_MEMLIM; - send_log(be, LOG_EMERG, - "Proxy %s reached system memory limit (maxsock=%d). 
Please check system tunables.\n", - be->id, global.maxsock); - } - else if (errno == EAFNOSUPPORT || errno == EPROTONOSUPPORT) { - conn->err_code = CO_ER_NOPROTO; - } - else - conn->err_code = CO_ER_SOCK_ERR; - - /* this is a resource error */ - conn->flags |= CO_FL_ERROR; - return SF_ERR_RESOURCE; - } - - if (fd >= global.maxsock) { - /* do not log anything there, it's a normal condition when this option - * is used to serialize connections to a server ! - */ - ha_alert("socket(): not enough free sockets. Raise -n argument. Giving up.\n"); - close(fd); - conn->err_code = CO_ER_CONF_FDLIM; - conn->flags |= CO_FL_ERROR; - return SF_ERR_PRXCOND; /* it is a configuration limit */ - } - - if (fd_set_nonblock(fd) == -1) { - qfprintf(stderr,"Cannot set client socket to non blocking mode.\n"); - close(fd); - conn->err_code = CO_ER_SOCK_ERR; - conn->flags |= CO_FL_ERROR; - return SF_ERR_INTERNAL; - } - - if (master == 1 && fd_set_cloexec(fd) == -1) { - ha_alert("Cannot set CLOEXEC on client socket.\n"); - close(fd); - conn->err_code = CO_ER_SOCK_ERR; - conn->flags |= CO_FL_ERROR; - return SF_ERR_INTERNAL; - } + /* perform common checks on obtained socket FD, return appropriate Stream Error Flag in case of failure */ + fd = conn->handle.fd = sock_create_server_socket(conn, be, &stream_err); + if (fd == -1) + return stream_err; + /* FD is ok, perform protocol specific settings */ /* allow specific binding : * - server-specific at first * - proxy-specific next @@ -762,7 +707,7 @@ static int quic_alloc_dghdlrs(void) quic_cid_trees = calloc(QUIC_CID_TREES_CNT, sizeof(*quic_cid_trees)); if (!quic_cid_trees) { - ha_alert("Failed to allocate global CIDs trees.\n"); + ha_alert("Failed to allocate global quic CIDs trees.\n"); return 0; } diff --git a/src/proto_rhttp.c b/src/proto_rhttp.c index 452ee32..a6fc955 100644 --- a/src/proto_rhttp.c +++ b/src/proto_rhttp.c @@ -13,6 +13,7 @@ #include <haproxy/proxy.h> #include <haproxy/sample.h> #include <haproxy/server.h> +#include 
<haproxy/session.h> #include <haproxy/sock.h> #include <haproxy/ssl_sock.h> #include <haproxy/task.h> @@ -33,6 +34,7 @@ struct protocol proto_rhttp = { .listen = rhttp_bind_listener, .enable = rhttp_enable_listener, .disable = rhttp_disable_listener, + .suspend = rhttp_suspend_listener, .add = default_add_listener, .unbind = rhttp_unbind_receiver, .resume = default_resume_listener, @@ -54,11 +56,20 @@ static struct connection *new_reverse_conn(struct listener *l, struct server *sr { struct connection *conn = conn_new(srv); struct sockaddr_storage *bind_addr = NULL; + struct session *sess = NULL; if (!conn) goto err; HA_ATOMIC_INC(&th_ctx->nb_rhttp_conns); + /* session origin is only set after reversal. This ensures fetches + * will be functional only after reversal, in particular src/dst. + */ + sess = session_new(l->bind_conf->frontend, l, NULL); + if (!sess) + goto err; + + conn_set_owner(conn, sess, conn_session_free); conn_set_reverse(conn, &l->obj_type); if (alloc_bind_address(&bind_addr, srv, srv->proxy, NULL) != SRV_STATUS_OK) @@ -71,6 +82,14 @@ static struct connection *new_reverse_conn(struct listener *l, struct server *sr *conn->dst = srv->addr; set_host_port(conn->dst, srv->svc_port); + conn->send_proxy_ofs = 0; + if (srv->pp_opts) { + conn->flags |= CO_FL_SEND_PROXY; + conn->send_proxy_ofs = 1; /* must compute size */ + } + + /* TODO support SOCKS4 */ + if (conn_prepare(conn, protocol_lookup(conn->dst->ss_family, PROTO_TYPE_STREAM, 0), srv->xprt)) goto err; @@ -81,7 +100,7 @@ static struct connection *new_reverse_conn(struct listener *l, struct server *sr if (srv->ssl_ctx.sni) { struct sample *sni_smp = NULL; /* TODO remove NULL session which can cause crash depending on the SNI sample expr used. 
*/ - sni_smp = sample_fetch_as_type(srv->proxy, NULL, NULL, + sni_smp = sample_fetch_as_type(srv->proxy, sess, NULL, SMP_OPT_DIR_REQ | SMP_OPT_FINAL, srv->ssl_ctx.sni, SMP_T_STR); if (smp_make_safe(sni_smp)) @@ -89,21 +108,35 @@ static struct connection *new_reverse_conn(struct listener *l, struct server *sr } #endif /* USE_OPENSSL */ + /* The CO_FL_SEND_PROXY flag may have been set by the connect method, + * if so, add our handshake pseudo-XPRT now. + */ + if (conn->flags & CO_FL_HANDSHAKE) { + if (xprt_add_hs(conn) < 0) + goto err; + } + if (conn_xprt_start(conn) < 0) goto err; if (!srv->use_ssl || (!srv->ssl_ctx.alpn_str && !srv->ssl_ctx.npn_str) || srv->mux_proto) { - if (conn_install_mux_be(conn, NULL, NULL, NULL) < 0) + if (conn_install_mux_be(conn, NULL, sess, NULL) < 0) goto err; } - /* Not expected here. */ - BUG_ON((conn->flags & CO_FL_HANDSHAKE)); return conn; err: + if (l->rx.rhttp.state != LI_PRECONN_ST_ERR) { + send_log(l->bind_conf->frontend, LOG_ERR, + "preconnect %s::%s: Error on conn allocation.\n", + l->bind_conf->frontend->id, l->bind_conf->rhttp_srvname); + l->rx.rhttp.state = LI_PRECONN_ST_ERR; + } + + /* No need to free session as conn.destroy_cb will take care of it. */ if (conn) { conn_stop_tracking(conn); conn_xprt_shutw(conn); @@ -284,11 +317,12 @@ int rhttp_bind_listener(struct listener *listener, char *errmsg, int errlen) /* Retrieve the first thread usable for this listener. 
*/ mask = listener->rx.bind_thread & _HA_ATOMIC_LOAD(&tg->threads_enabled); - task_tid = my_ffsl(mask) + ha_tgroup_info[listener->rx.bind_tgroup].base; + task_tid = my_ffsl(mask) - 1 + ha_tgroup_info[listener->rx.bind_tgroup].base; if (!(task = task_new_on(task_tid))) { snprintf(errmsg, errlen, "Out of memory."); goto err; } + task->process = rhttp_process; task->context = listener; listener->rx.rhttp.task = task; @@ -363,6 +397,13 @@ int rhttp_bind_listener(struct listener *listener, char *errmsg, int errlen) return ERR_ALERT | ERR_FATAL; } +/* Do not support "disable frontend" for rhttp protocol. */ +int rhttp_suspend_listener(struct listener *l) +{ + send_log(l->bind_conf->frontend, LOG_ERR, "cannot disable a reverse-HTTP listener.\n"); + return -1; +} + void rhttp_enable_listener(struct listener *l) { if (l->rx.rhttp.state < LI_PRECONN_ST_INIT) { @@ -372,7 +413,7 @@ void rhttp_enable_listener(struct listener *l) l->rx.rhttp.state = LI_PRECONN_ST_INIT; } - task_wakeup(l->rx.rhttp.task, TASK_WOKEN_ANY); + task_wakeup(l->rx.rhttp.task, TASK_WOKEN_INIT); } void rhttp_disable_listener(struct listener *l) @@ -437,10 +478,10 @@ void rhttp_unbind_receiver(struct listener *l) int rhttp_set_affinity(struct connection *conn, int new_tid) { - /* Explicitely disable connection thread migration on accept. Indeed, + /* Explicitly disable connection thread migration on accept. Indeed, * it's unsafe to move a connection with its FD to another thread. Note * that active reverse task thread migration should be sufficient to - * ensure repartition of reversed connections accross listener threads. + * ensure repartition of reversed connections across listener threads. 
*/ return -1; } @@ -452,7 +493,7 @@ int rhttp_accepting_conn(const struct receiver *rx) INITCALL1(STG_REGISTER, protocol_register, &proto_rhttp); -/* perform minimal intializations */ +/* perform minimal initializations */ static void init_rhttp() { int i; diff --git a/src/proto_tcp.c b/src/proto_tcp.c index 45ce27f..63be775 100644 --- a/src/proto_tcp.c +++ b/src/proto_tcp.c @@ -265,7 +265,7 @@ int tcp_bind_socket(int fd, int flags, struct sockaddr_storage *local, struct so int tcp_connect_server(struct connection *conn, int flags) { - int fd; + int fd, stream_err; struct server *srv; struct proxy *be; struct conn_src *src; @@ -298,68 +298,14 @@ int tcp_connect_server(struct connection *conn, int flags) return SF_ERR_INTERNAL; } - fd = conn->handle.fd = sock_create_server_socket(conn); - if (fd == -1) { - qfprintf(stderr, "Cannot get a server socket.\n"); - if (errno == ENFILE) { - conn->err_code = CO_ER_SYS_FDLIM; - send_log(be, LOG_EMERG, - "Proxy %s reached system FD limit (maxsock=%d). Please check system tunables.\n", - be->id, global.maxsock); - } - else if (errno == EMFILE) { - conn->err_code = CO_ER_PROC_FDLIM; - send_log(be, LOG_EMERG, - "Proxy %s reached process FD limit (maxsock=%d). Please check 'ulimit-n' and restart.\n", - be->id, global.maxsock); - } - else if (errno == ENOBUFS || errno == ENOMEM) { - conn->err_code = CO_ER_SYS_MEMLIM; - send_log(be, LOG_EMERG, - "Proxy %s reached system memory limit (maxsock=%d). Please check system tunables.\n", - be->id, global.maxsock); - } - else if (errno == EAFNOSUPPORT || errno == EPROTONOSUPPORT) { - conn->err_code = CO_ER_NOPROTO; - } - else - conn->err_code = CO_ER_SOCK_ERR; - - /* this is a resource error */ - conn->flags |= CO_FL_ERROR; - return SF_ERR_RESOURCE; - } - - if (fd >= global.maxsock) { - /* do not log anything there, it's a normal condition when this option - * is used to serialize connections to a server ! - */ - ha_alert("socket(): not enough free sockets. Raise -n argument. 
Giving up.\n"); - close(fd); - conn->err_code = CO_ER_CONF_FDLIM; - conn->flags |= CO_FL_ERROR; - return SF_ERR_PRXCOND; /* it is a configuration limit */ - } - - if (fd_set_nonblock(fd) == -1 || - (setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, &one, sizeof(one)) == -1)) { - qfprintf(stderr,"Cannot set client socket to non blocking mode.\n"); - close(fd); - conn->err_code = CO_ER_SOCK_ERR; - conn->flags |= CO_FL_ERROR; - return SF_ERR_INTERNAL; - } - - if (master == 1 && fd_set_cloexec(fd) == -1) { - ha_alert("Cannot set CLOEXEC on client socket.\n"); - close(fd); - conn->err_code = CO_ER_SOCK_ERR; - conn->flags |= CO_FL_ERROR; - return SF_ERR_INTERNAL; - } + /* perform common checks on obtained socket FD, return appropriate Stream Error Flag in case of failure */ + fd = conn->handle.fd = sock_create_server_socket(conn, be, &stream_err); + if (fd == -1) + return stream_err; + /* FD is OK, continue with protocol specific settings */ if (be->options & PR_O_TCP_SRV_KA) { setsockopt(fd, SOL_SOCKET, SO_KEEPALIVE, &one, sizeof(one)); diff --git a/src/proto_udp.c b/src/proto_udp.c index 9855974..7308e98 100644 --- a/src/proto_udp.c +++ b/src/proto_udp.c @@ -155,6 +155,26 @@ int udp_bind_listener(struct listener *listener, char *errmsg, int errlen) if (global.tune.frontend_sndbuf) setsockopt(listener->rx.fd, SOL_SOCKET, SO_SNDBUF, &global.tune.frontend_sndbuf, sizeof(global.tune.frontend_sndbuf)); + if (listener->rx.flags & RX_F_PASS_PKTINFO) { + /* set IP_PKTINFO to retrieve destination address on recv */ + switch (listener->rx.addr.ss_family) { + case AF_INET: +#if defined(IP_PKTINFO) + setsockopt(listener->rx.fd, IPPROTO_IP, IP_PKTINFO, &one, sizeof(one)); +#elif defined(IP_RECVDSTADDR) + setsockopt(listener->rx.fd, IPPROTO_IP, IP_RECVDSTADDR, &one, sizeof(one)); +#endif /* IP_PKTINFO || IP_RECVDSTADDR */ + break; + case AF_INET6: +#ifdef IPV6_RECVPKTINFO + setsockopt(listener->rx.fd, IPPROTO_IPV6, IPV6_RECVPKTINFO, &one, sizeof(one)); +#endif + break; + default: + break; + 
} + } + listener_set_state(listener, LI_LISTEN); udp_return: diff --git a/src/proto_uxst.c b/src/proto_uxst.c index 7988e00..7828e31 100644 --- a/src/proto_uxst.c +++ b/src/proto_uxst.c @@ -219,7 +219,7 @@ static int uxst_suspend_receiver(struct receiver *rx) */ static int uxst_connect_server(struct connection *conn, int flags) { - int fd; + int fd, stream_err; struct server *srv; struct proxy *be; @@ -239,65 +239,12 @@ static int uxst_connect_server(struct connection *conn, int flags) return SF_ERR_INTERNAL; } - if ((fd = conn->handle.fd = socket(PF_UNIX, SOCK_STREAM, 0)) == -1) { - qfprintf(stderr, "Cannot get a server socket.\n"); - - if (errno == ENFILE) { - conn->err_code = CO_ER_SYS_FDLIM; - send_log(be, LOG_EMERG, - "Proxy %s reached system FD limit (maxsock=%d). Please check system tunables.\n", - be->id, global.maxsock); - } - else if (errno == EMFILE) { - conn->err_code = CO_ER_PROC_FDLIM; - send_log(be, LOG_EMERG, - "Proxy %s reached process FD limit (maxsock=%d). Please check 'ulimit-n' and restart.\n", - be->id, global.maxsock); - } - else if (errno == ENOBUFS || errno == ENOMEM) { - conn->err_code = CO_ER_SYS_MEMLIM; - send_log(be, LOG_EMERG, - "Proxy %s reached system memory limit (maxsock=%d). Please check system tunables.\n", - be->id, global.maxsock); - } - else if (errno == EAFNOSUPPORT || errno == EPROTONOSUPPORT) { - conn->err_code = CO_ER_NOPROTO; - } - else - conn->err_code = CO_ER_SOCK_ERR; - - /* this is a resource error */ - conn->flags |= CO_FL_ERROR; - return SF_ERR_RESOURCE; - } - - if (fd >= global.maxsock) { - /* do not log anything there, it's a normal condition when this option - * is used to serialize connections to a server ! - */ - ha_alert("socket(): not enough free sockets. Raise -n argument. 
Giving up.\n"); - close(fd); - conn->err_code = CO_ER_CONF_FDLIM; - conn->flags |= CO_FL_ERROR; - return SF_ERR_PRXCOND; /* it is a configuration limit */ - } - - if (fd_set_nonblock(fd) == -1) { - qfprintf(stderr,"Cannot set client socket to non blocking mode.\n"); - close(fd); - conn->err_code = CO_ER_SOCK_ERR; - conn->flags |= CO_FL_ERROR; - return SF_ERR_INTERNAL; - } - - if (master == 1 && fd_set_cloexec(fd) == -1) { - ha_alert("Cannot set CLOEXEC on client socket.\n"); - close(fd); - conn->err_code = CO_ER_SOCK_ERR; - conn->flags |= CO_FL_ERROR; - return SF_ERR_INTERNAL; - } + /* perform common checks on obtained socket FD, return appropriate Stream Error Flag in case of failure */ + fd = conn->handle.fd = sock_create_server_socket(conn, be, &stream_err); + if (fd == -1) + return stream_err; + /* FD is ok, continue with protocol specific settings */ if (global.tune.server_sndbuf) setsockopt(fd, SOL_SOCKET, SO_SNDBUF, &global.tune.server_sndbuf, sizeof(global.tune.server_sndbuf)); diff --git a/src/protocol.c b/src/protocol.c index 25ed6b7..399835a 100644 --- a/src/protocol.c +++ b/src/protocol.c @@ -157,13 +157,13 @@ int protocol_bind_all(int verbose) struct proxy *px = listener->bind_conf->frontend; if (lerr & ERR_ALERT) - ha_alert("Binding [%s:%d] for %s %s: %s\n", + ha_alert("Binding [%s:%d] for %s %s: protocol %s: %s.\n", listener->bind_conf->file, listener->bind_conf->line, - proxy_type_str(px), px->id, errmsg); + proxy_type_str(px), px->id, proto->name, errmsg); else if (lerr & ERR_WARN) - ha_warning("Binding [%s:%d] for %s %s: %s\n", + ha_warning("Binding [%s:%d] for %s %s: protocol %s: %s.\n", listener->bind_conf->file, listener->bind_conf->line, - proxy_type_str(px), px->id, errmsg); + proxy_type_str(px), px->id, proto->name, errmsg); } if (lerr != ERR_NONE) ha_free(&errmsg); @@ -183,13 +183,13 @@ int protocol_bind_all(int verbose) struct proxy *px = listener->bind_conf->frontend; if (lerr & ERR_ALERT) - ha_alert("Starting [%s:%d] for %s %s: %s\n", + 
ha_alert("Starting [%s:%d] for %s %s: protocol %s: %s.\n", listener->bind_conf->file, listener->bind_conf->line, - proxy_type_str(px), px->id, msg); + proxy_type_str(px), px->id, proto->name, msg); else if (lerr & ERR_WARN) - ha_warning("Starting [%s:%d] for %s %s: %s\n", + ha_warning("Starting [%s:%d] for %s %s: protocol %s: %s.\n", listener->bind_conf->file, listener->bind_conf->line, - proxy_type_str(px), px->id, msg); + proxy_type_str(px), px->id, proto->name, msg); } if (lerr & ERR_ABORT) break; diff --git a/src/proxy.c b/src/proxy.c index 19e6c4b..f1d9d7a 100644 --- a/src/proxy.c +++ b/src/proxy.c @@ -29,6 +29,7 @@ #include <haproxy/fd.h> #include <haproxy/filters.h> #include <haproxy/global.h> +#include <haproxy/guid.h> #include <haproxy/http_ana.h> #include <haproxy/http_htx.h> #include <haproxy/http_ext.h> @@ -177,18 +178,6 @@ void free_stick_rules(struct list *rules) } } -static void free_logformat_list(struct list *lfs) -{ - struct logformat_node *lf, *lfb; - - list_for_each_entry_safe(lf, lfb, lfs, list) { - LIST_DELETE(&lf->list); - release_sample_expr(lf->expr); - free(lf->arg); - free(lf); - } -} - void free_server_rules(struct list *srules) { struct server_rule *srule, *sruleb; @@ -196,7 +185,7 @@ void free_server_rules(struct list *srules) list_for_each_entry_safe(srule, sruleb, srules, list) { LIST_DELETE(&srule->list); free_acl_cond(srule->cond); - free_logformat_list(&srule->expr); + lf_expr_deinit(&srule->expr); free(srule->file); free(srule); } @@ -235,34 +224,17 @@ void free_proxy(struct proxy *p) #if defined(CONFIG_HAP_TRANSPARENT) free(p->conn_src.bind_hdr_name); #endif - if (p->conf.logformat_string != default_http_log_format && - p->conf.logformat_string != default_tcp_log_format && - p->conf.logformat_string != clf_http_log_format && - p->conf.logformat_string != default_https_log_format && - p->conf.logformat_string != httpclient_log_format) - free(p->conf.logformat_string); - - free(p->conf.lfs_file); - 
free(p->conf.uniqueid_format_string); istfree(&p->header_unique_id); - free(p->conf.uif_file); if ((p->lbprm.algo & BE_LB_LKUP) == BE_LB_LKUP_MAP) free(p->lbprm.map.srv); - if (p->mode == PR_MODE_SYSLOG) - free(p->lbprm.log.srv); - - if (p->conf.logformat_sd_string != default_rfc5424_sd_log_format) - free(p->conf.logformat_sd_string); - free(p->conf.lfsd_file); - - free(p->conf.error_logformat_string); - free(p->conf.elfs_file); list_for_each_entry_safe(cond, condb, &p->mon_fail_cond, list) { LIST_DELETE(&cond->list); free_acl_cond(cond); } + guid_remove(&p->guid); + EXTRA_COUNTERS_FREE(p->extra_counters_fe); EXTRA_COUNTERS_FREE(p->extra_counters_be); @@ -278,7 +250,7 @@ void free_proxy(struct proxy *p) LIST_DELETE(&rule->list); free_acl_cond(rule->cond); if (rule->dynamic) - free_logformat_list(&rule->be.expr); + lf_expr_deinit(&rule->be.expr); free(rule->file); free(rule); } @@ -293,10 +265,10 @@ void free_proxy(struct proxy *p) free_logger(log); } - free_logformat_list(&p->logformat); - free_logformat_list(&p->logformat_sd); - free_logformat_list(&p->format_unique_id); - free_logformat_list(&p->logformat_error); + lf_expr_deinit(&p->logformat); + lf_expr_deinit(&p->logformat_sd); + lf_expr_deinit(&p->format_unique_id); + lf_expr_deinit(&p->logformat_error); free_act_rules(&p->tcp_req.inspect_rules); free_act_rules(&p->tcp_rep.inspect_rules); @@ -344,6 +316,7 @@ void free_proxy(struct proxy *p) srv_free_params(&p->defsrv); list_for_each_entry_safe(l, l_next, &p->conf.listeners, by_fe) { + guid_remove(&l->guid); LIST_DELETE(&l->by_fe); LIST_DELETE(&l->by_bind); free(l->name); @@ -363,6 +336,7 @@ void free_proxy(struct proxy *p) free(bind_conf->arg); free(bind_conf->settings.interface); LIST_DELETE(&bind_conf->by_fe); + free(bind_conf->guid_prefix); free(bind_conf->rhttp_srvname); free(bind_conf); } @@ -1041,6 +1015,33 @@ static int proxy_parse_tcpka_intvl(char **args, int section, struct proxy *proxy } #endif +static int proxy_parse_guid(char **args, int 
section_type, struct proxy *curpx, + const struct proxy *defpx, const char *file, int line, + char **err) +{ + const char *guid; + char *guid_err = NULL; + + if (curpx->cap & PR_CAP_DEF) { + ha_alert("parsing [%s:%d] : '%s' not allowed in 'defaults' section.\n", file, line, args[0]); + return -1; + } + + if (!*args[1]) { + memprintf(err, "'%s' : expects an argument", args[0]); + return -1; + } + + guid = args[1]; + if (guid_insert(&curpx->obj_type, guid, &guid_err)) { + memprintf(err, "'%s': %s", args[0], guid_err); + ha_free(&guid_err); + return -1; + } + + return 0; +} + /* This function inserts proxy <px> into the tree of known proxies (regular * ones or defaults depending on px->cap & PR_CAP_DEF). The proxy's name is * used as the storing key so it must already have been initialized. @@ -1275,50 +1276,6 @@ struct server *findserver(const struct proxy *px, const char *name) { return target; } -/* - * This function finds a server with matching "<puid> x <rid>" within - * selected proxy <px>. - * Using the combination of proxy-uid + revision id ensures that the function - * will either return the server we're expecting or NULL if it has been removed - * from the proxy. - */ -struct server *findserver_unique_id(const struct proxy *px, int puid, uint32_t rid) { - - struct server *cursrv; - - if (!px) - return NULL; - - for (cursrv = px->srv; cursrv; cursrv = cursrv->next) { - if (cursrv->puid == puid && cursrv->rid == rid) - return cursrv; - } - - return NULL; -} - -/* - * This function finds a server with matching "<name> x <rid>" within - * selected proxy <px>. - * Using the combination of name + revision id ensures that the function will - * either return the server we're expecting or NULL if it has been removed - * from the proxy. 
- */ -struct server *findserver_unique_name(const struct proxy *px, const char *name, uint32_t rid) { - - struct server *cursrv; - - if (!px) - return NULL; - - for (cursrv = px->srv; cursrv; cursrv = cursrv->next) { - if (!strcmp(cursrv->id, name) && cursrv->rid == rid) - return cursrv; - } - - return NULL; -} - /* This function checks that the designated proxy has no http directives * enabled. It will output a warning if there are, and will fix some of them. * It returns the number of fatal errors encountered. This should be called @@ -1331,35 +1288,25 @@ int proxy_cfg_ensure_no_http(struct proxy *curproxy) ha_warning("cookie will be ignored for %s '%s' (needs 'mode http').\n", proxy_type_str(curproxy), curproxy->id); } - if (isttest(curproxy->monitor_uri)) { - ha_warning("monitor-uri will be ignored for %s '%s' (needs 'mode http').\n", - proxy_type_str(curproxy), curproxy->id); - } if (curproxy->lbprm.algo & BE_LB_NEED_HTTP) { curproxy->lbprm.algo &= ~BE_LB_ALGO; curproxy->lbprm.algo |= BE_LB_ALGO_RR; ha_warning("Layer 7 hash not possible for %s '%s' (needs 'mode http'). Falling back to round robin.\n", proxy_type_str(curproxy), curproxy->id); } - if (curproxy->to_log & (LW_REQ | LW_RESP)) { - curproxy->to_log &= ~(LW_REQ | LW_RESP); - ha_warning("parsing [%s:%d] : HTTP log/header format not usable with %s '%s' (needs 'mode http').\n", - curproxy->conf.lfs_file, curproxy->conf.lfs_line, - proxy_type_str(curproxy), curproxy->id); - } - if (curproxy->conf.logformat_string == default_http_log_format || - curproxy->conf.logformat_string == clf_http_log_format) { + if (curproxy->logformat.str == default_http_log_format || + curproxy->logformat.str == clf_http_log_format) { /* Note: we don't change the directive's file:line number */ - curproxy->conf.logformat_string = default_tcp_log_format; + curproxy->logformat.str = default_tcp_log_format; ha_warning("parsing [%s:%d] : 'option httplog' not usable with %s '%s' (needs 'mode http'). 
Falling back to 'option tcplog'.\n", - curproxy->conf.lfs_file, curproxy->conf.lfs_line, + curproxy->logformat.conf.file, curproxy->logformat.conf.line, proxy_type_str(curproxy), curproxy->id); } - else if (curproxy->conf.logformat_string == default_https_log_format) { + else if (curproxy->logformat.str == default_https_log_format) { /* Note: we don't change the directive's file:line number */ - curproxy->conf.logformat_string = default_tcp_log_format; + curproxy->logformat.str = default_tcp_log_format; ha_warning("parsing [%s:%d] : 'option httpslog' not usable with %s '%s' (needs 'mode http'). Falling back to 'option tcplog'.\n", - curproxy->conf.lfs_file, curproxy->conf.lfs_line, + curproxy->logformat.conf.file, curproxy->logformat.conf.line, proxy_type_str(curproxy), curproxy->id); } @@ -1410,14 +1357,11 @@ void init_new_proxy(struct proxy *p) LIST_INIT(&p->tcp_req.l5_rules); MT_LIST_INIT(&p->listener_queue); LIST_INIT(&p->loggers); - LIST_INIT(&p->logformat); - LIST_INIT(&p->logformat_sd); - LIST_INIT(&p->format_unique_id); - LIST_INIT(&p->logformat_error); LIST_INIT(&p->conf.bind); LIST_INIT(&p->conf.listeners); LIST_INIT(&p->conf.errors); LIST_INIT(&p->conf.args.list); + LIST_INIT(&p->conf.lf_checks); LIST_INIT(&p->filter_configs); LIST_INIT(&p->tcpcheck_rules.preset_vars); @@ -1436,6 +1380,8 @@ void init_new_proxy(struct proxy *p) /* Default to only allow L4 retries */ p->retry_type = PR_RE_CONN_FAILED; + guid_init(&p->guid); + p->extra_counters_fe = NULL; p->extra_counters_be = NULL; @@ -1460,29 +1406,12 @@ void proxy_preset_defaults(struct proxy *defproxy) defproxy->options2 |= PR_O2_INDEPSTR; defproxy->max_out_conns = MAX_SRV_LIST; - defproxy->defsrv.check.inter = DEF_CHKINTR; - defproxy->defsrv.check.fastinter = 0; - defproxy->defsrv.check.downinter = 0; - defproxy->defsrv.agent.inter = DEF_CHKINTR; - defproxy->defsrv.agent.fastinter = 0; - defproxy->defsrv.agent.downinter = 0; - defproxy->defsrv.check.rise = DEF_RISETIME; - defproxy->defsrv.check.fall = 
DEF_FALLTIME; - defproxy->defsrv.agent.rise = DEF_AGENT_RISETIME; - defproxy->defsrv.agent.fall = DEF_AGENT_FALLTIME; - defproxy->defsrv.check.port = 0; - defproxy->defsrv.agent.port = 0; - defproxy->defsrv.maxqueue = 0; - defproxy->defsrv.minconn = 0; - defproxy->defsrv.maxconn = 0; - defproxy->defsrv.max_reuse = -1; - defproxy->defsrv.max_idle_conns = -1; - defproxy->defsrv.pool_purge_delay = 5000; - defproxy->defsrv.slowstart = 0; - defproxy->defsrv.onerror = DEF_HANA_ONERR; - defproxy->defsrv.consecutive_errors_limit = DEF_HANA_ERRLIMIT; - defproxy->defsrv.uweight = defproxy->defsrv.iweight = 1; - LIST_INIT(&defproxy->defsrv.pp_tlvs); + srv_settings_init(&defproxy->defsrv); + + lf_expr_init(&defproxy->logformat); + lf_expr_init(&defproxy->logformat_sd); + lf_expr_init(&defproxy->format_unique_id); + lf_expr_init(&defproxy->logformat_error); defproxy->email_alert.level = LOG_ALERT; defproxy->load_server_state_from_file = PR_SRV_STATE_FILE_UNSPEC; @@ -1554,27 +1483,16 @@ void proxy_free_defaults(struct proxy *defproxy) h = h_next; } - if (defproxy->conf.logformat_string != default_http_log_format && - defproxy->conf.logformat_string != default_tcp_log_format && - defproxy->conf.logformat_string != clf_http_log_format && - defproxy->conf.logformat_string != default_https_log_format) { - ha_free(&defproxy->conf.logformat_string); - } - - if (defproxy->conf.logformat_sd_string != default_rfc5424_sd_log_format) - ha_free(&defproxy->conf.logformat_sd_string); + lf_expr_deinit(&defproxy->logformat); + lf_expr_deinit(&defproxy->logformat_sd); + lf_expr_deinit(&defproxy->logformat_error); + lf_expr_deinit(&defproxy->format_unique_id); list_for_each_entry_safe(log, logb, &defproxy->loggers, list) { LIST_DEL_INIT(&log->list); free_logger(log); } - ha_free(&defproxy->conf.uniqueid_format_string); - ha_free(&defproxy->conf.error_logformat_string); - ha_free(&defproxy->conf.lfs_file); - ha_free(&defproxy->conf.lfsd_file); - ha_free(&defproxy->conf.uif_file); - 
ha_free(&defproxy->conf.elfs_file); chunk_destroy(&defproxy->log_tag); free_email_alert(defproxy); @@ -1652,6 +1570,7 @@ void proxy_unref_defaults(struct proxy *px) */ struct proxy *alloc_new_proxy(const char *name, unsigned int cap, char **errmsg) { + uint last_change; struct proxy *curproxy; if ((curproxy = calloc(1, sizeof(*curproxy))) == NULL) { @@ -1660,7 +1579,13 @@ struct proxy *alloc_new_proxy(const char *name, unsigned int cap, char **errmsg) } init_new_proxy(curproxy); - curproxy->last_change = ns_to_sec(now_ns); + + last_change = ns_to_sec(now_ns); + if (cap & PR_CAP_FE) + curproxy->fe_counters.last_change = last_change; + if (cap & PR_CAP_BE) + curproxy->be_counters.last_change = last_change; + curproxy->id = strdup(name); curproxy->cap = cap; @@ -1815,39 +1740,9 @@ static int proxy_defproxy_cpy(struct proxy *curproxy, const struct proxy *defpro if (defproxy->defbe.name) curproxy->defbe.name = strdup(defproxy->defbe.name); - /* get either a pointer to the logformat string or a copy of it */ - curproxy->conf.logformat_string = defproxy->conf.logformat_string; - if (curproxy->conf.logformat_string && - curproxy->conf.logformat_string != default_http_log_format && - curproxy->conf.logformat_string != default_tcp_log_format && - curproxy->conf.logformat_string != clf_http_log_format && - curproxy->conf.logformat_string != default_https_log_format) - curproxy->conf.logformat_string = strdup(curproxy->conf.logformat_string); - - if (defproxy->conf.lfs_file) { - curproxy->conf.lfs_file = strdup(defproxy->conf.lfs_file); - curproxy->conf.lfs_line = defproxy->conf.lfs_line; - } - - /* get either a pointer to the logformat string for RFC5424 structured-data or a copy of it */ - curproxy->conf.logformat_sd_string = defproxy->conf.logformat_sd_string; - if (curproxy->conf.logformat_sd_string && - curproxy->conf.logformat_sd_string != default_rfc5424_sd_log_format) - curproxy->conf.logformat_sd_string = strdup(curproxy->conf.logformat_sd_string); - - if 
(defproxy->conf.lfsd_file) { - curproxy->conf.lfsd_file = strdup(defproxy->conf.lfsd_file); - curproxy->conf.lfsd_line = defproxy->conf.lfsd_line; - } - - curproxy->conf.error_logformat_string = defproxy->conf.error_logformat_string; - if (curproxy->conf.error_logformat_string) - curproxy->conf.error_logformat_string = strdup(curproxy->conf.error_logformat_string); - - if (defproxy->conf.elfs_file) { - curproxy->conf.elfs_file = strdup(defproxy->conf.elfs_file); - curproxy->conf.elfs_line = defproxy->conf.elfs_line; - } + lf_expr_dup(&defproxy->logformat, &curproxy->logformat); + lf_expr_dup(&defproxy->logformat_sd, &curproxy->logformat_sd); + lf_expr_dup(&defproxy->logformat_error, &curproxy->logformat_error); } if (curproxy->cap & PR_CAP_BE) { @@ -1877,17 +1772,10 @@ static int proxy_defproxy_cpy(struct proxy *curproxy, const struct proxy *defpro LIST_APPEND(&curproxy->loggers, &node->list); } - curproxy->conf.uniqueid_format_string = defproxy->conf.uniqueid_format_string; - if (curproxy->conf.uniqueid_format_string) - curproxy->conf.uniqueid_format_string = strdup(curproxy->conf.uniqueid_format_string); + lf_expr_dup(&defproxy->format_unique_id, &curproxy->format_unique_id); chunk_dup(&curproxy->log_tag, &defproxy->log_tag); - if (defproxy->conf.uif_file) { - curproxy->conf.uif_file = strdup(defproxy->conf.uif_file); - curproxy->conf.uif_line = defproxy->conf.uif_line; - } - /* copy default header unique id */ if (isttest(defproxy->header_unique_id)) { const struct ist copy = istdup(defproxy->header_unique_id); @@ -2008,11 +1896,11 @@ void proxy_cond_disable(struct proxy *p) */ if ((p->mode == PR_MODE_TCP || p->mode == PR_MODE_HTTP || p->mode == PR_MODE_SYSLOG) && !(p->cap & PR_CAP_INT)) ha_warning("Proxy %s stopped (cumulated conns: FE: %lld, BE: %lld).\n", - p->id, p->fe_counters.cum_conn, p->be_counters.cum_conn); + p->id, p->fe_counters.cum_conn, p->be_counters.cum_sess); if ((p->mode == PR_MODE_TCP || p->mode == PR_MODE_HTTP) && !(p->cap & PR_CAP_INT)) 
send_log(p, LOG_WARNING, "Proxy %s stopped (cumulated conns: FE: %lld, BE: %lld).\n", - p->id, p->fe_counters.cum_conn, p->be_counters.cum_conn); + p->id, p->fe_counters.cum_conn, p->be_counters.cum_sess); if (p->table && p->table->size && p->table->sync_task) task_wakeup(p->table->sync_task, TASK_WOKEN_MSG); @@ -2099,7 +1987,7 @@ struct task *manage_proxy(struct task *t, void *context, unsigned int state) goto out; if (p->fe_sps_lim && - (wait = next_event_delay(&p->fe_sess_per_sec, p->fe_sps_lim, 0))) { + (wait = next_event_delay(&p->fe_counters.sess_per_sec, p->fe_sps_lim, 0))) { /* we're blocking because a limit was reached on the number of * requests/s on the frontend. We want to re-check ASAP, which * means in 1 ms before estimated expiration date, because the @@ -2722,6 +2610,7 @@ static struct cfg_kw_list cfg_kws = {ILH, { { CFG_LISTEN, "clitcpka-intvl", proxy_parse_tcpka_intvl }, { CFG_LISTEN, "srvtcpka-intvl", proxy_parse_tcpka_intvl }, #endif + { CFG_LISTEN, "guid", proxy_parse_guid }, { 0, NULL, NULL }, }}; @@ -2819,9 +2708,8 @@ static void dump_server_addr(const struct sockaddr_storage *addr, char *addr_str * ->px, the proxy's id ->only_pxid, the server's pointer from ->sv, and the * choice of what to dump from ->show_conn. */ -static int dump_servers_state(struct stconn *sc) +static int dump_servers_state(struct appctx *appctx) { - struct appctx *appctx = __sc_appctx(sc); struct show_srv_ctx *ctx = appctx->svcctx; struct proxy *px = ctx->px; struct server *srv; @@ -2842,7 +2730,7 @@ static int dump_servers_state(struct stconn *sc) dump_server_addr(&srv->check.addr, srv_check_addr); dump_server_addr(&srv->agent.addr, srv_agent_addr); - srv_time_since_last_change = ns_to_sec(now_ns) - srv->last_change; + srv_time_since_last_change = ns_to_sec(now_ns) - srv->counters.last_change; bk_f_forced_id = px->options & PR_O_FORCED_ID ? 1 : 0; srv_f_forced_id = srv->flags & SRV_F_FORCED_ID ? 
1 : 0; @@ -2904,7 +2792,6 @@ static int dump_servers_state(struct stconn *sc) static int cli_io_handler_servers_state(struct appctx *appctx) { struct show_srv_ctx *ctx = appctx->svcctx; - struct stconn *sc = appctx_sc(appctx); struct proxy *curproxy; if (ctx->state == SHOW_SRV_HEAD) { @@ -2928,7 +2815,7 @@ static int cli_io_handler_servers_state(struct appctx *appctx) curproxy = ctx->px; /* servers are only in backends */ if ((curproxy->cap & PR_CAP_BE) && !(curproxy->cap & PR_CAP_INT)) { - if (!dump_servers_state(sc)) + if (!dump_servers_state(appctx)) return 0; } /* only the selected proxy is dumped */ @@ -3263,13 +3150,8 @@ static int cli_parse_show_errors(char **args, char *payload, struct appctx *appc static int cli_io_handler_show_errors(struct appctx *appctx) { struct show_errors_ctx *ctx = appctx->svcctx; - struct stconn *sc = appctx_sc(appctx); extern const char *monthname[12]; - /* FIXME: Don't watch the other side !*/ - if (unlikely(sc_opposite(sc)->flags & SC_FL_SHUT_DONE)) - return 1; - chunk_reset(&trash); if (!ctx->px) { @@ -3399,7 +3281,7 @@ static int cli_io_handler_show_errors(struct appctx *appctx) newline = ctx->bol; newptr = dump_text_line(&trash, es->buf, global.tune.bufsize, es->buf_len, &newline, ctx->ptr); if (newptr == ctx->ptr) { - sc_need_room(sc, 0); + applet_fl_set(appctx, APPCTX_FL_OUTBLK_FULL); goto cant_send_unlock; } diff --git a/src/qmux_http.c b/src/qmux_http.c index edf26b1..092eb15 100644 --- a/src/qmux_http.c +++ b/src/qmux_http.c @@ -78,6 +78,15 @@ size_t qcs_http_snd_buf(struct qcs *qcs, struct buffer *buf, size_t count, TRACE_ENTER(QMUX_EV_STRM_SEND, qcs->qcc->conn, qcs); htx = htxbuf(buf); + + /* Extra care required for HTTP/1 responses without Content-Length nor + * chunked encoding. In this case, shutw callback will be use to signal + * the end of the message. QC_SF_UNKNOWN_PL_LENGTH is set to prevent a + * RESET_STREAM emission in this case. 
+ */ + if (htx->extra && htx->extra == HTX_UNKOWN_PAYLOAD_LENGTH) + qcs->flags |= QC_SF_UNKNOWN_PL_LENGTH; + eom = (htx->flags & HTX_FL_EOM); ret = qcs->qcc->app_ops->snd_buf(qcs, buf, count); *fin = (eom && !b_data(buf)); @@ -86,23 +95,3 @@ size_t qcs_http_snd_buf(struct qcs *qcs, struct buffer *buf, size_t count, return ret; } - -/* QUIC MUX snd_buf reset. HTX data stored in <buf> of length <count> will be - * cleared. This can be used when data should not be transmitted any longer. - * - * Return the size in bytes of cleared data. - */ -size_t qcs_http_reset_buf(struct qcs *qcs, struct buffer *buf, size_t count) -{ - struct htx *htx; - - TRACE_ENTER(QMUX_EV_STRM_SEND, qcs->qcc->conn, qcs); - - htx = htx_from_buf(buf); - htx_reset(htx); - htx_to_buf(htx, buf); - - TRACE_LEAVE(QMUX_EV_STRM_SEND, qcs->qcc->conn, qcs); - - return count; -} diff --git a/src/qmux_trace.c b/src/qmux_trace.c index b213ed4..254ebb0 100644 --- a/src/qmux_trace.c +++ b/src/qmux_trace.c @@ -76,15 +76,15 @@ static void qmux_trace(enum trace_level level, uint64_t mask, if (qcc->conn->handle.qc) chunk_appendf(&trace_buf, " qc=%p", qcc->conn->handle.qc); - chunk_appendf(&trace_buf, " md=%llu/%llu/%llu", - (ullong)qcc->rfctl.md, (ullong)qcc->tx.offsets, (ullong)qcc->tx.sent_offsets); + chunk_appendf(&trace_buf, " md=%llu/%llu", + (ullong)qcc->tx.fc.limit, (ullong)qcc->tx.fc.off_real); if (qcs) { chunk_appendf(&trace_buf, " qcs=%p .id=%llu .st=%s", qcs, (ullong)qcs->id, qcs_st_to_str(qcs->st)); - chunk_appendf(&trace_buf, " msd=%llu/%llu/%llu", - (ullong)qcs->tx.msd, (ullong)qcs->tx.offset, (ullong)qcs->tx.sent_offset); + chunk_appendf(&trace_buf, " msd=%llu/%llu", + (ullong)qcs->tx.fc.limit, (ullong)qcs->tx.fc.off_real); } if (mask & QMUX_EV_QCC_NQCS) { diff --git a/src/qpack-dec.c b/src/qpack-dec.c index 7a8726f..5798b08 100644 --- a/src/qpack-dec.c +++ b/src/qpack-dec.c @@ -111,7 +111,7 @@ int qpack_decode_enc(struct buffer *buf, int fin, void *ctx) * connection error of type 
H3_CLOSED_CRITICAL_STREAM. */ if (fin) { - qcc_set_error(qcs->qcc, H3_CLOSED_CRITICAL_STREAM, 1); + qcc_set_error(qcs->qcc, H3_ERR_CLOSED_CRITICAL_STREAM, 1); return -1; } @@ -144,7 +144,7 @@ int qpack_decode_enc(struct buffer *buf, int fin, void *ctx) * QPACK_ENCODER_STREAM_ERROR. */ if (capacity) { - qcc_set_error(qcs->qcc, QPACK_ENCODER_STREAM_ERROR, 1); + qcc_set_error(qcs->qcc, QPACK_ERR_ENCODER_STREAM_ERROR, 1); return -1; } @@ -171,7 +171,7 @@ int qpack_decode_dec(struct buffer *buf, int fin, void *ctx) * connection error of type H3_CLOSED_CRITICAL_STREAM. */ if (fin) { - qcc_set_error(qcs->qcc, H3_CLOSED_CRITICAL_STREAM, 1); + qcc_set_error(qcs->qcc, H3_ERR_CLOSED_CRITICAL_STREAM, 1); return -1; } @@ -196,7 +196,7 @@ int qpack_decode_dec(struct buffer *buf, int fin, void *ctx) */ /* For the moment haproxy does not emit dynamic table insertion. */ - qcc_set_error(qcs->qcc, QPACK_DECODER_STREAM_ERROR, 1); + qcc_set_error(qcs->qcc, QPACK_ERR_DECODER_STREAM_ERROR, 1); return -1; } else if (inst & QPACK_DEC_INST_SACK) { @@ -218,12 +218,12 @@ static int qpack_decode_fs_pfx(uint64_t *enc_ric, uint64_t *db, int *sign_bit, { *enc_ric = qpack_get_varint(raw, len, 8); if (*len == (uint64_t)-1) - return -QPACK_ERR_RIC; + return -QPACK_RET_RIC; *sign_bit = **raw & 0x8; *db = qpack_get_varint(raw, len, 7); if (*len == (uint64_t)-1) - return -QPACK_ERR_DB; + return -QPACK_RET_DB; return 0; } @@ -234,7 +234,7 @@ static int qpack_decode_fs_pfx(uint64_t *enc_ric, uint64_t *db, int *sign_bit, * the end of the list with empty strings as name/value. * * Returns the number of headers inserted into list excluding the end marker. - * In case of error, a negative code QPACK_ERR_* is returned. + * In case of error, a negative code QPACK_RET_* is returned. 
*/ int qpack_decode_fs(const unsigned char *raw, uint64_t len, struct buffer *tmp, struct http_hdr *list, int list_size) @@ -262,7 +262,7 @@ int qpack_decode_fs(const unsigned char *raw, uint64_t len, struct buffer *tmp, while (len) { if (hdr_idx >= list_size) { qpack_debug_printf(stderr, "##ERR@%d\n", __LINE__); - ret = -QPACK_ERR_TOO_LARGE; + ret = -QPACK_RET_TOO_LARGE; goto out; } @@ -283,7 +283,7 @@ int qpack_decode_fs(const unsigned char *raw, uint64_t len, struct buffer *tmp, index = qpack_get_varint(&raw, &len, 3); if (len == (uint64_t)-1) { qpack_debug_printf(stderr, "##ERR@%d\n", __LINE__); - ret = -QPACK_ERR_TRUNCATED; + ret = -QPACK_RET_TRUNCATED; goto out; } @@ -292,7 +292,7 @@ int qpack_decode_fs(const unsigned char *raw, uint64_t len, struct buffer *tmp, length = qpack_get_varint(&raw, &len, 7); if (len == (uint64_t)-1) { qpack_debug_printf(stderr, "##ERR@%d\n", __LINE__); - ret = -QPACK_ERR_TRUNCATED; + ret = -QPACK_RET_TRUNCATED; goto out; } @@ -300,7 +300,7 @@ int qpack_decode_fs(const unsigned char *raw, uint64_t len, struct buffer *tmp, if (len < length) { qpack_debug_printf(stderr, "##ERR@%d\n", __LINE__); - ret = -QPACK_ERR_TRUNCATED; + ret = -QPACK_RET_TRUNCATED; goto out; } @@ -316,7 +316,7 @@ int qpack_decode_fs(const unsigned char *raw, uint64_t len, struct buffer *tmp, * Count (Section 4.5.1), it MUST treat this as a connection error of * type QPACK_DECOMPRESSION_FAILED. 
*/ - return -QPACK_DECOMPRESSION_FAILED; + return -QPACK_RET_DECOMP; } else if (efl_type == QPACK_IFL_WPBI) { /* Indexed field line with post-base index @@ -329,7 +329,7 @@ int qpack_decode_fs(const unsigned char *raw, uint64_t len, struct buffer *tmp, index = qpack_get_varint(&raw, &len, 4); if (len == (uint64_t)-1) { qpack_debug_printf(stderr, "##ERR@%d\n", __LINE__); - ret = -QPACK_ERR_TRUNCATED; + ret = -QPACK_RET_TRUNCATED; goto out; } @@ -344,7 +344,7 @@ int qpack_decode_fs(const unsigned char *raw, uint64_t len, struct buffer *tmp, * Count (Section 4.5.1), it MUST treat this as a connection error of * type QPACK_DECOMPRESSION_FAILED. */ - return -QPACK_DECOMPRESSION_FAILED; + return -QPACK_RET_DECOMP; } else if (efl_type & QPACK_IFL_BIT) { /* Indexed field line */ @@ -356,7 +356,7 @@ int qpack_decode_fs(const unsigned char *raw, uint64_t len, struct buffer *tmp, index = qpack_get_varint(&raw, &len, 6); if (len == (uint64_t)-1) { qpack_debug_printf(stderr, "##ERR@%d\n", __LINE__); - ret = -QPACK_ERR_TRUNCATED; + ret = -QPACK_RET_TRUNCATED; goto out; } @@ -375,7 +375,7 @@ int qpack_decode_fs(const unsigned char *raw, uint64_t len, struct buffer *tmp, * * TODO adjust this when dynamic table support is implemented. */ - return -QPACK_DECOMPRESSION_FAILED; + return -QPACK_RET_DECOMP; } qpack_debug_printf(stderr, " t=%d index=%llu", !!static_tbl, (unsigned long long)index); @@ -391,7 +391,7 @@ int qpack_decode_fs(const unsigned char *raw, uint64_t len, struct buffer *tmp, index = qpack_get_varint(&raw, &len, 4); if (len == (uint64_t)-1) { qpack_debug_printf(stderr, "##ERR@%d\n", __LINE__); - ret = -QPACK_ERR_TRUNCATED; + ret = -QPACK_RET_TRUNCATED; goto out; } @@ -409,7 +409,7 @@ int qpack_decode_fs(const unsigned char *raw, uint64_t len, struct buffer *tmp, * * TODO adjust this when dynamic table support is implemented. 
*/ - return -QPACK_DECOMPRESSION_FAILED; + return -QPACK_RET_DECOMP; } qpack_debug_printf(stderr, " n=%d t=%d index=%llu", !!n, !!static_tbl, (unsigned long long)index); @@ -417,7 +417,7 @@ int qpack_decode_fs(const unsigned char *raw, uint64_t len, struct buffer *tmp, length = qpack_get_varint(&raw, &len, 7); if (len == (uint64_t)-1) { qpack_debug_printf(stderr, "##ERR@%d\n", __LINE__); - ret = -QPACK_ERR_TRUNCATED; + ret = -QPACK_RET_TRUNCATED; goto out; } @@ -429,13 +429,13 @@ int qpack_decode_fs(const unsigned char *raw, uint64_t len, struct buffer *tmp, trash = chunk_newstr(tmp); if (!trash) { qpack_debug_printf(stderr, "##ERR@%d\n", __LINE__); - ret = -QPACK_DECOMPRESSION_FAILED; + ret = -QPACK_RET_TOO_LARGE; goto out; } nlen = huff_dec(raw, length, trash, tmp->size - tmp->data); if (nlen == (uint32_t)-1) { qpack_debug_printf(stderr, " can't decode huffman.\n"); - ret = -QPACK_ERR_HUFFMAN; + ret = -QPACK_RET_HUFFMAN; goto out; } @@ -450,7 +450,7 @@ int qpack_decode_fs(const unsigned char *raw, uint64_t len, struct buffer *tmp, if (len < length) { qpack_debug_printf(stderr, "##ERR@%d\n", __LINE__); - ret = -QPACK_ERR_TRUNCATED; + ret = -QPACK_RET_TRUNCATED; goto out; } @@ -468,7 +468,7 @@ int qpack_decode_fs(const unsigned char *raw, uint64_t len, struct buffer *tmp, name_len = qpack_get_varint(&raw, &len, 3); if (len == (uint64_t)-1) { qpack_debug_printf(stderr, "##ERR@%d\n", __LINE__); - ret = -QPACK_ERR_TRUNCATED; + ret = -QPACK_RET_TRUNCATED; goto out; } @@ -477,7 +477,7 @@ int qpack_decode_fs(const unsigned char *raw, uint64_t len, struct buffer *tmp, if (len < name_len) { qpack_debug_printf(stderr, "##ERR@%d\n", __LINE__); - ret = -QPACK_ERR_TRUNCATED; + ret = -QPACK_RET_TRUNCATED; goto out; } @@ -488,13 +488,13 @@ int qpack_decode_fs(const unsigned char *raw, uint64_t len, struct buffer *tmp, trash = chunk_newstr(tmp); if (!trash) { qpack_debug_printf(stderr, "##ERR@%d\n", __LINE__); - ret = -QPACK_DECOMPRESSION_FAILED; + ret = -QPACK_RET_TOO_LARGE; 
goto out; } nlen = huff_dec(raw, name_len, trash, tmp->size - tmp->data); if (nlen == (uint32_t)-1) { qpack_debug_printf(stderr, " can't decode huffman.\n"); - ret = -QPACK_ERR_HUFFMAN; + ret = -QPACK_RET_HUFFMAN; goto out; } @@ -514,7 +514,7 @@ int qpack_decode_fs(const unsigned char *raw, uint64_t len, struct buffer *tmp, value_len = qpack_get_varint(&raw, &len, 7); if (len == (uint64_t)-1) { qpack_debug_printf(stderr, "##ERR@%d\n", __LINE__); - ret = -QPACK_ERR_TRUNCATED; + ret = -QPACK_RET_TRUNCATED; goto out; } @@ -522,7 +522,7 @@ int qpack_decode_fs(const unsigned char *raw, uint64_t len, struct buffer *tmp, if (len < value_len) { qpack_debug_printf(stderr, "##ERR@%d\n", __LINE__); - ret = -QPACK_ERR_TRUNCATED; + ret = -QPACK_RET_TRUNCATED; goto out; } @@ -533,13 +533,13 @@ int qpack_decode_fs(const unsigned char *raw, uint64_t len, struct buffer *tmp, trash = chunk_newstr(tmp); if (!trash) { qpack_debug_printf(stderr, "##ERR@%d\n", __LINE__); - ret = -QPACK_DECOMPRESSION_FAILED; + ret = -QPACK_RET_TOO_LARGE; goto out; } nlen = huff_dec(raw, value_len, trash, tmp->size - tmp->data); if (nlen == (uint32_t)-1) { qpack_debug_printf(stderr, " can't decode huffman.\n"); - ret = -QPACK_ERR_HUFFMAN; + ret = -QPACK_RET_HUFFMAN; goto out; } @@ -561,7 +561,7 @@ int qpack_decode_fs(const unsigned char *raw, uint64_t len, struct buffer *tmp, */ if (!name.len) { qpack_debug_printf(stderr, "##ERR@%d\n", __LINE__); - ret = -QPACK_DECOMPRESSION_FAILED; + ret = -QPACK_RET_DECOMP; goto out; } @@ -574,7 +574,7 @@ int qpack_decode_fs(const unsigned char *raw, uint64_t len, struct buffer *tmp, if (hdr_idx >= list_size) { qpack_debug_printf(stderr, "##ERR@%d\n", __LINE__); - ret = -QPACK_ERR_TOO_LARGE; + ret = -QPACK_RET_TOO_LARGE; goto out; } @@ -586,3 +586,11 @@ int qpack_decode_fs(const unsigned char *raw, uint64_t len, struct buffer *tmp, qpack_debug_printf(stderr, "-- done: ret=%d\n", ret); return ret; } + +/* Convert return value from qpack_decode_fs() to a standard error 
code usable + * in CONNECTION_CLOSE or -1 for an internal error. + */ +int qpack_err_decode(const int value) +{ + return (value == -QPACK_RET_DECOMP) ? QPACK_ERR_DECOMPRESSION_FAILED : -1; +} diff --git a/src/queue.c b/src/queue.c index f20285b..e55bb58 100644 --- a/src/queue.c +++ b/src/queue.c @@ -114,10 +114,10 @@ unsigned int srv_dynamic_maxconn(const struct server *s) s->proxy->beconn * s->maxconn / s->proxy->fullconn); if ((s->cur_state == SRV_ST_STARTING) && - ns_to_sec(now_ns) < s->last_change + s->slowstart && - ns_to_sec(now_ns) >= s->last_change) { + ns_to_sec(now_ns) < s->counters.last_change + s->slowstart && + ns_to_sec(now_ns) >= s->counters.last_change) { unsigned int ratio; - ratio = 100 * (ns_to_sec(now_ns) - s->last_change) / s->slowstart; + ratio = 100 * (ns_to_sec(now_ns) - s->counters.last_change) / s->slowstart; max = MAX(1, max * ratio / 100); } return max; diff --git a/src/quic_cc_cubic.c b/src/quic_cc_cubic.c index 76a62ac..4bd1a7c 100644 --- a/src/quic_cc_cubic.c +++ b/src/quic_cc_cubic.c @@ -1,4 +1,6 @@ +#include <haproxy/global-t.h> #include <haproxy/quic_cc.h> +#include <haproxy/quic_cc_hystart.h> #include <haproxy/quic_trace.h> #include <haproxy/ticks.h> #include <haproxy/trace.h> @@ -79,6 +81,8 @@ struct cubic { * in recovery period) (in ms). */ uint32_t recovery_start_time; + /* HyStart++ state. */ + struct quic_hystart hystart; }; static void quic_cc_cubic_reset(struct quic_cc *cc) @@ -96,6 +100,8 @@ static void quic_cc_cubic_reset(struct quic_cc *cc) c->last_w_max = 0; c->W_est = 0; c->recovery_start_time = 0; + if (global.tune.options & GTUNE_QUIC_CC_HYSTART) + quic_cc_hystart_reset(&c->hystart); TRACE_LEAVE(QUIC_EV_CONN_CC, cc->qc); } @@ -242,7 +248,7 @@ static inline void quic_cubic_update(struct quic_cc *cc, uint32_t acked) * Note that K is stored in milliseconds. */ c->K = cubic_root(((c->last_w_max - path->cwnd) << CUBIC_SCALE_FACTOR_SHIFT) / (CUBIC_C_SCALED * path->mtu)); - /* Convert to miliseconds. 
*/ + /* Convert to milliseconds. */ c->K *= 1000; c->W_target = c->last_w_max; } @@ -424,7 +430,25 @@ static void quic_cc_cubic_ss_cb(struct quic_cc *cc, struct quic_cc_event *ev) TRACE_PROTO("CC cubic", QUIC_EV_CONN_CC, cc->qc, ev); switch (ev->type) { case QUIC_CC_EVT_ACK: - if (path->cwnd < QUIC_CC_INFINITE_SSTHESH - ev->ack.acked) { + if (global.tune.options & GTUNE_QUIC_CC_HYSTART) { + struct quic_hystart *h = &c->hystart; + unsigned int acked = QUIC_MIN(ev->ack.acked, (uint64_t)HYSTART_LIMIT * path->mtu); + + if (path->cwnd >= QUIC_CC_INFINITE_SSTHESH - acked) + goto out; + + path->cwnd += acked; + path->mcwnd = QUIC_MAX(path->cwnd, path->mcwnd); + quic_cc_hystart_track_min_rtt(cc, h, path->loss.latest_rtt); + if (ev->ack.pn >= h->wnd_end) + h->wnd_end = UINT64_MAX; + if (quic_cc_hystart_may_enter_cs(&c->hystart)) { + /* Exit slow start and enter conservative slow start */ + c->state = QUIC_CC_ST_CS; + goto out; + } + } + else if (path->cwnd < QUIC_CC_INFINITE_SSTHESH - ev->ack.acked) { path->cwnd += ev->ack.acked; path->cwnd = QUIC_MIN(path->max_cwnd, path->cwnd); } @@ -470,6 +494,69 @@ static void quic_cc_cubic_ca_cb(struct quic_cc *cc, struct quic_cc_event *ev) TRACE_LEAVE(QUIC_EV_CONN_CC, cc->qc); } +/* Conservative slow start callback. 
*/ +static void quic_cc_cubic_cs_cb(struct quic_cc *cc, struct quic_cc_event *ev) +{ + struct quic_cc_path *path = container_of(cc, struct quic_cc_path, cc); + + TRACE_ENTER(QUIC_EV_CONN_CC, cc->qc); + TRACE_PROTO("CC cubic", QUIC_EV_CONN_CC, cc->qc, ev); + + switch (ev->type) { + case QUIC_CC_EVT_ACK: + { + struct cubic *c = quic_cc_priv(cc); + struct quic_hystart *h = &c->hystart; + unsigned int acked = + QUIC_MIN(ev->ack.acked, (uint64_t)HYSTART_LIMIT * path->mtu) / HYSTART_CSS_GROWTH_DIVISOR; + + if (path->cwnd >= QUIC_CC_INFINITE_SSTHESH - acked) + goto out; + + path->cwnd += acked; + path->mcwnd = QUIC_MAX(path->cwnd, path->mcwnd); + quic_cc_hystart_track_min_rtt(cc, h, path->loss.latest_rtt); + if (quic_cc_hystart_may_reenter_ss(h)) { + /* Exit to slow start */ + c->state = QUIC_CC_ST_SS; + goto out; + } + + if (h->css_rnd_count >= HYSTART_CSS_ROUNDS) { + /* Exit to congestion avoidance + * + * RFC 9438 4.10. Slow start + * + * When CUBIC uses HyStart++ [RFC9406], it may exit the first slow start + * without incurring any packet loss and thus _W_max_ is undefined. In + * this special case, CUBIC sets _cwnd_prior = cwnd_ and switches to + * congestion avoidance. It then increases its congestion window size + * using Figure 1, where _t_ is the elapsed time since the beginning of + * the current congestion avoidance stage, _K_ is set to 0, and _W_max_ + * is set to the congestion window size at the beginning of the current + * congestion avoidance stage. 
+ */ + c->last_w_max = path->cwnd; + c->t_epoch = 0; + c->state = QUIC_CC_ST_CA; + } + + break; + } + + case QUIC_CC_EVT_LOSS: + quic_enter_recovery(cc); + break; + case QUIC_CC_EVT_ECN_CE: + /* TODO */ + break; + } + + out: + TRACE_PROTO("CC cubic", QUIC_EV_CONN_CC, cc->qc, NULL, cc); + TRACE_LEAVE(QUIC_EV_CONN_CC, cc->qc); +} + /* Recovery period callback */ static void quic_cc_cubic_rp_cb(struct quic_cc *cc, struct quic_cc_event *ev) { @@ -507,6 +594,7 @@ static void quic_cc_cubic_rp_cb(struct quic_cc *cc, struct quic_cc_event *ev) static void (*quic_cc_cubic_state_cbs[])(struct quic_cc *cc, struct quic_cc_event *ev) = { [QUIC_CC_ST_SS] = quic_cc_cubic_ss_cb, + [QUIC_CC_ST_CS] = quic_cc_cubic_cs_cb, [QUIC_CC_ST_CA] = quic_cc_cubic_ca_cb, [QUIC_CC_ST_RP] = quic_cc_cubic_rp_cb, }; @@ -518,6 +606,17 @@ static void quic_cc_cubic_event(struct quic_cc *cc, struct quic_cc_event *ev) return quic_cc_cubic_state_cbs[c->state](cc, ev); } +static void quic_cc_cubic_hystart_start_round(struct quic_cc *cc, uint64_t pn) +{ + struct cubic *c = quic_cc_priv(cc); + struct quic_hystart *h = &c->hystart; + + if (c->state != QUIC_CC_ST_SS && c->state != QUIC_CC_ST_CS) + return; + + quic_cc_hystart_start_round(h, pn); +} + static void quic_cc_cubic_state_trace(struct buffer *buf, const struct quic_cc *cc) { struct quic_cc_path *path; @@ -538,5 +637,6 @@ struct quic_cc_algo quic_cc_algo_cubic = { .init = quic_cc_cubic_init, .event = quic_cc_cubic_event, .slow_start = quic_cc_cubic_slow_start, + .hystart_start_round = quic_cc_cubic_hystart_start_round, .state_trace = quic_cc_cubic_state_trace, }; diff --git a/src/quic_cc_newreno.c b/src/quic_cc_newreno.c index 405b0ba..ca29877 100644 --- a/src/quic_cc_newreno.c +++ b/src/quic_cc_newreno.c @@ -196,6 +196,10 @@ static void quic_cc_nr_state_trace(struct buffer *buf, const struct quic_cc *cc) (unsigned long long)path->loss.nb_lost_pkt); } +static void quic_cc_nr_hystart_start_round(struct quic_cc *cc, uint64_t pn) +{ +} + static void 
(*quic_cc_nr_state_cbs[])(struct quic_cc *cc, struct quic_cc_event *ev) = { [QUIC_CC_ST_SS] = quic_cc_nr_ss_cb, @@ -215,6 +219,7 @@ struct quic_cc_algo quic_cc_algo_nr = { .init = quic_cc_nr_init, .event = quic_cc_nr_event, .slow_start = quic_cc_nr_slow_start, + .hystart_start_round = quic_cc_nr_hystart_start_round, .state_trace = quic_cc_nr_state_trace, }; diff --git a/src/quic_cli.c b/src/quic_cli.c index f237a1f..f0e147c 100644 --- a/src/quic_cli.c +++ b/src/quic_cli.c @@ -3,18 +3,29 @@ #include <haproxy/applet-t.h> #include <haproxy/cli.h> #include <haproxy/list.h> -#include <haproxy/tools.h> +#include <haproxy/mux_quic.h> #include <haproxy/quic_conn-t.h> #include <haproxy/quic_tp.h> +#include <haproxy/tools.h> /* incremented by each "show quic". */ unsigned int qc_epoch = 0; enum quic_dump_format { + QUIC_DUMP_FMT_DEFAULT, /* value used if not explicitly specified. */ + QUIC_DUMP_FMT_ONELINE, - QUIC_DUMP_FMT_FULL, + QUIC_DUMP_FMT_CUST, }; +#define QUIC_DUMP_FLD_TP 0x0001 +#define QUIC_DUMP_FLD_SOCK 0x0002 +#define QUIC_DUMP_FLD_PKTNS 0x0004 +#define QUIC_DUMP_FLD_CC 0x0008 +#define QUIC_DUMP_FLD_MUX 0x0010 +/* Do not forget to update FLD_MASK when adding a new field. */ +#define QUIC_DUMP_FLD_MASK 0x001f + /* appctx context used by "show quic" command */ struct show_quic_ctx { unsigned int epoch; @@ -22,10 +33,24 @@ struct show_quic_ctx { unsigned int thr; int flags; enum quic_dump_format format; + void *ptr; + int fields; }; #define QC_CLI_FL_SHOW_ALL 0x1 /* show closing/draining connections */ +/* Returns the output format for show quic. If specified explicitly use it as + * set. Else format depends if filtering on a single connection instance. If + * true, full format is preferred else oneline. + */ +static enum quic_dump_format cli_show_quic_format(const struct show_quic_ctx *ctx) +{ + if (ctx->format == QUIC_DUMP_FMT_DEFAULT) + return ctx->ptr ? 
QUIC_DUMP_FMT_CUST : QUIC_DUMP_FMT_ONELINE; + else + return ctx->format; +} + static int cli_parse_show_quic(char **args, char *payload, struct appctx *appctx, void *private) { struct show_quic_ctx *ctx = applet_reserve_svcctx(appctx, sizeof(*ctx)); @@ -37,20 +62,87 @@ static int cli_parse_show_quic(char **args, char *payload, struct appctx *appctx ctx->epoch = _HA_ATOMIC_FETCH_ADD(&qc_epoch, 1); ctx->thr = 0; ctx->flags = 0; - ctx->format = QUIC_DUMP_FMT_ONELINE; + ctx->format = QUIC_DUMP_FMT_DEFAULT; + ctx->ptr = 0; + ctx->fields = 0; if (strcmp(args[argc], "oneline") == 0) { - /* format already used as default value */ + ctx->format = QUIC_DUMP_FMT_ONELINE; ++argc; } else if (strcmp(args[argc], "full") == 0) { - ctx->format = QUIC_DUMP_FMT_FULL; + ctx->format = QUIC_DUMP_FMT_CUST; + ctx->fields = QUIC_DUMP_FLD_MASK; ++argc; } + else if (*args[argc]) { + struct ist istarg = ist(args[argc]); + struct ist field = istsplit(&istarg, ','); + + do { + if (isteq(field, ist("tp"))) { + ctx->fields |= QUIC_DUMP_FLD_TP; + } + else if (isteq(field, ist("sock"))) { + ctx->fields |= QUIC_DUMP_FLD_SOCK; + } + else if (isteq(field, ist("pktns"))) { + ctx->fields |= QUIC_DUMP_FLD_PKTNS; + } + else if (isteq(field, ist("cc"))) { + ctx->fields |= QUIC_DUMP_FLD_CC; + } + else if (isteq(field, ist("mux"))) { + ctx->fields |= QUIC_DUMP_FLD_MUX; + } + else { + /* Current argument is comma-separated so it is + * interpreted as a field list but an unknown + * field name has been specified. + */ + if (istarg.len || ctx->fields) { + cli_err(appctx, "Invalid field.\n"); + return 1; + } + + break; + } + + field = istsplit(&istarg, ','); + } while (field.len); + + /* At least one valid field specified, select the associated + * format. Else parse the current argument as a filter. 
+ */ + if (ctx->fields) { + ctx->format = QUIC_DUMP_FMT_CUST; + ++argc; + } + } + + if (*args[argc]) { + struct ist istarg = ist(args[argc]); + + if (istmatchi(istarg, ist("0x"))) { + char *nptr; + ctx->ptr = (void *)strtol(args[argc], &nptr, 16); + if (*nptr) { + cli_err(appctx, "Invalid quic_conn pointer.\n"); + return 1; + } + + if (!ctx->fields) + ctx->fields = QUIC_DUMP_FLD_MASK; - while (*args[argc]) { - if (strcmp(args[argc], "all") == 0) + ++argc; + } + else if (istmatch(istarg, ist("all"))) { ctx->flags |= QC_CLI_FL_SHOW_ALL; + } + else { + cli_err(appctx, "Invalid argument.\n"); + return 1; + } ++argc; } @@ -115,10 +207,8 @@ static void dump_quic_oneline(struct show_quic_ctx *ctx, struct quic_conn *qc) static void dump_quic_full(struct show_quic_ctx *ctx, struct quic_conn *qc) { struct quic_pktns *pktns; - struct eb64_node *node; - struct qc_stream_desc *stream; char bufaddr[INET6_ADDRSTRLEN], bufport[6]; - int expire, i, addnl; + int expire, addnl; unsigned char cid_len; addnl = 0; @@ -137,12 +227,14 @@ static void dump_quic_full(struct show_quic_ctx *ctx, struct quic_conn *qc) chunk_appendf(&trash, "\n"); - chunk_appendf(&trash, " loc. TPs:"); - quic_transport_params_dump(&trash, qc, &qc->rx.params); - chunk_appendf(&trash, "\n"); - chunk_appendf(&trash, " rem. TPs:"); - quic_transport_params_dump(&trash, qc, &qc->tx.params); - chunk_appendf(&trash, "\n"); + if (ctx->fields & QUIC_DUMP_FLD_TP) { + chunk_appendf(&trash, " loc. TPs:"); + quic_transport_params_dump(&trash, qc, &qc->rx.params); + chunk_appendf(&trash, "\n"); + chunk_appendf(&trash, " rem. 
TPs:"); + quic_transport_params_dump(&trash, qc, &qc->tx.params); + chunk_appendf(&trash, "\n"); + } /* Connection state */ if (qc->flags & QUIC_FL_CONN_CLOSING) @@ -170,44 +262,50 @@ static void dump_quic_full(struct show_quic_ctx *ctx, struct quic_conn *qc) chunk_appendf(&trash, "\n"); /* Socket */ - chunk_appendf(&trash, " fd=%d", qc->fd); - if (qc->local_addr.ss_family == AF_INET || - qc->local_addr.ss_family == AF_INET6) { - addr_to_str(&qc->local_addr, bufaddr, sizeof(bufaddr)); - port_to_str(&qc->local_addr, bufport, sizeof(bufport)); - chunk_appendf(&trash, " local_addr=%s:%s", bufaddr, bufport); + if (ctx->fields & QUIC_DUMP_FLD_SOCK) { + chunk_appendf(&trash, " fd=%d", qc->fd); + if (qc->local_addr.ss_family == AF_INET || + qc->local_addr.ss_family == AF_INET6) { + addr_to_str(&qc->local_addr, bufaddr, sizeof(bufaddr)); + port_to_str(&qc->local_addr, bufport, sizeof(bufport)); + chunk_appendf(&trash, " local_addr=%s:%s", bufaddr, bufport); + + addr_to_str(&qc->peer_addr, bufaddr, sizeof(bufaddr)); + port_to_str(&qc->peer_addr, bufport, sizeof(bufport)); + chunk_appendf(&trash, " foreign_addr=%s:%s", bufaddr, bufport); + } - addr_to_str(&qc->peer_addr, bufaddr, sizeof(bufaddr)); - port_to_str(&qc->peer_addr, bufport, sizeof(bufport)); - chunk_appendf(&trash, " foreign_addr=%s:%s", bufaddr, bufport); + chunk_appendf(&trash, "\n"); } - chunk_appendf(&trash, "\n"); - /* Packet number spaces information */ - pktns = qc->ipktns; - if (pktns) { - chunk_appendf(&trash, " [initl] rx.ackrng=%-6zu tx.inflight=%-6zu", - pktns->rx.arngs.sz, pktns->tx.in_flight); - } + if (ctx->fields & QUIC_DUMP_FLD_PKTNS) { + pktns = qc->ipktns; + if (pktns) { + chunk_appendf(&trash, " [initl] rx.ackrng=%-6zu tx.inflight=%-6zu\n", + pktns->rx.arngs.sz, pktns->tx.in_flight); + } - pktns = qc->hpktns; - if (pktns) { - chunk_appendf(&trash, " [hndshk] rx.ackrng=%-6zu tx.inflight=%-6zu\n", - pktns->rx.arngs.sz, pktns->tx.in_flight); - } + pktns = qc->hpktns; + if (pktns) { + 
chunk_appendf(&trash, " [hndshk] rx.ackrng=%-6zu tx.inflight=%-6zu\n", + pktns->rx.arngs.sz, pktns->tx.in_flight); + } - pktns = qc->apktns; - if (pktns) { - chunk_appendf(&trash, " [01rtt] rx.ackrng=%-6zu tx.inflight=%-6zu\n", - pktns->rx.arngs.sz, pktns->tx.in_flight); + pktns = qc->apktns; + if (pktns) { + chunk_appendf(&trash, " [01rtt] rx.ackrng=%-6zu tx.inflight=%-6zu\n", + pktns->rx.arngs.sz, pktns->tx.in_flight); + } } - chunk_appendf(&trash, " srtt=%-4u rttvar=%-4u rttmin=%-4u ptoc=%-4u cwnd=%-6llu" - " mcwnd=%-6llu sentpkts=%-6llu lostpkts=%-6llu reorderedpkts=%-6llu\n", - qc->path->loss.srtt, qc->path->loss.rtt_var, - qc->path->loss.rtt_min, qc->path->loss.pto_count, (ullong)qc->path->cwnd, - (ullong)qc->path->mcwnd, (ullong)qc->cntrs.sent_pkt, (ullong)qc->path->loss.nb_lost_pkt, (ullong)qc->path->loss.nb_reordered_pkt); + if (ctx->fields & QUIC_DUMP_FLD_CC) { + chunk_appendf(&trash, " srtt=%-4u rttvar=%-4u rttmin=%-4u ptoc=%-4u cwnd=%-6llu" + " mcwnd=%-6llu sentpkts=%-6llu lostpkts=%-6llu reorderedpkts=%-6llu\n", + qc->path->loss.srtt, qc->path->loss.rtt_var, + qc->path->loss.rtt_min, qc->path->loss.pto_count, (ullong)qc->path->cwnd, + (ullong)qc->path->mcwnd, (ullong)qc->cntrs.sent_pkt, (ullong)qc->path->loss.nb_lost_pkt, (ullong)qc->path->loss.nb_reordered_pkt); + } if (qc->cntrs.dropped_pkt) { chunk_appendf(&trash, " droppkts=%-6llu", qc->cntrs.dropped_pkt); @@ -256,23 +354,8 @@ static void dump_quic_full(struct show_quic_ctx *ctx, struct quic_conn *qc) if (addnl) chunk_appendf(&trash, "\n"); - /* Streams */ - node = eb64_first(&qc->streams_by_id); - i = 0; - while (node) { - stream = eb64_entry(node, struct qc_stream_desc, by_id); - node = eb64_next(node); - - chunk_appendf(&trash, " | stream=%-8llu", (unsigned long long)stream->by_id.key); - chunk_appendf(&trash, " off=%-8llu ack=%-8llu", - (unsigned long long)stream->buf_offset, - (unsigned long long)stream->ack_offset); - - if (!(++i % 3)) { - chunk_appendf(&trash, "\n"); - i = 0; - } - } + if 
(ctx->fields & QUIC_DUMP_FLD_MUX && qc->mux_state == QC_MUX_READY) + qcc_show_quic(qc->qcc); chunk_appendf(&trash, "\n"); } @@ -280,7 +363,6 @@ static void dump_quic_full(struct show_quic_ctx *ctx, struct quic_conn *qc) static int cli_io_handler_dump_quic(struct appctx *appctx) { struct show_quic_ctx *ctx = appctx->svcctx; - struct stconn *sc = appctx_sc(appctx); struct quic_conn *qc; thread_isolate(); @@ -288,16 +370,6 @@ static int cli_io_handler_dump_quic(struct appctx *appctx) if (ctx->thr >= global.nbthread) goto done; - /* FIXME: Don't watch the other side !*/ - if (unlikely(sc_opposite(sc)->flags & SC_FL_SHUT_DONE)) { - /* If we're forced to shut down, we might have to remove our - * reference to the last stream being dumped. - */ - if (!LIST_ISEMPTY(&ctx->bref.users)) - LIST_DEL_INIT(&ctx->bref.users); - goto done; - } - chunk_reset(&trash); if (!LIST_ISEMPTY(&ctx->bref.users)) { @@ -309,7 +381,7 @@ static int cli_io_handler_dump_quic(struct appctx *appctx) ctx->bref.ref = ha_thread_ctx[ctx->thr].quic_conns.n; /* Print legend for oneline format. */ - if (ctx->format == QUIC_DUMP_FMT_ONELINE) { + if (cli_show_quic_format(ctx) == QUIC_DUMP_FMT_ONELINE) { chunk_appendf(&trash, "# conn/frontend state " "in_flight infl_p lost_p " "Local Address Foreign Address " @@ -322,11 +394,12 @@ static int cli_io_handler_dump_quic(struct appctx *appctx) int done = 0; if (ctx->bref.ref == &ha_thread_ctx[ctx->thr].quic_conns) { - /* If closing connections requested through "all", move - * to quic_conns_clo list after browsing quic_conns. - * Else move directly to the next quic_conns thread. + /* If closing connections requested through "all" or a + * specific connection is filtered, move to + * quic_conns_clo list after browsing quic_conns. Else + * move directly to the next quic_conns thread. 
*/ - if (ctx->flags & QC_CLI_FL_SHOW_ALL) { + if (ctx->flags & QC_CLI_FL_SHOW_ALL || ctx->ptr) { ctx->bref.ref = ha_thread_ctx[ctx->thr].quic_conns_clo.n; continue; } @@ -344,6 +417,10 @@ static int cli_io_handler_dump_quic(struct appctx *appctx) qc = LIST_ELEM(ctx->bref.ref, struct quic_conn *, el_th_ctx); if ((int)(qc->qc_epoch - ctx->epoch) > 0) done = 1; + + /* Skip to next element if filter on a different connection. */ + if (ctx->ptr && ctx->ptr != qc) + done = 1; } if (done) { @@ -355,13 +432,17 @@ static int cli_io_handler_dump_quic(struct appctx *appctx) continue; } - switch (ctx->format) { - case QUIC_DUMP_FMT_FULL: + switch (cli_show_quic_format(ctx)) { + case QUIC_DUMP_FMT_CUST: dump_quic_full(ctx, qc); break; case QUIC_DUMP_FMT_ONELINE: dump_quic_oneline(ctx, qc); break; + + case QUIC_DUMP_FMT_DEFAULT: + /* An explicit format must be returned by cli_show_quic_format(). */ + ABORT_NOW(); } if (applet_putchk(appctx, &trash) == -1) { @@ -371,6 +452,10 @@ static int cli_io_handler_dump_quic(struct appctx *appctx) } ctx->bref.ref = qc->el_th_ctx.n; + + /* If filtered connection displayed, show quic can be stopped early. 
*/ + if (ctx->ptr) + goto done; } done: @@ -395,7 +480,7 @@ static void cli_release_show_quic(struct appctx *appctx) } static struct cli_kw_list cli_kws = {{ }, { - { { "show", "quic", NULL }, "show quic [oneline|full] [all] : display quic connections status", cli_parse_show_quic, cli_io_handler_dump_quic, cli_release_show_quic }, + { { "show", "quic", NULL }, "show quic [<format>] [<filter>] : display quic connections status", cli_parse_show_quic, cli_io_handler_dump_quic, cli_release_show_quic }, {{},} }}; diff --git a/src/quic_conn.c b/src/quic_conn.c index 5233496..6cc1d38 100644 --- a/src/quic_conn.c +++ b/src/quic_conn.c @@ -355,7 +355,7 @@ int qc_h3_request_reject(struct quic_conn *qc, uint64_t id) int ret = 0; struct quic_frame *ss, *rs; struct quic_enc_level *qel = qc->ael; - const uint64_t app_error_code = H3_REQUEST_REJECTED; + const uint64_t app_error_code = H3_ERR_REQUEST_REJECTED; TRACE_ENTER(QUIC_EV_CONN_PRSHPKT, qc); @@ -544,10 +544,10 @@ int quic_build_post_handshake_frames(struct quic_conn *qc) goto leave; } - /* QUIC connection packet handler task (post handshake) */ struct task *quic_conn_app_io_cb(struct task *t, void *context, unsigned int state) { + struct list send_list = LIST_HEAD_INIT(send_list); struct quic_conn *qc = context; struct quic_enc_level *qel; @@ -592,9 +592,13 @@ struct task *quic_conn_app_io_cb(struct task *t, void *context, unsigned int sta goto out; } + if (!qel_need_sending(qel, qc)) + goto out; + /* XXX TODO: how to limit the list frames to send */ - if (!qc_send_app_pkts(qc, &qel->pktns->tx.frms)) { - TRACE_DEVEL("qc_send_app_pkts() failed", QUIC_EV_CONN_IO_CB, qc); + qel_register_send(&send_list, qel, &qel->pktns->tx.frms); + if (!qc_send(qc, 0, &send_list)) { + TRACE_DEVEL("qc_send() failed", QUIC_EV_CONN_IO_CB, qc); goto out; } @@ -741,9 +745,9 @@ static struct quic_conn_closed *qc_new_cc_conn(struct quic_conn *qc) /* QUIC connection packet handler task. 
*/ struct task *quic_conn_io_cb(struct task *t, void *context, unsigned int state) { - int ret; struct quic_conn *qc = context; - struct buffer *buf = NULL; + struct list send_list = LIST_HEAD_INIT(send_list); + struct quic_enc_level *qel; int st; struct tasklet *tl = (struct tasklet *)t; @@ -753,8 +757,8 @@ struct task *quic_conn_io_cb(struct task *t, void *context, unsigned int state) TRACE_PROTO("connection state", QUIC_EV_CONN_IO_CB, qc, &st); if (HA_ATOMIC_LOAD(&tl->state) & TASK_HEAVY) { - HA_ATOMIC_AND(&tl->state, ~TASK_HEAVY); qc_ssl_provide_all_quic_data(qc, qc->xprt_ctx); + HA_ATOMIC_AND(&tl->state, ~TASK_HEAVY); } /* Retranmissions */ @@ -771,11 +775,6 @@ struct task *quic_conn_io_cb(struct task *t, void *context, unsigned int state) if (!qc_treat_rx_pkts(qc)) goto out; - if (HA_ATOMIC_LOAD(&tl->state) & TASK_HEAVY) { - tasklet_wakeup(tl); - goto out; - } - if (qc->flags & QUIC_FL_CONN_TO_KILL) { TRACE_DEVEL("connection to be killed", QUIC_EV_CONN_PHPKTS, qc); goto out; @@ -797,34 +796,21 @@ struct task *quic_conn_io_cb(struct task *t, void *context, unsigned int state) } } - buf = qc_get_txb(qc); - if (!buf) - goto out; - - if (b_data(buf) && !qc_purge_txbuf(qc, buf)) - goto out; - - /* Currently buf cannot be non-empty at this stage. Even if a previous - * sendto() has failed it is emptied to simulate packet emission and - * rely on QUIC lost detection to try to emit it. - */ - BUG_ON_HOT(b_data(buf)); - b_reset(buf); + /* Insert each QEL into sending list if needed. */ + list_for_each_entry(qel, &qc->qel_list, list) { + if (qel_need_sending(qel, qc)) + qel_register_send(&send_list, qel, &qel->pktns->tx.frms); + } - ret = qc_prep_hpkts(qc, buf, NULL); - if (ret == -1) { - qc_txb_release(qc); + /* Skip sending if no QEL with frames to sent. 
*/ + if (LIST_ISEMPTY(&send_list)) goto out; - } - if (ret && !qc_send_ppkts(buf, qc->xprt_ctx)) { - if (qc->flags & QUIC_FL_CONN_TO_KILL) - qc_txb_release(qc); + if (!qc_send(qc, 0, &send_list)) { + TRACE_DEVEL("qc_send() failed", QUIC_EV_CONN_IO_CB, qc); goto out; } - qc_txb_release(qc); - out: /* Release the Handshake encryption level and packet number space if * the Handshake is confirmed and if there is no need to send @@ -1818,7 +1804,14 @@ int qc_set_tid_affinity(struct quic_conn *qc, uint new_tid, struct listener *new qc_detach_th_ctx_list(qc, 0); node = eb64_first(qc->cids); - BUG_ON(!node || eb64_next(node)); /* One and only one CID must be present before affinity rebind. */ + /* One and only one CID must be present before affinity rebind. + * + * This could be triggered fairly easily if tasklet is scheduled just + * before thread migration for post-handshake state to generate new + * CIDs. In this case, QUIC_FL_CONN_IO_TO_REQUEUE should be used + * instead of tasklet_wakeup(). + */ + BUG_ON(!node || eb64_next(node)); conn_id = eb64_entry(node, struct quic_connection_id, seq_num); /* At this point no connection was accounted for yet on this diff --git a/src/quic_fctl.c b/src/quic_fctl.c new file mode 100644 index 0000000..b797e55 --- /dev/null +++ b/src/quic_fctl.c @@ -0,0 +1,96 @@ +#include <haproxy/quic_fctl.h> + +#include <haproxy/api.h> + +void qfctl_init(struct quic_fctl *fctl, uint64_t limit) +{ + fctl->limit = limit; + fctl->off_real = 0; + fctl->off_soft = 0; +} + +/* Returns true if real limit is blocked for <fctl> flow control instance. + * This happens if it is equal to current max value. + */ +int qfctl_rblocked(const struct quic_fctl *fctl) +{ + /* Real limit must never be exceeded. */ + BUG_ON(fctl->off_real > fctl->limit); + return fctl->off_real == fctl->limit; +} + +/* Returns true if soft limit is blocked for <fctl> flow control instance. + * This happens if it is equal or greater than current max value. 
+ */ +int qfctl_sblocked(const struct quic_fctl *fctl) +{ + return fctl->off_soft >= fctl->limit; +} + +/* Set a new <val> maximum value for <fctl> flow control instance. If current + * limit is already equal or more, the new value is ignored. Additionally, + * <out_unblock_soft> and <out_unblock_real> can be used as output parameters + * to detect if the current update results in one or both of these offsets + * being unblocked. + * + * Returns true if max is incremented else false. + */ +int qfctl_set_max(struct quic_fctl *fctl, uint64_t val, + int *out_unblock_soft, int *out_unblock_real) +{ + int unblock_soft = 0, unblock_real = 0; + int ret = 0; + + if (fctl->limit < val) { + if (fctl->off_soft >= fctl->limit && fctl->off_soft < val) + unblock_soft = 1; + if (fctl->off_real == fctl->limit && fctl->off_real < val) + unblock_real = 1; + + fctl->limit = val; + ret = 1; + } + + if (out_unblock_soft) + *out_unblock_soft = unblock_soft; + if (out_unblock_real) + *out_unblock_real = unblock_real; + + return ret; +} + +/* Increment real offset of <fctl> flow control instance by <diff>. This cannot + * exceed <fctl> limit. + * + * Returns true if limit is reached after increment. + */ +int qfctl_rinc(struct quic_fctl *fctl, uint64_t diff) +{ + /* Real limit must never be exceeded. */ + BUG_ON(fctl->off_real + diff > fctl->limit); + fctl->off_real += diff; + + return fctl->off_real == fctl->limit; +} + +/* Increment soft offset of <fctl> flow control instance by <diff>. This cannot + * be done if <fctl> limit was already reached. + * + * Returns true if limit is reached after increment. + */ +int qfctl_sinc(struct quic_fctl *fctl, uint64_t diff) +{ + /* Soft limit must not be incremented if already in excess. */ + BUG_ON(qfctl_sblocked(fctl)); + fctl->off_soft += diff; + + return fctl->off_soft >= fctl->limit; +} + +/* Return the remaining offset before reaching <fctl> limit. */ +uint64_t qfctl_rcap(const struct quic_fctl *fctl) +{ + /* Real limit must never be exceeded. 
*/ + BUG_ON(fctl->off_real > fctl->limit); + return fctl->limit - fctl->off_real; +} diff --git a/src/quic_retry.c b/src/quic_retry.c index 1c58e5e..f1d55b8 100644 --- a/src/quic_retry.c +++ b/src/quic_retry.c @@ -60,7 +60,7 @@ static int quic_generate_retry_token_aad(unsigned char *aad, unsigned char *p; p = aad; - *(uint32_t *)p = htonl(version); + write_u32(p, htonl(version)); p += sizeof version; p += quic_saddr_cpy(p, addr); memcpy(p, cid->data, cid->len); diff --git a/src/quic_rx.c b/src/quic_rx.c index 585c71a..d5b45d6 100644 --- a/src/quic_rx.c +++ b/src/quic_rx.c @@ -506,6 +506,7 @@ static void qc_notify_cc_of_newly_acked_pkts(struct quic_conn *qc, qc_treat_ack_of_ack(qc, &pkt->pktns->rx.arngs, pkt->largest_acked_pn); ev.ack.acked = pkt->in_flight_len; ev.ack.time_sent = pkt->time_sent; + ev.ack.pn = pkt->pn_node.key; quic_cc_event(&qc->path->cc, &ev); LIST_DEL_INIT(&pkt->list); quic_tx_packet_refdec(pkt); @@ -740,8 +741,11 @@ static int qc_handle_crypto_frm(struct quic_conn *qc, goto leave; } - if (ncb_data(ncbuf, 0)) + /* Reschedule with TASK_HEAVY if CRYPTO data ready for decoding. */ + if (ncb_data(ncbuf, 0)) { HA_ATOMIC_OR(&qc->wait_event.tasklet->state, TASK_HEAVY); + tasklet_wakeup(qc->wait_event.tasklet); + } done: ret = 1; @@ -860,6 +864,7 @@ static int qc_parse_pkt_frms(struct quic_conn *qc, struct quic_rx_packet *pkt, case QUIC_FT_PING: break; case QUIC_FT_ACK: + case QUIC_FT_ACK_ECN: { unsigned int rtt_sample; rtt_sample = UINT_MAX; @@ -902,6 +907,9 @@ static int qc_parse_pkt_frms(struct quic_conn *qc, struct quic_rx_packet *pkt, if (!qc_handle_crypto_frm(qc, &frm.crypto, pkt, qel, &fast_retrans)) goto leave; break; + case QUIC_FT_NEW_TOKEN: + /* TODO */ + break; case QUIC_FT_STREAM_8 ... 
QUIC_FT_STREAM_F: { struct qf_stream *strm_frm = &frm.stream; @@ -974,7 +982,7 @@ static int qc_parse_pkt_frms(struct quic_conn *qc, struct quic_rx_packet *pkt, break; case QUIC_FT_RETIRE_CONNECTION_ID: { - struct quic_cid_tree *tree; + struct quic_cid_tree *tree __maybe_unused; struct quic_connection_id *conn_id = NULL; if (!qc_handle_retire_connection_id_frm(qc, &frm, &pkt->dcid, &conn_id)) @@ -1001,6 +1009,10 @@ static int qc_parse_pkt_frms(struct quic_conn *qc, struct quic_rx_packet *pkt, } break; } + case QUIC_FT_PATH_CHALLENGE: + case QUIC_FT_PATH_RESPONSE: + /* TODO */ + break; case QUIC_FT_CONNECTION_CLOSE: case QUIC_FT_CONNECTION_CLOSE_APP: /* Increment the error counters */ @@ -1040,8 +1052,8 @@ static int qc_parse_pkt_frms(struct quic_conn *qc, struct quic_rx_packet *pkt, qc->state = QUIC_HS_ST_CONFIRMED; break; default: - TRACE_ERROR("unknosw frame type", QUIC_EV_CONN_PRSHPKT, qc); - goto leave; + /* Unknown frame type must be rejected by qc_parse_frm(). */ + ABORT_NOW(); } } @@ -1144,50 +1156,6 @@ static void qc_rm_hp_pkts(struct quic_conn *qc, struct quic_enc_level *el) TRACE_LEAVE(QUIC_EV_CONN_ELRMHP, qc); } -/* Process all the CRYPTO frame at <el> encryption level. This is the - * responsibility of the called to ensure there exists a CRYPTO data - * stream for this level. - * Return 1 if succeeded, 0 if not. 
- */ -int qc_treat_rx_crypto_frms(struct quic_conn *qc, struct quic_enc_level *el, - struct ssl_sock_ctx *ctx) -{ - int ret = 0; - struct ncbuf *ncbuf; - struct quic_cstream *cstream = el->cstream; - ncb_sz_t data; - - TRACE_ENTER(QUIC_EV_CONN_PHPKTS, qc); - - BUG_ON(!cstream); - ncbuf = &cstream->rx.ncbuf; - if (ncb_is_null(ncbuf)) - goto done; - - /* TODO not working if buffer is wrapping */ - while ((data = ncb_data(ncbuf, 0))) { - const unsigned char *cdata = (const unsigned char *)ncb_head(ncbuf); - - if (!qc_ssl_provide_quic_data(&el->cstream->rx.ncbuf, el->level, - ctx, cdata, data)) - goto leave; - - cstream->rx.offset += data; - TRACE_DEVEL("buffered crypto data were provided to TLS stack", - QUIC_EV_CONN_PHPKTS, qc, el); - } - - done: - ret = 1; - leave: - if (!ncb_is_null(ncbuf) && ncb_is_empty(ncbuf)) { - TRACE_DEVEL("freeing crypto buf", QUIC_EV_CONN_PHPKTS, qc, el); - quic_free_ncbuf(ncbuf); - } - TRACE_LEAVE(QUIC_EV_CONN_PHPKTS, qc); - return ret; -} - /* Check if it's possible to remove header protection for packets related to * encryption level <qel>. If <qel> is NULL, assume it's false. * @@ -1317,15 +1285,6 @@ int qc_treat_rx_pkts(struct quic_conn *qc) qel->pktns->flags |= QUIC_FL_PKTNS_NEW_LARGEST_PN; } - if (qel->cstream) { - struct ncbuf *ncbuf = &qel->cstream->rx.ncbuf; - - if (!ncb_is_null(ncbuf) && ncb_data(ncbuf, 0)) { - /* Some in order CRYPTO data were bufferized. */ - HA_ATOMIC_OR(&qc->wait_event.tasklet->state, TASK_HEAVY); - } - } - /* Release the Initial encryption level and packet number space. 
*/ if ((qc->flags & QUIC_FL_CONN_IPKTNS_DCD) && qel == qc->iel) { qc_enc_level_free(qc, &qc->iel); @@ -1503,7 +1462,7 @@ static inline int quic_read_uint32(uint32_t *val, if (end - *buf < sizeof *val) return 0; - *val = ntohl(*(uint32_t *)*buf); + *val = ntohl(read_u32(*buf)); *buf += sizeof *val; return 1; @@ -1728,6 +1687,9 @@ static struct quic_conn *quic_rx_pkt_retrieve_conn(struct quic_rx_packet *pkt, } } else if (!qc) { + /* Stateless Reset sent even for Long header packets as haproxy + * emits stateless_reset_token in its TPs. + */ TRACE_PROTO("RX non Initial pkt without connection", QUIC_EV_CONN_LPKT, NULL, NULL, NULL, pkt->version); if (!send_stateless_reset(l, &dgram->saddr, pkt)) TRACE_ERROR("stateless reset not sent", QUIC_EV_CONN_LPKT, qc); diff --git a/src/quic_sock.c b/src/quic_sock.c index f796513..a3f3768 100644 --- a/src/quic_sock.c +++ b/src/quic_sock.c @@ -29,6 +29,7 @@ #include <haproxy/listener.h> #include <haproxy/log.h> #include <haproxy/pool.h> +#include <haproxy/protocol-t.h> #include <haproxy/proto_quic.h> #include <haproxy/proxy-t.h> #include <haproxy/quic_cid.h> @@ -337,8 +338,8 @@ static struct quic_dgram *quic_rxbuf_purge_dgrams(struct quic_receiver_buf *rbuf return prev; } -/* Receive data from datagram socket <fd>. Data are placed in <out> buffer of - * length <len>. +/* Receive a single message from datagram socket <fd>. Data are placed in <out> + * buffer of length <len>. * * Datagram addresses will be returned via the next arguments. <from> will be * the peer address and <to> the reception one. 
Note that <to> can only be @@ -393,6 +394,11 @@ static ssize_t quic_recv(int fd, void *out, size_t len, if (ret < 0) goto end; + if (unlikely(port_is_restricted((struct sockaddr_storage *)from, HA_PROTO_QUIC))) { + ret = -1; + goto end; + } + for (cmsg = CMSG_FIRSTHDR(&msg); cmsg; cmsg = CMSG_NXTHDR(&msg, cmsg)) { switch (cmsg->cmsg_level) { case IPPROTO_IP: @@ -566,6 +572,86 @@ void quic_conn_sock_fd_iocb(int fd) TRACE_LEAVE(QUIC_EV_CONN_RCV, qc); } +static void cmsg_set_saddr(struct msghdr *msg, struct cmsghdr **cmsg, + struct sockaddr_storage *saddr) +{ + struct cmsghdr *c; +#ifdef IP_PKTINFO + struct in_pktinfo *in; +#endif /* IP_PKTINFO */ +#ifdef IPV6_RECVPKTINFO + struct in6_pktinfo *in6; +#endif /* IPV6_RECVPKTINFO */ + size_t sz = 0; + + /* First determine size of ancillary data depending on the system support. */ + switch (saddr->ss_family) { + case AF_INET: +#if defined(IP_PKTINFO) + sz = sizeof(struct in_pktinfo); +#elif defined(IP_RECVDSTADDR) + sz = sizeof(struct in_addr); +#endif /* IP_PKTINFO || IP_RECVDSTADDR */ + break; + case AF_INET6: +#ifdef IPV6_RECVPKTINFO + sz = sizeof(struct in6_pktinfo); +#endif /* IPV6_RECVPKTINFO */ + break; + default: + break; + } + + /* Size is null if system does not support send source address setting. */ + if (!sz) + return; + + /* Set first msg_controllen to be able to use CMSG_* macros. */ + msg->msg_controllen += CMSG_SPACE(sz); + + *cmsg = !(*cmsg) ? 
CMSG_FIRSTHDR(msg) : CMSG_NXTHDR(msg, *cmsg); + ALREADY_CHECKED(*cmsg); + c = *cmsg; + c->cmsg_len = CMSG_LEN(sz); + + switch (saddr->ss_family) { + case AF_INET: + c->cmsg_level = IPPROTO_IP; +#if defined(IP_PKTINFO) + c->cmsg_type = IP_PKTINFO; + in = (struct in_pktinfo *)CMSG_DATA(c); + in->ipi_ifindex = 0; + in->ipi_addr.s_addr = 0; + memcpy(&in->ipi_spec_dst, + &((struct sockaddr_in *)saddr)->sin_addr, + sizeof(struct in_addr)); +#elif defined(IP_RECVDSTADDR) + c->cmsg_type = IP_SENDSRCADDR; + memcpy(CMSG_DATA(c), + &((struct sockaddr_in *)saddr)->sin_addr, + sizeof(struct in_addr)); +#endif /* IP_PKTINFO || IP_RECVDSTADDR */ + + break; + + case AF_INET6: +#ifdef IPV6_RECVPKTINFO + c->cmsg_level = IPPROTO_IPV6; + c->cmsg_type = IPV6_PKTINFO; + in6 = (struct in6_pktinfo *)CMSG_DATA(c); + in6->ipi6_ifindex = 0; + memcpy(&in6->ipi6_addr, + &((struct sockaddr_in6 *)saddr)->sin6_addr, + sizeof(struct in6_addr)); +#endif /* IPV6_RECVPKTINFO */ + + break; + + default: + break; + } +} + /* Send a datagram stored into <buf> buffer with <sz> as size. * The caller must ensure there is at least <sz> bytes in this buffer. 
* @@ -581,106 +667,58 @@ int qc_snd_buf(struct quic_conn *qc, const struct buffer *buf, size_t sz, int flags) { ssize_t ret; + struct msghdr msg; + struct iovec vec; + struct cmsghdr *cmsg __maybe_unused = NULL; - do { - if (qc_test_fd(qc)) { - if (!fd_send_ready(qc->fd)) - return 0; - - ret = send(qc->fd, b_peek(buf, b_head_ofs(buf)), sz, - MSG_DONTWAIT | MSG_NOSIGNAL); - } -#if defined(IP_PKTINFO) || defined(IP_RECVDSTADDR) || defined(IPV6_RECVPKTINFO) - else if (is_addr(&qc->local_addr)) { - struct msghdr msg = { 0 }; - struct iovec vec; - struct cmsghdr *cmsg; + union { #ifdef IP_PKTINFO - struct in_pktinfo in; + char buf[CMSG_SPACE(sizeof(struct in_pktinfo))]; #endif /* IP_PKTINFO */ #ifdef IPV6_RECVPKTINFO - struct in6_pktinfo in6; + char buf6[CMSG_SPACE(sizeof(struct in6_pktinfo))]; #endif /* IPV6_RECVPKTINFO */ - union { -#ifdef IP_PKTINFO - char buf[CMSG_SPACE(sizeof(in))]; -#endif /* IP_PKTINFO */ -#ifdef IPV6_RECVPKTINFO - char buf6[CMSG_SPACE(sizeof(in6))]; -#endif /* IPV6_RECVPKTINFO */ - char bufaddr[CMSG_SPACE(sizeof(struct in_addr))]; - struct cmsghdr align; - } u; - - vec.iov_base = b_peek(buf, b_head_ofs(buf)); - vec.iov_len = sz; - msg.msg_name = &qc->peer_addr; - msg.msg_namelen = get_addr_len(&qc->peer_addr); - msg.msg_iov = &vec; - msg.msg_iovlen = 1; - - switch (qc->local_addr.ss_family) { - case AF_INET: -#if defined(IP_PKTINFO) - memset(&in, 0, sizeof(in)); - memcpy(&in.ipi_spec_dst, - &((struct sockaddr_in *)&qc->local_addr)->sin_addr, - sizeof(struct in_addr)); - - msg.msg_control = u.buf; - msg.msg_controllen = sizeof(u.buf); - - cmsg = CMSG_FIRSTHDR(&msg); - cmsg->cmsg_level = IPPROTO_IP; - cmsg->cmsg_type = IP_PKTINFO; - cmsg->cmsg_len = CMSG_LEN(sizeof(struct in_pktinfo)); - memcpy(CMSG_DATA(cmsg), &in, sizeof(in)); -#elif defined(IP_RECVDSTADDR) - msg.msg_control = u.bufaddr; - msg.msg_controllen = sizeof(u.bufaddr); - - cmsg = CMSG_FIRSTHDR(&msg); - cmsg->cmsg_level = IPPROTO_IP; - cmsg->cmsg_type = IP_SENDSRCADDR; - cmsg->cmsg_len 
= CMSG_LEN(sizeof(struct in_addr)); - memcpy(CMSG_DATA(cmsg), - &((struct sockaddr_in *)&qc->local_addr)->sin_addr, - sizeof(struct in_addr)); -#endif /* IP_PKTINFO || IP_RECVDSTADDR */ - break; + char bufaddr[CMSG_SPACE(sizeof(struct in_addr))]; + struct cmsghdr align; + } ancillary_data; - case AF_INET6: -#ifdef IPV6_RECVPKTINFO - memset(&in6, 0, sizeof(in6)); - memcpy(&in6.ipi6_addr, - &((struct sockaddr_in6 *)&qc->local_addr)->sin6_addr, - sizeof(struct in6_addr)); - - msg.msg_control = u.buf6; - msg.msg_controllen = sizeof(u.buf6); - - cmsg = CMSG_FIRSTHDR(&msg); - cmsg->cmsg_level = IPPROTO_IPV6; - cmsg->cmsg_type = IPV6_PKTINFO; - cmsg->cmsg_len = CMSG_LEN(sizeof(struct in6_pktinfo)); - memcpy(CMSG_DATA(cmsg), &in6, sizeof(in6)); -#endif /* IPV6_RECVPKTINFO */ - break; + vec.iov_base = b_peek(buf, b_head_ofs(buf)); + vec.iov_len = sz; - default: - break; - } + /* man 2 sendmsg + * + * The msg_name field is used on an unconnected socket to specify the + * target address for a datagram. It points to a buffer containing the + * address; the msg_namelen field should be set to the size of the + * address. For a connected socket, these fields should be specified + * as NULL and 0, respectively. + */ + if (!qc_test_fd(qc)) { + msg.msg_name = &qc->peer_addr; + msg.msg_namelen = get_addr_len(&qc->peer_addr); + } + else { + msg.msg_name = NULL; + msg.msg_namelen = 0; + } - ret = sendmsg(qc->li->rx.fd, &msg, - MSG_DONTWAIT|MSG_NOSIGNAL); - } -#endif /* IP_PKTINFO || IP_RECVDSTADDR || IPV6_RECVPKTINFO */ - else { - ret = sendto(qc->li->rx.fd, b_peek(buf, b_head_ofs(buf)), sz, - MSG_DONTWAIT|MSG_NOSIGNAL, - (struct sockaddr *)&qc->peer_addr, - get_addr_len(&qc->peer_addr)); - } + msg.msg_iov = &vec; + msg.msg_iovlen = 1; + msg.msg_control = NULL; + msg.msg_controllen = 0; + msg.msg_flags = 0; + + if (qc_test_fd(qc) && !fd_send_ready(qc->fd)) + return 0; + + /* Set source address when using listener socket if possible. 
*/ + if (!qc_test_fd(qc) && is_addr(&qc->local_addr)) { + msg.msg_control = ancillary_data.bufaddr; + cmsg_set_saddr(&msg, &cmsg, &qc->local_addr); + } + + do { + ret = sendmsg(qc_fd(qc), &msg, MSG_DONTWAIT|MSG_NOSIGNAL); } while (ret < 0 && errno == EINTR); if (ret < 0) { @@ -740,7 +778,7 @@ int qc_rcv_buf(struct quic_conn *qc) max_sz = params->max_udp_payload_size; do { - if (!b_alloc(&buf)) + if (!b_alloc(&buf, DB_MUX_RX)) break; /* TODO subscribe for memory again available. */ b_reset(&buf); @@ -967,18 +1005,15 @@ void qc_want_recv(struct quic_conn *qc) struct quic_accept_queue *quic_accept_queues; /* Install <qc> on the queue ready to be accepted. The queue task is then woken - * up. If <qc> accept is already scheduled or done, nothing is done. + * up. */ void quic_accept_push_qc(struct quic_conn *qc) { struct quic_accept_queue *queue = &quic_accept_queues[tid]; struct li_per_thread *lthr = &qc->li->per_thr[ti->ltid]; - /* early return if accept is already in progress/done for this - * connection - */ - if (qc->flags & QUIC_FL_CONN_ACCEPT_REGISTERED) - return; + /* A connection must only be accepted once per instance. 
*/ + BUG_ON(qc->flags & QUIC_FL_CONN_ACCEPT_REGISTERED); BUG_ON(MT_LIST_INLIST(&qc->accept_list)); HA_ATOMIC_INC(&qc->li->rx.quic_curr_accept); diff --git a/src/quic_ssl.c b/src/quic_ssl.c index 85b6717..66eb68e 100644 --- a/src/quic_ssl.c +++ b/src/quic_ssl.c @@ -2,7 +2,6 @@ #include <haproxy/ncbuf.h> #include <haproxy/proxy.h> #include <haproxy/quic_conn.h> -#include <haproxy/quic_rx.h> #include <haproxy/quic_sock.h> #include <haproxy/quic_ssl.h> #include <haproxy/quic_tls.h> @@ -442,24 +441,25 @@ int ssl_quic_initial_ctx(struct bind_conf *bind_conf) ctx = SSL_CTX_new(TLS_server_method()); bind_conf->initial_ctx = ctx; + if (global_ssl.security_level > -1) + SSL_CTX_set_security_level(ctx, global_ssl.security_level); SSL_CTX_set_options(ctx, options); SSL_CTX_set_mode(ctx, SSL_MODE_RELEASE_BUFFERS); SSL_CTX_set_min_proto_version(ctx, TLS1_3_VERSION); SSL_CTX_set_max_proto_version(ctx, TLS1_3_VERSION); -#ifdef SSL_CTRL_SET_TLSEXT_HOSTNAME -# if defined(HAVE_SSL_CLIENT_HELLO_CB) -# if defined(SSL_OP_NO_ANTI_REPLAY) if (bind_conf->ssl_conf.early_data) { - SSL_CTX_set_options(ctx, SSL_OP_NO_ANTI_REPLAY); -# if defined(USE_QUIC_OPENSSL_COMPAT) || defined(OPENSSL_IS_AWSLC) - ha_warning("Binding [%s:%d] for %s %s: 0-RTT is not supported in limited QUIC compatibility mode, ignored.\n", +#if !defined(HAVE_SSL_0RTT_QUIC) + ha_warning("Binding [%s:%d] for %s %s: 0-RTT with QUIC is not supported by this SSL library, ignored.\n", bind_conf->file, bind_conf->line, proxy_type_str(bind_conf->frontend), bind_conf->frontend->id); -# else +#else + SSL_CTX_set_options(ctx, SSL_OP_NO_ANTI_REPLAY); SSL_CTX_set_max_early_data(ctx, 0xffffffff); -# endif /* ! USE_QUIC_OPENSSL_COMPAT */ +#endif /* ! HAVE_SSL_0RTT_QUIC */ } -# endif /* !SSL_OP_NO_ANTI_REPLAY */ + +#ifdef SSL_CTRL_SET_TLSEXT_HOSTNAME +# if defined(HAVE_SSL_CLIENT_HELLO_CB) SSL_CTX_set_client_hello_cb(ctx, ssl_sock_switchctx_cbk, NULL); SSL_CTX_set_tlsext_servername_callback(ctx, ssl_sock_switchctx_err_cbk); # else /* ! 
HAVE_SSL_CLIENT_HELLO_CB */ @@ -502,10 +502,10 @@ static forceinline void qc_ssl_dump_errors(struct connection *conn) * Remaining parameter are there for debugging purposes. * Return 1 if succeeded, 0 if not. */ -int qc_ssl_provide_quic_data(struct ncbuf *ncbuf, - enum ssl_encryption_level_t level, - struct ssl_sock_ctx *ctx, - const unsigned char *data, size_t len) +static int qc_ssl_provide_quic_data(struct ncbuf *ncbuf, + enum ssl_encryption_level_t level, + struct ssl_sock_ctx *ctx, + const unsigned char *data, size_t len) { #ifdef DEBUG_STRICT enum ncb_ret ncb_ret; @@ -557,6 +557,39 @@ int qc_ssl_provide_quic_data(struct ncbuf *ncbuf, ERR_clear_error(); goto leave; } +#if defined(LIBRESSL_VERSION_NUMBER) + else if (qc->flags & QUIC_FL_CONN_IMMEDIATE_CLOSE) { + /* Some libressl versions emit TLS alerts without making the handshake + * (SSL_do_handshake()) fail. This is at least the case for + * libressl-3.9.0 when forcing the TLS cipher to TLS_AES_128_CCM_SHA256. + */ + TRACE_ERROR("SSL handshake error", QUIC_EV_CONN_IO_CB, qc, &state, &ssl_err); + HA_ATOMIC_INC(&qc->prx_counters->hdshk_fail); + goto leave; + } +#endif + +#if defined(OPENSSL_IS_AWSLC) + /* As a server, if early data is accepted, SSL_do_handshake will + * complete as soon as the ClientHello is processed and server flight sent. + * SSL_write may be used to send half-RTT data. SSL_read will consume early + * data and transition to 1-RTT data as appropriate. Prior to the + * transition, SSL_in_init will report the handshake is still in progress. + * Callers may use it or SSL_in_early_data to defer or reject requests + * as needed. + * (see https://commondatastorage.googleapis.com/chromium-boringssl-docs/ssl.h.html#Early-data) + */ + + /* If we do not returned here, the handshake is considered as completed/confirmed. + * This has as bad side effect to discard the Handshake packet number space, + * so without sending the Handshake level CRYPTO data. 
+ */ + if (SSL_in_early_data(ctx->ssl)) { + TRACE_PROTO("SSL handshake in progrees with early data", + QUIC_EV_CONN_IO_CB, qc, &state, &ssl_err); + goto out; + } +#endif TRACE_PROTO("SSL handshake OK", QUIC_EV_CONN_IO_CB, qc, &state); @@ -572,8 +605,17 @@ int qc_ssl_provide_quic_data(struct ncbuf *ncbuf, if (qc_is_listener(ctx->qc)) { qc->flags |= QUIC_FL_CONN_NEED_POST_HANDSHAKE_FRMS; qc->state = QUIC_HS_ST_CONFIRMED; - /* The connection is ready to be accepted. */ - quic_accept_push_qc(qc); + + if (!(qc->flags & QUIC_FL_CONN_ACCEPT_REGISTERED)) { + quic_accept_push_qc(qc); + } + else { + /* Connection already accepted if 0-RTT used. + * In this case, schedule quic-conn to ensure + * post-handshake frames are emitted. + */ + tasklet_wakeup(qc->wait_event.tasklet); + } BUG_ON(qc->li->rx.quic_curr_handshake == 0); HA_ATOMIC_DEC(&qc->li->rx.quic_curr_handshake); @@ -636,6 +678,8 @@ int qc_ssl_provide_all_quic_data(struct quic_conn *qc, struct ssl_sock_ctx *ctx) { int ret = 0; struct quic_enc_level *qel; + struct ncbuf *ncbuf; + ncb_sz_t data; TRACE_ENTER(QUIC_EV_CONN_PHPKTS, qc); list_for_each_entry(qel, &qc->qel_list, list) { @@ -644,8 +688,27 @@ int qc_ssl_provide_all_quic_data(struct quic_conn *qc, struct ssl_sock_ctx *ctx) if (!cstream) continue; - if (!qc_treat_rx_crypto_frms(qc, qel, ctx)) - goto leave; + ncbuf = &cstream->rx.ncbuf; + if (ncb_is_null(ncbuf)) + continue; + + /* TODO not working if buffer is wrapping */ + while ((data = ncb_data(ncbuf, 0))) { + const unsigned char *cdata = (const unsigned char *)ncb_head(ncbuf); + + if (!qc_ssl_provide_quic_data(&qel->cstream->rx.ncbuf, qel->level, + ctx, cdata, data)) + goto leave; + + cstream->rx.offset += data; + TRACE_DEVEL("buffered crypto data were provided to TLS stack", + QUIC_EV_CONN_PHPKTS, qc, qel); + } + + if (!ncb_is_null(ncbuf) && ncb_is_empty(ncbuf)) { + TRACE_DEVEL("freeing crypto buf", QUIC_EV_CONN_PHPKTS, qc, qel); + quic_free_ncbuf(ncbuf); + } } ret = 1; @@ -696,6 +759,43 @@ static int 
qc_ssl_sess_init(struct quic_conn *qc, SSL_CTX *ssl_ctx, SSL **ssl) return ret; } +#ifdef HAVE_SSL_0RTT_QUIC + +/* Enable early data for <ssl> QUIC TLS session. + * Return 1 if succeeded, 0 if not. + */ +static int qc_set_quic_early_data_enabled(struct quic_conn *qc, SSL *ssl) +{ +#if defined(OPENSSL_IS_AWSLC) + struct quic_transport_params p = {0}; + unsigned char buf[128]; + size_t len; + + /* Apply default values to <p> transport parameters. */ + quic_transport_params_init(&p, 1); + /* The stateless_reset_token transport parameter is not needed. */ + p.with_stateless_reset_token = 0; + len = quic_transport_params_encode(buf, buf + sizeof buf, &p, NULL, 1); + if (!len) { + TRACE_ERROR("quic_transport_params_encode() failed", QUIC_EV_CONN_RWSEC, qc); + return 0; + } + + /* XXX TODO: Should also add the application settings. XXX */ + if (!SSL_set_quic_early_data_context(ssl, buf, len)) { + TRACE_ERROR("SSL_set_quic_early_data_context() failed", QUIC_EV_CONN_RWSEC, qc); + return 0; + } + + SSL_set_early_data_enabled(ssl, 1); +#else + SSL_set_quic_early_data_enabled(ssl, 1); +#endif + + return 1; +} +#endif // HAVE_SSL_0RTT_QUIC + /* Allocate the ssl_sock_ctx from connection <qc>. This creates the tasklet * used to process <qc> received packets. The allocated context is stored in * <qc.xprt_ctx>. 
@@ -731,12 +831,10 @@ int qc_alloc_ssl_sock_ctx(struct quic_conn *qc) if (qc_is_listener(qc)) { if (qc_ssl_sess_init(qc, bc->initial_ctx, &ctx->ssl) == -1) goto err; -#if (HA_OPENSSL_VERSION_NUMBER >= 0x10101000L) && !defined(OPENSSL_IS_AWSLC) -#ifndef USE_QUIC_OPENSSL_COMPAT +#if (HA_OPENSSL_VERSION_NUMBER >= 0x10101000L) && defined(HAVE_SSL_0RTT_QUIC) /* Enabling 0-RTT */ - if (bc->ssl_conf.early_data) - SSL_set_quic_early_data_enabled(ctx->ssl, 1); -#endif + if (bc->ssl_conf.early_data && !qc_set_quic_early_data_enabled(qc, ctx->ssl)) + goto err; #endif SSL_set_accept_state(ctx->ssl); diff --git a/src/quic_stats.c b/src/quic_stats.c index 3657f30..9d9b343 100644 --- a/src/quic_stats.c +++ b/src/quic_stats.c @@ -2,7 +2,7 @@ #include <haproxy/quic_stats-t.h> #include <haproxy/stats.h> -static struct name_desc quic_stats[] = { +static struct stat_col quic_stats[] = { [QUIC_ST_RXBUF_FULL] = { .name = "quic_rxbuf_full", .desc = "Total number of cancelled reception due to full receiver buffer" }, [QUIC_ST_DROPPED_PACKET] = { .name = "quic_dropped_pkt", @@ -90,53 +90,154 @@ static struct name_desc quic_stats[] = { struct quic_counters quic_counters; -static void quic_fill_stats(void *data, struct field *stats) +static int quic_fill_stats(void *data, struct field *stats, unsigned int *selected_field) { struct quic_counters *counters = data; + unsigned int current_field = (selected_field != NULL ? 
*selected_field : 0); - stats[QUIC_ST_RXBUF_FULL] = mkf_u64(FN_COUNTER, counters->rxbuf_full); - stats[QUIC_ST_DROPPED_PACKET] = mkf_u64(FN_COUNTER, counters->dropped_pkt); - stats[QUIC_ST_DROPPED_PACKET_BUFOVERRUN] = mkf_u64(FN_COUNTER, counters->dropped_pkt_bufoverrun); - stats[QUIC_ST_DROPPED_PARSING] = mkf_u64(FN_COUNTER, counters->dropped_parsing); - stats[QUIC_ST_SOCKET_FULL] = mkf_u64(FN_COUNTER, counters->socket_full); - stats[QUIC_ST_SENDTO_ERR] = mkf_u64(FN_COUNTER, counters->sendto_err); - stats[QUIC_ST_SENDTO_ERR_UNKNWN] = mkf_u64(FN_COUNTER, counters->sendto_err_unknown); - stats[QUIC_ST_SENT_PACKET] = mkf_u64(FN_COUNTER, counters->sent_pkt); - stats[QUIC_ST_LOST_PACKET] = mkf_u64(FN_COUNTER, counters->lost_pkt); - stats[QUIC_ST_TOO_SHORT_INITIAL_DGRAM] = mkf_u64(FN_COUNTER, counters->too_short_initial_dgram); - stats[QUIC_ST_RETRY_SENT] = mkf_u64(FN_COUNTER, counters->retry_sent); - stats[QUIC_ST_RETRY_VALIDATED] = mkf_u64(FN_COUNTER, counters->retry_validated); - stats[QUIC_ST_RETRY_ERRORS] = mkf_u64(FN_COUNTER, counters->retry_error); - stats[QUIC_ST_HALF_OPEN_CONN] = mkf_u64(FN_GAUGE, counters->half_open_conn); - stats[QUIC_ST_HDSHK_FAIL] = mkf_u64(FN_COUNTER, counters->hdshk_fail); - stats[QUIC_ST_STATELESS_RESET_SENT] = mkf_u64(FN_COUNTER, counters->stateless_reset_sent); - /* Special events of interest */ - stats[QUIC_ST_CONN_MIGRATION_DONE] = mkf_u64(FN_COUNTER, counters->conn_migration_done); - /* Transport errors */ - stats[QUIC_ST_TRANSP_ERR_NO_ERROR] = mkf_u64(FN_COUNTER, counters->quic_transp_err_no_error); - stats[QUIC_ST_TRANSP_ERR_INTERNAL_ERROR] = mkf_u64(FN_COUNTER, counters->quic_transp_err_internal_error); - stats[QUIC_ST_TRANSP_ERR_CONNECTION_REFUSED] = mkf_u64(FN_COUNTER, counters->quic_transp_err_connection_refused); - stats[QUIC_ST_TRANSP_ERR_FLOW_CONTROL_ERROR] = mkf_u64(FN_COUNTER, counters->quic_transp_err_flow_control_error); - stats[QUIC_ST_TRANSP_ERR_STREAM_LIMIT_ERROR] = mkf_u64(FN_COUNTER, 
counters->quic_transp_err_stream_limit_error); - stats[QUIC_ST_TRANSP_ERR_STREAM_STATE_ERROR] = mkf_u64(FN_COUNTER, counters->quic_transp_err_stream_state_error); - stats[QUIC_ST_TRANSP_ERR_FINAL_SIZE_ERROR] = mkf_u64(FN_COUNTER, counters->quic_transp_err_final_size_error); - stats[QUIC_ST_TRANSP_ERR_FRAME_ENCODING_ERROR] = mkf_u64(FN_COUNTER, counters->quic_transp_err_frame_encoding_error); - stats[QUIC_ST_TRANSP_ERR_TRANSPORT_PARAMETER_ERROR] = mkf_u64(FN_COUNTER, counters->quic_transp_err_transport_parameter_error); - stats[QUIC_ST_TRANSP_ERR_CONNECTION_ID_LIMIT_ERROR] = mkf_u64(FN_COUNTER, counters->quic_transp_err_connection_id_limit); - stats[QUIC_ST_TRANSP_ERR_PROTOCOL_VIOLATION] = mkf_u64(FN_COUNTER, counters->quic_transp_err_protocol_violation); - stats[QUIC_ST_TRANSP_ERR_INVALID_TOKEN] = mkf_u64(FN_COUNTER, counters->quic_transp_err_invalid_token); - stats[QUIC_ST_TRANSP_ERR_APPLICATION_ERROR] = mkf_u64(FN_COUNTER, counters->quic_transp_err_application_error); - stats[QUIC_ST_TRANSP_ERR_CRYPTO_BUFFER_EXCEEDED] = mkf_u64(FN_COUNTER, counters->quic_transp_err_crypto_buffer_exceeded); - stats[QUIC_ST_TRANSP_ERR_KEY_UPDATE_ERROR] = mkf_u64(FN_COUNTER, counters->quic_transp_err_key_update_error); - stats[QUIC_ST_TRANSP_ERR_AEAD_LIMIT_REACHED] = mkf_u64(FN_COUNTER, counters->quic_transp_err_aead_limit_reached); - stats[QUIC_ST_TRANSP_ERR_NO_VIABLE_PATH] = mkf_u64(FN_COUNTER, counters->quic_transp_err_no_viable_path); - stats[QUIC_ST_TRANSP_ERR_CRYPTO_ERROR] = mkf_u64(FN_COUNTER, counters->quic_transp_err_crypto_error); - stats[QUIC_ST_TRANSP_ERR_UNKNOWN_ERROR] = mkf_u64(FN_COUNTER, counters->quic_transp_err_unknown_error); - /* Streams related counters */ - stats[QUIC_ST_DATA_BLOCKED] = mkf_u64(FN_COUNTER, counters->data_blocked); - stats[QUIC_ST_STREAM_DATA_BLOCKED] = mkf_u64(FN_COUNTER, counters->stream_data_blocked); - stats[QUIC_ST_STREAMS_BLOCKED_BIDI] = mkf_u64(FN_COUNTER, counters->streams_blocked_bidi); - stats[QUIC_ST_STREAMS_BLOCKED_UNI] = 
mkf_u64(FN_COUNTER, counters->streams_blocked_uni); + for (; current_field < QUIC_STATS_COUNT; current_field++) { + struct field metric = { 0 }; + + switch (current_field) { + case QUIC_ST_RXBUF_FULL: + metric = mkf_u64(FN_COUNTER, counters->rxbuf_full); + break; + case QUIC_ST_DROPPED_PACKET: + metric = mkf_u64(FN_COUNTER, counters->dropped_pkt); + break; + case QUIC_ST_DROPPED_PACKET_BUFOVERRUN: + metric = mkf_u64(FN_COUNTER, counters->dropped_pkt_bufoverrun); + break; + case QUIC_ST_DROPPED_PARSING: + metric = mkf_u64(FN_COUNTER, counters->dropped_parsing); + break; + case QUIC_ST_SOCKET_FULL: + metric = mkf_u64(FN_COUNTER, counters->socket_full); + break; + case QUIC_ST_SENDTO_ERR: + metric = mkf_u64(FN_COUNTER, counters->sendto_err); + break; + case QUIC_ST_SENDTO_ERR_UNKNWN: + metric = mkf_u64(FN_COUNTER, counters->sendto_err_unknown); + break; + case QUIC_ST_SENT_PACKET: + metric = mkf_u64(FN_COUNTER, counters->sent_pkt); + break; + case QUIC_ST_LOST_PACKET: + metric = mkf_u64(FN_COUNTER, counters->lost_pkt); + break; + case QUIC_ST_TOO_SHORT_INITIAL_DGRAM: + metric = mkf_u64(FN_COUNTER, counters->too_short_initial_dgram); + break; + case QUIC_ST_RETRY_SENT: + metric = mkf_u64(FN_COUNTER, counters->retry_sent); + break; + case QUIC_ST_RETRY_VALIDATED: + metric = mkf_u64(FN_COUNTER, counters->retry_validated); + break; + case QUIC_ST_RETRY_ERRORS: + metric = mkf_u64(FN_COUNTER, counters->retry_error); + break; + case QUIC_ST_HALF_OPEN_CONN: + metric = mkf_u64(FN_GAUGE, counters->half_open_conn); + break; + case QUIC_ST_HDSHK_FAIL: + metric = mkf_u64(FN_COUNTER, counters->hdshk_fail); + break; + case QUIC_ST_STATELESS_RESET_SENT: + metric = mkf_u64(FN_COUNTER, counters->stateless_reset_sent); + break; + + /* Special events of interest */ + case QUIC_ST_CONN_MIGRATION_DONE: + metric = mkf_u64(FN_COUNTER, counters->conn_migration_done); + break; + + /* Transport errors */ + case QUIC_ST_TRANSP_ERR_NO_ERROR: + metric = mkf_u64(FN_COUNTER, 
counters->quic_transp_err_no_error); + break; + case QUIC_ST_TRANSP_ERR_INTERNAL_ERROR: + metric = mkf_u64(FN_COUNTER, counters->quic_transp_err_internal_error); + break; + case QUIC_ST_TRANSP_ERR_CONNECTION_REFUSED: + metric = mkf_u64(FN_COUNTER, counters->quic_transp_err_connection_refused); + break; + case QUIC_ST_TRANSP_ERR_FLOW_CONTROL_ERROR: + metric = mkf_u64(FN_COUNTER, counters->quic_transp_err_flow_control_error); + break; + case QUIC_ST_TRANSP_ERR_STREAM_LIMIT_ERROR: + metric = mkf_u64(FN_COUNTER, counters->quic_transp_err_stream_limit_error); + break; + case QUIC_ST_TRANSP_ERR_STREAM_STATE_ERROR: + metric = mkf_u64(FN_COUNTER, counters->quic_transp_err_stream_state_error); + break; + case QUIC_ST_TRANSP_ERR_FINAL_SIZE_ERROR: + metric = mkf_u64(FN_COUNTER, counters->quic_transp_err_final_size_error); + break; + case QUIC_ST_TRANSP_ERR_FRAME_ENCODING_ERROR: + metric = mkf_u64(FN_COUNTER, counters->quic_transp_err_frame_encoding_error); + break; + case QUIC_ST_TRANSP_ERR_TRANSPORT_PARAMETER_ERROR: + metric = mkf_u64(FN_COUNTER, counters->quic_transp_err_transport_parameter_error); + break; + case QUIC_ST_TRANSP_ERR_CONNECTION_ID_LIMIT_ERROR: + metric = mkf_u64(FN_COUNTER, counters->quic_transp_err_connection_id_limit); + break; + case QUIC_ST_TRANSP_ERR_PROTOCOL_VIOLATION: + metric = mkf_u64(FN_COUNTER, counters->quic_transp_err_protocol_violation); + break; + case QUIC_ST_TRANSP_ERR_INVALID_TOKEN: + metric = mkf_u64(FN_COUNTER, counters->quic_transp_err_invalid_token); + break; + case QUIC_ST_TRANSP_ERR_APPLICATION_ERROR: + metric = mkf_u64(FN_COUNTER, counters->quic_transp_err_application_error); + break; + case QUIC_ST_TRANSP_ERR_CRYPTO_BUFFER_EXCEEDED: + metric = mkf_u64(FN_COUNTER, counters->quic_transp_err_crypto_buffer_exceeded); + break; + case QUIC_ST_TRANSP_ERR_KEY_UPDATE_ERROR: + metric = mkf_u64(FN_COUNTER, counters->quic_transp_err_key_update_error); + break; + case QUIC_ST_TRANSP_ERR_AEAD_LIMIT_REACHED: + metric = mkf_u64(FN_COUNTER, 
counters->quic_transp_err_aead_limit_reached); + break; + case QUIC_ST_TRANSP_ERR_NO_VIABLE_PATH: + metric = mkf_u64(FN_COUNTER, counters->quic_transp_err_no_viable_path); + break; + case QUIC_ST_TRANSP_ERR_CRYPTO_ERROR: + metric = mkf_u64(FN_COUNTER, counters->quic_transp_err_crypto_error); + break; + case QUIC_ST_TRANSP_ERR_UNKNOWN_ERROR: + metric = mkf_u64(FN_COUNTER, counters->quic_transp_err_unknown_error); + break; + + /* Streams related counters */ + case QUIC_ST_DATA_BLOCKED: + metric = mkf_u64(FN_COUNTER, counters->data_blocked); + break; + case QUIC_ST_STREAM_DATA_BLOCKED: + metric = mkf_u64(FN_COUNTER, counters->stream_data_blocked); + break; + case QUIC_ST_STREAMS_BLOCKED_BIDI: + metric = mkf_u64(FN_COUNTER, counters->streams_blocked_bidi); + break; + case QUIC_ST_STREAMS_BLOCKED_UNI: + metric = mkf_u64(FN_COUNTER, counters->streams_blocked_uni); + break; + default: + /* not used for frontends. If a specific metric + * is requested, return an error. Otherwise continue. + */ + if (selected_field != NULL) + return 0; + continue; + } + stats[current_field] = metric; + if (selected_field != NULL) + break; + } + return 1; } struct stats_module quic_stats_module = { diff --git a/src/quic_stream.c b/src/quic_stream.c index a4b984d..e153660 100644 --- a/src/quic_stream.c +++ b/src/quic_stream.c @@ -6,7 +6,7 @@ #include <haproxy/buf.h> #include <haproxy/dynbuf.h> #include <haproxy/list.h> -#include <haproxy/mux_quic-t.h> +#include <haproxy/mux_quic.h> #include <haproxy/pool.h> #include <haproxy/quic_conn.h> #include <haproxy/task.h> @@ -37,10 +37,13 @@ static void qc_stream_buf_free(struct qc_stream_desc *stream, /* notify MUX about available buffers. */ --qc->stream_buf_count; if (qc->mux_state == QC_MUX_READY) { - if (qc->qcc->flags & QC_CF_CONN_FULL) { - qc->qcc->flags &= ~QC_CF_CONN_FULL; - tasklet_wakeup(qc->qcc->wait_event.tasklet); - } + /* notify MUX about available buffers. 
+ * + * TODO several streams may be woken up even if a single buffer + * is available for now. + */ + while (qcc_notify_buf(qc->qcc)) + ; } } @@ -202,11 +205,13 @@ void qc_stream_desc_free(struct qc_stream_desc *stream, int closing) qc->stream_buf_count -= free_count; if (qc->mux_state == QC_MUX_READY) { - /* notify MUX about available buffers. */ - if (qc->qcc->flags & QC_CF_CONN_FULL) { - qc->qcc->flags &= ~QC_CF_CONN_FULL; - tasklet_wakeup(qc->qcc->wait_event.tasklet); - } + /* notify MUX about available buffers. + * + * TODO several streams may be woken up even if a single buffer + * is available for now. + */ + while (qcc_notify_buf(qc->qcc)) + ; } } diff --git a/src/quic_tls.c b/src/quic_tls.c index aa72831..885df6f 100644 --- a/src/quic_tls.c +++ b/src/quic_tls.c @@ -206,8 +206,9 @@ static int quic_conn_enc_level_init(struct quic_conn *qc, if (!qel) goto leave; - LIST_INIT(&qel->retrans); - qel->retrans_frms = NULL; + LIST_INIT(&qel->el_send); + qel->send_frms = NULL; + qel->tx.crypto.bufs = NULL; qel->tx.crypto.nb_buf = 0; qel->cstream = NULL; diff --git a/src/quic_tp.c b/src/quic_tp.c index caf48ce..08d24b2 100644 --- a/src/quic_tp.c +++ b/src/quic_tp.c @@ -171,23 +171,23 @@ static int quic_transport_param_dec_version_info(struct tp_version_information * const unsigned char *end, int server) { size_t tp_len = end - *buf; - const uint32_t *ver, *others; + const unsigned char *ver, *others; /* <tp_len> must be a multiple of sizeof(uint32_t) */ if (tp_len < sizeof tp->chosen || (tp_len & 0x3)) return 0; - tp->chosen = ntohl(*(uint32_t *)*buf); + tp->chosen = ntohl(read_u32(*buf)); /* Must not be null */ if (!tp->chosen) return 0; *buf += sizeof tp->chosen; - others = (const uint32_t *)*buf; + others = *buf; /* Others versions must not be null */ - for (ver = others; ver < (const uint32_t *)end; ver++) { - if (!*ver) + for (ver = others; ver < end; ver += 4) { + if (!read_u32(ver)) return 0; } @@ -195,19 +195,19 @@ static int 
quic_transport_param_dec_version_info(struct tp_version_information * /* TODO: not supported */ return 0; - for (ver = others; ver < (const uint32_t *)end; ver++) { + for (ver = others; ver < end; ver += 4) { if (!tp->negotiated_version) { int i; for (i = 0; i < quic_versions_nb; i++) { - if (ntohl(*ver) == quic_versions[i].num) { + if (ntohl(read_u32(ver)) == quic_versions[i].num) { tp->negotiated_version = &quic_versions[i]; break; } } } - if (preferred_version && ntohl(*ver) == preferred_version->num) { + if (preferred_version && ntohl(read_u32(ver)) == preferred_version->num) { tp->negotiated_version = preferred_version; goto out; } @@ -565,7 +565,7 @@ int quic_transport_params_encode(unsigned char *buf, p->active_connection_id_limit)) return 0; - if (!quic_transport_param_enc_version_info(&pos, end, chosen_version, server)) + if (chosen_version && !quic_transport_param_enc_version_info(&pos, end, chosen_version, server)) return 0; return pos - head; diff --git a/src/quic_tx.c b/src/quic_tx.c index 306b4c2..6d487eb 100644 --- a/src/quic_tx.c +++ b/src/quic_tx.c @@ -88,7 +88,7 @@ static inline void free_quic_tx_packet(struct quic_conn *qc, struct buffer *qc_txb_alloc(struct quic_conn *qc) { struct buffer *buf = &qc->tx.buf; - if (!b_alloc(buf)) + if (!b_alloc(buf, DB_MUX_TX)) return NULL; return buf; @@ -202,104 +202,6 @@ static int qc_may_build_pkt(struct quic_conn *qc, struct list *frms, return 1; } -/* Prepare as much as possible QUIC packets for sending from prebuilt frames - * <frms>. Each packet is stored in a distinct datagram written to <buf>. - * - * Each datagram is prepended by a two fields header : the datagram length and - * the address of the packet contained in the datagram. - * - * Returns the number of bytes prepared in packets if succeeded (may be 0), or - * -1 if something wrong happened. 
- */ -static int qc_prep_app_pkts(struct quic_conn *qc, struct buffer *buf, - struct list *frms) -{ - int ret = -1, cc; - struct quic_enc_level *qel; - unsigned char *end, *pos; - struct quic_tx_packet *pkt; - size_t total; - - TRACE_ENTER(QUIC_EV_CONN_PHPKTS, qc); - - qel = qc->ael; - total = 0; - pos = (unsigned char *)b_tail(buf); - cc = qc->flags & QUIC_FL_CONN_IMMEDIATE_CLOSE; - /* Each datagram is prepended with its length followed by the address - * of the first packet in the datagram (QUIC_DGRAM_HEADLEN). - */ - while ((!cc && b_contig_space(buf) >= (int)qc->path->mtu + QUIC_DGRAM_HEADLEN) || - (cc && b_contig_space(buf) >= QUIC_MIN_CC_PKTSIZE + QUIC_DGRAM_HEADLEN)) { - int err, probe, must_ack; - - TRACE_PROTO("TX prep app pkts", QUIC_EV_CONN_PHPKTS, qc, qel, frms); - probe = 0; - /* We do not probe if an immediate close was asked */ - if (!cc) - probe = qel->pktns->tx.pto_probe; - - if (!qc_may_build_pkt(qc, frms, qel, cc, probe, &must_ack)) - break; - - /* Leave room for the datagram header */ - pos += QUIC_DGRAM_HEADLEN; - if (cc) { - end = pos + QUIC_MIN_CC_PKTSIZE; - } - else if (!quic_peer_validated_addr(qc) && qc_is_listener(qc)) { - end = pos + QUIC_MIN(qc->path->mtu, quic_may_send_bytes(qc)); - } - else { - end = pos + qc->path->mtu; - } - - pkt = qc_build_pkt(&pos, end, qel, &qel->tls_ctx, frms, qc, NULL, 0, - QUIC_PACKET_TYPE_SHORT, must_ack, 0, probe, cc, &err); - switch (err) { - case -3: - qc_purge_txbuf(qc, buf); - goto leave; - case -2: - // trace already emitted by function above - goto leave; - case -1: - /* As we provide qc_build_pkt() with an enough big buffer to fulfill an - * MTU, we are here because of the congestion control window. There is - * no need to try to reuse this buffer. - */ - TRACE_PROTO("could not prepare anymore packet", QUIC_EV_CONN_PHPKTS, qc, qel); - goto out; - default: - break; - } - - /* This is to please to GCC. 
We cannot have (err >= 0 && !pkt) */ - BUG_ON(!pkt); - - if (qc->flags & QUIC_FL_CONN_RETRANS_OLD_DATA) - pkt->flags |= QUIC_FL_TX_PACKET_PROBE_WITH_OLD_DATA; - - total += pkt->len; - - /* Write datagram header. */ - qc_txb_store(buf, pkt->len, pkt); - /* Build only one datagram when an immediate close is required. */ - if (cc) - break; - } - - out: - if (total && cc) { - BUG_ON(buf != &qc->tx.cc_buf); - qc->tx.cc_dgram_len = total; - } - ret = total; - leave: - TRACE_LEAVE(QUIC_EV_CONN_PHPKTS, qc); - return ret; -} - /* Free all frames in <l> list. In addition also remove all these frames * from the original ones if they are the results of duplications. */ @@ -362,7 +264,7 @@ static void qc_purge_tx_buf(struct quic_conn *qc, struct buffer *buf) * Remaining data are purged from the buffer and will eventually be detected * as lost which gives the opportunity to retry sending. */ -int qc_send_ppkts(struct buffer *buf, struct ssl_sock_ctx *ctx) +static int qc_send_ppkts(struct buffer *buf, struct ssl_sock_ctx *ctx) { int ret = 0; struct quic_conn *qc; @@ -427,6 +329,7 @@ int qc_send_ppkts(struct buffer *buf, struct ssl_sock_ctx *ctx) time_sent = now_ms; for (pkt = first_pkt; pkt; pkt = next_pkt) { + struct quic_cc *cc = &qc->path->cc; /* RFC 9000 14.1 Initial datagram size * a server MUST expand the payload of all UDP datagrams carrying ack-eliciting * Initial packets to at least the smallest allowed maximum datagram size of @@ -466,6 +369,8 @@ int qc_send_ppkts(struct buffer *buf, struct ssl_sock_ctx *ctx) } qc->path->in_flight += pkt->in_flight_len; pkt->pktns->tx.in_flight += pkt->in_flight_len; + if ((global.tune.options & GTUNE_QUIC_CC_HYSTART) && pkt->pktns == qc->apktns) + cc->algo->hystart_start_round(cc, pkt->pn_node.key); if (pkt->in_flight_len) qc_set_timer(qc); TRACE_PROTO("TX pkt", QUIC_EV_CONN_SPPKTS, qc, pkt); @@ -510,94 +415,14 @@ int qc_purge_txbuf(struct quic_conn *qc, struct buffer *buf) return 1; } -/* Try to send application frames from list 
<frms> on connection <qc>. - * - * Use qc_send_app_probing wrapper when probing with old data. - * - * Returns 1 on success. Some data might not have been sent due to congestion, - * in this case they are left in <frms> input list. The caller may subscribe on - * quic-conn to retry later. - * - * Returns 0 on critical error. - * TODO review and classify more distinctly transient from definitive errors to - * allow callers to properly handle it. - */ -int qc_send_app_pkts(struct quic_conn *qc, struct list *frms) -{ - int status = 0, ret; - struct buffer *buf; - - TRACE_ENTER(QUIC_EV_CONN_TXPKT, qc); - - buf = qc_get_txb(qc); - if (!buf) { - TRACE_ERROR("could not get a buffer", QUIC_EV_CONN_TXPKT, qc); - goto err; - } - - if (b_data(buf) && !qc_purge_txbuf(qc, buf)) - goto err; - - /* Prepare and send packets until we could not further prepare packets. */ - do { - /* Currently buf cannot be non-empty at this stage. Even if a - * previous sendto() has failed it is emptied to simulate - * packet emission and rely on QUIC lost detection to try to - * emit it. - */ - BUG_ON_HOT(b_data(buf)); - b_reset(buf); - - ret = qc_prep_app_pkts(qc, buf, frms); - - if (b_data(buf) && !qc_send_ppkts(buf, qc->xprt_ctx)) { - if (qc->flags & QUIC_FL_CONN_TO_KILL) - qc_txb_release(qc); - goto err; - } - } while (ret > 0); - - qc_txb_release(qc); - if (ret < 0) - goto err; - - status = 1; - TRACE_LEAVE(QUIC_EV_CONN_TXPKT, qc); - return status; - - err: - TRACE_DEVEL("leaving in error", QUIC_EV_CONN_TXPKT, qc); - return 0; -} - -/* Try to send application frames from list <frms> on connection <qc>. Use this - * function when probing is required. - * - * Returns the result from qc_send_app_pkts function. 
- */ -static forceinline int qc_send_app_probing(struct quic_conn *qc, - struct list *frms) -{ - int ret; - - TRACE_ENTER(QUIC_EV_CONN_TXPKT, qc); - - TRACE_PROTO("preparing old data (probing)", QUIC_EV_CONN_FRMLIST, qc, frms); - qc->flags |= QUIC_FL_CONN_RETRANS_OLD_DATA; - ret = qc_send_app_pkts(qc, frms); - qc->flags &= ~QUIC_FL_CONN_RETRANS_OLD_DATA; - - TRACE_LEAVE(QUIC_EV_CONN_TXPKT, qc); - return ret; -} - /* Try to send application frames from list <frms> on connection <qc>. This * function is provided for MUX upper layer usage only. * - * Returns the result from qc_send_app_pkts function. + * Returns the result from qc_send() function. */ int qc_send_mux(struct quic_conn *qc, struct list *frms) { + struct list send_list = LIST_HEAD_INIT(send_list); int ret; TRACE_ENTER(QUIC_EV_CONN_TXPKT, qc); @@ -613,56 +438,27 @@ int qc_send_mux(struct quic_conn *qc, struct list *frms) if ((qc->flags & QUIC_FL_CONN_NEED_POST_HANDSHAKE_FRMS) && qc->state >= QUIC_HS_ST_COMPLETE) { quic_build_post_handshake_frames(qc); - qc_send_app_pkts(qc, &qc->ael->pktns->tx.frms); + qel_register_send(&send_list, qc->ael, &qc->ael->pktns->tx.frms); + qc_send(qc, 0, &send_list); } TRACE_STATE("preparing data (from MUX)", QUIC_EV_CONN_TXPKT, qc); qc->flags |= QUIC_FL_CONN_TX_MUX_CONTEXT; - ret = qc_send_app_pkts(qc, frms); + qel_register_send(&send_list, qc->ael, frms); + ret = qc_send(qc, 0, &send_list); qc->flags &= ~QUIC_FL_CONN_TX_MUX_CONTEXT; TRACE_LEAVE(QUIC_EV_CONN_TXPKT, qc); return ret; } -/* Return the encryption level following the one which contains <el> list head - * depending on <retrans> TX mode (retranmission or not). - */ -static inline struct quic_enc_level *qc_list_next_qel(struct list *el, int retrans) -{ - return !retrans ? LIST_NEXT(el, struct quic_enc_level *, list) : - LIST_NEXT(el, struct quic_enc_level *, retrans); -} - -/* Return the encryption level following <qel> depending on <retrans> TX mode - * (retranmission or not). 
+/* Select <*tls_ctx> and <*ver> for the encryption level <qel> of <qc> QUIC + * connection, depending on its state, especially the negotiated version. */ -static inline struct quic_enc_level *qc_next_qel(struct quic_enc_level *qel, int retrans) -{ - struct list *el = !retrans ? &qel->list : &qel->retrans; - - return qc_list_next_qel(el, retrans); -} - -/* Return 1 if <qel> is at the head of its list, 0 if not. */ -static inline int qc_qel_is_head(struct quic_enc_level *qel, struct list *l, - int retrans) -{ - return !retrans ? &qel->list == l : &qel->retrans == l; -} - -/* Select <*tls_ctx>, <*frms> and <*ver> for the encryption level <qel> of <qc> QUIC - * connection, depending on its state, especially the negotiated version and if - * retransmissions are required. If this the case <qels> is the list of encryption - * levels to used, or NULL if no retransmissions are required. - * Never fails. - */ -static inline void qc_select_tls_frms_ver(struct quic_conn *qc, - struct quic_enc_level *qel, - struct quic_tls_ctx **tls_ctx, - struct list **frms, - const struct quic_version **ver, - struct list *qels) +static inline void qc_select_tls_ver(struct quic_conn *qc, + struct quic_enc_level *qel, + struct quic_tls_ctx **tls_ctx, + const struct quic_version **ver) { if (qc->negotiated_version) { *ver = qc->negotiated_version; @@ -675,18 +471,11 @@ static inline void qc_select_tls_frms_ver(struct quic_conn *qc, *ver = qc->original_version; *tls_ctx = &qel->tls_ctx; } - - if (!qels) - *frms = &qel->pktns->tx.frms; - else - *frms = qel->retrans_frms; } /* Prepare as much as possible QUIC datagrams/packets for sending from <qels> * list of encryption levels. Several packets can be coalesced into a single - * datagram. The result is written into <buf>. Note that if <qels> is NULL, - * the encryption levels which will be used are those currently allocated - * and attached to the connection. + * datagram. The result is written into <buf>. 
* * Each datagram is prepended by a two fields header : the datagram length and * the address of first packet in the datagram. @@ -694,15 +483,15 @@ static inline void qc_select_tls_frms_ver(struct quic_conn *qc, * Returns the number of bytes prepared in datragrams/packets if succeeded * (may be 0), or -1 if something wrong happened. */ -int qc_prep_hpkts(struct quic_conn *qc, struct buffer *buf, struct list *qels) +static int qc_prep_pkts(struct quic_conn *qc, struct buffer *buf, + struct list *qels) { - int ret, cc, retrans, padding; + int ret, cc, padding; struct quic_tx_packet *first_pkt, *prv_pkt; unsigned char *end, *pos; uint16_t dglen; size_t total; - struct list *qel_list; - struct quic_enc_level *qel; + struct quic_enc_level *qel, *tmp_qel; TRACE_ENTER(QUIC_EV_CONN_IO_CB, qc); /* Currently qc_prep_pkts() does not handle buffer wrapping so the @@ -712,32 +501,34 @@ int qc_prep_hpkts(struct quic_conn *qc, struct buffer *buf, struct list *qels) ret = -1; cc = qc->flags & QUIC_FL_CONN_IMMEDIATE_CLOSE; - retrans = !!qels; padding = 0; first_pkt = prv_pkt = NULL; end = pos = (unsigned char *)b_head(buf); dglen = 0; total = 0; - qel_list = qels ? qels : &qc->qel_list; - qel = qc_list_next_qel(qel_list, retrans); - while (!qc_qel_is_head(qel, qel_list, retrans)) { + list_for_each_entry_safe(qel, tmp_qel, qels, el_send) { struct quic_tls_ctx *tls_ctx; const struct quic_version *ver; - struct list *frms, *next_frms; + struct list *frms = qel->send_frms, *next_frms; struct quic_enc_level *next_qel; if (qel == qc->eel) { /* Next encryption level */ - qel = qc_next_qel(qel, retrans); continue; } - qc_select_tls_frms_ver(qc, qel, &tls_ctx, &frms, &ver, qels); + qc_select_tls_ver(qc, qel, &tls_ctx, &ver); - next_qel = qc_next_qel(qel, retrans); - next_frms = qc_qel_is_head(next_qel, qel_list, retrans) ? NULL : - !qels ? &next_qel->pktns->tx.frms : next_qel->retrans_frms; + /* Retrieve next QEL. Set it to NULL if on qels last element. 
*/ + if (qel->el_send.n != qels) { + next_qel = LIST_ELEM(qel->el_send.n, struct quic_enc_level *, el_send); + next_frms = next_qel->send_frms; + } + else { + next_qel = NULL; + next_frms = NULL; + } /* Build as much as datagrams at <qel> encryption level. * Each datagram is prepended with its length followed by the address @@ -756,7 +547,11 @@ int qc_prep_hpkts(struct quic_conn *qc, struct buffer *buf, struct list *qels) probe = qel->pktns->tx.pto_probe; if (!qc_may_build_pkt(qc, frms, qel, cc, probe, &must_ack)) { - if (prv_pkt && qc_qel_is_head(next_qel, qel_list, retrans)) { + /* Remove qel from send_list if nothing to send. */ + LIST_DEL_INIT(&qel->el_send); + qel->send_frms = NULL; + + if (prv_pkt && !next_qel) { qc_txb_store(buf, dglen, first_pkt); /* Build only one datagram when an immediate close is required. */ if (cc) @@ -852,15 +647,13 @@ int qc_prep_hpkts(struct quic_conn *qc, struct buffer *buf, struct list *qels) * the same datagram, except if <qel> is the Application data * encryption level which cannot be selected to do that. */ - if (LIST_ISEMPTY(frms) && qel != qc->ael && - !qc_qel_is_head(next_qel, qel_list, retrans)) { + if (LIST_ISEMPTY(frms) && qel != qc->ael && next_qel) { if (qel == qc->iel && (!qc_is_listener(qc) || cur_pkt->flags & QUIC_FL_TX_PACKET_ACK_ELICITING)) padding = 1; prv_pkt = cur_pkt; - break; } else { qc_txb_store(buf, dglen, first_pkt); @@ -873,9 +666,6 @@ int qc_prep_hpkts(struct quic_conn *qc, struct buffer *buf, struct list *qels) prv_pkt = NULL; } } - - /* Next encryption level */ - qel = next_qel; } out: @@ -891,24 +681,25 @@ int qc_prep_hpkts(struct quic_conn *qc, struct buffer *buf, struct list *qels) return ret; } -/* Sends handshake packets from up to two encryption levels <tel> and <next_te> - * with <tel_frms> and <next_tel_frms> as frame list respectively for <qc> - * QUIC connection. <old_data> is used as boolean to send data already sent but - * not already acknowledged (in flight). 
- * Returns 1 if succeeded, 0 if not. +/* Encode frames and send them as packets for <qc> connection. Input frames are + * specified via quic_enc_level <send_list> through their send_frms member. Set + * <old_data> when reemitted duplicated data. + * +* Returns 1 on success else 0. Note that <send_list> will always be reset +* after qc_send() exit. */ -int qc_send_hdshk_pkts(struct quic_conn *qc, int old_data, - struct quic_enc_level *qel1, struct quic_enc_level *qel2) +int qc_send(struct quic_conn *qc, int old_data, struct list *send_list) { + struct quic_enc_level *qel, *tmp_qel; int ret, status = 0; - struct buffer *buf = qc_get_txb(qc); - struct list qels = LIST_HEAD_INIT(qels); + struct buffer *buf; TRACE_ENTER(QUIC_EV_CONN_TXPKT, qc); + buf = qc_get_txb(qc); if (!buf) { TRACE_ERROR("buffer allocation failed", QUIC_EV_CONN_TXPKT, qc); - goto leave; + goto out; } if (b_data(buf) && !qc_purge_txbuf(qc, buf)) { @@ -916,63 +707,75 @@ int qc_send_hdshk_pkts(struct quic_conn *qc, int old_data, goto out; } - /* Currently buf cannot be non-empty at this stage. Even if a previous - * sendto() has failed it is emptied to simulate packet emission and - * rely on QUIC lost detection to try to emit it. - */ - BUG_ON_HOT(b_data(buf)); - b_reset(buf); - if (old_data) { TRACE_STATE("old data for probing asked", QUIC_EV_CONN_TXPKT, qc); qc->flags |= QUIC_FL_CONN_RETRANS_OLD_DATA; } - if (qel1) { - BUG_ON(LIST_INLIST(&qel1->retrans)); - LIST_APPEND(&qels, &qel1->retrans); - } + /* Prepare and send packets until we could not further prepare packets. */ + do { + /* Buffer must always be empty before qc_prep_pkts() usage. + * qc_send_ppkts() ensures it is cleared on success. 
+ */ + BUG_ON_HOT(b_data(buf)); + b_reset(buf); - if (qel2) { - BUG_ON(LIST_INLIST(&qel2->retrans)); - LIST_APPEND(&qels, &qel2->retrans); - } + ret = qc_prep_pkts(qc, buf, send_list); - ret = qc_prep_hpkts(qc, buf, &qels); - if (ret == -1) { - qc_txb_release(qc); - TRACE_ERROR("Could not build some packets", QUIC_EV_CONN_TXPKT, qc); - goto out; - } + if (b_data(buf) && !qc_send_ppkts(buf, qc->xprt_ctx)) { + if (qc->flags & QUIC_FL_CONN_TO_KILL) + qc_txb_release(qc); + goto out; + } + } while (ret > 0 && !LIST_ISEMPTY(send_list)); - if (ret && !qc_send_ppkts(buf, qc->xprt_ctx)) { - if (qc->flags & QUIC_FL_CONN_TO_KILL) - qc_txb_release(qc); - TRACE_ERROR("Could not send some packets", QUIC_EV_CONN_TXPKT, qc); + qc_txb_release(qc); + if (ret < 0) goto out; - } - qc_txb_release(qc); status = 1; out: - if (qel1) { - LIST_DEL_INIT(&qel1->retrans); - qel1->retrans_frms = NULL; + if (old_data) { + TRACE_STATE("no more need old data for probing", QUIC_EV_CONN_TXPKT, qc); + qc->flags &= ~QUIC_FL_CONN_RETRANS_OLD_DATA; } - if (qel2) { - LIST_DEL_INIT(&qel2->retrans); - qel2->retrans_frms = NULL; + /* Always reset QEL sending list. */ + list_for_each_entry_safe(qel, tmp_qel, send_list, el_send) { + LIST_DEL_INIT(&qel->el_send); + qel->send_frms = NULL; } - TRACE_STATE("no more need old data for probing", QUIC_EV_CONN_TXPKT, qc); - qc->flags &= ~QUIC_FL_CONN_RETRANS_OLD_DATA; - leave: - TRACE_LEAVE(QUIC_EV_CONN_TXPKT, qc); + TRACE_DEVEL((status ? "leaving" : "leaving in error"), QUIC_EV_CONN_TXPKT, qc); return status; } +/* Insert <qel> into <send_list> in preparation for sending. Set its send + * frames list pointer to <frms>. + */ +void qel_register_send(struct list *send_list, struct quic_enc_level *qel, + struct list *frms) +{ + /* Ensure QEL is not already registered for sending. */ + BUG_ON(LIST_INLIST(&qel->el_send)); + + LIST_APPEND(send_list, &qel->el_send); + qel->send_frms = frms; +} + +/* Returns true if <qel> should be registered for sending. 
This is the case if + * frames are prepared, probing is set, <qc> ACK timer has fired or a + * CONNECTION_CLOSE is required. + */ +int qel_need_sending(struct quic_enc_level *qel, struct quic_conn *qc) +{ + return !LIST_ISEMPTY(&qel->pktns->tx.frms) || + qel->pktns->tx.pto_probe || + (qel->pktns->flags & QUIC_FL_PKTNS_ACK_REQUIRED) || + (qc->flags & (QUIC_FL_CONN_ACK_TIMER_FIRED|QUIC_FL_CONN_IMMEDIATE_CLOSE)); +} + /* Retransmit up to two datagrams depending on packet number space. * Return 0 when failed, 0 if not. */ @@ -993,9 +796,9 @@ int qc_dgrams_retransmit(struct quic_conn *qc) int i; for (i = 0; i < QUIC_MAX_NB_PTO_DGRAMS; i++) { + struct list send_list = LIST_HEAD_INIT(send_list); struct list ifrms = LIST_HEAD_INIT(ifrms); struct list hfrms = LIST_HEAD_INIT(hfrms); - struct list qels = LIST_HEAD_INIT(qels); qc_prep_hdshk_fast_retrans(qc, &ifrms, &hfrms); TRACE_DEVEL("Avail. ack eliciting frames", QUIC_EV_CONN_FRMLIST, qc, &ifrms); @@ -1004,24 +807,25 @@ int qc_dgrams_retransmit(struct quic_conn *qc) ipktns->tx.pto_probe = 1; if (!LIST_ISEMPTY(&hfrms)) hpktns->tx.pto_probe = 1; - qc->iel->retrans_frms = &ifrms; + + qel_register_send(&send_list, qc->iel, &ifrms); if (qc->hel) - qc->hel->retrans_frms = &hfrms; - sret = qc_send_hdshk_pkts(qc, 1, qc->iel, qc->hel); + qel_register_send(&send_list, qc->hel, &hfrms); + + sret = qc_send(qc, 1, &send_list); qc_free_frm_list(qc, &ifrms); qc_free_frm_list(qc, &hfrms); if (!sret) goto leave; } else { - /* We are in the case where the anti-amplification limit will be - * reached after having sent this datagram or some handshake frames - * could not be allocated. There is no need to send more than one - * datagram. + /* No frame to send due to amplification limit + * or allocation failure. A PING frame will be + * emitted for probing. 
*/ ipktns->tx.pto_probe = 1; - qc->iel->retrans_frms = &ifrms; - sret = qc_send_hdshk_pkts(qc, 0, qc->iel, NULL); + qel_register_send(&send_list, qc->iel, &ifrms); + sret = qc_send(qc, 0, &send_list); qc_free_frm_list(qc, &ifrms); qc_free_frm_list(qc, &hfrms); if (!sret) @@ -1042,14 +846,15 @@ int qc_dgrams_retransmit(struct quic_conn *qc) if (hpktns && (hpktns->flags & QUIC_FL_PKTNS_PROBE_NEEDED)) { hpktns->tx.pto_probe = 0; for (i = 0; i < QUIC_MAX_NB_PTO_DGRAMS; i++) { + struct list send_list = LIST_HEAD_INIT(send_list); struct list frms1 = LIST_HEAD_INIT(frms1); qc_prep_fast_retrans(qc, hpktns, &frms1, NULL); TRACE_DEVEL("Avail. ack eliciting frames", QUIC_EV_CONN_FRMLIST, qc, &frms1); if (!LIST_ISEMPTY(&frms1)) { hpktns->tx.pto_probe = 1; - qc->hel->retrans_frms = &frms1; - sret = qc_send_hdshk_pkts(qc, 1, qc->hel, NULL); + qel_register_send(&send_list, qc->hel, &frms1); + sret = qc_send(qc, 1, &send_list); qc_free_frm_list(qc, &frms1); if (!sret) goto leave; @@ -1060,6 +865,7 @@ int qc_dgrams_retransmit(struct quic_conn *qc) hpktns->flags &= ~QUIC_FL_PKTNS_PROBE_NEEDED; } else if (apktns && (apktns->flags & QUIC_FL_PKTNS_PROBE_NEEDED)) { + struct list send_list = LIST_HEAD_INIT(send_list); struct list frms2 = LIST_HEAD_INIT(frms2); struct list frms1 = LIST_HEAD_INIT(frms1); @@ -1070,7 +876,8 @@ int qc_dgrams_retransmit(struct quic_conn *qc) if (!LIST_ISEMPTY(&frms1)) { apktns->tx.pto_probe = 1; - sret = qc_send_app_probing(qc, &frms1); + qel_register_send(&send_list, qc->ael, &frms1); + sret = qc_send(qc, 1, &send_list); qc_free_frm_list(qc, &frms1); if (!sret) { qc_free_frm_list(qc, &frms2); @@ -1080,7 +887,8 @@ int qc_dgrams_retransmit(struct quic_conn *qc) if (!LIST_ISEMPTY(&frms2)) { apktns->tx.pto_probe = 1; - sret = qc_send_app_probing(qc, &frms2); + qel_register_send(&send_list, qc->ael, &frms2); + sret = qc_send(qc, 1, &send_list); qc_free_frm_list(qc, &frms2); if (!sret) goto leave; @@ -1173,24 +981,38 @@ int send_stateless_reset(struct listener *l, 
struct sockaddr_storage *dstaddr, TRACE_ENTER(QUIC_EV_STATELESS_RST); + /* RFC 9000 10.3. Stateless Reset + * + * Endpoints MUST discard packets that are too small to be valid QUIC + * packets. To give an example, with the set of AEAD functions defined + * in [QUIC-TLS], short header packets that are smaller than 21 bytes + * are never valid. + * + * [...] + * + * RFC 9000 10.3.3. Looping + * + * An endpoint MUST ensure that every Stateless Reset that it sends is + * smaller than the packet that triggered it, unless it maintains state + * sufficient to prevent looping. In the event of a loop, this results + * in packets eventually being too small to trigger a response. + */ + if (rxpkt->len <= QUIC_STATELESS_RESET_PACKET_MINLEN) { + TRACE_DEVEL("rxpkt too short", QUIC_EV_STATELESS_RST); + goto leave; + } + prx = l->bind_conf->frontend; prx_counters = EXTRA_COUNTERS_GET(prx->extra_counters_fe, &quic_stats_module); - /* 10.3 Stateless Reset (https://www.rfc-editor.org/rfc/rfc9000.html#section-10.3) - * The resulting minimum size of 21 bytes does not guarantee that a Stateless - * Reset is difficult to distinguish from other packets if the recipient requires - * the use of a connection ID. To achieve that end, the endpoint SHOULD ensure - * that all packets it sends are at least 22 bytes longer than the minimum - * connection ID length that it requests the peer to include in its packets, - * adding PADDING frames as necessary. This ensures that any Stateless Reset - * sent by the peer is indistinguishable from a valid packet sent to the endpoint. + + /* RFC 9000 10.3. Stateless Reset + * * An endpoint that sends a Stateless Reset in response to a packet that is * 43 bytes or shorter SHOULD send a Stateless Reset that is one byte shorter * than the packet it responds to. */ - - /* Note that we build at most a 42 bytes QUIC packet to mimic a short packet */ - pktlen = rxpkt->len <= 43 ? 
rxpkt->len - 1 : 0; - pktlen = QUIC_MAX(QUIC_STATELESS_RESET_PACKET_MINLEN, pktlen); + pktlen = rxpkt->len <= 43 ? rxpkt->len - 1 : + QUIC_STATELESS_RESET_PACKET_MINLEN; rndlen = pktlen - QUIC_STATELESS_RESET_TOKEN_LEN; /* Put a header of random bytes */ @@ -1320,7 +1142,7 @@ static inline int quic_write_uint32(unsigned char **buf, if (end - *buf < sizeof val) return 0; - *(uint32_t *)*buf = htonl(val); + write_u32(*buf, htonl(val)); *buf += sizeof val; return 1; diff --git a/src/resolvers.c b/src/resolvers.c index 3275cd2..47b0cce 100644 --- a/src/resolvers.c +++ b/src/resolvers.c @@ -28,6 +28,7 @@ #include <haproxy/check.h> #include <haproxy/cli.h> #include <haproxy/dns.h> +#include <haproxy/dns_ring.h> #include <haproxy/errors.h> #include <haproxy/fd.h> #include <haproxy/http_rules.h> @@ -36,7 +37,6 @@ #include <haproxy/protocol.h> #include <haproxy/proxy.h> #include <haproxy/resolvers.h> -#include <haproxy/ring.h> #include <haproxy/sample.h> #include <haproxy/sc_strm.h> #include <haproxy/server.h> @@ -50,6 +50,10 @@ #include <haproxy/vars.h> #include <haproxy/xxhash.h> +#if defined(USE_PROMEX) +#include <promex/promex.h> +#endif + struct list sec_resolvers = LIST_HEAD_INIT(sec_resolvers); struct list resolv_srvrq_list = LIST_HEAD_INIT(resolv_srvrq_list); @@ -92,7 +96,7 @@ enum { RSLV_STAT_END, }; -static struct name_desc resolv_stats[] = { +static struct stat_col resolv_stats[] = { [RSLV_STAT_ID] = { .name = "id", .desc = "ID" }, [RSLV_STAT_PID] = { .name = "pid", .desc = "Parent ID" }, [RSLV_STAT_SENT] = { .name = "sent", .desc = "Sent" }, @@ -114,26 +118,79 @@ static struct name_desc resolv_stats[] = { static struct dns_counters dns_counters; -static void resolv_fill_stats(void *d, struct field *stats) +static int resolv_fill_stats(void *d, struct field *stats, unsigned int *selected_field) { struct dns_counters *counters = d; - stats[RSLV_STAT_ID] = mkf_str(FO_CONFIG, counters->id); - stats[RSLV_STAT_PID] = mkf_str(FO_CONFIG, counters->pid); - 
stats[RSLV_STAT_SENT] = mkf_u64(FN_GAUGE, counters->sent); - stats[RSLV_STAT_SND_ERROR] = mkf_u64(FN_GAUGE, counters->snd_error); - stats[RSLV_STAT_VALID] = mkf_u64(FN_GAUGE, counters->app.resolver.valid); - stats[RSLV_STAT_UPDATE] = mkf_u64(FN_GAUGE, counters->app.resolver.update); - stats[RSLV_STAT_CNAME] = mkf_u64(FN_GAUGE, counters->app.resolver.cname); - stats[RSLV_STAT_CNAME_ERROR] = mkf_u64(FN_GAUGE, counters->app.resolver.cname_error); - stats[RSLV_STAT_ANY_ERR] = mkf_u64(FN_GAUGE, counters->app.resolver.any_err); - stats[RSLV_STAT_NX] = mkf_u64(FN_GAUGE, counters->app.resolver.nx); - stats[RSLV_STAT_TIMEOUT] = mkf_u64(FN_GAUGE, counters->app.resolver.timeout); - stats[RSLV_STAT_REFUSED] = mkf_u64(FN_GAUGE, counters->app.resolver.refused); - stats[RSLV_STAT_OTHER] = mkf_u64(FN_GAUGE, counters->app.resolver.other); - stats[RSLV_STAT_INVALID] = mkf_u64(FN_GAUGE, counters->app.resolver.invalid); - stats[RSLV_STAT_TOO_BIG] = mkf_u64(FN_GAUGE, counters->app.resolver.too_big); - stats[RSLV_STAT_TRUNCATED] = mkf_u64(FN_GAUGE, counters->app.resolver.truncated); - stats[RSLV_STAT_OUTDATED] = mkf_u64(FN_GAUGE, counters->app.resolver.outdated); + unsigned int current_field = (selected_field != NULL ? 
*selected_field : 0); + + for (; current_field < RSLV_STAT_END; current_field++) { + struct field metric = { 0 }; + + switch (current_field) { + case RSLV_STAT_ID: + metric = mkf_str(FO_CONFIG, counters->id); + break; + case RSLV_STAT_PID: + metric = mkf_str(FO_CONFIG, counters->pid); + break; + case RSLV_STAT_SENT: + metric = mkf_u64(FN_GAUGE, counters->sent); + break; + case RSLV_STAT_SND_ERROR: + metric = mkf_u64(FN_GAUGE, counters->snd_error); + break; + case RSLV_STAT_VALID: + metric = mkf_u64(FN_GAUGE, counters->app.resolver.valid); + break; + case RSLV_STAT_UPDATE: + metric = mkf_u64(FN_GAUGE, counters->app.resolver.update); + break; + case RSLV_STAT_CNAME: + metric = mkf_u64(FN_GAUGE, counters->app.resolver.cname); + break; + case RSLV_STAT_CNAME_ERROR: + metric = mkf_u64(FN_GAUGE, counters->app.resolver.cname_error); + break; + case RSLV_STAT_ANY_ERR: + metric = mkf_u64(FN_GAUGE, counters->app.resolver.any_err); + break; + case RSLV_STAT_NX: + metric = mkf_u64(FN_GAUGE, counters->app.resolver.nx); + break; + case RSLV_STAT_TIMEOUT: + metric = mkf_u64(FN_GAUGE, counters->app.resolver.timeout); + break; + case RSLV_STAT_REFUSED: + metric = mkf_u64(FN_GAUGE, counters->app.resolver.refused); + break; + case RSLV_STAT_OTHER: + metric = mkf_u64(FN_GAUGE, counters->app.resolver.other); + break; + case RSLV_STAT_INVALID: + metric = mkf_u64(FN_GAUGE, counters->app.resolver.invalid); + break; + case RSLV_STAT_TOO_BIG: + metric = mkf_u64(FN_GAUGE, counters->app.resolver.too_big); + break; + case RSLV_STAT_TRUNCATED: + metric = mkf_u64(FN_GAUGE, counters->app.resolver.truncated); + break; + case RSLV_STAT_OUTDATED: + metric = mkf_u64(FN_GAUGE, counters->app.resolver.outdated); + break; + default: + /* not used for frontends. If a specific metric + * is requested, return an error. Otherwise continue. 
+ */ + if (selected_field != NULL) + return 0; + continue; + } + stats[current_field] = metric; + if (selected_field != NULL) + break; + } + return 1; } static struct stats_module rslv_stats_module = { @@ -170,6 +227,20 @@ struct resolvers *find_resolvers_by_id(const char *id) return NULL; } +/* Returns a pointer to the nameserver matching numerical <id> within <parent> + * resolver section. NULL is returned if no match is found. + */ +struct dns_nameserver *find_nameserver_by_resolvers_and_id(struct resolvers *parent, unsigned int id) +{ + struct dns_nameserver *ns; + + list_for_each_entry(ns, &parent->nameservers, list) { + if (ns->puid == id) + return ns; + } + return NULL; +} + /* Returns a pointer on the SRV request matching the name <name> for the proxy * <px>. NULL is returned if no match is found. */ @@ -645,14 +716,17 @@ static void leave_resolver_code() */ static void resolv_srvrq_cleanup_srv(struct server *srv) { + struct server_inetaddr srv_addr; + _resolv_unlink_resolution(srv->resolv_requester); HA_SPIN_LOCK(SERVER_LOCK, &srv->lock); - srvrq_update_srv_status(srv, 1); + srvrq_set_srv_down(srv); ha_free(&srv->hostname); ha_free(&srv->hostname_dn); srv->hostname_dn_len = 0; - memset(&srv->addr, 0, sizeof(srv->addr)); - srv->svc_port = 0; + memset(&srv_addr, 0, sizeof(srv_addr)); + /* unset server's addr AND port */ + server_set_inetaddr(srv, &srv_addr, SERVER_INETADDR_UPDATER_NONE, NULL); srv->flags |= SRV_F_NO_RESOLUTION; ebpt_delete(&srv->host_dn); @@ -815,12 +889,16 @@ static void resolv_check_response(struct resolv_resolution *res) srv_found: /* And update this server, if found (srv is locked here) */ if (srv) { + struct server_inetaddr srv_addr; + uint8_t ip_change = 0; + /* re-enable DNS resolution for this server by default */ srv->flags &= ~SRV_F_NO_RESOLUTION; srv->srvrq_check->expire = TICK_ETERNITY; - srv->svc_port = item->port; - srv->flags &= ~SRV_F_MAPPORTS; + server_get_inetaddr(srv, &srv_addr); + srv_addr.port.svc = item->port; + 
srv_addr.port.map = 0; /* Check if an Additional Record is associated to this SRV record. * Perform some sanity checks too to ensure the record can be used. @@ -833,10 +911,12 @@ srv_found: switch (item->ar_item->type) { case DNS_RTYPE_A: - srv_update_addr(srv, &item->ar_item->data.in4.sin_addr, AF_INET, "DNS additional record"); + srv_addr.family = AF_INET; + srv_addr.addr.v4 = item->ar_item->data.in4.sin_addr; break; case DNS_RTYPE_AAAA: - srv_update_addr(srv, &item->ar_item->data.in6.sin6_addr, AF_INET6, "DNS additional record"); + srv_addr.family = AF_INET6; + srv_addr.addr.v6 = item->ar_item->data.in6.sin6_addr; break; } @@ -846,8 +926,15 @@ srv_found: * It is usless to perform an extra resolution */ _resolv_unlink_resolution(srv->resolv_requester); + + ip_change = 1; } + if (ip_change) + server_set_inetaddr_warn(srv, &srv_addr, SERVER_INETADDR_UPDATER_DNS_AR); + else + server_set_inetaddr(srv, &srv_addr, SERVER_INETADDR_UPDATER_NONE, NULL); + if (!srv->hostname_dn) { const char *msg = NULL; char hostname[DNS_MAX_NAME_SIZE+1]; @@ -873,9 +960,6 @@ srv_found: resolv_link_resolution(srv, OBJ_TYPE_SERVER, 1); } - /* Update the server status */ - srvrq_update_srv_status(srv, (srv->addr.ss_family != AF_INET && srv->addr.ss_family != AF_INET6)); - if (!srv->resolv_opts.ignore_weight) { char weight[9]; int ha_weight; @@ -2487,11 +2571,11 @@ static void resolvers_destroy(struct resolvers *resolvers) fd_delete(ns->dgram->conn.t.sock.fd); close(ns->dgram->conn.t.sock.fd); } - ring_free(ns->dgram->ring_req); + dns_ring_free(ns->dgram->ring_req); free(ns->dgram); } if (ns->stream) { - ring_free(ns->stream->ring_req); + dns_ring_free(ns->stream->ring_req); task_destroy(ns->stream->task_req); task_destroy(ns->stream->task_rsp); free(ns->stream); @@ -2684,14 +2768,15 @@ static int stats_dump_resolv_to_buffer(struct stconn *sc, list_for_each_entry(mod, stat_modules, list) { struct counters_node *counters = EXTRA_COUNTERS_GET(ns->extra_counters, mod); - 
mod->fill_stats(counters, stats + idx); + if (!mod->fill_stats(counters, stats + idx, NULL)) + continue; idx += mod->stats_count; } if (!stats_dump_one_line(stats, idx, appctx)) return 0; - if (!stats_putchk(appctx, NULL)) + if (!stats_putchk(appctx, NULL, NULL)) goto full; return 1; @@ -2797,6 +2882,7 @@ int resolv_allocate_counters(struct list *stat_modules) if (strcmp(mod->name, "resolvers") == 0) { ns->counters = (struct dns_counters *)ns->extra_counters->data + mod->counters_off[COUNTERS_RSLV]; ns->counters->id = ns->id; + ns->counters->ns_puid = ns->puid; ns->counters->pid = resolvers->id; } } @@ -3238,7 +3324,7 @@ int check_action_do_resolve(struct act_rule *rule, struct proxy *px, char **err) void resolvers_setup_proxy(struct proxy *px) { - px->last_change = ns_to_sec(now_ns); + px->fe_counters.last_change = px->be_counters.last_change = ns_to_sec(now_ns); px->cap = PR_CAP_FE | PR_CAP_BE; px->maxconn = 0; px->conn_retries = 1; @@ -3371,7 +3457,9 @@ static int parse_resolve_conf(char **errmsg, char **warnmsg) newnameserver->parent = curr_resolvers; newnameserver->process_responses = resolv_process_responses; newnameserver->conf.line = resolv_linenum; + newnameserver->puid = curr_resolvers->nb_nameservers; LIST_APPEND(&curr_resolvers->nameservers, &newnameserver->list); + curr_resolvers->nb_nameservers++; } resolv_out: @@ -3428,6 +3516,7 @@ static int resolvers_new(struct resolvers **resolvers, const char *id, const cha r->timeout.resolve = 1000; r->timeout.retry = 1000; r->resolve_retries = 3; + r->nb_nameservers = 0; LIST_INIT(&r->nameservers); LIST_INIT(&r->resolutions.curr); LIST_INIT(&r->resolutions.wait); @@ -3572,8 +3661,10 @@ int cfg_parse_resolvers(const char *file, int linenum, char **args, int kwm) newnameserver->parent = curr_resolvers; newnameserver->process_responses = resolv_process_responses; newnameserver->conf.line = linenum; + newnameserver->puid = curr_resolvers->nb_nameservers; /* the nameservers are linked backward first */ 
LIST_APPEND(&curr_resolvers->nameservers, &newnameserver->list); + curr_resolvers->nb_nameservers++; } else if (strcmp(args[0], "parse-resolv-conf") == 0) { err_code |= parse_resolve_conf(&errmsg, &warnmsg); @@ -3744,14 +3835,14 @@ out: */ int resolvers_create_default() { - int err_code = 0; + int err_code = ERR_NONE; if (global.mode & MODE_MWORKER_WAIT) /* does not create the section if in wait mode */ - return 0; + return ERR_NONE; /* if the section already exists, do nothing */ if (find_resolvers_by_id("default")) - return 0; + return ERR_NONE; curr_resolvers = NULL; err_code |= resolvers_new(&curr_resolvers, "default", "<internal>", 0); @@ -3777,7 +3868,7 @@ err: /* we never return an error there, we only try to create this section * if that's possible */ - return 0; + return ERR_NONE; } int cfg_post_parse_resolvers() @@ -3811,3 +3902,70 @@ REGISTER_CONFIG_SECTION("resolvers", cfg_parse_resolvers, cfg_post_parse_re REGISTER_POST_DEINIT(resolvers_deinit); REGISTER_CONFIG_POSTPARSER("dns runtime resolver", resolvers_finalize_config); REGISTER_PRE_CHECK(resolvers_create_default); + +#if defined(USE_PROMEX) + +static int rslv_promex_metric_info(unsigned int id, struct promex_metric *metric, struct ist *desc) +{ + if (id >= RSLV_STAT_END) + return -1; + if (id == RSLV_STAT_ID || id == RSLV_STAT_PID) + return 0; + + *metric = (struct promex_metric){ .n = ist(resolv_stats[id].name), .type = PROMEX_MT_GAUGE, .flags = PROMEX_FL_MODULE_METRIC }; + *desc = ist(resolv_stats[id].desc); + return 1; +} + +static void *rslv_promex_start_ts(void *unused, unsigned int id) +{ + struct resolvers *resolver = LIST_NEXT(&sec_resolvers, struct resolvers *, list); + + return LIST_NEXT(&resolver->nameservers, struct dns_nameserver *, list); +} + +static void *rslv_promex_next_ts(void *unused, void *metric_ctx, unsigned int id) +{ + struct dns_nameserver *ns = metric_ctx; + struct resolvers *resolver = ns->parent; + + ns = LIST_NEXT(&ns->list, struct dns_nameserver *, list); + if 
(&ns->list == &resolver->nameservers) { + resolver = LIST_NEXT(&resolver->list, struct resolvers *, list); + ns = ((&resolver->list == &sec_resolvers) + ? NULL + : LIST_NEXT(&resolver->nameservers, struct dns_nameserver *, list)); + } + return ns; +} + +static int rslv_promex_fill_ts(void *unused, void *metric_ctx, unsigned int id, struct promex_label *labels, struct field *field) +{ + struct dns_nameserver *ns = metric_ctx; + struct resolvers *resolver = ns->parent; + struct field stats[RSLV_STAT_END]; + int ret; + + labels[0].name = ist("resolver"); + labels[0].value = ist(resolver->id); + labels[1].name = ist("nameserver"); + labels[1].value = ist(ns->id); + + ret = resolv_fill_stats(ns->counters, stats, &id); + if (ret == 1) + *field = stats[id]; + return ret; +} + +static struct promex_module promex_resolver_module = { + .name = IST("resolver"), + .metric_info = rslv_promex_metric_info, + .start_ts = rslv_promex_start_ts, + .next_ts = rslv_promex_next_ts, + .fill_ts = rslv_promex_fill_ts, + .nb_metrics = RSLV_STAT_END, +}; + +INITCALL1(STG_REGISTER, promex_register_module, &promex_resolver_module); + +#endif @@ -22,11 +22,13 @@ #include <haproxy/api.h> #include <haproxy/applet.h> #include <haproxy/buf.h> +#include <haproxy/cfgparse.h> #include <haproxy/cli.h> #include <haproxy/ring.h> #include <haproxy/sc_strm.h> #include <haproxy/stconn.h> #include <haproxy/thread.h> +#include <haproxy/vecpair.h> /* context used to dump the contents of a ring via "show events" or "show errors" */ struct show_ring_ctx { @@ -35,117 +37,120 @@ struct show_ring_ctx { uint flags; /* set of RING_WF_* */ }; -/* Initialize a pre-allocated ring with the buffer area - * of size */ -void ring_init(struct ring *ring, void *area, size_t size) +/* Initialize a pre-allocated ring with the buffer area of size <size>. + * Makes the storage point to the indicated area and adjusts the declared + * ring size according to the position of the area in the storage. 
If <reset> + * is non-zero, the storage area is reset, otherwise it's left intact (except + * for the area origin pointer which is updated so that the area can come from + * an mmap()). + */ +void ring_init(struct ring *ring, void *area, size_t size, int reset) { - HA_RWLOCK_INIT(&ring->lock); - LIST_INIT(&ring->waiters); + MT_LIST_INIT(&ring->waiters); ring->readers_count = 0; - ring->buf = b_make(area, size, 0, 0); - /* write the initial RC byte */ - b_putchr(&ring->buf, 0); + ring->flags = 0; + ring->storage = area; + ring->pending = 0; + ring->waking = 0; + memset(&ring->queue, 0, sizeof(ring->queue)); + + if (reset) { + ring->storage->size = size - sizeof(*ring->storage); + ring->storage->rsvd = sizeof(*ring->storage); + ring->storage->head = 0; + ring->storage->tail = 0; + + /* write the initial RC byte */ + *ring->storage->area = 0; + ring->storage->tail = 1; + } } -/* Creates and returns a ring buffer of size <size> bytes. Returns NULL on - * allocation failure. +/* Creates a ring and its storage area at address <area> for <size> bytes. + * If <area> is null, then it's allocated of the requested size. The ring + * storage struct is part of the area so the usable area is slightly reduced. + * However the storage is immediately adjacent to the struct so that the ring + * remains consistent on-disk. ring_free() will ignore such ring storages and + * will only release the ring part, so the caller is responsible for releasing + * them. If <reset> is non-zero, the storage area is reset, otherwise it's left + * intact. 
*/ -struct ring *ring_new(size_t size) +struct ring *ring_make_from_area(void *area, size_t size, int reset) { struct ring *ring = NULL; - void *area = NULL; + uint flags = 0; - if (size < 2) - goto fail; + if (size < sizeof(*ring->storage) + 2) + return NULL; ring = malloc(sizeof(*ring)); if (!ring) goto fail; - area = malloc(size); + if (!area) + area = malloc(size); + else + flags |= RING_FL_MAPPED; + if (!area) goto fail; - ring_init(ring, area, size); + ring_init(ring, area, size, reset); + ring->flags |= flags; return ring; fail: - free(area); free(ring); return NULL; } -/* Creates a unified ring + storage area at address <area> for <size> bytes. - * If <area> is null, then it's allocated of the requested size. The ring - * struct is part of the area so the usable area is slightly reduced. However - * the ring storage is immediately adjacent to the struct. ring_free() will - * ignore such rings, so the caller is responsible for releasing them. - */ -struct ring *ring_make_from_area(void *area, size_t size) -{ - struct ring *ring = NULL; - - if (size < sizeof(*ring)) - return NULL; - - if (!area) - area = malloc(size); - if (!area) - return NULL; - - ring = area; - area += sizeof(*ring); - ring_init(ring, area, size - sizeof(*ring)); - return ring; -} - -/* Cast an unified ring + storage area to a ring from <area>, without - * reinitializing the data buffer. - * - * Reinitialize the waiters and the lock. +/* Creates and returns a ring buffer of size <size> bytes. Returns NULL on + * allocation failure. The size is the area size, not the usable size. 
*/ -struct ring *ring_cast_from_area(void *area) +struct ring *ring_new(size_t size) { - struct ring *ring = NULL; - - ring = area; - ring->buf.area = area + sizeof(*ring); - - HA_RWLOCK_INIT(&ring->lock); - LIST_INIT(&ring->waiters); - ring->readers_count = 0; - - return ring; + return ring_make_from_area(NULL, size, 1); } /* Resizes existing ring <ring> to <size> which must be larger, without losing * its contents. The new size must be at least as large as the previous one or * no change will be performed. The pointer to the ring is returned on success, - * or NULL on allocation failure. This will lock the ring for writes. + * or NULL on allocation failure. This will lock the ring for writes. The size + * is the allocated area size, and includes the ring_storage header. */ struct ring *ring_resize(struct ring *ring, size_t size) { - void *area; + struct ring_storage *old, *new; - if (b_size(&ring->buf) >= size) + if (size <= ring_data(ring) + sizeof(*ring->storage)) return ring; - area = malloc(size); - if (!area) + old = ring->storage; + new = malloc(size); + if (!new) return NULL; - HA_RWLOCK_WRLOCK(RING_LOCK, &ring->lock); + thread_isolate(); - /* recheck the buffer's size, it may have changed during the malloc */ - if (b_size(&ring->buf) < size) { + /* recheck the ring's size, it may have changed during the malloc */ + if (size > ring_data(ring) + sizeof(*ring->storage)) { /* copy old contents */ - b_getblk(&ring->buf, area, ring->buf.data, 0); - area = HA_ATOMIC_XCHG(&ring->buf.area, area); - ring->buf.size = size; + struct ist v1, v2; + size_t len; + + vp_ring_to_data(&v1, &v2, old->area, old->size, old->head, old->tail); + len = vp_size(v1, v2); + vp_peek_ofs(v1, v2, 0, new->area, len); + new->size = size - sizeof(*ring->storage); + new->rsvd = sizeof(*ring->storage); + new->head = 0; + new->tail = len; + new = HA_ATOMIC_XCHG(&ring->storage, new); } - HA_RWLOCK_WRUNLOCK(RING_LOCK, &ring->lock); + thread_release(); - free(area); + /* free the unused one */ 
+ free(new); return ring; } @@ -156,10 +161,8 @@ void ring_free(struct ring *ring) return; /* make sure it was not allocated by ring_make_from_area */ - if (ring->buf.area == (void *)ring + sizeof(*ring)) - return; - - free(ring->buf.area); + if (!(ring->flags & RING_FL_MAPPED)) + free(ring->storage); free(ring); } @@ -173,12 +176,20 @@ void ring_free(struct ring *ring) */ ssize_t ring_write(struct ring *ring, size_t maxlen, const struct ist pfx[], size_t npfx, const struct ist msg[], size_t nmsg) { - struct buffer *buf = &ring->buf; - struct appctx *appctx; - size_t totlen = 0; + struct ring_wait_cell **ring_queue_ptr = DISGUISE(&ring->queue[ti->ring_queue].ptr); + struct ring_wait_cell cell, *next_cell, *curr_cell; + size_t *tail_ptr = &ring->storage->tail; + size_t head_ofs, tail_ofs, new_tail_ofs; + size_t ring_size; + char *ring_area; + struct ist v1, v2; + size_t msglen = 0; size_t lenlen; + size_t needed; uint64_t dellen; int dellenlen; + uint8_t *lock_ptr; + uint8_t readers; ssize_t sent = 0; int i; @@ -191,20 +202,125 @@ ssize_t ring_write(struct ring *ring, size_t maxlen, const struct ist pfx[], siz * copying due to the varint encoding of the length. */ for (i = 0; i < npfx; i++) - totlen += pfx[i].len; + msglen += pfx[i].len; for (i = 0; i < nmsg; i++) - totlen += msg[i].len; + msglen += msg[i].len; - if (totlen > maxlen) - totlen = maxlen; + if (msglen > maxlen) + msglen = maxlen; - lenlen = varint_bytes(totlen); + lenlen = varint_bytes(msglen); - HA_RWLOCK_WRLOCK(RING_LOCK, &ring->lock); - if (lenlen + totlen + 1 + 1 > b_size(buf)) - goto done_buf; + /* We need: + * - lenlen bytes for the size encoding + * - msglen for the message + * - one byte for the new marker + * + * Note that we'll also reserve one extra byte to make sure we never + * leave a full buffer (the vec-to-ring conversion cannot be done if + * both areas are of size 0). 
+ */ + needed = lenlen + msglen + 1; - while (b_room(buf) < lenlen + totlen + 1) { + /* these ones do not change under us (only resize affects them and it + * must be done under thread isolation). + */ + ring_area = ring->storage->area; + ring_size = ring->storage->size; + + if (needed + 1 > ring_size) + goto leave; + + cell.to_send_self = needed; + cell.needed_tot = 0; // only when non-zero the cell is considered ready. + cell.maxlen = msglen; + cell.pfx = pfx; + cell.npfx = npfx; + cell.msg = msg; + cell.nmsg = nmsg; + + /* insert our cell into the queue before the previous one. We may have + * to wait a bit if the queue's leader is attempting an election to win + * the tail, hence the busy value (should be rare enough). + */ + next_cell = HA_ATOMIC_XCHG(ring_queue_ptr, &cell); + + /* let's add the cumulated size of pending messages to ours */ + cell.next = next_cell; + if (next_cell) { + size_t next_needed; + + while ((next_needed = HA_ATOMIC_LOAD(&next_cell->needed_tot)) == 0) + __ha_cpu_relax_for_read(); + needed += next_needed; + } + + /* now <needed> will represent the size to store *all* messages. The + * atomic store may unlock a subsequent thread waiting for this one. + */ + HA_ATOMIC_STORE(&cell.needed_tot, needed); + + /* OK now we're the queue leader, it's our job to try to get ownership + * of the tail, if we succeeded above, we don't even enter the loop. If + * we failed, we set ourselves at the top the queue, waiting for the + * tail to be unlocked again. We stop doing that if another thread + * comes in and becomes the leader in turn. + */ + + /* Wait for another thread to take the lead or for the tail to + * be available again. It's critical to be read-only in this + * loop so as not to lose time synchronizing cache lines. Also, + * we must detect a new leader ASAP so that the fewest possible + * threads check the tail. 
+ */ + + while (1) { + if ((curr_cell = HA_ATOMIC_LOAD(ring_queue_ptr)) != &cell) + goto wait_for_flush; + __ha_cpu_relax_for_read(); + +#if !defined(__ARM_FEATURE_ATOMICS) + /* ARMv8.1-a has a true atomic OR and doesn't need the preliminary read */ + if ((tail_ofs = HA_ATOMIC_LOAD(tail_ptr)) & RING_TAIL_LOCK) { + __ha_cpu_relax_for_read(); + continue; + } +#endif + /* OK the queue is locked, let's attempt to get the tail lock */ + tail_ofs = HA_ATOMIC_FETCH_OR(tail_ptr, RING_TAIL_LOCK); + + /* did we get it ? */ + if (!(tail_ofs & RING_TAIL_LOCK)) { + /* Here we own the tail. We can go on if we're still the leader, + * which we'll confirm by trying to reset the queue. If we're + * still the leader, we're done. + */ + if (HA_ATOMIC_CAS(ring_queue_ptr, &curr_cell, NULL)) + break; // Won! + + /* oops, no, let's give it back to another thread and wait. + * This does not happen often enough to warrant more complex + * approaches (tried already). + */ + HA_ATOMIC_STORE(tail_ptr, tail_ofs); + goto wait_for_flush; + } + __ha_cpu_relax_for_read(); + } + + head_ofs = HA_ATOMIC_LOAD(&ring->storage->head); + + /* this is the byte before tail, it contains the users count */ + lock_ptr = (uint8_t*)ring_area + (tail_ofs > 0 ? tail_ofs - 1 : ring_size - 1); + + /* Take the lock on the area. We're guaranteed to be the only writer + * here. + */ + readers = HA_ATOMIC_XCHG(lock_ptr, RING_WRITING_SIZE); + + vp_ring_to_data(&v1, &v2, ring_area, ring_size, head_ofs, tail_ofs); + + while (vp_size(v1, v2) > ring_size - needed - 1 - 1) { /* we need to delete the oldest message (from the end), * and we have to stop if there's a reader stuck there. * Unless there's corruption in the buffer it's guaranteed @@ -212,50 +328,142 @@ ssize_t ring_write(struct ring *ring, size_t maxlen, const struct ist pfx[], siz * varint-encoded length (1 byte min) and the message * payload (0 bytes min). 
*/ - if (*b_head(buf)) - goto done_buf; - dellenlen = b_peek_varint(buf, 1, &dellen); + if (*_vp_head(v1, v2)) + break; + dellenlen = vp_peek_varint_ofs(v1, v2, 1, &dellen); if (!dellenlen) - goto done_buf; - BUG_ON(b_data(buf) < 1 + dellenlen + dellen); - - b_del(buf, 1 + dellenlen + dellen); + break; + BUG_ON_HOT(vp_size(v1, v2) < 1 + dellenlen + dellen); + vp_skip(&v1, &v2, 1 + dellenlen + dellen); } - /* OK now we do have room */ - __b_put_varint(buf, totlen); + /* now let's update the buffer with the new tail if our message will fit */ + new_tail_ofs = tail_ofs; + if (vp_size(v1, v2) <= ring_size - needed - 1 - 1) { + vp_data_to_ring(v1, v2, ring_area, ring_size, &head_ofs, &tail_ofs); + + /* update the new space in the buffer */ + HA_ATOMIC_STORE(&ring->storage->head, head_ofs); - totlen = 0; - for (i = 0; i < npfx; i++) { - size_t len = pfx[i].len; + /* calculate next tail pointer */ + new_tail_ofs += needed; + if (new_tail_ofs >= ring_size) + new_tail_ofs -= ring_size; - if (len + totlen > maxlen) - len = maxlen - totlen; - if (len) - __b_putblk(buf, pfx[i].ptr, len); - totlen += len; + /* reset next read counter before releasing writers */ + HA_ATOMIC_STORE(ring_area + (new_tail_ofs > 0 ? new_tail_ofs - 1 : ring_size - 1), 0); + } + else { + /* release readers right now, before writing the tail, so as + * not to expose the readers count byte to another writer. + */ + HA_ATOMIC_STORE(lock_ptr, readers); } - for (i = 0; i < nmsg; i++) { - size_t len = msg[i].len; + /* and release other writers */ + HA_ATOMIC_STORE(tail_ptr, new_tail_ofs); + + vp_ring_to_room(&v1, &v2, ring_area, ring_size, (new_tail_ofs > 0 ? 
new_tail_ofs - 1 : ring_size - 1), tail_ofs); + + if (likely(tail_ofs != new_tail_ofs)) { + /* the list stops on a NULL */ + for (curr_cell = &cell; curr_cell; curr_cell = HA_ATOMIC_LOAD(&curr_cell->next)) { + maxlen = curr_cell->maxlen; + pfx = curr_cell->pfx; + npfx = curr_cell->npfx; + msg = curr_cell->msg; + nmsg = curr_cell->nmsg; + + /* let's write the message size */ + vp_put_varint(&v1, &v2, maxlen); + + /* then write the messages */ + msglen = 0; + for (i = 0; i < npfx; i++) { + size_t len = pfx[i].len; + + if (len + msglen > maxlen) + len = maxlen - msglen; + if (len) + vp_putblk(&v1, &v2, pfx[i].ptr, len); + msglen += len; + } + + for (i = 0; i < nmsg; i++) { + size_t len = msg[i].len; + + if (len + msglen > maxlen) + len = maxlen - msglen; + if (len) + vp_putblk(&v1, &v2, msg[i].ptr, len); + msglen += len; + } + + /* for all but the last message we need to write the + * readers count byte. + */ + if (curr_cell->next) + vp_putchr(&v1, &v2, 0); + } + + /* now release */ + for (curr_cell = &cell; curr_cell; curr_cell = next_cell) { + next_cell = HA_ATOMIC_LOAD(&curr_cell->next); + _HA_ATOMIC_STORE(&curr_cell->next, curr_cell); + } - if (len + totlen > maxlen) - len = maxlen - totlen; - if (len) - __b_putblk(buf, msg[i].ptr, len); - totlen += len; + /* unlock the message area */ + HA_ATOMIC_STORE(lock_ptr, readers); + } else { + /* messages were dropped, notify about this and release them */ + for (curr_cell = &cell; curr_cell; curr_cell = next_cell) { + next_cell = HA_ATOMIC_LOAD(&curr_cell->next); + HA_ATOMIC_STORE(&curr_cell->to_send_self, 0); + _HA_ATOMIC_STORE(&curr_cell->next, curr_cell); + } } - *b_tail(buf) = 0; buf->data++; // new read counter - sent = lenlen + totlen + 1; + /* we must not write the trailing read counter, it was already done, + * plus we could ruin the one of the next writer. And the front was + * unlocked either at the top if the ring was full, or just above if it + * could be properly filled. 
+ */ + + sent = cell.to_send_self; /* notify potential readers */ - list_for_each_entry(appctx, &ring->waiters, wait_entry) - appctx_wakeup(appctx); + if (sent && HA_ATOMIC_LOAD(&ring->readers_count)) { + HA_ATOMIC_INC(&ring->pending); + while (HA_ATOMIC_LOAD(&ring->pending) && HA_ATOMIC_XCHG(&ring->waking, 1) == 0) { + struct mt_list *elt1, elt2; + struct appctx *appctx; + + HA_ATOMIC_STORE(&ring->pending, 0); + mt_list_for_each_entry_safe(appctx, &ring->waiters, wait_entry, elt1, elt2) + appctx_wakeup(appctx); + HA_ATOMIC_STORE(&ring->waking, 0); + } + } - done_buf: - HA_RWLOCK_WRUNLOCK(RING_LOCK, &ring->lock); + leave: return sent; + + wait_for_flush: + /* if we arrive here, it means we found another leader */ + + /* The leader will write our own pointer in the cell's next to + * mark it as released. Let's wait for this. + */ + do { + next_cell = HA_ATOMIC_LOAD(&cell.next); + } while (next_cell != &cell && __ha_cpu_relax_for_read()); + + /* OK our message was queued. Retrieving the sent size in the ring cell + * allows another leader thread to zero it if it finally couldn't send + * it (should only happen when using too small ring buffers to store + * all competing threads' messages at once). + */ + return HA_ATOMIC_LOAD(&cell.to_send_self); } /* Tries to attach appctx <appctx> as a new reader on ring <ring>. 
This is @@ -270,7 +478,7 @@ int ring_attach(struct ring *ring) int users = ring->readers_count; do { - if (users >= 255) + if (users >= RING_MAX_READERS) return 0; } while (!_HA_ATOMIC_CAS(&ring->readers_count, &users, users + 1)); return 1; @@ -285,20 +493,22 @@ void ring_detach_appctx(struct ring *ring, struct appctx *appctx, size_t ofs) if (!ring) return; - HA_RWLOCK_WRLOCK(RING_LOCK, &ring->lock); + HA_ATOMIC_DEC(&ring->readers_count); + if (ofs != ~0) { /* reader was still attached */ - if (ofs < b_head_ofs(&ring->buf)) - ofs += b_size(&ring->buf) - b_head_ofs(&ring->buf); - else - ofs -= b_head_ofs(&ring->buf); - - BUG_ON(ofs >= b_size(&ring->buf)); - LIST_DEL_INIT(&appctx->wait_entry); - HA_ATOMIC_DEC(b_peek(&ring->buf, ofs)); + uint8_t *area = (uint8_t *)ring_area(ring); + uint8_t readers; + + BUG_ON(ofs >= ring_size(ring)); + MT_LIST_DELETE(&appctx->wait_entry); + + /* dec readers count */ + do { + readers = _HA_ATOMIC_LOAD(area + ofs); + } while ((readers > RING_MAX_READERS || + !_HA_ATOMIC_CAS(area + ofs, &readers, readers - 1)) && __ha_cpu_relax()); } - HA_ATOMIC_DEC(&ring->readers_count); - HA_RWLOCK_WRUNLOCK(RING_LOCK, &ring->lock); } /* Tries to attach CLI handler <appctx> as a new reader on ring <ring>. This is @@ -313,7 +523,7 @@ int ring_attach_cli(struct ring *ring, struct appctx *appctx, uint flags) if (!ring_attach(ring)) return cli_err(appctx, - "Sorry, too many watchers (255) on this ring buffer. " + "Sorry, too many watchers (" TOSTR(RING_MAX_READERS) ") on this ring buffer. " "What could it have so interesting to attract so many watchers ?"); if (!appctx->io_handler) @@ -328,36 +538,29 @@ int ring_attach_cli(struct ring *ring, struct appctx *appctx, uint flags) return 0; } -/* This function dumps all events from the ring whose pointer is in <p0> into - * the appctx's output buffer, and takes from <o0> the seek offset into the - * buffer's history (0 for oldest known event). 
It looks at <i0> for boolean - * options: bit0 means it must wait for new data or any key to be pressed. Bit1 - * means it must seek directly to the end to wait for new contents. It returns - * 0 if the output buffer or events are missing is full and it needs to be - * called again, otherwise non-zero. It is meant to be used with - * cli_release_show_ring() to clean up. + +/* parses as many messages as possible from ring <ring>, starting at the offset + * stored at *ofs_ptr, with RING_WF_* flags in <flags>, and passes them to + * the message handler <msg_handler>. If <last_of_ptr> is not NULL, a copy of + * the last known tail pointer will be copied there so that the caller may use + * this to detect new data have arrived since we left the function. Returns 0 + * if it needs to pause, 1 once finished. */ -int cli_io_handler_show_ring(struct appctx *appctx) +int ring_dispatch_messages(struct ring *ring, void *ctx, size_t *ofs_ptr, size_t *last_ofs_ptr, uint flags, + ssize_t (*msg_handler)(void *ctx, struct ist v1, struct ist v2, size_t ofs, size_t len)) { - struct show_ring_ctx *ctx = appctx->svcctx; - struct stconn *sc = appctx_sc(appctx); - struct ring *ring = ctx->ring; - struct buffer *buf = &ring->buf; - size_t ofs; - size_t last_ofs; + size_t head_ofs, tail_ofs, prev_ofs; + size_t ring_size; + uint8_t *ring_area; + struct ist v1, v2; uint64_t msg_len; size_t len, cnt; + ssize_t copied; + uint8_t readers; int ret; - /* FIXME: Don't watch the other side !*/ - if (unlikely(sc_opposite(sc)->flags & SC_FL_SHUT_DONE)) - return 1; - - HA_RWLOCK_WRLOCK(RING_LOCK, &ring->lock); - LIST_DEL_INIT(&appctx->wait_entry); - HA_RWLOCK_WRUNLOCK(RING_LOCK, &ring->lock); - - HA_RWLOCK_RDLOCK(RING_LOCK, &ring->lock); + ring_area = (uint8_t *)ring->storage->area; + ring_size = ring->storage->size; /* explanation for the initialization below: it would be better to do * this in the parsing function but this would occasionally result in @@ -365,59 +568,134 @@ int 
cli_io_handler_show_ring(struct appctx *appctx) * and keep it while being scheduled. Thus instead let's take it the * first time we enter here so that we have a chance to pass many * existing messages before grabbing a reference to a location. This - * value cannot be produced after initialization. + * value cannot be produced after initialization. The first offset + * needs to be taken under isolation as it must not move while we're + * trying to catch it. */ - if (unlikely(ctx->ofs == ~0)) { - /* going to the end means looking at tail-1 */ - ctx->ofs = b_peek_ofs(buf, (ctx->flags & RING_WF_SEEK_NEW) ? b_data(buf) - 1 : 0); - HA_ATOMIC_INC(b_orig(buf) + ctx->ofs); + if (unlikely(*ofs_ptr == ~0)) { + thread_isolate(); + + head_ofs = HA_ATOMIC_LOAD(&ring->storage->head); + tail_ofs = ring_tail(ring); + + if (flags & RING_WF_SEEK_NEW) { + /* going to the end means looking at tail-1 */ + head_ofs = tail_ofs + ring_size - 1; + if (head_ofs >= ring_size) + head_ofs -= ring_size; + } + + /* reserve our slot here (inc readers count) */ + do { + readers = _HA_ATOMIC_LOAD(ring_area + head_ofs); + } while ((readers > RING_MAX_READERS || + !_HA_ATOMIC_CAS(ring_area + head_ofs, &readers, readers + 1)) && __ha_cpu_relax()); + + thread_release(); + + /* store this precious offset in our context, and we're done */ + *ofs_ptr = head_ofs; } - /* we were already there, adjust the offset to be relative to - * the buffer's head and remove us from the counter. + /* we have the guarantee we can restart from our own head */ + head_ofs = *ofs_ptr; + BUG_ON(head_ofs >= ring_size); + + /* the tail will continue to move but we're getting a safe value + * here that will continue to work. */ - ofs = ctx->ofs - b_head_ofs(buf); - if (ctx->ofs < b_head_ofs(buf)) - ofs += b_size(buf); + tail_ofs = ring_tail(ring); - BUG_ON(ofs >= buf->size); - HA_ATOMIC_DEC(b_peek(buf, ofs)); + /* we keep track of where we were and we don't release it before + * we've protected the next place. 
+ */ + prev_ofs = head_ofs; - /* in this loop, ofs always points to the counter byte that precedes + /* in this loop, head_ofs always points to the counter byte that precedes * the message so that we can take our reference there if we have to - * stop before the end (ret=0). + * stop before the end (ret=0). The reference is relative to the ring's + * origin, while pos is relative to the ring's head. */ ret = 1; - while (ofs + 1 < b_data(buf)) { + vp_ring_to_data(&v1, &v2, (char *)ring_area, ring_size, head_ofs, tail_ofs); + + while (1) { + if (vp_size(v1, v2) <= 1) { + /* no more data */ + break; + } + + readers = _HA_ATOMIC_LOAD(_vp_addr(v1, v2, 0)); + if (readers > RING_MAX_READERS) { + /* we just met a writer which hasn't finished */ + break; + } + cnt = 1; - len = b_peek_varint(buf, ofs + cnt, &msg_len); + len = vp_peek_varint_ofs(v1, v2, cnt, &msg_len); if (!len) break; cnt += len; - BUG_ON(msg_len + ofs + cnt + 1 > b_data(buf)); - if (unlikely(msg_len + 1 > b_size(&trash))) { + BUG_ON(msg_len + cnt + 1 > vp_size(v1, v2)); + + copied = msg_handler(ctx, v1, v2, cnt, msg_len); + if (copied == -2) { /* too large a message to ever fit, let's skip it */ - ofs += cnt + msg_len; - continue; + goto skip; } - - chunk_reset(&trash); - len = b_getblk(buf, trash.area, msg_len, ofs + cnt); - trash.data += len; - trash.area[trash.data++] = '\n'; - - if (applet_putchk(appctx, &trash) == -1) { + else if (copied == -1) { + /* output full */ ret = 0; break; } - ofs += cnt + msg_len; + skip: + vp_skip(&v1, &v2, cnt + msg_len); + } + + vp_data_to_ring(v1, v2, (char *)ring_area, ring_size, &head_ofs, &tail_ofs); + + if (head_ofs != prev_ofs) { + /* inc readers count on new place */ + do { + readers = _HA_ATOMIC_LOAD(ring_area + head_ofs); + } while ((readers > RING_MAX_READERS || + !_HA_ATOMIC_CAS(ring_area + head_ofs, &readers, readers + 1)) && __ha_cpu_relax()); + + /* dec readers count on old place */ + do { + readers = _HA_ATOMIC_LOAD(ring_area + prev_ofs); + } while ((readers 
> RING_MAX_READERS || + !_HA_ATOMIC_CAS(ring_area + prev_ofs, &readers, readers - 1)) && __ha_cpu_relax()); } - HA_ATOMIC_INC(b_peek(buf, ofs)); - last_ofs = b_tail_ofs(buf); - ctx->ofs = b_peek_ofs(buf, ofs); - HA_RWLOCK_RDUNLOCK(RING_LOCK, &ring->lock); + if (last_ofs_ptr) + *last_ofs_ptr = tail_ofs; + *ofs_ptr = head_ofs; + return ret; +} + +/* This function dumps all events from the ring whose pointer is in <p0> into + * the appctx's output buffer, and takes from <o0> the seek offset into the + * buffer's history (0 for oldest known event). It looks at <i0> for boolean + * options: bit0 means it must wait for new data or any key to be pressed. Bit1 + * means it must seek directly to the end to wait for new contents. It returns + * 0 if the output buffer or events are missing is full and it needs to be + * called again, otherwise non-zero. It is meant to be used with + * cli_release_show_ring() to clean up. + */ +int cli_io_handler_show_ring(struct appctx *appctx) +{ + struct show_ring_ctx *ctx = appctx->svcctx; + struct stconn *sc = appctx_sc(appctx); + struct ring *ring = ctx->ring; + size_t last_ofs; + size_t ofs; + int ret; + + MT_LIST_DELETE(&appctx->wait_entry); + + ret = ring_dispatch_messages(ring, appctx, &ctx->ofs, &last_ofs, ctx->flags, applet_append_line); if (ret && (ctx->flags & RING_WF_WAIT_MODE)) { /* we've drained everything and are configured to wait for more @@ -425,10 +703,8 @@ int cli_io_handler_show_ring(struct appctx *appctx) */ if (!sc_oc(sc)->output && !(sc->flags & SC_FL_SHUT_DONE)) { /* let's be woken up once new data arrive */ - HA_RWLOCK_WRLOCK(RING_LOCK, &ring->lock); - LIST_APPEND(&ring->waiters, &appctx->wait_entry); - ofs = b_tail_ofs(&ring->buf); - HA_RWLOCK_WRUNLOCK(RING_LOCK, &ring->lock); + MT_LIST_APPEND(&ring->waiters, &appctx->wait_entry); + ofs = ring_tail(ring); if (ofs != last_ofs) { /* more data was added into the ring between the * unlock and the lock, and the writer might not @@ -467,13 +743,41 @@ size_t 
ring_max_payload(const struct ring *ring) size_t max; /* initial max = bufsize - 1 (initial RC) - 1 (payload RC) */ - max = b_size(&ring->buf) - 1 - 1; + max = ring_size(ring) - 1 - 1; /* subtract payload VI (varint-encoded size) */ max -= varint_bytes(max); return max; } +/* config parser for global "tune.ring.queues", accepts a number from 0 to RING_WAIT_QUEUES */ +static int cfg_parse_tune_ring_queues(char **args, int section_type, struct proxy *curpx, + const struct proxy *defpx, const char *file, int line, + char **err) +{ + int queues; + + if (too_many_args(1, args, err, NULL)) + return -1; + + queues = atoi(args[1]); + if (queues < 0 || queues > RING_WAIT_QUEUES) { + memprintf(err, "'%s' expects a number between 0 and %d but got '%s'.", args[0], RING_WAIT_QUEUES, args[1]); + return -1; + } + + global.tune.ring_queues = queues; + return 0; +} + +/* config keyword parsers */ +static struct cfg_kw_list cfg_kws = {ILH, { + { CFG_GLOBAL, "tune.ring.queues", cfg_parse_tune_ring_queues }, + { 0, NULL, NULL } +}}; + +INITCALL1(STG_REGISTER, cfg_register_keywords, &cfg_kws); + /* * Local variables: * c-indent-level: 8 diff --git a/src/sample.c b/src/sample.c index 89de612..3e5b576 100644 --- a/src/sample.c +++ b/src/sample.c @@ -61,6 +61,21 @@ const char *smp_to_type[SMP_TYPES] = { [SMP_T_METH] = "meth", }; +/* Returns SMP_T_* smp matching with <type> name or SMP_TYPES if + * not found. 
+ */ +int type_to_smp(const char *type) +{ + int it = 0; + + while (it < SMP_TYPES) { + if (strcmp(type, smp_to_type[it]) == 0) + break; // found + it += 1; + } + return it; +} + /* static sample used in sample_process() when <p> is NULL */ static THREAD_LOCAL struct sample temp_smp; @@ -3803,7 +3818,7 @@ static int sample_conv_ungrpc(const struct arg *arg_p, struct sample *smp, void while (grpc_left > GRPC_MSG_HEADER_SZ) { size_t grpc_msg_len, left; - grpc_msg_len = left = ntohl(*(uint32_t *)(pos + GRPC_MSG_COMPRESS_FLAG_SZ)); + grpc_msg_len = left = ntohl(read_u32(pos + GRPC_MSG_COMPRESS_FLAG_SZ)); pos += GRPC_MSG_HEADER_SZ; grpc_left -= GRPC_MSG_HEADER_SZ; @@ -4766,30 +4781,58 @@ static int smp_check_uuid(struct arg *args, char **err) if (!args[0].type) { args[0].type = ARGT_SINT; args[0].data.sint = 4; - } - else if (args[0].data.sint != 4) { - memprintf(err, "Unsupported UUID version: '%lld'", args[0].data.sint); - return 0; + } else { + switch (args[0].data.sint) { + case 4: + case 7: + break; + default: + memprintf(err, "Unsupported UUID version: '%lld'", args[0].data.sint); + return 0; + } } return 1; } -// Generate a RFC4122 UUID (default is v4 = fully random) +// Generate a RFC 9562 UUID (default is v4 = fully random) static int smp_fetch_uuid(const struct arg *args, struct sample *smp, const char *kw, void *private) { - if (args[0].data.sint == 4 || !args[0].type) { - ha_generate_uuid(&trash); - smp->data.type = SMP_T_STR; - smp->flags = SMP_F_VOL_TEST | SMP_F_MAY_CHANGE; - smp->data.u.str = trash; - return 1; + long long int type = -1; + + if (!args[0].type) { + type = 4; + } else { + type = args[0].data.sint; } - // more implementations of other uuid formats possible here - return 0; + switch (type) { + case 4: + ha_generate_uuid_v4(&trash); + break; + case 7: + ha_generate_uuid_v7(&trash); + break; + default: + return 0; + } + + smp->data.type = SMP_T_STR; + smp->flags = SMP_F_VOL_TEST | SMP_F_MAY_CHANGE; + smp->data.u.str = trash; + return 1; +} + 
+/* returns the uptime in seconds */ +static int +smp_fetch_uptime(const struct arg *args, struct sample *smp, const char *kw, void *private) +{ + smp->data.type = SMP_T_SINT; + smp->data.u.sint = ns_to_sec(now_ns - start_time_ns); + return 1; } + /* Check if QUIC support was compiled and was not disabled by "no-quic" global option */ static int smp_fetch_quic_enabled(const struct arg *args, struct sample *smp, const char *kw, void *private) { @@ -4915,6 +4958,30 @@ error: return 0; } +/* Server conn queueing infos - bc_{be,srv}_queue */ +static int smp_fetch_conn_queues(const struct arg *args, struct sample *smp, const char *kw, void *private) +{ + struct strm_logs *logs; + + if (!smp->strm) + return 0; + + smp->data.type = SMP_T_SINT; + smp->flags = 0; + + logs = &smp->strm->logs; + + if (kw[3] == 'b') { + /* bc_be_queue */ + smp->data.u.sint = logs->prx_queue_pos; + } + else { + /* bc_srv_queue */ + smp->data.u.sint = logs->srv_queue_pos; + } + return 1; +} + /* Timing events {f,bc}.timer. 
*/ static int smp_fetch_conn_timers(const struct arg *args, struct sample *smp, const char *kw, void *private) { @@ -5029,6 +5096,9 @@ static struct sample_fetch_kw_list smp_logs_kws = {ILH, { { "txn.timer.user", smp_fetch_txn_timers, 0, NULL, SMP_T_SINT, SMP_USE_TXFIN }, /* "Tu" */ { "bc.timer.connect", smp_fetch_conn_timers, 0, NULL, SMP_T_SINT, SMP_USE_L4SRV }, /* "Tc" */ + { "bc_be_queue", smp_fetch_conn_queues, 0, NULL, SMP_T_SINT, SMP_USE_L4SRV }, /* "bq" */ + { "bc_srv_queue", smp_fetch_conn_queues, 0, NULL, SMP_T_SINT, SMP_USE_L4SRV }, /* "sq" */ + { "fc.timer.handshake", smp_fetch_conn_timers, 0, NULL, SMP_T_SINT, SMP_USE_L4CLI }, /* "Th" */ { "fc.timer.total", smp_fetch_conn_timers, 0, NULL, SMP_T_SINT, SMP_USE_SSFIN }, /* "Tt" */ @@ -5063,6 +5133,7 @@ static struct sample_fetch_kw_list smp_kws = {ILH, { { "thread", smp_fetch_thread, 0, NULL, SMP_T_SINT, SMP_USE_CONST }, { "rand", smp_fetch_rand, ARG1(0,SINT), NULL, SMP_T_SINT, SMP_USE_CONST }, { "stopping", smp_fetch_stopping, 0, NULL, SMP_T_BOOL, SMP_USE_INTRN }, + { "uptime", smp_fetch_uptime, 0, NULL, SMP_T_SINT, SMP_USE_CONST }, { "uuid", smp_fetch_uuid, ARG1(0, SINT), smp_check_uuid, SMP_T_STR, SMP_USE_CONST }, { "cpu_calls", smp_fetch_cpu_calls, 0, NULL, SMP_T_SINT, SMP_USE_INTRN }, diff --git a/src/server.c b/src/server.c index 9196fac..caf2f40 100644 --- a/src/server.c +++ b/src/server.c @@ -28,6 +28,7 @@ #include <haproxy/dict-t.h> #include <haproxy/errors.h> #include <haproxy/global.h> +#include <haproxy/guid.h> #include <haproxy/log.h> #include <haproxy/mailers.h> #include <haproxy/namespace.h> @@ -140,18 +141,10 @@ const char *srv_op_st_chg_cause(enum srv_op_st_chg_cause cause) int srv_downtime(const struct server *s) { - if ((s->cur_state != SRV_ST_STOPPED) || s->last_change >= ns_to_sec(now_ns)) // ignore negative time + if ((s->cur_state != SRV_ST_STOPPED) || s->counters.last_change >= ns_to_sec(now_ns)) // ignore negative time return s->down_time; - return ns_to_sec(now_ns) - 
s->last_change + s->down_time; -} - -int srv_lastsession(const struct server *s) -{ - if (s->counters.last_sess) - return ns_to_sec(now_ns) - s->counters.last_sess; - - return -1; + return ns_to_sec(now_ns) - s->counters.last_change + s->down_time; } int srv_getinter(const struct check *check) @@ -170,7 +163,7 @@ int srv_getinter(const struct check *check) /* Update server's addr:svc_port tuple in INET context * - * Must be called under thread isolation to ensure consistent readings accross + * Must be called under thread isolation to ensure consistent readings across * all threads (addr:svc_port might be read without srv lock being held). */ static void _srv_set_inetaddr_port(struct server *srv, @@ -184,6 +177,11 @@ static void _srv_set_inetaddr_port(struct server *srv, else srv->flags &= ~SRV_F_MAPPORTS; + if (srv->proxy->lbprm.update_server_eweight) { + /* some balancers (chash in particular) may use the addr in their routing decisions */ + srv->proxy->lbprm.update_server_eweight(srv); + } + if (srv->log_target && srv->log_target->type == LOG_TARGET_DGRAM) { /* server is used as a log target, manually update log target addr for DGRAM */ ipcpy(addr, srv->log_target->addr); @@ -268,7 +266,7 @@ static struct task *server_atomic_sync(struct task *task, void *context, unsigne px = proxy_find_by_id(data->server.safe.proxy_uuid, PR_CAP_BE, 0); if (!px) continue; - srv = findserver_unique_id(px, data->server.safe.puid, data->server.safe.rid); + srv = server_find_by_id_unique(px, data->server.safe.puid, data->server.safe.rid); if (!srv) continue; @@ -295,7 +293,7 @@ static struct task *server_atomic_sync(struct task *task, void *context, unsigne /* * this requires thread isolation, which is safe since we're the only * task working for the current subscription and we don't hold locks - * or ressources that other threads may depend on to complete a running + * or resources that other threads may depend on to complete a running * cycle. 
Note that we do this way because we assume that this event is * rather rare. */ @@ -306,9 +304,24 @@ static struct task *server_atomic_sync(struct task *task, void *context, unsigne _srv_set_inetaddr_port(srv, &new_addr, data->safe.next.port.svc, data->safe.next.port.map); - /* propagate the changes */ - if (data->safe.purge_conn) /* force connection cleanup on the given server? */ - srv_cleanup_connections(srv); + /* propagate the changes, force connection cleanup */ + if (new_addr.ss_family != AF_UNSPEC && + (srv->next_admin & SRV_ADMF_RMAINT)) { + /* server was previously put under DNS maintenance due + * to DNS error, but addr resolves again, so we must + * put it out of maintenance + */ + srv_clr_admin_flag(srv, SRV_ADMF_RMAINT); + + /* thanks to valid DNS resolution? */ + if (data->safe.updater.dns) { + chunk_reset(&trash); + chunk_printf(&trash, "Server %s/%s administratively READY thanks to valid DNS answer", srv->proxy->id, srv->id); + ha_warning("%s.\n", trash.area); + send_log(srv->proxy, LOG_NOTICE, "%s.\n", trash.area); + } + } + srv_cleanup_connections(srv); srv_set_dyncookie(srv); srv_set_addr_desc(srv, 1); } @@ -437,47 +450,25 @@ void _srv_event_hdl_prepare_state(struct event_hdl_cb_data_server_state *cb_data */ static void _srv_event_hdl_prepare_inetaddr(struct event_hdl_cb_data_server_inetaddr *cb_data, struct server *srv, - const struct sockaddr_storage *next_addr, - unsigned int next_port, uint8_t next_mapports, - uint8_t purge_conn) + const struct server_inetaddr *next_inetaddr, + struct server_inetaddr_updater updater) { - struct sockaddr_storage *prev_addr = &srv->addr; - unsigned int prev_port = srv->svc_port; - uint8_t prev_mapports = !!(srv->flags & SRV_F_MAPPORTS); + struct server_inetaddr prev_inetaddr; + + server_get_inetaddr(srv, &prev_inetaddr); /* only INET families are supported */ - BUG_ON((prev_addr->ss_family != AF_UNSPEC && - prev_addr->ss_family != AF_INET && prev_addr->ss_family != AF_INET6) || - (next_addr->ss_family != 
AF_UNSPEC && - next_addr->ss_family != AF_INET && next_addr->ss_family != AF_INET6)); + BUG_ON((next_inetaddr->family != AF_UNSPEC && + next_inetaddr->family != AF_INET && next_inetaddr->family != AF_INET6)); /* prev */ - cb_data->safe.prev.family = prev_addr->ss_family; - memset(&cb_data->safe.prev.addr, 0, sizeof(cb_data->safe.prev.addr)); - if (prev_addr->ss_family == AF_INET) - cb_data->safe.prev.addr.v4.s_addr = - ((struct sockaddr_in *)prev_addr)->sin_addr.s_addr; - else if (prev_addr->ss_family == AF_INET6) - memcpy(&cb_data->safe.prev.addr.v6, - &((struct sockaddr_in6 *)prev_addr)->sin6_addr, - sizeof(struct in6_addr)); - cb_data->safe.prev.port.svc = prev_port; - cb_data->safe.prev.port.map = prev_mapports; + cb_data->safe.prev = prev_inetaddr; /* next */ - cb_data->safe.next.family = next_addr->ss_family; - memset(&cb_data->safe.next.addr, 0, sizeof(cb_data->safe.next.addr)); - if (next_addr->ss_family == AF_INET) - cb_data->safe.next.addr.v4.s_addr = - ((struct sockaddr_in *)next_addr)->sin_addr.s_addr; - else if (next_addr->ss_family == AF_INET6) - memcpy(&cb_data->safe.next.addr.v6, - &((struct sockaddr_in6 *)next_addr)->sin6_addr, - sizeof(struct in6_addr)); - cb_data->safe.next.port.svc = next_port; - cb_data->safe.next.port.map = next_mapports; + cb_data->safe.next = *next_inetaddr; - cb_data->safe.purge_conn = purge_conn; + /* updater */ + cb_data->safe.updater = updater; } /* server event publishing helper: publish in both global and @@ -900,11 +891,6 @@ static int srv_parse_disabled(char **args, int *cur_arg, static int srv_parse_enabled(char **args, int *cur_arg, struct proxy *curproxy, struct server *newsrv, char **err) { - if (newsrv->flags & SRV_F_DYNAMIC) { - ha_warning("Keyword 'enabled' is ignored for dynamic servers. 
It will be rejected from 3.0 onward."); - return 0; - } - newsrv->next_admin &= ~SRV_ADMF_CMAINT & ~SRV_ADMF_FMAINT; newsrv->next_state = SRV_ST_RUNNING; newsrv->check.state &= ~CHK_ST_PAUSED; @@ -933,6 +919,28 @@ static int srv_parse_error_limit(char **args, int *cur_arg, return 0; } +/* Parse the "guid" keyword */ +static int srv_parse_guid(char **args, int *cur_arg, + struct proxy *curproxy, struct server *newsrv, char **err) +{ + const char *guid; + char *guid_err = NULL; + + if (!*args[*cur_arg + 1]) { + memprintf(err, "'%s' : expects an argument", args[*cur_arg]); + return ERR_ALERT | ERR_FATAL; + } + + guid = args[*cur_arg + 1]; + if (guid_insert(&newsrv->obj_type, guid, &guid_err)) { + memprintf(err, "'%s': %s", args[*cur_arg], guid_err); + ha_free(&guid_err); + return ERR_ALERT | ERR_FATAL; + } + + return 0; +} + /* Parse the "ws" keyword */ static int srv_parse_ws(char **args, int *cur_arg, struct proxy *curproxy, struct server *newsrv, char **err) @@ -960,6 +968,32 @@ static int srv_parse_ws(char **args, int *cur_arg, return 0; } +/* Parse the "hash-key" server keyword */ +static int srv_parse_hash_key(char **args, int *cur_arg, + struct proxy *curproxy, struct server *newsrv, char **err) +{ + if (!args[*cur_arg + 1]) { + memprintf(err, "'%s expects 'id', 'addr', or 'addr-port' value", args[*cur_arg]); + return ERR_ALERT | ERR_FATAL; + } + + if (strcmp(args[*cur_arg + 1], "id") == 0) { + newsrv->hash_key = SRV_HASH_KEY_ID; + } + else if (strcmp(args[*cur_arg + 1], "addr") == 0) { + newsrv->hash_key = SRV_HASH_KEY_ADDR; + } + else if (strcmp(args[*cur_arg + 1], "addr-port") == 0) { + newsrv->hash_key = SRV_HASH_KEY_ADDR_PORT; + } + else { + memprintf(err, "'%s' has to be 'id', 'addr', or 'addr-port'", args[*cur_arg]); + return ERR_ALERT | ERR_FATAL; + } + + return 0; +} + /* Parse the "init-addr" server keyword */ static int srv_parse_init_addr(char **args, int *cur_arg, struct proxy *curproxy, struct server *newsrv, char **err) @@ -1119,6 +1153,26 @@ 
static int srv_parse_pool_purge_delay(char **args, int *cur_arg, struct proxy *c return 0; } +static int srv_parse_pool_conn_name(char **args, int *cur_arg, struct proxy *curproxy, struct server *newsrv, char **err) +{ + char *arg; + + arg = args[*cur_arg + 1]; + if (!*arg) { + memprintf(err, "'%s' expects <value> as argument", args[*cur_arg]); + return ERR_ALERT | ERR_FATAL; + } + + ha_free(&newsrv->pool_conn_name); + newsrv->pool_conn_name = strdup(arg); + if (!newsrv->pool_conn_name) { + memprintf(err, "'%s' : out of memory", args[*cur_arg]); + return ERR_ALERT | ERR_FATAL; + } + + return 0; +} + static int srv_parse_pool_low_conn(char **args, int *cur_arg, struct proxy *curproxy, struct server *newsrv, char **err) { char *arg; @@ -1199,6 +1253,7 @@ static int srv_parse_namespace(char **args, int *cur_arg, if (strcmp(arg, "*") == 0) { /* Use the namespace associated with the connection (if present). */ newsrv->flags |= SRV_F_USE_NS_FROM_PP; + global.last_checks |= LSTCHK_SYSADM; return 0; } @@ -1217,6 +1272,7 @@ static int srv_parse_namespace(char **args, int *cur_arg, memprintf(err, "Cannot open namespace '%s'", arg); return ERR_ALERT | ERR_FATAL; } + global.last_checks |= LSTCHK_SYSADM; return 0; #else @@ -2230,9 +2286,11 @@ static struct srv_kw_list srv_kws = { "ALL", { }, { { "backup", srv_parse_backup, 0, 1, 1 }, /* Flag as backup server */ { "cookie", srv_parse_cookie, 1, 1, 1 }, /* Assign a cookie to the server */ { "disabled", srv_parse_disabled, 0, 1, 1 }, /* Start the server in 'disabled' state */ - { "enabled", srv_parse_enabled, 0, 1, 1 }, /* Start the server in 'enabled' state */ + { "enabled", srv_parse_enabled, 0, 1, 0 }, /* Start the server in 'enabled' state */ { "error-limit", srv_parse_error_limit, 1, 1, 1 }, /* Configure the consecutive count of check failures to consider a server on error */ + { "guid", srv_parse_guid, 1, 0, 1 }, /* Set global unique ID of the server */ { "ws", srv_parse_ws, 1, 1, 1 }, /* websocket protocol */ + { 
"hash-key", srv_parse_hash_key, 1, 1, 1 }, /* Configure how chash keys are computed */ { "id", srv_parse_id, 1, 0, 1 }, /* set id# of server */ { "init-addr", srv_parse_init_addr, 1, 1, 0 }, /* */ { "log-bufsize", srv_parse_log_bufsize, 1, 1, 0 }, /* Set the ring bufsize for log server (only for log backends) */ @@ -2251,6 +2309,7 @@ static struct srv_kw_list srv_kws = { "ALL", { }, { { "on-error", srv_parse_on_error, 1, 1, 1 }, /* Configure the action on check failure */ { "on-marked-down", srv_parse_on_marked_down, 1, 1, 1 }, /* Configure the action when a server is marked down */ { "on-marked-up", srv_parse_on_marked_up, 1, 1, 1 }, /* Configure the action when a server is marked up */ + { "pool-conn-name", srv_parse_pool_conn_name, 1, 1, 1 }, /* Define expression to identify connections in idle pool */ { "pool-low-conn", srv_parse_pool_low_conn, 1, 1, 1 }, /* Set the min number of orphan idle connecbefore being allowed to pick from other threads */ { "pool-max-conn", srv_parse_pool_max_conn, 1, 1, 1 }, /* Set the max number of orphan idle connections, -1 means unlimited */ { "pool-purge-delay", srv_parse_pool_purge_delay, 1, 1, 1 }, /* Set the time before we destroy orphan idle connections, defaults to 1s */ @@ -2290,17 +2349,19 @@ void server_recalc_eweight(struct server *sv, int must_update) struct proxy *px = sv->proxy; unsigned w; - if (ns_to_sec(now_ns) < sv->last_change || ns_to_sec(now_ns) >= sv->last_change + sv->slowstart) { - /* go to full throttle if the slowstart interval is reached */ - if (sv->next_state == SRV_ST_STARTING) + if (ns_to_sec(now_ns) < sv->counters.last_change || ns_to_sec(now_ns) >= sv->counters.last_change + sv->slowstart) { + /* go to full throttle if the slowstart interval is reached unless server is currently down */ + if ((sv->cur_state != SRV_ST_STOPPED) && (sv->next_state == SRV_ST_STARTING)) sv->next_state = SRV_ST_RUNNING; } /* We must take care of not pushing the server to full throttle during slow starts. 
* It must also start immediately, at least at the minimal step when leaving maintenance. */ - if ((sv->next_state == SRV_ST_STARTING) && (px->lbprm.algo & BE_LB_PROP_DYN)) - w = (px->lbprm.wdiv * (ns_to_sec(now_ns) - sv->last_change) + sv->slowstart) / sv->slowstart; + if ((sv->cur_state == SRV_ST_STOPPED) && (sv->next_state == SRV_ST_STARTING) && (px->lbprm.algo & BE_LB_PROP_DYN)) + w = 1; + else if ((sv->next_state == SRV_ST_STARTING) && (px->lbprm.algo & BE_LB_PROP_DYN)) + w = (px->lbprm.wdiv * (ns_to_sec(now_ns) - sv->counters.last_change) + sv->slowstart) / sv->slowstart; else w = px->lbprm.wdiv; @@ -2334,7 +2395,7 @@ const char *server_parse_weight_change_request(struct server *sv, w = strtol(weight_str, &end, 10); if (end == weight_str) - return "Empty weight string empty or preceded by garbage"; + return "Empty weight string empty or preceded by garbage\n"; else if (end[0] == '%' && end[1] == '\0') { if (w < 0) return "Relative weight must be positive.\n"; @@ -2348,7 +2409,7 @@ const char *server_parse_weight_change_request(struct server *sv, else if (w < 0 || w > 256) return "Absolute weight can only be between 0 and 256 inclusive.\n"; else if (end[0] != '\0') - return "Trailing garbage in weight string"; + return "Trailing garbage in weight string\n"; if (w && w != sv->iweight && !(px->lbprm.algo & BE_LB_PROP_DYN)) return "Backend is using a static LB algorithm and only accepts weights '0%' and '100%'.\n"; @@ -2360,32 +2421,6 @@ const char *server_parse_weight_change_request(struct server *sv, } /* - * Parses <addr_str> and configures <sv> accordingly. <from> precise - * the source of the change in the associated message log. - * Returns: - * - error string on error - * - NULL on success - * - * Must be called with the server lock held. 
- */ -const char *server_parse_addr_change_request(struct server *sv, - const char *addr_str, const char *updater) -{ - unsigned char ip[INET6_ADDRSTRLEN]; - - if (inet_pton(AF_INET6, addr_str, ip)) { - srv_update_addr(sv, ip, AF_INET6, updater); - return NULL; - } - if (inet_pton(AF_INET, addr_str, ip)) { - srv_update_addr(sv, ip, AF_INET, updater); - return NULL; - } - - return "Could not understand IP address format.\n"; -} - -/* * Must be called with the server lock held. */ const char *server_parse_maxconn_change_request(struct server *sv, @@ -2399,9 +2434,9 @@ const char *server_parse_maxconn_change_request(struct server *sv, v = strtol(maxconn_str, &end, 10); if (end == maxconn_str) - return "maxconn string empty or preceded by garbage"; + return "maxconn string empty or preceded by garbage\n"; else if (end[0] != '\0') - return "Trailing garbage in maxconn string"; + return "Trailing garbage in maxconn string\n"; if (sv->maxconn == sv->minconn) { // static maxconn sv->maxconn = sv->minconn = v; @@ -2415,42 +2450,56 @@ const char *server_parse_maxconn_change_request(struct server *sv, return NULL; } -static struct sample_expr *srv_sni_sample_parse_expr(struct server *srv, struct proxy *px, - const char *file, int linenum, char **err) +/* Interpret <expr> as sample expression. This function is reserved for + * internal server allocation. On parsing use parse_srv_expr() for extra sample + * check validity. + * + * Returns the allocated sample on success or NULL on error. 
+ */ +struct sample_expr *_parse_srv_expr(char *expr, struct arg_list *args_px, + const char *file, int linenum, char **err) { int idx; const char *args[] = { - srv->sni_expr, + expr, NULL, }; idx = 0; - px->conf.args.ctx = ARGC_SRV; + args_px->ctx = ARGC_SRV; - return sample_parse_expr((char **)args, &idx, file, linenum, err, &px->conf.args, NULL); + return sample_parse_expr((char **)args, &idx, file, linenum, err, args_px, NULL); } -int server_parse_sni_expr(struct server *newsrv, struct proxy *px, char **err) +/* Interpret <str> if not empty as a sample expression and store it into <out>. + * Contrary to _parse_srv_expr(), fetch scope validity is checked to ensure it + * is valid on a server line context. It also updates <px> HTTP mode + * requirement depending on fetch method used. + * + * Returns 0 on success else non zero. + */ +static int parse_srv_expr(char *str, struct sample_expr **out, struct proxy *px, + char **err) { struct sample_expr *expr; - expr = srv_sni_sample_parse_expr(newsrv, px, px->conf.file, px->conf.line, err); - if (!expr) { - memprintf(err, "error detected while parsing sni expression : %s", *err); + if (!str) + return 0; + + expr = _parse_srv_expr(str, &px->conf.args, px->conf.file, px->conf.line, err); + if (!expr) return ERR_ALERT | ERR_FATAL; - } if (!(expr->fetch->val & SMP_VAL_BE_SRV_CON)) { - memprintf(err, "error detected while parsing sni expression : " - " fetch method '%s' extracts information from '%s', " + memprintf(err, "fetch method '%s' extracts information from '%s', " "none of which is available here.", - newsrv->sni_expr, sample_src_names(expr->fetch->use)); + str, sample_src_names(expr->fetch->use)); return ERR_ALERT | ERR_FATAL; } px->http_needed |= !!(expr->fetch->use & SMP_USE_HTTP_ANY); - release_sample_expr(newsrv->ssl_ctx.sni); - newsrv->ssl_ctx.sni = expr; + release_sample_expr(*out); + *out = expr; return 0; } @@ -2634,6 +2683,45 @@ int srv_prepare_for_resolution(struct server *srv, const char *hostname) 
return -1; } +/* Initialize default values for <srv>. Used both for dynamic servers and + * default servers. The latter are not initialized via new_server(), hence this + * function purpose. For static servers, srv_settings_cpy() is used instead + * reusing their default server instance. + */ +void srv_settings_init(struct server *srv) +{ + srv->check.inter = DEF_CHKINTR; + srv->check.fastinter = 0; + srv->check.downinter = 0; + srv->check.rise = DEF_RISETIME; + srv->check.fall = DEF_FALLTIME; + srv->check.port = 0; + + srv->agent.inter = DEF_CHKINTR; + srv->agent.fastinter = 0; + srv->agent.downinter = 0; + srv->agent.rise = DEF_AGENT_RISETIME; + srv->agent.fall = DEF_AGENT_FALLTIME; + srv->agent.port = 0; + + srv->maxqueue = 0; + srv->minconn = 0; + srv->maxconn = 0; + + srv->max_reuse = -1; + srv->max_idle_conns = -1; + srv->pool_purge_delay = 5000; + + srv->slowstart = 0; + + srv->onerror = DEF_HANA_ONERR; + srv->consecutive_errors_limit = DEF_HANA_ERRLIMIT; + + srv->uweight = srv->iweight = 1; + + LIST_INIT(&srv->pp_tlvs); +} + /* * Copy <src> server settings to <srv> server allocating * everything needed. 
@@ -2704,6 +2792,7 @@ void srv_settings_cpy(struct server *srv, const struct server *src, int srv_tmpl srv->minconn = src->minconn; srv->maxconn = src->maxconn; srv->slowstart = src->slowstart; + srv->hash_key = src->hash_key; srv->observe = src->observe; srv->onerror = src->onerror; srv->onmarkeddown = src->onmarkeddown; @@ -2751,6 +2840,8 @@ void srv_settings_cpy(struct server *srv, const struct server *src, int srv_tmpl srv->tcp_ut = src->tcp_ut; #endif srv->mux_proto = src->mux_proto; + if (srv->pool_conn_name) + srv->pool_conn_name = strdup(srv->pool_conn_name); srv->pool_purge_delay = src->pool_purge_delay; srv->low_idle_conns = src->low_idle_conns; srv->max_idle_conns = src->max_idle_conns; @@ -2806,7 +2897,7 @@ struct server *new_server(struct proxy *proxy) srv->rid = 0; /* rid defaults to 0 */ srv->next_state = SRV_ST_RUNNING; /* early server setup */ - srv->last_change = ns_to_sec(now_ns); + srv->counters.last_change = ns_to_sec(now_ns); srv->check.obj_type = OBJ_TYPE_CHECK; srv->check.status = HCHK_STATUS_INI; @@ -2820,6 +2911,10 @@ struct server *new_server(struct proxy *proxy) srv->agent.proxy = proxy; srv->xprt = srv->check.xprt = srv->agent.xprt = xprt_get(XPRT_RAW); + MT_LIST_INIT(&srv->sess_conns); + + guid_init(&srv->guid); + srv->extra_counters = NULL; #ifdef USE_OPENSSL HA_RWLOCK_INIT(&srv->ssl_ctx.lock); @@ -2840,6 +2935,8 @@ void srv_take(struct server *srv) /* deallocate common server parameters (may be used by default-servers) */ void srv_free_params(struct server *srv) { + struct srv_pp_tlv_list *srv_tlv = NULL; + free(srv->cookie); free(srv->rdr_pfx); free(srv->hostname); @@ -2848,6 +2945,8 @@ void srv_free_params(struct server *srv) free(srv->per_thr); free(srv->per_tgrp); free(srv->curr_idle_thr); + free(srv->pool_conn_name); + release_sample_expr(srv->pool_conn_name_expr); free(srv->resolvers_id); free(srv->addr_node.key); free(srv->lb_nodes); @@ -2858,6 +2957,14 @@ void srv_free_params(struct server *srv) if (xprt_get(XPRT_SSL) && 
xprt_get(XPRT_SSL)->destroy_srv) xprt_get(XPRT_SSL)->destroy_srv(srv); + + while (!LIST_ISEMPTY(&srv->pp_tlvs)) { + srv_tlv = LIST_ELEM(srv->pp_tlvs.n, struct srv_pp_tlv_list *, list); + LIST_DEL_INIT(&srv_tlv->list); + lf_expr_deinit(&srv_tlv->fmt); + ha_free(&srv_tlv->fmt_string); + ha_free(&srv_tlv); + } } /* Deallocate a server <srv> and its member. <srv> must be allocated. For @@ -2882,6 +2989,8 @@ struct server *srv_drop(struct server *srv) if (HA_ATOMIC_SUB_FETCH(&srv->refcount, 1)) goto end; + guid_remove(&srv->guid); + /* make sure we are removed from our 'next->prev_deleted' list * This doesn't require full thread isolation as we're using mt lists * However this could easily be turned into regular list if required @@ -3018,6 +3127,12 @@ static int _srv_parse_tmpl_init(struct server *srv, struct proxy *px) int i; struct server *newsrv; + /* Set the first server's ID. */ + _srv_parse_set_id_from_prefix(srv, srv->tmpl_info.prefix, srv->tmpl_info.nb_low); + srv->conf.name.key = srv->id; + ebis_insert(&curproxy->conf.used_server_name, &srv->conf.name); + + /* then create other servers from this one */ for (i = srv->tmpl_info.nb_low + 1; i <= srv->tmpl_info.nb_high; i++) { newsrv = new_server(px); if (!newsrv) @@ -3029,8 +3144,21 @@ static int _srv_parse_tmpl_init(struct server *srv, struct proxy *px) srv_settings_cpy(newsrv, srv, 1); srv_prepare_for_resolution(newsrv, srv->hostname); + /* Use sni as fallback if pool_conn_name isn't set */ + if (!newsrv->pool_conn_name && newsrv->sni_expr) { + newsrv->pool_conn_name = strdup(newsrv->sni_expr); + if (!newsrv->pool_conn_name) + goto err; + } + + if (newsrv->pool_conn_name) { + newsrv->pool_conn_name_expr = _parse_srv_expr(srv->pool_conn_name, &px->conf.args, NULL, 0, NULL); + if (!newsrv->pool_conn_name_expr) + goto err; + } + if (newsrv->sni_expr) { - newsrv->ssl_ctx.sni = srv_sni_sample_parse_expr(newsrv, px, NULL, 0, NULL); + newsrv->ssl_ctx.sni = _parse_srv_expr(srv->sni_expr, &px->conf.args, NULL, 0, NULL); 
if (!newsrv->ssl_ctx.sni) goto err; } @@ -3045,6 +3173,9 @@ static int _srv_parse_tmpl_init(struct server *srv, struct proxy *px) /* Linked backwards first. This will be restablished after parsing. */ newsrv->next = px->srv; px->srv = newsrv; + + newsrv->conf.name.key = newsrv->id; + ebis_insert(&curproxy->conf.used_server_name, &newsrv->conf.name); } _srv_parse_set_id_from_prefix(srv, srv->tmpl_info.prefix, srv->tmpl_info.nb_low); @@ -3316,30 +3447,18 @@ static int _srv_parse_init(struct server **srv, char **args, int *cur_arg, /* Copy default server settings to new server */ srv_settings_cpy(newsrv, &curproxy->defsrv, 0); } else { - /* Initialize dynamic server weight to 1 */ - newsrv->uweight = newsrv->iweight = 1; + srv_settings_init(newsrv); /* A dynamic server is disabled on startup */ newsrv->next_admin = SRV_ADMF_FMAINT; newsrv->next_state = SRV_ST_STOPPED; server_recalc_eweight(newsrv, 0); - - /* Set default values for checks */ - newsrv->check.inter = DEF_CHKINTR; - newsrv->check.rise = DEF_RISETIME; - newsrv->check.fall = DEF_FALLTIME; - - newsrv->agent.inter = DEF_CHKINTR; - newsrv->agent.rise = DEF_AGENT_RISETIME; - newsrv->agent.fall = DEF_AGENT_FALLTIME; } HA_SPIN_INIT(&newsrv->lock); } else { *srv = newsrv = &curproxy->defsrv; *cur_arg = 1; - newsrv->resolv_opts.family_prio = AF_INET6; - newsrv->resolv_opts.accept_duplicate_ip = 0; } free(fqdn); @@ -3426,25 +3545,6 @@ out: return err_code; } -/* This function is first intended to be used through parse_server to - * initialize a new server on startup. - */ -static int _srv_parse_sni_expr_init(char **args, int cur_arg, - struct server *srv, struct proxy *proxy, - char **errmsg) -{ - int ret; - - if (!srv->sni_expr) - return 0; - - ret = server_parse_sni_expr(srv, proxy, errmsg); - if (!ret) - return 0; - - return ret; -} - /* Server initializations finalization. * Initialize health check, agent check, SNI expression and outgoing TLVs if enabled. * Must not be called for a default server instance. 
@@ -3471,9 +3571,27 @@ static int _srv_parse_finalize(char **args, int cur_arg, return ERR_ALERT | ERR_FATAL; } - if ((ret = _srv_parse_sni_expr_init(args, cur_arg, srv, px, &errmsg)) != 0) { + if ((ret = parse_srv_expr(srv->sni_expr, &srv->ssl_ctx.sni, px, &errmsg))) { if (errmsg) { - ha_alert("%s\n", errmsg); + ha_alert("error detected while parsing sni expression : %s.\n", errmsg); + free(errmsg); + } + return ret; + } + + /* Use sni as fallback if pool_conn_name isn't set */ + if (!srv->pool_conn_name && srv->sni_expr) { + srv->pool_conn_name = strdup(srv->sni_expr); + if (!srv->pool_conn_name) { + ha_alert("out of memory\n"); + return ERR_ALERT | ERR_FATAL; + } + } + + if ((ret = parse_srv_expr(srv->pool_conn_name, &srv->pool_conn_name_expr, + px, &errmsg))) { + if (errmsg) { + ha_alert("error detected while parsing pool-conn-name expression : %s.\n", errmsg); free(errmsg); } return ret; @@ -3490,7 +3608,7 @@ static int _srv_parse_finalize(char **args, int cur_arg, } list_for_each_entry(srv_tlv, &srv->pp_tlvs, list) { - LIST_INIT(&srv_tlv->fmt); + lf_expr_init(&srv_tlv->fmt); if (srv_tlv->fmt_string && unlikely(!parse_logformat_string(srv_tlv->fmt_string, srv->proxy, &srv_tlv->fmt, 0, SMP_VAL_BE_SRV_CON, &errmsg))) { if (errmsg) { @@ -3562,8 +3680,13 @@ int parse_server(const char *file, int linenum, char **args, goto out; } - if (parse_flags & SRV_PARSE_TEMPLATE) + if (parse_flags & SRV_PARSE_TEMPLATE) { _srv_parse_tmpl_init(newsrv, curproxy); + } + else if (!(parse_flags & SRV_PARSE_DEFAULT_SERVER)) { + newsrv->conf.name.key = newsrv->id; + ebis_insert(&curproxy->conf.used_server_name, &newsrv->conf.name); + } /* If the server id is fixed, insert it in the proxy used_id tree. * This is needed to detect a later duplicate id via srv_parse_id. @@ -3610,6 +3733,25 @@ struct server *server_find_by_id(struct proxy *bk, int id) return curserver; } +/* + * This function finds a server with matching "<puid> x <rid>" within + * selected backend <bk>. 
+ * Using the combination of proxy-uid + revision id ensures that the function + * will either return the server we're expecting or NULL if it has been removed + * from the proxy (<id> is unique within the list, but it is not true over the + * process lifetime as new servers may reuse the id of a previously deleted + * server). + */ +struct server *server_find_by_id_unique(struct proxy *bk, int id, uint32_t rid) +{ + struct server *curserver; + + curserver = server_find_by_id(bk, id); + if (!curserver || curserver->rid != rid) + return NULL; + return curserver; +} + /* Returns a pointer to the first server matching either name <name>, or id * if <name> starts with a '#'. NULL is returned if no match is found. * the lookup is performed in the backend <bk> @@ -3628,20 +3770,43 @@ struct server *server_find_by_name(struct proxy *bk, const char *name) curserver = NULL; if (*name == '#') { curserver = server_find_by_id(bk, atoi(name + 1)); - if (curserver) - return curserver; } else { - curserver = bk->srv; - - while (curserver && (strcmp(curserver->id, name) != 0)) - curserver = curserver->next; + struct ebpt_node *node; - if (curserver) - return curserver; + node = ebis_lookup(&bk->conf.used_server_name, name); + if (node) + curserver = container_of(node, struct server, conf.name); } - return NULL; + return curserver; +} + +/* + * This function finds a server with matching "<name> x <rid>" within + * selected backend <bk>. + * Using the combination of name + revision id ensures that the function + * will either return the server we're expecting or NULL if it has been removed + * from the proxy. For this we assume that <name> is unique within the list, + * which is the case in most setups, but in rare cases the user may have + * enforced duplicate server names in the initial config (ie: if he intends to + * use numerical IDs for identification instead). 
In this particular case, the + * function will not work as expected so server_find_by_id_unique() should be + * used to match a unique server instead. + * + * Just like server_find_by_id_unique(), if a server is deleted and a new server + * reuses the same name, the rid check will prevent the function from returning + * a different server from the one we were expecting to match against at a given + * time. + */ +struct server *server_find_by_name_unique(struct proxy *bk, const char *name, uint32_t rid) +{ + struct server *curserver; + + curserver = server_find_by_name(bk, name); + if (!curserver || curserver->rid != rid) + return NULL; + return curserver; } struct server *server_find_best_match(struct proxy *bk, char *name, int id, int *diff) @@ -3705,101 +3870,332 @@ struct server *server_find_best_match(struct proxy *bk, char *name, int id, int return NULL; } -/* - * update a server's current IP address. - * ip is a pointer to the new IP address, whose address family is ip_sin_family. - * ip is in network format. - * updater is a string which contains an information about the requester of the update. - * updater is used if not NULL. +/* This functions retrieves server's addr and port to fill + * <inetaddr> struct passed as argument. * - * A log line and a stderr warning message is generated based on server's backend options. - * - * Must be called with the server lock held. + * This may only be used under inet context. 
*/ -int srv_update_addr(struct server *s, void *ip, int ip_sin_family, const char *updater) +void server_get_inetaddr(struct server *s, struct server_inetaddr *inetaddr) { - union { - struct event_hdl_cb_data_server_inetaddr addr; - struct event_hdl_cb_data_server common; - } cb_data; - struct sockaddr_storage new_addr = { }; // shut up gcc warning + struct sockaddr_storage *addr = &s->addr; + unsigned int port = s->svc_port; + uint8_t mapports = !!(s->flags & SRV_F_MAPPORTS); - /* save the new IP family & address if necessary */ - switch (ip_sin_family) { - case AF_INET: - if (s->addr.ss_family == ip_sin_family && - !memcmp(ip, &((struct sockaddr_in *)&s->addr)->sin_addr.s_addr, 4)) - return 0; - break; - case AF_INET6: - if (s->addr.ss_family == ip_sin_family && - !memcmp(ip, &((struct sockaddr_in6 *)&s->addr)->sin6_addr.s6_addr, 16)) - return 0; - break; - }; + /* only INET families are supported */ + BUG_ON((addr->ss_family != AF_UNSPEC && + addr->ss_family != AF_INET && addr->ss_family != AF_INET6)); - /* generates a log line and a warning on stderr */ - if (1) { - /* book enough space for both IPv4 and IPv6 */ - char oldip[INET6_ADDRSTRLEN]; - char newip[INET6_ADDRSTRLEN]; + inetaddr->family = addr->ss_family; + memset(&inetaddr->addr, 0, sizeof(inetaddr->addr)); - memset(oldip, '\0', INET6_ADDRSTRLEN); - memset(newip, '\0', INET6_ADDRSTRLEN); + if (addr->ss_family == AF_INET) + inetaddr->addr.v4 = + ((struct sockaddr_in *)addr)->sin_addr; + else if (addr->ss_family == AF_INET6) + inetaddr->addr.v6 = + ((struct sockaddr_in6 *)addr)->sin6_addr; - /* copy old IP address in a string */ - switch (s->addr.ss_family) { - case AF_INET: - inet_ntop(s->addr.ss_family, &((struct sockaddr_in *)&s->addr)->sin_addr, oldip, INET_ADDRSTRLEN); + inetaddr->port.svc = port; + inetaddr->port.map = mapports; +} + +/* get human readable name for server_inetaddr_updater .by struct member + */ +const char *server_inetaddr_updater_by_to_str(enum server_inetaddr_updater_by by) +{ + 
switch (by) { + case SERVER_INETADDR_UPDATER_BY_CLI: + return "stats socket command"; + case SERVER_INETADDR_UPDATER_BY_LUA: + return "Lua script"; + case SERVER_INETADDR_UPDATER_BY_DNS_AR: + return "DNS additional record"; + case SERVER_INETADDR_UPDATER_BY_DNS_CACHE: + return "DNS cache"; + case SERVER_INETADDR_UPDATER_BY_DNS_RESOLVER: + return "DNS resolver"; + default: + /* unknown, don't mention updater */ break; - case AF_INET6: - inet_ntop(s->addr.ss_family, &((struct sockaddr_in6 *)&s->addr)->sin6_addr, oldip, INET6_ADDRSTRLEN); + } + return NULL; +} + +/* append inetaddr updater info to chunk <out> + */ +static void _srv_append_inetaddr_updater_info(struct buffer *out, + struct server *s, + struct server_inetaddr_updater updater) +{ + switch (updater.by) { + case SERVER_INETADDR_UPDATER_BY_DNS_RESOLVER: + /* we need to report the resolver/nameserver id which is + * responsible for the update + */ + { + struct resolvers *r = s->resolvers; + struct dns_nameserver *ns; + + /* we already know that the update comes from the + * resolver section linked to the server, but we + * need to find out which nameserver handled the dns + * query + */ + BUG_ON(!r); + ns = find_nameserver_by_resolvers_and_id(r, updater.u.dns_resolver.ns_id); + BUG_ON(!ns); + chunk_appendf(out, " by '%s/%s'", r->id, ns->id); + } break; default: - strlcpy2(oldip, "(none)", sizeof(oldip)); + { + const char *by_name; + + by_name = server_inetaddr_updater_by_to_str(updater.by); + if (by_name) + chunk_appendf(out, " by '%s'", by_name); + } break; - }; + } +} - /* copy new IP address in a string */ - switch (ip_sin_family) { +/* server_set_inetaddr() helper */ +static void _addr_to_str(int family, const void *addr, char *addr_str, size_t len) +{ + memset(addr_str, 0, len); + switch (family) { case AF_INET: - inet_ntop(ip_sin_family, ip, newip, INET_ADDRSTRLEN); - break; case AF_INET6: - inet_ntop(ip_sin_family, ip, newip, INET6_ADDRSTRLEN); + inet_ntop(family, addr, addr_str, len); break; - }; + 
default: + strlcpy2(addr_str, "(none)", len); + break; + } +} +/* server_set_inetaddr() helper */ +static int _inetaddr_addr_cmp(const struct server_inetaddr *inetaddr, const struct sockaddr_storage *addr) +{ + struct in_addr *v4; + struct in6_addr *v6; + + if (inetaddr->family != addr->ss_family) + return 1; + + if (inetaddr->family == AF_INET) { + v4 = &((struct sockaddr_in *)addr)->sin_addr; + if (memcmp(&inetaddr->addr.v4, v4, sizeof(struct in_addr))) + return 1; + } + else if (inetaddr->family == AF_INET6) { + v6 = &((struct sockaddr_in6 *)addr)->sin6_addr; + if (memcmp(&inetaddr->addr.v6, v6, sizeof(struct in6_addr))) + return 1; + } + + return 0; // both inetaddr storage are equivalent +} + +/* This function sets a server's addr and port in inet context based on new + * inetaddr input + * + * The function first does the following, in that order: + * - checks if an update is required (new IP or port is different than current + * one) + * - check the update is allowed: + * - allow all changes if no CHECKS are configured + * - if CHECK is configured: + * - if switch to port map (SRV_F_MAPPORTS), ensure health check have their + * own ports + * - applies required changes to both ADDR and PORT if both 'required' and + * 'allowed' conditions are met. + * + * Caller can pass <msg> buffer so that it gets some information about the + * operation. It may as well provide <updater> so that messages mention that + * the update was performed on the behalf of it. + * + * <inetaddr> family may be set to UNSPEC to reset server's addr + * + * Caller must set <inetaddr>->port.map to 1 if <inetaddr>->port.svc must be + * handled as an offset + * + * The function returns 1 if an update was performed and 0 if nothing was + * changed. 
+ */ +int server_set_inetaddr(struct server *s, + const struct server_inetaddr *inetaddr, + struct server_inetaddr_updater updater, struct buffer *msg) +{ + union { + struct event_hdl_cb_data_server_inetaddr addr; + struct event_hdl_cb_data_server common; + } cb_data; + char addr_str[INET6_ADDRSTRLEN]; + uint16_t current_port; + uint8_t ip_change = 0; + uint8_t port_change = 0; + int ret = 0; + + /* only INET families are supported */ + BUG_ON((inetaddr->family != AF_UNSPEC && + inetaddr->family != AF_INET && inetaddr->family != AF_INET6) || + (s->addr.ss_family != AF_UNSPEC && + s->addr.ss_family != AF_INET && s->addr.ss_family != AF_INET6)); + + /* ignore if no change */ + if (!_inetaddr_addr_cmp(inetaddr, &s->addr)) + goto port; + + ip_change = 1; + + /* update report for caller */ + if (msg) { + void *from_ptr = NULL; + + if (s->addr.ss_family == AF_INET) + from_ptr = &((struct sockaddr_in *)&s->addr)->sin_addr; + else if (s->addr.ss_family == AF_INET6) + from_ptr = &((struct sockaddr_in6 *)&s->addr)->sin6_addr; - /* save log line into a buffer */ - chunk_printf(&trash, "%s/%s changed its IP from %s to %s by %s", - s->proxy->id, s->id, oldip, newip, updater); + _addr_to_str(s->addr.ss_family, from_ptr, addr_str, sizeof(addr_str)); + chunk_printf(msg, "IP changed from '%s'", addr_str); + _addr_to_str(inetaddr->family, &inetaddr->addr, addr_str, sizeof(addr_str)); + chunk_appendf(msg, " to '%s'", addr_str); + } + + if (inetaddr->family == AF_UNSPEC) + goto out; // ignore port information when unsetting addr + + port: + /* collection data currently setup */ + current_port = s->svc_port; + + /* check if caller triggers a port mapped or offset */ + if (inetaddr->port.map) { + /* check if server currently uses port map */ + if (!(s->flags & SRV_F_MAPPORTS)) { + /* we're switching from a fixed port to a SRV_F_MAPPORTS + * (mapped) port, prevent PORT change if check is enabled + * and it doesn't have it's dedicated port while switching + * to port mapping + */ + if 
((s->check.state & CHK_ST_ENABLED) && !s->check.port) { + if (msg) { + if (ip_change) + chunk_appendf(msg, ", "); + chunk_appendf(msg, "can't change <port> to port map because it is incompatible with current health check port configuration (use 'port' statement from the 'server' directive)."); + } + goto out; + } + /* switch from fixed port to port map mandatorily triggers + * a port change + */ + port_change = 1; + } + /* else we're already using port maps */ + else { + port_change = current_port != inetaddr->port.svc; + } + } + /* fixed port */ + else { + if ((s->flags & SRV_F_MAPPORTS)) + port_change = 1; // changing from mapped to fixed + else + port_change = current_port != inetaddr->port.svc; + } + + /* update response message about PORT change */ + if (port_change && msg) { + if (ip_change) + chunk_appendf(msg, ", "); + + chunk_appendf(msg, "port changed from '"); + if (s->flags & SRV_F_MAPPORTS) + chunk_appendf(msg, "+"); + + chunk_appendf(msg, "%d' to '", s->svc_port); + if (inetaddr->port.map) + chunk_appendf(msg, "+"); + chunk_appendf(msg, "%d'", inetaddr->port.svc); + } + + out: + if (ip_change || port_change) { + _srv_event_hdl_prepare(&cb_data.common, s, 0); + _srv_event_hdl_prepare_inetaddr(&cb_data.addr, s, + inetaddr, + updater); + + /* server_atomic_sync_task will apply the changes for us */ + _srv_event_hdl_publish(EVENT_HDL_SUB_SERVER_INETADDR, cb_data, s); + + ret = 1; + } + + if (ret && msg && updater.by != SERVER_INETADDR_UPDATER_BY_NONE) + _srv_append_inetaddr_updater_info(msg, s, updater); + return ret; +} + +/* Sets new server's addr and/or svc_port, then send a log and report a + * warning on stderr if something has changed. + * + * Returns 1 if something has changed, 0 otherwise. + * see server_set_inetaddr() for more information. 
+ */ +int server_set_inetaddr_warn(struct server *s, + const struct server_inetaddr *inetaddr, + struct server_inetaddr_updater updater) +{ + struct buffer *msg = get_trash_chunk(); + int ret; + + chunk_reset(msg); + + ret = server_set_inetaddr(s, inetaddr, updater, msg); + if (msg->data) { /* write the buffer on stderr */ - ha_warning("%s.\n", trash.area); + ha_warning("%s/%s: %s.\n", s->proxy->id, s->id, msg->area); /* send a log */ - send_log(s->proxy, LOG_NOTICE, "%s.\n", trash.area); + send_log(s->proxy, LOG_NOTICE, "%s/%s: %s.\n", s->proxy->id, s->id, msg->area); } + return ret; +} + +/* + * update a server's current IP address. + * ip is a pointer to the new IP address, whose address family is ip_sin_family. + * ip is in network format. + * updater is a string which contains an information about the requester of the update. + * updater is used if not NULL. + * + * A log line and a stderr warning message is generated based on server's backend options. + * + * Must be called with the server lock held. 
+ */ +int srv_update_addr(struct server *s, void *ip, int ip_sin_family, struct server_inetaddr_updater updater) +{ + struct server_inetaddr inetaddr; + + server_get_inetaddr(s, &inetaddr); + BUG_ON(ip_sin_family != AF_INET && ip_sin_family != AF_INET6); /* save the new IP family */ - new_addr.ss_family = ip_sin_family; + inetaddr.family = ip_sin_family; /* save the new IP address */ switch (ip_sin_family) { case AF_INET: - memcpy(&((struct sockaddr_in *)&new_addr)->sin_addr.s_addr, ip, 4); + memcpy(&inetaddr.addr.v4, ip, 4); break; case AF_INET6: - memcpy(((struct sockaddr_in6 *)&new_addr)->sin6_addr.s6_addr, ip, 16); + memcpy(&inetaddr.addr.v6, ip, 16); break; }; - _srv_event_hdl_prepare(&cb_data.common, s, 0); - _srv_event_hdl_prepare_inetaddr(&cb_data.addr, s, - &new_addr, s->svc_port, !!(s->flags & SRV_F_MAPPORTS), - 0); - - /* server_atomic_sync_task will apply the changes for us */ - _srv_event_hdl_publish(EVENT_HDL_SUB_SERVER_INETADDR, cb_data, s); + server_set_inetaddr_warn(s, &inetaddr, updater); return 0; } @@ -3906,40 +4302,37 @@ out: /* * This function update a server's addr and port only for AF_INET and AF_INET6 families. * - * Caller can pass its name through <updater> to get it integrated in the response message - * returned by the function. + * Caller can pass its info through <updater> to get it integrated in the response + * message returned by the function. 
* * The function first does the following, in that order: + * - checks that don't switch from/to a family other than AF_INET and AF_INET6 * - validates the new addr and/or port - * - checks if an update is required (new IP or port is different than current ones) - * - checks the update is allowed: - * - don't switch from/to a family other than AF_INET4 and AF_INET6 - * - allow all changes if no CHECKS are configured - * - if CHECK is configured: - * - if switch to port map (SRV_F_MAPPORTS), ensure health check have their own ports - * - applies required changes to both ADDR and PORT if both 'required' and 'allowed' - * conditions are met + * - calls server_set_inetaddr() to check and apply the change * * Must be called with the server lock held. */ -const char *srv_update_addr_port(struct server *s, const char *addr, const char *port, char *updater) +const char *srv_update_addr_port(struct server *s, const char *addr, const char *port, + struct server_inetaddr_updater updater) { - union { - struct event_hdl_cb_data_server_inetaddr addr; - struct event_hdl_cb_data_server common; - } cb_data; struct sockaddr_storage sa; - int ret; - char current_addr[INET6_ADDRSTRLEN]; - uint16_t current_port, new_port = 0; + struct server_inetaddr inetaddr; struct buffer *msg; - int ip_change = 0; - int port_change = 0; - uint8_t mapports = !!(s->flags & SRV_F_MAPPORTS); + int ret; msg = get_trash_chunk(); chunk_reset(msg); + /* even a simple port change is not supported outside of inet context, because + * s->svc_port is only relevant under inet context + */ + if ((s->addr.ss_family != AF_INET) && (s->addr.ss_family != AF_INET6)) { + chunk_printf(msg, "Update for the current server address family is only supported through configuration file."); + goto out; + } + + server_get_inetaddr(s, &inetaddr); + if (addr) { memset(&sa, 0, sizeof(struct sockaddr_storage)); if (str2ip2(addr, &sa, 0) == NULL) { @@ -3953,40 +4346,24 @@ const char *srv_update_addr_port(struct server *s, const char 
*addr, const char goto out; } - /* collecting data currently setup */ - memset(current_addr, '\0', sizeof(current_addr)); - ret = addr_to_str(&s->addr, current_addr, sizeof(current_addr)); - /* changes are allowed on AF_INET* families only */ - if ((ret != AF_INET) && (ret != AF_INET6)) { - chunk_printf(msg, "Update for the current server address family is only supported through configuration file"); - goto out; - } - - /* applying ADDR changes if required and allowed - * ipcmp returns 0 when both ADDR are the same - */ - if (ipcmp(&s->addr, &sa, 0) == 0) { - chunk_appendf(msg, "no need to change the addr"); - goto port; + inetaddr.family = sa.ss_family; + switch (inetaddr.family) { + case AF_INET: + inetaddr.addr.v4 = ((struct sockaddr_in *)&sa)->sin_addr; + break; + case AF_INET6: + inetaddr.addr.v6 = ((struct sockaddr_in6 *)&sa)->sin6_addr; + break; } - ip_change = 1; - - /* update report for caller */ - chunk_printf(msg, "IP changed from '%s' to '%s'", current_addr, addr); } - port: if (port) { + uint16_t new_port; char sign = '\0'; char *endptr; - if (addr) - chunk_appendf(msg, ", "); - - /* collecting data currently setup */ - current_port = s->svc_port; - sign = *port; + errno = 0; new_port = strtol(port, &endptr, 10); if ((errno != 0) || (port == endptr)) { @@ -3995,98 +4372,46 @@ const char *srv_update_addr_port(struct server *s, const char *addr, const char } /* check if caller triggers a port mapped or offset */ - if (sign == '-' || (sign == '+')) { - /* check if server currently uses port map */ - if (!(s->flags & SRV_F_MAPPORTS)) { - /* check is configured - * we're switching from a fixed port to a SRV_F_MAPPORTS (mapped) port - * prevent PORT change if check doesn't have it's dedicated port while switching - * to port mapping */ - if (!s->check.port) { - chunk_appendf(msg, "can't change <port> to port map because it is incompatible with current health check port configuration (use 'port' statement from the 'server' directive."); - goto out; - } - /* 
switch from fixed port to port map mandatorily triggers - * a port change */ - port_change = 1; - } - /* we're already using port maps */ - else { - port_change = current_port != new_port; - } - } - /* fixed port */ - else { - port_change = current_port != new_port; - } - - /* applying PORT changes if required and update response message */ - if (port_change) { - uint16_t new_port_print = new_port; - - /* prepare message */ - chunk_appendf(msg, "port changed from '"); - if (s->flags & SRV_F_MAPPORTS) - chunk_appendf(msg, "+"); - chunk_appendf(msg, "%d' to '", current_port); - - if (sign == '-') { - mapports = 1; - chunk_appendf(msg, "%c", sign); - /* just use for result output */ - new_port_print = -new_port_print; - } - else if (sign == '+') { - mapports = 1; - chunk_appendf(msg, "%c", sign); - } - else { - mapports = 0; - } - - chunk_appendf(msg, "%d'", new_port_print); - } - else { - chunk_appendf(msg, "no need to change the port"); - } + if (sign == '-' || sign == '+') + inetaddr.port.map = 1; + else + inetaddr.port.map = 0; + + inetaddr.port.svc = new_port; + + /* note: negative offset was converted to positive offset + * (new_port is unsigned) to prevent later conversions errors + * since svc_port is handled as an unsigned int all along the + * chain. Unfortunately this is a one-way operation so the user + * could be surprised to see a negative offset reported using + * its equivalent positive offset in the generated message + * (-X = +(65535 - (X-1))), but thanks to proper wraparound it + * will be interpreted as a negative offset during port + * remapping so it will work as expected. + */ } -out: - if (ip_change || port_change) { - _srv_event_hdl_prepare(&cb_data.common, s, 0); - _srv_event_hdl_prepare_inetaddr(&cb_data.addr, s, - ((ip_change) ? &sa : &s->addr), - ((port_change) ? 
new_port : s->svc_port), mapports, - 1); + ret = server_set_inetaddr(s, &inetaddr, updater, msg); + if (!ret) + chunk_printf(msg, "nothing changed"); - /* server_atomic_sync_task will apply the changes for us */ - _srv_event_hdl_publish(EVENT_HDL_SUB_SERVER_INETADDR, cb_data, s); - } - if (updater) - chunk_appendf(msg, " by '%s'", updater); - chunk_appendf(msg, "\n"); + out: return msg->area; } /* - * update server status based on result of SRV resolution + * put the server in maintenance because of failing SRV resolution * returns: - * 0 if server status is updated + * 0 if server was put under maintenance * 1 if server status has not changed * * Must be called with the server lock held. */ -int srvrq_update_srv_status(struct server *s, int has_no_ip) +int srvrq_set_srv_down(struct server *s) { if (!s->srvrq) return 1; - /* since this server has an IP, it can go back in production */ - if (has_no_ip == 0) { - srv_clr_admin_flag(s, SRV_ADMF_RMAINT); - return 1; - } - if (s->next_admin & SRV_ADMF_RMAINT) return 1; @@ -4095,59 +4420,46 @@ int srvrq_update_srv_status(struct server *s, int has_no_ip) } /* - * update server status based on result of name resolution + * put server under maintenance as a result of name resolution * returns: - * 0 if server status is updated + * 0 if server was put under maintenance * 1 if server status has not changed * * Must be called with the server lock held. */ -int snr_update_srv_status(struct server *s, int has_no_ip) +int snr_set_srv_down(struct server *s) { struct resolvers *resolvers = s->resolvers; struct resolv_resolution *resolution = (s->resolv_requester ? 
s->resolv_requester->resolution : NULL); int exp; + /* server already under maintenance */ + if (s->next_admin & SRV_ADMF_RMAINT) + goto out; + /* If resolution is NULL we're dealing with SRV records Additional records */ if (resolution == NULL) - return srvrq_update_srv_status(s, has_no_ip); + return srvrq_set_srv_down(s); switch (resolution->status) { case RSLV_STATUS_NONE: /* status when HAProxy has just (re)started. * Nothing to do, since the task is already automatically started */ - break; + goto out; case RSLV_STATUS_VALID: /* - * resume health checks - * server will be turned back on if health check is safe + * valid resolution but no usable server address */ - if (has_no_ip) { - if (s->next_admin & SRV_ADMF_RMAINT) - return 1; - srv_set_admin_flag(s, SRV_ADMF_RMAINT, SRV_ADM_STCHGC_DNS_NOIP); - return 0; - } - - if (!(s->next_admin & SRV_ADMF_RMAINT)) - return 1; - srv_clr_admin_flag(s, SRV_ADMF_RMAINT); - chunk_printf(&trash, "Server %s/%s administratively READY thanks to valid DNS answer", - s->proxy->id, s->id); - - ha_warning("%s.\n", trash.area); - send_log(s->proxy, LOG_NOTICE, "%s.\n", trash.area); + srv_set_admin_flag(s, SRV_ADMF_RMAINT, SRV_ADM_STCHGC_DNS_NOIP); return 0; case RSLV_STATUS_NX: /* stop server if resolution is NX for a long enough period */ exp = tick_add(resolution->last_valid, resolvers->hold.nx); if (!tick_is_expired(exp, now_ms)) - break; + goto out; // not yet expired - if (s->next_admin & SRV_ADMF_RMAINT) - return 1; srv_set_admin_flag(s, SRV_ADMF_RMAINT, SRV_ADM_STCHGC_DNS_NX); return 0; @@ -4155,10 +4467,8 @@ int snr_update_srv_status(struct server *s, int has_no_ip) /* stop server if resolution is TIMEOUT for a long enough period */ exp = tick_add(resolution->last_valid, resolvers->hold.timeout); if (!tick_is_expired(exp, now_ms)) - break; + goto out; // not yet expired - if (s->next_admin & SRV_ADMF_RMAINT) - return 1; srv_set_admin_flag(s, SRV_ADMF_RMAINT, SRV_ADM_STCHGC_DNS_TIMEOUT); return 0; @@ -4166,10 +4476,8 @@ int 
snr_update_srv_status(struct server *s, int has_no_ip) /* stop server if resolution is REFUSED for a long enough period */ exp = tick_add(resolution->last_valid, resolvers->hold.refused); if (!tick_is_expired(exp, now_ms)) - break; + goto out; // not yet expired - if (s->next_admin & SRV_ADMF_RMAINT) - return 1; srv_set_admin_flag(s, SRV_ADMF_RMAINT, SRV_ADM_STCHGC_DNS_REFUSED); return 0; @@ -4177,14 +4485,13 @@ int snr_update_srv_status(struct server *s, int has_no_ip) /* stop server if resolution failed for a long enough period */ exp = tick_add(resolution->last_valid, resolvers->hold.other); if (!tick_is_expired(exp, now_ms)) - break; + goto out; // not yet expired - if (s->next_admin & SRV_ADMF_RMAINT) - return 1; srv_set_admin_flag(s, SRV_ADMF_RMAINT, SRV_ADM_STCHGC_DNS_UNSPEC); return 0; } + out: return 1; } @@ -4210,7 +4517,6 @@ int snr_resolution_cb(struct resolv_requester *requester, struct dns_counters *c void *serverip, *firstip; short server_sin_family, firstip_sin_family; int ret; - struct buffer *chk = get_trash_chunk(); int has_no_ip = 0; s = objt_server(requester->owner); @@ -4269,12 +4575,6 @@ int snr_resolution_cb(struct resolv_requester *requester, struct dns_counters *c has_no_ip = 1; goto update_status; - case RSLV_UPD_NAME_ERROR: - /* update resolution status to OTHER error type */ - resolution->status = RSLV_STATUS_OTHER; - has_no_ip = 1; - goto update_status; - default: has_no_ip = 1; goto invalid; @@ -4285,15 +4585,21 @@ int snr_resolution_cb(struct resolv_requester *requester, struct dns_counters *c if (counters) { counters->app.resolver.update++; /* save the first ip we found */ - chunk_printf(chk, "%s/%s", counters->pid, counters->id); + srv_update_addr(s, firstip, firstip_sin_family, + SERVER_INETADDR_UPDATER_DNS_RESOLVER(counters->ns_puid)); } else - chunk_printf(chk, "DNS cache"); - srv_update_addr(s, firstip, firstip_sin_family, (char *) chk->area); + srv_update_addr(s, firstip, firstip_sin_family, SERVER_INETADDR_UPDATER_DNS_CACHE); 
update_status: - if (!snr_update_srv_status(s, has_no_ip) && has_no_ip) - memset(&s->addr, 0, sizeof(s->addr)); + if (has_no_ip && !snr_set_srv_down(s)) { + struct server_inetaddr srv_addr; + + /* unset server's addr, keep port */ + server_get_inetaddr(s, &srv_addr); + memset(&srv_addr.addr, 0, sizeof(srv_addr.addr)); + server_set_inetaddr(s, &srv_addr, SERVER_INETADDR_UPDATER_NONE, NULL); + } return 1; invalid: @@ -4301,8 +4607,14 @@ int snr_resolution_cb(struct resolv_requester *requester, struct dns_counters *c counters->app.resolver.invalid++; goto update_status; } - if (!snr_update_srv_status(s, has_no_ip) && has_no_ip) - memset(&s->addr, 0, sizeof(s->addr)); + if (has_no_ip && !snr_set_srv_down(s)) { + struct server_inetaddr srv_addr; + + /* unset server's addr, keep port */ + server_get_inetaddr(s, &srv_addr); + memset(&srv_addr.addr, 0, sizeof(srv_addr.addr)); + server_set_inetaddr(s, &srv_addr, SERVER_INETADDR_UPDATER_NONE, NULL); + } return 0; } @@ -4382,8 +4694,13 @@ int snr_resolution_error_cb(struct resolv_requester *requester, int error_code) return 0; HA_SPIN_LOCK(SERVER_LOCK, &s->lock); - if (!snr_update_srv_status(s, 1)) { - memset(&s->addr, 0, sizeof(s->addr)); + if (!snr_set_srv_down(s)) { + struct server_inetaddr srv_addr; + + /* unset server's addr, keep port */ + server_get_inetaddr(s, &srv_addr); + memset(&srv_addr.addr, 0, sizeof(srv_addr.addr)); + server_set_inetaddr(s, &srv_addr, SERVER_INETADDR_UPDATER_NONE, NULL); HA_SPIN_UNLOCK(SERVER_LOCK, &s->lock); resolv_detach_from_resolution_answer_items(requester->resolution, requester); return 0; @@ -4739,16 +5056,16 @@ struct server *cli_find_server(struct appctx *appctx, char *arg) be_name = istsplit(&sv_name, '/'); if (!istlen(sv_name)) { - cli_err(appctx, "Require 'backend/server'."); + cli_err(appctx, "Require 'backend/server'.\n"); return NULL; } if (!(px = proxy_be_by_name(ist0(be_name)))) { - cli_err(appctx, "No such backend."); + cli_err(appctx, "No such backend.\n"); return NULL; } if 
(!(sv = server_find_by_name(px, ist0(sv_name)))) { - cli_err(appctx, "No such server."); + cli_err(appctx, "No such server.\n"); return NULL; } @@ -4915,10 +5232,9 @@ static int cli_parse_set_server(char **args, char *payload, struct appctx *appct port = args[6]; } HA_SPIN_LOCK(SERVER_LOCK, &sv->lock); - warning = srv_update_addr_port(sv, addr, port, "stats socket command"); + warning = srv_update_addr_port(sv, addr, port, SERVER_INETADDR_UPDATER_CLI); if (warning) cli_msg(appctx, LOG_WARNING, warning); - srv_clr_admin_flag(sv, SRV_ADMF_RMAINT); HA_SPIN_UNLOCK(SERVER_LOCK, &sv->lock); } else if (strcmp(args[3], "fqdn") == 0) { @@ -4994,12 +5310,12 @@ static int cli_parse_get_weight(char **args, char *payload, struct appctx *appct be_name = istsplit(&sv_name, '/'); if (!istlen(sv_name)) - return cli_err(appctx, "Require 'backend/server'."); + return cli_err(appctx, "Require 'backend/server'.\n"); if (!(be = proxy_be_by_name(ist0(be_name)))) - return cli_err(appctx, "No such backend."); + return cli_err(appctx, "No such backend.\n"); if (!(sv = server_find_by_name(be, ist0(sv_name)))) - return cli_err(appctx, "No such server."); + return cli_err(appctx, "No such server.\n"); /* return server's effective weight at the moment */ snprintf(trash.area, trash.size, "%d (initial %d)\n", sv->uweight, @@ -5234,7 +5550,7 @@ static int srv_alloc_lb(struct server *sv, struct proxy *be) /* updates the server's weight during a warmup stage. Once the final weight is * reached, the task automatically stops. Note that any server status change - * must have updated s->last_change accordingly. + * must have updated s->counters.last_change accordingly. 
*/ static struct task *server_warmup(struct task *t, void *context, unsigned int state) { @@ -5290,7 +5606,7 @@ static int init_srv_slowstart(struct server *srv) if (srv->next_state == SRV_ST_STARTING) { task_schedule(srv->warmup, tick_add(now_ms, - MS_TO_TICKS(MAX(1000, (ns_to_sec(now_ns) - srv->last_change)) / 20))); + MS_TO_TICKS(MAX(1000, (ns_to_sec(now_ns) - srv->counters.last_change)) / 20))); } } @@ -5352,19 +5668,19 @@ static int cli_parse_add_server(char **args, char *payload, struct appctx *appct } if (!*sv_name) - return cli_err(appctx, "Require 'backend/server'."); + return cli_err(appctx, "Require 'backend/server'.\n"); be = proxy_be_by_name(be_name); if (!be) - return cli_err(appctx, "No such backend."); + return cli_err(appctx, "No such backend.\n"); if (!(be->lbprm.algo & BE_LB_PROP_DYN)) { - cli_err(appctx, "Backend must use a dynamic load balancing to support dynamic servers."); + cli_err(appctx, "Backend must use a dynamic load balancing to support dynamic servers.\n"); return 1; } if (be->mode == PR_MODE_SYSLOG) { - cli_err(appctx," Dynamic servers cannot be used with log backends."); + cli_err(appctx," Dynamic servers cannot be used with log backends.\n"); return 1; } @@ -5554,11 +5870,11 @@ static int cli_parse_add_server(char **args, char *payload, struct appctx *appct */ if (srv->check.state & CHK_ST_CONFIGURED) { if (!start_check_task(&srv->check, 0, 1, 1)) - ha_alert("System might be unstable, consider to execute a reload"); + ha_alert("System might be unstable, consider to execute a reload\n"); } if (srv->agent.state & CHK_ST_CONFIGURED) { if (!start_check_task(&srv->agent, 0, 1, 1)) - ha_alert("System might be unstable, consider to execute a reload"); + ha_alert("System might be unstable, consider to execute a reload\n"); } if (srv->cklen && be->mode != PR_MODE_HTTP) @@ -5594,6 +5910,72 @@ out: return 1; } +/* Check if the server <bename>/<svname> exists and is ready for being deleted. 
+ * Both <bename> and <svname> must be valid strings. This must be called under + * thread isolation. If pb/ps are not null, upon success, the pointer to + * the backend and server respectively will be put there. If pm is not null, + * a pointer to an error/success message is returned there (possibly NULL if + * nothing to say). Returned values: + * >0 if OK + * 0 if not yet (should wait if it can) + * <0 if not possible + */ +int srv_check_for_deletion(const char *bename, const char *svname, struct proxy **pb, struct server **ps, const char **pm) +{ + struct server *srv = NULL; + struct proxy *be = NULL; + const char *msg = NULL; + int ret; + + /* First, unrecoverable errors */ + ret = -1; + + if (!(be = proxy_be_by_name(bename))) { + msg = "No such backend."; + goto leave; + } + + if (!(srv = server_find_by_name(be, svname))) { + msg = "No such server."; + goto leave; + } + + if (srv->flags & SRV_F_NON_PURGEABLE) { + msg = "This server cannot be removed at runtime due to other configuration elements pointing to it."; + goto leave; + } + + /* Only servers in maintenance can be deleted. This ensures that the + * server is not present anymore in the lb structures (through + * lbprm.set_server_status_down). + */ + if (!(srv->cur_admin & SRV_ADMF_MAINT)) { + msg = "Only servers in maintenance mode can be deleted."; + goto leave; + } + + /* Second, conditions that may change over time */ + ret = 0; + + /* Ensure that there is no active/pending connection on the server. */ + if (srv->curr_used_conns || + !eb_is_empty(&srv->queue.head) || srv_has_streams(srv)) { + msg = "Server still has connections attached to it, cannot remove it."; + goto leave; + } + + /* OK, let's go */ + ret = 1; +leave: + if (pb) + *pb = be; + if (ps) + *ps = srv; + if (pm) + *pm = msg; + return ret; +} + /* Parse a "del server" command * Returns 0 if the server has been successfully initialized, 1 on failure. 
*/ @@ -5603,6 +5985,10 @@ static int cli_parse_delete_server(char **args, char *payload, struct appctx *ap struct server *srv; struct server *prev_del; struct ist be_name, sv_name; + struct mt_list *elt1, elt2; + struct sess_priv_conns *sess_conns = NULL; + const char *msg; + int ret, i; if (!cli_has_level(appctx, ACCESS_LVL_ADMIN)) return 1; @@ -5620,42 +6006,71 @@ static int cli_parse_delete_server(char **args, char *payload, struct appctx *ap sv_name = ist(args[1]); be_name = istsplit(&sv_name, '/'); if (!istlen(sv_name)) { - cli_err(appctx, "Require 'backend/server'."); + cli_err(appctx, "Require 'backend/server'.\n"); goto out; } - if (!(be = proxy_be_by_name(ist0(be_name)))) { - cli_err(appctx, "No such backend."); - goto out; - } - if (!(srv = server_find_by_name(be, ist0(sv_name)))) { - cli_err(appctx, "No such server."); + ret = srv_check_for_deletion(ist0(be_name), ist0(sv_name), &be, &srv, &msg); + if (ret <= 0) { + /* failure (recoverable or not) */ + cli_err(appctx, msg); goto out; } - if (srv->flags & SRV_F_NON_PURGEABLE) { - cli_err(appctx, "This server cannot be removed at runtime due to other configuration elements pointing to it."); - goto out; - } + /* Close idle connections attached to this server. */ + for (i = tid;;) { + struct list *list = &srv->per_thr[i].idle_conn_list; + struct connection *conn; + + while (!LIST_ISEMPTY(list)) { + conn = LIST_ELEM(list->n, struct connection *, idle_list); + if (i != tid) { + if (conn->mux && conn->mux->takeover) + conn->mux->takeover(conn, i, 1); + else if (conn->xprt && conn->xprt->takeover) + conn->xprt->takeover(conn, conn->ctx, i, 1); + } + conn_release(conn); + } - /* Only servers in maintenance can be deleted. This ensures that the - * server is not present anymore in the lb structures (through - * lbprm.set_server_status_down). 
- */ - if (!(srv->cur_admin & SRV_ADMF_MAINT)) { - cli_err(appctx, "Only servers in maintenance mode can be deleted."); - goto out; + /* Also remove all purgeable conns as some of them may still + * reference the currently deleted server. + */ + while ((conn = MT_LIST_POP(&idle_conns[i].toremove_conns, + struct connection *, toremove_list))) { + conn_release(conn); + } + + if ((i = ((i + 1 == global.nbthread) ? 0 : i + 1)) == tid) + break; } - /* Ensure that there is no active/idle/pending connection on the server. - * - * TODO idle connections should not prevent server deletion. A proper - * cleanup function should be implemented to be used here. - */ - if (srv->curr_used_conns || srv->curr_idle_conns || - !eb_is_empty(&srv->queue.head) || srv_has_streams(srv)) { - cli_err(appctx, "Server still has connections attached to it, cannot remove it."); - goto out; + /* All idle connections should be removed now. */ + BUG_ON(srv->curr_idle_conns); + + /* Close idle private connections attached to this server. */ + mt_list_for_each_entry_safe(sess_conns, &srv->sess_conns, srv_el, elt1, elt2) { + struct connection *conn, *conn_back; + list_for_each_entry_safe(conn, conn_back, &sess_conns->conn_list, sess_el) { + + /* Only idle connections should be present if srv_check_for_deletion() is true. 
*/ + BUG_ON(!(conn->flags & CO_FL_SESS_IDLE)); + + LIST_DEL_INIT(&conn->sess_el); + conn->owner = NULL; + conn->flags &= ~CO_FL_SESS_IDLE; + if (sess_conns->tid != tid) { + if (conn->mux && conn->mux->takeover) + conn->mux->takeover(conn, sess_conns->tid, 1); + else if (conn->xprt && conn->xprt->takeover) + conn->xprt->takeover(conn, conn->ctx, sess_conns->tid, 1); + } + conn_release(conn); + } + + LIST_DELETE(&sess_conns->sess_el); + MT_LIST_DELETE_SAFE(elt1); + pool_free(pool_head_sess_priv_conns, sess_conns); } /* removing cannot fail anymore when we reach this: @@ -5724,13 +6139,11 @@ static int cli_parse_delete_server(char **args, char *payload, struct appctx *ap ha_notice("Server deleted.\n"); srv_drop(srv); - cli_msg(appctx, LOG_INFO, "Server deleted."); - + cli_msg(appctx, LOG_INFO, "Server deleted.\n"); return 0; out: thread_release(); - return 1; } @@ -6334,8 +6747,8 @@ static void srv_update_status(struct server *s, int type, int cause) if (srv_prev_state != s->cur_state) { if (srv_prev_state == SRV_ST_STOPPED) { /* server was down and no longer is */ - if (s->last_change < ns_to_sec(now_ns)) // ignore negative times - s->down_time += ns_to_sec(now_ns) - s->last_change; + if (s->counters.last_change < ns_to_sec(now_ns)) // ignore negative times + s->down_time += ns_to_sec(now_ns) - s->counters.last_change; _srv_event_hdl_publish(EVENT_HDL_SUB_SERVER_UP, cb_data.common, s); } else if (s->cur_state == SRV_ST_STOPPED) { @@ -6343,7 +6756,7 @@ static void srv_update_status(struct server *s, int type, int cause) s->counters.down_trans++; _srv_event_hdl_publish(EVENT_HDL_SUB_SERVER_DOWN, cb_data.common, s); } - s->last_change = ns_to_sec(now_ns); + s->counters.last_change = ns_to_sec(now_ns); /* publish the state change */ _srv_event_hdl_prepare_state(&cb_data.state, @@ -6358,9 +6771,9 @@ static void srv_update_status(struct server *s, int type, int cause) /* backend was down and is back up again: * no helper function, updating last_change and backend downtime 
stats */ - if (s->proxy->last_change < ns_to_sec(now_ns)) // ignore negative times - s->proxy->down_time += ns_to_sec(now_ns) - s->proxy->last_change; - s->proxy->last_change = ns_to_sec(now_ns); + if (s->proxy->be_counters.last_change < ns_to_sec(now_ns)) // ignore negative times + s->proxy->down_time += ns_to_sec(now_ns) - s->proxy->be_counters.last_change; + s->proxy->be_counters.last_change = ns_to_sec(now_ns); } } diff --git a/src/server_state.c b/src/server_state.c index ebdcf3c..ffc2463 100644 --- a/src/server_state.c +++ b/src/server_state.c @@ -321,7 +321,7 @@ static void srv_state_srv_update(struct server *srv, int version, char **params) srv_adm_set_drain(srv); } - srv->last_change = ns_to_sec(now_ns) - srv_last_time_change; + srv->counters.last_change = ns_to_sec(now_ns) - srv_last_time_change; srv->check.status = srv_check_status; srv->check.result = srv_check_result; diff --git a/src/session.c b/src/session.c index ce9ccbf..f8953df 100644 --- a/src/session.c +++ b/src/session.c @@ -27,8 +27,8 @@ DECLARE_POOL(pool_head_session, "session", sizeof(struct session)); -DECLARE_POOL(pool_head_sess_srv_list, "session server list", - sizeof(struct sess_srv_list)); +DECLARE_POOL(pool_head_sess_priv_conns, "session priv conns list", + sizeof(struct sess_priv_conns)); int conn_complete_session(struct connection *conn); @@ -61,7 +61,7 @@ struct session *session_new(struct proxy *fe, struct listener *li, enum obj_type sess->t_idle = -1; _HA_ATOMIC_INC(&totalconn); _HA_ATOMIC_INC(&jobs); - LIST_INIT(&sess->srv_list); + LIST_INIT(&sess->priv_conns); sess->idle_conns = 0; sess->flags = SESS_FL_NONE; sess->src = NULL; @@ -76,33 +76,29 @@ struct session *session_new(struct proxy *fe, struct listener *li, enum obj_type void session_free(struct session *sess) { struct connection *conn, *conn_back; - struct sess_srv_list *srv_list, *srv_list_back; + struct sess_priv_conns *pconns, *pconns_back; - if (sess->listener) + if (sess->flags & SESS_FL_RELEASE_LI) { + /* listener 
must be set for session used to account FE conns. */ + BUG_ON(!sess->listener); listener_release(sess->listener); + } + session_store_counters(sess); pool_free(pool_head_stk_ctr, sess->stkctr); vars_prune_per_sess(&sess->vars); conn = objt_conn(sess->origin); if (conn != NULL && conn->mux) conn->mux->destroy(conn->ctx); - list_for_each_entry_safe(srv_list, srv_list_back, &sess->srv_list, srv_list) { - list_for_each_entry_safe(conn, conn_back, &srv_list->conn_list, session_list) { - LIST_DEL_INIT(&conn->session_list); - if (conn->mux) { - conn->owner = NULL; - conn->flags &= ~CO_FL_SESS_IDLE; - conn->mux->destroy(conn->ctx); - } else { - /* We have a connection, but not yet an associated mux. - * So destroy it now. - */ - conn_stop_tracking(conn); - conn_full_close(conn); - conn_free(conn); - } + list_for_each_entry_safe(pconns, pconns_back, &sess->priv_conns, sess_el) { + list_for_each_entry_safe(conn, conn_back, &pconns->conn_list, sess_el) { + LIST_DEL_INIT(&conn->sess_el); + conn->owner = NULL; + conn->flags &= ~CO_FL_SESS_IDLE; + conn_release(conn); } - pool_free(pool_head_sess_srv_list, srv_list); + MT_LIST_DELETE(&pconns->srv_el); + pool_free(pool_head_sess_priv_conns, pconns); } sockaddr_free(&sess->src); sockaddr_free(&sess->dst); @@ -190,11 +186,17 @@ int session_accept_fd(struct connection *cli_conn) } } - sess = session_new(p, l, &cli_conn->obj_type); - if (!sess) - goto out_free_conn; + /* Reversed conns already have an assigned session, do not recreate it. */ + if (!(cli_conn->flags & CO_FL_REVERSED)) { + sess = session_new(p, l, &cli_conn->obj_type); + if (!sess) + goto out_free_conn; - conn_set_owner(cli_conn, sess, NULL); + conn_set_owner(cli_conn, sess, NULL); + } + else { + sess = cli_conn->owner; + } /* now evaluate the tcp-request layer4 rules. We only need a session * and no stream for these rules. 
@@ -293,12 +295,19 @@ int session_accept_fd(struct connection *cli_conn) sess->task->process = session_expire_embryonic; sess->task->expire = tick_add_ifset(now_ms, timeout); task_queue(sess->task); + + /* Session is responsible to decrement listener conns counters. */ + sess->flags |= SESS_FL_RELEASE_LI; + return 1; } /* OK let's complete stream initialization since there is no handshake */ - if (conn_complete_session(cli_conn) >= 0) + if (conn_complete_session(cli_conn) >= 0) { + /* Session is responsible to decrement listener conns counters. */ + sess->flags |= SESS_FL_RELEASE_LI; return 1; + } /* if we reach here we have deliberately decided not to keep this * session (e.g. tcp-request rule), so that's not an error we should @@ -308,9 +317,9 @@ int session_accept_fd(struct connection *cli_conn) /* error unrolling */ out_free_sess: - /* prevent call to listener_release during session_free. It will be - * done below, for all errors. */ - sess->listener = NULL; + /* SESS_FL_RELEASE_LI must not be set here as listener_release() is + * called manually for all errors. + */ session_free(sess); out_free_conn: @@ -322,15 +331,8 @@ int session_accept_fd(struct connection *cli_conn) MSG_DONTWAIT|MSG_NOSIGNAL); } - if (cli_conn->mux) { - /* Mux is already initialized for active reversed connection. */ - cli_conn->mux->destroy(cli_conn->ctx); - } - else { - conn_stop_tracking(cli_conn); - conn_full_close(cli_conn); - conn_free(cli_conn); - } + /* Mux is already initialized for active reversed connection. */ + conn_release(cli_conn); listener_release(l); return ret; } @@ -443,7 +445,7 @@ static void session_kill_embryonic(struct session *sess, unsigned int state) conn->err_code = CO_ER_SSL_TIMEOUT; } - if(!LIST_ISEMPTY(&sess->fe->logformat_error)) { + if(!lf_expr_isempty(&sess->fe->logformat_error)) { /* Display a log line following the configured error-log-format. 
*/ sess_log(sess); } @@ -520,6 +522,18 @@ int conn_complete_session(struct connection *conn) return -1; } +/* Add <inc> to the number of cumulated glitches in the tracked counters for + * session <sess> which is known for being tracked, and implicitly update the + * rate if also tracked. + */ +void __session_add_glitch_ctr(struct session *sess, uint inc) +{ + int i; + + for (i = 0; i < global.tune.nb_stk_ctr; i++) + stkctr_add_glitch_ctr(&sess->stkctr[i], inc); +} + /* * Local variables: * c-indent-level: 8 diff --git a/src/shctx.c b/src/shctx.c index be59053..931bc4f 100644 --- a/src/shctx.c +++ b/src/shctx.c @@ -16,6 +16,7 @@ #include <import/ebmbtree.h> #include <haproxy/list.h> #include <haproxy/shctx.h> +#include <haproxy/tools.h> /* * Reserve a new row if <first> is null, put it in the hotlist, set the refcount to 1 @@ -269,13 +270,14 @@ int shctx_row_data_get(struct shared_context *shctx, struct shared_block *first, * and 0 if cache is already allocated. */ int shctx_init(struct shared_context **orig_shctx, int maxblocks, int blocksize, - unsigned int maxobjsz, int extra) + unsigned int maxobjsz, int extra, const char *name) { int i; struct shared_context *shctx; int ret; void *cur; int maptype = MAP_SHARED; + size_t totalsize = sizeof(struct shared_context) + extra + (maxblocks * (sizeof(struct shared_block) + blocksize)); if (maxblocks <= 0) return 0; @@ -284,14 +286,15 @@ int shctx_init(struct shared_context **orig_shctx, int maxblocks, int blocksize, blocksize = (blocksize + sizeof(void *) - 1) & -sizeof(void *); extra = (extra + sizeof(void *) - 1) & -sizeof(void *); - shctx = (struct shared_context *)mmap(NULL, sizeof(struct shared_context) + extra + (maxblocks * (sizeof(struct shared_block) + blocksize)), - PROT_READ | PROT_WRITE, maptype | MAP_ANON, -1, 0); + shctx = (struct shared_context *)mmap(NULL, totalsize, PROT_READ | PROT_WRITE, maptype | MAP_ANON, -1, 0); if (!shctx || shctx == MAP_FAILED) { shctx = NULL; ret = SHCTX_E_ALLOC_CACHE; goto err; 
} + vma_set_name(shctx, totalsize, "shctx", name); + shctx->nbav = 0; LIST_INIT(&shctx->avail); @@ -87,7 +87,6 @@ static struct sink *__sink_new(const char *name, const char *desc, int fmt) /* address will be filled by the caller if needed */ sink->ctx.fd = -1; sink->ctx.dropped = 0; - HA_RWLOCK_INIT(&sink->ctx.lock); LIST_APPEND(&sink_list, &sink->sink_list); end: return sink; @@ -206,30 +205,79 @@ send: * here with the only difference that we override the log level. This is * possible since the announce message will be sent from the same context. * - * In case of success, the amount of drops is reduced by as much. It's supposed - * to be called under an exclusive lock on the sink to avoid multiple producers - * doing the same. On success, >0 is returned, otherwise <=0 on failure. + * In case of success, the amount of drops is reduced by as much. + * The function ensures that a single thread will do that work at once, other + * ones will only report a failure if a thread is dumping, so that no thread + * waits. A pair od atomic OR and AND is performed around the code so the + * caller would be advised to only call this function AFTER having verified + * that sink->ctx.dropped is not zero in order to avoid a memory write. On + * success, >0 is returned, otherwise <=0 on failure, indicating that it could + * not eliminate the pending drop counter. It may loop up to 10 times trying + * to catch up with failing competing threads. */ int sink_announce_dropped(struct sink *sink, struct log_header hdr) { - unsigned int dropped; - struct buffer msg; + static THREAD_LOCAL char msg_dropped1[] = "1 event dropped"; + static THREAD_LOCAL char msg_dropped2[] = "0000000000 events dropped"; + uint dropped, last_dropped; struct ist msgvec[1]; - char logbuf[64]; + uint retries = 10; + int ret = 0; + + /* Explanation. ctx.dropped is made of: + * bit0 = 1 if dropped dump in progress + * bit1..31 = dropped counter + * If non-zero there have been some drops. 
If not &1, it means + * nobody's taking care of them and we'll have to, otherwise + * another thread is already on them and we can just pass and + * count another drop (hence add 2). + */ + dropped = HA_ATOMIC_FETCH_OR(&sink->ctx.dropped, 1); + if (dropped & 1) { + /* another thread was already on it */ + goto leave; + } - while (unlikely((dropped = sink->ctx.dropped) > 0)) { - chunk_init(&msg, logbuf, sizeof(logbuf)); - chunk_printf(&msg, "%u event%s dropped", dropped, dropped > 1 ? "s" : ""); - msgvec[0] = ist2(msg.area, msg.data); + last_dropped = 0; + dropped >>= 1; + while (1) { + while (unlikely(dropped > last_dropped) && retries-- > 0) { + /* try to aggregate multiple messages if other threads arrive while + * we're producing the dropped message. + */ + uint msglen = sizeof(msg_dropped1); + const char *msg = msg_dropped1; + + last_dropped = dropped; + if (dropped > 1) { + msg = ultoa_r(dropped, msg_dropped2, 11); + msg_dropped2[10] = ' '; + msglen = msg_dropped2 + sizeof(msg_dropped2) - msg; + } + msgvec[0] = ist2(msg, msglen); + dropped = HA_ATOMIC_LOAD(&sink->ctx.dropped) >> 1; + } + if (!dropped) + break; + + last_dropped = 0; hdr.level = LOG_NOTICE; /* override level but keep original log header data */ if (__sink_write(sink, hdr, 0, msgvec, 1) <= 0) - return 0; + goto done; + /* success! */ - HA_ATOMIC_SUB(&sink->ctx.dropped, dropped); + HA_ATOMIC_SUB(&sink->ctx.dropped, dropped << 1); } - return 1; + + /* done! 
*/ + ret = 1; +done: + /* unlock the counter */ + HA_ATOMIC_AND(&sink->ctx.dropped, ~1); +leave: + return ret; } /* parse the "show events" command, returns 1 if a message is returned, otherwise zero */ @@ -284,7 +332,7 @@ static int cli_parse_show_events(char **args, char *payload, struct appctx *appc /* Pre-configures a ring proxy to emit connections */ void sink_setup_proxy(struct proxy *px) { - px->last_change = ns_to_sec(now_ns); + px->be_counters.last_change = ns_to_sec(now_ns); px->cap = PR_CAP_BE; px->maxconn = 0; px->conn_retries = 1; @@ -307,13 +355,12 @@ static void sink_forward_io_handler(struct appctx *appctx) struct sink_forward_target *sft = appctx->svcctx; struct sink *sink = sft->sink; struct ring *ring = sink->ctx.ring; - struct buffer *buf = &ring->buf; - uint64_t msg_len; - size_t len, cnt, ofs, last_ofs; + size_t ofs, last_ofs; int ret = 0; - if (unlikely(se_fl_test(appctx->sedesc, (SE_FL_EOS|SE_FL_ERROR|SE_FL_SHR|SE_FL_SHW)))) + if (unlikely(se_fl_test(appctx->sedesc, (SE_FL_EOS|SE_FL_ERROR)))) { goto out; + } /* if stopping was requested, close immediately */ if (unlikely(stopping)) @@ -335,77 +382,14 @@ static void sink_forward_io_handler(struct appctx *appctx) goto close; } - HA_RWLOCK_WRLOCK(RING_LOCK, &ring->lock); - LIST_DEL_INIT(&appctx->wait_entry); - HA_RWLOCK_WRUNLOCK(RING_LOCK, &ring->lock); - - HA_RWLOCK_RDLOCK(RING_LOCK, &ring->lock); - - /* explanation for the initialization below: it would be better to do - * this in the parsing function but this would occasionally result in - * dropped events because we'd take a reference on the oldest message - * and keep it while being scheduled. Thus instead let's take it the - * first time we enter here so that we have a chance to pass many - * existing messages before grabbing a reference to a location. This - * value cannot be produced after initialization. 
- */ - if (unlikely(sft->ofs == ~0)) { - sft->ofs = b_peek_ofs(buf, 0); - HA_ATOMIC_INC(b_orig(buf) + sft->ofs); - } - - /* we were already there, adjust the offset to be relative to - * the buffer's head and remove us from the counter. - */ - ofs = sft->ofs - b_head_ofs(buf); - if (sft->ofs < b_head_ofs(buf)) - ofs += b_size(buf); - BUG_ON(ofs >= buf->size); - HA_ATOMIC_DEC(b_peek(buf, ofs)); - - /* in this loop, ofs always points to the counter byte that precedes - * the message so that we can take our reference there if we have to - * stop before the end (ret=0). - */ - ret = 1; - while (ofs + 1 < b_data(buf)) { - cnt = 1; - len = b_peek_varint(buf, ofs + cnt, &msg_len); - if (!len) - break; - cnt += len; - BUG_ON(msg_len + ofs + cnt + 1 > b_data(buf)); - - if (unlikely(msg_len + 1 > b_size(&trash))) { - /* too large a message to ever fit, let's skip it */ - ofs += cnt + msg_len; - continue; - } - - chunk_reset(&trash); - len = b_getblk(buf, trash.area, msg_len, ofs + cnt); - trash.data += len; - trash.area[trash.data++] = '\n'; - - if (applet_putchk(appctx, &trash) == -1) { - ret = 0; - break; - } - ofs += cnt + msg_len; - } - - HA_ATOMIC_INC(b_peek(buf, ofs)); - last_ofs = b_tail_ofs(buf); - sft->ofs = b_peek_ofs(buf, ofs); + MT_LIST_DELETE(&appctx->wait_entry); - HA_RWLOCK_RDUNLOCK(RING_LOCK, &ring->lock); + ret = ring_dispatch_messages(ring, appctx, &sft->ofs, &last_ofs, 0, applet_append_line); if (ret) { /* let's be woken up once new data arrive */ - HA_RWLOCK_WRLOCK(RING_LOCK, &ring->lock); - LIST_APPEND(&ring->waiters, &appctx->wait_entry); - ofs = b_tail_ofs(buf); - HA_RWLOCK_WRUNLOCK(RING_LOCK, &ring->lock); + MT_LIST_APPEND(&ring->waiters, &appctx->wait_entry); + ofs = ring_tail(ring); if (ofs != last_ofs) { /* more data was added into the ring between the * unlock and the lock, and the writer might not @@ -437,11 +421,8 @@ static void sink_forward_oc_io_handler(struct appctx *appctx) struct sink_forward_target *sft = appctx->svcctx; struct sink *sink 
= sft->sink; struct ring *ring = sink->ctx.ring; - struct buffer *buf = &ring->buf; - uint64_t msg_len; - size_t len, cnt, ofs, last_ofs; + size_t ofs, last_ofs; int ret = 0; - char *p; if (unlikely(se_fl_test(appctx->sedesc, (SE_FL_EOS|SE_FL_ERROR|SE_FL_SHR|SE_FL_SHW)))) goto out; @@ -466,80 +447,13 @@ static void sink_forward_oc_io_handler(struct appctx *appctx) goto close; } - HA_RWLOCK_WRLOCK(RING_LOCK, &ring->lock); - LIST_DEL_INIT(&appctx->wait_entry); - HA_RWLOCK_WRUNLOCK(RING_LOCK, &ring->lock); - - HA_RWLOCK_RDLOCK(RING_LOCK, &ring->lock); - - /* explanation for the initialization below: it would be better to do - * this in the parsing function but this would occasionally result in - * dropped events because we'd take a reference on the oldest message - * and keep it while being scheduled. Thus instead let's take it the - * first time we enter here so that we have a chance to pass many - * existing messages before grabbing a reference to a location. This - * value cannot be produced after initialization. - */ - if (unlikely(sft->ofs == ~0)) { - sft->ofs = b_peek_ofs(buf, 0); - HA_ATOMIC_INC(b_orig(buf) + sft->ofs); - } - - /* we were already there, adjust the offset to be relative to - * the buffer's head and remove us from the counter. - */ - ofs = sft->ofs - b_head_ofs(buf); - if (sft->ofs < b_head_ofs(buf)) - ofs += b_size(buf); - BUG_ON(ofs >= buf->size); - HA_ATOMIC_DEC(b_peek(buf, ofs)); - - /* in this loop, ofs always points to the counter byte that precedes - * the message so that we can take our reference there if we have to - * stop before the end (ret=0). 
- */ - ret = 1; - while (ofs + 1 < b_data(buf)) { - cnt = 1; - len = b_peek_varint(buf, ofs + cnt, &msg_len); - if (!len) - break; - cnt += len; - BUG_ON(msg_len + ofs + cnt + 1 > b_data(buf)); - - chunk_reset(&trash); - p = ulltoa(msg_len, trash.area, b_size(&trash)); - if (p) { - trash.data = (p - trash.area) + 1; - *p = ' '; - } - - if (!p || (trash.data + msg_len > b_size(&trash))) { - /* too large a message to ever fit, let's skip it */ - ofs += cnt + msg_len; - continue; - } - - trash.data += b_getblk(buf, p + 1, msg_len, ofs + cnt); - - if (applet_putchk(appctx, &trash) == -1) { - ret = 0; - break; - } - ofs += cnt + msg_len; - } - - HA_ATOMIC_INC(b_peek(buf, ofs)); - last_ofs = b_tail_ofs(buf); - sft->ofs = b_peek_ofs(buf, ofs); - HA_RWLOCK_RDUNLOCK(RING_LOCK, &ring->lock); + MT_LIST_DELETE(&appctx->wait_entry); + ret = ring_dispatch_messages(ring, appctx, &sft->ofs, &last_ofs, 0, syslog_applet_append_event); if (ret) { /* let's be woken up once new data arrive */ - HA_RWLOCK_WRLOCK(RING_LOCK, &ring->lock); - LIST_APPEND(&ring->waiters, &appctx->wait_entry); - ofs = b_tail_ofs(buf); - HA_RWLOCK_WRUNLOCK(RING_LOCK, &ring->lock); + MT_LIST_APPEND(&ring->waiters, &appctx->wait_entry); + ofs = ring_tail(ring); if (ofs != last_ofs) { /* more data was added into the ring between the * unlock and the lock, and the writer might not @@ -569,9 +483,7 @@ void __sink_forward_session_deinit(struct sink_forward_target *sft) if (!sink) return; - HA_RWLOCK_WRLOCK(RING_LOCK, &sink->ctx.ring->lock); - LIST_DEL_INIT(&sft->appctx->wait_entry); - HA_RWLOCK_WRUNLOCK(RING_LOCK, &sink->ctx.ring->lock); + MT_LIST_DELETE(&sft->appctx->wait_entry); sft->appctx = NULL; task_wakeup(sink->forward_task, TASK_WOKEN_MSG); @@ -728,7 +640,7 @@ int sink_init_forward(struct sink *sink) */ void sink_rotate_file_backed_ring(const char *name) { - struct ring ring; + struct ring_storage storage; char *oldback; int ret; int fd; @@ -738,16 +650,20 @@ void sink_rotate_file_backed_ring(const char 
*name) return; /* check for contents validity */ - ret = read(fd, &ring, sizeof(ring)); + ret = read(fd, &storage, sizeof(storage)); close(fd); - if (ret != sizeof(ring)) + if (ret != sizeof(storage)) goto rotate; + /* check that it's the expected format before touching it */ + if (storage.rsvd != sizeof(storage)) + return; + /* contents are present, we want to keep them => rotate. Note that * an empty ring buffer has one byte (the marker). */ - if (ring.buf.data > 1) + if (storage.head != 0 || storage.tail != 1) goto rotate; /* nothing to keep, let's scratch the file and preserve the backup */ @@ -779,15 +695,14 @@ static void sink_free(struct sink *sink) return; if (sink->type == SINK_TYPE_BUFFER) { if (sink->store) { - size_t size = (sink->ctx.ring->buf.size + 4095UL) & -4096UL; - void *area = (sink->ctx.ring->buf.area - sizeof(*sink->ctx.ring)); + size_t size = (ring_allocated_size(sink->ctx.ring) + 4095UL) & -4096UL; + void *area = ring_allocated_area(sink->ctx.ring); msync(area, size, MS_SYNC); munmap(area, size); ha_free(&sink->store); } - else - ring_free(sink->ctx.ring); + ring_free(sink->ctx.ring); } LIST_DEL_INIT(&sink->sink_list); // remove from parent list task_destroy(sink->forward_task); @@ -914,6 +829,12 @@ static int sink_finalize(struct sink *sink) ha_alert("error when trying to initialize sink buffer forwarding.\n"); err_code |= ERR_ALERT | ERR_FATAL; } + if (!sink->store) { + /* virtual memory backed sink */ + vma_set_name(ring_allocated_area(sink->ctx.ring), + ring_allocated_size(sink->ctx.ring), + "ring", sink->name); + } } return err_code; } @@ -979,22 +900,28 @@ int cfg_parse_ring(const char *file, int linenum, char **args, int kwm) goto err; } + if (size > RING_TAIL_LOCK) { + ha_alert("parsing [%s:%d] : too large size '%llu' for new sink buffer, the limit on this platform is %llu bytes.\n", file, linenum, (ullong)size, (ullong)RING_TAIL_LOCK); + err_code |= ERR_ALERT | ERR_FATAL; + goto err; + } + if (cfg_sink->store) { ha_alert("parsing 
[%s:%d] : cannot resize an already mapped file, please specify 'size' before 'backing-file'.\n", file, linenum); err_code |= ERR_ALERT | ERR_FATAL; goto err; } - if (size < cfg_sink->ctx.ring->buf.size) { - ha_warning("parsing [%s:%d] : ignoring new size '%llu' that is smaller than current size '%llu' for ring '%s'.\n", - file, linenum, (ullong)size, (ullong)cfg_sink->ctx.ring->buf.size, cfg_sink->name); + if (size < ring_data(cfg_sink->ctx.ring)) { + ha_warning("parsing [%s:%d] : ignoring new size '%llu' that is smaller than contents '%llu' for ring '%s'.\n", + file, linenum, (ullong)size, (ullong)ring_data(cfg_sink->ctx.ring), cfg_sink->name); err_code |= ERR_WARN; goto err; } if (!ring_resize(cfg_sink->ctx.ring, size)) { ha_alert("parsing [%s:%d] : fail to set sink buffer size '%llu' for ring '%s'.\n", file, linenum, - (ullong)cfg_sink->ctx.ring->buf.size, cfg_sink->name); + (ullong)ring_size(cfg_sink->ctx.ring), cfg_sink->name); err_code |= ERR_ALERT | ERR_FATAL; goto err; } @@ -1034,7 +961,7 @@ int cfg_parse_ring(const char *file, int linenum, char **args, int kwm) goto err; } - size = (cfg_sink->ctx.ring->buf.size + 4095UL) & -4096UL; + size = (ring_size(cfg_sink->ctx.ring) + 4095UL) & -4096UL; if (ftruncate(fd, size) != 0) { close(fd); ha_alert("parsing [%s:%d] : could not adjust size of backing-file for ring '%s': %s.\n", file, linenum, cfg_sink->name, strerror(errno)); @@ -1056,7 +983,7 @@ int cfg_parse_ring(const char *file, int linenum, char **args, int kwm) /* never fails */ ring_free(cfg_sink->ctx.ring); - cfg_sink->ctx.ring = ring_make_from_area(area, size); + cfg_sink->ctx.ring = ring_make_from_area(area, size, 1); } else if (strcmp(args[0],"server") == 0) { if (!cfg_sink || (cfg_sink->type != SINK_TYPE_BUFFER)) { @@ -30,6 +30,7 @@ #include <haproxy/listener.h> #include <haproxy/log.h> #include <haproxy/namespace.h> +#include <haproxy/protocol-t.h> #include <haproxy/proto_sockpair.h> #include <haproxy/sock.h> #include <haproxy/sock_inet.h> @@ -109,6 
+110,9 @@ struct connection *sock_accept_conn(struct listener *l, int *status) goto fail_conn; } + if (unlikely(port_is_restricted(addr, HA_PROTO_TCP))) + goto fail_conn; + /* Perfect, the connection was accepted */ conn = conn_new(&l->obj_type); if (!conn) @@ -195,14 +199,76 @@ struct connection *sock_accept_conn(struct listener *l, int *status) goto done; } +/* Common code to handle in one place different ERRNOs, that socket() et setns() + * may return + */ +static int sock_handle_system_err(struct connection *conn, struct proxy *be) +{ + qfprintf(stderr, "Cannot get a server socket.\n"); + + conn->flags |= CO_FL_ERROR; + conn->err_code = CO_ER_SOCK_ERR; + + switch(errno) { + case ENFILE: + conn->err_code = CO_ER_SYS_FDLIM; + send_log(be, LOG_EMERG, + "Proxy %s reached system FD limit (maxsock=%d). " + "Please check system tunables.\n", be->id, global.maxsock); + + return SF_ERR_RESOURCE; + + case EMFILE: + conn->err_code = CO_ER_PROC_FDLIM; + send_log(be, LOG_EMERG, + "Proxy %s reached process FD limit (maxsock=%d). " + "Please check 'ulimit-n' and restart.\n", be->id, global.maxsock); + + return SF_ERR_RESOURCE; + + case ENOBUFS: + case ENOMEM: + conn->err_code = CO_ER_SYS_MEMLIM; + send_log(be, LOG_EMERG, + "Proxy %s reached system memory limit (maxsock=%d). " + "Please check system tunables.\n", be->id, global.maxsock); + + return SF_ERR_RESOURCE; + + case EAFNOSUPPORT: + case EPROTONOSUPPORT: + conn->err_code = CO_ER_NOPROTO; + break; + + case EPERM: + conn->err_code = CO_ER_SOCK_ERR; + send_log(be, LOG_EMERG, + "Proxy %s has insufficient permissions to open server socket.\n", + be->id); + + return SF_ERR_PRXCOND; + + default: + send_log(be, LOG_EMERG, + "Proxy %s cannot create a server socket: %s\n", + be->id, strerror(errno)); + } + + return SF_ERR_INTERNAL; +} + /* Create a socket to connect to the server in conn->dst (which MUST be valid), * using the configured namespace if needed, or the one passed by the proxy - * protocol if required to do so. 
It ultimately calls socket() or socketat() - * and returns the FD or error code. + * protocol if required to do so. It then calls socket() or socketat(). On + * success, checks if mark or tos sockopts need to be set on the file handle. + * Returns backend connection socket FD on success, stream_err flag needed by + * upper level is set as SF_ERR_NONE; -1 on failure, stream_err is set to + * appropriate value. */ -int sock_create_server_socket(struct connection *conn) +int sock_create_server_socket(struct connection *conn, struct proxy *be, int *stream_err) { const struct netns_entry *ns = NULL; + int sock_fd; #ifdef USE_NS if (objt_server(conn->target)) { @@ -212,7 +278,60 @@ int sock_create_server_socket(struct connection *conn) ns = __objt_server(conn->target)->netns; } #endif - return my_socketat(ns, conn->dst->ss_family, SOCK_STREAM, 0); + sock_fd = my_socketat(ns, conn->dst->ss_family, SOCK_STREAM, 0); + + /* at first, handle common to all proto families system limits and permission related errors */ + if (sock_fd == -1) { + *stream_err = sock_handle_system_err(conn, be); + + return -1; + } + + /* now perform some runtime condition checks */ + if (sock_fd >= global.maxsock) { + /* do not log anything there, it's a normal condition when this option + * is used to serialize connections to a server ! + */ + ha_alert("socket(): not enough free sockets. Raise -n argument. Giving up.\n"); + send_log(be, LOG_EMERG, "socket(): not enough free sockets. Raise -n argument. 
Giving up.\n"); + close(sock_fd); + conn->err_code = CO_ER_CONF_FDLIM; + conn->flags |= CO_FL_ERROR; + *stream_err = SF_ERR_PRXCOND; /* it is a configuration limit */ + + return -1; + } + + if (fd_set_nonblock(sock_fd) == -1 || + ((conn->ctrl->sock_prot == IPPROTO_TCP) && (setsockopt(sock_fd, IPPROTO_TCP, TCP_NODELAY, &one, sizeof(one)) == -1))) { + qfprintf(stderr,"Cannot set client socket to non blocking mode.\n"); + send_log(be, LOG_EMERG, "Cannot set client socket to non blocking mode.\n"); + close(sock_fd); + conn->err_code = CO_ER_SOCK_ERR; + conn->flags |= CO_FL_ERROR; + *stream_err = SF_ERR_INTERNAL; + + return -1; + } + + if (master == 1 && fd_set_cloexec(sock_fd) == -1) { + ha_alert("Cannot set CLOEXEC on client socket.\n"); + send_log(be, LOG_EMERG, "Cannot set CLOEXEC on client socket.\n"); + close(sock_fd); + conn->err_code = CO_ER_SOCK_ERR; + conn->flags |= CO_FL_ERROR; + *stream_err = SF_ERR_INTERNAL; + + return -1; + } + + if (conn->flags & CO_FL_OPT_MARK) + sock_set_mark(sock_fd, conn->ctrl->fam->sock_family, conn->mark); + if (conn->flags & CO_FL_OPT_TOS) + sock_set_tos(sock_fd, conn->dst, conn->tos); + + *stream_err = SF_ERR_NONE; + return sock_fd; } /* Enables receiving on receiver <rx> once already bound. */ @@ -807,6 +926,13 @@ int sock_conn_check(struct connection *conn) return 0; wait: + /* we may arrive here due to connect() misleadingly reporting EALREADY + * in some corner cases while the system disagrees and reports an error + * on the FD. + */ + if (fdtab[fd].state & FD_POLL_ERR) + goto out_error; + fd_cant_send(fd); fd_want_send(fd); return 0; diff --git a/src/sock_unix.c b/src/sock_unix.c index ef749a5..0f9bc9a 100644 --- a/src/sock_unix.c +++ b/src/sock_unix.c @@ -255,8 +255,8 @@ int sock_unix_bind_receiver(struct receiver *rx, char **errmsg) } addr.sun_family = AF_UNIX; - /* WT: shouldn't we use my_socketat(rx->netns) here instead ? 
*/ - fd = socket(rx->proto->fam->sock_domain, rx->proto->sock_type, rx->proto->sock_prot); + fd = my_socketat(rx->settings->netns, rx->proto->fam->sock_domain, + rx->proto->sock_type, rx->proto->sock_prot); if (fd < 0) { err |= ERR_FATAL | ERR_ALERT; memprintf(errmsg, "cannot create receiving socket (%s)", strerror(errno)); diff --git a/src/ssl_ckch.c b/src/ssl_ckch.c index ebab1f3..b178078 100644 --- a/src/ssl_ckch.c +++ b/src/ssl_ckch.c @@ -28,6 +28,7 @@ #include <haproxy/applet.h> #include <haproxy/base64.h> +#include <haproxy/cfgparse.h> #include <haproxy/channel.h> #include <haproxy/cli.h> #include <haproxy/errors.h> @@ -111,6 +112,7 @@ struct commit_cacrlfile_ctx { enum { CACRL_ST_INIT = 0, CACRL_ST_GEN, + CACRL_ST_CRLCB, CACRL_ST_INSERT, CACRL_ST_SUCCESS, CACRL_ST_FIN, @@ -119,6 +121,18 @@ struct commit_cacrlfile_ctx { }; +/* + * Callback function, which is called if defined after loading CRLs from disk + * when starting HAProxy (function __ssl_store_load_locations_file()), and after + * committing new CRLs via CLI (function cli_io_handler_commit_cafile_crlfile()). + * + * The input parameters of the function are the path for the CRL data and + * a structure containing information about X.509 certificates and CRLs. + * In case of error, returns -1 with an error message in err; or the number + * of revoked certificates (>= 0) otherwise. + */ +int (*ssl_commit_crlfile_cb)(const char *path, X509_STORE *ctx, char **err) = NULL; + /******************** cert_key_and_chain functions ************************* * These are the functions that fills a cert_key_and_chain structure. 
For the * functions filling a SSL_CTX from a cert_key_and_chain, see ssl_sock.c @@ -721,8 +735,27 @@ void ssl_sock_free_cert_key_and_chain_contents(struct ckch_data *data) X509_free(data->ocsp_issuer); data->ocsp_issuer = NULL; - OCSP_CERTID_free(data->ocsp_cid); - data->ocsp_cid = NULL; + + /* We need to properly remove the reference to the corresponding + * certificate_ocsp structure if it exists (which it should). + */ +#if ((defined SSL_CTRL_SET_TLSEXT_STATUS_REQ_CB && !defined OPENSSL_NO_OCSP) && !defined OPENSSL_IS_BORINGSSL) + if (data->ocsp_cid) { + struct certificate_ocsp *ocsp = NULL; + unsigned char certid[OCSP_MAX_CERTID_ASN1_LENGTH] = {}; + unsigned int certid_length = 0; + + if (ssl_ocsp_build_response_key(data->ocsp_cid, (unsigned char*)certid, &certid_length) >= 0) { + HA_SPIN_LOCK(OCSP_LOCK, &ocsp_tree_lock); + ocsp = (struct certificate_ocsp *)ebmb_lookup(&cert_ocsp_tree, certid, OCSP_MAX_CERTID_ASN1_LENGTH); + HA_SPIN_UNLOCK(OCSP_LOCK, &ocsp_tree_lock); + ssl_sock_free_ocsp(ocsp); + } + + OCSP_CERTID_free(data->ocsp_cid); + data->ocsp_cid = NULL; + } +#endif } /* @@ -794,8 +827,6 @@ struct ckch_data *ssl_sock_copy_cert_key_and_chain(struct ckch_data *src, dst->ocsp_cid = OCSP_CERTID_dup(src->ocsp_cid); - dst->ocsp_update_mode = src->ocsp_update_mode; - return dst; error: @@ -877,6 +908,9 @@ void ckch_store_free(struct ckch_store *store) ssl_sock_free_cert_key_and_chain_contents(store->data); ha_free(&store->data); + /* free the ckch_conf content */ + ckch_conf_clean(&store->conf); + free(store); } @@ -928,6 +962,9 @@ struct ckch_store *ckchs_dup(const struct ckch_store *src) if (!ssl_sock_copy_cert_key_and_chain(src->data, dst->data)) goto error; + + dst->conf.ocsp_update_mode = src->conf.ocsp_update_mode; + return dst; error: @@ -953,7 +990,7 @@ struct ckch_store *ckchs_lookup(char *path) /* * This function allocate a ckch_store and populate it with certificates from files. 
*/ -struct ckch_store *ckchs_load_cert_file(char *path, char **err) +struct ckch_store *ckch_store_new_load_files_path(char *path, char **err) { struct ckch_store *ckchs; @@ -966,6 +1003,8 @@ struct ckch_store *ckchs_load_cert_file(char *path, char **err) if (ssl_sock_load_files_into_ckch(path, ckchs->data, err) == 1) goto end; + ckchs->conf.used = CKCH_CONF_SET_EMPTY; + /* insert into the ckchs tree */ memcpy(ckchs->path, path, strlen(path) + 1); ebst_insert(&ckchs_tree, &ckchs->node); @@ -977,6 +1016,51 @@ end: return NULL; } +/* + * This function allocate a ckch_store and populate it with certificates using + * the ckch_conf structure. + */ +struct ckch_store *ckch_store_new_load_files_conf(char *name, struct ckch_conf *conf, char **err) +{ + struct ckch_store *ckchs; + int cfgerr = ERR_NONE; + char *tmpcrt = conf->crt; + + ckchs = ckch_store_new(name); + if (!ckchs) { + memprintf(err, "%sunable to allocate memory.\n", err && *err ? *err : ""); + goto end; + } + + /* this is done for retro-compatibility. When no "filename" crt-store + * options were configured in a crt-list, try to load the files by + * auto-detecting them. 
*/ + if ((conf->used == CKCH_CONF_SET_EMPTY || conf->used == CKCH_CONF_SET_CRTLIST) && + (!conf->key && !conf->ocsp && !conf->issuer && !conf->sctl)) { + cfgerr = ssl_sock_load_files_into_ckch(conf->crt, ckchs->data, err); + if (cfgerr & ERR_FATAL) + goto end; + /* set conf->crt to NULL so it's not erased */ + conf->crt = NULL; + } + + /* load files using the ckch_conf */ + cfgerr = ckch_store_load_files(conf, ckchs, 0, err); + if (cfgerr & ERR_FATAL) + goto end; + + conf->crt = tmpcrt; + + /* insert into the ckchs tree */ + memcpy(ckchs->path, name, strlen(name) + 1); + ebst_insert(&ckchs_tree, &ckchs->node); + return ckchs; + +end: + ckch_store_free(ckchs); + + return NULL; +} /******************** ckch_inst functions ******************************/ @@ -1383,6 +1467,14 @@ scandir_err: goto err; } + if (ssl_commit_crlfile_cb != NULL) { + if (ssl_commit_crlfile_cb(path, store, NULL) == -1) { + if (!shuterror) + ha_alert("crl-file: couldn't load '%s'\n", path); + goto err; + } + } + objs = X509_STORE_get0_objects(store); cert_count = sk_X509_OBJECT_num(objs); if (cert_count == 0) { @@ -1978,7 +2070,7 @@ int ckch_inst_rebuild(struct ckch_store *ckch_store, struct ckch_inst *ckchi, if (ckchi->is_server_instance) errcode |= ckch_inst_new_load_srv_store(ckch_store->path, ckch_store, new_inst, err); else - errcode |= ckch_inst_new_load_store(ckch_store->path, ckch_store, ckchi->bind_conf, ckchi->ssl_conf, sni_filter, fcount, new_inst, err); + errcode |= ckch_inst_new_load_store(ckch_store->path, ckch_store, ckchi->bind_conf, ckchi->ssl_conf, sni_filter, fcount, ckchi->is_default, new_inst, err); if (errcode & ERR_CODE) return 1; @@ -2115,16 +2207,11 @@ void ckch_store_replace(struct ckch_store *old_ckchs, struct ckch_store *new_ckc static int cli_io_handler_commit_cert(struct appctx *appctx) { struct commit_cert_ctx *ctx = appctx->svcctx; - struct stconn *sc = appctx_sc(appctx); int y = 0; struct ckch_store *old_ckchs, *new_ckchs = NULL; struct ckch_inst *ckchi; 
usermsgs_clr("CLI"); - /* FIXME: Don't watch the other side !*/ - if (unlikely(sc_opposite(sc)->flags & SC_FL_SHUT_DONE)) - goto end; - while (1) { switch (ctx->state) { case CERT_ST_INIT: @@ -2801,17 +2888,12 @@ error: static int cli_io_handler_commit_cafile_crlfile(struct appctx *appctx) { struct commit_cacrlfile_ctx *ctx = appctx->svcctx; - struct stconn *sc = appctx_sc(appctx); int y = 0; struct cafile_entry *old_cafile_entry = ctx->old_entry; struct cafile_entry *new_cafile_entry = ctx->new_entry; struct ckch_inst_link *ckchi_link; char *path; - /* FIXME: Don't watch the other side !*/ - if (unlikely(sc_opposite(sc)->flags & SC_FL_SHUT_DONE)) - goto end; - /* The ctx was already validated by the ca-file/crl-file parsing * function. Entries can only be NULL in CACRL_ST_SUCCESS or * CACRL_ST_FIN states @@ -2888,6 +2970,15 @@ static int cli_io_handler_commit_cafile_crlfile(struct appctx *appctx) y++; } + ctx->state = CACRL_ST_CRLCB; + __fallthrough; + case CACRL_ST_CRLCB: + if ((ctx->cafile_type == CAFILE_CRL) && (ssl_commit_crlfile_cb != NULL)) { + if (ssl_commit_crlfile_cb(crlfile_transaction.path, crlfile_transaction.new_crlfile_entry->ca_store, &ctx->err) == -1) { + ctx->state = CACRL_ST_ERROR; + goto error; + } + } ctx->state = CACRL_ST_INSERT; __fallthrough; case CACRL_ST_INSERT: @@ -3947,3 +4038,544 @@ static struct cli_kw_list cli_kws = {{ },{ INITCALL1(STG_REGISTER, cli_register_kw, &cli_kws); +static char *current_crtbase = NULL; +static char *current_keybase = NULL; +static int crtstore_load = 0; /* did we already load in this crt-store */ + +struct ckch_conf_kws ckch_conf_kws[] = { + { "alias", -1, PARSE_TYPE_NONE, NULL, NULL }, + { "crt", offsetof(struct ckch_conf, crt), PARSE_TYPE_STR, ckch_conf_load_pem, &current_crtbase }, + { "key", offsetof(struct ckch_conf, key), PARSE_TYPE_STR, ckch_conf_load_key, &current_keybase }, + { "ocsp", offsetof(struct ckch_conf, ocsp), PARSE_TYPE_STR, ckch_conf_load_ocsp_response, &current_crtbase }, + { "issuer", offsetof(struct
ckch_conf, issuer), PARSE_TYPE_STR, ckch_conf_load_ocsp_issuer, &current_crtbase }, + { "sctl", offsetof(struct ckch_conf, sctl), PARSE_TYPE_STR, ckch_conf_load_sctl, &current_crtbase }, + { "ocsp-update", offsetof(struct ckch_conf, ocsp_update_mode), PARSE_TYPE_ONOFF, ocsp_update_init, NULL }, + { NULL, -1, PARSE_TYPE_STR, NULL, NULL } +}; + +/* crt-store does not try to find files, but use the stored filename */ +int ckch_store_load_files(struct ckch_conf *f, struct ckch_store *c, int cli, char **err) +{ + int i; + int err_code = 0; + int rc = 1; + struct ckch_data *d = c->data; + + for (i = 0; ckch_conf_kws[i].name; i++) { + void *src = NULL; + + if (ckch_conf_kws[i].offset < 0) + continue; + + if (!ckch_conf_kws[i].func) + continue; + + src = (void *)((intptr_t)f + (ptrdiff_t)ckch_conf_kws[i].offset); + + switch (ckch_conf_kws[i].type) { + case PARSE_TYPE_STR: + { + char *v; + char *path; + char **base = ckch_conf_kws[i].base; + char path_base[PATH_MAX]; + + v = *(char **)src; + if (!v) + goto next; + + path = v; + if (base && *base && *path != '/') { + int rv = snprintf(path_base, sizeof(path_base), "%s/%s", *base, path); + if (rv >= sizeof(path_base)) { + memprintf(err, "'%s/%s' : path too long", *base, path); + err_code |= ERR_ALERT | ERR_FATAL; + goto out; + } + path = path_base; + } + rc = ckch_conf_kws[i].func(path, NULL, d, cli, err); + if (rc) { + err_code |= ERR_ALERT | ERR_FATAL; + memprintf(err, "%s '%s' cannot be read or parsed.", err && *err ? *err : "", path); + goto out; + } + break; + } + + case PARSE_TYPE_INT: + case PARSE_TYPE_ONOFF: + { + int v = *(int *)src; + rc = ckch_conf_kws[i].func(&v, NULL, d, cli, err); + if (rc) { + err_code |= ERR_ALERT | ERR_FATAL; + memprintf(err, "%s '%d' cannot be read or parsed.", err && *err ?
*err : "", v); + goto out; + } + + break; + } + + default: + break; + } +next: + ; + } + +out: + if (err_code & ERR_FATAL) + ssl_sock_free_cert_key_and_chain_contents(d); + ERR_clear_error(); + + return err_code; +} + +/* Parse a local crt-base or key-base for a crt-store */ +static int crtstore_parse_path_base(char **args, int section_type, struct proxy *curpx, const struct proxy *defpx, + const char *file, int linenum, char **err) +{ + int err_code = ERR_NONE; + + if (!*args[1]) { + memprintf(err, "parsing [%s:%d] : '%s' requires a <path> argument.", + file, linenum, args[0]); + err_code |= ERR_ALERT | ERR_FATAL; + goto out; + } + + if (crtstore_load) { + memprintf(err, "parsing [%s:%d] : '%s' can't be used after a load line, use it at the beginning of the section.", + file, linenum, args[0]); + err_code |= ERR_ALERT | ERR_FATAL; + goto out; + } + + if (args[0][1] == 'r') { + /* crt-base */ + free(current_crtbase); + current_crtbase = strdup(args[1]); + } else if (args[0][1] == 'e') { + /* key-base */ + free(current_keybase); + current_keybase = strdup(args[1]); + } +out: + return err_code; +} + +/* + * Check if ckch_conf <prev> and <new> are compatible: + * + * new \ prev | EMPTY | CRTLIST | CRTSTORE + * ---------------------------------------- + * EMPTY | OK | X | OK + * ---------------------------------------- + * CRTLIST | X | CMP | CMP + * ---------------------------------------- + * + * Return: + * 1 when the 2 structures have different variables or are incompatible + * 0 when the 2 structures have equal variables or are compatibles + */ +int ckch_conf_cmp(struct ckch_conf *prev, struct ckch_conf *new, char **err) +{ + int ret = 0; + int i; + + if (!prev || !new) + return 1; + + /* compatibility check */ + + if (prev->used == CKCH_CONF_SET_EMPTY) { + if (new->used == CKCH_CONF_SET_CRTLIST) { + memprintf(err, "%sCan't use the certificate previously defined without any keyword with these keywords:\n", *err ? 
*err : ""); + ret = 1; + } + if (new->used == CKCH_CONF_SET_EMPTY) + return 0; + + } else if (prev->used == CKCH_CONF_SET_CRTLIST) { + if (new->used == CKCH_CONF_SET_EMPTY) { + memprintf(err, "%sCan't use the certificate previously defined with keywords without these keywords:\n", *err ? *err : ""); + ret = 1; + } + } else if (prev->used == CKCH_CONF_SET_CRTSTORE) { + if (new->used == CKCH_CONF_SET_EMPTY) + return 0; + } + + + for (i = 0; ckch_conf_kws[i].name != NULL; i++) { + + if (strcmp(ckch_conf_kws[i].name, "crt") == 0) + continue; + + switch (ckch_conf_kws[i].type) { + case PARSE_TYPE_STR: { + char *avail1, *avail2; + avail1 = *(char **)((intptr_t)prev + (ptrdiff_t)ckch_conf_kws[i].offset); + avail2 = *(char **)((intptr_t)new + (ptrdiff_t)ckch_conf_kws[i].offset); + + /* must alert when strcmp is wrong, or when one of the field is NULL */ + if (((avail1 && avail2) && strcmp(avail1, avail2) != 0) || (!!avail1 ^ !!avail2)) { + memprintf(err, "%s- different parameter '%s' : previously '%s' vs '%s'\n", *err ? *err : "", ckch_conf_kws[i].name, avail1, avail2); + ret = 1; + } + } + break; + + default: + break; + } + /* special case for ocsp-update and default */ + if (strcmp(ckch_conf_kws[i].name, "ocsp-update") == 0) { + int o1, o2; /* ocsp-update from the configuration */ + int q1, q2; /* final ocsp-update value (from default) */ + + + o1 = *(int *)((intptr_t)prev + (ptrdiff_t)ckch_conf_kws[i].offset); + o2 = *(int *)((intptr_t)new + (ptrdiff_t)ckch_conf_kws[i].offset); + + q1 = (o1 == SSL_SOCK_OCSP_UPDATE_DFLT) ? global_ssl.ocsp_update.mode : o1; + q2 = (o2 == SSL_SOCK_OCSP_UPDATE_DFLT) ? global_ssl.ocsp_update.mode : o2; + + if (q1 != q2) { + int j = 1; + int o = o1; + int q = q1; + memprintf(err, "%s- different parameter '%s' : previously ", *err ? *err : "", ckch_conf_kws[i].name); + + do { + switch (o) { + case SSL_SOCK_OCSP_UPDATE_DFLT: + memprintf(err, "%s'default' (ocsp-update.mode %s)", *err ? *err : "", (q > 0) ? 
"on" : "off"); + break; + case SSL_SOCK_OCSP_UPDATE_ON: + memprintf(err, "%s'%s'", *err ? *err : "", "on"); + break; + case SSL_SOCK_OCSP_UPDATE_OFF: + memprintf(err, "%s'%s'", *err ? *err : "", "off"); + break; + } + o = o2; + q = q2; + if (j) + memprintf(err, "%s vs ", *err ? *err : ""); + } while (j--); + memprintf(err, "%s\n", *err ? *err : ""); + ret = 1; + } + } + } + +out: + return ret; +} + +/* + * Compare a previously generated ckch_conf with an empty one, using ckch_conf_cmp(). + */ +int ckch_conf_cmp_empty(struct ckch_conf *prev, char **err) +{ + struct ckch_conf new = {}; + + return ckch_conf_cmp(prev, &new, err); +} + +/* parse ckch_conf keywords for crt-list */ +int ckch_conf_parse(char **args, int cur_arg, struct ckch_conf *f, int *found, const char *file, int linenum, char **err) +{ + int i; + int err_code = 0; + + for (i = 0; ckch_conf_kws[i].name != NULL; i++) { + if (strcmp(ckch_conf_kws[i].name, args[cur_arg]) == 0) { + void *target; + *found = 1; + target = (char **)((intptr_t)f + (ptrdiff_t)ckch_conf_kws[i].offset); + + if (ckch_conf_kws[i].type == PARSE_TYPE_STR) { + char **t = target; + + *t = strdup(args[cur_arg + 1]); + if (!*t) { + ha_alert("parsing [%s:%d]: out of memory.\n", file, linenum); + err_code |= ERR_ALERT | ERR_ABORT; + goto out; + } + } else if (ckch_conf_kws[i].type == PARSE_TYPE_INT) { + int *t = target; + char *stop; + + *t = strtol(args[cur_arg + 1], &stop, 10); + if (*stop != '\0') { + memprintf(err, "parsing [%s:%d] : cannot parse '%s' value '%s', an integer is expected.\n", + file, linenum, args[cur_arg], args[cur_arg + 1]); + err_code |= ERR_ALERT | ERR_FATAL; + goto out; + } + } else if (ckch_conf_kws[i].type == PARSE_TYPE_ONOFF) { + int *t = target; + + if (strcmp(args[cur_arg + 1], "on") == 0) { + *t = 1; + } else if (strcmp(args[cur_arg + 1], "off") == 0) { + *t = -1; + } else { + memprintf(err, "parsing [%s:%d] : cannot parse '%s' value '%s', 'on' or 'off' is expected.\n", + file, linenum, args[cur_arg], 
args[cur_arg + 1]); + err_code |= ERR_ALERT | ERR_FATAL; + goto out; + } + } + break; + } + } +out: + return err_code; +} + +/* freeing the content of a ckch_conf structure */ +void ckch_conf_clean(struct ckch_conf *conf) +{ + free(conf->crt); + free(conf->key); + free(conf->ocsp); + free(conf->issuer); + free(conf->sctl); +} + +static char current_crtstore_name[PATH_MAX] = {}; + +static int crtstore_parse_load(char **args, int section_type, struct proxy *curpx, const struct proxy *defpx, + const char *file, int linenum, char **err) +{ + int err_code = 0; + int cur_arg = 0; + struct ckch_conf f = {}; + struct ckch_store *c = NULL; + char store_path[PATH_MAX]; /* complete path with crt_base */ + char alias_name[PATH_MAX]; /* complete alias name with the store prefix '@/' */ + char *final_name = NULL; /* name used as a key in the ckch_store */ + + cur_arg++; /* skip "load" */ + + while (*(args[cur_arg])) { + int found = 0; + + if (strcmp("alias", args[cur_arg]) == 0) { + int rv; + + if (*args[cur_arg + 1] == '/') { + memprintf(err, "parsing [%s:%d] : cannot parse '%s' value '%s', '/' is forbidden as the first character.\n", + file, linenum, args[cur_arg], args[cur_arg + 1]); + err_code |= ERR_ALERT | ERR_FATAL; + goto out; + } + + rv = snprintf(alias_name, sizeof(alias_name), "@%s/%s", current_crtstore_name, args[cur_arg + 1]); + if (rv >= sizeof(alias_name)) { + memprintf(err, "parsing [%s:%d] : cannot parse '%s' value '%s', too long, max len is %zd.\n", + file, linenum, args[cur_arg], args[cur_arg + 1], sizeof(alias_name)); + err_code |= ERR_ALERT | ERR_FATAL; + goto out; + } + final_name = alias_name; + found = 1; + } else { + err_code |= ckch_conf_parse(args, cur_arg, &f, &found, file, linenum, err); + if (err_code & ERR_FATAL) + goto out; + } + + if (!found) { + memprintf(err,"parsing [%s:%d] : '%s %s' in section 'crt-store': unknown keyword '%s'.", + file, linenum, args[0], args[cur_arg],args[cur_arg]); + err_code |= ERR_ALERT | ERR_FATAL; + goto out; + } + 
cur_arg += 2; + } + + if (!f.crt) { + memprintf(err,"parsing [%s:%d] : '%s' in section 'crt-store': mandatory 'crt' parameter not found.", + file, linenum, args[0]); + err_code |= ERR_ALERT | ERR_FATAL; + goto out; + } + + crtstore_load = 1; + + if (!final_name) { + final_name = f.crt; + + /* if no alias was used: + * - when a crt-store exists, use @store/crt + * - or use the absolute file (crt_base + crt) + * - or the relative file when no crt_base exists + */ + if (current_crtstore_name[0] != '\0') { + int rv; + + /* add the crt-store name, avoid a double / if the crt starts by it */ + rv = snprintf(alias_name, sizeof(alias_name), "@%s%s%s", current_crtstore_name, f.crt[0] != '/' ? "/" : "", f.crt); + if (rv >= sizeof(alias_name)) { + memprintf(err, "parsing [%s:%d] : cannot parse '%s' value '%s', too long, max len is %zd.\n", + file, linenum, args[cur_arg], args[cur_arg + 1], sizeof(alias_name)); + err_code |= ERR_ALERT | ERR_FATAL; + goto out; + } + final_name = alias_name; + } else if (global_ssl.crt_base && *f.crt != '/') { + int rv; + /* When no crt_store name, complete the name in the ckch_tree with 'crt-base' */ + + rv = snprintf(store_path, sizeof(store_path), "%s/%s", global_ssl.crt_base, f.crt); + if (rv >= sizeof(store_path)) { + memprintf(err, "'%s/%s' : path too long", global_ssl.crt_base, f.crt); + err_code |= ERR_ALERT | ERR_FATAL; + goto out; + } + final_name = store_path; + } + } + /* process and insert the ckch_store */ + c = ckch_store_new(final_name); + if (!c) + goto alloc_error; + + err_code |= ckch_store_load_files(&f, c, 0, err); + if (err_code & ERR_FATAL) + goto out; + + c->conf = f; + c->conf.used = CKCH_CONF_SET_CRTSTORE; + + if (ebst_insert(&ckchs_tree, &c->node) != &c->node) { + memprintf(err,"parsing [%s:%d] : '%s' in section 'crt-store': store '%s' was already defined.", + file, linenum, args[0], c->path); + err_code |= ERR_ALERT | ERR_FATAL; + goto out; + } + +out: + /* free ckch_conf content */ + if (err_code & ERR_FATAL) + 
ckch_store_free(c); + return err_code; + +alloc_error: + ha_alert("parsing [%s:%d]: out of memory.\n", file, linenum); + err_code |= ERR_ALERT | ERR_ABORT; + goto out; +} + +/* + * Parse "crt-store" section and create corresponding ckch_stores. + * + * The function returns 0 in success case, otherwise, it returns error + * flags. + */ +static int cfg_parse_crtstore(const char *file, int linenum, char **args, int kwm) +{ + struct cfg_kw_list *kwl; + const char *best; + int index; + int rc = 0; + int err_code = 0; + char *errmsg = NULL; + + if (strcmp(args[0], "crt-store") == 0) { /* new crt-store section */ + if (!*args[1]) { + current_crtstore_name[0] = '\0'; + } else { + rc = snprintf(current_crtstore_name, sizeof(current_crtstore_name), "%s", args[1]); + if (rc >= sizeof(current_crtstore_name)) { + ha_alert("parsing [%s:%d] : 'crt-store' <name> argument is too long.\n", file, linenum); + current_crtstore_name[0] = '\0'; + err_code |= ERR_ALERT | ERR_FATAL | ERR_ABORT; + goto out; + } + } + + if (*args[2]) { + ha_alert("parsing [%s:%d] : 'crt-store' section only supports a <name> argument.\n", file, linenum); + err_code |= ERR_ALERT | ERR_FATAL | ERR_ABORT; + goto out; + } + /* copy the crt_base and key_base */ + ha_free(&current_crtbase); + if (global_ssl.crt_base) + current_crtbase = strdup(global_ssl.crt_base); + ha_free(&current_keybase); + if (global_ssl.key_base) + current_keybase = strdup(global_ssl.key_base); + crtstore_load = 0; + + goto out; + } + + list_for_each_entry(kwl, &cfg_keywords.list, list) { + for (index = 0; kwl->kw[index].kw != NULL; index++) { + if (kwl->kw[index].section != CFG_CRTSTORE) + continue; + if (strcmp(kwl->kw[index].kw, args[0]) == 0) { + if (check_kw_experimental(&kwl->kw[index], file, linenum, &errmsg)) { + ha_alert("%s\n", errmsg); + err_code |= ERR_ALERT | ERR_FATAL | ERR_ABORT; + goto out; + } + + /* prepare error message just in case */ + rc = kwl->kw[index].parse(args, CFG_CRTSTORE, NULL, NULL, file, linenum, &errmsg); + if (rc &
ERR_ALERT) { + ha_alert("parsing [%s:%d] : %s\n", file, linenum, errmsg); + err_code |= rc; + goto out; + } + else if (rc & ERR_WARN) { + ha_warning("parsing [%s:%d] : %s\n", file, linenum, errmsg); + err_code |= rc; + goto out; + } + goto out; + } + } + } + + best = cfg_find_best_match(args[0], &cfg_keywords.list, CFG_CRTSTORE, NULL); + if (best) + ha_alert("parsing [%s:%d] : unknown keyword '%s' in '%s' section; did you mean '%s' maybe ?\n", file, linenum, args[0], cursection, best); + else + ha_alert("parsing [%s:%d] : unknown keyword '%s' in '%s' section\n", file, linenum, args[0], cursection); + err_code |= ERR_ALERT | ERR_FATAL; + goto out; + +out: + if (err_code & ERR_FATAL) + err_code |= ERR_ABORT; + free(errmsg); + return err_code; +} + +static int cfg_post_parse_crtstore() +{ + current_crtstore_name[0] = '\0'; + ha_free(&current_crtbase); + ha_free(&current_keybase); + + return ERR_NONE; +} + +REGISTER_CONFIG_SECTION("crt-store", cfg_parse_crtstore, cfg_post_parse_crtstore); + +static struct cfg_kw_list cfg_kws = {ILH, { + { CFG_CRTSTORE, "crt-base", crtstore_parse_path_base }, + { CFG_CRTSTORE, "key-base", crtstore_parse_path_base }, + { CFG_CRTSTORE, "load", crtstore_parse_load }, + { 0, NULL, NULL }, +}}; +INITCALL1(STG_REGISTER, cfg_register_keywords, &cfg_kws); diff --git a/src/ssl_crtlist.c b/src/ssl_crtlist.c index d788bec..71fa0a0 100644 --- a/src/ssl_crtlist.c +++ b/src/ssl_crtlist.c @@ -356,7 +356,7 @@ struct crtlist *crtlist_new(const char *filename, int unique) * <crt_path> is a ptr in <line> * Return an error code */ -int crtlist_parse_line(char *line, char **crt_path, struct crtlist_entry *entry, const char *file, int linenum, int from_cli, char **err) +int crtlist_parse_line(char *line, char **crt_path, struct crtlist_entry *entry, struct ckch_conf *cc, const char *file, int linenum, int from_cli, char **err) { int cfgerr = 0; int arg, newarg, cur_arg, i, ssl_b = 0, ssl_e = 0; @@ -438,19 +438,22 @@ int crtlist_parse_line(char *line, char **crt_path,
struct crtlist_entry *entry, cfgerr |= ERR_WARN; } - ssl_conf = calloc(1, sizeof *ssl_conf); - if (!ssl_conf) { - memprintf(err, "not enough memory!"); - cfgerr |= ERR_ALERT | ERR_FATAL; - goto error; - } } cur_arg = ssl_b ? ssl_b : 1; while (cur_arg < ssl_e) { newarg = 0; + /* look for ssl_conf keywords */ for (i = 0; ssl_crtlist_kws[i].kw != NULL; i++) { if (strcmp(ssl_crtlist_kws[i].kw, args[cur_arg]) == 0) { + if (!ssl_conf) + ssl_conf = calloc(1, sizeof *ssl_conf); + if (!ssl_conf) { + memprintf(err, "not enough memory!"); + cfgerr |= ERR_ALERT | ERR_FATAL; + goto error; + } + newarg = 1; cfgerr |= ssl_crtlist_kws[i].parse(args, cur_arg, NULL, ssl_conf, from_cli, err); if (cur_arg + 1 + ssl_crtlist_kws[i].skip > ssl_e) { @@ -460,9 +463,22 @@ int crtlist_parse_line(char *line, char **crt_path, struct crtlist_entry *entry, goto error; } cur_arg += 1 + ssl_crtlist_kws[i].skip; - break; + goto out; } } + if (cc) { + /* look for ckch_conf keywords */ + cfgerr |= ckch_conf_parse(args, cur_arg, cc, &newarg, file, linenum, err); + if (cfgerr & ERR_FATAL) + goto error; + + if (newarg) { + cur_arg += 2; /* skip 2 words if the keyword was found */ + cc->used = CKCH_CONF_SET_CRTLIST; /* if they are options they must be used everywhere */ + } + + } +out: if (!cfgerr && !newarg) { memprintf(err, "parsing [%s:%d]: unknown ssl keyword %s", file, linenum, args[cur_arg]); @@ -521,6 +537,7 @@ int crtlist_parse_file(char *file, struct bind_conf *bind_conf, struct proxy *cu char *crt_path; char path[MAXPATHLEN+1]; struct ckch_store *ckchs; + struct ckch_conf cc = {}; int found = 0; if (missing_lf != -1) { @@ -562,7 +579,7 @@ int crtlist_parse_file(char *file, struct bind_conf *bind_conf, struct proxy *cu goto error; } - cfgerr |= crtlist_parse_line(thisline, &crt_path, entry, file, linenum, 0, err); + cfgerr |= crtlist_parse_line(thisline, &crt_path, entry, &cc, file, linenum, 0, err); if (cfgerr & ERR_CODE) goto error; @@ -573,7 +590,7 @@ int crtlist_parse_file(char *file, struct 
bind_conf *bind_conf, struct proxy *cu continue; } - if (*crt_path != '/' && global_ssl.crt_base) { + if (*crt_path != '@' && *crt_path != '/' && global_ssl.crt_base) { if ((strlen(global_ssl.crt_base) + 1 + strlen(crt_path)) > sizeof(path) || snprintf(path, sizeof(path), "%s/%s", global_ssl.crt_base, crt_path) > sizeof(path)) { memprintf(err, "parsing [%s:%d]: '%s' : path too long", @@ -589,17 +606,18 @@ int crtlist_parse_file(char *file, struct bind_conf *bind_conf, struct proxy *cu if (ckchs == NULL) { if (stat(crt_path, &buf) == 0) { found++; - - ckchs = ckchs_load_cert_file(crt_path, err); + free(cc.crt); + cc.crt = strdup(crt_path); + ckchs = ckch_store_new_load_files_conf(crt_path, &cc, err); if (ckchs == NULL) { cfgerr |= ERR_ALERT | ERR_FATAL; goto error; } + ckchs->conf = cc; + entry->node.key = ckchs; entry->crtlist = newlist; - if (entry->ssl_conf) - ckchs->data->ocsp_update_mode = entry->ssl_conf->ocsp_update; ebpt_insert(&newlist->entries, &entry->node); LIST_APPEND(&newlist->ord_entries, &entry->by_crtlist); LIST_APPEND(&ckchs->crtlist_entry, &entry->by_ckch_store); @@ -614,6 +632,7 @@ int crtlist_parse_file(char *file, struct bind_conf *bind_conf, struct proxy *cu char fp[MAXPATHLEN+1] = {0}; int n = 0; struct crtlist_entry *entry_dup = entry; /* use the previous created entry */ + for (n = 0; n < SSL_SOCK_NUM_KEYTYPES; n++) { struct stat buf; int ret; @@ -625,7 +644,13 @@ int crtlist_parse_file(char *file, struct bind_conf *bind_conf, struct proxy *cu ckchs = ckchs_lookup(fp); if (!ckchs) { if (stat(fp, &buf) == 0) { - ckchs = ckchs_load_cert_file(fp, err); + + if (cc.used) { + memprintf(err, "%sCan't load '%s'. Using crt-store keyword is not compatible with multi certificates bundle.\n", + err && *err ? 
*err : "", crt_path); + cfgerr |= ERR_ALERT | ERR_FATAL; + } + ckchs = ckch_store_new_load_files_path(fp, err); if (!ckchs) { cfgerr |= ERR_ALERT | ERR_FATAL; goto error; @@ -649,12 +674,6 @@ int crtlist_parse_file(char *file, struct bind_conf *bind_conf, struct proxy *cu entry_dup->node.key = ckchs; entry_dup->crtlist = newlist; - cfgerr |= ocsp_update_check_cfg_consistency(ckchs, entry, crt_path, err); - if (cfgerr & ERR_FATAL) - goto error; - - if (entry->ssl_conf) - ckchs->data->ocsp_update_mode = entry->ssl_conf->ocsp_update; ebpt_insert(&newlist->entries, &entry_dup->node); LIST_APPEND(&newlist->ord_entries, &entry_dup->by_crtlist); LIST_APPEND(&ckchs->crtlist_entry, &entry_dup->by_ckch_store); @@ -676,15 +695,15 @@ int crtlist_parse_file(char *file, struct bind_conf *bind_conf, struct proxy *cu } } else { + if (ckch_conf_cmp(&ckchs->conf, &cc, err) != 0) { + memprintf(err, "'%s' in crt-list '%s' line %d, is already defined with incompatible parameters:\n %s", crt_path, file, linenum, err ? 
*err : ""); + cfgerr |= ERR_ALERT | ERR_FATAL; + goto error; + } + entry->node.key = ckchs; entry->crtlist = newlist; - cfgerr |= ocsp_update_check_cfg_consistency(ckchs, entry, crt_path, err); - if (cfgerr & ERR_FATAL) - goto error; - - if (entry->ssl_conf) - ckchs->data->ocsp_update_mode = entry->ssl_conf->ocsp_update; ebpt_insert(&newlist->entries, &entry->node); LIST_APPEND(&newlist->ord_entries, &entry->by_crtlist); LIST_APPEND(&ckchs->crtlist_entry, &entry->by_ckch_store); @@ -711,6 +730,8 @@ int crtlist_parse_file(char *file, struct bind_conf *bind_conf, struct proxy *cu error: crtlist_entry_free(entry); + /* FIXME: free cc */ + fclose(f); crtlist_free(newlist); return cfgerr; @@ -774,7 +795,7 @@ int crtlist_load_cert_dir(char *path, struct bind_conf *bind_conf, struct crtlis ckchs = ckchs_lookup(fp); if (ckchs == NULL) - ckchs = ckchs_load_cert_file(fp, err); + ckchs = ckch_store_new_load_files_path(fp, err); if (ckchs == NULL) { free(de); free(entry); @@ -808,21 +829,27 @@ end: * Take an ssl_bind_conf structure and append the configuration line used to * create it in the buffer */ -static void dump_crtlist_sslconf(struct buffer *buf, const struct ssl_bind_conf *conf) +static void dump_crtlist_conf(struct buffer *buf, const struct ssl_bind_conf *conf, const struct ckch_conf *cc) { int space = 0; - if (conf == NULL) + if (conf == NULL && cc->used == 0) return; chunk_appendf(buf, " ["); + + + if (conf == NULL) + goto dump_ckch; + + /* first dump all ssl_conf keywords */ + #ifdef OPENSSL_NPN_NEGOTIATED if (conf->npn_str) { int len = conf->npn_len; char *ptr = conf->npn_str; int comma = 0; - if (space) chunk_appendf(buf, " "); chunk_appendf(buf, "npn "); while (len) { unsigned short size; @@ -941,13 +968,23 @@ static void dump_crtlist_sslconf(struct buffer *buf, const struct ssl_bind_conf space++; } - if (conf->ocsp_update != SSL_SOCK_OCSP_UPDATE_DFLT) { + /* then dump the ckch_conf */ +dump_ckch: + if (!cc->used) + goto end; + + if (cc->ocsp_update_mode == 
SSL_SOCK_OCSP_UPDATE_OFF) { + if (space) chunk_appendf(buf, " "); + chunk_appendf(buf, "ocsp-update off"); + space++; + } else if (cc->ocsp_update_mode == SSL_SOCK_OCSP_UPDATE_ON) { if (space) chunk_appendf(buf, " "); - chunk_appendf(buf, "ocsp-update %s", - conf->ocsp_update == SSL_SOCK_OCSP_UPDATE_OFF ? "off" : "on"); + chunk_appendf(buf, "ocsp-update on"); space++; } +end: + chunk_appendf(buf, "]"); return; @@ -1030,7 +1067,7 @@ static int cli_io_handler_dump_crtlist_entries(struct appctx *appctx) chunk_appendf(trash, "%s", filename); if (ctx->mode == 's') /* show */ chunk_appendf(trash, ":%d", entry->linenum); - dump_crtlist_sslconf(trash, entry->ssl_conf); + dump_crtlist_conf(trash, entry->ssl_conf, &store->conf); dump_crtlist_filters(trash, entry); chunk_appendf(trash, "\n"); @@ -1128,7 +1165,6 @@ static int cli_io_handler_add_crtlist(struct appctx *appctx) { struct add_crtlist_ctx *ctx = appctx->svcctx; struct bind_conf_list *bind_conf_node; - struct stconn *sc = appctx_sc(appctx); struct crtlist *crtlist = ctx->crtlist; struct crtlist_entry *entry = ctx->entry; struct ckch_store *store = entry->node.key; @@ -1139,10 +1175,6 @@ static int cli_io_handler_add_crtlist(struct appctx *appctx) /* for each bind_conf which use the crt-list, a new ckch_inst must be * created. 
*/ - /* FIXME: Don't watch the other side !*/ - if (unlikely(sc_opposite(sc)->flags & SC_FL_SHUT_DONE)) - goto end; - switch (ctx->state) { case ADDCRT_ST_INIT: /* This state just print the update message */ @@ -1173,7 +1205,7 @@ static int cli_io_handler_add_crtlist(struct appctx *appctx) /* we don't support multi-cert bundles, only simple ones */ ctx->err = NULL; - errcode |= ckch_inst_new_load_store(store->path, store, bind_conf, entry->ssl_conf, entry->filters, entry->fcount, &new_inst, &ctx->err); + errcode |= ckch_inst_new_load_store(store->path, store, bind_conf, entry->ssl_conf, entry->filters, entry->fcount, 0, &new_inst, &ctx->err); if (errcode & ERR_CODE) { ctx->state = ADDCRT_ST_ERROR; goto error; @@ -1265,6 +1297,7 @@ static int cli_parse_add_crtlist(char **args, char *payload, struct appctx *appc struct ebpt_node *inserted; struct crtlist *crtlist; struct crtlist_entry *entry = NULL; + struct ckch_conf cc = {}; char *end; if (!cli_has_level(appctx, ACCESS_LVL_ADMIN)) @@ -1295,6 +1328,7 @@ static int cli_parse_add_crtlist(char **args, char *payload, struct appctx *appc goto error; } + if (payload) { char *lf; @@ -1304,7 +1338,7 @@ static int cli_parse_add_crtlist(char **args, char *payload, struct appctx *appc goto error; } /* cert_path is filled here */ - cfgerr |= crtlist_parse_line(payload, &cert_path, entry, "CLI", 1, 1, &err); + cfgerr |= crtlist_parse_line(payload, &cert_path, entry, &cc, "CLI", 1, 1, &err); if (cfgerr & ERR_CODE) goto error; } else { @@ -1335,7 +1369,7 @@ static int cli_parse_add_crtlist(char **args, char *payload, struct appctx *appc *slash = '/'; } - if (*cert_path != '/' && global_ssl.crt_base) { + if (*cert_path != '@' && *cert_path != '/' && global_ssl.crt_base) { if ((strlen(global_ssl.crt_base) + 1 + strlen(cert_path)) > sizeof(path) || snprintf(path, sizeof(path), "%s/%s", global_ssl.crt_base, cert_path) > sizeof(path)) { memprintf(&err, "'%s' : path too long", cert_path); @@ -1355,15 +1389,23 @@ static int 
cli_parse_add_crtlist(char **args, char *payload, struct appctx *appc goto error; } - /* No need to check 'ocsp-update' inconsistency on a store that is not - * used yet (it was just added through the CLI for instance). + /* We can use a crt-store keyword when: + * - no ckch_inst are linked OR + * - ckch_inst are linked but exact same ckch_conf is used. */ - if (!LIST_ISEMPTY(&store->ckch_inst) && - ocsp_update_check_cfg_consistency(store, entry, cert_path, &err)) - goto error; + if (LIST_ISEMPTY(&store->ckch_inst)) { - if (entry->ssl_conf) - store->data->ocsp_update_mode = entry->ssl_conf->ocsp_update; + store->conf = cc; + /* fresh new, run more init (for example init ocsp-update tasks) */ + cfgerr |= ckch_store_load_files(&cc, store, 1, &err); + if (cfgerr & ERR_FATAL) + goto error; + + } else if (ckch_conf_cmp(&store->conf, &cc, &err) != 0) { + memprintf(&err, "'%s' is already instantiated with incompatible parameters:\n %s", cert_path, err ? err : ""); + cfgerr |= ERR_ALERT | ERR_FATAL; + goto error; + } /* check if it's possible to insert this new crtlist_entry */ entry->node.key = store; @@ -1374,8 +1416,8 @@ static int cli_parse_add_crtlist(char **args, char *payload, struct appctx *appc } /* this is supposed to be a directory (EB_ROOT_UNIQUE), so no ssl_conf are allowed */ - if ((entry->ssl_conf || entry->filters) && eb_gettag(crtlist->entries.b[EB_RGHT])) { - memprintf(&err, "this is a directory, SSL configuration and filters are not allowed"); + if ((entry->ssl_conf || entry->filters || cc.used) && eb_gettag(crtlist->entries.b[EB_RGHT])) { + memprintf(&err, "this is a directory, SSL configuration, crt-store keywords and filters are not allowed"); goto error; } @@ -1391,6 +1433,7 @@ static int cli_parse_add_crtlist(char **args, char *payload, struct appctx *appc return 0; error: + ckch_conf_clean(&cc); crtlist_entry_free(entry); HA_SPIN_UNLOCK(CKCH_LOCK, &ckch_lock); err = memprintf(&err, "Can't edit the crt-list: %s\n", err ? 
err : ""); @@ -1567,4 +1610,3 @@ static struct cli_kw_list cli_kws = {{ },{ }; INITCALL1(STG_REGISTER, cli_register_kw, &cli_kws); - diff --git a/src/ssl_gencert.c b/src/ssl_gencert.c new file mode 100644 index 0000000..44dc82c --- /dev/null +++ b/src/ssl_gencert.c @@ -0,0 +1,470 @@ +/* + * SSL 'generate-certificate' option logic. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#define _GNU_SOURCE +#include <import/lru.h> + +#include <haproxy/errors.h> +#include <haproxy/openssl-compat.h> +#include <haproxy/ssl_ckch.h> +#include <haproxy/ssl_sock.h> +#include <haproxy/xxhash.h> + +#if (defined SSL_CTRL_SET_TLSEXT_HOSTNAME && !defined SSL_NO_GENERATE_CERTIFICATES) +/* X509V3 Extensions that will be added on generated certificates */ +#define X509V3_EXT_SIZE 5 +static char *x509v3_ext_names[X509V3_EXT_SIZE] = { + "basicConstraints", + "nsComment", + "subjectKeyIdentifier", + "authorityKeyIdentifier", + "keyUsage", +}; +static char *x509v3_ext_values[X509V3_EXT_SIZE] = { + "CA:FALSE", + "\"OpenSSL Generated Certificate\"", + "hash", + "keyid,issuer:always", + "nonRepudiation,digitalSignature,keyEncipherment" +}; +/* LRU cache to store generated certificate */ +static struct lru64_head *ssl_ctx_lru_tree = NULL; +static unsigned int ssl_ctx_lru_seed = 0; +static unsigned int ssl_ctx_serial; +__decl_rwlock(ssl_ctx_lru_rwlock); + +#endif // SSL_CTRL_SET_TLSEXT_HOSTNAME + +#ifndef SSL_NO_GENERATE_CERTIFICATES + +/* Configure a DNS SAN extension on a certificate. 
*/ +int ssl_sock_add_san_ext(X509V3_CTX* ctx, X509* cert, const char *servername) { + int failure = 0; + X509_EXTENSION *san_ext = NULL; + CONF *conf = NULL; + struct buffer *san_name = get_trash_chunk(); + + conf = NCONF_new(NULL); + if (!conf) { + failure = 1; + goto cleanup; + } + + /* Build an extension based on the DNS entry above */ + chunk_appendf(san_name, "DNS:%s", servername); + san_ext = X509V3_EXT_nconf_nid(conf, ctx, NID_subject_alt_name, san_name->area); + if (!san_ext) { + failure = 1; + goto cleanup; + } + + /* Add the extension */ + if (!X509_add_ext(cert, san_ext, -1 /* Add to end */)) { + failure = 1; + goto cleanup; + } + + /* Success */ + failure = 0; + +cleanup: + if (NULL != san_ext) X509_EXTENSION_free(san_ext); + if (NULL != conf) NCONF_free(conf); + + return failure; +} + +/* Create a X509 certificate with the specified servername and serial. This + * function returns a SSL_CTX object or NULL if an error occurs. */ +static SSL_CTX *ssl_sock_do_create_cert(const char *servername, struct bind_conf *bind_conf, SSL *ssl) +{ + X509 *cacert = bind_conf->ca_sign_ckch->cert; + EVP_PKEY *capkey = bind_conf->ca_sign_ckch->key; + SSL_CTX *ssl_ctx = NULL; + X509 *newcrt = NULL; + EVP_PKEY *pkey = NULL; + SSL *tmp_ssl = NULL; + CONF *ctmp = NULL; + X509_NAME *name; + const EVP_MD *digest; + X509V3_CTX ctx; + unsigned int i; + int key_type; + struct sni_ctx *sni_ctx; + + sni_ctx = ssl_sock_chose_sni_ctx(bind_conf, "", 1, 1); + if (!sni_ctx) + goto mkcert_error; + + /* Get the private key of the default certificate and use it */ +#ifdef HAVE_SSL_CTX_get0_privatekey + pkey = SSL_CTX_get0_privatekey(sni_ctx->ctx); +#else + tmp_ssl = SSL_new(sni_ctx->ctx); + if (tmp_ssl) + pkey = SSL_get_privatekey(tmp_ssl); +#endif + if (!pkey) + goto mkcert_error; + + /* Create the certificate */ + if (!(newcrt = X509_new())) + goto mkcert_error; + + /* Set version number for the certificate (X509v3) and the serial + * number */ + if (X509_set_version(newcrt, 2L) != 1) + 
goto mkcert_error; + ASN1_INTEGER_set(X509_get_serialNumber(newcrt), _HA_ATOMIC_ADD_FETCH(&ssl_ctx_serial, 1)); + + /* Set duration for the certificate */ + if (!X509_gmtime_adj(X509_getm_notBefore(newcrt), (long)-60*60*24) || + !X509_gmtime_adj(X509_getm_notAfter(newcrt),(long)60*60*24*365)) + goto mkcert_error; + + /* set public key in the certificate */ + if (X509_set_pubkey(newcrt, pkey) != 1) + goto mkcert_error; + + /* Set issuer name from the CA */ + if (!(name = X509_get_subject_name(cacert))) + goto mkcert_error; + if (X509_set_issuer_name(newcrt, name) != 1) + goto mkcert_error; + + /* Set the subject name using the same, but the CN */ + name = X509_NAME_dup(name); + if (X509_NAME_add_entry_by_txt(name, "CN", MBSTRING_ASC, + (const unsigned char *)servername, + -1, -1, 0) != 1) { + X509_NAME_free(name); + goto mkcert_error; + } + if (X509_set_subject_name(newcrt, name) != 1) { + X509_NAME_free(name); + goto mkcert_error; + } + X509_NAME_free(name); + + /* Add x509v3 extensions as specified */ + ctmp = NCONF_new(NULL); + X509V3_set_ctx(&ctx, cacert, newcrt, NULL, NULL, 0); + for (i = 0; i < X509V3_EXT_SIZE; i++) { + X509_EXTENSION *ext; + + if (!(ext = X509V3_EXT_nconf(ctmp, &ctx, x509v3_ext_names[i], x509v3_ext_values[i]))) + goto mkcert_error; + if (!X509_add_ext(newcrt, ext, -1)) { + X509_EXTENSION_free(ext); + goto mkcert_error; + } + X509_EXTENSION_free(ext); + } + + /* Add SAN extension */ + if (ssl_sock_add_san_ext(&ctx, newcrt, servername)) { + goto mkcert_error; + } + + /* Sign the certificate with the CA private key */ + + key_type = EVP_PKEY_base_id(capkey); + + if (key_type == EVP_PKEY_DSA) + digest = EVP_sha1(); + else if (key_type == EVP_PKEY_RSA) + digest = EVP_sha256(); + else if (key_type == EVP_PKEY_EC) + digest = EVP_sha256(); + else { +#ifdef ASN1_PKEY_CTRL_DEFAULT_MD_NID + int nid; + + if (EVP_PKEY_get_default_digest_nid(capkey, &nid) <= 0) + goto mkcert_error; + if (!(digest = EVP_get_digestbynid(nid))) + goto mkcert_error; +#else + 
goto mkcert_error; +#endif + } + + if (!(X509_sign(newcrt, capkey, digest))) + goto mkcert_error; + + /* Create and set the new SSL_CTX */ + if (!(ssl_ctx = SSL_CTX_new(SSLv23_server_method()))) + goto mkcert_error; + + if (global_ssl.security_level > -1) + SSL_CTX_set_security_level(ssl_ctx, global_ssl.security_level); + + if (!SSL_CTX_use_PrivateKey(ssl_ctx, pkey)) + goto mkcert_error; + if (!SSL_CTX_use_certificate(ssl_ctx, newcrt)) + goto mkcert_error; + if (!SSL_CTX_check_private_key(ssl_ctx)) + goto mkcert_error; + + /* Build chaining the CA cert and the rest of the chain, keep these order */ +#if defined(SSL_CTX_add1_chain_cert) + if (!SSL_CTX_add1_chain_cert(ssl_ctx, bind_conf->ca_sign_ckch->cert)) { + goto mkcert_error; + } + + if (bind_conf->ca_sign_ckch->chain) { + for (i = 0; i < sk_X509_num(bind_conf->ca_sign_ckch->chain); i++) { + X509 *chain_cert = sk_X509_value(bind_conf->ca_sign_ckch->chain, i); + if (!SSL_CTX_add1_chain_cert(ssl_ctx, chain_cert)) { + goto mkcert_error; + } + } + } +#endif + + if (newcrt) X509_free(newcrt); + +#ifndef OPENSSL_NO_DH +#if (HA_OPENSSL_VERSION_NUMBER < 0x3000000fL) + SSL_CTX_set_tmp_dh_callback(ssl_ctx, ssl_get_tmp_dh_cbk); +#else + ssl_sock_set_tmp_dh_from_pkey(ssl_ctx, pkey); +#endif +#endif + +#if (HA_OPENSSL_VERSION_NUMBER >= 0x10101000L) +#if defined(SSL_CTX_set1_curves_list) + { + const char *ecdhe = (bind_conf->ssl_conf.ecdhe ? bind_conf->ssl_conf.ecdhe : ECDHE_DEFAULT_CURVE); + if (!SSL_CTX_set1_curves_list(ssl_ctx, ecdhe)) + goto end; + } +#endif +#else +#if defined(SSL_CTX_set_tmp_ecdh) && !defined(OPENSSL_NO_ECDH) + { + const char *ecdhe = (bind_conf->ssl_conf.ecdhe ? 
bind_conf->ssl_conf.ecdhe : ECDHE_DEFAULT_CURVE); + EC_KEY *ecc; + int nid; + + if ((nid = OBJ_sn2nid(ecdhe)) == NID_undef) + goto end; + if (!(ecc = EC_KEY_new_by_curve_name(nid))) + goto end; + SSL_CTX_set_tmp_ecdh(ssl_ctx, ecc); + EC_KEY_free(ecc); + } +#endif /* defined(SSL_CTX_set_tmp_ecdh) && !defined(OPENSSL_NO_ECDH) */ +#endif /* HA_OPENSSL_VERSION_NUMBER >= 0x10101000L */ + end: + return ssl_ctx; + + mkcert_error: + if (ctmp) NCONF_free(ctmp); + if (tmp_ssl) SSL_free(tmp_ssl); + if (ssl_ctx) SSL_CTX_free(ssl_ctx); + if (newcrt) X509_free(newcrt); + return NULL; +} + + +/* Do a lookup for a certificate in the LRU cache used to store generated + * certificates and immediately assign it to the SSL session if not null. */ +SSL_CTX *ssl_sock_assign_generated_cert(unsigned int key, struct bind_conf *bind_conf, SSL *ssl) +{ + struct lru64 *lru = NULL; + + if (ssl_ctx_lru_tree) { + HA_RWLOCK_WRLOCK(SSL_GEN_CERTS_LOCK, &ssl_ctx_lru_rwlock); + lru = lru64_lookup(key, ssl_ctx_lru_tree, bind_conf->ca_sign_ckch->cert, 0); + if (lru && lru->domain) { + if (ssl) + SSL_set_SSL_CTX(ssl, (SSL_CTX *)lru->data); + HA_RWLOCK_WRUNLOCK(SSL_GEN_CERTS_LOCK, &ssl_ctx_lru_rwlock); + return (SSL_CTX *)lru->data; + } + HA_RWLOCK_WRUNLOCK(SSL_GEN_CERTS_LOCK, &ssl_ctx_lru_rwlock); + } + return NULL; +} + +/* Same as <ssl_sock_assign_generated_cert> but without SSL session. This + * function is not thread-safe, it should only be used to check if a certificate + * exists in the lru cache (with no warranty it will not be removed by another + * thread). It is kept for backward compatibility. */ +SSL_CTX * +ssl_sock_get_generated_cert(unsigned int key, struct bind_conf *bind_conf) +{ + return ssl_sock_assign_generated_cert(key, bind_conf, NULL); +} + +/* Set a certificate int the LRU cache used to store generated + * certificate. 
Return 0 on success, otherwise -1 */ +int ssl_sock_set_generated_cert(SSL_CTX *ssl_ctx, unsigned int key, struct bind_conf *bind_conf) +{ + struct lru64 *lru = NULL; + + if (ssl_ctx_lru_tree) { + HA_RWLOCK_WRLOCK(SSL_GEN_CERTS_LOCK, &ssl_ctx_lru_rwlock); + lru = lru64_get(key, ssl_ctx_lru_tree, bind_conf->ca_sign_ckch->cert, 0); + if (!lru) { + HA_RWLOCK_WRUNLOCK(SSL_GEN_CERTS_LOCK, &ssl_ctx_lru_rwlock); + return -1; + } + if (lru->domain && lru->data) + lru->free((SSL_CTX *)lru->data); + lru64_commit(lru, ssl_ctx, bind_conf->ca_sign_ckch->cert, 0, (void (*)(void *))SSL_CTX_free); + HA_RWLOCK_WRUNLOCK(SSL_GEN_CERTS_LOCK, &ssl_ctx_lru_rwlock); + return 0; + } + return -1; +} + +/* Compute the key of the certificate. */ +unsigned int +ssl_sock_generated_cert_key(const void *data, size_t len) +{ + return XXH32(data, len, ssl_ctx_lru_seed); +} + +/* Generate a cert and immediately assign it to the SSL session so that the cert's + * refcount is maintained regardless of the cert's presence in the LRU cache. 
+ */ +int ssl_sock_generate_certificate(const char *servername, struct bind_conf *bind_conf, SSL *ssl) +{ + X509 *cacert = bind_conf->ca_sign_ckch->cert; + SSL_CTX *ssl_ctx = NULL; + struct lru64 *lru = NULL; + unsigned int key; + + key = ssl_sock_generated_cert_key(servername, strlen(servername)); + if (ssl_ctx_lru_tree) { + HA_RWLOCK_WRLOCK(SSL_GEN_CERTS_LOCK, &ssl_ctx_lru_rwlock); + lru = lru64_get(key, ssl_ctx_lru_tree, cacert, 0); + if (lru && lru->domain) + ssl_ctx = (SSL_CTX *)lru->data; + if (!ssl_ctx && lru) { + ssl_ctx = ssl_sock_do_create_cert(servername, bind_conf, ssl); + lru64_commit(lru, ssl_ctx, cacert, 0, (void (*)(void *))SSL_CTX_free); + } + SSL_set_SSL_CTX(ssl, ssl_ctx); + HA_RWLOCK_WRUNLOCK(SSL_GEN_CERTS_LOCK, &ssl_ctx_lru_rwlock); + return 1; + } + else { + ssl_ctx = ssl_sock_do_create_cert(servername, bind_conf, ssl); + SSL_set_SSL_CTX(ssl, ssl_ctx); + /* No LRU cache, this CTX will be released as soon as the session dies */ + SSL_CTX_free(ssl_ctx); + return 1; + } + return 0; +} +int ssl_sock_generate_certificate_from_conn(struct bind_conf *bind_conf, SSL *ssl) +{ + unsigned int key; + struct connection *conn = SSL_get_ex_data(ssl, ssl_app_data_index); + + if (conn_get_dst(conn)) { + key = ssl_sock_generated_cert_key(conn->dst, get_addr_len(conn->dst)); + if (ssl_sock_assign_generated_cert(key, bind_conf, ssl)) + return 1; + } + return 0; +} + +/* Load CA cert file and private key used to generate certificates */ +int +ssl_sock_load_ca(struct bind_conf *bind_conf) +{ + struct proxy *px = bind_conf->frontend; + struct ckch_data *data = NULL; + int ret = 0; + char *err = NULL; + + if (!(bind_conf->options & BC_O_GENERATE_CERTS)) + return ret; + +#if (defined SSL_CTRL_SET_TLSEXT_HOSTNAME && !defined SSL_NO_GENERATE_CERTIFICATES) + if (global_ssl.ctx_cache) { + ssl_ctx_lru_tree = lru64_new(global_ssl.ctx_cache); + } + ssl_ctx_lru_seed = (unsigned int)time(NULL); + ssl_ctx_serial = now_ms; +#endif + + if (!bind_conf->ca_sign_file) { + 
ha_alert("Proxy '%s': cannot enable certificate generation, " + "no CA certificate File configured at [%s:%d].\n", + px->id, bind_conf->file, bind_conf->line); + goto failed; + } + + /* Allocate cert structure */ + data = calloc(1, sizeof(*data)); + if (!data) { + ha_alert("Proxy '%s': Failed to read CA certificate file '%s' at [%s:%d]. Chain allocation failure\n", + px->id, bind_conf->ca_sign_file, bind_conf->file, bind_conf->line); + goto failed; + } + + /* Try to parse file */ + if (ssl_sock_load_files_into_ckch(bind_conf->ca_sign_file, data, &err)) { + ha_alert("Proxy '%s': Failed to read CA certificate file '%s' at [%s:%d]. Chain loading failed: %s\n", + px->id, bind_conf->ca_sign_file, bind_conf->file, bind_conf->line, err); + free(err); + goto failed; + } + + /* Fail if missing cert or pkey */ + if ((!data->cert) || (!data->key)) { + ha_alert("Proxy '%s': Failed to read CA certificate file '%s' at [%s:%d]. Chain missing certificate or private key\n", + px->id, bind_conf->ca_sign_file, bind_conf->file, bind_conf->line); + goto failed; + } + + /* Final assignment to bind */ + bind_conf->ca_sign_ckch = data; + return ret; + + failed: + if (data) { + ssl_sock_free_cert_key_and_chain_contents(data); + free(data); + } + + bind_conf->options &= ~BC_O_GENERATE_CERTS; + ret++; + return ret; +} + +/* Release CA cert and private key used to generate certificated */ +void +ssl_sock_free_ca(struct bind_conf *bind_conf) +{ + if (bind_conf->ca_sign_ckch) { + ssl_sock_free_cert_key_and_chain_contents(bind_conf->ca_sign_ckch); + ha_free(&bind_conf->ca_sign_ckch); + } +} + +#endif /* !defined SSL_NO_GENERATE_CERTIFICATES */ + + +static void __ssl_gencert_deinit(void) +{ +#if (defined SSL_CTRL_SET_TLSEXT_HOSTNAME && !defined SSL_NO_GENERATE_CERTIFICATES) + if (ssl_ctx_lru_tree) { + lru64_destroy(ssl_ctx_lru_tree); + HA_RWLOCK_DESTROY(&ssl_ctx_lru_rwlock); + } +#endif +} +REGISTER_POST_DEINIT(__ssl_gencert_deinit); + diff --git a/src/ssl_ocsp.c b/src/ssl_ocsp.c index 
5b103af..7d3a485 100644 --- a/src/ssl_ocsp.c +++ b/src/ssl_ocsp.c @@ -33,13 +33,6 @@ #include <string.h> #include <unistd.h> -#include <sys/socket.h> -#include <sys/stat.h> -#include <sys/types.h> -#include <netdb.h> -#include <netinet/tcp.h> - -#include <import/ebpttree.h> #include <import/ebsttree.h> #include <import/lru.h> @@ -47,44 +40,27 @@ #include <haproxy/applet.h> #include <haproxy/arg.h> #include <haproxy/base64.h> -#include <haproxy/channel.h> +#include <haproxy/cfgparse.h> #include <haproxy/chunk.h> #include <haproxy/cli.h> #include <haproxy/connection.h> -#include <haproxy/dynbuf.h> #include <haproxy/errors.h> -#include <haproxy/fd.h> #include <haproxy/freq_ctr.h> #include <haproxy/frontend.h> #include <haproxy/global.h> -#include <haproxy/http_rules.h> +#include <haproxy/http_client.h> +#include <haproxy/istbuf.h> #include <haproxy/log.h> #include <haproxy/openssl-compat.h> -#include <haproxy/pattern-t.h> -#include <haproxy/proto_tcp.h> #include <haproxy/proxy.h> -#include <haproxy/sample.h> -#include <haproxy/sc_strm.h> -#include <haproxy/quic_conn.h> -#include <haproxy/quic_tp.h> -#include <haproxy/server.h> #include <haproxy/shctx.h> #include <haproxy/ssl_ckch.h> -#include <haproxy/ssl_crtlist.h> +#include <haproxy/ssl_ocsp-t.h> #include <haproxy/ssl_sock.h> #include <haproxy/ssl_utils.h> -#include <haproxy/stats.h> -#include <haproxy/stconn.h> -#include <haproxy/stream-t.h> #include <haproxy/task.h> #include <haproxy/ticks.h> #include <haproxy/time.h> -#include <haproxy/tools.h> -#include <haproxy/vars.h> -#include <haproxy/xxhash.h> -#include <haproxy/istbuf.h> -#include <haproxy/ssl_ocsp-t.h> -#include <haproxy/http_client.h> /* ***** READ THIS before adding code here! ***** @@ -98,6 +74,8 @@ * to conditionally define it in openssl-compat.h than using lots of ifdefs. 
*/ +static struct sockaddr_storage *ocsp_update_dst; + #ifndef OPENSSL_NO_OCSP int ocsp_ex_index = -1; @@ -383,6 +361,25 @@ int ssl_sock_update_ocsp_response(struct buffer *ocsp_response, char **err) #if !defined OPENSSL_IS_BORINGSSL /* + * Must be called under ocsp_tree_lock lock. + */ +static void ssl_sock_free_ocsp_data(struct certificate_ocsp *ocsp) +{ + ebmb_delete(&ocsp->key); + eb64_delete(&ocsp->next_update); + X509_free(ocsp->issuer); + ocsp->issuer = NULL; + sk_X509_pop_free(ocsp->chain, X509_free); + ocsp->chain = NULL; + chunk_destroy(&ocsp->response); + if (ocsp->uri) { + ha_free(&ocsp->uri->area); + ha_free(&ocsp->uri); + } + free(ocsp); +} + +/* * Decrease the refcount of the struct ocsp_response and frees it if it's not * used anymore. Also removes it from the tree if free'd. */ @@ -392,21 +389,37 @@ void ssl_sock_free_ocsp(struct certificate_ocsp *ocsp) return; HA_SPIN_LOCK(OCSP_LOCK, &ocsp_tree_lock); + ocsp->refcount_store--; + if (ocsp->refcount_store <= 0) { + eb64_delete(&ocsp->next_update); + /* Might happen if some ongoing requests kept using an SSL_CTX + * that referenced this OCSP response after the corresponding + * ckch_store was deleted or changed (via cli commands for + * instance). 
+ */ + if (ocsp->refcount <= 0) + ssl_sock_free_ocsp_data(ocsp); + } + HA_SPIN_UNLOCK(OCSP_LOCK, &ocsp_tree_lock); +} + +void ssl_sock_free_ocsp_instance(struct certificate_ocsp *ocsp) +{ + if (!ocsp) + return; + + HA_SPIN_LOCK(OCSP_LOCK, &ocsp_tree_lock); ocsp->refcount--; if (ocsp->refcount <= 0) { - ebmb_delete(&ocsp->key); eb64_delete(&ocsp->next_update); - X509_free(ocsp->issuer); - ocsp->issuer = NULL; - sk_X509_pop_free(ocsp->chain, X509_free); - ocsp->chain = NULL; - chunk_destroy(&ocsp->response); - if (ocsp->uri) { - ha_free(&ocsp->uri->area); - ha_free(&ocsp->uri); - } + /* Might happen if some ongoing requests kept using an SSL_CTX + * that referenced this OCSP response after the corresponding + * ckch_store was deleted or changed (via cli commands for + * instance). + */ + if (ocsp->refcount_store <= 0) + ssl_sock_free_ocsp_data(ocsp); - free(ocsp); } HA_SPIN_UNLOCK(OCSP_LOCK, &ocsp_tree_lock); } @@ -626,13 +639,13 @@ void ssl_sock_ocsp_free_func(void *parent, void *ptr, CRYPTO_EX_DATA *ad, int id ocsp_arg = ptr; if (ocsp_arg->is_single) { - ssl_sock_free_ocsp(ocsp_arg->s_ocsp); + ssl_sock_free_ocsp_instance(ocsp_arg->s_ocsp); ocsp_arg->s_ocsp = NULL; } else { int i; for (i = 0; i < SSL_SOCK_NUM_KEYTYPES; i++) { - ssl_sock_free_ocsp(ocsp_arg->m_ocsp[i]); + ssl_sock_free_ocsp_instance(ocsp_arg->m_ocsp[i]); ocsp_arg->m_ocsp[i] = NULL; } } @@ -907,7 +920,7 @@ static int ssl_ocsp_task_schedule() } REGISTER_POST_CHECK(ssl_ocsp_task_schedule); -void ssl_sock_free_ocsp(struct certificate_ocsp *ocsp); +void ssl_sock_free_ocsp_instance(struct certificate_ocsp *ocsp); void ssl_destroy_ocsp_update_task(void) { @@ -929,7 +942,7 @@ void ssl_destroy_ocsp_update_task(void) task_destroy(ocsp_update_task); ocsp_update_task = NULL; - ssl_sock_free_ocsp(ssl_ocsp_task_ctx.cur_ocsp); + ssl_sock_free_ocsp_instance(ssl_ocsp_task_ctx.cur_ocsp); ssl_ocsp_task_ctx.cur_ocsp = NULL; if (ssl_ocsp_task_ctx.hc) { @@ -966,12 +979,6 @@ static inline void 
ssl_ocsp_set_next_update(struct certificate_ocsp *ocsp) */ int ssl_ocsp_update_insert(struct certificate_ocsp *ocsp) { - /* This entry was only supposed to be updated once, it does not need to - * be reinserted into the update tree. - */ - if (ocsp->update_once) - return 0; - /* Set next_update based on current time and the various OCSP * minimum/maximum update times. */ @@ -980,7 +987,12 @@ int ssl_ocsp_update_insert(struct certificate_ocsp *ocsp) ocsp->fail_count = 0; HA_SPIN_LOCK(OCSP_LOCK, &ocsp_tree_lock); - eb64_insert(&ocsp_update_tree, &ocsp->next_update); + ocsp->updating = 0; + /* An entry with update_once set to 1 was only supposed to be updated + * once, it does not need to be reinserted into the update tree. + */ + if (!ocsp->update_once) + eb64_insert(&ocsp_update_tree, &ocsp->next_update); HA_SPIN_UNLOCK(OCSP_LOCK, &ocsp_tree_lock); return 0; @@ -997,12 +1009,6 @@ int ssl_ocsp_update_insert_after_error(struct certificate_ocsp *ocsp) { int replay_delay = 0; - /* This entry was only supposed to be updated once, it does not need to - * be reinserted into the update tree. - */ - if (ocsp->update_once) - return 0; - /* * Set next_update based on current time and the various OCSP * minimum/maximum update times. @@ -1025,7 +1031,12 @@ int ssl_ocsp_update_insert_after_error(struct certificate_ocsp *ocsp) ocsp->next_update.key = date.tv_sec + replay_delay; HA_SPIN_LOCK(OCSP_LOCK, &ocsp_tree_lock); - eb64_insert(&ocsp_update_tree, &ocsp->next_update); + ocsp->updating = 0; + /* An entry with update_once set to 1 was only supposed to be updated + * once, it does not need to be reinserted into the update tree. 
+ */ + if (!ocsp->update_once) + eb64_insert(&ocsp_update_tree, &ocsp->next_update); HA_SPIN_UNLOCK(OCSP_LOCK, &ocsp_tree_lock); return 0; @@ -1077,10 +1088,8 @@ void ocsp_update_response_end_cb(struct httpclient *hc) /* - * Send a log line that will mimic this previously used logformat : - * char ocspupdate_log_format[] = "%ci:%cp [%tr] %ft %[ssl_ocsp_certname] \ - * %[ssl_ocsp_status] %{+Q}[ssl_ocsp_status_str] %[ssl_ocsp_fail_cnt] \ - * %[ssl_ocsp_success_cnt]"; + * Send a log line that will contain only OCSP update related information: + * "<proxy_name> <ssl_ocsp_certname> <ocsp_status> \"<ocsp_status_str>\" <ocsp_fail_cnt> <ocsp_success_cnt>" * We can't use the regular sess_log function because we don't have any control * over the stream and session used by the httpclient which might not exist * anymore by the time we call this function. @@ -1090,8 +1099,6 @@ static void ssl_ocsp_send_log() int status_str_len = 0; char *status_str = NULL; struct certificate_ocsp *ocsp = ssl_ocsp_task_ctx.cur_ocsp; - struct tm tm; - char timebuf[25]; if (!httpclient_ocsp_update_px) return; @@ -1101,11 +1108,7 @@ static void ssl_ocsp_send_log() status_str = istptr(ocsp_update_errors[ssl_ocsp_task_ctx.update_status]); } - get_localtime(date.tv_sec, &tm); - date2str_log(timebuf, &tm, &date, 25); - - send_log(httpclient_ocsp_update_px, LOG_INFO, "-:- [%s] %s %s %u \"%.*s\" %u %u", - timebuf, + send_log(httpclient_ocsp_update_px, LOG_NOTICE, "%s %s %u \"%.*s\" %u %u", httpclient_ocsp_update_px->id, ocsp->path, ssl_ocsp_task_ctx.update_status, @@ -1211,7 +1214,7 @@ static struct task *ssl_ocsp_update_responses(struct task *task, void *context, /* Reinsert the entry into the update list so that it can be updated later */ ssl_ocsp_update_insert(ocsp); /* Release the reference kept on the updated ocsp response. 
*/ - ssl_sock_free_ocsp(ctx->cur_ocsp); + ssl_sock_free_ocsp_instance(ctx->cur_ocsp); ctx->cur_ocsp = NULL; HA_SPIN_LOCK(OCSP_LOCK, &ocsp_tree_lock); @@ -1255,6 +1258,7 @@ static struct task *ssl_ocsp_update_responses(struct task *task, void *context, eb64_delete(&ocsp->next_update); ++ocsp->refcount; + ocsp->updating = 1; ctx->cur_ocsp = ocsp; ocsp->last_update_status = OCSP_UPDT_UNKNOWN; @@ -1293,6 +1297,15 @@ static struct task *ssl_ocsp_update_responses(struct task *task, void *context, goto leave; } + /* if the ocsp_update.http_proxy option was set */ + if (ocsp_update_dst) { + hc->flags |= HC_F_HTTPPROXY; + if (!sockaddr_alloc(&hc->dst, ocsp_update_dst, sizeof(*ocsp_update_dst))) { + ha_alert("ocsp-update: Failed to allocate sockaddr in %s:%d.\n", __FUNCTION__, __LINE__); + goto leave; + } + } + if (httpclient_req_gen(hc, hc->req.url, hc->req.meth, b_data(req_body) ? ocsp_request_hdrs : NULL, b_data(req_body) ? ist2(b_orig(req_body), b_data(req_body)) : IST_NULL) != ERR_NONE) { @@ -1321,7 +1334,7 @@ leave: ++ctx->cur_ocsp->num_failure; ssl_ocsp_update_insert_after_error(ctx->cur_ocsp); /* Release the reference kept on the updated ocsp response. */ - ssl_sock_free_ocsp(ctx->cur_ocsp); + ssl_sock_free_ocsp_instance(ctx->cur_ocsp); ctx->cur_ocsp = NULL; } if (hc) @@ -1350,7 +1363,7 @@ http_error: if (hc) httpclient_stop_and_destroy(hc); /* Release the reference kept on the updated ocsp response. 
*/ - ssl_sock_free_ocsp(ctx->cur_ocsp); + ssl_sock_free_ocsp_instance(ctx->cur_ocsp); HA_SPIN_LOCK(OCSP_LOCK, &ocsp_tree_lock); /* Set next_wakeup to the new first entry of the tree */ eb = eb64_first(&ocsp_update_tree); @@ -1378,11 +1391,11 @@ static int ssl_ocsp_update_precheck() /* initialize the OCSP update dedicated httpclient */ httpclient_ocsp_update_px = httpclient_create_proxy("<OCSP-UPDATE>"); if (!httpclient_ocsp_update_px) - return 1; - httpclient_ocsp_update_px->conf.logformat_string = httpclient_log_format; + return ERR_RETRYABLE; + httpclient_ocsp_update_px->logformat.str = httpclient_log_format; httpclient_ocsp_update_px->options2 |= PR_O2_NOLOGNORM; - return 0; + return ERR_NONE; } /* initialize the proxy and servers for the HTTP client */ @@ -1433,13 +1446,24 @@ static int cli_parse_update_ocsp_response(char **args, char *payload, struct app goto end; } - update_once = (ocsp->next_update.node.leaf_p == NULL); - eb64_delete(&ocsp->next_update); + /* No need to try to update this response, it is already being updated. */ + if (!ocsp->updating) { + update_once = (ocsp->next_update.node.leaf_p == NULL); + eb64_delete(&ocsp->next_update); - /* Insert the entry at the beginning of the update tree. */ - ocsp->next_update.key = 0; - eb64_insert(&ocsp_update_tree, &ocsp->next_update); - ocsp->update_once = update_once; + /* Insert the entry at the beginning of the update tree. + * We don't need to increase the reference counter on the + * certificate_ocsp structure because we would not have a way to + * decrease it afterwards since this update operation is asynchronous. + * If the corresponding entry were to be destroyed before the update can + * be performed, which is pretty unlikely, it would not be such a + * problem because that would mean that the OCSP response is not + * actually used. 
+ */ + ocsp->next_update.key = 0; + eb64_insert(&ocsp_update_tree, &ocsp->next_update); + ocsp->update_once = update_once; + } HA_SPIN_UNLOCK(OCSP_LOCK, &ocsp_tree_lock); @@ -1675,20 +1699,12 @@ yield: #endif } -/* Check if the ckch_store and the entry does have the same configuration */ -int ocsp_update_check_cfg_consistency(struct ckch_store *store, struct crtlist_entry *entry, char *crt_path, char **err) +static void cli_release_show_ocspresponse(struct appctx *appctx) { - int err_code = ERR_NONE; - - if (store->data->ocsp_update_mode != SSL_SOCK_OCSP_UPDATE_DFLT || entry->ssl_conf) { - if ((!entry->ssl_conf && store->data->ocsp_update_mode == SSL_SOCK_OCSP_UPDATE_ON) - || (entry->ssl_conf && entry->ssl_conf->ocsp_update != SSL_SOCK_OCSP_UPDATE_OFF && - store->data->ocsp_update_mode != entry->ssl_conf->ocsp_update)) { - memprintf(err, "%sIncompatibilities found in OCSP update mode for certificate %s\n", err && *err ? *err : "", crt_path); - err_code |= ERR_ALERT | ERR_FATAL; - } - } - return err_code; + struct show_ocspresp_cli_ctx *ctx = appctx->svcctx; + + if (ctx) + ssl_sock_free_ocsp_instance(ctx->ocsp); } struct show_ocsp_updates_ctx { @@ -1845,98 +1861,168 @@ static void cli_release_show_ocsp_updates(struct appctx *appctx) HA_SPIN_UNLOCK(OCSP_LOCK, &ocsp_tree_lock); } +static int ssl_parse_global_ocsp_maxdelay(char **args, int section_type, struct proxy *curpx, + const struct proxy *defpx, const char *file, int line, + char **err) +{ + int value = 0; + + if (*(args[1]) == 0) { + memprintf(err, "'%s' expects an integer argument.", args[0]); + return -1; + } + + value = atoi(args[1]); + if (value < 0) { + memprintf(err, "'%s' expects a positive numeric value.", args[0]); + return -1; + } + + if (global_ssl.ocsp_update.delay_min > value) { + memprintf(err, "'%s' can not be lower than tune.ssl.ocsp-update.mindelay.", args[0]); + return -1; + } + + global_ssl.ocsp_update.delay_max = value; + + return 0; +} -static int -smp_fetch_ssl_ocsp_certid(const struct arg 
*args, struct sample *smp, const char *kw, void *private) +static int ssl_parse_global_ocsp_mindelay(char **args, int section_type, struct proxy *curpx, + const struct proxy *defpx, const char *file, int line, + char **err) { - struct buffer *data = get_trash_chunk(); - struct certificate_ocsp *ocsp = ssl_ocsp_task_ctx.cur_ocsp; + int value = 0; - if (!ocsp) - return 0; + if (*(args[1]) == 0) { + memprintf(err, "'%s' expects an integer argument.", args[0]); + return -1; + } + + value = atoi(args[1]); + if (value < 0) { + memprintf(err, "'%s' expects a positive numeric value.", args[0]); + return -1; + } - dump_binary(data, (char *)ocsp->key_data, ocsp->key_length); + if (value > global_ssl.ocsp_update.delay_max) { + memprintf(err, "'%s' can not be higher than tune.ssl.ocsp-update.maxdelay.", args[0]); + return -1; + } - smp->data.type = SMP_T_STR; - smp->data.u.str = *data; - return 1; + global_ssl.ocsp_update.delay_min = value; + + return 0; } -static int -smp_fetch_ssl_ocsp_certname(const struct arg *args, struct sample *smp, const char *kw, void *private) +static int ssl_parse_global_ocsp_update_mode(char **args, int section_type, struct proxy *curpx, + const struct proxy *defpx, const char *file, int line, + char **err) { - struct certificate_ocsp *ocsp = ssl_ocsp_task_ctx.cur_ocsp; + if (!*args[1]) { + memprintf(err, "'%s' : expecting <on|off>", args[0]); + return ERR_ALERT | ERR_FATAL; + } - if (!ocsp) - return 0; + if (strcmp(args[1], "on") == 0) + global_ssl.ocsp_update.mode = SSL_SOCK_OCSP_UPDATE_ON; + else if (strcmp(args[1], "off") == 0) + global_ssl.ocsp_update.mode = SSL_SOCK_OCSP_UPDATE_OFF; + else { + memprintf(err, "'%s' : expecting <on|off>", args[0]); + return ERR_ALERT | ERR_FATAL; + } - smp->data.type = SMP_T_STR; - smp->data.u.str.area = ocsp->path; - smp->data.u.str.data = strlen(ocsp->path); - return 1; + return 0; } -static int -smp_fetch_ssl_ocsp_status(const struct arg *args, struct sample *smp, const char *kw, void *private) +static int 
ssl_parse_global_ocsp_update_disable(char **args, int section_type, struct proxy *curpx, + const struct proxy *defpx, const char *file, int line, + char **err) { - struct certificate_ocsp *ocsp = ssl_ocsp_task_ctx.cur_ocsp; + if (!*args[1]) { + memprintf(err, "'%s' : expecting <on|off>", args[0]); + return ERR_ALERT | ERR_FATAL; + } - if (!ocsp) - return 0; + if (strcmp(args[1], "on") == 0) + global_ssl.ocsp_update.disable = 1; + else if (strcmp(args[1], "off") == 0) + global_ssl.ocsp_update.disable = 0; + else { + memprintf(err, "'%s' : expecting <on|off>", args[0]); + return ERR_ALERT | ERR_FATAL; + } - smp->data.type = SMP_T_SINT; - smp->data.u.sint = ssl_ocsp_task_ctx.update_status; - return 1; + return 0; } -static int -smp_fetch_ssl_ocsp_status_str(const struct arg *args, struct sample *smp, const char *kw, void *private) +static int ocsp_update_parse_global_http_proxy(char **args, int section_type, struct proxy *curpx, + const struct proxy *defpx, const char *file, int line, + char **err) { - struct certificate_ocsp *ocsp = ssl_ocsp_task_ctx.cur_ocsp; + struct sockaddr_storage *sk; + char *errmsg = NULL; - if (!ocsp) - return 0; + if (too_many_args(1, args, err, NULL)) + return -1; - if (ssl_ocsp_task_ctx.update_status >= OCSP_UPDT_ERR_LAST) - return 0; + sockaddr_free(&ocsp_update_dst); + /* 'sk' is statically allocated (no need to be freed). 
*/ + sk = str2sa_range(args[1], NULL, NULL, NULL, NULL, NULL, NULL, + &errmsg, NULL, NULL, + PA_O_PORT_OK | PA_O_STREAM | PA_O_XPRT | PA_O_CONNECT); + if (!sk) { + ha_alert("ocsp-update: Failed to parse destination address in %s\n", errmsg); + free(errmsg); + return -1; + } - smp->data.type = SMP_T_STR; - smp->data.u.str = ist2buf(ocsp_update_errors[ssl_ocsp_task_ctx.update_status]); + if (!sockaddr_alloc(&ocsp_update_dst, sk, sizeof(*sk))) { + ha_alert("ocsp-update: Failed to allocate sockaddr in %s:%d.\n", __FUNCTION__, __LINE__); + return -1; + } - return 1; + return 0; } -static int -smp_fetch_ssl_ocsp_fail_cnt(const struct arg *args, struct sample *smp, const char *kw, void *private) +int ocsp_update_init(void *value, char *buf, struct ckch_data *d, int cli, char **err) { - struct certificate_ocsp *ocsp = ssl_ocsp_task_ctx.cur_ocsp; + int ocsp_update_mode = *(int *)value; + int ret = 0; - if (!ocsp) - return 0; + /* inherit from global section */ + ocsp_update_mode = (ocsp_update_mode == SSL_SOCK_OCSP_UPDATE_DFLT) ? 
global_ssl.ocsp_update.mode : ocsp_update_mode; - smp->data.type = SMP_T_SINT; - smp->data.u.sint = ocsp->num_failure; - return 1; + if (!global_ssl.ocsp_update.disable && ocsp_update_mode == SSL_SOCK_OCSP_UPDATE_ON) { + /* We might need to create the main ocsp update task */ + ret = ssl_create_ocsp_update_task(err); + } + + return ret; } -static int -smp_fetch_ssl_ocsp_success_cnt(const struct arg *args, struct sample *smp, const char *kw, void *private) +int ocsp_update_postparser_init() { - struct certificate_ocsp *ocsp = ssl_ocsp_task_ctx.cur_ocsp; + int ret = 0; + char *err = NULL; - if (!ocsp) - return 0; + /* if the global ocsp-update.mode option is not set to "on", there is + * no need to start the task, it would have been started when parsing a + * crt-store or a crt-list */ + if (!global_ssl.ocsp_update.disable && (global_ssl.ocsp_update.mode == SSL_SOCK_OCSP_UPDATE_ON)) { + /* We might need to create the main ocsp update task */ + ret = ssl_create_ocsp_update_task(&err); + } - smp->data.type = SMP_T_SINT; - smp->data.u.sint = ocsp->num_success; - return 1; + return ret; } static struct cli_kw_list cli_kws = {{ },{ { { "set", "ssl", "ocsp-response", NULL }, "set ssl ocsp-response <resp|payload> : update a certificate's OCSP Response from a base64-encode DER", cli_parse_set_ocspresponse, NULL }, - { { "show", "ssl", "ocsp-response", NULL },"show ssl ocsp-response [[text|base64] id] : display the IDs of the OCSP responses used in memory, or the details of a single OCSP response (in text or base64 format)", cli_parse_show_ocspresponse, cli_io_handler_show_ocspresponse, NULL }, + { { "show", "ssl", "ocsp-response", NULL },"show ssl ocsp-response [[text|base64] id] : display the IDs of the OCSP responses used in memory, or the details of a single OCSP response (in text or base64 format)", cli_parse_show_ocspresponse, cli_io_handler_show_ocspresponse, cli_release_show_ocspresponse }, { { "show", "ssl", "ocsp-updates", NULL }, "show ssl ocsp-updates : display 
information about the next 'nb' ocsp responses that will be updated automatically", cli_parse_show_ocsp_updates, cli_io_handler_show_ocsp_updates, cli_release_show_ocsp_updates }, #if ((defined SSL_CTRL_SET_TLSEXT_STATUS_REQ_CB && !defined OPENSSL_NO_OCSP) && !defined OPENSSL_IS_BORINGSSL) { { "update", "ssl", "ocsp-response", NULL }, "update ssl ocsp-response <certfile> : send ocsp request and update stored ocsp response", cli_parse_update_ocsp_response, NULL, NULL }, @@ -1946,27 +2032,22 @@ static struct cli_kw_list cli_kws = {{ },{ INITCALL1(STG_REGISTER, cli_register_kw, &cli_kws); - -/* Note: must not be declared <const> as its list will be overwritten. - * Please take care of keeping this list alphabetically sorted. - * - * Those fetches only have a valid value during an OCSP update process so they - * can only be used in a log format of a log line built by the update process - * task itself. - */ -static struct sample_fetch_kw_list sample_fetch_keywords = {ILH, { - { "ssl_ocsp_certid", smp_fetch_ssl_ocsp_certid, 0, NULL, SMP_T_STR, SMP_USE_L5SRV }, - { "ssl_ocsp_certname", smp_fetch_ssl_ocsp_certname, 0, NULL, SMP_T_STR, SMP_USE_L5SRV }, - { "ssl_ocsp_status", smp_fetch_ssl_ocsp_status, 0, NULL, SMP_T_SINT, SMP_USE_L5SRV }, - { "ssl_ocsp_status_str", smp_fetch_ssl_ocsp_status_str, 0, NULL, SMP_T_STR, SMP_USE_L5SRV }, - { "ssl_ocsp_fail_cnt", smp_fetch_ssl_ocsp_fail_cnt, 0, NULL, SMP_T_SINT, SMP_USE_L5SRV }, - { "ssl_ocsp_success_cnt", smp_fetch_ssl_ocsp_success_cnt, 0, NULL, SMP_T_SINT, SMP_USE_L5SRV }, - { NULL, NULL, 0, 0, 0 }, +static struct cfg_kw_list cfg_kws = {ILH, { +#ifndef OPENSSL_NO_OCSP + { CFG_GLOBAL, "ocsp-update.disable", ssl_parse_global_ocsp_update_disable }, + { CFG_GLOBAL, "tune.ssl.ocsp-update.maxdelay", ssl_parse_global_ocsp_maxdelay }, + { CFG_GLOBAL, "ocsp-update.maxdelay", ssl_parse_global_ocsp_maxdelay }, + { CFG_GLOBAL, "tune.ssl.ocsp-update.mindelay", ssl_parse_global_ocsp_mindelay }, + { CFG_GLOBAL, "ocsp-update.mindelay", 
ssl_parse_global_ocsp_mindelay }, + { CFG_GLOBAL, "ocsp-update.mode", ssl_parse_global_ocsp_update_mode }, + { CFG_GLOBAL, "ocsp-update.httpproxy", ocsp_update_parse_global_http_proxy }, +#endif + { 0, NULL, NULL }, }}; -INITCALL1(STG_REGISTER, sample_register_fetches, &sample_fetch_keywords); - +INITCALL1(STG_REGISTER, cfg_register_keywords, &cfg_kws); +REGISTER_CONFIG_POSTPARSER("ocsp-update", ocsp_update_postparser_init); /* * Local variables: * c-indent-level: 8 diff --git a/src/ssl_sample.c b/src/ssl_sample.c index 22b4072..0757c12 100644 --- a/src/ssl_sample.c +++ b/src/ssl_sample.c @@ -219,6 +219,10 @@ static inline int sample_check_arg_base64(struct arg *arg, char **err) static int check_aes_gcm(struct arg *args, struct sample_conv *conv, const char *file, int line, char **err) { + if (conv->kw[8] == 'd') + /* flag it as "aes_gcm_dec" */ + args[0].type_flags = 1; + switch(args[0].data.sint) { case 128: case 192: @@ -238,7 +242,8 @@ static int check_aes_gcm(struct arg *args, struct sample_conv *conv, memprintf(err, "failed to parse key : %s", *err); return 0; } - if (!sample_check_arg_base64(&args[3], err)) { + if ((args[0].type_flags && !sample_check_arg_base64(&args[3], err)) || + (!args[0].type_flags && !vars_check_arg(&args[3], err))) { memprintf(err, "failed to parse aead_tag : %s", *err); return 0; } @@ -246,13 +251,37 @@ static int check_aes_gcm(struct arg *args, struct sample_conv *conv, return 1; } +#define sample_conv_aes_gcm_init(a, b, c, d, e, f) \ + ({ \ + int _ret = (a) ? \ + EVP_DecryptInit_ex(b, c, d, e, f) : \ + EVP_EncryptInit_ex(b, c, d, e, f); \ + _ret; \ + }) + +#define sample_conv_aes_gcm_update(a, b, c, d, e, f) \ + ({ \ + int _ret = (a) ? \ + EVP_DecryptUpdate(b, c, d, e, f) : \ + EVP_EncryptUpdate(b, c, d, e, f); \ + _ret; \ + }) + +#define sample_conv_aes_gcm_final(a, b, c, d) \ + ({ \ + int _ret = (a) ? 
\ + EVP_DecryptFinal_ex(b, c, d) : \ + EVP_EncryptFinal_ex(b, c, d); \ + _ret; \ + }) + /* Arguments: AES size in bits, nonce, key, tag. The last three arguments are base64 encoded */ -static int sample_conv_aes_gcm_dec(const struct arg *arg_p, struct sample *smp, void *private) +static int sample_conv_aes_gcm(const struct arg *arg_p, struct sample *smp, void *private) { struct sample nonce, key, aead_tag; struct buffer *smp_trash = NULL, *smp_trash_alloc = NULL; EVP_CIPHER_CTX *ctx = NULL; - int dec_size, ret; + int size, ret, dec; smp_trash_alloc = alloc_trash_chunk(); if (!smp_trash_alloc) @@ -278,30 +307,33 @@ static int sample_conv_aes_gcm_dec(const struct arg *arg_p, struct sample *smp, goto err; if (arg_p[1].type == ARGT_VAR) { - dec_size = base64dec(nonce.data.u.str.area, nonce.data.u.str.data, smp_trash->area, smp_trash->size); - if (dec_size < 0) + size = base64dec(nonce.data.u.str.area, nonce.data.u.str.data, smp_trash->area, smp_trash->size); + if (size < 0) goto err; - smp_trash->data = dec_size; + smp_trash->data = size; nonce.data.u.str = *smp_trash; } + /* encrypt (0) or decrypt (1) */ + dec = (arg_p[0].type_flags == 1); + /* Set cipher type and mode */ switch(arg_p[0].data.sint) { case 128: - EVP_DecryptInit_ex(ctx, EVP_aes_128_gcm(), NULL, NULL, NULL); + sample_conv_aes_gcm_init(dec, ctx, EVP_aes_128_gcm(), NULL, NULL, NULL); break; case 192: - EVP_DecryptInit_ex(ctx, EVP_aes_192_gcm(), NULL, NULL, NULL); + sample_conv_aes_gcm_init(dec, ctx, EVP_aes_192_gcm(), NULL, NULL, NULL); break; case 256: - EVP_DecryptInit_ex(ctx, EVP_aes_256_gcm(), NULL, NULL, NULL); + sample_conv_aes_gcm_init(dec, ctx, EVP_aes_256_gcm(), NULL, NULL, NULL); break; } EVP_CIPHER_CTX_ctrl(ctx, EVP_CTRL_AEAD_SET_IVLEN, nonce.data.u.str.data, NULL); /* Initialise IV */ - if(!EVP_DecryptInit_ex(ctx, NULL, NULL, NULL, (unsigned char *) nonce.data.u.str.area)) + if(!sample_conv_aes_gcm_init(dec, ctx, NULL, NULL, NULL, (unsigned char *) nonce.data.u.str.area)) goto err; 
smp_set_owner(&key, smp->px, smp->sess, smp->strm, smp->opt); @@ -309,42 +341,66 @@ static int sample_conv_aes_gcm_dec(const struct arg *arg_p, struct sample *smp, goto err; if (arg_p[2].type == ARGT_VAR) { - dec_size = base64dec(key.data.u.str.area, key.data.u.str.data, smp_trash->area, smp_trash->size); - if (dec_size < 0) + size = base64dec(key.data.u.str.area, key.data.u.str.data, smp_trash->area, smp_trash->size); + if (size < 0) goto err; - smp_trash->data = dec_size; + smp_trash->data = size; key.data.u.str = *smp_trash; } /* Initialise key */ - if (!EVP_DecryptInit_ex(ctx, NULL, NULL, (unsigned char *) key.data.u.str.area, NULL)) + if (!sample_conv_aes_gcm_init(dec, ctx, NULL, NULL, (unsigned char *) key.data.u.str.area, NULL)) goto err; - if (!EVP_DecryptUpdate(ctx, (unsigned char *) smp_trash->area, (int *) &smp_trash->data, - (unsigned char *) smp_trash_alloc->area, (int) smp_trash_alloc->data)) + if (!sample_conv_aes_gcm_update(dec, ctx, (unsigned char *) smp_trash->area, (int *) &smp_trash->data, + (unsigned char *) smp_trash_alloc->area, (int) smp_trash_alloc->data)) goto err; smp_set_owner(&aead_tag, smp->px, smp->sess, smp->strm, smp->opt); - if (!sample_conv_var2smp_str(&arg_p[3], &aead_tag)) - goto err; - - if (arg_p[3].type == ARGT_VAR) { - dec_size = base64dec(aead_tag.data.u.str.area, aead_tag.data.u.str.data, smp_trash_alloc->area, smp_trash_alloc->size); - if (dec_size < 0) + if (dec) { + if (!sample_conv_var2smp_str(&arg_p[3], &aead_tag)) goto err; - smp_trash_alloc->data = dec_size; - aead_tag.data.u.str = *smp_trash_alloc; - } - dec_size = smp_trash->data; + if (arg_p[3].type == ARGT_VAR) { + size = base64dec(aead_tag.data.u.str.area, aead_tag.data.u.str.data, smp_trash_alloc->area, + smp_trash_alloc->size); + if (size < 0) + goto err; + smp_trash_alloc->data = size; + aead_tag.data.u.str = *smp_trash_alloc; + } - EVP_CIPHER_CTX_ctrl(ctx, EVP_CTRL_AEAD_SET_TAG, aead_tag.data.u.str.data, (void *) aead_tag.data.u.str.area); - ret = 
EVP_DecryptFinal_ex(ctx, (unsigned char *) smp_trash->area + smp_trash->data, (int *) &smp_trash->data); + EVP_CIPHER_CTX_ctrl(ctx, EVP_CTRL_AEAD_SET_TAG, aead_tag.data.u.str.data, + (void *) aead_tag.data.u.str.area); + } + + size = smp_trash->data; + ret = sample_conv_aes_gcm_final(dec, ctx, (unsigned char *) smp_trash->area + smp_trash->data, + (int *) &smp_trash->data); if (ret <= 0) goto err; - smp->data.u.str.data = dec_size + smp_trash->data; + if (!dec) { + struct buffer *trash = get_trash_chunk(); + + EVP_CIPHER_CTX_ctrl(ctx, EVP_CTRL_AEAD_GET_TAG, 16, (void *) trash->area); + + aead_tag.data.u.str = *smp_trash_alloc; + ret = a2base64(trash->area, 16, aead_tag.data.u.str.area, aead_tag.data.u.str.size); + if (ret < 0) + goto err; + + aead_tag.data.u.str.data = ret; + aead_tag.data.type = SMP_T_STR; + + if (!var_set(arg_p[3].data.var.name_hash, arg_p[3].data.var.scope, &aead_tag, + (arg_p[3].data.var.scope == SCOPE_PROC) ? VF_COND_IFEXISTS : 0)) { + goto err; + } + } + + smp->data.u.str.data = size + smp_trash->data; smp->data.u.str.area = smp_trash->area; smp->data.type = SMP_T_BIN; smp_dup(smp); @@ -1317,61 +1373,61 @@ smp_fetch_ssl_fc_is_resumed(const struct arg *args, struct sample *smp, const ch static int smp_fetch_ssl_fc_ec(const struct arg *args, struct sample *smp, const char *kw, void *private) { - struct connection *conn; - SSL *ssl; - int __maybe_unused nid; - char *curve_name; - - if (obj_type(smp->sess->origin) == OBJ_TYPE_CHECK) - conn = (kw[4] == 'b') ? sc_conn(__objt_check(smp->sess->origin)->sc) : NULL; - else - conn = (kw[4] != 'b') ? objt_conn(smp->sess->origin) : - smp->strm ? sc_conn(smp->strm->scb) : NULL; - - ssl = ssl_sock_get_ssl_object(conn); - if (!ssl) - return 0; - - /* - * SSL_get0_group_name is a function to get the curve name and is available from - * OpenSSL v3.2 onwards. For OpenSSL >=3.0 and <3.2, we will continue to use - * SSL_get_negotiated_group to get the curve name. 
- */ - #if (HA_OPENSSL_VERSION_NUMBER >= 0x3020000fL) - curve_name = (char *)SSL_get0_group_name(ssl); - if (curve_name == NULL) - return 0; - else { - /** - * The curve name returned by SSL_get0_group_name is in lowercase whereas the curve - * name returned when we use `SSL_get_negotiated_group` and `OBJ_nid2sn` is the - * short name and is in upper case. To make the return value consistent across the - * different functional calls and to make it consistent while upgrading OpenSSL versions, - * will convert the curve name returned by SSL_get0_group_name to upper case. - */ - int i; - - for (i = 0; curve_name[i]; i++) - curve_name[i] = toupper(curve_name[i]); - } - #else - nid = SSL_get_negotiated_group(ssl); - if (!nid) - return 0; - curve_name = (char *)OBJ_nid2sn(nid); - if (curve_name == NULL) - return 0; - #endif - - smp->data.u.str.area = curve_name; - if (!smp->data.u.str.area) - return 0; - - smp->data.type = SMP_T_STR; - smp->flags |= SMP_F_VOL_SESS | SMP_F_CONST; - smp->data.u.str.data = strlen(smp->data.u.str.area); - - return 1; + struct connection *conn; + SSL *ssl; + int __maybe_unused nid; + char *curve_name; + + if (obj_type(smp->sess->origin) == OBJ_TYPE_CHECK) + conn = (kw[4] == 'b') ? sc_conn(__objt_check(smp->sess->origin)->sc) : NULL; + else + conn = (kw[4] != 'b') ? objt_conn(smp->sess->origin) : + smp->strm ? sc_conn(smp->strm->scb) : NULL; + + ssl = ssl_sock_get_ssl_object(conn); + if (!ssl) + return 0; + + /* + * SSL_get0_group_name is a function to get the curve name and is available from + * OpenSSL v3.2 onwards. For OpenSSL >=3.0 and <3.2, we will continue to use + * SSL_get_negotiated_group to get the curve name. 
+ */ +# if (HA_OPENSSL_VERSION_NUMBER >= 0x3020000fL) + curve_name = (char *)SSL_get0_group_name(ssl); + if (curve_name == NULL) { + return 0; + } else { + /* + * The curve name returned by SSL_get0_group_name is in lowercase whereas the curve + * name returned when we use `SSL_get_negotiated_group` and `OBJ_nid2sn` is the + * short name and is in upper case. To make the return value consistent across the + * different functional calls and to make it consistent while upgrading OpenSSL versions, + * will convert the curve name returned by SSL_get0_group_name to upper case. + */ + int i; + + for (i = 0; curve_name[i]; i++) + curve_name[i] = toupper(curve_name[i]); + } +# else + nid = SSL_get_negotiated_group(ssl); + if (!nid) + return 0; + curve_name = (char *)OBJ_nid2sn(nid); + if (curve_name == NULL) + return 0; +# endif + + smp->data.u.str.area = curve_name; + if (!smp->data.u.str.area) + return 0; + + smp->data.type = SMP_T_STR; + smp->flags |= SMP_F_VOL_SESS | SMP_F_CONST; + smp->data.u.str.data = strlen(smp->data.u.str.area); + + return 1; } #endif @@ -2263,6 +2319,15 @@ static struct sample_fetch_kw_list sample_fetch_keywords = {ILH, { { "ssl_bc_server_random", smp_fetch_ssl_fc_random, 0, NULL, SMP_T_BIN, SMP_USE_L5SRV }, { "ssl_bc_session_key", smp_fetch_ssl_fc_session_key, 0, NULL, SMP_T_BIN, SMP_USE_L5SRV }, #endif +#ifdef HAVE_SSL_KEYLOG + { "ssl_bc_client_early_traffic_secret", smp_fetch_ssl_x_keylog, 0, NULL, SMP_T_STR, SMP_USE_L5CLI }, + { "ssl_bc_client_handshake_traffic_secret", smp_fetch_ssl_x_keylog, 0, NULL, SMP_T_STR, SMP_USE_L5CLI }, + { "ssl_bc_server_handshake_traffic_secret", smp_fetch_ssl_x_keylog, 0, NULL, SMP_T_STR, SMP_USE_L5CLI }, + { "ssl_bc_client_traffic_secret_0", smp_fetch_ssl_x_keylog, 0, NULL, SMP_T_STR, SMP_USE_L5CLI }, + { "ssl_bc_server_traffic_secret_0", smp_fetch_ssl_x_keylog, 0, NULL, SMP_T_STR, SMP_USE_L5CLI }, + { "ssl_bc_exporter_secret", smp_fetch_ssl_x_keylog, 0, NULL, SMP_T_STR, SMP_USE_L5CLI }, + { 
"ssl_bc_early_exporter_secret", smp_fetch_ssl_x_keylog, 0, NULL, SMP_T_STR, SMP_USE_L5CLI }, +#endif { "ssl_bc_err", smp_fetch_ssl_fc_err, 0, NULL, SMP_T_SINT, SMP_USE_L5SRV }, { "ssl_bc_err_str", smp_fetch_ssl_fc_err_str, 0, NULL, SMP_T_STR, SMP_USE_L5SRV }, { "ssl_c_ca_err", smp_fetch_ssl_c_ca_err, 0, NULL, SMP_T_SINT, SMP_USE_L5CLI }, @@ -2367,7 +2432,8 @@ INITCALL1(STG_REGISTER, sample_register_fetches, &sample_fetch_keywords); static struct sample_conv_kw_list sample_conv_kws = {ILH, { { "sha2", sample_conv_sha2, ARG1(0, SINT), smp_check_sha2, SMP_T_BIN, SMP_T_BIN }, #ifdef EVP_CIPH_GCM_MODE - { "aes_gcm_dec", sample_conv_aes_gcm_dec, ARG4(4,SINT,STR,STR,STR), check_aes_gcm, SMP_T_BIN, SMP_T_BIN }, + { "aes_gcm_enc", sample_conv_aes_gcm, ARG4(4,SINT,STR,STR,STR), check_aes_gcm, SMP_T_BIN, SMP_T_BIN }, + { "aes_gcm_dec", sample_conv_aes_gcm, ARG4(4,SINT,STR,STR,STR), check_aes_gcm, SMP_T_BIN, SMP_T_BIN }, #endif { "x509_v_err_str", sample_conv_x509_v_err, 0, NULL, SMP_T_SINT, SMP_T_STR }, { "digest", sample_conv_crypto_digest, ARG1(1,STR), check_crypto_digest, SMP_T_BIN, SMP_T_BIN }, diff --git a/src/ssl_sock.c b/src/ssl_sock.c index 96d826e..e6bf3ff 100644 --- a/src/ssl_sock.c +++ b/src/ssl_sock.c @@ -72,6 +72,7 @@ #include <haproxy/shctx.h> #include <haproxy/ssl_ckch.h> #include <haproxy/ssl_crtlist.h> +#include <haproxy/ssl_gencert.h> #include <haproxy/ssl_sock.h> #include <haproxy/ssl_utils.h> #include <haproxy/stats.h> @@ -135,9 +136,12 @@ struct global_ssl global_ssl = { #ifdef HAVE_SSL_KEYLOG .keylog = 0, #endif + .security_level = -1, #ifndef OPENSSL_NO_OCSP .ocsp_update.delay_max = SSL_OCSP_UPDATE_DELAY_MAX, .ocsp_update.delay_min = SSL_OCSP_UPDATE_DELAY_MIN, + .ocsp_update.mode = SSL_SOCK_OCSP_UPDATE_OFF, + .ocsp_update.disable = 0, #endif }; @@ -156,7 +160,7 @@ enum { SSL_ST_STATS_COUNT /* must be the last member of the enum */ }; -static struct name_desc ssl_stats[] = { +static struct stat_col ssl_stats[] = { [SSL_ST_SESS] = { .name = "ssl_sess", 
.desc = "Total number of ssl sessions established" }, [SSL_ST_REUSED_SESS] = { .name = "ssl_reused_sess", @@ -171,13 +175,37 @@ static struct ssl_counters { long long failed_handshake; } ssl_counters; -static void ssl_fill_stats(void *data, struct field *stats) +static int ssl_fill_stats(void *data, struct field *stats, unsigned int *selected_field) { struct ssl_counters *counters = data; + unsigned int current_field = (selected_field != NULL ? *selected_field : 0); - stats[SSL_ST_SESS] = mkf_u64(FN_COUNTER, counters->sess); - stats[SSL_ST_REUSED_SESS] = mkf_u64(FN_COUNTER, counters->reused_sess); - stats[SSL_ST_FAILED_HANDSHAKE] = mkf_u64(FN_COUNTER, counters->failed_handshake); + for (; current_field < SSL_ST_STATS_COUNT; current_field++) { + struct field metric = { 0 }; + + switch (current_field) { + case SSL_ST_SESS: + metric = mkf_u64(FN_COUNTER, counters->sess); + break; + case SSL_ST_REUSED_SESS: + metric = mkf_u64(FN_COUNTER, counters->reused_sess); + break; + case SSL_ST_FAILED_HANDSHAKE: + metric = mkf_u64(FN_COUNTER, counters->failed_handshake); + break; + default: + /* not used for frontends. If a specific metric + * is requested, return an error. Otherwise continue. 
+ */ + if (selected_field != NULL) + return 0; + continue; + } + stats[current_field] = metric; + if (selected_field != NULL) + break; + } + return 1; } static struct stats_module ssl_stats_module = { @@ -504,38 +532,8 @@ static HASSL_DH *global_dh = NULL; static HASSL_DH *local_dh_1024 = NULL; static HASSL_DH *local_dh_2048 = NULL; static HASSL_DH *local_dh_4096 = NULL; -#if (HA_OPENSSL_VERSION_NUMBER < 0x3000000fL) -static DH *ssl_get_tmp_dh_cbk(SSL *ssl, int export, int keylen); -#else -static void ssl_sock_set_tmp_dh_from_pkey(SSL_CTX *ctx, EVP_PKEY *pkey); -#endif #endif /* OPENSSL_NO_DH */ -#if (defined SSL_CTRL_SET_TLSEXT_HOSTNAME && !defined SSL_NO_GENERATE_CERTIFICATES) -/* X509V3 Extensions that will be added on generated certificates */ -#define X509V3_EXT_SIZE 5 -static char *x509v3_ext_names[X509V3_EXT_SIZE] = { - "basicConstraints", - "nsComment", - "subjectKeyIdentifier", - "authorityKeyIdentifier", - "keyUsage", -}; -static char *x509v3_ext_values[X509V3_EXT_SIZE] = { - "CA:FALSE", - "\"OpenSSL Generated Certificate\"", - "hash", - "keyid,issuer:always", - "nonRepudiation,digitalSignature,keyEncipherment" -}; -/* LRU cache to store generated certificate */ -static struct lru64_head *ssl_ctx_lru_tree = NULL; -static unsigned int ssl_ctx_lru_seed = 0; -static unsigned int ssl_ctx_serial; -__decl_rwlock(ssl_ctx_lru_rwlock); - -#endif // SSL_CTRL_SET_TLSEXT_HOSTNAME - /* The order here matters for picking a default context, * keep the most common keytype at the bottom of the list */ @@ -1109,40 +1107,40 @@ static int tlskeys_finalize_config(void) * Returns 1 if no ".ocsp" file found, 0 if OCSP status extension is * successfully enabled, or -1 in other error case. 
*/ -static int ssl_sock_load_ocsp(const char *path, SSL_CTX *ctx, struct ckch_data *data, STACK_OF(X509) *chain) +static int ssl_sock_load_ocsp(const char *path, SSL_CTX *ctx, struct ckch_store *store, STACK_OF(X509) *chain) { + struct ckch_data *data = store->data; X509 *x, *issuer; int i, ret = -1; struct certificate_ocsp *ocsp = NULL, *iocsp; char *warn = NULL; unsigned char *p; -#ifndef USE_OPENSSL_WOLFSSL -#if (HA_OPENSSL_VERSION_NUMBER >= 0x10101000L) - int (*callback) (SSL *, void *); -#else - void (*callback) (void); -#endif +#ifdef USE_OPENSSL_WOLFSSL + /* typedef int(*tlsextStatusCb)(WOLFSSL* ssl, void*); */ + tlsextStatusCb callback = NULL; +#elif (HA_OPENSSL_VERSION_NUMBER >= 0x10101000L) + int (*callback) (SSL *, void *) = NULL; #else - tlsextStatusCb callback; + void (*callback) (void) = NULL; #endif struct buffer *ocsp_uri = get_trash_chunk(); char *err = NULL; size_t path_len; + int inc_refcount_store = 0; + int enable_auto_update = (store->conf.ocsp_update_mode == SSL_SOCK_OCSP_UPDATE_ON) || + (store->conf.ocsp_update_mode == SSL_SOCK_OCSP_UPDATE_DFLT && + global_ssl.ocsp_update.mode == SSL_SOCK_OCSP_UPDATE_ON); x = data->cert; if (!x) goto out; ssl_ocsp_get_uri_from_cert(x, ocsp_uri, &err); - /* We should have an "OCSP URI" field in order for auto update to work. */ - if (data->ocsp_update_mode == SSL_SOCK_OCSP_UPDATE_ON && b_data(ocsp_uri) == 0) - goto out; - - /* In case of ocsp update mode set to 'on', this function might be - * called with no known ocsp response. If no ocsp uri can be found in - * the certificate, nothing needs to be done here. */ if (!data->ocsp_response && !data->ocsp_cid) { - if (data->ocsp_update_mode != SSL_SOCK_OCSP_UPDATE_ON || b_data(ocsp_uri) == 0) { + /* In case of ocsp update mode set to 'on', this function might + * be called with no known ocsp response. If no ocsp uri can be + * found in the certificate, nothing needs to be done here. 
*/ + if (!enable_auto_update || b_data(ocsp_uri) == 0) { ret = 0; goto out; } @@ -1163,8 +1161,10 @@ static int ssl_sock_load_ocsp(const char *path, SSL_CTX *ctx, struct ckch_data * if (!issuer) goto out; - if (!data->ocsp_cid) + if (!data->ocsp_cid) { data->ocsp_cid = OCSP_cert_to_id(0, x, issuer); + inc_refcount_store = 1; + } if (!data->ocsp_cid) goto out; @@ -1185,12 +1185,11 @@ static int ssl_sock_load_ocsp(const char *path, SSL_CTX *ctx, struct ckch_data * if (iocsp == ocsp) ocsp = NULL; -#ifndef SSL_CTX_get_tlsext_status_cb -# define SSL_CTX_get_tlsext_status_cb(ctx, cb) \ - *cb = (void (*) (void))ctx->tlsext_status_cb; -#endif SSL_CTX_get_tlsext_status_cb(ctx, &callback); + if (inc_refcount_store) + iocsp->refcount_store++; + if (!callback) { struct ocsp_cbk_arg *cb_arg; EVP_PKEY *pkey; @@ -1282,7 +1281,7 @@ static int ssl_sock_load_ocsp(const char *path, SSL_CTX *ctx, struct ckch_data * */ memcpy(iocsp->path, path, path_len + 1); - if (data->ocsp_update_mode == SSL_SOCK_OCSP_UPDATE_ON) { + if (enable_auto_update) { ssl_ocsp_update_insert(iocsp); /* If we are during init the update task is not * scheduled yet so a wakeup won't do anything. @@ -1294,7 +1293,7 @@ static int ssl_sock_load_ocsp(const char *path, SSL_CTX *ctx, struct ckch_data * if (ocsp_update_task) task_wakeup(ocsp_update_task, TASK_WOKEN_MSG); } - } else if (iocsp->uri && data->ocsp_update_mode == SSL_SOCK_OCSP_UPDATE_ON) { + } else if (iocsp->uri && enable_auto_update) { /* This unlikely case can happen if a series of "del ssl * crt-list" / "add ssl crt-list" commands are made on the CLI. * In such a case, the OCSP response tree entry will be created @@ -1910,342 +1909,6 @@ static int ssl_sock_advertise_alpn_protos(SSL *s, const unsigned char **out, } #endif -#ifdef SSL_CTRL_SET_TLSEXT_HOSTNAME -#ifndef SSL_NO_GENERATE_CERTIFICATES - -/* Configure a DNS SAN extension on a certificate. 
*/ -int ssl_sock_add_san_ext(X509V3_CTX* ctx, X509* cert, const char *servername) { - int failure = 0; - X509_EXTENSION *san_ext = NULL; - CONF *conf = NULL; - struct buffer *san_name = get_trash_chunk(); - - conf = NCONF_new(NULL); - if (!conf) { - failure = 1; - goto cleanup; - } - - /* Build an extension based on the DNS entry above */ - chunk_appendf(san_name, "DNS:%s", servername); - san_ext = X509V3_EXT_nconf_nid(conf, ctx, NID_subject_alt_name, san_name->area); - if (!san_ext) { - failure = 1; - goto cleanup; - } - - /* Add the extension */ - if (!X509_add_ext(cert, san_ext, -1 /* Add to end */)) { - failure = 1; - goto cleanup; - } - - /* Success */ - failure = 0; - -cleanup: - if (NULL != san_ext) X509_EXTENSION_free(san_ext); - if (NULL != conf) NCONF_free(conf); - - return failure; -} - -/* Create a X509 certificate with the specified servername and serial. This - * function returns a SSL_CTX object or NULL if an error occurs. */ -static SSL_CTX * -ssl_sock_do_create_cert(const char *servername, struct bind_conf *bind_conf, SSL *ssl) -{ - X509 *cacert = bind_conf->ca_sign_ckch->cert; - EVP_PKEY *capkey = bind_conf->ca_sign_ckch->key; - SSL_CTX *ssl_ctx = NULL; - X509 *newcrt = NULL; - EVP_PKEY *pkey = NULL; - SSL *tmp_ssl = NULL; - CONF *ctmp = NULL; - X509_NAME *name; - const EVP_MD *digest; - X509V3_CTX ctx; - unsigned int i; - int key_type; - - /* Get the private key of the default certificate and use it */ -#ifdef HAVE_SSL_CTX_get0_privatekey - pkey = SSL_CTX_get0_privatekey(bind_conf->default_ctx); -#else - tmp_ssl = SSL_new(bind_conf->default_ctx); - if (tmp_ssl) - pkey = SSL_get_privatekey(tmp_ssl); -#endif - if (!pkey) - goto mkcert_error; - - /* Create the certificate */ - if (!(newcrt = X509_new())) - goto mkcert_error; - - /* Set version number for the certificate (X509v3) and the serial - * number */ - if (X509_set_version(newcrt, 2L) != 1) - goto mkcert_error; - ASN1_INTEGER_set(X509_get_serialNumber(newcrt), 
_HA_ATOMIC_ADD_FETCH(&ssl_ctx_serial, 1)); - - /* Set duration for the certificate */ - if (!X509_gmtime_adj(X509_getm_notBefore(newcrt), (long)-60*60*24) || - !X509_gmtime_adj(X509_getm_notAfter(newcrt),(long)60*60*24*365)) - goto mkcert_error; - - /* set public key in the certificate */ - if (X509_set_pubkey(newcrt, pkey) != 1) - goto mkcert_error; - - /* Set issuer name from the CA */ - if (!(name = X509_get_subject_name(cacert))) - goto mkcert_error; - if (X509_set_issuer_name(newcrt, name) != 1) - goto mkcert_error; - - /* Set the subject name using the same, but the CN */ - name = X509_NAME_dup(name); - if (X509_NAME_add_entry_by_txt(name, "CN", MBSTRING_ASC, - (const unsigned char *)servername, - -1, -1, 0) != 1) { - X509_NAME_free(name); - goto mkcert_error; - } - if (X509_set_subject_name(newcrt, name) != 1) { - X509_NAME_free(name); - goto mkcert_error; - } - X509_NAME_free(name); - - /* Add x509v3 extensions as specified */ - ctmp = NCONF_new(NULL); - X509V3_set_ctx(&ctx, cacert, newcrt, NULL, NULL, 0); - for (i = 0; i < X509V3_EXT_SIZE; i++) { - X509_EXTENSION *ext; - - if (!(ext = X509V3_EXT_nconf(ctmp, &ctx, x509v3_ext_names[i], x509v3_ext_values[i]))) - goto mkcert_error; - if (!X509_add_ext(newcrt, ext, -1)) { - X509_EXTENSION_free(ext); - goto mkcert_error; - } - X509_EXTENSION_free(ext); - } - - /* Add SAN extension */ - if (ssl_sock_add_san_ext(&ctx, newcrt, servername)) { - goto mkcert_error; - } - - /* Sign the certificate with the CA private key */ - - key_type = EVP_PKEY_base_id(capkey); - - if (key_type == EVP_PKEY_DSA) - digest = EVP_sha1(); - else if (key_type == EVP_PKEY_RSA) - digest = EVP_sha256(); - else if (key_type == EVP_PKEY_EC) - digest = EVP_sha256(); - else { -#ifdef ASN1_PKEY_CTRL_DEFAULT_MD_NID - int nid; - - if (EVP_PKEY_get_default_digest_nid(capkey, &nid) <= 0) - goto mkcert_error; - if (!(digest = EVP_get_digestbynid(nid))) - goto mkcert_error; -#else - goto mkcert_error; -#endif - } - - if (!(X509_sign(newcrt, capkey, 
digest))) - goto mkcert_error; - - /* Create and set the new SSL_CTX */ - if (!(ssl_ctx = SSL_CTX_new(SSLv23_server_method()))) - goto mkcert_error; - if (!SSL_CTX_use_PrivateKey(ssl_ctx, pkey)) - goto mkcert_error; - if (!SSL_CTX_use_certificate(ssl_ctx, newcrt)) - goto mkcert_error; - if (!SSL_CTX_check_private_key(ssl_ctx)) - goto mkcert_error; - - /* Build chaining the CA cert and the rest of the chain, keep these order */ -#if defined(SSL_CTX_add1_chain_cert) - if (!SSL_CTX_add1_chain_cert(ssl_ctx, bind_conf->ca_sign_ckch->cert)) { - goto mkcert_error; - } - - if (bind_conf->ca_sign_ckch->chain) { - for (i = 0; i < sk_X509_num(bind_conf->ca_sign_ckch->chain); i++) { - X509 *chain_cert = sk_X509_value(bind_conf->ca_sign_ckch->chain, i); - if (!SSL_CTX_add1_chain_cert(ssl_ctx, chain_cert)) { - goto mkcert_error; - } - } - } -#endif - - if (newcrt) X509_free(newcrt); - -#ifndef OPENSSL_NO_DH -#if (HA_OPENSSL_VERSION_NUMBER < 0x3000000fL) - SSL_CTX_set_tmp_dh_callback(ssl_ctx, ssl_get_tmp_dh_cbk); -#else - ssl_sock_set_tmp_dh_from_pkey(ssl_ctx, pkey); -#endif -#endif - -#if (HA_OPENSSL_VERSION_NUMBER >= 0x10101000L) -#if defined(SSL_CTX_set1_curves_list) - { - const char *ecdhe = (bind_conf->ssl_conf.ecdhe ? bind_conf->ssl_conf.ecdhe : ECDHE_DEFAULT_CURVE); - if (!SSL_CTX_set1_curves_list(ssl_ctx, ecdhe)) - goto end; - } -#endif -#else -#if defined(SSL_CTX_set_tmp_ecdh) && !defined(OPENSSL_NO_ECDH) - { - const char *ecdhe = (bind_conf->ssl_conf.ecdhe ? 
bind_conf->ssl_conf.ecdhe : ECDHE_DEFAULT_CURVE); - EC_KEY *ecc; - int nid; - - if ((nid = OBJ_sn2nid(ecdhe)) == NID_undef) - goto end; - if (!(ecc = EC_KEY_new_by_curve_name(nid))) - goto end; - SSL_CTX_set_tmp_ecdh(ssl_ctx, ecc); - EC_KEY_free(ecc); - } -#endif /* defined(SSL_CTX_set_tmp_ecdh) && !defined(OPENSSL_NO_ECDH) */ -#endif /* HA_OPENSSL_VERSION_NUMBER >= 0x10101000L */ - end: - return ssl_ctx; - - mkcert_error: - if (ctmp) NCONF_free(ctmp); - if (tmp_ssl) SSL_free(tmp_ssl); - if (ssl_ctx) SSL_CTX_free(ssl_ctx); - if (newcrt) X509_free(newcrt); - return NULL; -} - - -/* Do a lookup for a certificate in the LRU cache used to store generated - * certificates and immediately assign it to the SSL session if not null. */ -SSL_CTX * -ssl_sock_assign_generated_cert(unsigned int key, struct bind_conf *bind_conf, SSL *ssl) -{ - struct lru64 *lru = NULL; - - if (ssl_ctx_lru_tree) { - HA_RWLOCK_WRLOCK(SSL_GEN_CERTS_LOCK, &ssl_ctx_lru_rwlock); - lru = lru64_lookup(key, ssl_ctx_lru_tree, bind_conf->ca_sign_ckch->cert, 0); - if (lru && lru->domain) { - if (ssl) - SSL_set_SSL_CTX(ssl, (SSL_CTX *)lru->data); - HA_RWLOCK_WRUNLOCK(SSL_GEN_CERTS_LOCK, &ssl_ctx_lru_rwlock); - return (SSL_CTX *)lru->data; - } - HA_RWLOCK_WRUNLOCK(SSL_GEN_CERTS_LOCK, &ssl_ctx_lru_rwlock); - } - return NULL; -} - -/* Same as <ssl_sock_assign_generated_cert> but without SSL session. This - * function is not thread-safe, it should only be used to check if a certificate - * exists in the lru cache (with no warranty it will not be removed by another - * thread). It is kept for backward compatibility. */ -SSL_CTX * -ssl_sock_get_generated_cert(unsigned int key, struct bind_conf *bind_conf) -{ - return ssl_sock_assign_generated_cert(key, bind_conf, NULL); -} - -/* Set a certificate int the LRU cache used to store generated - * certificate. 
Return 0 on success, otherwise -1 */ -int -ssl_sock_set_generated_cert(SSL_CTX *ssl_ctx, unsigned int key, struct bind_conf *bind_conf) -{ - struct lru64 *lru = NULL; - - if (ssl_ctx_lru_tree) { - HA_RWLOCK_WRLOCK(SSL_GEN_CERTS_LOCK, &ssl_ctx_lru_rwlock); - lru = lru64_get(key, ssl_ctx_lru_tree, bind_conf->ca_sign_ckch->cert, 0); - if (!lru) { - HA_RWLOCK_WRUNLOCK(SSL_GEN_CERTS_LOCK, &ssl_ctx_lru_rwlock); - return -1; - } - if (lru->domain && lru->data) - lru->free((SSL_CTX *)lru->data); - lru64_commit(lru, ssl_ctx, bind_conf->ca_sign_ckch->cert, 0, (void (*)(void *))SSL_CTX_free); - HA_RWLOCK_WRUNLOCK(SSL_GEN_CERTS_LOCK, &ssl_ctx_lru_rwlock); - return 0; - } - return -1; -} - -/* Compute the key of the certificate. */ -unsigned int -ssl_sock_generated_cert_key(const void *data, size_t len) -{ - return XXH32(data, len, ssl_ctx_lru_seed); -} - -/* Generate a cert and immediately assign it to the SSL session so that the cert's - * refcount is maintained regardless of the cert's presence in the LRU cache. 
- */ -static int -ssl_sock_generate_certificate(const char *servername, struct bind_conf *bind_conf, SSL *ssl) -{ - X509 *cacert = bind_conf->ca_sign_ckch->cert; - SSL_CTX *ssl_ctx = NULL; - struct lru64 *lru = NULL; - unsigned int key; - - key = ssl_sock_generated_cert_key(servername, strlen(servername)); - if (ssl_ctx_lru_tree) { - HA_RWLOCK_WRLOCK(SSL_GEN_CERTS_LOCK, &ssl_ctx_lru_rwlock); - lru = lru64_get(key, ssl_ctx_lru_tree, cacert, 0); - if (lru && lru->domain) - ssl_ctx = (SSL_CTX *)lru->data; - if (!ssl_ctx && lru) { - ssl_ctx = ssl_sock_do_create_cert(servername, bind_conf, ssl); - lru64_commit(lru, ssl_ctx, cacert, 0, (void (*)(void *))SSL_CTX_free); - } - SSL_set_SSL_CTX(ssl, ssl_ctx); - HA_RWLOCK_WRUNLOCK(SSL_GEN_CERTS_LOCK, &ssl_ctx_lru_rwlock); - return 1; - } - else { - ssl_ctx = ssl_sock_do_create_cert(servername, bind_conf, ssl); - SSL_set_SSL_CTX(ssl, ssl_ctx); - /* No LRU cache, this CTX will be released as soon as the session dies */ - SSL_CTX_free(ssl_ctx); - return 1; - } - return 0; -} -static int -ssl_sock_generate_certificate_from_conn(struct bind_conf *bind_conf, SSL *ssl) -{ - unsigned int key; - struct connection *conn = SSL_get_ex_data(ssl, ssl_app_data_index); - - if (conn_get_dst(conn)) { - key = ssl_sock_generated_cert_key(conn->dst, get_addr_len(conn->dst)); - if (ssl_sock_assign_generated_cert(key, bind_conf, ssl)) - return 1; - } - return 0; -} -#endif /* !defined SSL_NO_GENERATE_CERTIFICATES */ - #if (HA_OPENSSL_VERSION_NUMBER < 0x1010000fL) static void ctx_set_SSLv3_func(SSL_CTX *ctx, set_context_func c) @@ -2351,7 +2014,7 @@ static void ssl_sock_switchctx_set(SSL *ssl, SSL_CTX *ctx) * * This function does a lookup in the bind_conf sni tree so the caller should lock its tree. 
*/ -static __maybe_unused struct sni_ctx *ssl_sock_chose_sni_ctx(struct bind_conf *s, const char *servername, +struct sni_ctx *ssl_sock_chose_sni_ctx(struct bind_conf *s, const char *servername, int have_rsa_sig, int have_ecdsa_sig) { struct ebmb_node *node, *n, *node_ecdsa = NULL, *node_rsa = NULL, *node_anonymous = NULL; @@ -2365,6 +2028,9 @@ static __maybe_unused struct sni_ctx *ssl_sock_chose_sni_ctx(struct bind_conf *s break; } } + /* if the servername is empty look for the default in the wildcard list */ + if (!*servername) + wildp = servername; /* Look for an ECDSA, RSA and DSA certificate, first in the single * name and if not found in the wildcard */ @@ -2463,7 +2129,8 @@ int ssl_sock_switchctx_cbk(SSL *ssl, int *al, void *arg) int has_rsa_sig = 0, has_ecdsa_sig = 0; struct sni_ctx *sni_ctx; const char *servername; - size_t servername_len; + size_t servername_len = 0; + int default_lookup = 0; /* did we lookup for a default yet? */ int allow_early = 0; int i; @@ -2551,14 +2218,16 @@ int ssl_sock_switchctx_cbk(SSL *ssl, int *al, void *arg) goto allow_early; } #endif - /* without SNI extension, is the default_ctx (need SSL_TLSEXT_ERR_NOACK) */ - if (!s->strict_sni) { - HA_RWLOCK_RDLOCK(SNI_LOCK, &s->sni_lock); - ssl_sock_switchctx_set(ssl, s->default_ctx); - HA_RWLOCK_RDUNLOCK(SNI_LOCK, &s->sni_lock); - goto allow_early; - } - goto abort; + + /* no servername field is not compatible with strict-sni */ + if (s->strict_sni) + goto abort; + + /* without servername extension, look for the defaults which is + * defined by an empty servername string */ + servername = ""; + servername_len = 0; + default_lookup = 1; } /* extract/check clientHello information */ @@ -2634,14 +2303,14 @@ int ssl_sock_switchctx_cbk(SSL *ssl, int *al, void *arg) } } +sni_lookup: /* we need to transform this a NULL-ended string in lowecase */ for (i = 0; i < trash.size && i < servername_len; i++) trash.area[i] = tolower(servername[i]); trash.area[i] = 0; - servername = trash.area; 
HA_RWLOCK_RDLOCK(SNI_LOCK, &s->sni_lock); - sni_ctx = ssl_sock_chose_sni_ctx(s, servername, has_rsa_sig, has_ecdsa_sig); + sni_ctx = ssl_sock_chose_sni_ctx(s, trash.area, has_rsa_sig, has_ecdsa_sig); if (sni_ctx) { /* switch ctx */ struct ssl_bind_conf *conf = sni_ctx->conf; @@ -2658,17 +2327,20 @@ int ssl_sock_switchctx_cbk(SSL *ssl, int *al, void *arg) HA_RWLOCK_RDUNLOCK(SNI_LOCK, &s->sni_lock); #if (!defined SSL_NO_GENERATE_CERTIFICATES) - if (s->options & BC_O_GENERATE_CERTS && ssl_sock_generate_certificate(servername, s, ssl)) { + if (s->options & BC_O_GENERATE_CERTS && ssl_sock_generate_certificate(trash.area, s, ssl)) { /* switch ctx done in ssl_sock_generate_certificate */ goto allow_early; } #endif - if (!s->strict_sni) { - /* no certificate match, is the default_ctx */ - HA_RWLOCK_RDLOCK(SNI_LOCK, &s->sni_lock); - ssl_sock_switchctx_set(ssl, s->default_ctx); - HA_RWLOCK_RDUNLOCK(SNI_LOCK, &s->sni_lock); - goto allow_early; + + if (!s->strict_sni && !default_lookup) { + /* we didn't find a SNI, and we didn't look for a default + * look again to find a matching default cert */ + servername = ""; + servername_len = 0; + default_lookup = 1; + + goto sni_lookup; } /* We are about to raise an handshake error so the servername extension @@ -2722,6 +2394,7 @@ int ssl_sock_switchctx_cbk(SSL *ssl, int *al, void *priv) const char *wildp = NULL; struct ebmb_node *node, *n; struct bind_conf *s = priv; + int default_lookup = 0; /* did we lookup for a default yet? 
*/ #ifdef USE_QUIC const uint8_t *extension_data; size_t extension_len; @@ -2761,12 +2434,15 @@ int ssl_sock_switchctx_cbk(SSL *ssl, int *al, void *priv) #endif if (s->strict_sni) return SSL_TLSEXT_ERR_ALERT_FATAL; - HA_RWLOCK_RDLOCK(SNI_LOCK, &s->sni_lock); - ssl_sock_switchctx_set(ssl, s->default_ctx); - HA_RWLOCK_RDUNLOCK(SNI_LOCK, &s->sni_lock); - return SSL_TLSEXT_ERR_NOACK; + + /* without servername extension, look for the defaults which is + * defined by an empty servername string */ + servername = ""; + default_lookup = 1; } +sni_lookup: + for (i = 0; i < trash.size; i++) { if (!servername[i]) break; @@ -2775,6 +2451,8 @@ int ssl_sock_switchctx_cbk(SSL *ssl, int *al, void *priv) wildp = &trash.area[i]; } trash.area[i] = 0; + if(!*trash.area) /* handle the default which in wildcard tree */ + wildp = trash.area; HA_RWLOCK_RDLOCK(SNI_LOCK, &s->sni_lock); node = NULL; @@ -2804,24 +2482,35 @@ int ssl_sock_switchctx_cbk(SSL *ssl, int *al, void *priv) return SSL_TLSEXT_ERR_OK; } #endif - if (s->strict_sni) { - HA_RWLOCK_RDUNLOCK(SNI_LOCK, &s->sni_lock); - return SSL_TLSEXT_ERR_ALERT_FATAL; - } - ssl_sock_switchctx_set(ssl, s->default_ctx); HA_RWLOCK_RDUNLOCK(SNI_LOCK, &s->sni_lock); - return SSL_TLSEXT_ERR_OK; + + if (!s->strict_sni && !default_lookup) { + /* we didn't find a SNI, and we didn't look for a default + * look again to find a matching default cert */ + servername = ""; + default_lookup = 1; + + goto sni_lookup; + } + return SSL_TLSEXT_ERR_ALERT_FATAL; } +#if defined(OPENSSL_IS_AWSLC) + /* Note that ssl_sock_switchctx_set() calls SSL_set_SSL_CTX() which propagates the + * "early data enabled" setting from the SSL_CTX object to the SSL objects. + * So enable early data for this SSL_CTX context if configured. 
+ */ + if (s->ssl_conf.early_data) + SSL_CTX_set_early_data_enabled(container_of(node, struct sni_ctx, name)->ctx, 1); +#endif /* switch ctx */ ssl_sock_switchctx_set(ssl, container_of(node, struct sni_ctx, name)->ctx); HA_RWLOCK_RDUNLOCK(SNI_LOCK, &s->sni_lock); return SSL_TLSEXT_ERR_OK; } #endif /* (!) OPENSSL_IS_BORINGSSL */ -#endif /* SSL_CTRL_SET_TLSEXT_HOSTNAME */ -#if 0 && defined(USE_OPENSSL_WOLFSSL) +#if defined(USE_OPENSSL_WOLFSSL) /* This implement the equivalent of the clientHello Callback but using the cert_cb. * WolfSSL is able to extract the sigalgs and ciphers of the client byt using the API * provided in https://github.com/wolfSSL/wolfssl/pull/6963 @@ -2833,6 +2522,7 @@ static int ssl_sock_switchctx_wolfSSL_cbk(WOLFSSL* ssl, void* arg) struct bind_conf *s = arg; int has_rsa_sig = 0, has_ecdsa_sig = 0; const char *servername; + int default_lookup = 0; struct sni_ctx *sni_ctx; int i; @@ -2844,14 +2534,13 @@ static int ssl_sock_switchctx_wolfSSL_cbk(WOLFSSL* ssl, void* arg) servername = SSL_get_servername(ssl, TLSEXT_NAMETYPE_host_name); if (!servername) { - /* without SNI extension, is the default_ctx (need SSL_TLSEXT_ERR_NOACK) */ - if (!s->strict_sni) { - HA_RWLOCK_RDLOCK(SNI_LOCK, &s->sni_lock); - ssl_sock_switchctx_set(ssl, s->default_ctx); - HA_RWLOCK_RDUNLOCK(SNI_LOCK, &s->sni_lock); - goto allow_early; - } - goto abort; + if (s->strict_sni) + goto abort; + + /* without servername extension, look for the defaults which is + * defined by an empty servername string */ + servername = ""; + default_lookup = 1; } /* extract sigalgs and ciphers */ @@ -2895,6 +2584,8 @@ static int ssl_sock_switchctx_wolfSSL_cbk(WOLFSSL* ssl, void* arg) } } +sni_lookup: + /* we need to transform this into a NULL-ended string in lowecase */ for (i = 0; i < trash.size && servername[i] != '\0'; i++) trash.area[i] = tolower(servername[i]); @@ -2916,12 +2607,13 @@ static int ssl_sock_switchctx_wolfSSL_cbk(WOLFSSL* ssl, void* arg) } HA_RWLOCK_RDUNLOCK(SNI_LOCK, 
&s->sni_lock); - if (!s->strict_sni) { - /* no certificate match, is the default_ctx */ - HA_RWLOCK_RDLOCK(SNI_LOCK, &s->sni_lock); - ssl_sock_switchctx_set(ssl, s->default_ctx); - HA_RWLOCK_RDUNLOCK(SNI_LOCK, &s->sni_lock); - goto allow_early; + if (!s->strict_sni && !default_lookup) { + /* we didn't find a SNI, and we didn't look for a default + * look again to find a matching default cert */ + servername = ""; + default_lookup = 1; + + goto sni_lookup; } /* We are about to raise an handshake error so the servername extension @@ -3224,7 +2916,7 @@ static HASSL_DH *ssl_get_tmp_dh(EVP_PKEY *pkey) #if (HA_OPENSSL_VERSION_NUMBER < 0x3000000fL) /* Returns Diffie-Hellman parameters matching the private key length but not exceeding global_ssl.default_dh_param */ -static HASSL_DH *ssl_get_tmp_dh_cbk(SSL *ssl, int export, int keylen) +HASSL_DH *ssl_get_tmp_dh_cbk(SSL *ssl, int export, int keylen) { EVP_PKEY *pkey = SSL_get_privatekey(ssl); @@ -3250,7 +2942,7 @@ static int ssl_sock_set_tmp_dh(SSL_CTX *ctx, HASSL_DH *dh) } #if (HA_OPENSSL_VERSION_NUMBER >= 0x3000000fL) -static void ssl_sock_set_tmp_dh_from_pkey(SSL_CTX *ctx, EVP_PKEY *pkey) +void ssl_sock_set_tmp_dh_from_pkey(SSL_CTX *ctx, EVP_PKEY *pkey) { HASSL_DH *dh = NULL; if (pkey && (dh = ssl_get_tmp_dh(pkey))) { @@ -3335,7 +3027,7 @@ static int ckch_inst_add_cert_sni(SSL_CTX *ctx, struct ckch_inst *ckch_inst, struct pkey_info kinfo, char *name, int order) { struct sni_ctx *sc; - int wild = 0, neg = 0; + int wild = 0, neg = 0, default_crt = 0; if (*name == '!') { neg = 1; @@ -3344,11 +3036,14 @@ static int ckch_inst_add_cert_sni(SSL_CTX *ctx, struct ckch_inst *ckch_inst, if (*name == '*') { wild = 1; name++; + /* if this was only a '*' filter, this is a default cert */ + if (!*name) + default_crt = 1; } /* !* filter is a nop */ if (neg && wild) return order; - if (*name) { + if (*name || default_crt) { int j, len; len = strlen(name); for (j = 0; j < len && j < trash.size; j++) @@ -3420,14 +3115,6 @@ void 
ssl_sock_load_cert_sni(struct ckch_inst *ckch_inst, struct bind_conf *bind_ else ebst_insert(&bind_conf->sni_ctx, &sc0->name); } - - /* replace the default_ctx if required with the instance's ctx. */ - if (ckch_inst->is_default) { - SSL_CTX_free(bind_conf->default_ctx); - SSL_CTX_up_ref(ckch_inst->ctx); - bind_conf->default_ctx = ckch_inst->ctx; - bind_conf->default_inst = ckch_inst; - } } /* @@ -3625,9 +3312,10 @@ end: * The value 0 means there is no error nor warning and * the operation succeed. */ -static int ssl_sock_put_ckch_into_ctx(const char *path, struct ckch_data *data, SSL_CTX *ctx, char **err) +static int ssl_sock_put_ckch_into_ctx(const char *path, struct ckch_store *store, SSL_CTX *ctx, char **err) { int errcode = 0; + struct ckch_data *data = store->data; STACK_OF(X509) *find_chain = NULL; ERR_clear_error(); @@ -3679,7 +3367,7 @@ static int ssl_sock_put_ckch_into_ctx(const char *path, struct ckch_data *data, * ocsp tree even if no ocsp_response was known during init, unless the * frontend's conf disables ocsp update explicitly. */ - if (ssl_sock_load_ocsp(path, ctx, data, find_chain) < 0) { + if (ssl_sock_load_ocsp(path, ctx, store, find_chain) < 0) { if (data->ocsp_response) memprintf(err, "%s '%s.ocsp' is present and activates OCSP but it is impossible to compute the OCSP certificate ID (maybe the issuer could not be found)'.\n", err && *err ? 
*err : "", path); @@ -3744,7 +3432,7 @@ end: * ERR_WARN if a warning is available into err */ int ckch_inst_new_load_store(const char *path, struct ckch_store *ckchs, struct bind_conf *bind_conf, - struct ssl_bind_conf *ssl_conf, char **sni_filter, int fcount, struct ckch_inst **ckchi, char **err) + struct ssl_bind_conf *ssl_conf, char **sni_filter, int fcount, int is_default, struct ckch_inst **ckchi, char **err) { SSL_CTX *ctx; int i; @@ -3775,7 +3463,10 @@ int ckch_inst_new_load_store(const char *path, struct ckch_store *ckchs, struct goto error; } - errcode |= ssl_sock_put_ckch_into_ctx(path, data, ctx, err); + if (global_ssl.security_level > -1) + SSL_CTX_set_security_level(ctx, global_ssl.security_level); + + errcode |= ssl_sock_put_ckch_into_ctx(path, ckchs, ctx, err); if (errcode & ERR_CODE) goto error; @@ -3857,20 +3548,16 @@ int ckch_inst_new_load_store(const char *path, struct ckch_store *ckchs, struct * the tree, so it will be discovered and cleaned in time. */ -#ifndef SSL_CTRL_SET_TLSEXT_HOSTNAME - if (bind_conf->default_ctx) { - memprintf(err, "%sthis version of openssl cannot load multiple SSL certificates.\n", - err && *err ? *err : ""); - errcode |= ERR_ALERT | ERR_FATAL; - goto error; - } -#endif - if (!bind_conf->default_ctx) { - bind_conf->default_ctx = ctx; - bind_conf->default_ssl_conf = ssl_conf; + if (is_default) { ckch_inst->is_default = 1; - SSL_CTX_up_ref(ctx); - bind_conf->default_inst = ckch_inst; + + /* insert an empty SNI which will be used to lookup default certificate */ + order = ckch_inst_add_cert_sni(ctx, ckch_inst, bind_conf, ssl_conf, kinfo, "*", order); + if (order < 0) { + memprintf(err, "%sunable to create a sni context.\n", err && *err ? 
*err : ""); + errcode |= ERR_ALERT | ERR_FATAL; + goto error; + } } /* Always keep a reference to the newly constructed SSL_CTX in the @@ -3892,9 +3579,6 @@ int ckch_inst_new_load_store(const char *path, struct ckch_store *ckchs, struct error: /* free the allocated sni_ctxs */ if (ckch_inst) { - if (ckch_inst->is_default) - SSL_CTX_free(ctx); - ckch_inst_free(ckch_inst); ckch_inst = NULL; } @@ -3936,6 +3620,9 @@ int ckch_inst_new_load_srv_store(const char *path, struct ckch_store *ckchs, goto error; } + if (global_ssl.security_level > -1) + SSL_CTX_set_security_level(ctx, global_ssl.security_level); + errcode |= ssl_sock_put_srv_ckch_into_ctx(path, data, ctx, err); if (errcode & ERR_CODE) goto error; @@ -3967,12 +3654,14 @@ error: /* Returns a set of ERR_* flags possibly with an error in <err>. */ static int ssl_sock_load_ckchs(const char *path, struct ckch_store *ckchs, struct bind_conf *bind_conf, struct ssl_bind_conf *ssl_conf, - char **sni_filter, int fcount, struct ckch_inst **ckch_inst, char **err) + char **sni_filter, int fcount, + int is_default, + struct ckch_inst **ckch_inst, char **err) { int errcode = 0; /* we found the ckchs in the tree, we can use it directly */ - errcode |= ckch_inst_new_load_store(path, ckchs, bind_conf, ssl_conf, sni_filter, fcount, ckch_inst, err); + errcode |= ckch_inst_new_load_store(path, ckchs, bind_conf, ssl_conf, sni_filter, fcount, is_default, ckch_inst, err); if (errcode & ERR_CODE) return errcode; @@ -4081,9 +3770,17 @@ int ssl_sock_load_cert_list_file(char *file, int dir, struct bind_conf *bind_con list_for_each_entry(entry, &crtlist->ord_entries, by_crtlist) { struct ckch_store *store; struct ckch_inst *ckch_inst = NULL; + int is_default = 0; store = entry->node.key; - cfgerr |= ssl_sock_load_ckchs(store->path, store, bind_conf, entry->ssl_conf, entry->filters, entry->fcount, &ckch_inst, err); + + /* if the SNI trees were empty the first "crt" become a default certificate, + * it can be applied on multiple certificates 
if it's a bundle */ + if (eb_is_empty(&bind_conf->sni_ctx) && eb_is_empty(&bind_conf->sni_w_ctx)) + is_default = 1; + + + cfgerr |= ssl_sock_load_ckchs(store->path, store, bind_conf, entry->ssl_conf, entry->filters, entry->fcount, is_default, &ckch_inst, err); if (cfgerr & ERR_CODE) { memprintf(err, "error processing line %d in file '%s' : %s", entry->linenum, file, *err); goto error; @@ -4125,7 +3822,7 @@ error: } /* Returns a set of ERR_* flags possibly with an error in <err>. */ -int ssl_sock_load_cert(char *path, struct bind_conf *bind_conf, char **err) +int ssl_sock_load_cert(char *path, struct bind_conf *bind_conf, int is_default, char **err) { struct stat buf; int cfgerr = 0; @@ -4133,25 +3830,32 @@ int ssl_sock_load_cert(char *path, struct bind_conf *bind_conf, char **err) struct ckch_inst *ckch_inst = NULL; int found = 0; /* did we found a file to load ? */ + /* if the SNI trees were empty the first "crt" become a default certificate, + * it can be applied on multiple certificates if it's a bundle */ + if (is_default == 0) { + if (eb_is_empty(&bind_conf->sni_ctx) && eb_is_empty(&bind_conf->sni_w_ctx)) + is_default = 1; + } + if ((ckchs = ckchs_lookup(path))) { - /* we found the ckchs in the tree, we can use it directly */ - cfgerr |= ssl_sock_load_ckchs(path, ckchs, bind_conf, NULL, NULL, 0, &ckch_inst, err); - /* This certificate has an 'ocsp-update' already set in a - * previous crt-list so we must raise an error. */ - if (ckchs->data->ocsp_update_mode == SSL_SOCK_OCSP_UPDATE_ON) { - memprintf(err, "%sIncompatibilities found in OCSP update mode for certificate %s\n", err && *err ? *err: "", path); - cfgerr |= ERR_ALERT | ERR_FATAL; - } + cfgerr |= ckch_conf_cmp_empty(&ckchs->conf, err); + if (cfgerr & ERR_CODE) { + memprintf(err, "Can't load '%s', is already defined with incompatible parameters:\n %s", path, err ? 
*err : ""); + return cfgerr; + } + + /* we found the ckchs in the tree, we can use it directly */ + cfgerr |= ssl_sock_load_ckchs(path, ckchs, bind_conf, NULL, NULL, 0, is_default, &ckch_inst, err); found++; } else if (stat(path, &buf) == 0) { found++; if (S_ISDIR(buf.st_mode) == 0) { - ckchs = ckchs_load_cert_file(path, err); + ckchs = ckch_store_new_load_files_path(path, err); if (!ckchs) cfgerr |= ERR_ALERT | ERR_FATAL; - cfgerr |= ssl_sock_load_ckchs(path, ckchs, bind_conf, NULL, NULL, 0, &ckch_inst, err); + cfgerr |= ssl_sock_load_ckchs(path, ckchs, bind_conf, NULL, NULL, 0, is_default, &ckch_inst, err); } else { cfgerr |= ssl_sock_load_cert_list_file(path, 1, bind_conf, bind_conf->frontend, err); } @@ -4171,15 +3875,15 @@ int ssl_sock_load_cert(char *path, struct bind_conf *bind_conf, char **err) continue; if ((ckchs = ckchs_lookup(fp))) { - cfgerr |= ssl_sock_load_ckchs(fp, ckchs, bind_conf, NULL, NULL, 0, &ckch_inst, err); + cfgerr |= ssl_sock_load_ckchs(fp, ckchs, bind_conf, NULL, NULL, 0, is_default, &ckch_inst, err); found++; } else { if (stat(fp, &buf) == 0) { found++; - ckchs = ckchs_load_cert_file(fp, err); + ckchs = ckch_store_new_load_files_path(fp, err); if (!ckchs) cfgerr |= ERR_ALERT | ERR_FATAL; - cfgerr |= ssl_sock_load_ckchs(fp, ckchs, bind_conf, NULL, NULL, 0, &ckch_inst, err); + cfgerr |= ssl_sock_load_ckchs(fp, ckchs, bind_conf, NULL, NULL, 0, is_default, &ckch_inst, err); } } } @@ -4229,7 +3933,7 @@ int ssl_sock_load_srv_cert(char *path, struct server *server, int create_if_none /* We do not manage directories on backend side. 
*/ if (S_ISDIR(buf.st_mode) == 0) { ++found; - ckchs = ckchs_load_cert_file(path, err); + ckchs = ckch_store_new_load_files_path(path, err); if (!ckchs) cfgerr |= ERR_ALERT | ERR_FATAL; cfgerr |= ssl_sock_load_srv_ckchs(path, ckchs, server, &server->ssl_ctx.inst, err); @@ -4274,6 +3978,9 @@ ssl_sock_initial_ctx(struct bind_conf *bind_conf) ctx = SSL_CTX_new(SSLv23_server_method()); bind_conf->initial_ctx = ctx; + if (global_ssl.security_level > -1) + SSL_CTX_set_security_level(ctx, global_ssl.security_level); + if (conf_ssl_methods->flags && (conf_ssl_methods->min || conf_ssl_methods->max)) ha_warning("Proxy '%s': no-sslv3/no-tlsv1x are ignored for bind '%s' at [%s:%d]. " "Use only 'ssl-min-ver' and 'ssl-max-ver' to fix.\n", @@ -4384,7 +4091,7 @@ ssl_sock_initial_ctx(struct bind_conf *bind_conf) # endif /* ! SSL_OP_NO_ANTI_REPLAY */ SSL_CTX_set_client_hello_cb(ctx, ssl_sock_switchctx_cbk, NULL); SSL_CTX_set_tlsext_servername_callback(ctx, ssl_sock_switchctx_err_cbk); -# elif 0 && defined(USE_OPENSSL_WOLFSSL) +# elif defined(USE_OPENSSL_WOLFSSL) SSL_CTX_set_cert_cb(ctx, ssl_sock_switchctx_wolfSSL_cbk, bind_conf); # else /* ! OPENSSL_IS_BORINGSSL && ! 
HAVE_SSL_CLIENT_HELLO_CB */ @@ -5270,6 +4977,8 @@ int ssl_sock_prepare_srv_ctx(struct server *srv) cfgerr++; return cfgerr; } + if (global_ssl.security_level > -1) + SSL_CTX_set_security_level(ctx, global_ssl.security_level); srv->ssl_ctx.ctx = ctx; } @@ -5429,6 +5138,16 @@ static int ssl_sock_prepare_srv_ssl_ctx(const struct server *srv, SSL_CTX *ctx) cfgerr++; } +#ifdef SSL_CTRL_SET_MSG_CALLBACK + SSL_CTX_set_msg_callback(ctx, ssl_sock_msgcbk); +#endif + +#ifdef HAVE_SSL_KEYLOG + /* only activate the keylog callback if it was required to prevent performance loss */ + if (global_ssl.keylog > 0) + SSL_CTX_set_keylog_callback(ctx, SSL_CTX_keylog); +#endif + #ifdef HAVE_SSL_CTX_SET_CIPHERSUITES if (srv->ssl_ctx.ciphersuites && !SSL_CTX_set_ciphersuites(ctx, srv->ssl_ctx.ciphersuites)) { @@ -5547,16 +5266,12 @@ int ssl_sock_prepare_all_ctx(struct bind_conf *bind_conf) to initial_ctx in ssl_initial_ctx. */ errcode |= ssl_sock_prep_ctx_and_inst(bind_conf, NULL, bind_conf->initial_ctx, NULL, &errmsg); } - if (bind_conf->default_ctx) { - errcode |= ssl_sock_prep_ctx_and_inst(bind_conf, bind_conf->default_ssl_conf, bind_conf->default_ctx, bind_conf->default_inst, &errmsg); - } node = ebmb_first(&bind_conf->sni_ctx); while (node) { sni = ebmb_entry(node, struct sni_ctx, name); - if (!sni->order && sni->ctx != bind_conf->default_ctx) { - /* only initialize the CTX on its first occurrence and - if it is not the default_ctx */ + if (!sni->order) { + /* only initialize the CTX on its first occurrence */ errcode |= ssl_sock_prep_ctx_and_inst(bind_conf, sni->conf, sni->ctx, sni->ckch_inst, &errmsg); } node = ebmb_next(node); @@ -5565,9 +5280,8 @@ int ssl_sock_prepare_all_ctx(struct bind_conf *bind_conf) node = ebmb_first(&bind_conf->sni_w_ctx); while (node) { sni = ebmb_entry(node, struct sni_ctx, name); - if (!sni->order && sni->ctx != bind_conf->default_ctx) { - /* only initialize the CTX on its first occurrence and - if it is not the default_ctx */ + if (!sni->order) { + /* 
only initialize the CTX on its first occurrence */ errcode |= ssl_sock_prep_ctx_and_inst(bind_conf, sni->conf, sni->ctx, sni->ckch_inst, &errmsg); } node = ebmb_next(node); @@ -5594,14 +5308,17 @@ int ssl_sock_prepare_bind_conf(struct bind_conf *bind_conf) int alloc_ctx; int err; + /* check if some certificates were loaded but no ssl keyword is used */ if (!(bind_conf->options & BC_O_USE_SSL)) { - if (bind_conf->default_ctx) { + if (!eb_is_empty(&bind_conf->sni_ctx) || !eb_is_empty(&bind_conf->sni_w_ctx)) { ha_warning("Proxy '%s': A certificate was specified but SSL was not enabled on bind '%s' at [%s:%d] (use 'ssl').\n", px->id, bind_conf->arg, bind_conf->file, bind_conf->line); } return 0; } - if (!bind_conf->default_ctx) { + + /* check if we have certificates */ + if (eb_is_empty(&bind_conf->sni_ctx) && eb_is_empty(&bind_conf->sni_w_ctx)) { if (bind_conf->strict_sni && !(bind_conf->options & BC_O_GENERATE_CERTS)) { ha_warning("Proxy '%s': no SSL certificate specified for bind '%s' at [%s:%d], ssl connections will fail (use 'crt').\n", px->id, bind_conf->arg, bind_conf->file, bind_conf->line); @@ -5612,10 +5329,23 @@ int ssl_sock_prepare_bind_conf(struct bind_conf *bind_conf) return -1; } } + + if ((bind_conf->options & BC_O_GENERATE_CERTS)) { + struct sni_ctx *sni_ctx; + + /* if we use the generate-certificates option, look for the first default cert available */ + sni_ctx = ssl_sock_chose_sni_ctx(bind_conf, "", 1, 1); + if (!sni_ctx) { + ha_alert("Proxy '%s': no SSL certificate specified for bind '%s' and 'generate-certificates' option at [%s:%d] (use 'crt').\n", + px->id, bind_conf->arg, bind_conf->file, bind_conf->line); + return -1; + } + } + if (!ssl_shctx && global.tune.sslcachesize) { alloc_ctx = shctx_init(&ssl_shctx, global.tune.sslcachesize, sizeof(struct sh_ssl_sess_hdr) + SHSESS_BLOCK_MIN_SIZE, -1, - sizeof(*sh_ssl_sess_tree)); + sizeof(*sh_ssl_sess_tree), "ssl cache"); if (alloc_ctx <= 0) { if (alloc_ctx == SHCTX_E_INIT_LOCK) ha_alert("Unable to 
initialize the lock for the shared SSL session cache. You can retry using the global statement 'tune.ssl.force-private-cache' but it could increase CPU usage due to renegotiations if nbproc > 1.\n"); @@ -5713,10 +5443,6 @@ void ssl_sock_free_all_ctx(struct bind_conf *bind_conf) SSL_CTX_free(bind_conf->initial_ctx); bind_conf->initial_ctx = NULL; - SSL_CTX_free(bind_conf->default_ctx); - bind_conf->default_ctx = NULL; - bind_conf->default_inst = NULL; - bind_conf->default_ssl_conf = NULL; } @@ -5746,81 +5472,6 @@ void ssl_sock_destroy_bind_conf(struct bind_conf *bind_conf) bind_conf->ca_sign_file = NULL; } -/* Load CA cert file and private key used to generate certificates */ -int -ssl_sock_load_ca(struct bind_conf *bind_conf) -{ - struct proxy *px = bind_conf->frontend; - struct ckch_data *data = NULL; - int ret = 0; - char *err = NULL; - - if (!(bind_conf->options & BC_O_GENERATE_CERTS)) - return ret; - -#if (defined SSL_CTRL_SET_TLSEXT_HOSTNAME && !defined SSL_NO_GENERATE_CERTIFICATES) - if (global_ssl.ctx_cache) { - ssl_ctx_lru_tree = lru64_new(global_ssl.ctx_cache); - } - ssl_ctx_lru_seed = (unsigned int)time(NULL); - ssl_ctx_serial = now_ms; -#endif - - if (!bind_conf->ca_sign_file) { - ha_alert("Proxy '%s': cannot enable certificate generation, " - "no CA certificate File configured at [%s:%d].\n", - px->id, bind_conf->file, bind_conf->line); - goto failed; - } - - /* Allocate cert structure */ - data = calloc(1, sizeof(*data)); - if (!data) { - ha_alert("Proxy '%s': Failed to read CA certificate file '%s' at [%s:%d]. Chain allocation failure\n", - px->id, bind_conf->ca_sign_file, bind_conf->file, bind_conf->line); - goto failed; - } - - /* Try to parse file */ - if (ssl_sock_load_files_into_ckch(bind_conf->ca_sign_file, data, &err)) { - ha_alert("Proxy '%s': Failed to read CA certificate file '%s' at [%s:%d]. 
Chain loading failed: %s\n", - px->id, bind_conf->ca_sign_file, bind_conf->file, bind_conf->line, err); - free(err); - goto failed; - } - - /* Fail if missing cert or pkey */ - if ((!data->cert) || (!data->key)) { - ha_alert("Proxy '%s': Failed to read CA certificate file '%s' at [%s:%d]. Chain missing certificate or private key\n", - px->id, bind_conf->ca_sign_file, bind_conf->file, bind_conf->line); - goto failed; - } - - /* Final assignment to bind */ - bind_conf->ca_sign_ckch = data; - return ret; - - failed: - if (data) { - ssl_sock_free_cert_key_and_chain_contents(data); - free(data); - } - - bind_conf->options &= ~BC_O_GENERATE_CERTS; - ret++; - return ret; -} - -/* Release CA cert and private key used to generate certificated */ -void -ssl_sock_free_ca(struct bind_conf *bind_conf) -{ - if (bind_conf->ca_sign_ckch) { - ssl_sock_free_cert_key_and_chain_contents(bind_conf->ca_sign_ckch); - ha_free(&bind_conf->ca_sign_ckch); - } -} - /* * Try to allocate the BIO and SSL session objects of <conn> connection with <bio> and * <ssl> as addresses, <bio_meth> as BIO method and <ssl_ctx> as SSL context inherited settings. @@ -6060,7 +5711,7 @@ static int ssl_sock_init(struct connection *conn, void **xprt_ctx) #ifdef SSL_READ_EARLY_DATA_SUCCESS if (bc->ssl_conf.early_data) { - b_alloc(&ctx->early_buf); + b_alloc(&ctx->early_buf, DB_MUX_RX); SSL_set_max_early_data(ctx->ssl, /* Only allow early data if we managed to allocate * a buffer. @@ -6516,19 +6167,26 @@ static int ssl_unsubscribe(struct connection *conn, void *xprt_ctx, int event_ty * It should be called with the takeover lock for the old thread held. 
* Returns 0 on success, and -1 on failure */ -static int ssl_takeover(struct connection *conn, void *xprt_ctx, int orig_tid) +static int ssl_takeover(struct connection *conn, void *xprt_ctx, int orig_tid, int release) { struct ssl_sock_ctx *ctx = xprt_ctx; - struct tasklet *tl = tasklet_new(); + struct tasklet *tl = NULL; - if (!tl) - return -1; + if (!release) { + tl = tasklet_new(); + if (!tl) + return -1; + } ctx->wait_event.tasklet->context = NULL; tasklet_wakeup_on(ctx->wait_event.tasklet, orig_tid); + ctx->wait_event.tasklet = tl; - ctx->wait_event.tasklet->process = ssl_sock_io_cb; - ctx->wait_event.tasklet->context = ctx; + if (!release) { + ctx->wait_event.tasklet->process = ssl_sock_io_cb; + ctx->wait_event.tasklet->context = ctx; + } + return 0; } @@ -6558,7 +6216,7 @@ static void ssl_set_used(struct connection *conn, void *xprt_ctx) if (!ctx || !ctx->wait_event.tasklet) return; - HA_ATOMIC_OR(&ctx->wait_event.tasklet->state, TASK_F_USR1); + HA_ATOMIC_AND(&ctx->wait_event.tasklet->state, ~TASK_F_USR1); if (ctx->xprt) xprt_set_used(conn, ctx->xprt, ctx->xprt_ctx); } @@ -7873,6 +7531,8 @@ static void __ssl_sock_init(void) xprt_register(XPRT_SSL, &ssl_sock); #if HA_OPENSSL_VERSION_NUMBER < 0x10100000L SSL_library_init(); +#elif HA_OPENSSL_VERSION_NUMBER >= 0x10100000L + OPENSSL_init_ssl(0, NULL); #endif #if (!defined(OPENSSL_NO_COMP) && !defined(SSL_OP_NO_COMPRESSION)) cm = SSL_COMP_get_compression_methods(); @@ -8068,12 +7728,6 @@ void ssl_free_dh(void) { static void __ssl_sock_deinit(void) { -#if (defined SSL_CTRL_SET_TLSEXT_HOSTNAME && !defined SSL_NO_GENERATE_CERTIFICATES) - if (ssl_ctx_lru_tree) { - lru64_destroy(ssl_ctx_lru_tree); - HA_RWLOCK_DESTROY(&ssl_ctx_lru_rwlock); - } -#endif #if (HA_OPENSSL_VERSION_NUMBER < 0x10100000L) ERR_remove_state(0); diff --git a/src/stats-file.c b/src/stats-file.c new file mode 100644 index 0000000..1a77e31 --- /dev/null +++ b/src/stats-file.c @@ -0,0 +1,426 @@ +#include <haproxy/stats-file.h> + +#include <stdio.h> 
+#include <stdlib.h> +#include <string.h> + +#include <import/ebmbtree.h> +#include <import/ebsttree.h> +#include <import/ist.h> +#include <haproxy/api.h> +#include <haproxy/buf.h> +#include <haproxy/chunk.h> +#include <haproxy/clock.h> +#include <haproxy/errors.h> +#include <haproxy/global.h> +#include <haproxy/guid.h> +#include <haproxy/intops.h> +#include <haproxy/list.h> +#include <haproxy/listener-t.h> +#include <haproxy/obj_type.h> +#include <haproxy/proxy-t.h> +#include <haproxy/server-t.h> +#include <haproxy/stats.h> +#include <haproxy/time.h> + +/* Dump all fields from <stats> into <out> for stats-file. */ +int stats_dump_fields_file(struct buffer *out, + const struct field *line, size_t stats_count, + struct show_stat_ctx *ctx) +{ + struct guid_node *guid; + struct listener *l; + int i; + + switch (ctx->px_st) { + case STAT_PX_ST_FE: + case STAT_PX_ST_BE: + guid = &__objt_proxy(ctx->obj1)->guid; + break; + + case STAT_PX_ST_LI: + l = LIST_ELEM(ctx->obj2, struct listener *, by_fe); + guid = &l->guid; + break; + + case STAT_PX_ST_SV: + guid = &__objt_server(ctx->obj2)->guid; + break; + + default: + ABORT_NOW(); + return 1; + } + + /* Skip objects without GUID. */ + if (!guid->node.key) + return 1; + + chunk_appendf(out, "%s,", (char *)guid->node.key); + + for (i = 0; i < stats_count; ++i) { + /* Empty field for stats-file is used to skip its output, + * including any separator. + */ + if (field_format(line, i) == FF_EMPTY) + continue; + + if (!stats_emit_raw_data_field(out, &line[i])) + return 0; + if (!chunk_strcat(out, ",")) + return 0; + } + + chunk_strcat(out, "\n"); + return 1; +} + +void stats_dump_file_header(int type, struct buffer *out) +{ + const struct stat_col *col; + int i; + + /* Caller must specified ither FE or BE. 
*/ + BUG_ON(!(type & ((1 << STATS_TYPE_FE) | (1 << STATS_TYPE_BE)))); + + if (type & (1 << STATS_TYPE_FE)) { + chunk_strcat(out, "#fe guid,"); + for (i = 0; i < ST_I_PX_MAX; ++i) { + col = &stat_cols_px[i]; + if (stcol_is_generic(col) && + col->cap & (STATS_PX_CAP_FE|STATS_PX_CAP_LI)) { + chunk_appendf(out, "%s,", col->name); + } + } + } + else { + chunk_appendf(out, "#be guid,"); + for (i = 0; i < ST_I_PX_MAX; ++i) { + col = &stat_cols_px[i]; + if (stcol_is_generic(col) && + col->cap & (STATS_PX_CAP_BE|STATS_PX_CAP_SRV)) { + chunk_appendf(out, "%s,", col->name); + } + } + } + + chunk_strcat(out, "\n"); +} + +/* Parse an identified header line <header> starting with '#' character. + * + * If the section is recognized, <domain> will point to the current stats-file + * scope. <cols> will be filled as a matrix to identify each stat_col position + * using <st_tree> as prefilled proxy stats columns. If stats-file section is + * unknown, only <domain> will be set to STFILE_DOMAIN_UNSET. + * + * Returns 0 on success. On fatal error, non-zero is returned and parsing should + * be interrupted. + */ +static int parse_header_line(struct ist header, struct eb_root *st_tree, + enum stfile_domain *domain, + const struct stat_col *cols[]) +{ + enum stfile_domain dom = STFILE_DOMAIN_UNSET; + struct ist token; + char last; + int i; + + header = iststrip(header); + last = istptr(header)[istlen(header) - 1]; + token = istsplit(&header, ' '); + + /* A header line is considered valid if: + * - a space delimiter is found and first token is several chars + * - last line character must be a comma separator + */ + if (!istlen(header) || istlen(token) == 1 || last != ',') + goto err; + + if (isteq(token, ist("#fe"))) + dom = STFILE_DOMAIN_PX_FE; + else if (isteq(token, ist("#be"))) + dom = STFILE_DOMAIN_PX_BE; + + /* Remove 'guid' field. */ + token = istsplit(&header, ','); + if (!isteq(token, ist("guid"))) { + /* Fatal error if FE/BE domain without guid token. 
*/ + if (dom == STFILE_DOMAIN_PX_FE || dom == STFILE_DOMAIN_PX_BE) + goto err; + } + + /* Unknown domain. Following lines should be ignored until next header. */ + if (dom == STFILE_DOMAIN_UNSET) + return 0; + + /* Generate matrix of stats column into cols[]. */ + memset(cols, 0, sizeof(void *) * STAT_FILE_MAX_COL_COUNT); + + i = 0; + while (istlen(header) && i < STAT_FILE_MAX_COL_COUNT) { + struct stcol_node *col_node; + const struct stat_col *col; + struct ebmb_node *node; + + /* Lookup column by its name into <st_tree>. */ + token = istsplit(&header, ','); + node = ebst_lookup(st_tree, ist0(token)); + if (!node) { + ++i; + continue; + } + + col_node = ebmb_entry(node, struct stcol_node, name); + col = col_node->col; + + /* Ignore column if its cap is not valid with current stats-file section. */ + if ((dom == STFILE_DOMAIN_PX_FE && + !(col->cap & (STATS_PX_CAP_FE|STATS_PX_CAP_LI))) || + (dom == STFILE_DOMAIN_PX_BE && + !(col->cap & (STATS_PX_CAP_BE|STATS_PX_CAP_SRV)))) { + ++i; + continue; + } + + cols[i] = col; + ++i; + } + + *domain = dom; + return 0; + + err: + *domain = STFILE_DOMAIN_UNSET; + return 1; +} + +/* Preload an individual counter instance stored at <counter> with <token> + * value> for the <col> stat column. + * + * Returns 0 on success else non-zero if counter was not updated. + */ +static int load_ctr(const struct stat_col *col, const struct ist token, + void* counter) +{ + const enum field_nature fn = stcol_nature(col); + const enum field_format ff = stcol_format(col); + const char *ptr = istptr(token); + struct field value; + + switch (ff) { + case FF_U64: + value.u.u64 = read_uint64(&ptr, istend(token)); + break; + + case FF_S32: + case FF_U32: + value.u.u32 = read_uint(&ptr, istend(token)); + break; + + default: + /* Unsupported field nature. */ + return 1; + } + + /* Do not load value if non numeric characters present. 
*/ + if (ptr != istend(token)) + return 1; + + if (fn == FN_COUNTER && ff == FF_U64) { + *(uint64_t *)counter = value.u.u64; + } + else if (fn == FN_RATE && ff == FF_U32) { + preload_freq_ctr(counter, value.u.u32); + } + else if (fn == FN_AGE && (ff == FF_U32 || ff == FF_S32)) { + *(uint32_t *)counter = ns_to_sec(now_ns) - value.u.u32; + } + else { + /* Unsupported field format/nature combination. */ + return 1; + } + + return 0; +} + +/* Parse a non header stats-file line <line>. Specify current parsing <domain> + * and <cols> stats column matrix derived from the last header line. + * + * Returns 0 on success else non-zero. + */ +static int parse_stat_line(struct ist line, + enum stfile_domain domain, + const struct stat_col *cols[]) +{ + struct guid_node *node; + struct listener *li; + struct server *srv; + struct proxy *px; + struct ist token; + char *base_off; + char *guid; + int i, off; + + token = istsplit(&line, ','); + guid = ist0(token); + if (!guid_is_valid_fmt(guid, NULL)) + goto err; + + node = guid_lookup(guid); + if (!node) { + /* Silently ignored unknown GUID. */ + return 0; + } + + switch (obj_type(node->obj_type)) { + case OBJ_TYPE_PROXY: + px = __objt_proxy(node->obj_type); + + if (domain == STFILE_DOMAIN_PX_FE) { + if (!(px->cap & PR_CAP_FE)) + goto err; + base_off = (char *)&px->fe_counters; + off = 0; + } + else if (domain == STFILE_DOMAIN_PX_BE) { + if (!(px->cap & PR_CAP_BE)) + goto err; + base_off = (char *)&px->be_counters; + off = 1; + } + else { + goto err; + } + + break; + + case OBJ_TYPE_LISTENER: + if (domain != STFILE_DOMAIN_PX_FE) + goto err; + + li = __objt_listener(node->obj_type); + /* Listeners counters are not allocated if 'option socket-stats' unset. 
*/ + if (!li->counters) + return 0; + + base_off = (char *)li->counters; + off = 0; + break; + + case OBJ_TYPE_SERVER: + if (domain != STFILE_DOMAIN_PX_BE) + goto err; + + srv = __objt_server(node->obj_type); + base_off = (char *)&srv->counters; + off = 1; + break; + + default: + goto err; + } + + i = 0; + while (istlen(line) && i < STAT_FILE_MAX_COL_COUNT) { + const struct stat_col *col = cols[i++]; + + token = istsplit(&line, ','); + if (!istlen(token)) + continue; + + if (!col) + continue; + + load_ctr(col, token, base_off + col->metric.offset[off]); + } + + return 0; + + err: + return 1; +} + +/* Parse a stats-file and preload haproxy internal counters. */ +void apply_stats_file(void) +{ + const struct stat_col *cols[STAT_FILE_MAX_COL_COUNT]; + struct eb_root st_tree = EB_ROOT; + enum stfile_domain domain; + int valid_format = 0; + FILE *file; + struct ist istline; + char *line = NULL; + int linenum; + + if (!global.stats_file) + return; + + file = fopen(global.stats_file, "r"); + if (!file) { + ha_warning("config: Can't load stats file: cannot open file.\n"); + return; + } + + /* Generate stat columns map indexed by name. 
*/ + if (generate_stat_tree(&st_tree, stat_cols_px)) { + ha_warning("config: Can't load stats file: not enough memory.\n"); + goto out; + } + + line = malloc(sizeof(char) * LINESIZE); + if (!line) { + ha_warning("config: Can't load stats file: line alloc error.\n"); + goto out; + } + + linenum = 0; + domain = STFILE_DOMAIN_UNSET; + while (1) { + if (!fgets(line, LINESIZE, file)) + break; + + ++linenum; + istline = iststrip(ist(line)); + if (!istlen(istline)) + continue; + + if (*istptr(istline) == '#') { + if (parse_header_line(istline, &st_tree, &domain, cols)) { + if (!valid_format) { + ha_warning("config: Invalid stats-file format.\n"); + break; + } + + ha_warning("config: Ignored stats-file header line '%d'.\n", linenum); + } + + valid_format = 1; + } + else if (domain != STFILE_DOMAIN_UNSET) { + if (parse_stat_line(istline, domain, cols)) + ha_warning("config: Ignored stats-file line %d.\n", linenum); + } + else { + /* Stop parsing if first line is not a valid header. + * Allows to immediately stop reading garbage file. 
+ */ + if (!valid_format) { + ha_warning("config: Invalid stats-file format.\n"); + break; + } + } + } + + out: + while (!eb_is_empty(&st_tree)) { + struct ebmb_node *node = ebmb_first(&st_tree); + struct stcol_node *snode = ebmb_entry(node, struct stcol_node, name); + + ebmb_delete(node); + ha_free(&snode); + } + + ha_free(&line); + fclose(file); +} diff --git a/src/stats-html.c b/src/stats-html.c new file mode 100644 index 0000000..41eaa9e --- /dev/null +++ b/src/stats-html.c @@ -0,0 +1,2081 @@ +#include <haproxy/stats-html.h> + +#include <string.h> + +#include <import/ist.h> +#include <haproxy/api.h> +#include <haproxy/applet.h> +#include <haproxy/buf.h> +#include <haproxy/chunk.h> +#include <haproxy/clock.h> +#include <haproxy/freq_ctr.h> +#include <haproxy/global.h> +#include <haproxy/http.h> +#include <haproxy/http_htx.h> +#include <haproxy/htx.h> +#include <haproxy/list.h> +#include <haproxy/listener.h> +#include <haproxy/obj_type-t.h> +#include <haproxy/pipe.h> +#include <haproxy/proxy.h> +#include <haproxy/stats.h> +#include <haproxy/stconn.h> +#include <haproxy/server.h> +#include <haproxy/task.h> +#include <haproxy/thread.h> +#include <haproxy/time.h> +#include <haproxy/tinfo.h> +#include <haproxy/tools.h> +#include <haproxy/uri_auth-t.h> +#include <haproxy/version.h> + +static const char *field_to_html_str(const struct field *f) +{ + switch (field_format(f, 0)) { + case FF_S32: return U2H(f->u.s32); + case FF_S64: return U2H(f->u.s64); + case FF_U64: return U2H(f->u.u64); + case FF_U32: return U2H(f->u.u32); + case FF_FLT: return F2H(f->u.flt); + case FF_STR: return field_str(f, 0); + case FF_EMPTY: + default: + return ""; + } +} + +/* Dumps the HTTP stats head block to chunk ctx buffer and uses the per-uri + * parameters from the parent proxy. The caller is responsible for clearing + * chunk ctx buffer if needed. 
+ */ +void stats_dump_html_head(struct appctx *appctx) +{ + struct show_stat_ctx *ctx = appctx->svcctx; + struct buffer *chk = &ctx->chunk; + struct uri_auth *uri; + + BUG_ON(!ctx->http_px); + uri = ctx->http_px->uri_auth; + + /* WARNING! This must fit in the first buffer !!! */ + chunk_appendf(chk, + "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\"\n" + "\"http://www.w3.org/TR/html4/loose.dtd\">\n" + "<html><head><title>Statistics Report for " PRODUCT_NAME "%s%s</title>\n" + "<link rel=\"icon\" href=\"data:,\">\n" + "<meta http-equiv=\"content-type\" content=\"text/html; charset=iso-8859-1\">\n" + "<style type=\"text/css\"><!--\n" + "body {" + " font-family: arial, helvetica, sans-serif;" + " font-size: 12px;" + " font-weight: normal;" + " color: black;" + " background: white;" + "}\n" + "th,td {" + " font-size: 10px;" + "}\n" + "h1 {" + " font-size: x-large;" + " margin-bottom: 0.5em;" + "}\n" + "h2 {" + " font-family: helvetica, arial;" + " font-size: x-large;" + " font-weight: bold;" + " font-style: italic;" + " color: #6020a0;" + " margin-top: 0em;" + " margin-bottom: 0em;" + "}\n" + "h3 {" + " font-family: helvetica, arial;" + " font-size: 16px;" + " font-weight: bold;" + " color: #b00040;" + " background: #e8e8d0;" + " margin-top: 0em;" + " margin-bottom: 0em;" + "}\n" + "li {" + " margin-top: 0.25em;" + " margin-right: 2em;" + "}\n" + ".hr {margin-top: 0.25em;" + " border-color: black;" + " border-bottom-style: solid;" + "}\n" + ".titre {background: #20D0D0;color: #000000; font-weight: bold; text-align: center;}\n" + ".total {background: #20D0D0;color: #ffff80;}\n" + ".frontend {background: #e8e8d0;}\n" + ".socket {background: #d0d0d0;}\n" + ".backend {background: #e8e8d0;}\n" + ".active_down {background: #ff9090;}\n" + ".active_going_up {background: #ffd020;}\n" + ".active_going_down {background: #ffffa0;}\n" + ".active_up {background: #c0ffc0;}\n" + ".active_nolb {background: #20a0ff;}\n" + ".active_draining {background: #20a0FF;}\n" + 
".active_no_check {background: #e0e0e0;}\n" + ".backup_down {background: #ff9090;}\n" + ".backup_going_up {background: #ff80ff;}\n" + ".backup_going_down {background: #c060ff;}\n" + ".backup_up {background: #b0d0ff;}\n" + ".backup_nolb {background: #90b0e0;}\n" + ".backup_draining {background: #cc9900;}\n" + ".backup_no_check {background: #e0e0e0;}\n" + ".maintain {background: #c07820;}\n" + ".rls {letter-spacing: 0.2em; margin-right: 1px;}\n" /* right letter spacing (used for grouping digits) */ + "\n" + "a.px:link {color: #ffff40; text-decoration: none;}" + "a.px:visited {color: #ffff40; text-decoration: none;}" + "a.px:hover {color: #ffffff; text-decoration: none;}" + "a.lfsb:link {color: #000000; text-decoration: none;}" + "a.lfsb:visited {color: #000000; text-decoration: none;}" + "a.lfsb:hover {color: #505050; text-decoration: none;}" + "\n" + "table.tbl { border-collapse: collapse; border-style: none;}\n" + "table.tbl td { text-align: right; border-width: 1px 1px 1px 1px; border-style: solid solid solid solid; padding: 2px 3px; border-color: gray; white-space: nowrap;}\n" + "table.tbl td.ac { text-align: center;}\n" + "table.tbl th { border-width: 1px; border-style: solid solid solid solid; border-color: gray;}\n" + "table.tbl th.pxname { background: #b00040; color: #ffff40; font-weight: bold; border-style: solid solid none solid; padding: 2px 3px; white-space: nowrap;}\n" + "table.tbl th.empty { border-style: none; empty-cells: hide; background: white;}\n" + "table.tbl th.desc { background: white; border-style: solid solid none solid; text-align: left; padding: 2px 3px;}\n" + "\n" + "table.lgd { border-collapse: collapse; border-width: 1px; border-style: none none none solid; border-color: black;}\n" + "table.lgd td { border-width: 1px; border-style: solid solid solid solid; border-color: gray; padding: 2px;}\n" + "table.lgd td.noborder { border-style: none; padding: 2px; white-space: nowrap;}\n" + "table.det { border-collapse: collapse; border-style: none; 
}\n" + "table.det th { text-align: left; border-width: 0px; padding: 0px 1px 0px 0px; font-style:normal;font-size:11px;font-weight:bold;font-family: sans-serif;}\n" + "table.det td { text-align: right; border-width: 0px; padding: 0px 0px 0px 4px; white-space: nowrap; font-style:normal;font-size:11px;font-weight:normal;}\n" + "u {text-decoration:none; border-bottom: 1px dotted black;}\n" + "div.tips {\n" + " display:block;\n" + " visibility:hidden;\n" + " z-index:2147483647;\n" + " position:absolute;\n" + " padding:2px 4px 3px;\n" + " background:#f0f060; color:#000000;\n" + " border:1px solid #7040c0;\n" + " white-space:nowrap;\n" + " font-style:normal;font-size:11px;font-weight:normal;\n" + " -moz-border-radius:3px;-webkit-border-radius:3px;border-radius:3px;\n" + " -moz-box-shadow:gray 2px 2px 3px;-webkit-box-shadow:gray 2px 2px 3px;box-shadow:gray 2px 2px 3px;\n" + "}\n" + "u:hover div.tips {visibility:visible;}\n" + "@media (prefers-color-scheme: dark) {\n" + " body { font-family: arial, helvetica, sans-serif; font-size: 12px; font-weight: normal; color: #e8e6e3; background: #131516;}\n" + " h1 { color: #a265e0!important; }\n" + " h2 { color: #a265e0; }\n" + " h3 { color: #ff5190; background-color: #3e3e1f; }\n" + " a { color: #3391ff; }\n" + " input { background-color: #2f3437; }\n" + " .hr { border-color: #8c8273; }\n" + " .titre { background-color: #1aa6a6; color: #e8e6e3; }\n" + " .frontend {background: #2f3437;}\n" + " .socket {background: #2a2d2f;}\n" + " .backend {background: #2f3437;}\n" + " .active_down {background: #760000;}\n" + " .active_going_up {background: #b99200;}\n" + " .active_going_down {background: #6c6c00;}\n" + " .active_up {background: #165900;}\n" + " .active_nolb {background: #006ab9;}\n" + " .active_draining {background: #006ab9;}\n" + " .active_no_check {background: #2a2d2f;}\n" + " .backup_down {background: #760000;}\n" + " .backup_going_up {background: #7f007f;}\n" + " .backup_going_down {background: #580092;}\n" + " .backup_up 
{background: #2e3234;}\n" + " .backup_nolb {background: #1e3c6a;}\n" + " .backup_draining {background: #a37a00;}\n" + " .backup_no_check {background: #2a2d2f;}\n" + " .maintain {background: #9a601a;}\n" + " a.px:link {color: #d8d83b; text-decoration: none;}\n" + " a.px:visited {color: #d8d83b; text-decoration: none;}\n" + " a.px:hover {color: #ffffff; text-decoration: none;}\n" + " a.lfsb:link {color: #e8e6e3; text-decoration: none;}\n" + " a.lfsb:visited {color: #e8e6e3; text-decoration: none;}\n" + " a.lfsb:hover {color: #b5afa6; text-decoration: none;}\n" + " table.tbl th.empty { background-color: #181a1b; }\n" + " table.tbl th.desc { background: #181a1b; }\n" + " table.tbl th.pxname { background-color: #8d0033; color: #ffff46; }\n" + " table.tbl th { border-color: #808080; }\n" + " table.tbl td { border-color: #808080; }\n" + " u {text-decoration:none; border-bottom: 1px dotted #e8e6e3;}\n" + " div.tips {\n" + " background:#8e8e0d;\n" + " color:#e8e6e3;\n" + " border-color: #4e2c86;\n" + " -moz-box-shadow: #60686c 2px 2px 3px;\n" + " -webkit-box-shadow: #60686c 2px 2px 3px;\n" + " box-shadow: #60686c 2px 2px 3px;\n" + " }\n" + "}\n" + "-->\n" + "</style></head>\n", + (ctx->flags & STAT_F_SHNODE) ? " on " : "", + (ctx->flags & STAT_F_SHNODE) ? (uri && uri->node ? uri->node : global.node) : "" + ); +} + +/* Dumps the HTML stats information block to chunk ctx buffer and uses the + * state from stream connector <sc> and per-uri parameter from the parent + * proxy. The caller is responsible for clearing chunk ctx buffer if needed. 
+ */ +void stats_dump_html_info(struct stconn *sc) +{ + struct appctx *appctx = __sc_appctx(sc); + struct show_stat_ctx *ctx = appctx->svcctx; + struct buffer *chk = &ctx->chunk; + unsigned int up = ns_to_sec(now_ns - start_time_ns); + char scope_txt[STAT_SCOPE_TXT_MAXLEN + sizeof STAT_SCOPE_PATTERN]; + const char *scope_ptr = stats_scope_ptr(appctx); + struct uri_auth *uri; + unsigned long long bps; + int thr; + + BUG_ON(!ctx->http_px); + uri = ctx->http_px->uri_auth; + for (bps = thr = 0; thr < global.nbthread; thr++) + bps += 32ULL * read_freq_ctr(&ha_thread_ctx[thr].out_32bps); + + /* Turn the bytes per second to bits per second and take care of the + * usual ethernet overhead in order to help figure how far we are from + * interface saturation since it's the only case which usually matters. + * For this we count the total size of an Ethernet frame on the wire + * including preamble and IFG (1538) for the largest TCP segment it + * transports (1448 with TCP timestamps). This is not valid for smaller + * packets (under-estimated), but it gives a reasonably accurate + * estimation of how far we are from uplink saturation. + */ + bps = bps * 8 * 1538 / 1448; + + /* WARNING! this has to fit the first packet too. + * We are around 3.5 kB, and adding entries will + * become tricky if we want to support 4kB buffers ! 
+ */ + chunk_appendf(chk, + "<body><h1><a href=\"" PRODUCT_URL "\" style=\"text-decoration: none;\">" + PRODUCT_NAME "%s</a></h1>\n" + "<h2>Statistics Report for pid %d%s%s%s%s</h2>\n" + "<hr width=\"100%%\" class=\"hr\">\n" + "<h3>> General process information</h3>\n" + "<table border=0><tr><td align=\"left\" nowrap width=\"1%%\">\n" + "<p><b>pid = </b> %d (process #%d, nbproc = %d, nbthread = %d)<br>\n" + "<b>uptime = </b> %dd %dh%02dm%02ds; warnings = %u<br>\n" + "<b>system limits:</b> memmax = %s%s; ulimit-n = %d<br>\n" + "<b>maxsock = </b> %d; <b>maxconn = </b> %d; <b>reached = </b> %llu; <b>maxpipes = </b> %d<br>\n" + "current conns = %d; current pipes = %d/%d; conn rate = %d/sec; bit rate = %.3f %cbps<br>\n" + "Running tasks: %d/%d (%d niced); idle = %d %%<br>\n" + "</td><td align=\"center\" nowrap>\n" + "<table class=\"lgd\"><tr>\n" + "<td class=\"active_up\"> </td><td class=\"noborder\">active UP </td>" + "<td class=\"backup_up\"> </td><td class=\"noborder\">backup UP </td>" + "</tr><tr>\n" + "<td class=\"active_going_down\"></td><td class=\"noborder\">active UP, going down </td>" + "<td class=\"backup_going_down\"></td><td class=\"noborder\">backup UP, going down </td>" + "</tr><tr>\n" + "<td class=\"active_going_up\"></td><td class=\"noborder\">active DOWN, going up </td>" + "<td class=\"backup_going_up\"></td><td class=\"noborder\">backup DOWN, going up </td>" + "</tr><tr>\n" + "<td class=\"active_down\"></td><td class=\"noborder\">active or backup DOWN </td>" + "<td class=\"active_no_check\"></td><td class=\"noborder\">not checked </td>" + "</tr><tr>\n" + "<td class=\"maintain\"></td><td class=\"noborder\" colspan=\"3\">active or backup DOWN for maintenance (MAINT) </td>" + "</tr><tr>\n" + "<td class=\"active_draining\"></td><td class=\"noborder\" colspan=\"3\">active or backup SOFT STOPPED for maintenance </td>" + "</tr></table>\n" + "Note: \"NOLB\"/\"DRAIN\" = UP with load-balancing disabled." 
+ "</td>" + "<td align=\"left\" valign=\"top\" nowrap width=\"1%%\">" + "<b>Display option:</b><ul style=\"margin-top: 0.25em;\">" + "", + (ctx->flags & STAT_F_HIDEVER) ? "" : (stats_version_string), + pid, (ctx->flags & STAT_F_SHNODE) ? " on " : "", + (ctx->flags & STAT_F_SHNODE) ? (uri->node ? uri->node : global.node) : "", + (ctx->flags & STAT_F_SHDESC) ? ": " : "", + (ctx->flags & STAT_F_SHDESC) ? (uri->desc ? uri->desc : global.desc) : "", + pid, 1, 1, global.nbthread, + up / 86400, (up % 86400) / 3600, + (up % 3600) / 60, (up % 60), + HA_ATOMIC_LOAD(&tot_warnings), + global.rlimit_memmax ? ultoa(global.rlimit_memmax) : "unlimited", + global.rlimit_memmax ? " MB" : "", + global.rlimit_nofile, + global.maxsock, global.maxconn, HA_ATOMIC_LOAD(&maxconn_reached), global.maxpipes, + actconn, pipes_used, pipes_used+pipes_free, read_freq_ctr(&global.conn_per_sec), + bps >= 1000000000UL ? (bps / 1000000000.0) : bps >= 1000000UL ? (bps / 1000000.0) : (bps / 1000.0), + bps >= 1000000000UL ? 'G' : bps >= 1000000UL ? 'M' : 'k', + total_run_queues(), total_allocated_tasks(), total_niced_running_tasks(), clock_report_idle()); + + /* scope_txt = search query, ctx->scope_len is always <= STAT_SCOPE_TXT_MAXLEN */ + memcpy(scope_txt, scope_ptr, ctx->scope_len); + scope_txt[ctx->scope_len] = '\0'; + + chunk_appendf(chk, + "<li><form method=\"GET\">Scope : <input value=\"%s\" name=\"" STAT_SCOPE_INPUT_NAME "\" size=\"8\" maxlength=\"%d\" tabindex=\"1\"/></form>\n", + (ctx->scope_len > 0) ? 
scope_txt : "", + STAT_SCOPE_TXT_MAXLEN); + + /* scope_txt = search pattern + search query, ctx->scope_len is always <= STAT_SCOPE_TXT_MAXLEN */ + scope_txt[0] = 0; + if (ctx->scope_len) { + strlcpy2(scope_txt, STAT_SCOPE_PATTERN, sizeof(scope_txt)); + memcpy(scope_txt + strlen(STAT_SCOPE_PATTERN), scope_ptr, ctx->scope_len); + scope_txt[strlen(STAT_SCOPE_PATTERN) + ctx->scope_len] = 0; + } + + if (ctx->flags & STAT_F_HIDE_DOWN) + chunk_appendf(chk, + "<li><a href=\"%s%s%s%s\">Show all servers</a><br>\n", + uri->uri_prefix, + "", + (ctx->flags & STAT_F_NO_REFRESH) ? ";norefresh" : "", + scope_txt); + else + chunk_appendf(chk, + "<li><a href=\"%s%s%s%s\">Hide 'DOWN' servers</a><br>\n", + uri->uri_prefix, + ";up", + (ctx->flags & STAT_F_NO_REFRESH) ? ";norefresh" : "", + scope_txt); + + if (uri->refresh > 0) { + if (ctx->flags & STAT_F_NO_REFRESH) + chunk_appendf(chk, + "<li><a href=\"%s%s%s%s\">Enable refresh</a><br>\n", + uri->uri_prefix, + (ctx->flags & STAT_F_HIDE_DOWN) ? ";up" : "", + "", + scope_txt); + else + chunk_appendf(chk, + "<li><a href=\"%s%s%s%s\">Disable refresh</a><br>\n", + uri->uri_prefix, + (ctx->flags & STAT_F_HIDE_DOWN) ? ";up" : "", + ";norefresh", + scope_txt); + } + + chunk_appendf(chk, + "<li><a href=\"%s%s%s%s\">Refresh now</a><br>\n", + uri->uri_prefix, + (ctx->flags & STAT_F_HIDE_DOWN) ? ";up" : "", + (ctx->flags & STAT_F_NO_REFRESH) ? ";norefresh" : "", + scope_txt); + + chunk_appendf(chk, + "<li><a href=\"%s;csv%s%s\">CSV export</a><br>\n", + uri->uri_prefix, + (uri->refresh > 0) ? ";norefresh" : "", + scope_txt); + + chunk_appendf(chk, + "<li><a href=\"%s;json%s%s\">JSON export</a> (<a href=\"%s;json-schema\">schema</a>)<br>\n", + uri->uri_prefix, + (uri->refresh > 0) ? 
";norefresh" : "", + scope_txt, uri->uri_prefix); + + chunk_appendf(chk, + "</ul></td>" + "<td align=\"left\" valign=\"top\" nowrap width=\"1%%\">" + "<b>External resources:</b><ul style=\"margin-top: 0.25em;\">\n" + "<li><a href=\"" PRODUCT_URL "\">Primary site</a><br>\n" + "<li><a href=\"" PRODUCT_URL_UPD "\">Updates (v" PRODUCT_BRANCH ")</a><br>\n" + "<li><a href=\"" PRODUCT_URL_DOC "\">Online manual</a><br>\n" + "</ul>" + "</td>" + "</tr></table>\n" + "" + ); + + if (ctx->st_code) { + switch (ctx->st_code) { + case STAT_STATUS_DONE: + chunk_appendf(chk, + "<p><div class=active_up>" + "<a class=lfsb href=\"%s%s%s%s\" title=\"Remove this message\">[X]</a> " + "Action processed successfully." + "</div>\n", uri->uri_prefix, + (ctx->flags & STAT_F_HIDE_DOWN) ? ";up" : "", + (ctx->flags & STAT_F_NO_REFRESH) ? ";norefresh" : "", + scope_txt); + break; + case STAT_STATUS_NONE: + chunk_appendf(chk, + "<p><div class=active_going_down>" + "<a class=lfsb href=\"%s%s%s%s\" title=\"Remove this message\">[X]</a> " + "Nothing has changed." + "</div>\n", uri->uri_prefix, + (ctx->flags & STAT_F_HIDE_DOWN) ? ";up" : "", + (ctx->flags & STAT_F_NO_REFRESH) ? ";norefresh" : "", + scope_txt); + break; + case STAT_STATUS_PART: + chunk_appendf(chk, + "<p><div class=active_going_down>" + "<a class=lfsb href=\"%s%s%s%s\" title=\"Remove this message\">[X]</a> " + "Action partially processed.<br>" + "Some server names are probably unknown or ambiguous (duplicated names in the backend)." + "</div>\n", uri->uri_prefix, + (ctx->flags & STAT_F_HIDE_DOWN) ? ";up" : "", + (ctx->flags & STAT_F_NO_REFRESH) ? ";norefresh" : "", + scope_txt); + break; + case STAT_STATUS_ERRP: + chunk_appendf(chk, + "<p><div class=active_down>" + "<a class=lfsb href=\"%s%s%s%s\" title=\"Remove this message\">[X]</a> " + "Action not processed because of invalid parameters." 
+ "<ul>" + "<li>The action is maybe unknown.</li>" + "<li>Invalid key parameter (empty or too long).</li>" + "<li>The backend name is probably unknown or ambiguous (duplicated names).</li>" + "<li>Some server names are probably unknown or ambiguous (duplicated names in the backend).</li>" + "</ul>" + "</div>\n", uri->uri_prefix, + (ctx->flags & STAT_F_HIDE_DOWN) ? ";up" : "", + (ctx->flags & STAT_F_NO_REFRESH) ? ";norefresh" : "", + scope_txt); + break; + case STAT_STATUS_EXCD: + chunk_appendf(chk, + "<p><div class=active_down>" + "<a class=lfsb href=\"%s%s%s%s\" title=\"Remove this message\">[X]</a> " + "<b>Action not processed : the buffer couldn't store all the data.<br>" + "You should retry with less servers at a time.</b>" + "</div>\n", uri->uri_prefix, + (ctx->flags & STAT_F_HIDE_DOWN) ? ";up" : "", + (ctx->flags & STAT_F_NO_REFRESH) ? ";norefresh" : "", + scope_txt); + break; + case STAT_STATUS_DENY: + chunk_appendf(chk, + "<p><div class=active_down>" + "<a class=lfsb href=\"%s%s%s%s\" title=\"Remove this message\">[X]</a> " + "<b>Action denied.</b>" + "</div>\n", uri->uri_prefix, + (ctx->flags & STAT_F_HIDE_DOWN) ? ";up" : "", + (ctx->flags & STAT_F_NO_REFRESH) ? ";norefresh" : "", + scope_txt); + break; + case STAT_STATUS_IVAL: + chunk_appendf(chk, + "<p><div class=active_down>" + "<a class=lfsb href=\"%s%s%s%s\" title=\"Remove this message\">[X]</a> " + "<b>Invalid requests (unsupported method or chunked encoded request).</b>" + "</div>\n", uri->uri_prefix, + (ctx->flags & STAT_F_HIDE_DOWN) ? ";up" : "", + (ctx->flags & STAT_F_NO_REFRESH) ? ";norefresh" : "", + scope_txt); + break; + default: + chunk_appendf(chk, + "<p><div class=active_no_check>" + "<a class=lfsb href=\"%s%s%s%s\" title=\"Remove this message\">[X]</a> " + "Unexpected result." + "</div>\n", uri->uri_prefix, + (ctx->flags & STAT_F_HIDE_DOWN) ? ";up" : "", + (ctx->flags & STAT_F_NO_REFRESH) ? 
";norefresh" : "", + scope_txt); + } + chunk_appendf(chk, "<p>\n"); + } +} + +/* Dump all fields from <stats> into <out> using the HTML format. A column is + * reserved for the checkbox if STAT_F_ADMIN is set in <flags>. Some extra info + * is provided if STAT_F_SHLGNDS is present in <flags>. The statistics from + * extra modules are displayed at the end of the lines if STAT_F_SHMODULES is + * present in <flags>. + */ +int stats_dump_fields_html(struct buffer *out, + const struct field *stats, + struct show_stat_ctx *ctx) +{ + struct buffer src; + struct stats_module *mod; + int flags = ctx->flags; + int i = 0, j = 0; + + if (stats[ST_I_PX_TYPE].u.u32 == STATS_TYPE_FE) { + chunk_appendf(out, + /* name, queue */ + "<tr class=\"frontend\">"); + + if (flags & STAT_F_ADMIN) { + /* Column sub-heading for Enable or Disable server */ + chunk_appendf(out, "<td></td>"); + } + + chunk_appendf(out, + "<td class=ac>" + "<a name=\"%s/Frontend\"></a>" + "<a class=lfsb href=\"#%s/Frontend\">Frontend</a></td>" + "<td colspan=3></td>" + "", + field_str(stats, ST_I_PX_PXNAME), field_str(stats, ST_I_PX_PXNAME)); + + chunk_appendf(out, + /* sessions rate : current */ + "<td><u>%s<div class=tips><table class=det>" + "<tr><th>Current connection rate:</th><td>%s/s</td></tr>" + "<tr><th>Current session rate:</th><td>%s/s</td></tr>" + "", + U2H(stats[ST_I_PX_RATE].u.u32), + U2H(stats[ST_I_PX_CONN_RATE].u.u32), + U2H(stats[ST_I_PX_RATE].u.u32)); + + if (strcmp(field_str(stats, ST_I_PX_MODE), "http") == 0) + chunk_appendf(out, + "<tr><th>Current request rate:</th><td>%s/s</td></tr>", + U2H(stats[ST_I_PX_REQ_RATE].u.u32)); + + chunk_appendf(out, + "</table></div></u></td>" + /* sessions rate : max */ + "<td><u>%s<div class=tips><table class=det>" + "<tr><th>Max connection rate:</th><td>%s/s</td></tr>" + "<tr><th>Max session rate:</th><td>%s/s</td></tr>" + "", + U2H(stats[ST_I_PX_RATE_MAX].u.u32), + U2H(stats[ST_I_PX_CONN_RATE_MAX].u.u32), + U2H(stats[ST_I_PX_RATE_MAX].u.u32)); + + if 
(strcmp(field_str(stats, ST_I_PX_MODE), "http") == 0) + chunk_appendf(out, + "<tr><th>Max request rate:</th><td>%s/s</td></tr>", + U2H(stats[ST_I_PX_REQ_RATE_MAX].u.u32)); + + chunk_appendf(out, + "</table></div></u></td>" + /* sessions rate : limit */ + "<td>%s</td>", + LIM2A(stats[ST_I_PX_RATE_LIM].u.u32, "-")); + + chunk_appendf(out, + /* sessions: current, max, limit, total */ + "<td>%s</td><td>%s</td><td>%s</td>" + "<td><u>%s<div class=tips><table class=det>" + "<tr><th>Cum. connections:</th><td>%s</td></tr>" + "<tr><th>Cum. sessions:</th><td>%s</td></tr>" + "", + U2H(stats[ST_I_PX_SCUR].u.u32), U2H(stats[ST_I_PX_SMAX].u.u32), U2H(stats[ST_I_PX_SLIM].u.u32), + U2H(stats[ST_I_PX_STOT].u.u64), + U2H(stats[ST_I_PX_CONN_TOT].u.u64), + U2H(stats[ST_I_PX_STOT].u.u64)); + + /* http response (via hover): 1xx, 2xx, 3xx, 4xx, 5xx, other */ + if (strcmp(field_str(stats, ST_I_PX_MODE), "http") == 0) { + chunk_appendf(out, + "<tr><th>- HTTP/1 sessions:</th><td>%s</td></tr>" + "<tr><th>- HTTP/2 sessions:</th><td>%s</td></tr>" + "<tr><th>- HTTP/3 sessions:</th><td>%s</td></tr>" + "<tr><th>- other sessions:</th><td>%s</td></tr>" + "<tr><th>Cum. 
HTTP requests:</th><td>%s</td></tr>" + "<tr><th>- HTTP/1 requests:</th><td>%s</td></tr>" + "<tr><th>- HTTP/2 requests:</th><td>%s</td></tr>" + "<tr><th>- HTTP/3 requests:</th><td>%s</td></tr>" + "<tr><th>- other requests:</th><td>%s</td></tr>" + "", + U2H(stats[ST_I_PX_H1SESS].u.u64), + U2H(stats[ST_I_PX_H2SESS].u.u64), + U2H(stats[ST_I_PX_H3SESS].u.u64), + U2H(stats[ST_I_PX_SESS_OTHER].u.u64), + U2H(stats[ST_I_PX_REQ_TOT].u.u64), + U2H(stats[ST_I_PX_H1REQ].u.u64), + U2H(stats[ST_I_PX_H2REQ].u.u64), + U2H(stats[ST_I_PX_H3REQ].u.u64), + U2H(stats[ST_I_PX_REQ_OTHER].u.u64)); + + chunk_appendf(out, + "<tr><th>- HTTP 1xx responses:</th><td>%s</td></tr>" + "<tr><th>- HTTP 2xx responses:</th><td>%s</td></tr>" + "<tr><th> Compressed 2xx:</th><td>%s</td><td>(%d%%)</td></tr>" + "<tr><th>- HTTP 3xx responses:</th><td>%s</td></tr>" + "<tr><th>- HTTP 4xx responses:</th><td>%s</td></tr>" + "<tr><th>- HTTP 5xx responses:</th><td>%s</td></tr>" + "<tr><th>- other responses:</th><td>%s</td></tr>" + "", + U2H(stats[ST_I_PX_HRSP_1XX].u.u64), + U2H(stats[ST_I_PX_HRSP_2XX].u.u64), + U2H(stats[ST_I_PX_COMP_RSP].u.u64), + stats[ST_I_PX_HRSP_2XX].u.u64 ? + (int)(100 * stats[ST_I_PX_COMP_RSP].u.u64 / stats[ST_I_PX_HRSP_2XX].u.u64) : 0, + U2H(stats[ST_I_PX_HRSP_3XX].u.u64), + U2H(stats[ST_I_PX_HRSP_4XX].u.u64), + U2H(stats[ST_I_PX_HRSP_5XX].u.u64), + U2H(stats[ST_I_PX_HRSP_OTHER].u.u64)); + + chunk_appendf(out, + "<tr><th>Intercepted requests:</th><td>%s</td></tr>" + "<tr><th>Cache lookups:</th><td>%s</td></tr>" + "<tr><th>Cache hits:</th><td>%s</td><td>(%d%%)</td></tr>" + "<tr><th>Failed hdr rewrites:</th><td>%s</td></tr>" + "<tr><th>Internal errors:</th><td>%s</td></tr>" + "", + U2H(stats[ST_I_PX_INTERCEPTED].u.u64), + U2H(stats[ST_I_PX_CACHE_LOOKUPS].u.u64), + U2H(stats[ST_I_PX_CACHE_HITS].u.u64), + stats[ST_I_PX_CACHE_LOOKUPS].u.u64 ? 
+ (int)(100 * stats[ST_I_PX_CACHE_HITS].u.u64 / stats[ST_I_PX_CACHE_LOOKUPS].u.u64) : 0, + U2H(stats[ST_I_PX_WREW].u.u64), + U2H(stats[ST_I_PX_EINT].u.u64)); + } + + chunk_appendf(out, + "</table></div></u></td>" + /* sessions: lbtot, lastsess */ + "<td></td><td></td>" + /* bytes : in */ + "<td>%s</td>" + "", + U2H(stats[ST_I_PX_BIN].u.u64)); + + chunk_appendf(out, + /* bytes:out + compression stats (via hover): comp_in, comp_out, comp_byp */ + "<td>%s%s<div class=tips><table class=det>" + "<tr><th>Response bytes in:</th><td>%s</td></tr>" + "<tr><th>Compression in:</th><td>%s</td></tr>" + "<tr><th>Compression out:</th><td>%s</td><td>(%d%%)</td></tr>" + "<tr><th>Compression bypass:</th><td>%s</td></tr>" + "<tr><th>Total bytes saved:</th><td>%s</td><td>(%d%%)</td></tr>" + "</table></div>%s</td>", + (stats[ST_I_PX_COMP_IN].u.u64 || stats[ST_I_PX_COMP_BYP].u.u64) ? "<u>":"", + U2H(stats[ST_I_PX_BOUT].u.u64), + U2H(stats[ST_I_PX_BOUT].u.u64), + U2H(stats[ST_I_PX_COMP_IN].u.u64), + U2H(stats[ST_I_PX_COMP_OUT].u.u64), + stats[ST_I_PX_COMP_IN].u.u64 ? (int)(stats[ST_I_PX_COMP_OUT].u.u64 * 100 / stats[ST_I_PX_COMP_IN].u.u64) : 0, + U2H(stats[ST_I_PX_COMP_BYP].u.u64), + U2H(stats[ST_I_PX_COMP_IN].u.u64 - stats[ST_I_PX_COMP_OUT].u.u64), + stats[ST_I_PX_BOUT].u.u64 ? (int)((stats[ST_I_PX_COMP_IN].u.u64 - stats[ST_I_PX_COMP_OUT].u.u64) * 100 / stats[ST_I_PX_BOUT].u.u64) : 0, + (stats[ST_I_PX_COMP_IN].u.u64 || stats[ST_I_PX_COMP_BYP].u.u64) ? 
"</u>":""); + + chunk_appendf(out, + /* denied: req, resp */ + "<td>%s</td><td>%s</td>" + /* errors : request, connect, response */ + "<td>%s</td><td></td><td></td>" + /* warnings: retries, redispatches */ + "<td></td><td></td>" + /* server status : reflect frontend status */ + "<td class=ac>%s</td>" + /* rest of server: nothing */ + "<td class=ac colspan=8></td>" + "", + U2H(stats[ST_I_PX_DREQ].u.u64), U2H(stats[ST_I_PX_DRESP].u.u64), + U2H(stats[ST_I_PX_EREQ].u.u64), + field_str(stats, ST_I_PX_STATUS)); + + if (flags & STAT_F_SHMODULES) { + list_for_each_entry(mod, &stats_module_list[STATS_DOMAIN_PROXY], list) { + chunk_appendf(out, "<td>"); + + if (stats_px_get_cap(mod->domain_flags) & STATS_PX_CAP_FE) { + chunk_appendf(out, + "<u>%s<div class=tips><table class=det>", + mod->name); + for (j = 0; j < mod->stats_count; ++j) { + chunk_appendf(out, + "<tr><th>%s</th><td>%s</td></tr>", + mod->stats[j].desc, field_to_html_str(&stats[ST_I_PX_MAX + i])); + ++i; + } + chunk_appendf(out, "</table></div></u>"); + } else { + i += mod->stats_count; + } + + chunk_appendf(out, "</td>"); + } + } + + chunk_appendf(out, "</tr>"); + } + else if (stats[ST_I_PX_TYPE].u.u32 == STATS_TYPE_SO) { + chunk_appendf(out, "<tr class=socket>"); + if (flags & STAT_F_ADMIN) { + /* Column sub-heading for Enable or Disable server */ + chunk_appendf(out, "<td></td>"); + } + + chunk_appendf(out, + /* frontend name, listener name */ + "<td class=ac><a name=\"%s/+%s\"></a>%s" + "<a class=lfsb href=\"#%s/+%s\">%s</a>" + "", + field_str(stats, ST_I_PX_PXNAME), field_str(stats, ST_I_PX_SVNAME), + (flags & STAT_F_SHLGNDS)?"<u>":"", + field_str(stats, ST_I_PX_PXNAME), field_str(stats, ST_I_PX_SVNAME), field_str(stats, ST_I_PX_SVNAME)); + + if (flags & STAT_F_SHLGNDS) { + chunk_appendf(out, "<div class=tips>"); + + if (isdigit((unsigned char)*field_str(stats, ST_I_PX_ADDR))) + chunk_appendf(out, "IPv4: %s, ", field_str(stats, ST_I_PX_ADDR)); + else if (*field_str(stats, ST_I_PX_ADDR) == '[') + 
chunk_appendf(out, "IPv6: %s, ", field_str(stats, ST_I_PX_ADDR)); + else if (*field_str(stats, ST_I_PX_ADDR)) + chunk_appendf(out, "%s, ", field_str(stats, ST_I_PX_ADDR)); + + chunk_appendf(out, "proto=%s, ", field_str(stats, ST_I_PX_PROTO)); + + /* id */ + chunk_appendf(out, "id: %d</div>", stats[ST_I_PX_SID].u.u32); + } + + chunk_appendf(out, + /* queue */ + "%s</td><td colspan=3></td>" + /* sessions rate: current, max, limit */ + "<td colspan=3> </td>" + /* sessions: current, max, limit, total, lbtot, lastsess */ + "<td>%s</td><td>%s</td><td>%s</td>" + "<td>%s</td><td> </td><td> </td>" + /* bytes: in, out */ + "<td>%s</td><td>%s</td>" + "", + (flags & STAT_F_SHLGNDS)?"</u>":"", + U2H(stats[ST_I_PX_SCUR].u.u32), U2H(stats[ST_I_PX_SMAX].u.u32), U2H(stats[ST_I_PX_SLIM].u.u32), + U2H(stats[ST_I_PX_STOT].u.u64), U2H(stats[ST_I_PX_BIN].u.u64), U2H(stats[ST_I_PX_BOUT].u.u64)); + + chunk_appendf(out, + /* denied: req, resp */ + "<td>%s</td><td>%s</td>" + /* errors: request, connect, response */ + "<td>%s</td><td></td><td></td>" + /* warnings: retries, redispatches */ + "<td></td><td></td>" + /* server status: reflect listener status */ + "<td class=ac>%s</td>" + /* rest of server: nothing */ + "<td class=ac colspan=8></td>" + "", + U2H(stats[ST_I_PX_DREQ].u.u64), U2H(stats[ST_I_PX_DRESP].u.u64), + U2H(stats[ST_I_PX_EREQ].u.u64), + field_str(stats, ST_I_PX_STATUS)); + + if (flags & STAT_F_SHMODULES) { + list_for_each_entry(mod, &stats_module_list[STATS_DOMAIN_PROXY], list) { + chunk_appendf(out, "<td>"); + + if (stats_px_get_cap(mod->domain_flags) & STATS_PX_CAP_LI) { + chunk_appendf(out, + "<u>%s<div class=tips><table class=det>", + mod->name); + for (j = 0; j < mod->stats_count; ++j) { + chunk_appendf(out, + "<tr><th>%s</th><td>%s</td></tr>", + mod->stats[j].desc, field_to_html_str(&stats[ST_I_PX_MAX + i])); + ++i; + } + chunk_appendf(out, "</table></div></u>"); + } else { + i += mod->stats_count; + } + + chunk_appendf(out, "</td>"); + } + } + + chunk_appendf(out, 
"</tr>"); + } + else if (stats[ST_I_PX_TYPE].u.u32 == STATS_TYPE_SV) { + const char *style; + + /* determine the style to use depending on the server's state, + * its health and weight. There isn't a 1-to-1 mapping between + * state and styles for the cases where the server is (still) + * up. The reason is that we don't want to report nolb and + * drain with the same color. + */ + + if (strcmp(field_str(stats, ST_I_PX_STATUS), "DOWN") == 0 || + strcmp(field_str(stats, ST_I_PX_STATUS), "DOWN (agent)") == 0) { + style = "down"; + } + else if (strncmp(field_str(stats, ST_I_PX_STATUS), "DOWN ", strlen("DOWN ")) == 0) { + style = "going_up"; + } + else if (strcmp(field_str(stats, ST_I_PX_STATUS), "DRAIN") == 0) { + style = "draining"; + } + else if (strncmp(field_str(stats, ST_I_PX_STATUS), "NOLB ", strlen("NOLB ")) == 0) { + style = "going_down"; + } + else if (strcmp(field_str(stats, ST_I_PX_STATUS), "NOLB") == 0) { + style = "nolb"; + } + else if (strcmp(field_str(stats, ST_I_PX_STATUS), "no check") == 0) { + style = "no_check"; + } + else if (!stats[ST_I_PX_CHKFAIL].type || + stats[ST_I_PX_CHECK_HEALTH].u.u32 == stats[ST_I_PX_CHECK_RISE].u.u32 + stats[ST_I_PX_CHECK_FALL].u.u32 - 1) { + /* no check or max health = UP */ + if (stats[ST_I_PX_WEIGHT].u.u32) + style = "up"; + else + style = "draining"; + } + else { + style = "going_down"; + } + + if (strncmp(field_str(stats, ST_I_PX_STATUS), "MAINT", 5) == 0) + chunk_appendf(out, "<tr class=\"maintain\">"); + else + chunk_appendf(out, + "<tr class=\"%s_%s\">", + (stats[ST_I_PX_BCK].u.u32) ? 
"backup" : "active", style); + + + if (flags & STAT_F_ADMIN) + chunk_appendf(out, + "<td><input class='%s-checkbox' type=\"checkbox\" name=\"s\" value=\"%s\"></td>", + field_str(stats, ST_I_PX_PXNAME), + field_str(stats, ST_I_PX_SVNAME)); + + chunk_appendf(out, + "<td class=ac><a name=\"%s/%s\"></a>%s" + "<a class=lfsb href=\"#%s/%s\">%s</a>" + "", + field_str(stats, ST_I_PX_PXNAME), field_str(stats, ST_I_PX_SVNAME), + (flags & STAT_F_SHLGNDS) ? "<u>" : "", + field_str(stats, ST_I_PX_PXNAME), field_str(stats, ST_I_PX_SVNAME), field_str(stats, ST_I_PX_SVNAME)); + + if (flags & STAT_F_SHLGNDS) { + chunk_appendf(out, "<div class=tips>"); + + if (isdigit((unsigned char)*field_str(stats, ST_I_PX_ADDR))) + chunk_appendf(out, "IPv4: %s, ", field_str(stats, ST_I_PX_ADDR)); + else if (*field_str(stats, ST_I_PX_ADDR) == '[') + chunk_appendf(out, "IPv6: %s, ", field_str(stats, ST_I_PX_ADDR)); + else if (*field_str(stats, ST_I_PX_ADDR)) + chunk_appendf(out, "%s, ", field_str(stats, ST_I_PX_ADDR)); + + /* id */ + chunk_appendf(out, "id: %d, rid: %d", stats[ST_I_PX_SID].u.u32, stats[ST_I_PX_SRID].u.u32); + + /* cookie */ + if (stats[ST_I_PX_COOKIE].type) { + chunk_appendf(out, ", cookie: '"); + chunk_initstr(&src, field_str(stats, ST_I_PX_COOKIE)); + chunk_htmlencode(out, &src); + chunk_appendf(out, "'"); + } + + chunk_appendf(out, "</div>"); + } + + chunk_appendf(out, + /* queue : current, max, limit */ + "%s</td><td>%s</td><td>%s</td><td>%s</td>" + /* sessions rate : current, max, limit */ + "<td>%s</td><td>%s</td><td></td>" + "", + (flags & STAT_F_SHLGNDS) ? 
"</u>" : "", + U2H(stats[ST_I_PX_QCUR].u.u32), U2H(stats[ST_I_PX_QMAX].u.u32), LIM2A(stats[ST_I_PX_QLIMIT].u.u32, "-"), + U2H(stats[ST_I_PX_RATE].u.u32), U2H(stats[ST_I_PX_RATE_MAX].u.u32)); + + chunk_appendf(out, + /* sessions: current, max, limit, total */ + "<td><u>%s<div class=tips>" + "<table class=det>" + "<tr><th>Current active connections:</th><td>%s</td></tr>" + "<tr><th>Current used connections:</th><td>%s</td></tr>" + "<tr><th>Current idle connections:</th><td>%s</td></tr>" + "<tr><th>- unsafe:</th><td>%s</td></tr>" + "<tr><th>- safe:</th><td>%s</td></tr>" + "<tr><th>Estimated need of connections:</th><td>%s</td></tr>" + "<tr><th>Active connections limit:</th><td>%s</td></tr>" + "<tr><th>Idle connections limit:</th><td>%s</td></tr>" + "</table></div></u>" + "</td><td>%s</td><td>%s</td>" + "<td><u>%s<div class=tips><table class=det>" + "<tr><th>Cum. sessions:</th><td>%s</td></tr>" + "", + U2H(stats[ST_I_PX_SCUR].u.u32), + U2H(stats[ST_I_PX_SCUR].u.u32), + U2H(stats[ST_I_PX_USED_CONN_CUR].u.u32), + U2H(stats[ST_I_PX_SRV_ICUR].u.u32), + U2H(stats[ST_I_PX_IDLE_CONN_CUR].u.u32), + U2H(stats[ST_I_PX_SAFE_CONN_CUR].u.u32), + U2H(stats[ST_I_PX_NEED_CONN_EST].u.u32), + + LIM2A(stats[ST_I_PX_SLIM].u.u32, "-"), + stats[ST_I_PX_SRV_ILIM].type ? U2H(stats[ST_I_PX_SRV_ILIM].u.u32) : "-", + U2H(stats[ST_I_PX_SMAX].u.u32), LIM2A(stats[ST_I_PX_SLIM].u.u32, "-"), + U2H(stats[ST_I_PX_STOT].u.u64), + U2H(stats[ST_I_PX_STOT].u.u64)); + + /* http response (via hover): 1xx, 2xx, 3xx, 4xx, 5xx, other */ + if (strcmp(field_str(stats, ST_I_PX_MODE), "http") == 0) { + chunk_appendf(out, + "<tr><th>New connections:</th><td>%s</td></tr>" + "<tr><th>Reused connections:</th><td>%s</td><td>(%d%%)</td></tr>" + "<tr><th>Cum. 
HTTP requests:</th><td>%s</td></tr>" + "<tr><th>- HTTP 1xx responses:</th><td>%s</td><td>(%d%%)</td></tr>" + "<tr><th>- HTTP 2xx responses:</th><td>%s</td><td>(%d%%)</td></tr>" + "<tr><th>- HTTP 3xx responses:</th><td>%s</td><td>(%d%%)</td></tr>" + "<tr><th>- HTTP 4xx responses:</th><td>%s</td><td>(%d%%)</td></tr>" + "<tr><th>- HTTP 5xx responses:</th><td>%s</td><td>(%d%%)</td></tr>" + "<tr><th>- other responses:</th><td>%s</td><td>(%d%%)</td></tr>" + "<tr><th>Failed hdr rewrites:</th><td>%s</td></tr>" + "<tr><th>Internal error:</th><td>%s</td></tr>" + "", + U2H(stats[ST_I_PX_CONNECT].u.u64), + U2H(stats[ST_I_PX_REUSE].u.u64), + (stats[ST_I_PX_CONNECT].u.u64 + stats[ST_I_PX_REUSE].u.u64) ? + (int)(100 * stats[ST_I_PX_REUSE].u.u64 / (stats[ST_I_PX_CONNECT].u.u64 + stats[ST_I_PX_REUSE].u.u64)) : 0, + U2H(stats[ST_I_PX_REQ_TOT].u.u64), + U2H(stats[ST_I_PX_HRSP_1XX].u.u64), stats[ST_I_PX_REQ_TOT].u.u64 ? + (int)(100 * stats[ST_I_PX_HRSP_1XX].u.u64 / stats[ST_I_PX_REQ_TOT].u.u64) : 0, + U2H(stats[ST_I_PX_HRSP_2XX].u.u64), stats[ST_I_PX_REQ_TOT].u.u64 ? + (int)(100 * stats[ST_I_PX_HRSP_2XX].u.u64 / stats[ST_I_PX_REQ_TOT].u.u64) : 0, + U2H(stats[ST_I_PX_HRSP_3XX].u.u64), stats[ST_I_PX_REQ_TOT].u.u64 ? + (int)(100 * stats[ST_I_PX_HRSP_3XX].u.u64 / stats[ST_I_PX_REQ_TOT].u.u64) : 0, + U2H(stats[ST_I_PX_HRSP_4XX].u.u64), stats[ST_I_PX_REQ_TOT].u.u64 ? + (int)(100 * stats[ST_I_PX_HRSP_4XX].u.u64 / stats[ST_I_PX_REQ_TOT].u.u64) : 0, + U2H(stats[ST_I_PX_HRSP_5XX].u.u64), stats[ST_I_PX_REQ_TOT].u.u64 ? + (int)(100 * stats[ST_I_PX_HRSP_5XX].u.u64 / stats[ST_I_PX_REQ_TOT].u.u64) : 0, + U2H(stats[ST_I_PX_HRSP_OTHER].u.u64), stats[ST_I_PX_REQ_TOT].u.u64 ? + (int)(100 * stats[ST_I_PX_HRSP_OTHER].u.u64 / stats[ST_I_PX_REQ_TOT].u.u64) : 0, + U2H(stats[ST_I_PX_WREW].u.u64), + U2H(stats[ST_I_PX_EINT].u.u64)); + } + + chunk_appendf(out, "<tr><th colspan=3>Max / Avg over last 1024 success. 
conn.</th></tr>"); + chunk_appendf(out, "<tr><th>- Queue time:</th><td>%s / %s</td><td>ms</td></tr>", + U2H(stats[ST_I_PX_QT_MAX].u.u32), U2H(stats[ST_I_PX_QTIME].u.u32)); + chunk_appendf(out, "<tr><th>- Connect time:</th><td>%s / %s</td><td>ms</td></tr>", + U2H(stats[ST_I_PX_CT_MAX].u.u32), U2H(stats[ST_I_PX_CTIME].u.u32)); + if (strcmp(field_str(stats, ST_I_PX_MODE), "http") == 0) + chunk_appendf(out, "<tr><th>- Responses time:</th><td>%s / %s</td><td>ms</td></tr>", + U2H(stats[ST_I_PX_RT_MAX].u.u32), U2H(stats[ST_I_PX_RTIME].u.u32)); + chunk_appendf(out, "<tr><th>- Total time:</th><td>%s / %s</td><td>ms</td></tr>", + U2H(stats[ST_I_PX_TT_MAX].u.u32), U2H(stats[ST_I_PX_TTIME].u.u32)); + + chunk_appendf(out, + "</table></div></u></td>" + /* sessions: lbtot, last */ + "<td>%s</td><td>%s</td>", + U2H(stats[ST_I_PX_LBTOT].u.u64), + human_time(stats[ST_I_PX_LASTSESS].u.s32, 1)); + + chunk_appendf(out, + /* bytes : in, out */ + "<td>%s</td><td>%s</td>" + /* denied: req, resp */ + "<td></td><td>%s</td>" + /* errors : request, connect */ + "<td></td><td>%s</td>" + /* errors : response */ + "<td><u>%s<div class=tips>Connection resets during transfers: %lld client, %lld server</div></u></td>" + /* warnings: retries, redispatches */ + "<td>%lld</td><td>%lld</td>" + "", + U2H(stats[ST_I_PX_BIN].u.u64), U2H(stats[ST_I_PX_BOUT].u.u64), + U2H(stats[ST_I_PX_DRESP].u.u64), + U2H(stats[ST_I_PX_ECON].u.u64), + U2H(stats[ST_I_PX_ERESP].u.u64), + (long long)stats[ST_I_PX_CLI_ABRT].u.u64, + (long long)stats[ST_I_PX_SRV_ABRT].u.u64, + (long long)stats[ST_I_PX_WRETR].u.u64, + (long long)stats[ST_I_PX_WREDIS].u.u64); + + /* status, last change */ + chunk_appendf(out, "<td class=ac>"); + + /* FIXME!!!! + * LASTCHG should contain the last change for *this* server and must be computed + * properly above, as was done below, ie: this server if maint, otherwise ref server + * if tracking. Note that ref is either local or remote depending on tracking. 
+ */ + + + if (strncmp(field_str(stats, ST_I_PX_STATUS), "MAINT", 5) == 0) { + chunk_appendf(out, "%s MAINT", human_time(stats[ST_I_PX_LASTCHG].u.u32, 1)); + } + else if (strcmp(field_str(stats, ST_I_PX_STATUS), "no check") == 0) { + chunk_strcat(out, "<i>no check</i>"); + } + else { + chunk_appendf(out, "%s %s", human_time(stats[ST_I_PX_LASTCHG].u.u32, 1), field_str(stats, ST_I_PX_STATUS)); + if (strncmp(field_str(stats, ST_I_PX_STATUS), "DOWN", 4) == 0) { + if (stats[ST_I_PX_CHECK_HEALTH].u.u32) + chunk_strcat(out, " ↑"); + } + else if (stats[ST_I_PX_CHECK_HEALTH].u.u32 < stats[ST_I_PX_CHECK_RISE].u.u32 + stats[ST_I_PX_CHECK_FALL].u.u32 - 1) + chunk_strcat(out, " ↓"); + } + + if (strncmp(field_str(stats, ST_I_PX_STATUS), "DOWN", 4) == 0 && + stats[ST_I_PX_AGENT_STATUS].type && !stats[ST_I_PX_AGENT_HEALTH].u.u32) { + chunk_appendf(out, + "</td><td class=ac><u> %s", + field_str(stats, ST_I_PX_AGENT_STATUS)); + + if (stats[ST_I_PX_AGENT_CODE].type) + chunk_appendf(out, "/%d", stats[ST_I_PX_AGENT_CODE].u.u32); + + if (stats[ST_I_PX_AGENT_DURATION].type) + chunk_appendf(out, " in %lums", (long)stats[ST_I_PX_AGENT_DURATION].u.u64); + + chunk_appendf(out, "<div class=tips>%s", field_str(stats, ST_I_PX_AGENT_DESC)); + + if (*field_str(stats, ST_I_PX_LAST_AGT)) { + chunk_appendf(out, ": "); + chunk_initstr(&src, field_str(stats, ST_I_PX_LAST_AGT)); + chunk_htmlencode(out, &src); + } + chunk_appendf(out, "</div></u>"); + } + else if (stats[ST_I_PX_CHECK_STATUS].type) { + chunk_appendf(out, + "</td><td class=ac><u> %s", + field_str(stats, ST_I_PX_CHECK_STATUS)); + + if (stats[ST_I_PX_CHECK_CODE].type) + chunk_appendf(out, "/%d", stats[ST_I_PX_CHECK_CODE].u.u32); + + if (stats[ST_I_PX_CHECK_DURATION].type) + chunk_appendf(out, " in %lums", (long)stats[ST_I_PX_CHECK_DURATION].u.u64); + + chunk_appendf(out, "<div class=tips>%s", field_str(stats, ST_I_PX_CHECK_DESC)); + + if (*field_str(stats, ST_I_PX_LAST_CHK)) { + chunk_appendf(out, ": "); + chunk_initstr(&src, 
field_str(stats, ST_I_PX_LAST_CHK)); + chunk_htmlencode(out, &src); + } + chunk_appendf(out, "</div></u>"); + } + else + chunk_appendf(out, "</td><td>"); + + chunk_appendf(out, + /* weight / uweight */ + "</td><td class=ac>%d/%d</td>" + /* act, bck */ + "<td class=ac>%s</td><td class=ac>%s</td>" + "", + stats[ST_I_PX_WEIGHT].u.u32, stats[ST_I_PX_UWEIGHT].u.u32, + stats[ST_I_PX_BCK].u.u32 ? "-" : "Y", + stats[ST_I_PX_BCK].u.u32 ? "Y" : "-"); + + /* check failures: unique, fatal, down time */ + if (strcmp(field_str(stats, ST_I_PX_STATUS), "MAINT (resolution)") == 0) { + chunk_appendf(out, "<td class=ac colspan=3>resolution</td>"); + } + else if (stats[ST_I_PX_CHKFAIL].type) { + chunk_appendf(out, "<td><u>%lld", (long long)stats[ST_I_PX_CHKFAIL].u.u64); + + if (stats[ST_I_PX_HANAFAIL].type) + chunk_appendf(out, "/%lld", (long long)stats[ST_I_PX_HANAFAIL].u.u64); + + chunk_appendf(out, + "<div class=tips>Failed Health Checks%s</div></u></td>" + "<td>%lld</td><td>%s</td>" + "", + stats[ST_I_PX_HANAFAIL].type ? 
"/Health Analyses" : "", + (long long)stats[ST_I_PX_CHKDOWN].u.u64, human_time(stats[ST_I_PX_DOWNTIME].u.u32, 1)); + } + else if (strcmp(field_str(stats, ST_I_PX_STATUS), "MAINT") != 0 && field_format(stats, ST_I_PX_TRACKED) == FF_STR) { + /* tracking a server (hence inherited maint would appear as "MAINT (via...)" */ + chunk_appendf(out, + "<td class=ac colspan=3><a class=lfsb href=\"#%s\">via %s</a></td>", + field_str(stats, ST_I_PX_TRACKED), field_str(stats, ST_I_PX_TRACKED)); + } + else + chunk_appendf(out, "<td colspan=3></td>"); + + /* throttle */ + if (stats[ST_I_PX_THROTTLE].type) + chunk_appendf(out, "<td class=ac>%d %%</td>\n", stats[ST_I_PX_THROTTLE].u.u32); + else + chunk_appendf(out, "<td class=ac>-</td>"); + + if (flags & STAT_F_SHMODULES) { + list_for_each_entry(mod, &stats_module_list[STATS_DOMAIN_PROXY], list) { + chunk_appendf(out, "<td>"); + + if (stats_px_get_cap(mod->domain_flags) & STATS_PX_CAP_SRV) { + chunk_appendf(out, + "<u>%s<div class=tips><table class=det>", + mod->name); + for (j = 0; j < mod->stats_count; ++j) { + chunk_appendf(out, + "<tr><th>%s</th><td>%s</td></tr>", + mod->stats[j].desc, field_to_html_str(&stats[ST_I_PX_MAX + i])); + ++i; + } + chunk_appendf(out, "</table></div></u>"); + } else { + i += mod->stats_count; + } + + chunk_appendf(out, "</td>"); + } + } + + chunk_appendf(out, "</tr>\n"); + } + else if (stats[ST_I_PX_TYPE].u.u32 == STATS_TYPE_BE) { + chunk_appendf(out, "<tr class=\"backend\">"); + if (flags & STAT_F_ADMIN) { + /* Column sub-heading for Enable or Disable server */ + chunk_appendf(out, "<td></td>"); + } + chunk_appendf(out, + "<td class=ac>" + /* name */ + "%s<a name=\"%s/Backend\"></a>" + "<a class=lfsb href=\"#%s/Backend\">Backend</a>" + "", + (flags & STAT_F_SHLGNDS)?"<u>":"", + field_str(stats, ST_I_PX_PXNAME), field_str(stats, ST_I_PX_PXNAME)); + + if (flags & STAT_F_SHLGNDS) { + /* balancing */ + chunk_appendf(out, "<div class=tips>balancing: %s", + field_str(stats, ST_I_PX_ALGO)); + + /* cookie */ + 
if (stats[ST_I_PX_COOKIE].type) { + chunk_appendf(out, ", cookie: '"); + chunk_initstr(&src, field_str(stats, ST_I_PX_COOKIE)); + chunk_htmlencode(out, &src); + chunk_appendf(out, "'"); + } + chunk_appendf(out, "</div>"); + } + + chunk_appendf(out, + "%s</td>" + /* queue : current, max */ + "<td>%s</td><td>%s</td><td></td>" + /* sessions rate : current, max, limit */ + "<td>%s</td><td>%s</td><td></td>" + "", + (flags & STAT_F_SHLGNDS)?"</u>":"", + U2H(stats[ST_I_PX_QCUR].u.u32), U2H(stats[ST_I_PX_QMAX].u.u32), + U2H(stats[ST_I_PX_RATE].u.u32), U2H(stats[ST_I_PX_RATE_MAX].u.u32)); + + chunk_appendf(out, + /* sessions: current, max, limit, total */ + "<td>%s</td><td>%s</td><td>%s</td>" + "<td><u>%s<div class=tips><table class=det>" + "<tr><th>Cum. sessions:</th><td>%s</td></tr>" + "", + U2H(stats[ST_I_PX_SCUR].u.u32), U2H(stats[ST_I_PX_SMAX].u.u32), U2H(stats[ST_I_PX_SLIM].u.u32), + U2H(stats[ST_I_PX_STOT].u.u64), + U2H(stats[ST_I_PX_STOT].u.u64)); + + /* http response (via hover): 1xx, 2xx, 3xx, 4xx, 5xx, other */ + if (strcmp(field_str(stats, ST_I_PX_MODE), "http") == 0) { + chunk_appendf(out, + "<tr><th>New connections:</th><td>%s</td></tr>" + "<tr><th>Reused connections:</th><td>%s</td><td>(%d%%)</td></tr>" + "<tr><th>Cum. 
HTTP requests:</th><td>%s</td></tr>" + "<tr><th>- HTTP 1xx responses:</th><td>%s</td></tr>" + "<tr><th>- HTTP 2xx responses:</th><td>%s</td></tr>" + "<tr><th> Compressed 2xx:</th><td>%s</td><td>(%d%%)</td></tr>" + "<tr><th>- HTTP 3xx responses:</th><td>%s</td></tr>" + "<tr><th>- HTTP 4xx responses:</th><td>%s</td></tr>" + "<tr><th>- HTTP 5xx responses:</th><td>%s</td></tr>" + "<tr><th>- other responses:</th><td>%s</td></tr>" + "<tr><th>Cache lookups:</th><td>%s</td></tr>" + "<tr><th>Cache hits:</th><td>%s</td><td>(%d%%)</td></tr>" + "<tr><th>Failed hdr rewrites:</th><td>%s</td></tr>" + "<tr><th>Internal errors:</th><td>%s</td></tr>" + "", + U2H(stats[ST_I_PX_CONNECT].u.u64), + U2H(stats[ST_I_PX_REUSE].u.u64), + (stats[ST_I_PX_CONNECT].u.u64 + stats[ST_I_PX_REUSE].u.u64) ? + (int)(100 * stats[ST_I_PX_REUSE].u.u64 / (stats[ST_I_PX_CONNECT].u.u64 + stats[ST_I_PX_REUSE].u.u64)) : 0, + U2H(stats[ST_I_PX_REQ_TOT].u.u64), + U2H(stats[ST_I_PX_HRSP_1XX].u.u64), + U2H(stats[ST_I_PX_HRSP_2XX].u.u64), + U2H(stats[ST_I_PX_COMP_RSP].u.u64), + stats[ST_I_PX_HRSP_2XX].u.u64 ? + (int)(100 * stats[ST_I_PX_COMP_RSP].u.u64 / stats[ST_I_PX_HRSP_2XX].u.u64) : 0, + U2H(stats[ST_I_PX_HRSP_3XX].u.u64), + U2H(stats[ST_I_PX_HRSP_4XX].u.u64), + U2H(stats[ST_I_PX_HRSP_5XX].u.u64), + U2H(stats[ST_I_PX_HRSP_OTHER].u.u64), + U2H(stats[ST_I_PX_CACHE_LOOKUPS].u.u64), + U2H(stats[ST_I_PX_CACHE_HITS].u.u64), + stats[ST_I_PX_CACHE_LOOKUPS].u.u64 ? + (int)(100 * stats[ST_I_PX_CACHE_HITS].u.u64 / stats[ST_I_PX_CACHE_LOOKUPS].u.u64) : 0, + U2H(stats[ST_I_PX_WREW].u.u64), + U2H(stats[ST_I_PX_EINT].u.u64)); + } + + chunk_appendf(out, "<tr><th colspan=3>Max / Avg over last 1024 success. 
conn.</th></tr>"); + chunk_appendf(out, "<tr><th>- Queue time:</th><td>%s / %s</td><td>ms</td></tr>", + U2H(stats[ST_I_PX_QT_MAX].u.u32), U2H(stats[ST_I_PX_QTIME].u.u32)); + chunk_appendf(out, "<tr><th>- Connect time:</th><td>%s / %s</td><td>ms</td></tr>", + U2H(stats[ST_I_PX_CT_MAX].u.u32), U2H(stats[ST_I_PX_CTIME].u.u32)); + if (strcmp(field_str(stats, ST_I_PX_MODE), "http") == 0) + chunk_appendf(out, "<tr><th>- Responses time:</th><td>%s / %s</td><td>ms</td></tr>", + U2H(stats[ST_I_PX_RT_MAX].u.u32), U2H(stats[ST_I_PX_RTIME].u.u32)); + chunk_appendf(out, "<tr><th>- Total time:</th><td>%s / %s</td><td>ms</td></tr>", + U2H(stats[ST_I_PX_TT_MAX].u.u32), U2H(stats[ST_I_PX_TTIME].u.u32)); + + chunk_appendf(out, + "</table></div></u></td>" + /* sessions: lbtot, last */ + "<td>%s</td><td>%s</td>" + /* bytes: in */ + "<td>%s</td>" + "", + U2H(stats[ST_I_PX_LBTOT].u.u64), + human_time(stats[ST_I_PX_LASTSESS].u.s32, 1), + U2H(stats[ST_I_PX_BIN].u.u64)); + + chunk_appendf(out, + /* bytes:out + compression stats (via hover): comp_in, comp_out, comp_byp */ + "<td>%s%s<div class=tips><table class=det>" + "<tr><th>Response bytes in:</th><td>%s</td></tr>" + "<tr><th>Compression in:</th><td>%s</td></tr>" + "<tr><th>Compression out:</th><td>%s</td><td>(%d%%)</td></tr>" + "<tr><th>Compression bypass:</th><td>%s</td></tr>" + "<tr><th>Total bytes saved:</th><td>%s</td><td>(%d%%)</td></tr>" + "</table></div>%s</td>", + (stats[ST_I_PX_COMP_IN].u.u64 || stats[ST_I_PX_COMP_BYP].u.u64) ? "<u>":"", + U2H(stats[ST_I_PX_BOUT].u.u64), + U2H(stats[ST_I_PX_BOUT].u.u64), + U2H(stats[ST_I_PX_COMP_IN].u.u64), + U2H(stats[ST_I_PX_COMP_OUT].u.u64), + stats[ST_I_PX_COMP_IN].u.u64 ? (int)(stats[ST_I_PX_COMP_OUT].u.u64 * 100 / stats[ST_I_PX_COMP_IN].u.u64) : 0, + U2H(stats[ST_I_PX_COMP_BYP].u.u64), + U2H(stats[ST_I_PX_COMP_IN].u.u64 - stats[ST_I_PX_COMP_OUT].u.u64), + stats[ST_I_PX_BOUT].u.u64 ? 
(int)((stats[ST_I_PX_COMP_IN].u.u64 - stats[ST_I_PX_COMP_OUT].u.u64) * 100 / stats[ST_I_PX_BOUT].u.u64) : 0, + (stats[ST_I_PX_COMP_IN].u.u64 || stats[ST_I_PX_COMP_BYP].u.u64) ? "</u>":""); + + chunk_appendf(out, + /* denied: req, resp */ + "<td>%s</td><td>%s</td>" + /* errors : request, connect */ + "<td></td><td>%s</td>" + /* errors : response */ + "<td><u>%s<div class=tips>Connection resets during transfers: %lld client, %lld server</div></u></td>" + /* warnings: retries, redispatches */ + "<td>%lld</td><td>%lld</td>" + /* backend status: reflect backend status (up/down): we display UP + * if the backend has known working servers or if it has no server at + * all (eg: for stats). Then we display the total weight, number of + * active and backups. */ + "<td class=ac>%s %s</td><td class=ac> </td><td class=ac>%d/%d</td>" + "<td class=ac>%d</td><td class=ac>%d</td>" + "", + U2H(stats[ST_I_PX_DREQ].u.u64), U2H(stats[ST_I_PX_DRESP].u.u64), + U2H(stats[ST_I_PX_ECON].u.u64), + U2H(stats[ST_I_PX_ERESP].u.u64), + (long long)stats[ST_I_PX_CLI_ABRT].u.u64, + (long long)stats[ST_I_PX_SRV_ABRT].u.u64, + (long long)stats[ST_I_PX_WRETR].u.u64, (long long)stats[ST_I_PX_WREDIS].u.u64, + human_time(stats[ST_I_PX_LASTCHG].u.u32, 1), + strcmp(field_str(stats, ST_I_PX_STATUS), "DOWN") ? field_str(stats, ST_I_PX_STATUS) : "<font color=\"red\"><b>DOWN</b></font>", + stats[ST_I_PX_WEIGHT].u.u32, stats[ST_I_PX_UWEIGHT].u.u32, + stats[ST_I_PX_ACT].u.u32, stats[ST_I_PX_BCK].u.u32); + + chunk_appendf(out, + /* rest of backend: nothing, down transitions, total downtime, throttle */ + "<td class=ac> </td><td>%d</td>" + "<td>%s</td>" + "<td></td>", + stats[ST_I_PX_CHKDOWN].u.u32, + stats[ST_I_PX_DOWNTIME].type ? 
human_time(stats[ST_I_PX_DOWNTIME].u.u32, 1) : " "); + + if (flags & STAT_F_SHMODULES) { + list_for_each_entry(mod, &stats_module_list[STATS_DOMAIN_PROXY], list) { + chunk_appendf(out, "<td>"); + + if (stats_px_get_cap(mod->domain_flags) & STATS_PX_CAP_BE) { + chunk_appendf(out, + "<u>%s<div class=tips><table class=det>", + mod->name); + for (j = 0; j < mod->stats_count; ++j) { + chunk_appendf(out, + "<tr><th>%s</th><td>%s</td></tr>", + mod->stats[j].desc, field_to_html_str(&stats[ST_I_PX_MAX + i])); + ++i; + } + chunk_appendf(out, "</table></div></u>"); + } else { + i += mod->stats_count; + } + + chunk_appendf(out, "</td>"); + } + } + + chunk_appendf(out, "</tr>"); + } + + return 1; +} + +/* Dumps the HTML table header for proxy <px> to chunk ctx buffer and uses the + * state from stream connector <sc>. The caller is responsible for clearing + * chunk ctx buffer if needed. + * + * When <px> is a backend with servers and STAT_F_ADMIN is set, a POST <form> + * is opened first and left open. Then a one-row title table (proxy name, + * optional tips, description) is emitted, followed by the opening of the stats + * table and its two heading rows, with one extra column per entry of + * stats_module_list[STATS_DOMAIN_PROXY] when STAT_F_SHMODULES is set. + * + * NOTE(review): scope_txt is filled below but is not read again anywhere in + * this function. + */ +void stats_dump_html_px_hdr(struct stconn *sc, struct proxy *px) +{ + struct appctx *appctx = __sc_appctx(sc); + struct show_stat_ctx *ctx = appctx->svcctx; + struct buffer *chk = &ctx->chunk; + char scope_txt[STAT_SCOPE_TXT_MAXLEN + sizeof STAT_SCOPE_PATTERN]; + struct stats_module *mod; + int stats_module_len = 0; + + if (px->cap & PR_CAP_BE && px->srv && (ctx->flags & STAT_F_ADMIN)) { + /* A form to enable/disable this proxy's servers */ + + /* scope_txt = search pattern + search query, ctx->scope_len is always <= STAT_SCOPE_TXT_MAXLEN */ + scope_txt[0] = 0; + if (ctx->scope_len) { + const char *scope_ptr = stats_scope_ptr(appctx); + + strlcpy2(scope_txt, STAT_SCOPE_PATTERN, sizeof(scope_txt)); + memcpy(scope_txt + strlen(STAT_SCOPE_PATTERN), scope_ptr, ctx->scope_len); + scope_txt[strlen(STAT_SCOPE_PATTERN) + ctx->scope_len] = 0; + } + + chunk_appendf(chk, + "<form method=\"post\">"); + } + + /* print a new table: this first one only holds the proxy name and description */ + chunk_appendf(chk, + "<table class=\"tbl\" width=\"100%%\">\n" + "<tr class=\"titre\">" + "<th class=\"pxname\" width=\"10%%\">"); + + chunk_appendf(chk, + "<a 
name=\"%s\"></a>%s" + "<a class=px href=\"#%s\">%s</a>", + px->id, + (ctx->flags & STAT_F_SHLGNDS) ? "<u>":"", + px->id, px->id); + + if (ctx->flags & STAT_F_SHLGNDS) { + /* cap, mode, id */ + chunk_appendf(chk, "<div class=tips>cap: %s, mode: %s, id: %d", + proxy_cap_str(px->cap), proxy_mode_str(px->mode), + px->uuid); + chunk_appendf(chk, "</div>"); + } + /* close the title table and open the stats table; px->desc is written as is */ + chunk_appendf(chk, + "%s</th>" + "<th class=\"%s\" width=\"90%%\">%s</th>" + "</tr>\n" + "</table>\n" + "<table class=\"tbl\" width=\"100%%\">\n" + "<tr class=\"titre\">", + (ctx->flags & STAT_F_SHLGNDS) ? "</u>":"", + px->desc ? "desc" : "empty", px->desc ? px->desc : ""); + + if (ctx->flags & STAT_F_ADMIN) { + /* Column heading for Enable or Disable server: a checkbox toggling + * every '<px->id>-checkbox' input when the backend has servers, + * an empty cell otherwise. + */ + if ((px->cap & PR_CAP_BE) && px->srv) + chunk_appendf(chk, + "<th rowspan=2 width=1><input type=\"checkbox\" " + "onclick=\"for(c in document.getElementsByClassName('%s-checkbox')) " + "document.getElementsByClassName('%s-checkbox').item(c).checked = this.checked\"></th>", + px->id, + px->id); + else + chunk_appendf(chk, "<th rowspan=2></th>"); + } + /* first heading row: column groups */ + chunk_appendf(chk, + "<th rowspan=2></th>" + "<th colspan=3>Queue</th>" + "<th colspan=3>Session rate</th><th colspan=6>Sessions</th>" + "<th colspan=2>Bytes</th><th colspan=2>Denied</th>" + "<th colspan=3>Errors</th><th colspan=2>Warnings</th>" + "<th colspan=9>Server</th>"); + + if (ctx->flags & STAT_F_SHMODULES) { + // calculate the count of module for colspan attribute + list_for_each_entry(mod, &stats_module_list[STATS_DOMAIN_PROXY], list) { + ++stats_module_len; + } + chunk_appendf(chk, "<th colspan=%d>Extra modules</th>", + stats_module_len); + } + /* second heading row: individual column titles */ + chunk_appendf(chk, + "</tr>\n" + "<tr class=\"titre\">" + "<th>Cur</th><th>Max</th><th>Limit</th>" + "<th>Cur</th><th>Max</th><th>Limit</th><th>Cur</th><th>Max</th>" + "<th>Limit</th><th>Total</th><th>LbTot</th><th>Last</th><th>In</th><th>Out</th>" + "<th>Req</th><th>Resp</th><th>Req</th><th>Conn</th>" + "<th>Resp</th><th>Retr</th><th>Redis</th>" + 
"<th>Status</th><th>LastChk</th><th>Wght</th><th>Act</th>" + "<th>Bck</th><th>Chk</th><th>Dwn</th><th>Dwntme</th>" + "<th>Thrtle</th>\n"); + + if (ctx->flags & STAT_F_SHMODULES) { + list_for_each_entry(mod, &stats_module_list[STATS_DOMAIN_PROXY], list) { + chunk_appendf(chk, "<th>%s</th>", mod->name); + } + } + + chunk_appendf(chk, "</tr>"); +} + +/* Dumps the HTML table trailer for proxy <px> to chunk ctx buffer and uses the + * state from stream connector <sc>. The caller is responsible for clearing + * chunk ctx buffer if needed. + * + * Closes the stats table, then, when <px> is a backend with servers and + * STAT_F_ADMIN is set, appends the action selector, a hidden "b" field set to + * "#" followed by px->uuid, the submit button and the closing </form>. A <p> + * is appended in all cases. + */ +void stats_dump_html_px_end(struct stconn *sc, struct proxy *px) +{ + struct appctx *appctx = __sc_appctx(sc); + struct show_stat_ctx *ctx = appctx->svcctx; + struct buffer *chk = &ctx->chunk; + + chunk_appendf(chk, "</table>"); + + if ((px->cap & PR_CAP_BE) && px->srv && (ctx->flags & STAT_F_ADMIN)) { + /* close the form used to enable/disable this proxy's servers */ + chunk_appendf(chk, + "Choose the action to perform on the checked servers : " + "<select name=action>" + "<option value=\"\"></option>" + "<option value=\"ready\">Set state to READY</option>" + "<option value=\"drain\">Set state to DRAIN</option>" + "<option value=\"maint\">Set state to MAINT</option>" + "<option value=\"dhlth\">Health: disable checks</option>" + "<option value=\"ehlth\">Health: enable checks</option>" + "<option value=\"hrunn\">Health: force UP</option>" + "<option value=\"hnolb\">Health: force NOLB</option>" + "<option value=\"hdown\">Health: force DOWN</option>" + "<option value=\"dagent\">Agent: disable checks</option>" + "<option value=\"eagent\">Agent: enable checks</option>" + "<option value=\"arunn\">Agent: force UP</option>" + "<option value=\"adown\">Agent: force DOWN</option>" + "<option value=\"shutdown\">Kill Sessions</option>" + "</select>" + "<input type=\"hidden\" name=\"b\" value=\"#%d\">" + " <input type=\"submit\" value=\"Apply\">" + "</form>", + px->uuid); + } + + chunk_appendf(chk, "<p>\n"); +} + +/* Dumps the HTML stats trailer 
block to <out> buffer. The caller is + * responsible for clearing it if needed. It only appends the closing + * </body> and </html> tags. + */ +void stats_dump_html_end(struct buffer *out) +{ + chunk_appendf(out, "</body></html>\n"); +} + + /* Adds to <htx> the start line and headers of the stats response: an + * HTTP/1.1 200 with "Cache-Control: no-cache", a Content-Type picked from + * the format flags (text/html for STAT_F_FMT_HTML, application/json for + * STAT_F_FMT_JSON or STAT_F_JSON_SCHM, text/plain otherwise), a "Refresh" + * header when uri->refresh is positive and STAT_F_NO_REFRESH is not set, + * and "Transfer-Encoding: chunked" when STAT_F_CHUNKED is set. + * Returns 1 once the end-of-headers block was added. If any addition + * fails, <htx> is reset, EOS and error are set on the applet and 0 is + * returned. + */ +static int stats_send_http_headers(struct stconn *sc, struct htx *htx) +{ + struct uri_auth *uri; + struct appctx *appctx = __sc_appctx(sc); + struct show_stat_ctx *ctx = appctx->svcctx; + struct htx_sl *sl; + unsigned int flags; + + BUG_ON(!ctx->http_px); + uri = ctx->http_px->uri_auth; + + flags = (HTX_SL_F_IS_RESP|HTX_SL_F_VER_11|HTX_SL_F_XFER_ENC|HTX_SL_F_XFER_LEN|HTX_SL_F_CHNK); + sl = htx_add_stline(htx, HTX_BLK_RES_SL, flags, ist("HTTP/1.1"), ist("200"), ist("OK")); + if (!sl) + goto full; + sl->info.res.status = 200; + + if (!htx_add_header(htx, ist("Cache-Control"), ist("no-cache"))) + goto full; + /* the content type follows the requested output format */ if (ctx->flags & STAT_F_FMT_HTML) { + if (!htx_add_header(htx, ist("Content-Type"), ist("text/html"))) + goto full; + } + else if (ctx->flags & (STAT_F_FMT_JSON|STAT_F_JSON_SCHM)) { + if (!htx_add_header(htx, ist("Content-Type"), ist("application/json"))) + goto full; + } + else { + if (!htx_add_header(htx, ist("Content-Type"), ist("text/plain"))) + goto full; + } + /* the refresh value is sent as its decimal text form */ + if (uri->refresh > 0 && !(ctx->flags & STAT_F_NO_REFRESH)) { + const char *refresh = U2A(uri->refresh); + if (!htx_add_header(htx, ist("Refresh"), ist(refresh))) + goto full; + } + + if (ctx->flags & STAT_F_CHUNKED) { + if (!htx_add_header(htx, ist("Transfer-Encoding"), ist("chunked"))) + goto full; + } + + if (!htx_add_endof(htx, HTX_BLK_EOH)) + goto full; + return 1; + + full: + /* drop whatever was added so far and flag the applet as ended in error */ htx_reset(htx); + applet_set_eos(appctx); + applet_set_error(appctx); + return 0; +} + /* Adds to <htx> a bodyless HTTP/1.1 303 "See Other" response whose Location + * is built from the stats URI prefix, ";st=" plus a status text, and the + * optional ";up", ";norefresh" and scope suffixes. + * Returns 1 on success. If any addition fails, <htx> is reset, EOS and + * error are set on the applet and 0 is returned. + */ +static int stats_send_http_redirect(struct stconn *sc, struct htx *htx) +{ + char scope_txt[STAT_SCOPE_TXT_MAXLEN + sizeof STAT_SCOPE_PATTERN]; + struct uri_auth *uri; + struct appctx *appctx = __sc_appctx(sc); + struct show_stat_ctx *ctx = appctx->svcctx; + struct htx_sl *sl; + unsigned int flags; + + BUG_ON(!ctx->http_px); + uri = 
ctx->http_px->uri_auth; + + /* scope_txt = search pattern + search query, ctx->scope_len is always <= STAT_SCOPE_TXT_MAXLEN */ + scope_txt[0] = 0; + if (ctx->scope_len) { + const char *scope_ptr = stats_scope_ptr(appctx); + + strlcpy2(scope_txt, STAT_SCOPE_PATTERN, sizeof(scope_txt)); + memcpy(scope_txt + strlen(STAT_SCOPE_PATTERN), scope_ptr, ctx->scope_len); + scope_txt[strlen(STAT_SCOPE_PATTERN) + ctx->scope_len] = 0; + } + + /* We don't want to land on the posted stats page because a refresh will + * repost the data. We don't want this to happen by accident so we redirect + * the browser to the stats page with a GET. + * The status text falls back to the STAT_STATUS_UNKN entry when st_code + * is outside ]STAT_STATUS_INIT, STAT_STATUS_SIZE[ or has no text. + */ + chunk_printf(&trash, "%s;st=%s%s%s%s", + uri->uri_prefix, + ((ctx->st_code > STAT_STATUS_INIT) && + (ctx->st_code < STAT_STATUS_SIZE) && + stat_status_codes[ctx->st_code]) ? + stat_status_codes[ctx->st_code] : + stat_status_codes[STAT_STATUS_UNKN], + (ctx->flags & STAT_F_HIDE_DOWN) ? ";up" : "", + (ctx->flags & STAT_F_NO_REFRESH) ? ";norefresh" : "", + scope_txt); + /* bodyless response: an explicit zero Content-Length is sent below */ + flags = (HTX_SL_F_IS_RESP|HTX_SL_F_VER_11|HTX_SL_F_XFER_LEN|HTX_SL_F_CLEN|HTX_SL_F_BODYLESS); + sl = htx_add_stline(htx, HTX_BLK_RES_SL, flags, ist("HTTP/1.1"), ist("303"), ist("See Other")); + if (!sl) + goto full; + sl->info.res.status = 303; + /* the Location value is the URI just built in the trash chunk */ + if (!htx_add_header(htx, ist("Cache-Control"), ist("no-cache")) || + !htx_add_header(htx, ist("Content-Type"), ist("text/plain")) || + !htx_add_header(htx, ist("Content-Length"), ist("0")) || + !htx_add_header(htx, ist("Location"), ist2(trash.area, trash.data))) + goto full; + + if (!htx_add_endof(htx, HTX_BLK_EOH)) + goto full; + + return 1; + + full: + /* drop whatever was added so far and flag the applet as ended in error */ htx_reset(htx); + applet_set_eos(appctx); + applet_set_error(appctx); + return 0; +} + +/* We reached the stats page through a POST request. The appctx is + * expected to have already been allocated by the caller. + * Parse the posted data and enable/disable servers if necessary. + * Returns 1 if request was parsed or zero if it needs more data. 
+ * The outcome is reported through ctx->st_code (one of the STAT_STATUS_* + * values), including when 1 is returned after an error. + */ +static int stats_process_http_post(struct stconn *sc) +{ + struct appctx *appctx = __sc_appctx(sc); + struct show_stat_ctx *ctx = appctx->svcctx; + + struct proxy *px = NULL; + struct server *sv = NULL; + + char key[LINESIZE]; + int action = ST_ADM_ACTION_NONE; + int reprocess = 0; + + int total_servers = 0; + int altered_servers = 0; + + char *first_param, *cur_param, *next_param, *end_params; + char *st_cur_param = NULL; + char *st_next_param = NULL; + + struct buffer *temp = get_trash_chunk(); + + struct htx *htx = htxbuf(&appctx->inbuf); + struct htx_blk *blk; + + /* we need more data */ + if (!(htx->flags & HTX_FL_EOM)) { + /* check if we can receive more: a full input buffer without EOM is + * reported as STAT_STATUS_EXCD + */ + if (applet_fl_test(appctx, APPCTX_FL_INBLK_FULL)) { + ctx->st_code = STAT_STATUS_EXCD; + goto out; + } + goto wait; + } + + /* The request was fully received. Copy the DATA blocks into <temp>, + * stopping at the first trailer or end-of-trailers block. + */ + blk = htx_get_head_blk(htx); + while (blk) { + enum htx_blk_type type = htx_get_blk_type(blk); + + if (type == HTX_BLK_TLR || type == HTX_BLK_EOT) + break; + if (type == HTX_BLK_DATA) { + struct ist v = htx_get_blk_value(htx, blk); + + if (!chunk_memcat(temp, v.ptr, v.len)) { + ctx->st_code = STAT_STATUS_EXCD; + goto out; + } + } + blk = htx_get_next_blk(htx, blk); + } + /* the copy is scanned in place, from its end, and is NUL-terminated just past its last byte */ + first_param = temp->area; + end_params = temp->area + temp->data; + cur_param = next_param = end_params; + *end_params = '\0'; + + ctx->st_code = STAT_STATUS_NONE; + + /* + * Parse the parameters in reverse order to only store the last value. + * From the html form, the backend and the action are at the end. + */ + while (cur_param > first_param) { + char *value; + int poffset, plen; + + cur_param--; + + if ((*cur_param == '&') || (cur_param == first_param)) { + reprocess_servers: + /* Parse the key: poffset skips the leading '&' (absent for the + * very first parameter) and plen is the parameter length plus + * one, for the NUL written at key[plen - 1]. + */ + poffset = (cur_param != first_param ? 1 : 0); + plen = next_param - cur_param + (cur_param == first_param ? 
1 : 0); + if ((plen > 0) && (plen <= sizeof(key))) { + strncpy(key, cur_param + poffset, plen); + key[plen - 1] = '\0'; + } else { + ctx->st_code = STAT_STATUS_ERRP; + goto out; + } + + /* Parse the value: it follows the first '=' inside <key>, or is + * the empty string at the end of <key> when there is none. + */ + value = key; + while (*value != '\0' && *value != '=') { + value++; + } + if (*value == '=') { + /* Ok, a value is found, we can mark the end of the key */ + *value++ = '\0'; + } + /* a negative result from url_decode() ends the parsing loop */ if (url_decode(key, 1) < 0 || url_decode(value, 1) < 0) + break; + + /* Now we can check the key to see what to do: only the first "b" + * and the first "action" met in this reverse scan are kept. + */ + if (!px && (strcmp(key, "b") == 0)) { + if ((px = proxy_be_by_name(value)) == NULL) { + /* the backend name is unknown or ambiguous (duplicate names) */ + ctx->st_code = STAT_STATUS_ERRP; + goto out; + } + } + else if (!action && (strcmp(key, "action") == 0)) { + if (strcmp(value, "ready") == 0) { + action = ST_ADM_ACTION_READY; + } + else if (strcmp(value, "drain") == 0) { + action = ST_ADM_ACTION_DRAIN; + } + else if (strcmp(value, "maint") == 0) { + action = ST_ADM_ACTION_MAINT; + } + else if (strcmp(value, "shutdown") == 0) { + action = ST_ADM_ACTION_SHUTDOWN; + } + else if (strcmp(value, "dhlth") == 0) { + action = ST_ADM_ACTION_DHLTH; + } + else if (strcmp(value, "ehlth") == 0) { + action = ST_ADM_ACTION_EHLTH; + } + else if (strcmp(value, "hrunn") == 0) { + action = ST_ADM_ACTION_HRUNN; + } + else if (strcmp(value, "hnolb") == 0) { + action = ST_ADM_ACTION_HNOLB; + } + else if (strcmp(value, "hdown") == 0) { + action = ST_ADM_ACTION_HDOWN; + } + else if (strcmp(value, "dagent") == 0) { + action = ST_ADM_ACTION_DAGENT; + } + else if (strcmp(value, "eagent") == 0) { + action = ST_ADM_ACTION_EAGENT; + } + else if (strcmp(value, "arunn") == 0) { + action = ST_ADM_ACTION_ARUNN; + } + else if (strcmp(value, "adown") == 0) { + action = ST_ADM_ACTION_ADOWN; + } + /* else these are the old supported methods */ + else if (strcmp(value, "disable") == 0) { + action = ST_ADM_ACTION_DISABLE; + } + else if (strcmp(value, "enable") == 0) { + action = ST_ADM_ACTION_ENABLE; 
+ } + else if (strcmp(value, "stop") == 0) { + action = ST_ADM_ACTION_STOP; + } + else if (strcmp(value, "start") == 0) { + action = ST_ADM_ACTION_START; + } + else { + ctx->st_code = STAT_STATUS_ERRP; + goto out; + } + } + else if (strcmp(key, "s") == 0) { + if (!(px && action)) { + /* + * Indicates that we'll need to reprocess the parameters + * as soon as backend and action are known. Only the + * position of the first such parameter is saved. + */ + if (!reprocess) { + st_cur_param = cur_param; + st_next_param = next_param; + } + reprocess = 1; + } + else if ((sv = findserver(px, value)) != NULL) { + /* the action is applied with the server's lock held */ HA_SPIN_LOCK(SERVER_LOCK, &sv->lock); + switch (action) { + case ST_ADM_ACTION_DISABLE: + if (!(sv->cur_admin & SRV_ADMF_FMAINT)) { + altered_servers++; + total_servers++; + srv_set_admin_flag(sv, SRV_ADMF_FMAINT, SRV_ADM_STCHGC_STATS_DISABLE); + } + break; + case ST_ADM_ACTION_ENABLE: + if (sv->cur_admin & SRV_ADMF_FMAINT) { + altered_servers++; + total_servers++; + srv_clr_admin_flag(sv, SRV_ADMF_FMAINT); + } + break; + case ST_ADM_ACTION_STOP: + if (!(sv->cur_admin & SRV_ADMF_FDRAIN)) { + srv_set_admin_flag(sv, SRV_ADMF_FDRAIN, SRV_ADM_STCHGC_STATS_STOP); + altered_servers++; + total_servers++; + } + break; + case ST_ADM_ACTION_START: + if (sv->cur_admin & SRV_ADMF_FDRAIN) { + srv_clr_admin_flag(sv, SRV_ADMF_FDRAIN); + altered_servers++; + total_servers++; + } + break; + case ST_ADM_ACTION_DHLTH: + if (sv->check.state & CHK_ST_CONFIGURED) { + sv->check.state &= ~CHK_ST_ENABLED; + altered_servers++; + total_servers++; + } + break; + case ST_ADM_ACTION_EHLTH: + if (sv->check.state & CHK_ST_CONFIGURED) { + sv->check.state |= CHK_ST_ENABLED; + altered_servers++; + total_servers++; + } + break; + case ST_ADM_ACTION_HRUNN: + if (!(sv->track)) { + sv->check.health = sv->check.rise + sv->check.fall - 1; + srv_set_running(sv, SRV_OP_STCHGC_STATS_WEB); + altered_servers++; + total_servers++; + } + break; + case ST_ADM_ACTION_HNOLB: + if (!(sv->track)) { + sv->check.health = sv->check.rise + sv->check.fall - 1; + 
srv_set_stopping(sv, SRV_OP_STCHGC_STATS_WEB); + altered_servers++; + total_servers++; + } + break; + case ST_ADM_ACTION_HDOWN: + if (!(sv->track)) { + sv->check.health = 0; + srv_set_stopped(sv, SRV_OP_STCHGC_STATS_WEB); + altered_servers++; + total_servers++; + } + break; + case ST_ADM_ACTION_DAGENT: + if (sv->agent.state & CHK_ST_CONFIGURED) { + sv->agent.state &= ~CHK_ST_ENABLED; + altered_servers++; + total_servers++; + } + break; + case ST_ADM_ACTION_EAGENT: + if (sv->agent.state & CHK_ST_CONFIGURED) { + sv->agent.state |= CHK_ST_ENABLED; + altered_servers++; + total_servers++; + } + break; + case ST_ADM_ACTION_ARUNN: + if (sv->agent.state & CHK_ST_ENABLED) { + sv->agent.health = sv->agent.rise + sv->agent.fall - 1; + srv_set_running(sv, SRV_OP_STCHGC_STATS_WEB); + altered_servers++; + total_servers++; + } + break; + case ST_ADM_ACTION_ADOWN: + if (sv->agent.state & CHK_ST_ENABLED) { + sv->agent.health = 0; + srv_set_stopped(sv, SRV_OP_STCHGC_STATS_WEB); + altered_servers++; + total_servers++; + } + break; + case ST_ADM_ACTION_READY: + srv_adm_set_ready(sv); + altered_servers++; + total_servers++; + break; + case ST_ADM_ACTION_DRAIN: + srv_adm_set_drain(sv); + altered_servers++; + total_servers++; + break; + case ST_ADM_ACTION_MAINT: + srv_adm_set_maint(sv); + altered_servers++; + total_servers++; + break; + case ST_ADM_ACTION_SHUTDOWN: + if (!(px->flags & (PR_FL_DISABLED|PR_FL_STOPPED))) { + srv_shutdown_streams(sv, SF_ERR_KILLED); + altered_servers++; + total_servers++; + } + break; + } + HA_SPIN_UNLOCK(SERVER_LOCK, &sv->lock); + } else { + /* the server name is unknown or ambiguous (duplicate names): + * it is counted but not altered + */ + total_servers++; + } + } + if (reprocess && px && action) { + /* Now, we know the backend and the action chosen by the user. 
+ * We can safely restart from the first server parameter + * to reprocess them + */ + cur_param = st_cur_param; + next_param = st_next_param; + reprocess = 0; + goto reprocess_servers; + } + + next_param = cur_param; + } + } + /* summary: NONE when no server was counted, ERRP when none of the + * counted servers was altered, DONE when all of them were, PART + * otherwise. + */ + if (total_servers == 0) { + ctx->st_code = STAT_STATUS_NONE; + } + else if (altered_servers == 0) { + ctx->st_code = STAT_STATUS_ERRP; + } + else if (altered_servers == total_servers) { + ctx->st_code = STAT_STATUS_DONE; + } + else { + ctx->st_code = STAT_STATUS_PART; + } + out: + return 1; + wait: + ctx->st_code = STAT_STATUS_NONE; + return 0; +} + +/* This I/O handler runs as an applet embedded in a stream connector. It is + * used to send HTTP stats over a TCP socket. The mechanism is very simple. + * appctx->st0 contains the operation in progress (dump, done). The handler + * automatically unregisters itself once transfer is complete. + */ +static void http_stats_io_handler(struct appctx *appctx) +{ + struct show_stat_ctx *ctx = appctx->svcctx; + struct stconn *sc = appctx_sc(appctx); + struct htx *res_htx = NULL; + + /* only proxy stats are available via http */ + ctx->domain = STATS_DOMAIN_PROXY; + + if (applet_fl_test(appctx, APPCTX_FL_INBLK_ALLOC|APPCTX_FL_OUTBLK_ALLOC|APPCTX_FL_OUTBLK_FULL)) + goto out; + + if (applet_fl_test(appctx, APPCTX_FL_FASTFWD) && se_fl_test(appctx->sedesc, SE_FL_MAY_FASTFWD_PROD)) + goto out; + + if (!appctx_get_buf(appctx, &appctx->outbuf)) { + goto out; + } + + res_htx = htx_from_buf(&appctx->outbuf); + + if (unlikely(applet_fl_test(appctx, APPCTX_FL_EOS|APPCTX_FL_ERROR))) { + appctx->st0 = STAT_HTTP_END; + goto out; + } + + /* all states are processed in sequence */ + if (appctx->st0 == STAT_HTTP_HEAD) { + if (stats_send_http_headers(sc, res_htx)) { + struct ist meth = htx_sl_req_meth(http_get_stline(htxbuf(&appctx->inbuf))); + + if (find_http_meth(istptr(meth), istlen(meth)) == HTTP_METH_HEAD) + appctx->st0 = STAT_HTTP_DONE; + else { + if (!(global.tune.no_zero_copy_fwd & NO_ZERO_COPY_FWD_APPLET)) + 
se_fl_set(appctx->sedesc, SE_FL_MAY_FASTFWD_PROD); + appctx->st0 = STAT_HTTP_DUMP; + } + } + } + + if (appctx->st0 == STAT_HTTP_DUMP) { + ctx->chunk = b_make(trash.area, appctx->outbuf.size, 0, 0); + /* adjust buffer size to take htx overhead into account, + * make sure to perform this call on an empty buffer + */ + ctx->chunk.size = buf_room_for_htx_data(&ctx->chunk); + if (stats_dump_stat_to_buffer(sc, NULL, res_htx)) + appctx->st0 = STAT_HTTP_DONE; + } + + if (appctx->st0 == STAT_HTTP_POST) { + if (stats_process_http_post(sc)) + appctx->st0 = STAT_HTTP_LAST; + } + + if (appctx->st0 == STAT_HTTP_LAST) { + if (stats_send_http_redirect(sc, res_htx)) + appctx->st0 = STAT_HTTP_DONE; + } + + if (appctx->st0 == STAT_HTTP_DONE) { + /* no more data are expected. If the response buffer is empty, + * be sure to add something (EOT block in this case) to have + * something to send. It is important to be sure the EOM flags + * will be handled by the endpoint. + */ + if (htx_is_empty(res_htx)) { + if (!htx_add_endof(res_htx, HTX_BLK_EOT)) { + applet_fl_set(appctx, APPCTX_FL_OUTBLK_FULL); + goto out; + } + } + res_htx->flags |= HTX_FL_EOM; + applet_set_eoi(appctx); + se_fl_clr(appctx->sedesc, SE_FL_MAY_FASTFWD_PROD); + applet_fl_clr(appctx, APPCTX_FL_FASTFWD); + appctx->st0 = STAT_HTTP_END; + } + + if (appctx->st0 == STAT_HTTP_END) { + applet_set_eos(appctx); + applet_will_consume(appctx); + } + + out: + /* we have left the request in the buffer for the case where we + * process a POST, and this automatically re-enables activity on + * read. It's better to indicate that we want to stop reading when + * we're sending, so that we know there's at most one direction + * deciding to wake the applet up. It saves it from looping when + * emitting large blocks into small TCP windows. 
+ */ + if (res_htx) + htx_to_buf(res_htx, &appctx->outbuf); + + if (appctx->st0 == STAT_HTTP_END) { + /* eat the whole request */ + b_reset(&appctx->inbuf); + applet_fl_clr(appctx, APPCTX_FL_INBLK_FULL); + appctx->sedesc->iobuf.flags &= ~IOBUF_FL_FF_BLOCKED; + } + else if (applet_fl_test(appctx, APPCTX_FL_OUTBLK_FULL)) + applet_wont_consume(appctx); +} + +static size_t http_stats_fastfwd(struct appctx *appctx, struct buffer *buf, + size_t count, unsigned int flags) +{ + struct stconn *sc = appctx_sc(appctx); + size_t ret = 0; + + ret = b_data(buf); + if (stats_dump_stat_to_buffer(sc, buf, NULL)) { + se_fl_clr(appctx->sedesc, SE_FL_MAY_FASTFWD_PROD); + applet_fl_clr(appctx, APPCTX_FL_FASTFWD); + appctx->st0 = STAT_HTTP_DONE; + } + + ret = b_data(buf) - ret; + return ret; +} + +static void http_stats_release(struct appctx *appctx) +{ + struct show_stat_ctx *ctx = appctx->svcctx; + + if (ctx->px_st == STAT_PX_ST_SV) + srv_drop(ctx->obj2); +} + +struct applet http_stats_applet = { + .obj_type = OBJ_TYPE_APPLET, + .name = "<STATS>", /* used for logging */ + .fct = http_stats_io_handler, + .rcv_buf = appctx_htx_rcv_buf, + .snd_buf = appctx_htx_snd_buf, + .fastfwd = http_stats_fastfwd, + .release = http_stats_release, +}; diff --git a/src/stats-json.c b/src/stats-json.c new file mode 100644 index 0000000..b493853 --- /dev/null +++ b/src/stats-json.c @@ -0,0 +1,533 @@ +#include <haproxy/stats-json.h> + +#include <stdio.h> + +#include <haproxy/applet.h> +#include <haproxy/buf.h> +#include <haproxy/chunk.h> +#include <haproxy/stats.h> + +/* Emits an encoding of the field type as JSON. + * Returns non-zero on success, 0 if the buffer is full. 
+ */ +static int stats_emit_json_field_tags(struct buffer *out, const struct field *f) +{ + const char *origin, *nature, *scope; + int old_len; + + switch (field_origin(f, 0)) { + case FO_METRIC: origin = "Metric"; break; + case FO_STATUS: origin = "Status"; break; + case FO_KEY: origin = "Key"; break; + case FO_CONFIG: origin = "Config"; break; + case FO_PRODUCT: origin = "Product"; break; + default: origin = "Unknown"; break; + } + + switch (field_nature(f, 0)) { + case FN_GAUGE: nature = "Gauge"; break; + case FN_LIMIT: nature = "Limit"; break; + case FN_MIN: nature = "Min"; break; + case FN_MAX: nature = "Max"; break; + case FN_RATE: nature = "Rate"; break; + case FN_COUNTER: nature = "Counter"; break; + case FN_DURATION: nature = "Duration"; break; + case FN_AGE: nature = "Age"; break; + case FN_TIME: nature = "Time"; break; + case FN_NAME: nature = "Name"; break; + case FN_OUTPUT: nature = "Output"; break; + case FN_AVG: nature = "Avg"; break; + default: nature = "Unknown"; break; + } + + switch (field_scope(f, 0)) { + case FS_PROCESS: scope = "Process"; break; + case FS_SERVICE: scope = "Service"; break; + case FS_SYSTEM: scope = "System"; break; + case FS_CLUSTER: scope = "Cluster"; break; + default: scope = "Unknown"; break; + } + + old_len = out->data; + chunk_appendf(out, "\"tags\":{" + "\"origin\":\"%s\"," + "\"nature\":\"%s\"," + "\"scope\":\"%s\"" + "}", origin, nature, scope); + return !(old_len == out->data); +} + +/* Limit JSON integer values to the range [-(2**53)+1, (2**53)-1] as per + * the recommendation for interoperable integers in section 6 of RFC 7159. + */ +#define JSON_INT_MAX ((1ULL << 53) - 1) +#define JSON_INT_MIN (0 - JSON_INT_MAX) + +/* Emits a stats field value and its type in JSON. + * Returns non-zero on success, 0 on error. 
+ */ +static int stats_emit_json_data_field(struct buffer *out, const struct field *f) +{ + int old_len; + char buf[20]; + const char *type, *value = buf, *quote = ""; + + switch (field_format(f, 0)) { + case FF_EMPTY: return 1; + case FF_S32: type = "\"s32\""; + snprintf(buf, sizeof(buf), "%d", f->u.s32); + break; + case FF_U32: type = "\"u32\""; + snprintf(buf, sizeof(buf), "%u", f->u.u32); + break; + case FF_S64: type = "\"s64\""; + if (f->u.s64 < JSON_INT_MIN || f->u.s64 > JSON_INT_MAX) + return 0; + type = "\"s64\""; + snprintf(buf, sizeof(buf), "%lld", (long long)f->u.s64); + break; + case FF_U64: if (f->u.u64 > JSON_INT_MAX) + return 0; + type = "\"u64\""; + snprintf(buf, sizeof(buf), "%llu", + (unsigned long long) f->u.u64); + break; + case FF_FLT: type = "\"flt\""; + flt_trim(buf, 0, snprintf(buf, sizeof(buf), "%f", f->u.flt)); + break; + case FF_STR: type = "\"str\""; + value = field_str(f, 0); + quote = "\""; + break; + default: snprintf(buf, sizeof(buf), "%u", f->type); + type = buf; + value = "unknown"; + quote = "\""; + break; + } + + old_len = out->data; + chunk_appendf(out, ",\"value\":{\"type\":%s,\"value\":%s%s%s}", + type, quote, value, quote); + return !(old_len == out->data); +} + +static void stats_print_proxy_field_json(struct buffer *out, + const struct field *stat, + const char *name, + int pos, + uint32_t field_type, + uint32_t iid, + uint32_t sid, + uint32_t pid) +{ + const char *obj_type; + switch (field_type) { + case STATS_TYPE_FE: obj_type = "Frontend"; break; + case STATS_TYPE_BE: obj_type = "Backend"; break; + case STATS_TYPE_SO: obj_type = "Listener"; break; + case STATS_TYPE_SV: obj_type = "Server"; break; + default: obj_type = "Unknown"; break; + } + + chunk_appendf(out, + "{" + "\"objType\":\"%s\"," + "\"proxyId\":%u," + "\"id\":%u," + "\"field\":{\"pos\":%d,\"name\":\"%s\"}," + "\"processNum\":%u,", + obj_type, iid, sid, pos, name, pid); +} + +static void stats_print_rslv_field_json(struct buffer *out, + const struct field 
*stat, + const char *name, + int pos) +{ + chunk_appendf(out, + "{" + "\"field\":{\"pos\":%d,\"name\":\"%s\"},", + pos, name); +} + + +/* Dumps the stats JSON header to <out> buffer. The caller is responsible for + * clearing it if needed. + */ +void stats_dump_json_header(struct buffer *out) +{ + chunk_strcat(out, "["); +} + +/* Dump all fields from <line> into <out> using a typed "field:desc:type:value" format */ +int stats_dump_fields_json(struct buffer *out, + const struct field *line, size_t stats_count, + struct show_stat_ctx *ctx) +{ + int flags = ctx->flags; + int domain = ctx->domain; + int started = (ctx->field) ? 1 : 0; + int ready_data = 0; + + if (!started && (flags & STAT_F_STARTED) && !chunk_strcat(out, ",")) + return 0; + if (!started && !chunk_strcat(out, "[")) + return 0; + + for (; ctx->field < stats_count; ctx->field++) { + int old_len; + int i = ctx->field; + + if (!line[i].type) + continue; + + if (started && !chunk_strcat(out, ",")) + goto err; + started = 1; + + old_len = out->data; + if (domain == STATS_DOMAIN_PROXY) { + stats_print_proxy_field_json(out, &line[i], + stat_cols[domain][i].name, + i, + line[ST_I_PX_TYPE].u.u32, + line[ST_I_PX_IID].u.u32, + line[ST_I_PX_SID].u.u32, + line[ST_I_PX_PID].u.u32); + } else if (domain == STATS_DOMAIN_RESOLVERS) { + stats_print_rslv_field_json(out, &line[i], + stat_cols[domain][i].name, + i); + } + + if (old_len == out->data) + goto err; + + if (!stats_emit_json_field_tags(out, &line[i])) + goto err; + + if (!stats_emit_json_data_field(out, &line[i])) + goto err; + + if (!chunk_strcat(out, "}")) + goto err; + ready_data = out->data; + } + + if (!chunk_strcat(out, "]")) + goto err; + + ctx->field = 0; /* we're done */ + return 1; + +err: + if (!ready_data) { + /* not enough buffer space for a single entry.. 
*/ + chunk_reset(out); + if (ctx->flags & STAT_F_STARTED) + chunk_strcat(out, ","); + chunk_appendf(out, "{\"errorStr\":\"output buffer too short\"}"); + return 0; /* hard error */ + } + /* push ready data and wait for a new buffer to complete the dump */ + out->data = ready_data; + return 1; +} + +/* Dumps the JSON stats trailer block to <out> buffer. The caller is + * responsible for clearing it if needed. + */ +void stats_dump_json_end(struct buffer *out) +{ + chunk_strcat(out, "]\n"); +} + +/* Dump all fields from <stats> into <out> using the "show info json" format */ +int stats_dump_json_info_fields(struct buffer *out, + const struct field *info, + struct show_stat_ctx *ctx) +{ + int started = (ctx->field) ? 1 : 0; + int ready_data = 0; + + if (!started && !chunk_strcat(out, "[")) + return 0; + + for (; ctx->field < ST_I_INF_MAX; ctx->field++) { + int old_len; + int i = ctx->field; + + if (!field_format(info, i)) + continue; + + if (started && !chunk_strcat(out, ",")) + goto err; + started = 1; + + old_len = out->data; + chunk_appendf(out, + "{\"field\":{\"pos\":%d,\"name\":\"%s\"}," + "\"processNum\":%u,", + i, stat_cols_info[i].name, + info[ST_I_INF_PROCESS_NUM].u.u32); + if (old_len == out->data) + goto err; + + if (!stats_emit_json_field_tags(out, &info[i])) + goto err; + + if (!stats_emit_json_data_field(out, &info[i])) + goto err; + + if (!chunk_strcat(out, "}")) + goto err; + ready_data = out->data; + } + + if (!chunk_strcat(out, "]\n")) + goto err; + ctx->field = 0; /* we're done */ + return 1; + +err: + if (!ready_data) { + /* not enough buffer space for a single entry.. */ + chunk_reset(out); + chunk_appendf(out, "{\"errorStr\":\"output buffer too short\"}\n"); + return 0; /* hard error */ + } + /* push ready data and wait for a new buffer to complete the dump */ + out->data = ready_data; + return 1; +} + +/* This function dumps the schema onto the stream connector's read buffer. 
+ * More precisely, the whole schema is appended to <out> by a single
+ * chunk_strcat() call and nothing is returned. If the length of <out> did
+ * not change after that call, <out> is reset and an "errorStr" object is
+ * emitted instead. A line feed is appended in both cases.
+ *
+ * Integer values are bounded to the range [-(2**53)+1, (2**53)-1] as
+ * per the recommendation for interoperable integers in section 6 of RFC 7159.
+ *
+ * NOTE(review): the "Stat" item schema declares a "typedValue" property while
+ * its "required" list names "value", and its title is "InfoItem" like the
+ * "Info" one. This looks like a copy-paste inconsistency, to be confirmed
+ * before touching the emitted text.
+ */
+void stats_dump_json_schema(struct buffer *out)
+{
+
+	int old_len = out->data;
+
+	chunk_strcat(out,
+	             "{"
+	             "\"$schema\":\"http://json-schema.org/draft-04/schema#\","
+	             "\"oneOf\":["
+	             "{"
+	             "\"title\":\"Info\","
+	             "\"type\":\"array\","
+	             "\"items\":{"
+	             "\"title\":\"InfoItem\","
+	             "\"type\":\"object\","
+	             "\"properties\":{"
+	             "\"field\":{\"$ref\":\"#/definitions/field\"},"
+	             "\"processNum\":{\"$ref\":\"#/definitions/processNum\"},"
+	             "\"tags\":{\"$ref\":\"#/definitions/tags\"},"
+	             "\"value\":{\"$ref\":\"#/definitions/typedValue\"}"
+	             "},"
+	             "\"required\":[\"field\",\"processNum\",\"tags\","
+	             "\"value\"]"
+	             "}"
+	             "},"
+	             "{"
+	             "\"title\":\"Stat\","
+	             "\"type\":\"array\","
+	             "\"items\":{"
+	             "\"title\":\"InfoItem\","
+	             "\"type\":\"object\","
+	             "\"properties\":{"
+	             "\"objType\":{"
+	             "\"enum\":[\"Frontend\",\"Backend\",\"Listener\","
+	             "\"Server\",\"Unknown\"]"
+	             "},"
+	             "\"proxyId\":{"
+	             "\"type\":\"integer\","
+	             "\"minimum\":0"
+	             "},"
+	             "\"id\":{"
+	             "\"type\":\"integer\","
+	             "\"minimum\":0"
+	             "},"
+	             "\"field\":{\"$ref\":\"#/definitions/field\"},"
+	             "\"processNum\":{\"$ref\":\"#/definitions/processNum\"},"
+	             "\"tags\":{\"$ref\":\"#/definitions/tags\"},"
+	             "\"typedValue\":{\"$ref\":\"#/definitions/typedValue\"}"
+	             "},"
+	             "\"required\":[\"objType\",\"proxyId\",\"id\","
+	             "\"field\",\"processNum\",\"tags\","
+	             "\"value\"]"
+	             "}"
+	             "},"
+	             "{"
+	             "\"title\":\"Error\","
+	             "\"type\":\"object\","
+	             "\"properties\":{"
+	             "\"errorStr\":{"
+	             "\"type\":\"string\""
+	             "}"
+	             "},"
+	             "\"required\":[\"errorStr\"]"
+	             "}"
+	             "],"
+	             "\"definitions\":{"
+	             "\"field\":{"
+	             "\"type\":\"object\","
+	             "\"pos\":{"
+	             "\"type\":\"integer\","
+	             "\"minimum\":0"
+	             "},"
+	             "\"name\":{"
+	             "\"type\":\"string\""
+	             "},"
+	             "\"required\":[\"pos\",\"name\"]"
+	             "},"
+	             "\"processNum\":{"
+	             "\"type\":\"integer\","
+	             "\"minimum\":1"
+	             "},"
+	             "\"tags\":{"
+	             "\"type\":\"object\","
+	             "\"origin\":{"
+	             "\"type\":\"string\","
+	             "\"enum\":[\"Metric\",\"Status\",\"Key\","
+	             "\"Config\",\"Product\",\"Unknown\"]"
+	             "},"
+	             "\"nature\":{"
+	             "\"type\":\"string\","
+	             "\"enum\":[\"Gauge\",\"Limit\",\"Min\",\"Max\","
+	             "\"Rate\",\"Counter\",\"Duration\","
+	             "\"Age\",\"Time\",\"Name\",\"Output\","
+	             "\"Avg\", \"Unknown\"]"
+	             "},"
+	             "\"scope\":{"
+	             "\"type\":\"string\","
+	             "\"enum\":[\"Cluster\",\"Process\",\"Service\","
+	             "\"System\",\"Unknown\"]"
+	             "},"
+	             "\"required\":[\"origin\",\"nature\",\"scope\"]"
+	             "},"
+	             "\"typedValue\":{"
+	             "\"type\":\"object\","
+	             "\"oneOf\":["
+	             "{\"$ref\":\"#/definitions/typedValue/definitions/s32Value\"},"
+	             "{\"$ref\":\"#/definitions/typedValue/definitions/s64Value\"},"
+	             "{\"$ref\":\"#/definitions/typedValue/definitions/u32Value\"},"
+	             "{\"$ref\":\"#/definitions/typedValue/definitions/u64Value\"},"
+	             "{\"$ref\":\"#/definitions/typedValue/definitions/strValue\"}"
+	             "],"
+	             "\"definitions\":{"
+	             "\"s32Value\":{"
+	             "\"properties\":{"
+	             "\"type\":{"
+	             "\"type\":\"string\","
+	             "\"enum\":[\"s32\"]"
+	             "},"
+	             "\"value\":{"
+	             "\"type\":\"integer\","
+	             "\"minimum\":-2147483648,"
+	             "\"maximum\":2147483647"
+	             "}"
+	             "},"
+	             "\"required\":[\"type\",\"value\"]"
+	             "},"
+	             "\"s64Value\":{"
+	             "\"properties\":{"
+	             "\"type\":{"
+	             "\"type\":\"string\","
+	             "\"enum\":[\"s64\"]"
+	             "},"
+	             "\"value\":{"
+	             "\"type\":\"integer\","
+	             "\"minimum\":-9007199254740991,"
+	             "\"maximum\":9007199254740991"
+	             "}"
+	             "},"
+	             "\"required\":[\"type\",\"value\"]"
+	             "},"
+	             "\"u32Value\":{"
+	             "\"properties\":{"
+	             "\"type\":{"
+	             "\"type\":\"string\","
+	             "\"enum\":[\"u32\"]"
+	             "},"
+	             "\"value\":{"
+	             "\"type\":\"integer\","
+	             "\"minimum\":0,"
+	             "\"maximum\":4294967295"
+	             "}"
+	             "},"
+	             "\"required\":[\"type\",\"value\"]"
+	             "},"
+	             "\"u64Value\":{"
+	             "\"properties\":{"
+	             "\"type\":{"
+	             "\"type\":\"string\","
+	             "\"enum\":[\"u64\"]"
+	             "},"
+	             "\"value\":{"
+	             "\"type\":\"integer\","
+	             "\"minimum\":0,"
+	             "\"maximum\":9007199254740991"
+	             "}"
+	             "},"
+	             "\"required\":[\"type\",\"value\"]"
+	             "},"
+	             "\"strValue\":{"
+	             "\"properties\":{"
+	             "\"type\":{"
+	             "\"type\":\"string\","
+	             "\"enum\":[\"str\"]"
+	             "},"
+	             "\"value\":{\"type\":\"string\"}"
+	             "},"
+	             "\"required\":[\"type\",\"value\"]"
+	             "},"
+	             "\"unknownValue\":{"
+	             "\"properties\":{"
+	             "\"type\":{"
+	             "\"type\":\"integer\","
+	             "\"minimum\":0"
+	             "},"
+	             "\"value\":{"
+	             "\"type\":\"string\","
+	             "\"enum\":[\"unknown\"]"
+	             "}"
+	             "},"
+	             "\"required\":[\"type\",\"value\"]"
+	             "}"
+	             "}"
+	             "}"
+	             "}"
+	             "}");
+
+	if (old_len == out->data) {
+		chunk_reset(out);
+		chunk_appendf(out,
+			      "{\"errorStr\":\"output buffer too short\"}");
+	}
+	chunk_appendf(out, "\n");
+}
+
+/* This function resets the chunk of the applet's show_stat_ctx, fills it with
+ * the JSON schema using stats_dump_json_schema(), then hands it over to
+ * applet_putchk(). It returns 0 when applet_putchk() returns -1, and 1
+ * otherwise. The chunk is rebuilt from scratch on every call, so nothing is
+ * kept between two calls.
+ */
+int stats_dump_json_schema_to_buffer(struct appctx *appctx)
+{
+	struct show_stat_ctx *ctx = appctx->svcctx;
+	struct buffer *chk = &ctx->chunk;
+
+	chunk_reset(chk);
+
+	stats_dump_json_schema(chk);
+
+	if (applet_putchk(appctx, chk) == -1)
+		return 0;
+
+	return 1;
+}
diff --git a/src/stats-proxy.c b/src/stats-proxy.c
new file mode 100644
index 0000000..a158d87
--- /dev/null
+++ b/src/stats-proxy.c
@@ -0,0 +1,1686 @@
+#include <haproxy/stats-proxy.h>
+
+#include <errno.h>
+#include <string.h>
+
+#include <haproxy/api.h>
+#include <haproxy/backend.h>
+#include <haproxy/check.h>
+#include <haproxy/chunk.h>
+#include <haproxy/freq_ctr.h>
+#include <haproxy/list.h>
+#include <haproxy/listener.h>
+#include <haproxy/obj_type.h>
+#include <haproxy/proxy.h>
+#include <haproxy/stats.h>
+#include <haproxy/stats-html.h>
+#include <haproxy/server.h>
+#include <haproxy/stconn.h>
+#include <haproxy/time.h>
+#include <haproxy/tools.h>
+
+/* Define a new metric for both frontend and backend sides.
*/ +#define ME_NEW_PX(name_f, nature, format, offset_f, cap_f, desc_f) \ + { .name = (name_f), .desc = (desc_f), .type = (nature)|(format), \ + .metric.offset[0] = offsetof(struct fe_counters, offset_f), \ + .metric.offset[1] = offsetof(struct be_counters, offset_f), \ + .cap = (cap_f), \ + } + +/* Define a new metric for frontend side only. */ +#define ME_NEW_FE(name_f, nature, format, offset_f, cap_f, desc_f) \ + { .name = (name_f), .desc = (desc_f), .type = (nature)|(format), \ + .metric.offset[0] = offsetof(struct fe_counters, offset_f), \ + .cap = (cap_f), \ + } + +/* Define a new metric for backend side only. */ +#define ME_NEW_BE(name_f, nature, format, offset_f, cap_f, desc_f) \ + { .name = (name_f), .desc = (desc_f), .type = (nature)|(format), \ + .metric.offset[1] = offsetof(struct be_counters, offset_f), \ + .cap = (cap_f), \ + } + +const struct stat_col stat_cols_px[ST_I_PX_MAX] = { + [ST_I_PX_PXNAME] = { .name = "pxname", .desc = "Proxy name" }, + [ST_I_PX_SVNAME] = { .name = "svname", .desc = "Server name" }, + [ST_I_PX_QCUR] = { .name = "qcur", .desc = "Number of current queued connections" }, + [ST_I_PX_QMAX] = { .name = "qmax", .desc = "Highest value of queued connections encountered since process started" }, + [ST_I_PX_SCUR] = { .name = "scur", .desc = "Number of current sessions on the frontend, backend or server" }, + [ST_I_PX_SMAX] = { .name = "smax", .desc = "Highest value of current sessions encountered since process started" }, + [ST_I_PX_SLIM] = { .name = "slim", .desc = "Frontend/listener/server's maxconn, backend's fullconn" }, + [ST_I_PX_STOT] = ME_NEW_PX("stot", FN_COUNTER, FF_U64, cum_sess, STATS_PX_CAP_LFBS, "Total number of sessions since process started"), + [ST_I_PX_BIN] = ME_NEW_PX("bin", FN_COUNTER, FF_U64, bytes_in, STATS_PX_CAP_LFBS, "Total number of request bytes since process started"), + [ST_I_PX_BOUT] = ME_NEW_PX("bout", FN_COUNTER, FF_U64, bytes_out, STATS_PX_CAP_LFBS, "Total number of response bytes since process 
started"), + [ST_I_PX_DREQ] = ME_NEW_PX("dreq", FN_COUNTER, FF_U64, denied_req, STATS_PX_CAP_LFB_, "Total number of denied requests since process started"), + [ST_I_PX_DRESP] = ME_NEW_PX("dresp", FN_COUNTER, FF_U64, denied_resp, STATS_PX_CAP_LFBS, "Total number of denied responses since process started"), + [ST_I_PX_EREQ] = ME_NEW_FE("ereq", FN_COUNTER, FF_U64, failed_req, STATS_PX_CAP_LF__, "Total number of invalid requests since process started"), + [ST_I_PX_ECON] = ME_NEW_BE("econ", FN_COUNTER, FF_U64, failed_conns, STATS_PX_CAP___BS, "Total number of failed connections to server since the worker process started"), + [ST_I_PX_ERESP] = ME_NEW_BE("eresp", FN_COUNTER, FF_U64, failed_resp, STATS_PX_CAP___BS, "Total number of invalid responses since the worker process started"), + [ST_I_PX_WRETR] = ME_NEW_BE("wretr", FN_COUNTER, FF_U64, retries, STATS_PX_CAP___BS, "Total number of server connection retries since the worker process started"), + [ST_I_PX_WREDIS] = ME_NEW_BE("wredis", FN_COUNTER, FF_U64, redispatches, STATS_PX_CAP___BS, "Total number of server redispatches due to connection failures since the worker process started"), + [ST_I_PX_STATUS] = { .name = "status", .desc = "Frontend/listen status: OPEN/WAITING/FULL/STOP; backend: UP/DOWN; server: last check status" }, + [ST_I_PX_WEIGHT] = { .name = "weight", .desc = "Server's effective weight, or sum of active servers' effective weights for a backend" }, + [ST_I_PX_ACT] = { .name = "act", .desc = "Total number of active UP servers with a non-zero weight" }, + [ST_I_PX_BCK] = { .name = "bck", .desc = "Total number of backup UP servers with a non-zero weight" }, + [ST_I_PX_CHKFAIL] = ME_NEW_BE("chkfail", FN_COUNTER, FF_U64, failed_checks, STATS_PX_CAP____S, "Total number of failed individual health checks per server/backend, since the worker process started"), + [ST_I_PX_CHKDOWN] = ME_NEW_BE("chkdown", FN_COUNTER, FF_U64, down_trans, STATS_PX_CAP___BS, "Total number of failed checks causing UP to DOWN server 
transitions, per server/backend, since the worker process started"), + [ST_I_PX_LASTCHG] = ME_NEW_BE("lastchg", FN_AGE, FF_U32, last_change, STATS_PX_CAP___BS, "How long ago the last server state changed, in seconds"), + [ST_I_PX_DOWNTIME] = { .name = "downtime", .desc = "Total time spent in DOWN state, for server or backend" }, + [ST_I_PX_QLIMIT] = { .name = "qlimit", .desc = "Limit on the number of connections in queue, for servers only (maxqueue argument)" }, + [ST_I_PX_PID] = { .name = "pid", .desc = "Relative worker process number (1)" }, + [ST_I_PX_IID] = { .name = "iid", .desc = "Frontend or Backend numeric identifier ('id' setting)" }, + [ST_I_PX_SID] = { .name = "sid", .desc = "Server numeric identifier ('id' setting)" }, + [ST_I_PX_THROTTLE] = { .name = "throttle", .desc = "Throttling ratio applied to a server's maxconn and weight during the slowstart period (0 to 100%)" }, + [ST_I_PX_LBTOT] = ME_NEW_BE("lbtot", FN_COUNTER, FF_U64, cum_lbconn, STATS_PX_CAP_LFBS, "Total number of requests routed by load balancing since the worker process started (ignores queue pop and stickiness)"), + [ST_I_PX_TRACKED] = { .name = "tracked", .desc = "Name of the other server this server tracks for its state" }, + [ST_I_PX_TYPE] = { .name = "type", .desc = "Type of the object (Listener, Frontend, Backend, Server)" }, + [ST_I_PX_RATE] = ME_NEW_PX("rate", FN_RATE, FF_U32, sess_per_sec, STATS_PX_CAP__FBS, "Total number of sessions processed by this object over the last second (sessions for listeners/frontends, requests for backends/servers)"), + [ST_I_PX_RATE_LIM] = { .name = "rate_lim", .desc = "Limit on the number of sessions accepted in a second (frontend only, 'rate-limit sessions' setting)" }, + [ST_I_PX_RATE_MAX] = { .name = "rate_max", .desc = "Highest value of sessions per second observed since the worker process started" }, + [ST_I_PX_CHECK_STATUS] = { .name = "check_status", .desc = "Status report of the server's latest health check, prefixed with '*' if a check is 
currently in progress" }, + [ST_I_PX_CHECK_CODE] = { .name = "check_code", .desc = "HTTP/SMTP/LDAP status code reported by the latest server health check" }, + [ST_I_PX_CHECK_DURATION] = { .name = "check_duration", .desc = "Total duration of the latest server health check, in milliseconds" }, + [ST_I_PX_HRSP_1XX] = ME_NEW_PX("hrsp_1xx", FN_COUNTER, FF_U64, p.http.rsp[1], STATS_PX_CAP__FBS, "Total number of HTTP responses with status 100-199 returned by this object since the worker process started"), + [ST_I_PX_HRSP_2XX] = ME_NEW_PX("hrsp_2xx", FN_COUNTER, FF_U64, p.http.rsp[2], STATS_PX_CAP__FBS, "Total number of HTTP responses with status 200-299 returned by this object since the worker process started"), + [ST_I_PX_HRSP_3XX] = ME_NEW_PX("hrsp_3xx", FN_COUNTER, FF_U64, p.http.rsp[3], STATS_PX_CAP__FBS, "Total number of HTTP responses with status 300-399 returned by this object since the worker process started"), + [ST_I_PX_HRSP_4XX] = ME_NEW_PX("hrsp_4xx", FN_COUNTER, FF_U64, p.http.rsp[4], STATS_PX_CAP__FBS, "Total number of HTTP responses with status 400-499 returned by this object since the worker process started"), + [ST_I_PX_HRSP_5XX] = ME_NEW_PX("hrsp_5xx", FN_COUNTER, FF_U64, p.http.rsp[5], STATS_PX_CAP__FBS, "Total number of HTTP responses with status 500-599 returned by this object since the worker process started"), + [ST_I_PX_HRSP_OTHER] = ME_NEW_PX("hrsp_other", FN_COUNTER, FF_U64, p.http.rsp[0], STATS_PX_CAP__FBS, "Total number of HTTP responses with status <100, >599 returned by this object since the worker process started (error -1 included)"), + [ST_I_PX_HANAFAIL] = ME_NEW_BE("hanafail", FN_COUNTER, FF_U64, failed_hana, STATS_PX_CAP____S, "Total number of failed checks caused by an 'on-error' directive after an 'observe' condition matched"), + [ST_I_PX_REQ_RATE] = ME_NEW_FE("req_rate", FN_RATE, FF_U32, req_per_sec, STATS_PX_CAP__F__, "Number of HTTP requests processed over the last second on this object"), + [ST_I_PX_REQ_RATE_MAX] = { .name = 
"req_rate_max", .desc = "Highest value of http requests observed since the worker process started" }, + /* Note: ST_I_PX_REQ_TOT is also displayed on frontend but does not use a raw counter value, see me_generate_field() for details. */ + [ST_I_PX_REQ_TOT] = ME_NEW_BE("req_tot", FN_COUNTER, FF_U64, p.http.cum_req, STATS_PX_CAP___BS, "Total number of HTTP requests processed by this object since the worker process started"), + [ST_I_PX_CLI_ABRT] = ME_NEW_BE("cli_abrt", FN_COUNTER, FF_U64, cli_aborts, STATS_PX_CAP_LFBS, "Total number of requests or connections aborted by the client since the worker process started"), + [ST_I_PX_SRV_ABRT] = ME_NEW_BE("srv_abrt", FN_COUNTER, FF_U64, srv_aborts, STATS_PX_CAP_LFBS, "Total number of requests or connections aborted by the server since the worker process started"), + [ST_I_PX_COMP_IN] = ME_NEW_PX("comp_in", FN_COUNTER, FF_U64, comp_in[COMP_DIR_RES], STATS_PX_CAP__FB_, "Total number of bytes submitted to the HTTP compressor for this object since the worker process started"), + [ST_I_PX_COMP_OUT] = ME_NEW_PX("comp_out", FN_COUNTER, FF_U64, comp_out[COMP_DIR_RES], STATS_PX_CAP__FB_, "Total number of bytes emitted by the HTTP compressor for this object since the worker process started"), + [ST_I_PX_COMP_BYP] = ME_NEW_PX("comp_byp", FN_COUNTER, FF_U64, comp_byp[COMP_DIR_RES], STATS_PX_CAP__FB_, "Total number of bytes that bypassed HTTP compression for this object since the worker process started (CPU/memory/bandwidth limitation)"), + [ST_I_PX_COMP_RSP] = ME_NEW_PX("comp_rsp", FN_COUNTER, FF_U64, p.http.comp_rsp, STATS_PX_CAP__FB_, "Total number of HTTP responses that were compressed for this object since the worker process started"), + [ST_I_PX_LASTSESS] = ME_NEW_BE("lastsess", FN_AGE, FF_S32, last_sess, STATS_PX_CAP___BS, "How long ago some traffic was seen on this object on this worker process, in seconds"), + [ST_I_PX_LAST_CHK] = { .name = "last_chk", .desc = "Short description of the latest health check report for this server 
(see also check_desc)" }, + [ST_I_PX_LAST_AGT] = { .name = "last_agt", .desc = "Short description of the latest agent check report for this server (see also agent_desc)" }, + [ST_I_PX_QTIME] = { .name = "qtime", .desc = "Time spent in the queue, in milliseconds, averaged over the 1024 last requests (backend/server)" }, + [ST_I_PX_CTIME] = { .name = "ctime", .desc = "Time spent waiting for a connection to complete, in milliseconds, averaged over the 1024 last requests (backend/server)" }, + [ST_I_PX_RTIME] = { .name = "rtime", .desc = "Time spent waiting for a server response, in milliseconds, averaged over the 1024 last requests (backend/server)" }, + [ST_I_PX_TTIME] = { .name = "ttime", .desc = "Total request+response time (request+queue+connect+response+processing), in milliseconds, averaged over the 1024 last requests (backend/server)" }, + [ST_I_PX_AGENT_STATUS] = { .name = "agent_status", .desc = "Status report of the server's latest agent check, prefixed with '*' if a check is currently in progress" }, + [ST_I_PX_AGENT_CODE] = { .name = "agent_code", .desc = "Status code reported by the latest server agent check" }, + [ST_I_PX_AGENT_DURATION] = { .name = "agent_duration", .desc = "Total duration of the latest server agent check, in milliseconds" }, + [ST_I_PX_CHECK_DESC] = { .name = "check_desc", .desc = "Textual description of the latest health check report for this server" }, + [ST_I_PX_AGENT_DESC] = { .name = "agent_desc", .desc = "Textual description of the latest agent check report for this server" }, + [ST_I_PX_CHECK_RISE] = { .name = "check_rise", .desc = "Number of successful health checks before declaring a server UP (server 'rise' setting)" }, + [ST_I_PX_CHECK_FALL] = { .name = "check_fall", .desc = "Number of failed health checks before declaring a server DOWN (server 'fall' setting)" }, + [ST_I_PX_CHECK_HEALTH] = { .name = "check_health", .desc = "Current server health check level (0..fall-1=DOWN, fall..rise-1=UP)" }, + [ST_I_PX_AGENT_RISE] = { 
.name = "agent_rise", .desc = "Number of successful agent checks before declaring a server UP (server 'rise' setting)" }, + [ST_I_PX_AGENT_FALL] = { .name = "agent_fall", .desc = "Number of failed agent checks before declaring a server DOWN (server 'fall' setting)" }, + [ST_I_PX_AGENT_HEALTH] = { .name = "agent_health", .desc = "Current server agent check level (0..fall-1=DOWN, fall..rise-1=UP)" }, + [ST_I_PX_ADDR] = { .name = "addr", .desc = "Server's address:port, shown only if show-legends is set, or at levels oper/admin for the CLI" }, + [ST_I_PX_COOKIE] = { .name = "cookie", .desc = "Backend's cookie name or Server's cookie value, shown only if show-legends is set, or at levels oper/admin for the CLI" }, + [ST_I_PX_MODE] = { .name = "mode", .desc = "'mode' setting (tcp/http/health/cli)" }, + [ST_I_PX_ALGO] = { .name = "algo", .desc = "Backend's load balancing algorithm, shown only if show-legends is set, or at levels oper/admin for the CLI" }, + [ST_I_PX_CONN_RATE] = ME_NEW_FE("conn_rate", FN_RATE, FF_U32, conn_per_sec, STATS_PX_CAP__F__, "Number of new connections accepted over the last second on the frontend for this worker process"), + [ST_I_PX_CONN_RATE_MAX] = { .name = "conn_rate_max", .desc = "Highest value of connections per second observed since the worker process started" }, + [ST_I_PX_CONN_TOT] = ME_NEW_FE("conn_tot", FN_COUNTER, FF_U64, cum_conn, STATS_PX_CAP_LF__, "Total number of new connections accepted on this frontend since the worker process started"), + [ST_I_PX_INTERCEPTED] = ME_NEW_FE("intercepted", FN_COUNTER, FF_U64, intercepted_req, STATS_PX_CAP__F__, "Total number of HTTP requests intercepted on the frontend (redirects/stats/services) since the worker process started"), + [ST_I_PX_DCON] = ME_NEW_FE("dcon", FN_COUNTER, FF_U64, denied_conn, STATS_PX_CAP_LF__, "Total number of incoming connections blocked on a listener/frontend by a tcp-request connection rule since the worker process started"), + [ST_I_PX_DSES] = ME_NEW_FE("dses", 
FN_COUNTER, FF_U64, denied_sess, STATS_PX_CAP_LF__, "Total number of incoming sessions blocked on a listener/frontend by a tcp-request connection rule since the worker process started"), + [ST_I_PX_WREW] = ME_NEW_PX("wrew", FN_COUNTER, FF_U64, failed_rewrites, STATS_PX_CAP_LFBS, "Total number of failed HTTP header rewrites since the worker process started"), + [ST_I_PX_CONNECT] = ME_NEW_BE("connect", FN_COUNTER, FF_U64, connect, STATS_PX_CAP___BS, "Total number of outgoing connection attempts on this backend/server since the worker process started"), + [ST_I_PX_REUSE] = ME_NEW_BE("reuse", FN_COUNTER, FF_U64, reuse, STATS_PX_CAP___BS, "Total number of reused connection on this backend/server since the worker process started"), + [ST_I_PX_CACHE_LOOKUPS] = ME_NEW_PX("cache_lookups", FN_COUNTER, FF_U64, p.http.cache_lookups, STATS_PX_CAP__FB_, "Total number of HTTP requests looked up in the cache on this frontend/backend since the worker process started"), + [ST_I_PX_CACHE_HITS] = ME_NEW_PX("cache_hits", FN_COUNTER, FF_U64, p.http.cache_hits, STATS_PX_CAP__FB_, "Total number of HTTP requests not found in the cache on this frontend/backend since the worker process started"), + [ST_I_PX_SRV_ICUR] = { .name = "srv_icur", .desc = "Current number of idle connections available for reuse on this server" }, + [ST_I_PX_SRV_ILIM] = { .name = "src_ilim", .desc = "Limit on the number of available idle connections on this server (server 'pool_max_conn' directive)" }, + [ST_I_PX_QT_MAX] = { .name = "qtime_max", .desc = "Maximum observed time spent in the queue, in milliseconds (backend/server)" }, + [ST_I_PX_CT_MAX] = { .name = "ctime_max", .desc = "Maximum observed time spent waiting for a connection to complete, in milliseconds (backend/server)" }, + [ST_I_PX_RT_MAX] = { .name = "rtime_max", .desc = "Maximum observed time spent waiting for a server response, in milliseconds (backend/server)" }, + [ST_I_PX_TT_MAX] = { .name = "ttime_max", .desc = "Maximum observed total 
request+response time (request+queue+connect+response+processing), in milliseconds (backend/server)" }, + [ST_I_PX_EINT] = ME_NEW_PX("eint", FN_COUNTER, FF_U64, internal_errors, STATS_PX_CAP_LFBS, "Total number of internal errors since process started"), + [ST_I_PX_IDLE_CONN_CUR] = { .name = "idle_conn_cur", .desc = "Current number of unsafe idle connections"}, + [ST_I_PX_SAFE_CONN_CUR] = { .name = "safe_conn_cur", .desc = "Current number of safe idle connections"}, + [ST_I_PX_USED_CONN_CUR] = { .name = "used_conn_cur", .desc = "Current number of connections in use"}, + [ST_I_PX_NEED_CONN_EST] = { .name = "need_conn_est", .desc = "Estimated needed number of connections"}, + [ST_I_PX_UWEIGHT] = { .name = "uweight", .desc = "Server's user weight, or sum of active servers' user weights for a backend" }, + [ST_I_PX_AGG_SRV_CHECK_STATUS] = { .name = "agg_server_check_status", .desc = "[DEPRECATED] Backend's aggregated gauge of servers' status" }, + [ST_I_PX_AGG_SRV_STATUS ] = { .name = "agg_server_status", .desc = "Backend's aggregated gauge of servers' status" }, + [ST_I_PX_AGG_CHECK_STATUS] = { .name = "agg_check_status", .desc = "Backend's aggregated gauge of servers' state check status" }, + [ST_I_PX_SRID] = { .name = "srid", .desc = "Server id revision, to prevent server id reuse mixups" }, + [ST_I_PX_SESS_OTHER] = { .name = "sess_other", .desc = "Total number of sessions other than HTTP since process started" }, + [ST_I_PX_H1SESS] = ME_NEW_FE("h1sess", FN_COUNTER, FF_U64, cum_sess_ver[0], STATS_PX_CAP__F__, "Total number of HTTP/1 sessions since process started"), + [ST_I_PX_H2SESS] = ME_NEW_FE("h2sess", FN_COUNTER, FF_U64, cum_sess_ver[1], STATS_PX_CAP__F__, "Total number of HTTP/2 sessions since process started"), + [ST_I_PX_H3SESS] = ME_NEW_FE("h3sess", FN_COUNTER, FF_U64, cum_sess_ver[2], STATS_PX_CAP__F__, "Total number of HTTP/3 sessions since process started"), + [ST_I_PX_REQ_OTHER] = ME_NEW_FE("req_other", FN_COUNTER, FF_U64, p.http.cum_req[0], 
STATS_PX_CAP__F__, "Total number of sessions other than HTTP processed by this object since the worker process started"), + [ST_I_PX_H1REQ] = ME_NEW_FE("h1req", FN_COUNTER, FF_U64, p.http.cum_req[1], STATS_PX_CAP__F__, "Total number of HTTP/1 sessions processed by this object since the worker process started"), + [ST_I_PX_H2REQ] = ME_NEW_FE("h2req", FN_COUNTER, FF_U64, p.http.cum_req[2], STATS_PX_CAP__F__, "Total number of hTTP/2 sessions processed by this object since the worker process started"), + [ST_I_PX_H3REQ] = ME_NEW_FE("h3req", FN_COUNTER, FF_U64, p.http.cum_req[3], STATS_PX_CAP__F__, "Total number of HTTP/3 sessions processed by this object since the worker process started"), + [ST_I_PX_PROTO] = { .name = "proto", .desc = "Protocol" }, +}; + +/* Returns true if column at <idx> should be hidden. + * This may depends on various <objt> internal status. + */ +static int stcol_hide(enum stat_idx_px idx, enum obj_type *objt) +{ + struct proxy *px; + struct server *srv = NULL, *ref; + struct listener *li = NULL; + + switch (obj_type(objt)) { + case OBJ_TYPE_PROXY: + px = __objt_proxy(objt); + break; + case OBJ_TYPE_SERVER: + srv = __objt_server(objt); + px = srv->proxy; + break; + case OBJ_TYPE_LISTENER: + li = __objt_listener(objt); + px = li->bind_conf->frontend; + break; + default: + ABORT_NOW(); + return 0; + } + + switch (idx) { + case ST_I_PX_HRSP_1XX: + case ST_I_PX_HRSP_2XX: + case ST_I_PX_HRSP_3XX: + case ST_I_PX_HRSP_4XX: + case ST_I_PX_HRSP_5XX: + case ST_I_PX_REQ_TOT: + case ST_I_PX_INTERCEPTED: + case ST_I_PX_CACHE_LOOKUPS: + case ST_I_PX_CACHE_HITS: + return px->mode != PR_MODE_HTTP; + + case ST_I_PX_CHKFAIL: + case ST_I_PX_CHKDOWN: + return srv && !(srv->check.state & CHK_ST_ENABLED); + + case ST_I_PX_HANAFAIL: + BUG_ON(!srv); /* HANAFAIL is only defined for server scope */ + + ref = srv->track ? 
srv->track : srv; + while (ref->track) + ref = ref->track; + return !ref->observe; + + case ST_I_PX_LASTSESS: + if (srv) + return !srv->counters.last_sess; + else if (px) + return !px->be_counters.last_sess; + else + return 0; + + default: + return 0; + } +} + +/* Generate if possible a metric value from <col>. <cap> must be set to one of + * STATS_PX_CAP_* values to check if the metric is available for this object + * type. <stat_file> must be set when dumping stats-file. Metric value will be + * extracted from <counters>. + * + * Returns a field metric. + */ +static struct field me_generate_field(const struct stat_col *col, + enum stat_idx_px idx, enum obj_type *objt, + const void *counters, uint8_t cap, + int stat_file) +{ + enum field_nature fn; + struct field value; + void *counter = NULL; + int wrong_side = 0; + + /* Only generic stat column must be used as input. */ + BUG_ON(!stcol_is_generic(col)); + + fn = stcol_nature(col); + + switch (cap) { + case STATS_PX_CAP_FE: + case STATS_PX_CAP_LI: + counter = (char *)counters + col->metric.offset[0]; + wrong_side = !(col->cap & (STATS_PX_CAP_FE|STATS_PX_CAP_LI)); + break; + + case STATS_PX_CAP_BE: + case STATS_PX_CAP_SRV: + counter = (char *)counters + col->metric.offset[1]; + wrong_side = !(col->cap & (STATS_PX_CAP_BE|STATS_PX_CAP_SRV)); + break; + + default: + /* invalid cap requested */ + ABORT_NOW(); + } + + /* TODO Special case needed for ST_I_PX_REQ_TOT. It is defined as a + * generic column for backend side. Extra code required to diplay it on + * frontend side as an aggregate of values splitted by HTTP version. 
+ */ + if (idx == ST_I_PX_REQ_TOT && cap == STATS_PX_CAP_FE && !stat_file) { + struct proxy *px = __objt_proxy(objt); + const size_t nb_reqs = + sizeof(px->fe_counters.p.http.cum_req) / + sizeof(*px->fe_counters.p.http.cum_req); + uint64_t total_req = 0; + int i; + + for (i = 0; i < nb_reqs; i++) + total_req += px->fe_counters.p.http.cum_req[i]; + return mkf_u64(FN_COUNTER, total_req); + } + + if (stat_file) { + /* stats-file emits separately frontend and backend stats. + * Skip metric if not defined for any object on the cap side. + */ + if (wrong_side) + return (struct field){ .type = FF_EMPTY }; + } + else { + /* Ensure metric is defined for the current cap. */ + if (!(col->cap & cap)) + return (struct field){ .type = FF_EMPTY }; + + if (stcol_hide(idx, objt)) { + if (fn == FN_AGE) + return mkf_s32(FN_AGE, -1); + else + return (struct field){ .type = FF_EMPTY }; + } + } + + if (fn == FN_COUNTER) { + switch (stcol_format(col)) { + case FF_U64: + value = mkf_u64(FN_COUNTER, *(uint64_t *)counter); + break; + default: + /* only FF_U64 counters currently use generic metric calculation */ + ABORT_NOW(); + } + } + else if (fn == FN_RATE) { + /* freq-ctr always uses FF_U32 */ + BUG_ON(stcol_format(col) != FF_U32); + value = mkf_u32(FN_RATE, read_freq_ctr(counter)); + } + else if (fn == FN_AGE) { + unsigned long age = *(unsigned long *)counter; + if (age) + age = ns_to_sec(now_ns) - age; + + switch (stcol_format(col)) { + case FF_U32: + value = mkf_u32(FN_AGE, age); + break; + case FF_S32: + value = mkf_s32(FN_AGE, age); + break; + default: + /* only FF_U32/FF+S32 for age as generic stat column */ + ABORT_NOW(); + } + } + else { + /* No generic column available for other field nature. */ + ABORT_NOW(); + } + + return value; +} + +/* Fill <line> with the frontend statistics. <line> is preallocated array of + * length <len>. If <index> is != NULL, only fill this one. The length + * of the array must be at least ST_I_PX_MAX. 
If this length is less than + * this value, or if the selected field is not implemented for frontends, the + * function returns 0, otherwise, it returns 1. + */ +int stats_fill_fe_line(struct proxy *px, int flags, struct field *line, int len, + enum stat_idx_px *index) +{ + enum stat_idx_px i = index ? *index : 0; + + if (len < ST_I_PX_MAX) + return 0; + + for (; i < ST_I_PX_MAX; i++) { + const struct stat_col *col = &stat_cols_px[i]; + struct field field = { 0 }; + + if (stcol_is_generic(col)) { + field = me_generate_field(col, i, &px->obj_type, + &px->fe_counters, STATS_PX_CAP_FE, + flags & STAT_F_FMT_FILE); + } + else if (!(flags & STAT_F_FMT_FILE)) { + switch (i) { + case ST_I_PX_PXNAME: + field = mkf_str(FO_KEY|FN_NAME|FS_SERVICE, px->id); + break; + case ST_I_PX_SVNAME: + field = mkf_str(FO_KEY|FN_NAME|FS_SERVICE, "FRONTEND"); + break; + case ST_I_PX_MODE: + field = mkf_str(FO_CONFIG|FS_SERVICE, proxy_mode_str(px->mode)); + break; + case ST_I_PX_SCUR: + field = mkf_u32(0, px->feconn); + break; + case ST_I_PX_SMAX: + field = mkf_u32(FN_MAX, px->fe_counters.conn_max); + break; + case ST_I_PX_SLIM: + field = mkf_u32(FO_CONFIG|FN_LIMIT, px->maxconn); + break; + case ST_I_PX_STATUS: { + const char *state; + + if (px->flags & (PR_FL_DISABLED|PR_FL_STOPPED)) + state = "STOP"; + else if (px->flags & PR_FL_PAUSED) + state = "PAUSED"; + else + state = "OPEN"; + field = mkf_str(FO_STATUS, state); + break; + } + case ST_I_PX_PID: + field = mkf_u32(FO_KEY, 1); + break; + case ST_I_PX_IID: + field = mkf_u32(FO_KEY|FS_SERVICE, px->uuid); + break; + case ST_I_PX_SID: + field = mkf_u32(FO_KEY|FS_SERVICE, 0); + break; + case ST_I_PX_TYPE: + field = mkf_u32(FO_CONFIG|FS_SERVICE, STATS_TYPE_FE); + break; + case ST_I_PX_RATE_LIM: + field = mkf_u32(FO_CONFIG|FN_LIMIT, px->fe_sps_lim); + break; + case ST_I_PX_RATE_MAX: + field = mkf_u32(FN_MAX, px->fe_counters.sps_max); + break; + case ST_I_PX_REQ_RATE_MAX: + field = mkf_u32(FN_MAX, px->fe_counters.p.http.rps_max); + break; + case 
ST_I_PX_CONN_RATE_MAX: + field = mkf_u32(FN_MAX, px->fe_counters.cps_max); + break; + case ST_I_PX_SESS_OTHER: { + int i; + uint64_t total_sess; + size_t nb_sess = + sizeof(px->fe_counters.cum_sess_ver) / sizeof(*px->fe_counters.cum_sess_ver); + + total_sess = px->fe_counters.cum_sess; + for (i = 0; i < nb_sess; i++) + total_sess -= px->fe_counters.cum_sess_ver[i]; + total_sess = (int64_t)total_sess < 0 ? 0 : total_sess; + field = mkf_u64(FN_COUNTER, total_sess); + break; + } + default: + /* not used for frontends. If a specific field + * is requested, return an error. Otherwise continue. + */ + if (index) + return 0; + continue; + } + } + line[i] = field; + if (index) + break; + } + return 1; +} + +/* Dumps a frontend's line to chunk ctx buffer for the current proxy <px> and + * uses the state from stream connector <sc>. The caller is responsible for + * clearing chunk ctx buffer if needed. Returns non-zero if it emits anything, + * zero otherwise. + */ +static int stats_dump_fe_line(struct stconn *sc, struct proxy *px) +{ + struct appctx *appctx = __sc_appctx(sc); + struct show_stat_ctx *ctx = appctx->svcctx; + struct field *line = stat_lines[STATS_DOMAIN_PROXY]; + struct stats_module *mod; + size_t stats_count = ST_I_PX_MAX; + + if (!(px->cap & PR_CAP_FE)) + return 0; + + if ((ctx->flags & STAT_F_BOUND) && !(ctx->type & (1 << STATS_TYPE_FE))) + return 0; + + memset(line, 0, sizeof(struct field) * stat_cols_len[STATS_DOMAIN_PROXY]); + + if (!stats_fill_fe_line(px, ctx->flags, line, ST_I_PX_MAX, NULL)) + return 0; + + list_for_each_entry(mod, &stats_module_list[STATS_DOMAIN_PROXY], list) { + void *counters; + + if (ctx->flags & STAT_F_FMT_FILE) + continue; + + if (!(stats_px_get_cap(mod->domain_flags) & STATS_PX_CAP_FE)) { + stats_count += mod->stats_count; + continue; + } + + counters = EXTRA_COUNTERS_GET(px->extra_counters_fe, mod); + if (!mod->fill_stats(counters, line + stats_count, NULL)) + continue; + stats_count += mod->stats_count; + } + + return 
stats_dump_one_line(line, stats_count, appctx); +} + +/* Fill <line> with the listener statistics. <line> is preallocated array of + * length <len>. The length of the array must be at least ST_I_PX_MAX. If + * this length is less then this value, the function returns 0, otherwise, it + * returns 1. If selected_field is != NULL, only fill this one. <flags> can + * take the value STAT_F_SHLGNDS. + */ +int stats_fill_li_line(struct proxy *px, struct listener *l, int flags, + struct field *line, int len, enum stat_idx_px *selected_field) +{ + enum stat_idx_px i = (selected_field != NULL ? *selected_field : 0); + struct buffer *out = get_trash_chunk(); + + if (len < ST_I_PX_MAX) + return 0; + + if (!l->counters) + return 0; + + chunk_reset(out); + + for (; i < ST_I_PX_MAX; i++) { + const struct stat_col *col = &stat_cols_px[i]; + struct field field = { 0 }; + + if (stcol_is_generic(col)) { + field = me_generate_field(col, i, &l->obj_type, + l->counters, STATS_PX_CAP_LI, + flags & STAT_F_FMT_FILE); + } + else if (!(flags & STAT_F_FMT_FILE)) { + switch (i) { + case ST_I_PX_PXNAME: + field = mkf_str(FO_KEY|FN_NAME|FS_SERVICE, px->id); + break; + case ST_I_PX_SVNAME: + field = mkf_str(FO_KEY|FN_NAME|FS_SERVICE, l->name); + break; + case ST_I_PX_MODE: + field = mkf_str(FO_CONFIG|FS_SERVICE, proxy_mode_str(px->mode)); + break; + case ST_I_PX_SCUR: + field = mkf_u32(0, l->nbconn); + break; + case ST_I_PX_SMAX: + field = mkf_u32(FN_MAX, l->counters->conn_max); + break; + case ST_I_PX_SLIM: + field = mkf_u32(FO_CONFIG|FN_LIMIT, l->bind_conf->maxconn); + break; + case ST_I_PX_STATUS: + field = mkf_str(FO_STATUS, li_status_st[get_li_status(l)]); + break; + case ST_I_PX_PID: + field = mkf_u32(FO_KEY, 1); + break; + case ST_I_PX_IID: + field = mkf_u32(FO_KEY|FS_SERVICE, px->uuid); + break; + case ST_I_PX_SID: + field = mkf_u32(FO_KEY|FS_SERVICE, l->luid); + break; + case ST_I_PX_TYPE: + field = mkf_u32(FO_CONFIG|FS_SERVICE, STATS_TYPE_SO); + break; + case ST_I_PX_ADDR: + if (flags & 
STAT_F_SHLGNDS) { + char str[INET6_ADDRSTRLEN]; + int port; + + port = get_host_port(&l->rx.addr); + switch (addr_to_str(&l->rx.addr, str, sizeof(str))) { + case AF_INET: + field = mkf_str(FO_CONFIG|FS_SERVICE, chunk_newstr(out)); + chunk_appendf(out, "%s:%d", str, port); + break; + case AF_INET6: + field = mkf_str(FO_CONFIG|FS_SERVICE, chunk_newstr(out)); + chunk_appendf(out, "[%s]:%d", str, port); + break; + case AF_UNIX: + field = mkf_str(FO_CONFIG|FS_SERVICE, "unix"); + break; + case -1: + field = mkf_str(FO_CONFIG|FS_SERVICE, chunk_newstr(out)); + chunk_strcat(out, strerror(errno)); + break; + default: /* address family not supported */ + break; + } + } + break; + case ST_I_PX_PROTO: + field = mkf_str(FO_STATUS, l->rx.proto->name); + break; + default: + /* not used for listen. If a specific field + * is requested, return an error. Otherwise continue. + */ + if (selected_field != NULL) + return 0; + continue; + } + } + line[i] = field; + if (selected_field != NULL) + break; + } + return 1; +} + +/* Dumps a line for listener <l> and proxy <px> to chunk ctx buffer and uses + * the state from stream connector <sc>. The caller is responsible for clearing + * chunk ctx buffer if needed. Returns non-zero if it emits anything, zero + * otherwise. 
+ */ +static int stats_dump_li_line(struct stconn *sc, struct proxy *px, struct listener *l) +{ + struct appctx *appctx = __sc_appctx(sc); + struct show_stat_ctx *ctx = appctx->svcctx; + struct field *line = stat_lines[STATS_DOMAIN_PROXY]; + struct stats_module *mod; + size_t stats_count = ST_I_PX_MAX; + + memset(line, 0, sizeof(struct field) * stat_cols_len[STATS_DOMAIN_PROXY]); + + if (!stats_fill_li_line(px, l, ctx->flags, line, + ST_I_PX_MAX, NULL)) + return 0; + + list_for_each_entry(mod, &stats_module_list[STATS_DOMAIN_PROXY], list) { + void *counters; + + if (ctx->flags & STAT_F_FMT_FILE) + continue; + + if (!(stats_px_get_cap(mod->domain_flags) & STATS_PX_CAP_LI)) { + stats_count += mod->stats_count; + continue; + } + + counters = EXTRA_COUNTERS_GET(l->extra_counters, mod); + if (!mod->fill_stats(counters, line + stats_count, NULL)) + continue; + stats_count += mod->stats_count; + } + + return stats_dump_one_line(line, stats_count, appctx); +} + +enum srv_stats_state { + SRV_STATS_STATE_DOWN = 0, + SRV_STATS_STATE_DOWN_AGENT, + SRV_STATS_STATE_GOING_UP, + SRV_STATS_STATE_UP_GOING_DOWN, + SRV_STATS_STATE_UP, + SRV_STATS_STATE_NOLB_GOING_DOWN, + SRV_STATS_STATE_NOLB, + SRV_STATS_STATE_DRAIN_GOING_DOWN, + SRV_STATS_STATE_DRAIN, + SRV_STATS_STATE_DRAIN_AGENT, + SRV_STATS_STATE_NO_CHECK, + + SRV_STATS_STATE_COUNT, /* Must be last */ +}; + +static const char *srv_hlt_st[SRV_STATS_STATE_COUNT] = { + [SRV_STATS_STATE_DOWN] = "DOWN", + [SRV_STATS_STATE_DOWN_AGENT] = "DOWN (agent)", + [SRV_STATS_STATE_GOING_UP] = "DOWN %d/%d", + [SRV_STATS_STATE_UP_GOING_DOWN] = "UP %d/%d", + [SRV_STATS_STATE_UP] = "UP", + [SRV_STATS_STATE_NOLB_GOING_DOWN] = "NOLB %d/%d", + [SRV_STATS_STATE_NOLB] = "NOLB", + [SRV_STATS_STATE_DRAIN_GOING_DOWN] = "DRAIN %d/%d", + [SRV_STATS_STATE_DRAIN] = "DRAIN", + [SRV_STATS_STATE_DRAIN_AGENT] = "DRAIN (agent)", + [SRV_STATS_STATE_NO_CHECK] = "no check" +}; + +/* Compute server state helper + */ +static void stats_fill_sv_computestate(struct server 
*sv, struct server *ref, + enum srv_stats_state *state) +{ + if (sv->cur_state == SRV_ST_RUNNING || sv->cur_state == SRV_ST_STARTING) { + if ((ref->check.state & CHK_ST_ENABLED) && + (ref->check.health < ref->check.rise + ref->check.fall - 1)) { + *state = SRV_STATS_STATE_UP_GOING_DOWN; + } else { + *state = SRV_STATS_STATE_UP; + } + + if (sv->cur_admin & SRV_ADMF_DRAIN) { + if (ref->agent.state & CHK_ST_ENABLED) + *state = SRV_STATS_STATE_DRAIN_AGENT; + else if (*state == SRV_STATS_STATE_UP_GOING_DOWN) + *state = SRV_STATS_STATE_DRAIN_GOING_DOWN; + else + *state = SRV_STATS_STATE_DRAIN; + } + + if (*state == SRV_STATS_STATE_UP && !(ref->check.state & CHK_ST_ENABLED)) { + *state = SRV_STATS_STATE_NO_CHECK; + } + } + else if (sv->cur_state == SRV_ST_STOPPING) { + if ((!(sv->check.state & CHK_ST_ENABLED) && !sv->track) || + (ref->check.health == ref->check.rise + ref->check.fall - 1)) { + *state = SRV_STATS_STATE_NOLB; + } else { + *state = SRV_STATS_STATE_NOLB_GOING_DOWN; + } + } + else { /* stopped */ + if ((ref->agent.state & CHK_ST_ENABLED) && !ref->agent.health) { + *state = SRV_STATS_STATE_DOWN_AGENT; + } else if ((ref->check.state & CHK_ST_ENABLED) && !ref->check.health) { + *state = SRV_STATS_STATE_DOWN; /* DOWN */ + } else if ((ref->agent.state & CHK_ST_ENABLED) || (ref->check.state & CHK_ST_ENABLED)) { + *state = SRV_STATS_STATE_GOING_UP; + } else { + *state = SRV_STATS_STATE_DOWN; /* DOWN, unchecked */ + } + } +} + +/* Fill <line> with the backend statistics. <line> is preallocated array of + * length <len>. If <selected_field> is != NULL, only fill this one. The length + * of the array must be at least ST_I_PX_MAX. If this length is less than + * this value, or if the selected field is not implemented for servers, the + * function returns 0, otherwise, it returns 1. <flags> can take the value + * STAT_F_SHLGNDS. 
+ */ +int stats_fill_sv_line(struct proxy *px, struct server *sv, int flags, + struct field *line, int len, + enum stat_idx_px *index) +{ + enum stat_idx_px i = index ? *index : 0; + struct server *via = sv->track ? sv->track : sv; + struct server *ref = via; + enum srv_stats_state state = 0; + char str[INET6_ADDRSTRLEN]; + struct buffer *out = get_trash_chunk(); + char *fld_status; + long long srv_samples_counter; + unsigned int srv_samples_window = TIME_STATS_SAMPLES; + + if (len < ST_I_PX_MAX) + return 0; + + chunk_reset(out); + + /* compute state for later use */ + if (!index || *index == ST_I_PX_STATUS || + *index == ST_I_PX_CHECK_RISE || *index == ST_I_PX_CHECK_FALL || + *index == ST_I_PX_CHECK_HEALTH || *index == ST_I_PX_HANAFAIL) { + /* we have "via" which is the tracked server as described in the configuration, + * and "ref" which is the checked server and the end of the chain. + */ + while (ref->track) + ref = ref->track; + stats_fill_sv_computestate(sv, ref, &state); + } + + /* compue time values for later use */ + if (index == NULL || *index == ST_I_PX_QTIME || + *index == ST_I_PX_CTIME || *index == ST_I_PX_RTIME || + *index == ST_I_PX_TTIME) { + srv_samples_counter = (px->mode == PR_MODE_HTTP) ? 
sv->counters.p.http.cum_req : sv->counters.cum_lbconn; + if (srv_samples_counter < TIME_STATS_SAMPLES && srv_samples_counter > 0) + srv_samples_window = srv_samples_counter; + } + + for (; i < ST_I_PX_MAX; i++) { + const struct stat_col *col = &stat_cols_px[i]; + struct field field = { 0 }; + + if (stcol_is_generic(col)) { + field = me_generate_field(col, i, &sv->obj_type, + &sv->counters, STATS_PX_CAP_SRV, + flags & STAT_F_FMT_FILE); + } + else if (!(flags & STAT_F_FMT_FILE)) { + switch (i) { + case ST_I_PX_PXNAME: + field = mkf_str(FO_KEY|FN_NAME|FS_SERVICE, px->id); + break; + case ST_I_PX_SVNAME: + field = mkf_str(FO_KEY|FN_NAME|FS_SERVICE, sv->id); + break; + case ST_I_PX_MODE: + field = mkf_str(FO_CONFIG|FS_SERVICE, proxy_mode_str(px->mode)); + break; + case ST_I_PX_QCUR: + field = mkf_u32(0, sv->queue.length); + break; + case ST_I_PX_QMAX: + field = mkf_u32(FN_MAX, sv->counters.nbpend_max); + break; + case ST_I_PX_SCUR: + field = mkf_u32(0, sv->cur_sess); + break; + case ST_I_PX_SMAX: + field = mkf_u32(FN_MAX, sv->counters.cur_sess_max); + break; + case ST_I_PX_SLIM: + if (sv->maxconn) + field = mkf_u32(FO_CONFIG|FN_LIMIT, sv->maxconn); + break; + case ST_I_PX_SRV_ICUR: + field = mkf_u32(0, sv->curr_idle_conns); + break; + case ST_I_PX_SRV_ILIM: + if (sv->max_idle_conns != -1) + field = mkf_u32(FO_CONFIG|FN_LIMIT, sv->max_idle_conns); + break; + case ST_I_PX_IDLE_CONN_CUR: + field = mkf_u32(0, sv->curr_idle_nb); + break; + case ST_I_PX_SAFE_CONN_CUR: + field = mkf_u32(0, sv->curr_safe_nb); + break; + case ST_I_PX_USED_CONN_CUR: + field = mkf_u32(0, sv->curr_used_conns); + break; + case ST_I_PX_NEED_CONN_EST: + field = mkf_u32(0, sv->est_need_conns); + break; + case ST_I_PX_STATUS: + fld_status = chunk_newstr(out); + if (sv->cur_admin & SRV_ADMF_RMAINT) + chunk_appendf(out, "MAINT (resolution)"); + else if (sv->cur_admin & SRV_ADMF_IMAINT) + chunk_appendf(out, "MAINT (via %s/%s)", via->proxy->id, via->id); + else if (sv->cur_admin & SRV_ADMF_MAINT) + 
chunk_appendf(out, "MAINT"); + else + chunk_appendf(out, + srv_hlt_st[state], + (ref->cur_state != SRV_ST_STOPPED) ? (ref->check.health - ref->check.rise + 1) : (ref->check.health), + (ref->cur_state != SRV_ST_STOPPED) ? (ref->check.fall) : (ref->check.rise)); + + field = mkf_str(FO_STATUS, fld_status); + break; + case ST_I_PX_WEIGHT: + field = mkf_u32(FN_AVG, (sv->cur_eweight * px->lbprm.wmult + px->lbprm.wdiv - 1) / px->lbprm.wdiv); + break; + case ST_I_PX_UWEIGHT: + field = mkf_u32(FN_AVG, sv->uweight); + break; + case ST_I_PX_ACT: + field = mkf_u32(FO_STATUS, (sv->flags & SRV_F_BACKUP) ? 0 : 1); + break; + case ST_I_PX_BCK: + field = mkf_u32(FO_STATUS, (sv->flags & SRV_F_BACKUP) ? 1 : 0); + break; + case ST_I_PX_DOWNTIME: + if (sv->check.state & CHK_ST_ENABLED) + field = mkf_u32(FN_COUNTER, srv_downtime(sv)); + break; + case ST_I_PX_QLIMIT: + if (sv->maxqueue) + field = mkf_u32(FO_CONFIG|FS_SERVICE, sv->maxqueue); + break; + case ST_I_PX_PID: + field = mkf_u32(FO_KEY, 1); + break; + case ST_I_PX_IID: + field = mkf_u32(FO_KEY|FS_SERVICE, px->uuid); + break; + case ST_I_PX_SID: + field = mkf_u32(FO_KEY|FS_SERVICE, sv->puid); + break; + case ST_I_PX_SRID: + field = mkf_u32(FN_COUNTER, sv->rid); + break; + case ST_I_PX_THROTTLE: + if (sv->cur_state == SRV_ST_STARTING && !server_is_draining(sv)) + field = mkf_u32(FN_AVG, server_throttle_rate(sv)); + break; + case ST_I_PX_TRACKED: + if (sv->track) { + char *fld_track = chunk_newstr(out); + chunk_appendf(out, "%s/%s", sv->track->proxy->id, sv->track->id); + field = mkf_str(FO_CONFIG|FN_NAME|FS_SERVICE, fld_track); + } + break; + case ST_I_PX_TYPE: + field = mkf_u32(FO_CONFIG|FS_SERVICE, STATS_TYPE_SV); + break; + case ST_I_PX_RATE_MAX: + field = mkf_u32(FN_MAX, sv->counters.sps_max); + break; + case ST_I_PX_CHECK_STATUS: + if ((sv->check.state & (CHK_ST_ENABLED|CHK_ST_PAUSED)) == CHK_ST_ENABLED) { + const char *fld_chksts; + + fld_chksts = chunk_newstr(out); + chunk_strcat(out, "* "); // for check in progress + 
chunk_strcat(out, get_check_status_info(sv->check.status)); + if (!(sv->check.state & CHK_ST_INPROGRESS)) + fld_chksts += 2; // skip "* " + field = mkf_str(FN_OUTPUT, fld_chksts); + } + break; + case ST_I_PX_CHECK_CODE: + if ((sv->check.state & (CHK_ST_ENABLED|CHK_ST_PAUSED)) == CHK_ST_ENABLED && + sv->check.status >= HCHK_STATUS_L57DATA) + field = mkf_u32(FN_OUTPUT, sv->check.code); + break; + case ST_I_PX_CHECK_DURATION: + if ((sv->check.state & (CHK_ST_ENABLED|CHK_ST_PAUSED)) == CHK_ST_ENABLED && + sv->check.status >= HCHK_STATUS_CHECKED) + field = mkf_u64(FN_DURATION, MAX(sv->check.duration, 0)); + break; + case ST_I_PX_CHECK_DESC: + if ((sv->check.state & (CHK_ST_ENABLED|CHK_ST_PAUSED)) == CHK_ST_ENABLED) + field = mkf_str(FN_OUTPUT, get_check_status_description(sv->check.status)); + break; + case ST_I_PX_LAST_CHK: + if ((sv->check.state & (CHK_ST_ENABLED|CHK_ST_PAUSED)) == CHK_ST_ENABLED) + field = mkf_str(FN_OUTPUT, sv->check.desc); + break; + case ST_I_PX_CHECK_RISE: + if ((sv->check.state & (CHK_ST_ENABLED|CHK_ST_PAUSED)) == CHK_ST_ENABLED) + field = mkf_u32(FO_CONFIG|FS_SERVICE, ref->check.rise); + break; + case ST_I_PX_CHECK_FALL: + if ((sv->check.state & (CHK_ST_ENABLED|CHK_ST_PAUSED)) == CHK_ST_ENABLED) + field = mkf_u32(FO_CONFIG|FS_SERVICE, ref->check.fall); + break; + case ST_I_PX_CHECK_HEALTH: + if ((sv->check.state & (CHK_ST_ENABLED|CHK_ST_PAUSED)) == CHK_ST_ENABLED) + field = mkf_u32(FO_CONFIG|FS_SERVICE, ref->check.health); + break; + case ST_I_PX_AGENT_STATUS: + if ((sv->agent.state & (CHK_ST_ENABLED|CHK_ST_PAUSED)) == CHK_ST_ENABLED) { + const char *fld_chksts; + + fld_chksts = chunk_newstr(out); + chunk_strcat(out, "* "); // for check in progress + chunk_strcat(out, get_check_status_info(sv->agent.status)); + if (!(sv->agent.state & CHK_ST_INPROGRESS)) + fld_chksts += 2; // skip "* " + field = mkf_str(FN_OUTPUT, fld_chksts); + } + break; + case ST_I_PX_AGENT_CODE: + if ((sv->agent.state & (CHK_ST_ENABLED|CHK_ST_PAUSED)) == CHK_ST_ENABLED && + 
(sv->agent.status >= HCHK_STATUS_L57DATA)) + field = mkf_u32(FN_OUTPUT, sv->agent.code); + break; + case ST_I_PX_AGENT_DURATION: + if ((sv->agent.state & (CHK_ST_ENABLED|CHK_ST_PAUSED)) == CHK_ST_ENABLED) + field = mkf_u64(FN_DURATION, sv->agent.duration); + break; + case ST_I_PX_AGENT_DESC: + if ((sv->agent.state & (CHK_ST_ENABLED|CHK_ST_PAUSED)) == CHK_ST_ENABLED) + field = mkf_str(FN_OUTPUT, get_check_status_description(sv->agent.status)); + break; + case ST_I_PX_LAST_AGT: + if ((sv->agent.state & (CHK_ST_ENABLED|CHK_ST_PAUSED)) == CHK_ST_ENABLED) + field = mkf_str(FN_OUTPUT, sv->agent.desc); + break; + case ST_I_PX_AGENT_RISE: + if ((sv->agent.state & (CHK_ST_ENABLED|CHK_ST_PAUSED)) == CHK_ST_ENABLED) + field = mkf_u32(FO_CONFIG|FS_SERVICE, sv->agent.rise); + break; + case ST_I_PX_AGENT_FALL: + if ((sv->agent.state & (CHK_ST_ENABLED|CHK_ST_PAUSED)) == CHK_ST_ENABLED) + field = mkf_u32(FO_CONFIG|FS_SERVICE, sv->agent.fall); + break; + case ST_I_PX_AGENT_HEALTH: + if ((sv->agent.state & (CHK_ST_ENABLED|CHK_ST_PAUSED)) == CHK_ST_ENABLED) + field = mkf_u32(FO_CONFIG|FS_SERVICE, sv->agent.health); + break; + case ST_I_PX_QTIME: + field = mkf_u32(FN_AVG, swrate_avg(sv->counters.q_time, srv_samples_window)); + break; + case ST_I_PX_CTIME: + field = mkf_u32(FN_AVG, swrate_avg(sv->counters.c_time, srv_samples_window)); + break; + case ST_I_PX_RTIME: + field = mkf_u32(FN_AVG, swrate_avg(sv->counters.d_time, srv_samples_window)); + break; + case ST_I_PX_TTIME: + field = mkf_u32(FN_AVG, swrate_avg(sv->counters.t_time, srv_samples_window)); + break; + case ST_I_PX_QT_MAX: + field = mkf_u32(FN_MAX, sv->counters.qtime_max); + break; + case ST_I_PX_CT_MAX: + field = mkf_u32(FN_MAX, sv->counters.ctime_max); + break; + case ST_I_PX_RT_MAX: + field = mkf_u32(FN_MAX, sv->counters.dtime_max); + break; + case ST_I_PX_TT_MAX: + field = mkf_u32(FN_MAX, sv->counters.ttime_max); + break; + case ST_I_PX_ADDR: + if (flags & STAT_F_SHLGNDS) { + switch (addr_to_str(&sv->addr, str, 
sizeof(str))) { + case AF_INET: + field = mkf_str(FO_CONFIG|FS_SERVICE, chunk_newstr(out)); + chunk_appendf(out, "%s:%d", str, sv->svc_port); + break; + case AF_INET6: + field = mkf_str(FO_CONFIG|FS_SERVICE, chunk_newstr(out)); + chunk_appendf(out, "[%s]:%d", str, sv->svc_port); + break; + case AF_UNIX: + field = mkf_str(FO_CONFIG|FS_SERVICE, "unix"); + break; + case -1: + field = mkf_str(FO_CONFIG|FS_SERVICE, chunk_newstr(out)); + chunk_strcat(out, strerror(errno)); + break; + default: /* address family not supported */ + break; + } + } + break; + case ST_I_PX_COOKIE: + if (flags & STAT_F_SHLGNDS && sv->cookie) + field = mkf_str(FO_CONFIG|FN_NAME|FS_SERVICE, sv->cookie); + break; + default: + /* not used for servers. If a specific field + * is requested, return an error. Otherwise continue. + */ + if (index) + return 0; + continue; + } + } + line[i] = field; + if (index) + break; + } + return 1; +} + +/* Dumps a line for server <sv> and proxy <px> to chunk ctx buffer and uses the + * state from stream connector <sc>, and server state <state>. The caller is + * responsible for clearing the chunk ctx buffer if needed. Returns non-zero if + * it emits anything, zero otherwise. 
+ */ +static int stats_dump_sv_line(struct stconn *sc, struct proxy *px, struct server *sv) +{ + struct appctx *appctx = __sc_appctx(sc); + struct show_stat_ctx *ctx = appctx->svcctx; + struct stats_module *mod; + struct field *line = stat_lines[STATS_DOMAIN_PROXY]; + size_t stats_count = ST_I_PX_MAX; + + memset(line, 0, sizeof(struct field) * stat_cols_len[STATS_DOMAIN_PROXY]); + + if (!stats_fill_sv_line(px, sv, ctx->flags, line, + ST_I_PX_MAX, NULL)) + return 0; + + list_for_each_entry(mod, &stats_module_list[STATS_DOMAIN_PROXY], list) { + void *counters; + + if (ctx->flags & STAT_F_FMT_FILE) + continue; + + if (stats_get_domain(mod->domain_flags) != STATS_DOMAIN_PROXY) + continue; + + if (!(stats_px_get_cap(mod->domain_flags) & STATS_PX_CAP_SRV)) { + stats_count += mod->stats_count; + continue; + } + + counters = EXTRA_COUNTERS_GET(sv->extra_counters, mod); + if (!mod->fill_stats(counters, line + stats_count, NULL)) + continue; + stats_count += mod->stats_count; + } + + return stats_dump_one_line(line, stats_count, appctx); +} + +/* Helper to compute srv values for a given backend + */ +static void stats_fill_be_computesrv(struct proxy *px, int *nbup, int *nbsrv, int *totuw) +{ + int nbup_tmp, nbsrv_tmp, totuw_tmp; + const struct server *srv; + + nbup_tmp = nbsrv_tmp = totuw_tmp = 0; + for (srv = px->srv; srv; srv = srv->next) { + if (srv->cur_state != SRV_ST_STOPPED) { + nbup_tmp++; + if (srv_currently_usable(srv) && + (!px->srv_act ^ !(srv->flags & SRV_F_BACKUP))) + totuw_tmp += srv->uweight; + } + nbsrv_tmp++; + } + + HA_RWLOCK_RDLOCK(LBPRM_LOCK, &px->lbprm.lock); + if (!px->srv_act && px->lbprm.fbck) + totuw_tmp = px->lbprm.fbck->uweight; + HA_RWLOCK_RDUNLOCK(LBPRM_LOCK, &px->lbprm.lock); + + /* use tmp variable then assign result to make gcc happy */ + *nbup = nbup_tmp; + *nbsrv = nbsrv_tmp; + *totuw = totuw_tmp; +} + +/* Fill <line> with the backend statistics. <line> is preallocated array of + * length <len>. If <index> is != NULL, only fill this one. 
The length + * of the array must be at least ST_I_PX_MAX. If this length is less than + * this value, or if the selected field is not implemented for backends, the + * function returns 0, otherwise, it returns 1. <flags> can take the value + * STAT_F_SHLGNDS. + */ +int stats_fill_be_line(struct proxy *px, int flags, struct field *line, int len, + enum stat_idx_px *index) +{ + enum stat_idx_px i = index ? *index : 0; + long long be_samples_counter; + unsigned int be_samples_window = TIME_STATS_SAMPLES; + struct buffer *out = get_trash_chunk(); + int nbup, nbsrv, totuw; + char *fld; + + if (len < ST_I_PX_MAX) + return 0; + + nbup = nbsrv = totuw = 0; + /* some srv values compute for later if we either select all fields or + * need them for one of the mentioned ones */ + if (!index || *index == ST_I_PX_STATUS || + *index == ST_I_PX_UWEIGHT) + stats_fill_be_computesrv(px, &nbup, &nbsrv, &totuw); + + /* same here but specific to time fields */ + if (!index || *index == ST_I_PX_QTIME || + *index == ST_I_PX_CTIME || *index == ST_I_PX_RTIME || + *index == ST_I_PX_TTIME) { + be_samples_counter = (px->mode == PR_MODE_HTTP) ? 
px->be_counters.p.http.cum_req : px->be_counters.cum_lbconn; + if (be_samples_counter < TIME_STATS_SAMPLES && be_samples_counter > 0) + be_samples_window = be_samples_counter; + } + + for (; i < ST_I_PX_MAX; i++) { + const struct stat_col *col = &stat_cols_px[i]; + struct field field = { 0 }; + + if (stcol_is_generic(col)) { + field = me_generate_field(col, i, &px->obj_type, + &px->be_counters, STATS_PX_CAP_BE, + flags & STAT_F_FMT_FILE); + } + else if (!(flags & STAT_F_FMT_FILE)) { + switch (i) { + case ST_I_PX_PXNAME: + field = mkf_str(FO_KEY|FN_NAME|FS_SERVICE, px->id); + break; + case ST_I_PX_SVNAME: + field = mkf_str(FO_KEY|FN_NAME|FS_SERVICE, "BACKEND"); + break; + case ST_I_PX_MODE: + field = mkf_str(FO_CONFIG|FS_SERVICE, proxy_mode_str(px->mode)); + break; + case ST_I_PX_QCUR: + field = mkf_u32(0, px->queue.length); + break; + case ST_I_PX_QMAX: + field = mkf_u32(FN_MAX, px->be_counters.nbpend_max); + break; + case ST_I_PX_SCUR: + field = mkf_u32(0, px->beconn); + break; + case ST_I_PX_SMAX: + field = mkf_u32(FN_MAX, px->be_counters.conn_max); + break; + case ST_I_PX_SLIM: + field = mkf_u32(FO_CONFIG|FN_LIMIT, px->fullconn); + break; + case ST_I_PX_STATUS: + fld = chunk_newstr(out); + chunk_appendf(out, "%s", (px->lbprm.tot_weight > 0 || !px->srv) ? 
"UP" : "DOWN"); + if (flags & (STAT_F_HIDE_MAINT|STAT_F_HIDE_DOWN)) + chunk_appendf(out, " (%d/%d)", nbup, nbsrv); + field = mkf_str(FO_STATUS, fld); + break; + case ST_I_PX_AGG_SRV_CHECK_STATUS: // DEPRECATED + case ST_I_PX_AGG_SRV_STATUS: + field = mkf_u32(FN_GAUGE, 0); + break; + case ST_I_PX_AGG_CHECK_STATUS: + field = mkf_u32(FN_GAUGE, 0); + break; + case ST_I_PX_WEIGHT: + field = mkf_u32(FN_AVG, (px->lbprm.tot_weight * px->lbprm.wmult + px->lbprm.wdiv - 1) / px->lbprm.wdiv); + break; + case ST_I_PX_UWEIGHT: + field = mkf_u32(FN_AVG, totuw); + break; + case ST_I_PX_ACT: + field = mkf_u32(0, px->srv_act); + break; + case ST_I_PX_BCK: + field = mkf_u32(0, px->srv_bck); + break; + case ST_I_PX_DOWNTIME: + if (px->srv) + field = mkf_u32(FN_COUNTER, be_downtime(px)); + break; + case ST_I_PX_PID: + field = mkf_u32(FO_KEY, 1); + break; + case ST_I_PX_IID: + field = mkf_u32(FO_KEY|FS_SERVICE, px->uuid); + break; + case ST_I_PX_SID: + field = mkf_u32(FO_KEY|FS_SERVICE, 0); + break; + case ST_I_PX_TYPE: + field = mkf_u32(FO_CONFIG|FS_SERVICE, STATS_TYPE_BE); + break; + case ST_I_PX_RATE_MAX: + field = mkf_u32(0, px->be_counters.sps_max); + break; + case ST_I_PX_COOKIE: + if (flags & STAT_F_SHLGNDS && px->cookie_name) + field = mkf_str(FO_CONFIG|FN_NAME|FS_SERVICE, px->cookie_name); + break; + case ST_I_PX_ALGO: + if (flags & STAT_F_SHLGNDS) + field = mkf_str(FO_CONFIG|FS_SERVICE, backend_lb_algo_str(px->lbprm.algo & BE_LB_ALGO)); + break; + case ST_I_PX_QTIME: + field = mkf_u32(FN_AVG, swrate_avg(px->be_counters.q_time, be_samples_window)); + break; + case ST_I_PX_CTIME: + field = mkf_u32(FN_AVG, swrate_avg(px->be_counters.c_time, be_samples_window)); + break; + case ST_I_PX_RTIME: + field = mkf_u32(FN_AVG, swrate_avg(px->be_counters.d_time, be_samples_window)); + break; + case ST_I_PX_TTIME: + field = mkf_u32(FN_AVG, swrate_avg(px->be_counters.t_time, be_samples_window)); + break; + case ST_I_PX_QT_MAX: + field = mkf_u32(FN_MAX, px->be_counters.qtime_max); + break; + 
case ST_I_PX_CT_MAX: + field = mkf_u32(FN_MAX, px->be_counters.ctime_max); + break; + case ST_I_PX_RT_MAX: + field = mkf_u32(FN_MAX, px->be_counters.dtime_max); + break; + case ST_I_PX_TT_MAX: + field = mkf_u32(FN_MAX, px->be_counters.ttime_max); + break; + default: + /* not used for backends. If a specific field + * is requested, return an error. Otherwise continue. + */ + if (index) + return 0; + continue; + } + } + line[i] = field; + if (index) + break; + } + return 1; +} + +/* Dumps a line for backend <px> to chunk ctx buffer and uses the state from + * stream interface <si>. The caller is responsible for clearing chunk buffer + * if needed. Returns non-zero if it emits anything, zero otherwise. + */ +static int stats_dump_be_line(struct stconn *sc, struct proxy *px) +{ + struct appctx *appctx = __sc_appctx(sc); + struct show_stat_ctx *ctx = appctx->svcctx; + struct field *line = stat_lines[STATS_DOMAIN_PROXY]; + struct stats_module *mod; + size_t stats_count = ST_I_PX_MAX; + + if (!(px->cap & PR_CAP_BE)) + return 0; + + if ((ctx->flags & STAT_F_BOUND) && !(ctx->type & (1 << STATS_TYPE_BE))) + return 0; + + memset(line, 0, sizeof(struct field) * stat_cols_len[STATS_DOMAIN_PROXY]); + + if (!stats_fill_be_line(px, ctx->flags, line, ST_I_PX_MAX, NULL)) + return 0; + + list_for_each_entry(mod, &stats_module_list[STATS_DOMAIN_PROXY], list) { + struct extra_counters *counters; + + if (ctx->flags & STAT_F_FMT_FILE) + continue; + + if (stats_get_domain(mod->domain_flags) != STATS_DOMAIN_PROXY) + continue; + + if (!(stats_px_get_cap(mod->domain_flags) & STATS_PX_CAP_BE)) { + stats_count += mod->stats_count; + continue; + } + + counters = EXTRA_COUNTERS_GET(px->extra_counters_be, mod); + if (!mod->fill_stats(counters, line + stats_count, NULL)) + continue; + stats_count += mod->stats_count; + } + + return stats_dump_one_line(line, stats_count, appctx); +} + +/* + * Dumps statistics for a proxy. The output is sent to the stream connector's + * input buffer. 
Returns 0 if it had to stop dumping data because of lack of + * buffer space, or non-zero if everything completed. This function is used + * both by the CLI and the HTTP entry points, and is able to dump the output + * in HTML or CSV formats. + */ +static int stats_dump_proxy_to_buffer(struct stconn *sc, struct buffer *buf, + struct htx *htx, struct proxy *px) +{ + struct appctx *appctx = __sc_appctx(sc); + struct show_stat_ctx *ctx = appctx->svcctx; + struct buffer *chk = &ctx->chunk; + struct server *sv, *svs; /* server and server-state, server-state=server or server->track */ + struct listener *l; + struct uri_auth *uri = NULL; + int current_field; + int px_st = ctx->px_st; + + if (ctx->http_px) + uri = ctx->http_px->uri_auth; + chunk_reset(chk); +more: + current_field = ctx->field; + + switch (ctx->px_st) { + case STAT_PX_ST_INIT: + /* we are on a new proxy */ + if (uri && uri->scope) { + /* we have a limited scope, we have to check the proxy name */ + struct stat_scope *scope; + int len; + + len = strlen(px->id); + scope = uri->scope; + + while (scope) { + /* match exact proxy name */ + if (scope->px_len == len && !memcmp(px->id, scope->px_id, len)) + break; + + /* match '.' which means 'self' proxy */ + if (strcmp(scope->px_id, ".") == 0 && px == ctx->http_px) + break; + scope = scope->next; + } + + /* proxy name not found : don't dump anything */ + if (scope == NULL) + return 1; + } + + /* if the user has requested a limited output and the proxy + * name does not match, skip it. 
+ */ + if (ctx->scope_len) { + const char *scope_ptr = stats_scope_ptr(appctx); + + if (strnistr(px->id, strlen(px->id), scope_ptr, ctx->scope_len) == NULL) + return 1; + } + + if ((ctx->flags & STAT_F_BOUND) && + (ctx->iid != -1) && + (px->uuid != ctx->iid)) + return 1; + + ctx->px_st = STAT_PX_ST_TH; + __fallthrough; + + case STAT_PX_ST_TH: + if (ctx->flags & STAT_F_FMT_HTML) { + stats_dump_html_px_hdr(sc, px); + if (!stats_putchk(appctx, buf, htx)) + goto full; + } + + ctx->px_st = STAT_PX_ST_FE; + __fallthrough; + + case STAT_PX_ST_FE: + /* print the frontend */ + if (stats_dump_fe_line(sc, px)) { + if (!stats_putchk(appctx, buf, htx)) + goto full; + ctx->flags |= STAT_F_STARTED; + if (ctx->field) + goto more; + } + + current_field = 0; + ctx->obj2 = px->conf.listeners.n; + ctx->px_st = STAT_PX_ST_LI; + __fallthrough; + + case STAT_PX_ST_LI: + /* obj2 points to listeners list as initialized above */ + for (; ctx->obj2 != &px->conf.listeners; ctx->obj2 = l->by_fe.n) { + if (stats_is_full(appctx, buf, htx)) + goto full; + + l = LIST_ELEM(ctx->obj2, struct listener *, by_fe); + if (!l->counters) + continue; + + if (ctx->flags & STAT_F_BOUND) { + if (!(ctx->type & (1 << STATS_TYPE_SO))) + break; + + if (ctx->sid != -1 && l->luid != ctx->sid) + continue; + } + + /* print the frontend */ + if (stats_dump_li_line(sc, px, l)) { + if (!stats_putchk(appctx, buf, htx)) + goto full; + ctx->flags |= STAT_F_STARTED; + if (ctx->field) + goto more; + } + current_field = 0; + } + + ctx->obj2 = px->srv; /* may be NULL */ + ctx->px_st = STAT_PX_ST_SV; + __fallthrough; + + case STAT_PX_ST_SV: + /* check for dump resumption */ + if (px_st == STAT_PX_ST_SV) { + struct server *cur = ctx->obj2; + + /* re-entrant dump */ + BUG_ON(!cur); + if (cur->flags & SRV_F_DELETED) { + /* the server could have been marked as deleted + * between two dumping attempts, skip it. 
+ */ + cur = cur->next; + } + srv_drop(ctx->obj2); /* drop old srv taken on last dumping attempt */ + ctx->obj2 = cur; /* could be NULL */ + /* back to normal */ + } + + /* obj2 points to servers list as initialized above. + * + * A server may be removed during the stats dumping. + * Temporarily increment its refcount to prevent its + * anticipated cleaning. Call srv_drop() to release it. + */ + for (; ctx->obj2 != NULL; + ctx->obj2 = srv_drop(sv)) { + + sv = ctx->obj2; + srv_take(sv); + + if (stats_is_full(appctx, buf, htx)) + goto full; + + if (ctx->flags & STAT_F_BOUND) { + if (!(ctx->type & (1 << STATS_TYPE_SV))) { + srv_drop(sv); + break; + } + + if (ctx->sid != -1 && sv->puid != ctx->sid) + continue; + } + + /* do not report disabled servers */ + if (ctx->flags & STAT_F_HIDE_MAINT && + sv->cur_admin & SRV_ADMF_MAINT) { + continue; + } + + svs = sv; + while (svs->track) + svs = svs->track; + + /* do not report servers which are DOWN and not changing state */ + if ((ctx->flags & STAT_F_HIDE_DOWN) && + ((sv->cur_admin & SRV_ADMF_MAINT) || /* server is in maintenance */ + (sv->cur_state == SRV_ST_STOPPED && /* server is down */ + (!((svs->agent.state | svs->check.state) & CHK_ST_ENABLED) || + ((svs->agent.state & CHK_ST_ENABLED) && !svs->agent.health) || + ((svs->check.state & CHK_ST_ENABLED) && !svs->check.health))))) { + continue; + } + + if (stats_dump_sv_line(sc, px, sv)) { + if (!stats_putchk(appctx, buf, htx)) + goto full; + ctx->flags |= STAT_F_STARTED; + if (ctx->field) + goto more; + } + current_field = 0; + } /* for sv */ + + ctx->px_st = STAT_PX_ST_BE; + __fallthrough; + + case STAT_PX_ST_BE: + /* print the backend */ + if (stats_dump_be_line(sc, px)) { + if (!stats_putchk(appctx, buf, htx)) + goto full; + ctx->flags |= STAT_F_STARTED; + if (ctx->field) + goto more; + } + + current_field = 0; + ctx->px_st = STAT_PX_ST_END; + __fallthrough; + + case STAT_PX_ST_END: + if (ctx->flags & STAT_F_FMT_HTML) { + stats_dump_html_px_end(sc, px); + if 
(!stats_putchk(appctx, buf, htx)) + goto full; + } + + ctx->px_st = STAT_PX_ST_FIN; + __fallthrough; + + case STAT_PX_ST_FIN: + return 1; + + default: + /* unknown state, we should put an abort() here ! */ + return 1; + } + + full: + /* restore previous field */ + ctx->field = current_field; + return 0; +} + +/* Uses <appctx.ctx.stats.obj1> as a pointer to the current proxy and <obj2> as + * a pointer to the current server/listener. + */ +int stats_dump_proxies(struct stconn *sc, struct buffer *buf, struct htx *htx) +{ + struct appctx *appctx = __sc_appctx(sc); + struct show_stat_ctx *ctx = appctx->svcctx; + struct proxy *px; + + /* dump proxies */ + while (ctx->obj1) { + if (stats_is_full(appctx, buf, htx)) + goto full; + + px = ctx->obj1; + /* Skip the global frontend proxies and non-networked ones. + * Also skip proxies that were disabled in the configuration + * This change allows retrieving stats from "old" proxies after a reload. + */ + if (!(px->flags & PR_FL_DISABLED) && px->uuid > 0 && + (px->cap & (PR_CAP_FE | PR_CAP_BE)) && !(px->cap & PR_CAP_INT)) { + if (stats_dump_proxy_to_buffer(sc, buf, htx, px) == 0) + return 0; + } + + ctx->obj1 = px->next; + ctx->px_st = STAT_PX_ST_INIT; + ctx->field = 0; + } + + return 1; + + full: + return 0; +} + +void proxy_stats_clear_counters(int clrall, struct list *stat_modules) +{ + struct proxy *px; + struct server *sv; + struct listener *li; + struct stats_module *mod; + + for (px = proxies_list; px; px = px->next) { + if (clrall) { + memset(&px->be_counters, 0, sizeof(px->be_counters)); + memset(&px->fe_counters, 0, sizeof(px->fe_counters)); + } + else { + px->be_counters.conn_max = 0; + px->be_counters.p.http.rps_max = 0; + px->be_counters.sps_max = 0; + px->be_counters.cps_max = 0; + px->be_counters.nbpend_max = 0; + px->be_counters.qtime_max = 0; + px->be_counters.ctime_max = 0; + px->be_counters.dtime_max = 0; + px->be_counters.ttime_max = 0; + + px->fe_counters.conn_max = 0; + px->fe_counters.p.http.rps_max = 0; 
+ px->fe_counters.sps_max = 0; + px->fe_counters.cps_max = 0; + } + + for (sv = px->srv; sv; sv = sv->next) + if (clrall) + memset(&sv->counters, 0, sizeof(sv->counters)); + else { + sv->counters.cur_sess_max = 0; + sv->counters.nbpend_max = 0; + sv->counters.sps_max = 0; + sv->counters.qtime_max = 0; + sv->counters.ctime_max = 0; + sv->counters.dtime_max = 0; + sv->counters.ttime_max = 0; + } + + list_for_each_entry(li, &px->conf.listeners, by_fe) + if (li->counters) { + if (clrall) + memset(li->counters, 0, sizeof(*li->counters)); + else + li->counters->conn_max = 0; + } + } + + list_for_each_entry(mod, stat_modules, list) { + if (!mod->clearable && !clrall) + continue; + + for (px = proxies_list; px; px = px->next) { + enum stats_domain_px_cap mod_cap = stats_px_get_cap(mod->domain_flags); + + if (px->cap & PR_CAP_FE && mod_cap & STATS_PX_CAP_FE) { + EXTRA_COUNTERS_INIT(px->extra_counters_fe, + mod, + mod->counters, + mod->counters_size); + } + + if (px->cap & PR_CAP_BE && mod_cap & STATS_PX_CAP_BE) { + EXTRA_COUNTERS_INIT(px->extra_counters_be, + mod, + mod->counters, + mod->counters_size); + } + + if (mod_cap & STATS_PX_CAP_SRV) { + for (sv = px->srv; sv; sv = sv->next) { + EXTRA_COUNTERS_INIT(sv->extra_counters, + mod, + mod->counters, + mod->counters_size); + } + } + + if (mod_cap & STATS_PX_CAP_LI) { + list_for_each_entry(li, &px->conf.listeners, by_fe) { + EXTRA_COUNTERS_INIT(li->extra_counters, + mod, + mod->counters, + mod->counters_size); + } + } + } + } +} diff --git a/src/stats.c b/src/stats.c index ac47f00..5db9c26 100644 --- a/src/stats.c +++ b/src/stats.c @@ -12,7 +12,6 @@ */ #include <ctype.h> -#include <errno.h> #include <stdio.h> #include <stdlib.h> #include <string.h> @@ -23,10 +22,10 @@ #include <sys/stat.h> #include <sys/types.h> +#include <import/ebsttree.h> #include <haproxy/api.h> #include <haproxy/activity.h> #include <haproxy/applet.h> -#include <haproxy/backend.h> #include <haproxy/base64.h> #include <haproxy/cfgparse.h> #include 
<haproxy/channel.h> @@ -36,9 +35,7 @@ #include <haproxy/compression.h> #include <haproxy/debug.h> #include <haproxy/errors.h> -#include <haproxy/fd.h> #include <haproxy/freq_ctr.h> -#include <haproxy/frontend.h> #include <haproxy/global.h> #include <haproxy/http.h> #include <haproxy/http_ana.h> @@ -57,6 +54,10 @@ #include <haproxy/server.h> #include <haproxy/session.h> #include <haproxy/stats.h> +#include <haproxy/stats-file.h> +#include <haproxy/stats-html.h> +#include <haproxy/stats-json.h> +#include <haproxy/stats-proxy.h> #include <haproxy/stconn.h> #include <haproxy/stream.h> #include <haproxy/task.h> @@ -66,6 +67,13 @@ #include <haproxy/uri_auth-t.h> #include <haproxy/version.h> +/* Convert stat_col <col> to old-style <name> as name_desc. */ +static void stcol2ndesc(struct name_desc *name, const struct stat_col *col) +{ + name->name = col->name; + name->desc = col->desc; +} + /* status codes available for the stats admin page (strictly 4 chars length) */ const char *stat_status_codes[STAT_STATUS_SIZE] = { @@ -79,267 +87,202 @@ const char *stat_status_codes[STAT_STATUS_SIZE] = { [STAT_STATUS_IVAL] = "IVAL", }; -/* These are the field names for each INF_* field position. Please pay attention +/* These are the column names for each ST_I_INF_* field position. Please pay attention * to always use the exact same name except that the strings for new names must * be lower case or CamelCase while the enum entries must be upper case. 
*/ -const struct name_desc info_fields[INF_TOTAL_FIELDS] = { - [INF_NAME] = { .name = "Name", .desc = "Product name" }, - [INF_VERSION] = { .name = "Version", .desc = "Product version" }, - [INF_RELEASE_DATE] = { .name = "Release_date", .desc = "Date of latest source code update" }, - [INF_NBTHREAD] = { .name = "Nbthread", .desc = "Number of started threads (global.nbthread)" }, - [INF_NBPROC] = { .name = "Nbproc", .desc = "Number of started worker processes (historical, always 1)" }, - [INF_PROCESS_NUM] = { .name = "Process_num", .desc = "Relative worker process number (1)" }, - [INF_PID] = { .name = "Pid", .desc = "This worker process identifier for the system" }, - [INF_UPTIME] = { .name = "Uptime", .desc = "How long ago this worker process was started (days+hours+minutes+seconds)" }, - [INF_UPTIME_SEC] = { .name = "Uptime_sec", .desc = "How long ago this worker process was started (seconds)" }, - [INF_START_TIME_SEC] = { .name = "Start_time_sec", .desc = "Start time in seconds" }, - [INF_MEMMAX_MB] = { .name = "Memmax_MB", .desc = "Worker process's hard limit on memory usage in MB (-m on command line)" }, - [INF_MEMMAX_BYTES] = { .name = "Memmax_bytes", .desc = "Worker process's hard limit on memory usage in byes (-m on command line)" }, - [INF_POOL_ALLOC_MB] = { .name = "PoolAlloc_MB", .desc = "Amount of memory allocated in pools (in MB)" }, - [INF_POOL_ALLOC_BYTES] = { .name = "PoolAlloc_bytes", .desc = "Amount of memory allocated in pools (in bytes)" }, - [INF_POOL_USED_MB] = { .name = "PoolUsed_MB", .desc = "Amount of pool memory currently used (in MB)" }, - [INF_POOL_USED_BYTES] = { .name = "PoolUsed_bytes", .desc = "Amount of pool memory currently used (in bytes)" }, - [INF_POOL_FAILED] = { .name = "PoolFailed", .desc = "Number of failed pool allocations since this worker was started" }, - [INF_ULIMIT_N] = { .name = "Ulimit-n", .desc = "Hard limit on the number of per-process file descriptors" }, - [INF_MAXSOCK] = { .name = "Maxsock", .desc = "Hard limit 
on the number of per-process sockets" }, - [INF_MAXCONN] = { .name = "Maxconn", .desc = "Hard limit on the number of per-process connections (configured or imposed by Ulimit-n)" }, - [INF_HARD_MAXCONN] = { .name = "Hard_maxconn", .desc = "Hard limit on the number of per-process connections (imposed by Memmax_MB or Ulimit-n)" }, - [INF_CURR_CONN] = { .name = "CurrConns", .desc = "Current number of connections on this worker process" }, - [INF_CUM_CONN] = { .name = "CumConns", .desc = "Total number of connections on this worker process since started" }, - [INF_CUM_REQ] = { .name = "CumReq", .desc = "Total number of requests on this worker process since started" }, - [INF_MAX_SSL_CONNS] = { .name = "MaxSslConns", .desc = "Hard limit on the number of per-process SSL endpoints (front+back), 0=unlimited" }, - [INF_CURR_SSL_CONNS] = { .name = "CurrSslConns", .desc = "Current number of SSL endpoints on this worker process (front+back)" }, - [INF_CUM_SSL_CONNS] = { .name = "CumSslConns", .desc = "Total number of SSL endpoints on this worker process since started (front+back)" }, - [INF_MAXPIPES] = { .name = "Maxpipes", .desc = "Hard limit on the number of pipes for splicing, 0=unlimited" }, - [INF_PIPES_USED] = { .name = "PipesUsed", .desc = "Current number of pipes in use in this worker process" }, - [INF_PIPES_FREE] = { .name = "PipesFree", .desc = "Current number of allocated and available pipes in this worker process" }, - [INF_CONN_RATE] = { .name = "ConnRate", .desc = "Number of front connections created on this worker process over the last second" }, - [INF_CONN_RATE_LIMIT] = { .name = "ConnRateLimit", .desc = "Hard limit for ConnRate (global.maxconnrate)" }, - [INF_MAX_CONN_RATE] = { .name = "MaxConnRate", .desc = "Highest ConnRate reached on this worker process since started (in connections per second)" }, - [INF_SESS_RATE] = { .name = "SessRate", .desc = "Number of sessions created on this worker process over the last second" }, - [INF_SESS_RATE_LIMIT] = { .name = 
"SessRateLimit", .desc = "Hard limit for SessRate (global.maxsessrate)" }, - [INF_MAX_SESS_RATE] = { .name = "MaxSessRate", .desc = "Highest SessRate reached on this worker process since started (in sessions per second)" }, - [INF_SSL_RATE] = { .name = "SslRate", .desc = "Number of SSL connections created on this worker process over the last second" }, - [INF_SSL_RATE_LIMIT] = { .name = "SslRateLimit", .desc = "Hard limit for SslRate (global.maxsslrate)" }, - [INF_MAX_SSL_RATE] = { .name = "MaxSslRate", .desc = "Highest SslRate reached on this worker process since started (in connections per second)" }, - [INF_SSL_FRONTEND_KEY_RATE] = { .name = "SslFrontendKeyRate", .desc = "Number of SSL keys created on frontends in this worker process over the last second" }, - [INF_SSL_FRONTEND_MAX_KEY_RATE] = { .name = "SslFrontendMaxKeyRate", .desc = "Highest SslFrontendKeyRate reached on this worker process since started (in SSL keys per second)" }, - [INF_SSL_FRONTEND_SESSION_REUSE_PCT] = { .name = "SslFrontendSessionReuse_pct", .desc = "Percent of frontend SSL connections which did not require a new key" }, - [INF_SSL_BACKEND_KEY_RATE] = { .name = "SslBackendKeyRate", .desc = "Number of SSL keys created on backends in this worker process over the last second" }, - [INF_SSL_BACKEND_MAX_KEY_RATE] = { .name = "SslBackendMaxKeyRate", .desc = "Highest SslBackendKeyRate reached on this worker process since started (in SSL keys per second)" }, - [INF_SSL_CACHE_LOOKUPS] = { .name = "SslCacheLookups", .desc = "Total number of SSL session ID lookups in the SSL session cache on this worker since started" }, - [INF_SSL_CACHE_MISSES] = { .name = "SslCacheMisses", .desc = "Total number of SSL session ID lookups that didn't find a session in the SSL session cache on this worker since started" }, - [INF_COMPRESS_BPS_IN] = { .name = "CompressBpsIn", .desc = "Number of bytes submitted to the HTTP compressor in this worker process over the last second" }, - [INF_COMPRESS_BPS_OUT] = { .name = 
"CompressBpsOut", .desc = "Number of bytes emitted by the HTTP compressor in this worker process over the last second" }, - [INF_COMPRESS_BPS_RATE_LIM] = { .name = "CompressBpsRateLim", .desc = "Limit of CompressBpsOut beyond which HTTP compression is automatically disabled" }, - [INF_ZLIB_MEM_USAGE] = { .name = "ZlibMemUsage", .desc = "Amount of memory currently used by HTTP compression on the current worker process (in bytes)" }, - [INF_MAX_ZLIB_MEM_USAGE] = { .name = "MaxZlibMemUsage", .desc = "Limit on the amount of memory used by HTTP compression above which it is automatically disabled (in bytes, see global.maxzlibmem)" }, - [INF_TASKS] = { .name = "Tasks", .desc = "Total number of tasks in the current worker process (active + sleeping)" }, - [INF_RUN_QUEUE] = { .name = "Run_queue", .desc = "Total number of active tasks+tasklets in the current worker process" }, - [INF_IDLE_PCT] = { .name = "Idle_pct", .desc = "Percentage of last second spent waiting in the current worker thread" }, - [INF_NODE] = { .name = "node", .desc = "Node name (global.node)" }, - [INF_DESCRIPTION] = { .name = "description", .desc = "Node description (global.description)" }, - [INF_STOPPING] = { .name = "Stopping", .desc = "1 if the worker process is currently stopping, otherwise zero" }, - [INF_JOBS] = { .name = "Jobs", .desc = "Current number of active jobs on the current worker process (frontend connections, master connections, listeners)" }, - [INF_UNSTOPPABLE_JOBS] = { .name = "Unstoppable Jobs", .desc = "Current number of unstoppable jobs on the current worker process (master connections)" }, - [INF_LISTENERS] = { .name = "Listeners", .desc = "Current number of active listeners on the current worker process" }, - [INF_ACTIVE_PEERS] = { .name = "ActivePeers", .desc = "Current number of verified active peers connections on the current worker process" }, - [INF_CONNECTED_PEERS] = { .name = "ConnectedPeers", .desc = "Current number of peers having passed the connection step on the 
current worker process" }, - [INF_DROPPED_LOGS] = { .name = "DroppedLogs", .desc = "Total number of dropped logs for current worker process since started" }, - [INF_BUSY_POLLING] = { .name = "BusyPolling", .desc = "1 if busy-polling is currently in use on the worker process, otherwise zero (config.busy-polling)" }, - [INF_FAILED_RESOLUTIONS] = { .name = "FailedResolutions", .desc = "Total number of failed DNS resolutions in current worker process since started" }, - [INF_TOTAL_BYTES_OUT] = { .name = "TotalBytesOut", .desc = "Total number of bytes emitted by current worker process since started" }, - [INF_TOTAL_SPLICED_BYTES_OUT] = { .name = "TotalSplicedBytesOut", .desc = "Total number of bytes emitted by current worker process through a kernel pipe since started" }, - [INF_BYTES_OUT_RATE] = { .name = "BytesOutRate", .desc = "Number of bytes emitted by current worker process over the last second" }, - [INF_DEBUG_COMMANDS_ISSUED] = { .name = "DebugCommandsIssued", .desc = "Number of debug commands issued on this process (anything > 0 is unsafe)" }, - [INF_CUM_LOG_MSGS] = { .name = "CumRecvLogs", .desc = "Total number of log messages received by log-forwarding listeners on this worker process since started" }, - [INF_BUILD_INFO] = { .name = "Build info", .desc = "Build info" }, - [INF_TAINTED] = { .name = "Tainted", .desc = "Experimental features used" }, - [INF_WARNINGS] = { .name = "TotalWarnings", .desc = "Total warnings issued" }, - [INF_MAXCONN_REACHED] = { .name = "MaxconnReached", .desc = "Number of times an accepted connection resulted in Maxconn being reached" }, - [INF_BOOTTIME_MS] = { .name = "BootTime_ms", .desc = "How long ago it took to parse and process the config before being ready (milliseconds)" }, - [INF_NICED_TASKS] = { .name = "Niced_tasks", .desc = "Total number of active tasks+tasklets in the current worker process (Run_queue) that are niced" }, -}; - -const struct name_desc stat_fields[ST_F_TOTAL_FIELDS] = { - [ST_F_PXNAME] = { .name = 
"pxname", .desc = "Proxy name" }, - [ST_F_SVNAME] = { .name = "svname", .desc = "Server name" }, - [ST_F_QCUR] = { .name = "qcur", .desc = "Number of current queued connections" }, - [ST_F_QMAX] = { .name = "qmax", .desc = "Highest value of queued connections encountered since process started" }, - [ST_F_SCUR] = { .name = "scur", .desc = "Number of current sessions on the frontend, backend or server" }, - [ST_F_SMAX] = { .name = "smax", .desc = "Highest value of current sessions encountered since process started" }, - [ST_F_SLIM] = { .name = "slim", .desc = "Frontend/listener/server's maxconn, backend's fullconn" }, - [ST_F_STOT] = { .name = "stot", .desc = "Total number of sessions since process started" }, - [ST_F_BIN] = { .name = "bin", .desc = "Total number of request bytes since process started" }, - [ST_F_BOUT] = { .name = "bout", .desc = "Total number of response bytes since process started" }, - [ST_F_DREQ] = { .name = "dreq", .desc = "Total number of denied requests since process started" }, - [ST_F_DRESP] = { .name = "dresp", .desc = "Total number of denied responses since process started" }, - [ST_F_EREQ] = { .name = "ereq", .desc = "Total number of invalid requests since process started" }, - [ST_F_ECON] = { .name = "econ", .desc = "Total number of failed connections to server since the worker process started" }, - [ST_F_ERESP] = { .name = "eresp", .desc = "Total number of invalid responses since the worker process started" }, - [ST_F_WRETR] = { .name = "wretr", .desc = "Total number of server connection retries since the worker process started" }, - [ST_F_WREDIS] = { .name = "wredis", .desc = "Total number of server redispatches due to connection failures since the worker process started" }, - [ST_F_STATUS] = { .name = "status", .desc = "Frontend/listen status: OPEN/WAITING/FULL/STOP; backend: UP/DOWN; server: last check status" }, - [ST_F_WEIGHT] = { .name = "weight", .desc = "Server's effective weight, or sum of active servers' effective weights for 
a backend" }, - [ST_F_ACT] = { .name = "act", .desc = "Total number of active UP servers with a non-zero weight" }, - [ST_F_BCK] = { .name = "bck", .desc = "Total number of backup UP servers with a non-zero weight" }, - [ST_F_CHKFAIL] = { .name = "chkfail", .desc = "Total number of failed individual health checks per server/backend, since the worker process started" }, - [ST_F_CHKDOWN] = { .name = "chkdown", .desc = "Total number of failed checks causing UP to DOWN server transitions, per server/backend, since the worker process started" }, - [ST_F_LASTCHG] = { .name = "lastchg", .desc = "How long ago the last server state changed, in seconds" }, - [ST_F_DOWNTIME] = { .name = "downtime", .desc = "Total time spent in DOWN state, for server or backend" }, - [ST_F_QLIMIT] = { .name = "qlimit", .desc = "Limit on the number of connections in queue, for servers only (maxqueue argument)" }, - [ST_F_PID] = { .name = "pid", .desc = "Relative worker process number (1)" }, - [ST_F_IID] = { .name = "iid", .desc = "Frontend or Backend numeric identifier ('id' setting)" }, - [ST_F_SID] = { .name = "sid", .desc = "Server numeric identifier ('id' setting)" }, - [ST_F_THROTTLE] = { .name = "throttle", .desc = "Throttling ratio applied to a server's maxconn and weight during the slowstart period (0 to 100%)" }, - [ST_F_LBTOT] = { .name = "lbtot", .desc = "Total number of requests routed by load balancing since the worker process started (ignores queue pop and stickiness)" }, - [ST_F_TRACKED] = { .name = "tracked", .desc = "Name of the other server this server tracks for its state" }, - [ST_F_TYPE] = { .name = "type", .desc = "Type of the object (Listener, Frontend, Backend, Server)" }, - [ST_F_RATE] = { .name = "rate", .desc = "Total number of sessions processed by this object over the last second (sessions for listeners/frontends, requests for backends/servers)" }, - [ST_F_RATE_LIM] = { .name = "rate_lim", .desc = "Limit on the number of sessions accepted in a second (frontend 
only, 'rate-limit sessions' setting)" }, - [ST_F_RATE_MAX] = { .name = "rate_max", .desc = "Highest value of sessions per second observed since the worker process started" }, - [ST_F_CHECK_STATUS] = { .name = "check_status", .desc = "Status report of the server's latest health check, prefixed with '*' if a check is currently in progress" }, - [ST_F_CHECK_CODE] = { .name = "check_code", .desc = "HTTP/SMTP/LDAP status code reported by the latest server health check" }, - [ST_F_CHECK_DURATION] = { .name = "check_duration", .desc = "Total duration of the latest server health check, in milliseconds" }, - [ST_F_HRSP_1XX] = { .name = "hrsp_1xx", .desc = "Total number of HTTP responses with status 100-199 returned by this object since the worker process started" }, - [ST_F_HRSP_2XX] = { .name = "hrsp_2xx", .desc = "Total number of HTTP responses with status 200-299 returned by this object since the worker process started" }, - [ST_F_HRSP_3XX] = { .name = "hrsp_3xx", .desc = "Total number of HTTP responses with status 300-399 returned by this object since the worker process started" }, - [ST_F_HRSP_4XX] = { .name = "hrsp_4xx", .desc = "Total number of HTTP responses with status 400-499 returned by this object since the worker process started" }, - [ST_F_HRSP_5XX] = { .name = "hrsp_5xx", .desc = "Total number of HTTP responses with status 500-599 returned by this object since the worker process started" }, - [ST_F_HRSP_OTHER] = { .name = "hrsp_other", .desc = "Total number of HTTP responses with status <100, >599 returned by this object since the worker process started (error -1 included)" }, - [ST_F_HANAFAIL] = { .name = "hanafail", .desc = "Total number of failed checks caused by an 'on-error' directive after an 'observe' condition matched" }, - [ST_F_REQ_RATE] = { .name = "req_rate", .desc = "Number of HTTP requests processed over the last second on this object" }, - [ST_F_REQ_RATE_MAX] = { .name = "req_rate_max", .desc = "Highest value of http requests observed since the 
worker process started" }, - [ST_F_REQ_TOT] = { .name = "req_tot", .desc = "Total number of HTTP requests processed by this object since the worker process started" }, - [ST_F_CLI_ABRT] = { .name = "cli_abrt", .desc = "Total number of requests or connections aborted by the client since the worker process started" }, - [ST_F_SRV_ABRT] = { .name = "srv_abrt", .desc = "Total number of requests or connections aborted by the server since the worker process started" }, - [ST_F_COMP_IN] = { .name = "comp_in", .desc = "Total number of bytes submitted to the HTTP compressor for this object since the worker process started" }, - [ST_F_COMP_OUT] = { .name = "comp_out", .desc = "Total number of bytes emitted by the HTTP compressor for this object since the worker process started" }, - [ST_F_COMP_BYP] = { .name = "comp_byp", .desc = "Total number of bytes that bypassed HTTP compression for this object since the worker process started (CPU/memory/bandwidth limitation)" }, - [ST_F_COMP_RSP] = { .name = "comp_rsp", .desc = "Total number of HTTP responses that were compressed for this object since the worker process started" }, - [ST_F_LASTSESS] = { .name = "lastsess", .desc = "How long ago some traffic was seen on this object on this worker process, in seconds" }, - [ST_F_LAST_CHK] = { .name = "last_chk", .desc = "Short description of the latest health check report for this server (see also check_desc)" }, - [ST_F_LAST_AGT] = { .name = "last_agt", .desc = "Short description of the latest agent check report for this server (see also agent_desc)" }, - [ST_F_QTIME] = { .name = "qtime", .desc = "Time spent in the queue, in milliseconds, averaged over the 1024 last requests (backend/server)" }, - [ST_F_CTIME] = { .name = "ctime", .desc = "Time spent waiting for a connection to complete, in milliseconds, averaged over the 1024 last requests (backend/server)" }, - [ST_F_RTIME] = { .name = "rtime", .desc = "Time spent waiting for a server response, in milliseconds, averaged over the 1024 
last requests (backend/server)" }, - [ST_F_TTIME] = { .name = "ttime", .desc = "Total request+response time (request+queue+connect+response+processing), in milliseconds, averaged over the 1024 last requests (backend/server)" }, - [ST_F_AGENT_STATUS] = { .name = "agent_status", .desc = "Status report of the server's latest agent check, prefixed with '*' if a check is currently in progress" }, - [ST_F_AGENT_CODE] = { .name = "agent_code", .desc = "Status code reported by the latest server agent check" }, - [ST_F_AGENT_DURATION] = { .name = "agent_duration", .desc = "Total duration of the latest server agent check, in milliseconds" }, - [ST_F_CHECK_DESC] = { .name = "check_desc", .desc = "Textual description of the latest health check report for this server" }, - [ST_F_AGENT_DESC] = { .name = "agent_desc", .desc = "Textual description of the latest agent check report for this server" }, - [ST_F_CHECK_RISE] = { .name = "check_rise", .desc = "Number of successful health checks before declaring a server UP (server 'rise' setting)" }, - [ST_F_CHECK_FALL] = { .name = "check_fall", .desc = "Number of failed health checks before declaring a server DOWN (server 'fall' setting)" }, - [ST_F_CHECK_HEALTH] = { .name = "check_health", .desc = "Current server health check level (0..fall-1=DOWN, fall..rise-1=UP)" }, - [ST_F_AGENT_RISE] = { .name = "agent_rise", .desc = "Number of successful agent checks before declaring a server UP (server 'rise' setting)" }, - [ST_F_AGENT_FALL] = { .name = "agent_fall", .desc = "Number of failed agent checks before declaring a server DOWN (server 'fall' setting)" }, - [ST_F_AGENT_HEALTH] = { .name = "agent_health", .desc = "Current server agent check level (0..fall-1=DOWN, fall..rise-1=UP)" }, - [ST_F_ADDR] = { .name = "addr", .desc = "Server's address:port, shown only if show-legends is set, or at levels oper/admin for the CLI" }, - [ST_F_COOKIE] = { .name = "cookie", .desc = "Backend's cookie name or Server's cookie value, shown only if 
show-legends is set, or at levels oper/admin for the CLI" }, - [ST_F_MODE] = { .name = "mode", .desc = "'mode' setting (tcp/http/health/cli)" }, - [ST_F_ALGO] = { .name = "algo", .desc = "Backend's load balancing algorithm, shown only if show-legends is set, or at levels oper/admin for the CLI" }, - [ST_F_CONN_RATE] = { .name = "conn_rate", .desc = "Number of new connections accepted over the last second on the frontend for this worker process" }, - [ST_F_CONN_RATE_MAX] = { .name = "conn_rate_max", .desc = "Highest value of connections per second observed since the worker process started" }, - [ST_F_CONN_TOT] = { .name = "conn_tot", .desc = "Total number of new connections accepted on this frontend since the worker process started" }, - [ST_F_INTERCEPTED] = { .name = "intercepted", .desc = "Total number of HTTP requests intercepted on the frontend (redirects/stats/services) since the worker process started" }, - [ST_F_DCON] = { .name = "dcon", .desc = "Total number of incoming connections blocked on a listener/frontend by a tcp-request connection rule since the worker process started" }, - [ST_F_DSES] = { .name = "dses", .desc = "Total number of incoming sessions blocked on a listener/frontend by a tcp-request connection rule since the worker process started" }, - [ST_F_WREW] = { .name = "wrew", .desc = "Total number of failed HTTP header rewrites since the worker process started" }, - [ST_F_CONNECT] = { .name = "connect", .desc = "Total number of outgoing connection attempts on this backend/server since the worker process started" }, - [ST_F_REUSE] = { .name = "reuse", .desc = "Total number of reused connection on this backend/server since the worker process started" }, - [ST_F_CACHE_LOOKUPS] = { .name = "cache_lookups", .desc = "Total number of HTTP requests looked up in the cache on this frontend/backend since the worker process started" }, - [ST_F_CACHE_HITS] = { .name = "cache_hits", .desc = "Total number of HTTP requests not found in the cache on this 
frontend/backend since the worker process started" }, - [ST_F_SRV_ICUR] = { .name = "srv_icur", .desc = "Current number of idle connections available for reuse on this server" }, - [ST_F_SRV_ILIM] = { .name = "src_ilim", .desc = "Limit on the number of available idle connections on this server (server 'pool_max_conn' directive)" }, - [ST_F_QT_MAX] = { .name = "qtime_max", .desc = "Maximum observed time spent in the queue, in milliseconds (backend/server)" }, - [ST_F_CT_MAX] = { .name = "ctime_max", .desc = "Maximum observed time spent waiting for a connection to complete, in milliseconds (backend/server)" }, - [ST_F_RT_MAX] = { .name = "rtime_max", .desc = "Maximum observed time spent waiting for a server response, in milliseconds (backend/server)" }, - [ST_F_TT_MAX] = { .name = "ttime_max", .desc = "Maximum observed total request+response time (request+queue+connect+response+processing), in milliseconds (backend/server)" }, - [ST_F_EINT] = { .name = "eint", .desc = "Total number of internal errors since process started"}, - [ST_F_IDLE_CONN_CUR] = { .name = "idle_conn_cur", .desc = "Current number of unsafe idle connections"}, - [ST_F_SAFE_CONN_CUR] = { .name = "safe_conn_cur", .desc = "Current number of safe idle connections"}, - [ST_F_USED_CONN_CUR] = { .name = "used_conn_cur", .desc = "Current number of connections in use"}, - [ST_F_NEED_CONN_EST] = { .name = "need_conn_est", .desc = "Estimated needed number of connections"}, - [ST_F_UWEIGHT] = { .name = "uweight", .desc = "Server's user weight, or sum of active servers' user weights for a backend" }, - [ST_F_AGG_SRV_CHECK_STATUS] = { .name = "agg_server_check_status", .desc = "[DEPRECATED] Backend's aggregated gauge of servers' status" }, - [ST_F_AGG_SRV_STATUS ] = { .name = "agg_server_status", .desc = "Backend's aggregated gauge of servers' status" }, - [ST_F_AGG_CHECK_STATUS] = { .name = "agg_check_status", .desc = "Backend's aggregated gauge of servers' state check status" }, - [ST_F_SRID] = { .name = 
"srid", .desc = "Server id revision, to prevent server id reuse mixups" }, - [ST_F_SESS_OTHER] = { .name = "sess_other", .desc = "Total number of sessions other than HTTP since process started" }, - [ST_F_H1SESS] = { .name = "h1sess", .desc = "Total number of HTTP/1 sessions since process started" }, - [ST_F_H2SESS] = { .name = "h2sess", .desc = "Total number of HTTP/2 sessions since process started" }, - [ST_F_H3SESS] = { .name = "h3sess", .desc = "Total number of HTTP/3 sessions since process started" }, - [ST_F_REQ_OTHER] = { .name = "req_other", .desc = "Total number of sessions other than HTTP processed by this object since the worker process started" }, - [ST_F_H1REQ] = { .name = "h1req", .desc = "Total number of HTTP/1 sessions processed by this object since the worker process started" }, - [ST_F_H2REQ] = { .name = "h2req", .desc = "Total number of hTTP/2 sessions processed by this object since the worker process started" }, - [ST_F_H3REQ] = { .name = "h3req", .desc = "Total number of HTTP/3 sessions processed by this object since the worker process started" }, - [ST_F_PROTO] = { .name = "proto", .desc = "Protocol" }, +const struct name_desc stat_cols_info[ST_I_INF_MAX] = { + [ST_I_INF_NAME] = { .name = "Name", .desc = "Product name" }, + [ST_I_INF_VERSION] = { .name = "Version", .desc = "Product version" }, + [ST_I_INF_RELEASE_DATE] = { .name = "Release_date", .desc = "Date of latest source code update" }, + [ST_I_INF_NBTHREAD] = { .name = "Nbthread", .desc = "Number of started threads (global.nbthread)" }, + [ST_I_INF_NBPROC] = { .name = "Nbproc", .desc = "Number of started worker processes (historical, always 1)" }, + [ST_I_INF_PROCESS_NUM] = { .name = "Process_num", .desc = "Relative worker process number (1)" }, + [ST_I_INF_PID] = { .name = "Pid", .desc = "This worker process identifier for the system" }, + [ST_I_INF_UPTIME] = { .name = "Uptime", .desc = "How long ago this worker process was started (days+hours+minutes+seconds)" }, + 
[ST_I_INF_UPTIME_SEC] = { .name = "Uptime_sec", .desc = "How long ago this worker process was started (seconds)" }, + [ST_I_INF_START_TIME_SEC] = { .name = "Start_time_sec", .desc = "Start time in seconds" }, + [ST_I_INF_MEMMAX_MB] = { .name = "Memmax_MB", .desc = "Worker process's hard limit on memory usage in MB (-m on command line)" }, + [ST_I_INF_MEMMAX_BYTES] = { .name = "Memmax_bytes", .desc = "Worker process's hard limit on memory usage in byes (-m on command line)" }, + [ST_I_INF_POOL_ALLOC_MB] = { .name = "PoolAlloc_MB", .desc = "Amount of memory allocated in pools (in MB)" }, + [ST_I_INF_POOL_ALLOC_BYTES] = { .name = "PoolAlloc_bytes", .desc = "Amount of memory allocated in pools (in bytes)" }, + [ST_I_INF_POOL_USED_MB] = { .name = "PoolUsed_MB", .desc = "Amount of pool memory currently used (in MB)" }, + [ST_I_INF_POOL_USED_BYTES] = { .name = "PoolUsed_bytes", .desc = "Amount of pool memory currently used (in bytes)" }, + [ST_I_INF_POOL_FAILED] = { .name = "PoolFailed", .desc = "Number of failed pool allocations since this worker was started" }, + [ST_I_INF_ULIMIT_N] = { .name = "Ulimit-n", .desc = "Hard limit on the number of per-process file descriptors" }, + [ST_I_INF_MAXSOCK] = { .name = "Maxsock", .desc = "Hard limit on the number of per-process sockets" }, + [ST_I_INF_MAXCONN] = { .name = "Maxconn", .desc = "Hard limit on the number of per-process connections (configured or imposed by Ulimit-n)" }, + [ST_I_INF_HARD_MAXCONN] = { .name = "Hard_maxconn", .desc = "Hard limit on the number of per-process connections (imposed by Memmax_MB or Ulimit-n)" }, + [ST_I_INF_CURR_CONN] = { .name = "CurrConns", .desc = "Current number of connections on this worker process" }, + [ST_I_INF_CUM_CONN] = { .name = "CumConns", .desc = "Total number of connections on this worker process since started" }, + [ST_I_INF_CUM_REQ] = { .name = "CumReq", .desc = "Total number of requests on this worker process since started" }, + [ST_I_INF_MAX_SSL_CONNS] = { .name = 
"MaxSslConns", .desc = "Hard limit on the number of per-process SSL endpoints (front+back), 0=unlimited" }, + [ST_I_INF_CURR_SSL_CONNS] = { .name = "CurrSslConns", .desc = "Current number of SSL endpoints on this worker process (front+back)" }, + [ST_I_INF_CUM_SSL_CONNS] = { .name = "CumSslConns", .desc = "Total number of SSL endpoints on this worker process since started (front+back)" }, + [ST_I_INF_MAXPIPES] = { .name = "Maxpipes", .desc = "Hard limit on the number of pipes for splicing, 0=unlimited" }, + [ST_I_INF_PIPES_USED] = { .name = "PipesUsed", .desc = "Current number of pipes in use in this worker process" }, + [ST_I_INF_PIPES_FREE] = { .name = "PipesFree", .desc = "Current number of allocated and available pipes in this worker process" }, + [ST_I_INF_CONN_RATE] = { .name = "ConnRate", .desc = "Number of front connections created on this worker process over the last second" }, + [ST_I_INF_CONN_RATE_LIMIT] = { .name = "ConnRateLimit", .desc = "Hard limit for ConnRate (global.maxconnrate)" }, + [ST_I_INF_MAX_CONN_RATE] = { .name = "MaxConnRate", .desc = "Highest ConnRate reached on this worker process since started (in connections per second)" }, + [ST_I_INF_SESS_RATE] = { .name = "SessRate", .desc = "Number of sessions created on this worker process over the last second" }, + [ST_I_INF_SESS_RATE_LIMIT] = { .name = "SessRateLimit", .desc = "Hard limit for SessRate (global.maxsessrate)" }, + [ST_I_INF_MAX_SESS_RATE] = { .name = "MaxSessRate", .desc = "Highest SessRate reached on this worker process since started (in sessions per second)" }, + [ST_I_INF_SSL_RATE] = { .name = "SslRate", .desc = "Number of SSL connections created on this worker process over the last second" }, + [ST_I_INF_SSL_RATE_LIMIT] = { .name = "SslRateLimit", .desc = "Hard limit for SslRate (global.maxsslrate)" }, + [ST_I_INF_MAX_SSL_RATE] = { .name = "MaxSslRate", .desc = "Highest SslRate reached on this worker process since started (in connections per second)" }, + 
[ST_I_INF_SSL_FRONTEND_KEY_RATE] = { .name = "SslFrontendKeyRate", .desc = "Number of SSL keys created on frontends in this worker process over the last second" }, + [ST_I_INF_SSL_FRONTEND_MAX_KEY_RATE] = { .name = "SslFrontendMaxKeyRate", .desc = "Highest SslFrontendKeyRate reached on this worker process since started (in SSL keys per second)" }, + [ST_I_INF_SSL_FRONTEND_SESSION_REUSE_PCT] = { .name = "SslFrontendSessionReuse_pct", .desc = "Percent of frontend SSL connections which did not require a new key" }, + [ST_I_INF_SSL_BACKEND_KEY_RATE] = { .name = "SslBackendKeyRate", .desc = "Number of SSL keys created on backends in this worker process over the last second" }, + [ST_I_INF_SSL_BACKEND_MAX_KEY_RATE] = { .name = "SslBackendMaxKeyRate", .desc = "Highest SslBackendKeyRate reached on this worker process since started (in SSL keys per second)" }, + [ST_I_INF_SSL_CACHE_LOOKUPS] = { .name = "SslCacheLookups", .desc = "Total number of SSL session ID lookups in the SSL session cache on this worker since started" }, + [ST_I_INF_SSL_CACHE_MISSES] = { .name = "SslCacheMisses", .desc = "Total number of SSL session ID lookups that didn't find a session in the SSL session cache on this worker since started" }, + [ST_I_INF_COMPRESS_BPS_IN] = { .name = "CompressBpsIn", .desc = "Number of bytes submitted to the HTTP compressor in this worker process over the last second" }, + [ST_I_INF_COMPRESS_BPS_OUT] = { .name = "CompressBpsOut", .desc = "Number of bytes emitted by the HTTP compressor in this worker process over the last second" }, + [ST_I_INF_COMPRESS_BPS_RATE_LIM] = { .name = "CompressBpsRateLim", .desc = "Limit of CompressBpsOut beyond which HTTP compression is automatically disabled" }, + [ST_I_INF_ZLIB_MEM_USAGE] = { .name = "ZlibMemUsage", .desc = "Amount of memory currently used by HTTP compression on the current worker process (in bytes)" }, + [ST_I_INF_MAX_ZLIB_MEM_USAGE] = { .name = "MaxZlibMemUsage", .desc = "Limit on the amount of memory used by HTTP 
compression above which it is automatically disabled (in bytes, see global.maxzlibmem)" }, + [ST_I_INF_TASKS] = { .name = "Tasks", .desc = "Total number of tasks in the current worker process (active + sleeping)" }, + [ST_I_INF_RUN_QUEUE] = { .name = "Run_queue", .desc = "Total number of active tasks+tasklets in the current worker process" }, + [ST_I_INF_IDLE_PCT] = { .name = "Idle_pct", .desc = "Percentage of last second spent waiting in the current worker thread" }, + [ST_I_INF_NODE] = { .name = "node", .desc = "Node name (global.node)" }, + [ST_I_INF_DESCRIPTION] = { .name = "description", .desc = "Node description (global.description)" }, + [ST_I_INF_STOPPING] = { .name = "Stopping", .desc = "1 if the worker process is currently stopping, otherwise zero" }, + [ST_I_INF_JOBS] = { .name = "Jobs", .desc = "Current number of active jobs on the current worker process (frontend connections, master connections, listeners)" }, + [ST_I_INF_UNSTOPPABLE_JOBS] = { .name = "Unstoppable Jobs", .desc = "Current number of unstoppable jobs on the current worker process (master connections)" }, + [ST_I_INF_LISTENERS] = { .name = "Listeners", .desc = "Current number of active listeners on the current worker process" }, + [ST_I_INF_ACTIVE_PEERS] = { .name = "ActivePeers", .desc = "Current number of verified active peers connections on the current worker process" }, + [ST_I_INF_CONNECTED_PEERS] = { .name = "ConnectedPeers", .desc = "Current number of peers having passed the connection step on the current worker process" }, + [ST_I_INF_DROPPED_LOGS] = { .name = "DroppedLogs", .desc = "Total number of dropped logs for current worker process since started" }, + [ST_I_INF_BUSY_POLLING] = { .name = "BusyPolling", .desc = "1 if busy-polling is currently in use on the worker process, otherwise zero (config.busy-polling)" }, + [ST_I_INF_FAILED_RESOLUTIONS] = { .name = "FailedResolutions", .desc = "Total number of failed DNS resolutions in current worker process since started" }, + 
[ST_I_INF_TOTAL_BYTES_OUT] = { .name = "TotalBytesOut", .desc = "Total number of bytes emitted by current worker process since started" }, + [ST_I_INF_TOTAL_SPLICED_BYTES_OUT] = { .name = "TotalSplicedBytesOut", .desc = "Total number of bytes emitted by current worker process through a kernel pipe since started" }, + [ST_I_INF_BYTES_OUT_RATE] = { .name = "BytesOutRate", .desc = "Number of bytes emitted by current worker process over the last second" }, + [ST_I_INF_DEBUG_COMMANDS_ISSUED] = { .name = "DebugCommandsIssued", .desc = "Number of debug commands issued on this process (anything > 0 is unsafe)" }, + [ST_I_INF_CUM_LOG_MSGS] = { .name = "CumRecvLogs", .desc = "Total number of log messages received by log-forwarding listeners on this worker process since started" }, + [ST_I_INF_BUILD_INFO] = { .name = "Build info", .desc = "Build info" }, + [ST_I_INF_TAINTED] = { .name = "Tainted", .desc = "Experimental features used" }, + [ST_I_INF_WARNINGS] = { .name = "TotalWarnings", .desc = "Total warnings issued" }, + [ST_I_INF_MAXCONN_REACHED] = { .name = "MaxconnReached", .desc = "Number of times an accepted connection resulted in Maxconn being reached" }, + [ST_I_INF_BOOTTIME_MS] = { .name = "BootTime_ms", .desc = "How long ago it took to parse and process the config before being ready (milliseconds)" }, + [ST_I_INF_NICED_TASKS] = { .name = "Niced_tasks", .desc = "Total number of active tasks+tasklets in the current worker process (Run_queue) that are niced" }, }; /* one line of info */ -THREAD_LOCAL struct field info[INF_TOTAL_FIELDS]; - -/* description of statistics (static and dynamic) */ -static struct name_desc *stat_f[STATS_DOMAIN_COUNT]; -static size_t stat_count[STATS_DOMAIN_COUNT]; +THREAD_LOCAL struct field stat_line_info[ST_I_INF_MAX]; /* one line for stats */ -THREAD_LOCAL struct field *stat_l[STATS_DOMAIN_COUNT]; +THREAD_LOCAL struct field *stat_lines[STATS_DOMAIN_COUNT]; + +/* Unified storage for statistics from all module + * TODO merge info stats into 
it as global statistic domain. + */ +struct name_desc *stat_cols[STATS_DOMAIN_COUNT]; +size_t stat_cols_len[STATS_DOMAIN_COUNT]; /* list of all registered stats module */ -static struct list stats_module_list[STATS_DOMAIN_COUNT] = { +struct list stats_module_list[STATS_DOMAIN_COUNT] = { LIST_HEAD_INIT(stats_module_list[STATS_DOMAIN_PROXY]), LIST_HEAD_INIT(stats_module_list[STATS_DOMAIN_RESOLVERS]), }; THREAD_LOCAL void *trash_counters; -static THREAD_LOCAL struct buffer trash_chunk = BUF_NULL; - -static inline uint8_t stats_get_domain(uint32_t domain) +/* Insert <cols> generic stat columns into <st_tree> indexed by their name. */ +int generate_stat_tree(struct eb_root *st_tree, const struct stat_col cols[]) { - return domain >> STATS_DOMAIN & STATS_DOMAIN_MASK; -} + const struct stat_col *col; + struct stcol_node *node; + size_t len; + int i; -static inline enum stats_domain_px_cap stats_px_get_cap(uint32_t domain) -{ - return domain >> STATS_PX_CAP & STATS_PX_CAP_MASK; + for (i = 0; i < ST_I_PX_MAX; ++i) { + col = &cols[i]; + + if (stcol_is_generic(col)) { + len = strlen(col->name); + node = malloc(sizeof(struct stcol_node) + len + 1); + if (!node) + goto err; + + node->col = col; + memcpy(node->name.key, col->name, len); + node->name.key[len] = '\0'; + + ebst_insert(st_tree, &node->name); + } + } + + return 0; + + err: + return 1; } -static void stats_dump_json_schema(struct buffer *out); -int stats_putchk(struct appctx *appctx, struct htx *htx) +int stats_putchk(struct appctx *appctx, struct buffer *buf, struct htx *htx) { - struct stconn *sc = appctx_sc(appctx); - struct channel *chn = sc_ic(sc); - struct buffer *chk = &trash_chunk; + struct show_stat_ctx *ctx = appctx->svcctx; + struct buffer *chk = &ctx->chunk; if (htx) { - if (chk->data >= channel_htx_recv_max(chn, htx)) { - sc_need_room(sc, chk->data); + if (b_data(chk) > htx_free_data_space(htx)) { + applet_fl_set(appctx, APPCTX_FL_OUTBLK_FULL); + return 0; + } + if (!htx_add_data_atonce(htx, 
ist2(b_orig(chk), b_data(chk)))) { + applet_fl_set(appctx, APPCTX_FL_OUTBLK_FULL); return 0; } - if (!htx_add_data_atonce(htx, ist2(chk->area, chk->data))) { - sc_need_room(sc, 0); + chunk_reset(chk); + } + else if (buf) { + if (b_data(chk) > b_room(buf)) { + se_fl_set(appctx->sedesc, SE_FL_RCV_MORE | SE_FL_WANT_ROOM); return 0; } - channel_add_input(chn, chk->data); - chk->data = 0; + b_putblk(buf, b_head(chk), b_data(chk)); + chunk_reset(chk); } - else { + else { if (applet_putchk(appctx, chk) == -1) return 0; } return 1; } -static const char *stats_scope_ptr(struct appctx *appctx, struct stconn *sc) + +int stats_is_full(struct appctx *appctx, struct buffer *buf, struct htx *htx) +{ + if (htx) { + if (htx_almost_full(htx)) { + applet_fl_set(appctx, APPCTX_FL_OUTBLK_FULL); + goto full; + } + } + else if (buf) { + if (buffer_almost_full(buf)) { + se_fl_set(appctx->sedesc, SE_FL_RCV_MORE | SE_FL_WANT_ROOM); + goto full; + } + } + else { + if (buffer_almost_full(&appctx->outbuf)) { + applet_fl_set(appctx, APPCTX_FL_OUTBLK_FULL); + goto full; + } + } + return 0; +full: + return 1; +} + +const char *stats_scope_ptr(struct appctx *appctx) { struct show_stat_ctx *ctx = appctx->svcctx; - struct channel *req = sc_oc(sc); - struct htx *htx = htxbuf(&req->buf); + struct htx *htx = htxbuf(&appctx->inbuf); struct htx_blk *blk; struct ist uri; @@ -359,38 +302,39 @@ static const char *stats_scope_ptr(struct appctx *appctx, struct stconn *sc) * -> stats_dump_html_info() // emits the equivalent of "show info" at the top * -> stats_dump_proxy_to_buffer() // same as above, valid for CSV and HTML * -> stats_dump_html_px_hdr() - * -> stats_dump_fe_stats() - * -> stats_dump_li_stats() - * -> stats_dump_sv_stats() - * -> stats_dump_be_stats() + * -> stats_dump_fe_line() + * -> stats_dump_li_line() + * -> stats_dump_sv_line() + * -> stats_dump_be_line() * -> stats_dump_html_px_end() * -> stats_dump_html_end() // emits HTML trailer * -> stats_dump_json_end() // emits JSON trailer */ -/* 
Dumps the stats CSV header to the local trash buffer. The caller is - * responsible for clearing it if needed. +/* Dumps the stats CSV header to <out> buffer. The caller is responsible for + * clearing it if needed. + * * NOTE: Some tools happen to rely on the field position instead of its name, * so please only append new fields at the end, never in the middle. */ -static void stats_dump_csv_header(enum stats_domain domain) +static void stats_dump_csv_header(enum stats_domain domain, struct buffer *out) { - int field; + int i; - chunk_appendf(&trash_chunk, "# "); - if (stat_f[domain]) { - for (field = 0; field < stat_count[domain]; ++field) { - chunk_appendf(&trash_chunk, "%s,", stat_f[domain][field].name); + chunk_appendf(out, "# "); + if (stat_cols[domain]) { + for (i = 0; i < stat_cols_len[domain]; ++i) { + chunk_appendf(out, "%s,", stat_cols[domain][i].name); /* print special delimiter on proxy stats to mark end of static fields */ - if (domain == STATS_DOMAIN_PROXY && field + 1 == ST_F_TOTAL_FIELDS) - chunk_appendf(&trash_chunk, "-,"); + if (domain == STATS_DOMAIN_PROXY && i + 1 == ST_I_PX_MAX) + chunk_appendf(out, "-,"); } } - chunk_appendf(&trash_chunk, "\n"); + chunk_appendf(out, "\n"); } /* Emits a stats field without any surrounding element and properly encoded to @@ -414,21 +358,6 @@ int stats_emit_raw_data_field(struct buffer *out, const struct field *f) } } -const char *field_to_html_str(const struct field *f) -{ - switch (field_format(f, 0)) { - case FF_S32: return U2H(f->u.s32); - case FF_S64: return U2H(f->u.s64); - case FF_U64: return U2H(f->u.u64); - case FF_U32: return U2H(f->u.u32); - case FF_FLT: return F2H(f->u.flt); - case FF_STR: return field_str(f, 0); - case FF_EMPTY: - default: - return ""; - } -} - /* Emits a stats field prefixed with its type. No CSV encoding is prepared, the * output is supposed to be used on its own line. Returns non-zero on success, 0 * if the buffer is full. 
@@ -451,61 +380,6 @@ int stats_emit_typed_data_field(struct buffer *out, const struct field *f) } } -/* Limit JSON integer values to the range [-(2**53)+1, (2**53)-1] as per - * the recommendation for interoperable integers in section 6 of RFC 7159. - */ -#define JSON_INT_MAX ((1ULL << 53) - 1) -#define JSON_INT_MIN (0 - JSON_INT_MAX) - -/* Emits a stats field value and its type in JSON. - * Returns non-zero on success, 0 on error. - */ -int stats_emit_json_data_field(struct buffer *out, const struct field *f) -{ - int old_len; - char buf[20]; - const char *type, *value = buf, *quote = ""; - - switch (field_format(f, 0)) { - case FF_EMPTY: return 1; - case FF_S32: type = "\"s32\""; - snprintf(buf, sizeof(buf), "%d", f->u.s32); - break; - case FF_U32: type = "\"u32\""; - snprintf(buf, sizeof(buf), "%u", f->u.u32); - break; - case FF_S64: type = "\"s64\""; - if (f->u.s64 < JSON_INT_MIN || f->u.s64 > JSON_INT_MAX) - return 0; - type = "\"s64\""; - snprintf(buf, sizeof(buf), "%lld", (long long)f->u.s64); - break; - case FF_U64: if (f->u.u64 > JSON_INT_MAX) - return 0; - type = "\"u64\""; - snprintf(buf, sizeof(buf), "%llu", - (unsigned long long) f->u.u64); - break; - case FF_FLT: type = "\"flt\""; - flt_trim(buf, 0, snprintf(buf, sizeof(buf), "%f", f->u.flt)); - break; - case FF_STR: type = "\"str\""; - value = field_str(f, 0); - quote = "\""; - break; - default: snprintf(buf, sizeof(buf), "%u", f->type); - type = buf; - value = "unknown"; - quote = "\""; - break; - } - - old_len = out->data; - chunk_appendf(out, ",\"value\":{\"type\":%s,\"value\":%s%s%s}", - type, quote, value, quote); - return !(old_len == out->data); -} - /* Emits an encoding of the field type on 3 characters followed by a delimiter. * Returns non-zero on success, 0 if the buffer is full. */ @@ -550,73 +424,23 @@ int stats_emit_field_tags(struct buffer *out, const struct field *f, return chunk_appendf(out, "%c%c%c%c", origin, nature, scope, delim); } -/* Emits an encoding of the field type as JSON. 
- * Returns non-zero on success, 0 if the buffer is full. - */ -int stats_emit_json_field_tags(struct buffer *out, const struct field *f) -{ - const char *origin, *nature, *scope; - int old_len; - - switch (field_origin(f, 0)) { - case FO_METRIC: origin = "Metric"; break; - case FO_STATUS: origin = "Status"; break; - case FO_KEY: origin = "Key"; break; - case FO_CONFIG: origin = "Config"; break; - case FO_PRODUCT: origin = "Product"; break; - default: origin = "Unknown"; break; - } - - switch (field_nature(f, 0)) { - case FN_GAUGE: nature = "Gauge"; break; - case FN_LIMIT: nature = "Limit"; break; - case FN_MIN: nature = "Min"; break; - case FN_MAX: nature = "Max"; break; - case FN_RATE: nature = "Rate"; break; - case FN_COUNTER: nature = "Counter"; break; - case FN_DURATION: nature = "Duration"; break; - case FN_AGE: nature = "Age"; break; - case FN_TIME: nature = "Time"; break; - case FN_NAME: nature = "Name"; break; - case FN_OUTPUT: nature = "Output"; break; - case FN_AVG: nature = "Avg"; break; - default: nature = "Unknown"; break; - } - - switch (field_scope(f, 0)) { - case FS_PROCESS: scope = "Process"; break; - case FS_SERVICE: scope = "Service"; break; - case FS_SYSTEM: scope = "System"; break; - case FS_CLUSTER: scope = "Cluster"; break; - default: scope = "Unknown"; break; - } - - old_len = out->data; - chunk_appendf(out, "\"tags\":{" - "\"origin\":\"%s\"," - "\"nature\":\"%s\"," - "\"scope\":\"%s\"" - "}", origin, nature, scope); - return !(old_len == out->data); -} - -/* Dump all fields from <stats> into <out> using CSV format */ +/* Dump all fields from <line> into <out> using CSV format */ static int stats_dump_fields_csv(struct buffer *out, - const struct field *stats, size_t stats_count, + const struct field *line, size_t stats_count, struct show_stat_ctx *ctx) { int domain = ctx->domain; - int field; + int i; - for (field = 0; field < stats_count; ++field) { - if (!stats_emit_raw_data_field(out, &stats[field])) + for (i = 0; i < stats_count; ++i) 
{ + if (!stats_emit_raw_data_field(out, &line[i])) return 0; if (!chunk_strcat(out, ",")) return 0; /* print special delimiter on proxy stats to mark end of static fields */ - if (domain == STATS_DOMAIN_PROXY && field + 1 == ST_F_TOTAL_FIELDS) { + if (domain == STATS_DOMAIN_PROXY && i + 1 == ST_I_PX_MAX) { if (!chunk_strcat(out, "-,")) return 0; } @@ -626,50 +450,50 @@ static int stats_dump_fields_csv(struct buffer *out, return 1; } -/* Dump all fields from <stats> into <out> using a typed "field:desc:type:value" format */ +/* Dump all fields from <line> into <out> using a typed "field:desc:type:value" format */ static int stats_dump_fields_typed(struct buffer *out, - const struct field *stats, + const struct field *line, size_t stats_count, struct show_stat_ctx * ctx) { int flags = ctx->flags; int domain = ctx->domain; - int field; + int i; - for (field = 0; field < stats_count; ++field) { - if (!stats[field].type) + for (i = 0; i < stats_count; ++i) { + if (!line[i].type) continue; switch (domain) { case STATS_DOMAIN_PROXY: chunk_appendf(out, "%c.%u.%u.%d.%s.%u:", - stats[ST_F_TYPE].u.u32 == STATS_TYPE_FE ? 'F' : - stats[ST_F_TYPE].u.u32 == STATS_TYPE_BE ? 'B' : - stats[ST_F_TYPE].u.u32 == STATS_TYPE_SO ? 'L' : - stats[ST_F_TYPE].u.u32 == STATS_TYPE_SV ? 'S' : + line[ST_I_PX_TYPE].u.u32 == STATS_TYPE_FE ? 'F' : + line[ST_I_PX_TYPE].u.u32 == STATS_TYPE_BE ? 'B' : + line[ST_I_PX_TYPE].u.u32 == STATS_TYPE_SO ? 'L' : + line[ST_I_PX_TYPE].u.u32 == STATS_TYPE_SV ? 
'S' : '?', - stats[ST_F_IID].u.u32, stats[ST_F_SID].u.u32, - field, - stat_f[domain][field].name, - stats[ST_F_PID].u.u32); + line[ST_I_PX_IID].u.u32, line[ST_I_PX_SID].u.u32, + i, + stat_cols[domain][i].name, + line[ST_I_PX_PID].u.u32); break; case STATS_DOMAIN_RESOLVERS: - chunk_appendf(out, "N.%d.%s:", field, - stat_f[domain][field].name); + chunk_appendf(out, "N.%d.%s:", i, + stat_cols[domain][i].name); break; default: break; } - if (!stats_emit_field_tags(out, &stats[field], ':')) + if (!stats_emit_field_tags(out, &line[i], ':')) return 0; - if (!stats_emit_typed_data_field(out, &stats[field])) + if (!stats_emit_typed_data_field(out, &line[i])) return 0; - if (flags & STAT_SHOW_FDESC && - !chunk_appendf(out, ":\"%s\"", stat_f[domain][field].desc)) { + if (flags & STAT_F_SHOW_FDESC && + !chunk_appendf(out, ":\"%s\"", stat_cols[domain][i].desc)) { return 0; } @@ -679,3254 +503,42 @@ static int stats_dump_fields_typed(struct buffer *out, return 1; } -/* Dump all fields from <stats> into <out> using the "show info json" format */ -static int stats_dump_json_info_fields(struct buffer *out, - const struct field *info, - struct show_stat_ctx *ctx) -{ - int started = (ctx->field) ? 
1 : 0; - int ready_data = 0; - - if (!started && !chunk_strcat(out, "[")) - return 0; - - for (; ctx->field < INF_TOTAL_FIELDS; ctx->field++) { - int old_len; - int field = ctx->field; - - if (!field_format(info, field)) - continue; - - if (started && !chunk_strcat(out, ",")) - goto err; - started = 1; - - old_len = out->data; - chunk_appendf(out, - "{\"field\":{\"pos\":%d,\"name\":\"%s\"}," - "\"processNum\":%u,", - field, info_fields[field].name, - info[INF_PROCESS_NUM].u.u32); - if (old_len == out->data) - goto err; - - if (!stats_emit_json_field_tags(out, &info[field])) - goto err; - - if (!stats_emit_json_data_field(out, &info[field])) - goto err; - - if (!chunk_strcat(out, "}")) - goto err; - ready_data = out->data; - } - - if (!chunk_strcat(out, "]\n")) - goto err; - ctx->field = 0; /* we're done */ - return 1; - -err: - if (!ready_data) { - /* not enough buffer space for a single entry.. */ - chunk_reset(out); - chunk_appendf(out, "{\"errorStr\":\"output buffer too short\"}\n"); - return 0; /* hard error */ - } - /* push ready data and wait for a new buffer to complete the dump */ - out->data = ready_data; - return 1; -} - -static void stats_print_proxy_field_json(struct buffer *out, - const struct field *stat, - const char *name, - int pos, - uint32_t field_type, - uint32_t iid, - uint32_t sid, - uint32_t pid) -{ - const char *obj_type; - switch (field_type) { - case STATS_TYPE_FE: obj_type = "Frontend"; break; - case STATS_TYPE_BE: obj_type = "Backend"; break; - case STATS_TYPE_SO: obj_type = "Listener"; break; - case STATS_TYPE_SV: obj_type = "Server"; break; - default: obj_type = "Unknown"; break; - } - - chunk_appendf(out, - "{" - "\"objType\":\"%s\"," - "\"proxyId\":%u," - "\"id\":%u," - "\"field\":{\"pos\":%d,\"name\":\"%s\"}," - "\"processNum\":%u,", - obj_type, iid, sid, pos, name, pid); -} - -static void stats_print_rslv_field_json(struct buffer *out, - const struct field *stat, - const char *name, - int pos) -{ - chunk_appendf(out, - "{" - 
"\"field\":{\"pos\":%d,\"name\":\"%s\"},", - pos, name); -} - - -/* Dump all fields from <stats> into <out> using a typed "field:desc:type:value" format */ -static int stats_dump_fields_json(struct buffer *out, - const struct field *stats, size_t stats_count, - struct show_stat_ctx *ctx) -{ - int flags = ctx->flags; - int domain = ctx->domain; - int started = (ctx->field) ? 1 : 0; - int ready_data = 0; - - if (!started && (flags & STAT_STARTED) && !chunk_strcat(out, ",")) - return 0; - if (!started && !chunk_strcat(out, "[")) - return 0; - - for (; ctx->field < stats_count; ctx->field++) { - int old_len; - int field = ctx->field; - - if (!stats[field].type) - continue; - - if (started && !chunk_strcat(out, ",")) - goto err; - started = 1; - - old_len = out->data; - if (domain == STATS_DOMAIN_PROXY) { - stats_print_proxy_field_json(out, &stats[field], - stat_f[domain][field].name, - field, - stats[ST_F_TYPE].u.u32, - stats[ST_F_IID].u.u32, - stats[ST_F_SID].u.u32, - stats[ST_F_PID].u.u32); - } else if (domain == STATS_DOMAIN_RESOLVERS) { - stats_print_rslv_field_json(out, &stats[field], - stat_f[domain][field].name, - field); - } - - if (old_len == out->data) - goto err; - - if (!stats_emit_json_field_tags(out, &stats[field])) - goto err; - - if (!stats_emit_json_data_field(out, &stats[field])) - goto err; - - if (!chunk_strcat(out, "}")) - goto err; - ready_data = out->data; - } - - if (!chunk_strcat(out, "]")) - goto err; - - ctx->field = 0; /* we're done */ - return 1; - -err: - if (!ready_data) { - /* not enough buffer space for a single entry.. */ - chunk_reset(out); - if (ctx->flags & STAT_STARTED) - chunk_strcat(out, ","); - chunk_appendf(out, "{\"errorStr\":\"output buffer too short\"}"); - return 0; /* hard error */ - } - /* push ready data and wait for a new buffer to complete the dump */ - out->data = ready_data; - return 1; -} - -/* Dump all fields from <stats> into <out> using the HTML format. 
A column is - * reserved for the checkbox is STAT_ADMIN is set in <flags>. Some extra info - * are provided if STAT_SHLGNDS is present in <flags>. The statistics from - * extra modules are displayed at the end of the lines if STAT_SHMODULES is - * present in <flags>. - */ -static int stats_dump_fields_html(struct buffer *out, - const struct field *stats, - struct show_stat_ctx *ctx) -{ - struct buffer src; - struct stats_module *mod; - int flags = ctx->flags; - int i = 0, j = 0; - - if (stats[ST_F_TYPE].u.u32 == STATS_TYPE_FE) { - chunk_appendf(out, - /* name, queue */ - "<tr class=\"frontend\">"); - - if (flags & STAT_ADMIN) { - /* Column sub-heading for Enable or Disable server */ - chunk_appendf(out, "<td></td>"); - } - - chunk_appendf(out, - "<td class=ac>" - "<a name=\"%s/Frontend\"></a>" - "<a class=lfsb href=\"#%s/Frontend\">Frontend</a></td>" - "<td colspan=3></td>" - "", - field_str(stats, ST_F_PXNAME), field_str(stats, ST_F_PXNAME)); - - chunk_appendf(out, - /* sessions rate : current */ - "<td><u>%s<div class=tips><table class=det>" - "<tr><th>Current connection rate:</th><td>%s/s</td></tr>" - "<tr><th>Current session rate:</th><td>%s/s</td></tr>" - "", - U2H(stats[ST_F_RATE].u.u32), - U2H(stats[ST_F_CONN_RATE].u.u32), - U2H(stats[ST_F_RATE].u.u32)); - - if (strcmp(field_str(stats, ST_F_MODE), "http") == 0) - chunk_appendf(out, - "<tr><th>Current request rate:</th><td>%s/s</td></tr>", - U2H(stats[ST_F_REQ_RATE].u.u32)); - - chunk_appendf(out, - "</table></div></u></td>" - /* sessions rate : max */ - "<td><u>%s<div class=tips><table class=det>" - "<tr><th>Max connection rate:</th><td>%s/s</td></tr>" - "<tr><th>Max session rate:</th><td>%s/s</td></tr>" - "", - U2H(stats[ST_F_RATE_MAX].u.u32), - U2H(stats[ST_F_CONN_RATE_MAX].u.u32), - U2H(stats[ST_F_RATE_MAX].u.u32)); - - if (strcmp(field_str(stats, ST_F_MODE), "http") == 0) - chunk_appendf(out, - "<tr><th>Max request rate:</th><td>%s/s</td></tr>", - U2H(stats[ST_F_REQ_RATE_MAX].u.u32)); - - 
chunk_appendf(out, - "</table></div></u></td>" - /* sessions rate : limit */ - "<td>%s</td>", - LIM2A(stats[ST_F_RATE_LIM].u.u32, "-")); - - chunk_appendf(out, - /* sessions: current, max, limit, total */ - "<td>%s</td><td>%s</td><td>%s</td>" - "<td><u>%s<div class=tips><table class=det>" - "<tr><th>Cum. connections:</th><td>%s</td></tr>" - "<tr><th>Cum. sessions:</th><td>%s</td></tr>" - "", - U2H(stats[ST_F_SCUR].u.u32), U2H(stats[ST_F_SMAX].u.u32), U2H(stats[ST_F_SLIM].u.u32), - U2H(stats[ST_F_STOT].u.u64), - U2H(stats[ST_F_CONN_TOT].u.u64), - U2H(stats[ST_F_STOT].u.u64)); - - /* http response (via hover): 1xx, 2xx, 3xx, 4xx, 5xx, other */ - if (strcmp(field_str(stats, ST_F_MODE), "http") == 0) { - chunk_appendf(out, - "<tr><th>- HTTP/1 sessions:</th><td>%s</td></tr>" - "<tr><th>- HTTP/2 sessions:</th><td>%s</td></tr>" - "<tr><th>- HTTP/3 sessions:</th><td>%s</td></tr>" - "<tr><th>- other sessions:</th><td>%s</td></tr>" - "<tr><th>Cum. HTTP requests:</th><td>%s</td></tr>" - "<tr><th>- HTTP/1 requests:</th><td>%s</td></tr>" - "<tr><th>- HTTP/2 requests:</th><td>%s</td></tr>" - "<tr><th>- HTTP/3 requests:</th><td>%s</td></tr>" - "<tr><th>- other requests:</th><td>%s</td></tr>" - "", - U2H(stats[ST_F_H1SESS].u.u64), - U2H(stats[ST_F_H2SESS].u.u64), - U2H(stats[ST_F_H3SESS].u.u64), - U2H(stats[ST_F_SESS_OTHER].u.u64), - U2H(stats[ST_F_REQ_TOT].u.u64), - U2H(stats[ST_F_H1REQ].u.u64), - U2H(stats[ST_F_H2REQ].u.u64), - U2H(stats[ST_F_H3REQ].u.u64), - U2H(stats[ST_F_REQ_OTHER].u.u64)); - - chunk_appendf(out, - "<tr><th>- HTTP 1xx responses:</th><td>%s</td></tr>" - "<tr><th>- HTTP 2xx responses:</th><td>%s</td></tr>" - "<tr><th> Compressed 2xx:</th><td>%s</td><td>(%d%%)</td></tr>" - "<tr><th>- HTTP 3xx responses:</th><td>%s</td></tr>" - "<tr><th>- HTTP 4xx responses:</th><td>%s</td></tr>" - "<tr><th>- HTTP 5xx responses:</th><td>%s</td></tr>" - "<tr><th>- other responses:</th><td>%s</td></tr>" - "", - U2H(stats[ST_F_HRSP_1XX].u.u64), - U2H(stats[ST_F_HRSP_2XX].u.u64), - 
U2H(stats[ST_F_COMP_RSP].u.u64), - stats[ST_F_HRSP_2XX].u.u64 ? - (int)(100 * stats[ST_F_COMP_RSP].u.u64 / stats[ST_F_HRSP_2XX].u.u64) : 0, - U2H(stats[ST_F_HRSP_3XX].u.u64), - U2H(stats[ST_F_HRSP_4XX].u.u64), - U2H(stats[ST_F_HRSP_5XX].u.u64), - U2H(stats[ST_F_HRSP_OTHER].u.u64)); - - chunk_appendf(out, - "<tr><th>Intercepted requests:</th><td>%s</td></tr>" - "<tr><th>Cache lookups:</th><td>%s</td></tr>" - "<tr><th>Cache hits:</th><td>%s</td><td>(%d%%)</td></tr>" - "<tr><th>Failed hdr rewrites:</th><td>%s</td></tr>" - "<tr><th>Internal errors:</th><td>%s</td></tr>" - "", - U2H(stats[ST_F_INTERCEPTED].u.u64), - U2H(stats[ST_F_CACHE_LOOKUPS].u.u64), - U2H(stats[ST_F_CACHE_HITS].u.u64), - stats[ST_F_CACHE_LOOKUPS].u.u64 ? - (int)(100 * stats[ST_F_CACHE_HITS].u.u64 / stats[ST_F_CACHE_LOOKUPS].u.u64) : 0, - U2H(stats[ST_F_WREW].u.u64), - U2H(stats[ST_F_EINT].u.u64)); - } - - chunk_appendf(out, - "</table></div></u></td>" - /* sessions: lbtot, lastsess */ - "<td></td><td></td>" - /* bytes : in */ - "<td>%s</td>" - "", - U2H(stats[ST_F_BIN].u.u64)); - - chunk_appendf(out, - /* bytes:out + compression stats (via hover): comp_in, comp_out, comp_byp */ - "<td>%s%s<div class=tips><table class=det>" - "<tr><th>Response bytes in:</th><td>%s</td></tr>" - "<tr><th>Compression in:</th><td>%s</td></tr>" - "<tr><th>Compression out:</th><td>%s</td><td>(%d%%)</td></tr>" - "<tr><th>Compression bypass:</th><td>%s</td></tr>" - "<tr><th>Total bytes saved:</th><td>%s</td><td>(%d%%)</td></tr>" - "</table></div>%s</td>", - (stats[ST_F_COMP_IN].u.u64 || stats[ST_F_COMP_BYP].u.u64) ? "<u>":"", - U2H(stats[ST_F_BOUT].u.u64), - U2H(stats[ST_F_BOUT].u.u64), - U2H(stats[ST_F_COMP_IN].u.u64), - U2H(stats[ST_F_COMP_OUT].u.u64), - stats[ST_F_COMP_IN].u.u64 ? (int)(stats[ST_F_COMP_OUT].u.u64 * 100 / stats[ST_F_COMP_IN].u.u64) : 0, - U2H(stats[ST_F_COMP_BYP].u.u64), - U2H(stats[ST_F_COMP_IN].u.u64 - stats[ST_F_COMP_OUT].u.u64), - stats[ST_F_BOUT].u.u64 ? 
(int)((stats[ST_F_COMP_IN].u.u64 - stats[ST_F_COMP_OUT].u.u64) * 100 / stats[ST_F_BOUT].u.u64) : 0, - (stats[ST_F_COMP_IN].u.u64 || stats[ST_F_COMP_BYP].u.u64) ? "</u>":""); - - chunk_appendf(out, - /* denied: req, resp */ - "<td>%s</td><td>%s</td>" - /* errors : request, connect, response */ - "<td>%s</td><td></td><td></td>" - /* warnings: retries, redispatches */ - "<td></td><td></td>" - /* server status : reflect frontend status */ - "<td class=ac>%s</td>" - /* rest of server: nothing */ - "<td class=ac colspan=8></td>" - "", - U2H(stats[ST_F_DREQ].u.u64), U2H(stats[ST_F_DRESP].u.u64), - U2H(stats[ST_F_EREQ].u.u64), - field_str(stats, ST_F_STATUS)); - - if (flags & STAT_SHMODULES) { - list_for_each_entry(mod, &stats_module_list[STATS_DOMAIN_PROXY], list) { - chunk_appendf(out, "<td>"); - - if (stats_px_get_cap(mod->domain_flags) & STATS_PX_CAP_FE) { - chunk_appendf(out, - "<u>%s<div class=tips><table class=det>", - mod->name); - for (j = 0; j < mod->stats_count; ++j) { - chunk_appendf(out, - "<tr><th>%s</th><td>%s</td></tr>", - mod->stats[j].desc, field_to_html_str(&stats[ST_F_TOTAL_FIELDS + i])); - ++i; - } - chunk_appendf(out, "</table></div></u>"); - } else { - i += mod->stats_count; - } - - chunk_appendf(out, "</td>"); - } - } - - chunk_appendf(out, "</tr>"); - } - else if (stats[ST_F_TYPE].u.u32 == STATS_TYPE_SO) { - chunk_appendf(out, "<tr class=socket>"); - if (flags & STAT_ADMIN) { - /* Column sub-heading for Enable or Disable server */ - chunk_appendf(out, "<td></td>"); - } - - chunk_appendf(out, - /* frontend name, listener name */ - "<td class=ac><a name=\"%s/+%s\"></a>%s" - "<a class=lfsb href=\"#%s/+%s\">%s</a>" - "", - field_str(stats, ST_F_PXNAME), field_str(stats, ST_F_SVNAME), - (flags & STAT_SHLGNDS)?"<u>":"", - field_str(stats, ST_F_PXNAME), field_str(stats, ST_F_SVNAME), field_str(stats, ST_F_SVNAME)); - - if (flags & STAT_SHLGNDS) { - chunk_appendf(out, "<div class=tips>"); - - if (isdigit((unsigned char)*field_str(stats, ST_F_ADDR))) - 
chunk_appendf(out, "IPv4: %s, ", field_str(stats, ST_F_ADDR)); - else if (*field_str(stats, ST_F_ADDR) == '[') - chunk_appendf(out, "IPv6: %s, ", field_str(stats, ST_F_ADDR)); - else if (*field_str(stats, ST_F_ADDR)) - chunk_appendf(out, "%s, ", field_str(stats, ST_F_ADDR)); - - chunk_appendf(out, "proto=%s, ", field_str(stats, ST_F_PROTO)); - - /* id */ - chunk_appendf(out, "id: %d</div>", stats[ST_F_SID].u.u32); - } - - chunk_appendf(out, - /* queue */ - "%s</td><td colspan=3></td>" - /* sessions rate: current, max, limit */ - "<td colspan=3> </td>" - /* sessions: current, max, limit, total, lbtot, lastsess */ - "<td>%s</td><td>%s</td><td>%s</td>" - "<td>%s</td><td> </td><td> </td>" - /* bytes: in, out */ - "<td>%s</td><td>%s</td>" - "", - (flags & STAT_SHLGNDS)?"</u>":"", - U2H(stats[ST_F_SCUR].u.u32), U2H(stats[ST_F_SMAX].u.u32), U2H(stats[ST_F_SLIM].u.u32), - U2H(stats[ST_F_STOT].u.u64), U2H(stats[ST_F_BIN].u.u64), U2H(stats[ST_F_BOUT].u.u64)); - - chunk_appendf(out, - /* denied: req, resp */ - "<td>%s</td><td>%s</td>" - /* errors: request, connect, response */ - "<td>%s</td><td></td><td></td>" - /* warnings: retries, redispatches */ - "<td></td><td></td>" - /* server status: reflect listener status */ - "<td class=ac>%s</td>" - /* rest of server: nothing */ - "<td class=ac colspan=8></td>" - "", - U2H(stats[ST_F_DREQ].u.u64), U2H(stats[ST_F_DRESP].u.u64), - U2H(stats[ST_F_EREQ].u.u64), - field_str(stats, ST_F_STATUS)); - - if (flags & STAT_SHMODULES) { - list_for_each_entry(mod, &stats_module_list[STATS_DOMAIN_PROXY], list) { - chunk_appendf(out, "<td>"); - - if (stats_px_get_cap(mod->domain_flags) & STATS_PX_CAP_LI) { - chunk_appendf(out, - "<u>%s<div class=tips><table class=det>", - mod->name); - for (j = 0; j < mod->stats_count; ++j) { - chunk_appendf(out, - "<tr><th>%s</th><td>%s</td></tr>", - mod->stats[j].desc, field_to_html_str(&stats[ST_F_TOTAL_FIELDS + i])); - ++i; - } - chunk_appendf(out, "</table></div></u>"); - } else { - i += mod->stats_count; - 
} - - chunk_appendf(out, "</td>"); - } - } - - chunk_appendf(out, "</tr>"); - } - else if (stats[ST_F_TYPE].u.u32 == STATS_TYPE_SV) { - const char *style; - - /* determine the style to use depending on the server's state, - * its health and weight. There isn't a 1-to-1 mapping between - * state and styles for the cases where the server is (still) - * up. The reason is that we don't want to report nolb and - * drain with the same color. - */ - - if (strcmp(field_str(stats, ST_F_STATUS), "DOWN") == 0 || - strcmp(field_str(stats, ST_F_STATUS), "DOWN (agent)") == 0) { - style = "down"; - } - else if (strncmp(field_str(stats, ST_F_STATUS), "DOWN ", strlen("DOWN ")) == 0) { - style = "going_up"; - } - else if (strcmp(field_str(stats, ST_F_STATUS), "DRAIN") == 0) { - style = "draining"; - } - else if (strncmp(field_str(stats, ST_F_STATUS), "NOLB ", strlen("NOLB ")) == 0) { - style = "going_down"; - } - else if (strcmp(field_str(stats, ST_F_STATUS), "NOLB") == 0) { - style = "nolb"; - } - else if (strcmp(field_str(stats, ST_F_STATUS), "no check") == 0) { - style = "no_check"; - } - else if (!stats[ST_F_CHKFAIL].type || - stats[ST_F_CHECK_HEALTH].u.u32 == stats[ST_F_CHECK_RISE].u.u32 + stats[ST_F_CHECK_FALL].u.u32 - 1) { - /* no check or max health = UP */ - if (stats[ST_F_WEIGHT].u.u32) - style = "up"; - else - style = "draining"; - } - else { - style = "going_down"; - } - - if (strncmp(field_str(stats, ST_F_STATUS), "MAINT", 5) == 0) - chunk_appendf(out, "<tr class=\"maintain\">"); - else - chunk_appendf(out, - "<tr class=\"%s_%s\">", - (stats[ST_F_BCK].u.u32) ? 
"backup" : "active", style); - - - if (flags & STAT_ADMIN) - chunk_appendf(out, - "<td><input class='%s-checkbox' type=\"checkbox\" name=\"s\" value=\"%s\"></td>", - field_str(stats, ST_F_PXNAME), - field_str(stats, ST_F_SVNAME)); - - chunk_appendf(out, - "<td class=ac><a name=\"%s/%s\"></a>%s" - "<a class=lfsb href=\"#%s/%s\">%s</a>" - "", - field_str(stats, ST_F_PXNAME), field_str(stats, ST_F_SVNAME), - (flags & STAT_SHLGNDS) ? "<u>" : "", - field_str(stats, ST_F_PXNAME), field_str(stats, ST_F_SVNAME), field_str(stats, ST_F_SVNAME)); - - if (flags & STAT_SHLGNDS) { - chunk_appendf(out, "<div class=tips>"); - - if (isdigit((unsigned char)*field_str(stats, ST_F_ADDR))) - chunk_appendf(out, "IPv4: %s, ", field_str(stats, ST_F_ADDR)); - else if (*field_str(stats, ST_F_ADDR) == '[') - chunk_appendf(out, "IPv6: %s, ", field_str(stats, ST_F_ADDR)); - else if (*field_str(stats, ST_F_ADDR)) - chunk_appendf(out, "%s, ", field_str(stats, ST_F_ADDR)); - - /* id */ - chunk_appendf(out, "id: %d, rid: %d", stats[ST_F_SID].u.u32, stats[ST_F_SRID].u.u32); - - /* cookie */ - if (stats[ST_F_COOKIE].type) { - chunk_appendf(out, ", cookie: '"); - chunk_initstr(&src, field_str(stats, ST_F_COOKIE)); - chunk_htmlencode(out, &src); - chunk_appendf(out, "'"); - } - - chunk_appendf(out, "</div>"); - } - - chunk_appendf(out, - /* queue : current, max, limit */ - "%s</td><td>%s</td><td>%s</td><td>%s</td>" - /* sessions rate : current, max, limit */ - "<td>%s</td><td>%s</td><td></td>" - "", - (flags & STAT_SHLGNDS) ? 
"</u>" : "", - U2H(stats[ST_F_QCUR].u.u32), U2H(stats[ST_F_QMAX].u.u32), LIM2A(stats[ST_F_QLIMIT].u.u32, "-"), - U2H(stats[ST_F_RATE].u.u32), U2H(stats[ST_F_RATE_MAX].u.u32)); - - chunk_appendf(out, - /* sessions: current, max, limit, total */ - "<td><u>%s<div class=tips>" - "<table class=det>" - "<tr><th>Current active connections:</th><td>%s</td></tr>" - "<tr><th>Current used connections:</th><td>%s</td></tr>" - "<tr><th>Current idle connections:</th><td>%s</td></tr>" - "<tr><th>- unsafe:</th><td>%s</td></tr>" - "<tr><th>- safe:</th><td>%s</td></tr>" - "<tr><th>Estimated need of connections:</th><td>%s</td></tr>" - "<tr><th>Active connections limit:</th><td>%s</td></tr>" - "<tr><th>Idle connections limit:</th><td>%s</td></tr>" - "</table></div></u>" - "</td><td>%s</td><td>%s</td>" - "<td><u>%s<div class=tips><table class=det>" - "<tr><th>Cum. sessions:</th><td>%s</td></tr>" - "", - U2H(stats[ST_F_SCUR].u.u32), - U2H(stats[ST_F_SCUR].u.u32), - U2H(stats[ST_F_USED_CONN_CUR].u.u32), - U2H(stats[ST_F_SRV_ICUR].u.u32), - U2H(stats[ST_F_IDLE_CONN_CUR].u.u32), - U2H(stats[ST_F_SAFE_CONN_CUR].u.u32), - U2H(stats[ST_F_NEED_CONN_EST].u.u32), - - LIM2A(stats[ST_F_SLIM].u.u32, "-"), - stats[ST_F_SRV_ILIM].type ? U2H(stats[ST_F_SRV_ILIM].u.u32) : "-", - U2H(stats[ST_F_SMAX].u.u32), LIM2A(stats[ST_F_SLIM].u.u32, "-"), - U2H(stats[ST_F_STOT].u.u64), - U2H(stats[ST_F_STOT].u.u64)); - - /* http response (via hover): 1xx, 2xx, 3xx, 4xx, 5xx, other */ - if (strcmp(field_str(stats, ST_F_MODE), "http") == 0) { - chunk_appendf(out, - "<tr><th>New connections:</th><td>%s</td></tr>" - "<tr><th>Reused connections:</th><td>%s</td><td>(%d%%)</td></tr>" - "<tr><th>Cum. 
HTTP requests:</th><td>%s</td></tr>" - "<tr><th>- HTTP 1xx responses:</th><td>%s</td><td>(%d%%)</td></tr>" - "<tr><th>- HTTP 2xx responses:</th><td>%s</td><td>(%d%%)</td></tr>" - "<tr><th>- HTTP 3xx responses:</th><td>%s</td><td>(%d%%)</td></tr>" - "<tr><th>- HTTP 4xx responses:</th><td>%s</td><td>(%d%%)</td></tr>" - "<tr><th>- HTTP 5xx responses:</th><td>%s</td><td>(%d%%)</td></tr>" - "<tr><th>- other responses:</th><td>%s</td><td>(%d%%)</td></tr>" - "<tr><th>Failed hdr rewrites:</th><td>%s</td></tr>" - "<tr><th>Internal error:</th><td>%s</td></tr>" - "", - U2H(stats[ST_F_CONNECT].u.u64), - U2H(stats[ST_F_REUSE].u.u64), - (stats[ST_F_CONNECT].u.u64 + stats[ST_F_REUSE].u.u64) ? - (int)(100 * stats[ST_F_REUSE].u.u64 / (stats[ST_F_CONNECT].u.u64 + stats[ST_F_REUSE].u.u64)) : 0, - U2H(stats[ST_F_REQ_TOT].u.u64), - U2H(stats[ST_F_HRSP_1XX].u.u64), stats[ST_F_REQ_TOT].u.u64 ? - (int)(100 * stats[ST_F_HRSP_1XX].u.u64 / stats[ST_F_REQ_TOT].u.u64) : 0, - U2H(stats[ST_F_HRSP_2XX].u.u64), stats[ST_F_REQ_TOT].u.u64 ? - (int)(100 * stats[ST_F_HRSP_2XX].u.u64 / stats[ST_F_REQ_TOT].u.u64) : 0, - U2H(stats[ST_F_HRSP_3XX].u.u64), stats[ST_F_REQ_TOT].u.u64 ? - (int)(100 * stats[ST_F_HRSP_3XX].u.u64 / stats[ST_F_REQ_TOT].u.u64) : 0, - U2H(stats[ST_F_HRSP_4XX].u.u64), stats[ST_F_REQ_TOT].u.u64 ? - (int)(100 * stats[ST_F_HRSP_4XX].u.u64 / stats[ST_F_REQ_TOT].u.u64) : 0, - U2H(stats[ST_F_HRSP_5XX].u.u64), stats[ST_F_REQ_TOT].u.u64 ? - (int)(100 * stats[ST_F_HRSP_5XX].u.u64 / stats[ST_F_REQ_TOT].u.u64) : 0, - U2H(stats[ST_F_HRSP_OTHER].u.u64), stats[ST_F_REQ_TOT].u.u64 ? - (int)(100 * stats[ST_F_HRSP_OTHER].u.u64 / stats[ST_F_REQ_TOT].u.u64) : 0, - U2H(stats[ST_F_WREW].u.u64), - U2H(stats[ST_F_EINT].u.u64)); - } - - chunk_appendf(out, "<tr><th colspan=3>Max / Avg over last 1024 success. 
conn.</th></tr>"); - chunk_appendf(out, "<tr><th>- Queue time:</th><td>%s / %s</td><td>ms</td></tr>", - U2H(stats[ST_F_QT_MAX].u.u32), U2H(stats[ST_F_QTIME].u.u32)); - chunk_appendf(out, "<tr><th>- Connect time:</th><td>%s / %s</td><td>ms</td></tr>", - U2H(stats[ST_F_CT_MAX].u.u32), U2H(stats[ST_F_CTIME].u.u32)); - if (strcmp(field_str(stats, ST_F_MODE), "http") == 0) - chunk_appendf(out, "<tr><th>- Responses time:</th><td>%s / %s</td><td>ms</td></tr>", - U2H(stats[ST_F_RT_MAX].u.u32), U2H(stats[ST_F_RTIME].u.u32)); - chunk_appendf(out, "<tr><th>- Total time:</th><td>%s / %s</td><td>ms</td></tr>", - U2H(stats[ST_F_TT_MAX].u.u32), U2H(stats[ST_F_TTIME].u.u32)); - - chunk_appendf(out, - "</table></div></u></td>" - /* sessions: lbtot, last */ - "<td>%s</td><td>%s</td>", - U2H(stats[ST_F_LBTOT].u.u64), - human_time(stats[ST_F_LASTSESS].u.s32, 1)); - - chunk_appendf(out, - /* bytes : in, out */ - "<td>%s</td><td>%s</td>" - /* denied: req, resp */ - "<td></td><td>%s</td>" - /* errors : request, connect */ - "<td></td><td>%s</td>" - /* errors : response */ - "<td><u>%s<div class=tips>Connection resets during transfers: %lld client, %lld server</div></u></td>" - /* warnings: retries, redispatches */ - "<td>%lld</td><td>%lld</td>" - "", - U2H(stats[ST_F_BIN].u.u64), U2H(stats[ST_F_BOUT].u.u64), - U2H(stats[ST_F_DRESP].u.u64), - U2H(stats[ST_F_ECON].u.u64), - U2H(stats[ST_F_ERESP].u.u64), - (long long)stats[ST_F_CLI_ABRT].u.u64, - (long long)stats[ST_F_SRV_ABRT].u.u64, - (long long)stats[ST_F_WRETR].u.u64, - (long long)stats[ST_F_WREDIS].u.u64); - - /* status, last change */ - chunk_appendf(out, "<td class=ac>"); - - /* FIXME!!!! - * LASTCHG should contain the last change for *this* server and must be computed - * properly above, as was done below, ie: this server if maint, otherwise ref server - * if tracking. Note that ref is either local or remote depending on tracking. 
- */ - - - if (strncmp(field_str(stats, ST_F_STATUS), "MAINT", 5) == 0) { - chunk_appendf(out, "%s MAINT", human_time(stats[ST_F_LASTCHG].u.u32, 1)); - } - else if (strcmp(field_str(stats, ST_F_STATUS), "no check") == 0) { - chunk_strcat(out, "<i>no check</i>"); - } - else { - chunk_appendf(out, "%s %s", human_time(stats[ST_F_LASTCHG].u.u32, 1), field_str(stats, ST_F_STATUS)); - if (strncmp(field_str(stats, ST_F_STATUS), "DOWN", 4) == 0) { - if (stats[ST_F_CHECK_HEALTH].u.u32) - chunk_strcat(out, " ↑"); - } - else if (stats[ST_F_CHECK_HEALTH].u.u32 < stats[ST_F_CHECK_RISE].u.u32 + stats[ST_F_CHECK_FALL].u.u32 - 1) - chunk_strcat(out, " ↓"); - } - if (strncmp(field_str(stats, ST_F_STATUS), "DOWN", 4) == 0 && - stats[ST_F_AGENT_STATUS].type && !stats[ST_F_AGENT_HEALTH].u.u32) { - chunk_appendf(out, - "</td><td class=ac><u> %s", - field_str(stats, ST_F_AGENT_STATUS)); - - if (stats[ST_F_AGENT_CODE].type) - chunk_appendf(out, "/%d", stats[ST_F_AGENT_CODE].u.u32); - - if (stats[ST_F_AGENT_DURATION].type) - chunk_appendf(out, " in %lums", (long)stats[ST_F_AGENT_DURATION].u.u64); - - chunk_appendf(out, "<div class=tips>%s", field_str(stats, ST_F_AGENT_DESC)); - - if (*field_str(stats, ST_F_LAST_AGT)) { - chunk_appendf(out, ": "); - chunk_initstr(&src, field_str(stats, ST_F_LAST_AGT)); - chunk_htmlencode(out, &src); - } - chunk_appendf(out, "</div></u>"); - } - else if (stats[ST_F_CHECK_STATUS].type) { - chunk_appendf(out, - "</td><td class=ac><u> %s", - field_str(stats, ST_F_CHECK_STATUS)); - - if (stats[ST_F_CHECK_CODE].type) - chunk_appendf(out, "/%d", stats[ST_F_CHECK_CODE].u.u32); - - if (stats[ST_F_CHECK_DURATION].type) - chunk_appendf(out, " in %lums", (long)stats[ST_F_CHECK_DURATION].u.u64); - - chunk_appendf(out, "<div class=tips>%s", field_str(stats, ST_F_CHECK_DESC)); - - if (*field_str(stats, ST_F_LAST_CHK)) { - chunk_appendf(out, ": "); - chunk_initstr(&src, field_str(stats, ST_F_LAST_CHK)); - chunk_htmlencode(out, &src); - } - chunk_appendf(out, 
"</div></u>"); - } - else - chunk_appendf(out, "</td><td>"); - - chunk_appendf(out, - /* weight / uweight */ - "</td><td class=ac>%d/%d</td>" - /* act, bck */ - "<td class=ac>%s</td><td class=ac>%s</td>" - "", - stats[ST_F_WEIGHT].u.u32, stats[ST_F_UWEIGHT].u.u32, - stats[ST_F_BCK].u.u32 ? "-" : "Y", - stats[ST_F_BCK].u.u32 ? "Y" : "-"); - - /* check failures: unique, fatal, down time */ - if (strcmp(field_str(stats, ST_F_STATUS), "MAINT (resolution)") == 0) { - chunk_appendf(out, "<td class=ac colspan=3>resolution</td>"); - } - else if (stats[ST_F_CHKFAIL].type) { - chunk_appendf(out, "<td><u>%lld", (long long)stats[ST_F_CHKFAIL].u.u64); - - if (stats[ST_F_HANAFAIL].type) - chunk_appendf(out, "/%lld", (long long)stats[ST_F_HANAFAIL].u.u64); - - chunk_appendf(out, - "<div class=tips>Failed Health Checks%s</div></u></td>" - "<td>%lld</td><td>%s</td>" - "", - stats[ST_F_HANAFAIL].type ? "/Health Analyses" : "", - (long long)stats[ST_F_CHKDOWN].u.u64, human_time(stats[ST_F_DOWNTIME].u.u32, 1)); - } - else if (strcmp(field_str(stats, ST_F_STATUS), "MAINT") != 0 && field_format(stats, ST_F_TRACKED) == FF_STR) { - /* tracking a server (hence inherited maint would appear as "MAINT (via...)" */ - chunk_appendf(out, - "<td class=ac colspan=3><a class=lfsb href=\"#%s\">via %s</a></td>", - field_str(stats, ST_F_TRACKED), field_str(stats, ST_F_TRACKED)); - } - else - chunk_appendf(out, "<td colspan=3></td>"); - - /* throttle */ - if (stats[ST_F_THROTTLE].type) - chunk_appendf(out, "<td class=ac>%d %%</td>\n", stats[ST_F_THROTTLE].u.u32); - else - chunk_appendf(out, "<td class=ac>-</td>"); - - if (flags & STAT_SHMODULES) { - list_for_each_entry(mod, &stats_module_list[STATS_DOMAIN_PROXY], list) { - chunk_appendf(out, "<td>"); - - if (stats_px_get_cap(mod->domain_flags) & STATS_PX_CAP_SRV) { - chunk_appendf(out, - "<u>%s<div class=tips><table class=det>", - mod->name); - for (j = 0; j < mod->stats_count; ++j) { - chunk_appendf(out, - "<tr><th>%s</th><td>%s</td></tr>", - 
mod->stats[j].desc, field_to_html_str(&stats[ST_F_TOTAL_FIELDS + i])); - ++i; - } - chunk_appendf(out, "</table></div></u>"); - } else { - i += mod->stats_count; - } - - chunk_appendf(out, "</td>"); - } - } - - chunk_appendf(out, "</tr>\n"); - } - else if (stats[ST_F_TYPE].u.u32 == STATS_TYPE_BE) { - chunk_appendf(out, "<tr class=\"backend\">"); - if (flags & STAT_ADMIN) { - /* Column sub-heading for Enable or Disable server */ - chunk_appendf(out, "<td></td>"); - } - chunk_appendf(out, - "<td class=ac>" - /* name */ - "%s<a name=\"%s/Backend\"></a>" - "<a class=lfsb href=\"#%s/Backend\">Backend</a>" - "", - (flags & STAT_SHLGNDS)?"<u>":"", - field_str(stats, ST_F_PXNAME), field_str(stats, ST_F_PXNAME)); - - if (flags & STAT_SHLGNDS) { - /* balancing */ - chunk_appendf(out, "<div class=tips>balancing: %s", - field_str(stats, ST_F_ALGO)); - - /* cookie */ - if (stats[ST_F_COOKIE].type) { - chunk_appendf(out, ", cookie: '"); - chunk_initstr(&src, field_str(stats, ST_F_COOKIE)); - chunk_htmlencode(out, &src); - chunk_appendf(out, "'"); - } - chunk_appendf(out, "</div>"); - } - - chunk_appendf(out, - "%s</td>" - /* queue : current, max */ - "<td>%s</td><td>%s</td><td></td>" - /* sessions rate : current, max, limit */ - "<td>%s</td><td>%s</td><td></td>" - "", - (flags & STAT_SHLGNDS)?"</u>":"", - U2H(stats[ST_F_QCUR].u.u32), U2H(stats[ST_F_QMAX].u.u32), - U2H(stats[ST_F_RATE].u.u32), U2H(stats[ST_F_RATE_MAX].u.u32)); - - chunk_appendf(out, - /* sessions: current, max, limit, total */ - "<td>%s</td><td>%s</td><td>%s</td>" - "<td><u>%s<div class=tips><table class=det>" - "<tr><th>Cum. 
sessions:</th><td>%s</td></tr>" - "", - U2H(stats[ST_F_SCUR].u.u32), U2H(stats[ST_F_SMAX].u.u32), U2H(stats[ST_F_SLIM].u.u32), - U2H(stats[ST_F_STOT].u.u64), - U2H(stats[ST_F_STOT].u.u64)); - - /* http response (via hover): 1xx, 2xx, 3xx, 4xx, 5xx, other */ - if (strcmp(field_str(stats, ST_F_MODE), "http") == 0) { - chunk_appendf(out, - "<tr><th>New connections:</th><td>%s</td></tr>" - "<tr><th>Reused connections:</th><td>%s</td><td>(%d%%)</td></tr>" - "<tr><th>Cum. HTTP requests:</th><td>%s</td></tr>" - "<tr><th>- HTTP 1xx responses:</th><td>%s</td></tr>" - "<tr><th>- HTTP 2xx responses:</th><td>%s</td></tr>" - "<tr><th> Compressed 2xx:</th><td>%s</td><td>(%d%%)</td></tr>" - "<tr><th>- HTTP 3xx responses:</th><td>%s</td></tr>" - "<tr><th>- HTTP 4xx responses:</th><td>%s</td></tr>" - "<tr><th>- HTTP 5xx responses:</th><td>%s</td></tr>" - "<tr><th>- other responses:</th><td>%s</td></tr>" - "<tr><th>Cache lookups:</th><td>%s</td></tr>" - "<tr><th>Cache hits:</th><td>%s</td><td>(%d%%)</td></tr>" - "<tr><th>Failed hdr rewrites:</th><td>%s</td></tr>" - "<tr><th>Internal errors:</th><td>%s</td></tr>" - "", - U2H(stats[ST_F_CONNECT].u.u64), - U2H(stats[ST_F_REUSE].u.u64), - (stats[ST_F_CONNECT].u.u64 + stats[ST_F_REUSE].u.u64) ? - (int)(100 * stats[ST_F_REUSE].u.u64 / (stats[ST_F_CONNECT].u.u64 + stats[ST_F_REUSE].u.u64)) : 0, - U2H(stats[ST_F_REQ_TOT].u.u64), - U2H(stats[ST_F_HRSP_1XX].u.u64), - U2H(stats[ST_F_HRSP_2XX].u.u64), - U2H(stats[ST_F_COMP_RSP].u.u64), - stats[ST_F_HRSP_2XX].u.u64 ? - (int)(100 * stats[ST_F_COMP_RSP].u.u64 / stats[ST_F_HRSP_2XX].u.u64) : 0, - U2H(stats[ST_F_HRSP_3XX].u.u64), - U2H(stats[ST_F_HRSP_4XX].u.u64), - U2H(stats[ST_F_HRSP_5XX].u.u64), - U2H(stats[ST_F_HRSP_OTHER].u.u64), - U2H(stats[ST_F_CACHE_LOOKUPS].u.u64), - U2H(stats[ST_F_CACHE_HITS].u.u64), - stats[ST_F_CACHE_LOOKUPS].u.u64 ? 
- (int)(100 * stats[ST_F_CACHE_HITS].u.u64 / stats[ST_F_CACHE_LOOKUPS].u.u64) : 0, - U2H(stats[ST_F_WREW].u.u64), - U2H(stats[ST_F_EINT].u.u64)); - } - - chunk_appendf(out, "<tr><th colspan=3>Max / Avg over last 1024 success. conn.</th></tr>"); - chunk_appendf(out, "<tr><th>- Queue time:</th><td>%s / %s</td><td>ms</td></tr>", - U2H(stats[ST_F_QT_MAX].u.u32), U2H(stats[ST_F_QTIME].u.u32)); - chunk_appendf(out, "<tr><th>- Connect time:</th><td>%s / %s</td><td>ms</td></tr>", - U2H(stats[ST_F_CT_MAX].u.u32), U2H(stats[ST_F_CTIME].u.u32)); - if (strcmp(field_str(stats, ST_F_MODE), "http") == 0) - chunk_appendf(out, "<tr><th>- Responses time:</th><td>%s / %s</td><td>ms</td></tr>", - U2H(stats[ST_F_RT_MAX].u.u32), U2H(stats[ST_F_RTIME].u.u32)); - chunk_appendf(out, "<tr><th>- Total time:</th><td>%s / %s</td><td>ms</td></tr>", - U2H(stats[ST_F_TT_MAX].u.u32), U2H(stats[ST_F_TTIME].u.u32)); - - chunk_appendf(out, - "</table></div></u></td>" - /* sessions: lbtot, last */ - "<td>%s</td><td>%s</td>" - /* bytes: in */ - "<td>%s</td>" - "", - U2H(stats[ST_F_LBTOT].u.u64), - human_time(stats[ST_F_LASTSESS].u.s32, 1), - U2H(stats[ST_F_BIN].u.u64)); - - chunk_appendf(out, - /* bytes:out + compression stats (via hover): comp_in, comp_out, comp_byp */ - "<td>%s%s<div class=tips><table class=det>" - "<tr><th>Response bytes in:</th><td>%s</td></tr>" - "<tr><th>Compression in:</th><td>%s</td></tr>" - "<tr><th>Compression out:</th><td>%s</td><td>(%d%%)</td></tr>" - "<tr><th>Compression bypass:</th><td>%s</td></tr>" - "<tr><th>Total bytes saved:</th><td>%s</td><td>(%d%%)</td></tr>" - "</table></div>%s</td>", - (stats[ST_F_COMP_IN].u.u64 || stats[ST_F_COMP_BYP].u.u64) ? "<u>":"", - U2H(stats[ST_F_BOUT].u.u64), - U2H(stats[ST_F_BOUT].u.u64), - U2H(stats[ST_F_COMP_IN].u.u64), - U2H(stats[ST_F_COMP_OUT].u.u64), - stats[ST_F_COMP_IN].u.u64 ? 
(int)(stats[ST_F_COMP_OUT].u.u64 * 100 / stats[ST_F_COMP_IN].u.u64) : 0, - U2H(stats[ST_F_COMP_BYP].u.u64), - U2H(stats[ST_F_COMP_IN].u.u64 - stats[ST_F_COMP_OUT].u.u64), - stats[ST_F_BOUT].u.u64 ? (int)((stats[ST_F_COMP_IN].u.u64 - stats[ST_F_COMP_OUT].u.u64) * 100 / stats[ST_F_BOUT].u.u64) : 0, - (stats[ST_F_COMP_IN].u.u64 || stats[ST_F_COMP_BYP].u.u64) ? "</u>":""); - - chunk_appendf(out, - /* denied: req, resp */ - "<td>%s</td><td>%s</td>" - /* errors : request, connect */ - "<td></td><td>%s</td>" - /* errors : response */ - "<td><u>%s<div class=tips>Connection resets during transfers: %lld client, %lld server</div></u></td>" - /* warnings: retries, redispatches */ - "<td>%lld</td><td>%lld</td>" - /* backend status: reflect backend status (up/down): we display UP - * if the backend has known working servers or if it has no server at - * all (eg: for stats). Then we display the total weight, number of - * active and backups. */ - "<td class=ac>%s %s</td><td class=ac> </td><td class=ac>%d/%d</td>" - "<td class=ac>%d</td><td class=ac>%d</td>" - "", - U2H(stats[ST_F_DREQ].u.u64), U2H(stats[ST_F_DRESP].u.u64), - U2H(stats[ST_F_ECON].u.u64), - U2H(stats[ST_F_ERESP].u.u64), - (long long)stats[ST_F_CLI_ABRT].u.u64, - (long long)stats[ST_F_SRV_ABRT].u.u64, - (long long)stats[ST_F_WRETR].u.u64, (long long)stats[ST_F_WREDIS].u.u64, - human_time(stats[ST_F_LASTCHG].u.u32, 1), - strcmp(field_str(stats, ST_F_STATUS), "DOWN") ? field_str(stats, ST_F_STATUS) : "<font color=\"red\"><b>DOWN</b></font>", - stats[ST_F_WEIGHT].u.u32, stats[ST_F_UWEIGHT].u.u32, - stats[ST_F_ACT].u.u32, stats[ST_F_BCK].u.u32); - - chunk_appendf(out, - /* rest of backend: nothing, down transitions, total downtime, throttle */ - "<td class=ac> </td><td>%d</td>" - "<td>%s</td>" - "<td></td>", - stats[ST_F_CHKDOWN].u.u32, - stats[ST_F_DOWNTIME].type ? 
human_time(stats[ST_F_DOWNTIME].u.u32, 1) : " "); - - if (flags & STAT_SHMODULES) { - list_for_each_entry(mod, &stats_module_list[STATS_DOMAIN_PROXY], list) { - chunk_appendf(out, "<td>"); - - if (stats_px_get_cap(mod->domain_flags) & STATS_PX_CAP_BE) { - chunk_appendf(out, - "<u>%s<div class=tips><table class=det>", - mod->name); - for (j = 0; j < mod->stats_count; ++j) { - chunk_appendf(out, - "<tr><th>%s</th><td>%s</td></tr>", - mod->stats[j].desc, field_to_html_str(&stats[ST_F_TOTAL_FIELDS + i])); - ++i; - } - chunk_appendf(out, "</table></div></u>"); - } else { - i += mod->stats_count; - } - - chunk_appendf(out, "</td>"); - } - } - - chunk_appendf(out, "</tr>"); - } - - return 1; -} - -int stats_dump_one_line(const struct field *stats, size_t stats_count, +int stats_dump_one_line(const struct field *line, size_t stats_count, struct appctx *appctx) { struct show_stat_ctx *ctx = appctx->svcctx; + struct buffer *chk = &ctx->chunk; int ret; - if (ctx->flags & STAT_FMT_HTML) - ret = stats_dump_fields_html(&trash_chunk, stats, ctx); - else if (ctx->flags & STAT_FMT_TYPED) - ret = stats_dump_fields_typed(&trash_chunk, stats, stats_count, ctx); - else if (ctx->flags & STAT_FMT_JSON) - ret = stats_dump_fields_json(&trash_chunk, stats, stats_count, ctx); + if (ctx->flags & STAT_F_FMT_HTML) + ret = stats_dump_fields_html(chk, line, ctx); + else if (ctx->flags & STAT_F_FMT_TYPED) + ret = stats_dump_fields_typed(chk, line, stats_count, ctx); + else if (ctx->flags & STAT_F_FMT_JSON) + ret = stats_dump_fields_json(chk, line, stats_count, ctx); + else if (ctx->flags & STAT_F_FMT_FILE) + ret = stats_dump_fields_file(chk, line, stats_count, ctx); else - ret = stats_dump_fields_csv(&trash_chunk, stats, stats_count, ctx); + ret = stats_dump_fields_csv(chk, line, stats_count, ctx); return ret; } -/* Fill <stats> with the frontend statistics. <stats> is preallocated array of - * length <len>. If <selected_field> is != NULL, only fill this one. 
The length - * of the array must be at least ST_F_TOTAL_FIELDS. If this length is less than - * this value, or if the selected field is not implemented for frontends, the - * function returns 0, otherwise, it returns 1. - */ -int stats_fill_fe_stats(struct proxy *px, struct field *stats, int len, - enum stat_field *selected_field) -{ - enum stat_field current_field = (selected_field != NULL ? *selected_field : 0); - - if (len < ST_F_TOTAL_FIELDS) - return 0; - - for (; current_field < ST_F_TOTAL_FIELDS; current_field++) { - struct field metric = { 0 }; - - switch (current_field) { - case ST_F_PXNAME: - metric = mkf_str(FO_KEY|FN_NAME|FS_SERVICE, px->id); - break; - case ST_F_SVNAME: - metric = mkf_str(FO_KEY|FN_NAME|FS_SERVICE, "FRONTEND"); - break; - case ST_F_MODE: - metric = mkf_str(FO_CONFIG|FS_SERVICE, proxy_mode_str(px->mode)); - break; - case ST_F_SCUR: - metric = mkf_u32(0, px->feconn); - break; - case ST_F_SMAX: - metric = mkf_u32(FN_MAX, px->fe_counters.conn_max); - break; - case ST_F_SLIM: - metric = mkf_u32(FO_CONFIG|FN_LIMIT, px->maxconn); - break; - case ST_F_STOT: - metric = mkf_u64(FN_COUNTER, px->fe_counters.cum_sess); - break; - case ST_F_BIN: - metric = mkf_u64(FN_COUNTER, px->fe_counters.bytes_in); - break; - case ST_F_BOUT: - metric = mkf_u64(FN_COUNTER, px->fe_counters.bytes_out); - break; - case ST_F_DREQ: - metric = mkf_u64(FN_COUNTER, px->fe_counters.denied_req); - break; - case ST_F_DRESP: - metric = mkf_u64(FN_COUNTER, px->fe_counters.denied_resp); - break; - case ST_F_EREQ: - metric = mkf_u64(FN_COUNTER, px->fe_counters.failed_req); - break; - case ST_F_DCON: - metric = mkf_u64(FN_COUNTER, px->fe_counters.denied_conn); - break; - case ST_F_DSES: - metric = mkf_u64(FN_COUNTER, px->fe_counters.denied_sess); - break; - case ST_F_STATUS: { - const char *state; - - if (px->flags & (PR_FL_DISABLED|PR_FL_STOPPED)) - state = "STOP"; - else if (px->flags & PR_FL_PAUSED) - state = "PAUSED"; - else - state = "OPEN"; - metric = mkf_str(FO_STATUS, 
state); - break; - } - case ST_F_PID: - metric = mkf_u32(FO_KEY, 1); - break; - case ST_F_IID: - metric = mkf_u32(FO_KEY|FS_SERVICE, px->uuid); - break; - case ST_F_SID: - metric = mkf_u32(FO_KEY|FS_SERVICE, 0); - break; - case ST_F_TYPE: - metric = mkf_u32(FO_CONFIG|FS_SERVICE, STATS_TYPE_FE); - break; - case ST_F_RATE: - metric = mkf_u32(FN_RATE, read_freq_ctr(&px->fe_sess_per_sec)); - break; - case ST_F_RATE_LIM: - metric = mkf_u32(FO_CONFIG|FN_LIMIT, px->fe_sps_lim); - break; - case ST_F_RATE_MAX: - metric = mkf_u32(FN_MAX, px->fe_counters.sps_max); - break; - case ST_F_WREW: - metric = mkf_u64(FN_COUNTER, px->fe_counters.failed_rewrites); - break; - case ST_F_EINT: - metric = mkf_u64(FN_COUNTER, px->fe_counters.internal_errors); - break; - case ST_F_HRSP_1XX: - if (px->mode == PR_MODE_HTTP) - metric = mkf_u64(FN_COUNTER, px->fe_counters.p.http.rsp[1]); - break; - case ST_F_HRSP_2XX: - if (px->mode == PR_MODE_HTTP) - metric = mkf_u64(FN_COUNTER, px->fe_counters.p.http.rsp[2]); - break; - case ST_F_HRSP_3XX: - if (px->mode == PR_MODE_HTTP) - metric = mkf_u64(FN_COUNTER, px->fe_counters.p.http.rsp[3]); - break; - case ST_F_HRSP_4XX: - if (px->mode == PR_MODE_HTTP) - metric = mkf_u64(FN_COUNTER, px->fe_counters.p.http.rsp[4]); - break; - case ST_F_HRSP_5XX: - if (px->mode == PR_MODE_HTTP) - metric = mkf_u64(FN_COUNTER, px->fe_counters.p.http.rsp[5]); - break; - case ST_F_HRSP_OTHER: - if (px->mode == PR_MODE_HTTP) - metric = mkf_u64(FN_COUNTER, px->fe_counters.p.http.rsp[0]); - break; - case ST_F_INTERCEPTED: - if (px->mode == PR_MODE_HTTP) - metric = mkf_u64(FN_COUNTER, px->fe_counters.intercepted_req); - break; - case ST_F_CACHE_LOOKUPS: - if (px->mode == PR_MODE_HTTP) - metric = mkf_u64(FN_COUNTER, px->fe_counters.p.http.cache_lookups); - break; - case ST_F_CACHE_HITS: - if (px->mode == PR_MODE_HTTP) - metric = mkf_u64(FN_COUNTER, px->fe_counters.p.http.cache_hits); - break; - case ST_F_REQ_RATE: - metric = mkf_u32(FN_RATE, read_freq_ctr(&px->fe_req_per_sec)); 
- break; - case ST_F_REQ_RATE_MAX: - metric = mkf_u32(FN_MAX, px->fe_counters.p.http.rps_max); - break; - case ST_F_REQ_TOT: { - int i; - uint64_t total_req; - size_t nb_reqs = - sizeof(px->fe_counters.p.http.cum_req) / sizeof(*px->fe_counters.p.http.cum_req); - - total_req = 0; - for (i = 0; i < nb_reqs; i++) - total_req += px->fe_counters.p.http.cum_req[i]; - metric = mkf_u64(FN_COUNTER, total_req); - break; - } - case ST_F_COMP_IN: - metric = mkf_u64(FN_COUNTER, px->fe_counters.comp_in[COMP_DIR_RES]); - break; - case ST_F_COMP_OUT: - metric = mkf_u64(FN_COUNTER, px->fe_counters.comp_out[COMP_DIR_RES]); - break; - case ST_F_COMP_BYP: - metric = mkf_u64(FN_COUNTER, px->fe_counters.comp_byp[COMP_DIR_RES]); - break; - case ST_F_COMP_RSP: - metric = mkf_u64(FN_COUNTER, px->fe_counters.p.http.comp_rsp); - break; - case ST_F_CONN_RATE: - metric = mkf_u32(FN_RATE, read_freq_ctr(&px->fe_conn_per_sec)); - break; - case ST_F_CONN_RATE_MAX: - metric = mkf_u32(FN_MAX, px->fe_counters.cps_max); - break; - case ST_F_CONN_TOT: - metric = mkf_u64(FN_COUNTER, px->fe_counters.cum_conn); - break; - case ST_F_SESS_OTHER: { - int i; - uint64_t total_sess; - size_t nb_sess = - sizeof(px->fe_counters.cum_sess_ver) / sizeof(*px->fe_counters.cum_sess_ver); - - total_sess = px->fe_counters.cum_sess; - for (i = 0; i < nb_sess; i++) - total_sess -= px->fe_counters.cum_sess_ver[i]; - total_sess = (int64_t)total_sess < 0 ? 
0 : total_sess; - metric = mkf_u64(FN_COUNTER, total_sess); - break; - } - case ST_F_H1SESS: - metric = mkf_u64(FN_COUNTER, px->fe_counters.cum_sess_ver[0]); - break; - case ST_F_H2SESS: - metric = mkf_u64(FN_COUNTER, px->fe_counters.cum_sess_ver[1]); - break; - case ST_F_H3SESS: - metric = mkf_u64(FN_COUNTER, px->fe_counters.cum_sess_ver[2]); - break; - case ST_F_REQ_OTHER: - metric = mkf_u64(FN_COUNTER, px->fe_counters.p.http.cum_req[0]); - break; - case ST_F_H1REQ: - metric = mkf_u64(FN_COUNTER, px->fe_counters.p.http.cum_req[1]); - break; - case ST_F_H2REQ: - metric = mkf_u64(FN_COUNTER, px->fe_counters.p.http.cum_req[2]); - break; - case ST_F_H3REQ: - metric = mkf_u64(FN_COUNTER, px->fe_counters.p.http.cum_req[3]); - break; - default: - /* not used for frontends. If a specific metric - * is requested, return an error. Otherwise continue. - */ - if (selected_field != NULL) - return 0; - continue; - } - stats[current_field] = metric; - if (selected_field != NULL) - break; - } - return 1; -} - -/* Dumps a frontend's line to the local trash buffer for the current proxy <px> - * and uses the state from stream connector <sc>. The caller is responsible for - * clearing the local trash buffer if needed. Returns non-zero if it emits - * anything, zero otherwise. 
- */ -static int stats_dump_fe_stats(struct stconn *sc, struct proxy *px) -{ - struct appctx *appctx = __sc_appctx(sc); - struct show_stat_ctx *ctx = appctx->svcctx; - struct field *stats = stat_l[STATS_DOMAIN_PROXY]; - struct stats_module *mod; - size_t stats_count = ST_F_TOTAL_FIELDS; - - if (!(px->cap & PR_CAP_FE)) - return 0; - - if ((ctx->flags & STAT_BOUND) && !(ctx->type & (1 << STATS_TYPE_FE))) - return 0; - - memset(stats, 0, sizeof(struct field) * stat_count[STATS_DOMAIN_PROXY]); - - if (!stats_fill_fe_stats(px, stats, ST_F_TOTAL_FIELDS, NULL)) - return 0; - - list_for_each_entry(mod, &stats_module_list[STATS_DOMAIN_PROXY], list) { - void *counters; - - if (!(stats_px_get_cap(mod->domain_flags) & STATS_PX_CAP_FE)) { - stats_count += mod->stats_count; - continue; - } - - counters = EXTRA_COUNTERS_GET(px->extra_counters_fe, mod); - mod->fill_stats(counters, stats + stats_count); - stats_count += mod->stats_count; - } - - return stats_dump_one_line(stats, stats_count, appctx); -} - -/* Fill <stats> with the listener statistics. <stats> is preallocated array of - * length <len>. The length of the array must be at least ST_F_TOTAL_FIELDS. If - * this length is less then this value, the function returns 0, otherwise, it - * returns 1. If selected_field is != NULL, only fill this one. <flags> can - * take the value STAT_SHLGNDS. - */ -int stats_fill_li_stats(struct proxy *px, struct listener *l, int flags, - struct field *stats, int len, enum stat_field *selected_field) -{ - enum stat_field current_field = (selected_field != NULL ? 
*selected_field : 0); - struct buffer *out = get_trash_chunk(); - - if (len < ST_F_TOTAL_FIELDS) - return 0; - - if (!l->counters) - return 0; - - chunk_reset(out); - - for (; current_field < ST_F_TOTAL_FIELDS; current_field++) { - struct field metric = { 0 }; - - switch (current_field) { - case ST_F_PXNAME: - metric = mkf_str(FO_KEY|FN_NAME|FS_SERVICE, px->id); - break; - case ST_F_SVNAME: - metric = mkf_str(FO_KEY|FN_NAME|FS_SERVICE, l->name); - break; - case ST_F_MODE: - metric = mkf_str(FO_CONFIG|FS_SERVICE, proxy_mode_str(px->mode)); - break; - case ST_F_SCUR: - metric = mkf_u32(0, l->nbconn); - break; - case ST_F_SMAX: - metric = mkf_u32(FN_MAX, l->counters->conn_max); - break; - case ST_F_SLIM: - metric = mkf_u32(FO_CONFIG|FN_LIMIT, l->bind_conf->maxconn); - break; - case ST_F_STOT: - metric = mkf_u64(FN_COUNTER, l->counters->cum_conn); - break; - case ST_F_BIN: - metric = mkf_u64(FN_COUNTER, l->counters->bytes_in); - break; - case ST_F_BOUT: - metric = mkf_u64(FN_COUNTER, l->counters->bytes_out); - break; - case ST_F_DREQ: - metric = mkf_u64(FN_COUNTER, l->counters->denied_req); - break; - case ST_F_DRESP: - metric = mkf_u64(FN_COUNTER, l->counters->denied_resp); - break; - case ST_F_EREQ: - metric = mkf_u64(FN_COUNTER, l->counters->failed_req); - break; - case ST_F_DCON: - metric = mkf_u64(FN_COUNTER, l->counters->denied_conn); - break; - case ST_F_DSES: - metric = mkf_u64(FN_COUNTER, l->counters->denied_sess); - break; - case ST_F_STATUS: - metric = mkf_str(FO_STATUS, li_status_st[get_li_status(l)]); - break; - case ST_F_PID: - metric = mkf_u32(FO_KEY, 1); - break; - case ST_F_IID: - metric = mkf_u32(FO_KEY|FS_SERVICE, px->uuid); - break; - case ST_F_SID: - metric = mkf_u32(FO_KEY|FS_SERVICE, l->luid); - break; - case ST_F_TYPE: - metric = mkf_u32(FO_CONFIG|FS_SERVICE, STATS_TYPE_SO); - break; - case ST_F_WREW: - metric = mkf_u64(FN_COUNTER, l->counters->failed_rewrites); - break; - case ST_F_EINT: - metric = mkf_u64(FN_COUNTER, 
l->counters->internal_errors); - break; - case ST_F_ADDR: - if (flags & STAT_SHLGNDS) { - char str[INET6_ADDRSTRLEN]; - int port; - - port = get_host_port(&l->rx.addr); - switch (addr_to_str(&l->rx.addr, str, sizeof(str))) { - case AF_INET: - metric = mkf_str(FO_CONFIG|FS_SERVICE, chunk_newstr(out)); - chunk_appendf(out, "%s:%d", str, port); - break; - case AF_INET6: - metric = mkf_str(FO_CONFIG|FS_SERVICE, chunk_newstr(out)); - chunk_appendf(out, "[%s]:%d", str, port); - break; - case AF_UNIX: - metric = mkf_str(FO_CONFIG|FS_SERVICE, "unix"); - break; - case -1: - metric = mkf_str(FO_CONFIG|FS_SERVICE, chunk_newstr(out)); - chunk_strcat(out, strerror(errno)); - break; - default: /* address family not supported */ - break; - } - } - break; - case ST_F_PROTO: - metric = mkf_str(FO_STATUS, l->rx.proto->name); - break; - default: - /* not used for listen. If a specific metric - * is requested, return an error. Otherwise continue. - */ - if (selected_field != NULL) - return 0; - continue; - } - stats[current_field] = metric; - if (selected_field != NULL) - break; - } - return 1; -} - -/* Dumps a line for listener <l> and proxy <px> to the local trash buffer and - * uses the state from stream connector <sc>. The caller is responsible for - * clearing the local trash buffer if needed. Returns non-zero if it emits - * anything, zero otherwise. 
- */ -static int stats_dump_li_stats(struct stconn *sc, struct proxy *px, struct listener *l) -{ - struct appctx *appctx = __sc_appctx(sc); - struct show_stat_ctx *ctx = appctx->svcctx; - struct field *stats = stat_l[STATS_DOMAIN_PROXY]; - struct stats_module *mod; - size_t stats_count = ST_F_TOTAL_FIELDS; - - memset(stats, 0, sizeof(struct field) * stat_count[STATS_DOMAIN_PROXY]); - - if (!stats_fill_li_stats(px, l, ctx->flags, stats, - ST_F_TOTAL_FIELDS, NULL)) - return 0; - - list_for_each_entry(mod, &stats_module_list[STATS_DOMAIN_PROXY], list) { - void *counters; - - if (!(stats_px_get_cap(mod->domain_flags) & STATS_PX_CAP_LI)) { - stats_count += mod->stats_count; - continue; - } - - counters = EXTRA_COUNTERS_GET(l->extra_counters, mod); - mod->fill_stats(counters, stats + stats_count); - stats_count += mod->stats_count; - } - - return stats_dump_one_line(stats, stats_count, appctx); -} - -enum srv_stats_state { - SRV_STATS_STATE_DOWN = 0, - SRV_STATS_STATE_DOWN_AGENT, - SRV_STATS_STATE_GOING_UP, - SRV_STATS_STATE_UP_GOING_DOWN, - SRV_STATS_STATE_UP, - SRV_STATS_STATE_NOLB_GOING_DOWN, - SRV_STATS_STATE_NOLB, - SRV_STATS_STATE_DRAIN_GOING_DOWN, - SRV_STATS_STATE_DRAIN, - SRV_STATS_STATE_DRAIN_AGENT, - SRV_STATS_STATE_NO_CHECK, - - SRV_STATS_STATE_COUNT, /* Must be last */ -}; - -static const char *srv_hlt_st[SRV_STATS_STATE_COUNT] = { - [SRV_STATS_STATE_DOWN] = "DOWN", - [SRV_STATS_STATE_DOWN_AGENT] = "DOWN (agent)", - [SRV_STATS_STATE_GOING_UP] = "DOWN %d/%d", - [SRV_STATS_STATE_UP_GOING_DOWN] = "UP %d/%d", - [SRV_STATS_STATE_UP] = "UP", - [SRV_STATS_STATE_NOLB_GOING_DOWN] = "NOLB %d/%d", - [SRV_STATS_STATE_NOLB] = "NOLB", - [SRV_STATS_STATE_DRAIN_GOING_DOWN] = "DRAIN %d/%d", - [SRV_STATS_STATE_DRAIN] = "DRAIN", - [SRV_STATS_STATE_DRAIN_AGENT] = "DRAIN (agent)", - [SRV_STATS_STATE_NO_CHECK] = "no check" -}; - -/* Compute server state helper - */ -static void stats_fill_sv_stats_computestate(struct server *sv, struct server *ref, - enum srv_stats_state *state) 
-{ - if (sv->cur_state == SRV_ST_RUNNING || sv->cur_state == SRV_ST_STARTING) { - if ((ref->check.state & CHK_ST_ENABLED) && - (ref->check.health < ref->check.rise + ref->check.fall - 1)) { - *state = SRV_STATS_STATE_UP_GOING_DOWN; - } else { - *state = SRV_STATS_STATE_UP; - } - - if (sv->cur_admin & SRV_ADMF_DRAIN) { - if (ref->agent.state & CHK_ST_ENABLED) - *state = SRV_STATS_STATE_DRAIN_AGENT; - else if (*state == SRV_STATS_STATE_UP_GOING_DOWN) - *state = SRV_STATS_STATE_DRAIN_GOING_DOWN; - else - *state = SRV_STATS_STATE_DRAIN; - } - - if (*state == SRV_STATS_STATE_UP && !(ref->check.state & CHK_ST_ENABLED)) { - *state = SRV_STATS_STATE_NO_CHECK; - } - } - else if (sv->cur_state == SRV_ST_STOPPING) { - if ((!(sv->check.state & CHK_ST_ENABLED) && !sv->track) || - (ref->check.health == ref->check.rise + ref->check.fall - 1)) { - *state = SRV_STATS_STATE_NOLB; - } else { - *state = SRV_STATS_STATE_NOLB_GOING_DOWN; - } - } - else { /* stopped */ - if ((ref->agent.state & CHK_ST_ENABLED) && !ref->agent.health) { - *state = SRV_STATS_STATE_DOWN_AGENT; - } else if ((ref->check.state & CHK_ST_ENABLED) && !ref->check.health) { - *state = SRV_STATS_STATE_DOWN; /* DOWN */ - } else if ((ref->agent.state & CHK_ST_ENABLED) || (ref->check.state & CHK_ST_ENABLED)) { - *state = SRV_STATS_STATE_GOING_UP; - } else { - *state = SRV_STATS_STATE_DOWN; /* DOWN, unchecked */ - } - } -} - -/* Fill <stats> with the backend statistics. <stats> is preallocated array of - * length <len>. If <selected_field> is != NULL, only fill this one. The length - * of the array must be at least ST_F_TOTAL_FIELDS. If this length is less than - * this value, or if the selected field is not implemented for servers, the - * function returns 0, otherwise, it returns 1. <flags> can take the value - * STAT_SHLGNDS. 
- */ -int stats_fill_sv_stats(struct proxy *px, struct server *sv, int flags, - struct field *stats, int len, - enum stat_field *selected_field) -{ - enum stat_field current_field = (selected_field != NULL ? *selected_field : 0); - struct server *via = sv->track ? sv->track : sv; - struct server *ref = via; - enum srv_stats_state state = 0; - char str[INET6_ADDRSTRLEN]; - struct buffer *out = get_trash_chunk(); - char *fld_status; - long long srv_samples_counter; - unsigned int srv_samples_window = TIME_STATS_SAMPLES; - - if (len < ST_F_TOTAL_FIELDS) - return 0; - - chunk_reset(out); - - /* compute state for later use */ - if (selected_field == NULL || *selected_field == ST_F_STATUS || - *selected_field == ST_F_CHECK_RISE || *selected_field == ST_F_CHECK_FALL || - *selected_field == ST_F_CHECK_HEALTH || *selected_field == ST_F_HANAFAIL) { - /* we have "via" which is the tracked server as described in the configuration, - * and "ref" which is the checked server and the end of the chain. - */ - while (ref->track) - ref = ref->track; - stats_fill_sv_stats_computestate(sv, ref, &state); - } - - /* compue time values for later use */ - if (selected_field == NULL || *selected_field == ST_F_QTIME || - *selected_field == ST_F_CTIME || *selected_field == ST_F_RTIME || - *selected_field == ST_F_TTIME) { - srv_samples_counter = (px->mode == PR_MODE_HTTP) ? 
sv->counters.p.http.cum_req : sv->counters.cum_lbconn; - if (srv_samples_counter < TIME_STATS_SAMPLES && srv_samples_counter > 0) - srv_samples_window = srv_samples_counter; - } - - for (; current_field < ST_F_TOTAL_FIELDS; current_field++) { - struct field metric = { 0 }; - - switch (current_field) { - case ST_F_PXNAME: - metric = mkf_str(FO_KEY|FN_NAME|FS_SERVICE, px->id); - break; - case ST_F_SVNAME: - metric = mkf_str(FO_KEY|FN_NAME|FS_SERVICE, sv->id); - break; - case ST_F_MODE: - metric = mkf_str(FO_CONFIG|FS_SERVICE, proxy_mode_str(px->mode)); - break; - case ST_F_QCUR: - metric = mkf_u32(0, sv->queue.length); - break; - case ST_F_QMAX: - metric = mkf_u32(FN_MAX, sv->counters.nbpend_max); - break; - case ST_F_SCUR: - metric = mkf_u32(0, sv->cur_sess); - break; - case ST_F_SMAX: - metric = mkf_u32(FN_MAX, sv->counters.cur_sess_max); - break; - case ST_F_SLIM: - if (sv->maxconn) - metric = mkf_u32(FO_CONFIG|FN_LIMIT, sv->maxconn); - break; - case ST_F_SRV_ICUR: - metric = mkf_u32(0, sv->curr_idle_conns); - break; - case ST_F_SRV_ILIM: - if (sv->max_idle_conns != -1) - metric = mkf_u32(FO_CONFIG|FN_LIMIT, sv->max_idle_conns); - break; - case ST_F_STOT: - metric = mkf_u64(FN_COUNTER, sv->counters.cum_sess); - break; - case ST_F_BIN: - metric = mkf_u64(FN_COUNTER, sv->counters.bytes_in); - break; - case ST_F_BOUT: - metric = mkf_u64(FN_COUNTER, sv->counters.bytes_out); - break; - case ST_F_DRESP: - metric = mkf_u64(FN_COUNTER, sv->counters.denied_resp); - break; - case ST_F_ECON: - metric = mkf_u64(FN_COUNTER, sv->counters.failed_conns); - break; - case ST_F_ERESP: - metric = mkf_u64(FN_COUNTER, sv->counters.failed_resp); - break; - case ST_F_WRETR: - metric = mkf_u64(FN_COUNTER, sv->counters.retries); - break; - case ST_F_WREDIS: - metric = mkf_u64(FN_COUNTER, sv->counters.redispatches); - break; - case ST_F_WREW: - metric = mkf_u64(FN_COUNTER, sv->counters.failed_rewrites); - break; - case ST_F_EINT: - metric = mkf_u64(FN_COUNTER, sv->counters.internal_errors); 
- break; - case ST_F_CONNECT: - metric = mkf_u64(FN_COUNTER, sv->counters.connect); - break; - case ST_F_REUSE: - metric = mkf_u64(FN_COUNTER, sv->counters.reuse); - break; - case ST_F_IDLE_CONN_CUR: - metric = mkf_u32(0, sv->curr_idle_nb); - break; - case ST_F_SAFE_CONN_CUR: - metric = mkf_u32(0, sv->curr_safe_nb); - break; - case ST_F_USED_CONN_CUR: - metric = mkf_u32(0, sv->curr_used_conns); - break; - case ST_F_NEED_CONN_EST: - metric = mkf_u32(0, sv->est_need_conns); - break; - case ST_F_STATUS: - fld_status = chunk_newstr(out); - if (sv->cur_admin & SRV_ADMF_RMAINT) - chunk_appendf(out, "MAINT (resolution)"); - else if (sv->cur_admin & SRV_ADMF_IMAINT) - chunk_appendf(out, "MAINT (via %s/%s)", via->proxy->id, via->id); - else if (sv->cur_admin & SRV_ADMF_MAINT) - chunk_appendf(out, "MAINT"); - else - chunk_appendf(out, - srv_hlt_st[state], - (ref->cur_state != SRV_ST_STOPPED) ? (ref->check.health - ref->check.rise + 1) : (ref->check.health), - (ref->cur_state != SRV_ST_STOPPED) ? (ref->check.fall) : (ref->check.rise)); - - metric = mkf_str(FO_STATUS, fld_status); - break; - case ST_F_LASTCHG: - metric = mkf_u32(FN_AGE, ns_to_sec(now_ns) - sv->last_change); - break; - case ST_F_WEIGHT: - metric = mkf_u32(FN_AVG, (sv->cur_eweight * px->lbprm.wmult + px->lbprm.wdiv - 1) / px->lbprm.wdiv); - break; - case ST_F_UWEIGHT: - metric = mkf_u32(FN_AVG, sv->uweight); - break; - case ST_F_ACT: - metric = mkf_u32(FO_STATUS, (sv->flags & SRV_F_BACKUP) ? 0 : 1); - break; - case ST_F_BCK: - metric = mkf_u32(FO_STATUS, (sv->flags & SRV_F_BACKUP) ? 
1 : 0); - break; - case ST_F_CHKFAIL: - if (sv->check.state & CHK_ST_ENABLED) - metric = mkf_u64(FN_COUNTER, sv->counters.failed_checks); - break; - case ST_F_CHKDOWN: - if (sv->check.state & CHK_ST_ENABLED) - metric = mkf_u64(FN_COUNTER, sv->counters.down_trans); - break; - case ST_F_DOWNTIME: - if (sv->check.state & CHK_ST_ENABLED) - metric = mkf_u32(FN_COUNTER, srv_downtime(sv)); - break; - case ST_F_QLIMIT: - if (sv->maxqueue) - metric = mkf_u32(FO_CONFIG|FS_SERVICE, sv->maxqueue); - break; - case ST_F_PID: - metric = mkf_u32(FO_KEY, 1); - break; - case ST_F_IID: - metric = mkf_u32(FO_KEY|FS_SERVICE, px->uuid); - break; - case ST_F_SID: - metric = mkf_u32(FO_KEY|FS_SERVICE, sv->puid); - break; - case ST_F_SRID: - metric = mkf_u32(FN_COUNTER, sv->rid); - break; - case ST_F_THROTTLE: - if (sv->cur_state == SRV_ST_STARTING && !server_is_draining(sv)) - metric = mkf_u32(FN_AVG, server_throttle_rate(sv)); - break; - case ST_F_LBTOT: - metric = mkf_u64(FN_COUNTER, sv->counters.cum_lbconn); - break; - case ST_F_TRACKED: - if (sv->track) { - char *fld_track = chunk_newstr(out); - chunk_appendf(out, "%s/%s", sv->track->proxy->id, sv->track->id); - metric = mkf_str(FO_CONFIG|FN_NAME|FS_SERVICE, fld_track); - } - break; - case ST_F_TYPE: - metric = mkf_u32(FO_CONFIG|FS_SERVICE, STATS_TYPE_SV); - break; - case ST_F_RATE: - metric = mkf_u32(FN_RATE, read_freq_ctr(&sv->sess_per_sec)); - break; - case ST_F_RATE_MAX: - metric = mkf_u32(FN_MAX, sv->counters.sps_max); - break; - case ST_F_CHECK_STATUS: - if ((sv->check.state & (CHK_ST_ENABLED|CHK_ST_PAUSED)) == CHK_ST_ENABLED) { - const char *fld_chksts; - - fld_chksts = chunk_newstr(out); - chunk_strcat(out, "* "); // for check in progress - chunk_strcat(out, get_check_status_info(sv->check.status)); - if (!(sv->check.state & CHK_ST_INPROGRESS)) - fld_chksts += 2; // skip "* " - metric = mkf_str(FN_OUTPUT, fld_chksts); - } - break; - case ST_F_CHECK_CODE: - if ((sv->check.state & (CHK_ST_ENABLED|CHK_ST_PAUSED)) == 
CHK_ST_ENABLED && - sv->check.status >= HCHK_STATUS_L57DATA) - metric = mkf_u32(FN_OUTPUT, sv->check.code); - break; - case ST_F_CHECK_DURATION: - if ((sv->check.state & (CHK_ST_ENABLED|CHK_ST_PAUSED)) == CHK_ST_ENABLED && - sv->check.status >= HCHK_STATUS_CHECKED) - metric = mkf_u64(FN_DURATION, MAX(sv->check.duration, 0)); - break; - case ST_F_CHECK_DESC: - if ((sv->check.state & (CHK_ST_ENABLED|CHK_ST_PAUSED)) == CHK_ST_ENABLED) - metric = mkf_str(FN_OUTPUT, get_check_status_description(sv->check.status)); - break; - case ST_F_LAST_CHK: - if ((sv->check.state & (CHK_ST_ENABLED|CHK_ST_PAUSED)) == CHK_ST_ENABLED) - metric = mkf_str(FN_OUTPUT, sv->check.desc); - break; - case ST_F_CHECK_RISE: - if ((sv->check.state & (CHK_ST_ENABLED|CHK_ST_PAUSED)) == CHK_ST_ENABLED) - metric = mkf_u32(FO_CONFIG|FS_SERVICE, ref->check.rise); - break; - case ST_F_CHECK_FALL: - if ((sv->check.state & (CHK_ST_ENABLED|CHK_ST_PAUSED)) == CHK_ST_ENABLED) - metric = mkf_u32(FO_CONFIG|FS_SERVICE, ref->check.fall); - break; - case ST_F_CHECK_HEALTH: - if ((sv->check.state & (CHK_ST_ENABLED|CHK_ST_PAUSED)) == CHK_ST_ENABLED) - metric = mkf_u32(FO_CONFIG|FS_SERVICE, ref->check.health); - break; - case ST_F_AGENT_STATUS: - if ((sv->agent.state & (CHK_ST_ENABLED|CHK_ST_PAUSED)) == CHK_ST_ENABLED) { - const char *fld_chksts; - - fld_chksts = chunk_newstr(out); - chunk_strcat(out, "* "); // for check in progress - chunk_strcat(out, get_check_status_info(sv->agent.status)); - if (!(sv->agent.state & CHK_ST_INPROGRESS)) - fld_chksts += 2; // skip "* " - metric = mkf_str(FN_OUTPUT, fld_chksts); - } - break; - case ST_F_AGENT_CODE: - if ((sv->agent.state & (CHK_ST_ENABLED|CHK_ST_PAUSED)) == CHK_ST_ENABLED && - (sv->agent.status >= HCHK_STATUS_L57DATA)) - metric = mkf_u32(FN_OUTPUT, sv->agent.code); - break; - case ST_F_AGENT_DURATION: - if ((sv->agent.state & (CHK_ST_ENABLED|CHK_ST_PAUSED)) == CHK_ST_ENABLED) - metric = mkf_u64(FN_DURATION, sv->agent.duration); - break; - case ST_F_AGENT_DESC: - if 
((sv->agent.state & (CHK_ST_ENABLED|CHK_ST_PAUSED)) == CHK_ST_ENABLED) - metric = mkf_str(FN_OUTPUT, get_check_status_description(sv->agent.status)); - break; - case ST_F_LAST_AGT: - if ((sv->agent.state & (CHK_ST_ENABLED|CHK_ST_PAUSED)) == CHK_ST_ENABLED) - metric = mkf_str(FN_OUTPUT, sv->agent.desc); - break; - case ST_F_AGENT_RISE: - if ((sv->agent.state & (CHK_ST_ENABLED|CHK_ST_PAUSED)) == CHK_ST_ENABLED) - metric = mkf_u32(FO_CONFIG|FS_SERVICE, sv->agent.rise); - break; - case ST_F_AGENT_FALL: - if ((sv->agent.state & (CHK_ST_ENABLED|CHK_ST_PAUSED)) == CHK_ST_ENABLED) - metric = mkf_u32(FO_CONFIG|FS_SERVICE, sv->agent.fall); - break; - case ST_F_AGENT_HEALTH: - if ((sv->agent.state & (CHK_ST_ENABLED|CHK_ST_PAUSED)) == CHK_ST_ENABLED) - metric = mkf_u32(FO_CONFIG|FS_SERVICE, sv->agent.health); - break; - case ST_F_REQ_TOT: - if (px->mode == PR_MODE_HTTP) - metric = mkf_u64(FN_COUNTER, sv->counters.p.http.cum_req); - break; - case ST_F_HRSP_1XX: - if (px->mode == PR_MODE_HTTP) - metric = mkf_u64(FN_COUNTER, sv->counters.p.http.rsp[1]); - break; - case ST_F_HRSP_2XX: - if (px->mode == PR_MODE_HTTP) - metric = mkf_u64(FN_COUNTER, sv->counters.p.http.rsp[2]); - break; - case ST_F_HRSP_3XX: - if (px->mode == PR_MODE_HTTP) - metric = mkf_u64(FN_COUNTER, sv->counters.p.http.rsp[3]); - break; - case ST_F_HRSP_4XX: - if (px->mode == PR_MODE_HTTP) - metric = mkf_u64(FN_COUNTER, sv->counters.p.http.rsp[4]); - break; - case ST_F_HRSP_5XX: - if (px->mode == PR_MODE_HTTP) - metric = mkf_u64(FN_COUNTER, sv->counters.p.http.rsp[5]); - break; - case ST_F_HRSP_OTHER: - if (px->mode == PR_MODE_HTTP) - metric = mkf_u64(FN_COUNTER, sv->counters.p.http.rsp[0]); - break; - case ST_F_HANAFAIL: - if (ref->observe) - metric = mkf_u64(FN_COUNTER, sv->counters.failed_hana); - break; - case ST_F_CLI_ABRT: - metric = mkf_u64(FN_COUNTER, sv->counters.cli_aborts); - break; - case ST_F_SRV_ABRT: - metric = mkf_u64(FN_COUNTER, sv->counters.srv_aborts); - break; - case ST_F_LASTSESS: - metric = 
mkf_s32(FN_AGE, srv_lastsession(sv)); - break; - case ST_F_QTIME: - metric = mkf_u32(FN_AVG, swrate_avg(sv->counters.q_time, srv_samples_window)); - break; - case ST_F_CTIME: - metric = mkf_u32(FN_AVG, swrate_avg(sv->counters.c_time, srv_samples_window)); - break; - case ST_F_RTIME: - metric = mkf_u32(FN_AVG, swrate_avg(sv->counters.d_time, srv_samples_window)); - break; - case ST_F_TTIME: - metric = mkf_u32(FN_AVG, swrate_avg(sv->counters.t_time, srv_samples_window)); - break; - case ST_F_QT_MAX: - metric = mkf_u32(FN_MAX, sv->counters.qtime_max); - break; - case ST_F_CT_MAX: - metric = mkf_u32(FN_MAX, sv->counters.ctime_max); - break; - case ST_F_RT_MAX: - metric = mkf_u32(FN_MAX, sv->counters.dtime_max); - break; - case ST_F_TT_MAX: - metric = mkf_u32(FN_MAX, sv->counters.ttime_max); - break; - case ST_F_ADDR: - if (flags & STAT_SHLGNDS) { - switch (addr_to_str(&sv->addr, str, sizeof(str))) { - case AF_INET: - metric = mkf_str(FO_CONFIG|FS_SERVICE, chunk_newstr(out)); - chunk_appendf(out, "%s:%d", str, sv->svc_port); - break; - case AF_INET6: - metric = mkf_str(FO_CONFIG|FS_SERVICE, chunk_newstr(out)); - chunk_appendf(out, "[%s]:%d", str, sv->svc_port); - break; - case AF_UNIX: - metric = mkf_str(FO_CONFIG|FS_SERVICE, "unix"); - break; - case -1: - metric = mkf_str(FO_CONFIG|FS_SERVICE, chunk_newstr(out)); - chunk_strcat(out, strerror(errno)); - break; - default: /* address family not supported */ - break; - } - } - break; - case ST_F_COOKIE: - if (flags & STAT_SHLGNDS && sv->cookie) - metric = mkf_str(FO_CONFIG|FN_NAME|FS_SERVICE, sv->cookie); - break; - default: - /* not used for servers. If a specific metric - * is requested, return an error. Otherwise continue. 
- */ - if (selected_field != NULL) - return 0; - continue; - } - stats[current_field] = metric; - if (selected_field != NULL) - break; - } - return 1; -} - -/* Dumps a line for server <sv> and proxy <px> to the local trash vbuffer and - * uses the state from stream connector <sc>, and server state <state>. The - * caller is responsible for clearing the local trash buffer if needed. Returns - * non-zero if it emits anything, zero otherwise. - */ -static int stats_dump_sv_stats(struct stconn *sc, struct proxy *px, struct server *sv) -{ - struct appctx *appctx = __sc_appctx(sc); - struct show_stat_ctx *ctx = appctx->svcctx; - struct stats_module *mod; - struct field *stats = stat_l[STATS_DOMAIN_PROXY]; - size_t stats_count = ST_F_TOTAL_FIELDS; - - memset(stats, 0, sizeof(struct field) * stat_count[STATS_DOMAIN_PROXY]); - - if (!stats_fill_sv_stats(px, sv, ctx->flags, stats, - ST_F_TOTAL_FIELDS, NULL)) - return 0; - - list_for_each_entry(mod, &stats_module_list[STATS_DOMAIN_PROXY], list) { - void *counters; - - if (stats_get_domain(mod->domain_flags) != STATS_DOMAIN_PROXY) - continue; - - if (!(stats_px_get_cap(mod->domain_flags) & STATS_PX_CAP_SRV)) { - stats_count += mod->stats_count; - continue; - } - - counters = EXTRA_COUNTERS_GET(sv->extra_counters, mod); - mod->fill_stats(counters, stats + stats_count); - stats_count += mod->stats_count; - } - - return stats_dump_one_line(stats, stats_count, appctx); -} - -/* Helper to compute srv values for a given backend - */ -static void stats_fill_be_stats_computesrv(struct proxy *px, int *nbup, int *nbsrv, int *totuw) -{ - int nbup_tmp, nbsrv_tmp, totuw_tmp; - const struct server *srv; - - nbup_tmp = nbsrv_tmp = totuw_tmp = 0; - for (srv = px->srv; srv; srv = srv->next) { - if (srv->cur_state != SRV_ST_STOPPED) { - nbup_tmp++; - if (srv_currently_usable(srv) && - (!px->srv_act ^ !(srv->flags & SRV_F_BACKUP))) - totuw_tmp += srv->uweight; - } - nbsrv_tmp++; - } - - HA_RWLOCK_RDLOCK(LBPRM_LOCK, &px->lbprm.lock); - if 
(!px->srv_act && px->lbprm.fbck) - totuw_tmp = px->lbprm.fbck->uweight; - HA_RWLOCK_RDUNLOCK(LBPRM_LOCK, &px->lbprm.lock); - - /* use tmp variable then assign result to make gcc happy */ - *nbup = nbup_tmp; - *nbsrv = nbsrv_tmp; - *totuw = totuw_tmp; -} - -/* Fill <stats> with the backend statistics. <stats> is preallocated array of - * length <len>. If <selected_field> is != NULL, only fill this one. The length - * of the array must be at least ST_F_TOTAL_FIELDS. If this length is less than - * this value, or if the selected field is not implemented for backends, the - * function returns 0, otherwise, it returns 1. <flags> can take the value - * STAT_SHLGNDS. - */ -int stats_fill_be_stats(struct proxy *px, int flags, struct field *stats, int len, - enum stat_field *selected_field) -{ - enum stat_field current_field = (selected_field != NULL ? *selected_field : 0); - long long be_samples_counter; - unsigned int be_samples_window = TIME_STATS_SAMPLES; - struct buffer *out = get_trash_chunk(); - int nbup, nbsrv, totuw; - char *fld; - - if (len < ST_F_TOTAL_FIELDS) - return 0; - - nbup = nbsrv = totuw = 0; - /* some srv values compute for later if we either select all fields or - * need them for one of the mentioned ones */ - if (selected_field == NULL || *selected_field == ST_F_STATUS || - *selected_field == ST_F_UWEIGHT) - stats_fill_be_stats_computesrv(px, &nbup, &nbsrv, &totuw); - - /* same here but specific to time fields */ - if (selected_field == NULL || *selected_field == ST_F_QTIME || - *selected_field == ST_F_CTIME || *selected_field == ST_F_RTIME || - *selected_field == ST_F_TTIME) { - be_samples_counter = (px->mode == PR_MODE_HTTP) ? 
px->be_counters.p.http.cum_req : px->be_counters.cum_lbconn; - if (be_samples_counter < TIME_STATS_SAMPLES && be_samples_counter > 0) - be_samples_window = be_samples_counter; - } - - for (; current_field < ST_F_TOTAL_FIELDS; current_field++) { - struct field metric = { 0 }; - - switch (current_field) { - case ST_F_PXNAME: - metric = mkf_str(FO_KEY|FN_NAME|FS_SERVICE, px->id); - break; - case ST_F_SVNAME: - metric = mkf_str(FO_KEY|FN_NAME|FS_SERVICE, "BACKEND"); - break; - case ST_F_MODE: - metric = mkf_str(FO_CONFIG|FS_SERVICE, proxy_mode_str(px->mode)); - break; - case ST_F_QCUR: - metric = mkf_u32(0, px->queue.length); - break; - case ST_F_QMAX: - metric = mkf_u32(FN_MAX, px->be_counters.nbpend_max); - break; - case ST_F_SCUR: - metric = mkf_u32(0, px->beconn); - break; - case ST_F_SMAX: - metric = mkf_u32(FN_MAX, px->be_counters.conn_max); - break; - case ST_F_SLIM: - metric = mkf_u32(FO_CONFIG|FN_LIMIT, px->fullconn); - break; - case ST_F_STOT: - metric = mkf_u64(FN_COUNTER, px->be_counters.cum_conn); - break; - case ST_F_BIN: - metric = mkf_u64(FN_COUNTER, px->be_counters.bytes_in); - break; - case ST_F_BOUT: - metric = mkf_u64(FN_COUNTER, px->be_counters.bytes_out); - break; - case ST_F_DREQ: - metric = mkf_u64(FN_COUNTER, px->be_counters.denied_req); - break; - case ST_F_DRESP: - metric = mkf_u64(FN_COUNTER, px->be_counters.denied_resp); - break; - case ST_F_ECON: - metric = mkf_u64(FN_COUNTER, px->be_counters.failed_conns); - break; - case ST_F_ERESP: - metric = mkf_u64(FN_COUNTER, px->be_counters.failed_resp); - break; - case ST_F_WRETR: - metric = mkf_u64(FN_COUNTER, px->be_counters.retries); - break; - case ST_F_WREDIS: - metric = mkf_u64(FN_COUNTER, px->be_counters.redispatches); - break; - case ST_F_WREW: - metric = mkf_u64(FN_COUNTER, px->be_counters.failed_rewrites); - break; - case ST_F_EINT: - metric = mkf_u64(FN_COUNTER, px->be_counters.internal_errors); - break; - case ST_F_CONNECT: - metric = mkf_u64(FN_COUNTER, px->be_counters.connect); - 
break; - case ST_F_REUSE: - metric = mkf_u64(FN_COUNTER, px->be_counters.reuse); - break; - case ST_F_STATUS: - fld = chunk_newstr(out); - chunk_appendf(out, "%s", (px->lbprm.tot_weight > 0 || !px->srv) ? "UP" : "DOWN"); - if (flags & (STAT_HIDE_MAINT|STAT_HIDE_DOWN)) - chunk_appendf(out, " (%d/%d)", nbup, nbsrv); - metric = mkf_str(FO_STATUS, fld); - break; - case ST_F_AGG_SRV_CHECK_STATUS: // DEPRECATED - case ST_F_AGG_SRV_STATUS: - metric = mkf_u32(FN_GAUGE, 0); - break; - case ST_F_AGG_CHECK_STATUS: - metric = mkf_u32(FN_GAUGE, 0); - break; - case ST_F_WEIGHT: - metric = mkf_u32(FN_AVG, (px->lbprm.tot_weight * px->lbprm.wmult + px->lbprm.wdiv - 1) / px->lbprm.wdiv); - break; - case ST_F_UWEIGHT: - metric = mkf_u32(FN_AVG, totuw); - break; - case ST_F_ACT: - metric = mkf_u32(0, px->srv_act); - break; - case ST_F_BCK: - metric = mkf_u32(0, px->srv_bck); - break; - case ST_F_CHKDOWN: - metric = mkf_u64(FN_COUNTER, px->down_trans); - break; - case ST_F_LASTCHG: - metric = mkf_u32(FN_AGE, ns_to_sec(now_ns) - px->last_change); - break; - case ST_F_DOWNTIME: - if (px->srv) - metric = mkf_u32(FN_COUNTER, be_downtime(px)); - break; - case ST_F_PID: - metric = mkf_u32(FO_KEY, 1); - break; - case ST_F_IID: - metric = mkf_u32(FO_KEY|FS_SERVICE, px->uuid); - break; - case ST_F_SID: - metric = mkf_u32(FO_KEY|FS_SERVICE, 0); - break; - case ST_F_LBTOT: - metric = mkf_u64(FN_COUNTER, px->be_counters.cum_lbconn); - break; - case ST_F_TYPE: - metric = mkf_u32(FO_CONFIG|FS_SERVICE, STATS_TYPE_BE); - break; - case ST_F_RATE: - metric = mkf_u32(0, read_freq_ctr(&px->be_sess_per_sec)); - break; - case ST_F_RATE_MAX: - metric = mkf_u32(0, px->be_counters.sps_max); - break; - case ST_F_COOKIE: - if (flags & STAT_SHLGNDS && px->cookie_name) - metric = mkf_str(FO_CONFIG|FN_NAME|FS_SERVICE, px->cookie_name); - break; - case ST_F_ALGO: - if (flags & STAT_SHLGNDS) - metric = mkf_str(FO_CONFIG|FS_SERVICE, backend_lb_algo_str(px->lbprm.algo & BE_LB_ALGO)); - break; - case ST_F_REQ_TOT: - if 
(px->mode == PR_MODE_HTTP) - metric = mkf_u64(FN_COUNTER, px->be_counters.p.http.cum_req); - break; - case ST_F_HRSP_1XX: - if (px->mode == PR_MODE_HTTP) - metric = mkf_u64(FN_COUNTER, px->be_counters.p.http.rsp[1]); - break; - case ST_F_HRSP_2XX: - if (px->mode == PR_MODE_HTTP) - metric = mkf_u64(FN_COUNTER, px->be_counters.p.http.rsp[2]); - break; - case ST_F_HRSP_3XX: - if (px->mode == PR_MODE_HTTP) - metric = mkf_u64(FN_COUNTER, px->be_counters.p.http.rsp[3]); - break; - case ST_F_HRSP_4XX: - if (px->mode == PR_MODE_HTTP) - metric = mkf_u64(FN_COUNTER, px->be_counters.p.http.rsp[4]); - break; - case ST_F_HRSP_5XX: - if (px->mode == PR_MODE_HTTP) - metric = mkf_u64(FN_COUNTER, px->be_counters.p.http.rsp[5]); - break; - case ST_F_HRSP_OTHER: - if (px->mode == PR_MODE_HTTP) - metric = mkf_u64(FN_COUNTER, px->be_counters.p.http.rsp[0]); - break; - case ST_F_CACHE_LOOKUPS: - if (px->mode == PR_MODE_HTTP) - metric = mkf_u64(FN_COUNTER, px->be_counters.p.http.cache_lookups); - break; - case ST_F_CACHE_HITS: - if (px->mode == PR_MODE_HTTP) - metric = mkf_u64(FN_COUNTER, px->be_counters.p.http.cache_hits); - break; - case ST_F_CLI_ABRT: - metric = mkf_u64(FN_COUNTER, px->be_counters.cli_aborts); - break; - case ST_F_SRV_ABRT: - metric = mkf_u64(FN_COUNTER, px->be_counters.srv_aborts); - break; - case ST_F_COMP_IN: - metric = mkf_u64(FN_COUNTER, px->be_counters.comp_in[COMP_DIR_RES]); - break; - case ST_F_COMP_OUT: - metric = mkf_u64(FN_COUNTER, px->be_counters.comp_out[COMP_DIR_RES]); - break; - case ST_F_COMP_BYP: - metric = mkf_u64(FN_COUNTER, px->be_counters.comp_byp[COMP_DIR_RES]); - break; - case ST_F_COMP_RSP: - metric = mkf_u64(FN_COUNTER, px->be_counters.p.http.comp_rsp); - break; - case ST_F_LASTSESS: - metric = mkf_s32(FN_AGE, be_lastsession(px)); - break; - case ST_F_QTIME: - metric = mkf_u32(FN_AVG, swrate_avg(px->be_counters.q_time, be_samples_window)); - break; - case ST_F_CTIME: - metric = mkf_u32(FN_AVG, swrate_avg(px->be_counters.c_time, 
be_samples_window)); - break; - case ST_F_RTIME: - metric = mkf_u32(FN_AVG, swrate_avg(px->be_counters.d_time, be_samples_window)); - break; - case ST_F_TTIME: - metric = mkf_u32(FN_AVG, swrate_avg(px->be_counters.t_time, be_samples_window)); - break; - case ST_F_QT_MAX: - metric = mkf_u32(FN_MAX, px->be_counters.qtime_max); - break; - case ST_F_CT_MAX: - metric = mkf_u32(FN_MAX, px->be_counters.ctime_max); - break; - case ST_F_RT_MAX: - metric = mkf_u32(FN_MAX, px->be_counters.dtime_max); - break; - case ST_F_TT_MAX: - metric = mkf_u32(FN_MAX, px->be_counters.ttime_max); - break; - default: - /* not used for backends. If a specific metric - * is requested, return an error. Otherwise continue. - */ - if (selected_field != NULL) - return 0; - continue; - } - stats[current_field] = metric; - if (selected_field != NULL) - break; - } - return 1; -} - -/* Dumps a line for backend <px> to the local trash buffer for and uses the - * state from stream interface <si>. The caller is responsible for clearing the - * local trash buffer if needed. Returns non-zero if it emits anything, zero - * otherwise. 
- */ -static int stats_dump_be_stats(struct stconn *sc, struct proxy *px) -{ - struct appctx *appctx = __sc_appctx(sc); - struct show_stat_ctx *ctx = appctx->svcctx; - struct field *stats = stat_l[STATS_DOMAIN_PROXY]; - struct stats_module *mod; - size_t stats_count = ST_F_TOTAL_FIELDS; - - if (!(px->cap & PR_CAP_BE)) - return 0; - - if ((ctx->flags & STAT_BOUND) && !(ctx->type & (1 << STATS_TYPE_BE))) - return 0; - - memset(stats, 0, sizeof(struct field) * stat_count[STATS_DOMAIN_PROXY]); - - if (!stats_fill_be_stats(px, ctx->flags, stats, ST_F_TOTAL_FIELDS, NULL)) - return 0; - - list_for_each_entry(mod, &stats_module_list[STATS_DOMAIN_PROXY], list) { - struct extra_counters *counters; - - if (stats_get_domain(mod->domain_flags) != STATS_DOMAIN_PROXY) - continue; - - if (!(stats_px_get_cap(mod->domain_flags) & STATS_PX_CAP_BE)) { - stats_count += mod->stats_count; - continue; - } - - counters = EXTRA_COUNTERS_GET(px->extra_counters_be, mod); - mod->fill_stats(counters, stats + stats_count); - stats_count += mod->stats_count; - } - - return stats_dump_one_line(stats, stats_count, appctx); -} - -/* Dumps the HTML table header for proxy <px> to the local trash buffer for and - * uses the state from stream connector <sc>. The caller is responsible for - * clearing the local trash buffer if needed. 
- */ -static void stats_dump_html_px_hdr(struct stconn *sc, struct proxy *px) -{ - struct appctx *appctx = __sc_appctx(sc); - struct show_stat_ctx *ctx = appctx->svcctx; - char scope_txt[STAT_SCOPE_TXT_MAXLEN + sizeof STAT_SCOPE_PATTERN]; - struct stats_module *mod; - int stats_module_len = 0; - - if (px->cap & PR_CAP_BE && px->srv && (ctx->flags & STAT_ADMIN)) { - /* A form to enable/disable this proxy servers */ - - /* scope_txt = search pattern + search query, ctx->scope_len is always <= STAT_SCOPE_TXT_MAXLEN */ - scope_txt[0] = 0; - if (ctx->scope_len) { - const char *scope_ptr = stats_scope_ptr(appctx, sc); - - strlcpy2(scope_txt, STAT_SCOPE_PATTERN, sizeof(scope_txt)); - memcpy(scope_txt + strlen(STAT_SCOPE_PATTERN), scope_ptr, ctx->scope_len); - scope_txt[strlen(STAT_SCOPE_PATTERN) + ctx->scope_len] = 0; - } - - chunk_appendf(&trash_chunk, - "<form method=\"post\">"); - } - - /* print a new table */ - chunk_appendf(&trash_chunk, - "<table class=\"tbl\" width=\"100%%\">\n" - "<tr class=\"titre\">" - "<th class=\"pxname\" width=\"10%%\">"); - - chunk_appendf(&trash_chunk, - "<a name=\"%s\"></a>%s" - "<a class=px href=\"#%s\">%s</a>", - px->id, - (ctx->flags & STAT_SHLGNDS) ? "<u>":"", - px->id, px->id); - - if (ctx->flags & STAT_SHLGNDS) { - /* cap, mode, id */ - chunk_appendf(&trash_chunk, "<div class=tips>cap: %s, mode: %s, id: %d", - proxy_cap_str(px->cap), proxy_mode_str(px->mode), - px->uuid); - chunk_appendf(&trash_chunk, "</div>"); - } - - chunk_appendf(&trash_chunk, - "%s</th>" - "<th class=\"%s\" width=\"90%%\">%s</th>" - "</tr>\n" - "</table>\n" - "<table class=\"tbl\" width=\"100%%\">\n" - "<tr class=\"titre\">", - (ctx->flags & STAT_SHLGNDS) ? "</u>":"", - px->desc ? "desc" : "empty", px->desc ? 
px->desc : ""); - - if (ctx->flags & STAT_ADMIN) { - /* Column heading for Enable or Disable server */ - if ((px->cap & PR_CAP_BE) && px->srv) - chunk_appendf(&trash_chunk, - "<th rowspan=2 width=1><input type=\"checkbox\" " - "onclick=\"for(c in document.getElementsByClassName('%s-checkbox')) " - "document.getElementsByClassName('%s-checkbox').item(c).checked = this.checked\"></th>", - px->id, - px->id); - else - chunk_appendf(&trash_chunk, "<th rowspan=2></th>"); - } - - chunk_appendf(&trash_chunk, - "<th rowspan=2></th>" - "<th colspan=3>Queue</th>" - "<th colspan=3>Session rate</th><th colspan=6>Sessions</th>" - "<th colspan=2>Bytes</th><th colspan=2>Denied</th>" - "<th colspan=3>Errors</th><th colspan=2>Warnings</th>" - "<th colspan=9>Server</th>"); - - if (ctx->flags & STAT_SHMODULES) { - // calculate the count of module for colspan attribute - list_for_each_entry(mod, &stats_module_list[STATS_DOMAIN_PROXY], list) { - ++stats_module_len; - } - chunk_appendf(&trash_chunk, "<th colspan=%d>Extra modules</th>", - stats_module_len); - } - - chunk_appendf(&trash_chunk, - "</tr>\n" - "<tr class=\"titre\">" - "<th>Cur</th><th>Max</th><th>Limit</th>" - "<th>Cur</th><th>Max</th><th>Limit</th><th>Cur</th><th>Max</th>" - "<th>Limit</th><th>Total</th><th>LbTot</th><th>Last</th><th>In</th><th>Out</th>" - "<th>Req</th><th>Resp</th><th>Req</th><th>Conn</th>" - "<th>Resp</th><th>Retr</th><th>Redis</th>" - "<th>Status</th><th>LastChk</th><th>Wght</th><th>Act</th>" - "<th>Bck</th><th>Chk</th><th>Dwn</th><th>Dwntme</th>" - "<th>Thrtle</th>\n"); - - if (ctx->flags & STAT_SHMODULES) { - list_for_each_entry(mod, &stats_module_list[STATS_DOMAIN_PROXY], list) { - chunk_appendf(&trash_chunk, "<th>%s</th>", mod->name); - } - } - - chunk_appendf(&trash_chunk, "</tr>"); -} - -/* Dumps the HTML table trailer for proxy <px> to the local trash buffer for and - * uses the state from stream connector <sc>. The caller is responsible for - * clearing the local trash buffer if needed. 
- */ -static void stats_dump_html_px_end(struct stconn *sc, struct proxy *px) -{ - struct appctx *appctx = __sc_appctx(sc); - struct show_stat_ctx *ctx = appctx->svcctx; - - chunk_appendf(&trash_chunk, "</table>"); - - if ((px->cap & PR_CAP_BE) && px->srv && (ctx->flags & STAT_ADMIN)) { - /* close the form used to enable/disable this proxy servers */ - chunk_appendf(&trash_chunk, - "Choose the action to perform on the checked servers : " - "<select name=action>" - "<option value=\"\"></option>" - "<option value=\"ready\">Set state to READY</option>" - "<option value=\"drain\">Set state to DRAIN</option>" - "<option value=\"maint\">Set state to MAINT</option>" - "<option value=\"dhlth\">Health: disable checks</option>" - "<option value=\"ehlth\">Health: enable checks</option>" - "<option value=\"hrunn\">Health: force UP</option>" - "<option value=\"hnolb\">Health: force NOLB</option>" - "<option value=\"hdown\">Health: force DOWN</option>" - "<option value=\"dagent\">Agent: disable checks</option>" - "<option value=\"eagent\">Agent: enable checks</option>" - "<option value=\"arunn\">Agent: force UP</option>" - "<option value=\"adown\">Agent: force DOWN</option>" - "<option value=\"shutdown\">Kill Sessions</option>" - "</select>" - "<input type=\"hidden\" name=\"b\" value=\"#%d\">" - " <input type=\"submit\" value=\"Apply\">" - "</form>", - px->uuid); - } - - chunk_appendf(&trash_chunk, "<p>\n"); -} - -/* - * Dumps statistics for a proxy. The output is sent to the stream connector's - * input buffer. Returns 0 if it had to stop dumping data because of lack of - * buffer space, or non-zero if everything completed. This function is used - * both by the CLI and the HTTP entry points, and is able to dump the output - * in HTML or CSV formats. 
- */ -int stats_dump_proxy_to_buffer(struct stconn *sc, struct htx *htx, - struct proxy *px) -{ - struct appctx *appctx = __sc_appctx(sc); - struct show_stat_ctx *ctx = appctx->svcctx; - struct channel *rep = sc_ic(sc); - struct server *sv, *svs; /* server and server-state, server-state=server or server->track */ - struct listener *l; - struct uri_auth *uri = NULL; - int current_field; - int px_st = ctx->px_st; - - if (ctx->http_px) - uri = ctx->http_px->uri_auth; - chunk_reset(&trash_chunk); -more: - current_field = ctx->field; - - switch (ctx->px_st) { - case STAT_PX_ST_INIT: - /* we are on a new proxy */ - if (uri && uri->scope) { - /* we have a limited scope, we have to check the proxy name */ - struct stat_scope *scope; - int len; - - len = strlen(px->id); - scope = uri->scope; - - while (scope) { - /* match exact proxy name */ - if (scope->px_len == len && !memcmp(px->id, scope->px_id, len)) - break; - - /* match '.' which means 'self' proxy */ - if (strcmp(scope->px_id, ".") == 0 && px == ctx->http_px) - break; - scope = scope->next; - } - - /* proxy name not found : don't dump anything */ - if (scope == NULL) - return 1; - } - - /* if the user has requested a limited output and the proxy - * name does not match, skip it. 
- */ - if (ctx->scope_len) { - const char *scope_ptr = stats_scope_ptr(appctx, sc); - - if (strnistr(px->id, strlen(px->id), scope_ptr, ctx->scope_len) == NULL) - return 1; - } - - if ((ctx->flags & STAT_BOUND) && - (ctx->iid != -1) && - (px->uuid != ctx->iid)) - return 1; - - ctx->px_st = STAT_PX_ST_TH; - __fallthrough; - - case STAT_PX_ST_TH: - if (ctx->flags & STAT_FMT_HTML) { - stats_dump_html_px_hdr(sc, px); - if (!stats_putchk(appctx, htx)) - goto full; - } - - ctx->px_st = STAT_PX_ST_FE; - __fallthrough; - - case STAT_PX_ST_FE: - /* print the frontend */ - if (stats_dump_fe_stats(sc, px)) { - if (!stats_putchk(appctx, htx)) - goto full; - ctx->flags |= STAT_STARTED; - if (ctx->field) - goto more; - } - - current_field = 0; - ctx->obj2 = px->conf.listeners.n; - ctx->px_st = STAT_PX_ST_LI; - __fallthrough; - - case STAT_PX_ST_LI: - /* obj2 points to listeners list as initialized above */ - for (; ctx->obj2 != &px->conf.listeners; ctx->obj2 = l->by_fe.n) { - if (htx) { - if (htx_almost_full(htx)) { - sc_need_room(sc, htx->size / 2); - goto full; - } - } - else { - if (buffer_almost_full(&rep->buf)) { - sc_need_room(sc, b_size(&rep->buf) / 2); - goto full; - } - } - - l = LIST_ELEM(ctx->obj2, struct listener *, by_fe); - if (!l->counters) - continue; - - if (ctx->flags & STAT_BOUND) { - if (!(ctx->type & (1 << STATS_TYPE_SO))) - break; - - if (ctx->sid != -1 && l->luid != ctx->sid) - continue; - } - - /* print the frontend */ - if (stats_dump_li_stats(sc, px, l)) { - if (!stats_putchk(appctx, htx)) - goto full; - ctx->flags |= STAT_STARTED; - if (ctx->field) - goto more; - } - current_field = 0; - } - - ctx->obj2 = px->srv; /* may be NULL */ - ctx->px_st = STAT_PX_ST_SV; - __fallthrough; - - case STAT_PX_ST_SV: - /* check for dump resumption */ - if (px_st == STAT_PX_ST_SV) { - struct server *cur = ctx->obj2; - - /* re-entrant dump */ - BUG_ON(!cur); - if (cur->flags & SRV_F_DELETED) { - /* the server could have been marked as deleted - * between two dumping 
attempts, skip it. - */ - cur = cur->next; - } - srv_drop(ctx->obj2); /* drop old srv taken on last dumping attempt */ - ctx->obj2 = cur; /* could be NULL */ - /* back to normal */ - } - - /* obj2 points to servers list as initialized above. - * - * A server may be removed during the stats dumping. - * Temporarily increment its refcount to prevent its - * anticipated cleaning. Call srv_drop() to release it. - */ - for (; ctx->obj2 != NULL; - ctx->obj2 = srv_drop(sv)) { - - sv = ctx->obj2; - srv_take(sv); - - if (htx) { - if (htx_almost_full(htx)) { - sc_need_room(sc, htx->size / 2); - goto full; - } - } - else { - if (buffer_almost_full(&rep->buf)) { - sc_need_room(sc, b_size(&rep->buf) / 2); - goto full; - } - } - - if (ctx->flags & STAT_BOUND) { - if (!(ctx->type & (1 << STATS_TYPE_SV))) { - srv_drop(sv); - break; - } - - if (ctx->sid != -1 && sv->puid != ctx->sid) - continue; - } - - /* do not report disabled servers */ - if (ctx->flags & STAT_HIDE_MAINT && - sv->cur_admin & SRV_ADMF_MAINT) { - continue; - } - - svs = sv; - while (svs->track) - svs = svs->track; - - /* do not report servers which are DOWN and not changing state */ - if ((ctx->flags & STAT_HIDE_DOWN) && - ((sv->cur_admin & SRV_ADMF_MAINT) || /* server is in maintenance */ - (sv->cur_state == SRV_ST_STOPPED && /* server is down */ - (!((svs->agent.state | svs->check.state) & CHK_ST_ENABLED) || - ((svs->agent.state & CHK_ST_ENABLED) && !svs->agent.health) || - ((svs->check.state & CHK_ST_ENABLED) && !svs->check.health))))) { - continue; - } - - if (stats_dump_sv_stats(sc, px, sv)) { - if (!stats_putchk(appctx, htx)) - goto full; - ctx->flags |= STAT_STARTED; - if (ctx->field) - goto more; - } - current_field = 0; - } /* for sv */ - - ctx->px_st = STAT_PX_ST_BE; - __fallthrough; - - case STAT_PX_ST_BE: - /* print the backend */ - if (stats_dump_be_stats(sc, px)) { - if (!stats_putchk(appctx, htx)) - goto full; - ctx->flags |= STAT_STARTED; - if (ctx->field) - goto more; - } - - current_field = 0; - 
ctx->px_st = STAT_PX_ST_END; - __fallthrough; - - case STAT_PX_ST_END: - if (ctx->flags & STAT_FMT_HTML) { - stats_dump_html_px_end(sc, px); - if (!stats_putchk(appctx, htx)) - goto full; - } - - ctx->px_st = STAT_PX_ST_FIN; - __fallthrough; - - case STAT_PX_ST_FIN: - return 1; - - default: - /* unknown state, we should put an abort() here ! */ - return 1; - } - - full: - /* restore previous field */ - ctx->field = current_field; - return 0; -} - -/* Dumps the HTTP stats head block to the local trash buffer and uses the - * per-uri parameters from the parent proxy. The caller is responsible for - * clearing the local trash buffer if needed. - */ -static void stats_dump_html_head(struct appctx *appctx) -{ - struct show_stat_ctx *ctx = appctx->svcctx; - struct uri_auth *uri; - - BUG_ON(!ctx->http_px); - uri = ctx->http_px->uri_auth; - - /* WARNING! This must fit in the first buffer !!! */ - chunk_appendf(&trash_chunk, - "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\"\n" - "\"http://www.w3.org/TR/html4/loose.dtd\">\n" - "<html><head><title>Statistics Report for " PRODUCT_NAME "%s%s</title>\n" - "<link rel=\"icon\" href=\"data:,\">\n" - "<meta http-equiv=\"content-type\" content=\"text/html; charset=iso-8859-1\">\n" - "<style type=\"text/css\"><!--\n" - "body {" - " font-family: arial, helvetica, sans-serif;" - " font-size: 12px;" - " font-weight: normal;" - " color: black;" - " background: white;" - "}\n" - "th,td {" - " font-size: 10px;" - "}\n" - "h1 {" - " font-size: x-large;" - " margin-bottom: 0.5em;" - "}\n" - "h2 {" - " font-family: helvetica, arial;" - " font-size: x-large;" - " font-weight: bold;" - " font-style: italic;" - " color: #6020a0;" - " margin-top: 0em;" - " margin-bottom: 0em;" - "}\n" - "h3 {" - " font-family: helvetica, arial;" - " font-size: 16px;" - " font-weight: bold;" - " color: #b00040;" - " background: #e8e8d0;" - " margin-top: 0em;" - " margin-bottom: 0em;" - "}\n" - "li {" - " margin-top: 0.25em;" - " margin-right: 
2em;" - "}\n" - ".hr {margin-top: 0.25em;" - " border-color: black;" - " border-bottom-style: solid;" - "}\n" - ".titre {background: #20D0D0;color: #000000; font-weight: bold; text-align: center;}\n" - ".total {background: #20D0D0;color: #ffff80;}\n" - ".frontend {background: #e8e8d0;}\n" - ".socket {background: #d0d0d0;}\n" - ".backend {background: #e8e8d0;}\n" - ".active_down {background: #ff9090;}\n" - ".active_going_up {background: #ffd020;}\n" - ".active_going_down {background: #ffffa0;}\n" - ".active_up {background: #c0ffc0;}\n" - ".active_nolb {background: #20a0ff;}\n" - ".active_draining {background: #20a0FF;}\n" - ".active_no_check {background: #e0e0e0;}\n" - ".backup_down {background: #ff9090;}\n" - ".backup_going_up {background: #ff80ff;}\n" - ".backup_going_down {background: #c060ff;}\n" - ".backup_up {background: #b0d0ff;}\n" - ".backup_nolb {background: #90b0e0;}\n" - ".backup_draining {background: #cc9900;}\n" - ".backup_no_check {background: #e0e0e0;}\n" - ".maintain {background: #c07820;}\n" - ".rls {letter-spacing: 0.2em; margin-right: 1px;}\n" /* right letter spacing (used for grouping digits) */ - "\n" - "a.px:link {color: #ffff40; text-decoration: none;}" - "a.px:visited {color: #ffff40; text-decoration: none;}" - "a.px:hover {color: #ffffff; text-decoration: none;}" - "a.lfsb:link {color: #000000; text-decoration: none;}" - "a.lfsb:visited {color: #000000; text-decoration: none;}" - "a.lfsb:hover {color: #505050; text-decoration: none;}" - "\n" - "table.tbl { border-collapse: collapse; border-style: none;}\n" - "table.tbl td { text-align: right; border-width: 1px 1px 1px 1px; border-style: solid solid solid solid; padding: 2px 3px; border-color: gray; white-space: nowrap;}\n" - "table.tbl td.ac { text-align: center;}\n" - "table.tbl th { border-width: 1px; border-style: solid solid solid solid; border-color: gray;}\n" - "table.tbl th.pxname { background: #b00040; color: #ffff40; font-weight: bold; border-style: solid solid none solid; padding: 
2px 3px; white-space: nowrap;}\n" - "table.tbl th.empty { border-style: none; empty-cells: hide; background: white;}\n" - "table.tbl th.desc { background: white; border-style: solid solid none solid; text-align: left; padding: 2px 3px;}\n" - "\n" - "table.lgd { border-collapse: collapse; border-width: 1px; border-style: none none none solid; border-color: black;}\n" - "table.lgd td { border-width: 1px; border-style: solid solid solid solid; border-color: gray; padding: 2px;}\n" - "table.lgd td.noborder { border-style: none; padding: 2px; white-space: nowrap;}\n" - "table.det { border-collapse: collapse; border-style: none; }\n" - "table.det th { text-align: left; border-width: 0px; padding: 0px 1px 0px 0px; font-style:normal;font-size:11px;font-weight:bold;font-family: sans-serif;}\n" - "table.det td { text-align: right; border-width: 0px; padding: 0px 0px 0px 4px; white-space: nowrap; font-style:normal;font-size:11px;font-weight:normal;}\n" - "u {text-decoration:none; border-bottom: 1px dotted black;}\n" - "div.tips {\n" - " display:block;\n" - " visibility:hidden;\n" - " z-index:2147483647;\n" - " position:absolute;\n" - " padding:2px 4px 3px;\n" - " background:#f0f060; color:#000000;\n" - " border:1px solid #7040c0;\n" - " white-space:nowrap;\n" - " font-style:normal;font-size:11px;font-weight:normal;\n" - " -moz-border-radius:3px;-webkit-border-radius:3px;border-radius:3px;\n" - " -moz-box-shadow:gray 2px 2px 3px;-webkit-box-shadow:gray 2px 2px 3px;box-shadow:gray 2px 2px 3px;\n" - "}\n" - "u:hover div.tips {visibility:visible;}\n" - "@media (prefers-color-scheme: dark) {\n" - " body { font-family: arial, helvetica, sans-serif; font-size: 12px; font-weight: normal; color: #e8e6e3; background: #131516;}\n" - " h1 { color: #a265e0!important; }\n" - " h2 { color: #a265e0; }\n" - " h3 { color: #ff5190; background-color: #3e3e1f; }\n" - " a { color: #3391ff; }\n" - " input { background-color: #2f3437; }\n" - " .hr { border-color: #8c8273; }\n" - " .titre { 
background-color: #1aa6a6; color: #e8e6e3; }\n" - " .frontend {background: #2f3437;}\n" - " .socket {background: #2a2d2f;}\n" - " .backend {background: #2f3437;}\n" - " .active_down {background: #760000;}\n" - " .active_going_up {background: #b99200;}\n" - " .active_going_down {background: #6c6c00;}\n" - " .active_up {background: #165900;}\n" - " .active_nolb {background: #006ab9;}\n" - " .active_draining {background: #006ab9;}\n" - " .active_no_check {background: #2a2d2f;}\n" - " .backup_down {background: #760000;}\n" - " .backup_going_up {background: #7f007f;}\n" - " .backup_going_down {background: #580092;}\n" - " .backup_up {background: #2e3234;}\n" - " .backup_nolb {background: #1e3c6a;}\n" - " .backup_draining {background: #a37a00;}\n" - " .backup_no_check {background: #2a2d2f;}\n" - " .maintain {background: #9a601a;}\n" - " a.px:link {color: #d8d83b; text-decoration: none;}\n" - " a.px:visited {color: #d8d83b; text-decoration: none;}\n" - " a.px:hover {color: #ffffff; text-decoration: none;}\n" - " a.lfsb:link {color: #e8e6e3; text-decoration: none;}\n" - " a.lfsb:visited {color: #e8e6e3; text-decoration: none;}\n" - " a.lfsb:hover {color: #b5afa6; text-decoration: none;}\n" - " table.tbl th.empty { background-color: #181a1b; }\n" - " table.tbl th.desc { background: #181a1b; }\n" - " table.tbl th.pxname { background-color: #8d0033; color: #ffff46; }\n" - " table.tbl th { border-color: #808080; }\n" - " table.tbl td { border-color: #808080; }\n" - " u {text-decoration:none; border-bottom: 1px dotted #e8e6e3;}\n" - " div.tips {\n" - " background:#8e8e0d;\n" - " color:#e8e6e3;\n" - " border-color: #4e2c86;\n" - " -moz-box-shadow: #60686c 2px 2px 3px;\n" - " -webkit-box-shadow: #60686c 2px 2px 3px;\n" - " box-shadow: #60686c 2px 2px 3px;\n" - " }\n" - "}\n" - "-->\n" - "</style></head>\n", - (ctx->flags & STAT_SHNODE) ? " on " : "", - (ctx->flags & STAT_SHNODE) ? (uri && uri->node ? 
uri->node : global.node) : "" - ); -} - -/* Dumps the HTML stats information block to the local trash buffer and uses - * the state from stream connector <sc> and per-uri parameter from the parent - * proxy. The caller is responsible for clearing the local trash buffer if - * needed. - */ -static void stats_dump_html_info(struct stconn *sc) -{ - struct appctx *appctx = __sc_appctx(sc); - struct show_stat_ctx *ctx = appctx->svcctx; - unsigned int up = ns_to_sec(now_ns - start_time_ns); - char scope_txt[STAT_SCOPE_TXT_MAXLEN + sizeof STAT_SCOPE_PATTERN]; - const char *scope_ptr = stats_scope_ptr(appctx, sc); - struct uri_auth *uri; - unsigned long long bps; - int thr; - - BUG_ON(!ctx->http_px); - uri = ctx->http_px->uri_auth; - for (bps = thr = 0; thr < global.nbthread; thr++) - bps += 32ULL * read_freq_ctr(&ha_thread_ctx[thr].out_32bps); - - /* Turn the bytes per second to bits per second and take care of the - * usual ethernet overhead in order to help figure how far we are from - * interface saturation since it's the only case which usually matters. - * For this we count the total size of an Ethernet frame on the wire - * including preamble and IFG (1538) for the largest TCP segment it - * transports (1448 with TCP timestamps). This is not valid for smaller - * packets (under-estimated), but it gives a reasonably accurate - * estimation of how far we are from uplink saturation. - */ - bps = bps * 8 * 1538 / 1448; - - /* WARNING! this has to fit the first packet too. - * We are around 3.5 kB, add adding entries will - * become tricky if we want to support 4kB buffers ! 
- */ - chunk_appendf(&trash_chunk, - "<body><h1><a href=\"" PRODUCT_URL "\" style=\"text-decoration: none;\">" - PRODUCT_NAME "%s</a></h1>\n" - "<h2>Statistics Report for pid %d%s%s%s%s</h2>\n" - "<hr width=\"100%%\" class=\"hr\">\n" - "<h3>> General process information</h3>\n" - "<table border=0><tr><td align=\"left\" nowrap width=\"1%%\">\n" - "<p><b>pid = </b> %d (process #%d, nbproc = %d, nbthread = %d)<br>\n" - "<b>uptime = </b> %dd %dh%02dm%02ds; warnings = %u<br>\n" - "<b>system limits:</b> memmax = %s%s; ulimit-n = %d<br>\n" - "<b>maxsock = </b> %d; <b>maxconn = </b> %d; <b>reached = </b> %llu; <b>maxpipes = </b> %d<br>\n" - "current conns = %d; current pipes = %d/%d; conn rate = %d/sec; bit rate = %.3f %cbps<br>\n" - "Running tasks: %d/%d (%d niced); idle = %d %%<br>\n" - "</td><td align=\"center\" nowrap>\n" - "<table class=\"lgd\"><tr>\n" - "<td class=\"active_up\"> </td><td class=\"noborder\">active UP </td>" - "<td class=\"backup_up\"> </td><td class=\"noborder\">backup UP </td>" - "</tr><tr>\n" - "<td class=\"active_going_down\"></td><td class=\"noborder\">active UP, going down </td>" - "<td class=\"backup_going_down\"></td><td class=\"noborder\">backup UP, going down </td>" - "</tr><tr>\n" - "<td class=\"active_going_up\"></td><td class=\"noborder\">active DOWN, going up </td>" - "<td class=\"backup_going_up\"></td><td class=\"noborder\">backup DOWN, going up </td>" - "</tr><tr>\n" - "<td class=\"active_down\"></td><td class=\"noborder\">active or backup DOWN </td>" - "<td class=\"active_no_check\"></td><td class=\"noborder\">not checked </td>" - "</tr><tr>\n" - "<td class=\"maintain\"></td><td class=\"noborder\" colspan=\"3\">active or backup DOWN for maintenance (MAINT) </td>" - "</tr><tr>\n" - "<td class=\"active_draining\"></td><td class=\"noborder\" colspan=\"3\">active or backup SOFT STOPPED for maintenance </td>" - "</tr></table>\n" - "Note: \"NOLB\"/\"DRAIN\" = UP with load-balancing disabled." 
- "</td>" - "<td align=\"left\" valign=\"top\" nowrap width=\"1%%\">" - "<b>Display option:</b><ul style=\"margin-top: 0.25em;\">" - "", - (ctx->flags & STAT_HIDEVER) ? "" : (stats_version_string), - pid, (ctx->flags & STAT_SHNODE) ? " on " : "", - (ctx->flags & STAT_SHNODE) ? (uri->node ? uri->node : global.node) : "", - (ctx->flags & STAT_SHDESC) ? ": " : "", - (ctx->flags & STAT_SHDESC) ? (uri->desc ? uri->desc : global.desc) : "", - pid, 1, 1, global.nbthread, - up / 86400, (up % 86400) / 3600, - (up % 3600) / 60, (up % 60), - HA_ATOMIC_LOAD(&tot_warnings), - global.rlimit_memmax ? ultoa(global.rlimit_memmax) : "unlimited", - global.rlimit_memmax ? " MB" : "", - global.rlimit_nofile, - global.maxsock, global.maxconn, HA_ATOMIC_LOAD(&maxconn_reached), global.maxpipes, - actconn, pipes_used, pipes_used+pipes_free, read_freq_ctr(&global.conn_per_sec), - bps >= 1000000000UL ? (bps / 1000000000.0) : bps >= 1000000UL ? (bps / 1000000.0) : (bps / 1000.0), - bps >= 1000000000UL ? 'G' : bps >= 1000000UL ? 'M' : 'k', - total_run_queues(), total_allocated_tasks(), total_niced_running_tasks(), clock_report_idle()); - - /* scope_txt = search query, ctx->scope_len is always <= STAT_SCOPE_TXT_MAXLEN */ - memcpy(scope_txt, scope_ptr, ctx->scope_len); - scope_txt[ctx->scope_len] = '\0'; - - chunk_appendf(&trash_chunk, - "<li><form method=\"GET\">Scope : <input value=\"%s\" name=\"" STAT_SCOPE_INPUT_NAME "\" size=\"8\" maxlength=\"%d\" tabindex=\"1\"/></form>\n", - (ctx->scope_len > 0) ? 
scope_txt : "", - STAT_SCOPE_TXT_MAXLEN); - - /* scope_txt = search pattern + search query, ctx->scope_len is always <= STAT_SCOPE_TXT_MAXLEN */ - scope_txt[0] = 0; - if (ctx->scope_len) { - strlcpy2(scope_txt, STAT_SCOPE_PATTERN, sizeof(scope_txt)); - memcpy(scope_txt + strlen(STAT_SCOPE_PATTERN), scope_ptr, ctx->scope_len); - scope_txt[strlen(STAT_SCOPE_PATTERN) + ctx->scope_len] = 0; - } - - if (ctx->flags & STAT_HIDE_DOWN) - chunk_appendf(&trash_chunk, - "<li><a href=\"%s%s%s%s\">Show all servers</a><br>\n", - uri->uri_prefix, - "", - (ctx->flags & STAT_NO_REFRESH) ? ";norefresh" : "", - scope_txt); - else - chunk_appendf(&trash_chunk, - "<li><a href=\"%s%s%s%s\">Hide 'DOWN' servers</a><br>\n", - uri->uri_prefix, - ";up", - (ctx->flags & STAT_NO_REFRESH) ? ";norefresh" : "", - scope_txt); - - if (uri->refresh > 0) { - if (ctx->flags & STAT_NO_REFRESH) - chunk_appendf(&trash_chunk, - "<li><a href=\"%s%s%s%s\">Enable refresh</a><br>\n", - uri->uri_prefix, - (ctx->flags & STAT_HIDE_DOWN) ? ";up" : "", - "", - scope_txt); - else - chunk_appendf(&trash_chunk, - "<li><a href=\"%s%s%s%s\">Disable refresh</a><br>\n", - uri->uri_prefix, - (ctx->flags & STAT_HIDE_DOWN) ? ";up" : "", - ";norefresh", - scope_txt); - } - - chunk_appendf(&trash_chunk, - "<li><a href=\"%s%s%s%s\">Refresh now</a><br>\n", - uri->uri_prefix, - (ctx->flags & STAT_HIDE_DOWN) ? ";up" : "", - (ctx->flags & STAT_NO_REFRESH) ? ";norefresh" : "", - scope_txt); - - chunk_appendf(&trash_chunk, - "<li><a href=\"%s;csv%s%s\">CSV export</a><br>\n", - uri->uri_prefix, - (uri->refresh > 0) ? ";norefresh" : "", - scope_txt); - - chunk_appendf(&trash_chunk, - "<li><a href=\"%s;json%s%s\">JSON export</a> (<a href=\"%s;json-schema\">schema</a>)<br>\n", - uri->uri_prefix, - (uri->refresh > 0) ? 
";norefresh" : "", - scope_txt, uri->uri_prefix); - - chunk_appendf(&trash_chunk, - "</ul></td>" - "<td align=\"left\" valign=\"top\" nowrap width=\"1%%\">" - "<b>External resources:</b><ul style=\"margin-top: 0.25em;\">\n" - "<li><a href=\"" PRODUCT_URL "\">Primary site</a><br>\n" - "<li><a href=\"" PRODUCT_URL_UPD "\">Updates (v" PRODUCT_BRANCH ")</a><br>\n" - "<li><a href=\"" PRODUCT_URL_DOC "\">Online manual</a><br>\n" - "</ul>" - "</td>" - "</tr></table>\n" - "" - ); - - if (ctx->st_code) { - switch (ctx->st_code) { - case STAT_STATUS_DONE: - chunk_appendf(&trash_chunk, - "<p><div class=active_up>" - "<a class=lfsb href=\"%s%s%s%s\" title=\"Remove this message\">[X]</a> " - "Action processed successfully." - "</div>\n", uri->uri_prefix, - (ctx->flags & STAT_HIDE_DOWN) ? ";up" : "", - (ctx->flags & STAT_NO_REFRESH) ? ";norefresh" : "", - scope_txt); - break; - case STAT_STATUS_NONE: - chunk_appendf(&trash_chunk, - "<p><div class=active_going_down>" - "<a class=lfsb href=\"%s%s%s%s\" title=\"Remove this message\">[X]</a> " - "Nothing has changed." - "</div>\n", uri->uri_prefix, - (ctx->flags & STAT_HIDE_DOWN) ? ";up" : "", - (ctx->flags & STAT_NO_REFRESH) ? ";norefresh" : "", - scope_txt); - break; - case STAT_STATUS_PART: - chunk_appendf(&trash_chunk, - "<p><div class=active_going_down>" - "<a class=lfsb href=\"%s%s%s%s\" title=\"Remove this message\">[X]</a> " - "Action partially processed.<br>" - "Some server names are probably unknown or ambiguous (duplicated names in the backend)." - "</div>\n", uri->uri_prefix, - (ctx->flags & STAT_HIDE_DOWN) ? ";up" : "", - (ctx->flags & STAT_NO_REFRESH) ? ";norefresh" : "", - scope_txt); - break; - case STAT_STATUS_ERRP: - chunk_appendf(&trash_chunk, - "<p><div class=active_down>" - "<a class=lfsb href=\"%s%s%s%s\" title=\"Remove this message\">[X]</a> " - "Action not processed because of invalid parameters." 
- "<ul>" - "<li>The action is maybe unknown.</li>" - "<li>Invalid key parameter (empty or too long).</li>" - "<li>The backend name is probably unknown or ambiguous (duplicated names).</li>" - "<li>Some server names are probably unknown or ambiguous (duplicated names in the backend).</li>" - "</ul>" - "</div>\n", uri->uri_prefix, - (ctx->flags & STAT_HIDE_DOWN) ? ";up" : "", - (ctx->flags & STAT_NO_REFRESH) ? ";norefresh" : "", - scope_txt); - break; - case STAT_STATUS_EXCD: - chunk_appendf(&trash_chunk, - "<p><div class=active_down>" - "<a class=lfsb href=\"%s%s%s%s\" title=\"Remove this message\">[X]</a> " - "<b>Action not processed : the buffer couldn't store all the data.<br>" - "You should retry with less servers at a time.</b>" - "</div>\n", uri->uri_prefix, - (ctx->flags & STAT_HIDE_DOWN) ? ";up" : "", - (ctx->flags & STAT_NO_REFRESH) ? ";norefresh" : "", - scope_txt); - break; - case STAT_STATUS_DENY: - chunk_appendf(&trash_chunk, - "<p><div class=active_down>" - "<a class=lfsb href=\"%s%s%s%s\" title=\"Remove this message\">[X]</a> " - "<b>Action denied.</b>" - "</div>\n", uri->uri_prefix, - (ctx->flags & STAT_HIDE_DOWN) ? ";up" : "", - (ctx->flags & STAT_NO_REFRESH) ? ";norefresh" : "", - scope_txt); - break; - case STAT_STATUS_IVAL: - chunk_appendf(&trash_chunk, - "<p><div class=active_down>" - "<a class=lfsb href=\"%s%s%s%s\" title=\"Remove this message\">[X]</a> " - "<b>Invalid requests (unsupported method or chunked encoded request).</b>" - "</div>\n", uri->uri_prefix, - (ctx->flags & STAT_HIDE_DOWN) ? ";up" : "", - (ctx->flags & STAT_NO_REFRESH) ? ";norefresh" : "", - scope_txt); - break; - default: - chunk_appendf(&trash_chunk, - "<p><div class=active_no_check>" - "<a class=lfsb href=\"%s%s%s%s\" title=\"Remove this message\">[X]</a> " - "Unexpected result." - "</div>\n", uri->uri_prefix, - (ctx->flags & STAT_HIDE_DOWN) ? ";up" : "", - (ctx->flags & STAT_NO_REFRESH) ? 
";norefresh" : "", - scope_txt); - } - chunk_appendf(&trash_chunk, "<p>\n"); - } -} - -/* Dumps the HTML stats trailer block to the local trash buffer. The caller is - * responsible for clearing the local trash buffer if needed. - */ -static void stats_dump_html_end() -{ - chunk_appendf(&trash_chunk, "</body></html>\n"); -} - -/* Dumps the stats JSON header to the local trash buffer buffer which. The - * caller is responsible for clearing it if needed. - */ -static void stats_dump_json_header() -{ - chunk_strcat(&trash_chunk, "["); -} - - -/* Dumps the JSON stats trailer block to the local trash buffer. The caller is - * responsible for clearing the local trash buffer if needed. - */ -static void stats_dump_json_end() -{ - chunk_strcat(&trash_chunk, "]\n"); -} - -/* Uses <appctx.ctx.stats.obj1> as a pointer to the current proxy and <obj2> as - * a pointer to the current server/listener. - */ -static int stats_dump_proxies(struct stconn *sc, - struct htx *htx) -{ - struct appctx *appctx = __sc_appctx(sc); - struct show_stat_ctx *ctx = appctx->svcctx; - struct channel *rep = sc_ic(sc); - struct proxy *px; - - /* dump proxies */ - while (ctx->obj1) { - if (htx) { - if (htx_almost_full(htx)) { - sc_need_room(sc, htx->size / 2); - goto full; - } - } - else { - if (buffer_almost_full(&rep->buf)) { - sc_need_room(sc, b_size(&rep->buf) / 2); - goto full; - } - } - - px = ctx->obj1; - /* Skip the global frontend proxies and non-networked ones. - * Also skip proxies that were disabled in the configuration - * This change allows retrieving stats from "old" proxies after a reload. 
- */ - if (!(px->flags & PR_FL_DISABLED) && px->uuid > 0 && - (px->cap & (PR_CAP_FE | PR_CAP_BE)) && !(px->cap & PR_CAP_INT)) { - if (stats_dump_proxy_to_buffer(sc, htx, px) == 0) - return 0; - } - - ctx->obj1 = px->next; - ctx->px_st = STAT_PX_ST_INIT; - ctx->field = 0; - } - - return 1; - - full: - return 0; -} - /* This function dumps statistics onto the stream connector's read buffer in * either CSV or HTML format. It returns 0 if it had to stop writing data and * an I/O is needed, 1 if the dump is finished and the stream must be closed, * or -1 in case of any error. This function is used by both the CLI and the * HTTP handlers. */ -static int stats_dump_stat_to_buffer(struct stconn *sc, struct htx *htx) +int stats_dump_stat_to_buffer(struct stconn *sc, struct buffer *buf, struct htx *htx) { struct appctx *appctx = __sc_appctx(sc); struct show_stat_ctx *ctx = appctx->svcctx; enum stats_domain domain = ctx->domain; + struct buffer *chk = &ctx->chunk; - chunk_reset(&trash_chunk); + chunk_reset(chk); switch (ctx->state) { case STAT_STATE_INIT: @@ -3934,19 +546,21 @@ static int stats_dump_stat_to_buffer(struct stconn *sc, struct htx *htx) __fallthrough; case STAT_STATE_HEAD: - if (ctx->flags & STAT_FMT_HTML) + if (ctx->flags & STAT_F_FMT_HTML) stats_dump_html_head(appctx); - else if (ctx->flags & STAT_JSON_SCHM) - stats_dump_json_schema(&trash_chunk); - else if (ctx->flags & STAT_FMT_JSON) - stats_dump_json_header(); - else if (!(ctx->flags & STAT_FMT_TYPED)) - stats_dump_csv_header(ctx->domain); - - if (!stats_putchk(appctx, htx)) + else if (ctx->flags & STAT_F_JSON_SCHM) + stats_dump_json_schema(chk); + else if (ctx->flags & STAT_F_FMT_JSON) + stats_dump_json_header(chk); + else if (ctx->flags & STAT_F_FMT_FILE) + stats_dump_file_header(ctx->type, chk); + else if (!(ctx->flags & STAT_F_FMT_TYPED)) + stats_dump_csv_header(ctx->domain, chk); + + if (!stats_putchk(appctx, buf, htx)) goto full; - if (ctx->flags & STAT_JSON_SCHM) { + if (ctx->flags & STAT_F_JSON_SCHM) 
{ ctx->state = STAT_STATE_FIN; return 1; } @@ -3954,9 +568,9 @@ static int stats_dump_stat_to_buffer(struct stconn *sc, struct htx *htx) __fallthrough; case STAT_STATE_INFO: - if (ctx->flags & STAT_FMT_HTML) { + if (ctx->flags & STAT_F_FMT_HTML) { stats_dump_html_info(sc); - if (!stats_putchk(appctx, htx)) + if (!stats_putchk(appctx, buf, htx)) goto full; } @@ -3971,8 +585,8 @@ static int stats_dump_stat_to_buffer(struct stconn *sc, struct htx *htx) case STAT_STATE_LIST: switch (domain) { case STATS_DOMAIN_RESOLVERS: - if (!stats_dump_resolvers(sc, stat_l[domain], - stat_count[domain], + if (!stats_dump_resolvers(sc, stat_lines[domain], + stat_cols_len[domain], &stats_module_list[domain])) { return 0; } @@ -3981,7 +595,7 @@ static int stats_dump_stat_to_buffer(struct stconn *sc, struct htx *htx) case STATS_DOMAIN_PROXY: default: /* dump proxies */ - if (!stats_dump_proxies(sc, htx)) + if (!stats_dump_proxies(sc, buf, htx)) return 0; break; } @@ -3990,12 +604,12 @@ static int stats_dump_stat_to_buffer(struct stconn *sc, struct htx *htx) __fallthrough; case STAT_STATE_END: - if (ctx->flags & (STAT_FMT_HTML|STAT_FMT_JSON)) { - if (ctx->flags & STAT_FMT_HTML) - stats_dump_html_end(); + if (ctx->flags & (STAT_F_FMT_HTML|STAT_F_FMT_JSON)) { + if (ctx->flags & STAT_F_FMT_HTML) + stats_dump_html_end(chk); else - stats_dump_json_end(); - if (!stats_putchk(appctx, htx)) + stats_dump_json_end(chk); + if (!stats_putchk(appctx, buf, htx)) goto full; } @@ -4016,589 +630,23 @@ static int stats_dump_stat_to_buffer(struct stconn *sc, struct htx *htx) } -/* We reached the stats page through a POST request. The appctx is - * expected to have already been allocated by the caller. - * Parse the posted data and enable/disable servers if necessary. - * Returns 1 if request was parsed or zero if it needs more data. 
- */ -static int stats_process_http_post(struct stconn *sc) -{ - struct stream *s = __sc_strm(sc); - struct appctx *appctx = __sc_appctx(sc); - struct show_stat_ctx *ctx = appctx->svcctx; - - struct proxy *px = NULL; - struct server *sv = NULL; - - char key[LINESIZE]; - int action = ST_ADM_ACTION_NONE; - int reprocess = 0; - - int total_servers = 0; - int altered_servers = 0; - - char *first_param, *cur_param, *next_param, *end_params; - char *st_cur_param = NULL; - char *st_next_param = NULL; - - struct buffer *temp = get_trash_chunk(); - - struct htx *htx = htxbuf(&s->req.buf); - struct htx_blk *blk; - - /* we need more data */ - if (s->txn->req.msg_state < HTTP_MSG_DONE) { - /* check if we can receive more */ - if (htx_free_data_space(htx) <= global.tune.maxrewrite) { - ctx->st_code = STAT_STATUS_EXCD; - goto out; - } - goto wait; - } - - /* The request was fully received. Copy data */ - blk = htx_get_head_blk(htx); - while (blk) { - enum htx_blk_type type = htx_get_blk_type(blk); - - if (type == HTX_BLK_TLR || type == HTX_BLK_EOT) - break; - if (type == HTX_BLK_DATA) { - struct ist v = htx_get_blk_value(htx, blk); - - if (!chunk_memcat(temp, v.ptr, v.len)) { - ctx->st_code = STAT_STATUS_EXCD; - goto out; - } - } - blk = htx_get_next_blk(htx, blk); - } - - first_param = temp->area; - end_params = temp->area + temp->data; - cur_param = next_param = end_params; - *end_params = '\0'; - - ctx->st_code = STAT_STATUS_NONE; - - /* - * Parse the parameters in reverse order to only store the last value. - * From the html form, the backend and the action are at the end. - */ - while (cur_param > first_param) { - char *value; - int poffset, plen; - - cur_param--; - - if ((*cur_param == '&') || (cur_param == first_param)) { - reprocess_servers: - /* Parse the key */ - poffset = (cur_param != first_param ? 1 : 0); - plen = next_param - cur_param + (cur_param == first_param ? 
1 : 0); - if ((plen > 0) && (plen <= sizeof(key))) { - strncpy(key, cur_param + poffset, plen); - key[plen - 1] = '\0'; - } else { - ctx->st_code = STAT_STATUS_ERRP; - goto out; - } - - /* Parse the value */ - value = key; - while (*value != '\0' && *value != '=') { - value++; - } - if (*value == '=') { - /* Ok, a value is found, we can mark the end of the key */ - *value++ = '\0'; - } - if (url_decode(key, 1) < 0 || url_decode(value, 1) < 0) - break; - - /* Now we can check the key to see what to do */ - if (!px && (strcmp(key, "b") == 0)) { - if ((px = proxy_be_by_name(value)) == NULL) { - /* the backend name is unknown or ambiguous (duplicate names) */ - ctx->st_code = STAT_STATUS_ERRP; - goto out; - } - } - else if (!action && (strcmp(key, "action") == 0)) { - if (strcmp(value, "ready") == 0) { - action = ST_ADM_ACTION_READY; - } - else if (strcmp(value, "drain") == 0) { - action = ST_ADM_ACTION_DRAIN; - } - else if (strcmp(value, "maint") == 0) { - action = ST_ADM_ACTION_MAINT; - } - else if (strcmp(value, "shutdown") == 0) { - action = ST_ADM_ACTION_SHUTDOWN; - } - else if (strcmp(value, "dhlth") == 0) { - action = ST_ADM_ACTION_DHLTH; - } - else if (strcmp(value, "ehlth") == 0) { - action = ST_ADM_ACTION_EHLTH; - } - else if (strcmp(value, "hrunn") == 0) { - action = ST_ADM_ACTION_HRUNN; - } - else if (strcmp(value, "hnolb") == 0) { - action = ST_ADM_ACTION_HNOLB; - } - else if (strcmp(value, "hdown") == 0) { - action = ST_ADM_ACTION_HDOWN; - } - else if (strcmp(value, "dagent") == 0) { - action = ST_ADM_ACTION_DAGENT; - } - else if (strcmp(value, "eagent") == 0) { - action = ST_ADM_ACTION_EAGENT; - } - else if (strcmp(value, "arunn") == 0) { - action = ST_ADM_ACTION_ARUNN; - } - else if (strcmp(value, "adown") == 0) { - action = ST_ADM_ACTION_ADOWN; - } - /* else these are the old supported methods */ - else if (strcmp(value, "disable") == 0) { - action = ST_ADM_ACTION_DISABLE; - } - else if (strcmp(value, "enable") == 0) { - action = ST_ADM_ACTION_ENABLE; 
- } - else if (strcmp(value, "stop") == 0) { - action = ST_ADM_ACTION_STOP; - } - else if (strcmp(value, "start") == 0) { - action = ST_ADM_ACTION_START; - } - else { - ctx->st_code = STAT_STATUS_ERRP; - goto out; - } - } - else if (strcmp(key, "s") == 0) { - if (!(px && action)) { - /* - * Indicates that we'll need to reprocess the parameters - * as soon as backend and action are known - */ - if (!reprocess) { - st_cur_param = cur_param; - st_next_param = next_param; - } - reprocess = 1; - } - else if ((sv = findserver(px, value)) != NULL) { - HA_SPIN_LOCK(SERVER_LOCK, &sv->lock); - switch (action) { - case ST_ADM_ACTION_DISABLE: - if (!(sv->cur_admin & SRV_ADMF_FMAINT)) { - altered_servers++; - total_servers++; - srv_set_admin_flag(sv, SRV_ADMF_FMAINT, SRV_ADM_STCHGC_STATS_DISABLE); - } - break; - case ST_ADM_ACTION_ENABLE: - if (sv->cur_admin & SRV_ADMF_FMAINT) { - altered_servers++; - total_servers++; - srv_clr_admin_flag(sv, SRV_ADMF_FMAINT); - } - break; - case ST_ADM_ACTION_STOP: - if (!(sv->cur_admin & SRV_ADMF_FDRAIN)) { - srv_set_admin_flag(sv, SRV_ADMF_FDRAIN, SRV_ADM_STCHGC_STATS_STOP); - altered_servers++; - total_servers++; - } - break; - case ST_ADM_ACTION_START: - if (sv->cur_admin & SRV_ADMF_FDRAIN) { - srv_clr_admin_flag(sv, SRV_ADMF_FDRAIN); - altered_servers++; - total_servers++; - } - break; - case ST_ADM_ACTION_DHLTH: - if (sv->check.state & CHK_ST_CONFIGURED) { - sv->check.state &= ~CHK_ST_ENABLED; - altered_servers++; - total_servers++; - } - break; - case ST_ADM_ACTION_EHLTH: - if (sv->check.state & CHK_ST_CONFIGURED) { - sv->check.state |= CHK_ST_ENABLED; - altered_servers++; - total_servers++; - } - break; - case ST_ADM_ACTION_HRUNN: - if (!(sv->track)) { - sv->check.health = sv->check.rise + sv->check.fall - 1; - srv_set_running(sv, SRV_OP_STCHGC_STATS_WEB); - altered_servers++; - total_servers++; - } - break; - case ST_ADM_ACTION_HNOLB: - if (!(sv->track)) { - sv->check.health = sv->check.rise + sv->check.fall - 1; - 
srv_set_stopping(sv, SRV_OP_STCHGC_STATS_WEB); - altered_servers++; - total_servers++; - } - break; - case ST_ADM_ACTION_HDOWN: - if (!(sv->track)) { - sv->check.health = 0; - srv_set_stopped(sv, SRV_OP_STCHGC_STATS_WEB); - altered_servers++; - total_servers++; - } - break; - case ST_ADM_ACTION_DAGENT: - if (sv->agent.state & CHK_ST_CONFIGURED) { - sv->agent.state &= ~CHK_ST_ENABLED; - altered_servers++; - total_servers++; - } - break; - case ST_ADM_ACTION_EAGENT: - if (sv->agent.state & CHK_ST_CONFIGURED) { - sv->agent.state |= CHK_ST_ENABLED; - altered_servers++; - total_servers++; - } - break; - case ST_ADM_ACTION_ARUNN: - if (sv->agent.state & CHK_ST_ENABLED) { - sv->agent.health = sv->agent.rise + sv->agent.fall - 1; - srv_set_running(sv, SRV_OP_STCHGC_STATS_WEB); - altered_servers++; - total_servers++; - } - break; - case ST_ADM_ACTION_ADOWN: - if (sv->agent.state & CHK_ST_ENABLED) { - sv->agent.health = 0; - srv_set_stopped(sv, SRV_OP_STCHGC_STATS_WEB); - altered_servers++; - total_servers++; - } - break; - case ST_ADM_ACTION_READY: - srv_adm_set_ready(sv); - altered_servers++; - total_servers++; - break; - case ST_ADM_ACTION_DRAIN: - srv_adm_set_drain(sv); - altered_servers++; - total_servers++; - break; - case ST_ADM_ACTION_MAINT: - srv_adm_set_maint(sv); - altered_servers++; - total_servers++; - break; - case ST_ADM_ACTION_SHUTDOWN: - if (!(px->flags & (PR_FL_DISABLED|PR_FL_STOPPED))) { - srv_shutdown_streams(sv, SF_ERR_KILLED); - altered_servers++; - total_servers++; - } - break; - } - HA_SPIN_UNLOCK(SERVER_LOCK, &sv->lock); - } else { - /* the server name is unknown or ambiguous (duplicate names) */ - total_servers++; - } - } - if (reprocess && px && action) { - /* Now, we know the backend and the action chosen by the user. 
- * We can safely restart from the first server parameter - * to reprocess them - */ - cur_param = st_cur_param; - next_param = st_next_param; - reprocess = 0; - goto reprocess_servers; - } - - next_param = cur_param; - } - } - - if (total_servers == 0) { - ctx->st_code = STAT_STATUS_NONE; - } - else if (altered_servers == 0) { - ctx->st_code = STAT_STATUS_ERRP; - } - else if (altered_servers == total_servers) { - ctx->st_code = STAT_STATUS_DONE; - } - else { - ctx->st_code = STAT_STATUS_PART; - } - out: - return 1; - wait: - ctx->st_code = STAT_STATUS_NONE; - return 0; -} - - -static int stats_send_http_headers(struct stconn *sc, struct htx *htx) -{ - struct stream *s = __sc_strm(sc); - struct uri_auth *uri; - struct appctx *appctx = __sc_appctx(sc); - struct show_stat_ctx *ctx = appctx->svcctx; - struct htx_sl *sl; - unsigned int flags; - - BUG_ON(!ctx->http_px); - uri = ctx->http_px->uri_auth; - - flags = (HTX_SL_F_IS_RESP|HTX_SL_F_VER_11|HTX_SL_F_XFER_ENC|HTX_SL_F_XFER_LEN|HTX_SL_F_CHNK); - sl = htx_add_stline(htx, HTX_BLK_RES_SL, flags, ist("HTTP/1.1"), ist("200"), ist("OK")); - if (!sl) - goto full; - sl->info.res.status = 200; - - if (!htx_add_header(htx, ist("Cache-Control"), ist("no-cache"))) - goto full; - if (ctx->flags & STAT_FMT_HTML) { - if (!htx_add_header(htx, ist("Content-Type"), ist("text/html"))) - goto full; - } - else if (ctx->flags & (STAT_FMT_JSON|STAT_JSON_SCHM)) { - if (!htx_add_header(htx, ist("Content-Type"), ist("application/json"))) - goto full; - } - else { - if (!htx_add_header(htx, ist("Content-Type"), ist("text/plain"))) - goto full; - } - - if (uri->refresh > 0 && !(ctx->flags & STAT_NO_REFRESH)) { - const char *refresh = U2A(uri->refresh); - if (!htx_add_header(htx, ist("Refresh"), ist(refresh))) - goto full; - } - - if (ctx->flags & STAT_CHUNKED) { - if (!htx_add_header(htx, ist("Transfer-Encoding"), ist("chunked"))) - goto full; - } - - if (!htx_add_endof(htx, HTX_BLK_EOH)) - goto full; - - channel_add_input(&s->res, htx->data); 
- return 1; - - full: - htx_reset(htx); - sc_need_room(sc, 0); - return 0; -} - - -static int stats_send_http_redirect(struct stconn *sc, struct htx *htx) -{ - char scope_txt[STAT_SCOPE_TXT_MAXLEN + sizeof STAT_SCOPE_PATTERN]; - struct stream *s = __sc_strm(sc); - struct uri_auth *uri; - struct appctx *appctx = __sc_appctx(sc); - struct show_stat_ctx *ctx = appctx->svcctx; - struct htx_sl *sl; - unsigned int flags; - - BUG_ON(!ctx->http_px); - uri = ctx->http_px->uri_auth; - - /* scope_txt = search pattern + search query, ctx->scope_len is always <= STAT_SCOPE_TXT_MAXLEN */ - scope_txt[0] = 0; - if (ctx->scope_len) { - const char *scope_ptr = stats_scope_ptr(appctx, sc); - - strlcpy2(scope_txt, STAT_SCOPE_PATTERN, sizeof(scope_txt)); - memcpy(scope_txt + strlen(STAT_SCOPE_PATTERN), scope_ptr, ctx->scope_len); - scope_txt[strlen(STAT_SCOPE_PATTERN) + ctx->scope_len] = 0; - } - - /* We don't want to land on the posted stats page because a refresh will - * repost the data. We don't want this to happen on accident so we redirect - * the browse to the stats page with a GET. - */ - chunk_printf(&trash, "%s;st=%s%s%s%s", - uri->uri_prefix, - ((ctx->st_code > STAT_STATUS_INIT) && - (ctx->st_code < STAT_STATUS_SIZE) && - stat_status_codes[ctx->st_code]) ? - stat_status_codes[ctx->st_code] : - stat_status_codes[STAT_STATUS_UNKN], - (ctx->flags & STAT_HIDE_DOWN) ? ";up" : "", - (ctx->flags & STAT_NO_REFRESH) ? 
";norefresh" : "", - scope_txt); - - flags = (HTX_SL_F_IS_RESP|HTX_SL_F_VER_11|HTX_SL_F_XFER_LEN|HTX_SL_F_CLEN|HTX_SL_F_CHNK); - sl = htx_add_stline(htx, HTX_BLK_RES_SL, flags, ist("HTTP/1.1"), ist("303"), ist("See Other")); - if (!sl) - goto full; - sl->info.res.status = 303; - - if (!htx_add_header(htx, ist("Cache-Control"), ist("no-cache")) || - !htx_add_header(htx, ist("Content-Type"), ist("text/plain")) || - !htx_add_header(htx, ist("Content-Length"), ist("0")) || - !htx_add_header(htx, ist("Location"), ist2(trash.area, trash.data))) - goto full; - - if (!htx_add_endof(htx, HTX_BLK_EOH)) - goto full; - - channel_add_input(&s->res, htx->data); - return 1; - -full: - htx_reset(htx); - sc_need_room(sc, 0); - return 0; -} - -/* This I/O handler runs as an applet embedded in a stream connector. It is - * used to send HTTP stats over a TCP socket. The mechanism is very simple. - * appctx->st0 contains the operation in progress (dump, done). The handler - * automatically unregisters itself once transfer is complete. - */ -static void http_stats_io_handler(struct appctx *appctx) -{ - struct show_stat_ctx *ctx = appctx->svcctx; - struct stconn *sc = appctx_sc(appctx); - struct stream *s = __sc_strm(sc); - struct channel *req = sc_oc(sc); - struct channel *res = sc_ic(sc); - struct htx *req_htx, *res_htx; - - /* only proxy stats are available via http */ - ctx->domain = STATS_DOMAIN_PROXY; - - res_htx = htx_from_buf(&res->buf); - - if (unlikely(se_fl_test(appctx->sedesc, (SE_FL_EOS|SE_FL_ERROR|SE_FL_SHR|SE_FL_SHW)))) { - appctx->st0 = STAT_HTTP_END; - goto out; - } - - /* Check if the input buffer is available. 
*/ - if (!b_size(&res->buf)) { - sc_need_room(sc, 0); - goto out; - } - - /* all states are processed in sequence */ - if (appctx->st0 == STAT_HTTP_HEAD) { - if (stats_send_http_headers(sc, res_htx)) { - if (s->txn->meth == HTTP_METH_HEAD) - appctx->st0 = STAT_HTTP_DONE; - else - appctx->st0 = STAT_HTTP_DUMP; - } - } - - if (appctx->st0 == STAT_HTTP_DUMP) { - trash_chunk = b_make(trash.area, res->buf.size, 0, 0); - /* adjust buffer size to take htx overhead into account, - * make sure to perform this call on an empty buffer - */ - trash_chunk.size = buf_room_for_htx_data(&trash_chunk); - if (stats_dump_stat_to_buffer(sc, res_htx)) - appctx->st0 = STAT_HTTP_DONE; - } - - if (appctx->st0 == STAT_HTTP_POST) { - if (stats_process_http_post(sc)) - appctx->st0 = STAT_HTTP_LAST; - else if (s->scf->flags & (SC_FL_EOS|SC_FL_ABRT_DONE)) - appctx->st0 = STAT_HTTP_DONE; - } - - if (appctx->st0 == STAT_HTTP_LAST) { - if (stats_send_http_redirect(sc, res_htx)) - appctx->st0 = STAT_HTTP_DONE; - } - - if (appctx->st0 == STAT_HTTP_DONE) { - /* no more data are expected. If the response buffer is empty, - * be sure to add something (EOT block in this case) to have - * something to send. It is important to be sure the EOM flags - * will be handled by the endpoint. - */ - if (htx_is_empty(res_htx)) { - if (!htx_add_endof(res_htx, HTX_BLK_EOT)) { - sc_need_room(sc, sizeof(struct htx_blk) + 1); - goto out; - } - channel_add_input(res, 1); - } - res_htx->flags |= HTX_FL_EOM; - se_fl_set(appctx->sedesc, SE_FL_EOI); - appctx->st0 = STAT_HTTP_END; - } - - if (appctx->st0 == STAT_HTTP_END) { - se_fl_set(appctx->sedesc, SE_FL_EOS); - applet_will_consume(appctx); - } - - out: - /* we have left the request in the buffer for the case where we - * process a POST, and this automatically re-enables activity on - * read. It's better to indicate that we want to stop reading when - * we're sending, so that we know there's at most one direction - * deciding to wake the applet up. 
It saves it from looping when - * emitting large blocks into small TCP windows. - */ - htx_to_buf(res_htx, &res->buf); - if (appctx->st0 == STAT_HTTP_END) { - /* eat the whole request */ - if (co_data(req)) { - req_htx = htx_from_buf(&req->buf); - co_htx_skip(req, req_htx, co_data(req)); - htx_to_buf(req_htx, &req->buf); - } - } - else if (co_data(res)) - applet_wont_consume(appctx); -} - -/* Dump all fields from <info> into <out> using the "show info" format (name: value) */ +/* Dump all fields from <info_fields> into <out> using the "show info" format (name: value) */ static int stats_dump_info_fields(struct buffer *out, - const struct field *info, + const struct field *line, struct show_stat_ctx *ctx) { int flags = ctx->flags; - int field; + int i; - for (field = 0; field < INF_TOTAL_FIELDS; field++) { - if (!field_format(info, field)) + for (i = 0; i < ST_I_INF_MAX; i++) { + if (!field_format(line, i)) continue; - if (!chunk_appendf(out, "%s: ", info_fields[field].name)) + if (!chunk_appendf(out, "%s: ", stat_cols_info[i].name)) return 0; - if (!stats_emit_raw_data_field(out, &info[field])) + if (!stats_emit_raw_data_field(out, &line[i])) return 0; - if ((flags & STAT_SHOW_FDESC) && !chunk_appendf(out, ":\"%s\"", info_fields[field].desc)) + if ((flags & STAT_F_SHOW_FDESC) && !chunk_appendf(out, ":\"%s\"", stat_cols_info[i].desc)) return 0; if (!chunk_strcat(out, "\n")) return 0; @@ -4606,25 +654,27 @@ static int stats_dump_info_fields(struct buffer *out, return 1; } -/* Dump all fields from <info> into <out> using the "show info typed" format */ +/* Dump all fields from <line> into <out> using the "show info typed" format */ static int stats_dump_typed_info_fields(struct buffer *out, - const struct field *info, + const struct field *line, struct show_stat_ctx *ctx) { int flags = ctx->flags; - int field; + int i; - for (field = 0; field < INF_TOTAL_FIELDS; field++) { - if (!field_format(info, field)) + for (i = 0; i < ST_I_INF_MAX; i++) { + if 
(!field_format(line, i)) continue; - if (!chunk_appendf(out, "%d.%s.%u:", field, info_fields[field].name, info[INF_PROCESS_NUM].u.u32)) + if (!chunk_appendf(out, "%d.%s.%u:", i, stat_cols_info[i].name, + line[ST_I_INF_PROCESS_NUM].u.u32)) { return 0; - if (!stats_emit_field_tags(out, &info[field], ':')) + } + if (!stats_emit_field_tags(out, &line[i], ':')) return 0; - if (!stats_emit_typed_data_field(out, &info[field])) + if (!stats_emit_typed_data_field(out, &line[i])) return 0; - if ((flags & STAT_SHOW_FDESC) && !chunk_appendf(out, ":\"%s\"", info_fields[field].desc)) + if ((flags & STAT_F_SHOW_FDESC) && !chunk_appendf(out, ":\"%s\"", stat_cols_info[i].desc)) return 0; if (!chunk_strcat(out, "\n")) return 0; @@ -4633,12 +683,12 @@ static int stats_dump_typed_info_fields(struct buffer *out, } /* Fill <info> with HAProxy global info. <info> is preallocated array of length - * <len>. The length of the array must be INF_TOTAL_FIELDS. If this length is + * <len>. The length of the array must be ST_I_INF_MAX. If this length is * less then this value, the function returns 0, otherwise, it returns 1. Some - * fields' presence or precision may depend on some of the STAT_* flags present + * fields' presence or precision may depend on some of the STAT_F_* flags present * in <flags>. 
*/ -int stats_fill_info(struct field *info, int len, uint flags) +int stats_fill_info(struct field *line, int len, uint flags) { struct buffer *out = get_trash_chunk(); uint64_t glob_out_bytes, glob_spl_bytes, glob_out_b32; @@ -4671,102 +721,102 @@ int stats_fill_info(struct field *info, int len, uint flags) boot = tv_ms_remain(&start_date, &ready_date); - if (len < INF_TOTAL_FIELDS) + if (len < ST_I_INF_MAX) return 0; chunk_reset(out); - memset(info, 0, sizeof(*info) * len); + memset(line, 0, sizeof(*line) * len); - info[INF_NAME] = mkf_str(FO_PRODUCT|FN_OUTPUT|FS_SERVICE, PRODUCT_NAME); - info[INF_VERSION] = mkf_str(FO_PRODUCT|FN_OUTPUT|FS_SERVICE, haproxy_version); - info[INF_BUILD_INFO] = mkf_str(FO_PRODUCT|FN_OUTPUT|FS_SERVICE, haproxy_version); - info[INF_RELEASE_DATE] = mkf_str(FO_PRODUCT|FN_OUTPUT|FS_SERVICE, haproxy_date); + line[ST_I_INF_NAME] = mkf_str(FO_PRODUCT|FN_OUTPUT|FS_SERVICE, PRODUCT_NAME); + line[ST_I_INF_VERSION] = mkf_str(FO_PRODUCT|FN_OUTPUT|FS_SERVICE, haproxy_version); + line[ST_I_INF_BUILD_INFO] = mkf_str(FO_PRODUCT|FN_OUTPUT|FS_SERVICE, haproxy_version); + line[ST_I_INF_RELEASE_DATE] = mkf_str(FO_PRODUCT|FN_OUTPUT|FS_SERVICE, haproxy_date); - info[INF_NBTHREAD] = mkf_u32(FO_CONFIG|FS_SERVICE, global.nbthread); - info[INF_NBPROC] = mkf_u32(FO_CONFIG|FS_SERVICE, 1); - info[INF_PROCESS_NUM] = mkf_u32(FO_KEY, 1); - info[INF_PID] = mkf_u32(FO_STATUS, pid); + line[ST_I_INF_NBTHREAD] = mkf_u32(FO_CONFIG|FS_SERVICE, global.nbthread); + line[ST_I_INF_NBPROC] = mkf_u32(FO_CONFIG|FS_SERVICE, 1); + line[ST_I_INF_PROCESS_NUM] = mkf_u32(FO_KEY, 1); + line[ST_I_INF_PID] = mkf_u32(FO_STATUS, pid); - info[INF_UPTIME] = mkf_str(FN_DURATION, chunk_newstr(out)); + line[ST_I_INF_UPTIME] = mkf_str(FN_DURATION, chunk_newstr(out)); chunk_appendf(out, "%ud %uh%02um%02us", up_sec / 86400, (up_sec % 86400) / 3600, (up_sec % 3600) / 60, (up_sec % 60)); - info[INF_UPTIME_SEC] = (flags & STAT_USE_FLOAT) ? 
mkf_flt(FN_DURATION, up_sec + up_usec / 1000000.0) : mkf_u32(FN_DURATION, up_sec); - info[INF_START_TIME_SEC] = (flags & STAT_USE_FLOAT) ? mkf_flt(FN_DURATION, start_date.tv_sec + start_date.tv_usec / 1000000.0) : mkf_u32(FN_DURATION, start_date.tv_sec); - info[INF_MEMMAX_MB] = mkf_u32(FO_CONFIG|FN_LIMIT, global.rlimit_memmax); - info[INF_MEMMAX_BYTES] = mkf_u32(FO_CONFIG|FN_LIMIT, global.rlimit_memmax * 1048576L); - info[INF_POOL_ALLOC_MB] = mkf_u32(0, (unsigned)(pool_total_allocated() / 1048576L)); - info[INF_POOL_ALLOC_BYTES] = mkf_u64(0, pool_total_allocated()); - info[INF_POOL_USED_MB] = mkf_u32(0, (unsigned)(pool_total_used() / 1048576L)); - info[INF_POOL_USED_BYTES] = mkf_u64(0, pool_total_used()); - info[INF_POOL_FAILED] = mkf_u32(FN_COUNTER, pool_total_failures()); - info[INF_ULIMIT_N] = mkf_u32(FO_CONFIG|FN_LIMIT, global.rlimit_nofile); - info[INF_MAXSOCK] = mkf_u32(FO_CONFIG|FN_LIMIT, global.maxsock); - info[INF_MAXCONN] = mkf_u32(FO_CONFIG|FN_LIMIT, global.maxconn); - info[INF_HARD_MAXCONN] = mkf_u32(FO_CONFIG|FN_LIMIT, global.hardmaxconn); - info[INF_CURR_CONN] = mkf_u32(0, actconn); - info[INF_CUM_CONN] = mkf_u32(FN_COUNTER, totalconn); - info[INF_CUM_REQ] = mkf_u32(FN_COUNTER, global.req_count); + line[ST_I_INF_UPTIME_SEC] = (flags & STAT_F_USE_FLOAT) ? mkf_flt(FN_DURATION, up_sec + up_usec / 1000000.0) : mkf_u32(FN_DURATION, up_sec); + line[ST_I_INF_START_TIME_SEC] = (flags & STAT_F_USE_FLOAT) ? 
mkf_flt(FN_DURATION, start_date.tv_sec + start_date.tv_usec / 1000000.0) : mkf_u32(FN_DURATION, start_date.tv_sec); + line[ST_I_INF_MEMMAX_MB] = mkf_u32(FO_CONFIG|FN_LIMIT, global.rlimit_memmax); + line[ST_I_INF_MEMMAX_BYTES] = mkf_u32(FO_CONFIG|FN_LIMIT, global.rlimit_memmax * 1048576L); + line[ST_I_INF_POOL_ALLOC_MB] = mkf_u32(0, (unsigned)(pool_total_allocated() / 1048576L)); + line[ST_I_INF_POOL_ALLOC_BYTES] = mkf_u64(0, pool_total_allocated()); + line[ST_I_INF_POOL_USED_MB] = mkf_u32(0, (unsigned)(pool_total_used() / 1048576L)); + line[ST_I_INF_POOL_USED_BYTES] = mkf_u64(0, pool_total_used()); + line[ST_I_INF_POOL_FAILED] = mkf_u32(FN_COUNTER, pool_total_failures()); + line[ST_I_INF_ULIMIT_N] = mkf_u32(FO_CONFIG|FN_LIMIT, global.rlimit_nofile); + line[ST_I_INF_MAXSOCK] = mkf_u32(FO_CONFIG|FN_LIMIT, global.maxsock); + line[ST_I_INF_MAXCONN] = mkf_u32(FO_CONFIG|FN_LIMIT, global.maxconn); + line[ST_I_INF_HARD_MAXCONN] = mkf_u32(FO_CONFIG|FN_LIMIT, global.hardmaxconn); + line[ST_I_INF_CURR_CONN] = mkf_u32(0, actconn); + line[ST_I_INF_CUM_CONN] = mkf_u32(FN_COUNTER, totalconn); + line[ST_I_INF_CUM_REQ] = mkf_u32(FN_COUNTER, global.req_count); #ifdef USE_OPENSSL - info[INF_MAX_SSL_CONNS] = mkf_u32(FN_MAX, global.maxsslconn); - info[INF_CURR_SSL_CONNS] = mkf_u32(0, global.sslconns); - info[INF_CUM_SSL_CONNS] = mkf_u32(FN_COUNTER, global.totalsslconns); + line[ST_I_INF_MAX_SSL_CONNS] = mkf_u32(FN_MAX, global.maxsslconn); + line[ST_I_INF_CURR_SSL_CONNS] = mkf_u32(0, global.sslconns); + line[ST_I_INF_CUM_SSL_CONNS] = mkf_u32(FN_COUNTER, global.totalsslconns); #endif - info[INF_MAXPIPES] = mkf_u32(FO_CONFIG|FN_LIMIT, global.maxpipes); - info[INF_PIPES_USED] = mkf_u32(0, pipes_used); - info[INF_PIPES_FREE] = mkf_u32(0, pipes_free); - info[INF_CONN_RATE] = (flags & STAT_USE_FLOAT) ? 
mkf_flt(FN_RATE, read_freq_ctr_flt(&global.conn_per_sec)) : mkf_u32(FN_RATE, read_freq_ctr(&global.conn_per_sec)); - info[INF_CONN_RATE_LIMIT] = mkf_u32(FO_CONFIG|FN_LIMIT, global.cps_lim); - info[INF_MAX_CONN_RATE] = mkf_u32(FN_MAX, global.cps_max); - info[INF_SESS_RATE] = (flags & STAT_USE_FLOAT) ? mkf_flt(FN_RATE, read_freq_ctr_flt(&global.sess_per_sec)) : mkf_u32(FN_RATE, read_freq_ctr(&global.sess_per_sec)); - info[INF_SESS_RATE_LIMIT] = mkf_u32(FO_CONFIG|FN_LIMIT, global.sps_lim); - info[INF_MAX_SESS_RATE] = mkf_u32(FN_RATE, global.sps_max); + line[ST_I_INF_MAXPIPES] = mkf_u32(FO_CONFIG|FN_LIMIT, global.maxpipes); + line[ST_I_INF_PIPES_USED] = mkf_u32(0, pipes_used); + line[ST_I_INF_PIPES_FREE] = mkf_u32(0, pipes_free); + line[ST_I_INF_CONN_RATE] = (flags & STAT_F_USE_FLOAT) ? mkf_flt(FN_RATE, read_freq_ctr_flt(&global.conn_per_sec)) : mkf_u32(FN_RATE, read_freq_ctr(&global.conn_per_sec)); + line[ST_I_INF_CONN_RATE_LIMIT] = mkf_u32(FO_CONFIG|FN_LIMIT, global.cps_lim); + line[ST_I_INF_MAX_CONN_RATE] = mkf_u32(FN_MAX, global.cps_max); + line[ST_I_INF_SESS_RATE] = (flags & STAT_F_USE_FLOAT) ? mkf_flt(FN_RATE, read_freq_ctr_flt(&global.sess_per_sec)) : mkf_u32(FN_RATE, read_freq_ctr(&global.sess_per_sec)); + line[ST_I_INF_SESS_RATE_LIMIT] = mkf_u32(FO_CONFIG|FN_LIMIT, global.sps_lim); + line[ST_I_INF_MAX_SESS_RATE] = mkf_u32(FN_RATE, global.sps_max); #ifdef USE_OPENSSL - info[INF_SSL_RATE] = (flags & STAT_USE_FLOAT) ? mkf_flt(FN_RATE, ssl_sess_rate) : mkf_u32(FN_RATE, ssl_sess_rate); - info[INF_SSL_RATE_LIMIT] = mkf_u32(FO_CONFIG|FN_LIMIT, global.ssl_lim); - info[INF_MAX_SSL_RATE] = mkf_u32(FN_MAX, global.ssl_max); - info[INF_SSL_FRONTEND_KEY_RATE] = (flags & STAT_USE_FLOAT) ? mkf_flt(FN_RATE, ssl_key_rate) : mkf_u32(0, ssl_key_rate); - info[INF_SSL_FRONTEND_MAX_KEY_RATE] = mkf_u32(FN_MAX, global.ssl_fe_keys_max); - info[INF_SSL_FRONTEND_SESSION_REUSE_PCT] = (flags & STAT_USE_FLOAT) ? 
mkf_flt(FN_RATE, ssl_reuse) : mkf_u32(0, ssl_reuse); - info[INF_SSL_BACKEND_KEY_RATE] = (flags & STAT_USE_FLOAT) ? mkf_flt(FN_RATE, read_freq_ctr_flt(&global.ssl_be_keys_per_sec)) : mkf_u32(FN_RATE, read_freq_ctr(&global.ssl_be_keys_per_sec)); - info[INF_SSL_BACKEND_MAX_KEY_RATE] = mkf_u32(FN_MAX, global.ssl_be_keys_max); - info[INF_SSL_CACHE_LOOKUPS] = mkf_u32(FN_COUNTER, global.shctx_lookups); - info[INF_SSL_CACHE_MISSES] = mkf_u32(FN_COUNTER, global.shctx_misses); + line[ST_I_INF_SSL_RATE] = (flags & STAT_F_USE_FLOAT) ? mkf_flt(FN_RATE, ssl_sess_rate) : mkf_u32(FN_RATE, ssl_sess_rate); + line[ST_I_INF_SSL_RATE_LIMIT] = mkf_u32(FO_CONFIG|FN_LIMIT, global.ssl_lim); + line[ST_I_INF_MAX_SSL_RATE] = mkf_u32(FN_MAX, global.ssl_max); + line[ST_I_INF_SSL_FRONTEND_KEY_RATE] = (flags & STAT_F_USE_FLOAT) ? mkf_flt(FN_RATE, ssl_key_rate) : mkf_u32(0, ssl_key_rate); + line[ST_I_INF_SSL_FRONTEND_MAX_KEY_RATE] = mkf_u32(FN_MAX, global.ssl_fe_keys_max); + line[ST_I_INF_SSL_FRONTEND_SESSION_REUSE_PCT] = (flags & STAT_F_USE_FLOAT) ? mkf_flt(FN_RATE, ssl_reuse) : mkf_u32(0, ssl_reuse); + line[ST_I_INF_SSL_BACKEND_KEY_RATE] = (flags & STAT_F_USE_FLOAT) ? mkf_flt(FN_RATE, read_freq_ctr_flt(&global.ssl_be_keys_per_sec)) : mkf_u32(FN_RATE, read_freq_ctr(&global.ssl_be_keys_per_sec)); + line[ST_I_INF_SSL_BACKEND_MAX_KEY_RATE] = mkf_u32(FN_MAX, global.ssl_be_keys_max); + line[ST_I_INF_SSL_CACHE_LOOKUPS] = mkf_u32(FN_COUNTER, global.shctx_lookups); + line[ST_I_INF_SSL_CACHE_MISSES] = mkf_u32(FN_COUNTER, global.shctx_misses); #endif - info[INF_COMPRESS_BPS_IN] = (flags & STAT_USE_FLOAT) ? mkf_flt(FN_RATE, read_freq_ctr_flt(&global.comp_bps_in)) : mkf_u32(FN_RATE, read_freq_ctr(&global.comp_bps_in)); - info[INF_COMPRESS_BPS_OUT] = (flags & STAT_USE_FLOAT) ? 
mkf_flt(FN_RATE, read_freq_ctr_flt(&global.comp_bps_out)) : mkf_u32(FN_RATE, read_freq_ctr(&global.comp_bps_out)); - info[INF_COMPRESS_BPS_RATE_LIM] = mkf_u32(FO_CONFIG|FN_LIMIT, global.comp_rate_lim); + line[ST_I_INF_COMPRESS_BPS_IN] = (flags & STAT_F_USE_FLOAT) ? mkf_flt(FN_RATE, read_freq_ctr_flt(&global.comp_bps_in)) : mkf_u32(FN_RATE, read_freq_ctr(&global.comp_bps_in)); + line[ST_I_INF_COMPRESS_BPS_OUT] = (flags & STAT_F_USE_FLOAT) ? mkf_flt(FN_RATE, read_freq_ctr_flt(&global.comp_bps_out)) : mkf_u32(FN_RATE, read_freq_ctr(&global.comp_bps_out)); + line[ST_I_INF_COMPRESS_BPS_RATE_LIM] = mkf_u32(FO_CONFIG|FN_LIMIT, global.comp_rate_lim); #ifdef USE_ZLIB - info[INF_ZLIB_MEM_USAGE] = mkf_u32(0, zlib_used_memory); - info[INF_MAX_ZLIB_MEM_USAGE] = mkf_u32(FO_CONFIG|FN_LIMIT, global.maxzlibmem); + line[ST_I_INF_ZLIB_MEM_USAGE] = mkf_u32(0, zlib_used_memory); + line[ST_I_INF_MAX_ZLIB_MEM_USAGE] = mkf_u32(FO_CONFIG|FN_LIMIT, global.maxzlibmem); #endif - info[INF_TASKS] = mkf_u32(0, total_allocated_tasks()); - info[INF_RUN_QUEUE] = mkf_u32(0, total_run_queues()); - info[INF_IDLE_PCT] = mkf_u32(FN_AVG, clock_report_idle()); - info[INF_NODE] = mkf_str(FO_CONFIG|FN_OUTPUT|FS_SERVICE, global.node); + line[ST_I_INF_TASKS] = mkf_u32(0, total_allocated_tasks()); + line[ST_I_INF_RUN_QUEUE] = mkf_u32(0, total_run_queues()); + line[ST_I_INF_IDLE_PCT] = mkf_u32(FN_AVG, clock_report_idle()); + line[ST_I_INF_NODE] = mkf_str(FO_CONFIG|FN_OUTPUT|FS_SERVICE, global.node); if (global.desc) - info[INF_DESCRIPTION] = mkf_str(FO_CONFIG|FN_OUTPUT|FS_SERVICE, global.desc); - info[INF_STOPPING] = mkf_u32(0, stopping); - info[INF_JOBS] = mkf_u32(0, jobs); - info[INF_UNSTOPPABLE_JOBS] = mkf_u32(0, unstoppable_jobs); - info[INF_LISTENERS] = mkf_u32(0, listeners); - info[INF_ACTIVE_PEERS] = mkf_u32(0, active_peers); - info[INF_CONNECTED_PEERS] = mkf_u32(0, connected_peers); - info[INF_DROPPED_LOGS] = mkf_u32(0, dropped_logs); - info[INF_BUSY_POLLING] = mkf_u32(0, !!(global.tune.options & 
GTUNE_BUSY_POLLING)); - info[INF_FAILED_RESOLUTIONS] = mkf_u32(0, resolv_failed_resolutions); - info[INF_TOTAL_BYTES_OUT] = mkf_u64(0, glob_out_bytes); - info[INF_TOTAL_SPLICED_BYTES_OUT] = mkf_u64(0, glob_spl_bytes); - info[INF_BYTES_OUT_RATE] = mkf_u64(FN_RATE, glob_out_b32); - info[INF_DEBUG_COMMANDS_ISSUED] = mkf_u32(0, debug_commands_issued); - info[INF_CUM_LOG_MSGS] = mkf_u32(FN_COUNTER, cum_log_messages); - - info[INF_TAINTED] = mkf_str(FO_STATUS, chunk_newstr(out)); + line[ST_I_INF_DESCRIPTION] = mkf_str(FO_CONFIG|FN_OUTPUT|FS_SERVICE, global.desc); + line[ST_I_INF_STOPPING] = mkf_u32(0, stopping); + line[ST_I_INF_JOBS] = mkf_u32(0, jobs); + line[ST_I_INF_UNSTOPPABLE_JOBS] = mkf_u32(0, unstoppable_jobs); + line[ST_I_INF_LISTENERS] = mkf_u32(0, listeners); + line[ST_I_INF_ACTIVE_PEERS] = mkf_u32(0, active_peers); + line[ST_I_INF_CONNECTED_PEERS] = mkf_u32(0, connected_peers); + line[ST_I_INF_DROPPED_LOGS] = mkf_u32(0, dropped_logs); + line[ST_I_INF_BUSY_POLLING] = mkf_u32(0, !!(global.tune.options & GTUNE_BUSY_POLLING)); + line[ST_I_INF_FAILED_RESOLUTIONS] = mkf_u32(0, resolv_failed_resolutions); + line[ST_I_INF_TOTAL_BYTES_OUT] = mkf_u64(0, glob_out_bytes); + line[ST_I_INF_TOTAL_SPLICED_BYTES_OUT] = mkf_u64(0, glob_spl_bytes); + line[ST_I_INF_BYTES_OUT_RATE] = mkf_u64(FN_RATE, glob_out_b32); + line[ST_I_INF_DEBUG_COMMANDS_ISSUED] = mkf_u32(0, debug_commands_issued); + line[ST_I_INF_CUM_LOG_MSGS] = mkf_u32(FN_COUNTER, cum_log_messages); + + line[ST_I_INF_TAINTED] = mkf_str(FO_STATUS, chunk_newstr(out)); chunk_appendf(out, "%#x", get_tainted()); - info[INF_WARNINGS] = mkf_u32(FN_COUNTER, HA_ATOMIC_LOAD(&tot_warnings)); - info[INF_MAXCONN_REACHED] = mkf_u32(FN_COUNTER, HA_ATOMIC_LOAD(&maxconn_reached)); - info[INF_BOOTTIME_MS] = mkf_u32(FN_DURATION, boot); - info[INF_NICED_TASKS] = mkf_u32(0, total_niced_running_tasks()); + line[ST_I_INF_WARNINGS] = mkf_u32(FN_COUNTER, HA_ATOMIC_LOAD(&tot_warnings)); + line[ST_I_INF_MAXCONN_REACHED] = mkf_u32(FN_COUNTER, 
HA_ATOMIC_LOAD(&maxconn_reached)); + line[ST_I_INF_BOOTTIME_MS] = mkf_u32(FN_DURATION, boot); + line[ST_I_INF_NICED_TASKS] = mkf_u32(0, total_niced_running_tasks()); return 1; } @@ -4779,24 +829,25 @@ static int stats_dump_info_to_buffer(struct stconn *sc) { struct appctx *appctx = __sc_appctx(sc); struct show_stat_ctx *ctx = appctx->svcctx; + struct buffer *chk = &ctx->chunk; int ret; int current_field; - if (!stats_fill_info(info, INF_TOTAL_FIELDS, ctx->flags)) + if (!stats_fill_info(stat_line_info, ST_I_INF_MAX, ctx->flags)) return 0; - chunk_reset(&trash_chunk); + chunk_reset(chk); more: current_field = ctx->field; - if (ctx->flags & STAT_FMT_TYPED) - ret = stats_dump_typed_info_fields(&trash_chunk, info, ctx); - else if (ctx->flags & STAT_FMT_JSON) - ret = stats_dump_json_info_fields(&trash_chunk, info, ctx); + if (ctx->flags & STAT_F_FMT_TYPED) + ret = stats_dump_typed_info_fields(chk, stat_line_info, ctx); + else if (ctx->flags & STAT_F_FMT_JSON) + ret = stats_dump_json_info_fields(chk, stat_line_info, ctx); else - ret = stats_dump_info_fields(&trash_chunk, info, ctx); + ret = stats_dump_info_fields(chk, stat_line_info, ctx); - if (applet_putchk(appctx, &trash_chunk) == -1) { + if (applet_putchk(appctx, chk) == -1) { /* restore previous field */ ctx->field = current_field; return 0; @@ -4809,248 +860,8 @@ more: return 1; } -/* This function dumps the schema onto the stream connector's read buffer. - * It returns 0 as long as it does not complete, non-zero upon completion. - * No state is used. - * - * Integer values bounded to the range [-(2**53)+1, (2**53)-1] as - * per the recommendation for interoperable integers in section 6 of RFC 7159. 
- */ -static void stats_dump_json_schema(struct buffer *out) -{ - - int old_len = out->data; - - chunk_strcat(out, - "{" - "\"$schema\":\"http://json-schema.org/draft-04/schema#\"," - "\"oneOf\":[" - "{" - "\"title\":\"Info\"," - "\"type\":\"array\"," - "\"items\":{" - "\"title\":\"InfoItem\"," - "\"type\":\"object\"," - "\"properties\":{" - "\"field\":{\"$ref\":\"#/definitions/field\"}," - "\"processNum\":{\"$ref\":\"#/definitions/processNum\"}," - "\"tags\":{\"$ref\":\"#/definitions/tags\"}," - "\"value\":{\"$ref\":\"#/definitions/typedValue\"}" - "}," - "\"required\":[\"field\",\"processNum\",\"tags\"," - "\"value\"]" - "}" - "}," - "{" - "\"title\":\"Stat\"," - "\"type\":\"array\"," - "\"items\":{" - "\"title\":\"InfoItem\"," - "\"type\":\"object\"," - "\"properties\":{" - "\"objType\":{" - "\"enum\":[\"Frontend\",\"Backend\",\"Listener\"," - "\"Server\",\"Unknown\"]" - "}," - "\"proxyId\":{" - "\"type\":\"integer\"," - "\"minimum\":0" - "}," - "\"id\":{" - "\"type\":\"integer\"," - "\"minimum\":0" - "}," - "\"field\":{\"$ref\":\"#/definitions/field\"}," - "\"processNum\":{\"$ref\":\"#/definitions/processNum\"}," - "\"tags\":{\"$ref\":\"#/definitions/tags\"}," - "\"typedValue\":{\"$ref\":\"#/definitions/typedValue\"}" - "}," - "\"required\":[\"objType\",\"proxyId\",\"id\"," - "\"field\",\"processNum\",\"tags\"," - "\"value\"]" - "}" - "}," - "{" - "\"title\":\"Error\"," - "\"type\":\"object\"," - "\"properties\":{" - "\"errorStr\":{" - "\"type\":\"string\"" - "}" - "}," - "\"required\":[\"errorStr\"]" - "}" - "]," - "\"definitions\":{" - "\"field\":{" - "\"type\":\"object\"," - "\"pos\":{" - "\"type\":\"integer\"," - "\"minimum\":0" - "}," - "\"name\":{" - "\"type\":\"string\"" - "}," - "\"required\":[\"pos\",\"name\"]" - "}," - "\"processNum\":{" - "\"type\":\"integer\"," - "\"minimum\":1" - "}," - "\"tags\":{" - "\"type\":\"object\"," - "\"origin\":{" - "\"type\":\"string\"," - "\"enum\":[\"Metric\",\"Status\",\"Key\"," - "\"Config\",\"Product\",\"Unknown\"]" 
- "}," - "\"nature\":{" - "\"type\":\"string\"," - "\"enum\":[\"Gauge\",\"Limit\",\"Min\",\"Max\"," - "\"Rate\",\"Counter\",\"Duration\"," - "\"Age\",\"Time\",\"Name\",\"Output\"," - "\"Avg\", \"Unknown\"]" - "}," - "\"scope\":{" - "\"type\":\"string\"," - "\"enum\":[\"Cluster\",\"Process\",\"Service\"," - "\"System\",\"Unknown\"]" - "}," - "\"required\":[\"origin\",\"nature\",\"scope\"]" - "}," - "\"typedValue\":{" - "\"type\":\"object\"," - "\"oneOf\":[" - "{\"$ref\":\"#/definitions/typedValue/definitions/s32Value\"}," - "{\"$ref\":\"#/definitions/typedValue/definitions/s64Value\"}," - "{\"$ref\":\"#/definitions/typedValue/definitions/u32Value\"}," - "{\"$ref\":\"#/definitions/typedValue/definitions/u64Value\"}," - "{\"$ref\":\"#/definitions/typedValue/definitions/strValue\"}" - "]," - "\"definitions\":{" - "\"s32Value\":{" - "\"properties\":{" - "\"type\":{" - "\"type\":\"string\"," - "\"enum\":[\"s32\"]" - "}," - "\"value\":{" - "\"type\":\"integer\"," - "\"minimum\":-2147483648," - "\"maximum\":2147483647" - "}" - "}," - "\"required\":[\"type\",\"value\"]" - "}," - "\"s64Value\":{" - "\"properties\":{" - "\"type\":{" - "\"type\":\"string\"," - "\"enum\":[\"s64\"]" - "}," - "\"value\":{" - "\"type\":\"integer\"," - "\"minimum\":-9007199254740991," - "\"maximum\":9007199254740991" - "}" - "}," - "\"required\":[\"type\",\"value\"]" - "}," - "\"u32Value\":{" - "\"properties\":{" - "\"type\":{" - "\"type\":\"string\"," - "\"enum\":[\"u32\"]" - "}," - "\"value\":{" - "\"type\":\"integer\"," - "\"minimum\":0," - "\"maximum\":4294967295" - "}" - "}," - "\"required\":[\"type\",\"value\"]" - "}," - "\"u64Value\":{" - "\"properties\":{" - "\"type\":{" - "\"type\":\"string\"," - "\"enum\":[\"u64\"]" - "}," - "\"value\":{" - "\"type\":\"integer\"," - "\"minimum\":0," - "\"maximum\":9007199254740991" - "}" - "}," - "\"required\":[\"type\",\"value\"]" - "}," - "\"strValue\":{" - "\"properties\":{" - "\"type\":{" - "\"type\":\"string\"," - "\"enum\":[\"str\"]" - "}," - 
"\"value\":{\"type\":\"string\"}" - "}," - "\"required\":[\"type\",\"value\"]" - "}," - "\"unknownValue\":{" - "\"properties\":{" - "\"type\":{" - "\"type\":\"integer\"," - "\"minimum\":0" - "}," - "\"value\":{" - "\"type\":\"string\"," - "\"enum\":[\"unknown\"]" - "}" - "}," - "\"required\":[\"type\",\"value\"]" - "}" - "}" - "}" - "}" - "}"); - - if (old_len == out->data) { - chunk_reset(out); - chunk_appendf(out, - "{\"errorStr\":\"output buffer too short\"}"); - } - chunk_appendf(out, "\n"); -} - -/* This function dumps the schema onto the stream connector's read buffer. - * It returns 0 as long as it does not complete, non-zero upon completion. - * No state is used. - */ -static int stats_dump_json_schema_to_buffer(struct appctx *appctx) -{ - - chunk_reset(&trash_chunk); - - stats_dump_json_schema(&trash_chunk); - - if (applet_putchk(appctx, &trash_chunk) == -1) - return 0; - - return 1; -} - -static void http_stats_release(struct appctx *appctx) -{ - struct show_stat_ctx *ctx = appctx->svcctx; - - if (ctx->px_st == STAT_PX_ST_SV) - srv_drop(ctx->obj2); -} - static int cli_parse_clear_counters(char **args, char *payload, struct appctx *appctx, void *private) { - struct proxy *px; - struct server *sv; - struct listener *li; - struct stats_module *mod; int clrall = 0; if (strcmp(args[2], "all") == 0) @@ -5061,96 +872,13 @@ static int cli_parse_clear_counters(char **args, char *payload, struct appctx *a (clrall && !cli_has_level(appctx, ACCESS_LVL_ADMIN))) return 1; - for (px = proxies_list; px; px = px->next) { - if (clrall) { - memset(&px->be_counters, 0, sizeof(px->be_counters)); - memset(&px->fe_counters, 0, sizeof(px->fe_counters)); - } - else { - px->be_counters.conn_max = 0; - px->be_counters.p.http.rps_max = 0; - px->be_counters.sps_max = 0; - px->be_counters.cps_max = 0; - px->be_counters.nbpend_max = 0; - px->be_counters.qtime_max = 0; - px->be_counters.ctime_max = 0; - px->be_counters.dtime_max = 0; - px->be_counters.ttime_max = 0; - - 
px->fe_counters.conn_max = 0; - px->fe_counters.p.http.rps_max = 0; - px->fe_counters.sps_max = 0; - px->fe_counters.cps_max = 0; - } - - for (sv = px->srv; sv; sv = sv->next) - if (clrall) - memset(&sv->counters, 0, sizeof(sv->counters)); - else { - sv->counters.cur_sess_max = 0; - sv->counters.nbpend_max = 0; - sv->counters.sps_max = 0; - sv->counters.qtime_max = 0; - sv->counters.ctime_max = 0; - sv->counters.dtime_max = 0; - sv->counters.ttime_max = 0; - } - - list_for_each_entry(li, &px->conf.listeners, by_fe) - if (li->counters) { - if (clrall) - memset(li->counters, 0, sizeof(*li->counters)); - else - li->counters->conn_max = 0; - } - } - global.cps_max = 0; global.sps_max = 0; global.ssl_max = 0; global.ssl_fe_keys_max = 0; global.ssl_be_keys_max = 0; - list_for_each_entry(mod, &stats_module_list[STATS_DOMAIN_PROXY], list) { - if (!mod->clearable && !clrall) - continue; - - for (px = proxies_list; px; px = px->next) { - enum stats_domain_px_cap mod_cap = stats_px_get_cap(mod->domain_flags); - - if (px->cap & PR_CAP_FE && mod_cap & STATS_PX_CAP_FE) { - EXTRA_COUNTERS_INIT(px->extra_counters_fe, - mod, - mod->counters, - mod->counters_size); - } - - if (px->cap & PR_CAP_BE && mod_cap & STATS_PX_CAP_BE) { - EXTRA_COUNTERS_INIT(px->extra_counters_be, - mod, - mod->counters, - mod->counters_size); - } - - if (mod_cap & STATS_PX_CAP_SRV) { - for (sv = px->srv; sv; sv = sv->next) { - EXTRA_COUNTERS_INIT(sv->extra_counters, - mod, - mod->counters, - mod->counters_size); - } - } - - if (mod_cap & STATS_PX_CAP_LI) { - list_for_each_entry(li, &px->conf.listeners, by_fe) { - EXTRA_COUNTERS_INIT(li->extra_counters, - mod, - mod->counters, - mod->counters_size); - } - } - } - } + proxy_stats_clear_counters(clrall, &stats_module_list[STATS_DOMAIN_PROXY]); resolv_stats_clear_counters(clrall, &stats_module_list[STATS_DOMAIN_RESOLVERS]); @@ -5171,13 +899,13 @@ static int cli_parse_show_info(char **args, char *payload, struct appctx *appctx while (*args[arg]) { if 
(strcmp(args[arg], "typed") == 0) - ctx->flags = (ctx->flags & ~STAT_FMT_MASK) | STAT_FMT_TYPED; + ctx->flags = (ctx->flags & ~STAT_F_FMT_MASK) | STAT_F_FMT_TYPED; else if (strcmp(args[arg], "json") == 0) - ctx->flags = (ctx->flags & ~STAT_FMT_MASK) | STAT_FMT_JSON; + ctx->flags = (ctx->flags & ~STAT_F_FMT_MASK) | STAT_F_FMT_JSON; else if (strcmp(args[arg], "desc") == 0) - ctx->flags |= STAT_SHOW_FDESC; + ctx->flags |= STAT_F_SHOW_FDESC; else if (strcmp(args[arg], "float") == 0) - ctx->flags |= STAT_USE_FLOAT; + ctx->flags |= STAT_F_USE_FLOAT; arg++; } return 0; @@ -5192,10 +920,10 @@ static int cli_parse_show_stat(char **args, char *payload, struct appctx *appctx ctx->scope_str = 0; ctx->scope_len = 0; ctx->http_px = NULL; // not under http context - ctx->flags = STAT_SHNODE | STAT_SHDESC; + ctx->flags = STAT_F_SHNODE | STAT_F_SHDESC; if ((strm_li(appctx_strm(appctx))->bind_conf->level & ACCESS_LVL_MASK) >= ACCESS_LVL_OPER) - ctx->flags |= STAT_SHLGNDS; + ctx->flags |= STAT_F_SHLGNDS; /* proxy is the default domain */ ctx->domain = STATS_DOMAIN_PROXY; @@ -5225,7 +953,7 @@ static int cli_parse_show_stat(char **args, char *payload, struct appctx *appctx if (!ctx->iid) return cli_err(appctx, "No such proxy.\n"); - ctx->flags |= STAT_BOUND; + ctx->flags |= STAT_F_BOUND; ctx->type = atoi(args[arg+1]); ctx->sid = atoi(args[arg+2]); arg += 3; @@ -5233,15 +961,15 @@ static int cli_parse_show_stat(char **args, char *payload, struct appctx *appctx while (*args[arg]) { if (strcmp(args[arg], "typed") == 0) - ctx->flags = (ctx->flags & ~STAT_FMT_MASK) | STAT_FMT_TYPED; + ctx->flags = (ctx->flags & ~STAT_F_FMT_MASK) | STAT_F_FMT_TYPED; else if (strcmp(args[arg], "json") == 0) - ctx->flags = (ctx->flags & ~STAT_FMT_MASK) | STAT_FMT_JSON; + ctx->flags = (ctx->flags & ~STAT_F_FMT_MASK) | STAT_F_FMT_JSON; else if (strcmp(args[arg], "desc") == 0) - ctx->flags |= STAT_SHOW_FDESC; + ctx->flags |= STAT_F_SHOW_FDESC; else if (strcmp(args[arg], "no-maint") == 0) - ctx->flags |= 
STAT_HIDE_MAINT; + ctx->flags |= STAT_F_HIDE_MAINT; else if (strcmp(args[arg], "up") == 0) - ctx->flags |= STAT_HIDE_DOWN; + ctx->flags |= STAT_F_HIDE_DOWN; arg++; } @@ -5250,7 +978,8 @@ static int cli_parse_show_stat(char **args, char *payload, struct appctx *appctx static int cli_io_handler_dump_info(struct appctx *appctx) { - trash_chunk = b_make(trash.area, trash.size, 0, 0); + struct show_stat_ctx *ctx = appctx->svcctx; + ctx->chunk = b_make(trash.area, trash.size, 0, 0); return stats_dump_info_to_buffer(appctx_sc(appctx)); } @@ -5259,8 +988,9 @@ static int cli_io_handler_dump_info(struct appctx *appctx) */ static int cli_io_handler_dump_stat(struct appctx *appctx) { - trash_chunk = b_make(trash.area, trash.size, 0, 0); - return stats_dump_stat_to_buffer(appctx_sc(appctx), NULL); + struct show_stat_ctx *ctx = appctx->svcctx; + ctx->chunk = b_make(trash.area, trash.size, 0, 0); + return stats_dump_stat_to_buffer(appctx_sc(appctx), NULL, NULL); } static void cli_io_handler_release_stat(struct appctx *appctx) @@ -5273,10 +1003,61 @@ static void cli_io_handler_release_stat(struct appctx *appctx) static int cli_io_handler_dump_json_schema(struct appctx *appctx) { - trash_chunk = b_make(trash.area, trash.size, 0, 0); + struct show_stat_ctx *ctx = appctx->svcctx; + ctx->chunk = b_make(trash.area, trash.size, 0, 0); return stats_dump_json_schema_to_buffer(appctx); } +static int cli_parse_dump_stat_file(char **args, char *payload, + struct appctx *appctx, void *private) +{ + struct show_stat_ctx *ctx = applet_reserve_svcctx(appctx, sizeof(*ctx)); + + ctx->chunk = b_make(trash.area, trash.size, 0, 0); + ctx->domain = STATS_DOMAIN_PROXY; + ctx->flags |= STAT_F_FMT_FILE; + + return 0; +} + +/* Returns 1 on completion else 0. */ +static int cli_io_handler_dump_stat_file(struct appctx *appctx) +{ + struct show_stat_ctx *ctx = appctx->svcctx; + int ret; + + /* Frontend and backend sides are outputted separately on stats-file. 
+ * As such, use STAT_F_BOUND to restrict proxies looping over frontend + * side first before first stats_dump_stat_to_buffer(). A second + * iteration is conducted for backend side after. + */ + ctx->flags |= STAT_F_BOUND; + + if (!(ctx->type & (1 << STATS_TYPE_BE))) { + /* Restrict to frontend side. */ + ctx->type = (1 << STATS_TYPE_FE) | (1 << STATS_TYPE_SO); + ctx->iid = ctx->sid = -1; + + ret = stats_dump_stat_to_buffer(appctx_sc(appctx), NULL, NULL); + if (!ret) + return 0; + + chunk_strcat(&ctx->chunk, "\n"); + if (!stats_putchk(appctx, NULL, NULL)) + return 0; + + /* Switch to backend side. */ + ctx->state = STAT_STATE_INIT; + ctx->type = (1 << STATS_TYPE_BE) | (1 << STATS_TYPE_SV); + } + + return stats_dump_stat_to_buffer(appctx_sc(appctx), NULL, NULL); +} + +static void cli_io_handler_release_dump_stat_file(struct appctx *appctx) +{ +} + int stats_allocate_proxy_counters_internal(struct extra_counters **counters, int type, int px_cap) { @@ -5354,33 +1135,35 @@ void stats_register_module(struct stats_module *m) const uint8_t domain = stats_get_domain(m->domain_flags); LIST_APPEND(&stats_module_list[domain], &m->list); - stat_count[domain] += m->stats_count; + stat_cols_len[domain] += m->stats_count; } + static int allocate_stats_px_postcheck(void) { struct stats_module *mod; - size_t i = ST_F_TOTAL_FIELDS; + size_t i = ST_I_PX_MAX, offset; int err_code = 0; struct proxy *px; - stat_count[STATS_DOMAIN_PROXY] += ST_F_TOTAL_FIELDS; + stat_cols_len[STATS_DOMAIN_PROXY] += ST_I_PX_MAX; - stat_f[STATS_DOMAIN_PROXY] = malloc(stat_count[STATS_DOMAIN_PROXY] * sizeof(struct name_desc)); - if (!stat_f[STATS_DOMAIN_PROXY]) { + stat_cols[STATS_DOMAIN_PROXY] = malloc(stat_cols_len[STATS_DOMAIN_PROXY] * sizeof(struct name_desc)); + if (!stat_cols[STATS_DOMAIN_PROXY]) { ha_alert("stats: cannot allocate all fields for proxy statistics\n"); err_code |= ERR_ALERT | ERR_FATAL; return err_code; } - memcpy(stat_f[STATS_DOMAIN_PROXY], stat_fields, - ST_F_TOTAL_FIELDS * 
sizeof(struct name_desc)); + for (i = 0; i < ST_I_PX_MAX; ++i) + stcol2ndesc(&stat_cols[STATS_DOMAIN_PROXY][i], &stat_cols_px[i]); list_for_each_entry(mod, &stats_module_list[STATS_DOMAIN_PROXY], list) { - memcpy(stat_f[STATS_DOMAIN_PROXY] + i, - mod->stats, - mod->stats_count * sizeof(struct name_desc)); - i += mod->stats_count; + for (offset = i, i = 0; i < mod->stats_count; ++i) { + stcol2ndesc(&stat_cols[STATS_DOMAIN_PROXY][offset + i], + &mod->stats[i]); + } + i += offset; } for (px = proxies_list; px; px = px->next) { @@ -5391,7 +1174,7 @@ static int allocate_stats_px_postcheck(void) } } - /* wait per-thread alloc to perform corresponding stat_l allocation */ + /* wait per-thread alloc to perform corresponding stat_lines allocation */ return err_code; } @@ -5401,21 +1184,22 @@ REGISTER_CONFIG_POSTPARSER("allocate-stats-px", allocate_stats_px_postcheck); static int allocate_stats_rslv_postcheck(void) { struct stats_module *mod; - size_t i = 0; + size_t i = 0, offset; int err_code = 0; - stat_f[STATS_DOMAIN_RESOLVERS] = malloc(stat_count[STATS_DOMAIN_RESOLVERS] * sizeof(struct name_desc)); - if (!stat_f[STATS_DOMAIN_RESOLVERS]) { + stat_cols[STATS_DOMAIN_RESOLVERS] = malloc(stat_cols_len[STATS_DOMAIN_RESOLVERS] * sizeof(struct name_desc)); + if (!stat_cols[STATS_DOMAIN_RESOLVERS]) { ha_alert("stats: cannot allocate all fields for resolver statistics\n"); err_code |= ERR_ALERT | ERR_FATAL; return err_code; } list_for_each_entry(mod, &stats_module_list[STATS_DOMAIN_RESOLVERS], list) { - memcpy(stat_f[STATS_DOMAIN_RESOLVERS] + i, - mod->stats, - mod->stats_count * sizeof(struct name_desc)); - i += mod->stats_count; + for (offset = i, i = 0; i < mod->stats_count; ++i) { + stcol2ndesc(&stat_cols[STATS_DOMAIN_RESOLVERS][offset + i], + &mod->stats[i]); + } + i += offset; } if (!resolv_allocate_counters(&stats_module_list[STATS_DOMAIN_RESOLVERS])) { @@ -5424,7 +1208,7 @@ static int allocate_stats_rslv_postcheck(void) return err_code; } - /* wait per-thread alloc to 
perform corresponding stat_l allocation */ + /* wait per-thread alloc to perform corresponding stat_lines allocation */ return err_code; } @@ -5438,8 +1222,8 @@ static int allocate_stat_lines_per_thread(void) for (i = 0; i < STATS_DOMAIN_COUNT; ++i) { const int domain = domains[i]; - stat_l[domain] = malloc(stat_count[domain] * sizeof(struct field)); - if (!stat_l[domain]) + stat_lines[domain] = malloc(stat_cols_len[domain] * sizeof(struct field)); + if (!stat_lines[domain]) return 0; } return 1; @@ -5482,7 +1266,7 @@ static void deinit_stat_lines_per_thread(void) for (i = 0; i < STATS_DOMAIN_COUNT; ++i) { const int domain = domains[i]; - ha_free(&stat_l[domain]); + ha_free(&stat_lines[domain]); } } @@ -5496,8 +1280,8 @@ static void deinit_stats(void) for (i = 0; i < STATS_DOMAIN_COUNT; ++i) { const int domain = domains[i]; - if (stat_f[domain]) - free(stat_f[domain]); + if (stat_cols[domain]) + free(stat_cols[domain]); } } @@ -5517,18 +1301,12 @@ static struct cli_kw_list cli_kws = {{ },{ { { "show", "info", NULL }, "show info [desc|json|typed|float]* : report information about the running process", cli_parse_show_info, cli_io_handler_dump_info, NULL }, { { "show", "stat", NULL }, "show stat [desc|json|no-maint|typed|up]*: report counters for each proxy and server", cli_parse_show_stat, cli_io_handler_dump_stat, cli_io_handler_release_stat }, { { "show", "schema", "json", NULL }, "show schema json : report schema used for stats", NULL, cli_io_handler_dump_json_schema, NULL }, + { { "dump", "stats-file", NULL }, "dump stats-file : dump stats for restore", cli_parse_dump_stat_file, cli_io_handler_dump_stat_file, cli_io_handler_release_dump_stat_file }, {{},} }}; INITCALL1(STG_REGISTER, cli_register_kw, &cli_kws); -struct applet http_stats_applet = { - .obj_type = OBJ_TYPE_APPLET, - .name = "<STATS>", /* used for logging */ - .fct = http_stats_io_handler, - .release = http_stats_release, -}; - /* * Local variables: * c-indent-level: 8 diff --git a/src/stconn.c 
b/src/stconn.c index df119a1..6077403 100644 --- a/src/stconn.c +++ b/src/stconn.c @@ -14,6 +14,7 @@ #include <haproxy/applet.h> #include <haproxy/connection.h> #include <haproxy/check.h> +#include <haproxy/filters.h> #include <haproxy/http_ana.h> #include <haproxy/pipe.h> #include <haproxy/pool.h> @@ -99,6 +100,9 @@ void sedesc_init(struct sedesc *sedesc) sedesc->xref.peer = NULL; se_fl_setall(sedesc, SE_FL_NONE); + sedesc->abort_info.info = 0; + sedesc->abort_info.code = 0; + sedesc->iobuf.pipe = NULL; sedesc->iobuf.buf = NULL; sedesc->iobuf.offset = sedesc->iobuf.data = 0; @@ -130,6 +134,54 @@ void sedesc_free(struct sedesc *sedesc) } } +/* Performs a shutdown on the endpoint. This function deals with connection and + * applet endpoints. It is responsible to set SE flags corresponding to the + * given shut modes and to call right shutdown functions of the endpoint. It is + * called from the .abort and .shut app_ops callback functions at the SC level. + */ +void se_shutdown(struct sedesc *sedesc, enum se_shut_mode mode) +{ + if (se_fl_test(sedesc, SE_FL_T_MUX)) { + const struct mux_ops *mux = (sedesc->conn ? sedesc->conn->mux : NULL); + unsigned int flags = 0; + + if ((mode & (SE_SHW_SILENT|SE_SHW_NORMAL)) && !se_fl_test(sedesc, SE_FL_SHW)) + flags |= (mode & SE_SHW_NORMAL) ? SE_FL_SHWN : SE_FL_SHWS; + + + if ((mode & (SE_SHR_RESET|SE_SHR_DRAIN)) && !se_fl_test(sedesc, SE_FL_SHR)) + flags |= (mode & SE_SHR_DRAIN) ? 
SE_FL_SHRD : SE_FL_SHRR; + + if (flags) { + if (mux && mux->shut) { + struct se_abort_info *reason = NULL; + struct xref *peer = xref_get_peer_and_lock(&sedesc->xref); + + if (peer) { + struct sedesc *sdo = container_of(peer, struct sedesc, xref); + + reason = &sdo->abort_info; + xref_unlock(&sedesc->xref, peer); + } + + mux->shut(sedesc->sc, mode, reason); + + } + se_fl_set(sedesc, flags); + } + } + else if (se_fl_test(sedesc, SE_FL_T_APPLET)) { + if ((mode & (SE_SHW_SILENT|SE_SHW_NORMAL)) && !se_fl_test(sedesc, SE_FL_SHW)) + se_fl_set(sedesc, SE_FL_SHWN); + + if ((mode & (SE_SHR_RESET|SE_SHR_DRAIN)) && !se_fl_test(sedesc, SE_FL_SHR)) + se_fl_set(sedesc, SE_FL_SHRR); + + if (se_fl_test(sedesc, SE_FL_SHR) && se_fl_test(sedesc, SE_FL_SHW)) + appctx_shut(sedesc->se); + } +} + /* Tries to allocate a new stconn and initialize its main fields. On * failure, nothing is allocated and NULL is returned. It is an internal * function. The caller must, at least, set the SE_FL_ORPHAN or SE_FL_DETACHED @@ -312,15 +364,17 @@ int sc_attach_mux(struct stconn *sc, void *sd, void *ctx) * removed. This function is called by a stream when a backend applet is * registered. 
*/ -static void sc_attach_applet(struct stconn *sc, void *sd) +static int sc_attach_applet(struct stconn *sc, struct appctx *appctx) { - sc->sedesc->se = sd; + sc->sedesc->se = appctx; sc_ep_set(sc, SE_FL_T_APPLET); sc_ep_clr(sc, SE_FL_DETACHED); if (sc_strm(sc)) { sc->app_ops = &sc_app_applet_ops; xref_create(&sc->sedesc->xref, &sc_opposite(sc)->sedesc->xref); } + + return 0; } /* Attaches a stconn to a app layer and sets the relevant @@ -402,7 +456,7 @@ static void sc_detach_endp(struct stconn **scp) sc_ep_set(sc, SE_FL_ORPHAN); sc->sedesc->sc = NULL; sc->sedesc = NULL; - appctx_shut(appctx); + se_shutdown(appctx->sedesc, SE_SHR_RESET|SE_SHW_NORMAL); appctx_free(appctx); } @@ -506,7 +560,10 @@ struct appctx *sc_applet_create(struct stconn *sc, struct applet *app) appctx = appctx_new_here(app, sc->sedesc); if (!appctx) return NULL; - sc_attach_applet(sc, appctx); + if (sc_attach_applet(sc, appctx) == -1) { + appctx_free_on_early_error(appctx); + return NULL; + } appctx->t->nice = __sc_strm(sc)->task->nice; applet_need_more_data(appctx); appctx_wakeup(appctx); @@ -612,21 +669,24 @@ static void sc_app_shut(struct stconn *sc) !(ic->flags & CF_DONT_READ)) return; - __fallthrough; + sc->state = SC_ST_DIS; + break; case SC_ST_CON: case SC_ST_CER: case SC_ST_QUE: case SC_ST_TAR: /* Note that none of these states may happen with applets */ sc->state = SC_ST_DIS; - __fallthrough; + break; default: - sc->flags &= ~SC_FL_NOLINGER; - sc->flags |= SC_FL_ABRT_DONE; - if (sc->flags & SC_FL_ISBACK) - __sc_strm(sc)->conn_exp = TICK_ETERNITY; + break; } + sc->flags &= ~SC_FL_NOLINGER; + sc->flags |= SC_FL_ABRT_DONE; + if (sc->flags & SC_FL_ISBACK) + __sc_strm(sc)->conn_exp = TICK_ETERNITY; + /* note that if the task exists, it must unregister itself once it runs */ if (!(sc->flags & SC_FL_DONT_WAKE)) task_wakeup(sc_strm_task(sc), TASK_WOKEN_IO); @@ -691,7 +751,7 @@ static void sc_app_abort_conn(struct stconn *sc) return; if (sc->flags & SC_FL_SHUT_DONE) { - sc_conn_shut(sc); + 
se_shutdown(sc->sedesc, SE_SHR_RESET|SE_SHW_SILENT); sc->state = SC_ST_DIS; if (sc->flags & SC_FL_ISBACK) __sc_strm(sc)->conn_exp = TICK_ETERNITY; @@ -725,51 +785,42 @@ static void sc_app_shut_conn(struct stconn *sc) switch (sc->state) { case SC_ST_RDY: case SC_ST_EST: + /* we have to shut before closing, otherwise some short messages * may never leave the system, especially when there are remaining * unread data in the socket input buffer, or when nolinger is set. * However, if SC_FL_NOLINGER is explicitly set, we know there is * no risk so we close both sides immediately. */ - if (sc->flags & SC_FL_NOLINGER) { - /* unclean data-layer shutdown, typically an aborted request - * or a forwarded shutdown from a client to a server due to - * option abortonclose. No need for the TLS layer to try to - * emit a shutdown message. - */ - sc_conn_shutw(sc, CO_SHW_SILENT); + if (!(sc->flags & (SC_FL_NOLINGER|SC_FL_EOS|SC_FL_ABRT_DONE)) && !(ic->flags & CF_DONT_READ)) { + se_shutdown(sc->sedesc, SE_SHW_NORMAL); + return; } - else { - /* clean data-layer shutdown. This only happens on the - * frontend side, or on the backend side when forwarding - * a client close in TCP mode or in HTTP TUNNEL mode - * while option abortonclose is set. We want the TLS - * layer to try to signal it to the peer before we close. - */ - sc_conn_shutw(sc, CO_SHW_NORMAL); - if (!(sc->flags & (SC_FL_EOS|SC_FL_ABRT_DONE)) && !(ic->flags & CF_DONT_READ)) - return; - } + se_shutdown(sc->sedesc, SE_SHR_RESET|((sc->flags & SC_FL_NOLINGER) ? 
SE_SHW_SILENT : SE_SHW_NORMAL)); + sc->state = SC_ST_DIS; + break; - __fallthrough; case SC_ST_CON: /* we may have to close a pending connection, and mark the * response buffer as abort */ - sc_conn_shut(sc); - __fallthrough; + se_shutdown(sc->sedesc, SE_SHR_RESET|SE_SHW_SILENT); + sc->state = SC_ST_DIS; + break; case SC_ST_CER: case SC_ST_QUE: case SC_ST_TAR: sc->state = SC_ST_DIS; - __fallthrough; + break; default: - sc->flags &= ~SC_FL_NOLINGER; - sc->flags |= SC_FL_ABRT_DONE; - if (sc->flags & SC_FL_ISBACK) - __sc_strm(sc)->conn_exp = TICK_ETERNITY; + break; } + + sc->flags &= ~SC_FL_NOLINGER; + sc->flags |= SC_FL_ABRT_DONE; + if (sc->flags & SC_FL_ISBACK) + __sc_strm(sc)->conn_exp = TICK_ETERNITY; } /* This function is used for inter-stream connector calls. It is called by the @@ -884,7 +935,7 @@ static void sc_app_abort_applet(struct stconn *sc) return; if (sc->flags & SC_FL_SHUT_DONE) { - appctx_shut(__sc_appctx(sc)); + se_shutdown(sc->sedesc, SE_SHR_RESET|SE_SHW_NORMAL); sc->state = SC_ST_DIS; if (sc->flags & SC_FL_ISBACK) __sc_strm(sc)->conn_exp = TICK_ETERNITY; @@ -920,6 +971,7 @@ static void sc_app_shut_applet(struct stconn *sc) switch (sc->state) { case SC_ST_RDY: case SC_ST_EST: + /* we have to shut before closing, otherwise some short messages * may never leave the system, especially when there are remaining * unread data in the socket input buffer, or when nolinger is set. @@ -927,24 +979,31 @@ static void sc_app_shut_applet(struct stconn *sc) * no risk so we close both sides immediately. 
*/ if (!(sc->flags & (SC_FL_ERROR|SC_FL_NOLINGER|SC_FL_EOS|SC_FL_ABRT_DONE)) && - !(ic->flags & CF_DONT_READ)) + !(ic->flags & CF_DONT_READ)) { + se_shutdown(sc->sedesc, SE_SHW_NORMAL); return; + } + + se_shutdown(sc->sedesc, SE_SHR_RESET|SE_SHW_NORMAL); + sc->state = SC_ST_DIS; + break; - __fallthrough; case SC_ST_CON: case SC_ST_CER: case SC_ST_QUE: case SC_ST_TAR: /* Note that none of these states may happen with applets */ - appctx_shut(__sc_appctx(sc)); + se_shutdown(sc->sedesc, SE_SHR_RESET|SE_SHW_NORMAL); sc->state = SC_ST_DIS; - __fallthrough; + break; default: - sc->flags &= ~SC_FL_NOLINGER; - sc->flags |= SC_FL_ABRT_DONE; - if (sc->flags & SC_FL_ISBACK) - __sc_strm(sc)->conn_exp = TICK_ETERNITY; + break; } + + sc->flags &= ~SC_FL_NOLINGER; + sc->flags |= SC_FL_ABRT_DONE; + if (sc->flags & SC_FL_ISBACK) + __sc_strm(sc)->conn_exp = TICK_ETERNITY; } /* chk_rcv function for applets */ @@ -1095,6 +1154,7 @@ void sc_notify(struct stconn *sc) */ if (sc_ep_have_ff_data(sc_opposite(sc)) || (co_data(ic) && sc_ep_test(sco, SE_FL_WAIT_DATA) && + (!HAS_DATA_FILTERS(__sc_strm(sc), ic) || channel_input_data(ic) == 0) && (!(sc->flags & SC_FL_SND_EXP_MORE) || channel_full(ic, co_data(ic)) || channel_input_data(ic) == 0))) { int new_len, last_len; @@ -1185,7 +1245,6 @@ static void sc_conn_eos(struct stconn *sc) if (sc_cond_forward_shut(sc)) { /* we want to immediately forward this close to the write side */ /* force flag on ssl to keep stream in cache */ - sc_conn_shutw(sc, CO_SHW_SILENT); goto do_close; } @@ -1194,7 +1253,7 @@ static void sc_conn_eos(struct stconn *sc) do_close: /* OK we completely close the socket here just as if we went through sc_shut[rw]() */ - sc_conn_shut(sc); + se_shutdown(sc->sedesc, SE_SHR_RESET|SE_SHW_SILENT); sc->flags &= ~SC_FL_SHUT_WANTED; sc->flags |= SC_FL_SHUT_DONE; @@ -1253,17 +1312,7 @@ int sc_conn_recv(struct stconn *sc) /* prepare to detect if the mux needs more room */ sc_ep_clr(sc, SE_FL_WANT_ROOM); - if ((ic->flags & (CF_STREAMER | 
CF_STREAMER_FAST)) && !co_data(ic) && - global.tune.idle_timer && - (unsigned short)(now_ms - ic->last_read) >= global.tune.idle_timer) { - /* The buffer was empty and nothing was transferred for more - * than one second. This was caused by a pause and not by - * congestion. Reset any streaming mode to reduce latency. - */ - ic->xfer_small = 0; - ic->xfer_large = 0; - ic->flags &= ~(CF_STREAMER | CF_STREAMER_FAST); - } + channel_check_idletimer(ic); #if defined(USE_LINUX_SPLICE) /* Detect if the splicing is possible depending on the stream policy */ @@ -1448,41 +1497,7 @@ int sc_conn_recv(struct stconn *sc) if (!cur_read) se_have_no_more_data(sc->sedesc); else { - if ((ic->flags & (CF_STREAMER | CF_STREAMER_FAST)) && - (cur_read <= ic->buf.size / 2)) { - ic->xfer_large = 0; - ic->xfer_small++; - if (ic->xfer_small >= 3) { - /* we have read less than half of the buffer in - * one pass, and this happened at least 3 times. - * This is definitely not a streamer. - */ - ic->flags &= ~(CF_STREAMER | CF_STREAMER_FAST); - } - else if (ic->xfer_small >= 2) { - /* if the buffer has been at least half full twice, - * we receive faster than we send, so at least it - * is not a "fast streamer". - */ - ic->flags &= ~CF_STREAMER_FAST; - } - } - else if (!(ic->flags & CF_STREAMER_FAST) && (cur_read >= channel_data_limit(ic))) { - /* we read a full buffer at once */ - ic->xfer_small = 0; - ic->xfer_large++; - if (ic->xfer_large >= 3) { - /* we call this buffer a fast streamer if it manages - * to be filled in one call 3 consecutive times. 
- */ - ic->flags |= (CF_STREAMER | CF_STREAMER_FAST); - } - } - else { - ic->xfer_small = 0; - ic->xfer_large = 0; - } - ic->last_read = now_ms; + channel_check_xfer(ic, cur_read); sc_ep_report_read_activity(sc); } @@ -1660,7 +1675,7 @@ int sc_conn_send(struct stconn *sc) if (s->txn->req.msg_state != HTTP_MSG_DONE) s->txn->flags &= ~TX_L7_RETRY; else { - if (b_alloc(&s->txn->l7_buffer) == NULL) + if (b_alloc(&s->txn->l7_buffer, DB_UNLIKELY) == NULL) s->txn->flags &= ~TX_L7_RETRY; else { memcpy(b_orig(&s->txn->l7_buffer), @@ -1673,6 +1688,9 @@ int sc_conn_send(struct stconn *sc) } } + if ((sc->flags & SC_FL_SHUT_WANTED) && co_data(oc) == c_data(oc)) + send_flag |= CO_SFL_LAST_DATA; + ret = conn->mux->snd_buf(sc, &oc->buf, co_data(oc), send_flag); if (ret > 0) { did_send = 1; @@ -1899,7 +1917,7 @@ static void sc_applet_eos(struct stconn *sc) return; if (sc->flags & SC_FL_SHUT_DONE) { - appctx_shut(__sc_appctx(sc)); + se_shutdown(sc->sedesc, SE_SHR_RESET|SE_SHW_NORMAL); sc->state = SC_ST_DIS; if (sc->flags & SC_FL_ISBACK) __sc_strm(sc)->conn_exp = TICK_ETERNITY; @@ -1908,6 +1926,352 @@ static void sc_applet_eos(struct stconn *sc) return sc_app_shut_applet(sc); } +/* + * This is the callback which is called by the applet layer to receive data into + * the buffer from the appctx. It iterates over the applet's rcv_buf + * function. Please do not statify this function, it's often present in + * backtraces, it's useful to recognize it. + */ +int sc_applet_recv(struct stconn *sc) +{ + struct appctx *appctx = __sc_appctx(sc); + struct channel *ic = sc_ic(sc); + int ret, max, cur_read = 0; + int read_poll = MAX_READ_POLL_LOOPS; + int flags = 0; + + + /* If another call to sc_applet_recv() failed, give up now. 
+ */ + if (sc_waiting_room(sc)) + return 0; + + /* maybe we were called immediately after an asynchronous abort */ + if (sc->flags & (SC_FL_EOS|SC_FL_ABRT_DONE)) + return 1; + + /* We must wait because the applet is not fully initialized */ + if (se_fl_test(sc->sedesc, SE_FL_ORPHAN)) + return 0; + + /* stop immediately on errors. */ + if (!sc_ep_test(sc, SE_FL_RCV_MORE)) { + // TODO: be sure SE_FL_RCV_MORE may be set for applet ? + if (sc_ep_test(sc, SE_FL_ERROR)) + goto end_recv; + } + + /* prepare to detect if the mux needs more room */ + sc_ep_clr(sc, SE_FL_WANT_ROOM); + + channel_check_idletimer(ic); + + /* First, let's see if we may fast-forward data from a side to the other + * one without using the channel buffer. + */ + if (sc_is_fastfwd_supported(sc)) { + if (channel_data(ic)) { + /* We're embarrassed, there are already data pending in + * the buffer and we don't want to have them at two + * locations at a time. Let's indicate we need some + * place and ask the consumer to hurry. + */ + flags |= CO_RFL_BUF_FLUSH; + goto abort_fastfwd; + } + ret = appctx_fastfwd(sc, ic->to_forward, flags); + if (ret < 0) + goto abort_fastfwd; + else if (ret > 0) { + if (ic->to_forward != CHN_INFINITE_FORWARD) + ic->to_forward -= ret; + ic->total += ret; + cur_read += ret; + ic->flags |= CF_READ_EVENT; + } + + if (sc_ep_test(sc, SE_FL_EOS | SE_FL_ERROR)) + goto end_recv; + + if (sc_ep_test(sc, SE_FL_WANT_ROOM)) + sc_need_room(sc, -1); + + if (sc_ep_test(sc, SE_FL_MAY_FASTFWD_PROD) && ic->to_forward) + goto done_recv; + } + + abort_fastfwd: + if (!sc_alloc_ibuf(sc, &appctx->buffer_wait)) + goto end_recv; + + /* For an HTX stream, if the buffer is stuck (no output data with some + * input data) and if the HTX message is fragmented or if its free space + * wraps, we force an HTX deframentation. It is a way to have a + * contiguous free space nad to let the mux to copy as much data as + * possible. 
+ * + * NOTE: A possible optim may be to let the mux decides if defrag is + * required or not, depending on amount of data to be xferred. + */ + if (IS_HTX_STRM(__sc_strm(sc)) && !co_data(ic)) { + struct htx *htx = htxbuf(&ic->buf); + + if (htx_is_not_empty(htx) && ((htx->flags & HTX_FL_FRAGMENTED) || htx_space_wraps(htx))) + htx_defrag(htx, NULL, 0); + } + + /* Compute transient CO_RFL_* flags */ + if (co_data(ic)) { + flags |= (CO_RFL_BUF_WET | CO_RFL_BUF_NOT_STUCK); + } + + /* <max> may be null. This is the mux responsibility to set + * SE_FL_RCV_MORE on the SC if more space is needed. + */ + max = channel_recv_max(ic); + ret = appctx_rcv_buf(sc, &ic->buf, max, flags); + if (sc_ep_test(sc, SE_FL_WANT_ROOM)) { + /* SE_FL_WANT_ROOM must not be reported if the channel's + * buffer is empty. + */ + BUG_ON(c_empty(ic)); + + sc_need_room(sc, channel_recv_max(ic) + 1); + /* Add READ_PARTIAL because some data are pending but + * cannot be xferred to the channel + */ + ic->flags |= CF_READ_EVENT; + sc_ep_report_read_activity(sc); + } + + if (ret <= 0) { + /* if we refrained from reading because we asked for a flush to + * satisfy rcv_pipe(), report that there's not enough room here + * to proceed. + */ + if (flags & CO_RFL_BUF_FLUSH) + sc_need_room(sc, -1); + goto done_recv; + } + + cur_read += ret; + + /* if we're allowed to directly forward data, we must update ->o */ + if (ic->to_forward && !(sc_opposite(sc)->flags & (SC_FL_SHUT_DONE|SC_FL_SHUT_WANTED))) { + unsigned long fwd = ret; + if (ic->to_forward != CHN_INFINITE_FORWARD) { + if (fwd > ic->to_forward) + fwd = ic->to_forward; + ic->to_forward -= fwd; + } + c_adv(ic, fwd); + } + + ic->flags |= CF_READ_EVENT; + ic->total += ret; + + /* End-of-input reached, we can leave. In this case, it is + * important to break the loop to not block the SC because of + * the channel's policies.This way, we are still able to receive + * shutdowns. 
+ */ + if (sc_ep_test(sc, SE_FL_EOI)) + goto done_recv; + + if ((sc->flags & SC_FL_RCV_ONCE) || --read_poll <= 0) { + /* we don't expect to read more data */ + sc_wont_read(sc); + goto done_recv; + } + + /* if too many bytes were missing from last read, it means that + * it's pointless trying to read again because the system does + * not have them in buffers. + */ + if (ret < max) { + /* if a streamer has read few data, it may be because we + * have exhausted system buffers. It's not worth trying + * again. + */ + if (ic->flags & CF_STREAMER) { + /* we're stopped by the channel's policy */ + sc_wont_read(sc); + goto done_recv; + } + + /* if we read a large block smaller than what we requested, + * it's almost certain we'll never get anything more. + */ + if (ret >= global.tune.recv_enough) { + /* we're stopped by the channel's policy */ + sc_wont_read(sc); + } + } + + done_recv: + if (cur_read) { + channel_check_xfer(ic, cur_read); + sc_ep_report_read_activity(sc); + } + + end_recv: + ret = (cur_read != 0); + + /* Report EOI on the channel if it was reached from the mux point of + * view. */ + if (sc_ep_test(sc, SE_FL_EOI) && !(sc->flags & SC_FL_EOI)) { + sc_ep_report_read_activity(sc); + sc->flags |= SC_FL_EOI; + ic->flags |= CF_READ_EVENT; + ret = 1; + } + + if (sc_ep_test(sc, SE_FL_EOS)) { + /* we received a shutdown */ + if (ic->flags & CF_AUTO_CLOSE) + sc_schedule_shutdown(sc_opposite(sc)); + sc_applet_eos(sc); + ret = 1; + } + + if (sc_ep_test(sc, SE_FL_ERROR)) { + sc->flags |= SC_FL_ERROR; + ret = 1; + } + else if (cur_read || (sc->flags & (SC_FL_WONT_READ|SC_FL_NEED_BUFF|SC_FL_NEED_ROOM))) { + se_have_more_data(sc->sedesc); + ret = 1; + } + + return ret; +} + +/* This tries to perform a synchronous receive on the stream connector to + * try to collect last arrived data. In practice it's only implemented on + * stconns. Returns 0 if nothing was done, non-zero if new data or a + * shutdown were collected. 
This may result on some delayed receive calls + * to be programmed and performed later, though it doesn't provide any + * such guarantee. + */ +int sc_applet_sync_recv(struct stconn *sc) +{ + if (!(__sc_appctx(sc)->flags & APPCTX_FL_INOUT_BUFS)) + return 0; + + if (!sc_state_in(sc->state, SC_SB_RDY|SC_SB_EST)) + return 0; + + if (se_fl_test(sc->sedesc, SE_FL_ORPHAN)) + return 0; + + if (!sc_is_recv_allowed(sc)) + return 0; // already failed + + return sc_applet_recv(sc); +} + +/* + * This function is called to send buffer data to an applet. It calls the + * applet's snd_buf function. Please do not statify this function, it's often + * present in backtraces, it's useful to recognize it. + */ +int sc_applet_send(struct stconn *sc) +{ + struct stconn *sco = sc_opposite(sc); + struct channel *oc = sc_oc(sc); + size_t ret; + int did_send = 0; + + if (sc_ep_test(sc, SE_FL_ERROR | SE_FL_ERR_PENDING)) { + BUG_ON(sc_ep_test(sc, SE_FL_EOS|SE_FL_ERROR|SE_FL_ERR_PENDING) == (SE_FL_EOS|SE_FL_ERR_PENDING)); + return 1; + } + + if (sc_ep_test(sc, SE_FL_WONT_CONSUME)) + return 0; + + /* we might have been called just after an asynchronous shutw */ + if (sc->flags & SC_FL_SHUT_DONE) + return 1; + + /* We must wait because the applet is not fully initialized */ + if (se_fl_test(sc->sedesc, SE_FL_ORPHAN)) + return 0; + + /* TODO: Splicing is not supported, so it is not possible to have FF data stuck into the I/O buf */ + BUG_ON(sc_ep_have_ff_data(sc)); + + if (co_data(oc)) { + unsigned int send_flag = 0; + + if ((sc->flags & SC_FL_SHUT_WANTED) && co_data(oc) == c_data(oc)) + send_flag |= CO_SFL_LAST_DATA; + + ret = appctx_snd_buf(sc, &oc->buf, co_data(oc), send_flag); + if (ret > 0) { + did_send = 1; + c_rew(oc, ret); + c_realign_if_empty(oc); + + if (!co_data(oc)) { + /* Always clear both flags once everything has been sent, they're one-shot */ + sc->flags &= ~(SC_FL_SND_ASAP|SC_FL_SND_EXP_MORE); + } + /* if some data remain in the buffer, it's only because the + * system buffers 
are full, we will try next time. + */ + } + } + + if (did_send) + oc->flags |= CF_WRITE_EVENT | CF_WROTE_DATA; + + if (!sco->room_needed || (did_send && (sco->room_needed < 0 || channel_recv_max(sc_oc(sc)) >= sco->room_needed))) + sc_have_room(sco); + + if (sc_ep_test(sc, SE_FL_ERROR | SE_FL_ERR_PENDING)) { + oc->flags |= CF_WRITE_EVENT; + BUG_ON(sc_ep_test(sc, SE_FL_EOS|SE_FL_ERROR|SE_FL_ERR_PENDING) == (SE_FL_EOS|SE_FL_ERR_PENDING)); + if (sc_ep_test(sc, SE_FL_ERROR)) + sc->flags |= SC_FL_ERROR; + return 1; + } + + if (!co_data(oc)) { + if (did_send) + sc_ep_report_send_activity(sc); + } + else { + sc_ep_report_blocked_send(sc, did_send); + } + + return did_send; +} + +void sc_applet_sync_send(struct stconn *sc) +{ + struct channel *oc = sc_oc(sc); + + oc->flags &= ~CF_WRITE_EVENT; + + if (!(__sc_appctx(sc)->flags & APPCTX_FL_INOUT_BUFS)) + return; + + if (sc->flags & SC_FL_SHUT_DONE) + return; + + if (!co_data(oc)) + return; + + if (!sc_state_in(sc->state, SC_SB_EST)) + return; + + if (se_fl_test(sc->sedesc, SE_FL_ORPHAN)) + return; + + sc_applet_send(sc); +} + /* Callback to be used by applet handlers upon completion. It updates the stream * (which may or may not take this opportunity to try to forward data), then * may re-enable the applet's based on the channels and stream connector's final @@ -1960,7 +2324,8 @@ int sc_applet_process(struct stconn *sc) * appctx but in the case the task is not in runqueue we may have to * wakeup the appctx immediately. */ - if (sc_is_recv_allowed(sc) || sc_is_send_allowed(sc)) + if ((sc_is_recv_allowed(sc) && !applet_fl_test(__sc_appctx(sc), APPCTX_FL_OUTBLK_ALLOC)) || + (sc_is_send_allowed(sc) && !applet_fl_test(__sc_appctx(sc), APPCTX_FL_INBLK_ALLOC))) appctx_wakeup(__sc_appctx(sc)); return 0; } @@ -2036,6 +2401,57 @@ smp_fetch_sid(const struct arg *args, struct sample *smp, const char *kw, void * return 1; } +/* return 1 if the frontend or backend mux stream has received an abort and 0 otherwise. 
+ */ +static int +smp_fetch_strm_aborted(const struct arg *args, struct sample *smp, const char *kw, void *private) +{ + struct stconn *sc; + unsigned int aborted = 0; + + if (!smp->strm) + return 0; + + sc = (kw[0] == 'f' ? smp->strm->scf : smp->strm->scb); + if (sc->sedesc->abort_info.info) + aborted = 1; + + smp->flags = SMP_F_VOL_TXN; + smp->data.type = SMP_T_BOOL; + smp->data.u.sint = aborted; + + return 1; +} + +/* return the H2/QUIC RESET code of the frontend or backend mux stream. Any value + * means an a RST_STREAM was received on H2 and a STOP_SENDING on QUIC. Otherwise the sample fetch fails. + */ +static int +smp_fetch_strm_rst_code(const struct arg *args, struct sample *smp, const char *kw, void *private) +{ + struct stconn *sc; + unsigned int source; + unsigned long long code = 0; + + if (!smp->strm) + return 0; + + sc = (kw[0] == 'f' ? smp->strm->scf : smp->strm->scb); + source = ((sc->sedesc->abort_info.info & SE_ABRT_SRC_MASK) >> SE_ABRT_SRC_SHIFT); + if (source != SE_ABRT_SRC_MUX_H2 && source != SE_ABRT_SRC_MUX_QUIC) { + if (!source) + smp->flags |= SMP_F_MAY_CHANGE; + return 0; + } + code = sc->sedesc->abort_info.code; + + smp->flags = SMP_F_VOL_TXN; + smp->data.type = SMP_T_SINT; + smp->data.u.sint = code; + + return 1; +} + /* Note: must not be declared <const> as its list will be overwritten. * Note: fetches that may return multiple types should be declared using the * appropriate pseudo-type. 
If not available it must be declared as the lowest @@ -2043,7 +2459,11 @@ smp_fetch_sid(const struct arg *args, struct sample *smp, const char *kw, void * */ static struct sample_fetch_kw_list sample_fetch_keywords = {ILH, { { "bs.id", smp_fetch_sid, 0, NULL, SMP_T_SINT, SMP_USE_L6REQ }, + { "bs.aborted", smp_fetch_strm_aborted, 0, NULL, SMP_T_SINT, SMP_USE_L5SRV }, + { "bs.rst_code", smp_fetch_strm_rst_code, 0, NULL, SMP_T_SINT, SMP_USE_L5SRV }, { "fs.id", smp_fetch_sid, 0, NULL, SMP_T_STR, SMP_USE_L6RES }, + { "fs.aborted", smp_fetch_strm_aborted, 0, NULL, SMP_T_SINT, SMP_USE_L5CLI }, + { "fs.rst_code", smp_fetch_strm_rst_code, 0, NULL, SMP_T_SINT, SMP_USE_L5CLI }, { /* END */ }, }}; diff --git a/src/stick_table.c b/src/stick_table.c index b1ce9d4..08a22e4 100644 --- a/src/stick_table.c +++ b/src/stick_table.c @@ -46,6 +46,18 @@ #include <haproxy/tools.h> #include <haproxy/xxhash.h> +#if defined(USE_PROMEX) +#include <promex/promex.h> +#endif + +/* stick table base fields */ +enum sticktable_field { + STICKTABLE_SIZE = 0, + STICKTABLE_USED, + /* must always be the last one */ + STICKTABLE_TOTAL_FIELDS +}; + /* structure used to return a table key built from a sample */ static THREAD_LOCAL struct stktable_key static_table_key; @@ -98,15 +110,27 @@ void __stksess_free(struct stktable *t, struct stksess *ts) */ void stksess_free(struct stktable *t, struct stksess *ts) { + uint shard; + size_t len; void *data; + data = stktable_data_ptr(t, ts, STKTABLE_DT_SERVER_KEY); if (data) { dict_entry_unref(&server_key_dict, stktable_data_cast(data, std_t_dict)); stktable_data_cast(data, std_t_dict) = NULL; } - HA_RWLOCK_RDLOCK(STK_TABLE_LOCK, &t->lock); + + if (t->type == SMP_T_STR) + len = strlen((const char *)ts->key.key); + else + len = t->key_size; + + shard = stktable_calc_shard_num(t, ts->key.key, len); + + /* make the compiler happy when shard is not used without threads */ + ALREADY_CHECKED(shard); + __stksess_free(t, ts); - HA_RWLOCK_RDUNLOCK(STK_TABLE_LOCK, 
&t->lock); } /* @@ -115,17 +139,25 @@ void stksess_free(struct stktable *t, struct stksess *ts) */ int __stksess_kill(struct stktable *t, struct stksess *ts) { + int updt_locked = 0; + if (HA_ATOMIC_LOAD(&ts->ref_cnt)) return 0; - eb32_delete(&ts->exp); if (ts->upd.node.leaf_p) { + updt_locked = 1; HA_RWLOCK_WRLOCK(STK_TABLE_LOCK, &t->updt_lock); - eb32_delete(&ts->upd); - HA_RWLOCK_WRUNLOCK(STK_TABLE_LOCK, &t->updt_lock); + if (HA_ATOMIC_LOAD(&ts->ref_cnt)) + goto out_unlock; } + eb32_delete(&ts->exp); + eb32_delete(&ts->upd); ebmb_delete(&ts->key); __stksess_free(t, ts); + + out_unlock: + if (updt_locked) + HA_RWLOCK_WRUNLOCK(STK_TABLE_LOCK, &t->updt_lock); return 1; } @@ -136,14 +168,26 @@ int __stksess_kill(struct stktable *t, struct stksess *ts) */ int stksess_kill(struct stktable *t, struct stksess *ts, int decrefcnt) { + uint shard; + size_t len; int ret; if (decrefcnt && HA_ATOMIC_SUB_FETCH(&ts->ref_cnt, 1) != 0) return 0; - HA_RWLOCK_WRLOCK(STK_TABLE_LOCK, &t->lock); + if (t->type == SMP_T_STR) + len = strlen((const char *)ts->key.key); + else + len = t->key_size; + + shard = stktable_calc_shard_num(t, ts->key.key, len); + + /* make the compiler happy when shard is not used without threads */ + ALREADY_CHECKED(shard); + + HA_RWLOCK_WRLOCK(STK_TABLE_LOCK, &t->shards[shard].sh_lock); ret = __stksess_kill(t, ts); - HA_RWLOCK_WRUNLOCK(STK_TABLE_LOCK, &t->lock); + HA_RWLOCK_WRUNLOCK(STK_TABLE_LOCK, &t->shards[shard].sh_lock); return ret; } @@ -203,6 +247,7 @@ static struct stksess *__stksess_init(struct stktable *t, struct stksess * ts) memset((void *)ts - t->data_size, 0, t->data_size); ts->ref_cnt = 0; ts->shard = 0; + ts->seen = 0; ts->key.node.leaf_p = NULL; ts->exp.node.leaf_p = NULL; ts->upd.node.leaf_p = NULL; @@ -215,100 +260,124 @@ static struct stksess *__stksess_init(struct stktable *t, struct stksess * ts) * Trash oldest <to_batch> sticky sessions from table <t> * Returns number of trashed sticky sessions. 
It may actually trash less * than expected if finding these requires too long a search time (e.g. - * most of them have ts->ref_cnt>0). + * most of them have ts->ref_cnt>0). This function locks the table. */ -int __stktable_trash_oldest(struct stktable *t, int to_batch) +int stktable_trash_oldest(struct stktable *t, int to_batch) { struct stksess *ts; struct eb32_node *eb; int max_search = to_batch * 2; // no more than 50% misses + int max_per_shard = (to_batch + CONFIG_HAP_TBL_BUCKETS - 1) / CONFIG_HAP_TBL_BUCKETS; + int done_per_shard; int batched = 0; - int looped = 0; + int updt_locked; + int looped; + int shard; - eb = eb32_lookup_ge(&t->exps, now_ms - TIMER_LOOK_BACK); + shard = 0; while (batched < to_batch) { + done_per_shard = 0; + looped = 0; + updt_locked = 0; + + HA_RWLOCK_WRLOCK(STK_TABLE_LOCK, &t->shards[shard].sh_lock); + + eb = eb32_lookup_ge(&t->shards[shard].exps, now_ms - TIMER_LOOK_BACK); + while (batched < to_batch && done_per_shard < max_per_shard) { + if (unlikely(!eb)) { + /* we might have reached the end of the tree, typically because + * <now_ms> is in the first half and we're first scanning the last + * half. Let's loop back to the beginning of the tree now if we + * have not yet visited it. + */ + if (looped) + break; + looped = 1; + eb = eb32_first(&t->shards[shard].exps); + if (likely(!eb)) + break; + } - if (unlikely(!eb)) { - /* we might have reached the end of the tree, typically because - * <now_ms> is in the first half and we're first scanning the last - * half. Let's loop back to the beginning of the tree now if we - * have not yet visited it. 
- */ - if (looped) - break; - looped = 1; - eb = eb32_first(&t->exps); - if (likely(!eb)) + if (--max_search < 0) break; - } - if (--max_search < 0) - break; + /* timer looks expired, detach it from the queue */ + ts = eb32_entry(eb, struct stksess, exp); + eb = eb32_next(eb); - /* timer looks expired, detach it from the queue */ - ts = eb32_entry(eb, struct stksess, exp); - eb = eb32_next(eb); + /* don't delete an entry which is currently referenced */ + if (HA_ATOMIC_LOAD(&ts->ref_cnt) != 0) + continue; - /* don't delete an entry which is currently referenced */ - if (HA_ATOMIC_LOAD(&ts->ref_cnt) != 0) - continue; + eb32_delete(&ts->exp); - eb32_delete(&ts->exp); + if (ts->expire != ts->exp.key) { + if (!tick_isset(ts->expire)) + continue; - if (ts->expire != ts->exp.key) { - if (!tick_isset(ts->expire)) - continue; + ts->exp.key = ts->expire; + eb32_insert(&t->shards[shard].exps, &ts->exp); - ts->exp.key = ts->expire; - eb32_insert(&t->exps, &ts->exp); + /* the update might have jumped beyond the next element, + * possibly causing a wrapping. We need to check whether + * the next element should be used instead. If the next + * element doesn't exist it means we're on the right + * side and have to check the first one then. If it + * exists and is closer, we must use it, otherwise we + * use the current one. + */ + if (!eb) + eb = eb32_first(&t->shards[shard].exps); - /* the update might have jumped beyond the next element, - * possibly causing a wrapping. We need to check whether - * the next element should be used instead. If the next - * element doesn't exist it means we're on the right - * side and have to check the first one then. If it - * exists and is closer, we must use it, otherwise we - * use the current one. 
- */ - if (!eb) - eb = eb32_first(&t->exps); + if (!eb || tick_is_lt(ts->exp.key, eb->key)) + eb = &ts->exp; - if (!eb || tick_is_lt(ts->exp.key, eb->key)) - eb = &ts->exp; + continue; + } - continue; - } + /* if the entry is in the update list, we must be extremely careful + * because peers can see it at any moment and start to use it. Peers + * will take the table's updt_lock for reading when doing that, and + * with that lock held, will grab a ref_cnt before releasing the + * lock. So we must take this lock as well and check the ref_cnt. + */ + if (ts->upd.node.leaf_p) { + if (!updt_locked) { + updt_locked = 1; + HA_RWLOCK_WRLOCK(STK_TABLE_LOCK, &t->updt_lock); + } + /* now we're locked, new peers can't grab it anymore, + * existing ones already have the ref_cnt. + */ + if (HA_ATOMIC_LOAD(&ts->ref_cnt)) + continue; + } - /* session expired, trash it */ - ebmb_delete(&ts->key); - if (ts->upd.node.leaf_p) { - HA_RWLOCK_WRLOCK(STK_TABLE_LOCK, &t->updt_lock); + /* session expired, trash it */ + ebmb_delete(&ts->key); eb32_delete(&ts->upd); - HA_RWLOCK_WRUNLOCK(STK_TABLE_LOCK, &t->updt_lock); + __stksess_free(t, ts); + batched++; + done_per_shard++; } - __stksess_free(t, ts); - batched++; - } - return batched; -} + if (updt_locked) + HA_RWLOCK_WRUNLOCK(STK_TABLE_LOCK, &t->updt_lock); -/* - * Trash oldest <to_batch> sticky sessions from table <t> - * Returns number of trashed sticky sessions. - * This function locks the table - */ -int stktable_trash_oldest(struct stktable *t, int to_batch) -{ - int ret; + HA_RWLOCK_WRUNLOCK(STK_TABLE_LOCK, &t->shards[shard].sh_lock); - HA_RWLOCK_WRLOCK(STK_TABLE_LOCK, &t->lock); - ret = __stktable_trash_oldest(t, to_batch); - HA_RWLOCK_WRUNLOCK(STK_TABLE_LOCK, &t->lock); + if (max_search <= 0) + break; - return ret; + shard = (shard + 1) % CONFIG_HAP_TBL_BUCKETS; + if (!shard) + break; + } + + return batched; } + /* * Allocate and initialise a new sticky session. * The new sticky session is returned or NULL in case of lack of memory. 
@@ -346,17 +415,17 @@ struct stksess *stksess_new(struct stktable *t, struct stktable_key *key) } /* - * Looks in table <t> for a sticky session matching key <key>. + * Looks in table <t> for a sticky session matching key <key> in shard <shard>. * Returns pointer on requested sticky session or NULL if none was found. */ -struct stksess *__stktable_lookup_key(struct stktable *t, struct stktable_key *key) +struct stksess *__stktable_lookup_key(struct stktable *t, struct stktable_key *key, uint shard) { struct ebmb_node *eb; if (t->type == SMP_T_STR) - eb = ebst_lookup_len(&t->keys, key->key, key->key_len+1 < t->key_size ? key->key_len : t->key_size-1); + eb = ebst_lookup_len(&t->shards[shard].keys, key->key, key->key_len + 1 < t->key_size ? key->key_len : t->key_size - 1); else - eb = ebmb_lookup(&t->keys, key->key, t->key_size); + eb = ebmb_lookup(&t->shards[shard].keys, key->key, t->key_size); if (unlikely(!eb)) { /* no session found */ @@ -375,12 +444,60 @@ struct stksess *__stktable_lookup_key(struct stktable *t, struct stktable_key *k struct stksess *stktable_lookup_key(struct stktable *t, struct stktable_key *key) { struct stksess *ts; + uint shard; + size_t len; + + if (t->type == SMP_T_STR) + len = key->key_len + 1 < t->key_size ? key->key_len : t->key_size - 1; + else + len = t->key_size; - HA_RWLOCK_RDLOCK(STK_TABLE_LOCK, &t->lock); - ts = __stktable_lookup_key(t, key); + shard = stktable_calc_shard_num(t, key->key, len); + + HA_RWLOCK_RDLOCK(STK_TABLE_LOCK, &t->shards[shard].sh_lock); + ts = __stktable_lookup_key(t, key, shard); if (ts) HA_ATOMIC_INC(&ts->ref_cnt); - HA_RWLOCK_RDUNLOCK(STK_TABLE_LOCK, &t->lock); + HA_RWLOCK_RDUNLOCK(STK_TABLE_LOCK, &t->shards[shard].sh_lock); + + return ts; +} + +/* + * Looks in table <t> for a sticky session matching ptr <ptr>. + * Returns pointer on requested sticky session or NULL if none was found. 
+ * The refcount of the found entry is increased and this function + * is protected using the table lock + */ +struct stksess *stktable_lookup_ptr(struct stktable *t, void *ptr) +{ + struct stksess *ts = NULL; + struct ebmb_node *eb; + int shard; + + for (shard = 0; shard < CONFIG_HAP_TBL_BUCKETS; shard++) { + HA_RWLOCK_RDLOCK(STK_TABLE_LOCK, &t->shards[shard].sh_lock); + /* linear search is performed, this could be optimized by adding + * an eb node dedicated to ptr lookups into stksess struct to + * leverage eb_lookup function instead. + */ + eb = ebmb_first(&t->shards[shard].keys); + while (eb) { + struct stksess *cur; + + cur = ebmb_entry(eb, struct stksess, key); + if (cur == ptr) { + ts = cur; + break; + } + eb = ebmb_next(eb); + } + if (ts) + HA_ATOMIC_INC(&ts->ref_cnt); + HA_RWLOCK_RDUNLOCK(STK_TABLE_LOCK, &t->shards[shard].sh_lock); + if (ts) + return ts; + } return ts; } @@ -389,14 +506,14 @@ struct stksess *stktable_lookup_key(struct stktable *t, struct stktable_key *key * Looks in table <t> for a sticky session with same key as <ts>. * Returns pointer on requested sticky session or NULL if none was found. 
*/ -struct stksess *__stktable_lookup(struct stktable *t, struct stksess *ts) +struct stksess *__stktable_lookup(struct stktable *t, struct stksess *ts, uint shard) { struct ebmb_node *eb; if (t->type == SMP_T_STR) - eb = ebst_lookup(&(t->keys), (char *)ts->key.key); + eb = ebst_lookup(&t->shards[shard].keys, (char *)ts->key.key); else - eb = ebmb_lookup(&(t->keys), ts->key.key, t->key_size); + eb = ebmb_lookup(&t->shards[shard].keys, ts->key.key, t->key_size); if (unlikely(!eb)) return NULL; @@ -413,12 +530,21 @@ struct stksess *__stktable_lookup(struct stktable *t, struct stksess *ts) struct stksess *stktable_lookup(struct stktable *t, struct stksess *ts) { struct stksess *lts; + uint shard; + size_t len; + + if (t->type == SMP_T_STR) + len = strlen((const char *)ts->key.key); + else + len = t->key_size; + + shard = stktable_calc_shard_num(t, ts->key.key, len); - HA_RWLOCK_RDLOCK(STK_TABLE_LOCK, &t->lock); - lts = __stktable_lookup(t, ts); + HA_RWLOCK_RDLOCK(STK_TABLE_LOCK, &t->shards[shard].sh_lock); + lts = __stktable_lookup(t, ts, shard); if (lts) HA_ATOMIC_INC(<s->ref_cnt); - HA_RWLOCK_RDUNLOCK(STK_TABLE_LOCK, &t->lock); + HA_RWLOCK_RDUNLOCK(STK_TABLE_LOCK, &t->shards[shard].sh_lock); return lts; } @@ -428,7 +554,7 @@ struct stksess *stktable_lookup(struct stktable *t, struct stksess *ts) * The node will be also inserted into the update tree if needed, at a position * depending if the update is a local or coming from a remote node. * If <decrefcnt> is set, the ts entry's ref_cnt will be decremented. The table's - * write lock may be taken. + * updt_lock may be taken for writes. */ void stktable_touch_with_exp(struct stktable *t, struct stksess *ts, int local, int expire, int decrefcnt) { @@ -444,39 +570,18 @@ void stktable_touch_with_exp(struct stktable *t, struct stksess *ts, int local, /* If sync is enabled */ if (t->sync_task) { - try_lock_again: - /* We'll need to reliably check that the entry is in the tree. 
- * It's only inserted/deleted using a write lock so a read lock - * is sufficient to verify this. We may then need to upgrade it - * to perform an update (which is rare under load), and if the - * upgrade fails, we'll try again with a write lock directly. - */ - if (use_wrlock) - HA_RWLOCK_WRLOCK(STK_TABLE_LOCK, &t->updt_lock); - else - HA_RWLOCK_RDLOCK(STK_TABLE_LOCK, &t->updt_lock); - if (local) { /* Check if this entry is not in the tree or not * scheduled for at least one peer. */ - if (!ts->upd.node.leaf_p - || (int)(t->commitupdate - ts->upd.key) >= 0 - || (int)(ts->upd.key - t->localupdate) >= 0) { - /* Time to upgrade the read lock to write lock if needed */ - if (!use_wrlock) { - if (HA_RWLOCK_TRYRDTOSK(STK_TABLE_LOCK, &t->updt_lock) != 0) { - /* failed, try again */ - HA_RWLOCK_RDUNLOCK(STK_TABLE_LOCK, &t->updt_lock); - use_wrlock = 1; - goto try_lock_again; - } - HA_RWLOCK_SKTOWR(STK_TABLE_LOCK, &t->updt_lock); - use_wrlock = 1; - } + if (!ts->upd.node.leaf_p || _HA_ATOMIC_LOAD(&ts->seen)) { + /* Time to upgrade the read lock to write lock */ + HA_RWLOCK_WRLOCK(STK_TABLE_LOCK, &t->updt_lock); + use_wrlock = 1; /* here we're write-locked */ + ts->seen = 0; ts->upd.key = ++t->update; t->localupdate = t->update; eb32_delete(&ts->upd); @@ -489,28 +594,30 @@ void stktable_touch_with_exp(struct stktable *t, struct stksess *ts, int local, do_wakeup = 1; } else { - /* If this entry is not in the tree */ - + /* Note: we land here when learning new entries from + * remote peers. We hold one ref_cnt so the entry + * cannot vanish under us, however if two peers create + * the same key at the exact same time, we must be + * careful not to perform two parallel inserts! Hence + * we need to first check leaf_p to know if the entry + * is new, then lock the tree and check the entry again + * (since another thread could have created it in the + * mean time). 
+ */ if (!ts->upd.node.leaf_p) { /* Time to upgrade the read lock to write lock if needed */ - if (!use_wrlock) { - if (HA_RWLOCK_TRYRDTOSK(STK_TABLE_LOCK, &t->updt_lock) != 0) { - /* failed, try again */ - HA_RWLOCK_RDUNLOCK(STK_TABLE_LOCK, &t->updt_lock); - use_wrlock = 1; - goto try_lock_again; - } - HA_RWLOCK_SKTOWR(STK_TABLE_LOCK, &t->updt_lock); - use_wrlock = 1; - } + HA_RWLOCK_WRLOCK(STK_TABLE_LOCK, &t->updt_lock); + use_wrlock = 1; /* here we're write-locked */ - - ts->upd.key= (++t->update)+(2147483648U); - eb = eb32_insert(&t->updates, &ts->upd); - if (eb != &ts->upd) { - eb32_delete(eb); - eb32_insert(&t->updates, &ts->upd); + if (!ts->upd.node.leaf_p) { + ts->seen = 0; + ts->upd.key= (++t->update)+(2147483648U); + eb = eb32_insert(&t->updates, &ts->upd); + if (eb != &ts->upd) { + eb32_delete(eb); + eb32_insert(&t->updates, &ts->upd); + } } } } @@ -518,8 +625,6 @@ void stktable_touch_with_exp(struct stktable *t, struct stksess *ts, int local, /* drop the lock now */ if (use_wrlock) HA_RWLOCK_WRUNLOCK(STK_TABLE_LOCK, &t->updt_lock); - else - HA_RWLOCK_RDUNLOCK(STK_TABLE_LOCK, &t->updt_lock); } if (decrefcnt) @@ -569,14 +674,14 @@ static void stktable_release(struct stktable *t, struct stksess *ts) * is set. <ts> is returned if properly inserted, otherwise the one already * present if any. 
*/ -struct stksess *__stktable_store(struct stktable *t, struct stksess *ts) +struct stksess *__stktable_store(struct stktable *t, struct stksess *ts, uint shard) { struct ebmb_node *eb; - eb = ebmb_insert(&t->keys, &ts->key, t->key_size); + eb = ebmb_insert(&t->shards[shard].keys, &ts->key, t->key_size); if (likely(eb == &ts->key)) { ts->exp.key = ts->expire; - eb32_insert(&t->exps, &ts->exp); + eb32_insert(&t->shards[shard].exps, &ts->exp); } return ebmb_entry(eb, struct stksess, key); // most commonly this is <ts> } @@ -621,11 +726,24 @@ void stktable_requeue_exp(struct stktable *t, const struct stksess *ts) struct stksess *stktable_get_entry(struct stktable *table, struct stktable_key *key) { struct stksess *ts, *ts2; + uint shard; + size_t len; if (!key) return NULL; - ts = stktable_lookup_key(table, key); + if (table->type == SMP_T_STR) + len = key->key_len + 1 < table->key_size ? key->key_len : table->key_size - 1; + else + len = table->key_size; + + shard = stktable_calc_shard_num(table, key->key, len); + + HA_RWLOCK_RDLOCK(STK_TABLE_LOCK, &table->shards[shard].sh_lock); + ts = __stktable_lookup_key(table, key, shard); + if (ts) + HA_ATOMIC_INC(&ts->ref_cnt); + HA_RWLOCK_RDUNLOCK(STK_TABLE_LOCK, &table->shards[shard].sh_lock); if (ts) return ts; @@ -645,12 +763,12 @@ struct stksess *stktable_get_entry(struct stktable *table, struct stktable_key * * one we find. 
*/ - HA_RWLOCK_WRLOCK(STK_TABLE_LOCK, &table->lock); + HA_RWLOCK_WRLOCK(STK_TABLE_LOCK, &table->shards[shard].sh_lock); - ts2 = __stktable_store(table, ts); + ts2 = __stktable_store(table, ts, shard); HA_ATOMIC_INC(&ts2->ref_cnt); - HA_RWLOCK_WRUNLOCK(STK_TABLE_LOCK, &table->lock); + HA_RWLOCK_WRUNLOCK(STK_TABLE_LOCK, &table->shards[shard].sh_lock); if (unlikely(ts2 != ts)) { /* another entry was added in the mean time, let's @@ -671,12 +789,21 @@ struct stksess *stktable_get_entry(struct stktable *table, struct stktable_key * struct stksess *stktable_set_entry(struct stktable *table, struct stksess *nts) { struct stksess *ts; + uint shard; + size_t len; + + if (table->type == SMP_T_STR) + len = strlen((const char *)nts->key.key); + else + len = table->key_size; + + shard = stktable_calc_shard_num(table, nts->key.key, len); - HA_RWLOCK_RDLOCK(STK_TABLE_LOCK, &table->lock); - ts = __stktable_lookup(table, nts); + HA_RWLOCK_RDLOCK(STK_TABLE_LOCK, &table->shards[shard].sh_lock); + ts = __stktable_lookup(table, nts, shard); if (ts) { HA_ATOMIC_INC(&ts->ref_cnt); - HA_RWLOCK_RDUNLOCK(STK_TABLE_LOCK, &table->lock); + HA_RWLOCK_RDUNLOCK(STK_TABLE_LOCK, &table->shards[shard].sh_lock); return ts; } ts = nts; @@ -684,18 +811,18 @@ struct stksess *stktable_set_entry(struct stktable *table, struct stksess *nts) /* let's increment it before switching to exclusive */ HA_ATOMIC_INC(&ts->ref_cnt); - if (HA_RWLOCK_TRYRDTOSK(STK_TABLE_LOCK, &table->lock) != 0) { + if (HA_RWLOCK_TRYRDTOSK(STK_TABLE_LOCK, &table->shards[shard].sh_lock) != 0) { /* upgrade to seek lock failed, let's drop and take */ - HA_RWLOCK_RDUNLOCK(STK_TABLE_LOCK, &table->lock); - HA_RWLOCK_WRLOCK(STK_TABLE_LOCK, &table->lock); + HA_RWLOCK_RDUNLOCK(STK_TABLE_LOCK, &table->shards[shard].sh_lock); + HA_RWLOCK_WRLOCK(STK_TABLE_LOCK, &table->shards[shard].sh_lock); } else - HA_RWLOCK_SKTOWR(STK_TABLE_LOCK, &table->lock); + HA_RWLOCK_SKTOWR(STK_TABLE_LOCK, &table->shards[shard].sh_lock); /* now we're write-locked */ - 
__stktable_store(table, ts); - HA_RWLOCK_WRUNLOCK(STK_TABLE_LOCK, &table->lock); + __stktable_store(table, ts, shard); + HA_RWLOCK_WRUNLOCK(STK_TABLE_LOCK, &table->shards[shard].sh_lock); stktable_requeue_exp(table, ts); return ts; @@ -710,87 +837,117 @@ struct task *process_table_expire(struct task *task, void *context, unsigned int struct stktable *t = context; struct stksess *ts; struct eb32_node *eb; - int updt_locked = 0; - int looped = 0; + int updt_locked; + int looped; int exp_next; + int task_exp; + int shard; + + task_exp = TICK_ETERNITY; + + for (shard = 0; shard < CONFIG_HAP_TBL_BUCKETS; shard++) { + updt_locked = 0; + looped = 0; + HA_RWLOCK_WRLOCK(STK_TABLE_LOCK, &t->shards[shard].sh_lock); + eb = eb32_lookup_ge(&t->shards[shard].exps, now_ms - TIMER_LOOK_BACK); + + while (1) { + if (unlikely(!eb)) { + /* we might have reached the end of the tree, typically because + * <now_ms> is in the first half and we're first scanning the last + * half. Let's loop back to the beginning of the tree now if we + * have not yet visited it. + */ + if (looped) + break; + looped = 1; + eb = eb32_first(&t->shards[shard].exps); + if (likely(!eb)) + break; + } - HA_RWLOCK_WRLOCK(STK_TABLE_LOCK, &t->lock); - eb = eb32_lookup_ge(&t->exps, now_ms - TIMER_LOOK_BACK); - - while (1) { - if (unlikely(!eb)) { - /* we might have reached the end of the tree, typically because - * <now_ms> is in the first half and we're first scanning the last - * half. Let's loop back to the beginning of the tree now if we - * have not yet visited it. 
- */ - if (looped) - break; - looped = 1; - eb = eb32_first(&t->exps); - if (likely(!eb)) - break; - } - - if (likely(tick_is_lt(now_ms, eb->key))) { - /* timer not expired yet, revisit it later */ - exp_next = eb->key; - goto out_unlock; - } + if (likely(tick_is_lt(now_ms, eb->key))) { + /* timer not expired yet, revisit it later */ + exp_next = eb->key; + goto out_unlock; + } - /* timer looks expired, detach it from the queue */ - ts = eb32_entry(eb, struct stksess, exp); - eb = eb32_next(eb); + /* timer looks expired, detach it from the queue */ + ts = eb32_entry(eb, struct stksess, exp); + eb = eb32_next(eb); - /* don't delete an entry which is currently referenced */ - if (HA_ATOMIC_LOAD(&ts->ref_cnt) != 0) - continue; + /* don't delete an entry which is currently referenced */ + if (HA_ATOMIC_LOAD(&ts->ref_cnt) != 0) + continue; - eb32_delete(&ts->exp); + eb32_delete(&ts->exp); - if (!tick_is_expired(ts->expire, now_ms)) { - if (!tick_isset(ts->expire)) - continue; + if (!tick_is_expired(ts->expire, now_ms)) { + if (!tick_isset(ts->expire)) + continue; - ts->exp.key = ts->expire; - eb32_insert(&t->exps, &ts->exp); + ts->exp.key = ts->expire; + eb32_insert(&t->shards[shard].exps, &ts->exp); - /* the update might have jumped beyond the next element, - * possibly causing a wrapping. We need to check whether - * the next element should be used instead. If the next - * element doesn't exist it means we're on the right - * side and have to check the first one then. If it - * exists and is closer, we must use it, otherwise we - * use the current one. - */ - if (!eb) - eb = eb32_first(&t->exps); + /* the update might have jumped beyond the next element, + * possibly causing a wrapping. We need to check whether + * the next element should be used instead. If the next + * element doesn't exist it means we're on the right + * side and have to check the first one then. If it + * exists and is closer, we must use it, otherwise we + * use the current one. 
+ */ + if (!eb) + eb = eb32_first(&t->shards[shard].exps); - if (!eb || tick_is_lt(ts->exp.key, eb->key)) - eb = &ts->exp; - continue; - } + if (!eb || tick_is_lt(ts->exp.key, eb->key)) + eb = &ts->exp; + continue; + } - /* session expired, trash it */ - ebmb_delete(&ts->key); - if (ts->upd.node.leaf_p) { - if (!updt_locked) { - updt_locked = 1; - HA_RWLOCK_WRLOCK(STK_TABLE_LOCK, &t->updt_lock); + /* if the entry is in the update list, we must be extremely careful + * because peers can see it at any moment and start to use it. Peers + * will take the table's updt_lock for reading when doing that, and + * with that lock held, will grab a ref_cnt before releasing the + * lock. So we must take this lock as well and check the ref_cnt. + */ + if (ts->upd.node.leaf_p) { + if (!updt_locked) { + updt_locked = 1; + HA_RWLOCK_WRLOCK(STK_TABLE_LOCK, &t->updt_lock); + } + /* now we're locked, new peers can't grab it anymore, + * existing ones already have the ref_cnt. + */ + if (HA_ATOMIC_LOAD(&ts->ref_cnt)) + continue; } + + /* session expired, trash it */ + ebmb_delete(&ts->key); eb32_delete(&ts->upd); + __stksess_free(t, ts); } - __stksess_free(t, ts); - } - /* We have found no task to expire in any tree */ - exp_next = TICK_ETERNITY; + /* We have found no task to expire in any tree */ + exp_next = TICK_ETERNITY; -out_unlock: - task->expire = exp_next; - if (updt_locked) - HA_RWLOCK_WRUNLOCK(STK_TABLE_LOCK, &t->updt_lock); + out_unlock: + if (updt_locked) + HA_RWLOCK_WRUNLOCK(STK_TABLE_LOCK, &t->updt_lock); + + task_exp = tick_first(task_exp, exp_next); + HA_RWLOCK_WRUNLOCK(STK_TABLE_LOCK, &t->shards[shard].sh_lock); + } + + /* Reset the task's expiration. We do this under the lock so as not + * to ruin a call to task_queue() in stktable_requeue_exp() if we + * were to update with TICK_ETERNITY. 
+ */ + HA_RWLOCK_WRLOCK(STK_TABLE_LOCK, &t->lock); + task->expire = task_exp; HA_RWLOCK_WRUNLOCK(STK_TABLE_LOCK, &t->lock); + return task; } @@ -803,12 +960,17 @@ out_unlock: int stktable_init(struct stktable *t, char **err_msg) { int peers_retval = 0; + int shard; t->hash_seed = XXH64(t->id, t->idlen, 0); if (t->size) { - t->keys = EB_ROOT_UNIQUE; - memset(&t->exps, 0, sizeof(t->exps)); + for (shard = 0; shard < CONFIG_HAP_TBL_BUCKETS; shard++) { + t->shards[shard].keys = EB_ROOT_UNIQUE; + memset(&t->shards[shard].exps, 0, sizeof(t->shards[shard].exps)); + HA_RWLOCK_INIT(&t->shards[shard].sh_lock); + } + t->updates = EB_ROOT_UNIQUE; HA_RWLOCK_INIT(&t->lock); @@ -1402,6 +1564,8 @@ struct stktable_data_type stktable_data_types[STKTABLE_DATA_TYPES] = { [STKTABLE_DT_GPT] = { .name = "gpt", .std_type = STD_T_UINT, .is_array = 1, .as_is = 1 }, [STKTABLE_DT_GPC] = { .name = "gpc", .std_type = STD_T_UINT, .is_array = 1 }, [STKTABLE_DT_GPC_RATE] = { .name = "gpc_rate", .std_type = STD_T_FRQP, .is_array = 1, .arg_type = ARG_T_DELAY }, + [STKTABLE_DT_GLITCH_CNT] = { .name = "glitch_cnt", .std_type = STD_T_UINT }, + [STKTABLE_DT_GLITCH_RATE] = { .name = "glitch_rate", .std_type = STD_T_FRQP, .arg_type = ARG_T_DELAY }, }; /* Registers stick-table extra data type with index <idx>, name <name>, type @@ -1741,6 +1905,79 @@ static int sample_conv_table_bytes_out_rate(const struct arg *arg_p, struct samp return !!ptr; } +/* Casts sample <smp> to the type of the table specified in arg(0), and looks + * it up into this table. Returns the cumulated number of front glitches for the + * key if the key is present in the table, otherwise zero, so that comparisons + * can be easily performed. If the inspected parameter is not stored in the + * table, <not found> is returned. 
+ */ +static int sample_conv_table_glitch_cnt(const struct arg *arg_p, struct sample *smp, void *private) +{ + struct stktable *t; + struct stktable_key *key; + struct stksess *ts; + void *ptr; + + t = arg_p[0].data.t; + + key = smp_to_stkey(smp, t); + if (!key) + return 0; + + ts = stktable_lookup_key(t, key); + + smp->flags = SMP_F_VOL_TEST; + smp->data.type = SMP_T_SINT; + smp->data.u.sint = 0; + + if (!ts) /* key not present */ + return 1; + + ptr = stktable_data_ptr(t, ts, STKTABLE_DT_GLITCH_CNT); + if (ptr) + smp->data.u.sint = stktable_data_cast(ptr, std_t_uint); + + stktable_release(t, ts); + return !!ptr; +} + +/* Casts sample <smp> to the type of the table specified in arg(0), and looks + * it up into this table. Returns the front glitch rate the key if the key is + * present in the table, otherwise zero, so that comparisons can be easily + * performed. If the inspected parameter is not stored in the table, <not found> + * is returned. + */ +static int sample_conv_table_glitch_rate(const struct arg *arg_p, struct sample *smp, void *private) +{ + struct stktable *t; + struct stktable_key *key; + struct stksess *ts; + void *ptr; + + t = arg_p[0].data.t; + + key = smp_to_stkey(smp, t); + if (!key) + return 0; + + ts = stktable_lookup_key(t, key); + + smp->flags = SMP_F_VOL_TEST; + smp->data.type = SMP_T_SINT; + smp->data.u.sint = 0; + + if (!ts) /* key not present */ + return 1; + + ptr = stktable_data_ptr(t, ts, STKTABLE_DT_GLITCH_RATE); + if (ptr) + smp->data.u.sint = read_freq_ctr_period(&stktable_data_cast(ptr, std_t_frqp), + t->data_arg[STKTABLE_DT_GLITCH_RATE].u); + + stktable_release(t, ts); + return !!ptr; +} + /* Casts sample <smp> to the type of the table specified in arg_p(1), and looks * it up into this table. 
Returns the value of the GPT[arg_p(0)] tag for the key * if the key is present in the table, otherwise false, so that comparisons can @@ -4218,6 +4455,85 @@ smp_fetch_sc_conn_cur(const struct arg *args, struct sample *smp, const char *kw return 1; } +/* set <smp> to the cumulated number of glitches from the stream or session's + * tracked frontend counters. Supports being called as "sc[0-9]_glitch_cnt" or + * "src_glitch_cnt" only. + */ +static int +smp_fetch_sc_glitch_cnt(const struct arg *args, struct sample *smp, const char *kw, void *private) +{ + struct stkctr tmpstkctr; + struct stkctr *stkctr; + + stkctr = smp_fetch_sc_stkctr(smp->sess, smp->strm, args, kw, &tmpstkctr); + if (!stkctr) + return 0; + + smp->flags = SMP_F_VOL_TEST; + smp->data.type = SMP_T_SINT; + smp->data.u.sint = 0; + if (stkctr_entry(stkctr) != NULL) { + void *ptr; + + ptr = stktable_data_ptr(stkctr->table, stkctr_entry(stkctr), STKTABLE_DT_GLITCH_CNT); + if (!ptr) { + if (stkctr == &tmpstkctr) + stktable_release(stkctr->table, stkctr_entry(stkctr)); + return 0; /* parameter not stored */ + } + + HA_RWLOCK_RDLOCK(STK_SESS_LOCK, &stkctr_entry(stkctr)->lock); + + smp->data.u.sint = stktable_data_cast(ptr, std_t_uint); + + HA_RWLOCK_RDUNLOCK(STK_SESS_LOCK, &stkctr_entry(stkctr)->lock); + + if (stkctr == &tmpstkctr) + stktable_release(stkctr->table, stkctr_entry(stkctr)); + } + return 1; +} + +/* set <smp> to the rate of glitches from the stream or session's tracked + * frontend counters. Supports being called as "sc[0-9]_glitch_rate" or + * "src_glitch_rate" only. 
+ */ +static int +smp_fetch_sc_glitch_rate(const struct arg *args, struct sample *smp, const char *kw, void *private) +{ + struct stkctr tmpstkctr; + struct stkctr *stkctr; + + stkctr = smp_fetch_sc_stkctr(smp->sess, smp->strm, args, kw, &tmpstkctr); + if (!stkctr) + return 0; + + smp->flags = SMP_F_VOL_TEST; + smp->data.type = SMP_T_SINT; + smp->data.u.sint = 0; + if (stkctr_entry(stkctr) != NULL) { + void *ptr; + + ptr = stktable_data_ptr(stkctr->table, stkctr_entry(stkctr), STKTABLE_DT_GLITCH_RATE); + if (!ptr) { + if (stkctr == &tmpstkctr) + stktable_release(stkctr->table, stkctr_entry(stkctr)); + return 0; /* parameter not stored */ + } + + HA_RWLOCK_RDLOCK(STK_SESS_LOCK, &stkctr_entry(stkctr)->lock); + + smp->data.u.sint = read_freq_ctr_period(&stktable_data_cast(ptr, std_t_frqp), + stkctr->table->data_arg[STKTABLE_DT_GLITCH_RATE].u); + + HA_RWLOCK_RDUNLOCK(STK_SESS_LOCK, &stkctr_entry(stkctr)->lock); + + if (stkctr == &tmpstkctr) + stktable_release(stkctr->table, stkctr_entry(stkctr)); + } + return 1; +} + /* set <smp> to the cumulated number of streams from the stream's tracked * frontend counters. Supports being called as "sc[0-9]_sess_cnt" or * "src_sess_cnt" only. @@ -4885,6 +5201,7 @@ struct show_table_ctx { void *target; /* table we want to dump, or NULL for all */ struct stktable *t; /* table being currently dumped (first if NULL) */ struct stksess *entry; /* last entry we were trying to dump (or first if NULL) */ + int tree_head; /* tree head currently being visited */ long long value[STKTABLE_FILTER_LEN]; /* value to compare against */ signed char data_type[STKTABLE_FILTER_LEN]; /* type of data to compare, or -1 if none */ signed char data_op[STKTABLE_FILTER_LEN]; /* operator (STD_OP_*) when data_type set */ @@ -4896,39 +5213,22 @@ struct show_table_ctx { char action; /* action on the table : one of STK_CLI_ACT_* */ }; -/* Processes a single table entry matching a specific key passed in argument. 
- * returns 0 if wants to be called again, 1 if has ended processing. +/* Processes a single table entry <ts>. + * returns 0 if it wants to be called again, 1 if has ended processing. */ -static int table_process_entry_per_key(struct appctx *appctx, char **args) +static int table_process_entry(struct appctx *appctx, struct stksess *ts, char **args) { struct show_table_ctx *ctx = appctx->svcctx; struct stktable *t = ctx->target; - struct stksess *ts; - struct sample key; long long value; int data_type; int cur_arg; void *ptr; struct freq_ctr *frqp; - if (!*args[4]) - return cli_err(appctx, "Key value expected\n"); - - memset(&key, 0, sizeof(key)); - key.data.type = SMP_T_STR; - key.data.u.str.area = args[4]; - key.data.u.str.data = strlen(args[4]); - switch (t->type) { case SMP_T_IPV4: case SMP_T_IPV6: - /* prefer input format over table type when parsing ip addresses, - * then let smp_to_stkey() do the conversion for us when needed - */ - BUG_ON(!sample_casts[key.data.type][SMP_T_ADDR]); - if (!sample_casts[key.data.type][SMP_T_ADDR](&key)) - return cli_err(appctx, "Invalid key\n"); - break; case SMP_T_SINT: case SMP_T_STR: break; @@ -4945,21 +5245,15 @@ static int table_process_entry_per_key(struct appctx *appctx, char **args) } } - /* try to convert key according to table type - * (it will fill static_table_key on success) - */ - if (!smp_to_stkey(&key, t)) - return cli_err(appctx, "Invalid key\n"); - /* check permissions */ if (!cli_has_level(appctx, ACCESS_LVL_OPER)) return 1; + if (!ts) + return 1; + switch (ctx->action) { case STK_CLI_ACT_SHOW: - ts = stktable_lookup_key(t, &static_table_key); - if (!ts) - return 1; chunk_reset(&trash); if (!table_dump_head_to_buffer(&trash, appctx, t, t)) { stktable_release(t, ts); @@ -4976,10 +5270,6 @@ static int table_process_entry_per_key(struct appctx *appctx, char **args) break; case STK_CLI_ACT_CLR: - ts = stktable_lookup_key(t, &static_table_key); - if (!ts) - return 1; - if (!stksess_kill(t, ts, 1)) { /* don't 
delete an entry which is currently referenced */ return cli_err(appctx, "Entry currently in use, cannot remove\n"); @@ -4987,11 +5277,6 @@ static int table_process_entry_per_key(struct appctx *appctx, char **args) break; case STK_CLI_ACT_SET: - ts = stktable_get_entry(t, &static_table_key); - if (!ts) { - /* don't delete an entry which is currently referenced */ - return cli_err(appctx, "Unable to allocate a new entry\n"); - } HA_RWLOCK_WRLOCK(STK_SESS_LOCK, &ts->lock); for (cur_arg = 5; *args[cur_arg]; cur_arg += 2) { if (strncmp(args[cur_arg], "data.", 5) != 0) { @@ -5023,7 +5308,7 @@ static int table_process_entry_per_key(struct appctx *appctx, char **args) return 1; } - ptr = stktable_data_ptr(t, ts, data_type); + ptr = __stktable_data_ptr(t, ts, data_type); switch (stktable_data_types[data_type].std_type) { case STD_T_SINT: @@ -5060,6 +5345,82 @@ static int table_process_entry_per_key(struct appctx *appctx, char **args) return cli_err(appctx, "Unknown action\n"); } return 1; + +} + +/* Processes a single table entry matching a specific key passed in argument. + * returns 0 if wants to be called again, 1 if has ended processing. 
+ */ +static int table_process_entry_per_key(struct appctx *appctx, char **args) +{ + struct show_table_ctx *ctx = appctx->svcctx; + struct stktable *t = ctx->target; + struct stksess *ts; + struct sample key; + + if (!*args[4]) + return cli_err(appctx, "Key value expected\n"); + + memset(&key, 0, sizeof(key)); + key.data.type = SMP_T_STR; + key.data.u.str.area = args[4]; + key.data.u.str.data = strlen(args[4]); + + switch (t->type) { + case SMP_T_IPV4: + case SMP_T_IPV6: + /* prefer input format over table type when parsing ip addresses, + * then let smp_to_stkey() do the conversion for us when needed + */ + BUG_ON(!sample_casts[key.data.type][SMP_T_ADDR]); + if (!sample_casts[key.data.type][SMP_T_ADDR](&key)) + return cli_err(appctx, "Invalid key\n"); + break; + default: + /* nothing to do */ + break; + } + + /* try to convert key according to table type + * (it will fill static_table_key on success) + */ + if (!smp_to_stkey(&key, t)) + return cli_err(appctx, "Invalid key\n"); + + if (ctx->action == STK_CLI_ACT_SET) { + ts = stktable_get_entry(t, &static_table_key); + if (!ts) + return cli_err(appctx, "Unable to allocate a new entry\n"); + } else + ts = stktable_lookup_key(t, &static_table_key); + + return table_process_entry(appctx, ts, args); +} + +/* Processes a single table entry matching a specific ptr passed in argument. + * returns 0 if wants to be called again, 1 if has ended processing. 
+ */ +static int table_process_entry_per_ptr(struct appctx *appctx, char **args) +{ + struct show_table_ctx *ctx = appctx->svcctx; + struct stktable *t = ctx->target; + ulong ptr; + char *error; + struct stksess *ts; + + if (!*args[4] || args[4][0] != '0' || args[4][1] != 'x') + return cli_err(appctx, "Pointer expected (0xffff notation)\n"); + + /* Convert argument to integer value */ + ptr = strtoul(args[4], &error, 16); + if (*error != '\0') + return cli_err(appctx, "Malformed ptr.\n"); + + ts = stktable_lookup_ptr(t, (void *)ptr); + if (!ts) + return cli_err(appctx, "No entry can be found matching ptr.\n"); + + return table_process_entry(appctx, ts, args); } /* Prepares the appctx fields with the data-based filters from the command line. @@ -5127,6 +5488,8 @@ static int cli_parse_table_req(char **args, char *payload, struct appctx *appctx if (strcmp(args[3], "key") == 0) return table_process_entry_per_key(appctx, args); + if (strcmp(args[3], "ptr") == 0) + return table_process_entry_per_ptr(appctx, args); else if (strncmp(args[3], "data.", 5) == 0) return table_prepare_data_request(appctx, args); else if (*args[3]) @@ -5137,11 +5500,11 @@ static int cli_parse_table_req(char **args, char *payload, struct appctx *appctx err_args: switch (ctx->action) { case STK_CLI_ACT_SHOW: - return cli_err(appctx, "Optional argument only supports \"data.<store_data_type>\" <operator> <value> and key <key>\n"); + return cli_err(appctx, "Optional argument only supports \"data.<store_data_type>\" <operator> <value> or key <key> or ptr <ptr>\n"); case STK_CLI_ACT_CLR: - return cli_err(appctx, "Required arguments: <table> \"data.<store_data_type>\" <operator> <value> or <table> key <key>\n"); + return cli_err(appctx, "Required arguments: <table> \"data.<store_data_type>\" <operator> <value> or <table> key <key> or <table> ptr <ptr>\n"); case STK_CLI_ACT_SET: - return cli_err(appctx, "Required arguments: <table> key <key> [data.<store_data_type> <value>]*\n"); + return cli_err(appctx, 
"Required arguments: <table> key <key> [data.<store_data_type> <value>]* or <table> ptr <ptr> [data.<store_data_type> <value>]*\n"); default: return cli_err(appctx, "Unknown action\n"); } @@ -5159,6 +5522,7 @@ static int cli_io_handler_table(struct appctx *appctx) struct ebmb_node *eb; int skip_entry; int show = ctx->action == STK_CLI_ACT_SHOW; + int shard = ctx->tree_head; /* * We have 3 possible states in ctx->state : @@ -5170,14 +5534,6 @@ static int cli_io_handler_table(struct appctx *appctx) * - STATE_DONE : nothing left to dump, the buffer may contain some * data though. */ - /* FIXME: Don't watch the other side !*/ - if (unlikely(sc_opposite(sc)->flags & SC_FL_SHUT_DONE)) { - /* in case of abort, remove any refcount we might have set on an entry */ - if (ctx->state == STATE_DUMP) { - stksess_kill_if_expired(ctx->t, ctx->entry, 1); - } - return 1; - } chunk_reset(&trash); @@ -5192,22 +5548,30 @@ static int cli_io_handler_table(struct appctx *appctx) } if (ctx->t->size) { - if (show && !table_dump_head_to_buffer(&trash, appctx, ctx->t, ctx->target)) + if (show && !shard && !table_dump_head_to_buffer(&trash, appctx, ctx->t, ctx->target)) return 0; if (ctx->target && (strm_li(s)->bind_conf->level & ACCESS_LVL_MASK) >= ACCESS_LVL_OPER) { /* dump entries only if table explicitly requested */ - HA_RWLOCK_WRLOCK(STK_TABLE_LOCK, &ctx->t->lock); - eb = ebmb_first(&ctx->t->keys); + HA_RWLOCK_WRLOCK(STK_TABLE_LOCK, &ctx->t->shards[shard].sh_lock); + eb = ebmb_first(&ctx->t->shards[shard].keys); if (eb) { ctx->entry = ebmb_entry(eb, struct stksess, key); HA_ATOMIC_INC(&ctx->entry->ref_cnt); ctx->state = STATE_DUMP; - HA_RWLOCK_WRUNLOCK(STK_TABLE_LOCK, &ctx->t->lock); + HA_RWLOCK_WRUNLOCK(STK_TABLE_LOCK, &ctx->t->shards[shard].sh_lock); break; } - HA_RWLOCK_WRUNLOCK(STK_TABLE_LOCK, &ctx->t->lock); + HA_RWLOCK_WRUNLOCK(STK_TABLE_LOCK, &ctx->t->shards[shard].sh_lock); + + /* we come here if we didn't find any entry in this shard */ + shard = ++ctx->tree_head; + if (shard < 
CONFIG_HAP_TBL_BUCKETS) + break; // try again on new shard + + /* fall through next table */ + shard = ctx->tree_head = 0; } } ctx->t = ctx->t->next; @@ -5275,7 +5639,7 @@ static int cli_io_handler_table(struct appctx *appctx) HA_RWLOCK_RDUNLOCK(STK_SESS_LOCK, &ctx->entry->lock); - HA_RWLOCK_WRLOCK(STK_TABLE_LOCK, &ctx->t->lock); + HA_RWLOCK_WRLOCK(STK_TABLE_LOCK, &ctx->t->shards[shard].sh_lock); HA_ATOMIC_DEC(&ctx->entry->ref_cnt); eb = ebmb_next(&ctx->entry->key); @@ -5287,7 +5651,7 @@ static int cli_io_handler_table(struct appctx *appctx) else if (!skip_entry && !ctx->entry->ref_cnt) __stksess_kill(ctx->t, old); HA_ATOMIC_INC(&ctx->entry->ref_cnt); - HA_RWLOCK_WRUNLOCK(STK_TABLE_LOCK, &ctx->t->lock); + HA_RWLOCK_WRUNLOCK(STK_TABLE_LOCK, &ctx->t->shards[shard].sh_lock); break; } @@ -5297,9 +5661,13 @@ static int cli_io_handler_table(struct appctx *appctx) else if (!skip_entry && !HA_ATOMIC_LOAD(&ctx->entry->ref_cnt)) __stksess_kill(ctx->t, ctx->entry); - HA_RWLOCK_WRUNLOCK(STK_TABLE_LOCK, &ctx->t->lock); + HA_RWLOCK_WRUNLOCK(STK_TABLE_LOCK, &ctx->t->shards[shard].sh_lock); - ctx->t = ctx->t->next; + shard = ++ctx->tree_head; + if (shard >= CONFIG_HAP_TBL_BUCKETS) { + shard = ctx->tree_head = 0; + ctx->t = ctx->t->next; + } ctx->state = STATE_NEXT; break; @@ -5481,6 +5849,8 @@ static struct sample_fetch_kw_list smp_fetch_keywords = {ILH, { { "sc_get_gpc", smp_fetch_sc_get_gpc, ARG3(2,SINT,SINT,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, }, { "sc_get_gpc0", smp_fetch_sc_get_gpc0, ARG2(1,SINT,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, }, { "sc_get_gpc1", smp_fetch_sc_get_gpc1, ARG2(1,SINT,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN }, + { "sc_glitch_cnt", smp_fetch_sc_glitch_cnt, ARG2(1,SINT,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, }, + { "sc_glitch_rate", smp_fetch_sc_glitch_rate, ARG2(1,SINT,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, }, { "sc_gpc_rate", smp_fetch_sc_gpc_rate, ARG3(2,SINT,SINT,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, }, { "sc_gpc0_rate", smp_fetch_sc_gpc0_rate, 
ARG2(1,SINT,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, }, { "sc_gpc1_rate", smp_fetch_sc_gpc1_rate, ARG2(1,SINT,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, }, @@ -5509,6 +5879,8 @@ static struct sample_fetch_kw_list smp_fetch_keywords = {ILH, { { "sc0_get_gpt0", smp_fetch_sc_get_gpt0, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, }, { "sc0_get_gpc0", smp_fetch_sc_get_gpc0, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, }, { "sc0_get_gpc1", smp_fetch_sc_get_gpc1, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, }, + { "sc0_glitch_cnt", smp_fetch_sc_glitch_cnt, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, }, + { "sc0_glitch_rate", smp_fetch_sc_glitch_rate, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, }, { "sc0_gpc0_rate", smp_fetch_sc_gpc0_rate, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, }, { "sc0_gpc1_rate", smp_fetch_sc_gpc1_rate, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, }, { "sc0_http_err_cnt", smp_fetch_sc_http_err_cnt, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, }, @@ -5536,6 +5908,8 @@ static struct sample_fetch_kw_list smp_fetch_keywords = {ILH, { { "sc1_get_gpt0", smp_fetch_sc_get_gpt0, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, }, { "sc1_get_gpc0", smp_fetch_sc_get_gpc0, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, }, { "sc1_get_gpc1", smp_fetch_sc_get_gpc1, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, }, + { "sc1_glitch_cnt", smp_fetch_sc_glitch_cnt, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, }, + { "sc1_glitch_rate", smp_fetch_sc_glitch_rate, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, }, { "sc1_gpc0_rate", smp_fetch_sc_gpc0_rate, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, }, { "sc1_gpc1_rate", smp_fetch_sc_gpc1_rate, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, }, { "sc1_http_err_cnt", smp_fetch_sc_http_err_cnt, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, }, @@ -5562,6 +5936,8 @@ static struct sample_fetch_kw_list smp_fetch_keywords = {ILH, { { "sc2_get_gpt0", smp_fetch_sc_get_gpt0, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, }, { 
"sc2_get_gpc0", smp_fetch_sc_get_gpc0, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, }, { "sc2_get_gpc1", smp_fetch_sc_get_gpc1, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, }, + { "sc2_glitch_cnt", smp_fetch_sc_glitch_cnt, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, }, + { "sc2_glitch_rate", smp_fetch_sc_glitch_rate, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, }, { "sc2_gpc0_rate", smp_fetch_sc_gpc0_rate, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, }, { "sc2_gpc1_rate", smp_fetch_sc_gpc1_rate, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, }, { "sc2_http_err_cnt", smp_fetch_sc_http_err_cnt, ARG1(0,TAB), NULL, SMP_T_SINT, SMP_USE_INTRN, }, @@ -5591,6 +5967,8 @@ static struct sample_fetch_kw_list smp_fetch_keywords = {ILH, { { "src_get_gpc", smp_fetch_sc_get_gpc, ARG2(2,SINT,TAB), NULL, SMP_T_SINT, SMP_USE_L4CLI, }, { "src_get_gpc0", smp_fetch_sc_get_gpc0, ARG1(1,TAB), NULL, SMP_T_SINT, SMP_USE_L4CLI, }, { "src_get_gpc1", smp_fetch_sc_get_gpc1, ARG1(1,TAB), NULL, SMP_T_SINT, SMP_USE_L4CLI, }, + { "src_glitch_cnt", smp_fetch_sc_glitch_cnt, ARG1(1,TAB), NULL, SMP_T_SINT, SMP_USE_L4CLI, }, + { "src_glitch_rate", smp_fetch_sc_glitch_rate, ARG1(1,TAB), NULL, SMP_T_SINT, SMP_USE_L4CLI, }, { "src_gpc_rate", smp_fetch_sc_gpc_rate, ARG2(2,SINT,TAB), NULL, SMP_T_SINT, SMP_USE_L4CLI, }, { "src_gpc0_rate", smp_fetch_sc_gpc0_rate, ARG1(1,TAB), NULL, SMP_T_SINT, SMP_USE_L4CLI, }, { "src_gpc1_rate", smp_fetch_sc_gpc1_rate, ARG1(1,TAB), NULL, SMP_T_SINT, SMP_USE_L4CLI, }, @@ -5632,6 +6010,8 @@ static struct sample_conv_kw_list sample_conv_kws = {ILH, { { "table_gpc_rate", sample_conv_table_gpc_rate, ARG2(2,SINT,TAB), NULL, SMP_T_ANY, SMP_T_SINT }, { "table_gpc0_rate", sample_conv_table_gpc0_rate, ARG1(1,TAB), NULL, SMP_T_ANY, SMP_T_SINT }, { "table_gpc1_rate", sample_conv_table_gpc1_rate, ARG1(1,TAB), NULL, SMP_T_ANY, SMP_T_SINT }, + { "table_glitch_cnt", sample_conv_table_glitch_cnt, ARG1(1,TAB), NULL, SMP_T_ANY, SMP_T_SINT }, + { "table_glitch_rate", 
sample_conv_table_glitch_rate, ARG1(1,TAB), NULL, SMP_T_ANY, SMP_T_SINT }, { "table_http_err_cnt", sample_conv_table_http_err_cnt, ARG1(1,TAB), NULL, SMP_T_ANY, SMP_T_SINT }, { "table_http_err_rate", sample_conv_table_http_err_rate, ARG1(1,TAB), NULL, SMP_T_ANY, SMP_T_SINT }, { "table_http_fail_cnt", sample_conv_table_http_fail_cnt, ARG1(1,TAB), NULL, SMP_T_ANY, SMP_T_SINT }, @@ -5656,3 +6036,73 @@ static struct cfg_kw_list cfg_kws = {{ },{ }}; INITCALL1(STG_REGISTER, cfg_register_keywords, &cfg_kws); + + +#if defined(USE_PROMEX) + +static int stk_promex_metric_info(unsigned int id, struct promex_metric *metric, struct ist *desc) +{ + switch (id) { + case STICKTABLE_SIZE: + *metric = (struct promex_metric){ .n = ist("size"), .type = PROMEX_MT_GAUGE, .flags = PROMEX_FL_MODULE_METRIC }; + *desc = ist("Stick table size."); + break; + case STICKTABLE_USED: + *metric = (struct promex_metric){ .n = ist("used"), .type = PROMEX_MT_GAUGE, .flags = PROMEX_FL_MODULE_METRIC }; + *desc = ist("Number of entries used in this stick table."); + break; + default: + return -1; + } + return 1; +} + +static void *stk_promex_start_ts(void *unused, unsigned int id) +{ + return stktables_list; +} + +static void *stk_promex_next_ts(void *unused, void *metric_ctx, unsigned int id) +{ + struct stktable *t = metric_ctx; + + return t->next; +} + +static int stk_promex_fill_ts(void *unused, void *metric_ctx, unsigned int id, struct promex_label *labels, struct field *field) +{ + struct stktable *t = metric_ctx; + + if (!t->size) + return 0; + + labels[0].name = ist("name"); + labels[0].value = ist(t->id); + labels[1].name = ist("type"); + labels[1].value = ist(stktable_types[t->type].kw); + + switch (id) { + case STICKTABLE_SIZE: + *field = mkf_u32(FN_GAUGE, t->size); + break; + case STICKTABLE_USED: + *field = mkf_u32(FN_GAUGE, t->current); + break; + default: + return -1; + } + return 1; +} + +static struct promex_module promex_sticktable_module = { + .name = IST("sticktable"), + .metric_info 
= stk_promex_metric_info, + .start_ts = stk_promex_start_ts, + .next_ts = stk_promex_next_ts, + .fill_ts = stk_promex_fill_ts, + .nb_metrics = STICKTABLE_TOTAL_FIELDS, +}; + +INITCALL1(STG_REGISTER, promex_register_module, &promex_sticktable_module); + +#endif diff --git a/src/stream.c b/src/stream.c index e643a6d..ed5c268 100644 --- a/src/stream.c +++ b/src/stream.c @@ -320,15 +320,13 @@ int stream_buf_available(void *arg) { struct stream *s = arg; - if (!s->req.buf.size && !sc_ep_have_ff_data(s->scb) && s->scf->flags & SC_FL_NEED_BUFF && - b_alloc(&s->req.buf)) + if (!s->req.buf.size && !sc_ep_have_ff_data(s->scb) && s->scf->flags & SC_FL_NEED_BUFF) sc_have_buff(s->scf); - else if (!s->res.buf.size && !sc_ep_have_ff_data(s->scf) && s->scb->flags & SC_FL_NEED_BUFF && - b_alloc(&s->res.buf)) + + if (!s->res.buf.size && !sc_ep_have_ff_data(s->scf) && s->scb->flags & SC_FL_NEED_BUFF) sc_have_buff(s->scb); - else - return 0; + s->flags |= SF_MAYALLOC; task_wakeup(s->task, TASK_WOKEN_RES); return 1; @@ -632,8 +630,7 @@ void stream_free(struct stream *s) } /* We may still be present in the buffer wait queue */ - if (LIST_INLIST(&s->buffer_wait.list)) - LIST_DEL_INIT(&s->buffer_wait.list); + b_dequeue(&s->buffer_wait); if (s->req.buf.size || s->res.buf.size) { int count = !!s->req.buf.size + !!s->res.buf.size; @@ -752,8 +749,12 @@ void stream_free(struct stream *s) */ static int stream_alloc_work_buffer(struct stream *s) { - if (b_alloc(&s->res.buf)) + if (b_alloc(&s->res.buf, DB_CHANNEL | ((s->flags & SF_MAYALLOC) ? DB_F_NOQUEUE : 0))) { + s->flags &= ~SF_MAYALLOC; return 1; + } + + b_requeue(DB_CHANNEL, &s->buffer_wait); return 0; } @@ -920,7 +921,7 @@ void back_establish(struct stream *s) if (!IS_HTX_STRM(s)) { /* let's allow immediate data connection in this case */ /* if the user wants to log as soon as possible, without counting * bytes from the server, then this is the right moment. 
*/ - if (!LIST_ISEMPTY(&strm_fe(s)->logformat) && !(s->logs.logwait & LW_BYTES)) { + if (!lf_expr_isempty(&strm_fe(s)->logformat) && !(s->logs.logwait & LW_BYTES)) { /* note: no pend_pos here, session is established */ s->logs.t_close = s->logs.t_connect; /* to get a valid end date */ s->do_log(s); @@ -1736,8 +1737,8 @@ struct task *process_stream(struct task *t, void *context, unsigned int state) scb = s->scb; /* First, attempt to receive pending data from I/O layers */ - sc_conn_sync_recv(scf); - sc_conn_sync_recv(scb); + sc_sync_recv(scf); + sc_sync_recv(scb); /* Let's check if we're looping without making any progress, e.g. due * to a bogus analyser or the fact that we're ignoring a read0. The @@ -1794,25 +1795,12 @@ struct task *process_stream(struct task *t, void *context, unsigned int state) } resync_stconns: - /* below we may emit error messages so we have to ensure that we have - * our buffers properly allocated. If the allocation failed, an error is - * triggered. - * - * NOTE: An error is returned because the mechanism to queue entities - * waiting for a buffer is totally broken for now. However, this - * part must be refactored. When it will be handled, this part - * must be be reviewed too. - */ if (!stream_alloc_work_buffer(s)) { - scf->flags |= SC_FL_ERROR; - s->conn_err_type = STRM_ET_CONN_RES; - - scb->flags |= SC_FL_ERROR; - s->conn_err_type = STRM_ET_CONN_RES; - - if (!(s->flags & SF_ERR_MASK)) - s->flags |= SF_ERR_RESOURCE; - sess_set_term_flags(s); + scf->flags &= ~SC_FL_DONT_WAKE; + scb->flags &= ~SC_FL_DONT_WAKE; + /* we're stuck for now */ + t->expire = TICK_ETERNITY; + goto leave; } /* 1b: check for low-level errors reported at the stream connector. 
@@ -2349,7 +2337,7 @@ struct task *process_stream(struct task *t, void *context, unsigned int state) } /* Let's see if we can send the pending request now */ - sc_conn_sync_send(scb); + sc_sync_send(scb); /* * Now forward all shutdown requests between both sides of the request buffer @@ -2459,7 +2447,7 @@ struct task *process_stream(struct task *t, void *context, unsigned int state) scf_flags = (scf_flags & ~(SC_FL_SHUT_DONE|SC_FL_SHUT_WANTED)) | (scf->flags & (SC_FL_SHUT_DONE|SC_FL_SHUT_WANTED)); /* Let's see if we can send the pending response now */ - sc_conn_sync_send(scf); + sc_sync_send(scf); /* * Now forward all shutdown requests between both sides of the buffer @@ -2552,7 +2540,7 @@ struct task *process_stream(struct task *t, void *context, unsigned int state) stream_handle_timeouts(s); goto resync_stconns; } - + leave: s->pending_events &= ~(TASK_WOKEN_TIMER | TASK_WOKEN_RES); stream_release_buffers(s); @@ -2597,7 +2585,7 @@ struct task *process_stream(struct task *t, void *context, unsigned int state) } /* let's do a final log if we need it */ - if (!LIST_ISEMPTY(&sess->fe->logformat) && s->logs.logwait && + if (!lf_expr_isempty(&sess->fe->logformat) && s->logs.logwait && !(s->flags & SF_MONITOR) && (!(sess->fe->options & PR_O_NULLNOLOG) || req->total)) { /* we may need to know the position in the queue */ @@ -2847,7 +2835,7 @@ INITCALL0(STG_INIT, init_stream); * If an ID is already stored within the stream nothing happens existing unique ID is * returned. */ -struct ist stream_generate_unique_id(struct stream *strm, struct list *format) +struct ist stream_generate_unique_id(struct stream *strm, struct lf_expr *format) { if (isttest(strm->unique_id)) { return strm->unique_id; @@ -3494,9 +3482,8 @@ void strm_dump_to_buffer(struct buffer *buf, const struct stream *strm, const ch * buffer is full and it needs to be called again, otherwise non-zero. It is * designed to be called from stats_dump_strm_to_buffer() below. 
*/ -static int stats_dump_full_strm_to_buffer(struct stconn *sc, struct stream *strm) +static int stats_dump_full_strm_to_buffer(struct appctx *appctx, struct stream *strm) { - struct appctx *appctx = __sc_appctx(sc); struct show_sess_ctx *ctx = appctx->svcctx; chunk_reset(&trash); @@ -3588,7 +3575,6 @@ static int cli_parse_show_sess(char **args, char *payload, struct appctx *appctx static int cli_io_handler_dump_sess(struct appctx *appctx) { struct show_sess_ctx *ctx = appctx->svcctx; - struct stconn *sc = appctx_sc(appctx); struct connection *conn; thread_isolate(); @@ -3598,18 +3584,6 @@ static int cli_io_handler_dump_sess(struct appctx *appctx) goto done; } - /* FIXME: Don't watch the other side !*/ - if (unlikely(sc_opposite(sc)->flags & SC_FL_SHUT_DONE)) { - /* If we're forced to shut down, we might have to remove our - * reference to the last stream being dumped. - */ - if (!LIST_ISEMPTY(&ctx->bref.users)) { - LIST_DELETE(&ctx->bref.users); - LIST_INIT(&ctx->bref.users); - } - goto done; - } - chunk_reset(&trash); /* first, let's detach the back-ref from a possible previous stream */ @@ -3666,7 +3640,7 @@ static int cli_io_handler_dump_sess(struct appctx *appctx) LIST_APPEND(&curr_strm->back_refs, &ctx->bref.users); /* call the proper dump() function and return if we're missing space */ - if (!stats_dump_full_strm_to_buffer(sc, curr_strm)) + if (!stats_dump_full_strm_to_buffer(appctx, curr_strm)) goto full; /* stream dump complete */ @@ -4036,6 +4010,19 @@ static int smp_fetch_id32(const struct arg *args, struct sample *smp, const char return 1; } +static int smp_fetch_redispatched(const struct arg *args, struct sample *smp, const char *km, void *private) +{ + smp->flags = SMP_F_VOL_TXN; + smp->data.type = SMP_T_BOOL; + if (!smp->strm) + return 0; + + if (!sc_state_in(smp->strm->scb->state, SC_SB_DIS|SC_SB_CLO)) + smp->flags |= SMP_F_VOL_TEST; + smp->data.u.sint = !!(smp->strm->flags & SF_REDISP); + return 1; +} + /* Note: must not be declared <const> as its 
list will be overwritten. * Please take care of keeping this list alphabetically sorted. */ @@ -4047,6 +4034,7 @@ static struct sample_fetch_kw_list smp_kws = {ILH, { { "last_rule_line", smp_fetch_last_rule_line, 0, NULL, SMP_T_SINT, SMP_USE_INTRN, }, { "txn.conn_retries", smp_fetch_conn_retries, 0, NULL, SMP_T_SINT, SMP_USE_L4SRV, }, { "txn.id32", smp_fetch_id32, 0, NULL, SMP_T_SINT, SMP_USE_INTRN, }, + { "txn.redispatched", smp_fetch_redispatched, 0, NULL, SMP_T_BOOL, SMP_USE_L4SRV, }, { "txn.sess_term_state",smp_fetch_sess_term_state, 0, NULL, SMP_T_STR, SMP_USE_INTRN, }, { NULL, NULL, 0, 0, 0 }, }}; diff --git a/src/systemd.c b/src/systemd.c new file mode 100644 index 0000000..fb36dd9 --- /dev/null +++ b/src/systemd.c @@ -0,0 +1,134 @@ +/* SPDX-License-Identifier: MIT-0 */ + +/* Implement the systemd notify protocol without external dependencies. + * Supports both readiness notification on startup and on reloading, + * according to the protocol defined at: + * https://www.freedesktop.org/software/systemd/man/latest/sd_notify.html + * This protocol is guaranteed to be stable as per: + * https://systemd.io/PORTABILITY_AND_STABILITY/ + * + */ + +#include <errno.h> +#include <inttypes.h> +#include <signal.h> +#include <stdbool.h> +#include <stddef.h> +#include <stdlib.h> +#include <stdio.h> +#include <sys/socket.h> +#include <sys/un.h> +#include <time.h> +#include <unistd.h> +#include <stdarg.h> + +#include <haproxy/tools.h> + +/* + * standalone reimplementation of sd_notify from the libsystemd + * Return: + * -errno in case of error + * 0 when ignored + * >0 when succeeded + * + * Will send <message> over the NOTIFY_SOCKET. + * When unset_environement is set, unsetenv NOTIFY_SOCKET. 
+ */ +int sd_notify(int unset_environment, const char *message) +{ + union sockaddr_union { + struct sockaddr sa; + struct sockaddr_un sun; + } socket_addr = { + .sun.sun_family = AF_UNIX, + }; + int ret = 1; + int fd = -1; + size_t path_length, message_length; + const char *socket_path; + ssize_t written; + + socket_path = getenv("NOTIFY_SOCKET"); + if (!socket_path) { + ret = 0; /* Not running under systemd? Nothing to do */ + goto end; + } + + if (unset_environment) + unsetenv("NOTIFY_SOCKET"); + + if (!message) { + ret = -EINVAL; + goto end; + } + + message_length = strlen(message); + if (message_length == 0) { + ret = -EINVAL; + goto end; + } + + /* Only AF_UNIX is supported, with path or abstract sockets */ + if (socket_path[0] != '/' && socket_path[0] != '@') { + ret = -EAFNOSUPPORT; + goto end; + } + + path_length = strlen(socket_path); + /* Ensure there is room for NUL byte */ + if (path_length >= sizeof(socket_addr.sun.sun_path)) { + ret = -E2BIG; + goto end; + } + + memcpy(socket_addr.sun.sun_path, socket_path, path_length); + + /* Support for abstract socket */ + if (socket_addr.sun.sun_path[0] == '@') + socket_addr.sun.sun_path[0] = 0; + + fd = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC, 0); + if (fd < 0) { + ret = -errno; + goto end; + } + + if (connect(fd, &socket_addr.sa, offsetof(struct sockaddr_un, sun_path) + path_length) != 0) { + ret = -errno; + goto end; + } + + written = write(fd, message, message_length); + if (written != (ssize_t) message_length) { + ret = written < 0 ? -errno : -EPROTO; + goto end; + } + +end: + if (fd > -1) + close(fd); + return ret; /* Notified! */ +} + +/* va_args variant of sd_notify */ +int sd_notifyf(int unset_environment, const char *format, ...) 
+{ + int r; + va_list args; + char *strp = NULL; + + va_start(args, format); + strp = memvprintf(&strp, format, args); + va_end(args); + + if (strp == NULL) { + r = -ENOMEM; + goto end; + } + + r = sd_notify(unset_environment, strp); + free(strp); +end: + return r; +} + diff --git a/src/tcp_act.c b/src/tcp_act.c index 8b44047..c9c4a5c 100644 --- a/src/tcp_act.c +++ b/src/tcp_act.c @@ -71,6 +71,29 @@ static enum act_return tcp_action_attach_srv(struct act_rule *rule, struct proxy return ACT_RET_CONT; } +/* tries to extract integer value from rule's argument: + * if expr is set, computes expr and sets the result into <value> + * else, it's already a numerical value, use it as-is. + * + * Returns 1 on success and 0 on failure. + */ +static int extract_int_from_rule(struct act_rule *rule, + struct proxy *px, struct session *sess, struct stream *s, + int *value) +{ + struct sample *smp; + + if (!rule->arg.expr_int.expr) { + *value = rule->arg.expr_int.value; + return 1; + } + smp = sample_fetch_as_type(px, sess, s, SMP_OPT_DIR_REQ|SMP_OPT_FINAL, rule->arg.expr_int.expr, SMP_T_SINT); + if (!smp) + return 0; + *value = smp->data.u.sint; + return 1; +} + /* * Execute the "set-src" action. May be called from {tcp,http}request. * It only changes the address and tries to preserve the original port. 
If the @@ -389,19 +412,57 @@ static enum act_return tcp_exec_action_silent_drop(struct act_rule *rule, struct #if defined(SO_MARK) || defined(SO_USER_COOKIE) || defined(SO_RTABLE) -static enum act_return tcp_action_set_mark(struct act_rule *rule, struct proxy *px, - struct session *sess, struct stream *s, int flags) +static enum act_return tcp_action_set_fc_mark(struct act_rule *rule, struct proxy *px, + struct session *sess, struct stream *s, int flags) +{ + unsigned int mark; + + if (extract_int_from_rule(rule, px, sess, s, (int *)&mark)) + conn_set_mark(objt_conn(sess->origin), mark); + return ACT_RET_CONT; +} +static enum act_return tcp_action_set_bc_mark(struct act_rule *rule, struct proxy *px, + struct session *sess, struct stream *s, int flags) { - conn_set_mark(objt_conn(sess->origin), (uintptr_t)rule->arg.act.p[0]); + struct connection __maybe_unused *conn = (s && s->scb) ? sc_conn(s->scb) : NULL; + unsigned int mark; + + BUG_ON(!s || conn); + if (extract_int_from_rule(rule, px, sess, s, (int *)&mark)) { + /* connection does not exist yet, ensure it will be applied + * before connection is used by saving it within the stream + */ + s->bc_mark = mark; + s->flags |= SF_BC_MARK; + } return ACT_RET_CONT; } #endif #ifdef IP_TOS -static enum act_return tcp_action_set_tos(struct act_rule *rule, struct proxy *px, - struct session *sess, struct stream *s, int flags) +static enum act_return tcp_action_set_fc_tos(struct act_rule *rule, struct proxy *px, + struct session *sess, struct stream *s, int flags) +{ + int tos; + + if (extract_int_from_rule(rule, px, sess, s, &tos)) + conn_set_tos(objt_conn(sess->origin), tos); + return ACT_RET_CONT; +} +static enum act_return tcp_action_set_bc_tos(struct act_rule *rule, struct proxy *px, + struct session *sess, struct stream *s, int flags) { - conn_set_tos(objt_conn(sess->origin), (uintptr_t)rule->arg.act.p[0]); + struct connection __maybe_unused *conn = (s && s->scb) ? 
sc_conn(s->scb) : NULL; + int tos; + + BUG_ON(!s || conn); + if (extract_int_from_rule(rule, px, sess, s, &tos)) { + /* connection does not exist yet, ensure it will be applied + * before connection is used by saving it within the stream + */ + s->bc_tos = tos; + s->flags |= SF_BC_TOS; + } return ACT_RET_CONT; } #endif @@ -423,6 +484,14 @@ static void release_set_src_dst_action(struct act_rule *rule) release_sample_expr(rule->arg.expr); } +/* + * Release expr_int rule argument when action is no longer used + */ +static __maybe_unused void release_expr_int_action(struct act_rule *rule) +{ + release_sample_expr(rule->arg.expr_int.expr); +} + static int tcp_check_attach_srv(struct act_rule *rule, struct proxy *px, char **err) { struct proxy *be = NULL; @@ -451,10 +520,16 @@ static int tcp_check_attach_srv(struct act_rule *rule, struct proxy *px, char ** return 0; } - if ((rule->arg.attach_srv.name && (!srv->use_ssl || !srv->sni_expr)) || - (!rule->arg.attach_srv.name && srv->use_ssl && srv->sni_expr)) { - memprintf(err, "attach-srv rule: connection will never be used; either specify name argument in conjunction with defined SSL SNI on targeted server or none of these"); - return 0; + if (rule->arg.attach_srv.name) { + if (!srv->pool_conn_name) { + memprintf(err, "attach-srv rule has a name argument while server '%s/%s' does not use pool-conn-name; either reconfigure the server or remove the name argument from this attach-srv rule", ist0(be_name), ist0(sv_name)); + return 0; + } + } else { + if (srv->pool_conn_name) { + memprintf(err, "attach-srv rule has no name argument while server '%s/%s' uses pool-conn-name; either add a name argument to the attach-srv rule or reconfigure the server", ist0(be_name), ist0(sv_name)); + return 0; + } } rule->arg.attach_srv.srv = srv; @@ -565,29 +640,56 @@ static enum act_parse_ret tcp_parse_set_src_dst(const char **args, int *orig_arg /* Parse a "set-mark" action. It takes the MARK value as argument. 
It returns * ACT_RET_PRS_OK on success, ACT_RET_PRS_ERR on error. */ -static enum act_parse_ret tcp_parse_set_mark(const char **args, int *cur_arg, struct proxy *px, - struct act_rule *rule, char **err) +static enum act_parse_ret tcp_parse_set_mark(const char **args, int *orig_arg, struct proxy *px, + struct act_rule *rule, char **err) { #if defined(SO_MARK) || defined(SO_USER_COOKIE) || defined(SO_RTABLE) + struct sample_expr *expr; char *endp; - unsigned int mark; + unsigned int where; + int cur_arg = *orig_arg; - if (!*args[*cur_arg]) { - memprintf(err, "expects exactly 1 argument (integer/hex value)"); + if (!*args[*orig_arg]) { + memprintf(err, "expects an argument"); return ACT_RET_PRS_ERR; } - mark = strtoul(args[*cur_arg], &endp, 0); - if (endp && *endp != '\0') { - memprintf(err, "invalid character starting at '%s' (integer/hex value expected)", endp); - return ACT_RET_PRS_ERR; + + /* value may be either an unsigned integer or an expression */ + rule->arg.expr_int.expr = NULL; + rule->arg.expr_int.value = strtoul(args[*orig_arg], &endp, 0); + if (*endp == '\0') { + /* valid unsigned integer */ + (*orig_arg)++; } + else { + /* invalid unsigned integer, fallback to expr */ + expr = sample_parse_expr((char **)args, orig_arg, px->conf.args.file, px->conf.args.line, err, &px->conf.args, NULL); + if (!expr) + return ACT_RET_PRS_ERR; - (*cur_arg)++; + where = 0; + if (px->cap & PR_CAP_FE) + where |= SMP_VAL_FE_HRQ_HDR; + if (px->cap & PR_CAP_BE) + where |= SMP_VAL_BE_HRQ_HDR; + + if (!(expr->fetch->val & where)) { + memprintf(err, + "fetch method '%s' extracts information from '%s', none of which is available here", + args[cur_arg-1], sample_src_names(expr->fetch->use)); + free(expr); + return ACT_RET_PRS_ERR; + } + rule->arg.expr_int.expr = expr; + } /* Register processing function. 
*/ - rule->action_ptr = tcp_action_set_mark; + if (strcmp("set-bc-mark", args[cur_arg - 1]) == 0) + rule->action_ptr = tcp_action_set_bc_mark; + else + rule->action_ptr = tcp_action_set_fc_mark; // fc mark rule->action = ACT_CUSTOM; - rule->arg.act.p[0] = (void *)(uintptr_t)mark; + rule->release_ptr = release_expr_int_action; global.last_checks |= LSTCHK_NETADM; return ACT_RET_PRS_OK; #else @@ -600,29 +702,56 @@ static enum act_parse_ret tcp_parse_set_mark(const char **args, int *cur_arg, st /* Parse a "set-tos" action. It takes the TOS value as argument. It returns * ACT_RET_PRS_OK on success, ACT_RET_PRS_ERR on error. */ -static enum act_parse_ret tcp_parse_set_tos(const char **args, int *cur_arg, struct proxy *px, - struct act_rule *rule, char **err) +static enum act_parse_ret tcp_parse_set_tos(const char **args, int *orig_arg, struct proxy *px, + struct act_rule *rule, char **err) { #ifdef IP_TOS + struct sample_expr *expr; char *endp; - int tos; + unsigned int where; + int cur_arg = *orig_arg; - if (!*args[*cur_arg]) { - memprintf(err, "expects exactly 1 argument (integer/hex value)"); + if (!*args[*orig_arg]) { + memprintf(err, "expects an argument"); return ACT_RET_PRS_ERR; } - tos = strtol(args[*cur_arg], &endp, 0); - if (endp && *endp != '\0') { - memprintf(err, "invalid character starting at '%s' (integer/hex value expected)", endp); - return ACT_RET_PRS_ERR; + + /* value may be either an integer or an expression */ + rule->arg.expr_int.expr = NULL; + rule->arg.expr_int.value = strtol(args[*orig_arg], &endp, 0); + if (*endp == '\0') { + /* valid integer */ + (*orig_arg)++; } + else { + /* invalid unsigned integer, fallback to expr */ + expr = sample_parse_expr((char **)args, orig_arg, px->conf.args.file, px->conf.args.line, err, &px->conf.args, NULL); + if (!expr) + return ACT_RET_PRS_ERR; - (*cur_arg)++; + where = 0; + if (px->cap & PR_CAP_FE) + where |= SMP_VAL_FE_HRQ_HDR; + if (px->cap & PR_CAP_BE) + where |= SMP_VAL_BE_HRQ_HDR; + + if 
(!(expr->fetch->val & where)) { + memprintf(err, + "fetch method '%s' extracts information from '%s', none of which is available here", + args[cur_arg-1], sample_src_names(expr->fetch->use)); + free(expr); + return ACT_RET_PRS_ERR; + } + rule->arg.expr_int.expr = expr; + } /* Register processing function. */ - rule->action_ptr = tcp_action_set_tos; + if (strcmp("set-bc-tos", args[cur_arg - 1]) == 0) + rule->action_ptr = tcp_action_set_bc_tos; + else + rule->action_ptr = tcp_action_set_fc_tos; // fc tos rule->action = ACT_CUSTOM; - rule->arg.act.p[0] = (void *)(uintptr_t)tos; + rule->release_ptr = release_expr_int_action; return ACT_RET_PRS_OK; #else memprintf(err, "not supported on this platform (IP_TOS undefined)"); @@ -672,10 +801,12 @@ static enum act_parse_ret tcp_parse_silent_drop(const char **args, int *cur_arg, static struct action_kw_list tcp_req_conn_actions = {ILH, { { "set-dst" , tcp_parse_set_src_dst }, { "set-dst-port", tcp_parse_set_src_dst }, - { "set-mark", tcp_parse_set_mark }, + { "set-fc-mark", tcp_parse_set_mark }, + { "set-fc-tos", tcp_parse_set_tos }, + { "set-mark", tcp_parse_set_mark }, // DEPRECATED, see set-fc-mark { "set-src", tcp_parse_set_src_dst }, { "set-src-port", tcp_parse_set_src_dst }, - { "set-tos", tcp_parse_set_tos }, + { "set-tos", tcp_parse_set_tos }, // DEPRECATED, see set-fc-tos { "silent-drop", tcp_parse_silent_drop }, { /* END */ } }}; @@ -686,10 +817,12 @@ static struct action_kw_list tcp_req_sess_actions = {ILH, { { "attach-srv" , tcp_parse_attach_srv }, { "set-dst" , tcp_parse_set_src_dst }, { "set-dst-port", tcp_parse_set_src_dst }, - { "set-mark", tcp_parse_set_mark }, + { "set-fc-mark", tcp_parse_set_mark }, + { "set-fc-tos", tcp_parse_set_tos }, + { "set-mark", tcp_parse_set_mark }, // DEPRECATED, see set-fc-mark { "set-src", tcp_parse_set_src_dst }, { "set-src-port", tcp_parse_set_src_dst }, - { "set-tos", tcp_parse_set_tos }, + { "set-tos", tcp_parse_set_tos }, // DEPRECATED, see set-fc-tos { "silent-drop", 
tcp_parse_silent_drop }, { /* END */ } }}; @@ -697,12 +830,16 @@ static struct action_kw_list tcp_req_sess_actions = {ILH, { INITCALL1(STG_REGISTER, tcp_req_sess_keywords_register, &tcp_req_sess_actions); static struct action_kw_list tcp_req_cont_actions = {ILH, { - { "set-src", tcp_parse_set_src_dst }, - { "set-src-port", tcp_parse_set_src_dst }, + { "set-bc-mark", tcp_parse_set_mark }, + { "set-bc-tos", tcp_parse_set_tos }, { "set-dst" , tcp_parse_set_src_dst }, { "set-dst-port", tcp_parse_set_src_dst }, - { "set-mark", tcp_parse_set_mark }, - { "set-tos", tcp_parse_set_tos }, + { "set-fc-mark", tcp_parse_set_mark }, + { "set-fc-tos", tcp_parse_set_tos }, + { "set-mark", tcp_parse_set_mark }, // DEPRECATED, see set-fc-mark + { "set-src", tcp_parse_set_src_dst }, + { "set-src-port", tcp_parse_set_src_dst }, + { "set-tos", tcp_parse_set_tos }, // DEPRECATED, see set-fc-tos { "silent-drop", tcp_parse_silent_drop }, { /* END */ } }}; @@ -710,8 +847,10 @@ static struct action_kw_list tcp_req_cont_actions = {ILH, { INITCALL1(STG_REGISTER, tcp_req_cont_keywords_register, &tcp_req_cont_actions); static struct action_kw_list tcp_res_cont_actions = {ILH, { - { "set-mark", tcp_parse_set_mark }, - { "set-tos", tcp_parse_set_tos }, + { "set-fc-mark", tcp_parse_set_mark }, + { "set-fc-tos", tcp_parse_set_tos }, + { "set-mark", tcp_parse_set_mark }, // DEPRECATED, see set-fc-mark + { "set-tos", tcp_parse_set_tos }, // DEPRECATED, see set-fc-tos { "silent-drop", tcp_parse_silent_drop }, { /* END */ } }}; @@ -719,12 +858,16 @@ static struct action_kw_list tcp_res_cont_actions = {ILH, { INITCALL1(STG_REGISTER, tcp_res_cont_keywords_register, &tcp_res_cont_actions); static struct action_kw_list http_req_actions = {ILH, { + { "set-bc-mark", tcp_parse_set_mark }, + { "set-bc-tos", tcp_parse_set_tos }, { "set-dst", tcp_parse_set_src_dst }, { "set-dst-port", tcp_parse_set_src_dst }, - { "set-mark", tcp_parse_set_mark }, + { "set-fc-mark", tcp_parse_set_mark }, + { "set-fc-tos", 
tcp_parse_set_tos }, + { "set-mark", tcp_parse_set_mark }, // DEPRECATED, see set-fc-mark { "set-src", tcp_parse_set_src_dst }, { "set-src-port", tcp_parse_set_src_dst }, - { "set-tos", tcp_parse_set_tos }, + { "set-tos", tcp_parse_set_tos }, // DEPRECATED, see set-fc-tos { "silent-drop", tcp_parse_silent_drop }, { /* END */ } }}; @@ -732,8 +875,10 @@ static struct action_kw_list http_req_actions = {ILH, { INITCALL1(STG_REGISTER, http_req_keywords_register, &http_req_actions); static struct action_kw_list http_res_actions = {ILH, { - { "set-mark", tcp_parse_set_mark }, - { "set-tos", tcp_parse_set_tos }, + { "set-fc-mark", tcp_parse_set_mark }, + { "set-fc-tos", tcp_parse_set_tos }, + { "set-mark", tcp_parse_set_mark }, // DEPRECATED, see set-fc-mark + { "set-tos", tcp_parse_set_tos }, // DEPRECATED, see set-fc-tos { "silent-drop", tcp_parse_silent_drop }, { /* END */ } }}; diff --git a/src/tcpcheck.c b/src/tcpcheck.c index d30ecb5..b4f9590 100644 --- a/src/tcpcheck.c +++ b/src/tcpcheck.c @@ -75,26 +75,13 @@ DECLARE_POOL(pool_head_tcpcheck_rule, "tcpcheck_rule", sizeof(struct tcpcheck_ru /**************************************************************************/ /*************** Init/deinit tcp-check rules and ruleset ******************/ /**************************************************************************/ -/* Releases memory allocated for a log-format string */ -static void free_tcpcheck_fmt(struct list *fmt) -{ - struct logformat_node *lf, *lfb; - - list_for_each_entry_safe(lf, lfb, fmt, list) { - LIST_DELETE(&lf->list); - release_sample_expr(lf->expr); - free(lf->arg); - free(lf); - } -} - /* Releases memory allocated for an HTTP header used in a tcp-check send rule */ void free_tcpcheck_http_hdr(struct tcpcheck_http_hdr *hdr) { if (!hdr) return; - free_tcpcheck_fmt(&hdr->value); + lf_expr_deinit(&hdr->value); istfree(&hdr->name); free(hdr); } @@ -131,28 +118,28 @@ void free_tcpcheck(struct tcpcheck_rule *rule, int in_pool) break; case 
TCPCHK_SEND_STRING_LF: case TCPCHK_SEND_BINARY_LF: - free_tcpcheck_fmt(&rule->send.fmt); + lf_expr_deinit(&rule->send.fmt); break; case TCPCHK_SEND_HTTP: free(rule->send.http.meth.str.area); if (!(rule->send.http.flags & TCPCHK_SND_HTTP_FL_URI_FMT)) istfree(&rule->send.http.uri); else - free_tcpcheck_fmt(&rule->send.http.uri_fmt); + lf_expr_deinit(&rule->send.http.uri_fmt); istfree(&rule->send.http.vsn); free_tcpcheck_http_hdrs(&rule->send.http.hdrs); if (!(rule->send.http.flags & TCPCHK_SND_HTTP_FL_BODY_FMT)) istfree(&rule->send.http.body); else - free_tcpcheck_fmt(&rule->send.http.body_fmt); + lf_expr_deinit(&rule->send.http.body_fmt); break; case TCPCHK_SEND_UNDEF: break; } break; case TCPCHK_ACT_EXPECT: - free_tcpcheck_fmt(&rule->expect.onerror_fmt); - free_tcpcheck_fmt(&rule->expect.onsuccess_fmt); + lf_expr_deinit(&rule->expect.onerror_fmt); + lf_expr_deinit(&rule->expect.onsuccess_fmt); release_sample_expr(rule->expect.status_expr); switch (rule->expect.type) { case TCPCHK_EXPECT_HTTP_STATUS: @@ -172,20 +159,20 @@ void free_tcpcheck(struct tcpcheck_rule *rule, int in_pool) case TCPCHK_EXPECT_STRING_LF: case TCPCHK_EXPECT_BINARY_LF: case TCPCHK_EXPECT_HTTP_BODY_LF: - free_tcpcheck_fmt(&rule->expect.fmt); + lf_expr_deinit(&rule->expect.fmt); break; case TCPCHK_EXPECT_HTTP_HEADER: if (rule->expect.flags & TCPCHK_EXPT_FL_HTTP_HNAME_REG) regex_free(rule->expect.hdr.name_re); else if (rule->expect.flags & TCPCHK_EXPT_FL_HTTP_HNAME_FMT) - free_tcpcheck_fmt(&rule->expect.hdr.name_fmt); + lf_expr_deinit(&rule->expect.hdr.name_fmt); else istfree(&rule->expect.hdr.name); if (rule->expect.flags & TCPCHK_EXPT_FL_HTTP_HVAL_REG) regex_free(rule->expect.hdr.value_re); else if (rule->expect.flags & TCPCHK_EXPT_FL_HTTP_HVAL_FMT) - free_tcpcheck_fmt(&rule->expect.hdr.value_fmt); + lf_expr_deinit(&rule->expect.hdr.value_fmt); else if (!(rule->expect.flags & TCPCHK_EXPT_FL_HTTP_HVAL_NONE)) istfree(&rule->expect.hdr.value); break; @@ -434,7 +421,7 @@ static void 
tcpcheck_expect_onerror_message(struct buffer *msg, struct check *ch chunk_istcat(msg, info); goto comment; } - else if (!LIST_ISEMPTY(&rule->expect.onerror_fmt)) { + else if (!lf_expr_isempty(&rule->expect.onerror_fmt)) { msg->data += sess_build_logline(check->sess, NULL, b_tail(msg), b_room(msg), &rule->expect.onerror_fmt); goto comment; } @@ -529,7 +516,7 @@ static void tcpcheck_expect_onsuccess_message(struct buffer *msg, struct check * */ if (istlen(info)) chunk_istcat(msg, info); - if (!LIST_ISEMPTY(&rule->expect.onsuccess_fmt)) + if (!lf_expr_isempty(&rule->expect.onsuccess_fmt)) msg->data += sess_build_logline(check->sess, NULL, b_tail(msg), b_room(msg), &rule->expect.onsuccess_fmt); else if (check->type == PR_O2_TCPCHK_CHK && @@ -1697,7 +1684,7 @@ enum tcpcheck_eval_ret tcpcheck_eval_expect_http(struct check *check, struct tcp /* Set status and description in case of error */ status = ((status != HCHK_STATUS_UNKNOWN) ? status : HCHK_STATUS_L7STS); - if (LIST_ISEMPTY(&expect->onerror_fmt)) + if (lf_expr_isempty(&expect->onerror_fmt)) desc = htx_sl_res_reason(sl); break; case TCPCHK_EXPECT_HTTP_STATUS_REGEX: @@ -1705,7 +1692,7 @@ enum tcpcheck_eval_ret tcpcheck_eval_expect_http(struct check *check, struct tcp /* Set status and description in case of error */ status = ((status != HCHK_STATUS_UNKNOWN) ? status : HCHK_STATUS_L7STS); - if (LIST_ISEMPTY(&expect->onerror_fmt)) + if (lf_expr_isempty(&expect->onerror_fmt)) desc = htx_sl_res_reason(sl); break; @@ -1836,7 +1823,7 @@ enum tcpcheck_eval_ret tcpcheck_eval_expect_http(struct check *check, struct tcp end_of_match: status = ((status != HCHK_STATUS_UNKNOWN) ? status : HCHK_STATUS_L7STS); - if (LIST_ISEMPTY(&expect->onerror_fmt)) + if (lf_expr_isempty(&expect->onerror_fmt)) desc = htx_sl_res_reason(sl); break; } @@ -1863,7 +1850,7 @@ enum tcpcheck_eval_ret tcpcheck_eval_expect_http(struct check *check, struct tcp goto wait_more_data; } status = ((status != HCHK_STATUS_UNKNOWN) ? 
status : HCHK_STATUS_L7RSP); - if (LIST_ISEMPTY(&expect->onerror_fmt)) + if (lf_expr_isempty(&expect->onerror_fmt)) desc = ist("HTTP content check could not find a response body"); TRACE_ERROR("no response boduy found while expected", CHK_EV_TCPCHK_EXP|CHK_EV_TCPCHK_ERR, check); goto error; @@ -1912,7 +1899,7 @@ enum tcpcheck_eval_ret tcpcheck_eval_expect_http(struct check *check, struct tcp /* Set status and description in case of error */ status = ((status != HCHK_STATUS_UNKNOWN) ? status : HCHK_STATUS_L7RSP); - if (LIST_ISEMPTY(&expect->onerror_fmt)) + if (lf_expr_isempty(&expect->onerror_fmt)) desc = (inverse ? ist("HTTP check matched unwanted content") : ist("HTTP content check did not match")); @@ -2649,7 +2636,7 @@ struct tcpcheck_rule *parse_tcpcheck_send(char **args, int cur_arg, struct proxy } case TCPCHK_SEND_STRING_LF: case TCPCHK_SEND_BINARY_LF: - LIST_INIT(&chk->send.fmt); + lf_expr_init(&chk->send.fmt); px->conf.args.ctx = ARGC_SRV; if (!parse_logformat_string(data, px, &chk->send.fmt, 0, SMP_VAL_BE_CHK_RUL, errmsg)) { memprintf(errmsg, "'%s' invalid log-format string (%s).\n", data, *errmsg); @@ -2790,7 +2777,7 @@ struct tcpcheck_rule *parse_tcpcheck_send_http(char **args, int cur_arg, struct } if (uri) { if (chk->send.http.flags & TCPCHK_SND_HTTP_FL_URI_FMT) { - LIST_INIT(&chk->send.http.uri_fmt); + lf_expr_init(&chk->send.http.uri_fmt); px->conf.args.ctx = ARGC_SRV; if (!parse_logformat_string(uri, px, &chk->send.http.uri_fmt, 0, SMP_VAL_BE_CHK_RUL, errmsg)) { memprintf(errmsg, "'%s' invalid log-format string (%s).\n", uri, *errmsg); @@ -2818,7 +2805,7 @@ struct tcpcheck_rule *parse_tcpcheck_send_http(char **args, int cur_arg, struct memprintf(errmsg, "out of memory"); goto error; } - LIST_INIT(&hdr->value); + lf_expr_init(&hdr->value); hdr->name = istdup(hdrs[i].n); if (!isttest(hdr->name)) { memprintf(errmsg, "out of memory"); @@ -2834,7 +2821,7 @@ struct tcpcheck_rule *parse_tcpcheck_send_http(char **args, int cur_arg, struct if (body) { if 
(chk->send.http.flags & TCPCHK_SND_HTTP_FL_BODY_FMT) { - LIST_INIT(&chk->send.http.body_fmt); + lf_expr_init(&chk->send.http.body_fmt); px->conf.args.ctx = ARGC_SRV; if (!parse_logformat_string(body, px, &chk->send.http.body_fmt, 0, SMP_VAL_BE_CHK_RUL, errmsg)) { memprintf(errmsg, "'%s' invalid log-format string (%s).\n", body, *errmsg); @@ -3301,8 +3288,8 @@ struct tcpcheck_rule *parse_tcpcheck_expect(char **args, int cur_arg, struct pro goto error; } chk->action = TCPCHK_ACT_EXPECT; - LIST_INIT(&chk->expect.onerror_fmt); - LIST_INIT(&chk->expect.onsuccess_fmt); + lf_expr_init(&chk->expect.onerror_fmt); + lf_expr_init(&chk->expect.onsuccess_fmt); chk->comment = comment; comment = NULL; chk->expect.type = type; chk->expect.min_recv = min_recv; @@ -3395,7 +3382,7 @@ struct tcpcheck_rule *parse_tcpcheck_expect(char **args, int cur_arg, struct pro case TCPCHK_EXPECT_STRING_LF: case TCPCHK_EXPECT_BINARY_LF: case TCPCHK_EXPECT_HTTP_BODY_LF: - LIST_INIT(&chk->expect.fmt); + lf_expr_init(&chk->expect.fmt); px->conf.args.ctx = ARGC_SRV; if (!parse_logformat_string(pattern, px, &chk->expect.fmt, 0, SMP_VAL_BE_CHK_RUL, errmsg)) { memprintf(errmsg, "'%s' invalid log-format string (%s).\n", pattern, *errmsg); @@ -3415,7 +3402,7 @@ struct tcpcheck_rule *parse_tcpcheck_expect(char **args, int cur_arg, struct pro } else if (chk->expect.flags & TCPCHK_EXPT_FL_HTTP_HNAME_FMT) { px->conf.args.ctx = ARGC_SRV; - LIST_INIT(&chk->expect.hdr.name_fmt); + lf_expr_init(&chk->expect.hdr.name_fmt); if (!parse_logformat_string(npat, px, &chk->expect.hdr.name_fmt, 0, SMP_VAL_BE_CHK_RUL, errmsg)) { memprintf(errmsg, "'%s' invalid log-format string (%s).\n", npat, *errmsg); goto error; @@ -3445,7 +3432,7 @@ struct tcpcheck_rule *parse_tcpcheck_expect(char **args, int cur_arg, struct pro } else if (chk->expect.flags & TCPCHK_EXPT_FL_HTTP_HVAL_FMT) { px->conf.args.ctx = ARGC_SRV; - LIST_INIT(&chk->expect.hdr.value_fmt); + lf_expr_init(&chk->expect.hdr.value_fmt); if (!parse_logformat_string(vpat, 
px, &chk->expect.hdr.value_fmt, 0, SMP_VAL_BE_CHK_RUL, errmsg)) { memprintf(errmsg, "'%s' invalid log-format string (%s).\n", npat, *errmsg); goto error; @@ -3497,7 +3484,6 @@ struct tcpcheck_rule *parse_tcpcheck_expect(char **args, int cur_arg, struct pro */ void tcpcheck_overwrite_send_http_rule(struct tcpcheck_rule *old, struct tcpcheck_rule *new) { - struct logformat_node *lf, *lfb; struct tcpcheck_http_hdr *hdr, *bhdr; @@ -3513,22 +3499,19 @@ void tcpcheck_overwrite_send_http_rule(struct tcpcheck_rule *old, struct tcpchec if (!(old->send.http.flags & TCPCHK_SND_HTTP_FL_URI_FMT)) istfree(&old->send.http.uri); else - free_tcpcheck_fmt(&old->send.http.uri_fmt); + lf_expr_deinit(&old->send.http.uri_fmt); old->send.http.flags &= ~TCPCHK_SND_HTTP_FL_URI_FMT; old->send.http.uri = new->send.http.uri; new->send.http.uri = IST_NULL; } - else if ((new->send.http.flags & TCPCHK_SND_HTTP_FL_URI_FMT) && !LIST_ISEMPTY(&new->send.http.uri_fmt)) { + else if ((new->send.http.flags & TCPCHK_SND_HTTP_FL_URI_FMT) && !lf_expr_isempty(&new->send.http.uri_fmt)) { if (!(old->send.http.flags & TCPCHK_SND_HTTP_FL_URI_FMT)) istfree(&old->send.http.uri); else - free_tcpcheck_fmt(&old->send.http.uri_fmt); + lf_expr_deinit(&old->send.http.uri_fmt); old->send.http.flags |= TCPCHK_SND_HTTP_FL_URI_FMT; - LIST_INIT(&old->send.http.uri_fmt); - list_for_each_entry_safe(lf, lfb, &new->send.http.uri_fmt, list) { - LIST_DELETE(&lf->list); - LIST_APPEND(&old->send.http.uri_fmt, &lf->list); - } + lf_expr_init(&old->send.http.uri_fmt); + lf_expr_xfer(&new->send.http.uri_fmt, &old->send.http.uri_fmt); } if (isttest(new->send.http.vsn)) { @@ -3549,22 +3532,19 @@ void tcpcheck_overwrite_send_http_rule(struct tcpcheck_rule *old, struct tcpchec if (!(old->send.http.flags & TCPCHK_SND_HTTP_FL_BODY_FMT)) istfree(&old->send.http.body); else - free_tcpcheck_fmt(&old->send.http.body_fmt); + lf_expr_deinit(&old->send.http.body_fmt); old->send.http.flags &= ~TCPCHK_SND_HTTP_FL_BODY_FMT; old->send.http.body = 
new->send.http.body; new->send.http.body = IST_NULL; } - else if ((new->send.http.flags & TCPCHK_SND_HTTP_FL_BODY_FMT) && !LIST_ISEMPTY(&new->send.http.body_fmt)) { + else if ((new->send.http.flags & TCPCHK_SND_HTTP_FL_BODY_FMT) && !lf_expr_isempty(&new->send.http.body_fmt)) { if (!(old->send.http.flags & TCPCHK_SND_HTTP_FL_BODY_FMT)) istfree(&old->send.http.body); else - free_tcpcheck_fmt(&old->send.http.body_fmt); + lf_expr_deinit(&old->send.http.body_fmt); old->send.http.flags |= TCPCHK_SND_HTTP_FL_BODY_FMT; - LIST_INIT(&old->send.http.body_fmt); - list_for_each_entry_safe(lf, lfb, &new->send.http.body_fmt, list) { - LIST_DELETE(&lf->list); - LIST_APPEND(&old->send.http.body_fmt, &lf->list); - } + lf_expr_init(&old->send.http.body_fmt); + lf_expr_xfer(&new->send.http.body_fmt, &old->send.http.body_fmt); } } @@ -3815,8 +3795,8 @@ int add_tcpcheck_expect_str(struct tcpcheck_rules *rules, const char *str) expect = &tcpcheck->expect; expect->type = TCPCHK_EXPECT_STRING; - LIST_INIT(&expect->onerror_fmt); - LIST_INIT(&expect->onsuccess_fmt); + lf_expr_init(&expect->onerror_fmt); + lf_expr_init(&expect->onsuccess_fmt); expect->ok_status = HCHK_STATUS_L7OKD; expect->err_status = HCHK_STATUS_L7RSP; expect->tout_status = HCHK_STATUS_L7TOUT; @@ -3877,9 +3857,9 @@ int add_tcpcheck_send_strs(struct tcpcheck_rules *rules, const char * const *str } /* Parses the "tcp-check" proxy keyword */ -static int proxy_parse_tcpcheck(char **args, int section, struct proxy *curpx, - const struct proxy *defpx, const char *file, int line, - char **errmsg) +int proxy_parse_tcpcheck(char **args, int section, struct proxy *curpx, + const struct proxy *defpx, const char *file, int line, + char **errmsg) { struct tcpcheck_ruleset *rs = NULL; struct tcpcheck_rule *chk = NULL; diff --git a/src/thread.c b/src/thread.c index ab4342d..655e199 100644 --- a/src/thread.c +++ b/src/thread.c @@ -1709,6 +1709,35 @@ static int cfg_parse_nbthread(char **args, int section_type, struct proxy *curpx return 0; 
} +/* Parse the "thread-hard-limit" global directive, which takes an integer + * argument that contains the desired maximum number of threads that will + * not be crossed. + */ +static int cfg_parse_thread_hard_limit(char **args, int section_type, struct proxy *curpx, + const struct proxy *defpx, const char *file, int line, + char **err) +{ + long nbthread; + char *errptr; + + if (too_many_args(1, args, err, NULL)) + return -1; + + nbthread = strtol(args[1], &errptr, 10); + if (!*args[1] || *errptr) { + memprintf(err, "'%s' passed a missing or unparsable integer value in '%s'", args[0], args[1]); + return -1; + } + + if (nbthread < 1 || nbthread > MAX_THREADS) { + memprintf(err, "'%s' value must be at least 1 (was %ld)", args[0], nbthread); + return -1; + } + + global.thread_limit = nbthread; + return 0; +} + /* Parse the "thread-group" global directive, which takes an integer argument * that designates a thread group, and a list of threads to put into that group. */ @@ -1855,6 +1884,7 @@ static int cfg_parse_thread_groups(char **args, int section_type, struct proxy * /* config keyword parsers */ static struct cfg_kw_list cfg_kws = {ILH, { + { CFG_GLOBAL, "thread-hard-limit", cfg_parse_thread_hard_limit, 0 }, { CFG_GLOBAL, "nbthread", cfg_parse_nbthread, 0 }, { CFG_GLOBAL, "thread-group", cfg_parse_thread_group, 0 }, { CFG_GLOBAL, "thread-groups", cfg_parse_thread_groups, 0 }, diff --git a/src/tools.c b/src/tools.c index e1ba241..7608e7e 100644 --- a/src/tools.c +++ b/src/tools.c @@ -17,9 +17,14 @@ #endif #if defined(__FreeBSD__) +#include <sys/param.h> +#if __FreeBSD_version < 1300058 #include <elf.h> #include <dlfcn.h> extern void *__elf_aux_vector; +#else +#include <sys/auxv.h> +#endif #endif #if defined(__NetBSD__) @@ -36,6 +41,7 @@ extern void *__elf_aux_vector; #include <string.h> #include <time.h> #include <unistd.h> +#include <sys/mman.h> #include <sys/socket.h> #include <sys/stat.h> #include <sys/types.h> @@ -47,6 +53,10 @@ extern void *__elf_aux_vector; 
#include <sys/auxv.h> #endif +#if defined(USE_PRCTL) +#include <sys/prctl.h> +#endif + #include <import/eb32sctree.h> #include <import/eb32tree.h> #include <import/ebmbtree.h> @@ -1964,11 +1974,11 @@ int addr_is_local(const struct netns_entry *ns, * <map> with the hexadecimal representation of their ASCII-code (2 digits) * prefixed by <escape>, and will store the result between <start> (included) * and <stop> (excluded), and will always terminate the string with a '\0' - * before <stop>. The position of the '\0' is returned if the conversion - * completes. If bytes are missing between <start> and <stop>, then the - * conversion will be incomplete and truncated. If <stop> <= <start>, the '\0' - * cannot even be stored so we return <start> without writing the 0. + * before <stop>. If bytes are missing between <start> and <stop>, then the + * conversion will be incomplete and truncated. * The input string must also be zero-terminated. + * + * Return the address of the \0 character, or NULL on error */ const char hextab[16] = "0123456789ABCDEF"; char *encode_string(char *start, char *stop, @@ -1990,8 +2000,9 @@ char *encode_string(char *start, char *stop, string++; } *start = '\0'; + return start; } - return start; + return NULL; } /* @@ -2020,8 +2031,9 @@ char *encode_chunk(char *start, char *stop, str++; } *start = '\0'; + return start; } - return start; + return NULL; } /* @@ -2030,8 +2042,9 @@ char *encode_chunk(char *start, char *stop, * is reached or NULL-byte is encountered. The result will * be stored between <start> (included) and <stop> (excluded). This * function will always try to terminate the resulting string with a '\0' - * before <stop>, and will return its position if the conversion - * completes. + * before <stop>. 
+ * + * Return the address of the \0 character, or NULL on error */ char *escape_string(char *start, char *stop, const char escape, const long *map, @@ -2051,10 +2064,169 @@ char *escape_string(char *start, char *stop, string++; } *start = '\0'; + return start; + } + return NULL; +} + +/* CBOR helper to encode an uint64 value with prefix (3bits MAJOR type) + * according to RFC8949 + * + * CBOR encode ctx is provided in <ctx> + * + * Returns the position of the last written byte on success and NULL on + * error. The function cannot write past <stop> + */ +char *cbor_encode_uint64_prefix(struct cbor_encode_ctx *ctx, + char *start, char *stop, uint64_t value, + uint8_t prefix) +{ + int nb_bytes = 0; + + /* + * For encoding logic, see: + * https://www.rfc-editor.org/rfc/rfc8949.html#name-specification-of-the-cbor-e + */ + if (value < 24) { + /* argument is the value itself */ + prefix |= value; + } + else { + if (value <= 0xFFU) { + /* 1-byte */ + nb_bytes = 1; + prefix |= 24; // 0x18 + } + else if (value <= 0xFFFFU) { + /* 2 bytes */ + nb_bytes = 2; + prefix |= 25; // 0x19 + } + else if (value <= 0xFFFFFFFFU) { + /* 4 bytes */ + nb_bytes = 4; + prefix |= 26; // 0x1A + } + else { + /* 8 bytes */ + nb_bytes = 8; + prefix |= 27; // 0x1B + } + } + + start = ctx->e_fct_byte(ctx, start, stop, prefix); + if (start == NULL) + return NULL; + + /* encode 1 byte at a time from higher bits to lower bits */ + while (nb_bytes) { + uint8_t cur_byte = (value >> ((nb_bytes - 1) * 8)) & 0xFFU; + + start = ctx->e_fct_byte(ctx, start, stop, cur_byte); + if (start == NULL) + return NULL; + + nb_bytes--; + } + + return start; +} + +/* CBOR helper to encode an int64 value according to RFC8949 + * + * CBOR encode ctx is provided in <ctx> + * + * Returns the position of the last written byte on success and NULL on + * error. 
The function cannot write past <stop> + */ +char *cbor_encode_int64(struct cbor_encode_ctx *ctx, + char *start, char *stop, int64_t value) +{ + uint64_t absolute_value = llabs(value); + int cbor_prefix; + + /* + * For encoding logic, see: + * https://www.rfc-editor.org/rfc/rfc8949.html#name-specification-of-the-cbor-e + */ + if (value >= 0) + cbor_prefix = 0x00; // unsigned int + else { + cbor_prefix = 0x20; // negative int + /* N-1 for negative int */ + absolute_value -= 1; + } + return cbor_encode_uint64_prefix(ctx, start, stop, + absolute_value, cbor_prefix); +} + +/* CBOR helper to encode a <prefix> string chunk according to RFC8949 + * + * if <bytes> is NULL, then only the <prefix> (with length) will be + * emitted + * + * CBOR encode ctx is provided in <ctx> + * + * Returns the position of the last written byte on success and NULL on + * error. The function cannot write past <stop> + */ +char *cbor_encode_bytes_prefix(struct cbor_encode_ctx *ctx, + char *start, char *stop, + const char *bytes, size_t len, + uint8_t prefix) +{ + + size_t it = 0; + + /* write prefix (with text length as argument) */ + start = cbor_encode_uint64_prefix(ctx, start, stop, + len, prefix); + if (start == NULL) + return NULL; + + /* write actual bytes if provided */ + while (bytes && it < len) { + start = ctx->e_fct_byte(ctx, start, stop, bytes[it]); + if (start == NULL) + return NULL; + it++; } return start; } +/* CBOR helper to encode a text chunk according to RFC8949 + * + * if <text> is NULL, then only the text prefix (with length) will be emitted + * + * CBOR encode ctx is provided in <ctx> + * + * Returns the position of the last written byte on success and NULL on + * error. 
The function cannot write past <stop> + */ +char *cbor_encode_text(struct cbor_encode_ctx *ctx, + char *start, char *stop, + const char *text, size_t len) +{ + return cbor_encode_bytes_prefix(ctx, start, stop, text, len, 0x60); +} + +/* CBOR helper to encode a byte string chunk according to RFC8949 + * + * if <bytes> is NULL, then only the byte string prefix (with length) will be + * emitted + * + * CBOR encode ctx is provided in <ctx> + * + * Returns the position of the last written byte on success and NULL on + * error. The function cannot write past <stop> + */ +char *cbor_encode_bytes(struct cbor_encode_ctx *ctx, + char *start, char *stop, + const char *bytes, size_t len) +{ + return cbor_encode_bytes_prefix(ctx, start, stop, bytes, len, 0x40); +} + /* Check a string for using it in a CSV output format. If the string contains * one of the following four char <">, <,>, CR or LF, the string is * encapsulated between <"> and the <"> are escaped by a <""> sequence. @@ -4900,6 +5072,58 @@ void dump_addr_and_bytes(struct buffer *buf, const char *pfx, const void *addr, } } +/* Dumps the 64 bytes around <addr> at the end of <output> with symbols + * decoding. An optional special pointer may be recognized (special), in + * which case its type (spec_type) and name (spec_name) will be reported. + * This is convenient for pool names but could be used for list heads or + * anything in that vein. 
+*/ +void dump_area_with_syms(struct buffer *output, const void *base, const void *addr, + const void *special, const char *spec_type, const char *spec_name) +{ + const char *start, *end, *p; + const void *tag; + + chunk_appendf(output, "Contents around address %p+%lu=%p:\n", base, (ulong)(addr - base), addr); + + /* dump in word-sized blocks */ + start = (const void *)(((uintptr_t)addr - 32) & -sizeof(void*)); + end = (const void *)(((uintptr_t)addr + 32 + sizeof(void*) - 1) & -sizeof(void*)); + + while (start < end) { + dump_addr_and_bytes(output, " ", start, sizeof(void*)); + chunk_strcat(output, " ["); + for (p = start; p < start + sizeof(void*); p++) { + if (!may_access(p)) + chunk_strcat(output, "*"); + else if (isprint((unsigned char)*p)) + chunk_appendf(output, "%c", *p); + else + chunk_strcat(output, "."); + } + + if (may_access(start)) + tag = *(const void **)start; + else + tag = NULL; + + if (special && tag == special) { + /* the pool can often be there so let's detect it */ + chunk_appendf(output, "] [%s:%s", spec_type, spec_name); + } + else if (tag) { + /* print pointers that resolve to a symbol */ + size_t back_data = output->data; + chunk_strcat(output, "] ["); + if (!resolve_sym_name(output, NULL, tag)) + output->data = back_data; + } + + chunk_strcat(output, "]\n"); + start = p; + } +} + /* print a line of text buffer (limited to 70 bytes) to <out>. The format is : * <2 spaces> <offset=5 digits> <space or plus> <space> <70 chars max> <\n> * which is 60 chars per line. 
Non-printable chars \t, \n, \r and \e are @@ -5018,6 +5242,7 @@ const char *get_exec_path() if (execfn && execfn != ENOENT) ret = (const char *)execfn; #elif defined(__FreeBSD__) +#if __FreeBSD_version < 1300058 Elf_Auxinfo *auxv; for (auxv = __elf_aux_vector; auxv->a_type != AT_NULL; ++auxv) { if (auxv->a_type == AT_EXECPATH) { @@ -5025,6 +5250,14 @@ const char *get_exec_path() break; } } +#else + static char execpath[MAXPATHLEN]; + + if (execpath[0] == '\0') + elf_aux_info(AT_EXECPATH, execpath, MAXPATHLEN); + if (execpath[0] != '\0') + ret = execpath; +#endif #elif defined(__NetBSD__) AuxInfo *auxv; for (auxv = _dlauxinfo(); auxv->a_type != AT_NULL; ++auxv) { @@ -5511,10 +5744,10 @@ void ha_random_jump96(uint32_t dist) } } -/* Generates an RFC4122 UUID into chunk <output> which must be at least 37 - * bytes large. +/* Generates an RFC 9562 version 4 UUID into chunk + * <output> which must be at least 37 bytes large. */ -void ha_generate_uuid(struct buffer *output) +void ha_generate_uuid_v4(struct buffer *output) { uint32_t rnd[4]; uint64_t last; @@ -5535,6 +5768,31 @@ void ha_generate_uuid(struct buffer *output) (long long)((rnd[2] >> 14u) | ((uint64_t) rnd[3] << 18u)) & 0xFFFFFFFFFFFFull); } +/* Generates an RFC 9562 version 7 UUID into chunk + * <output> which must be at least 37 bytes large. + */ +void ha_generate_uuid_v7(struct buffer *output) +{ + uint32_t rnd[3]; + uint64_t last; + uint64_t time; + + time = (date.tv_sec * 1000) + (date.tv_usec / 1000); + last = ha_random64(); + rnd[0] = last; + rnd[1] = last >> 32; + + last = ha_random64(); + rnd[2] = last; + + chunk_printf(output, "%8.8x-%4.4x-%4.4x-%4.4x-%12.12llx", + (uint)(time >> 16u), + (uint)(time & 0xFFFF), + ((rnd[0] >> 16u) & 0xFFF) | 0x7000, // highest 4 bits indicate the uuid version + (rnd[1] & 0x3FFF) | 0x8000, // the highest 2 bits indicate the UUID variant (10), + (long long)((rnd[1] >> 14u) | ((uint64_t) rnd[2] << 18u)) & 0xFFFFFFFFFFFFull); +} + /* only used by parse_line() below. 
It supports writing in place provided that * <in> is updated to the next location before calling it. In that case, the @@ -6206,6 +6464,94 @@ int openssl_compare_current_name(const char *name) return 1; } +/* prctl/PR_SET_VMA wrapper to easily give a name to virtual memory areas, + * knowing their address and size. + * + * It is only intended for use with memory allocated using mmap (private or + * shared anonymous maps) or malloc (provided that <size> is at least one page + * large), which is memory that may be released using munmap(). For memory + * allocated using malloc(), no naming will be attempted if the vma is less + * than one page large, because naming is only relevant for large memory + * blocks. For instance, glibc/malloc() will directly use mmap() once + * MMAP_THRESHOLD is reached (defaults to 128K), and will try to use the + * heap as much as possible below that. + * + * <type> and <name> are mandatory + * <id> is optional, if != ~0, will be used to append an id after the name + * in order to differentiate 2 entries set using the same <type> and <name> + * + * The function does nothing if naming API is not available, and naming errors + * are ignored. 
+ */ +void vma_set_name_id(void *addr, size_t size, const char *type, const char *name, unsigned int id) +{ + long pagesize = sysconf(_SC_PAGESIZE); + void *aligned_addr; + __maybe_unused size_t aligned_size; + + BUG_ON(!type || !name); + + /* prctl/PR_SET/VMA expects the start of an aligned memory address, but + * user may have provided address returned by malloc() which may not be + * aligned nor point to the beginning of the map + */ + aligned_addr = (void *)((uintptr_t)addr & -4096); + aligned_size = (((addr + size) - aligned_addr) + 4095) & -4096; + + if (aligned_addr != addr) { + /* provided pointer likely comes from malloc(), at least it + * doesn't come from mmap() which only returns aligned addresses + */ + if (size < pagesize) + return; + } +#if defined(USE_PRCTL) && defined(PR_SET_VMA) + { + /* + * From Linux 5.17 (and if the `CONFIG_ANON_VMA_NAME` kernel config is set)`, + * anonymous regions can be named. + * We intentionally ignore errors as it should not jeopardize the memory context + * mapping whatsoever (e.g. older kernels). + * + * The naming can take up to 79 characters, accepting valid ASCII values + * except [, ], \, $ and '. + * As a result, when looking for /proc/<pid>/maps, we can see the anonymous range + * as follow : + * `7364c4fff000-736508000000 rw-s 00000000 00:01 3540 [anon_shmem:scope:name{-id}]` + * (MAP_SHARED) + * `7364c4fff000-736508000000 rw-s 00000000 00:01 3540 [anon:scope:name{-id}]` + * (MAP_PRIVATE) + */ + char fullname[80]; + int rn; + + if (id != ~0) + rn = snprintf(fullname, sizeof(fullname), "%s:%s-%u", type, name, id); + else + rn = snprintf(fullname, sizeof(fullname), "%s:%s", type, name); + + if (rn >= 0) { + /* Give a name to the map by setting PR_SET_VMA_ANON_NAME attribute + * using prctl/PR_SET_VMA combination. 
+ * + * note from 'man prctl': + * assigning an attribute to a virtual memory area might prevent it + * from being merged with adjacent virtual memory areas due to the + * difference in that attribute's value. + */ + (void)prctl(PR_SET_VMA, PR_SET_VMA_ANON_NAME, + aligned_addr, aligned_size, fullname); + } + } +#endif +} + +/* wrapper for vma_set_name_id() but without id */ +void vma_set_name(void *addr, size_t size, const char *type, const char *name) +{ + vma_set_name_id(addr, size, type, name, ~0); +} + #if defined(RTLD_DEFAULT) || defined(RTLD_NEXT) /* redefine dlopen() so that we can detect unexpected replacement of some * critical symbols, typically init/alloc/free functions coming from alternate diff --git a/src/trace.c b/src/trace.c index a233c0d..fcf557b 100644 --- a/src/trace.c +++ b/src/trace.c @@ -129,7 +129,7 @@ int __trace_enabled(enum trace_level level, uint64_t mask, struct trace_source * if (!sess && strm) sess = strm->sess; - else if (!sess && conn && LIST_INLIST(&conn->session_list)) + else if (!sess && conn && LIST_INLIST(&conn->sess_el)) sess = conn->owner; else if (!sess && check) sess = check->sess; @@ -376,15 +376,16 @@ static int trace_source_parse_verbosity(struct trace_source *src, const struct name_desc *nd; int ret; + /* Only "quiet" is defined for all sources. Other identifiers are + * specific to trace source. + */ if (strcmp(verbosity, "quiet") == 0) { ret = 0; goto end; } - /* Only "quiet" is defined for all sources. Other identifiers are - * specific to trace source. 
- */ - BUG_ON(!src); + if (!src) + return -1; if (!src->decoding || !src->decoding[0].name) { if (strcmp(verbosity, "default") != 0) @@ -566,10 +567,16 @@ static int trace_parse_statement(char **args, char **msg) } else if (strcmp(args[2], "level") == 0) { const char *name = args[3]; - int level; + int level = -1; - if (!*name) { - chunk_printf(&trash, "Supported trace levels for source %s:\n", src->name.ptr); + if (*name) + level = trace_parse_level(name); + + if (level < 0) { + chunk_reset(&trash); + if (*name) + chunk_appendf(&trash, "No such trace level '%s'. ", name); + chunk_appendf(&trash, "Supported trace levels for source %s:\n", src->name.ptr); chunk_appendf(&trash, " %c error : report errors\n", src->level == TRACE_LEVEL_ERROR ? '*' : ' '); chunk_appendf(&trash, " %c user : also information useful to the end user\n", @@ -584,13 +591,7 @@ static int trace_parse_statement(char **args, char **msg) src->level == TRACE_LEVEL_DEVELOPER ? '*' : ' '); trash.area[trash.data] = 0; *msg = strdup(trash.area); - return LOG_WARNING; - } - - level = trace_parse_level(name); - if (level < 0) { - memprintf(msg, "No such trace level '%s'", name); - return LOG_ERR; + return *name ? LOG_ERR : LOG_WARNING; } HA_ATOMIC_STORE(&src->level, level); @@ -734,10 +735,16 @@ static int trace_parse_statement(char **args, char **msg) else if (strcmp(args[2], "verbosity") == 0) { const char *name = args[3]; const struct name_desc *nd; - int verbosity; + int verbosity = -1; - if (!*name) { - chunk_printf(&trash, "Supported trace verbosities for source %s:\n", src->name.ptr); + if (*name) + verbosity = trace_source_parse_verbosity(src, name); + + if (verbosity < 0) { + chunk_reset(&trash); + if (*name) + chunk_appendf(&trash, "No such verbosity level '%s'. ", name); + chunk_appendf(&trash, "Supported trace verbosities for source %s:\n", src->name.ptr); chunk_appendf(&trash, " %c quiet : only report basic information with no decoding\n", src->verbosity == 0 ? 
'*' : ' '); if (!src->decoding || !src->decoding[0].name) { @@ -751,13 +758,7 @@ static int trace_parse_statement(char **args, char **msg) } trash.area[trash.data] = 0; *msg = strdup(trash.area); - return LOG_WARNING; - } - - verbosity = trace_source_parse_verbosity(src, name); - if (verbosity < 0) { - memprintf(msg, "No such verbosity level '%s'", name); - return LOG_ERR; + return *name ? LOG_ERR : LOG_WARNING; } HA_ATOMIC_STORE(&src->verbosity, verbosity); @@ -837,7 +838,7 @@ int trace_parse_cmd(char *arg, char **errmsg) if (strlen(field)) { level = trace_parse_level(field); if (level < 0) { - memprintf(errmsg, "no such level '%s'", field); + memprintf(errmsg, "no such trace level '%s', available levels are 'error', 'user', 'proto', 'state', 'data', and 'developer'", field); return 1; } } @@ -848,18 +849,23 @@ int trace_parse_cmd(char *arg, char **errmsg) /* 3. verbosity */ field = str; if (strchr(field, ':')) { - memprintf(errmsg, "too many double-colon separator"); - return 1; - } - - if (!src && strcmp(field, "quiet") != 0) { - memprintf(errmsg, "trace source must be specified for verbosity other than 'quiet'"); + memprintf(errmsg, "too many double-colon separators in trace definition"); return 1; } verbosity = trace_source_parse_verbosity(src, field); if (verbosity < 0) { - memprintf(errmsg, "no such verbosity '%s' for source '%s'", field, name); + const struct name_desc *nd; + + if (!src) { + memprintf(errmsg, "trace source must be specified for verbosity other than 'quiet'"); + } + else { + memprintf(errmsg, "no such trace verbosity '%s' for source '%s', available verbosities for this source are: 'quiet'", field, name); + for (nd = src->decoding; nd->name && nd->desc; nd++) + memprintf(errmsg, "%s, %s'%s'", *errmsg, (nd + 1)->name ? 
"" : "and ", nd->name); + } + return 1; } diff --git a/src/uri_auth.c b/src/uri_auth.c index db7e6c6..979b327 100644 --- a/src/uri_auth.c +++ b/src/uri_auth.c @@ -110,7 +110,7 @@ struct uri_auth *stats_set_realm(struct uri_auth **root, char *realm) } /* - * Returns a default uri_auth with STAT_SHNODE flag enabled and + * Returns a default uri_auth with STAT_F_SHNODE flag enabled and * <node> set as the name if it is not empty. * Uses the pointer provided if not NULL and not initialized. */ @@ -128,7 +128,7 @@ struct uri_auth *stats_set_node(struct uri_auth **root, char *name) if ((u = stats_check_init_uri_auth(root)) == NULL) goto out_u; - if (!stats_set_flag(root, STAT_SHNODE)) + if (!stats_set_flag(root, STAT_F_SHNODE)) goto out_u; if (node_copy) { @@ -145,7 +145,7 @@ struct uri_auth *stats_set_node(struct uri_auth **root, char *name) } /* - * Returns a default uri_auth with STAT_SHDESC flag enabled and + * Returns a default uri_auth with STAT_F_SHDESC flag enabled and * <description> set as the desc if it is not empty. * Uses the pointer provided if not NULL and not initialized. 
*/ @@ -163,7 +163,7 @@ struct uri_auth *stats_set_desc(struct uri_auth **root, char *desc) if ((u = stats_check_init_uri_auth(root)) == NULL) goto out_u; - if (!stats_set_flag(root, STAT_SHDESC)) + if (!stats_set_flag(root, STAT_F_SHDESC)) goto out_u; if (desc_copy) { @@ -328,16 +328,16 @@ static int smp_fetch_var(const struct arg *args, struct sample *smp, const char */ static inline void var_clear_buffer(struct sample *smp, struct vars *vars, struct var *var, int var_type) { - if (var_type == SMP_T_STR || var_type == SMP_T_BIN) { - ha_free(&var->data.u.str.area); - var_accounting_diff(vars, smp->sess, smp->strm, - -var->data.u.str.data); - } - else if (var_type == SMP_T_METH && var->data.u.meth.meth == HTTP_METH_OTHER) { - ha_free(&var->data.u.meth.str.area); - var_accounting_diff(vars, smp->sess, smp->strm, - -var->data.u.meth.str.data); - } + if (var_type == SMP_T_STR || var_type == SMP_T_BIN) { + ha_free(&var->data.u.str.area); + var_accounting_diff(vars, smp->sess, smp->strm, + -var->data.u.str.data); + } + else if (var_type == SMP_T_METH && var->data.u.meth.meth == HTTP_METH_OTHER) { + ha_free(&var->data.u.meth.str.area); + var_accounting_diff(vars, smp->sess, smp->strm, + -var->data.u.meth.str.data); + } } /* This function tries to create a variable whose name hash is <name_hash> in @@ -363,7 +363,7 @@ static inline void var_clear_buffer(struct sample *smp, struct vars *vars, struc * * It returns 0 on failure, non-zero on success. */ -static int var_set(uint64_t name_hash, enum vars_scope scope, struct sample *smp, uint flags) +int var_set(uint64_t name_hash, enum vars_scope scope, struct sample *smp, uint flags) { struct vars *vars; struct var *var; @@ -515,7 +515,7 @@ static int var_set(uint64_t name_hash, enum vars_scope scope, struct sample *smp * session and stream found in <smp>. Note that stream may be null for * SCOPE_SESS. Returns 0 if the scope was not found otherwise 1. 
*/ -static int var_unset(uint64_t name_hash, enum vars_scope scope, struct sample *smp) +int var_unset(uint64_t name_hash, enum vars_scope scope, struct sample *smp) { struct vars *vars; struct var *var; @@ -787,7 +787,7 @@ static enum act_return action_store(struct act_rule *rule, struct proxy *px, /* Process the expression. */ memset(&smp, 0, sizeof(smp)); - if (!LIST_ISEMPTY(&rule->arg.vars.fmt)) { + if (!lf_expr_isempty(&rule->arg.vars.fmt)) { /* a format-string is used */ fmtstr = alloc_trash_chunk(); @@ -838,14 +838,7 @@ static enum act_return action_clear(struct act_rule *rule, struct proxy *px, static void release_store_rule(struct act_rule *rule) { - struct logformat_node *lf, *lfb; - - list_for_each_entry_safe(lf, lfb, &rule->arg.vars.fmt, list) { - LIST_DELETE(&lf->list); - release_sample_expr(lf->expr); - free(lf->arg); - free(lf); - } + lf_expr_deinit(&rule->arg.vars.fmt); release_sample_expr(rule->arg.vars.expr); } @@ -949,7 +942,7 @@ static enum act_parse_ret parse_store(const char **args, int *arg, struct proxy condition = istsplit(&var, ','); } - LIST_INIT(&rule->arg.vars.fmt); + lf_expr_init(&rule->arg.vars.fmt); if (!vars_hash_name(var_name, var_len, &rule->arg.vars.scope, &rule->arg.vars.name_hash, err)) return ACT_RET_PRS_ERR; @@ -1029,11 +1022,6 @@ static enum act_parse_ret parse_store(const char **args, int *arg, struct proxy return ACT_RET_PRS_ERR; (*arg)++; - - /* for late error reporting */ - free(px->conf.lfs_file); - px->conf.lfs_file = strdup(px->conf.args.file); - px->conf.lfs_line = px->conf.args.line; } else { /* set-var */ rule->arg.vars.expr = sample_parse_expr((char **)args, arg, px->conf.args.file, @@ -1072,6 +1060,7 @@ static int vars_parse_global_set_var(char **args, int section_type, struct proxy struct proxy px = { .id = "CFG", .conf.args = { .file = file, .line = line, }, + .flags = PR_FL_CHECKED, }; struct act_rule rule = { .arg.vars.scope = SCOPE_PROC, @@ -1192,6 +1181,7 @@ static int vars_parse_cli_set_var(char **args, 
char *payload, struct appctx *app struct proxy px = { .id = "CLI", .conf.args = { .file = "CLI", .line = 0, }, + .flags = PR_FL_CHECKED, }; struct act_rule rule = { .arg.vars.scope = SCOPE_PROC, diff --git a/src/xprt_quic.c b/src/xprt_quic.c index eda113c..b83b634 100644 --- a/src/xprt_quic.c +++ b/src/xprt_quic.c @@ -140,6 +140,13 @@ static int qc_xprt_start(struct connection *conn, void *ctx) /* mux-quic can now be considered ready. */ qc->mux_state = QC_MUX_READY; + /* Schedule quic-conn to ensure post handshake frames are emitted. This + * is not done for 0-RTT as xprt->start happens before handshake + * completion. + */ + if (qc->flags & QUIC_FL_CONN_NEED_POST_HANDSHAKE_FRMS) + tasklet_wakeup(qc->wait_event.tasklet); + ret = 1; out: TRACE_LEAVE(QUIC_EV_CONN_NEW, qc); diff --git a/tests/unit/ist.c b/tests/unit/ist.c index 43b3438..e0d2b00 100644 --- a/tests/unit/ist.c +++ b/tests/unit/ist.c @@ -129,6 +129,30 @@ int test_istzero() return 0; } +struct ist f_iststrip(struct ist ist) { return iststrip(ist); } +int test_iststrip() +{ + if (iststrip(ist("foo")).len != 3) + return __LINE__; + + if (iststrip(ist("foo\n")).len != 3) + return __LINE__; + + if (iststrip(ist("foo\r")).len != 3) + return __LINE__; + + if (iststrip(ist("foo\r\n")).len != 3) + return __LINE__; + + if (iststrip(ist("")).len != 0) + return __LINE__; + + if (iststrip(ist("\n")).len != 0) + return __LINE__; + + return 0; +} + int f_istdiff(const struct ist ist1, const struct ist ist2) { return istdiff(ist1, ist2); } int test_istdiff() { @@ -682,6 +706,7 @@ int main(void) printf("%4d istpad()\n", test_istpad()); printf("%4d isttrim()\n", test_isttrim()); printf("%4d istzero()\n", test_istzero()); + printf("%4d iststrip()\n", test_iststrip()); printf("%4d istdiff()\n", test_istdiff()); printf("%4d istmatch()\n", test_istmatch()); printf("%4d istnmatch()\n", test_istnmatch()); diff --git a/tests/unit/test-int-range.c b/tests/unit/test-int-range.c new file mode 100644 index 0000000..f62c51c --- 
/dev/null +++ b/tests/unit/test-int-range.c @@ -0,0 +1,198 @@ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <inttypes.h> + +/* generic function for 32-bit, 4-bytes at a time */ +static inline __attribute__((always_inline)) +uint32_t is_char4_outside(uint32_t x, uint8_t min8, uint8_t max8) +{ + uint32_t min32 = min8 * 0x01010101U; + uint32_t max32 = max8 * 0x01010101U; + return (((x - min32) | (max32 - x)) & 0x80808080U); +} + +/* generic function for 64-bit, 4-bytes at a time */ +static inline __attribute__((always_inline)) +uint64_t is_char8_outside_by4(uint64_t x, uint8_t min8, uint8_t max8) +{ + return is_char4_outside(x, min8, max8) | is_char4_outside(x >> 32, min8, max8); +} + +/* generic function for 64-bit, 8-bytes at a time */ +static inline __attribute__((always_inline)) +uint64_t is_char8_outside_by8(uint64_t x, uint8_t min8, uint8_t max8) +{ + uint64_t min64 = min8 * 0x0101010101010101ULL; + uint64_t max64 = max8 * 0x0101010101010101ULL; + return (((x - min64) | (max64 - x)) & 0x8080808080808080ULL); +} + +/* generic function for 64-bit, 4- or 8-bytes at a time */ +static inline __attribute__((always_inline)) +uint64_t is_char8_outside(uint64_t x, uint8_t min8, uint8_t max8) +{ + if (sizeof(long) >= 8) + return is_char8_outside_by8(x, min8, max8); + else + return is_char8_outside_by4(x, min8, max8); +} + +/* reference function for 32-bit, one byte at a time */ +static inline int slow32_ref(uint32_t x) +{ + uint8_t a, b, c, d; + + a = x >> 0; b = x >> 8; c = x >> 16; d = x >> 24; + + return a < 0x24 || a > 0x7e || b < 0x24 || b > 0x7e || + c < 0x24 || c > 0x7e || d < 0x24 || d > 0x7e; +} + +/* reference function for 64-bit, one byte at a time */ +static inline int slow64_ref(uint64_t x) +{ + uint8_t a, b, c, d, e, f, g, h; + + a = x >> 0; b = x >> 8; c = x >> 16; d = x >> 24; + e = x >> 32; f = x >> 40; g = x >> 48; h = x >> 56; + + return a < 0x24 || a > 0x7e || b < 0x24 || b > 0x7e || + c < 0x24 || c > 0x7e || d < 0x24 || d > 
0x7e || + e < 0x24 || e > 0x7e || f < 0x24 || f > 0x7e || + g < 0x24 || g > 0x7e || h < 0x24 || h > 0x7e; +} + +/* optimal function for 32-bit, 4-bytes at a time */ +static inline int fast32_gen(uint32_t x) +{ + return !!is_char4_outside(x, 0x24, 0x7e); +} + +/* optimal function for 64-bit, 4-bytes at a time */ +static inline int fast64_gen4(uint64_t x) +{ + return !!is_char8_outside_by4(x, 0x24, 0x7e); +} + +/* optimal function for 64-bit, 8-bytes at a time */ +static inline int fast64_gen8(uint64_t x) +{ + return !!is_char8_outside_by8(x, 0x24, 0x7e); +} + +/* optimal function for 64-bit, 4- or 8-bytes at a time */ +static inline int fast64_gen(uint64_t x) +{ + return !!is_char8_outside(x, 0x24, 0x7e); +} + +/* specific function for 32-bit, 4- or 8-bytes at a time */ +static inline int fast32_spec(uint32_t x) +{ + return !!(((x - 0x24242424) | (0x7e7e7e7e - x)) & 0x80808080U); +} + +/* specific function for 32-bit, 4- or 8-bytes at a time */ +static inline int fast64_spec(uint64_t x) +{ + return !!(((x - 0x2424242424242424ULL) | (0x7e7e7e7e7e7e7e7eULL - x)) & 0x8080808080808080ULL); +} + +/* xorshift 64-bit PRNG */ +#define RND64SEED 0x9876543210abcdefull +static uint64_t rnd64seed = RND64SEED; +static inline uint64_t rnd64() +{ + rnd64seed ^= rnd64seed << 13; + rnd64seed ^= rnd64seed >> 7; + rnd64seed ^= rnd64seed << 17; + return rnd64seed; +} + +int main(int argc, char **argv) +{ + uint32_t base = 0; + uint32_t step = 1; + uint32_t loops = 0; + int size = 32; + int ref; + + /* usage: cmd [<bits> [<base> [<step>]]] */ + + if (argc > 1) + size = atoi(argv[1]); + + if (argc > 2) + base = atol(argv[2]); + + if (argc > 3) + step = atol(argv[3]); + + if (size == 32) { + do { + ref = slow32_ref(base); + + if (fast32_gen(base) != ref) { + printf("fast32_gen() fails at 0x%08x: %d / ref=%d\n", base, !ref, ref); + return 1; + } + + if (fast32_spec(base) != ref) { + printf("fast32_spec() fails at 0x%08x: %d / ref=%d\n", base, !ref, ref); + return 1; + } + + base += step; + 
loops++; + if (!(loops & 0x7ffff)) + printf("0x%08x: 0x%08x\r", loops, base); + } while (base >= step); + } + else if (size == 64) { /* 64-bit on randoms but no more than 2^32 tests */ + uint32_t ctr; + uint64_t rnd; + + /* offset the RNG if using multiple workers */ + for (ctr = 0; ctr < base; ctr++) + rnd64(); + + do { + rnd = rnd64(); + ref = slow64_ref(rnd); + + if (fast64_gen(rnd) != ref) { + printf("fast64_gen() fails at 0x%08x: fct(0x%16llx)=%d / ref=%d\n", base, (long long)rnd, !ref, ref); + return 1; + } + + if (fast64_gen4(rnd) != ref) { + printf("fast64_gen4() fails at 0x%08x: fct(0x%16llx)=%d / ref=%d\n", base, (long long)rnd, !ref, ref); + return 1; + } + + if (fast64_gen8(rnd) != ref) { + printf("fast64_gen8() fails at 0x%08x: fct(0x%16llx)=%d / ref=%d\n", base, (long long)rnd, !ref, ref); + return 1; + } + + if (fast64_spec(rnd) != ref) { + printf("fast64_spec() fails at 0x%08x: fct(0x%16llx)=%d / ref=%d\n", base, (long long)rnd, !ref, ref); + return 1; + } + + base += step; + loops++; + if (!(loops & 0x7ffff)) + printf("0x%08x: 0x%08x -> 0x%16llx\r", loops, base, (long long)rnd); + } while (base >= step); + } + else { + printf("unknown size, usage: %s [<bits> [<base> [<step>]]]\n", argv[0]); + return 1; + } + + printf("%llu checks passed. \n", + (unsigned long long)((uint32_t)(base - step) / step) + 1); + return 0; +} |