From 6beeb1b708550be0d4a53b272283e17e5e35fe17 Mon Sep 17 00:00:00 2001
From: Daniel Baumann
Date: Sun, 7 Apr 2024 17:01:30 +0200
Subject: Adding upstream version 2.4.57.

Signed-off-by: Daniel Baumann
---
 server/core_filters.c | 868 ++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 868 insertions(+)
 create mode 100644 server/core_filters.c

diff --git a/server/core_filters.c b/server/core_filters.c
new file mode 100644
index 0000000..d8a661f
--- /dev/null
+++ b/server/core_filters.c
@@ -0,0 +1,868 @@
+/* Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file  core_filters.c
+ * @brief Core input/output network filters.
+ */
+
+#include "apr.h"
+#include "apr_strings.h"
+#include "apr_lib.h"
+#include "apr_fnmatch.h"
+#include "apr_hash.h"
+#include "apr_thread_proc.h"    /* for RLIMIT stuff */
+#include "apr_version.h"
+
+#define APR_WANT_IOVEC
+#define APR_WANT_STRFUNC
+#define APR_WANT_MEMFUNC
+#include "apr_want.h"
+
+#include "ap_config.h"
+#include "httpd.h"
+#include "http_config.h"
+#include "http_core.h"
+#include "http_protocol.h"   /* For index_of_response().  Grump. */
+#include "http_request.h"
+#include "http_vhost.h"
+#include "http_main.h"       /* For the default_handler below... */
+#include "http_log.h"
+#include "util_md5.h"
+#include "http_connection.h"
+#include "apr_buckets.h"
+#include "util_filter.h"
+#include "util_ebcdic.h"
+#include "mpm_common.h"
+#include "scoreboard.h"
+#include "mod_core.h"
+#include "ap_listen.h"
+
+#include "mod_so.h" /* for ap_find_loaded_module_symbol */
+
+#define AP_MIN_SENDFILE_BYTES           (256)
+
+/**
+ * Remove all zero length buckets from the brigade.
+ */
+#define BRIGADE_NORMALIZE(b) \
+do { \
+    apr_bucket *e = APR_BRIGADE_FIRST(b); \
+    do {  \
+        if (e->length == 0 && !APR_BUCKET_IS_METADATA(e)) { \
+            apr_bucket *d; \
+            d = APR_BUCKET_NEXT(e); \
+            apr_bucket_delete(e); \
+            e = d; \
+        } \
+        else { \
+            e = APR_BUCKET_NEXT(e); \
+        } \
+    } while (!APR_BRIGADE_EMPTY(b) && (e != APR_BRIGADE_SENTINEL(b))); \
+} while (0)
+
+/* we know core's module_index is 0 */
+#undef APLOG_MODULE_INDEX
+#define APLOG_MODULE_INDEX AP_CORE_MODULE_INDEX
+
+struct core_output_filter_ctx {
+    apr_bucket_brigade *buffered_bb;
+    apr_pool_t *deferred_write_pool;
+    apr_size_t bytes_written;
+    struct iovec *vec;
+    apr_size_t nvec;
+};
+
+
+apr_status_t ap_core_input_filter(ap_filter_t *f, apr_bucket_brigade *b,
+                                  ap_input_mode_t mode, apr_read_type_e block,
+                                  apr_off_t readbytes)
+{
+    apr_status_t rv;
+    core_net_rec *net = f->ctx;
+    core_ctx_t *ctx = net->in_ctx;
+    const char *str;
+    apr_size_t len;
+
+    if (mode == AP_MODE_INIT) {
+        /*
+         * this mode is for filters that might need to 'initialize'
+         * a connection before reading request data from a client.
+         * NNTP over SSL for example needs to handshake before the
+         * server sends the welcome message.
+         * such filters would have changed the mode before this point
+         * is reached.  however, protocol modules such as NNTP should
+         * not need to know anything about SSL.  given the example, if
+         * SSL is not in the filter chain, AP_MODE_INIT is a noop.
+         */
+        return APR_SUCCESS;
+    }
+
+    if (!ctx)
+    {
+        net->in_ctx = ctx = apr_palloc(f->c->pool, sizeof(*ctx));
+        ctx->b = apr_brigade_create(f->c->pool, f->c->bucket_alloc);
+        ctx->tmpbb = apr_brigade_create(f->c->pool, f->c->bucket_alloc);
+        /* seed the brigade with the client socket. */
+        rv = ap_run_insert_network_bucket(f->c, ctx->b, net->client_socket);
+        if (rv != APR_SUCCESS)
+            return rv;
+    }
+    else if (APR_BRIGADE_EMPTY(ctx->b)) {
+        return APR_EOF;
+    }
+
+    /* ### This is bad. */
+    BRIGADE_NORMALIZE(ctx->b);
+
+    /* check for empty brigade again *AFTER* BRIGADE_NORMALIZE()
+     * If we have lost our socket bucket (see above), we are EOF.
+     *
+     * Ideally, this should be returning SUCCESS with EOS bucket, but
+     * some higher-up APIs (spec. read_request_line via ap_rgetline)
+     * want an error code. */
+    if (APR_BRIGADE_EMPTY(ctx->b)) {
+        return APR_EOF;
+    }
+
+    if (mode == AP_MODE_GETLINE) {
+        /* we are reading a single LF line, e.g. the HTTP headers */
+        rv = apr_brigade_split_line(b, ctx->b, block, HUGE_STRING_LEN);
+        /* We should treat EAGAIN here the same as we do for EOF (brigade is
+         * empty).  We do this by returning whatever we have read.  This may
+         * or may not be bogus, but is consistent (for now) with EOF logic.
+         */
+        if (APR_STATUS_IS_EAGAIN(rv) && block == APR_NONBLOCK_READ) {
+            rv = APR_SUCCESS;
+        }
+        return rv;
+    }
+
+    /* ### AP_MODE_PEEK is a horrific name for this mode because we also
+     * eat any CRLFs that we see.  That's not the obvious intention of
+     * this mode.  Determine whether anyone actually uses this or not. */
+    if (mode == AP_MODE_EATCRLF) {
+        apr_bucket *e;
+        const char *c;
+
+        /* The purpose of this loop is to ignore any CRLF (or LF) at the end
+         * of a request.  Many browsers send extra lines at the end of POST
+         * requests.  We use the PEEK method to determine if there is more
+         * data on the socket, so that we know if we should delay sending the
+         * end of one request until we have served the second request in a
+         * pipelined situation.  We don't want to actually delay sending a
+         * response if the server finds a CRLF (or LF), because that doesn't
+         * mean that there is another request, just a blank line.
+         */
+        while (1) {
+            if (APR_BRIGADE_EMPTY(ctx->b))
+                return APR_EOF;
+
+            e = APR_BRIGADE_FIRST(ctx->b);
+
+            rv = apr_bucket_read(e, &str, &len, APR_NONBLOCK_READ);
+
+            if (rv != APR_SUCCESS)
+                return rv;
+
+            c = str;
+            while (c < str + len) {
+                if (*c == APR_ASCII_LF)
+                    c++;
+                else if (*c == APR_ASCII_CR && *(c + 1) == APR_ASCII_LF)
+                    c += 2;
+                else
+                    return APR_SUCCESS;
+            }
+
+            /* If we reach here, we were a bucket just full of CRLFs, so
+             * just toss the bucket. */
+            /* FIXME: Is this the right thing to do in the core? */
+            apr_bucket_delete(e);
+        }
+        return APR_SUCCESS;
+    }
+
+    /* If mode is EXHAUSTIVE, we want to just read everything until the end
+     * of the brigade, which in this case means the end of the socket.
+     * To do this, we attach the brigade that has currently been setaside to
+     * the brigade that was passed down, and send that brigade back.
+     *
+     * NOTE: This is VERY dangerous to use, and should only be done with
+     * extreme caution.
+     * FWIW, this would be needed by an MPM like Perchild;
+     * such an MPM can easily request the socket and all data that has been
+     * read, which means that it can pass it to the correct child process.
+     */
+    if (mode == AP_MODE_EXHAUSTIVE) {
+        apr_bucket *e;
+
+        /* Tack on any buckets that were set aside. */
+        APR_BRIGADE_CONCAT(b, ctx->b);
+
+        /* Since we've just added all potential buckets (which will most
+         * likely simply be the socket bucket) we know this is the end,
+         * so tack on an EOS too. */
+        /* We have read until the brigade was empty, so we know that we
+         * must be EOS. */
+        e = apr_bucket_eos_create(f->c->bucket_alloc);
+        APR_BRIGADE_INSERT_TAIL(b, e);
+        return APR_SUCCESS;
+    }
+
+    /* read up to the amount they specified. */
+    if (mode == AP_MODE_READBYTES || mode == AP_MODE_SPECULATIVE) {
+        apr_bucket *e;
+
+        AP_DEBUG_ASSERT(readbytes > 0);
+
+        e = APR_BRIGADE_FIRST(ctx->b);
+        rv = apr_bucket_read(e, &str, &len, block);
+
+        if (APR_STATUS_IS_EAGAIN(rv) && block == APR_NONBLOCK_READ) {
+            /* getting EAGAIN for a blocking read is an error; for a
+             * non-blocking read, return an empty brigade. */
+            return APR_SUCCESS;
+        }
+        else if (rv != APR_SUCCESS) {
+            return rv;
+        }
+        else if (block == APR_BLOCK_READ && len == 0) {
+            /* We wanted to read some bytes in blocking mode.  We read
+             * 0 bytes.  Hence, we now assume we are EOS.
+             *
+             * When we are in normal mode, return an EOS bucket to the
+             * caller.
+             * When we are in speculative mode, leave ctx->b empty, so
+             * that the next call returns an EOS bucket.
+             */
+            apr_bucket_delete(e);
+
+            if (mode == AP_MODE_READBYTES) {
+                e = apr_bucket_eos_create(f->c->bucket_alloc);
+                APR_BRIGADE_INSERT_TAIL(b, e);
+            }
+            return APR_SUCCESS;
+        }
+
+        /* Have we read as much data as we wanted (be greedy)? */
+        if (len < readbytes) {
+            apr_size_t bucket_len;
+
+            rv = APR_SUCCESS;
+            /* We already registered the data in e in len */
+            e = APR_BUCKET_NEXT(e);
+            while ((len < readbytes) && (rv == APR_SUCCESS)
+                   && (e != APR_BRIGADE_SENTINEL(ctx->b))) {
+                /* Check for the availability of buckets with known length */
+                if (e->length != (apr_size_t)-1) {
+                    len += e->length;
+                    e = APR_BUCKET_NEXT(e);
+                }
+                else {
+                    /*
+                     * Read from bucket, but non-blocking. If there isn't any
+                     * more data, well then this is fine as well, we will
+                     * not wait for more since we already got some and we are
+                     * only checking if there isn't more.
+                     */
+                    rv = apr_bucket_read(e, &str, &bucket_len,
+                                         APR_NONBLOCK_READ);
+                    if (rv == APR_SUCCESS) {
+                        len += bucket_len;
+                        e = APR_BUCKET_NEXT(e);
+                    }
+                }
+            }
+        }
+
+        /* We can only return at most what we read. */
+        if (len < readbytes) {
+            readbytes = len;
+        }
+
+        rv = apr_brigade_partition(ctx->b, readbytes, &e);
+        if (rv != APR_SUCCESS) {
+            return rv;
+        }
+
+        /* Must do move before CONCAT */
+        ctx->tmpbb = apr_brigade_split_ex(ctx->b, e, ctx->tmpbb);
+
+        if (mode == AP_MODE_READBYTES) {
+            APR_BRIGADE_CONCAT(b, ctx->b);
+        }
+        else if (mode == AP_MODE_SPECULATIVE) {
+            apr_bucket *copy_bucket;
+
+            for (e = APR_BRIGADE_FIRST(ctx->b);
+                 e != APR_BRIGADE_SENTINEL(ctx->b);
+                 e = APR_BUCKET_NEXT(e))
+            {
+                rv = apr_bucket_copy(e, &copy_bucket);
+                if (rv != APR_SUCCESS) {
+                    return rv;
+                }
+                APR_BRIGADE_INSERT_TAIL(b, copy_bucket);
+            }
+        }
+
+        /* Take what was originally there and place it back on ctx->b */
+        APR_BRIGADE_CONCAT(ctx->b, ctx->tmpbb);
+    }
+    return APR_SUCCESS;
+}
+
+static void setaside_remaining_output(ap_filter_t *f,
+                                      core_output_filter_ctx_t *ctx,
+                                      apr_bucket_brigade *bb,
+                                      conn_rec *c);
+
+static apr_status_t send_brigade_nonblocking(apr_socket_t *s,
+                                             apr_bucket_brigade *bb,
+                                             core_output_filter_ctx_t *ctx,
+                                             conn_rec *c);
+
+static apr_status_t writev_nonblocking(apr_socket_t *s,
+                                       apr_bucket_brigade *bb,
+                                       core_output_filter_ctx_t *ctx,
+                                       apr_size_t bytes_to_write,
+                                       apr_size_t nvec,
+                                       conn_rec *c);
+
+#if APR_HAS_SENDFILE
+static apr_status_t sendfile_nonblocking(apr_socket_t *s,
+                                         apr_bucket *bucket,
+                                         core_output_filter_ctx_t *ctx,
+                                         conn_rec *c);
+#endif
+
+/* XXX: Should these be configurable parameters? */
+#define THRESHOLD_MIN_WRITE 4096
+
+/* Optional function coming from mod_logio, used for logging of output
+ * traffic
+ */
+extern APR_OPTIONAL_FN_TYPE(ap_logio_add_bytes_out) *ap__logio_add_bytes_out;
+
+static int should_send_brigade(apr_bucket_brigade *bb, conn_rec *c, int *flush)
+{
+    core_server_config *conf =
+        ap_get_core_module_config(c->base_server->module_config);
+    apr_size_t total_bytes = 0, non_file_bytes = 0;
+    apr_uint32_t eor_buckets = 0;
+    apr_bucket *bucket;
+    int need_flush = 0;
+
+    /* Scan through the brigade and decide whether we need to flush it,
+     * based on the following rules:
+     *
+     *  a) The brigade contains a flush bucket: Do a blocking write
+     *     of everything up to that point.
+     *
+     *  b) The request is in CONN_STATE_HANDLER state, and the brigade
+     *     contains at least flush_max_threshold bytes in non-file
+     *     buckets: Do blocking writes until the amount of data in the
+     *     buffer is less than flush_max_threshold.  (The point of this
+     *     rule is to provide flow control, in case a handler is
+     *     streaming out lots of data faster than the data can be
+     *     sent to the client.)
+     *
+     *  c) The request is in CONN_STATE_HANDLER state, and the brigade
+     *     contains at least flush_max_pipelined EOR buckets:
+     *     Do blocking writes until less than flush_max_pipelined EOR
+     *     buckets are left. (The point of this rule is to prevent too many
+     *     FDs being kept open by pipelined requests, possibly allowing a
+     *     DoS).
+     *
+     *  d) The brigade contains a morphing bucket: flush it, as otherwise
+     *     ap_save_brigade() could read the whole bucket into memory.
+     */
+ */ + for (bucket = APR_BRIGADE_FIRST(bb); + bucket != APR_BRIGADE_SENTINEL(bb); + bucket = APR_BUCKET_NEXT(bucket)) { + + if (!APR_BUCKET_IS_METADATA(bucket)) { + if (bucket->length == (apr_size_t)-1) { + if (flush) { + ap_log_cerror(APLOG_MARK, APLOG_TRACE6, 0, c, + "core_output_filter: flushing because " + "of morphing bucket"); + } + need_flush = 1; + break; + } + + total_bytes += bucket->length; + if (!APR_BUCKET_IS_FILE(bucket)) { + non_file_bytes += bucket->length; + if (non_file_bytes > conf->flush_max_threshold) { + if (flush) { + ap_log_cerror(APLOG_MARK, APLOG_TRACE6, 0, c, + "core_output_filter: flushing because " + "of max threshold"); + } + need_flush = 1; + break; + } + } + } + else if (APR_BUCKET_IS_FLUSH(bucket)) { + if (flush) { + ap_log_cerror(APLOG_MARK, APLOG_TRACE6, 0, c, + "core_output_filter: flushing because " + "of FLUSH bucket"); + } + need_flush = 1; + break; + } + else if (AP_BUCKET_IS_EOR(bucket) + && conf->flush_max_pipelined >= 0 + && ++eor_buckets > conf->flush_max_pipelined) { + if (flush) { + ap_log_cerror(APLOG_MARK, APLOG_TRACE6, 0, c, + "core_output_filter: flushing because " + "of max pipelined"); + } + need_flush = 1; + break; + } + } + if (flush) { + *flush = need_flush; + } + + /* Also send if above flush_min_threshold, or if there are FILE buckets */ + return (need_flush + || total_bytes >= THRESHOLD_MIN_WRITE + || total_bytes > non_file_bytes); +} + +apr_status_t ap_core_output_filter(ap_filter_t *f, apr_bucket_brigade *new_bb) +{ + conn_rec *c = f->c; + core_net_rec *net = f->ctx; + core_output_filter_ctx_t *ctx = net->out_ctx; + apr_bucket_brigade *bb = NULL; + apr_status_t rv = APR_SUCCESS; + + /* Fail quickly if the connection has already been aborted. */ + if (c->aborted) { + if (new_bb != NULL) { + apr_brigade_cleanup(new_bb); + } + return APR_ECONNABORTED; + } + + if (ctx == NULL) { + ctx = apr_pcalloc(c->pool, sizeof(*ctx)); + net->out_ctx = (core_output_filter_ctx_t *)ctx; + /* + * Need to create buffered_bb brigade with correct lifetime. Passing + * NULL to ap_save_brigade() would result in a brigade + * allocated from bb->pool which might be wrong. + */ + ctx->buffered_bb = apr_brigade_create(c->pool, c->bucket_alloc); + } + + if (new_bb != NULL) + bb = new_bb; + + if ((ctx->buffered_bb != NULL) && + !APR_BRIGADE_EMPTY(ctx->buffered_bb)) { + if (new_bb != NULL) { + APR_BRIGADE_PREPEND(bb, ctx->buffered_bb); + } + else { + bb = ctx->buffered_bb; + } + } + else if (new_bb == NULL) { + c->data_in_output_filters = 0; + return APR_SUCCESS; + } + + if (!new_bb || should_send_brigade(bb, c, NULL)) { + apr_socket_t *sock = net->client_socket; + apr_interval_time_t sock_timeout = 0; + + /* Non-blocking writes on the socket in any case. */ + apr_socket_timeout_get(sock, &sock_timeout); + apr_socket_timeout_set(sock, 0); + + do { + rv = send_brigade_nonblocking(sock, bb, ctx, c); + if (new_bb && APR_STATUS_IS_EAGAIN(rv)) { + /* Scan through the brigade and decide whether we must absolutely + * flush the remaining data, based on should_send_brigade() &flush + * rules. If so, wait for writability and retry, otherwise we did + * our best already and can wait for the next call. 
+ */ + int flush; + (void)should_send_brigade(bb, c, &flush); + if (flush) { + apr_int32_t nfd; + apr_pollfd_t pfd; + memset(&pfd, 0, sizeof(pfd)); + pfd.reqevents = APR_POLLOUT; + pfd.desc_type = APR_POLL_SOCKET; + pfd.desc.s = sock; + pfd.p = c->pool; + do { + rv = apr_poll(&pfd, 1, &nfd, sock_timeout); + } while (APR_STATUS_IS_EINTR(rv)); + } + } + } while (rv == APR_SUCCESS && !APR_BRIGADE_EMPTY(bb)); + + /* Restore original socket timeout before leaving. */ + apr_socket_timeout_set(sock, sock_timeout); + } + + if (rv != APR_SUCCESS && !APR_STATUS_IS_EAGAIN(rv)) { + /* The client has aborted the connection */ + ap_log_cerror( + APLOG_MARK, APLOG_TRACE1, rv, c, + "core_output_filter: writing data to the network"); + /* + * Set c->aborted before apr_brigade_cleanup to have the correct status + * when logging the request as apr_brigade_cleanup triggers the logging + * of the request if it contains an EOR bucket. + */ + c->aborted = 1; + apr_brigade_cleanup(bb); + return rv; + } + + setaside_remaining_output(f, ctx, bb, c); + return APR_SUCCESS; +} + +/* + * This function assumes that either ctx->buffered_bb == NULL, or + * ctx->buffered_bb is empty, or ctx->buffered_bb == bb + */ +static void setaside_remaining_output(ap_filter_t *f, + core_output_filter_ctx_t *ctx, + apr_bucket_brigade *bb, + conn_rec *c) +{ + apr_bucket *bucket; + + /* Don't set aside leading empty buckets, all previous data have been + * consumed so it's safe to delete them now. + */ + while (((bucket = APR_BRIGADE_FIRST(bb)) != APR_BRIGADE_SENTINEL(bb)) && + (APR_BUCKET_IS_METADATA(bucket) || (bucket->length == 0))) { + apr_bucket_delete(bucket); + } + + c->data_in_output_filters = 0; + if (!APR_BRIGADE_EMPTY(bb)) { + c->data_in_output_filters = 1; + if (bb != ctx->buffered_bb) { + if (!ctx->deferred_write_pool) { + apr_pool_create(&ctx->deferred_write_pool, c->pool); + apr_pool_tag(ctx->deferred_write_pool, "deferred_write"); + } + ap_save_brigade(f, &(ctx->buffered_bb), &bb, + ctx->deferred_write_pool); + } + } + else if (ctx->deferred_write_pool) { + /* + * There are no more requests in the pipeline. We can just clear the + * pool. + */ + apr_pool_clear(ctx->deferred_write_pool); + } +} + +#ifndef APR_MAX_IOVEC_SIZE +#define NVEC_MIN 16 +#define NVEC_MAX NVEC_MIN +#else +#if APR_MAX_IOVEC_SIZE > 16 +#define NVEC_MIN 16 +#else +#define NVEC_MIN APR_MAX_IOVEC_SIZE +#endif +#define NVEC_MAX APR_MAX_IOVEC_SIZE +#endif + +static APR_INLINE int is_in_memory_bucket(apr_bucket *b) +{ + /* These buckets' data are already in memory. */ + return APR_BUCKET_IS_HEAP(b) + || APR_BUCKET_IS_POOL(b) + || APR_BUCKET_IS_TRANSIENT(b) + || APR_BUCKET_IS_IMMORTAL(b); +} + +#if APR_HAS_SENDFILE +static APR_INLINE int can_sendfile_bucket(apr_bucket *b) +{ + /* Use sendfile to send the bucket unless: + * - the bucket is not a file bucket, or + * - the file is too small for sendfile to be useful, or + * - sendfile is disabled in the httpd config via "EnableSendfile off". 
+ */ + if (APR_BUCKET_IS_FILE(b) && b->length >= AP_MIN_SENDFILE_BYTES) { + apr_file_t *file = ((apr_bucket_file *)b->data)->fd; + return apr_file_flags_get(file) & APR_SENDFILE_ENABLED; + } + else { + return 0; + } +} +#endif + +#if defined(WIN32) && (APR_MAJOR_VERSION == 1 && APR_MINOR_VERSION <= 7) +#undef APR_TCP_NOPUSH_FLAG +#define APR_TCP_NOPUSH_FLAG 0 +#endif + +static APR_INLINE void sock_nopush(apr_socket_t *s, int to) +{ + /* Disable TCP_NOPUSH handling on OSX since unsetting it won't push + * retained data, which might introduce delays if further data don't + * come soon enough or cause the last chunk to be sent only when the + * connection is shutdown (e.g. after KeepAliveTimeout). + */ +#if APR_TCP_NOPUSH_FLAG && !defined(__APPLE__) + (void)apr_socket_opt_set(s, APR_TCP_NOPUSH, to); +#endif +} + +static apr_status_t send_brigade_nonblocking(apr_socket_t *s, + apr_bucket_brigade *bb, + core_output_filter_ctx_t *ctx, + conn_rec *c) +{ + apr_status_t rv = APR_SUCCESS; + core_server_config *conf = + ap_get_core_module_config(c->base_server->module_config); + apr_size_t nvec = 0, nbytes = 0; + apr_bucket *bucket, *next; + const char *data; + apr_size_t length; + + for (bucket = APR_BRIGADE_FIRST(bb); + bucket != APR_BRIGADE_SENTINEL(bb); + bucket = next) { + next = APR_BUCKET_NEXT(bucket); + +#if APR_HAS_SENDFILE + if (can_sendfile_bucket(bucket)) { + if (nvec > 0) { + sock_nopush(s, 1); + rv = writev_nonblocking(s, bb, ctx, nbytes, nvec, c); + if (rv != APR_SUCCESS) { + goto cleanup; + } + nbytes = 0; + nvec = 0; + } + rv = sendfile_nonblocking(s, bucket, ctx, c); + if (rv != APR_SUCCESS) { + goto cleanup; + } + continue; + } +#endif /* APR_HAS_SENDFILE */ + + if (bucket->length) { + /* Non-blocking read first, in case this is a morphing + * bucket type. */ + rv = apr_bucket_read(bucket, &data, &length, APR_NONBLOCK_READ); + if (APR_STATUS_IS_EAGAIN(rv)) { + /* Read would block; flush any pending data and retry. */ + if (nvec) { + rv = writev_nonblocking(s, bb, ctx, nbytes, nvec, c); + if (rv != APR_SUCCESS) { + goto cleanup; + } + nbytes = 0; + nvec = 0; + } + sock_nopush(s, 0); + + rv = apr_bucket_read(bucket, &data, &length, APR_BLOCK_READ); + } + if (rv != APR_SUCCESS) { + goto cleanup; + } + + /* reading may have split the bucket, so recompute next: */ + next = APR_BUCKET_NEXT(bucket); + } + + if (!bucket->length) { + /* Don't delete empty buckets until all the previous ones have been + * sent (nvec == 0); this must happen in sequence since metabuckets + * like EOR could free the data still pointed to by the iovec. So + * unless the latter is empty, let writev_nonblocking() cleanup the + * brigade in order. + */ + if (!nvec) { + apr_bucket_delete(bucket); + } + continue; + } + + /* Make sure that these new data fit in our iovec. 
+        if (nvec == ctx->nvec) {
+            if (nvec == NVEC_MAX) {
+                sock_nopush(s, 1);
+                rv = writev_nonblocking(s, bb, ctx, nbytes, nvec, c);
+                if (rv != APR_SUCCESS) {
+                    goto cleanup;
+                }
+                nbytes = 0;
+                nvec = 0;
+            }
+            else {
+                struct iovec *newvec;
+                apr_size_t newn = nvec * 2;
+                if (newn < NVEC_MIN) {
+                    newn = NVEC_MIN;
+                }
+                else if (newn > NVEC_MAX) {
+                    newn = NVEC_MAX;
+                }
+                newvec = apr_palloc(c->pool, newn * sizeof(struct iovec));
+                if (nvec) {
+                    memcpy(newvec, ctx->vec, nvec * sizeof(struct iovec));
+                }
+                ctx->vec = newvec;
+                ctx->nvec = newn;
+            }
+        }
+        nbytes += length;
+        ctx->vec[nvec].iov_base = (void *)data;
+        ctx->vec[nvec].iov_len = length;
+        nvec++;
+
+        /* Flush above max threshold, unless the brigade still contains in
+         * memory buckets which we want to try writing in the same pass (if
+         * we are at the end of the brigade, the write will happen outside
+         * the loop anyway).
+         */
+        if (nbytes > conf->flush_max_threshold
+            && next != APR_BRIGADE_SENTINEL(bb)
+            && !is_in_memory_bucket(next)) {
+            sock_nopush(s, 1);
+            rv = writev_nonblocking(s, bb, ctx, nbytes, nvec, c);
+            if (rv != APR_SUCCESS) {
+                goto cleanup;
+            }
+            nbytes = 0;
+            nvec = 0;
+        }
+    }
+    if (nvec > 0) {
+        rv = writev_nonblocking(s, bb, ctx, nbytes, nvec, c);
+    }
+
+cleanup:
+    sock_nopush(s, 0);
+    return rv;
+}
+
+static apr_status_t writev_nonblocking(apr_socket_t *s,
+                                       apr_bucket_brigade *bb,
+                                       core_output_filter_ctx_t *ctx,
+                                       apr_size_t bytes_to_write,
+                                       apr_size_t nvec,
+                                       conn_rec *c)
+{
+    apr_status_t rv;
+    struct iovec *vec = ctx->vec;
+    apr_size_t bytes_written = 0;
+    apr_size_t i, offset = 0;
+
+    do {
+        apr_size_t n = 0;
+        rv = apr_socket_sendv(s, vec + offset, nvec - offset, &n);
+        bytes_written += n;
+
+        for (i = offset; i < nvec; ) {
+            apr_bucket *bucket = APR_BRIGADE_FIRST(bb);
+            if (!bucket->length) {
+                apr_bucket_delete(bucket);
+            }
+            else if (n >= vec[i].iov_len) {
+                apr_bucket_delete(bucket);
+                n -= vec[i++].iov_len;
+                offset++;
+            }
+            else {
+                if (n) {
+                    apr_bucket_split(bucket, n);
+                    apr_bucket_delete(bucket);
+                    vec[i].iov_len -= n;
+                    vec[i].iov_base = (char *) vec[i].iov_base + n;
+                }
+                break;
+            }
+        }
+    } while (rv == APR_SUCCESS && bytes_written < bytes_to_write);
+
+    if ((ap__logio_add_bytes_out != NULL) && (bytes_written > 0)) {
+        ap__logio_add_bytes_out(c, bytes_written);
+    }
+    ctx->bytes_written += bytes_written;
+
+    ap_log_cerror(APLOG_MARK, APLOG_TRACE6, rv, c,
+                  "writev_nonblocking: %"APR_SIZE_T_FMT"/%"APR_SIZE_T_FMT,
+                  bytes_written, bytes_to_write);
+    return rv;
+}
+
+#if APR_HAS_SENDFILE
+
+static apr_status_t sendfile_nonblocking(apr_socket_t *s,
+                                         apr_bucket *bucket,
+                                         core_output_filter_ctx_t *ctx,
+                                         conn_rec *c)
+{
+    apr_status_t rv;
+    apr_file_t *file = ((apr_bucket_file *)bucket->data)->fd;
+    apr_size_t bytes_written = bucket->length; /* bytes_to_write for now */
+    apr_off_t file_offset = bucket->start;
+
+    rv = apr_socket_sendfile(s, file, NULL, &file_offset, &bytes_written, 0);
+    if ((ap__logio_add_bytes_out != NULL) && (bytes_written > 0)) {
+        ap__logio_add_bytes_out(c, bytes_written);
+    }
+    ctx->bytes_written += bytes_written;
+
+    ap_log_cerror(APLOG_MARK, APLOG_TRACE6, rv, c,
+                  "sendfile_nonblocking: %" APR_SIZE_T_FMT "/%" APR_SIZE_T_FMT,
+                  bytes_written, bucket->length);
+    if (bytes_written >= bucket->length) {
+        apr_bucket_delete(bucket);
+    }
+    else if (bytes_written > 0) {
+        apr_bucket_split(bucket, bytes_written);
+        apr_bucket_delete(bucket);
+        if (rv == APR_SUCCESS) {
+            rv = APR_EAGAIN;
+        }
+    }
+    return rv;
+}
+
+#endif