summaryrefslogtreecommitdiffstats
path: root/modules/metadata/mod_unique_id.c
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-07 15:01:30 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-07 15:01:30 +0000
commit6beeb1b708550be0d4a53b272283e17e5e35fe17 (patch)
tree1ce8673d4aaa948e5554000101f46536a1e4cc29 /modules/metadata/mod_unique_id.c
parentInitial commit. (diff)
downloadapache2-upstream.tar.xz
apache2-upstream.zip
Adding upstream version 2.4.57.upstream/2.4.57upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'modules/metadata/mod_unique_id.c')
-rw-r--r--modules/metadata/mod_unique_id.c336
1 files changed, 336 insertions, 0 deletions
diff --git a/modules/metadata/mod_unique_id.c b/modules/metadata/mod_unique_id.c
new file mode 100644
index 0000000..2555749
--- /dev/null
+++ b/modules/metadata/mod_unique_id.c
@@ -0,0 +1,336 @@
+/* Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * mod_unique_id.c: generate a unique identifier for each request
+ *
+ * Original author: Dean Gaudet <dgaudet@arctic.org>
+ * UUencoding modified by: Alvaro Martinez Echevarria <alvaro@lander.es>
+ */
+
+#define APR_WANT_BYTEFUNC /* for htons() et al */
+#include "apr_want.h"
+#include "apr_general.h" /* for APR_OFFSETOF */
+#include "apr_network_io.h"
+
+#ifdef APR_HAS_THREADS
+#include "apr_atomic.h" /* for apr_atomic_inc32 */
+#include "mpm_common.h" /* for ap_mpm_query */
+#endif
+
+#include "httpd.h"
+#include "http_config.h"
+#include "http_log.h"
+#include "http_protocol.h" /* for ap_hook_post_read_request */
+
+#define ROOT_SIZE 10
+
+/*
+ * Unencoded form of one identifier. gen_unique_id() fills a local instance,
+ * copies the fields out one by one (so that any compiler-inserted padding is
+ * skipped) and encodes the resulting packed bytes.
+ */
+typedef struct {
+ unsigned int stamp; /* request time in seconds, stored via htonl() */
+ char root[ROOT_SIZE]; /* random bytes drawn once in unique_id_child_init() */
+ unsigned short counter; /* 16-bit request counter value, stored via htons() */
+ unsigned int thread_index; /* r->connection->id, stored via htonl() */
+} unique_id_rec;
+
+/* We are using thread_index (filled from the connection id, see
+ * gen_unique_id()), because we cannot guarantee the thread_id will be an
+ * integer.
+ *
+ * This code looks like it won't give a unique ID with the new thread logic.
+ * It will. The counter is shared by all threads of a child (and is
+ * incremented atomically under a threaded MPM), and the thread_index is also
+ * in the unique ID. In order for the id to not be unique, the same
+ * thread_index would have to get the same counter twice in the same second.
+ */
+
+/* Comments:
+ *
+ * We want an identifier which is unique across all hits, everywhere.
+ * "everywhere" includes multiple httpd instances on the same machine, or on
+ * multiple machines. Essentially "everywhere" should include all possible
+ * httpds across all servers at a particular "site". We make some assumptions
+ * that if the site has a cluster of machines then their time is relatively
+ * synchronized. We also assume that the first address returned by a
+ * gethostbyname (gethostname()) is unique across all the machines at the
+ * "site".
+ *
+ * The root is assumed to absolutely uniquely identify this one child
+ * from all other currently running children on all servers (including
+ * this physical server if it is running multiple httpds) from each
+ * other.
+ *
+ * The stamp and counter are used to distinguish all hits for a
+ * particular root. The stamp is updated using r->request_time,
+ * saving cpu cycles. The counter is never reset, and is used to
+ * permit up to 64k requests in a single second by a single child.
+ *
+ * The 160 bits of unique_id_rec (assuming a 32-bit int and a 16-bit short)
+ * are encoded using the base64url alphabet [A-Za-z0-9_-], resulting in 27
+ * bytes of printable characters. That is then stuffed into the environment
+ * variable UNIQUE_ID so that it is available to other modules. The alphabet
+ * choice differs from normal base64 encoding [A-Za-z0-9+/] because + and /
+ * are special characters in URLs and we want to make it easy to use
+ * UNIQUE_ID in URLs.
+ *
+ * Note that UNIQUE_ID should be considered an opaque token by other
+ * applications. No attempt should be made to dissect its internal components.
+ * It is an abstraction that may change in the future as the needs of this
+ * module change.
+ *
+ * It is highly desirable that identifiers exist for "eternity". But future
+ * needs (such as much faster webservers, or moving to a
+ * multithreaded server) may dictate a need to change the contents of
+ * unique_id_rec. Such a future implementation should ensure that the first
+ * field is still a time_t stamp. By doing that, it is possible for a site to
+ * have a "flag second" in which they stop all of their old-format servers,
+ * wait one entire second, and then start all of their new-servers. This
+ * procedure will ensure that the new space of identifiers is completely unique
+ * from the old space. (Since the first four unencoded bytes always differ.)
+ *
+ * Note: previous implementations used 32-bits of IP address plus pid
+ * in place of the PRNG output in the "root" field. This was
+ * insufficient for IPv6-only hosts, required working DNS to determine
+ * a unique IP address (fragile), and needed a [0, 1) second sleep
+ * call at startup to avoid pid reuse. Use of the PRNG avoids all
+ * these issues.
+ */
+
+/*
+ * Sun Jun 7 05:43:49 CEST 1998 -- Alvaro
+ * More comments:
+ * 1) The UUencoding procedure is now done in a general way, avoiding the problems
+ * with sizes and paddings that can arise depending on the architecture. Now the
+ * offsets and sizes of the elements of the unique_id_rec structure are calculated
+ * in unique_id_global_init; and then used to duplicate the structure without the
+ * paddings that might exist. The multithreaded server fix should be now very easy:
+ * just add a new "tid" field to the unique_id_rec structure, and increase by one
+ * UNIQUE_ID_REC_MAX.
+ * 2) unique_id_rec.stamp has been changed from "time_t" to "unsigned int", because
+ * its size is 64bits on some platforms (linux/alpha), and this caused problems with
+ * htonl/ntohl. Well, this shouldn't be a problem till year 2106.
+ */
+
+/*
+ * XXX: We should have a per-thread counter and not use cur_unique_counter
+ * XXX: in all threads, because this is bad for performance on multi-processor
+ * XXX: systems: Writing to the same address from several CPUs causes cache
+ * XXX: thrashing.
+ */
+/* Per-child template record. In this file only its root field is assigned
+ * (unique_id_child_init) and read back (gen_unique_id); the other fields are
+ * referenced for sizeof() only. */
+static unique_id_rec cur_unique_id;
+/* Request counter shared by the whole child process: seeded with random
+ * bytes in unique_id_child_init and advanced once per generated id. */
+static apr_uint32_t cur_unique_counter;
+#ifdef APR_HAS_THREADS
+/* Answer from ap_mpm_query(AP_MPMQ_IS_THREADED); when non-zero,
+ * gen_unique_id advances the counter with apr_atomic_inc32(). */
+static int is_threaded_mpm;
+#endif
+
+/*
+ * Number of elements in the structure unique_id_rec.
+ */
+#define UNIQUE_ID_REC_MAX 4
+
+/*
+ * Layout tables, all filled in by unique_id_global_init():
+ *   unique_id_rec_offset[i]   byte offset of field i inside unique_id_rec
+ *   unique_id_rec_size[i]     size in bytes of field i
+ *   unique_id_rec_total_size  sum of the field sizes (padding excluded)
+ *   unique_id_rec_size_uu     number of characters in the encoded id
+ */
+static unsigned short unique_id_rec_offset[UNIQUE_ID_REC_MAX],
+ unique_id_rec_size[UNIQUE_ID_REC_MAX],
+ unique_id_rec_total_size,
+ unique_id_rec_size_uu;
+
+/*
+ * post_config hook: record where each field of unique_id_rec lives and how
+ * large it is, then derive the packed size and the encoded length from
+ * those numbers. None of the pool or server arguments are used.
+ *
+ * Always returns OK.
+ */
+static int unique_id_global_init(apr_pool_t *p, apr_pool_t *plog,
+                                 apr_pool_t *ptemp, server_rec *main_server)
+{
+    unsigned int packed_bytes = 0;
+    int field;
+
+    /* field 0: stamp */
+    unique_id_rec_offset[0] = APR_OFFSETOF(unique_id_rec, stamp);
+    unique_id_rec_size[0] = sizeof(cur_unique_id.stamp);
+
+    /* field 1: root */
+    unique_id_rec_offset[1] = APR_OFFSETOF(unique_id_rec, root);
+    unique_id_rec_size[1] = sizeof(cur_unique_id.root);
+
+    /* field 2: counter */
+    unique_id_rec_offset[2] = APR_OFFSETOF(unique_id_rec, counter);
+    unique_id_rec_size[2] = sizeof(cur_unique_id.counter);
+
+    /* field 3: thread_index */
+    unique_id_rec_offset[3] = APR_OFFSETOF(unique_id_rec, thread_index);
+    unique_id_rec_size[3] = sizeof(cur_unique_id.thread_index);
+
+    /* Packed size is the sum of the field sizes, i.e. without padding. */
+    for (field = 0; field < UNIQUE_ID_REC_MAX; field++) {
+        packed_bytes += unique_id_rec_size[field];
+    }
+    unique_id_rec_total_size = packed_bytes;
+
+    /* Six bits per output character, rounded up. */
+    unique_id_rec_size_uu = (unique_id_rec_total_size * 8 + 5) / 6;
+
+    return OK;
+}
+
+/*
+ * child_init hook: set up the per-child state used by gen_unique_id().
+ * Neither argument is used.
+ */
+static void unique_id_child_init(apr_pool_t *p, server_rec *s)
+{
+#ifdef APR_HAS_THREADS
+    /* Start from "not threaded" and let the MPM query overwrite the flag. */
+    is_threaded_mpm = 0;
+    ap_mpm_query(AP_MPMQ_IS_THREADED, &is_threaded_mpm);
+#endif
+
+    /* Random root that tells this child apart from every other child. */
+    ap_random_insecure_bytes(cur_unique_id.root,
+                             sizeof cur_unique_id.root);
+
+    /*
+     * Seed the counter randomly as well: starting from 0 would give a
+     * little less protection against restart problems and against a clock
+     * going backwards in time.
+     */
+    ap_random_insecure_bytes(&cur_unique_counter,
+                             sizeof cur_unique_counter);
+}
+
+/* Six-bit value -> output character, using the base64url alphabet of
+ * RFC 4648 so that the result contains only URL-safe characters.
+ * ### TODO: can switch to apr_encode_*. */
+static const char uuencoder[64] = {
+    'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H',    /*  0 -  7 */
+    'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P',    /*  8 - 15 */
+    'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X',    /* 16 - 23 */
+    'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f',    /* 24 - 31 */
+    'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n',    /* 32 - 39 */
+    'o', 'p', 'q', 'r', 's', 't', 'u', 'v',    /* 40 - 47 */
+    'w', 'x', 'y', 'z', '0', '1', '2', '3',    /* 48 - 55 */
+    '4', '5', '6', '7', '8', '9', '-', '_',    /* 56 - 63 */
+};
+
+#ifndef APR_UINT16_MAX
+#define APR_UINT16_MAX 0xffffu
+#endif
+
+/*
+ * Build a new identifier string for request r.
+ *
+ * The identifier packs, in this order: the request time in seconds, the
+ * per-child random root, the low 16 bits of the shared request counter and
+ * the connection id. The integer fields are stored in network byte order.
+ * The packed bytes are then encoded with the uuencoder[] alphabet, six bits
+ * per output character, with no trailing padding characters.
+ *
+ * Returns a NUL-terminated string of unique_id_rec_size_uu characters
+ * allocated from r->pool.
+ */
+static const char *gen_unique_id(const request_rec *r)
+{
+    char *str;
+    /*
+     * Buffer padded with two final bytes, used to copy the unique_id_rec
+     * structure without the internal paddings that it could have.
+     */
+    unique_id_rec new_unique_id;
+    struct {
+        unique_id_rec foo;
+        unsigned char pad[2];
+    } paddedbuf;
+    apr_uint32_t counter;
+    unsigned char *x, *y;
+    int i, j, k;
+
+    memcpy(&new_unique_id.root, &cur_unique_id.root, ROOT_SIZE);
+    new_unique_id.stamp = htonl((unsigned int)apr_time_sec(r->request_time));
+    new_unique_id.thread_index = htonl((unsigned int)r->connection->id);
+#ifdef APR_HAS_THREADS
+    if (is_threaded_mpm)
+        counter = apr_atomic_inc32(&cur_unique_counter);
+    else
+#endif
+        counter = cur_unique_counter++;
+
+    /* The counter is two bytes for the uuencoded unique id, in network
+     * byte order. Keep the low 16 bits with a mask instead of reducing
+     * modulo APR_UINT16_MAX: the modulo never produces 0xffff and, since
+     * 0xffffffff is a multiple of 0xffff, it maps both 0xffffffff and the
+     * wrapped-around 0 that follows it to 0, handing the same counter to
+     * two consecutive requests.
+     */
+    new_unique_id.counter = htons((unsigned short)(counter & APR_UINT16_MAX));
+
+    /* we'll use a temporary buffer to avoid uuencoding the possible internal
+     * paddings of the original structure */
+    x = (unsigned char *) &paddedbuf;
+    k = 0;
+    for (i = 0; i < UNIQUE_ID_REC_MAX; i++) {
+        y = ((unsigned char *) &new_unique_id) + unique_id_rec_offset[i];
+        for (j = 0; j < unique_id_rec_size[i]; j++, k++) {
+            x[k] = y[j];
+        }
+    }
+    /*
+     * We reset two more bytes just in case padding is needed for the
+     * uuencoding: the last group of three input bytes may read up to two
+     * bytes past the packed data.
+     */
+    x[k++] = '\0';
+    x[k++] = '\0';
+
+    /* alloc str and do the uuencoding: three input bytes become up to four
+     * output characters; stop as soon as the precomputed length is reached */
+    str = (char *)apr_palloc(r->pool, unique_id_rec_size_uu + 1);
+    k = 0;
+    for (i = 0; i < unique_id_rec_total_size; i += 3) {
+        y = x + i;
+        str[k++] = uuencoder[y[0] >> 2];
+        str[k++] = uuencoder[((y[0] & 0x03) << 4) | ((y[1] & 0xf0) >> 4)];
+        if (k == unique_id_rec_size_uu) break;
+        str[k++] = uuencoder[((y[1] & 0x0f) << 2) | ((y[2] & 0xc0) >> 6)];
+        if (k == unique_id_rec_size_uu) break;
+        str[k++] = uuencoder[y[2] & 0x3f];
+    }
+    str[k++] = '\0';
+
+    return str;
+}
+
+/*
+ * There are two ways the generation of a unique id can be triggered:
+ *
+ * - from the post_read_request hook which calls set_unique_id()
+ * - from error logging via the generate_log_id hook which calls
+ * generate_log_id(). This may happen before or after set_unique_id()
+ * has been called, or not at all.
+ */
+
+/*
+ * generate_log_id hook: supply the id used for error logging.
+ *
+ * Declines when there is no request (connection-level ids are not handled
+ * here). Otherwise stores in *id the UNIQUE_ID already present in
+ * r->subprocess_env, or a freshly generated id when none is present, and
+ * returns OK.
+ */
+static int generate_log_id(const conn_rec *c, const request_rec *r,
+                           const char **id)
+{
+    const char *existing;
+
+    /* we do not care about connection ids */
+    if (!r) {
+        return DECLINED;
+    }
+
+    /* XXX: do we need special handling for internal redirects? */
+
+    /* prefer the id that set_unique_id() stored for this request, if any */
+    existing = apr_table_get(r->subprocess_env, "UNIQUE_ID");
+    *id = (existing != NULL) ? existing : gen_unique_id(r);
+
+    return OK;
+}
+
+/*
+ * post_read_request hook: publish the request's id as the UNIQUE_ID
+ * environment variable.
+ *
+ * The id is taken from the first source that provides one:
+ *   1. REDIRECT_UNIQUE_ID, when this request is an internal redirect
+ *      (r->prev is set);
+ *   2. r->log_id, which our generate_log_id() may already have set;
+ *   3. a newly generated id.
+ *
+ * Always returns DECLINED.
+ */
+static int set_unique_id(request_rec *r)
+{
+    /* Internal redirects inherit the id of the request they came from.
+     * (Per the original note, this hook is never actually called for sub
+     * requests, so those need no test here.) */
+    const char *id = r->prev
+        ? apr_table_get(r->subprocess_env, "REDIRECT_UNIQUE_ID")
+        : NULL;
+
+    /* reuse the id that error logging already produced for this request */
+    if (id == NULL) {
+        id = r->log_id;
+    }
+
+    /* nothing to reuse: make a new one */
+    if (id == NULL) {
+        id = gen_unique_id(r);
+    }
+
+    /* set the environment variable */
+    apr_table_setn(r->subprocess_env, "UNIQUE_ID", id);
+
+    return DECLINED;
+}
+
+/*
+ * Attach this module's callbacks to the server hooks. Every callback is
+ * registered at APR_HOOK_MIDDLE with no ordering constraints. The pool
+ * argument is not used.
+ */
+static void register_hooks(apr_pool_t *p)
+{
+    /* compute the unique_id_rec layout tables */
+    ap_hook_post_config(unique_id_global_init,
+                        NULL, NULL, APR_HOOK_MIDDLE);
+
+    /* draw the per-child random root and counter seed */
+    ap_hook_child_init(unique_id_child_init,
+                       NULL, NULL, APR_HOOK_MIDDLE);
+
+    /* export UNIQUE_ID for each request */
+    ap_hook_post_read_request(set_unique_id,
+                              NULL, NULL, APR_HOOK_MIDDLE);
+
+    /* provide the same id to error logging */
+    ap_hook_generate_log_id(generate_log_id,
+                            NULL, NULL, APR_HOOK_MIDDLE);
+}
+
+/* Module record: every configuration slot is NULL; the module only
+ * registers hooks. */
+AP_DECLARE_MODULE(unique_id) = {
+ STANDARD20_MODULE_STUFF,
+ NULL, /* dir config creator */
+ NULL, /* dir merger --- default is to override */
+ NULL, /* server config */
+ NULL, /* merge server configs */
+ NULL, /* command apr_table_t */
+ register_hooks /* register hooks */
+};