diff options
Diffstat (limited to '')
-rw-r--r-- | modules/metadata/mod_usertrack.c | 459 |
1 files changed, 459 insertions, 0 deletions
diff --git a/modules/metadata/mod_usertrack.c b/modules/metadata/mod_usertrack.c new file mode 100644 index 0000000..73a9f45 --- /dev/null +++ b/modules/metadata/mod_usertrack.c @@ -0,0 +1,459 @@ +/* Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* User Tracking Module (Was mod_cookies.c) + * + * *** IMPORTANT NOTE: This module is not designed to generate + * *** cryptographically secure cookies. This means you should not + * *** use cookies generated by this module for authentication purposes + * + * This Apache module is designed to track users paths through a site. + * It uses the client-side state ("Cookie") protocol developed by Netscape. + * It is known to work on most browsers. + * + * Each time a page is requested we look to see if the browser is sending + * us a Cookie: header that we previously generated. + * + * If we don't find one then the user hasn't been to this site since + * starting their browser or their browser doesn't support cookies. So + * we generate a unique Cookie for the transaction and send it back to + * the browser (via a "Set-Cookie" header) + * Future requests from the same browser should keep the same Cookie line. + * + * By matching up all the requests with the same cookie you can + * work out exactly what path a user took through your site. To log + * the cookie use the " %{Cookie}n " directive in a custom access log; + * + * Example 1 : If you currently use the standard Log file format (CLF) + * and use the command "TransferLog somefilename", add the line + * LogFormat "%h %l %u %t \"%r\" %s %b %{Cookie}n" + * to your config file. + * + * Example 2 : If you used to use the old "CookieLog" directive, you + * can emulate it by adding the following command to your config file + * CustomLog filename "%{Cookie}n \"%r\" %t" + * + * Mark Cox, mjc@apache.org, 6 July 95 + * + * This file replaces mod_cookies.c + */ + +#include "apr.h" +#include "apr_lib.h" +#include "apr_strings.h" + +#define APR_WANT_STRFUNC +#include "apr_want.h" + +#include "httpd.h" +#include "http_config.h" +#include "http_core.h" +#include "http_request.h" +#include "http_log.h" + + +module AP_MODULE_DECLARE_DATA usertrack_module; + +typedef struct { + int always; + int expires; +} cookie_log_state; + +typedef enum { + CT_UNSET, + CT_NETSCAPE, + CT_COOKIE, + CT_COOKIE2 +} cookie_type_e; + +typedef struct { + int enabled; + cookie_type_e style; + const char *cookie_name; + const char *cookie_domain; + char *regexp_string; /* used to compile regexp; save for debugging */ + ap_regex_t *regexp; /* used to find usertrack cookie in cookie header */ +} cookie_dir_rec; + +/* Make Cookie: Now we have to generate something that is going to be + * pretty unique. We can base it on the pid, time, hostip */ + +#define COOKIE_NAME "Apache" + +static void make_cookie(request_rec *r) +{ + cookie_log_state *cls = ap_get_module_config(r->server->module_config, + &usertrack_module); + char cookiebuf[2 * (sizeof(apr_uint64_t) + sizeof(int)) + 2]; + unsigned int random; + apr_time_t now = r->request_time ? r->request_time : apr_time_now(); + char *new_cookie; + cookie_dir_rec *dcfg; + + ap_random_insecure_bytes(&random, sizeof(random)); + apr_snprintf(cookiebuf, sizeof(cookiebuf), "%x.%" APR_UINT64_T_HEX_FMT, + random, (apr_uint64_t)now); + dcfg = ap_get_module_config(r->per_dir_config, &usertrack_module); + if (cls->expires) { + + /* Cookie with date; as strftime '%a, %d-%h-%y %H:%M:%S GMT' */ + new_cookie = apr_psprintf(r->pool, "%s=%s; path=/", + dcfg->cookie_name, cookiebuf); + + if ((dcfg->style == CT_UNSET) || (dcfg->style == CT_NETSCAPE)) { + apr_time_exp_t tms; + apr_time_exp_gmt(&tms, r->request_time + + apr_time_from_sec(cls->expires)); + new_cookie = apr_psprintf(r->pool, + "%s; expires=%s, " + "%.2d-%s-%.2d %.2d:%.2d:%.2d GMT", + new_cookie, apr_day_snames[tms.tm_wday], + tms.tm_mday, + apr_month_snames[tms.tm_mon], + tms.tm_year % 100, + tms.tm_hour, tms.tm_min, tms.tm_sec); + } + else { + new_cookie = apr_psprintf(r->pool, "%s; max-age=%d", + new_cookie, cls->expires); + } + } + else { + new_cookie = apr_psprintf(r->pool, "%s=%s; path=/", + dcfg->cookie_name, cookiebuf); + } + if (dcfg->cookie_domain != NULL) { + new_cookie = apr_pstrcat(r->pool, new_cookie, "; domain=", + dcfg->cookie_domain, + (dcfg->style == CT_COOKIE2 + ? "; version=1" + : ""), + NULL); + } + + apr_table_addn(r->err_headers_out, + (dcfg->style == CT_COOKIE2 ? "Set-Cookie2" : "Set-Cookie"), + new_cookie); + apr_table_setn(r->notes, "cookie", apr_pstrdup(r->pool, cookiebuf)); /* log first time */ +} + +/* dcfg->regexp is "^cookie_name=([^;]+)|;[ \t]+cookie_name=([^;]+)", + * which has three subexpressions, $0..$2 */ +#define NUM_SUBS 3 + +static void set_and_comp_regexp(cookie_dir_rec *dcfg, + apr_pool_t *p, + const char *cookie_name) +{ + int danger_chars = 0; + const char *sp = cookie_name; + + /* The goal is to end up with this regexp, + * ^cookie_name=([^;,]+)|[;,][ \t]+cookie_name=([^;,]+) + * with cookie_name obviously substituted either + * with the real cookie name set by the user in httpd.conf, or with the + * default COOKIE_NAME. */ + + /* Anyway, we need to escape the cookie_name before pasting it + * into the regex + */ + while (*sp) { + if (!apr_isalnum(*sp)) { + ++danger_chars; + } + ++sp; + } + + if (danger_chars) { + char *cp; + cp = apr_palloc(p, sp - cookie_name + danger_chars + 1); /* 1 == \0 */ + sp = cookie_name; + cookie_name = cp; + while (*sp) { + if (!apr_isalnum(*sp)) { + *cp++ = '\\'; + } + *cp++ = *sp++; + } + *cp = '\0'; + } + + dcfg->regexp_string = apr_pstrcat(p, "^", + cookie_name, + "=([^;,]+)|[;,][ \t]*", + cookie_name, + "=([^;,]+)", NULL); + + dcfg->regexp = ap_pregcomp(p, dcfg->regexp_string, AP_REG_EXTENDED); + ap_assert(dcfg->regexp != NULL); +} + +static int spot_cookie(request_rec *r) +{ + cookie_dir_rec *dcfg = ap_get_module_config(r->per_dir_config, + &usertrack_module); + const char *cookie_header; + ap_regmatch_t regm[NUM_SUBS]; + + /* Do not run in subrequests */ + if (!dcfg->enabled || r->main) { + return DECLINED; + } + + if ((cookie_header = apr_table_get(r->headers_in, "Cookie"))) { + if (!ap_regexec(dcfg->regexp, cookie_header, NUM_SUBS, regm, 0)) { + char *cookieval = NULL; + int err = 0; + /* Our regexp, + * ^cookie_name=([^;]+)|;[ \t]+cookie_name=([^;]+) + * only allows for $1 or $2 to be available. ($0 is always + * filled with the entire matched expression, not just + * the part in parentheses.) So just check for either one + * and assign to cookieval if present. */ + if (regm[1].rm_so != -1) { + cookieval = ap_pregsub(r->pool, "$1", cookie_header, + NUM_SUBS, regm); + if (cookieval == NULL) + err = 1; + } + if (regm[2].rm_so != -1) { + cookieval = ap_pregsub(r->pool, "$2", cookie_header, + NUM_SUBS, regm); + if (cookieval == NULL) + err = 1; + } + if (err) { + ap_log_rerror(APLOG_MARK, APLOG_CRIT, 0, r, APLOGNO(01499) + "Failed to extract cookie value (out of mem?)"); + return HTTP_INTERNAL_SERVER_ERROR; + } + /* Set the cookie in a note, for logging */ + apr_table_setn(r->notes, "cookie", cookieval); + + return DECLINED; /* There's already a cookie, no new one */ + } + } + make_cookie(r); + return OK; /* We set our cookie */ +} + +static void *make_cookie_log_state(apr_pool_t *p, server_rec *s) +{ + cookie_log_state *cls = + (cookie_log_state *) apr_palloc(p, sizeof(cookie_log_state)); + + cls->expires = 0; + + return (void *) cls; +} + +static void *make_cookie_dir(apr_pool_t *p, char *d) +{ + cookie_dir_rec *dcfg; + + dcfg = (cookie_dir_rec *) apr_pcalloc(p, sizeof(cookie_dir_rec)); + dcfg->cookie_name = COOKIE_NAME; + dcfg->cookie_domain = NULL; + dcfg->style = CT_UNSET; + dcfg->enabled = 0; + + /* In case the user does not use the CookieName directive, + * we need to compile the regexp for the default cookie name. */ + set_and_comp_regexp(dcfg, p, COOKIE_NAME); + + return dcfg; +} + +static const char *set_cookie_enable(cmd_parms *cmd, void *mconfig, int arg) +{ + cookie_dir_rec *dcfg = mconfig; + + dcfg->enabled = arg; + return NULL; +} + +static const char *set_cookie_exp(cmd_parms *parms, void *dummy, + const char *arg) +{ + cookie_log_state *cls; + time_t factor, modifier = 0; + time_t num = 0; + char *word; + + cls = ap_get_module_config(parms->server->module_config, + &usertrack_module); + /* The simple case first - all numbers (we assume) */ + if (apr_isdigit(arg[0]) && apr_isdigit(arg[strlen(arg) - 1])) { + cls->expires = atol(arg); + return NULL; + } + + /* + * The harder case - stolen from mod_expires + * + * CookieExpires "[plus] {<num> <type>}*" + */ + + word = ap_getword_conf(parms->temp_pool, &arg); + if (!strncasecmp(word, "plus", 1)) { + word = ap_getword_conf(parms->temp_pool, &arg); + }; + + /* {<num> <type>}* */ + while (word[0]) { + /* <num> */ + if (apr_isdigit(word[0])) + num = atoi(word); + else + return "bad expires code, numeric value expected."; + + /* <type> */ + word = ap_getword_conf(parms->temp_pool, &arg); + if (!word[0]) + return "bad expires code, missing <type>"; + + if (!strncasecmp(word, "years", 1)) + factor = 60 * 60 * 24 * 365; + else if (!strncasecmp(word, "months", 2)) + factor = 60 * 60 * 24 * 30; + else if (!strncasecmp(word, "weeks", 1)) + factor = 60 * 60 * 24 * 7; + else if (!strncasecmp(word, "days", 1)) + factor = 60 * 60 * 24; + else if (!strncasecmp(word, "hours", 1)) + factor = 60 * 60; + else if (!strncasecmp(word, "minutes", 2)) + factor = 60; + else if (!strncasecmp(word, "seconds", 1)) + factor = 1; + else + return "bad expires code, unrecognized type"; + + modifier = modifier + factor * num; + + /* next <num> */ + word = ap_getword_conf(parms->temp_pool, &arg); + } + + cls->expires = modifier; + + return NULL; +} + +static const char *set_cookie_name(cmd_parms *cmd, void *mconfig, + const char *name) +{ + cookie_dir_rec *dcfg = (cookie_dir_rec *) mconfig; + + dcfg->cookie_name = name; + + set_and_comp_regexp(dcfg, cmd->pool, name); + + if (dcfg->regexp == NULL) { + return "Regular expression could not be compiled."; + } + if (dcfg->regexp->re_nsub + 1 != NUM_SUBS) { + return apr_pstrcat(cmd->pool, "Invalid cookie name \"", + name, "\"", NULL); + } + + return NULL; +} + +/* + * Set the value for the 'Domain=' attribute. + */ +static const char *set_cookie_domain(cmd_parms *cmd, void *mconfig, + const char *name) +{ + cookie_dir_rec *dcfg; + + dcfg = (cookie_dir_rec *) mconfig; + + /* + * Apply the restrictions on cookie domain attributes. + */ + if (!name[0]) { + return "CookieDomain values may not be null"; + } + if (name[0] != '.') { + return "CookieDomain values must begin with a dot"; + } + if (ap_strchr_c(&name[1], '.') == NULL) { + return "CookieDomain values must contain at least one embedded dot"; + } + + dcfg->cookie_domain = name; + return NULL; +} + +/* + * Make a note of the cookie style we should use. + */ +static const char *set_cookie_style(cmd_parms *cmd, void *mconfig, + const char *name) +{ + cookie_dir_rec *dcfg; + + dcfg = (cookie_dir_rec *) mconfig; + + if (strcasecmp(name, "Netscape") == 0) { + dcfg->style = CT_NETSCAPE; + } + else if ((strcasecmp(name, "Cookie") == 0) + || (strcasecmp(name, "RFC2109") == 0)) { + dcfg->style = CT_COOKIE; + } + else if ((strcasecmp(name, "Cookie2") == 0) + || (strcasecmp(name, "RFC2965") == 0)) { + dcfg->style = CT_COOKIE2; + } + else { + return apr_psprintf(cmd->pool, "Invalid %s keyword: '%s'", + cmd->cmd->name, name); + } + + return NULL; +} + +static const command_rec cookie_log_cmds[] = { + AP_INIT_TAKE1("CookieExpires", set_cookie_exp, NULL, OR_FILEINFO, + "an expiry date code"), + AP_INIT_TAKE1("CookieDomain", set_cookie_domain, NULL, OR_FILEINFO, + "domain to which this cookie applies"), + AP_INIT_TAKE1("CookieStyle", set_cookie_style, NULL, OR_FILEINFO, + "'Netscape', 'Cookie' (RFC2109), or 'Cookie2' (RFC2965)"), + AP_INIT_FLAG("CookieTracking", set_cookie_enable, NULL, OR_FILEINFO, + "whether or not to enable cookies"), + AP_INIT_TAKE1("CookieName", set_cookie_name, NULL, OR_FILEINFO, + "name of the tracking cookie"), + {NULL} +}; + +static void register_hooks(apr_pool_t *p) +{ + ap_hook_fixups(spot_cookie,NULL,NULL,APR_HOOK_REALLY_FIRST); +} + +AP_DECLARE_MODULE(usertrack) = { + STANDARD20_MODULE_STUFF, + make_cookie_dir, /* dir config creater */ + NULL, /* dir merger --- default is to override */ + make_cookie_log_state, /* server config */ + NULL, /* merge server configs */ + cookie_log_cmds, /* command apr_table_t */ + register_hooks /* register hooks */ +}; |