summaryrefslogtreecommitdiffstats
path: root/modules/filters/mod_proxy_html.c
diff options
context:
space:
mode:
Diffstat (limited to 'modules/filters/mod_proxy_html.c')
-rw-r--r--modules/filters/mod_proxy_html.c1353
1 files changed, 1353 insertions, 0 deletions
diff --git a/modules/filters/mod_proxy_html.c b/modules/filters/mod_proxy_html.c
new file mode 100644
index 0000000..7783da1
--- /dev/null
+++ b/modules/filters/mod_proxy_html.c
@@ -0,0 +1,1353 @@
+/* Copyright (c) 2003-11, WebThing Ltd
+ * Copyright (c) 2011-, The Apache Software Foundation
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* GO_FASTER
+ You can #define GO_FASTER to disable trace logging.
+*/
+
+#ifdef GO_FASTER
+#define VERBOSE(x)
+#define VERBOSEB(x)
+#else
+#define VERBOSE(x) if (verbose) x
+#define VERBOSEB(x) if (verbose) {x}
+#endif
+
+/* libxml2 includes unicode/[...].h files which uses C++ comments */
+#if defined(__clang__)
+#pragma clang diagnostic push
+#pragma clang diagnostic warning "-Wcomment"
+#elif defined(__GNUC__)
+#if __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6)
+#pragma GCC diagnostic push
+#pragma GCC diagnostic warning "-Wcomment"
+#endif
+#endif
+
+/* libxml2 */
+#include <libxml/HTMLparser.h>
+
+#if defined(__clang__)
+#pragma clang diagnostic pop
+#elif defined(__GNUC__)
+#if __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6)
+#pragma GCC diagnostic pop
+#endif
+#endif
+
+#include "http_protocol.h"
+#include "http_config.h"
+#include "http_log.h"
+#include "apr_strings.h"
+#include "apr_hash.h"
+#include "apr_strmatch.h"
+#include "apr_lib.h"
+
+#include "apr_optional.h"
+#include "mod_xml2enc.h"
+#include "http_request.h"
+#include "ap_expr.h"
+
+/* globals set once at startup */
+static ap_rxplus_t *old_expr;
+static ap_regex_t *seek_meta;
+static const apr_strmatch_pattern* seek_content;
+static apr_status_t (*xml2enc_charset)(request_rec*, xmlCharEncoding*, const char**) = NULL;
+static apr_status_t (*xml2enc_filter)(request_rec*, const char*, unsigned int) = NULL;
+
+module AP_MODULE_DECLARE_DATA proxy_html_module;
+
+#define M_HTML 0x01
+#define M_EVENTS 0x02
+#define M_CDATA 0x04
+#define M_REGEX 0x08
+#define M_ATSTART 0x10
+#define M_ATEND 0x20
+#define M_LAST 0x40
+#define M_NOTLAST 0x80
+#define M_INTERPOLATE_TO 0x100
+#define M_INTERPOLATE_FROM 0x200
+
+typedef struct {
+ const char *val;
+} tattr;
+typedef struct {
+ unsigned int start;
+ unsigned int end;
+} meta;
+typedef struct urlmap {
+ struct urlmap *next;
+ unsigned int flags;
+ unsigned int regflags;
+ union {
+ const char *c;
+ ap_regex_t *r;
+ } from;
+ const char *to;
+ ap_expr_info_t *cond;
+} urlmap;
+typedef struct {
+ urlmap *map;
+ const char *doctype;
+ const char *etag;
+ unsigned int flags;
+ int bufsz;
+ apr_hash_t *links;
+ apr_array_header_t *events;
+ const char *charset_out;
+ int extfix;
+ int metafix;
+ int strip_comments;
+ int interp;
+ int enabled;
+} proxy_html_conf;
+typedef struct {
+ ap_filter_t *f;
+ proxy_html_conf *cfg;
+ htmlParserCtxtPtr parser;
+ apr_bucket_brigade *bb;
+ char *buf;
+ size_t offset;
+ size_t avail;
+ const char *encoding;
+ urlmap *map;
+ char rbuf[4];
+ apr_size_t rlen;
+ apr_size_t rmin;
+} saxctxt;
+
+
+#define NORM_LC 0x1
+#define NORM_MSSLASH 0x2
+#define NORM_RESET 0x4
+static htmlSAXHandler sax;
+
+typedef enum { ATTR_IGNORE, ATTR_URI, ATTR_EVENT } rewrite_t;
+
+static const char *const fpi_html =
+ "<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML 4.01//EN\">\n";
+static const char *const fpi_html_legacy =
+ "<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\">\n";
+static const char *const fpi_xhtml =
+ "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Strict//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd\">\n";
+static const char *const fpi_xhtml_legacy =
+ "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">\n";
+static const char *const fpi_html5 = "<!DOCTYPE html>\n";
+static const char *const html_etag = ">";
+static const char *const xhtml_etag = " />";
+/*#define DEFAULT_DOCTYPE fpi_html */
+static const char *const DEFAULT_DOCTYPE = "";
+#define DEFAULT_ETAG html_etag
+
+static void normalise(unsigned int flags, char *str)
+{
+ char *p;
+ if (flags & NORM_LC)
+ for (p = str; *p; ++p)
+ if (isupper(*p))
+ *p = tolower(*p);
+
+ if (flags & NORM_MSSLASH)
+ for (p = ap_strchr(str, '\\'); p; p = ap_strchr(p+1, '\\'))
+ *p = '/';
+
+}
+#define consume_buffer(ctx,inbuf,bytes,flag) \
+ htmlParseChunk(ctx->parser, inbuf, bytes, flag)
+
+#define AP_fwrite(ctx,inbuf,bytes,flush) \
+ ap_fwrite(ctx->f->next, ctx->bb, inbuf, bytes);
+
+/* This is always utf-8 on entry. We can convert charset within FLUSH */
+#define FLUSH AP_fwrite(ctx, (chars+begin), (i-begin), 0); begin = i+1
+static void pcharacters(void *ctxt, const xmlChar *uchars, int length)
+{
+ const char *chars = (const char*) uchars;
+ saxctxt *ctx = (saxctxt*) ctxt;
+ int i;
+ int begin;
+ for (begin=i=0; i<length; i++) {
+ switch (chars[i]) {
+ case '&' : FLUSH; ap_fputs(ctx->f->next, ctx->bb, "&amp;"); break;
+ case '<' : FLUSH; ap_fputs(ctx->f->next, ctx->bb, "&lt;"); break;
+ case '>' : FLUSH; ap_fputs(ctx->f->next, ctx->bb, "&gt;"); break;
+ case '"' : FLUSH; ap_fputs(ctx->f->next, ctx->bb, "&quot;"); break;
+ default : break;
+ }
+ }
+ FLUSH;
+}
+
+static void preserve(saxctxt *ctx, const size_t len)
+{
+ char *newbuf;
+ if (len <= (ctx->avail - ctx->offset))
+ return;
+ else while (len > (ctx->avail - ctx->offset))
+ ctx->avail += ctx->cfg->bufsz;
+
+ newbuf = realloc(ctx->buf, ctx->avail);
+ if (newbuf != ctx->buf) {
+ if (ctx->buf)
+ apr_pool_cleanup_kill(ctx->f->r->pool, ctx->buf,
+ (int(*)(void*))free);
+ apr_pool_cleanup_register(ctx->f->r->pool, newbuf,
+ (int(*)(void*))free, apr_pool_cleanup_null);
+ ctx->buf = newbuf;
+ }
+}
+
+static void pappend(saxctxt *ctx, const char *buf, const size_t len)
+{
+ preserve(ctx, len);
+ memcpy(ctx->buf+ctx->offset, buf, len);
+ ctx->offset += len;
+}
+
+static void dump_content(saxctxt *ctx)
+{
+ urlmap *m;
+ char *found;
+ size_t s_from, s_to;
+ size_t match;
+ char c = 0;
+ int nmatch;
+ ap_regmatch_t pmatch[10];
+ char *subs;
+ size_t len, offs;
+ urlmap *themap = ctx->map;
+#ifndef GO_FASTER
+ int verbose = APLOGrtrace1(ctx->f->r);
+#endif
+
+ pappend(ctx, &c, 1); /* append null byte */
+ /* parse the text for URLs */
+ for (m = themap; m; m = m->next) {
+ if (!(m->flags & M_CDATA))
+ continue;
+ if (m->flags & M_REGEX) {
+ nmatch = 10;
+ offs = 0;
+ while (!ap_regexec(m->from.r, ctx->buf+offs, nmatch, pmatch, 0)) {
+ match = pmatch[0].rm_so;
+ s_from = pmatch[0].rm_eo - match;
+ subs = ap_pregsub(ctx->f->r->pool, m->to, ctx->buf+offs,
+ nmatch, pmatch);
+ s_to = strlen(subs);
+ len = strlen(ctx->buf);
+ offs += match;
+ VERBOSEB(
+ const char *f = apr_pstrndup(ctx->f->r->pool,
+ ctx->buf + offs, s_from);
+ ap_log_rerror(APLOG_MARK, APLOG_TRACE3, 0, ctx->f->r,
+ "C/RX: match at %s, substituting %s", f, subs);
+ )
+ if (s_to > s_from) {
+ preserve(ctx, s_to - s_from);
+ memmove(ctx->buf+offs+s_to, ctx->buf+offs+s_from,
+ len + 1 - s_from - offs);
+ memcpy(ctx->buf+offs, subs, s_to);
+ }
+ else {
+ memcpy(ctx->buf + offs, subs, s_to);
+ memmove(ctx->buf+offs+s_to, ctx->buf+offs+s_from,
+ len + 1 - s_from - offs);
+ }
+ offs += s_to;
+ }
+ }
+ else {
+ s_from = strlen(m->from.c);
+ s_to = strlen(m->to);
+ for (found = strstr(ctx->buf, m->from.c); found;
+ found = strstr(ctx->buf+match+s_to, m->from.c)) {
+ match = found - ctx->buf;
+ if ((m->flags & M_ATSTART) && (match != 0))
+ break;
+ len = strlen(ctx->buf);
+ if ((m->flags & M_ATEND) && (match < (len - s_from)))
+ continue;
+ VERBOSE(ap_log_rerror(APLOG_MARK, APLOG_TRACE3, 0, ctx->f->r,
+ "C: matched %s, substituting %s",
+ m->from.c, m->to));
+ if (s_to > s_from) {
+ preserve(ctx, s_to - s_from);
+ memmove(ctx->buf+match+s_to, ctx->buf+match+s_from,
+ len + 1 - s_from - match);
+ memcpy(ctx->buf+match, m->to, s_to);
+ }
+ else {
+ memcpy(ctx->buf+match, m->to, s_to);
+ memmove(ctx->buf+match+s_to, ctx->buf+match+s_from,
+ len + 1 - s_from - match);
+ }
+ }
+ }
+ }
+ AP_fwrite(ctx, ctx->buf, strlen(ctx->buf), 1);
+}
+static void pcdata(void *ctxt, const xmlChar *uchars, int length)
+{
+ const char *chars = (const char*) uchars;
+ saxctxt *ctx = (saxctxt*) ctxt;
+ if (ctx->cfg->extfix) {
+ pappend(ctx, chars, length);
+ }
+ else {
+ /* not sure if this should force-flush
+ * (i.e. can one cdata section come in multiple calls?)
+ */
+ AP_fwrite(ctx, chars, length, 0);
+ }
+}
+static void pcomment(void *ctxt, const xmlChar *uchars)
+{
+ const char *chars = (const char*) uchars;
+ saxctxt *ctx = (saxctxt*) ctxt;
+ if (ctx->cfg->strip_comments)
+ return;
+
+ if (ctx->cfg->extfix) {
+ pappend(ctx, "<!--", 4);
+ pappend(ctx, chars, strlen(chars));
+ pappend(ctx, "-->", 3);
+ }
+ else {
+ ap_fputs(ctx->f->next, ctx->bb, "<!--");
+ AP_fwrite(ctx, chars, strlen(chars), 1);
+ ap_fputs(ctx->f->next, ctx->bb, "-->");
+ dump_content(ctx);
+ }
+}
+static void pendElement(void *ctxt, const xmlChar *uname)
+{
+ saxctxt *ctx = (saxctxt*) ctxt;
+ const char *name = (const char*) uname;
+ const htmlElemDesc* desc = htmlTagLookup(uname);
+
+ if ((ctx->cfg->doctype == fpi_html) || (ctx->cfg->doctype == fpi_xhtml)) {
+ /* enforce html */
+ if (!desc || desc->depr)
+ return;
+
+ }
+ else if ((ctx->cfg->doctype == fpi_html_legacy)
+ || (ctx->cfg->doctype == fpi_xhtml_legacy)) {
+ /* enforce html legacy */
+ if (!desc)
+ return;
+ }
+ /* TODO - implement HTML "allowed here" using the stack */
+ /* nah. Keeping the stack is too much overhead */
+
+ if (ctx->offset > 0) {
+ dump_content(ctx);
+ ctx->offset = 0; /* having dumped it, we can re-use the memory */
+ }
+ if (!desc || !desc->empty) {
+ ap_fprintf(ctx->f->next, ctx->bb, "</%s>", name);
+ }
+}
+
+static void pstartElement(void *ctxt, const xmlChar *uname,
+ const xmlChar** uattrs)
+{
+ int required_attrs;
+ int num_match;
+ size_t offs, len;
+ char *subs;
+ rewrite_t is_uri;
+ const char** a;
+ urlmap *m;
+ size_t s_to, s_from, match;
+ char *found;
+ saxctxt *ctx = (saxctxt*) ctxt;
+ size_t nmatch;
+ ap_regmatch_t pmatch[10];
+#ifndef GO_FASTER
+ int verbose = APLOGrtrace1(ctx->f->r);
+#endif
+ apr_array_header_t *linkattrs;
+ int i;
+ const char *name = (const char*) uname;
+ const char** attrs = (const char**) uattrs;
+ const htmlElemDesc* desc = htmlTagLookup(uname);
+ urlmap *themap = ctx->map;
+#ifdef HAVE_STACK
+ const void** descp;
+#endif
+ int enforce = 0;
+ if ((ctx->cfg->doctype == fpi_html) || (ctx->cfg->doctype == fpi_xhtml)) {
+ /* enforce html */
+ if (!desc || desc->depr) {
+ ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, ctx->f->r, APLOGNO(01416)
+ "Bogus HTML element %s dropped", name);
+ return;
+ }
+ enforce = 2;
+ }
+ else if ((ctx->cfg->doctype == fpi_html_legacy)
+ || (ctx->cfg->doctype == fpi_xhtml_legacy)) {
+ /* enforce html legacy */
+ if (!desc) {
+ ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, ctx->f->r, APLOGNO(01417)
+ "Deprecated HTML element %s dropped", name);
+ return;
+ }
+ enforce = 1;
+ }
+#ifdef HAVE_STACK
+ descp = apr_array_push(ctx->stack);
+ *descp = desc;
+ /* TODO - implement HTML "allowed here" */
+#endif
+
+ ap_fputc(ctx->f->next, ctx->bb, '<');
+ ap_fputs(ctx->f->next, ctx->bb, name);
+
+ required_attrs = 0;
+ if ((enforce > 0) && (desc != NULL) && (desc->attrs_req != NULL))
+ for (a = desc->attrs_req; *a; a++)
+ ++required_attrs;
+
+ if (attrs) {
+ linkattrs = apr_hash_get(ctx->cfg->links, name, APR_HASH_KEY_STRING);
+ for (a = attrs; *a; a += 2) {
+ if (desc && enforce > 0) {
+ switch (htmlAttrAllowed(desc, (xmlChar*)*a, 2-enforce)) {
+ case HTML_INVALID:
+ ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, ctx->f->r, APLOGNO(01418)
+ "Bogus HTML attribute %s of %s dropped",
+ *a, name);
+ continue;
+ case HTML_DEPRECATED:
+ ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, ctx->f->r, APLOGNO(01419)
+ "Deprecated HTML attribute %s of %s dropped",
+ *a, name);
+ continue;
+ case HTML_REQUIRED:
+ required_attrs--; /* cross off the number still needed */
+ /* fallthrough - required implies valid */
+ default:
+ break;
+ }
+ }
+ ctx->offset = 0;
+ if (a[1]) {
+ pappend(ctx, a[1], strlen(a[1])+1);
+ is_uri = ATTR_IGNORE;
+ if (linkattrs) {
+ tattr *attrs = (tattr*) linkattrs->elts;
+ for (i=0; i < linkattrs->nelts; ++i) {
+ if (!strcmp(*a, attrs[i].val)) {
+ is_uri = ATTR_URI;
+ break;
+ }
+ }
+ }
+ if ((is_uri == ATTR_IGNORE) && ctx->cfg->extfix
+ && (ctx->cfg->events != NULL)) {
+ for (i=0; i < ctx->cfg->events->nelts; ++i) {
+ tattr *attrs = (tattr*) ctx->cfg->events->elts;
+ if (!strcmp(*a, attrs[i].val)) {
+ is_uri = ATTR_EVENT;
+ break;
+ }
+ }
+ }
+ switch (is_uri) {
+ case ATTR_URI:
+ num_match = 0;
+ for (m = themap; m; m = m->next) {
+ if (!(m->flags & M_HTML))
+ continue;
+ if (m->flags & M_REGEX) {
+ nmatch = 10;
+ if (!ap_regexec(m->from.r, ctx->buf, nmatch,
+ pmatch, 0)) {
+ ++num_match;
+ offs = match = pmatch[0].rm_so;
+ s_from = pmatch[0].rm_eo - match;
+ subs = ap_pregsub(ctx->f->r->pool, m->to,
+ ctx->buf, nmatch, pmatch);
+ VERBOSE({
+ const char *f;
+ f = apr_pstrndup(ctx->f->r->pool,
+ ctx->buf + offs, s_from);
+ ap_log_rerror(APLOG_MARK, APLOG_TRACE3, 0,
+ ctx->f->r,
+ "H/RX: match at %s, substituting %s",
+ f, subs);
+ })
+ s_to = strlen(subs);
+ len = strlen(ctx->buf);
+ if (s_to > s_from) {
+ preserve(ctx, s_to - s_from);
+ memmove(ctx->buf+offs+s_to,
+ ctx->buf+offs+s_from,
+ len + 1 - s_from - offs);
+ memcpy(ctx->buf+offs, subs, s_to);
+ }
+ else {
+ memcpy(ctx->buf + offs, subs, s_to);
+ memmove(ctx->buf+offs+s_to,
+ ctx->buf+offs+s_from,
+ len + 1 - s_from - offs);
+ }
+ }
+ } else {
+ s_from = strlen(m->from.c);
+ if (!strncasecmp(ctx->buf, m->from.c, s_from)) {
+ ++num_match;
+ s_to = strlen(m->to);
+ len = strlen(ctx->buf);
+ VERBOSE(ap_log_rerror(APLOG_MARK, APLOG_TRACE3,
+ 0, ctx->f->r,
+ "H: matched %s, substituting %s",
+ m->from.c, m->to));
+ if (s_to > s_from) {
+ preserve(ctx, s_to - s_from);
+ memmove(ctx->buf+s_to, ctx->buf+s_from,
+ len + 1 - s_from);
+ memcpy(ctx->buf, m->to, s_to);
+ }
+ else { /* it fits in the existing space */
+ memcpy(ctx->buf, m->to, s_to);
+ memmove(ctx->buf+s_to, ctx->buf+s_from,
+ len + 1 - s_from);
+ }
+ break;
+ }
+ }
+ /* URIs only want one match unless overridden in the config */
+ if ((num_match > 0) && !(m->flags & M_NOTLAST))
+ break;
+ }
+ break;
+ case ATTR_EVENT:
+ for (m = themap; m; m = m->next) {
+ num_match = 0; /* reset here since we're working per-rule */
+ if (!(m->flags & M_EVENTS))
+ continue;
+ if (m->flags & M_REGEX) {
+ nmatch = 10;
+ offs = 0;
+ while (!ap_regexec(m->from.r, ctx->buf+offs,
+ nmatch, pmatch, 0)) {
+ match = pmatch[0].rm_so;
+ s_from = pmatch[0].rm_eo - match;
+ subs = ap_pregsub(ctx->f->r->pool, m->to, ctx->buf+offs,
+ nmatch, pmatch);
+ VERBOSE({
+ const char *f;
+ f = apr_pstrndup(ctx->f->r->pool,
+ ctx->buf + offs, s_from);
+ ap_log_rerror(APLOG_MARK, APLOG_TRACE3, 0,
+ ctx->f->r,
+ "E/RX: match at %s, substituting %s",
+ f, subs);
+ })
+ s_to = strlen(subs);
+ offs += match;
+ len = strlen(ctx->buf);
+ if (s_to > s_from) {
+ preserve(ctx, s_to - s_from);
+ memmove(ctx->buf+offs+s_to,
+ ctx->buf+offs+s_from,
+ len + 1 - s_from - offs);
+ memcpy(ctx->buf+offs, subs, s_to);
+ }
+ else {
+ memcpy(ctx->buf + offs, subs, s_to);
+ memmove(ctx->buf+offs+s_to,
+ ctx->buf+offs+s_from,
+ len + 1 - s_from - offs);
+ }
+ offs += s_to;
+ ++num_match;
+ }
+ }
+ else {
+ found = strstr(ctx->buf, m->from.c);
+ if ((m->flags & M_ATSTART) && (found != ctx->buf))
+ continue;
+ while (found) {
+ s_from = strlen(m->from.c);
+ s_to = strlen(m->to);
+ match = found - ctx->buf;
+ if ((s_from < strlen(found))
+ && (m->flags & M_ATEND)) {
+ found = strstr(ctx->buf+match+s_from,
+ m->from.c);
+ continue;
+ }
+ else {
+ found = strstr(ctx->buf+match+s_to,
+ m->from.c);
+ }
+ VERBOSE(ap_log_rerror(APLOG_MARK, APLOG_TRACE3,
+ 0, ctx->f->r,
+ "E: matched %s, substituting %s",
+ m->from.c, m->to));
+ len = strlen(ctx->buf);
+ if (s_to > s_from) {
+ preserve(ctx, s_to - s_from);
+ memmove(ctx->buf+match+s_to,
+ ctx->buf+match+s_from,
+ len + 1 - s_from - match);
+ memcpy(ctx->buf+match, m->to, s_to);
+ }
+ else {
+ memcpy(ctx->buf+match, m->to, s_to);
+ memmove(ctx->buf+match+s_to,
+ ctx->buf+match+s_from,
+ len + 1 - s_from - match);
+ }
+ ++num_match;
+ }
+ }
+ if (num_match && (m->flags & M_LAST))
+ break;
+ }
+ break;
+ case ATTR_IGNORE:
+ break;
+ }
+ }
+ if (!a[1])
+ ap_fputstrs(ctx->f->next, ctx->bb, " ", a[0], NULL);
+ else {
+
+ if (ctx->cfg->flags != 0)
+ normalise(ctx->cfg->flags, ctx->buf);
+
+ /* write the attribute, using pcharacters to html-escape
+ anything that needs it in the value.
+ */
+ ap_fputstrs(ctx->f->next, ctx->bb, " ", a[0], "=\"", NULL);
+ pcharacters(ctx, (const xmlChar*)ctx->buf, strlen(ctx->buf));
+ ap_fputc(ctx->f->next, ctx->bb, '"');
+ }
+ }
+ }
+ ctx->offset = 0;
+ if (desc && desc->empty)
+ ap_fputs(ctx->f->next, ctx->bb, ctx->cfg->etag);
+ else
+ ap_fputc(ctx->f->next, ctx->bb, '>');
+
+ if ((enforce > 0) && (required_attrs > 0)) {
+ /* if there are more required attributes than we found then complain */
+ ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, ctx->f->r, APLOGNO(01420)
+ "HTML element %s is missing %d required attributes",
+ name, required_attrs);
+ }
+}
+
+static meta *metafix(request_rec *r, const char *buf, apr_size_t len)
+{
+ meta *ret = NULL;
+ size_t offs = 0;
+ const char *p;
+ const char *q;
+ char *header;
+ char *content;
+ ap_regmatch_t pmatch[2];
+ char delim;
+
+ while (offs < len &&
+ !ap_regexec_len(seek_meta, buf + offs, len - offs, 2, pmatch, 0)) {
+ header = NULL;
+ content = NULL;
+ p = buf+offs+pmatch[1].rm_eo;
+ while (!apr_isalpha(*++p));
+ for (q = p; apr_isalnum(*q) || (*q == '-'); ++q);
+ header = apr_pstrmemdup(r->pool, p, q-p);
+ if (ap_cstr_casecmpn(header, "Content-", 8)) {
+ /* find content=... string */
+ p = apr_strmatch(seek_content, buf+offs+pmatch[0].rm_so,
+ pmatch[0].rm_eo - pmatch[0].rm_so);
+ /* if it doesn't contain "content", ignore, don't crash! */
+ if (p != NULL) {
+ while (*p) {
+ p += 7;
+ while (apr_isspace(*p))
+ ++p;
+ /* XXX Should we search for another content= pattern? */
+ if (*p != '=')
+ break;
+ while (*p && apr_isspace(*++p));
+ if ((*p == '\'') || (*p == '"')) {
+ delim = *p++;
+ for (q = p; *q && *q != delim; ++q);
+ /* No terminating delimiter found? Skip the bogus directive */
+ if (*q != delim)
+ break;
+ } else {
+ for (q = p; *q && !apr_isspace(*q) && (*q != '>'); ++q);
+ }
+ content = apr_pstrmemdup(r->pool, p, q-p);
+ break;
+ }
+ }
+ }
+ else if (!ap_cstr_casecmpn(header, "Content-Type", 12)) {
+ ret = apr_palloc(r->pool, sizeof(meta));
+ ret->start = offs+pmatch[0].rm_so;
+ ret->end = offs+pmatch[0].rm_eo;
+ }
+ if (header && content) {
+#ifndef GO_FASTER
+ ap_log_rerror(APLOG_MARK, APLOG_TRACE2, 0, r,
+ "Adding header [%s: %s] from HTML META",
+ header, content);
+#endif
+ apr_table_setn(r->headers_out, header, content);
+ }
+ offs += pmatch[0].rm_eo;
+ }
+ return ret;
+}
+
+static const char *interpolate_vars(request_rec *r, const char *str)
+{
+ const char *start;
+ const char *end;
+ const char *delim;
+ const char *before;
+ const char *after;
+ const char *replacement;
+ const char *var;
+ for (;;) {
+ if ((start = ap_strstr_c(str, "${")) == NULL)
+ break;
+
+ if ((end = ap_strchr_c(start+2, '}')) == NULL)
+ break;
+
+ delim = ap_strchr_c(start+2, '|');
+
+ /* Restrict delim to ${...} */
+ if (delim && delim >= end) {
+ delim = NULL;
+ }
+
+ before = apr_pstrmemdup(r->pool, str, start-str);
+ after = end+1;
+ if (delim) {
+ var = apr_pstrmemdup(r->pool, start+2, delim-start-2);
+ }
+ else {
+ var = apr_pstrmemdup(r->pool, start+2, end-start-2);
+ }
+ replacement = apr_table_get(r->subprocess_env, var);
+ if (!replacement) {
+ if (delim)
+ replacement = apr_pstrmemdup(r->pool, delim+1, end-delim-1);
+ else
+ replacement = "";
+ }
+ str = apr_pstrcat(r->pool, before, replacement, after, NULL);
+ ap_log_rerror(APLOG_MARK, APLOG_TRACE1, 0, r,
+ "Interpolating %s => %s", var, replacement);
+ }
+ return str;
+}
+static void fixup_rules(saxctxt *ctx)
+{
+ urlmap *newp;
+ urlmap *p;
+ urlmap *prev = NULL;
+ request_rec *r = ctx->f->r;
+
+ for (p = ctx->cfg->map; p; p = p->next) {
+ if (p->cond != NULL) {
+ const char *err;
+ int ok = ap_expr_exec(r, p->cond, &err);
+ if (err) {
+ ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r, APLOGNO(01421)
+ "Error evaluating expr: %s", err);
+ }
+ if (ok == 0) {
+ continue; /* condition is unsatisfied */
+ }
+ }
+
+ newp = apr_pmemdup(r->pool, p, sizeof(urlmap));
+
+ if (newp->flags & M_INTERPOLATE_FROM) {
+ newp->from.c = interpolate_vars(r, newp->from.c);
+ if (!newp->from.c || !*newp->from.c)
+ continue; /* don't use empty from-pattern */
+ if (newp->flags & M_REGEX) {
+ newp->from.r = ap_pregcomp(r->pool, newp->from.c,
+ newp->regflags);
+ }
+ }
+ if (newp->flags & M_INTERPOLATE_TO) {
+ newp->to = interpolate_vars(r, newp->to);
+ }
+ /* evaluate p->cond; continue if unsatisfied */
+ /* create new urlmap with memcpy and append to map */
+ /* interpolate from if flagged to do so */
+ /* interpolate to if flagged to do so */
+
+ if (prev != NULL)
+ prev->next = newp;
+ else
+ ctx->map = newp;
+ prev = newp;
+ }
+
+ if (prev)
+ prev->next = NULL;
+}
+
+static saxctxt *check_filter_init (ap_filter_t *f)
+{
+ saxctxt *fctx;
+ if (!f->ctx) {
+ proxy_html_conf *cfg;
+ const char *force;
+ const char *errmsg = NULL;
+ cfg = ap_get_module_config(f->r->per_dir_config, &proxy_html_module);
+ force = apr_table_get(f->r->subprocess_env, "PROXY_HTML_FORCE");
+
+ if (!force) {
+ if (!f->r->proxyreq) {
+ errmsg = "Non-proxy request; not inserting proxy-html filter";
+ }
+ else if (!f->r->content_type) {
+ errmsg = "No content-type; bailing out of proxy-html filter";
+ }
+ else if (ap_cstr_casecmpn(f->r->content_type, "text/html", 9) &&
+ ap_cstr_casecmpn(f->r->content_type,
+ "application/xhtml+xml", 21)) {
+ errmsg = "Non-HTML content; not inserting proxy-html filter";
+ }
+ }
+ if (!cfg->links) {
+ errmsg = "No links configured: nothing for proxy-html filter to do";
+ }
+
+ if (errmsg) {
+#ifndef GO_FASTER
+ ap_log_rerror(APLOG_MARK, APLOG_TRACE1, 0, f->r, "%s", errmsg);
+#endif
+ ap_remove_output_filter(f);
+ return NULL;
+ }
+
+ fctx = f->ctx = apr_pcalloc(f->r->pool, sizeof(saxctxt));
+ fctx->f = f;
+ fctx->bb = apr_brigade_create(f->r->pool,
+ f->r->connection->bucket_alloc);
+ fctx->cfg = cfg;
+ apr_table_unset(f->r->headers_out, "Content-Length");
+
+ if (cfg->interp)
+ fixup_rules(fctx);
+ else
+ fctx->map = cfg->map;
+ /* defer dealing with charset_out until after sniffing charset_in
+ * so we can support setting one to t'other.
+ */
+ }
+ return f->ctx;
+}
+
+static void prepend_rbuf(saxctxt *ctxt, apr_bucket_brigade *bb)
+{
+ if (ctxt->rlen) {
+ apr_bucket *b = apr_bucket_transient_create(ctxt->rbuf,
+ ctxt->rlen,
+ bb->bucket_alloc);
+ APR_BRIGADE_INSERT_HEAD(bb, b);
+ ctxt->rlen = 0;
+ }
+}
+
+static apr_status_t proxy_html_filter(ap_filter_t *f, apr_bucket_brigade *bb)
+{
+ apr_bucket* b;
+ meta *m = NULL;
+ xmlCharEncoding enc;
+ const char *buf = 0;
+ apr_size_t bytes = 0;
+#ifndef USE_OLD_LIBXML2
+ int xmlopts = XML_PARSE_RECOVER | XML_PARSE_NONET |
+ XML_PARSE_NOBLANKS | XML_PARSE_NOERROR | XML_PARSE_NOWARNING;
+#endif
+
+ saxctxt *ctxt = check_filter_init(f);
+ if (!ctxt)
+ return ap_pass_brigade(f->next, bb);
+ for (b = APR_BRIGADE_FIRST(bb);
+ b != APR_BRIGADE_SENTINEL(bb);
+ b = APR_BUCKET_NEXT(b)) {
+ if (APR_BUCKET_IS_METADATA(b)) {
+ if (APR_BUCKET_IS_EOS(b)) {
+ if (ctxt->parser != NULL) {
+ consume_buffer(ctxt, "", 0, 1);
+ }
+ else {
+ prepend_rbuf(ctxt, ctxt->bb);
+ }
+ APR_BRIGADE_INSERT_TAIL(ctxt->bb,
+ apr_bucket_eos_create(ctxt->bb->bucket_alloc));
+ ap_pass_brigade(ctxt->f->next, ctxt->bb);
+ apr_brigade_cleanup(ctxt->bb);
+ }
+ else if (APR_BUCKET_IS_FLUSH(b)) {
+ /* pass on flush, except at start where it would cause
+ * headers to be sent before doc sniffing
+ */
+ if (ctxt->parser != NULL) {
+ ap_fflush(ctxt->f->next, ctxt->bb);
+ }
+ }
+ }
+ else if (apr_bucket_read(b, &buf, &bytes, APR_BLOCK_READ)
+ == APR_SUCCESS) {
+ if (ctxt->parser == NULL) {
+ const char *cenc;
+
+ /* For documents smaller than four bytes, there is no reason to do
+ * HTML rewriting. The URL schema (i.e. 'http') needs four bytes alone.
+ * And the HTML parser needs at least four bytes to initialise correctly.
+ */
+ ctxt->rmin += bytes;
+ if (ctxt->rmin < sizeof(ctxt->rbuf)) {
+ memcpy(ctxt->rbuf + ctxt->rlen, buf, bytes);
+ ctxt->rlen += bytes;
+ continue;
+ }
+ if (ctxt->rlen && ctxt->rlen < sizeof(ctxt->rbuf)) {
+ apr_size_t rem = sizeof(ctxt->rbuf) - ctxt->rlen;
+ memcpy(ctxt->rbuf + ctxt->rlen, buf, rem);
+ ctxt->rlen += rem;
+ buf += rem;
+ bytes -= rem;
+ }
+
+ if (!xml2enc_charset ||
+ (xml2enc_charset(f->r, &enc, &cenc) != APR_SUCCESS)) {
+ if (!xml2enc_charset)
+ ap_log_rerror(APLOG_MARK, APLOG_WARNING, 0, f->r, APLOGNO(01422)
+ "No i18n support found. Install mod_xml2enc if required");
+ enc = XML_CHAR_ENCODING_NONE;
+ ap_set_content_type(f->r, "text/html;charset=utf-8");
+ }
+ else {
+ /* if we wanted a non-default charset_out, insert the
+ * xml2enc filter now that we've sniffed it
+ */
+ if (ctxt->cfg->charset_out && xml2enc_filter) {
+ if (*ctxt->cfg->charset_out != '*')
+ cenc = ctxt->cfg->charset_out;
+ xml2enc_filter(f->r, cenc, ENCIO_OUTPUT);
+ ap_set_content_type(f->r,
+ apr_pstrcat(f->r->pool,
+ "text/html;charset=",
+ cenc, NULL));
+ }
+ else /* Normal case, everything worked, utf-8 output */
+ ap_set_content_type(f->r, "text/html;charset=utf-8");
+ }
+
+ ap_fputs(f->next, ctxt->bb, ctxt->cfg->doctype);
+
+ if (ctxt->rlen) {
+ ctxt->parser = htmlCreatePushParserCtxt(&sax, ctxt,
+ ctxt->rbuf,
+ ctxt->rlen,
+ NULL, enc);
+ }
+ else {
+ ctxt->parser = htmlCreatePushParserCtxt(&sax, ctxt, buf, 4,
+ NULL, enc);
+ buf += 4;
+ bytes -= 4;
+ }
+ if (ctxt->parser == NULL) {
+ prepend_rbuf(ctxt, bb);
+ ap_remove_output_filter(f);
+ return ap_pass_brigade(f->next, bb);
+ }
+ ctxt->rlen = 0;
+ apr_pool_cleanup_register(f->r->pool, ctxt->parser,
+ (int(*)(void*))htmlFreeParserCtxt,
+ apr_pool_cleanup_null);
+#ifndef USE_OLD_LIBXML2
+ if (xmlopts = xmlCtxtUseOptions(ctxt->parser, xmlopts), xmlopts)
+ ap_log_rerror(APLOG_MARK, APLOG_WARNING, 0, f->r, APLOGNO(01423)
+ "Unsupported parser opts %x", xmlopts);
+#endif
+ if (ctxt->cfg->metafix)
+ m = metafix(f->r, buf, bytes);
+ if (m) {
+ consume_buffer(ctxt, buf, m->start, 0);
+ consume_buffer(ctxt, buf+m->end, bytes-m->end, 0);
+ }
+ else {
+ consume_buffer(ctxt, buf, bytes, 0);
+ }
+ }
+ else {
+ consume_buffer(ctxt, buf, bytes, 0);
+ }
+ }
+ else {
+ ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, f->r, APLOGNO(01424)
+ "Error in bucket read");
+ }
+ }
+ /*ap_fflush(ctxt->f->next, ctxt->bb); // uncomment for debug */
+ apr_brigade_cleanup(bb);
+ return APR_SUCCESS;
+}
+
+static void *proxy_html_config(apr_pool_t *pool, char *x)
+{
+ proxy_html_conf *ret = apr_pcalloc(pool, sizeof(proxy_html_conf));
+ ret->doctype = DEFAULT_DOCTYPE;
+ ret->etag = DEFAULT_ETAG;
+ ret->bufsz = 8192;
+ /* ret->interp = 1; */
+ /* don't initialise links and events until they get set/used */
+ return ret;
+}
+
+static void *proxy_html_merge(apr_pool_t *pool, void *BASE, void *ADD)
+{
+ proxy_html_conf *base = (proxy_html_conf *) BASE;
+ proxy_html_conf *add = (proxy_html_conf *) ADD;
+ proxy_html_conf *conf = apr_palloc(pool, sizeof(proxy_html_conf));
+
+ /* don't merge declarations - just use the most specific */
+ conf->links = (add->links == NULL) ? base->links : add->links;
+ conf->events = (add->events == NULL) ? base->events : add->events;
+
+ conf->charset_out = (add->charset_out == NULL)
+ ? base->charset_out : add->charset_out;
+
+ if (add->map && base->map) {
+ urlmap *a;
+ conf->map = NULL;
+ for (a = base->map; a; a = a->next) {
+ urlmap *save = conf->map;
+ conf->map = apr_pmemdup(pool, a, sizeof(urlmap));
+ conf->map->next = save;
+ }
+ for (a = add->map; a; a = a->next) {
+ urlmap *save = conf->map;
+ conf->map = apr_pmemdup(pool, a, sizeof(urlmap));
+ conf->map->next = save;
+ }
+ }
+ else
+ conf->map = add->map ? add->map : base->map;
+
+ conf->doctype = (add->doctype == DEFAULT_DOCTYPE)
+ ? base->doctype : add->doctype;
+ conf->etag = (add->etag == DEFAULT_ETAG) ? base->etag : add->etag;
+ conf->bufsz = add->bufsz;
+ if (add->flags & NORM_RESET) {
+ conf->flags = add->flags ^ NORM_RESET;
+ conf->metafix = add->metafix;
+ conf->extfix = add->extfix;
+ conf->interp = add->interp;
+ conf->strip_comments = add->strip_comments;
+ conf->enabled = add->enabled;
+ }
+ else {
+ conf->flags = base->flags | add->flags;
+ conf->metafix = base->metafix | add->metafix;
+ conf->extfix = base->extfix | add->extfix;
+ conf->interp = base->interp | add->interp;
+ conf->strip_comments = base->strip_comments | add->strip_comments;
+ conf->enabled = add->enabled | base->enabled;
+ }
+ return conf;
+}
+#define REGFLAG(n,s,c) ((s&&(ap_strchr_c((s),(c))!=NULL)) ? (n) : 0)
+#define XREGFLAG(n,s,c) ((!s||(ap_strchr_c((s),(c))==NULL)) ? (n) : 0)
+static const char *comp_urlmap(cmd_parms *cmd, urlmap *newmap,
+ const char *from, const char *to,
+ const char *flags, const char *cond)
+{
+ const char *err = NULL;
+ newmap->flags
+ = XREGFLAG(M_HTML,flags,'h')
+ | XREGFLAG(M_EVENTS,flags,'e')
+ | XREGFLAG(M_CDATA,flags,'c')
+ | REGFLAG(M_ATSTART,flags,'^')
+ | REGFLAG(M_ATEND,flags,'$')
+ | REGFLAG(M_REGEX,flags,'R')
+ | REGFLAG(M_LAST,flags,'L')
+ | REGFLAG(M_NOTLAST,flags,'l')
+ | REGFLAG(M_INTERPOLATE_TO,flags,'V')
+ | REGFLAG(M_INTERPOLATE_FROM,flags,'v');
+
+ if ((newmap->flags & M_INTERPOLATE_FROM) || !(newmap->flags & M_REGEX)) {
+ newmap->from.c = from;
+ newmap->to = to;
+ }
+ else {
+ newmap->regflags
+ = REGFLAG(AP_REG_EXTENDED,flags,'x')
+ | REGFLAG(AP_REG_ICASE,flags,'i')
+ | REGFLAG(AP_REG_NOSUB,flags,'n')
+ | REGFLAG(AP_REG_NEWLINE,flags,'s');
+ newmap->from.r = ap_pregcomp(cmd->pool, from, newmap->regflags);
+ newmap->to = to;
+ }
+ if (cond != NULL) {
+ /* back-compatibility: support old-style ENV expressions
+ * by converting to ap_expr syntax.
+ *
+ * 1. var --> env(var)
+ * 2. var=val --> env(var)=val
+ * 3. !var --> !env(var)
+ * 4. !var=val --> env(var)!=val
+ */
+ char *newcond = NULL;
+ if (ap_rxplus_exec(cmd->temp_pool, old_expr, cond, &newcond)) {
+ /* we got a substitution. Check for the case (3) above
+ * that the regexp gets wrong: a negation without a comparison.
+ */
+ if ((cond[0] == '!') && !ap_strchr_c(cond, '=')) {
+ memmove(newcond+1, newcond, strlen(newcond)-1);
+ newcond[0] = '!';
+ }
+ cond = newcond;
+ }
+ newmap->cond = ap_expr_parse_cmd(cmd, cond, 0, &err, NULL);
+ }
+ else {
+ newmap->cond = NULL;
+ }
+ return err;
+}
+
+static const char *set_urlmap(cmd_parms *cmd, void *CFG, const char *args)
+{
+ proxy_html_conf *cfg = (proxy_html_conf *)CFG;
+ urlmap *map;
+ apr_pool_t *pool = cmd->pool;
+ urlmap *newmap;
+ const char *usage =
+ "Usage: ProxyHTMLURLMap from-pattern to-pattern [flags] [cond]";
+ const char *from;
+ const char *to;
+ const char *flags;
+ const char *cond = NULL;
+
+ if (from = ap_getword_conf(cmd->pool, &args), !from)
+ return usage;
+ if (to = ap_getword_conf(cmd->pool, &args), !to)
+ return usage;
+ flags = ap_getword_conf(cmd->pool, &args);
+ if (flags && *flags)
+ cond = ap_getword_conf(cmd->pool, &args);
+ if (cond && !*cond)
+ cond = NULL;
+
+ /* the args look OK, so let's use them */
+ newmap = apr_palloc(pool, sizeof(urlmap));
+ newmap->next = NULL;
+ if (cfg->map) {
+ for (map = cfg->map; map->next; map = map->next);
+ map->next = newmap;
+ }
+ else
+ cfg->map = newmap;
+
+ return comp_urlmap(cmd, newmap, from, to, flags, cond);
+}
+
+static const char *set_doctype(cmd_parms *cmd, void *CFG,
+ const char *t, const char *l)
+{
+ proxy_html_conf *cfg = (proxy_html_conf *)CFG;
+ if (!strcasecmp(t, "xhtml")) {
+ cfg->etag = xhtml_etag;
+ if (l && !strcasecmp(l, "legacy"))
+ cfg->doctype = fpi_xhtml_legacy;
+ else
+ cfg->doctype = fpi_xhtml;
+ }
+ else if (!strcasecmp(t, "html")) {
+ cfg->etag = html_etag;
+ if (l && !strcasecmp(l, "legacy"))
+ cfg->doctype = fpi_html_legacy;
+ else
+ cfg->doctype = fpi_html;
+ }
+ else if (!strcasecmp(t, "html5")) {
+ cfg->etag = html_etag;
+ cfg->doctype = fpi_html5;
+ }
+ else {
+ cfg->doctype = t;
+ if (l && ((l[0] == 'x') || (l[0] == 'X')))
+ cfg->etag = xhtml_etag;
+ else
+ cfg->etag = html_etag;
+ }
+ return NULL;
+}
+
+static const char *set_flags(cmd_parms *cmd, void *CFG, const char *arg)
+{
+ proxy_html_conf *cfg = CFG;
+ if (arg && *arg) {
+ if (!strcasecmp(arg, "lowercase"))
+ cfg->flags |= NORM_LC;
+ else if (!strcasecmp(arg, "dospath"))
+ cfg->flags |= NORM_MSSLASH;
+ else if (!strcasecmp(arg, "reset"))
+ cfg->flags |= NORM_RESET;
+ }
+ return NULL;
+}
+
+static const char *set_events(cmd_parms *cmd, void *CFG, const char *arg)
+{
+ tattr *attr;
+ proxy_html_conf *cfg = CFG;
+ if (cfg->events == NULL)
+ cfg->events = apr_array_make(cmd->pool, 20, sizeof(tattr));
+ attr = apr_array_push(cfg->events);
+ attr->val = arg;
+ return NULL;
+}
+
+static const char *set_links(cmd_parms *cmd, void *CFG,
+ const char *elt, const char *att)
+{
+ apr_array_header_t *attrs;
+ tattr *attr;
+ proxy_html_conf *cfg = CFG;
+
+ if (cfg->links == NULL)
+ cfg->links = apr_hash_make(cmd->pool);
+
+ attrs = apr_hash_get(cfg->links, elt, APR_HASH_KEY_STRING);
+ if (!attrs) {
+ attrs = apr_array_make(cmd->pool, 2, sizeof(tattr*));
+ apr_hash_set(cfg->links, elt, APR_HASH_KEY_STRING, attrs);
+ }
+ attr = apr_array_push(attrs);
+ attr->val = att;
+ return NULL;
+}
+static const command_rec proxy_html_cmds[] = {
+ AP_INIT_ITERATE("ProxyHTMLEvents", set_events, NULL,
+ RSRC_CONF|ACCESS_CONF,
+ "Strings to be treated as scripting events"),
+ AP_INIT_ITERATE2("ProxyHTMLLinks", set_links, NULL,
+ RSRC_CONF|ACCESS_CONF, "Declare HTML Attributes"),
+ AP_INIT_RAW_ARGS("ProxyHTMLURLMap", set_urlmap, NULL,
+ RSRC_CONF|ACCESS_CONF, "Map URL From To"),
+ AP_INIT_TAKE12("ProxyHTMLDoctype", set_doctype, NULL,
+ RSRC_CONF|ACCESS_CONF, "(HTML|XHTML) [Legacy]"),
+ AP_INIT_ITERATE("ProxyHTMLFixups", set_flags, NULL,
+ RSRC_CONF|ACCESS_CONF, "Options are lowercase, dospath"),
+ AP_INIT_FLAG("ProxyHTMLMeta", ap_set_flag_slot,
+ (void*)APR_OFFSETOF(proxy_html_conf, metafix),
+ RSRC_CONF|ACCESS_CONF, "Fix META http-equiv elements"),
+ AP_INIT_FLAG("ProxyHTMLInterp", ap_set_flag_slot,
+ (void*)APR_OFFSETOF(proxy_html_conf, interp),
+ RSRC_CONF|ACCESS_CONF,
+ "Support interpolation and conditions in URLMaps"),
+ AP_INIT_FLAG("ProxyHTMLExtended", ap_set_flag_slot,
+ (void*)APR_OFFSETOF(proxy_html_conf, extfix),
+ RSRC_CONF|ACCESS_CONF, "Map URLs in Javascript and CSS"),
+ AP_INIT_FLAG("ProxyHTMLStripComments", ap_set_flag_slot,
+ (void*)APR_OFFSETOF(proxy_html_conf, strip_comments),
+ RSRC_CONF|ACCESS_CONF, "Strip out comments"),
+ AP_INIT_TAKE1("ProxyHTMLBufSize", ap_set_int_slot,
+ (void*)APR_OFFSETOF(proxy_html_conf, bufsz),
+ RSRC_CONF|ACCESS_CONF, "Buffer size"),
+ AP_INIT_TAKE1("ProxyHTMLCharsetOut", ap_set_string_slot,
+ (void*)APR_OFFSETOF(proxy_html_conf, charset_out),
+ RSRC_CONF|ACCESS_CONF, "Usage: ProxyHTMLCharsetOut charset"),
+ AP_INIT_FLAG("ProxyHTMLEnable", ap_set_flag_slot,
+ (void*)APR_OFFSETOF(proxy_html_conf, enabled),
+ RSRC_CONF|ACCESS_CONF,
+ "Enable proxy-html and xml2enc filters"),
+ { NULL }
+};
+static int mod_proxy_html(apr_pool_t *p, apr_pool_t *p1, apr_pool_t *p2)
+{
+ seek_meta = ap_pregcomp(p, "<meta[^>]*(http-equiv)[^>]*>",
+ AP_REG_EXTENDED|AP_REG_ICASE);
+ seek_content = apr_strmatch_precompile(p, "content", 0);
+ memset(&sax, 0, sizeof(htmlSAXHandler));
+ sax.startElement = pstartElement;
+ sax.endElement = pendElement;
+ sax.characters = pcharacters;
+ sax.comment = pcomment;
+ sax.cdataBlock = pcdata;
+ xml2enc_charset = APR_RETRIEVE_OPTIONAL_FN(xml2enc_charset);
+ xml2enc_filter = APR_RETRIEVE_OPTIONAL_FN(xml2enc_filter);
+ if (!xml2enc_charset) {
+ ap_log_perror(APLOG_MARK, APLOG_NOTICE, 0, p2, APLOGNO(01425)
+ "I18n support in mod_proxy_html requires mod_xml2enc. "
+ "Without it, non-ASCII characters in proxied pages are "
+ "likely to display incorrectly.");
+ }
+
+ /* old_expr only needs to last the life of the config phase */
+ old_expr = ap_rxplus_compile(p1, "s/^(!)?(\\w+)((=)(.+))?$/reqenv('$2')$1$4'$5'/");
+ return OK;
+}
+static void proxy_html_insert(request_rec *r)
+{
+ proxy_html_conf *cfg;
+ cfg = ap_get_module_config(r->per_dir_config, &proxy_html_module);
+ if (cfg->enabled) {
+ if (xml2enc_filter)
+ xml2enc_filter(r, NULL, ENCIO_INPUT_CHECKS);
+ ap_add_output_filter("proxy-html", NULL, r, r->connection);
+ }
+}
+static void proxy_html_hooks(apr_pool_t *p)
+{
+ static const char *aszSucc[] = { "mod_filter.c", NULL };
+ ap_register_output_filter_protocol("proxy-html", proxy_html_filter,
+ NULL, AP_FTYPE_RESOURCE,
+ AP_FILTER_PROTO_CHANGE|AP_FILTER_PROTO_CHANGE_LENGTH);
+ /* move this to pre_config so old_expr is available to interpret
+ * old-style conditions on URL maps.
+ */
+ ap_hook_pre_config(mod_proxy_html, NULL, NULL, APR_HOOK_MIDDLE);
+ ap_hook_insert_filter(proxy_html_insert, NULL, aszSucc, APR_HOOK_MIDDLE);
+}
+
+AP_DECLARE_MODULE(proxy_html) = {
+ STANDARD20_MODULE_STUFF,
+ proxy_html_config,
+ proxy_html_merge,
+ NULL,
+ NULL,
+ proxy_html_cmds,
+ proxy_html_hooks
+};