summaryrefslogtreecommitdiffstats
path: root/server/gen_test_char.c
diff options
context:
space:
mode:
Diffstat (limited to 'server/gen_test_char.c')
-rw-r--r--server/gen_test_char.c192
1 files changed, 192 insertions, 0 deletions
diff --git a/server/gen_test_char.c b/server/gen_test_char.c
new file mode 100644
index 0000000..248216b
--- /dev/null
+++ b/server/gen_test_char.c
@@ -0,0 +1,192 @@
+/* Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifdef CROSS_COMPILE
+
+#include <ctype.h>
+#define apr_isalnum(c) (isalnum(((unsigned char)(c))))
+#define apr_isalpha(c) (isalpha(((unsigned char)(c))))
+#define apr_iscntrl(c) (iscntrl(((unsigned char)(c))))
+#define apr_isprint(c) (isprint(((unsigned char)(c))))
+#define APR_HAVE_STDIO_H 1
+#define APR_HAVE_STRING_H 1
+
+#else
+
+#include "apr.h"
+#include "apr_lib.h"
+
+#endif
+
+#if defined(WIN32) || defined(OS2)
+#define NEED_ENHANCED_ESCAPES
+#endif
+
+#if APR_HAVE_STDIO_H
+#include <stdio.h>
+#endif
+#if APR_HAVE_STRING_H
+#include <string.h>
+#endif
+
+/* A bunch of functions in util.c scan strings looking for certain characters.
+ * To make that more efficient we encode a lookup table.
+ */
+#define T_ESCAPE_SHELL_CMD (0x01)
+#define T_ESCAPE_PATH_SEGMENT (0x02)
+#define T_OS_ESCAPE_PATH (0x04)
+#define T_HTTP_TOKEN_STOP (0x08)
+#define T_ESCAPE_LOGITEM (0x10)
+#define T_ESCAPE_FORENSIC (0x20)
+#define T_ESCAPE_URLENCODED (0x40)
+#define T_HTTP_CTRLS (0x80)
+#define T_VCHAR_OBSTEXT (0x100)
+#define T_URI_UNRESERVED (0x200)
+
+int main(int argc, char *argv[])
+{
+ unsigned c;
+ unsigned short flags;
+
+ printf("/* this file is automatically generated by gen_test_char, "
+ "do not edit */\n"
+ "#define T_ESCAPE_SHELL_CMD (%u)\n"
+ "#define T_ESCAPE_PATH_SEGMENT (%u)\n"
+ "#define T_OS_ESCAPE_PATH (%u)\n"
+ "#define T_HTTP_TOKEN_STOP (%u)\n"
+ "#define T_ESCAPE_LOGITEM (%u)\n"
+ "#define T_ESCAPE_FORENSIC (%u)\n"
+ "#define T_ESCAPE_URLENCODED (%u)\n"
+ "#define T_HTTP_CTRLS (%u)\n"
+ "#define T_VCHAR_OBSTEXT (%u)\n"
+ "#define T_URI_UNRESERVED (%u)\n"
+ "\n"
+ "static const unsigned short test_char_table[256] = {",
+ T_ESCAPE_SHELL_CMD,
+ T_ESCAPE_PATH_SEGMENT,
+ T_OS_ESCAPE_PATH,
+ T_HTTP_TOKEN_STOP,
+ T_ESCAPE_LOGITEM,
+ T_ESCAPE_FORENSIC,
+ T_ESCAPE_URLENCODED,
+ T_HTTP_CTRLS,
+ T_VCHAR_OBSTEXT,
+ T_URI_UNRESERVED
+ );
+
+ for (c = 0; c < 256; ++c) {
+ flags = 0;
+ if (c % 8 == 0)
+ printf("\n ");
+
+ /* escape_shell_cmd */
+#ifdef NEED_ENHANCED_ESCAPES
+ /* Win32/OS2 have many of the same vulnerable characters
+ * as Unix sh, plus the carriage return and percent char.
+ * The proper escaping of these characters varies from unix
+ * since Win32/OS2 use carets or doubled-double quotes,
+ * and neither lf nor cr can be escaped. We escape unix
+ * specific as well, to assure that cross-compiled unix
+ * applications behave similarly when invoked on win32/os2.
+ *
+ * Rem please keep in-sync with apr's list in win32/filesys.c
+ */
+ if (c && strchr("&;`'\"|*?~<>^()[]{}$\\\n\r%", c)) {
+ flags |= T_ESCAPE_SHELL_CMD;
+ }
+#else
+ if (c && strchr("&;`'\"|*?~<>^()[]{}$\\\n", c)) {
+ flags |= T_ESCAPE_SHELL_CMD;
+ }
+#endif
+
+ if (!apr_isalnum(c) && !strchr("$-_.+!*'(),:@&=~", c)) {
+ flags |= T_ESCAPE_PATH_SEGMENT;
+ }
+
+ if (!apr_isalnum(c) && !strchr("$-_.+!*'(),:;@&=/~", c)) {
+ flags |= T_OS_ESCAPE_PATH;
+ }
+
+ if (!apr_isalnum(c) && !strchr(".-*_ ", c)) {
+ flags |= T_ESCAPE_URLENCODED;
+ }
+
+ /* Stop for any non-'token' character, including ctrls, obs-text,
+ * and "tspecials" (RFC2068) a.k.a. "separators" (RFC2616), which
+ * is easier to express as characters remaining in the ASCII token set
+ */
+ if (!c || !(apr_isalnum(c) || strchr("!#$%&'*+-.^_`|~", c))) {
+ flags |= T_HTTP_TOKEN_STOP;
+ }
+
+ /* Catch CTRLs other than VCHAR, HT and SP, and obs-text (RFC7230 3.2)
+ * This includes only the C0 plane, not C1 (which is obs-text itself.)
+ * XXX: We should verify that all ASCII C0 ctrls/DEL corresponding to
+ * the current EBCDIC translation are captured, and ASCII C1 ctrls
+ * corresponding are all permitted (as they fall under obs-text rule)
+ */
+ if (!c || (apr_iscntrl(c) && c != '\t')) {
+ flags |= T_HTTP_CTRLS;
+ }
+
+ /* From RFC3986, the specific sets of gen-delims, sub-delims (2.2),
+ * and unreserved (2.3) that are possible somewhere within a URI.
+ * Spec requires all others to be %XX encoded, including obs-text.
+ */
+ if (c && !apr_iscntrl(c) && c != ' ') {
+ flags |= T_VCHAR_OBSTEXT;
+ }
+
+ /* For logging, escape all control characters,
+ * double quotes (because they delimit the request in the log file)
+ * backslashes (because we use backslash for escaping)
+ * and 8-bit chars with the high bit set
+ */
+ if (c && (!apr_isprint(c) || c == '"' || c == '\\' || apr_iscntrl(c))) {
+ flags |= T_ESCAPE_LOGITEM;
+ }
+
+ /* For forensic logging, escape all control characters, top bit set,
+ * :, | (used as delimiters) and % (used for escaping).
+ */
+ if (!apr_isprint(c) || c == ':' || c == '|' || c == '%'
+ || apr_iscntrl(c) || !c) {
+ flags |= T_ESCAPE_FORENSIC;
+ }
+
+ /* Characters in the RFC 3986 "unreserved" set.
+ * https://datatracker.ietf.org/doc/html/rfc3986#section-2.3 */
+ if (c && (apr_isalnum(c) || strchr("-._~", c))) {
+ flags |= T_URI_UNRESERVED;
+ }
+
+ printf("0x%03x%c", flags, (c < 255) ? ',' : ' ');
+ }
+
+ printf("\n};\n\n");
+
+ printf(
+ "/* we assume the folks using this ensure 0 <= c < 256... which means\n"
+ " * you need a cast to (unsigned char) first, you can't just plug a\n"
+ " * char in here and get it to work, because if char is signed then it\n"
+ " * will first be sign extended.\n"
+ " */\n"
+ "#define TEST_CHAR(c, f) (test_char_table[(unsigned char)(c)] & (f))\n"
+ );
+
+ return 0;
+}