summaryrefslogtreecommitdiffstats
path: root/server/gen_test_char.c
blob: 48ae6f47d02dfc46f4ace8c01eee2c0bf0520312 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
/* Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/* Character-classification setup.
 *
 * When CROSS_COMPILE is defined the APR headers are not included; the
 * apr_is*() predicates used by main() are instead defined here in terms of
 * the <ctype.h> functions, and the APR_HAVE_* feature macros that guard the
 * <stdio.h>/<string.h> includes further down are set by hand.
 */
#ifdef CROSS_COMPILE

#include <ctype.h>
/* The cast to unsigned char keeps the argument within the range of values
 * the <ctype.h> functions are defined for.
 */
#define apr_isalnum(c) (isalnum(((unsigned char)(c))))
#define apr_isalpha(c) (isalpha(((unsigned char)(c))))
#define apr_iscntrl(c) (iscntrl(((unsigned char)(c))))
#define apr_isprint(c) (isprint(((unsigned char)(c))))
#define APR_HAVE_STDIO_H 1
#define APR_HAVE_STRING_H 1

#else

/* Otherwise the predicates and the APR_HAVE_* macros are expected to come
 * from the APR headers.
 */
#include "apr.h"
#include "apr_lib.h"

#endif

/* NEED_ENHANCED_ESCAPES makes main() add '\r' and '%' to the set of
 * characters flagged with T_ESCAPE_SHELL_CMD.
 */
#if defined(WIN32) || defined(OS2)
#define NEED_ENHANCED_ESCAPES
#endif

#if APR_HAVE_STDIO_H
#include <stdio.h>
#endif
#if APR_HAVE_STRING_H
#include <string.h>
#endif

/* A bunch of functions in util.c scan strings looking for certain characters.
 * To make that more efficient we encode a lookup table.
 *
 * Each T_* value is a distinct bit, so a single table entry can carry any
 * combination of them.  main() prints these same values into the generated
 * output and decides, for every byte value 0..255, which bits are set.
 * T_VCHAR_OBSTEXT is 0x100, so the flags need more than 8 bits; main()
 * emits the table with unsigned short entries.
 */
/* Shell metacharacter (see the escape_shell_cmd rule in main()). */
#define T_ESCAPE_SHELL_CMD    (0x01)
/* Not alphanumeric and not in the path-segment safe set. */
#define T_ESCAPE_PATH_SEGMENT (0x02)
/* As above, but ';' and '/' are also in the safe set. */
#define T_OS_ESCAPE_PATH      (0x04)
/* NUL or any character that is not an HTTP token character. */
#define T_HTTP_TOKEN_STOP     (0x08)
/* Non-NUL character that is non-printable, a double quote or a backslash. */
#define T_ESCAPE_LOGITEM      (0x10)
/* Non-printable character (NUL included), ':', '|' or '%'. */
#define T_ESCAPE_FORENSIC     (0x20)
/* Not alphanumeric and not one of '.', '-', '*', '_' or space. */
#define T_ESCAPE_URLENCODED   (0x40)
/* NUL or a control character other than horizontal tab. */
#define T_HTTP_CTRLS          (0x80)
/* Non-NUL character that is neither a control character nor a space. */
#define T_VCHAR_OBSTEXT      (0x100)

/* Compute the T_* classification bits for one byte value.
 *
 * c is the byte value (0..255) to classify; the result is the OR of every
 * T_* flag whose rule matches c.
 *
 * NUL needs care throughout: strchr() matches the string terminator when
 * asked to look for 0.  Rules of the form "!strchr(set, c)" therefore never
 * fire for NUL, and rules that must (or must not) apply to NUL test c
 * explicitly.
 */
static unsigned short test_char_flags(unsigned c)
{
    unsigned short flags = 0;

    /* escape_shell_cmd */
#ifdef NEED_ENHANCED_ESCAPES
    /* Win32/OS2 have many of the same vulnerable characters
     * as Unix sh, plus the carriage return and percent char.
     * The proper escaping of these characters varies from unix
     * since Win32/OS2 use carets or doubled-double quotes,
     * and neither lf nor cr can be escaped.  We escape unix
     * specific as well, to assure that cross-compiled unix
     * applications behave similarly when invoked on win32/os2.
     *
     * Rem please keep in-sync with apr's list in win32/filesys.c
     */
    if (c && strchr("&;`'\"|*?~<>^()[]{}$\\\n\r%", c)) {
        flags |= T_ESCAPE_SHELL_CMD;
    }
#else
    if (c && strchr("&;`'\"|*?~<>^()[]{}$\\\n", c)) {
        flags |= T_ESCAPE_SHELL_CMD;
    }
#endif

    /* Path segment: everything except alphanumerics and the listed
     * punctuation must be escaped.
     */
    if (!apr_isalnum(c) && !strchr("$-_.+!*'(),:@&=~", c)) {
        flags |= T_ESCAPE_PATH_SEGMENT;
    }

    /* Whole path: same as a path segment, but ';' and '/' are also left
     * unescaped.
     */
    if (!apr_isalnum(c) && !strchr("$-_.+!*'(),:;@&=/~", c)) {
        flags |= T_OS_ESCAPE_PATH;
    }

    /* URL-encoded data: only alphanumerics, ".-*_" and space are exempt. */
    if (!apr_isalnum(c) && !strchr(".-*_ ", c)) {
        flags |= T_ESCAPE_URLENCODED;
    }

    /* Stop for any non-'token' character, including ctrls, obs-text,
     * and "tspecials" (RFC2068) a.k.a. "separators" (RFC2616), which
     * is easier to express as characters remaining in the ASCII token set
     */
    if (!c || !(apr_isalnum(c) || strchr("!#$%&'*+-.^_`|~", c))) {
        flags |= T_HTTP_TOKEN_STOP;
    }

    /* Catch CTRLs other than VCHAR, HT and SP, and obs-text (RFC7230 3.2)
     * This includes only the C0 plane, not C1 (which is obs-text itself.)
     * XXX: We should verify that all ASCII C0 ctrls/DEL corresponding to
     * the current EBCDIC translation are captured, and ASCII C1 ctrls
     * corresponding are all permitted (as they fall under obs-text rule)
     */
    if (!c || (apr_iscntrl(c) && c != '\t')) {
        flags |= T_HTTP_CTRLS;
    }

    /* Mark VCHAR and obs-text (RFC7230 3.2): every byte that is not NUL,
     * not a control character and not SP.  Bytes with the high bit set are
     * included as long as apr_iscntrl() does not classify them as controls.
     */
    if (c && !apr_iscntrl(c) && c != ' ') {
        flags |= T_VCHAR_OBSTEXT;
    }

    /* For logging, escape all control characters,
     * double quotes (because they delimit the request in the log file)
     * backslashes (because we use backslash for escaping)
     * and 8-bit chars with the high bit set
     */
    if (c && (!apr_isprint(c) || c == '"' || c == '\\' || apr_iscntrl(c))) {
        flags |= T_ESCAPE_LOGITEM;
    }

    /* For forensic logging, escape all control characters, top bit set,
     * :, | (used as delimiters) and % (used for escaping).
     */
    if (!apr_isprint(c) || c == ':' || c == '|' || c == '%'
        || apr_iscntrl(c) || !c) {
        flags |= T_ESCAPE_FORENSIC;
    }

    return flags;
}

/* Write the generated lookup-table header to stdout.
 *
 * The output consists of the T_* flag definitions followed by a 256-entry
 * test_char_table[], eight entries per line, holding the flags computed for
 * each byte value.  The command-line arguments are not used.
 *
 * Returns 0 on success and 1 if the output could not be written completely,
 * so a truncated header is not mistaken for a good one.
 */
int main(int argc, char *argv[])
{
    unsigned c;

    (void)argc;
    (void)argv;

    /* The T_* macros expand to int constants; cast so the arguments match
     * the %u conversions exactly.
     */
    printf("/* this file is automatically generated by gen_test_char, "
           "do not edit */\n"
           "#define T_ESCAPE_SHELL_CMD     (%u)\n"
           "#define T_ESCAPE_PATH_SEGMENT  (%u)\n"
           "#define T_OS_ESCAPE_PATH       (%u)\n"
           "#define T_HTTP_TOKEN_STOP      (%u)\n"
           "#define T_ESCAPE_LOGITEM       (%u)\n"
           "#define T_ESCAPE_FORENSIC      (%u)\n"
           "#define T_ESCAPE_URLENCODED    (%u)\n"
           "#define T_HTTP_CTRLS           (%u)\n"
           "#define T_VCHAR_OBSTEXT        (%u)\n"
           "\n"
           "static const unsigned short test_char_table[256] = {",
           (unsigned)T_ESCAPE_SHELL_CMD,
           (unsigned)T_ESCAPE_PATH_SEGMENT,
           (unsigned)T_OS_ESCAPE_PATH,
           (unsigned)T_HTTP_TOKEN_STOP,
           (unsigned)T_ESCAPE_LOGITEM,
           (unsigned)T_ESCAPE_FORENSIC,
           (unsigned)T_ESCAPE_URLENCODED,
           (unsigned)T_HTTP_CTRLS,
           (unsigned)T_VCHAR_OBSTEXT);

    for (c = 0; c < 256; ++c) {
        /* Start a new, indented row every eight entries. */
        if (c % 8 == 0)
            printf("\n    ");

        /* Every entry but the last is followed by a comma. */
        printf("0x%03x%c", (unsigned)test_char_flags(c),
               (c < 255) ? ',' : ' ');
    }

    printf("\n};\n");

    /* An earlier printf() failure leaves the stream's error indicator set
     * even if this final flush has nothing left to write, so check both.
     */
    if (fflush(stdout) != 0 || ferror(stdout)) {
        fprintf(stderr, "gen_test_char: error writing output\n");
        return 1;
    }

    return 0;
}