summaryrefslogtreecommitdiffstats
path: root/WWW/Library/Implementation/HTParse.h
blob: 49b40b3419e643ba7f1940dc03dc30c1ae771548 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
/*
 * $LynxId: HTParse.h,v 1.26 2021/07/05 20:56:50 tom Exp $
 *				HTParse:  URL parsing in the WWW Library
 *				HTPARSE
 *
 *  This module of the WWW library contains code to parse URLs and various
 *  related things.
 *  Implemented by HTParse.c .
 */
#ifndef HTPARSE_H
#define HTPARSE_H

#ifndef HTUTILS_H
#include <HTUtils.h>
#endif

#ifdef __cplusplus
extern "C" {
#endif

/*
 *  Character-class predicates for the character sets named in RFC 3986
 *  (section 2.2 "Reserved Characters" and 2.3 "Unreserved Characters").
 *
 *  Each macro yields nonzero if 'c' belongs to the class, else 0.  'c' is
 *  evaluated more than once, so it must not have side effects.
 *
 *  Every strchr() lookup is guarded by a test for NUL:  strchr() treats the
 *  terminating '\0' of the set string as part of the string, so without the
 *  guard a NUL byte would be reported as a member of the class.
 */
#define RFC_3986_UNRESERVED(c) (isalnum(UCH(c)) || ((c) != 0 && strchr("-._~", UCH(c)) != 0))
#define RFC_3986_GEN_DELIMS(c) ((c) != 0 && strchr(":/?#[]@", UCH(c)) != 0)
#define RFC_3986_SUB_DELIMS(c) ((c) != 0 && strchr("!$&'()*+,;=", UCH(c)) != 0)

/*
 *  Bit flags selecting the URL components that HTParse() should return.
 *  OR them together to build the value passed as its 'wanted' argument.
 */
#define PARSE_PUNCTUATION       0x01
#define PARSE_ANCHOR            0x02
#define PARSE_PATH              0x04
#define PARSE_HOST              0x08
#define PARSE_ACCESS            0x10
/* all five flags above (decimal 31) */
#define PARSE_ALL               (PARSE_ACCESS | PARSE_HOST | PARSE_PATH | PARSE_ANCHOR | PARSE_PUNCTUATION)
/* PARSE_ALL with the anchor bit cleared (decimal 29) */
#define PARSE_ALL_WITHOUT_ANCHOR  (PARSE_ALL & ~PARSE_ANCHOR)
/*
 *  Finer-grained flag bits for components that the basic PARSE_* flags
 *  already cover.  PARSE_PATH does not mean only the path component in the
 *  strict sense of the URI specs; it means the path together with a
 *  possible query component. - kw
 */
#define PARSE_STRICTPATH        0x20
#define PARSE_QUERY             0x40
/*
 *  Valid mask values.  Each name is the corresponding BNF term from the
 *  URL document.
 */
#define URL_XALPHAS     UCH(0x01)
#define URL_XPALPHAS    UCH(0x02)
#define URL_PATH        UCH(0x04)

#ifdef USE_IDN2
    /*
     * Identifiers for IDNA handling variants; numbering starts at 1.
     * NOTE(review): the names suggest IDNA2003, IDNA2008, TR46 and a
     * compatibility mode -- confirm against the code that uses them.
     */
    typedef enum {
	LYidna2003 = 1,
	LYidna2008 = 2,
	LYidnaTR46 = 3,
	LYidnaCompat = 4
    } HTIdnaModes;

    /* NOTE(review): presumably holds one of the HTIdnaModes values -- confirm. */
    extern int LYidnaMode;
#endif

/*	Strip white space off a string.				HTStrip()
 *	-------------------------------
 *
 * On entry,
 *	s		The string to strip; it is modified in place.
 *
 * On exit,
 *	Return value points to the first non-white character, or to 0 if
 *	there is none.
 *	NOTE(review): "to 0" presumably means the string's terminating NUL
 *	rather than a null pointer -- confirm against HTParse.c.
 *	All trailing white space is OVERWRITTEN with zero.
 */
    extern char *HTStrip(char *s);

/*	Parse a port number.					HTParsePort()
 *	--------------------
 *
 * On entry,
 *	host		A pointer to a hostname, possibly followed by a port
 *	portp		Where the parsed port number is stored
 *
 * On exit,
 *	returns		A pointer to the ":" before the port
 *	sets		The port number, via the pointer portp
 *
 *	NOTE(review): the result when 'host' carries no port is not stated
 *	here -- check HTParse.c before relying on it.
 */
    extern char *HTParsePort(char *host, int *portp);

/*	Parse a Name relative to another name.			HTParse()
 *	--------------------------------------
 *
 *	This returns those parts of a name which are given (and requested),
 *	substituting bits from the related name where necessary.
 *
 * On entry,
 *	aName		The name (filename) given
 *	relatedName	A name relative to which aName is to be parsed
 *	wanted		A mask of the PARSE_* flag bits selecting the parts
 *			which are wanted
 *
 * On exit,
 *	returns		A pointer to a malloc'd string which MUST BE FREED
 *			by the caller
 */
    extern char *HTParse(const char *aName,
			 const char *relatedName,
			 int wanted);

/*	HTParseAnchor(), fast HTParse() specialization
 *	----------------------------------------------
 *
 * On entry,
 *	aName		The name to examine
 *
 * On exit,
 *	returns		A pointer within the input string (probably to its
 *			end '\0').  Because the result points into aName,
 *			it must not be freed on its own.
 *
 *	NOTE(review): presumably this corresponds to HTParse() with only
 *	PARSE_ANCHOR wanted -- confirm against HTParse.c.
 */
    extern const char *HTParseAnchor(const char *aName);

/*	Simplify a filename.					HTSimplify()
 *	--------------------
 *
 *  A unix-style file is allowed to contain the sequence "xxx/../", which may
 *  be replaced by "", and the sequence "/./", which may be replaced by "/".
 *  Simplification helps us recognize duplicate filenames.
 *
 * On entry,
 *	filename	The name to simplify
 *			NOTE(review): presumably rewritten in place, as
 *			nothing is returned -- confirm.
 *	absolute	NOTE(review): not described in this header; see
 *			HTParse.c for its meaning.
 */
    extern void HTSimplify(char *filename, BOOL absolute);

/*	Make Relative Name.					HTRelative()
 *	-------------------
 *
 *  This function creates and returns a string which expresses one address
 *  relative to another.  Where there is no relation, an absolute address
 *  is returned.
 *
 * On entry,
 *	aName		The address to be expressed
 *	relatedName	The address it is to be expressed relative to
 *
 *	Both names must be absolute, fully qualified names of nodes
 *	(no anchor bits).
 *
 * On exit,
 *	The return result points to a newly allocated name which, if
 *	parsed by HTParse() relative to relatedName, will yield aName.
 *	The caller is responsible for freeing the resulting name later.
 */
    extern char *HTRelative(const char *aName,
			    const char *relatedName);

/*		Escape undesirable characters using %		HTEscape()
 *		-------------------------------------
 *
 *	This function takes a pointer to a string in which some
 *	characters that may be unacceptable are still unescaped.
 *	It returns a string in which these characters are
 *	represented by a '%' character followed by two hex digits.
 *
 *	Unlike HTUnEscape(), this routine returns a malloc'd string.
 *
 * On entry,
 *	str		The string to escape
 *	mask		NOTE(review): presumably one of the URL_* mask values
 *			defined in this header -- confirm against HTParse.c.
 */
    extern char *HTEscape(const char *str,
			  unsigned mask);

/*		Escape unsafe characters using %		HTEscapeUnsafe()
 *		--------------------------------
 *
 *	This function takes a pointer to a string in which some
 *	characters that may be unsafe are still unescaped.
 *	It returns a string in which these characters are
 *	represented by a '%' character followed by two hex digits.
 *
 *	Unlike HTUnEscape(), this routine returns a malloc'd string.
 *
 * On entry,
 *	str		The string to escape
 */
    extern char *HTEscapeUnsafe(const char *str);

/*	Escape undesirable characters using % but space to +.	HTEscapeSP()
 *	-----------------------------------------------------
 *
 *	This function takes a pointer to a string in which some
 *	characters that may be unacceptable are still unescaped.
 *	It returns a string in which these characters are
 *	represented by a '%' character followed by two hex digits,
 *	except that spaces are converted to '+' instead of %20.
 *
 *	Unlike HTUnEscape(), this routine returns a malloc'd string.
 *
 * On entry,
 *	str		The string to escape
 *	mask		NOTE(review): presumably one of the URL_* mask values
 *			defined in this header -- confirm against HTParse.c.
 */
    extern char *HTEscapeSP(const char *str,
			    unsigned mask);

/*	Decode %xx escaped characters.				HTUnEscape()
 *	------------------------------
 *
 *	This function takes a pointer to a string in which some
 *	characters may have been encoded in %xy form, where xy is
 *	the ASCII hex code for character 16x+y.
 *	The string is converted in place, as it will never grow.
 *
 *	NOTE(review): the return value is not described here; presumably
 *	it is 'str' itself -- confirm against HTParse.c.
 */
    extern char *HTUnEscape(char *str);

/*	Decode some %xx escaped characters.		      HTUnEscapeSome()
 *	-----------------------------------			Klaus Weide
 *							    (kweide@tezcat.com)
 *	This function takes a pointer to a string in which some
 *	characters may have been encoded in %xy form, where xy is
 *	the ASCII hex code for character 16x+y, and a pointer to
 *	a second string containing one or more characters which
 *	should be unescaped if escaped in the first string.
 *	The first string is converted in place, as it will never grow.
 *
 * On entry,
 *	str		The string to decode (the "first string" above)
 *	do_trans	The characters to unescape (the "second string")
 *
 *	NOTE(review): the return value is not described here; presumably
 *	it is 'str' itself -- confirm against HTParse.c.
 */
    extern char *HTUnEscapeSome(char *str,
				const char *do_trans);

/*
 *  Turn a string which is not an RFC 822 token into a quoted-string. - KW
 *
 *  NOTE(review): the parameters are not described in this header.
 *  Presumably *str is replaced by the converted string and 'quoted'
 *  controls the quoting -- confirm against HTParse.c.
 */
    extern void HTMake822Word(char **str,
			      int quoted);

#ifdef __cplusplus
}
#endif
#endif				/* HTPARSE_H */