1391 lines
36 KiB
C
1391 lines
36 KiB
C
/*
|
|
* $LynxId: HTParse.c,v 1.101 2023/10/24 00:11:42 tom Exp $
|
|
*
|
|
* Parse HyperText Document Address HTParse.c
|
|
* ================================
|
|
*/
|
|
|
|
#include <HTUtils.h>
|
|
#include <HTParse.h>
|
|
|
|
#include <LYUtils.h>
|
|
#include <LYLeaks.h>
|
|
#include <LYStrings.h>
|
|
#include <LYCharUtils.h>
|
|
#include <LYGlobalDefs.h>
|
|
|
|
#ifdef HAVE_ALLOCA_H
|
|
#include <alloca.h>
|
|
#else
|
|
#ifdef __MINGW32__
|
|
#include <malloc.h>
|
|
#endif /* __MINGW32__ */
|
|
#endif
|
|
|
|
#ifdef USE_IDN2
|
|
#include <idn2.h>
|
|
#define FreeIdna(out) idn2_free(out)
|
|
#elif defined(USE_IDNA)
|
|
#include <idna.h>
|
|
#include <idn-free.h>
|
|
#define FreeIdna(out) idn_free(out)
|
|
#define IDN2_OK IDNA_SUCCESS
|
|
#endif
|
|
|
|
#define HEX_ESCAPE '%'
|
|
|
|
/*
 * The pieces of a URL as located by scan().  Every member is either NULL
 * or points at a NUL-terminated substring.
 */
struct struct_parts {
    char *access;		/* scheme: the text before the first ':' */
    char *host;			/* text following "//" */
    char *absolute;		/* path that followed a leading '/' */
    char *relative;		/* path that did not start with '/' */
    char *search;		/* treated normally as part of path */
    char *anchor;		/* text following the first '#' */
};
|
|
|
|
#if 0 /* for debugging */
|
|
static void show_parts(const char *name, struct struct_parts *parts, int line)
|
|
{
|
|
if (TRACE) {
|
|
CTRACE((tfp, "struct_parts(%s) %s@%d\n", name, __FILE__, line));
|
|
CTRACE((tfp, " access '%s'\n", NONNULL(parts->access)));
|
|
CTRACE((tfp, " host '%s'\n", NONNULL(parts->host)));
|
|
CTRACE((tfp, " absolute '%s'\n", NONNULL(parts->absolute)));
|
|
CTRACE((tfp, " relative '%s'\n", NONNULL(parts->relative)));
|
|
CTRACE((tfp, " search '%s'\n", NONNULL(parts->search)));
|
|
CTRACE((tfp, " anchor '%s'\n", NONNULL(parts->anchor)));
|
|
}
|
|
}
|
|
#define SHOW_PARTS(name) show_parts(#name, &name, __LINE__)
|
|
#else
|
|
#define SHOW_PARTS(name) /* nothing */
|
|
#endif
|
|
|
|
/*	Strip white space off a string.				HTStrip()
 *	-------------------------------
 *
 * On entry,
 *	s	points to a modifiable NUL-terminated string (or is NULL).
 *
 * On exit,
 *	Return value points to the first non-white character, or to the
 *	terminating NUL if there is none.  NULL is returned for a NULL input.
 *	All trailing white space is OVERWRITTEN with zero.
 *
 * White space here means blank, tab and newline only.
 */
char *HTStrip(char *s)
{
#define SPACE(c) (((c) == ' ') || ((c) == '\t') || ((c) == '\n'))
    size_t end;

    if (s == NULL)
	return NULL;

    /*
     * Work with an index rather than a pointer that walks backwards:  for an
     * empty or all-blank string a pointer would step to s - 1, which is not
     * a valid pointer value.
     */
    end = strlen(s);
    while (end > 0 && SPACE(s[end - 1])) {
	s[--end] = '\0';	/* Zap trailing blanks */
    }

    while (SPACE(*s)) {
	s++;			/* Strip leading blanks */
    }
    return s;
}
|
|
|
|
/* Scan a filename for its constituents. scan()
|
|
* -------------------------------------
|
|
*
|
|
* On entry,
|
|
* name points to a document name which may be incomplete.
|
|
* On exit,
|
|
* absolute or relative may be nonzero (but not both).
|
|
* host, anchor and access may be nonzero if they were specified.
|
|
* Any which are nonzero point to zero terminated strings.
|
|
*/
|
|
static void scan(char *name,
|
|
struct struct_parts *parts)
|
|
{
|
|
char *after_access;
|
|
char *p;
|
|
|
|
parts->access = NULL;
|
|
parts->host = NULL;
|
|
parts->absolute = NULL;
|
|
parts->relative = NULL;
|
|
parts->search = NULL; /* normally not used - kw */
|
|
parts->anchor = NULL;
|
|
|
|
/*
|
|
* Scan left-to-right for a scheme (access).
|
|
*/
|
|
after_access = name;
|
|
for (p = name; *p; p++) {
|
|
if (*p == ':') {
|
|
*p = '\0';
|
|
parts->access = name; /* Access name has been specified */
|
|
after_access = (p + 1);
|
|
break;
|
|
}
|
|
if (*p == '/' || *p == '#' || *p == ';' || *p == '?')
|
|
break;
|
|
}
|
|
|
|
/*
|
|
* Scan left-to-right for a fragment (anchor).
|
|
*/
|
|
for (p = after_access; *p; p++) {
|
|
if (*p == '#') {
|
|
parts->anchor = (p + 1);
|
|
*p = '\0'; /* terminate the rest */
|
|
break; /* leave things after first # alone - kw */
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Scan left-to-right for a host or absolute path.
|
|
*/
|
|
p = after_access;
|
|
if (*p == '/') {
|
|
if (p[1] == '/') {
|
|
parts->host = (p + 2); /* host has been specified */
|
|
*p = '\0'; /* Terminate access */
|
|
p = StrChr(parts->host, '/'); /* look for end of host name if any */
|
|
if (p != NULL) {
|
|
*p = '\0'; /* Terminate host */
|
|
parts->absolute = (p + 1); /* Root has been found */
|
|
} else {
|
|
p = StrChr(parts->host, '?');
|
|
if (p != NULL) {
|
|
*p = '\0'; /* Terminate host */
|
|
parts->search = (p + 1);
|
|
}
|
|
}
|
|
} else {
|
|
parts->absolute = (p + 1); /* Root found but no host */
|
|
}
|
|
} else {
|
|
parts->relative = (*after_access) ?
|
|
after_access : NULL; /* NULL for "" */
|
|
}
|
|
|
|
/*
|
|
* Check schemes that commonly have unescaped hashes.
|
|
*/
|
|
if (parts->access && parts->anchor &&
|
|
/* optimize */ StrChr("lnsdLNSD", *parts->access) != NULL) {
|
|
if ((!parts->host && strcasecomp(parts->access, "lynxcgi")) ||
|
|
!strcasecomp(parts->access, "nntp") ||
|
|
!strcasecomp(parts->access, "snews") ||
|
|
!strcasecomp(parts->access, "news") ||
|
|
!strcasecomp(parts->access, "data")) {
|
|
/*
|
|
* Access specified but no host and not a lynxcgi URL, so the
|
|
* anchor may not really be one, e.g., news:j462#36487@foo.bar, or
|
|
* it's an nntp or snews URL, or news URL with a host. Restore the
|
|
* '#' in the address.
|
|
*/
|
|
/* but only if we have found a path component of which this will
|
|
* become part. - kw */
|
|
if (parts->relative || parts->absolute) {
|
|
*(parts->anchor - 1) = '#';
|
|
parts->anchor = NULL;
|
|
}
|
|
}
|
|
}
|
|
} /*scan */
|
|
|
|
#if defined(HAVE_ALLOCA) && !defined(LY_FIND_LEAKS)
|
|
#define LYalloca(x) alloca((size_t)(x))
|
|
#define LYalloca_free(x) {}
|
|
#else
|
|
#define LYalloca(x) malloc((size_t)(x))
|
|
#define LYalloca_free(x) free((void *)(x))
|
|
#endif
|
|
|
|
/*
 * Like StrChr(), except that when 'ch' does not occur the result points at
 * the string's terminating NUL instead of being NULL.
 */
static char *strchr_or_end(char *string, int ch)
{
    char *found = StrChr(string, ch);

    return (found != 0) ? found : (string + strlen(string));
}
|
|
|
|
/*
|
|
* Given a host specification that may end with a port number, e.g.,
|
|
* foobar:123
|
|
* point to the ':' which begins the ":port" to make it simple to handle the
|
|
* substring.
|
|
*
|
|
* If no port is found (or a syntax error), return null.
|
|
*/
|
|
char *HTParsePort(char *host, int *portp)
|
|
{
|
|
int brackets = 0;
|
|
char *result = NULL;
|
|
|
|
*portp = 0;
|
|
if (host != NULL) {
|
|
while (*host != '\0' && result == 0) {
|
|
switch (*host++) {
|
|
case ':':
|
|
if (brackets == 0 && isdigit(UCH(*host))) {
|
|
char *next = NULL;
|
|
|
|
*portp = (int) strtol(host, &next, 10);
|
|
if (next != 0 && next != host && *next == '\0') {
|
|
result = (host - 1);
|
|
CTRACE((tfp, "HTParsePort %d\n", *portp));
|
|
}
|
|
}
|
|
break;
|
|
case '[': /* for ipv6 */
|
|
++brackets;
|
|
break;
|
|
case ']': /* for ipv6 */
|
|
--brackets;
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
return result;
|
|
}
|
|
|
|
#if defined(USE_IDNA) || defined(USE_IDN2)
|
|
/*
 * Return the value (0..15) of a hexadecimal digit in either case, or -1 if
 * 'ch' is not a hexadecimal digit.
 */
static int hex_decode(int ch)
{
    if (ch >= '0' && ch <= '9')
	return ch - '0';
    if (ch >= 'a' && ch <= 'f')
	return 10 + (ch - 'a');
    if (ch >= 'A' && ch <= 'F')
	return 10 + (ch - 'A');
    return -1;
}
|
|
|
|
/*
|
|
* Convert in-place the given hostname to IDNA form. That requires up to 64
|
|
* characters, and we've allowed for that, with MIN_PARSE.
|
|
*/
|
|
static void convert_to_idna(char *host)
|
|
{
|
|
size_t length = strlen(host);
|
|
char *endhost = host + length;
|
|
char *buffer = malloc(length + 1);
|
|
char *params = malloc(length + 1);
|
|
char *output = NULL;
|
|
char *src, *dst;
|
|
int code;
|
|
int hi, lo;
|
|
|
|
if (buffer != NULL && params != NULL) {
|
|
code = TRUE;
|
|
*params = '\0';
|
|
for (dst = buffer, src = host; src < endhost; ++dst) {
|
|
int ch = *src++;
|
|
|
|
if (RFC_3986_GEN_DELIMS(ch)) {
|
|
strcpy(params, src - 1);
|
|
*dst = '\0';
|
|
break;
|
|
} else if (ch == HEX_ESCAPE) {
|
|
if ((src + 1) < endhost
|
|
&& (hi = hex_decode(src[0])) >= 0
|
|
&& (lo = hex_decode(src[1])) >= 0) {
|
|
|
|
*dst = (char) ((hi << 4) | lo);
|
|
src += 2;
|
|
} else {
|
|
CTRACE((tfp, "convert_to_idna: `%s' is malformed\n", host));
|
|
code = FALSE;
|
|
break;
|
|
}
|
|
} else {
|
|
*dst = (char) ch;
|
|
}
|
|
}
|
|
if (code) {
|
|
*dst = '\0';
|
|
#ifdef USE_IDN2
|
|
#if (!defined(IDN2_VERSION_NUMBER) || IDN2_VERSION_NUMBER < 0x02000003)
|
|
/*
|
|
* Older libidn2 mishandles STD3, stripping underscores.
|
|
*/
|
|
if (strchr(buffer, '_') != NULL) {
|
|
code = -1;
|
|
} else
|
|
#endif
|
|
switch (LYidnaMode) {
|
|
case LYidna2003:
|
|
code = idn2_to_ascii_8z(buffer, &output, IDN2_TRANSITIONAL);
|
|
break;
|
|
case LYidna2008:
|
|
/* IDNA2008 rules without the TR46 amendments */
|
|
code = idn2_to_ascii_8z(buffer, &output, 0);
|
|
break;
|
|
case LYidnaTR46:
|
|
code = idn2_to_ascii_8z(buffer, &output, IDN2_NONTRANSITIONAL
|
|
| IDN2_NFC_INPUT);
|
|
break;
|
|
case LYidnaCompat:
|
|
/* IDNA2008 */
|
|
code = idn2_to_ascii_8z(buffer, &output, IDN2_NONTRANSITIONAL
|
|
| IDN2_NFC_INPUT);
|
|
if (code == IDN2_DISALLOWED) {
|
|
/* IDNA2003 - compatible */
|
|
code = idn2_to_ascii_8z(buffer, &output, IDN2_TRANSITIONAL);
|
|
}
|
|
break;
|
|
}
|
|
#else
|
|
code = idna_to_ascii_8z(buffer, &output, IDNA_USE_STD3_ASCII_RULES);
|
|
#endif
|
|
if (code == IDN2_OK) {
|
|
CTRACE((tfp, "convert_to_idna: `%s' -> `%s': OK\n", buffer, output));
|
|
strcpy(host, output);
|
|
strcat(host, params);
|
|
} else {
|
|
CTRACE((tfp, "convert_to_idna: `%s': %s\n",
|
|
buffer,
|
|
idna_strerror((Idna_rc) code)));
|
|
}
|
|
if (output)
|
|
FreeIdna(output);
|
|
}
|
|
}
|
|
free(buffer);
|
|
free(params);
|
|
}
|
|
#define MIN_PARSE 80
|
|
#else
|
|
#define MIN_PARSE 8
|
|
#endif
|
|
|
|
/* Parse a Name relative to another name. HTParse()
|
|
* --------------------------------------
|
|
*
|
|
* This returns those parts of a name which are given (and requested)
|
|
* substituting bits from the related name where necessary.
|
|
*
|
|
* Originally based on RFC 1808, some details in RFC 3986 are used.
|
|
*
|
|
* On entry,
|
|
* aName A filename given
|
|
* relatedName A name relative to which aName is to be parsed
|
|
* wanted A mask for the bits which are wanted.
|
|
*
|
|
* On exit,
|
|
* returns A pointer to a malloc'd string which MUST BE FREED
|
|
*/
|
|
char *HTParse(const char *aName,
|
|
const char *relatedName,
|
|
int wanted)
|
|
{
|
|
char *result = NULL;
|
|
char *tail = NULL; /* a pointer to the end of the 'result' string */
|
|
char *return_value = NULL;
|
|
size_t len, len1, len2;
|
|
size_t need;
|
|
char *name = NULL;
|
|
char *rel = NULL;
|
|
char *p, *q;
|
|
char *acc_method;
|
|
struct struct_parts given, related;
|
|
|
|
CTRACE((tfp, "HTParse: aName:`%s'\n", aName));
|
|
CTRACE((tfp, " relatedName:`%s'\n", relatedName));
|
|
|
|
if (wanted & (PARSE_STRICTPATH | PARSE_QUERY)) { /* if detail wanted... */
|
|
if ((wanted & (PARSE_STRICTPATH | PARSE_QUERY))
|
|
== (PARSE_STRICTPATH | PARSE_QUERY)) /* if strictpath AND query */
|
|
wanted |= PARSE_PATH; /* then treat as if PARSE_PATH wanted */
|
|
if (wanted & PARSE_PATH) /* if PARSE_PATH wanted */
|
|
wanted &= ~(PARSE_STRICTPATH | PARSE_QUERY); /* ignore details */
|
|
}
|
|
/* *INDENT-OFF* */
|
|
CTRACE((tfp, " want:%s%s%s%s%s%s%s\n",
|
|
wanted & PARSE_PUNCTUATION ? " punc" : "",
|
|
wanted & PARSE_ANCHOR ? " anchor" : "",
|
|
wanted & PARSE_PATH ? " path" : "",
|
|
wanted & PARSE_HOST ? " host" : "",
|
|
wanted & PARSE_ACCESS ? " access" : "",
|
|
wanted & PARSE_STRICTPATH ? " PATH" : "",
|
|
wanted & PARSE_QUERY ? " QUERY" : ""));
|
|
/* *INDENT-ON* */
|
|
|
|
/*
|
|
* Allocate the temporary string. Optimized.
|
|
*/
|
|
len1 = strlen(aName) + 1;
|
|
len2 = strlen(relatedName) + 1;
|
|
len = len1 + len2 + MIN_PARSE; /* Lots of space: more than enough */
|
|
need = (len * 2 + len1 + len2);
|
|
|
|
if ((int) need < (int) len1 ||
|
|
(int) need < (int) len2) {
|
|
CTRACE((tfp, "HTParse: overflow\n"));
|
|
return StrAllocCopy(return_value, "");
|
|
}
|
|
|
|
result = tail = (char *) LYalloca(need);
|
|
if (result == NULL) {
|
|
outofmem(__FILE__, "HTParse");
|
|
}
|
|
*result = '\0';
|
|
name = result + len;
|
|
rel = name + len1;
|
|
|
|
/*
|
|
* Make working copy of the input string to cut up.
|
|
*/
|
|
MemCpy(name, aName, len1);
|
|
|
|
/*
|
|
* Cut up the string into URL fields.
|
|
*/
|
|
scan(name, &given);
|
|
SHOW_PARTS(given);
|
|
|
|
/*
|
|
* Now related string.
|
|
*/
|
|
if ((given.access && given.host && given.absolute) || !*relatedName) {
|
|
/*
|
|
* Inherit nothing!
|
|
*/
|
|
related.access = NULL;
|
|
related.host = NULL;
|
|
related.absolute = NULL;
|
|
related.relative = NULL;
|
|
related.search = NULL;
|
|
related.anchor = NULL;
|
|
} else {
|
|
MemCpy(rel, relatedName, len2);
|
|
scan(rel, &related);
|
|
}
|
|
SHOW_PARTS(related);
|
|
|
|
/*
|
|
* Handle the scheme (access) field.
|
|
*/
|
|
if (given.access && given.host && !given.relative && !given.absolute) {
|
|
if (!strcmp(given.access, "http") ||
|
|
!strcmp(given.access, "https") ||
|
|
!strcmp(given.access, "ftp")) {
|
|
|
|
/*
|
|
* Assume root.
|
|
*/
|
|
given.absolute = empty_string;
|
|
}
|
|
}
|
|
acc_method = given.access ? given.access : related.access;
|
|
if (wanted & PARSE_ACCESS) {
|
|
if (acc_method) {
|
|
strcpy(tail, acc_method);
|
|
tail += strlen(tail);
|
|
if (wanted & PARSE_PUNCTUATION) {
|
|
*tail++ = ':';
|
|
*tail = '\0';
|
|
}
|
|
}
|
|
}
|
|
|
|
/*
|
|
* If different schemes, inherit nothing.
|
|
*
|
|
* We'll try complying with RFC 1808 and the Fielding draft, and inherit
|
|
* nothing if both schemes are given, rather than only when they differ,
|
|
* except for file URLs - FM
|
|
*
|
|
* After trying it for a while, it's still premature, IHMO, to go along
|
|
* with it, so this is back to inheriting for identical schemes whether or
|
|
* not they are "file". If you want to try it again yourself, uncomment
|
|
* the strcasecomp() below. - FM
|
|
*/
|
|
if ((given.access && related.access) &&
|
|
( /* strcasecomp(given.access, "file") || */
|
|
strcmp(given.access, related.access))) {
|
|
related.host = NULL;
|
|
related.absolute = NULL;
|
|
related.relative = NULL;
|
|
related.search = NULL;
|
|
related.anchor = NULL;
|
|
}
|
|
|
|
/*
|
|
* Handle the host field.
|
|
*/
|
|
if (wanted & PARSE_HOST) {
|
|
if (given.host || related.host) {
|
|
if (wanted & PARSE_PUNCTUATION) {
|
|
*tail++ = '/';
|
|
*tail++ = '/';
|
|
}
|
|
strcpy(tail, given.host ? given.host : related.host);
|
|
/*
|
|
* Ignore default port numbers, and trailing dots on FQDNs, which
|
|
* will only cause identical addresses to look different. (related
|
|
* is already a clean url).
|
|
*/
|
|
{
|
|
char *p2, *h;
|
|
int portnumber;
|
|
int gen_delims = 0;
|
|
|
|
if ((p2 = HTSkipToAt(result, &gen_delims)) != NULL
|
|
&& gen_delims == 0) {
|
|
tail = (p2 + 1);
|
|
}
|
|
p2 = HTParsePort(result, &portnumber);
|
|
if (p2 != NULL && acc_method != NULL) {
|
|
/*
|
|
* Port specified.
|
|
*/
|
|
#define ACC_METHOD(a,b) (!strcmp(acc_method, a) && (portnumber == b))
|
|
if (ACC_METHOD("http", 80) ||
|
|
ACC_METHOD("https", 443) ||
|
|
ACC_METHOD("gopher", 70) ||
|
|
ACC_METHOD("ftp", 21) ||
|
|
ACC_METHOD("wais", 210) ||
|
|
ACC_METHOD("nntp", 119) ||
|
|
ACC_METHOD("news", 119) ||
|
|
ACC_METHOD("newspost", 119) ||
|
|
ACC_METHOD("newsreply", 119) ||
|
|
ACC_METHOD("snews", 563) ||
|
|
ACC_METHOD("snewspost", 563) ||
|
|
ACC_METHOD("snewsreply", 563) ||
|
|
ACC_METHOD("finger", 79) ||
|
|
ACC_METHOD("telnet", 23) ||
|
|
ACC_METHOD("tn3270", 23) ||
|
|
ACC_METHOD("rlogin", 513) ||
|
|
ACC_METHOD("cso", 105))
|
|
*p2 = '\0'; /* It is the default: ignore it */
|
|
}
|
|
if (p2 == NULL) {
|
|
int len3 = (int) strlen(tail);
|
|
|
|
if (len3 > 0) {
|
|
h = tail + len3 - 1; /* last char of hostname */
|
|
if (*h == '.')
|
|
*h = '\0'; /* chop final . */
|
|
}
|
|
} else if (p2 != result) {
|
|
h = p2;
|
|
h--; /* End of hostname */
|
|
if (*h == '.') {
|
|
/*
|
|
* Slide p2 over h.
|
|
*/
|
|
while (*p2 != '\0')
|
|
*h++ = *p2++;
|
|
*h = '\0'; /* terminate */
|
|
}
|
|
}
|
|
}
|
|
#if defined(USE_IDNA) || defined(USE_IDN2)
|
|
/*
|
|
* Depending on locale-support, we could have a literal UTF-8
|
|
* string as a host name, or a URL-encoded form of that.
|
|
*/
|
|
convert_to_idna(tail);
|
|
#endif
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Trim any blanks from the result so far - there's no excuse for blanks
|
|
* in a hostname. Also update the tail here.
|
|
*/
|
|
tail = LYRemoveBlanks(result);
|
|
|
|
/*
|
|
* If host in given or related was ended directly with a '?' (no slash),
|
|
* fake the search part into absolute. This is the only case search is
|
|
* returned from scan. A host must have been present. this restores the
|
|
* '?' at which the host part had been truncated in scan, we have to do
|
|
* this after host part handling is done. - kw
|
|
*/
|
|
if (given.search && *(given.search - 1) == '\0') {
|
|
given.absolute = given.search - 1;
|
|
given.absolute[0] = '?';
|
|
} else if (related.search && !related.absolute &&
|
|
*(related.search - 1) == '\0') {
|
|
related.absolute = related.search - 1;
|
|
related.absolute[0] = '?';
|
|
}
|
|
|
|
/*
|
|
* If different hosts, inherit no path.
|
|
*/
|
|
if (given.host && related.host)
|
|
if (strcmp(given.host, related.host) != 0) {
|
|
related.absolute = NULL;
|
|
related.relative = NULL;
|
|
related.anchor = NULL;
|
|
}
|
|
|
|
/*
|
|
* Handle the path.
|
|
*/
|
|
if (wanted & (PARSE_PATH | PARSE_STRICTPATH | PARSE_QUERY)) {
|
|
int want_detail = (wanted & (PARSE_STRICTPATH | PARSE_QUERY));
|
|
|
|
if (acc_method && !given.absolute && given.relative) {
|
|
/*
|
|
* Treat all given nntp or snews paths, or given paths for news
|
|
* URLs with a host, as absolute.
|
|
*/
|
|
switch (*acc_method) {
|
|
case 'N':
|
|
case 'n':
|
|
if (!strcasecomp(acc_method, "nntp") ||
|
|
(!strcasecomp(acc_method, "news") &&
|
|
!strncasecomp(result, "news://", 7))) {
|
|
given.absolute = given.relative;
|
|
given.relative = NULL;
|
|
}
|
|
break;
|
|
case 'S':
|
|
case 's':
|
|
if (!strcasecomp(acc_method, "snews")) {
|
|
given.absolute = given.relative;
|
|
given.relative = NULL;
|
|
}
|
|
break;
|
|
}
|
|
}
|
|
|
|
if (given.absolute) { /* All is given */
|
|
char *base = tail;
|
|
|
|
if (wanted & PARSE_PUNCTUATION)
|
|
*tail++ = '/';
|
|
strcpy(tail, given.absolute);
|
|
HTSimplify(base, TRUE);
|
|
CTRACE((tfp, "HTParse: (ABS)\n"));
|
|
} else if (related.absolute) { /* Adopt path not name */
|
|
char *base = tail;
|
|
|
|
*tail++ = '/';
|
|
strcpy(tail, related.absolute);
|
|
if (given.relative) {
|
|
/* RFC 1808 part 4 step 5 (if URL path is empty) */
|
|
/* a) if given has params, add/replace that */
|
|
if (given.relative[0] == ';') {
|
|
strcpy(strchr_or_end(tail, ';'), given.relative);
|
|
}
|
|
/* b) if given has query, add/replace that */
|
|
else if (given.relative[0] == '?') {
|
|
strcpy(strchr_or_end(tail, '?'), given.relative);
|
|
}
|
|
/* otherwise fall through to RFC 1808 part 4 step 6 */
|
|
else {
|
|
p = StrChr(tail, '?'); /* Search part? */
|
|
if (p == NULL)
|
|
p = (tail + strlen(tail) - 1);
|
|
for (; *p != '/'; p--) ; /* last / */
|
|
p[1] = '\0'; /* Remove filename */
|
|
strcat(p, given.relative); /* Add given one */
|
|
}
|
|
HTSimplify(base, FALSE);
|
|
if (*base == '\0')
|
|
strcpy(base, "/");
|
|
} else {
|
|
HTSimplify(base, TRUE);
|
|
}
|
|
if (base[0] == '/' && base[1] == '/') {
|
|
char *pz;
|
|
|
|
for (pz = base; (pz[0] = pz[1]) != '\0'; ++pz) ;
|
|
}
|
|
CTRACE((tfp, "HTParse: (Related-ABS)\n"));
|
|
} else if (given.relative) {
|
|
strcpy(tail, given.relative); /* what we've got */
|
|
HTSimplify(tail, FALSE);
|
|
CTRACE((tfp, "HTParse: (REL)\n"));
|
|
} else if (related.relative) {
|
|
strcpy(tail, related.relative);
|
|
HTSimplify(tail, FALSE);
|
|
CTRACE((tfp, "HTParse: (Related-REL)\n"));
|
|
} else { /* No inheritance */
|
|
if (!isLYNXCGI(aName) &&
|
|
!isLYNXEXEC(aName) &&
|
|
!isLYNXPROG(aName)) {
|
|
*tail++ = '/';
|
|
*tail = '\0';
|
|
} else {
|
|
HTSimplify(tail, FALSE);
|
|
}
|
|
if (!strcmp(result, "news:/"))
|
|
result[5] = '*';
|
|
CTRACE((tfp, "HTParse: (No inheritance)\n"));
|
|
}
|
|
if (want_detail) {
|
|
p = StrChr(tail, '?'); /* Search part? */
|
|
if (p) {
|
|
if (PARSE_STRICTPATH) {
|
|
*p = '\0';
|
|
} else {
|
|
if (!(wanted & PARSE_PUNCTUATION))
|
|
p++;
|
|
do {
|
|
*tail++ = *p;
|
|
} while (*p++);
|
|
}
|
|
} else {
|
|
if (wanted & PARSE_QUERY)
|
|
*tail = '\0';
|
|
}
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Handle the fragment (anchor). Never inherit.
|
|
*/
|
|
if (wanted & PARSE_ANCHOR) {
|
|
if (given.anchor && *given.anchor) {
|
|
tail += strlen(tail);
|
|
if (wanted & PARSE_PUNCTUATION)
|
|
*tail++ = '#';
|
|
strcpy(tail, given.anchor);
|
|
}
|
|
}
|
|
|
|
/*
|
|
* If there are any blanks remaining in the string, escape them as needed.
|
|
* See the discussion in LYLegitimizeHREF() for example.
|
|
*/
|
|
if ((p = StrChr(result, ' ')) != 0) {
|
|
switch (is_url(result)) {
|
|
case UNKNOWN_URL_TYPE:
|
|
CTRACE((tfp, "HTParse: ignore:`%s'\n", result));
|
|
break;
|
|
case LYNXEXEC_URL_TYPE:
|
|
case LYNXPROG_URL_TYPE:
|
|
case LYNXCGI_URL_TYPE:
|
|
case LYNXPRINT_URL_TYPE:
|
|
case LYNXHIST_URL_TYPE:
|
|
case LYNXDOWNLOAD_URL_TYPE:
|
|
case LYNXKEYMAP_URL_TYPE:
|
|
case LYNXIMGMAP_URL_TYPE:
|
|
case LYNXCOOKIE_URL_TYPE:
|
|
case LYNXCACHE_URL_TYPE:
|
|
case LYNXDIRED_URL_TYPE:
|
|
case LYNXOPTIONS_URL_TYPE:
|
|
case LYNXCFG_URL_TYPE:
|
|
case LYNXCOMPILE_OPTS_URL_TYPE:
|
|
case LYNXMESSAGES_URL_TYPE:
|
|
CTRACE((tfp, "HTParse: spaces:`%s'\n", result));
|
|
break;
|
|
case NOT_A_URL_TYPE:
|
|
default:
|
|
CTRACE((tfp, "HTParse: encode:`%s'\n", result));
|
|
do {
|
|
q = p + strlen(p) + 2;
|
|
|
|
while (q != p + 1) {
|
|
q[0] = q[-2];
|
|
--q;
|
|
}
|
|
p[0] = HEX_ESCAPE;
|
|
p[1] = '2';
|
|
p[2] = '0';
|
|
} while ((p = StrChr(result, ' ')) != 0);
|
|
break;
|
|
}
|
|
}
|
|
CTRACE((tfp, "HTParse: result:`%s'\n", result));
|
|
|
|
need = strlen(result);
|
|
if (need > (size_t) max_uri_size) {
|
|
CTRACE((tfp, "HTParse too-long address (have %ld vs limit %d)\n",
|
|
(long) need, max_uri_size));
|
|
StrAllocCopy(return_value, "");
|
|
} else {
|
|
StrAllocCopy(return_value, result);
|
|
}
|
|
LYalloca_free(result);
|
|
|
|
/* FIXME: could be optimized using HTParse() internals */
|
|
if (*relatedName &&
|
|
((wanted & PARSE_ALL_WITHOUT_ANCHOR) == PARSE_ALL_WITHOUT_ANCHOR)) {
|
|
/*
|
|
* Check whether to fill in localhost. - FM
|
|
*/
|
|
LYFillLocalFileURL(&return_value, relatedName);
|
|
CTRACE((tfp, "pass LYFillLocalFile:`%s'\n", return_value));
|
|
}
|
|
|
|
return return_value; /* exactly the right length */
|
|
}
|
|
|
|
/* HTParseAnchor(), fast HTParse() specialization
|
|
* ----------------------------------------------
|
|
*
|
|
* On exit,
|
|
* returns A pointer within input string (probably to its end '\0')
|
|
*/
|
|
const char *HTParseAnchor(const char *aName)
|
|
{
|
|
const char *p = aName;
|
|
|
|
for (; *p && *p != '#'; p++) {
|
|
;
|
|
}
|
|
if (*p == '#') {
|
|
/* the safe way based on HTParse() -
|
|
* keeping in mind scan() peculiarities on schemes:
|
|
*/
|
|
struct struct_parts given;
|
|
size_t need = ((unsigned) ((p - aName) + (int) strlen(p) + 1));
|
|
char *name;
|
|
|
|
if (need > (size_t) max_uri_size) {
|
|
p += strlen(p);
|
|
} else {
|
|
name = (char *) LYalloca(need);
|
|
|
|
if (name == NULL) {
|
|
outofmem(__FILE__, "HTParseAnchor");
|
|
}
|
|
strcpy(name, aName);
|
|
scan(name, &given);
|
|
LYalloca_free(name);
|
|
|
|
p++; /*next to '#' */
|
|
if (given.anchor == NULL) {
|
|
for (; *p; p++) /*scroll to end '\0' */
|
|
;
|
|
}
|
|
}
|
|
}
|
|
return p;
|
|
}
|
|
|
|
/* Simplify a filename. HTSimplify()
|
|
* --------------------
|
|
*
|
|
* A unix-style file is allowed to contain the sequence xxx/../ which may
|
|
* be replaced by "" , and the sequence "/./" which may be replaced by "/".
|
|
* Simplification helps us recognize duplicate filenames.
|
|
*
|
|
* RFC 3986 section 5.2.4 says to do this whether or not the path was relative.
|
|
*/
|
|
void HTSimplify(char *filename, BOOL absolute)
|
|
{
|
|
#define MY_FMT "HTParse HTSimplify\t(%s)"
|
|
#ifdef NO_LYNX_TRACE
|
|
#define debug_at(at) /* nothing */
|
|
#define atln "?"
|
|
#else
|
|
const char *atln;
|
|
|
|
#define debug_at(at) atln = at
|
|
#endif
|
|
char *mark;
|
|
char *p;
|
|
size_t limit;
|
|
|
|
CTRACE2(TRACE_HTPARSE,
|
|
(tfp, MY_FMT " %s\n",
|
|
filename,
|
|
absolute ? "ABS" : "REL"));
|
|
|
|
if (LYIsPathSep(*filename) && !absolute)
|
|
++filename;
|
|
mark = filename;
|
|
limit = strlen(filename);
|
|
|
|
for (p = filename; *p; ++p) {
|
|
if (*p == '?' || *p == '#') {
|
|
limit = (size_t) (p - filename);
|
|
break;
|
|
}
|
|
}
|
|
while ((limit != 0) && (*filename != '\0')) {
|
|
size_t trim = 0;
|
|
size_t skip = 0;
|
|
size_t last = 0;
|
|
|
|
debug_at("?");
|
|
p = filename;
|
|
if (limit >= 2 && !memcmp(p, "./", 2)) { /* 2A */
|
|
debug_at("2A");
|
|
trim = 2;
|
|
} else if (limit >= 3 && !memcmp(p, "../", 3)) {
|
|
debug_at("2A2");
|
|
trim = 3;
|
|
} else if (limit >= 3 && !memcmp(p, "/./", 3)) { /* 2B */
|
|
debug_at("2B");
|
|
trim = 2;
|
|
skip = 1;
|
|
} else if (limit == 2 && !memcmp(p, "/.", 2)) {
|
|
debug_at("2B2");
|
|
trim = 1;
|
|
skip = 1;
|
|
} else if (limit >= 4 && !memcmp(p, "/../", 4)) { /* 2C */
|
|
debug_at("2C");
|
|
trim = 3;
|
|
skip = 1;
|
|
last = 1;
|
|
} else if (limit == 3 && !memcmp(p, "/..", 3)) {
|
|
debug_at("2C2");
|
|
trim = 2;
|
|
skip = 1;
|
|
last = 1;
|
|
} else if (limit == 2 && !memcmp(p, "..", 2)) { /* 2D */
|
|
debug_at("2D");
|
|
trim = 2;
|
|
} else if (limit == 1 && !memcmp(p, ".", 1)) {
|
|
debug_at("2D2");
|
|
trim = 1;
|
|
}
|
|
if (trim) {
|
|
CTRACE2(TRACE_HTPARSE,
|
|
(tfp, MY_FMT " trim %lu/%lu (%.*s) '%.*s' @%s\n",
|
|
mark, (unsigned long) trim, (unsigned long) limit,
|
|
(int) trim, p + skip, (int) limit, p, atln));
|
|
}
|
|
if (last) {
|
|
char *prior = filename;
|
|
|
|
if (prior != mark) {
|
|
--prior;
|
|
while (prior != mark && *prior != '/') {
|
|
--prior;
|
|
}
|
|
}
|
|
if (prior != filename) {
|
|
trim += (size_t) (filename - prior);
|
|
limit += (size_t) (filename - prior);
|
|
filename = prior;
|
|
CTRACE2(TRACE_HTPARSE,
|
|
(tfp, MY_FMT " TRIM %lu/%lu (%.*s)\n",
|
|
mark, (unsigned long) trim, (unsigned long) limit,
|
|
(int) trim, filename + skip));
|
|
}
|
|
}
|
|
if (trim) {
|
|
limit -= trim;
|
|
for (p = filename;; ++p) {
|
|
if ((p[0] = p[trim]) == '\0') {
|
|
break;
|
|
}
|
|
if (skip) {
|
|
p[0] = '/';
|
|
skip = 0;
|
|
}
|
|
}
|
|
CTRACE2(TRACE_HTPARSE,
|
|
(tfp, MY_FMT " loop %lu\n", mark, (unsigned long) limit));
|
|
} else {
|
|
if (*filename == '/') {
|
|
++filename;
|
|
--limit;
|
|
}
|
|
while ((limit != 0) && (*filename != '/')) {
|
|
++filename;
|
|
--limit;
|
|
}
|
|
}
|
|
}
|
|
CTRACE2(TRACE_HTPARSE, (tfp, MY_FMT " done\n", mark));
|
|
#undef MY_FMT
|
|
}
|
|
|
|
/* Make Relative Name. HTRelative()
|
|
* -------------------
|
|
*
|
|
* This function creates and returns a string which gives an expression of
|
|
* one address as related to another. Where there is no relation, an absolute
|
|
* address is returned.
|
|
*
|
|
* On entry,
|
|
* Both names must be absolute, fully qualified names of nodes
|
|
* (no anchor bits)
|
|
*
|
|
* On exit,
|
|
* The return result points to a newly allocated name which, if
|
|
* parsed by HTParse relative to relatedName, will yield aName.
|
|
* The caller is responsible for freeing the resulting name later.
|
|
*
|
|
*/
|
|
char *HTRelative(const char *aName,
|
|
const char *relatedName)
|
|
{
|
|
char *result = NULL;
|
|
const char *p = aName;
|
|
const char *q = relatedName;
|
|
const char *after_access = NULL;
|
|
const char *path = NULL;
|
|
const char *last_slash = NULL;
|
|
int slashes = 0;
|
|
|
|
for (; *p; p++, q++) { /* Find extent of match */
|
|
if (*p != *q)
|
|
break;
|
|
if (*p == ':')
|
|
after_access = p + 1;
|
|
if (*p == '/') {
|
|
last_slash = p;
|
|
slashes++;
|
|
if (slashes == 3)
|
|
path = p;
|
|
}
|
|
}
|
|
|
|
/* q, p point to the first non-matching character or zero */
|
|
|
|
if (!after_access) { /* Different access */
|
|
StrAllocCopy(result, aName);
|
|
} else if (slashes < 3) { /* Different nodes */
|
|
StrAllocCopy(result, after_access);
|
|
} else if (slashes == 3) { /* Same node, different path */
|
|
StrAllocCopy(result, path);
|
|
} else { /* Some path in common */
|
|
unsigned levels = 0;
|
|
|
|
for (; *q && (*q != '#'); q++)
|
|
if (*q == '/')
|
|
levels++;
|
|
result = typecallocn(char, 3 * levels + strlen(last_slash) + 1);
|
|
|
|
if (result == NULL)
|
|
outofmem(__FILE__, "HTRelative");
|
|
|
|
result[0] = '\0';
|
|
for (; levels; levels--)
|
|
strcat(result, "../");
|
|
strcat(result, last_slash + 1);
|
|
}
|
|
CTRACE((tfp,
|
|
"HTparse: `%s' expressed relative to\n `%s' is\n `%s'.\n",
|
|
aName, relatedName, result));
|
|
return result;
|
|
}
|
|
|
|
/*
 * Allocate a zero-filled char array able to hold the text from 'base' up to
 * 'next', plus 'extra' more characters.  The arguments are parenthesized so
 * that the cast applies to the whole of 'extra'.
 */
#define AlloCopy(next,base,extra) \
	typecallocn(char, (((next) - (base)) + ((int) (extra))))
|
|
|
|
/* Escape undesirable characters using % HTEscape()
|
|
* -------------------------------------
|
|
*
|
|
* This function takes a pointer to a string in which
|
|
* some characters may be unacceptable unescaped.
|
|
* It returns a string which has these characters
|
|
* represented by a '%' character followed by two hex digits.
|
|
*
|
|
* Unlike HTUnEscape(), this routine returns a calloc'd string.
|
|
*/
|
|
/* *INDENT-OFF* */
/*
 * Which printable ASCII characters (32..127) may be left unescaped, indexed
 * by (character - 32).  Each entry is a set of mask bits:
 *
 *	Bit 0		xalpha		-- see HTFile.h
 *	Bit 1		xpalpha		-- as xalpha but with plus.
 *	Bit 2 ...	path		-- as xpalphas but with /
 */
static const unsigned char isAcceptable[96] =
    /*   0 1 2 3 4 5 6 7 8 9 A B C D E F */
    {    0,0,0,0,0,0,0,0,0,0,7,6,0,7,7,4,	/* 2x   !"#$%&'()*+,-./	 */
	 7,7,7,7,7,7,7,7,7,7,0,0,0,0,0,0,	/* 3x  0123456789:;<=>?	 */
	 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,	/* 4x  @ABCDEFGHIJKLMNO  */
	 7,7,7,7,7,7,7,7,7,7,7,0,0,0,0,7,	/* 5X  PQRSTUVWXYZ[\]^_	 */
	 0,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,	/* 6x  `abcdefghijklmno	 */
	 7,7,7,7,7,7,7,7,7,7,7,0,0,0,0,0 };	/* 7X  pqrstuvwxyz{|}~	DEL */
/* *INDENT-ON* */

/* digits used when writing a %xx escape */
static const char *hex = "0123456789ABCDEF";

/*
 * True if character code 'a' may be copied as it is for the bits given by
 * the variable 'mask', which must be in scope where the macro is used.
 * The argument is evaluated more than once, so it must not have side effects.
 */
#define ACCEPTABLE(a)	( (a) >= 32 && (a) < 128 && ((isAcceptable[(a) - 32]) & mask))
|
|
|
|
char *HTEscape(const char *str,
|
|
unsigned mask)
|
|
{
|
|
const char *p;
|
|
char *q;
|
|
char *result;
|
|
size_t unacceptable = 0;
|
|
|
|
for (p = str; *p; p++)
|
|
if (!ACCEPTABLE(UCH(TOASCII(*p))))
|
|
unacceptable++;
|
|
result = AlloCopy(p, str, (unacceptable * 2) + 1);
|
|
|
|
if (result == NULL)
|
|
outofmem(__FILE__, "HTEscape");
|
|
|
|
for (q = result, p = str; *p; p++) {
|
|
unsigned char a = UCH(TOASCII(*p));
|
|
|
|
if (!ACCEPTABLE(a)) {
|
|
*q++ = HEX_ESCAPE; /* Means hex coming */
|
|
*q++ = hex[a >> 4];
|
|
*q++ = hex[a & 15];
|
|
} else
|
|
*q++ = *p;
|
|
}
|
|
*q = '\0'; /* Terminate */
|
|
return result;
|
|
}
|
|
|
|
/* Escape unsafe characters using % HTEscapeUnsafe()
|
|
* --------------------------------
|
|
*
|
|
* This function takes a pointer to a string in which
|
|
* some characters may be that may be unsafe are unescaped.
|
|
* It returns a string which has these characters
|
|
* represented by a '%' character followed by two hex digits.
|
|
*
|
|
* Unlike HTUnEscape(), this routine returns a malloc'd string.
|
|
*/
|
|
#define UNSAFE(ch) (((ch) <= 32) || ((ch) >= 127))
|
|
|
|
char *HTEscapeUnsafe(const char *str)
|
|
{
|
|
const char *p;
|
|
char *q;
|
|
char *result;
|
|
size_t unacceptable = 0;
|
|
|
|
for (p = str; *p; p++)
|
|
if (UNSAFE(UCH(TOASCII(*p))))
|
|
unacceptable++;
|
|
result = AlloCopy(p, str, (unacceptable * 2) + 1);
|
|
|
|
if (result == NULL)
|
|
outofmem(__FILE__, "HTEscapeUnsafe");
|
|
|
|
for (q = result, p = str; *p; p++) {
|
|
unsigned char a = UCH(TOASCII(*p));
|
|
|
|
if (UNSAFE(a)) {
|
|
*q++ = HEX_ESCAPE; /* Means hex coming */
|
|
*q++ = hex[a >> 4];
|
|
*q++ = hex[a & 15];
|
|
} else
|
|
*q++ = *p;
|
|
}
|
|
*q = '\0'; /* Terminate */
|
|
return result;
|
|
}
|
|
|
|
/* Escape undesirable characters using % but space to +. HTEscapeSP()
|
|
* -----------------------------------------------------
|
|
*
|
|
* This function takes a pointer to a string in which
|
|
* some characters may be unacceptable unescaped.
|
|
* It returns a string which has these characters
|
|
* represented by a '%' character followed by two hex digits,
|
|
* except that spaces are converted to '+' instead of %2B.
|
|
*
|
|
* Unlike HTUnEscape(), this routine returns a calloced string.
|
|
*/
|
|
char *HTEscapeSP(const char *str,
|
|
unsigned mask)
|
|
{
|
|
const char *p;
|
|
char *q;
|
|
char *result;
|
|
size_t unacceptable = 0;
|
|
|
|
for (p = str; *p; p++)
|
|
if (!(*p == ' ' || ACCEPTABLE(UCH(TOASCII(*p)))))
|
|
unacceptable++;
|
|
result = AlloCopy(p, str, (unacceptable * 2) + 1);
|
|
|
|
if (result == NULL)
|
|
outofmem(__FILE__, "HTEscape");
|
|
|
|
for (q = result, p = str; *p; p++) {
|
|
unsigned char a = UCH(TOASCII(*p));
|
|
|
|
if (a == 32) {
|
|
*q++ = '+';
|
|
} else if (!ACCEPTABLE(a)) {
|
|
*q++ = HEX_ESCAPE; /* Means hex coming */
|
|
*q++ = hex[a >> 4];
|
|
*q++ = hex[a & 15];
|
|
} else {
|
|
*q++ = *p;
|
|
}
|
|
}
|
|
*q = '\0'; /* Terminate */
|
|
return result;
|
|
}
|
|
|
|
/* Decode %xx escaped characters. HTUnEscape()
|
|
* ------------------------------
|
|
*
|
|
* This function takes a pointer to a string in which some
|
|
* characters may have been encoded in %xy form, where xy is
|
|
* the ASCII hex code for character 16x+y.
|
|
* The string is converted in place, as it will never grow.
|
|
*/
|
|
static char from_hex(int c)
{
    /*
     * Value of one hex digit.  Callers are expected to have checked the
     * character with isxdigit() first: anything that is not a decimal digit
     * or 'A'..'F' is handled as if it were a lower-case hex letter.
     */
    int value;

    if (c >= '0' && c <= '9')
        value = c - '0';
    else if (c >= 'A' && c <= 'F')
        value = c - 'A' + 10;
    else
        value = c - 'a' + 10;   /* accept small letters just in case */

    return (char) value;
}
|
|
|
|
char *HTUnEscape(char *str)
|
|
{
|
|
char *p = str;
|
|
char *q = str;
|
|
|
|
if (!(p && *p))
|
|
return str;
|
|
|
|
while (*p != '\0') {
|
|
if (*p == HEX_ESCAPE &&
|
|
/*
|
|
* Tests shouldn't be needed, but better safe than sorry.
|
|
*/
|
|
p[1] && p[2] &&
|
|
isxdigit(UCH(p[1])) &&
|
|
isxdigit(UCH(p[2]))) {
|
|
p++;
|
|
if (*p)
|
|
*q = (char) (from_hex(*p++) * 16);
|
|
if (*p) {
|
|
/*
|
|
* Careful! FROMASCII() may evaluate its arg more than once!
|
|
*/
|
|
/* S/390 -- gil -- 0221 */
|
|
*q = (char) (*q + from_hex(*p++));
|
|
}
|
|
*q = FROMASCII(*q);
|
|
q++;
|
|
} else {
|
|
*q++ = *p++;
|
|
}
|
|
}
|
|
|
|
*q = '\0';
|
|
return str;
|
|
|
|
} /* HTUnEscape */
|
|
|
|
/* Decode some %xx escaped characters. HTUnEscapeSome()
|
|
* ----------------------------------- Klaus Weide
|
|
* (kweide@tezcat.com)
|
|
* This function takes a pointer to a string in which some
|
|
* characters may have been encoded in %xy form, where xy is
|
|
* the ASCII hex code for character 16x+y, and a pointer to
|
|
* a second string containing one or more characters which
|
|
* should be unescaped if escaped in the first string.
|
|
* The first string is converted in place, as it will never grow.
|
|
*/
|
|
char *HTUnEscapeSome(char *str,
|
|
const char *do_trans)
|
|
{
|
|
char *p = str;
|
|
char *q = str;
|
|
char testcode;
|
|
|
|
if (p == NULL || *p == '\0' || do_trans == NULL || *do_trans == '\0')
|
|
return str;
|
|
|
|
while (*p != '\0') {
|
|
if (*p == HEX_ESCAPE &&
|
|
p[1] && p[2] && /* tests shouldn't be needed, but.. */
|
|
isxdigit(UCH(p[1])) &&
|
|
isxdigit(UCH(p[2])) &&
|
|
(testcode = (char) FROMASCII(from_hex(p[1]) * 16 +
|
|
from_hex(p[2]))) && /* %00 no good */
|
|
StrChr(do_trans, testcode)) { /* it's one of the ones we want */
|
|
*q++ = testcode;
|
|
p += 3;
|
|
} else {
|
|
*q++ = *p++;
|
|
}
|
|
}
|
|
|
|
*q = '\0';
|
|
return str;
|
|
|
|
} /* HTUnEscapeSome */
|
|
/* *INDENT-OFF* */
|
|
/*
 * Per-character flags used when building RFC 822 words.  The table covers
 * the 96 ASCII codes 32..127 and is looked up as crfc[code - 32] (see the
 * uses in HTMake822Word()).  Each row below holds 16 consecutive codes; the
 * trailing comment on a row shows the characters it describes.
 */
static const unsigned char crfc[96] =
|
|
|
|
/*
 * Bit 0 (value 1) xalpha  -- need "quoting"
 * Bit 1 (value 2) xpalpha -- need \escape if quoted
 */
|
|
/* 0 1 2 3 4 5 6 7 8 9 A B C D E F */
|
|
{ 1,0,3,0,0,0,0,0,1,1,0,0,1,0,1,0, /* 2x !"#$%&'()*+,-./ */
|
|
0,0,0,0,0,0,0,0,0,0,1,1,1,0,1,0, /* 3x 0123456789:;<=>? */
|
|
1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 4x @ABCDEFGHIJKLMNO */
|
|
0,0,0,0,0,0,0,0,0,0,0,1,2,1,0,0, /* 5X PQRSTUVWXYZ[\]^_ */
|
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 6x `abcdefghijklmno */
|
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3 }; /* 7X pqrstuvwxyz{|}~ DEL */
|
|
/* *INDENT-ON* */
|
|
|
|
/*
 * ASCII code points spelled as octal escapes.  HTMake822Word() compares
 * these against characters that have already been passed through TOASCII(),
 * so the values must be ASCII regardless of the host character set.
 */
#define ASCII_TAB '\011'	/* horizontal tab, 9 */
|
|
#define ASCII_LF '\012'	/* line feed, 10 */
|
|
#define ASCII_CR '\015'	/* carriage return, 13 */
|
|
#define ASCII_SPC '\040'	/* space, 32 */
|
|
#define ASCII_BAK '\134'	/* backslash, 92 */
|
|
|
|
/*
|
|
* Turn a string which is not a RFC 822 token into a quoted-string. - KW
|
|
* The "quoted" parameter tells whether we need the beginning/ending quote
|
|
* marks. If not, the caller will provide them -TD
|
|
*/
|
|
void HTMake822Word(char **str,
|
|
int quoted)
|
|
{
|
|
const char *p;
|
|
char *q;
|
|
char *result;
|
|
unsigned char a;
|
|
unsigned added = 0;
|
|
|
|
if (isEmpty(*str)) {
|
|
StrAllocCopy(*str, quoted ? "\"\"" : "");
|
|
return;
|
|
}
|
|
for (p = *str; *p; p++) {
|
|
a = UCH(TOASCII(*p)); /* S/390 -- gil -- 0240 */
|
|
if (a < 32 || a >= 128 ||
|
|
((crfc[a - 32]) & 1)) {
|
|
if (!added)
|
|
added = 2;
|
|
if (a >= 160 || a == '\t')
|
|
continue;
|
|
if (a == '\r' || a == '\n')
|
|
added += 2;
|
|
else if ((a & 127) < 32 || ((crfc[a - 32]) & 2))
|
|
added++;
|
|
}
|
|
}
|
|
if (!added)
|
|
return;
|
|
result = AlloCopy(p, *str, added + 1);
|
|
if (result == NULL)
|
|
outofmem(__FILE__, "HTMake822Word");
|
|
|
|
q = result;
|
|
if (quoted)
|
|
*q++ = '"';
|
|
/*
|
|
* Having converted the character to ASCII, we can't use symbolic
|
|
* escape codes, since they're in the host character set, which
|
|
* is not necessarily ASCII. Thus we use octal escape codes instead.
|
|
* -- gil (Paul Gilmartin) <pg@sweng.stortek.com>
|
|
*/
|
|
/* S/390 -- gil -- 0268 */
|
|
for (p = *str; *p; p++) {
|
|
a = UCH(TOASCII(*p));
|
|
if ((a != ASCII_TAB) &&
|
|
((a & 127) < ASCII_SPC ||
|
|
(a < 128 && ((crfc[a - 32]) & 2))))
|
|
*q++ = ASCII_BAK;
|
|
*q++ = *p;
|
|
if (a == ASCII_LF ||
|
|
(a == ASCII_CR && (TOASCII(*(p + 1)) != ASCII_LF)))
|
|
*q++ = ' ';
|
|
}
|
|
if (quoted)
|
|
*q++ = '"';
|
|
*q = '\0'; /* Terminate */
|
|
FREE(*str);
|
|
*str = result;
|
|
}
|