diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 16:37:15 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 16:37:15 +0000 |
commit | ae5d181b854d3ccb373b6bc01b4869e44ff4d87a (patch) | |
tree | 91f59efb48c56a84cc798e012fccb667b63d3fee /WWW/Library/Implementation/HTMIME.c | |
parent | Initial commit. (diff) | |
download | lynx-upstream.tar.xz lynx-upstream.zip |
Adding upstream version 2.9.0dev.12.upstream/2.9.0dev.12upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to '')
-rw-r--r-- | WWW/Library/Implementation/HTMIME.c | 2594 |
1 files changed, 2594 insertions, 0 deletions
diff --git a/WWW/Library/Implementation/HTMIME.c b/WWW/Library/Implementation/HTMIME.c new file mode 100644 index 0000000..fde89a6 --- /dev/null +++ b/WWW/Library/Implementation/HTMIME.c @@ -0,0 +1,2594 @@ +/* + * $LynxId: HTMIME.c,v 1.102 2022/03/12 14:47:02 tom Exp $ + * + * MIME Message Parse HTMIME.c + * ================== + * + * This is RFC 1341-specific code. + * The input stream pushed into this parser is assumed to be + * stripped on CRs, ie lines end with LF, not CR LF. + * (It is easy to change this except for the body part where + * conversion can be slow.) + * + * History: + * Feb 92 Written Tim Berners-Lee, CERN + * + */ + +#define HTSTREAM_INTERNAL 1 + +#include <HTUtils.h> +#include <HTMIME.h> /* Implemented here */ +#include <HTTP.h> /* for redirecting_url */ +#include <HTAlert.h> +#include <HTFile.h> +#include <HTCJK.h> +#include <UCMap.h> +#include <UCDefs.h> +#include <UCAux.h> + +#include <LYCookie.h> +#include <LYCharSets.h> +#include <LYCharUtils.h> +#include <LYStrings.h> +#include <LYUtils.h> +#include <LYGlobalDefs.h> +#include <LYLeaks.h> + +/* MIME Object + * ----------- + */ + +typedef enum { + MIME_TRANSPARENT, /* put straight through to target ASAP! */ + /* states for "Transfer-Encoding: chunked" */ + MIME_CHUNKED, + mcCHUNKED_COUNT_DIGIT, + mcCHUNKED_COUNT_CR, + mcCHUNKED_COUNT_LF, + mcCHUNKED_EXTENSION, + mcCHUNKED_DATA, + mcCHUNKED_DATA_CR, + mcCHUNKED_DATA_LF, + /* character state-machine */ + miBEGINNING_OF_LINE, /* first character and not a continuation */ + miA, + miACCEPT_RANGES, + miAGE, + miAL, + miALLOW, + miALTERNATES, + miC, + miCACHE_CONTROL, + miCO, + miCOOKIE, + miCON, + miCONNECTION, + miCONTENT_, + miCONTENT_BASE, + miCONTENT_DISPOSITION, + miCONTENT_ENCODING, + miCONTENT_FEATURES, + miCONTENT_L, + miCONTENT_LANGUAGE, + miCONTENT_LENGTH, + miCONTENT_LOCATION, + miCONTENT_MD5, + miCONTENT_RANGE, + miCONTENT_T, + miCONTENT_TRANSFER_ENCODING, + miCONTENT_TYPE, + miDATE, + miE, + miETAG, + miEXPIRES, + miKEEP_ALIVE, + miL, + miLAST_MODIFIED, + miLINK, + miLOCATION, + miP, + miPR, + miPRAGMA, + miPROXY_AUTHENTICATE, + miPUBLIC, + miR, + miRE, + miREFRESH, + miRETRY_AFTER, + miS, + miSAFE, + miSE, + miSERVER, + miSET_COOKIE, + miSET_COOKIE1, + miSET_COOKIE2, + miT, + miTITLE, + miTRANSFER_ENCODING, + miU, + miUPGRADE, + miURI, + miV, + miVARY, + miVIA, + miW, + miWARNING, + miWWW_AUTHENTICATE, + miSKIP_GET_VALUE, /* Skip space then get value */ + miGET_VALUE, /* Get value till white space */ + miJUNK_LINE, /* Ignore the rest of this folded line */ + miNEWLINE, /* Just found a LF .. maybe continuation */ + miCHECK, /* check against check_pointer */ + MIME_NET_ASCII, /* Translate from net ascii */ + MIME_IGNORE /* Ignore entire file */ + /* TRANSPARENT and IGNORE are defined as stg else in _WINDOWS */ +} MIME_state; + +#define VALUE_SIZE 5120 /* @@@@@@@ Arbitrary? */ +struct _HTStream { + const HTStreamClass *isa; + + BOOL net_ascii; /* Is input net ascii? */ + MIME_state state; /* current state */ + MIME_state if_ok; /* got this state if match */ + MIME_state field; /* remember which field */ + MIME_state fold_state; /* state on a fold */ + BOOL head_only; /* only parsing header */ + BOOL pickup_redirection; /* parsing for location */ + BOOL no_streamstack; /* use sink directly */ + const char *check_pointer; /* checking input */ + + char *value_pointer; /* storing values */ + char value[VALUE_SIZE]; + + HTParentAnchor *anchor; /* Given on creation */ + HTStream *sink; /* Given on creation */ + + char *boundary; /* For multipart */ + char *set_cookie; /* Set-Cookie */ + char *set_cookie2; /* Set-Cookie2 */ + char *location; /* Location */ + + char *refresh_url; /* "Refresh:" URL */ + + HTFormat c_t_encoding; /* Content-Transfer-Encoding */ + char *compression_encoding; + + BOOL chunked_encoding; /* Transfer-Encoding: chunked */ + long chunked_size; /* ...counter for "chunked" */ + + HTFormat format; /* Content-Type */ + HTStream *target; /* While writing out */ + HTStreamClass targetClass; + + HTAtom *targetRep; /* Converting into? */ +}; + +/* + * This function is for trimming off any paired + * open- and close-double quotes from header values. + * It does not parse the string for embedded quotes, + * and will not modify the string unless both the + * first and last characters are double-quotes. - FM + */ +void HTMIME_TrimDoubleQuotes(char *value) +{ + size_t i; + char *cp = value; + + if (isEmpty(cp) || *cp != '"') + return; + + i = strlen(cp); + if (cp[(i - 1)] != '"') + return; + else + cp[(i - 1)] = '\0'; + + for (i = 0; value[i]; i++) + value[i] = cp[(i + 1)]; +} + +/* + * Check if the token from "Content-Encoding" corresponds to a compression + * type. + */ +static BOOL content_is_compressed(HTStream *me) +{ + char *encoding = me->anchor->content_encoding; + BOOL result = (BOOL) (HTEncodingToCompressType(encoding) != cftNone); + + CTRACE((tfp, "content is%s compressed\n", result ? "" : " NOT")); + return result; +} + +/* + * Strip quotes from a refresh-URL. + */ +static void dequote(char *url) +{ + size_t len; + + len = strlen(url); + if (*url == '\'' && len > 1 && url[len - 1] == url[0]) { + url[len - 1] = '\0'; + while ((url[0] = url[1]) != '\0') { + ++url; + } + } +} + +/* + * Strip off any compression-suffix from the address and check if the result + * looks like one of the presentable suffixes. If so, return the corresponding + * MIME type. + */ +static const char *UncompressedContentType(HTStream *me, CompressFileType method) +{ + const char *result = 0; + char *address = me->anchor->address; + const char *expected = HTCompressTypeToSuffix(method); + const char *actual = strrchr(address, '.'); + + /* + * We have to ensure the suffix is consistent, to use HTFileFormat(). + */ + if (actual != 0 && !strcasecomp(actual, expected)) { + HTFormat format; + HTAtom *pencoding = 0; + const char *description = 0; + + format = HTFileFormat(address, &pencoding, &description); + result = HTAtom_name(format); + } + + return result; +} + +static int pumpData(HTStream *me) +{ + CompressFileType method; + const char *new_encoding; + const char *new_content; + + CTRACE((tfp, "Begin pumpData\n")); + /* + * If the content-type says it is compressed, and there is no + * content-encoding, check further and see if the address (omitting the + * suffix for a compressed type) looks like a type we can present. If so, + * rearrange things so we'll present the StreamStack code with the + * presentable type, already marked as compressed. + */ + CTRACE((tfp, "...address{%s}\n", me->anchor->address)); + method = HTContentTypeToCompressType(me->anchor->content_type_params); + if (isEmpty(me->anchor->content_encoding)) + me->anchor->no_content_encoding = TRUE; + if ((method != cftNone) + && isEmpty(me->anchor->content_encoding) + && (new_content = UncompressedContentType(me, method)) != 0) { + + new_encoding = HTCompressTypeToEncoding(method); + CTRACE((tfp, "reinterpreting as content-type:%s, encoding:%s\n", + new_content, new_encoding)); + + StrAllocCopy(me->anchor->content_encoding, new_encoding); + FREE(me->compression_encoding); + StrAllocCopy(me->compression_encoding, new_encoding); + + LYStrNCpy(me->value, new_content, VALUE_SIZE - 1); + StrAllocCopy(me->anchor->content_type_params, me->value); + me->format = HTAtom_for(me->value); + } + + if (StrChr(HTAtom_name(me->format), ';') != NULL) { + char *cp = NULL, *cp1, *cp2, *cp3 = NULL, *cp4; + + CTRACE((tfp, "HTMIME: Extended MIME Content-Type is %s\n", + HTAtom_name(me->format))); + StrAllocCopy(cp, HTAtom_name(me->format)); + /* + * Note that the Content-Type value was converted + * to lower case when we loaded into me->format, + * but there may have been a mixed or upper-case + * atom, so we'll force lower-casing again. We + * also stripped spaces and double-quotes, but + * we'll make sure they're still gone from any + * charset parameter we check. - FM + */ + LYLowerCase(cp); + if ((cp1 = StrChr(cp, ';')) != NULL) { + BOOL chartrans_ok = NO; + + if ((cp2 = strstr(cp1, "charset")) != NULL) { + int chndl; + + cp2 += 7; + while (*cp2 == ' ' || *cp2 == '=' || *cp2 == '"') + cp2++; + StrAllocCopy(cp3, cp2); /* copy to mutilate more */ + for (cp4 = cp3; (*cp4 != '\0' && *cp4 != '"' && + *cp4 != ';' && *cp4 != ':' && + !WHITE(*cp4)); cp4++) ; /* do nothing */ + *cp4 = '\0'; + cp4 = cp3; + chndl = UCGetLYhndl_byMIME(cp3); + if (UCCanTranslateFromTo(chndl, + current_char_set)) { + chartrans_ok = YES; + *cp1 = '\0'; + me->format = HTAtom_for(cp); + StrAllocCopy(me->anchor->charset, cp4); + HTAnchor_setUCInfoStage(me->anchor, chndl, + UCT_STAGE_MIME, + UCT_SETBY_MIME); + } else if (chndl < 0) { /* got something but we don't + recognize it */ + chndl = UCLYhndl_for_unrec; + if (chndl < 0) + /* + * UCLYhndl_for_unrec not defined :-( fallback to + * UCLYhndl_for_unspec which always valid. + */ + chndl = UCLYhndl_for_unspec; /* always >= 0 */ + if (UCCanTranslateFromTo(chndl, + current_char_set)) { + chartrans_ok = YES; + *cp1 = '\0'; + me->format = HTAtom_for(cp); + HTAnchor_setUCInfoStage(me->anchor, chndl, + UCT_STAGE_MIME, + UCT_SETBY_DEFAULT); + } + } else { + /* + * Something like 'big5' - we cannot translate it, but + * the user may still be able to navigate the links. + */ + *cp1 = '\0'; + me->format = HTAtom_for(cp); + StrAllocCopy(me->anchor->charset, cp4); + HTAnchor_setUCInfoStage(me->anchor, chndl, + UCT_STAGE_MIME, + UCT_SETBY_MIME); + } + if (chartrans_ok) { + LYUCcharset *p_in = + HTAnchor_getUCInfoStage(me->anchor, + UCT_STAGE_MIME); + LYUCcharset *p_out = + HTAnchor_setUCInfoStage(me->anchor, + current_char_set, + UCT_STAGE_HTEXT, + UCT_SETBY_DEFAULT); + + if (!p_out) + /* + * Try again. + */ + p_out = + HTAnchor_getUCInfoStage(me->anchor, + UCT_STAGE_HTEXT); + + if (!strcmp(p_in->MIMEname, + "x-transparent")) { + HTPassEightBitRaw = TRUE; + HTAnchor_setUCInfoStage(me->anchor, + HTAnchor_getUCLYhndl(me->anchor, + UCT_STAGE_HTEXT), + UCT_STAGE_MIME, + UCT_SETBY_DEFAULT); + } + if (!strcmp(p_out->MIMEname, + "x-transparent")) { + HTPassEightBitRaw = TRUE; + HTAnchor_setUCInfoStage(me->anchor, + HTAnchor_getUCLYhndl(me->anchor, + UCT_STAGE_MIME), + UCT_STAGE_HTEXT, + UCT_SETBY_DEFAULT); + } + if ((p_in->enc != UCT_ENC_CJK) +#ifdef USE_JAPANESEUTF8_SUPPORT + && ((p_in->enc != UCT_ENC_UTF8) + || (p_out->enc != UCT_ENC_CJK)) +#endif + ) { + HTCJK = NOCJK; + if (!(p_in->codepoints & + UCT_CP_SUBSETOF_LAT1) && + chndl == current_char_set) { + HTPassEightBitRaw = TRUE; + } + } else if (p_out->enc == UCT_ENC_CJK) { + Set_HTCJK(p_in->MIMEname, p_out->MIMEname); + } + } else { + /* + * Cannot translate. If according to some heuristic the + * given charset and the current display character both are + * likely to be like ISO-8859 in structure, pretend we have + * some kind of match. + */ + BOOL given_is_8859 = + (BOOL) (!StrNCmp(cp4, "iso-8859-", 9) && + isdigit(UCH(cp4[9]))); + BOOL given_is_8859like = + (BOOL) (given_is_8859 || + !StrNCmp(cp4, "windows-", 8) || + !StrNCmp(cp4, "cp12", 4) || + !StrNCmp(cp4, "cp-12", 5)); + BOOL given_and_display_8859like = + (BOOL) (given_is_8859like && + (strstr(LYchar_set_names[current_char_set], + "ISO-8859") || + strstr(LYchar_set_names[current_char_set], + "windows-"))); + + if (given_and_display_8859like) { + *cp1 = '\0'; + me->format = HTAtom_for(cp); + } + if (given_is_8859) { + cp1 = &cp4[10]; + while (*cp1 && + isdigit(UCH(*cp1))) + cp1++; + *cp1 = '\0'; + } + if (given_and_display_8859like) { + StrAllocCopy(me->anchor->charset, cp4); + HTPassEightBitRaw = TRUE; + } + HTAlert(*cp4 ? cp4 : me->anchor->charset); + } + FREE(cp3); + } else { + /* + * No charset parameter is present. Ignore all other + * parameters, as we do when charset is present. - FM + */ + *cp1 = '\0'; + me->format = HTAtom_for(cp); + } + } + FREE(cp); + } + /* + * If we have an Expires header and haven't already set the no_cache + * element for the anchor, check if we should set it based on that header. + * - FM + */ + if (me->anchor->no_cache == FALSE && + me->anchor->expires != NULL) { + if (!strcmp(me->anchor->expires, "0")) { + /* + * The value is zero, which we treat as an absolute no-cache + * directive. - FM + */ + me->anchor->no_cache = TRUE; + } else if (me->anchor->date != NULL) { + /* + * We have a Date header, so check if the value is less than or + * equal to that. - FM + */ + if (LYmktime(me->anchor->expires, TRUE) <= + LYmktime(me->anchor->date, TRUE)) { + me->anchor->no_cache = TRUE; + } + } else if (LYmktime(me->anchor->expires, FALSE) == 0) { + /* + * We don't have a Date header, and the value is in past for us. - + * FM + */ + me->anchor->no_cache = TRUE; + } + } + StrAllocCopy(me->anchor->content_type, + HTAtom_name(me->format)); + + if (me->set_cookie != NULL || me->set_cookie2 != NULL) { + LYSetCookie(me->set_cookie, + me->set_cookie2, + me->anchor->address); + FREE(me->set_cookie); + FREE(me->set_cookie2); + } + if (me->pickup_redirection) { + if (me->location && *me->location) { + redirecting_url = me->location; + me->location = NULL; + if (me->targetRep != WWW_DEBUG || me->sink) + me->head_only = YES; + + } else { + permanent_redirection = FALSE; + if (me->location) { + CTRACE((tfp, "HTTP: 'Location:' is zero-length!\n")); + HTAlert(REDIRECTION_WITH_BAD_LOCATION); + } + CTRACE((tfp, "HTTP: Failed to pick up location.\n")); + if (me->location) { + FREE(me->location); + } else { + HTAlert(REDIRECTION_WITH_NO_LOCATION); + } + } + } + CTRACE((tfp, "...pumpData finished reading header\n")); + if (me->head_only) { + /* We are done! - kw */ + me->state = MIME_IGNORE; + } else { + + if (me->no_streamstack) { + me->target = me->sink; + } else { + if (!me->compression_encoding) { + CTRACE((tfp, + "HTMIME: MIME Content-Type is '%s', converting to '%s'\n", + HTAtom_name(me->format), HTAtom_name(me->targetRep))); + } else { + /* + * Change the format to that for "www/compressed" and set up a + * stream to deal with it. - FM + */ + CTRACE((tfp, "HTMIME: MIME Content-Type is '%s',\n", + HTAtom_name(me->format))); + me->format = HTAtom_for("www/compressed"); + CTRACE((tfp, " Treating as '%s'. Converting to '%s'\n", + HTAtom_name(me->format), HTAtom_name(me->targetRep))); + FREE(me->compression_encoding); + } + me->target = HTStreamStack(me->format, me->targetRep, + me->sink, me->anchor); + if (!me->target) { + CTRACE((tfp, "HTMIME: Can't translate! ** \n")); + me->target = me->sink; /* Cheat */ + } + } + if (me->target) { + me->targetClass = *me->target->isa; + /* + * Pump rest of data right through, according to the transfer encoding. + */ + me->state = (me->chunked_encoding + ? MIME_CHUNKED + : MIME_TRANSPARENT); + } else { + me->state = MIME_IGNORE; /* What else to do? */ + } + if (me->refresh_url != NULL && !content_is_compressed(me)) { + char *url = NULL; + char *num = NULL; + char *txt = NULL; + const char *base = ""; /* FIXME: refresh_url may be relative to doc */ + + LYParseRefreshURL(me->refresh_url, &num, &url); + if (url != NULL && me->format == WWW_HTML) { + CTRACE((tfp, + "Formatting refresh-url as first line of result\n")); + HTSprintf0(&txt, gettext("Refresh: ")); + HTSprintf(&txt, gettext("%s seconds "), num); + dequote(url); + HTSprintf(&txt, "<a href=\"%s%s\">%s</a><br>", base, url, url); + CTRACE((tfp, "URL %s%s\n", base, url)); + (me->isa->put_string) (me, txt); + free(txt); + } + FREE(num); + FREE(url); + } + } + CTRACE((tfp, "...end of pumpData, copied %" + PRI_off_t " vs %" + PRI_off_t "\n", + CAST_off_t (me->anchor->actual_length), + CAST_off_t (me->anchor->content_length))); + me->anchor->actual_length = 0; + return HT_OK; +} + +static int dispatchField(HTStream *me) +{ + int i, j; + char *cp; + + *me->value_pointer = '\0'; + + cp = me->value_pointer; + while ((cp > me->value) && *(--cp) == ' ') /* S/390 -- gil -- 0146 */ + /* + * Trim trailing spaces. + */ + *cp = '\0'; + + switch (me->field) { + case miACCEPT_RANGES: + HTMIME_TrimDoubleQuotes(me->value); + CTRACE((tfp, "HTMIME: PICKED UP Accept-Ranges: '%s'\n", + me->value)); + break; + case miAGE: + HTMIME_TrimDoubleQuotes(me->value); + CTRACE((tfp, "HTMIME: PICKED UP Age: '%s'\n", + me->value)); + break; + case miALLOW: + HTMIME_TrimDoubleQuotes(me->value); + CTRACE((tfp, "HTMIME: PICKED UP Allow: '%s'\n", + me->value)); + break; + case miALTERNATES: + HTMIME_TrimDoubleQuotes(me->value); + CTRACE((tfp, "HTMIME: PICKED UP Alternates: '%s'\n", + me->value)); + break; + case miCACHE_CONTROL: + HTMIME_TrimDoubleQuotes(me->value); + CTRACE((tfp, "HTMIME: PICKED UP Cache-Control: '%s'\n", + me->value)); + if (me->value[0] == '\0') + break; + /* + * Convert to lowercase and indicate in anchor. - FM + */ + LYLowerCase(me->value); + StrAllocCopy(me->anchor->cache_control, me->value); + /* + * Check whether to set no_cache for the anchor. - FM + */ + { + char *cp1, *cp0 = me->value; + + while ((cp1 = strstr(cp0, "no-cache")) != NULL) { + cp1 += 8; + while (*cp1 != '\0' && WHITE(*cp1)) + cp1++; + if (*cp1 == '\0' || *cp1 == ';') { + me->anchor->no_cache = TRUE; + break; + } + cp0 = cp1; + } + if (me->anchor->no_cache == TRUE) + break; + cp0 = me->value; + while ((cp1 = strstr(cp0, "max-age")) != NULL) { + cp1 += 7; + while (*cp1 != '\0' && WHITE(*cp1)) + cp1++; + if (*cp1 == '=') { + cp1++; + while (*cp1 != '\0' && WHITE(*cp1)) + cp1++; + if (isdigit(UCH(*cp1))) { + cp0 = cp1; + while (isdigit(UCH(*cp1))) + cp1++; + if (*cp0 == '0' && cp1 == (cp0 + 1)) { + me->anchor->no_cache = TRUE; + break; + } + } + } + cp0 = cp1; + } + } + break; + case miCOOKIE: + HTMIME_TrimDoubleQuotes(me->value); + CTRACE((tfp, "HTMIME: PICKED UP Cookie: '%s'\n", + me->value)); + break; + case miCONNECTION: + HTMIME_TrimDoubleQuotes(me->value); + CTRACE((tfp, "HTMIME: PICKED UP Connection: '%s'\n", + me->value)); + break; + case miCONTENT_BASE: + HTMIME_TrimDoubleQuotes(me->value); + CTRACE((tfp, "HTMIME: PICKED UP Content-Base: '%s'\n", + me->value)); + if (me->value[0] == '\0') + break; + /* + * Indicate in anchor. - FM + */ + StrAllocCopy(me->anchor->content_base, me->value); + break; + case miCONTENT_DISPOSITION: + HTMIME_TrimDoubleQuotes(me->value); + CTRACE((tfp, "HTMIME: PICKED UP Content-Disposition: '%s'\n", + me->value)); + if (me->value[0] == '\0') + break; + /* + * Indicate in anchor. - FM + */ + StrAllocCopy(me->anchor->content_disposition, me->value); + /* + * It's not clear yet from existing RFCs and IDs whether we should be + * looking for file;, attachment;, and/or inline; before the + * filename=value, so we'll just search for "filename" followed by '=' + * and just hope we get the intended value. It is purely a suggested + * name, anyway. - FM + */ + cp = me->anchor->content_disposition; + while (*cp != '\0' && strncasecomp(cp, "filename", 8)) + cp++; + if (*cp == '\0') + break; + cp += 8; + while ((*cp != '\0') && (WHITE(*cp) || *cp == '=')) + cp++; + if (*cp == '\0') + break; + while (*cp != '\0' && WHITE(*cp)) + cp++; + if (*cp == '\0') + break; + StrAllocCopy(me->anchor->SugFname, cp); + if (*me->anchor->SugFname == '"') { + if ((cp = StrChr((me->anchor->SugFname + 1), + '"')) != NULL) { + *(cp + 1) = '\0'; + HTMIME_TrimDoubleQuotes(me->anchor->SugFname); + } else { + FREE(me->anchor->SugFname); + break; + } + } + cp = me->anchor->SugFname; + while (*cp != '\0' && !WHITE(*cp)) + cp++; + *cp = '\0'; + if (*me->anchor->SugFname == '\0') + FREE(me->anchor->SugFname); + break; + case miCONTENT_ENCODING: + HTMIME_TrimDoubleQuotes(me->value); + CTRACE((tfp, "HTMIME: PICKED UP Content-Encoding: '%s'\n", + me->value)); + if (me->value[0] == '\0' || + !strcasecomp(me->value, "identity")) + break; + /* + * Convert to lowercase and indicate in anchor. - FM + */ + LYLowerCase(me->value); + StrAllocCopy(me->anchor->content_encoding, me->value); + FREE(me->compression_encoding); + if (content_is_compressed(me)) { + /* + * Save it to use as a flag for setting up a "www/compressed" + * target. - FM + */ + StrAllocCopy(me->compression_encoding, me->value); + } else { + /* + * Some server indicated "8bit", "7bit" or "binary" + * inappropriately. We'll ignore it. - FM + */ + CTRACE((tfp, " Ignoring it!\n")); + } + break; + case miCONTENT_FEATURES: + HTMIME_TrimDoubleQuotes(me->value); + CTRACE((tfp, "HTMIME: PICKED UP Content-Features: '%s'\n", + me->value)); + break; + case miCONTENT_LANGUAGE: + HTMIME_TrimDoubleQuotes(me->value); + CTRACE((tfp, "HTMIME: PICKED UP Content-Language: '%s'\n", + me->value)); + if (me->value[0] == '\0') + break; + /* + * Convert to lowercase and indicate in anchor. - FM + */ + LYLowerCase(me->value); + StrAllocCopy(me->anchor->content_language, me->value); + break; + case miCONTENT_LENGTH: + HTMIME_TrimDoubleQuotes(me->value); + CTRACE((tfp, "HTMIME: PICKED UP Content-Length: '%s'\n", + me->value)); + if (me->value[0] == '\0') + break; + /* + * Convert to integer and indicate in anchor. - FM + */ + me->anchor->content_length = LYatoll(me->value); + if (me->anchor->content_length < 0) + me->anchor->content_length = 0; + CTRACE((tfp, " Converted to integer: '%" PRI_off_t "'\n", + CAST_off_t (me->anchor->content_length))); + break; + case miCONTENT_LOCATION: + HTMIME_TrimDoubleQuotes(me->value); + CTRACE((tfp, "HTMIME: PICKED UP Content-Location: '%s'\n", + me->value)); + if (me->value[0] == '\0') + break; + /* + * Indicate in anchor. - FM + */ + StrAllocCopy(me->anchor->content_location, me->value); + break; + case miCONTENT_MD5: + HTMIME_TrimDoubleQuotes(me->value); + CTRACE((tfp, "HTMIME: PICKED UP Content-MD5: '%s'\n", + me->value)); + if (me->value[0] == '\0') + break; + /* + * Indicate in anchor. - FM + */ + StrAllocCopy(me->anchor->content_md5, me->value); + break; + case miCONTENT_RANGE: + HTMIME_TrimDoubleQuotes(me->value); + CTRACE((tfp, "HTMIME: PICKED UP Content-Range: '%s'\n", + me->value)); + break; + case miCONTENT_TRANSFER_ENCODING: + HTMIME_TrimDoubleQuotes(me->value); + CTRACE((tfp, "HTMIME: PICKED UP Content-Transfer-Encoding: '%s'\n", + me->value)); + if (me->value[0] == '\0') + break; + /* + * Force the Content-Transfer-Encoding value to all lower case. - FM + */ + LYLowerCase(me->value); + me->c_t_encoding = HTAtom_for(me->value); + break; + case miCONTENT_TYPE: + HTMIME_TrimDoubleQuotes(me->value); + CTRACE((tfp, "HTMIME: PICKED UP Content-Type: '%s'\n", + me->value)); + if (me->value[0] == '\0') + break; + /* + * Force the Content-Type value to all lower case and strip spaces and + * double-quotes. - FM + */ + for (i = 0, j = 0; me->value[i]; i++) { + if (me->value[i] != ' ' && me->value[i] != '"') { + me->value[j++] = (char) TOLOWER(me->value[i]); + } + } + me->value[j] = '\0'; + me->format = HTAtom_for(me->value); + StrAllocCopy(me->anchor->content_type_params, me->value); + break; + case miDATE: + HTMIME_TrimDoubleQuotes(me->value); + CTRACE((tfp, "HTMIME: PICKED UP Date: '%s'\n", + me->value)); + if (me->value[0] == '\0') + break; + /* + * Indicate in anchor. - FM + */ + StrAllocCopy(me->anchor->date, me->value); + break; + case miETAG: + /* Do not trim double quotes: an entity tag consists of an opaque + * quoted string, possibly prefixed by a weakness indicator. + */ + CTRACE((tfp, "HTMIME: PICKED UP ETag: %s\n", + me->value)); + if (me->value[0] == '\0') + break; + /* + * Indicate in anchor. - FM + */ + StrAllocCopy(me->anchor->ETag, me->value); + break; + case miEXPIRES: + HTMIME_TrimDoubleQuotes(me->value); + CTRACE((tfp, "HTMIME: PICKED UP Expires: '%s'\n", + me->value)); + if (me->value[0] == '\0') + break; + /* + * Indicate in anchor. - FM + */ + StrAllocCopy(me->anchor->expires, me->value); + break; + case miKEEP_ALIVE: + HTMIME_TrimDoubleQuotes(me->value); + CTRACE((tfp, "HTMIME: PICKED UP Keep-Alive: '%s'\n", + me->value)); + break; + case miLAST_MODIFIED: + HTMIME_TrimDoubleQuotes(me->value); + CTRACE((tfp, "HTMIME: PICKED UP Last-Modified: '%s'\n", + me->value)); + if (me->value[0] == '\0') + break; + /* + * Indicate in anchor. - FM + */ + StrAllocCopy(me->anchor->last_modified, me->value); + break; + case miLINK: + HTMIME_TrimDoubleQuotes(me->value); + CTRACE((tfp, "HTMIME: PICKED UP Link: '%s'\n", + me->value)); + break; + case miLOCATION: + HTMIME_TrimDoubleQuotes(me->value); + CTRACE((tfp, "HTMIME: PICKED UP Location: '%s'\n", + me->value)); + if (me->pickup_redirection && !me->location) { + StrAllocCopy(me->location, me->value); + } else { + CTRACE((tfp, "HTMIME: *** Ignoring Location!\n")); + } + break; + case miPRAGMA: + HTMIME_TrimDoubleQuotes(me->value); + CTRACE((tfp, "HTMIME: PICKED UP Pragma: '%s'\n", + me->value)); + if (me->value[0] == '\0') + break; + /* + * Check whether to set no_cache for the anchor. - FM + */ + if (!strcmp(me->value, "no-cache")) + me->anchor->no_cache = TRUE; + break; + case miPROXY_AUTHENTICATE: + HTMIME_TrimDoubleQuotes(me->value); + CTRACE((tfp, "HTMIME: PICKED UP Proxy-Authenticate: '%s'\n", + me->value)); + break; + case miPUBLIC: + HTMIME_TrimDoubleQuotes(me->value); + CTRACE((tfp, "HTMIME: PICKED UP Public: '%s'\n", + me->value)); + break; + case miREFRESH: /* nonstandard: Netscape */ + HTMIME_TrimDoubleQuotes(me->value); + CTRACE((tfp, "HTMIME: PICKED UP Refresh: '%s'\n", + me->value)); + StrAllocCopy(me->refresh_url, me->value); + break; + case miRETRY_AFTER: + HTMIME_TrimDoubleQuotes(me->value); + CTRACE((tfp, "HTMIME: PICKED UP Retry-After: '%s'\n", + me->value)); + break; + case miSAFE: + HTMIME_TrimDoubleQuotes(me->value); + CTRACE((tfp, "HTMIME: PICKED UP Safe: '%s'\n", + me->value)); + if (me->value[0] == '\0') + break; + /* + * Indicate in anchor if "YES" or "TRUE". - FM + */ + if (!strcasecomp(me->value, "YES") || + !strcasecomp(me->value, "TRUE")) { + me->anchor->safe = TRUE; + } else if (!strcasecomp(me->value, "NO") || + !strcasecomp(me->value, "FALSE")) { + /* + * If server explicitly tells us that it has changed its mind, + * reset flag in anchor. - kw + */ + me->anchor->safe = FALSE; + } + break; + case miSERVER: + HTMIME_TrimDoubleQuotes(me->value); + CTRACE((tfp, "HTMIME: PICKED UP Server: '%s'\n", + me->value)); + if (me->value[0] == '\0') + break; + /* + * Indicate in anchor. - FM + */ + StrAllocCopy(me->anchor->server, me->value); + break; + case miSET_COOKIE1: + HTMIME_TrimDoubleQuotes(me->value); + CTRACE((tfp, "HTMIME: PICKED UP Set-Cookie: '%s'\n", + me->value)); + if (me->set_cookie == NULL) { + StrAllocCopy(me->set_cookie, me->value); + } else { + StrAllocCat(me->set_cookie, ", "); + StrAllocCat(me->set_cookie, me->value); + } + break; + case miSET_COOKIE2: + HTMIME_TrimDoubleQuotes(me->value); + CTRACE((tfp, "HTMIME: PICKED UP Set-Cookie2: '%s'\n", + me->value)); + if (me->set_cookie2 == NULL) { + StrAllocCopy(me->set_cookie2, me->value); + } else { + StrAllocCat(me->set_cookie2, ", "); + StrAllocCat(me->set_cookie2, me->value); + } + break; + case miTITLE: + HTMIME_TrimDoubleQuotes(me->value); + CTRACE((tfp, "HTMIME: PICKED UP Title: '%s'\n", + me->value)); + break; + case miTRANSFER_ENCODING: + HTMIME_TrimDoubleQuotes(me->value); + CTRACE((tfp, "HTMIME: PICKED UP Transfer-Encoding: '%s'\n", + me->value)); + if (!strcmp(me->value, "chunked")) + me->chunked_encoding = YES; + break; + case miUPGRADE: + HTMIME_TrimDoubleQuotes(me->value); + CTRACE((tfp, "HTMIME: PICKED UP Upgrade: '%s'\n", + me->value)); + break; + case miURI: + HTMIME_TrimDoubleQuotes(me->value); + CTRACE((tfp, "HTMIME: PICKED UP URI: '%s'\n", + me->value)); + break; + case miVARY: + HTMIME_TrimDoubleQuotes(me->value); + CTRACE((tfp, "HTMIME: PICKED UP Vary: '%s'\n", + me->value)); + break; + case miVIA: + HTMIME_TrimDoubleQuotes(me->value); + CTRACE((tfp, "HTMIME: PICKED UP Via: '%s'\n", + me->value)); + break; + case miWARNING: + HTMIME_TrimDoubleQuotes(me->value); + CTRACE((tfp, "HTMIME: PICKED UP Warning: '%s'\n", + me->value)); + break; + case miWWW_AUTHENTICATE: + HTMIME_TrimDoubleQuotes(me->value); + CTRACE((tfp, "HTMIME: PICKED UP WWW-Authenticate: '%s'\n", + me->value)); + break; + default: /* Should never get here */ + return HT_ERROR; + } + return HT_OK; +} + +/*_________________________________________________________________________ + * + * A C T I O N R O U T I N E S + */ + +/* Character handling + * ------------------ + * + * This is a FSM parser. It ignores field names it does not understand. + * Folded header fields are recognized. Lines without a fieldname at + * the beginning (that are not folded continuation lines) are ignored + * as unknown field names. Fields with empty values are not picked up. + */ +static void HTMIME_put_character(HTStream *me, int c) +{ + if (me->anchor->inHEAD) { + me->anchor->header_length++; + } + switch (me->state) { + begin_transparent: + case MIME_TRANSPARENT: + me->anchor->actual_length += 1; + (me->targetClass.put_character) (me->target, c); + return; + + /* RFC-2616 describes chunked transfer coding */ + case mcCHUNKED_DATA: + (*me->targetClass.put_character) (me->target, c); + me->chunked_size--; + if (me->chunked_size <= 0) + me->state = mcCHUNKED_DATA_CR; + return; + + case mcCHUNKED_DATA_CR: + me->state = mcCHUNKED_DATA_LF; + if (c == CR) { + return; + } + /* FALLTHRU */ + + case mcCHUNKED_DATA_LF: + me->state = MIME_CHUNKED; + if (c == LF) { + return; + } + + CTRACE((tfp, "HTIME_put_character expected LF in chunked data\n")); + me->state = MIME_TRANSPARENT; + goto begin_transparent; + + /* FALLTHRU */ + begin_chunked: + case MIME_CHUNKED: + me->chunked_size = 0; + me->state = mcCHUNKED_COUNT_DIGIT; + + /* FALLTHRU */ + case mcCHUNKED_COUNT_DIGIT: + if (isxdigit(UCH(c))) { + me->chunked_size <<= 4; + if (isdigit(UCH(c))) + me->chunked_size += UCH(c) - '0'; + else + me->chunked_size += TOUPPER(UCH(c)) - 'A' + 10; + return; + } + if (c == ';') + me->state = mcCHUNKED_EXTENSION; + + /* FALLTHRU */ + case mcCHUNKED_EXTENSION: + if (c != CR && c != LF) { + return; + } + me->state = mcCHUNKED_COUNT_CR; + + /* FALLTHRU */ + case mcCHUNKED_COUNT_CR: + me->state = mcCHUNKED_COUNT_LF; + if (c == CR) { + return; + } + + /* FALLTHRU */ + case mcCHUNKED_COUNT_LF: + me->state = ((me->chunked_size != 0) + ? mcCHUNKED_DATA + : MIME_CHUNKED); + if (c == LF) { + return; + } + goto begin_chunked; + + default: + break; + } + + /* + * This slightly simple conversion just strips CR and turns LF to newline. + * On unix LF is \n but on Mac \n is CR for example. See NetToText for an + * implementation which preserves single CR or LF. + */ + if (me->net_ascii) { + /* + * <sigh> This is evidence that at one time, this code supported + * local character sets other than ASCII. But there is so much + * code in HTTP.c that depends on line_buffer's having been + * translated to local character set that I needed to put the + * FROMASCII translation there, leaving this translation purely + * destructive. -- gil + */ + /* S/390 -- gil -- 0118 */ +#ifndef NOT_ASCII + c = FROMASCII(c); +#endif /* NOT_ASCII */ + if (c == CR) + return; + else if (c == LF) + c = '\n'; + } + + switch (me->state) { + + case MIME_IGNORE: + return; + + case MIME_TRANSPARENT: /* Not reached see above */ + case MIME_CHUNKED: + case mcCHUNKED_COUNT_DIGIT: + case mcCHUNKED_COUNT_CR: + case mcCHUNKED_COUNT_LF: + case mcCHUNKED_EXTENSION: + case mcCHUNKED_DATA: + case mcCHUNKED_DATA_CR: + case mcCHUNKED_DATA_LF: + return; + + case MIME_NET_ASCII: + (*me->targetClass.put_character) (me->target, c); /* MUST BE FAST */ + return; + + case miNEWLINE: + if (c != '\n' && WHITE(c)) { /* Folded line */ + me->state = me->fold_state; /* pop state before newline */ + if (me->state == miGET_VALUE && + me->value_pointer && me->value_pointer != me->value && + !WHITE(*(me->value_pointer - 1))) { + c = ' '; + goto GET_VALUE; /* will add space to value if it fits - kw */ + } + break; + } else if (me->fold_state == miGET_VALUE) { + /* Got a field, and now we know it's complete - so + * act on it. - kw */ + dispatchField(me); + } + /* FALLTHRU */ + + case miBEGINNING_OF_LINE: + me->net_ascii = YES; + switch (c) { + case 'a': + case 'A': + me->state = miA; + CTRACE((tfp, "HTMIME: Got 'A' at beginning of line, state now A\n")); + break; + + case 'c': + case 'C': + me->state = miC; + CTRACE((tfp, "HTMIME: Got 'C' at beginning of line, state now C\n")); + break; + + case 'd': + case 'D': + me->check_pointer = "ate:"; + me->if_ok = miDATE; + me->state = miCHECK; + CTRACE((tfp, + "HTMIME: Got 'D' at beginning of line, checking for 'ate:'\n")); + break; + + case 'e': + case 'E': + me->state = miE; + CTRACE((tfp, "HTMIME: Got 'E' at beginning of line, state now E\n")); + break; + + case 'k': + case 'K': + me->check_pointer = "eep-alive:"; + me->if_ok = miKEEP_ALIVE; + me->state = miCHECK; + CTRACE((tfp, + "HTMIME: Got 'K' at beginning of line, checking for 'eep-alive:'\n")); + break; + + case 'l': + case 'L': + me->state = miL; + CTRACE((tfp, "HTMIME: Got 'L' at beginning of line, state now L\n")); + break; + + case 'p': + case 'P': + me->state = miP; + CTRACE((tfp, "HTMIME: Got 'P' at beginning of line, state now P\n")); + break; + + case 'r': + case 'R': + me->state = miR; + CTRACE((tfp, "HTMIME: Got 'R' at beginning of line, state now R\n")); + break; + + case 's': + case 'S': + me->state = miS; + CTRACE((tfp, "HTMIME: Got 'S' at beginning of line, state now S\n")); + break; + + case 't': + case 'T': + me->state = miT; + CTRACE((tfp, "HTMIME: Got 'T' at beginning of line, state now T\n")); + break; + + case 'u': + case 'U': + me->state = miU; + CTRACE((tfp, "HTMIME: Got 'U' at beginning of line, state now U\n")); + break; + + case 'v': + case 'V': + me->state = miV; + CTRACE((tfp, "HTMIME: Got 'V' at beginning of line, state now V\n")); + break; + + case 'w': + case 'W': + me->state = miW; + CTRACE((tfp, "HTMIME: Got 'W' at beginning of line, state now W\n")); + break; + + case '\n': /* Blank line: End of Header! */ + { + me->anchor->inHEAD = FALSE; + CTRACE((tfp, "HTMIME length %" PRI_off_t "\n", + CAST_off_t (me->anchor->header_length))); + me->net_ascii = NO; + pumpData(me); + } + break; + + default: + goto bad_field_name; + + } /* switch on character */ + break; + + case miA: /* Check for 'c','g' or 'l' */ + switch (c) { + case 'c': + case 'C': + me->check_pointer = "cept-ranges:"; + me->if_ok = miACCEPT_RANGES; + me->state = miCHECK; + CTRACE((tfp, + "HTMIME: Was A, found C, checking for 'cept-ranges:'\n")); + break; + + case 'g': + case 'G': + me->check_pointer = "e:"; + me->if_ok = miAGE; + me->state = miCHECK; + CTRACE((tfp, "HTMIME: Was A, found G, checking for 'e:'\n")); + break; + + case 'l': + case 'L': + me->state = miAL; + CTRACE((tfp, "HTMIME: Was A, found L, state now AL'\n")); + break; + + default: + CTRACE((tfp, + "HTMIME: Bad character `%c' found where `%s' expected\n", + c, "'g' or 'l'")); + goto bad_field_name; + + } /* switch on character */ + break; + + case miAL: /* Check for 'l' or 't' */ + switch (c) { + case 'l': + case 'L': + me->check_pointer = "ow:"; + me->if_ok = miALLOW; + me->state = miCHECK; + CTRACE((tfp, "HTMIME: Was AL, found L, checking for 'ow:'\n")); + break; + + case 't': + case 'T': + me->check_pointer = "ernates:"; + me->if_ok = miALTERNATES; + me->state = miCHECK; + CTRACE((tfp, "HTMIME: Was AL, found T, checking for 'ernates:'\n")); + break; + + default: + CTRACE((tfp, + "HTMIME: Bad character `%c' found where `%s' expected\n", + c, "'l' or 't'")); + goto bad_field_name; + + } /* switch on character */ + break; + + case miC: /* Check for 'a' or 'o' */ + switch (c) { + case 'a': + case 'A': + me->check_pointer = "che-control:"; + me->if_ok = miCACHE_CONTROL; + me->state = miCHECK; + CTRACE((tfp, + "HTMIME: Was C, found A, checking for 'che-control:'\n")); + break; + + case 'o': + case 'O': + me->state = miCO; + CTRACE((tfp, "HTMIME: Was C, found O, state now CO'\n")); + break; + + default: + CTRACE((tfp, + "HTMIME: Bad character `%c' found where `%s' expected\n", + c, "'a' or 'o'")); + goto bad_field_name; + + } /* switch on character */ + break; + + case miCO: /* Check for 'n' or 'o' */ + switch (c) { + case 'n': + case 'N': + me->state = miCON; + CTRACE((tfp, "HTMIME: Was CO, found N, state now CON\n")); + break; + + case 'o': + case 'O': + me->check_pointer = "kie:"; + me->if_ok = miCOOKIE; + me->state = miCHECK; + CTRACE((tfp, "HTMIME: Was CO, found O, checking for 'kie:'\n")); + break; + + default: + CTRACE((tfp, + "HTMIME: Bad character `%c' found where `%s' expected\n", + c, "'n' or 'o'")); + goto bad_field_name; + + } /* switch on character */ + break; + + case miCON: /* Check for 'n' or 't' */ + switch (c) { + case 'n': + case 'N': + me->check_pointer = "ection:"; + me->if_ok = miCONNECTION; + me->state = miCHECK; + CTRACE((tfp, "HTMIME: Was CON, found N, checking for 'ection:'\n")); + break; + + case 't': + case 'T': + me->check_pointer = "ent-"; + me->if_ok = miCONTENT_; + me->state = miCHECK; + CTRACE((tfp, "HTMIME: Was CON, found T, checking for 'ent-'\n")); + break; + + default: + CTRACE((tfp, + "HTMIME: Bad character `%c' found where `%s' expected\n", + c, "'n' or 't'")); + goto bad_field_name; + + } /* switch on character */ + break; + + case miE: /* Check for 't' or 'x' */ + switch (c) { + case 't': + case 'T': + me->check_pointer = "ag:"; + me->if_ok = miETAG; + me->state = miCHECK; + CTRACE((tfp, "HTMIME: Was E, found T, checking for 'ag:'\n")); + break; + + case 'x': + case 'X': + me->check_pointer = "pires:"; + me->if_ok = miEXPIRES; + me->state = miCHECK; + CTRACE((tfp, "HTMIME: Was E, found X, checking for 'pires:'\n")); + break; + + default: + CTRACE((tfp, + "HTMIME: Bad character `%c' found where `%s' expected\n", + c, "'t' or 'x'")); + goto bad_field_name; + + } /* switch on character */ + break; + + case miL: /* Check for 'a', 'i' or 'o' */ + switch (c) { + case 'a': + case 'A': + me->check_pointer = "st-modified:"; + me->if_ok = miLAST_MODIFIED; + me->state = miCHECK; + CTRACE((tfp, + "HTMIME: Was L, found A, checking for 'st-modified:'\n")); + break; + + case 'i': + case 'I': + me->check_pointer = "nk:"; + me->if_ok = miLINK; + me->state = miCHECK; + CTRACE((tfp, "HTMIME: Was L, found I, checking for 'nk:'\n")); + break; + + case 'o': + case 'O': + me->check_pointer = "cation:"; + me->if_ok = miLOCATION; + me->state = miCHECK; + CTRACE((tfp, "HTMIME: Was L, found O, checking for 'cation:'\n")); + break; + + default: + CTRACE((tfp, + "HTMIME: Bad character `%c' found where `%s' expected\n", + c, "'a', 'i' or 'o'")); + goto bad_field_name; + + } /* switch on character */ + break; + + case miP: /* Check for 'r' or 'u' */ + switch (c) { + case 'r': + case 'R': + me->state = miPR; + CTRACE((tfp, "HTMIME: Was P, found R, state now PR'\n")); + break; + + case 'u': + case 'U': + me->check_pointer = "blic:"; + me->if_ok = miPUBLIC; + me->state = miCHECK; + CTRACE((tfp, "HTMIME: Was P, found U, checking for 'blic:'\n")); + break; + + default: + CTRACE((tfp, + "HTMIME: Bad character `%c' found where `%s' expected\n", + c, "'r' or 'u'")); + goto bad_field_name; + + } /* switch on character */ + break; + + case miPR: /* Check for 'a' or 'o' */ + switch (c) { + case 'a': + case 'A': + me->check_pointer = "gma:"; + me->if_ok = miPRAGMA; + me->state = miCHECK; + CTRACE((tfp, "HTMIME: Was PR, found A, checking for 'gma'\n")); + break; + + case 'o': + case 'O': + me->check_pointer = "xy-authenticate:"; + me->if_ok = miPROXY_AUTHENTICATE; + me->state = miCHECK; + CTRACE((tfp, + "HTMIME: Was PR, found O, checking for 'xy-authenticate'\n")); + break; + + default: + CTRACE((tfp, + "HTMIME: Bad character `%c' found where `%s' expected\n", + c, "'a' or 'o'")); + goto bad_field_name; + + } /* switch on character */ + break; + + case miR: /* Check for 'e' */ + switch (c) { + case 'e': + case 'E': + me->state = miRE; + CTRACE((tfp, "HTMIME: Was R, found E\n")); + break; + default: + CTRACE((tfp, + "HTMIME: Bad character `%c' found where `%s' expected\n", + c, "'e'")); + goto bad_field_name; + + } /* switch on character */ + break; + + case miRE: /* Check for 'a' or 'o' */ + switch (c) { + case 'f': + case 'F': /* nonstandard: Netscape */ + me->check_pointer = "resh:"; + me->if_ok = miREFRESH; + me->state = miCHECK; + CTRACE((tfp, "HTMIME: Was RE, found F, checking for '%s'\n", me->check_pointer)); + break; + + case 't': + case 'T': + me->check_pointer = "ry-after:"; + me->if_ok = miRETRY_AFTER; + me->state = miCHECK; + CTRACE((tfp, "HTMIME: Was RE, found T, checking for '%s'\n", me->check_pointer)); + break; + + default: + CTRACE((tfp, + "HTMIME: Bad character `%c' found where `%s' expected\n", + c, "'f' or 't'")); + goto bad_field_name; + + } /* switch on character */ + break; + + case miS: /* Check for 'a' or 'e' */ + switch (c) { + case 'a': + case 'A': + me->check_pointer = "fe:"; + me->if_ok = miSAFE; + me->state = miCHECK; + CTRACE((tfp, "HTMIME: Was S, found A, checking for 'fe:'\n")); + break; + + case 'e': + case 'E': + me->state = miSE; + CTRACE((tfp, "HTMIME: Was S, found E, state now SE'\n")); + break; + + default: + CTRACE((tfp, + "HTMIME: Bad character `%c' found where `%s' expected\n", + c, "'a' or 'e'")); + goto bad_field_name; + + } /* switch on character */ + break; + + case miSE: /* Check for 'r' or 't' */ + switch (c) { + case 'r': + case 'R': + me->check_pointer = "ver:"; + me->if_ok = miSERVER; + me->state = miCHECK; + CTRACE((tfp, "HTMIME: Was SE, found R, checking for 'ver'\n")); + break; + + case 't': + case 'T': + me->check_pointer = "-cookie"; + me->if_ok = miSET_COOKIE; + me->state = miCHECK; + CTRACE((tfp, "HTMIME: Was SE, found T, checking for '-cookie'\n")); + break; + + default: + CTRACE((tfp, + "HTMIME: Bad character `%c' found where `%s' expected\n", + c, "'r' or 't'")); + goto bad_field_name; + + } /* switch on character */ + break; + + case miSET_COOKIE: /* Check for ':' or '2' */ + switch (c) { + case ':': + me->field = miSET_COOKIE1; /* remember it */ + me->state = miSKIP_GET_VALUE; + CTRACE((tfp, "HTMIME: Was SET_COOKIE, found :, processing\n")); + break; + + case '2': + me->check_pointer = ":"; + me->if_ok = miSET_COOKIE2; + me->state = miCHECK; + CTRACE((tfp, "HTMIME: Was SET_COOKIE, found 2, checking for ':'\n")); + break; + + default: + CTRACE((tfp, + "HTMIME: Bad character `%c' found where `%s' expected\n", + c, "':' or '2'")); + goto bad_field_name; + + } /* switch on character */ + break; + + case miT: /* Check for 'i' or 'r' */ + switch (c) { + case 'i': + case 'I': + me->check_pointer = "tle:"; + me->if_ok = miTITLE; + me->state = miCHECK; + CTRACE((tfp, "HTMIME: Was T, found I, checking for 'tle:'\n")); + break; + + case 'r': + case 'R': + me->check_pointer = "ansfer-encoding:"; + me->if_ok = miTRANSFER_ENCODING; + me->state = miCHECK; + CTRACE((tfp, + "HTMIME: Was T, found R, checking for 'ansfer-encoding'\n")); + break; + + default: + CTRACE((tfp, + "HTMIME: Bad character `%c' found where `%s' expected\n", + c, "'i' or 'r'")); + goto bad_field_name; + + } /* switch on character */ + break; + + case miU: /* Check for 'p' or 'r' */ + switch (c) { + case 'p': + case 'P': + me->check_pointer = "grade:"; + me->if_ok = miUPGRADE; + me->state = miCHECK; + CTRACE((tfp, "HTMIME: Was U, found P, checking for 'grade:'\n")); + break; + + case 'r': + case 'R': + me->check_pointer = "i:"; + me->if_ok = miURI; + me->state = miCHECK; + CTRACE((tfp, "HTMIME: Was U, found R, checking for 'i:'\n")); + break; + + default: + CTRACE((tfp, + "HTMIME: Bad character `%c' found where `%s' expected\n", + c, "'p' or 'r'")); + goto bad_field_name; + + } /* switch on character */ + break; + + case miV: /* Check for 'a' or 'i' */ + switch (c) { + case 'a': + case 'A': + me->check_pointer = "ry:"; + me->if_ok = miVARY; + me->state = miCHECK; + CTRACE((tfp, "HTMIME: Was V, found A, checking for 'ry:'\n")); + break; + + case 'i': + case 'I': + me->check_pointer = "a:"; + me->if_ok = miVIA; + me->state = miCHECK; + CTRACE((tfp, "HTMIME: Was V, found I, checking for 'a:'\n")); + break; + + default: + CTRACE((tfp, + "HTMIME: Bad character `%c' found where `%s' expected\n", + c, "'a' or 'i'")); + goto bad_field_name; + + } /* switch on character */ + break; + + case miW: /* Check for 'a' or 'w' */ + switch (c) { + case 'a': + case 'A': + me->check_pointer = "rning:"; + me->if_ok = miWARNING; + me->state = miCHECK; + CTRACE((tfp, "HTMIME: Was W, found A, checking for 'rning:'\n")); + break; + + case 'w': + case 'W': + me->check_pointer = "w-authenticate:"; + me->if_ok = miWWW_AUTHENTICATE; + me->state = miCHECK; + CTRACE((tfp, + "HTMIME: Was W, found W, checking for 'w-authenticate:'\n")); + break; + + default: + CTRACE((tfp, + "HTMIME: Bad character `%c' found where `%s' expected\n", + c, "'a' or 'w'")); + goto bad_field_name; + + } /* switch on character */ + break; + + case miCHECK: /* Check against string */ + if (TOLOWER(c) == *(me->check_pointer)++) { + if (!*me->check_pointer) + me->state = me->if_ok; + } else { /* Error */ + CTRACE((tfp, + "HTMIME: Bad character `%c' found where `%s' expected\n", + c, me->check_pointer - 1)); + goto bad_field_name; + } + break; + + case miCONTENT_: + CTRACE((tfp, "HTMIME: in case CONTENT_\n")); + + switch (c) { + case 'b': + case 'B': + me->check_pointer = "ase:"; + me->if_ok = miCONTENT_BASE; + me->state = miCHECK; + CTRACE((tfp, + "HTMIME: Was CONTENT_, found B, checking for 'ase:'\n")); + break; + + case 'd': + case 'D': + me->check_pointer = "isposition:"; + me->if_ok = miCONTENT_DISPOSITION; + me->state = miCHECK; + CTRACE((tfp, + "HTMIME: Was CONTENT_, found D, checking for 'isposition:'\n")); + break; + + case 'e': + case 'E': + me->check_pointer = "ncoding:"; + me->if_ok = miCONTENT_ENCODING; + me->state = miCHECK; + CTRACE((tfp, + "HTMIME: Was CONTENT_, found E, checking for 'ncoding:'\n")); + break; + + case 'f': + case 'F': + me->check_pointer = "eatures:"; + me->if_ok = miCONTENT_FEATURES; + me->state = miCHECK; + CTRACE((tfp, + "HTMIME: Was CONTENT_, found F, checking for 'eatures:'\n")); + break; + + case 'l': + case 'L': + me->state = miCONTENT_L; + CTRACE((tfp, + "HTMIME: Was CONTENT_, found L, state now CONTENT_L\n")); + break; + + case 'm': + case 'M': + me->check_pointer = "d5:"; + me->if_ok = miCONTENT_MD5; + me->state = miCHECK; + CTRACE((tfp, "HTMIME: Was CONTENT_, found M, checking for 'd5:'\n")); + break; + + case 'r': + case 'R': + me->check_pointer = "ange:"; + me->if_ok = miCONTENT_RANGE; + me->state = miCHECK; + CTRACE((tfp, + "HTMIME: Was CONTENT_, found R, checking for 'ange:'\n")); + break; + + case 't': + case 'T': + me->state = miCONTENT_T; + CTRACE((tfp, + "HTMIME: Was CONTENT_, found T, state now CONTENT_T\n")); + break; + + default: + CTRACE((tfp, "HTMIME: Was CONTENT_, found nothing; bleah\n")); + goto bad_field_name; + + } /* switch on character */ + break; + + case miCONTENT_L: + CTRACE((tfp, "HTMIME: in case CONTENT_L\n")); + + switch (c) { + case 'a': + case 'A': + me->check_pointer = "nguage:"; + me->if_ok = miCONTENT_LANGUAGE; + me->state = miCHECK; + CTRACE((tfp, + "HTMIME: Was CONTENT_L, found A, checking for 'nguage:'\n")); + break; + + case 'e': + case 'E': + me->check_pointer = "ngth:"; + me->if_ok = miCONTENT_LENGTH; + me->state = miCHECK; + CTRACE((tfp, + "HTMIME: Was CONTENT_L, found E, checking for 'ngth:'\n")); + break; + + case 'o': + case 'O': + me->check_pointer = "cation:"; + me->if_ok = miCONTENT_LOCATION; + me->state = miCHECK; + CTRACE((tfp, + "HTMIME: Was CONTENT_L, found O, checking for 'cation:'\n")); + break; + + default: + CTRACE((tfp, "HTMIME: Was CONTENT_L, found nothing; bleah\n")); + goto bad_field_name; + + } /* switch on character */ + break; + + case miCONTENT_T: + CTRACE((tfp, "HTMIME: in case CONTENT_T\n")); + + switch (c) { + case 'r': + case 'R': + me->check_pointer = "ansfer-encoding:"; + me->if_ok = miCONTENT_TRANSFER_ENCODING; + me->state = miCHECK; + CTRACE((tfp, + "HTMIME: Was CONTENT_T, found R, checking for 'ansfer-encoding:'\n")); + break; + + case 'y': + case 'Y': + me->check_pointer = "pe:"; + me->if_ok = miCONTENT_TYPE; + me->state = miCHECK; + CTRACE((tfp, + "HTMIME: Was CONTENT_T, found Y, checking for 'pe:'\n")); + break; + + default: + CTRACE((tfp, "HTMIME: Was CONTENT_T, found nothing; bleah\n")); + goto bad_field_name; + + } /* switch on character */ + break; + + case miACCEPT_RANGES: + case miAGE: + case miALLOW: + case miALTERNATES: + case miCACHE_CONTROL: + case miCOOKIE: + case miCONNECTION: + case miCONTENT_BASE: + case miCONTENT_DISPOSITION: + case miCONTENT_ENCODING: + case miCONTENT_FEATURES: + case miCONTENT_LANGUAGE: + case miCONTENT_LENGTH: + case miCONTENT_LOCATION: + case miCONTENT_MD5: + case miCONTENT_RANGE: + case miCONTENT_TRANSFER_ENCODING: + case miCONTENT_TYPE: + case miDATE: + case miETAG: + case miEXPIRES: + case miKEEP_ALIVE: + case miLAST_MODIFIED: + case miLINK: + case miLOCATION: + case miPRAGMA: + case miPROXY_AUTHENTICATE: + case miPUBLIC: + case miREFRESH: + case miRETRY_AFTER: + case miSAFE: + case miSERVER: + case miSET_COOKIE1: + case miSET_COOKIE2: + case miTITLE: + case miTRANSFER_ENCODING: + case miUPGRADE: + case miURI: + case miVARY: + case miVIA: + case miWARNING: + case miWWW_AUTHENTICATE: + me->field = me->state; /* remember it */ + me->state = miSKIP_GET_VALUE; + /* FALLTHRU */ + + case miSKIP_GET_VALUE: + if (c == '\n') { + me->fold_state = me->state; + me->state = miNEWLINE; + break; + } + if (WHITE(c)) + /* + * Skip white space. + */ + break; + + me->value_pointer = me->value; + me->state = miGET_VALUE; + /* Fall through to store first character */ + /* FALLTHRU */ + + case miGET_VALUE: + GET_VALUE: + if (c != '\n') { /* Not end of line */ + if (me->value_pointer < me->value + VALUE_SIZE - 1) { + *me->value_pointer++ = (char) c; + break; + } else { + goto value_too_long; + } + } + /* Fall through (if end of line) */ + /* FALLTHRU */ + + case miJUNK_LINE: + if (c == '\n') { + me->fold_state = me->state; + me->state = miNEWLINE; + } + break; + + } /* switch on state */ + + HTChunkPutc(&me->anchor->http_headers, UCH(c)); + if (me->state == MIME_TRANSPARENT) { + HTChunkTerminate(&me->anchor->http_headers); + CTRACE((tfp, "Server Headers (%d bytes):\n%.*s\n", + me->anchor->http_headers.size, + me->anchor->http_headers.size, + me->anchor->http_headers.data)); + CTRACE((tfp, "Server Content-Type:%s\n", + me->anchor->content_type_params)); + } + return; + + value_too_long: + CTRACE((tfp, "HTMIME: *** Syntax error. (string too long)\n")); + + bad_field_name: /* Ignore it */ + me->state = miJUNK_LINE; + + HTChunkPutc(&me->anchor->http_headers, UCH(c)); + + return; +} + +/* String handling + * --------------- + * + * Strings must be smaller than this buffer size. + */ +static void HTMIME_put_string(HTStream *me, + const char *s) +{ + const char *p; + + if (me->state == MIME_TRANSPARENT) { /* Optimisation */ + (*me->targetClass.put_string) (me->target, s); + + } else if (me->state != MIME_IGNORE) { + CTRACE((tfp, "HTMIME: %s\n", s)); + + for (p = s; *p; p++) + HTMIME_put_character(me, *p); + } +} + +/* Buffer write. Buffers can (and should!) be big. + * ------------ + */ +static void HTMIME_write(HTStream *me, + const char *s, + int l) +{ + const char *p; + + if (me->state == MIME_TRANSPARENT) { /* Optimisation */ + (*me->targetClass.put_block) (me->target, s, l); + + } else { + CTRACE((tfp, "HTMIME: %.*s\n", l, s)); + + for (p = s; p < s + l; p++) + HTMIME_put_character(me, *p); + } +} + +/* Free an HTML object + * ------------------- + * + */ +static void HTMIME_free(HTStream *me) +{ + if (me) { + FREE(me->location); + FREE(me->compression_encoding); + if (me->target) + (*me->targetClass._free) (me->target); + FREE(me); + } +} + +/* End writing +*/ +static void HTMIME_abort(HTStream *me, + HTError e) +{ + if (me) { + FREE(me->location); + FREE(me->compression_encoding); + if (me->target) + (*me->targetClass._abort) (me->target, e); + FREE(me); + } +} + +/* Structured Object Class + * ----------------------- + */ +static const HTStreamClass HTMIME = +{ + "MIMEParser", + HTMIME_free, + HTMIME_abort, + HTMIME_put_character, + HTMIME_put_string, + HTMIME_write +}; + +/* Subclass-specific Methods + * ------------------------- + */ +HTStream *HTMIMEConvert(HTPresentation *pres, + HTParentAnchor *anchor, + HTStream *sink) +{ + HTStream *me; + + CTRACE((tfp, "HTMIMEConvert\n")); + me = typecalloc(HTStream); + + if (me == NULL) + outofmem(__FILE__, "HTMIMEConvert"); + + me->isa = &HTMIME; + me->sink = sink; + me->anchor = anchor; + me->anchor->safe = FALSE; + me->anchor->no_cache = FALSE; + + FREE(me->anchor->cache_control); + FREE(me->anchor->SugFname); + FREE(me->anchor->charset); + + HTChunkClear(&me->anchor->http_headers); + HTChunkInit(&me->anchor->http_headers, 128); + + FREE(me->anchor->content_type_params); + FREE(me->anchor->content_language); + FREE(me->anchor->content_encoding); + FREE(me->anchor->content_base); + FREE(me->anchor->content_disposition); + FREE(me->anchor->content_location); + FREE(me->anchor->content_md5); + + me->anchor->inHEAD = TRUE; + me->anchor->header_length = 0; + me->anchor->content_length = 0; + + FREE(me->anchor->date); + FREE(me->anchor->expires); + FREE(me->anchor->last_modified); + FREE(me->anchor->ETag); + FREE(me->anchor->server); + + me->target = NULL; + me->state = miBEGINNING_OF_LINE; + me->format = HTAtom_for(ContentTypes[LYContentType]); + + CTRACE((tfp, "default Content-Type is %s\n", HTAtom_name(me->format))); + me->targetRep = pres->rep_out; + me->boundary = NULL; /* Not set yet */ + me->set_cookie = NULL; /* Not set yet */ + me->set_cookie2 = NULL; /* Not set yet */ + me->refresh_url = NULL; /* Not set yet */ + me->c_t_encoding = 0; /* Not set yet */ + me->compression_encoding = NULL; /* Not set yet */ + me->net_ascii = NO; /* Local character set */ + + HTAnchor_setUCInfoStage(me->anchor, current_char_set, + UCT_STAGE_STRUCTURED, + UCT_SETBY_DEFAULT); + HTAnchor_setUCInfoStage(me->anchor, current_char_set, + UCT_STAGE_HTEXT, + UCT_SETBY_DEFAULT); + return me; +} + +HTStream *HTNetMIME(HTPresentation *pres, + HTParentAnchor *anchor, + HTStream *sink) +{ + HTStream *me = HTMIMEConvert(pres, anchor, sink); + + if (!me) + return NULL; + + me->net_ascii = YES; + return me; +} + +HTStream *HTMIMERedirect(HTPresentation *pres, + HTParentAnchor *anchor, + HTStream *sink) +{ + HTStream *me = HTMIMEConvert(pres, anchor, sink); + + if (!me) + return NULL; + + me->pickup_redirection = YES; + if (me->targetRep == WWW_DEBUG && sink) + me->no_streamstack = YES; + return me; +} + +/* Japanese header handling functions + * ================================== + * + * K&Rized and added 07-Jun-96 by FM, based on: + * +//////////////////////////////////////////////////////////////////////// + * + * ISO-2022-JP handling routines + * & + * MIME decode routines (quick hack just for ISO-2022-JP) + * + * Thu Jan 25 10:11:42 JST 1996 + * + * Copyright (C) 1994, 1995, 1996 + * Shuichi Ichikawa (ichikawa@nuee.nagoya-u.ac.jp) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either versions 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with SKK, see the file COPYING. If not, write to the Free + * Software Foundation Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +/* + * MIME decoding routines + * + * Written by S. Ichikawa, + * partially inspired by encdec.c of <jh@efd.lth.se>. + * Caller's buffers decode to no longer than the input strings. + */ +#include <LYCharVals.h> /* S/390 -- gil -- 0163 */ + +static char HTmm64[] = +"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/="; +static char HTmmquote[] = "0123456789ABCDEF"; +static int HTmmcont = 0; + +static void HTmmdec_base64(char **t, + char *s) +{ + int d, count, j, val; + char *buf, *bp, nw[4], *p; + + if ((buf = typeMallocn(char, strlen(s) * 3 + 1)) == 0) + outofmem(__FILE__, "HTmmdec_base64"); + + for (bp = buf; *s; s += 4) { + val = 0; + if (s[2] == '=') + count = 1; + else if (s[3] == '=') + count = 2; + else + count = 3; + + for (j = 0; j <= count; j++) { + if (!(p = StrChr(HTmm64, s[j]))) { + FREE(buf); + return; + } + d = (int) (p - HTmm64); + d <<= (3 - j) * 6; + val += d; + } + for (j = 2; j >= 0; j--) { + nw[j] = (char) (val & 255); + val >>= 8; + } + if (count--) + *bp++ = nw[0]; + if (count--) + *bp++ = nw[1]; + if (count) + *bp++ = nw[2]; + } + *bp = '\0'; + StrAllocCopy(*t, buf); + FREE(buf); +} + +static void HTmmdec_quote(char **t, + char *s) +{ + char *buf, cval, *bp, *p; + + if ((buf = typeMallocn(char, strlen(s) + 1)) == 0) + outofmem(__FILE__, "HTmmdec_quote"); + + for (bp = buf; *s;) { + if (*s == '=') { + cval = 0; + if (s[1] && (p = StrChr(HTmmquote, s[1]))) { + cval = (char) (cval + (char) (p - HTmmquote)); + } else { + *bp++ = *s++; + continue; + } + if (s[2] && (p = StrChr(HTmmquote, s[2]))) { + cval = (char) (cval << 4); + cval = (char) (cval + (p - HTmmquote)); + *bp++ = cval; + s += 3; + } else { + *bp++ = *s++; + } + } else if (*s == '_') { + *bp++ = 0x20; + s++; + } else { + *bp++ = *s++; + } + } + *bp = '\0'; + StrAllocCopy(*t, buf); + FREE(buf); +} + +/* + * HTmmdecode for ISO-2022-JP - FM + */ +void HTmmdecode(char **target, + char *source) +{ + char *buf; + char *mmbuf = NULL; + char *m2buf = NULL; + char *s, *t, *u; + int base64, quote; + + if ((buf = typeMallocn(char, strlen(source) + 1)) == 0) + outofmem(__FILE__, "HTmmdecode"); + + for (s = source, *(u = buf) = '\0'; *s;) { + if (!strncasecomp(s, "=?ISO-2022-JP?B?", 16)) { + base64 = 1; + } else { + base64 = 0; + } + if (!strncasecomp(s, "=?ISO-2022-JP?Q?", 16)) { + quote = 1; + } else { + quote = 0; + } + if (base64 || quote) { + if (HTmmcont) { + for (t = s - 1; + t >= source && (*t == ' ' || *t == '\t'); t--) { + u--; + } + } + if (mmbuf == 0) /* allocate buffer big enough for source */ + StrAllocCopy(mmbuf, source); + for (s += 16, t = mmbuf; *s;) { + if (s[0] == '?' && s[1] == '=') { + break; + } else { + *t++ = *s++; + *t = '\0'; + } + } + if (s[0] != '?' || s[1] != '=') { + goto end; + } else { + s += 2; + *t = '\0'; + } + if (base64) + HTmmdec_base64(&m2buf, mmbuf); + else + HTmmdec_quote(&m2buf, mmbuf); + for (t = m2buf; non_empty(t);) + *u++ = *t++; + HTmmcont = 1; + } else { + if (*s != ' ' && *s != '\t') + HTmmcont = 0; + *u++ = *s++; + } + } + *u = '\0'; + end: + StrAllocCopy(*target, buf); + FREE(m2buf); + FREE(mmbuf); + FREE(buf); +} + +/* + * Insert ESC where it seems lost. + * (The author of this function "rjis" is S. Ichikawa.) + */ +int HTrjis(char **t, + char *s) +{ + char *p; + char *buf = NULL; + int kanji = 0; + + if (StrChr(s, CH_ESC) || !StrChr(s, '$')) { + if (s != *t) + StrAllocCopy(*t, s); + return 1; + } + + if ((buf = typeMallocn(char, strlen(s) * 2 + 1)) == 0) + outofmem(__FILE__, "HTrjis"); + + for (p = buf; *s;) { + if (!kanji && s[0] == '$' && (s[1] == '@' || s[1] == 'B')) { + if (HTmaybekanji((int) s[2], (int) s[3])) { + kanji = 1; + *p++ = CH_ESC; + *p++ = *s++; + *p++ = *s++; + *p++ = *s++; + *p++ = *s++; + continue; + } + *p++ = *s++; + continue; + } + if (kanji && s[0] == '(' && (s[1] == 'J' || s[1] == 'B')) { + kanji = 0; + *p++ = CH_ESC; + *p++ = *s++; + *p++ = *s++; + continue; + } + *p++ = *s++; + } + *p = *s; /* terminate string */ + + StrAllocCopy(*t, buf); + FREE(buf); + return 0; +} + +/* + * The following function "maybekanji" is derived from + * RJIS-1.0 by Mr. Hironobu Takahashi. + * Maybekanji() is included here under the courtesy of the author. + * The original comment of rjis.c is also included here. + */ +/* + * RJIS ( Recover JIS code from broken file ) + * Copyright (C) 1992 1994 + * Hironobu Takahashi (takahasi@tiny.or.jp) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either versions 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with SKK, see the file COPYING. If not, write to the Free + * Software Foundation Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +int HTmaybekanji(int c1, + int c2) +{ + + if ((c2 < 33) || (c2 > 126)) + return 0; + if ((c1 < 33) || ((40 < c1) && (c1 < 48)) || (116 < c1)) + return 0; + c2 -= 32; + switch (c1 - 32) { + case 2: + if ((14 < c2) && (c2 < 26)) + return 0; + if ((33 < c2) && (c2 < 42)) + return 0; + if ((48 < c2) && (c2 < 60)) + return 0; + if ((74 < c2) && (c2 < 82)) + return 0; + if ((89 < c2) && (c2 < 94)) + return 0; + break; + case 3: + if (c2 < 16) + return 0; + if ((25 < c2) && (c2 < 33)) + return 0; + if ((58 < c2) && (c2 < 65)) + return 0; + if (90 < c2) + return 0; + break; + case 4: + if (83 < c2) + return 0; + break; + case 5: + if (86 < c2) + return 0; + break; + case 6: + if ((24 < c2) && (c2 < 33)) + return 0; + if (56 < c2) + return 0; + break; + case 7: + if ((33 < c2) && (c2 < 49)) + return 0; + if (81 < c2) + return 0; + break; + case 8: + if (32 < c2) + return 0; + break; + case 47: + if (51 < c2) + return 0; + break; + case 84: + if (6 < c2) + return 0; + break; + } + return 1; +} |