/*
 * $LynxId: LYCharUtils.c,v 1.133 2020/01/21 21:36:01 tom Exp $
 *
 * Functions associated with LYCharSets.c and the Lynx version of HTML.c - FM
 * ==========================================================================
 */
/*
 * NOTE(review): the header names of the #include directives below are
 * missing in this copy of the file; they are reproduced as found and must
 * be restored from the upstream source before this will compile.
 */
#include
#include

#define Lynx_HTML_Handler
#include
#include
#include
#include
#include

#include
#include
#include
#include
#include
#include
#include

#include
#include
#include

#include
#include
#include
#include
#include
#include
#include
#include
#include
#include

#include
#include

/*
 * Used for nested lists.  - FM
 */
int OL_CONTINUE = -29999;	/* flag for whether CONTINUE is set */
int OL_VOID = -29998;		/* flag for whether a count is set */

/*
 * Count how many times the character 'ch' occurs in the NUL-terminated
 * string 'value'.
 *
 * The scan stops at the terminating NUL or as soon as StrChr() reports no
 * further match, whichever comes first.  The callers in this file pass only
 * '&', '<' and '>' for 'ch'.
 */
static size_t count_char(const char *value, int ch)
{
    size_t total = 0;
    const char *hit;

    for (;;) {
	if (*value == '\0')
	    break;		/* reached the end of the string */
	hit = StrChr(value, ch);
	if (hit == NULL)
	    break;		/* no further occurrence */
	total++;
	value = hit + 1;	/* resume just past the match */
    }
    return total;
}

/*
 * This function converts any ampersands in a pre-allocated string to "&amp;".
 * If brackets is TRUE, it also converts any angle-brackets to "&lt;" or "&gt;".
*/ void LYEntify(char **in_out, int brackets) { char *source = *in_out; char *target; char *result = NULL; size_t count_AMPs = 0; size_t count_LTs = 0; size_t count_GTs = 0; #ifdef CJK_EX enum _state { S_text, S_esc, S_dollar, S_paren, S_nonascii_text, S_dollar_paren } state = S_text; int in_sjis = 0; #endif if (non_empty(source)) { count_AMPs = count_char(*in_out, '&'); if (brackets) { count_LTs = count_char(*in_out, '<'); count_GTs = count_char(*in_out, '>'); } if (count_AMPs != 0 || count_LTs != 0 || count_GTs != 0) { target = typecallocn(char, (strlen(*in_out) + (4 * count_AMPs) + (3 * count_LTs) + (3 * count_GTs) + 1)); if ((result = target) == NULL) outofmem(__FILE__, "LYEntify"); for (source = *in_out; *source; source++) { #ifdef CJK_EX if (IS_CJK_TTY) { switch (state) { case S_text: if (*source == '\033') { state = S_esc; *target++ = *source; continue; } break; case S_esc: if (*source == '$') { state = S_dollar; } else if (*source == '(') { state = S_paren; } else { state = S_text; } *target++ = *source; continue; case S_dollar: if (*source == '@' || *source == 'B' || *source == 'A') { state = S_nonascii_text; } else if (*source == '(') { state = S_dollar_paren; } else { state = S_text; } *target++ = *source; continue; case S_dollar_paren: if (*source == 'C') { state = S_nonascii_text; } else { state = S_text; } *target++ = *source; continue; case S_paren: if (*source == 'B' || *source == 'J' || *source == 'T') { state = S_text; } else if (*source == 'I') { state = S_nonascii_text; } else if (*source == '\033') { state = S_esc; } *target++ = *source; continue; case S_nonascii_text: if (*source == '\033') state = S_esc; *target++ = *source; continue; default: break; } if (*(source + 1) != '\0' && (IS_EUC(UCH(*source), UCH(*(source + 1))) || IS_SJIS(UCH(*source), UCH(*(source + 1)), in_sjis) || IS_BIG5(UCH(*source), UCH(*(source + 1))))) { *target++ = *source++; *target++ = *source; continue; } } #endif switch (*source) { case '&': *target++ = '&'; *target++ 
= 'a'; *target++ = 'm'; *target++ = 'p'; *target++ = ';'; break; case '<': if (brackets) { *target++ = '&'; *target++ = 'l'; *target++ = 't'; *target++ = ';'; break; } /* FALLTHRU */ case '>': if (brackets) { *target++ = '&'; *target++ = 'g'; *target++ = 't'; *target++ = ';'; break; } /* FALLTHRU */ default: *target++ = *source; break; } } *target = '\0'; FREE(*in_out); *in_out = result; } } } /* * Callers to LYEntifyTitle/LYEntifyValue do not look at the 'target' param. * Optimize things a little by avoiding the memory allocation if not needed, * as is usually the case. */ static BOOL MustEntify(const char *source) { BOOL result; #ifdef CJK_EX if (IS_CJK_TTY && StrChr(source, '\033') != 0) { result = TRUE; } else #endif { size_t length = strlen(source); size_t reject = strcspn(source, "<&>"); result = (BOOL) (length != reject); } return result; } /* * Wrappers for LYEntify() which do not assume that the source was allocated, * e.g., output from gettext(). */ const char *LYEntifyTitle(char **target, const char *source) { const char *result = 0; if (MustEntify(source)) { StrAllocCopy(*target, source); LYEntify(target, TRUE); result = *target; } else { result = source; } return result; } const char *LYEntifyValue(char **target, const char *source) { const char *result = 0; if (MustEntify(source)) { StrAllocCopy(*target, source); LYEntify(target, FALSE); result = *target; } else { result = source; } return result; } /* * This function trims characters <= that of a space (32), * including HT_NON_BREAK_SPACE (1) and HT_EN_SPACE (2), * but not ESC, from the heads of strings. - FM */ void LYTrimHead(char *str) { const char *s = str; if (isEmpty(s)) return; while (*s && WHITE(*s) && UCH(*s) != UCH(CH_ESC)) /* S/390 -- gil -- 1669 */ s++; if (s > str) { char *ns = str; while (*s) { *ns++ = *s++; } *ns = '\0'; } } /* * This function trims characters <= that of a space (32), * including HT_NON_BREAK_SPACE (1), HT_EN_SPACE (2), and * ESC from the tails of strings. 
- FM */
/*
 * The string is shortened in place: every trailing character for which
 * WHITE() is true is overwritten with NUL, working backwards from the end
 * until a non-white character (or the start of the string) is reached.
 * Nothing is done when 'str' is empty per isEmpty().
 */
void LYTrimTail(char *str)
{
    int last;

    if (isEmpty(str))
	return;

    for (last = (int) strlen(str) - 1;
	 last >= 0 && WHITE(str[last]);
	 last--) {
	str[last] = '\0';
    }
}

/*
 * This function should receive a pointer to the start
 * of a comment.  It returns a pointer to the end ('>')
 * character of comment, or its best guess if the comment
 * is invalid.  - FM
 */
char *LYFindEndOfComment(char *str)
{
    char *cp, *cp1;
    enum comment_state {
	start1,
	start2,
	end1,
	end2
    } state;

    if (str == NULL)
	/*
	 * We got NULL, so return NULL.  - FM
	 */
	return NULL;

    if (StrNCmp(str, "