/* * $LynxId: HTMLGen.c,v 1.47 2024/03/13 23:48:59 tom Exp $ * * HTML Generator * ============== * * This version of the HTML object sends HTML markup to the output stream. * * Bugs: Line wrapping is not done at all. * All data handled as PCDATA. * Should convert old XMP, LISTING and PLAINTEXT to PRE. * * It is not obvious to me right now whether the HEAD should be generated * from the incoming data or the anchor. Currently it is from the former * which is cleanest. */ #define HTSTREAM_INTERNAL 1 #include #define BUFFER_SIZE 200 /* Line buffer attempts to make neat breaks */ #define MAX_CLEANNESS 20 /* Implements: */ #include #include #include #include #include #ifdef USE_COLOR_STYLE #include #include #include #include #endif #include #include #include #ifdef USE_COLOR_STYLE char class_string[TEMPSTRINGSIZE + 1]; static char *Style_className = NULL; static int hcode; #endif /* HTML Object * ----------- */ struct _HTStream { const HTStreamClass *isa; HTStream *target; HTStreamClass targetClass; /* COPY for speed */ }; struct _HTStructured { const HTStructuredClass *isa; HTStream *target; HTStreamClass targetClass; /* COPY for speed */ char buffer[BUFFER_SIZE + 1]; /* 1for NL */ int buffer_maxchars; char *write_pointer; char *line_break[MAX_CLEANNESS + 1]; int cleanness; BOOL overflowed; BOOL delete_line_break_char[MAX_CLEANNESS + 1]; BOOL preformatted; BOOL escape_specials; BOOL in_attrval; #ifdef USE_COLOR_STYLE HText *text; #endif }; /* Flush Buffer * ------------ */ static void flush_breaks(HTStructured * me) { int i; for (i = 0; i <= MAX_CLEANNESS; i++) { me->line_break[i] = NULL; } } static void HTMLGen_flush(HTStructured * me) { (*me->targetClass.put_block) (me->target, me->buffer, (int) (me->write_pointer - me->buffer)); me->write_pointer = me->buffer; flush_breaks(me); me->cleanness = 0; me->delete_line_break_char[0] = NO; } #ifdef USE_COLOR_STYLE /* * We need to flush our buffer each time before we effect a color style change, * this also relies on the subsequent stage not doing any buffering - this is * currently true, in cases where it matters the target stream should be the * HTPlain converter. The flushing currently prevents reasonable line breaking * in lines with tags. Since color styles help visual scanning of displayed * source lines, and long lines are wrapped in GridText anyway, this is * probably acceptable (or even A Good Thing - more to see in one screenful). * The pointer to the HText structure is initialized here before we effect the * first style change. Getting it from the global HTMainText variable isn't * very clean, since it relies on the fact that HText_new() has already been * called for the current stream stack's document by the time we start * processing the first element; we rely on HTMLGenerator's callers * (HTMLParsedPresent in particular) to guarantee this when it matters. * Normally the target stream will have been setup by HTPlainPresent, which * does what we need in this respect. (A check whether we have the right * output stream could be done by checking that targetClass.name is * "PlainPresenter" or similar.) * * All special color style handling is only done if LYPreparsedSource is set. * We could always do it for displaying source generated by an internal * gateway, but this makes the rule more simple for the user: color styles are * applied to html source only with the -preparsed flag. - kw */ static void do_cstyle_flush(HTStructured * me) { if (!me->text && LYPreparsedSource) { me->text = HTMainText; } if (me->text) { HTMLGen_flush(me); } } #endif /* COLOR_STYLE */ /* Weighted optional line break * * We keep track of all the breaks for when we chop the line */ static void allow_break(HTStructured * me, int new_cleanness, int dlbc) { if (dlbc && me->write_pointer == me->buffer) dlbc = NO; me->line_break[new_cleanness] = dlbc ? me->write_pointer - 1 /* Point to space */ : me->write_pointer; /* point to gap */ me->delete_line_break_char[new_cleanness] = (BOOLEAN) dlbc; if (new_cleanness >= me->cleanness && (me->overflowed || me->line_break[new_cleanness] > me->buffer)) me->cleanness = new_cleanness; } /* Character handling * ------------------ * * The tricky bits are the line break handling. This attempts * to synchronize line breaks on sentence or phrase ends. This * is important if one stores SGML files in a line-oriented code * repository, so that if a small change is made, line ends don't * shift in a ripple-through to apparently change a large part of the * file. We give extra "cleanness" to spaces appearing directly * after periods (full stops), [semi]colons and commas. * This should make the source files easier to read and modify * by hand, too, though this is not a primary design consideration. TBL */ static void HTMLGen_put_character(HTStructured * me, int c) { if (me->escape_specials && UCH(c) < 32) { if (c == HT_NON_BREAK_SPACE || c == HT_EN_SPACE || c == LY_SOFT_HYPHEN) { /* recursion... */ HTMLGen_put_character(me, '&'); HTMLGen_put_character(me, '#'); HTMLGen_put_character(me, 'x'); switch (c) { case HT_NON_BREAK_SPACE: /*   */ HTMLGen_put_character(me, 'A'); HTMLGen_put_character(me, '0'); break; case HT_EN_SPACE: /*   */ HTMLGen_put_character(me, '2'); HTMLGen_put_character(me, '0'); HTMLGen_put_character(me, '0'); HTMLGen_put_character(me, '2'); break; case LY_SOFT_HYPHEN: /* ­ */ HTMLGen_put_character(me, 'A'); HTMLGen_put_character(me, 'D'); break; } c = ';'; } } *me->write_pointer++ = (char) c; if (c == '\n') { HTMLGen_flush(me); return; } /* Figure our whether we can break at this point */ if ((!me->preformatted && (c == ' ' || c == '\t'))) { int new_cleanness = 3; if (me->write_pointer > (me->buffer + 1)) { char delims[5]; char *p; strcpy(delims, ",;:."); /* @@ english bias */ p = StrChr(delims, me->write_pointer[-2]); if (p) new_cleanness = (int) (p - delims + 6); if (!me->in_attrval) new_cleanness += 10; } allow_break(me, new_cleanness, YES); } /* * Flush buffer out when full, or whenever the line is over the nominal * maximum and we can break at all */ if (me->write_pointer >= me->buffer + me->buffer_maxchars || (me->overflowed && me->cleanness)) { if (me->cleanness) { char line_break_char = me->line_break[me->cleanness][0]; char *saved = me->line_break[me->cleanness]; if (me->delete_line_break_char[me->cleanness]) saved++; me->line_break[me->cleanness][0] = '\n'; (*me->targetClass.put_block) (me->target, me->buffer, (int) (me->line_break[me->cleanness] - me->buffer + 1)); me->line_break[me->cleanness][0] = line_break_char; { /* move next line in */ char *p = saved; char *q; for (q = me->buffer; p < me->write_pointer;) *q++ = *p++; } me->cleanness = 0; /* Now we have to check whether there are any perfectly good breaks * which weren't good enough for the last line but may be good * enough for the next */ { int i; for (i = 0; i <= MAX_CLEANNESS; i++) { if (me->line_break[i] != NULL && me->line_break[i] > saved) { me->line_break[i] = me->line_break[i] - (saved - me->buffer); me->cleanness = i; } else { me->line_break[i] = NULL; } } } me->delete_line_break_char[0] = 0; me->write_pointer = me->write_pointer - (saved - me->buffer); me->overflowed = NO; } else { (*me->targetClass.put_block) (me->target, me->buffer, me->buffer_maxchars); me->write_pointer = me->buffer; flush_breaks(me); me->overflowed = YES; } } } /* String handling * --------------- */ static void HTMLGen_put_string(HTStructured * me, const char *s) { const char *p; for (p = s; *p; p++) HTMLGen_put_character(me, *p); } static void HTMLGen_write(HTStructured * me, const char *s, int l) { const char *p; for (p = s; p < (s + l); p++) HTMLGen_put_character(me, *p); } /* Start Element * ------------- * * Within the opening tag, there may be spaces and the line may be broken at * these spaces. */ static int HTMLGen_start_element(HTStructured * me, int element_number, const BOOL *present, STRING2PTR value, int charset GCC_UNUSED, char **insert GCC_UNUSED) { int i; BOOL was_preformatted = me->preformatted; HTTag *tag = &HTML_dtd.tags[element_number]; #if defined(USE_COLOR_STYLE) char *title = NULL; char *title_tmp = NULL; const char *name; if (LYPreparsedSource && (name = tag->name) != 0) { char *myHash = NULL; /* * Same logic as in HTML_start_element, copied from there. - kw */ HTSprintf(&Style_className, ";%s", name); StrAllocCopy(myHash, name); if (class_string[0]) { StrAllocCat(myHash, "."); StrAllocCat(myHash, class_string); HTSprintf(&Style_className, ".%s", class_string); } class_string[0] = '\0'; strtolower(myHash); hcode = color_style_1(myHash); strtolower(Style_className); if (TRACE_STYLE) { fprintf(tfp, "CSSTRIM:%s -> %d", myHash, hcode); if (!hashStyles[hcode].used) { char *rp = strrchr(myHash, '.'); fprintf(tfp, " (undefined) %s\n", myHash); if (rp) { int hcd; *rp = '\0'; /* trim the class */ hcd = color_style_1(myHash); fprintf(tfp, "CSS:%s -> %d", myHash, hcd); if (!hashStyles[hcd].used) fprintf(tfp, " (undefined) %s\n", myHash); else fprintf(tfp, " ca=%d\n", hashStyles[hcd].color); } } else fprintf(tfp, " ca=%d\n", hashStyles[hcode].color); } if (displayStyles[element_number + STARTAT].color > -2) { CTRACE2(TRACE_STYLE, (tfp, "CSSTRIM: start_element: top <%s>\n", tag->name)); do_cstyle_flush(me); HText_characterStyle(me->text, hcode, 1); } FREE(myHash); } #endif /* USE_COLOR_STYLE */ me->preformatted = YES; /* free text within tags */ HTMLGen_put_character(me, '<'); HTMLGen_put_string(me, tag->name); if (present) { BOOL had_attr = NO; for (i = 0; i < tag->number_of_attributes; i++) { if (present[i]) { had_attr = YES; HTMLGen_put_character(me, ' '); allow_break(me, 11, YES); #ifdef USE_COLOR_STYLE /* * Try to mimic HTML_start_element's special handling for * HTML_LINK. If applicable, color the displayed attribute / * value pairs differently. - kw */ if (LYPreparsedSource && element_number == HTML_LINK && !title && present[HTML_LINK_CLASS] && *value[HTML_LINK_CLASS] && !present[HTML_LINK_REV] && (present[HTML_LINK_REL] || present[HTML_LINK_HREF])) { if (present[HTML_LINK_TITLE] && *value[HTML_LINK_TITLE]) { StrAllocCopy(title, value[HTML_LINK_TITLE]); LYTrimHead(title); LYTrimTail(title); } if ((!title || *title == '\0') && present[HTML_LINK_REL]) { StrAllocCopy(title, value[HTML_LINK_REL]); } if (title && *title) { HTSprintf0(&title_tmp, "link.%s.%s", value[HTML_LINK_CLASS], title); CTRACE2(TRACE_STYLE, (tfp, "CSSTRIM:link=%s\n", title_tmp)); do_cstyle_flush(me); HText_characterStyle(me->text, color_style_1(title_tmp), 1); } } #endif HTMLGen_put_string(me, tag->attributes[i].name); if (value[i]) { me->preformatted = was_preformatted; me->in_attrval = YES; if (StrChr(value[i], '"') == NULL) { HTMLGen_put_string(me, "=\""); HTMLGen_put_string(me, value[i]); HTMLGen_put_character(me, '"'); } else if (StrChr(value[i], '\'') == NULL) { HTMLGen_put_string(me, "='"); HTMLGen_put_string(me, value[i]); HTMLGen_put_character(me, '\''); } else { /* attribute value has both kinds of quotes */ const char *p; HTMLGen_put_string(me, "=\""); for (p = value[i]; *p; p++) { if (*p != '"') { HTMLGen_put_character(me, *p); } else { HTMLGen_put_string(me, """); } } HTMLGen_put_character(me, '"'); } me->preformatted = YES; me->in_attrval = NO; } } } #ifdef USE_COLOR_STYLE if (had_attr && LYPreparsedSource && element_number == HTML_LINK) { /* * Clean up after special HTML_LINK handling - kw */ if (title && *title) { do_cstyle_flush(me); HText_characterStyle(me->text, color_style_1(title_tmp), 0); FREE(title_tmp); } FREE(title); } #endif if (had_attr) allow_break(me, 12, NO); } HTMLGen_put_string(me, ">"); /* got rid of \n LJM */ /* * Make very specific HTML assumption that PRE can't be nested! */ me->preformatted = (BOOL) ((element_number == HTML_PRE || element_number == HTML_XMP) ? YES : was_preformatted); /* * Can break after element start. */ if (!me->preformatted && tag->contents != SGML_EMPTY) { if (tag->contents == SGML_ELEMENT) allow_break(me, 15, NO); else allow_break(me, 2, NO); } #if defined(USE_COLOR_STYLE) /* * Same logic as in HTML_start_element, copied from there. - kw */ /* end really empty tags straight away */ if (LYPreparsedSource && ReallyEmptyTagNum(element_number)) { CTRACE2(TRACE_STYLE, (tfp, "STYLE:begin_element:ending EMPTY element style\n")); do_cstyle_flush(me); HText_characterStyle(me->text, hcode, STACK_OFF); TrimColorClass(tag->name, Style_className, &hcode); } #endif /* USE_COLOR_STYLE */ if (element_number == HTML_OBJECT && tag->contents == SGML_LITTERAL) { /* * These conditions only approximate the ones used in HTML.c. Let our * SGML parser know that further content is to be parsed normally not * literally. - kw */ if (!present) { return HT_PARSER_OTHER_CONTENT; } else if (!present[HTML_OBJECT_DECLARE] && !(present[HTML_OBJECT_NAME] && value[HTML_OBJECT_NAME] && *value[HTML_OBJECT_NAME])) { if (present[HTML_OBJECT_SHAPES] || !(present[HTML_OBJECT_USEMAP] && value[HTML_OBJECT_USEMAP] && *value[HTML_OBJECT_USEMAP])) return HT_PARSER_OTHER_CONTENT; } } return HT_OK; } /* End Element * ----------- * * When we end an element, the style must be returned to that in effect before * that element. Note that anchors (etc?) don't have an associated style, so * that we must scan down the stack for an element with a defined style. (In * fact, the styles should be linked to the whole stack not just the top one.) * TBL 921119 */ static int HTMLGen_end_element(HTStructured * me, int element_number, char **insert GCC_UNUSED) { if (!me->preformatted && HTML_dtd.tags[element_number].contents != SGML_EMPTY) { /* * Can break before element end. */ if (HTML_dtd.tags[element_number].contents == SGML_ELEMENT) allow_break(me, 14, NO); else allow_break(me, 1, NO); } HTMLGen_put_string(me, "'); if (element_number == HTML_PRE || element_number == HTML_XMP) { me->preformatted = NO; } #ifdef USE_COLOR_STYLE /* * Same logic as in HTML_end_element, copied from there. - kw */ TrimColorClass(HTML_dtd.tags[element_number].name, Style_className, &hcode); if (LYPreparsedSource && !ReallyEmptyTagNum(element_number)) { CTRACE2(TRACE_STYLE, (tfp, "STYLE:end_element: ending non-EMPTY style\n")); do_cstyle_flush(me); HText_characterStyle(me->text, hcode, STACK_OFF); } #endif /* USE_COLOR_STYLE */ return HT_OK; } /* Expanding entities * ------------------ * */ static int HTMLGen_put_entity(HTStructured * me, int entity_number) { int nent = (int) HTML_dtd.number_of_entities; HTMLGen_put_character(me, '&'); if (entity_number < nent) { HTMLGen_put_string(me, HTML_dtd.entity_names[entity_number]); } HTMLGen_put_character(me, ';'); return HT_OK; } /* Free an HTML object * ------------------- * */ static void HTMLGen_free(HTStructured * me) { (*me->targetClass.put_character) (me->target, '\n'); HTMLGen_flush(me); (*me->targetClass._free) (me->target); /* ripple through */ #ifdef USE_COLOR_STYLE FREE(Style_className); #endif FREE(me); } static void PlainToHTML_free(HTStructured * me) { HTMLGen_end_element(me, HTML_PRE, 0); HTMLGen_free(me); } static void HTMLGen_abort(HTStructured * me, HTError e GCC_UNUSED) { HTMLGen_free(me); #ifdef USE_COLOR_STYLE FREE(Style_className); #endif } static void PlainToHTML_abort(HTStructured * me, HTError e GCC_UNUSED) { PlainToHTML_free(me); } /* Structured Object Class * ----------------------- */ static const HTStructuredClass HTMLGeneration = /* As opposed to print etc */ { "HTMLGen", HTMLGen_free, HTMLGen_abort, HTMLGen_put_character, HTMLGen_put_string, HTMLGen_write, HTMLGen_start_element, HTMLGen_end_element, HTMLGen_put_entity }; /* Subclass-specific Methods * ------------------------- */ HTStructured *HTMLGenerator(HTStream *output) { HTStructured *me = (HTStructured *) malloc(sizeof(*me)); if (me == NULL) outofmem(__FILE__, "HTMLGenerator"); me->isa = &HTMLGeneration; me->target = output; me->targetClass = *me->target->isa; /* Copy pointers to routines for speed */ me->write_pointer = me->buffer; flush_breaks(me); me->line_break[0] = me->buffer; me->cleanness = 0; me->overflowed = NO; me->delete_line_break_char[0] = NO; me->preformatted = NO; me->in_attrval = NO; /* * For what line length should we attempt to wrap ? - kw */ if (!LYPreparsedSource) { me->buffer_maxchars = 80; /* work as before - kw */ } else if (dump_output_width > 1) { me->buffer_maxchars = dump_output_width; /* try to honor -width - kw */ } else if (dump_output_immediately) { me->buffer_maxchars = 80; /* try to honor -width - kw */ } else { me->buffer_maxchars = (LYcolLimit - 1); if (me->buffer_maxchars < 38) /* too narrow, let GridText deal */ me->buffer_maxchars = 40; } if (me->buffer_maxchars > 900) /* likely not true - kw */ me->buffer_maxchars = 78; if (me->buffer_maxchars > BUFFER_SIZE) /* must not be larger! */ me->buffer_maxchars = BUFFER_SIZE - 2; /* * If dump_output_immediately is set, there likely isn't anything after * this stream to interpret the Lynx special chars. Also if they get * displayed via HTPlain, that will probably make non-breaking space chars * etc. invisible. So let's translate them to numerical character * references. For debugging purposes we'll use the new hex format. */ me->escape_specials = LYPreparsedSource; #ifdef USE_COLOR_STYLE me->text = NULL; /* Will be initialized when first needed. - kw */ FREE(Style_className); class_string[0] = '\0'; #endif /* COLOR_STYLE */ return me; } /* Stream Object Class * ------------------- * * This object just converts a plain text stream into HTML * It is officially a structured stream but only the stream bits exist. * This is just the easiest way of typecasting all the routines. */ static const HTStructuredClass PlainToHTMLConversion = { "plaintexttoHTML", HTMLGen_free, PlainToHTML_abort, HTMLGen_put_character, HTMLGen_put_string, HTMLGen_write, NULL, /* Structured stuff */ NULL, NULL }; /* HTConverter from plain text to HTML Stream * ------------------------------------------ */ HTStream *HTPlainToHTML(HTPresentation *pres GCC_UNUSED, HTParentAnchor *anchor GCC_UNUSED, HTStream *sink) { HTStructured *me = (HTStructured *) malloc(sizeof(*me)); if (me == NULL) outofmem(__FILE__, "PlainToHTML"); me->isa = (const HTStructuredClass *) &PlainToHTMLConversion; /* * Copy pointers to routines for speed. */ me->target = sink; me->targetClass = *me->target->isa; me->write_pointer = me->buffer; flush_breaks(me); me->cleanness = 0; me->overflowed = NO; me->delete_line_break_char[0] = NO; /* try to honor -width - kw */ me->buffer_maxchars = (dump_output_width > 1 ? dump_output_width : 80); HTMLGen_put_string(me, "\n\n
\n");
    me->preformatted = YES;
    me->escape_specials = NO;
    me->in_attrval = NO;
    return (HTStream *) me;
}