summaryrefslogtreecommitdiffstats
path: root/src/HTML.h
diff options
context:
space:
mode:
Diffstat (limited to 'src/HTML.h')
-rw-r--r--src/HTML.h283
1 files changed, 283 insertions, 0 deletions
diff --git a/src/HTML.h b/src/HTML.h
new file mode 100644
index 0000000..6e5ebc3
--- /dev/null
+++ b/src/HTML.h
@@ -0,0 +1,283 @@
+/*
+ * $LynxId: HTML.h,v 1.36 2022/07/22 20:22:13 tom Exp $
+ *
+ * HTML to rich text converter for libwww
+ *
+ * THE HTML TO RTF OBJECT CONVERTER
+ *
+ * This interprets the HTML semantics.
+ */
+#ifndef HTML_H
+#define HTML_H
+
+#ifndef HTUTILS_H
+#include <HTUtils.h>
+#endif /* HTUTILS_H */
+
+#include <UCDefs.h>
+#include <UCAux.h>
+#include <HTAnchor.h>
+#include <HTMLDTD.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+/* Disabled alternative: #define ATTR_CS_IN (me->T.output_utf8 ? me->UCLYhndl : 0) */
+#define ATTR_CS_IN me->tag_charset /* needs a pointer named me with a tag_charset field in scope */
+#define TRANSLATE_AND_UNESCAPE_ENTITIES(s, p, h) \
+ LYUCTranslateHTMLString(s, ATTR_CS_IN, current_char_set, YES, p, h, st_HTML)
+#define TRANSLATE_AND_UNESCAPE_ENTITIES5(s,cs_from,cs_to,p,h) \
+ LYUCTranslateHTMLString(s, cs_from, cs_to, YES, p, h, st_HTML)
+#define TRANSLATE_AND_UNESCAPE_ENTITIES6(s,cs_from,cs_to,spcls,p,h) \
+ LYUCTranslateHTMLString(s, cs_from, cs_to, spcls, p, h, st_HTML)
+#define TRANSLATE_HTML(s,p,h) \
+ LYUCFullyTranslateString(s, me->UCLYhndl, current_char_set, NO, YES, p, h, NO, st_HTML)
+#define TRANSLATE_HTML5(s,cs_from,cs_to,p,h) \
+ LYUCFullyTranslateString(s, cs_from, cs_to, NO, YES, p, h, NO, st_HTML)
+#define TRANSLATE_HTML7(s,cs_from,cs_to,spcls,p,h,Back) \
+ LYUCFullyTranslateString(s, cs_from, cs_to, NO, spcls, p, h, Back, st_HTML)
+/*
+ * Attribute strings that should be converted to some kind of "standard"
+ * representation (character encoding; formerly Latin-1), especially URLs
+ * (including #fragments) and HTML NAME and ID values.
+ */
+#define TRANSLATE_AND_UNESCAPE_TO_STD(s) \
+ LYUCTranslateHTMLString(s, ATTR_CS_IN, ATTR_CS_IN, NO, NO, YES, st_URL)
+#define UNESCAPE_FIELDNAME_TO_STD(s) \
+ LYUCTranslateHTMLString(s, ATTR_CS_IN, ATTR_CS_IN, NO, NO, YES, st_HTML)
+ extern const HTStructuredClass HTMLPresentation;
+
+#ifdef Lynx_HTML_Handler
+/*
+ * This section is semi-private to HTML.c and its helper modules. - FM
+ * --------------------------------------------------------------------
+ */
+
+ typedef struct _stack_element { /* element type of stack[] and sp in struct _HTStructured */
+ HTStyle *style;
+ int tag_number;
+ } stack_element;
+
+/* HTML Object
+ * -----------
+ */
+#define MAX_NESTING 800 /* Sizes stack[] and DivisionAlignments[]; should be checked by parser */
+
+ struct _HTStructured { /* the "HTML Object"; ATTR_CS_IN, TRANSLATE_HTML and UPDATE_STYLE read it via me-> */
+ const HTStructuredClass *isa;
+ HTParentAnchor *node_anchor;
+ HText *text;
+
+ HTStream *target; /* Output stream */
+ HTStreamClass targetClass; /* Output routines */
+
+ HTChildAnchor *CurrentA; /* current HTML_A anchor */
+ int CurrentANum; /* current HTML_A number */
+ char *base_href; /* current HTML_BASE href */
+ char *map_address; /* current HTML_MAP address */
+
+ HTChunk title; /* Grow by 128 */
+ HTChunk object; /* Grow by 128 */
+ BOOL object_started;
+ BOOL object_declare;
+ BOOL object_shapes;
+ BOOL object_ismap;
+ char *object_usemap;
+ char *object_id;
+ char *object_title;
+ char *object_data;
+ char *object_type;
+ char *object_classid;
+ char *object_codebase;
+ char *object_codetype;
+ char *object_name;
+ int objects_mixed_open, objects_figged_open;
+ HTChunk option; /* Grow by 128 */
+ BOOL first_option; /* First OPTION in SELECT? */
+ char *LastOptionValue;
+ BOOL LastOptionChecked;
+ BOOL select_disabled;
+ HTChunk textarea; /* Grow by 128 */
+ char *textarea_name;
+ int textarea_name_cs;
+ char *textarea_accept_cs;
+ int textarea_cols;
+ int textarea_rows;
+ int textarea_disabled;
+ int textarea_readonly;
+ char *textarea_id;
+ HTChunk math; /* Grow by 128 */
+ HTChunk style_block; /* Grow by 128 */
+ HTChunk script; /* Grow by 128 */
+
+ /*
+ * Used for nested lists. - FM
+ */
+ int List_Nesting_Level; /* counter for list nesting level */
+ int OL_Counter[12]; /* counter for ordered lists */
+ char OL_Type[12]; /* types for ordered lists */
+ int Last_OL_Count; /* last count in ordered lists */
+ char Last_OL_Type; /* last type in ordered lists */
+
+ int Division_Level;
+ short DivisionAlignments[MAX_NESTING];
+ int Underline_Level;
+ int Quote_Level;
+
+ BOOL UsePlainSpace;
+ BOOL HiddenValue;
+ int lastraw;
+
+ const char *comment_start; /* for literate programming */
+ const char *comment_end;
+
+ HTTag *current_tag;
+ BOOL style_change; /* tested by UPDATE_STYLE */
+ HTStyle *new_style;
+ HTStyle *old_style;
+ int current_default_alignment;
+ BOOL in_word; /* Have just had a non-white char */
+ stack_element stack[MAX_NESTING];
+ stack_element *sp; /* Style stack pointer */
+ BOOL stack_overrun; /* Was MAX_NESTING exceeded? */
+ int skip_stack; /* flag to skip next style stack operation */
+
+ /*
+ * Track if we are in an anchor, paragraph, address, base, etc.
+ */
+ BOOL inA;
+ BOOL inAPPLET;
+ BOOL inAPPLETwithP;
+ BOOL inBadHREF;
+ BOOL inBadHTML;
+ BOOL inBASE;
+ BOOL inBoldA;
+ BOOL inBoldH;
+ BOOL inCAPTION;
+ BOOL inCREDIT;
+ BOOL inFIG;
+ BOOL inFIGwithP;
+ BOOL inFONT;
+ BOOL inFORM;
+ BOOL inLABEL;
+ BOOL inP;
+ BOOL inPRE;
+ BOOL inSELECT;
+ BOOL inTABLE;
+ BOOL inTEXTAREA;
+ BOOL inUnderline;
+
+ BOOL needBoldH;
+
+ char *xinclude; /* if no include string address passed */
+ /*
+ * UCI and UCLYhndl give the UCInfo and charset registered for the HTML
+ * parser in the node_anchor's UCStages structure. It indicates what is
+ * fed to the HTML parser as the stream of character data (not necessarily
+ * tags and attributes). It should currently always be set to be the same
+ * as UCI and UCLhndl for the HTEXT stage in the node_anchor's UCStages
+ * structure, since the HTML parser sends its input character data to the
+ * output without further charset translation.
+ */
+ LYUCcharset *UCI;
+ int UCLYhndl;
+ /*
+ * inUCI and inUCLYhndl indicate the UCInfo and charset which the HTML
+ * parser treats as the input charset. It is normally set to the UCI and
+ * UCLhndl for the SGML parser in the node_anchor's UCStages structure
+ * (which may be a dummy, based on the MIME parser's UCI and UCLhndl in
+ * that structure, when we are handling a local file or non-http(s)
+ * gateway). It could be changed temporarily by the HTML parser, for
+ * conversions of attribute strings, but should be reset once done. - FM
+ */
+ LYUCcharset *inUCI;
+ int inUCLYhndl;
+ /*
+ * outUCI and outUCLYhndl indicate the UCInfo and charset which the HTML
+ * parser treats as the output charset. It is normally set to its own UCI
+ * and UCLhndl. It could be changed for conversions of attribute strings,
+ * but should be reset once done. - FM
+ */
+ LYUCcharset *outUCI;
+ int outUCLYhndl;
+ /*
+ * T holds the transformation rules for conversions of strings between the
+ * input and output charsets by the HTML parser. - FM
+ */
+ UCTransParams T;
+
+ int tag_charset; /* charset for attribute values etc.; read via ATTR_CS_IN */
+ };
+
+ extern HTStyle *LYstyles(int style_number);
+ extern BOOL LYBadHTML(HTStructured * me);
+ extern void LYShowBadHTML(const char *s);
+
+/*
+ * Semi-Private functions (declared only when Lynx_HTML_Handler is defined). - FM
+ */
+ extern void HTML_put_character(HTStructured * me, int c);
+ extern void HTML_put_string(HTStructured * me, const char *s);
+ extern void HTML_write(HTStructured * me, const char *s, int l); /* l: presumably the length of s -- confirm */
+ extern int HTML_put_entity(HTStructured * me, int entity_number);
+ extern void actually_set_style(HTStructured * me); /* called by UPDATE_STYLE */
+
+/* Style buffering avoids dummy paragraph begin/ends.  Expands to a bare `if'
+ * on a variable named me, so do not use it directly before an `else'. */
+#define UPDATE_STYLE if (me->style_change) { actually_set_style(me); }
+#endif /* Lynx_HTML_Handler */
+
+ extern void strtolower(char *i); /* NOTE(review): presumably lowercases i in place -- confirm; str[a-z]* names are reserved by ISO C for <string.h> */
+
+/* P U B L I C
+*/
+
+/*
+ * HTConverters to present HTML (each takes pres, anchor and sink), plus HTML_new
+ */
+ extern HTStream *HTMLToPlain(HTPresentation *pres,
+ HTParentAnchor *anchor,
+ HTStream *sink);
+
+ extern HTStream *HTMLParsedPresent(HTPresentation *pres,
+ HTParentAnchor *anchor,
+ HTStream *sink);
+
+ extern HTStream *HTMLToC(HTPresentation *pres,
+ HTParentAnchor *anchor,
+ HTStream *sink);
+
+ extern HTStream *HTMLPresent(HTPresentation *pres,
+ HTParentAnchor *anchor,
+ HTStream *sink);
+
+ extern HTStream *XHTMLPresent(HTPresentation *pres,
+ HTParentAnchor *anchor,
+ HTStream *sink);
+
+ extern HTStructured *HTML_new(HTParentAnchor *anchor,
+ HTFormat format_out,
+ HTStream *target);
+
+/*
+ * Record error message as a hypertext object.
+ *
+ * The error message should be marked as an error so that it can be reloaded
+ * later. This implementation just throws up an error message and leaves the
+ * document unloaded.
+ *
+ * On entry,
+ * sink is a stream to the output device if any
+ * number is the HTTP error number
+ * message is the human readable message.
+ * On exit,
+ * a return code like HT_LOADED if object exists else < 0
+ */
+ extern int HTLoadError(HTStream *sink,
+ int number,
+ const char *message);
+
+#ifdef __cplusplus
+}
+#endif
+#endif /* HTML_H */