1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
|
/*
* $LynxId: HTML.h,v 1.36 2022/07/22 20:22:13 tom Exp $
*
* HTML to rich text converter for libwww
*
* THE HTML TO RTF OBJECT CONVERTER
*
* This interprets the HTML semantics.
*/
#ifndef HTML_H
#define HTML_H
#ifndef HTUTILS_H
#include <HTUtils.h>
#endif /* HTUTILS_H */
#include <UCDefs.h>
#include <UCAux.h>
#include <HTAnchor.h>
#include <HTMLDTD.h>
#ifdef __cplusplus
extern "C" {
#endif
/* #define ATTR_CS_IN (me->T.output_utf8 ? me->UCLYhndl : 0) */
/*
* Charset handle used as the input charset for attribute strings. Expands to
* a member access on `me', so it is only usable where a variable named `me'
* with a tag_charset member is in scope.
*/
#define ATTR_CS_IN me->tag_charset
/*
* Wrappers around LYUCTranslateHTMLString() that always pass st_HTML as the
* final argument. The numeric suffix is the number of macro arguments:
*
*   (s, p, h)                          from ATTR_CS_IN to current_char_set,
*                                      4th function argument fixed to YES
*   5: (s, cs_from, cs_to, p, h)       caller-supplied charsets, 4th function
*                                      argument fixed to YES
*   6: (s, cs_from, cs_to, spcls, p, h) caller also supplies the 4th function
*                                      argument (spcls)
*
* The unsuffixed form references both `me' (through ATTR_CS_IN) and
* current_char_set, so both must be visible at the point of use. The meaning
* of s, p, h and spcls is defined by LYUCTranslateHTMLString(), which is
* declared outside this header.
*/
#define TRANSLATE_AND_UNESCAPE_ENTITIES(s, p, h) \
LYUCTranslateHTMLString(s, ATTR_CS_IN, current_char_set, YES, p, h, st_HTML)
#define TRANSLATE_AND_UNESCAPE_ENTITIES5(s,cs_from,cs_to,p,h) \
LYUCTranslateHTMLString(s, cs_from, cs_to, YES, p, h, st_HTML)
#define TRANSLATE_AND_UNESCAPE_ENTITIES6(s,cs_from,cs_to,spcls,p,h) \
LYUCTranslateHTMLString(s, cs_from, cs_to, spcls, p, h, st_HTML)
/*
* Wrappers around LYUCFullyTranslateString(). All three pass NO as the 4th
* function argument and st_HTML as the last one:
*
*   (s, p, h)                          from me->UCLYhndl to current_char_set,
*                                      5th argument YES, 8th argument NO
*   5: (s, cs_from, cs_to, p, h)       caller-supplied charsets,
*                                      5th argument YES, 8th argument NO
*   7: (s, cs_from, cs_to, spcls, p, h, Back)
*                                      caller also supplies the 5th (spcls)
*                                      and 8th (Back) arguments
*
* Note that the unsuffixed form takes its source charset from me->UCLYhndl,
* not from ATTR_CS_IN as TRANSLATE_AND_UNESCAPE_ENTITIES does.
*/
#define TRANSLATE_HTML(s,p,h) \
LYUCFullyTranslateString(s, me->UCLYhndl, current_char_set, NO, YES, p, h, NO, st_HTML)
#define TRANSLATE_HTML5(s,cs_from,cs_to,p,h) \
LYUCFullyTranslateString(s, cs_from, cs_to, NO, YES, p, h, NO, st_HTML)
#define TRANSLATE_HTML7(s,cs_from,cs_to,spcls,p,h,Back) \
LYUCFullyTranslateString(s, cs_from, cs_to, NO, spcls, p, h, Back, st_HTML)
/*
* Strings from attributes which should be converted to some kind of "standard"
* representation (character encoding), was Latin-1, esp. URLs (incl.
* #fragments) and HTML NAME and ID stuff.
*
* Both macros call LYUCTranslateHTMLString() with ATTR_CS_IN as the source
* and the target charset and with the fixed flag arguments NO, NO, YES. They
* differ only in the final argument: st_URL for TRANSLATE_AND_UNESCAPE_TO_STD
* and st_HTML for UNESCAPE_FIELDNAME_TO_STD. Because ATTR_CS_IN expands to
* me->tag_charset, a variable named `me' must be in scope where they are used.
*/
#define TRANSLATE_AND_UNESCAPE_TO_STD(s) \
LYUCTranslateHTMLString(s, ATTR_CS_IN, ATTR_CS_IN, NO, NO, YES, st_URL)
#define UNESCAPE_FIELDNAME_TO_STD(s) \
LYUCTranslateHTMLString(s, ATTR_CS_IN, ATTR_CS_IN, NO, NO, YES, st_HTML)
/*
* Constant HTStructuredClass object for the HTML presentation; defined outside
* this header. Unlike the declarations that follow, it is visible whether or
* not Lynx_HTML_Handler is defined.
*/
extern const HTStructuredClass HTMLPresentation;
#ifdef Lynx_HTML_Handler
/*
* This section is semi-private to HTML.c and its helper modules. - FM
* --------------------------------------------------------------------
*/
/*
* One entry of the style stack kept in struct _HTStructured (its `stack'
* array and `sp' pointer): a style paired with a tag number.
*/
typedef struct _stack_element {
HTStyle *style; /* style recorded for this stack level */
int tag_number; /* tag number recorded with it; numbering is defined elsewhere */
} stack_element;
/* HTML Object
* -----------
*
* State of the HTML structured-stream object. The prototypes later in this
* header receive it as `HTStructured * me'; presumably HTStructured is a
* typedef for this struct (the typedef is not in this header - confirm).
* The layout is only visible when Lynx_HTML_Handler is defined.
*/
/*
* Capacity of the style stack (`stack') and of DivisionAlignments[] below.
*/
#define MAX_NESTING 800 /* Should be checked by parser */
struct _HTStructured {
const HTStructuredClass *isa;
HTParentAnchor *node_anchor; /* anchor whose UCStages the charset fields below refer to */
HText *text;
HTStream *target; /* Output stream */
HTStreamClass targetClass; /* Output routines */
HTChildAnchor *CurrentA; /* current HTML_A anchor */
int CurrentANum; /* current HTML_A number */
char *base_href; /* current HTML_BASE href */
char *map_address; /* current HTML_MAP address */
HTChunk title; /* Grow by 128 */
/*
* OBJECT handling. The object_* names presumably mirror the attributes of
* the OBJECT element - confirm against HTML.c.
*/
HTChunk object; /* Grow by 128 */
BOOL object_started;
BOOL object_declare;
BOOL object_shapes;
BOOL object_ismap;
char *object_usemap;
char *object_id;
char *object_title;
char *object_data;
char *object_type;
char *object_classid;
char *object_codebase;
char *object_codetype;
char *object_name;
int objects_mixed_open, objects_figged_open;
/*
* SELECT / OPTION handling.
*/
HTChunk option; /* Grow by 128 */
BOOL first_option; /* First OPTION in SELECT? */
char *LastOptionValue;
BOOL LastOptionChecked;
BOOL select_disabled;
/*
* TEXTAREA handling. NOTE(review): textarea_disabled and textarea_readonly
* are declared int, whereas the comparable select_disabled flag above is
* BOOL - check whether the difference is intentional.
*/
HTChunk textarea; /* Grow by 128 */
char *textarea_name;
int textarea_name_cs;
char *textarea_accept_cs;
int textarea_cols;
int textarea_rows;
int textarea_disabled;
int textarea_readonly;
char *textarea_id;
HTChunk math; /* Grow by 128 */
HTChunk style_block; /* Grow by 128 */
HTChunk script; /* Grow by 128 */
/*
* Used for nested lists. - FM
*
* NOTE(review): OL_Counter and OL_Type both have 12 slots, written as a
* bare literal. Nothing in this header ties List_Nesting_Level to that
* size, so any bound on the index must be enforced by the code that uses
* these arrays.
*/
int List_Nesting_Level; /* counter for list nesting level */
int OL_Counter[12]; /* counter for ordered lists */
char OL_Type[12]; /* types for ordered lists */
int Last_OL_Count; /* last count in ordered lists */
char Last_OL_Type; /* last type in ordered lists */
int Division_Level;
short DivisionAlignments[MAX_NESTING]; /* MAX_NESTING entries, same capacity as `stack' */
int Underline_Level;
int Quote_Level;
BOOL UsePlainSpace;
BOOL HiddenValue;
int lastraw;
const char *comment_start; /* for literate programming */
const char *comment_end;
HTTag *current_tag;
/*
* Pending style change; the UPDATE_STYLE macro in this header tests
* style_change and calls actually_set_style(me) when it is set.
*/
BOOL style_change;
HTStyle *new_style;
HTStyle *old_style;
int current_default_alignment;
BOOL in_word; /* Have just had a non-white char */
/*
* Style stack: MAX_NESTING elements of storage plus the current position.
*/
stack_element stack[MAX_NESTING];
stack_element *sp; /* Style stack pointer */
BOOL stack_overrun; /* Was MAX_NESTING exceeded? */
int skip_stack; /* flag to skip next style stack operation */
/*
* Track if we are in an anchor, paragraph, address, base, etc.
*/
BOOL inA;
BOOL inAPPLET;
BOOL inAPPLETwithP;
BOOL inBadHREF;
BOOL inBadHTML;
BOOL inBASE;
BOOL inBoldA;
BOOL inBoldH;
BOOL inCAPTION;
BOOL inCREDIT;
BOOL inFIG;
BOOL inFIGwithP;
BOOL inFONT;
BOOL inFORM;
BOOL inLABEL;
BOOL inP;
BOOL inPRE;
BOOL inSELECT;
BOOL inTABLE;
BOOL inTEXTAREA;
BOOL inUnderline;
BOOL needBoldH;
char *xinclude; /* if no include string address passed */
/*
* UCI and UCLYhndl give the UCInfo and charset registered for the HTML
* parser in the node_anchor's UCStages structure. It indicates what is
* fed to the HTML parser as the stream of character data (not necessarily
* tags and attributes). It should currently always be set to be the same
* as UCI and UCLhndl for the HTEXT stage in the node_anchor's UCStages
* structure, since the HTML parser sends its input character data to the
* output without further charset translation.
*/
LYUCcharset *UCI;
int UCLYhndl;
/*
* inUCI and inUCLYhndl indicate the UCInfo and charset which the HTML
* parser treats as the input charset. It is normally set to the UCI and
* UCLhndl for the SGML parser in the node_anchor's UCStages structure
* (which may be a dummy, based on the MIME parser's UCI and UCLhndl in
* that structure, when we are handling a local file or non-http(s)
* gateway). It could be changed temporarily by the HTML parser, for
* conversions of attribute strings, but should be reset once done. - FM
*/
LYUCcharset *inUCI;
int inUCLYhndl;
/*
* outUCI and outUCLYhndl indicate the UCInfo and charset which the HTML
* parser treats as the output charset. It is normally set to its own UCI
* and UCLhndl. It could be changed for conversions of attribute strings,
* but should be reset once done. - FM
*/
LYUCcharset *outUCI;
int outUCLYhndl;
/*
* T holds the transformation rules for conversions of strings between the
* input and output charsets by the HTML parser. - FM
*/
UCTransParams T;
int tag_charset; /* charset for attribute values etc.; read through the ATTR_CS_IN macro */
};
/*
* Helpers shared with HTML.c's helper modules; all three are defined outside
* this header, so only their signatures can be stated here.
*/
/* Returns the HTStyle for style_number; the numbering is defined elsewhere. */
extern HTStyle *LYstyles(int style_number);
/*
* Returns a BOOL for the given parser object. NOTE(review): presumably tied
* to the inBadHTML flag of struct _HTStructured - confirm in the definition.
*/
extern BOOL LYBadHTML(HTStructured * me);
/* Takes a message string s; presumably reports it to the user - confirm. */
extern void LYShowBadHTML(const char *s);
/*
* Semi-Private functions. - FM
*
* Each takes the parser object as `me'. They are defined outside this
* header; the notes below restate the signatures and mark anything else as
* an assumption.
*/
/* Feed one character c (passed as int) to the parser object. */
extern void HTML_put_character(HTStructured * me, int c);
/* Feed a string s; presumably NUL-terminated since no length is passed. */
extern void HTML_put_string(HTStructured * me, const char *s);
/* Feed the buffer s together with an int l; presumably l is its length - confirm. */
extern void HTML_write(HTStructured * me, const char *s, int l);
/* Emit the entity with the given number; the meaning of the int result is not visible here. */
extern int HTML_put_entity(HTStructured * me, int entity_number);
/* Called by the UPDATE_STYLE macro when me->style_change is set. */
extern void actually_set_style(HTStructured * me);
/* Style buffering avoids dummy paragraph begin/ends.
*
* UPDATE_STYLE applies a pending style change: when me->style_change is set
* it calls actually_set_style(me), otherwise it does nothing. It expects a
* variable named `me' to be in scope and must be written as a statement with
* a trailing semicolon: "UPDATE_STYLE;".
*
* The body is wrapped in do { ... } while (0) so that the macro behaves as
* exactly one statement. A bare "if (...) { ... }" expansion does not: inside
* an enclosing if/else, "UPDATE_STYLE;" would fail to compile and
* "UPDATE_STYLE" without the semicolon would silently capture the else.
*/
#define UPDATE_STYLE \
    do { \
	if (me->style_change) { \
	    actually_set_style(me); \
	} \
    } while (0)
#endif /* Lynx_HTML_Handler */
/*
* Takes a writable string i and returns nothing; presumably converts it to
* lower case in place - confirm in the definition, which is outside this
* header.
*
* NOTE(review): identifiers that begin with "str" followed by a lowercase
* letter are reserved for future <string.h> additions by the C standard, so
* this name can clash with a library or compiler extension. Renaming it would
* require changing every caller.
*/
extern void strtolower(char *i);
/* P U B L I C
*/
/*
* HTConverter to present HTML
*
* The five converters below share one signature: they take a presentation
* record (pres), the parent anchor of the document (anchor) and a sink
* stream, and return an HTStream pointer. How each one differs is decided by
* its definition, which is outside this header.
*/
extern HTStream *HTMLToPlain(HTPresentation *pres,
HTParentAnchor *anchor,
HTStream *sink);
extern HTStream *HTMLParsedPresent(HTPresentation *pres,
HTParentAnchor *anchor,
HTStream *sink);
extern HTStream *HTMLToC(HTPresentation *pres,
HTParentAnchor *anchor,
HTStream *sink);
extern HTStream *HTMLPresent(HTPresentation *pres,
HTParentAnchor *anchor,
HTStream *sink);
extern HTStream *XHTMLPresent(HTPresentation *pres,
HTParentAnchor *anchor,
HTStream *sink);
/*
* Returns an HTStructured object for the given anchor, output format and
* target stream. NOTE(review): presumably allocates it and the caller or the
* stream machinery releases it - ownership is not visible from here.
*/
extern HTStructured *HTML_new(HTParentAnchor *anchor,
HTFormat format_out,
HTStream *target);
/*
* Record error message as a hypertext object.
*
* The error message should be marked as an error so that it can be reloaded
* later. This implementation just throws up an error message and leaves the
* document unloaded.
*
* On entry,
* sink is a stream to the output device if any
* number is the HTTP error number
* message is the human readable message.
* On exit,
* a return code like HT_LOADED if object exists else < 0
*/
extern int HTLoadError(HTStream *sink,
int number,
const char *message);
#ifdef __cplusplus
}
#endif
#endif /* HTML_H */
|