diff options
Diffstat (limited to '')
-rw-r--r-- | WWW/Library/Implementation/UCDefs.h | 106 |
1 files changed, 106 insertions, 0 deletions
/*
 * $LynxId: UCDefs.h,v 1.18 2021/06/29 00:21:51 tom Exp $
 *
 * Definitions for Unicode character-translations
 */

#ifndef UCDEFS_H
#define UCDEFS_H

#ifndef HTUTILS_H
#include <HTUtils.h>
#endif

/*
 * Description of a single character set.
 *
 * NOTE(review): `enc' presumably holds an eUCT_ENC value, `repertoire' the
 * UCT_REP_* flags, `codepoints' the UCT_CP_* flags, and `cpranges' /
 * `like8859' the UCT_R_* flags -- the fields are plain ints, so confirm
 * against the code that fills them in.
 */
typedef struct _LYUCcharset {
    int UChndl;         /* -1 for "old" charsets, >= 0 for chartrans tables */

    const char *MIMEname;
    int enc;
    int codepage;       /* if positive, an IBM OS/2 specific number;
                           if negative, flag for no table translation */

    /* parameters below are not used by chartrans mechanism, */
    /* they describe some relationships against built-in Latin1 charset... */
    int repertoire;     /* unused */
    int codepoints;     /* subset/superset of Latin1 ? */
    int cpranges;       /* unused, obsolete by LYlowest_eightbit;
                           "which ranges have valid displayable chars
                           (including nbsp and shy)" */
    int like8859;       /* currently used for nbsp and shy only
                           (but UCT_R_8859SPECL assumed for any UCT_R_8BIT...);
                           "for which ranges is it like 8859-1" */
} LYUCcharset;

/*
 * Kinds of character encoding.
 */
typedef enum {
    UCT_ENC_7BIT,
    UCT_ENC_8BIT,
    UCT_ENC_8859,       /* no displayable chars in 0x80-0x9F */
    UCT_ENC_8BIT_C0,    /* 8-bit + some chars in C0 control area */
    UCT_ENC_MAYBE2022,
    UCT_ENC_CJK,
    UCT_ENC_16BIT,
    UCT_ENC_UTF8
} eUCT_ENC;

/*
 * Repertoire flags.
 *
 * Every composite macro in this header is wrapped in parentheses so that
 * it behaves as a single operand: without them, `x & UCT_REP_IS_LAT1'
 * would parse as `(x & UCT_REP_SUBSETOF_LAT1) | UCT_REP_SUPERSETOF_LAT1'
 * because `&' binds tighter than `|'.
 */
#define UCT_REP_SUBSETOF_LAT1   0x01
#define UCT_REP_SUPERSETOF_LAT1 0x02
#define UCT_REP_IS_LAT1         (UCT_REP_SUBSETOF_LAT1 | UCT_REP_SUPERSETOF_LAT1)
/*
 * Assume everything we deal with is included in the UCS2 repertoire,
 * so a flag for _REP_SUBSETOF_UCS2 would be redundant.
 */

/*
 * More general description how the code points relate to 8859-1 and UCS:
 */
#define UCT_CP_SUBSETOF_LAT1    0x01    /* implies UCT_CP_SUBSETOF_UCS2 */
#define UCT_CP_SUPERSETOF_LAT1  0x02
#define UCT_CP_SUBSETOF_UCS2    0x04

#define UCT_CP_IS_LAT1          (UCT_CP_SUBSETOF_LAT1 | UCT_CP_SUPERSETOF_LAT1)

/*
 * More specific bitflags for practically important code point ranges:
 */
#define UCT_R_LOWCTRL           0x08    /* 0x00-0x1F, for completeness */
#define UCT_R_7BITINV           0x10    /* invariant???, displayable 7bit chars */
#define UCT_R_7BITNAT           0x20    /* displayable 7bit, national??? */
#define UCT_R_HIGHCTRL          0x40    /* chars in 0x80-0x9F range */
#define UCT_R_8859SPECL         0x80    /* special chars in 8859-x sets: nbsp and shy */
#define UCT_R_HIGH8BIT          0x100   /* rest of 0xA0-0xFF range */

/* Composite ranges, parenthesized so they are safe next to `&', `~', `=='. */
#define UCT_R_ASCII     (UCT_R_7BITINV | UCT_R_7BITNAT) /* displayable US-ASCII */
#define UCT_R_LAT1      (UCT_R_ASCII | UCT_R_8859SPECL | UCT_R_HIGH8BIT)
#define UCT_R_8BIT      (UCT_R_LAT1 | UCT_R_HIGHCTRL)   /* full 8bit range */

/*
 * For the following some comments are in HTAnchor.c.
 *
 * UCT_STAGEMAX is not a stage itself; it is the number of stages and is
 * used below as the dimension of UCAnchorInfo.s[].
 */
typedef enum {
    UCT_STAGE_MIME,
    UCT_STAGE_PARSER,           /* What the parser (SGML.c) gets to see */
    UCT_STAGE_STRUCTURED,       /* What the structured stream (HTML) gets fed */
    UCT_STAGE_HTEXT,            /* What gets fed to the HText_* functions */
    UCT_STAGEMAX
} eUCT_STAGE;

/*
 * Who set the charset information for a stage.
 */
typedef enum {
    UCT_SETBY_NONE,
    UCT_SETBY_DEFAULT,
    UCT_SETBY_LINK,             /* set by A or LINK CHARSET= hint */
    UCT_SETBY_STRUCTURED,       /* structured stream stage (HTML.c) */
    UCT_SETBY_PARSER,           /* set by SGML parser or similar */
    UCT_SETBY_MIME              /* set explicitly by MIME charset parameter */
} eUCT_SETBY;

/*
 * Charset information for one stage.
 */
typedef struct _UCStageInfo {
    int lock;                   /* by what it has been set */
    int LYhndl;
    LYUCcharset C;
} UCStageInfo;

/*
 * Per-anchor charset information: one UCStageInfo slot per eUCT_STAGE
 * value below UCT_STAGEMAX.
 */
typedef struct _UCAnchorInfo {
    struct _UCStageInfo s[UCT_STAGEMAX];
} UCAnchorInfo;

#endif /* UCDEFS_H */