summaryrefslogtreecommitdiffstats
path: root/WWW/Library/Implementation/HTFormat.h
diff options
context:
space:
mode:
Diffstat (limited to 'WWW/Library/Implementation/HTFormat.h')
-rw-r--r--WWW/Library/Implementation/HTFormat.h588
1 files changed, 588 insertions, 0 deletions
diff --git a/WWW/Library/Implementation/HTFormat.h b/WWW/Library/Implementation/HTFormat.h
new file mode 100644
index 0000000..835daef
--- /dev/null
+++ b/WWW/Library/Implementation/HTFormat.h
@@ -0,0 +1,588 @@
+/*
+ * $LynxId: HTFormat.h,v 1.42 2022/04/01 07:54:14 tom Exp $
+ *
+ * HTFormat: The format manager in the WWW Library
+ * MANAGE DIFFERENT DOCUMENT FORMATS
+ *
+ * Here we describe the functions of the HTFormat module which handles conversion between
+ * different data representations. (In MIME parlance, a representation is known as a
+ * content-type. In WWW the term "format" is often used as it is shorter).
+ *
+ * This module is implemented by HTFormat.c. This hypertext document is used to generate
+ * the HTFormat.h include file. Part of the WWW library.
+ */
+#ifndef HTFORMAT_H
+#define HTFORMAT_H
+
+#include <HTStream.h>
+#include <HTAtom.h>
+#include <HTList.h>
+#include <HTAnchor.h>
+
+#ifdef USE_SOURCE_CACHE
+#include <HTChunk.h>
+#endif
+
+#ifdef USE_BZLIB
+#include <bzlib.h>
+#endif
+
+#ifdef USE_ZLIB
+#include <zlib.h>
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+/*
+
+ These macros (which used to be constants) define some basic internally
+ referenced representations. The www/xxx ones are of course not MIME
+ standard.
+
+ www/source is an output format which leaves the input untouched. It is
+ useful for diagnostics, and for users who want to see the original, whatever
+ it is.
+
+ */
+/* Internal ones */
+#define STR_SOURCE "www/source"
+ extern HTAtom *WWW_SOURCE; /* calculated once, heavy used */
+
+/*
+
+ www/present represents the user's perception of the document. If you
+ convert to www/present, you present the material to the user.
+
+ */
+#define STR_PRESENT "www/present"
+#define WWW_PRESENT HTAtom_for(STR_PRESENT) /* The user's perception */
+
+#define WWW_DEBUG HTAtom_for("www/debug")
+/*
+
+ WWW_DEBUG represents the user's perception of debug information, for example
+ sent as a HTML document in a HTTP redirection message.
+
+ */
+
+/*
+
+ The message/rfc822 format means a MIME message or a plain text message with
+ no MIME header. This is what is returned by an HTTP server.
+
+ */
+#define WWW_MIME HTAtom_for("www/mime") /* A MIME message */
+
+/*
+ For parsing only the header. - kw
+ */
+#define WWW_MIME_HEAD HTAtom_for("message/x-rfc822-head")
+
+/*
+
+ www/print is like www/present except it represents a printed copy.
+
+ */
+#define WWW_PRINT HTAtom_for("www/print") /* A printed copy */
+
+/*
+
+ www/unknown is a really unknown type. Some default action is appropriate.
+
+ */
+#define WWW_UNKNOWN HTAtom_for("www/unknown")
+
+#ifdef DIRED_SUPPORT
+/*
+ www/dired signals directory edit mode.
+*/
+#define WWW_DIRED HTAtom_for("www/dired")
+#endif
+
+/*
+ * Miscellaneous internal MIME types.
+ */
+#define STR_DOWNLOAD "www/download"
+#define WWW_DOWNLOAD HTAtom_for(STR_DOWNLOAD)
+
+#define STR_DUMP "www/dump"
+#define WWW_DUMP HTAtom_for(STR_DUMP)
+
+/*
+
+ These are regular MIME types. HTML is assumed to be added by the W3 code.
+ application/octet-stream was mistakenly application/binary in earlier libwww
+ versions (pre 2.11).
+
+ */
+#define STR_BINARY "application/octet-stream"
+#define STR_PLAINTEXT "text/plain"
+#define STR_HTML "text/html"
+
+#define WWW_BINARY HTAtom_for(STR_BINARY)
+#define WWW_PLAINTEXT HTAtom_for(STR_PLAINTEXT)
+#define WWW_HTML HTAtom_for(STR_HTML)
+
+#define WWW_POSTSCRIPT HTAtom_for("application/postscript")
+#define WWW_RICHTEXT HTAtom_for("application/rtf")
+#define WWW_AUDIO HTAtom_for("audio/basic")
+
+ typedef HTAtom *HTEncoding;
+
+/*
+ * The following are values for the MIME types:
+ */
+#define WWW_ENC_7BIT HTAtom_for("7bit")
+#define WWW_ENC_8BIT HTAtom_for("8bit")
+#define WWW_ENC_BINARY HTAtom_for("binary")
+
+/*
+ * We also add
+ */
+#define WWW_ENC_COMPRESS HTAtom_for("compress")
+
+/*
+ * Does a string designate a real encoding, or is it just
+ * a "dummy" as for example 7bit, 8bit, and binary?
+ */
+#define IsUnityEncStr(senc) \
+ ((senc)==NULL || *(senc)=='\0' || !strcmp(senc,"identity") ||\
+ !strcmp(senc,"8bit") || !strcmp(senc,"binary") || !strcmp(senc,"7bit"))
+
+#define IsUnityEnc(enc) \
+ ((enc)==NULL || (enc)==HTAtom_for("identity") ||\
+ (enc)==WWW_ENC_8BIT || (enc)==WWW_ENC_BINARY || (enc)==WWW_ENC_7BIT)
+
+/*
+
+The HTPresentation and HTConverter types
+
+ This HTPresentation structure represents a possible conversion algorithm
+ from one format to another. It includes a pointer to a conversion routine.
+ The conversion routine returns a stream to which data should be fed. See
+ also HTStreamStack which scans the list of registered converters and calls
+ one. See the initialisation module for a list of conversion routines.
+
+ */
+ typedef struct _HTPresentation HTPresentation;
+
+ typedef HTStream *HTConverter (HTPresentation *pres,
+ HTParentAnchor *anchor,
+ HTStream *sink);
+
+ struct _HTPresentation {
+ HTAtom *rep; /* representation name atomized */
+ HTAtom *rep_out; /* resulting representation */
+ HTConverter *converter; /* routine to gen the stream stack */
+ char *command; /* MIME-format command string */
+ char *testcommand; /* MIME-format test string */
+ float quality; /* Between 0 (bad) and 1 (good) */
+ float secs;
+ float secs_per_byte;
+ off_t maxbytes;
+ BOOL get_accept; /* list in "Accept:" for GET */
+ int accept_opt; /* matches against LYAcceptMedia */
+ };
+
+/*
+
+ The list of presentations is kept by this module. It is also scanned by
+ modules which want to know the set of formats supported. for example.
+
+ */
+ extern HTList *HTPresentations;
+
+/*
+
+ The default presentation is used when no other is appropriate
+
+ */
+ extern HTPresentation *default_presentation;
+
+/*
+ * Options used for "Content-Type" string
+ */
+ typedef enum {
+ contentBINARY = 0
+ ,contentTEXT
+ ,contentHTML
+ } ContentType;
+
+/*
+ * Options used for "Accept:" string
+ */
+ typedef enum {
+ /* make the components powers of two so we can add them */
+ mediaINT = 1 /* internal types predefined in HTInit.c */
+ ,mediaEXT = 2 /* external types predefined in HTInit.c */
+ ,mediaCFG = 4 /* types, e.g., viewers, from lynx.cfg */
+ ,mediaUSR = 8 /* user's mime-types, etc. */
+ ,mediaSYS = 16 /* system's mime-types, etc. */
+ /* these are useful flavors for the options menu */
+ ,mediaOpt1 = mediaINT
+ ,mediaOpt2 = mediaINT + mediaCFG
+ ,mediaOpt3 = mediaINT + mediaCFG + mediaUSR
+ ,mediaOpt4 = mediaINT + mediaCFG + mediaUSR + mediaSYS
+ /* this is the flavor from pre-2.8.6 */
+ ,mediaALL = mediaINT + mediaEXT + mediaCFG + mediaUSR + mediaSYS
+ } AcceptMedia;
+
+/*
+ * Options used for "Accept-Encoding:" string
+ */
+ typedef enum {
+ encodingNONE = 0
+ ,encodingGZIP = 1
+ ,encodingDEFLATE = 2
+ ,encodingCOMPRESS = 4
+ ,encodingBZIP2 = 8
+ ,encodingBROTLI = 16
+ ,encodingALL = (encodingGZIP
+ + encodingDEFLATE
+ + encodingCOMPRESS
+ + encodingBZIP2
+ + encodingBROTLI)
+ } AcceptEncoding;
+
+/*
+
+HTSetPresentation: Register a system command to present a format
+
+ ON ENTRY,
+
+ rep is the MIME - style format name
+
+ command is the MAILCAP - style command template
+
+ testcommand is the MAILCAP - style testcommand template
+
+ quality A degradation faction 0..1.0
+
+ secs A limit on the time user will wait (0.0 for infinity)
+ secs_per_byte
+
+ maxbytes A limit on the length acceptable as input (0 infinite)
+
+ media Used in filtering presentation types for "Accept:"
+
+ */
+ extern void HTSetPresentation(const char *representation,
+ const char *command,
+ const char *testcommand,
+ double quality,
+ double secs,
+ double secs_per_byte,
+ long int maxbytes,
+ AcceptMedia media
+ );
+
+/*
+
+HTSetConversion: Register a conversion routine
+
+ ON ENTRY,
+
+ rep_in is the content-type input
+
+ rep_out is the resulting content-type
+
+ converter is the routine to make the stream to do it
+
+ */
+
+ extern void HTSetConversion(const char *rep_in,
+ const char *rep_out,
+ HTConverter *converter,
+ double quality,
+ double secs,
+ double secs_per_byte,
+ long int maxbytes,
+ AcceptMedia media
+ );
+
+/*
+
+HTStreamStack: Create a stack of streams
+
+ This is the routine which actually sets up the conversion. It currently
+ checks only for direct conversions, but multi-stage conversions are forseen.
+ It takes a stream into which the output should be sent in the final format,
+ builds the conversion stack, and returns a stream into which the data in the
+ input format should be fed. The anchor is passed because hypertxet objects
+ load information into the anchor object which represents them.
+
+ */
+ extern HTStream *HTStreamStack(HTFormat format_in,
+ HTFormat format_out,
+ HTStream *stream_out,
+ HTParentAnchor *anchor);
+
+/*
+HTReorderPresentation: put presentation near head of list
+
+ Look up a presentation (exact match only) and, if found, reorder it to the
+ start of the HTPresentations list. - kw
+ */
+
+ extern void HTReorderPresentation(HTFormat format_in,
+ HTFormat format_out);
+
+/*
+ * Setup 'get_accept' flag to denote presentations that are not redundant,
+ * and will be listed in "Accept:" header.
+ */
+ extern void HTFilterPresentations(void);
+
+/*
+
+HTStackValue: Find the cost of a filter stack
+
+ Must return the cost of the same stack which HTStreamStack would set up.
+
+ ON ENTRY,
+
+ format_in The format of the data to be converted
+
+ format_out The format required
+
+ initial_value The intrinsic "value" of the data before conversion on a scale
+ from 0 to 1
+
+ length The number of bytes expected in the input format
+
+ */
+ extern float HTStackValue(HTFormat format_in,
+ HTFormat rep_out,
+ double initial_value,
+ long int length);
+
+#define NO_VALUE_FOUND -1e20 /* returned if none found */
+
+/* Display the page while transfer in progress
+ * -------------------------------------------
+ *
+ * Repaint the page only when necessary.
+ * This is a traverse call for HText_pageDispaly() - it works!.
+ *
+ */
+ extern void HTDisplayPartial(void);
+
+ extern void HTFinishDisplayPartial(void);
+
+/*
+
+HTCopy: Copy a socket to a stream
+
+ This is used by the protocol engines to send data down a stream, typically
+ one which has been generated by HTStreamStack.
+
+ */
+ extern int HTCopy(HTParentAnchor *anchor,
+ int file_number,
+ void *handle,
+ HTStream *sink);
+
+/*
+
+HTFileCopy: Copy a file to a stream
+
+ This is used by the protocol engines to send data down a stream, typically
+ one which has been generated by HTStreamStack. It is currently called by
+ HTParseFile
+
+ */
+ extern int HTFileCopy(FILE *fp,
+ HTStream *sink);
+
+#ifdef USE_SOURCE_CACHE
+/*
+
+HTMemCopy: Copy a memory chunk to a stream
+
+ This is used by the protocol engines to send data down a stream, typically
+ one which has been generated by HTStreamStack. It is currently called by
+ HTParseMem
+
+ */
+ extern int HTMemCopy(HTChunk *chunk,
+ HTStream *sink);
+#endif
+
+/*
+
+HTCopyNoCR: Copy a socket to a stream, stripping CR characters.
+
+ It is slower than HTCopy .
+
+ */
+
+ extern void HTCopyNoCR(HTParentAnchor *anchor,
+ int file_number,
+ HTStream *sink);
+
+/*
+
+Clear input buffer and set file number
+
+ This routine and the one below provide simple character input from sockets.
+ (They are left over from the older architecture and may not be used very
+ much.) The existence of a common routine and buffer saves memory space in
+ small implementations.
+
+ */
+ extern void HTInitInput(int file_number);
+
+/*
+
+Get next character from buffer
+
+ */
+ extern int interrupted_in_htgetcharacter;
+ extern int HTGetCharacter(void);
+
+/*
+
+HTParseSocket: Parse a socket given its format
+
+ This routine is called by protocol modules to load an object. uses
+ HTStreamStack and the copy routines above. Returns HT_LOADED if successful,
+ <0 if not.
+
+ */
+ extern int HTParseSocket(HTFormat format_in,
+ HTFormat format_out,
+ HTParentAnchor *anchor,
+ int file_number,
+ HTStream *sink);
+
+/*
+
+HTParseFile: Parse a File through a file pointer
+
+ This routine is called by protocols modules to load an object. uses
+ HTStreamStack and HTFileCopy. Returns HT_LOADED if successful, can also
+ return HT_PARTIAL_CONTENT, HT_NO_DATA, or other <0 for failure.
+
+ */
+ extern int HTParseFile(HTFormat format_in,
+ HTFormat format_out,
+ HTParentAnchor *anchor,
+ FILE *fp,
+ HTStream *sink);
+
+#ifdef USE_SOURCE_CACHE
+/*
+
+HTParseMem: Parse a document in memory
+
+ This routine is called by protocols modules to load an object. uses
+ HTStreamStack and HTMemCopy. Returns HT_LOADED if successful, can also
+ return <0 for failure.
+
+ */
+ extern int HTParseMem(HTFormat format_in,
+ HTFormat format_out,
+ HTParentAnchor *anchor,
+ HTChunk *chunk,
+ HTStream *sink);
+#endif
+
+#ifdef USE_ZLIB
+/*
+HTParseGzFile: Parse a gzip'ed File through a file pointer
+
+ This routine is called by protocols modules to load an object. uses
+ HTStreamStack and HTGzFileCopy. Returns HT_LOADED if successful, can also
+ return HT_PARTIAL_CONTENT, HT_NO_DATA, or other <0 for failure.
+ */
+ extern int HTParseGzFile(HTFormat format_in,
+ HTFormat format_out,
+ HTParentAnchor *anchor,
+ gzFile gzfp,
+ HTStream *sink);
+
+/*
+HTParseZzFile: Parse a deflate'd File through a file pointer
+
+ This routine is called by protocols modules to load an object. uses
+ HTStreamStack and HTZzFileCopy. Returns HT_LOADED if successful, can also
+ return HT_PARTIAL_CONTENT, HT_NO_DATA, or other <0 for failure.
+ */
+ extern int HTParseZzFile(HTFormat format_in,
+ HTFormat format_out,
+ HTParentAnchor *anchor,
+ FILE *zzfp,
+ HTStream *sink);
+
+#endif /* USE_ZLIB */
+
+#ifdef USE_BZLIB
+/*
+HTParseBzFile: Parse a bzip2'ed File through a file pointer
+
+ This routine is called by protocols modules to load an object. uses
+ HTStreamStack and HTBzFileCopy. Returns HT_LOADED if successful, can also
+ return HT_PARTIAL_CONTENT, HT_NO_DATA, or other <0 for failure.
+ */
+ extern int HTParseBzFile(HTFormat format_in,
+ HTFormat format_out,
+ HTParentAnchor *anchor,
+ BZFILE * bzfp,
+ HTStream *sink);
+
+#endif /* USE_BZLIB */
+
+#ifdef USE_BROTLI
+/*
+HTParseBrFile: Parse a brotli'ed File through a file pointer
+
+ This routine is called by protocols modules to load an object. uses
+ HTStreamStack and HTBrFileCopy. Returns HT_LOADED if successful, can also
+ return HT_PARTIAL_CONTENT, HT_NO_DATA, or other <0 for failure.
+ */
+ extern int HTParseBrFile(HTFormat format_in,
+ HTFormat format_out,
+ HTParentAnchor *anchor,
+ FILE *brfp,
+ HTStream *sink);
+
+#endif /* USE_BROTLI */
+
+/*
+
+HTNetToText: Convert Net ASCII to local representation
+
+ This is a filter stream suitable for taking text from a socket and passing
+ it into a stream which expects text in the local C representation. It does
+ ASCII and newline conversion. As usual, pass its output stream to it when
+ creating it.
+
+ */
+ extern HTStream *HTNetToText(HTStream *sink);
+
+/*
+
+HTFormatInit: Set up default presentations and conversions
+
+ These are defined in HTInit.c or HTSInit.c if these have been replaced. If
+ you don't call this routine, and you don't define any presentations, then
+ this routine will automatically be called the first time a conversion is
+ needed. However, if you explicitly add some conversions (eg using
+ HTLoadRules) then you may want also to explicitly call this to get the
+ defaults as well.
+
+ */
+ extern void HTFormatInit(void);
+
+/*
+
+Epilogue
+
+ */
+ extern BOOL HTOutputSource; /* Flag: shortcut parser */
+
+#ifdef __cplusplus
+}
+#endif
+#endif /* HTFORMAT_H */