diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-21 20:56:19 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-21 20:56:19 +0000 |
commit | 0b6210cd37b68b94252cb798598b12974a20e1c1 (patch) | |
tree | e371686554a877842d95aa94f100bee552ff2a8e /llhttp/src | |
parent | Initial commit. (diff) | |
download | node-undici-0b6210cd37b68b94252cb798598b12974a20e1c1.tar.xz node-undici-0b6210cd37b68b94252cb798598b12974a20e1c1.zip |
Adding upstream version 5.28.2+dfsg1+~cs23.11.12.3. (refs: upstream/5.28.2+dfsg1+_cs23.11.12.3, upstream)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'llhttp/src')
-rw-r--r-- | llhttp/src/common.gypi | 46 | ||||
-rw-r--r-- | llhttp/src/llhttp.gyp | 22 | ||||
-rw-r--r-- | llhttp/src/llhttp.ts | 7 | ||||
-rw-r--r-- | llhttp/src/llhttp/c-headers.ts | 106 | ||||
-rw-r--r-- | llhttp/src/llhttp/constants.ts | 540 | ||||
-rw-r--r-- | llhttp/src/llhttp/http.ts | 1299 | ||||
-rw-r--r-- | llhttp/src/llhttp/url.ts | 220 | ||||
-rw-r--r-- | llhttp/src/llhttp/utils.ts | 27 | ||||
-rw-r--r-- | llhttp/src/native/api.c | 510 | ||||
-rw-r--r-- | llhttp/src/native/api.h | 355 | ||||
-rw-r--r-- | llhttp/src/native/http.c | 170 |
11 files changed, 3302 insertions, 0 deletions
diff --git a/llhttp/src/common.gypi b/llhttp/src/common.gypi new file mode 100644 index 0000000..ef7549f --- /dev/null +++ b/llhttp/src/common.gypi @@ -0,0 +1,46 @@ +{ + 'target_defaults': { + 'default_configuration': 'Debug', + 'configurations': { + # TODO: hoist these out and put them somewhere common, because + # RuntimeLibrary MUST MATCH across the entire project + 'Debug': { + 'defines': [ 'DEBUG', '_DEBUG' ], + 'cflags': [ '-Wall', '-Wextra', '-O0', '-g', '-ftrapv' ], + 'msvs_settings': { + 'VCCLCompilerTool': { + 'RuntimeLibrary': 1, # static debug + }, + }, + }, + 'Release': { + 'defines': [ 'NDEBUG' ], + 'cflags': [ '-Wall', '-Wextra', '-O3' ], + 'msvs_settings': { + 'VCCLCompilerTool': { + 'RuntimeLibrary': 0, # static release + }, + }, + } + }, + 'msvs_settings': { + 'VCCLCompilerTool': { + # Compile as C++. llhttp.c is actually C99, but C++ is + # close enough in this case. + 'CompileAs': 2, + }, + 'VCLibrarianTool': { + }, + 'VCLinkerTool': { + 'GenerateDebugInformation': 'true', + }, + }, + 'conditions': [ + ['OS == "win"', { + 'defines': [ + 'WIN32' + ], + }] + ], + }, +} diff --git a/llhttp/src/llhttp.gyp b/llhttp/src/llhttp.gyp new file mode 100644 index 0000000..c7b8800 --- /dev/null +++ b/llhttp/src/llhttp.gyp @@ -0,0 +1,22 @@ +{ + 'variables': { + 'llhttp_sources': [ + 'src/llhttp.c', + 'src/api.c', + 'src/http.c', + ] + }, + 'targets': [ + { + 'target_name': 'llhttp', + 'type': 'static_library', + 'include_dirs': [ '.', 'include' ], + 'direct_dependent_settings': { + 'include_dirs': [ 'include' ], + }, + 'sources': [ + '<@(llhttp_sources)', + ], + }, + ] +} diff --git a/llhttp/src/llhttp.ts b/llhttp/src/llhttp.ts new file mode 100644 index 0000000..ba36b01 --- /dev/null +++ b/llhttp/src/llhttp.ts @@ -0,0 +1,7 @@ +import * as constants from './llhttp/constants'; + +export { constants }; + +export { HTTP } from './llhttp/http'; +export { URL } from './llhttp/url'; +export { CHeaders } from './llhttp/c-headers'; diff --git 
a/llhttp/src/llhttp/c-headers.ts b/llhttp/src/llhttp/c-headers.ts new file mode 100644 index 0000000..fad66de --- /dev/null +++ b/llhttp/src/llhttp/c-headers.ts @@ -0,0 +1,106 @@ +import * as constants from './constants'; +import { enumToMap, IEnumMap } from './utils'; + +type Encoding = 'none' | 'hex'; + +export class CHeaders { + public build(): string { + let res = ''; + + res += '#ifndef LLLLHTTP_C_HEADERS_\n'; + res += '#define LLLLHTTP_C_HEADERS_\n'; + + res += '#ifdef __cplusplus\n'; + res += 'extern "C" {\n'; + res += '#endif\n'; + + res += '\n'; + + const errorMap = enumToMap(constants.ERROR); + const methodMap = enumToMap(constants.METHODS); + const httpMethodMap = enumToMap(constants.METHODS, constants.METHODS_HTTP, [ + constants.METHODS.PRI, + ]); + const rtspMethodMap = enumToMap(constants.METHODS, constants.METHODS_RTSP); + const statusMap = enumToMap(constants.STATUSES, constants.STATUSES_HTTP); + + res += this.buildEnum('llhttp_errno', 'HPE', errorMap); + res += '\n'; + res += this.buildEnum('llhttp_flags', 'F', enumToMap(constants.FLAGS), + 'hex'); + res += '\n'; + res += this.buildEnum('llhttp_lenient_flags', 'LENIENT', + enumToMap(constants.LENIENT_FLAGS), 'hex'); + res += '\n'; + res += this.buildEnum('llhttp_type', 'HTTP', + enumToMap(constants.TYPE)); + res += '\n'; + res += this.buildEnum('llhttp_finish', 'HTTP_FINISH', + enumToMap(constants.FINISH)); + res += '\n'; + res += this.buildEnum('llhttp_method', 'HTTP', methodMap); + res += '\n'; + res += this.buildEnum('llhttp_status', 'HTTP_STATUS', statusMap); + + res += '\n'; + + res += this.buildMap('HTTP_ERRNO', errorMap); + res += '\n'; + res += this.buildMap('HTTP_METHOD', httpMethodMap); + res += '\n'; + res += this.buildMap('RTSP_METHOD', rtspMethodMap); + res += '\n'; + res += this.buildMap('HTTP_ALL_METHOD', methodMap); + res += '\n'; + res += this.buildMap('HTTP_STATUS', statusMap); + + res += '\n'; + + res += '#ifdef __cplusplus\n'; + res += '} /* extern "C" */\n'; + res += 
'#endif\n'; + res += '#endif /* LLLLHTTP_C_HEADERS_ */\n'; + + return res; + } + + private buildEnum(name: string, prefix: string, map: IEnumMap, + encoding: Encoding = 'none'): string { + let res = ''; + + res += `enum ${name} {\n`; + const keys = Object.keys(map); + const keysLength = keys.length; + for (let i = 0; i < keysLength; i++) { + const key = keys[i]; + const isLast = i === keysLength - 1; + + let value: number | string = map[key]; + + if (encoding === 'hex') { + value = `0x${value.toString(16)}`; + } + + res += ` ${prefix}_${key.replace(/-/g, '')} = ${value}`; + if (!isLast) { + res += ',\n'; + } + } + res += '\n};\n'; + res += `typedef enum ${name} ${name}_t;\n`; + + return res; + } + + private buildMap(name: string, map: IEnumMap): string { + let res = ''; + + res += `#define ${name}_MAP(XX) \\\n`; + for (const [key, value] of Object.entries(map)) { + res += ` XX(${value!}, ${key.replace(/-/g, '')}, ${key}) \\\n`; + } + res += '\n'; + + return res; + } +} diff --git a/llhttp/src/llhttp/constants.ts b/llhttp/src/llhttp/constants.ts new file mode 100644 index 0000000..00fc523 --- /dev/null +++ b/llhttp/src/llhttp/constants.ts @@ -0,0 +1,540 @@ +import { enumToMap, IEnumMap } from './utils'; + +// C headers + +export enum ERROR { + OK = 0, + INTERNAL = 1, + STRICT = 2, + CR_EXPECTED = 25, + LF_EXPECTED = 3, + UNEXPECTED_CONTENT_LENGTH = 4, + UNEXPECTED_SPACE = 30, + CLOSED_CONNECTION = 5, + INVALID_METHOD = 6, + INVALID_URL = 7, + INVALID_CONSTANT = 8, + INVALID_VERSION = 9, + INVALID_HEADER_TOKEN = 10, + INVALID_CONTENT_LENGTH = 11, + INVALID_CHUNK_SIZE = 12, + INVALID_STATUS = 13, + INVALID_EOF_STATE = 14, + INVALID_TRANSFER_ENCODING = 15, + + CB_MESSAGE_BEGIN = 16, + CB_HEADERS_COMPLETE = 17, + CB_MESSAGE_COMPLETE = 18, + CB_CHUNK_HEADER = 19, + CB_CHUNK_COMPLETE = 20, + + PAUSED = 21, + PAUSED_UPGRADE = 22, + PAUSED_H2_UPGRADE = 23, + + USER = 24, + + CB_URL_COMPLETE = 26, + CB_STATUS_COMPLETE = 27, + CB_METHOD_COMPLETE = 32, + CB_VERSION_COMPLETE = 
33, + CB_HEADER_FIELD_COMPLETE = 28, + CB_HEADER_VALUE_COMPLETE = 29, + CB_CHUNK_EXTENSION_NAME_COMPLETE = 34, + CB_CHUNK_EXTENSION_VALUE_COMPLETE = 35, + CB_RESET = 31, +} + +export enum TYPE { + BOTH = 0, // default + REQUEST = 1, + RESPONSE = 2, +} + +export enum FLAGS { + CONNECTION_KEEP_ALIVE = 1 << 0, + CONNECTION_CLOSE = 1 << 1, + CONNECTION_UPGRADE = 1 << 2, + CHUNKED = 1 << 3, + UPGRADE = 1 << 4, + CONTENT_LENGTH = 1 << 5, + SKIPBODY = 1 << 6, + TRAILING = 1 << 7, + // 1 << 8 is unused + TRANSFER_ENCODING = 1 << 9, +} + +export enum LENIENT_FLAGS { + HEADERS = 1 << 0, + CHUNKED_LENGTH = 1 << 1, + KEEP_ALIVE = 1 << 2, + TRANSFER_ENCODING = 1 << 3, + VERSION = 1 << 4, + DATA_AFTER_CLOSE = 1 << 5, + OPTIONAL_LF_AFTER_CR = 1 << 6, + OPTIONAL_CRLF_AFTER_CHUNK = 1 << 7, + OPTIONAL_CR_BEFORE_LF = 1 << 8, + SPACES_AFTER_CHUNK_SIZE = 1 << 9, +} + +export enum METHODS { + DELETE = 0, + GET = 1, + HEAD = 2, + POST = 3, + PUT = 4, + /* pathological */ + CONNECT = 5, + OPTIONS = 6, + TRACE = 7, + /* WebDAV */ + COPY = 8, + LOCK = 9, + MKCOL = 10, + MOVE = 11, + PROPFIND = 12, + PROPPATCH = 13, + SEARCH = 14, + UNLOCK = 15, + BIND = 16, + REBIND = 17, + UNBIND = 18, + ACL = 19, + /* subversion */ + REPORT = 20, + MKACTIVITY = 21, + CHECKOUT = 22, + MERGE = 23, + /* upnp */ + 'M-SEARCH' = 24, + NOTIFY = 25, + SUBSCRIBE = 26, + UNSUBSCRIBE = 27, + /* RFC-5789 */ + PATCH = 28, + PURGE = 29, + /* CalDAV */ + MKCALENDAR = 30, + /* RFC-2068, section 19.6.1.2 */ + LINK = 31, + UNLINK = 32, + /* icecast */ + SOURCE = 33, + /* RFC-7540, section 11.6 */ + PRI = 34, + /* RFC-2326 RTSP */ + DESCRIBE = 35, + ANNOUNCE = 36, + SETUP = 37, + PLAY = 38, + PAUSE = 39, + TEARDOWN = 40, + GET_PARAMETER = 41, + SET_PARAMETER = 42, + REDIRECT = 43, + RECORD = 44, + /* RAOP */ + FLUSH = 45, +} + +export const METHODS_HTTP = [ + METHODS.DELETE, + METHODS.GET, + METHODS.HEAD, + METHODS.POST, + METHODS.PUT, + METHODS.CONNECT, + METHODS.OPTIONS, + METHODS.TRACE, + METHODS.COPY, + METHODS.LOCK, + 
METHODS.MKCOL, + METHODS.MOVE, + METHODS.PROPFIND, + METHODS.PROPPATCH, + METHODS.SEARCH, + METHODS.UNLOCK, + METHODS.BIND, + METHODS.REBIND, + METHODS.UNBIND, + METHODS.ACL, + METHODS.REPORT, + METHODS.MKACTIVITY, + METHODS.CHECKOUT, + METHODS.MERGE, + METHODS['M-SEARCH'], + METHODS.NOTIFY, + METHODS.SUBSCRIBE, + METHODS.UNSUBSCRIBE, + METHODS.PATCH, + METHODS.PURGE, + METHODS.MKCALENDAR, + METHODS.LINK, + METHODS.UNLINK, + METHODS.PRI, + + // TODO(indutny): should we allow it with HTTP? + METHODS.SOURCE, +]; + +export const METHODS_ICE = [ + METHODS.SOURCE, +]; + +export const METHODS_RTSP = [ + METHODS.OPTIONS, + METHODS.DESCRIBE, + METHODS.ANNOUNCE, + METHODS.SETUP, + METHODS.PLAY, + METHODS.PAUSE, + METHODS.TEARDOWN, + METHODS.GET_PARAMETER, + METHODS.SET_PARAMETER, + METHODS.REDIRECT, + METHODS.RECORD, + METHODS.FLUSH, + + // For AirPlay + METHODS.GET, + METHODS.POST, +]; + +export const METHOD_MAP = enumToMap(METHODS); +export const H_METHOD_MAP: IEnumMap = {}; + +for (const key of Object.keys(METHOD_MAP)) { + if (/^H/.test(key)) { + H_METHOD_MAP[key] = METHOD_MAP[key]; + } +} + +export enum STATUSES { + CONTINUE = 100, + SWITCHING_PROTOCOLS = 101, + PROCESSING = 102, + EARLY_HINTS = 103, + RESPONSE_IS_STALE = 110, // Unofficial + REVALIDATION_FAILED = 111, // Unofficial + DISCONNECTED_OPERATION = 112, // Unofficial + HEURISTIC_EXPIRATION = 113, // Unofficial + MISCELLANEOUS_WARNING = 199, // Unofficial + OK = 200, + CREATED = 201, + ACCEPTED = 202, + NON_AUTHORITATIVE_INFORMATION = 203, + NO_CONTENT = 204, + RESET_CONTENT = 205, + PARTIAL_CONTENT = 206, + MULTI_STATUS = 207, + ALREADY_REPORTED = 208, + TRANSFORMATION_APPLIED = 214, // Unofficial + IM_USED = 226, + MISCELLANEOUS_PERSISTENT_WARNING = 299, // Unofficial + MULTIPLE_CHOICES = 300, + MOVED_PERMANENTLY = 301, + FOUND = 302, + SEE_OTHER = 303, + NOT_MODIFIED = 304, + USE_PROXY = 305, + SWITCH_PROXY = 306, // No longer used + TEMPORARY_REDIRECT = 307, + PERMANENT_REDIRECT = 308, + BAD_REQUEST = 400, 
+ UNAUTHORIZED = 401, + PAYMENT_REQUIRED = 402, + FORBIDDEN = 403, + NOT_FOUND = 404, + METHOD_NOT_ALLOWED = 405, + NOT_ACCEPTABLE = 406, + PROXY_AUTHENTICATION_REQUIRED = 407, + REQUEST_TIMEOUT = 408, + CONFLICT = 409, + GONE = 410, + LENGTH_REQUIRED = 411, + PRECONDITION_FAILED = 412, + PAYLOAD_TOO_LARGE = 413, + URI_TOO_LONG = 414, + UNSUPPORTED_MEDIA_TYPE = 415, + RANGE_NOT_SATISFIABLE = 416, + EXPECTATION_FAILED = 417, + IM_A_TEAPOT = 418, + PAGE_EXPIRED = 419, // Unofficial + ENHANCE_YOUR_CALM = 420, // Unofficial + MISDIRECTED_REQUEST = 421, + UNPROCESSABLE_ENTITY = 422, + LOCKED = 423, + FAILED_DEPENDENCY = 424, + TOO_EARLY = 425, + UPGRADE_REQUIRED = 426, + PRECONDITION_REQUIRED = 428, + TOO_MANY_REQUESTS = 429, + REQUEST_HEADER_FIELDS_TOO_LARGE_UNOFFICIAL = 430, // Unofficial + REQUEST_HEADER_FIELDS_TOO_LARGE = 431, + LOGIN_TIMEOUT = 440, // Unofficial + NO_RESPONSE = 444, // Unofficial + RETRY_WITH = 449, // Unofficial + BLOCKED_BY_PARENTAL_CONTROL = 450, // Unofficial + UNAVAILABLE_FOR_LEGAL_REASONS = 451, + CLIENT_CLOSED_LOAD_BALANCED_REQUEST = 460, // Unofficial + INVALID_X_FORWARDED_FOR = 463, // Unofficial + REQUEST_HEADER_TOO_LARGE = 494, // Unofficial + SSL_CERTIFICATE_ERROR = 495, // Unofficial + SSL_CERTIFICATE_REQUIRED = 496, // Unofficial + HTTP_REQUEST_SENT_TO_HTTPS_PORT = 497, // Unofficial + INVALID_TOKEN = 498, // Unofficial + CLIENT_CLOSED_REQUEST = 499, // Unofficial + INTERNAL_SERVER_ERROR = 500, + NOT_IMPLEMENTED = 501, + BAD_GATEWAY = 502, + SERVICE_UNAVAILABLE = 503, + GATEWAY_TIMEOUT = 504, + HTTP_VERSION_NOT_SUPPORTED = 505, + VARIANT_ALSO_NEGOTIATES = 506, + INSUFFICIENT_STORAGE = 507, + LOOP_DETECTED = 508, + BANDWIDTH_LIMIT_EXCEEDED = 509, + NOT_EXTENDED = 510, + NETWORK_AUTHENTICATION_REQUIRED = 511, + WEB_SERVER_UNKNOWN_ERROR = 520, // Unofficial + WEB_SERVER_IS_DOWN = 521, // Unofficial + CONNECTION_TIMEOUT = 522, // Unofficial + ORIGIN_IS_UNREACHABLE = 523, // Unofficial + TIMEOUT_OCCURED = 524, // Unofficial + 
SSL_HANDSHAKE_FAILED = 525, // Unofficial + INVALID_SSL_CERTIFICATE = 526, // Unofficial + RAILGUN_ERROR = 527, // Unofficial + SITE_IS_OVERLOADED = 529, // Unofficial + SITE_IS_FROZEN = 530, // Unofficial + IDENTITY_PROVIDER_AUTHENTICATION_ERROR = 561, // Unofficial + NETWORK_READ_TIMEOUT = 598, // Unofficial + NETWORK_CONNECT_TIMEOUT = 599, // Unofficial +} + +export const STATUSES_HTTP = [ + STATUSES.CONTINUE, + STATUSES.SWITCHING_PROTOCOLS, + STATUSES.PROCESSING, + STATUSES.EARLY_HINTS, + STATUSES.RESPONSE_IS_STALE, + STATUSES.REVALIDATION_FAILED, + STATUSES.DISCONNECTED_OPERATION, + STATUSES.HEURISTIC_EXPIRATION, + STATUSES.MISCELLANEOUS_WARNING, + STATUSES.OK, + STATUSES.CREATED, + STATUSES.ACCEPTED, + STATUSES.NON_AUTHORITATIVE_INFORMATION, + STATUSES.NO_CONTENT, + STATUSES.RESET_CONTENT, + STATUSES.PARTIAL_CONTENT, + STATUSES.MULTI_STATUS, + STATUSES.ALREADY_REPORTED, + STATUSES.TRANSFORMATION_APPLIED, + STATUSES.IM_USED, + STATUSES.MISCELLANEOUS_PERSISTENT_WARNING, + STATUSES.MULTIPLE_CHOICES, + STATUSES.MOVED_PERMANENTLY, + STATUSES.FOUND, + STATUSES.SEE_OTHER, + STATUSES.NOT_MODIFIED, + STATUSES.USE_PROXY, + STATUSES.SWITCH_PROXY, + STATUSES.TEMPORARY_REDIRECT, + STATUSES.PERMANENT_REDIRECT, + STATUSES.BAD_REQUEST, + STATUSES.UNAUTHORIZED, + STATUSES.PAYMENT_REQUIRED, + STATUSES.FORBIDDEN, + STATUSES.NOT_FOUND, + STATUSES.METHOD_NOT_ALLOWED, + STATUSES.NOT_ACCEPTABLE, + STATUSES.PROXY_AUTHENTICATION_REQUIRED, + STATUSES.REQUEST_TIMEOUT, + STATUSES.CONFLICT, + STATUSES.GONE, + STATUSES.LENGTH_REQUIRED, + STATUSES.PRECONDITION_FAILED, + STATUSES.PAYLOAD_TOO_LARGE, + STATUSES.URI_TOO_LONG, + STATUSES.UNSUPPORTED_MEDIA_TYPE, + STATUSES.RANGE_NOT_SATISFIABLE, + STATUSES.EXPECTATION_FAILED, + STATUSES.IM_A_TEAPOT, + STATUSES.PAGE_EXPIRED, + STATUSES.ENHANCE_YOUR_CALM, + STATUSES.MISDIRECTED_REQUEST, + STATUSES.UNPROCESSABLE_ENTITY, + STATUSES.LOCKED, + STATUSES.FAILED_DEPENDENCY, + STATUSES.TOO_EARLY, + STATUSES.UPGRADE_REQUIRED, + 
STATUSES.PRECONDITION_REQUIRED, + STATUSES.TOO_MANY_REQUESTS, + STATUSES.REQUEST_HEADER_FIELDS_TOO_LARGE_UNOFFICIAL, + STATUSES.REQUEST_HEADER_FIELDS_TOO_LARGE, + STATUSES.LOGIN_TIMEOUT, + STATUSES.NO_RESPONSE, + STATUSES.RETRY_WITH, + STATUSES.BLOCKED_BY_PARENTAL_CONTROL, + STATUSES.UNAVAILABLE_FOR_LEGAL_REASONS, + STATUSES.CLIENT_CLOSED_LOAD_BALANCED_REQUEST, + STATUSES.INVALID_X_FORWARDED_FOR, + STATUSES.REQUEST_HEADER_TOO_LARGE, + STATUSES.SSL_CERTIFICATE_ERROR, + STATUSES.SSL_CERTIFICATE_REQUIRED, + STATUSES.HTTP_REQUEST_SENT_TO_HTTPS_PORT, + STATUSES.INVALID_TOKEN, + STATUSES.CLIENT_CLOSED_REQUEST, + STATUSES.INTERNAL_SERVER_ERROR, + STATUSES.NOT_IMPLEMENTED, + STATUSES.BAD_GATEWAY, + STATUSES.SERVICE_UNAVAILABLE, + STATUSES.GATEWAY_TIMEOUT, + STATUSES.HTTP_VERSION_NOT_SUPPORTED, + STATUSES.VARIANT_ALSO_NEGOTIATES, + STATUSES.INSUFFICIENT_STORAGE, + STATUSES.LOOP_DETECTED, + STATUSES.BANDWIDTH_LIMIT_EXCEEDED, + STATUSES.NOT_EXTENDED, + STATUSES.NETWORK_AUTHENTICATION_REQUIRED, + STATUSES.WEB_SERVER_UNKNOWN_ERROR, + STATUSES.WEB_SERVER_IS_DOWN, + STATUSES.CONNECTION_TIMEOUT, + STATUSES.ORIGIN_IS_UNREACHABLE, + STATUSES.TIMEOUT_OCCURED, + STATUSES.SSL_HANDSHAKE_FAILED, + STATUSES.INVALID_SSL_CERTIFICATE, + STATUSES.RAILGUN_ERROR, + STATUSES.SITE_IS_OVERLOADED, + STATUSES.SITE_IS_FROZEN, + STATUSES.IDENTITY_PROVIDER_AUTHENTICATION_ERROR, + STATUSES.NETWORK_READ_TIMEOUT, + STATUSES.NETWORK_CONNECT_TIMEOUT, +]; + +export enum FINISH { + SAFE = 0, + SAFE_WITH_CB = 1, + UNSAFE = 2, +} + +// Internal + +export type CharList = Array<string | number>; + +export const ALPHA: CharList = []; + +for (let i = 'A'.charCodeAt(0); i <= 'Z'.charCodeAt(0); i++) { + // Upper case + ALPHA.push(String.fromCharCode(i)); + + // Lower case + ALPHA.push(String.fromCharCode(i + 0x20)); +} + +export const NUM_MAP = { + 0: 0, 1: 1, 2: 2, 3: 3, 4: 4, + 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, +}; + +export const HEX_MAP = { + 0: 0, 1: 1, 2: 2, 3: 3, 4: 4, + 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, + A: 0XA, B: 
0XB, C: 0XC, D: 0XD, E: 0XE, F: 0XF, + a: 0xa, b: 0xb, c: 0xc, d: 0xd, e: 0xe, f: 0xf, +}; + +export const NUM: CharList = [ + '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', +]; + +export const ALPHANUM: CharList = ALPHA.concat(NUM); +export const MARK: CharList = [ '-', '_', '.', '!', '~', '*', '\'', '(', ')' ]; +export const USERINFO_CHARS: CharList = ALPHANUM + .concat(MARK) + .concat([ '%', ';', ':', '&', '=', '+', '$', ',' ]); + +// TODO(indutny): use RFC +export const URL_CHAR: CharList = ([ + '!', '"', '$', '%', '&', '\'', + '(', ')', '*', '+', ',', '-', '.', '/', + ':', ';', '<', '=', '>', + '@', '[', '\\', ']', '^', '_', + '`', + '{', '|', '}', '~', +] as CharList).concat(ALPHANUM); + +export const HEX: CharList = NUM.concat( + [ 'a', 'b', 'c', 'd', 'e', 'f', 'A', 'B', 'C', 'D', 'E', 'F' ]); + +/* Tokens as defined by rfc 2616. Also lowercases them. + * token = 1*<any CHAR except CTLs or separators> + * separators = "(" | ")" | "<" | ">" | "@" + * | "," | ";" | ":" | "\" | <"> + * | "/" | "[" | "]" | "?" 
| "=" + * | "{" | "}" | SP | HT + */ +export const TOKEN: CharList = ([ + '!', '#', '$', '%', '&', '\'', + '*', '+', '-', '.', + '^', '_', '`', + '|', '~', +] as CharList).concat(ALPHANUM); + +/* + * Verify that a char is a valid visible (printable) US-ASCII + * character or %x80-FF + */ +export const HEADER_CHARS: CharList = [ '\t' ]; +for (let i = 32; i <= 255; i++) { + if (i !== 127) { + HEADER_CHARS.push(i); + } +} + +// ',' = \x2c (decimal 44) +export const CONNECTION_TOKEN_CHARS: CharList = + HEADER_CHARS.filter((c: string | number) => c !== 44); + +export const QUOTED_STRING: CharList = [ '\t', ' ' ]; +for (let i = 0x21; i <= 0xff; i++) { + if (i !== 0x22 && i !== 0x5c) { // All characters in ASCII except \ and " + QUOTED_STRING.push(i); + } +} + +export const HTAB_SP_VCHAR_OBS_TEXT: CharList = [ '\t', ' ' ]; + +// VCHAR: https://tools.ietf.org/html/rfc5234#appendix-B.1 +for (let i = 0x21; i <= 0x7E; i++) { + HTAB_SP_VCHAR_OBS_TEXT.push(i); +} +// OBS_TEXT: https://datatracker.ietf.org/doc/html/rfc9110#name-collected-abnf +for (let i = 0x80; i <= 0xff; i++) { + HTAB_SP_VCHAR_OBS_TEXT.push(i); +} + +export const MAJOR = NUM_MAP; +export const MINOR = MAJOR; + +export enum HEADER_STATE { + GENERAL = 0, + CONNECTION = 1, + CONTENT_LENGTH = 2, + TRANSFER_ENCODING = 3, + UPGRADE = 4, + + CONNECTION_KEEP_ALIVE = 5, + CONNECTION_CLOSE = 6, + CONNECTION_UPGRADE = 7, + TRANSFER_ENCODING_CHUNKED = 8, +} + +export const SPECIAL_HEADERS = { + 'connection': HEADER_STATE.CONNECTION, + 'content-length': HEADER_STATE.CONTENT_LENGTH, + 'proxy-connection': HEADER_STATE.CONNECTION, + 'transfer-encoding': HEADER_STATE.TRANSFER_ENCODING, + 'upgrade': HEADER_STATE.UPGRADE, +}; diff --git a/llhttp/src/llhttp/http.ts b/llhttp/src/llhttp/http.ts new file mode 100644 index 0000000..6a201ff --- /dev/null +++ b/llhttp/src/llhttp/http.ts @@ -0,0 +1,1299 @@ +import * as assert from 'assert'; +import { LLParse, source } from 'llparse'; + +import Match = source.node.Match; +import Node = 
source.node.Node; + +import { + CharList, + CONNECTION_TOKEN_CHARS, ERROR, FINISH, FLAGS, H_METHOD_MAP, HEADER_CHARS, + HEADER_STATE, HEX_MAP, HTAB_SP_VCHAR_OBS_TEXT, + LENIENT_FLAGS, + MAJOR, METHOD_MAP, METHODS, METHODS_HTTP, METHODS_ICE, METHODS_RTSP, + MINOR, NUM_MAP, QUOTED_STRING, SPECIAL_HEADERS, + TOKEN, TYPE, +} from './constants'; +import { URL } from './url'; + +type MaybeNode = string | Match | Node; + +const NODES: ReadonlyArray<string> = [ + 'start', + 'after_start', + 'start_req', + 'after_start_req', + 'start_res', + 'start_req_or_res', + + 'req_or_res_method', + + 'res_http_major', + 'res_http_dot', + 'res_http_minor', + 'res_http_end', + 'res_after_version', + 'res_status_code_digit_1', + 'res_status_code_digit_2', + 'res_status_code_digit_3', + 'res_status_code_otherwise', + 'res_status_start', + 'res_status', + 'res_line_almost_done', + + 'req_first_space_before_url', + 'req_spaces_before_url', + 'req_http_start', + 'req_http_version', + 'req_http_major', + 'req_http_dot', + 'req_http_minor', + 'req_http_end', + 'req_http_complete', + 'req_http_complete_crlf', + + 'req_pri_upgrade', + + 'headers_start', + 'header_field_start', + 'header_field', + 'header_field_colon', + 'header_field_colon_discard_ws', + 'header_field_general', + 'header_field_general_otherwise', + 'header_value_discard_ws', + 'header_value_discard_ws_almost_done', + 'header_value_discard_lws', + 'header_value_start', + 'header_value', + 'header_value_otherwise', + 'header_value_lenient', + 'header_value_lenient_failed', + 'header_value_lws', + 'header_value_te_chunked', + 'header_value_te_chunked_last', + 'header_value_te_token', + 'header_value_te_token_ows', + 'header_value_content_length_once', + 'header_value_content_length', + 'header_value_content_length_ws', + 'header_value_connection', + 'header_value_connection_ws', + 'header_value_connection_token', + 'header_value_almost_done', + + 'headers_almost_done', + 'headers_done', + + 'chunk_size_start', + 'chunk_size_digit', 
+ 'chunk_size', + 'chunk_size_otherwise', + 'chunk_size_almost_done', + 'chunk_size_almost_done_lf', + 'chunk_extensions', + 'chunk_extension_name', + 'chunk_extension_value', + 'chunk_extension_quoted_value', + 'chunk_extension_quoted_value_quoted_pair', + 'chunk_extension_quoted_value_done', + 'chunk_data', + 'chunk_data_almost_done', + 'chunk_complete', + 'body_identity', + 'body_identity_eof', + + 'message_done', + + 'eof', + 'cleanup', + 'closed', + 'restart', +]; + +interface ISpanMap { + readonly status: source.Span; + readonly method: source.Span; + readonly version: source.Span; + readonly headerField: source.Span; + readonly headerValue: source.Span; + readonly chunkExtensionName: source.Span; + readonly chunkExtensionValue: source.Span; + readonly body: source.Span; +} + +interface ICallbackMap { + readonly onMessageBegin: source.code.Code; + readonly onUrlComplete: source.code.Code; + readonly onMethodComplete: source.code.Code; + readonly onVersionComplete: source.code.Code; + readonly onStatusComplete: source.code.Code; + readonly beforeHeadersComplete: source.code.Code; + readonly onHeaderFieldComplete: source.code.Code; + readonly onHeaderValueComplete: source.code.Code; + readonly onHeadersComplete: source.code.Code; + readonly afterHeadersComplete: source.code.Code; + readonly onChunkHeader: source.code.Code; + readonly onChunkExtensionName: source.code.Code; + readonly onChunkExtensionValue: source.code.Code; + readonly onChunkComplete: source.code.Code; + readonly onMessageComplete: source.code.Code; + readonly afterMessageComplete: source.code.Code; + readonly onReset: source.code.Code; +} + +interface IMulTargets { + readonly overflow: string | Node; + readonly success: string | Node; +} + +interface IMulOptions { + readonly base: number; + readonly max?: number; + readonly signed: boolean; +} + +interface IIsEqualTargets { + readonly equal: string | Node; + readonly notEqual: string | Node; +} + +export interface IHTTPResult { + readonly 
entry: Node; +} + +export class HTTP { + private readonly url: URL; + private readonly TOKEN: CharList; + private readonly span: ISpanMap; + private readonly callback: ICallbackMap; + private readonly nodes: Map<string, Match> = new Map(); + + constructor(private readonly llparse: LLParse) { + const p = llparse; + + this.url = new URL(p); + this.TOKEN = TOKEN; + + this.span = { + body: p.span(p.code.span('llhttp__on_body')), + chunkExtensionName: p.span(p.code.span('llhttp__on_chunk_extension_name')), + chunkExtensionValue: p.span(p.code.span('llhttp__on_chunk_extension_value')), + headerField: p.span(p.code.span('llhttp__on_header_field')), + headerValue: p.span(p.code.span('llhttp__on_header_value')), + method: p.span(p.code.span('llhttp__on_method')), + status: p.span(p.code.span('llhttp__on_status')), + version: p.span(p.code.span('llhttp__on_version')), + }; + + /* tslint:disable:object-literal-sort-keys */ + this.callback = { + // User callbacks + onUrlComplete: p.code.match('llhttp__on_url_complete'), + onStatusComplete: p.code.match('llhttp__on_status_complete'), + onMethodComplete: p.code.match('llhttp__on_method_complete'), + onVersionComplete: p.code.match('llhttp__on_version_complete'), + onHeaderFieldComplete: p.code.match('llhttp__on_header_field_complete'), + onHeaderValueComplete: p.code.match('llhttp__on_header_value_complete'), + onHeadersComplete: p.code.match('llhttp__on_headers_complete'), + onMessageBegin: p.code.match('llhttp__on_message_begin'), + onMessageComplete: p.code.match('llhttp__on_message_complete'), + onChunkHeader: p.code.match('llhttp__on_chunk_header'), + onChunkExtensionName: p.code.match('llhttp__on_chunk_extension_name_complete'), + onChunkExtensionValue: p.code.match('llhttp__on_chunk_extension_value_complete'), + onChunkComplete: p.code.match('llhttp__on_chunk_complete'), + onReset: p.code.match('llhttp__on_reset'), + + // Internal callbacks `src/http.c` + beforeHeadersComplete: + 
p.code.match('llhttp__before_headers_complete'), + afterHeadersComplete: p.code.match('llhttp__after_headers_complete'), + afterMessageComplete: p.code.match('llhttp__after_message_complete'), + }; + /* tslint:enable:object-literal-sort-keys */ + + for (const name of NODES) { + this.nodes.set(name, p.node(name) as Match); + } + } + + public build(): IHTTPResult { + const p = this.llparse; + + p.property('i64', 'content_length'); + p.property('i8', 'type'); + p.property('i8', 'method'); + p.property('i8', 'http_major'); + p.property('i8', 'http_minor'); + p.property('i8', 'header_state'); + p.property('i16', 'lenient_flags'); + p.property('i8', 'upgrade'); + p.property('i8', 'finish'); + p.property('i16', 'flags'); + p.property('i16', 'status_code'); + p.property('i8', 'initial_message_completed'); + + // Verify defaults + assert.strictEqual(FINISH.SAFE, 0); + assert.strictEqual(TYPE.BOTH, 0); + + // Shared settings (to be used in C wrapper) + p.property('ptr', 'settings'); + + this.buildLine(); + this.buildHeaders(); + + return { + entry: this.node('start'), + }; + } + + private buildLine(): void { + const p = this.llparse; + const span = this.span; + const n = (name: string): Match => this.node<Match>(name); + + const url = this.url.build(); + + const switchType = this.load('type', { + [TYPE.REQUEST]: n('start_req'), + [TYPE.RESPONSE]: n('start_res'), + }, n('start_req_or_res')); + + n('start') + .match([ '\r', '\n' ], n('start')) + .otherwise( + this.load('initial_message_completed', { + 1: this.invokePausable('on_reset', ERROR.CB_RESET, n('after_start')), + }, n('after_start')), + ); + + n('after_start').otherwise( + this.update( + 'finish', + FINISH.UNSAFE, + this.invokePausable('on_message_begin', ERROR.CB_MESSAGE_BEGIN, switchType), + ), + ); + + n('start_req_or_res') + .peek('H', this.span.method.start(n('req_or_res_method'))) + .otherwise(this.update('type', TYPE.REQUEST, 'start_req')); + + n('req_or_res_method') + .select(H_METHOD_MAP, this.store('method', 
+ this.update('type', TYPE.REQUEST, this.span.method.end( + this.invokePausable('on_method_complete', ERROR.CB_METHOD_COMPLETE, n('req_first_space_before_url')), + )), + )) + .match('HTTP/', this.span.method.end(this.update('type', TYPE.RESPONSE, + this.span.version.start(n('res_http_major'))))) + .otherwise(p.error(ERROR.INVALID_CONSTANT, 'Invalid word encountered')); + + const checkVersion = (destination: string): Node => { + const node = n(destination); + const errorNode = this.span.version.end(p.error(ERROR.INVALID_VERSION, 'Invalid HTTP version')); + + return this.testLenientFlags(LENIENT_FLAGS.VERSION, + { + 1: node, + }, + this.load('http_major', { + 0: this.load('http_minor', { + 9: node, + }, errorNode), + 1: this.load('http_minor', { + 0: node, + 1: node, + }, errorNode), + 2: this.load('http_minor', { + 0: node, + }, errorNode), + }, errorNode), + ); + }; + + const checkIfAllowLFWithoutCR = (success: Node, failure: Node) => { + return this.testLenientFlags(LENIENT_FLAGS.OPTIONAL_CR_BEFORE_LF, { 1: success }, failure); + }; + + // Response + n('start_res') + .match('HTTP/', span.version.start(n('res_http_major'))) + .otherwise(p.error(ERROR.INVALID_CONSTANT, 'Expected HTTP/')); + + n('res_http_major') + .select(MAJOR, this.store('http_major', 'res_http_dot')) + .otherwise(this.span.version.end(p.error(ERROR.INVALID_VERSION, 'Invalid major version'))); + + n('res_http_dot') + .match('.', n('res_http_minor')) + .otherwise(this.span.version.end(p.error(ERROR.INVALID_VERSION, 'Expected dot'))); + + n('res_http_minor') + .select(MINOR, this.store('http_minor', checkVersion('res_http_end'))) + .otherwise(this.span.version.end(p.error(ERROR.INVALID_VERSION, 'Invalid minor version'))); + + n('res_http_end') + .otherwise(this.span.version.end( + this.invokePausable('on_version_complete', ERROR.CB_VERSION_COMPLETE, 'res_after_version'), + )); + + n('res_after_version') + .match(' ', this.update('status_code', 0, 'res_status_code_digit_1')) + 
.otherwise(p.error(ERROR.INVALID_VERSION, + 'Expected space after version')); + + n('res_status_code_digit_1') + .select(NUM_MAP, this.mulAdd('status_code', { + overflow: p.error(ERROR.INVALID_STATUS, 'Invalid status code'), + success: 'res_status_code_digit_2', + })) + .otherwise(p.error(ERROR.INVALID_STATUS, 'Invalid status code')); + + n('res_status_code_digit_2') + .select(NUM_MAP, this.mulAdd('status_code', { + overflow: p.error(ERROR.INVALID_STATUS, 'Invalid status code'), + success: 'res_status_code_digit_3', + })) + .otherwise(p.error(ERROR.INVALID_STATUS, 'Invalid status code')); + + n('res_status_code_digit_3') + .select(NUM_MAP, this.mulAdd('status_code', { + overflow: p.error(ERROR.INVALID_STATUS, 'Invalid status code'), + success: 'res_status_code_otherwise', + })) + .otherwise(p.error(ERROR.INVALID_STATUS, 'Invalid status code')); + + const onStatusComplete = this.invokePausable( + 'on_status_complete', ERROR.CB_STATUS_COMPLETE, n('headers_start'), + ); + + n('res_status_code_otherwise') + .match(' ', n('res_status_start')) + .match('\r', n('res_line_almost_done')) + .match( + '\n', + checkIfAllowLFWithoutCR( + onStatusComplete, + p.error(ERROR.INVALID_STATUS, 'Invalid response status'), + ), + ) + .otherwise(p.error(ERROR.INVALID_STATUS, 'Invalid response status')); + + n('res_status_start') + .otherwise(span.status.start(n('res_status'))); + + n('res_status') + .peek('\r', span.status.end().skipTo(n('res_line_almost_done'))) + .peek( + '\n', + span.status.end().skipTo( + checkIfAllowLFWithoutCR( + onStatusComplete, + p.error(ERROR.CR_EXPECTED, 'Missing expected CR after response line'), + ), + ), + ) + .skipTo(n('res_status')); + + n('res_line_almost_done') + .match(['\r', '\n'], onStatusComplete) + .otherwise(this.testLenientFlags(LENIENT_FLAGS.OPTIONAL_LF_AFTER_CR, { + 1: onStatusComplete, + }, p.error(ERROR.STRICT, 'Expected LF after CR'))); + + // Request + n('start_req').otherwise(this.span.method.start(n('after_start_req'))); + + 
n('after_start_req') + .select(METHOD_MAP, this.store('method', this.span.method.end( + this.invokePausable('on_method_complete', ERROR.CB_METHOD_COMPLETE, n('req_first_space_before_url'), + )))) + .otherwise(p.error(ERROR.INVALID_METHOD, 'Invalid method encountered')); + + n('req_first_space_before_url') + .match(' ', n('req_spaces_before_url')) + .otherwise(p.error(ERROR.INVALID_METHOD, 'Expected space after method')); + + n('req_spaces_before_url') + .match(' ', n('req_spaces_before_url')) + .otherwise(this.isEqual('method', METHODS.CONNECT, { + equal: url.entry.connect, + notEqual: url.entry.normal, + })); + + const onUrlCompleteHTTP = this.invokePausable( + 'on_url_complete', ERROR.CB_URL_COMPLETE, n('req_http_start'), + ); + + url.exit.toHTTP + .otherwise(onUrlCompleteHTTP); + + const onUrlCompleteHTTP09 = this.invokePausable( + 'on_url_complete', ERROR.CB_URL_COMPLETE, n('headers_start'), + ); + + url.exit.toHTTP09 + .otherwise( + this.update('http_major', 0, + this.update('http_minor', 9, onUrlCompleteHTTP09)), + ); + + const checkMethod = (methods: METHODS[], error: string): Node => { + const success = n('req_http_version'); + const failure = p.error(ERROR.INVALID_CONSTANT, error); + + const map: { [key: number]: Node } = {}; + for (const method of methods) { + map[method] = success; + } + + return this.load('method', map, failure); + }; + + n('req_http_start') + .match('HTTP/', checkMethod(METHODS_HTTP, + 'Invalid method for HTTP/x.x request')) + .match('RTSP/', checkMethod(METHODS_RTSP, + 'Invalid method for RTSP/x.x request')) + .match('ICE/', checkMethod(METHODS_ICE, + 'Expected SOURCE method for ICE/x.x request')) + .match(' ', n('req_http_start')) + .otherwise(p.error(ERROR.INVALID_CONSTANT, 'Expected HTTP/')); + + n('req_http_version').otherwise(span.version.start(n('req_http_major'))); + + n('req_http_major') + .select(MAJOR, this.store('http_major', 'req_http_dot')) + .otherwise(this.span.version.end(p.error(ERROR.INVALID_VERSION, 'Invalid major 
version'))); + + n('req_http_dot') + .match('.', n('req_http_minor')) + .otherwise(this.span.version.end(p.error(ERROR.INVALID_VERSION, 'Expected dot'))); + + n('req_http_minor') + .select(MINOR, this.store('http_minor', checkVersion('req_http_end'))) + .otherwise(this.span.version.end(p.error(ERROR.INVALID_VERSION, 'Invalid minor version'))); + + n('req_http_end').otherwise( + span.version.end( + this.invokePausable( + 'on_version_complete', + ERROR.CB_VERSION_COMPLETE, + this.load('method', { + [METHODS.PRI]: n('req_pri_upgrade'), + }, n('req_http_complete')), + ), + ), + ); + + n('req_http_complete') + .match('\r', n('req_http_complete_crlf')) + .match( + '\n', + checkIfAllowLFWithoutCR( + n('req_http_complete_crlf'), + p.error(ERROR.INVALID_VERSION, 'Expected CRLF after version'), + ), + ) + .otherwise(p.error(ERROR.INVALID_VERSION, 'Expected CRLF after version')); + + n('req_http_complete_crlf') + .match('\n', n('headers_start')) + .otherwise(this.testLenientFlags(LENIENT_FLAGS.OPTIONAL_LF_AFTER_CR, { + 1: n('headers_start'), + }, p.error(ERROR.STRICT, 'Expected CRLF after version'))); + + n('req_pri_upgrade') + .match('\r\n\r\nSM\r\n\r\n', + p.error(ERROR.PAUSED_H2_UPGRADE, 'Pause on PRI/Upgrade')) + .otherwise( + p.error(ERROR.INVALID_VERSION, 'Expected HTTP/2 Connection Preface')); + } + + private buildHeaders(): void { + this.buildHeaderField(); + this.buildHeaderValue(); + } + + private buildHeaderField(): void { + const p = this.llparse; + const span = this.span; + const n = (name: string): Match => this.node<Match>(name); + + const onInvalidHeaderFieldChar = + p.error(ERROR.INVALID_HEADER_TOKEN, 'Invalid header field char'); + + n('headers_start') + .match(' ', + this.testLenientFlags(LENIENT_FLAGS.HEADERS, { + 1: n('header_field_start'), + }, p.error(ERROR.UNEXPECTED_SPACE, 'Unexpected space after start line')), + ) + .otherwise(n('header_field_start')); + + n('header_field_start') + .match('\r', n('headers_almost_done')) + .match('\n', + 
this.testLenientFlags(LENIENT_FLAGS.OPTIONAL_CR_BEFORE_LF, { + 1: this.testFlags(FLAGS.TRAILING, { + 1: this.invokePausable('on_chunk_complete', + ERROR.CB_CHUNK_COMPLETE, 'message_done'), + }).otherwise(this.headersCompleted()), + }, onInvalidHeaderFieldChar), + ) + .peek(':', p.error(ERROR.INVALID_HEADER_TOKEN, 'Invalid header token')) + .otherwise(span.headerField.start(n('header_field'))); + + n('header_field') + .transform(p.transform.toLower()) + // Match headers that need special treatment + .select(SPECIAL_HEADERS, this.store('header_state', 'header_field_colon')) + .otherwise(this.resetHeaderState('header_field_general')); + + /* https://www.rfc-editor.org/rfc/rfc7230.html#section-3.3.3, paragraph 3. + * + * If a message is received with both a Transfer-Encoding and a + * Content-Length header field, the Transfer-Encoding overrides the + * Content-Length. Such a message might indicate an attempt to + * perform request smuggling (Section 9.5) or response splitting + * (Section 9.4) and **ought to be handled as an error**. A sender MUST + * remove the received Content-Length field prior to forwarding such + * a message downstream. + * + * Since llhttp 9, we go for the stricter approach and treat this as an error. 
+ */ + const checkInvalidTransferEncoding = (otherwise: Node) => { + return this.testFlags(FLAGS.CONTENT_LENGTH, { + 1: this.testLenientFlags(LENIENT_FLAGS.CHUNKED_LENGTH, { + 0: p.error(ERROR.INVALID_TRANSFER_ENCODING, "Transfer-Encoding can't be present with Content-Length"), + }).otherwise(otherwise), + }).otherwise(otherwise); + }; + + const checkInvalidContentLength = (otherwise: Node) => { + return this.testFlags(FLAGS.TRANSFER_ENCODING, { + 1: this.testLenientFlags(LENIENT_FLAGS.CHUNKED_LENGTH, { + 0: p.error(ERROR.INVALID_CONTENT_LENGTH, "Content-Length can't be present with Transfer-Encoding"), + }).otherwise(otherwise), + }).otherwise(otherwise); + }; + + const onHeaderFieldComplete = this.invokePausable( + 'on_header_field_complete', ERROR.CB_HEADER_FIELD_COMPLETE, + this.load('header_state', { + [HEADER_STATE.TRANSFER_ENCODING]: checkInvalidTransferEncoding(n('header_value_discard_ws')), + [HEADER_STATE.CONTENT_LENGTH]: checkInvalidContentLength(n('header_value_discard_ws')), + }, 'header_value_discard_ws'), + ); + + const checkLenientFlagsOnColon = + this.testLenientFlags(LENIENT_FLAGS.HEADERS, { + 1: n('header_field_colon_discard_ws'), + }, span.headerField.end().skipTo(onInvalidHeaderFieldChar)); + + n('header_field_colon') + // https://datatracker.ietf.org/doc/html/rfc7230#section-3.2.4 + // Whitespace character is not allowed between the header field-name + // and colon. If the next token matches whitespace then throw an error. + // + // Add a check for the lenient flag. If the lenient flag is set, the + // whitespace token is allowed to support legacy code not following + // http specs. + .peek(' ', checkLenientFlagsOnColon) + .peek(':', span.headerField.end().skipTo(onHeaderFieldComplete)) + // Fallback to general header, there're additional characters: + // `Connection-Duration` instead of `Connection` and so on. 
+ .otherwise(this.resetHeaderState('header_field_general')); + + n('header_field_colon_discard_ws') + .match(' ', n('header_field_colon_discard_ws')) + .otherwise(n('header_field_colon')); + + n('header_field_general') + .match(this.TOKEN, n('header_field_general')) + .otherwise(n('header_field_general_otherwise')); + + // Just a performance optimization, split the node so that the fast case + // remains in `header_field_general` + n('header_field_general_otherwise') + .peek(':', span.headerField.end().skipTo(onHeaderFieldComplete)) + .otherwise(p.error(ERROR.INVALID_HEADER_TOKEN, 'Invalid header token')); + } + + private buildHeaderValue(): void { + const p = this.llparse; + const span = this.span; + const callback = this.callback; + const n = (name: string): Match => this.node<Match>(name); + + const fallback = this.resetHeaderState('header_value'); + + n('header_value_discard_ws') + .match([ ' ', '\t' ], n('header_value_discard_ws')) + .match('\r', n('header_value_discard_ws_almost_done')) + .match('\n', this.testLenientFlags(LENIENT_FLAGS.OPTIONAL_CR_BEFORE_LF, { + 1: n('header_value_discard_lws'), + }, p.error(ERROR.INVALID_HEADER_TOKEN, 'Invalid header value char'))) + .otherwise(span.headerValue.start(n('header_value_start'))); + + n('header_value_discard_ws_almost_done') + .match('\n', n('header_value_discard_lws')) + .otherwise( + this.testLenientFlags(LENIENT_FLAGS.HEADERS, { + 1: n('header_value_discard_lws'), + }, p.error(ERROR.STRICT, 'Expected LF after CR')), + ); + + const onHeaderValueComplete = this.invokePausable( + 'on_header_value_complete', ERROR.CB_HEADER_VALUE_COMPLETE, n('header_field_start'), + ); + + const emptyContentLengthError = p.error( + ERROR.INVALID_CONTENT_LENGTH, 'Empty Content-Length'); + const checkContentLengthEmptiness = this.load('header_state', { + [HEADER_STATE.CONTENT_LENGTH]: emptyContentLengthError, + }, this.setHeaderFlags( + this.emptySpan(span.headerValue, onHeaderValueComplete))); + + n('header_value_discard_lws') + 
.match([ ' ', '\t' ], this.testLenientFlags(LENIENT_FLAGS.HEADERS, { + 1: n('header_value_discard_ws'), + }, p.error(ERROR.INVALID_HEADER_TOKEN, 'Invalid header value char'))) + .otherwise(checkContentLengthEmptiness); + + // Multiple `Transfer-Encoding` headers should be treated as one, but with + // values separate by a comma. + // + // See: https://tools.ietf.org/html/rfc7230#section-3.2.2 + const toTransferEncoding = this.unsetFlag( + FLAGS.CHUNKED, + 'header_value_te_chunked'); + + // Once chunked has been selected, no other encoding is possible in requests + // https://datatracker.ietf.org/doc/html/rfc7230#section-3.3.1 + const forbidAfterChunkedInRequest = (otherwise: Node) => { + return this.load('type', { + [TYPE.REQUEST]: this.testLenientFlags(LENIENT_FLAGS.TRANSFER_ENCODING, { + 0: span.headerValue.end().skipTo( + p.error(ERROR.INVALID_TRANSFER_ENCODING, 'Invalid `Transfer-Encoding` header value'), + ), + }).otherwise(otherwise), + }, otherwise); + }; + + n('header_value_start') + .otherwise(this.load('header_state', { + [HEADER_STATE.UPGRADE]: this.setFlag(FLAGS.UPGRADE, fallback), + [HEADER_STATE.TRANSFER_ENCODING]: this.testFlags( + FLAGS.CHUNKED, + { + 1: forbidAfterChunkedInRequest(this.setFlag(FLAGS.TRANSFER_ENCODING, toTransferEncoding)), + }, + this.setFlag(FLAGS.TRANSFER_ENCODING, toTransferEncoding)), + [HEADER_STATE.CONTENT_LENGTH]: n('header_value_content_length_once'), + [HEADER_STATE.CONNECTION]: n('header_value_connection'), + }, 'header_value')); + + // + // Transfer-Encoding + // + + n('header_value_te_chunked') + .transform(p.transform.toLowerUnsafe()) + .match( + 'chunked', + n('header_value_te_chunked_last'), + ) + .otherwise(n('header_value_te_token')); + + n('header_value_te_chunked_last') + .match(' ', n('header_value_te_chunked_last')) + .peek([ '\r', '\n' ], this.update('header_state', + HEADER_STATE.TRANSFER_ENCODING_CHUNKED, + 'header_value_otherwise')) + .peek(',', forbidAfterChunkedInRequest(n('header_value_te_chunked'))) + 
.otherwise(n('header_value_te_token')); + + n('header_value_te_token') + .match(',', n('header_value_te_token_ows')) + .match(CONNECTION_TOKEN_CHARS, n('header_value_te_token')) + .otherwise(fallback); + + n('header_value_te_token_ows') + .match([ ' ', '\t' ], n('header_value_te_token_ows')) + .otherwise(n('header_value_te_chunked')); + + // + // Content-Length + // + + const invalidContentLength = (reason: string): Node => { + // End span for easier testing + // TODO(indutny): minimize code size + return span.headerValue.end() + .otherwise(p.error(ERROR.INVALID_CONTENT_LENGTH, reason)); + }; + + n('header_value_content_length_once') + .otherwise(this.testFlags(FLAGS.CONTENT_LENGTH, { + 0: n('header_value_content_length'), + }, p.error(ERROR.UNEXPECTED_CONTENT_LENGTH, 'Duplicate Content-Length'))); + + n('header_value_content_length') + .select(NUM_MAP, this.mulAdd('content_length', { + overflow: invalidContentLength('Content-Length overflow'), + success: 'header_value_content_length', + })) + .otherwise(n('header_value_content_length_ws')); + + n('header_value_content_length_ws') + .match(' ', n('header_value_content_length_ws')) + .peek([ '\r', '\n' ], + this.setFlag(FLAGS.CONTENT_LENGTH, 'header_value_otherwise')) + .otherwise(invalidContentLength('Invalid character in Content-Length')); + + // + // Connection + // + + n('header_value_connection') + .transform(p.transform.toLower()) + // TODO(indutny): extra node for token back-edge? 
+ // Skip lws + .match([ ' ', '\t' ], n('header_value_connection')) + .match( + 'close', + this.update('header_state', HEADER_STATE.CONNECTION_CLOSE, + 'header_value_connection_ws'), + ) + .match( + 'upgrade', + this.update('header_state', HEADER_STATE.CONNECTION_UPGRADE, + 'header_value_connection_ws'), + ) + .match( + 'keep-alive', + this.update('header_state', HEADER_STATE.CONNECTION_KEEP_ALIVE, + 'header_value_connection_ws'), + ) + .otherwise(n('header_value_connection_token')); + + n('header_value_connection_ws') + .match(',', this.setHeaderFlags('header_value_connection')) + .match(' ', n('header_value_connection_ws')) + .peek([ '\r', '\n' ], n('header_value_otherwise')) + .otherwise(this.resetHeaderState('header_value_connection_token')); + + n('header_value_connection_token') + .match(',', n('header_value_connection')) + .match(CONNECTION_TOKEN_CHARS, + n('header_value_connection_token')) + .otherwise(n('header_value_otherwise')); + + // Split for performance reasons + n('header_value') + .match(HEADER_CHARS, n('header_value')) + .otherwise(n('header_value_otherwise')); + + const checkIfAllowLFWithoutCR = (success: Node, failure: Node) => { + return this.testLenientFlags(LENIENT_FLAGS.OPTIONAL_CR_BEFORE_LF, { 1: success }, failure); + }; + + const checkLenient = this.testLenientFlags(LENIENT_FLAGS.HEADERS, { + 1: n('header_value_lenient'), + }, span.headerValue.end(p.error(ERROR.INVALID_HEADER_TOKEN, 'Invalid header value char'))); + + n('header_value_otherwise') + .peek('\r', span.headerValue.end().skipTo(n('header_value_almost_done'))) + .peek( + '\n', + span.headerValue.end( + checkIfAllowLFWithoutCR( + n('header_value_almost_done'), + p.error(ERROR.CR_EXPECTED, 'Missing expected CR after header value'), + ), + ), + ) + .otherwise(checkLenient); + + n('header_value_lenient') + .peek('\r', span.headerValue.end().skipTo(n('header_value_almost_done'))) + .peek('\n', span.headerValue.end(n('header_value_almost_done'))) + .skipTo(n('header_value_lenient')); 
+ + n('header_value_almost_done') + .match('\n', n('header_value_lws')) + .otherwise(p.error(ERROR.LF_EXPECTED, + 'Missing expected LF after header value')); + + n('header_value_lws') + .peek([ ' ', '\t' ], + this.load('header_state', { + [HEADER_STATE.TRANSFER_ENCODING_CHUNKED]: + this.resetHeaderState(span.headerValue.start(n('header_value_start'))), + }, span.headerValue.start(n('header_value_start')))) + .otherwise(this.setHeaderFlags(onHeaderValueComplete)); + + const checkTrailing = this.testFlags(FLAGS.TRAILING, { + 1: this.invokePausable('on_chunk_complete', + ERROR.CB_CHUNK_COMPLETE, 'message_done'), + }).otherwise(this.headersCompleted()); + + n('headers_almost_done') + .match('\n', checkTrailing) + .otherwise( + this.testLenientFlags(LENIENT_FLAGS.OPTIONAL_LF_AFTER_CR, { + 1: checkTrailing, + }, p.error(ERROR.STRICT, 'Expected LF after headers'))); + + const upgradePause = p.pause(ERROR.PAUSED_UPGRADE, + 'Pause on CONNECT/Upgrade'); + + const afterHeadersComplete = p.invoke(callback.afterHeadersComplete, { + 1: this.invokePausable('on_message_complete', + ERROR.CB_MESSAGE_COMPLETE, upgradePause), + 2: n('chunk_size_start'), + 3: n('body_identity'), + 4: n('body_identity_eof'), + + // non-chunked `Transfer-Encoding` for request, see `src/native/http.c` + 5: p.error(ERROR.INVALID_TRANSFER_ENCODING, + 'Request has invalid `Transfer-Encoding`'), + }); + + n('headers_done') + .otherwise(afterHeadersComplete); + + upgradePause + .otherwise(n('cleanup')); + + afterHeadersComplete + .otherwise(this.invokePausable('on_message_complete', + ERROR.CB_MESSAGE_COMPLETE, 'cleanup')); + + n('body_identity') + .otherwise(span.body.start() + .otherwise(p.consume('content_length').otherwise( + span.body.end(n('message_done'))))); + + n('body_identity_eof') + .otherwise( + this.update('finish', FINISH.SAFE_WITH_CB, span.body.start(n('eof')))); + + // Just read everything until EOF + n('eof') + .skipTo(n('eof')); + + n('chunk_size_start') + 
.otherwise(this.update('content_length', 0, 'chunk_size_digit')); + + const addContentLength = this.mulAdd('content_length', { + overflow: p.error(ERROR.INVALID_CHUNK_SIZE, 'Chunk size overflow'), + success: 'chunk_size', + }, { signed: false, base: 0x10 }); + + n('chunk_size_digit') + .select(HEX_MAP, addContentLength) + .otherwise(p.error(ERROR.INVALID_CHUNK_SIZE, + 'Invalid character in chunk size')); + + n('chunk_size') + .select(HEX_MAP, addContentLength) + .otherwise(n('chunk_size_otherwise')); + + n('chunk_size_otherwise') + .match( + [ ' ', '\t' ], + this.testLenientFlags( + LENIENT_FLAGS.SPACES_AFTER_CHUNK_SIZE, + { + 1: n('chunk_size_otherwise'), + }, + p.error(ERROR.INVALID_CHUNK_SIZE, 'Invalid character in chunk size'), + ), + ) + .match('\r', n('chunk_size_almost_done')) + .match( + '\n', + checkIfAllowLFWithoutCR( + n('chunk_size_almost_done'), + p.error(ERROR.CR_EXPECTED, 'Missing expected CR after chunk size'), + ), + ) + .match(';', n('chunk_extensions')) + .otherwise(p.error(ERROR.INVALID_CHUNK_SIZE, + 'Invalid character in chunk size')); + + const onChunkExtensionNameCompleted = (destination: Node) => { + return this.invokePausable( + 'on_chunk_extension_name', ERROR.CB_CHUNK_EXTENSION_NAME_COMPLETE, destination); + }; + + const onChunkExtensionValueCompleted = (destination: Node) => { + return this.invokePausable( + 'on_chunk_extension_value', ERROR.CB_CHUNK_EXTENSION_VALUE_COMPLETE, destination); + }; + + n('chunk_extensions') + .match(' ', p.error(ERROR.STRICT, 'Invalid character in chunk extensions')) + .match('\r', p.error(ERROR.STRICT, 'Invalid character in chunk extensions')) + .otherwise(this.span.chunkExtensionName.start(n('chunk_extension_name'))); + + n('chunk_extension_name') + .match(TOKEN, n('chunk_extension_name')) + .peek('=', this.span.chunkExtensionName.end().skipTo( + this.span.chunkExtensionValue.start( + onChunkExtensionNameCompleted(n('chunk_extension_value')), + ), + )) + .peek(';', 
this.span.chunkExtensionName.end().skipTo( + onChunkExtensionNameCompleted(n('chunk_extensions')), + )) + .peek('\r', this.span.chunkExtensionName.end().skipTo( + onChunkExtensionNameCompleted(n('chunk_size_almost_done')), + )) + .peek('\n', this.span.chunkExtensionName.end( + onChunkExtensionNameCompleted( + checkIfAllowLFWithoutCR( + n('chunk_size_almost_done'), + p.error(ERROR.CR_EXPECTED, 'Missing expected CR after chunk extension name'), + ), + ), + )) + .otherwise(this.span.chunkExtensionName.end().skipTo( + p.error(ERROR.STRICT, 'Invalid character in chunk extensions name'), + )); + + n('chunk_extension_value') + .match('"', n('chunk_extension_quoted_value')) + .match(TOKEN, n('chunk_extension_value')) + .peek(';', this.span.chunkExtensionValue.end().skipTo( + onChunkExtensionValueCompleted(n('chunk_extensions')), + )) + .peek('\r', this.span.chunkExtensionValue.end().skipTo( + onChunkExtensionValueCompleted(n('chunk_size_almost_done')), + )) + .peek('\n', this.span.chunkExtensionValue.end( + onChunkExtensionValueCompleted( + checkIfAllowLFWithoutCR( + n('chunk_size_almost_done'), + p.error(ERROR.CR_EXPECTED, 'Missing expected CR after chunk extension value'), + ), + ), + )) + .otherwise(this.span.chunkExtensionValue.end().skipTo( + p.error(ERROR.STRICT, 'Invalid character in chunk extensions value'), + )); + + n('chunk_extension_quoted_value') + .match(QUOTED_STRING, n('chunk_extension_quoted_value')) + .match('"', this.span.chunkExtensionValue.end( + onChunkExtensionValueCompleted(n('chunk_extension_quoted_value_done')), + )) + .match('\\', n('chunk_extension_quoted_value_quoted_pair')) + .otherwise(this.span.chunkExtensionValue.end().skipTo( + p.error(ERROR.STRICT, 'Invalid character in chunk extensions quoted value'), + )); + + n('chunk_extension_quoted_value_quoted_pair') + .match(HTAB_SP_VCHAR_OBS_TEXT, n('chunk_extension_quoted_value')) + .otherwise(this.span.chunkExtensionValue.end().skipTo( + p.error(ERROR.STRICT, 'Invalid quoted-pair in chunk 
extensions quoted value'), + )); + + n('chunk_extension_quoted_value_done') + .match(';', n('chunk_extensions')) + .match('\r', n('chunk_size_almost_done')) + .peek( + '\n', + checkIfAllowLFWithoutCR( + n('chunk_size_almost_done'), + p.error(ERROR.CR_EXPECTED, 'Missing expected CR after chunk extension value'), + ), + ) + .otherwise(p.error(ERROR.STRICT, + 'Invalid character in chunk extensions quote value')); + + n('chunk_size_almost_done') + .match('\n', n('chunk_size_almost_done_lf')) + .otherwise( + this.testLenientFlags(LENIENT_FLAGS.OPTIONAL_LF_AFTER_CR, { + 1: n('chunk_size_almost_done_lf'), + }).otherwise(p.error(ERROR.STRICT, 'Expected LF after chunk size')), + ); + + const toChunk = this.isEqual('content_length', 0, { + equal: this.setFlag(FLAGS.TRAILING, 'header_field_start'), + notEqual: 'chunk_data', + }); + + n('chunk_size_almost_done_lf') + .otherwise(this.invokePausable('on_chunk_header', + ERROR.CB_CHUNK_HEADER, toChunk)); + + n('chunk_data') + .otherwise(span.body.start() + .otherwise(p.consume('content_length').otherwise( + span.body.end(n('chunk_data_almost_done'))))); + + n('chunk_data_almost_done') + .match('\r\n', n('chunk_complete')) + .match( + '\n', + checkIfAllowLFWithoutCR( + n('chunk_complete'), + p.error(ERROR.CR_EXPECTED, 'Missing expected CR after chunk data'), + ), + ) + .otherwise( + this.testLenientFlags(LENIENT_FLAGS.OPTIONAL_CRLF_AFTER_CHUNK, { + 1: n('chunk_complete'), + }).otherwise(p.error(ERROR.STRICT, 'Expected LF after chunk data')), + ); + + n('chunk_complete') + .otherwise(this.invokePausable('on_chunk_complete', + ERROR.CB_CHUNK_COMPLETE, 'chunk_size_start')); + + const upgradeAfterDone = this.isEqual('upgrade', 1, { + // Exit, the rest of the message is in a different protocol. 
+ equal: upgradePause, + + // Restart + notEqual: 'cleanup', + }); + + n('message_done') + .otherwise(this.invokePausable('on_message_complete', + ERROR.CB_MESSAGE_COMPLETE, upgradeAfterDone)); + + const lenientClose = this.testLenientFlags(LENIENT_FLAGS.KEEP_ALIVE, { + 1: n('restart'), + }, n('closed')); + + // Check if we'd like to keep-alive + n('cleanup') + .otherwise(p.invoke(callback.afterMessageComplete, { + 1: this.update('content_length', 0, n('restart')), + }, this.update('finish', FINISH.SAFE, lenientClose))); + + const lenientDiscardAfterClose = this.testLenientFlags(LENIENT_FLAGS.DATA_AFTER_CLOSE, { + 1: n('closed'), + }, p.error(ERROR.CLOSED_CONNECTION, 'Data after `Connection: close`')); + + n('closed') + .match([ '\r', '\n' ], n('closed')) + .skipTo(lenientDiscardAfterClose); + + n('restart') + .otherwise( + this.update('initial_message_completed', 1, this.update('finish', FINISH.SAFE, n('start')), + )); + } + + private headersCompleted(): Node { + const p = this.llparse; + const callback = this.callback; + const n = (name: string): Match => this.node<Match>(name); + + // Set `upgrade` if needed + const beforeHeadersComplete = p.invoke(callback.beforeHeadersComplete); + + /* Here we call the headers_complete callback. This is somewhat + * different than other callbacks because if the user returns 1, we + * will interpret that as saying that this message has no body. This + * is needed for the annoying case of receiving a response to a HEAD + * request. + * + * We'd like to use CALLBACK_NOTIFY_NOADVANCE() here but we cannot, so + * we have to simulate it by handling a change in errno below. 
+ */ + const onHeadersComplete = p.invoke(callback.onHeadersComplete, { + 0: n('headers_done'), + 1: this.setFlag(FLAGS.SKIPBODY, 'headers_done'), + 2: this.update('upgrade', 1, + this.setFlag(FLAGS.SKIPBODY, 'headers_done')), + [ERROR.PAUSED]: this.pause('Paused by on_headers_complete', + 'headers_done'), + }, p.error(ERROR.CB_HEADERS_COMPLETE, 'User callback error')); + + beforeHeadersComplete.otherwise(onHeadersComplete); + + return beforeHeadersComplete; + } + + private node<T extends Node>(name: string | T): T { + if (name instanceof Node) { + return name; + } + + assert(this.nodes.has(name), `Unknown node with name "${name}"`); + return this.nodes.get(name)! as any; + } + + private load(field: string, map: { [key: number]: Node }, + next?: string | Node): Node { + const p = this.llparse; + + const res = p.invoke(p.code.load(field), map); + if (next !== undefined) { + res.otherwise(this.node(next)); + } + return res; + } + + private store(field: string, next?: string | Node): Node { + const p = this.llparse; + + const res = p.invoke(p.code.store(field)); + if (next !== undefined) { + res.otherwise(this.node(next)); + } + return res; + } + + private update(field: string, value: number, next?: string | Node): Node { + const p = this.llparse; + + const res = p.invoke(p.code.update(field, value)); + if (next !== undefined) { + res.otherwise(this.node(next)); + } + return res; + } + + private resetHeaderState(next: string | Node): Node { + return this.update('header_state', HEADER_STATE.GENERAL, next); + } + + private emptySpan(span: source.Span, next: string | Node): Node { + return span.start(span.end(this.node(next))); + } + + private unsetFlag(flag: FLAGS, next: string | Node): Node { + const p = this.llparse; + return p.invoke(p.code.and('flags', ~flag), this.node(next)); + } + + private setFlag(flag: FLAGS, next: string | Node): Node { + const p = this.llparse; + return p.invoke(p.code.or('flags', flag), this.node(next)); + } + + private testFlags(flag: 
FLAGS, map: { [key: number]: Node }, + next?: string | Node): Node { + const p = this.llparse; + const res = p.invoke(p.code.test('flags', flag), map); + if (next !== undefined) { + res.otherwise(this.node(next)); + } + return res; + } + + private testLenientFlags(flag: LENIENT_FLAGS, map: { [key: number]: Node }, + next?: string | Node): Node { + const p = this.llparse; + const res = p.invoke(p.code.test('lenient_flags', flag), map); + if (next !== undefined) { + res.otherwise(this.node(next)); + } + return res; + } + + private setHeaderFlags(next: string | Node): Node { + const HS = HEADER_STATE; + const F = FLAGS; + + const toConnection = + this.update('header_state', HEADER_STATE.CONNECTION, next); + + return this.load('header_state', { + [HS.CONNECTION_KEEP_ALIVE]: + this.setFlag(F.CONNECTION_KEEP_ALIVE, toConnection), + [HS.CONNECTION_CLOSE]: this.setFlag(F.CONNECTION_CLOSE, toConnection), + [HS.CONNECTION_UPGRADE]: this.setFlag(F.CONNECTION_UPGRADE, toConnection), + [HS.TRANSFER_ENCODING_CHUNKED]: this.setFlag(F.CHUNKED, next), + }, this.node(next)); + } + + private mulAdd(field: string, targets: IMulTargets, + options: IMulOptions = { base: 10, signed: false }): Node { + const p = this.llparse; + + return p.invoke(p.code.mulAdd(field, options), { + 1: this.node(targets.overflow), + }, this.node(targets.success)); + } + + private isEqual(field: string, value: number, map: IIsEqualTargets) { + const p = this.llparse; + return p.invoke(p.code.isEqual(field, value), { + 0: this.node(map.notEqual), + }, this.node(map.equal)); + } + + private pause(msg: string, next?: string | Node) { + const p = this.llparse; + const res = p.pause(ERROR.PAUSED, msg); + if (next !== undefined) { + res.otherwise(this.node(next)); + } + return res; + } + + private invokePausable(name: string, errorCode: ERROR, next: string | Node) + : Node { + let cb; + + switch (name) { + case 'on_message_begin': + cb = this.callback.onMessageBegin; + break; + case 'on_url_complete': + cb = 
this.callback.onUrlComplete; + break; + case 'on_status_complete': + cb = this.callback.onStatusComplete; + break; + case 'on_method_complete': + cb = this.callback.onMethodComplete; + break; + case 'on_version_complete': + cb = this.callback.onVersionComplete; + break; + case 'on_header_field_complete': + cb = this.callback.onHeaderFieldComplete; + break; + case 'on_header_value_complete': + cb = this.callback.onHeaderValueComplete; + break; + case 'on_message_complete': + cb = this.callback.onMessageComplete; + break; + case 'on_chunk_header': + cb = this.callback.onChunkHeader; + break; + case 'on_chunk_extension_name': + cb = this.callback.onChunkExtensionName; + break; + case 'on_chunk_extension_value': + cb = this.callback.onChunkExtensionValue; + break; + case 'on_chunk_complete': + cb = this.callback.onChunkComplete; + break; + case 'on_reset': + cb = this.callback.onReset; + break; + default: + throw new Error('Unknown callback: ' + name); + } + + const p = this.llparse; + return p.invoke(cb, { + 0: this.node(next), + [ERROR.PAUSED]: this.pause(`${name} pause`, next), + }, p.error(errorCode, `\`${name}\` callback error`)); + } +} diff --git a/llhttp/src/llhttp/url.ts b/llhttp/src/llhttp/url.ts new file mode 100644 index 0000000..c5fced9 --- /dev/null +++ b/llhttp/src/llhttp/url.ts @@ -0,0 +1,220 @@ +import { LLParse, source } from 'llparse'; + +import Match = source.node.Match; +import Node = source.node.Node; + +import { + ALPHA, + CharList, + ERROR, + URL_CHAR, + USERINFO_CHARS, +} from './constants'; + +type SpanName = 'schema' | 'host' | 'path' | 'query' | 'fragment' | 'url'; + +export interface IURLResult { + readonly entry: { + readonly normal: Node; + readonly connect: Node; + }; + readonly exit: { + readonly toHTTP: Node; + readonly toHTTP09: Node; + }; +} + +type SpanTable = Map<SpanName, source.Span>; + +export class URL { + private readonly spanTable: SpanTable = new Map(); + private readonly errorInvalid: Node; + private readonly URL_CHAR: 
CharList; + + constructor(private readonly llparse: LLParse, separateSpans: boolean = false) { + const p = this.llparse; + + this.errorInvalid = p.error(ERROR.INVALID_URL, 'Invalid characters in url'); + + this.URL_CHAR = URL_CHAR; + + const table = this.spanTable; + if (separateSpans) { + table.set('schema', p.span(p.code.span('llhttp__on_url_schema'))); + table.set('host', p.span(p.code.span('llhttp__on_url_host'))); + table.set('path', p.span(p.code.span('llhttp__on_url_path'))); + table.set('query', p.span(p.code.span('llhttp__on_url_query'))); + table.set('fragment', + p.span(p.code.span('llhttp__on_url_fragment'))); + } else { + table.set('url', p.span(p.code.span('llhttp__on_url'))); + } + } + + public build(): IURLResult { + const p = this.llparse; + + const entry = { + connect: this.node('entry_connect'), + normal: this.node('entry_normal'), + }; + + const start = this.node('start'); + const path = this.node('path'); + const queryOrFragment = this.node('query_or_fragment'); + const schema = this.node('schema'); + const schemaDelim = this.node('schema_delim'); + const server = this.node('server'); + const queryStart = this.node('query_start'); + const query = this.node('query'); + const fragment = this.node('fragment'); + const serverWithAt = this.node('server_with_at'); + + entry.normal + .otherwise(this.spanStart('url', start)); + + entry.connect + .otherwise(this.spanStart('url', this.spanStart('host', server))); + + start + .peek([ '/', '*' ], this.spanStart('path').skipTo(path)) + .peek(ALPHA, this.spanStart('schema', schema)) + .otherwise(p.error(ERROR.INVALID_URL, 'Unexpected start char in url')); + + schema + .match(ALPHA, schema) + .peek(':', this.spanEnd('schema').skipTo(schemaDelim)) + .otherwise(p.error(ERROR.INVALID_URL, 'Unexpected char in url schema')); + + schemaDelim + .match('//', this.spanStart('host', server)) + .otherwise(p.error(ERROR.INVALID_URL, 'Unexpected char in url schema')); + + for (const node of [server, serverWithAt]) { + 
node + .peek('/', this.spanEnd('host', this.spanStart('path').skipTo(path))) + .match('?', this.spanEnd('host', this.spanStart('query', query))) + .match(USERINFO_CHARS, server) + .match([ '[', ']' ], server) + .otherwise(p.error(ERROR.INVALID_URL, 'Unexpected char in url server')); + + if (node !== serverWithAt) { + node.match('@', serverWithAt); + } + } + + serverWithAt + .match('@', p.error(ERROR.INVALID_URL, 'Double @ in url')); + + path + .match(this.URL_CHAR, path) + .otherwise(this.spanEnd('path', queryOrFragment)); + + // Performance optimization, split `path` so that the fast case remains + // there + queryOrFragment + .match('?', this.spanStart('query', query)) + .match('#', this.spanStart('fragment', fragment)) + .otherwise(p.error(ERROR.INVALID_URL, 'Invalid char in url path')); + + query + .match(this.URL_CHAR, query) + // Allow extra '?' in query string + .match('?', query) + .peek('#', this.spanEnd('query') + .skipTo(this.spanStart('fragment', fragment))) + .otherwise(p.error(ERROR.INVALID_URL, 'Invalid char in url query')); + + fragment + .match(this.URL_CHAR, fragment) + .match([ '?', '#' ], fragment) + .otherwise( + p.error(ERROR.INVALID_URL, 'Invalid char in url fragment start')); + + for (const node of [ start, schema, schemaDelim ]) { + /* No whitespace allowed here */ + node.match([ ' ', '\r', '\n' ], this.errorInvalid); + } + + // Adaptors + const toHTTP = this.node('to_http'); + const toHTTP09 = this.node('to_http_09'); + + const skipToHTTP = this.node('skip_to_http') + .skipTo(toHTTP); + + const skipToHTTP09 = this.node('skip_to_http09') + .skipTo(toHTTP09); + + const skipCRLF = this.node('skip_lf_to_http09') + .match('\r\n', toHTTP09) + .otherwise(p.error(ERROR.INVALID_URL, 'Expected CRLF')); + + for (const node of [server, serverWithAt, queryOrFragment, queryStart, query, fragment]) { + let spanName: SpanName | undefined; + + if (node === server || node === serverWithAt) { + spanName = 'host'; + } else if (node === queryStart || node === 
/** Plain name -> numeric value dictionary produced from an enum-like object. */
export interface IEnumMap {
  [key: string]: number;
}

/**
 * Collects the numeric members of an enum-like object into a plain map.
 *
 * Non-numeric values (for example the reverse-mapping strings of a numeric
 * TypeScript enum) are dropped.
 *
 * @param obj - Object whose own enumerable keys are inspected.
 * @param filter - When given, only values contained in this list are kept.
 * @param exceptions - When given, values contained in this list are dropped.
 * @returns A new map holding every surviving `key -> number` pair.
 */
export function enumToMap(
  obj: any,
  filter?: ReadonlyArray<number>,
  exceptions?: ReadonlyArray<number>,
): IEnumMap {
  // A value survives when it is a number, passes the allow-list (if any) and
  // is not on the deny-list (if any).
  const isWanted = (candidate: unknown): candidate is number =>
    typeof candidate === 'number' &&
    (!filter || filter.includes(candidate)) &&
    !(exceptions && exceptions.includes(candidate));

  const map: IEnumMap = {};
  Object.keys(obj).forEach((name) => {
    const entry = obj[name];
    if (isWanted(entry)) {
      map[name] = entry;
    }
  });

  return map;
}
/* Invoke the optional span (data) callback `NAME` from the parser's settings.
 *
 * Expects an `int err` variable to be in scope at the expansion site; the
 * outcome is stored there:
 *   - `err = 0` when no settings are attached or the callback is unset;
 *   - otherwise `err` is whatever `settings->NAME(PARSER, START, LEN)`
 *     returned, except that a return value of -1 is translated to `HPE_USER`
 *     and an error reason naming the callback is recorded on the parser.
 */
#define SPAN_CALLBACK_MAYBE(PARSER, NAME, START, LEN)                         \
  do {                                                                        \
    const llhttp_settings_t* settings;                                        \
    settings = (const llhttp_settings_t*) (PARSER)->settings;                 \
    if (settings == NULL || settings->NAME == NULL) {                         \
      err = 0;                                                                \
      break;                                                                  \
    }                                                                         \
    err = settings->NAME((PARSER), (START), (LEN));                           \
    if (err == -1) {                                                          \
      err = HPE_USER;                                                         \
      llhttp_set_error_reason((PARSER), "Span callback error in " #NAME);     \
    }                                                                         \
  } while (0)
wasm_on_header_field, + wasm_on_header_value, + NULL, + NULL, + wasm_on_headers_complete_wrap, + wasm_on_body, + wasm_on_message_complete, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, +}; + + +llhttp_t* llhttp_alloc(llhttp_type_t type) { + llhttp_t* parser = malloc(sizeof(llhttp_t)); + llhttp_init(parser, type, &wasm_settings); + return parser; +} + +void llhttp_free(llhttp_t* parser) { + free(parser); +} + +#endif // defined(__wasm__) + +/* Some getters required to get stuff from the parser */ + +uint8_t llhttp_get_type(llhttp_t* parser) { + return parser->type; +} + +uint8_t llhttp_get_http_major(llhttp_t* parser) { + return parser->http_major; +} + +uint8_t llhttp_get_http_minor(llhttp_t* parser) { + return parser->http_minor; +} + +uint8_t llhttp_get_method(llhttp_t* parser) { + return parser->method; +} + +int llhttp_get_status_code(llhttp_t* parser) { + return parser->status_code; +} + +uint8_t llhttp_get_upgrade(llhttp_t* parser) { + return parser->upgrade; +} + + +void llhttp_reset(llhttp_t* parser) { + llhttp_type_t type = parser->type; + const llhttp_settings_t* settings = parser->settings; + void* data = parser->data; + uint16_t lenient_flags = parser->lenient_flags; + + llhttp__internal_init(parser); + + parser->type = type; + parser->settings = (void*) settings; + parser->data = data; + parser->lenient_flags = lenient_flags; +} + + +llhttp_errno_t llhttp_execute(llhttp_t* parser, const char* data, size_t len) { + return llhttp__internal_execute(parser, data, data + len); +} + + +void llhttp_settings_init(llhttp_settings_t* settings) { + memset(settings, 0, sizeof(*settings)); +} + + +llhttp_errno_t llhttp_finish(llhttp_t* parser) { + int err; + + /* We're in an error state. Don't bother doing anything. 
*/ + if (parser->error != 0) { + return 0; + } + + switch (parser->finish) { + case HTTP_FINISH_SAFE_WITH_CB: + CALLBACK_MAYBE(parser, on_message_complete); + if (err != HPE_OK) return err; + + /* FALLTHROUGH */ + case HTTP_FINISH_SAFE: + return HPE_OK; + case HTTP_FINISH_UNSAFE: + parser->reason = "Invalid EOF state"; + return HPE_INVALID_EOF_STATE; + default: + abort(); + } +} + + +void llhttp_pause(llhttp_t* parser) { + if (parser->error != HPE_OK) { + return; + } + + parser->error = HPE_PAUSED; + parser->reason = "Paused"; +} + + +void llhttp_resume(llhttp_t* parser) { + if (parser->error != HPE_PAUSED) { + return; + } + + parser->error = 0; +} + + +void llhttp_resume_after_upgrade(llhttp_t* parser) { + if (parser->error != HPE_PAUSED_UPGRADE) { + return; + } + + parser->error = 0; +} + + +llhttp_errno_t llhttp_get_errno(const llhttp_t* parser) { + return parser->error; +} + + +const char* llhttp_get_error_reason(const llhttp_t* parser) { + return parser->reason; +} + + +void llhttp_set_error_reason(llhttp_t* parser, const char* reason) { + parser->reason = reason; +} + + +const char* llhttp_get_error_pos(const llhttp_t* parser) { + return parser->error_pos; +} + + +const char* llhttp_errno_name(llhttp_errno_t err) { +#define HTTP_ERRNO_GEN(CODE, NAME, _) case HPE_##NAME: return "HPE_" #NAME; + switch (err) { + HTTP_ERRNO_MAP(HTTP_ERRNO_GEN) + default: abort(); + } +#undef HTTP_ERRNO_GEN +} + + +const char* llhttp_method_name(llhttp_method_t method) { +#define HTTP_METHOD_GEN(NUM, NAME, STRING) case HTTP_##NAME: return #STRING; + switch (method) { + HTTP_ALL_METHOD_MAP(HTTP_METHOD_GEN) + default: abort(); + } +#undef HTTP_METHOD_GEN +} + +const char* llhttp_status_name(llhttp_status_t status) { +#define HTTP_STATUS_GEN(NUM, NAME, STRING) case HTTP_STATUS_##NAME: return #STRING; + switch (status) { + HTTP_STATUS_MAP(HTTP_STATUS_GEN) + default: abort(); + } +#undef HTTP_STATUS_GEN +} + + +void llhttp_set_lenient_headers(llhttp_t* parser, int enabled) { + if 
(enabled) { + parser->lenient_flags |= LENIENT_HEADERS; + } else { + parser->lenient_flags &= ~LENIENT_HEADERS; + } +} + + +void llhttp_set_lenient_chunked_length(llhttp_t* parser, int enabled) { + if (enabled) { + parser->lenient_flags |= LENIENT_CHUNKED_LENGTH; + } else { + parser->lenient_flags &= ~LENIENT_CHUNKED_LENGTH; + } +} + + +void llhttp_set_lenient_keep_alive(llhttp_t* parser, int enabled) { + if (enabled) { + parser->lenient_flags |= LENIENT_KEEP_ALIVE; + } else { + parser->lenient_flags &= ~LENIENT_KEEP_ALIVE; + } +} + +void llhttp_set_lenient_transfer_encoding(llhttp_t* parser, int enabled) { + if (enabled) { + parser->lenient_flags |= LENIENT_TRANSFER_ENCODING; + } else { + parser->lenient_flags &= ~LENIENT_TRANSFER_ENCODING; + } +} + +void llhttp_set_lenient_version(llhttp_t* parser, int enabled) { + if (enabled) { + parser->lenient_flags |= LENIENT_VERSION; + } else { + parser->lenient_flags &= ~LENIENT_VERSION; + } +} + +void llhttp_set_lenient_data_after_close(llhttp_t* parser, int enabled) { + if (enabled) { + parser->lenient_flags |= LENIENT_DATA_AFTER_CLOSE; + } else { + parser->lenient_flags &= ~LENIENT_DATA_AFTER_CLOSE; + } +} + +void llhttp_set_lenient_optional_lf_after_cr(llhttp_t* parser, int enabled) { + if (enabled) { + parser->lenient_flags |= LENIENT_OPTIONAL_LF_AFTER_CR; + } else { + parser->lenient_flags &= ~LENIENT_OPTIONAL_LF_AFTER_CR; + } +} + +void llhttp_set_lenient_optional_crlf_after_chunk(llhttp_t* parser, int enabled) { + if (enabled) { + parser->lenient_flags |= LENIENT_OPTIONAL_CRLF_AFTER_CHUNK; + } else { + parser->lenient_flags &= ~LENIENT_OPTIONAL_CRLF_AFTER_CHUNK; + } +} + +void llhttp_set_lenient_optional_cr_before_lf(llhttp_t* parser, int enabled) { + if (enabled) { + parser->lenient_flags |= LENIENT_OPTIONAL_CR_BEFORE_LF; + } else { + parser->lenient_flags &= ~LENIENT_OPTIONAL_CR_BEFORE_LF; + } +} + +void llhttp_set_lenient_spaces_after_chunk_size(llhttp_t* parser, int enabled) { + if (enabled) { + 
/* Parser hook for URL span data: forwards the bytes in [p, endp) to the
 * user's `on_url` callback via SPAN_CALLBACK_MAYBE and returns the resulting
 * status (0 when no settings or no `on_url` callback are installed).
 */
int llhttp__on_url(llhttp_t* s, const char* p, const char* endp) {
  int err;
  /* The macro assigns its result to the local `err`. */
  SPAN_CALLBACK_MAYBE(s, on_url, p, endp - p);
  return err;
}
+ return err; +} + + +int llhttp__on_header_value_complete(llhttp_t* s, const char* p, const char* endp) { + int err; + CALLBACK_MAYBE(s, on_header_value_complete); + return err; +} + + +int llhttp__on_headers_complete(llhttp_t* s, const char* p, const char* endp) { + int err; + CALLBACK_MAYBE(s, on_headers_complete); + return err; +} + + +int llhttp__on_message_complete(llhttp_t* s, const char* p, const char* endp) { + int err; + CALLBACK_MAYBE(s, on_message_complete); + return err; +} + + +int llhttp__on_body(llhttp_t* s, const char* p, const char* endp) { + int err; + SPAN_CALLBACK_MAYBE(s, on_body, p, endp - p); + return err; +} + + +int llhttp__on_chunk_header(llhttp_t* s, const char* p, const char* endp) { + int err; + CALLBACK_MAYBE(s, on_chunk_header); + return err; +} + + +int llhttp__on_chunk_extension_name(llhttp_t* s, const char* p, const char* endp) { + int err; + SPAN_CALLBACK_MAYBE(s, on_chunk_extension_name, p, endp - p); + return err; +} + + +int llhttp__on_chunk_extension_name_complete(llhttp_t* s, const char* p, const char* endp) { + int err; + CALLBACK_MAYBE(s, on_chunk_extension_name_complete); + return err; +} + + +int llhttp__on_chunk_extension_value(llhttp_t* s, const char* p, const char* endp) { + int err; + SPAN_CALLBACK_MAYBE(s, on_chunk_extension_value, p, endp - p); + return err; +} + + +int llhttp__on_chunk_extension_value_complete(llhttp_t* s, const char* p, const char* endp) { + int err; + CALLBACK_MAYBE(s, on_chunk_extension_value_complete); + return err; +} + + +int llhttp__on_chunk_complete(llhttp_t* s, const char* p, const char* endp) { + int err; + CALLBACK_MAYBE(s, on_chunk_complete); + return err; +} + + +int llhttp__on_reset(llhttp_t* s, const char* p, const char* endp) { + int err; + CALLBACK_MAYBE(s, on_reset); + return err; +} + + +/* Private */ + + +void llhttp__debug(llhttp_t* s, const char* p, const char* endp, + const char* msg) { + if (p == endp) { + fprintf(stderr, "p=%p type=%d flags=%02x next=null debug=%s\n", s, 
s->type, + s->flags, msg); + } else { + fprintf(stderr, "p=%p type=%d flags=%02x next=%02x debug=%s\n", s, + s->type, s->flags, *p, msg); + } +} diff --git a/llhttp/src/native/api.h b/llhttp/src/native/api.h new file mode 100644 index 0000000..321879c --- /dev/null +++ b/llhttp/src/native/api.h @@ -0,0 +1,355 @@ +#ifndef INCLUDE_LLHTTP_API_H_ +#define INCLUDE_LLHTTP_API_H_ +#ifdef __cplusplus +extern "C" { +#endif +#include <stddef.h> + +#if defined(__wasm__) +#define LLHTTP_EXPORT __attribute__((visibility("default"))) +#else +#define LLHTTP_EXPORT +#endif + +typedef llhttp__internal_t llhttp_t; +typedef struct llhttp_settings_s llhttp_settings_t; + +typedef int (*llhttp_data_cb)(llhttp_t*, const char *at, size_t length); +typedef int (*llhttp_cb)(llhttp_t*); + +struct llhttp_settings_s { + /* Possible return values 0, -1, `HPE_PAUSED` */ + llhttp_cb on_message_begin; + + /* Possible return values 0, -1, HPE_USER */ + llhttp_data_cb on_url; + llhttp_data_cb on_status; + llhttp_data_cb on_method; + llhttp_data_cb on_version; + llhttp_data_cb on_header_field; + llhttp_data_cb on_header_value; + llhttp_data_cb on_chunk_extension_name; + llhttp_data_cb on_chunk_extension_value; + + /* Possible return values: + * 0 - Proceed normally + * 1 - Assume that request/response has no body, and proceed to parsing the + * next message + * 2 - Assume absence of body (as above) and make `llhttp_execute()` return + * `HPE_PAUSED_UPGRADE` + * -1 - Error + * `HPE_PAUSED` + */ + llhttp_cb on_headers_complete; + + /* Possible return values 0, -1, HPE_USER */ + llhttp_data_cb on_body; + + /* Possible return values 0, -1, `HPE_PAUSED` */ + llhttp_cb on_message_complete; + llhttp_cb on_url_complete; + llhttp_cb on_status_complete; + llhttp_cb on_method_complete; + llhttp_cb on_version_complete; + llhttp_cb on_header_field_complete; + llhttp_cb on_header_value_complete; + llhttp_cb on_chunk_extension_name_complete; + llhttp_cb on_chunk_extension_value_complete; + + /* When on_chunk_header 
is called, the current chunk length is stored + * in parser->content_length. + * Possible return values 0, -1, `HPE_PAUSED` + */ + llhttp_cb on_chunk_header; + llhttp_cb on_chunk_complete; + llhttp_cb on_reset; +}; + +/* Initialize the parser with specific type and user settings. + * + * NOTE: lifetime of `settings` has to be at least the same as the lifetime of + * the `parser` here. In practice, `settings` has to be either a static + * variable or be allocated with `malloc`, `new`, etc. + */ +LLHTTP_EXPORT +void llhttp_init(llhttp_t* parser, llhttp_type_t type, + const llhttp_settings_t* settings); + +LLHTTP_EXPORT +llhttp_t* llhttp_alloc(llhttp_type_t type); + +LLHTTP_EXPORT +void llhttp_free(llhttp_t* parser); + +LLHTTP_EXPORT +uint8_t llhttp_get_type(llhttp_t* parser); + +LLHTTP_EXPORT +uint8_t llhttp_get_http_major(llhttp_t* parser); + +LLHTTP_EXPORT +uint8_t llhttp_get_http_minor(llhttp_t* parser); + +LLHTTP_EXPORT +uint8_t llhttp_get_method(llhttp_t* parser); + +LLHTTP_EXPORT +int llhttp_get_status_code(llhttp_t* parser); + +LLHTTP_EXPORT +uint8_t llhttp_get_upgrade(llhttp_t* parser); + +/* Reset an already initialized parser back to the start state, preserving the + * existing parser type, callback settings, user data, and lenient flags. + */ +LLHTTP_EXPORT +void llhttp_reset(llhttp_t* parser); + +/* Initialize the settings object */ +LLHTTP_EXPORT +void llhttp_settings_init(llhttp_settings_t* settings); + +/* Parse full or partial request/response, invoking user callbacks along the + * way. + * + * If any of `llhttp_data_cb` returns errno not equal to `HPE_OK` - the parsing + * interrupts, and such errno is returned from `llhttp_execute()`. If + * `HPE_PAUSED` was used as a errno, the execution can be resumed with + * `llhttp_resume()` call. + * + * In a special case of CONNECT/Upgrade request/response `HPE_PAUSED_UPGRADE` + * is returned after fully parsing the request/response. 
If the user wishes to + * continue parsing, they need to invoke `llhttp_resume_after_upgrade()`. + * + * NOTE: if this function ever returns a non-pause type error, it will continue + * to return the same error upon each successive call up until `llhttp_init()` + * is called. + */ +LLHTTP_EXPORT +llhttp_errno_t llhttp_execute(llhttp_t* parser, const char* data, size_t len); + +/* This method should be called when the other side has no further bytes to + * send (e.g. shutdown of readable side of the TCP connection.) + * + * Requests without `Content-Length` and other messages might require treating + * all incoming bytes as the part of the body, up to the last byte of the + * connection. This method will invoke `on_message_complete()` callback if the + * request was terminated safely. Otherwise an error code would be returned. + */ +LLHTTP_EXPORT +llhttp_errno_t llhttp_finish(llhttp_t* parser); + +/* Returns `1` if the incoming message is parsed until the last byte, and has + * to be completed by calling `llhttp_finish()` on EOF + */ +LLHTTP_EXPORT +int llhttp_message_needs_eof(const llhttp_t* parser); + +/* Returns `1` if there might be any other messages following the last that was + * successfully parsed. + */ +LLHTTP_EXPORT +int llhttp_should_keep_alive(const llhttp_t* parser); + +/* Make further calls of `llhttp_execute()` return `HPE_PAUSED` and set + * appropriate error reason. + * + * Important: do not call this from user callbacks! User callbacks must return + * `HPE_PAUSED` if pausing is required. + */ +LLHTTP_EXPORT +void llhttp_pause(llhttp_t* parser); + +/* Might be called to resume the execution after the pause in user's callback. + * See `llhttp_execute()` above for details. + * + * Call this only if `llhttp_execute()` returns `HPE_PAUSED`. + */ +LLHTTP_EXPORT +void llhttp_resume(llhttp_t* parser); + +/* Might be called to resume the execution after the pause in user's callback. + * See `llhttp_execute()` above for details. 
+ * + * Call this only if `llhttp_execute()` returns `HPE_PAUSED_UPGRADE` + */ +LLHTTP_EXPORT +void llhttp_resume_after_upgrade(llhttp_t* parser); + +/* Returns the latest return error */ +LLHTTP_EXPORT +llhttp_errno_t llhttp_get_errno(const llhttp_t* parser); + +/* Returns the verbal explanation of the latest returned error. + * + * Note: User callback should set error reason when returning the error. See + * `llhttp_set_error_reason()` for details. + */ +LLHTTP_EXPORT +const char* llhttp_get_error_reason(const llhttp_t* parser); + +/* Assign verbal description to the returned error. Must be called in user + * callbacks right before returning the errno. + * + * Note: `HPE_USER` error code might be useful in user callbacks. + */ +LLHTTP_EXPORT +void llhttp_set_error_reason(llhttp_t* parser, const char* reason); + +/* Returns the pointer to the last parsed byte before the returned error. The + * pointer is relative to the `data` argument of `llhttp_execute()`. + * + * Note: this method might be useful for counting the number of parsed bytes. + */ +LLHTTP_EXPORT +const char* llhttp_get_error_pos(const llhttp_t* parser); + +/* Returns textual name of error code */ +LLHTTP_EXPORT +const char* llhttp_errno_name(llhttp_errno_t err); + +/* Returns textual name of HTTP method */ +LLHTTP_EXPORT +const char* llhttp_method_name(llhttp_method_t method); + +/* Returns textual name of HTTP status */ +LLHTTP_EXPORT +const char* llhttp_status_name(llhttp_status_t status); + +/* Enables/disables lenient header value parsing (disabled by default). + * + * Lenient parsing disables header value token checks, extending llhttp's + * protocol support to highly non-compliant clients/server. No + * `HPE_INVALID_HEADER_TOKEN` will be raised for incorrect header values when + * lenient parsing is "on". + * + * **Enabling this flag can pose a security issue since you will be exposed to + * request smuggling attacks. 
USE WITH CAUTION!** + */ +LLHTTP_EXPORT +void llhttp_set_lenient_headers(llhttp_t* parser, int enabled); + + +/* Enables/disables lenient handling of conflicting `Transfer-Encoding` and + * `Content-Length` headers (disabled by default). + * + * Normally `llhttp` would error when `Transfer-Encoding` is present in + * conjunction with `Content-Length`. This error is important to prevent HTTP + * request smuggling, but may be less desirable for a small number of cases + * involving legacy servers. + * + * **Enabling this flag can pose a security issue since you will be exposed to + * request smuggling attacks. USE WITH CAUTION!** + */ +LLHTTP_EXPORT +void llhttp_set_lenient_chunked_length(llhttp_t* parser, int enabled); + + +/* Enables/disables lenient handling of `Connection: close` and HTTP/1.0 + * requests/responses. + * + * Normally `llhttp` would error on (in strict mode) or discard (in loose mode) + * the HTTP request/response after the request/response with `Connection: close` + * and `Content-Length`. This is important to prevent cache poisoning attacks, + * but might interact badly with outdated and insecure clients. With this flag + * the extra request/response will be parsed normally. + * + * **Enabling this flag can pose a security issue since you will be exposed to + * poisoning attacks. USE WITH CAUTION!** + */ +LLHTTP_EXPORT +void llhttp_set_lenient_keep_alive(llhttp_t* parser, int enabled); + +/* Enables/disables lenient handling of `Transfer-Encoding` header. + * + * Normally `llhttp` would error when a `Transfer-Encoding` has `chunked` value + * and another value after it (either in a single header or in multiple + * headers whose values are internally joined using `, `). + * This is mandated by the spec to reliably determine request body size and thus + * avoid request smuggling. + * With this flag the extra value will be parsed normally. + * + * **Enabling this flag can pose a security issue since you will be exposed to + * request smuggling attacks. 
USE WITH CAUTION!** + */ +LLHTTP_EXPORT +void llhttp_set_lenient_transfer_encoding(llhttp_t* parser, int enabled); + +/* Enables/disables lenient handling of HTTP version. + * + * Normally `llhttp` would error when the HTTP version in the request or status line + * is not `0.9`, `1.0`, `1.1` or `2.0`. + * With this flag the invalid value will be parsed normally. + * + * **Enabling this flag can pose a security issue since you will allow unsupported + * HTTP versions. USE WITH CAUTION!** + */ +LLHTTP_EXPORT +void llhttp_set_lenient_version(llhttp_t* parser, int enabled); + +/* Enables/disables lenient handling of additional data received after a message ends + * and keep-alive is disabled. + * + * Normally `llhttp` would error when additional unexpected data is received if the message + * contains the `Connection` header with `close` value. + * With this flag the extra data will be discarded without throwing an error. + * + * **Enabling this flag can pose a security issue since you will be exposed to + * poisoning attacks. USE WITH CAUTION!** + */ +LLHTTP_EXPORT +void llhttp_set_lenient_data_after_close(llhttp_t* parser, int enabled); + +/* Enables/disables lenient handling of incomplete CRLF sequences. + * + * Normally `llhttp` would error when a CR is not followed by LF when terminating the + * request line, the status line, the headers or a chunk header. + * With this flag only a CR is required to terminate such sections. + * + * **Enabling this flag can pose a security issue since you will be exposed to + * request smuggling attacks. USE WITH CAUTION!** + */ +LLHTTP_EXPORT +void llhttp_set_lenient_optional_lf_after_cr(llhttp_t* parser, int enabled); + +/* + * Enables/disables lenient handling of line separators. + * + * Normally `llhttp` would error when a LF is not preceded by CR when terminating the + * request line, the status line, the headers, a chunk header or chunk data. + * With this flag only a LF is required to terminate such sections. 
+ * + * **Enabling this flag can pose a security issue since you will be exposed to + * request smuggling attacks. USE WITH CAUTION!** + */ +LLHTTP_EXPORT +void llhttp_set_lenient_optional_cr_before_lf(llhttp_t* parser, int enabled); + +/* Enables/disables lenient handling of chunks not separated via CRLF. + * + * Normally `llhttp` would error when a CRLF is missing after chunk data, before + * starting a new chunk. + * With this flag the new chunk can start immediately after the previous one. + * + * **Enabling this flag can pose a security issue since you will be exposed to + * request smuggling attacks. USE WITH CAUTION!** + */ +LLHTTP_EXPORT +void llhttp_set_lenient_optional_crlf_after_chunk(llhttp_t* parser, int enabled); + +/* Enables/disables lenient handling of spaces after chunk size. + * + * Normally `llhttp` would error when a chunk size is followed by one or more + * spaces instead of a CRLF or `;`. + * With this flag this check is disabled. + * + * **Enabling this flag can pose a security issue since you will be exposed to + * request smuggling attacks. 
USE WITH CAUTION!** + */ +LLHTTP_EXPORT +void llhttp_set_lenient_spaces_after_chunk_size(llhttp_t* parser, int enabled); + +#ifdef __cplusplus +} /* extern "C" */ +#endif +#endif /* INCLUDE_LLHTTP_API_H_ */ diff --git a/llhttp/src/native/http.c b/llhttp/src/native/http.c new file mode 100644 index 0000000..1ab91a5 --- /dev/null +++ b/llhttp/src/native/http.c @@ -0,0 +1,170 @@ +#include <stdio.h> +#ifndef LLHTTP__TEST +# include "llhttp.h" +#else +# define llhttp_t llparse_t +#endif /* */ + +int llhttp_message_needs_eof(const llhttp_t* parser); +int llhttp_should_keep_alive(const llhttp_t* parser); + +int llhttp__before_headers_complete(llhttp_t* parser, const char* p, + const char* endp) { + /* Set this here so that on_headers_complete() callbacks can see it */ + if ((parser->flags & F_UPGRADE) && + (parser->flags & F_CONNECTION_UPGRADE)) { + /* For responses, "Upgrade: foo" and "Connection: upgrade" are + * mandatory only when it is a 101 Switching Protocols response, + * otherwise it is purely informational, to announce support. + */ + parser->upgrade = + (parser->type == HTTP_REQUEST || parser->status_code == 101); + } else { + parser->upgrade = (parser->method == HTTP_CONNECT); + } + return 0; +} + + +/* Return values: + * 0 - No body, `restart`, message_complete + * 1 - CONNECT request, `restart`, message_complete, and pause + * 2 - chunk_size_start + * 3 - body_identity + * 4 - body_identity_eof + * 5 - invalid transfer-encoding for request + */ +int llhttp__after_headers_complete(llhttp_t* parser, const char* p, + const char* endp) { + int hasBody; + + hasBody = parser->flags & F_CHUNKED || parser->content_length > 0; + if ( + (parser->upgrade && (parser->method == HTTP_CONNECT || + (parser->flags & F_SKIPBODY) || !hasBody)) || + /* See RFC 2616 section 4.4 - 1xx e.g. Continue */ + (parser->type == HTTP_RESPONSE && parser->status_code == 101) + ) { + /* Exit, the rest of the message is in a different protocol. 
*/ + return 1; + } + + if (parser->type == HTTP_RESPONSE && parser->status_code == 100) { + /* No body, restart as the message is complete */ + return 0; + } + + /* See RFC 2616 section 4.4 */ + if ( + parser->flags & F_SKIPBODY || /* response to a HEAD request */ + ( + parser->type == HTTP_RESPONSE && ( + parser->status_code == 102 || /* Processing */ + parser->status_code == 103 || /* Early Hints */ + parser->status_code == 204 || /* No Content */ + parser->status_code == 304 /* Not Modified */ + ) + ) + ) { + return 0; + } else if (parser->flags & F_CHUNKED) { + /* chunked encoding - ignore Content-Length header, prepare for a chunk */ + return 2; + } else if (parser->flags & F_TRANSFER_ENCODING) { + if (parser->type == HTTP_REQUEST && + (parser->lenient_flags & LENIENT_CHUNKED_LENGTH) == 0 && + (parser->lenient_flags & LENIENT_TRANSFER_ENCODING) == 0) { + /* RFC 7230 3.3.3 */ + + /* If a Transfer-Encoding header field + * is present in a request and the chunked transfer coding is not + * the final encoding, the message body length cannot be determined + * reliably; the server MUST respond with the 400 (Bad Request) + * status code and then close the connection. + */ + return 5; + } else { + /* RFC 7230 3.3.3 */ + + /* If a Transfer-Encoding header field is present in a response and + * the chunked transfer coding is not the final encoding, the + * message body length is determined by reading the connection until + * it is closed by the server. 
+ */ + return 4; + } + } else { + if (!(parser->flags & F_CONTENT_LENGTH)) { + if (!llhttp_message_needs_eof(parser)) { + /* Assume content-length 0 - read the next */ + return 0; + } else { + /* Read body until EOF */ + return 4; + } + } else if (parser->content_length == 0) { + /* Content-Length header given but zero: Content-Length: 0\r\n */ + return 0; + } else { + /* Content-Length header given and non-zero */ + return 3; + } + } +} + + +int llhttp__after_message_complete(llhttp_t* parser, const char* p, + const char* endp) { + int should_keep_alive; + + should_keep_alive = llhttp_should_keep_alive(parser); + parser->finish = HTTP_FINISH_SAFE; + parser->flags = 0; + + /* NOTE: this is ignored in loose parsing mode */ + return should_keep_alive; +} + + +int llhttp_message_needs_eof(const llhttp_t* parser) { + if (parser->type == HTTP_REQUEST) { + return 0; + } + + /* See RFC 2616 section 4.4 */ + if (parser->status_code / 100 == 1 || /* 1xx e.g. Continue */ + parser->status_code == 204 || /* No Content */ + parser->status_code == 304 || /* Not Modified */ + (parser->flags & F_SKIPBODY)) { /* response to a HEAD request */ + return 0; + } + + /* RFC 7230 3.3.3, see `llhttp__after_headers_complete` */ + if ((parser->flags & F_TRANSFER_ENCODING) && + (parser->flags & F_CHUNKED) == 0) { + return 1; + } + + if (parser->flags & (F_CHUNKED | F_CONTENT_LENGTH)) { + return 0; + } + + return 1; +} + + +int llhttp_should_keep_alive(const llhttp_t* parser) { + if (parser->http_major > 0 && parser->http_minor > 0) { + /* HTTP/1.1 */ + if (parser->flags & F_CONNECTION_CLOSE) { + return 0; + } + } else { + /* HTTP/1.0 or earlier */ + if (!(parser->flags & F_CONNECTION_KEEP_ALIVE)) { + return 0; + } + } + + return !llhttp_message_needs_eof(parser); +} |