From 0b6210cd37b68b94252cb798598b12974a20e1c1 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Tue, 21 May 2024 22:56:19 +0200 Subject: Adding upstream version 5.28.2+dfsg1+~cs23.11.12.3. Signed-off-by: Daniel Baumann --- llparse/src/api.ts | 47 +++ llparse/src/compiler/header-builder.ts | 80 +++++ llparse/src/compiler/index.ts | 88 ++++++ llparse/src/implementation/c/code/and.ts | 11 + llparse/src/implementation/c/code/base.ts | 12 + llparse/src/implementation/c/code/external.ts | 19 ++ llparse/src/implementation/c/code/field.ts | 28 ++ llparse/src/implementation/c/code/index.ts | 27 ++ llparse/src/implementation/c/code/is-equal.ts | 10 + llparse/src/implementation/c/code/load.ts | 10 + llparse/src/implementation/c/code/mul-add.ts | 67 ++++ llparse/src/implementation/c/code/or.ts | 11 + llparse/src/implementation/c/code/store.ts | 11 + llparse/src/implementation/c/code/test.ts | 11 + llparse/src/implementation/c/code/update.ts | 11 + llparse/src/implementation/c/compilation.ts | 336 +++++++++++++++++++++ llparse/src/implementation/c/constants.ts | 45 +++ .../src/implementation/c/helpers/match-sequence.ts | 75 +++++ llparse/src/implementation/c/index.ts | 199 ++++++++++++ llparse/src/implementation/c/node/base.ts | 77 +++++ llparse/src/implementation/c/node/consume.ts | 48 +++ llparse/src/implementation/c/node/empty.ts | 16 + llparse/src/implementation/c/node/error.ts | 33 ++ llparse/src/implementation/c/node/index.ts | 27 ++ llparse/src/implementation/c/node/invoke.ts | 44 +++ llparse/src/implementation/c/node/pause.ts | 19 ++ llparse/src/implementation/c/node/sequence.ts | 55 ++++ llparse/src/implementation/c/node/single.ts | 47 +++ llparse/src/implementation/c/node/span-end.ts | 56 ++++ llparse/src/implementation/c/node/span-start.ts | 26 ++ llparse/src/implementation/c/node/table-lookup.ts | 196 ++++++++++++ llparse/src/implementation/c/transform/base.ts | 10 + llparse/src/implementation/c/transform/id.ts | 11 + 
llparse/src/implementation/c/transform/index.ts | 11 + .../implementation/c/transform/to-lower-unsafe.ts | 10 + llparse/src/implementation/c/transform/to-lower.ts | 11 + 36 files changed, 1795 insertions(+) create mode 100644 llparse/src/api.ts create mode 100644 llparse/src/compiler/header-builder.ts create mode 100644 llparse/src/compiler/index.ts create mode 100644 llparse/src/implementation/c/code/and.ts create mode 100644 llparse/src/implementation/c/code/base.ts create mode 100644 llparse/src/implementation/c/code/external.ts create mode 100644 llparse/src/implementation/c/code/field.ts create mode 100644 llparse/src/implementation/c/code/index.ts create mode 100644 llparse/src/implementation/c/code/is-equal.ts create mode 100644 llparse/src/implementation/c/code/load.ts create mode 100644 llparse/src/implementation/c/code/mul-add.ts create mode 100644 llparse/src/implementation/c/code/or.ts create mode 100644 llparse/src/implementation/c/code/store.ts create mode 100644 llparse/src/implementation/c/code/test.ts create mode 100644 llparse/src/implementation/c/code/update.ts create mode 100644 llparse/src/implementation/c/compilation.ts create mode 100644 llparse/src/implementation/c/constants.ts create mode 100644 llparse/src/implementation/c/helpers/match-sequence.ts create mode 100644 llparse/src/implementation/c/index.ts create mode 100644 llparse/src/implementation/c/node/base.ts create mode 100644 llparse/src/implementation/c/node/consume.ts create mode 100644 llparse/src/implementation/c/node/empty.ts create mode 100644 llparse/src/implementation/c/node/error.ts create mode 100644 llparse/src/implementation/c/node/index.ts create mode 100644 llparse/src/implementation/c/node/invoke.ts create mode 100644 llparse/src/implementation/c/node/pause.ts create mode 100644 llparse/src/implementation/c/node/sequence.ts create mode 100644 llparse/src/implementation/c/node/single.ts create mode 100644 llparse/src/implementation/c/node/span-end.ts create mode 100644 
llparse/src/implementation/c/node/span-start.ts create mode 100644 llparse/src/implementation/c/node/table-lookup.ts create mode 100644 llparse/src/implementation/c/transform/base.ts create mode 100644 llparse/src/implementation/c/transform/id.ts create mode 100644 llparse/src/implementation/c/transform/index.ts create mode 100644 llparse/src/implementation/c/transform/to-lower-unsafe.ts create mode 100644 llparse/src/implementation/c/transform/to-lower.ts (limited to 'llparse/src') diff --git a/llparse/src/api.ts b/llparse/src/api.ts new file mode 100644 index 0000000..a34f5bc --- /dev/null +++ b/llparse/src/api.ts @@ -0,0 +1,47 @@ +import * as frontend from 'llparse-frontend'; + +import source = frontend.source; + +import { Compiler, ICompilerOptions, ICompilerResult } from './compiler'; + +export { source, ICompilerOptions, ICompilerResult }; + +// TODO(indutny): API for disabling/short-circuiting spans + +/** + * LLParse graph builder and compiler. + */ +export class LLParse extends source.Builder { + /** + * The prefix controls the names of the functions and of the state struct in + * the generated public C header: + * + * ```c + * // state struct (also typedef'd as PREFIX_t) + * struct PREFIX_s { + * ... + * }; + * + * int PREFIX_init(PREFIX_t* state); + * int PREFIX_execute(PREFIX_t* state, const char* p, const char* endp); + * ``` + * + * @param prefix Prefix to be used when generating public API. + */ + constructor(private readonly prefix: string = 'llparse') { + super(); + } + + /** + * Compile the LLParse graph to C code and a C header. + * + * @param root Root node of the parse graph (see `.node()`) + * @param options Compiler options. 
+ */ + public build(root: source.node.Node, options: ICompilerOptions = {}) + : ICompilerResult { + const c = new Compiler(this.prefix, options); + + return c.compile(root, this.properties); + } +} diff --git a/llparse/src/compiler/header-builder.ts b/llparse/src/compiler/header-builder.ts new file mode 100644 index 0000000..9f5bee7 --- /dev/null +++ b/llparse/src/compiler/header-builder.ts @@ -0,0 +1,80 @@ +import * as frontend from 'llparse-frontend'; +import source = frontend.source; + +export interface IHeaderBuilderOptions { + readonly prefix: string; + readonly headerGuard?: string; + readonly properties: ReadonlyArray; + readonly spans: ReadonlyArray; +} + +export class HeaderBuilder { + public build(options: IHeaderBuilderOptions): string { + let res = ''; + const PREFIX = options.prefix.toUpperCase().replace(/[^a-z]/gi, '_'); + const DEFINE = options.headerGuard === undefined ? + `INCLUDE_${PREFIX}_H_` : options.headerGuard; + + res += `#ifndef ${DEFINE}\n`; + res += `#define ${DEFINE}\n`; + res += '#ifdef __cplusplus\n'; + res += 'extern "C" {\n'; + res += '#endif\n'; + res += '\n'; + + res += '#include \n'; + res += '\n'; + + // Structure + res += `typedef struct ${options.prefix}_s ${options.prefix}_t;\n`; + res += `struct ${options.prefix}_s {\n`; + res += ' int32_t _index;\n'; + + for (const [ index, field ] of options.spans.entries()) { + res += ` void* _span_pos${index};\n`; + if (field.callbacks.length > 1) { + res += ` void* _span_cb${index};\n`; + } + } + + res += ' int32_t error;\n'; + res += ' const char* reason;\n'; + res += ' const char* error_pos;\n'; + res += ' void* data;\n'; + res += ' void* _current;\n'; + + for (const prop of options.properties) { + let ty: string; + if (prop.ty === 'i8') { + ty = 'uint8_t'; + } else if (prop.ty === 'i16') { + ty = 'uint16_t'; + } else if (prop.ty === 'i32') { + ty = 'uint32_t'; + } else if (prop.ty === 'i64') { + ty = 'uint64_t'; + } else if (prop.ty === 'ptr') { + ty = 'void*'; + } else { + throw new 
Error( + `Unknown state property type: "${prop.ty}"`); + } + res += ` ${ty} ${prop.name};\n`; + } + res += '};\n'; + + res += '\n'; + + res += `int ${options.prefix}_init(${options.prefix}_t* s);\n`; + res += `int ${options.prefix}_execute(${options.prefix}_t* s, ` + + 'const char* p, const char* endp);\n'; + + res += '\n'; + res += '#ifdef __cplusplus\n'; + res += '} /* extern "C" *\/\n'; + res += '#endif\n'; + res += `#endif /* ${DEFINE} *\/\n`; + + return res; + } +} diff --git a/llparse/src/compiler/index.ts b/llparse/src/compiler/index.ts new file mode 100644 index 0000000..89c258a --- /dev/null +++ b/llparse/src/compiler/index.ts @@ -0,0 +1,88 @@ +import * as debugAPI from 'debug'; +import * as frontend from 'llparse-frontend'; + +import source = frontend.source; + +import * as cImpl from '../implementation/c'; +import { HeaderBuilder } from './header-builder'; + +const debug = debugAPI('llparse:compiler'); + +export interface ICompilerOptions { + /** + * Debug method name + * + * The method must have the following signature: + * + * ```c + * void debug(llparse_t* state, const char* p, const char* endp, + * const char* msg); + * ``` + * + * Where `llparse_t` is the parser state type. + */ + readonly debug?: string; + + /** + * What guard define to use in `#ifndef` in C headers. 
+ * + * Default value: `INCLUDE_PREFIX_H_`, where `PREFIX` is the `prefix` argument upper-cased with every non-letter replaced by `_` + */ + readonly headerGuard?: string; + + /** Optional frontend configuration */ + readonly frontend?: frontend.IFrontendLazyOptions; + + /** Optional C-backend configuration */ + readonly c?: cImpl.ICPublicOptions; +} + +export interface ICompilerResult { + /** + * Textual C code + */ + readonly c: string; + + /** + * Textual C header file + */ + readonly header: string; +} + +export class Compiler { + constructor(public readonly prefix: string, + public readonly options: ICompilerOptions) { + } + + public compile(root: source.node.Node, + properties: ReadonlyArray): ICompilerResult { + debug('Combining implementations'); + const container = new frontend.Container(); + + const c = new cImpl.CCompiler(container, Object.assign({ + debug: this.options.debug, + }, this.options.c)); + + debug('Running frontend pass'); + const f = new frontend.Frontend(this.prefix, + container.build(), + this.options.frontend); + const info = f.compile(root, properties); + + debug('Building header'); + const hb = new HeaderBuilder(); + + const header = hb.build({ + headerGuard: this.options.headerGuard, + prefix: this.prefix, + properties, + spans: info.spans, + }); + + debug('Building C'); + return { + header, + c: c.compile(info), + }; + } +} diff --git a/llparse/src/implementation/c/code/and.ts b/llparse/src/implementation/c/code/and.ts new file mode 100644 index 0000000..fdd5434 --- /dev/null +++ b/llparse/src/implementation/c/code/and.ts @@ -0,0 +1,11 @@ +import * as frontend from 'llparse-frontend'; + +import { Compilation } from '../compilation'; +import { Field } from './field'; + +export class And extends Field { + protected doBuild(ctx: Compilation, out: string[]): void { + out.push(`${this.field(ctx)} &= ${this.ref.value};`); + out.push('return 0;'); + } +} diff --git a/llparse/src/implementation/c/code/base.ts b/llparse/src/implementation/c/code/base.ts new file mode 100644 index 0000000..888330d --- /dev/null +++ 
b/llparse/src/implementation/c/code/base.ts @@ -0,0 +1,12 @@ +import * as frontend from 'llparse-frontend'; + +import { Compilation } from '../compilation'; + +export abstract class Code { + protected cachedDecl: string | undefined; + + constructor(public readonly ref: T) { + } + + public abstract build(ctx: Compilation, out: string[]): void; +} diff --git a/llparse/src/implementation/c/code/external.ts b/llparse/src/implementation/c/code/external.ts new file mode 100644 index 0000000..494fc5a --- /dev/null +++ b/llparse/src/implementation/c/code/external.ts @@ -0,0 +1,19 @@ +import * as frontend from 'llparse-frontend'; + +import { Compilation } from '../compilation'; +import { Code } from './base'; + +export abstract class External + extends Code { + + public build(ctx: Compilation, out: string[]): void { + out.push(`int ${this.ref.name}(`); + out.push(` ${ctx.prefix}_t* s, const unsigned char* p,`); + if (this.ref.signature === 'value') { + out.push(' const unsigned char* endp,'); + out.push(' int value);'); + } else { + out.push(' const unsigned char* endp);'); + } + } +} diff --git a/llparse/src/implementation/c/code/field.ts b/llparse/src/implementation/c/code/field.ts new file mode 100644 index 0000000..51f4439 --- /dev/null +++ b/llparse/src/implementation/c/code/field.ts @@ -0,0 +1,28 @@ +import * as frontend from 'llparse-frontend'; + +import { Compilation } from '../compilation'; +import { Code } from './base'; + +export abstract class Field extends Code { + public build(ctx: Compilation, out: string[]): void { + out.push(`int ${this.ref.name}(`); + out.push(` ${ctx.prefix}_t* ${ctx.stateArg()},`); + out.push(` const unsigned char* ${ctx.posArg()},`); + if (this.ref.signature === 'value') { + out.push(` const unsigned char* ${ctx.endPosArg()},`); + out.push(` int ${ctx.matchVar()}) {`); + } else { + out.push(` const unsigned char* ${ctx.endPosArg()}) {`); + } + const tmp: string[] = []; + this.doBuild(ctx, tmp); + ctx.indent(out, tmp, ' '); + 
out.push('}'); + } + + protected abstract doBuild(ctx: Compilation, out: string[]): void; + + protected field(ctx: Compilation): string { + return `${ctx.stateArg()}->${this.ref.field}`; + } +} diff --git a/llparse/src/implementation/c/code/index.ts b/llparse/src/implementation/c/code/index.ts new file mode 100644 index 0000000..0de5de5 --- /dev/null +++ b/llparse/src/implementation/c/code/index.ts @@ -0,0 +1,27 @@ +import * as frontend from 'llparse-frontend'; + +import { And } from './and'; +import { External } from './external'; +import { IsEqual } from './is-equal'; +import { Load } from './load'; +import { MulAdd } from './mul-add'; +import { Or } from './or'; +import { Store } from './store'; +import { Test } from './test'; +import { Update } from './update'; + +export * from './base'; + +export default { + And, + IsEqual, + Load, + Match: class Match extends External {}, + MulAdd, + Or, + Span: class Span extends External {}, + Store, + Test, + Update, + Value: class Value extends External {}, +}; diff --git a/llparse/src/implementation/c/code/is-equal.ts b/llparse/src/implementation/c/code/is-equal.ts new file mode 100644 index 0000000..f76c2c1 --- /dev/null +++ b/llparse/src/implementation/c/code/is-equal.ts @@ -0,0 +1,10 @@ +import * as frontend from 'llparse-frontend'; + +import { Compilation } from '../compilation'; +import { Field } from './field'; + +export class IsEqual extends Field { + protected doBuild(ctx: Compilation, out: string[]): void { + out.push(`return ${this.field(ctx)} == ${this.ref.value};`); + } +} diff --git a/llparse/src/implementation/c/code/load.ts b/llparse/src/implementation/c/code/load.ts new file mode 100644 index 0000000..b913f23 --- /dev/null +++ b/llparse/src/implementation/c/code/load.ts @@ -0,0 +1,10 @@ +import * as frontend from 'llparse-frontend'; + +import { Compilation } from '../compilation'; +import { Field } from './field'; + +export class Load extends Field { + protected doBuild(ctx: Compilation, out: string[]): 
void { + out.push(`return ${this.field(ctx)};`); + } +} diff --git a/llparse/src/implementation/c/code/mul-add.ts b/llparse/src/implementation/c/code/mul-add.ts new file mode 100644 index 0000000..fd5ce8c --- /dev/null +++ b/llparse/src/implementation/c/code/mul-add.ts @@ -0,0 +1,67 @@ +import * as assert from 'assert'; +import * as frontend from 'llparse-frontend'; + +import { Compilation } from '../compilation'; +import { SIGNED_LIMITS, UNSIGNED_LIMITS, SIGNED_TYPES } from '../constants'; +import { Field } from './field'; + +export class MulAdd extends Field { + protected doBuild(ctx: Compilation, out: string[]): void { + const options = this.ref.options; + const ty = ctx.getFieldType(this.ref.field); + + let field = this.field(ctx); + if (options.signed) { + assert(SIGNED_TYPES.has(ty), `Unexpected mulAdd type "${ty}"`); + const targetTy = SIGNED_TYPES.get(ty)!; + out.push(`${targetTy}* field = (${targetTy}*) &${field};`); + field = '(*field)'; + } + + const match = ctx.matchVar(); + + const limits = options.signed ? 
SIGNED_LIMITS : UNSIGNED_LIMITS; + assert(limits.has(ty), `Unexpected mulAdd type "${ty}"`); + const [ min, max ] = limits.get(ty)!; + + const mulMax = `${max} / ${options.base}`; + const mulMin = `${min} / ${options.base}`; + + out.push('/* Multiplication overflow */'); + out.push(`if (${field} > ${mulMax}) {`); + out.push(' return 1;'); + out.push('}'); + if (options.signed) { + out.push(`if (${field} < ${mulMin}) {`); + out.push(' return 1;'); + out.push('}'); + } + out.push(''); + + out.push(`${field} *= ${options.base};`); + out.push(''); + + out.push('/* Addition overflow */'); + out.push(`if (${match} >= 0) {`); + out.push(` if (${field} > ${max} - ${match}) {`); + out.push(' return 1;'); + out.push(' }'); + out.push('} else {'); + out.push(` if (${field} < ${min} - ${match}) {`); + out.push(' return 1;'); + out.push(' }'); + out.push('}'); + + out.push(`${field} += ${match};`); + + if (options.max !== undefined) { + out.push(''); + out.push('/* Enforce maximum */'); + out.push(`if (${field} > ${options.max}) {`); + out.push(' return 1;'); + out.push('}'); + } + + out.push('return 0;'); + } +} diff --git a/llparse/src/implementation/c/code/or.ts b/llparse/src/implementation/c/code/or.ts new file mode 100644 index 0000000..76b16f9 --- /dev/null +++ b/llparse/src/implementation/c/code/or.ts @@ -0,0 +1,11 @@ +import * as frontend from 'llparse-frontend'; + +import { Compilation } from '../compilation'; +import { Field } from './field'; + +export class Or extends Field { + protected doBuild(ctx: Compilation, out: string[]): void { + out.push(`${this.field(ctx)} |= ${this.ref.value};`); + out.push('return 0;'); + } +} diff --git a/llparse/src/implementation/c/code/store.ts b/llparse/src/implementation/c/code/store.ts new file mode 100644 index 0000000..a37d963 --- /dev/null +++ b/llparse/src/implementation/c/code/store.ts @@ -0,0 +1,11 @@ +import * as frontend from 'llparse-frontend'; + +import { Compilation } from '../compilation'; +import { Field } from 
'./field'; + +export class Store extends Field { + protected doBuild(ctx: Compilation, out: string[]): void { + out.push(`${this.field(ctx)} = ${ctx.matchVar()};`); + out.push('return 0;'); + } +} diff --git a/llparse/src/implementation/c/code/test.ts b/llparse/src/implementation/c/code/test.ts new file mode 100644 index 0000000..36126f5 --- /dev/null +++ b/llparse/src/implementation/c/code/test.ts @@ -0,0 +1,11 @@ +import * as frontend from 'llparse-frontend'; + +import { Compilation } from '../compilation'; +import { Field } from './field'; + +export class Test extends Field { + protected doBuild(ctx: Compilation, out: string[]): void { + const value = this.ref.value; + out.push(`return (${this.field(ctx)} & ${value}) == ${value};`); + } +} diff --git a/llparse/src/implementation/c/code/update.ts b/llparse/src/implementation/c/code/update.ts new file mode 100644 index 0000000..89efedf --- /dev/null +++ b/llparse/src/implementation/c/code/update.ts @@ -0,0 +1,11 @@ +import * as frontend from 'llparse-frontend'; + +import { Compilation } from '../compilation'; +import { Field } from './field'; + +export class Update extends Field { + protected doBuild(ctx: Compilation, out: string[]): void { + out.push(`${this.field(ctx)} = ${this.ref.value};`); + out.push('return 0;'); + } +} diff --git a/llparse/src/implementation/c/compilation.ts b/llparse/src/implementation/c/compilation.ts new file mode 100644 index 0000000..4df05a6 --- /dev/null +++ b/llparse/src/implementation/c/compilation.ts @@ -0,0 +1,336 @@ +import * as assert from 'assert'; +import { Buffer } from 'buffer'; +import * as frontend from 'llparse-frontend'; + +import { + CONTAINER_KEY, STATE_ERROR, + ARG_STATE, ARG_POS, ARG_ENDPOS, + VAR_MATCH, + STATE_PREFIX, LABEL_PREFIX, BLOB_PREFIX, + SEQUENCE_COMPLETE, SEQUENCE_MISMATCH, SEQUENCE_PAUSE, +} from './constants'; +import { Code } from './code'; +import { Node } from './node'; +import { Transform } from './transform'; +import { MatchSequence } from 
'./helpers/match-sequence'; + +// Number of hex words per line of blob declaration +const BLOB_GROUP_SIZE = 11; + +type WrappedNode = frontend.IWrap; + +interface IBlob { + readonly alignment: number | undefined; + readonly buffer: Buffer; + readonly name: string; +} + +// TODO(indutny): deduplicate +export interface ICompilationOptions { + readonly debug?: string; +} + +// TODO(indutny): deduplicate +export interface ICompilationProperty { + readonly name: string; + readonly ty: string; +} + +export class Compilation { + private readonly stateMap: Map> = new Map(); + private readonly blobs: Map = new Map(); + private readonly codeMap: Map> = new Map(); + private readonly matchSequence: + Map = new Map(); + private readonly resumptionTargets: Set = new Set(); + + constructor(public readonly prefix: string, + private readonly properties: ReadonlyArray, + resumptionTargets: ReadonlySet, + private readonly options: ICompilationOptions) { + for (const node of resumptionTargets) { + this.resumptionTargets.add(STATE_PREFIX + node.ref.id.name); + } + } + + private buildStateEnum(out: string[]): void { + out.push('enum llparse_state_e {'); + out.push(` ${STATE_ERROR},`); + for (const stateName of this.stateMap.keys()) { + if (this.resumptionTargets.has(stateName)) { + out.push(` ${stateName},`); + } + } + out.push('};'); + out.push('typedef enum llparse_state_e llparse_state_t;'); + } + + private buildBlobs(out: string[]): void { + if (this.blobs.size === 0) { + return; + } + + for (const blob of this.blobs.values()) { + const buffer = blob.buffer; + let align = ''; + if (blob.alignment) { + align = ` ALIGN(${blob.alignment})`; + } + + if (blob.alignment) { + out.push('#ifdef __SSE4_2__'); + } + out.push(`static const unsigned char${align} ${blob.name}[] = {`); + + for (let i = 0; i < buffer.length; i += BLOB_GROUP_SIZE) { + const limit = Math.min(buffer.length, i + BLOB_GROUP_SIZE); + const hex: string[] = []; + for (let j = i; j < limit; j++) { + const value = buffer[j] 
as number; + + const ch = String.fromCharCode(value); + // `'`, `\` + if (value === 0x27 || value === 0x5c) { + hex.push(`'\\${ch}'`); + } else if (value >= 0x20 && value <= 0x7e) { + hex.push(`'${ch}'`); + } else { + hex.push(`0x${value.toString(16)}`); + } + } + let line = ' ' + hex.join(', '); + if (limit !== buffer.length) { + line += ','; + } + out.push(line); + } + + out.push(`};`); + if (blob.alignment) { + out.push('#endif /* __SSE4_2__ */'); + } + } + out.push(''); + } + + private buildMatchSequence(out: string[]): void { + if (this.matchSequence.size === 0) { + return; + } + + MatchSequence.buildGlobals(out); + out.push(''); + + for (const match of this.matchSequence.values()) { + match.build(this, out); + out.push(''); + } + } + + public reserveSpans(spans: ReadonlyArray): void { + for (const span of spans) { + for (const callback of span.callbacks) { + this.buildCode(this.unwrapCode(callback)); + } + } + } + + public debug(out: string[], message: string): void { + if (this.options.debug === undefined) { + return; + } + + const args = [ + this.stateArg(), + `(const char*) ${this.posArg()}`, + `(const char*) ${this.endPosArg()}`, + ]; + + out.push(`${this.options.debug}(${args.join(', ')},`); + out.push(` ${this.cstring(message)});`); + } + + public buildGlobals(out: string[]): void { + if (this.options.debug !== undefined) { + out.push(`void ${this.options.debug}(`); + out.push(` ${this.prefix}_t* s, const char* p, const char* endp,`); + out.push(' const char* msg);'); + } + + this.buildBlobs(out); + this.buildMatchSequence(out); + this.buildStateEnum(out); + + for (const code of this.codeMap.values()) { + out.push(''); + code.build(this, out); + } + } + + public buildResumptionStates(out: string[]): void { + this.stateMap.forEach((lines, name) => { + if (!this.resumptionTargets.has(name)) { + return; + } + out.push(`case ${name}:`); + out.push(`${LABEL_PREFIX}${name}: {`); + lines.forEach((line) => out.push(` ${line}`)); + out.push(' /* UNREACHABLE 
*/;'); + out.push(' abort();'); + out.push('}'); + }); + } + + public buildInternalStates(out: string[]): void { + this.stateMap.forEach((lines, name) => { + if (this.resumptionTargets.has(name)) { + return; + } + out.push(`${LABEL_PREFIX}${name}: {`); + lines.forEach((line) => out.push(` ${line}`)); + out.push(' /* UNREACHABLE */;'); + out.push(' abort();'); + out.push('}'); + }); + } + + public addState(state: string, lines: ReadonlyArray): void { + assert(!this.stateMap.has(state)); + this.stateMap.set(state, lines); + } + + public buildCode(code: Code): string { + if (this.codeMap.has(code.ref.name)) { + assert.strictEqual(this.codeMap.get(code.ref.name)!, code, + `Code name conflict for "${code.ref.name}"`); + } else { + this.codeMap.set(code.ref.name, code); + } + return code.ref.name; + } + + public getFieldType(field: string): string { + for (const property of this.properties) { + if (property.name === field) { + return property.ty; + } + } + throw new Error(`Field "${field}" not found`); + } + + // Helpers + + public unwrapCode(code: frontend.IWrap) + : Code { + const container = code as frontend.ContainerWrap; + return container.get(CONTAINER_KEY); + } + + public unwrapNode(node: WrappedNode): Node { + const container = node as frontend.ContainerWrap; + return container.get(CONTAINER_KEY); + } + + public unwrapTransform(node: frontend.IWrap) + : Transform { + const container = + node as frontend.ContainerWrap; + return container.get(CONTAINER_KEY); + } + + public indent(out: string[], lines: ReadonlyArray, pad: string) { + for (const line of lines) { + out.push(`${pad}${line}`); + } + } + + // MatchSequence cache + + public getMatchSequence( + transform: frontend.IWrap, select: Buffer) + : string { + const wrap = this.unwrapTransform(transform); + + let res: MatchSequence; + if (this.matchSequence.has(wrap.ref.name)) { + res = this.matchSequence.get(wrap.ref.name)!; + } else { + res = new MatchSequence(wrap); + this.matchSequence.set(wrap.ref.name, res); 
+ } + + return res.getName(); + } + + // Arguments + + public stateArg(): string { + return ARG_STATE; + } + + public posArg(): string { + return ARG_POS; + } + + public endPosArg(): string { + return ARG_ENDPOS; + } + + public matchVar(): string { + return VAR_MATCH; + } + + // State fields + + public indexField(): string { + return this.stateField('_index'); + } + + public currentField(): string { + return this.stateField('_current'); + } + + public errorField(): string { + return this.stateField('error'); + } + + public reasonField(): string { + return this.stateField('reason'); + } + + public errorPosField(): string { + return this.stateField('error_pos'); + } + + public spanPosField(index: number): string { + return this.stateField(`_span_pos${index}`); + } + + public spanCbField(index: number): string { + return this.stateField(`_span_cb${index}`); + } + + public stateField(name: string): string { + return `${this.stateArg()}->${name}`; + } + + // Globals + + public cstring(value: string): string { + return JSON.stringify(value); + } + + public blob(value: Buffer, alignment?: number): string { + if (this.blobs.has(value)) { + return this.blobs.get(value)!.name; + } + + const res = BLOB_PREFIX + this.blobs.size; + this.blobs.set(value, { + alignment, + buffer: value, + name: res, + }); + return res; + } +} diff --git a/llparse/src/implementation/c/constants.ts b/llparse/src/implementation/c/constants.ts new file mode 100644 index 0000000..bfd5be3 --- /dev/null +++ b/llparse/src/implementation/c/constants.ts @@ -0,0 +1,45 @@ +export const CONTAINER_KEY = 'c'; + +export const LABEL_PREFIX = ''; +export const STATE_PREFIX = 's_n_'; +export const STATE_ERROR = 's_error'; + +export const BLOB_PREFIX = 'llparse_blob'; + +export const ARG_STATE = 'state'; +export const ARG_POS = 'p'; +export const ARG_ENDPOS = 'endp'; + +export const VAR_MATCH = 'match'; + +// MatchSequence + +export const SEQUENCE_COMPLETE = 'kMatchComplete'; +export const SEQUENCE_MISMATCH = 
'kMatchMismatch'; +export const SEQUENCE_PAUSE = 'kMatchPause'; + +export const SIGNED_LIMITS: Map = new Map(); +SIGNED_LIMITS.set('i8', [ '-0x80', '0x7f' ]); +SIGNED_LIMITS.set('i16', [ '-0x8000', '0x7fff' ]); +SIGNED_LIMITS.set('i32', [ '(-0x7fffffff - 1)', '0x7fffffff' ]); +SIGNED_LIMITS.set('i64', [ '(-0x7fffffffffffffffLL - 1)', + '0x7fffffffffffffffLL' ]); + +export const UNSIGNED_LIMITS: Map = new Map(); +UNSIGNED_LIMITS.set('i8', [ '0', '0xff' ]); +UNSIGNED_LIMITS.set('i8', [ '0', '0xff' ]); +UNSIGNED_LIMITS.set('i16', [ '0', '0xffff' ]); +UNSIGNED_LIMITS.set('i32', [ '0', '0xffffffff' ]); +UNSIGNED_LIMITS.set('i64', [ '0ULL', '0xffffffffffffffffULL' ]); + +export const UNSIGNED_TYPES: Map = new Map(); +UNSIGNED_TYPES.set('i8', 'uint8_t'); +UNSIGNED_TYPES.set('i16', 'uint16_t'); +UNSIGNED_TYPES.set('i32', 'uint32_t'); +UNSIGNED_TYPES.set('i64', 'uint64_t'); + +export const SIGNED_TYPES: Map = new Map(); +SIGNED_TYPES.set('i8', 'int8_t'); +SIGNED_TYPES.set('i16', 'int16_t'); +SIGNED_TYPES.set('i32', 'int32_t'); +SIGNED_TYPES.set('i64', 'int64_t'); diff --git a/llparse/src/implementation/c/helpers/match-sequence.ts b/llparse/src/implementation/c/helpers/match-sequence.ts new file mode 100644 index 0000000..278f4b5 --- /dev/null +++ b/llparse/src/implementation/c/helpers/match-sequence.ts @@ -0,0 +1,75 @@ +import * as assert from 'assert'; +import { Buffer } from 'buffer'; +import * as frontend from 'llparse-frontend'; + +import { + SEQUENCE_COMPLETE, SEQUENCE_MISMATCH, SEQUENCE_PAUSE, +} from '../constants'; +import { Transform } from '../transform'; +import { Compilation } from '../compilation'; + +type TransformWrap = Transform; + +export class MatchSequence { + constructor(private readonly transform: TransformWrap) { + } + + public static buildGlobals(out: string[]): void { + out.push('enum llparse_match_status_e {'); + out.push(` ${SEQUENCE_COMPLETE},`); + out.push(` ${SEQUENCE_PAUSE},`); + out.push(` ${SEQUENCE_MISMATCH}`); + out.push('};'); + 
out.push('typedef enum llparse_match_status_e llparse_match_status_t;'); + out.push(''); + out.push('struct llparse_match_s {'); + out.push(' llparse_match_status_t status;'); + out.push(' const unsigned char* current;'); + out.push('};'); + out.push('typedef struct llparse_match_s llparse_match_t;'); + } + + public getName(): string { + return `llparse__match_sequence_${this.transform.ref.name}`; + } + + public build(ctx: Compilation, out: string[]): void { + out.push(`static llparse_match_t ${this.getName()}(`); + out.push(` ${ctx.prefix}_t* s, const unsigned char* p,`); + out.push(' const unsigned char* endp,'); + out.push(' const unsigned char* seq, uint32_t seq_len) {'); + + // Vars + out.push(' uint32_t index;'); + out.push(' llparse_match_t res;'); + out.push(''); + + // Body + out.push(' index = s->_index;'); + out.push(' for (; p != endp; p++) {'); + out.push(' unsigned char current;'); + out.push(''); + out.push(` current = ${this.transform.build(ctx, '*p')};`); + out.push(' if (current == seq[index]) {'); + out.push(' if (++index == seq_len) {'); + out.push(` res.status = ${SEQUENCE_COMPLETE};`); + out.push(' goto reset;'); + out.push(' }'); + out.push(' } else {'); + out.push(` res.status = ${SEQUENCE_MISMATCH};`); + out.push(' goto reset;'); + out.push(' }'); + out.push(' }'); + + out.push(' s->_index = index;'); + out.push(` res.status = ${SEQUENCE_PAUSE};`); + out.push(' res.current = p;'); + out.push(' return res;'); + + out.push('reset:'); + out.push(' s->_index = 0;'); + out.push(' res.current = p;'); + out.push(' return res;'); + out.push('}'); + } +} diff --git a/llparse/src/implementation/c/index.ts b/llparse/src/implementation/c/index.ts new file mode 100644 index 0000000..ae94d34 --- /dev/null +++ b/llparse/src/implementation/c/index.ts @@ -0,0 +1,199 @@ +import * as frontend from 'llparse-frontend'; + +import { + ARG_STATE, ARG_POS, ARG_ENDPOS, + STATE_ERROR, + VAR_MATCH, + CONTAINER_KEY, +} from './constants'; +import { Compilation } from 
'./compilation'; +import code from './code'; +import node from './node'; +import { Node } from './node'; +import transform from './transform'; + +export interface ICCompilerOptions { + readonly debug?: string; + readonly header?: string; +} + +export interface ICPublicOptions { + readonly header?: string; +} + +export class CCompiler { + constructor(container: frontend.Container, + public readonly options: ICCompilerOptions) { + container.add(CONTAINER_KEY, { code, node, transform }); + } + + public compile(info: frontend.IFrontendResult): string { + const compilation = new Compilation(info.prefix, info.properties, + info.resumptionTargets, this.options); + const out: string[] = []; + + out.push('#include '); + out.push('#include '); + out.push('#include '); + out.push(''); + + // NOTE: Inspired by https://github.com/h2o/picohttpparser + // TODO(indutny): Windows support for SSE4.2. + // See: https://github.com/nodejs/llparse/pull/24#discussion_r299789676 + // (There is no `__SSE4_2__` define for MSVC) + out.push('#ifdef __SSE4_2__'); + out.push(' #ifdef _MSC_VER'); + out.push(' #include '); + out.push(' #else /* !_MSC_VER */'); + out.push(' #include '); + out.push(' #endif /* _MSC_VER */'); + out.push('#endif /* __SSE4_2__ */'); + out.push(''); + + out.push('#ifdef _MSC_VER'); + out.push(' #define ALIGN(n) _declspec(align(n))'); + out.push('#else /* !_MSC_VER */'); + out.push(' #define ALIGN(n) __attribute__((aligned(n)))'); + out.push('#endif /* _MSC_VER */'); + + out.push(''); + out.push(`#include "${this.options.header || info.prefix}.h"`); + out.push(``); + out.push(`typedef int (*${info.prefix}__span_cb)(`); + out.push(` ${info.prefix}_t*, const char*, const char*);`); + out.push(''); + + // Queue span callbacks to be built before `executeSpans()` code gets called + // below. 
+ compilation.reserveSpans(info.spans); + + const root = info.root as frontend.ContainerWrap; + const rootState = root.get>(CONTAINER_KEY) + .build(compilation); + + compilation.buildGlobals(out); + out.push(''); + + out.push(`int ${info.prefix}_init(${info.prefix}_t* ${ARG_STATE}) {`); + out.push(` memset(${ARG_STATE}, 0, sizeof(*${ARG_STATE}));`); + out.push(` ${ARG_STATE}->_current = (void*) (intptr_t) ${rootState};`); + out.push(' return 0;'); + out.push('}'); + out.push(''); + + out.push(`static llparse_state_t ${info.prefix}__run(`); + out.push(` ${info.prefix}_t* ${ARG_STATE},`); + out.push(` const unsigned char* ${ARG_POS},`); + out.push(` const unsigned char* ${ARG_ENDPOS}) {`); + out.push(` int ${VAR_MATCH};`); + out.push(` switch ((llparse_state_t) (intptr_t) ` + + `${compilation.currentField()}) {`); + + let tmp: string[] = []; + compilation.buildResumptionStates(tmp); + compilation.indent(out, tmp, ' '); + + out.push(' default:'); + out.push(' /* UNREACHABLE */'); + out.push(' abort();'); + out.push(' }'); + + tmp = []; + compilation.buildInternalStates(tmp); + compilation.indent(out, tmp, ' '); + + out.push('}'); + out.push(''); + + + out.push(`int ${info.prefix}_execute(${info.prefix}_t* ${ARG_STATE}, ` + + `const char* ${ARG_POS}, const char* ${ARG_ENDPOS}) {`); + out.push(' llparse_state_t next;'); + out.push(''); + + out.push(' /* check lingering errors */'); + out.push(` if (${compilation.errorField()} != 0) {`); + out.push(` return ${compilation.errorField()};`); + out.push(' }'); + out.push(''); + + tmp = []; + this.restartSpans(compilation, info, tmp); + compilation.indent(out, tmp, ' '); + + const args = [ + compilation.stateArg(), + `(const unsigned char*) ${compilation.posArg()}`, + `(const unsigned char*) ${compilation.endPosArg()}`, + ]; + out.push(` next = ${info.prefix}__run(${args.join(', ')});`); + out.push(` if (next == ${STATE_ERROR}) {`); + out.push(` return ${compilation.errorField()};`); + out.push(' }'); + out.push(` 
${compilation.currentField()} = (void*) (intptr_t) next;`); + out.push(''); + + tmp = []; + this.executeSpans(compilation, info, tmp); + compilation.indent(out, tmp, ' '); + + out.push(' return 0;'); + out.push('}'); + + return out.join('\n'); + } + + private restartSpans(ctx: Compilation, info: frontend.IFrontendResult, + out: string[]): void { + if (info.spans.length === 0) { + return; + } + + out.push('/* restart spans */'); + for (const span of info.spans) { + const posField = ctx.spanPosField(span.index); + + out.push(`if (${posField} != NULL) {`); + out.push(` ${posField} = (void*) ${ctx.posArg()};`); + out.push('}'); + } + out.push(''); + } + + private executeSpans(ctx: Compilation, info: frontend.IFrontendResult, + out: string[]): void { + if (info.spans.length === 0) { + return; + } + + out.push('/* execute spans */'); + for (const span of info.spans) { + const posField = ctx.spanPosField(span.index); + let callback: string; + if (span.callbacks.length === 1) { + callback = ctx.buildCode(ctx.unwrapCode(span.callbacks[0])); + } else { + callback = `(${info.prefix}__span_cb) ` + ctx.spanCbField(span.index); + callback = `(${callback})`; + } + + const args = [ + ctx.stateArg(), posField, `(const char*) ${ctx.endPosArg()}`, + ]; + + out.push(`if (${posField} != NULL) {`); + out.push(' int error;'); + out.push(''); + out.push(` error = ${callback}(${args.join(', ')});`); + + // TODO(indutny): de-duplicate this here and in SpanEnd + out.push(' if (error != 0) {'); + out.push(` ${ctx.errorField()} = error;`); + out.push(` ${ctx.errorPosField()} = ${ctx.endPosArg()};`); + out.push(' return error;'); + out.push(' }'); + out.push('}'); + } + out.push(''); + } +} diff --git a/llparse/src/implementation/c/node/base.ts b/llparse/src/implementation/c/node/base.ts new file mode 100644 index 0000000..51f90bb --- /dev/null +++ b/llparse/src/implementation/c/node/base.ts @@ -0,0 +1,77 @@ +import * as assert from 'assert'; +import * as frontend from 'llparse-frontend'; + 
+import { Compilation } from '../compilation'; +import { + STATE_PREFIX, LABEL_PREFIX, +} from '../constants'; + +export interface INodeEdge { + readonly node: frontend.IWrap; + readonly noAdvance: boolean; + readonly value?: number; +} + +export abstract class Node { + protected cachedDecl: string | undefined; + protected privCompilation: Compilation | undefined; + + constructor(public readonly ref: T) { + } + + public build(compilation: Compilation): string { + if (this.cachedDecl !== undefined) { + return this.cachedDecl; + } + + const res = STATE_PREFIX + this.ref.id.name; + this.cachedDecl = res; + + this.privCompilation = compilation; + + const out: string[] = []; + compilation.debug(out, + `Entering node "${this.ref.id.originalName}" ("${this.ref.id.name}")`); + this.doBuild(out); + + compilation.addState(res, out); + + return res; + } + + protected get compilation(): Compilation { + assert(this.privCompilation !== undefined); + return this.privCompilation!; + } + + protected prologue(out: string[]): void { + const ctx = this.compilation; + + out.push(`if (${ctx.posArg()} == ${ctx.endPosArg()}) {`); + + const tmp: string[] = []; + this.pause(tmp); + this.compilation.indent(out, tmp, ' '); + + out.push('}'); + } + + protected pause(out: string[]): void { + out.push(`return ${this.cachedDecl};`); + } + + protected tailTo(out: string[], edge: INodeEdge): void { + const ctx = this.compilation; + const target = ctx.unwrapNode(edge.node).build(ctx); + + if (!edge.noAdvance) { + out.push(`${ctx.posArg()}++;`); + } + if (edge.value !== undefined) { + out.push(`${ctx.matchVar()} = ${edge.value};`); + } + out.push(`goto ${LABEL_PREFIX}${target};`); + } + + protected abstract doBuild(out: string[]): void; +} diff --git a/llparse/src/implementation/c/node/consume.ts b/llparse/src/implementation/c/node/consume.ts new file mode 100644 index 0000000..658a00e --- /dev/null +++ b/llparse/src/implementation/c/node/consume.ts @@ -0,0 +1,48 @@ +import * as frontend from 
'llparse-frontend'; + +import { Compilation } from '../compilation'; +import { Node } from './base'; + +export class Consume extends Node { + public doBuild(out: string[]): void { + const ctx = this.compilation; + + const index = ctx.stateField(this.ref.field); + const ty = ctx.getFieldType(this.ref.field); + + let fieldTy: string; + if (ty === 'i64') { + fieldTy = 'uint64_t'; + } else if (ty === 'i32') { + fieldTy = 'uint32_t'; + } else if (ty === 'i16') { + fieldTy = 'uint16_t'; + } else if (ty === 'i8') { + fieldTy = 'uint8_t'; + } else { + throw new Error( + `Unsupported type ${ty} of field ${this.ref.field} for consume node`); + } + + out.push('size_t avail;'); + out.push(`${fieldTy} need;`); + + out.push(''); + out.push(`avail = ${ctx.endPosArg()} - ${ctx.posArg()};`); + out.push(`need = ${index};`); + + // Note: `avail` or `need` are going to coerced to the largest + // datatype needed to hold either of the values. + out.push('if (avail >= need) {'); + out.push(` p += need;`); + out.push(` ${index} = 0;`); + const tmp: string[] = []; + this.tailTo(tmp, this.ref.otherwise!); + ctx.indent(out, tmp, ' '); + out.push('}'); + out.push(''); + + out.push(`${index} -= avail;`); + this.pause(out); + } +} diff --git a/llparse/src/implementation/c/node/empty.ts b/llparse/src/implementation/c/node/empty.ts new file mode 100644 index 0000000..e28ecb5 --- /dev/null +++ b/llparse/src/implementation/c/node/empty.ts @@ -0,0 +1,16 @@ +import * as frontend from 'llparse-frontend'; + +import { Compilation } from '../compilation'; +import { Node } from './base'; + +export class Empty extends Node { + public doBuild(out: string[]): void { + const otherwise = this.ref.otherwise!; + + if (!otherwise.noAdvance) { + this.prologue(out); + } + + this.tailTo(out, otherwise); + } +} diff --git a/llparse/src/implementation/c/node/error.ts b/llparse/src/implementation/c/node/error.ts new file mode 100644 index 0000000..29dce63 --- /dev/null +++ b/llparse/src/implementation/c/node/error.ts 
@@ -0,0 +1,33 @@ +import * as frontend from 'llparse-frontend'; + +import { Compilation } from '../compilation'; +import { STATE_ERROR } from '../constants'; +import { Node } from './base'; + +class ErrorNode extends Node { + protected storeError(out: string[]): void { + const ctx = this.compilation; + + let hexCode: string; + if (this.ref.code < 0) { + hexCode = `-0x` + this.ref.code.toString(16); + } else { + hexCode = '0x' + this.ref.code.toString(16); + } + + out.push(`${ctx.errorField()} = ${hexCode};`); + out.push(`${ctx.reasonField()} = ${ctx.cstring(this.ref.reason)};`); + out.push(`${ctx.errorPosField()} = (const char*) ${ctx.posArg()};`); + } + + public doBuild(out: string[]): void { + this.storeError(out); + + // Non-recoverable state + out.push(`${this.compilation.currentField()} = ` + + `(void*) (intptr_t) ${STATE_ERROR};`); + out.push(`return ${STATE_ERROR};`); + } +} + +export { ErrorNode as Error }; diff --git a/llparse/src/implementation/c/node/index.ts b/llparse/src/implementation/c/node/index.ts new file mode 100644 index 0000000..ba751d9 --- /dev/null +++ b/llparse/src/implementation/c/node/index.ts @@ -0,0 +1,27 @@ +import * as frontend from 'llparse-frontend'; + +import { Consume } from './consume'; +import { Empty } from './empty'; +import { Error as ErrorNode } from './error'; +import { Invoke } from './invoke'; +import { Pause } from './pause'; +import { Sequence } from './sequence'; +import { Single } from './single'; +import { SpanEnd } from './span-end'; +import { SpanStart } from './span-start'; +import { TableLookup } from './table-lookup'; + +export { Node } from './base'; + +export default { + Consume, + Empty, + Error: class Error extends ErrorNode {}, + Invoke, + Pause, + Sequence, + Single, + SpanEnd, + SpanStart, + TableLookup, +}; diff --git a/llparse/src/implementation/c/node/invoke.ts b/llparse/src/implementation/c/node/invoke.ts new file mode 100644 index 0000000..ee917e9 --- /dev/null +++ 
b/llparse/src/implementation/c/node/invoke.ts @@ -0,0 +1,44 @@ +import * as frontend from 'llparse-frontend'; + +import { Compilation } from '../compilation'; +import { Node } from './base'; + +export class Invoke extends Node { + public doBuild(out: string[]): void { + const ctx = this.compilation; + + const code = ctx.unwrapCode(this.ref.code); + const codeDecl = ctx.buildCode(code); + + const args: string[] = [ + ctx.stateArg(), + ctx.posArg(), + ctx.endPosArg(), + ]; + + const signature = code.ref.signature; + if (signature === 'value') { + args.push(ctx.matchVar()); + } + + out.push(`switch (${codeDecl}(${args.join(', ')})) {`); + let tmp: string[]; + + for (const edge of this.ref.edges) { + out.push(` case ${edge.code}:`); + tmp = []; + this.tailTo(tmp, { + noAdvance: true, + node: edge.node, + value: undefined, + }); + ctx.indent(out, tmp, ' '); + } + + out.push(' default:'); + tmp = []; + this.tailTo(tmp, this.ref.otherwise!); + ctx.indent(out, tmp, ' '); + out.push('}'); + } +} diff --git a/llparse/src/implementation/c/node/pause.ts b/llparse/src/implementation/c/node/pause.ts new file mode 100644 index 0000000..c239b46 --- /dev/null +++ b/llparse/src/implementation/c/node/pause.ts @@ -0,0 +1,19 @@ +import * as frontend from 'llparse-frontend'; + +import { Compilation } from '../compilation'; +import { STATE_ERROR } from '../constants'; +import { Error as ErrorNode } from './error'; + +export class Pause extends ErrorNode { + public doBuild(out: string[]): void { + const ctx = this.compilation; + + this.storeError(out); + + // Recoverable state + const otherwise = ctx.unwrapNode(this.ref.otherwise!.node).build(ctx); + out.push(`${ctx.currentField()} = ` + + `(void*) (intptr_t) ${otherwise};`); + out.push(`return ${STATE_ERROR};`); + } +} diff --git a/llparse/src/implementation/c/node/sequence.ts b/llparse/src/implementation/c/node/sequence.ts new file mode 100644 index 0000000..73d8816 --- /dev/null +++ b/llparse/src/implementation/c/node/sequence.ts @@ 
-0,0 +1,55 @@ +import * as frontend from 'llparse-frontend'; + +import { Compilation } from '../compilation'; +import { + SEQUENCE_COMPLETE, SEQUENCE_MISMATCH, SEQUENCE_PAUSE, +} from '../constants'; +import { Node } from './base'; + +export class Sequence extends Node { + public doBuild(out: string[]): void { + const ctx = this.compilation; + + out.push('llparse_match_t match_seq;'); + out.push(''); + + this.prologue(out); + + const matchSequence = ctx.getMatchSequence(this.ref.transform!, + this.ref.select); + + out.push(`match_seq = ${matchSequence}(${ctx.stateArg()}, ` + + `${ctx.posArg()}, ` + + `${ctx.endPosArg()}, ${ctx.blob(this.ref.select)}, ` + + `${this.ref.select.length});`); + out.push('p = match_seq.current;'); + + let tmp: string[]; + + out.push('switch (match_seq.status) {'); + + out.push(` case ${SEQUENCE_COMPLETE}: {`); + tmp = []; + this.tailTo(tmp, { + noAdvance: false, + node: this.ref.edge!.node, + value: this.ref.edge!.value, + }); + ctx.indent(out, tmp, ' '); + out.push(' }'); + + out.push(` case ${SEQUENCE_PAUSE}: {`); + tmp = []; + this.pause(tmp); + ctx.indent(out, tmp, ' '); + out.push(' }'); + + out.push(` case ${SEQUENCE_MISMATCH}: {`); + tmp = []; + this.tailTo(tmp, this.ref.otherwise!); + ctx.indent(out, tmp, ' '); + out.push(' }'); + + out.push('}'); + } +} diff --git a/llparse/src/implementation/c/node/single.ts b/llparse/src/implementation/c/node/single.ts new file mode 100644 index 0000000..b9c8811 --- /dev/null +++ b/llparse/src/implementation/c/node/single.ts @@ -0,0 +1,47 @@ +import * as frontend from 'llparse-frontend'; + +import { Compilation } from '../compilation'; +import { Node } from './base'; + +export class Single extends Node { + public doBuild(out: string[]): void { + const ctx = this.compilation; + + const otherwise = this.ref.otherwise!; + + this.prologue(out); + + const transform = ctx.unwrapTransform(this.ref.transform!); + const current = transform.build(ctx, `*${ctx.posArg()}`); + + out.push(`switch 
(${current}) {`) + this.ref.edges.forEach((edge) => { + let ch: string; + + // Non-printable ASCII, or single-quote, or forward slash + if (edge.key < 0x20 || edge.key > 0x7e || edge.key === 0x27 || + edge.key === 0x5c) { + ch = edge.key.toString(); + } else { + ch = `'${String.fromCharCode(edge.key)}'`; + } + out.push(` case ${ch}: {`); + + const tmp: string[] = []; + this.tailTo(tmp, edge); + ctx.indent(out, tmp, ' '); + + out.push(' }'); + }); + + out.push(` default: {`); + + const tmp: string[] = []; + this.tailTo(tmp, otherwise); + ctx.indent(out, tmp, ' '); + + out.push(' }'); + + out.push(`}`); + } +} diff --git a/llparse/src/implementation/c/node/span-end.ts b/llparse/src/implementation/c/node/span-end.ts new file mode 100644 index 0000000..09f97e5 --- /dev/null +++ b/llparse/src/implementation/c/node/span-end.ts @@ -0,0 +1,56 @@ +import * as frontend from 'llparse-frontend'; + +import { Compilation } from '../compilation'; +import { STATE_ERROR } from '../constants'; +import { Node } from './base'; + +export class SpanEnd extends Node { + public doBuild(out: string[]): void { + out.push('const unsigned char* start;'); + out.push('int err;'); + out.push(''); + + const ctx = this.compilation; + const field = this.ref.field; + const posField = ctx.spanPosField(field.index); + + // Load start position + out.push(`start = ${posField};`); + + // ...and reset + out.push(`${posField} = NULL;`); + + // Invoke callback + const callback = ctx.buildCode(ctx.unwrapCode(this.ref.callback)); + out.push(`err = ${callback}(${ctx.stateArg()}, start, ${ctx.posArg()});`); + + out.push('if (err != 0) {'); + const tmp: string[] = []; + this.buildError(tmp, 'err'); + ctx.indent(out, tmp, ' '); + out.push('}'); + + const otherwise = this.ref.otherwise!; + this.tailTo(out, otherwise); + } + + private buildError(out: string[], code: string) { + const ctx = this.compilation; + + out.push(`${ctx.errorField()} = ${code};`); + + const otherwise = this.ref.otherwise!; + let resumePos = 
ctx.posArg(); + if (!otherwise.noAdvance) { + resumePos = `(${resumePos} + 1)`; + } + + out.push(`${ctx.errorPosField()} = (const char*) ${resumePos};`); + + const resumptionTarget = ctx.unwrapNode(otherwise.node).build(ctx); + out.push(`${ctx.currentField()} = ` + + `(void*) (intptr_t) ${resumptionTarget};`); + + out.push(`return ${STATE_ERROR};`); + } +} diff --git a/llparse/src/implementation/c/node/span-start.ts b/llparse/src/implementation/c/node/span-start.ts new file mode 100644 index 0000000..445da67 --- /dev/null +++ b/llparse/src/implementation/c/node/span-start.ts @@ -0,0 +1,26 @@ +import * as frontend from 'llparse-frontend'; + +import { Compilation } from '../compilation'; +import { Node } from './base'; + +export class SpanStart extends Node { + public doBuild(out: string[]): void { + // Prevent spurious empty spans + this.prologue(out); + + const ctx = this.compilation; + const field = this.ref.field; + + const posField = ctx.spanPosField(field.index); + out.push(`${posField} = (void*) ${ctx.posArg()};`); + + if (field.callbacks.length > 1) { + const cbField = ctx.spanCbField(field.index); + const callback = ctx.unwrapCode(this.ref.callback); + out.push(`${cbField} = ${ctx.buildCode(callback)};`); + } + + const otherwise = this.ref.otherwise!; + this.tailTo(out, otherwise); + } +} diff --git a/llparse/src/implementation/c/node/table-lookup.ts b/llparse/src/implementation/c/node/table-lookup.ts new file mode 100644 index 0000000..6a400a3 --- /dev/null +++ b/llparse/src/implementation/c/node/table-lookup.ts @@ -0,0 +1,196 @@ +import * as assert from 'assert'; +import * as frontend from 'llparse-frontend'; + +import { Compilation } from '../compilation'; +import { Node } from './base'; + +const MAX_CHAR = 0xff; +const TABLE_GROUP = 16; + +// _mm_cmpestri takes 8 ranges +const SSE_RANGES_LEN = 16; +// _mm_cmpestri takes 128bit input +const SSE_RANGES_PAD = 16; +const MAX_SSE_CALLS = 2; +const SSE_ALIGNMENT = 16; + +interface ITable { + readonly name: 
string; + readonly declaration: ReadonlyArray; +} + +export class TableLookup extends Node { + public doBuild(out: string[]): void { + const ctx = this.compilation; + + const table = this.buildTable(); + for (const line of table.declaration) { + out.push(line); + } + + this.prologue(out); + + const transform = ctx.unwrapTransform(this.ref.transform!); + + // Try to vectorize nodes matching characters and looping to themselves + // NOTE: `switch` below triggers when there is not enough characters in the + // stream for vectorized processing. + this.buildSSE(out); + + const current = transform.build(ctx, `*${ctx.posArg()}`); + out.push(`switch (${table.name}[(uint8_t) ${current}]) {`); + + for (const [ index, edge ] of this.ref.edges.entries()) { + out.push(` case ${index + 1}: {`); + + const tmp: string[] = []; + const edge = this.ref.edges[index]; + this.tailTo(tmp, { + noAdvance: edge.noAdvance, + node: edge.node, + value: undefined, + }); + ctx.indent(out, tmp, ' '); + + out.push(' }'); + } + + out.push(` default: {`); + + const tmp: string[] = []; + this.tailTo(tmp, this.ref.otherwise!); + ctx.indent(out, tmp, ' '); + + out.push(' }'); + out.push('}'); + } + + private buildSSE(out: string[]): boolean { + const ctx = this.compilation; + + // Transformation is not supported atm + if (this.ref.transform && this.ref.transform.ref.name !== 'id') { + return false; + } + + if (this.ref.edges.length !== 1) { + return false; + } + + const edge = this.ref.edges[0]; + if (edge.node.ref !== this.ref) { + return false; + } + + // NOTE: keys are sorted + let ranges: number[] = []; + let first: number | undefined; + let last: number | undefined; + for (const key of edge.keys) { + if (first === undefined) { + first = key; + } + if (last === undefined) { + last = key; + } + + if (key - last > 1) { + ranges.push(first, last); + first = key; + } + last = key; + } + if (first !== undefined && last !== undefined) { + ranges.push(first, last); + } + + if (ranges.length === 0) { + 
return false; + } + + // Way too many calls would be required + if (ranges.length > MAX_SSE_CALLS * SSE_RANGES_LEN) { + return false; + } + + out.push('#ifdef __SSE4_2__'); + out.push(`if (${ctx.endPosArg()} - ${ctx.posArg()} >= 16) {`); + out.push(' __m128i ranges;'); + out.push(' __m128i input;'); + out.push(' int avail;'); + out.push(' int match_len;'); + out.push(''); + out.push(' /* Load input */'); + out.push(` input = _mm_loadu_si128((__m128i const*) ${ctx.posArg()});`); + for (let off = 0; off < ranges.length; off += SSE_RANGES_LEN) { + const subRanges = ranges.slice(off, off + SSE_RANGES_LEN); + + let paddedRanges = subRanges.slice(); + while (paddedRanges.length < SSE_RANGES_PAD) { + paddedRanges.push(0); + } + + const blob = ctx.blob(Buffer.from(paddedRanges), SSE_ALIGNMENT); + out.push(` ranges = _mm_loadu_si128((__m128i const*) ${blob});`); + out.push(''); + + out.push(' /* Find first character that does not match `ranges` */'); + out.push(` match_len = _mm_cmpestri(ranges, ${subRanges.length},`); + out.push(' input, 16,'); + out.push(' _SIDD_UBYTE_OPS | _SIDD_CMP_RANGES |'); + out.push(' _SIDD_NEGATIVE_POLARITY);'); + out.push(''); + out.push(' if (match_len != 0) {'); + out.push(` ${ctx.posArg()} += match_len;`); + + const tmp: string[] = []; + assert.strictEqual(edge.noAdvance, false); + this.tailTo(tmp, { + noAdvance: true, + node: edge.node, + }); + ctx.indent(out, tmp, ' '); + + out.push(' }'); + } + + { + const tmp: string[] = []; + this.tailTo(tmp, this.ref.otherwise!); + ctx.indent(out, tmp, ' '); + } + out.push('}'); + + out.push('#endif /* __SSE4_2__ */'); + + return true; + } + + private buildTable(): ITable { + const table: number[] = new Array(MAX_CHAR + 1).fill(0); + + for (const [ index, edge ] of this.ref.edges.entries()) { + edge.keys.forEach((key) => { + assert.strictEqual(table[key], 0); + table[key] = index + 1; + }); + } + + const lines = [ + 'static uint8_t lookup_table[] = {', + ]; + for (let i = 0; i < table.length; i += 
TABLE_GROUP) { + let line = ` ${table.slice(i, i + TABLE_GROUP).join(', ')}`; + if (i + TABLE_GROUP < table.length) { + line += ','; + } + lines.push(line); + } + lines.push('};'); + + return { + name: 'lookup_table', + declaration: lines, + }; + } +} diff --git a/llparse/src/implementation/c/transform/base.ts b/llparse/src/implementation/c/transform/base.ts new file mode 100644 index 0000000..82028d5 --- /dev/null +++ b/llparse/src/implementation/c/transform/base.ts @@ -0,0 +1,10 @@ +import * as frontend from 'llparse-frontend'; + +import { Compilation } from '../compilation'; + +export abstract class Transform { + constructor(public readonly ref: T) { + } + + public abstract build(ctx: Compilation, value: string): string; +} diff --git a/llparse/src/implementation/c/transform/id.ts b/llparse/src/implementation/c/transform/id.ts new file mode 100644 index 0000000..6c6105f --- /dev/null +++ b/llparse/src/implementation/c/transform/id.ts @@ -0,0 +1,11 @@ +import * as frontend from 'llparse-frontend'; + +import { Compilation } from '../compilation'; +import { Transform } from './base'; + +export class ID extends Transform { + public build(ctx: Compilation, value: string): string { + // Identity transformation + return value; + } +} diff --git a/llparse/src/implementation/c/transform/index.ts b/llparse/src/implementation/c/transform/index.ts new file mode 100644 index 0000000..c13ba50 --- /dev/null +++ b/llparse/src/implementation/c/transform/index.ts @@ -0,0 +1,11 @@ +import { ID } from './id'; +import { ToLower } from './to-lower'; +import { ToLowerUnsafe } from './to-lower-unsafe'; + +export { Transform } from './base'; + +export default { + ID, + ToLower, + ToLowerUnsafe, +}; diff --git a/llparse/src/implementation/c/transform/to-lower-unsafe.ts b/llparse/src/implementation/c/transform/to-lower-unsafe.ts new file mode 100644 index 0000000..27f608c --- /dev/null +++ b/llparse/src/implementation/c/transform/to-lower-unsafe.ts @@ -0,0 +1,10 @@ +import * as frontend 
from 'llparse-frontend'; + +import { Compilation } from '../compilation'; +import { Transform } from './base'; + +export class ToLowerUnsafe extends Transform { + public build(ctx: Compilation, value: string): string { + return `((${value}) | 0x20)`; + } +} diff --git a/llparse/src/implementation/c/transform/to-lower.ts b/llparse/src/implementation/c/transform/to-lower.ts new file mode 100644 index 0000000..f639ef1 --- /dev/null +++ b/llparse/src/implementation/c/transform/to-lower.ts @@ -0,0 +1,11 @@ +import * as frontend from 'llparse-frontend'; + +import { Compilation } from '../compilation'; +import { Transform } from './base'; + +export class ToLower extends Transform { + public build(ctx: Compilation, value: string): string { + return `((${value}) >= 'A' && (${value}) <= 'Z' ? ` + + `(${value} | 0x20) : (${value}))`; + } +} -- cgit v1.2.3