summary refs log tree commit diff stats
path: root/llparse/src
diff options
context:
space:
mode:
Diffstat (limited to 'llparse/src')
-rw-r--r-- llparse/src/api.ts 47
-rw-r--r-- llparse/src/compiler/header-builder.ts 80
-rw-r--r-- llparse/src/compiler/index.ts 88
-rw-r--r-- llparse/src/implementation/c/code/and.ts 11
-rw-r--r-- llparse/src/implementation/c/code/base.ts 12
-rw-r--r-- llparse/src/implementation/c/code/external.ts 19
-rw-r--r-- llparse/src/implementation/c/code/field.ts 28
-rw-r--r-- llparse/src/implementation/c/code/index.ts 27
-rw-r--r-- llparse/src/implementation/c/code/is-equal.ts 10
-rw-r--r-- llparse/src/implementation/c/code/load.ts 10
-rw-r--r-- llparse/src/implementation/c/code/mul-add.ts 67
-rw-r--r-- llparse/src/implementation/c/code/or.ts 11
-rw-r--r-- llparse/src/implementation/c/code/store.ts 11
-rw-r--r-- llparse/src/implementation/c/code/test.ts 11
-rw-r--r-- llparse/src/implementation/c/code/update.ts 11
-rw-r--r-- llparse/src/implementation/c/compilation.ts 336
-rw-r--r-- llparse/src/implementation/c/constants.ts 45
-rw-r--r-- llparse/src/implementation/c/helpers/match-sequence.ts 75
-rw-r--r-- llparse/src/implementation/c/index.ts 199
-rw-r--r-- llparse/src/implementation/c/node/base.ts 77
-rw-r--r-- llparse/src/implementation/c/node/consume.ts 48
-rw-r--r-- llparse/src/implementation/c/node/empty.ts 16
-rw-r--r-- llparse/src/implementation/c/node/error.ts 33
-rw-r--r-- llparse/src/implementation/c/node/index.ts 27
-rw-r--r-- llparse/src/implementation/c/node/invoke.ts 44
-rw-r--r-- llparse/src/implementation/c/node/pause.ts 19
-rw-r--r-- llparse/src/implementation/c/node/sequence.ts 55
-rw-r--r-- llparse/src/implementation/c/node/single.ts 47
-rw-r--r-- llparse/src/implementation/c/node/span-end.ts 56
-rw-r--r-- llparse/src/implementation/c/node/span-start.ts 26
-rw-r--r-- llparse/src/implementation/c/node/table-lookup.ts 196
-rw-r--r-- llparse/src/implementation/c/transform/base.ts 10
-rw-r--r-- llparse/src/implementation/c/transform/id.ts 11
-rw-r--r-- llparse/src/implementation/c/transform/index.ts 11
-rw-r--r-- llparse/src/implementation/c/transform/to-lower-unsafe.ts 10
-rw-r--r-- llparse/src/implementation/c/transform/to-lower.ts 11
36 files changed, 1795 insertions, 0 deletions
diff --git a/llparse/src/api.ts b/llparse/src/api.ts
new file mode 100644
index 0000000..a34f5bc
--- /dev/null
+++ b/llparse/src/api.ts
@@ -0,0 +1,47 @@
+import * as frontend from 'llparse-frontend';
+
+import source = frontend.source;
+
+import { Compiler, ICompilerOptions, ICompilerResult } from './compiler';
+
+export { source, ICompilerOptions, ICompilerResult };
+
+// TODO(indutny): API for disabling/short-circuiting spans
+
+/**
+ * LLParse graph builder and compiler.
+ */
+export class LLParse extends source.Builder {
+ /**
+ * The prefix controls the names of the functions and the state struct in
+ * the generated public C header:
+ *
+ * ```c
+ * // state struct, exposed as `typedef struct PREFIX_s PREFIX_t;`
+ * struct PREFIX_s {
+ * ...
+ * };
+ *
+ * int PREFIX_init(PREFIX_t* s);
+ * int PREFIX_execute(PREFIX_t* s, const char* p, const char* endp);
+ * ```
+ *
+ * @param prefix Prefix to be used when generating public API.
+ */
+ constructor(private readonly prefix: string = 'llparse') { // kept for build()
+ super();
+ }
+
+ /**
+ * Compile the LLParse graph into C source text and a C header.
+ *
+ * @param root Root node of the parse graph (see `.node()`)
+ * @param options Compiler options; every field is optional.
+ */
+ public build(root: source.node.Node, options: ICompilerOptions = {})
+ : ICompilerResult { // `{ c, header }` strings, see ICompilerResult
+ const c = new Compiler(this.prefix, options); // a fresh Compiler per call
+
+ return c.compile(root, this.properties); // `properties` is not declared in this class
+ }
+}
diff --git a/llparse/src/compiler/header-builder.ts b/llparse/src/compiler/header-builder.ts
new file mode 100644
index 0000000..9f5bee7
--- /dev/null
+++ b/llparse/src/compiler/header-builder.ts
@@ -0,0 +1,80 @@
+import * as frontend from 'llparse-frontend';
+import source = frontend.source;
+
+export interface IHeaderBuilderOptions {
+ readonly prefix: string; // used verbatim in the C type and function names
+ readonly headerGuard?: string; // include-guard macro; derived from `prefix` when undefined
+ readonly properties: ReadonlyArray<source.Property>; // emitted as struct members, in order
+ readonly spans: ReadonlyArray<frontend.SpanField>; // one `_span_pos<N>` member per entry
+}
+
+export class HeaderBuilder { // renders the public C header as a single string
+ public build(options: IHeaderBuilderOptions): string {
+ let res = '';
+ const PREFIX = options.prefix.toUpperCase().replace(/[^a-z]/gi, '_'); // every non-letter (digits too) becomes '_'
+ const DEFINE = options.headerGuard === undefined ? // explicit guard wins over the derived one
+ `INCLUDE_${PREFIX}_H_` : options.headerGuard;
+
+ res += `#ifndef ${DEFINE}\n`;
+ res += `#define ${DEFINE}\n`;
+ res += '#ifdef __cplusplus\n';
+ res += 'extern "C" {\n';
+ res += '#endif\n';
+ res += '\n';
+
+ res += '#include <stdint.h>\n'; // provides the fixed-width integer types used below
+ res += '\n';
+
+ // State struct: typedef `<prefix>_t` first, then the `<prefix>_s` definition
+ res += `typedef struct ${options.prefix}_s ${options.prefix}_t;\n`;
+ res += `struct ${options.prefix}_s {\n`;
+ res += ' int32_t _index;\n';
+
+ for (const [ index, field ] of options.spans.entries()) {
+ res += ` void* _span_pos${index};\n`;
+ if (field.callbacks.length > 1) { // callback slot only when the span field has several callbacks
+ res += ` void* _span_cb${index};\n`;
+ }
+ }
+
+ res += ' int32_t error;\n';
+ res += ' const char* reason;\n';
+ res += ' const char* error_pos;\n';
+ res += ' void* data;\n';
+ res += ' void* _current;\n';
+
+ for (const prop of options.properties) { // user-declared properties follow the built-in members
+ let ty: string;
+ if (prop.ty === 'i8') { // integer property types map to unsigned fixed-width C types
+ ty = 'uint8_t';
+ } else if (prop.ty === 'i16') {
+ ty = 'uint16_t';
+ } else if (prop.ty === 'i32') {
+ ty = 'uint32_t';
+ } else if (prop.ty === 'i64') {
+ ty = 'uint64_t';
+ } else if (prop.ty === 'ptr') {
+ ty = 'void*';
+ } else { // any other type string aborts header generation
+ throw new Error(
+ `Unknown state property type: "${prop.ty}"`);
+ }
+ res += ` ${ty} ${prop.name};\n`;
+ }
+ res += '};\n';
+
+ res += '\n';
+
+ res += `int ${options.prefix}_init(${options.prefix}_t* s);\n`; // public entry points
+ res += `int ${options.prefix}_execute(${options.prefix}_t* s, ` +
+ 'const char* p, const char* endp);\n';
+
+ res += '\n';
+ res += '#ifdef __cplusplus\n';
+ res += '} /* extern "C" *\/\n';
+ res += '#endif\n';
+ res += `#endif /* ${DEFINE} *\/\n`;
+
+ return res;
+ }
+}
diff --git a/llparse/src/compiler/index.ts b/llparse/src/compiler/index.ts
new file mode 100644
index 0000000..89c258a
--- /dev/null
+++ b/llparse/src/compiler/index.ts
@@ -0,0 +1,88 @@
+import * as debugAPI from 'debug';
+import * as frontend from 'llparse-frontend';
+
+import source = frontend.source;
+
+import * as cImpl from '../implementation/c';
+import { HeaderBuilder } from './header-builder';
+
+const debug = debugAPI('llparse:compiler');
+
+export interface ICompilerOptions {
+ /**
+ * Name of the C debug function (no debug calls are emitted when unset)
+ *
+ * The function must have the following signature:
+ *
+ * ```c
+ * void debug(llparse_t* state, const char* p, const char* endp,
+ * const char* msg);
+ * ```
+ *
+ * Where `llparse_t` is a parser state type.
+ */
+ readonly debug?: string;
+
+ /**
+ * Macro name to use for the `#ifndef`/`#define` guard in the C header.
+ *
+ * Default value: `INCLUDE_<PREFIX>_H_`, with PREFIX derived from `prefix`
+ */
+ readonly headerGuard?: string;
+
+ /** Optional frontend configuration */
+ readonly frontend?: frontend.IFrontendLazyOptions;
+
+ /** Optional C-backend configuration */
+ readonly c?: cImpl.ICPublicOptions;
+}
+
+export interface ICompilerResult {
+ /**
+ * Generated C source text (output of the C backend)
+ */
+ readonly c: string;
+
+ /**
+ * Generated C header text (output of `HeaderBuilder`)
+ */
+ readonly header: string;
+}
+
+export class Compiler { // runs the frontend pass, then renders the header and the C source
+ constructor(public readonly prefix: string,
+ public readonly options: ICompilerOptions) {
+ }
+
+ public compile(root: source.node.Node,
+ properties: ReadonlyArray<source.Property>): ICompilerResult {
+ debug('Combining implementations');
+ const container = new frontend.Container();
+
+ const c = new cImpl.CCompiler(container, Object.assign({ // constructing it registers the C backend in `container`
+ debug: this.options.debug,
+ }, this.options.c)); // keys present in `options.c` override the base object
+
+ debug('Running frontend pass');
+ const f = new frontend.Frontend(this.prefix,
+ container.build(), // called after the backend was added to the container above
+ this.options.frontend);
+ const info = f.compile(root, properties);
+
+ debug('Building header');
+ const hb = new HeaderBuilder();
+
+ const header = hb.build({
+ headerGuard: this.options.headerGuard,
+ prefix: this.prefix,
+ properties,
+ spans: info.spans, // span fields come from the frontend result
+ });
+
+ debug('Building C');
+ return {
+ header,
+ c: c.compile(info),
+ };
+ }
+}
diff --git a/llparse/src/implementation/c/code/and.ts b/llparse/src/implementation/c/code/and.ts
new file mode 100644
index 0000000..fdd5434
--- /dev/null
+++ b/llparse/src/implementation/c/code/and.ts
@@ -0,0 +1,11 @@
+import * as frontend from 'llparse-frontend';
+
+import { Compilation } from '../compilation';
+import { Field } from './field';
+
+export class And extends Field<frontend.code.And> { // emits `<field> &= <value>; return 0;`
+ protected doBuild(ctx: Compilation, out: string[]): void {
+ const target = this.field(ctx);
+ out.push(`${target} &= ${this.ref.value};`, 'return 0;');
+ }
+}
diff --git a/llparse/src/implementation/c/code/base.ts b/llparse/src/implementation/c/code/base.ts
new file mode 100644
index 0000000..888330d
--- /dev/null
+++ b/llparse/src/implementation/c/code/base.ts
@@ -0,0 +1,12 @@
+import * as frontend from 'llparse-frontend';
+
+import { Compilation } from '../compilation';
+
+export abstract class Code<T extends frontend.code.Code> { // base wrapper around one frontend code object
+ protected cachedDecl: string | undefined; // NOTE(review): not used by any subclass visible in this change — confirm it is needed
+
+ constructor(public readonly ref: T) { // `ref` is the wrapped frontend object
+ }
+
+ public abstract build(ctx: Compilation, out: string[]): void; // appends generated C lines to `out`
+}
diff --git a/llparse/src/implementation/c/code/external.ts b/llparse/src/implementation/c/code/external.ts
new file mode 100644
index 0000000..494fc5a
--- /dev/null
+++ b/llparse/src/implementation/c/code/external.ts
@@ -0,0 +1,19 @@
+import * as frontend from 'llparse-frontend';
+
+import { Compilation } from '../compilation';
+import { Code } from './base';
+
+export abstract class External<T extends frontend.code.External>
+ extends Code<T> { // emits only a C prototype; no function body is generated here
+
+ public build(ctx: Compilation, out: string[]): void {
+ out.push(`int ${this.ref.name}(`);
+ out.push(` ${ctx.prefix}_t* s, const unsigned char* p,`);
+ if (this.ref.signature === 'value') { // 'value' signature takes an extra `int value` argument
+ out.push(' const unsigned char* endp,');
+ out.push(' int value);');
+ } else { // any other signature ends after `endp`
+ out.push(' const unsigned char* endp);');
+ }
+ }
+}
diff --git a/llparse/src/implementation/c/code/field.ts b/llparse/src/implementation/c/code/field.ts
new file mode 100644
index 0000000..51f4439
--- /dev/null
+++ b/llparse/src/implementation/c/code/field.ts
@@ -0,0 +1,28 @@
+import * as frontend from 'llparse-frontend';
+
+import { Compilation } from '../compilation';
+import { Code } from './base';
+
+export abstract class Field<T extends frontend.code.Field> extends Code<T> { // emits a full C function operating on one state field
+ public build(ctx: Compilation, out: string[]): void {
+ out.push(`int ${this.ref.name}(`);
+ out.push(` ${ctx.prefix}_t* ${ctx.stateArg()},`);
+ out.push(` const unsigned char* ${ctx.posArg()},`);
+ if (this.ref.signature === 'value') { // 'value' signature adds the `int <match>` parameter
+ out.push(` const unsigned char* ${ctx.endPosArg()},`);
+ out.push(` int ${ctx.matchVar()}) {`);
+ } else {
+ out.push(` const unsigned char* ${ctx.endPosArg()}) {`);
+ }
+ const tmp: string[] = []; // body lines are collected separately so they can be indented
+ this.doBuild(ctx, tmp);
+ ctx.indent(out, tmp, ' ');
+ out.push('}');
+ }
+
+ protected abstract doBuild(ctx: Compilation, out: string[]): void; // subclass supplies the function body lines
+
+ protected field(ctx: Compilation): string { // C expression for the field: `<state arg>-><field name>`
+ return `${ctx.stateArg()}->${this.ref.field}`;
+ }
+}
diff --git a/llparse/src/implementation/c/code/index.ts b/llparse/src/implementation/c/code/index.ts
new file mode 100644
index 0000000..0de5de5
--- /dev/null
+++ b/llparse/src/implementation/c/code/index.ts
@@ -0,0 +1,27 @@
+import * as frontend from 'llparse-frontend';
+
+import { And } from './and';
+import { External } from './external';
+import { IsEqual } from './is-equal';
+import { Load } from './load';
+import { MulAdd } from './mul-add';
+import { Or } from './or';
+import { Store } from './store';
+import { Test } from './test';
+import { Update } from './update';
+
+export * from './base';
+
+export default { // code implementations registered by the C backend, keyed by name
+ And,
+ IsEqual,
+ Load,
+ Match: class Match extends External<frontend.code.External> {}, // NOTE(review): typed against code.External, unlike Span/Value below — confirm intended
+ MulAdd,
+ Or,
+ Span: class Span extends External<frontend.code.Span> {}, // prototype-only, inherits External.build
+ Store,
+ Test,
+ Update,
+ Value: class Value extends External<frontend.code.Value> {}, // prototype-only, inherits External.build
+};
diff --git a/llparse/src/implementation/c/code/is-equal.ts b/llparse/src/implementation/c/code/is-equal.ts
new file mode 100644
index 0000000..f76c2c1
--- /dev/null
+++ b/llparse/src/implementation/c/code/is-equal.ts
@@ -0,0 +1,10 @@
+import * as frontend from 'llparse-frontend';
+
+import { Compilation } from '../compilation';
+import { Field } from './field';
+
+export class IsEqual extends Field<frontend.code.IsEqual> { // emits `return <field> == <value>;`
+ protected doBuild(ctx: Compilation, out: string[]): void {
+ out.push([ 'return', this.field(ctx), '==', `${this.ref.value};` ].join(' '));
+ }
+}
diff --git a/llparse/src/implementation/c/code/load.ts b/llparse/src/implementation/c/code/load.ts
new file mode 100644
index 0000000..b913f23
--- /dev/null
+++ b/llparse/src/implementation/c/code/load.ts
@@ -0,0 +1,10 @@
+import * as frontend from 'llparse-frontend';
+
+import { Compilation } from '../compilation';
+import { Field } from './field';
+
+export class Load extends Field<frontend.code.Load> { // emits `return <field>;`
+ protected doBuild(ctx: Compilation, out: string[]): void {
+ out.push([ 'return ', this.field(ctx), ';' ].join(''));
+ }
+}
diff --git a/llparse/src/implementation/c/code/mul-add.ts b/llparse/src/implementation/c/code/mul-add.ts
new file mode 100644
index 0000000..fd5ce8c
--- /dev/null
+++ b/llparse/src/implementation/c/code/mul-add.ts
@@ -0,0 +1,67 @@
+import * as assert from 'assert';
+import * as frontend from 'llparse-frontend';
+
+import { Compilation } from '../compilation';
+import { SIGNED_LIMITS, UNSIGNED_LIMITS, SIGNED_TYPES } from '../constants';
+import { Field } from './field';
+
+export class MulAdd extends Field<frontend.code.MulAdd> { // emits `field = field * base + match` with overflow checks
+ protected doBuild(ctx: Compilation, out: string[]): void {
+ const options = this.ref.options;
+ const ty = ctx.getFieldType(this.ref.field); // property type string, e.g. 'i8'..'i64'
+
+ let field = this.field(ctx);
+ if (options.signed) { // signed mode: access the field through a signed pointer alias
+ assert(SIGNED_TYPES.has(ty), `Unexpected mulAdd type "${ty}"`);
+ const targetTy = SIGNED_TYPES.get(ty)!;
+ out.push(`${targetTy}* field = (${targetTy}*) &${field};`);
+ field = '(*field)'; // all later statements go through the alias
+ }
+
+ const match = ctx.matchVar();
+
+ const limits = options.signed ? SIGNED_LIMITS : UNSIGNED_LIMITS;
+ assert(limits.has(ty), `Unexpected mulAdd type "${ty}"`);
+ const [ min, max ] = limits.get(ty)!; // C literals for the type's range
+
+ const mulMax = `${max} / ${options.base}`; // largest value that can still be multiplied by base
+ const mulMin = `${min} / ${options.base}`;
+
+ out.push('/* Multiplication overflow */');
+ out.push(`if (${field} > ${mulMax}) {`);
+ out.push(' return 1;'); // the emitted function returns 1 on every overflow path
+ out.push('}');
+ if (options.signed) { // the lower bound is only checked in signed mode
+ out.push(`if (${field} < ${mulMin}) {`);
+ out.push(' return 1;');
+ out.push('}');
+ }
+ out.push('');
+
+ out.push(`${field} *= ${options.base};`);
+ out.push('');
+
+ out.push('/* Addition overflow */');
+ out.push(`if (${match} >= 0) {`); // non-negative addend: guard the upper bound
+ out.push(` if (${field} > ${max} - ${match}) {`);
+ out.push(' return 1;');
+ out.push(' }');
+ out.push('} else {'); // negative addend: guard the lower bound
+ out.push(` if (${field} < ${min} - ${match}) {`);
+ out.push(' return 1;');
+ out.push(' }');
+ out.push('}');
+
+ out.push(`${field} += ${match};`);
+
+ if (options.max !== undefined) { // optional user-supplied ceiling, checked after the update
+ out.push('');
+ out.push('/* Enforce maximum */');
+ out.push(`if (${field} > ${options.max}) {`);
+ out.push(' return 1;');
+ out.push('}');
+ }
+
+ out.push('return 0;'); // 0 when no check tripped
+ }
+}
diff --git a/llparse/src/implementation/c/code/or.ts b/llparse/src/implementation/c/code/or.ts
new file mode 100644
index 0000000..76b16f9
--- /dev/null
+++ b/llparse/src/implementation/c/code/or.ts
@@ -0,0 +1,11 @@
+import * as frontend from 'llparse-frontend';
+
+import { Compilation } from '../compilation';
+import { Field } from './field';
+
+export class Or extends Field<frontend.code.Or> { // emits `<field> |= <value>; return 0;`
+ protected doBuild(ctx: Compilation, out: string[]): void {
+ const lvalue = this.field(ctx);
+ out.push(`${lvalue} |= ${this.ref.value};`, 'return 0;');
+ }
+}
diff --git a/llparse/src/implementation/c/code/store.ts b/llparse/src/implementation/c/code/store.ts
new file mode 100644
index 0000000..a37d963
--- /dev/null
+++ b/llparse/src/implementation/c/code/store.ts
@@ -0,0 +1,11 @@
+import * as frontend from 'llparse-frontend';
+
+import { Compilation } from '../compilation';
+import { Field } from './field';
+
+export class Store extends Field<frontend.code.Store> { // emits `<field> = <match>; return 0;`
+ protected doBuild(ctx: Compilation, out: string[]): void {
+ const assignment = [ this.field(ctx), ctx.matchVar() ].join(' = ');
+ out.push(`${assignment};`, 'return 0;');
+ }
+}
diff --git a/llparse/src/implementation/c/code/test.ts b/llparse/src/implementation/c/code/test.ts
new file mode 100644
index 0000000..36126f5
--- /dev/null
+++ b/llparse/src/implementation/c/code/test.ts
@@ -0,0 +1,11 @@
+import * as frontend from 'llparse-frontend';
+
+import { Compilation } from '../compilation';
+import { Field } from './field';
+
+export class Test extends Field<frontend.code.Test> { // emits `return (<field> & <value>) == <value>;`
+ protected doBuild(ctx: Compilation, out: string[]): void {
+ const mask = `${this.ref.value}`;
+ out.push(`return (${this.field(ctx)} & ${mask}) == ${mask};`);
+ }
+}
diff --git a/llparse/src/implementation/c/code/update.ts b/llparse/src/implementation/c/code/update.ts
new file mode 100644
index 0000000..89efedf
--- /dev/null
+++ b/llparse/src/implementation/c/code/update.ts
@@ -0,0 +1,11 @@
+import * as frontend from 'llparse-frontend';
+
+import { Compilation } from '../compilation';
+import { Field } from './field';
+
+export class Update extends Field<frontend.code.Update> { // emits `<field> = <value>; return 0;`
+ protected doBuild(ctx: Compilation, out: string[]): void {
+ const lhs = this.field(ctx);
+ out.push(`${lhs} = ${this.ref.value};`, 'return 0;');
+ }
+}
diff --git a/llparse/src/implementation/c/compilation.ts b/llparse/src/implementation/c/compilation.ts
new file mode 100644
index 0000000..4df05a6
--- /dev/null
+++ b/llparse/src/implementation/c/compilation.ts
@@ -0,0 +1,336 @@
+import * as assert from 'assert';
+import { Buffer } from 'buffer';
+import * as frontend from 'llparse-frontend';
+
+import {
+ CONTAINER_KEY, STATE_ERROR,
+ ARG_STATE, ARG_POS, ARG_ENDPOS,
+ VAR_MATCH,
+ STATE_PREFIX, LABEL_PREFIX, BLOB_PREFIX,
+ SEQUENCE_COMPLETE, SEQUENCE_MISMATCH, SEQUENCE_PAUSE,
+} from './constants';
+import { Code } from './code';
+import { Node } from './node';
+import { Transform } from './transform';
+import { MatchSequence } from './helpers/match-sequence';
+
+// Number of byte literals emitted per line of a blob declaration
+const BLOB_GROUP_SIZE = 11;
+
+type WrappedNode = frontend.IWrap<frontend.node.Node>; // frontend wrapper around a graph node
+
+interface IBlob { // one static byte array to be emitted into the C file
+ readonly alignment: number | undefined; // when truthy, emitted with ALIGN(n) inside an SSE4.2 guard
+ readonly buffer: Buffer; // raw bytes of the array
+ readonly name: string; // C identifier of the array
+}
+
+// TODO(indutny): deduplicate
+export interface ICompilationOptions {
+ readonly debug?: string; // name of the C debug function; debug output is skipped when undefined
+}
+
+// TODO(indutny): deduplicate
+export interface ICompilationProperty {
+ readonly name: string; // state field name, matched by getFieldType()
+ readonly ty: string; // property type string returned by getFieldType()
+}
+
+export class Compilation { // shared context for one C code generation run
+ private readonly stateMap: Map<string, ReadonlyArray<string>> = new Map(); // state name -> generated body lines
+ private readonly blobs: Map<Buffer, IBlob> = new Map(); // keyed by Buffer object identity, not by content
+ private readonly codeMap: Map<string, Code<frontend.code.Code>> = new Map(); // code name -> implementation
+ private readonly matchSequence:
+ Map<string, MatchSequence> = new Map(); // transform name -> helper generator
+ private readonly resumptionTargets: Set<string> = new Set(); // prefixed state names
+
+ constructor(public readonly prefix: string,
+ private readonly properties: ReadonlyArray<ICompilationProperty>,
+ resumptionTargets: ReadonlySet<WrappedNode>,
+ private readonly options: ICompilationOptions) {
+ for (const node of resumptionTargets) { // store targets by their prefixed state name
+ this.resumptionTargets.add(STATE_PREFIX + node.ref.id.name);
+ }
+ }
+
+ private buildStateEnum(out: string[]): void { // enum lists the error state plus resumption targets only
+ out.push('enum llparse_state_e {');
+ out.push(` ${STATE_ERROR},`);
+ for (const stateName of this.stateMap.keys()) {
+ if (this.resumptionTargets.has(stateName)) { // internal states get no enum value
+ out.push(` ${stateName},`);
+ }
+ }
+ out.push('};');
+ out.push('typedef enum llparse_state_e llparse_state_t;');
+ }
+
+ private buildBlobs(out: string[]): void { // emits every registered blob as a static array
+ if (this.blobs.size === 0) {
+ return;
+ }
+
+ for (const blob of this.blobs.values()) {
+ const buffer = blob.buffer;
+ let align = '';
+ if (blob.alignment) {
+ align = ` ALIGN(${blob.alignment})`;
+ }
+
+ if (blob.alignment) { // aligned blobs are wrapped in an `__SSE4_2__` guard
+ out.push('#ifdef __SSE4_2__');
+ }
+ out.push(`static const unsigned char${align} ${blob.name}[] = {`);
+
+ for (let i = 0; i < buffer.length; i += BLOB_GROUP_SIZE) { // one output line per group of bytes
+ const limit = Math.min(buffer.length, i + BLOB_GROUP_SIZE);
+ const hex: string[] = [];
+ for (let j = i; j < limit; j++) {
+ const value = buffer[j] as number;
+
+ const ch = String.fromCharCode(value);
+ // `'`, `\`
+ if (value === 0x27 || value === 0x5c) { // these two need a backslash inside a char literal
+ hex.push(`'\\${ch}'`);
+ } else if (value >= 0x20 && value <= 0x7e) { // printable ASCII is emitted as a char literal
+ hex.push(`'${ch}'`);
+ } else { // everything else is emitted as a hex number
+ hex.push(`0x${value.toString(16)}`);
+ }
+ }
+ let line = ' ' + hex.join(', ');
+ if (limit !== buffer.length) { // no trailing comma after the last group
+ line += ',';
+ }
+ out.push(line);
+ }
+
+ out.push(`};`);
+ if (blob.alignment) {
+ out.push('#endif /* __SSE4_2__ */');
+ }
+ }
+ out.push('');
+ }
+
+ private buildMatchSequence(out: string[]): void { // shared types first, then one helper per cached transform
+ if (this.matchSequence.size === 0) {
+ return;
+ }
+
+ MatchSequence.buildGlobals(out);
+ out.push('');
+
+ for (const match of this.matchSequence.values()) {
+ match.build(this, out);
+ out.push('');
+ }
+ }
+
+ public reserveSpans(spans: ReadonlyArray<frontend.SpanField>): void { // registers every span callback in codeMap
+ for (const span of spans) {
+ for (const callback of span.callbacks) {
+ this.buildCode(this.unwrapCode(callback));
+ }
+ }
+ }
+
+ public debug(out: string[], message: string): void { // appends a call to the debug function, if one is configured
+ if (this.options.debug === undefined) {
+ return;
+ }
+
+ const args = [
+ this.stateArg(),
+ `(const char*) ${this.posArg()}`, // casts match the `const char*` parameters declared in buildGlobals()
+ `(const char*) ${this.endPosArg()}`,
+ ];
+
+ out.push(`${this.options.debug}(${args.join(', ')},`);
+ out.push(` ${this.cstring(message)});`);
+ }
+
+ public buildGlobals(out: string[]): void { // file-scope declarations, in a fixed order
+ if (this.options.debug !== undefined) { // prototype for the debug function
+ out.push(`void ${this.options.debug}(`);
+ out.push(` ${this.prefix}_t* s, const char* p, const char* endp,`);
+ out.push(' const char* msg);');
+ }
+
+ this.buildBlobs(out);
+ this.buildMatchSequence(out);
+ this.buildStateEnum(out);
+
+ for (const code of this.codeMap.values()) { // each registered code object, preceded by a blank line
+ out.push('');
+ code.build(this, out);
+ }
+ }
+
+ public buildResumptionStates(out: string[]): void { // states that are resumption targets: `case` label plus goto label
+ this.stateMap.forEach((lines, name) => {
+ if (!this.resumptionTargets.has(name)) {
+ return;
+ }
+ out.push(`case ${name}:`);
+ out.push(`${LABEL_PREFIX}${name}: {`);
+ lines.forEach((line) => out.push(` ${line}`));
+ out.push(' /* UNREACHABLE */;');
+ out.push(' abort();');
+ out.push('}');
+ });
+ }
+
+ public buildInternalStates(out: string[]): void { // all remaining states: goto label only
+ this.stateMap.forEach((lines, name) => {
+ if (this.resumptionTargets.has(name)) {
+ return;
+ }
+ out.push(`${LABEL_PREFIX}${name}: {`);
+ lines.forEach((line) => out.push(` ${line}`));
+ out.push(' /* UNREACHABLE */;');
+ out.push(' abort();');
+ out.push('}');
+ });
+ }
+
+ public addState(state: string, lines: ReadonlyArray<string>): void { // asserts that the state name is new
+ assert(!this.stateMap.has(state));
+ this.stateMap.set(state, lines);
+ }
+
+ public buildCode(code: Code<frontend.code.Code>): string { // registers `code` and returns its name
+ if (this.codeMap.has(code.ref.name)) { // a known name must map to the very same object
+ assert.strictEqual(this.codeMap.get(code.ref.name)!, code,
+ `Code name conflict for "${code.ref.name}"`);
+ } else {
+ this.codeMap.set(code.ref.name, code);
+ }
+ return code.ref.name;
+ }
+
+ public getFieldType(field: string): string { // linear search by name; throws when no property matches
+ for (const property of this.properties) {
+ if (property.name === field) {
+ return property.ty;
+ }
+ }
+ throw new Error(`Field "${field}" not found`);
+ }
+
+ // Helpers: fetch the C implementation stored under CONTAINER_KEY
+
+ public unwrapCode(code: frontend.IWrap<frontend.code.Code>)
+ : Code<frontend.code.Code> {
+ const container = code as frontend.ContainerWrap<frontend.code.Code>;
+ return container.get(CONTAINER_KEY);
+ }
+
+ public unwrapNode(node: WrappedNode): Node<frontend.node.Node> {
+ const container = node as frontend.ContainerWrap<frontend.node.Node>;
+ return container.get(CONTAINER_KEY);
+ }
+
+ public unwrapTransform(node: frontend.IWrap<frontend.transform.Transform>)
+ : Transform<frontend.transform.Transform> {
+ const container =
+ node as frontend.ContainerWrap<frontend.transform.Transform>;
+ return container.get(CONTAINER_KEY);
+ }
+
+ public indent(out: string[], lines: ReadonlyArray<string>, pad: string) { // appends each line prefixed with `pad`
+ for (const line of lines) {
+ out.push(`${pad}${line}`);
+ }
+ }
+
+ // MatchSequence cache: one helper per transform name
+
+ public getMatchSequence(
+ transform: frontend.IWrap<frontend.transform.Transform>, select: Buffer) // `select` is not read in this method
+ : string {
+ const wrap = this.unwrapTransform(transform);
+
+ let res: MatchSequence;
+ if (this.matchSequence.has(wrap.ref.name)) { // reuse the helper created for this transform
+ res = this.matchSequence.get(wrap.ref.name)!;
+ } else {
+ res = new MatchSequence(wrap);
+ this.matchSequence.set(wrap.ref.name, res);
+ }
+
+ return res.getName(); // name of the C helper function
+ }
+
+ // Arguments: names used for parameters/locals in generated C
+
+ public stateArg(): string {
+ return ARG_STATE;
+ }
+
+ public posArg(): string {
+ return ARG_POS;
+ }
+
+ public endPosArg(): string {
+ return ARG_ENDPOS;
+ }
+
+ public matchVar(): string {
+ return VAR_MATCH;
+ }
+
+ // State fields: `<state arg>-><member>` expressions
+
+ public indexField(): string {
+ return this.stateField('_index');
+ }
+
+ public currentField(): string {
+ return this.stateField('_current');
+ }
+
+ public errorField(): string {
+ return this.stateField('error');
+ }
+
+ public reasonField(): string {
+ return this.stateField('reason');
+ }
+
+ public errorPosField(): string {
+ return this.stateField('error_pos');
+ }
+
+ public spanPosField(index: number): string {
+ return this.stateField(`_span_pos${index}`);
+ }
+
+ public spanCbField(index: number): string {
+ return this.stateField(`_span_cb${index}`);
+ }
+
+ public stateField(name: string): string {
+ return `${this.stateArg()}->${name}`;
+ }
+
+ // Globals
+
+ public cstring(value: string): string { // NOTE(review): uses JSON escaping as the C string literal — confirm for control/non-ASCII characters
+ return JSON.stringify(value);
+ }
+
+ public blob(value: Buffer, alignment?: number): string { // returns the C name of the blob, registering it on first use
+ if (this.blobs.has(value)) { // hit only when the same Buffer object is passed again
+ return this.blobs.get(value)!.name;
+ }
+
+ const res = BLOB_PREFIX + this.blobs.size; // names are numbered in registration order
+ this.blobs.set(value, {
+ alignment,
+ buffer: value,
+ name: res,
+ });
+ return res;
+ }
+}
diff --git a/llparse/src/implementation/c/constants.ts b/llparse/src/implementation/c/constants.ts
new file mode 100644
index 0000000..bfd5be3
--- /dev/null
+++ b/llparse/src/implementation/c/constants.ts
@@ -0,0 +1,45 @@
+export const CONTAINER_KEY = 'c'; // key under which the C backend is registered in the frontend container
+
+export const LABEL_PREFIX = ''; // prepended to state names to form C labels (currently empty)
+export const STATE_PREFIX = 's_n_'; // prepended to node names to form state names
+export const STATE_ERROR = 's_error'; // first member of the generated state enum
+
+export const BLOB_PREFIX = 'llparse_blob'; // blob arrays are named `<prefix><N>`
+
+export const ARG_STATE = 'state'; // names of the generated function parameters
+export const ARG_POS = 'p';
+export const ARG_ENDPOS = 'endp';
+
+export const VAR_MATCH = 'match'; // name of the `int` match value in generated C
+
+// MatchSequence status enum members
+
+export const SEQUENCE_COMPLETE = 'kMatchComplete';
+export const SEQUENCE_MISMATCH = 'kMatchMismatch';
+export const SEQUENCE_PAUSE = 'kMatchPause';
+
+export const SIGNED_LIMITS: Map<string, [ string, string ]> = new Map(); // type -> [ min, max ] C literals
+SIGNED_LIMITS.set('i8', [ '-0x80', '0x7f' ]);
+SIGNED_LIMITS.set('i16', [ '-0x8000', '0x7fff' ]);
+SIGNED_LIMITS.set('i32', [ '(-0x7fffffff - 1)', '0x7fffffff' ]); // min is spelled as an expression rather than one literal
+SIGNED_LIMITS.set('i64', [ '(-0x7fffffffffffffffLL - 1)',
+ '0x7fffffffffffffffLL' ]);
+
+// Inclusive [ min, max ] C literals for each unsigned property type
+export const UNSIGNED_LIMITS: Map<string, [ string, string ]> = new Map();
+UNSIGNED_LIMITS.set('i8', [ '0', '0xff' ]);
+UNSIGNED_LIMITS.set('i16', [ '0', '0xffff' ]);
+UNSIGNED_LIMITS.set('i32', [ '0', '0xffffffff' ]);
+UNSIGNED_LIMITS.set('i64', [ '0ULL', '0xffffffffffffffffULL' ]);
+
+export const UNSIGNED_TYPES: Map<string, string> = new Map(); // property type -> unsigned C type name
+UNSIGNED_TYPES.set('i8', 'uint8_t');
+UNSIGNED_TYPES.set('i16', 'uint16_t');
+UNSIGNED_TYPES.set('i32', 'uint32_t');
+UNSIGNED_TYPES.set('i64', 'uint64_t');
+
+export const SIGNED_TYPES: Map<string, string> = new Map(); // property type -> signed C type name (used by MulAdd)
+SIGNED_TYPES.set('i8', 'int8_t');
+SIGNED_TYPES.set('i16', 'int16_t');
+SIGNED_TYPES.set('i32', 'int32_t');
+SIGNED_TYPES.set('i64', 'int64_t');
diff --git a/llparse/src/implementation/c/helpers/match-sequence.ts b/llparse/src/implementation/c/helpers/match-sequence.ts
new file mode 100644
index 0000000..278f4b5
--- /dev/null
+++ b/llparse/src/implementation/c/helpers/match-sequence.ts
@@ -0,0 +1,75 @@
+import * as assert from 'assert';
+import { Buffer } from 'buffer';
+import * as frontend from 'llparse-frontend';
+
+import {
+ SEQUENCE_COMPLETE, SEQUENCE_MISMATCH, SEQUENCE_PAUSE,
+} from '../constants';
+import { Transform } from '../transform';
+import { Compilation } from '../compilation';
+
+type TransformWrap = Transform<frontend.transform.Transform>; // C-side transform implementation
+
+export class MatchSequence { // generates the C helper that matches a byte sequence under one transform
+ constructor(private readonly transform: TransformWrap) {
+ }
+
+ public static buildGlobals(out: string[]): void { // status enum and result struct shared by all helpers
+ out.push('enum llparse_match_status_e {');
+ out.push(` ${SEQUENCE_COMPLETE},`);
+ out.push(` ${SEQUENCE_PAUSE},`);
+ out.push(` ${SEQUENCE_MISMATCH}`);
+ out.push('};');
+ out.push('typedef enum llparse_match_status_e llparse_match_status_t;');
+ out.push('');
+ out.push('struct llparse_match_s {');
+ out.push(' llparse_match_status_t status;');
+ out.push(' const unsigned char* current;');
+ out.push('};');
+ out.push('typedef struct llparse_match_s llparse_match_t;');
+ }
+
+ public getName(): string { // helper name is derived from the transform name
+ return `llparse__match_sequence_${this.transform.ref.name}`;
+ }
+
+ public build(ctx: Compilation, out: string[]): void { // emits the helper function definition
+ out.push(`static llparse_match_t ${this.getName()}(`);
+ out.push(` ${ctx.prefix}_t* s, const unsigned char* p,`);
+ out.push(' const unsigned char* endp,');
+ out.push(' const unsigned char* seq, uint32_t seq_len) {');
+
+ // Vars
+ out.push(' uint32_t index;');
+ out.push(' llparse_match_t res;');
+ out.push('');
+
+ // Body
+ out.push(' index = s->_index;'); // start from the offset saved in the state
+ out.push(' for (; p != endp; p++) {');
+ out.push(' unsigned char current;');
+ out.push('');
+ out.push(` current = ${this.transform.build(ctx, '*p')};`); // the transform is applied to each input byte
+ out.push(' if (current == seq[index]) {');
+ out.push(' if (++index == seq_len) {'); // whole sequence matched
+ out.push(` res.status = ${SEQUENCE_COMPLETE};`);
+ out.push(' goto reset;');
+ out.push(' }');
+ out.push(' } else {'); // first differing byte ends the match
+ out.push(` res.status = ${SEQUENCE_MISMATCH};`);
+ out.push(' goto reset;');
+ out.push(' }');
+ out.push(' }');
+
+ out.push(' s->_index = index;'); // input ended mid-sequence: save the offset and report pause
+ out.push(` res.status = ${SEQUENCE_PAUSE};`);
+ out.push(' res.current = p;');
+ out.push(' return res;');
+
+ out.push('reset:'); // complete/mismatch: clear the saved offset
+ out.push(' s->_index = 0;');
+ out.push(' res.current = p;'); // `p` has not been advanced past the deciding byte here
+ out.push(' return res;');
+ out.push('}');
+ }
+}
diff --git a/llparse/src/implementation/c/index.ts b/llparse/src/implementation/c/index.ts
new file mode 100644
index 0000000..ae94d34
--- /dev/null
+++ b/llparse/src/implementation/c/index.ts
@@ -0,0 +1,199 @@
+import * as frontend from 'llparse-frontend';
+
+import {
+ ARG_STATE, ARG_POS, ARG_ENDPOS,
+ STATE_ERROR,
+ VAR_MATCH,
+ CONTAINER_KEY,
+} from './constants';
+import { Compilation } from './compilation';
+import code from './code';
+import node from './node';
+import { Node } from './node';
+import transform from './transform';
+
+ /** Options accepted by `CCompiler`; also forwarded to `Compilation`. */
+ export interface ICCompilerOptions {
+ // NOTE(review): presumably the name of a debug hook called from generated
+ // code; it is consumed by `Compilation`, confirm there.
+ readonly debug?: string;
+ // Base name (without `.h`) of the header included by the generated source;
+ // `compile()` falls back to the parser prefix when it is not set.
+ readonly header?: string;
+ }
+
+ /** Subset of the compiler options that exposes only `header`. */
+ export interface ICPublicOptions {
+ // Same meaning as `ICCompilerOptions.header`.
+ readonly header?: string;
+ }
+
+export class CCompiler {
+ /**
+ * Register the C `code`, `node` and `transform` implementations on the
+ * frontend container under `CONTAINER_KEY`, and keep `options` for later
+ * use by `compile()`.
+ */
+ constructor(container: frontend.Container,
+ public readonly options: ICCompilerOptions) {
+ container.add(CONTAINER_KEY, { code, node, transform });
+ }
+
+ /**
+ * Generate the C source for the parser described by `info`.
+ *
+ * The emitted text contains, in order: standard includes, the optional
+ * SSE4.2 intrinsics include, the `ALIGN` macro, the parser header include,
+ * the span callback typedef, globals collected by the compilation,
+ * `<prefix>_init()`, the static `<prefix>__run()` state machine and the
+ * public `<prefix>_execute()` entry point.
+ *
+ * @param info  Frontend result (prefix, properties, spans, root node, ...).
+ * @returns The generated lines joined with `\n`.
+ */
+ public compile(info: frontend.IFrontendResult): string {
+ const compilation = new Compilation(info.prefix, info.properties,
+ info.resumptionTargets, this.options);
+ const out: string[] = [];
+
+ out.push('#include <stdlib.h>');
+ out.push('#include <stdint.h>');
+ out.push('#include <string.h>');
+ out.push('');
+
+ // NOTE: Inspired by https://github.com/h2o/picohttpparser
+ // TODO(indutny): Windows support for SSE4.2.
+ // See: https://github.com/nodejs/llparse/pull/24#discussion_r299789676
+ // (There is no `__SSE4_2__` define for MSVC)
+ out.push('#ifdef __SSE4_2__');
+ out.push(' #ifdef _MSC_VER');
+ out.push(' #include <nmmintrin.h>');
+ out.push(' #else /* !_MSC_VER */');
+ out.push(' #include <x86intrin.h>');
+ out.push(' #endif /* _MSC_VER */');
+ out.push('#endif /* __SSE4_2__ */');
+ out.push('');
+
+ // Compiler-specific spelling of an alignment attribute.
+ out.push('#ifdef _MSC_VER');
+ out.push(' #define ALIGN(n) _declspec(align(n))');
+ out.push('#else /* !_MSC_VER */');
+ out.push(' #define ALIGN(n) __attribute__((aligned(n)))');
+ out.push('#endif /* _MSC_VER */');
+
+ out.push('');
+ // The header name defaults to the parser prefix.
+ out.push(`#include "${this.options.header || info.prefix}.h"`);
+ out.push(``);
+ out.push(`typedef int (*${info.prefix}__span_cb)(`);
+ out.push(` ${info.prefix}_t*, const char*, const char*);`);
+ out.push('');
+
+ // Queue span callbacks to be built before `executeSpans()` code gets called
+ // below.
+ compilation.reserveSpans(info.spans);
+
+ // Building the root node yields the name of the initial state; it has to
+ // happen before the globals are emitted below.
+ const root = info.root as frontend.ContainerWrap<frontend.node.Node>;
+ const rootState = root.get<Node<frontend.node.Node>>(CONTAINER_KEY)
+ .build(compilation);
+
+ compilation.buildGlobals(out);
+ out.push('');
+
+ // `<prefix>_init()`: zero the state and point it at the root state.
+ out.push(`int ${info.prefix}_init(${info.prefix}_t* ${ARG_STATE}) {`);
+ out.push(` memset(${ARG_STATE}, 0, sizeof(*${ARG_STATE}));`);
+ out.push(` ${ARG_STATE}->_current = (void*) (intptr_t) ${rootState};`);
+ out.push(' return 0;');
+ out.push('}');
+ out.push('');
+
+ // `<prefix>__run()`: dispatch on the stored state, then the state bodies.
+ out.push(`static llparse_state_t ${info.prefix}__run(`);
+ out.push(` ${info.prefix}_t* ${ARG_STATE},`);
+ out.push(` const unsigned char* ${ARG_POS},`);
+ out.push(` const unsigned char* ${ARG_ENDPOS}) {`);
+ out.push(` int ${VAR_MATCH};`);
+ out.push(` switch ((llparse_state_t) (intptr_t) ` +
+ `${compilation.currentField()}) {`);
+
+ let tmp: string[] = [];
+ compilation.buildResumptionStates(tmp);
+ compilation.indent(out, tmp, ' ');
+
+ out.push(' default:');
+ out.push(' /* UNREACHABLE */');
+ out.push(' abort();');
+ out.push(' }');
+
+ tmp = [];
+ compilation.buildInternalStates(tmp);
+ compilation.indent(out, tmp, ' ');
+
+ out.push('}');
+ out.push('');
+
+
+ // `<prefix>_execute()`: public entry point wrapping `<prefix>__run()`.
+ out.push(`int ${info.prefix}_execute(${info.prefix}_t* ${ARG_STATE}, ` +
+ `const char* ${ARG_POS}, const char* ${ARG_ENDPOS}) {`);
+ out.push(' llparse_state_t next;');
+ out.push('');
+
+ out.push(' /* check lingering errors */');
+ out.push(` if (${compilation.errorField()} != 0) {`);
+ out.push(` return ${compilation.errorField()};`);
+ out.push(' }');
+ out.push('');
+
+ tmp = [];
+ this.restartSpans(compilation, info, tmp);
+ compilation.indent(out, tmp, ' ');
+
+ // The public API takes `const char*`; the state machine works on
+ // `const unsigned char*`.
+ const args = [
+ compilation.stateArg(),
+ `(const unsigned char*) ${compilation.posArg()}`,
+ `(const unsigned char*) ${compilation.endPosArg()}`,
+ ];
+ out.push(` next = ${info.prefix}__run(${args.join(', ')});`);
+ out.push(` if (next == ${STATE_ERROR}) {`);
+ out.push(` return ${compilation.errorField()};`);
+ out.push(' }');
+ out.push(` ${compilation.currentField()} = (void*) (intptr_t) next;`);
+ out.push('');
+
+ tmp = [];
+ this.executeSpans(compilation, info, tmp);
+ compilation.indent(out, tmp, ' ');
+
+ out.push(' return 0;');
+ out.push('}');
+
+ return out.join('\n');
+ }
+
+ /**
+  * Emit C code that moves the recorded start of every span with a non-NULL
+  * position field to the beginning of the current input chunk.
+  *
+  * Emits nothing when the parser has no spans.
+  */
+ private restartSpans(ctx: Compilation, info: frontend.IFrontendResult,
+                      out: string[]): void {
+   const spans = info.spans;
+   if (spans.length !== 0) {
+     out.push('/* restart spans */');
+
+     for (const span of spans) {
+       const startField = ctx.spanPosField(span.index);
+       const guard = `if (${startField} != NULL) {`;
+       const assignment = ` ${startField} = (void*) ${ctx.posArg()};`;
+
+       out.push(guard);
+       out.push(assignment);
+       out.push('}');
+     }
+
+     out.push('');
+   }
+ }
+
+ /**
+  * Emit C code that, for every span with a non-NULL position field, invokes
+  * the span callback with the range from that position to the end of the
+  * chunk. A non-zero callback result is stored as the error, the error
+  * position is set to the end of the chunk, and the result is returned.
+  *
+  * Emits nothing when the parser has no spans.
+  */
+ private executeSpans(ctx: Compilation, info: frontend.IFrontendResult,
+                      out: string[]): void {
+   const spans = info.spans;
+   if (spans.length === 0) {
+     return;
+   }
+
+   out.push('/* execute spans */');
+   spans.forEach((span) => {
+     const startField = ctx.spanPosField(span.index);
+
+     // A span with a single callback calls it directly; otherwise the
+     // callback is read from the span's callback field.
+     let callee: string;
+     if (span.callbacks.length === 1) {
+       callee = ctx.buildCode(ctx.unwrapCode(span.callbacks[0]));
+     } else {
+       const cast = `(${info.prefix}__span_cb) `;
+       callee = '(' + cast + ctx.spanCbField(span.index) + ')';
+     }
+
+     const callArgs = [
+       ctx.stateArg(),
+       startField,
+       `(const char*) ${ctx.endPosArg()}`,
+     ].join(', ');
+
+     out.push(`if (${startField} != NULL) {`);
+     out.push(' int error;');
+     out.push('');
+     out.push(` error = ${callee}(${callArgs});`);
+
+     // TODO(indutny): de-duplicate this here and in SpanEnd
+     out.push(' if (error != 0) {');
+     out.push(` ${ctx.errorField()} = error;`);
+     out.push(` ${ctx.errorPosField()} = ${ctx.endPosArg()};`);
+     out.push(' return error;');
+     out.push(' }');
+     out.push('}');
+   });
+   out.push('');
+ }
+}
diff --git a/llparse/src/implementation/c/node/base.ts b/llparse/src/implementation/c/node/base.ts
new file mode 100644
index 0000000..51f90bb
--- /dev/null
+++ b/llparse/src/implementation/c/node/base.ts
@@ -0,0 +1,77 @@
+import * as assert from 'assert';
+import * as frontend from 'llparse-frontend';
+
+import { Compilation } from '../compilation';
+import {
+ STATE_PREFIX, LABEL_PREFIX,
+} from '../constants';
+
+ /** Transition to another node, as consumed by `Node#tailTo()`. */
+ export interface INodeEdge {
+ // Target node of the transition.
+ readonly node: frontend.IWrap<frontend.node.Node>;
+ // When false, the input position is incremented before jumping.
+ readonly noAdvance: boolean;
+ // When defined, assigned to the match variable before jumping.
+ readonly value?: number;
+ }
+
+ /**
+  * Base class of the C node builders.
+  *
+  * A node is built at most once: the first `build()` call registers the
+  * generated state body with the compilation and caches the state name,
+  * later calls only return the cached name.
+  */
+ export abstract class Node<T extends frontend.node.Node> {
+   protected cachedDecl: string | undefined;
+   protected privCompilation: Compilation | undefined;
+
+   constructor(public readonly ref: T) {
+   }
+
+   /**
+    * Build this node (once) and return the name of its state.
+    *
+    * The name is cached before `doBuild()` runs, so a node that is reached
+    * again while its own body is being generated gets the cached name
+    * instead of being built a second time.
+    */
+   public build(compilation: Compilation): string {
+     const cached = this.cachedDecl;
+     if (cached !== undefined) {
+       return cached;
+     }
+
+     const id = this.ref.id;
+     const stateName = STATE_PREFIX + id.name;
+     this.cachedDecl = stateName;
+     this.privCompilation = compilation;
+
+     const body: string[] = [];
+     const banner =
+       'Entering node "' + id.originalName + '" ("' + id.name + '")';
+     compilation.debug(body, banner);
+     this.doBuild(body);
+
+     compilation.addState(stateName, body);
+     return stateName;
+   }
+
+   /** Compilation given to `build()`; asserts that `build()` was called. */
+   protected get compilation(): Compilation {
+     const current = this.privCompilation;
+     assert(current !== undefined);
+     return current!;
+   }
+
+   /** Emit the guard that pauses when the position reached the end. */
+   protected prologue(out: string[]): void {
+     const ctx = this.compilation;
+     const pauseLines: string[] = [];
+
+     out.push(`if (${ctx.posArg()} == ${ctx.endPosArg()}) {`);
+     this.pause(pauseLines);
+     this.compilation.indent(out, pauseLines, ' ');
+     out.push('}');
+   }
+
+   /** Emit a `return` of this node's own state name. */
+   protected pause(out: string[]): void {
+     out.push('return ' + this.cachedDecl + ';');
+   }
+
+   /**
+    * Emit the jump to `edge.node`: optionally advance the position,
+    * optionally set the match variable, then `goto` the target's label.
+    */
+   protected tailTo(out: string[], edge: INodeEdge): void {
+     const ctx = this.compilation;
+     // The target has to be built first so that its label exists.
+     const target = ctx.unwrapNode(edge.node).build(ctx);
+
+     const advance = !edge.noAdvance;
+     if (advance) {
+       out.push(ctx.posArg() + '++;');
+     }
+
+     const value = edge.value;
+     if (value !== undefined) {
+       out.push(`${ctx.matchVar()} = ${value};`);
+     }
+
+     out.push('goto ' + LABEL_PREFIX + target + ';');
+   }
+
+   /** Emit the node-specific state body into `out`. */
+   protected abstract doBuild(out: string[]): void;
+ }
diff --git a/llparse/src/implementation/c/node/consume.ts b/llparse/src/implementation/c/node/consume.ts
new file mode 100644
index 0000000..658a00e
--- /dev/null
+++ b/llparse/src/implementation/c/node/consume.ts
@@ -0,0 +1,48 @@
+import * as frontend from 'llparse-frontend';
+
+import { Compilation } from '../compilation';
+import { Node } from './base';
+
+ /**
+  * Node that skips as many input bytes as the referenced state field says.
+  *
+  * When the current chunk holds enough bytes, the position is advanced by
+  * the field's value, the field is zeroed and control moves to `otherwise`.
+  * When it does not, the field is decremented by the number of available
+  * bytes and the node pauses until more input arrives.
+  */
+ export class Consume extends Node<frontend.node.Consume> {
+   /**
+    * Emit the state body.
+    *
+    * @throws Error when the field type is not one of i8/i16/i32/i64.
+    */
+   public doBuild(out: string[]): void {
+     const ctx = this.compilation;
+
+     const index = ctx.stateField(this.ref.field);
+     const ty = ctx.getFieldType(this.ref.field);
+
+     let fieldTy: string;
+     switch (ty) {
+       case 'i64':
+         fieldTy = 'uint64_t';
+         break;
+       case 'i32':
+         fieldTy = 'uint32_t';
+         break;
+       case 'i16':
+         fieldTy = 'uint16_t';
+         break;
+       case 'i8':
+         fieldTy = 'uint8_t';
+         break;
+       default:
+         throw new Error(
+           `Unsupported type ${ty} of field ${this.ref.field} for consume node`);
+     }
+
+     out.push('size_t avail;');
+     out.push(`${fieldTy} need;`);
+
+     out.push('');
+     out.push(`avail = ${ctx.endPosArg()} - ${ctx.posArg()};`);
+     out.push(`need = ${index};`);
+
+     // Note: `avail` or `need` are going to be coerced to the largest
+     // datatype needed to hold either of the values.
+     out.push('if (avail >= need) {');
+     // Use the position argument name from the compilation, like every other
+     // emitted statement, instead of a hard-coded `p`.
+     out.push(` ${ctx.posArg()} += need;`);
+     out.push(` ${index} = 0;`);
+     const tmp: string[] = [];
+     this.tailTo(tmp, this.ref.otherwise!);
+     ctx.indent(out, tmp, ' ');
+     out.push('}');
+     out.push('');
+
+     // Not enough input: consume what is there and wait for the next chunk.
+     out.push(`${index} -= avail;`);
+     this.pause(out);
+   }
+ }
diff --git a/llparse/src/implementation/c/node/empty.ts b/llparse/src/implementation/c/node/empty.ts
new file mode 100644
index 0000000..e28ecb5
--- /dev/null
+++ b/llparse/src/implementation/c/node/empty.ts
@@ -0,0 +1,16 @@
+import * as frontend from 'llparse-frontend';
+
+import { Compilation } from '../compilation';
+import { Node } from './base';
+
+ /** Node that only forwards to its `otherwise` edge. */
+ export class Empty extends Node<frontend.node.Empty> {
+   public doBuild(out: string[]): void {
+     const edge = this.ref.otherwise!;
+
+     // The end-of-input guard is emitted only for an edge that advances.
+     const advances = !edge.noAdvance;
+     if (advances) {
+       this.prologue(out);
+     }
+
+     this.tailTo(out, edge);
+   }
+ }
diff --git a/llparse/src/implementation/c/node/error.ts b/llparse/src/implementation/c/node/error.ts
new file mode 100644
index 0000000..29dce63
--- /dev/null
+++ b/llparse/src/implementation/c/node/error.ts
@@ -0,0 +1,33 @@
+import * as frontend from 'llparse-frontend';
+
+import { Compilation } from '../compilation';
+import { STATE_ERROR } from '../constants';
+import { Node } from './base';
+
+ /**
+  * Node that records an error (code, reason, position) in the parser state
+  * and stops execution in a non-recoverable state.
+  */
+ class ErrorNode<T extends frontend.node.Error> extends Node<T> {
+   /**
+    * Emit the assignments of the error code (as a hex literal), the reason
+    * string and the error position.
+    */
+   protected storeError(out: string[]): void {
+     const ctx = this.compilation;
+     const code = this.ref.code;
+
+     // `Number#toString(16)` keeps the sign of a negative number ("-ff"), so
+     // format the magnitude and put the sign in front of the `0x` prefix.
+     // Otherwise a negative code would be emitted as `-0x-ff`.
+     const sign = code < 0 ? '-' : '';
+     const hexCode = `${sign}0x${Math.abs(code).toString(16)}`;
+
+     out.push(`${ctx.errorField()} = ${hexCode};`);
+     out.push(`${ctx.reasonField()} = ${ctx.cstring(this.ref.reason)};`);
+     out.push(`${ctx.errorPosField()} = (const char*) ${ctx.posArg()};`);
+   }
+
+   /** Emit the state body: store the error and return the error state. */
+   public doBuild(out: string[]): void {
+     this.storeError(out);
+
+     // Non-recoverable state
+     out.push(`${this.compilation.currentField()} = ` +
+       `(void*) (intptr_t) ${STATE_ERROR};`);
+     out.push(`return ${STATE_ERROR};`);
+   }
+ }
+
+export { ErrorNode as Error };
diff --git a/llparse/src/implementation/c/node/index.ts b/llparse/src/implementation/c/node/index.ts
new file mode 100644
index 0000000..ba751d9
--- /dev/null
+++ b/llparse/src/implementation/c/node/index.ts
@@ -0,0 +1,27 @@
+import * as frontend from 'llparse-frontend';
+
+import { Consume } from './consume';
+import { Empty } from './empty';
+import { Error as ErrorNode } from './error';
+import { Invoke } from './invoke';
+import { Pause } from './pause';
+import { Sequence } from './sequence';
+import { Single } from './single';
+import { SpanEnd } from './span-end';
+import { SpanStart } from './span-start';
+import { TableLookup } from './table-lookup';
+
+export { Node } from './base';
+
+ // C node builder classes, exported as one object.
+ export default {
+ Consume,
+ Empty,
+ // `ErrorNode` is generic over the frontend node type; this entry pins it to
+ // `frontend.node.Error`.
+ Error: class Error extends ErrorNode<frontend.node.Error> {},
+ Invoke,
+ Pause,
+ Sequence,
+ Single,
+ SpanEnd,
+ SpanStart,
+ TableLookup,
+ };
diff --git a/llparse/src/implementation/c/node/invoke.ts b/llparse/src/implementation/c/node/invoke.ts
new file mode 100644
index 0000000..ee917e9
--- /dev/null
+++ b/llparse/src/implementation/c/node/invoke.ts
@@ -0,0 +1,44 @@
+import * as frontend from 'llparse-frontend';
+
+import { Compilation } from '../compilation';
+import { Node } from './base';
+
+ /**
+  * Node that calls a piece of code and dispatches on its return value: one
+  * `case` per known result code, and `otherwise` as the `default`.
+  */
+ export class Invoke extends Node<frontend.node.Invoke> {
+   public doBuild(out: string[]): void {
+     const ctx = this.compilation;
+
+     const callee = ctx.unwrapCode(this.ref.code);
+     const calleeName = ctx.buildCode(callee);
+
+     const callArgs: string[] = [
+       ctx.stateArg(),
+       ctx.posArg(),
+       ctx.endPosArg(),
+     ];
+
+     // Code with the `value` signature also receives the match variable.
+     if (callee.ref.signature === 'value') {
+       callArgs.push(ctx.matchVar());
+     }
+
+     out.push(`switch (${calleeName}(${callArgs.join(', ')})) {`);
+
+     for (const edge of this.ref.edges) {
+       out.push(` case ${edge.code}:`);
+
+       // Result edges never advance the position and carry no value.
+       const body: string[] = [];
+       this.tailTo(body, {
+         noAdvance: true,
+         node: edge.node,
+         value: undefined,
+       });
+       ctx.indent(out, body, ' ');
+     }
+
+     out.push(' default:');
+     const fallback: string[] = [];
+     this.tailTo(fallback, this.ref.otherwise!);
+     ctx.indent(out, fallback, ' ');
+     out.push('}');
+   }
+ }
diff --git a/llparse/src/implementation/c/node/pause.ts b/llparse/src/implementation/c/node/pause.ts
new file mode 100644
index 0000000..c239b46
--- /dev/null
+++ b/llparse/src/implementation/c/node/pause.ts
@@ -0,0 +1,19 @@
+import * as frontend from 'llparse-frontend';
+
+import { Compilation } from '../compilation';
+import { STATE_ERROR } from '../constants';
+import { Error as ErrorNode } from './error';
+
+ /**
+  * Node that stores an error like `ErrorNode`, but leaves the parser in a
+  * recoverable state: the current state is set to the `otherwise` target.
+  */
+ export class Pause extends ErrorNode<frontend.node.Pause> {
+   public doBuild(out: string[]): void {
+     const ctx = this.compilation;
+
+     this.storeError(out);
+
+     // Recoverable state
+     const resumeNode = ctx.unwrapNode(this.ref.otherwise!.node);
+     const resumeState = resumeNode.build(ctx);
+     const storeState =
+       ctx.currentField() + ' = ' + `(void*) (intptr_t) ${resumeState};`;
+
+     out.push(storeState);
+     out.push('return ' + STATE_ERROR + ';');
+   }
+ }
diff --git a/llparse/src/implementation/c/node/sequence.ts b/llparse/src/implementation/c/node/sequence.ts
new file mode 100644
index 0000000..73d8816
--- /dev/null
+++ b/llparse/src/implementation/c/node/sequence.ts
@@ -0,0 +1,55 @@
+import * as frontend from 'llparse-frontend';
+
+import { Compilation } from '../compilation';
+import {
+ SEQUENCE_COMPLETE, SEQUENCE_MISMATCH, SEQUENCE_PAUSE,
+} from '../constants';
+import { Node } from './base';
+
+ /**
+  * Node that matches a multi-byte sequence through the shared match helper
+  * and dispatches on its status: complete -> `edge`, pause -> stay in this
+  * state, mismatch -> `otherwise`.
+  */
+ export class Sequence extends Node<frontend.node.Sequence> {
+   public doBuild(out: string[]): void {
+     const ctx = this.compilation;
+
+     out.push('llparse_match_t match_seq;');
+     out.push('');
+
+     this.prologue(out);
+
+     const matchSequence = ctx.getMatchSequence(this.ref.transform!,
+       this.ref.select);
+
+     out.push(`match_seq = ${matchSequence}(${ctx.stateArg()}, ` +
+       `${ctx.posArg()}, ` +
+       `${ctx.endPosArg()}, ${ctx.blob(this.ref.select)}, ` +
+       `${this.ref.select.length});`);
+     // Continue from where the helper stopped. Use the position argument
+     // name from the compilation, like every other emitted statement,
+     // instead of a hard-coded `p`.
+     out.push(`${ctx.posArg()} = match_seq.current;`);
+
+     let tmp: string[];
+
+     // Every case ends in a `goto` or `return`, so no `break` is emitted.
+     out.push('switch (match_seq.status) {');
+
+     out.push(` case ${SEQUENCE_COMPLETE}: {`);
+     tmp = [];
+     this.tailTo(tmp, {
+       noAdvance: false,
+       node: this.ref.edge!.node,
+       value: this.ref.edge!.value,
+     });
+     ctx.indent(out, tmp, ' ');
+     out.push(' }');
+
+     out.push(` case ${SEQUENCE_PAUSE}: {`);
+     tmp = [];
+     this.pause(tmp);
+     ctx.indent(out, tmp, ' ');
+     out.push(' }');
+
+     out.push(` case ${SEQUENCE_MISMATCH}: {`);
+     tmp = [];
+     this.tailTo(tmp, this.ref.otherwise!);
+     ctx.indent(out, tmp, ' ');
+     out.push(' }');
+
+     out.push('}');
+   }
+ }
diff --git a/llparse/src/implementation/c/node/single.ts b/llparse/src/implementation/c/node/single.ts
new file mode 100644
index 0000000..b9c8811
--- /dev/null
+++ b/llparse/src/implementation/c/node/single.ts
@@ -0,0 +1,47 @@
+import * as frontend from 'llparse-frontend';
+
+import { Compilation } from '../compilation';
+import { Node } from './base';
+
+ /**
+  * Node that switches on a single (transformed) input byte: one `case` per
+  * edge key, and `otherwise` as the `default`.
+  */
+ export class Single extends Node<frontend.node.Single> {
+   public doBuild(out: string[]): void {
+     const ctx = this.compilation;
+
+     const otherwise = this.ref.otherwise!;
+
+     this.prologue(out);
+
+     const transform = ctx.unwrapTransform(this.ref.transform!);
+     const current = transform.build(ctx, `*${ctx.posArg()}`);
+
+     out.push(`switch (${current}) {`);
+     for (const edge of this.ref.edges) {
+       out.push(` case ${this.caseLabel(edge.key)}: {`);
+
+       const tmp: string[] = [];
+       this.tailTo(tmp, edge);
+       ctx.indent(out, tmp, ' ');
+
+       out.push(' }');
+     }
+
+     out.push(` default: {`);
+
+     const tmp: string[] = [];
+     this.tailTo(tmp, otherwise);
+     ctx.indent(out, tmp, ' ');
+
+     out.push(' }');
+
+     out.push(`}`);
+   }
+
+   /**
+    * C `case` label for a byte value: a character literal for printable
+    * ASCII, a decimal number for everything else.
+    */
+   private caseLabel(key: number): string {
+     // Non-printable ASCII, single quote (0x27) and backslash (0x5c) would
+     // need escaping inside a character literal, so emit them as numbers.
+     const needsNumber =
+       key < 0x20 || key > 0x7e || key === 0x27 || key === 0x5c;
+     if (needsNumber) {
+       return key.toString();
+     }
+     return `'${String.fromCharCode(key)}'`;
+   }
+ }
diff --git a/llparse/src/implementation/c/node/span-end.ts b/llparse/src/implementation/c/node/span-end.ts
new file mode 100644
index 0000000..09f97e5
--- /dev/null
+++ b/llparse/src/implementation/c/node/span-end.ts
@@ -0,0 +1,56 @@
+import * as frontend from 'llparse-frontend';
+
+import { Compilation } from '../compilation';
+import { STATE_ERROR } from '../constants';
+import { Node } from './base';
+
+ /**
+  * Node that closes a span: it loads and clears the stored start position,
+  * invokes the span callback with the start and current positions, and
+  * either reports the callback's error or continues to `otherwise`.
+  */
+ export class SpanEnd extends Node<frontend.node.SpanEnd> {
+   public doBuild(out: string[]): void {
+     out.push('const unsigned char* start;');
+     out.push('int err;');
+     out.push('');
+
+     const ctx = this.compilation;
+     const startField = ctx.spanPosField(this.ref.field.index);
+
+     // Load start position
+     out.push('start = ' + startField + ';');
+
+     // ...and reset
+     out.push(startField + ' = NULL;');
+
+     // Invoke callback
+     const callbackName = ctx.buildCode(ctx.unwrapCode(this.ref.callback));
+     const call = `${callbackName}(${ctx.stateArg()}, start, ${ctx.posArg()})`;
+     out.push(`err = ${call};`);
+
+     const failure: string[] = [];
+     out.push('if (err != 0) {');
+     this.buildError(failure, 'err');
+     ctx.indent(out, failure, ' ');
+     out.push('}');
+
+     this.tailTo(out, this.ref.otherwise!);
+   }
+
+   /**
+    * Emit the error path: store the error code and position, set the
+    * current state to the `otherwise` target and return the error state.
+    */
+   private buildError(out: string[], code: string) {
+     const ctx = this.compilation;
+     const next = this.ref.otherwise!;
+
+     out.push(`${ctx.errorField()} = ${code};`);
+
+     // The reported position is one past the current one when the edge
+     // advances.
+     const here = ctx.posArg();
+     const resumePos = next.noAdvance ? here : `(${here} + 1)`;
+     out.push(`${ctx.errorPosField()} = (const char*) ${resumePos};`);
+
+     const resumeState = ctx.unwrapNode(next.node).build(ctx);
+     out.push(ctx.currentField() + ' = ' +
+       `(void*) (intptr_t) ${resumeState};`);
+
+     out.push('return ' + STATE_ERROR + ';');
+   }
+ }
diff --git a/llparse/src/implementation/c/node/span-start.ts b/llparse/src/implementation/c/node/span-start.ts
new file mode 100644
index 0000000..445da67
--- /dev/null
+++ b/llparse/src/implementation/c/node/span-start.ts
@@ -0,0 +1,26 @@
+import * as frontend from 'llparse-frontend';
+
+import { Compilation } from '../compilation';
+import { Node } from './base';
+
+ /**
+  * Node that opens a span by recording the current position and, for spans
+  * with more than one callback, the callback to use.
+  */
+ export class SpanStart extends Node<frontend.node.SpanStart> {
+   public doBuild(out: string[]): void {
+     // Prevent spurious empty spans
+     this.prologue(out);
+
+     const ctx = this.compilation;
+     const span = this.ref.field;
+
+     const startField = ctx.spanPosField(span.index);
+     out.push(startField + ' = (void*) ' + ctx.posArg() + ';');
+
+     // The callback is stored only when the span has several of them.
+     const hasSeveralCallbacks = span.callbacks.length > 1;
+     if (hasSeveralCallbacks) {
+       const callbackField = ctx.spanCbField(span.index);
+       const code = ctx.unwrapCode(this.ref.callback);
+       out.push(`${callbackField} = ${ctx.buildCode(code)};`);
+     }
+
+     this.tailTo(out, this.ref.otherwise!);
+   }
+ }
diff --git a/llparse/src/implementation/c/node/table-lookup.ts b/llparse/src/implementation/c/node/table-lookup.ts
new file mode 100644
index 0000000..6a400a3
--- /dev/null
+++ b/llparse/src/implementation/c/node/table-lookup.ts
@@ -0,0 +1,196 @@
+import * as assert from 'assert';
+import * as frontend from 'llparse-frontend';
+
+import { Compilation } from '../compilation';
+import { Node } from './base';
+
+ // Largest key value; the lookup table has `MAX_CHAR + 1` entries.
+ const MAX_CHAR = 0xff;
+ // Number of table entries emitted per line of the C initializer.
+ const TABLE_GROUP = 16;
+
+ // _mm_cmpestri takes 8 ranges
+ // (each range is a first/last pair, hence 16 values per call)
+ const SSE_RANGES_LEN = 16;
+ // _mm_cmpestri takes 128bit input
+ const SSE_RANGES_PAD = 16;
+ // Upper bound on the number of `_mm_cmpestri` calls emitted for one node.
+ const MAX_SSE_CALLS = 2;
+ // Passed to `ctx.blob()` for the ranges blob (presumably its alignment in
+ // bytes; confirm in `Compilation`).
+ const SSE_ALIGNMENT = 16;
+
+ /** Lookup table produced by `TableLookup#buildTable()`. */
+ interface ITable {
+ // C identifier of the table.
+ readonly name: string;
+ // Lines of the C declaration of the table.
+ readonly declaration: ReadonlyArray<string>;
+ }
+
+export class TableLookup extends Node<frontend.node.TableLookup> {
+ /**
+  * Emit the state body: the lookup table declaration, the end-of-input
+  * guard, the optional SSE4.2 fast path, and a `switch` over the table
+  * entry of the current (transformed) byte. Case `i + 1` belongs to edge
+  * `i`; table entry 0 (no edge) falls into `default` -> `otherwise`.
+  */
+ public doBuild(out: string[]): void {
+   const ctx = this.compilation;
+
+   const table = this.buildTable();
+   for (const line of table.declaration) {
+     out.push(line);
+   }
+
+   this.prologue(out);
+
+   const transform = ctx.unwrapTransform(this.ref.transform!);
+
+   // Try to vectorize nodes matching characters and looping to themselves
+   // NOTE: `switch` below triggers when there is not enough characters in the
+   // stream for vectorized processing.
+   this.buildSSE(out);
+
+   const current = transform.build(ctx, `*${ctx.posArg()}`);
+   out.push(`switch (${table.name}[(uint8_t) ${current}]) {`);
+
+   // The loop binding is used directly; the original re-read the same edge
+   // from `this.ref.edges[index]` into a shadowing local.
+   for (const [ index, edge ] of this.ref.edges.entries()) {
+     out.push(` case ${index + 1}: {`);
+
+     const tmp: string[] = [];
+     this.tailTo(tmp, {
+       noAdvance: edge.noAdvance,
+       node: edge.node,
+       value: undefined,
+     });
+     ctx.indent(out, tmp, ' ');
+
+     out.push(' }');
+   }
+
+   out.push(` default: {`);
+
+   const tmp: string[] = [];
+   this.tailTo(tmp, this.ref.otherwise!);
+   ctx.indent(out, tmp, ' ');
+
+   out.push(' }');
+   out.push('}');
+ }
+
+ /**
+ * Emit an optional SSE4.2 fast path (guarded by `#ifdef __SSE4_2__`) for a
+ * node whose only edge loops back to the node itself.
+ *
+ * Nothing is emitted, and `false` is returned, when the node uses a
+ * transform other than `id`, does not have exactly one edge, that edge does
+ * not point back to this node, the edge has no keys, or the keys need more
+ * range values than `MAX_SSE_CALLS` calls can take.
+ *
+ * @returns `true` when the fast path was emitted.
+ */
+ private buildSSE(out: string[]): boolean {
+ const ctx = this.compilation;
+
+ // Transformation is not supported atm
+ if (this.ref.transform && this.ref.transform.ref.name !== 'id') {
+ return false;
+ }
+
+ if (this.ref.edges.length !== 1) {
+ return false;
+ }
+
+ // Only self-loops are vectorized.
+ const edge = this.ref.edges[0];
+ if (edge.node.ref !== this.ref) {
+ return false;
+ }
+
+ // Collapse the keys into runs of consecutive values, stored flat as
+ // first0, last0, first1, last1, ...
+ // NOTE: keys are sorted
+ let ranges: number[] = [];
+ let first: number | undefined;
+ let last: number | undefined;
+ for (const key of edge.keys) {
+ if (first === undefined) {
+ first = key;
+ }
+ if (last === undefined) {
+ last = key;
+ }
+
+ // A gap closes the current run and starts a new one at `key`.
+ if (key - last > 1) {
+ ranges.push(first, last);
+ first = key;
+ }
+ last = key;
+ }
+ // Close the final run.
+ if (first !== undefined && last !== undefined) {
+ ranges.push(first, last);
+ }
+
+ if (ranges.length === 0) {
+ return false;
+ }
+
+ // Way too many calls would be required
+ if (ranges.length > MAX_SSE_CALLS * SSE_RANGES_LEN) {
+ return false;
+ }
+
+ // The fast path is taken only when at least 16 input bytes are available.
+ out.push('#ifdef __SSE4_2__');
+ out.push(`if (${ctx.endPosArg()} - ${ctx.posArg()} >= 16) {`);
+ out.push(' __m128i ranges;');
+ out.push(' __m128i input;');
+ // NOTE(review): `avail` is declared in the generated C but not used by any
+ // statement emitted in this method.
+ out.push(' int avail;');
+ out.push(' int match_len;');
+ out.push('');
+ out.push(' /* Load input */');
+ out.push(` input = _mm_loadu_si128((__m128i const*) ${ctx.posArg()});`);
+ // One `_mm_cmpestri` call per group of up to `SSE_RANGES_LEN` range values.
+ for (let off = 0; off < ranges.length; off += SSE_RANGES_LEN) {
+ const subRanges = ranges.slice(off, off + SSE_RANGES_LEN);
+
+ // Zero-pad to `SSE_RANGES_PAD` values; the real length is passed to
+ // `_mm_cmpestri` below.
+ let paddedRanges = subRanges.slice();
+ while (paddedRanges.length < SSE_RANGES_PAD) {
+ paddedRanges.push(0);
+ }
+
+ const blob = ctx.blob(Buffer.from(paddedRanges), SSE_ALIGNMENT);
+ out.push(` ranges = _mm_loadu_si128((__m128i const*) ${blob});`);
+ out.push('');
+
+ out.push(' /* Find first character that does not match `ranges` */');
+ out.push(` match_len = _mm_cmpestri(ranges, ${subRanges.length},`);
+ out.push(' input, 16,');
+ out.push(' _SIDD_UBYTE_OPS | _SIDD_CMP_RANGES |');
+ out.push(' _SIDD_NEGATIVE_POLARITY);');
+ out.push('');
+ out.push(' if (match_len != 0) {');
+ out.push(` ${ctx.posArg()} += match_len;`);
+
+ // The position was already moved by `match_len`, so the jump back to this
+ // node must not advance again.
+ const tmp: string[] = [];
+ assert.strictEqual(edge.noAdvance, false);
+ this.tailTo(tmp, {
+ noAdvance: true,
+ node: edge.node,
+ });
+ ctx.indent(out, tmp, ' ');
+
+ out.push(' }');
+ }
+
+ // No range group matched the first byte: take the `otherwise` edge.
+ {
+ const tmp: string[] = [];
+ this.tailTo(tmp, this.ref.otherwise!);
+ ctx.indent(out, tmp, ' ');
+ }
+ out.push('}');
+
+ out.push('#endif /* __SSE4_2__ */');
+
+ return true;
+ }
+
+ /**
+  * Build the byte -> edge lookup table.
+  *
+  * Entry `key` holds `index + 1` of the edge that owns `key`, or 0 when no
+  * edge does. A key owned by two edges trips the assertion.
+  *
+  * @returns The table name and the lines of its C declaration.
+  */
+ private buildTable(): ITable {
+   const size = MAX_CHAR + 1;
+   const entries: number[] = [];
+   for (let i = 0; i < size; i++) {
+     entries.push(0);
+   }
+
+   const edges = this.ref.edges;
+   for (let index = 0; index < edges.length; index++) {
+     for (const key of edges[index].keys) {
+       assert.strictEqual(entries[key], 0);
+       entries[key] = index + 1;
+     }
+   }
+
+   const declaration: string[] = [ 'static uint8_t lookup_table[] = {' ];
+   for (let start = 0; start < size; start += TABLE_GROUP) {
+     const end = start + TABLE_GROUP;
+     const row = ' ' + entries.slice(start, end).join(', ');
+     // Every row except the last one ends with a comma.
+     declaration.push(end < size ? row + ',' : row);
+   }
+   declaration.push('};');
+
+   return { name: 'lookup_table', declaration };
+ }
+}
diff --git a/llparse/src/implementation/c/transform/base.ts b/llparse/src/implementation/c/transform/base.ts
new file mode 100644
index 0000000..82028d5
--- /dev/null
+++ b/llparse/src/implementation/c/transform/base.ts
@@ -0,0 +1,10 @@
+import * as frontend from 'llparse-frontend';
+
+import { Compilation } from '../compilation';
+
+ /** Base class of the C implementations of frontend transforms. */
+ export abstract class Transform<T extends frontend.transform.Transform> {
+ constructor(public readonly ref: T) {
+ }
+
+ // Returns the C expression text that applies the transform to the C
+ // expression `value`.
+ public abstract build(ctx: Compilation, value: string): string;
+ }
diff --git a/llparse/src/implementation/c/transform/id.ts b/llparse/src/implementation/c/transform/id.ts
new file mode 100644
index 0000000..6c6105f
--- /dev/null
+++ b/llparse/src/implementation/c/transform/id.ts
@@ -0,0 +1,11 @@
+import * as frontend from 'llparse-frontend';
+
+import { Compilation } from '../compilation';
+import { Transform } from './base';
+
+ /** Identity transform: the C expression is returned unchanged. */
+ export class ID extends Transform<frontend.transform.ID> {
+ public build(ctx: Compilation, value: string): string {
+ // Identity transformation
+ return value;
+ }
+ }
diff --git a/llparse/src/implementation/c/transform/index.ts b/llparse/src/implementation/c/transform/index.ts
new file mode 100644
index 0000000..c13ba50
--- /dev/null
+++ b/llparse/src/implementation/c/transform/index.ts
@@ -0,0 +1,11 @@
+import { ID } from './id';
+import { ToLower } from './to-lower';
+import { ToLowerUnsafe } from './to-lower-unsafe';
+
+export { Transform } from './base';
+
+ // C transform builder classes, exported as one object.
+ export default {
+ ID,
+ ToLower,
+ ToLowerUnsafe,
+ };
diff --git a/llparse/src/implementation/c/transform/to-lower-unsafe.ts b/llparse/src/implementation/c/transform/to-lower-unsafe.ts
new file mode 100644
index 0000000..27f608c
--- /dev/null
+++ b/llparse/src/implementation/c/transform/to-lower-unsafe.ts
@@ -0,0 +1,10 @@
+import * as frontend from 'llparse-frontend';
+
+import { Compilation } from '../compilation';
+import { Transform } from './base';
+
+ /**
+  * Transform that sets bit 0x20 of the value unconditionally, without
+  * checking that the value is an upper-case letter.
+  */
+ export class ToLowerUnsafe extends Transform<frontend.transform.ToLowerUnsafe> {
+   public build(ctx: Compilation, value: string): string {
+     const operand = '(' + value + ')';
+     return '(' + operand + ' | 0x20)';
+   }
+ }
diff --git a/llparse/src/implementation/c/transform/to-lower.ts b/llparse/src/implementation/c/transform/to-lower.ts
new file mode 100644
index 0000000..f639ef1
--- /dev/null
+++ b/llparse/src/implementation/c/transform/to-lower.ts
@@ -0,0 +1,11 @@
+import * as frontend from 'llparse-frontend';
+
+import { Compilation } from '../compilation';
+import { Transform } from './base';
+
+ /**
+  * Transform that sets bit 0x20 only for values in `'A'..'Z'` and leaves
+  * every other value unchanged.
+  */
+ export class ToLower extends Transform<frontend.transform.ToLower> {
+   public build(ctx: Compilation, value: string): string {
+     const operand = '(' + value + ')';
+     const isUpper = operand + " >= 'A' && " + operand + " <= 'Z'";
+     const lowered = '(' + value + ' | 0x20)';
+     return '(' + isUpper + ' ? ' + lowered + ' : ' + operand + ')';
+   }
+ }