summaryrefslogtreecommitdiffstats
path: root/llparse/src/implementation/c/node
diff options
context:
space:
mode:
Diffstat (limited to 'llparse/src/implementation/c/node')
-rw-r--r--llparse/src/implementation/c/node/base.ts77
-rw-r--r--llparse/src/implementation/c/node/consume.ts48
-rw-r--r--llparse/src/implementation/c/node/empty.ts16
-rw-r--r--llparse/src/implementation/c/node/error.ts33
-rw-r--r--llparse/src/implementation/c/node/index.ts27
-rw-r--r--llparse/src/implementation/c/node/invoke.ts44
-rw-r--r--llparse/src/implementation/c/node/pause.ts19
-rw-r--r--llparse/src/implementation/c/node/sequence.ts55
-rw-r--r--llparse/src/implementation/c/node/single.ts47
-rw-r--r--llparse/src/implementation/c/node/span-end.ts56
-rw-r--r--llparse/src/implementation/c/node/span-start.ts26
-rw-r--r--llparse/src/implementation/c/node/table-lookup.ts196
12 files changed, 644 insertions, 0 deletions
diff --git a/llparse/src/implementation/c/node/base.ts b/llparse/src/implementation/c/node/base.ts
new file mode 100644
index 0000000..51f90bb
--- /dev/null
+++ b/llparse/src/implementation/c/node/base.ts
@@ -0,0 +1,77 @@
+import * as assert from 'assert';
+import * as frontend from 'llparse-frontend';
+
+import { Compilation } from '../compilation';
+import {
+ STATE_PREFIX, LABEL_PREFIX,
+} from '../constants';
+
+export interface INodeEdge {
+ readonly node: frontend.IWrap<frontend.node.Node>;
+ readonly noAdvance: boolean;
+ readonly value?: number;
+}
+
+export abstract class Node<T extends frontend.node.Node> {
+ protected cachedDecl: string | undefined;
+ protected privCompilation: Compilation | undefined;
+
+ constructor(public readonly ref: T) {
+ }
+
+ public build(compilation: Compilation): string {
+ if (this.cachedDecl !== undefined) {
+ return this.cachedDecl;
+ }
+
+ const res = STATE_PREFIX + this.ref.id.name;
+ this.cachedDecl = res;
+
+ this.privCompilation = compilation;
+
+ const out: string[] = [];
+ compilation.debug(out,
+ `Entering node "${this.ref.id.originalName}" ("${this.ref.id.name}")`);
+ this.doBuild(out);
+
+ compilation.addState(res, out);
+
+ return res;
+ }
+
+ protected get compilation(): Compilation {
+ assert(this.privCompilation !== undefined);
+ return this.privCompilation!;
+ }
+
+ protected prologue(out: string[]): void {
+ const ctx = this.compilation;
+
+ out.push(`if (${ctx.posArg()} == ${ctx.endPosArg()}) {`);
+
+ const tmp: string[] = [];
+ this.pause(tmp);
+ this.compilation.indent(out, tmp, ' ');
+
+ out.push('}');
+ }
+
+ protected pause(out: string[]): void {
+ out.push(`return ${this.cachedDecl};`);
+ }
+
+ protected tailTo(out: string[], edge: INodeEdge): void {
+ const ctx = this.compilation;
+ const target = ctx.unwrapNode(edge.node).build(ctx);
+
+ if (!edge.noAdvance) {
+ out.push(`${ctx.posArg()}++;`);
+ }
+ if (edge.value !== undefined) {
+ out.push(`${ctx.matchVar()} = ${edge.value};`);
+ }
+ out.push(`goto ${LABEL_PREFIX}${target};`);
+ }
+
+ protected abstract doBuild(out: string[]): void;
+}
diff --git a/llparse/src/implementation/c/node/consume.ts b/llparse/src/implementation/c/node/consume.ts
new file mode 100644
index 0000000..658a00e
--- /dev/null
+++ b/llparse/src/implementation/c/node/consume.ts
@@ -0,0 +1,48 @@
+import * as frontend from 'llparse-frontend';
+
+import { Compilation } from '../compilation';
+import { Node } from './base';
+
+export class Consume extends Node<frontend.node.Consume> {
+ public doBuild(out: string[]): void {
+ const ctx = this.compilation;
+
+ const index = ctx.stateField(this.ref.field);
+ const ty = ctx.getFieldType(this.ref.field);
+
+ let fieldTy: string;
+ if (ty === 'i64') {
+ fieldTy = 'uint64_t';
+ } else if (ty === 'i32') {
+ fieldTy = 'uint32_t';
+ } else if (ty === 'i16') {
+ fieldTy = 'uint16_t';
+ } else if (ty === 'i8') {
+ fieldTy = 'uint8_t';
+ } else {
+ throw new Error(
+ `Unsupported type ${ty} of field ${this.ref.field} for consume node`);
+ }
+
+ out.push('size_t avail;');
+ out.push(`${fieldTy} need;`);
+
+ out.push('');
+ out.push(`avail = ${ctx.endPosArg()} - ${ctx.posArg()};`);
+ out.push(`need = ${index};`);
+
+ // Note: `avail` or `need` are going to coerced to the largest
+ // datatype needed to hold either of the values.
+ out.push('if (avail >= need) {');
+ out.push(` p += need;`);
+ out.push(` ${index} = 0;`);
+ const tmp: string[] = [];
+ this.tailTo(tmp, this.ref.otherwise!);
+ ctx.indent(out, tmp, ' ');
+ out.push('}');
+ out.push('');
+
+ out.push(`${index} -= avail;`);
+ this.pause(out);
+ }
+}
diff --git a/llparse/src/implementation/c/node/empty.ts b/llparse/src/implementation/c/node/empty.ts
new file mode 100644
index 0000000..e28ecb5
--- /dev/null
+++ b/llparse/src/implementation/c/node/empty.ts
@@ -0,0 +1,16 @@
+import * as frontend from 'llparse-frontend';
+
+import { Compilation } from '../compilation';
+import { Node } from './base';
+
+export class Empty extends Node<frontend.node.Empty> {
+ public doBuild(out: string[]): void {
+ const otherwise = this.ref.otherwise!;
+
+ if (!otherwise.noAdvance) {
+ this.prologue(out);
+ }
+
+ this.tailTo(out, otherwise);
+ }
+}
diff --git a/llparse/src/implementation/c/node/error.ts b/llparse/src/implementation/c/node/error.ts
new file mode 100644
index 0000000..29dce63
--- /dev/null
+++ b/llparse/src/implementation/c/node/error.ts
@@ -0,0 +1,33 @@
+import * as frontend from 'llparse-frontend';
+
+import { Compilation } from '../compilation';
+import { STATE_ERROR } from '../constants';
+import { Node } from './base';
+
+class ErrorNode<T extends frontend.node.Error> extends Node<T> {
+ protected storeError(out: string[]): void {
+ const ctx = this.compilation;
+
+ let hexCode: string;
+ if (this.ref.code < 0) {
+ hexCode = `-0x` + this.ref.code.toString(16);
+ } else {
+ hexCode = '0x' + this.ref.code.toString(16);
+ }
+
+ out.push(`${ctx.errorField()} = ${hexCode};`);
+ out.push(`${ctx.reasonField()} = ${ctx.cstring(this.ref.reason)};`);
+ out.push(`${ctx.errorPosField()} = (const char*) ${ctx.posArg()};`);
+ }
+
+ public doBuild(out: string[]): void {
+ this.storeError(out);
+
+ // Non-recoverable state
+ out.push(`${this.compilation.currentField()} = ` +
+ `(void*) (intptr_t) ${STATE_ERROR};`);
+ out.push(`return ${STATE_ERROR};`);
+ }
+}
+
+export { ErrorNode as Error };
diff --git a/llparse/src/implementation/c/node/index.ts b/llparse/src/implementation/c/node/index.ts
new file mode 100644
index 0000000..ba751d9
--- /dev/null
+++ b/llparse/src/implementation/c/node/index.ts
@@ -0,0 +1,27 @@
+import * as frontend from 'llparse-frontend';
+
+import { Consume } from './consume';
+import { Empty } from './empty';
+import { Error as ErrorNode } from './error';
+import { Invoke } from './invoke';
+import { Pause } from './pause';
+import { Sequence } from './sequence';
+import { Single } from './single';
+import { SpanEnd } from './span-end';
+import { SpanStart } from './span-start';
+import { TableLookup } from './table-lookup';
+
+export { Node } from './base';
+
+export default {
+ Consume,
+ Empty,
+ Error: class Error extends ErrorNode<frontend.node.Error> {},
+ Invoke,
+ Pause,
+ Sequence,
+ Single,
+ SpanEnd,
+ SpanStart,
+ TableLookup,
+};
diff --git a/llparse/src/implementation/c/node/invoke.ts b/llparse/src/implementation/c/node/invoke.ts
new file mode 100644
index 0000000..ee917e9
--- /dev/null
+++ b/llparse/src/implementation/c/node/invoke.ts
@@ -0,0 +1,44 @@
+import * as frontend from 'llparse-frontend';
+
+import { Compilation } from '../compilation';
+import { Node } from './base';
+
+export class Invoke extends Node<frontend.node.Invoke> {
+ public doBuild(out: string[]): void {
+ const ctx = this.compilation;
+
+ const code = ctx.unwrapCode(this.ref.code);
+ const codeDecl = ctx.buildCode(code);
+
+ const args: string[] = [
+ ctx.stateArg(),
+ ctx.posArg(),
+ ctx.endPosArg(),
+ ];
+
+ const signature = code.ref.signature;
+ if (signature === 'value') {
+ args.push(ctx.matchVar());
+ }
+
+ out.push(`switch (${codeDecl}(${args.join(', ')})) {`);
+ let tmp: string[];
+
+ for (const edge of this.ref.edges) {
+ out.push(` case ${edge.code}:`);
+ tmp = [];
+ this.tailTo(tmp, {
+ noAdvance: true,
+ node: edge.node,
+ value: undefined,
+ });
+ ctx.indent(out, tmp, ' ');
+ }
+
+ out.push(' default:');
+ tmp = [];
+ this.tailTo(tmp, this.ref.otherwise!);
+ ctx.indent(out, tmp, ' ');
+ out.push('}');
+ }
+}
diff --git a/llparse/src/implementation/c/node/pause.ts b/llparse/src/implementation/c/node/pause.ts
new file mode 100644
index 0000000..c239b46
--- /dev/null
+++ b/llparse/src/implementation/c/node/pause.ts
@@ -0,0 +1,19 @@
+import * as frontend from 'llparse-frontend';
+
+import { Compilation } from '../compilation';
+import { STATE_ERROR } from '../constants';
+import { Error as ErrorNode } from './error';
+
+export class Pause extends ErrorNode<frontend.node.Pause> {
+ public doBuild(out: string[]): void {
+ const ctx = this.compilation;
+
+ this.storeError(out);
+
+ // Recoverable state
+ const otherwise = ctx.unwrapNode(this.ref.otherwise!.node).build(ctx);
+ out.push(`${ctx.currentField()} = ` +
+ `(void*) (intptr_t) ${otherwise};`);
+ out.push(`return ${STATE_ERROR};`);
+ }
+}
diff --git a/llparse/src/implementation/c/node/sequence.ts b/llparse/src/implementation/c/node/sequence.ts
new file mode 100644
index 0000000..73d8816
--- /dev/null
+++ b/llparse/src/implementation/c/node/sequence.ts
@@ -0,0 +1,55 @@
+import * as frontend from 'llparse-frontend';
+
+import { Compilation } from '../compilation';
+import {
+ SEQUENCE_COMPLETE, SEQUENCE_MISMATCH, SEQUENCE_PAUSE,
+} from '../constants';
+import { Node } from './base';
+
+export class Sequence extends Node<frontend.node.Sequence> {
+ public doBuild(out: string[]): void {
+ const ctx = this.compilation;
+
+ out.push('llparse_match_t match_seq;');
+ out.push('');
+
+ this.prologue(out);
+
+ const matchSequence = ctx.getMatchSequence(this.ref.transform!,
+ this.ref.select);
+
+ out.push(`match_seq = ${matchSequence}(${ctx.stateArg()}, ` +
+ `${ctx.posArg()}, ` +
+ `${ctx.endPosArg()}, ${ctx.blob(this.ref.select)}, ` +
+ `${this.ref.select.length});`);
+ out.push('p = match_seq.current;');
+
+ let tmp: string[];
+
+ out.push('switch (match_seq.status) {');
+
+ out.push(` case ${SEQUENCE_COMPLETE}: {`);
+ tmp = [];
+ this.tailTo(tmp, {
+ noAdvance: false,
+ node: this.ref.edge!.node,
+ value: this.ref.edge!.value,
+ });
+ ctx.indent(out, tmp, ' ');
+ out.push(' }');
+
+ out.push(` case ${SEQUENCE_PAUSE}: {`);
+ tmp = [];
+ this.pause(tmp);
+ ctx.indent(out, tmp, ' ');
+ out.push(' }');
+
+ out.push(` case ${SEQUENCE_MISMATCH}: {`);
+ tmp = [];
+ this.tailTo(tmp, this.ref.otherwise!);
+ ctx.indent(out, tmp, ' ');
+ out.push(' }');
+
+ out.push('}');
+ }
+}
diff --git a/llparse/src/implementation/c/node/single.ts b/llparse/src/implementation/c/node/single.ts
new file mode 100644
index 0000000..b9c8811
--- /dev/null
+++ b/llparse/src/implementation/c/node/single.ts
@@ -0,0 +1,47 @@
+import * as frontend from 'llparse-frontend';
+
+import { Compilation } from '../compilation';
+import { Node } from './base';
+
+export class Single extends Node<frontend.node.Single> {
+ public doBuild(out: string[]): void {
+ const ctx = this.compilation;
+
+ const otherwise = this.ref.otherwise!;
+
+ this.prologue(out);
+
+ const transform = ctx.unwrapTransform(this.ref.transform!);
+ const current = transform.build(ctx, `*${ctx.posArg()}`);
+
+ out.push(`switch (${current}) {`)
+ this.ref.edges.forEach((edge) => {
+ let ch: string;
+
+ // Non-printable ASCII, or single-quote, or forward slash
+ if (edge.key < 0x20 || edge.key > 0x7e || edge.key === 0x27 ||
+ edge.key === 0x5c) {
+ ch = edge.key.toString();
+ } else {
+ ch = `'${String.fromCharCode(edge.key)}'`;
+ }
+ out.push(` case ${ch}: {`);
+
+ const tmp: string[] = [];
+ this.tailTo(tmp, edge);
+ ctx.indent(out, tmp, ' ');
+
+ out.push(' }');
+ });
+
+ out.push(` default: {`);
+
+ const tmp: string[] = [];
+ this.tailTo(tmp, otherwise);
+ ctx.indent(out, tmp, ' ');
+
+ out.push(' }');
+
+ out.push(`}`);
+ }
+}
diff --git a/llparse/src/implementation/c/node/span-end.ts b/llparse/src/implementation/c/node/span-end.ts
new file mode 100644
index 0000000..09f97e5
--- /dev/null
+++ b/llparse/src/implementation/c/node/span-end.ts
@@ -0,0 +1,56 @@
+import * as frontend from 'llparse-frontend';
+
+import { Compilation } from '../compilation';
+import { STATE_ERROR } from '../constants';
+import { Node } from './base';
+
+export class SpanEnd extends Node<frontend.node.SpanEnd> {
+ public doBuild(out: string[]): void {
+ out.push('const unsigned char* start;');
+ out.push('int err;');
+ out.push('');
+
+ const ctx = this.compilation;
+ const field = this.ref.field;
+ const posField = ctx.spanPosField(field.index);
+
+ // Load start position
+ out.push(`start = ${posField};`);
+
+ // ...and reset
+ out.push(`${posField} = NULL;`);
+
+ // Invoke callback
+ const callback = ctx.buildCode(ctx.unwrapCode(this.ref.callback));
+ out.push(`err = ${callback}(${ctx.stateArg()}, start, ${ctx.posArg()});`);
+
+ out.push('if (err != 0) {');
+ const tmp: string[] = [];
+ this.buildError(tmp, 'err');
+ ctx.indent(out, tmp, ' ');
+ out.push('}');
+
+ const otherwise = this.ref.otherwise!;
+ this.tailTo(out, otherwise);
+ }
+
+ private buildError(out: string[], code: string) {
+ const ctx = this.compilation;
+
+ out.push(`${ctx.errorField()} = ${code};`);
+
+ const otherwise = this.ref.otherwise!;
+ let resumePos = ctx.posArg();
+ if (!otherwise.noAdvance) {
+ resumePos = `(${resumePos} + 1)`;
+ }
+
+ out.push(`${ctx.errorPosField()} = (const char*) ${resumePos};`);
+
+ const resumptionTarget = ctx.unwrapNode(otherwise.node).build(ctx);
+ out.push(`${ctx.currentField()} = ` +
+ `(void*) (intptr_t) ${resumptionTarget};`);
+
+ out.push(`return ${STATE_ERROR};`);
+ }
+}
diff --git a/llparse/src/implementation/c/node/span-start.ts b/llparse/src/implementation/c/node/span-start.ts
new file mode 100644
index 0000000..445da67
--- /dev/null
+++ b/llparse/src/implementation/c/node/span-start.ts
@@ -0,0 +1,26 @@
+import * as frontend from 'llparse-frontend';
+
+import { Compilation } from '../compilation';
+import { Node } from './base';
+
+export class SpanStart extends Node<frontend.node.SpanStart> {
+ public doBuild(out: string[]): void {
+ // Prevent spurious empty spans
+ this.prologue(out);
+
+ const ctx = this.compilation;
+ const field = this.ref.field;
+
+ const posField = ctx.spanPosField(field.index);
+ out.push(`${posField} = (void*) ${ctx.posArg()};`);
+
+ if (field.callbacks.length > 1) {
+ const cbField = ctx.spanCbField(field.index);
+ const callback = ctx.unwrapCode(this.ref.callback);
+ out.push(`${cbField} = ${ctx.buildCode(callback)};`);
+ }
+
+ const otherwise = this.ref.otherwise!;
+ this.tailTo(out, otherwise);
+ }
+}
diff --git a/llparse/src/implementation/c/node/table-lookup.ts b/llparse/src/implementation/c/node/table-lookup.ts
new file mode 100644
index 0000000..6a400a3
--- /dev/null
+++ b/llparse/src/implementation/c/node/table-lookup.ts
@@ -0,0 +1,196 @@
+import * as assert from 'assert';
+import * as frontend from 'llparse-frontend';
+
+import { Compilation } from '../compilation';
+import { Node } from './base';
+
+const MAX_CHAR = 0xff;
+const TABLE_GROUP = 16;
+
+// _mm_cmpestri takes 8 ranges
+const SSE_RANGES_LEN = 16;
+// _mm_cmpestri takes 128bit input
+const SSE_RANGES_PAD = 16;
+const MAX_SSE_CALLS = 2;
+const SSE_ALIGNMENT = 16;
+
+interface ITable {
+ readonly name: string;
+ readonly declaration: ReadonlyArray<string>;
+}
+
+export class TableLookup extends Node<frontend.node.TableLookup> {
+ public doBuild(out: string[]): void {
+ const ctx = this.compilation;
+
+ const table = this.buildTable();
+ for (const line of table.declaration) {
+ out.push(line);
+ }
+
+ this.prologue(out);
+
+ const transform = ctx.unwrapTransform(this.ref.transform!);
+
+ // Try to vectorize nodes matching characters and looping to themselves
+ // NOTE: `switch` below triggers when there is not enough characters in the
+ // stream for vectorized processing.
+ this.buildSSE(out);
+
+ const current = transform.build(ctx, `*${ctx.posArg()}`);
+ out.push(`switch (${table.name}[(uint8_t) ${current}]) {`);
+
+ for (const [ index, edge ] of this.ref.edges.entries()) {
+ out.push(` case ${index + 1}: {`);
+
+ const tmp: string[] = [];
+ const edge = this.ref.edges[index];
+ this.tailTo(tmp, {
+ noAdvance: edge.noAdvance,
+ node: edge.node,
+ value: undefined,
+ });
+ ctx.indent(out, tmp, ' ');
+
+ out.push(' }');
+ }
+
+ out.push(` default: {`);
+
+ const tmp: string[] = [];
+ this.tailTo(tmp, this.ref.otherwise!);
+ ctx.indent(out, tmp, ' ');
+
+ out.push(' }');
+ out.push('}');
+ }
+
+ private buildSSE(out: string[]): boolean {
+ const ctx = this.compilation;
+
+ // Transformation is not supported atm
+ if (this.ref.transform && this.ref.transform.ref.name !== 'id') {
+ return false;
+ }
+
+ if (this.ref.edges.length !== 1) {
+ return false;
+ }
+
+ const edge = this.ref.edges[0];
+ if (edge.node.ref !== this.ref) {
+ return false;
+ }
+
+ // NOTE: keys are sorted
+ let ranges: number[] = [];
+ let first: number | undefined;
+ let last: number | undefined;
+ for (const key of edge.keys) {
+ if (first === undefined) {
+ first = key;
+ }
+ if (last === undefined) {
+ last = key;
+ }
+
+ if (key - last > 1) {
+ ranges.push(first, last);
+ first = key;
+ }
+ last = key;
+ }
+ if (first !== undefined && last !== undefined) {
+ ranges.push(first, last);
+ }
+
+ if (ranges.length === 0) {
+ return false;
+ }
+
+ // Way too many calls would be required
+ if (ranges.length > MAX_SSE_CALLS * SSE_RANGES_LEN) {
+ return false;
+ }
+
+ out.push('#ifdef __SSE4_2__');
+ out.push(`if (${ctx.endPosArg()} - ${ctx.posArg()} >= 16) {`);
+ out.push(' __m128i ranges;');
+ out.push(' __m128i input;');
+ out.push(' int avail;');
+ out.push(' int match_len;');
+ out.push('');
+ out.push(' /* Load input */');
+ out.push(` input = _mm_loadu_si128((__m128i const*) ${ctx.posArg()});`);
+ for (let off = 0; off < ranges.length; off += SSE_RANGES_LEN) {
+ const subRanges = ranges.slice(off, off + SSE_RANGES_LEN);
+
+ let paddedRanges = subRanges.slice();
+ while (paddedRanges.length < SSE_RANGES_PAD) {
+ paddedRanges.push(0);
+ }
+
+ const blob = ctx.blob(Buffer.from(paddedRanges), SSE_ALIGNMENT);
+ out.push(` ranges = _mm_loadu_si128((__m128i const*) ${blob});`);
+ out.push('');
+
+ out.push(' /* Find first character that does not match `ranges` */');
+ out.push(` match_len = _mm_cmpestri(ranges, ${subRanges.length},`);
+ out.push(' input, 16,');
+ out.push(' _SIDD_UBYTE_OPS | _SIDD_CMP_RANGES |');
+ out.push(' _SIDD_NEGATIVE_POLARITY);');
+ out.push('');
+ out.push(' if (match_len != 0) {');
+ out.push(` ${ctx.posArg()} += match_len;`);
+
+ const tmp: string[] = [];
+ assert.strictEqual(edge.noAdvance, false);
+ this.tailTo(tmp, {
+ noAdvance: true,
+ node: edge.node,
+ });
+ ctx.indent(out, tmp, ' ');
+
+ out.push(' }');
+ }
+
+ {
+ const tmp: string[] = [];
+ this.tailTo(tmp, this.ref.otherwise!);
+ ctx.indent(out, tmp, ' ');
+ }
+ out.push('}');
+
+ out.push('#endif /* __SSE4_2__ */');
+
+ return true;
+ }
+
+ private buildTable(): ITable {
+ const table: number[] = new Array(MAX_CHAR + 1).fill(0);
+
+ for (const [ index, edge ] of this.ref.edges.entries()) {
+ edge.keys.forEach((key) => {
+ assert.strictEqual(table[key], 0);
+ table[key] = index + 1;
+ });
+ }
+
+ const lines = [
+ 'static uint8_t lookup_table[] = {',
+ ];
+ for (let i = 0; i < table.length; i += TABLE_GROUP) {
+ let line = ` ${table.slice(i, i + TABLE_GROUP).join(', ')}`;
+ if (i + TABLE_GROUP < table.length) {
+ line += ',';
+ }
+ lines.push(line);
+ }
+ lines.push('};');
+
+ return {
+ name: 'lookup_table',
+ declaration: lines,
+ };
+ }
+}