diff options
Diffstat (limited to 'llparse/src/implementation/c/node')
-rw-r--r-- | llparse/src/implementation/c/node/base.ts | 77 | ||||
-rw-r--r-- | llparse/src/implementation/c/node/consume.ts | 48 | ||||
-rw-r--r-- | llparse/src/implementation/c/node/empty.ts | 16 | ||||
-rw-r--r-- | llparse/src/implementation/c/node/error.ts | 33 | ||||
-rw-r--r-- | llparse/src/implementation/c/node/index.ts | 27 | ||||
-rw-r--r-- | llparse/src/implementation/c/node/invoke.ts | 44 | ||||
-rw-r--r-- | llparse/src/implementation/c/node/pause.ts | 19 | ||||
-rw-r--r-- | llparse/src/implementation/c/node/sequence.ts | 55 | ||||
-rw-r--r-- | llparse/src/implementation/c/node/single.ts | 47 | ||||
-rw-r--r-- | llparse/src/implementation/c/node/span-end.ts | 56 | ||||
-rw-r--r-- | llparse/src/implementation/c/node/span-start.ts | 26 | ||||
-rw-r--r-- | llparse/src/implementation/c/node/table-lookup.ts | 196 |
12 files changed, 644 insertions, 0 deletions
diff --git a/llparse/src/implementation/c/node/base.ts b/llparse/src/implementation/c/node/base.ts new file mode 100644 index 0000000..51f90bb --- /dev/null +++ b/llparse/src/implementation/c/node/base.ts @@ -0,0 +1,77 @@ +import * as assert from 'assert'; +import * as frontend from 'llparse-frontend'; + +import { Compilation } from '../compilation'; +import { + STATE_PREFIX, LABEL_PREFIX, +} from '../constants'; + +export interface INodeEdge { + readonly node: frontend.IWrap<frontend.node.Node>; + readonly noAdvance: boolean; + readonly value?: number; +} + +export abstract class Node<T extends frontend.node.Node> { + protected cachedDecl: string | undefined; + protected privCompilation: Compilation | undefined; + + constructor(public readonly ref: T) { + } + + public build(compilation: Compilation): string { + if (this.cachedDecl !== undefined) { + return this.cachedDecl; + } + + const res = STATE_PREFIX + this.ref.id.name; + this.cachedDecl = res; + + this.privCompilation = compilation; + + const out: string[] = []; + compilation.debug(out, + `Entering node "${this.ref.id.originalName}" ("${this.ref.id.name}")`); + this.doBuild(out); + + compilation.addState(res, out); + + return res; + } + + protected get compilation(): Compilation { + assert(this.privCompilation !== undefined); + return this.privCompilation!; + } + + protected prologue(out: string[]): void { + const ctx = this.compilation; + + out.push(`if (${ctx.posArg()} == ${ctx.endPosArg()}) {`); + + const tmp: string[] = []; + this.pause(tmp); + this.compilation.indent(out, tmp, ' '); + + out.push('}'); + } + + protected pause(out: string[]): void { + out.push(`return ${this.cachedDecl};`); + } + + protected tailTo(out: string[], edge: INodeEdge): void { + const ctx = this.compilation; + const target = ctx.unwrapNode(edge.node).build(ctx); + + if (!edge.noAdvance) { + out.push(`${ctx.posArg()}++;`); + } + if (edge.value !== undefined) { + out.push(`${ctx.matchVar()} = ${edge.value};`); + } + out.push(`goto ${LABEL_PREFIX}${target};`); + } + + protected abstract doBuild(out: string[]): void; +} diff --git a/llparse/src/implementation/c/node/consume.ts b/llparse/src/implementation/c/node/consume.ts new file mode 100644 index 0000000..658a00e --- /dev/null +++ b/llparse/src/implementation/c/node/consume.ts @@ -0,0 +1,48 @@ +import * as frontend from 'llparse-frontend'; + +import { Compilation } from '../compilation'; +import { Node } from './base'; + +export class Consume extends Node<frontend.node.Consume> { + public doBuild(out: string[]): void { + const ctx = this.compilation; + + const index = ctx.stateField(this.ref.field); + const ty = ctx.getFieldType(this.ref.field); + + let fieldTy: string; + if (ty === 'i64') { + fieldTy = 'uint64_t'; + } else if (ty === 'i32') { + fieldTy = 'uint32_t'; + } else if (ty === 'i16') { + fieldTy = 'uint16_t'; + } else if (ty === 'i8') { + fieldTy = 'uint8_t'; + } else { + throw new Error( + `Unsupported type ${ty} of field ${this.ref.field} for consume node`); + } + + out.push('size_t avail;'); + out.push(`${fieldTy} need;`); + + out.push(''); + out.push(`avail = ${ctx.endPosArg()} - ${ctx.posArg()};`); + out.push(`need = ${index};`); + + // Note: `avail` or `need` are going to coerced to the largest + // datatype needed to hold either of the values. + out.push('if (avail >= need) {'); + out.push(` p += need;`); + out.push(` ${index} = 0;`); + const tmp: string[] = []; + this.tailTo(tmp, this.ref.otherwise!); + ctx.indent(out, tmp, ' '); + out.push('}'); + out.push(''); + + out.push(`${index} -= avail;`); + this.pause(out); + } +} diff --git a/llparse/src/implementation/c/node/empty.ts b/llparse/src/implementation/c/node/empty.ts new file mode 100644 index 0000000..e28ecb5 --- /dev/null +++ b/llparse/src/implementation/c/node/empty.ts @@ -0,0 +1,16 @@ +import * as frontend from 'llparse-frontend'; + +import { Compilation } from '../compilation'; +import { Node } from './base'; + +export class Empty extends Node<frontend.node.Empty> { + public doBuild(out: string[]): void { + const otherwise = this.ref.otherwise!; + + if (!otherwise.noAdvance) { + this.prologue(out); + } + + this.tailTo(out, otherwise); + } +} diff --git a/llparse/src/implementation/c/node/error.ts b/llparse/src/implementation/c/node/error.ts new file mode 100644 index 0000000..29dce63 --- /dev/null +++ b/llparse/src/implementation/c/node/error.ts @@ -0,0 +1,33 @@ +import * as frontend from 'llparse-frontend'; + +import { Compilation } from '../compilation'; +import { STATE_ERROR } from '../constants'; +import { Node } from './base'; + +class ErrorNode<T extends frontend.node.Error> extends Node<T> { + protected storeError(out: string[]): void { + const ctx = this.compilation; + + let hexCode: string; + if (this.ref.code < 0) { + hexCode = `-0x` + this.ref.code.toString(16); + } else { + hexCode = '0x' + this.ref.code.toString(16); + } + + out.push(`${ctx.errorField()} = ${hexCode};`); + out.push(`${ctx.reasonField()} = ${ctx.cstring(this.ref.reason)};`); + out.push(`${ctx.errorPosField()} = (const char*) ${ctx.posArg()};`); + } + + public doBuild(out: string[]): void { + this.storeError(out); + + // Non-recoverable state + out.push(`${this.compilation.currentField()} = ` + + `(void*) (intptr_t) ${STATE_ERROR};`); + out.push(`return ${STATE_ERROR};`); + } +} + +export { ErrorNode as Error }; diff --git a/llparse/src/implementation/c/node/index.ts b/llparse/src/implementation/c/node/index.ts new file mode 100644 index 0000000..ba751d9 --- /dev/null +++ b/llparse/src/implementation/c/node/index.ts @@ -0,0 +1,27 @@ +import * as frontend from 'llparse-frontend'; + +import { Consume } from './consume'; +import { Empty } from './empty'; +import { Error as ErrorNode } from './error'; +import { Invoke } from './invoke'; +import { Pause } from './pause'; +import { Sequence } from './sequence'; +import { Single } from './single'; +import { SpanEnd } from './span-end'; +import { SpanStart } from './span-start'; +import { TableLookup } from './table-lookup'; + +export { Node } from './base'; + +export default { + Consume, + Empty, + Error: class Error extends ErrorNode<frontend.node.Error> {}, + Invoke, + Pause, + Sequence, + Single, + SpanEnd, + SpanStart, + TableLookup, +}; diff --git a/llparse/src/implementation/c/node/invoke.ts b/llparse/src/implementation/c/node/invoke.ts new file mode 100644 index 0000000..ee917e9 --- /dev/null +++ b/llparse/src/implementation/c/node/invoke.ts @@ -0,0 +1,44 @@ +import * as frontend from 'llparse-frontend'; + +import { Compilation } from '../compilation'; +import { Node } from './base'; + +export class Invoke extends Node<frontend.node.Invoke> { + public doBuild(out: string[]): void { + const ctx = this.compilation; + + const code = ctx.unwrapCode(this.ref.code); + const codeDecl = ctx.buildCode(code); + + const args: string[] = [ + ctx.stateArg(), + ctx.posArg(), + ctx.endPosArg(), + ]; + + const signature = code.ref.signature; + if (signature === 'value') { + args.push(ctx.matchVar()); + } + + out.push(`switch (${codeDecl}(${args.join(', ')})) {`); + let tmp: string[]; + + for (const edge of this.ref.edges) { + out.push(` case ${edge.code}:`); + tmp = []; + this.tailTo(tmp, { + noAdvance: true, + node: edge.node, + value: undefined, + }); + ctx.indent(out, tmp, ' '); + } + + out.push(' default:'); + tmp = []; + this.tailTo(tmp, this.ref.otherwise!); + ctx.indent(out, tmp, ' '); + out.push('}'); + } +} diff --git a/llparse/src/implementation/c/node/pause.ts b/llparse/src/implementation/c/node/pause.ts new file mode 100644 index 0000000..c239b46 --- /dev/null +++ b/llparse/src/implementation/c/node/pause.ts @@ -0,0 +1,19 @@ +import * as frontend from 'llparse-frontend'; + +import { Compilation } from '../compilation'; +import { STATE_ERROR } from '../constants'; +import { Error as ErrorNode } from './error'; + +export class Pause extends ErrorNode<frontend.node.Pause> { + public doBuild(out: string[]): void { + const ctx = this.compilation; + + this.storeError(out); + + // Recoverable state + const otherwise = ctx.unwrapNode(this.ref.otherwise!.node).build(ctx); + out.push(`${ctx.currentField()} = ` + + `(void*) (intptr_t) ${otherwise};`); + out.push(`return ${STATE_ERROR};`); + } +} diff --git a/llparse/src/implementation/c/node/sequence.ts b/llparse/src/implementation/c/node/sequence.ts new file mode 100644 index 0000000..73d8816 --- /dev/null +++ b/llparse/src/implementation/c/node/sequence.ts @@ -0,0 +1,55 @@ +import * as frontend from 'llparse-frontend'; + +import { Compilation } from '../compilation'; +import { + SEQUENCE_COMPLETE, SEQUENCE_MISMATCH, SEQUENCE_PAUSE, +} from '../constants'; +import { Node } from './base'; + +export class Sequence extends Node<frontend.node.Sequence> { + public doBuild(out: string[]): void { + const ctx = this.compilation; + + out.push('llparse_match_t match_seq;'); + out.push(''); + + this.prologue(out); + + const matchSequence = ctx.getMatchSequence(this.ref.transform!, + this.ref.select); + + out.push(`match_seq = ${matchSequence}(${ctx.stateArg()}, ` + + `${ctx.posArg()}, ` + + `${ctx.endPosArg()}, ${ctx.blob(this.ref.select)}, ` + + `${this.ref.select.length});`); + out.push('p = match_seq.current;'); + + let tmp: string[]; + + out.push('switch (match_seq.status) {'); + + out.push(` case ${SEQUENCE_COMPLETE}: {`); + tmp = []; + this.tailTo(tmp, { + noAdvance: false, + node: this.ref.edge!.node, + value: this.ref.edge!.value, + }); + ctx.indent(out, tmp, ' '); + out.push(' }'); + + out.push(` case ${SEQUENCE_PAUSE}: {`); + tmp = []; + this.pause(tmp); + ctx.indent(out, tmp, ' '); + out.push(' }'); + + out.push(` case ${SEQUENCE_MISMATCH}: {`); + tmp = []; + this.tailTo(tmp, this.ref.otherwise!); + ctx.indent(out, tmp, ' '); + out.push(' }'); + + out.push('}'); + } +} diff --git a/llparse/src/implementation/c/node/single.ts b/llparse/src/implementation/c/node/single.ts new file mode 100644 index 0000000..b9c8811 --- /dev/null +++ b/llparse/src/implementation/c/node/single.ts @@ -0,0 +1,47 @@ +import * as frontend from 'llparse-frontend'; + +import { Compilation } from '../compilation'; +import { Node } from './base'; + +export class Single extends Node<frontend.node.Single> { + public doBuild(out: string[]): void { + const ctx = this.compilation; + + const otherwise = this.ref.otherwise!; + + this.prologue(out); + + const transform = ctx.unwrapTransform(this.ref.transform!); + const current = transform.build(ctx, `*${ctx.posArg()}`); + + out.push(`switch (${current}) {`) + this.ref.edges.forEach((edge) => { + let ch: string; + + // Non-printable ASCII, or single-quote, or forward slash + if (edge.key < 0x20 || edge.key > 0x7e || edge.key === 0x27 || + edge.key === 0x5c) { + ch = edge.key.toString(); + } else { + ch = `'${String.fromCharCode(edge.key)}'`; + } + out.push(` case ${ch}: {`); + + const tmp: string[] = []; + this.tailTo(tmp, edge); + ctx.indent(out, tmp, ' '); + + out.push(' }'); + }); + + out.push(` default: {`); + + const tmp: string[] = []; + this.tailTo(tmp, otherwise); + ctx.indent(out, tmp, ' '); + + out.push(' }'); + + out.push(`}`); + } +} diff --git a/llparse/src/implementation/c/node/span-end.ts b/llparse/src/implementation/c/node/span-end.ts new file mode 100644 index 0000000..09f97e5 --- /dev/null +++ b/llparse/src/implementation/c/node/span-end.ts @@ -0,0 +1,56 @@ +import * as frontend from 'llparse-frontend'; + +import { Compilation } from '../compilation'; +import { STATE_ERROR } from '../constants'; +import { Node } from './base'; + +export class SpanEnd extends Node<frontend.node.SpanEnd> { + public doBuild(out: string[]): void { + out.push('const unsigned char* start;'); + out.push('int err;'); + out.push(''); + + const ctx = this.compilation; + const field = this.ref.field; + const posField = ctx.spanPosField(field.index); + + // Load start position + out.push(`start = ${posField};`); + + // ...and reset + out.push(`${posField} = NULL;`); + + // Invoke callback + const callback = ctx.buildCode(ctx.unwrapCode(this.ref.callback)); + out.push(`err = ${callback}(${ctx.stateArg()}, start, ${ctx.posArg()});`); + + out.push('if (err != 0) {'); + const tmp: string[] = []; + this.buildError(tmp, 'err'); + ctx.indent(out, tmp, ' '); + out.push('}'); + + const otherwise = this.ref.otherwise!; + this.tailTo(out, otherwise); + } + + private buildError(out: string[], code: string) { + const ctx = this.compilation; + + out.push(`${ctx.errorField()} = ${code};`); + + const otherwise = this.ref.otherwise!; + let resumePos = ctx.posArg(); + if (!otherwise.noAdvance) { + resumePos = `(${resumePos} + 1)`; + } + + out.push(`${ctx.errorPosField()} = (const char*) ${resumePos};`); + + const resumptionTarget = ctx.unwrapNode(otherwise.node).build(ctx); + out.push(`${ctx.currentField()} = ` + + `(void*) (intptr_t) ${resumptionTarget};`); + + out.push(`return ${STATE_ERROR};`); + } +} diff --git a/llparse/src/implementation/c/node/span-start.ts b/llparse/src/implementation/c/node/span-start.ts new file mode 100644 index 0000000..445da67 --- /dev/null +++ b/llparse/src/implementation/c/node/span-start.ts @@ -0,0 +1,26 @@ +import * as frontend from 'llparse-frontend'; + +import { Compilation } from '../compilation'; +import { Node } from './base'; + +export class SpanStart extends Node<frontend.node.SpanStart> { + public doBuild(out: string[]): void { + // Prevent spurious empty spans + this.prologue(out); + + const ctx = this.compilation; + const field = this.ref.field; + + const posField = ctx.spanPosField(field.index); + out.push(`${posField} = (void*) ${ctx.posArg()};`); + + if (field.callbacks.length > 1) { + const cbField = ctx.spanCbField(field.index); + const callback = ctx.unwrapCode(this.ref.callback); + out.push(`${cbField} = ${ctx.buildCode(callback)};`); + } + + const otherwise = this.ref.otherwise!; + this.tailTo(out, otherwise); + } +} diff --git a/llparse/src/implementation/c/node/table-lookup.ts b/llparse/src/implementation/c/node/table-lookup.ts new file mode 100644 index 0000000..6a400a3 --- /dev/null +++ b/llparse/src/implementation/c/node/table-lookup.ts @@ -0,0 +1,196 @@ +import * as assert from 'assert'; +import * as frontend from 'llparse-frontend'; + +import { Compilation } from '../compilation'; +import { Node } from './base'; + +const MAX_CHAR = 0xff; +const TABLE_GROUP = 16; + +// _mm_cmpestri takes 8 ranges +const SSE_RANGES_LEN = 16; +// _mm_cmpestri takes 128bit input +const SSE_RANGES_PAD = 16; +const MAX_SSE_CALLS = 2; +const SSE_ALIGNMENT = 16; + +interface ITable { + readonly name: string; + readonly declaration: ReadonlyArray<string>; +} + +export class TableLookup extends Node<frontend.node.TableLookup> { + public doBuild(out: string[]): void { + const ctx = this.compilation; + + const table = this.buildTable(); + for (const line of table.declaration) { + out.push(line); + } + + this.prologue(out); + + const transform = ctx.unwrapTransform(this.ref.transform!); + + // Try to vectorize nodes matching characters and looping to themselves + // NOTE: `switch` below triggers when there is not enough characters in the + // stream for vectorized processing. + this.buildSSE(out); + + const current = transform.build(ctx, `*${ctx.posArg()}`); + out.push(`switch (${table.name}[(uint8_t) ${current}]) {`); + + for (const [ index, edge ] of this.ref.edges.entries()) { + out.push(` case ${index + 1}: {`); + + const tmp: string[] = []; + const edge = this.ref.edges[index]; + this.tailTo(tmp, { + noAdvance: edge.noAdvance, + node: edge.node, + value: undefined, + }); + ctx.indent(out, tmp, ' '); + + out.push(' }'); + } + + out.push(` default: {`); + + const tmp: string[] = []; + this.tailTo(tmp, this.ref.otherwise!); + ctx.indent(out, tmp, ' '); + + out.push(' }'); + out.push('}'); + } + + private buildSSE(out: string[]): boolean { + const ctx = this.compilation; + + // Transformation is not supported atm + if (this.ref.transform && this.ref.transform.ref.name !== 'id') { + return false; + } + + if (this.ref.edges.length !== 1) { + return false; + } + + const edge = this.ref.edges[0]; + if (edge.node.ref !== this.ref) { + return false; + } + + // NOTE: keys are sorted + let ranges: number[] = []; + let first: number | undefined; + let last: number | undefined; + for (const key of edge.keys) { + if (first === undefined) { + first = key; + } + if (last === undefined) { + last = key; + } + + if (key - last > 1) { + ranges.push(first, last); + first = key; + } + last = key; + } + if (first !== undefined && last !== undefined) { + ranges.push(first, last); + } + + if (ranges.length === 0) { + return false; + } + + // Way too many calls would be required + if (ranges.length > MAX_SSE_CALLS * SSE_RANGES_LEN) { + return false; + } + + out.push('#ifdef __SSE4_2__'); + out.push(`if (${ctx.endPosArg()} - ${ctx.posArg()} >= 16) {`); + out.push(' __m128i ranges;'); + out.push(' __m128i input;'); + out.push(' int avail;'); + out.push(' int match_len;'); + out.push(''); + out.push(' /* Load input */'); + out.push(` input = _mm_loadu_si128((__m128i const*) ${ctx.posArg()});`); + for (let off = 0; off < ranges.length; off += SSE_RANGES_LEN) { + const subRanges = ranges.slice(off, off + SSE_RANGES_LEN); + + let paddedRanges = subRanges.slice(); + while (paddedRanges.length < SSE_RANGES_PAD) { + paddedRanges.push(0); + } + + const blob = ctx.blob(Buffer.from(paddedRanges), SSE_ALIGNMENT); + out.push(` ranges = _mm_loadu_si128((__m128i const*) ${blob});`); + out.push(''); + + out.push(' /* Find first character that does not match `ranges` */'); + out.push(` match_len = _mm_cmpestri(ranges, ${subRanges.length},`); + out.push(' input, 16,'); + out.push(' _SIDD_UBYTE_OPS | _SIDD_CMP_RANGES |'); + out.push(' _SIDD_NEGATIVE_POLARITY);'); + out.push(''); + out.push(' if (match_len != 0) {'); + out.push(` ${ctx.posArg()} += match_len;`); + + const tmp: string[] = []; + assert.strictEqual(edge.noAdvance, false); + this.tailTo(tmp, { + noAdvance: true, + node: edge.node, + }); + ctx.indent(out, tmp, ' '); + + out.push(' }'); + } + + { + const tmp: string[] = []; + this.tailTo(tmp, this.ref.otherwise!); + ctx.indent(out, tmp, ' '); + } + out.push('}'); + + out.push('#endif /* __SSE4_2__ */'); + + return true; + } + + private buildTable(): ITable { + const table: number[] = new Array(MAX_CHAR + 1).fill(0); + + for (const [ index, edge ] of this.ref.edges.entries()) { + edge.keys.forEach((key) => { + assert.strictEqual(table[key], 0); + table[key] = index + 1; + }); + } + + const lines = [ + 'static uint8_t lookup_table[] = {', + ]; + for (let i = 0; i < table.length; i += TABLE_GROUP) { + let line = ` ${table.slice(i, i + TABLE_GROUP).join(', ')}`; + if (i + TABLE_GROUP < table.length) { + line += ','; + } + lines.push(line); + } + lines.push('};'); + + return { + name: 'lookup_table', + declaration: lines, + }; + } +} |