diff options
Diffstat (limited to '')
39 files changed, 1617 insertions, 0 deletions
diff --git a/llparse-builder/src/builder.ts b/llparse-builder/src/builder.ts new file mode 100644 index 0000000..a335a85 --- /dev/null +++ b/llparse-builder/src/builder.ts @@ -0,0 +1,147 @@ +import * as code from './code'; +import * as node from './node'; +import { Property, PropertyType } from './property'; +import { Span } from './span'; +import * as transform from './transform'; + +export { code, node, transform, Property, PropertyType, Span }; +export { Edge } from './edge'; +export { LoopChecker } from './loop-checker'; +export { ISpanAllocatorResult, SpanAllocator } from './span-allocator'; +export { Reachability } from './reachability'; + +/** + * Construct parsing graph for later use in `llparse`. + */ +export class Builder { + /** + * API for creating external callbacks and intrinsic operations. + */ + public readonly code: code.Creator = new code.Creator(); + + /** + * API for creating character transforms for use in nodes created with + * `builder.node()` + */ + public readonly transform: transform.Creator = new transform.Creator(); + + private readonly privProperties: Map<string, Property> = new Map(); + + // Various nodes + + /** + * Create regular node for matching characters and sequences. + * + * @param name Node name + */ + public node(name: string): node.Match { + return new node.Match(name); + } + + /** + * Create terminal error node. Returns error code to user, and sets reason + * in the parser's state object. + * + * This node does not consume any bytes upon execution. + * + * @param errorCode Integer error code + * @param reason Error description + */ + public error(errorCode: number, reason: string): node.Error { + return new node.Error(errorCode, reason); + } + + /** + * Create invoke node that calls either external user callback or an + * intrinsic operation. + * + * This node does not consume any bytes upon execution. + * + * NOTE: When `.invoke()` is a target of `node().select()` - callback must + * have signature that accepts `.select()`'s value, otherwise it must be of + * the signature that takes no such value. + * + * @param fn Code instance to invoke + * @param map Object with integer keys and `Node` values. Describes + * nodes that are visited upon receiving particular + * return integer value + * @param otherwise Convenience `Node` argument. Effect is the same as calling + * `p.invoke(...).otherwise(node)` + */ + public invoke(fn: code.Code, map?: node.IInvokeMap | node.Node, + otherwise?: node.Node): node.Invoke { + let res: node.Invoke; + + // `.invoke(name)` + if (map === undefined) { + res = new node.Invoke(fn, {}); + // `.invoke(name, otherwise)` + } else if (map instanceof node.Node) { + res = new node.Invoke(fn, {}); + otherwise = map; + } else { + res = new node.Invoke(fn, map as node.IInvokeMap); + } + + if (otherwise !== undefined) { + res.otherwise(otherwise); + } + return res; + } + + /** + * Create node that consumes number of bytes specified by value of the + * state's property with name in `field` argument. + * + * @param field Property name to use + */ + public consume(field: string): node.Consume { + return new node.Consume(field); + } + + /** + * Create non-terminal node that returns `errorCode` as error number to + * user, but still allows feeding more data to the parser. + * + * This node does not consume any bytes upon execution. + * + * @param errorCode Integer error code + * @param reason Error description + */ + public pause(errorCode: number, reason: string): node.Pause { + return new node.Pause(errorCode, reason); + } + + // Span + + /** + * Create Span with given `callback`. + * + * @param callback External span callback, must be result of + * `.code.span(...)` + */ + public span(callback: code.Span): Span { + return new Span(callback); + } + + // Custom property API + + /** + * Allocate space for property in parser's state. + */ + public property(ty: PropertyType, name: string): void { + if (this.privProperties.has(name)) { + throw new Error(`Duplicate property with a name: "${name}"`); + } + + const prop = new Property(ty, name); + this.privProperties.set(name, prop); + } + + /** + * Return list of all allocated properties in parser's state. + */ + public get properties(): ReadonlyArray<Property> { + return Array.from(this.privProperties.values()); + } +} diff --git a/llparse-builder/src/code/and.ts b/llparse-builder/src/code/and.ts new file mode 100644 index 0000000..5f78675 --- /dev/null +++ b/llparse-builder/src/code/and.ts @@ -0,0 +1,7 @@ +import { FieldValue } from './field-value'; + +export class And extends FieldValue { + constructor(field: string, value: number) { + super('match', 'and', field, value); + } +} diff --git a/llparse-builder/src/code/base.ts b/llparse-builder/src/code/base.ts new file mode 100644 index 0000000..00b479f --- /dev/null +++ b/llparse-builder/src/code/base.ts @@ -0,0 +1,16 @@ +export type Signature = 'match' | 'value'; + +/** + * Base code class. + */ +export abstract class Code { + /** + * @param signature Code signature to be used. `match` means that code takes + * no input value (from `.select()`), otherwise it must be + * `value` + * @param name External function or intrinsic name. + */ + constructor(public readonly signature: Signature, + public readonly name: string) { + } +} diff --git a/llparse-builder/src/code/creator.ts b/llparse-builder/src/code/creator.ts new file mode 100644 index 0000000..98f9296 --- /dev/null +++ b/llparse-builder/src/code/creator.ts @@ -0,0 +1,184 @@ +import * as code from './'; + +/** + * API for creating external callbacks and intrinsic operations. + */ +export class Creator { + // Callbacks to external C functions + + /** + * Create an external callback that **has no** `value` argument. + * + * This callback can be used in all `Invoke` nodes except those that are + * targets of `.select()` method. + * + * C signature of callback must be: + * + * ```c + * int name(llparse_t* state, const char* p, const char* endp) + * ``` + * + * Where `llparse_t` is parser state's type name. + * + * @param name External function name. + */ + public match(name: string): code.Match { + return new code.Match(name); + } + + /** + * Create an external callback that **has** `value` argument. + * + * This callback can be used only in `Invoke` nodes that are targets of + * `.select()` method. + * + * C signature of callback must be: + * + * ```c + * int name(llparse_t* state, const char* p, const char* endp, int value) + * ``` + * + * Where `llparse_t` is parser state's type name. + * + * @param name External function name. + */ + public value(name: string): code.Value { + return new code.Value(name); + } + + /** + * Create an external span callback. + * + * This callback can be used only in `Span` constructor. + * + * C signature of callback must be: + * + * ```c + * int name(llparse_t* state, const char* p, const char* endp) + * ``` + * + * NOTE: non-zero return value is treated as resumable error. + * + * @param name External function name. + */ + public span(name: string): code.Span { + return new code.Span(name); + } + + // Helpers + + /** + * Intrinsic operation. Stores `value` from `.select()` node into the state's + * property with the name specified by `field`, returns zero. + * + * state[field] = value; + * return 0; + * + * @param field Property name + */ + public store(field: string): code.Store { + return new code.Store(field); + } + + /** + * Intrinsic operation. Loads and returns state's property with the name + * specified by `field`. + * + * The value of the property is either truncated or zero-extended to fit into + * 32-bit unsigned integer. + * + * return state[field]; + * + * @param field Property name. + */ + public load(field: string): code.Load { + return new code.Load(field); + } + + /** + * Intrinsic operation. Takes `value` from `.select()`, state's property + * with the name `field` and does: + * + * field = state[field]; + * field *= options.base; + * field += value; + * state[field] = field; + * return 0; // or 1 on overflow + * + * Return values are: + * + * - 0 - success + * - 1 - overflow + * + * @param field Property name + * @param options See `code.MulAdd` documentation. + */ + public mulAdd(field: string, options: code.IMulAddOptions): code.MulAdd { + return new code.MulAdd(field, options); + } + + /** + * Intrinsic operation. Puts `value` integer into the state's property with + * the name specified by `field`. + * + * state[field] = value; + * return 0; + * + * @param field Property name + * @param value Integer value to be stored into the property. + */ + public update(field: string, value: number): code.Update { + return new code.Update(field, value); + } + + /** + * Intrinsic operation. Returns 1 if the integer `value` is equal to the + * state's property with the name specified by `field`. + * + * return state[field] === value ? 1 : 0; + * + * @param field Property name + * @param value Integer value to be checked against. + */ + public isEqual(field: string, value: number): code.IsEqual { + return new code.IsEqual(field, value); + } + + /** + * Intrinsic operation. + * + * state[field] &= value + * return 0; + * + * @param field Property name + * @param value Integer value + */ + public and(field: string, value: number): code.And { + return new code.And(field, value); + } + + /** + * Intrinsic operation. + * + * state[field] |= value + * return 0; + * + * @param field Property name + * @param value Integer value + */ + public or(field: string, value: number): code.Or { + return new code.Or(field, value); + } + + /** + * Intrinsic operation. + * + * return (state[field] & value) == value ? 1 : 0; + * + * @param field Property name + * @param value Integer value + */ + public test(field: string, value: number): code.Test { + return new code.Test(field, value); + } +} diff --git a/llparse-builder/src/code/field-value.ts b/llparse-builder/src/code/field-value.ts new file mode 100644 index 0000000..2ceea69 --- /dev/null +++ b/llparse-builder/src/code/field-value.ts @@ -0,0 +1,9 @@ +import { Signature } from './base'; +import { Field } from './field'; + +export abstract class FieldValue extends Field { + constructor(signature: Signature, name: string, field: string, + public readonly value: number) { + super(signature, name, field); + } +} diff --git a/llparse-builder/src/code/field.ts b/llparse-builder/src/code/field.ts new file mode 100644 index 0000000..af58c84 --- /dev/null +++ b/llparse-builder/src/code/field.ts @@ -0,0 +1,10 @@ +import * as assert from 'assert'; +import { Code, Signature } from './base'; + +export abstract class Field extends Code { + constructor(signature: Signature, name: string, + public readonly field: string) { + super(signature, name + '_' + field); + assert(!/^_/.test(field), 'Can\'t access internal field from user code'); + } +} diff --git a/llparse-builder/src/code/index.ts b/llparse-builder/src/code/index.ts new file mode 100644 index 0000000..7a651e3 --- /dev/null +++ b/llparse-builder/src/code/index.ts @@ -0,0 +1,15 @@ +export { Code } from './base'; +export { Creator } from './creator'; +export { Field } from './field'; +export { FieldValue } from './field-value'; +export { IsEqual } from './is-equal'; +export { Load } from './load'; +export { Match } from './match'; +export { IMulAddOptions, MulAdd } from './mul-add'; +export { Or } from './or'; +export { And } from './and'; +export { Span } from './span'; +export { Store } from './store'; +export { Test } from './test'; +export { Update } from './update'; +export { Value } from './value'; diff --git a/llparse-builder/src/code/is-equal.ts b/llparse-builder/src/code/is-equal.ts new file mode 100644 index 0000000..91bb957 --- /dev/null +++ b/llparse-builder/src/code/is-equal.ts @@ -0,0 +1,7 @@ +import { FieldValue } from './field-value'; + +export class IsEqual extends FieldValue { + constructor(field: string, value: number) { + super('match', 'is_equal', field, value); + } +} diff --git a/llparse-builder/src/code/load.ts b/llparse-builder/src/code/load.ts new file mode 100644 index 0000000..9f3df2e --- /dev/null +++ b/llparse-builder/src/code/load.ts @@ -0,0 +1,7 @@ +import { Field } from './field'; + +export class Load extends Field { + constructor(field: string) { + super('match', 'load', field); + } +} diff --git a/llparse-builder/src/code/match.ts b/llparse-builder/src/code/match.ts new file mode 100644 index 0000000..631376a --- /dev/null +++ b/llparse-builder/src/code/match.ts @@ -0,0 +1,7 @@ +import { Code } from './base'; + +export class Match extends Code { + constructor(name: string) { + super('match', name); + } +} diff --git a/llparse-builder/src/code/mul-add.ts b/llparse-builder/src/code/mul-add.ts new file mode 100644 index 0000000..fd648ed --- /dev/null +++ b/llparse-builder/src/code/mul-add.ts @@ -0,0 +1,28 @@ +import { Field } from './field'; + +/** + * Options for `code.mulAdd()`. + */ +export interface IMulAddOptions { + /** Value to multiply the property with in the first step */ + readonly base: number; + + /** + * Maximum value of the property. If at any point of computation the + * intermediate result exceeds it - `mulAdd` returns 1 (overflow). + */ + readonly max?: number; + + /** + * If `true` - all arithmetics perfomed by `mulAdd` will be signed. + * + * Default value: `false` + */ + readonly signed?: boolean; +} + +export class MulAdd extends Field { + constructor(field: string, public readonly options: IMulAddOptions) { + super('value', 'mul_add', field); + } +} diff --git a/llparse-builder/src/code/or.ts b/llparse-builder/src/code/or.ts new file mode 100644 index 0000000..33bd402 --- /dev/null +++ b/llparse-builder/src/code/or.ts @@ -0,0 +1,7 @@ +import { FieldValue } from './field-value'; + +export class Or extends FieldValue { + constructor(field: string, value: number) { + super('match', 'or', field, value); + } +} diff --git a/llparse-builder/src/code/span.ts b/llparse-builder/src/code/span.ts new file mode 100644 index 0000000..b97e09e --- /dev/null +++ b/llparse-builder/src/code/span.ts @@ -0,0 +1,5 @@ +import { Match } from './match'; + +export class Span extends Match { + // no-op +} diff --git a/llparse-builder/src/code/store.ts b/llparse-builder/src/code/store.ts new file mode 100644 index 0000000..84abfef --- /dev/null +++ b/llparse-builder/src/code/store.ts @@ -0,0 +1,7 @@ +import { Field } from './field'; + +export class Store extends Field { + constructor(field: string) { + super('value', 'store', field); + } +} diff --git a/llparse-builder/src/code/test.ts b/llparse-builder/src/code/test.ts new file mode 100644 index 0000000..a9d0a22 --- /dev/null +++ b/llparse-builder/src/code/test.ts @@ -0,0 +1,7 @@ +import { FieldValue } from './field-value'; + +export class Test extends FieldValue { + constructor(field: string, value: number) { + super('match', 'test', field, value); + } +} diff --git a/llparse-builder/src/code/update.ts b/llparse-builder/src/code/update.ts new file mode 100644 index 0000000..de62476 --- /dev/null +++ b/llparse-builder/src/code/update.ts @@ -0,0 +1,7 @@ +import { FieldValue } from './field-value'; + +export class Update extends FieldValue { + constructor(field: string, value: number) { + super('match', 'update', field, value); + } +} diff --git a/llparse-builder/src/code/value.ts b/llparse-builder/src/code/value.ts new file mode 100644 index 0000000..06c6fd7 --- /dev/null +++ b/llparse-builder/src/code/value.ts @@ -0,0 +1,7 @@ +import { Code } from './base'; + +export class Value extends Code { + constructor(name: string) { + super('value', name); + } +} diff --git a/llparse-builder/src/edge.ts b/llparse-builder/src/edge.ts new file mode 100644 index 0000000..f6b55cc --- /dev/null +++ b/llparse-builder/src/edge.ts @@ -0,0 +1,54 @@ +import * as assert from 'assert'; + +import { Buffer } from 'buffer'; +import { Invoke, Node } from './node'; + +/** + * This class represents an edge in the parser graph. + */ +export class Edge { + /** + * Comparator for `.sort()` function. + */ + public static compare(a: Edge, b: Edge): number { + if (typeof a.key === 'number') { + return a.key - (b.key as number); + } + return a.key!.compare(b.key as Buffer); + } + + /** + * @param node Edge target + * @param noAdvance If `true` - the parent should not consume bytes before + * moving to the target `node` + * @param key `Buffer` for `node.Match`, `number` for `node.Invoke`, + * `undefined` for edges created with `.otherwise()` + * @param value `.select()` value associated with the edge + */ + constructor(public readonly node: Node, + public readonly noAdvance: boolean, + public readonly key: Buffer | number | undefined, + public readonly value: number | undefined) { + if (node instanceof Invoke) { + if (value === undefined) { + assert.strictEqual(node.code.signature, 'match', + 'Invalid Invoke\'s code signature'); + } else { + assert.strictEqual(node.code.signature, 'value', + 'Invalid Invoke\'s code signature'); + } + } else { + assert.strictEqual(value, undefined, + 'Attempting to pass value to non-Invoke node'); + } + + if (Buffer.isBuffer(key)) { + assert(key.length > 0, 'Invalid edge buffer length'); + + if (noAdvance) { + assert.strictEqual(key.length, 1, + 'Only 1-char keys are allowed in `noAdvance` edges'); + } + } + } +} diff --git a/llparse-builder/src/loop-checker/index.ts b/llparse-builder/src/loop-checker/index.ts new file mode 100644 index 0000000..5751955 --- /dev/null +++ b/llparse-builder/src/loop-checker/index.ts @@ -0,0 +1,205 @@ +import * as assert from 'assert'; +import * as debugAPI from 'debug'; + +import { Node } from '../node'; +import { Reachability } from '../reachability'; +import { Lattice } from './lattice'; + +const debug = debugAPI('llparse-builder:loop-checker'); + +const EMPTY_VALUE = new Lattice('empty'); +const ANY_VALUE = new Lattice('any'); + +/** + * This class implements a loop checker pass. The goal of this pass is to verify + * that the graph doesn't contain infinite loops. + */ +export class LoopChecker { + private readonly lattice: Map<Node, Lattice> = new Map(); + + // Just a cache of terminated keys + private readonly terminatedCache: Map<Node, Lattice> = new Map(); + + /** + * Run loop checker pass on a graph starting from `root`. + * + * Throws on failure. + * + * @param root Graph root node + */ + public check(root: Node): void { + const r = new Reachability(); + + const nodes = r.build(root); + + for (const node of nodes) { + debug('checking loops starting from %j', node.name); + + // Set initial lattice value for all nodes + this.clear(nodes); + + // Mark root as reachable with any value + this.lattice.set(node, ANY_VALUE); + + // Raise lattice values + let changed: Set<Node> = new Set([ root ]); + while (changed.size !== 0) { + if (debug.enabled) { + debug('changed %j', Array.from(changed).map((other) => other.name)); + } + + const next: Set<Node> = new Set(); + for (const changedNode of changed) { + this.propagate(changedNode, next); + } + changed = next; + } + + debug('lattice stabilized'); + + // Visit nodes and walk through reachable edges to detect loops + this.visit(node, []); + } + } + + private clear(nodes: ReadonlyArray<Node>): void { + for (const node of nodes) { + this.lattice.set(node, EMPTY_VALUE); + } + } + + private propagate(node: Node, changed: Set<Node>): void { + let value: Lattice = this.lattice.get(node)!; + debug('propagate(%j), initial value %j', node.name, value); + + // Terminate values that are consumed by `match`/`select` + const terminated = this.terminate(node, value, changed); + if (!terminated.isEqual(EMPTY_VALUE)) { + debug('node %j terminates %j', node.name, terminated); + value = value.subtract(terminated); + if (value.isEqual(EMPTY_VALUE)) { + return; + } + } + + const keysByTarget: Map<Node, Lattice> = new Map(); + // Propagate value through `.peek()`/`.otherwise()` edges + for (const edge of node.getAllEdges()) { + if (!edge.noAdvance) { + continue; + } + + let targetValue: Lattice; + if (keysByTarget.has(edge.node)) { + targetValue = keysByTarget.get(edge.node)!; + } else { + targetValue = this.lattice.get(edge.node)!; + } + + // `otherwise` or `Invoke`'s edges + if (edge.key === undefined || typeof edge.key === 'number') { + targetValue = targetValue.union(value); + } else { + // `.peek()` + const edgeValue = new Lattice([ edge.key[0] ]).intersect(value); + if (edgeValue.isEqual(EMPTY_VALUE)) { + continue; + } + + targetValue = targetValue.union(edgeValue); + } + + keysByTarget.set(edge.node, targetValue); + } + + for (const [ child, childValue ] of keysByTarget) { + debug('node %j propagates %j to %j', node.name, childValue, + child.name); + this.update(child, childValue, changed); + } + } + + private update(node: Node, newValue: Lattice, changed: Set<Node>): boolean { + const value = this.lattice.get(node)!; + if (newValue.isEqual(value)) { + return false; + } + + this.lattice.set(node, newValue); + changed.add(node); + return true; + } + + private terminate(node: Node, value: Lattice, changed: Set<Node>): Lattice { + if (this.terminatedCache.has(node)) { + return this.terminatedCache.get(node)!; + } + + const terminated: number[] = []; + for (const edge of node.getAllEdges()) { + if (edge.noAdvance) { + continue; + } + + // Ignore `otherwise` and `Invoke`'s edges + if (edge.key === undefined || typeof edge.key === 'number') { + continue; + } + + terminated.push(edge.key[0]); + } + + const result = new Lattice(terminated); + this.terminatedCache.set(node, result); + return result; + } + + private visit(node: Node, path: ReadonlyArray<Node>): void { + let value = this.lattice.get(node)!; + debug('enter %j, value is %j', node.name, value); + + const terminated = this.terminatedCache.has(node) ? + this.terminatedCache.get(node)! : EMPTY_VALUE; + if (!terminated.isEqual(EMPTY_VALUE)) { + debug('subtract terminated %j', terminated); + value = value.subtract(terminated); + if (value.isEqual(EMPTY_VALUE)) { + debug('terminated everything'); + return; + } + } + + for (const edge of node.getAllEdges()) { + if (!edge.noAdvance) { + continue; + } + + let edgeValue = value; + + // `otherwise` or `Invoke`'s edges + if (edge.key === undefined || typeof edge.key === 'number') { + // nothing to do + // `.peek()` + } else { + edgeValue = edgeValue.intersect(new Lattice([ edge.key[0] ])); + } + + // Ignore unreachable edges + if (edgeValue.isEqual(EMPTY_VALUE)) { + continue; + } + if (path.indexOf(edge.node) !== -1) { + if (path.length === 0) { + throw new Error( + `Detected loop in "${edge.node.name}" through "${edge.node.name}"`); + } + throw new Error( + `Detected loop in "${edge.node.name}" through chain ` + + `${path.map((parent) => '"' + parent.name + '"').join(' -> ')}`); + } + this.visit(edge.node, path.concat(edge.node)); + } + + debug('leave %j', node.name); + } +} diff --git a/llparse-builder/src/loop-checker/lattice.ts b/llparse-builder/src/loop-checker/lattice.ts new file mode 100644 index 0000000..8d2a7fe --- /dev/null +++ b/llparse-builder/src/loop-checker/lattice.ts @@ -0,0 +1,115 @@ +import * as assert from 'assert'; + +const MAX_VALUE = 256; +const WORD_SIZE = 32; +const SIZE = (MAX_VALUE / WORD_SIZE) | 0; +const WORD_FILL = -1 | 0; + +assert.strictEqual(MAX_VALUE % WORD_SIZE, 0); + +export type LatticeValue = 'empty' | ReadonlyArray<number> | 'any'; + +/** + * A fixed-size bitfield, really + */ +export class Lattice { + protected readonly words: number[]; + + constructor(value: LatticeValue) { + this.words = new Array(SIZE).fill(value === 'any' ? WORD_FILL : 0); + + if (Array.isArray(value)) { + for (const single of value) { + this.add(single); + } + } + } + + public check(bit: number): boolean { + assert(0 <= bit && bit < MAX_VALUE, 'Invalid bit'); + const index = (bit / WORD_SIZE) | 0; + const off = bit % WORD_SIZE; + return (this.words[index] & (1 << off)) !== 0; + } + + public union(other: Lattice): Lattice { + const result = new Lattice('empty'); + + for (let i = 0; i < SIZE; i++) { + result.words[i] = this.words[i] | other.words[i]; + } + + return result; + } + + public intersect(other: Lattice): Lattice { + const result = new Lattice('empty'); + + for (let i = 0; i < SIZE; i++) { + result.words[i] = this.words[i] & other.words[i]; + } + + return result; + } + + public subtract(other: Lattice): Lattice { + const result = new Lattice('empty'); + + for (let i = 0; i < SIZE; i++) { + result.words[i] = this.words[i] & (~other.words[i]); + } + + return result; + } + + public isEqual(other: Lattice): boolean { + if (this === other) { + return true; + } + + for (let i = 0; i < SIZE; i++) { + if (this.words[i] !== other.words[i]) { + return false; + } + } + return true; + } + + public *[Symbol.iterator](): Iterator<number> { + // TODO(indutny): improve speed if needed + for (let i = 0; i < MAX_VALUE; i++) { + if (this.check(i)) { + yield i; + } + } + } + + public toJSON(): any { + let isEmpty = true; + let isFull = true; + for (let i = 0; i < SIZE; i++) { + if (this.words[i] !== 0) { + isEmpty = false; + } + if (this.words[i] !== WORD_FILL) { + isFull = false; + } + } + if (isEmpty) { + return 'empty'; + } + if (isFull) { + return 'any'; + } + return Array.from(this); + } + + // Private + + private add(bit: number): void { + assert(0 <= bit && bit < MAX_VALUE, 'Invalid bit'); + const index = (bit / WORD_SIZE) | 0; + const off = bit % WORD_SIZE; + this.words[index] |= 1 << off; + } +} diff --git a/llparse-builder/src/node/base.ts b/llparse-builder/src/node/base.ts new file mode 100644 index 0000000..9840f16 --- /dev/null +++ b/llparse-builder/src/node/base.ts @@ -0,0 +1,96 @@ +import * as assert from 'assert'; +import binarySearch = require('binary-search'); +import { Edge } from '../edge'; + +/** + * Base class for all graph nodes. + */ +export abstract class Node { + private otherwiseEdge: Edge | undefined; + private privEdges: Edge[] = []; + + /** + * @param name Node name + */ + constructor(public readonly name: string) { + // no-op + } + + /** + * Create an otherwise edge to node `node`. + * + * This edge is executed when no other edges match current input. No + * characters are consumed upon transition. + * + * NOTE: At most one otherwise (skipping or not) edge can be set, most nodes + * except `Error` require it. + * + * @param node Target node + */ + public otherwise(node: Node): this { + if (this.otherwiseEdge !== undefined) { + throw new Error('Node already has `otherwise` or `skipTo`'); + } + + this.otherwiseEdge = new Edge(node, true, undefined, undefined); + return this; + } + + /** + * Create a skipping otherwise edge to node `node`. + * + * This edge is executed when no other edges match current input. Single + * character is consumed upon transition. + * + * NOTE: At most one otherwise (skipping or not) edge can be set, most nodes + * except `Error` require it. + * + * @param node Target node + */ + public skipTo(node: Node): this { + if (this.otherwiseEdge !== undefined) { + throw new Error('Node already has `otherwise` or `skipTo`'); + } + + this.otherwiseEdge = new Edge(node, false, undefined, undefined); + return this; + } + + // Limited public use + + /** Get otherwise edge. */ + public getOtherwiseEdge(): Edge | undefined { + return this.otherwiseEdge; + } + + /** Get list of all non-otherwise edges. */ + public getEdges(): ReadonlyArray<Edge> { + return this.privEdges; + } + + /** Get list of all edges (including otherwise, if present). */ + public getAllEdges(): ReadonlyArray<Edge> { + const res = this.privEdges; + if (this.otherwiseEdge === undefined) { + return res; + } else { + return res.concat(this.otherwiseEdge); + } + } + + /** Get iterator through all non-otherwise edges. */ + public *[Symbol.iterator](): Iterator<Edge> { + yield* this.privEdges; + } + + // Internal + + protected addEdge(edge: Edge): void { + assert.notStrictEqual(edge.key, undefined); + + const index = binarySearch(this.privEdges, edge, Edge.compare); + assert(index < 0, 'Attempting to create duplicate edge'); + + this.privEdges.splice(-1 - index, 0, edge); + } +} diff --git a/llparse-builder/src/node/consume.ts b/llparse-builder/src/node/consume.ts new file mode 100644 index 0000000..eff4037 --- /dev/null +++ b/llparse-builder/src/node/consume.ts @@ -0,0 +1,19 @@ +import * as assert from 'assert'; +import { Node } from './base'; + +/** + * This node consumes number of characters specified by state's property with + * name `field` from the input, and forwards execution to `otherwise` node. + */ +export class Consume extends Node { + /** + * @param field State's property name + */ + constructor(public readonly field: string) { + super('consume_' + field); + + if (/^_/.test(field)) { + throw new Error(`Can't use internal field in \`consume()\`: "${field}"`); + } + } +} diff --git a/llparse-builder/src/node/error.ts b/llparse-builder/src/node/error.ts new file mode 100644 index 0000000..393f566 --- /dev/null +++ b/llparse-builder/src/node/error.ts @@ -0,0 +1,24 @@ +import * as assert from 'assert'; +import { Node } from './base'; + +/** + * This node terminates the execution with an error + */ +class NodeError extends Node { + /** + * @param code Error code to return to user + * @param reason Error description to store in parser's state + */ + constructor(public readonly code: number, public readonly reason: string) { + super('error'); + assert.strictEqual(code, code | 0, 'code must be integer'); + } + + /** `.otherwise()` is not supported on this type of node */ + public otherwise(node: Node): this { throw new Error('Not supported'); } + + /** `.skipTo()` is not supported on this type of node */ + public skipTo(node: Node): this { throw new Error('Not supported'); } +} + +export { NodeError as Error }; diff --git a/llparse-builder/src/node/index.ts b/llparse-builder/src/node/index.ts new file mode 100644 index 0000000..e3d5fe5 --- /dev/null +++ b/llparse-builder/src/node/index.ts @@ -0,0 +1,8 @@ +export { Node } from './base'; +export { Consume } from './consume'; +export { Error } from './error'; +export { Invoke, IInvokeMap } from './invoke'; +export { Match } from './match'; +export { Pause } from './pause'; +export { SpanStart } from './span-start'; +export { SpanEnd } from './span-end'; diff --git a/llparse-builder/src/node/invoke.ts b/llparse-builder/src/node/invoke.ts new file mode 100644 index 0000000..d6791a7 --- /dev/null +++ b/llparse-builder/src/node/invoke.ts @@ -0,0 +1,39 @@ +import * as assert from 'assert'; + +import { Code } from '../code'; +import { Edge } from '../edge'; +import { Node } from './base'; + +/** + * Map of return codes of the callback. Each key is a return code, + * value is the target node that must be executed upon getting such return code. + */ +export interface IInvokeMap { + readonly [key: number]: Node; +} + +/** + * This node invokes either external callback or intrinsic code and passes the + * execution to either a target from a `map` (if the return code matches one of + * registered in it), or to `otherwise` node. + */ +export class Invoke extends Node { + /** + * @param code External callback or intrinsic code. Can be created with + * `builder.code.*()` methods. + * @param map Map from callback return codes to target nodes + */ + constructor(public readonly code: Code, map: IInvokeMap) { + super('invoke_' + code.name); + + Object.keys(map).forEach((mapKey) => { + const numKey: number = parseInt(mapKey, 10); + const targetNode = map[numKey]!; + + assert.strictEqual(numKey, numKey | 0, + 'Invoke\'s map keys must be integers'); + + this.addEdge(new Edge(targetNode, true, numKey, undefined)); + }); + } +} diff --git a/llparse-builder/src/node/match.ts b/llparse-builder/src/node/match.ts new file mode 100644 index 0000000..617a659 --- /dev/null +++ b/llparse-builder/src/node/match.ts @@ -0,0 +1,162 @@ +import * as assert from 'assert'; +import { Buffer } from 'buffer'; + +import { Edge } from '../edge'; +import { Transform } from '../transform'; +import { toBuffer } from '../utils'; +import { Node } from './base'; + +/** + * Character/sequence to match. + * + * May have following types: + * + * * `number` - for single character + * * `string` - for printable character sequence + * * `Buffer` - for raw byte sequence + */ +export type MatchSingleValue = string | number | Buffer; + +/** + * Convenience type for passing several characters/sequences to match methods. + */ +export type MatchValue = MatchSingleValue | ReadonlyArray<MatchSingleValue>; + +/** + * A map from characters/sequences to `.select()`'s values. Used for specifying + * the value to be passed to `.select()'`s targets. + */ +export interface IMatchSelect { + readonly [key: string]: number; +} + +/** + * This node matches characters/sequences and forwards the execution according + * to matched character with optional attached value (See `.select()`). + */ +export class Match extends Node { + private transformFn: Transform | undefined; + + /** + * Set character transformation function. + * + * @param transform Transformation to apply. Can be created with + * `builder.transform.*()` methods. + */ + public transform(transformFn: Transform): this { + this.transformFn = transformFn; + return this; + } + + /** + * Match sequence/character and forward execution to `next` on success, + * consuming matched bytes of the input. + * + * No value is attached on such execution forwarding, and the target node + * **must not** be an `Invoke` node with a callback expecting the value. + * + * @param value Sequence/character to be matched + * @param next Target node to be executed on success. + */ + public match(value: MatchValue, next: Node): this { + if (Array.isArray(value)) { + for (const subvalue of value) { + this.match(subvalue, next); + } + return this; + } + + const buffer = toBuffer(value as MatchSingleValue); + const edge = new Edge(next, false, buffer, undefined); + this.addEdge(edge); + return this; + } + + /** + * Match character and forward execution to `next` on success + * without consuming one byte of the input. + * + * No value is attached on such execution forwarding, and the target node + * **must not** be an `Invoke` with a callback expecting the value. + * + * @param value Character to be matched + * @param next Target node to be executed on success. + */ + public peek(value: MatchValue, next: Node): this { + if (Array.isArray(value)) { + for (const subvalue of value) { + this.peek(subvalue, next); + } + return this; + } + + const buffer = toBuffer(value as MatchSingleValue); + assert.strictEqual(buffer.length, 1, + '`.peek()` accepts only single character keys'); + + const edge = new Edge(next, true, buffer, undefined); + this.addEdge(edge); + return this; + } + + /** + * Match character/sequence and forward execution to `next` on success + * consumed matched bytes of the input. + * + * Value is attached on such execution forwarding, and the target node + * **must** be an `Invoke` with a callback expecting the value. + * + * Possible signatures: + * + * * `.select(key, value [, next ])` + * * `.select({ key: value } [, next])` + * + * @param keyOrMap Either a sequence to match, or a map from sequences to + * values + * @param valueOrNext Either an integer value to be forwarded to the target + * node, or an otherwise node + * @param next Convenience param. Same as calling `.otherwise(...)` + */ + public select(keyOrMap: MatchSingleValue | IMatchSelect, + valueOrNext?: number | Node, next?: Node): this { + // .select({ key: value, ... }, next) + if (typeof keyOrMap === 'object') { + assert(valueOrNext instanceof Node, + 'Invalid `next` argument of `.select()`'); + assert.strictEqual(next, undefined, + 'Invalid argument count of `.select()`'); + + const map: IMatchSelect = keyOrMap as IMatchSelect; + next = valueOrNext as Node | undefined; + + Object.keys(map).forEach((mapKey) => { + const numKey: number = mapKey as any; + + this.select(numKey, map[numKey]!, next); + }); + return this; + } + + // .select(key, value, next) + assert.strictEqual(typeof valueOrNext, 'number', + 'Invalid `value` argument of `.select()`'); + assert.notStrictEqual(next, undefined, + 'Invalid `next` argument of `.select()`'); + + const key = toBuffer(keyOrMap as MatchSingleValue); + const value = valueOrNext as number; + + const edge = new Edge(next!, false, key, value); + this.addEdge(edge); + return this; + } + + // Limited public use + + /** + * Get tranformation function + */ + public getTransform(): Transform | undefined { + return this.transformFn; + } +} diff --git a/llparse-builder/src/node/pause.ts b/llparse-builder/src/node/pause.ts new file mode 100644 index 0000000..2dcf5d1 --- /dev/null +++ b/llparse-builder/src/node/pause.ts @@ -0,0 +1,25 @@ +import * as assert from 'assert'; +import { Node } from './base'; + +/** + * This returns the specified error code, but makes the resumption to + * `otherwise` target possible. + */ +export class Pause extends Node { + /** + * @param code Error code to return + * @param reason Error description + */ + constructor(public readonly code: number, public readonly reason: string) { + super('pause'); + assert.strictEqual(code, code | 0, 'code must be integer'); + } + + /** + * `.skipTo()` is not supported on this type of node, please use + * `.otherwise()` + */ + public skipTo(node: Node): this { + throw new Error('Not supported, please use `pause.otherwise()`'); + } +} diff --git a/llparse-builder/src/node/span-end.ts b/llparse-builder/src/node/span-end.ts new file mode 100644 index 0000000..377cd73 --- /dev/null +++ b/llparse-builder/src/node/span-end.ts @@ -0,0 +1,19 @@ +import { Span } from '../span'; +import { Node } from './base'; + +/** + * Indicates span end. + * + * A callback will be invoked with all input data since the most recent of: + * + * * Span start invocation + * * Parser execution + */ +export class SpanEnd extends Node { + /** + * @param span Span instance + */ + constructor(public readonly span: Span) { + super(`span_end_${span.callback.name}`); + } +} diff --git a/llparse-builder/src/node/span-start.ts b/llparse-builder/src/node/span-start.ts new file mode 100644 index 0000000..f81b432 --- /dev/null +++ b/llparse-builder/src/node/span-start.ts @@ -0,0 +1,16 @@ +import { Span } from '../span'; +import { Node } from './base'; + +/** + * Indicates span start. + * + * See `SpanEnd` for details on callback invocation. + */ +export class SpanStart extends Node { + /** + * @param span Span instance + */ + constructor(public readonly span: Span) { + super(`span_start_${span.callback.name}`); + } +} diff --git a/llparse-builder/src/property.ts b/llparse-builder/src/property.ts new file mode 100644 index 0000000..cf2fe4b --- /dev/null +++ b/llparse-builder/src/property.ts @@ -0,0 +1,12 @@ +export type PropertyType = 'i8' | 'i16' | 'i32' | 'i64' | 'ptr'; + +/** + * Class describing allocated property in parser's state + */ +export class Property { + constructor(public readonly ty: PropertyType, public readonly name: string) { + if (/^_/.test(name)) { + throw new Error(`Can't use internal property name: "${name}"`); + } + } +} diff --git a/llparse-builder/src/reachability.ts b/llparse-builder/src/reachability.ts new file mode 100644 index 0000000..88bcd65 --- /dev/null +++ b/llparse-builder/src/reachability.ts @@ -0,0 +1,31 @@ +import { Node } from './node'; + +/** + * This class finds all reachable nodes + */ +export class Reachability { + /** + * Build and return list of reachable nodes. + */ + public build(root: Node): ReadonlyArray<Node> { + const res = new Set(); + const queue = [ root ]; + while (queue.length !== 0) { + const node = queue.pop()!; + if (res.has(node)) { + continue; + } + res.add(node); + + for (const edge of node) { + queue.push(edge.node); + } + + const otherwise = node.getOtherwiseEdge(); + if (otherwise !== undefined) { + queue.push(otherwise.node); + } + } + return Array.from(res) as ReadonlyArray<Node>; + } +} diff --git a/llparse-builder/src/span-allocator.ts b/llparse-builder/src/span-allocator.ts new file mode 100644 index 0000000..b3e8f6b --- /dev/null +++ b/llparse-builder/src/span-allocator.ts @@ -0,0 +1,182 @@ +import * as assert from 'assert'; +import * as debugAPI from 'debug'; + +import { Node, SpanEnd, SpanStart } from './node'; +import { Reachability } from './reachability'; +import { Span } from './span'; + +const debug = debugAPI('llparse-builder:span-allocator'); + +type SpanSet = Set<Span>; + +interface ISpanActiveInfo { + readonly active: Map<Node, SpanSet>; + readonly spans: ReadonlyArray<Span>; +} + +type SpanOverlap = Map<Span, SpanSet>; + +export interface ISpanAllocatorResult { + readonly colors: ReadonlyMap<Span, number>; + readonly concurrency: ReadonlyArray<ReadonlyArray<Span> >; + readonly max: number; +} + +function id(node: SpanStart | SpanEnd): Span { + return node.span; +} + +export class SpanAllocator { + public allocate(root: Node): ISpanAllocatorResult { + const r = new Reachability(); + const nodes = r.build(root); + const info = this.computeActive(nodes); + this.check(info); + const overlap = this.computeOverlap(info); + return this.color(info.spans, overlap); + } + + private computeActive(nodes: ReadonlyArray<Node>): ISpanActiveInfo { + const activeMap: Map<Node, SpanSet> = new Map(); + nodes.forEach((node) => activeMap.set(node, new Set())); + + const queue: Set<Node> = new Set(nodes); + const spans: SpanSet = new Set(); + for (const node of queue) { + queue.delete(node); + + const active = activeMap.get(node)!; + + if (node instanceof SpanStart) { + const span = id(node); + spans.add(span); + active.add(span); + } + + active.forEach((span) => { + // Don't propagate span past the spanEnd + if (node instanceof SpanEnd && span === id(node)) { + return; + } + + node.getAllEdges().forEach((edge) => { + const edgeNode = edge.node; + + // Disallow loops + if (edgeNode instanceof SpanStart) { + assert.notStrictEqual(id(edgeNode), span, + `Detected loop in span "${span.callback.name}", started ` + + `at "${node.name}"`); + } + + const edgeActive = activeMap.get(edgeNode)!; + if (edgeActive.has(span)) { + return; + } + + edgeActive.add(span); + queue.add(edgeNode); + }); + }); + } + + return { active: activeMap, spans: Array.from(spans) }; + } + + private check(info: ISpanActiveInfo): void { + debug('check start'); + for (const [ node, spans ] of info.active) { + for (const edge of node.getAllEdges()) { + if (edge.node instanceof SpanStart) { + continue; + } + + // Skip terminal nodes + if (edge.node.getAllEdges().length === 0) { + continue; + } + + debug('checking edge from %j to %j', node.name, edge.node.name); + + const edgeSpans = info.active.get(edge.node)!; + for (const subSpan of edgeSpans) { + assert(spans.has(subSpan), + `Unmatched span end for "${subSpan.callback.name}" ` + + `at "${edge.node.name}", coming from "${node.name}"`); + } + + if (edge.node instanceof SpanEnd) { + const span = id(edge.node); + assert(spans.has(span), + `Unmatched span end for "${span.callback.name}"`); + } + } + } + } + + private computeOverlap(info: ISpanActiveInfo): SpanOverlap { + const active = info.active; + const overlap: SpanOverlap = new Map(); + + info.spans.forEach((span) => overlap.set(span, new Set())); + + active.forEach((spans) => { + spans.forEach((one) => { + const set = overlap.get(one)!; + spans.forEach((other) => { + if (other !== one) { + set.add(other); + } + }); + }); + }); + + return overlap; + } + + private color(spans: ReadonlyArray<Span>, overlapMap: SpanOverlap) + : ISpanAllocatorResult { + let max = -1; + const colors: Map<Span, number> = new Map(); + + const allocate = (span: Span): number => { + if (colors.has(span)) { + return colors.get(span)!; + } + + const overlap = overlapMap.get(span)!; + + // See which colors are already used + const used: Set<number> = new Set(); + for (const subSpan of overlap) { + if (colors.has(subSpan)) { + used.add(colors.get(subSpan)!); + } + } + + // Find minimum available color + let i; + for (i = 0; used.has(i); i++) { + // no-op + } + + max = Math.max(max, i); + colors.set(span, i); + + return i; + }; + + const map: Map<Span, number> = new Map(); + + spans.forEach((span) => map.set(span, allocate(span))); + + const concurrency: Span[][] = new Array(max + 1); + for (let i = 0; i < concurrency.length; i++) { + concurrency[i] = []; + } + + spans.forEach((span) => concurrency[allocate(span)].push(span)); + + return { colors: map, concurrency, max }; + } +} diff --git a/llparse-builder/src/span.ts b/llparse-builder/src/span.ts new file mode 100644 index 0000000..99cafb0 --- /dev/null +++ b/llparse-builder/src/span.ts @@ -0,0 +1,57 @@ +import * as assert from 'assert'; + +import { Span as SpanCallback } from './code'; +import { Node, SpanEnd, SpanStart } from './node'; + +/** + * Spans are used for notifying parser user about matched data. Each byte after + * span start will be sent to the span callback until span end is called. + */ +export class Span { + private readonly startCache: Map<Node, SpanStart> = new Map(); + private readonly endCache: Map<Node, SpanEnd> = new Map(); + + /** + * @param callback External callback, must be `code.span(...)` result. + */ + constructor(public readonly callback: SpanCallback) { + } + + /** + * Create `SpanStart` that indicates the start of the span. + * + * @param otherwise Optional convenience value. Same as calling + * `span.start().otherwise(...)` + */ + public start(otherwise?: Node) { + if (otherwise !== undefined && this.startCache.has(otherwise)) { + return this.startCache.get(otherwise)!; + } + + const res = new SpanStart(this); + if (otherwise !== undefined) { + res.otherwise(otherwise); + this.startCache.set(otherwise, res); + } + return res; + } + + /** + * Create `SpanEnd` that indicates the end of the span. + * + * @param otherwise Optional convenience value. Same as calling + * `span.end().otherwise(...)` + */ + public end(otherwise?: Node) { + if (otherwise !== undefined && this.endCache.has(otherwise)) { + return this.endCache.get(otherwise)!; + } + + const res = new SpanEnd(this); + if (otherwise !== undefined) { + res.otherwise(otherwise); + this.endCache.set(otherwise, res); + } + return res; + } +} diff --git a/llparse-builder/src/transform/base.ts b/llparse-builder/src/transform/base.ts new file mode 100644 index 0000000..902199c --- /dev/null +++ b/llparse-builder/src/transform/base.ts @@ -0,0 +1,12 @@ +export type TransformName = 'to_lower_unsafe' | 'to_lower'; + +/** + * Character transformation. + */ +export abstract class Transform { + /** + * @param name Transform name + */ + constructor(public readonly name: TransformName) { + } +} diff --git a/llparse-builder/src/transform/creator.ts b/llparse-builder/src/transform/creator.ts new file mode 100644 index 0000000..eaf3d5c --- /dev/null +++ b/llparse-builder/src/transform/creator.ts @@ -0,0 +1,28 @@ +import { Transform } from './base'; +import { ToLower } from './to-lower'; +import { ToLowerUnsafe } from './to-lower-unsafe'; + +/** + * API for creating character transformations. + * + * The results of methods of this class can be used as an argument to: + * `p.node().transform(...)`. + */ +export class Creator { + /** + * Unsafe transform to lowercase. + * + * The operation of this transformation is equivalent to: + * `String.fromCharCode(input.charCodeAt(0) | 0x20)`. + */ + public toLowerUnsafe(): Transform { + return new ToLowerUnsafe(); + } + + /** + * Safe transform to lowercase. + */ + public toLower(): Transform { + return new ToLower(); + } +} diff --git a/llparse-builder/src/transform/index.ts b/llparse-builder/src/transform/index.ts new file mode 100644 index 0000000..acdcf01 --- /dev/null +++ b/llparse-builder/src/transform/index.ts @@ -0,0 +1,3 @@ +export { Transform } from './base'; +export { Creator } from './creator'; +export { ToLowerUnsafe } from './to-lower-unsafe'; diff --git a/llparse-builder/src/transform/to-lower-unsafe.ts b/llparse-builder/src/transform/to-lower-unsafe.ts new file mode 100644 index 0000000..99d9618 --- /dev/null +++ b/llparse-builder/src/transform/to-lower-unsafe.ts @@ -0,0 +1,7 @@ +import { Transform } from './base'; + +export class ToLowerUnsafe extends Transform { + constructor() { + super('to_lower_unsafe'); + } +} diff --git a/llparse-builder/src/transform/to-lower.ts b/llparse-builder/src/transform/to-lower.ts new file mode 100644 index 0000000..b333fce --- /dev/null +++ b/llparse-builder/src/transform/to-lower.ts @@ -0,0 +1,7 @@ +import { Transform } from './base'; + +export class ToLower extends Transform { + constructor() { + super('to_lower'); + } +} diff --git a/llparse-builder/src/utils.ts b/llparse-builder/src/utils.ts new file mode 100644 index 0000000..3521b20 --- /dev/null +++ b/llparse-builder/src/utils.ts @@ -0,0 +1,19 @@ +import * as assert from 'assert'; +import { Buffer } from 'buffer'; + +/** + * Internal + */ +export function toBuffer(value: number | string | Buffer): Buffer { + let res: Buffer; + if (Buffer.isBuffer(value)) { + res = value; + } else if (typeof value === 'string') { + res = Buffer.from(value); + } else { + assert(0 <= value && value <= 0xff, 'Invalid byte value'); + res = Buffer.from([ value ]); + } + assert(res.length >= 1, 'Invalid key length'); + return res; +} |