From 36d22d82aa202bb199967e9512281e9a53db42c9 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Sun, 7 Apr 2024 21:33:14 +0200 Subject: Adding upstream version 115.7.0esr. Signed-off-by: Daniel Baumann --- xpcom/ds/tools/make_dafsa.py | 372 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 372 insertions(+) create mode 100644 xpcom/ds/tools/make_dafsa.py (limited to 'xpcom/ds/tools/make_dafsa.py') diff --git a/xpcom/ds/tools/make_dafsa.py b/xpcom/ds/tools/make_dafsa.py new file mode 100644 index 0000000000..a9cedf78a8 --- /dev/null +++ b/xpcom/ds/tools/make_dafsa.py @@ -0,0 +1,372 @@ +#!/usr/bin/env python +# Copyright 2014 The Chromium Authors. All rights reserved. +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. + +""" +A Deterministic acyclic finite state automaton (DAFSA) is a compact +representation of an unordered word list (dictionary). + +http://en.wikipedia.org/wiki/Deterministic_acyclic_finite_state_automaton + +This python program converts a list of strings to a byte array in C++. +This python program fetches strings and return values from a gperf file +and generates a C++ file with a byte array representing graph that can be +used as a memory efficient replacement for the perfect hash table. + +The input strings are assumed to consist of printable 7-bit ASCII characters +and the return values are assumed to be one digit integers. + +In this program a DAFSA is a diamond shaped graph starting at a common +root node and ending at a common end node. All internal nodes contain +a character and each word is represented by the characters in one path from +the root node to the end node. + +The order of the operations is crucial since lookups will be performed +starting from the source with no backtracking. Thus a node must have at +most one child with a label starting by the same character. The output +is also arranged so that all jumps are to increasing addresses, thus forward +in memory. + +The generated output has suffix free decoding so that the sign of leading +bits in a link (a reference to a child node) indicate if it has a size of one, +two or three bytes and if it is the last outgoing link from the actual node. +A node label is terminated by a byte with the leading bit set. + +The generated byte array can described by the following BNF: + + ::= < 8-bit value in range [0x00-0xFF] > + + ::= < printable 7-bit ASCII character, byte in range [0x20-0x7F] > + ::= < char + 0x80, byte in range [0xA0-0xFF] > + ::= < value + 0x80, byte in range [0x80-0x8F] > + + ::= < byte in range [0x00-0x3F] > + ::= < byte in range [0x40-0x5F] > + ::= < byte in range [0x60-0x7F] > + + ::= < byte in range [0x80-0xBF] > + ::= < byte in range [0xC0-0xDF] > + ::= < byte in range [0xE0-0xFF] > + + ::= + +