From b485aab7e71c1625cfc27e0f92c9509f42378458 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Sun, 5 May 2024 13:19:16 +0200 Subject: Adding upstream version 1.45.3+dfsg. Signed-off-by: Daniel Baumann --- .../h2o/libh2o/misc/oktavia/src/binary-util.jsx | 597 --- .../h2o/libh2o/misc/oktavia/src/bit-vector.jsx | 295 -- .../misc/oktavia/src/burrows-wheeler-transform.jsx | 73 - .../h2o/libh2o/misc/oktavia/src/csvparser.jsx | 22 - .../h2o/libh2o/misc/oktavia/src/fm-index.jsx | 323 -- web/server/h2o/libh2o/misc/oktavia/src/getopt.jsx | 356 -- .../h2o/libh2o/misc/oktavia/src/htmlparser.jsx | 280 -- .../h2o/libh2o/misc/oktavia/src/metadata.jsx | 498 --- .../h2o/libh2o/misc/oktavia/src/node-sqlite3.jsx | 115 - web/server/h2o/libh2o/misc/oktavia/src/oktavia.jsx | 427 --- .../h2o/libh2o/misc/oktavia/src/query-parser.jsx | 60 - .../misc/oktavia/src/query-string-parser.jsx | 128 - web/server/h2o/libh2o/misc/oktavia/src/query.jsx | 37 - web/server/h2o/libh2o/misc/oktavia/src/sais.jsx | 250 -- web/server/h2o/libh2o/misc/oktavia/src/sax.jsx | 1356 ------- .../h2o/libh2o/misc/oktavia/src/search-result.jsx | 287 -- .../h2o/libh2o/misc/oktavia/src/stemmer/among.jsx | 34 - .../misc/oktavia/src/stemmer/base-stemmer.jsx | 419 --- .../misc/oktavia/src/stemmer/danish-stemmer.jsx | 507 --- .../misc/oktavia/src/stemmer/dutch-stemmer.jsx | 1020 ------ .../misc/oktavia/src/stemmer/english-stemmer.jsx | 1638 --------- .../misc/oktavia/src/stemmer/finnish-stemmer.jsx | 1208 ------- .../misc/oktavia/src/stemmer/french-stemmer.jsx | 1867 ---------- .../misc/oktavia/src/stemmer/german-stemmer.jsx | 894 ----- .../misc/oktavia/src/stemmer/hungarian-stemmer.jsx | 1478 -------- .../misc/oktavia/src/stemmer/italian-stemmer.jsx | 1412 -------- .../misc/oktavia/src/stemmer/norwegian-stemmer.jsx | 428 --- .../misc/oktavia/src/stemmer/porter-stemmer.jsx | 1121 ------ .../oktavia/src/stemmer/portuguese-stemmer.jsx | 1321 ------- .../misc/oktavia/src/stemmer/romanian-stemmer.jsx | 1227 ------- .../misc/oktavia/src/stemmer/russian-stemmer.jsx | 875 ----- .../misc/oktavia/src/stemmer/spanish-stemmer.jsx | 1408 ------- .../libh2o/misc/oktavia/src/stemmer/stemmer.jsx | 5 - .../misc/oktavia/src/stemmer/swedish-stemmer.jsx | 416 --- .../misc/oktavia/src/stemmer/turkish-stemmer.jsx | 3824 -------------------- web/server/h2o/libh2o/misc/oktavia/src/style.jsx | 105 - .../h2o/libh2o/misc/oktavia/src/textparser.jsx | 25 - .../h2o/libh2o/misc/oktavia/src/tiny-segmenter.jsx | 350 -- .../h2o/libh2o/misc/oktavia/src/wavelet-matrix.jsx | 321 -- 39 files changed, 27007 deletions(-) delete mode 100644 web/server/h2o/libh2o/misc/oktavia/src/binary-util.jsx delete mode 100644 web/server/h2o/libh2o/misc/oktavia/src/bit-vector.jsx delete mode 100644 web/server/h2o/libh2o/misc/oktavia/src/burrows-wheeler-transform.jsx delete mode 100644 web/server/h2o/libh2o/misc/oktavia/src/csvparser.jsx delete mode 100644 web/server/h2o/libh2o/misc/oktavia/src/fm-index.jsx delete mode 100644 web/server/h2o/libh2o/misc/oktavia/src/getopt.jsx delete mode 100644 web/server/h2o/libh2o/misc/oktavia/src/htmlparser.jsx delete mode 100644 web/server/h2o/libh2o/misc/oktavia/src/metadata.jsx delete mode 100644 web/server/h2o/libh2o/misc/oktavia/src/node-sqlite3.jsx delete mode 100644 web/server/h2o/libh2o/misc/oktavia/src/oktavia.jsx delete mode 100644 web/server/h2o/libh2o/misc/oktavia/src/query-parser.jsx delete mode 100644 web/server/h2o/libh2o/misc/oktavia/src/query-string-parser.jsx delete mode 100644 web/server/h2o/libh2o/misc/oktavia/src/query.jsx delete mode 100644 web/server/h2o/libh2o/misc/oktavia/src/sais.jsx delete mode 100644 web/server/h2o/libh2o/misc/oktavia/src/sax.jsx delete mode 100644 web/server/h2o/libh2o/misc/oktavia/src/search-result.jsx delete mode 100644 web/server/h2o/libh2o/misc/oktavia/src/stemmer/among.jsx delete mode 100644 web/server/h2o/libh2o/misc/oktavia/src/stemmer/base-stemmer.jsx delete mode 100644 web/server/h2o/libh2o/misc/oktavia/src/stemmer/danish-stemmer.jsx delete mode 100644 web/server/h2o/libh2o/misc/oktavia/src/stemmer/dutch-stemmer.jsx delete mode 100644 web/server/h2o/libh2o/misc/oktavia/src/stemmer/english-stemmer.jsx delete mode 100644 web/server/h2o/libh2o/misc/oktavia/src/stemmer/finnish-stemmer.jsx delete mode 100644 web/server/h2o/libh2o/misc/oktavia/src/stemmer/french-stemmer.jsx delete mode 100644 web/server/h2o/libh2o/misc/oktavia/src/stemmer/german-stemmer.jsx delete mode 100644 web/server/h2o/libh2o/misc/oktavia/src/stemmer/hungarian-stemmer.jsx delete mode 100644 web/server/h2o/libh2o/misc/oktavia/src/stemmer/italian-stemmer.jsx delete mode 100644 web/server/h2o/libh2o/misc/oktavia/src/stemmer/norwegian-stemmer.jsx delete mode 100644 web/server/h2o/libh2o/misc/oktavia/src/stemmer/porter-stemmer.jsx delete mode 100644 web/server/h2o/libh2o/misc/oktavia/src/stemmer/portuguese-stemmer.jsx delete mode 100644 web/server/h2o/libh2o/misc/oktavia/src/stemmer/romanian-stemmer.jsx delete mode 100644 web/server/h2o/libh2o/misc/oktavia/src/stemmer/russian-stemmer.jsx delete mode 100644 web/server/h2o/libh2o/misc/oktavia/src/stemmer/spanish-stemmer.jsx delete mode 100644 web/server/h2o/libh2o/misc/oktavia/src/stemmer/stemmer.jsx delete mode 100644 web/server/h2o/libh2o/misc/oktavia/src/stemmer/swedish-stemmer.jsx delete mode 100644 web/server/h2o/libh2o/misc/oktavia/src/stemmer/turkish-stemmer.jsx delete mode 100644 web/server/h2o/libh2o/misc/oktavia/src/style.jsx delete mode 100644 web/server/h2o/libh2o/misc/oktavia/src/textparser.jsx delete mode 100644 web/server/h2o/libh2o/misc/oktavia/src/tiny-segmenter.jsx delete mode 100644 web/server/h2o/libh2o/misc/oktavia/src/wavelet-matrix.jsx (limited to 'web/server/h2o/libh2o/misc/oktavia/src') diff --git a/web/server/h2o/libh2o/misc/oktavia/src/binary-util.jsx b/web/server/h2o/libh2o/misc/oktavia/src/binary-util.jsx deleted file mode 100644 index 06d5e7584..000000000 --- a/web/server/h2o/libh2o/misc/oktavia/src/binary-util.jsx +++ /dev/null @@ -1,597 +0,0 @@ -class Binary -{ - static function dump32bitNumber (num : number) : string - { - var result = [String.fromCharCode(Math.floor(num / 65536))]; - result.push(String.fromCharCode(num % 65536)); - return result.join(""); - } - - static function load32bitNumber (buffer : string, offset : int) : number - { - var result = buffer.charCodeAt(offset) * 65536 + buffer.charCodeAt(offset + 1); - return result; - } - - static function dump16bitNumber (num : int) : string - { - return String.fromCharCode(num % 65536); - } - - static function load16bitNumber (buffer : string, offset : int) : int - { - return buffer.charCodeAt(offset); - } - - static function dumpString (str : string) : string - { - return Binary.dumpString(str, null); - } - - static function dumpString (str : string, report : Nullable.) : string - { - if (str.length > 32768) - { - str = str.slice(0, 32768); - } - var length = str.length; - var compress = true; - var charCodes = [] : int[]; - for (var i = 0; i < length; i++) - { - var charCode = str.charCodeAt(i); - if (charCode > 255) - { - compress = false; - break; - } - charCodes.push(charCode); - } - if (compress) - { - var result = [Binary.dump16bitNumber(length + 32768)]; - for (var i = 0; i < length; i += 2) - { - var bytes = charCodes[i]; - if (i != length - 1) - { - bytes += charCodes[i + 1] << 8; - } - result.push(Binary.dump16bitNumber(bytes)); - } - if (report) - { - report.add(length, Math.ceil(length / 2)); - } - } - else - { - var result = [Binary.dump16bitNumber(length), str]; - if (report) - { - report.add(length, length); - } - } - return result.join(''); - } - - static function loadString (buffer : string, offset : int) : LoadedStringResult - { - return new LoadedStringResult(buffer, offset); - } - - static function dumpStringList (strList : string[]) : string - { - return Binary.dumpStringList(strList, null); - } - - static function dumpStringList (strList : string[], report : Nullable.) : string - { - var result = [Binary.dump32bitNumber(strList.length)]; - for (var i = 0; i < strList.length; i++) - { - result.push(Binary.dumpString(strList[i], report)); - } - return result.join(''); - } - - static function loadStringList (buffer : string, offset : int) : LoadedStringListResult - { - return new LoadedStringListResult(buffer, offset); - } - - static function dumpStringListMap (strMap : Map.) : string - { - return Binary.dumpStringListMap(strMap, null); - } - - static function dumpStringListMap (strMap : Map., report : Nullable.) : string - { - var result = [] : string[]; - var counter = 0; - for (var key in strMap) - { - result.push(Binary.dumpString(key, report)); - result.push(Binary.dumpStringList(strMap[key], report)); - counter++; - } - return Binary.dump32bitNumber(counter) + result.join(''); - } - - static function loadStringListMap (buffer : string, offset : int) : LoadedStringListMapResult - { - return new LoadedStringListMapResult(buffer, offset); - } - - static function dump32bitNumberList (array : number[]) : string - { - return Binary.dump32bitNumberList(array, null); - } - - static function dump32bitNumberList (array : number[], report : Nullable.) : string - { - var result = [Binary.dump32bitNumber(array.length)] : string[]; - var index = 0; - var inputLength = array.length; - while (index < inputLength) - { - if (array[index] == 0) - { - var length = Binary._countZero(array, index); - result.push(Binary._zeroBlock(length)); - index += length; - } - else if (Binary._shouldZebraCode(array, index)) - { - result.push(Binary._createZebraCode(array, index)); - index = Math.min(array.length, index + 15); - } - else - { - var length = Binary._searchDoubleZero(array, index); - result.push(Binary._nonZeroBlock(array, index, length)); - if (length == 0) - { - throw new Error(''); - } - index += length; - } - } - var resultString = result.join(''); - if (report) - { - report.add(array.length * 2 + 2, resultString.length); - } - return resultString; - } - - static function load32bitNumberList (buffer :string, offset : int) : LoadedNumberListResult - { - return new LoadedNumberListResult(buffer, offset); - } - - static function _countZero (array : number[], offset : int) : int - { - for (var i = offset; i < array.length; i++) - { - if (array[i] != 0) - { - return i - offset; - } - } - return array.length - offset; - } - - static function _zeroBlock (length : int) : string - { - var result = [] : string[]; - while (length > 0) - { - if (length > 16384) - { - result.push(Binary.dump16bitNumber(16384 - 1)); - length -= 16384; - } - else - { - result.push(Binary.dump16bitNumber(length - 1)); - length = 0; - } - } - return result.join(''); - } - - static function _shouldZebraCode(array : number[], offset : int) : boolean - { - if (array.length - offset < 16) - { - return true; - } - var change = 0; - var isLastZero = false; - for (var i = offset; i < offset + 15; i++) - { - if (array[i] == 0) - { - if (!isLastZero) - { - isLastZero = true; - change++; - } - } - else - { - if (isLastZero) - { - isLastZero = false; - change++; - } - } - } - return change > 2; - } - - static function _searchDoubleZero (array : number[], offset : int) : int - { - var isLastZero = false; - for (var i = offset; i < array.length; i++) - { - if (array[i] == 0) - { - if (isLastZero) - { - return i - offset - 1; - } - isLastZero = true; - } - else - { - isLastZero = false; - } - } - return array.length - offset; - } - - static function _nonZeroBlock (array : number[], offset : int, length : int) : string - { - var result = [] : string[]; - while (length > 0) - { - var blockLength : int; - if (length > 16384) - { - blockLength = 16384; - length -= 16384; - } - else - { - blockLength = length; - length = 0; - } - result.push(Binary.dump16bitNumber((blockLength - 1) + 0x4000)); - for (var i = offset; i < offset + blockLength; i++) - { - result.push(Binary.dump32bitNumber(array[i])); - } - offset += blockLength; - } - return result.join(''); - } - - static function _createZebraCode (array : number[], offset : int) : string - { - var last = Math.min(offset + 15, array.length); - var code = 0x8000; - var result = [] : string[]; - for (var i = offset; i < last; i++) - { - if (array[i] != 0) - { - result.push(Binary.dump32bitNumber(array[i])); - code = code + (0x1 << (i - offset)); - } - } - return String.fromCharCode(code) + result.join(''); - } - - /* These base64 functions are based on http://www.onicos.com/staff/iz/amuse/javascript/expert/base64.txt - * original license: - * Copyright (C) 1999 Masanao Izumo - * Version: 1.0 - * LastModified: Dec 25 1999 - * This library is free. You can redistribute it and/or modify it. - */ - static const _base64EncodeChars = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; - - static function base64encode (str : string) : string - { - var out = [] : string[]; - var source = [] : int[]; - for (var i = 0; i < str.length; i++) - { - var code = str.charCodeAt(i); - source.push(code & 0x00ff, code >>> 8); - } - var len = str.length * 2; - var i = 0; - while (i < len) - { - var c1 = source[i++] & 0xff; - if (i == len) - { - out.push(Binary._base64EncodeChars.charAt(c1 >> 2)); - out.push(Binary._base64EncodeChars.charAt((c1 & 0x3) << 4)); - out.push("=="); - break; - } - var c2 = source[i++]; - if (i == len) - { - out.push(Binary._base64EncodeChars.charAt(c1 >> 2)); - out.push(Binary._base64EncodeChars.charAt(((c1 & 0x3)<< 4) | ((c2 & 0xF0) >> 4))); - out.push(Binary._base64EncodeChars.charAt((c2 & 0xF) << 2)); - out.push("="); - break; - } - var c3 = source[i++]; - out.push(Binary._base64EncodeChars.charAt(c1 >> 2)); - out.push(Binary._base64EncodeChars.charAt(((c1 & 0x3)<< 4) | ((c2 & 0xF0) >> 4))); - out.push(Binary._base64EncodeChars.charAt(((c2 & 0xF) << 2) | ((c3 & 0xC0) >>6))); - out.push(Binary._base64EncodeChars.charAt(c3 & 0x3F)); - } - return out.join(''); - } - - static const _base64DecodeChars = [ - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, -1, -1, 63, - 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, -1, -1, -1, -1, - -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, - 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1, - -1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, - 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, -1, -1, -1, -1, -1]; - - static function _mergeCharCode (source : int[]) : string - { - var result = [] : string[]; - for (var i = 0; i < source.length; i += 2) - { - result.push(String.fromCharCode(source[i] + (source[i + 1] << 8))); - } - return result.join(''); - } - - static function base64decode (str : string) : string - { - var len = str.length; - var i = 0; - var out = [] : int[]; - - while (i < len) - { - var c1, c2, c3, c4 : int; - - /* c1 */ - do { - c1 = Binary._base64DecodeChars[str.charCodeAt(i++) & 0xff]; - } while(i < len && c1 == -1); - if (c1 == -1) - { - break; - } - /* c2 */ - do { - c2 = Binary._base64DecodeChars[str.charCodeAt(i++) & 0xff]; - } while(i < len && c2 == -1); - if (c2 == -1) - { - break; - } - out.push((c1 << 2) | ((c2 & 0x30) >> 4)); - /* c3 */ - do { - c3 = str.charCodeAt(i++) & 0xff; - if (c3 == 61) - { - return Binary._mergeCharCode(out); - } - c3 = Binary._base64DecodeChars[c3]; - } while(i < len && c3 == -1); - if (c3 == -1) - { - break; - } - out.push(((c2 & 0XF) << 4) | ((c3 & 0x3C) >> 2)); - - /* c4 */ - do { - c4 = str.charCodeAt(i++) & 0xff; - if (c4 == 61) - { - return Binary._mergeCharCode(out); - } - c4 = Binary._base64DecodeChars[c4]; - } while(i < len && c4 == -1); - if (c4 == -1) - { - break; - } - out.push(((c3 & 0x03) << 6) | c4); - } - return Binary._mergeCharCode(out); - } -} - -class LoadedStringResult -{ - var result : string; - var offset : int; - - function constructor (data : string, offset : int) - { - var strLength = Binary.load16bitNumber(data, offset++); - if (strLength > 32767) - { - strLength = strLength - 32768; - var bytes = [] : string[]; - - for (var i = 0; i < strLength; i += 2) - { - var code = data.charCodeAt(offset); - bytes.push(String.fromCharCode(code & 0x00ff)); - if (i != strLength - 1) - { - bytes.push(String.fromCharCode(code >>> 8)); - } - offset++; - } - this.result = bytes.join(''); - this.offset = offset; - } - else - { - this.result = data.slice(offset, offset + strLength); - this.offset = offset + strLength; - } - } -} - -class LoadedStringListResult -{ - var result : string[]; - var offset : int; - - function constructor (data : string, offset : int) - { - this.result = [] : string[]; - - var length = Binary.load32bitNumber(data, offset); - offset += 2; - for (var i = 0; i < length; i++) - { - var strLength = Binary.load16bitNumber(data, offset++); - var resultStr : string; - if (strLength > 32767) - { - var strLength = strLength - 32768; - var bytes = [] : string[]; - for (var j = 0; j < strLength; j += 2) - { - var code = data.charCodeAt(offset); - bytes.push(String.fromCharCode(code & 0x00ff)); - if (j != strLength - 1) - { - bytes.push(String.fromCharCode(code >>> 8)); - } - offset++; - } - resultStr = bytes.join(''); - } - else - { - resultStr = data.slice(offset, offset + strLength); - offset = offset + strLength; - } - this.result.push(resultStr); - } - this.offset = offset; - } -} - -class LoadedStringListMapResult -{ - var result : Map.; - var offset : int; - - function constructor (data : string, offset : int) - { - this.result = {} : Map.; - - var length = Binary.load32bitNumber(data, offset); - offset += 2; - for (var i = 0; i < length; i++) - { - var keyResult = Binary.loadString(data, offset); - var valueResult = Binary.loadStringList(data, keyResult.offset); - this.result[keyResult.result] = valueResult.result; - offset = valueResult.offset; - } - this.offset = offset; - } -} - -class LoadedNumberListResult -{ - var result : number[]; - var offset : int; - - function constructor(data : string, offset : int) - { - var resultLength = Binary.load32bitNumber(data, offset); - var originalOffset = offset; - offset += 2; - var result = [] : number[]; - while (result.length < resultLength) - { - var tag = data.charCodeAt(offset++); - if ((tag >>> 15) == 1) // zebra - { - var length = Math.min(resultLength - result.length, 15); - for (var i = 0; i < length; i++) - { - if ((tag >>> i) & 0x1) - { - result.push(Binary.load32bitNumber(data, offset)); - offset += 2; - } - else - { - result.push(0); - } - } - } - else if ((tag >>> 14) == 1) // non-zero - { - var length = tag - 0x4000 + 1; - for (var i = 0; i < length; i++) - { - result.push(Binary.load32bitNumber(data, offset)); - offset += 2; - } - } - else // zero - { - var length = tag + 1; - for (var i = 0; i < length; i++) - { - result.push(0); - } - } - } - this.result = result; - this.offset = offset; - } -} - -class CompressionReport -{ - var source : int; - var result : int; - function constructor () - { - this.source = 0; - this.result = 0; - } - - function add (source : int, result : int) : void - { - this.source += source; - this.result += result; - } - - function rate () : int - { - return Math.round(this.result * 100.0 / this.source); - } -} diff --git a/web/server/h2o/libh2o/misc/oktavia/src/bit-vector.jsx b/web/server/h2o/libh2o/misc/oktavia/src/bit-vector.jsx deleted file mode 100644 index b366e43a0..000000000 --- a/web/server/h2o/libh2o/misc/oktavia/src/bit-vector.jsx +++ /dev/null @@ -1,295 +0,0 @@ -/** - * This is a JSX version of shellinford library: - * https://code.google.com/p/shellinford/ - * - * License: http://shibu.mit-license.org/ - */ - -import "binary-util.jsx"; - -class BitVector -{ - static const SMALL_BLOCK_SIZE : int = 32; - static const LARGE_BLOCK_SIZE : int = 256; - static const BLOCK_RATE : int = 8; - - var _v : number[]; - var _r : number[]; - var _size : int; - var _size1 : int; - - function constructor () - { - this._r = [] : number[]; - this._v = [] : number[]; - this.clear(); - } - - function build () : void - { - this._size1 = 0; - for (var i = 0; i < this._v.length; i++) - { - if (i % BitVector.BLOCK_RATE == 0) - { - this._r.push(this.size(true)); - } - this._size1 += this._rank32(this._v[i], BitVector.SMALL_BLOCK_SIZE, true); - } - } - - function clear () : void - { - this._v.length = 0; - this._r.length = 0; - this._size = 0; - this._size1 = 0; - } - - function size () : int - { - return this._size; - } - - function size (b : boolean) : int - { - return b ? (this._size1) : (this._size - this._size1); - } - - function set (value : int) : void - { - this.set(value, true); - } - - function set (value : int, flag : boolean) : void - { - if (value >= this.size()) - { - this._size = value + 1; - } - var q : int = value / BitVector.SMALL_BLOCK_SIZE; - var r : int = value % BitVector.SMALL_BLOCK_SIZE; - while (q >= this._v.length) - { - this._v.push(0); - } - var m : int = 0x1 << r; - if (flag) - { - this._v[q] |= m; - } - else - { - this._v[q] &= ~m; - } - } - - function get (value : int) : boolean - { - if (value >= this.size()) - { - throw new Error("BitVector.get() : range error"); - } - var q : int = value / BitVector.SMALL_BLOCK_SIZE; - var r : int = value % BitVector.SMALL_BLOCK_SIZE; - var m : int = 0x1 << r; - return (this._v[q] & m) as boolean; - } - - function rank (i : int) : int - { - return this.rank(i, true); - } - - function rank (i : int, b : boolean) : int - { - if (i > this.size()) - { - throw new Error("BitVector.rank() : range error"); - } - if (i == 0) - { - return 0; - } - i--; - var q_large : int = Math.floor(i / BitVector.LARGE_BLOCK_SIZE); - var q_small : int = Math.floor(i / BitVector.SMALL_BLOCK_SIZE); - var r : int = Math.floor(i % BitVector.SMALL_BLOCK_SIZE); - var rank : int = this._r[q_large]; - if (!b) - { - rank = q_large * BitVector.LARGE_BLOCK_SIZE - rank; - } - var begin = q_large * BitVector.BLOCK_RATE; - for (var j = begin; j < q_small; j++) - { - rank += this._rank32(this._v[j], BitVector.SMALL_BLOCK_SIZE, b); - } - rank += this._rank32(this._v[q_small], r + 1, b); - return rank; - } - - function select(i : int) : int - { - return this.select(i, true); - } - - function select(i : int, b : boolean) : int - { - if (i >= this.size(b)) - { - throw new Error("BitVector.select() : range error"); - } - - var left = 0; - var right = this._r.length; - while (left < right) - { - var pivot = Math.floor((left + right) / 2); - var rank = this._r[pivot]; - if (!b) - { - rank = pivot * BitVector.LARGE_BLOCK_SIZE - rank; - } - if (i < rank) - { - right = pivot; - } - else - { - left = pivot + 1; - } - } - right--; - - if (b) - { - i -= this._r[right]; - } - else - { - i -= right * BitVector.LARGE_BLOCK_SIZE - this._r[right]; - } - var j = right * BitVector.BLOCK_RATE; - while (1) - { - var rank = this._rank32(this._v[j], BitVector.SMALL_BLOCK_SIZE, b); - if (i < rank) - { - break; - } - j++; - i -= rank; - } - return j * BitVector.SMALL_BLOCK_SIZE + this._select32(this._v[j], i, b); - } - - function _rank32 (x : int, i : int, b : boolean) : int - { - if (!b) - { - x = ~x; - } - x <<= (BitVector.SMALL_BLOCK_SIZE - i); - x = ((x & 0xaaaaaaaa) >>> 1) - + (x & 0x55555555); - x = ((x & 0xcccccccc) >>> 2) - + (x & 0x33333333); - x = ((x & 0xf0f0f0f0) >>> 4) - + (x & 0x0f0f0f0f); - x = ((x & 0xff00ff00) >>> 8) - + (x & 0x00ff00ff); - x = ((x & 0xffff0000) >>> 16) - + (x & 0x0000ffff); - return x; - } - - function _select32(x : int, i : int, b : boolean) : int - { - if (!b) - { - x = ~x; - } - var x1 = ((x & 0xaaaaaaaa) >>> 1) - + (x & 0x55555555); - var x2 = ((x1 & 0xcccccccc) >>> 2) - + (x1 & 0x33333333); - var x3 = ((x2 & 0xf0f0f0f0) >>> 4) - + (x2 & 0x0f0f0f0f); - var x4 = ((x3 & 0xff00ff00) >>> 8) - + (x3 & 0x00ff00ff); - var x5 = ((x4 & 0xffff0000) >>> 16) - + (x4 & 0x0000ffff); - i++; - var pos = 0; - var v5 = x5 & 0xffffffff; - if (i > v5) - { - i -= v5; - pos += 32; - } - var v4 = (x4 >>> pos) & 0x0000ffff; - if (i > v4) - { - i -= v4; - pos += 16; - } - var v3 = (x3 >>> pos) & 0x000000ff; - if (i > v3) - { - i -= v3; - pos += 8; - } - var v2 = (x2 >>> pos) & 0x0000000f; - if (i > v2) - { - i -= v2; - pos += 4; - } - var v1 = (x1 >>> pos) & 0x00000003; - if (i > v1) - { - i -= v1; - pos += 2; - } - var v0 = (x >>> pos) & 0x00000001; - if (i > v0) - { - i -= v0; - pos += 1; - } - return pos; - } - - function dump () : string - { - var contents = [] : string[]; - contents.push(Binary.dump32bitNumber(this._size)); - contents.push(Binary.dump32bitNumberList(this._v)); - return contents.join(''); - } - - function dump (report : CompressionReport) : string - { - var contents = [] : string[]; - contents.push(Binary.dump32bitNumber(this._size)); - report.add(2, 2); - contents.push(Binary.dump32bitNumberList(this._v, report)); - return contents.join(''); - } - - function load (data : string) : int - { - return this.load(data, 0); - } - - function load (data : string, offset : int) : int - { - this.clear(); - this._size = Binary.load32bitNumber(data, offset); - var result = Binary.load32bitNumberList(data, offset + 2); - this._v = result.result; - this.build(); - return result.offset; - } -} diff --git a/web/server/h2o/libh2o/misc/oktavia/src/burrows-wheeler-transform.jsx b/web/server/h2o/libh2o/misc/oktavia/src/burrows-wheeler-transform.jsx deleted file mode 100644 index 4bdd72fe6..000000000 --- a/web/server/h2o/libh2o/misc/oktavia/src/burrows-wheeler-transform.jsx +++ /dev/null @@ -1,73 +0,0 @@ -import "sais.jsx"; - -class BurrowsWheelerTransform -{ - static var END_MARKER = String.fromCharCode(0); - - var _str : string; - var _size : int; - var _head : int; - var _suffixarray : int[]; - - function constructor () - { - this._size = 0; - this._head = 0; - this._suffixarray = [] : int[]; - } - - function size () : int - { - return this._size; - } - - function head () : int - { - return this._head; - } - - function clear () : void - { - this._str = ""; - this._size = 0; - this._head = 0; - this._suffixarray.length = 0; - } - - function build (str : string) : void - { - this._str = str; - this._size = this._str.length; - this._suffixarray = SAIS.make(str); - this._head = this._suffixarray.indexOf(0); - } - - function get (i : int) : string - { - var size = this.size(); - if (i >= size) - { - throw new Error("BurrowsWheelerTransform.get() : range error"); - } - var index = (this._suffixarray[i] + size - 1) % size; - return this._str.charAt(index); - } - - function get () : string - { - var str = [] : string []; - var size = this.size(); - for (var i = 0; i < size; i++) - { - str.push(this.get(i)); - } - return str.join(""); - } - - function get (replace : string) : string - { - var result = this.get(); - return result.replace(BurrowsWheelerTransform.END_MARKER, replace); - } -} - diff --git a/web/server/h2o/libh2o/misc/oktavia/src/csvparser.jsx b/web/server/h2o/libh2o/misc/oktavia/src/csvparser.jsx deleted file mode 100644 index 64ed6ac85..000000000 --- a/web/server/h2o/libh2o/misc/oktavia/src/csvparser.jsx +++ /dev/null @@ -1,22 +0,0 @@ -import "oktavia.jsx"; -import "stemmer/stemmer.jsx"; - - -class CSVParser -{ - var oktavia : Oktavia; - var root : string; - var stemmer : Nullable.; - - function constructor (root : string, stemmer : Stemmer) - { - this.oktavia = new Oktavia(); - this.root = root; - this.stemmer = stemmer; - } - - function parse (filepath : string) : void - { - log (filepath); - } -} diff --git a/web/server/h2o/libh2o/misc/oktavia/src/fm-index.jsx b/web/server/h2o/libh2o/misc/oktavia/src/fm-index.jsx deleted file mode 100644 index 502b4fcf9..000000000 --- a/web/server/h2o/libh2o/misc/oktavia/src/fm-index.jsx +++ /dev/null @@ -1,323 +0,0 @@ -/** - * This is a JSX version of shellinford library: - * https://code.google.com/p/shellinford/ - * - * License: http://shibu.mit-license.org/ - */ - -import "wavelet-matrix.jsx"; -import "bit-vector.jsx"; -import "burrows-wheeler-transform.jsx"; -import "binary-util.jsx"; -import "console.jsx"; - - -class FMIndex -{ - var _substr : string; - var _ddic : int; - var _ssize : int; - var _head : int; - var _sv : WaveletMatrix; - var _posdic : int[]; - var _idic : int[]; - var _rlt : int[]; - - function constructor () - { - this._ddic = 0, - this._head = 0; - this._substr = ""; - this._sv = new WaveletMatrix(); - this._posdic = [] : int[]; - this._idic = [] : int[]; - this._rlt = [] : int[]; - this._rlt.length = 65536; - } - - function clear () : void - { - this._sv.clear(); - this._posdic.length = 0; - this._idic.length = 0; - this._ddic = 0; - this._head = 0; - this._substr = ""; - } - - function size () : int - { - return this._sv.size(); - } - - function contentSize () : int - { - return this._substr.length; - } - - function getRows (key : string) : int - { - var pos = [] : int[]; - return this.getRows(key, pos); - } - function getRows (key : string, pos : int[]) : int - { - var i = key.length - 1; - var code = key.charCodeAt(i); - var first = this._rlt[code] + 1; - var last = this._rlt[code + 1]; - while (first <= last) - { - if (i == 0) - { - pos[0] = --first; - pos[1] = --last; - return (last - first + 1); - } - i--; - var c = key.charCodeAt(i); - first = this._rlt[c] + this._sv.rank(first - 1, c) + 1; - last = this._rlt[c] + this._sv.rank(last, c); - } - return 0; - } - - function getPosition (i : int) : int - { - if (i >= this.size()) - { - throw new Error("FMIndex.getPosition() : range error"); - } - var pos = 0; - while (i != this._head) - { - if ((i % this._ddic) == 0) - { - pos += (this._posdic[i / this._ddic] + 1); - break; - } - var c = this._sv.get(i); - i = this._rlt[c] + this._sv.rank(i, c); //LF - pos++; - } - return pos % this.size(); - } - - function getSubstring (pos : int, len : int) : string - { - if (pos >= this.size()) - { - throw new Error("FMIndex.getSubstring() : range error"); - } - var pos_end = Math.min(pos + len, this.size()); - var pos_tmp = this.size() - 1; - var i = this._head; - var pos_idic = Math.floor((pos_end + this._ddic - 2) / this._ddic); - if (pos_idic < this._idic.length) - { - pos_tmp = pos_idic * this._ddic; - i = this._idic[pos_idic]; - } - - var substr = ""; - while (pos_tmp >= pos) - { - var c = this._sv.get(i); - i = this._rlt[c] + this._sv.rank(i, c); //LF - if (pos_tmp < pos_end) - { - substr = String.fromCharCode(c) + substr; - } - if (pos_tmp == 0) - { - break; - } - pos_tmp--; - } - return substr; - } - - function build () : void - { - this.build(String.fromCharCode(0), 65535, 20, false); - } - - function build(end_marker : string, ddic : int, verbose : boolean) : void - { - this.build(end_marker, 65535, ddic, verbose); - } - - function build(end_marker : string, maxChar : int, ddic : int, verbose : boolean) : void - { - if (verbose) - { - console.time("building burrows-wheeler transform"); - } - this._substr += end_marker; - var b = new BurrowsWheelerTransform(); - b.build(this._substr); - var s = b.get(); - this._ssize = s.length; - this._head = b.head(); - b.clear(); - this._substr = ""; - if (verbose) - { - console.timeEnd("building burrows-wheeler transform"); - } - if (verbose) - { - console.time("building wavelet matrix"); - } - this._sv.setMaxCharCode(maxChar); - if (verbose) - { - console.log(" maxCharCode: ", maxChar); - console.log(" bitSize: ", this._sv.bitsize()); - } - this._sv.build(s); - if (verbose) - { - console.timeEnd("building wavelet matrix"); - } - - if (verbose) - { - console.time("caching rank less than"); - } - for (var c = 0; c < maxChar; c++) - { - this._rlt[c] = this._sv.rank_less_than(this._sv.size(), c); - } - if (verbose) - { - console.timeEnd("caching rank less than"); - } - this._ddic = ddic; - if (verbose) - { - console.time("building dictionaries"); - } - this._buildDictionaries(); - if (verbose) - { - console.timeEnd("building dictionaries"); - console.log(''); - } - } - - function _buildDictionaries () : void - { - for (var i = 0; i < (this._ssize / this._ddic + 1); i++) - { - this._posdic.push(0); - this._idic.push(0); - } - var i = this._head; - var pos = this.size() - 1; - do { - if ((i % this._ddic) == 0) - { - this._posdic[Math.floor(i / this._ddic)] = pos; - } - if ((pos % this._ddic) == 0) - { - this._idic[Math.floor(pos / this._ddic)] = i; - } - var c = this._sv.get(i); - i = this._rlt[c] + this._sv.rank(i, c); //LF - pos--; - } while (i != this._head); - } - - function push (doc : string) : void - { - if (doc.length <= 0) - { - throw new Error("FMIndex::push(): empty string"); - } - this._substr += doc; - } - - function search (keyword : string) : int[] - { - var result_map = {} : Map.; - var result = [] : int[]; - var position = [] : int[]; - var rows = this.getRows(keyword, position); - if (rows > 0) - { - var first = position[0]; - var last = position[1]; - for (var i = first; i <= last; i++) - { - result.push(this.getPosition(i)); - } - } - return result; - } - - function dump () : string - { - return this.dump(false); - } - - function dump (verbose : boolean) : string - { - var contents = [] : string[]; - var report = new CompressionReport(); - contents.push(Binary.dump32bitNumber(this._ddic)); - contents.push(Binary.dump32bitNumber(this._ssize)); - contents.push(Binary.dump32bitNumber(this._head)); - report.add(6, 6); - contents.push(this._sv.dump(report)); - if (verbose) - { - console.log("Serializing FM-index"); - console.log(' Wavelet Matrix: ' + (contents[3].length * 2) as string + ' bytes (' + report.rate() as string + '%)'); - } - contents.push(Binary.dump32bitNumber(this._posdic.length)); - for (var i in this._posdic) - { - contents.push(Binary.dump32bitNumber(this._posdic[i])); - } - for (var i in this._idic) - { - contents.push(Binary.dump32bitNumber(this._idic[i])); - } - if (verbose) - { - console.log(' Dictionary Cache: ' + (this._idic.length * 16) as string + ' bytes'); - } - return contents.join(""); - } - - function load (data : string) : int - { - return this.load(data, 0); - } - - function load (data : string, offset : int) : int - { - this._ddic = Binary.load32bitNumber(data, offset); - this._ssize = Binary.load32bitNumber(data, offset + 2); - this._head = Binary.load32bitNumber(data, offset + 4); - offset = this._sv.load(data, offset + 6); - var maxChar = Math.pow(2, this._sv.bitsize()); - for (var c = 0; c < maxChar; c++) - { - this._rlt[c] = this._sv.rank_less_than(this._sv.size(), c); - } - var size = Binary.load32bitNumber(data, offset); - offset += 2; - for (var i = 0; i < size; i++, offset += 2) - { - this._posdic.push(Binary.load32bitNumber(data, offset)); - } - for (var i = 0; i < size; i++, offset += 2) - { - this._idic.push(Binary.load32bitNumber(data, offset)); - } - return offset; - } -} diff --git a/web/server/h2o/libh2o/misc/oktavia/src/getopt.jsx b/web/server/h2o/libh2o/misc/oktavia/src/getopt.jsx deleted file mode 100644 index 56db655a9..000000000 --- a/web/server/h2o/libh2o/misc/oktavia/src/getopt.jsx +++ /dev/null @@ -1,356 +0,0 @@ -/* - * getopt.js: node.js implementation of POSIX getopt() (and then some) - * - * Copyright 2011 David Pacheco. All rights reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -import "console.jsx"; - -class CommandOption -{ - var option : string; - var optarg : Nullable.; - var error : boolean; - - function constructor (option : string) - { - this.option = option; - this.optarg = null; - this.error = false; - } - - function constructor (option : string, optarg : string) - { - this.option = option; - this.optarg = optarg; - this.error = false; - } - - function constructor (option : string, optarg : string, error : boolean) - { - this.option = option; - this.optarg = optarg; - this.error = error; - } -} - - -/* - * The BasicParser is our primary interface to the outside world. The - * documentation for this object and its public methods is contained in - * the included README.md. - */ -class BasicParser -{ - var _argv : string[]; - var _options : Map.; - var _aliases : Map.; - var _optind : int; - var _subind : int; - var _silent : boolean; - var _extraoptions : boolean; - - function constructor (optstring : string, argv : string[]) - { - this._argv = argv; - this._options = {} : Map.; - this._aliases = {} : Map.; - this._optind = 0; - this._subind = 0; - this._extraoptions = false; - - this._parseOptstr(optstring); - } - - static function _makeError (msg : string) : Error - { - return (new Error('getopt: ' + msg)); - } - - /* - * Parse the option string and update the following fields: - * - * _silent Whether to log errors to stderr. Silent mode is - * indicated by a leading ':' in the option string. - * - * _options Maps valid single-letter-options to booleans indicating - * whether each option is required. - * - * _aliases Maps valid long options to the corresponding - * single-letter short option. - */ - function _parseOptstr (optstr : string) : void - { - var i = 0; - - if (optstr.length > 0 && optstr.slice(0, 1) == ':') - { - this._silent = true; - i++; - } - else - { - this._silent = false; - } - while (i < optstr.length) - { - var chr = optstr.slice(i, i + 1); - var arg = false; - - if (!/^[\w\d]$/.test(chr)) - { - throw new Error('invalid optstring: only alphanumeric ' + - 'characters may be used as options: ' + chr); - } - - if (i + 1 < optstr.length && optstr.slice(i + 1, i + 2) == ':') - { - arg = true; - i++; - } - - this._options[chr] = arg; - - while (i + 1 < optstr.length && optstr.slice(i + 1, i + 2) == '(') - { - i++; - var cp = optstr.indexOf(')', i + 1); - if (cp == -1) - { - throw new Error('invalid optstring: missing ' + - '")" to match "(" at char ' + i as string); - } - var alias = optstr.substring(i + 1, cp); - this._aliases[alias] = chr; - i = cp; - } - i++; - } - } - - function optind () : int - { - return this._optind; - } - - /* - * For documentation on what getopt() does, see README.md. The following - * implementation invariants are maintained by getopt() and its helper methods: - * - * this._optind Refers to the element of _argv that contains - * the next argument to be processed. This may - * exceed _argv, in which case the end of input - * has been reached. - * - * this._subind Refers to the character inside - * this._options[this._optind] which begins - * the next option to be processed. This may never - * exceed the length of _argv[_optind], so - * when incrementing this value we must always - * check if we should instead increment optind and - * reset subind to 0. - * - * That is, when any of these functions is entered, the above indices' values - * are as described above. getopt() itself and getoptArgument() may both be - * called at the end of the input, so they check whether optind exceeds - * argv.length. getoptShort() and getoptLong() are called only when the indices - * already point to a valid short or long option, respectively. - * - * getopt() processes the next option as follows: - * - * o If _optind > _argv.length, then we already parsed all arguments. - * - * o If _subind == 0, then we're looking at the start of an argument: - * - * o Check for special cases like '-', '--', and non-option arguments. - * If present, update the indices and return the appropriate value. - * - * o Check for a long-form option (beginning with '--'). If present, - * delegate to getoptLong() and return the result. - * - * o Otherwise, advance subind past the argument's leading '-' and - * continue as though _subind != 0 (since that's now the case). - * - * o Delegate to getoptShort() and return the result. - */ - function getopt () : Nullable. - { - if (this._optind >= this._argv.length) - { - /* end of input */ - return null; - } - - var arg = this._argv[this._optind]; - if (this._extraoptions) - { - this._optind++; - return new CommandOption(arg); - } - - if (this._subind == 0) - { - if (arg == '-' || arg == '') - { - return null; - } - - if (arg.charAt(0) != '-') - { - this._extraoptions = true; - this._optind++; - return new CommandOption(arg); - } - - if (arg == '--') - { - this._optind++; - this._subind = 0; - return null; - } - - if (arg.slice(1, 2) == '-') - { - return this._getoptLong(); - } - this._subind++; - } - - return this._getoptShort(); - } - - /* - * Implements getopt() for the case where optind/subind point to a short option. - */ - function _getoptShort () : CommandOption - { - var arg = this._argv[this._optind]; - var chr = arg.slice(this._subind, this._subind + 1); - - if (++this._subind >= arg.length) - { - this._optind++; - this._subind = 0; - } - - if (!(chr in this._options)) - { - return this._errInvalidOption(chr); - } - - if (!this._options[chr]) - { - return new CommandOption(chr); - } - return this._getoptArgument(chr); - } - - /* - * Implements getopt() for the case where optind/subind point to a long option. - */ - function _getoptLong () : CommandOption - { - var arg = this._argv[this._optind]; - var eq = arg.indexOf('='); - var alias = arg.substring(2, eq == -1 ? arg.length : eq); - if (!(alias in this._aliases)) - { - return this._errInvalidOption(alias); - } - - var chr = this._aliases[alias]; - if (!this._options[chr]) - { - if (eq != -1) - { - return this._errExtraArg(alias); - } - this._optind++; /* eat this argument */ - return new CommandOption(chr); - } - - /* - * Advance optind/subind for the argument value and retrieve it. - */ - if (eq == -1) - { - this._optind++; - } - else - { - this._subind = eq + 1; - } - return this._getoptArgument(chr); - } - - /* - * For the given option letter 'chr' that takes an argument, assumes that - * optind/subind point to the argument (or denote the end of input) and return - * the appropriate getopt() return value for this option and argument (or return - * the appropriate error). - */ - function _getoptArgument (chr : string) : CommandOption - { - if (this._optind >= this._argv.length) - { - return this._errMissingArg(chr); - } - - var arg = this._argv[this._optind].substring(this._subind); - this._optind++; - this._subind = 0; - return new CommandOption(chr, arg); - } - - function _errMissingArg (chr : string) : CommandOption - { - if (this._silent) - { - return new CommandOption(':', chr); - } - console.error('option requires an argument -- ' + chr + '\n'); - return new CommandOption('?', chr, true); - } - - function _errInvalidOption (chr : string) : CommandOption - { - if (!this._silent) - { - console.error('illegal option -- ' + chr + '\n'); - } - return new CommandOption('?', chr, true); - } - - /* - * This error is not specified by POSIX, but neither is the notion of specifying - * long option arguments using "=" in the same argv-argument, but it's common - * practice and pretty convenient. - */ - function _errExtraArg (chr : string) : CommandOption - { - if (!this._silent) - { - console.error('option expects no argument -- ' + - chr + '\n'); - } - return new CommandOption('?', chr, true); - } -} diff --git a/web/server/h2o/libh2o/misc/oktavia/src/htmlparser.jsx b/web/server/h2o/libh2o/misc/oktavia/src/htmlparser.jsx deleted file mode 100644 index 3d0ace782..000000000 --- a/web/server/h2o/libh2o/misc/oktavia/src/htmlparser.jsx +++ /dev/null @@ -1,280 +0,0 @@ -import "console.jsx"; -import "js/nodejs.jsx"; -import "oktavia.jsx"; -import "metadata.jsx"; -import "sax.jsx"; -import "stemmer/stemmer.jsx"; - - -class _HTMLHandler extends SAXHandler -{ - var startParse : boolean; - var startTag : string; - var stack : string []; - var oktavia : Oktavia; - var section : Section; - var tag : Block; - var filter : TagFilter; - var filepath : string; - var unit : int; - var currentLink : string; - var currentTitle : string; - var lastId : string; - var waitTitle : boolean; - var sectionCount : int; - var inCode : boolean; - var addText : boolean; - - function constructor (oktavia : Oktavia, filepath : string, unit : int, filter : TagFilter) - { - super(); - this.startParse = false; - this.stack = [] : string[]; - this.oktavia = oktavia; - this.section = this.oktavia.getSection('section'); - this.tag = this.oktavia.getBlock('tag'); - this.unit = unit; - this.filter = filter; - this.filepath = filepath; - this.currentTitle = ''; - this.lastId = ''; - this.waitTitle = false; - this.addText = false; - } - - override function onready () : void - { - this.currentLink = this.filepath; - this.inCode = false; - } - - override function onopentag (tagname : string, attributes : Map.) : void - { - var headingId = ['h1', 'h2', 'h3', 'h4', 'h5', 'h6']; - if (this.startParse) - { - this.stack.push(tagname); - if ('id' in attributes) - { - this.lastId = attributes['id']; - } - switch (tagname) - { - case 'h1': - case 'h2': - case 'h3': - case 'h4': - case 'h5': - case 'h6': - if (headingId.indexOf(tagname) < this.unit) - { - if (this.oktavia.contentSize() > 0) - { - this.section.setTail(this.currentTitle + Oktavia.eob + this.currentLink); - } - this.currentLink = this.filepath + '#' + this.lastId; - this.currentTitle = ''; - this.waitTitle = true; - } - this.oktavia.addEndOfBlock(); - this.tag.startBlock(tagname); - break; - case 'pre': - this.tag.startBlock('pre'); - this.oktavia.addEndOfBlock(); - this.inCode = true; - break; - case 'p': - case 'div': - case 'blockquote': - this.oktavia.addEndOfBlock(); - break; - } - } - else - { - if (this.filter.match(tagname, attributes)) - { - this.startParse = true; - this.startTag = tagname; - this.stack.push(tagname); - } - } - if (tagname == 'title') - { - this.waitTitle = true; - this.currentTitle = ''; - } - } - - override function onclosetag (tagname : string) : void - { - if (this.startParse) - { - switch (tagname) - { - case 'h1': - case 'h2': - case 'h3': - case 'h4': - case 'h5': - case 'h6': - this.tag.endBlock(); - if (this.addText) - { - this.oktavia.addWord('\n'); - this.addText = false; - } - this.waitTitle = false; - break; - case 'pre': - this.inCode = false; - this.tag.endBlock(); - if (this.addText) - { - this.oktavia.addWord('\n'); - this.addText = false; - } - break; - case 'div': - case 'p': - case 'blockquote': - if (this.addText) - { - this.oktavia.addWord('\n'); - this.addText = false; - } - break; - } - if (this.stack.length == 0) - { - this.startParse = false; - } - } - if (tagname == 'title') - { - this.waitTitle = false; - } - } - - override function ontext (text : string) : void - { - if (this.startParse) - { - this.oktavia.addWord(text, !this.inCode); - this.addText = true; - } - if (this.waitTitle) - { - this.currentTitle += text; - } - } - - override function onend () : void - { - this.section.setTail(this.currentTitle + Oktavia.eob + this.currentLink); - } -} - -class TagFilter -{ - var tags : string[]; - var ids : string[]; - var classes : string[]; - var tagAndClasses : string[]; - - function constructor (filters : string[]) - { - this.tags = [] : string[]; - this.ids = [] : string[]; - this.classes = [] : string[]; - this.tagAndClasses = [] : string[]; - - for (var i = 0; i < filters.length; i++) - { - var filter = filters[i]; - switch (filter.charAt(0)) - { - case '#': - this.ids.push(filter.slice(1)); - break; - case '.': - this.classes.push(filter.slice(1)); - break; - default: - if (filter.indexOf('.') != -1) - { - this.tags.push(filter); - } - else - { - this.tagAndClasses.push(filter); - } - } - } - } - - function match (tagname : string, attributes : Map.) : boolean - { - var result = false; - if (this.tags.indexOf(tagname) != -1) - { - result = true; - } - else if (attributes['id'] && this.ids.indexOf(attributes['id']) != -1) - { - result = true; - } - else if (attributes['class']) - { - var classname = attributes['class']; - if (this.classes.indexOf(classname) != -1 || - this.tagAndClasses.indexOf(tagname + '.' + classname) != -1) - { - result = true; - } - } - return result; - } -} - -class HTMLParser -{ - var oktavia : Oktavia; - var unit : int; - var root : string; - var prefix : string; - var filter : TagFilter; - - function constructor (unit : int, root : string, prefix : string, filter : string[], stemmer : Nullable.) - { - this.unit = unit; - this.root = root; - this.prefix = prefix; - this.filter = new TagFilter(filter); - this.oktavia = new Oktavia(); - this.oktavia.addSection('section'); - this.oktavia.addBlock('tag'); - if (stemmer) - { - this.oktavia.setStemmer(stemmer); - } - } - - function parse (filepath : string) : void - { - var relative = this.prefix + node.path.relative(this.root, filepath); - console.log('reading: ' + relative); - var lines = node.fs.readFileSync(filepath, 'utf8'); - var handler = new _HTMLHandler(this.oktavia, relative, this.unit, this.filter); - var parser = new SAXParser(handler); - parser.parse(lines); - } - - function dump (cacheDensity : int, verbose : boolean) : string - { - console.log('\nbuilding...\n'); - this.oktavia.build(cacheDensity, verbose); - return this.oktavia.dump(verbose); - } -} diff --git a/web/server/h2o/libh2o/misc/oktavia/src/metadata.jsx b/web/server/h2o/libh2o/misc/oktavia/src/metadata.jsx deleted file mode 100644 index 25c388133..000000000 --- a/web/server/h2o/libh2o/misc/oktavia/src/metadata.jsx +++ /dev/null @@ -1,498 +0,0 @@ -import "bit-vector.jsx"; -import "oktavia.jsx"; -import "binary-util.jsx"; -import "search-result.jsx"; - - -class Metadata -{ - var _parent : Oktavia; - var _bitVector : BitVector; - - function constructor (parent : Oktavia) - { - this._parent = parent; - this._bitVector = new BitVector(); - } - - function _size () : int - { - return this._bitVector.rank(this._bitVector.size()); - } - - function getContent (index : int) : string - { - if (index < 0 || this._size() <= index) - { - throw new Error("Section.getContent() : range error " + index as string); - } - var startPosition = 0; - if (index > 0) - { - startPosition = this._bitVector.select(index - 1) + 1; - } - var length = this._bitVector.select(index) - startPosition + 1; - return this._parent._getSubstring(startPosition, length); - } - - function getStartPosition (index : int) : int - { - if (index < 0 || this._size() <= index) - { - throw new Error("Section.getContent() : range error " + index as string); - } - var startPosition = 0; - if (index > 0) - { - startPosition = this._bitVector.select(index - 1) + 1; - } - return startPosition; - } - - function grouping (result : SingleResult, positions : int [], word : string, stemmed : boolean) : void - { - } - - function getInformation(index : int) : string - { - return ''; - } - - function _build () : void - { - this._bitVector.build(); - } - - function _load (name : string, data : string, offset : int) : int - { - offset = this._bitVector.load(data, offset); - this._parent._metadataLabels.push(name); - this._parent._metadatas[name] = this; - return offset; - } - - function _dump () : string - { - return this._bitVector.dump(); - } - - function _dump (report : CompressionReport) : string - { - return this._bitVector.dump(report); - } -} - -class Section extends Metadata -{ - var _names : string[]; - - function constructor (parent : Oktavia) - { - super(parent); - this._names = [] : string[]; - } - - function setTail (name : string) : void - { - this.setTail(name, this._parent.contentSize()); - } - - function setTail (name : string, index : int) : void - { - this._names.push(name); - this._bitVector.set(index - 1); - } - - function size () : int - { - return this._names.length; - } - - function getSectionIndex (position : int) : int - { - if (position < 0 || this._bitVector.size() <= position) - { - throw new Error("Section.getSectionIndex() : range error " + position as string); - } - return this._bitVector.rank(position); - } - - function getName (index : int) : string - { - if (index < 0 || this.size() <= index) - { - throw new Error("Section.getName() : range error"); - } - return this._names[index]; - } - - override function grouping (result : SingleResult, positions : int [], word : string, stemmed : boolean) : void - { - for (var i = 0; i < positions.length; i++) - { - var position = positions[i]; - var index = this.getSectionIndex(position); - var unit = result.getSearchUnit(index); - if (unit.startPosition < 0) - { - unit.startPosition = this.getStartPosition(index); - } - unit.addPosition(word, position - unit.startPosition, stemmed); - } - } - - override function getInformation(index : int) : string - { - return this.getName(index); - } - - static function _load (parent : Oktavia, name : string, data : string, offset : int) : int - { - var strs = Binary.loadStringList(data, offset); - var section = new Section(parent); - section._names = strs.result; - return section._load(name, data, strs.offset); - } - - override function _dump () : string - { - return [Binary.dump16bitNumber(0), Binary.dumpStringList(this._names), super._dump()].join(''); - } - - override function _dump (report : CompressionReport) : string - { - report.add(1, 1); - return [Binary.dump16bitNumber(0), Binary.dumpStringList(this._names, report), super._dump(report)].join(''); - } -} - -class Splitter extends Metadata -{ - var name : Nullable.; - - function constructor (parent : Oktavia) - { - super(parent); - this.name = null; - } - - function constructor (parent : Oktavia, name : string) - { - super(parent); - this.name = name; - } - - function size () : int - { - return this._size(); - } - - function split () : void - { - this.split(this._parent.contentSize()); - } - - function split (index : int) : void - { - this._bitVector.set(index - 1); - } - - function getIndex (position : int) : int - { - if (position < 0 || this._bitVector.size() <= position) - { - throw new Error("Section.getSectionIndex() : range error"); - } - return this._bitVector.rank(position); - } - - override function grouping (result : SingleResult, positions : int [], word : string, stemmed : boolean) : void - { - for (var i = 0; i < positions.length; i++) - { - var position = positions[i]; - var index = this.getIndex(position); - var unit = result.getSearchUnit(index); - if (unit.startPosition < 0) - { - unit.startPosition = this.getStartPosition(index); - } - unit.addPosition(word, position - unit.startPosition, stemmed); - } - } - - override function getInformation(index : int) : string - { - if (this.name != null) - { - return this.name + ((index + 1) as string); - } - return ''; - } - - static function _load (parent : Oktavia, name : string, data : string, offset : int) : int - { - var section = new Splitter(parent); - return section._load(name, data, offset); - } - - override function _dump () : string - { - return [Binary.dump16bitNumber(1), super._dump()].join(''); - } - - override function _dump (report : CompressionReport) : string - { - report.add(1, 1); - return [Binary.dump16bitNumber(1), super._dump(report)].join(''); - } -} - -class Table extends Metadata -{ - var _headers : string[]; - var _columnTails : BitVector; - - function constructor (parent : Oktavia, headers : string[]) - { - super(parent); - this._headers = headers; - this._columnTails = new BitVector(); - } - - function rowSize () : int - { - return this._size(); - } - - function columnSize () : int - { - return this._headers.length; - } - - function setColumnTail () : void - { - var index = this._parent.contentSize(); - this._parent.addEndOfBlock(); - this._columnTails.set(index - 1); - } - - function setRowTail () : void - { - var index = this._parent.contentSize(); - this._bitVector.set(index - 1); - } - - function getCell (position : int) : int[] - { - if (position < 0 || this._bitVector.size() <= position) - { - throw new Error("Section.getSectionIndex() : range error " + position as string); - } - var row = this._bitVector.rank(position); - var currentColumn = this._columnTails.rank(position); - - var lastRowColumn = 0; - if (row > 0) - { - var startPosition = this._bitVector.select(row - 1) + 1; - lastRowColumn = this._columnTails.rank(startPosition); - } - var result = [row, currentColumn - lastRowColumn] : int[]; - return result; - } - - function getRowContent (rowIndex : int) : Map. - { - var content = this.getContent(rowIndex); - var values = content.split(Oktavia.eob, this._headers.length); - var result = {} : Map.; - for (var i in this._headers) - { - if (i < values.length) - { - result[this._headers[i]] = values[i]; - } - else - { - result[this._headers[i]] = ''; - } - } - return result; - } - - override function grouping (result : SingleResult, positions : int [], word : string, stemmed : boolean) : void - { - // TODO implement - } - - override function getInformation(index : int) : string - { - return ''; - } - - override function _build () : void - { - this._bitVector.build(); - this._columnTails.build(); - } - - static function _load (parent : Oktavia, name : string, data : string, offset : int) : int - { - var strs = Binary.loadStringList(data, offset); - var table = new Table(parent, strs.result); - offset = table._load(name, data, strs.offset); - return table._columnTails.load(data, offset); - } - - override function _dump () : string - { - return [ - Binary.dump16bitNumber(2), Binary.dumpStringList(this._headers), - super._dump(), this._columnTails.dump() - ].join(''); - } - - override function _dump (report : CompressionReport) : string - { - report.add(1, 1); - return [ - Binary.dump16bitNumber(2), Binary.dumpStringList(this._headers, report), - super._dump(report), this._columnTails.dump(report) - ].join(''); - } -} - -class Block extends Metadata -{ - var _names : string[]; - var _start : boolean; - - function constructor (parent : Oktavia) - { - super(parent); - this._names = [] : string[]; - this._start = false; - } - - function startBlock (blockName : string) : void - { - this.startBlock(blockName, this._parent.contentSize()); - } - - function startBlock (blockName : string, index : int) : void - { - if (this._start) - { - throw new Error('Splitter `' + this._names[this._names.length - 1] + '` is not closed'); - } - this._start = true; - this._names.push(blockName); - this._bitVector.set(index - 1); - } - - function endBlock () : void - { - this.endBlock(this._parent.contentSize()); - } - - function endBlock (index : int) : void - { - if (!this._start) - { - throw new Error('Splitter is not started'); - } - this._start = false; - this._bitVector.set(index - 1); - } - - function size () : int - { - return this._names.length; - } - - function blockIndex (position : int) : int - { - if (position < 0 || (this._parent._fmindex.size() - 1) <= position) - { - throw new Error("Block.blockIndex() : range error " + position as string); - } - var result : int; - if (position >= this._bitVector.size()) - { - position = this._bitVector.size() - 1; - result = this._bitVector.rank(position) + 1; - } - else - { - result = this._bitVector.rank(position); - } - return result; - } - - function inBlock (position : int) : boolean - { - var blockIndex = this.blockIndex(position); - return (blockIndex % 2) != 0; - } - - function getBlockContent (position : int) : string - { - var blockIndex = this.blockIndex(position); - var result : string; - if ((blockIndex % 2) != 0) - { - result = this.getContent(blockIndex); - } - else - { - result = ''; - } - return result; - } - - function getBlockName (position : int) : string - { - var blockIndex = this.blockIndex(position); - var result : string; - if ((blockIndex % 2) != 0) - { - result = this._names[blockIndex >>> 1]; - } - else - { - result = ''; - } - return result; - } - - override function grouping (result : SingleResult, positions : int [], word : string, stemmed : boolean) : void - { - // TODO implement - } - - override function getInformation(index : int) : string - { - return ''; - } - - static function _load (parent : Oktavia, name : string, data : string, offset : int) : int - { - var strs = Binary.loadStringList(data, offset); - var block = new Block(parent); - block._names = strs.result; - return block._load(name, data, strs.offset); - } - - override function _dump () : string - { - return [Binary.dump16bitNumber(3), Binary.dumpStringList(this._names), super._dump()].join(''); - } - - override function _dump (report : CompressionReport) : string - { - report.add(1, 1); - return [Binary.dump16bitNumber(3), Binary.dumpStringList(this._names, report), super._dump(report)].join(''); - } -} - diff --git a/web/server/h2o/libh2o/misc/oktavia/src/node-sqlite3.jsx b/web/server/h2o/libh2o/misc/oktavia/src/node-sqlite3.jsx deleted file mode 100644 index 32196983d..000000000 --- a/web/server/h2o/libh2o/misc/oktavia/src/node-sqlite3.jsx +++ /dev/null @@ -1,115 +0,0 @@ -import "js.jsx"; - -native __fake__ class _sqlite3database -{ - static const OK = 0; - static const ERROR = 1; - static const INTERNAL = 2; - static const PERM = 3; - static const ABORT = 4; - static const BUSY = 5; - static const LOCKED = 6; - static const NOMEM = 7; - static const READONLY = 8; - static const INTERRUPT = 9; - static const IOERR = 10; - static const CORRUPT = 11; - static const NOTFOUND = 12; - static const FULL = 13; - static const CANTOPEN = 14; - static const PROTOCOL = 15; - static const EMPTY = 16; - static const SCHEMA = 17; - static const TOOBIG = 18; - static const CONSTRAINT = 19; - static const MISMATCH = 20; - static const MISUSE = 21; - static const NOLFS = 22; - static const AUTH = 23; - static const FORMAT = 24; - static const RANGE = 25; - - function run(...params : variant) : void; - function all(...params : variant) : void; - function each(...params : variant) : void; - function close(...params : variant) : void; - function serialize(...params : variant) : void; -} - -native __fake__ class _sqlite3statement -{ -} - -native __fake__ class _sqlite3error -{ - var message : string; - var errno : int; - var code : string; -} - -class SQLite3Database -{ - var _instance : _sqlite3database; - - function constructor (filename : string) - { - var exp = "(function () { var __sqlite3 = require('sqlite3'); return new __sqlite3.Database('" + filename + "');})()"; - this._instance = js.eval(exp) as __noconvert__ _sqlite3database; - } - - function run (sql : string) : SQLite3Database - { - this._instance.run(sql); - return this; - } - - function run (sql : string, bind : variant) : SQLite3Database - { - this._instance.run(sql, bind); - return this; - } - - function run (sql : string, bind : variant, callback : (Nullable.<_sqlite3error>) -> void) : SQLite3Database - { - this._instance.run(sql, bind, callback); - return this; - } - - function run (sql : string, callback : (Nullable.<_sqlite3error>) -> void) : SQLite3Database - { - this._instance.run(sql, callback); - return this; - } - - function each (sql : string, callback : (Nullable.<_sqlite3error>, variant) -> void) : SQLite3Database - { - this._instance.each(sql, callback); - return this; - } - - function all (sql : string, callback : (Nullable.<_sqlite3error>, variant[]) -> void) : SQLite3Database - { - this._instance.all(sql, callback); - return this; - } - - function serialize () : void - { - this._instance.serialize(); - } - - function serialize (callback : (Nullable.<_sqlite3error>) -> void) : void - { - this._instance.serialize(callback); - } - - function close () : void - { - this._instance.close(); - } - - function close (callback : (Nullable.<_sqlite3error>) -> void) : void - { - this._instance.close(callback); - } -} diff --git a/web/server/h2o/libh2o/misc/oktavia/src/oktavia.jsx b/web/server/h2o/libh2o/misc/oktavia/src/oktavia.jsx deleted file mode 100644 index 8109b4759..000000000 --- a/web/server/h2o/libh2o/misc/oktavia/src/oktavia.jsx +++ /dev/null @@ -1,427 +0,0 @@ -import "metadata.jsx"; -import "fm-index.jsx"; -import "binary-util.jsx"; -import "query.jsx"; -import "search-result.jsx"; -import "stemmer/stemmer.jsx"; -import "console.jsx"; - - -class Oktavia -{ - var _fmindex : FMIndex; - var _metadatas : Map.; - var _metadataLabels : string[]; - var _stemmer : Nullable.; - var _stemmingResult : Map.; - - // char code remap tables - var _utf162compressCode : string[]; - var _compressCode2utf16 : string[]; - - // sentinels - static const eof = String.fromCharCode(0); - static const eob = String.fromCharCode(1); - static const unknown = String.fromCharCode(3); - - function constructor () - { - this._fmindex = new FMIndex(); - this._metadatas = {} : Map.; - this._metadataLabels = [] : string[]; - this._stemmer = null; - this._stemmingResult = {} : Map.; - this._utf162compressCode = [Oktavia.eof, Oktavia.eob, Oktavia.unknown]; - this._utf162compressCode.length = 65536; - this._compressCode2utf16 = [Oktavia.eof, Oktavia.eob, Oktavia.unknown]; - } - - function setStemmer (stemmer : Stemmer) : void - { - this._stemmer = stemmer; - } - - function getPrimaryMetadata () : Metadata - { - return this._metadatas[this._metadataLabels[0]]; - } - - function addSection (key : string) : Section - { - if (this._metadataLabels.indexOf(key) != -1) - { - throw new Error('Metadata name ' + key + ' is already exists'); - } - this._metadataLabels.push(key); - var section = new Section(this); - this._metadatas[key] = section; - return section; - } - - function getSection (key : string) : Section - { - if (this._metadataLabels.indexOf(key) == -1) - { - throw new Error('Metadata name ' + key + " does't exists"); - } - return this._metadatas[key] as Section; - } - - function addSplitter (key : string) : Splitter - { - if (this._metadataLabels.indexOf(key) != -1) - { - throw new Error('Metadata name ' + key + ' is already exists'); - } - this._metadataLabels.push(key); - var splitter = new Splitter(this); - this._metadatas[key] = splitter; - return splitter; - } - - function getSplitter (key : string) : Splitter - { - if (this._metadataLabels.indexOf(key) == -1) - { - throw new Error('Metadata name ' + key + " does't exists"); - } - return this._metadatas[key] as Splitter; - } - - function addTable (key : string, headers : string[]) : Table - { - if (this._metadataLabels.indexOf(key) != -1) - { - throw new Error('Metadata name ' + key + ' is already exists'); - } - this._metadataLabels.push(key); - var table = new Table(this, headers); - this._metadatas[key] = table; - return table; - } - - function getTable (key : string) : Table - { - if (this._metadataLabels.indexOf(key) == -1) - { - throw new Error('Metadata name ' + key + " does't exists"); - } - return this._metadatas[key] as Table; - } - - function addBlock (key : string) : Block - { - if (this._metadataLabels.indexOf(key) != -1) - { - throw new Error('Metadata name ' + key + ' is already exists'); - } - this._metadataLabels.push(key); - var block = new Block(this); - this._metadatas[key] = block; - return block; - } - - function getBlock (key : string) : Block - { - if (this._metadataLabels.indexOf(key) == -1) - { - throw new Error('Metadata name ' + key + " does't exists"); - } - return this._metadatas[key] as Block; - } - - function addEndOfBlock () : void - { - this._fmindex.push(Oktavia.eob); - } - - function addWord (words : string) : void - { - var str = [] : string[]; - str.length = words.length; - for (var i = 0; i < words.length; i++) - { - var charCode = words.charCodeAt(i); - var newCharCode = this._utf162compressCode[charCode]; - if (newCharCode == null) - { - newCharCode = String.fromCharCode(this._compressCode2utf16.length); - this._utf162compressCode[charCode] = newCharCode; - this._compressCode2utf16.push(String.fromCharCode(charCode)); - } - str.push(newCharCode); - } - this._fmindex.push(str.join('')); - } - - function addWord (words : string, stemming : boolean) : void - { - this.addWord(words); - var wordList = words.split(/\s+/); - for (var i = 0; i < wordList.length; i++) - { - var originalWord = wordList[i]; - var smallWord = originalWord.slice(0, 1).toLowerCase() + originalWord.slice(1); - var registerWord : Nullable. = null; - if (stemming && this._stemmer) - { - var baseWord = this._stemmer.stemWord(originalWord.toLowerCase()); - if (originalWord.indexOf(baseWord) == -1) - { - registerWord = baseWord; - } - } - else if (originalWord != smallWord) - { - registerWord = smallWord; - } - if (registerWord) - { - var compressedCodeWord = this._convertToCompressionCode(originalWord); - var stemmedList = this._stemmingResult[registerWord]; - if (!stemmedList) - { - stemmedList = [compressedCodeWord]; - this._stemmingResult[registerWord] = stemmedList; - } - else if (stemmedList.indexOf(compressedCodeWord) == -1) - { - stemmedList.push(compressedCodeWord); - } - } - } - } - - function _convertToCompressionCode (keyword : string) : string - { - var resultChars = [] : string[]; - for (var i = 0; i < keyword.length; i++) - { - var chr = this._utf162compressCode[keyword.charCodeAt(i)]; - if (chr == null) - { - resultChars.push(Oktavia.unknown); - } - else - { - resultChars.push(chr); - } - } - return resultChars.join(''); - } - - function rawSearch (keyword : string, stemming : boolean) : int[] - { - var result : int[]; - if (stemming) - { - result = [] : int[]; - if (this._stemmer) - { - var baseWord = this._stemmer.stemWord(keyword.toLowerCase()); - var stemmedList = this._stemmingResult[baseWord]; - if (stemmedList) - { - for (var i = 0; i < stemmedList.length; i++) - { - var word = stemmedList[i]; - result = result.concat(this._fmindex.search(word)); - } - } - } - } - else - { - result = this._fmindex.search(this._convertToCompressionCode(keyword)); - } - return result; - } - - function search (queries : Query[]) : SearchSummary - { - var summary = new SearchSummary(this); - for (var i = 0; i < queries.length; i++) - { - summary.addQuery(this._searchQuery(queries[i])); - } - summary.mergeResult(); - return summary; - } - - function _searchQuery (query : Query) : SingleResult - { - var result = new SingleResult(query.word, query.or, query.not); - var positions : int[]; - if (query.raw) - { - positions = this.rawSearch(query.word, false); - } - else - { - positions = this.rawSearch(query.word, false).concat(this.rawSearch(query.word, true)); - } - this.getPrimaryMetadata().grouping(result, positions, query.word, !query.raw); - return result; - } - - function build () : void - { - this.build(5, false); - } - - function build (cacheDensity : int, verbose : boolean) : void - { - for (var key in this._metadatas) - { - this._metadatas[key]._build(); - } - var cacheRange = Math.round(Math.max(1, (100 / Math.min(100, Math.max(0.01, cacheDensity))))); - var maxChar = this._compressCode2utf16.length; - this._fmindex.build(Oktavia.eof, maxChar, cacheRange, verbose); - } - - function dump () : string - { - return this.dump(false); - } - - function dump (verbose : boolean) : string - { - var headerSource = "oktavia-01"; - var header = Binary.dumpString(headerSource).slice(1); - if (verbose) - { - console.log("Source text size: " + (this._fmindex.size() * 2) as string + ' bytes'); - } - var fmdata = this._fmindex.dump(verbose); - var result = [ - header, - fmdata - ]; - - result.push(Binary.dump16bitNumber(this._compressCode2utf16.length)); - for (var i = 3; i < this._compressCode2utf16.length; i++) - { - result.push(this._compressCode2utf16[i]); - } - if (verbose) - { - console.log('Char Code Map: ' + (this._compressCode2utf16.length * 2 - 2) as string + ' bytes'); - } - - var report = new CompressionReport(); - result.push(Binary.dumpStringListMap(this._stemmingResult, report)); - if (verbose) - { - console.log('Stemmed Word Table: ' + (result[result.length - 1].length) as string + ' bytes (' + report.rate() as string + '%)'); - } - - result.push(Binary.dump16bitNumber(this._metadataLabels.length)); - for (var i = 0; i < this._metadataLabels.length; i++) - { - var report = new CompressionReport(); - var name = this._metadataLabels[i]; - var data = this._metadatas[name]._dump(report); - result.push(Binary.dumpString(name, report), data); - if (verbose) - { - console.log('Meta Data ' + name + ': ' + (data.length * 2) as string + ' bytes (' + report.rate() as string + '%)'); - } - } - return result.join(''); - } - - function load (data : string) : void - { - var headerSource = "oktavia-01"; - var header = Binary.dumpString(headerSource).slice(1); - if (data.slice(0, 5) != header) - { - throw new Error('Invalid data file'); - } - this._metadatas = {} : Map.; - this._metadataLabels = [] : string[]; - - var offset = 5; - offset = this._fmindex.load(data, offset); - var charCodeCount = Binary.load16bitNumber(data, offset++); - this._compressCode2utf16 = [Oktavia.eof, Oktavia.eob, Oktavia.unknown]; - this._utf162compressCode = [Oktavia.eof, Oktavia.eob, Oktavia.unknown]; - for (var i = 3; i < charCodeCount; i++) - { - var charCode = Binary.load16bitNumber(data, offset++); - this._compressCode2utf16.push(String.fromCharCode(charCode)); - this._utf162compressCode[charCode] = String.fromCharCode(i); - } - - var stemmedWords = Binary.loadStringListMap(data, offset); - this._stemmingResult = stemmedWords.result; - offset = stemmedWords.offset; - - var metadataCount = Binary.load16bitNumber(data, offset++); - for (var i = 0; i < metadataCount; i++) - { - var nameResult = Binary.loadString(data, offset); - var name = nameResult.result; - var offset = nameResult.offset; - var type = Binary.load16bitNumber(data, offset++); - switch (type) - { - case 0: - offset = Section._load(this, name, data, offset); - break; - case 1: - offset = Splitter._load(this, name, data, offset); - break; - case 2: - offset = Table._load(this, name, data, offset); - break; - case 3: - offset = Block._load(this, name, data, offset); - break; - } - } - } - - function contentSize () : int - { - return this._fmindex.contentSize(); - } - - function wordPositionType (position : int) : int - { - var result = 0; - if (position == 0) - { - result = 4; - } - else - { - var ahead = this._fmindex.getSubstring(position - 1, 1); - if (/\s/.test(ahead)) - { - result = 2; - } - else if (/\W/.test(ahead)) - { - result = 1; - } - else if (Oktavia.eob == ahead) - { - result = 3; - } - } - return result; - } - - function _getSubstring (position : int, length : int) : string - { - var result = this._fmindex.getSubstring(position, length); - var str = [] : string[]; - for (var i = 0; i < result.length; i++) - { - str.push(this._compressCode2utf16[result.charCodeAt(i)]); - } - return str.join(''); - } -} diff --git a/web/server/h2o/libh2o/misc/oktavia/src/query-parser.jsx b/web/server/h2o/libh2o/misc/oktavia/src/query-parser.jsx deleted file mode 100644 index 86308bcb8..000000000 --- a/web/server/h2o/libh2o/misc/oktavia/src/query-parser.jsx +++ /dev/null @@ -1,60 +0,0 @@ -import "query.jsx"; - - -class QueryParser -{ - var queries : Query[]; - function constructor() - { - this.queries = [] : Query[]; - } - - function parse (queryStrings : string[]) : Query[] - { - var nextOr = false; - for (var i = 0; i < queryStrings.length; i++) - { - var word = queryStrings[i]; - if (word == 'OR') - { - nextOr = true; - } - else - { - var query = new Query(); - if (nextOr) - { - query.or = true; - nextOr = false; - } - if (word.slice(0, 1) == '-') - { - query.not = true; - word = word.slice(1); - } - if (word.slice(0, 1) == '"' && word.slice(word.length -1) == '"') - { - query.raw = true; - word = word.slice(1, word.length -1); - } - query.word = word; - this.queries.push(query); - } - } - return this.queries; - } - - function highlight () : string - { - var result = [] : string[]; - for (var i = 0; i < this.queries.length; i++) - { - var query = this.queries[i]; - if (!query.not) - { - result.push("highlight=" + String.encodeURIComponent(query.word)); - } - } - return '?' + result.join('&'); - } -} diff --git a/web/server/h2o/libh2o/misc/oktavia/src/query-string-parser.jsx b/web/server/h2o/libh2o/misc/oktavia/src/query-string-parser.jsx deleted file mode 100644 index 71d015c91..000000000 --- a/web/server/h2o/libh2o/misc/oktavia/src/query-string-parser.jsx +++ /dev/null @@ -1,128 +0,0 @@ -import "query.jsx"; - - -class QueryStringParser -{ - var queries : Query[]; - function constructor() - { - this.queries = [] : Query[]; - } - - function parse (queryString : string) : Query[] - { - var nextOr = false; - var nextNot = false; - var currentWordStart = 0; - var status = 0; - // 0: free - // 1: in unquoted word - // 2: in quote - var isSpace = /[\s\u3000]/; - for (var i = 0; i < queryString.length; i++) - { - var ch = queryString.charAt(i); - switch (status) - { - case 0: // free - if (!isSpace.test(ch)) - { - if (ch == '-') - { - nextNot = true; - } - else if (ch == '"') - { - currentWordStart = i + 1; - status = 2; - } - else - { - currentWordStart = i; - status = 1; - } - } - else - { - nextNot = false; - } - break; - case 1: // unquoted word - if (isSpace.test(ch)) - { - var word = queryString.slice(currentWordStart, i); - if (word == 'OR') - { - nextOr = true; - } - else - { - var query = new Query(); - query.word = word; - query.or = nextOr; - query.not = nextNot; - this.queries.push(query); - nextOr = false; - nextNot = false; - } - status = 0; - } - break; - case 2: // in quote - if (ch == '"') - { - var word = queryString.slice(currentWordStart, i); - var query = new Query(); - query.word = word; - query.or = nextOr; - query.not = nextNot; - query.raw = true; - this.queries.push(query); - nextOr = false; - nextNot = false; - status = 0; - } - break; - } - } - switch (status) - { - case 0: - break; - case 1: - var query = new Query(); - var word = queryString.slice(currentWordStart, queryString.length); - if (word != 'OR') - { - query.word = word; - query.or = nextOr; - query.not = nextNot; - this.queries.push(query); - } - break; - case 2: - var query = new Query(); - query.word = queryString.slice(currentWordStart, queryString.length); - query.or = nextOr; - query.not = nextNot; - query.raw = true; - this.queries.push(query); - break; - } - return this.queries; - } - - function highlight () : string - { - var result = [] : string[]; - for (var i = 0; i < this.queries.length; i++) - { - var query = this.queries[i]; - if (!query.not) - { - result.push("highlight=" + String.encodeURIComponent(query.word)); - } - } - return '?' + result.join('&'); - } -} diff --git a/web/server/h2o/libh2o/misc/oktavia/src/query.jsx b/web/server/h2o/libh2o/misc/oktavia/src/query.jsx deleted file mode 100644 index 38c52c71a..000000000 --- a/web/server/h2o/libh2o/misc/oktavia/src/query.jsx +++ /dev/null @@ -1,37 +0,0 @@ -class Query -{ - var word : string; - var or : boolean; - var not : boolean; - var raw : boolean; - - function constructor () - { - this.word = ''; - this.or = false; - this.not = false; - this.raw = false; - } - - override function toString () : string - { - var result = [] : string[]; - if (this.or) - { - result.push("OR "); - } - if (this.not) - { - result.push("-"); - } - if (this.raw) - { - result.push('"', this.word, '"'); - } - else - { - result.push(this.word); - } - return result.join(''); - } -} diff --git a/web/server/h2o/libh2o/misc/oktavia/src/sais.jsx b/web/server/h2o/libh2o/misc/oktavia/src/sais.jsx deleted file mode 100644 index 9d8fa8fb6..000000000 --- a/web/server/h2o/libh2o/misc/oktavia/src/sais.jsx +++ /dev/null @@ -1,250 +0,0 @@ -/* Original source code: - * G. Nong, S. Zhang and W. H. Chan, Two Efficient Algorithms for Linear Time Suffix Array Construction, IEEE Transactions on Computers, To Appear - * http://www.cs.sysu.edu.cn/nong/index.files/Two%20Efficient%20Algorithms%20for%20Linear%20Suffix%20Array%20Construction.pdf - */ - -import "bit-vector.jsx"; - -class OArray -{ - var offset : int; - var array : int[]; - - function constructor (array : int[]) - { - this.array = array; - this.offset = 0; - } - - function constructor (array : int[], offset : int) - { - this.array = array; - this.offset = offset; - } - - function get (index : int) : int - { - return this.array[index + this.offset]; - } - - function set (index : int, value : int) : void - { - this.array[index + this.offset] = value; - } - - function isS (index : int) : boolean - { - return this.array[index + this.offset] < this.array[index + this.offset + 1]; - } - - function compare (index1 : int, index2 : int) : boolean - { - return this.array[index1 + this.offset] == this.array[index2 + this.offset]; - } -} - - -class SAIS -{ - static function _isLMS (t : BitVector, i : int) : boolean - { - return i > 0 && t.get(i) && !t.get(i - 1); - } - - // find the start or end of each bucket - static function _getBuckets(s : OArray, bkt : int[], n : int, K : int, end : boolean) : void - { - var sum = 0; - for (var i = 0; i <= K; i++) - { - bkt[i] = 0; // clear all buckets - } - for (var i = 0; i < n; i++) - { - bkt[s.get(i)]++; // compute the size of each bucket - } - for (var i = 0; i <= K; i++) - { - sum += bkt[i]; - bkt[i] = end ? sum : sum - bkt[i]; - } - } - - // compute SAl - static function _induceSAl(t : BitVector, SA : int[], s : OArray, bkt : int[], n : int, K : int, end : boolean) : void - { - SAIS._getBuckets(s, bkt, n, K, end); // find starts of buckets - for (var i = 0; i < n; i++) - { - var j = SA[i] - 1; - if (j >= 0 && !t.get(j)) - { - SA[bkt[s.get(j)]++] = j; - } - } - } - - // compute SAs - static function _induceSAs(t : BitVector, SA : int[], s : OArray, bkt : int[], n : int, K : int, end : boolean) : void - { - SAIS._getBuckets(s, bkt, n, K, end); // find ends of buckets - for (var i = n - 1; i >= 0; i--) - { - var j = SA[i] - 1; - if (j >=0 && t.get(j)) - { - SA[--bkt[s.get(j)]] = j; - } - } - } - - // find the suffix array SA of s[0..n-1] in {1..K}^n - // require s[n-1]=0 (the sentinel!), n>=2 - // use a working space (excluding s and SA) of at most 2.25n+O(1) for a constant alphabet - - static function make(source : string) : int[] - { - var charCodes = [] : int[]; - charCodes.length = source.length; - var maxCode = 0; - for (var i = 0; i < source.length; i++) - { - var code = source.charCodeAt(i); - charCodes[i] = code; - maxCode = (code > maxCode) ? code : maxCode; - } - var SA = [] : int[]; - SA.length = source.length; - var s = new OArray(charCodes); - SAIS._make(s, SA, source.length, maxCode); - return SA; - } - - static function _make(s : OArray, SA : int[], n : int, K : int) : void - { - // Classify the type of each character - var t = new BitVector(); - t.set(n - 2, false); - t.set(n - 1, true); // the sentinel must be in s1, important!!! - for (var i = n - 3; i >= 0; i--) - { - t.set(i, (s.isS(i) || (s.compare(i, i + 1) && t.get(i + 1)))); - } - - // stage 1: reduce the problem by at least 1/2 - // sort all the S-substrings - var bkt = [] : int[]; - bkt.length = K + 1; - SAIS._getBuckets(s, bkt, n, K, true); // find ends of buckets - for (var i = 0; i < n; i++) - { - SA[i] = -1; - } - for (var i = 1; i < n; i++) - { - if (SAIS._isLMS(t, i)) - { - SA[--bkt[s.get(i)]] = i; - } - } - SAIS._induceSAl(t, SA, s, bkt, n, K, false); - SAIS._induceSAs(t, SA, s, bkt, n, K, true); - // compact all the sorted substrings into the first n1 items of SA - // 2*n1 must be not larger than n (proveable) - var n1 = 0; - for (var i = 0; i < n; i++) - { - if (SAIS._isLMS(t, SA[i])) - { - SA[n1++] = SA[i]; - } - } - - // find the lexicographic names of all substrings - for (var i = n1; i < n; i++) - { - SA[i]=-1; // init the name array buffer - } - var name = 0; - var prev = -1; - for (i = 0; i < n1; i++) - { - var pos = SA[i]; - var diff = false; - for (var d = 0; d < n; d++) - { - if (prev == -1 || !s.compare(pos + d, prev + d) || t.get(pos + d) != t.get(prev + d)) - { - diff = true; - break; - } - else if (d > 0 && (SAIS._isLMS(t, pos+d) || SAIS._isLMS(t, prev + d))) - { - break; - } - } - if (diff) - { - name++; - prev = pos; - } - pos = (pos % 2 == 0) ? pos / 2 : (pos - 1) / 2; - SA[n1 + pos] = name - 1; - } - for (var i = n - 1, j = n - 1; i >= n1; i--) - { - if (SA[i] >= 0) - { - SA[j--] = SA[i]; - } - } - - // stage 2: solve the reduced problem - // recurse if names are not yet unique - var SA1 = SA; - var s1 = new OArray(SA, n - n1); - - if (name < n1) - { - SAIS._make(s1, SA1, n1, name - 1); - } - else - { - // generate the suffix array of s1 directly - for (i = 0; i < n1; i++) - { - SA1[s1.get(i)] = i; - } - } - - // stage 3: induce the result for the original problem - - bkt = [] : int[]; - bkt.length = K + 1; - // put all left-most S characters into their buckets - SAIS._getBuckets(s, bkt, n, K, true); // find ends of buckets - for (i = 1, j = 0; i < n; i++) - { - if (SAIS._isLMS(t, i)) - { - s1.set(j++, i); // get p1 - } - } - for (i = 0; i < n1; i++) - { - SA1[i] = s1.get(SA1[i]); // get index in s - } - for (i = n1; i < n; i++) - { - SA[i] = -1; // init SA[n1..n-1] - } - for (i = n1 - 1; i >= 0; i--) - { - j = SA[i]; - SA[i] = -1; - SA[--bkt[s.get(j)]] = j; - } - SAIS._induceSAl(t, SA, s, bkt, n, K, false); - SAIS._induceSAs(t, SA, s, bkt, n, K, true); - } -} diff --git a/web/server/h2o/libh2o/misc/oktavia/src/sax.jsx b/web/server/h2o/libh2o/misc/oktavia/src/sax.jsx deleted file mode 100644 index d34cb8253..000000000 --- a/web/server/h2o/libh2o/misc/oktavia/src/sax.jsx +++ /dev/null @@ -1,1356 +0,0 @@ -// When we pass the MAX_BUFFER_LENGTH position, start checking for buffer overruns. -// When we check, schedule the next check for MAX_BUFFER_LENGTH - (max(buffer lengths)), -// since that's the earliest that a buffer overrun could occur. This way, checks are -// as rare as required, but as often as necessary to ensure never crossing this bound. -// Furthermore, buffers are only tested at most once per write(), so passing a very -// large string into write() might have undesirable effects, but this is manageable by -// the caller, so it is assumed to be safe. Thus, a call to write() may, in the extreme -// edge case, result in creating at most one complete copy of the string passed in. -// Set to Infinity to have unlimited buffers. - - -class Tag -{ - var name : string; - var attributes : Map.; - var isSelfClosing : boolean; - function constructor (name : string) - { - this.name = name; - this.attributes = {} : Map.; - this.isSelfClosing = false; - } -} - -class _Common -{ - static const buffers = [ - "comment", "sgmlDecl", "textNode", "tagName", "doctype", - "procInstName", "procInstBody", "entity", "attribName", - "attribValue", "cdata", "script" - ]; - - static const EVENTS = // for discoverability. - [ "text", - "processinginstruction", - "sgmldeclaration", - "doctype", - "comment", - "attribute", - "opentag", - "closetag", - "opencdata", - "cdata", - "clo_State.CDATA", - "error", - "end", - "ready", - "script", - "opennamespace", - "closenamespace" - ]; - - static const MAX_BUFFER_LENGTH = 64 * 1024; -} - -class _State -{ - static const BEGIN = 1; - static const TEXT = 2; // general stuff - static const TEXT_ENTITY = 3; // & and such. - static const OPEN_WAKA = 4; // < - static const SGML_DECL = 5; // - static const SCRIPT = 33; //