diff options
Diffstat (limited to 'web/server/h2o/libh2o/misc/oktavia/src')
39 files changed, 27007 insertions, 0 deletions
diff --git a/web/server/h2o/libh2o/misc/oktavia/src/binary-util.jsx b/web/server/h2o/libh2o/misc/oktavia/src/binary-util.jsx
new file mode 100644
index 00000000..06d5e758
--- /dev/null
+++ b/web/server/h2o/libh2o/misc/oktavia/src/binary-util.jsx
@@ -0,0 +1,597 @@
+/**
+ * Serialization helpers that encode numbers, strings and lists into a
+ * string of 16-bit code units, together with the matching loaders.
+ */
+class Binary
+{
+    /**
+     * Encodes a number as two code units: floor(num / 65536) first,
+     * then num % 65536.
+     */
+    static function dump32bitNumber (num : number) : string
+    {
+        var result = [String.fromCharCode(Math.floor(num / 65536))];
+        result.push(String.fromCharCode(num % 65536));
+        return result.join("");
+    }
+
+    /**
+     * Reads back a number written by dump32bitNumber from the two code
+     * units at buffer[offset] and buffer[offset + 1].
+     */
+    static function load32bitNumber (buffer : string, offset : int) : number
+    {
+        var result = buffer.charCodeAt(offset) * 65536 + buffer.charCodeAt(offset + 1);
+        return result;
+    }
+
+    /**
+     * Encodes num % 65536 as a single code unit.
+     */
+    static function dump16bitNumber (num : int) : string
+    {
+        return String.fromCharCode(num % 65536);
+    }
+
+    /**
+     * Returns the code unit stored at buffer[offset].
+     */
+    static function load16bitNumber (buffer : string, offset : int) : int
+    {
+        return buffer.charCodeAt(offset);
+    }
+
+    /**
+     * Serializes a string without collecting compression statistics.
+     */
+    static function dumpString (str : string) : string
+    {
+        return Binary.dumpString(str, null);
+    }
+
+    /**
+     * Serializes a string as a one-code-unit header followed by the payload.
+     *
+     * When every character code is <= 255 the payload is packed two
+     * characters per code unit (low byte first) and the header is
+     * length + 32768. Otherwise the header is the plain length and the
+     * payload is the string itself.
+     *
+     * Strings longer than 32767 code units are truncated to 32767.
+     *
+     * @param str    the string to serialize
+     * @param report optional accumulator; receives (length, emitted payload units)
+     * @return the serialized form
+     */
+    static function dumpString (str : string, report : Nullable.<CompressionReport>) : string
+    {
+        // The header's top bit (32768) marks the packed form, which leaves
+        // 15 bits for the length. A length of 32768 would either look like
+        // "packed, length 0" (plain form) or wrap to 0 through the "% 65536"
+        // in dump16bitNumber (packed form), so the cap must be 32767.
+        if (str.length > 32767)
+        {
+            str = str.slice(0, 32767);
+        }
+        var length = str.length;
+        var compress = true;
+        var charCodes = [] : int[];
+        for (var i = 0; i < length; i++)
+        {
+            var charCode = str.charCodeAt(i);
+            if (charCode > 255)
+            {
+                // one wide character disables packing for the whole string
+                compress = false;
+                break;
+            }
+            charCodes.push(charCode);
+        }
+        if (compress)
+        {
+            var result = [Binary.dump16bitNumber(length + 32768)];
+            for (var i = 0; i < length; i += 2)
+            {
+                var bytes = charCodes[i];
+                if (i != length - 1)
+                {
+                    // second character of the pair goes into the high byte
+                    bytes += charCodes[i + 1] << 8;
+                }
+                result.push(Binary.dump16bitNumber(bytes));
+            }
+            if (report)
+            {
+                report.add(length, Math.ceil(length / 2));
+            }
+        }
+        else
+        {
+            var result = [Binary.dump16bitNumber(length), str];
+            if (report)
+            {
+                report.add(length, length);
+            }
+        }
+        return result.join('');
+    }
+
+    static function loadString (buffer : string, offset : int) : LoadedStringResult
+    {
+        return new
LoadedStringResult(buffer, offset); + } + + static function dumpStringList (strList : string[]) : string + { + return Binary.dumpStringList(strList, null); + } + + static function dumpStringList (strList : string[], report : Nullable.<CompressionReport>) : string + { + var result = [Binary.dump32bitNumber(strList.length)]; + for (var i = 0; i < strList.length; i++) + { + result.push(Binary.dumpString(strList[i], report)); + } + return result.join(''); + } + + static function loadStringList (buffer : string, offset : int) : LoadedStringListResult + { + return new LoadedStringListResult(buffer, offset); + } + + static function dumpStringListMap (strMap : Map.<string[]>) : string + { + return Binary.dumpStringListMap(strMap, null); + } + + static function dumpStringListMap (strMap : Map.<string[]>, report : Nullable.<CompressionReport>) : string + { + var result = [] : string[]; + var counter = 0; + for (var key in strMap) + { + result.push(Binary.dumpString(key, report)); + result.push(Binary.dumpStringList(strMap[key], report)); + counter++; + } + return Binary.dump32bitNumber(counter) + result.join(''); + } + + static function loadStringListMap (buffer : string, offset : int) : LoadedStringListMapResult + { + return new LoadedStringListMapResult(buffer, offset); + } + + static function dump32bitNumberList (array : number[]) : string + { + return Binary.dump32bitNumberList(array, null); + } + + static function dump32bitNumberList (array : number[], report : Nullable.<CompressionReport>) : string + { + var result = [Binary.dump32bitNumber(array.length)] : string[]; + var index = 0; + var inputLength = array.length; + while (index < inputLength) + { + if (array[index] == 0) + { + var length = Binary._countZero(array, index); + result.push(Binary._zeroBlock(length)); + index += length; + } + else if (Binary._shouldZebraCode(array, index)) + { + result.push(Binary._createZebraCode(array, index)); + index = Math.min(array.length, index + 15); + } + else + { + var length 
= Binary._searchDoubleZero(array, index); + result.push(Binary._nonZeroBlock(array, index, length)); + if (length == 0) + { + throw new Error(''); + } + index += length; + } + } + var resultString = result.join(''); + if (report) + { + report.add(array.length * 2 + 2, resultString.length); + } + return resultString; + } + + static function load32bitNumberList (buffer :string, offset : int) : LoadedNumberListResult + { + return new LoadedNumberListResult(buffer, offset); + } + + static function _countZero (array : number[], offset : int) : int + { + for (var i = offset; i < array.length; i++) + { + if (array[i] != 0) + { + return i - offset; + } + } + return array.length - offset; + } + + static function _zeroBlock (length : int) : string + { + var result = [] : string[]; + while (length > 0) + { + if (length > 16384) + { + result.push(Binary.dump16bitNumber(16384 - 1)); + length -= 16384; + } + else + { + result.push(Binary.dump16bitNumber(length - 1)); + length = 0; + } + } + return result.join(''); + } + + static function _shouldZebraCode(array : number[], offset : int) : boolean + { + if (array.length - offset < 16) + { + return true; + } + var change = 0; + var isLastZero = false; + for (var i = offset; i < offset + 15; i++) + { + if (array[i] == 0) + { + if (!isLastZero) + { + isLastZero = true; + change++; + } + } + else + { + if (isLastZero) + { + isLastZero = false; + change++; + } + } + } + return change > 2; + } + + static function _searchDoubleZero (array : number[], offset : int) : int + { + var isLastZero = false; + for (var i = offset; i < array.length; i++) + { + if (array[i] == 0) + { + if (isLastZero) + { + return i - offset - 1; + } + isLastZero = true; + } + else + { + isLastZero = false; + } + } + return array.length - offset; + } + + static function _nonZeroBlock (array : number[], offset : int, length : int) : string + { + var result = [] : string[]; + while (length > 0) + { + var blockLength : int; + if (length > 16384) + { + blockLength = 
16384; + length -= 16384; + } + else + { + blockLength = length; + length = 0; + } + result.push(Binary.dump16bitNumber((blockLength - 1) + 0x4000)); + for (var i = offset; i < offset + blockLength; i++) + { + result.push(Binary.dump32bitNumber(array[i])); + } + offset += blockLength; + } + return result.join(''); + } + + static function _createZebraCode (array : number[], offset : int) : string + { + var last = Math.min(offset + 15, array.length); + var code = 0x8000; + var result = [] : string[]; + for (var i = offset; i < last; i++) + { + if (array[i] != 0) + { + result.push(Binary.dump32bitNumber(array[i])); + code = code + (0x1 << (i - offset)); + } + } + return String.fromCharCode(code) + result.join(''); + } + + /* These base64 functions are based on http://www.onicos.com/staff/iz/amuse/javascript/expert/base64.txt + * original license: + * Copyright (C) 1999 Masanao Izumo <iz@onicos.co.jp> + * Version: 1.0 + * LastModified: Dec 25 1999 + * This library is free. You can redistribute it and/or modify it. 
+ */ + static const _base64EncodeChars = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; + + static function base64encode (str : string) : string + { + var out = [] : string[]; + var source = [] : int[]; + for (var i = 0; i < str.length; i++) + { + var code = str.charCodeAt(i); + source.push(code & 0x00ff, code >>> 8); + } + var len = str.length * 2; + var i = 0; + while (i < len) + { + var c1 = source[i++] & 0xff; + if (i == len) + { + out.push(Binary._base64EncodeChars.charAt(c1 >> 2)); + out.push(Binary._base64EncodeChars.charAt((c1 & 0x3) << 4)); + out.push("=="); + break; + } + var c2 = source[i++]; + if (i == len) + { + out.push(Binary._base64EncodeChars.charAt(c1 >> 2)); + out.push(Binary._base64EncodeChars.charAt(((c1 & 0x3)<< 4) | ((c2 & 0xF0) >> 4))); + out.push(Binary._base64EncodeChars.charAt((c2 & 0xF) << 2)); + out.push("="); + break; + } + var c3 = source[i++]; + out.push(Binary._base64EncodeChars.charAt(c1 >> 2)); + out.push(Binary._base64EncodeChars.charAt(((c1 & 0x3)<< 4) | ((c2 & 0xF0) >> 4))); + out.push(Binary._base64EncodeChars.charAt(((c2 & 0xF) << 2) | ((c3 & 0xC0) >>6))); + out.push(Binary._base64EncodeChars.charAt(c3 & 0x3F)); + } + return out.join(''); + } + + static const _base64DecodeChars = [ + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, -1, -1, 63, + 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, -1, -1, -1, -1, + -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, + 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1, + -1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, + 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, -1, -1, -1, -1, -1]; + + static function _mergeCharCode (source : int[]) : string + { + var result = [] : string[]; + for (var i = 0; i < source.length; i += 2) + { + result.push(String.fromCharCode(source[i] + (source[i + 1] << 8))); + } 
+ return result.join(''); + } + + static function base64decode (str : string) : string + { + var len = str.length; + var i = 0; + var out = [] : int[]; + + while (i < len) + { + var c1, c2, c3, c4 : int; + + /* c1 */ + do { + c1 = Binary._base64DecodeChars[str.charCodeAt(i++) & 0xff]; + } while(i < len && c1 == -1); + if (c1 == -1) + { + break; + } + /* c2 */ + do { + c2 = Binary._base64DecodeChars[str.charCodeAt(i++) & 0xff]; + } while(i < len && c2 == -1); + if (c2 == -1) + { + break; + } + out.push((c1 << 2) | ((c2 & 0x30) >> 4)); + /* c3 */ + do { + c3 = str.charCodeAt(i++) & 0xff; + if (c3 == 61) + { + return Binary._mergeCharCode(out); + } + c3 = Binary._base64DecodeChars[c3]; + } while(i < len && c3 == -1); + if (c3 == -1) + { + break; + } + out.push(((c2 & 0XF) << 4) | ((c3 & 0x3C) >> 2)); + + /* c4 */ + do { + c4 = str.charCodeAt(i++) & 0xff; + if (c4 == 61) + { + return Binary._mergeCharCode(out); + } + c4 = Binary._base64DecodeChars[c4]; + } while(i < len && c4 == -1); + if (c4 == -1) + { + break; + } + out.push(((c3 & 0x03) << 6) | c4); + } + return Binary._mergeCharCode(out); + } +} + +class LoadedStringResult +{ + var result : string; + var offset : int; + + function constructor (data : string, offset : int) + { + var strLength = Binary.load16bitNumber(data, offset++); + if (strLength > 32767) + { + strLength = strLength - 32768; + var bytes = [] : string[]; + + for (var i = 0; i < strLength; i += 2) + { + var code = data.charCodeAt(offset); + bytes.push(String.fromCharCode(code & 0x00ff)); + if (i != strLength - 1) + { + bytes.push(String.fromCharCode(code >>> 8)); + } + offset++; + } + this.result = bytes.join(''); + this.offset = offset; + } + else + { + this.result = data.slice(offset, offset + strLength); + this.offset = offset + strLength; + } + } +} + +class LoadedStringListResult +{ + var result : string[]; + var offset : int; + + function constructor (data : string, offset : int) + { + this.result = [] : string[]; + + var length = 
Binary.load32bitNumber(data, offset); + offset += 2; + for (var i = 0; i < length; i++) + { + var strLength = Binary.load16bitNumber(data, offset++); + var resultStr : string; + if (strLength > 32767) + { + var strLength = strLength - 32768; + var bytes = [] : string[]; + for (var j = 0; j < strLength; j += 2) + { + var code = data.charCodeAt(offset); + bytes.push(String.fromCharCode(code & 0x00ff)); + if (j != strLength - 1) + { + bytes.push(String.fromCharCode(code >>> 8)); + } + offset++; + } + resultStr = bytes.join(''); + } + else + { + resultStr = data.slice(offset, offset + strLength); + offset = offset + strLength; + } + this.result.push(resultStr); + } + this.offset = offset; + } +} + +class LoadedStringListMapResult +{ + var result : Map.<string[]>; + var offset : int; + + function constructor (data : string, offset : int) + { + this.result = {} : Map.<string[]>; + + var length = Binary.load32bitNumber(data, offset); + offset += 2; + for (var i = 0; i < length; i++) + { + var keyResult = Binary.loadString(data, offset); + var valueResult = Binary.loadStringList(data, keyResult.offset); + this.result[keyResult.result] = valueResult.result; + offset = valueResult.offset; + } + this.offset = offset; + } +} + +class LoadedNumberListResult +{ + var result : number[]; + var offset : int; + + function constructor(data : string, offset : int) + { + var resultLength = Binary.load32bitNumber(data, offset); + var originalOffset = offset; + offset += 2; + var result = [] : number[]; + while (result.length < resultLength) + { + var tag = data.charCodeAt(offset++); + if ((tag >>> 15) == 1) // zebra + { + var length = Math.min(resultLength - result.length, 15); + for (var i = 0; i < length; i++) + { + if ((tag >>> i) & 0x1) + { + result.push(Binary.load32bitNumber(data, offset)); + offset += 2; + } + else + { + result.push(0); + } + } + } + else if ((tag >>> 14) == 1) // non-zero + { + var length = tag - 0x4000 + 1; + for (var i = 0; i < length; i++) + { + 
result.push(Binary.load32bitNumber(data, offset));
+                    offset += 2;
+                }
+            }
+            else // zero
+            {
+                var length = tag + 1;
+                for (var i = 0; i < length; i++)
+                {
+                    result.push(0);
+                }
+            }
+        }
+        this.result = result;
+        this.offset = offset;
+    }
+}
+
+/**
+ * Accumulates input and output sizes reported by the dump functions so
+ * that an overall size ratio can be printed.
+ */
+class CompressionReport
+{
+    var source : int;
+    var result : int;
+    function constructor ()
+    {
+        this.source = 0;
+        this.result = 0;
+    }
+
+    /**
+     * Adds one measurement: `source` units of input became `result`
+     * units of output.
+     */
+    function add (source : int, result : int) : void
+    {
+        this.source += source;
+        this.result += result;
+    }
+
+    /**
+     * Returns the output size as a rounded percentage of the input size.
+     * Returns 0 when nothing has been recorded, instead of dividing by zero.
+     */
+    function rate () : int
+    {
+        if (this.source == 0)
+        {
+            return 0;
+        }
+        return Math.round(this.result * 100.0 / this.source);
+    }
+}
diff --git a/web/server/h2o/libh2o/misc/oktavia/src/bit-vector.jsx b/web/server/h2o/libh2o/misc/oktavia/src/bit-vector.jsx
new file mode 100644
index 00000000..b366e43a
--- /dev/null
+++ b/web/server/h2o/libh2o/misc/oktavia/src/bit-vector.jsx
@@ -0,0 +1,295 @@
+/**
+ * This is a JSX version of shellinford library:
+ * https://code.google.com/p/shellinford/
+ *
+ * License: http://shibu.mit-license.org/
+ */
+
+import "binary-util.jsx";
+
+class BitVector
+{
+    static const SMALL_BLOCK_SIZE : int = 32;
+    static const LARGE_BLOCK_SIZE : int = 256;
+    static const BLOCK_RATE : int = 8;
+
+    var _v : number[];
+    var _r : number[];
+    var _size : int;
+    var _size1 : int;
+
+    function constructor ()
+    {
+        this._r = [] : number[];
+        this._v = [] : number[];
+        this.clear();
+    }
+
+    /**
+     * Rebuilds the rank directory from the current words in _v.
+     *
+     * For every BLOCK_RATE-th word, _r receives the number of set bits
+     * counted so far, and _size1 ends up as the total number of set bits.
+     * _r is emptied first so that calling build() again does not leave
+     * stale entries in front of the new ones.
+     */
+    function build () : void
+    {
+        this._r.length = 0;
+        this._size1 = 0;
+        for (var i = 0; i < this._v.length; i++)
+        {
+            if (i % BitVector.BLOCK_RATE == 0)
+            {
+                this._r.push(this.size(true));
+            }
+            this._size1 += this._rank32(this._v[i], BitVector.SMALL_BLOCK_SIZE, true);
+        }
+    }
+
+    /**
+     * Empties the bit storage and the rank directory and resets both sizes.
+     */
+    function clear () : void
+    {
+        this._v.length = 0;
+        this._r.length = 0;
+        this._size = 0;
+        this._size1 = 0;
+    }
+
+    /**
+     * Returns the number of bits held (highest index set so far, plus one).
+     */
+    function size () : int
+    {
+        return this._size;
+    }
+
+    function size (b : boolean) : int
+    {
+        return b ?
(this._size1) : (this._size - this._size1); + } + + function set (value : int) : void + { + this.set(value, true); + } + + function set (value : int, flag : boolean) : void + { + if (value >= this.size()) + { + this._size = value + 1; + } + var q : int = value / BitVector.SMALL_BLOCK_SIZE; + var r : int = value % BitVector.SMALL_BLOCK_SIZE; + while (q >= this._v.length) + { + this._v.push(0); + } + var m : int = 0x1 << r; + if (flag) + { + this._v[q] |= m; + } + else + { + this._v[q] &= ~m; + } + } + + function get (value : int) : boolean + { + if (value >= this.size()) + { + throw new Error("BitVector.get() : range error"); + } + var q : int = value / BitVector.SMALL_BLOCK_SIZE; + var r : int = value % BitVector.SMALL_BLOCK_SIZE; + var m : int = 0x1 << r; + return (this._v[q] & m) as boolean; + } + + function rank (i : int) : int + { + return this.rank(i, true); + } + + function rank (i : int, b : boolean) : int + { + if (i > this.size()) + { + throw new Error("BitVector.rank() : range error"); + } + if (i == 0) + { + return 0; + } + i--; + var q_large : int = Math.floor(i / BitVector.LARGE_BLOCK_SIZE); + var q_small : int = Math.floor(i / BitVector.SMALL_BLOCK_SIZE); + var r : int = Math.floor(i % BitVector.SMALL_BLOCK_SIZE); + var rank : int = this._r[q_large]; + if (!b) + { + rank = q_large * BitVector.LARGE_BLOCK_SIZE - rank; + } + var begin = q_large * BitVector.BLOCK_RATE; + for (var j = begin; j < q_small; j++) + { + rank += this._rank32(this._v[j], BitVector.SMALL_BLOCK_SIZE, b); + } + rank += this._rank32(this._v[q_small], r + 1, b); + return rank; + } + + function select(i : int) : int + { + return this.select(i, true); + } + + function select(i : int, b : boolean) : int + { + if (i >= this.size(b)) + { + throw new Error("BitVector.select() : range error"); + } + + var left = 0; + var right = this._r.length; + while (left < right) + { + var pivot = Math.floor((left + right) / 2); + var rank = this._r[pivot]; + if (!b) + { + rank = pivot * 
BitVector.LARGE_BLOCK_SIZE - rank; + } + if (i < rank) + { + right = pivot; + } + else + { + left = pivot + 1; + } + } + right--; + + if (b) + { + i -= this._r[right]; + } + else + { + i -= right * BitVector.LARGE_BLOCK_SIZE - this._r[right]; + } + var j = right * BitVector.BLOCK_RATE; + while (1) + { + var rank = this._rank32(this._v[j], BitVector.SMALL_BLOCK_SIZE, b); + if (i < rank) + { + break; + } + j++; + i -= rank; + } + return j * BitVector.SMALL_BLOCK_SIZE + this._select32(this._v[j], i, b); + } + + function _rank32 (x : int, i : int, b : boolean) : int + { + if (!b) + { + x = ~x; + } + x <<= (BitVector.SMALL_BLOCK_SIZE - i); + x = ((x & 0xaaaaaaaa) >>> 1) + + (x & 0x55555555); + x = ((x & 0xcccccccc) >>> 2) + + (x & 0x33333333); + x = ((x & 0xf0f0f0f0) >>> 4) + + (x & 0x0f0f0f0f); + x = ((x & 0xff00ff00) >>> 8) + + (x & 0x00ff00ff); + x = ((x & 0xffff0000) >>> 16) + + (x & 0x0000ffff); + return x; + } + + function _select32(x : int, i : int, b : boolean) : int + { + if (!b) + { + x = ~x; + } + var x1 = ((x & 0xaaaaaaaa) >>> 1) + + (x & 0x55555555); + var x2 = ((x1 & 0xcccccccc) >>> 2) + + (x1 & 0x33333333); + var x3 = ((x2 & 0xf0f0f0f0) >>> 4) + + (x2 & 0x0f0f0f0f); + var x4 = ((x3 & 0xff00ff00) >>> 8) + + (x3 & 0x00ff00ff); + var x5 = ((x4 & 0xffff0000) >>> 16) + + (x4 & 0x0000ffff); + i++; + var pos = 0; + var v5 = x5 & 0xffffffff; + if (i > v5) + { + i -= v5; + pos += 32; + } + var v4 = (x4 >>> pos) & 0x0000ffff; + if (i > v4) + { + i -= v4; + pos += 16; + } + var v3 = (x3 >>> pos) & 0x000000ff; + if (i > v3) + { + i -= v3; + pos += 8; + } + var v2 = (x2 >>> pos) & 0x0000000f; + if (i > v2) + { + i -= v2; + pos += 4; + } + var v1 = (x1 >>> pos) & 0x00000003; + if (i > v1) + { + i -= v1; + pos += 2; + } + var v0 = (x >>> pos) & 0x00000001; + if (i > v0) + { + i -= v0; + pos += 1; + } + return pos; + } + + function dump () : string + { + var contents = [] : string[]; + contents.push(Binary.dump32bitNumber(this._size)); + 
contents.push(Binary.dump32bitNumberList(this._v)); + return contents.join(''); + } + + function dump (report : CompressionReport) : string + { + var contents = [] : string[]; + contents.push(Binary.dump32bitNumber(this._size)); + report.add(2, 2); + contents.push(Binary.dump32bitNumberList(this._v, report)); + return contents.join(''); + } + + function load (data : string) : int + { + return this.load(data, 0); + } + + function load (data : string, offset : int) : int + { + this.clear(); + this._size = Binary.load32bitNumber(data, offset); + var result = Binary.load32bitNumberList(data, offset + 2); + this._v = result.result; + this.build(); + return result.offset; + } +} diff --git a/web/server/h2o/libh2o/misc/oktavia/src/burrows-wheeler-transform.jsx b/web/server/h2o/libh2o/misc/oktavia/src/burrows-wheeler-transform.jsx new file mode 100644 index 00000000..4bdd72fe --- /dev/null +++ b/web/server/h2o/libh2o/misc/oktavia/src/burrows-wheeler-transform.jsx @@ -0,0 +1,73 @@ +import "sais.jsx"; + +class BurrowsWheelerTransform +{ + static var END_MARKER = String.fromCharCode(0); + + var _str : string; + var _size : int; + var _head : int; + var _suffixarray : int[]; + + function constructor () + { + this._size = 0; + this._head = 0; + this._suffixarray = [] : int[]; + } + + function size () : int + { + return this._size; + } + + function head () : int + { + return this._head; + } + + function clear () : void + { + this._str = ""; + this._size = 0; + this._head = 0; + this._suffixarray.length = 0; + } + + function build (str : string) : void + { + this._str = str; + this._size = this._str.length; + this._suffixarray = SAIS.make(str); + this._head = this._suffixarray.indexOf(0); + } + + function get (i : int) : string + { + var size = this.size(); + if (i >= size) + { + throw new Error("BurrowsWheelerTransform.get() : range error"); + } + var index = (this._suffixarray[i] + size - 1) % size; + return this._str.charAt(index); + } + + function get () : string + { + var 
str = [] : string [];
+        var size = this.size();
+        for (var i = 0; i < size; i++)
+        {
+            str.push(this.get(i));
+        }
+        return str.join("");
+    }
+
+    /**
+     * Returns the transformed string with the first occurrence of
+     * END_MARKER replaced by `replace`.
+     */
+    function get (replace : string) : string
+    {
+        var result = this.get();
+        return result.replace(BurrowsWheelerTransform.END_MARKER, replace);
+    }
+}
+
diff --git a/web/server/h2o/libh2o/misc/oktavia/src/csvparser.jsx b/web/server/h2o/libh2o/misc/oktavia/src/csvparser.jsx
new file mode 100644
index 00000000..64ed6ac8
--- /dev/null
+++ b/web/server/h2o/libh2o/misc/oktavia/src/csvparser.jsx
@@ -0,0 +1,22 @@
+import "oktavia.jsx";
+import "stemmer/stemmer.jsx";
+
+
+/**
+ * Placeholder CSV front end: it stores its configuration and a fresh
+ * Oktavia instance, and parse() currently only logs the path it is given.
+ */
+class CSVParser
+{
+    var oktavia : Oktavia;
+    var root : string;
+    var stemmer : Nullable.<Stemmer>;
+
+    function constructor (root : string, stemmer : Stemmer)
+    {
+        this.oktavia = new Oktavia();
+        this.root = root;
+        this.stemmer = stemmer;
+    }
+
+    function parse (filepath : string) : void
+    {
+        log (filepath);
+    }
+}
diff --git a/web/server/h2o/libh2o/misc/oktavia/src/fm-index.jsx b/web/server/h2o/libh2o/misc/oktavia/src/fm-index.jsx
new file mode 100644
index 00000000..502b4fcf
--- /dev/null
+++ b/web/server/h2o/libh2o/misc/oktavia/src/fm-index.jsx
@@ -0,0 +1,323 @@
+/**
+ * This is a JSX version of shellinford library:
+ * https://code.google.com/p/shellinford/
+ *
+ * License: http://shibu.mit-license.org/
+ */
+
+import "wavelet-matrix.jsx";
+import "bit-vector.jsx";
+import "burrows-wheeler-transform.jsx";
+import "binary-util.jsx";
+import "console.jsx";
+
+
+class FMIndex
+{
+    var _substr : string;
+    var _ddic : int;
+    var _ssize : int;
+    var _head : int;
+    var _sv : WaveletMatrix;
+    var _posdic : int[];
+    var _idic : int[];
+    var _rlt : int[];
+
+    /**
+     * Creates an empty index. Every counter starts at zero and _rlt is
+     * pre-sized to 65536 slots.
+     */
+    function constructor ()
+    {
+        // each field gets its own statement; the original joined the first
+        // two with a comma operator and never initialised _ssize
+        this._ddic = 0;
+        this._ssize = 0;
+        this._head = 0;
+        this._substr = "";
+        this._sv = new WaveletMatrix();
+        this._posdic = [] : int[];
+        this._idic = [] : int[];
+        this._rlt = [] : int[];
+        this._rlt.length = 65536;
+    }
+
+    function clear () : void
+    {
+        this._sv.clear();
+        this._posdic.length = 0;
+ this._idic.length = 0; + this._ddic = 0; + this._head = 0; + this._substr = ""; + } + + function size () : int + { + return this._sv.size(); + } + + function contentSize () : int + { + return this._substr.length; + } + + function getRows (key : string) : int + { + var pos = [] : int[]; + return this.getRows(key, pos); + } + function getRows (key : string, pos : int[]) : int + { + var i = key.length - 1; + var code = key.charCodeAt(i); + var first = this._rlt[code] + 1; + var last = this._rlt[code + 1]; + while (first <= last) + { + if (i == 0) + { + pos[0] = --first; + pos[1] = --last; + return (last - first + 1); + } + i--; + var c = key.charCodeAt(i); + first = this._rlt[c] + this._sv.rank(first - 1, c) + 1; + last = this._rlt[c] + this._sv.rank(last, c); + } + return 0; + } + + function getPosition (i : int) : int + { + if (i >= this.size()) + { + throw new Error("FMIndex.getPosition() : range error"); + } + var pos = 0; + while (i != this._head) + { + if ((i % this._ddic) == 0) + { + pos += (this._posdic[i / this._ddic] + 1); + break; + } + var c = this._sv.get(i); + i = this._rlt[c] + this._sv.rank(i, c); //LF + pos++; + } + return pos % this.size(); + } + + function getSubstring (pos : int, len : int) : string + { + if (pos >= this.size()) + { + throw new Error("FMIndex.getSubstring() : range error"); + } + var pos_end = Math.min(pos + len, this.size()); + var pos_tmp = this.size() - 1; + var i = this._head; + var pos_idic = Math.floor((pos_end + this._ddic - 2) / this._ddic); + if (pos_idic < this._idic.length) + { + pos_tmp = pos_idic * this._ddic; + i = this._idic[pos_idic]; + } + + var substr = ""; + while (pos_tmp >= pos) + { + var c = this._sv.get(i); + i = this._rlt[c] + this._sv.rank(i, c); //LF + if (pos_tmp < pos_end) + { + substr = String.fromCharCode(c) + substr; + } + if (pos_tmp == 0) + { + break; + } + pos_tmp--; + } + return substr; + } + + function build () : void + { + this.build(String.fromCharCode(0), 65535, 20, false); + } + + function 
build(end_marker : string, ddic : int, verbose : boolean) : void + { + this.build(end_marker, 65535, ddic, verbose); + } + + function build(end_marker : string, maxChar : int, ddic : int, verbose : boolean) : void + { + if (verbose) + { + console.time("building burrows-wheeler transform"); + } + this._substr += end_marker; + var b = new BurrowsWheelerTransform(); + b.build(this._substr); + var s = b.get(); + this._ssize = s.length; + this._head = b.head(); + b.clear(); + this._substr = ""; + if (verbose) + { + console.timeEnd("building burrows-wheeler transform"); + } + if (verbose) + { + console.time("building wavelet matrix"); + } + this._sv.setMaxCharCode(maxChar); + if (verbose) + { + console.log(" maxCharCode: ", maxChar); + console.log(" bitSize: ", this._sv.bitsize()); + } + this._sv.build(s); + if (verbose) + { + console.timeEnd("building wavelet matrix"); + } + + if (verbose) + { + console.time("caching rank less than"); + } + for (var c = 0; c < maxChar; c++) + { + this._rlt[c] = this._sv.rank_less_than(this._sv.size(), c); + } + if (verbose) + { + console.timeEnd("caching rank less than"); + } + this._ddic = ddic; + if (verbose) + { + console.time("building dictionaries"); + } + this._buildDictionaries(); + if (verbose) + { + console.timeEnd("building dictionaries"); + console.log(''); + } + } + + function _buildDictionaries () : void + { + for (var i = 0; i < (this._ssize / this._ddic + 1); i++) + { + this._posdic.push(0); + this._idic.push(0); + } + var i = this._head; + var pos = this.size() - 1; + do { + if ((i % this._ddic) == 0) + { + this._posdic[Math.floor(i / this._ddic)] = pos; + } + if ((pos % this._ddic) == 0) + { + this._idic[Math.floor(pos / this._ddic)] = i; + } + var c = this._sv.get(i); + i = this._rlt[c] + this._sv.rank(i, c); //LF + pos--; + } while (i != this._head); + } + + function push (doc : string) : void + { + if (doc.length <= 0) + { + throw new Error("FMIndex::push(): empty string"); + } + this._substr += doc; + } + + function 
search (keyword : string) : int[]
+    {
+        // getRows() writes the matching row range into position[0] and
+        // position[1] and returns how many rows matched.
+        var result = [] : int[];
+        var position = [] : int[];
+        var rows = this.getRows(keyword, position);
+        if (rows > 0)
+        {
+            var first = position[0];
+            var last = position[1];
+            for (var i = first; i <= last; i++)
+            {
+                result.push(this.getPosition(i));
+            }
+        }
+        return result;
+    }
+
+    /**
+     * Serializes the index without logging.
+     */
+    function dump () : string
+    {
+        return this.dump(false);
+    }
+
+    /**
+     * Serializes the index. The output is _ddic, _ssize and _head, then the
+     * wavelet matrix, then the dictionary length followed by every _posdic
+     * entry and every _idic entry, each number written with
+     * Binary.dump32bitNumber.
+     *
+     * @param verbose when true, size information is written to the console
+     */
+    function dump (verbose : boolean) : string
+    {
+        var contents = [] : string[];
+        var report = new CompressionReport();
+        contents.push(Binary.dump32bitNumber(this._ddic));
+        contents.push(Binary.dump32bitNumber(this._ssize));
+        contents.push(Binary.dump32bitNumber(this._head));
+        report.add(6, 6);
+        contents.push(this._sv.dump(report));
+        if (verbose)
+        {
+            console.log("Serializing FM-index");
+            // contents[3] is the wavelet matrix dump pushed just above
+            console.log(' Wavelet Matrix: ' + (contents[3].length * 2) as string + ' bytes (' + report.rate() as string + '%)');
+        }
+        contents.push(Binary.dump32bitNumber(this._posdic.length));
+        for (var i in this._posdic)
+        {
+            contents.push(Binary.dump32bitNumber(this._posdic[i]));
+        }
+        for (var i in this._idic)
+        {
+            contents.push(Binary.dump32bitNumber(this._idic[i]));
+        }
+        if (verbose)
+        {
+            console.log(' Dictionary Cache: ' + (this._idic.length * 16) as string + ' bytes');
+        }
+        return contents.join("");
+    }
+
+    /**
+     * Loads an index serialized by dump(), starting at offset 0.
+     */
+    function load (data : string) : int
+    {
+        return this.load(data, 0);
+    }
+
+    /**
+     * Loads an index serialized by dump() from `data` starting at `offset`
+     * and returns the offset just past the consumed data.
+     */
+    function load (data : string, offset : int) : int
+    {
+        this._ddic = Binary.load32bitNumber(data, offset);
+        this._ssize = Binary.load32bitNumber(data, offset + 2);
+        this._head = Binary.load32bitNumber(data, offset + 4);
+        offset = this._sv.load(data, offset + 6);
+        var maxChar = Math.pow(2, this._sv.bitsize());
+        for (var c = 0; c < maxChar; c++)
+        {
+            this._rlt[c] = this._sv.rank_less_than(this._sv.size(), c);
+        }
+        var size = Binary.load32bitNumber(data, offset);
+        offset += 2;
+        // The loops below append, so drop whatever an earlier build() or
+        // load() left behind; otherwise the loaded entries would land after
+        // the stale ones.
+        this._posdic.length = 0;
+        this._idic.length = 0;
+        for (var i = 0; i < size; i++, offset += 2)
+        {
+
this._posdic.push(Binary.load32bitNumber(data, offset)); + } + for (var i = 0; i < size; i++, offset += 2) + { + this._idic.push(Binary.load32bitNumber(data, offset)); + } + return offset; + } +} diff --git a/web/server/h2o/libh2o/misc/oktavia/src/getopt.jsx b/web/server/h2o/libh2o/misc/oktavia/src/getopt.jsx new file mode 100644 index 00000000..56db655a --- /dev/null +++ b/web/server/h2o/libh2o/misc/oktavia/src/getopt.jsx @@ -0,0 +1,356 @@ +/* + * getopt.js: node.js implementation of POSIX getopt() (and then some) + * + * Copyright 2011 David Pacheco. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +import "console.jsx"; + +class CommandOption +{ + var option : string; + var optarg : Nullable.<string>; + var error : boolean; + + function constructor (option : string) + { + this.option = option; + this.optarg = null; + this.error = false; + } + + function constructor (option : string, optarg : string) + { + this.option = option; + this.optarg = optarg; + this.error = false; + } + + function constructor (option : string, optarg : string, error : boolean) + { + this.option = option; + this.optarg = optarg; + this.error = error; + } +} + + +/* + * The BasicParser is our primary interface to the outside world. The + * documentation for this object and its public methods is contained in + * the included README.md. + */ +class BasicParser +{ + var _argv : string[]; + var _options : Map.<boolean>; + var _aliases : Map.<string>; + var _optind : int; + var _subind : int; + var _silent : boolean; + var _extraoptions : boolean; + + function constructor (optstring : string, argv : string[]) + { + this._argv = argv; + this._options = {} : Map.<boolean>; + this._aliases = {} : Map.<string>; + this._optind = 0; + this._subind = 0; + this._extraoptions = false; + + this._parseOptstr(optstring); + } + + static function _makeError (msg : string) : Error + { + return (new Error('getopt: ' + msg)); + } + + /* + * Parse the option string and update the following fields: + * + * _silent Whether to log errors to stderr. Silent mode is + * indicated by a leading ':' in the option string. + * + * _options Maps valid single-letter-options to booleans indicating + * whether each option is required. + * + * _aliases Maps valid long options to the corresponding + * single-letter short option. 
+ */ + function _parseOptstr (optstr : string) : void + { + var i = 0; + + if (optstr.length > 0 && optstr.slice(0, 1) == ':') + { + this._silent = true; + i++; + } + else + { + this._silent = false; + } + while (i < optstr.length) + { + var chr = optstr.slice(i, i + 1); + var arg = false; + + /* note: \w also admits '_' in addition to letters and digits */ + if (!/^[\w\d]$/.test(chr)) + { + throw new Error('invalid optstring: only alphanumeric ' + + 'characters may be used as options: ' + chr); + } + + /* a ':' directly after the letter marks an option that takes an argument */ + if (i + 1 < optstr.length && optstr.slice(i + 1, i + 2) == ':') + { + arg = true; + i++; + } + + this._options[chr] = arg; + + /* zero or more "(alias)" groups may follow; each maps a long name to chr */ + while (i + 1 < optstr.length && optstr.slice(i + 1, i + 2) == '(') + { + i++; + var cp = optstr.indexOf(')', i + 1); + if (cp == -1) + { + throw new Error('invalid optstring: missing ' + + '")" to match "(" at char ' + i as string); + } + var alias = optstr.substring(i + 1, cp); + this._aliases[alias] = chr; + i = cp; + } + i++; + } + } + + /* Returns the index of the next element of _argv to be processed. */ + function optind () : int + { + return this._optind; + } + + /* + * For documentation on what getopt() does, see README.md. The following + * implementation invariants are maintained by getopt() and its helper methods: + * + * this._optind Refers to the element of _argv that contains + * the next argument to be processed. This may + * reach _argv.length, in which case the end of + * input has been reached. + * + * this._subind Refers to the character inside + * this._argv[this._optind] which begins + * the next option to be processed. This may never + * exceed the length of _argv[_optind], so + * when incrementing this value we must always + * check if we should instead increment optind and + * reset subind to 0. + * + * That is, when any of these functions is entered, the above indices' values + * are as described above. getopt() itself and getoptArgument() may both be + * called at the end of the input, so they check whether optind has reached + * argv.length. 
getoptShort() and getoptLong() are called only when the indices + * already point to a valid short or long option, respectively. + * + * getopt() processes the next option as follows: + * + * o If _optind >= _argv.length, then we already parsed all arguments + * and null is returned. + * + * o If _extraoptions is set (a non-option argument was seen earlier), the + * current argument is returned as-is in a CommandOption and _optind is + * advanced. + * + * o If _subind == 0, then we're looking at the start of an argument: + * + * o Check for special cases: '-' and '' return null without consuming + * the argument; an argument that does not start with '-' is returned + * in a CommandOption and switches on _extraoptions; '--' is consumed + * and returns null. + * + * o Check for a long-form option (beginning with '--'). If present, + * delegate to getoptLong() and return the result. + * + * o Otherwise, advance subind past the argument's leading '-' and + * continue as though _subind != 0 (since that's now the case). + * + * o Delegate to getoptShort() and return the result. + */ + function getopt () : Nullable.<CommandOption> + { + if (this._optind >= this._argv.length) + { + /* end of input */ + return null; + } + + var arg = this._argv[this._optind]; + if (this._extraoptions) + { + this._optind++; + return new CommandOption(arg); + } + + if (this._subind == 0) + { + if (arg == '-' || arg == '') + { + return null; + } + + if (arg.charAt(0) != '-') + { + this._extraoptions = true; + this._optind++; + return new CommandOption(arg); + } + + if (arg == '--') + { + this._optind++; + this._subind = 0; + return null; + } + + if (arg.slice(1, 2) == '-') + { + return this._getoptLong(); + } + this._subind++; + } + + return this._getoptShort(); + } + + /* + * Implements getopt() for the case where optind/subind point to a short option. 
+ */ + function _getoptShort () : CommandOption + { + var arg = this._argv[this._optind]; + var chr = arg.slice(this._subind, this._subind + 1); + + if (++this._subind >= arg.length) + { + this._optind++; + this._subind = 0; + } + + if (!(chr in this._options)) + { + return this._errInvalidOption(chr); + } + + if (!this._options[chr]) + { + return new CommandOption(chr); + } + return this._getoptArgument(chr); + } + + /* + * Implements getopt() for the case where optind/subind point to a long option. + * + * The current argument is always consumed, including on the error paths + * (unknown alias, or "=value" given to an option that takes no argument), + * so that a caller looping on getopt() is not handed the same bad argument + * forever. _getoptShort() likewise advances before reporting its errors. + */ + function _getoptLong () : CommandOption + { + var arg = this._argv[this._optind]; + var eq = arg.indexOf('='); + var alias = arg.substring(2, eq == -1 ? arg.length : eq); + if (!(alias in this._aliases)) + { + this._optind++; /* skip the unknown option */ + return this._errInvalidOption(alias); + } + + var chr = this._aliases[alias]; + if (!this._options[chr]) + { + this._optind++; /* eat this argument */ + if (eq != -1) + { + return this._errExtraArg(alias); + } + return new CommandOption(chr); + } + + /* + * Advance optind/subind for the argument value and retrieve it. + */ + if (eq == -1) + { + this._optind++; + } + else + { + this._subind = eq + 1; + } + return this._getoptArgument(chr); + } + + /* + * For the given option letter 'chr' that takes an argument, assumes that + * optind/subind point to the argument (or denote the end of input) and return + * the appropriate getopt() return value for this option and argument (or return + * the appropriate error). 
+ */ + function _getoptArgument (chr : string) : CommandOption + { + if (this._optind >= this._argv.length) + { + return this._errMissingArg(chr); + } + + var arg = this._argv[this._optind].substring(this._subind); + this._optind++; + this._subind = 0; + return new CommandOption(chr, arg); + } + + function _errMissingArg (chr : string) : CommandOption + { + if (this._silent) + { + return new CommandOption(':', chr); + } + console.error('option requires an argument -- ' + chr + '\n'); + return new CommandOption('?', chr, true); + } + + function _errInvalidOption (chr : string) : CommandOption + { + if (!this._silent) + { + console.error('illegal option -- ' + chr + '\n'); + } + return new CommandOption('?', chr, true); + } + + /* + * This error is not specified by POSIX, but neither is the notion of specifying + * long option arguments using "=" in the same argv-argument, but it's common + * practice and pretty convenient. + */ + function _errExtraArg (chr : string) : CommandOption + { + if (!this._silent) + { + console.error('option expects no argument -- ' + + chr + '\n'); + } + return new CommandOption('?', chr, true); + } +} diff --git a/web/server/h2o/libh2o/misc/oktavia/src/htmlparser.jsx b/web/server/h2o/libh2o/misc/oktavia/src/htmlparser.jsx new file mode 100644 index 00000000..3d0ace78 --- /dev/null +++ b/web/server/h2o/libh2o/misc/oktavia/src/htmlparser.jsx @@ -0,0 +1,280 @@ +import "console.jsx"; +import "js/nodejs.jsx"; +import "oktavia.jsx"; +import "metadata.jsx"; +import "sax.jsx"; +import "stemmer/stemmer.jsx"; + + +class _HTMLHandler extends SAXHandler +{ + var startParse : boolean; + var startTag : string; + var stack : string []; + var oktavia : Oktavia; + var section : Section; + var tag : Block; + var filter : TagFilter; + var filepath : string; + var unit : int; + var currentLink : string; + var currentTitle : string; + var lastId : string; + var waitTitle : boolean; + var sectionCount : int; + var inCode : boolean; + var addText : boolean; + + 
function constructor (oktavia : Oktavia, filepath : string, unit : int, filter : TagFilter) + { + super(); + this.startParse = false; + this.stack = [] : string[]; + this.oktavia = oktavia; + this.section = this.oktavia.getSection('section'); + this.tag = this.oktavia.getBlock('tag'); + this.unit = unit; + this.filter = filter; + this.filepath = filepath; + this.currentTitle = ''; + this.lastId = ''; + this.waitTitle = false; + this.addText = false; + } + + override function onready () : void + { + this.currentLink = this.filepath; + this.inCode = false; + } + + override function onopentag (tagname : string, attributes : Map.<string>) : void + { + var headingId = ['h1', 'h2', 'h3', 'h4', 'h5', 'h6']; + if (this.startParse) + { + this.stack.push(tagname); + if ('id' in attributes) + { + this.lastId = attributes['id']; + } + switch (tagname) + { + case 'h1': + case 'h2': + case 'h3': + case 'h4': + case 'h5': + case 'h6': + if (headingId.indexOf(tagname) < this.unit) + { + if (this.oktavia.contentSize() > 0) + { + this.section.setTail(this.currentTitle + Oktavia.eob + this.currentLink); + } + this.currentLink = this.filepath + '#' + this.lastId; + this.currentTitle = ''; + this.waitTitle = true; + } + this.oktavia.addEndOfBlock(); + this.tag.startBlock(tagname); + break; + case 'pre': + this.tag.startBlock('pre'); + this.oktavia.addEndOfBlock(); + this.inCode = true; + break; + case 'p': + case 'div': + case 'blockquote': + this.oktavia.addEndOfBlock(); + break; + } + } + else + { + if (this.filter.match(tagname, attributes)) + { + this.startParse = true; + this.startTag = tagname; + this.stack.push(tagname); + } + } + if (tagname == 'title') + { + this.waitTitle = true; + this.currentTitle = ''; + } + } + + override function onclosetag (tagname : string) : void + { + if (this.startParse) + { + switch (tagname) + { + case 'h1': + case 'h2': + case 'h3': + case 'h4': + case 'h5': + case 'h6': + this.tag.endBlock(); + if (this.addText) + { + this.oktavia.addWord('\n'); 
+ this.addText = false; + } + this.waitTitle = false; + break; + case 'pre': + this.inCode = false; + this.tag.endBlock(); + if (this.addText) + { + this.oktavia.addWord('\n'); + this.addText = false; + } + break; + case 'div': + case 'p': + case 'blockquote': + if (this.addText) + { + this.oktavia.addWord('\n'); + this.addText = false; + } + break; + } + /* balance the push done in onopentag so the stack empties when the matched element closes */ + this.stack.pop(); + if (this.stack.length == 0) + { + this.startParse = false; + } + } + if (tagname == 'title') + { + this.waitTitle = false; + } + } + + /* + * Indexes text while inside a matched region (stemming is disabled inside + * <pre>) and accumulates it into the current title while one is awaited. + */ + override function ontext (text : string) : void + { + if (this.startParse) + { + this.oktavia.addWord(text, !this.inCode); + this.addText = true; + } + if (this.waitTitle) + { + this.currentTitle += text; + } + } + + /* Closes the last section with the current title and link. */ + override function onend () : void + { + this.section.setTail(this.currentTitle + Oktavia.eob + this.currentLink); + } +} + +/* + * Decides which elements start an indexed region. Each filter string is one of + * "#id", ".class", "tag.class" or a bare "tag". + */ +class TagFilter +{ + var tags : string[]; + var ids : string[]; + var classes : string[]; + var tagAndClasses : string[]; + + function constructor (filters : string[]) + { + this.tags = [] : string[]; + this.ids = [] : string[]; + this.classes = [] : string[]; + this.tagAndClasses = [] : string[]; + + for (var i = 0; i < filters.length; i++) + { + var filter = filters[i]; + switch (filter.charAt(0)) + { + case '#': + this.ids.push(filter.slice(1)); + break; + case '.': + this.classes.push(filter.slice(1)); + break; + default: + /* no dot: bare tag name; with a dot: "tag.class", which match() compares against tagname + '.' + classname */ + if (filter.indexOf('.') == -1) + { + this.tags.push(filter); + } + else + { + this.tagAndClasses.push(filter); + } + } + } + } + + /* + * Returns true when the element matches any registered tag, id, class or + * tag.class filter. The class attribute is compared as a whole string. + */ + function match (tagname : string, attributes : Map.<string>) : boolean + { + var result = false; + if (this.tags.indexOf(tagname) != -1) + { + result = true; + } + else if (attributes['id'] && this.ids.indexOf(attributes['id']) != -1) + { + result = true; + } + else if (attributes['class']) + { + var classname = attributes['class']; + if (this.classes.indexOf(classname) != -1 || + this.tagAndClasses.indexOf(tagname + '.' 
+ classname) != -1) + { + result = true; + } + } + return result; + } +} + +class HTMLParser +{ + var oktavia : Oktavia; + var unit : int; + var root : string; + var prefix : string; + var filter : TagFilter; + + function constructor (unit : int, root : string, prefix : string, filter : string[], stemmer : Nullable.<Stemmer>) + { + this.unit = unit; + this.root = root; + this.prefix = prefix; + this.filter = new TagFilter(filter); + this.oktavia = new Oktavia(); + this.oktavia.addSection('section'); + this.oktavia.addBlock('tag'); + if (stemmer) + { + this.oktavia.setStemmer(stemmer); + } + } + + function parse (filepath : string) : void + { + var relative = this.prefix + node.path.relative(this.root, filepath); + console.log('reading: ' + relative); + var lines = node.fs.readFileSync(filepath, 'utf8'); + var handler = new _HTMLHandler(this.oktavia, relative, this.unit, this.filter); + var parser = new SAXParser(handler); + parser.parse(lines); + } + + function dump (cacheDensity : int, verbose : boolean) : string + { + console.log('\nbuilding...\n'); + this.oktavia.build(cacheDensity, verbose); + return this.oktavia.dump(verbose); + } +} diff --git a/web/server/h2o/libh2o/misc/oktavia/src/metadata.jsx b/web/server/h2o/libh2o/misc/oktavia/src/metadata.jsx new file mode 100644 index 00000000..25c38813 --- /dev/null +++ b/web/server/h2o/libh2o/misc/oktavia/src/metadata.jsx @@ -0,0 +1,498 @@ +import "bit-vector.jsx"; +import "oktavia.jsx"; +import "binary-util.jsx"; +import "search-result.jsx"; + + +class Metadata +{ + var _parent : Oktavia; + var _bitVector : BitVector; + + function constructor (parent : Oktavia) + { + this._parent = parent; + this._bitVector = new BitVector(); + } + + function _size () : int + { + return this._bitVector.rank(this._bitVector.size()); + } + + function getContent (index : int) : string + { + if (index < 0 || this._size() <= index) + { + throw new Error("Section.getContent() : range error " + index as string); + } + var startPosition = 
0; + if (index > 0) + { + startPosition = this._bitVector.select(index - 1) + 1; + } + var length = this._bitVector.select(index) - startPosition + 1; + return this._parent._getSubstring(startPosition, length); + } + + function getStartPosition (index : int) : int + { + if (index < 0 || this._size() <= index) + { + throw new Error("Section.getContent() : range error " + index as string); + } + var startPosition = 0; + if (index > 0) + { + startPosition = this._bitVector.select(index - 1) + 1; + } + return startPosition; + } + + function grouping (result : SingleResult, positions : int [], word : string, stemmed : boolean) : void + { + } + + function getInformation(index : int) : string + { + return ''; + } + + function _build () : void + { + this._bitVector.build(); + } + + function _load (name : string, data : string, offset : int) : int + { + offset = this._bitVector.load(data, offset); + this._parent._metadataLabels.push(name); + this._parent._metadatas[name] = this; + return offset; + } + + function _dump () : string + { + return this._bitVector.dump(); + } + + function _dump (report : CompressionReport) : string + { + return this._bitVector.dump(report); + } +} + +class Section extends Metadata +{ + var _names : string[]; + + function constructor (parent : Oktavia) + { + super(parent); + this._names = [] : string[]; + } + + function setTail (name : string) : void + { + this.setTail(name, this._parent.contentSize()); + } + + function setTail (name : string, index : int) : void + { + this._names.push(name); + this._bitVector.set(index - 1); + } + + function size () : int + { + return this._names.length; + } + + function getSectionIndex (position : int) : int + { + if (position < 0 || this._bitVector.size() <= position) + { + throw new Error("Section.getSectionIndex() : range error " + position as string); + } + return this._bitVector.rank(position); + } + + function getName (index : int) : string + { + if (index < 0 || this.size() <= index) + { + throw new 
Error("Section.getName() : range error"); + } + return this._names[index]; + } + + override function grouping (result : SingleResult, positions : int [], word : string, stemmed : boolean) : void + { + for (var i = 0; i < positions.length; i++) + { + var position = positions[i]; + var index = this.getSectionIndex(position); + var unit = result.getSearchUnit(index); + if (unit.startPosition < 0) + { + unit.startPosition = this.getStartPosition(index); + } + unit.addPosition(word, position - unit.startPosition, stemmed); + } + } + + override function getInformation(index : int) : string + { + return this.getName(index); + } + + static function _load (parent : Oktavia, name : string, data : string, offset : int) : int + { + var strs = Binary.loadStringList(data, offset); + var section = new Section(parent); + section._names = strs.result; + return section._load(name, data, strs.offset); + } + + override function _dump () : string + { + return [Binary.dump16bitNumber(0), Binary.dumpStringList(this._names), super._dump()].join(''); + } + + override function _dump (report : CompressionReport) : string + { + report.add(1, 1); + return [Binary.dump16bitNumber(0), Binary.dumpStringList(this._names, report), super._dump(report)].join(''); + } +} + +class Splitter extends Metadata +{ + var name : Nullable.<string>; + + function constructor (parent : Oktavia) + { + super(parent); + this.name = null; + } + + function constructor (parent : Oktavia, name : string) + { + super(parent); + this.name = name; + } + + function size () : int + { + return this._size(); + } + + function split () : void + { + this.split(this._parent.contentSize()); + } + + function split (index : int) : void + { + this._bitVector.set(index - 1); + } + + function getIndex (position : int) : int + { + if (position < 0 || this._bitVector.size() <= position) + { + throw new Error("Section.getSectionIndex() : range error"); + } + return this._bitVector.rank(position); + } + + override function grouping (result : 
SingleResult, positions : int [], word : string, stemmed : boolean) : void + { + for (var i = 0; i < positions.length; i++) + { + var position = positions[i]; + var index = this.getIndex(position); + var unit = result.getSearchUnit(index); + if (unit.startPosition < 0) + { + unit.startPosition = this.getStartPosition(index); + } + unit.addPosition(word, position - unit.startPosition, stemmed); + } + } + + override function getInformation(index : int) : string + { + if (this.name != null) + { + return this.name + ((index + 1) as string); + } + return ''; + } + + static function _load (parent : Oktavia, name : string, data : string, offset : int) : int + { + var section = new Splitter(parent); + return section._load(name, data, offset); + } + + override function _dump () : string + { + return [Binary.dump16bitNumber(1), super._dump()].join(''); + } + + override function _dump (report : CompressionReport) : string + { + report.add(1, 1); + return [Binary.dump16bitNumber(1), super._dump(report)].join(''); + } +} + +class Table extends Metadata +{ + var _headers : string[]; + var _columnTails : BitVector; + + function constructor (parent : Oktavia, headers : string[]) + { + super(parent); + this._headers = headers; + this._columnTails = new BitVector(); + } + + function rowSize () : int + { + return this._size(); + } + + function columnSize () : int + { + return this._headers.length; + } + + function setColumnTail () : void + { + var index = this._parent.contentSize(); + this._parent.addEndOfBlock(); + this._columnTails.set(index - 1); + } + + function setRowTail () : void + { + var index = this._parent.contentSize(); + this._bitVector.set(index - 1); + } + + function getCell (position : int) : int[] + { + if (position < 0 || this._bitVector.size() <= position) + { + throw new Error("Section.getSectionIndex() : range error " + position as string); + } + var row = this._bitVector.rank(position); + var currentColumn = this._columnTails.rank(position); + + var 
lastRowColumn = 0; + if (row > 0) + { + var startPosition = this._bitVector.select(row - 1) + 1; + lastRowColumn = this._columnTails.rank(startPosition); + } + var result = [row, currentColumn - lastRowColumn] : int[]; + return result; + } + + function getRowContent (rowIndex : int) : Map.<string> + { + var content = this.getContent(rowIndex); + var values = content.split(Oktavia.eob, this._headers.length); + var result = {} : Map.<string>; + for (var i in this._headers) + { + if (i < values.length) + { + result[this._headers[i]] = values[i]; + } + else + { + result[this._headers[i]] = ''; + } + } + return result; + } + + override function grouping (result : SingleResult, positions : int [], word : string, stemmed : boolean) : void + { + // TODO implement + } + + override function getInformation(index : int) : string + { + return ''; + } + + override function _build () : void + { + this._bitVector.build(); + this._columnTails.build(); + } + + static function _load (parent : Oktavia, name : string, data : string, offset : int) : int + { + var strs = Binary.loadStringList(data, offset); + var table = new Table(parent, strs.result); + offset = table._load(name, data, strs.offset); + return table._columnTails.load(data, offset); + } + + override function _dump () : string + { + return [ + Binary.dump16bitNumber(2), Binary.dumpStringList(this._headers), + super._dump(), this._columnTails.dump() + ].join(''); + } + + override function _dump (report : CompressionReport) : string + { + report.add(1, 1); + return [ + Binary.dump16bitNumber(2), Binary.dumpStringList(this._headers, report), + super._dump(report), this._columnTails.dump(report) + ].join(''); + } +} + +class Block extends Metadata +{ + var _names : string[]; + var _start : boolean; + + function constructor (parent : Oktavia) + { + super(parent); + this._names = [] : string[]; + this._start = false; + } + + function startBlock (blockName : string) : void + { + this.startBlock(blockName, 
this._parent.contentSize()); + } + + function startBlock (blockName : string, index : int) : void + { + if (this._start) + { + throw new Error('Splitter `' + this._names[this._names.length - 1] + '` is not closed'); + } + this._start = true; + this._names.push(blockName); + this._bitVector.set(index - 1); + } + + function endBlock () : void + { + this.endBlock(this._parent.contentSize()); + } + + function endBlock (index : int) : void + { + if (!this._start) + { + throw new Error('Splitter is not started'); + } + this._start = false; + this._bitVector.set(index - 1); + } + + function size () : int + { + return this._names.length; + } + + function blockIndex (position : int) : int + { + if (position < 0 || (this._parent._fmindex.size() - 1) <= position) + { + throw new Error("Block.blockIndex() : range error " + position as string); + } + var result : int; + if (position >= this._bitVector.size()) + { + position = this._bitVector.size() - 1; + result = this._bitVector.rank(position) + 1; + } + else + { + result = this._bitVector.rank(position); + } + return result; + } + + function inBlock (position : int) : boolean + { + var blockIndex = this.blockIndex(position); + return (blockIndex % 2) != 0; + } + + function getBlockContent (position : int) : string + { + var blockIndex = this.blockIndex(position); + var result : string; + if ((blockIndex % 2) != 0) + { + result = this.getContent(blockIndex); + } + else + { + result = ''; + } + return result; + } + + function getBlockName (position : int) : string + { + var blockIndex = this.blockIndex(position); + var result : string; + if ((blockIndex % 2) != 0) + { + result = this._names[blockIndex >>> 1]; + } + else + { + result = ''; + } + return result; + } + + override function grouping (result : SingleResult, positions : int [], word : string, stemmed : boolean) : void + { + // TODO implement + } + + override function getInformation(index : int) : string + { + return ''; + } + + static function _load (parent : Oktavia, 
name : string, data : string, offset : int) : int + { + var strs = Binary.loadStringList(data, offset); + var block = new Block(parent); + block._names = strs.result; + return block._load(name, data, strs.offset); + } + + override function _dump () : string + { + return [Binary.dump16bitNumber(3), Binary.dumpStringList(this._names), super._dump()].join(''); + } + + override function _dump (report : CompressionReport) : string + { + report.add(1, 1); + return [Binary.dump16bitNumber(3), Binary.dumpStringList(this._names, report), super._dump(report)].join(''); + } +} + diff --git a/web/server/h2o/libh2o/misc/oktavia/src/node-sqlite3.jsx b/web/server/h2o/libh2o/misc/oktavia/src/node-sqlite3.jsx new file mode 100644 index 00000000..32196983 --- /dev/null +++ b/web/server/h2o/libh2o/misc/oktavia/src/node-sqlite3.jsx @@ -0,0 +1,115 @@ +import "js.jsx"; + +native __fake__ class _sqlite3database +{ + static const OK = 0; + static const ERROR = 1; + static const INTERNAL = 2; + static const PERM = 3; + static const ABORT = 4; + static const BUSY = 5; + static const LOCKED = 6; + static const NOMEM = 7; + static const READONLY = 8; + static const INTERRUPT = 9; + static const IOERR = 10; + static const CORRUPT = 11; + static const NOTFOUND = 12; + static const FULL = 13; + static const CANTOPEN = 14; + static const PROTOCOL = 15; + static const EMPTY = 16; + static const SCHEMA = 17; + static const TOOBIG = 18; + static const CONSTRAINT = 19; + static const MISMATCH = 20; + static const MISUSE = 21; + static const NOLFS = 22; + static const AUTH = 23; + static const FORMAT = 24; + static const RANGE = 25; + + function run(...params : variant) : void; + function all(...params : variant) : void; + function each(...params : variant) : void; + function close(...params : variant) : void; + function serialize(...params : variant) : void; +} + +native __fake__ class _sqlite3statement +{ +} + +native __fake__ class _sqlite3error +{ + var message : string; + var errno : int; + var 
code : string; +} + +class SQLite3Database +{ + var _instance : _sqlite3database; + + /* + * Opens the database through node's sqlite3 module. The file name is embedded + * with JSON.stringify() so that quotes and backslashes in it (e.g. Windows + * paths) cannot break, or inject code into, the evaluated expression. + */ + function constructor (filename : string) + { + var exp = "(function () { var __sqlite3 = require('sqlite3'); return new __sqlite3.Database(" + JSON.stringify(filename) + ");})()"; + this._instance = js.eval(exp) as __noconvert__ _sqlite3database; + } + + function run (sql : string) : SQLite3Database + { + this._instance.run(sql); + return this; + } + + function run (sql : string, bind : variant) : SQLite3Database + { + this._instance.run(sql, bind); + return this; + } + + function run (sql : string, bind : variant, callback : (Nullable.<_sqlite3error>) -> void) : SQLite3Database + { + this._instance.run(sql, bind, callback); + return this; + } + + function run (sql : string, callback : (Nullable.<_sqlite3error>) -> void) : SQLite3Database + { + this._instance.run(sql, callback); + return this; + } + + function each (sql : string, callback : (Nullable.<_sqlite3error>, variant) -> void) : SQLite3Database + { + this._instance.each(sql, callback); + return this; + } + + function all (sql : string, callback : (Nullable.<_sqlite3error>, variant[]) -> void) : SQLite3Database + { + this._instance.all(sql, callback); + return this; + } + + function serialize () : void + { + this._instance.serialize(); + } + + function serialize (callback : (Nullable.<_sqlite3error>) -> void) : void + { + this._instance.serialize(callback); + } + + function close () : void + { + this._instance.close(); + } + + function close (callback : (Nullable.<_sqlite3error>) -> void) : void + { + this._instance.close(callback); + } +} diff --git a/web/server/h2o/libh2o/misc/oktavia/src/oktavia.jsx b/web/server/h2o/libh2o/misc/oktavia/src/oktavia.jsx new file mode 100644 index 00000000..8109b475 --- /dev/null +++ b/web/server/h2o/libh2o/misc/oktavia/src/oktavia.jsx @@ -0,0 +1,427 @@ +import "metadata.jsx"; +import "fm-index.jsx"; +import "binary-util.jsx"; +import "query.jsx"; +import "search-result.jsx"; +import 
"stemmer/stemmer.jsx"; +import "console.jsx"; + + +class Oktavia +{ + var _fmindex : FMIndex; + var _metadatas : Map.<Metadata>; + var _metadataLabels : string[]; + var _stemmer : Nullable.<Stemmer>; + var _stemmingResult : Map.<string[]>; + + // char code remap tables + var _utf162compressCode : string[]; + var _compressCode2utf16 : string[]; + + // sentinels + static const eof = String.fromCharCode(0); + static const eob = String.fromCharCode(1); + static const unknown = String.fromCharCode(3); + + function constructor () + { + this._fmindex = new FMIndex(); + this._metadatas = {} : Map.<Metadata>; + this._metadataLabels = [] : string[]; + this._stemmer = null; + this._stemmingResult = {} : Map.<string[]>; + this._utf162compressCode = [Oktavia.eof, Oktavia.eob, Oktavia.unknown]; + this._utf162compressCode.length = 65536; + this._compressCode2utf16 = [Oktavia.eof, Oktavia.eob, Oktavia.unknown]; + } + + function setStemmer (stemmer : Stemmer) : void + { + this._stemmer = stemmer; + } + + function getPrimaryMetadata () : Metadata + { + return this._metadatas[this._metadataLabels[0]]; + } + + function addSection (key : string) : Section + { + if (this._metadataLabels.indexOf(key) != -1) + { + throw new Error('Metadata name ' + key + ' is already exists'); + } + this._metadataLabels.push(key); + var section = new Section(this); + this._metadatas[key] = section; + return section; + } + + function getSection (key : string) : Section + { + if (this._metadataLabels.indexOf(key) == -1) + { + throw new Error('Metadata name ' + key + " does't exists"); + } + return this._metadatas[key] as Section; + } + + function addSplitter (key : string) : Splitter + { + if (this._metadataLabels.indexOf(key) != -1) + { + throw new Error('Metadata name ' + key + ' is already exists'); + } + this._metadataLabels.push(key); + var splitter = new Splitter(this); + this._metadatas[key] = splitter; + return splitter; + } + + function getSplitter (key : string) : Splitter + { + if 
(this._metadataLabels.indexOf(key) == -1) + { + throw new Error('Metadata name ' + key + " does't exists"); + } + return this._metadatas[key] as Splitter; + } + + function addTable (key : string, headers : string[]) : Table + { + if (this._metadataLabels.indexOf(key) != -1) + { + throw new Error('Metadata name ' + key + ' is already exists'); + } + this._metadataLabels.push(key); + var table = new Table(this, headers); + this._metadatas[key] = table; + return table; + } + + function getTable (key : string) : Table + { + if (this._metadataLabels.indexOf(key) == -1) + { + throw new Error('Metadata name ' + key + " does't exists"); + } + return this._metadatas[key] as Table; + } + + function addBlock (key : string) : Block + { + if (this._metadataLabels.indexOf(key) != -1) + { + throw new Error('Metadata name ' + key + ' is already exists'); + } + this._metadataLabels.push(key); + var block = new Block(this); + this._metadatas[key] = block; + return block; + } + + function getBlock (key : string) : Block + { + if (this._metadataLabels.indexOf(key) == -1) + { + throw new Error('Metadata name ' + key + " does't exists"); + } + return this._metadatas[key] as Block; + } + + function addEndOfBlock () : void + { + this._fmindex.push(Oktavia.eob); + } + + /* + * Re-encodes every UTF-16 unit of `words` as a dense "compression code", + * allocating a new code the first time a unit is seen, and appends the + * encoded string to the FM-index. + * + * NOTE(review): both remap tables start with three entries, so the first code + * allocated here is 3, the same value as Oktavia.unknown; confirm whether + * the unknown sentinel was meant to be 2. + */ + function addWord (words : string) : void + { + var str = [] : string[]; + str.length = words.length; + for (var i = 0; i < words.length; i++) + { + var charCode = words.charCodeAt(i); + var newCharCode = this._utf162compressCode[charCode]; + if (newCharCode == null) + { + newCharCode = String.fromCharCode(this._compressCode2utf16.length); + this._utf162compressCode[charCode] = newCharCode; + this._compressCode2utf16.push(String.fromCharCode(charCode)); + } + /* fill the pre-sized slot; push() here used to leave words.length holes in front */ + str[i] = newCharCode; + } + this._fmindex.push(str.join('')); + } + + function addWord (words : string, stemming : boolean) : void + { + this.addWord(words); + var wordList = words.split(/\s+/); + for (var i = 0; i < wordList.length; i++) + { + var originalWord = 
wordList[i]; + var smallWord = originalWord.slice(0, 1).toLowerCase() + originalWord.slice(1); + var registerWord : Nullable.<string> = null; + if (stemming && this._stemmer) + { + var baseWord = this._stemmer.stemWord(originalWord.toLowerCase()); + if (originalWord.indexOf(baseWord) == -1) + { + registerWord = baseWord; + } + } + else if (originalWord != smallWord) + { + registerWord = smallWord; + } + if (registerWord) + { + var compressedCodeWord = this._convertToCompressionCode(originalWord); + var stemmedList = this._stemmingResult[registerWord]; + if (!stemmedList) + { + stemmedList = [compressedCodeWord]; + this._stemmingResult[registerWord] = stemmedList; + } + else if (stemmedList.indexOf(compressedCodeWord) == -1) + { + stemmedList.push(compressedCodeWord); + } + } + } + } + + function _convertToCompressionCode (keyword : string) : string + { + var resultChars = [] : string[]; + for (var i = 0; i < keyword.length; i++) + { + var chr = this._utf162compressCode[keyword.charCodeAt(i)]; + if (chr == null) + { + resultChars.push(Oktavia.unknown); + } + else + { + resultChars.push(chr); + } + } + return resultChars.join(''); + } + + function rawSearch (keyword : string, stemming : boolean) : int[] + { + var result : int[]; + if (stemming) + { + result = [] : int[]; + if (this._stemmer) + { + var baseWord = this._stemmer.stemWord(keyword.toLowerCase()); + var stemmedList = this._stemmingResult[baseWord]; + if (stemmedList) + { + for (var i = 0; i < stemmedList.length; i++) + { + var word = stemmedList[i]; + result = result.concat(this._fmindex.search(word)); + } + } + } + } + else + { + result = this._fmindex.search(this._convertToCompressionCode(keyword)); + } + return result; + } + + function search (queries : Query[]) : SearchSummary + { + var summary = new SearchSummary(this); + for (var i = 0; i < queries.length; i++) + { + summary.addQuery(this._searchQuery(queries[i])); + } + summary.mergeResult(); + return summary; + } + + function _searchQuery (query : 
Query) : SingleResult + { + var result = new SingleResult(query.word, query.or, query.not); + var positions : int[]; + if (query.raw) + { + positions = this.rawSearch(query.word, false); + } + else + { + positions = this.rawSearch(query.word, false).concat(this.rawSearch(query.word, true)); + } + this.getPrimaryMetadata().grouping(result, positions, query.word, !query.raw); + return result; + } + + function build () : void + { + this.build(5, false); + } + + function build (cacheDensity : int, verbose : boolean) : void + { + for (var key in this._metadatas) + { + this._metadatas[key]._build(); + } + var cacheRange = Math.round(Math.max(1, (100 / Math.min(100, Math.max(0.01, cacheDensity))))); + var maxChar = this._compressCode2utf16.length; + this._fmindex.build(Oktavia.eof, maxChar, cacheRange, verbose); + } + + function dump () : string + { + return this.dump(false); + } + + function dump (verbose : boolean) : string + { + var headerSource = "oktavia-01"; + var header = Binary.dumpString(headerSource).slice(1); + if (verbose) + { + console.log("Source text size: " + (this._fmindex.size() * 2) as string + ' bytes'); + } + var fmdata = this._fmindex.dump(verbose); + var result = [ + header, + fmdata + ]; + + result.push(Binary.dump16bitNumber(this._compressCode2utf16.length)); + for (var i = 3; i < this._compressCode2utf16.length; i++) + { + result.push(this._compressCode2utf16[i]); + } + if (verbose) + { + console.log('Char Code Map: ' + (this._compressCode2utf16.length * 2 - 2) as string + ' bytes'); + } + + var report = new CompressionReport(); + result.push(Binary.dumpStringListMap(this._stemmingResult, report)); + if (verbose) + { + console.log('Stemmed Word Table: ' + (result[result.length - 1].length) as string + ' bytes (' + report.rate() as string + '%)'); + } + + result.push(Binary.dump16bitNumber(this._metadataLabels.length)); + for (var i = 0; i < this._metadataLabels.length; i++) + { + var report = new CompressionReport(); + var name = 
this._metadataLabels[i]; + var data = this._metadatas[name]._dump(report); + result.push(Binary.dumpString(name, report), data); + if (verbose) + { + console.log('Meta Data ' + name + ': ' + (data.length * 2) as string + ' bytes (' + report.rate() as string + '%)'); + } + } + return result.join(''); + } + + function load (data : string) : void + { + var headerSource = "oktavia-01"; + var header = Binary.dumpString(headerSource).slice(1); + if (data.slice(0, 5) != header) + { + throw new Error('Invalid data file'); + } + this._metadatas = {} : Map.<Metadata>; + this._metadataLabels = [] : string[]; + + var offset = 5; + offset = this._fmindex.load(data, offset); + var charCodeCount = Binary.load16bitNumber(data, offset++); + this._compressCode2utf16 = [Oktavia.eof, Oktavia.eob, Oktavia.unknown]; + this._utf162compressCode = [Oktavia.eof, Oktavia.eob, Oktavia.unknown]; + for (var i = 3; i < charCodeCount; i++) + { + var charCode = Binary.load16bitNumber(data, offset++); + this._compressCode2utf16.push(String.fromCharCode(charCode)); + this._utf162compressCode[charCode] = String.fromCharCode(i); + } + + var stemmedWords = Binary.loadStringListMap(data, offset); + this._stemmingResult = stemmedWords.result; + offset = stemmedWords.offset; + + var metadataCount = Binary.load16bitNumber(data, offset++); + for (var i = 0; i < metadataCount; i++) + { + var nameResult = Binary.loadString(data, offset); + var name = nameResult.result; + var offset = nameResult.offset; + var type = Binary.load16bitNumber(data, offset++); + switch (type) + { + case 0: + offset = Section._load(this, name, data, offset); + break; + case 1: + offset = Splitter._load(this, name, data, offset); + break; + case 2: + offset = Table._load(this, name, data, offset); + break; + case 3: + offset = Block._load(this, name, data, offset); + break; + } + } + } + + function contentSize () : int + { + return this._fmindex.contentSize(); + } + + function wordPositionType (position : int) : int + { + var result 
= 0; + if (position == 0) + { + result = 4; + } + else + { + var ahead = this._fmindex.getSubstring(position - 1, 1); + if (/\s/.test(ahead)) + { + result = 2; + } + else if (/\W/.test(ahead)) + { + result = 1; + } + else if (Oktavia.eob == ahead) + { + result = 3; + } + } + return result; + } + + function _getSubstring (position : int, length : int) : string + { + var result = this._fmindex.getSubstring(position, length); + var str = [] : string[]; + for (var i = 0; i < result.length; i++) + { + str.push(this._compressCode2utf16[result.charCodeAt(i)]); + } + return str.join(''); + } +} diff --git a/web/server/h2o/libh2o/misc/oktavia/src/query-parser.jsx b/web/server/h2o/libh2o/misc/oktavia/src/query-parser.jsx new file mode 100644 index 00000000..86308bcb --- /dev/null +++ b/web/server/h2o/libh2o/misc/oktavia/src/query-parser.jsx @@ -0,0 +1,60 @@ +import "query.jsx"; + + +class QueryParser +{ + var queries : Query[]; + function constructor() + { + this.queries = [] : Query[]; + } + + function parse (queryStrings : string[]) : Query[] + { + var nextOr = false; + for (var i = 0; i < queryStrings.length; i++) + { + var word = queryStrings[i]; + if (word == 'OR') + { + nextOr = true; + } + else + { + var query = new Query(); + if (nextOr) + { + query.or = true; + nextOr = false; + } + if (word.slice(0, 1) == '-') + { + query.not = true; + word = word.slice(1); + } + if (word.slice(0, 1) == '"' && word.slice(word.length -1) == '"') + { + query.raw = true; + word = word.slice(1, word.length -1); + } + query.word = word; + this.queries.push(query); + } + } + return this.queries; + } + + function highlight () : string + { + var result = [] : string[]; + for (var i = 0; i < this.queries.length; i++) + { + var query = this.queries[i]; + if (!query.not) + { + result.push("highlight=" + String.encodeURIComponent(query.word)); + } + } + return '?' 
+ result.join('&'); + } +} diff --git a/web/server/h2o/libh2o/misc/oktavia/src/query-string-parser.jsx b/web/server/h2o/libh2o/misc/oktavia/src/query-string-parser.jsx new file mode 100644 index 00000000..71d015c9 --- /dev/null +++ b/web/server/h2o/libh2o/misc/oktavia/src/query-string-parser.jsx @@ -0,0 +1,128 @@ +import "query.jsx"; + + +class QueryStringParser +{ + var queries : Query[]; + function constructor() + { + this.queries = [] : Query[]; + } + + function parse (queryString : string) : Query[] + { + var nextOr = false; + var nextNot = false; + var currentWordStart = 0; + var status = 0; + // 0: free + // 1: in unquoted word + // 2: in quote + var isSpace = /[\s\u3000]/; + for (var i = 0; i < queryString.length; i++) + { + var ch = queryString.charAt(i); + switch (status) + { + case 0: // free + if (!isSpace.test(ch)) + { + if (ch == '-') + { + nextNot = true; + } + else if (ch == '"') + { + currentWordStart = i + 1; + status = 2; + } + else + { + currentWordStart = i; + status = 1; + } + } + else + { + nextNot = false; + } + break; + case 1: // unquoted word + if (isSpace.test(ch)) + { + var word = queryString.slice(currentWordStart, i); + if (word == 'OR') + { + nextOr = true; + } + else + { + var query = new Query(); + query.word = word; + query.or = nextOr; + query.not = nextNot; + this.queries.push(query); + nextOr = false; + nextNot = false; + } + status = 0; + } + break; + case 2: // in quote + if (ch == '"') + { + var word = queryString.slice(currentWordStart, i); + var query = new Query(); + query.word = word; + query.or = nextOr; + query.not = nextNot; + query.raw = true; + this.queries.push(query); + nextOr = false; + nextNot = false; + status = 0; + } + break; + } + } + switch (status) + { + case 0: + break; + case 1: + var query = new Query(); + var word = queryString.slice(currentWordStart, queryString.length); + if (word != 'OR') + { + query.word = word; + query.or = nextOr; + query.not = nextNot; + this.queries.push(query); + } + break; + 
case 2: + var query = new Query(); + query.word = queryString.slice(currentWordStart, queryString.length); + query.or = nextOr; + query.not = nextNot; + query.raw = true; + this.queries.push(query); + break; + } + return this.queries; + } + + function highlight () : string + { + var result = [] : string[]; + for (var i = 0; i < this.queries.length; i++) + { + var query = this.queries[i]; + if (!query.not) + { + result.push("highlight=" + String.encodeURIComponent(query.word)); + } + } + return '?' + result.join('&'); + } +} diff --git a/web/server/h2o/libh2o/misc/oktavia/src/query.jsx b/web/server/h2o/libh2o/misc/oktavia/src/query.jsx new file mode 100644 index 00000000..38c52c71 --- /dev/null +++ b/web/server/h2o/libh2o/misc/oktavia/src/query.jsx @@ -0,0 +1,37 @@ +class Query +{ + var word : string; + var or : boolean; + var not : boolean; + var raw : boolean; + + function constructor () + { + this.word = ''; + this.or = false; + this.not = false; + this.raw = false; + } + + override function toString () : string + { + var result = [] : string[]; + if (this.or) + { + result.push("OR "); + } + if (this.not) + { + result.push("-"); + } + if (this.raw) + { + result.push('"', this.word, '"'); + } + else + { + result.push(this.word); + } + return result.join(''); + } +} diff --git a/web/server/h2o/libh2o/misc/oktavia/src/sais.jsx b/web/server/h2o/libh2o/misc/oktavia/src/sais.jsx new file mode 100644 index 00000000..9d8fa8fb --- /dev/null +++ b/web/server/h2o/libh2o/misc/oktavia/src/sais.jsx @@ -0,0 +1,250 @@ +/* Original source code: + * G. Nong, S. Zhang and W. H. 
Chan, Two Efficient Algorithms for Linear Time Suffix Array Construction, IEEE Transactions on Computers, To Appear + * http://www.cs.sysu.edu.cn/nong/index.files/Two%20Efficient%20Algorithms%20for%20Linear%20Suffix%20Array%20Construction.pdf + */ + +import "bit-vector.jsx"; + +class OArray +{ + var offset : int; + var array : int[]; + + function constructor (array : int[]) + { + this.array = array; + this.offset = 0; + } + + function constructor (array : int[], offset : int) + { + this.array = array; + this.offset = offset; + } + + function get (index : int) : int + { + return this.array[index + this.offset]; + } + + function set (index : int, value : int) : void + { + this.array[index + this.offset] = value; + } + + function isS (index : int) : boolean + { + return this.array[index + this.offset] < this.array[index + this.offset + 1]; + } + + function compare (index1 : int, index2 : int) : boolean + { + return this.array[index1 + this.offset] == this.array[index2 + this.offset]; + } +} + + +class SAIS +{ + static function _isLMS (t : BitVector, i : int) : boolean + { + return i > 0 && t.get(i) && !t.get(i - 1); + } + + // find the start or end of each bucket + static function _getBuckets(s : OArray, bkt : int[], n : int, K : int, end : boolean) : void + { + var sum = 0; + for (var i = 0; i <= K; i++) + { + bkt[i] = 0; // clear all buckets + } + for (var i = 0; i < n; i++) + { + bkt[s.get(i)]++; // compute the size of each bucket + } + for (var i = 0; i <= K; i++) + { + sum += bkt[i]; + bkt[i] = end ? 
sum : sum - bkt[i]; + } + } + + // compute SAl + static function _induceSAl(t : BitVector, SA : int[], s : OArray, bkt : int[], n : int, K : int, end : boolean) : void + { + SAIS._getBuckets(s, bkt, n, K, end); // find starts of buckets + for (var i = 0; i < n; i++) + { + var j = SA[i] - 1; + if (j >= 0 && !t.get(j)) + { + SA[bkt[s.get(j)]++] = j; + } + } + } + + // compute SAs + static function _induceSAs(t : BitVector, SA : int[], s : OArray, bkt : int[], n : int, K : int, end : boolean) : void + { + SAIS._getBuckets(s, bkt, n, K, end); // find ends of buckets + for (var i = n - 1; i >= 0; i--) + { + var j = SA[i] - 1; + if (j >=0 && t.get(j)) + { + SA[--bkt[s.get(j)]] = j; + } + } + } + + // find the suffix array SA of s[0..n-1] in {1..K}^n + // require s[n-1]=0 (the sentinel!), n>=2 + // use a working space (excluding s and SA) of at most 2.25n+O(1) for a constant alphabet + + static function make(source : string) : int[] + { + var charCodes = [] : int[]; + charCodes.length = source.length; + var maxCode = 0; + for (var i = 0; i < source.length; i++) + { + var code = source.charCodeAt(i); + charCodes[i] = code; + maxCode = (code > maxCode) ? code : maxCode; + } + var SA = [] : int[]; + SA.length = source.length; + var s = new OArray(charCodes); + SAIS._make(s, SA, source.length, maxCode); + return SA; + } + + static function _make(s : OArray, SA : int[], n : int, K : int) : void + { + // Classify the type of each character + var t = new BitVector(); + t.set(n - 2, false); + t.set(n - 1, true); // the sentinel must be in s1, important!!! 
+ for (var i = n - 3; i >= 0; i--) + { + t.set(i, (s.isS(i) || (s.compare(i, i + 1) && t.get(i + 1)))); + } + + // stage 1: reduce the problem by at least 1/2 + // sort all the S-substrings + var bkt = [] : int[]; + bkt.length = K + 1; + SAIS._getBuckets(s, bkt, n, K, true); // find ends of buckets + for (var i = 0; i < n; i++) + { + SA[i] = -1; + } + for (var i = 1; i < n; i++) + { + if (SAIS._isLMS(t, i)) + { + SA[--bkt[s.get(i)]] = i; + } + } + SAIS._induceSAl(t, SA, s, bkt, n, K, false); + SAIS._induceSAs(t, SA, s, bkt, n, K, true); + // compact all the sorted substrings into the first n1 items of SA + // 2*n1 must be not larger than n (proveable) + var n1 = 0; + for (var i = 0; i < n; i++) + { + if (SAIS._isLMS(t, SA[i])) + { + SA[n1++] = SA[i]; + } + } + + // find the lexicographic names of all substrings + for (var i = n1; i < n; i++) + { + SA[i]=-1; // init the name array buffer + } + var name = 0; + var prev = -1; + for (i = 0; i < n1; i++) + { + var pos = SA[i]; + var diff = false; + for (var d = 0; d < n; d++) + { + if (prev == -1 || !s.compare(pos + d, prev + d) || t.get(pos + d) != t.get(prev + d)) + { + diff = true; + break; + } + else if (d > 0 && (SAIS._isLMS(t, pos+d) || SAIS._isLMS(t, prev + d))) + { + break; + } + } + if (diff) + { + name++; + prev = pos; + } + pos = (pos % 2 == 0) ? 
pos / 2 : (pos - 1) / 2; + SA[n1 + pos] = name - 1; + } + for (var i = n - 1, j = n - 1; i >= n1; i--) + { + if (SA[i] >= 0) + { + SA[j--] = SA[i]; + } + } + + // stage 2: solve the reduced problem + // recurse if names are not yet unique + var SA1 = SA; + var s1 = new OArray(SA, n - n1); + + if (name < n1) + { + SAIS._make(s1, SA1, n1, name - 1); + } + else + { + // generate the suffix array of s1 directly + for (i = 0; i < n1; i++) + { + SA1[s1.get(i)] = i; + } + } + + // stage 3: induce the result for the original problem + + bkt = [] : int[]; + bkt.length = K + 1; + // put all left-most S characters into their buckets + SAIS._getBuckets(s, bkt, n, K, true); // find ends of buckets + for (i = 1, j = 0; i < n; i++) + { + if (SAIS._isLMS(t, i)) + { + s1.set(j++, i); // get p1 + } + } + for (i = 0; i < n1; i++) + { + SA1[i] = s1.get(SA1[i]); // get index in s + } + for (i = n1; i < n; i++) + { + SA[i] = -1; // init SA[n1..n-1] + } + for (i = n1 - 1; i >= 0; i--) + { + j = SA[i]; + SA[i] = -1; + SA[--bkt[s.get(j)]] = j; + } + SAIS._induceSAl(t, SA, s, bkt, n, K, false); + SAIS._induceSAs(t, SA, s, bkt, n, K, true); + } +} diff --git a/web/server/h2o/libh2o/misc/oktavia/src/sax.jsx b/web/server/h2o/libh2o/misc/oktavia/src/sax.jsx new file mode 100644 index 00000000..d34cb825 --- /dev/null +++ b/web/server/h2o/libh2o/misc/oktavia/src/sax.jsx @@ -0,0 +1,1356 @@ +// When we pass the MAX_BUFFER_LENGTH position, start checking for buffer overruns. +// When we check, schedule the next check for MAX_BUFFER_LENGTH - (max(buffer lengths)), +// since that's the earliest that a buffer overrun could occur. This way, checks are +// as rare as required, but as often as necessary to ensure never crossing this bound. +// Furthermore, buffers are only tested at most once per write(), so passing a very +// large string into write() might have undesirable effects, but this is manageable by +// the caller, so it is assumed to be safe. 
Thus, a call to write() may, in the extreme +// edge case, result in creating at most one complete copy of the string passed in. +// Set to Infinity to have unlimited buffers. + + +class Tag +{ + var name : string; + var attributes : Map.<string>; + var isSelfClosing : boolean; + function constructor (name : string) + { + this.name = name; + this.attributes = {} : Map.<string>; + this.isSelfClosing = false; + } +} + +// Constant tables for the SAX parser: the names of its string buffers, the +// event names it can emit, and the buffer-length threshold that +// SAXParser._init copies into bufferCheckPosition. +class _Common +{ + static const buffers = [ + "comment", "sgmlDecl", "textNode", "tagName", "doctype", + "procInstName", "procInstBody", "entity", "attribName", + "attribValue", "cdata", "script" + ]; + + static const EVENTS = // for discoverability. + [ "text", + "processinginstruction", + "sgmldeclaration", + "doctype", + "comment", + "attribute", + "opentag", + "closetag", + "opencdata", + "cdata", + "closecdata", // counterpart of SAXHandler.onclosecdata + "error", + "end", + "ready", + "script", + "opennamespace", + "closenamespace" + ]; + + static const MAX_BUFFER_LENGTH = 64 * 1024; +} + +class _State +{ + static const BEGIN = 1; + static const TEXT = 2; // general stuff + static const TEXT_ENTITY = 3; // & and such. + static const OPEN_WAKA = 4; // < + static const SGML_DECL = 5; // <!BLARG + static const SGML_DECL_QUOTED = 6; // <!BLARG foo "bar + static const DOCTYPE = 7; // <!DOCTYPE + static const DOCTYPE_QUOTED = 8; // <!DOCTYPE "//blah + static const DOCTYPE_DTD = 9; // <!DOCTYPE "//blah" [ ... + static const DOCTYPE_DTD_QUOTED = 10; // <!DOCTYPE "//blah" [ "foo + static const COMMENT_STARTING = 11; // <!- + static const COMMENT = 12; // <!-- + static const COMMENT_ENDING = 13; // <!-- blah - + static const COMMENT_ENDED = 14; // <!-- blah -- + static const CDATA = 15; // <![CDATA[ something + static const CDATA_ENDING = 16; // ] + static const CDATA_ENDING_2 = 17; // ]] + static const PROC_INST = 18; // <?hi + static const PROC_INST_BODY = 19; // <?hi there + static const PROC_INST_ENDING = 20; // <?hi "there" ? 
+ static const OPEN_TAG = 21; // <strong + static const OPEN_TAG_SLASH = 22; // <strong / + static const ATTRIB = 23; // <a + static const ATTRIB_NAME = 24; // <a foo + static const ATTRIB_NAME_SAW_WHITE = 25; // <a foo _ + static const ATTRIB_VALUE = 26; // <a foo= + static const ATTRIB_VALUE_QUOTED = 27; // <a foo="bar + static const ATTRIB_VALUE_UNQUOTED = 28; // <a foo=bar + static const ATTRIB_VALUE_ENTITY_Q = 29; // <foo bar=""" + static const ATTRIB_VALUE_ENTITY_U = 30; // <foo bar=" + static const CLOSE_TAG = 31; // </a + static const CLOSE_TAG_SAW_WHITE = 32; // </a > + static const SCRIPT = 33; // <script> ... + static const SCRIPT_ENDING = 34; // <script> ... < +} + + +class SAXHandler +{ + var position : int; + var column : int; + var line : int; + function constructor () + { + this.position = 0; + this.column = 0; + this.line = 0; + } + function onerror (error : Error) : void + { + } + function ontext (text : string) : void + { + } + function ondoctype (doctype : string) : void + { + } + function onprocessinginstruction (name : string, body : string) : void + { + } + function onsgmldeclaration (sgmlDecl : string) : void + { + } + function onopentag (tagname : string, attributes : Map.<string>) : void + { + } + function onclosetag (tagname : string) : void + { + } + function onattribute (name : string, value : string) : void + { + } + function oncomment (comment : string) : void + { + } + function onopencdata () : void + { + } + function oncdata (cdata : string) : void + { + } + function onclosecdata () : void + { + } + function onend () : void + { + } + function onready () : void + { + } + function onscript (script : string) : void + { + } +} + +class SAXParser +{ + var q : string; + var c : string; + var bufferCheckPosition : int; + var looseCase : string; + var tags = [] : Tag[]; + var closed : boolean; + var closedRoot : boolean; + var sawRoot : boolean; + var tag : Nullable.<Tag>; + var error : Nullable.<Error>; + var handler : SAXHandler; + var 
ENTITIES : Map.<string>; + var strict : boolean; + var tagName : string; + var state : int; + var line : int; + var column : int; + var position : int; + var startTagPosition : int; + var attribName : string; + var attribValue : string; + var script : string; + var textNode : string; + var attribList : string[][]; + var noscript : boolean; + var cdata : string; + var procInstBody : string; + var procInstName : string; + var doctype : string; + var entity : string; + var sgmlDecl : string; + var comment : string; + var preTags : int; + + function constructor(handler : SAXHandler) + { + this._init(handler, false); + } + + function constructor(handler : SAXHandler, strict : boolean) + { + this._init(handler, strict); + } + + function _init (handler : SAXHandler, strict : boolean) : void + { + this.handler = handler; + this.clearBuffers(); + this.q = ""; + this.bufferCheckPosition = _Common.MAX_BUFFER_LENGTH; + //this.opt = opt || {} + //this.opt.lowercase = this.opt.lowercase || this.opt.lowercasetags + this.looseCase = 'toLowerCase'; // this.opt.lowercase ? "toLowerCase" : "toUpperCase" + this.tags = [] : Tag[]; + this.closed = this.closedRoot = this.sawRoot = false; + this.tag = null; + this.error = null; + this.strict = strict; + this.noscript = strict; //!!(strict || this.opt.noscript); + this.state = _State.BEGIN; + this.ENTITIES = _Entities.entity_list(); + this.attribList = [] : string[][]; + this.noscript = false; + this.preTags = 0; + + this.handler.onready(); + } + + function set_noscript (flag : boolean) : void + { + this.noscript = flag; + } + + function resume () : SAXParser + { + this.error = null; + return this; + } + + function close () : SAXParser + { + return this.parse(''); + } + + function parse (chunk : string) : SAXParser + { + var _ = new Char(); + if (this.error) + { + throw this.error; + } + if (this.closed) + { + return this.emiterror("Cannot write after close. 
Assign an onready handler."); + } + var i = 0, c = ""; + while (this.c = c = chunk.charAt(i++)) + { + this.position++; + if (c == "\n") + { + this.handler.line++; + this.handler.column = 0; + } + else + { + this.handler.column++; + } + switch (this.state) + { + case _State.BEGIN: + //log "BEGIN"; + if (c == "<") + { + this.state = _State.OPEN_WAKA; + this.startTagPosition = this.position; + } + else if (_.not(_.whitespace, c)) + { + // have to process this as a text node. + // weird, but happens. + this.strictFail("Non-whitespace before first tag."); + this.textNode = c; + this.state = _State.TEXT; + } + continue; + + case _State.TEXT: + //log "TEXT"; + if (this.sawRoot && !this.closedRoot) + { + var starti = i - 1; + while (c && c != "<" && c != "&") + { + c = chunk.charAt(i++); + if (c) + { + this.position++; + if (c == "\n") + { + this.handler.line++; + this.handler.column = 0; + } + else + { + this.handler.column++; + } + } + } + this.textNode += chunk.substring(starti, i - 1); + } + if (c == "<") + { + this.state = _State.OPEN_WAKA; + this.startTagPosition = this.position; + } + else + { + if (_.not(_.whitespace, c) && (!this.sawRoot || this.closedRoot)) + this.strictFail("Text data outside of root node."); + if (c == "&") this.state = _State.TEXT_ENTITY; + else this.textNode += c; + } + continue; + + case _State.SCRIPT: + //log "SCRIPT"; + // only non-strict + if (c == "<") { + this.state = _State.SCRIPT_ENDING; + } else this.script += c; + continue; + + case _State.SCRIPT_ENDING: + //log "SCRIPT END"; + if (c == "/") { + this.state = _State.CLOSE_TAG; + } else { + this.script += "<" + c; + this.state = _State.SCRIPT; + } + continue; + + case _State.OPEN_WAKA: + //log "OPEN_WAKA"; + // either a /, ?, !, or text is coming next. + if (c == "!") { + this.state = _State.SGML_DECL; + this.sgmlDecl = ""; + } else if (_.is(_.whitespace, c)) { + // wait for it... 
+ } else if (_.is(_.nameStart,c)) { + this.state = _State.OPEN_TAG; + this.tagName = c; + } else if (c == "/") { + this.state = _State.CLOSE_TAG; + this.tagName = ""; + } else if (c == "?") { + this.state = _State.PROC_INST; + this.procInstName = this.procInstBody = ""; + } else { + this.strictFail("Unencoded <"); + // if there was some whitespace, then add that in. + if (this.startTagPosition + 1 < this.position) { + var pad = this.position - this.startTagPosition; + // use a dedicated counter here: "i" is the read index into chunk + // for the enclosing while loop and must not be overwritten. + for (var padCount = 0; padCount < pad; padCount++) + { + c = " " + c; + } + } + this.textNode += "<" + c; + this.state = _State.TEXT; + } + continue; + + case _State.SGML_DECL: + //log "SGML_DECL"; + if ((this.sgmlDecl+c).toUpperCase() == _.CDATA) { + this.closetext_if_exist(); + this.handler.onopencdata(); + this.state = _State.CDATA; + this.sgmlDecl = ""; + this.cdata = ""; + } else if (this.sgmlDecl+c == "--") { + this.state = _State.COMMENT; + this.comment = ""; + this.sgmlDecl = ""; + } else if ((this.sgmlDecl+c).toUpperCase() == _.DOCTYPE) { + this.state = _State.DOCTYPE; + if (this.doctype || this.sawRoot) + { + this.strictFail("Inappropriately located doctype declaration"); + } + this.doctype = ""; + this.sgmlDecl = ""; + } else if (c == ">") { + this.closetext_if_exist(); + this.handler.onsgmldeclaration(this.sgmlDecl); + this.sgmlDecl = ""; + this.state = _State.TEXT; + } else if (_.is(_.quote, c)) { + this.state = _State.SGML_DECL_QUOTED; + // remember the opening quote so SGML_DECL_QUOTED can match the + // closing one (same as the DOCTYPE case does). + this.q = c; + this.sgmlDecl += c; + } else this.sgmlDecl += c; + continue; + + case _State.SGML_DECL_QUOTED: + //log "SGML_DECL_QUOTED"; + if (c == this.q) { + this.state = _State.SGML_DECL; + this.q = ""; + } + this.sgmlDecl += c; + continue; + + case _State.DOCTYPE: + //log "DOCTYPE"; + if (c == ">") { + this.state = _State.TEXT; + this.closetext_if_exist(); + this.handler.ondoctype(this.doctype.trim()); + } else { + this.doctype += c; + if (c == "[") this.state = _State.DOCTYPE_DTD; + else if (_.is(_.quote, c)) { + this.state = _State.DOCTYPE_QUOTED; + this.q = c; + } + } + continue; + 
+ case _State.DOCTYPE_QUOTED: + //log "DOCTYPE_QUOTED"; + this.doctype += c; + if (c == this.q) { + this.q = ""; + this.state = _State.DOCTYPE; + } + continue; + + case _State.DOCTYPE_DTD: + //log "DOCTYPE_DTD"; + this.doctype += c; + if (c == "]") this.state = _State.DOCTYPE; + else if (_.is(_.quote,c)) { + this.state = _State.DOCTYPE_DTD_QUOTED; + this.q = c; + } + continue; + + case _State.DOCTYPE_DTD_QUOTED: + //log "DOCTYPE_DTD_QUOTED"; + this.doctype += c; + if (c == this.q) { + this.state = _State.DOCTYPE_DTD; + this.q = ""; + } + continue; + + case _State.COMMENT: + //log "COMMENT"; + if (c == "-") this.state = _State.COMMENT_ENDING; + else this.comment += c; + continue; + + case _State.COMMENT_ENDING: + //log "COMMENT_ENDING"; + if (c == "-") { + this.state = _State.COMMENT_ENDED; + this.comment = this.textopts(this.comment); + if (this.comment) + { + this.closetext_if_exist(); + this.handler.oncomment(this.comment.trim()); + } + this.comment = ""; + } else { + this.comment += "-" + c; + this.state = _State.COMMENT; + } + continue; + + case _State.COMMENT_ENDED: + //log "COMMENT_ENDED"; + if (c != ">") { + this.strictFail("Malformed comment"); + // allow <!-- blah -- bloo --> in non-strict mode, + // which is a comment of " blah -- bloo " + this.comment += "--" + c; + this.state = _State.COMMENT; + } else this.state = _State.TEXT; + continue; + + case _State.CDATA: + //log "CDATA"; + if (c == "]") this.state = _State.CDATA_ENDING; + else this.cdata += c; + continue; + + case _State.CDATA_ENDING: + //log "CDATA_ENDING"; + if (c == "]") this.state = _State.CDATA_ENDING_2; + else { + this.cdata += "]" + c; + this.state = _State.CDATA; + } + continue; + + case _State.CDATA_ENDING_2: + //log "CDATA_ENDING 2"; + if (c == ">") { + if (this.cdata) + { + this.closetext_if_exist(); + } + this.handler.oncdata(this.cdata); + this.handler.onclosecdata(); + this.cdata = ""; + this.state = _State.TEXT; + } else if (c == "]") { + this.cdata += "]"; + } else { + this.cdata 
+= "]]" + c; + this.state = _State.CDATA; + } + continue; + + case _State.PROC_INST: + if (c == "?") this.state = _State.PROC_INST_ENDING; + else if (_.is(_.whitespace, c)) this.state = _State.PROC_INST_BODY; + else this.procInstName += c; + continue; + + case _State.PROC_INST_BODY: + if (!this.procInstBody && _.is(_.whitespace, c)) continue; + else if (c == "?") this.state = _State.PROC_INST_ENDING; + else this.procInstBody += c; + continue; + + case _State.PROC_INST_ENDING: + if (c == ">") { + this.closetext_if_exist(); + this.handler.onprocessinginstruction(this.procInstName, this.procInstBody); + this.procInstName = this.procInstBody = ""; + this.state = _State.TEXT; + } else { + this.procInstBody += "?" + c; + this.state = _State.PROC_INST_BODY; + } + continue; + + case _State.OPEN_TAG: + //log "OPEN TAG"; + if (_.is(_.nameBody, c)) this.tagName += c; + else { + this.newTag(); + if (c == ">") this.openTag(); + else if (c == "/") this.state = _State.OPEN_TAG_SLASH; + else { + if (_.not(_.whitespace, c)) this.strictFail("Invalid character in tag name"); + this.state = _State.ATTRIB; + } + } + continue; + + case _State.OPEN_TAG_SLASH: + //log "OPEN TAG SLASH"; + if (c == ">") { + this.openTag(true); + this.closeTag(); + } else { + this.strictFail("Forward-slash in opening tag not followed by >"); + this.state = _State.ATTRIB; + } + continue; + + case _State.ATTRIB: + //log "ATTRIB"; + // haven't read the attribute name yet. 
+ if (_.is(_.whitespace, c)) continue; + else if (c == ">") this.openTag(); + else if (c == "/") this.state = _State.OPEN_TAG_SLASH; + else if (_.is(_.nameStart, c)) { + this.attribName = c; + this.attribValue = ""; + this.state = _State.ATTRIB_NAME; + } else this.strictFail("Invalid attribute name"); + continue; + + case _State.ATTRIB_NAME: + //log "ATTRIB_NAME"; + if (c == "=") this.state = _State.ATTRIB_VALUE; + else if (c == ">") { + this.strictFail("Attribute without value"); + this.attribValue = this.attribName; + this.attrib(); + this.openTag(); + } + else if (_.is(_.whitespace, c)) this.state = _State.ATTRIB_NAME_SAW_WHITE; + else if (_.is(_.nameBody, c)) this.attribName += c; + else this.strictFail("Invalid attribute name"); + continue; + + case _State.ATTRIB_NAME_SAW_WHITE: + if (c == "=") this.state = _State.ATTRIB_VALUE; + else if (_.is(_.whitespace, c)) continue; + else { + this.strictFail( "Attribute without value"); + this.tag.attributes[this.attribName] = ""; + this.attribValue = ""; + this.closetext_if_exist(); + this.handler.onattribute(this.attribName, ""); + this.attribName = ""; + if (c == ">") this.openTag(); + else if (_.is(_.nameStart, c)) { + this.attribName = c; + this.state = _State.ATTRIB_NAME; + } else { + this.strictFail("Invalid attribute name"); + this.state = _State.ATTRIB; + } + } + continue; + + case _State.ATTRIB_VALUE: + if (_.is(_.whitespace, c)) continue; + else if (_.is(_.quote, c)) { + this.q = c; + this.state = _State.ATTRIB_VALUE_QUOTED; + } else { + this.strictFail("Unquoted attribute value"); + this.state = _State.ATTRIB_VALUE_UNQUOTED; + this.attribValue = c; + } + continue; + + case _State.ATTRIB_VALUE_QUOTED: + if (c != this.q) { + if (c == "&") this.state = _State.ATTRIB_VALUE_ENTITY_Q; + else this.attribValue += c; + continue; + } + this.attrib(); + this.q = ""; + this.state = _State.ATTRIB; + continue; + + case _State.ATTRIB_VALUE_UNQUOTED: + if (_.not(_.attribEnd,c)) { + if (c == "&") this.state = 
_State.ATTRIB_VALUE_ENTITY_U; + else this.attribValue += c; + continue; + } + this.attrib(); + if (c == ">") this.openTag(); + else this.state = _State.ATTRIB; + continue; + + case _State.CLOSE_TAG: + //log "CLOSE_TAG", c; + if (!this.tagName) + { + if (_.is(_.whitespace, c)) + { + continue; + } + else if (_.not(_.nameStart, c)) + { + if (this.script) + { + this.script += "</" + c; + this.state = _State.SCRIPT; + } + else + { + this.strictFail("Invalid tagname in closing tag."); + } + } + else + { + this.tagName = c; + } + } + else if (c == ">") + { + this.closeTag(); + } + else if (_.is(_.nameBody, c)) + { + this.tagName += c; + } + else if (this.script) + { + this.script += "</" + this.tagName; + this.tagName = ""; + this.state = _State.SCRIPT; + } + else + { + if (_.not(_.whitespace, c)) + { + this.strictFail("Invalid tagname in closing tag"); + } + this.state = _State.CLOSE_TAG_SAW_WHITE; + } + continue; + + case _State.CLOSE_TAG_SAW_WHITE: + if (_.is(_.whitespace, c)) continue; + if (c == ">") this.closeTag(); + else this.strictFail("Invalid characters in closing tag"); + continue; + + case _State.TEXT_ENTITY: + //log "TEXT_ENTITY"; + if (c == ";") { + this.textNode += this.parseEntity(); + this.entity = ""; + this.state = _State.TEXT; + } + else if (_.is(_.entity, c)) this.entity += c; + else { + this.strictFail("Invalid character entity"); + this.textNode += "&" + this.entity + c; + this.entity = ""; + this.state = _State.TEXT; + } + continue; + + case _State.ATTRIB_VALUE_ENTITY_Q: + case _State.ATTRIB_VALUE_ENTITY_U: + var returnState; + if (this.state == _State.ATTRIB_VALUE_ENTITY_Q) + { + returnState = _State.ATTRIB_VALUE_QUOTED; + } + else + { + returnState = _State.ATTRIB_VALUE_UNQUOTED; + } + if (c == ";") { + this.attribValue += this.parseEntity(); + this.entity = ""; + this.state = returnState; + } + else if (_.is(_.entity, c)) this.entity += c; + else { + this.strictFail("Invalid character entity"); + this.attribValue += "&" + this.entity + c; + 
+                    this.entity = "";
+                    this.state = returnState;
+                }
+                continue;
+
+            default:
+                // A state value with no matching case is reported as an error.
+                throw new Error("Unknown state: " + (this.state as string));
+            }
+        }
+        // Run the end-of-input checks and callbacks, then return the parser.
+        this.end();
+        return this;
+    }
+
+    /**
+     * Resets every string accumulation buffer of the parser to the empty string.
+     */
+    function clearBuffers () : void
+    {
+        this.comment = '';
+        this.sgmlDecl = '';
+        this.textNode = '';
+        this.tagName = '';
+        this.doctype = '';
+        this.procInstName = '';
+        this.procInstBody = '';
+        this.entity = '';
+        this.attribName = '';
+        this.attribValue = '';
+        this.cdata = '';
+        this.script = '';
+    }
+
+    /**
+     * Flushes the pending text node, but only when some text has been buffered.
+     */
+    function closetext_if_exist() : void
+    {
+        if (this.textNode != '')
+        {
+            this.closetext();
+        }
+    }
+
+    /**
+     * Reports the buffered text through handler.ontext and clears the buffer.
+     *
+     * While preTags is 0 the text is first normalized by textopts() and is
+     * reported only if the normalized result is non-empty; otherwise the raw
+     * buffer is reported unchanged.
+     */
+    function closetext () : void
+    {
+        if (this.preTags == 0)
+        {
+            var text = this.textopts(this.textNode);
+            if (text)
+            {
+                this.handler.ontext(text);
+            }
+        }
+        else if (this.textNode)
+        {
+            this.handler.ontext(this.textNode);
+        }
+        this.textNode = "";
+    }
+
+    /**
+     * Normalizes whitespace in a text node.
+     *
+     * @param text raw text
+     * @return text with newlines/tabs turned into spaces and every run of two
+     *         or more whitespace characters collapsed into a single space
+     */
+    function textopts (text : string) : string
+    {
+        text = text.replace(/[\n\t]/g, ' ');
+        text = text.replace(/\s\s+/g, " ");
+        return text;
+    }
+
+    /**
+     * Flushes pending text, appends the current line, column and character to
+     * the message, stores the resulting Error in this.error and passes it to
+     * handler.onerror.
+     *
+     * @param er error message
+     * @return this parser
+     */
+    function emiterror (er : string) : SAXParser
+    {
+        this.closetext();
+        er += "\nLine: " + (this.line as string) +
+              "\nColumn: " + (this.column as string) +
+              "\nChar: " + this.c;
+        var error = new Error(er);
+        this.error = error;
+        this.handler.onerror(error);
+        return this;
+    }
+
+    /**
+     * Finishes parsing: reports an unclosed root tag (strict mode only) and an
+     * unexpected end when the parser is not in the TEXT state, flushes pending
+     * text, marks the parser closed and calls handler.onend.
+     */
+    function end () : void
+    {
+        if (!this.closedRoot)
+        {
+            this.strictFail("Unclosed root tag");
+        }
+        if (this.state != _State.TEXT)
+        {
+            this.emiterror("Unexpected end");
+        }
+        this.closetext();
+        this.c = "";
+        this.closed = true;
+        this.handler.onend();
+    }
+
+    /**
+     * Emits the error only when the parser runs in strict mode; in non-strict
+     * mode the problem is silently ignored.
+     */
+    function strictFail (message : string) : void
+    {
+        if (this.strict)
+        {
+            this.emiterror(message);
+        }
+    }
+
+    /**
+     * Starts a new Tag object for the accumulated tag name (lower-cased in
+     * non-strict mode) and empties attribList.
+     */
+    function newTag () : void
+    {
+        if (!this.strict) this.tagName = this.tagName.toLowerCase();
+        // NOTE(review): `parent` and `tag` are never read in this method.
+        var parent = this.tags[this.tags.length - 1] || this;
+        var tag = this.tag = new Tag(this.tagName);
+        this.attribList.length = 0;
+    }
+
+    /**
+     * Commits the attribute held in attribName/attribValue to the current tag.
+     * The name is lower-cased in non-strict mode.
+     */
+    function attrib () : void
+    {
+        if (!this.strict) this.attribName = this.attribName.toLowerCase();
+
+        // A repeated attribute name is dropped: the first value wins and no
+        // onattribute callback is fired for the duplicate.
+        if (this.tag.attributes.hasOwnProperty(this.attribName)) {
+            this.attribName = this.attribValue = "";
+            return;
+        }
+
+        this.tag.attributes[this.attribName] = this.attribValue;
+        this.closetext_if_exist();
+        this.handler.onattribute(this.attribName, this.attribValue);
+        this.attribName = this.attribValue = "";
+    }
+
+    /**
+     * Opens the current tag as a regular (not self-closing) tag.
+     */
+    function openTag () : void
+    {
+        this.openTag(false);
+    }
+
+    /**
+     * Pushes the current tag on the tag stack, flushes pending text and calls
+     * handler.onopentag with the tag name and attributes.
+     *
+     * @param selfClosing stored on the tag; when false the parser also leaves
+     *        the tag (state becomes SCRIPT or TEXT and tag/tagName are cleared)
+     */
+    function openTag (selfClosing : boolean) : void
+    {
+        this.tag.isSelfClosing = selfClosing;
+
+        // process the tag
+        this.sawRoot = true;
+        this.tags.push(this.tag);
+        this.closetext_if_exist();
+        this.handler.onopentag(this.tag.name, this.tag.attributes);
+        // preTags counts the open <pre> elements; closetext() skips whitespace
+        // normalization while it is non-zero.
+        if (this.tag.name == 'pre')
+        {
+            this.preTags++;
+        }
+        if (!selfClosing)
+        {
+            // special case for <script> in non-strict mode.
+            if (!this.noscript && this.tagName.toLowerCase() == "script")
+            {
+                this.state = _State.SCRIPT;
+            }
+            else
+            {
+                this.state = _State.TEXT;
+            }
+            this.tag = null;
+            this.tagName = "";
+        }
+        this.attribName = this.attribValue = "";
+        this.attribList.length = 0;
+    }
+
+    /**
+     * Handles a closing tag whose name is held in tagName.
+     *
+     * An empty name is re-emitted as the literal text "</>". While script
+     * content is being collected, any closing tag other than "script" is
+     * appended to the script buffer instead of being treated as markup.
+     */
+    function closeTag () : void
+    {
+        if (!this.tagName)
+        {
+            this.strictFail("Weird empty close tag.");
+            this.textNode += "</>";
+            this.state = _State.TEXT;
+            return;
+        }
+
+        if (this.script)
+        {
+            if (this.tagName != "script")
+            {
+                this.script += "</" + this.tagName + ">";
+                this.tagName = "";
+                this.state = _State.SCRIPT;
+                return;
+            }
+            this.closetext_if_exist();
+            this.handler.onscript(this.script);
+            this.script = "";
+        }
+
+        // first make sure that the closing tag actually exists.
+        // <a><b></c></b></a> will close everything, otherwise.
+        var t = this.tags.length;
+        var tagName = this.tagName;
+        if (!this.strict) tagName = tagName.toLowerCase();
+        var closeTo = tagName;
+        // Walk the tag stack from the innermost tag outwards; t ends at the
+        // index of the matching open tag, or at -1 when there is none.
+        while (t --) {
+            var close = this.tags[t];
+            if (close.name != closeTo) {
+                // fail the first time in strict mode
+                this.strictFail("Unexpected close tag");
+            } else break;
+        }
+
+        // didn't find it. we already failed for strict, so just abort.
+        if (t < 0)
+        {
+            this.strictFail("Unmatched closing tag: "+this.tagName);
+            this.textNode += "</" + this.tagName + ">";
+            this.state = _State.TEXT;
+            return;
+        }
+        this.tagName = tagName;
+        var s = this.tags.length;
+        while (s --> t)
+        {
+            var tag = this.tag = this.tags.pop();
+            this.tagName = this.tag.name;
+            this.closetext_if_exist();
+            this.handler.onclosetag(this.tagName);
+            var parent = this.tags[this.tags.length - 1];
+            if (this.tagName == 'pre')
+            {
+                this.preTags--;
+            }
+        }
+        if (t == 0)
+        {
+            this.closedRoot = true;
+        }
+        this.tagName = this.attribValue = this.attribName = "";
+        this.attribList.length = 0;
+        this.state = _State.TEXT;
+    }
+
+    /**
+     * Resolves the entity name accumulated in this.entity (the text between
+     * "&" and ";") to its replacement text.
+     *
+     * Named entities are looked up in this.ENTITIES, first with the exact
+     * spelling and then lower-cased. Numeric references ("#123", "#x7B") are
+     * decoded to the corresponding character; code points above U+FFFF are
+     * returned as a UTF-16 surrogate pair.
+     *
+     * @return the replacement text, or the original "&name;" text when the
+     *         entity is unknown or malformed (also reported via strictFail)
+     */
+    function parseEntity () : string
+    {
+        var entity = this.entity;
+        var entityLC = entity.toLowerCase();
+        var num = 0;
+        var numStr = "";
+        if (this.ENTITIES[entity])
+        {
+            return this.ENTITIES[entity];
+        }
+        if (this.ENTITIES[entityLC])
+        {
+            return this.ENTITIES[entityLC];
+        }
+        entity = entityLC;
+        if (entity.charAt(0) == "#")
+        {
+            if (entity.charAt(1) == "x")
+            {
+                entity = entity.slice(2);
+                num = Number.parseInt(entity, 16);
+                numStr = num.toString(16);
+            }
+            else
+            {
+                entity = entity.slice(1);
+                num = Number.parseInt(entity, 10);
+                numStr = num.toString(10);
+            }
+        }
+        entity = entity.replace(/^0+/, "");
+        // The range test is written so that NaN fails it. Without it "&;"
+        // (num still 0, both strings empty) and "&#nan;" ("NaN" lower-cased
+        // equals the digits text) passed the round-trip comparison below and
+        // were decoded to U+0000.
+        var inRange = num >= 1 && num <= 0x10FFFF;
+        if (!inRange || numStr.toLowerCase() != entity) {
+            this.strictFail("Invalid character entity");
+            return "&"+this.entity + ";";
+        }
+        if (num > 0xFFFF)
+        {
+            // String.fromCharCode keeps only the low 16 bits, so supplementary
+            // code points have to be emitted as a high/low surrogate pair.
+            var offset = num - 0x10000;
+            var high = 0xD800 + Math.floor(offset / 0x400);
+            var low = 0xDC00 + (offset % 0x400);
+            return String.fromCharCode(high) + String.fromCharCode(low);
+        }
+        return String.fromCharCode(num);
+    }
+}
+
+class Char
+{
+    var whitespace : Map.<boolean>;
+    var number : Map.<boolean>;
+    var letter : Map.<boolean>;
+    var quote : Map.<boolean>;
+    var entity : Map.<boolean>;
+    var attribEnd : Map.<boolean>;
+    var nameStart : RegExp;
+    var nameBody : RegExp;
+    var CDATA : string;
+    var DOCTYPE : string;
+    var XML_NAMESPACE : string;
+
+    function constructor()
+    {
+        // character classes and tokens
+        var whitespace = "\r\n\t ";
+        // this really needs to be replaced with character classes.
+        // XML allows all manner of ridiculous numbers and digits.
+        // The order of the characters is irrelevant: each string below is
+        // expanded into a per-character lookup map by _charClass().
+        var number = "0124356789";
+        var letter = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
+        // (Letter | "_" | ":")
+        var quote = "'\"";
+        // Characters accepted inside an entity reference: digits, letters, "#".
+        var entity = number+letter+"#";
+        // Characters that terminate an unquoted attribute value.
+        var attribEnd = whitespace + ">";
+        this.CDATA = "[CDATA[";
+        this.DOCTYPE = "DOCTYPE";
+        this.XML_NAMESPACE = "http://www.w3.org/XML/1998/namespace";
+
+        // turn all the string character sets into character class objects.
+        this.whitespace = this._charClass(whitespace);
+        this.number = this._charClass(number);
+        this.letter = this._charClass(letter);
+        this.quote = this._charClass(quote);
+        this.entity = this._charClass(entity);
+        this.attribEnd = this._charClass(attribEnd);
+        // Pattern for the first character of a tag or attribute name.
+        this.nameStart = /[:_A-Za-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD]/;
+
+        // Pattern for the following characters of a name: the nameStart set
+        // plus \u00B7, combining marks, \u203F-\u2040, ".", digits and "-".
+        this.nameBody = /[:_A-Za-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD\u00B7\u0300-\u036F\u203F-\u2040\.\d-]/;
+    }
+
+    /**
+     * Builds a lookup map with one `true` entry per character of str.
+     */
+    function _charClass (str : string) : Map.<boolean>
+    {
+        var result = {} : Map.<boolean>;
+        for (var i = 0; i < str.length; i++)
+        {
+            result[str.slice(i, i + 1)] = true;
+        }
+        return result;
+    }
+
+    /** Tests c against a regular-expression character class. */
+    function is (charclass : RegExp, c : string) : boolean
+    {
+        return charclass.test(c);
+    }
+
+    /** Tests whether c is a key of a map-based character class. */
+    function is (charclass : Map.<boolean>, c : string) : boolean
+    {
+        return charclass.hasOwnProperty(c);
+    }
+
+    /** Negation of is() for regular-expression character classes. */
+    function not (charclass : RegExp, c : string) : boolean {
+        return !this.is(charclass, c);
+    }
+
+    /** Negation of is() for map-based character classes. */
+    function not (charclass : Map.<boolean>, c : string) : boolean {
+        return !this.is(charclass, c);
+    }
+}
+
+
+class _Entities
+{
+    // Entity name -> replacement. Values are either the replacement string
+    // itself or a numeric character code; entity_list() converts the numeric
+    // ones with String.fromCharCode.
+    static const _entities = {
+        "amp" : "&",
+        "gt" : ">",
+        "lt" : "<",
+        "quot" : "\"",
+        "apos" : "'",
+        "AElig" : 198,
+        "Aacute" : 193,
+        "Acirc" : 194,
+        "Agrave" : 192,
+        "Aring" : 197,
+        "Atilde" : 195,
+        "Auml" : 196,
+        "Ccedil" : 199,
+        "ETH" : 208,
+        "Eacute" : 201,
+        "Ecirc" : 202,
"Egrave" : 200, + "Euml" : 203, + "Iacute" : 205, + "Icirc" : 206, + "Igrave" : 204, + "Iuml" : 207, + "Ntilde" : 209, + "Oacute" : 211, + "Ocirc" : 212, + "Ograve" : 210, + "Oslash" : 216, + "Otilde" : 213, + "Ouml" : 214, + "THORN" : 222, + "Uacute" : 218, + "Ucirc" : 219, + "Ugrave" : 217, + "Uuml" : 220, + "Yacute" : 221, + "aacute" : 225, + "acirc" : 226, + "aelig" : 230, + "agrave" : 224, + "aring" : 229, + "atilde" : 227, + "auml" : 228, + "ccedil" : 231, + "eacute" : 233, + "ecirc" : 234, + "egrave" : 232, + "eth" : 240, + "euml" : 235, + "iacute" : 237, + "icirc" : 238, + "igrave" : 236, + "iuml" : 239, + "ntilde" : 241, + "oacute" : 243, + "ocirc" : 244, + "ograve" : 242, + "oslash" : 248, + "otilde" : 245, + "ouml" : 246, + "szlig" : 223, + "thorn" : 254, + "uacute" : 250, + "ucirc" : 251, + "ugrave" : 249, + "uuml" : 252, + "yacute" : 253, + "yuml" : 255, + "copy" : 169, + "reg" : 174, + "nbsp" : 160, + "iexcl" : 161, + "cent" : 162, + "pound" : 163, + "curren" : 164, + "yen" : 165, + "brvbar" : 166, + "sect" : 167, + "uml" : 168, + "ordf" : 170, + "laquo" : 171, + "not" : 172, + "shy" : 173, + "macr" : 175, + "deg" : 176, + "plusmn" : 177, + "sup1" : 185, + "sup2" : 178, + "sup3" : 179, + "acute" : 180, + "micro" : 181, + "para" : 182, + "middot" : 183, + "cedil" : 184, + "ordm" : 186, + "raquo" : 187, + "frac14" : 188, + "frac12" : 189, + "frac34" : 190, + "iquest" : 191, + "times" : 215, + "divide" : 247, + "OElig" : 338, + "oelig" : 339, + "Scaron" : 352, + "scaron" : 353, + "Yuml" : 376, + "fnof" : 402, + "circ" : 710, + "tilde" : 732, + "Alpha" : 913, + "Beta" : 914, + "Gamma" : 915, + "Delta" : 916, + "Epsilon" : 917, + "Zeta" : 918, + "Eta" : 919, + "Theta" : 920, + "Iota" : 921, + "Kappa" : 922, + "Lambda" : 923, + "Mu" : 924, + "Nu" : 925, + "Xi" : 926, + "Omicron" : 927, + "Pi" : 928, + "Rho" : 929, + "Sigma" : 931, + "Tau" : 932, + "Upsilon" : 933, + "Phi" : 934, + "Chi" : 935, + "Psi" : 936, + "Omega" : 937, + "alpha" : 945, + "beta" : 946, 
+ "gamma" : 947, + "delta" : 948, + "epsilon" : 949, + "zeta" : 950, + "eta" : 951, + "theta" : 952, + "iota" : 953, + "kappa" : 954, + "lambda" : 955, + "mu" : 956, + "nu" : 957, + "xi" : 958, + "omicron" : 959, + "pi" : 960, + "rho" : 961, + "sigmaf" : 962, + "sigma" : 963, + "tau" : 964, + "upsilon" : 965, + "phi" : 966, + "chi" : 967, + "psi" : 968, + "omega" : 969, + "thetasym" : 977, + "upsih" : 978, + "piv" : 982, + "ensp" : 8194, + "emsp" : 8195, + "thinsp" : 8201, + "zwnj" : 8204, + "zwj" : 8205, + "lrm" : 8206, + "rlm" : 8207, + "ndash" : 8211, + "mdash" : 8212, + "lsquo" : 8216, + "rsquo" : 8217, + "sbquo" : 8218, + "ldquo" : 8220, + "rdquo" : 8221, + "bdquo" : 8222, + "dagger" : 8224, + "Dagger" : 8225, + "bull" : 8226, + "hellip" : 8230, + "permil" : 8240, + "prime" : 8242, + "Prime" : 8243, + "lsaquo" : 8249, + "rsaquo" : 8250, + "oline" : 8254, + "frasl" : 8260, + "euro" : 8364, + "image" : 8465, + "weierp" : 8472, + "real" : 8476, + "trade" : 8482, + "alefsym" : 8501, + "larr" : 8592, + "uarr" : 8593, + "rarr" : 8594, + "darr" : 8595, + "harr" : 8596, + "crarr" : 8629, + "lArr" : 8656, + "uArr" : 8657, + "rArr" : 8658, + "dArr" : 8659, + "hArr" : 8660, + "forall" : 8704, + "part" : 8706, + "exist" : 8707, + "empty" : 8709, + "nabla" : 8711, + "isin" : 8712, + "notin" : 8713, + "ni" : 8715, + "prod" : 8719, + "sum" : 8721, + "minus" : 8722, + "lowast" : 8727, + "radic" : 8730, + "prop" : 8733, + "infin" : 8734, + "ang" : 8736, + "and" : 8743, + "or" : 8744, + "cap" : 8745, + "cup" : 8746, + "int" : 8747, + "there4" : 8756, + "sim" : 8764, + "cong" : 8773, + "asymp" : 8776, + "ne" : 8800, + "equiv" : 8801, + "le" : 8804, + "ge" : 8805, + "sub" : 8834, + "sup" : 8835, + "nsub" : 8836, + "sube" : 8838, + "supe" : 8839, + "oplus" : 8853, + "otimes" : 8855, + "perp" : 8869, + "sdot" : 8901, + "lceil" : 8968, + "rceil" : 8969, + "lfloor" : 8970, + "rfloor" : 8971, + "lang" : 9001, + "rang" : 9002, + "loz" : 9674, + "spades" : 9824, + "clubs" : 9827, + 
"hearts" : 9829,
+        "diams" : 9830
+    } : Map.<variant>;
+
+    /**
+     * Builds a name -> replacement-string map from _entities.
+     *
+     * String values are copied as they are, numeric values are converted
+     * with String.fromCharCode; values of any other type are skipped.
+     */
+    static function entity_list () : Map.<string>
+    {
+        var result = {} : Map.<string>;
+        for (var key in _Entities._entities)
+        {
+            var value : variant = _Entities._entities[key];
+            if (typeof(value) == 'string')
+            {
+                result[key] = value as string;
+            }
+            else if (typeof(value) == 'number')
+            {
+                result[key] = String.fromCharCode(value as int);
+            }
+        }
+        return result;
+    }
+}
+
diff --git a/web/server/h2o/libh2o/misc/oktavia/src/search-result.jsx b/web/server/h2o/libh2o/misc/oktavia/src/search-result.jsx
new file mode 100644
index 00000000..fb9721fa
--- /dev/null
+++ b/web/server/h2o/libh2o/misc/oktavia/src/search-result.jsx
@@ -0,0 +1,287 @@
+import "oktavia.jsx";
+
+// Pairs an index into SearchSummary.sourceResults (omit) with the number of
+// units found when that query is left out (expect); see getProposal().
+class Proposal
+{
+    var omit : int;
+    var expect : int;
+    function constructor (omit : int, expect : int)
+    {
+        this.omit = omit;
+        this.expect = expect;
+    }
+}
+
+// One matched word at one position, plus a flag saying whether the match
+// was recorded as stemmed.
+class Position
+{
+    var word : string;
+    var position : int;
+    var stemmed : boolean;
+    function constructor (word : string, position : int, stemmed : boolean)
+    {
+        this.word = word;
+        this.position = position;
+        this.stemmed = stemmed;
+    }
+}
+
+// The hits recorded for one unit id, keyed by position.
+class SearchUnit
+{
+    var positions : Map.<Position>;   // key: the position converted to string
+    var id : int;
+    var _size : int;                  // number of distinct positions stored
+    var score : int;
+    var startPosition : int;          // -1 until assigned by other code
+
+    function constructor (id : int)
+    {
+        this.positions = {} : Map.<Position>;
+        this.id = id;
+        this._size = 0;
+        this.score = 0;
+        this.startPosition = -1;
+    }
+
+    /**
+     * Records a hit. A new position is stored as-is. For a position that is
+     * already present the longer word is kept and the entry stays "stemmed"
+     * only if both the stored and the new hit are stemmed.
+     */
+    function addPosition (word : string, position : int, stemmed : boolean) : void
+    {
+        var positionObj = this.positions[position as string];
+        if (!positionObj)
+        {
+            this._size++;
+            this.positions[position as string] = new Position(word, position, stemmed);
+        }
+        else
+        {
+            if (positionObj.word.length < word.length)
+            {
+                positionObj.word = word;
+            }
+            positionObj.stemmed = positionObj.stemmed && stemmed;
+        }
+    }
+
+    /** Returns the hit stored at the position, or null when there is none. */
+    function get (position : int) : Nullable.<Position>
+    {
+        return this.positions[position as string];
+    }
+
+    function size () : int
+    {
+        return this._size;
+    }
+
+    /** Folds every hit recorded in rhs into this unit through addPosition. */
+    function merge (rhs : SearchUnit) : void
+    {
+        for (var key in rhs.positions)
+        {
+            var hit = rhs.positions[key];
+            this.addPosition(hit.word, hit.position, hit.stemmed);
+        }
+    }
+
+    /** Returns all stored hits as an array sorted by ascending position. */
+    function getPositions () : Position[]
+    {
+        var ordered = [] : Position[];
+        for (var key in this.positions)
+        {
+            ordered.push(this.positions[key]);
+        }
+        ordered.sort((lhs : Position, rhs : Position) -> ((lhs.position - rhs.position) as number));
+        return ordered;
+    }
+}
+
+// Result of one query term: the matching units (units[i] belongs to
+// unitIds[i]) and the flags saying how the term combines with the others.
+class SingleResult
+{
+    var units : SearchUnit[];
+    var unitIds : int[];
+    var or : boolean;
+    var not : boolean;
+    var searchWord : string;
+
+    /** Creates an empty result with no search word and both flags false. */
+    function constructor ()
+    {
+        this.searchWord = '';
+        this.or = false;
+        this.not = false;
+        this.units = [] : SearchUnit[];
+        this.unitIds = [] : int[];
+    }
+
+    /** Creates an empty result for the given word and combination flags. */
+    function constructor (searchWord : string, or : boolean, not : boolean)
+    {
+        this.searchWord = searchWord;
+        this.or = or;
+        this.not = not;
+        this.units = [] : SearchUnit[];
+        this.unitIds = [] : int[];
+    }
+
+    /**
+     * Returns the unit registered for unitId, creating and registering a new
+     * empty one when the id has not been seen yet.
+     */
+    function getSearchUnit (unitId : int) : SearchUnit
+    {
+        var index = this.unitIds.indexOf(unitId);
+        if (index != -1)
+        {
+            return this.units[index];
+        }
+        var created = new SearchUnit(unitId);
+        this.units.push(created);
+        this.unitIds.push(unitId);
+        return created;
+    }
+
+    /**
+     * Combines this result with rhs into a new SingleResult. The flags of
+     * rhs select the operation: `or` first, then `not`, otherwise AND.
+     */
+    function merge (rhs : SingleResult) : SingleResult
+    {
+        var merged = new SingleResult();
+        if (rhs.or)
+        {
+            this._orMerge(merged, rhs);
+            return merged;
+        }
+        if (rhs.not)
+        {
+            this._notMerge(merged, rhs);
+            return merged;
+        }
+        this._andMerge(merged, rhs);
+        return merged;
+    }
+
+    /** Number of units in this result. */
+    function size () : int
+    {
+        return this.units.length;
+    }
+
+    /** Copies into result the units of this whose id also occurs in rhs. */
+    function _andMerge (result : SingleResult, rhs : SingleResult) : void
+    {
+        for (var index = 0; index < this.unitIds.length; index++)
+        {
+            var unitId = this.unitIds[index];
+            if (rhs.unitIds.indexOf(unitId) == -1)
+            {
+                continue;
+            }
+            var kept = this.units[index];
+            result.unitIds.push(unitId);
+            result.units.push(kept);
+        }
+    }
+
+    function _orMerge (result
: SingleResult, rhs : SingleResult) : void
+    {
+        // Start from shallow copies of this result's arrays.
+        result.unitIds = this.unitIds.slice(0, this.unitIds.length);
+        result.units = this.units.slice(0, this.units.length);
+
+        for (var i = 0; i < rhs.unitIds.length; i++)
+        {
+            var id = rhs.unitIds[i];
+            var rhsSection = rhs.units[i];
+            if (result.unitIds.indexOf(id) != -1)
+            {
+                // NOTE(review): the copies above are shallow, so this merge
+                // also mutates the SearchUnit object still held by this.units.
+                var lhsSection = result.units[result.unitIds.indexOf(id)];
+                lhsSection.merge(rhsSection);
+            }
+            else
+            {
+                result.unitIds.push(id);
+                result.units.push(rhsSection);
+            }
+        }
+    }
+
+    /** Copies into result the units of this whose id does not occur in rhs. */
+    function _notMerge (result : SingleResult, rhs : SingleResult) : void
+    {
+        for (var i = 0; i < this.unitIds.length; i++)
+        {
+            var id = this.unitIds[i];
+            if (rhs.unitIds.indexOf(id) == -1)
+            {
+                var lhsSection = this.units[i];
+                result.unitIds.push(id);
+                result.units.push(lhsSection);
+            }
+        }
+    }
+}
+
+// Collects the per-term results of a query and their merged outcome.
+class SearchSummary
+{
+    var sourceResults : SingleResult[];
+    var result : Nullable.<SingleResult>;   // null until mergeResult() is called
+    var oktavia : Nullable.<Oktavia>;
+
+    function constructor()
+    {
+        this.sourceResults = [] : SingleResult[];
+        this.result = null;
+        this.oktavia = null;
+    }
+
+    function constructor (oktavia : Oktavia)
+    {
+        this.sourceResults = [] : SingleResult[];
+        this.result = null;
+        this.oktavia = oktavia;
+    }
+
+    /** Appends the result of one query term. */
+    function addQuery(result : SingleResult) : void
+    {
+        this.sourceResults.push(result);
+    }
+
+    /** Merges all collected term results and stores the outcome in this.result. */
+    function mergeResult () : void
+    {
+        this.result = this.mergeResult(this.sourceResults);
+    }
+
+    /**
+     * Merges the given results from left to right with SingleResult.merge.
+     *
+     * @param results term results in query order; may be empty
+     * @return the merged result; an empty SingleResult when the list is empty.
+     *         The previous code returned results[0] (undefined) in that case,
+     *         which made getProposal() fail on result.size() whenever only a
+     *         single query had been added.
+     */
+    function mergeResult (results : SingleResult[]) : SingleResult
+    {
+        if (results.length == 0)
+        {
+            return new SingleResult();
+        }
+        var rhs = results[0];
+        for (var i = 1; i < results.length; i++)
+        {
+            rhs = rhs.merge(results[i]);
+        }
+        return rhs;
+    }
+
+    /**
+     * For every query term, computes how many units would match if that term
+     * were left out, and returns the proposals ordered by descending count.
+     */
+    function getProposal () : Proposal[]
+    {
+        var proposals = [] : Proposal[];
+        for (var i = 0; i < this.sourceResults.length; i++)
+        {
+            // All term results except the i-th one.
+            var tmpSource = [] : SingleResult[];
+            for (var j = 0; j < this.sourceResults.length; j++)
+            {
+                if (i != j)
+                {
+                    tmpSource.push(this.sourceResults[j]);
+                }
+            }
+            var result = this.mergeResult(tmpSource);
+            proposals.push(new Proposal(i,
result.size()));
+        }
+        // Largest expected hit count first.
+        proposals.sort((a : Proposal, b : Proposal) -> ((b.expect - a.expect) as number));
+        return proposals;
+    }
+
+    /**
+     * Returns a copy of the merged units sorted by descending score.
+     * Reads this.result, which is null until mergeResult() has been called.
+     */
+    function getSortedResult () : SearchUnit[]
+    {
+        var result = this.result.units.slice(0, this.result.units.length);
+        result.sort((a : SearchUnit, b : SearchUnit) -> ((b.score - a.score) as number));
+        return result;
+    }
+
+    /**
+     * Number of units in the merged result.
+     * Reads this.result, which is null until mergeResult() has been called.
+     */
+    function size () : int
+    {
+        return this.result.size();
+    }
+
+    /** Appends a term result; does the same as addQuery(). */
+    function add (result : SingleResult) : void
+    {
+        this.sourceResults.push(result);
+    }
+}
+
diff --git a/web/server/h2o/libh2o/misc/oktavia/src/stemmer/among.jsx b/web/server/h2o/libh2o/misc/oktavia/src/stemmer/among.jsx
new file mode 100644
index 00000000..f695b94b
--- /dev/null
+++ b/web/server/h2o/libh2o/misc/oktavia/src/stemmer/among.jsx
@@ -0,0 +1,34 @@
+import "base-stemmer.jsx";
+
+class Among
+{
+    var s_size : int; /* length of the search string */
+    var s : string; /* search string */
+    var substring_i : int; /* index to longest matching substring */
+    var result : int; /* result of the lookup */
+    var instance : Nullable.<BaseStemmer> ;
+    /* object to invoke method on.
It is a BaseStemmer */
+    var method : Nullable.<(BaseStemmer) -> boolean>;
+    /* method to use if substring matches */
+
+    /**
+     * Creates an entry without a condition method: method and instance are
+     * set to null and s_size is taken from s.length.
+     */
+    function constructor (s : string, substring_i : int, result : int)
+    {
+        this.s_size = s.length;
+        this.s = s;
+        this.substring_i = substring_i;
+        this.result = result;
+        this.method = null;
+        this.instance = null;
+    }
+
+    /**
+     * Creates an entry with a condition method and the stemmer instance
+     * stored alongside it.
+     */
+    function constructor (s : string, substring_i : int, result : int,
+                          method : (BaseStemmer) -> boolean, instance : BaseStemmer)
+    {
+        this.s_size = s.length;
+        this.s = s;
+        this.substring_i = substring_i;
+        this.result = result;
+        this.method = method;
+        this.instance = instance;
+    }
+}
diff --git a/web/server/h2o/libh2o/misc/oktavia/src/stemmer/base-stemmer.jsx b/web/server/h2o/libh2o/misc/oktavia/src/stemmer/base-stemmer.jsx
new file mode 100644
index 00000000..dfc0d6ea
--- /dev/null
+++ b/web/server/h2o/libh2o/misc/oktavia/src/stemmer/base-stemmer.jsx
@@ -0,0 +1,419 @@
+import "stemmer.jsx";
+import "among.jsx";
+
+class BaseStemmer implements Stemmer
+{
+    // this.current string
+    var current : string;
+    var cursor : int;           // current index into this.current
+    var limit : int;            // forward bound for the cursor
+    var limit_backward : int;   // backward bound for the cursor
+    var bra : int;              // start of the marked slice
+    var ket : int;              // end of the marked slice
+    var cache : Map.<string>;   // results of stemWord/stemWords, keyed by '.' + word
+
+    /** Creates a stemmer with an empty cache and an empty current string. */
+    function constructor ()
+    {
+        this.cache = {} : Map.<string>;
+        this.setCurrent("");
+    }
+
+    /**
+     * Set the this.current string.
+     * Also resets cursor and bra to 0, limit and ket to the string length,
+     * and limit_backward to 0.
+     */
+    function setCurrent (value : string) : void
+    {
+        this.current = value;
+        this.cursor = 0;
+        this.limit = this.current.length;
+        this.limit_backward = 0;
+        this.bra = this.cursor;
+        this.ket = this.limit;
+    }
+
+    /**
+     * Get the this.current string.
+     */
+    function getCurrent () : string
+    {
+        return this.current;
+    }
+
+
+    /** Copies the string and all cursor/limit/slice fields from other. */
+    function copy_from (other : BaseStemmer) : void
+    {
+        this.current = other.current;
+        this.cursor = other.cursor;
+        this.limit = other.limit;
+        this.limit_backward = other.limit_backward;
+        this.bra = other.bra;
+        this.ket = other.ket;
+    }
+
+    /**
+     * Forward test: if the character at the cursor has a code in [min, max]
+     * and its bit is set in the bitmap s, advance the cursor and return true.
+     *
+     * @param s bitmap with 8 flags per element; the flag for code ch is bit
+     *          ((ch - min) & 7) of s[(ch - min) >>> 3]
+     */
+    function in_grouping (s : int[], min : int, max : int) : boolean
+    {
+        if (this.cursor >= this.limit) return false;
+        var ch = this.current.charCodeAt(this.cursor);
+        if (ch > max || ch < min) return false;
+        ch -= min;
+        if ((s[ch >>> 3] & (0x1 << (ch & 0x7))) == 0) return false;
+        this.cursor++;
+        return true;
+    }
+
+    /**
+     * Backward variant of in_grouping: tests the character before the cursor
+     * and moves the cursor back by one on success.
+     */
+    function in_grouping_b (s : int[], min : int, max : int) : boolean
+    {
+        if (this.cursor <= this.limit_backward) return false;
+        var ch = this.current.charCodeAt(this.cursor - 1);
+        if (ch > max || ch < min) return false;
+        ch -= min;
+        if ((s[ch >>> 3] & (0x1 << (ch & 0x7))) == 0) return false;
+        this.cursor--;
+        return true;
+    }
+
+    /**
+     * Forward test: if the character at the cursor is outside [min, max] or
+     * its bit is not set in the bitmap s, advance the cursor and return true.
+     */
+    function out_grouping (s : int[], min : int, max : int) : boolean
+    {
+        if (this.cursor >= this.limit) return false;
+        var ch = this.current.charCodeAt(this.cursor);
+        if (ch > max || ch < min) {
+            this.cursor++;
+            return true;
+        }
+        ch -= min;
+        if ((s[ch >>> 3] & (0X1 << (ch & 0x7))) == 0) {
+            this.cursor++;
+            return true;
+        }
+        return false;
+    }
+
+    /**
+     * Backward variant of out_grouping: tests the character before the
+     * cursor and moves the cursor back by one on success.
+     */
+    function out_grouping_b (s : int[], min : int, max : int) : boolean
+    {
+        if (this.cursor <= this.limit_backward) return false;
+        var ch = this.current.charCodeAt(this.cursor - 1);
+        if (ch > max || ch < min) {
+            this.cursor--;
+            return true;
+        }
+        ch -= min;
+        if ((s[ch >>> 3] & (0x1 << (ch & 0x7))) == 0) {
+            this.cursor--;
+            return true;
+        }
+        return false;
+    }
+
+    /**
+     * Forward test: advance the cursor and return true when the character at
+     * the cursor has a code within [min, max].
+     */
+    function in_range (min : int, max : int) : boolean
+    {
+        if (this.cursor >= this.limit) return false;
+        var ch = this.current.charCodeAt(this.cursor);
+        if (ch > max || ch < min) return false;
+        this.cursor++;
+        return true;
+    }
+
+    /**
+     * Backward variant of in_range: tests the character before the cursor
+     * and moves the cursor back by one on success.
+     */
+    function in_range_b (min : int, max : int) : boolean
+    {
+        if
(this.cursor <= this.limit_backward) return false;
+        var ch = this.current.charCodeAt(this.cursor - 1);
+        if (ch > max || ch < min) return false;
+        this.cursor--;
+        return true;
+    }
+
+    /**
+     * Forward test: advance the cursor and return true when the character at
+     * the cursor has a code outside [min, max].
+     */
+    function out_range (min : int, max : int) : boolean
+    {
+        if (this.cursor >= this.limit) return false;
+        var ch = this.current.charCodeAt(this.cursor);
+        if (!(ch > max || ch < min)) return false;
+        this.cursor++;
+        return true;
+    }
+
+    /**
+     * Backward variant of out_range: tests the character before the cursor
+     * and moves the cursor back by one on success.
+     */
+    function out_range_b (min : int, max : int) : boolean
+    {
+        if (this.cursor <= this.limit_backward) return false;
+        var ch = this.current.charCodeAt(this.cursor - 1);
+        if(!(ch > max || ch < min)) return false;
+        this.cursor--;
+        return true;
+    }
+
+    /**
+     * Forward literal match: if the s_size characters starting at the cursor
+     * equal s, moves the cursor past them and returns true.
+     */
+    function eq_s (s_size : int, s : string) : boolean
+    {
+        if (this.limit - this.cursor < s_size) return false;
+        if (this.current.slice(this.cursor, this.cursor + s_size) != s)
+        {
+            return false;
+        }
+        this.cursor += s_size;
+        return true;
+    }
+
+    /**
+     * Backward literal match: if the s_size characters ending at the cursor
+     * equal s, moves the cursor back before them and returns true.
+     */
+    function eq_s_b (s_size : int, s : string) : boolean
+    {
+        if (this.cursor - this.limit_backward < s_size) return false;
+        if (this.current.slice(this.cursor - s_size, this.cursor) != s)
+        {
+            return false;
+        }
+        this.cursor -= s_size;
+        return true;
+    }
+
+    /** Same as eq_s with the length taken from s. */
+    function eq_v (s : string) : boolean
+    {
+        return this.eq_s(s.length, s);
+    }
+
+    /** Same as eq_s_b with the length taken from s. */
+    function eq_v_b (s : string) : boolean
+    {
+        return this.eq_s_b(s.length, s);
+    }
+
+    /**
+     * Looks for the longest entry of v that matches the text starting at the
+     * cursor.
+     *
+     * On a match the cursor is placed just after the matched string and the
+     * entry's result is returned; an entry that carries a method only counts
+     * as a match when that method returns true. Returns 0 when nothing matches.
+     *
+     * @param v      the entry table; presumably sorted so that the binary
+     *               search below is valid -- TODO confirm against the generator
+     * @param v_size number of entries of v to search
+     */
+    function find_among (v : Among[], v_size : int) : int
+    {
+        var i = 0;
+        var j = v_size;
+
+        var c = this.cursor;
+        var l = this.limit;
+
+        // Number of leading characters already known to match at bounds i and j.
+        var common_i = 0;
+        var common_j = 0;
+
+        var first_key_inspected = false;
+
+        // Binary search over v[i..j).
+        while (true)
+        {
+            var k = i + ((j - i) >>> 1);
+            var diff = 0;
+            var common = common_i < common_j ?
common_i : common_j; // smaller
+            var w = v[k];
+            var i2;
+            // Compare v[k].s with the input, skipping the prefix that is
+            // already known to match.
+            for (i2 = common; i2 < w.s_size; i2++)
+            {
+                if (c + common == l)
+                {
+                    // Input exhausted before the entry was: input sorts first.
+                    diff = -1;
+                    break;
+                }
+                diff = this.current.charCodeAt(c + common) - w.s.charCodeAt(i2);
+                if (diff != 0) break;
+                common++;
+            }
+            if (diff < 0)
+            {
+                j = k;
+                common_j = common;
+            }
+            else
+            {
+                i = k;
+                common_i = common;
+            }
+            if (j - i <= 1)
+            {
+                if (i > 0) break; // v->s has been inspected
+                if (j == i) break; // only one item in v
+
+                // - but now we need to go round once more to get
+                // v->s inspected. This looks messy, but is actually
+                // the optimal approach.
+
+                if (first_key_inspected) break;
+                first_key_inspected = true;
+            }
+        }
+        // Starting at entry i, follow the substring_i links to shorter
+        // candidates until one matches completely and is accepted.
+        while (true)
+        {
+            var w = v[i];
+            if (common_i >= w.s_size)
+            {
+                this.cursor = c + w.s_size;
+                if (w.method == null)
+                {
+                    return w.result;
+                }
+                // NOTE(review): the method is called with w.instance here,
+                // whereas find_among_b passes `this` -- confirm which is intended.
+                var res = w.method(w.instance);
+                // Set the cursor again after the method call.
+                this.cursor = c + w.s_size;
+                if (res)
+                {
+                    return w.result;
+                }
+            }
+            i = w.substring_i;
+            if (i < 0) return 0;
+        }
+        return -1; // not reachable
+    }
+
+    // find_among_b is for backwards processing. Same comments apply
+    function find_among_b (v : Among[], v_size : int) : int
+    {
+        var i = 0;
+        var j = v_size;
+
+        var c = this.cursor;
+        var lb = this.limit_backward;
+
+        // Number of trailing characters already known to match at bounds i and j.
+        var common_i = 0;
+        var common_j = 0;
+
+        var first_key_inspected = false;
+
+        while (true)
+        {
+            var k = i + ((j - i) >> 1);
+            var diff = 0;
+            var common = common_i < common_j ?
common_i : common_j;
+            var w = v[k];
+            var i2;
+            // Compare v[k].s with the input from the end towards the start,
+            // skipping the suffix that is already known to match.
+            for (i2 = w.s_size - 1 - common; i2 >= 0; i2--)
+            {
+                if (c - common == lb)
+                {
+                    // Backward limit reached before the entry was exhausted.
+                    diff = -1;
+                    break;
+                }
+                diff = this.current.charCodeAt(c - 1 - common) - w.s.charCodeAt(i2);
+                if (diff != 0) break;
+                common++;
+            }
+            if (diff < 0)
+            {
+                j = k;
+                common_j = common;
+            }
+            else
+            {
+                i = k;
+                common_i = common;
+            }
+            if (j - i <= 1)
+            {
+                if (i > 0) break;
+                if (j == i) break;
+                if (first_key_inspected) break;
+                first_key_inspected = true;
+            }
+        }
+        // Starting at entry i, follow the substring_i links to shorter
+        // candidates until one matches completely and is accepted.
+        while (true)
+        {
+            var w = v[i];
+            if (common_i >= w.s_size)
+            {
+                this.cursor = c - w.s_size;
+                if (w.method == null) return w.result;
+                var res = w.method(this);
+                // Set the cursor again after the method call.
+                this.cursor = c - w.s_size;
+                if (res) return w.result;
+            }
+            i = w.substring_i;
+            if (i < 0) return 0;
+        }
+        return -1; // not reachable
+    }
+
+    /* to replace chars between c_bra and c_ket in this.current by the
+     * chars in s.
+     *
+     * limit is shifted by the change in length. A cursor at or after c_ket
+     * is shifted by the same amount; a cursor strictly inside the replaced
+     * range is moved to c_bra. Returns the change in length.
+     */
+    function replace_s (c_bra : int, c_ket : int, s : string) : int
+    {
+        var adjustment = s.length - (c_ket - c_bra);
+        this.current = this.current.slice(0, c_bra) + s + this.current.slice(c_ket);
+        this.limit += adjustment;
+        if (this.cursor >= c_ket) this.cursor += adjustment;
+        else if (this.cursor > c_bra) this.cursor = c_bra;
+        return adjustment;
+    }
+
+    /**
+     * Returns true when 0 <= bra <= ket <= limit <= current.length, i.e.
+     * when the marked slice lies inside the current string.
+     */
+    function slice_check () : boolean
+    {
+        if (this.bra < 0 ||
+            this.bra > this.ket ||
+            this.ket > this.limit ||
+            this.limit > this.current.length)
+        {
+            return false;
+        }
+        return true;
+    }
+
+    /**
+     * Replaces the marked slice [bra, ket) by s.
+     * Returns false, leaving the string untouched, when slice_check() fails.
+     */
+    function slice_from (s : string) : boolean
+    {
+        var result = false;
+        if (this.slice_check())
+        {
+            this.replace_s(this.bra, this.ket, s);
+            result = true;
+        }
+        return result;
+    }
+
+    /** Deletes the marked slice; same as slice_from(""). */
+    function slice_del () : boolean
+    {
+        return this.slice_from("");
+    }
+
+    /**
+     * Replaces [c_bra, c_ket) by s and shifts bra and ket by the change in
+     * length when they lie at or after c_bra.
+     */
+    function insert (c_bra : int, c_ket : int, s : string) : void
+    {
+        var adjustment = this.replace_s(c_bra, c_ket, s);
+        if (c_bra <= this.bra) this.bra += adjustment;
+        if (c_bra <= this.ket) this.ket += adjustment;
+    }
+
+    /* Return the text of the marked slice [bra, ket), or '' when the slice
+     * marks are invalid; the argument is not used. */
+
function slice_to (s : string) : string
+    {
+        // Invalid slice marks yield the empty string; the argument is unused.
+        if (!this.slice_check())
+        {
+            return '';
+        }
+        return this.current.slice(this.bra, this.ket);
+    }
+
+    /**
+     * Returns the current string up to limit; the argument is unused.
+     */
+    function assign_to (s : string) : string
+    {
+        var end = this.limit;
+        return this.current.slice(0, end);
+    }
+
+    /**
+     * Stemming step run on this.current by stemWord. The base
+     * implementation changes nothing and returns false.
+     */
+    function stem () : boolean
+    {
+        return false;
+    }
+
+    /**
+     * Returns the stem of word. Results are memoized in this.cache under
+     * the key '.' + word.
+     */
+    override function stemWord (word : string) : string
+    {
+        var cached = this.cache['.' + word];
+        if (cached != null)
+        {
+            return cached;
+        }
+        this.setCurrent(word);
+        this.stem();
+        var stemmed = this.getCurrent();
+        this.cache['.' + word] = stemmed;
+        return stemmed;
+    }
+
+    /**
+     * Stems every word of the array, in order, sharing the cache used by
+     * stemWord.
+     */
+    override function stemWords (words : string[]) : string[]
+    {
+        var stems = [] : string[];
+        for (var index = 0; index < words.length; index++)
+        {
+            stems.push(this.stemWord(words[index]));
+        }
+        return stems;
+    }
+}
diff --git a/web/server/h2o/libh2o/misc/oktavia/src/stemmer/danish-stemmer.jsx b/web/server/h2o/libh2o/misc/oktavia/src/stemmer/danish-stemmer.jsx
new file mode 100644
index 00000000..ffebd881
--- /dev/null
+++ b/web/server/h2o/libh2o/misc/oktavia/src/stemmer/danish-stemmer.jsx
@@ -0,0 +1,507 @@
+// This file was generated automatically by the Snowball to JSX compiler
+
+import "base-stemmer.jsx";
+import "among.jsx";
+
+ /**
+ * This class was automatically generated by a Snowball to JSX compiler
+ * It implements the stemming algorithm defined by a snowball script.
+ */
+
+class DanishStemmer extends BaseStemmer
+{
+    // NOTE(review): this class is generated code. The "line N" comments inside
+    // the methods presumably refer to lines of the originating Snowball script.
+    // The cursor/limit/bra/ket fields and the helpers in_grouping*, out_grouping*,
+    // find_among_b, eq_s_b, eq_v_b, slice_del, slice_from and slice_to are
+    // inherited from BaseStemmer, which is not visible in this file.
+    static const serialVersionUID = 1;
+    static const methodObject = new DanishStemmer();
+
+    /**
+     * Suffix table searched backwards by r_main_suffix.
+     * The third argument is the result code the caller switches on:
+     * 1 = delete the suffix, 2 (the bare "s") = delete only when the preceding
+     * character is in g_s_ending.
+     * NOTE(review): the second argument looks like the index of a shorter entry
+     * of this table that is itself a suffix of the entry (-1 for none); the
+     * exact meaning is defined by Among - confirm there.
+     */
+    static const a_0 = [
+        new Among("hed", -1, 1),
+        new Among("ethed", 0, 1),
+        new Among("ered", -1, 1),
+        new Among("e", -1, 1),
+        new Among("erede", 3, 1),
+        new Among("ende", 3, 1),
+        new Among("erende", 5, 1),
+        new Among("ene", 3, 1),
+        new Among("erne", 3, 1),
+        new Among("ere", 3, 1),
+        new Among("en", -1, 1),
+        new Among("heden", 10, 1),
+        new Among("eren", 10, 1),
+        new Among("er", -1, 1),
+        new Among("heder", 13, 1),
+        new Among("erer", 13, 1),
+        new Among("s", -1, 2),
+        new Among("heds", 16, 1),
+        new Among("es", 16, 1),
+        new Among("endes", 18, 1),
+        new Among("erendes", 19, 1),
+        new Among("enes", 18, 1),
+        new Among("ernes", 18, 1),
+        new Among("eres", 18, 1),
+        new Among("ens", 16, 1),
+        new Among("hedens", 24, 1),
+        new Among("erens", 24, 1),
+        new Among("ers", 16, 1),
+        new Among("ets", 16, 1),
+        new Among("erets", 28, 1),
+        new Among("et", -1, 1),
+        new Among("eret", 30, 1)
+    ];
+
+    /** Two-letter endings tested by r_consonant_pair; only "matched or not" is used. */
+    static const a_1 = [
+        new Among("gd", -1, -1),
+        new Among("dt", -1, -1),
+        new Among("gt", -1, -1),
+        new Among("kt", -1, -1)
+    ];
+
+    /**
+     * Suffix table for r_other_suffix: result 1 = delete the suffix and then try
+     * r_consonant_pair, result 2 ("l\u00F8st") = replace the suffix with "l\u00F8s".
+     */
+    static const a_2 = [
+        new Among("ig", -1, 1),
+        new Among("lig", 0, 1),
+        new Among("elig", 1, 1),
+        new Among("els", -1, 1),
+        new Among("l\u00F8st", -1, 2)
+    ];
+
+    // Character-class table passed to in_grouping/out_grouping together with the
+    // code-point range 97..248 ('a'..U+00F8). Presumably the vowel set - TODO confirm.
+    static const g_v = [17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 48, 0, 128] : int[];
+
+    // Character-class table for the range 97..229 ('a'..U+00E5); a character from
+    // this class must precede a bare "s" for r_main_suffix to delete it.
+    static const g_s_ending = [239, 254, 42, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16] : int[];
+
+    var I_x : int = 0;      // cursor + 3 as computed by r_mark_regions; lower bound for I_p1
+    var I_p1 : int = 0;     // region mark; the suffix routines only look at text from here on
+    var S_ch : string = ""; // character captured by r_undouble
+
+    /** Copies the stemmer-specific fields from other, then the inherited state. */
+    function copy_from (other : DanishStemmer) : void
+    {
+        this.I_x = other.I_x;
+        this.I_p1 = other.I_p1;
+        this.S_ch = other.S_ch;
+        super.copy_from(other);
+    }
+
+    /**
+     * Computes I_x and I_p1 for the current word.
+     * I_p1 starts at limit. I_x becomes cursor + 3; if that lies outside
+     * 0..limit the method returns false and I_p1 stays at limit. Otherwise the
+     * word is scanned forward for the first g_v character and then past the
+     * first non-g_v character after it; reaching limit during either scan also
+     * returns false. On success I_p1 is the resulting cursor, raised to I_x if
+     * it would be smaller. The cursor is left where the scan stopped; the
+     * caller (stem) restores it.
+     */
+    function r_mark_regions () : boolean
+    {
+        var v_1 : int;
+        var v_2 : int;
+        // (, line 29
+        this.I_p1 = this.limit;
+        // test, line 33
+        v_1 = this.cursor;
+        // (, line 33
+        // hop, line 33
+        {
+            var c : int = this.cursor + 3;
+            if (0 > c || c > this.limit)
+            {
+                return false;
+            }
+            this.cursor = c;
+        }
+        // setmark x, line 33
+        this.I_x = this.cursor;
+        this.cursor = v_1;
+        // goto, line 34
+        golab0: while(true)
+        {
+            v_2 = this.cursor;
+            var lab1 = true;
+            lab1: while (lab1 == true)
+            {
+                lab1 = false;
+                if (!(this.in_grouping(DanishStemmer.g_v, 97, 248)))
+                {
+                    break lab1;
+                }
+                // "goto" leaves the cursor in front of the match
+                this.cursor = v_2;
+                break golab0;
+            }
+            this.cursor = v_2;
+            if (this.cursor >= this.limit)
+            {
+                return false;
+            }
+            this.cursor++;
+        }
+        // gopast, line 34
+        golab2: while(true)
+        {
+            var lab3 = true;
+            lab3: while (lab3 == true)
+            {
+                lab3 = false;
+                if (!(this.out_grouping(DanishStemmer.g_v, 97, 248)))
+                {
+                    break lab3;
+                }
+                break golab2;
+            }
+            if (this.cursor >= this.limit)
+            {
+                return false;
+            }
+            this.cursor++;
+        }
+        // setmark p1, line 34
+        this.I_p1 = this.cursor;
+        // try, line 35
+        var lab4 = true;
+        lab4: while (lab4 == true)
+        {
+            lab4 = false;
+            // (, line 35
+            if (!(this.I_p1 < this.I_x))
+            {
+                break lab4;
+            }
+            this.I_p1 = this.I_x;
+        }
+        return true;
+    }
+
+    /**
+     * Removes one a_0 suffix found at the cursor, searching backwards with
+     * limit_backward temporarily set to I_p1.
+     * Returns false when the cursor is before I_p1, when no table entry matches,
+     * when the bare "s" is not preceded by a g_s_ending character, or when
+     * slice_del reports failure. limit_backward is restored on every path after
+     * it has been narrowed.
+     */
+    function r_main_suffix () : boolean
+    {
+        var among_var : int;
+        var v_1 : int;
+        var v_2 : int;
+        // (, line 40
+        // setlimit, line 41
+        v_1 = this.limit - this.cursor;
+        // tomark, line 41
+        if (this.cursor < this.I_p1)
+        {
+            return false;
+        }
+        this.cursor = this.I_p1;
+        v_2 = this.limit_backward;
+        this.limit_backward = this.cursor;
+        this.cursor = this.limit - v_1;
+        // (, line 41
+        // [, line 41
+        this.ket = this.cursor;
+        // substring, line 41
+        among_var = this.find_among_b(DanishStemmer.a_0, 32);
+        if (among_var == 0)
+        {
+            this.limit_backward = v_2;
+            return false;
+        }
+        // ], line 41
+        this.bra = this.cursor;
+        this.limit_backward = v_2;
+        switch (among_var) {
+            case 0:
+                // not reachable: 0 was handled above
+                return false;
+            case 1:
+                // (, line 48
+                // delete, line 48
+                if (!this.slice_del())
+                {
+                    return false;
+                }
+                break;
+            case 2:
+                // (, line 50
+                if (!(this.in_grouping_b(DanishStemmer.g_s_ending, 97, 229)))
+                {
+                    return false;
+                }
+                // delete, line 50
+                if (!this.slice_del())
+                {
+                    return false;
+                }
+                break;
+        }
+        return true;
+    }
+
+    /**
+     * If the text before the cursor ends in one of the a_1 pairs (searched with
+     * limit_backward temporarily set to I_p1), deletes the single character
+     * directly before the cursor.
+     * Returns false when the cursor is before I_p1, no pair matches, the cursor
+     * is already at limit_backward, or slice_del reports failure.
+     */
+    function r_consonant_pair () : boolean
+    {
+        var v_1 : int;
+        var v_2 : int;
+        var v_3 : int;
+        // (, line 54
+        // test, line 55
+        v_1 = this.limit - this.cursor;
+        // (, line 55
+        // setlimit, line 56
+        v_2 = this.limit - this.cursor;
+        // tomark, line 56
+        if (this.cursor < this.I_p1)
+        {
+            return false;
+        }
+        this.cursor = this.I_p1;
+        v_3 = this.limit_backward;
+        this.limit_backward = this.cursor;
+        this.cursor = this.limit - v_2;
+        // (, line 56
+        // [, line 56
+        this.ket = this.cursor;
+        // substring, line 56
+        if (this.find_among_b(DanishStemmer.a_1, 4) == 0)
+        {
+            this.limit_backward = v_3;
+            return false;
+        }
+        // ], line 56
+        this.bra = this.cursor;
+        this.limit_backward = v_3;
+        // end of the "test": go back to where the search started (ket is kept)
+        this.cursor = this.limit - v_1;
+        // next, line 62
+        if (this.cursor <= this.limit_backward)
+        {
+            return false;
+        }
+        this.cursor--;
+        // ], line 62
+        this.bra = this.cursor;
+        // delete, line 62
+        if (!this.slice_del())
+        {
+            return false;
+        }
+        return true;
+    }
+
+    /**
+     * First, if "st" stands before the cursor and is itself preceded by "ig",
+     * deletes the "st" (the cursor is restored afterwards either way). Then
+     * looks for an a_2 suffix with limit_backward temporarily set to I_p1:
+     * result 1 deletes the suffix and makes a best-effort call to
+     * r_consonant_pair, result 2 replaces the suffix with "l\u00F8s".
+     * Returns false when the cursor is before I_p1, nothing in a_2 matches, or a
+     * slice operation reports failure.
+     */
+    function r_other_suffix () : boolean
+    {
+        var among_var : int;
+        var v_1 : int;
+        var v_2 : int;
+        var v_3 : int;
+        var v_4 : int;
+        // (, line 65
+        // do, line 66
+        v_1 = this.limit - this.cursor;
+        var lab0 = true;
+        lab0: while (lab0 == true)
+        {
+            lab0 = false;
+            // (, line 66
+            // [, line 66
+            this.ket = this.cursor;
+            // literal, line 66
+            if (!(this.eq_s_b(2, "st")))
+            {
+                break lab0;
+            }
+            // ], line 66
+            this.bra = this.cursor;
+            // literal, line 66
+            if (!(this.eq_s_b(2, "ig")))
+            {
+                break lab0;
+            }
+            // delete, line 66
+            if (!this.slice_del())
+            {
+                return false;
+            }
+        }
+        this.cursor = this.limit - v_1;
+        // setlimit, line 67
+        v_2 = this.limit - this.cursor;
+        // tomark, line 67
+        if (this.cursor < this.I_p1)
+        {
+            return false;
+        }
+        this.cursor = this.I_p1;
+        v_3 = this.limit_backward;
+        this.limit_backward = this.cursor;
+        this.cursor = this.limit - v_2;
+        // (, line 67
+        // [, line 67
+        this.ket = this.cursor;
+        // substring, line 67
+        among_var = this.find_among_b(DanishStemmer.a_2, 5);
+        if (among_var == 0)
+        {
+            this.limit_backward = v_3;
+            return false;
+        }
+        // ], line 67
+        this.bra = this.cursor;
+        this.limit_backward = v_3;
+        switch (among_var) {
+            case 0:
+                // not reachable: 0 was handled above
+                return false;
+            case 1:
+                // (, line 70
+                // delete, line 70
+                if (!this.slice_del())
+                {
+                    return false;
+                }
+                // do, line 70
+                v_4 = this.limit - this.cursor;
+                var lab1 = true;
+                lab1: while (lab1 == true)
+                {
+                    lab1 = false;
+                    // call consonant_pair, line 70
+                    if (!this.r_consonant_pair())
+                    {
+                        break lab1;
+                    }
+                }
+                this.cursor = this.limit - v_4;
+                break;
+            case 2:
+                // (, line 72
+                // <-, line 72
+                if (!this.slice_from("l\u00F8s"))
+                {
+                    return false;
+                }
+                break;
+        }
+        return true;
+    }
+
+    /**
+     * With limit_backward temporarily set to I_p1, steps back over one
+     * character that is not in g_v, stores it in S_ch, and deletes it when the
+     * text before it ends with the same string (eq_v_b).
+     * Returns false when the cursor is before I_p1, the character before the
+     * cursor is in g_v (or there is none within the limit), the capture comes
+     * back empty, the preceding text does not match, or slice_del reports
+     * failure.
+     */
+    function r_undouble () : boolean
+    {
+        var v_1 : int;
+        var v_2 : int;
+        // (, line 75
+        // setlimit, line 76
+        v_1 = this.limit - this.cursor;
+        // tomark, line 76
+        if (this.cursor < this.I_p1)
+        {
+            return false;
+        }
+        this.cursor = this.I_p1;
+        v_2 = this.limit_backward;
+        this.limit_backward = this.cursor;
+        this.cursor = this.limit - v_1;
+        // (, line 76
+        // [, line 76
+        this.ket = this.cursor;
+        if (!(this.out_grouping_b(DanishStemmer.g_v, 97, 248)))
+        {
+            this.limit_backward = v_2;
+            return false;
+        }
+        // ], line 76
+        this.bra = this.cursor;
+        // -> ch, line 76
+        this.S_ch = this.slice_to(this.S_ch);
+        // Restore the saved backward limit before the emptiness check so that
+        // the early return below cannot leave limit_backward narrowed to I_p1
+        // (every other failure path in this class restores it as well).
+        this.limit_backward = v_2;
+        if (this.S_ch == '')
+        {
+            return false;
+        }
+        // name ch, line 77
+        if (!(this.eq_v_b(this.S_ch)))
+        {
+            return false;
+        }
+        // delete, line 78
+        if (!this.slice_del())
+        {
+            return false;
+        }
+        return true;
+    }
+
+    /**
+     * Entry point. Runs r_mark_regions forwards, then switches to backward
+     * mode (limit_backward = cursor, cursor = limit) and runs r_main_suffix,
+     * r_consonant_pair, r_other_suffix and r_undouble in that order. Each step
+     * is best-effort: its result is ignored and the cursor is reset to the same
+     * distance from limit afterwards. Always returns true.
+     */
+    override function stem () : boolean
+    {
+        var v_1 : int;
+        var v_2 : int;
+        var v_3 : int;
+        var v_4 : int;
+        var v_5 : int;
+        // (, line 82
+        // do, line 84
+        v_1 = this.cursor;
+        var lab0 = true;
+        lab0: while (lab0 == true)
+        {
+            lab0 = false;
+            // call mark_regions, line 84
+            if (!this.r_mark_regions())
+            {
+                break lab0;
+            }
+        }
+        this.cursor = v_1;
+        // backwards, line 85
+        this.limit_backward = this.cursor;
+        this.cursor = this.limit;
+        // (, line 85
+        // do, line 86
+        v_2 = this.limit - this.cursor;
+        var lab1 = true;
+        lab1: while (lab1 == true)
+        {
+            lab1 = false;
+            // call main_suffix, line 86
+            if (!this.r_main_suffix())
+            {
+                break lab1;
+            }
+        }
+        this.cursor = this.limit - v_2;
+        // do, line 87
+        v_3 = this.limit - this.cursor;
+        var lab2 = true;
+        lab2: while (lab2 == true)
+        {
+            lab2 = false;
+            // call consonant_pair, line 87
+            if (!this.r_consonant_pair())
+            {
+                break lab2;
+            }
+        }
+        this.cursor = this.limit - v_3;
+        // do, line 88
+        v_4 = this.limit - this.cursor;
+        var lab3 = true;
+        lab3: while (lab3 == true)
+        {
+            lab3 = false;
+            // call other_suffix, line 88
+            if (!this.r_other_suffix())
+            {
+                break lab3;
+            }
+        }
+        this.cursor = this.limit - v_4;
+        // do, line 89
+        v_5 = this.limit - this.cursor;
+        var lab4 = true;
+        lab4: while (lab4 == true)
+        {
+            lab4 = false;
+            // call undouble, line 89
+            if (!this.r_undouble())
+            {
+                break lab4;
+            }
+        }
+        this.cursor = this.limit - v_5;
+        // leave backward mode
+        this.cursor = this.limit_backward;
+        return true;
+    }
+
+    /** True for any DanishStemmer instance; instance state is not compared. */
+    function equals (o : variant) : boolean {
+        return o instanceof DanishStemmer;
+    }
+
+    /**
+     * Hash derived only from the literal class name (hash * 31 + char code,
+     * folded with a bitwise AND), so every instance returns the same value,
+     * in line with equals.
+     */
+    function hashCode() : int
+    {
+        //http://stackoverflow.com/questions/194846/is-there-any-kind-of-hashcode-function-in-javascript
+        var classname = "DanishStemmer";
+        var hash = 0;
+        if (classname.length == 0) return hash;
+        for (var i = 0; i < classname.length; i++) {
+            var char = classname.charCodeAt(i);
+            hash = ((hash << 5) - hash) + char;
+            hash = hash & hash; // Convert to 32bit integer
+        }
+        return hash;
+    }
+
+}
+
diff --git a/web/server/h2o/libh2o/misc/oktavia/src/stemmer/dutch-stemmer.jsx b/web/server/h2o/libh2o/misc/oktavia/src/stemmer/dutch-stemmer.jsx
new file mode 100644
index 00000000..a8c45d2f
--- /dev/null
+++ b/web/server/h2o/libh2o/misc/oktavia/src/stemmer/dutch-stemmer.jsx
@@ -0,0 +1,1020 @@
+// This file was generated automatically by the Snowball to JSX compiler
+
+import "base-stemmer.jsx";
+import "among.jsx";
+
+  /**
+   * This class was automatically generated by a Snowball to JSX compiler
+   * It implements the stemming algorithm defined by a snowball script.
+   */
+
+class DutchStemmer extends BaseStemmer
+{
+    // NOTE(review): the "line N" comments inside the methods presumably refer to
+    // lines of the originating Snowball script. The cursor/limit/bra/ket fields
+    // and the helpers in_grouping*, out_grouping*, find_among*, eq_s*, slice_del
+    // and slice_from are inherited from BaseStemmer, which is not visible here.
+    static const serialVersionUID = 1;
+    static const methodObject = new DutchStemmer();
+
+    // Table for r_prelude. Result codes 1..5 replace the matched accented
+    // letter with "a", "e", "i", "o", "u" respectively; the empty entry
+    // (result 6) makes r_prelude advance one character.
+    // NOTE(review): the second Among argument looks like the index of a shorter
+    // entry of the same table that is a suffix/prefix of this one (-1 for none);
+    // the exact meaning is defined by Among - confirm there.
+    static const a_0 = [
+        new Among("", -1, 6),
+        new Among("\u00E1", 0, 1),
+        new Among("\u00E4", 0, 1),
+        new Among("\u00E9", 0, 2),
+        new Among("\u00EB", 0, 2),
+        new Among("\u00ED", 0, 3),
+        new Among("\u00EF", 0, 3),
+        new Among("\u00F3", 0, 4),
+        new Among("\u00F6", 0, 4),
+        new Among("\u00FA", 0, 5),
+        new Among("\u00FC", 0, 5)
+    ];
+
+    // Table for r_postlude: "Y" (result 1) becomes "y", "I" (result 2) becomes
+    // "i", and the empty entry (result 3) advances one character.
+    static const a_1 = [
+        new Among("", -1, 3),
+        new Among("I", 0, 2),
+        new Among("Y", 0, 1)
+    ];
+
+    // Doubled letters tested by r_undouble; only "matched or not" is used.
+    static const a_2 = [
+        new Among("dd", -1, -1),
+        new Among("kk", -1, -1),
+        new Among("tt", -1, -1)
+    ];
+
+    // First suffix table of r_standard_suffix: result 1 ("heden") is replaced
+    // by "heid", result 2 ("ene", "en") goes through r_en_ending, result 3
+    // ("se", "s") is deleted under the conditions checked there.
+    static const a_3 = [
+        new Among("ene", -1, 2),
+        new Among("se", -1, 3),
+        new Among("en", -1, 2),
+        new Among("heden", 2, 1),
+        new Among("s", -1, 3)
+    ];
+
+    // Second suffix table of r_standard_suffix; the result codes 1..5 select the
+    // matching case of the switch in that method.
+    static const a_4 = [
+        new Among("end", -1, 1),
+        new Among("ig", -1, 2),
+        new Among("ing", -1, 1),
+        new Among("lijk", -1, 3),
+        new Among("baar", -1, 4),
+        new Among("bar", -1, 5)
+    ];
+
+    // Doubled letters tested in the last step of r_standard_suffix.
+    static const a_5 = [
+        new Among("aa", -1, -1),
+        new Among("ee", -1, -1),
+        new Among("oo", -1, -1),
+        new Among("uu", -1, -1)
+    ];
+
+    // Character-class tables passed to in_grouping/out_grouping together with a
+    // code-point range: g_v and g_v_j with 97..232 ('a'..U+00E8), g_v_I with
+    // 73..232 ('I'..U+00E8). Presumably the vowel set and two variants of it -
+    // TODO confirm against the Snowball script.
+    static const g_v = [17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128] : int[];
+
+    static const g_v_I = [1, 0, 0, 17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128] : int[];
+
+    static const g_v_j = [17, 67, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128] : int[];
+
+    // Region marks set by r_mark_regions and compared with the cursor by
+    // r_R2 / r_R1.
+    var I_p2 : int = 0;
+    var I_p1 : int = 0;
+    // Set by r_e_ending when it deleted an "e"; read by the "bar" case of
+    // r_standard_suffix.
+    var B_e_found : boolean = false;
+
+    /** Copies the stemmer-specific fields from other, then the inherited state. */
+    function copy_from (other : DutchStemmer) : void
+    {
+        this.I_p2 = other.I_p2;
+        this.I_p1 = other.I_p1;
+        this.B_e_found = other.B_e_found;
+        super.copy_from(other);
+    }
+
+    /**
+     * Forward normalisation pass.
+     * 1. Repeatedly matches a_0 at the cursor: an accented letter is replaced by
+     *    its plain counterpart, anything else is skipped one character at a
+     *    time until limit is reached; the cursor is then reset to its start.
+     * 2. If "y" stands at the cursor it is replaced by "Y".
+     * 3. Repeatedly searches forward for a g_v character followed either by
+     *    "i" plus another g_v character (the "i" becomes "I") or by "y" (which
+     *    becomes "Y"), until limit is reached.
+     * Returns false only when slice_from reports failure; otherwise true.
+     */
+    function r_prelude () : boolean
+    {
+        var among_var : int;
+        var v_1 : int;
+        var v_2 : int;
+        var v_3 : int;
+        var v_4 : int;
+        var v_5 : int;
+        var v_6 : int;
+        // (, line 41
+        // test, line 42
+        v_1 = this.cursor;
+        // repeat, line 42
+        replab0: while(true)
+        {
+            v_2 = this.cursor;
+            var lab1 = true;
+            lab1: while (lab1 == true)
+            {
+                lab1 = false;
+                // (, line 42
+                // [, line 43
+                this.bra = this.cursor;
+                // substring, line 43
+                among_var = this.find_among(DutchStemmer.a_0, 11);
+                if (among_var == 0)
+                {
+                    break lab1;
+                }
+                // ], line 43
+                this.ket = this.cursor;
+                switch (among_var) {
+                    case 0:
+                        break lab1;
+                    case 1:
+                        // (, line 45
+                        // <-, line 45
+                        if (!this.slice_from("a"))
+                        {
+                            return false;
+                        }
+                        break;
+                    case 2:
+                        // (, line 47
+                        // <-, line 47
+                        if (!this.slice_from("e"))
+                        {
+                            return false;
+                        }
+                        break;
+                    case 3:
+                        // (, line 49
+                        // <-, line 49
+                        if (!this.slice_from("i"))
+                        {
+                            return false;
+                        }
+                        break;
+                    case 4:
+                        // (, line 51
+                        // <-, line 51
+                        if (!this.slice_from("o"))
+                        {
+                            return false;
+                        }
+                        break;
+                    case 5:
+                        // (, line 53
+                        // <-, line 53
+                        if (!this.slice_from("u"))
+                        {
+                            return false;
+                        }
+                        break;
+                    case 6:
+                        // (, line 54
+                        // next, line 54
+                        if (this.cursor >= this.limit)
+                        {
+                            break lab1;
+                        }
+                        this.cursor++;
+                        break;
+                }
+                continue replab0;
+            }
+            // an iteration failed: undo its cursor movement and stop repeating
+            this.cursor = v_2;
+            break replab0;
+        }
+        this.cursor = v_1;
+        // try, line 57
+        v_3 = this.cursor;
+        var lab2 = true;
+        lab2: while (lab2 == true)
+        {
+            lab2 = false;
+            // (, line 57
+            // [, line 57
+            this.bra = this.cursor;
+            // literal, line 57
+            if (!(this.eq_s(1, "y")))
+            {
+                this.cursor = v_3;
+                break lab2;
+            }
+            // ], line 57
+            this.ket = this.cursor;
+            // <-, line 57
+            if (!this.slice_from("Y"))
+            {
+                return false;
+            }
+        }
+        // repeat, line 58
+        replab3: while(true)
+        {
+            v_4 = this.cursor;
+            var lab4 = true;
+            lab4: while (lab4 == true)
+            {
+                lab4 = false;
+                // goto, line 58
+                golab5: while(true)
+                {
+                    v_5 = this.cursor;
+                    var lab6 = true;
+                    lab6: while (lab6 == true)
+                    {
+                        lab6 = false;
+                        // (, line 58
+                        if (!(this.in_grouping(DutchStemmer.g_v, 97, 232)))
+                        {
+                            break lab6;
+                        }
+                        // [, line 59
+                        this.bra = this.cursor;
+                        // or, line 59
+                        var lab7 = true;
+                        lab7: while (lab7 == true)
+                        {
+                            lab7 = false;
+                            v_6 = this.cursor;
+                            var lab8 = true;
+                            lab8: while (lab8 == true)
+                            {
+                                lab8 = false;
+                                // (, line 59
+                                // literal, line 59
+                                if (!(this.eq_s(1, "i")))
+                                {
+                                    break lab8;
+                                }
+                                // ], line 59
+                                this.ket = this.cursor;
+                                if (!(this.in_grouping(DutchStemmer.g_v, 97, 232)))
+                                {
+                                    break lab8;
+                                }
+                                // <-, line 59
+                                if (!this.slice_from("I"))
+                                {
+                                    return false;
+                                }
+                                break lab7;
+                            }
+                            // first alternative failed: rewind and try "y"
+                            this.cursor = v_6;
+                            // (, line 60
+                            // literal, line 60
+                            if (!(this.eq_s(1, "y")))
+                            {
+                                break lab6;
+                            }
+                            // ], line 60
+                            this.ket = this.cursor;
+                            // <-, line 60
+                            if (!this.slice_from("Y"))
+                            {
+                                return false;
+                            }
+                        }
+                        this.cursor = v_5;
+                        break golab5;
+                    }
+                    this.cursor = v_5;
+                    if (this.cursor >= this.limit)
+                    {
+                        break lab4;
+                    }
+                    this.cursor++;
+                }
+                continue replab3;
+            }
+            this.cursor = v_4;
+            break replab3;
+        }
+        return true;
+    }
+
+    /**
+     * Computes I_p1 and I_p2. Both start at limit. The word is scanned forward
+     * past the first g_v character and then past the first non-g_v character;
+     * the cursor there becomes I_p1, raised to 3 if smaller. The same two scans
+     * are repeated from that cursor to obtain I_p2. Returns false as soon as a
+     * scan reaches limit, leaving the marks assigned so far.
+     */
+    function r_mark_regions () : boolean
+    {
+        // (, line 64
+        this.I_p1 = this.limit;
+        this.I_p2 = this.limit;
+        // gopast, line 69
+        golab0: while(true)
+        {
+            var lab1 = true;
+            lab1: while (lab1 == true)
+            {
+                lab1 = false;
+                if (!(this.in_grouping(DutchStemmer.g_v, 97, 232)))
+                {
+                    break lab1;
+                }
+                break golab0;
+            }
+            if (this.cursor >= this.limit)
+            {
+                return false;
+            }
+            this.cursor++;
+        }
+        // gopast, line 69
+        golab2: while(true)
+        {
+            var lab3 = true;
+            lab3: while (lab3 == true)
+            {
+                lab3 = false;
+                if (!(this.out_grouping(DutchStemmer.g_v, 97, 232)))
+                {
+                    break lab3;
+                }
+                break golab2;
+            }
+            if (this.cursor >= this.limit)
+            {
+                return false;
+            }
+            this.cursor++;
+        }
+        // setmark p1, line 69
+        this.I_p1 = this.cursor;
+        // try, line 70
+        var lab4 = true;
+        lab4: while (lab4 == true)
+        {
+            lab4 = false;
+            // (, line 70
+            if (!(this.I_p1 < 3))
+            {
+                break lab4;
+            }
+            this.I_p1 = 3;
+        }
+        // gopast, line 71
+        golab5: while(true)
+        {
+            var lab6 = true;
+            lab6: while (lab6 == true)
+            {
+                lab6 = false;
+                if (!(this.in_grouping(DutchStemmer.g_v, 97, 232)))
+                {
+                    break lab6;
+                }
+                break golab5;
+            }
+            if (this.cursor >= this.limit)
+            {
+                return false;
+            }
+            this.cursor++;
+        }
+        // gopast, line 71
+        golab7: while(true)
+        {
+            var lab8 = true;
+            lab8: while (lab8 == true)
+            {
+                lab8 = false;
+                if (!(this.out_grouping(DutchStemmer.g_v, 97, 232)))
+                {
+                    break lab8;
+                }
+                break golab7;
+            }
+            if (this.cursor >= this.limit)
+            {
+                return false;
+            }
+            this.cursor++;
+        }
+        // setmark p2, line 71
+        this.I_p2 = this.cursor;
+        return true;
+    }
+
+    /**
+     * Forward pass that undoes the markers written by r_prelude: every "Y"
+     * becomes "y" and every "I" becomes "i"; other characters are skipped one
+     * at a time until limit is reached. Returns false only when slice_from
+     * reports failure.
+     */
+    function r_postlude () : boolean
+    {
+        var among_var : int;
+        var v_1 : int;
+        // repeat, line 75
+        replab0: while(true)
+        {
+            v_1 = this.cursor;
+            var lab1 = true;
+            lab1: while (lab1 == true)
+            {
+                lab1 = false;
+                // (, line 75
+                // [, line 77
+                this.bra = this.cursor;
+                // substring, line 77
+                among_var = this.find_among(DutchStemmer.a_1, 3);
+                if (among_var == 0)
+                {
+                    break lab1;
+                }
+                // ], line 77
+                this.ket = this.cursor;
+                switch (among_var) {
+                    case 0:
+                        break lab1;
+                    case 1:
+                        // (, line 78
+                        // <-, line 78
+                        if (!this.slice_from("y"))
+                        {
+                            return false;
+                        }
+                        break;
+                    case 2:
+                        // (, line 79
+                        // <-, line 79
+                        if (!this.slice_from("i"))
+                        {
+                            return false;
+                        }
+                        break;
+                    case 3:
+                        // (, line 80
+                        // next, line 80
+                        if (this.cursor >= this.limit)
+                        {
+                            break lab1;
+                        }
+                        this.cursor++;
+                        break;
+                }
+                continue replab0;
+            }
+            this.cursor = v_1;
+            break replab0;
+        }
+        return true;
+    }
+
+    /** True when the cursor is at or after I_p1. */
+    function r_R1 () : boolean
+    {
+        if (!(this.I_p1 <= this.cursor))
+        {
+            return false;
+        }
+        return true;
+    }
+
+    /** True when the cursor is at or after I_p2. */
+    function r_R2 () : boolean
+    {
+        if (!(this.I_p2 <= this.cursor))
+        {
+            return false;
+        }
+        return true;
+    }
+
+    /**
+     * If the text before the cursor ends in one of the a_2 pairs, deletes the
+     * single character directly before the cursor. Returns false when no pair
+     * matches, the cursor is at limit_backward, or slice_del reports failure.
+     */
+    function r_undouble () : boolean
+    {
+        var v_1 : int;
+        // (, line 90
+        // test, line 91
+        v_1 = this.limit - this.cursor;
+        // among, line 91
+        if (this.find_among_b(DutchStemmer.a_2, 3) == 0)
+        {
+            return false;
+        }
+        this.cursor = this.limit - v_1;
+        // [, line 91
+        this.ket = this.cursor;
+        // next, line 91
+        if (this.cursor <= this.limit_backward)
+        {
+            return false;
+        }
+        this.cursor--;
+        // ], line 91
+        this.bra = this.cursor;
+        // delete, line 91
+        if (!this.slice_del())
+        {
+            return false;
+        }
+        return true;
+    }
+
+    /**
+     * Clears B_e_found, then deletes an "e" standing before the cursor when
+     * r_R1 holds after stepping over it and the character before it is not in
+     * g_v. After the deletion B_e_found is set and the result of r_undouble is
+     * returned, so a false return does not imply that nothing was deleted.
+     */
+    function r_e_ending () : boolean
+    {
+        var v_1 : int;
+        // (, line 94
+        // unset e_found, line 95
+        this.B_e_found = false;
+        // [, line 96
+        this.ket = this.cursor;
+        // literal, line 96
+        if (!(this.eq_s_b(1, "e")))
+        {
+            return false;
+        }
+        // ], line 96
+        this.bra = this.cursor;
+        // call R1, line 96
+        if (!this.r_R1())
+        {
+            return false;
+        }
+        // test, line 96
+        v_1 = this.limit - this.cursor;
+        if (!(this.out_grouping_b(DutchStemmer.g_v, 97, 232)))
+        {
+            return false;
+        }
+        this.cursor = this.limit - v_1;
+        // delete, line 96
+        if (!this.slice_del())
+        {
+            return false;
+        }
+        // set e_found, line 97
+        this.B_e_found = true;
+        // call undouble, line 98
+        if (!this.r_undouble())
+        {
+            return false;
+        }
+        return true;
+    }
+
+    /**
+     * Deletes the slice marked by the caller (bra/ket are not set here) when
+     * r_R1 holds, the character before the cursor is not in g_v, and the text
+     * before the cursor does not end in "gem". Afterwards returns the result of
+     * r_undouble.
+     */
+    function r_en_ending () : boolean
+    {
+        var v_1 : int;
+        var v_2 : int;
+        // (, line 101
+        // call R1, line 102
+        if (!this.r_R1())
+        {
+            return false;
+        }
+        // and, line 102
+        v_1 = this.limit - this.cursor;
+        if (!(this.out_grouping_b(DutchStemmer.g_v, 97, 232)))
+        {
+            return false;
+        }
+        this.cursor = this.limit - v_1;
+        // not, line 102
+        {
+            v_2 = this.limit - this.cursor;
+            var lab0 = true;
+            lab0: while (lab0 == true)
+            {
+                lab0 = false;
+                // literal, line 102
+                if (!(this.eq_s_b(3, "gem")))
+                {
+                    break lab0;
+                }
+                return false;
+            }
+            this.cursor = this.limit - v_2;
+        }
+        // delete, line 102
+        if (!this.slice_del())
+        {
+            return false;
+        }
+        // call undouble, line 103
+        if (!this.r_undouble())
+        {
+            return false;
+        }
+        return true;
+    }
+
+    /**
+     * Backward-mode suffix removal, made of five best-effort steps; after each
+     * one the cursor is reset to the same distance from limit:
+     *   1. a_3 suffixes ("heden" to "heid", "ene"/"en" via r_en_ending,
+     *      "se"/"s" deleted when r_R1 holds and the preceding character is not
+     *      in g_v_j);
+     *   2. r_e_ending;
+     *   3. "heid" deleted when r_R2 holds and it is not preceded by "c",
+     *      followed by an attempt to remove a preceding "en" via r_en_ending;
+     *   4. a_4 suffixes, each guarded by r_R2 (see the cases below);
+     *   5. when a non-g_v_I character is preceded by an a_5 pair which is in
+     *      turn preceded by a non-g_v character, one character of the pair is
+     *      deleted.
+     * Returns false only when a slice operation reports failure; otherwise true.
+     */
+    function r_standard_suffix () : boolean
+    {
+        var among_var : int;
+        var v_1 : int;
+        var v_2 : int;
+        var v_3 : int;
+        var v_4 : int;
+        var v_5 : int;
+        var v_6 : int;
+        var v_7 : int;
+        var v_8 : int;
+        var v_9 : int;
+        var v_10 : int;
+        // (, line 106
+        // do, line 107
+        v_1 = this.limit - this.cursor;
+        var lab0 = true;
+        lab0: while (lab0 == true)
+        {
+            lab0 = false;
+            // (, line 107
+            // [, line 108
+            this.ket = this.cursor;
+            // substring, line 108
+            among_var = this.find_among_b(DutchStemmer.a_3, 5);
+            if (among_var == 0)
+            {
+                break lab0;
+            }
+            // ], line 108
+            this.bra = this.cursor;
+            switch (among_var) {
+                case 0:
+                    break lab0;
+                case 1:
+                    // (, line 110
+                    // call R1, line 110
+                    if (!this.r_R1())
+                    {
+                        break lab0;
+                    }
+                    // <-, line 110
+                    if (!this.slice_from("heid"))
+                    {
+                        return false;
+                    }
+                    break;
+                case 2:
+                    // (, line 113
+                    // call en_ending, line 113
+                    if (!this.r_en_ending())
+                    {
+                        break lab0;
+                    }
+                    break;
+                case 3:
+                    // (, line 116
+                    // call R1, line 116
+                    if (!this.r_R1())
+                    {
+                        break lab0;
+                    }
+                    if (!(this.out_grouping_b(DutchStemmer.g_v_j, 97, 232)))
+                    {
+                        break lab0;
+                    }
+                    // delete, line 116
+                    if (!this.slice_del())
+                    {
+                        return false;
+                    }
+                    break;
+            }
+        }
+        this.cursor = this.limit - v_1;
+        // do, line 120
+        v_2 = this.limit - this.cursor;
+        var lab1 = true;
+        lab1: while (lab1 == true)
+        {
+            lab1 = false;
+            // call e_ending, line 120
+            if (!this.r_e_ending())
+            {
+                break lab1;
+            }
+        }
+        this.cursor = this.limit - v_2;
+        // do, line 122
+        v_3 = this.limit - this.cursor;
+        var lab2 = true;
+        lab2: while (lab2 == true)
+        {
+            lab2 = false;
+            // (, line 122
+            // [, line 122
+            this.ket = this.cursor;
+            // literal, line 122
+            if (!(this.eq_s_b(4, "heid")))
+            {
+                break lab2;
+            }
+            // ], line 122
+            this.bra = this.cursor;
+            // call R2, line 122
+            if (!this.r_R2())
+            {
+                break lab2;
+            }
+            // not, line 122
+            {
+                v_4 = this.limit - this.cursor;
+                var lab3 = true;
+                lab3: while (lab3 == true)
+                {
+                    lab3 = false;
+                    // literal, line 122
+                    if (!(this.eq_s_b(1, "c")))
+                    {
+                        break lab3;
+                    }
+                    // "c" precedes "heid": abandon this step
+                    break lab2;
+                }
+                this.cursor = this.limit - v_4;
+            }
+            // delete, line 122
+            if (!this.slice_del())
+            {
+                return false;
+            }
+            // [, line 123
+            this.ket = this.cursor;
+            // literal, line 123
+            if (!(this.eq_s_b(2, "en")))
+            {
+                break lab2;
+            }
+            // ], line 123
+            this.bra = this.cursor;
+            // call en_ending, line 123
+            if (!this.r_en_ending())
+            {
+                break lab2;
+            }
+        }
+        this.cursor = this.limit - v_3;
+        // do, line 126
+        v_5 = this.limit - this.cursor;
+        var lab4 = true;
+        lab4: while (lab4 == true)
+        {
+            lab4 = false;
+            // (, line 126
+            // [, line 127
+            this.ket = this.cursor;
+            // substring, line 127
+            among_var = this.find_among_b(DutchStemmer.a_4, 6);
+            if (among_var == 0)
+            {
+                break lab4;
+            }
+            // ], line 127
+            this.bra = this.cursor;
+            switch (among_var) {
+                case 0:
+                    break lab4;
+                case 1:
+                    // "end" / "ing": delete, then either drop a preceding
+                    // "ig" (in R2, not after "e") or fall back to r_undouble
+                    // (, line 129
+                    // call R2, line 129
+                    if (!this.r_R2())
+                    {
+                        break lab4;
+                    }
+                    // delete, line 129
+                    if (!this.slice_del())
+                    {
+                        return false;
+                    }
+                    // or, line 130
+                    var lab5 = true;
+                    lab5: while (lab5 == true)
+                    {
+                        lab5 = false;
+                        v_6 = this.limit - this.cursor;
+                        var lab6 = true;
+                        lab6: while (lab6 == true)
+                        {
+                            lab6 = false;
+                            // (, line 130
+                            // [, line 130
+                            this.ket = this.cursor;
+                            // literal, line 130
+                            if (!(this.eq_s_b(2, "ig")))
+                            {
+                                break lab6;
+                            }
+                            // ], line 130
+                            this.bra = this.cursor;
+                            // call R2, line 130
+                            if (!this.r_R2())
+                            {
+                                break lab6;
+                            }
+                            // not, line 130
+                            {
+                                v_7 = this.limit - this.cursor;
+                                var lab7 = true;
+                                lab7: while (lab7 == true)
+                                {
+                                    lab7 = false;
+                                    // literal, line 130
+                                    if (!(this.eq_s_b(1, "e")))
+                                    {
+                                        break lab7;
+                                    }
+                                    break lab6;
+                                }
+                                this.cursor = this.limit - v_7;
+                            }
+                            // delete, line 130
+                            if (!this.slice_del())
+                            {
+                                return false;
+                            }
+                            break lab5;
+                        }
+                        this.cursor = this.limit - v_6;
+                        // call undouble, line 130
+                        if (!this.r_undouble())
+                        {
+                            break lab4;
+                        }
+                    }
+                    break;
+                case 2:
+                    // "ig": delete when in R2 and not preceded by "e"
+                    // (, line 133
+                    // call R2, line 133
+                    if (!this.r_R2())
+                    {
+                        break lab4;
+                    }
+                    // not, line 133
+                    {
+                        v_8 = this.limit - this.cursor;
+                        var lab8 = true;
+                        lab8: while (lab8 == true)
+                        {
+                            lab8 = false;
+                            // literal, line 133
+                            if (!(this.eq_s_b(1, "e")))
+                            {
+                                break lab8;
+                            }
+                            break lab4;
+                        }
+                        this.cursor = this.limit - v_8;
+                    }
+                    // delete, line 133
+                    if (!this.slice_del())
+                    {
+                        return false;
+                    }
+                    break;
+                case 3:
+                    // "lijk": delete when in R2, then try r_e_ending
+                    // (, line 136
+                    // call R2, line 136
+                    if (!this.r_R2())
+                    {
+                        break lab4;
+                    }
+                    // delete, line 136
+                    if (!this.slice_del())
+                    {
+                        return false;
+                    }
+                    // call e_ending, line 136
+                    if (!this.r_e_ending())
+                    {
+                        break lab4;
+                    }
+                    break;
+                case 4:
+                    // "baar": delete when in R2
+                    // (, line 139
+                    // call R2, line 139
+                    if (!this.r_R2())
+                    {
+                        break lab4;
+                    }
+                    // delete, line 139
+                    if (!this.slice_del())
+                    {
+                        return false;
+                    }
+                    break;
+                case 5:
+                    // "bar": delete when in R2 and B_e_found is set
+                    // (, line 142
+                    // call R2, line 142
+                    if (!this.r_R2())
+                    {
+                        break lab4;
+                    }
+                    // Boolean test e_found, line 142
+                    if (!(this.B_e_found))
+                    {
+                        break lab4;
+                    }
+                    // delete, line 142
+                    if (!this.slice_del())
+                    {
+                        return false;
+                    }
+                    break;
+            }
+        }
+        this.cursor = this.limit - v_5;
+        // do, line 146
+        v_9 = this.limit - this.cursor;
+        var lab9 = true;
+        lab9: while (lab9 == true)
+        {
+            lab9 = false;
+            // (, line 146
+            if (!(this.out_grouping_b(DutchStemmer.g_v_I, 73, 232)))
+            {
+                break lab9;
+            }
+            // test, line 148
+            v_10 = this.limit - this.cursor;
+            // (, line 148
+            // among, line 149
+            if (this.find_among_b(DutchStemmer.a_5, 4) == 0)
+            {
+                break lab9;
+            }
+            if (!(this.out_grouping_b(DutchStemmer.g_v, 97, 232)))
+            {
+                break lab9;
+            }
+            this.cursor = this.limit - v_10;
+            // [, line 152
+            this.ket = this.cursor;
+            // next, line 152
+            if (this.cursor <= this.limit_backward)
+            {
+                break lab9;
+            }
+            this.cursor--;
+            // ], line 152
+            this.bra = this.cursor;
+            // delete, line 152
+            if (!this.slice_del())
+            {
+                return false;
+            }
+        }
+        this.cursor = this.limit - v_9;
+        return true;
+    }
+
+    /**
+     * Entry point. Runs r_prelude and r_mark_regions forwards, switches to
+     * backward mode for r_standard_suffix, switches back and runs r_postlude.
+     * Every step is best-effort: its result is ignored and the cursor is reset
+     * afterwards. Always returns true.
+     */
+    override function stem () : boolean
+    {
+        var v_1 : int;
+        var v_2 : int;
+        var v_3 : int;
+        var v_4 : int;
+        // (, line 157
+        // do, line 159
+        v_1 = this.cursor;
+        var lab0 = true;
+        lab0: while (lab0 == true)
+        {
+            lab0 = false;
+            // call prelude, line 159
+            if (!this.r_prelude())
+            {
+                break lab0;
+            }
+        }
+        this.cursor = v_1;
+        // do, line 160
+        v_2 = this.cursor;
+        var lab1 = true;
+        lab1: while (lab1 == true)
+        {
+            lab1 = false;
+            // call mark_regions, line 160
+            if (!this.r_mark_regions())
+            {
+                break lab1;
+            }
+        }
+        this.cursor = v_2;
+        // backwards, line 161
+        this.limit_backward = this.cursor; this.cursor = this.limit;
+        // do, line 162
+        v_3 = this.limit - this.cursor;
+        var lab2 = true;
+        lab2: while (lab2 == true)
+        {
+            lab2 = false;
+            // call standard_suffix, line 162
+            if (!this.r_standard_suffix())
+            {
+                break lab2;
+            }
+        }
+        this.cursor = this.limit - v_3;
+        this.cursor = this.limit_backward; // do, line 163
+        v_4 = this.cursor;
+        var lab3 = true;
+        lab3: while (lab3 == true)
+        {
+            lab3 = false;
+            // call postlude, line 163
+            if (!this.r_postlude())
+            {
+                break lab3;
+            }
+        }
+        this.cursor = v_4;
+        return true;
+    }
+
+    /** True for any DutchStemmer instance; instance state is not compared. */
+    function equals (o : variant) : boolean {
+        return o instanceof DutchStemmer;
+    }
+
+    /**
+     * Hash derived only from the literal class name (hash * 31 + char code,
+     * folded with a bitwise AND), so every instance returns the same value,
+     * in line with equals.
+     */
+    function hashCode() : int
+    {
+        //http://stackoverflow.com/questions/194846/is-there-any-kind-of-hashcode-function-in-javascript
+        var classname = "DutchStemmer";
+        var hash = 0;
+        if (classname.length == 0) return hash;
+        for (var i = 0; i < classname.length; i++) {
+            var char = classname.charCodeAt(i);
+            hash = ((hash << 5) - hash) + char;
+            hash = hash & hash; // Convert to 32bit integer
+        }
+        return hash;
+    }
+
+}
+
diff --git a/web/server/h2o/libh2o/misc/oktavia/src/stemmer/english-stemmer.jsx b/web/server/h2o/libh2o/misc/oktavia/src/stemmer/english-stemmer.jsx
new file mode 100644
index 00000000..901c12c8
--- /dev/null
+++ b/web/server/h2o/libh2o/misc/oktavia/src/stemmer/english-stemmer.jsx
@@ -0,0 +1,1638 @@
+// This file was generated automatically by the Snowball to JSX compiler
+
+import "base-stemmer.jsx";
+import "among.jsx";
+
+  /**
+   * This class was automatically generated by a Snowball to JSX compiler
+   * It implements the stemming algorithm defined by a snowball script.
+ */ + +class EnglishStemmer extends BaseStemmer +{ + static const serialVersionUID = 1; + static const methodObject = new EnglishStemmer(); + + static const a_0 = [ + new Among("arsen", -1, -1), + new Among("commun", -1, -1), + new Among("gener", -1, -1) + ]; + + static const a_1 = [ + new Among("'", -1, 1), + new Among("'s'", 0, 1), + new Among("'s", -1, 1) + ]; + + static const a_2 = [ + new Among("ied", -1, 2), + new Among("s", -1, 3), + new Among("ies", 1, 2), + new Among("sses", 1, 1), + new Among("ss", 1, -1), + new Among("us", 1, -1) + ]; + + static const a_3 = [ + new Among("", -1, 3), + new Among("bb", 0, 2), + new Among("dd", 0, 2), + new Among("ff", 0, 2), + new Among("gg", 0, 2), + new Among("bl", 0, 1), + new Among("mm", 0, 2), + new Among("nn", 0, 2), + new Among("pp", 0, 2), + new Among("rr", 0, 2), + new Among("at", 0, 1), + new Among("tt", 0, 2), + new Among("iz", 0, 1) + ]; + + static const a_4 = [ + new Among("ed", -1, 2), + new Among("eed", 0, 1), + new Among("ing", -1, 2), + new Among("edly", -1, 2), + new Among("eedly", 3, 1), + new Among("ingly", -1, 2) + ]; + + static const a_5 = [ + new Among("anci", -1, 3), + new Among("enci", -1, 2), + new Among("ogi", -1, 13), + new Among("li", -1, 16), + new Among("bli", 3, 12), + new Among("abli", 4, 4), + new Among("alli", 3, 8), + new Among("fulli", 3, 14), + new Among("lessli", 3, 15), + new Among("ousli", 3, 10), + new Among("entli", 3, 5), + new Among("aliti", -1, 8), + new Among("biliti", -1, 12), + new Among("iviti", -1, 11), + new Among("tional", -1, 1), + new Among("ational", 14, 7), + new Among("alism", -1, 8), + new Among("ation", -1, 7), + new Among("ization", 17, 6), + new Among("izer", -1, 6), + new Among("ator", -1, 7), + new Among("iveness", -1, 11), + new Among("fulness", -1, 9), + new Among("ousness", -1, 10) + ]; + + static const a_6 = [ + new Among("icate", -1, 4), + new Among("ative", -1, 6), + new Among("alize", -1, 3), + new Among("iciti", -1, 4), + new Among("ical", -1, 4), + 
new Among("tional", -1, 1), + new Among("ational", 5, 2), + new Among("ful", -1, 5), + new Among("ness", -1, 5) + ]; + + static const a_7 = [ + new Among("ic", -1, 1), + new Among("ance", -1, 1), + new Among("ence", -1, 1), + new Among("able", -1, 1), + new Among("ible", -1, 1), + new Among("ate", -1, 1), + new Among("ive", -1, 1), + new Among("ize", -1, 1), + new Among("iti", -1, 1), + new Among("al", -1, 1), + new Among("ism", -1, 1), + new Among("ion", -1, 2), + new Among("er", -1, 1), + new Among("ous", -1, 1), + new Among("ant", -1, 1), + new Among("ent", -1, 1), + new Among("ment", 15, 1), + new Among("ement", 16, 1) + ]; + + static const a_8 = [ + new Among("e", -1, 1), + new Among("l", -1, 2) + ]; + + static const a_9 = [ + new Among("succeed", -1, -1), + new Among("proceed", -1, -1), + new Among("exceed", -1, -1), + new Among("canning", -1, -1), + new Among("inning", -1, -1), + new Among("earring", -1, -1), + new Among("herring", -1, -1), + new Among("outing", -1, -1) + ]; + + static const a_10 = [ + new Among("andes", -1, -1), + new Among("atlas", -1, -1), + new Among("bias", -1, -1), + new Among("cosmos", -1, -1), + new Among("dying", -1, 3), + new Among("early", -1, 9), + new Among("gently", -1, 7), + new Among("howe", -1, -1), + new Among("idly", -1, 6), + new Among("lying", -1, 4), + new Among("news", -1, -1), + new Among("only", -1, 10), + new Among("singly", -1, 11), + new Among("skies", -1, 2), + new Among("skis", -1, 1), + new Among("sky", -1, -1), + new Among("tying", -1, 5), + new Among("ugly", -1, 8) + ]; + + static const g_v = [17, 65, 16, 1] : int[]; + + static const g_v_WXY = [1, 17, 65, 208, 1] : int[]; + + static const g_valid_LI = [55, 141, 2] : int[]; + + var B_Y_found : boolean = false; + var I_p2 : int = 0; + var I_p1 : int = 0; + + function copy_from (other : EnglishStemmer) : void + { + this.B_Y_found = other.B_Y_found; + this.I_p2 = other.I_p2; + this.I_p1 = other.I_p1; + super.copy_from(other); + } + + function r_prelude () : 
boolean + { + var v_1 : int; + var v_2 : int; + var v_3 : int; + var v_4 : int; + var v_5 : int; + // (, line 25 + // unset Y_found, line 26 + this.B_Y_found = false; + // do, line 27 + v_1 = this.cursor; + var lab0 = true; + lab0: while (lab0 == true) + { + lab0 = false; + // (, line 27 + // [, line 27 + this.bra = this.cursor; + // literal, line 27 + if (!(this.eq_s(1, "'"))) + { + break lab0; + } + // ], line 27 + this.ket = this.cursor; + // delete, line 27 + if (!this.slice_del()) + { + return false; + } + } + this.cursor = v_1; + // do, line 28 + v_2 = this.cursor; + var lab1 = true; + lab1: while (lab1 == true) + { + lab1 = false; + // (, line 28 + // [, line 28 + this.bra = this.cursor; + // literal, line 28 + if (!(this.eq_s(1, "y"))) + { + break lab1; + } + // ], line 28 + this.ket = this.cursor; + // <-, line 28 + if (!this.slice_from("Y")) + { + return false; + } + // set Y_found, line 28 + this.B_Y_found = true; + } + this.cursor = v_2; + // do, line 29 + v_3 = this.cursor; + var lab2 = true; + lab2: while (lab2 == true) + { + lab2 = false; + // repeat, line 29 + replab3: while(true) + { + v_4 = this.cursor; + var lab4 = true; + lab4: while (lab4 == true) + { + lab4 = false; + // (, line 29 + // goto, line 29 + golab5: while(true) + { + v_5 = this.cursor; + var lab6 = true; + lab6: while (lab6 == true) + { + lab6 = false; + // (, line 29 + if (!(this.in_grouping(EnglishStemmer.g_v, 97, 121))) + { + break lab6; + } + // [, line 29 + this.bra = this.cursor; + // literal, line 29 + if (!(this.eq_s(1, "y"))) + { + break lab6; + } + // ], line 29 + this.ket = this.cursor; + this.cursor = v_5; + break golab5; + } + this.cursor = v_5; + if (this.cursor >= this.limit) + { + break lab4; + } + this.cursor++; + } + // <-, line 29 + if (!this.slice_from("Y")) + { + return false; + } + // set Y_found, line 29 + this.B_Y_found = true; + continue replab3; + } + this.cursor = v_4; + break replab3; + } + } + this.cursor = v_3; + return true; + } + + function 
r_mark_regions () : boolean + { + var v_1 : int; + var v_2 : int; + // (, line 32 + this.I_p1 = this.limit; + this.I_p2 = this.limit; + // do, line 35 + v_1 = this.cursor; + var lab0 = true; + lab0: while (lab0 == true) + { + lab0 = false; + // (, line 35 + // or, line 41 + var lab1 = true; + lab1: while (lab1 == true) + { + lab1 = false; + v_2 = this.cursor; + var lab2 = true; + lab2: while (lab2 == true) + { + lab2 = false; + // among, line 36 + if (this.find_among(EnglishStemmer.a_0, 3) == 0) + { + break lab2; + } + break lab1; + } + this.cursor = v_2; + // (, line 41 + // gopast, line 41 + golab3: while(true) + { + var lab4 = true; + lab4: while (lab4 == true) + { + lab4 = false; + if (!(this.in_grouping(EnglishStemmer.g_v, 97, 121))) + { + break lab4; + } + break golab3; + } + if (this.cursor >= this.limit) + { + break lab0; + } + this.cursor++; + } + // gopast, line 41 + golab5: while(true) + { + var lab6 = true; + lab6: while (lab6 == true) + { + lab6 = false; + if (!(this.out_grouping(EnglishStemmer.g_v, 97, 121))) + { + break lab6; + } + break golab5; + } + if (this.cursor >= this.limit) + { + break lab0; + } + this.cursor++; + } + } + // setmark p1, line 42 + this.I_p1 = this.cursor; + // gopast, line 43 + golab7: while(true) + { + var lab8 = true; + lab8: while (lab8 == true) + { + lab8 = false; + if (!(this.in_grouping(EnglishStemmer.g_v, 97, 121))) + { + break lab8; + } + break golab7; + } + if (this.cursor >= this.limit) + { + break lab0; + } + this.cursor++; + } + // gopast, line 43 + golab9: while(true) + { + var lab10 = true; + lab10: while (lab10 == true) + { + lab10 = false; + if (!(this.out_grouping(EnglishStemmer.g_v, 97, 121))) + { + break lab10; + } + break golab9; + } + if (this.cursor >= this.limit) + { + break lab0; + } + this.cursor++; + } + // setmark p2, line 43 + this.I_p2 = this.cursor; + } + this.cursor = v_1; + return true; + } + + function r_shortv () : boolean + { + var v_1 : int; + // (, line 49 + // or, line 51 + var lab0 = 
true; + lab0: while (lab0 == true) + { + lab0 = false; + v_1 = this.limit - this.cursor; + var lab1 = true; + lab1: while (lab1 == true) + { + lab1 = false; + // (, line 50 + if (!(this.out_grouping_b(EnglishStemmer.g_v_WXY, 89, 121))) + { + break lab1; + } + if (!(this.in_grouping_b(EnglishStemmer.g_v, 97, 121))) + { + break lab1; + } + if (!(this.out_grouping_b(EnglishStemmer.g_v, 97, 121))) + { + break lab1; + } + break lab0; + } + this.cursor = this.limit - v_1; + // (, line 52 + if (!(this.out_grouping_b(EnglishStemmer.g_v, 97, 121))) + { + return false; + } + if (!(this.in_grouping_b(EnglishStemmer.g_v, 97, 121))) + { + return false; + } + // atlimit, line 52 + if (this.cursor > this.limit_backward) + { + return false; + } + } + return true; + } + + function r_R1 () : boolean + { + if (!(this.I_p1 <= this.cursor)) + { + return false; + } + return true; + } + + function r_R2 () : boolean + { + if (!(this.I_p2 <= this.cursor)) + { + return false; + } + return true; + } + + function r_Step_1a () : boolean + { + var among_var : int; + var v_1 : int; + var v_2 : int; + // (, line 58 + // try, line 59 + v_1 = this.limit - this.cursor; + var lab0 = true; + lab0: while (lab0 == true) + { + lab0 = false; + // (, line 59 + // [, line 60 + this.ket = this.cursor; + // substring, line 60 + among_var = this.find_among_b(EnglishStemmer.a_1, 3); + if (among_var == 0) + { + this.cursor = this.limit - v_1; + break lab0; + } + // ], line 60 + this.bra = this.cursor; + switch (among_var) { + case 0: + this.cursor = this.limit - v_1; + break lab0; + case 1: + // (, line 62 + // delete, line 62 + if (!this.slice_del()) + { + return false; + } + break; + } + } + // [, line 65 + this.ket = this.cursor; + // substring, line 65 + among_var = this.find_among_b(EnglishStemmer.a_2, 6); + if (among_var == 0) + { + return false; + } + // ], line 65 + this.bra = this.cursor; + switch (among_var) { + case 0: + return false; + case 1: + // (, line 66 + // <-, line 66 + if 
(!this.slice_from("ss")) + { + return false; + } + break; + case 2: + // (, line 68 + // or, line 68 + var lab1 = true; + lab1: while (lab1 == true) + { + lab1 = false; + v_2 = this.limit - this.cursor; + var lab2 = true; + lab2: while (lab2 == true) + { + lab2 = false; + // (, line 68 + // hop, line 68 + { + var c : int = this.cursor - 2; + if (this.limit_backward > c || c > this.limit) + { + break lab2; + } + this.cursor = c; + } + // <-, line 68 + if (!this.slice_from("i")) + { + return false; + } + break lab1; + } + this.cursor = this.limit - v_2; + // <-, line 68 + if (!this.slice_from("ie")) + { + return false; + } + } + break; + case 3: + // (, line 69 + // next, line 69 + if (this.cursor <= this.limit_backward) + { + return false; + } + this.cursor--; + // gopast, line 69 + golab3: while(true) + { + var lab4 = true; + lab4: while (lab4 == true) + { + lab4 = false; + if (!(this.in_grouping_b(EnglishStemmer.g_v, 97, 121))) + { + break lab4; + } + break golab3; + } + if (this.cursor <= this.limit_backward) + { + return false; + } + this.cursor--; + } + // delete, line 69 + if (!this.slice_del()) + { + return false; + } + break; + } + return true; + } + + function r_Step_1b () : boolean + { + var among_var : int; + var v_1 : int; + var v_3 : int; + var v_4 : int; + // (, line 74 + // [, line 75 + this.ket = this.cursor; + // substring, line 75 + among_var = this.find_among_b(EnglishStemmer.a_4, 6); + if (among_var == 0) + { + return false; + } + // ], line 75 + this.bra = this.cursor; + switch (among_var) { + case 0: + return false; + case 1: + // (, line 77 + // call R1, line 77 + if (!this.r_R1()) + { + return false; + } + // <-, line 77 + if (!this.slice_from("ee")) + { + return false; + } + break; + case 2: + // (, line 79 + // test, line 80 + v_1 = this.limit - this.cursor; + // gopast, line 80 + golab0: while(true) + { + var lab1 = true; + lab1: while (lab1 == true) + { + lab1 = false; + if (!(this.in_grouping_b(EnglishStemmer.g_v, 97, 121))) + { + break 
lab1; + } + break golab0; + } + if (this.cursor <= this.limit_backward) + { + return false; + } + this.cursor--; + } + this.cursor = this.limit - v_1; + // delete, line 80 + if (!this.slice_del()) + { + return false; + } + // test, line 81 + v_3 = this.limit - this.cursor; + // substring, line 81 + among_var = this.find_among_b(EnglishStemmer.a_3, 13); + if (among_var == 0) + { + return false; + } + this.cursor = this.limit - v_3; + switch (among_var) { + case 0: + return false; + case 1: + // (, line 83 + // <+, line 83 + { + var c : int = this.cursor; + this.insert(this.cursor, this.cursor, "e"); + this.cursor = c; + } + break; + case 2: + // (, line 86 + // [, line 86 + this.ket = this.cursor; + // next, line 86 + if (this.cursor <= this.limit_backward) + { + return false; + } + this.cursor--; + // ], line 86 + this.bra = this.cursor; + // delete, line 86 + if (!this.slice_del()) + { + return false; + } + break; + case 3: + // (, line 87 + // atmark, line 87 + if (this.cursor != this.I_p1) + { + return false; + } + // test, line 87 + v_4 = this.limit - this.cursor; + // call shortv, line 87 + if (!this.r_shortv()) + { + return false; + } + this.cursor = this.limit - v_4; + // <+, line 87 + { + var c : int = this.cursor; + this.insert(this.cursor, this.cursor, "e"); + this.cursor = c; + } + break; + } + break; + } + return true; + } + + function r_Step_1c () : boolean + { + var v_1 : int; + var v_2 : int; + // (, line 93 + // [, line 94 + this.ket = this.cursor; + // or, line 94 + var lab0 = true; + lab0: while (lab0 == true) + { + lab0 = false; + v_1 = this.limit - this.cursor; + var lab1 = true; + lab1: while (lab1 == true) + { + lab1 = false; + // literal, line 94 + if (!(this.eq_s_b(1, "y"))) + { + break lab1; + } + break lab0; + } + this.cursor = this.limit - v_1; + // literal, line 94 + if (!(this.eq_s_b(1, "Y"))) + { + return false; + } + } + // ], line 94 + this.bra = this.cursor; + if (!(this.out_grouping_b(EnglishStemmer.g_v, 97, 121))) + { + return 
false; + } + // not, line 95 + { + v_2 = this.limit - this.cursor; + var lab2 = true; + lab2: while (lab2 == true) + { + lab2 = false; + // atlimit, line 95 + if (this.cursor > this.limit_backward) + { + break lab2; + } + return false; + } + this.cursor = this.limit - v_2; + } + // <-, line 96 + if (!this.slice_from("i")) + { + return false; + } + return true; + } + + function r_Step_2 () : boolean + { + var among_var : int; + // (, line 99 + // [, line 100 + this.ket = this.cursor; + // substring, line 100 + among_var = this.find_among_b(EnglishStemmer.a_5, 24); + if (among_var == 0) + { + return false; + } + // ], line 100 + this.bra = this.cursor; + // call R1, line 100 + if (!this.r_R1()) + { + return false; + } + switch (among_var) { + case 0: + return false; + case 1: + // (, line 101 + // <-, line 101 + if (!this.slice_from("tion")) + { + return false; + } + break; + case 2: + // (, line 102 + // <-, line 102 + if (!this.slice_from("ence")) + { + return false; + } + break; + case 3: + // (, line 103 + // <-, line 103 + if (!this.slice_from("ance")) + { + return false; + } + break; + case 4: + // (, line 104 + // <-, line 104 + if (!this.slice_from("able")) + { + return false; + } + break; + case 5: + // (, line 105 + // <-, line 105 + if (!this.slice_from("ent")) + { + return false; + } + break; + case 6: + // (, line 107 + // <-, line 107 + if (!this.slice_from("ize")) + { + return false; + } + break; + case 7: + // (, line 109 + // <-, line 109 + if (!this.slice_from("ate")) + { + return false; + } + break; + case 8: + // (, line 111 + // <-, line 111 + if (!this.slice_from("al")) + { + return false; + } + break; + case 9: + // (, line 112 + // <-, line 112 + if (!this.slice_from("ful")) + { + return false; + } + break; + case 10: + // (, line 114 + // <-, line 114 + if (!this.slice_from("ous")) + { + return false; + } + break; + case 11: + // (, line 116 + // <-, line 116 + if (!this.slice_from("ive")) + { + return false; + } + break; + case 12: + // (, 
line 118
            // <-, line 118
            if (!this.slice_from("ble")) return false;
            break;
        case 13:
            // line 119: the slice must be preceded by "l", then becomes "og"
            if (!this.eq_s_b(1, "l") || !this.slice_from("og")) return false;
            break;
        case 14:
            // <-, line 120
            if (!this.slice_from("ful")) return false;
            break;
        case 15:
            // <-, line 121
            if (!this.slice_from("less")) return false;
            break;
        case 16:
            // line 122: delete only when preceded by a g_valid_LI character
            if (!this.in_grouping_b(EnglishStemmer.g_valid_LI, 99, 116)
                || !this.slice_del())
            {
                return false;
            }
            break;
        }
        return true;
    }

    /**
     * Step 3 (snowball lines 126-136). Matches backwards against table a_6,
     * requires r_R1() to succeed, then rewrites or deletes the marked slice
     * according to the result code of the matched entry.
     *
     * @return false when nothing matches, when r_R1() fails (or r_R2() for
     *         result 6), or when a slice operation fails; true otherwise
     */
    function r_Step_3 () : boolean
    {
        // [ substring ], line 127
        this.ket = this.cursor;
        var among_var : int = this.find_among_b(EnglishStemmer.a_6, 9);
        if (among_var == 0) return false;
        this.bra = this.cursor;
        // call R1, line 127
        if (!this.r_R1()) return false;

        // Results 1-4 only differ in the replacement text, so the switch
        // just selects it; the delete cases finish on their own.
        var replacement = "";
        switch (among_var) {
        case 1:  // line 128
            replacement = "tion";
            break;
        case 2:  // line 129
            replacement = "ate";
            break;
        case 3:  // line 130
            replacement = "al";
            break;
        case 4:  // line 132
            replacement = "ic";
            break;
        case 5:  // delete, line 134
            if (!this.slice_del()) return false;
            return true;
        case 6:  // call R2 then delete, line 136
            if (!this.r_R2()) return false;
            if (!this.slice_del()) return false;
            return true;
        default:
            // any other result code has no action attached
            return true;
        }
        if (!this.slice_from(replacement)) return false;
        return true;
    }

    function r_Step_4 () : boolean
    {
        var among_var : int;
        var v_1 : int;
        // (, line 140
        // [, line 141
        this.ket =
this.cursor; + // substring, line 141 + among_var = this.find_among_b(EnglishStemmer.a_7, 18); + if (among_var == 0) + { + return false; + } + // ], line 141 + this.bra = this.cursor; + // call R2, line 141 + if (!this.r_R2()) + { + return false; + } + switch (among_var) { + case 0: + return false; + case 1: + // (, line 144 + // delete, line 144 + if (!this.slice_del()) + { + return false; + } + break; + case 2: + // (, line 145 + // or, line 145 + var lab0 = true; + lab0: while (lab0 == true) + { + lab0 = false; + v_1 = this.limit - this.cursor; + var lab1 = true; + lab1: while (lab1 == true) + { + lab1 = false; + // literal, line 145 + if (!(this.eq_s_b(1, "s"))) + { + break lab1; + } + break lab0; + } + this.cursor = this.limit - v_1; + // literal, line 145 + if (!(this.eq_s_b(1, "t"))) + { + return false; + } + } + // delete, line 145 + if (!this.slice_del()) + { + return false; + } + break; + } + return true; + } + + function r_Step_5 () : boolean + { + var among_var : int; + var v_1 : int; + var v_2 : int; + // (, line 149 + // [, line 150 + this.ket = this.cursor; + // substring, line 150 + among_var = this.find_among_b(EnglishStemmer.a_8, 2); + if (among_var == 0) + { + return false; + } + // ], line 150 + this.bra = this.cursor; + switch (among_var) { + case 0: + return false; + case 1: + // (, line 151 + // or, line 151 + var lab0 = true; + lab0: while (lab0 == true) + { + lab0 = false; + v_1 = this.limit - this.cursor; + var lab1 = true; + lab1: while (lab1 == true) + { + lab1 = false; + // call R2, line 151 + if (!this.r_R2()) + { + break lab1; + } + break lab0; + } + this.cursor = this.limit - v_1; + // (, line 151 + // call R1, line 151 + if (!this.r_R1()) + { + return false; + } + // not, line 151 + { + v_2 = this.limit - this.cursor; + var lab2 = true; + lab2: while (lab2 == true) + { + lab2 = false; + // call shortv, line 151 + if (!this.r_shortv()) + { + break lab2; + } + return false; + } + this.cursor = this.limit - v_2; + } + } + // delete, 
line 151
            if (!this.slice_del()) return false;
            break;
        case 2:
            // line 152: call R2, literal "l", delete -- each step runs only
            // if the one before it succeeded
            if (!this.r_R2()
                || !this.eq_s_b(1, "l")
                || !this.slice_del())
            {
                return false;
            }
            break;
        }
        return true;
    }

    /**
     * exception2 (snowball lines 156-158). Succeeds when an entry of table
     * a_9 matches backwards from the cursor and the match leaves the cursor
     * at limit_backward.
     *
     * @return true on such a match, false otherwise
     */
    function r_exception2 () : boolean
    {
        // [ substring ], line 158
        this.ket = this.cursor;
        if (this.find_among_b(EnglishStemmer.a_9, 8) == 0) return false;
        this.bra = this.cursor;
        // atlimit, line 158
        return this.cursor <= this.limit_backward;
    }

    /**
     * exception1 (snowball lines 168-187). Matches table a_10 forwards from
     * the cursor; the match must end at limit. Result codes that have a case
     * below rewrite the marked slice with a fixed string.
     *
     * @return false when nothing matches, the match stops short of limit,
     *         or the rewrite fails; true otherwise
     */
    function r_exception1 () : boolean
    {
        var among_var : int;
        // [, line 170
        this.bra = this.cursor;
        // substring, line 170
        among_var = this.find_among(EnglishStemmer.a_10, 18);
        if (among_var == 0) return false;
        // ], line 170
        this.ket = this.cursor;
        // atlimit, line 170
        if (this.cursor < this.limit) return false;
        switch (among_var) {
        case 0:
            return false;
        case 1:
            // <-, line 174
            if (!this.slice_from("ski")) return false;
            break;
        case 2:
            // <-, line 175
            if (!this.slice_from("sky")) return false;
            break;
        case 3:
            // <-, line 176
            if (!this.slice_from("die")) return false;
            break;
        case 4:
            // <-, line 177
            if (!this.slice_from("lie")) return false;
            break;
        case 5:
            // <-, line 178
            if (!this.slice_from("tie")) return false;
            break;
        case 6:
            // <-, line 182
            if (!this.slice_from("idl")) return false;
            break;
        case 7:
            // <-, line 183
            if (!this.slice_from("gentl")) return false;
            break;
        case 8:
            // <-, line 184
            if (!this.slice_from("ugli")) return false;
            break;
case 9:
            // <-, line 185
            if (!this.slice_from("earli")) return false;
            break;
        case 10:
            // <-, line 186
            if (!this.slice_from("onli")) return false;
            break;
        case 11:
            // <-, line 187
            if (!this.slice_from("singl")) return false;
            break;
        }
        return true;
    }

    /**
     * postlude (snowball line 203). Does nothing unless B_Y_found is set.
     * Otherwise it repeatedly scans forward for the literal "Y" and replaces
     * each occurrence with "y", stopping once a scan reaches limit without
     * finding one.
     *
     * @return false when B_Y_found is unset or a replacement fails
     */
    function r_postlude () : boolean
    {
        var passStart : int;
        var probe : int;
        var foundY : boolean;
        // Boolean test Y_found, line 203
        if (!this.B_Y_found) return false;
        // repeat, line 203
        while (true)
        {
            passStart = this.cursor;
            foundY = false;
            // goto, line 203: step forward until "Y" starts at the cursor
            while (true)
            {
                probe = this.cursor;
                this.bra = this.cursor;
                if (this.eq_s(1, "Y"))
                {
                    this.ket = this.cursor;
                    this.cursor = probe;
                    foundY = true;
                    break;
                }
                this.cursor = probe;
                if (this.cursor >= this.limit) break;
                this.cursor++;
            }
            if (!foundY)
            {
                // this pass found nothing: undo its cursor movement and stop
                this.cursor = passStart;
                break;
            }
            // <-, line 203
            if (!this.slice_from("y")) return false;
        }
        return true;
    }

    override function stem () : boolean
    {
        var v_1 : int;
        var v_2 : int;
        var v_3 : int;
        var v_4 : int;
        var v_5 : int;
        var v_6 : int;
        var v_7 : int;
        var v_8 : int;
        var v_9 : int;
        var v_10 : int;
        var v_11 : int;
        var v_12 : int;
        var v_13 : int;
        // (, line 205
        // or, line 207
        var lab0 = true;
        lab0: while (lab0 == true)
        {
            lab0 = false;
            v_1 = this.cursor;
            // call exception1, line 207: success settles the whole "or"
            if (this.r_exception1())
            {
                break lab0;
            }
            this.cursor = v_1;
            var lab2 = true;
            lab2: while (lab2 == true)
            {
                lab2 = false;
                // not, line 208
                {
                    v_2 = this.cursor;
                    var lab3 =
true; + lab3: while (lab3 == true) + { + lab3 = false; + // hop, line 208 + { + var c : int = this.cursor + 3; + if (0 > c || c > this.limit) + { + break lab3; + } + this.cursor = c; + } + break lab2; + } + this.cursor = v_2; + } + break lab0; + } + this.cursor = v_1; + // (, line 208 + // do, line 209 + v_3 = this.cursor; + var lab4 = true; + lab4: while (lab4 == true) + { + lab4 = false; + // call prelude, line 209 + if (!this.r_prelude()) + { + break lab4; + } + } + this.cursor = v_3; + // do, line 210 + v_4 = this.cursor; + var lab5 = true; + lab5: while (lab5 == true) + { + lab5 = false; + // call mark_regions, line 210 + if (!this.r_mark_regions()) + { + break lab5; + } + } + this.cursor = v_4; + // backwards, line 211 + this.limit_backward = this.cursor; this.cursor = this.limit; + // (, line 211 + // do, line 213 + v_5 = this.limit - this.cursor; + var lab6 = true; + lab6: while (lab6 == true) + { + lab6 = false; + // call Step_1a, line 213 + if (!this.r_Step_1a()) + { + break lab6; + } + } + this.cursor = this.limit - v_5; + // or, line 215 + var lab7 = true; + lab7: while (lab7 == true) + { + lab7 = false; + v_6 = this.limit - this.cursor; + var lab8 = true; + lab8: while (lab8 == true) + { + lab8 = false; + // call exception2, line 215 + if (!this.r_exception2()) + { + break lab8; + } + break lab7; + } + this.cursor = this.limit - v_6; + // (, line 215 + // do, line 217 + v_7 = this.limit - this.cursor; + var lab9 = true; + lab9: while (lab9 == true) + { + lab9 = false; + // call Step_1b, line 217 + if (!this.r_Step_1b()) + { + break lab9; + } + } + this.cursor = this.limit - v_7; + // do, line 218 + v_8 = this.limit - this.cursor; + var lab10 = true; + lab10: while (lab10 == true) + { + lab10 = false; + // call Step_1c, line 218 + if (!this.r_Step_1c()) + { + break lab10; + } + } + this.cursor = this.limit - v_8; + // do, line 220 + v_9 = this.limit - this.cursor; + var lab11 = true; + lab11: while (lab11 == true) + { + lab11 = false; + // call Step_2, 
line 220 + if (!this.r_Step_2()) + { + break lab11; + } + } + this.cursor = this.limit - v_9; + // do, line 221 + v_10 = this.limit - this.cursor; + var lab12 = true; + lab12: while (lab12 == true) + { + lab12 = false; + // call Step_3, line 221 + if (!this.r_Step_3()) + { + break lab12; + } + } + this.cursor = this.limit - v_10; + // do, line 222 + v_11 = this.limit - this.cursor; + var lab13 = true; + lab13: while (lab13 == true) + { + lab13 = false; + // call Step_4, line 222 + if (!this.r_Step_4()) + { + break lab13; + } + } + this.cursor = this.limit - v_11; + // do, line 224 + v_12 = this.limit - this.cursor; + var lab14 = true; + lab14: while (lab14 == true) + { + lab14 = false; + // call Step_5, line 224 + if (!this.r_Step_5()) + { + break lab14; + } + } + this.cursor = this.limit - v_12; + } + this.cursor = this.limit_backward; // do, line 227 + v_13 = this.cursor; + var lab15 = true; + lab15: while (lab15 == true) + { + lab15 = false; + // call postlude, line 227 + if (!this.r_postlude()) + { + break lab15; + } + } + this.cursor = v_13; + } + return true; + } + + function equals (o : variant) : boolean { + return o instanceof EnglishStemmer; + } + + function hashCode() : int + { + //http://stackoverflow.com/questions/194846/is-there-any-kind-of-hashcode-function-in-javascript + var classname = "EnglishStemmer"; + var hash = 0; + if (classname.length == 0) return hash; + for (var i = 0; i < classname.length; i++) { + var char = classname.charCodeAt(i); + hash = ((hash << 5) - hash) + char; + hash = hash & hash; // Convert to 32bit integer + } + return hash; + } + +} + diff --git a/web/server/h2o/libh2o/misc/oktavia/src/stemmer/finnish-stemmer.jsx b/web/server/h2o/libh2o/misc/oktavia/src/stemmer/finnish-stemmer.jsx new file mode 100644 index 00000000..35a1d56a --- /dev/null +++ b/web/server/h2o/libh2o/misc/oktavia/src/stemmer/finnish-stemmer.jsx @@ -0,0 +1,1208 @@ +// This file was generated automatically by the Snowball to JSX compiler + +import 
"base-stemmer.jsx"; +import "among.jsx"; + + /** + * This class was automatically generated by a Snowball to JSX compiler + * It implements the stemming algorithm defined by a snowball script. + */ + +class FinnishStemmer extends BaseStemmer +{ + static const serialVersionUID = 1; + static const methodObject = new FinnishStemmer(); + + static const a_0 = [ + new Among("pa", -1, 1), + new Among("sti", -1, 2), + new Among("kaan", -1, 1), + new Among("han", -1, 1), + new Among("kin", -1, 1), + new Among("h\u00E4n", -1, 1), + new Among("k\u00E4\u00E4n", -1, 1), + new Among("ko", -1, 1), + new Among("p\u00E4", -1, 1), + new Among("k\u00F6", -1, 1) + ]; + + static const a_1 = [ + new Among("lla", -1, -1), + new Among("na", -1, -1), + new Among("ssa", -1, -1), + new Among("ta", -1, -1), + new Among("lta", 3, -1), + new Among("sta", 3, -1) + ]; + + static const a_2 = [ + new Among("ll\u00E4", -1, -1), + new Among("n\u00E4", -1, -1), + new Among("ss\u00E4", -1, -1), + new Among("t\u00E4", -1, -1), + new Among("lt\u00E4", 3, -1), + new Among("st\u00E4", 3, -1) + ]; + + static const a_3 = [ + new Among("lle", -1, -1), + new Among("ine", -1, -1) + ]; + + static const a_4 = [ + new Among("nsa", -1, 3), + new Among("mme", -1, 3), + new Among("nne", -1, 3), + new Among("ni", -1, 2), + new Among("si", -1, 1), + new Among("an", -1, 4), + new Among("en", -1, 6), + new Among("\u00E4n", -1, 5), + new Among("ns\u00E4", -1, 3) + ]; + + static const a_5 = [ + new Among("aa", -1, -1), + new Among("ee", -1, -1), + new Among("ii", -1, -1), + new Among("oo", -1, -1), + new Among("uu", -1, -1), + new Among("\u00E4\u00E4", -1, -1), + new Among("\u00F6\u00F6", -1, -1) + ]; + + static const a_6 = [ + new Among("a", -1, 8), + new Among("lla", 0, -1), + new Among("na", 0, -1), + new Among("ssa", 0, -1), + new Among("ta", 0, -1), + new Among("lta", 4, -1), + new Among("sta", 4, -1), + new Among("tta", 4, 9), + new Among("lle", -1, -1), + new Among("ine", -1, -1), + new Among("ksi", -1, -1), + new 
Among("n", -1, 7), + new Among("han", 11, 1), + new Among("den", 11, -1, ((instance : BaseStemmer) : boolean -> (instance as FinnishStemmer).r_VI()), FinnishStemmer.methodObject), + new Among("seen", 11, -1, ((instance : BaseStemmer) : boolean -> (instance as FinnishStemmer).r_LONG()), FinnishStemmer.methodObject), + new Among("hen", 11, 2), + new Among("tten", 11, -1, ((instance : BaseStemmer) : boolean -> (instance as FinnishStemmer).r_VI()), FinnishStemmer.methodObject), + new Among("hin", 11, 3), + new Among("siin", 11, -1, ((instance : BaseStemmer) : boolean -> (instance as FinnishStemmer).r_VI()), FinnishStemmer.methodObject), + new Among("hon", 11, 4), + new Among("h\u00E4n", 11, 5), + new Among("h\u00F6n", 11, 6), + new Among("\u00E4", -1, 8), + new Among("ll\u00E4", 22, -1), + new Among("n\u00E4", 22, -1), + new Among("ss\u00E4", 22, -1), + new Among("t\u00E4", 22, -1), + new Among("lt\u00E4", 26, -1), + new Among("st\u00E4", 26, -1), + new Among("tt\u00E4", 26, 9) + ]; + + static const a_7 = [ + new Among("eja", -1, -1), + new Among("mma", -1, 1), + new Among("imma", 1, -1), + new Among("mpa", -1, 1), + new Among("impa", 3, -1), + new Among("mmi", -1, 1), + new Among("immi", 5, -1), + new Among("mpi", -1, 1), + new Among("impi", 7, -1), + new Among("ej\u00E4", -1, -1), + new Among("mm\u00E4", -1, 1), + new Among("imm\u00E4", 10, -1), + new Among("mp\u00E4", -1, 1), + new Among("imp\u00E4", 12, -1) + ]; + + static const a_8 = [ + new Among("i", -1, -1), + new Among("j", -1, -1) + ]; + + static const a_9 = [ + new Among("mma", -1, 1), + new Among("imma", 0, -1) + ]; + + static const g_AEI = [17, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8] : int[]; + + static const g_V1 = [17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 0, 32] : int[]; + + static const g_V2 = [17, 65, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 0, 32] : int[]; + + static const g_particle_end = [17, 97, 24, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 0, 32] : int[]; + + var 
B_ending_removed : boolean = false;
    var S_x : string = "";
    var I_p2 : int = 0;
    var I_p1 : int = 0;

    /**
     * Copies this stemmer's own fields from another FinnishStemmer, then
     * lets the base class copy the rest.
     *
     * @param other stemmer whose state is copied into this one
     */
    function copy_from (other : FinnishStemmer) : void
    {
        this.I_p1 = other.I_p1;
        this.I_p2 = other.I_p2;
        this.S_x = other.S_x;
        this.B_ending_removed = other.B_ending_removed;
        super.copy_from(other);
    }

    /**
     * mark_regions (snowball lines 41-47). Sets I_p1 and I_p2, both
     * defaulting to limit. Each mark is found the same way: advance until
     * in_grouping(g_V1) succeeds (without consuming it), then advance until
     * out_grouping(g_V1) succeeds; the cursor position after that is the mark.
     *
     * @return false when limit is reached before both marks are set;
     *         true otherwise. The cursor is left wherever scanning stopped.
     */
    function r_mark_regions () : boolean
    {
        var probe : int;
        this.I_p1 = this.limit;
        this.I_p2 = this.limit;

        // goto, line 46
        while (true)
        {
            probe = this.cursor;
            if (this.in_grouping(FinnishStemmer.g_V1, 97, 246))
            {
                this.cursor = probe;
                break;
            }
            this.cursor = probe;
            if (this.cursor >= this.limit) return false;
            this.cursor++;
        }
        // gopast, line 46
        while (!this.out_grouping(FinnishStemmer.g_V1, 97, 246))
        {
            if (this.cursor >= this.limit) return false;
            this.cursor++;
        }
        // setmark p1, line 46
        this.I_p1 = this.cursor;

        // goto, line 47
        while (true)
        {
            probe = this.cursor;
            if (this.in_grouping(FinnishStemmer.g_V1, 97, 246))
            {
                this.cursor = probe;
                break;
            }
            this.cursor = probe;
            if (this.cursor >= this.limit) return false;
            this.cursor++;
        }
        // gopast, line 47
        while (!this.out_grouping(FinnishStemmer.g_V1, 97, 246))
        {
            if (this.cursor >= this.limit) return false;
            this.cursor++;
        }
        // setmark p2, line 47
        this.I_p2 = this.cursor;
        return true;
    }

    /**
     * Region test R2.
     *
     * @return true when the I_p2 mark is at or before the cursor
     */
    function r_R2 () : boolean
    {
        return this.I_p2 <= this.cursor;
    }

function r_particle_etc () : boolean
    {
        // particle_etc (snowball lines 54-66): with limit_backward
        // temporarily moved to I_p1, match table a_0 backwards, apply the
        // check attached to the matched entry's result code, then delete
        // the matched slice. Returns false when the cursor is before I_p1,
        // nothing matches, the check fails or the delete fails.
        var among_var : int;
        var distFromEnd : int;
        var savedBackLimit : int;
        // setlimit / tomark, line 55
        distFromEnd = this.limit - this.cursor;
        if (this.cursor < this.I_p1) return false;
        this.cursor = this.I_p1;
        savedBackLimit = this.limit_backward;
        this.limit_backward = this.cursor;
        this.cursor = this.limit - distFromEnd;
        // [ substring ], line 55
        this.ket = this.cursor;
        among_var = this.find_among_b(FinnishStemmer.a_0, 10);
        if (among_var == 0)
        {
            this.limit_backward = savedBackLimit;
            return false;
        }
        this.bra = this.cursor;
        this.limit_backward = savedBackLimit;
        // line 62: result 1 needs a g_particle_end character before the match
        if (among_var == 1
            && !this.in_grouping_b(FinnishStemmer.g_particle_end, 97, 246))
        {
            return false;
        }
        // line 64: result 2 needs the R2 test to pass
        if (among_var == 2 && !this.r_R2())
        {
            return false;
        }
        // delete, line 66
        if (!this.slice_del()) return false;
        return true;
    }

    function r_possessive () : boolean
    {
        var among_var : int;
        var v_1 : int;
        var v_2 : int;
        var v_3 : int;
        // setlimit / tomark, line 69: search with limit_backward moved to
        // I_p1, then put it back whatever the outcome
        v_1 = this.limit - this.cursor;
        if (this.cursor < this.I_p1) return false;
        this.cursor = this.I_p1;
        v_2 = this.limit_backward;
        this.limit_backward = this.cursor;
        this.cursor = this.limit - v_1;
        // [, line 69
        this.ket = this.cursor;
        // substring, line 69
        among_var = this.find_among_b(FinnishStemmer.a_4, 9);
        if (among_var == 0)
        {
            this.limit_backward = v_2;
            return false;
        }
        // ], line 69
        this.bra = this.cursor;
        this.limit_backward = v_2;
        switch (among_var) {
        case 0:
            return false;
        case 1:
            // (, line 72
            // not, line 72
            {
                v_3 = this.limit - this.cursor;
                // literal, line 72: a preceding "k" vetoes this case
                if (this.eq_s_b(1, "k"))
                {
                    return false;
                }
this.cursor = this.limit - v_3;
            }
            // delete, line 72
            if (!this.slice_del()) return false;
            break;
        case 2:
            // line 74: delete the slice, then the text before it must be
            // "kse", which is rewritten to "ksi"
            if (!this.slice_del()) return false;
            this.ket = this.cursor;
            if (!this.eq_s_b(3, "kse")) return false;
            this.bra = this.cursor;
            if (!this.slice_from("ksi")) return false;
            break;
        case 3:
            // delete, line 78
            if (!this.slice_del()) return false;
            break;
        case 4:
            // line 81: delete only if an a_1 entry precedes the slice
            if (this.find_among_b(FinnishStemmer.a_1, 6) == 0) return false;
            if (!this.slice_del()) return false;
            break;
        case 5:
            // lines 83-84: same with table a_2
            if (this.find_among_b(FinnishStemmer.a_2, 6) == 0) return false;
            if (!this.slice_del()) return false;
            break;
        case 6:
            // line 86: same with table a_3
            if (this.find_among_b(FinnishStemmer.a_3, 2) == 0) return false;
            if (!this.slice_del()) return false;
            break;
        }
        return true;
    }

    /**
     * LONG (snowball line 91).
     *
     * @return true when an entry of table a_5 matches backwards from the
     *         cursor
     */
    function r_LONG () : boolean
    {
        return this.find_among_b(FinnishStemmer.a_5, 7) != 0;
    }

    /**
     * VI (snowball line 93): the literal "i", then a g_V2 character, both
     * matched backwards. The second test runs only if the first succeeded.
     *
     * @return true when both tests succeed
     */
    function r_VI () : boolean
    {
        if (!this.eq_s_b(1, "i")) return false;
        if (!this.in_grouping_b(FinnishStemmer.g_V2, 97, 246)) return false;
        return true;
    }

    function r_case_ending () : boolean
    {
        var among_var : int;
        var v_1 : int;
        var v_2 : int;
        var v_3 : int;
        var v_4 : int;
        var v_5 : int;
        // (, line 95
        // setlimit / tomark, line 96: move limit_backward to I_p1 for the
        // search that follows, remembering the old value in v_2
        v_1 = this.limit - this.cursor;
        if (this.cursor < this.I_p1) return false;
        this.cursor = this.I_p1;
        v_2 = this.limit_backward;
        this.limit_backward = this.cursor;
        this.cursor = this.limit - v_1;
+ // (, line 96 + // [, line 96 + this.ket = this.cursor; + // substring, line 96 + among_var = this.find_among_b(FinnishStemmer.a_6, 30); + if (among_var == 0) + { + this.limit_backward = v_2; + return false; + } + // ], line 96 + this.bra = this.cursor; + this.limit_backward = v_2; + switch (among_var) { + case 0: + return false; + case 1: + // (, line 98 + // literal, line 98 + if (!(this.eq_s_b(1, "a"))) + { + return false; + } + break; + case 2: + // (, line 99 + // literal, line 99 + if (!(this.eq_s_b(1, "e"))) + { + return false; + } + break; + case 3: + // (, line 100 + // literal, line 100 + if (!(this.eq_s_b(1, "i"))) + { + return false; + } + break; + case 4: + // (, line 101 + // literal, line 101 + if (!(this.eq_s_b(1, "o"))) + { + return false; + } + break; + case 5: + // (, line 102 + // literal, line 102 + if (!(this.eq_s_b(1, "\u00E4"))) + { + return false; + } + break; + case 6: + // (, line 103 + // literal, line 103 + if (!(this.eq_s_b(1, "\u00F6"))) + { + return false; + } + break; + case 7: + // (, line 111 + // try, line 111 + v_3 = this.limit - this.cursor; + var lab0 = true; + lab0: while (lab0 == true) + { + lab0 = false; + // (, line 111 + // and, line 113 + v_4 = this.limit - this.cursor; + // or, line 112 + var lab1 = true; + lab1: while (lab1 == true) + { + lab1 = false; + v_5 = this.limit - this.cursor; + var lab2 = true; + lab2: while (lab2 == true) + { + lab2 = false; + // call LONG, line 111 + if (!this.r_LONG()) + { + break lab2; + } + break lab1; + } + this.cursor = this.limit - v_5; + // literal, line 112 + if (!(this.eq_s_b(2, "ie"))) + { + this.cursor = this.limit - v_3; + break lab0; + } + } + this.cursor = this.limit - v_4; + // next, line 113 + if (this.cursor <= this.limit_backward) + { + this.cursor = this.limit - v_3; + break lab0; + } + this.cursor--; + // ], line 113 + this.bra = this.cursor; + } + break; + case 8: + // (, line 119 + if (!(this.in_grouping_b(FinnishStemmer.g_V1, 97, 246))) + { + return false; + } + if 
(!(this.out_grouping_b(FinnishStemmer.g_V1, 97, 246)))
            {
                return false;
            }
            break;
        case 9:
            // literal, line 121
            if (!this.eq_s_b(1, "e")) return false;
            break;
        }
        // delete, line 138
        if (!this.slice_del()) return false;
        // set ending_removed, line 139
        this.B_ending_removed = true;
        return true;
    }

    /**
     * other_endings (snowball lines 141-151). With limit_backward
     * temporarily moved to I_p2, matches table a_7 backwards and deletes the
     * matched slice. For result 1 the deletion is refused when the slice is
     * preceded by "po".
     *
     * @return false when the cursor is before I_p2, nothing matches, the
     *         "po" veto applies or the delete fails; true otherwise
     */
    function r_other_endings () : boolean
    {
        var among_var : int;
        var distFromEnd : int;
        var savedBackLimit : int;
        var beforeVeto : int;
        // setlimit / tomark, line 142
        distFromEnd = this.limit - this.cursor;
        if (this.cursor < this.I_p2) return false;
        this.cursor = this.I_p2;
        savedBackLimit = this.limit_backward;
        this.limit_backward = this.cursor;
        this.cursor = this.limit - distFromEnd;
        // [ substring ], line 142
        this.ket = this.cursor;
        among_var = this.find_among_b(FinnishStemmer.a_7, 14);
        if (among_var == 0)
        {
            this.limit_backward = savedBackLimit;
            return false;
        }
        this.bra = this.cursor;
        this.limit_backward = savedBackLimit;
        if (among_var == 1)
        {
            // not "po", line 146: fail on a match, otherwise rewind the probe
            beforeVeto = this.limit - this.cursor;
            if (this.eq_s_b(2, "po")) return false;
            this.cursor = this.limit - beforeVeto;
        }
        // delete, line 151
        if (!this.slice_del()) return false;
        return true;
    }

    function r_i_plural () : boolean
    {
        var v_1 : int;
        var v_2 : int;
        // (, line 153
        // setlimit / tomark, line 154: search with limit_backward moved to
        // I_p1; v_2 keeps the value to restore
        v_1 = this.limit - this.cursor;
        if (this.cursor < this.I_p1) return false;
        this.cursor = this.I_p1;
        v_2 = this.limit_backward;
        this.limit_backward = this.cursor;
        this.cursor = this.limit - v_1;
        // [, line 154
        this.ket = this.cursor;
        // substring, line 154
        if (this.find_among_b(FinnishStemmer.a_8, 2) == 0)
        {
/**
 * Two-stage backward removal.
 *
 * Stage 1 (backward limit temporarily raised to I_p1): the text before the
 * cursor must end in "t" preceded by a character of g_V1; the "t" is deleted.
 * Stage 2 (backward limit temporarily raised to I_p2): an entry of a_9 must
 * match; for result 1 the match is rejected when it is preceded by "po";
 * otherwise the matched entry is deleted.
 *
 * @return true only when both stages succeeded and the final delete worked.
 */
function r_t_plural () : boolean
{
    var matched : int;
    var fromEnd : int;
    var outerLimit : int;
    var afterT : int;
    var beforeNot : int;

    // ---- stage 1: setlimit tomark p1 ----
    fromEnd = this.limit - this.cursor;
    if (this.cursor < this.I_p1)
    {
        return false;
    }
    this.cursor = this.I_p1;
    outerLimit = this.limit_backward;
    this.limit_backward = this.cursor;
    this.cursor = this.limit - fromEnd;

    this.ket = this.cursor;
    if (!this.eq_s_b(1, "t"))
    {
        this.limit_backward = outerLimit;
        return false;
    }
    this.bra = this.cursor;

    // test: look at the character before the "t" without consuming it
    afterT = this.limit - this.cursor;
    if (!this.in_grouping_b(FinnishStemmer.g_V1, 97, 246))
    {
        this.limit_backward = outerLimit;
        return false;
    }
    this.cursor = this.limit - afterT;

    // NOTE: as in the generated original, a failed delete returns without
    // restoring limit_backward.
    if (!this.slice_del())
    {
        return false;
    }
    this.limit_backward = outerLimit;

    // ---- stage 2: setlimit tomark p2 ----
    fromEnd = this.limit - this.cursor;
    if (this.cursor < this.I_p2)
    {
        return false;
    }
    this.cursor = this.I_p2;
    outerLimit = this.limit_backward;
    this.limit_backward = this.cursor;
    this.cursor = this.limit - fromEnd;

    this.ket = this.cursor;
    matched = this.find_among_b(FinnishStemmer.a_9, 2);
    if (matched == 0)
    {
        this.limit_backward = outerLimit;
        return false;
    }
    this.bra = this.cursor;
    this.limit_backward = outerLimit;

    if (matched == 1)
    {
        // not "po": fail when the match is preceded by "po"
        beforeNot = this.limit - this.cursor;
        if (this.eq_s_b(2, "po"))
        {
            return false;
        }
        this.cursor = this.limit - beforeNot;
    }

    return this.slice_del();
}
/**
 * Entry point of the stemmer.
 *
 * Runs r_mark_regions forward, then switches to backward processing and
 * runs, in order, r_particle_etc, r_possessive, r_case_ending and
 * r_other_endings, then r_i_plural (when B_ending_removed is set) or
 * r_t_plural (otherwise), and finally r_tidy. Each step's result is ignored
 * and the cursor is put back to where the step started.
 *
 * @return always true.
 */
override function stem () : boolean
{
    var forwardMark : int;
    var backMark : int;

    // do mark_regions (forward direction, absolute cursor position saved)
    forwardMark = this.cursor;
    this.r_mark_regions();
    this.cursor = forwardMark;

    this.B_ending_removed = false;

    // backwards: from here positions are saved relative to this.limit,
    // because deletions performed by a step change this.limit
    this.limit_backward = this.cursor;
    this.cursor = this.limit;

    backMark = this.limit - this.cursor;
    this.r_particle_etc();
    this.cursor = this.limit - backMark;

    backMark = this.limit - this.cursor;
    this.r_possessive();
    this.cursor = this.limit - backMark;

    backMark = this.limit - this.cursor;
    this.r_case_ending();
    this.cursor = this.limit - backMark;

    backMark = this.limit - this.cursor;
    this.r_other_endings();
    this.cursor = this.limit - backMark;

    // (ending_removed do i_plural) or (do t_plural)
    backMark = this.limit - this.cursor;
    if (this.B_ending_removed)
    {
        this.r_i_plural();
    }
    else
    {
        this.r_t_plural();
    }
    this.cursor = this.limit - backMark;

    backMark = this.limit - this.cursor;
    this.r_tidy();
    this.cursor = this.limit - backMark;

    this.cursor = this.limit_backward;
    return true;
}

/**
 * Two stemmers are considered equal when the other object is a FinnishStemmer.
 */
function equals (o : variant) : boolean
{
    var sameClass = o instanceof FinnishStemmer;
    return sameClass;
}

/**
 * Hash derived only from the class name, so every instance returns the same value.
 */
function hashCode() : int
{
    //http://stackoverflow.com/questions/194846/is-there-any-kind-of-hashcode-function-in-javascript
    var classname = "FinnishStemmer";
    var hash = 0;
    var i = 0;
    while (i < classname.length)
    {
        hash = ((hash << 5) - hash) + classname.charCodeAt(i);
        hash = hash & hash; // Convert to 32bit integer
        i++;
    }
    return hash;
}
+ */ + +class FrenchStemmer extends BaseStemmer +{ + static const serialVersionUID = 1; + static const methodObject = new FrenchStemmer(); + + static const a_0 = [ + new Among("col", -1, -1), + new Among("par", -1, -1), + new Among("tap", -1, -1) + ]; + + static const a_1 = [ + new Among("", -1, 4), + new Among("I", 0, 1), + new Among("U", 0, 2), + new Among("Y", 0, 3) + ]; + + static const a_2 = [ + new Among("iqU", -1, 3), + new Among("abl", -1, 3), + new Among("I\u00E8r", -1, 4), + new Among("i\u00E8r", -1, 4), + new Among("eus", -1, 2), + new Among("iv", -1, 1) + ]; + + static const a_3 = [ + new Among("ic", -1, 2), + new Among("abil", -1, 1), + new Among("iv", -1, 3) + ]; + + static const a_4 = [ + new Among("iqUe", -1, 1), + new Among("atrice", -1, 2), + new Among("ance", -1, 1), + new Among("ence", -1, 5), + new Among("logie", -1, 3), + new Among("able", -1, 1), + new Among("isme", -1, 1), + new Among("euse", -1, 11), + new Among("iste", -1, 1), + new Among("ive", -1, 8), + new Among("if", -1, 8), + new Among("usion", -1, 4), + new Among("ation", -1, 2), + new Among("ution", -1, 4), + new Among("ateur", -1, 2), + new Among("iqUes", -1, 1), + new Among("atrices", -1, 2), + new Among("ances", -1, 1), + new Among("ences", -1, 5), + new Among("logies", -1, 3), + new Among("ables", -1, 1), + new Among("ismes", -1, 1), + new Among("euses", -1, 11), + new Among("istes", -1, 1), + new Among("ives", -1, 8), + new Among("ifs", -1, 8), + new Among("usions", -1, 4), + new Among("ations", -1, 2), + new Among("utions", -1, 4), + new Among("ateurs", -1, 2), + new Among("ments", -1, 15), + new Among("ements", 30, 6), + new Among("issements", 31, 12), + new Among("it\u00E9s", -1, 7), + new Among("ment", -1, 15), + new Among("ement", 34, 6), + new Among("issement", 35, 12), + new Among("amment", 34, 13), + new Among("emment", 34, 14), + new Among("aux", -1, 10), + new Among("eaux", 39, 9), + new Among("eux", -1, 1), + new Among("it\u00E9", -1, 7) + ]; + + static const a_5 = [ 
+ new Among("ira", -1, 1), + new Among("ie", -1, 1), + new Among("isse", -1, 1), + new Among("issante", -1, 1), + new Among("i", -1, 1), + new Among("irai", 4, 1), + new Among("ir", -1, 1), + new Among("iras", -1, 1), + new Among("ies", -1, 1), + new Among("\u00EEmes", -1, 1), + new Among("isses", -1, 1), + new Among("issantes", -1, 1), + new Among("\u00EEtes", -1, 1), + new Among("is", -1, 1), + new Among("irais", 13, 1), + new Among("issais", 13, 1), + new Among("irions", -1, 1), + new Among("issions", -1, 1), + new Among("irons", -1, 1), + new Among("issons", -1, 1), + new Among("issants", -1, 1), + new Among("it", -1, 1), + new Among("irait", 21, 1), + new Among("issait", 21, 1), + new Among("issant", -1, 1), + new Among("iraIent", -1, 1), + new Among("issaIent", -1, 1), + new Among("irent", -1, 1), + new Among("issent", -1, 1), + new Among("iront", -1, 1), + new Among("\u00EEt", -1, 1), + new Among("iriez", -1, 1), + new Among("issiez", -1, 1), + new Among("irez", -1, 1), + new Among("issez", -1, 1) + ]; + + static const a_6 = [ + new Among("a", -1, 3), + new Among("era", 0, 2), + new Among("asse", -1, 3), + new Among("ante", -1, 3), + new Among("\u00E9e", -1, 2), + new Among("ai", -1, 3), + new Among("erai", 5, 2), + new Among("er", -1, 2), + new Among("as", -1, 3), + new Among("eras", 8, 2), + new Among("\u00E2mes", -1, 3), + new Among("asses", -1, 3), + new Among("antes", -1, 3), + new Among("\u00E2tes", -1, 3), + new Among("\u00E9es", -1, 2), + new Among("ais", -1, 3), + new Among("erais", 15, 2), + new Among("ions", -1, 1), + new Among("erions", 17, 2), + new Among("assions", 17, 3), + new Among("erons", -1, 2), + new Among("ants", -1, 3), + new Among("\u00E9s", -1, 2), + new Among("ait", -1, 3), + new Among("erait", 23, 2), + new Among("ant", -1, 3), + new Among("aIent", -1, 3), + new Among("eraIent", 26, 2), + new Among("\u00E8rent", -1, 2), + new Among("assent", -1, 3), + new Among("eront", -1, 2), + new Among("\u00E2t", -1, 3), + new Among("ez", -1, 
/**
 * Copies the three region marks (I_pV, I_p1, I_p2) from another
 * FrenchStemmer, then lets the base class copy its own state.
 */
function copy_from (other : FrenchStemmer) : void
{
    this.I_pV = other.I_pV;
    this.I_p1 = other.I_p1;
    this.I_p2 = other.I_p2;
    super.copy_from(other);
}
/**
 * Computes the marks I_pV, I_p1 and I_p2; each defaults to this.limit.
 *
 * I_pV: if the word starts with two characters of g_v, the position after
 * the third character; else if it starts with an entry of a_0
 * ("col", "par", "tap"), the position after that entry; else the position
 * just after the first g_v character found from the second character on.
 * I_p1: position after the first character outside g_v that follows a g_v
 * character. I_p2: the same rule applied again starting from I_p1.
 * A mark keeps its default when the scan for it runs into the limit.
 * The cursor is left where it started.
 *
 * @return always true.
 */
function r_mark_regions () : boolean
{
    var origin : int;
    var reached : boolean;

    this.I_pV = this.limit;
    this.I_p1 = this.limit;
    this.I_p2 = this.limit;

    // ---- pV ----
    origin = this.cursor;
    reached = true;
    if (this.in_grouping(FrenchStemmer.g_v, 97, 251)
        && this.in_grouping(FrenchStemmer.g_v, 97, 251)
        && this.cursor < this.limit)
    {
        this.cursor++;
    }
    else
    {
        this.cursor = origin;
        if (this.find_among(FrenchStemmer.a_0, 3) == 0)
        {
            this.cursor = origin;
            if (this.cursor >= this.limit)
            {
                reached = false;
            }
            else
            {
                this.cursor++;
                // gopast a g_v character
                while (reached && !this.in_grouping(FrenchStemmer.g_v, 97, 251))
                {
                    if (this.cursor >= this.limit)
                    {
                        reached = false;
                    }
                    else
                    {
                        this.cursor++;
                    }
                }
            }
        }
    }
    if (reached)
    {
        this.I_pV = this.cursor;
    }
    this.cursor = origin;

    // ---- p1 and p2 ----
    reached = true;
    while (reached && !this.in_grouping(FrenchStemmer.g_v, 97, 251))
    {
        if (this.cursor >= this.limit)
        {
            reached = false;
        }
        else
        {
            this.cursor++;
        }
    }
    while (reached && !this.out_grouping(FrenchStemmer.g_v, 97, 251))
    {
        if (this.cursor >= this.limit)
        {
            reached = false;
        }
        else
        {
            this.cursor++;
        }
    }
    if (reached)
    {
        this.I_p1 = this.cursor;
    }
    while (reached && !this.in_grouping(FrenchStemmer.g_v, 97, 251))
    {
        if (this.cursor >= this.limit)
        {
            reached = false;
        }
        else
        {
            this.cursor++;
        }
    }
    while (reached && !this.out_grouping(FrenchStemmer.g_v, 97, 251))
    {
        if (this.cursor >= this.limit)
        {
            reached = false;
        }
        else
        {
            this.cursor++;
        }
    }
    if (reached)
    {
        this.I_p2 = this.cursor;
    }
    this.cursor = origin;
    return true;
}

/**
 * Walks forward through the word matching a_1 at each position:
 * "I" becomes "i", "U" becomes "u", "Y" becomes "y", and any other
 * character is stepped over. Stops, restoring the cursor of the current
 * iteration, once the limit is reached.
 *
 * @return false only when a replacement fails, otherwise true.
 */
function r_postlude () : boolean
{
    var which : int;
    var mark : int;
    var advanced : boolean;

    while (true)
    {
        mark = this.cursor;
        this.bra = this.cursor;
        which = this.find_among(FrenchStemmer.a_1, 4);
        advanced = (which != 0);
        if (advanced)
        {
            this.ket = this.cursor;
            switch (which) {
            case 1:
                if (!this.slice_from("i"))
                {
                    return false;
                }
                break;
            case 2:
                if (!this.slice_from("u"))
                {
                    return false;
                }
                break;
            case 3:
                if (!this.slice_from("y"))
                {
                    return false;
                }
                break;
            case 4:
                // empty match: step over one character, or stop at the limit
                if (this.cursor >= this.limit)
                {
                    advanced = false;
                }
                else
                {
                    this.cursor++;
                }
                break;
            }
        }
        if (!advanced)
        {
            this.cursor = mark;
            break;
        }
    }
    return true;
}

/** True when the cursor is at or beyond the I_pV mark. */
function r_RV () : boolean
{
    return this.I_pV <= this.cursor;
}

/** True when the cursor is at or beyond the I_p1 mark. */
function r_R1 () : boolean
{
    return this.I_p1 <= this.cursor;
}
/**
 * Matches the longest entry of a_4 before the cursor and applies the action
 * selected by that entry's result code (1..15): delete, or replace with
 * "log", "u", "ent", "eau", "al", "eux", "ant", subject to the r_R1 / r_R2 /
 * r_RV position checks. Codes 2, 6, 7 and 8 follow the main deletion with an
 * optional second step; when that second step does not apply, the cursor is
 * restored and the routine still succeeds.
 *
 * Codes 13, 14 and 15 perform their edit and then deliberately report
 * failure (Snowball "fail").
 *
 * @return true when a suffix was handled and the routine should count as
 *         having succeeded; false otherwise.
 */
function r_standard_suffix () : boolean
{
    var which : int;   // result code from a_4
    var inner : int;   // result code from a_2 / a_3 in the optional step
    var mark : int;    // restore point of the optional ("try") step
    var alt : int;     // restore point of an "or" alternative

    this.ket = this.cursor;
    which = this.find_among_b(FrenchStemmer.a_4, 43);
    if (which == 0)
    {
        return false;
    }
    this.bra = this.cursor;

    switch (which) {
    case 1:
        return this.r_R2() && this.slice_del();

    case 2:
        if (!this.r_R2() || !this.slice_del())
        {
            return false;
        }
        // try: a preceding "ic" is deleted in R2, else rewritten to "iqU"
        mark = this.limit - this.cursor;
        this.ket = this.cursor;
        if (!this.eq_s_b(2, "ic"))
        {
            this.cursor = this.limit - mark;
            return true;
        }
        this.bra = this.cursor;
        alt = this.limit - this.cursor;
        if (this.r_R2())
        {
            return this.slice_del();
        }
        this.cursor = this.limit - alt;
        return this.slice_from("iqU");

    case 3:
        return this.r_R2() && this.slice_from("log");

    case 4:
        return this.r_R2() && this.slice_from("u");

    case 5:
        return this.r_R2() && this.slice_from("ent");

    case 6:
        if (!this.r_RV() || !this.slice_del())
        {
            return false;
        }
        // try: follow-up on an entry of a_2
        mark = this.limit - this.cursor;
        this.ket = this.cursor;
        inner = this.find_among_b(FrenchStemmer.a_2, 6);
        if (inner == 0)
        {
            this.cursor = this.limit - mark;
            return true;
        }
        this.bra = this.cursor;
        switch (inner) {
        case 1:
            if (!this.r_R2())
            {
                this.cursor = this.limit - mark;
                return true;
            }
            if (!this.slice_del())
            {
                return false;
            }
            this.ket = this.cursor;
            if (!this.eq_s_b(2, "at"))
            {
                this.cursor = this.limit - mark;
                return true;
            }
            this.bra = this.cursor;
            if (!this.r_R2())
            {
                this.cursor = this.limit - mark;
                return true;
            }
            return this.slice_del();
        case 2:
            alt = this.limit - this.cursor;
            if (this.r_R2())
            {
                return this.slice_del();
            }
            this.cursor = this.limit - alt;
            if (!this.r_R1())
            {
                this.cursor = this.limit - mark;
                return true;
            }
            return this.slice_from("eux");
        case 3:
            if (!this.r_R2())
            {
                this.cursor = this.limit - mark;
                return true;
            }
            return this.slice_del();
        case 4:
            if (!this.r_RV())
            {
                this.cursor = this.limit - mark;
                return true;
            }
            return this.slice_from("i");
        }
        return true;

    case 7:
        if (!this.r_R2() || !this.slice_del())
        {
            return false;
        }
        // try: follow-up on an entry of a_3
        mark = this.limit - this.cursor;
        this.ket = this.cursor;
        inner = this.find_among_b(FrenchStemmer.a_3, 3);
        if (inner == 0)
        {
            this.cursor = this.limit - mark;
            return true;
        }
        this.bra = this.cursor;
        switch (inner) {
        case 1:
            alt = this.limit - this.cursor;
            if (this.r_R2())
            {
                return this.slice_del();
            }
            this.cursor = this.limit - alt;
            return this.slice_from("abl");
        case 2:
            alt = this.limit - this.cursor;
            if (this.r_R2())
            {
                return this.slice_del();
            }
            this.cursor = this.limit - alt;
            return this.slice_from("iqU");
        case 3:
            if (!this.r_R2())
            {
                this.cursor = this.limit - mark;
                return true;
            }
            return this.slice_del();
        }
        return true;

    case 8:
        if (!this.r_R2() || !this.slice_del())
        {
            return false;
        }
        // try: delete a preceding "at" in R2, then handle a preceding "ic"
        mark = this.limit - this.cursor;
        this.ket = this.cursor;
        if (!this.eq_s_b(2, "at"))
        {
            this.cursor = this.limit - mark;
            return true;
        }
        this.bra = this.cursor;
        if (!this.r_R2())
        {
            this.cursor = this.limit - mark;
            return true;
        }
        if (!this.slice_del())
        {
            return false;
        }
        this.ket = this.cursor;
        if (!this.eq_s_b(2, "ic"))
        {
            this.cursor = this.limit - mark;
            return true;
        }
        this.bra = this.cursor;
        alt = this.limit - this.cursor;
        if (this.r_R2())
        {
            return this.slice_del();
        }
        this.cursor = this.limit - alt;
        return this.slice_from("iqU");

    case 9:
        return this.slice_from("eau");

    case 10:
        return this.r_R1() && this.slice_from("al");

    case 11:
        alt = this.limit - this.cursor;
        if (this.r_R2())
        {
            return this.slice_del();
        }
        this.cursor = this.limit - alt;
        if (!this.r_R1())
        {
            return false;
        }
        return this.slice_from("eux");

    case 12:
        return this.r_R1()
            && this.out_grouping_b(FrenchStemmer.g_v, 97, 251)
            && this.slice_del();

    case 13:
        // fail(<- "ant"): perform the replacement, then report failure
        if (this.r_RV())
        {
            this.slice_from("ant");
        }
        return false;

    case 14:
        // fail(<- "ent")
        if (this.r_RV())
        {
            this.slice_from("ent");
        }
        return false;

    case 15:
        // test (g_v character before the suffix, inside RV), then fail(delete)
        mark = this.limit - this.cursor;
        if (this.in_grouping_b(FrenchStemmer.g_v, 97, 251) && this.r_RV())
        {
            this.cursor = this.limit - mark;
            this.slice_del();
        }
        return false;
    }
    return true;
}
/**
 * With the backward limit temporarily raised to I_pV, matches an entry of
 * a_6 before the cursor and removes it:
 *   result 1 - deleted only when r_R2 holds;
 *   result 2 - deleted;
 *   result 3 - deleted, and a directly preceding "e" is deleted as well.
 *
 * @return false when the cursor is before I_pV, nothing matches, the r_R2
 *         check fails, or a delete fails; true otherwise.
 */
function r_verb_suffix () : boolean
{
    var which : int;
    var fromEnd : int;
    var outerLimit : int;
    var mark : int;

    // setlimit tomark pV
    fromEnd = this.limit - this.cursor;
    if (this.cursor < this.I_pV)
    {
        return false;
    }
    this.cursor = this.I_pV;
    outerLimit = this.limit_backward;
    this.limit_backward = this.cursor;
    this.cursor = this.limit - fromEnd;

    this.ket = this.cursor;
    which = this.find_among_b(FrenchStemmer.a_6, 38);
    if (which == 0)
    {
        this.limit_backward = outerLimit;
        return false;
    }
    this.bra = this.cursor;

    // NOTE: as in the generated original, a failed delete returns without
    // restoring limit_backward.
    if (which == 1)
    {
        if (!this.r_R2())
        {
            this.limit_backward = outerLimit;
            return false;
        }
        if (!this.slice_del())
        {
            return false;
        }
    }
    else if (which == 2)
    {
        if (!this.slice_del())
        {
            return false;
        }
    }
    else if (which == 3)
    {
        if (!this.slice_del())
        {
            return false;
        }
        // try: also drop a preceding "e"
        mark = this.limit - this.cursor;
        this.ket = this.cursor;
        if (this.eq_s_b(1, "e"))
        {
            this.bra = this.cursor;
            if (!this.slice_del())
            {
                return false;
            }
        }
        else
        {
            this.cursor = this.limit - mark;
        }
    }

    this.limit_backward = outerLimit;
    return true;
}
var lab2 = true; + lab2: while (lab2 == true) + { + lab2 = false; + // literal, line 202 + if (!(this.eq_s_b(1, "s"))) + { + break lab2; + } + break lab1; + } + this.cursor = this.limit - v_5; + // literal, line 202 + if (!(this.eq_s_b(1, "t"))) + { + this.limit_backward = v_4; + return false; + } + } + // delete, line 202 + if (!this.slice_del()) + { + return false; + } + break; + case 2: + // (, line 204 + // <-, line 204 + if (!this.slice_from("i")) + { + return false; + } + break; + case 3: + // (, line 205 + // delete, line 205 + if (!this.slice_del()) + { + return false; + } + break; + case 4: + // (, line 206 + // literal, line 206 + if (!(this.eq_s_b(2, "gu"))) + { + this.limit_backward = v_4; + return false; + } + // delete, line 206 + if (!this.slice_del()) + { + return false; + } + break; + } + this.limit_backward = v_4; + return true; + } + + function r_un_double () : boolean + { + var v_1 : int; + // (, line 211 + // test, line 212 + v_1 = this.limit - this.cursor; + // among, line 212 + if (this.find_among_b(FrenchStemmer.a_8, 5) == 0) + { + return false; + } + this.cursor = this.limit - v_1; + // [, line 212 + this.ket = this.cursor; + // next, line 212 + if (this.cursor <= this.limit_backward) + { + return false; + } + this.cursor--; + // ], line 212 + this.bra = this.cursor; + // delete, line 212 + if (!this.slice_del()) + { + return false; + } + return true; + } + + function r_un_accent () : boolean + { + var v_3 : int; + // (, line 215 + // atleast, line 216 + { + var v_1 = 1; + // atleast, line 216 + replab0: while(true) + { + var lab1 = true; + lab1: while (lab1 == true) + { + lab1 = false; + if (!(this.out_grouping_b(FrenchStemmer.g_v, 97, 251))) + { + break lab1; + } + v_1--; + continue replab0; + } + break replab0; + } + if (v_1 > 0) + { + return false; + } + } + // [, line 217 + this.ket = this.cursor; + // or, line 217 + var lab2 = true; + lab2: while (lab2 == true) + { + lab2 = false; + v_3 = this.limit - this.cursor; + var lab3 = true; + 
lab3: while (lab3 == true) + { + lab3 = false; + // literal, line 217 + if (!(this.eq_s_b(1, "\u00E9"))) + { + break lab3; + } + break lab2; + } + this.cursor = this.limit - v_3; + // literal, line 217 + if (!(this.eq_s_b(1, "\u00E8"))) + { + return false; + } + } + // ], line 217 + this.bra = this.cursor; + // <-, line 217 + if (!this.slice_from("e")) + { + return false; + } + return true; + } + + override function stem () : boolean + { + var v_1 : int; + var v_2 : int; + var v_3 : int; + var v_4 : int; + var v_5 : int; + var v_6 : int; + var v_7 : int; + var v_8 : int; + var v_9 : int; + var v_10 : int; + var v_11 : int; + // (, line 221 + // do, line 223 + v_1 = this.cursor; + var lab0 = true; + lab0: while (lab0 == true) + { + lab0 = false; + // call prelude, line 223 + if (!this.r_prelude()) + { + break lab0; + } + } + this.cursor = v_1; + // do, line 224 + v_2 = this.cursor; + var lab1 = true; + lab1: while (lab1 == true) + { + lab1 = false; + // call mark_regions, line 224 + if (!this.r_mark_regions()) + { + break lab1; + } + } + this.cursor = v_2; + // backwards, line 225 + this.limit_backward = this.cursor; this.cursor = this.limit; + // (, line 225 + // do, line 227 + v_3 = this.limit - this.cursor; + var lab2 = true; + lab2: while (lab2 == true) + { + lab2 = false; + // (, line 227 + // or, line 237 + var lab3 = true; + lab3: while (lab3 == true) + { + lab3 = false; + v_4 = this.limit - this.cursor; + var lab4 = true; + lab4: while (lab4 == true) + { + lab4 = false; + // (, line 228 + // and, line 233 + v_5 = this.limit - this.cursor; + // (, line 229 + // or, line 229 + var lab5 = true; + lab5: while (lab5 == true) + { + lab5 = false; + v_6 = this.limit - this.cursor; + var lab6 = true; + lab6: while (lab6 == true) + { + lab6 = false; + // call standard_suffix, line 229 + if (!this.r_standard_suffix()) + { + break lab6; + } + break lab5; + } + this.cursor = this.limit - v_6; + var lab7 = true; + lab7: while (lab7 == true) + { + lab7 = false; + // call 
i_verb_suffix, line 230 + if (!this.r_i_verb_suffix()) + { + break lab7; + } + break lab5; + } + this.cursor = this.limit - v_6; + // call verb_suffix, line 231 + if (!this.r_verb_suffix()) + { + break lab4; + } + } + this.cursor = this.limit - v_5; + // try, line 234 + v_7 = this.limit - this.cursor; + var lab8 = true; + lab8: while (lab8 == true) + { + lab8 = false; + // (, line 234 + // [, line 234 + this.ket = this.cursor; + // or, line 234 + var lab9 = true; + lab9: while (lab9 == true) + { + lab9 = false; + v_8 = this.limit - this.cursor; + var lab10 = true; + lab10: while (lab10 == true) + { + lab10 = false; + // (, line 234 + // literal, line 234 + if (!(this.eq_s_b(1, "Y"))) + { + break lab10; + } + // ], line 234 + this.bra = this.cursor; + // <-, line 234 + if (!this.slice_from("i")) + { + return false; + } + break lab9; + } + this.cursor = this.limit - v_8; + // (, line 235 + // literal, line 235 + if (!(this.eq_s_b(1, "\u00E7"))) + { + this.cursor = this.limit - v_7; + break lab8; + } + // ], line 235 + this.bra = this.cursor; + // <-, line 235 + if (!this.slice_from("c")) + { + return false; + } + } + } + break lab3; + } + this.cursor = this.limit - v_4; + // call residual_suffix, line 238 + if (!this.r_residual_suffix()) + { + break lab2; + } + } + } + this.cursor = this.limit - v_3; + // do, line 243 + v_9 = this.limit - this.cursor; + var lab11 = true; + lab11: while (lab11 == true) + { + lab11 = false; + // call un_double, line 243 + if (!this.r_un_double()) + { + break lab11; + } + } + this.cursor = this.limit - v_9; + // do, line 244 + v_10 = this.limit - this.cursor; + var lab12 = true; + lab12: while (lab12 == true) + { + lab12 = false; + // call un_accent, line 244 + if (!this.r_un_accent()) + { + break lab12; + } + } + this.cursor = this.limit - v_10; + this.cursor = this.limit_backward; // do, line 246 + v_11 = this.cursor; + var lab13 = true; + lab13: while (lab13 == true) + { + lab13 = false; + // call postlude, line 246 + if 
(!this.r_postlude()) + { + break lab13; + } + } + this.cursor = v_11; + return true; + } + + function equals (o : variant) : boolean { + return o instanceof FrenchStemmer; + } + + function hashCode() : int + { + //http://stackoverflow.com/questions/194846/is-there-any-kind-of-hashcode-function-in-javascript + var classname = "FrenchStemmer"; + var hash = 0; + if (classname.length == 0) return hash; + for (var i = 0; i < classname.length; i++) { + var char = classname.charCodeAt(i); + hash = ((hash << 5) - hash) + char; + hash = hash & hash; // Convert to 32bit integer + } + return hash; + } + +} + diff --git a/web/server/h2o/libh2o/misc/oktavia/src/stemmer/german-stemmer.jsx b/web/server/h2o/libh2o/misc/oktavia/src/stemmer/german-stemmer.jsx new file mode 100644 index 00000000..7659b3a7 --- /dev/null +++ b/web/server/h2o/libh2o/misc/oktavia/src/stemmer/german-stemmer.jsx @@ -0,0 +1,894 @@ +// This file was generated automatically by the Snowball to JSX compiler + +import "base-stemmer.jsx"; +import "among.jsx"; + + /** + * This class was automatically generated by a Snowball to JSX compiler + * It implements the stemming algorithm defined by a snowball script. 
+ */
+/*
+ * GermanStemmer: a BaseStemmer subclass. stem() runs r_prelude and
+ * r_mark_regions in forward mode, then r_standard_suffix in backward mode,
+ * then r_postlude in forward mode again.
+ * The "// <op>, line N" comments are emitted by the generator; presumably N
+ * is a line number in the originating Snowball script (not part of this file).
+ */
+class GermanStemmer extends BaseStemmer
+{
+    static const serialVersionUID = 1; // not referenced anywhere in this class
+    static const methodObject = new GermanStemmer(); // not referenced anywhere in this class
+    /*
+     * a_0: table matched by r_postlude via find_among. The third Among
+     * argument appears to be the value find_among returns (it is what the
+     * switch in r_postlude dispatches on). NOTE(review): the meaning of the
+     * second argument is defined in among.jsx / base-stemmer.jsx - confirm there.
+     */
+    static const a_0 = [
+        new Among("", -1, 6),
+        new Among("U", 0, 2),
+        new Among("Y", 0, 1),
+        new Among("\u00E4", 0, 3),
+        new Among("\u00F6", 0, 4),
+        new Among("\u00FC", 0, 5)
+    ];
+    /* a_1: suffix table for the first step of r_standard_suffix (find_among_b). */
+    static const a_1 = [
+        new Among("e", -1, 2),
+        new Among("em", -1, 1),
+        new Among("en", -1, 2),
+        new Among("ern", -1, 1),
+        new Among("er", -1, 1),
+        new Among("s", -1, 3),
+        new Among("es", 5, 2)
+    ];
+    /* a_2: suffix table for the second step of r_standard_suffix. */
+    static const a_2 = [
+        new Among("en", -1, 1),
+        new Among("er", -1, 1),
+        new Among("st", -1, 2),
+        new Among("est", 2, 1)
+    ];
+    /* a_3: secondary table tried after a "keit" removal (case 4 of the third step). */
+    static const a_3 = [
+        new Among("ig", -1, 1),
+        new Among("lich", -1, 1)
+    ];
+    /* a_4: suffix table for the third step of r_standard_suffix (guarded by r_R2). */
+    static const a_4 = [
+        new Among("end", -1, 1),
+        new Among("ig", -1, 2),
+        new Among("ung", -1, 1),
+        new Among("lich", -1, 3),
+        new Among("isch", -1, 2),
+        new Among("ik", -1, 2),
+        new Among("heit", -1, 3),
+        new Among("keit", -1, 4)
+    ];
+    /*
+     * g_v, g_s_ending, g_st_ending: int arrays handed to in_grouping /
+     * out_grouping / in_grouping_b together with a character-code range
+     * (97..252 for g_v, 98..116 for the two *_ending arrays).
+     * NOTE(review): presumably bitmaps of accepted characters relative to the
+     * lower bound; the decoding lives in BaseStemmer - confirm there.
+     */
+    static const g_v = [17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 0, 32, 8] : int[];
+
+    static const g_s_ending = [117, 30, 5] : int[];
+
+    static const g_st_ending = [117, 30, 4] : int[];
+    /* Position marks written by r_mark_regions; I_p1 / I_p2 are read by r_R1 / r_R2. */
+    var I_x : int = 0;
+    var I_p2 : int = 0;
+    var I_p1 : int = 0;
+    /**
+     * Copies the three position marks from `other`, then delegates to
+     * super.copy_from(other) for the state owned by the base class.
+     */
+    function copy_from (other : GermanStemmer) : void
+    {
+        this.I_x = other.I_x;
+        this.I_p2 = other.I_p2;
+        this.I_p1 = other.I_p1;
+        super.copy_from(other);
+    }
+    /**
+     * Forward pre-processing, two passes from the saved start cursor.
+     * Pass 1: wherever eq_s matches "\u00DF" it is replaced with "ss";
+     * otherwise the cursor advances by one, until the limit is reached.
+     * Pass 2: a "u" (or "y") that is matched right after in_grouping(g_v)
+     * succeeds and is followed by another successful in_grouping(g_v) is
+     * replaced with "U" (or "Y").
+     * Returns false only when slice_from fails, otherwise true.
+     */
+    function r_prelude () : boolean
+    {
+        var v_1 : int;
+        var v_2 : int;
+        var v_3 : int;
+        var v_4 : int;
+        var v_5 : int;
+        var v_6 : int;
+        // (, line 33
+        // test, line 35
+        v_1 = this.cursor;
+        // repeat, line 35
+        replab0: while(true)
+        {
+            v_2 = this.cursor;
+            var lab1 = true;
+            lab1: while (lab1 == true) // runs once; "break lab1" exits the block early
+            {
+                lab1 = false;
+                // (, line 35
+                // or, line 38
+                var lab2 = true;
+                lab2: while (lab2 == true)
+                {
+                    lab2 = false;
+                    v_3 = this.cursor;
+                    var lab3 = true;
+                    lab3: while (lab3 == true)
+                    {
+                        lab3 = false;
+                        // (, line 36
+                        // [, line 37
+                        this.bra = this.cursor;
+                        // literal, line 37
+                        if (!(this.eq_s(1, "\u00DF")))
+                        {
+                            break lab3;
+                        }
+                        // ], line 37
+                        this.ket = this.cursor;
+                        // <-, line 37
+                        if (!this.slice_from("ss"))
+                        {
+                            return false;
+                        }
+                        break lab2;
+                    }
+                    this.cursor = v_3;
+                    // next, line 38
+                    if (this.cursor >= this.limit)
+                    {
+                        break lab1;
+                    }
+                    this.cursor++;
+                }
+                continue replab0;
+            }
+            this.cursor = v_2;
+            break replab0;
+        }
+        this.cursor = v_1; // rewind to the start for the second pass
+        // repeat, line 41
+        replab4: while(true)
+        {
+            v_4 = this.cursor;
+            var lab5 = true;
+            lab5: while (lab5 == true)
+            {
+                lab5 = false;
+                // goto, line 41
+                golab6: while(true)
+                {
+                    v_5 = this.cursor;
+                    var lab7 = true;
+                    lab7: while (lab7 == true)
+                    {
+                        lab7 = false;
+                        // (, line 41
+                        if (!(this.in_grouping(GermanStemmer.g_v, 97, 252)))
+                        {
+                            break lab7;
+                        }
+                        // [, line 42
+                        this.bra = this.cursor;
+                        // or, line 42
+                        var lab8 = true;
+                        lab8: while (lab8 == true)
+                        {
+                            lab8 = false;
+                            v_6 = this.cursor;
+                            var lab9 = true;
+                            lab9: while (lab9 == true)
+                            {
+                                lab9 = false;
+                                // (, line 42
+                                // literal, line 42
+                                if (!(this.eq_s(1, "u")))
+                                {
+                                    break lab9;
+                                }
+                                // ], line 42
+                                this.ket = this.cursor;
+                                if (!(this.in_grouping(GermanStemmer.g_v, 97, 252)))
+                                {
+                                    break lab9;
+                                }
+                                // <-, line 42
+                                if (!this.slice_from("U"))
+                                {
+                                    return false;
+                                }
+                                break lab8;
+                            }
+                            this.cursor = v_6;
+                            // (, line 43
+                            // literal, line 43
+                            if (!(this.eq_s(1, "y")))
+                            {
+                                break lab7;
+                            }
+                            // ], line 43
+                            this.ket = this.cursor;
+                            if (!(this.in_grouping(GermanStemmer.g_v, 97, 252)))
+                            {
+                                break lab7;
+                            }
+                            // <-, line 43
+                            if (!this.slice_from("Y"))
+                            {
+                                return false;
+                            }
+                        }
+                        this.cursor = v_5;
+                        break golab6;
+                    }
+                    this.cursor = v_5;
+                    if (this.cursor >= this.limit)
+                    {
+                        break lab5;
+                    }
+                    this.cursor++;
+                }
+                continue replab4;
+            }
+            this.cursor = v_4;
+            break replab4;
+        }
+        return true;
+    }
+    /**
+     * Computes the marks I_x, I_p1 and I_p2.
+     * I_p1 and I_p2 start at this.limit. I_x becomes cursor + 3 (returns false
+     * if that is past the limit). From the original cursor the code then
+     * advances until in_grouping(g_v) succeeds and then until
+     * out_grouping(g_v) succeeds; the resulting cursor is I_p1, raised to I_x
+     * when it is smaller. The same two scans are repeated to obtain I_p2.
+     * Returns false as soon as a scan reaches the limit; marks not yet
+     * assigned at that point keep the value this.limit.
+     */
+    function r_mark_regions () : boolean
+    {
+        var v_1 : int;
+        // (, line 47
+        this.I_p1 = this.limit;
+        this.I_p2 = this.limit;
+        // test, line 52
+        v_1 = this.cursor;
+        // (, line 52
+        // hop, line 52
+        {
+            var c : int = this.cursor + 3;
+            if (0 > c || c > this.limit)
+            {
+                return false;
+            }
+            this.cursor = c;
+        }
+        // setmark x, line 52
+        this.I_x = this.cursor;
+        this.cursor = v_1;
+        // gopast, line 54
+        golab0: while(true)
+        {
+            var lab1 = true;
+            lab1: while (lab1 == true)
+            {
+                lab1 = false;
+                if (!(this.in_grouping(GermanStemmer.g_v, 97, 252)))
+                {
+                    break lab1;
+                }
+                break golab0;
+            }
+            if (this.cursor >= this.limit)
+            {
+                return false;
+            }
+            this.cursor++;
+        }
+        // gopast, line 54
+        golab2: while(true)
+        {
+            var lab3 = true;
+            lab3: while (lab3 == true)
+            {
+                lab3 = false;
+                if (!(this.out_grouping(GermanStemmer.g_v, 97, 252)))
+                {
+                    break lab3;
+                }
+                break golab2;
+            }
+            if (this.cursor >= this.limit)
+            {
+                return false;
+            }
+            this.cursor++;
+        }
+        // setmark p1, line 54
+        this.I_p1 = this.cursor;
+        // try, line 55
+        var lab4 = true;
+        lab4: while (lab4 == true)
+        {
+            lab4 = false;
+            // (, line 55
+            if (!(this.I_p1 < this.I_x))
+            {
+                break lab4;
+            }
+            this.I_p1 = this.I_x; // I_p1 is never left below I_x
+        }
+        // gopast, line 56
+        golab5: while(true)
+        {
+            var lab6 = true;
+            lab6: while (lab6 == true)
+            {
+                lab6 = false;
+                if (!(this.in_grouping(GermanStemmer.g_v, 97, 252)))
+                {
+                    break lab6;
+                }
+                break golab5;
+            }
+            if (this.cursor >= this.limit)
+            {
+                return false;
+            }
+            this.cursor++;
+        }
+        // gopast, line 56
+        golab7: while(true)
+        {
+            var lab8 = true;
+            lab8: while (lab8 == true)
+            {
+                lab8 = false;
+                if (!(this.out_grouping(GermanStemmer.g_v, 97, 252)))
+                {
+                    break lab8;
+                }
+                break golab7;
+            }
+            if (this.cursor >= this.limit)
+            {
+                return false;
+            }
+            this.cursor++;
+        }
+        // setmark p2, line 56
+        this.I_p2 = this.cursor;
+        return true;
+    }
+    /**
+     * Forward post-processing: repeatedly matches table a_0 at the cursor and
+     * rewrites "Y" -> "y", "U" -> "u", "\u00E4" -> "a", "\u00F6" -> "o",
+     * "\u00FC" -> "u". For result 6 (the empty-string entry) the cursor simply
+     * advances by one. The loop stops when nothing matches or the limit is
+     * reached. Returns false only when slice_from fails.
+     */
+    function r_postlude () : boolean
+    {
+        var among_var : int;
+        var v_1 : int;
+        // repeat, line 60
+        replab0: while(true)
+        {
+            v_1 = this.cursor;
+            var lab1 = true;
+            lab1: while (lab1 == true)
+            {
+                lab1 = false;
+                // (, line 60
+                // [, line 62
+                this.bra = this.cursor;
+                // substring, line 62
+                among_var = this.find_among(GermanStemmer.a_0, 6);
+                if (among_var == 0)
+                {
+                    break lab1;
+                }
+                // ], line 62
+                this.ket = this.cursor;
+                switch (among_var) {
+                    case 0:
+                        break lab1;
+                    case 1:
+                        // (, line 63
+                        // <-, line 63
+                        if (!this.slice_from("y"))
+                        {
+                            return false;
+                        }
+                        break;
+                    case 2:
+                        // (, line 64
+                        // <-, line 64
+                        if (!this.slice_from("u"))
+                        {
+                            return false;
+                        }
+                        break;
+                    case 3:
+                        // (, line 65
+                        // <-, line 65
+                        if (!this.slice_from("a"))
+                        {
+                            return false;
+                        }
+                        break;
+                    case 4:
+                        // (, line 66
+                        // <-, line 66
+                        if (!this.slice_from("o"))
+                        {
+                            return false;
+                        }
+                        break;
+                    case 5:
+                        // (, line 67
+                        // <-, line 67
+                        if (!this.slice_from("u"))
+                        {
+                            return false;
+                        }
+                        break;
+                    case 6:
+                        // (, line 68
+                        // next, line 68
+                        if (this.cursor >= this.limit)
+                        {
+                            break lab1;
+                        }
+                        this.cursor++;
+                        break;
+                }
+                continue replab0;
+            }
+            this.cursor = v_1;
+            break replab0;
+        }
+        return true;
+    }
+    /** Returns true when the cursor is at or after the mark I_p1. */
+    function r_R1 () : boolean
+    {
+        if (!(this.I_p1 <= this.cursor))
+        {
+            return false;
+        }
+        return true;
+    }
+    /** Returns true when the cursor is at or after the mark I_p2. */
+    function r_R2 () : boolean
+    {
+        if (!(this.I_p2 <= this.cursor))
+        {
+            return false;
+        }
+        return true;
+    }
+    /**
+     * Backward-mode suffix removal in three independent steps. Each step
+     * records limit - cursor first and restores the cursor from it afterwards,
+     * so a step that gives up leaves the position for the next one unchanged.
+     *   Step 1: table a_1, requires r_R1.
+     *   Step 2: table a_2, requires r_R1.
+     *   Step 3: table a_4, requires r_R2 (with nested follow-up removals).
+     * Returns false only when slice_del fails; otherwise true, whether or not
+     * anything was removed.
+     */
+    function r_standard_suffix () : boolean
+    {
+        var among_var : int;
+        var v_1 : int;
+        var v_2 : int;
+        var v_3 : int;
+        var v_4 : int;
+        var v_5 : int;
+        var v_6 : int;
+        var v_7 : int;
+        var v_8 : int;
+        var v_9 : int;
+        var v_10 : int;
+        // (, line 78
+        // do, line 79
+        v_1 = this.limit - this.cursor;
+        var lab0 = true;
+        lab0: while (lab0 == true)
+        {
+            lab0 = false;
+            // (, line 79
+            // [, line 80
+            this.ket = this.cursor;
+            // substring, line 80
+            among_var = this.find_among_b(GermanStemmer.a_1, 7);
+            if (among_var == 0)
+            {
+                break lab0;
+            }
+            // ], line 80
+            this.bra = this.cursor;
+            // call R1, line 80
+            if (!this.r_R1())
+            {
+                break lab0;
+            }
+            switch (among_var) {
+                case 0:
+                    break lab0;
+                case 1:
+                    // (, line 82
+                    // delete, line 82
+                    if (!this.slice_del())
+                    {
+                        return false;
+                    }
+                    break;
+                case 2:
+                    // (, line 85
+                    // delete, line 85
+                    if (!this.slice_del())
+                    {
+                        return false;
+                    }
+                    // try, line 86
+                    v_2 = this.limit - this.cursor;
+                    var lab1 = true;
+                    lab1: while (lab1 == true)
+                    {
+                        lab1 = false;
+                        // (, line 86
+                        // [, line 86
+                        this.ket = this.cursor;
+                        // literal, line 86
+                        if (!(this.eq_s_b(1, "s")))
+                        {
+                            this.cursor = this.limit - v_2;
+                            break lab1;
+                        }
+                        // ], line 86
+                        this.bra = this.cursor;
+                        // literal, line 86
+                        if (!(this.eq_s_b(3, "nis")))
+                        {
+                            this.cursor = this.limit - v_2;
+                            break lab1;
+                        }
+                        // delete, line 86
+                        if (!this.slice_del())
+                        {
+                            return false;
+                        }
+                    }
+                    break;
+                case 3:
+                    // (, line 89
+                    if (!(this.in_grouping_b(GermanStemmer.g_s_ending, 98, 116)))
+                    {
+                        break lab0;
+                    }
+                    // delete, line 89
+                    if (!this.slice_del())
+                    {
+                        return false;
+                    }
+                    break;
+            }
+        }
+        this.cursor = this.limit - v_1;
+        // do, line 93
+        v_3 = this.limit - this.cursor;
+        var lab2 = true;
+        lab2: while (lab2 == true)
+        {
+            lab2 = false;
+            // (, line 93
+            // [, line 94
+            this.ket = this.cursor;
+            // substring, line 94
+            among_var = this.find_among_b(GermanStemmer.a_2, 4);
+            if (among_var == 0)
+            {
+                break lab2;
+            }
+            // ], line 94
+            this.bra = this.cursor;
+            // call R1, line 94
+            if (!this.r_R1())
+            {
+                break lab2;
+            }
+            switch (among_var) {
+                case 0:
+                    break lab2;
+                case 1:
+                    // (, line 96
+                    // delete, line 96
+                    if (!this.slice_del())
+                    {
+                        return false;
+                    }
+                    break;
+                case 2:
+                    // (, line 99
+                    if (!(this.in_grouping_b(GermanStemmer.g_st_ending, 98, 116)))
+                    {
+                        break lab2;
+                    }
+                    // hop, line 99
+                    {
+                        var c : int = this.cursor - 3;
+                        if (this.limit_backward > c || c > this.limit)
+                        {
+                            break lab2;
+                        }
+                        this.cursor = c;
+                    }
+                    // delete, line 99
+                    if (!this.slice_del())
+                    {
+                        return false;
+                    }
+                    break;
+            }
+        }
+        this.cursor = this.limit - v_3;
+        // do, line 103
+        v_4 = this.limit - this.cursor;
+        var lab3 = true;
+        lab3: while (lab3 == true)
+        {
+            lab3 = false;
+            // (, line 103
+            // [, line 104
+            this.ket = this.cursor;
+            // substring, line 104
+            among_var = this.find_among_b(GermanStemmer.a_4, 8);
+            if (among_var == 0)
+            {
+                break lab3;
+            }
+            // ], line 104
+            this.bra = this.cursor;
+            // call R2, line 104
+            if (!this.r_R2())
+            {
+                break lab3;
+            }
+            switch (among_var) {
+                case 0:
+                    break lab3;
+                case 1:
+                    // (, line 106
+                    // delete, line 106
+                    if (!this.slice_del())
+                    {
+                        return false;
+                    }
+                    // try, line 107
+                    v_5 = this.limit - this.cursor;
+                    var lab4 = true;
+                    lab4: while (lab4 == true)
+                    {
+                        lab4 = false;
+                        // (, line 107
+                        // [, line 107
+                        this.ket = this.cursor;
+                        // literal, line 107
+                        if (!(this.eq_s_b(2, "ig")))
+                        {
+                            this.cursor = this.limit - v_5;
+                            break lab4;
+                        }
+                        // ], line 107
+                        this.bra = this.cursor;
+                        // not, line 107
+                        {
+                            v_6 = this.limit - this.cursor;
+                            var lab5 = true;
+                            lab5: while (lab5 == true)
+                            {
+                                lab5 = false;
+                                // literal, line 107
+                                if (!(this.eq_s_b(1, "e")))
+                                {
+                                    break lab5;
+                                }
+                                this.cursor = this.limit - v_5;
+                                break lab4;
+                            }
+                            this.cursor = this.limit - v_6;
+                        }
+                        // call R2, line 107
+                        if (!this.r_R2())
+                        {
+                            this.cursor = this.limit - v_5;
+                            break lab4;
+                        }
+                        // delete, line 107
+                        if (!this.slice_del())
+                        {
+                            return false;
+                        }
+                    }
+                    break;
+                case 2:
+                    // (, line 110
+                    // not, line 110
+                    {
+                        v_7 = this.limit - this.cursor;
+                        var lab6 = true;
+                        lab6: while (lab6 == true)
+                        {
+                            lab6 = false;
+                            // literal, line 110
+                            if (!(this.eq_s_b(1, "e")))
+                            {
+                                break lab6;
+                            }
+                            break lab3;
+                        }
+                        this.cursor = this.limit - v_7;
+                    }
+                    // delete, line 110
+                    if (!this.slice_del())
+                    {
+                        return false;
+                    }
+                    break;
+                case 3:
+                    // (, line 113
+                    // delete, line 113
+                    if (!this.slice_del())
+                    {
+                        return false;
+                    }
+                    // try, line 114
+                    v_8 = this.limit - this.cursor;
+                    var lab7 = true;
+                    lab7: while (lab7 == true)
+                    {
+                        lab7 = false;
+                        // (, line 114
+                        // [, line 115
+                        this.ket = this.cursor;
+                        // or, line 115
+                        var lab8 = true;
+                        lab8: while (lab8 == true)
+                        {
+                            lab8 = false;
+                            v_9 = this.limit - this.cursor;
+                            var lab9 = true;
+                            lab9: while (lab9 == true)
+                            {
+                                lab9 = false;
+                                // literal, line 115
+                                if (!(this.eq_s_b(2, "er")))
+                                {
+                                    break lab9;
+                                }
+                                break lab8;
+                            }
+                            this.cursor = this.limit - v_9;
+                            // literal, line 115
+                            if (!(this.eq_s_b(2, "en")))
+                            {
+                                this.cursor = this.limit - v_8;
+                                break lab7;
+                            }
+                        }
+                        // ], line 115
+                        this.bra = this.cursor;
+                        // call R1, line 115
+                        if (!this.r_R1())
+                        {
+                            this.cursor = this.limit - v_8;
+                            break lab7;
+                        }
+                        // delete, line 115
+                        if (!this.slice_del())
+                        {
+                            return false;
+                        }
+                    }
+                    break;
+                case 4:
+                    // (, line 119
+                    // delete, line 119
+                    if (!this.slice_del())
+                    {
+                        return false;
+                    }
+                    // try, line 120
+                    v_10 = this.limit - this.cursor;
+                    var lab10 = true;
+                    lab10: while (lab10 == true)
+                    {
+                        lab10 = false;
+                        // (, line 120
+                        // [, line 121
+                        this.ket = this.cursor;
+                        // substring, line 121
+                        among_var = this.find_among_b(GermanStemmer.a_3, 2);
+                        if (among_var == 0)
+                        {
+                            this.cursor = this.limit - v_10;
+                            break lab10;
+                        }
+                        // ], line 121
+                        this.bra = this.cursor;
+                        // call R2, line 121
+                        if (!this.r_R2())
+                        {
+                            this.cursor = this.limit - v_10;
+                            break lab10;
+                        }
+                        switch (among_var) {
+                            case 0:
+                                this.cursor = this.limit - v_10;
+                                break lab10;
+                            case 1:
+                                // (, line 123
+                                // delete, line 123
+                                if (!this.slice_del())
+                                {
+                                    return false;
+                                }
+                                break;
+                        }
+                    }
+                    break;
+            }
+        }
+        this.cursor = this.limit - v_4;
+        return true;
+    }
+    /**
+     * Entry point (overrides the base-class stem()). Runs r_prelude and
+     * r_mark_regions from the current cursor, restoring the cursor after each;
+     * switches to backward mode (limit_backward = cursor, cursor = limit) and
+     * runs r_standard_suffix; then moves the cursor back to limit_backward and
+     * runs r_postlude. A false result from any of these routines is ignored,
+     * so this method always returns true.
+     */
+    override function stem () : boolean
+    {
+        var v_1 : int;
+        var v_2 : int;
+        var v_3 : int;
+        var v_4 : int;
+        // (, line 133
+        // do, line 134
+        v_1 = this.cursor;
+        var lab0 = true;
+        lab0: while (lab0 == true)
+        {
+            lab0 = false;
+            // call prelude, line 134
+            if (!this.r_prelude())
+            {
+                break lab0;
+            }
+        }
+        this.cursor = v_1;
+        // do, line 135
+        v_2 = this.cursor;
+        var lab1 = true;
+        lab1: while (lab1 == true)
+        {
+            lab1 = false;
+            // call mark_regions, line 135
+            if (!this.r_mark_regions())
+            {
+                break lab1;
+            }
+        }
+        this.cursor = v_2;
+        // backwards, line 136
+        this.limit_backward = this.cursor; this.cursor = this.limit;
+        // do, line 137
+        v_3 = this.limit - this.cursor;
+        var lab2 = true;
+        lab2: while (lab2 == true)
+        {
+            lab2 = false;
+            // call standard_suffix, line 137
+            if (!this.r_standard_suffix())
+            {
+                break lab2;
+            }
+        }
+        this.cursor = this.limit - v_3;
+        this.cursor = this.limit_backward; // do, line 138
+        v_4 = this.cursor;
+        var lab3 = true;
+        lab3: while (lab3 == true)
+        {
+            lab3 = false;
+            // call postlude, line 138
+            if (!this.r_postlude())
+            {
+                break lab3;
+            }
+        }
+        this.cursor = v_4;
+        return true;
+    }
+    /** Returns true exactly when `o` is a GermanStemmer instance; no state is compared. */
+    function equals (o : variant) : boolean {
+        return o instanceof GermanStemmer;
+    }
+    /**
+     * Hash of the constant string "GermanStemmer", folding each char code in
+     * as hash * 31 + code (written as (hash << 5) - hash). The value does not
+     * depend on instance state. The length == 0 guard cannot trigger because
+     * classname is a non-empty literal.
+     */
+    function hashCode() : int
+    {
+        //http://stackoverflow.com/questions/194846/is-there-any-kind-of-hashcode-function-in-javascript
+        var classname = "GermanStemmer";
+        var hash = 0;
+        if (classname.length == 0) return hash;
+        for (var i = 0; i < classname.length; i++) {
+            var char = classname.charCodeAt(i);
+            hash = ((hash << 5) - hash) + char;
+            hash = hash & hash; // Convert to 32bit integer
+        }
+        return hash;
+    }
+
+}
+
diff --git a/web/server/h2o/libh2o/misc/oktavia/src/stemmer/hungarian-stemmer.jsx b/web/server/h2o/libh2o/misc/oktavia/src/stemmer/hungarian-stemmer.jsx
new file mode 100644
index 00000000..9d67dba4
--- /dev/null
+++ b/web/server/h2o/libh2o/misc/oktavia/src/stemmer/hungarian-stemmer.jsx
@@ -0,0 +1,1478 @@
+// This file was generated automatically by the Snowball to JSX compiler
+
+import "base-stemmer.jsx";
+import "among.jsx";
+
+ /**
+ * This class was automatically generated by a Snowball to JSX compiler
+ * It implements the stemming algorithm defined by a snowball script.
+ */
+/*
+ * HungarianStemmer: a BaseStemmer subclass whose suffix routines all work in
+ * backward mode on the region at or after I_p1 (see r_R1).
+ *
+ * Encoding fix: the Hungarian double-acute vowels are U+0151 and U+0171.
+ * The generated tables previously carried U+00F5 and U+00FB (the Latin-1
+ * reading of the same byte values), so on real UTF-16 text the suffixes
+ * "-b\u0151l/-r\u0151l/-t\u0151l" never matched and neither letter counted
+ * as a vowel in r_mark_regions.
+ */
+class HungarianStemmer extends BaseStemmer
+{
+    static const serialVersionUID = 1; // not referenced in the visible part of this class
+    static const methodObject = new HungarianStemmer(); // not referenced in the visible part of this class
+    /* a_0: digraphs/trigraph tried by r_mark_regions after the leading vowel run. */
+    static const a_0 = [
+        new Among("cs", -1, -1),
+        new Among("dzs", -1, -1),
+        new Among("gy", -1, -1),
+        new Among("ly", -1, -1),
+        new Among("ny", -1, -1),
+        new Among("sz", -1, -1),
+        new Among("ty", -1, -1),
+        new Among("zs", -1, -1)
+    ];
+    /* a_1: final long vowel handled by r_v_ending (1 -> "a", 2 -> "e"). */
+    static const a_1 = [
+        new Among("\u00E1", -1, 1),
+        new Among("\u00E9", -1, 2)
+    ];
+    /* a_2: doubled consonants tested by r_double. */
+    static const a_2 = [
+        new Among("bb", -1, -1),
+        new Among("cc", -1, -1),
+        new Among("dd", -1, -1),
+        new Among("ff", -1, -1),
+        new Among("gg", -1, -1),
+        new Among("jj", -1, -1),
+        new Among("kk", -1, -1),
+        new Among("ll", -1, -1),
+        new Among("mm", -1, -1),
+        new Among("nn", -1, -1),
+        new Among("pp", -1, -1),
+        new Among("rr", -1, -1),
+        new Among("ccs", -1, -1),
+        new Among("ss", -1, -1),
+        new Among("zzs", -1, -1),
+        new Among("tt", -1, -1),
+        new Among("vv", -1, -1),
+        new Among("ggy", -1, -1),
+        new Among("lly", -1, -1),
+        new Among("nny", -1, -1),
+        new Among("tty", -1, -1),
+        new Among("ssz", -1, -1),
+        new Among("zz", -1, -1)
+    ];
+    /* a_3: suffixes matched by r_instrum. */
+    static const a_3 = [
+        new Among("al", -1, 1),
+        new Among("el", -1, 2)
+    ];
+    /*
+     * a_4: case suffixes matched by r_case.
+     * The three "\u0151l" entries are placed after "\u00FCl" because U+0151
+     * compares greater than U+00FC. NOTE(review): this assumes find_among_b
+     * needs the table ordered by the strings read from their last character
+     * (the existing order is consistent with that) - confirm in
+     * base-stemmer.jsx. The entry count (44) and the index of "n" (19), which
+     * later entries refer to, are unchanged.
+     */
+    static const a_4 = [
+        new Among("ba", -1, -1),
+        new Among("ra", -1, -1),
+        new Among("be", -1, -1),
+        new Among("re", -1, -1),
+        new Among("ig", -1, -1),
+        new Among("nak", -1, -1),
+        new Among("nek", -1, -1),
+        new Among("val", -1, -1),
+        new Among("vel", -1, -1),
+        new Among("ul", -1, -1),
+        new Among("n\u00E1l", -1, -1),
+        new Among("n\u00E9l", -1, -1),
+        new Among("b\u00F3l", -1, -1),
+        new Among("r\u00F3l", -1, -1),
+        new Among("t\u00F3l", -1, -1),
+        new Among("\u00FCl", -1, -1),
+        new Among("b\u0151l", -1, -1),
+        new Among("r\u0151l", -1, -1),
+        new Among("t\u0151l", -1, -1),
+        new Among("n", -1, -1),
+        new Among("an", 19, -1),
+        new Among("ban", 20, -1),
+        new Among("en", 19, -1),
+        new Among("ben", 22, -1),
+        new Among("k\u00E9ppen", 22, -1),
+        new Among("on", 19, -1),
+        new Among("\u00F6n", 19, -1),
+        new Among("k\u00E9pp", -1, -1),
+        new Among("kor", -1, -1),
+        new Among("t", -1, -1),
+        new Among("at", 29, -1),
+        new Among("et", 29, -1),
+        new Among("k\u00E9nt", 29, -1),
+        new Among("ank\u00E9nt", 32, -1),
+        new Among("enk\u00E9nt", 32, -1),
+        new Among("onk\u00E9nt", 32, -1),
+        new Among("ot", 29, -1),
+        new Among("\u00E9rt", 29, -1),
+        new Among("\u00F6t", 29, -1),
+        new Among("hez", -1, -1),
+        new Among("hoz", -1, -1),
+        new Among("h\u00F6z", -1, -1),
+        new Among("v\u00E1", -1, -1),
+        new Among("v\u00E9", -1, -1)
+    ];
+    /* a_5: suffixes matched by r_case_special. */
+    static const a_5 = [
+        new Among("\u00E1n", -1, 2),
+        new Among("\u00E9n", -1, 1),
+        new Among("\u00E1nk\u00E9nt", -1, 3)
+    ];
+    /* a_6: suffixes matched by r_case_other. */
+    static const a_6 = [
+        new Among("stul", -1, 2),
+        new Among("astul", 0, 1),
+        new Among("\u00E1stul", 0, 3),
+        new Among("st\u00FCl", -1, 2),
+        new Among("est\u00FCl", 3, 1),
+        new Among("\u00E9st\u00FCl", 3, 4)
+    ];
+    /* a_7: suffixes matched by r_factive. */
+    static const a_7 = [
+        new Among("\u00E1", -1, 1),
+        new Among("\u00E9", -1, 2)
+    ];
+    /* a_8: suffixes matched by r_plural. */
+    static const a_8 = [
+        new Among("k", -1, 7),
+        new Among("ak", 0, 4),
+        new Among("ek", 0, 6),
+        new Among("ok", 0, 5),
+        new Among("\u00E1k", 0, 1),
+        new Among("\u00E9k", 0, 2),
+        new Among("\u00F6k", 0, 3)
+    ];
+    /* a_9, a_10, a_11: not referenced in the visible part of this class. */
+    static const a_9 = [
+        new Among("\u00E9i", -1, 7),
+        new Among("\u00E1\u00E9i", 0, 6),
+        new Among("\u00E9\u00E9i", 0, 5),
+        new Among("\u00E9", -1, 9),
+        new Among("k\u00E9", 3, 4),
+        new Among("ak\u00E9", 4, 1),
+        new Among("ek\u00E9", 4, 1),
+        new Among("ok\u00E9", 4, 1),
+        new Among("\u00E1k\u00E9", 4, 3),
+        new Among("\u00E9k\u00E9", 4, 2),
+        new Among("\u00F6k\u00E9", 4, 1),
+        new Among("\u00E9\u00E9", 3, 8)
+    ];
+
+    static const a_10 = [
+        new Among("a", -1, 18),
+        new Among("ja", 0, 17),
+        new Among("d", -1, 16),
+        new Among("ad", 2, 13),
+        new Among("ed", 2, 13),
+        new Among("od", 2, 13),
+        new Among("\u00E1d", 2, 14),
+        new Among("\u00E9d", 2, 15),
+        new Among("\u00F6d", 2, 13),
+        new Among("e", -1, 18),
+        new Among("je", 9, 17),
+        new Among("nk", -1, 4),
+        new Among("unk", 11, 1),
+        new Among("\u00E1nk", 11, 2),
+        new Among("\u00E9nk", 11, 3),
+        new Among("\u00FCnk", 11, 1),
+        new Among("uk", -1, 8),
+        new Among("juk", 16, 7),
+        new Among("\u00E1juk", 17, 5),
+        new Among("\u00FCk", -1, 8),
+        new Among("j\u00FCk", 19, 7),
+        new Among("\u00E9j\u00FCk", 20, 6),
+        new Among("m", -1, 12),
+        new Among("am", 22, 9),
+        new Among("em", 22, 9),
+        new Among("om", 22, 9),
+        new Among("\u00E1m", 22, 10),
+        new Among("\u00E9m", 22, 11),
+        new Among("o", -1, 18),
+        new Among("\u00E1", -1, 19),
+        new Among("\u00E9", -1, 20)
+    ];
+
+    static const a_11 = [
+        new Among("id", -1, 10),
+        new Among("aid", 0, 9),
+        new Among("jaid", 1, 6),
+        new Among("eid", 0, 9),
+        new Among("jeid", 3, 6),
+        new Among("\u00E1id", 0, 7),
+        new Among("\u00E9id", 0, 8),
+        new Among("i", -1, 15),
+        new Among("ai", 7, 14),
+        new Among("jai", 8, 11),
+        new Among("ei", 7, 14),
+        new Among("jei", 10, 11),
+        new Among("\u00E1i", 7, 12),
+        new Among("\u00E9i", 7, 13),
+        new Among("itek", -1, 24),
+        new Among("eitek", 14, 21),
+        new Among("jeitek", 15, 20),
+        new Among("\u00E9itek", 14, 23),
+        new Among("ik", -1, 29),
+        new Among("aik", 18, 26),
+        new Among("jaik", 19, 25),
+        new Among("eik", 18, 26),
+        new Among("jeik", 21, 25),
+        new Among("\u00E1ik", 18, 27),
+        new Among("\u00E9ik", 18, 28),
+        new Among("ink", -1, 20),
+        new Among("aink", 25, 17),
+        new Among("jaink", 26, 16),
+        new Among("eink", 25, 17),
+        new Among("jeink", 28, 16),
+        new Among("\u00E1ink", 25, 18),
+        new Among("\u00E9ink", 25, 19),
+        new Among("aitok", -1, 21),
+        new Among("jaitok", 32, 20),
+        new Among("\u00E1itok", -1, 22),
+        new Among("im", -1, 5),
+        new Among("aim", 35, 4),
+        new Among("jaim", 36, 1),
+        new Among("eim", 35, 4),
+        new Among("jeim", 38, 1),
+        new Among("\u00E1im", 35, 2),
+        new Among("\u00E9im", 35, 3)
+    ];
+    /*
+     * g_v: vowel bitmap for character codes 97..369. Element (c - 97) >> 3,
+     * bit (c - 97) & 7 is set for a e i o u, U+00E1 U+00E9 U+00ED U+00F3
+     * U+00F6 U+00FA U+00FC, and U+0151 U+0171 (elements 30 and 34).
+     * The bits for U+00F5 and U+00FB that the old 97..252 map carried are
+     * cleared (52 -> 36, 14 -> 10). NOTE(review): the layout is the one
+     * assumed by in_grouping/out_grouping in base-stemmer.jsx - confirm there.
+     */
+    static const g_v = [17, 65, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 17, 36, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1] : int[];
+    /* Start of the region suffixes may be removed from; set by r_mark_regions. */
+    var I_p1 : int = 0;
+    /** Copies I_p1 from `other`, then delegates to super.copy_from(other). */
+    function copy_from (other : HungarianStemmer) : void
+    {
+        this.I_p1 = other.I_p1;
+        super.copy_from(other);
+    }
+    /**
+     * Computes I_p1 (initially this.limit).
+     * If the word starts with a g_v character: move to the first position
+     * where out_grouping(g_v) succeeds (without consuming it), then step over
+     * one a_0 digraph, or one character if none matches; I_p1 is the
+     * resulting cursor. If that path gives up, the cursor is rewound and the
+     * alternative requires out_grouping(g_v) first, then advances until
+     * in_grouping(g_v) succeeds; I_p1 is the cursor after that.
+     * Returns false when the alternative path cannot complete.
+     * The grouping range is 97..369 so that U+0151/U+0171 are classified.
+     */
+    function r_mark_regions () : boolean
+    {
+        var v_1 : int;
+        var v_2 : int;
+        var v_3 : int;
+        // (, line 44
+        this.I_p1 = this.limit;
+        // or, line 51
+        var lab0 = true;
+        lab0: while (lab0 == true)
+        {
+            lab0 = false;
+            v_1 = this.cursor;
+            var lab1 = true;
+            lab1: while (lab1 == true)
+            {
+                lab1 = false;
+                // (, line 48
+                if (!(this.in_grouping(HungarianStemmer.g_v, 97, 369)))
+                {
+                    break lab1;
+                }
+                // goto, line 48
+                golab2: while(true)
+                {
+                    v_2 = this.cursor;
+                    var lab3 = true;
+                    lab3: while (lab3 == true)
+                    {
+                        lab3 = false;
+                        if (!(this.out_grouping(HungarianStemmer.g_v, 97, 369)))
+                        {
+                            break lab3;
+                        }
+                        this.cursor = v_2;
+                        break golab2;
+                    }
+                    this.cursor = v_2;
+                    if (this.cursor >= this.limit)
+                    {
+                        break lab1;
+                    }
+                    this.cursor++;
+                }
+                // or, line 49
+                var lab4 = true;
+                lab4: while (lab4 == true)
+                {
+                    lab4 = false;
+                    v_3 = this.cursor;
+                    var lab5 = true;
+                    lab5: while (lab5 == true)
+                    {
+                        lab5 = false;
+                        // among, line 49
+                        if (this.find_among(HungarianStemmer.a_0, 8) == 0)
+                        {
+                            break lab5;
+                        }
+                        break lab4;
+                    }
+                    this.cursor = v_3;
+                    // next, line 49
+                    if (this.cursor >= this.limit)
+                    {
+                        break lab1;
+                    }
+                    this.cursor++;
+                }
+                // setmark p1, line 50
+                this.I_p1 = this.cursor;
+                break lab0;
+            }
+            this.cursor = v_1;
+            // (, line 53
+            if (!(this.out_grouping(HungarianStemmer.g_v, 97, 369)))
+            {
+                return false;
+            }
+            // gopast, line 53
+            golab6: while(true)
+            {
+                var lab7 = true;
+                lab7: while (lab7 == true)
+                {
+                    lab7 = false;
+                    if (!(this.in_grouping(HungarianStemmer.g_v, 97, 369)))
+                    {
+                        break lab7;
+                    }
+                    break golab6;
+                }
+                if (this.cursor >= this.limit)
+                {
+                    return false;
+                }
+                this.cursor++;
+            }
+            // setmark p1, line 53
+            this.I_p1 = this.cursor;
+        }
+        return true;
+    }
+    /** Returns true when the cursor is at or after the mark I_p1. */
+    function r_R1 () : boolean
+    {
+        if (!(this.I_p1 <= this.cursor))
+        {
+            return false;
+        }
+        return true;
+    }
+    /**
+     * Backward mode: if the text before the cursor ends with an a_1 entry
+     * inside R1, replaces "\u00E1" with "a" or "\u00E9" with "e".
+     * Returns false when nothing matches, R1 fails, or slice_from fails.
+     */
+    function r_v_ending () : boolean
+    {
+        var among_var : int;
+        // (, line 60
+        // [, line 61
+        this.ket = this.cursor;
+        // substring, line 61
+        among_var = this.find_among_b(HungarianStemmer.a_1, 2);
+        if (among_var == 0)
+        {
+            return false;
+        }
+        // ], line 61
+        this.bra = this.cursor;
+        // call R1, line 61
+        if (!this.r_R1())
+        {
+            return false;
+        }
+        switch (among_var) {
+            case 0:
+                return false;
+            case 1:
+                // (, line 62
+                // <-, line 62
+                if (!this.slice_from("a"))
+                {
+                    return false;
+                }
+                break;
+            case 2:
+                // (, line 63
+                // <-, line 63
+                if (!this.slice_from("e"))
+                {
+                    return false;
+                }
+                break;
+        }
+        return true;
+    }
+    /**
+     * Look-ahead test (backward mode): true when an a_2 entry matches before
+     * the cursor. The cursor is restored on success.
+     */
+    function r_double () : boolean
+    {
+        var v_1 : int;
+        // (, line 67
+        // test, line 68
+        v_1 = this.limit - this.cursor;
+        // among, line 68
+        if (this.find_among_b(HungarianStemmer.a_2, 23) == 0)
+        {
+            return false;
+        }
+        this.cursor = this.limit - v_1;
+        return true;
+    }
+    /**
+     * Backward mode: steps back one character, then deletes the single
+     * character before that position (ket = cursor, bra = cursor - 1).
+     * Returns false if either step would cross limit_backward or slice_del fails.
+     */
+    function r_undouble () : boolean
+    {
+        // (, line 72
+        // next, line 73
+        if (this.cursor <= this.limit_backward)
+        {
+            return false;
+        }
+        this.cursor--;
+        // [, line 73
+        this.ket = this.cursor;
+        // hop, line 73
+        {
+            var c : int = this.cursor - 1;
+            if (this.limit_backward > c || c > this.limit)
+            {
+                return false;
+            }
+            this.cursor = c;
+        }
+        // ], line 73
+        this.bra = this.cursor;
+        // delete, line 73
+        if (!this.slice_del())
+        {
+            return false;
+        }
+        return true;
+    }
+    /**
+     * Backward mode: matches an a_3 suffix ("al"/"el") in R1 that is preceded
+     * by an a_2 doubled consonant; deletes the suffix, then calls r_undouble.
+     * Returns false when any of those conditions or operations fails.
+     */
+    function r_instrum () : boolean
+    {
+        var among_var : int;
+        // (, line 76
+        // [, line 77
+        this.ket = this.cursor;
+        // substring, line 77
+        among_var = this.find_among_b(HungarianStemmer.a_3, 2);
+        if (among_var == 0)
+        {
+            return false;
+        }
+        // ], line 77
+        this.bra = this.cursor;
+        // call R1, line 77
+        if (!this.r_R1())
+        {
+            return false;
+        }
+        switch (among_var) {
+            case 0:
+                return false;
+            case 1:
+                // (, line 78
+                // call double, line 78
+                if (!this.r_double())
+                {
+                    return false;
+                }
+                break;
+            case 2:
+                // (, line 79
+                // call double, line 79
+                if (!this.r_double())
+                {
+                    return false;
+                }
+                break;
+        }
+        // delete, line 81
+        if (!this.slice_del())
+        {
+            return false;
+        }
+        // call undouble, line 82
+        if (!this.r_undouble())
+        {
+            return false;
+        }
+        return true;
+    }
+    /**
+     * Backward mode: deletes an a_4 case suffix found in R1, then calls
+     * r_v_ending. Note that the result of r_v_ending is propagated: this
+     * returns false when r_v_ending does, even though the suffix has already
+     * been deleted at that point.
+     */
+    function r_case () : boolean
+    {
+        // (, line 86
+        // [, line 87
+        this.ket = this.cursor;
+        // substring, line 87
+        if (this.find_among_b(HungarianStemmer.a_4, 44) == 0)
+        {
+            return false;
+        }
+        // ], line 87
+        this.bra = this.cursor;
+        // call R1, line 87
+        if (!this.r_R1())
+        {
+            return false;
+        }
+        // delete, line 111
+        if (!this.slice_del())
+        {
+            return false;
+        }
+        // call v_ending, line 112
+        if (!this.r_v_ending())
+        {
+            return false;
+        }
+        return true;
+    }
+    /**
+     * Backward mode: replaces an a_5 suffix found in R1 with "e" (result 1)
+     * or "a" (results 2 and 3). Returns false when nothing matches, R1 fails,
+     * or slice_from fails.
+     */
+    function r_case_special () : boolean
+    {
+        var among_var : int;
+        // (, line 115
+        // [, line 116
+        this.ket = this.cursor;
+        // substring, line 116
+        among_var = this.find_among_b(HungarianStemmer.a_5, 3);
+        if (among_var == 0)
+        {
+            return false;
+        }
+        // ], line 116
+        this.bra = this.cursor;
+        // call R1, line 116
+        if (!this.r_R1())
+        {
+            return false;
+        }
+        switch (among_var) {
+            case 0:
+                return false;
+            case 1:
+                // (, line 117
+                // <-, line 117
+                if (!this.slice_from("e"))
+                {
+                    return false;
+                }
+                break;
+            case 2:
+                // (, line 118
+                // <-, line 118
+                if (!this.slice_from("a"))
+                {
+                    return false;
+                }
+                break;
+            case 3:
+                // (, line 119
+                // <-, line 119
+                if (!this.slice_from("a"))
+                {
+                    return false;
+                }
+                break;
+        }
+        return true;
+    }
+    /**
+     * Backward mode: handles an a_6 suffix found in R1 - deleted for results
+     * 1 and 2, replaced with "a" for result 3 and with "e" for result 4.
+     * Returns false when nothing matches, R1 fails, or a slice operation fails.
+     */
+    function r_case_other () : boolean
+    {
+        var among_var : int;
+        // (, line 123
+        // [, line 124
+        this.ket = this.cursor;
+        // substring, line 124
+        among_var = this.find_among_b(HungarianStemmer.a_6, 6);
+        if (among_var == 0)
+        {
+            return false;
+        }
+        // ], line 124
+        this.bra = this.cursor;
+        // call R1, line 124
+        if (!this.r_R1())
+        {
+            return false;
+        }
+        switch (among_var) {
+            case 0:
+                return false;
+            case 1:
+                // (, line 125
+                // delete, line 125
+                if (!this.slice_del())
+                {
+                    return false;
+                }
+                break;
+            case 2:
+                // (, line 126
+                // delete, line 126
+                if (!this.slice_del())
+                {
+                    return false;
+                }
+                break;
+            case 3:
+                // (, line 127
+                // <-, line 127
+                if (!this.slice_from("a"))
+                {
+                    return false;
+                }
+                break;
+            case 4:
+                // (, line 128
+                // <-, line 128
+                if (!this.slice_from("e"))
+                {
+                    return false;
+                }
+                break;
+        }
+        return true;
+    }
+    /**
+     * Backward mode: matches an a_7 suffix in R1 that is preceded by an a_2
+     * doubled consonant; deletes the suffix, then calls r_undouble.
+     * Same shape as r_instrum, with table a_7 instead of a_3.
+     */
+    function r_factive () : boolean
+    {
+        var among_var : int;
+        // (, line 132
+        // [, line 133
+        this.ket = this.cursor;
+        // substring, line 133
+        among_var = this.find_among_b(HungarianStemmer.a_7, 2);
+        if (among_var == 0)
+        {
+            return false;
+        }
+        // ], line 133
+        this.bra = this.cursor;
+        // call R1, line 133
+        if (!this.r_R1())
+        {
+            return false;
+        }
+        switch (among_var) {
+            case 0:
+                return false;
+            case 1:
+                // (, line 134
+                // call double, line 134
+                if (!this.r_double())
+                {
+                    return false;
+                }
+                break;
+            case 2:
+                // (, line 135
+                // call double, line 135
+                if (!this.r_double())
+                {
+                    return false;
+                }
+                break;
+        }
+        // delete, line 137
+        if (!this.slice_del())
+        {
+            return false;
+        }
+        // call undouble, line 138
+        if (!this.r_undouble())
+        {
+            return false;
+        }
+        return true;
+    }
+
+    function r_plural () : boolean
+    {
+        var among_var : int;
+        // (, line 141
+        // [, line 142
+        this.ket = this.cursor;
+        // substring, line 142
+        among_var = this.find_among_b(HungarianStemmer.a_8, 7);
+        if (among_var == 0)
+        {
+            return false;
+        }
+        // ], line 142
+        this.bra = this.cursor;
+        // call R1, line 142
+        if (!this.r_R1())
+        {
+            return false;
+        }
+        switch (among_var) {
+            case 0:
+                return false;
+            case 1:
+                // (, line 143
+                // <-, line 143
+                if (!this.slice_from("a"))
+                {
+                    return false;
+                }
+                break;
+            case 2:
+                // (, line 144
+                // <-, line 144
+                if (!this.slice_from("e"))
+                {
+                    return false;
+                }
+                break;
+            case 3:
+                // (, line 145
+                // delete, line 145
+                if (!this.slice_del())
+                {
+                    return false;
+                }
+                break;
+            case 4:
+                // (, line 146
+                // delete, line 146
+                if (!this.slice_del())
+                {
+                    return false;
+                }
+                break;
+            case 5:
+                // (, line 147
+                // delete, line 147
+                if (!this.slice_del())
+                {
+                    return false;
+                }
+                break;
+
case 6: + // (, line 148 + // delete, line 148 + if (!this.slice_del()) + { + return false; + } + break; + case 7: + // (, line 149 + // delete, line 149 + if (!this.slice_del()) + { + return false; + } + break; + } + return true; + } + + function r_owned () : boolean + { + var among_var : int; + // (, line 153 + // [, line 154 + this.ket = this.cursor; + // substring, line 154 + among_var = this.find_among_b(HungarianStemmer.a_9, 12); + if (among_var == 0) + { + return false; + } + // ], line 154 + this.bra = this.cursor; + // call R1, line 154 + if (!this.r_R1()) + { + return false; + } + switch (among_var) { + case 0: + return false; + case 1: + // (, line 155 + // delete, line 155 + if (!this.slice_del()) + { + return false; + } + break; + case 2: + // (, line 156 + // <-, line 156 + if (!this.slice_from("e")) + { + return false; + } + break; + case 3: + // (, line 157 + // <-, line 157 + if (!this.slice_from("a")) + { + return false; + } + break; + case 4: + // (, line 158 + // delete, line 158 + if (!this.slice_del()) + { + return false; + } + break; + case 5: + // (, line 159 + // <-, line 159 + if (!this.slice_from("e")) + { + return false; + } + break; + case 6: + // (, line 160 + // <-, line 160 + if (!this.slice_from("a")) + { + return false; + } + break; + case 7: + // (, line 161 + // delete, line 161 + if (!this.slice_del()) + { + return false; + } + break; + case 8: + // (, line 162 + // <-, line 162 + if (!this.slice_from("e")) + { + return false; + } + break; + case 9: + // (, line 163 + // delete, line 163 + if (!this.slice_del()) + { + return false; + } + break; + } + return true; + } + + function r_sing_owner () : boolean + { + var among_var : int; + // (, line 167 + // [, line 168 + this.ket = this.cursor; + // substring, line 168 + among_var = this.find_among_b(HungarianStemmer.a_10, 31); + if (among_var == 0) + { + return false; + } + // ], line 168 + this.bra = this.cursor; + // call R1, line 168 + if (!this.r_R1()) + { + return false; + } + 
switch (among_var) { + case 0: + return false; + case 1: + // (, line 169 + // delete, line 169 + if (!this.slice_del()) + { + return false; + } + break; + case 2: + // (, line 170 + // <-, line 170 + if (!this.slice_from("a")) + { + return false; + } + break; + case 3: + // (, line 171 + // <-, line 171 + if (!this.slice_from("e")) + { + return false; + } + break; + case 4: + // (, line 172 + // delete, line 172 + if (!this.slice_del()) + { + return false; + } + break; + case 5: + // (, line 173 + // <-, line 173 + if (!this.slice_from("a")) + { + return false; + } + break; + case 6: + // (, line 174 + // <-, line 174 + if (!this.slice_from("e")) + { + return false; + } + break; + case 7: + // (, line 175 + // delete, line 175 + if (!this.slice_del()) + { + return false; + } + break; + case 8: + // (, line 176 + // delete, line 176 + if (!this.slice_del()) + { + return false; + } + break; + case 9: + // (, line 177 + // delete, line 177 + if (!this.slice_del()) + { + return false; + } + break; + case 10: + // (, line 178 + // <-, line 178 + if (!this.slice_from("a")) + { + return false; + } + break; + case 11: + // (, line 179 + // <-, line 179 + if (!this.slice_from("e")) + { + return false; + } + break; + case 12: + // (, line 180 + // delete, line 180 + if (!this.slice_del()) + { + return false; + } + break; + case 13: + // (, line 181 + // delete, line 181 + if (!this.slice_del()) + { + return false; + } + break; + case 14: + // (, line 182 + // <-, line 182 + if (!this.slice_from("a")) + { + return false; + } + break; + case 15: + // (, line 183 + // <-, line 183 + if (!this.slice_from("e")) + { + return false; + } + break; + case 16: + // (, line 184 + // delete, line 184 + if (!this.slice_del()) + { + return false; + } + break; + case 17: + // (, line 185 + // delete, line 185 + if (!this.slice_del()) + { + return false; + } + break; + case 18: + // (, line 186 + // delete, line 186 + if (!this.slice_del()) + { + return false; + } + break; + case 19: + // 
(, line 187 + // <-, line 187 + if (!this.slice_from("a")) + { + return false; + } + break; + case 20: + // (, line 188 + // <-, line 188 + if (!this.slice_from("e")) + { + return false; + } + break; + } + return true; + } + + function r_plur_owner () : boolean + { + var among_var : int; + // (, line 192 + // [, line 193 + this.ket = this.cursor; + // substring, line 193 + among_var = this.find_among_b(HungarianStemmer.a_11, 42); + if (among_var == 0) + { + return false; + } + // ], line 193 + this.bra = this.cursor; + // call R1, line 193 + if (!this.r_R1()) + { + return false; + } + switch (among_var) { + case 0: + return false; + case 1: + // (, line 194 + // delete, line 194 + if (!this.slice_del()) + { + return false; + } + break; + case 2: + // (, line 195 + // <-, line 195 + if (!this.slice_from("a")) + { + return false; + } + break; + case 3: + // (, line 196 + // <-, line 196 + if (!this.slice_from("e")) + { + return false; + } + break; + case 4: + // (, line 197 + // delete, line 197 + if (!this.slice_del()) + { + return false; + } + break; + case 5: + // (, line 198 + // delete, line 198 + if (!this.slice_del()) + { + return false; + } + break; + case 6: + // (, line 199 + // delete, line 199 + if (!this.slice_del()) + { + return false; + } + break; + case 7: + // (, line 200 + // <-, line 200 + if (!this.slice_from("a")) + { + return false; + } + break; + case 8: + // (, line 201 + // <-, line 201 + if (!this.slice_from("e")) + { + return false; + } + break; + case 9: + // (, line 202 + // delete, line 202 + if (!this.slice_del()) + { + return false; + } + break; + case 10: + // (, line 203 + // delete, line 203 + if (!this.slice_del()) + { + return false; + } + break; + case 11: + // (, line 204 + // delete, line 204 + if (!this.slice_del()) + { + return false; + } + break; + case 12: + // (, line 205 + // <-, line 205 + if (!this.slice_from("a")) + { + return false; + } + break; + case 13: + // (, line 206 + // <-, line 206 + if 
(!this.slice_from("e")) + { + return false; + } + break; + case 14: + // (, line 207 + // delete, line 207 + if (!this.slice_del()) + { + return false; + } + break; + case 15: + // (, line 208 + // delete, line 208 + if (!this.slice_del()) + { + return false; + } + break; + case 16: + // (, line 209 + // delete, line 209 + if (!this.slice_del()) + { + return false; + } + break; + case 17: + // (, line 210 + // delete, line 210 + if (!this.slice_del()) + { + return false; + } + break; + case 18: + // (, line 211 + // <-, line 211 + if (!this.slice_from("a")) + { + return false; + } + break; + case 19: + // (, line 212 + // <-, line 212 + if (!this.slice_from("e")) + { + return false; + } + break; + case 20: + // (, line 214 + // delete, line 214 + if (!this.slice_del()) + { + return false; + } + break; + case 21: + // (, line 215 + // delete, line 215 + if (!this.slice_del()) + { + return false; + } + break; + case 22: + // (, line 216 + // <-, line 216 + if (!this.slice_from("a")) + { + return false; + } + break; + case 23: + // (, line 217 + // <-, line 217 + if (!this.slice_from("e")) + { + return false; + } + break; + case 24: + // (, line 218 + // delete, line 218 + if (!this.slice_del()) + { + return false; + } + break; + case 25: + // (, line 219 + // delete, line 219 + if (!this.slice_del()) + { + return false; + } + break; + case 26: + // (, line 220 + // delete, line 220 + if (!this.slice_del()) + { + return false; + } + break; + case 27: + // (, line 221 + // <-, line 221 + if (!this.slice_from("a")) + { + return false; + } + break; + case 28: + // (, line 222 + // <-, line 222 + if (!this.slice_from("e")) + { + return false; + } + break; + case 29: + // (, line 223 + // delete, line 223 + if (!this.slice_del()) + { + return false; + } + break; + } + return true; + } + + override function stem () : boolean + { + var v_1 : int; + var v_2 : int; + var v_3 : int; + var v_4 : int; + var v_5 : int; + var v_6 : int; + var v_7 : int; + var v_8 : int; + var v_9 : 
int; + var v_10 : int; + // (, line 228 + // do, line 229 + v_1 = this.cursor; + var lab0 = true; + lab0: while (lab0 == true) + { + lab0 = false; + // call mark_regions, line 229 + if (!this.r_mark_regions()) + { + break lab0; + } + } + this.cursor = v_1; + // backwards, line 230 + this.limit_backward = this.cursor; this.cursor = this.limit; + // (, line 230 + // do, line 231 + v_2 = this.limit - this.cursor; + var lab1 = true; + lab1: while (lab1 == true) + { + lab1 = false; + // call instrum, line 231 + if (!this.r_instrum()) + { + break lab1; + } + } + this.cursor = this.limit - v_2; + // do, line 232 + v_3 = this.limit - this.cursor; + var lab2 = true; + lab2: while (lab2 == true) + { + lab2 = false; + // call case, line 232 + if (!this.r_case()) + { + break lab2; + } + } + this.cursor = this.limit - v_3; + // do, line 233 + v_4 = this.limit - this.cursor; + var lab3 = true; + lab3: while (lab3 == true) + { + lab3 = false; + // call case_special, line 233 + if (!this.r_case_special()) + { + break lab3; + } + } + this.cursor = this.limit - v_4; + // do, line 234 + v_5 = this.limit - this.cursor; + var lab4 = true; + lab4: while (lab4 == true) + { + lab4 = false; + // call case_other, line 234 + if (!this.r_case_other()) + { + break lab4; + } + } + this.cursor = this.limit - v_5; + // do, line 235 + v_6 = this.limit - this.cursor; + var lab5 = true; + lab5: while (lab5 == true) + { + lab5 = false; + // call factive, line 235 + if (!this.r_factive()) + { + break lab5; + } + } + this.cursor = this.limit - v_6; + // do, line 236 + v_7 = this.limit - this.cursor; + var lab6 = true; + lab6: while (lab6 == true) + { + lab6 = false; + // call owned, line 236 + if (!this.r_owned()) + { + break lab6; + } + } + this.cursor = this.limit - v_7; + // do, line 237 + v_8 = this.limit - this.cursor; + var lab7 = true; + lab7: while (lab7 == true) + { + lab7 = false; + // call sing_owner, line 237 + if (!this.r_sing_owner()) + { + break lab7; + } + } + this.cursor = this.limit - 
v_8; + // do, line 238 + v_9 = this.limit - this.cursor; + var lab8 = true; + lab8: while (lab8 == true) + { + lab8 = false; + // call plur_owner, line 238 + if (!this.r_plur_owner()) + { + break lab8; + } + } + this.cursor = this.limit - v_9; + // do, line 239 + v_10 = this.limit - this.cursor; + var lab9 = true; + lab9: while (lab9 == true) + { + lab9 = false; + // call plural, line 239 + if (!this.r_plural()) + { + break lab9; + } + } + this.cursor = this.limit - v_10; + this.cursor = this.limit_backward; return true; + } + + function equals (o : variant) : boolean { + return o instanceof HungarianStemmer; + } + + function hashCode() : int + { + //http://stackoverflow.com/questions/194846/is-there-any-kind-of-hashcode-function-in-javascript + var classname = "HungarianStemmer"; + var hash = 0; + if (classname.length == 0) return hash; + for (var i = 0; i < classname.length; i++) { + var char = classname.charCodeAt(i); + hash = ((hash << 5) - hash) + char; + hash = hash & hash; // Convert to 32bit integer + } + return hash; + } + +} + diff --git a/web/server/h2o/libh2o/misc/oktavia/src/stemmer/italian-stemmer.jsx b/web/server/h2o/libh2o/misc/oktavia/src/stemmer/italian-stemmer.jsx new file mode 100644 index 00000000..2072deb9 --- /dev/null +++ b/web/server/h2o/libh2o/misc/oktavia/src/stemmer/italian-stemmer.jsx @@ -0,0 +1,1412 @@ +// This file was generated automatically by the Snowball to JSX compiler + +import "base-stemmer.jsx"; +import "among.jsx"; + + /** + * This class was automatically generated by a Snowball to JSX compiler + * It implements the stemming algorithm defined by a snowball script. 
+ */ + +class ItalianStemmer extends BaseStemmer +{ + static const serialVersionUID = 1; + static const methodObject = new ItalianStemmer(); + + static const a_0 = [ + new Among("", -1, 7), + new Among("qu", 0, 6), + new Among("\u00E1", 0, 1), + new Among("\u00E9", 0, 2), + new Among("\u00ED", 0, 3), + new Among("\u00F3", 0, 4), + new Among("\u00FA", 0, 5) + ]; + + static const a_1 = [ + new Among("", -1, 3), + new Among("I", 0, 1), + new Among("U", 0, 2) + ]; + + static const a_2 = [ + new Among("la", -1, -1), + new Among("cela", 0, -1), + new Among("gliela", 0, -1), + new Among("mela", 0, -1), + new Among("tela", 0, -1), + new Among("vela", 0, -1), + new Among("le", -1, -1), + new Among("cele", 6, -1), + new Among("gliele", 6, -1), + new Among("mele", 6, -1), + new Among("tele", 6, -1), + new Among("vele", 6, -1), + new Among("ne", -1, -1), + new Among("cene", 12, -1), + new Among("gliene", 12, -1), + new Among("mene", 12, -1), + new Among("sene", 12, -1), + new Among("tene", 12, -1), + new Among("vene", 12, -1), + new Among("ci", -1, -1), + new Among("li", -1, -1), + new Among("celi", 20, -1), + new Among("glieli", 20, -1), + new Among("meli", 20, -1), + new Among("teli", 20, -1), + new Among("veli", 20, -1), + new Among("gli", 20, -1), + new Among("mi", -1, -1), + new Among("si", -1, -1), + new Among("ti", -1, -1), + new Among("vi", -1, -1), + new Among("lo", -1, -1), + new Among("celo", 31, -1), + new Among("glielo", 31, -1), + new Among("melo", 31, -1), + new Among("telo", 31, -1), + new Among("velo", 31, -1) + ]; + + static const a_3 = [ + new Among("ando", -1, 1), + new Among("endo", -1, 1), + new Among("ar", -1, 2), + new Among("er", -1, 2), + new Among("ir", -1, 2) + ]; + + static const a_4 = [ + new Among("ic", -1, -1), + new Among("abil", -1, -1), + new Among("os", -1, -1), + new Among("iv", -1, 1) + ]; + + static const a_5 = [ + new Among("ic", -1, 1), + new Among("abil", -1, 1), + new Among("iv", -1, 1) + ]; + + static const a_6 = [ + new Among("ica", 
-1, 1), + new Among("logia", -1, 3), + new Among("osa", -1, 1), + new Among("ista", -1, 1), + new Among("iva", -1, 9), + new Among("anza", -1, 1), + new Among("enza", -1, 5), + new Among("ice", -1, 1), + new Among("atrice", 7, 1), + new Among("iche", -1, 1), + new Among("logie", -1, 3), + new Among("abile", -1, 1), + new Among("ibile", -1, 1), + new Among("usione", -1, 4), + new Among("azione", -1, 2), + new Among("uzione", -1, 4), + new Among("atore", -1, 2), + new Among("ose", -1, 1), + new Among("ante", -1, 1), + new Among("mente", -1, 1), + new Among("amente", 19, 7), + new Among("iste", -1, 1), + new Among("ive", -1, 9), + new Among("anze", -1, 1), + new Among("enze", -1, 5), + new Among("ici", -1, 1), + new Among("atrici", 25, 1), + new Among("ichi", -1, 1), + new Among("abili", -1, 1), + new Among("ibili", -1, 1), + new Among("ismi", -1, 1), + new Among("usioni", -1, 4), + new Among("azioni", -1, 2), + new Among("uzioni", -1, 4), + new Among("atori", -1, 2), + new Among("osi", -1, 1), + new Among("anti", -1, 1), + new Among("amenti", -1, 6), + new Among("imenti", -1, 6), + new Among("isti", -1, 1), + new Among("ivi", -1, 9), + new Among("ico", -1, 1), + new Among("ismo", -1, 1), + new Among("oso", -1, 1), + new Among("amento", -1, 6), + new Among("imento", -1, 6), + new Among("ivo", -1, 9), + new Among("it\u00E0", -1, 8), + new Among("ist\u00E0", -1, 1), + new Among("ist\u00E8", -1, 1), + new Among("ist\u00EC", -1, 1) + ]; + + static const a_7 = [ + new Among("isca", -1, 1), + new Among("enda", -1, 1), + new Among("ata", -1, 1), + new Among("ita", -1, 1), + new Among("uta", -1, 1), + new Among("ava", -1, 1), + new Among("eva", -1, 1), + new Among("iva", -1, 1), + new Among("erebbe", -1, 1), + new Among("irebbe", -1, 1), + new Among("isce", -1, 1), + new Among("ende", -1, 1), + new Among("are", -1, 1), + new Among("ere", -1, 1), + new Among("ire", -1, 1), + new Among("asse", -1, 1), + new Among("ate", -1, 1), + new Among("avate", 16, 1), + new Among("evate", 
16, 1), + new Among("ivate", 16, 1), + new Among("ete", -1, 1), + new Among("erete", 20, 1), + new Among("irete", 20, 1), + new Among("ite", -1, 1), + new Among("ereste", -1, 1), + new Among("ireste", -1, 1), + new Among("ute", -1, 1), + new Among("erai", -1, 1), + new Among("irai", -1, 1), + new Among("isci", -1, 1), + new Among("endi", -1, 1), + new Among("erei", -1, 1), + new Among("irei", -1, 1), + new Among("assi", -1, 1), + new Among("ati", -1, 1), + new Among("iti", -1, 1), + new Among("eresti", -1, 1), + new Among("iresti", -1, 1), + new Among("uti", -1, 1), + new Among("avi", -1, 1), + new Among("evi", -1, 1), + new Among("ivi", -1, 1), + new Among("isco", -1, 1), + new Among("ando", -1, 1), + new Among("endo", -1, 1), + new Among("Yamo", -1, 1), + new Among("iamo", -1, 1), + new Among("avamo", -1, 1), + new Among("evamo", -1, 1), + new Among("ivamo", -1, 1), + new Among("eremo", -1, 1), + new Among("iremo", -1, 1), + new Among("assimo", -1, 1), + new Among("ammo", -1, 1), + new Among("emmo", -1, 1), + new Among("eremmo", 54, 1), + new Among("iremmo", 54, 1), + new Among("immo", -1, 1), + new Among("ano", -1, 1), + new Among("iscano", 58, 1), + new Among("avano", 58, 1), + new Among("evano", 58, 1), + new Among("ivano", 58, 1), + new Among("eranno", -1, 1), + new Among("iranno", -1, 1), + new Among("ono", -1, 1), + new Among("iscono", 65, 1), + new Among("arono", 65, 1), + new Among("erono", 65, 1), + new Among("irono", 65, 1), + new Among("erebbero", -1, 1), + new Among("irebbero", -1, 1), + new Among("assero", -1, 1), + new Among("essero", -1, 1), + new Among("issero", -1, 1), + new Among("ato", -1, 1), + new Among("ito", -1, 1), + new Among("uto", -1, 1), + new Among("avo", -1, 1), + new Among("evo", -1, 1), + new Among("ivo", -1, 1), + new Among("ar", -1, 1), + new Among("ir", -1, 1), + new Among("er\u00E0", -1, 1), + new Among("ir\u00E0", -1, 1), + new Among("er\u00F2", -1, 1), + new Among("ir\u00F2", -1, 1) + ]; + + static const g_v = [17, 65, 16, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128, 128, 8, 2, 1] : int[]; + + static const g_AEIO = [17, 65, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128, 128, 8, 2] : int[]; + + static const g_CG = [17] : int[]; + + var I_p2 : int = 0; + var I_p1 : int = 0; + var I_pV : int = 0; + + function copy_from (other : ItalianStemmer) : void + { + this.I_p2 = other.I_p2; + this.I_p1 = other.I_p1; + this.I_pV = other.I_pV; + super.copy_from(other); + } + + function r_prelude () : boolean + { + var among_var : int; + var v_1 : int; + var v_2 : int; + var v_3 : int; + var v_4 : int; + var v_5 : int; + // (, line 34 + // test, line 35 + v_1 = this.cursor; + // repeat, line 35 + replab0: while(true) + { + v_2 = this.cursor; + var lab1 = true; + lab1: while (lab1 == true) + { + lab1 = false; + // (, line 35 + // [, line 36 + this.bra = this.cursor; + // substring, line 36 + among_var = this.find_among(ItalianStemmer.a_0, 7); + if (among_var == 0) + { + break lab1; + } + // ], line 36 + this.ket = this.cursor; + switch (among_var) { + case 0: + break lab1; + case 1: + // (, line 37 + // <-, line 37 + if (!this.slice_from("\u00E0")) + { + return false; + } + break; + case 2: + // (, line 38 + // <-, line 38 + if (!this.slice_from("\u00E8")) + { + return false; + } + break; + case 3: + // (, line 39 + // <-, line 39 + if (!this.slice_from("\u00EC")) + { + return false; + } + break; + case 4: + // (, line 40 + // <-, line 40 + if (!this.slice_from("\u00F2")) + { + return false; + } + break; + case 5: + // (, line 41 + // <-, line 41 + if (!this.slice_from("\u00F9")) + { + return false; + } + break; + case 6: + // (, line 42 + // <-, line 42 + if (!this.slice_from("qU")) + { + return false; + } + break; + case 7: + // (, line 43 + // next, line 43 + if (this.cursor >= this.limit) + { + break lab1; + } + this.cursor++; + break; + } + continue replab0; + } + this.cursor = v_2; + break replab0; + } + this.cursor = v_1; + // repeat, line 46 + replab2: while(true) + { + v_3 = this.cursor; + var lab3 = 
true; + lab3: while (lab3 == true) + { + lab3 = false; + // goto, line 46 + golab4: while(true) + { + v_4 = this.cursor; + var lab5 = true; + lab5: while (lab5 == true) + { + lab5 = false; + // (, line 46 + if (!(this.in_grouping(ItalianStemmer.g_v, 97, 249))) + { + break lab5; + } + // [, line 47 + this.bra = this.cursor; + // or, line 47 + var lab6 = true; + lab6: while (lab6 == true) + { + lab6 = false; + v_5 = this.cursor; + var lab7 = true; + lab7: while (lab7 == true) + { + lab7 = false; + // (, line 47 + // literal, line 47 + if (!(this.eq_s(1, "u"))) + { + break lab7; + } + // ], line 47 + this.ket = this.cursor; + if (!(this.in_grouping(ItalianStemmer.g_v, 97, 249))) + { + break lab7; + } + // <-, line 47 + if (!this.slice_from("U")) + { + return false; + } + break lab6; + } + this.cursor = v_5; + // (, line 48 + // literal, line 48 + if (!(this.eq_s(1, "i"))) + { + break lab5; + } + // ], line 48 + this.ket = this.cursor; + if (!(this.in_grouping(ItalianStemmer.g_v, 97, 249))) + { + break lab5; + } + // <-, line 48 + if (!this.slice_from("I")) + { + return false; + } + } + this.cursor = v_4; + break golab4; + } + this.cursor = v_4; + if (this.cursor >= this.limit) + { + break lab3; + } + this.cursor++; + } + continue replab2; + } + this.cursor = v_3; + break replab2; + } + return true; + } + + function r_mark_regions () : boolean + { + var v_1 : int; + var v_2 : int; + var v_3 : int; + var v_6 : int; + var v_8 : int; + // (, line 52 + this.I_pV = this.limit; + this.I_p1 = this.limit; + this.I_p2 = this.limit; + // do, line 58 + v_1 = this.cursor; + var lab0 = true; + lab0: while (lab0 == true) + { + lab0 = false; + // (, line 58 + // or, line 60 + var lab1 = true; + lab1: while (lab1 == true) + { + lab1 = false; + v_2 = this.cursor; + var lab2 = true; + lab2: while (lab2 == true) + { + lab2 = false; + // (, line 59 + if (!(this.in_grouping(ItalianStemmer.g_v, 97, 249))) + { + break lab2; + } + // or, line 59 + var lab3 = true; + lab3: while (lab3 == true) 
+ { + lab3 = false; + v_3 = this.cursor; + var lab4 = true; + lab4: while (lab4 == true) + { + lab4 = false; + // (, line 59 + if (!(this.out_grouping(ItalianStemmer.g_v, 97, 249))) + { + break lab4; + } + // gopast, line 59 + golab5: while(true) + { + var lab6 = true; + lab6: while (lab6 == true) + { + lab6 = false; + if (!(this.in_grouping(ItalianStemmer.g_v, 97, 249))) + { + break lab6; + } + break golab5; + } + if (this.cursor >= this.limit) + { + break lab4; + } + this.cursor++; + } + break lab3; + } + this.cursor = v_3; + // (, line 59 + if (!(this.in_grouping(ItalianStemmer.g_v, 97, 249))) + { + break lab2; + } + // gopast, line 59 + golab7: while(true) + { + var lab8 = true; + lab8: while (lab8 == true) + { + lab8 = false; + if (!(this.out_grouping(ItalianStemmer.g_v, 97, 249))) + { + break lab8; + } + break golab7; + } + if (this.cursor >= this.limit) + { + break lab2; + } + this.cursor++; + } + } + break lab1; + } + this.cursor = v_2; + // (, line 61 + if (!(this.out_grouping(ItalianStemmer.g_v, 97, 249))) + { + break lab0; + } + // or, line 61 + var lab9 = true; + lab9: while (lab9 == true) + { + lab9 = false; + v_6 = this.cursor; + var lab10 = true; + lab10: while (lab10 == true) + { + lab10 = false; + // (, line 61 + if (!(this.out_grouping(ItalianStemmer.g_v, 97, 249))) + { + break lab10; + } + // gopast, line 61 + golab11: while(true) + { + var lab12 = true; + lab12: while (lab12 == true) + { + lab12 = false; + if (!(this.in_grouping(ItalianStemmer.g_v, 97, 249))) + { + break lab12; + } + break golab11; + } + if (this.cursor >= this.limit) + { + break lab10; + } + this.cursor++; + } + break lab9; + } + this.cursor = v_6; + // (, line 61 + if (!(this.in_grouping(ItalianStemmer.g_v, 97, 249))) + { + break lab0; + } + // next, line 61 + if (this.cursor >= this.limit) + { + break lab0; + } + this.cursor++; + } + } + // setmark pV, line 62 + this.I_pV = this.cursor; + } + this.cursor = v_1; + // do, line 64 + v_8 = this.cursor; + var lab13 = true; + 
lab13: while (lab13 == true) + { + lab13 = false; + // (, line 64 + // gopast, line 65 + golab14: while(true) + { + var lab15 = true; + lab15: while (lab15 == true) + { + lab15 = false; + if (!(this.in_grouping(ItalianStemmer.g_v, 97, 249))) + { + break lab15; + } + break golab14; + } + if (this.cursor >= this.limit) + { + break lab13; + } + this.cursor++; + } + // gopast, line 65 + golab16: while(true) + { + var lab17 = true; + lab17: while (lab17 == true) + { + lab17 = false; + if (!(this.out_grouping(ItalianStemmer.g_v, 97, 249))) + { + break lab17; + } + break golab16; + } + if (this.cursor >= this.limit) + { + break lab13; + } + this.cursor++; + } + // setmark p1, line 65 + this.I_p1 = this.cursor; + // gopast, line 66 + golab18: while(true) + { + var lab19 = true; + lab19: while (lab19 == true) + { + lab19 = false; + if (!(this.in_grouping(ItalianStemmer.g_v, 97, 249))) + { + break lab19; + } + break golab18; + } + if (this.cursor >= this.limit) + { + break lab13; + } + this.cursor++; + } + // gopast, line 66 + golab20: while(true) + { + var lab21 = true; + lab21: while (lab21 == true) + { + lab21 = false; + if (!(this.out_grouping(ItalianStemmer.g_v, 97, 249))) + { + break lab21; + } + break golab20; + } + if (this.cursor >= this.limit) + { + break lab13; + } + this.cursor++; + } + // setmark p2, line 66 + this.I_p2 = this.cursor; + } + this.cursor = v_8; + return true; + } + + function r_postlude () : boolean + { + var among_var : int; + var v_1 : int; + // repeat, line 70 + replab0: while(true) + { + v_1 = this.cursor; + var lab1 = true; + lab1: while (lab1 == true) + { + lab1 = false; + // (, line 70 + // [, line 72 + this.bra = this.cursor; + // substring, line 72 + among_var = this.find_among(ItalianStemmer.a_1, 3); + if (among_var == 0) + { + break lab1; + } + // ], line 72 + this.ket = this.cursor; + switch (among_var) { + case 0: + break lab1; + case 1: + // (, line 73 + // <-, line 73 + if (!this.slice_from("i")) + { + return false; + } + break; + 
case 2: + // (, line 74 + // <-, line 74 + if (!this.slice_from("u")) + { + return false; + } + break; + case 3: + // (, line 75 + // next, line 75 + if (this.cursor >= this.limit) + { + break lab1; + } + this.cursor++; + break; + } + continue replab0; + } + this.cursor = v_1; + break replab0; + } + return true; + } + + function r_RV () : boolean + { + if (!(this.I_pV <= this.cursor)) + { + return false; + } + return true; + } + + function r_R1 () : boolean + { + if (!(this.I_p1 <= this.cursor)) + { + return false; + } + return true; + } + + function r_R2 () : boolean + { + if (!(this.I_p2 <= this.cursor)) + { + return false; + } + return true; + } + + function r_attached_pronoun () : boolean + { + var among_var : int; + // (, line 86 + // [, line 87 + this.ket = this.cursor; + // substring, line 87 + if (this.find_among_b(ItalianStemmer.a_2, 37) == 0) + { + return false; + } + // ], line 87 + this.bra = this.cursor; + // among, line 97 + among_var = this.find_among_b(ItalianStemmer.a_3, 5); + if (among_var == 0) + { + return false; + } + // (, line 97 + // call RV, line 97 + if (!this.r_RV()) + { + return false; + } + switch (among_var) { + case 0: + return false; + case 1: + // (, line 98 + // delete, line 98 + if (!this.slice_del()) + { + return false; + } + break; + case 2: + // (, line 99 + // <-, line 99 + if (!this.slice_from("e")) + { + return false; + } + break; + } + return true; + } + + function r_standard_suffix () : boolean + { + var among_var : int; + var v_1 : int; + var v_2 : int; + var v_3 : int; + var v_4 : int; + // (, line 103 + // [, line 104 + this.ket = this.cursor; + // substring, line 104 + among_var = this.find_among_b(ItalianStemmer.a_6, 51); + if (among_var == 0) + { + return false; + } + // ], line 104 + this.bra = this.cursor; + switch (among_var) { + case 0: + return false; + case 1: + // (, line 111 + // call R2, line 111 + if (!this.r_R2()) + { + return false; + } + // delete, line 111 + if (!this.slice_del()) + { + return false; + 
} + break; + case 2: + // (, line 113 + // call R2, line 113 + if (!this.r_R2()) + { + return false; + } + // delete, line 113 + if (!this.slice_del()) + { + return false; + } + // try, line 114 + v_1 = this.limit - this.cursor; + var lab0 = true; + lab0: while (lab0 == true) + { + lab0 = false; + // (, line 114 + // [, line 114 + this.ket = this.cursor; + // literal, line 114 + if (!(this.eq_s_b(2, "ic"))) + { + this.cursor = this.limit - v_1; + break lab0; + } + // ], line 114 + this.bra = this.cursor; + // call R2, line 114 + if (!this.r_R2()) + { + this.cursor = this.limit - v_1; + break lab0; + } + // delete, line 114 + if (!this.slice_del()) + { + return false; + } + } + break; + case 3: + // (, line 117 + // call R2, line 117 + if (!this.r_R2()) + { + return false; + } + // <-, line 117 + if (!this.slice_from("log")) + { + return false; + } + break; + case 4: + // (, line 119 + // call R2, line 119 + if (!this.r_R2()) + { + return false; + } + // <-, line 119 + if (!this.slice_from("u")) + { + return false; + } + break; + case 5: + // (, line 121 + // call R2, line 121 + if (!this.r_R2()) + { + return false; + } + // <-, line 121 + if (!this.slice_from("ente")) + { + return false; + } + break; + case 6: + // (, line 123 + // call RV, line 123 + if (!this.r_RV()) + { + return false; + } + // delete, line 123 + if (!this.slice_del()) + { + return false; + } + break; + case 7: + // (, line 124 + // call R1, line 125 + if (!this.r_R1()) + { + return false; + } + // delete, line 125 + if (!this.slice_del()) + { + return false; + } + // try, line 126 + v_2 = this.limit - this.cursor; + var lab1 = true; + lab1: while (lab1 == true) + { + lab1 = false; + // (, line 126 + // [, line 127 + this.ket = this.cursor; + // substring, line 127 + among_var = this.find_among_b(ItalianStemmer.a_4, 4); + if (among_var == 0) + { + this.cursor = this.limit - v_2; + break lab1; + } + // ], line 127 + this.bra = this.cursor; + // call R2, line 127 + if (!this.r_R2()) + { + 
this.cursor = this.limit - v_2; + break lab1; + } + // delete, line 127 + if (!this.slice_del()) + { + return false; + } + switch (among_var) { + case 0: + this.cursor = this.limit - v_2; + break lab1; + case 1: + // (, line 128 + // [, line 128 + this.ket = this.cursor; + // literal, line 128 + if (!(this.eq_s_b(2, "at"))) + { + this.cursor = this.limit - v_2; + break lab1; + } + // ], line 128 + this.bra = this.cursor; + // call R2, line 128 + if (!this.r_R2()) + { + this.cursor = this.limit - v_2; + break lab1; + } + // delete, line 128 + if (!this.slice_del()) + { + return false; + } + break; + } + } + break; + case 8: + // (, line 133 + // call R2, line 134 + if (!this.r_R2()) + { + return false; + } + // delete, line 134 + if (!this.slice_del()) + { + return false; + } + // try, line 135 + v_3 = this.limit - this.cursor; + var lab2 = true; + lab2: while (lab2 == true) + { + lab2 = false; + // (, line 135 + // [, line 136 + this.ket = this.cursor; + // substring, line 136 + among_var = this.find_among_b(ItalianStemmer.a_5, 3); + if (among_var == 0) + { + this.cursor = this.limit - v_3; + break lab2; + } + // ], line 136 + this.bra = this.cursor; + switch (among_var) { + case 0: + this.cursor = this.limit - v_3; + break lab2; + case 1: + // (, line 137 + // call R2, line 137 + if (!this.r_R2()) + { + this.cursor = this.limit - v_3; + break lab2; + } + // delete, line 137 + if (!this.slice_del()) + { + return false; + } + break; + } + } + break; + case 9: + // (, line 141 + // call R2, line 142 + if (!this.r_R2()) + { + return false; + } + // delete, line 142 + if (!this.slice_del()) + { + return false; + } + // try, line 143 + v_4 = this.limit - this.cursor; + var lab3 = true; + lab3: while (lab3 == true) + { + lab3 = false; + // (, line 143 + // [, line 143 + this.ket = this.cursor; + // literal, line 143 + if (!(this.eq_s_b(2, "at"))) + { + this.cursor = this.limit - v_4; + break lab3; + } + // ], line 143 + this.bra = this.cursor; + // call R2, line 143 + if 
(!this.r_R2()) + { + this.cursor = this.limit - v_4; + break lab3; + } + // delete, line 143 + if (!this.slice_del()) + { + return false; + } + // [, line 143 + this.ket = this.cursor; + // literal, line 143 + if (!(this.eq_s_b(2, "ic"))) + { + this.cursor = this.limit - v_4; + break lab3; + } + // ], line 143 + this.bra = this.cursor; + // call R2, line 143 + if (!this.r_R2()) + { + this.cursor = this.limit - v_4; + break lab3; + } + // delete, line 143 + if (!this.slice_del()) + { + return false; + } + } + break; + } + return true; + } + + /* Backward-mode step restricted to the text after I_pV. Returns false if the cursor is before I_pV; otherwise limit_backward is temporarily set to I_pV (old value kept in v_2), a suffix from table a_7 is bracketed, and it is deleted when the lookup result is 1. limit_backward is restored on the no-match and success paths. NOTE(review): the slice_del() failure path returns without restoring limit_backward. */ function r_verb_suffix () : boolean + { + var among_var : int; + var v_1 : int; + var v_2 : int; + // setlimit, line 148 + v_1 = this.limit - this.cursor; + // tomark, line 148 + if (this.cursor < this.I_pV) + { + return false; + } + this.cursor = this.I_pV; + v_2 = this.limit_backward; + this.limit_backward = this.cursor; + this.cursor = this.limit - v_1; + // (, line 148 + // [, line 149 + this.ket = this.cursor; + // substring, line 149 + among_var = this.find_among_b(ItalianStemmer.a_7, 87); + if (among_var == 0) + { + this.limit_backward = v_2; + return false; + } + // ], line 149 + this.bra = this.cursor; + switch (among_var) { + case 0: + this.limit_backward = v_2; + return false; + case 1: + // (, line 163 + // delete, line 163 + if (!this.slice_del()) + { + return false; + } + break; + } + this.limit_backward = v_2; + return true; + } + + /* Backward-mode step made of two optional ("try") removals; each restores the cursor from its saved offset when it does not apply. First: delete the character before the cursor when it is in g_AEIO and r_RV() holds, then delete a preceding "i" under the same RV test. Second: delete an "h" when the character before it is in g_CG and r_RV() holds. Returns true unless slice_del() fails. */ function r_vowel_suffix () : boolean + { + var v_1 : int; + var v_2 : int; + // (, line 170 + // try, line 171 + v_1 = this.limit - this.cursor; + var lab0 = true; + lab0: while (lab0 == true) + { + lab0 = false; + // (, line 171 + // [, line 172 + this.ket = this.cursor; + if (!(this.in_grouping_b(ItalianStemmer.g_AEIO, 97, 242))) + { + this.cursor = this.limit - v_1; + break lab0; + } + // ], line 172 + this.bra = this.cursor; + // call RV, line 172 + if (!this.r_RV()) + { + this.cursor = this.limit - v_1; + break lab0; + } + // delete, line 172 + if (!this.slice_del()) + { + return 
false; + } + // [, line 173 + this.ket = this.cursor; + // literal, line 173 + if (!(this.eq_s_b(1, "i"))) + { + this.cursor = this.limit - v_1; + break lab0; + } + // ], line 173 + this.bra = this.cursor; + // call RV, line 173 + if (!this.r_RV()) + { + this.cursor = this.limit - v_1; + break lab0; + } + // delete, line 173 + if (!this.slice_del()) + { + return false; + } + } + // try, line 175 + v_2 = this.limit - this.cursor; + var lab1 = true; + lab1: while (lab1 == true) + { + lab1 = false; + // (, line 175 + // [, line 176 + this.ket = this.cursor; + // literal, line 176 + if (!(this.eq_s_b(1, "h"))) + { + this.cursor = this.limit - v_2; + break lab1; + } + // ], line 176 + this.bra = this.cursor; + if (!(this.in_grouping_b(ItalianStemmer.g_CG, 99, 103))) + { + this.cursor = this.limit - v_2; + break lab1; + } + // call RV, line 176 + if (!this.r_RV()) + { + this.cursor = this.limit - v_2; + break lab1; + } + // delete, line 176 + if (!this.slice_del()) + { + return false; + } + } + return true; + } + + /* Entry point. Forward phase: runs r_prelude() and r_mark_regions(), restoring the cursor after each. Backward phase (limit_backward = cursor, cursor = limit): runs r_attached_pronoun(), then r_standard_suffix() or, if that fails, r_verb_suffix(), then r_vowel_suffix(), restoring the cursor offset after each. Finally moves the cursor back to limit_backward and runs r_postlude(). A failing step is ignored (each is wrapped in a "do"), so the method always returns true. */ override function stem () : boolean + { + var v_1 : int; + var v_2 : int; + var v_3 : int; + var v_4 : int; + var v_5 : int; + var v_6 : int; + var v_7 : int; + // (, line 181 + // do, line 182 + v_1 = this.cursor; + var lab0 = true; + lab0: while (lab0 == true) + { + lab0 = false; + // call prelude, line 182 + if (!this.r_prelude()) + { + break lab0; + } + } + this.cursor = v_1; + // do, line 183 + v_2 = this.cursor; + var lab1 = true; + lab1: while (lab1 == true) + { + lab1 = false; + // call mark_regions, line 183 + if (!this.r_mark_regions()) + { + break lab1; + } + } + this.cursor = v_2; + // backwards, line 184 + this.limit_backward = this.cursor; this.cursor = this.limit; + // (, line 184 + // do, line 185 + v_3 = this.limit - this.cursor; + var lab2 = true; + lab2: while (lab2 == true) + { + lab2 = false; + // call attached_pronoun, line 185 + if (!this.r_attached_pronoun()) + { + break lab2; + } + } + this.cursor = this.limit - v_3; + // do, line 186 
+ v_4 = this.limit - this.cursor; + var lab3 = true; + lab3: while (lab3 == true) + { + lab3 = false; + // (, line 186 + // or, line 186 + var lab4 = true; + lab4: while (lab4 == true) + { + lab4 = false; + v_5 = this.limit - this.cursor; + var lab5 = true; + lab5: while (lab5 == true) + { + lab5 = false; + // call standard_suffix, line 186 + if (!this.r_standard_suffix()) + { + break lab5; + } + break lab4; + } + this.cursor = this.limit - v_5; + // call verb_suffix, line 186 + if (!this.r_verb_suffix()) + { + break lab3; + } + } + } + this.cursor = this.limit - v_4; + // do, line 187 + v_6 = this.limit - this.cursor; + var lab6 = true; + lab6: while (lab6 == true) + { + lab6 = false; + // call vowel_suffix, line 187 + if (!this.r_vowel_suffix()) + { + break lab6; + } + } + this.cursor = this.limit - v_6; + this.cursor = this.limit_backward; // do, line 189 + v_7 = this.cursor; + var lab7 = true; + lab7: while (lab7 == true) + { + lab7 = false; + // call postlude, line 189 + if (!this.r_postlude()) + { + break lab7; + } + } + this.cursor = v_7; + return true; + } + + /* Type-only equality: true for any ItalianStemmer instance; no field is compared. */ function equals (o : variant) : boolean { + return o instanceof ItalianStemmer; + } + + /* Hash of the literal class name only: per character, hash = ((hash << 5) - hash) + charCode, followed by hash & hash. No instance state is read, so every instance returns the same value. */ function hashCode() : int + { + //http://stackoverflow.com/questions/194846/is-there-any-kind-of-hashcode-function-in-javascript + var classname = "ItalianStemmer"; + var hash = 0; + if (classname.length == 0) return hash; + for (var i = 0; i < classname.length; i++) { + var char = classname.charCodeAt(i); + hash = ((hash << 5) - hash) + char; + hash = hash & hash; // Convert to 32bit integer + } + return hash; + } + +} + diff --git a/web/server/h2o/libh2o/misc/oktavia/src/stemmer/norwegian-stemmer.jsx b/web/server/h2o/libh2o/misc/oktavia/src/stemmer/norwegian-stemmer.jsx new file mode 100644 index 00000000..29640bf1 --- /dev/null +++ b/web/server/h2o/libh2o/misc/oktavia/src/stemmer/norwegian-stemmer.jsx @@ -0,0 +1,428 @@ +// This file was generated automatically by the Snowball to JSX compiler + +import 
"base-stemmer.jsx"; +import "among.jsx"; + + /** + * This class was automatically generated by a Snowball to JSX compiler + * It implements the stemming algorithm defined by a snowball script. + */ + +class NorwegianStemmer extends BaseStemmer +{ + static const serialVersionUID = 1; + static const methodObject = new NorwegianStemmer(); + + /* Suffix table searched by r_main_suffix via find_among_b (29 entries). The third Among argument appears to be the result code that routine switches on (1 = delete, 2 = conditional delete, 3 = replace with "er"). NOTE(review): the meaning of the second argument is defined in among.jsx, not visible here. */ static const a_0 = [ + new Among("a", -1, 1), + new Among("e", -1, 1), + new Among("ede", 1, 1), + new Among("ande", 1, 1), + new Among("ende", 1, 1), + new Among("ane", 1, 1), + new Among("ene", 1, 1), + new Among("hetene", 6, 1), + new Among("erte", 1, 3), + new Among("en", -1, 1), + new Among("heten", 9, 1), + new Among("ar", -1, 1), + new Among("er", -1, 1), + new Among("heter", 12, 1), + new Among("s", -1, 2), + new Among("as", 14, 1), + new Among("es", 14, 1), + new Among("edes", 16, 1), + new Among("endes", 16, 1), + new Among("enes", 16, 1), + new Among("hetenes", 19, 1), + new Among("ens", 14, 1), + new Among("hetens", 21, 1), + new Among("ers", 14, 1), + new Among("ets", 14, 1), + new Among("et", -1, 1), + new Among("het", 25, 1), + new Among("ert", -1, 3), + new Among("ast", -1, 1) + ]; + + /* Two-entry table tested by r_consonant_pair. */ static const a_1 = [ + new Among("dt", -1, -1), + new Among("vt", -1, -1) + ]; + + /* Suffix table searched by r_other_suffix (11 entries, all with result 1). */ static const a_2 = [ + new Among("leg", -1, 1), + new Among("eleg", 0, 1), + new Among("ig", -1, 1), + new Among("eig", 2, 1), + new Among("lig", 2, 1), + new Among("elig", 4, 1), + new Among("els", -1, 1), + new Among("lov", -1, 1), + new Among("elov", 7, 1), + new Among("slov", 7, 1), + new Among("hetslov", 9, 1) + ]; + + /* Character-group data passed to in_grouping / out_grouping together with the code range 97..248. NOTE(review): presumably a bitmap of vowels; the encoding is defined in BaseStemmer. */ static const g_v = [17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 48, 0, 128] : int[]; + + /* Character-group data passed to in_grouping_b with the code range 98..122 in r_main_suffix. */ static const g_s_ending = [119, 125, 149, 1] : int[]; + + /* Marks set by r_mark_regions: I_x is the position three characters after the start; I_p1 is the backward limit used by the suffix routines. */ var I_x : int = 0; + var I_p1 : int = 0; + + /* Copies this class's marks (I_x, I_p1) from other, then delegates to super.copy_from(other). */ function copy_from (other : NorwegianStemmer) : void + { + this.I_x = other.I_x; + this.I_p1 = other.I_p1; + super.copy_from(other); + } + + /* Computes I_x and I_p1. I_p1 starts at limit; I_x is cursor + 3 (returns false if that is past limit). The cursor then advances to the first character in g_v and past the first character outside g_v after it (returns false if the end is reached first); that position becomes I_p1, raised to I_x when it is smaller. */ function r_mark_regions () : boolean + { + var v_1 : int; + var v_2 : int; + // (, line 26 + 
this.I_p1 = this.limit; + // test, line 30 + v_1 = this.cursor; + // (, line 30 + // hop, line 30 + { + var c : int = this.cursor + 3; + if (0 > c || c > this.limit) + { + return false; + } + this.cursor = c; + } + // setmark x, line 30 + this.I_x = this.cursor; + this.cursor = v_1; + // goto, line 31 + golab0: while(true) + { + v_2 = this.cursor; + var lab1 = true; + lab1: while (lab1 == true) + { + lab1 = false; + if (!(this.in_grouping(NorwegianStemmer.g_v, 97, 248))) + { + break lab1; + } + this.cursor = v_2; + break golab0; + } + this.cursor = v_2; + if (this.cursor >= this.limit) + { + return false; + } + this.cursor++; + } + // gopast, line 31 + golab2: while(true) + { + var lab3 = true; + lab3: while (lab3 == true) + { + lab3 = false; + if (!(this.out_grouping(NorwegianStemmer.g_v, 97, 248))) + { + break lab3; + } + break golab2; + } + if (this.cursor >= this.limit) + { + return false; + } + this.cursor++; + } + // setmark p1, line 31 + this.I_p1 = this.cursor; + // try, line 32 + var lab4 = true; + lab4: while (lab4 == true) + { + lab4 = false; + // (, line 32 + if (!(this.I_p1 < this.I_x)) + { + break lab4; + } + this.I_p1 = this.I_x; + } + return true; + } + + /* Backward-mode step. Returns false if the cursor is before I_p1; otherwise limit_backward is temporarily set to I_p1 while a suffix from table a_0 is bracketed, and restored from v_2 afterwards. Result 1 deletes the slice; result 2 deletes it only when the preceding character is in g_s_ending, or is "k" preceded by a character outside g_v; result 3 replaces the slice with "er". */ function r_main_suffix () : boolean + { + var among_var : int; + var v_1 : int; + var v_2 : int; + var v_3 : int; + // (, line 37 + // setlimit, line 38 + v_1 = this.limit - this.cursor; + // tomark, line 38 + if (this.cursor < this.I_p1) + { + return false; + } + this.cursor = this.I_p1; + v_2 = this.limit_backward; + this.limit_backward = this.cursor; + this.cursor = this.limit - v_1; + // (, line 38 + // [, line 38 + this.ket = this.cursor; + // substring, line 38 + among_var = this.find_among_b(NorwegianStemmer.a_0, 29); + if (among_var == 0) + { + this.limit_backward = v_2; + return false; + } + // ], line 38 + this.bra = this.cursor; + this.limit_backward = v_2; + switch (among_var) { + case 0: + return false; + case 1: + // (, line 44 + // delete, line 44 + if (!this.slice_del()) + { + 
return false; + } + break; + case 2: + // (, line 46 + // or, line 46 + var lab0 = true; + lab0: while (lab0 == true) + { + lab0 = false; + v_3 = this.limit - this.cursor; + var lab1 = true; + lab1: while (lab1 == true) + { + lab1 = false; + if (!(this.in_grouping_b(NorwegianStemmer.g_s_ending, 98, 122))) + { + break lab1; + } + break lab0; + } + this.cursor = this.limit - v_3; + // (, line 46 + // literal, line 46 + if (!(this.eq_s_b(1, "k"))) + { + return false; + } + if (!(this.out_grouping_b(NorwegianStemmer.g_v, 97, 248))) + { + return false; + } + } + // delete, line 46 + if (!this.slice_del()) + { + return false; + } + break; + case 3: + // (, line 48 + // <-, line 48 + if (!this.slice_from("er")) + { + return false; + } + break; + } + return true; + } + + /* Backward-mode step. With limit_backward temporarily set to I_p1, tests whether the text before the cursor ends with an entry of table a_1; on a match the cursor is put back to where it started (v_1), moved one character back, and the slice between that position (bra) and the original position (ket) is deleted, i.e. one character is removed. Returns false if the cursor is before I_p1, nothing matches, or the cursor cannot move back. */ function r_consonant_pair () : boolean + { + var v_1 : int; + var v_2 : int; + var v_3 : int; + // (, line 52 + // test, line 53 + v_1 = this.limit - this.cursor; + // (, line 53 + // setlimit, line 54 + v_2 = this.limit - this.cursor; + // tomark, line 54 + if (this.cursor < this.I_p1) + { + return false; + } + this.cursor = this.I_p1; + v_3 = this.limit_backward; + this.limit_backward = this.cursor; + this.cursor = this.limit - v_2; + // (, line 54 + // [, line 54 + this.ket = this.cursor; + // substring, line 54 + if (this.find_among_b(NorwegianStemmer.a_1, 2) == 0) + { + this.limit_backward = v_3; + return false; + } + // ], line 54 + this.bra = this.cursor; + this.limit_backward = v_3; + this.cursor = this.limit - v_1; + // next, line 59 + if (this.cursor <= this.limit_backward) + { + return false; + } + this.cursor--; + // ], line 59 + this.bra = this.cursor; + // delete, line 59 + if (!this.slice_del()) + { + return false; + } + return true; + } + + /* Backward-mode step. With limit_backward temporarily set to I_p1, brackets a suffix from table a_2 and deletes it when the lookup result is 1. Returns false if the cursor is before I_p1 or nothing matches. */ function r_other_suffix () : boolean + { + var among_var : int; + var v_1 : int; + var v_2 : int; + // (, line 62 + // setlimit, line 63 + v_1 = this.limit - this.cursor; + // tomark, line 63 + if (this.cursor < this.I_p1) + { + return false; + } + 
this.cursor = this.I_p1; + v_2 = this.limit_backward; + this.limit_backward = this.cursor; + this.cursor = this.limit - v_1; + // (, line 63 + // [, line 63 + this.ket = this.cursor; + // substring, line 63 + among_var = this.find_among_b(NorwegianStemmer.a_2, 11); + if (among_var == 0) + { + this.limit_backward = v_2; + return false; + } + // ], line 63 + this.bra = this.cursor; + this.limit_backward = v_2; + switch (among_var) { + case 0: + return false; + case 1: + // (, line 67 + // delete, line 67 + if (!this.slice_del()) + { + return false; + } + break; + } + return true; + } + + /* Entry point. Runs r_mark_regions() forward and restores the cursor, switches to backward mode (limit_backward = cursor, cursor = limit), then runs r_main_suffix(), r_consonant_pair() and r_other_suffix(), restoring the cursor offset after each. A failing step is ignored, the cursor is finally reset to limit_backward, and the method always returns true. */ override function stem () : boolean + { + var v_1 : int; + var v_2 : int; + var v_3 : int; + var v_4 : int; + // (, line 72 + // do, line 74 + v_1 = this.cursor; + var lab0 = true; + lab0: while (lab0 == true) + { + lab0 = false; + // call mark_regions, line 74 + if (!this.r_mark_regions()) + { + break lab0; + } + } + this.cursor = v_1; + // backwards, line 75 + this.limit_backward = this.cursor; this.cursor = this.limit; + // (, line 75 + // do, line 76 + v_2 = this.limit - this.cursor; + var lab1 = true; + lab1: while (lab1 == true) + { + lab1 = false; + // call main_suffix, line 76 + if (!this.r_main_suffix()) + { + break lab1; + } + } + this.cursor = this.limit - v_2; + // do, line 77 + v_3 = this.limit - this.cursor; + var lab2 = true; + lab2: while (lab2 == true) + { + lab2 = false; + // call consonant_pair, line 77 + if (!this.r_consonant_pair()) + { + break lab2; + } + } + this.cursor = this.limit - v_3; + // do, line 78 + v_4 = this.limit - this.cursor; + var lab3 = true; + lab3: while (lab3 == true) + { + lab3 = false; + // call other_suffix, line 78 + if (!this.r_other_suffix()) + { + break lab3; + } + } + this.cursor = this.limit - v_4; + this.cursor = this.limit_backward; return true; + } + + /* Type-only equality: true for any NorwegianStemmer instance; no field is compared. */ function equals (o : variant) : boolean { + return o instanceof NorwegianStemmer; + } + + /* Hash of the literal class name only; no instance state is read, so every instance returns the same value. */ function hashCode() : int + { + 
//http://stackoverflow.com/questions/194846/is-there-any-kind-of-hashcode-function-in-javascript + var classname = "NorwegianStemmer"; + var hash = 0; + if (classname.length == 0) return hash; + for (var i = 0; i < classname.length; i++) { + var char = classname.charCodeAt(i); + hash = ((hash << 5) - hash) + char; + hash = hash & hash; // Convert to 32bit integer + } + return hash; + } + +} + diff --git a/web/server/h2o/libh2o/misc/oktavia/src/stemmer/porter-stemmer.jsx b/web/server/h2o/libh2o/misc/oktavia/src/stemmer/porter-stemmer.jsx new file mode 100644 index 00000000..f8570501 --- /dev/null +++ b/web/server/h2o/libh2o/misc/oktavia/src/stemmer/porter-stemmer.jsx @@ -0,0 +1,1121 @@ +// This file was generated automatically by the Snowball to JSX compiler + +import "base-stemmer.jsx"; +import "among.jsx"; + + /** + * This class was automatically generated by a Snowball to JSX compiler + * It implements the stemming algorithm defined by a snowball script. + */ + +class PorterStemmer extends BaseStemmer +{ + static const serialVersionUID = 1; + static const methodObject = new PorterStemmer(); + + /* Suffix table searched by r_Step_1a (4 entries). The third Among argument appears to be the result code the step routine switches on. NOTE(review): the meaning of the second argument is defined in among.jsx, not visible here. */ static const a_0 = [ + new Among("s", -1, 3), + new Among("ies", 0, 2), + new Among("sses", 0, 1), + new Among("ss", 0, -1) + ]; + + /* Table tested by r_Step_1b after an "ed"/"ing" removal (13 entries; the empty string is one of them). */ static const a_1 = [ + new Among("", -1, 3), + new Among("bb", 0, 2), + new Among("dd", 0, 2), + new Among("ff", 0, 2), + new Among("gg", 0, 2), + new Among("bl", 0, 1), + new Among("mm", 0, 2), + new Among("nn", 0, 2), + new Among("pp", 0, 2), + new Among("rr", 0, 2), + new Among("at", 0, 1), + new Among("tt", 0, 2), + new Among("iz", 0, 1) + ]; + + /* Suffix table searched first by r_Step_1b (3 entries). */ static const a_2 = [ + new Among("ed", -1, 2), + new Among("eed", 0, 1), + new Among("ing", -1, 2) + ]; + + /* Suffix table searched by r_Step_2 (20 entries; result codes 1-14 select the replacement string). */ static const a_3 = [ + new Among("anci", -1, 3), + new Among("enci", -1, 2), + new Among("abli", -1, 4), + new Among("eli", -1, 6), + new Among("alli", -1, 9), + new Among("ousli", -1, 12), + new Among("entli", -1, 5), + new Among("aliti", -1, 10), + new Among("biliti", -1, 14), + new 
Among("iviti", -1, 13), + new Among("tional", -1, 1), + new Among("ational", 10, 8), + new Among("alism", -1, 10), + new Among("ation", -1, 8), + new Among("ization", 13, 7), + new Among("izer", -1, 7), + new Among("ator", -1, 8), + new Among("iveness", -1, 13), + new Among("fulness", -1, 11), + new Among("ousness", -1, 12) + ]; + + /* Suffix table searched by r_Step_3 (7 entries). */ static const a_4 = [ + new Among("icate", -1, 2), + new Among("ative", -1, 3), + new Among("alize", -1, 1), + new Among("iciti", -1, 2), + new Among("ical", -1, 2), + new Among("ful", -1, 3), + new Among("ness", -1, 3) + ]; + + /* Suffix table searched by r_Step_4 (19 entries; only "ion" carries result 2). */ static const a_5 = [ + new Among("ic", -1, 1), + new Among("ance", -1, 1), + new Among("ence", -1, 1), + new Among("able", -1, 1), + new Among("ible", -1, 1), + new Among("ate", -1, 1), + new Among("ive", -1, 1), + new Among("ize", -1, 1), + new Among("iti", -1, 1), + new Among("al", -1, 1), + new Among("ism", -1, 1), + new Among("ion", -1, 2), + new Among("er", -1, 1), + new Among("ous", -1, 1), + new Among("ant", -1, 1), + new Among("ent", -1, 1), + new Among("ment", 15, 1), + new Among("ement", 16, 1), + new Among("ou", -1, 1) + ]; + + /* Character-group data passed to in_grouping / out_grouping with the code range 97..121. NOTE(review): presumably a vowel bitmap; the encoding is defined in BaseStemmer. */ static const g_v = [17, 65, 16, 1] : int[]; + + /* Character-group data passed to out_grouping_b with the code range 89..121 in r_shortv. */ static const g_v_WXY = [1, 17, 65, 208, 1] : int[]; + + /* Per-word state: B_Y_found is set by stem() when it rewrites a "y" to "Y"; I_p1 and I_p2 are the marks tested by r_R1 and r_R2. */ var B_Y_found : boolean = false; + var I_p2 : int = 0; + var I_p1 : int = 0; + + /* Copies this class's state (B_Y_found, I_p2, I_p1) from other, then delegates to super.copy_from(other). */ function copy_from (other : PorterStemmer) : void + { + this.B_Y_found = other.B_Y_found; + this.I_p2 = other.I_p2; + this.I_p1 = other.I_p1; + super.copy_from(other); + } + + /* Backward test of the three characters before the cursor, nearest first: one outside g_v_WXY, then one inside g_v, then one outside g_v. Returns false at the first check that fails. */ function r_shortv () : boolean + { + // (, line 19 + if (!(this.out_grouping_b(PorterStemmer.g_v_WXY, 89, 121))) + { + return false; + } + if (!(this.in_grouping_b(PorterStemmer.g_v, 97, 121))) + { + return false; + } + if (!(this.out_grouping_b(PorterStemmer.g_v, 97, 121))) + { + return false; + } + return true; + } + + /* Region test: true only when this.I_p1 <= this.cursor. */ function r_R1 () : boolean + { + if (!(this.I_p1 <= this.cursor)) + { + return false; + } + return true; + } + + /* Region test: true only when this.I_p2 <= this.cursor. */ function r_R2 () : boolean + { + if (!(this.I_p2 <= this.cursor)) + { + return false; + } + 
return true; + } + + /* Backward-mode step. Brackets a suffix from table a_0 and rewrites it by lookup result: 1 replaces it with "ss", 2 with "i", 3 deletes it. Returns false when nothing matches or the slice operation fails. */ function r_Step_1a () : boolean + { + var among_var : int; + // (, line 24 + // [, line 25 + this.ket = this.cursor; + // substring, line 25 + among_var = this.find_among_b(PorterStemmer.a_0, 4); + if (among_var == 0) + { + return false; + } + // ], line 25 + this.bra = this.cursor; + switch (among_var) { + case 0: + return false; + case 1: + // (, line 26 + // <-, line 26 + if (!this.slice_from("ss")) + { + return false; + } + break; + case 2: + // (, line 27 + // <-, line 27 + if (!this.slice_from("i")) + { + return false; + } + break; + case 3: + // (, line 29 + // delete, line 29 + if (!this.slice_del()) + { + return false; + } + break; + } + return true; + } + + /* Backward-mode step. Brackets a suffix from table a_2. Result 1: requires r_R1() and replaces the slice with "ee". Result 2: requires a g_v character somewhere before the suffix, deletes the suffix, then tests the new ending against table a_1: result 1 inserts "e" at the cursor, result 2 deletes the character before the cursor, result 3 inserts "e" only when the cursor equals I_p1 and r_shortv() holds. */ function r_Step_1b () : boolean + { + var among_var : int; + var v_1 : int; + var v_3 : int; + var v_4 : int; + // (, line 33 + // [, line 34 + this.ket = this.cursor; + // substring, line 34 + among_var = this.find_among_b(PorterStemmer.a_2, 3); + if (among_var == 0) + { + return false; + } + // ], line 34 + this.bra = this.cursor; + switch (among_var) { + case 0: + return false; + case 1: + // (, line 35 + // call R1, line 35 + if (!this.r_R1()) + { + return false; + } + // <-, line 35 + if (!this.slice_from("ee")) + { + return false; + } + break; + case 2: + // (, line 37 + // test, line 38 + v_1 = this.limit - this.cursor; + // gopast, line 38 + golab0: while(true) + { + var lab1 = true; + lab1: while (lab1 == true) + { + lab1 = false; + if (!(this.in_grouping_b(PorterStemmer.g_v, 97, 121))) + { + break lab1; + } + break golab0; + } + if (this.cursor <= this.limit_backward) + { + return false; + } + this.cursor--; + } + this.cursor = this.limit - v_1; + // delete, line 38 + if (!this.slice_del()) + { + return false; + } + // test, line 39 + v_3 = this.limit - this.cursor; + // substring, line 39 + among_var = this.find_among_b(PorterStemmer.a_1, 13); + if (among_var == 0) + { + return false; + } + this.cursor = this.limit - v_3; + switch (among_var) { + case 0: + return false; 
+ case 1: + // (, line 41 + // <+, line 41 + { + var c : int = this.cursor; + this.insert(this.cursor, this.cursor, "e"); + this.cursor = c; + } + break; + case 2: + // (, line 44 + // [, line 44 + this.ket = this.cursor; + // next, line 44 + if (this.cursor <= this.limit_backward) + { + return false; + } + this.cursor--; + // ], line 44 + this.bra = this.cursor; + // delete, line 44 + if (!this.slice_del()) + { + return false; + } + break; + case 3: + // (, line 45 + // atmark, line 45 + if (this.cursor != this.I_p1) + { + return false; + } + // test, line 45 + v_4 = this.limit - this.cursor; + // call shortv, line 45 + if (!this.r_shortv()) + { + return false; + } + this.cursor = this.limit - v_4; + // <+, line 45 + { + var c : int = this.cursor; + this.insert(this.cursor, this.cursor, "e"); + this.cursor = c; + } + break; + } + break; + } + return true; + } + + /* Backward-mode step. Brackets a final "y" or "Y", then scans backwards for a character in g_v (returns false if limit_backward is reached first) and replaces the bracketed character with "i". */ function r_Step_1c () : boolean + { + var v_1 : int; + // (, line 51 + // [, line 52 + this.ket = this.cursor; + // or, line 52 + var lab0 = true; + lab0: while (lab0 == true) + { + lab0 = false; + v_1 = this.limit - this.cursor; + var lab1 = true; + lab1: while (lab1 == true) + { + lab1 = false; + // literal, line 52 + if (!(this.eq_s_b(1, "y"))) + { + break lab1; + } + break lab0; + } + this.cursor = this.limit - v_1; + // literal, line 52 + if (!(this.eq_s_b(1, "Y"))) + { + return false; + } + } + // ], line 52 + this.bra = this.cursor; + // gopast, line 53 + golab2: while(true) + { + var lab3 = true; + lab3: while (lab3 == true) + { + lab3 = false; + if (!(this.in_grouping_b(PorterStemmer.g_v, 97, 121))) + { + break lab3; + } + break golab2; + } + if (this.cursor <= this.limit_backward) + { + return false; + } + this.cursor--; + } + // <-, line 54 + if (!this.slice_from("i")) + { + return false; + } + return true; + } + + /* Backward-mode step. Brackets a suffix from table a_3, requires r_R1(), and replaces the slice with the string selected by the lookup result (1-14): "tion", "ence", "ance", "able", "ent", "e", "ize", "ate", "al", "al", "ful", "ous", "ive", "ble". */ function r_Step_2 () : boolean + { + var among_var : int; + // (, line 57 + // [, line 58 + this.ket = this.cursor; + // substring, line 58 + among_var = 
this.find_among_b(PorterStemmer.a_3, 20); + if (among_var == 0) + { + return false; + } + // ], line 58 + this.bra = this.cursor; + // call R1, line 58 + if (!this.r_R1()) + { + return false; + } + switch (among_var) { + case 0: + return false; + case 1: + // (, line 59 + // <-, line 59 + if (!this.slice_from("tion")) + { + return false; + } + break; + case 2: + // (, line 60 + // <-, line 60 + if (!this.slice_from("ence")) + { + return false; + } + break; + case 3: + // (, line 61 + // <-, line 61 + if (!this.slice_from("ance")) + { + return false; + } + break; + case 4: + // (, line 62 + // <-, line 62 + if (!this.slice_from("able")) + { + return false; + } + break; + case 5: + // (, line 63 + // <-, line 63 + if (!this.slice_from("ent")) + { + return false; + } + break; + case 6: + // (, line 64 + // <-, line 64 + if (!this.slice_from("e")) + { + return false; + } + break; + case 7: + // (, line 66 + // <-, line 66 + if (!this.slice_from("ize")) + { + return false; + } + break; + case 8: + // (, line 68 + // <-, line 68 + if (!this.slice_from("ate")) + { + return false; + } + break; + case 9: + // (, line 69 + // <-, line 69 + if (!this.slice_from("al")) + { + return false; + } + break; + case 10: + // (, line 71 + // <-, line 71 + if (!this.slice_from("al")) + { + return false; + } + break; + case 11: + // (, line 72 + // <-, line 72 + if (!this.slice_from("ful")) + { + return false; + } + break; + case 12: + // (, line 74 + // <-, line 74 + if (!this.slice_from("ous")) + { + return false; + } + break; + case 13: + // (, line 76 + // <-, line 76 + if (!this.slice_from("ive")) + { + return false; + } + break; + case 14: + // (, line 77 + // <-, line 77 + if (!this.slice_from("ble")) + { + return false; + } + break; + } + return true; + } + + /* Backward-mode step. Brackets a suffix from table a_4, requires r_R1(), then by lookup result: 1 replaces the slice with "al", 2 with "ic", 3 deletes it. */ function r_Step_3 () : boolean + { + var among_var : int; + // (, line 81 + // [, line 82 + this.ket = this.cursor; + // substring, line 82 + among_var = this.find_among_b(PorterStemmer.a_4, 7); + if (among_var == 0) + { + 
return false; + } + // ], line 82 + this.bra = this.cursor; + // call R1, line 82 + if (!this.r_R1()) + { + return false; + } + switch (among_var) { + case 0: + return false; + case 1: + // (, line 83 + // <-, line 83 + if (!this.slice_from("al")) + { + return false; + } + break; + case 2: + // (, line 85 + // <-, line 85 + if (!this.slice_from("ic")) + { + return false; + } + break; + case 3: + // (, line 87 + // delete, line 87 + if (!this.slice_del()) + { + return false; + } + break; + } + return true; + } + + /* Backward-mode step. Brackets a suffix from table a_5 and requires r_R2(). Result 1 deletes the slice; result 2 deletes it only when the character before it is "s" or "t". */ function r_Step_4 () : boolean + { + var among_var : int; + var v_1 : int; + // (, line 91 + // [, line 92 + this.ket = this.cursor; + // substring, line 92 + among_var = this.find_among_b(PorterStemmer.a_5, 19); + if (among_var == 0) + { + return false; + } + // ], line 92 + this.bra = this.cursor; + // call R2, line 92 + if (!this.r_R2()) + { + return false; + } + switch (among_var) { + case 0: + return false; + case 1: + // (, line 95 + // delete, line 95 + if (!this.slice_del()) + { + return false; + } + break; + case 2: + // (, line 96 + // or, line 96 + var lab0 = true; + lab0: while (lab0 == true) + { + lab0 = false; + v_1 = this.limit - this.cursor; + var lab1 = true; + lab1: while (lab1 == true) + { + lab1 = false; + // literal, line 96 + if (!(this.eq_s_b(1, "s"))) + { + break lab1; + } + break lab0; + } + this.cursor = this.limit - v_1; + // literal, line 96 + if (!(this.eq_s_b(1, "t"))) + { + return false; + } + } + // delete, line 96 + if (!this.slice_del()) + { + return false; + } + break; + } + return true; + } + + /* Backward-mode step. Brackets a final "e" and deletes it when r_R2() holds, or when r_R1() holds and r_shortv() does not. Returns false otherwise. */ function r_Step_5a () : boolean + { + var v_1 : int; + var v_2 : int; + // (, line 100 + // [, line 101 + this.ket = this.cursor; + // literal, line 101 + if (!(this.eq_s_b(1, "e"))) + { + return false; + } + // ], line 101 + this.bra = this.cursor; + // or, line 102 + var lab0 = true; + lab0: while (lab0 == true) + { + lab0 = false; + v_1 = this.limit - this.cursor; + var lab1 = true; + lab1: while (lab1 == true) + { + lab1 = 
false; + // call R2, line 102 + if (!this.r_R2()) + { + break lab1; + } + break lab0; + } + this.cursor = this.limit - v_1; + // (, line 102 + // call R1, line 102 + if (!this.r_R1()) + { + return false; + } + // not, line 102 + { + v_2 = this.limit - this.cursor; + var lab2 = true; + lab2: while (lab2 == true) + { + lab2 = false; + // call shortv, line 102 + if (!this.r_shortv()) + { + break lab2; + } + return false; + } + this.cursor = this.limit - v_2; + } + } + // delete, line 103 + if (!this.slice_del()) + { + return false; + } + return true; + } + + /* Backward-mode step. Brackets a final "l" and deletes it when r_R2() holds and the character before it is also "l". */ function r_Step_5b () : boolean + { + // (, line 106 + // [, line 107 + this.ket = this.cursor; + // literal, line 107 + if (!(this.eq_s_b(1, "l"))) + { + return false; + } + // ], line 107 + this.bra = this.cursor; + // call R2, line 108 + if (!this.r_R2()) + { + return false; + } + // literal, line 108 + if (!(this.eq_s_b(1, "l"))) + { + return false; + } + // delete, line 109 + if (!this.slice_del()) + { + return false; + } + return true; + } + + /* Entry point. Forward phase: clears B_Y_found, rewrites a leading "y" and every "y" that follows a g_v character to "Y" (setting B_Y_found), then sets I_p1 and I_p2 (both default to limit) by scanning past a g_v character and a non-g_v character, once for each mark. Backward phase: runs Step_1a, 1b, 1c, 2, 3, 4, 5a and 5b, restoring the cursor offset after each and ignoring failures. Finally, if B_Y_found is set, every "Y" is rewritten back to "y". Returns true unless a slice_from() call fails. */ override function stem () : boolean + { + var v_1 : int; + var v_2 : int; + var v_3 : int; + var v_4 : int; + var v_5 : int; + var v_10 : int; + var v_11 : int; + var v_12 : int; + var v_13 : int; + var v_14 : int; + var v_15 : int; + var v_16 : int; + var v_17 : int; + var v_18 : int; + var v_19 : int; + var v_20 : int; + // (, line 113 + // unset Y_found, line 115 + this.B_Y_found = false; + // do, line 116 + v_1 = this.cursor; + var lab0 = true; + lab0: while (lab0 == true) + { + lab0 = false; + // (, line 116 + // [, line 116 + this.bra = this.cursor; + // literal, line 116 + if (!(this.eq_s(1, "y"))) + { + break lab0; + } + // ], line 116 + this.ket = this.cursor; + // <-, line 116 + if (!this.slice_from("Y")) + { + return false; + } + // set Y_found, line 116 + this.B_Y_found = true; + } + this.cursor = v_1; + // do, line 117 + v_2 = this.cursor; + var lab1 = true; + lab1: while (lab1 == true) + { + lab1 = false; + // repeat, line 117 + replab2: while(true) + { + 
v_3 = this.cursor; + var lab3 = true; + lab3: while (lab3 == true) + { + lab3 = false; + // (, line 117 + // goto, line 117 + golab4: while(true) + { + v_4 = this.cursor; + var lab5 = true; + lab5: while (lab5 == true) + { + lab5 = false; + // (, line 117 + if (!(this.in_grouping(PorterStemmer.g_v, 97, 121))) + { + break lab5; + } + // [, line 117 + this.bra = this.cursor; + // literal, line 117 + if (!(this.eq_s(1, "y"))) + { + break lab5; + } + // ], line 117 + this.ket = this.cursor; + this.cursor = v_4; + break golab4; + } + this.cursor = v_4; + if (this.cursor >= this.limit) + { + break lab3; + } + this.cursor++; + } + // <-, line 117 + if (!this.slice_from("Y")) + { + return false; + } + // set Y_found, line 117 + this.B_Y_found = true; + continue replab2; + } + this.cursor = v_3; + break replab2; + } + } + this.cursor = v_2; + this.I_p1 = this.limit; + this.I_p2 = this.limit; + // do, line 121 + v_5 = this.cursor; + var lab6 = true; + lab6: while (lab6 == true) + { + lab6 = false; + // (, line 121 + // gopast, line 122 + golab7: while(true) + { + var lab8 = true; + lab8: while (lab8 == true) + { + lab8 = false; + if (!(this.in_grouping(PorterStemmer.g_v, 97, 121))) + { + break lab8; + } + break golab7; + } + if (this.cursor >= this.limit) + { + break lab6; + } + this.cursor++; + } + // gopast, line 122 + golab9: while(true) + { + var lab10 = true; + lab10: while (lab10 == true) + { + lab10 = false; + if (!(this.out_grouping(PorterStemmer.g_v, 97, 121))) + { + break lab10; + } + break golab9; + } + if (this.cursor >= this.limit) + { + break lab6; + } + this.cursor++; + } + // setmark p1, line 122 + this.I_p1 = this.cursor; + // gopast, line 123 + golab11: while(true) + { + var lab12 = true; + lab12: while (lab12 == true) + { + lab12 = false; + if (!(this.in_grouping(PorterStemmer.g_v, 97, 121))) + { + break lab12; + } + break golab11; + } + if (this.cursor >= this.limit) + { + break lab6; + } + this.cursor++; + } + // gopast, line 123 + golab13: while(true) + 
{ + var lab14 = true; + lab14: while (lab14 == true) + { + lab14 = false; + if (!(this.out_grouping(PorterStemmer.g_v, 97, 121))) + { + break lab14; + } + break golab13; + } + if (this.cursor >= this.limit) + { + break lab6; + } + this.cursor++; + } + // setmark p2, line 123 + this.I_p2 = this.cursor; + } + this.cursor = v_5; + // backwards, line 126 + this.limit_backward = this.cursor; this.cursor = this.limit; + // (, line 126 + // do, line 127 + v_10 = this.limit - this.cursor; + var lab15 = true; + lab15: while (lab15 == true) + { + lab15 = false; + // call Step_1a, line 127 + if (!this.r_Step_1a()) + { + break lab15; + } + } + this.cursor = this.limit - v_10; + // do, line 128 + v_11 = this.limit - this.cursor; + var lab16 = true; + lab16: while (lab16 == true) + { + lab16 = false; + // call Step_1b, line 128 + if (!this.r_Step_1b()) + { + break lab16; + } + } + this.cursor = this.limit - v_11; + // do, line 129 + v_12 = this.limit - this.cursor; + var lab17 = true; + lab17: while (lab17 == true) + { + lab17 = false; + // call Step_1c, line 129 + if (!this.r_Step_1c()) + { + break lab17; + } + } + this.cursor = this.limit - v_12; + // do, line 130 + v_13 = this.limit - this.cursor; + var lab18 = true; + lab18: while (lab18 == true) + { + lab18 = false; + // call Step_2, line 130 + if (!this.r_Step_2()) + { + break lab18; + } + } + this.cursor = this.limit - v_13; + // do, line 131 + v_14 = this.limit - this.cursor; + var lab19 = true; + lab19: while (lab19 == true) + { + lab19 = false; + // call Step_3, line 131 + if (!this.r_Step_3()) + { + break lab19; + } + } + this.cursor = this.limit - v_14; + // do, line 132 + v_15 = this.limit - this.cursor; + var lab20 = true; + lab20: while (lab20 == true) + { + lab20 = false; + // call Step_4, line 132 + if (!this.r_Step_4()) + { + break lab20; + } + } + this.cursor = this.limit - v_15; + // do, line 133 + v_16 = this.limit - this.cursor; + var lab21 = true; + lab21: while (lab21 == true) + { + lab21 = false; + // 
call Step_5a, line 133 + if (!this.r_Step_5a()) + { + break lab21; + } + } + this.cursor = this.limit - v_16; + // do, line 134 + v_17 = this.limit - this.cursor; + var lab22 = true; + lab22: while (lab22 == true) + { + lab22 = false; + // call Step_5b, line 134 + if (!this.r_Step_5b()) + { + break lab22; + } + } + this.cursor = this.limit - v_17; + this.cursor = this.limit_backward; // do, line 137 + v_18 = this.cursor; + var lab23 = true; + lab23: while (lab23 == true) + { + lab23 = false; + // (, line 137 + // Boolean test Y_found, line 137 + if (!(this.B_Y_found)) + { + break lab23; + } + // repeat, line 137 + replab24: while(true) + { + v_19 = this.cursor; + var lab25 = true; + lab25: while (lab25 == true) + { + lab25 = false; + // (, line 137 + // goto, line 137 + golab26: while(true) + { + v_20 = this.cursor; + var lab27 = true; + lab27: while (lab27 == true) + { + lab27 = false; + // (, line 137 + // [, line 137 + this.bra = this.cursor; + // literal, line 137 + if (!(this.eq_s(1, "Y"))) + { + break lab27; + } + // ], line 137 + this.ket = this.cursor; + this.cursor = v_20; + break golab26; + } + this.cursor = v_20; + if (this.cursor >= this.limit) + { + break lab25; + } + this.cursor++; + } + // <-, line 137 + if (!this.slice_from("y")) + { + return false; + } + continue replab24; + } + this.cursor = v_19; + break replab24; + } + } + this.cursor = v_18; + return true; + } + + function equals (o : variant) : boolean { + return o instanceof PorterStemmer; + } + + function hashCode() : int + { + //http://stackoverflow.com/questions/194846/is-there-any-kind-of-hashcode-function-in-javascript + var classname = "PorterStemmer"; + var hash = 0; + if (classname.length == 0) return hash; + for (var i = 0; i < classname.length; i++) { + var char = classname.charCodeAt(i); + hash = ((hash << 5) - hash) + char; + hash = hash & hash; // Convert to 32bit integer + } + return hash; + } + +} + diff --git 
a/web/server/h2o/libh2o/misc/oktavia/src/stemmer/portuguese-stemmer.jsx b/web/server/h2o/libh2o/misc/oktavia/src/stemmer/portuguese-stemmer.jsx new file mode 100644 index 00000000..317837d9 --- /dev/null +++ b/web/server/h2o/libh2o/misc/oktavia/src/stemmer/portuguese-stemmer.jsx @@ -0,0 +1,1321 @@ +// This file was generated automatically by the Snowball to JSX compiler + +import "base-stemmer.jsx"; +import "among.jsx"; + + /** + * This class was automatically generated by a Snowball to JSX compiler + * It implements the stemming algorithm defined by a snowball script. + */ + /* PortugueseStemmer: stem() runs r_prelude and r_mark_regions forwards, then the suffix routines backwards from the limit, then r_postlude forwards. */ +class PortugueseStemmer extends BaseStemmer +{ + static const serialVersionUID = 1; + static const methodObject = new PortugueseStemmer(); + /* a_0: table searched by r_prelude. Result 1 rewrites \u00E3 as "a~", result 2 rewrites \u00F5 as "o~", result 3 (the empty entry) only advances the cursor. */ + static const a_0 = [ + new Among("", -1, 3), + new Among("\u00E3", 0, 1), + new Among("\u00F5", 0, 2) + ]; + /* a_1: table searched by r_postlude; the inverse mapping of a_0 ("a~" to \u00E3, "o~" to \u00F5). */ + static const a_1 = [ + new Among("", -1, 3), + new Among("a~", 0, 1), + new Among("o~", 0, 2) + ]; + /* a_2: searched by r_standard_suffix (case 5) once the main suffix has been deleted; only "iv" (result 1) leads to the additional "at" check. */ + static const a_2 = [ + new Among("ic", -1, -1), + new Among("ad", -1, -1), + new Among("os", -1, -1), + new Among("iv", -1, 1) + ]; + /* a_3: searched by r_standard_suffix (case 6) once the main suffix has been deleted. */ + static const a_3 = [ + new Among("ante", -1, 1), + new Among("avel", -1, 1), + new Among("\u00EDvel", -1, 1) + ]; + /* a_4: searched by r_standard_suffix (case 7) once the main suffix has been deleted. */ + static const a_4 = [ + new Among("ic", -1, 1), + new Among("abil", -1, 1), + new Among("iv", -1, 1) + ]; + /* a_5: main table of r_standard_suffix. The third Among argument appears to be the value find_among_b returns (it lines up with switch cases 1-9 there) - confirm in among.jsx. */ + static const a_5 = [ + new Among("ica", -1, 1), + new Among("\u00E2ncia", -1, 1), + new Among("\u00EAncia", -1, 4), + new Among("ira", -1, 9), + new Among("adora", -1, 1), + new Among("osa", -1, 1), + new Among("ista", -1, 1), + new Among("iva", -1, 8), + new Among("eza", -1, 1), + new Among("log\u00EDa", -1, 2), + new Among("idade", -1, 7), + new Among("ante", -1, 1), + new Among("mente", -1, 6), + new Among("amente", 12, 5), + new Among("\u00E1vel", -1, 1), + new Among("\u00EDvel", -1, 1), + new Among("uci\u00F3n", -1, 3), + new Among("ico", -1, 1), + new Among("ismo", -1, 1), + new Among("oso", -1, 1), + new Among("amento", -1, 1), + new Among("imento", -1, 1), + new Among("ivo", -1,
8), + new Among("a\u00E7a~o", -1, 1), + new Among("ador", -1, 1), + new Among("icas", -1, 1), + new Among("\u00EAncias", -1, 4), + new Among("iras", -1, 9), + new Among("adoras", -1, 1), + new Among("osas", -1, 1), + new Among("istas", -1, 1), + new Among("ivas", -1, 8), + new Among("ezas", -1, 1), + new Among("log\u00EDas", -1, 2), + new Among("idades", -1, 7), + new Among("uciones", -1, 3), + new Among("adores", -1, 1), + new Among("antes", -1, 1), + new Among("a\u00E7o~es", -1, 1), + new Among("icos", -1, 1), + new Among("ismos", -1, 1), + new Among("osos", -1, 1), + new Among("amentos", -1, 1), + new Among("imentos", -1, 1), + new Among("ivos", -1, 8) + ]; + /* a_6: table searched by r_verb_suffix (120 entries); every entry carries result 1, which r_verb_suffix handles by deleting the match. */ + static const a_6 = [ + new Among("ada", -1, 1), + new Among("ida", -1, 1), + new Among("ia", -1, 1), + new Among("aria", 2, 1), + new Among("eria", 2, 1), + new Among("iria", 2, 1), + new Among("ara", -1, 1), + new Among("era", -1, 1), + new Among("ira", -1, 1), + new Among("ava", -1, 1), + new Among("asse", -1, 1), + new Among("esse", -1, 1), + new Among("isse", -1, 1), + new Among("aste", -1, 1), + new Among("este", -1, 1), + new Among("iste", -1, 1), + new Among("ei", -1, 1), + new Among("arei", 16, 1), + new Among("erei", 16, 1), + new Among("irei", 16, 1), + new Among("am", -1, 1), + new Among("iam", 20, 1), + new Among("ariam", 21, 1), + new Among("eriam", 21, 1), + new Among("iriam", 21, 1), + new Among("aram", 20, 1), + new Among("eram", 20, 1), + new Among("iram", 20, 1), + new Among("avam", 20, 1), + new Among("em", -1, 1), + new Among("arem", 29, 1), + new Among("erem", 29, 1), + new Among("irem", 29, 1), + new Among("assem", 29, 1), + new Among("essem", 29, 1), + new Among("issem", 29, 1), + new Among("ado", -1, 1), + new Among("ido", -1, 1), + new Among("ando", -1, 1), + new Among("endo", -1, 1), + new Among("indo", -1, 1), + new Among("ara~o", -1, 1), + new Among("era~o", -1, 1), + new Among("ira~o", -1, 1), + new Among("ar", -1, 1), + new Among("er", -1, 1), + new Among("ir", -1, 1), + new
Among("as", -1, 1), + new Among("adas", 47, 1), + new Among("idas", 47, 1), + new Among("ias", 47, 1), + new Among("arias", 50, 1), + new Among("erias", 50, 1), + new Among("irias", 50, 1), + new Among("aras", 47, 1), + new Among("eras", 47, 1), + new Among("iras", 47, 1), + new Among("avas", 47, 1), + new Among("es", -1, 1), + new Among("ardes", 58, 1), + new Among("erdes", 58, 1), + new Among("irdes", 58, 1), + new Among("ares", 58, 1), + new Among("eres", 58, 1), + new Among("ires", 58, 1), + new Among("asses", 58, 1), + new Among("esses", 58, 1), + new Among("isses", 58, 1), + new Among("astes", 58, 1), + new Among("estes", 58, 1), + new Among("istes", 58, 1), + new Among("is", -1, 1), + new Among("ais", 71, 1), + new Among("eis", 71, 1), + new Among("areis", 73, 1), + new Among("ereis", 73, 1), + new Among("ireis", 73, 1), + new Among("\u00E1reis", 73, 1), + new Among("\u00E9reis", 73, 1), + new Among("\u00EDreis", 73, 1), + new Among("\u00E1sseis", 73, 1), + new Among("\u00E9sseis", 73, 1), + new Among("\u00EDsseis", 73, 1), + new Among("\u00E1veis", 73, 1), + new Among("\u00EDeis", 73, 1), + new Among("ar\u00EDeis", 84, 1), + new Among("er\u00EDeis", 84, 1), + new Among("ir\u00EDeis", 84, 1), + new Among("ados", -1, 1), + new Among("idos", -1, 1), + new Among("amos", -1, 1), + new Among("\u00E1ramos", 90, 1), + new Among("\u00E9ramos", 90, 1), + new Among("\u00EDramos", 90, 1), + new Among("\u00E1vamos", 90, 1), + new Among("\u00EDamos", 90, 1), + new Among("ar\u00EDamos", 95, 1), + new Among("er\u00EDamos", 95, 1), + new Among("ir\u00EDamos", 95, 1), + new Among("emos", -1, 1), + new Among("aremos", 99, 1), + new Among("eremos", 99, 1), + new Among("iremos", 99, 1), + new Among("\u00E1ssemos", 99, 1), + new Among("\u00EAssemos", 99, 1), + new Among("\u00EDssemos", 99, 1), + new Among("imos", -1, 1), + new Among("armos", -1, 1), + new Among("ermos", -1, 1), + new Among("irmos", -1, 1), + new Among("\u00E1mos", -1, 1), + new Among("ar\u00E1s", -1, 1), + new
Among("er\u00E1s", -1, 1), + new Among("ir\u00E1s", -1, 1), + new Among("eu", -1, 1), + new Among("iu", -1, 1), + new Among("ou", -1, 1), + new Among("ar\u00E1", -1, 1), + new Among("er\u00E1", -1, 1), + new Among("ir\u00E1", -1, 1) + ]; + /* a_7: table searched by r_residual_suffix; every entry carries result 1 (delete when in RV). */ + static const a_7 = [ + new Among("a", -1, 1), + new Among("i", -1, 1), + new Among("o", -1, 1), + new Among("os", -1, 1), + new Among("\u00E1", -1, 1), + new Among("\u00ED", -1, 1), + new Among("\u00F3", -1, 1) + ]; + /* a_8: table searched by r_residual_form; result 1 deletes the match, result 2 (\u00E7) is rewritten to "c". */ + static const a_8 = [ + new Among("e", -1, 1), + new Among("\u00E7", -1, 2), + new Among("\u00E9", -1, 1), + new Among("\u00EA", -1, 1) + ]; + /* g_v: character-group table passed to in_grouping/out_grouping together with the bounds 97 and 250; presumably a bitmap of the vowels - confirm against BaseStemmer. */ + static const g_v = [17, 65, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 19, 12, 2] : int[]; + /* Region marks: written by r_mark_regions, compared with the cursor by r_R2, r_R1 and r_RV. */ + var I_p2 : int = 0; + var I_p1 : int = 0; + var I_pV : int = 0; + /* Copies the three region marks from other, then delegates to the superclass copy_from. */ + function copy_from (other : PortugueseStemmer) : void + { + this.I_p2 = other.I_p2; + this.I_p1 = other.I_p1; + this.I_pV = other.I_pV; + super.copy_from(other); + } + /* Repeatedly matches a_0 at the cursor: \u00E3 is replaced by "a~", \u00F5 by "o~", anything else advances the cursor by one; the loop ends when no step can be taken and the cursor is put back to the start of that last attempt. Returns false only when slice_from fails. */ + function r_prelude () : boolean + { + var among_var : int; + var v_1 : int; + // repeat, line 36 + replab0: while(true) + { + v_1 = this.cursor; + var lab1 = true; + lab1: while (lab1 == true) + { + lab1 = false; + // (, line 36 + // [, line 37 + this.bra = this.cursor; + // substring, line 37 + among_var = this.find_among(PortugueseStemmer.a_0, 3); + if (among_var == 0) + { + break lab1; + } + // ], line 37 + this.ket = this.cursor; + switch (among_var) { + case 0: + break lab1; + case 1: + // (, line 38 + // <-, line 38 + if (!this.slice_from("a~")) + { + return false; + } + break; + case 2: + // (, line 39 + // <-, line 39 + if (!this.slice_from("o~")) + { + return false; + } + break; + case 3: + // (, line 40 + // next, line 40 + if (this.cursor >= this.limit) + { + break lab1; + } + this.cursor++; + break; + } + continue replab0; + } + this.cursor = v_1; + break replab0; + } + return true; + } + /* Resets I_pV, I_p1 and I_p2 to the limit, then moves each mark to a cursor position found by the in_grouping/out_grouping scans over g_v below; the cursor is restored after each of the two scans. Always returns true. */ + function r_mark_regions () : boolean + { + var v_1 : int; + var v_2 : int; + var v_3 : int; + var v_6 : int; + var v_8 : int; + // (, line 44
+ this.I_pV = this.limit; + this.I_p1 = this.limit; + this.I_p2 = this.limit; + // do, line 50 + v_1 = this.cursor; + var lab0 = true; + lab0: while (lab0 == true) + { + lab0 = false; + // (, line 50 + // or, line 52 + var lab1 = true; + lab1: while (lab1 == true) + { + lab1 = false; + v_2 = this.cursor; + var lab2 = true; + lab2: while (lab2 == true) + { + lab2 = false; + // (, line 51 + if (!(this.in_grouping(PortugueseStemmer.g_v, 97, 250))) + { + break lab2; + } + // or, line 51 + var lab3 = true; + lab3: while (lab3 == true) + { + lab3 = false; + v_3 = this.cursor; + var lab4 = true; + lab4: while (lab4 == true) + { + lab4 = false; + // (, line 51 + if (!(this.out_grouping(PortugueseStemmer.g_v, 97, 250))) + { + break lab4; + } + // gopast, line 51 + golab5: while(true) + { + var lab6 = true; + lab6: while (lab6 == true) + { + lab6 = false; + if (!(this.in_grouping(PortugueseStemmer.g_v, 97, 250))) + { + break lab6; + } + break golab5; + } + if (this.cursor >= this.limit) + { + break lab4; + } + this.cursor++; + } + break lab3; + } + this.cursor = v_3; + // (, line 51 + if (!(this.in_grouping(PortugueseStemmer.g_v, 97, 250))) + { + break lab2; + } + // gopast, line 51 + golab7: while(true) + { + var lab8 = true; + lab8: while (lab8 == true) + { + lab8 = false; + if (!(this.out_grouping(PortugueseStemmer.g_v, 97, 250))) + { + break lab8; + } + break golab7; + } + if (this.cursor >= this.limit) + { + break lab2; + } + this.cursor++; + } + } + break lab1; + } + this.cursor = v_2; + // (, line 53 + if (!(this.out_grouping(PortugueseStemmer.g_v, 97, 250))) + { + break lab0; + } + // or, line 53 + var lab9 = true; + lab9: while (lab9 == true) + { + lab9 = false; + v_6 = this.cursor; + var lab10 = true; + lab10: while (lab10 == true) + { + lab10 = false; + // (, line 53 + if (!(this.out_grouping(PortugueseStemmer.g_v, 97, 250))) + { + break lab10; + } + // gopast, line 53 + golab11: while(true) + { + var lab12 = true; + lab12: while (lab12 == true) + { + lab12 =
false; + if (!(this.in_grouping(PortugueseStemmer.g_v, 97, 250))) + { + break lab12; + } + break golab11; + } + if (this.cursor >= this.limit) + { + break lab10; + } + this.cursor++; + } + break lab9; + } + this.cursor = v_6; + // (, line 53 + if (!(this.in_grouping(PortugueseStemmer.g_v, 97, 250))) + { + break lab0; + } + // next, line 53 + if (this.cursor >= this.limit) + { + break lab0; + } + this.cursor++; + } + } + // setmark pV, line 54 + this.I_pV = this.cursor; + } + this.cursor = v_1; + // do, line 56 + v_8 = this.cursor; + var lab13 = true; + lab13: while (lab13 == true) + { + lab13 = false; + // (, line 56 + // gopast, line 57 + golab14: while(true) + { + var lab15 = true; + lab15: while (lab15 == true) + { + lab15 = false; + if (!(this.in_grouping(PortugueseStemmer.g_v, 97, 250))) + { + break lab15; + } + break golab14; + } + if (this.cursor >= this.limit) + { + break lab13; + } + this.cursor++; + } + // gopast, line 57 + golab16: while(true) + { + var lab17 = true; + lab17: while (lab17 == true) + { + lab17 = false; + if (!(this.out_grouping(PortugueseStemmer.g_v, 97, 250))) + { + break lab17; + } + break golab16; + } + if (this.cursor >= this.limit) + { + break lab13; + } + this.cursor++; + } + // setmark p1, line 57 + this.I_p1 = this.cursor; + // gopast, line 58 + golab18: while(true) + { + var lab19 = true; + lab19: while (lab19 == true) + { + lab19 = false; + if (!(this.in_grouping(PortugueseStemmer.g_v, 97, 250))) + { + break lab19; + } + break golab18; + } + if (this.cursor >= this.limit) + { + break lab13; + } + this.cursor++; + } + // gopast, line 58 + golab20: while(true) + { + var lab21 = true; + lab21: while (lab21 == true) + { + lab21 = false; + if (!(this.out_grouping(PortugueseStemmer.g_v, 97, 250))) + { + break lab21; + } + break golab20; + } + if (this.cursor >= this.limit) + { + break lab13; + } + this.cursor++; + } + // setmark p2, line 58 + this.I_p2 = this.cursor; + } + this.cursor = v_8; + return true; + } + /* Inverse of r_prelude: repeatedly matches a_1 at the cursor, replacing "a~" by \u00E3 and "o~" by \u00F5, otherwise advancing the cursor by one, until no step can be taken. Returns false only when slice_from fails. */ + function r_postlude
() : boolean + { + var among_var : int; + var v_1 : int; + // repeat, line 62 + replab0: while(true) + { + v_1 = this.cursor; + var lab1 = true; + lab1: while (lab1 == true) + { + lab1 = false; + // (, line 62 + // [, line 63 + this.bra = this.cursor; + // substring, line 63 + among_var = this.find_among(PortugueseStemmer.a_1, 3); + if (among_var == 0) + { + break lab1; + } + // ], line 63 + this.ket = this.cursor; + switch (among_var) { + case 0: + break lab1; + case 1: + // (, line 64 + // <-, line 64 + if (!this.slice_from("\u00E3")) + { + return false; + } + break; + case 2: + // (, line 65 + // <-, line 65 + if (!this.slice_from("\u00F5")) + { + return false; + } + break; + case 3: + // (, line 66 + // next, line 66 + if (this.cursor >= this.limit) + { + break lab1; + } + this.cursor++; + break; + } + continue replab0; + } + this.cursor = v_1; + break replab0; + } + return true; + } + /* Region test: true when I_pV <= cursor. */ + function r_RV () : boolean + { + if (!(this.I_pV <= this.cursor)) + { + return false; + } + return true; + } + /* Region test: true when I_p1 <= cursor. */ + function r_R1 () : boolean + { + if (!(this.I_p1 <= this.cursor)) + { + return false; + } + return true; + } + /* Region test: true when I_p2 <= cursor. */ + function r_R2 () : boolean + { + if (!(this.I_p2 <= this.cursor)) + { + return false; + } + return true; + } + /* Backward match of a_5 ending at the cursor, dispatched on the result: 1 delete (R2); 2 replace by "log" (R2); 3 replace by "u" (R2); 4 replace by "ente" (R2); 5 delete (R1), then optionally strip an a_2 suffix; 6 and 7 delete (R2), then optionally strip an a_3 / a_4 suffix; 8 delete (R2), then optionally strip "at"; 9 needs RV and a preceding "e", then replaces the match by "ir". Returns false when nothing matches, a mandatory region test fails, or a slice operation fails. */ + function r_standard_suffix () : boolean + { + var among_var : int; + var v_1 : int; + var v_2 : int; + var v_3 : int; + var v_4 : int; + // (, line 76 + // [, line 77 + this.ket = this.cursor; + // substring, line 77 + among_var = this.find_among_b(PortugueseStemmer.a_5, 45); + if (among_var == 0) + { + return false; + } + // ], line 77 + this.bra = this.cursor; + switch (among_var) { + case 0: + return false; + case 1: + // (, line 92 + // call R2, line 93 + if (!this.r_R2()) + { + return false; + } + // delete, line 93 + if (!this.slice_del()) + { + return false; + } + break; + case 2: + // (, line 97 + // call R2, line 98 + if (!this.r_R2()) + { + return false; + } + // <-, line 98 + if (!this.slice_from("log")) + { + return false; + } +
break; + case 3: + // (, line 101 + // call R2, line 102 + if (!this.r_R2()) + { + return false; + } + // <-, line 102 + if (!this.slice_from("u")) + { + return false; + } + break; + case 4: + // (, line 105 + // call R2, line 106 + if (!this.r_R2()) + { + return false; + } + // <-, line 106 + if (!this.slice_from("ente")) + { + return false; + } + break; + case 5: + // (, line 109 + // call R1, line 110 + if (!this.r_R1()) + { + return false; + } + // delete, line 110 + if (!this.slice_del()) + { + return false; + } + // try, line 111 + v_1 = this.limit - this.cursor; + var lab0 = true; + lab0: while (lab0 == true) + { + lab0 = false; + // (, line 111 + // [, line 112 + this.ket = this.cursor; + // substring, line 112 + among_var = this.find_among_b(PortugueseStemmer.a_2, 4); + if (among_var == 0) + { + this.cursor = this.limit - v_1; + break lab0; + } + // ], line 112 + this.bra = this.cursor; + // call R2, line 112 + if (!this.r_R2()) + { + this.cursor = this.limit - v_1; + break lab0; + } + // delete, line 112 + if (!this.slice_del()) + { + return false; + } + switch (among_var) { + case 0: + this.cursor = this.limit - v_1; + break lab0; + case 1: + // (, line 113 + // [, line 113 + this.ket = this.cursor; + // literal, line 113 + if (!(this.eq_s_b(2, "at"))) + { + this.cursor = this.limit - v_1; + break lab0; + } + // ], line 113 + this.bra = this.cursor; + // call R2, line 113 + if (!this.r_R2()) + { + this.cursor = this.limit - v_1; + break lab0; + } + // delete, line 113 + if (!this.slice_del()) + { + return false; + } + break; + } + } + break; + case 6: + // (, line 121 + // call R2, line 122 + if (!this.r_R2()) + { + return false; + } + // delete, line 122 + if (!this.slice_del()) + { + return false; + } + // try, line 123 + v_2 = this.limit - this.cursor; + var lab1 = true; + lab1: while (lab1 == true) + { + lab1 = false; + // (, line 123 + // [, line 124 + this.ket = this.cursor; + // substring, line 124 + among_var =
this.find_among_b(PortugueseStemmer.a_3, 3); + if (among_var == 0) + { + this.cursor = this.limit - v_2; + break lab1; + } + // ], line 124 + this.bra = this.cursor; + switch (among_var) { + case 0: + this.cursor = this.limit - v_2; + break lab1; + case 1: + // (, line 127 + // call R2, line 127 + if (!this.r_R2()) + { + this.cursor = this.limit - v_2; + break lab1; + } + // delete, line 127 + if (!this.slice_del()) + { + return false; + } + break; + } + } + break; + case 7: + // (, line 133 + // call R2, line 134 + if (!this.r_R2()) + { + return false; + } + // delete, line 134 + if (!this.slice_del()) + { + return false; + } + // try, line 135 + v_3 = this.limit - this.cursor; + var lab2 = true; + lab2: while (lab2 == true) + { + lab2 = false; + // (, line 135 + // [, line 136 + this.ket = this.cursor; + // substring, line 136 + among_var = this.find_among_b(PortugueseStemmer.a_4, 3); + if (among_var == 0) + { + this.cursor = this.limit - v_3; + break lab2; + } + // ], line 136 + this.bra = this.cursor; + switch (among_var) { + case 0: + this.cursor = this.limit - v_3; + break lab2; + case 1: + // (, line 139 + // call R2, line 139 + if (!this.r_R2()) + { + this.cursor = this.limit - v_3; + break lab2; + } + // delete, line 139 + if (!this.slice_del()) + { + return false; + } + break; + } + } + break; + case 8: + // (, line 145 + // call R2, line 146 + if (!this.r_R2()) + { + return false; + } + // delete, line 146 + if (!this.slice_del()) + { + return false; + } + // try, line 147 + v_4 = this.limit - this.cursor; + var lab3 = true; + lab3: while (lab3 == true) + { + lab3 = false; + // (, line 147 + // [, line 148 + this.ket = this.cursor; + // literal, line 148 + if (!(this.eq_s_b(2, "at"))) + { + this.cursor = this.limit - v_4; + break lab3; + } + // ], line 148 + this.bra = this.cursor; + // call R2, line 148 + if (!this.r_R2()) + { + this.cursor = this.limit - v_4; + break lab3; + } + // delete, line 148 + if (!this.slice_del()) + { + return false; + } + } +
break; + case 9: + // (, line 152 + // call RV, line 153 + if (!this.r_RV()) + { + return false; + } + // literal, line 153 + if (!(this.eq_s_b(1, "e"))) + { + return false; + } + // <-, line 154 + if (!this.slice_from("ir")) + { + return false; + } + break; + } + return true; + } + /* Returns false if the cursor is before I_pV. Otherwise sets limit_backward to I_pV for the duration of the search, deletes an a_6 suffix ending at the cursor and restores limit_backward. NOTE(review): the slice_del failure path returns without restoring limit_backward. */ + function r_verb_suffix () : boolean + { + var among_var : int; + var v_1 : int; + var v_2 : int; + // setlimit, line 159 + v_1 = this.limit - this.cursor; + // tomark, line 159 + if (this.cursor < this.I_pV) + { + return false; + } + this.cursor = this.I_pV; + v_2 = this.limit_backward; + this.limit_backward = this.cursor; + this.cursor = this.limit - v_1; + // (, line 159 + // [, line 160 + this.ket = this.cursor; + // substring, line 160 + among_var = this.find_among_b(PortugueseStemmer.a_6, 120); + if (among_var == 0) + { + this.limit_backward = v_2; + return false; + } + // ], line 160 + this.bra = this.cursor; + switch (among_var) { + case 0: + this.limit_backward = v_2; + return false; + case 1: + // (, line 179 + // delete, line 179 + if (!this.slice_del()) + { + return false; + } + break; + } + this.limit_backward = v_2; + return true; + } + /* Deletes an a_7 suffix ending at the cursor, provided the match start passes r_RV. Returns false when nothing matches or the RV test fails. */ + function r_residual_suffix () : boolean + { + var among_var : int; + // (, line 183 + // [, line 184 + this.ket = this.cursor; + // substring, line 184 + among_var = this.find_among_b(PortugueseStemmer.a_7, 7); + if (among_var == 0) + { + return false; + } + // ], line 184 + this.bra = this.cursor; + switch (among_var) { + case 0: + return false; + case 1: + // (, line 187 + // call RV, line 187 + if (!this.r_RV()) + { + return false; + } + // delete, line 187 + if (!this.slice_del()) + { + return false; + } + break; + } + return true; + } + /* Backward match of a_8. Result 1: delete the match (needs RV), then require either a "u" preceded by "g" or an "i" preceded by "c" and delete that "u"/"i" (needs RV). Result 2: replace the match by "c". */ + function r_residual_form () : boolean + { + var among_var : int; + var v_1 : int; + var v_2 : int; + var v_3 : int; + // (, line 191 + // [, line 192 + this.ket = this.cursor; + // substring, line 192 + among_var = this.find_among_b(PortugueseStemmer.a_8, 4); + if (among_var == 0) + { + return
false; + } + // ], line 192 + this.bra = this.cursor; + switch (among_var) { + case 0: + return false; + case 1: + // (, line 194 + // call RV, line 194 + if (!this.r_RV()) + { + return false; + } + // delete, line 194 + if (!this.slice_del()) + { + return false; + } + // [, line 194 + this.ket = this.cursor; + // or, line 194 + var lab0 = true; + lab0: while (lab0 == true) + { + lab0 = false; + v_1 = this.limit - this.cursor; + var lab1 = true; + lab1: while (lab1 == true) + { + lab1 = false; + // (, line 194 + // literal, line 194 + if (!(this.eq_s_b(1, "u"))) + { + break lab1; + } + // ], line 194 + this.bra = this.cursor; + // test, line 194 + v_2 = this.limit - this.cursor; + // literal, line 194 + if (!(this.eq_s_b(1, "g"))) + { + break lab1; + } + this.cursor = this.limit - v_2; + break lab0; + } + this.cursor = this.limit - v_1; + // (, line 195 + // literal, line 195 + if (!(this.eq_s_b(1, "i"))) + { + return false; + } + // ], line 195 + this.bra = this.cursor; + // test, line 195 + v_3 = this.limit - this.cursor; + // literal, line 195 + if (!(this.eq_s_b(1, "c"))) + { + return false; + } + this.cursor = this.limit - v_3; + } + // call RV, line 195 + if (!this.r_RV()) + { + return false; + } + // delete, line 195 + if (!this.slice_del()) + { + return false; + } + break; + case 2: + // (, line 196 + // <-, line 196 + if (!this.slice_from("c")) + { + return false; + } + break; + } + return true; + } + /* Entry point. Forwards: r_prelude, then r_mark_regions. Backwards from the limit: r_standard_suffix or else r_verb_suffix, and when one succeeds an "i" preceded by "c" is deleted if in RV; when both fail r_residual_suffix is tried; then r_residual_form. Forwards again: r_postlude. The cursor is restored around every step; returns false only when the inline slice_del fails. */ + override function stem () : boolean + { + var v_1 : int; + var v_2 : int; + var v_3 : int; + var v_4 : int; + var v_5 : int; + var v_6 : int; + var v_7 : int; + var v_8 : int; + var v_9 : int; + var v_10 : int; + // (, line 201 + // do, line 202 + v_1 = this.cursor; + var lab0 = true; + lab0: while (lab0 == true) + { + lab0 = false; + // call prelude, line 202 + if (!this.r_prelude()) + { + break lab0; + } + } + this.cursor = v_1; + // do, line 203 + v_2 = this.cursor; + var lab1 = true; + lab1: while (lab1 == true) + { + lab1 = false; + // call
mark_regions, line 203 + if (!this.r_mark_regions()) + { + break lab1; + } + } + this.cursor = v_2; + // backwards, line 204 + this.limit_backward = this.cursor; this.cursor = this.limit; + // (, line 204 + // do, line 205 + v_3 = this.limit - this.cursor; + var lab2 = true; + lab2: while (lab2 == true) + { + lab2 = false; + // (, line 205 + // or, line 209 + var lab3 = true; + lab3: while (lab3 == true) + { + lab3 = false; + v_4 = this.limit - this.cursor; + var lab4 = true; + lab4: while (lab4 == true) + { + lab4 = false; + // (, line 206 + // and, line 207 + v_5 = this.limit - this.cursor; + // (, line 206 + // or, line 206 + var lab5 = true; + lab5: while (lab5 == true) + { + lab5 = false; + v_6 = this.limit - this.cursor; + var lab6 = true; + lab6: while (lab6 == true) + { + lab6 = false; + // call standard_suffix, line 206 + if (!this.r_standard_suffix()) + { + break lab6; + } + break lab5; + } + this.cursor = this.limit - v_6; + // call verb_suffix, line 206 + if (!this.r_verb_suffix()) + { + break lab4; + } + } + this.cursor = this.limit - v_5; + // do, line 207 + v_7 = this.limit - this.cursor; + var lab7 = true; + lab7: while (lab7 == true) + { + lab7 = false; + // (, line 207 + // [, line 207 + this.ket = this.cursor; + // literal, line 207 + if (!(this.eq_s_b(1, "i"))) + { + break lab7; + } + // ], line 207 + this.bra = this.cursor; + // test, line 207 + v_8 = this.limit - this.cursor; + // literal, line 207 + if (!(this.eq_s_b(1, "c"))) + { + break lab7; + } + this.cursor = this.limit - v_8; + // call RV, line 207 + if (!this.r_RV()) + { + break lab7; + } + // delete, line 207 + if (!this.slice_del()) + { + return false; + } + } + this.cursor = this.limit - v_7; + break lab3; + } + this.cursor = this.limit - v_4; + // call residual_suffix, line 209 + if (!this.r_residual_suffix()) + { + break lab2; + } + } + } + this.cursor = this.limit - v_3; + // do, line 211 + v_9 = this.limit - this.cursor; + var lab8 = true; + lab8: while (lab8 == true) + { + lab8
= false; + // call residual_form, line 211 + if (!this.r_residual_form()) + { + break lab8; + } + } + this.cursor = this.limit - v_9; + this.cursor = this.limit_backward; // do, line 213 + v_10 = this.cursor; + var lab9 = true; + lab9: while (lab9 == true) + { + lab9 = false; + // call postlude, line 213 + if (!this.r_postlude()) + { + break lab9; + } + } + this.cursor = v_10; + return true; + } + /* True for any PortugueseStemmer instance; no per-instance state is compared. */ + function equals (o : variant) : boolean { + return o instanceof PortugueseStemmer; + } + /* Hashes the fixed class-name string, so every instance returns the same value. */ + function hashCode() : int + { + //http://stackoverflow.com/questions/194846/is-there-any-kind-of-hashcode-function-in-javascript + var classname = "PortugueseStemmer"; + var hash = 0; + if (classname.length == 0) return hash; + for (var i = 0; i < classname.length; i++) { + var char = classname.charCodeAt(i); + hash = ((hash << 5) - hash) + char; + hash = hash & hash; // Convert to 32bit integer + } + return hash; + } + +} + + diff --git a/web/server/h2o/libh2o/misc/oktavia/src/stemmer/romanian-stemmer.jsx b/web/server/h2o/libh2o/misc/oktavia/src/stemmer/romanian-stemmer.jsx new file mode 100644 index 00000000..1eb9f3d8 --- /dev/null +++ b/web/server/h2o/libh2o/misc/oktavia/src/stemmer/romanian-stemmer.jsx @@ -0,0 +1,1227 @@ +// This file was generated automatically by the Snowball to JSX compiler + +import "base-stemmer.jsx"; +import "among.jsx"; + + /** + * This class was automatically generated by a Snowball to JSX compiler + * It implements the stemming algorithm defined by a snowball script.
+ */ + +class RomanianStemmer extends BaseStemmer +{ + static const serialVersionUID = 1; + static const methodObject = new RomanianStemmer(); + + static const a_0 = [ + new Among("", -1, 3), + new Among("I", 0, 1), + new Among("U", 0, 2) + ]; + + static const a_1 = [ + new Among("ea", -1, 3), + new Among("a\u0163ia", -1, 7), + new Among("aua", -1, 2), + new Among("iua", -1, 4), + new Among("a\u0163ie", -1, 7), + new Among("ele", -1, 3), + new Among("ile", -1, 5), + new Among("iile", 6, 4), + new Among("iei", -1, 4), + new Among("atei", -1, 6), + new Among("ii", -1, 4), + new Among("ului", -1, 1), + new Among("ul", -1, 1), + new Among("elor", -1, 3), + new Among("ilor", -1, 4), + new Among("iilor", 14, 4) + ]; + + static const a_2 = [ + new Among("icala", -1, 4), + new Among("iciva", -1, 4), + new Among("ativa", -1, 5), + new Among("itiva", -1, 6), + new Among("icale", -1, 4), + new Among("a\u0163iune", -1, 5), + new Among("i\u0163iune", -1, 6), + new Among("atoare", -1, 5), + new Among("itoare", -1, 6), + new Among("\u0103toare", -1, 5), + new Among("icitate", -1, 4), + new Among("abilitate", -1, 1), + new Among("ibilitate", -1, 2), + new Among("ivitate", -1, 3), + new Among("icive", -1, 4), + new Among("ative", -1, 5), + new Among("itive", -1, 6), + new Among("icali", -1, 4), + new Among("atori", -1, 5), + new Among("icatori", 18, 4), + new Among("itori", -1, 6), + new Among("\u0103tori", -1, 5), + new Among("icitati", -1, 4), + new Among("abilitati", -1, 1), + new Among("ivitati", -1, 3), + new Among("icivi", -1, 4), + new Among("ativi", -1, 5), + new Among("itivi", -1, 6), + new Among("icit\u0103i", -1, 4), + new Among("abilit\u0103i", -1, 1), + new Among("ivit\u0103i", -1, 3), + new Among("icit\u0103\u0163i", -1, 4), + new Among("abilit\u0103\u0163i", -1, 1), + new Among("ivit\u0103\u0163i", -1, 3), + new Among("ical", -1, 4), + new Among("ator", -1, 5), + new Among("icator", 35, 4), + new Among("itor", -1, 6), + new Among("\u0103tor", -1, 5), + new 
Among("iciv", -1, 4), + new Among("ativ", -1, 5), + new Among("itiv", -1, 6), + new Among("ical\u0103", -1, 4), + new Among("iciv\u0103", -1, 4), + new Among("ativ\u0103", -1, 5), + new Among("itiv\u0103", -1, 6) + ]; + + static const a_3 = [ + new Among("ica", -1, 1), + new Among("abila", -1, 1), + new Among("ibila", -1, 1), + new Among("oasa", -1, 1), + new Among("ata", -1, 1), + new Among("ita", -1, 1), + new Among("anta", -1, 1), + new Among("ista", -1, 3), + new Among("uta", -1, 1), + new Among("iva", -1, 1), + new Among("ic", -1, 1), + new Among("ice", -1, 1), + new Among("abile", -1, 1), + new Among("ibile", -1, 1), + new Among("isme", -1, 3), + new Among("iune", -1, 2), + new Among("oase", -1, 1), + new Among("ate", -1, 1), + new Among("itate", 17, 1), + new Among("ite", -1, 1), + new Among("ante", -1, 1), + new Among("iste", -1, 3), + new Among("ute", -1, 1), + new Among("ive", -1, 1), + new Among("ici", -1, 1), + new Among("abili", -1, 1), + new Among("ibili", -1, 1), + new Among("iuni", -1, 2), + new Among("atori", -1, 1), + new Among("osi", -1, 1), + new Among("ati", -1, 1), + new Among("itati", 30, 1), + new Among("iti", -1, 1), + new Among("anti", -1, 1), + new Among("isti", -1, 3), + new Among("uti", -1, 1), + new Among("i\u015Fti", -1, 3), + new Among("ivi", -1, 1), + new Among("it\u0103i", -1, 1), + new Among("o\u015Fi", -1, 1), + new Among("it\u0103\u0163i", -1, 1), + new Among("abil", -1, 1), + new Among("ibil", -1, 1), + new Among("ism", -1, 3), + new Among("ator", -1, 1), + new Among("os", -1, 1), + new Among("at", -1, 1), + new Among("it", -1, 1), + new Among("ant", -1, 1), + new Among("ist", -1, 3), + new Among("ut", -1, 1), + new Among("iv", -1, 1), + new Among("ic\u0103", -1, 1), + new Among("abil\u0103", -1, 1), + new Among("ibil\u0103", -1, 1), + new Among("oas\u0103", -1, 1), + new Among("at\u0103", -1, 1), + new Among("it\u0103", -1, 1), + new Among("ant\u0103", -1, 1), + new Among("ist\u0103", -1, 3), + new Among("ut\u0103", -1, 1), + 
new Among("iv\u0103", -1, 1) + ]; + + static const a_4 = [ + new Among("ea", -1, 1), + new Among("ia", -1, 1), + new Among("esc", -1, 1), + new Among("\u0103sc", -1, 1), + new Among("ind", -1, 1), + new Among("\u00E2nd", -1, 1), + new Among("are", -1, 1), + new Among("ere", -1, 1), + new Among("ire", -1, 1), + new Among("\u00E2re", -1, 1), + new Among("se", -1, 2), + new Among("ase", 10, 1), + new Among("sese", 10, 2), + new Among("ise", 10, 1), + new Among("use", 10, 1), + new Among("\u00E2se", 10, 1), + new Among("e\u015Fte", -1, 1), + new Among("\u0103\u015Fte", -1, 1), + new Among("eze", -1, 1), + new Among("ai", -1, 1), + new Among("eai", 19, 1), + new Among("iai", 19, 1), + new Among("sei", -1, 2), + new Among("e\u015Fti", -1, 1), + new Among("\u0103\u015Fti", -1, 1), + new Among("ui", -1, 1), + new Among("ezi", -1, 1), + new Among("\u00E2i", -1, 1), + new Among("a\u015Fi", -1, 1), + new Among("se\u015Fi", -1, 2), + new Among("ase\u015Fi", 29, 1), + new Among("sese\u015Fi", 29, 2), + new Among("ise\u015Fi", 29, 1), + new Among("use\u015Fi", 29, 1), + new Among("\u00E2se\u015Fi", 29, 1), + new Among("i\u015Fi", -1, 1), + new Among("u\u015Fi", -1, 1), + new Among("\u00E2\u015Fi", -1, 1), + new Among("a\u0163i", -1, 2), + new Among("ea\u0163i", 38, 1), + new Among("ia\u0163i", 38, 1), + new Among("e\u0163i", -1, 2), + new Among("i\u0163i", -1, 2), + new Among("\u00E2\u0163i", -1, 2), + new Among("ar\u0103\u0163i", -1, 1), + new Among("ser\u0103\u0163i", -1, 2), + new Among("aser\u0103\u0163i", 45, 1), + new Among("seser\u0103\u0163i", 45, 2), + new Among("iser\u0103\u0163i", 45, 1), + new Among("user\u0103\u0163i", 45, 1), + new Among("\u00E2ser\u0103\u0163i", 45, 1), + new Among("ir\u0103\u0163i", -1, 1), + new Among("ur\u0103\u0163i", -1, 1), + new Among("\u00E2r\u0103\u0163i", -1, 1), + new Among("am", -1, 1), + new Among("eam", 54, 1), + new Among("iam", 54, 1), + new Among("em", -1, 2), + new Among("asem", 57, 1), + new Among("sesem", 57, 2), + new 
Among("isem", 57, 1), + new Among("usem", 57, 1), + new Among("\u00E2sem", 57, 1), + new Among("im", -1, 2), + new Among("\u00E2m", -1, 2), + new Among("\u0103m", -1, 2), + new Among("ar\u0103m", 65, 1), + new Among("ser\u0103m", 65, 2), + new Among("aser\u0103m", 67, 1), + new Among("seser\u0103m", 67, 2), + new Among("iser\u0103m", 67, 1), + new Among("user\u0103m", 67, 1), + new Among("\u00E2ser\u0103m", 67, 1), + new Among("ir\u0103m", 65, 1), + new Among("ur\u0103m", 65, 1), + new Among("\u00E2r\u0103m", 65, 1), + new Among("au", -1, 1), + new Among("eau", 76, 1), + new Among("iau", 76, 1), + new Among("indu", -1, 1), + new Among("\u00E2ndu", -1, 1), + new Among("ez", -1, 1), + new Among("easc\u0103", -1, 1), + new Among("ar\u0103", -1, 1), + new Among("ser\u0103", -1, 2), + new Among("aser\u0103", 84, 1), + new Among("seser\u0103", 84, 2), + new Among("iser\u0103", 84, 1), + new Among("user\u0103", 84, 1), + new Among("\u00E2ser\u0103", 84, 1), + new Among("ir\u0103", -1, 1), + new Among("ur\u0103", -1, 1), + new Among("\u00E2r\u0103", -1, 1), + new Among("eaz\u0103", -1, 1) + ]; + + static const a_5 = [ + new Among("a", -1, 1), + new Among("e", -1, 1), + new Among("ie", 1, 1), + new Among("i", -1, 1), + new Among("\u0103", -1, 1) + ]; + + static const g_v = [17, 65, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 32, 0, 0, 4] : int[]; + + var B_standard_suffix_removed : boolean = false; + var I_p2 : int = 0; + var I_p1 : int = 0; + var I_pV : int = 0; + + function copy_from (other : RomanianStemmer) : void + { + this.B_standard_suffix_removed = other.B_standard_suffix_removed; + this.I_p2 = other.I_p2; + this.I_p1 = other.I_p1; + this.I_pV = other.I_pV; + super.copy_from(other); + } + + function r_prelude () : boolean + { + var v_1 : int; + var v_2 : int; + var v_3 : int; + // (, line 31 + // repeat, line 32 + replab0: while(true) + { + v_1 = this.cursor; + var lab1 = true; + lab1: while (lab1 == true) + { + lab1 = false; + // goto, line 32 + golab2: 
while(true) + { + v_2 = this.cursor; + var lab3 = true; + lab3: while (lab3 == true) + { + lab3 = false; + // (, line 32 + if (!(this.in_grouping(RomanianStemmer.g_v, 97, 259))) + { + break lab3; + } + // [, line 33 + this.bra = this.cursor; + // or, line 33 + var lab4 = true; + lab4: while (lab4 == true) + { + lab4 = false; + v_3 = this.cursor; + var lab5 = true; + lab5: while (lab5 == true) + { + lab5 = false; + // (, line 33 + // literal, line 33 + if (!(this.eq_s(1, "u"))) + { + break lab5; + } + // ], line 33 + this.ket = this.cursor; + if (!(this.in_grouping(RomanianStemmer.g_v, 97, 259))) + { + break lab5; + } + // <-, line 33 + if (!this.slice_from("U")) + { + return false; + } + break lab4; + } + this.cursor = v_3; + // (, line 34 + // literal, line 34 + if (!(this.eq_s(1, "i"))) + { + break lab3; + } + // ], line 34 + this.ket = this.cursor; + if (!(this.in_grouping(RomanianStemmer.g_v, 97, 259))) + { + break lab3; + } + // <-, line 34 + if (!this.slice_from("I")) + { + return false; + } + } + this.cursor = v_2; + break golab2; + } + this.cursor = v_2; + if (this.cursor >= this.limit) + { + break lab1; + } + this.cursor++; + } + continue replab0; + } + this.cursor = v_1; + break replab0; + } + return true; + } + + function r_mark_regions () : boolean + { + var v_1 : int; + var v_2 : int; + var v_3 : int; + var v_6 : int; + var v_8 : int; + // (, line 38 + this.I_pV = this.limit; + this.I_p1 = this.limit; + this.I_p2 = this.limit; + // do, line 44 + v_1 = this.cursor; + var lab0 = true; + lab0: while (lab0 == true) + { + lab0 = false; + // (, line 44 + // or, line 46 + var lab1 = true; + lab1: while (lab1 == true) + { + lab1 = false; + v_2 = this.cursor; + var lab2 = true; + lab2: while (lab2 == true) + { + lab2 = false; + // (, line 45 + if (!(this.in_grouping(RomanianStemmer.g_v, 97, 259))) + { + break lab2; + } + // or, line 45 + var lab3 = true; + lab3: while (lab3 == true) + { + lab3 = false; + v_3 = this.cursor; + var lab4 = true; + lab4: while (lab4 
== true) + { + lab4 = false; + // (, line 45 + if (!(this.out_grouping(RomanianStemmer.g_v, 97, 259))) + { + break lab4; + } + // gopast, line 45 + golab5: while(true) + { + var lab6 = true; + lab6: while (lab6 == true) + { + lab6 = false; + if (!(this.in_grouping(RomanianStemmer.g_v, 97, 259))) + { + break lab6; + } + break golab5; + } + if (this.cursor >= this.limit) + { + break lab4; + } + this.cursor++; + } + break lab3; + } + this.cursor = v_3; + // (, line 45 + if (!(this.in_grouping(RomanianStemmer.g_v, 97, 259))) + { + break lab2; + } + // gopast, line 45 + golab7: while(true) + { + var lab8 = true; + lab8: while (lab8 == true) + { + lab8 = false; + if (!(this.out_grouping(RomanianStemmer.g_v, 97, 259))) + { + break lab8; + } + break golab7; + } + if (this.cursor >= this.limit) + { + break lab2; + } + this.cursor++; + } + } + break lab1; + } + this.cursor = v_2; + // (, line 47 + if (!(this.out_grouping(RomanianStemmer.g_v, 97, 259))) + { + break lab0; + } + // or, line 47 + var lab9 = true; + lab9: while (lab9 == true) + { + lab9 = false; + v_6 = this.cursor; + var lab10 = true; + lab10: while (lab10 == true) + { + lab10 = false; + // (, line 47 + if (!(this.out_grouping(RomanianStemmer.g_v, 97, 259))) + { + break lab10; + } + // gopast, line 47 + golab11: while(true) + { + var lab12 = true; + lab12: while (lab12 == true) + { + lab12 = false; + if (!(this.in_grouping(RomanianStemmer.g_v, 97, 259))) + { + break lab12; + } + break golab11; + } + if (this.cursor >= this.limit) + { + break lab10; + } + this.cursor++; + } + break lab9; + } + this.cursor = v_6; + // (, line 47 + if (!(this.in_grouping(RomanianStemmer.g_v, 97, 259))) + { + break lab0; + } + // next, line 47 + if (this.cursor >= this.limit) + { + break lab0; + } + this.cursor++; + } + } + // setmark pV, line 48 + this.I_pV = this.cursor; + } + this.cursor = v_1; + // do, line 50 + v_8 = this.cursor; + var lab13 = true; + lab13: while (lab13 == true) + { + lab13 = false; + // (, line 50 + // 
gopast, line 51 + golab14: while(true) + { + var lab15 = true; + lab15: while (lab15 == true) + { + lab15 = false; + if (!(this.in_grouping(RomanianStemmer.g_v, 97, 259))) + { + break lab15; + } + break golab14; + } + if (this.cursor >= this.limit) + { + break lab13; + } + this.cursor++; + } + // gopast, line 51 + golab16: while(true) + { + var lab17 = true; + lab17: while (lab17 == true) + { + lab17 = false; + if (!(this.out_grouping(RomanianStemmer.g_v, 97, 259))) + { + break lab17; + } + break golab16; + } + if (this.cursor >= this.limit) + { + break lab13; + } + this.cursor++; + } + // setmark p1, line 51 + this.I_p1 = this.cursor; + // gopast, line 52 + golab18: while(true) + { + var lab19 = true; + lab19: while (lab19 == true) + { + lab19 = false; + if (!(this.in_grouping(RomanianStemmer.g_v, 97, 259))) + { + break lab19; + } + break golab18; + } + if (this.cursor >= this.limit) + { + break lab13; + } + this.cursor++; + } + // gopast, line 52 + golab20: while(true) + { + var lab21 = true; + lab21: while (lab21 == true) + { + lab21 = false; + if (!(this.out_grouping(RomanianStemmer.g_v, 97, 259))) + { + break lab21; + } + break golab20; + } + if (this.cursor >= this.limit) + { + break lab13; + } + this.cursor++; + } + // setmark p2, line 52 + this.I_p2 = this.cursor; + } + this.cursor = v_8; + return true; + } + + function r_postlude () : boolean + { + var among_var : int; + var v_1 : int; + // repeat, line 56 + replab0: while(true) + { + v_1 = this.cursor; + var lab1 = true; + lab1: while (lab1 == true) + { + lab1 = false; + // (, line 56 + // [, line 58 + this.bra = this.cursor; + // substring, line 58 + among_var = this.find_among(RomanianStemmer.a_0, 3); + if (among_var == 0) + { + break lab1; + } + // ], line 58 + this.ket = this.cursor; + switch (among_var) { + case 0: + break lab1; + case 1: + // (, line 59 + // <-, line 59 + if (!this.slice_from("i")) + { + return false; + } + break; + case 2: + // (, line 60 + // <-, line 60 + if 
(!this.slice_from("u")) + { + return false; + } + break; + case 3: + // (, line 61 + // next, line 61 + if (this.cursor >= this.limit) + { + break lab1; + } + this.cursor++; + break; + } + continue replab0; + } + this.cursor = v_1; + break replab0; + } + return true; + } +
    /**
     * Region test for RV: succeeds only when the cursor is at or past
     * the pV mark stored in I_pV.
     */
    function r_RV () : boolean
    {
        return this.I_pV <= this.cursor;
    }

    /**
     * Region test for R1: succeeds only when the cursor is at or past
     * the p1 mark stored in I_p1.
     */
    function r_R1 () : boolean
    {
        return this.I_p1 <= this.cursor;
    }

    /**
     * Region test for R2: succeeds only when the cursor is at or past
     * the p2 mark stored in I_p2.
     */
    function r_R2 () : boolean
    {
        return this.I_p2 <= this.cursor;
    }

+ function r_step_0 () : boolean + { + var among_var : int; + var v_1 : int; + // (, line 72 + // [, line 73 + this.ket = this.cursor; + // substring, line 73 + among_var = this.find_among_b(RomanianStemmer.a_1, 16); + if (among_var == 0) + { + return false; + } + // ], line 73 + this.bra = this.cursor; + // call R1, line 73 + if (!this.r_R1()) + { + return false; + } + switch (among_var) { + case 0: + return false; + case 1: + // (, line 75 + // delete, line 75 + if (!this.slice_del()) + { + return false; + } + break; + case 2: + // (, line 77 + // <-, line 77 + if (!this.slice_from("a")) + { + return false; + } + break; + case 3: + // (, line 79 + // <-, line 79 + if (!this.slice_from("e")) + { + return false; + } + break; + case 4: + // (, line 81 + // <-, line 81 + if (!this.slice_from("i")) + { + return false; + } + break; + case 5: + // (, line 83 + // not, line 83 + { + v_1 = this.limit - this.cursor; + var lab0 = true; + lab0: while (lab0 == true) + { + lab0 = false; + // literal, line 83 + if (!(this.eq_s_b(2, "ab"))) + { + break lab0; + } + return false; + } + this.cursor = this.limit - v_1; + } + // <-, line 83 + if (!this.slice_from("i")) + { + return false; + } + break; + case 6: + // (, line 85 + // <-, line 85 + if (!this.slice_from("at")) + { + return false; + } + break; + case 7: + // (, line 87 + // <-, line 87 + if (!this.slice_from("a\u0163i"))
+ { + return false; + } + break; + } + return true; + } +
    /**
     * Matches one entry of the a_2 suffix table backwards from the cursor
     * and, if the match starts inside R1, rewrites it to the shorter suffix
     * selected by the among result (abil, ibil, iv, ic, at or it).
     *
     * On success the standard_suffix_removed flag is set and the cursor is
     * restored to where it was on entry. Returns false when nothing in a_2
     * matches, the match is outside R1, or the slice replacement fails.
     */
    function r_combo_suffix () : boolean
    {
        // Entry position kept as a distance from the limit, so it stays valid
        // after the buffer length changes.
        var entry : int = this.limit - this.cursor;

        this.ket = this.cursor;
        var matched : int = this.find_among_b(RomanianStemmer.a_2, 46);
        if (matched == 0) return false;
        this.bra = this.cursor;

        if (!this.r_R1()) return false;

        // Empty string means "no rewrite"; every real replacement is non-empty.
        var replacement : string = "";
        switch (matched) {
        case 1: replacement = "abil"; break;
        case 2: replacement = "ibil"; break;
        case 3: replacement = "iv"; break;
        case 4: replacement = "ic"; break;
        case 5: replacement = "at"; break;
        case 6: replacement = "it"; break;
        }
        if (replacement != "" && !this.slice_from(replacement)) return false;

        this.B_standard_suffix_removed = true;
        this.cursor = this.limit - entry;
        return true;
    }

+ function r_standard_suffix () : boolean + { + var among_var : int; + var v_1 : int; + // (, line 129 + // unset standard_suffix_removed, line 130 + this.B_standard_suffix_removed = false; + // repeat, line 131 + replab0: while(true) + { + v_1 = this.limit - this.cursor; + var lab1 = true; + lab1: while (lab1 == true) + { + lab1 = false; + // call combo_suffix, line 131 + if (!this.r_combo_suffix()) + { + break lab1; + } + continue replab0; + } + this.cursor = this.limit - v_1; + break replab0; + } + // [, line 132 + this.ket = this.cursor; + // substring, line 132 + among_var = this.find_among_b(RomanianStemmer.a_3, 62); + if (among_var ==
0) + { + return false; + } + // ], line 132 + this.bra = this.cursor; + // call R2, line 132 + if (!this.r_R2()) + { + return false; + } + // (, line 132 + switch (among_var) { + case 0: + return false; + case 1: + // (, line 148 + // delete, line 149 + if (!this.slice_del()) + { + return false; + } + break; + case 2: + // (, line 151 + // literal, line 152 + if (!(this.eq_s_b(1, "\u0163"))) + { + return false; + } + // ], line 152 + this.bra = this.cursor; + // <-, line 152 + if (!this.slice_from("t")) + { + return false; + } + break; + case 3: + // (, line 155 + // <-, line 156 + if (!this.slice_from("ist")) + { + return false; + } + break; + } + // set standard_suffix_removed, line 160 + this.B_standard_suffix_removed = true; + return true; + } + + function r_verb_suffix () : boolean + { + var among_var : int; + var v_1 : int; + var v_2 : int; + var v_3 : int; + // setlimit, line 164 + v_1 = this.limit - this.cursor; + // tomark, line 164 + if (this.cursor < this.I_pV) + { + return false; + } + this.cursor = this.I_pV; + v_2 = this.limit_backward; + this.limit_backward = this.cursor; + this.cursor = this.limit - v_1; + // (, line 164 + // [, line 165 + this.ket = this.cursor; + // substring, line 165 + among_var = this.find_among_b(RomanianStemmer.a_4, 94); + if (among_var == 0) + { + this.limit_backward = v_2; + return false; + } + // ], line 165 + this.bra = this.cursor; + switch (among_var) { + case 0: + this.limit_backward = v_2; + return false; + case 1: + // (, line 200 + // or, line 200 + var lab0 = true; + lab0: while (lab0 == true) + { + lab0 = false; + v_3 = this.limit - this.cursor; + var lab1 = true; + lab1: while (lab1 == true) + { + lab1 = false; + if (!(this.out_grouping_b(RomanianStemmer.g_v, 97, 259))) + { + break lab1; + } + break lab0; + } + this.cursor = this.limit - v_3; + // literal, line 200 + if (!(this.eq_s_b(1, "u"))) + { + this.limit_backward = v_2; + return false; + } + } + // delete, line 200 + if (!this.slice_del()) + { + return 
false; + } + break; + case 2: + // (, line 214 + // delete, line 214 + if (!this.slice_del()) + { + return false; + } + break; + } + this.limit_backward = v_2; + return true; + } +
    /**
     * Deletes a trailing entry of the a_5 table (a, e, ie, i, \u0103) when
     * the match starts inside RV.
     *
     * Returns false when no entry matches, the match is outside RV, or the
     * deletion fails.
     */
    function r_vowel_suffix () : boolean
    {
        this.ket = this.cursor;
        var matched : int = this.find_among_b(RomanianStemmer.a_5, 5);
        if (matched == 0) return false;
        this.bra = this.cursor;

        if (!this.r_RV()) return false;

        // Every a_5 entry carries result 1, which means "delete the slice".
        if (matched == 1 && !this.slice_del()) return false;
        return true;
    }

+ override function stem () : boolean + { + var v_1 : int; + var v_2 : int; + var v_3 : int; + var v_4 : int; + var v_5 : int; + var v_6 : int; + var v_7 : int; + var v_8 : int; + // (, line 225 + // do, line 226 + v_1 = this.cursor; + var lab0 = true; + lab0: while (lab0 == true) + { + lab0 = false; + // call prelude, line 226 + if (!this.r_prelude()) + { + break lab0; + } + } + this.cursor = v_1; + // do, line 227 + v_2 = this.cursor; + var lab1 = true; + lab1: while (lab1 == true) + { + lab1 = false; + // call mark_regions, line 227 + if (!this.r_mark_regions()) + { + break lab1; + } + } + this.cursor = v_2; + // backwards, line 228 + this.limit_backward = this.cursor; this.cursor = this.limit; + // (, line 228 + // do, line 229 + v_3 = this.limit - this.cursor; + var lab2 = true; + lab2: while (lab2 == true) + { + lab2 = false; + // call step_0, line 229 + if (!this.r_step_0()) + { + break lab2; + } + } + this.cursor = this.limit - v_3; + // do, line 230 + v_4 = this.limit - this.cursor; + var lab3 = true; + lab3: while (lab3 == true) + { + lab3 = false; + // call standard_suffix, line 230 + if (!this.r_standard_suffix()) + { + break lab3; + } + } + this.cursor = this.limit - v_4; + // do, line 231 + v_5 = this.limit - this.cursor; + var lab4 =
true; + lab4: while (lab4 == true) + { + lab4 = false; + // (, line 231 + // or, line 231 + var lab5 = true; + lab5: while (lab5 == true) + { + lab5 = false; + v_6 = this.limit - this.cursor; + var lab6 = true; + lab6: while (lab6 == true) + { + lab6 = false; + // Boolean test standard_suffix_removed, line 231 + if (!(this.B_standard_suffix_removed)) + { + break lab6; + } + break lab5; + } + this.cursor = this.limit - v_6; + // call verb_suffix, line 231 + if (!this.r_verb_suffix()) + { + break lab4; + } + } + } + this.cursor = this.limit - v_5; + // do, line 232 + v_7 = this.limit - this.cursor; + var lab7 = true; + lab7: while (lab7 == true) + { + lab7 = false; + // call vowel_suffix, line 232 + if (!this.r_vowel_suffix()) + { + break lab7; + } + } + this.cursor = this.limit - v_7; + this.cursor = this.limit_backward; // do, line 234 + v_8 = this.cursor; + var lab8 = true; + lab8: while (lab8 == true) + { + lab8 = false; + // call postlude, line 234 + if (!this.r_postlude()) + { + break lab8; + } + } + this.cursor = v_8; + return true; + } +
    /**
     * Reports whether the given value is a RomanianStemmer.
     * Only the type is checked; no instance state is compared.
     */
    function equals (o : variant) : boolean
    {
        var sameClass : boolean = o instanceof RomanianStemmer;
        return sameClass;
    }

    /**
     * Returns a hash derived solely from the class name, so the value does
     * not depend on any field of the instance.
     */
    function hashCode() : int
    {
        //http://stackoverflow.com/questions/194846/is-there-any-kind-of-hashcode-function-in-javascript
        var classname = "RomanianStemmer";
        var hash = 0;
        var index = 0;
        while (index < classname.length) {
            var code = classname.charCodeAt(index);
            hash = (hash << 5) - hash + code;
            hash = hash & hash; // Convert to 32bit integer
            index++;
        }
        return hash;
    }

+ +} + diff --git a/web/server/h2o/libh2o/misc/oktavia/src/stemmer/russian-stemmer.jsx b/web/server/h2o/libh2o/misc/oktavia/src/stemmer/russian-stemmer.jsx new file mode 100644 index 00000000..2b7d30a9 --- /dev/null +++ b/web/server/h2o/libh2o/misc/oktavia/src/stemmer/russian-stemmer.jsx @@ -0,0 +1,875 @@ +// This file was generated automatically by the Snowball to JSX compiler + +import "base-stemmer.jsx"; +import
"among.jsx"; + + /** + * This class was automatically generated by a Snowball to JSX compiler + * It implements the stemming algorithm defined by a snowball script. + */ + +class RussianStemmer extends BaseStemmer +{ + static const serialVersionUID = 1; + static const methodObject = new RussianStemmer(); + + static const a_0 = [ + new Among("\u0432", -1, 1), + new Among("\u0438\u0432", 0, 2), + new Among("\u044B\u0432", 0, 2), + new Among("\u0432\u0448\u0438", -1, 1), + new Among("\u0438\u0432\u0448\u0438", 3, 2), + new Among("\u044B\u0432\u0448\u0438", 3, 2), + new Among("\u0432\u0448\u0438\u0441\u044C", -1, 1), + new Among("\u0438\u0432\u0448\u0438\u0441\u044C", 6, 2), + new Among("\u044B\u0432\u0448\u0438\u0441\u044C", 6, 2) + ]; + + static const a_1 = [ + new Among("\u0435\u0435", -1, 1), + new Among("\u0438\u0435", -1, 1), + new Among("\u043E\u0435", -1, 1), + new Among("\u044B\u0435", -1, 1), + new Among("\u0438\u043C\u0438", -1, 1), + new Among("\u044B\u043C\u0438", -1, 1), + new Among("\u0435\u0439", -1, 1), + new Among("\u0438\u0439", -1, 1), + new Among("\u043E\u0439", -1, 1), + new Among("\u044B\u0439", -1, 1), + new Among("\u0435\u043C", -1, 1), + new Among("\u0438\u043C", -1, 1), + new Among("\u043E\u043C", -1, 1), + new Among("\u044B\u043C", -1, 1), + new Among("\u0435\u0433\u043E", -1, 1), + new Among("\u043E\u0433\u043E", -1, 1), + new Among("\u0435\u043C\u0443", -1, 1), + new Among("\u043E\u043C\u0443", -1, 1), + new Among("\u0438\u0445", -1, 1), + new Among("\u044B\u0445", -1, 1), + new Among("\u0435\u044E", -1, 1), + new Among("\u043E\u044E", -1, 1), + new Among("\u0443\u044E", -1, 1), + new Among("\u044E\u044E", -1, 1), + new Among("\u0430\u044F", -1, 1), + new Among("\u044F\u044F", -1, 1) + ]; + + static const a_2 = [ + new Among("\u0435\u043C", -1, 1), + new Among("\u043D\u043D", -1, 1), + new Among("\u0432\u0448", -1, 1), + new Among("\u0438\u0432\u0448", 2, 2), + new Among("\u044B\u0432\u0448", 2, 2), + new Among("\u0449", -1, 1), + new 
Among("\u044E\u0449", 5, 1), + new Among("\u0443\u044E\u0449", 6, 2) + ]; + + static const a_3 = [ + new Among("\u0441\u044C", -1, 1), + new Among("\u0441\u044F", -1, 1) + ]; + + static const a_4 = [ + new Among("\u043B\u0430", -1, 1), + new Among("\u0438\u043B\u0430", 0, 2), + new Among("\u044B\u043B\u0430", 0, 2), + new Among("\u043D\u0430", -1, 1), + new Among("\u0435\u043D\u0430", 3, 2), + new Among("\u0435\u0442\u0435", -1, 1), + new Among("\u0438\u0442\u0435", -1, 2), + new Among("\u0439\u0442\u0435", -1, 1), + new Among("\u0435\u0439\u0442\u0435", 7, 2), + new Among("\u0443\u0439\u0442\u0435", 7, 2), + new Among("\u043B\u0438", -1, 1), + new Among("\u0438\u043B\u0438", 10, 2), + new Among("\u044B\u043B\u0438", 10, 2), + new Among("\u0439", -1, 1), + new Among("\u0435\u0439", 13, 2), + new Among("\u0443\u0439", 13, 2), + new Among("\u043B", -1, 1), + new Among("\u0438\u043B", 16, 2), + new Among("\u044B\u043B", 16, 2), + new Among("\u0435\u043C", -1, 1), + new Among("\u0438\u043C", -1, 2), + new Among("\u044B\u043C", -1, 2), + new Among("\u043D", -1, 1), + new Among("\u0435\u043D", 22, 2), + new Among("\u043B\u043E", -1, 1), + new Among("\u0438\u043B\u043E", 24, 2), + new Among("\u044B\u043B\u043E", 24, 2), + new Among("\u043D\u043E", -1, 1), + new Among("\u0435\u043D\u043E", 27, 2), + new Among("\u043D\u043D\u043E", 27, 1), + new Among("\u0435\u0442", -1, 1), + new Among("\u0443\u0435\u0442", 30, 2), + new Among("\u0438\u0442", -1, 2), + new Among("\u044B\u0442", -1, 2), + new Among("\u044E\u0442", -1, 1), + new Among("\u0443\u044E\u0442", 34, 2), + new Among("\u044F\u0442", -1, 2), + new Among("\u043D\u044B", -1, 1), + new Among("\u0435\u043D\u044B", 37, 2), + new Among("\u0442\u044C", -1, 1), + new Among("\u0438\u0442\u044C", 39, 2), + new Among("\u044B\u0442\u044C", 39, 2), + new Among("\u0435\u0448\u044C", -1, 1), + new Among("\u0438\u0448\u044C", -1, 2), + new Among("\u044E", -1, 2), + new Among("\u0443\u044E", 44, 2) + ]; + + static const a_5 = [ + new 
Among("\u0430", -1, 1), + new Among("\u0435\u0432", -1, 1), + new Among("\u043E\u0432", -1, 1), + new Among("\u0435", -1, 1), + new Among("\u0438\u0435", 3, 1), + new Among("\u044C\u0435", 3, 1), + new Among("\u0438", -1, 1), + new Among("\u0435\u0438", 6, 1), + new Among("\u0438\u0438", 6, 1), + new Among("\u0430\u043C\u0438", 6, 1), + new Among("\u044F\u043C\u0438", 6, 1), + new Among("\u0438\u044F\u043C\u0438", 10, 1), + new Among("\u0439", -1, 1), + new Among("\u0435\u0439", 12, 1), + new Among("\u0438\u0435\u0439", 13, 1), + new Among("\u0438\u0439", 12, 1), + new Among("\u043E\u0439", 12, 1), + new Among("\u0430\u043C", -1, 1), + new Among("\u0435\u043C", -1, 1), + new Among("\u0438\u0435\u043C", 18, 1), + new Among("\u043E\u043C", -1, 1), + new Among("\u044F\u043C", -1, 1), + new Among("\u0438\u044F\u043C", 21, 1), + new Among("\u043E", -1, 1), + new Among("\u0443", -1, 1), + new Among("\u0430\u0445", -1, 1), + new Among("\u044F\u0445", -1, 1), + new Among("\u0438\u044F\u0445", 26, 1), + new Among("\u044B", -1, 1), + new Among("\u044C", -1, 1), + new Among("\u044E", -1, 1), + new Among("\u0438\u044E", 30, 1), + new Among("\u044C\u044E", 30, 1), + new Among("\u044F", -1, 1), + new Among("\u0438\u044F", 33, 1), + new Among("\u044C\u044F", 33, 1) + ]; + + static const a_6 = [ + new Among("\u043E\u0441\u0442", -1, 1), + new Among("\u043E\u0441\u0442\u044C", -1, 1) + ]; + + static const a_7 = [ + new Among("\u0435\u0439\u0448\u0435", -1, 1), + new Among("\u043D", -1, 2), + new Among("\u0435\u0439\u0448", -1, 1), + new Among("\u044C", -1, 3) + ]; + + static const g_v = [33, 65, 8, 232] : int[]; + + var I_p2 : int = 0; + var I_pV : int = 0; + + function copy_from (other : RussianStemmer) : void + { + this.I_p2 = other.I_p2; + this.I_pV = other.I_pV; + super.copy_from(other); + } + + function r_mark_regions () : boolean + { + var v_1 : int; + // (, line 57 + this.I_pV = this.limit; + this.I_p2 = this.limit; + // do, line 61 + v_1 = this.cursor; + var lab0 = true; + 
lab0: while (lab0 == true) + { + lab0 = false; + // (, line 61 + // gopast, line 62 + golab1: while(true) + { + var lab2 = true; + lab2: while (lab2 == true) + { + lab2 = false; + if (!(this.in_grouping(RussianStemmer.g_v, 1072, 1103))) + { + break lab2; + } + break golab1; + } + if (this.cursor >= this.limit) + { + break lab0; + } + this.cursor++; + } + // setmark pV, line 62 + this.I_pV = this.cursor; + // gopast, line 62 + golab3: while(true) + { + var lab4 = true; + lab4: while (lab4 == true) + { + lab4 = false; + if (!(this.out_grouping(RussianStemmer.g_v, 1072, 1103))) + { + break lab4; + } + break golab3; + } + if (this.cursor >= this.limit) + { + break lab0; + } + this.cursor++; + } + // gopast, line 63 + golab5: while(true) + { + var lab6 = true; + lab6: while (lab6 == true) + { + lab6 = false; + if (!(this.in_grouping(RussianStemmer.g_v, 1072, 1103))) + { + break lab6; + } + break golab5; + } + if (this.cursor >= this.limit) + { + break lab0; + } + this.cursor++; + } + // gopast, line 63 + golab7: while(true) + { + var lab8 = true; + lab8: while (lab8 == true) + { + lab8 = false; + if (!(this.out_grouping(RussianStemmer.g_v, 1072, 1103))) + { + break lab8; + } + break golab7; + } + if (this.cursor >= this.limit) + { + break lab0; + } + this.cursor++; + } + // setmark p2, line 63 + this.I_p2 = this.cursor; + } + this.cursor = v_1; + return true; + } +
    /**
     * Region test for R2: succeeds only when the cursor is at or past
     * the p2 mark stored in I_p2.
     */
    function r_R2 () : boolean
    {
        return this.I_p2 <= this.cursor;
    }

+ function r_perfective_gerund () : boolean + { + var among_var : int; + var v_1 : int; + // (, line 71 + // [, line 72 + this.ket = this.cursor; + // substring, line 72 + among_var = this.find_among_b(RussianStemmer.a_0, 9); + if (among_var == 0) + { + return false; + } + // ], line 72 + this.bra = this.cursor; + switch (among_var) { + case 0: + return false; + case 1: + // (, line 76 + // or, line 76 + var lab0 = true; + lab0: while (lab0 == true) + { + lab0 = false; + v_1 = this.limit -
this.cursor; + var lab1 = true; + lab1: while (lab1 == true) + { + lab1 = false; + // literal, line 76 + if (!(this.eq_s_b(1, "\u0430"))) + { + break lab1; + } + break lab0; + } + this.cursor = this.limit - v_1; + // literal, line 76 + if (!(this.eq_s_b(1, "\u044F"))) + { + return false; + } + } + // delete, line 76 + if (!this.slice_del()) + { + return false; + } + break; + case 2: + // (, line 83 + // delete, line 83 + if (!this.slice_del()) + { + return false; + } + break; + } + return true; + } +
    /**
     * Deletes a trailing adjective ending, i.e. any entry of the a_1 table,
     * matched backwards from the cursor.
     *
     * Returns false when no entry matches or the deletion fails.
     */
    function r_adjective () : boolean
    {
        this.ket = this.cursor;
        var matched : int = this.find_among_b(RussianStemmer.a_1, 26);
        if (matched == 0) return false;
        this.bra = this.cursor;

        // Every a_1 entry carries result 1, which means "delete the slice".
        if (matched == 1 && !this.slice_del()) return false;
        return true;
    }

+ function r_adjectival () : boolean + { + var among_var : int; + var v_1 : int; + var v_2 : int; + // (, line 101 + // call adjective, line 102 + if (!this.r_adjective()) + { + return false; + } + // try, line 109 + v_1 = this.limit - this.cursor; + var lab0 = true; + lab0: while (lab0 == true) + { + lab0 = false; + // (, line 109 + // [, line 110 + this.ket = this.cursor; + // substring, line 110 + among_var = this.find_among_b(RussianStemmer.a_2, 8); + if (among_var == 0) + { + this.cursor = this.limit - v_1; + break lab0; + } + // ], line 110 + this.bra = this.cursor; + switch (among_var) { + case 0: + this.cursor = this.limit - v_1; + break lab0; + case 1: + // (, line 115 + // or, line 115 + var lab1 = true; + lab1: while (lab1 == true) + { + lab1 = false; + v_2 = this.limit - this.cursor; + var lab2 = true; + lab2: while (lab2 == true) + { + lab2 = false; + // literal, line 115 + if (!(this.eq_s_b(1, "\u0430"))) + { + break lab2; + } + break lab1; + } + this.cursor = this.limit - v_2; + // literal,
line 115 + if (!(this.eq_s_b(1, "\u044F"))) + { + this.cursor = this.limit - v_1; + break lab0; + } + } + // delete, line 115 + if (!this.slice_del()) + { + return false; + } + break; + case 2: + // (, line 122 + // delete, line 122 + if (!this.slice_del()) + { + return false; + } + break; + } + } + return true; + } +
    /**
     * Deletes a trailing reflexive ending, i.e. one of the two a_3 entries
     * ("\u0441\u044C" or "\u0441\u044F"), matched backwards from the cursor.
     *
     * Returns false when neither entry matches or the deletion fails.
     */
    function r_reflexive () : boolean
    {
        this.ket = this.cursor;
        var matched : int = this.find_among_b(RussianStemmer.a_3, 2);
        if (matched == 0) return false;
        this.bra = this.cursor;

        if (matched == 1 && !this.slice_del()) return false;
        return true;
    }

    /**
     * Deletes a trailing verb ending taken from the a_4 table.
     *
     * Entries with result 1 are removed only when the ending is directly
     * preceded by "\u0430" or "\u044F"; entries with result 2 are removed
     * unconditionally.
     *
     * Returns false when no entry matches, the required preceding letter is
     * missing, or the deletion fails.
     */
    function r_verb () : boolean
    {
        this.ket = this.cursor;
        var matched : int = this.find_among_b(RussianStemmer.a_4, 46);
        if (matched == 0) return false;
        this.bra = this.cursor;

        if (matched == 1) {
            // Try "\u0430" first; on failure rewind and insist on "\u044F".
            var mark : int = this.limit - this.cursor;
            if (!this.eq_s_b(1, "\u0430")) {
                this.cursor = this.limit - mark;
                if (!this.eq_s_b(1, "\u044F")) return false;
            }
            if (!this.slice_del()) return false;
        } else if (matched == 2) {
            if (!this.slice_del()) return false;
        }
        return true;
    }

+ function r_noun () : boolean + { + var among_var : int; + // (, line 159 + // [, line 160 + this.ket = this.cursor; + // substring, line 160 + among_var = this.find_among_b(RussianStemmer.a_5, 36); + if
(among_var == 0) + { + return false; + } + // ], line 160 + this.bra = this.cursor; + switch (among_var) { + case 0: + return false; + case 1: + // (, line 167 + // delete, line 167 + if (!this.slice_del()) + { + return false; + } + break; + } + return true; + } +
    /**
     * Deletes a trailing derivational ending, i.e. one of the two a_6
     * entries ("\u043E\u0441\u0442" or "\u043E\u0441\u0442\u044C"), when
     * the match starts inside R2.
     *
     * Returns false when neither entry matches, the match is outside R2, or
     * the deletion fails.
     */
    function r_derivational () : boolean
    {
        this.ket = this.cursor;
        var matched : int = this.find_among_b(RussianStemmer.a_6, 2);
        if (matched == 0) return false;
        this.bra = this.cursor;

        if (!this.r_R2()) return false;

        if (matched == 1 && !this.slice_del()) return false;
        return true;
    }

    /**
     * Final clean-up driven by the a_7 table:
     *   result 1 - delete the matched ending, then require "\u043D\u043D"
     *              before it and delete one "\u043D";
     *   result 2 - the matched "\u043D" must be preceded by another
     *              "\u043D"; delete the matched one;
     *   result 3 - delete the matched "\u044C".
     *
     * Returns false when nothing in a_7 matches, a required "\u043D" is
     * missing, or a deletion fails.
     */
    function r_tidy_up () : boolean
    {
        this.ket = this.cursor;
        var matched : int = this.find_among_b(RussianStemmer.a_7, 4);
        if (matched == 0) return false;
        this.bra = this.cursor;

        if (matched == 1) {
            if (!this.slice_del()) return false;
            // Re-bracket a single "\u043D" that must itself follow another one.
            this.ket = this.cursor;
            if (!this.eq_s_b(1, "\u043D")) return false;
            this.bra = this.cursor;
            if (!this.eq_s_b(1, "\u043D")) return false;
            return this.slice_del();
        }
        if (matched == 2) {
            if (!this.eq_s_b(1, "\u043D")) return false;
            return this.slice_del();
        }
        if (matched == 3) {
            return this.slice_del();
        }
        return true;
    }

+ override function stem () : boolean + { + var v_1 : int; + var v_2 : int; + var v_3 : int; + var
v_4 : int; + var v_5 : int; + var v_6 : int; + var v_7 : int; + var v_8 : int; + var v_9 : int; + var v_10 : int; + // (, line 199 + // do, line 201 + v_1 = this.cursor; + var lab0 = true; + lab0: while (lab0 == true) + { + lab0 = false; + // call mark_regions, line 201 + if (!this.r_mark_regions()) + { + break lab0; + } + } + this.cursor = v_1; + // backwards, line 202 + this.limit_backward = this.cursor; this.cursor = this.limit; + // setlimit, line 202 + v_2 = this.limit - this.cursor; + // tomark, line 202 + if (this.cursor < this.I_pV) + { + return false; + } + this.cursor = this.I_pV; + v_3 = this.limit_backward; + this.limit_backward = this.cursor; + this.cursor = this.limit - v_2; + // (, line 202 + // do, line 203 + v_4 = this.limit - this.cursor; + var lab1 = true; + lab1: while (lab1 == true) + { + lab1 = false; + // (, line 203 + // or, line 204 + var lab2 = true; + lab2: while (lab2 == true) + { + lab2 = false; + v_5 = this.limit - this.cursor; + var lab3 = true; + lab3: while (lab3 == true) + { + lab3 = false; + // call perfective_gerund, line 204 + if (!this.r_perfective_gerund()) + { + break lab3; + } + break lab2; + } + this.cursor = this.limit - v_5; + // (, line 205 + // try, line 205 + v_6 = this.limit - this.cursor; + var lab4 = true; + lab4: while (lab4 == true) + { + lab4 = false; + // call reflexive, line 205 + if (!this.r_reflexive()) + { + this.cursor = this.limit - v_6; + break lab4; + } + } + // or, line 206 + var lab5 = true; + lab5: while (lab5 == true) + { + lab5 = false; + v_7 = this.limit - this.cursor; + var lab6 = true; + lab6: while (lab6 == true) + { + lab6 = false; + // call adjectival, line 206 + if (!this.r_adjectival()) + { + break lab6; + } + break lab5; + } + this.cursor = this.limit - v_7; + var lab7 = true; + lab7: while (lab7 == true) + { + lab7 = false; + // call verb, line 206 + if (!this.r_verb()) + { + break lab7; + } + break lab5; + } + this.cursor = this.limit - v_7; + // call noun, line 206 + if (!this.r_noun()) 
+ { + break lab1; + } + } + } + } + this.cursor = this.limit - v_4; + // try, line 209 + v_8 = this.limit - this.cursor; + var lab8 = true; + lab8: while (lab8 == true) + { + lab8 = false; + // (, line 209 + // [, line 209 + this.ket = this.cursor; + // literal, line 209 + if (!(this.eq_s_b(1, "\u0438"))) + { + this.cursor = this.limit - v_8; + break lab8; + } + // ], line 209 + this.bra = this.cursor; + // delete, line 209 + if (!this.slice_del()) + { + return false; + } + } + // do, line 212 + v_9 = this.limit - this.cursor; + var lab9 = true; + lab9: while (lab9 == true) + { + lab9 = false; + // call derivational, line 212 + if (!this.r_derivational()) + { + break lab9; + } + } + this.cursor = this.limit - v_9; + // do, line 213 + v_10 = this.limit - this.cursor; + var lab10 = true; + lab10: while (lab10 == true) + { + lab10 = false; + // call tidy_up, line 213 + if (!this.r_tidy_up()) + { + break lab10; + } + } + this.cursor = this.limit - v_10; + this.limit_backward = v_3; + this.cursor = this.limit_backward; return true; + } + + function equals (o : variant) : boolean { + return o instanceof RussianStemmer; + } + + function hashCode() : int + { + //http://stackoverflow.com/questions/194846/is-there-any-kind-of-hashcode-function-in-javascript + var classname = "RussianStemmer"; + var hash = 0; + if (classname.length == 0) return hash; + for (var i = 0; i < classname.length; i++) { + var char = classname.charCodeAt(i); + hash = ((hash << 5) - hash) + char; + hash = hash & hash; // Convert to 32bit integer + } + return hash; + } + +} + diff --git a/web/server/h2o/libh2o/misc/oktavia/src/stemmer/spanish-stemmer.jsx b/web/server/h2o/libh2o/misc/oktavia/src/stemmer/spanish-stemmer.jsx new file mode 100644 index 00000000..e9bee696 --- /dev/null +++ b/web/server/h2o/libh2o/misc/oktavia/src/stemmer/spanish-stemmer.jsx @@ -0,0 +1,1408 @@ +// This file was generated automatically by the Snowball to JSX compiler + +import "base-stemmer.jsx"; +import "among.jsx"; + + /** 
+ * This class was automatically generated by a Snowball to JSX compiler + * It implements the stemming algorithm defined by a snowball script. + */ + +class SpanishStemmer extends BaseStemmer +{ + static const serialVersionUID = 1; + static const methodObject = new SpanishStemmer(); + + static const a_0 = [ + new Among("", -1, 6), + new Among("\u00E1", 0, 1), + new Among("\u00E9", 0, 2), + new Among("\u00ED", 0, 3), + new Among("\u00F3", 0, 4), + new Among("\u00FA", 0, 5) + ]; + + static const a_1 = [ + new Among("la", -1, -1), + new Among("sela", 0, -1), + new Among("le", -1, -1), + new Among("me", -1, -1), + new Among("se", -1, -1), + new Among("lo", -1, -1), + new Among("selo", 5, -1), + new Among("las", -1, -1), + new Among("selas", 7, -1), + new Among("les", -1, -1), + new Among("los", -1, -1), + new Among("selos", 10, -1), + new Among("nos", -1, -1) + ]; + + static const a_2 = [ + new Among("ando", -1, 6), + new Among("iendo", -1, 6), + new Among("yendo", -1, 7), + new Among("\u00E1ndo", -1, 2), + new Among("i\u00E9ndo", -1, 1), + new Among("ar", -1, 6), + new Among("er", -1, 6), + new Among("ir", -1, 6), + new Among("\u00E1r", -1, 3), + new Among("\u00E9r", -1, 4), + new Among("\u00EDr", -1, 5) + ]; + + static const a_3 = [ + new Among("ic", -1, -1), + new Among("ad", -1, -1), + new Among("os", -1, -1), + new Among("iv", -1, 1) + ]; + + static const a_4 = [ + new Among("able", -1, 1), + new Among("ible", -1, 1), + new Among("ante", -1, 1) + ]; + + static const a_5 = [ + new Among("ic", -1, 1), + new Among("abil", -1, 1), + new Among("iv", -1, 1) + ]; + + static const a_6 = [ + new Among("ica", -1, 1), + new Among("ancia", -1, 2), + new Among("encia", -1, 5), + new Among("adora", -1, 2), + new Among("osa", -1, 1), + new Among("ista", -1, 1), + new Among("iva", -1, 9), + new Among("anza", -1, 1), + new Among("log\u00EDa", -1, 3), + new Among("idad", -1, 8), + new Among("able", -1, 1), + new Among("ible", -1, 1), + new Among("ante", -1, 2), + new 
Among("mente", -1, 7), + new Among("amente", 13, 6), + new Among("aci\u00F3n", -1, 2), + new Among("uci\u00F3n", -1, 4), + new Among("ico", -1, 1), + new Among("ismo", -1, 1), + new Among("oso", -1, 1), + new Among("amiento", -1, 1), + new Among("imiento", -1, 1), + new Among("ivo", -1, 9), + new Among("ador", -1, 2), + new Among("icas", -1, 1), + new Among("ancias", -1, 2), + new Among("encias", -1, 5), + new Among("adoras", -1, 2), + new Among("osas", -1, 1), + new Among("istas", -1, 1), + new Among("ivas", -1, 9), + new Among("anzas", -1, 1), + new Among("log\u00EDas", -1, 3), + new Among("idades", -1, 8), + new Among("ables", -1, 1), + new Among("ibles", -1, 1), + new Among("aciones", -1, 2), + new Among("uciones", -1, 4), + new Among("adores", -1, 2), + new Among("antes", -1, 2), + new Among("icos", -1, 1), + new Among("ismos", -1, 1), + new Among("osos", -1, 1), + new Among("amientos", -1, 1), + new Among("imientos", -1, 1), + new Among("ivos", -1, 9) + ]; + + static const a_7 = [ + new Among("ya", -1, 1), + new Among("ye", -1, 1), + new Among("yan", -1, 1), + new Among("yen", -1, 1), + new Among("yeron", -1, 1), + new Among("yendo", -1, 1), + new Among("yo", -1, 1), + new Among("yas", -1, 1), + new Among("yes", -1, 1), + new Among("yais", -1, 1), + new Among("yamos", -1, 1), + new Among("y\u00F3", -1, 1) + ]; + + static const a_8 = [ + new Among("aba", -1, 2), + new Among("ada", -1, 2), + new Among("ida", -1, 2), + new Among("ara", -1, 2), + new Among("iera", -1, 2), + new Among("\u00EDa", -1, 2), + new Among("ar\u00EDa", 5, 2), + new Among("er\u00EDa", 5, 2), + new Among("ir\u00EDa", 5, 2), + new Among("ad", -1, 2), + new Among("ed", -1, 2), + new Among("id", -1, 2), + new Among("ase", -1, 2), + new Among("iese", -1, 2), + new Among("aste", -1, 2), + new Among("iste", -1, 2), + new Among("an", -1, 2), + new Among("aban", 16, 2), + new Among("aran", 16, 2), + new Among("ieran", 16, 2), + new Among("\u00EDan", 16, 2), + new Among("ar\u00EDan", 20, 2), + new 
Among("er\u00EDan", 20, 2), + new Among("ir\u00EDan", 20, 2), + new Among("en", -1, 1), + new Among("asen", 24, 2), + new Among("iesen", 24, 2), + new Among("aron", -1, 2), + new Among("ieron", -1, 2), + new Among("ar\u00E1n", -1, 2), + new Among("er\u00E1n", -1, 2), + new Among("ir\u00E1n", -1, 2), + new Among("ado", -1, 2), + new Among("ido", -1, 2), + new Among("ando", -1, 2), + new Among("iendo", -1, 2), + new Among("ar", -1, 2), + new Among("er", -1, 2), + new Among("ir", -1, 2), + new Among("as", -1, 2), + new Among("abas", 39, 2), + new Among("adas", 39, 2), + new Among("idas", 39, 2), + new Among("aras", 39, 2), + new Among("ieras", 39, 2), + new Among("\u00EDas", 39, 2), + new Among("ar\u00EDas", 45, 2), + new Among("er\u00EDas", 45, 2), + new Among("ir\u00EDas", 45, 2), + new Among("es", -1, 1), + new Among("ases", 49, 2), + new Among("ieses", 49, 2), + new Among("abais", -1, 2), + new Among("arais", -1, 2), + new Among("ierais", -1, 2), + new Among("\u00EDais", -1, 2), + new Among("ar\u00EDais", 55, 2), + new Among("er\u00EDais", 55, 2), + new Among("ir\u00EDais", 55, 2), + new Among("aseis", -1, 2), + new Among("ieseis", -1, 2), + new Among("asteis", -1, 2), + new Among("isteis", -1, 2), + new Among("\u00E1is", -1, 2), + new Among("\u00E9is", -1, 1), + new Among("ar\u00E9is", 64, 2), + new Among("er\u00E9is", 64, 2), + new Among("ir\u00E9is", 64, 2), + new Among("ados", -1, 2), + new Among("idos", -1, 2), + new Among("amos", -1, 2), + new Among("\u00E1bamos", 70, 2), + new Among("\u00E1ramos", 70, 2), + new Among("i\u00E9ramos", 70, 2), + new Among("\u00EDamos", 70, 2), + new Among("ar\u00EDamos", 74, 2), + new Among("er\u00EDamos", 74, 2), + new Among("ir\u00EDamos", 74, 2), + new Among("emos", -1, 1), + new Among("aremos", 78, 2), + new Among("eremos", 78, 2), + new Among("iremos", 78, 2), + new Among("\u00E1semos", 78, 2), + new Among("i\u00E9semos", 78, 2), + new Among("imos", -1, 2), + new Among("ar\u00E1s", -1, 2), + new Among("er\u00E1s", -1, 2), 
+ new Among("ir\u00E1s", -1, 2), + new Among("\u00EDs", -1, 2), + new Among("ar\u00E1", -1, 2), + new Among("er\u00E1", -1, 2), + new Among("ir\u00E1", -1, 2), + new Among("ar\u00E9", -1, 2), + new Among("er\u00E9", -1, 2), + new Among("ir\u00E9", -1, 2), + new Among("i\u00F3", -1, 2) + ]; + + static const a_9 = [ + new Among("a", -1, 1), + new Among("e", -1, 2), + new Among("o", -1, 1), + new Among("os", -1, 1), + new Among("\u00E1", -1, 1), + new Among("\u00E9", -1, 2), + new Among("\u00ED", -1, 1), + new Among("\u00F3", -1, 1) + ]; + + static const g_v = [17, 65, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 17, 4, 10] : int[]; + + var I_p2 : int = 0; + var I_p1 : int = 0; + var I_pV : int = 0; + + function copy_from (other : SpanishStemmer) : void + { + this.I_p2 = other.I_p2; + this.I_p1 = other.I_p1; + this.I_pV = other.I_pV; + super.copy_from(other); + } + + function r_mark_regions () : boolean + { + var v_1 : int; + var v_2 : int; + var v_3 : int; + var v_6 : int; + var v_8 : int; + // (, line 31 + this.I_pV = this.limit; + this.I_p1 = this.limit; + this.I_p2 = this.limit; + // do, line 37 + v_1 = this.cursor; + var lab0 = true; + lab0: while (lab0 == true) + { + lab0 = false; + // (, line 37 + // or, line 39 + var lab1 = true; + lab1: while (lab1 == true) + { + lab1 = false; + v_2 = this.cursor; + var lab2 = true; + lab2: while (lab2 == true) + { + lab2 = false; + // (, line 38 + if (!(this.in_grouping(SpanishStemmer.g_v, 97, 252))) + { + break lab2; + } + // or, line 38 + var lab3 = true; + lab3: while (lab3 == true) + { + lab3 = false; + v_3 = this.cursor; + var lab4 = true; + lab4: while (lab4 == true) + { + lab4 = false; + // (, line 38 + if (!(this.out_grouping(SpanishStemmer.g_v, 97, 252))) + { + break lab4; + } + // gopast, line 38 + golab5: while(true) + { + var lab6 = true; + lab6: while (lab6 == true) + { + lab6 = false; + if (!(this.in_grouping(SpanishStemmer.g_v, 97, 252))) + { + break lab6; + } + break golab5; + } + if (this.cursor >= 
this.limit) + { + break lab4; + } + this.cursor++; + } + break lab3; + } + this.cursor = v_3; + // (, line 38 + if (!(this.in_grouping(SpanishStemmer.g_v, 97, 252))) + { + break lab2; + } + // gopast, line 38 + golab7: while(true) + { + var lab8 = true; + lab8: while (lab8 == true) + { + lab8 = false; + if (!(this.out_grouping(SpanishStemmer.g_v, 97, 252))) + { + break lab8; + } + break golab7; + } + if (this.cursor >= this.limit) + { + break lab2; + } + this.cursor++; + } + } + break lab1; + } + this.cursor = v_2; + // (, line 40 + if (!(this.out_grouping(SpanishStemmer.g_v, 97, 252))) + { + break lab0; + } + // or, line 40 + var lab9 = true; + lab9: while (lab9 == true) + { + lab9 = false; + v_6 = this.cursor; + var lab10 = true; + lab10: while (lab10 == true) + { + lab10 = false; + // (, line 40 + if (!(this.out_grouping(SpanishStemmer.g_v, 97, 252))) + { + break lab10; + } + // gopast, line 40 + golab11: while(true) + { + var lab12 = true; + lab12: while (lab12 == true) + { + lab12 = false; + if (!(this.in_grouping(SpanishStemmer.g_v, 97, 252))) + { + break lab12; + } + break golab11; + } + if (this.cursor >= this.limit) + { + break lab10; + } + this.cursor++; + } + break lab9; + } + this.cursor = v_6; + // (, line 40 + if (!(this.in_grouping(SpanishStemmer.g_v, 97, 252))) + { + break lab0; + } + // next, line 40 + if (this.cursor >= this.limit) + { + break lab0; + } + this.cursor++; + } + } + // setmark pV, line 41 + this.I_pV = this.cursor; + } + this.cursor = v_1; + // do, line 43 + v_8 = this.cursor; + var lab13 = true; + lab13: while (lab13 == true) + { + lab13 = false; + // (, line 43 + // gopast, line 44 + golab14: while(true) + { + var lab15 = true; + lab15: while (lab15 == true) + { + lab15 = false; + if (!(this.in_grouping(SpanishStemmer.g_v, 97, 252))) + { + break lab15; + } + break golab14; + } + if (this.cursor >= this.limit) + { + break lab13; + } + this.cursor++; + } + // gopast, line 44 + golab16: while(true) + { + var lab17 = true; + lab17: 
while (lab17 == true) + { + lab17 = false; + if (!(this.out_grouping(SpanishStemmer.g_v, 97, 252))) + { + break lab17; + } + break golab16; + } + if (this.cursor >= this.limit) + { + break lab13; + } + this.cursor++; + } + // setmark p1, line 44 + this.I_p1 = this.cursor; + // gopast, line 45 + golab18: while(true) + { + var lab19 = true; + lab19: while (lab19 == true) + { + lab19 = false; + if (!(this.in_grouping(SpanishStemmer.g_v, 97, 252))) + { + break lab19; + } + break golab18; + } + if (this.cursor >= this.limit) + { + break lab13; + } + this.cursor++; + } + // gopast, line 45 + golab20: while(true) + { + var lab21 = true; + lab21: while (lab21 == true) + { + lab21 = false; + if (!(this.out_grouping(SpanishStemmer.g_v, 97, 252))) + { + break lab21; + } + break golab20; + } + if (this.cursor >= this.limit) + { + break lab13; + } + this.cursor++; + } + // setmark p2, line 45 + this.I_p2 = this.cursor; + } + this.cursor = v_8; + return true; + } + + function r_postlude () : boolean + { + var among_var : int; + var v_1 : int; + // repeat, line 49 + replab0: while(true) + { + v_1 = this.cursor; + var lab1 = true; + lab1: while (lab1 == true) + { + lab1 = false; + // (, line 49 + // [, line 50 + this.bra = this.cursor; + // substring, line 50 + among_var = this.find_among(SpanishStemmer.a_0, 6); + if (among_var == 0) + { + break lab1; + } + // ], line 50 + this.ket = this.cursor; + switch (among_var) { + case 0: + break lab1; + case 1: + // (, line 51 + // <-, line 51 + if (!this.slice_from("a")) + { + return false; + } + break; + case 2: + // (, line 52 + // <-, line 52 + if (!this.slice_from("e")) + { + return false; + } + break; + case 3: + // (, line 53 + // <-, line 53 + if (!this.slice_from("i")) + { + return false; + } + break; + case 4: + // (, line 54 + // <-, line 54 + if (!this.slice_from("o")) + { + return false; + } + break; + case 5: + // (, line 55 + // <-, line 55 + if (!this.slice_from("u")) + { + return false; + } + break; + case 6: + // (, line 
57 + // next, line 57 + if (this.cursor >= this.limit) + { + break lab1; + } + this.cursor++; + break; + } + continue replab0; + } + this.cursor = v_1; + break replab0; + } + return true; + } + + function r_RV () : boolean + { + if (!(this.I_pV <= this.cursor)) + { + return false; + } + return true; + } + + function r_R1 () : boolean + { + if (!(this.I_p1 <= this.cursor)) + { + return false; + } + return true; + } + + function r_R2 () : boolean + { + if (!(this.I_p2 <= this.cursor)) + { + return false; + } + return true; + } + + function r_attached_pronoun () : boolean + { + var among_var : int; + // (, line 67 + // [, line 68 + this.ket = this.cursor; + // substring, line 68 + if (this.find_among_b(SpanishStemmer.a_1, 13) == 0) + { + return false; + } + // ], line 68 + this.bra = this.cursor; + // substring, line 72 + among_var = this.find_among_b(SpanishStemmer.a_2, 11); + if (among_var == 0) + { + return false; + } + // call RV, line 72 + if (!this.r_RV()) + { + return false; + } + switch (among_var) { + case 0: + return false; + case 1: + // (, line 73 + // ], line 73 + this.bra = this.cursor; + // <-, line 73 + if (!this.slice_from("iendo")) + { + return false; + } + break; + case 2: + // (, line 74 + // ], line 74 + this.bra = this.cursor; + // <-, line 74 + if (!this.slice_from("ando")) + { + return false; + } + break; + case 3: + // (, line 75 + // ], line 75 + this.bra = this.cursor; + // <-, line 75 + if (!this.slice_from("ar")) + { + return false; + } + break; + case 4: + // (, line 76 + // ], line 76 + this.bra = this.cursor; + // <-, line 76 + if (!this.slice_from("er")) + { + return false; + } + break; + case 5: + // (, line 77 + // ], line 77 + this.bra = this.cursor; + // <-, line 77 + if (!this.slice_from("ir")) + { + return false; + } + break; + case 6: + // (, line 81 + // delete, line 81 + if (!this.slice_del()) + { + return false; + } + break; + case 7: + // (, line 82 + // literal, line 82 + if (!(this.eq_s_b(1, "u"))) + { + return false; + } 
+ // delete, line 82 + if (!this.slice_del()) + { + return false; + } + break; + } + return true; + } + + function r_standard_suffix () : boolean + { + var among_var : int; + var v_1 : int; + var v_2 : int; + var v_3 : int; + var v_4 : int; + var v_5 : int; + // (, line 86 + // [, line 87 + this.ket = this.cursor; + // substring, line 87 + among_var = this.find_among_b(SpanishStemmer.a_6, 46); + if (among_var == 0) + { + return false; + } + // ], line 87 + this.bra = this.cursor; + switch (among_var) { + case 0: + return false; + case 1: + // (, line 98 + // call R2, line 99 + if (!this.r_R2()) + { + return false; + } + // delete, line 99 + if (!this.slice_del()) + { + return false; + } + break; + case 2: + // (, line 104 + // call R2, line 105 + if (!this.r_R2()) + { + return false; + } + // delete, line 105 + if (!this.slice_del()) + { + return false; + } + // try, line 106 + v_1 = this.limit - this.cursor; + var lab0 = true; + lab0: while (lab0 == true) + { + lab0 = false; + // (, line 106 + // [, line 106 + this.ket = this.cursor; + // literal, line 106 + if (!(this.eq_s_b(2, "ic"))) + { + this.cursor = this.limit - v_1; + break lab0; + } + // ], line 106 + this.bra = this.cursor; + // call R2, line 106 + if (!this.r_R2()) + { + this.cursor = this.limit - v_1; + break lab0; + } + // delete, line 106 + if (!this.slice_del()) + { + return false; + } + } + break; + case 3: + // (, line 110 + // call R2, line 111 + if (!this.r_R2()) + { + return false; + } + // <-, line 111 + if (!this.slice_from("log")) + { + return false; + } + break; + case 4: + // (, line 114 + // call R2, line 115 + if (!this.r_R2()) + { + return false; + } + // <-, line 115 + if (!this.slice_from("u")) + { + return false; + } + break; + case 5: + // (, line 118 + // call R2, line 119 + if (!this.r_R2()) + { + return false; + } + // <-, line 119 + if (!this.slice_from("ente")) + { + return false; + } + break; + case 6: + // (, line 122 + // call R1, line 123 + if (!this.r_R1()) + { + return 
false; + } + // delete, line 123 + if (!this.slice_del()) + { + return false; + } + // try, line 124 + v_2 = this.limit - this.cursor; + var lab1 = true; + lab1: while (lab1 == true) + { + lab1 = false; + // (, line 124 + // [, line 125 + this.ket = this.cursor; + // substring, line 125 + among_var = this.find_among_b(SpanishStemmer.a_3, 4); + if (among_var == 0) + { + this.cursor = this.limit - v_2; + break lab1; + } + // ], line 125 + this.bra = this.cursor; + // call R2, line 125 + if (!this.r_R2()) + { + this.cursor = this.limit - v_2; + break lab1; + } + // delete, line 125 + if (!this.slice_del()) + { + return false; + } + switch (among_var) { + case 0: + this.cursor = this.limit - v_2; + break lab1; + case 1: + // (, line 126 + // [, line 126 + this.ket = this.cursor; + // literal, line 126 + if (!(this.eq_s_b(2, "at"))) + { + this.cursor = this.limit - v_2; + break lab1; + } + // ], line 126 + this.bra = this.cursor; + // call R2, line 126 + if (!this.r_R2()) + { + this.cursor = this.limit - v_2; + break lab1; + } + // delete, line 126 + if (!this.slice_del()) + { + return false; + } + break; + } + } + break; + case 7: + // (, line 134 + // call R2, line 135 + if (!this.r_R2()) + { + return false; + } + // delete, line 135 + if (!this.slice_del()) + { + return false; + } + // try, line 136 + v_3 = this.limit - this.cursor; + var lab2 = true; + lab2: while (lab2 == true) + { + lab2 = false; + // (, line 136 + // [, line 137 + this.ket = this.cursor; + // substring, line 137 + among_var = this.find_among_b(SpanishStemmer.a_4, 3); + if (among_var == 0) + { + this.cursor = this.limit - v_3; + break lab2; + } + // ], line 137 + this.bra = this.cursor; + switch (among_var) { + case 0: + this.cursor = this.limit - v_3; + break lab2; + case 1: + // (, line 140 + // call R2, line 140 + if (!this.r_R2()) + { + this.cursor = this.limit - v_3; + break lab2; + } + // delete, line 140 + if (!this.slice_del()) + { + return false; + } + break; + } + } + break; + case 8: + 
// (, line 146 + // call R2, line 147 + if (!this.r_R2()) + { + return false; + } + // delete, line 147 + if (!this.slice_del()) + { + return false; + } + // try, line 148 + v_4 = this.limit - this.cursor; + var lab3 = true; + lab3: while (lab3 == true) + { + lab3 = false; + // (, line 148 + // [, line 149 + this.ket = this.cursor; + // substring, line 149 + among_var = this.find_among_b(SpanishStemmer.a_5, 3); + if (among_var == 0) + { + this.cursor = this.limit - v_4; + break lab3; + } + // ], line 149 + this.bra = this.cursor; + switch (among_var) { + case 0: + this.cursor = this.limit - v_4; + break lab3; + case 1: + // (, line 152 + // call R2, line 152 + if (!this.r_R2()) + { + this.cursor = this.limit - v_4; + break lab3; + } + // delete, line 152 + if (!this.slice_del()) + { + return false; + } + break; + } + } + break; + case 9: + // (, line 158 + // call R2, line 159 + if (!this.r_R2()) + { + return false; + } + // delete, line 159 + if (!this.slice_del()) + { + return false; + } + // try, line 160 + v_5 = this.limit - this.cursor; + var lab4 = true; + lab4: while (lab4 == true) + { + lab4 = false; + // (, line 160 + // [, line 161 + this.ket = this.cursor; + // literal, line 161 + if (!(this.eq_s_b(2, "at"))) + { + this.cursor = this.limit - v_5; + break lab4; + } + // ], line 161 + this.bra = this.cursor; + // call R2, line 161 + if (!this.r_R2()) + { + this.cursor = this.limit - v_5; + break lab4; + } + // delete, line 161 + if (!this.slice_del()) + { + return false; + } + } + break; + } + return true; + } + + function r_y_verb_suffix () : boolean + { + var among_var : int; + var v_1 : int; + var v_2 : int; + // (, line 167 + // setlimit, line 168 + v_1 = this.limit - this.cursor; + // tomark, line 168 + if (this.cursor < this.I_pV) + { + return false; + } + this.cursor = this.I_pV; + v_2 = this.limit_backward; + this.limit_backward = this.cursor; + this.cursor = this.limit - v_1; + // (, line 168 + // [, line 168 + this.ket = this.cursor; + // 
substring, line 168 + among_var = this.find_among_b(SpanishStemmer.a_7, 12); + if (among_var == 0) + { + this.limit_backward = v_2; + return false; + } + // ], line 168 + this.bra = this.cursor; + this.limit_backward = v_2; + switch (among_var) { + case 0: + return false; + case 1: + // (, line 171 + // literal, line 171 + if (!(this.eq_s_b(1, "u"))) + { + return false; + } + // delete, line 171 + if (!this.slice_del()) + { + return false; + } + break; + } + return true; + } + + function r_verb_suffix () : boolean + { + var among_var : int; + var v_1 : int; + var v_2 : int; + var v_3 : int; + var v_4 : int; + // (, line 175 + // setlimit, line 176 + v_1 = this.limit - this.cursor; + // tomark, line 176 + if (this.cursor < this.I_pV) + { + return false; + } + this.cursor = this.I_pV; + v_2 = this.limit_backward; + this.limit_backward = this.cursor; + this.cursor = this.limit - v_1; + // (, line 176 + // [, line 176 + this.ket = this.cursor; + // substring, line 176 + among_var = this.find_among_b(SpanishStemmer.a_8, 96); + if (among_var == 0) + { + this.limit_backward = v_2; + return false; + } + // ], line 176 + this.bra = this.cursor; + this.limit_backward = v_2; + switch (among_var) { + case 0: + return false; + case 1: + // (, line 179 + // try, line 179 + v_3 = this.limit - this.cursor; + var lab0 = true; + lab0: while (lab0 == true) + { + lab0 = false; + // (, line 179 + // literal, line 179 + if (!(this.eq_s_b(1, "u"))) + { + this.cursor = this.limit - v_3; + break lab0; + } + // test, line 179 + v_4 = this.limit - this.cursor; + // literal, line 179 + if (!(this.eq_s_b(1, "g"))) + { + this.cursor = this.limit - v_3; + break lab0; + } + this.cursor = this.limit - v_4; + } + // ], line 179 + this.bra = this.cursor; + // delete, line 179 + if (!this.slice_del()) + { + return false; + } + break; + case 2: + // (, line 200 + // delete, line 200 + if (!this.slice_del()) + { + return false; + } + break; + } + return true; + } + + function r_residual_suffix () : 
boolean + { + var among_var : int; + var v_1 : int; + var v_2 : int; + // (, line 204 + // [, line 205 + this.ket = this.cursor; + // substring, line 205 + among_var = this.find_among_b(SpanishStemmer.a_9, 8); + if (among_var == 0) + { + return false; + } + // ], line 205 + this.bra = this.cursor; + switch (among_var) { + case 0: + return false; + case 1: + // (, line 208 + // call RV, line 208 + if (!this.r_RV()) + { + return false; + } + // delete, line 208 + if (!this.slice_del()) + { + return false; + } + break; + case 2: + // (, line 210 + // call RV, line 210 + if (!this.r_RV()) + { + return false; + } + // delete, line 210 + if (!this.slice_del()) + { + return false; + } + // try, line 210 + v_1 = this.limit - this.cursor; + var lab0 = true; + lab0: while (lab0 == true) + { + lab0 = false; + // (, line 210 + // [, line 210 + this.ket = this.cursor; + // literal, line 210 + if (!(this.eq_s_b(1, "u"))) + { + this.cursor = this.limit - v_1; + break lab0; + } + // ], line 210 + this.bra = this.cursor; + // test, line 210 + v_2 = this.limit - this.cursor; + // literal, line 210 + if (!(this.eq_s_b(1, "g"))) + { + this.cursor = this.limit - v_1; + break lab0; + } + this.cursor = this.limit - v_2; + // call RV, line 210 + if (!this.r_RV()) + { + this.cursor = this.limit - v_1; + break lab0; + } + // delete, line 210 + if (!this.slice_del()) + { + return false; + } + } + break; + } + return true; + } + + override function stem () : boolean + { + var v_1 : int; + var v_2 : int; + var v_3 : int; + var v_4 : int; + var v_5 : int; + var v_6 : int; + // (, line 215 + // do, line 216 + v_1 = this.cursor; + var lab0 = true; + lab0: while (lab0 == true) + { + lab0 = false; + // call mark_regions, line 216 + if (!this.r_mark_regions()) + { + break lab0; + } + } + this.cursor = v_1; + // backwards, line 217 + this.limit_backward = this.cursor; this.cursor = this.limit; + // (, line 217 + // do, line 218 + v_2 = this.limit - this.cursor; + var lab1 = true; + lab1: while (lab1 
== true) + { + lab1 = false; + // call attached_pronoun, line 218 + if (!this.r_attached_pronoun()) + { + break lab1; + } + } + this.cursor = this.limit - v_2; + // do, line 219 + v_3 = this.limit - this.cursor; + var lab2 = true; + lab2: while (lab2 == true) + { + lab2 = false; + // (, line 219 + // or, line 219 + var lab3 = true; + lab3: while (lab3 == true) + { + lab3 = false; + v_4 = this.limit - this.cursor; + var lab4 = true; + lab4: while (lab4 == true) + { + lab4 = false; + // call standard_suffix, line 219 + if (!this.r_standard_suffix()) + { + break lab4; + } + break lab3; + } + this.cursor = this.limit - v_4; + var lab5 = true; + lab5: while (lab5 == true) + { + lab5 = false; + // call y_verb_suffix, line 220 + if (!this.r_y_verb_suffix()) + { + break lab5; + } + break lab3; + } + this.cursor = this.limit - v_4; + // call verb_suffix, line 221 + if (!this.r_verb_suffix()) + { + break lab2; + } + } + } + this.cursor = this.limit - v_3; + // do, line 223 + v_5 = this.limit - this.cursor; + var lab6 = true; + lab6: while (lab6 == true) + { + lab6 = false; + // call residual_suffix, line 223 + if (!this.r_residual_suffix()) + { + break lab6; + } + } + this.cursor = this.limit - v_5; + this.cursor = this.limit_backward; // do, line 225 + v_6 = this.cursor; + var lab7 = true; + lab7: while (lab7 == true) + { + lab7 = false; + // call postlude, line 225 + if (!this.r_postlude()) + { + break lab7; + } + } + this.cursor = v_6; + return true; + } + + function equals (o : variant) : boolean { + return o instanceof SpanishStemmer; + } + + function hashCode() : int + { + //http://stackoverflow.com/questions/194846/is-there-any-kind-of-hashcode-function-in-javascript + var classname = "SpanishStemmer"; + var hash = 0; + if (classname.length == 0) return hash; + for (var i = 0; i < classname.length; i++) { + var char = classname.charCodeAt(i); + hash = ((hash << 5) - hash) + char; + hash = hash & hash; // Convert to 32bit integer + } + return hash; + } + +} + diff 
--git a/web/server/h2o/libh2o/misc/oktavia/src/stemmer/stemmer.jsx b/web/server/h2o/libh2o/misc/oktavia/src/stemmer/stemmer.jsx new file mode 100644 index 00000000..43bd75fa --- /dev/null +++ b/web/server/h2o/libh2o/misc/oktavia/src/stemmer/stemmer.jsx @@ -0,0 +1,5 @@ +interface Stemmer +{ + function stemWord (word : string) : string; + function stemWords (words : string[]) : string[]; +} diff --git a/web/server/h2o/libh2o/misc/oktavia/src/stemmer/swedish-stemmer.jsx b/web/server/h2o/libh2o/misc/oktavia/src/stemmer/swedish-stemmer.jsx new file mode 100644 index 00000000..536094ae --- /dev/null +++ b/web/server/h2o/libh2o/misc/oktavia/src/stemmer/swedish-stemmer.jsx @@ -0,0 +1,416 @@ +// This file was generated automatically by the Snowball to JSX compiler + +import "base-stemmer.jsx"; +import "among.jsx"; + + /** + * This class was automatically generated by a Snowball to JSX compiler + * It implements the stemming algorithm defined by a snowball script. + */ + +class SwedishStemmer extends BaseStemmer +{ + static const serialVersionUID = 1; + static const methodObject = new SwedishStemmer(); + + static const a_0 = [ + new Among("a", -1, 1), + new Among("arna", 0, 1), + new Among("erna", 0, 1), + new Among("heterna", 2, 1), + new Among("orna", 0, 1), + new Among("ad", -1, 1), + new Among("e", -1, 1), + new Among("ade", 6, 1), + new Among("ande", 6, 1), + new Among("arne", 6, 1), + new Among("are", 6, 1), + new Among("aste", 6, 1), + new Among("en", -1, 1), + new Among("anden", 12, 1), + new Among("aren", 12, 1), + new Among("heten", 12, 1), + new Among("ern", -1, 1), + new Among("ar", -1, 1), + new Among("er", -1, 1), + new Among("heter", 18, 1), + new Among("or", -1, 1), + new Among("s", -1, 2), + new Among("as", 21, 1), + new Among("arnas", 22, 1), + new Among("ernas", 22, 1), + new Among("ornas", 22, 1), + new Among("es", 21, 1), + new Among("ades", 26, 1), + new Among("andes", 26, 1), + new Among("ens", 21, 1), + new Among("arens", 29, 1), + new Among("hetens", 
29, 1), + new Among("erns", 21, 1), + new Among("at", -1, 1), + new Among("andet", -1, 1), + new Among("het", -1, 1), + new Among("ast", -1, 1) + ];
+
+    // Suffix table searched by r_consonant_pair.
+    static const a_1 = [
+        new Among("dd", -1, -1),
+        new Among("gd", -1, -1),
+        new Among("nn", -1, -1),
+        new Among("dt", -1, -1),
+        new Among("gt", -1, -1),
+        new Among("kt", -1, -1),
+        new Among("tt", -1, -1)
+    ];
+
+    // Suffix table searched by r_other_suffix.
+    static const a_2 = [
+        new Among("ig", -1, 1),
+        new Among("lig", 0, 1),
+        new Among("els", -1, 1),
+        new Among("fullt", -1, 3),
+        new Among("l\u00F6st", -1, 2)
+    ];
+
+    // Grouping table handed to in_grouping/out_grouping by r_mark_regions (bounds 97..246).
+    static const g_v = [17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 24, 0, 32] : int[];
+
+    // Grouping table handed to in_grouping_b by r_main_suffix (bounds 98..121).
+    static const g_s_ending = [119, 127, 149] : int[];
+
+    // Region marks; both are written by r_mark_regions.
+    var I_x : int = 0;
+    var I_p1 : int = 0;
+
+    /**
+     * Copies I_x and I_p1 from `other`, then delegates to the base class copy_from.
+     */
+    function copy_from (other : SwedishStemmer) : void
+    {
+        this.I_p1 = other.I_p1;
+        this.I_x = other.I_x;
+        super.copy_from(other);
+    }
+
+    /**
+     * Computes I_x (cursor + 3) and I_p1 for the current word.
+     * Returns false when the hop of 3 does not fit or when either scan
+     * reaches this.limit; I_p1 is never left smaller than I_x on success.
+     */
+    function r_mark_regions () : boolean
+    {
+        this.I_p1 = this.limit;
+        // test ( hop 3 setmark x ): only the mark is recorded, the cursor stays put.
+        var hop : int = this.cursor + 3;
+        if (hop < 0 || hop > this.limit)
+        {
+            return false;
+        }
+        this.I_x = hop;
+        // goto: step forward until in_grouping(g_v) succeeds; the cursor is
+        // rewound after every probe, so it ends in front of the match.
+        while (true)
+        {
+            var probeStart : int = this.cursor;
+            var matched : boolean = this.in_grouping(SwedishStemmer.g_v, 97, 246);
+            this.cursor = probeStart;
+            if (matched) break;
+            if (this.cursor >= this.limit) return false;
+            this.cursor++;
+        }
+        // gopast: step forward until out_grouping(g_v) succeeds.
+        while (!this.out_grouping(SwedishStemmer.g_v, 97, 246))
+        {
+            if (this.cursor >= this.limit) return false;
+            this.cursor++;
+        }
+        this.I_p1 = this.cursor;
+        // try: clamp p1 up to x.
+        if (this.I_p1 < this.I_x)
+        {
+            this.I_p1 = this.I_x;
+        }
+        return true;
+    }
+
+    /**
+     * Looks up a_0 backwards with limit_backward narrowed to I_p1 and applies
+     * the selected action: 1 deletes the slice, 2 deletes it only when
+     * in_grouping_b(g_s_ending) succeeds first.
+     */
+    function r_main_suffix () : boolean
+    {
+        if (this.cursor < this.I_p1)
+        {
+            return false;
+        }
+        // The backward limit is narrowed only for the table search; the cursor
+        // itself ends up exactly where it started.
+        var outerLimit : int = this.limit_backward;
+        this.limit_backward = this.I_p1;
+        this.ket = this.cursor;
+        var action : int = this.find_among_b(SwedishStemmer.a_0, 37);
+        this.limit_backward = outerLimit;
+        if (action == 0)
+        {
+            return false;
+        }
+        this.bra = this.cursor;
+        if (action == 2 && !this.in_grouping_b(SwedishStemmer.g_s_ending, 98, 121))
+        {
+            return false;
+        }
+        if (action == 1 || action == 2)
+        {
+            return this.slice_del();
+        }
+        return true;
+    }
+
+    /**
+     * With limit_backward narrowed to I_p1: when find_among_b(a_1) succeeds,
+     * rewinds the cursor and deletes the single character before it.
+     */
+    function r_consonant_pair () : boolean
+    {
+        if (this.cursor < this.I_p1)
+        {
+            return false;
+        }
+        var outerLimit : int = this.limit_backward;
+        this.limit_backward = this.I_p1;
+        // and: the table lookup is only a test, so the cursor is put back afterwards.
+        var fromEnd : int = this.limit - this.cursor;
+        if (this.find_among_b(SwedishStemmer.a_1, 7) == 0)
+        {
+            this.limit_backward = outerLimit;
+            return false;
+        }
+        this.cursor = this.limit - fromEnd;
+        this.ket = this.cursor;
+        // next: one character back, which must stay inside the narrowed region.
+        if (this.cursor <= this.limit_backward)
+        {
+            this.limit_backward = outerLimit;
+            return false;
+        }
+        this.cursor--;
+        this.bra = this.cursor;
+        // As generated: limit_backward is restored only when the deletion succeeds.
+        if (!this.slice_del())
+        {
+            return false;
+        }
+        this.limit_backward = outerLimit;
+        return true;
+    }
+
+    /**
+     * Looks up a_2 backwards with limit_backward narrowed to I_p1 and applies
+     * the selected action: 1 deletes the slice, 2 replaces it with "l\u00F6s",
+     * 3 replaces it with "full".
+     */
+    function r_other_suffix () : boolean
+    {
+        if (this.cursor < this.I_p1)
+        {
+            return false;
+        }
+        var outerLimit : int = this.limit_backward;
+        this.limit_backward = this.I_p1;
+        this.ket = this.cursor;
+        var action : int = this.find_among_b(SwedishStemmer.a_2, 5);
+        if (action == 0)
+        {
+            this.limit_backward = outerLimit;
+            return false;
+        }
+        this.bra = this.cursor;
+        var edited : boolean = true;
+        if (action == 1)
+        {
+            edited = this.slice_del();
+        }
+        else if (action == 2)
+        {
+            edited = this.slice_from("l\u00F6s");
+        }
+        else if (action == 3)
+        {
+            edited = this.slice_from("full");
+        }
+        // As generated: limit_backward is restored only when the edit succeeds.
+        if (!edited)
+        {
+            return false;
+        }
+        this.limit_backward = outerLimit;
+        return true;
+    }
+
+    /**
+     * Entry point: runs r_mark_regions forwards, then r_main_suffix,
+     * r_consonant_pair and r_other_suffix backwards. The result of each step
+     * is ignored and the method always returns true.
+     */
+    override function stem () : boolean
+    {
+        // do mark_regions, putting the cursor back afterwards.
+        var origin : int = this.cursor;
+        this.r_mark_regions();
+        this.cursor = origin;
+        // backwards: work from the end of the word.
+        this.limit_backward = this.cursor;
+        this.cursor = this.limit;
+        // Each step starts at the same distance from this.limit; the limit is
+        // re-read after every step.
+        var fromEnd : int = this.limit - this.cursor;
+        this.r_main_suffix();
+        this.cursor = this.limit - fromEnd;
+        this.r_consonant_pair();
+        this.cursor = this.limit - fromEnd;
+        this.r_other_suffix();
+        this.cursor = this.limit_backward;
+        return true;
+    }
+
+    /** True when `o` is a SwedishStemmer. */
+    function equals (o : variant) : boolean
+    {
+        return o instanceof SwedishStemmer;
+    }
+
+    /**
+     * Hash of the fixed string "SwedishStemmer", folded as hash * 31 + charCode.
+     * See http://stackoverflow.com/questions/194846/is-there-any-kind-of-hashcode-function-in-javascript
+     */
+    function hashCode() : int
+    {
+        var label = "SwedishStemmer";
+        var acc = 0;
+        for (var k = 0; k < label.length; k++)
+        {
+            acc = ((acc << 5) - acc) + label.charCodeAt(k);
+            acc = acc & acc; // Convert to 32bit integer
+        }
+        return acc;
+    }
+
+} + + diff --git a/web/server/h2o/libh2o/misc/oktavia/src/stemmer/turkish-stemmer.jsx b/web/server/h2o/libh2o/misc/oktavia/src/stemmer/turkish-stemmer.jsx new file mode 100644 index 00000000..d26ed8ba --- /dev/null +++ b/web/server/h2o/libh2o/misc/oktavia/src/stemmer/turkish-stemmer.jsx @@ -0,0 +1,3824 @@ +// This file was generated automatically by the Snowball to JSX compiler + +import "base-stemmer.jsx"; +import "among.jsx"; + + /** + * This class was automatically generated by a Snowball to JSX compiler + * It implements the stemming algorithm defined by a snowball script.
+ */ + +class TurkishStemmer extends BaseStemmer +{ + static const serialVersionUID = 1; + static const methodObject = new TurkishStemmer(); + + static const a_0 = [ + new Among("m", -1, -1), + new Among("n", -1, -1), + new Among("miz", -1, -1), + new Among("niz", -1, -1), + new Among("muz", -1, -1), + new Among("nuz", -1, -1), + new Among("m\u00FCz", -1, -1), + new Among("n\u00FCz", -1, -1), + new Among("m\u0131z", -1, -1), + new Among("n\u0131z", -1, -1) + ]; + + static const a_1 = [ + new Among("leri", -1, -1), + new Among("lar\u0131", -1, -1) + ]; + + static const a_2 = [ + new Among("ni", -1, -1), + new Among("nu", -1, -1), + new Among("n\u00FC", -1, -1), + new Among("n\u0131", -1, -1) + ]; + + static const a_3 = [ + new Among("in", -1, -1), + new Among("un", -1, -1), + new Among("\u00FCn", -1, -1), + new Among("\u0131n", -1, -1) + ]; + + static const a_4 = [ + new Among("a", -1, -1), + new Among("e", -1, -1) + ]; + + static const a_5 = [ + new Among("na", -1, -1), + new Among("ne", -1, -1) + ]; + + static const a_6 = [ + new Among("da", -1, -1), + new Among("ta", -1, -1), + new Among("de", -1, -1), + new Among("te", -1, -1) + ]; + + static const a_7 = [ + new Among("nda", -1, -1), + new Among("nde", -1, -1) + ]; + + static const a_8 = [ + new Among("dan", -1, -1), + new Among("tan", -1, -1), + new Among("den", -1, -1), + new Among("ten", -1, -1) + ]; + + static const a_9 = [ + new Among("ndan", -1, -1), + new Among("nden", -1, -1) + ]; + + static const a_10 = [ + new Among("la", -1, -1), + new Among("le", -1, -1) + ]; + + static const a_11 = [ + new Among("ca", -1, -1), + new Among("ce", -1, -1) + ]; + + static const a_12 = [ + new Among("im", -1, -1), + new Among("um", -1, -1), + new Among("\u00FCm", -1, -1), + new Among("\u0131m", -1, -1) + ]; + + static const a_13 = [ + new Among("sin", -1, -1), + new Among("sun", -1, -1), + new Among("s\u00FCn", -1, -1), + new Among("s\u0131n", -1, -1) + ]; + + static const a_14 = [ + new Among("iz", -1, -1), + new 
Among("uz", -1, -1), + new Among("\u00FCz", -1, -1), + new Among("\u0131z", -1, -1) + ]; + + static const a_15 = [ + new Among("siniz", -1, -1), + new Among("sunuz", -1, -1), + new Among("s\u00FCn\u00FCz", -1, -1), + new Among("s\u0131n\u0131z", -1, -1) + ]; + + static const a_16 = [ + new Among("lar", -1, -1), + new Among("ler", -1, -1) + ]; + + static const a_17 = [ + new Among("niz", -1, -1), + new Among("nuz", -1, -1), + new Among("n\u00FCz", -1, -1), + new Among("n\u0131z", -1, -1) + ]; + + static const a_18 = [ + new Among("dir", -1, -1), + new Among("tir", -1, -1), + new Among("dur", -1, -1), + new Among("tur", -1, -1), + new Among("d\u00FCr", -1, -1), + new Among("t\u00FCr", -1, -1), + new Among("d\u0131r", -1, -1), + new Among("t\u0131r", -1, -1) + ]; + + static const a_19 = [ + new Among("cas\u0131na", -1, -1), + new Among("cesine", -1, -1) + ]; + + static const a_20 = [ + new Among("di", -1, -1), + new Among("ti", -1, -1), + new Among("dik", -1, -1), + new Among("tik", -1, -1), + new Among("duk", -1, -1), + new Among("tuk", -1, -1), + new Among("d\u00FCk", -1, -1), + new Among("t\u00FCk", -1, -1), + new Among("d\u0131k", -1, -1), + new Among("t\u0131k", -1, -1), + new Among("dim", -1, -1), + new Among("tim", -1, -1), + new Among("dum", -1, -1), + new Among("tum", -1, -1), + new Among("d\u00FCm", -1, -1), + new Among("t\u00FCm", -1, -1), + new Among("d\u0131m", -1, -1), + new Among("t\u0131m", -1, -1), + new Among("din", -1, -1), + new Among("tin", -1, -1), + new Among("dun", -1, -1), + new Among("tun", -1, -1), + new Among("d\u00FCn", -1, -1), + new Among("t\u00FCn", -1, -1), + new Among("d\u0131n", -1, -1), + new Among("t\u0131n", -1, -1), + new Among("du", -1, -1), + new Among("tu", -1, -1), + new Among("d\u00FC", -1, -1), + new Among("t\u00FC", -1, -1), + new Among("d\u0131", -1, -1), + new Among("t\u0131", -1, -1) + ]; + + static const a_21 = [ + new Among("sa", -1, -1), + new Among("se", -1, -1), + new Among("sak", -1, -1), + new Among("sek", -1, 
-1), + new Among("sam", -1, -1), + new Among("sem", -1, -1), + new Among("san", -1, -1), + new Among("sen", -1, -1) + ]; + + static const a_22 = [ + new Among("mi\u015F", -1, -1), + new Among("mu\u015F", -1, -1), + new Among("m\u00FC\u015F", -1, -1), + new Among("m\u0131\u015F", -1, -1) + ]; + + static const a_23 = [ + new Among("b", -1, 1), + new Among("c", -1, 2), + new Among("d", -1, 3), + new Among("\u011F", -1, 4) + ]; + + static const g_vowel = [17, 65, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 32, 8, 0, 0, 0, 0, 0, 0, 1] : int[]; + + static const g_U = [1, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 0, 0, 0, 1] : int[]; + + static const g_vowel1 = [1, 64, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1] : int[]; + + static const g_vowel2 = [17, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 130] : int[]; + + static const g_vowel3 = [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1] : int[]; + + static const g_vowel4 = [17] : int[]; + + static const g_vowel5 = [65] : int[]; + + static const g_vowel6 = [65] : int[]; + + var B_continue_stemming_noun_suffixes : boolean = false; + var I_strlen : int = 0; + + function copy_from (other : TurkishStemmer) : void + { + this.B_continue_stemming_noun_suffixes = other.B_continue_stemming_noun_suffixes; + this.I_strlen = other.I_strlen; + super.copy_from(other); + } + + function r_check_vowel_harmony () : boolean + { + var v_1 : int; + var v_2 : int; + var v_3 : int; + var v_4 : int; + var v_5 : int; + var v_6 : int; + var v_7 : int; + var v_8 : int; + var v_9 : int; + var v_10 : int; + var v_11 : int; + // (, line 111 + // test, line 112 + v_1 = this.limit - this.cursor; + // (, line 113 + // (, line 114 + // goto, line 114 + golab0: while(true) + { + v_2 = this.limit - this.cursor; + var lab1 = true; + lab1: while (lab1 == true) + { + lab1 = false; + if (!(this.in_grouping_b(TurkishStemmer.g_vowel, 97, 305))) + { + break lab1; + } + 
this.cursor = this.limit - v_2; + break golab0; + } + this.cursor = this.limit - v_2; + if (this.cursor <= this.limit_backward) + { + return false; + } + this.cursor--; + } + // (, line 115 + // or, line 116 + var lab2 = true; + lab2: while (lab2 == true) + { + lab2 = false; + v_3 = this.limit - this.cursor; + var lab3 = true; + lab3: while (lab3 == true) + { + lab3 = false; + // (, line 116 + // literal, line 116 + if (!(this.eq_s_b(1, "a"))) + { + break lab3; + } + // goto, line 116 + golab4: while(true) + { + v_4 = this.limit - this.cursor; + var lab5 = true; + lab5: while (lab5 == true) + { + lab5 = false; + if (!(this.in_grouping_b(TurkishStemmer.g_vowel1, 97, 305))) + { + break lab5; + } + this.cursor = this.limit - v_4; + break golab4; + } + this.cursor = this.limit - v_4; + if (this.cursor <= this.limit_backward) + { + break lab3; + } + this.cursor--; + } + break lab2; + } + this.cursor = this.limit - v_3; + var lab6 = true; + lab6: while (lab6 == true) + { + lab6 = false; + // (, line 117 + // literal, line 117 + if (!(this.eq_s_b(1, "e"))) + { + break lab6; + } + // goto, line 117 + golab7: while(true) + { + v_5 = this.limit - this.cursor; + var lab8 = true; + lab8: while (lab8 == true) + { + lab8 = false; + if (!(this.in_grouping_b(TurkishStemmer.g_vowel2, 101, 252))) + { + break lab8; + } + this.cursor = this.limit - v_5; + break golab7; + } + this.cursor = this.limit - v_5; + if (this.cursor <= this.limit_backward) + { + break lab6; + } + this.cursor--; + } + break lab2; + } + this.cursor = this.limit - v_3; + var lab9 = true; + lab9: while (lab9 == true) + { + lab9 = false; + // (, line 118 + // literal, line 118 + if (!(this.eq_s_b(1, "\u0131"))) + { + break lab9; + } + // goto, line 118 + golab10: while(true) + { + v_6 = this.limit - this.cursor; + var lab11 = true; + lab11: while (lab11 == true) + { + lab11 = false; + if (!(this.in_grouping_b(TurkishStemmer.g_vowel3, 97, 305))) + { + break lab11; + } + this.cursor = this.limit - v_6; + break 
golab10; + } + this.cursor = this.limit - v_6; + if (this.cursor <= this.limit_backward) + { + break lab9; + } + this.cursor--; + } + break lab2; + } + this.cursor = this.limit - v_3; + var lab12 = true; + lab12: while (lab12 == true) + { + lab12 = false; + // (, line 119 + // literal, line 119 + if (!(this.eq_s_b(1, "i"))) + { + break lab12; + } + // goto, line 119 + golab13: while(true) + { + v_7 = this.limit - this.cursor; + var lab14 = true; + lab14: while (lab14 == true) + { + lab14 = false; + if (!(this.in_grouping_b(TurkishStemmer.g_vowel4, 101, 105))) + { + break lab14; + } + this.cursor = this.limit - v_7; + break golab13; + } + this.cursor = this.limit - v_7; + if (this.cursor <= this.limit_backward) + { + break lab12; + } + this.cursor--; + } + break lab2; + } + this.cursor = this.limit - v_3; + var lab15 = true; + lab15: while (lab15 == true) + { + lab15 = false; + // (, line 120 + // literal, line 120 + if (!(this.eq_s_b(1, "o"))) + { + break lab15; + } + // goto, line 120 + golab16: while(true) + { + v_8 = this.limit - this.cursor; + var lab17 = true; + lab17: while (lab17 == true) + { + lab17 = false; + if (!(this.in_grouping_b(TurkishStemmer.g_vowel5, 111, 117))) + { + break lab17; + } + this.cursor = this.limit - v_8; + break golab16; + } + this.cursor = this.limit - v_8; + if (this.cursor <= this.limit_backward) + { + break lab15; + } + this.cursor--; + } + break lab2; + } + this.cursor = this.limit - v_3; + var lab18 = true; + lab18: while (lab18 == true) + { + lab18 = false; + // (, line 121 + // literal, line 121 + if (!(this.eq_s_b(1, "\u00F6"))) + { + break lab18; + } + // goto, line 121 + golab19: while(true) + { + v_9 = this.limit - this.cursor; + var lab20 = true; + lab20: while (lab20 == true) + { + lab20 = false; + if (!(this.in_grouping_b(TurkishStemmer.g_vowel6, 246, 252))) + { + break lab20; + } + this.cursor = this.limit - v_9; + break golab19; + } + this.cursor = this.limit - v_9; + if (this.cursor <= this.limit_backward) + { + 
break lab18; + } + this.cursor--; + } + break lab2; + } + this.cursor = this.limit - v_3; + var lab21 = true; + lab21: while (lab21 == true) + { + lab21 = false; + // (, line 122 + // literal, line 122 + if (!(this.eq_s_b(1, "u"))) + { + break lab21; + } + // goto, line 122 + golab22: while(true) + { + v_10 = this.limit - this.cursor; + var lab23 = true; + lab23: while (lab23 == true) + { + lab23 = false; + if (!(this.in_grouping_b(TurkishStemmer.g_vowel5, 111, 117))) + { + break lab23; + } + this.cursor = this.limit - v_10; + break golab22; + } + this.cursor = this.limit - v_10; + if (this.cursor <= this.limit_backward) + { + break lab21; + } + this.cursor--; + } + break lab2; + } + this.cursor = this.limit - v_3; + // (, line 123 + // literal, line 123 + if (!(this.eq_s_b(1, "\u00FC"))) + { + return false; + } + // goto, line 123 + golab24: while(true) + { + v_11 = this.limit - this.cursor; + var lab25 = true; + lab25: while (lab25 == true) + { + lab25 = false; + if (!(this.in_grouping_b(TurkishStemmer.g_vowel6, 246, 252))) + { + break lab25; + } + this.cursor = this.limit - v_11; + break golab24; + } + this.cursor = this.limit - v_11; + if (this.cursor <= this.limit_backward) + { + return false; + } + this.cursor--; + } + } + this.cursor = this.limit - v_1; + return true; + } + + function r_mark_suffix_with_optional_n_consonant () : boolean + { + var v_1 : int; + var v_2 : int; + var v_3 : int; + var v_4 : int; + var v_5 : int; + var v_6 : int; + var v_7 : int; + // (, line 132 + // or, line 134 + var lab0 = true; + lab0: while (lab0 == true) + { + lab0 = false; + v_1 = this.limit - this.cursor; + var lab1 = true; + lab1: while (lab1 == true) + { + lab1 = false; + // (, line 133 + // (, line 133 + // test, line 133 + v_2 = this.limit - this.cursor; + // literal, line 133 + if (!(this.eq_s_b(1, "n"))) + { + break lab1; + } + this.cursor = this.limit - v_2; + // next, line 133 + if (this.cursor <= this.limit_backward) + { + break lab1; + } + this.cursor--; + // (, 
line 133 + // test, line 133 + v_3 = this.limit - this.cursor; + if (!(this.in_grouping_b(TurkishStemmer.g_vowel, 97, 305))) + { + break lab1; + } + this.cursor = this.limit - v_3; + break lab0; + } + this.cursor = this.limit - v_1; + // (, line 135 + // (, line 135 + // not, line 135 + { + v_4 = this.limit - this.cursor; + var lab2 = true; + lab2: while (lab2 == true) + { + lab2 = false; + // (, line 135 + // test, line 135 + v_5 = this.limit - this.cursor; + // literal, line 135 + if (!(this.eq_s_b(1, "n"))) + { + break lab2; + } + this.cursor = this.limit - v_5; + return false; + } + this.cursor = this.limit - v_4; + } + // test, line 135 + v_6 = this.limit - this.cursor; + // (, line 135 + // next, line 135 + if (this.cursor <= this.limit_backward) + { + return false; + } + this.cursor--; + // (, line 135 + // test, line 135 + v_7 = this.limit - this.cursor; + if (!(this.in_grouping_b(TurkishStemmer.g_vowel, 97, 305))) + { + return false; + } + this.cursor = this.limit - v_7; + this.cursor = this.limit - v_6; + } + return true; + } + + function r_mark_suffix_with_optional_s_consonant () : boolean + { + var v_1 : int; + var v_2 : int; + var v_3 : int; + var v_4 : int; + var v_5 : int; + var v_6 : int; + var v_7 : int; + // (, line 143 + // or, line 145 + var lab0 = true; + lab0: while (lab0 == true) + { + lab0 = false; + v_1 = this.limit - this.cursor; + var lab1 = true; + lab1: while (lab1 == true) + { + lab1 = false; + // (, line 144 + // (, line 144 + // test, line 144 + v_2 = this.limit - this.cursor; + // literal, line 144 + if (!(this.eq_s_b(1, "s"))) + { + break lab1; + } + this.cursor = this.limit - v_2; + // next, line 144 + if (this.cursor <= this.limit_backward) + { + break lab1; + } + this.cursor--; + // (, line 144 + // test, line 144 + v_3 = this.limit - this.cursor; + if (!(this.in_grouping_b(TurkishStemmer.g_vowel, 97, 305))) + { + break lab1; + } + this.cursor = this.limit - v_3; + break lab0; + } + this.cursor = this.limit - v_1; + // (, line 
146 + // (, line 146 + // not, line 146 + { + v_4 = this.limit - this.cursor; + var lab2 = true; + lab2: while (lab2 == true) + { + lab2 = false; + // (, line 146 + // test, line 146 + v_5 = this.limit - this.cursor; + // literal, line 146 + if (!(this.eq_s_b(1, "s"))) + { + break lab2; + } + this.cursor = this.limit - v_5; + return false; + } + this.cursor = this.limit - v_4; + } + // test, line 146 + v_6 = this.limit - this.cursor; + // (, line 146 + // next, line 146 + if (this.cursor <= this.limit_backward) + { + return false; + } + this.cursor--; + // (, line 146 + // test, line 146 + v_7 = this.limit - this.cursor; + if (!(this.in_grouping_b(TurkishStemmer.g_vowel, 97, 305))) + { + return false; + } + this.cursor = this.limit - v_7; + this.cursor = this.limit - v_6; + } + return true; + } + + function r_mark_suffix_with_optional_y_consonant () : boolean + { + var v_1 : int; + var v_2 : int; + var v_3 : int; + var v_4 : int; + var v_5 : int; + var v_6 : int; + var v_7 : int; + // (, line 153 + // or, line 155 + var lab0 = true; + lab0: while (lab0 == true) + { + lab0 = false; + v_1 = this.limit - this.cursor; + var lab1 = true; + lab1: while (lab1 == true) + { + lab1 = false; + // (, line 154 + // (, line 154 + // test, line 154 + v_2 = this.limit - this.cursor; + // literal, line 154 + if (!(this.eq_s_b(1, "y"))) + { + break lab1; + } + this.cursor = this.limit - v_2; + // next, line 154 + if (this.cursor <= this.limit_backward) + { + break lab1; + } + this.cursor--; + // (, line 154 + // test, line 154 + v_3 = this.limit - this.cursor; + if (!(this.in_grouping_b(TurkishStemmer.g_vowel, 97, 305))) + { + break lab1; + } + this.cursor = this.limit - v_3; + break lab0; + } + this.cursor = this.limit - v_1; + // (, line 156 + // (, line 156 + // not, line 156 + { + v_4 = this.limit - this.cursor; + var lab2 = true; + lab2: while (lab2 == true) + { + lab2 = false; + // (, line 156 + // test, line 156 + v_5 = this.limit - this.cursor; + // literal, line 156 + if 
(!(this.eq_s_b(1, "y"))) + { + break lab2; + } + this.cursor = this.limit - v_5; + return false; + } + this.cursor = this.limit - v_4; + } + // test, line 156 + v_6 = this.limit - this.cursor; + // (, line 156 + // next, line 156 + if (this.cursor <= this.limit_backward) + { + return false; + } + this.cursor--; + // (, line 156 + // test, line 156 + v_7 = this.limit - this.cursor; + if (!(this.in_grouping_b(TurkishStemmer.g_vowel, 97, 305))) + { + return false; + } + this.cursor = this.limit - v_7; + this.cursor = this.limit - v_6; + } + return true; + } + + function r_mark_suffix_with_optional_U_vowel () : boolean + { + var v_1 : int; + var v_2 : int; + var v_3 : int; + var v_4 : int; + var v_5 : int; + var v_6 : int; + var v_7 : int; + // (, line 159 + // or, line 161 + var lab0 = true; + lab0: while (lab0 == true) + { + lab0 = false; + v_1 = this.limit - this.cursor; + var lab1 = true; + lab1: while (lab1 == true) + { + lab1 = false; + // (, line 160 + // (, line 160 + // test, line 160 + v_2 = this.limit - this.cursor; + if (!(this.in_grouping_b(TurkishStemmer.g_U, 105, 305))) + { + break lab1; + } + this.cursor = this.limit - v_2; + // next, line 160 + if (this.cursor <= this.limit_backward) + { + break lab1; + } + this.cursor--; + // (, line 160 + // test, line 160 + v_3 = this.limit - this.cursor; + if (!(this.out_grouping_b(TurkishStemmer.g_vowel, 97, 305))) + { + break lab1; + } + this.cursor = this.limit - v_3; + break lab0; + } + this.cursor = this.limit - v_1; + // (, line 162 + // (, line 162 + // not, line 162 + { + v_4 = this.limit - this.cursor; + var lab2 = true; + lab2: while (lab2 == true) + { + lab2 = false; + // (, line 162 + // test, line 162 + v_5 = this.limit - this.cursor; + if (!(this.in_grouping_b(TurkishStemmer.g_U, 105, 305))) + { + break lab2; + } + this.cursor = this.limit - v_5; + return false; + } + this.cursor = this.limit - v_4; + } + // test, line 162 + v_6 = this.limit - this.cursor; + // (, line 162 + // next, line 162 + if 
(this.cursor <= this.limit_backward) + { + return false; + } + this.cursor--; + // (, line 162 + // test, line 162 + v_7 = this.limit - this.cursor; + if (!(this.out_grouping_b(TurkishStemmer.g_vowel, 97, 305))) + { + return false; + } + this.cursor = this.limit - v_7; + this.cursor = this.limit - v_6; + } + return true; + }
+
+    // Each r_mark_* predicate below evaluates its checks left to right and
+    // stops at the first one that fails.
+
+    /** find_among_b(a_0) must be non-zero, then r_mark_suffix_with_optional_U_vowel(). */
+    function r_mark_possessives () : boolean
+    {
+        return this.find_among_b(TurkishStemmer.a_0, 10) != 0
+            && this.r_mark_suffix_with_optional_U_vowel();
+    }
+
+    /** r_check_vowel_harmony(), in_grouping_b(g_U), then r_mark_suffix_with_optional_s_consonant(). */
+    function r_mark_sU () : boolean
+    {
+        return this.r_check_vowel_harmony()
+            && this.in_grouping_b(TurkishStemmer.g_U, 105, 305)
+            && this.r_mark_suffix_with_optional_s_consonant();
+    }
+
+    /** find_among_b(a_1) must be non-zero. */
+    function r_mark_lArI () : boolean
+    {
+        return this.find_among_b(TurkishStemmer.a_1, 2) != 0;
+    }
+
+    /** r_check_vowel_harmony(), in_grouping_b(g_U), then r_mark_suffix_with_optional_y_consonant(). */
+    function r_mark_yU () : boolean
+    {
+        return this.r_check_vowel_harmony()
+            && this.in_grouping_b(TurkishStemmer.g_U, 105, 305)
+            && this.r_mark_suffix_with_optional_y_consonant();
+    }
+
+    /** r_check_vowel_harmony(), then find_among_b(a_2) must be non-zero. */
+    function r_mark_nU () : boolean
+    {
+        return this.r_check_vowel_harmony()
+            && this.find_among_b(TurkishStemmer.a_2, 4) != 0;
+    }
+
+    /** r_check_vowel_harmony(), find_among_b(a_3) non-zero, then r_mark_suffix_with_optional_n_consonant(). */
+    function r_mark_nUn () : boolean
+    {
+        return this.r_check_vowel_harmony()
+            && this.find_among_b(TurkishStemmer.a_3, 4) != 0
+            && this.r_mark_suffix_with_optional_n_consonant();
+    }
+
+    /** r_check_vowel_harmony(), find_among_b(a_4) non-zero, then r_mark_suffix_with_optional_y_consonant(). */
+    function r_mark_yA () : boolean
+    {
+        return this.r_check_vowel_harmony()
+            && this.find_among_b(TurkishStemmer.a_4, 2) != 0
+            && this.r_mark_suffix_with_optional_y_consonant();
+    }
+
+    /** r_check_vowel_harmony(), then find_among_b(a_5) must be non-zero. */
+    function r_mark_nA () : boolean
+    {
+        return this.r_check_vowel_harmony()
+            && this.find_among_b(TurkishStemmer.a_5, 2) != 0;
+    }
+
+    /** r_check_vowel_harmony(), then find_among_b(a_6) must be non-zero. */
+    function r_mark_DA () : boolean
+    {
+        return this.r_check_vowel_harmony()
+            && this.find_among_b(TurkishStemmer.a_6, 4) != 0;
+    }
+
+    /** r_check_vowel_harmony(), then find_among_b(a_7) must be non-zero. */
+    function r_mark_ndA () : boolean
+    {
+        return this.r_check_vowel_harmony()
+            && this.find_among_b(TurkishStemmer.a_7, 2) != 0;
+    }
+
+    /** r_check_vowel_harmony(), then find_among_b(a_8) must be non-zero. */
+    function r_mark_DAn () : boolean
+    {
+        return this.r_check_vowel_harmony()
+            && this.find_among_b(TurkishStemmer.a_8, 4) != 0;
+    }
+
+    /** r_check_vowel_harmony(), then find_among_b(a_9) must be non-zero. */
+    function r_mark_ndAn () : boolean
+    {
+        return this.r_check_vowel_harmony()
+            && this.find_among_b(TurkishStemmer.a_9, 2) != 0;
+    }
+
+    /** r_check_vowel_harmony(), find_among_b(a_10) non-zero, then r_mark_suffix_with_optional_y_consonant(). */
+    function r_mark_ylA () : boolean
+    {
+        return this.r_check_vowel_harmony()
+            && this.find_among_b(TurkishStemmer.a_10, 2) != 0
+            && this.r_mark_suffix_with_optional_y_consonant();
+    }
+
+    /** eq_s_b(2, "ki") must succeed. */
+    function r_mark_ki () : boolean
+    {
+        return this.eq_s_b(2, "ki");
+    }
+
+    /** r_check_vowel_harmony(), find_among_b(a_11) non-zero, then r_mark_suffix_with_optional_n_consonant(). */
+    function r_mark_ncA () : boolean
+    {
+        return this.r_check_vowel_harmony()
+            && this.find_among_b(TurkishStemmer.a_11, 2) != 0
+            && this.r_mark_suffix_with_optional_n_consonant();
+    }
+
+    /** r_check_vowel_harmony(), find_among_b(a_12) non-zero, then r_mark_suffix_with_optional_y_consonant(). */
+    function r_mark_yUm () : boolean
+    {
+        return this.r_check_vowel_harmony()
+            && this.find_among_b(TurkishStemmer.a_12, 4) != 0
+            && this.r_mark_suffix_with_optional_y_consonant();
+    }
+
+    /** r_check_vowel_harmony(), then find_among_b(a_13) must be non-zero. */
+    function r_mark_sUn () : boolean
+    {
+        return this.r_check_vowel_harmony()
+            && this.find_among_b(TurkishStemmer.a_13, 4) != 0;
+    }
+
+    /** r_check_vowel_harmony(), find_among_b(a_14) non-zero, then r_mark_suffix_with_optional_y_consonant(). */
+    function r_mark_yUz () : boolean
+    {
+        return this.r_check_vowel_harmony()
+            && this.find_among_b(TurkishStemmer.a_14, 4) != 0
+            && this.r_mark_suffix_with_optional_y_consonant();
+    }
+
+    /** find_among_b(a_15) must be non-zero. */
+    function r_mark_sUnUz () : boolean
+    {
+        return this.find_among_b(TurkishStemmer.a_15, 4) != 0;
+    }
+
+    /** r_check_vowel_harmony(), then find_among_b(a_16) must be non-zero. */
+    function r_mark_lAr () : boolean
+    {
+        return this.r_check_vowel_harmony()
+            && this.find_among_b(TurkishStemmer.a_16, 2) != 0;
+    }
+
+    /** r_check_vowel_harmony(), then find_among_b(a_17) must be non-zero. */
+    function r_mark_nUz () : boolean
+    {
+        return this.r_check_vowel_harmony()
+            && this.find_among_b(TurkishStemmer.a_17, 4) != 0;
+    }
+
+    /** r_check_vowel_harmony(), then find_among_b(a_18) must be non-zero. */
+    function r_mark_DUr () : boolean
+    {
+        return this.r_check_vowel_harmony()
+            && this.find_among_b(TurkishStemmer.a_18, 8) != 0;
+    }
+
+    /** find_among_b(a_19) must be non-zero. */
+    function r_mark_cAsInA () : boolean
+    {
+        return this.find_among_b(TurkishStemmer.a_19, 2) != 0;
+    }
+
+    /** r_check_vowel_harmony(), find_among_b(a_20) non-zero, then r_mark_suffix_with_optional_y_consonant(). */
+    function r_mark_yDU () : boolean
+    {
+        return this.r_check_vowel_harmony()
+            && this.find_among_b(TurkishStemmer.a_20, 32) != 0
+            && this.r_mark_suffix_with_optional_y_consonant();
+    }
+
+    /** find_among_b(a_21) non-zero, then r_mark_suffix_with_optional_y_consonant(); no harmony check here. */
+    function r_mark_ysA () : boolean
+    {
+        return this.find_among_b(TurkishStemmer.a_21, 8) != 0
+            && this.r_mark_suffix_with_optional_y_consonant();
+    }
+
+    /** r_check_vowel_harmony(), find_among_b(a_22) non-zero, then r_mark_suffix_with_optional_y_consonant(). */
+    function r_mark_ymUs_ () : boolean
+    {
+        return this.r_check_vowel_harmony()
+            && this.find_among_b(TurkishStemmer.a_22, 4) != 0
+            && this.r_mark_suffix_with_optional_y_consonant();
+    }
+
+    /** eq_s_b(3, "ken"), then r_mark_suffix_with_optional_y_consonant(). */
+    function r_mark_yken () : boolean
+    {
+        return this.eq_s_b(3, "ken")
+            && this.r_mark_suffix_with_optional_y_consonant();
+    }
+
+ function r_stem_nominal_verb_suffixes () : boolean + { + var v_1 : int; + var v_2 : int; + var v_3 : int; + var v_4 : int; + var v_5 : int; + var v_6 : int; + var v_7 : int; + var v_8 : int; + var v_9 : int; + var v_10 : int; + // (, line 311 + // [, line 312 + this.ket = this.cursor; + // set continue_stemming_noun_suffixes, line 313 + this.B_continue_stemming_noun_suffixes = true; + // or, line 315 + var lab0 = true; + lab0: while (lab0 == true) + { + lab0 = false; + v_1 = this.limit - this.cursor; + var lab1 = true; + lab1: while (lab1 == true) + { + lab1 = false; + // (, line 314 + // or, line 314 + var lab2 = true; + lab2: while (lab2 == true) + { + lab2 = false; + v_2 = this.limit - this.cursor; + var lab3 = true; + lab3: while (lab3 == true) + { + lab3 = false; + // call mark_ymUs_, line 314 + if (!this.r_mark_ymUs_()) + { + break lab3; + } + break lab2; + } + this.cursor = this.limit - v_2; + var lab4 = true; + lab4: while (lab4 == true) + { + lab4
= false; + // call mark_yDU, line 314 + if (!this.r_mark_yDU()) + { + break lab4; + } + break lab2; + } + this.cursor = this.limit - v_2; + var lab5 = true; + lab5: while (lab5 == true) + { + lab5 = false; + // call mark_ysA, line 314 + if (!this.r_mark_ysA()) + { + break lab5; + } + break lab2; + } + this.cursor = this.limit - v_2; + // call mark_yken, line 314 + if (!this.r_mark_yken()) + { + break lab1; + } + } + break lab0; + } + this.cursor = this.limit - v_1; + var lab6 = true; + lab6: while (lab6 == true) + { + lab6 = false; + // (, line 316 + // call mark_cAsInA, line 316 + if (!this.r_mark_cAsInA()) + { + break lab6; + } + // (, line 316 + // or, line 316 + var lab7 = true; + lab7: while (lab7 == true) + { + lab7 = false; + v_3 = this.limit - this.cursor; + var lab8 = true; + lab8: while (lab8 == true) + { + lab8 = false; + // call mark_sUnUz, line 316 + if (!this.r_mark_sUnUz()) + { + break lab8; + } + break lab7; + } + this.cursor = this.limit - v_3; + var lab9 = true; + lab9: while (lab9 == true) + { + lab9 = false; + // call mark_lAr, line 316 + if (!this.r_mark_lAr()) + { + break lab9; + } + break lab7; + } + this.cursor = this.limit - v_3; + var lab10 = true; + lab10: while (lab10 == true) + { + lab10 = false; + // call mark_yUm, line 316 + if (!this.r_mark_yUm()) + { + break lab10; + } + break lab7; + } + this.cursor = this.limit - v_3; + var lab11 = true; + lab11: while (lab11 == true) + { + lab11 = false; + // call mark_sUn, line 316 + if (!this.r_mark_sUn()) + { + break lab11; + } + break lab7; + } + this.cursor = this.limit - v_3; + var lab12 = true; + lab12: while (lab12 == true) + { + lab12 = false; + // call mark_yUz, line 316 + if (!this.r_mark_yUz()) + { + break lab12; + } + break lab7; + } + this.cursor = this.limit - v_3; + } + // call mark_ymUs_, line 316 + if (!this.r_mark_ymUs_()) + { + break lab6; + } + break lab0; + } + this.cursor = this.limit - v_1; + var lab13 = true; + lab13: while (lab13 == true) + { + lab13 = false; + // (, 
line 318 + // call mark_lAr, line 319 + if (!this.r_mark_lAr()) + { + break lab13; + } + // ], line 319 + this.bra = this.cursor; + // delete, line 319 + if (!this.slice_del()) + { + return false; + } + // try, line 319 + v_4 = this.limit - this.cursor; + var lab14 = true; + lab14: while (lab14 == true) + { + lab14 = false; + // (, line 319 + // [, line 319 + this.ket = this.cursor; + // (, line 319 + // or, line 319 + var lab15 = true; + lab15: while (lab15 == true) + { + lab15 = false; + v_5 = this.limit - this.cursor; + var lab16 = true; + lab16: while (lab16 == true) + { + lab16 = false; + // call mark_DUr, line 319 + if (!this.r_mark_DUr()) + { + break lab16; + } + break lab15; + } + this.cursor = this.limit - v_5; + var lab17 = true; + lab17: while (lab17 == true) + { + lab17 = false; + // call mark_yDU, line 319 + if (!this.r_mark_yDU()) + { + break lab17; + } + break lab15; + } + this.cursor = this.limit - v_5; + var lab18 = true; + lab18: while (lab18 == true) + { + lab18 = false; + // call mark_ysA, line 319 + if (!this.r_mark_ysA()) + { + break lab18; + } + break lab15; + } + this.cursor = this.limit - v_5; + // call mark_ymUs_, line 319 + if (!this.r_mark_ymUs_()) + { + this.cursor = this.limit - v_4; + break lab14; + } + } + } + // unset continue_stemming_noun_suffixes, line 320 + this.B_continue_stemming_noun_suffixes = false; + break lab0; + } + this.cursor = this.limit - v_1; + var lab19 = true; + lab19: while (lab19 == true) + { + lab19 = false; + // (, line 323 + // call mark_nUz, line 323 + if (!this.r_mark_nUz()) + { + break lab19; + } + // (, line 323 + // or, line 323 + var lab20 = true; + lab20: while (lab20 == true) + { + lab20 = false; + v_6 = this.limit - this.cursor; + var lab21 = true; + lab21: while (lab21 == true) + { + lab21 = false; + // call mark_yDU, line 323 + if (!this.r_mark_yDU()) + { + break lab21; + } + break lab20; + } + this.cursor = this.limit - v_6; + // call mark_ysA, line 323 + if (!this.r_mark_ysA()) + { + break lab19; 
+ } + } + break lab0; + } + this.cursor = this.limit - v_1; + var lab22 = true; + lab22: while (lab22 == true) + { + lab22 = false; + // (, line 325 + // (, line 325 + // or, line 325 + var lab23 = true; + lab23: while (lab23 == true) + { + lab23 = false; + v_7 = this.limit - this.cursor; + var lab24 = true; + lab24: while (lab24 == true) + { + lab24 = false; + // call mark_sUnUz, line 325 + if (!this.r_mark_sUnUz()) + { + break lab24; + } + break lab23; + } + this.cursor = this.limit - v_7; + var lab25 = true; + lab25: while (lab25 == true) + { + lab25 = false; + // call mark_yUz, line 325 + if (!this.r_mark_yUz()) + { + break lab25; + } + break lab23; + } + this.cursor = this.limit - v_7; + var lab26 = true; + lab26: while (lab26 == true) + { + lab26 = false; + // call mark_sUn, line 325 + if (!this.r_mark_sUn()) + { + break lab26; + } + break lab23; + } + this.cursor = this.limit - v_7; + // call mark_yUm, line 325 + if (!this.r_mark_yUm()) + { + break lab22; + } + } + // ], line 325 + this.bra = this.cursor; + // delete, line 325 + if (!this.slice_del()) + { + return false; + } + // try, line 325 + v_8 = this.limit - this.cursor; + var lab27 = true; + lab27: while (lab27 == true) + { + lab27 = false; + // (, line 325 + // [, line 325 + this.ket = this.cursor; + // call mark_ymUs_, line 325 + if (!this.r_mark_ymUs_()) + { + this.cursor = this.limit - v_8; + break lab27; + } + } + break lab0; + } + this.cursor = this.limit - v_1; + // (, line 327 + // call mark_DUr, line 327 + if (!this.r_mark_DUr()) + { + return false; + } + // ], line 327 + this.bra = this.cursor; + // delete, line 327 + if (!this.slice_del()) + { + return false; + } + // try, line 327 + v_9 = this.limit - this.cursor; + var lab28 = true; + lab28: while (lab28 == true) + { + lab28 = false; + // (, line 327 + // [, line 327 + this.ket = this.cursor; + // (, line 327 + // or, line 327 + var lab29 = true; + lab29: while (lab29 == true) + { + lab29 = false; + v_10 = this.limit - this.cursor; + var 
lab30 = true; + lab30: while (lab30 == true) + { + lab30 = false; + // call mark_sUnUz, line 327 + if (!this.r_mark_sUnUz()) + { + break lab30; + } + break lab29; + } + this.cursor = this.limit - v_10; + var lab31 = true; + lab31: while (lab31 == true) + { + lab31 = false; + // call mark_lAr, line 327 + if (!this.r_mark_lAr()) + { + break lab31; + } + break lab29; + } + this.cursor = this.limit - v_10; + var lab32 = true; + lab32: while (lab32 == true) + { + lab32 = false; + // call mark_yUm, line 327 + if (!this.r_mark_yUm()) + { + break lab32; + } + break lab29; + } + this.cursor = this.limit - v_10; + var lab33 = true; + lab33: while (lab33 == true) + { + lab33 = false; + // call mark_sUn, line 327 + if (!this.r_mark_sUn()) + { + break lab33; + } + break lab29; + } + this.cursor = this.limit - v_10; + var lab34 = true; + lab34: while (lab34 == true) + { + lab34 = false; + // call mark_yUz, line 327 + if (!this.r_mark_yUz()) + { + break lab34; + } + break lab29; + } + this.cursor = this.limit - v_10; + } + // call mark_ymUs_, line 327 + if (!this.r_mark_ymUs_()) + { + this.cursor = this.limit - v_9; + break lab28; + } + } + } + // ], line 328 + this.bra = this.cursor; + // delete, line 328 + if (!this.slice_del()) + { + return false; + } + return true; + } + + /** + * Sets ket to the cursor and requires r_mark_ki(); then tries, in order and + * with the cursor reset to limit - v_1 between attempts: + * 1. mark_DA, slice_del(), then optionally either mark_lAr and slice_del() + * (followed by an optional recursive call) or mark_possessives and + * slice_del() (followed by an optional mark_lAr, slice_del(), recursive + * call); + * 2. mark_nUn, slice_del(), then optionally mark_lArI and slice_del(), or + * mark_possessives/mark_sU and slice_del() (followed by an optional + * mark_lAr, slice_del(), recursive call), or a recursive call; + * 3. mark_ndA (required), then mark_lArI and slice_del(), or mark_sU and + * slice_del() (followed by an optional mark_lAr, slice_del(), recursive + * call), or a required recursive call. + * The function is recursive: it calls itself in several branches. + * + * @return false when r_mark_ki(), the third alternative, or any slice_del() + * fails; true otherwise + */ + function r_stem_suffix_chain_before_ki () : boolean + { + var v_1 : int; + var v_2 : int; + var v_3 : int; + var v_4 : int; + var v_5 : int; + var v_6 : int; + var v_7 : int; + var v_8 : int; + var v_9 : int; + var v_10 : int; + var v_11 : int; + // (, line 332 + // [, line 333 + this.ket = this.cursor; + // call mark_ki, line 334 + if (!this.r_mark_ki()) + { + return false; + } + // (, line 335 + // or, line 342 + var lab0 = true; + lab0: while (lab0 == true) + { + lab0 = false; + v_1 = this.limit - this.cursor; + var lab1 = true; + lab1: while (lab1 == true) + { + lab1 = false; + // (, line 336 + // call mark_DA, line 336 + if (!this.r_mark_DA()) + { + break lab1; + } + // ], line 336 + this.bra = 
this.cursor; + // delete, line 336 + if (!this.slice_del()) + { + return false; + } + // try, line 336 + v_2 = this.limit - this.cursor; + var lab2 = true; + lab2: while (lab2 == true) + { + lab2 = false; + // (, line 336 + // [, line 336 + this.ket = this.cursor; + // or, line 338 + var lab3 = true; + lab3: while (lab3 == true) + { + lab3 = false; + v_3 = this.limit - this.cursor; + var lab4 = true; + lab4: while (lab4 == true) + { + lab4 = false; + // (, line 337 + // call mark_lAr, line 337 + if (!this.r_mark_lAr()) + { + break lab4; + } + // ], line 337 + this.bra = this.cursor; + // delete, line 337 + if (!this.slice_del()) + { + return false; + } + // try, line 337 + v_4 = this.limit - this.cursor; + var lab5 = true; + lab5: while (lab5 == true) + { + lab5 = false; + // (, line 337 + // call stem_suffix_chain_before_ki, line 337 + if (!this.r_stem_suffix_chain_before_ki()) + { + this.cursor = this.limit - v_4; + break lab5; + } + } + break lab3; + } + this.cursor = this.limit - v_3; + // (, line 339 + // call mark_possessives, line 339 + if (!this.r_mark_possessives()) + { + this.cursor = this.limit - v_2; + break lab2; + } + // ], line 339 + this.bra = this.cursor; + // delete, line 339 + if (!this.slice_del()) + { + return false; + } + // try, line 339 + v_5 = this.limit - this.cursor; + var lab6 = true; + lab6: while (lab6 == true) + { + lab6 = false; + // (, line 339 + // [, line 339 + this.ket = this.cursor; + // call mark_lAr, line 339 + if (!this.r_mark_lAr()) + { + this.cursor = this.limit - v_5; + break lab6; + } + // ], line 339 + this.bra = this.cursor; + // delete, line 339 + if (!this.slice_del()) + { + return false; + } + // call stem_suffix_chain_before_ki, line 339 + if (!this.r_stem_suffix_chain_before_ki()) + { + this.cursor = this.limit - v_5; + break lab6; + } + } + } + } + break lab0; + } + this.cursor = this.limit - v_1; + var lab7 = true; + lab7: while (lab7 == true) + { + lab7 = false; + // (, line 343 + // call mark_nUn, line 343 + if 
(!this.r_mark_nUn()) + { + break lab7; + } + // ], line 343 + this.bra = this.cursor; + // delete, line 343 + if (!this.slice_del()) + { + return false; + } + // try, line 343 + v_6 = this.limit - this.cursor; + var lab8 = true; + lab8: while (lab8 == true) + { + lab8 = false; + // (, line 343 + // [, line 343 + this.ket = this.cursor; + // or, line 345 + var lab9 = true; + lab9: while (lab9 == true) + { + lab9 = false; + v_7 = this.limit - this.cursor; + var lab10 = true; + lab10: while (lab10 == true) + { + lab10 = false; + // (, line 344 + // call mark_lArI, line 344 + if (!this.r_mark_lArI()) + { + break lab10; + } + // ], line 344 + this.bra = this.cursor; + // delete, line 344 + if (!this.slice_del()) + { + return false; + } + break lab9; + } + this.cursor = this.limit - v_7; + var lab11 = true; + lab11: while (lab11 == true) + { + lab11 = false; + // (, line 346 + // [, line 346 + this.ket = this.cursor; + // or, line 346 + var lab12 = true; + lab12: while (lab12 == true) + { + lab12 = false; + v_8 = this.limit - this.cursor; + var lab13 = true; + lab13: while (lab13 == true) + { + lab13 = false; + // call mark_possessives, line 346 + if (!this.r_mark_possessives()) + { + break lab13; + } + break lab12; + } + this.cursor = this.limit - v_8; + // call mark_sU, line 346 + if (!this.r_mark_sU()) + { + break lab11; + } + } + // ], line 346 + this.bra = this.cursor; + // delete, line 346 + if (!this.slice_del()) + { + return false; + } + // try, line 346 + v_9 = this.limit - this.cursor; + var lab14 = true; + lab14: while (lab14 == true) + { + lab14 = false; + // (, line 346 + // [, line 346 + this.ket = this.cursor; + // call mark_lAr, line 346 + if (!this.r_mark_lAr()) + { + this.cursor = this.limit - v_9; + break lab14; + } + // ], line 346 + this.bra = this.cursor; + // delete, line 346 + if (!this.slice_del()) + { + return false; + } + // call stem_suffix_chain_before_ki, line 346 + if (!this.r_stem_suffix_chain_before_ki()) + { + this.cursor = this.limit - 
v_9; + break lab14; + } + } + break lab9; + } + this.cursor = this.limit - v_7; + // (, line 348 + // call stem_suffix_chain_before_ki, line 348 + if (!this.r_stem_suffix_chain_before_ki()) + { + this.cursor = this.limit - v_6; + break lab8; + } + } + } + break lab0; + } + this.cursor = this.limit - v_1; + // (, line 351 + // call mark_ndA, line 351 + if (!this.r_mark_ndA()) + { + return false; + } + // (, line 351 + // or, line 353 + var lab15 = true; + lab15: while (lab15 == true) + { + lab15 = false; + v_10 = this.limit - this.cursor; + var lab16 = true; + lab16: while (lab16 == true) + { + lab16 = false; + // (, line 352 + // call mark_lArI, line 352 + if (!this.r_mark_lArI()) + { + break lab16; + } + // ], line 352 + this.bra = this.cursor; + // delete, line 352 + if (!this.slice_del()) + { + return false; + } + break lab15; + } + this.cursor = this.limit - v_10; + var lab17 = true; + lab17: while (lab17 == true) + { + lab17 = false; + // (, line 354 + // (, line 354 + // call mark_sU, line 354 + if (!this.r_mark_sU()) + { + break lab17; + } + // ], line 354 + this.bra = this.cursor; + // delete, line 354 + if (!this.slice_del()) + { + return false; + } + // try, line 354 + v_11 = this.limit - this.cursor; + var lab18 = true; + lab18: while (lab18 == true) + { + lab18 = false; + // (, line 354 + // [, line 354 + this.ket = this.cursor; + // call mark_lAr, line 354 + if (!this.r_mark_lAr()) + { + this.cursor = this.limit - v_11; + break lab18; + } + // ], line 354 + this.bra = this.cursor; + // delete, line 354 + if (!this.slice_del()) + { + return false; + } + // call stem_suffix_chain_before_ki, line 354 + if (!this.r_stem_suffix_chain_before_ki()) + { + this.cursor = this.limit - v_11; + break lab18; + } + } + break lab15; + } + this.cursor = this.limit - v_10; + // (, line 356 + // call stem_suffix_chain_before_ki, line 356 + if (!this.r_stem_suffix_chain_before_ki()) + { + return false; + } + } + } + return true; + } + + /** + * Tries ten alternatives in order, resetting the cursor to limit - v_1 before + * each new attempt. They start with, respectively: mark_lAr; mark_ncA; + * mark_ndA or mark_nA; mark_ndAn or mark_nU; mark_DAn; mark_nUn or mark_ylA; + * mark_lArI; stem_suffix_chain_before_ki; mark_DA, mark_yU or mark_yA; and + * mark_possessives or mark_sU. Most branches call slice_del() after a match + * and continue with further markers and calls to + * r_stem_suffix_chain_before_ki(). + * + * @return false when the last alternative does not match or any slice_del() + * fails; true otherwise + */ + function r_stem_noun_suffixes () : 
boolean + { + var v_1 : int; + var v_2 : int; + var v_3 : int; + var v_4 : int; + var v_5 : int; + var v_6 : int; + var v_7 : int; + var v_8 : int; + var v_9 : int; + var v_10 : int; + var v_11 : int; + var v_12 : int; + var v_13 : int; + var v_14 : int; + var v_15 : int; + var v_16 : int; + var v_17 : int; + var v_18 : int; + var v_19 : int; + var v_20 : int; + var v_21 : int; + var v_22 : int; + var v_23 : int; + var v_24 : int; + var v_25 : int; + var v_26 : int; + var v_27 : int; + // (, line 361 + // or, line 363 + var lab0 = true; + lab0: while (lab0 == true) + { + lab0 = false; + v_1 = this.limit - this.cursor; + var lab1 = true; + lab1: while (lab1 == true) + { + lab1 = false; + // (, line 362 + // [, line 362 + this.ket = this.cursor; + // call mark_lAr, line 362 + if (!this.r_mark_lAr()) + { + break lab1; + } + // ], line 362 + this.bra = this.cursor; + // delete, line 362 + if (!this.slice_del()) + { + return false; + } + // try, line 362 + v_2 = this.limit - this.cursor; + var lab2 = true; + lab2: while (lab2 == true) + { + lab2 = false; + // (, line 362 + // call stem_suffix_chain_before_ki, line 362 + if (!this.r_stem_suffix_chain_before_ki()) + { + this.cursor = this.limit - v_2; + break lab2; + } + } + break lab0; + } + this.cursor = this.limit - v_1; + var lab3 = true; + lab3: while (lab3 == true) + { + lab3 = false; + // (, line 364 + // [, line 364 + this.ket = this.cursor; + // call mark_ncA, line 364 + if (!this.r_mark_ncA()) + { + break lab3; + } + // ], line 364 + this.bra = this.cursor; + // delete, line 364 + if (!this.slice_del()) + { + return false; + } + // try, line 365 + v_3 = this.limit - this.cursor; + var lab4 = true; + lab4: while (lab4 == true) + { + lab4 = false; + // (, line 365 + // or, line 367 + var lab5 = true; + lab5: while (lab5 == true) + { + lab5 = false; + v_4 = this.limit - this.cursor; + var lab6 = true; + lab6: while (lab6 == true) + { + lab6 = false; + // (, line 366 + // [, line 366 + this.ket = this.cursor; + // 
call mark_lArI, line 366 + if (!this.r_mark_lArI()) + { + break lab6; + } + // ], line 366 + this.bra = this.cursor; + // delete, line 366 + if (!this.slice_del()) + { + return false; + } + break lab5; + } + this.cursor = this.limit - v_4; + var lab7 = true; + lab7: while (lab7 == true) + { + lab7 = false; + // (, line 368 + // [, line 368 + this.ket = this.cursor; + // or, line 368 + var lab8 = true; + lab8: while (lab8 == true) + { + lab8 = false; + v_5 = this.limit - this.cursor; + var lab9 = true; + lab9: while (lab9 == true) + { + lab9 = false; + // call mark_possessives, line 368 + if (!this.r_mark_possessives()) + { + break lab9; + } + break lab8; + } + this.cursor = this.limit - v_5; + // call mark_sU, line 368 + if (!this.r_mark_sU()) + { + break lab7; + } + } + // ], line 368 + this.bra = this.cursor; + // delete, line 368 + if (!this.slice_del()) + { + return false; + } + // try, line 368 + v_6 = this.limit - this.cursor; + var lab10 = true; + lab10: while (lab10 == true) + { + lab10 = false; + // (, line 368 + // [, line 368 + this.ket = this.cursor; + // call mark_lAr, line 368 + if (!this.r_mark_lAr()) + { + this.cursor = this.limit - v_6; + break lab10; + } + // ], line 368 + this.bra = this.cursor; + // delete, line 368 + if (!this.slice_del()) + { + return false; + } + // call stem_suffix_chain_before_ki, line 368 + if (!this.r_stem_suffix_chain_before_ki()) + { + this.cursor = this.limit - v_6; + break lab10; + } + } + break lab5; + } + this.cursor = this.limit - v_4; + // (, line 370 + // [, line 370 + this.ket = this.cursor; + // call mark_lAr, line 370 + if (!this.r_mark_lAr()) + { + this.cursor = this.limit - v_3; + break lab4; + } + // ], line 370 + this.bra = this.cursor; + // delete, line 370 + if (!this.slice_del()) + { + return false; + } + // call stem_suffix_chain_before_ki, line 370 + if (!this.r_stem_suffix_chain_before_ki()) + { + this.cursor = this.limit - v_3; + break lab4; + } + } + } + break lab0; + } + this.cursor = this.limit - 
v_1; + var lab11 = true; + lab11: while (lab11 == true) + { + lab11 = false; + // (, line 374 + // [, line 374 + this.ket = this.cursor; + // (, line 374 + // or, line 374 + var lab12 = true; + lab12: while (lab12 == true) + { + lab12 = false; + v_7 = this.limit - this.cursor; + var lab13 = true; + lab13: while (lab13 == true) + { + lab13 = false; + // call mark_ndA, line 374 + if (!this.r_mark_ndA()) + { + break lab13; + } + break lab12; + } + this.cursor = this.limit - v_7; + // call mark_nA, line 374 + if (!this.r_mark_nA()) + { + break lab11; + } + } + // (, line 375 + // or, line 377 + var lab14 = true; + lab14: while (lab14 == true) + { + lab14 = false; + v_8 = this.limit - this.cursor; + var lab15 = true; + lab15: while (lab15 == true) + { + lab15 = false; + // (, line 376 + // call mark_lArI, line 376 + if (!this.r_mark_lArI()) + { + break lab15; + } + // ], line 376 + this.bra = this.cursor; + // delete, line 376 + if (!this.slice_del()) + { + return false; + } + break lab14; + } + this.cursor = this.limit - v_8; + var lab16 = true; + lab16: while (lab16 == true) + { + lab16 = false; + // (, line 378 + // call mark_sU, line 378 + if (!this.r_mark_sU()) + { + break lab16; + } + // ], line 378 + this.bra = this.cursor; + // delete, line 378 + if (!this.slice_del()) + { + return false; + } + // try, line 378 + v_9 = this.limit - this.cursor; + var lab17 = true; + lab17: while (lab17 == true) + { + lab17 = false; + // (, line 378 + // [, line 378 + this.ket = this.cursor; + // call mark_lAr, line 378 + if (!this.r_mark_lAr()) + { + this.cursor = this.limit - v_9; + break lab17; + } + // ], line 378 + this.bra = this.cursor; + // delete, line 378 + if (!this.slice_del()) + { + return false; + } + // call stem_suffix_chain_before_ki, line 378 + if (!this.r_stem_suffix_chain_before_ki()) + { + this.cursor = this.limit - v_9; + break lab17; + } + } + break lab14; + } + this.cursor = this.limit - v_8; + // (, line 380 + // call stem_suffix_chain_before_ki, line 380 
+ if (!this.r_stem_suffix_chain_before_ki()) + { + break lab11; + } + } + break lab0; + } + this.cursor = this.limit - v_1; + var lab18 = true; + lab18: while (lab18 == true) + { + lab18 = false; + // (, line 384 + // [, line 384 + this.ket = this.cursor; + // (, line 384 + // or, line 384 + var lab19 = true; + lab19: while (lab19 == true) + { + lab19 = false; + v_10 = this.limit - this.cursor; + var lab20 = true; + lab20: while (lab20 == true) + { + lab20 = false; + // call mark_ndAn, line 384 + if (!this.r_mark_ndAn()) + { + break lab20; + } + break lab19; + } + this.cursor = this.limit - v_10; + // call mark_nU, line 384 + if (!this.r_mark_nU()) + { + break lab18; + } + } + // (, line 384 + // or, line 384 + var lab21 = true; + lab21: while (lab21 == true) + { + lab21 = false; + v_11 = this.limit - this.cursor; + var lab22 = true; + lab22: while (lab22 == true) + { + lab22 = false; + // (, line 384 + // call mark_sU, line 384 + if (!this.r_mark_sU()) + { + break lab22; + } + // ], line 384 + this.bra = this.cursor; + // delete, line 384 + if (!this.slice_del()) + { + return false; + } + // try, line 384 + v_12 = this.limit - this.cursor; + var lab23 = true; + lab23: while (lab23 == true) + { + lab23 = false; + // (, line 384 + // [, line 384 + this.ket = this.cursor; + // call mark_lAr, line 384 + if (!this.r_mark_lAr()) + { + this.cursor = this.limit - v_12; + break lab23; + } + // ], line 384 + this.bra = this.cursor; + // delete, line 384 + if (!this.slice_del()) + { + return false; + } + // call stem_suffix_chain_before_ki, line 384 + if (!this.r_stem_suffix_chain_before_ki()) + { + this.cursor = this.limit - v_12; + break lab23; + } + } + break lab21; + } + this.cursor = this.limit - v_11; + // (, line 384 + // call mark_lArI, line 384 + if (!this.r_mark_lArI()) + { + break lab18; + } + } + break lab0; + } + this.cursor = this.limit - v_1; + var lab24 = true; + lab24: while (lab24 == true) + { + lab24 = false; + // (, line 386 + // [, line 386 + this.ket = 
this.cursor; + // call mark_DAn, line 386 + if (!this.r_mark_DAn()) + { + break lab24; + } + // ], line 386 + this.bra = this.cursor; + // delete, line 386 + if (!this.slice_del()) + { + return false; + } + // try, line 386 + v_13 = this.limit - this.cursor; + var lab25 = true; + lab25: while (lab25 == true) + { + lab25 = false; + // (, line 386 + // [, line 386 + this.ket = this.cursor; + // (, line 387 + // or, line 389 + var lab26 = true; + lab26: while (lab26 == true) + { + lab26 = false; + v_14 = this.limit - this.cursor; + var lab27 = true; + lab27: while (lab27 == true) + { + lab27 = false; + // (, line 388 + // call mark_possessives, line 388 + if (!this.r_mark_possessives()) + { + break lab27; + } + // ], line 388 + this.bra = this.cursor; + // delete, line 388 + if (!this.slice_del()) + { + return false; + } + // try, line 388 + v_15 = this.limit - this.cursor; + var lab28 = true; + lab28: while (lab28 == true) + { + lab28 = false; + // (, line 388 + // [, line 388 + this.ket = this.cursor; + // call mark_lAr, line 388 + if (!this.r_mark_lAr()) + { + this.cursor = this.limit - v_15; + break lab28; + } + // ], line 388 + this.bra = this.cursor; + // delete, line 388 + if (!this.slice_del()) + { + return false; + } + // call stem_suffix_chain_before_ki, line 388 + if (!this.r_stem_suffix_chain_before_ki()) + { + this.cursor = this.limit - v_15; + break lab28; + } + } + break lab26; + } + this.cursor = this.limit - v_14; + var lab29 = true; + lab29: while (lab29 == true) + { + lab29 = false; + // (, line 390 + // call mark_lAr, line 390 + if (!this.r_mark_lAr()) + { + break lab29; + } + // ], line 390 + this.bra = this.cursor; + // delete, line 390 + if (!this.slice_del()) + { + return false; + } + // try, line 390 + v_16 = this.limit - this.cursor; + var lab30 = true; + lab30: while (lab30 == true) + { + lab30 = false; + // (, line 390 + // call stem_suffix_chain_before_ki, line 390 + if (!this.r_stem_suffix_chain_before_ki()) + { + this.cursor = this.limit 
- v_16; + break lab30; + } + } + break lab26; + } + this.cursor = this.limit - v_14; + // (, line 392 + // call stem_suffix_chain_before_ki, line 392 + if (!this.r_stem_suffix_chain_before_ki()) + { + this.cursor = this.limit - v_13; + break lab25; + } + } + } + break lab0; + } + this.cursor = this.limit - v_1; + var lab31 = true; + lab31: while (lab31 == true) + { + lab31 = false; + // (, line 396 + // [, line 396 + this.ket = this.cursor; + // or, line 396 + var lab32 = true; + lab32: while (lab32 == true) + { + lab32 = false; + v_17 = this.limit - this.cursor; + var lab33 = true; + lab33: while (lab33 == true) + { + lab33 = false; + // call mark_nUn, line 396 + if (!this.r_mark_nUn()) + { + break lab33; + } + break lab32; + } + this.cursor = this.limit - v_17; + // call mark_ylA, line 396 + if (!this.r_mark_ylA()) + { + break lab31; + } + } + // ], line 396 + this.bra = this.cursor; + // delete, line 396 + if (!this.slice_del()) + { + return false; + } + // try, line 397 + v_18 = this.limit - this.cursor; + var lab34 = true; + lab34: while (lab34 == true) + { + lab34 = false; + // (, line 397 + // or, line 399 + var lab35 = true; + lab35: while (lab35 == true) + { + lab35 = false; + v_19 = this.limit - this.cursor; + var lab36 = true; + lab36: while (lab36 == true) + { + lab36 = false; + // (, line 398 + // [, line 398 + this.ket = this.cursor; + // call mark_lAr, line 398 + if (!this.r_mark_lAr()) + { + break lab36; + } + // ], line 398 + this.bra = this.cursor; + // delete, line 398 + if (!this.slice_del()) + { + return false; + } + // call stem_suffix_chain_before_ki, line 398 + if (!this.r_stem_suffix_chain_before_ki()) + { + break lab36; + } + break lab35; + } + this.cursor = this.limit - v_19; + var lab37 = true; + lab37: while (lab37 == true) + { + lab37 = false; + // (, line 400 + // [, line 400 + this.ket = this.cursor; + // or, line 400 + var lab38 = true; + lab38: while (lab38 == true) + { + lab38 = false; + v_20 = this.limit - this.cursor; + var 
lab39 = true; + lab39: while (lab39 == true) + { + lab39 = false; + // call mark_possessives, line 400 + if (!this.r_mark_possessives()) + { + break lab39; + } + break lab38; + } + this.cursor = this.limit - v_20; + // call mark_sU, line 400 + if (!this.r_mark_sU()) + { + break lab37; + } + } + // ], line 400 + this.bra = this.cursor; + // delete, line 400 + if (!this.slice_del()) + { + return false; + } + // try, line 400 + v_21 = this.limit - this.cursor; + var lab40 = true; + lab40: while (lab40 == true) + { + lab40 = false; + // (, line 400 + // [, line 400 + this.ket = this.cursor; + // call mark_lAr, line 400 + if (!this.r_mark_lAr()) + { + this.cursor = this.limit - v_21; + break lab40; + } + // ], line 400 + this.bra = this.cursor; + // delete, line 400 + if (!this.slice_del()) + { + return false; + } + // call stem_suffix_chain_before_ki, line 400 + if (!this.r_stem_suffix_chain_before_ki()) + { + this.cursor = this.limit - v_21; + break lab40; + } + } + break lab35; + } + this.cursor = this.limit - v_19; + // call stem_suffix_chain_before_ki, line 402 + if (!this.r_stem_suffix_chain_before_ki()) + { + this.cursor = this.limit - v_18; + break lab34; + } + } + } + break lab0; + } + this.cursor = this.limit - v_1; + var lab41 = true; + lab41: while (lab41 == true) + { + lab41 = false; + // (, line 406 + // [, line 406 + this.ket = this.cursor; + // call mark_lArI, line 406 + if (!this.r_mark_lArI()) + { + break lab41; + } + // ], line 406 + this.bra = this.cursor; + // delete, line 406 + if (!this.slice_del()) + { + return false; + } + break lab0; + } + this.cursor = this.limit - v_1; + var lab42 = true; + lab42: while (lab42 == true) + { + lab42 = false; + // (, line 408 + // call stem_suffix_chain_before_ki, line 408 + if (!this.r_stem_suffix_chain_before_ki()) + { + break lab42; + } + break lab0; + } + this.cursor = this.limit - v_1; + var lab43 = true; + lab43: while (lab43 == true) + { + lab43 = false; + // (, line 410 + // [, line 410 + this.ket = 
this.cursor; + // or, line 410 + var lab44 = true; + lab44: while (lab44 == true) + { + lab44 = false; + v_22 = this.limit - this.cursor; + var lab45 = true; + lab45: while (lab45 == true) + { + lab45 = false; + // call mark_DA, line 410 + if (!this.r_mark_DA()) + { + break lab45; + } + break lab44; + } + this.cursor = this.limit - v_22; + var lab46 = true; + lab46: while (lab46 == true) + { + lab46 = false; + // call mark_yU, line 410 + if (!this.r_mark_yU()) + { + break lab46; + } + break lab44; + } + this.cursor = this.limit - v_22; + // call mark_yA, line 410 + if (!this.r_mark_yA()) + { + break lab43; + } + } + // ], line 410 + this.bra = this.cursor; + // delete, line 410 + if (!this.slice_del()) + { + return false; + } + // try, line 410 + v_23 = this.limit - this.cursor; + var lab47 = true; + lab47: while (lab47 == true) + { + lab47 = false; + // (, line 410 + // [, line 410 + this.ket = this.cursor; + // (, line 410 + // or, line 410 + var lab48 = true; + lab48: while (lab48 == true) + { + lab48 = false; + v_24 = this.limit - this.cursor; + var lab49 = true; + lab49: while (lab49 == true) + { + lab49 = false; + // (, line 410 + // call mark_possessives, line 410 + if (!this.r_mark_possessives()) + { + break lab49; + } + // ], line 410 + this.bra = this.cursor; + // delete, line 410 + if (!this.slice_del()) + { + return false; + } + // try, line 410 + v_25 = this.limit - this.cursor; + var lab50 = true; + lab50: while (lab50 == true) + { + lab50 = false; + // (, line 410 + // [, line 410 + this.ket = this.cursor; + // call mark_lAr, line 410 + if (!this.r_mark_lAr()) + { + this.cursor = this.limit - v_25; + break lab50; + } + } + break lab48; + } + this.cursor = this.limit - v_24; + // call mark_lAr, line 410 + if (!this.r_mark_lAr()) + { + this.cursor = this.limit - v_23; + break lab47; + } + } + // ], line 410 + this.bra = this.cursor; + // delete, line 410 + if (!this.slice_del()) + { + return false; + } + // [, line 410 + this.ket = this.cursor; + // 
call stem_suffix_chain_before_ki, line 410 + if (!this.r_stem_suffix_chain_before_ki()) + { + this.cursor = this.limit - v_23; + break lab47; + } + } + break lab0; + } + this.cursor = this.limit - v_1; + // (, line 412 + // [, line 412 + this.ket = this.cursor; + // or, line 412 + var lab51 = true; + lab51: while (lab51 == true) + { + lab51 = false; + v_26 = this.limit - this.cursor; + var lab52 = true; + lab52: while (lab52 == true) + { + lab52 = false; + // call mark_possessives, line 412 + if (!this.r_mark_possessives()) + { + break lab52; + } + break lab51; + } + this.cursor = this.limit - v_26; + // call mark_sU, line 412 + if (!this.r_mark_sU()) + { + return false; + } + } + // ], line 412 + this.bra = this.cursor; + // delete, line 412 + if (!this.slice_del()) + { + return false; + } + // try, line 412 + v_27 = this.limit - this.cursor; + var lab53 = true; + lab53: while (lab53 == true) + { + lab53 = false; + // (, line 412 + // [, line 412 + this.ket = this.cursor; + // call mark_lAr, line 412 + if (!this.r_mark_lAr()) + { + this.cursor = this.limit - v_27; + break lab53; + } + // ], line 412 + this.bra = this.cursor; + // delete, line 412 + if (!this.slice_del()) + { + return false; + } + // call stem_suffix_chain_before_ki, line 412 + if (!this.r_stem_suffix_chain_before_ki()) + { + this.cursor = this.limit - v_27; + break lab53; + } + } + } + return true; + } + + /** + * Sets ket to the cursor, calls find_among_b(TurkishStemmer.a_23, 4), sets + * bra to the cursor, and passes a replacement string to slice_from() chosen + * by the returned index: 1 gives "p", 2 gives "\u00E7", 3 gives "t", + * 4 gives "k". The `case 0` arm cannot be reached because a zero index has + * already returned false before the switch. + * + * @return false when find_among_b() returns 0 or slice_from() fails, else true + */ + function r_post_process_last_consonants () : boolean + { + var among_var : int; + // (, line 415 + // [, line 416 + this.ket = this.cursor; + // substring, line 416 + among_var = this.find_among_b(TurkishStemmer.a_23, 4); + if (among_var == 0) + { + return false; + } + // ], line 416 + this.bra = this.cursor; + switch (among_var) { + case 0: + return false; + case 1: + // (, line 417 + // <-, line 417 + if (!this.slice_from("p")) + { + return false; + } + break; + case 2: + // (, line 418 + // <-, line 418 + if (!this.slice_from("\u00E7")) + { + return false; + } + break; + case 3: + // (, line 
419 + // <-, line 419 + if (!this.slice_from("t")) + { + return false; + } + break; + case 4: + // (, line 420 + // <-, line 420 + if (!this.slice_from("k")) + { + return false; + } + break; + } + return true; + } + + /** + * First checks that eq_s_b() matches "d" or "g" (returns false otherwise) + * and restores the cursor afterwards. Then tries four alternatives; each one + * steps the cursor backwards until + * in_grouping_b(TurkishStemmer.g_vowel, 97, 305) succeeds (giving up at + * limit_backward), tests that position with eq_s_b() against a pair of + * letters, restores the cursor and calls insert() at the cursor: + * "a" or "\u0131" inserts "\u0131"; "e" or "i" inserts "i"; + * "o" or "u" inserts "u"; "\u00F6" or "\u00FC" inserts "\u00FC". + * + * @return false when the d/g check or all four alternatives fail, else true + */ + function r_append_U_to_stems_ending_with_d_or_g () : boolean + { + var v_1 : int; + var v_2 : int; + var v_3 : int; + var v_4 : int; + var v_5 : int; + var v_6 : int; + var v_7 : int; + var v_8 : int; + var v_9 : int; + var v_10 : int; + var v_11 : int; + var v_12 : int; + var v_13 : int; + var v_14 : int; + var v_15 : int; + // (, line 430 + // test, line 431 + v_1 = this.limit - this.cursor; + // (, line 431 + // or, line 431 + var lab0 = true; + lab0: while (lab0 == true) + { + lab0 = false; + v_2 = this.limit - this.cursor; + var lab1 = true; + lab1: while (lab1 == true) + { + lab1 = false; + // literal, line 431 + if (!(this.eq_s_b(1, "d"))) + { + break lab1; + } + break lab0; + } + this.cursor = this.limit - v_2; + // literal, line 431 + if (!(this.eq_s_b(1, "g"))) + { + return false; + } + } + this.cursor = this.limit - v_1; + // or, line 433 + var lab2 = true; + lab2: while (lab2 == true) + { + lab2 = false; + v_3 = this.limit - this.cursor; + var lab3 = true; + lab3: while (lab3 == true) + { + lab3 = false; + // (, line 432 + // test, line 432 + v_4 = this.limit - this.cursor; + // (, line 432 + // (, line 432 + // goto, line 432 + golab4: while(true) + { + v_5 = this.limit - this.cursor; + var lab5 = true; + lab5: while (lab5 == true) + { + lab5 = false; + if (!(this.in_grouping_b(TurkishStemmer.g_vowel, 97, 305))) + { + break lab5; + } + this.cursor = this.limit - v_5; + break golab4; + } + this.cursor = this.limit - v_5; + if (this.cursor <= this.limit_backward) + { + break lab3; + } + this.cursor--; + } + // or, line 432 + var lab6 = true; + lab6: while (lab6 == true) + { + lab6 = false; + v_6 = this.limit - this.cursor; + var lab7 = true; + lab7: while (lab7 == true) + { + lab7 = false; + // literal, line 432 + if (!(this.eq_s_b(1, 
"a"))) + { + break lab7; + } + break lab6; + } + this.cursor = this.limit - v_6; + // literal, line 432 + if (!(this.eq_s_b(1, "\u0131"))) + { + break lab3; + } + } + this.cursor = this.limit - v_4; + // <+, line 432 + { + var c : int = this.cursor; + this.insert(this.cursor, this.cursor, "\u0131"); + this.cursor = c; + } + break lab2; + } + this.cursor = this.limit - v_3; + var lab8 = true; + lab8: while (lab8 == true) + { + lab8 = false; + // (, line 434 + // test, line 434 + v_7 = this.limit - this.cursor; + // (, line 434 + // (, line 434 + // goto, line 434 + golab9: while(true) + { + v_8 = this.limit - this.cursor; + var lab10 = true; + lab10: while (lab10 == true) + { + lab10 = false; + if (!(this.in_grouping_b(TurkishStemmer.g_vowel, 97, 305))) + { + break lab10; + } + this.cursor = this.limit - v_8; + break golab9; + } + this.cursor = this.limit - v_8; + if (this.cursor <= this.limit_backward) + { + break lab8; + } + this.cursor--; + } + // or, line 434 + var lab11 = true; + lab11: while (lab11 == true) + { + lab11 = false; + v_9 = this.limit - this.cursor; + var lab12 = true; + lab12: while (lab12 == true) + { + lab12 = false; + // literal, line 434 + if (!(this.eq_s_b(1, "e"))) + { + break lab12; + } + break lab11; + } + this.cursor = this.limit - v_9; + // literal, line 434 + if (!(this.eq_s_b(1, "i"))) + { + break lab8; + } + } + this.cursor = this.limit - v_7; + // <+, line 434 + { + var c : int = this.cursor; + this.insert(this.cursor, this.cursor, "i"); + this.cursor = c; + } + break lab2; + } + this.cursor = this.limit - v_3; + var lab13 = true; + lab13: while (lab13 == true) + { + lab13 = false; + // (, line 436 + // test, line 436 + v_10 = this.limit - this.cursor; + // (, line 436 + // (, line 436 + // goto, line 436 + golab14: while(true) + { + v_11 = this.limit - this.cursor; + var lab15 = true; + lab15: while (lab15 == true) + { + lab15 = false; + if (!(this.in_grouping_b(TurkishStemmer.g_vowel, 97, 305))) + { + break lab15; + } + this.cursor 
= this.limit - v_11; + break golab14; + } + this.cursor = this.limit - v_11; + if (this.cursor <= this.limit_backward) + { + break lab13; + } + this.cursor--; + } + // or, line 436 + var lab16 = true; + lab16: while (lab16 == true) + { + lab16 = false; + v_12 = this.limit - this.cursor; + var lab17 = true; + lab17: while (lab17 == true) + { + lab17 = false; + // literal, line 436 + if (!(this.eq_s_b(1, "o"))) + { + break lab17; + } + break lab16; + } + this.cursor = this.limit - v_12; + // literal, line 436 + if (!(this.eq_s_b(1, "u"))) + { + break lab13; + } + } + this.cursor = this.limit - v_10; + // <+, line 436 + { + var c : int = this.cursor; + this.insert(this.cursor, this.cursor, "u"); + this.cursor = c; + } + break lab2; + } + this.cursor = this.limit - v_3; + // (, line 438 + // test, line 438 + v_13 = this.limit - this.cursor; + // (, line 438 + // (, line 438 + // goto, line 438 + golab18: while(true) + { + v_14 = this.limit - this.cursor; + var lab19 = true; + lab19: while (lab19 == true) + { + lab19 = false; + if (!(this.in_grouping_b(TurkishStemmer.g_vowel, 97, 305))) + { + break lab19; + } + this.cursor = this.limit - v_14; + break golab18; + } + this.cursor = this.limit - v_14; + if (this.cursor <= this.limit_backward) + { + return false; + } + this.cursor--; + } + // or, line 438 + var lab20 = true; + lab20: while (lab20 == true) + { + lab20 = false; + v_15 = this.limit - this.cursor; + var lab21 = true; + lab21: while (lab21 == true) + { + lab21 = false; + // literal, line 438 + if (!(this.eq_s_b(1, "\u00F6"))) + { + break lab21; + } + break lab20; + } + this.cursor = this.limit - v_15; + // literal, line 438 + if (!(this.eq_s_b(1, "\u00FC"))) + { + return false; + } + } + this.cursor = this.limit - v_13; + // <+, line 438 + { + var c : int = this.cursor; + this.insert(this.cursor, this.cursor, "\u00FC"); + this.cursor = c; + } + } + return true; + } + + function r_more_than_one_syllable_word () : boolean + { + var v_1 : int; + var v_3 : int; + // 
(, line 445 + // test, line 446 + v_1 = this.cursor; + // (, line 446 + // atleast, line 446 + { + var v_2 = 2; + // atleast, line 446 + replab0: while(true) + { + v_3 = this.cursor; + var lab1 = true; + lab1: while (lab1 == true) + { + lab1 = false; + // (, line 446 + // gopast, line 446 + golab2: while(true) + { + var lab3 = true; + lab3: while (lab3 == true) + { + lab3 = false; + if (!(this.in_grouping(TurkishStemmer.g_vowel, 97, 305))) + { + break lab3; + } + break golab2; + } + if (this.cursor >= this.limit) + { + break lab1; + } + this.cursor++; + } + v_2--; + continue replab0; + } + this.cursor = v_3; + break replab0; + } + if (v_2 > 0) + { + return false; + } + } + this.cursor = v_1; + return true; + } + + function r_is_reserved_word () : boolean + { + var v_1 : int; + var v_2 : int; + var v_4 : int; + // (, line 449 + // or, line 451 + var lab0 = true; + lab0: while (lab0 == true) + { + lab0 = false; + v_1 = this.cursor; + var lab1 = true; + lab1: while (lab1 == true) + { + lab1 = false; + // test, line 450 + v_2 = this.cursor; + // (, line 450 + // gopast, line 450 + golab2: while(true) + { + var lab3 = true; + lab3: while (lab3 == true) + { + lab3 = false; + // literal, line 450 + if (!(this.eq_s(2, "ad"))) + { + break lab3; + } + break golab2; + } + if (this.cursor >= this.limit) + { + break lab1; + } + this.cursor++; + } + // (, line 450 + this.I_strlen = 2; + // (, line 450 + if (!(this.I_strlen == this.limit)) + { + break lab1; + } + this.cursor = v_2; + break lab0; + } + this.cursor = v_1; + // test, line 452 + v_4 = this.cursor; + // (, line 452 + // gopast, line 452 + golab4: while(true) + { + var lab5 = true; + lab5: while (lab5 == true) + { + lab5 = false; + // literal, line 452 + if (!(this.eq_s(5, "soyad"))) + { + break lab5; + } + break golab4; + } + if (this.cursor >= this.limit) + { + return false; + } + this.cursor++; + } + // (, line 452 + this.I_strlen = 5; + // (, line 452 + if (!(this.I_strlen == this.limit)) + { + return false; + } + 
this.cursor = v_4; + } + return true; + } + + function r_postlude () : boolean + { + var v_1 : int; + var v_2 : int; + var v_3 : int; + // (, line 455 + // not, line 456 + { + v_1 = this.cursor; + var lab0 = true; + lab0: while (lab0 == true) + { + lab0 = false; + // (, line 456 + // call is_reserved_word, line 456 + if (!this.r_is_reserved_word()) + { + break lab0; + } + return false; + } + this.cursor = v_1; + } + // backwards, line 457 + this.limit_backward = this.cursor; this.cursor = this.limit; + // (, line 457 + // do, line 458 + v_2 = this.limit - this.cursor; + var lab1 = true; + lab1: while (lab1 == true) + { + lab1 = false; + // call append_U_to_stems_ending_with_d_or_g, line 458 + if (!this.r_append_U_to_stems_ending_with_d_or_g()) + { + break lab1; + } + } + this.cursor = this.limit - v_2; + // do, line 459 + v_3 = this.limit - this.cursor; + var lab2 = true; + lab2: while (lab2 == true) + { + lab2 = false; + // call post_process_last_consonants, line 459 + if (!this.r_post_process_last_consonants()) + { + break lab2; + } + } + this.cursor = this.limit - v_3; + this.cursor = this.limit_backward; return true; + } + + override function stem () : boolean + { + var v_1 : int; + var v_2 : int; + // (, line 464 + // (, line 465 + // call more_than_one_syllable_word, line 465 + if (!this.r_more_than_one_syllable_word()) + { + return false; + } + // (, line 466 + // backwards, line 467 + this.limit_backward = this.cursor; this.cursor = this.limit; + // (, line 467 + // do, line 468 + v_1 = this.limit - this.cursor; + var lab0 = true; + lab0: while (lab0 == true) + { + lab0 = false; + // call stem_nominal_verb_suffixes, line 468 + if (!this.r_stem_nominal_verb_suffixes()) + { + break lab0; + } + } + this.cursor = this.limit - v_1; + // Boolean test continue_stemming_noun_suffixes, line 469 + if (!(this.B_continue_stemming_noun_suffixes)) + { + return false; + } + // do, line 470 + v_2 = this.limit - this.cursor; + var lab1 = true; + lab1: while (lab1 == true) + 
import "sax.jsx";

/**
 * SAX handler that rewrites a small markup vocabulary (`title`, `url`,
 * `hit`, `del`, `summary`) into the open/close strings of a style table and
 * collects the output as a list of text fragments.
 */
class _HTMLHandler extends SAXHandler
{
    var text : string[];
    var styles : Map.<string[]>;
    var escape : boolean;

    /**
     * Escapes text for inclusion in HTML.
     *
     * `&` is escaped first so the entities produced afterwards are not
     * escaped a second time. Newlines become `<br/>` last, so the inserted
     * tag is not itself turned into `&lt;br/&gt;`.
     *
     * @param str raw text
     * @return HTML-safe text with line breaks rendered as `<br/>`
     */
    static function escapeHTML (str : string) : string
    {
        return str.replace(/&/g, "&amp;")
                  .replace(/"/g, "&quot;")
                  .replace(/</g, "&lt;")
                  .replace(/>/g, "&gt;")
                  .replace(/\n/g, "<br/>");
    }

    /**
     * @param styles tag name -> [opening string, closing string]
     * @param escape whether text nodes are passed through escapeHTML()
     */
    function constructor (styles : Map.<string[]>, escape : boolean)
    {
        this.text = [] : string[];
        this.escape = escape;
        this.styles = styles;
    }

    /** Emits the opening string registered for `tagname`. */
    override function onopentag (tagname : string, attributes : Map.<string>) : void
    {
        this.text.push(this.styles[tagname][0]);
    }

    /** Emits the closing string registered for `tagname`. */
    override function onclosetag (tagname : string) : void
    {
        this.text.push(this.styles[tagname][1]);
    }

    /** Emits a text node, escaped when the handler was built with `escape`. */
    override function ontext (text : string) : void
    {
        if (this.escape)
        {
            this.text.push(_HTMLHandler.escapeHTML(text));
        }
        else
        {
            this.text.push(text);
        }
    }

    /** @return all fragments emitted so far, concatenated. */
    function result () : string
    {
        return this.text.join('');
    }
}

/**
 * Converts tagged result text into console escape sequences, HTML, or plain
 * text, depending on the mode given to the constructor.
 */
class Style
{
    var styles : Map.<string[]>;
    var escapeHTML : boolean;

    // ANSI escape sequences for terminal output.
    static const console = {
        'title'   : ['\x1B[32m\x1b[4m', '\x1B[39m\x1b[0m'],
        'url'     : ['\x1B[34m', '\x1B[39m'],
        'hit'     : ['\x1B[4m', '\x1B[0m'],
        'del'     : ['\x1B[9m', '\x1B[0m'],
        'summary' : ['\x1B[90m', '\x1B[39m']
    };

    // NOTE(review): "reuslt" looks like a typo for "result"; it is left
    // unchanged because existing stylesheets may select on this class name.
    static const html = {
        'title'   : ['<span class="title">', '</span>'],
        'url'     : ['<span class="url">', '</span>'],
        'hit'     : ['<span class="hit">', '</span>'],
        'del'     : ['<del>', '</del>'],
        'summary' : ['<span class="reuslt">', '</span>']
    };

    // Strips all markup. The key must be 'title' (it was misspelled
    // 'tilte'), otherwise a <title> tag has no entry in this table.
    static const ignore = {
        'title'   : ['', ''],
        'url'     : ['', ''],
        'hit'     : ['', ''],
        'del'     : ['', ''],
        'summary' : ['', '']
    };

    /**
     * @param mode 'console', 'html' or 'ignore'; any other value behaves
     *             like 'ignore'. Text is HTML-escaped only in 'html' mode.
     */
    function constructor (mode : string)
    {
        switch (mode)
        {
        case 'console':
            this.styles = Style.console;
            break;
        case 'html':
            this.styles = Style.html;
            break;
        case 'ignore':
            this.styles = Style.ignore;
            break;
        default:
            this.styles = Style.ignore;
            break;
        }
        this.escapeHTML = (mode == 'html');
    }

    /**
     * Parses `source` with the SAX parser and returns it rendered with the
     * style table selected at construction time.
     */
    function convert (source : string) : string
    {
        var handler = new _HTMLHandler(this.styles, this.escapeHTML);
        var parser = new SAXParser(handler);
        parser.parse(source);
        return handler.result();
    }
}
node.fs.readFileSync(filepath, 'utf8'); + } +} diff --git a/web/server/h2o/libh2o/misc/oktavia/src/tiny-segmenter.jsx b/web/server/h2o/libh2o/misc/oktavia/src/tiny-segmenter.jsx new file mode 100644 index 00000000..b69796c2 --- /dev/null +++ b/web/server/h2o/libh2o/misc/oktavia/src/tiny-segmenter.jsx @@ -0,0 +1,350 @@ +// JSX version of TinySegmenter. -- Super compact Japanese tokenizer in Javascript +// Ported by Yoshiki Shibukawa +// +// Original Code Copyright +// TinySegmenter 0.2 -- Super compact Japanese tokenizer in Javascript +// (c) 2008 Taku Kudo <taku@chasen.org> +// TinySegmenter is freely distributable under the terms of a new BSD licence. +// For details, see http://chasen.org/~taku/software/TinySegmenter/LICENCE.txt + +class CharType +{ + var re : RegExp; + var type : string; + function constructor (pattern : RegExp, type : string) + { + this.re = pattern; + this.type = type; + } +} + +class TinySegmenter +{ + static const _chartype = [ + new CharType(/[一二三四五六七八九十百千万億兆]/ ,"M"), + new CharType(/[一-龠々〆ヵヶ]/, "H"), + new CharType(/[ぁ-ん]/, "I"), + new CharType(/[ァ-ヴーア-ン゙ー]/, "K"), + new CharType(/[a-zA-Za-zA-Z]/, "A"), + new CharType(/[0-90-9]/, "N") + ]; + + static const _BIAS = -332; + static const _BC1 = {"HH":6,"II":2461,"KH":406,"OH":-1378}; + static const _BC2 = {"AA":-3267,"AI":2744,"AN":-878,"HH":-4070,"HM":-1711,"HN":4012,"HO":3761, + "IA":1327,"IH":-1184,"II":-1332,"IK":1721,"IO":5492,"KI":3831,"KK":-8741,"MH":-3132,"MK":3334, + "OO":-2920}; + static const _BC3 = {"HH":996,"HI":626,"HK":-721,"HN":-1307,"HO":-836,"IH":-301,"KK":2762,"MK":1079, + "MM":4034,"OA":-1652,"OH":266}; + static const _BP1 = {"BB":295,"OB":304,"OO":-125,"UB":352}; + static const _BP2 = {"BO":60,"OO":-1762}; + static const _BQ1 = {"BHH":1150,"BHM":1521,"BII":-1158,"BIM":886,"BMH":1208,"BNH":449,"BOH":-91, + "BOO":-2597,"OHI":451,"OIH":-296,"OKA":1851,"OKH":-1020,"OKK":904,"OOO":2965}; + static const _BQ2 = 
{"BHH":118,"BHI":-1159,"BHM":466,"BIH":-919,"BKK":-1720,"BKO":864,"OHH":-1139, + "OHM":-181,"OIH":153,"UHI":-1146}; + static const _BQ3 = {"BHH":-792,"BHI":2664,"BII":-299,"BKI":419,"BMH":937,"BMM":8335,"BNN":998, + "BOH":775,"OHH":2174,"OHM":439,"OII":280,"OKH":1798,"OKI":-793,"OKO":-2242,"OMH":-2402, + "OOO":11699}; + static const _BQ4 = {"BHH":-3895,"BIH":3761,"BII":-4654,"BIK":1348,"BKK":-1806,"BMI":-3385, + "BOO":-12396,"OAH":926,"OHH":266,"OHK":-2036,"ONN":-973}; + static const _BW1 = {",と":660,",同":727,"B1あ":1404,"B1同":542,"、と":660,"、同":727,"」と":1682, + "あっ":1505,"いう":1743,"いっ":-2055,"いる":672,"うし":-4817,"うん":665,"から":3472,"がら":600, + "こう":-790,"こと":2083,"こん":-1262,"さら":-4143,"さん":4573,"した":2641,"して":1104, + "すで":-3399,"そこ":1977,"それ":-871,"たち":1122,"ため":601,"った":3463,"つい":-802,"てい":805, + "てき":1249,"でき":1127,"です":3445,"では":844,"とい":-4915,"とみ":1922,"どこ":3887,"ない":5713, + "なっ":3015,"など":7379,"なん":-1113,"にし":2468,"には":1498,"にも":1671,"に対":-912,"の一":-501, + "の中":741,"ませ":2448,"まで":1711,"まま":2600,"まる":-2155,"やむ":-1947,"よっ":-2565,"れた":2369, + "れで":-913,"をし":1860,"を見":731,"亡く":-1886,"京都":2558,"取り":-2784,"大き":-2604,"大阪":1497, + "平方":-2314,"引き":-1336,"日本":-195,"本当":-2423,"毎日":-2113,"目指":-724,"B1あ":1404, + "B1同":542,"」と":1682}; + static const _BW2 = {"..":-11822,"11":-669,"――":-5730,"−−":-13175,"いう":-1609,"うか":2490, + "かし":-1350,"かも":-602,"から":-7194,"かれ":4612,"がい":853,"がら":-3198,"きた":1941, + "くな":-1597,"こと":-8392,"この":-4193,"させ":4533,"され":13168,"さん":-3977,"しい":-1819, + "しか":-545,"した":5078,"して":972,"しな":939,"その":-3744,"たい":-1253,"たた":-662,"ただ":-3857, + "たち":-786,"たと":1224,"たは":-939,"った":4589,"って":1647,"っと":-2094,"てい":6144, + "てき":3640,"てく":2551,"ては":-3110,"ても":-3065,"でい":2666,"でき":-1528,"でし":-3828, + "です":-4761,"でも":-4203,"とい":1890,"とこ":-1746,"とと":-2279,"との":720,"とみ":5168, + "とも":-3941,"ない":-2488,"なが":-1313,"など":-6509,"なの":2614,"なん":3099,"にお":-1615, + "にし":2748,"にな":2454,"によ":-7236,"に対":-14943,"に従":-4688,"に関":-11388,"のか":2093, + 
"ので":-7059,"のに":-6041,"のの":-6125,"はい":1073,"はが":-1033,"はず":-2532,"ばれ":1813, + "まし":-1316,"まで":-6621,"まれ":5409,"めて":-3153,"もい":2230,"もの":-10713,"らか":-944, + "らし":-1611,"らに":-1897,"りし":651,"りま":1620,"れた":4270,"れて":849,"れば":4114, + "ろう":6067,"われ":7901,"を通":-11877,"んだ":728,"んな":-4115,"一人":602,"一方":-1375, + "一日":970,"一部":-1051,"上が":-4479,"会社":-1116,"出て":2163,"分の":-7758,"同党":970, + "同日":-913,"大阪":-2471,"委員":-1250,"少な":-1050,"年度":-8669,"年間":-1626,"府県":-2363, + "手権":-1982,"新聞":-4066,"日新":-722,"日本":-7068,"日米":3372,"曜日":-601,"朝鮮":-2355, + "本人":-2697,"東京":-1543,"然と":-1384,"社会":-1276,"立て":-990,"第に":-1612,"米国":-4268, + "11":-669}; + static const _BW3 = {"あた":-2194,"あり":719,"ある":3846,"い.":-1185,"い。":-1185,"いい":5308, + "いえ":2079,"いく":3029,"いた":2056,"いっ":1883,"いる":5600,"いわ":1527,"うち":1117,"うと":4798, + "えと":1454,"か.":2857,"か。":2857,"かけ":-743,"かっ":-4098,"かに":-669,"から":6520,"かり":-2670, + "が,":1816,"が、":1816,"がき":-4855,"がけ":-1127,"がっ":-913,"がら":-4977,"がり":-2064, + "きた":1645,"けど":1374,"こと":7397,"この":1542,"ころ":-2757,"さい":-714,"さを":976,"し,":1557, + "し、":1557,"しい":-3714,"した":3562,"して":1449,"しな":2608,"しま":1200,"す.":-1310, + "す。":-1310,"する":6521,"ず,":3426,"ず、":3426,"ずに":841,"そう":428,"た.":8875,"た。":8875, + "たい":-594,"たの":812,"たり":-1183,"たる":-853,"だ.":4098,"だ。":4098,"だっ":1004,"った":-4748, + "って":300,"てい":6240,"てお":855,"ても":302,"です":1437,"でに":-1482,"では":2295,"とう":-1387, + "とし":2266,"との":541,"とも":-3543,"どう":4664,"ない":1796,"なく":-903,"など":2135,"に,":-1021, + "に、":-1021,"にし":1771,"にな":1906,"には":2644,"の,":-724,"の、":-724,"の子":-1000,"は,":1337, + "は、":1337,"べき":2181,"まし":1113,"ます":6943,"まっ":-1549,"まで":6154,"まれ":-793,"らし":1479, + "られ":6820,"るる":3818,"れ,":854,"れ、":854,"れた":1850,"れて":1375,"れば":-3246,"れる":1091, + "われ":-605,"んだ":606,"んで":798,"カ月":990,"会議":860,"入り":1232,"大会":2217,"始め":1681, + "市":965,"新聞":-5055,"日,":974,"日、":974,"社会":2024,"カ月":990}; + static const _TC1 = {"AAA":1093,"HHH":1029,"HHM":580,"HII":998,"HOH":-390,"HOM":-331,"IHI":1169, + "IOH":-142,"IOI":-1015,"IOM":467,"MMH":187,"OOI":-1832}; + 
static const _TC2 = {"HHO":2088,"HII":-1023,"HMM":-1154,"IHI":-1965,"KKH":703,"OII":-2649}; + static const _TC3 = {"AAA":-294,"HHH":346,"HHI":-341,"HII":-1088,"HIK":731,"HOH":-1486, + "IHH":128,"IHI":-3041,"IHO":-1935,"IIH":-825,"IIM":-1035,"IOI":-542,"KHH":-1216, + "KKA":491,"KKH":-1217,"KOK":-1009,"MHH":-2694,"MHM":-457,"MHO":123,"MMH":-471, + "NNH":-1689,"NNO":662,"OHO":-3393}; + static const _TC4 = {"HHH":-203,"HHI":1344,"HHK":365,"HHM":-122,"HHN":182,"HHO":669,"HIH":804, + "HII":679,"HOH":446,"IHH":695,"IHO":-2324,"IIH":321,"III":1497,"IIO":656,"IOO":54, + "KAK":4845,"KKA":3386,"KKK":3065,"MHH":-405,"MHI":201,"MMH":-241,"MMM":661,"MOM":841}; + static const _TQ1 = {"BHHH":-227,"BHHI":316,"BHIH":-132,"BIHH":60,"BIII":1595,"BNHH":-744, + "BOHH":225,"BOOO":-908,"OAKK":482,"OHHH":281,"OHIH":249,"OIHI":200,"OIIH":-68}; + static const _TQ2 = {"BIHH":-1401,"BIII":-1033,"BKAK":-543,"BOOO":-5591}; + static const _TQ3 = {"BHHH":478,"BHHM":-1073,"BHIH":222,"BHII":-504,"BIIH":-116,"BIII":-105, + "BMHI":-863,"BMHM":-464,"BOMH":620,"OHHH":346,"OHHI":1729,"OHII":997,"OHMH":481,"OIHH":623, + "OIIH":1344,"OKAK":2792,"OKHH":587,"OKKA":679,"OOHH":110,"OOII":-685}; + static const _TQ4 = {"BHHH":-721,"BHHM":-3604,"BHII":-966,"BIIH":-607,"BIII":-2181,"OAAA":-2763, + "OAKK":180,"OHHH":-294,"OHHI":2446,"OHHO":480,"OHIH":-1573,"OIHH":1935,"OIHI":-493,"OIIH":626, + "OIII":-4007,"OKAK":-8156}; + static const _TW1 = {"につい":-4681,"東京都":2026}; + static const _TW2 = {"ある程":-2049,"いった":-1256,"ころが":-2434,"しょう":3873,"その後":-4430, + "だって":-1049,"ていた":1833,"として":-4657,"ともに":-4517,"もので":1882,"一気に":-792, + "初めて":-1512,"同時に":-8097,"大きな":-1255,"対して":-2721,"社会党":-3216}; + static const _TW3 = {"いただ":-1734,"してい":1314,"として":-4314,"につい":-5483,"にとっ":-5989, + "に当た":-6247,"ので,":-727,"ので、":-727,"のもの":-600,"れから":-3752,"十二月":-2287}; + static const _TW4 = {"いう.":8576,"いう。":8576,"からな":-2348,"してい":2958,"たが,":1516, + "たが、":1516,"ている":1538,"という":1349,"ました":5543,"ません":1097,"ようと":-4258, + "よると":5865}; + static const 
_UC1 = {"A":484,"K":93,"M":645,"O":-505}; + static const _UC2 = {"A":819,"H":1059,"I":409,"M":3987,"N":5775,"O":646}; + static const _UC3 = {"A":-1370,"I":2311}; + static const _UC4 = {"A":-2643,"H":1809,"I":-1032,"K":-3450,"M":3565,"N":3876,"O":6646}; + static const _UC5 = {"H":313,"I":-1238,"K":-799,"M":539,"O":-831}; + static const _UC6 = {"H":-506,"I":-253,"K":87,"M":247,"O":-387}; + static const _UP1 = {"O":-214}; + static const _UP2 = {"B":69,"O":935}; + static const _UP3 = {"B":189}; + static const _UQ1 = {"BH":21,"BI":-12,"BK":-99,"BN":142,"BO":-56,"OH":-95,"OI":477,"OK":410,"OO":-2422}; + static const _UQ2 = {"BH":216,"BI":113,"OK":1759}; + static const _UQ3 = {"BA":-479,"BH":42,"BI":1913,"BK":-7198,"BM":3160,"BN":6427,"BO":14761, + "OI":-827,"ON":-3212}; + static const _UW1 = {",":156,"、":156,"「":-463,"あ":-941,"う":-127,"が":-553,"き":121,"こ":505, + "で":-201,"と":-547,"ど":-123,"に":-789,"の":-185,"は":-847,"も":-466,"や":-470,"よ":182, + "ら":-292,"り":208,"れ":169,"を":-446,"ん":-137,"・":-135,"主":-402,"京":-268,"区":-912, + "午":871,"国":-460,"大":561,"委":729,"市":-411,"日":-141,"理":361,"生":-408,"県":-386, + "都":-718,"「":-463,"・":-135}; + static const _UW2 = {",":-829,"、":-829,"〇":892,"「":-645,"」":3145,"あ":-538,"い":505,"う":134, + "お":-502,"か":1454,"が":-856,"く":-412,"こ":1141,"さ":878,"ざ":540,"し":1529,"す":-675, + "せ":300,"そ":-1011,"た":188,"だ":1837,"つ":-949,"て":-291,"で":-268,"と":-981,"ど":1273, + "な":1063,"に":-1764,"の":130,"は":-409,"ひ":-1273,"べ":1261,"ま":600,"も":-1263,"や":-402, + "よ":1639,"り":-579,"る":-694,"れ":571,"を":-2516,"ん":2095,"ア":-587,"カ":306,"キ":568, + "ッ":831,"三":-758,"不":-2150,"世":-302,"中":-968,"主":-861,"事":492,"人":-123,"会":978, + "保":362,"入":548,"初":-3025,"副":-1566,"北":-3414,"区":-422,"大":-1769,"天":-865,"太":-483, + "子":-1519,"学":760,"実":1023,"小":-2009,"市":-813,"年":-1060,"強":1067,"手":-1519,"揺":-1033, + "政":1522,"文":-1355,"新":-1682,"日":-1815,"明":-1462,"最":-630,"朝":-1843,"本":-1650, + "東":-931,"果":-665,"次":-2378,"民":-180,"気":-1740,"理":752,"発":529,"目":-1584,"相":-242, + 
"県":-1165,"立":-763,"第":810,"米":509,"自":-1353,"行":838,"西":-744,"見":-3874,"調":1010, + "議":1198,"込":3041,"開":1758,"間":-1257,"「":-645,"」":3145,"ッ":831,"ア":-587,"カ":306,"キ":568}; + static const _UW3 = {",":4889,"1":-800,"−":-1723,"、":4889,"々":-2311,"〇":5827,"」":2670, + "〓":-3573,"あ":-2696,"い":1006,"う":2342,"え":1983,"お":-4864,"か":-1163,"が":3271,"く":1004, + "け":388,"げ":401,"こ":-3552,"ご":-3116,"さ":-1058,"し":-395,"す":584,"せ":3685,"そ":-5228, + "た":842,"ち":-521,"っ":-1444,"つ":-1081,"て":6167,"で":2318,"と":1691,"ど":-899,"な":-2788, + "に":2745,"の":4056,"は":4555,"ひ":-2171,"ふ":-1798,"へ":1199,"ほ":-5516,"ま":-4384,"み":-120, + "め":1205,"も":2323,"や":-788,"よ":-202,"ら":727,"り":649,"る":5905,"れ":2773,"わ":-1207, + "を":6620,"ん":-518,"ア":551,"グ":1319,"ス":874,"ッ":-1350,"ト":521,"ム":1109,"ル":1591, + "ロ":2201,"ン":278,"・":-3794,"一":-1619,"下":-1759,"世":-2087,"両":3815,"中":653,"主":-758, + "予":-1193,"二":974,"人":2742,"今":792,"他":1889,"以":-1368,"低":811,"何":4265,"作":-361, + "保":-2439,"元":4858,"党":3593,"全":1574,"公":-3030,"六":755,"共":-1880,"円":5807,"再":3095, + "分":457,"初":2475,"別":1129,"前":2286,"副":4437,"力":365,"動":-949,"務":-1872,"化":1327, + "北":-1038,"区":4646,"千":-2309,"午":-783,"協":-1006,"口":483,"右":1233,"各":3588,"合":-241, + "同":3906,"和":-837,"員":4513,"国":642,"型":1389,"場":1219,"外":-241,"妻":2016,"学":-1356, + "安":-423,"実":-1008,"家":1078,"小":-513,"少":-3102,"州":1155,"市":3197,"平":-1804,"年":2416, + "広":-1030,"府":1605,"度":1452,"建":-2352,"当":-3885,"得":1905,"思":-1291,"性":1822,"戸":-488, + "指":-3973,"政":-2013,"教":-1479,"数":3222,"文":-1489,"新":1764,"日":2099,"旧":5792,"昨":-661, + "時":-1248,"曜":-951,"最":-937,"月":4125,"期":360,"李":3094,"村":364,"東":-805,"核":5156, + "森":2438,"業":484,"氏":2613,"民":-1694,"決":-1073,"法":1868,"海":-495,"無":979,"物":461, + "特":-3850,"生":-273,"用":914,"町":1215,"的":7313,"直":-1835,"省":792,"県":6293,"知":-1528, + "私":4231,"税":401,"立":-960,"第":1201,"米":7767,"系":3066,"約":3663,"級":1384,"統":-4229, + "総":1163,"線":1255,"者":6457,"能":725,"自":-2869,"英":785,"見":1044,"調":-562,"財":-733, + 
"費":1777,"車":1835,"軍":1375,"込":-1504,"通":-1136,"選":-681,"郎":1026,"郡":4404,"部":1200, + "金":2163,"長":421,"開":-1432,"間":1302,"関":-1282,"雨":2009,"電":-1045,"非":2066,"駅":1620, + "1":-800,"」":2670,"・":-3794,"ッ":-1350,"ア":551,"グ":1319,"ス":874,"ト":521,"ム":1109,"ル":1591, + "ロ":2201,"ン":278}; + static const _UW4 = {",":3930,".":3508,"―":-4841,"、":3930,"。":3508,"〇":4999,"「":1895,"」":3798, + "〓":-5156,"あ":4752,"い":-3435,"う":-640,"え":-2514,"お":2405,"か":530,"が":6006,"き":-4482, + "ぎ":-3821,"く":-3788,"け":-4376,"げ":-4734,"こ":2255,"ご":1979,"さ":2864,"し":-843,"じ":-2506, + "す":-731,"ず":1251,"せ":181,"そ":4091,"た":5034,"だ":5408,"ち":-3654,"っ":-5882,"つ":-1659, + "て":3994,"で":7410,"と":4547,"な":5433,"に":6499,"ぬ":1853,"ね":1413,"の":7396,"は":8578, + "ば":1940,"ひ":4249,"び":-4134,"ふ":1345,"へ":6665,"べ":-744,"ほ":1464,"ま":1051,"み":-2082, + "む":-882,"め":-5046,"も":4169,"ゃ":-2666,"や":2795,"ょ":-1544,"よ":3351,"ら":-2922,"り":-9726, + "る":-14896,"れ":-2613,"ろ":-4570,"わ":-1783,"を":13150,"ん":-2352,"カ":2145,"コ":1789, + "セ":1287,"ッ":-724,"ト":-403,"メ":-1635,"ラ":-881,"リ":-541,"ル":-856,"ン":-3637,"・":-4371, + "ー":-11870,"一":-2069,"中":2210,"予":782,"事":-190,"井":-1768,"人":1036,"以":544,"会":950, + "体":-1286,"作":530,"側":4292,"先":601,"党":-2006,"共":-1212,"内":584,"円":788,"初":1347, + "前":1623,"副":3879,"力":-302,"動":-740,"務":-2715,"化":776,"区":4517,"協":1013,"参":1555, + "合":-1834,"和":-681,"員":-910,"器":-851,"回":1500,"国":-619,"園":-1200,"地":866,"場":-1410, + "塁":-2094,"士":-1413,"多":1067,"大":571,"子":-4802,"学":-1397,"定":-1057,"寺":-809, + "小":1910,"屋":-1328,"山":-1500,"島":-2056,"川":-2667,"市":2771,"年":374,"庁":-4556,"後":456, + "性":553,"感":916,"所":-1566,"支":856,"改":787,"政":2182,"教":704,"文":522,"方":-856,"日":1798, + "時":1829,"最":845,"月":-9066,"木":-485,"来":-442,"校":-360,"業":-1043,"氏":5388,"民":-2716, + "気":-910,"沢":-939,"済":-543,"物":-735,"率":672,"球":-1267,"生":-1286,"産":-1101,"田":-2900, + "町":1826,"的":2586,"目":922,"省":-3485,"県":2997,"空":-867,"立":-2112,"第":788,"米":2937, + 
"系":786,"約":2171,"経":1146,"統":-1169,"総":940,"線":-994,"署":749,"者":2145,"能":-730, + "般":-852,"行":-792,"規":792,"警":-1184,"議":-244,"谷":-1000,"賞":730,"車":-1481,"軍":1158, + "輪":-1433,"込":-3370,"近":929,"道":-1291,"選":2596,"郎":-4866,"都":1192,"野":-1100,"銀":-2213, + "長":357,"間":-2344,"院":-2297,"際":-2604,"電":-878,"領":-1659,"題":-792,"館":-1984,"首":1749, + "高":2120,"「":1895,"」":3798,"・":-4371,"ッ":-724,"ー":-11870,"カ":2145,"コ":1789,"セ":1287, + "ト":-403,"メ":-1635,"ラ":-881,"リ":-541,"ル":-856,"ン":-3637}; + static const _UW5 = {",":465,".":-299,"1":-514,"E2":-32768,"]":-2762,"、":465,"。":-299, + "「":363,"あ":1655,"い":331,"う":-503,"え":1199,"お":527,"か":647,"が":-421,"き":1624,"ぎ":1971, + "く":312,"げ":-983,"さ":-1537,"し":-1371,"す":-852,"だ":-1186,"ち":1093,"っ":52,"つ":921, + "て":-18,"で":-850,"と":-127,"ど":1682,"な":-787,"に":-1224,"の":-635,"は":-578,"べ":1001, + "み":502,"め":865,"ゃ":3350,"ょ":854,"り":-208,"る":429,"れ":504,"わ":419,"を":-1264,"ん":327, + "イ":241,"ル":451,"ン":-343,"中":-871,"京":722,"会":-1153,"党":-654,"務":3519,"区":-901, + "告":848,"員":2104,"大":-1296,"学":-548,"定":1785,"嵐":-1304,"市":-2991,"席":921,"年":1763, + "思":872,"所":-814,"挙":1618,"新":-1682,"日":218,"月":-4353,"査":932,"格":1356,"機":-1508, + "氏":-1347,"田":240,"町":-3912,"的":-3149,"相":1319,"省":-1052,"県":-4003,"研":-997,"社":-278, + "空":-813,"統":1955,"者":-2233,"表":663,"語":-1073,"議":1219,"選":-1018,"郎":-368,"長":786, + "間":1191,"題":2368,"館":-689,"1":-514,"E2":-32768,"「":363,"イ":241,"ル":451,"ン":-343}; + static const _UW6 = {",":227,".":808,"1":-270,"E1":306,"、":227,"。":808,"あ":-307,"う":189, + "か":241,"が":-73,"く":-121,"こ":-200,"じ":1782,"す":383,"た":-428,"っ":573,"て":-1014,"で":101, + "と":-105,"な":-253,"に":-149,"の":-417,"は":-236,"も":-206,"り":187,"る":-135,"を":195, + "ル":-673,"ン":-496,"一":-277,"中":201,"件":-800,"会":624,"前":302,"区":1792,"員":-1212, + "委":798,"学":-960,"市":887,"広":-695,"後":535,"業":-697,"相":753,"社":-507,"福":974, + "空":-822,"者":1811,"連":463,"郎":1082,"1":-270,"E1":306,"ル":-673,"ン":-496}; + + static function _ctype (str : string) : string + { + for (var i = 
0; i < TinySegmenter._chartype.length; i++) + { + if (str.match(TinySegmenter._chartype[i].re)) + { + return TinySegmenter._chartype[i].type; + } + } + return "O"; + } + + static function _ts (v : Nullable.<int>) : int + { + if (v == null) + { + return 0; + } + return v; + } + + static function segment (input : string) : string [] + { + if (input == "") + { + return [] : string[]; + } + var result = [] : string[]; + var seg = ["B3","B2","B1"]; + var ctype = ["O","O","O"]; + var o = input.split(""); + for (var i = 0; i < o.length; ++i) + { + seg.push(o[i]); + ctype.push(TinySegmenter._ctype(o[i])); + } + seg.push("E1"); + seg.push("E2"); + seg.push("E3"); + ctype.push("O"); + ctype.push("O"); + ctype.push("O"); + var word = seg[3]; + var p1 = "U"; + var p2 = "U"; + var p3 = "U"; + for (var i = 4; i < seg.length - 3; ++i) + { + var score = TinySegmenter._BIAS; + var w1 = seg[i-3]; + var w2 = seg[i-2]; + var w3 = seg[i-1]; + var w4 = seg[i]; + var w5 = seg[i+1]; + var w6 = seg[i+2]; + var c1 = ctype[i-3]; + var c2 = ctype[i-2]; + var c3 = ctype[i-1]; + var c4 = ctype[i]; + var c5 = ctype[i+1]; + var c6 = ctype[i+2]; + score += TinySegmenter._ts(TinySegmenter._UP1[p1]); + score += TinySegmenter._ts(TinySegmenter._UP2[p2]); + score += TinySegmenter._ts(TinySegmenter._UP3[p3]); + score += TinySegmenter._ts(TinySegmenter._BP1[p1 + p2]); + score += TinySegmenter._ts(TinySegmenter._BP2[p2 + p3]); + score += TinySegmenter._ts(TinySegmenter._UW1[w1]); + score += TinySegmenter._ts(TinySegmenter._UW2[w2]); + score += TinySegmenter._ts(TinySegmenter._UW3[w3]); + score += TinySegmenter._ts(TinySegmenter._UW4[w4]); + score += TinySegmenter._ts(TinySegmenter._UW5[w5]); + score += TinySegmenter._ts(TinySegmenter._UW6[w6]); + score += TinySegmenter._ts(TinySegmenter._BW1[w2 + w3]); + score += TinySegmenter._ts(TinySegmenter._BW2[w3 + w4]); + score += TinySegmenter._ts(TinySegmenter._BW3[w4 + w5]); + score += TinySegmenter._ts(TinySegmenter._TW1[w1 + w2 + w3]); + score += 
TinySegmenter._ts(TinySegmenter._TW2[w2 + w3 + w4]); + score += TinySegmenter._ts(TinySegmenter._TW3[w3 + w4 + w5]); + score += TinySegmenter._ts(TinySegmenter._TW4[w4 + w5 + w6]); + score += TinySegmenter._ts(TinySegmenter._UC1[c1]); + score += TinySegmenter._ts(TinySegmenter._UC2[c2]); + score += TinySegmenter._ts(TinySegmenter._UC3[c3]); + score += TinySegmenter._ts(TinySegmenter._UC4[c4]); + score += TinySegmenter._ts(TinySegmenter._UC5[c5]); + score += TinySegmenter._ts(TinySegmenter._UC6[c6]); + score += TinySegmenter._ts(TinySegmenter._BC1[c2 + c3]); + score += TinySegmenter._ts(TinySegmenter._BC2[c3 + c4]); + score += TinySegmenter._ts(TinySegmenter._BC3[c4 + c5]); + score += TinySegmenter._ts(TinySegmenter._TC1[c1 + c2 + c3]); + score += TinySegmenter._ts(TinySegmenter._TC2[c2 + c3 + c4]); + score += TinySegmenter._ts(TinySegmenter._TC3[c3 + c4 + c5]); + score += TinySegmenter._ts(TinySegmenter._TC4[c4 + c5 + c6]); + // score += TinySegmenter._ts(TinySegmenter._TC5[c4 + c5 + c6]); + score += TinySegmenter._ts(TinySegmenter._UQ1[p1 + c1]); + score += TinySegmenter._ts(TinySegmenter._UQ2[p2 + c2]); + score += TinySegmenter._ts(TinySegmenter._UQ3[p3 + c3]); + score += TinySegmenter._ts(TinySegmenter._BQ1[p2 + c2 + c3]); + score += TinySegmenter._ts(TinySegmenter._BQ2[p2 + c3 + c4]); + score += TinySegmenter._ts(TinySegmenter._BQ3[p3 + c2 + c3]); + score += TinySegmenter._ts(TinySegmenter._BQ4[p3 + c3 + c4]); + score += TinySegmenter._ts(TinySegmenter._TQ1[p2 + c1 + c2 + c3]); + score += TinySegmenter._ts(TinySegmenter._TQ2[p2 + c2 + c3 + c4]); + score += TinySegmenter._ts(TinySegmenter._TQ3[p3 + c1 + c2 + c3]); + score += TinySegmenter._ts(TinySegmenter._TQ4[p3 + c2 + c3 + c4]); + var p = "O"; + if (score > 0) + { + result.push(word); + word = ""; + p = "B"; + } + p1 = p2; + p2 = p3; + p3 = p; + word += seg[i]; + } + result.push(word); + return result; + } +} + +class _Main +{ + static function main(args : string[]) : void + { + // test data from 
http://www.ai-gakkai.or.jp/jsai/journal/mybookmark/26-6.html + var test = "近年、Web技術や計測技術の発展により言語やゲノムデータは大規模化しています。従来のデータ構造は大規模データを扱うにはサイズが大きくメモリに載らない、しかし、圧縮するとランダムアクセスをすることができないという欠点があります。"; + + log TinySegmenter.segment(test); + } +} + diff --git a/web/server/h2o/libh2o/misc/oktavia/src/wavelet-matrix.jsx b/web/server/h2o/libh2o/misc/oktavia/src/wavelet-matrix.jsx new file mode 100644 index 00000000..2a07b015 --- /dev/null +++ b/web/server/h2o/libh2o/misc/oktavia/src/wavelet-matrix.jsx @@ -0,0 +1,321 @@ +/** + * This is a JSX version of shellinford library: + * https://code.google.com/p/shellinford/ + * + * License: http://shibu.mit-license.org/ + */ + +import "bit-vector.jsx"; +import "binary-util.jsx"; +import "console.jsx"; + + +class WaveletMatrix +{ + var _bv : BitVector[]; + var _seps : int[]; + var _range : Map.<int>; + var _bitsize : int; + var _size : int; + + function constructor () + { + this._range = {} : Map.<int>; + this._bv = [] : BitVector[]; + this._seps = [] : int[]; + this._bitsize = 16; + this.clear(); + } + + function bitsize () : int + { + return this._bitsize; + } + + function setMaxCharCode (charCode : int) : void + { + this._bitsize = Math.ceil(Math.log(charCode) / Math.LN2); + } + + function clear () : void + { + this._bv.length = 0; + this._seps.length = 0; + this._size = 0; + } + + function build (v : string) : void + { + this.clear(); + var size = v.length; + var bitsize = this.bitsize(); + for (var i = 0; i < bitsize; i++) + { + this._bv.push(new BitVector); + this._seps.push(0); + } + this._size = size; + for (var i = 0; i < size; i++) + { + this._bv[0].set(i, this._uint2bit(v.charCodeAt(i), 0)); + } + this._bv[0].build(); + this._seps[0] = this._bv[0].size(false); + this._range[0 as string] = 0; + this._range[1 as string] = this._seps[0]; + + var depth : int = 1; + while (depth < bitsize) + { + var range_tmp = WaveletMatrix._shallow_copy(this._range); // copy + for (var i = 0; i < size; i++) + { + var code = v.charCodeAt(i); + var 
bit = this._uint2bit(code, depth);
                // NOTE(review): these statements are the tail of a method whose
                // beginning lies outside this chunk (presumably build()); they are
                // carried over unchanged.
                var key = code >>> (bitsize - depth);
                this._bv[depth].set(range_tmp[key as string], bit);
                range_tmp[key as string]++;
            }
            this._bv[depth].build();
            this._seps[depth] = this._bv[depth].size(false);

            // Invert the ranges that were advanced on this level
            // (start position -> range key).
            var range_rev = {} : Map.<int>;
            for (var range_key in this._range)
            {
                var value : int = this._range[range_key];
                if (value != range_tmp[range_key])
                {
                    range_rev[value as string] = range_key as int;
                }
            }
            // Rebuild _range for the next level: every surviving range is split
            // into a "0" child (key << 1) and a "1" child ((key << 1) + 1).
            this._range = {} : Map.<int>;
            var pos0 = 0;
            var pos1 = this._seps[depth];
            for (var range_rev_key in range_rev)
            {
                var begin = range_rev_key as int;
                var value = range_rev[range_rev_key];
                var end = range_tmp[value as string];
                var num0  = this._bv[depth].rank(end  , false) -
                            this._bv[depth].rank(begin, false);
                var num1  = end - begin - num0;
                if (num0 > 0)
                {
                    this._range[(value << 1) as string] = pos0;
                    pos0 += num0;
                }
                if (num1 > 0)
                {
                    this._range[((value << 1) + 1) as string] = pos1;
                    pos1 += num1;
                }
            }
            depth++;
        }
    }

    /**
     * Returns the stored length of the sequence (the _size field).
     */
    function size () : int
    {
        return this._size;
    }

    /**
     * Returns rank(size(), c), i.e. the rank of c taken over the whole sequence.
     *
     * @param c character code to look up
     */
    function size (c : int) : int
    {
        return this.rank(this.size(), c);
    }

    /**
     * Reconstructs the value stored at position i.
     *
     * Walks the bit vectors from depth 0 to bitsize() - 1. The bit read on each
     * level is appended to the result (most significant bit first) and the
     * position is remapped with rank() for the next level; positions following
     * a set bit are additionally shifted by _seps[depth].
     *
     * @param i position to read; must be smaller than size()
     * @return the reconstructed value
     * @throws Error when i >= size()
     */
    function get (i : int) : int
    {
        if (i >= this.size())
        {
            throw new Error("WaveletMatrix.get() : range error");
        }
        var value = 0;
        var depth = 0;
        while (depth < this.bitsize())
        {
            var bit = this._bv[depth].get(i);
            i = this._bv[depth].rank(i, bit);
            value <<= 1;
            if (bit)
            {
                i += this._seps[depth];
                value += 1;
            }
            depth++;
        }
        return value;
    }

    /**
     * Computes the rank of c at position i.
     *
     * The end position i is mapped through every level following the bits of c;
     * the result is the distance between that mapped position and the start
     * position recorded for c in _range.
     *
     * @param i end position; must not exceed size()
     * @param c character code to count
     * @return 0 when i is 0 or when _range has no entry for c, otherwise
     *         the mapped end position minus the recorded start position
     * @throws Error when i > size()
     */
    function rank (i : int, c : int) : int
    {
        if (i > this.size())
        {
            throw new Error("WaveletMatrix.rank(): range error");
        }
        if (i == 0)
        {
            return 0;
        }

        var begin = this._range[c as string];
        if (begin == null)
        {
            // c has no entry in _range.
            return 0;
        }
        var end = i;
        var depth = 0;
        while (depth < this.bitsize())
        {
            var bit = this._uint2bit(c, depth);
            end = this._bv[depth].rank(end, bit);
            if (bit)
            {
                end += this._seps[depth];
            }
            depth++;
        }
        return end - begin;
    }

    /**
     * Accumulates, over all levels, the number of false bits inside the tracked
     * interval on every level where the corresponding bit of c is set.
     * NOTE(review): judging by the name this is the number of values smaller
     * than c among the first i positions -- confirm against callers.
     *
     * @param i end position; must not exceed size()
     * @param c character code used as the upper bound
     * @return the accumulated count (0 when i is 0)
     * @throws Error when i > size()
     */
    function rank_less_than (i : int, c : int) : int
    {
        if (i > this.size())
        {
            throw new Error("WaveletMatrix.rank_less_than(): range error");
        }
        if (i == 0)
        {
            return 0;
        }

        var begin = 0;
        var end = i;
        var depth = 0;
        var rlt = 0;
        while (depth < this.bitsize())
        {
            var rank0_begin = this._bv[depth].rank(begin, false);
            var rank0_end   = this._bv[depth].rank(end  , false);
            if (this._uint2bit(c, depth))
            {
                // Everything with a 0 bit on this level is counted; continue
                // in the "1" partition, which starts at _seps[depth].
                rlt += (rank0_end - rank0_begin);
                begin += (this._seps[depth] - rank0_begin);
                end   += (this._seps[depth] - rank0_end);
            }
            else
            {
                begin = rank0_begin;
                end   = rank0_end;
            }
            depth++;
        }
        return rlt;
    }

    /**
     * Serializes this object to a string.
     *
     * Layout, in order: _bitsize (16 bit), _size (32 bit), the dump of every
     * bit vector, every entry of _seps (32 bit each), the number of _range
     * entries (32 bit), then each _range key and value (32 bit each).
     *
     * @return the serialized form
     */
    function dump () : string
    {
        var contents = [
            Binary.dump16bitNumber(this._bitsize),
            Binary.dump32bitNumber(this._size)
        ];
        for (var i = 0; i < this.bitsize(); i++)
        {
            contents.push(this._bv[i].dump());
        }
        for (var i = 0; i < this.bitsize(); i++)
        {
            contents.push(Binary.dump32bitNumber(this._seps[i]));
        }
        var range_contents = [] : string[];
        var counter = 0;
        for (var key in this._range)
        {
            range_contents.push(Binary.dump32bitNumber(key as int));
            range_contents.push(Binary.dump32bitNumber(this._range[key]));
            counter++;
        }
        // The entry count is written before the entries themselves.
        contents.push(Binary.dump32bitNumber(counter));
        return contents.join('') + range_contents.join('');
    }

    /**
     * Serializes this object exactly like dump(), and additionally registers
     * the emitted sizes with the given report: 3 for the header
     * (16 bit + 32 bit), 2 per _seps entry, 4 per _range entry and 2 for the
     * entry count. The bit vectors report themselves through their own
     * dump(report).
     *
     * @param report receives the size bookkeeping via report.add()
     * @return the serialized form
     */
    function dump (report : CompressionReport) : string
    {
        var contents = [
            Binary.dump16bitNumber(this._bitsize),
            Binary.dump32bitNumber(this._size)
        ];
        report.add(3, 3);
        for (var i = 0; i < this.bitsize(); i++)
        {
            contents.push(this._bv[i].dump(report));
        }
        for (var i = 0; i < this.bitsize(); i++)
        {
            contents.push(Binary.dump32bitNumber(this._seps[i]));
            report.add(2, 2);
        }
        var range_contents = [] : string[];
        var counter = 0;
        for (var key in this._range)
        {
            range_contents.push(Binary.dump32bitNumber(key as int));
            range_contents.push(Binary.dump32bitNumber(this._range[key]));
            report.add(4, 4);
            counter++;
        }
        report.add(2, 2);
        // The entry count is written before the entries themselves.
        contents.push(Binary.dump32bitNumber(counter));
        return contents.join('') + range_contents.join('');
    }

    /**
     * Restores this object from data, reading from offset 0.
     *
     * @param data serialized form as produced by dump()
     * @return the offset just past the consumed data
     */
    function load (data : string) : int
    {
        return this.load(data, 0);
    }

    /**
     * Restores this object from data, starting at the given offset.
     *
     * Calls clear() first, then reads the fields in the order dump() writes
     * them. A 16-bit number occupies one character of data and a 32-bit number
     * occupies two, which is why the offset advances by 1, 2 or 4.
     *
     * @param data serialized form as produced by dump()
     * @param offset index in data at which the serialized form starts
     * @return the offset just past the consumed data
     */
    function load (data : string, offset : int) : int
    {
        this.clear();
        this._bitsize = Binary.load16bitNumber(data, offset++);
        this._size = Binary.load32bitNumber(data, offset);
        offset += 2;
        for (var i = 0; i < this.bitsize(); i++)
        {
            var bit_vector = new BitVector();
            offset = bit_vector.load(data, offset);
            this._bv.push(bit_vector);
        }
        for (var i = 0; i < this.bitsize(); i++, offset += 2)
        {
            this._seps.push(Binary.load32bitNumber(data, offset));
        }

        var range_size = Binary.load32bitNumber(data, offset);
        offset += 2;
        // Each entry is a 32-bit key followed by a 32-bit value (4 characters).
        for (var i = 0; i < range_size; i++, offset += 4)
        {
            var key = Binary.load32bitNumber(data, offset);
            var value = Binary.load32bitNumber(data, offset + 2);
            this._range[key as string] = value;
        }
        return offset;
    }

    /**
     * Returns a new map holding the same key/value pairs as input.
     *
     * @param input map to copy
     */
    static function _shallow_copy (input : Map.<int>) : Map.<int>
    {
        var result = {} : Map.<int>;
        for (var key in input)
        {
            result[key] = input[key];
        }
        return result;
    }

    /**
     * Tests bit i of c, counting from the most significant bit of a
     * _bitsize-wide value (i == 0 is the highest bit).
     *
     * @param c value to inspect
     * @param i bit index, 0 = most significant
     * @return true when the bit is set
     */
    function _uint2bit (c : int, i : int) : boolean
    {
        return ((c >>> (this._bitsize - 1 - i)) & 0x1) == 0x1;
    }
}