// Provenance (metadata of the patch this source was extracted from):
//   From be1c7e50e1e8809ea56f2c9d472eccd8ffd73a97 Mon Sep 17 00:00:00 2001
//   From: Daniel Baumann
//   Date: Fri, 19 Apr 2024 04:57:58 +0200
//   Subject: Adding upstream version 1.44.3.
//   Signed-off-by: Daniel Baumann
//   Path: web/server/h2o/libh2o/misc/oktavia/src/stemmer/base-stemmer.jsx
//   (new file mode 100644, index 00000000..dfc0d6ea, 1 file changed,
//    419 insertions(+); cgit v1.2.3)

import "stemmer.jsx";
import "among.jsx";

/**
 * Shared runtime for stemmers: holds the string being processed together
 * with a cursor, forward/backward limits and a slice (bra..ket), and offers
 * the matching and editing primitives that operate on that state.
 *
 * The base implementation of stem() does nothing and returns false.
 */
class BaseStemmer implements Stemmer
{
    // The string currently being processed.
    var current : string;

    // Position in `current` that the matching primitives read from.
    var cursor : int;
    // Exclusive upper bound for forward matching.
    var limit : int;
    // Inclusive lower bound for backward matching.
    var limit_backward : int;
    // Start (bra) and end (ket) of the slice used by slice_from/slice_to.
    var bra : int;
    var ket : int;
    // Memo of stemWord() results, keyed by '.' + word.
    var cache : Map.<string>;

    function constructor ()
    {
        this.cache = {} : Map.<string>;
        this.setCurrent("");
    }

    /**
     * Set the current string and reset cursor, limits and slice to span it.
     */
    function setCurrent (value : string) : void
    {
        this.current = value;
        this.cursor = 0;
        this.limit = this.current.length;
        this.limit_backward = 0;
        this.bra = this.cursor;
        this.ket = this.limit;
    }

    /**
     * Get the current string.
     */
    function getCurrent () : string
    {
        return this.current;
    }

    /**
     * Copy the processing state (string, cursor, limits, slice) from
     * another stemmer. The cache is not copied.
     */
    function copy_from (other : BaseStemmer) : void
    {
        this.current = other.current;
        this.cursor = other.cursor;
        this.limit = other.limit;
        this.limit_backward = other.limit_backward;
        this.bra = other.bra;
        this.ket = other.ket;
    }

    /**
     * Forward: if the character at the cursor lies in [min, max] and its bit
     * is set in the bitmap `s` (bit (ch - min) & 7 of s[(ch - min) >>> 3]),
     * advance the cursor by one and return true; otherwise return false.
     */
    function in_grouping (s : int[], min : int, max : int) : boolean
    {
        if (this.cursor >= this.limit) return false;
        var ch = this.current.charCodeAt(this.cursor);
        if (ch > max || ch < min) return false;
        ch -= min;
        if ((s[ch >>> 3] & (0x1 << (ch & 0x7))) == 0) return false;
        this.cursor++;
        return true;
    }

    /**
     * Backward counterpart of in_grouping: tests the character just before
     * the cursor and moves the cursor back by one on success.
     */
    function in_grouping_b (s : int[], min : int, max : int) : boolean
    {
        if (this.cursor <= this.limit_backward) return false;
        var ch = this.current.charCodeAt(this.cursor - 1);
        if (ch > max || ch < min) return false;
        ch -= min;
        if ((s[ch >>> 3] & (0x1 << (ch & 0x7))) == 0) return false;
        this.cursor--;
        return true;
    }

    /**
     * Forward: succeed (advancing the cursor by one) when the character at
     * the cursor is outside [min, max] or its bit in `s` is clear.
     */
    function out_grouping (s : int[], min : int, max : int) : boolean
    {
        if (this.cursor >= this.limit) return false;
        var ch = this.current.charCodeAt(this.cursor);
        if (ch > max || ch < min) {
            this.cursor++;
            return true;
        }
        ch -= min;
        if ((s[ch >>> 3] & (0x1 << (ch & 0x7))) == 0) {
            this.cursor++;
            return true;
        }
        return false;
    }

    /**
     * Backward counterpart of out_grouping: tests the character just before
     * the cursor and moves the cursor back by one on success.
     */
    function out_grouping_b (s : int[], min : int, max : int) : boolean
    {
        if (this.cursor <= this.limit_backward) return false;
        var ch = this.current.charCodeAt(this.cursor - 1);
        if (ch > max || ch < min) {
            this.cursor--;
            return true;
        }
        ch -= min;
        if ((s[ch >>> 3] & (0x1 << (ch & 0x7))) == 0) {
            this.cursor--;
            return true;
        }
        return false;
    }

    /**
     * Forward: advance by one and return true if the character code at the
     * cursor lies in [min, max].
     */
    function in_range (min : int, max : int) : boolean
    {
        if (this.cursor >= this.limit) return false;
        var ch = this.current.charCodeAt(this.cursor);
        if (ch > max || ch < min) return false;
        this.cursor++;
        return true;
    }

    /**
     * Backward: step back by one and return true if the character code just
     * before the cursor lies in [min, max].
     */
    function in_range_b (min : int, max : int) : boolean
    {
        if (this.cursor <= this.limit_backward) return false;
        var ch = this.current.charCodeAt(this.cursor - 1);
        if (ch > max || ch < min) return false;
        this.cursor--;
        return true;
    }

    /**
     * Forward: advance by one and return true if the character code at the
     * cursor lies outside [min, max].
     */
    function out_range (min : int, max : int) : boolean
    {
        if (this.cursor >= this.limit) return false;
        var ch = this.current.charCodeAt(this.cursor);
        if (!(ch > max || ch < min)) return false;
        this.cursor++;
        return true;
    }

    /**
     * Backward: step back by one and return true if the character code just
     * before the cursor lies outside [min, max].
     */
    function out_range_b (min : int, max : int) : boolean
    {
        if (this.cursor <= this.limit_backward) return false;
        var ch = this.current.charCodeAt(this.cursor - 1);
        if (!(ch > max || ch < min)) return false;
        this.cursor--;
        return true;
    }

    /**
     * Forward: if the s_size characters starting at the cursor equal `s`,
     * move the cursor past them and return true.
     */
    function eq_s (s_size : int, s : string) : boolean
    {
        if (this.limit - this.cursor < s_size) return false;
        if (this.current.slice(this.cursor, this.cursor + s_size) != s)
        {
            return false;
        }
        this.cursor += s_size;
        return true;
    }

    /**
     * Backward: if the s_size characters ending at the cursor equal `s`,
     * move the cursor to their start and return true.
     */
    function eq_s_b (s_size : int, s : string) : boolean
    {
        if (this.cursor - this.limit_backward < s_size) return false;
        if (this.current.slice(this.cursor - s_size, this.cursor) != s)
        {
            return false;
        }
        this.cursor -= s_size;
        return true;
    }

    /** eq_s using the full length of `s`. */
    function eq_v (s : string) : boolean
    {
        return this.eq_s(s.length, s);
    }

    /** eq_s_b using the full length of `s`. */
    function eq_v_b (s : string) : boolean
    {
        return this.eq_s_b(s.length, s);
    }

    /**
     * Forward search of the table `v[0 .. v_size)` for an entry matching the
     * text at the cursor.
     *
     * A binary search narrows the candidates (this presumes `v` is sorted by
     * `s` -- TODO confirm against the table generator); then the
     * `substring_i` chain is followed from the candidate. For the first
     * entry whose whole string was matched, the cursor is placed after it
     * and `result` is returned, provided the entry has no `method` or its
     * `method` returns true.
     *
     * @return the matching entry's `result`, or 0 when the chain ends
     *         (substring_i < 0) without a match.
     */
    function find_among (v : Among[], v_size : int) : int
    {
        var i = 0;
        var j = v_size;

        var c = this.cursor;
        var l = this.limit;

        // Number of leading characters known to match v[i] and v[j].
        var common_i = 0;
        var common_j = 0;

        var first_key_inspected = false;

        while (true)
        {
            var k = i + ((j - i) >>> 1);
            var diff = 0;
            var common = common_i < common_j ? common_i : common_j; // smaller
            var w = v[k];
            var i2;
            for (i2 = common; i2 < w.s_size; i2++)
            {
                if (c + common == l)
                {
                    // Ran out of input: treat the input as smaller.
                    diff = -1;
                    break;
                }
                diff = this.current.charCodeAt(c + common) - w.s.charCodeAt(i2);
                if (diff != 0) break;
                common++;
            }
            if (diff < 0)
            {
                j = k;
                common_j = common;
            }
            else
            {
                i = k;
                common_i = common;
            }
            if (j - i <= 1)
            {
                if (i > 0) break; // v->s has been inspected
                if (j == i) break; // only one item in v

                // - but now we need to go round once more to get
                // v->s inspected. This looks messy, but is actually
                // the optimal approach.

                if (first_key_inspected) break;
                first_key_inspected = true;
            }
        }
        while (true)
        {
            var w = v[i];
            if (common_i >= w.s_size)
            {
                this.cursor = c + w.s_size;
                if (w.method == null)
                {
                    return w.result;
                }
                // NOTE(review): this passes w.instance, whereas find_among_b
                // passes `this` -- confirm which one is intended.
                var res = w.method(w.instance);
                // Restore the cursor in case the method moved it.
                this.cursor = c + w.s_size;
                if (res)
                {
                    return w.result;
                }
            }
            i = w.substring_i;
            if (i < 0) return 0;
        }
        return -1; // not reachable
    }

    /**
     * Backward counterpart of find_among: compares table strings from their
     * last character against the text ending at the cursor, bounded by
     * limit_backward, and on success places the cursor before the match.
     * The comments on find_among apply.
     *
     * @return the matching entry's `result`, or 0 when the chain ends
     *         (substring_i < 0) without a match.
     */
    function find_among_b (v : Among[], v_size : int) : int
    {
        var i = 0;
        var j = v_size;

        var c = this.cursor;
        var lb = this.limit_backward;

        var common_i = 0;
        var common_j = 0;

        var first_key_inspected = false;

        while (true)
        {
            var k = i + ((j - i) >> 1);
            var diff = 0;
            var common = common_i < common_j ? common_i : common_j;
            var w = v[k];
            var i2;
            for (i2 = w.s_size - 1 - common; i2 >= 0; i2--)
            {
                if (c - common == lb)
                {
                    // Ran out of input: treat the input as smaller.
                    diff = -1;
                    break;
                }
                diff = this.current.charCodeAt(c - 1 - common) - w.s.charCodeAt(i2);
                if (diff != 0) break;
                common++;
            }
            if (diff < 0)
            {
                j = k;
                common_j = common;
            }
            else
            {
                i = k;
                common_i = common;
            }
            if (j - i <= 1)
            {
                if (i > 0) break;
                if (j == i) break;
                if (first_key_inspected) break;
                first_key_inspected = true;
            }
        }
        while (true)
        {
            var w = v[i];
            if (common_i >= w.s_size)
            {
                this.cursor = c - w.s_size;
                if (w.method == null) return w.result;
                // NOTE(review): find_among passes w.instance here instead of
                // `this` -- confirm which one is intended.
                var res = w.method(this);
                // Restore the cursor in case the method moved it.
                this.cursor = c - w.s_size;
                if (res) return w.result;
            }
            i = w.substring_i;
            if (i < 0) return 0;
        }
        return -1; // not reachable
    }

    /**
     * Replace the characters between c_bra and c_ket in this.current by the
     * characters in s, adjusting limit and cursor for the length change.
     *
     * @return the change in length (s.length - (c_ket - c_bra)).
     */
    function replace_s (c_bra : int, c_ket : int, s : string) : int
    {
        var adjustment = s.length - (c_ket - c_bra);
        this.current = this.current.slice(0, c_bra) + s + this.current.slice(c_ket);
        this.limit += adjustment;
        if (this.cursor >= c_ket) this.cursor += adjustment;
        else if (this.cursor > c_bra) this.cursor = c_bra;
        return adjustment;
    }

    /**
     * Check that 0 <= bra <= ket <= limit <= current.length.
     */
    function slice_check () : boolean
    {
        return this.bra >= 0 &&
               this.bra <= this.ket &&
               this.ket <= this.limit &&
               this.limit <= this.current.length;
    }

    /**
     * Replace the slice bra..ket by `s`.
     *
     * @return false (leaving the string untouched) if slice_check() fails.
     */
    function slice_from (s : string) : boolean
    {
        if (!this.slice_check()) return false;
        this.replace_s(this.bra, this.ket, s);
        return true;
    }

    /** Delete the slice bra..ket; same return value as slice_from. */
    function slice_del () : boolean
    {
        return this.slice_from("");
    }

    /**
     * Replace c_bra..c_ket by `s` and shift bra/ket by the length change
     * when they lie at or after c_bra.
     */
    function insert (c_bra : int, c_ket : int, s : string) : void
    {
        var adjustment = this.replace_s(c_bra, c_ket, s);
        if (c_bra <= this.bra) this.bra += adjustment;
        if (c_bra <= this.ket) this.ket += adjustment;
    }

    /**
     * Return the slice bra..ket, or '' if slice_check() fails.
     * The parameter `s` is not read; the result is delivered only through
     * the return value.
     */
    function slice_to (s : string) : string
    {
        return this.slice_check() ? this.current.slice(this.bra, this.ket) : '';
    }

    /**
     * Return the current string up to `limit`.
     * The parameter `s` is not read.
     */
    function assign_to (s : string) : string
    {
        return this.current.slice(0, this.limit);
    }

    /**
     * Stemming hook. The base implementation leaves the current string
     * unchanged and returns false.
     */
    function stem () : boolean
    {
        return false;
    }

    /**
     * Stem a single word, memoising the result in the cache.
     */
    override function stemWord (word : string) : string
    {
        // Keys are prefixed with '.'; presumably so that words cannot clash
        // with built-in property names of the backing object -- confirm.
        var key = '.' + word;
        var result = this.cache[key];
        if (result == null)
        {
            this.setCurrent(word);
            this.stem();
            result = this.getCurrent();
            this.cache[key] = result;
        }
        return result;
    }

    /**
     * Stem each word of `words`, in order, sharing the cache with stemWord.
     */
    override function stemWords (words : string[]) : string[]
    {
        var results = [] : string[];
        for (var i = 0; i < words.length; i++)
        {
            results.push(this.stemWord(words[i]));
        }
        return results;
    }
}