summaryrefslogtreecommitdiffstats
path: root/web/server/h2o/libh2o/misc/oktavia/src/sais.jsx
diff options
context:
space:
mode:
Diffstat (limited to 'web/server/h2o/libh2o/misc/oktavia/src/sais.jsx')
-rw-r--r--web/server/h2o/libh2o/misc/oktavia/src/sais.jsx250
1 files changed, 0 insertions, 250 deletions
diff --git a/web/server/h2o/libh2o/misc/oktavia/src/sais.jsx b/web/server/h2o/libh2o/misc/oktavia/src/sais.jsx
deleted file mode 100644
index 9d8fa8fb6..000000000
--- a/web/server/h2o/libh2o/misc/oktavia/src/sais.jsx
+++ /dev/null
@@ -1,250 +0,0 @@
-/* Original source code:
- * G. Nong, S. Zhang and W. H. Chan, Two Efficient Algorithms for Linear Time Suffix Array Construction, IEEE Transactions on Computers, To Appear
- * http://www.cs.sysu.edu.cn/nong/index.files/Two%20Efficient%20Algorithms%20for%20Linear%20Suffix%20Array%20Construction.pdf
- */
-
-import "bit-vector.jsx";
-
-class OArray
-{
- var offset : int;
- var array : int[];
-
- function constructor (array : int[])
- {
- this.array = array;
- this.offset = 0;
- }
-
- function constructor (array : int[], offset : int)
- {
- this.array = array;
- this.offset = offset;
- }
-
- function get (index : int) : int
- {
- return this.array[index + this.offset];
- }
-
- function set (index : int, value : int) : void
- {
- this.array[index + this.offset] = value;
- }
-
- function isS (index : int) : boolean
- {
- return this.array[index + this.offset] < this.array[index + this.offset + 1];
- }
-
- function compare (index1 : int, index2 : int) : boolean
- {
- return this.array[index1 + this.offset] == this.array[index2 + this.offset];
- }
-}
-
-
-class SAIS
-{
- static function _isLMS (t : BitVector, i : int) : boolean
- {
- return i > 0 && t.get(i) && !t.get(i - 1);
- }
-
- // find the start or end of each bucket
- static function _getBuckets(s : OArray, bkt : int[], n : int, K : int, end : boolean) : void
- {
- var sum = 0;
- for (var i = 0; i <= K; i++)
- {
- bkt[i] = 0; // clear all buckets
- }
- for (var i = 0; i < n; i++)
- {
- bkt[s.get(i)]++; // compute the size of each bucket
- }
- for (var i = 0; i <= K; i++)
- {
- sum += bkt[i];
- bkt[i] = end ? sum : sum - bkt[i];
- }
- }
-
- // compute SAl
- static function _induceSAl(t : BitVector, SA : int[], s : OArray, bkt : int[], n : int, K : int, end : boolean) : void
- {
- SAIS._getBuckets(s, bkt, n, K, end); // find starts of buckets
- for (var i = 0; i < n; i++)
- {
- var j = SA[i] - 1;
- if (j >= 0 && !t.get(j))
- {
- SA[bkt[s.get(j)]++] = j;
- }
- }
- }
-
- // compute SAs
- static function _induceSAs(t : BitVector, SA : int[], s : OArray, bkt : int[], n : int, K : int, end : boolean) : void
- {
- SAIS._getBuckets(s, bkt, n, K, end); // find ends of buckets
- for (var i = n - 1; i >= 0; i--)
- {
- var j = SA[i] - 1;
- if (j >=0 && t.get(j))
- {
- SA[--bkt[s.get(j)]] = j;
- }
- }
- }
-
- // find the suffix array SA of s[0..n-1] in {1..K}^n
- // require s[n-1]=0 (the sentinel!), n>=2
- // use a working space (excluding s and SA) of at most 2.25n+O(1) for a constant alphabet
-
- static function make(source : string) : int[]
- {
- var charCodes = [] : int[];
- charCodes.length = source.length;
- var maxCode = 0;
- for (var i = 0; i < source.length; i++)
- {
- var code = source.charCodeAt(i);
- charCodes[i] = code;
- maxCode = (code > maxCode) ? code : maxCode;
- }
- var SA = [] : int[];
- SA.length = source.length;
- var s = new OArray(charCodes);
- SAIS._make(s, SA, source.length, maxCode);
- return SA;
- }
-
- static function _make(s : OArray, SA : int[], n : int, K : int) : void
- {
- // Classify the type of each character
- var t = new BitVector();
- t.set(n - 2, false);
- t.set(n - 1, true); // the sentinel must be in s1, important!!!
- for (var i = n - 3; i >= 0; i--)
- {
- t.set(i, (s.isS(i) || (s.compare(i, i + 1) && t.get(i + 1))));
- }
-
- // stage 1: reduce the problem by at least 1/2
- // sort all the S-substrings
- var bkt = [] : int[];
- bkt.length = K + 1;
- SAIS._getBuckets(s, bkt, n, K, true); // find ends of buckets
- for (var i = 0; i < n; i++)
- {
- SA[i] = -1;
- }
- for (var i = 1; i < n; i++)
- {
- if (SAIS._isLMS(t, i))
- {
- SA[--bkt[s.get(i)]] = i;
- }
- }
- SAIS._induceSAl(t, SA, s, bkt, n, K, false);
- SAIS._induceSAs(t, SA, s, bkt, n, K, true);
- // compact all the sorted substrings into the first n1 items of SA
- // 2*n1 must be not larger than n (proveable)
- var n1 = 0;
- for (var i = 0; i < n; i++)
- {
- if (SAIS._isLMS(t, SA[i]))
- {
- SA[n1++] = SA[i];
- }
- }
-
- // find the lexicographic names of all substrings
- for (var i = n1; i < n; i++)
- {
- SA[i]=-1; // init the name array buffer
- }
- var name = 0;
- var prev = -1;
- for (i = 0; i < n1; i++)
- {
- var pos = SA[i];
- var diff = false;
- for (var d = 0; d < n; d++)
- {
- if (prev == -1 || !s.compare(pos + d, prev + d) || t.get(pos + d) != t.get(prev + d))
- {
- diff = true;
- break;
- }
- else if (d > 0 && (SAIS._isLMS(t, pos+d) || SAIS._isLMS(t, prev + d)))
- {
- break;
- }
- }
- if (diff)
- {
- name++;
- prev = pos;
- }
- pos = (pos % 2 == 0) ? pos / 2 : (pos - 1) / 2;
- SA[n1 + pos] = name - 1;
- }
- for (var i = n - 1, j = n - 1; i >= n1; i--)
- {
- if (SA[i] >= 0)
- {
- SA[j--] = SA[i];
- }
- }
-
- // stage 2: solve the reduced problem
- // recurse if names are not yet unique
- var SA1 = SA;
- var s1 = new OArray(SA, n - n1);
-
- if (name < n1)
- {
- SAIS._make(s1, SA1, n1, name - 1);
- }
- else
- {
- // generate the suffix array of s1 directly
- for (i = 0; i < n1; i++)
- {
- SA1[s1.get(i)] = i;
- }
- }
-
- // stage 3: induce the result for the original problem
-
- bkt = [] : int[];
- bkt.length = K + 1;
- // put all left-most S characters into their buckets
- SAIS._getBuckets(s, bkt, n, K, true); // find ends of buckets
- for (i = 1, j = 0; i < n; i++)
- {
- if (SAIS._isLMS(t, i))
- {
- s1.set(j++, i); // get p1
- }
- }
- for (i = 0; i < n1; i++)
- {
- SA1[i] = s1.get(SA1[i]); // get index in s
- }
- for (i = n1; i < n; i++)
- {
- SA[i] = -1; // init SA[n1..n-1]
- }
- for (i = n1 - 1; i >= 0; i--)
- {
- j = SA[i];
- SA[i] = -1;
- SA[--bkt[s.get(j)]] = j;
- }
- SAIS._induceSAl(t, SA, s, bkt, n, K, false);
- SAIS._induceSAs(t, SA, s, bkt, n, K, true);
- }
-}