diff options
Diffstat (limited to 'web/server/h2o/libh2o/misc/oktavia/tool')
21 files changed, 0 insertions, 1496 deletions
diff --git a/web/server/h2o/libh2o/misc/oktavia/tool/httpstatus.jsx b/web/server/h2o/libh2o/misc/oktavia/tool/httpstatus.jsx deleted file mode 100644 index a4d7451e5..000000000 --- a/web/server/h2o/libh2o/misc/oktavia/tool/httpstatus.jsx +++ /dev/null @@ -1,130 +0,0 @@ -import "console.jsx"; -import "oktavia.jsx"; -import "metadata.jsx"; -import "query-parser.jsx"; -import "stemmer/english-stemmer.jsx"; - - -class HTTPStatus -{ - var oktavia : Oktavia; - var splitter : Splitter; - var httpstatus : string[]; - - function constructor () - { - this.oktavia = new Oktavia; - this.oktavia.setStemmer(new EnglishStemmer()); - this.splitter = this.oktavia.addSplitter('line break'); - this.makeIndex(); - } - - function makeIndex () : void - { - this.httpstatus = [ - "100: Continue", - "101: Switching Protocols", - "102: Processing", - "200: OK", - "201: Created", - "202: Accepted", - "203: Non-Authoritative Information", - "204: No Content", - "205: Reset Content", - "206: Partial Content", - "207: Multi-Status", - "208: Already Reported", - "300: Multiple Choices", - "301: Moved Permanently", - "302: Found", - "303: See Other", - "304: Not Modified", - "305: Use Proxy", - "307: Temporary Redirect", - "400: Bad Request", - "401: Unauthorized", - "402: Payment Required", - "403: Forbidden", - "404: Not Found", - "405: Method Not Allowed", - "406: Not Acceptable", - "407: Proxy Authentication Required", - "408: Request Timeout", - "409: Conflict", - "410: Gone", - "411: Length Required", - "412: Precondition Failed", - "413: Request Entity Too Large", - "414: Request-URI Too Large", - "415: Unsupported Media Type", - "416: Request Range Not Satisfiable", - "417: Expectation Failed", - "418: I'm a teapot", - "422: Unprocessable Entity", - "423: Locked", - "424: Failed Dependency", - "425: No code", - "426: Upgrade Required", - "428: Precondition Required", - "429: Too Many Requests", - "431: Request Header Fields Too Large", - "449: Retry with", - "500: Internal Server Error", - "501: Not Implemented", - "502: Bad Gateway", - "503: Service Unavailable", - "504: Gateway Timeout", - "505: HTTP Version Not Supported", - "506: Variant Also Negotiates", - "507: Insufficient Storage", - "509: Bandwidth Limit Exceeded", - "510: Not Extended" - ]; - for (var i in this.httpstatus) - { - this.oktavia.addWord(this.httpstatus[i], true); - this.splitter.split(); - } - this.oktavia.build(); - } - - function search (words : string[]) : string - { - var queryParser = new QueryParser(); - var queries = queryParser.parse(words); - if (queries.length == 0) - { - var result = this.httpstatus.join('\n'); - result = result + "\n\nToday's status: " + this.random(); - return result; - } - else - { - var summary = this.oktavia.search(queries); - if (summary.size() == 0) - { - return "not found "; - } - var resultWords = [] : string[]; - for (var i in summary.result.unitIds) - { - resultWords.push(this.splitter.getContent(summary.result.unitIds[i])); - } - return resultWords.join('\n'); - } - } - - function random () : string - { - return this.httpstatus[Math.round(Math.random() * this.httpstatus.length)]; - } -} - -class _Main -{ - static function main (argv : string []) : void - { - var httpstatus = new HTTPStatus(); - console.log(httpstatus.search(argv)); - } -} diff --git a/web/server/h2o/libh2o/misc/oktavia/tool/oktavia-mkindex.jsx b/web/server/h2o/libh2o/misc/oktavia/tool/oktavia-mkindex.jsx deleted file mode 100644 index f2593bc9e..000000000 --- a/web/server/h2o/libh2o/misc/oktavia/tool/oktavia-mkindex.jsx +++ /dev/null @@ -1,470 +0,0 @@ -import "console.jsx"; -import "js/nodejs.jsx"; - -import "oktavia.jsx"; -import "getopt.jsx"; -import "htmlparser.jsx"; -import "csvparser.jsx"; -import "textparser.jsx"; -import "binary-util.jsx"; - -import "stemmer/stemmer.jsx"; -import "stemmer/danish-stemmer.jsx"; -import "stemmer/dutch-stemmer.jsx"; -import "stemmer/english-stemmer.jsx"; -import "stemmer/finnish-stemmer.jsx"; -import "stemmer/french-stemmer.jsx"; -import "stemmer/german-stemmer.jsx"; -import "stemmer/hungarian-stemmer.jsx"; -import "stemmer/italian-stemmer.jsx"; -import "stemmer/norwegian-stemmer.jsx"; -import "stemmer/porter-stemmer.jsx"; -import "stemmer/portuguese-stemmer.jsx"; -import "stemmer/romanian-stemmer.jsx"; -import "stemmer/russian-stemmer.jsx"; -import "stemmer/spanish-stemmer.jsx"; -import "stemmer/swedish-stemmer.jsx"; -import "stemmer/turkish-stemmer.jsx"; - - -class _Main -{ - static function usage () : void - { - console.log([ - "usage: oktavia_mkindex [options]", - "", - "Common Options:", - " -i, --input [input folder/file ] : Target files to search. .html, .csv, .txt are available.", - " -o, --output [outputfolder] : Directory that will store output files.", - " : This is a relative path from root.", - " : Default value is 'search'. ", - " -t, --type [type] : Export type. 'index'(default), 'base64', 'cmd', 'js',", - " : 'commonjs' are available.", - " : 'index' is a just index file. 'cmd' is a base64 code with search program.", - " : Others are base64 source code style output.", - " -m, --mode [mode] : Mode type. 'html', 'csv', 'text' are available.", - " -c, --cache-density [percent] : Cache data density. It effects file size and search speed.", - " : 100% become four times of base index file size. Default value is 5%.", - " : Valid value is 0.1% - 100%.", - " -n, --name [function] : A variable name for 'js' output or property name", - " : for 'js' and 'commonjs'. Default value is 'searchIndex'.", - " -q, --quiet : Hide detail information.", - " -h, --help : Display this message.", - "", - "HTML Mode Options:", - " -r, --root [document root] : Document root folder. Default is current. ", - " : Indexer creates result file path from this folder.", - " -p, --prefix [directory prefix] : Directory prefix for a document root from a server root.", - " : If your domain is example.com and 'manual' is passed,", - " : document root become http://example.com/manual/.", - " : It effects search result URL. Default value is '/'.", - " -u, --unit [search unit] : 'file', 'h1'-'h6'. Default value is 'file'.", - " -f, --filter [target tag] : Only contents inside this tag is indexed.", - " : Default value is \"article,#content,#main,div.body\".", - " -s, --stemmer [algorithm] : Select stemming algorithm.", - " -w, --word-splitter [splitter] : Use optional word splitter.", - " : 'ts' (TinySegmenter for Japanese) is available", - "", - "Text Mode Options:", - " -s, --stemmer [algorithm] : Select stemming algorithm.", - " -w, --word-splitter [splitter] : Use optional word splitter.", - " : 'ts' (TinySegmenter for Japanese) is available", - " -u, --unit [search unit] : file, block, line. Default value is 'file'.", - "", - "Supported Stemmer Algorithms:", - " danish, dutch, english, finnish, french german, hungarian italian", - " norwegian, porter, portuguese, romanian, russian, spanish, swedish, turkish" - ].join('\n')); - } - - static function main(args : string[]) : void - { - console.log("Search Engine Oktavia - Index Generator\n"); - - var inputs = [] : string[]; - var root = process.cwd(); - var prefix = '/'; - var output = "search"; - var showhelp = false; - var notrun = false; - var unit = 'file'; - var type = 'js'; - var mode = ''; - var verbose = true; - var filter = [] : string[]; - var algorithm : Nullable.<string> = null; - var wordsplitter : Nullable.<string> = null; - var cacheDensity : number = 5.0; - var name = null : Nullable.<string>; - var validModes = ['html', 'csv', 'text']; - var validUnitsForHTML = ['file', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6']; - var validUnitsForText = ['file', 'block', 'line']; - var validStemmers = [ - 'danish', 'dutch', 'english', 'finnish', 'french', 'german', 'hungarian', - 'italian', 'norwegian', 'porter', 'portuguese', 'romanian', 'russian', - 'spanish', 'swedish', 'turkish' - ]; - var validTypes = ['index', 'base64', 'cmd', 'js', 'commonjs']; - var validWordSplitters = ['ts']; - - var optstring = "n:(name)q(quiet)m:(mode)i:(input)r:(root)p:(prefix)o:(output)h(help)u:(unit)f:(filter)s:(stemmer)w:(word-splitter)t:(type)c:(cache-density)"; - var parser = new BasicParser(optstring, args); - var opt = parser.getopt(); - while (opt) - { - switch (opt.option) - { - case "m": - if (validModes.indexOf(opt.optarg) == -1) - { - console.error("Option m/mode should be 'html', 'csv', 'text'."); - notrun = true; - } - mode = opt.optarg; - break; - case "i": - inputs.push(opt.optarg); - break; - case "r": - root = node.path.resolve(opt.optarg); - break; - case "p": - prefix = opt.optarg; - break; - case "n": - name = opt.optarg; - break; - case "o": - output = opt.optarg; - if (output.slice(0, 1) == '/') - { - output = output.slice(1); - } - break; - case "h": - showhelp = true; - break; - case "q": - verbose = false; - break; - case "u": - unit = opt.optarg; - break; - case "f": - var items = opt.optarg.split(','); - for (var i in items) - { - filter.push(items[i]); - } - break; - case "t": - if (validTypes.indexOf(opt.optarg) == -1) - { - console.error('Option -t/--type is invalid.'); - notrun = true; - } - else - { - type = opt.optarg; - } - break; - case "s": - if (validStemmers.indexOf(opt.optarg) == -1) - { - console.error('Option -s/--stemmer is invalid.'); - notrun = true; - } - else - { - algorithm = opt.optarg; - } - break; - case "w": - - break; - case "c": - var match = /(\d+\.?\d*)/.exec(opt.optarg); - if (match) - { - var tmpValue = match[1] as number; - if (0.1 <= tmpValue && tmpValue <= 100) - { - cacheDensity = tmpValue; - } - else - { - console.error('Option -c/--cache-density should be in 0.1 - 100.'); - notrun = true; - } - } - else - { - console.error('Option -c/--cache-density is invalid.'); - notrun = true; - } - break; - case "?": - notrun = true; - break; - } - opt = parser.getopt(); - } - var inputTextFiles = [] : string[]; - var inputHTMLFiles = [] : string[]; - var inputCSVFiles = [] : string[]; - if (filter.length == 0) - { - filter = ['article', '#content', '#main', 'div.body']; - } - for (var i in inputs) - { - var input = inputs[i]; - if (!node.fs.existsSync(input)) - { - console.error("Following input folder/file doesn't exist: " + input); - notrun = true; - } - else - { - var stat = node.fs.statSync(input); - if (stat.isFile()) - { - _Main._checkFileType(node.path.resolve(input), inputTextFiles, inputHTMLFiles, inputCSVFiles); - } - else if (stat.isDirectory()) - { - _Main._checkDirectory(input, inputTextFiles, inputHTMLFiles, inputCSVFiles); - } - else - { - console.error("Following input is not folder or file: " + input); - notrun = true; - } - } - } - if (inputTextFiles.length == 0 && inputHTMLFiles.length == 0 && inputCSVFiles.length == 0 || !mode) - { - showhelp = true; - } - if (showhelp) - { - _Main.usage(); - } - else if (!notrun) - { - var stemmer : Nullable.<Stemmer> = null; - if (algorithm) - { - stemmer = _Main._createStemmer(algorithm); - } - var dump = null : Nullable.<string>; - switch (mode) - { - case 'html': - var unitIndex = validUnitsForHTML.indexOf(unit); - if (unitIndex == -1) - { - console.error('Option -u/--unit should be file, h1, h2, h3, h4, h5, h6. But ' + unit); - } - else - { - var htmlParser = new HTMLParser(unitIndex, root, prefix, filter, stemmer); - for (var i = 0; i < inputHTMLFiles.length; i++) - { - htmlParser.parse(inputHTMLFiles[i]); - } - console.log('generating index...'); - if (verbose) - { - console.log(''); - } - dump = htmlParser.dump(cacheDensity, verbose); - } - break; - case 'csv': - var csvParser = new CSVParser(root, stemmer); - for (var i in inputCSVFiles) - { - csvParser.parse(inputCSVFiles[i]); - } - break; - case 'text': - if (validUnitsForText.indexOf(unit) == -1) - { - console.error('Option u/unit should be file, block, line. But ' + unit); - } - else - { - var textParser = new TextParser(unit, root, stemmer); - for (var i in inputTextFiles) - { - textParser.parse(inputTextFiles[i]); - } - } - break; - } - if (dump) - { - var indexFilePath = ""; - switch (type) - { - case 'index': - indexFilePath = node.path.resolve(root, output, 'searchindex.okt'); - var dirPath = node.path.dirname(indexFilePath); - _Main._mkdirP(dirPath); - node.fs.writeFileSync(indexFilePath, dump, "utf16le"); - break; - case 'base64': - indexFilePath = node.path.resolve(root, output, 'searchindex.okt.b64'); - var dirPath = node.path.dirname(indexFilePath); - _Main._mkdirP(dirPath); - node.fs.writeFileSync(indexFilePath, Binary.base64encode(dump), "utf8"); - break; - case 'cmd': - break; - case 'js': - indexFilePath = node.path.resolve(root, output, 'searchindex.js'); - var dirPath = node.path.dirname(indexFilePath); - _Main._mkdirP(dirPath); - if (name == null) - { - name = 'searchIndex'; - } - var contents = [ - '// Oktavia Search Index', - 'var ' + name + ' = "' + Binary.base64encode(dump) + '";', '' - ]; - node.fs.writeFileSync(indexFilePath, contents.join('\n'), "utf8"); - break; - case 'commonjs': - indexFilePath = node.path.resolve(root, output, 'searchindex.js'); - var dirPath = node.path.dirname(indexFilePath); - _Main._mkdirP(dirPath); - if (name == null) - { - name = 'searchIndex'; - } - var contents = [ - '// Oktavia Search Index', - 'exports.' + name + ' = "' + Binary.base64encode(dump) + '";', '' - ]; - node.fs.writeFileSync(indexFilePath, contents.join('\n'), "utf8"); - break; - } - if (indexFilePath) - { - console.log("generated: " + indexFilePath); - } - } - } - } - - static function _checkFileType (path : string, texts : string[], HTMLs : string[], CSVs : string[]) : void - { - var match = path.match(/(.*)\.(.*)/); - if (match && match[1]) - { - switch (match[2].toLowerCase()) - { - case 'html': - case 'htm': - HTMLs.push(path); - break; - case 'csv': - CSVs.push(path); - break; - default: - texts.push(path); - } - } - } - - static function _checkDirectory (path : string, texts : string[], HTMLs : string[], CSVs : string[]) : void - { - var files = node.fs.readdirSync(path); - for (var j in files) - { - var filepath = node.path.resolve(path, files[j]); - var stat = node.fs.statSync(filepath); - if (stat.isFile()) - { - _Main._checkFileType(filepath, texts, HTMLs, CSVs); - } - else if (stat.isDirectory()) - { - _Main._checkDirectory(filepath, texts, HTMLs, CSVs); - } - } - } - - static function _mkdirP (path : string) : void - { - if (node.fs.existsSync(path)) - { - return; - } - _Main._mkdirP(node.path.dirname(path)); - node.fs.mkdirSync(path); - } - - static function _createStemmer (algorithm : string) : Stemmer - { - var stemmer : Stemmer; - switch (algorithm.toLowerCase()) - { - case "danish": - stemmer = new DanishStemmer(); - break; - case "dutch": - stemmer = new DutchStemmer(); - break; - case "english": - stemmer = new EnglishStemmer(); - break; - case "finnish": - stemmer = new FinnishStemmer(); - break; - case "french": - stemmer = new FrenchStemmer(); - break; - case "german": - stemmer = new GermanStemmer(); - break; - case "hungarian": - stemmer = new HungarianStemmer(); - break; - case "italian": - stemmer = new ItalianStemmer(); - break; - case "norwegian": - stemmer = new NorwegianStemmer(); - break; - case "porter": - stemmer = new PorterStemmer(); - break; - case "portuguese": - stemmer = new PortugueseStemmer(); - break; - case "romanian": - stemmer = new RomanianStemmer(); - break; - case "russian": - stemmer = new RussianStemmer(); - break; - case "spanish": - stemmer = new SpanishStemmer(); - break; - case "swedish": - stemmer = new SwedishStemmer(); - break; - case "turkish": - stemmer = new TurkishStemmer(); - break; - default: - stemmer = new EnglishStemmer(); - break; - } - return stemmer; - } -} diff --git a/web/server/h2o/libh2o/misc/oktavia/tool/oktavia-search.jsx b/web/server/h2o/libh2o/misc/oktavia/tool/oktavia-search.jsx deleted file mode 100644 index 719c71b86..000000000 --- a/web/server/h2o/libh2o/misc/oktavia/tool/oktavia-search.jsx +++ /dev/null @@ -1,370 +0,0 @@ -import "console.jsx"; -import "js/nodejs.jsx"; - -import "oktavia.jsx"; -import "getopt.jsx"; -import "query-parser.jsx"; -import "search-result.jsx"; -import "style.jsx"; -import "binary-util.jsx"; - -import "stemmer/stemmer.jsx"; -import "stemmer/danish-stemmer.jsx"; -import "stemmer/dutch-stemmer.jsx"; -import "stemmer/english-stemmer.jsx"; -import "stemmer/finnish-stemmer.jsx"; -import "stemmer/french-stemmer.jsx"; -import "stemmer/german-stemmer.jsx"; -import "stemmer/hungarian-stemmer.jsx"; -import "stemmer/italian-stemmer.jsx"; -import "stemmer/norwegian-stemmer.jsx"; -import "stemmer/porter-stemmer.jsx"; -import "stemmer/portuguese-stemmer.jsx"; -import "stemmer/romanian-stemmer.jsx"; -import "stemmer/russian-stemmer.jsx"; -import "stemmer/spanish-stemmer.jsx"; -import "stemmer/swedish-stemmer.jsx"; -import "stemmer/turkish-stemmer.jsx"; - - -class Search -{ - var style : Style; - - function search (indexFile : string, queryStrings : string[], num : int, style : Style, algorithm : Nullable.<string>) : void - { - this.style = style; - var oktavia = new Oktavia(); - if (algorithm != null) - { - oktavia.setStemmer(this.createStemmer(algorithm)); - } - if (!this.loadIndex(oktavia, indexFile)) - { - return; - } - console.time('searching'); - var queryParser = new QueryParser(); - queryParser.parse(queryStrings); - var summary = oktavia.search(queryParser.queries); - console.timeEnd('searching'); - if (summary.size() == 0) - { - this.notFound(summary, queryStrings); - } - else - { - this.showResult(oktavia, summary, num); - } - } - - function loadIndex (oktavia : Oktavia, filepath : string) : boolean - { - var ext = node.path.extname(filepath); - var content : string; - var result = true; - switch (ext) - { - case ".okt": - content = node.fs.readFileSync(filepath, "utf16le"); - oktavia.load(content); - break; - case ".b64": - content = node.fs.readFileSync(filepath, "utf8"); - oktavia.load(Binary.base64decode(content)); - break; - case ".js": - content = node.fs.readFileSync(filepath, "utf8"); - var index = content.indexOf('"'); - var lastIndex = content.lastIndexOf('"'); - oktavia.load(Binary.base64decode(content.slice(index, lastIndex))); - break; - default: - console.log("unknown file extension: " + ext); - result = false; - break; - } - return result; - } - - function sortResult (oktavia : Oktavia, summary : SearchSummary) : SearchUnit[] - { - for (var i = 0; i < summary.result.units.length; i++) - { - var score = 0; - var unit = summary.result.units[i]; - for (var pos in unit.positions) - { - var position = unit.positions[pos]; - if (oktavia.wordPositionType(position.position)) - { - score += 10; - } - else - { - score += 1; - } - if (!position.stemmed) - { - score += 2; - } - } - unit.score = score; - } - return summary.getSortedResult(); - } - - function showResult (oktavia : Oktavia, summary : SearchSummary, num : int) : void - { - var results = this.sortResult(oktavia, summary); - var style = this.style; - var metadata = oktavia.getPrimaryMetadata(); - for (var i = 0; i < results.length; i++) - { - var unit = results[i]; - var info = metadata.getInformation(unit.id).split(Oktavia.eob); - /*console.log(info.replace(Oktavia.eob, ' -- ') + '\n'); - + ' ----------------------------------------------- ' - + unit.score as string + ' pt');*/ - console.log(style.convert('<title>' + info[0] + '</title>') + ' ' + style.convert('<url>' + info[1] + '</url>')); - var offset = info[0].length + 1; - var content = metadata.getContent(unit.id); - var start = 0; - var positions = unit.getPositions(); - if (content.indexOf(info[0]) == 1) - { - content = content.slice(info[0].length + 2, content.length); - start += (info[0].length + 2); - } - var end = start + num; - var split = false; - if (positions[0].position > end - positions[0].word.length) - { - end = positions[0].position + Math.floor(num / 2); - split = true; - } - for (var j = positions.length - 1; j > -1; j--) - { - var pos = positions[j]; - if (pos.position + pos.word.length < end) - { - /*log('--------------begin : ' + (pos.position - start) as string); - log(content.slice(0, pos.position - start)); - log('--------------match : ' + pos.word.length as string); - .log(content.slice(pos.position - start, pos.position + pos.word.length - start)); - log('--------------match : ' + (content.length - pos.position + pos.word.length - start) as string); - log(content.slice(pos.position + pos.word.length - start, content.length)); - log('--------------end');*/ - content = [ - content.slice(0, pos.position - start), - style.convert('<hit>*</hit>').replace('*', content.slice(pos.position - start, pos.position + pos.word.length - start)), - content.slice(pos.position + pos.word.length - start, content.length) - ].join(''); - } - } - var text : string; - if (split) - { - text = [ - content.slice(0, Math.floor(num / 2)) + ' ...', - content.slice(-Math.floor(num / 2), end - start)].join('\n'); - } - else - { - text = content.slice(0, end - start) + ' ...\n'; - } - text = text.replace(Oktavia.eob, ' ').replace(/\n\n+/, '\n\n'); - console.log(text); - } - console.log(style.convert('<summary>' + (summary.size() as string) + " results.</summary>\n")); - } - - function notFound (summary : SearchSummary, query : string[]) : void - { - var style = this.style; - if (query.length > 1) - { - console.log("Suggestions:"); - var proposals = summary.getProposal(); - for (var i = 0; i < proposals.length; i++) - { - var proposal = proposals[i]; - var querywords = [] : string[]; - for (var j = 0; j < query.length; j++) - { - if (j != proposal.omit) - { - querywords.push(style.convert('<hit>' + query[j] + '</hit>')); - } - else - { - //querywords.push(style.convert('<del>' + query[j] + '</del>')); - } - } - console.log("* Expected result: " + querywords.join(" ") + " - " + (proposal.expect as string) + " hit"); - } - } - else - { - console.log(style.convert("Your search - <hit>" + query[0] + "</hit> - didn't match any documents.")); - } - } - - function createStemmer (algorithm : string) : Stemmer - { - var stemmer : Stemmer; - switch (algorithm.toLowerCase()) - { - case "danish": - stemmer = new DanishStemmer(); - break; - case "dutch": - stemmer = new DutchStemmer(); - break; - case "english": - stemmer = new EnglishStemmer(); - break; - case "finnish": - stemmer = new FinnishStemmer(); - break; - case "french": - stemmer = new FrenchStemmer(); - break; - case "german": - stemmer = new GermanStemmer(); - break; - case "hungarian": - stemmer = new HungarianStemmer(); - break; - case "italian": - stemmer = new ItalianStemmer(); - break; - case "norwegian": - stemmer = new NorwegianStemmer(); - break; - case "porter": - stemmer = new PorterStemmer(); - break; - case "portuguese": - stemmer = new PortugueseStemmer(); - break; - case "romanian": - stemmer = new RomanianStemmer(); - break; - case "russian": - stemmer = new RussianStemmer(); - break; - case "spanish": - stemmer = new SpanishStemmer(); - break; - case "swedish": - stemmer = new SwedishStemmer(); - break; - case "turkish": - stemmer = new TurkishStemmer(); - break; - default: - stemmer = new EnglishStemmer(); - break; - } - return stemmer; - } -} - -class _Main { - static function usage () : void - { - console.log([ - "usage: oktavia_search index_file [options] query...", - "", - "Options:", - " -m, --mono : Don't use color.", - " -s, --stemmer [algorithm] : Select stemming algorithm.", - " -n, --number [char number] : Result display number. Default value = 250", - " -h, --help : Display this message.", - "", - "Search Query Syntax:", - " word1 word2 : All words.", - ' "word1 word2" : Exact words or phrase.', - " word1 OR word2 : Any of these words.", - " word1 -word2 : None of these words." - ].join('\n')); - } - - static function main(args : string[]) : void - { - console.log("Search Engine Oktavia - Command-line Search Client\n"); - - var indexFile : Nullable.<string> = null; - var showhelp = false; - var notrun = false; - var styleType = 'console'; - var num : int = 250; - var queryStrings = [] : string[]; - var algorithm : Nullable.<string> = null; - - var validStemmers = [ - 'danish', 'dutch', 'english', 'finnish', 'french', 'german', 'hungarian', - 'italian', 'norwegian', 'porter', 'portuguese', 'romanian', 'russian', - 'spanish', 'swedish', 'turkish' - ]; - - if (args.length == 0) - { - showhelp = true; - } - else if (!node.fs.existsSync(args[0])) - { - console.error("Index file '" + args[0] + "' doesn't exist."); - notrun = true; - } - else - { - indexFile = args[0]; - } - - var optstring = "m(mono)s:(stemmer)n:(number)h(help)"; - var parser = new BasicParser(optstring, args.slice(1)); - var opt = parser.getopt(); - while (opt) - { - switch (opt.option) - { - case "s": - if (validStemmers.indexOf(opt.optarg) == -1) - { - console.error('Option s/stemmer is invalid.'); - notrun = true; - } - else - { - algorithm = opt.optarg; - } - break; - case "m": - styleType = 'ignore'; - break; - case "n": - num = opt.optarg as int; - break; - case "h": - showhelp = true; - break; - default: - queryStrings.push(opt.option); - break; - } - opt = parser.getopt(); - } - if (showhelp || queryStrings.length == 0) - { - _Main.usage(); - } - else if (!notrun) - { - var style = new Style(styleType); - var search = new Search(); - search.search(indexFile, queryStrings, num, style, algorithm); - } - } -} diff --git a/web/server/h2o/libh2o/misc/oktavia/tool/search_simple.jsx b/web/server/h2o/libh2o/misc/oktavia/tool/search_simple.jsx deleted file mode 100644 index f9b867511..000000000 --- a/web/server/h2o/libh2o/misc/oktavia/tool/search_simple.jsx +++ /dev/null @@ -1,39 +0,0 @@ -import "nodejs.jsx"; -import "fm_index.jsx"; - -class _Main -{ - static function usage () : void - { - log "Simple FM-Index Search Engine: Oktavia"; - log ""; - log "[usage]"; - log " search [input db file name] keyword"; - } - - static function main(args : string[]) : void - { - if (args.length <2) - { - _Main.usage(); - } - else - { - var indexFileName = args.shift(); - log "index file name: ", indexFileName; - var fm_index = new FMIndex(); - fm_index.load(node.fs.readFileSync(indexFileName, "utf16le")); - for (var i in args) - { - log "[search world]", args[i]; - var results = fm_index.search(args[i]); - for (var j in results) - { - var result = results[j]; - log "[", result[0], "]: ", "(", result[1], ")"; - } - log results.length, " hits"; - } - } - } -} diff --git a/web/server/h2o/libh2o/misc/oktavia/tool/web/oktavia-danish-search.jsx b/web/server/h2o/libh2o/misc/oktavia/tool/web/oktavia-danish-search.jsx deleted file mode 100644 index 98dba01dd..000000000 --- a/web/server/h2o/libh2o/misc/oktavia/tool/web/oktavia-danish-search.jsx +++ /dev/null @@ -1,10 +0,0 @@ -import "oktavia-search.jsx"; -import "stemmer/danish-stemmer.jsx"; - -class _Main -{ - static function main(args : string[]) : void - { - OktaviaSearch.setStemmer(new DanishStemmer); - } -} diff --git a/web/server/h2o/libh2o/misc/oktavia/tool/web/oktavia-dutch-search.jsx b/web/server/h2o/libh2o/misc/oktavia/tool/web/oktavia-dutch-search.jsx deleted file mode 100644 index 117f2cce4..000000000 --- a/web/server/h2o/libh2o/misc/oktavia/tool/web/oktavia-dutch-search.jsx +++ /dev/null @@ -1,10 +0,0 @@ -import "oktavia-search.jsx"; -import "stemmer/dutch-stemmer.jsx"; - -class _Main -{ - static function main(args : string[]) : void - { - OktaviaSearch.setStemmer(new DutchStemmer); - } -} diff --git a/web/server/h2o/libh2o/misc/oktavia/tool/web/oktavia-english-search.jsx b/web/server/h2o/libh2o/misc/oktavia/tool/web/oktavia-english-search.jsx deleted file mode 100644 index d30ad2ccf..000000000 --- a/web/server/h2o/libh2o/misc/oktavia/tool/web/oktavia-english-search.jsx +++ /dev/null @@ -1,10 +0,0 @@ -import "oktavia-search.jsx"; -import "stemmer/english-stemmer.jsx"; - -class _Main -{ - static function main(args : string[]) : void - { - OktaviaSearch.setStemmer(new EnglishStemmer); - } -} diff --git a/web/server/h2o/libh2o/misc/oktavia/tool/web/oktavia-finnish-search.jsx b/web/server/h2o/libh2o/misc/oktavia/tool/web/oktavia-finnish-search.jsx deleted file mode 100644 index 640063958..000000000 --- a/web/server/h2o/libh2o/misc/oktavia/tool/web/oktavia-finnish-search.jsx +++ /dev/null @@ -1,10 +0,0 @@ -import "oktavia-search.jsx"; -import "stemmer/finnish-stemmer.jsx"; - -class _Main -{ - static function main(args : string[]) : void - { - OktaviaSearch.setStemmer(new FinnishStemmer); - } -} diff --git a/web/server/h2o/libh2o/misc/oktavia/tool/web/oktavia-french-search.jsx b/web/server/h2o/libh2o/misc/oktavia/tool/web/oktavia-french-search.jsx deleted file mode 100644 index 777f5e2a3..000000000 --- a/web/server/h2o/libh2o/misc/oktavia/tool/web/oktavia-french-search.jsx +++ /dev/null @@ -1,10 +0,0 @@ -import "oktavia-search.jsx"; -import "stemmer/french-stemmer.jsx"; - -class _Main -{ - static function main(args : string[]) : void - { - OktaviaSearch.setStemmer(new FrenchStemmer); - } -} diff --git a/web/server/h2o/libh2o/misc/oktavia/tool/web/oktavia-german-search.jsx b/web/server/h2o/libh2o/misc/oktavia/tool/web/oktavia-german-search.jsx deleted file mode 100644 index 588318704..000000000 --- a/web/server/h2o/libh2o/misc/oktavia/tool/web/oktavia-german-search.jsx +++ /dev/null @@ -1,10 +0,0 @@ -import "oktavia-search.jsx"; -import "stemmer/german-stemmer.jsx"; - -class _Main -{ - static function main(args : string[]) : void - { - OktaviaSearch.setStemmer(new GermanStemmer); - } -} diff --git a/web/server/h2o/libh2o/misc/oktavia/tool/web/oktavia-hungarian-search.jsx b/web/server/h2o/libh2o/misc/oktavia/tool/web/oktavia-hungarian-search.jsx deleted file mode 100644 index a14fe3450..000000000 --- a/web/server/h2o/libh2o/misc/oktavia/tool/web/oktavia-hungarian-search.jsx +++ /dev/null @@ -1,10 +0,0 @@ -import "oktavia-search.jsx"; -import "stemmer/hungarian-stemmer.jsx"; - -class _Main -{ - static function main(args : string[]) : void - { - OktaviaSearch.setStemmer(new HungarianStemmer); - } -} diff --git a/web/server/h2o/libh2o/misc/oktavia/tool/web/oktavia-italian-search.jsx b/web/server/h2o/libh2o/misc/oktavia/tool/web/oktavia-italian-search.jsx deleted file mode 100644 index 30769d1a6..000000000 --- a/web/server/h2o/libh2o/misc/oktavia/tool/web/oktavia-italian-search.jsx +++ /dev/null @@ -1,10 +0,0 @@ -import "oktavia-search.jsx"; -import "stemmer/italian-stemmer.jsx"; - -class _Main -{ - static function main(args : string[]) : void - { - OktaviaSearch.setStemmer(new ItalianStemmer); - } -} diff --git a/web/server/h2o/libh2o/misc/oktavia/tool/web/oktavia-norwegian-search.jsx b/web/server/h2o/libh2o/misc/oktavia/tool/web/oktavia-norwegian-search.jsx deleted file mode 100644 index 180e6b045..000000000 --- a/web/server/h2o/libh2o/misc/oktavia/tool/web/oktavia-norwegian-search.jsx +++ /dev/null @@ -1,10 +0,0 @@ -import "oktavia-search.jsx"; -import "stemmer/norwegian-stemmer.jsx"; - -class _Main -{ - static function main(args : string[]) : void - { - OktaviaSearch.setStemmer(new NorwegianStemmer); - } -} diff --git a/web/server/h2o/libh2o/misc/oktavia/tool/web/oktavia-porter-search.jsx b/web/server/h2o/libh2o/misc/oktavia/tool/web/oktavia-porter-search.jsx deleted file mode 100644 index ba1de086e..000000000 --- a/web/server/h2o/libh2o/misc/oktavia/tool/web/oktavia-porter-search.jsx +++ /dev/null @@ -1,10 +0,0 @@ -import "oktavia-search.jsx"; -import "stemmer/porter-stemmer.jsx"; - -class _Main -{ - static function main(args : string[]) : void - { - OktaviaSearch.setStemmer(new PorterStemmer); - } -} diff --git a/web/server/h2o/libh2o/misc/oktavia/tool/web/oktavia-portuguese-search.jsx b/web/server/h2o/libh2o/misc/oktavia/tool/web/oktavia-portuguese-search.jsx deleted file mode 100644 index 89ed1a0f8..000000000 --- a/web/server/h2o/libh2o/misc/oktavia/tool/web/oktavia-portuguese-search.jsx +++ /dev/null @@ -1,10 +0,0 @@ -import "oktavia-search.jsx"; -import "stemmer/portuguese-stemmer.jsx"; - -class _Main -{ - static function main(args : string[]) : void - { - OktaviaSearch.setStemmer(new PortugueseStemmer); - } -} diff --git a/web/server/h2o/libh2o/misc/oktavia/tool/web/oktavia-romanian-search.jsx b/web/server/h2o/libh2o/misc/oktavia/tool/web/oktavia-romanian-search.jsx deleted file mode 100644 index ef8b47fca..000000000 --- a/web/server/h2o/libh2o/misc/oktavia/tool/web/oktavia-romanian-search.jsx +++ /dev/null @@ -1,10 +0,0 @@ -import "oktavia-search.jsx"; -import "stemmer/romanian-stemmer.jsx"; - -class _Main -{ - static function main(args : string[]) : void - { - OktaviaSearch.setStemmer(new RomanianStemmer); - } -} diff --git a/web/server/h2o/libh2o/misc/oktavia/tool/web/oktavia-russian-search.jsx b/web/server/h2o/libh2o/misc/oktavia/tool/web/oktavia-russian-search.jsx deleted file mode 100644 index 2a572d712..000000000 --- a/web/server/h2o/libh2o/misc/oktavia/tool/web/oktavia-russian-search.jsx +++ /dev/null @@ -1,10 +0,0 @@ -import "oktavia-search.jsx"; -import "stemmer/russian-stemmer.jsx"; - -class _Main -{ - static function main(args : string[]) : void - { - OktaviaSearch.setStemmer(new RussianStemmer); - } -} diff --git a/web/server/h2o/libh2o/misc/oktavia/tool/web/oktavia-search.jsx b/web/server/h2o/libh2o/misc/oktavia/tool/web/oktavia-search.jsx deleted file mode 100644 index 22dc3f779..000000000 --- a/web/server/h2o/libh2o/misc/oktavia/tool/web/oktavia-search.jsx +++ /dev/null @@ -1,327 +0,0 @@ -import "oktavia.jsx"; -import "binary-util.jsx"; -import "query.jsx"; -import "query-string-parser.jsx"; -import "search-result.jsx"; -import "style.jsx"; -import "stemmer/stemmer.jsx"; -import "console.jsx"; - - -class _Result -{ - var title : string; - var url : string; - var content : string; - var score : int; - function constructor (title : string, url : string, content : string, score : int) - { - this.title = title; - this.url = url; - this.content = content; - this.score = score; - } -} - -class _Proposal -{ - var options : string; - var label : string; - var count : int; - function constructor (options : string, label : string, count : int) - { - this.options = options; - this.label = label; - this.count = count; - } -} - -class OktaviaSearch -{ - var _oktavia : Oktavia; - static var _stemmer : Nullable.<Stemmer> = null; - static var _instance : Nullable.<OktaviaSearch> = null; - var _queryString : Nullable.<string>; - var _queries : Query[]; - var _highlight : string; - var _callback : Nullable.<function(:int, :int):void>; - var _entriesPerPage : int; - var _currentPage : int; - var _result : SearchUnit[]; - var _proposals : Proposal[]; - var _currentFolderDepth : int; - - function constructor (entriesPerPage : int) - { - this._oktavia = new Oktavia(); - this._entriesPerPage = entriesPerPage; - this._currentPage = 1; - this._queryString = null; - this._callback = null; - OktaviaSearch._instance = this; - } - - static function setStemmer(stemmer : Stemmer) : void - { - if (OktaviaSearch._instance) - { - OktaviaSearch._instance._oktavia.setStemmer(stemmer); - } - else - { - OktaviaSearch._stemmer = stemmer; - } - } - - function loadIndex (index : string) : void - { - if (OktaviaSearch._stemmer) - { - this._oktavia.setStemmer(OktaviaSearch._stemmer); - } - this._oktavia.load(Binary.base64decode(index)); - if (this._queryString) - { - this.search(this._queryString, this._callback); - this._queryString = null; - this._callback = null; - } - } - - function search (queryString : string, callback : function(:int, :int):void) : void - { - if (this._oktavia) - { - var queryParser = new QueryStringParser(); - this._queries = queryParser.parse(queryString); - this._highlight = queryParser.highlight(); - var summary = this._oktavia.search(this._queries); - if (summary.size() > 0) - { - this._result = this._sortResult(summary); - this._proposals = [] : Proposal[]; - this._currentPage = 1; - } - else - { - this._result = [] : SearchUnit[]; - if (this._queries.length > 1) - { - this._proposals = summary.getProposal(); - } - else - { - this._proposals = [] : Proposal[]; - } - this._currentPage = 1; - } - callback(this.resultSize(), this.totalPages()); - } - else - { - this._queryString = queryString; - this._callback = callback; - } - } - - function resultSize () : int - { - return this._result.length; - } - - function totalPages () : int - { - return Math.ceil(this._result.length / this._entriesPerPage); - } - - function currentPage () : int - { - return this._currentPage; - } - - function setCurrentPage (page : int) : void - { - this._currentPage = page; - } - - function hasPrevPage () : boolean - { - return (this._currentPage != 1); - } - - function hasNextPage () : boolean - { - return (this._currentPage != this.totalPages()); - } - - function pageIndexes () : string[] - { - var result = [] : string[]; - var total = this.totalPages(); - if (total < 10) - { - for (var i = 1; i <= total; i++) - { - result.push(i as string); - } - } - else if (this._currentPage <= 5) - { - for (var i = 1; i <= 7; i++) - { - result.push(i as string); - } - result.push('...', total as string); - } - else if (total - 5 <= this._currentPage) - { - result.push('1', '...'); - for (var i = total - 8; i <= total; i++) - { - result.push(i as string); - } - } - else - { - result.push('1', '...'); - for (var i = this._currentPage - 3; i <= this._currentPage + 3; i++) - { - result.push(i as string); - } - result.push('...', total as string); - } - return result; - } - - function getResult () : _Result[] - { - var style = new Style('html'); - var start = (this._currentPage - 1) * this._entriesPerPage; - var last = Math.min(this._currentPage * this._entriesPerPage, this._result.length); - var metadata = this._oktavia.getPrimaryMetadata(); - var num = 250; - - var results = [] : _Result[]; - - for (var i = start; i < last; i++) - { - var unit = this._result[i]; - var info = metadata.getInformation(unit.id).split(Oktavia.eob); - - var offset = info[0].length + 1; - var content = metadata.getContent(unit.id); - var start = 0; - var positions = unit.getPositions(); - if (content.indexOf(info[0]) == 1) - { - content = content.slice(info[0].length + 2, content.length); - start += (info[0].length + 2); - } - var end = start + num; - var split = false; - if (positions[0].position > end - positions[0].word.length) - { - end = positions[0].position + Math.floor(num / 2); - split = true; - } - for (var j = positions.length - 1; j > -1; j--) - { - var pos = positions[j]; - if (pos.position + pos.word.length < end) - { - content = [ - content.slice(0, pos.position - start), - style.convert('<hit>*</hit>').replace('*', content.slice(pos.position - start, pos.position + pos.word.length - start)), - content.slice(pos.position + pos.word.length - start, content.length) - ].join(''); - } - } - var text : string; - if (split) - { - text = [ - content.slice(0, Math.floor(num / 2)) + ' ...', - content.slice(-Math.floor(num / 2), end - start)].join('<br/>'); - } - else - { - text = content.slice(0, end - start) + ' ...<br/>'; - } - text = text.replace(Oktavia.eob, ' ').replace(/(<br\/>)(<br\/>)+/, '<br/><br/>'); - results.push(new _Result(info[0], info[1], text, unit.score)); - } - return results; - } - - function getHighlight () : string - { - return this._highlight; - } - - function getProposals () : _Proposal[] - { - var style = new Style('html'); - var results = [] : _Proposal[]; - - if (this._queries.length > 1) - { - for (var i = 0; i < this._proposals.length; i++) - { - var proposal = this._proposals[i]; - if (proposal.expect > 0) - { - var label = [] : string[]; - var option = [] : string[]; - for (var j = 0; j < this._queries.length; j++) - { - if (j != proposal.omit) - { - label.push(style.convert('<hit>' + this._queries[j].toString() + '</hit>')); - option.push(this._queries[j].toString()); - } - else - { - label.push(style.convert('<del>' + this._queries[j].toString() + '</del>')); - } - } - results.push(new _Proposal(option.join(' '), label.join(' '), proposal.expect)); - } - } - } - return results; - } - - function _sortResult (summary : SearchSummary) : SearchUnit[] - { - for (var i = 0; i < summary.result.units.length; i++) - { - var score = 0; - var unit = summary.result.units[i]; - for (var pos in unit.positions) - { - var position = unit.positions[pos]; - if (this._oktavia.wordPositionType(position.position)) - { - score += 10; - } - else - { - score += 1; - } - if (!position.stemmed) - { - score += 2; - } - } - unit.score = score; - } - return summary.getSortedResult(); - } -} - -class _Main -{ - static function main(args : string[]) : void - { - } -} diff --git a/web/server/h2o/libh2o/misc/oktavia/tool/web/oktavia-spanish-search.jsx b/web/server/h2o/libh2o/misc/oktavia/tool/web/oktavia-spanish-search.jsx deleted file mode 100644 index 3ad9b8d1e..000000000 --- a/web/server/h2o/libh2o/misc/oktavia/tool/web/oktavia-spanish-search.jsx +++ /dev/null @@ -1,10 +0,0 @@ -import "oktavia-search.jsx"; -import "stemmer/spanish-stemmer.jsx"; - -class _Main -{ - static function main(args : string[]) : void - { - OktaviaSearch.setStemmer(new SpanishStemmer); - } -} diff --git a/web/server/h2o/libh2o/misc/oktavia/tool/web/oktavia-swedish-search.jsx b/web/server/h2o/libh2o/misc/oktavia/tool/web/oktavia-swedish-search.jsx deleted file mode 100644 index 9b900a484..000000000 --- a/web/server/h2o/libh2o/misc/oktavia/tool/web/oktavia-swedish-search.jsx +++ /dev/null @@ -1,10 +0,0 @@ -import "oktavia-search.jsx"; -import "stemmer/swedish-stemmer.jsx"; - -class _Main -{ - static function main(args : string[]) : void - { - OktaviaSearch.setStemmer(new SwedishStemmer); - } -} diff --git a/web/server/h2o/libh2o/misc/oktavia/tool/web/oktavia-turkish-search.jsx b/web/server/h2o/libh2o/misc/oktavia/tool/web/oktavia-turkish-search.jsx deleted file mode 100644 index 24d040584..000000000 --- a/web/server/h2o/libh2o/misc/oktavia/tool/web/oktavia-turkish-search.jsx +++ /dev/null @@ -1,10 +0,0 @@ -import "oktavia-search.jsx"; -import "stemmer/turkish-stemmer.jsx"; - -class _Main -{ - static function main(args : string[]) : void - { - OktaviaSearch.setStemmer(new TurkishStemmer); - } -} |