diff options
Diffstat (limited to 'web/server/h2o/libh2o/misc/oktavia/tool/oktavia-search.jsx')
-rw-r--r-- | web/server/h2o/libh2o/misc/oktavia/tool/oktavia-search.jsx | 370 |
1 files changed, 0 insertions, 370 deletions
diff --git a/web/server/h2o/libh2o/misc/oktavia/tool/oktavia-search.jsx b/web/server/h2o/libh2o/misc/oktavia/tool/oktavia-search.jsx deleted file mode 100644 index 719c71b86..000000000 --- a/web/server/h2o/libh2o/misc/oktavia/tool/oktavia-search.jsx +++ /dev/null @@ -1,370 +0,0 @@ -import "console.jsx"; -import "js/nodejs.jsx"; - -import "oktavia.jsx"; -import "getopt.jsx"; -import "query-parser.jsx"; -import "search-result.jsx"; -import "style.jsx"; -import "binary-util.jsx"; - -import "stemmer/stemmer.jsx"; -import "stemmer/danish-stemmer.jsx"; -import "stemmer/dutch-stemmer.jsx"; -import "stemmer/english-stemmer.jsx"; -import "stemmer/finnish-stemmer.jsx"; -import "stemmer/french-stemmer.jsx"; -import "stemmer/german-stemmer.jsx"; -import "stemmer/hungarian-stemmer.jsx"; -import "stemmer/italian-stemmer.jsx"; -import "stemmer/norwegian-stemmer.jsx"; -import "stemmer/porter-stemmer.jsx"; -import "stemmer/portuguese-stemmer.jsx"; -import "stemmer/romanian-stemmer.jsx"; -import "stemmer/russian-stemmer.jsx"; -import "stemmer/spanish-stemmer.jsx"; -import "stemmer/swedish-stemmer.jsx"; -import "stemmer/turkish-stemmer.jsx"; - - -class Search -{ - var style : Style; - - function search (indexFile : string, queryStrings : string[], num : int, style : Style, algorithm : Nullable.<string>) : void - { - this.style = style; - var oktavia = new Oktavia(); - if (algorithm != null) - { - oktavia.setStemmer(this.createStemmer(algorithm)); - } - if (!this.loadIndex(oktavia, indexFile)) - { - return; - } - console.time('searching'); - var queryParser = new QueryParser(); - queryParser.parse(queryStrings); - var summary = oktavia.search(queryParser.queries); - console.timeEnd('searching'); - if (summary.size() == 0) - { - this.notFound(summary, queryStrings); - } - else - { - this.showResult(oktavia, summary, num); - } - } - - function loadIndex (oktavia : Oktavia, filepath : string) : boolean - { - var ext = node.path.extname(filepath); - var content : string; - var result = true; - switch (ext) - { - case ".okt": - content = node.fs.readFileSync(filepath, "utf16le"); - oktavia.load(content); - break; - case ".b64": - content = node.fs.readFileSync(filepath, "utf8"); - oktavia.load(Binary.base64decode(content)); - break; - case ".js": - content = node.fs.readFileSync(filepath, "utf8"); - var index = content.indexOf('"'); - var lastIndex = content.lastIndexOf('"'); - oktavia.load(Binary.base64decode(content.slice(index, lastIndex))); - break; - default: - console.log("unknown file extension: " + ext); - result = false; - break; - } - return result; - } - - function sortResult (oktavia : Oktavia, summary : SearchSummary) : SearchUnit[] - { - for (var i = 0; i < summary.result.units.length; i++) - { - var score = 0; - var unit = summary.result.units[i]; - for (var pos in unit.positions) - { - var position = unit.positions[pos]; - if (oktavia.wordPositionType(position.position)) - { - score += 10; - } - else - { - score += 1; - } - if (!position.stemmed) - { - score += 2; - } - } - unit.score = score; - } - return summary.getSortedResult(); - } - - function showResult (oktavia : Oktavia, summary : SearchSummary, num : int) : void - { - var results = this.sortResult(oktavia, summary); - var style = this.style; - var metadata = oktavia.getPrimaryMetadata(); - for (var i = 0; i < results.length; i++) - { - var unit = results[i]; - var info = metadata.getInformation(unit.id).split(Oktavia.eob); - /*console.log(info.replace(Oktavia.eob, ' -- ') + '\n'); - + ' ----------------------------------------------- ' - + unit.score as string + ' pt');*/ - console.log(style.convert('<title>' + info[0] + '</title>') + ' ' + style.convert('<url>' + info[1] + '</url>')); - var offset = info[0].length + 1; - var content = metadata.getContent(unit.id); - var start = 0; - var positions = unit.getPositions(); - if (content.indexOf(info[0]) == 1) - { - content = content.slice(info[0].length + 2, content.length); - start += (info[0].length + 2); - } - var end = start + num; - var split = false; - if (positions[0].position > end - positions[0].word.length) - { - end = positions[0].position + Math.floor(num / 2); - split = true; - } - for (var j = positions.length - 1; j > -1; j--) - { - var pos = positions[j]; - if (pos.position + pos.word.length < end) - { - /*log('--------------begin : ' + (pos.position - start) as string); - log(content.slice(0, pos.position - start)); - log('--------------match : ' + pos.word.length as string); - .log(content.slice(pos.position - start, pos.position + pos.word.length - start)); - log('--------------match : ' + (content.length - pos.position + pos.word.length - start) as string); - log(content.slice(pos.position + pos.word.length - start, content.length)); - log('--------------end');*/ - content = [ - content.slice(0, pos.position - start), - style.convert('<hit>*</hit>').replace('*', content.slice(pos.position - start, pos.position + pos.word.length - start)), - content.slice(pos.position + pos.word.length - start, content.length) - ].join(''); - } - } - var text : string; - if (split) - { - text = [ - content.slice(0, Math.floor(num / 2)) + ' ...', - content.slice(-Math.floor(num / 2), end - start)].join('\n'); - } - else - { - text = content.slice(0, end - start) + ' ...\n'; - } - text = text.replace(Oktavia.eob, ' ').replace(/\n\n+/, '\n\n'); - console.log(text); - } - console.log(style.convert('<summary>' + (summary.size() as string) + " results.</summary>\n")); - } - - function notFound (summary : SearchSummary, query : string[]) : void - { - var style = this.style; - if (query.length > 1) - { - console.log("Suggestions:"); - var proposals = summary.getProposal(); - for (var i = 0; i < proposals.length; i++) - { - var proposal = proposals[i]; - var querywords = [] : string[]; - for (var j = 0; j < query.length; j++) - { - if (j != proposal.omit) - { - querywords.push(style.convert('<hit>' + query[j] + '</hit>')); - } - else - { - //querywords.push(style.convert('<del>' + query[j] + '</del>')); - } - } - console.log("* Expected result: " + querywords.join(" ") + " - " + (proposal.expect as string) + " hit"); - } - } - else - { - console.log(style.convert("Your search - <hit>" + query[0] + "</hit> - didn't match any documents.")); - } - } - - function createStemmer (algorithm : string) : Stemmer - { - var stemmer : Stemmer; - switch (algorithm.toLowerCase()) - { - case "danish": - stemmer = new DanishStemmer(); - break; - case "dutch": - stemmer = new DutchStemmer(); - break; - case "english": - stemmer = new EnglishStemmer(); - break; - case "finnish": - stemmer = new FinnishStemmer(); - break; - case "french": - stemmer = new FrenchStemmer(); - break; - case "german": - stemmer = new GermanStemmer(); - break; - case "hungarian": - stemmer = new HungarianStemmer(); - break; - case "italian": - stemmer = new ItalianStemmer(); - break; - case "norwegian": - stemmer = new NorwegianStemmer(); - break; - case "porter": - stemmer = new PorterStemmer(); - break; - case "portuguese": - stemmer = new PortugueseStemmer(); - break; - case "romanian": - stemmer = new RomanianStemmer(); - break; - case "russian": - stemmer = new RussianStemmer(); - break; - case "spanish": - stemmer = new SpanishStemmer(); - break; - case "swedish": - stemmer = new SwedishStemmer(); - break; - case "turkish": - stemmer = new TurkishStemmer(); - break; - default: - stemmer = new EnglishStemmer(); - break; - } - return stemmer; - } -} - -class _Main { - static function usage () : void - { - console.log([ - "usage: oktavia_search index_file [options] query...", - "", - "Options:", - " -m, --mono : Don't use color.", - " -s, --stemmer [algorithm] : Select stemming algorithm.", - " -n, --number [char number] : Result display number. Default value = 250", - " -h, --help : Display this message.", - "", - "Search Query Syntax:", - " word1 word2 : All words.", - ' "word1 word2" : Exact words or phrase.', - " word1 OR word2 : Any of these words.", - " word1 -word2 : None of these words." - ].join('\n')); - } - - static function main(args : string[]) : void - { - console.log("Search Engine Oktavia - Command-line Search Client\n"); - - var indexFile : Nullable.<string> = null; - var showhelp = false; - var notrun = false; - var styleType = 'console'; - var num : int = 250; - var queryStrings = [] : string[]; - var algorithm : Nullable.<string> = null; - - var validStemmers = [ - 'danish', 'dutch', 'english', 'finnish', 'french', 'german', 'hungarian', - 'italian', 'norwegian', 'porter', 'portuguese', 'romanian', 'russian', - 'spanish', 'swedish', 'turkish' - ]; - - if (args.length == 0) - { - showhelp = true; - } - else if (!node.fs.existsSync(args[0])) - { - console.error("Index file '" + args[0] + "' doesn't exist."); - notrun = true; - } - else - { - indexFile = args[0]; - } - - var optstring = "m(mono)s:(stemmer)n:(number)h(help)"; - var parser = new BasicParser(optstring, args.slice(1)); - var opt = parser.getopt(); - while (opt) - { - switch (opt.option) - { - case "s": - if (validStemmers.indexOf(opt.optarg) == -1) - { - console.error('Option s/stemmer is invalid.'); - notrun = true; - } - else - { - algorithm = opt.optarg; - } - break; - case "m": - styleType = 'ignore'; - break; - case "n": - num = opt.optarg as int; - break; - case "h": - showhelp = true; - break; - default: - queryStrings.push(opt.option); - break; - } - opt = parser.getopt(); - } - if (showhelp || queryStrings.length == 0) - { - _Main.usage(); - } - else if (!notrun) - { - var style = new Style(styleType); - var search = new Search(); - search.search(indexFile, queryStrings, num, style, algorithm); - } - } -} |