summary | refs | log | tree | commit | diff | stats
path: root/storage/mroonga/vendor/groonga/examples/dictionary/gene95
diff options
context:
space:
mode:
author: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-05-04 18:07:14 +0000
committer: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-05-04 18:07:14 +0000
commit: a175314c3e5827eb193872241446f2f8f5c9d33c (patch)
tree: cd3d60ca99ae00829c52a6ca79150a5b6e62528b /storage/mroonga/vendor/groonga/examples/dictionary/gene95
parent: Initial commit. (diff)
download: mariadb-10.5-a175314c3e5827eb193872241446f2f8f5c9d33c.tar.xz
download: mariadb-10.5-a175314c3e5827eb193872241446f2f8f5c9d33c.zip
Adding upstream version 1:10.5.12. (refs: upstream/1%10.5.12, upstream)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'storage/mroonga/vendor/groonga/examples/dictionary/gene95')
-rw-r--r-- storage/mroonga/vendor/groonga/examples/dictionary/gene95/Makefile.am | 4
-rwxr-xr-x storage/mroonga/vendor/groonga/examples/dictionary/gene95/gene-import.sh | 26
-rwxr-xr-x storage/mroonga/vendor/groonga/examples/dictionary/gene95/gene2grn.rb | 33
3 files changed, 63 insertions, 0 deletions
diff --git a/storage/mroonga/vendor/groonga/examples/dictionary/gene95/Makefile.am b/storage/mroonga/vendor/groonga/examples/dictionary/gene95/Makefile.am
new file mode 100644
index 00000000..e89f13f5
--- /dev/null
+++ b/storage/mroonga/vendor/groonga/examples/dictionary/gene95/Makefile.am
@@ -0,0 +1,4 @@
+gene95dir = $(examples_dictionarydir)/gene95
+dist_gene95_SCRIPTS = \
+ gene2grn.rb \
+ gene-import.sh
diff --git a/storage/mroonga/vendor/groonga/examples/dictionary/gene95/gene-import.sh b/storage/mroonga/vendor/groonga/examples/dictionary/gene95/gene-import.sh
new file mode 100755
index 00000000..488d6c83
--- /dev/null
+++ b/storage/mroonga/vendor/groonga/examples/dictionary/gene95/gene-import.sh
@@ -0,0 +1,26 @@
+#!/bin/sh
+
+base_dir=$(dirname $0)
+
+if [ 1 != $# -a 2 != $# ]; then
+ echo "usage: $0 db_path [gene.txt_path]"
+ exit 1
+fi
+
+if [ -z $2 ]; then
+ dictionary_dir=gene95-dictionary
+ gene_txt=${dictionary_dir}/gene.txt
+ if [ ! -f $gene_txt ]; then
+ gene95_tar_gz=gene95.tar.gz
+ wget -O $gene95_tar_gz \
+ http://www.namazu.org/~tsuchiya/sdic/data/gene95.tar.gz
+ mkdir -p ${dictionary_dir}
+ tar xvzf ${gene95_tar_gz} -C ${dictionary_dir}
+ fi
+else
+ gene_txt=$2
+fi
+
+if cat $gene_txt | ${base_dir}/gene2grn.rb | groonga $1 > /dev/null; then
+ echo "gene95 data loaded."
+fi
diff --git a/storage/mroonga/vendor/groonga/examples/dictionary/gene95/gene2grn.rb b/storage/mroonga/vendor/groonga/examples/dictionary/gene95/gene2grn.rb
new file mode 100755
index 00000000..c9d9a593
--- /dev/null
+++ b/storage/mroonga/vendor/groonga/examples/dictionary/gene95/gene2grn.rb
@@ -0,0 +1,33 @@
+#!/usr/bin/env ruby
+
+require "json"
+
+print(<<HEADER.chomp)
+column_create item_dictionary gene95_desc COLUMN_SCALAR ShortText
+column_create bigram item_dictionary_gene95_desc COLUMN_INDEX|WITH_POSITION item_dictionary gene95_desc
+load --table item_dictionary
+[
+["_key","gene95_desc"]
+HEADER
+
+loop do
+ raw_key = gets
+ break if raw_key.nil?
+ raw_body = gets
+
+ key = nil
+ body = nil
+ begin
+ key = raw_key.encode("UTF-8", "Windows-31J").strip
+ body = raw_body.encode("UTF-8", "Windows-31J").strip
+ rescue EncodingError
+ $stderr.puts("Ignore:")
+ $stderr.puts(" key: <#{raw_key}>")
+ $stderr.puts(" body: <#{raw_body}>")
+ next
+ end
+ puts(",")
+ print([key, body].to_json)
+end
+puts
+puts("]")