summaryrefslogtreecommitdiffstats
path: root/storage/mroonga/vendor/groonga/examples/dictionary/edict
diff options
context:
space:
mode:
author: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-05-04 18:07:14 +0000
committer: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-05-04 18:07:14 +0000
commit: a175314c3e5827eb193872241446f2f8f5c9d33c (patch)
tree: cd3d60ca99ae00829c52a6ca79150a5b6e62528b /storage/mroonga/vendor/groonga/examples/dictionary/edict
parent: Initial commit. (diff)
download: mariadb-10.5-a175314c3e5827eb193872241446f2f8f5c9d33c.tar.xz
mariadb-10.5-a175314c3e5827eb193872241446f2f8f5c9d33c.zip
Adding upstream version 1:10.5.12. (upstream/1%10.5.12, upstream)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'storage/mroonga/vendor/groonga/examples/dictionary/edict')
-rw-r--r-- storage/mroonga/vendor/groonga/examples/dictionary/edict/Makefile.am 4
-rwxr-xr-x storage/mroonga/vendor/groonga/examples/dictionary/edict/edict-import.sh 27
-rwxr-xr-x storage/mroonga/vendor/groonga/examples/dictionary/edict/edict2grn.rb 34
3 files changed, 65 insertions, 0 deletions
diff --git a/storage/mroonga/vendor/groonga/examples/dictionary/edict/Makefile.am b/storage/mroonga/vendor/groonga/examples/dictionary/edict/Makefile.am
new file mode 100644
index 00000000..376f9d52
--- /dev/null
+++ b/storage/mroonga/vendor/groonga/examples/dictionary/edict/Makefile.am
@@ -0,0 +1,4 @@
+edictdir = $(examples_dictionarydir)/edict
+dist_edict_SCRIPTS =		\
+	edict-import.sh		\
+	edict2grn.rb
diff --git a/storage/mroonga/vendor/groonga/examples/dictionary/edict/edict-import.sh b/storage/mroonga/vendor/groonga/examples/dictionary/edict/edict-import.sh
new file mode 100755
index 00000000..e48700af
--- /dev/null
+++ b/storage/mroonga/vendor/groonga/examples/dictionary/edict/edict-import.sh
@@ -0,0 +1,27 @@
+#!/bin/sh
+# Import EDICT into a Groonga database; downloads edict.gz when no archive path is given.
+base_dir=$(dirname "$0")
+
+if [ $# -ne 1 ] && [ $# -ne 2 ]; then
+  echo "usage: $0 db_path [edict.gz_path]"
+  exit 1
+fi
+# Expansions are quoted so that paths containing spaces or glob characters survive.
+if [ -z "$2" ]; then
+  edict_gz=edict.gz
+  if [ ! -f "$edict_gz" ]; then
+    # wget -O creates the file even when the download fails; remove it so a rerun retries.
+    wget -O "$edict_gz" http://ftp.monash.edu.au/pub/nihongo/edict.gz || { rm -f "$edict_gz"; exit 1; }
+  fi
+else
+  edict_gz=$2
+fi
+# Use gzcat when it is available; otherwise fall back to zcat.
+if type gzcat > /dev/null 2>&1; then
+  zcat="gzcat"
+else
+  zcat="zcat"
+fi
+# Report success only when the pipeline succeeds; otherwise exit with its non-zero status.
+"$zcat" "$edict_gz" | "${base_dir}/edict2grn.rb" | groonga "$1" > /dev/null || exit $?
+echo "edict data loaded."
diff --git a/storage/mroonga/vendor/groonga/examples/dictionary/edict/edict2grn.rb b/storage/mroonga/vendor/groonga/examples/dictionary/edict/edict2grn.rb
new file mode 100755
index 00000000..b795e25a
--- /dev/null
+++ b/storage/mroonga/vendor/groonga/examples/dictionary/edict/edict2grn.rb
@@ -0,0 +1,34 @@
+#!/usr/bin/env ruby
+
+require "English"
+require "nkf"
+require "json"
+
+print(<<HEADER.chomp)
+column_create item_dictionary edict_desc COLUMN_SCALAR ShortText
+column_create bigram item_dictionary_edict_desc COLUMN_INDEX|WITH_POSITION item_dictionary edict_desc
+load --table item_dictionary
+[
+["_key","edict_desc","kana"]
+HEADER
+
+loop do
+ raw_line = gets
+ break if raw_line.nil?
+
+ line = raw_line.encode("UTF-8", "EUC-JP")
+ key, body = line.strip.split("/", 2)
+ key = key.strip
+ if /\s*\[(.+)\]\z/ =~ key
+ key = $PREMATCH
+ reading = $1
+ body = "[#{reading}] #{body}"
+ kana = NKF.nkf("-Ww --katakana", reading)
+ else
+ kana = NKF.nkf("-Ww --katakana", key)
+ end
+ puts(",")
+ puts([key, body, kana].to_json)
+end
+puts
+puts("]")