diff options
Diffstat (limited to '')
3 files changed, 97 insertions, 0 deletions
diff --git a/storage/mroonga/vendor/groonga/examples/dictionary/edict/Makefile.am b/storage/mroonga/vendor/groonga/examples/dictionary/edict/Makefile.am
new file mode 100644
index 00000000..376f9d52
--- /dev/null
+++ b/storage/mroonga/vendor/groonga/examples/dictionary/edict/Makefile.am
@@ -0,0 +1,4 @@
+edictdir = $(examples_dictionarydir)/edict
+dist_edict_SCRIPTS = \
+	edict2grn.rb \
+	edict-import.sh
diff --git a/storage/mroonga/vendor/groonga/examples/dictionary/edict/edict-import.sh b/storage/mroonga/vendor/groonga/examples/dictionary/edict/edict-import.sh
new file mode 100755
index 00000000..e48700af
--- /dev/null
+++ b/storage/mroonga/vendor/groonga/examples/dictionary/edict/edict-import.sh
@@ -0,0 +1,45 @@
+#!/bin/sh
+#
+# Load the EDICT dictionary into a groonga database.
+#
+# usage: edict-import.sh db_path [edict.gz_path]
+#
+# When edict.gz_path is omitted, edict.gz in the current directory is used;
+# it is downloaded first if it does not exist yet.
+
+base_dir=$(dirname "$0")
+
+if [ $# -ne 1 ] && [ $# -ne 2 ]; then
+  echo "usage: $0 db_path [edict.gz_path]"
+  exit 1
+fi
+
+if [ -z "$2" ]; then
+  edict_gz=edict.gz
+  if [ ! -f "$edict_gz" ]; then
+    # wget -O creates the output file even when the download fails, so remove
+    # it on error; otherwise the next run would reuse the broken file.
+    if ! wget -O "$edict_gz" http://ftp.monash.edu.au/pub/nihongo/edict.gz; then
+      rm -f "$edict_gz"
+      echo "failed to download $edict_gz" >&2
+      exit 1
+    fi
+  fi
+else
+  edict_gz=$2
+fi
+
+# Use gzcat when it is available, otherwise fall back to zcat.
+if type gzcat > /dev/null 2>&1; then
+  zcat="gzcat"
+else
+  zcat="zcat"
+fi
+
+# The status tested here is that of groonga, the last command of the pipeline.
+if $zcat "$edict_gz" | "${base_dir}/edict2grn.rb" | groonga "$1" > /dev/null; then
+  echo "edict data loaded."
+else
+  echo "failed to load edict data." >&2
+  exit 1
+fi
diff --git a/storage/mroonga/vendor/groonga/examples/dictionary/edict/edict2grn.rb b/storage/mroonga/vendor/groonga/examples/dictionary/edict/edict2grn.rb
new file mode 100755
index 00000000..b795e25a
--- /dev/null
+++ b/storage/mroonga/vendor/groonga/examples/dictionary/edict/edict2grn.rb
@@ -0,0 +1,48 @@
+#!/usr/bin/env ruby
+#
+# Convert EDICT lines read from standard input into groonga commands on
+# standard output.
+#
+# Each input line is transcoded from EUC-JP to UTF-8 and split at its first
+# "/" into a key and a description. A trailing "[reading]" is moved from the
+# key to the front of the description. The "kana" column receives the reading
+# (or the key when there is no reading) converted by NKF with --katakana.
+
+require "English"
+require "nkf"
+require "json"
+
+print(<<HEADER.chomp)
+column_create item_dictionary edict_desc COLUMN_SCALAR ShortText
+column_create bigram item_dictionary_edict_desc COLUMN_INDEX|WITH_POSITION item_dictionary edict_desc
+load --table item_dictionary
+[
+["_key","edict_desc","kana"]
+HEADER
+
+loop do
+  raw_line = gets
+  break if raw_line.nil?
+
+  line = raw_line.encode("UTF-8", "EUC-JP").strip
+  # A blank line has no key: split would return [] and key.strip would raise
+  # NoMethodError, so skip it.
+  next if line.empty?
+
+  key, body = line.split("/", 2)
+  key = key.strip
+  if /\s*\[(.+)\]\z/ =~ key
+    key = $PREMATCH
+    reading = $1
+    body = "[#{reading}] #{body}"
+    kana = NKF.nkf("-Ww --katakana", reading)
+  else
+    kana = NKF.nkf("-Ww --katakana", key)
+  end
+  # Every record is preceded by "," because the column-name row printed by the
+  # header is already the first element of the JSON array.
+  puts(",")
+  puts([key, body, kana].to_json)
+end
+puts
+puts("]")