Diffstat (limited to 'storage/mroonga/vendor/groonga/lib/nfkc.rb')
-rwxr-xr-x  storage/mroonga/vendor/groonga/lib/nfkc.rb  897
1 file changed, 897 insertions, 0 deletions
diff --git a/storage/mroonga/vendor/groonga/lib/nfkc.rb b/storage/mroonga/vendor/groonga/lib/nfkc.rb
new file mode 100755
index 00000000..0c0e7fe7
--- /dev/null
+++ b/storage/mroonga/vendor/groonga/lib/nfkc.rb
@@ -0,0 +1,897 @@
+#!/usr/bin/env ruby
+# -*- coding: utf-8 -*-
+#
+# Copyright(C) 2010-2016 Brazil
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License version 2.1 as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
+
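+# Generates nfkc<unicode version>.c, the C source for Groonga's NFKC
+# normalization functions, from character data dumped by the companion
+# icudump tool. Extra decomposition rules can be supplied through the
+# file named below.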
+CUSTOM_RULE_PATH = 'nfkc-custom-rules.txt'
+
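+# Emits grn_nfkc<unicode version>_char_type(), _decompose() and
+# _compose() as nested if/switch trees keyed on the UTF-8 bytes of the
+# input character(s).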
+class SwitchGenerator
+ def initialize(unicode_version, output)
+ @unicode_version = unicode_version
+ @output = output
+ end
+
+ def generate(bc, decompose_map, compose_map)
+ STDERR.puts('generating char type code..')
+ generate_blockcode_char_type(bc)
+ STDERR.puts('generating decompose code..')
+ generate_decompose(decompose_map)
+ STDERR.puts('generating compose code..')
+ generate_compose(compose_map)
+ end
+
+ private
+ def generate_blockcode_char_type(bc)
+ @output.puts(<<-HEADER)
+
+grn_char_type
+grn_nfkc#{@unicode_version}_char_type(const unsigned char *str)
+{
+ HEADER
+
+ @lv = 0
+ gen_bc(bc, 0)
+
+ @output.puts(<<-FOOTER)
+ return -1;
+}
+ FOOTER
+ end
+
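+  # Recursively emits branches on str[level] from the block-code map.
+  # @lv holds the char type of the most recently started range and is
+  # used as the fall-through return value.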
+ def gen_bc(hash, level)
+ bl = ' ' * (level * 2)
+ h2 = {}
+ hash.each{|key,val|
+ key = key.dup
+ key.force_encoding("ASCII-8BIT")
+ head = key.bytes[0]
+ rest = key[1..-1]
+ if h2[head]
+ h2[head][rest] = val
+ else
+ h2[head] = {rest => val}
+ end
+ }
+ if h2.size < 3
+ h2.keys.sort.each{|k|
+ if (0x80 < k)
+ @output.printf("#{bl}if (str[#{level}] < 0x%02X) { return #{@lv}; }\n", k)
+ end
+ h = h2[k]
+ if h.keys.join =~ /^\x80*$/n
+ @lv, = h.values
+ else
+ @output.printf("#{bl}if (str[#{level}] == 0x%02X) {\n", k)
+ gen_bc(h, level + 1)
+ @output.puts bl + '}'
+ end
+ }
+ @output.puts bl + "return #{@lv};"
+ else
+ @output.puts bl + "switch (str[#{level}]) {"
+ lk = 0x80
+ br = true
+ h2.keys.sort.each{|k|
+ if (lk < k)
+ for j in lk..k-1
+ @output.printf("#{bl}case 0x%02X :\n", j)
+ end
+ br = false
+ end
+ unless br
+ @output.puts bl + " return #{@lv};"
+ @output.puts bl + ' break;'
+ end
+ h = h2[k]
+ @output.printf("#{bl}case 0x%02X :\n", k)
+ if h.keys.join =~ /^\x80*$/n
+ @lv, = h.values
+ br = false
+ else
+ gen_bc(h, level + 1)
+ @output.puts bl + ' break;'
+ br = true
+ end
+ lk = k + 1
+ }
+ @output.puts bl + 'default :'
+ @output.puts bl + " return #{@lv};"
+ @output.puts bl + ' break;'
+ @output.puts bl + '}'
+ end
+ end
+
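+  # Emits the decompose function: a byte-wise trie over the source
+  # characters that returns the decomposed form as an escaped C string
+  # literal, or 0 when there is no mapping.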
+ def generate_decompose(hash)
+ @output.puts(<<-HEADER)
+
+const char *
+grn_nfkc#{@unicode_version}_decompose(const unsigned char *str)
+{
+ HEADER
+
+ gen_decompose(hash, 0)
+
+ @output.puts(<<-FOOTER)
+ return 0;
+}
+ FOOTER
+ end
+
+ def gen_decompose(hash, level)
+ bl = ' ' * ((level + 0) * 2)
+ if hash['']
+ dst = ''
+ hash[''].each_byte{|b| dst << format('\x%02X', b)}
+ @output.puts "#{bl}return \"#{dst}\";"
+ hash.delete('')
+ end
+ return if hash.empty?
+ h2 = {}
+ hash.each{|key,val|
+ key = key.dup
+ key.force_encoding("ASCII-8BIT")
+ head = key.bytes[0]
+ rest = key[1..-1]
+ if h2[head]
+ h2[head][rest] = val
+ else
+ h2[head] = {rest => val}
+ end
+ }
+ if h2.size == 1
+ h2.each{|key,val|
+ @output.printf("#{bl}if (str[#{level}] == 0x%02X) {\n", key)
+ gen_decompose(val, level + 1)
+ @output.puts bl + '}'
+ }
+ else
+ @output.puts "#{bl}switch (str[#{level}]) {"
+ h2.keys.sort.each{|k|
+ @output.printf("#{bl}case 0x%02X :\n", k)
+ gen_decompose(h2[k], level + 1)
+ @output.puts("#{bl} break;")
+ }
+ @output.puts bl + '}'
+ end
+ end
+
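+  # Emits the compose function: two-character sources are grouped by
+  # their trailing (suffix) character, so the generated code branches on
+  # the suffix bytes first and on the prefix bytes second.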
+ def generate_compose(compose_map)
+ @output.puts(<<-HEADER)
+
+const char *
+grn_nfkc#{@unicode_version}_compose(const unsigned char *prefix, const unsigned char *suffix)
+{
+ HEADER
+ suffix = {}
+ compose_map.each{|src,dst|
+ chars = src.chars
+ if chars.size != 2
+ STDERR.puts "caution: more than two chars in pattern #{chars.join('|')}"
+ end
+ s = chars.pop
+ if suffix[s]
+ suffix[s][chars.join] = dst
+ else
+ suffix[s] = {chars.join=>dst}
+ end
+ }
+ gen_compose_sub(suffix, 0)
+ @output.puts(<<-FOOTER)
+ return 0;
+}
+ FOOTER
+ end
+
+ def gen_compose_sub2(hash, level, indent)
+ bl = ' ' * ((level + indent + 0) * 2)
+ if hash['']
+ @output.print "#{bl}return \""
+ hash[''].each_byte{|b| @output.printf('\x%02X', b)}
+ @output.puts "\";"
+ hash.delete('')
+ end
+ return if hash.empty?
+
+ h2 = {}
+ hash.each{|key,val|
+ key = key.dup
+ key.force_encoding("ASCII-8BIT")
+ head = key.bytes[0]
+ rest = key[1..-1]
+ if h2[head]
+ h2[head][rest] = val
+ else
+ h2[head] = {rest => val}
+ end
+ }
+
+ if h2.size == 1
+ h2.each{|key,val|
+ @output.printf("#{bl}if (prefix[#{level}] == 0x%02X) {\n", key)
+ gen_compose_sub2(val, level + 1, indent)
+ @output.puts bl + '}'
+ }
+ else
+ @output.puts "#{bl}switch (prefix[#{level}]) {"
+ h2.keys.sort.each{|k|
+ @output.printf("#{bl}case 0x%02X :\n", k)
+ gen_compose_sub2(h2[k], level + 1, indent)
+ @output.puts("#{bl} break;")
+ }
+ @output.puts bl + '}'
+ end
+ end
+
+ def gen_compose_sub(hash, level)
+ bl = ' ' * ((level + 0) * 2)
+ if hash['']
+ gen_compose_sub2(hash[''], 0, level)
+ hash.delete('')
+ end
+ return if hash.empty?
+ h2 = {}
+ hash.each{|key,val|
+ key = key.dup
+ key.force_encoding("ASCII-8BIT")
+ head = key.bytes[0]
+ rest = key[1..-1]
+ if h2[head]
+ h2[head][rest] = val
+ else
+ h2[head] = {rest => val}
+ end
+ }
+ if h2.size == 1
+ h2.each{|key,val|
+ @output.printf("#{bl}if (suffix[#{level}] == 0x%02X) {\n", key)
+ gen_compose_sub(val, level + 1)
+ @output.puts bl + '}'
+ }
+ else
+ @output.puts "#{bl}switch (suffix[#{level}]) {"
+ h2.keys.sort.each{|k|
+ @output.printf("#{bl}case 0x%02X :\n", k)
+ gen_compose_sub(h2[k], level + 1)
+ @output.puts("#{bl} break;")
+ }
+ @output.puts bl + '}'
+ end
+ end
+end
+
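+# Alternative backend: instead of fully nested switches, emits static
+# lookup tables indexed by the last UTF-8 byte of each character plus
+# dispatch functions that switch on the leading (common) bytes.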
+class TableGenerator < SwitchGenerator
+ private
+ def name_prefix
+ "grn_nfkc#{@unicode_version}_"
+ end
+
+ def table_name(type, common_bytes)
+ suffix = common_bytes.collect {|byte| "%02x" % byte}.join("")
+ "#{name_prefix}#{type}_table_#{suffix}"
+ end
+
+ def function_name(type)
+ "#{name_prefix}#{type}"
+ end
+
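+  # For each group of characters sharing the same leading bytes, emits a
+  # static table of converted values indexed by the last UTF-8 byte;
+  # groups whose values are all identical are skipped.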
+ def generate_char_convert_tables(type, return_type, byte_size_groups)
+ if return_type.end_with?("*")
+ space = ""
+ else
+ space = " "
+ end
+ byte_size_groups.keys.sort.each do |common_bytes|
+ chars = byte_size_groups[common_bytes]
+ lines = []
+ all_values = []
+ last_bytes = chars.collect {|char| char.bytes.last}
+ last_bytes.min.step(last_bytes.max).each_slice(8) do |slice|
+ values = slice.collect do |last_byte|
+ char = (common_bytes + [last_byte]).pack("c*")
+ char.force_encoding("UTF-8")
+ yield(char)
+ end
+ all_values.concat(values)
+ lines << (" " + values.join(", "))
+ end
+
+ next if all_values.uniq.size == 1
+
+ @output.puts(<<-TABLE_HEADER)
+
+static #{return_type}#{space}#{table_name(type, common_bytes)}[] = {
+ TABLE_HEADER
+ @output.puts(lines.join(",\n"))
+ @output.puts(<<-TABLE_FOOTER)
+};
+ TABLE_FOOTER
+ end
+ end
+
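+  # Emits the body of a conversion function: the groups sharing leading
+  # bytes are handled by nested switches on those common bytes, while
+  # the caller-supplied block emits the per-group return code (a single
+  # value, a table lookup or a range check).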
+ def generate_char_convert_function(type,
+ argument_list,
+ char_variable,
+ default,
+ return_type,
+ byte_size_groups,
+ options={})
+ modifier = options[:internal] ? "static inline " : ""
+ @output.puts(<<-HEADER)
+
+#{modifier}#{return_type}
+#{function_name(type)}(#{argument_list})
+{
+ HEADER
+
+ prev_common_bytes = []
+ prev_n_common_bytes = 0
+ first_group = true
+ byte_size_groups.keys.sort.each do |common_bytes|
+ chars = byte_size_groups[common_bytes]
+ chars_bytes = chars.collect(&:bytes).sort
+ min = chars_bytes.first.last
+ max = chars_bytes.last.last
+ n_common_bytes = 0
+ if common_bytes.empty?
+ indent = " "
+ yield(:no_common_bytes, indent, chars, chars_bytes)
+ else
+ if first_group
+ @output.puts(<<-BODY)
+ {
+ BODY
+ end
+
+ found_different_byte = false
+ common_bytes.each_with_index do |common_byte, i|
+ unless found_different_byte
+ if prev_common_bytes[i] == common_byte
+ n_common_bytes += 1
+ next
+ end
+ found_different_byte = true
+ end
+ indent = " " * i
+ # p [i, prev_common_bytes.collect{|x| "%#04x" % x}, common_bytes.collect{|x| "%#04x" % x}, "%#04x" % common_byte, n_common_bytes, prev_n_common_bytes]
+ # TODO: The following code may be able to be simplified.
+ if prev_common_bytes[i].nil?
+ # p nil
+ @output.puts(<<-BODY)
+ #{indent}switch (#{char_variable}[#{i}]) {
+ BODY
+ elsif i < prev_n_common_bytes
+ # p :prev
+ @output.puts(<<-BODY)
+ #{indent} default :
+ #{indent} break;
+ #{indent} }
+ #{indent} break;
+ BODY
+ elsif n_common_bytes < prev_n_common_bytes
+ # p :common_prev
+ @output.puts(<<-BODY)
+ #{indent}switch (#{char_variable}[#{i}]) {
+ BODY
+ else
+ # p :else
+ prev_common_bytes.size.downto(common_bytes.size + 1) do |j|
+ sub_indent = " " * (j - 1)
+ @output.puts(<<-BODY)
+ #{indent}#{sub_indent}default :
+ #{indent}#{sub_indent} break;
+ #{indent}#{sub_indent}}
+ #{indent}#{sub_indent}break;
+ BODY
+ end
+ end
+ @output.puts(<<-BODY)
+ #{indent}case #{"%#04x" % common_byte} :
+ BODY
+ end
+
+ n = chars_bytes.first.size - 1
+ indent = " " + (" " * common_bytes.size)
+ yield(:have_common_bytes, indent, chars, chars_bytes, n, common_bytes)
+ end
+
+ prev_common_bytes = common_bytes
+ prev_n_common_bytes = n_common_bytes
+ first_group = false
+ end
+
+ # p [prev_common_bytes.collect{|x| "%#04x" % x}, prev_n_common_bytes]
+
+ (prev_common_bytes.size - 1).step(0, -1) do |i|
+ indent = " " * i
+ @output.puts(<<-BODY)
+ #{indent}default :
+ #{indent} break;
+ #{indent}}
+ BODY
+ if i > 0
+ @output.puts(<<-BODY)
+ #{indent}break;
+ BODY
+ end
+ end
+
+ @output.puts(<<-FOOTER)
+ }
+
+ return #{default};
+}
+ FOOTER
+ end
+
+ def generate_char_converter(type,
+ function_type,
+ char_map,
+ default,
+ return_type,
+ options={},
+ &converter)
+ byte_size_groups = char_map.keys.group_by do |from|
+ bytes = from.bytes
+ bytes[0..-2]
+ end
+
+ generate_char_convert_tables(type,
+ return_type,
+ byte_size_groups,
+ &converter)
+
+ char_variable = "utf8"
+ generate_char_convert_function(function_type,
+ "const unsigned char *#{char_variable}",
+ char_variable,
+ default,
+ return_type,
+ byte_size_groups,
+ options) do |state, *args|
+ case state
+ when :no_common_bytes
+ indent, chars, chars_bytes = args
+ if chars.size == 1
+ char = chars[0]
+ char_byte = chars_bytes.first.first
+ value = yield(char)
+ @output.puts(<<-BODY)
+#{indent}if (#{char_variable}[0] < 0x80) {
+#{indent} if (#{char_variable}[0] == #{"%#04x" % char_byte}) {
+#{indent} return #{value};
+#{indent} } else {
+#{indent} return #{default};
+#{indent} }
+#{indent}} else {
+ BODY
+ else
+ min = chars_bytes.first.first
+ max = chars_bytes.last.first
+ @output.puts(<<-BODY)
+#{indent}if (#{char_variable}[0] < 0x80) {
+#{indent} if (#{char_variable}[0] >= #{"%#04x" % min} &&
+#{indent} #{char_variable}[0] <= #{"%#04x" % max}) {
+#{indent} return #{table_name(type, [])}[#{char_variable}[0] - #{"%#04x" % min}];
+#{indent} } else {
+#{indent} return #{default};
+#{indent} }
+#{indent}} else {
+ BODY
+ end
+ when :have_common_bytes
+ indent, chars, chars_bytes, n, common_bytes = args
+ if chars.size == 1
+ char = chars[0]
+ char_byte = chars_bytes.first.last
+ value = yield(char)
+ @output.puts(<<-BODY)
+#{indent}if (#{char_variable}[#{n}] == #{"%#04x" % char_byte}) {
+#{indent} return #{value};
+#{indent}}
+#{indent}break;
+ BODY
+ else
+ sorted_chars = chars.sort
+ min = chars_bytes.first.last
+ max = chars_bytes.last.last
+ all_values = (min..max).collect do |last_byte|
+ char = (common_bytes + [last_byte]).pack("c*")
+ char.force_encoding("UTF-8")
+ yield(char)
+ end
+ if all_values.uniq.size == 1
+ value = all_values.first
+ else
+ value = "#{table_name(type, common_bytes)}[#{char_variable}[#{n}] - #{"%#04x" % min}]"
+ end
+ last_n_bits_for_char_in_utf8 = 6
+ max_n_chars_in_byte = 2 ** last_n_bits_for_char_in_utf8
+ if all_values.size == max_n_chars_in_byte
+ @output.puts(<<-BODY)
+#{indent}return #{value};
+ BODY
+ else
+ @output.puts(<<-BODY)
+#{indent}if (#{char_variable}[#{n}] >= #{"%#04x" % min} &&
+#{indent} #{char_variable}[#{n}] <= #{"%#04x" % max}) {
+#{indent} return #{value};
+#{indent}}
+#{indent}break;
+ BODY
+ end
+ end
+ end
+ end
+ end
+
+ def generate_blockcode_char_type(block_codes)
+ default = "GRN_CHAR_OTHERS"
+
+ char_types = {}
+ current_type = default
+ prev_char = nil
+ block_codes.keys.sort.each do |char|
+ type = block_codes[char]
+ if current_type != default
+ prev_code_point = prev_char.codepoints[0]
+ code_point = char.codepoints[0]
+ (prev_code_point...code_point).each do |target_code_point|
+ target_char = [target_code_point].pack("U*")
+ char_types[target_char] = current_type
+ end
+ end
+ current_type = type
+ prev_char = char
+ end
+ unless current_type == default
+ raise "TODO: Consider the max unicode character"
+ max_unicode_char = "\u{10ffff}"
+ (prev_char..max_unicode_char).each do |target_char|
+ char_types[target_char] = current_type
+ end
+ end
+
+ generate_char_converter("char_type",
+ "char_type",
+ char_types,
+ default,
+ "grn_char_type") do |char|
+ char_types[char] || default
+ end
+ end
+
+ def generate_decompose(decompose_map)
+ default = "NULL"
+ generate_char_converter("decompose",
+ "decompose",
+ decompose_map,
+ default,
+ "const char *") do |from|
+ to = decompose_map[from]
+ if to
+ escaped_value = to.bytes.collect {|char| "\\x%02x" % char}.join("")
+ "\"#{escaped_value}\""
+ else
+ default
+ end
+ end
+ end
+
+ def generate_compose(compose_map)
+ # require "pp"
+ # p compose_map.size
+ # pp compose_map.keys.group_by {|x| x.chars[1]}.size
+ # pp compose_map.keys.group_by {|x| x.chars[1]}.collect {|k, vs| [k, k.codepoints, vs.size, vs.group_by {|x| x.chars[0].bytesize}.collect {|k2, vs2| [k2, vs2.size]}]}
+ # pp compose_map.keys.group_by {|x| x.chars[0].bytesize}.collect {|k, vs| [k, vs.size]}
+ # pp compose_map
+
+ suffix_char_map = {}
+ compose_map.each do |source, destination|
+ chars = source.chars
+ if chars.size != 2
+ STDERR.puts "caution: more than two chars in pattern #{chars.join('|')}"
+ return
+ end
+ prefix, suffix = chars
+ suffix_char_map[suffix] ||= {}
+ suffix_char_map[suffix][prefix] = destination
+ end
+
+ suffix_char_map.each do |suffix, prefix_char_map|
+ suffix_bytes = suffix.bytes.collect {|byte| "%02x" % byte}.join("")
+ default = "NULL"
+ generate_char_converter("compose_prefix_#{suffix_bytes}",
+ "compose_prefix_#{suffix_bytes}",
+ prefix_char_map,
+ default,
+ "const char *",
+ :internal => true) do |prefix|
+ to = prefix_char_map[prefix]
+ if to
+ escaped_value = to.bytes.collect {|char| "\\x%02x" % char}.join("")
+ "\"#{escaped_value}\""
+ else
+ default
+ end
+ end
+ end
+
+
+ char_variable = "suffix_utf8"
+ argument_list =
+ "const unsigned char *prefix_utf8, " +
+ "const unsigned char *#{char_variable}"
+ default = "NULL"
+ byte_size_groups = suffix_char_map.keys.group_by do |from|
+ bytes = from.bytes
+ bytes[0..-2]
+ end
+ generate_char_convert_function("compose",
+ argument_list,
+ char_variable,
+ default,
+ "const char *",
+ byte_size_groups) do |type, *args|
+ case type
+ when :no_common_bytes
+ indent, chars, chars_bytes = args
+ @output.puts(<<-BODY)
+#{indent}switch (#{char_variable}[0]) {
+ BODY
+ chars.each do |char|
+ suffix_bytes = char.bytes.collect {|byte| "%02x" % byte}.join("")
+ type = "compose_prefix_#{suffix_bytes}"
+ @output.puts(<<-BODY)
+#{indent}case #{"%#04x" % char.bytes.last} :
+#{indent} return #{function_name(type)}(prefix_utf8);
+ BODY
+ end
+ @output.puts(<<-BODY)
+#{indent}default :
+#{indent} return #{default};
+#{indent}}
+#{indent}break;
+ BODY
+ when :have_common_bytes
+ indent, chars, chars_bytes, n, common_bytes = args
+ @output.puts(<<-BODY)
+#{indent}switch (#{char_variable}[#{n}]) {
+ BODY
+ chars.each do |char|
+ suffix_bytes = char.bytes.collect {|byte| "%02x" % byte}.join("")
+ type = "compose_prefix_#{suffix_bytes}"
+ @output.puts(<<-BODY)
+#{indent}case #{"%#04x" % char.bytes.last} :
+#{indent} return #{function_name(type)}(prefix_utf8);
+ BODY
+ end
+ @output.puts(<<-BODY)
+#{indent}default :
+#{indent} return #{default};
+#{indent}}
+#{indent}break;
+ BODY
+ end
+ end
+ end
+
+ def to_bytes_map(char_map)
+ bytes_map = {}
+ char_map.each_key do |from|
+ parent = bytes_map
+ from.bytes[0..-2].each do |byte|
+ parent[byte] ||= {}
+ parent = parent[byte]
+ end
+ parent[from.bytes.last] = char_map[from]
+ end
+ bytes_map
+ end
+end
+
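+# Runs "./icudump --<option>" and returns a hash mapping each UTF-8
+# character (decoded from the colon-separated hex bytes in the dump) to
+# the code reported for it.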
+def create_bc(option)
+ bc = {}
+ open("|./icudump --#{option}").each{|l|
+ src,_,code = l.chomp.split("\t")
+ str = src.split(':').collect(&:hex).pack("c*")
+ str.force_encoding("UTF-8")
+ bc[str] = code
+ }
+ bc
+end
+
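+# Inserts the character sequence src into the nested-hash trie, storing
+# dst under the nil key of the terminal node.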
+def ccpush(hash, src, dst)
+ head = src.shift
+ hash[head] = {} unless hash[head]
+ if head
+ ccpush(hash[head], src, dst)
+ else
+ hash[head] = dst
+ end
+end
+
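+# At the leftmost position where the trie matches, replaces the longest
+# matching substring of str with its stored value; returns str unchanged
+# when nothing matches.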
+def subst(hash, str)
+ cand = nil
+ src = str.chars
+ for i in 0..src.size-1
+ h = hash
+ for j in i..src.size-1
+ head = src[j]
+ h = h[head]
+ break unless h
+ if h[nil]
+ cand = src[0,i].join("") + h[nil] + src[j + 1..-1].join("")
+ end
+ end
+ return cand if cand
+ end
+ return str
+end
+
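+# Registers src => dst in the decompose map after normalizing dst:
+# downcase it (unless $case_sensitive), fold it through the composition
+# trie until stable, and strip leading spaces (unless $keep_space).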
+def map_entry(decompose, cc, src, dst)
+ dst.downcase! unless $case_sensitive
+ loop {
+ dst2 = subst(cc, dst)
+ break if dst2 == dst
+ dst = dst2
+ }
+ unless $keep_space
+ dst = $1 if dst =~ /^ +([^ ].*)$/
+ end
+ decompose[src] = dst if src != dst
+end
+
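+# Builds the NFKD decomposition map from "./icudump --nfkd", folding
+# each target through the canonical-composition data ("./icudump --cc")
+# and merging optional overrides from nfkc-custom-rules.txt. ASCII A-Z
+# are also mapped to lowercase unless $case_sensitive.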
+def create_decompose_map()
+ cc = {}
+ open('|./icudump --cc').each{|l|
+ _,src,dst = l.chomp.split("\t")
+ if cc[src]
+ STDERR.puts "caution: ambiguous mapping #{src}|#{cc[src]}|#{dst}" if cc[src] != dst
+ end
+ ccpush(cc, src.chars, dst)
+ }
+ decompose_map = {}
+ open('|./icudump --nfkd').each{|l|
+ n,src,dst = l.chomp.split("\t")
+ map_entry(decompose_map, cc, src, dst)
+ }
+ if File.readable?(CUSTOM_RULE_PATH)
+ open(CUSTOM_RULE_PATH).each{|l|
+ src,dst = l.chomp.split("\t")
+ map_entry(decompose_map, cc, src, dst)
+ }
+ end
+ unless $case_sensitive
+ for c in 'A'..'Z'
+ decompose_map[c] = c.downcase
+ end
+ end
+ return decompose_map
+end
+
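+# Builds the composition map: each canonical pair from "./icudump --cc"
+# is rewritten via decompose_map, then sub-sequences that are themselves
+# composable are substituted repeatedly until a fixed point is reached.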
+def create_compose_map(decompose_map)
+ cc = {}
+ open('|./icudump --cc').each{|l|
+ _,src,dst = l.chomp.split("\t")
+ src = src.chars.collect{|c| decompose_map[c] || c}.join
+ dst = decompose_map[dst] || dst
+ if cc[src] && cc[src] != dst
+ STDERR.puts("caution: inconsitent mapping '#{src}' => '#{cc[src]}'|'#{dst}'")
+ end
+ cc[src] = dst if src != dst
+ }
+ loop {
+ noccur = 0
+ cc2 = {}
+ cc.each {|src,dst|
+ src2 = src
+ chars = src.chars
+ l = chars.size - 1
+ for i in 0..l
+ for j in i..l
+ next if i == 0 && j == l
+ str = chars[i..j].join
+ if decompose_map[str]
+ STDERR.printf("caution: recursive mapping '%s'=>'%s'\n",
+ str, decompose_map[str])
+ end
+ if cc[str]
+ src2 = (i > 0 ? chars[0..i-1].join : '') + cc[str] + (j < l ? chars[j+1..l].join : '')
+ noccur += 1
+ end
+ end
+ end
+ cc2[src2] = dst if src2 != dst
+ }
+ cc = cc2
+ STDERR.puts("substituted #{noccur} patterns.")
+ break if noccur == 0
+ STDERR.puts('try again..')
+ }
+ return cc
+end
+
+######## main #######
+
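+# Command line options:
+#   -c              case sensitive (do not fold to lowercase)
+#   -s              keep leading spaces in decomposed forms
+#   --impl=switch   generate nested switch statements (default)
+#   --impl=table    generate lookup tables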
+generator_class = SwitchGenerator
+ARGV.each{|arg|
+  case arg
+  when "--impl=switch"
+    generator_class = SwitchGenerator
+  when "--impl=table"
+    generator_class = TableGenerator
+  when /-*c/i
+    $case_sensitive = true
+  when /-*s/i
+    $keep_space = true
+  end
+}
+
+STDERR.puts('compiling icudump')
+system('cc -Wall -O3 -o icudump -I/tmp/local/include -L/tmp/local/lib icudump.c -licuuc -licui18n')
+
+STDERR.puts('getting Unicode version')
+unicode_version = `./icudump --version`.strip.gsub(".", "")
+
+STDERR.puts('creating bc..')
+bc = create_bc("gc")
+
+STDERR.puts('creating decompose map..')
+decompose_map = create_decompose_map()
+
+STDERR.puts('creating compose map..')
+compose_map = create_compose_map(decompose_map)
+
+File.open("nfkc#{unicode_version}.c", "w") do |output|
+ output.puts(<<-HEADER)
+/* -*- c-basic-offset: 2 -*- */
+/*
+ Copyright(C) 2010-2016 Brazil
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License version 2.1 as published by the Free Software Foundation.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
+*/
+
+/*
+  Don't edit this file by hand. It is generated automatically by nfkc.rb.
+*/
+
+#include "grn.h"
+#include "grn_nfkc.h"
+#include <groonga/nfkc.h>
+
+#ifdef GRN_WITH_NFKC
+ HEADER
+
+ generator = generator_class.new(unicode_version, output)
+ generator.generate(bc, decompose_map, compose_map)
+
+ output.puts(<<-FOOTER)
+
+#endif /* GRN_WITH_NFKC */
+
+ FOOTER
+end