summaryrefslogtreecommitdiffstats
path: root/storage/mroonga/lib/mrn_encoding.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'storage/mroonga/lib/mrn_encoding.cpp')
-rw-r--r--storage/mroonga/lib/mrn_encoding.cpp242
1 files changed, 242 insertions, 0 deletions
diff --git a/storage/mroonga/lib/mrn_encoding.cpp b/storage/mroonga/lib/mrn_encoding.cpp
new file mode 100644
index 00000000..369c985a
--- /dev/null
+++ b/storage/mroonga/lib/mrn_encoding.cpp
@@ -0,0 +1,242 @@
+/* -*- c-basic-offset: 2 -*- */
+/*
+ Copyright(C) 2013 Kouhei Sutou <kou@clear-code.com>
+ Copyright(C) 2011-2013 Kentoku SHIBA
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
+*/
+
+#include <mrn_err.h>
+#include "mrn_encoding.hpp"
+
+namespace mrn {
+ namespace encoding {
+ /*
+   Cached charset entries, one per charset family that set_raw() maps to a
+   Groonga encoding.  All start as NULL; init() scans all_charsets and
+   stores the first entry whose cs_name matches ("utf8mb3" goes into
+   mrn_charset_utf8).  A pointer stays NULL when no entry with that name
+   is found.
+
+   latin1 has two slots: mrn_charset_latin1_1 keeps the first latin1 entry
+   and mrn_charset_latin1_2 keeps the first latin1 entry whose cset
+   differs from the one in mrn_charset_latin1_1.
+ */
+ CHARSET_INFO *mrn_charset_utf8 = NULL;
+ CHARSET_INFO *mrn_charset_utf8mb4 = NULL;
+ CHARSET_INFO *mrn_charset_binary = NULL;
+ CHARSET_INFO *mrn_charset_ascii = NULL;
+ CHARSET_INFO *mrn_charset_latin1_1 = NULL;
+ CHARSET_INFO *mrn_charset_latin1_2 = NULL;
+ CHARSET_INFO *mrn_charset_cp932 = NULL;
+ CHARSET_INFO *mrn_charset_sjis = NULL;
+ CHARSET_INFO *mrn_charset_eucjpms = NULL;
+ CHARSET_INFO *mrn_charset_ujis = NULL;
+ CHARSET_INFO *mrn_charset_koi8r = NULL;
+
+ /*
+   Scan all_charsets once and cache, for every charset name Mroonga knows
+   about, the first matching entry in the corresponding mrn_charset_*
+   pointer.
+
+   Later entries with the same name are expected to share the cset of the
+   cached entry; a mismatch trips DBUG_ASSERT(0).  latin1 is the only name
+   allowed a second, different cset, which is cached in
+   mrn_charset_latin1_2.  Entries with any other name are only logged as
+   unsupported.
+ */
+ void init(void) {
+   MRN_DBUG_ENTER_FUNCTION();
+
+   /* One row per supported cs_name; `secondary` is NULL unless a second
+      cset is tolerated for that name. */
+   struct charset_slot {
+     const char *name;
+     CHARSET_INFO **primary;
+     CHARSET_INFO **secondary;
+   };
+   static const charset_slot slots[] = {
+     {"utf8mb3", &mrn_charset_utf8,     NULL},
+     {"utf8mb4", &mrn_charset_utf8mb4,  NULL},
+     {"binary",  &mrn_charset_binary,   NULL},
+     {"ascii",   &mrn_charset_ascii,    NULL},
+     {"latin1",  &mrn_charset_latin1_1, &mrn_charset_latin1_2},
+     {"cp932",   &mrn_charset_cp932,    NULL},
+     {"sjis",    &mrn_charset_sjis,     NULL},
+     {"eucjpms", &mrn_charset_eucjpms,  NULL},
+     {"ujis",    &mrn_charset_ujis,     NULL},
+     {"koi8r",   &mrn_charset_koi8r,    NULL}
+   };
+   const size_t n_slots = sizeof(slots) / sizeof(slots[0]);
+
+   for (size_t i = 0; i < MY_ALL_CHARSETS_SIZE; i++) {
+     CHARSET_INFO *candidate = all_charsets[i];
+     if (!candidate) {
+       continue;
+     }
+
+     /* Find the table row for this charset name, if any. */
+     const charset_slot *slot = NULL;
+     for (size_t j = 0; j < n_slots; j++) {
+       if (strcmp(candidate->cs_name.str, slots[j].name) == 0) {
+         slot = &slots[j];
+         break;
+       }
+     }
+
+     if (!slot) {
+       DBUG_PRINT("info", ("mroonga: %s[%s][%p] is not supported",
+                           candidate->coll_name.str,
+                           candidate->cs_name.str,
+                           candidate->cset));
+       continue;
+     }
+
+     DBUG_PRINT("info", ("mroonga: %s is %s [%p]",
+                         candidate->coll_name.str,
+                         candidate->cs_name.str,
+                         candidate->cset));
+
+     if (!*slot->primary) {
+       /* First entry seen for this name: remember it. */
+       *slot->primary = candidate;
+     } else if ((*slot->primary)->cset != candidate->cset) {
+       if (!slot->secondary) {
+         /* A second cset is not expected for this name. */
+         DBUG_ASSERT(0);
+       } else if (!*slot->secondary) {
+         *slot->secondary = candidate;
+       } else if ((*slot->secondary)->cset != candidate->cset) {
+         /* A third distinct cset is not expected. */
+         DBUG_ASSERT(0);
+       }
+     }
+   }
+
+   DBUG_VOID_RETURN;
+ }
+
+ /*
+   Select the Groonga encoding of `ctx` for `charset` via set_raw().
+
+   Returns 0 when set_raw() accepts the charset.  Otherwise reports
+   ER_MRN_CHARSET_NOT_SUPPORT_STR through my_printf_error(), passing the
+   collation name and charset name ("<null>" for both when `charset` is
+   NULL), and returns ER_MRN_CHARSET_NOT_SUPPORT_NUM.
+ */
+ int set(grn_ctx *ctx, const CHARSET_INFO *charset) {
+   MRN_DBUG_ENTER_FUNCTION();
+
+   if (set_raw(ctx, charset)) {
+     DBUG_RETURN(0);
+   }
+
+   const char *collation_name = charset ? charset->coll_name.str : "<null>";
+   const char *charset_name = charset ? charset->cs_name.str : "<null>";
+   const int not_supported = ER_MRN_CHARSET_NOT_SUPPORT_NUM;
+   my_printf_error(not_supported,
+                   ER_MRN_CHARSET_NOT_SUPPORT_STR,
+                   MYF(0), collation_name, charset_name);
+
+   DBUG_RETURN(not_supported);
+ }
+
+ /*
+   True when `known` has been cached by init() and uses the same cset as
+   `charset`.  A NULL `known` (no entry with that name was found by
+   init()) never matches.
+ */
+ static inline bool uses_cset_of(const CHARSET_INFO *known,
+                                 const CHARSET_INFO *charset) {
+   return known && known->cset == charset->cset;
+ }
+
+ /*
+   Set the encoding of `ctx` to the Groonga encoding that corresponds to
+   `charset`, decided by comparing charset->cset with the cset of each
+   charset cached by init().
+
+   Returns true when an encoding was chosen; a NULL `charset` is accepted
+   and selects GRN_ENC_NONE.  Returns false, after setting GRN_ENC_NONE,
+   when `charset` matches none of the cached charsets.
+
+   Every cached pointer is checked for NULL before it is dereferenced, so
+   a charset that init() did not find is skipped instead of crashing.
+   The checks run in the original order; the first match wins.
+ */
+ bool set_raw(grn_ctx *ctx, const CHARSET_INFO *charset) {
+   MRN_DBUG_ENTER_FUNCTION();
+   if (!charset)
+   {
+     GRN_CTX_SET_ENCODING(ctx, GRN_ENC_NONE);
+     DBUG_RETURN(true);
+   }
+   if (uses_cset_of(mrn_charset_utf8, charset))
+   {
+     GRN_CTX_SET_ENCODING(ctx, GRN_ENC_UTF8);
+     DBUG_RETURN(true);
+   }
+   if (uses_cset_of(mrn_charset_utf8mb4, charset))
+   {
+     GRN_CTX_SET_ENCODING(ctx, GRN_ENC_UTF8);
+     DBUG_RETURN(true);
+   }
+   if (uses_cset_of(mrn_charset_cp932, charset))
+   {
+     GRN_CTX_SET_ENCODING(ctx, GRN_ENC_SJIS);
+     DBUG_RETURN(true);
+   }
+   if (uses_cset_of(mrn_charset_eucjpms, charset))
+   {
+     GRN_CTX_SET_ENCODING(ctx, GRN_ENC_EUC_JP);
+     DBUG_RETURN(true);
+   }
+   if (uses_cset_of(mrn_charset_latin1_1, charset))
+   {
+     GRN_CTX_SET_ENCODING(ctx, GRN_ENC_LATIN1);
+     DBUG_RETURN(true);
+   }
+   if (uses_cset_of(mrn_charset_latin1_2, charset))
+   {
+     GRN_CTX_SET_ENCODING(ctx, GRN_ENC_LATIN1);
+     DBUG_RETURN(true);
+   }
+   if (uses_cset_of(mrn_charset_koi8r, charset))
+   {
+     GRN_CTX_SET_ENCODING(ctx, GRN_ENC_KOI8R);
+     DBUG_RETURN(true);
+   }
+   if (uses_cset_of(mrn_charset_binary, charset))
+   {
+     GRN_CTX_SET_ENCODING(ctx, GRN_ENC_NONE);
+     DBUG_RETURN(true);
+   }
+   if (uses_cset_of(mrn_charset_ascii, charset))
+   {
+     /* ASCII text is handled as UTF-8 on the Groonga side. */
+     GRN_CTX_SET_ENCODING(ctx, GRN_ENC_UTF8);
+     DBUG_RETURN(true);
+   }
+   if (uses_cset_of(mrn_charset_sjis, charset))
+   {
+     GRN_CTX_SET_ENCODING(ctx, GRN_ENC_SJIS);
+     DBUG_RETURN(true);
+   }
+   if (uses_cset_of(mrn_charset_ujis, charset))
+   {
+     GRN_CTX_SET_ENCODING(ctx, GRN_ENC_EUC_JP);
+     DBUG_RETURN(true);
+   }
+   /* Unknown charset: fall back to no encoding and report failure. */
+   GRN_CTX_SET_ENCODING(ctx, GRN_ENC_NONE);
+   DBUG_RETURN(false);
+ }
+ }
+}