diff options
Diffstat (limited to 'storage/mroonga/lib/mrn_encoding.cpp')
-rw-r--r-- | storage/mroonga/lib/mrn_encoding.cpp | 242 |
1 files changed, 242 insertions, 0 deletions
diff --git a/storage/mroonga/lib/mrn_encoding.cpp b/storage/mroonga/lib/mrn_encoding.cpp new file mode 100644 index 00000000..369c985a --- /dev/null +++ b/storage/mroonga/lib/mrn_encoding.cpp @@ -0,0 +1,242 @@ +/* -*- c-basic-offset: 2 -*- */ +/* + Copyright(C) 2013 Kouhei Sutou <kou@clear-code.com> + Copyright(C) 2011-2013 Kentoku SHIBA + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA +*/ + +#include <mrn_err.h> +#include "mrn_encoding.hpp" + +namespace mrn { + namespace encoding { + CHARSET_INFO *mrn_charset_utf8 = NULL; + CHARSET_INFO *mrn_charset_utf8mb4 = NULL; + CHARSET_INFO *mrn_charset_binary = NULL; + CHARSET_INFO *mrn_charset_ascii = NULL; + CHARSET_INFO *mrn_charset_latin1_1 = NULL; + CHARSET_INFO *mrn_charset_latin1_2 = NULL; + CHARSET_INFO *mrn_charset_cp932 = NULL; + CHARSET_INFO *mrn_charset_sjis = NULL; + CHARSET_INFO *mrn_charset_eucjpms = NULL; + CHARSET_INFO *mrn_charset_ujis = NULL; + CHARSET_INFO *mrn_charset_koi8r = NULL; + + void init(void) { + CHARSET_INFO **cs; + MRN_DBUG_ENTER_FUNCTION(); + for (cs = all_charsets; cs < all_charsets + MY_ALL_CHARSETS_SIZE; cs++) + { + if (!cs[0]) + continue; + if (!strcmp(cs[0]->cs_name.str, "utf8mb3")) + { + DBUG_PRINT("info", ("mroonga: %s is %s [%p]", + cs[0]->coll_name.str, cs[0]->cs_name.str, cs[0]->cset)); + if (!mrn_charset_utf8) + mrn_charset_utf8 = cs[0]; + else if (mrn_charset_utf8->cset != cs[0]->cset) + DBUG_ASSERT(0); + continue; + } + if (!strcmp(cs[0]->cs_name.str, "utf8mb4")) + { + DBUG_PRINT("info", ("mroonga: %s is %s [%p]", + cs[0]->coll_name.str, cs[0]->cs_name.str, cs[0]->cset)); + if (!mrn_charset_utf8mb4) + mrn_charset_utf8mb4 = cs[0]; + else if (mrn_charset_utf8mb4->cset != cs[0]->cset) + DBUG_ASSERT(0); + continue; + } + if (!strcmp(cs[0]->cs_name.str, "binary")) + { + DBUG_PRINT("info", ("mroonga: %s is %s [%p]", + cs[0]->coll_name.str, cs[0]->cs_name.str, cs[0]->cset)); + if (!mrn_charset_binary) + mrn_charset_binary = cs[0]; + else if (mrn_charset_binary->cset != cs[0]->cset) + DBUG_ASSERT(0); + continue; + } + if (!strcmp(cs[0]->cs_name.str, "ascii")) + { + DBUG_PRINT("info", ("mroonga: %s is %s [%p]", + cs[0]->coll_name.str, cs[0]->cs_name.str, cs[0]->cset)); + if (!mrn_charset_ascii) + mrn_charset_ascii = cs[0]; + else if (mrn_charset_ascii->cset != cs[0]->cset) + DBUG_ASSERT(0); + continue; + } + if (!strcmp(cs[0]->cs_name.str, "latin1")) + { + DBUG_PRINT("info", ("mroonga: %s is %s [%p]", + cs[0]->coll_name.str, cs[0]->cs_name.str, cs[0]->cset)); + if (!mrn_charset_latin1_1) + mrn_charset_latin1_1 = cs[0]; + else if (mrn_charset_latin1_1->cset != cs[0]->cset) + { + if (!mrn_charset_latin1_2) + mrn_charset_latin1_2 = cs[0]; + else if (mrn_charset_latin1_2->cset != cs[0]->cset) + DBUG_ASSERT(0); + } + continue; + } + if (!strcmp(cs[0]->cs_name.str, "cp932")) + { + DBUG_PRINT("info", ("mroonga: %s is %s [%p]", + cs[0]->coll_name.str, cs[0]->cs_name.str, cs[0]->cset)); + if (!mrn_charset_cp932) + mrn_charset_cp932 = cs[0]; + else if (mrn_charset_cp932->cset != cs[0]->cset) + DBUG_ASSERT(0); + continue; + } + if (!strcmp(cs[0]->cs_name.str, "sjis")) + { + DBUG_PRINT("info", ("mroonga: %s is %s [%p]", + cs[0]->coll_name.str, cs[0]->cs_name.str, cs[0]->cset)); + if (!mrn_charset_sjis) + mrn_charset_sjis = cs[0]; + else if (mrn_charset_sjis->cset != cs[0]->cset) + DBUG_ASSERT(0); + continue; + } + if (!strcmp(cs[0]->cs_name.str, "eucjpms")) + { + DBUG_PRINT("info", ("mroonga: %s is %s [%p]", + cs[0]->coll_name.str, cs[0]->cs_name.str, cs[0]->cset)); + if (!mrn_charset_eucjpms) + mrn_charset_eucjpms = cs[0]; + else if (mrn_charset_eucjpms->cset != cs[0]->cset) + DBUG_ASSERT(0); + continue; + } + if (!strcmp(cs[0]->cs_name.str, "ujis")) + { + DBUG_PRINT("info", ("mroonga: %s is %s [%p]", + cs[0]->coll_name.str, cs[0]->cs_name.str, cs[0]->cset)); + if (!mrn_charset_ujis) + mrn_charset_ujis = cs[0]; + else if (mrn_charset_ujis->cset != cs[0]->cset) + DBUG_ASSERT(0); + continue; + } + if (!strcmp(cs[0]->cs_name.str, "koi8r")) + { + DBUG_PRINT("info", ("mroonga: %s is %s [%p]", + cs[0]->coll_name.str, cs[0]->cs_name.str, cs[0]->cset)); + if (!mrn_charset_koi8r) + mrn_charset_koi8r = cs[0]; + else if (mrn_charset_koi8r->cset != cs[0]->cset) + DBUG_ASSERT(0); + continue; + } + DBUG_PRINT("info", ("mroonga: %s[%s][%p] is not supported", + cs[0]->coll_name.str, cs[0]->cs_name.str, cs[0]->cset)); + } + DBUG_VOID_RETURN; + } + + int set(grn_ctx *ctx, const CHARSET_INFO *charset) { + MRN_DBUG_ENTER_FUNCTION(); + int error = 0; + + if (!set_raw(ctx, charset)) { + const char *name = "<null>"; + const char *csname = "<null>"; + if (charset) { + name = charset->coll_name.str; + csname = charset->cs_name.str; + } + error = ER_MRN_CHARSET_NOT_SUPPORT_NUM; + my_printf_error(error, + ER_MRN_CHARSET_NOT_SUPPORT_STR, + MYF(0), name, csname); + } + + DBUG_RETURN(error); + } + + bool set_raw(grn_ctx *ctx, const CHARSET_INFO *charset) { + MRN_DBUG_ENTER_FUNCTION(); + if (!charset) + { + GRN_CTX_SET_ENCODING(ctx, GRN_ENC_NONE); + DBUG_RETURN(true); + } + if (charset->cset == mrn_charset_utf8->cset) + { + GRN_CTX_SET_ENCODING(ctx, GRN_ENC_UTF8); + DBUG_RETURN(true); + } + if (mrn_charset_utf8mb4 && charset->cset == mrn_charset_utf8mb4->cset) + { + GRN_CTX_SET_ENCODING(ctx, GRN_ENC_UTF8); + DBUG_RETURN(true); + } + if (charset->cset == mrn_charset_cp932->cset) + { + GRN_CTX_SET_ENCODING(ctx, GRN_ENC_SJIS); + DBUG_RETURN(true); + } + if (charset->cset == mrn_charset_eucjpms->cset) + { + GRN_CTX_SET_ENCODING(ctx, GRN_ENC_EUC_JP); + DBUG_RETURN(true); + } + if (charset->cset == mrn_charset_latin1_1->cset) + { + GRN_CTX_SET_ENCODING(ctx, GRN_ENC_LATIN1); + DBUG_RETURN(true); + } + if (charset->cset == mrn_charset_latin1_2->cset) + { + GRN_CTX_SET_ENCODING(ctx, GRN_ENC_LATIN1); + DBUG_RETURN(true); + } + if (charset->cset == mrn_charset_koi8r->cset) + { + GRN_CTX_SET_ENCODING(ctx, GRN_ENC_KOI8R); + DBUG_RETURN(true); + } + if (charset->cset == mrn_charset_binary->cset) + { + GRN_CTX_SET_ENCODING(ctx, GRN_ENC_NONE); + DBUG_RETURN(true); + } + if (charset->cset == mrn_charset_ascii->cset) + { + GRN_CTX_SET_ENCODING(ctx, GRN_ENC_UTF8); + DBUG_RETURN(true); + } + if (charset->cset == mrn_charset_sjis->cset) + { + GRN_CTX_SET_ENCODING(ctx, GRN_ENC_SJIS); + DBUG_RETURN(true); + } + if (charset->cset == mrn_charset_ujis->cset) + { + GRN_CTX_SET_ENCODING(ctx, GRN_ENC_EUC_JP); + DBUG_RETURN(true); + } + GRN_CTX_SET_ENCODING(ctx, GRN_ENC_NONE); + DBUG_RETURN(false); + } + } +} |