diff options
Diffstat (limited to 'sql/cset_narrowing.h')
-rw-r--r-- | sql/cset_narrowing.h | 143 |
1 files changed, 143 insertions, 0 deletions
diff --git a/sql/cset_narrowing.h b/sql/cset_narrowing.h new file mode 100644 index 00000000..bb0a3960 --- /dev/null +++ b/sql/cset_narrowing.h @@ -0,0 +1,143 @@ +/* + Copyright (c) 2023, MariaDB Corporation. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */ + +#ifndef CSET_NARROWING_H_INCLUDED +#define CSET_NARROWING_H_INCLUDED + +/* + A singleton class to provide "utf8mb3_from_mb4.charset()". + + This is a variant of utf8mb3_general_ci that one can use when they have data + in MB4 and want to make index lookup keys in MB3. +*/ +extern +class Charset_utf8narrow +{ + struct my_charset_handler_st cset_handler; + struct charset_info_st cset; +public: + Charset_utf8narrow() : + cset_handler(*my_charset_utf8mb3_general_ci.cset), + cset(my_charset_utf8mb3_general_ci) /* Copy the CHARSET_INFO structure */ + { + /* Insert our function wc_mb */ + cset_handler.wc_mb= my_wc_mb_utf8mb4_bmp_only; + cset.cset=&cset_handler; + + /* Charsets are compared by their name, so assign a different name */ + LEX_CSTRING tmp= {STRING_WITH_LEN("utf8_mb4_to_mb3")}; + cset.cs_name= tmp; + } + + CHARSET_INFO *charset() { return &cset; } + +} utf8mb3_from_mb4; + + +/* + A class to temporary change a field that uses utf8mb3_general_ci to enable + correct lookup key construction from string value in utf8mb4_general_ci + + Intended usage: + + // can do this in advance: + bool do_narrowing= Utf8_narrow::should_do_narrowing(field, value_cset); + ... + + // This sets the field to do narrowing if necessary: + Utf8_narrow narrow(field, do_narrowing); + + // write to 'field' here + // item->save_in_field(field) or something else + + // Stop doing narrowing + narrow.stop(); +*/ + +class Utf8_narrow +{ + Field *field; + DTCollation save_collation; + +public: + static bool should_do_narrowing(const THD *thd, CHARSET_INFO *field_cset, + CHARSET_INFO *value_cset); + + static bool should_do_narrowing(const Field *field, CHARSET_INFO *value_cset) + { + CHARSET_INFO *field_cset= field->charset(); + THD *thd= field->table->in_use; + return should_do_narrowing(thd, field_cset, value_cset); + } + + Utf8_narrow(Field *field_arg, bool is_applicable) + { + field= NULL; + if (is_applicable) + { + DTCollation mb3_from_mb4= utf8mb3_from_mb4.charset(); + field= field_arg; + save_collation= field->dtcollation(); + field->change_charset(mb3_from_mb4); + } + } + + void stop() + { + if (field) + field->change_charset(save_collation); +#ifndef NDEBUG + field= NULL; +#endif + } + + ~Utf8_narrow() + { + DBUG_ASSERT(!field); + } +}; + + +/* + @brief + Check if two fields can participate in a multiple equality using charset + narrowing. + + @detail + Normally, check_simple_equality() checks this by calling: + + left_field->eq_def(right_field) + + This function does the same but takes into account we might use charset + narrowing: + - collations are not the same but rather an utf8mb{3,4}_general_ci pair + - for field lengths, should compare # characters, not #bytes. +*/ + +inline +bool fields_equal_using_narrowing(const THD *thd, const Field *left, const Field *right) +{ + return + dynamic_cast<const Field_longstr*>(left) && + dynamic_cast<const Field_longstr*>(right) && + left->real_type() == right->real_type() && + (Utf8_narrow::should_do_narrowing(left, right->charset()) || + Utf8_narrow::should_do_narrowing(right, left->charset())) && + left->char_length() == right->char_length(); +}; + + +#endif /* CSET_NARROWING_H_INCLUDED */ |