diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-07-01 18:15:00 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-07-01 18:15:00 +0000 |
commit | a2a2e32c02643a0cec111511220227703fda1cd5 (patch) | |
tree | 69cc2b631234c2a8e026b9cd4d72676c61c594df /sql/charset_collations.h | |
parent | Releasing progress-linux version 1:10.11.8-1~progress7.99u1. (diff) | |
download | mariadb-a2a2e32c02643a0cec111511220227703fda1cd5.tar.xz mariadb-a2a2e32c02643a0cec111511220227703fda1cd5.zip |
Merging upstream version 1:11.4.2.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to '')
-rw-r--r-- | sql/charset_collations.h | 247 |
1 files changed, 247 insertions, 0 deletions
diff --git a/sql/charset_collations.h b/sql/charset_collations.h new file mode 100644 index 00000000..86d6ff6b --- /dev/null +++ b/sql/charset_collations.h @@ -0,0 +1,247 @@ +/* Copyright (c) 2023, MariaDB Corporation. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */ + +#ifndef LEX_CHARSET_COLLATIONS_INCLUDED +#define LEX_CHARSET_COLLATIONS_INCLUDED + +#include "sql_used.h" + +struct Charset_collation_map_st +{ +public: + + struct Elem_st + { + protected: + CHARSET_INFO *m_from; // From a character set + CHARSET_INFO *m_to; // To a collation + static size_t print_lex_string(char *dst, const LEX_CSTRING &str) + { + memcpy(dst, str.str, str.length); + return str.length; + } + public: + /* + Size in text format: 'utf8mb4=utf8mb4_unicode_ai_ci' + */ + static constexpr size_t text_size_max() + { + return MY_CS_CHARACTER_SET_NAME_SIZE + 1 + + MY_CS_COLLATION_NAME_SIZE; + } + CHARSET_INFO *from() const + { + return m_from; + } + CHARSET_INFO *to() const + { + return m_to; + } + void set_to(CHARSET_INFO *cl) + { + m_to= cl; + } + size_t print(char *dst) const + { + const char *dst0= dst; + dst+= print_lex_string(dst, m_from->cs_name); + *dst++= '='; + dst+= print_lex_string(dst, m_to->coll_name); + return (size_t) (dst - dst0); + } + int cmp_by_charset_id(const Elem_st &rhs) const + { + return m_from->number < rhs.m_from->number ? -1 : + m_from->number > rhs.m_from->number ? +1 : 0; + } + }; + class Elem: public Elem_st + { + public: + Elem(CHARSET_INFO *from, CHARSET_INFO *to) + { + m_from= from; + m_to= to; + } + }; +protected: + Elem_st m_element[8]; // Should be enough for now + uint m_count; + uint m_version; + + static int cmp_by_charset_id(const void *a, const void *b) + { + return static_cast<const Elem_st*>(a)-> + cmp_by_charset_id(*static_cast<const Elem_st*>(b)); + } + + void sort() + { + qsort(m_element, m_count, sizeof(Elem_st), cmp_by_charset_id); + } + + const Elem_st *find_elem_by_charset_id(uint id) const + { + if (!m_count) + return NULL; + int first= 0, last= ((int) m_count) - 1; + for ( ; first <= last; ) + { + const int middle= (first + last) / 2; + DBUG_ASSERT(middle >= 0); + DBUG_ASSERT(middle < (int) m_count); + const uint middle_id= m_element[middle].from()->number; + if (middle_id == id) + return &m_element[middle]; + if (middle_id < id) + first= middle + 1; + else + last= middle - 1; + } + return NULL; + } + + bool insert(const Elem_st &elem) + { + DBUG_ASSERT(elem.from()->state & MY_CS_PRIMARY); + if (m_count >= array_elements(m_element)) + return true; + m_element[m_count]= elem; + m_count++; + sort(); + return false; + } + + bool insert_or_replace(const Elem_st &elem) + { + DBUG_ASSERT(elem.from()->state & MY_CS_PRIMARY); + const Elem_st *found= find_elem_by_charset_id(elem.from()->number); + if (found) + { + const_cast<Elem_st*>(found)->set_to(elem.to()); + return false; + } + return insert(elem); + } + +public: + void init() + { + m_count= 0; + m_version= 0; + } + uint count() const + { + return m_count; + } + uint version() const + { + return m_version; + } + void set(const Charset_collation_map_st &rhs, uint version_increment) + { + uint version= m_version; + *this= rhs; + m_version= version + version_increment; + } + const Elem_st & operator[](uint pos) const + { + DBUG_ASSERT(pos < m_count); + return m_element[pos]; + } + bool insert_or_replace(const class Lex_exact_charset &cs, + const class Lex_extended_collation &cl, + bool error_on_conflicting_duplicate); + bool insert_or_replace(const LEX_CSTRING &cs, + const LEX_CSTRING &cl, + bool error_on_conflicting_duplicate, + myf utf8_flag); + CHARSET_INFO *get_collation_for_charset(Sql_used *used, + CHARSET_INFO *cs) const + { + DBUG_ASSERT(cs->state & MY_CS_PRIMARY); + const Elem_st *elem= find_elem_by_charset_id(cs->number); + used->used|= Sql_used::CHARACTER_SET_COLLATIONS_USED; + if (elem) + return elem->to(); + return cs; + } + size_t text_format_nbytes_needed() const + { + return (Elem_st::text_size_max() + 1/* for ',' */) * m_count; + } + size_t print(char *dst, size_t nbytes_available) const + { + const char *dst0= dst; + const char *end= dst + nbytes_available; + for (uint i= 0; i < m_count; i++) + { + if (Elem_st::text_size_max() + 1/* for ',' */ > (size_t) (end - dst)) + break; + if (i > 0) + *dst++= ','; + dst+= m_element[i].print(dst); + } + return dst - dst0; + } + static constexpr size_t binary_size_max() + { + return 1/*count*/ + 4 * array_elements(m_element); + } + size_t to_binary(char *dst) const + { + const char *dst0= dst; + *dst++= (char) (uchar) m_count; + for (uint i= 0; i < m_count; i++) + { + int2store(dst, (uint16) m_element[i].from()->number); + dst+= 2; + int2store(dst, (uint16) m_element[i].to()->number); + dst+= 2; + } + return (size_t) (dst - dst0); + } + size_t from_binary(const char *src, size_t srclen) + { + const char *src0= src; + init(); + if (!srclen) + return 0; // Empty + uint count= (uchar) *src++; + if (srclen < 1 + 4 * count) + return 0; + for (uint i= 0; i < count; i++, src+= 4) + { + CHARSET_INFO *cs, *cl; + if (!(cs= get_charset(uint2korr(src), MYF(0))) || + !(cl= get_charset(uint2korr(src + 2), MYF(0)))) + { + /* + Unpacking from binary format happens on the slave side. + If for some reasons the slave does not know about a + character set or a collation, just skip the pair here. + This pair might not even be needed. + */ + continue; + } + insert_or_replace(Elem(cs, cl)); + } + return src - src0; + } + bool from_text(const LEX_CSTRING &str, myf utf8_flag); +}; + + +#endif // LEX_CHARSET_COLLATIONS_INCLUDED |