diff options
Diffstat (limited to 'sql/strfunc.cc')
-rw-r--r-- | sql/strfunc.cc | 407 |
1 files changed, 407 insertions, 0 deletions
diff --git a/sql/strfunc.cc b/sql/strfunc.cc new file mode 100644 index 00000000..6c8fd24f --- /dev/null +++ b/sql/strfunc.cc @@ -0,0 +1,407 @@ +/* Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved. + Copyright (c) 2009, 2020, MariaDB Corporation. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */ + +/* Some useful string utility functions used by the MySQL server */ + +#include "mariadb.h" +#include "sql_priv.h" +#include "unireg.h" +#include "strfunc.h" +#include "sql_class.h" +#include "typelib.h" // TYPELIB +#include "m_ctype.h" // my_charset_latin1 +#include "mysqld.h" // system_charset_info + +/* + Return bitmap for strings used in a set + + SYNOPSIS + find_set() + lib Strings in set + str Strings of set-strings separated by ',' + err_pos If error, set to point to start of wrong set string + err_len If error, set to the length of wrong set string + set_warning Set to 1 if some string in set couldn't be used + + NOTE + We delete all end space from str before comparison + + RETURN + bitmap of all sets found in x. + set_warning is set to 1 if there was any sets that couldn't be set +*/ + +static const char field_separator=','; + +ulonglong find_set(const TYPELIB *lib, + const char *str, size_t length, CHARSET_INFO *cs, + char **err_pos, uint *err_len, bool *set_warning) +{ + CHARSET_INFO *strip= cs ? cs : &my_charset_latin1; + const char *end= str + strip->lengthsp(str, length); + ulonglong found= 0; + *err_pos= 0; // No error yet + *err_len= 0; + if (str != end) + { + const char *start= str; + for (;;) + { + const char *pos= start; + uint var_len; + int mblen= 1; + + if (cs && cs->mbminlen > 1) + { + for ( ; pos < end; pos+= mblen) + { + my_wc_t wc; + if ((mblen= cs->mb_wc(&wc, (const uchar *) pos, + (const uchar *) end)) < 1) + mblen= 1; // Not to hang on a wrong multibyte sequence + if (wc == (my_wc_t) field_separator) + break; + } + } + else + for (; pos != end && *pos != field_separator; pos++) ; + var_len= (uint) (pos - start); + uint find= cs ? find_type2(lib, start, var_len, cs) : + find_type(lib, start, var_len, (bool) 0); + if (unlikely(!find)) + { + if (*err_len == 0) + { + // report the first error with length > 0 + *err_pos= (char*) start; + *err_len= var_len; + *set_warning= 1; + } + } + else if (find <= sizeof(longlong) * 8) + found|= 1ULL << (find - 1); + if (pos >= end) + break; + start= pos + mblen; + } + } + return found; +} + +/* + Function to find a string in a TYPELIB + (similar to find_type() of mysys/typelib.c) + + SYNOPSIS + find_type() + lib TYPELIB (struct of pointer to values + count) + find String to find + length Length of string to find + part_match Allow part matching of value + + RETURN + 0 error + > 0 position in TYPELIB->type_names +1 +*/ + +uint find_type(const TYPELIB *lib, const char *find, size_t length, + bool part_match) +{ + uint found_count=0, found_pos=0; + const char *end= find+length; + const char *i; + const char *j; + for (uint pos=0 ; (j=lib->type_names[pos++]) ; ) + { + for (i=find ; i != end && + my_toupper(system_charset_info,*i) == + my_toupper(system_charset_info,*j) ; i++, j++) ; + if (i == end) + { + if (! *j) + return(pos); + found_count++; + found_pos= pos; + } + } + return(found_count == 1 && part_match ? found_pos : 0); +} + + +/* + Find a string in a list of strings according to collation + + SYNOPSIS + find_type2() + lib TYPELIB (struct of pointer to values + count) + x String to find + length String length + cs Character set + collation to use for comparison + + NOTES + + RETURN + 0 No matching value + >0 Offset+1 in typelib for matched string +*/ + +uint find_type2(const TYPELIB *typelib, const char *x, size_t length, + CHARSET_INFO *cs) +{ + int pos; + const char *j; + DBUG_ENTER("find_type2"); + DBUG_PRINT("enter",("x: '%.*s' lib: %p", (int)length, x, typelib)); + + if (!typelib->count) + { + DBUG_PRINT("exit",("no count")); + DBUG_RETURN(0); + } + + for (pos=0 ; (j=typelib->type_names[pos]) ; pos++) + { + if (!cs->strnncoll(x, length, + j, typelib->type_lengths[pos])) + DBUG_RETURN(pos+1); + } + DBUG_PRINT("exit",("Couldn't find type")); + DBUG_RETURN(0); +} /* find_type */ + + +/* + Un-hex all elements in a typelib + + SYNOPSIS + unhex_type2() + interval TYPELIB (struct of pointer to values + lengths + count) + + NOTES + + RETURN + N/A +*/ + +void unhex_type2(TYPELIB *interval) +{ + for (uint pos= 0; pos < interval->count; pos++) + { + char *from, *to; + for (from= to= (char*) interval->type_names[pos]; *from; ) + { + /* + Note, hexchar_to_int(*from++) doesn't work + one some compilers, e.g. IRIX. Looks like a compiler + bug in inline functions in combination with arguments + that have a side effect. So, let's use from[0] and from[1] + and increment 'from' by two later. + */ + + *to++= (char) (hexchar_to_int(from[0]) << 4) + + hexchar_to_int(from[1]); + from+= 2; + } + interval->type_lengths[pos] /= 2; + } +} + + +/* + Check if the first word in a string is one of the ones in TYPELIB + + SYNOPSIS + check_word() + lib TYPELIB + val String to check + end End of input + end_of_word Store value of last used byte here if we found word + + RETURN + 0 No matching value + > 1 lib->type_names[#-1] matched + end_of_word will point to separator character/end in 'val' +*/ + +uint check_word(TYPELIB *lib, const char *val, const char *end, + const char **end_of_word) +{ + int res; + const char *ptr; + + /* Fiend end of word */ + for (ptr= val ; ptr < end && my_isalpha(&my_charset_latin1, *ptr) ; ptr++) + ; + if ((res=find_type(lib, val, (uint) (ptr - val), 1)) > 0) + *end_of_word= ptr; + return res; +} + + +/* + Converts a string between character sets + + SYNOPSIS + strconvert() + from_cs source character set + from source, a null terminated string + to destination buffer + to_length destination buffer length + + NOTES + 'to' is always terminated with a '\0' character. + If there is no enough space to convert whole string, + only prefix is converted, and terminated with '\0'. + + RETURN VALUES + result string length +*/ + + +uint strconvert(CHARSET_INFO *from_cs, const char *from, size_t from_length, + CHARSET_INFO *to_cs, char *to, size_t to_length, uint *errors) +{ + int cnvres; + my_wc_t wc; + char *to_start= to; + uchar *to_end= (uchar*) to + to_length - 1; + const uchar *from_end= (const uchar*) from + from_length; + my_charset_conv_mb_wc mb_wc= from_cs->cset->mb_wc; + my_charset_conv_wc_mb wc_mb= to_cs->cset->wc_mb; + uint error_count= 0; + + while (1) + { + if ((cnvres= (*mb_wc)(from_cs, &wc, + (uchar*) from, from_end)) > 0) + { + if (!wc) + break; + from+= cnvres; + } + else if (cnvres == MY_CS_ILSEQ) + { + error_count++; + from++; + wc= '?'; + } + else + break; // Impossible char. + +outp: + + if ((cnvres= (*wc_mb)(to_cs, wc, (uchar*) to, to_end)) > 0) + to+= cnvres; + else if (cnvres == MY_CS_ILUNI && wc != '?') + { + error_count++; + wc= '?'; + goto outp; + } + else + break; + } + *to= '\0'; + *errors= error_count; + return (uint32) (to - to_start); + +} + + +/* + Searches for a LEX_STRING in an LEX_STRING array. + + SYNOPSIS + find_string_in_array() + heap The array + needle The string to search for + + NOTE + The last LEX_STRING in the array should have str member set to NULL + + RETURN VALUES + -1 Not found + >=0 Ordinal position +*/ + +int find_string_in_array(LEX_CSTRING * const haystack, LEX_CSTRING * const needle, + CHARSET_INFO * const cs) +{ + const LEX_CSTRING *pos; + for (pos= haystack; pos->str; pos++) + if (!cs->strnncollsp(pos->str, pos->length, + needle->str, needle->length)) + { + return (int)(pos - haystack); + } + return -1; +} + + +const char *set_to_string(THD *thd, LEX_CSTRING *result, ulonglong set, + const char *lib[]) +{ + char buff[STRING_BUFFER_USUAL_SIZE*8]; + String tmp(buff, sizeof(buff), &my_charset_latin1); + LEX_CSTRING unused; + + if (!result) + result= &unused; + + tmp.length(0); + + for (uint i= 0; set; i++, set >>= 1) + if (set & 1) { + tmp.append(lib[i]); + tmp.append(','); + } + + if (tmp.length()) + { + result->str= thd->strmake(tmp.ptr(), tmp.length()-1); + result->length= tmp.length()-1; + } + else + { + result->str= const_cast<char*>(""); + result->length= 0; + } + return result->str; +} + +const char *flagset_to_string(THD *thd, LEX_CSTRING *result, ulonglong set, + const char *lib[]) +{ + char buff[STRING_BUFFER_USUAL_SIZE*8]; + String tmp(buff, sizeof(buff), &my_charset_latin1); + LEX_CSTRING unused; + + if (!result) result= &unused; + + tmp.length(0); + + // note that the last element is always "default", and it's ignored below + for (uint i= 0; lib[i+1]; i++, set >>= 1) + { + tmp.append(lib[i]); + tmp.append(set & 1 ? "=on," : "=off,"); + } + + result->str= thd->strmake(tmp.ptr(), tmp.length()-1); + result->length= tmp.length()-1; + + return result->str; +} |