1 files changed, 407 insertions, 0 deletions
diff --git a/sql/strfunc.cc b/sql/strfunc.cc
new file mode 100644
index 00000000..6c8fd24f
--- /dev/null
+++ b/sql/strfunc.cc
@@ -0,0 +1,407 @@
+/* Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved.
+   Copyright (c) 2009, 2020, MariaDB Corporation.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; version 2 of the License.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1335  USA */
+
+/* Some useful string utility functions used by the MySQL server */
+
+#include "mariadb.h"
+#include "sql_priv.h"
+#include "unireg.h"
+#include "strfunc.h"
+#include "sql_class.h"
+#include "typelib.h"                            // TYPELIB
+#include "m_ctype.h"                            // my_charset_latin1
+#include "mysqld.h"                             // system_charset_info
+
+/*
+  Return bitmap for strings used in a set
+
+  SYNOPSIS
+  find_set()
+  lib           Strings in set
+  str           Strings of set-strings separated by ','
+  length        Length of str in bytes
+  cs            Character set of str; if NULL, names are matched by find_type()
+  err_pos       If error, set to point to start of wrong set string
+  err_len       If error, set to the length of wrong set string
+  set_warning   Set to 1 if some string in set couldn't be used; never cleared
+
+  NOTE: we delete all end space from str before comparison
+
+  RETURN
+    bitmap of all sets found in str; bit N-1 stands for the N-th string of lib
+*/
+
+static const char field_separator=',';
+
+ulonglong find_set(const TYPELIB *lib,
+                   const char *str, size_t length, CHARSET_INFO *cs,
+                   char **err_pos, uint *err_len, bool *set_warning)
+{
+  CHARSET_INFO *strip= cs ? cs : &my_charset_latin1;
+  const char *end= str + strip->lengthsp(str, length);
+  ulonglong found= 0;
+  *err_pos= 0;                  // No error yet
+  *err_len= 0;
+  if (str != end)
+  {
+    const char *start= str;
+    for (;;)
+    {
+      const char *pos= start;
+      uint var_len;
+      int mblen= 1;                     // Bytes to skip over the separator
+
+      if (cs && cs->mbminlen > 1)
+      {
+        for ( ; pos < end; pos+= mblen)
+        {
+          my_wc_t wc;
+          if ((mblen= cs->mb_wc(&wc, (const uchar *) pos,
+                                     (const uchar *) end)) < 1)
+            mblen= 1; // Not to hang on a wrong multibyte sequence
+          else if (wc == (my_wc_t) field_separator) // wc valid only here
+            break;
+        }
+      }
+      else
+        for (; pos != end && *pos != field_separator; pos++) ;
+      var_len= (uint) (pos - start);
+      uint find= cs ? find_type2(lib, start, var_len, cs) :
+                      find_type(lib, start, var_len, (bool) 0);
+      if (unlikely(!find))
+      {
+        if (*err_len == 0)
+        {
+          // report the first error with length > 0
+          *err_pos= (char*) start;
+          *err_len= var_len;
+          *set_warning= 1;
+        }
+      }
+      else if (find <= sizeof(longlong) * 8)    // Later entries have no bit
+        found|= 1ULL << (find - 1);
+      if (pos >= end)
+        break;
+      start= pos + mblen;
+    }
+  }
+  return found;
+}
+
+/*
+  Function to find a string in a TYPELIB
+  (similar to find_type() of mysys/typelib.c)
+
+  SYNOPSIS
+   find_type()
+   lib          TYPELIB (struct of pointer to values + count)
+   find         String to find; compared via my_toupper(system_charset_info)
+   length       Length of string to find
+   part_match   Allow part matching of value (accepted only if unambiguous)
+
+  RETURN
+   0 no exact match, and no unique prefix match allowed by part_match
+   > 0 position in TYPELIB->type_names +1
+*/
+
+uint find_type(const TYPELIB *lib, const char *find, size_t length,
+               bool part_match)
+{
+  uint found_count=0, found_pos=0;
+  const char *end= find+length;
+  const char *i;
+  const char *j;
+  for (uint pos=0 ; (j=lib->type_names[pos++]) ; )
+  {
+    for (i=find ; i != end && *j &&     // Never read past the end of a name
+           my_toupper(system_charset_info,*i) ==
+           my_toupper(system_charset_info,*j) ; i++, j++) ;
+    if (i == end)
+    {
+      if (! *j)
+        return(pos);                    // Exact match wins at once
+      found_count++;                    // 'find' is a prefix of this name
+      found_pos= pos;
+    }
+  }
+  return(found_count == 1 && part_match ? found_pos : 0);
+}
+
+
+/*
+  Find a string in a list of strings according to collation
+
+  SYNOPSIS
+   find_type2()
+   typelib      TYPELIB (struct of pointer to values + lengths + count)
+   x            String to find
+   length       String length
+   cs           Character set + collation to use for comparison
+
+  NOTES
+    The first value for which cs->strnncoll() returns 0 is the match.
+
+  RETURN
+    0   No matching value
+    >0  Offset+1 in typelib for matched string
+*/
+
+uint find_type2(const TYPELIB *typelib, const char *x, size_t length,
+                CHARSET_INFO *cs)
+{
+  DBUG_ENTER("find_type2");
+  DBUG_PRINT("enter",("x: '%.*s'  lib: %p", (int)length, x, typelib));
+
+  if (typelib->count == 0)
+  {
+    DBUG_PRINT("exit",("no count"));
+    DBUG_RETURN(0);
+  }
+
+  const char *name;                     // type_names is scanned up to its NULL
+  for (int idx= 0; (name= typelib->type_names[idx]) != NULL; idx++)
+  {
+    if (cs->strnncoll(x, length, name, typelib->type_lengths[idx]) == 0)
+      DBUG_RETURN(idx + 1);
+  }
+  DBUG_PRINT("exit",("Couldn't find type"));
+  DBUG_RETURN(0);
+} /* find_type2 */
+
+
+/*
+  Un-hex all elements in a typelib
+
+  SYNOPSIS
+   unhex_type2()
+   interval       TYPELIB (struct of pointer to values + lengths + count)
+
+  NOTES
+    Names are decoded in place; no '\0' is written after the decoded bytes.
+
+  RETURN
+    N/A
+*/
+
+void unhex_type2(TYPELIB *interval)
+{
+  for (uint pos= 0; pos < interval->count; pos++)
+  {
+    char *from, *to;
+    for (from= to= (char*) interval->type_names[pos]; from[0] && from[1]; )
+    {
+      /*
+        Note, hexchar_to_int(*from++) doesn't work on some compilers,
+        e.g. IRIX (looks like a compiler bug in inline functions taking
+        arguments with side effects), so use from[0] and from[1] and
+        increment 'from' by two later. A dangling odd digit ends the
+        loop as well, so that 'from' can never step over the '\0'.
+      */
+      *to++= (char) ((hexchar_to_int(from[0]) << 4) +
+                     hexchar_to_int(from[1]));
+      from+= 2;
+    }
+    interval->type_lengths[pos] /= 2;
+  }
+}
+
+
+/*
+  Check if the first word in a string is one of the ones in TYPELIB
+
+  SYNOPSIS
+    check_word()
+    lib           TYPELIB
+    val           String to check
+    end           End of input
+    end_of_word   Set to the first byte after the word, if we found word
+
+  RETURN
+    0    No matching value
+    > 0  lib->type_names[#-1] matched
+         end_of_word will point to separator character/end in 'val'
+*/
+
+uint check_word(TYPELIB *lib, const char *val, const char *end,
+                const char **end_of_word)
+{
+  const char *word_end= val;
+
+  /* Find end of word: the leading run of latin1 letters */
+  while (word_end < end && my_isalpha(&my_charset_latin1, *word_end))
+    word_end++;
+  int match= find_type(lib, val, (uint) (word_end - val), 1); // 1: part_match
+  if (match > 0)
+    *end_of_word= word_end;             // Untouched when nothing matched
+  return match;
+}
+
+
+/*
+  Converts a string between character sets
+
+  SYNOPSIS
+    strconvert()
+    from_cs       source character set
+    from          source string; conversion also stops at a '\0' character
+    from_length   source length in bytes
+    to_cs         destination character set
+    to            destination buffer
+    to_length     destination buffer length, including room for the '\0'
+    errors        set to the number of characters replaced with '?'
+
+  NOTES
+    'to' is always terminated with a '\0' character (unless to_length is 0).
+    If there is not enough space, only a prefix is converted and terminated.
+
+  RETURN VALUES
+    result string length
+*/
+
+uint strconvert(CHARSET_INFO *from_cs, const char *from, size_t from_length,
+                CHARSET_INFO *to_cs, char *to, size_t to_length, uint *errors)
+{
+  int cnvres;
+  my_wc_t wc;
+  char *to_start= to;
+  const uchar *from_end= (const uchar*) from + from_length;
+  my_charset_conv_mb_wc mb_wc= from_cs->cset->mb_wc;
+  my_charset_conv_wc_mb wc_mb= to_cs->cset->wc_mb;
+
+  *errors= 0;
+  if (to_length == 0)                           // Not even room for the '\0'
+    return 0;
+  uchar *to_end= (uchar*) to + to_length - 1;   // Keep one byte for the '\0'
+
+  while (1)
+  {
+    if ((cnvres= (*mb_wc)(from_cs, &wc, (const uchar*) from, from_end)) > 0)
+    {
+      if (!wc)
+        break;
+      from+= cnvres;
+    }
+    else if (cnvres == MY_CS_ILSEQ)
+    {
+      (*errors)++;
+      from++;
+      wc= '?';
+    }
+    else
+      break; // Impossible char.
+
+outp:
+    if ((cnvres= (*wc_mb)(to_cs, wc, (uchar*) to, to_end)) > 0)
+      to+= cnvres;
+    else if (cnvres == MY_CS_ILUNI && wc != '?')
+    {
+      (*errors)++;
+      wc= '?';
+      goto outp;
+    }
+    else
+      break;
+  }
+  *to= '\0';
+  return (uint32) (to - to_start);
+}
+
+
+/*
+  Searches for a LEX_CSTRING in a LEX_CSTRING array.
+
+  SYNOPSIS
+    find_string_in_array()
+      haystack  The array
+      needle    The string to search for
+      cs        Character set + collation whose strnncollsp() compares them
+
+  NOTE
+    The last LEX_CSTRING in the array should have str member set to NULL
+
+  RETURN VALUES
+    -1   Not found
+    >=0  Ordinal position
+*/
+
+int find_string_in_array(LEX_CSTRING * const haystack, LEX_CSTRING * const needle,
+                         CHARSET_INFO * const cs)
+{
+  for (const LEX_CSTRING *entry= haystack; entry->str; entry++)
+  {
+    if (cs->strnncollsp(entry->str, entry->length,
+                        needle->str, needle->length) == 0)
+      return (int) (entry - haystack);
+  }
+  return -1;
+}
+
+
+/*
+  Return a comma-separated list of the lib[] names selected by the bits of
+  'set' (bit i selects lib[i]), or "" if no bit is set. 'result' may be NULL.
+*/
+const char *set_to_string(THD *thd, LEX_CSTRING *result, ulonglong set,
+                          const char *lib[])
+{
+  char buff[STRING_BUFFER_USUAL_SIZE*8];
+  String names(buff, sizeof(buff), &my_charset_latin1);
+  LEX_CSTRING scratch;                  // Stands in for a NULL 'result'
+  LEX_CSTRING *out= result ? result : &scratch;
+
+  names.length(0);
+  for (uint bit= 0; set != 0; bit++, set >>= 1)
+  {
+    if (!(set & 1))
+      continue;
+    names.append(lib[bit]);
+    names.append(',');
+  }
+  if (names.length() == 0)
+  {
+    out->str= const_cast<char*>("");
+    out->length= 0;
+    return out->str;
+  }
+  out->length= names.length() - 1;              // Drop the trailing ','
+  out->str= thd->strmake(names.ptr(), out->length);
+  return out->str;
+}
+
+/*
+  Return "name=on,name=off,..." for every lib[] entry but the last one; bit i
+  of 'set' gives the state of lib[i]. 'result' may be NULL.
+*/
+const char *flagset_to_string(THD *thd, LEX_CSTRING *result, ulonglong set,
+                              const char *lib[])
+{
+  char buff[STRING_BUFFER_USUAL_SIZE*8];
+  String tmp(buff, sizeof(buff), &my_charset_latin1);
+  LEX_CSTRING unused;
+
+  if (!result) result= &unused;
+  tmp.length(0);
+  // note that the last element is always "default", and it's ignored below
+  for (uint i= 0; lib[i+1]; i++, set >>= 1)
+  {
+    tmp.append(lib[i]);
+    tmp.append(set & 1 ? "=on," : "=off,");
+  }
+  result->length= tmp.length() ? tmp.length() - 1 : 0; // No ',' if tmp is empty
+  result->str=    thd->strmake(tmp.ptr(), result->length);
+  return result->str;
+}