1 files changed, 1431 insertions, 0 deletions
diff --git a/strings/ctype-mb.c b/strings/ctype-mb.c
new file mode 100644
index 00000000..88dd3631
--- /dev/null
+++ b/strings/ctype-mb.c
@@ -0,0 +1,1431 @@
+/* Copyright (c) 2000, 2014, Oracle and/or its affiliates.
+   Copyright (c) 2009, 2021, MariaDB Corporation.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; version 2 of the License.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335  USA */
+
+#include "strings_def.h"
+#include <m_ctype.h>
+#include "ctype-mb.h"
+
+#ifdef USE_MB
+
+
+/*
+  Upper-case a NUL-terminated multi-byte string in place.
+
+  Sequences that my_ismbchar() reports as multi-byte are skipped untouched;
+  every other byte is replaced through the cs->to_upper table.
+
+  @param cs   Character set of the string
+  @param str  NUL-terminated string, modified in place
+  @return     Number of bytes walked (distance from "str" to its '\0')
+*/
+size_t my_caseup_str_mb(CHARSET_INFO * cs, char *str)
+{
+  const uchar *upper= cs->to_upper;
+  char *cur;
+
+  for (cur= str; *cur; )
+  {
+    /* The probe window may reach past the '\0'; that is safe here. */
+    uint32 mblen= my_ismbchar(cs, cur, cur + cs->mbmaxlen);
+    if (mblen)
+    {
+      cur+= mblen;
+      continue;
+    }
+    *cur= (char) upper[(uchar) *cur];
+    cur++;
+  }
+  return (size_t) (cur - str);
+}
+
+
+/*
+  Lower-case a NUL-terminated multi-byte string in place.
+
+  Sequences that my_ismbchar() reports as multi-byte are skipped untouched;
+  every other byte is replaced through the cs->to_lower table.
+
+  @param cs   Character set of the string
+  @param str  NUL-terminated string, modified in place
+  @return     Number of bytes walked (distance from "str" to its '\0')
+*/
+size_t my_casedn_str_mb(CHARSET_INFO * cs, char *str)
+{
+  const uchar *lower= cs->to_lower;
+  char *cur;
+
+  for (cur= str; *cur; )
+  {
+    /* The probe window may reach past the '\0'; that is safe here. */
+    uint32 mblen= my_ismbchar(cs, cur, cur + cs->mbmaxlen);
+    if (mblen)
+    {
+      cur+= mblen;
+      continue;
+    }
+    *cur= (char) lower[(uchar) *cur];
+    cur++;
+  }
+  return (size_t) (cur - str);
+}
+
+
+/*
+  Fetch the case folding record addressed by (page, offs).
+
+  @return Pointer to the record, or NULL when the character set has no
+          casefold data or the requested page is not populated.
+*/
+static inline const MY_CASEFOLD_CHARACTER*
+get_case_info_for_ch(CHARSET_INFO *cs, uint page, uint offs)
+{
+  const MY_CASEFOLD_CHARACTER *page_tab;
+
+  if (!cs->casefold)
+    return NULL;
+  page_tab= cs->casefold->page[page];
+  if (!page_tab)
+    return NULL;
+  return &page_tab[offs];
+}
+
+
+/*
+  Case folding functions for CJK character set.
+  Case conversion can optionally reduce string octet length.
+  For example, in EUCKR,
+    _euckr 0xA9A5 == "LATIN LETTER DOTLESS I" (Turkish letter)
+  is upper-cased to
+    _euckr 0x49 "LATIN CAPITAL LETTER I"  ('usual' letter I)
+  Length is reduced in this example from two bytes to one byte.
+*/
+/*
+  Shared worker for my_caseup_mb() and my_casedn_mb().
+
+  @param cs        Character set; asserted to have mbmaxlen == 2
+  @param src       Source bytes
+  @param srclen    Number of source bytes
+  @param dst       Destination buffer
+  @param dstlen    Unused
+  @param map       Single-byte translation table (to_upper or to_lower)
+  @param is_upper  Non-zero to use "toupper" of the folding record,
+                   zero to use "tolower"
+  @return          Number of bytes written to "dst"
+*/
+static size_t
+my_casefold_mb(CHARSET_INFO *cs,
+               const char *src, size_t srclen,
+               char *dst, size_t dstlen __attribute__((unused)),
+               const uchar *map,
+               size_t is_upper)
+{
+  const char *src_end= src + srclen;
+  char *out= dst;
+
+  DBUG_ASSERT(cs->mbmaxlen == 2);
+
+  while (src < src_end)
+  {
+    const MY_CASEFOLD_CHARACTER *info;
+    int code;
+
+    if (!my_ismbchar(cs, src, src_end))
+    {
+      /* Single byte: translate through the supplied table */
+      *out++= (char) map[(uchar) *src++];
+      continue;
+    }
+
+    info= get_case_info_for_ch(cs, (uchar) src[0], (uchar) src[1]);
+    if (!info)
+    {
+      /* No folding record: copy both bytes verbatim */
+      *out++= *src++;
+      *out++= *src++;
+      continue;
+    }
+
+    /* Folded code may need one byte or two */
+    code= is_upper ? info->toupper : info->tolower;
+    src+= 2;
+    if (code > 0xFF)
+      *out++= code >> 8;
+    *out++= code & 0xFF;
+  }
+  return (size_t) (out - dst);
+}
+
+
+/*
+  Lower-case "srclen" bytes of "src" into "dst".
+
+  Debug builds assert that "dst" can hold srclen * casedn_multiply bytes and
+  that in-place conversion is only requested when the multiplier is 1.
+
+  @return Number of bytes written to "dst"
+*/
+size_t
+my_casedn_mb(CHARSET_INFO * cs, const char *src, size_t srclen,
+             char *dst, size_t dstlen)
+{
+  const size_t fold_to_upper= 0;
+
+  DBUG_ASSERT(dstlen >= srclen * cs->cset->casedn_multiply(cs));
+  DBUG_ASSERT(src != dst || cs->cset->casedn_multiply(cs) == 1);
+
+  return my_casefold_mb(cs, src, srclen, dst, dstlen,
+                        cs->to_lower, fold_to_upper);
+}
+
+
+/*
+  Upper-case "srclen" bytes of "src" into "dst".
+
+  Debug builds assert that "dst" can hold srclen * caseup_multiply bytes and
+  that in-place conversion is only requested when the multiplier is 1.
+
+  @return Number of bytes written to "dst"
+*/
+size_t
+my_caseup_mb(CHARSET_INFO * cs, const char *src, size_t srclen,
+             char *dst, size_t dstlen)
+{
+  const size_t fold_to_upper= 1;
+
+  DBUG_ASSERT(dstlen >= srclen * cs->cset->caseup_multiply(cs));
+  DBUG_ASSERT(src != dst || cs->cset->caseup_multiply(cs) == 1);
+
+  return my_casefold_mb(cs, src, srclen, dst, dstlen,
+                        cs->to_upper, fold_to_upper);
+}
+
+
+/*
+  my_strcasecmp_mb() returns 0 if strings are equal, non-zero otherwise.
+ */
+
+/*
+  Compare two NUL-terminated strings, folding single-byte characters
+  through cs->to_upper and comparing multi-byte characters byte by byte.
+
+  @return 0 when the strings are equal, 1 otherwise
+*/
+int my_strcasecmp_mb(CHARSET_INFO * cs,const char *s, const char *t)
+{
+  const uchar *fold= cs->to_upper;
+
+  for ( ; *s && *t; )
+  {
+    /* Pointing after the '\0' is safe here. */
+    uint32 mblen= my_ismbchar(cs, s, s + cs->mbmaxlen);
+
+    if (mblen)
+    {
+      /* Multi-byte character in "s": every byte must match exactly */
+      for ( ; mblen; mblen--)
+      {
+        if (*s++ != *t++)
+          return 1;
+      }
+      continue;
+    }
+
+    /* "s" has a single byte here, so a multi-byte char in "t" differs */
+    if (my_ci_charlen(cs, (const uchar *) t,
+                      (const uchar *) t + cs->mbmaxlen) > 1)
+      return 1;
+
+    if (fold[(uchar) *s++] != fold[(uchar) *t++])
+      return 1;
+  }
+  /* At least one of '*s' and '*t' is zero here. */
+  return (*t != *s);
+}
+
+
+/*
+** Compare string against string with wildcard
+**	0 if matched
+**	-1 if not matched with wildcard
+**	 1 if matched with wildcard
+*/
+
+/*
+  Advance pointer A by one character: by the length my_ismbchar() reports
+  for the bytes in [A, B), or by a single byte when it reports 0.
+  Note: A and B are evaluated more than once, so they must not have
+  side effects.
+*/
+#define INC_PTR(cs,A,B) A+=(my_ismbchar(cs,A,B) ? my_ismbchar(cs,A,B) : 1)
+
+/* Map byte A through the sort_order[] table of character set s. */
+#define likeconv(s,A) (uchar) (s)->sort_order[(uchar) (A)]
+
+/*
+  Recursive worker for my_wildcmp_mb().
+
+  Multi-byte characters of the pattern are compared with memcmp();
+  single-byte characters are compared through sort_order[] (likeconv).
+
+  @param cs             Character set
+  @param str,str_end    Subject string range
+  @param wildstr,wildend Pattern range
+  @param escape         Escape character
+  @param w_one          "Exactly one character" wildcard
+  @param w_many         "Any number of characters" wildcard
+  @param recurse_level  Current recursion depth, handed to the stack guard
+
+  @return 0 on match, 1 on mismatch (also when my_string_stack_guard
+          reports a problem), -1 when no match was found while
+          processing wildcards.
+*/
+static
+int my_wildcmp_mb_impl(CHARSET_INFO *cs,
+                       const char *str,const char *str_end,
+                       const char *wildstr,const char *wildend,
+                       int escape, int w_one, int w_many, int recurse_level)
+{
+  int result= -1;				/* Not found, using wildcards */
+
+  if (my_string_stack_guard && my_string_stack_guard(recurse_level))
+    return 1;
+  while (wildstr != wildend)
+  {
+    /* Literal part of the pattern: must match character by character */
+    while (*wildstr != w_many && *wildstr != w_one)
+    {
+      int l;
+      if (*wildstr == escape && wildstr+1 != wildend)
+	wildstr++;
+      if ((l = my_ismbchar(cs, wildstr, wildend)))
+      {
+	  if (str+l > str_end || memcmp(str, wildstr, l) != 0)
+	      return 1;
+	  str += l;
+	  wildstr += l;
+      }
+      else
+      if (str == str_end || likeconv(cs,*wildstr++) != likeconv(cs,*str++))
+	return(1);				/* No match */
+      if (wildstr == wildend)
+	return (str != str_end);		/* Match if both are at end */
+      result=1;					/* Found an anchor char */
+    }
+    if (*wildstr == w_one)
+    {
+      do
+      {
+	if (str == str_end)			/* Skip one char if possible */
+	  return (result);
+	INC_PTR(cs,str,str_end);
+      } while (++wildstr < wildend && *wildstr == w_one);
+      if (wildstr == wildend)
+	break;
+    }
+    if (*wildstr == w_many)
+    {						/* Found w_many */
+      uchar cmp;
+      const char* mb = wildstr;
+      int mb_len=0;
+      
+      wildstr++;
+      /* Remove any '%' and '_' from the wild search string */
+      for (; wildstr != wildend ; wildstr++)
+      {
+	if (*wildstr == w_many)
+	  continue;
+	if (*wildstr == w_one)
+	{
+	  if (str == str_end)
+	    return (-1);
+	  INC_PTR(cs,str,str_end);
+	  continue;
+	}
+	break;					/* Not a wild character */
+      }
+      if (wildstr == wildend)
+	return(0);				/* Ok if w_many is last */
+      if (str == str_end)
+	return -1;
+      
+      if ((cmp= *wildstr) == escape && wildstr+1 != wildend)
+	cmp= *++wildstr;
+	
+      /* Remember the first literal character that follows the wildcard */
+      mb=wildstr;
+      mb_len= my_ismbchar(cs, wildstr, wildend);
+      INC_PTR(cs,wildstr,wildend);		/* This is compared through cmp */
+      cmp=likeconv(cs,cmp);   
+      do
+      {
+        /* Scan "str" for the next occurrence of that literal character */
+        for (;;)
+        {
+          if (str >= str_end)
+            return -1;
+          if (mb_len)
+          {
+            if (str+mb_len <= str_end && memcmp(str, mb, mb_len) == 0)
+            {
+              str += mb_len;
+              break;
+            }
+          }
+          else if (!my_ismbchar(cs, str, str_end) &&
+                   likeconv(cs,*str) == cmp)
+          {
+            str++;
+            break;
+          }
+          INC_PTR(cs,str, str_end);
+        }
+	{
+	  /* Try to match the rest of the pattern from this position */
+	  int tmp=my_wildcmp_mb_impl(cs,str,str_end,wildstr,wildend,escape,w_one,
+                                     w_many, recurse_level + 1);
+	  if (tmp <= 0)
+	    return (tmp);
+	}
+      } while (str != str_end);
+      return(-1);
+    }
+  }
+  return (str != str_end ? 1 : 0);
+}
+
+/*
+  Public entry point for wildcard matching on multi-byte character sets.
+  Starts the recursive worker at recursion level 1 and returns its result.
+*/
+int my_wildcmp_mb(CHARSET_INFO *cs,
+                  const char *str,const char *str_end,
+                  const char *wildstr,const char *wildend,
+                  int escape, int w_one, int w_many)
+{
+  const int first_level= 1;
+
+  return my_wildcmp_mb_impl(cs, str, str_end, wildstr, wildend,
+                            escape, w_one, w_many, first_level);
+}
+
+
+/*
+  Count the characters in [pos, end).
+  A byte that my_ismbchar() does not recognise as the start of a
+  multi-byte character counts as one character.
+*/
+size_t my_numchars_mb(CHARSET_INFO *cs __attribute__((unused)),
+		      const char *pos, const char *end)
+{
+  size_t nchars;
+
+  for (nchars= 0; pos < end; nchars++)
+  {
+    uint step= my_ismbchar(cs, pos, end);
+    pos+= step ? step : 1;
+  }
+  return nchars;
+}
+
+
+/*
+  Return the byte offset of the character number "length" in [pos, end).
+
+  @return Offset in bytes from "pos" when the string holds at least
+          "length" characters; otherwise (end - pos) + 2, a value that
+          lies past the end of the string.
+*/
+size_t my_charpos_mb(CHARSET_INFO *cs __attribute__((unused)),
+		     const char *pos, const char *end, size_t length)
+{
+  const char *cur= pos;
+
+  for ( ; length && cur < end; length--)
+  {
+    uint step= my_ismbchar(cs, cur, end);
+    cur+= step ? step : 1;
+  }
+
+  if (length)
+    return (size_t) (end + 2 - pos);     /* Ran out of characters */
+  return (size_t) (cur - pos);
+}
+
+
+/*
+  Append a badly formed piece of string.
+  Bad bytes are fixed to '?'.
+  
+  @param to        The destination string
+  @param to_end    The end of the destination string
+  @param from      The source string
+  @param from_end  The end of the source string
+  @param nchars    Write not more than "nchars" characters.
+  @param status    Copying status, must be previously initialized,
+                   e.g. using well_formed_char_length() on the original
+                   full source string.
+*/
+/*
+  Returns the number of bytes written to "to". On return
+  status->m_source_end_pos points at the first source byte that was not
+  consumed, and status->m_well_formed_error_pos is set to the first bad
+  position if it was not already set.
+*/
+static size_t
+my_append_fix_badly_formed_tail(CHARSET_INFO *cs,
+                                char *to, char *to_end,
+                                const char *from, const char *from_end,
+                                size_t nchars,
+                                MY_STRCOPY_STATUS *status)
+{
+  char *to0= to;
+
+  for ( ; nchars; nchars--)
+  {
+    int chlen;
+    if ((chlen= my_ci_charlen(cs, (const uchar*) from,
+                                  (const uchar *) from_end)) > 0)
+    {
+      /* Found a valid character */         /* chlen == 1..MBMAXLEN  */
+      DBUG_ASSERT(chlen <= (int) cs->mbmaxlen);
+      if (to + chlen > to_end)
+        goto end;                           /* Does not fit to "to" */
+      memcpy(to, from, (size_t) chlen);
+      from+= chlen;
+      to+= chlen;
+      continue;
+    }
+    if (chlen == MY_CS_ILSEQ)              /* chlen == 0 */
+    {
+      DBUG_ASSERT(from < from_end);  /* Shouldn't get MY_CS_ILSEQ if empty */
+      goto bad;
+    }
+    /* Got an incomplete character */       /* chlen == MY_CS_TOOSMALLXXX  */
+    DBUG_ASSERT(chlen >= MY_CS_TOOSMALL6); 
+    DBUG_ASSERT(chlen <= MY_CS_TOOSMALL);
+    if (from >= from_end)                   
+      break;                                /* End of the source string    */
+bad:
+    /* Bad byte sequence, or incomplete character found */
+    if (!status->m_well_formed_error_pos)
+      status->m_well_formed_error_pos= from;
+
+    /* Replace the offending byte with '?' encoded in the target charset */
+    if ((chlen= my_ci_wc_mb(cs, '?', (uchar*) to, (uchar *) to_end)) <= 0)
+      break; /* Question mark does not fit into the destination */
+    to+= chlen;
+    from++;                                 /* Skip exactly one bad byte */
+  }
+end:
+  status->m_source_end_pos= from;
+  return to - to0;
+}
+
+
+/*
+  Copy up to "nchars" characters from "src" to "dst".
+
+  The well-formed prefix (as measured by my_ci_well_formed_char_length()
+  over the first MIN(src_length, dst_length) bytes) is copied verbatim.
+  If a well-formedness error was recorded in "status", the rest is
+  appended through my_append_fix_badly_formed_tail().
+
+  @return Number of bytes stored in "dst"
+*/
+size_t
+my_copy_fix_mb(CHARSET_INFO *cs,
+               char *dst, size_t dst_length,
+               const char *src, size_t src_length,
+               size_t nchars, MY_STRCOPY_STATUS *status)
+{
+  size_t scan_length= MY_MIN(src_length, dst_length);
+  size_t good_nchars;
+  size_t good_length;
+  size_t tail_length;
+
+  good_nchars= my_ci_well_formed_char_length(cs, src, src + scan_length,
+                                                 nchars, status);
+  DBUG_ASSERT(good_nchars <= nchars);
+
+  good_length= status->m_source_end_pos - src;
+  if (good_length)
+    memmove(dst, src, good_length);
+
+  if (!status->m_well_formed_error_pos)
+    return good_length;                  /* Nothing to repair */
+
+  tail_length= my_append_fix_badly_formed_tail(cs,
+                                               dst + good_length,
+                                               dst + dst_length,
+                                               src + good_length,
+                                               src + src_length,
+                                               nchars - good_nchars,
+                                               status);
+  return good_length + tail_length;
+}
+
+
+/*
+  Search for "s" inside "b", stepping through "b" one character at a time
+  and comparing with my_ci_strnncoll().
+
+  @param match   Output array; filled only when nmatch is non-zero
+  @param nmatch  Number of usable entries in "match"
+
+  @return 0 when "s" is not found (or is longer than "b"),
+          1 when "s" is empty,
+          2 when "s" was found; match[0] then describes the part before
+          the occurrence and, if nmatch > 1, match[1] the occurrence itself.
+*/
+uint my_instr_mb(CHARSET_INFO *cs,
+                 const char *b, size_t b_length, 
+                 const char *s, size_t s_length,
+                 my_match_t *match, uint nmatch)
+{
+  const char *base= b;
+  const char *limit;
+  int nchars_skipped= 0;
+
+  if (s_length > b_length)
+    return 0;
+
+  if (!s_length)
+  {
+    if (nmatch)
+    {
+      match->beg= 0;
+      match->end= 0;
+      match->mb_len= 0;
+    }
+    return 1;		/* Empty string is always found */
+  }
+
+  /* One past the last position where "s" could still fit */
+  limit= b + b_length - s_length + 1;
+
+  for ( ; b < limit; nchars_skipped++)
+  {
+    int step;
+
+    if (!my_ci_strnncoll(cs, (const uchar *) b, s_length,
+                             (const uchar *) s, s_length, 0))
+    {
+      if (nmatch)
+      {
+        match[0].beg= 0;
+        match[0].end= (uint) (b - base);
+        match[0].mb_len= nchars_skipped;
+        if (nmatch > 1)
+        {
+          match[1].beg= match[0].end;
+          match[1].end= (uint) (match[0].end + s_length);
+          match[1].mb_len= 0;	/* Not computed */
+        }
+      }
+      return 2;
+    }
+
+    step= my_ismbchar(cs, b, limit);
+    if (!step)
+      step= 1;
+    b+= step;
+    b_length-= step;
+  }
+  return 0;
+}
+
+
+/*
+  Copy one non-ascii character.
+  "dst" must have enough room for the character.
+  Note, we don't use sort_order[] in this macros.
+  This is correct even for case insensitive collations:
+  - basic Latin letters are processed outside this macros;
+  - for other characters sort_order[x] is equal to x.
+*/
+/*
+  Expands to a block that advances both "dst" and "src" by the number of
+  bytes copied: 4, 3 or 2 for a multi-byte character, 1 when my_ismbchar()
+  returns 0. There is no case for other return values.
+*/
+#define my_strnxfrm_mb_non_ascii_char(cs, dst, src, se)                  \
+{                                                                        \
+  switch (my_ismbchar(cs, (const char *) src, (const char *) se)) {      \
+  case 4:                                                                \
+    *dst++= *src++;                                                      \
+    /* fall through */                                                   \
+  case 3:                                                                \
+    *dst++= *src++;                                                      \
+    /* fall through */                                                   \
+  case 2:                                                                \
+    *dst++= *src++;                                                      \
+    /* fall through */                                                   \
+  case 0:                                                                \
+    *dst++= *src++; /* byte in range 0x80..0xFF which is not MB head */  \
+  }                                                                      \
+}
+
+
+/*
+  For character sets with two or three byte multi-byte
+  characters having multibyte weights *equal* to their codes:
+  cp932, euckr, gb2312, sjis, eucjpms, ujis.
+*/
+/*
+  Produce sort weights for "src" into [dst, de).
+
+  ASCII bytes are mapped through cs->sort_order when it is set; all other
+  characters are copied as they are.
+
+  @param nweights  In/out: decremented once per character processed
+  @return          Number of bytes written to "dst"
+*/
+size_t my_strnxfrm_mb_internal(CHARSET_INFO *cs, uchar *dst, uchar *de,
+                               uint *nweights, const uchar *src, size_t srclen)
+{
+  uchar *dst_start= dst;
+  const uchar *src_end= src + srclen;
+  const uchar *weights= cs->sort_order;
+
+  DBUG_ASSERT(cs->mbmaxlen <= 4);
+
+  /*
+    Fast path: when neither the destination size nor "nweights" can be
+    exhausted before the source is, the loops need to test only "src".
+  */
+  if (de >= dst_start + srclen && *nweights >= srclen)
+  {
+    if (weights)
+    {
+      /* Collation with a sort_order table */
+      while (src < src_end)
+      {
+        if (*src < 128) /* ASCII */
+          *dst++= weights[*src++];
+        else
+          my_strnxfrm_mb_non_ascii_char(cs, dst, src, src_end);
+        (*nweights)--;
+      }
+    }
+    else
+    {
+      /* Collation without a sort_order table */
+      while (src < src_end)
+      {
+        if (*src < 128) /* ASCII */
+          *dst++= *src++;
+        else
+          my_strnxfrm_mb_non_ascii_char(cs, dst, src, src_end);
+        (*nweights)--;
+      }
+    }
+    return dst - dst_start;
+  }
+
+  /* Careful path: respect all three limits "src_end", "nweights", "de" */
+  while (src < src_end && *nweights && dst < de)
+  {
+    int chlen= (*src < 128) ? 0 : my_ismbchar(cs, (const char *) src,
+                                                  (const char *) src_end);
+    if (!chlen)
+    {
+      /* Single byte character */
+      *dst++= weights ? weights[*src++] : *src++;
+    }
+    else
+    {
+      /* Multi-byte character, cut short if the destination is too small */
+      size_t ncopy= (dst + chlen <= de) ? chlen : de - dst;
+      memcpy(dst, src, ncopy);
+      dst+= ncopy;
+      src+= ncopy;
+    }
+    (*nweights)--;
+  }
+
+  return dst - dst_start;
+}
+
+
+/*
+  strnxfrm for multi-byte character sets: generate the weights with
+  my_strnxfrm_mb_internal(), then hand the result to
+  my_strxfrm_pad_desc_and_reverse() and return its value.
+*/
+size_t
+my_strnxfrm_mb(CHARSET_INFO *cs,
+               uchar *dst, size_t dstlen, uint nweights,
+               const uchar *src, size_t srclen, uint flags)
+{
+  uchar *dst_end= dst + dstlen;
+  size_t written= my_strnxfrm_mb_internal(cs, dst, dst_end, &nweights,
+                                          src, srclen);
+
+  return my_strxfrm_pad_desc_and_reverse(cs, dst, dst + written, dst_end,
+                                         nweights, flags, 0);
+}
+
+
+/*
+  Same as my_strnxfrm_mb(), but finishes through
+  my_strxfrm_pad_desc_and_reverse_nopad().
+*/
+size_t
+my_strnxfrm_mb_nopad(CHARSET_INFO *cs,
+                     uchar *dst, size_t dstlen, uint nweights,
+                     const uchar *src, size_t srclen, uint flags)
+{
+  uchar *dst_end= dst + dstlen;
+  size_t written= my_strnxfrm_mb_internal(cs, dst, dst_end, &nweights,
+                                          src, srclen);
+
+  return my_strxfrm_pad_desc_and_reverse_nopad(cs, dst, dst + written,
+                                               dst_end, nweights, flags, 0);
+}
+
+
+/*
+  Binary comparison of two NUL-terminated strings: plain strcmp(),
+  the character set is not consulted.
+*/
+int
+my_strcasecmp_mb_bin(CHARSET_INFO * cs __attribute__((unused)),
+                     const char *s, const char *t)
+{
+  int order= strcmp(s, t);
+  return order;
+}
+
+
+
+/*
+  Fold every byte of key[0..len) into the running hash pair (*nr1, *nr2)
+  using MY_HASH_ADD. Trailing spaces are hashed like any other byte.
+*/
+void
+my_hash_sort_mb_nopad_bin(CHARSET_INFO *cs __attribute__((unused)),
+                          const uchar *key, size_t len,ulong *nr1, ulong *nr2)
+{
+  ulong h1= *nr1;
+  ulong h2= *nr2;
+  const uchar *cur= key;
+  const uchar *stop= key + len;
+
+  while (cur < stop)
+  {
+    MY_HASH_ADD(h1, h2, (uint)*cur);
+    cur++;
+  }
+
+  *nr1= h1;
+  *nr2= h2;
+}
+
+
+/*
+  Hash a key with its trailing spaces ignored, so that 'A ' and 'A'
+  produce the same hash.
+*/
+void
+my_hash_sort_mb_bin(CHARSET_INFO *cs __attribute__((unused)),
+                    const uchar *key, size_t len,ulong *nr1, ulong *nr2)
+{
+  const uchar *trimmed_end= skip_trailing_space(key, len);
+  size_t trimmed_len= trimmed_end - key;
+
+  my_hash_sort_mb_nopad_bin(cs, key, trimmed_len, nr1, nr2);
+}
+
+
+/*
+  Fill "dst" with up to "nchars" copies of the character "native_code".
+
+  The character is encoded once at the start of "dst" and that encoding
+  is then replicated for as long as a whole copy fits.
+
+  @return Number of bytes written; 0 when the character could not be
+          encoded into "dst" or when nchars is 0.
+*/
+static inline size_t
+my_repeat_char_native(CHARSET_INFO *cs,
+                      uchar *dst, size_t dst_size, size_t nchars,
+                      my_wc_t native_code)
+{
+  uchar *first= dst;
+  uchar *limit= dst + dst_size;
+  uchar *out;
+  int chlen= my_ci_native_to_mb(cs, native_code, dst, limit);
+
+  if (chlen < 1 /* Not enough space */ || !nchars)
+    return 0;
+
+  out= first + chlen;                /* The first copy is already in place */
+  nchars--;
+  while (out + chlen <= limit && nchars > 0)
+  {
+    memcpy(out, first, chlen);
+    out+= chlen;
+    nchars--;
+  }
+  return out - first;
+}
+
+
+/*
+  Build a minimum string: up to "nchars" copies of cs->min_sort_char.
+  Returns the number of bytes written to "dst".
+*/
+size_t my_min_str_mb_simple(CHARSET_INFO *cs,
+                            uchar *dst, size_t dst_size, size_t nchars)
+{
+  my_wc_t fill_char= cs->min_sort_char;
+  return my_repeat_char_native(cs, dst, dst_size, nchars, fill_char);
+}
+
+
+/*
+  Minimum string for NOPAD collations: the empty string is the smallest
+  possible value, so nothing is written and the length is 0.
+*/
+size_t my_min_str_mb_simple_nopad(CHARSET_INFO *cs,
+                                  uchar *dst, size_t dst_size, size_t nchars)
+{
+  const size_t empty_length= 0;
+  return empty_length;
+}
+
+
+/*
+  Build a maximum string: up to "nchars" copies of cs->max_sort_char.
+  Returns the number of bytes written to "dst".
+*/
+size_t my_max_str_mb_simple(CHARSET_INFO *cs,
+                            uchar *dst, size_t dst_size, size_t nchars)
+{
+  my_wc_t fill_char= cs->max_sort_char;
+  return my_repeat_char_native(cs, dst, dst_size, nchars, fill_char);
+}
+
+
+/* 
+  Fill the given buffer with 'maximum character' for given charset
+  SYNOPSIS
+      pad_max_char()
+      cs   Character set
+      str  Start of buffer to fill
+      end  End of buffer to fill
+
+  DESCRIPTION
+      Write max key:
+      - for non-Unicode character sets:
+        just bfill using max_sort_char if max_sort_char is one byte.
+        In case when max_sort_char is two bytes, fill with double-byte pairs
+        and optionally pad with a single space character.
+      - for Unicode character set (utf-8):
+        create a buffer with multibyte representation of the max_sort_char
+        character, and copy it into max_str in a loop. 
+*/
+/*
+  Fill [str, end) with the multi-byte form of cs->max_sort_char.
+
+  The encoded length is kept in an "int": my_ci_native_to_mb() returns an
+  int, and whether plain "char" is signed is implementation-defined, so a
+  negative error code stored in a "char" could turn into a large positive
+  length on platforms where "char" is unsigned.
+
+  Nothing is written when str >= end. If the character could not be
+  encoded (non-positive length), the range is filled with spaces.
+*/
+static void pad_max_char(CHARSET_INFO *cs, char *str, char *end)
+{
+  char buf[10];
+  int buflen= my_ci_native_to_mb(cs, cs->max_sort_char, (uchar*) buf,
+                                     (uchar*) buf + sizeof(buf));
+  DBUG_ASSERT(buflen > 0);
+  while (str < end)
+  {
+    if (buflen > 0 && buflen <= end - str)
+    {
+      /* Enough space for the character */
+      memcpy(str, buf, (size_t) buflen);
+      str+= buflen;
+    }
+    else
+    {
+      /*
+        There is no space for whole multibyte
+        character, then add trailing spaces.
+      */
+      *str++= ' ';
+    }
+  }
+}
+
+/*
+** Calculate min_str and max_str that ranges a LIKE string.
+** Arguments:
+** ptr		Pointer to LIKE string.
+** ptr_length	Length of LIKE string.
+** escape	Escape character in LIKE.  (Normally '\').
+**		All escape characters should be removed from min_str and max_str
+** res_length	Length of min_str and max_str.
+** min_str	Smallest case sensitive string that ranges LIKE.
+**		Should be space padded to res_length.
+** max_str	Largest case sensitive string that ranges LIKE.
+**		Normally padded with the biggest character sort value.
+**
+** The function should return 0 if ok and 1 if the LIKE string can't be
+** optimized !
+*/
+
+/*
+  Writes the literal prefix of the LIKE pattern to both min_str and
+  max_str. At the first unescaped wildcard (or when a contraction must be
+  abandoned) the rest of min_str is filled with cs->min_sort_char and the
+  rest of max_str with cs->max_sort_char. Always returns 0.
+*/
+my_bool my_like_range_mb(CHARSET_INFO *cs,
+			 const char *ptr,size_t ptr_length,
+			 pbool escape, pbool w_one, pbool w_many,
+			 size_t res_length,
+			 char *min_str,char *max_str,
+			 size_t *min_length,size_t *max_length)
+{
+  uint mb_len;
+  const char *end= ptr + ptr_length;
+  char *min_org= min_str;
+  char *min_end= min_str + res_length;
+  char *max_end= max_str + res_length;
+  size_t maxcharlen= res_length / cs->mbmaxlen;
+  const MY_CONTRACTIONS *contractions= my_charset_get_contractions(cs, 0);
+
+  for (; ptr != end && min_str != min_end && maxcharlen ; maxcharlen--)
+  {
+    /* We assume here that escape, w_one, w_many are one-byte characters */
+    if (*ptr == escape && ptr+1 != end)
+      ptr++;                                    /* Skip escape */
+    else if (*ptr == w_one || *ptr == w_many)   /* '_' and '%' in SQL */
+    {
+fill_max_and_min:
+      /*
+        Calculate length of keys:
+        'a\0\0... is the smallest possible string when we have space expand
+        a\ff\ff... is the biggest possible string
+      */
+      *min_length= (cs->state & (MY_CS_BINSORT | MY_CS_NOPAD)) ?
+                    (size_t) (min_str - min_org) :
+                    res_length;
+      /* Create min key  */
+      do
+      {
+	*min_str++= (char) cs->min_sort_char;
+      } while (min_str != min_end);
+
+      /*
+        Write max key: create a buffer with multibyte
+        representation of the max_sort_char character,
+        and copy it into max_str in a loop.
+      */
+      *max_length= res_length;
+      pad_max_char(cs, max_str, max_end);
+      return 0;
+    }
+    if ((mb_len= my_ismbchar(cs, ptr, end)) > 1)
+    {
+      if (ptr+mb_len > end || min_str+mb_len > min_end)
+        break;
+      while (mb_len--)
+       *min_str++= *max_str++= *ptr++;
+    }
+    else
+    {
+      /*
+        Special case for collations with contractions.
+        For example, in Czech, 'ch' is a separate letter
+        which is sorted between 'h' and 'i'.
+        If the pattern 'abc%', 'c' at the end can mean:
+        - letter 'c' itself,
+        - beginning of the contraction 'ch'.
+
+        If we simply return this LIKE range:
+
+         'abc\min\min\min' and 'abc\max\max\max'
+
+        then this query: SELECT * FROM t1 WHERE a LIKE 'abc%'
+        will only find values starting from 'abc[^h]',
+        but won't find values starting from 'abch'.
+
+        We must ignore contraction heads followed by w_one or w_many.
+        ('Contraction head' means any letter which can be the first
+        letter in a contraction)
+
+        For example, for Czech 'abc%', we will return LIKE range,
+        which is equal to LIKE range for 'ab%':
+
+        'ab\min\min\min\min' and 'ab\max\max\max\max'.
+
+      */
+      if (contractions && ptr + 1 < end &&
+          my_uca_can_be_contraction_head(contractions, (uchar) *ptr))
+      {
+        /* Ptr[0] is a contraction head. */
+
+        if (ptr[1] == w_one || ptr[1] == w_many)
+        {
+          /* Contraction head followed by a wildcard, quit. */
+          goto fill_max_and_min;
+        }
+
+        /*
+          Some letters can be both contraction heads and contraction tails.
+          For example, in Danish 'aa' is a separate single letter which
+          is sorted after 'z'. So 'a' can be both head and tail.
+
+          If ptr[0]+ptr[1] is a contraction,
+          then put both letters together.
+
+          If ptr[1] can be a contraction part, but ptr[0]+ptr[1]
+          is not a contraction, then we put only ptr[0],
+          and continue with ptr[1] on the next loop.
+
+          Both bytes are passed as uchar so that a byte >= 0x80 is not
+          sign-extended when plain "char" is signed; this matches the
+          tail check on the same byte.
+        */
+        if (my_uca_can_be_contraction_tail(contractions, (uchar) ptr[1]) &&
+            my_uca_contraction2_weight(contractions, (uchar) ptr[0],
+                                       (uchar) ptr[1]))
+        {
+          /* Contraction found */
+          if (maxcharlen == 1 || min_str + 1 >= min_end)
+          {
+            /* Both contraction parts don't fit, quit */
+            goto fill_max_and_min;
+          }
+
+          /* Put contraction head */
+          *min_str++= *max_str++= *ptr++;
+          maxcharlen--;
+        }
+      }
+      /* Put contraction tail, or a single character */
+      *min_str++= *max_str++= *ptr++;
+    }
+  }
+
+  *min_length= *max_length = (size_t) (min_str - min_org);
+  while (min_str != min_end)
+    *min_str++= *max_str++= ' ';           /* Because of key compression */
+  return 0;
+}
+
+
+/**
+   Calculate min_str and max_str that ranges a LIKE string.
+   Generic function, currently used for ucs2, utf16, utf32,
+   but should be suitable for any other character sets with
+   cs->min_sort_char and cs->max_sort_char represented in
+   Unicode code points.
+
+   @param cs           Character set and collation pointer
+   @param ptr          Pointer to LIKE pattern.
+   @param ptr_length   Length of LIKE pattern.
+   @param escape       Escape character pattern,  typically '\'.
+   @param w_one        'One character' pattern,   typically '_'.
+   @param w_many       'Many characters' pattern, typically '%'.
+   @param res_length   Length of min_str and max_str.
+
+   @param[out] min_str Smallest string that ranges LIKE.
+   @param[out] max_str Largest string that ranges LIKE.
+   @param[out] min_len Length of min_str
+   @param[out] max_len Length of max_str
+
+   @return Optimization status.
+   @retval FALSE if LIKE pattern can be optimized
+   @retval TRUE if LIKE can't be optimized.
+*/
+my_bool
+my_like_range_generic(CHARSET_INFO *cs,
+                      const char *ptr, size_t ptr_length,
+                      pbool escape, pbool w_one, pbool w_many,
+                      size_t res_length,
+                      char *min_str,char *max_str,
+                      size_t *min_length,size_t *max_length)
+{
+  const char *end= ptr + ptr_length;
+  const char *min_org= min_str;
+  const char *max_org= max_str;
+  char *min_end= min_str + res_length;
+  char *max_end= max_str + res_length;
+  /* Number of characters that fit when each one takes mbmaxlen bytes */
+  size_t charlen= res_length / cs->mbmaxlen;
+  size_t res_length_diff;
+  const MY_CONTRACTIONS *contractions= my_charset_get_contractions(cs, 0);
+
+  for ( ; charlen > 0; charlen--)
+  {
+    my_wc_t wc, wc2;
+    int res;
+    /* Decode the next pattern character */
+    if ((res= my_ci_mb_wc(cs, &wc, (uchar*) ptr, (uchar*) end)) <= 0)
+    {
+      if (res == MY_CS_ILSEQ) /* Bad sequence */
+        return TRUE; /* min_length and max_length are not important */
+      break; /* End of the string */
+    }
+    ptr+= res;
+
+    if (wc == (my_wc_t) escape)
+    {
+      /* Decode the character that follows the escape, if any */
+      if ((res= my_ci_mb_wc(cs, &wc, (uchar*) ptr, (uchar*) end)) <= 0)
+      {
+        if (res == MY_CS_ILSEQ)
+          return TRUE; /* min_length and max_length are not important */
+        /*
+           End of the string: Escape is the last character.
+           Put escape as a normal character.
+           We will leave the loop on the next iteration.
+        */
+      }
+      else
+        ptr+= res;
+
+      /* Put escape character to min_str and max_str  */
+      if ((res= my_ci_wc_mb(cs, wc, (uchar*) min_str, (uchar*) min_end)) <= 0)
+        goto pad_set_lengths; /* No space */
+      min_str+= res;
+
+      if ((res= my_ci_wc_mb(cs, wc, (uchar*) max_str, (uchar*) max_end)) <= 0)
+        goto pad_set_lengths; /* No space */
+      max_str+= res;
+      continue;
+    }
+    else if (wc == (my_wc_t) w_one)
+    {
+      /* One-character wildcard: min gets min_sort_char, max gets max_sort_char */
+      if ((res= my_ci_wc_mb(cs, cs->min_sort_char,
+                             (uchar*) min_str, (uchar*) min_end)) <= 0)
+        goto pad_set_lengths;
+      min_str+= res;
+
+      if ((res= my_ci_wc_mb(cs, cs->max_sort_char,
+                             (uchar*) max_str, (uchar*) max_end)) <= 0)
+        goto pad_set_lengths;
+      max_str+= res;
+      continue;
+    }
+    else if (wc == (my_wc_t) w_many)
+    {
+      /*
+        Calculate length of keys:
+        a\min\min... is the smallest possible string
+        a\max\max... is the biggest possible string
+      */
+      *min_length= (cs->state & (MY_CS_BINSORT | MY_CS_NOPAD)) ?
+                    (size_t) (min_str - min_org) :
+                    res_length;
+      *max_length= res_length;
+      goto pad_min_max;
+    }
+
+    /* Peek at the next character (wc2) without consuming it yet */
+    if (contractions &&
+        my_uca_can_be_contraction_head(contractions, wc) &&
+        (res= my_ci_mb_wc(cs, &wc2, (uchar*) ptr, (uchar*) end)) > 0)
+    {
+      const uint16 *weight;
+      if ((wc2 == (my_wc_t) w_one || wc2 == (my_wc_t) w_many))
+      {
+        /* Contraction head followed by a wildcard */
+        *min_length= *max_length= res_length;
+        goto pad_min_max;
+      }
+
+      if (my_uca_can_be_contraction_tail(contractions, wc2) &&
+          (weight= my_uca_contraction2_weight(contractions, wc, wc2)) && weight[0])
+      {
+        /* Contraction found */
+        if (charlen == 1)
+        {
+          /* contraction does not fit to result */
+          *min_length= *max_length= res_length;
+          goto pad_min_max;
+        }
+
+        /* Consume the tail; the contraction uses two character slots */
+        ptr+= res;
+        charlen--;
+
+        /* Put contraction head */
+        if ((res= my_ci_wc_mb(cs, wc, (uchar*) min_str, (uchar*) min_end)) <= 0)
+          goto pad_set_lengths;
+        min_str+= res;
+
+        if ((res= my_ci_wc_mb(cs, wc, (uchar*) max_str, (uchar*) max_end)) <= 0)
+          goto pad_set_lengths;
+        max_str+= res;
+        wc= wc2; /* Prepare to put contraction tail */
+      }
+    }
+
+    /* Normal character, or contraction tail */
+    if ((res= my_ci_wc_mb(cs, wc, (uchar*) min_str, (uchar*) min_end)) <= 0)
+      goto pad_set_lengths;
+    min_str+= res;
+    if ((res= my_ci_wc_mb(cs, wc, (uchar*) max_str, (uchar*) max_end)) <= 0)
+      goto pad_set_lengths;
+    max_str+= res;
+  }
+
+pad_set_lengths:
+  /* Lengths are the number of bytes actually written so far */
+  *min_length= (size_t) (min_str - min_org);
+  *max_length= (size_t) (max_str - max_org);
+
+pad_min_max:
+  /*
+    Fill up max_str and min_str to res_length.
+    fill() cannot set incomplete characters and
+    requires that "length" argument is divisible to mbminlen.
+    Make sure to call fill() with proper "length" argument.
+  */
+  res_length_diff= res_length % cs->mbminlen;
+  my_ci_fill(cs, min_str, min_end - min_str - res_length_diff,
+                 cs->min_sort_char);
+  my_ci_fill(cs, max_str, max_end - max_str - res_length_diff,
+                 cs->max_sort_char);
+
+  /* In case of incomplete characters set the remainder to 0x00's */
+  if (res_length_diff)
+  {
+    /* Example: odd res_length for ucs2 */
+    memset(min_end - res_length_diff, 0, res_length_diff);
+    memset(max_end - res_length_diff, 0, res_length_diff);
+  }
+  return FALSE;
+}
+
+/* Wildcard match of [str,str_end) against pattern [wildstr,wildend), comparing bytes exactly: returns 0 on match, non-zero (1 or -1) otherwise */
+static int my_wildcmp_mb_bin_impl(CHARSET_INFO *cs,
+                                  const char *str,const char *str_end, /* subject string, end exclusive */
+                                  const char *wildstr,const char *wildend, /* pattern, end exclusive */
+                                  int escape, int w_one, int w_many, int recurse_level) /* escape char, one-char wildcard, any-run wildcard, current depth */
+{
+  int result= -1;				/* Not found, using wildcards */
+
+  if (my_string_stack_guard && my_string_stack_guard(recurse_level)) /* Optional hook may refuse deeper recursion */
+    return 1;
+  while (wildstr != wildend)
+  {
+    while (*wildstr != w_many && *wildstr != w_one) /* Literal run: compare up to the next wildcard */
+    {
+      int l;
+      if (*wildstr == escape && wildstr+1 != wildend) /* Escaped char is compared literally */
+	wildstr++;
+      if ((l = my_ismbchar(cs, wildstr, wildend))) /* Multi-byte char: compare all l bytes at once */
+      {
+	  if (str+l > str_end || memcmp(str, wildstr, l) != 0)
+	      return 1;
+	  str += l;
+	  wildstr += l;
+      }
+      else
+      if (str == str_end || *wildstr++ != *str++)
+	return(1);				/* No match */
+      if (wildstr == wildend)
+	return (str != str_end);		/* Match if both are at end */
+      result=1;					/* Found an anchor char */
+    }
+    if (*wildstr == w_one) /* Each w_one consumes exactly one character of str */
+    {
+      do
+      {
+	if (str == str_end)			/* Skip one char if possible */
+	  return (result);
+	INC_PTR(cs,str,str_end);
+      } while (++wildstr < wildend && *wildstr == w_one);
+      if (wildstr == wildend)
+	break;
+    }
+    if (*wildstr == w_many)
+    {						/* Found w_many */
+      int cmp;
+      const char* mb = wildstr;
+      int mb_len=0;
+      
+      wildstr++;
+      /* Remove any '%' and '_' from the wild search string */
+      for (; wildstr != wildend ; wildstr++)
+      {
+	if (*wildstr == w_many)
+	  continue;
+	if (*wildstr == w_one)
+	{
+	  if (str == str_end)
+	    return (-1);
+	  INC_PTR(cs,str,str_end);
+	  continue;
+	}
+	break;					/* Not a wild character */
+      }
+      if (wildstr == wildend)
+	return(0);				/* Ok if w_many is last */
+      if (str == str_end)
+	return -1;
+      
+      if ((cmp= *wildstr) == escape && wildstr+1 != wildend) /* cmp = first literal after the wildcards, unescaped */
+	cmp= *++wildstr;
+	
+      mb=wildstr;
+      mb_len= my_ismbchar(cs, wildstr, wildend); /* 0 when that literal is a single byte */
+      INC_PTR(cs,wildstr,wildend);		/* This is compared through cmp */
+      do
+      {
+        for (;;) /* Scan str for the next occurrence of the literal */
+        {
+          if (str >= str_end)
+            return -1;
+          if (mb_len)
+          {
+            if (str+mb_len <= str_end && memcmp(str, mb, mb_len) == 0)
+            {
+              str += mb_len;
+              break;
+            }
+          }
+          else if (!my_ismbchar(cs, str, str_end) && *str == cmp)
+          {
+            str++;
+            break;
+          }
+          INC_PTR(cs,str, str_end);
+        }
+	{
+	  int tmp=my_wildcmp_mb_bin_impl(cs,str,str_end, /* Match the rest of the pattern from here */
+                                         wildstr,wildend,escape,
+                                         w_one,w_many, recurse_level+1);
+	  if (tmp <= 0) /* 0 or -1 is final; 1 means retry at a later occurrence */
+	    return (tmp);
+	}
+      } while (str != str_end);
+      return(-1);
+    }
+  }
+  return (str != str_end ? 1 : 0);
+}
+
+int
+my_wildcmp_mb_bin(CHARSET_INFO *cs,
+                  const char *str,const char *str_end,
+                  const char *wildstr,const char *wildend,
+                  int escape, int w_one, int w_many)
+{
+  const int first_level= 1;    /* Recursion depth of the outermost call */
+  return my_wildcmp_mb_bin_impl(cs, str, str_end, wildstr, wildend,
+                                escape, w_one, w_many, first_level);
+}
+
+
+/*
+  Data was produced from EastAsianWidth.txt
+  using the utr11-dump utility.
+*/
+static const char pg11[256]= /* Page 0x11 (U+1100..U+11FF): 1 = my_numcells_mb() adds one extra display cell */
+{
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,1,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
+};
+
+static const char pg23[256]= /* Page 0x23 (U+2300..U+23FF): 1 = my_numcells_mb() adds one extra display cell */
+{
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
+};
+
+static const char pg2E[256]= /* Page 0x2E (U+2E00..U+2EFF): 1 = my_numcells_mb() adds one extra display cell */
+{
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0
+};
+
+static const char pg2F[256]= /* Page 0x2F (U+2F00..U+2FFF): 1 = my_numcells_mb() adds one extra display cell */
+{
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0
+};
+
+static const char pg30[256]= /* Page 0x30 (U+3000..U+30FF): 1 = my_numcells_mb() adds one extra display cell */
+{
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,
+0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1
+};
+
+static const char pg31[256]= /* Page 0x31 (U+3100..U+31FF): 1 = my_numcells_mb() adds one extra display cell */
+{
+0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1
+};
+
+static const char pg32[256]= /* Page 0x32 (U+3200..U+32FF): 1 = my_numcells_mb() adds one extra display cell */
+{
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0
+};
+
+static const char pg4D[256]= /* Page 0x4D (U+4D00..U+4DFF): 1 = my_numcells_mb() adds one extra display cell */
+{
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
+};
+
+static const char pg9F[256]= /* Page 0x9F (U+9F00..U+9FFF): 1 = my_numcells_mb() adds one extra display cell */
+{
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
+};
+
+static const char pgA4[256]= /* Page 0xA4 (U+A400..U+A4FF): 1 = my_numcells_mb() adds one extra display cell */
+{
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
+};
+
+static const char pgD7[256]= /* Page 0xD7 (U+D700..U+D7FF): 1 = my_numcells_mb() adds one extra display cell */
+{
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
+};
+
+static const char pgFA[256]= /* Page 0xFA (U+FA00..U+FAFF): 1 = my_numcells_mb() adds one extra display cell */
+{
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
+};
+
+static const char pgFE[256]= /* Page 0xFE (U+FE00..U+FEFF): 1 = my_numcells_mb() adds one extra display cell */
+{
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,0,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
+};
+
+static const char pgFF[256]= /* Page 0xFF (U+FF00..U+FFFF): 1 = my_numcells_mb() adds one extra display cell */
+{
+0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
+};
+
+static const struct {int page; const char *p;} utr11_data[256]= /* Indexed by (wc >> 8) for wc <= 0xFFFF; read by my_numcells_mb() */
+{ /* p != NULL: per-code-point table for that page; p == NULL: "page" is the extra-cell value for the whole page */
+{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},
+{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},
+{0,NULL},{0,pg11},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},
+{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},
+{0,NULL},{0,NULL},{0,NULL},{0,pg23},{0,NULL},{0,NULL},{0,NULL},{0,NULL},
+{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,pg2E},{0,pg2F},
+{0,pg30},{0,pg31},{0,pg32},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},
+{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},
+{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},
+{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{0,pg4D},{1,NULL},{1,NULL},
+{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},
+{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},
+{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},
+{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},
+{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},
+{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},
+{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},
+{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},
+{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},
+{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{0,pg9F},
+{1,NULL},{1,NULL},{1,NULL},{1,NULL},{0,pgA4},{0,NULL},{0,NULL},{0,NULL},
+{0,NULL},{0,NULL},{0,NULL},{0,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},
+{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},
+{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},
+{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},
+{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},
+{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{1,NULL},{0,pgD7},
+{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},
+{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},
+{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},
+{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},{0,NULL},
+{0,NULL},{1,NULL},{0,pgFA},{0,NULL},{0,NULL},{0,NULL},{0,pgFE},{0,pgFF}
+};
+
+/* Count display cells for [b, e): 1 per decoded character, plus 1 if flagged in utr11_data or in U+20000..U+3FFFD; 1 per undecodable byte */
+size_t my_numcells_mb(CHARSET_INFO *cs, const char *b, const char *e)
+{
+  my_wc_t wc;
+  size_t clen= 0;
+
+  while (b < e)
+  {
+    int mb_len;
+    uint pg;
+    if ((mb_len= my_ci_mb_wc(cs, &wc, (uchar*) b, (uchar*) e)) <= 0)
+    {
+      clen++; /* An undecodable byte is assumed to take 1 display cell */
+      b++;    /* Resynchronize on the next byte */
+      continue;
+    }
+    b+= mb_len;
+    if (wc > 0xFFFF)
+    {
+      if (wc >= 0x20000 && wc <= 0x3FFFD) /* CJK Ideograph Extension B, C */
+        clen+= 1;
+    }
+    else
+    {
+      pg= (wc >> 8) & 0xFF; /* High byte selects the utr11_data page */
+      clen+= utr11_data[pg].p ? utr11_data[pg].p[wc & 0xFF] : utr11_data[pg].page;
+    }
+    clen++; /* Base cell that every decoded character occupies */
+  }
+  return clen;
+}
+
+/* Decode one character from [s, e) and store its my_uni_ctype[] class in *ctype (0 if undecodable or above U+FFFF); returns the my_ci_mb_wc() result */
+int my_mb_ctype_mb(CHARSET_INFO *cs, int *ctype,
+                   const uchar *s, const uchar *e)
+{
+  my_wc_t code;
+  int nbytes= my_ci_mb_wc(cs, &code, s, e);
+  if (nbytes <= 0 || code > 0xFFFF)
+    *ctype= 0;                          /* No class information available */
+  else if (my_uni_ctype[code >> 8].ctype)
+    *ctype= my_uni_ctype[code >> 8].ctype[code & 0xFF]; /* Per-character entry */
+  else
+    *ctype= my_uni_ctype[code >> 8].pctype;             /* Page-wide default */
+  return nbytes;
+}
+
+
+#endif