diff options
Diffstat (limited to 'strings/ctype-ucs2.c')
-rw-r--r-- | strings/ctype-ucs2.c | 3509 |
1 files changed, 3509 insertions, 0 deletions
diff --git a/strings/ctype-ucs2.c b/strings/ctype-ucs2.c new file mode 100644 index 00000000..5d67762a --- /dev/null +++ b/strings/ctype-ucs2.c @@ -0,0 +1,3509 @@ +/* Copyright (c) 2003, 2013, Oracle and/or its affiliates + Copyright (c) 2009, 2020, MariaDB + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Library General Public + License as published by the Free Software Foundation; version 2 + of the License. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Library General Public License for more details. + + You should have received a copy of the GNU Library General Public + License along with this library; if not, write to the Free + Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, + MA 02110-1335 USA */ + +/* UCS2 support. Written by Alexander Barkov <bar@mysql.com> */ + +#include "strings_def.h" +#include <m_ctype.h> +#include "ctype-mb.h" +#include <my_sys.h> +#include <stdarg.h> + +#include "ctype-unidata.h" + + +#if defined(HAVE_CHARSET_utf16) || defined(HAVE_CHARSET_ucs2) +#define HAVE_CHARSET_mb2 +#endif + + +#if defined(HAVE_CHARSET_mb2) || defined(HAVE_CHARSET_utf32) +#define HAVE_CHARSET_mb2_or_mb4 +#endif + +#ifndef EILSEQ +#define EILSEQ ENOENT +#endif +/* Numeric limits and powers of ten for the base-10 parser my_strtoll10_mb2(): LFACTOR, LFACTOR1 and LFACTOR2 are 10^9, 10^10 and 10^11, INIT_CNT (9) is the number of digits collected per group, and lfactor[n] is 10^n for n = 0..8. */ +#undef ULONGLONG_MAX +#define ULONGLONG_MAX (~(ulonglong) 0) +#define MAX_NEGATIVE_NUMBER ((ulonglong) 0x8000000000000000LL) +#define INIT_CNT 9 +#define LFACTOR 1000000000ULL +#define LFACTOR1 10000000000ULL +#define LFACTOR2 100000000000ULL + +#if defined(HAVE_CHARSET_utf32) || defined(HAVE_CHARSET_mb2) +static unsigned long lfactor[9]= +{ 1L, 10L, 100L, 1000L, 10000L, 100000L, 1000000L, 10000000L, 100000000L }; +#endif + + +#ifdef HAVE_CHARSET_mb2_or_mb4 +static size_t +my_caseup_str_mb2_or_mb4(CHARSET_INFO * cs __attribute__((unused)), + char * s __attribute__((unused))) +{ + 
DBUG_ASSERT(0); + return 0; +} + +/* Placeholder: hits DBUG_ASSERT(0) and returns 0. */ +static size_t +my_casedn_str_mb2_or_mb4(CHARSET_INFO *cs __attribute__((unused)), + char * s __attribute__((unused))) +{ + DBUG_ASSERT(0); + return 0; +} + +/* Placeholder: hits DBUG_ASSERT(0) and returns 0. */ +static int +my_strcasecmp_mb2_or_mb4(CHARSET_INFO *cs __attribute__((unused)), + const char *s __attribute__((unused)), + const char *t __attribute__((unused))) +{ + DBUG_ASSERT(0); + return 0; +} + +/* Result of my_copy_incomplete_char(). */ +typedef enum +{ + MY_CHAR_COPY_OK= 0, /* The character was OK */ + MY_CHAR_COPY_ERROR= 1, /* The character was not OK, and could not fix */ + MY_CHAR_COPY_FIXED= 2 /* The character was not OK, was fixed to '?' */ +} my_char_copy_status_t; + + +/* + Copies an incomplete character, left-padding it with 0x00 bytes. + + @param cs Character set + @param dst The destination string + @param dst_length Space available in dst + @param src The source string + @param src_length Length of src + @param nchars Copy not more than nchars characters. + The "nchars" parameter of the caller. + Only 0 and non-0 are important here. + @param fix What to do if after zero-padding didn't get a valid + character: + - FALSE - exit with error. + - TRUE - try to put '?' instead. + + @return MY_CHAR_COPY_OK if after zero-padding got a valid character. + cs->mbminlen bytes were written to "dst". + @return MY_CHAR_COPY_FIXED if after zero-padding did not get a valid + character, but wrote '?' to the destination + string instead. + cs->mbminlen bytes were written to "dst". + @return MY_CHAR_COPY_ERROR If failed and nothing was written to "dst". + Possible reasons: + - dst_length was too short + - nchars was 0 + - the character after padding appeared not + to be valid, and could not fix it to '?'. 
+*/ +static my_char_copy_status_t +my_copy_incomplete_char(CHARSET_INFO *cs, + char *dst, size_t dst_length, + const char *src, size_t src_length, + size_t nchars, my_bool fix) +{ + size_t pad_length; + size_t src_offset= src_length % cs->mbminlen; + if (dst_length < cs->mbminlen || !nchars) + return MY_CHAR_COPY_ERROR; + /* Only the leading src_length % mbminlen bytes of src are used; zero bytes are written in front of them so that together they fill one mbminlen-wide unit. */ + pad_length= cs->mbminlen - src_offset; + bzero(dst, pad_length); + memmove(dst + pad_length, src, src_offset); + /* + In some cases left zero-padding can create an incorrect character. + For example: + INSERT INTO t1 (utf32_column) VALUES (0x110000); + We'll pad the value to 0x00110000, which is a wrong UTF32 sequence! + The valid characters range is limited to 0x00000000..0x0010FFFF. + + Make sure we didn't pad to an incorrect character. + */ + if (my_ci_charlen(cs, (uchar *) dst, (uchar *) dst + cs->mbminlen) == + (int) cs->mbminlen) + return MY_CHAR_COPY_OK; + /* The padded bytes are not a valid character: overwrite them with '?' when the caller allows it. */ + if (fix && + my_ci_wc_mb(cs, '?', (uchar *) dst, (uchar *) dst + cs->mbminlen) == + (int) cs->mbminlen) + return MY_CHAR_COPY_FIXED; + + return MY_CHAR_COPY_ERROR; +} + + +/* + Copy an UCS2/UTF16/UTF32 string, fix bad characters. 
+*/ +static size_t +my_copy_fix_mb2_or_mb4(CHARSET_INFO *cs, + char *dst, size_t dst_length, + const char *src, size_t src_length, + size_t nchars, MY_STRCOPY_STATUS *status) +{ + size_t length2, src_offset= src_length % cs->mbminlen; + my_char_copy_status_t padstatus; + /* A non-zero remainder is handled as one incomplete leading character; the rest goes through my_copy_fix_mb(). */ + if (!src_offset) + return my_copy_fix_mb(cs, dst, dst_length, + src, src_length, nchars, status); + if ((padstatus= my_copy_incomplete_char(cs, dst, dst_length, + src, src_length, nchars, TRUE)) == + MY_CHAR_COPY_ERROR) + { + status->m_source_end_pos= status->m_well_formed_error_pos= src; + return 0; + } + length2= my_copy_fix_mb(cs, dst + cs->mbminlen, dst_length - cs->mbminlen, + src + src_offset, src_length - src_offset, + nchars - 1, status); + if (padstatus == MY_CHAR_COPY_FIXED) + status->m_well_formed_error_pos= src; + return cs->mbminlen /* The left-padded character */ + length2; +} + +/* strtol()-like conversion for mb2/mb4 character sets. Skips leading spaces, tabs and signs (each '-' toggles the sign), then accumulates digits 0-9, A-Z, a-z smaller than "base" in 32-bit arithmetic. Sets *err to EDOM when no character follows the prefix, EILSEQ on a bad sequence, ERANGE on overflow (returning INT_MIN32 or INT_MAX32), and to 0 otherwise. */ +static long +my_strntol_mb2_or_mb4(CHARSET_INFO *cs, + const char *nptr, size_t l, int base, + char **endptr, int *err) +{ + int negative= 0; + int overflow; + int cnv; + my_wc_t wc; + my_charset_conv_mb_wc mb_wc= cs->cset->mb_wc; + register unsigned int cutlim; + register uint32 cutoff; + register uint32 res; + register const uchar *s= (const uchar*) nptr; + register const uchar *e= (const uchar*) nptr+l; + const uchar *save; + + *err= 0; + do + { + if ((cnv= mb_wc(cs, &wc, s, e)) > 0) + { + switch (wc) + { + case ' ' : break; + case '\t': break; + case '-' : negative= !negative; break; + case '+' : break; + default : goto bs; + } + } + else /* No more characters or bad multibyte sequence */ + { + if (endptr != NULL ) + *endptr= (char*) s; + err[0]= (cnv==MY_CS_ILSEQ) ? 
EILSEQ : EDOM; + return 0; + } + s+= cnv; + } while (1); + +bs: + /* cutoff and cutlim bound the accumulator: another digit fits only while res < cutoff, or res == cutoff and the digit <= cutlim. */ + overflow= 0; + res= 0; + save= s; + cutoff= ((uint32)~0L) / (uint32) base; + cutlim= (uint) (((uint32)~0L) % (uint32) base); + /* NOTE(review): s is advanced before the digit test below, so when parsing stops at a non-digit *endptr ends up just past that character and the "s == save" EDOM check does not fire for it - confirm callers expect this. */ + do { + if ((cnv= mb_wc(cs, &wc, s, e)) > 0) + { + s+= cnv; + if (wc >= '0' && wc <= '9') + wc-= '0'; + else if (wc >= 'A' && wc <= 'Z') + wc= wc - 'A' + 10; + else if (wc >= 'a' && wc <= 'z') + wc= wc - 'a' + 10; + else + break; + if ((int)wc >= base) + break; + if (res > cutoff || (res == cutoff && wc > cutlim)) + overflow= 1; + else + { + res*= (uint32) base; + res+= wc; + } + } + else if (cnv == MY_CS_ILSEQ) + { + if (endptr !=NULL ) + *endptr = (char*) s; + err[0]= EILSEQ; + return 0; + } + else + { + /* No more characters */ + break; + } + } while(1); + + if (endptr != NULL) + *endptr = (char *) s; + + if (s == save) + { + err[0]= EDOM; + return 0L; + } + + if (negative) + { + if (res > (uint32) INT_MIN32) + overflow= 1; + } + else if (res > INT_MAX32) + overflow= 1; + + if (overflow) + { + err[0]= ERANGE; + return negative ? INT_MIN32 : INT_MAX32; + } + + return (negative ? -((long) res) : (long) res); +} + +/* strtoul()-like conversion for mb2/mb4 character sets. Scans like the signed variant but applies no signed range check: on overflow of the 32-bit accumulator sets *err to ERANGE and returns 0xFFFFFFFF; a '-' prefix negates the returned value. */ +static ulong +my_strntoul_mb2_or_mb4(CHARSET_INFO *cs, + const char *nptr, size_t l, int base, + char **endptr, int *err) +{ + int negative= 0; + int overflow; + int cnv; + my_wc_t wc; + my_charset_conv_mb_wc mb_wc= cs->cset->mb_wc; + register unsigned int cutlim; + register uint32 cutoff; + register uint32 res; + register const uchar *s= (const uchar*) nptr; + register const uchar *e= (const uchar*) nptr + l; + const uchar *save; + + *err= 0; + do + { + if ((cnv= mb_wc(cs, &wc, s, e)) > 0) + { + switch (wc) + { + case ' ' : break; + case '\t': break; + case '-' : negative= !negative; break; + case '+' : break; + default : goto bs; + } + } + else /* No more characters or bad multibyte sequence */ + { + if (endptr !=NULL ) + *endptr= (char*)s; + err[0]= (cnv == MY_CS_ILSEQ) ? 
EILSEQ : EDOM; + return 0; + } + s+= cnv; + } while (1); + +bs: + /* cutoff and cutlim bound the accumulator: another digit fits only while res < cutoff, or res == cutoff and the digit <= cutlim. */ + overflow= 0; + res= 0; + save= s; + cutoff= ((uint32)~0L) / (uint32) base; + cutlim= (uint) (((uint32)~0L) % (uint32) base); + /* NOTE(review): s is advanced before the digit test below, so *endptr ends up just past the character that stopped the scan - confirm callers expect this. */ + do + { + if ((cnv= mb_wc(cs, &wc, s, e)) > 0) + { + s+= cnv; + if (wc >= '0' && wc <= '9') + wc-= '0'; + else if (wc >= 'A' && wc <= 'Z') + wc= wc - 'A' + 10; + else if (wc >= 'a' && wc <= 'z') + wc= wc - 'a' + 10; + else + break; + if ((int) wc >= base) + break; + if (res > cutoff || (res == cutoff && wc > cutlim)) + overflow = 1; + else + { + res*= (uint32) base; + res+= wc; + } + } + else if (cnv == MY_CS_ILSEQ) + { + if (endptr != NULL ) + *endptr= (char*)s; + err[0]= EILSEQ; + return 0; + } + else + { + /* No more characters */ + break; + } + } while(1); + + if (endptr != NULL) + *endptr= (char *) s; + + if (s == save) + { + err[0]= EDOM; + return 0L; + } + + if (overflow) + { + err[0]= (ERANGE); + return (~(uint32) 0); + } + + return (negative ? -((long) res) : (long) res); +} + +/* strtoll()-like conversion for mb2/mb4 character sets. Same scanning rules as the 32-bit variants, with a 64-bit accumulator; on overflow sets *err to ERANGE and returns LONGLONG_MIN or LONGLONG_MAX depending on the sign. */ +static longlong +my_strntoll_mb2_or_mb4(CHARSET_INFO *cs, + const char *nptr, size_t l, int base, + char **endptr, int *err) +{ + int negative=0; + int overflow; + int cnv; + my_wc_t wc; + my_charset_conv_mb_wc mb_wc= cs->cset->mb_wc; + register ulonglong cutoff; + register unsigned int cutlim; + register ulonglong res; + register const uchar *s= (const uchar*) nptr; + register const uchar *e= (const uchar*) nptr+l; + const uchar *save; + + *err= 0; + do + { + if ((cnv= mb_wc(cs, &wc, s, e)) > 0) + { + switch (wc) + { + case ' ' : break; + case '\t': break; + case '-' : negative= !negative; break; + case '+' : break; + default : goto bs; + } + } + else /* No more characters or bad multibyte sequence */ + { + if (endptr !=NULL ) + *endptr = (char*)s; + err[0] = (cnv==MY_CS_ILSEQ) ? 
EILSEQ : EDOM; + return 0; + } + s+=cnv; + } while (1); + +bs: + /* cutoff and cutlim bound the 64-bit accumulator: another digit fits only while res < cutoff, or res == cutoff and the digit <= cutlim. */ + overflow = 0; + res = 0; + save = s; + cutoff = (~(ulonglong) 0) / (unsigned long int) base; + cutlim = (uint) ((~(ulonglong) 0) % (unsigned long int) base); + /* NOTE(review): s is advanced before the digit test below, so *endptr ends up just past the character that stopped the scan - confirm callers expect this. */ + do { + if ((cnv= mb_wc(cs, &wc, s, e)) > 0) + { + s+=cnv; + if ( wc>='0' && wc<='9') + wc -= '0'; + else if ( wc>='A' && wc<='Z') + wc = wc - 'A' + 10; + else if ( wc>='a' && wc<='z') + wc = wc - 'a' + 10; + else + break; + if ((int)wc >= base) + break; + if (res > cutoff || (res == cutoff && wc > cutlim)) + overflow = 1; + else + { + res *= (ulonglong) base; + res += wc; + } + } + else if (cnv==MY_CS_ILSEQ) + { + if (endptr !=NULL ) + *endptr = (char*)s; + err[0]=EILSEQ; + return 0; + } + else + { + /* No more characters */ + break; + } + } while(1); + + if (endptr != NULL) + *endptr = (char *) s; + + if (s == save) + { + err[0]=EDOM; + return 0L; + } + + if (negative) + { + if (res > (ulonglong) LONGLONG_MIN) + overflow = 1; + } + else if (res > (ulonglong) LONGLONG_MAX) + overflow = 1; + + if (overflow) + { + err[0]=ERANGE; + return negative ? LONGLONG_MIN : LONGLONG_MAX; + } + + return (negative ? -((longlong)res) : (longlong)res); +} + +/* strtoull()-like conversion for mb2/mb4 character sets. No signed range check is applied: on overflow of the 64-bit accumulator sets *err to ERANGE and returns all bits set; a '-' prefix negates the returned value. */ +static ulonglong +my_strntoull_mb2_or_mb4(CHARSET_INFO *cs, + const char *nptr, size_t l, int base, + char **endptr, int *err) +{ + int negative= 0; + int overflow; + int cnv; + my_wc_t wc; + my_charset_conv_mb_wc mb_wc= cs->cset->mb_wc; + register ulonglong cutoff; + register unsigned int cutlim; + register ulonglong res; + register const uchar *s= (const uchar*) nptr; + register const uchar *e= (const uchar*) nptr + l; + const uchar *save; + + *err= 0; + do + { + if ((cnv= mb_wc(cs, &wc, s, e)) > 0) + { + switch (wc) + { + case ' ' : break; + case '\t': break; + case '-' : negative= !negative; break; + case '+' : break; + default : goto bs; + } + } + else /* No more characters or bad multibyte sequence */ + { + if (endptr !=NULL ) + *endptr = (char*)s; + err[0]= (cnv==MY_CS_ILSEQ) ? 
EILSEQ : EDOM; + return 0; + } + s+=cnv; + } while (1); + +bs: + /* cutoff and cutlim bound the 64-bit accumulator: another digit fits only while res < cutoff, or res == cutoff and the digit <= cutlim. */ + overflow = 0; + res = 0; + save = s; + cutoff = (~(ulonglong) 0) / (unsigned long int) base; + cutlim = (uint) ((~(ulonglong) 0) % (unsigned long int) base); + /* NOTE(review): s is advanced before the digit test below, so *endptr ends up just past the character that stopped the scan - confirm callers expect this. */ + do + { + if ((cnv= mb_wc(cs, &wc, s, e)) > 0) + { + s+=cnv; + if ( wc>='0' && wc<='9') + wc -= '0'; + else if ( wc>='A' && wc<='Z') + wc = wc - 'A' + 10; + else if ( wc>='a' && wc<='z') + wc = wc - 'a' + 10; + else + break; + if ((int)wc >= base) + break; + if (res > cutoff || (res == cutoff && wc > cutlim)) + overflow = 1; + else + { + res *= (ulonglong) base; + res += wc; + } + } + else if (cnv==MY_CS_ILSEQ) + { + if (endptr !=NULL ) + *endptr = (char*)s; + err[0]= EILSEQ; + return 0; + } + else + { + /* No more characters */ + break; + } + } while(1); + + if (endptr != NULL) + *endptr = (char *) s; + + if (s == save) + { + err[0]= EDOM; + return 0L; + } + + if (overflow) + { + err[0]= ERANGE; + return (~(ulonglong) 0); + } + + return (negative ? -((longlong) res) : (longlong) res); +} + +/* String to double for mb2/mb4 character sets. Copies at most sizeof(buf) - 1 input bytes worth of characters whose code point is in 1..'e' into a local byte buffer, parses that buffer with my_strtod(), and maps the end position back to the input by multiplying the number of consumed buffer bytes by cs->mbminlen. */ +static double +my_strntod_mb2_or_mb4(CHARSET_INFO *cs, + char *nptr, size_t length, + char **endptr, int *err) +{ + char buf[256]; + double res; + register char *b= buf; + register const uchar *s= (const uchar*) nptr; + const uchar *end; + my_wc_t wc; + my_charset_conv_mb_wc mb_wc= cs->cset->mb_wc; + int cnv; + + *err= 0; + /* Cut too long strings */ + if (length >= sizeof(buf)) + length= sizeof(buf) - 1; + end= s + length; + + while ((cnv= mb_wc(cs, &wc, s, end)) > 0) + { + s+= cnv; + if (wc > (int) (uchar) 'e' || !wc) + break; /* Can't be part of double */ + *b++= (char) wc; + } + + *endptr= b; + res= my_strtod(buf, endptr, err); + *endptr= nptr + cs->mbminlen * (size_t) (*endptr - buf); + return res; +} + +/* Same narrowing technique as above, delegating the parsing to my_strntoull10rnd_8bit(). */ +static ulonglong +my_strntoull10rnd_mb2_or_mb4(CHARSET_INFO *cs, + const char *nptr, size_t length, + int unsign_fl, + char **endptr, int *err) +{ + char buf[256], *b= buf; + ulonglong res; + const uchar *end, *s= (const uchar*) nptr; + my_wc_t 
wc; + my_charset_conv_mb_wc mb_wc= cs->cset->mb_wc; + int cnv; + + /* Cut too long strings */ + if (length >= sizeof(buf)) + length= sizeof(buf)-1; + end= s + length; + + while ((cnv= mb_wc(cs, &wc, s, end)) > 0) + { + s+= cnv; + if (wc > (int) (uchar) 'e' || !wc) + break; /* Can't be a number part */ + *b++= (char) wc; + } + + res= my_strntoull10rnd_8bit(cs, buf, b - buf, unsign_fl, endptr, err); + *endptr= (char*) nptr + cs->mbminlen * (size_t) (*endptr - buf); + return res; +} + + +/* + This is a fast version optimized for the case of radix 10 / -10 + + A negative radix means val is signed: a '-' is emitted for negative + values. Otherwise val is formatted as unsigned. + The digits are built right to left in a local buffer and then written + to dst one character at a time with my_ci_wc_mb(); output stops when + dst is full. Returns the number of bytes written to dst. +*/ + +static size_t +my_l10tostr_mb2_or_mb4(CHARSET_INFO *cs, + char *dst, size_t len, int radix, long int val) +{ + char buffer[66]; + register char *p, *db, *de; + long int new_val; + int sl= 0; + unsigned long int uval = (unsigned long int) val; + + p= &buffer[sizeof(buffer) - 1]; + *p= '\0'; + + if (radix < 0) + { + if (val < 0) + { + sl= 1; + /* Avoid integer overflow in (-val) for LONG_MIN (BUG#31799). */ + uval = (unsigned long int)0 - uval; + } + } + /* The first digit is taken from the unsigned value; the quotient then fits in a signed long. */ + new_val = (long) (uval / 10); + *--p = '0'+ (char) (uval - (unsigned long) new_val * 10); + val= new_val; + + while (val != 0) + { + new_val= val / 10; + *--p= '0' + (char) (val - new_val * 10); + val= new_val; + } + + if (sl) + { + *--p= '-'; + } + + for ( db= dst, de= dst + len ; (dst < de) && *p ; p++) + { + int cnvres= my_ci_wc_mb(cs, (my_wc_t) p[0], (uchar*) dst, (uchar*) de); + if (cnvres > 0) + dst+= cnvres; + else + break; + } + return (int) (dst - db); +} + +/* longlong counterpart of the function above; same radix convention and return value. */ +static size_t +my_ll10tostr_mb2_or_mb4(CHARSET_INFO *cs, + char *dst, size_t len, int radix, longlong val) +{ + char buffer[65]; + register char *p, *db, *de; + long long_val; + int sl= 0; + ulonglong uval= (ulonglong) val; + + if (radix < 0) + { + if (val < 0) + { + sl= 1; + /* Avoid integer overflow in (-val) for LONGLONG_MIN (BUG#31799). 
*/ + uval = (ulonglong)0 - uval; + } + } + + p= &buffer[sizeof(buffer)-1]; + *p='\0'; + + if (uval == 0) + { + *--p= '0'; + goto cnv; + } + /* Use 64-bit division only while the value does not fit in a long. */ + while (uval > (ulonglong) LONG_MAX) + { + ulonglong quo= uval/(uint) 10; + uint rem= (uint) (uval- quo* (uint) 10); + *--p= '0' + rem; + uval= quo; + } + + long_val= (long) uval; + while (long_val != 0) + { + long quo= long_val/10; + *--p= (char) ('0' + (long_val - quo*10)); + long_val= quo; + } + +cnv: + if (sl) + { + *--p= '-'; + } + + for ( db= dst, de= dst + len ; (dst < de) && *p ; p++) + { + int cnvres= my_ci_wc_mb(cs, (my_wc_t) p[0], (uchar*) dst, (uchar*) de); + if (cnvres > 0) + dst+= cnvres; + else + break; + } + return (int) (dst -db); +} + +#endif /* HAVE_CHARSET_mb2_or_mb4 */ + + +#ifdef HAVE_CHARSET_mb2 +/** + Convert a Unicode code point to a digit. + @param[OUT] c - the output digit value 0..9; + only meaningful when 0 is returned + @param wc - the input Unicode code point + + @return 0 - if wc is a good digit + @return 1 - if wc is not a digit +*/ +static inline my_bool +wc2digit_uchar(uchar *c, my_wc_t wc) +{ + return wc > '9' || (c[0]= (uchar) (wc - '0')) > 9; +} + +/* Fast base-10 string to longlong conversion for 2-byte character sets. On entry *endptr must point to the end of the input (a NULL endptr is treated as "no number"); on return it points to the end of the parsed number. *error is set to 0 on success, -1 for a negative number, MY_ERRNO_ERANGE on overflow and MY_ERRNO_EDOM when there is no number to convert. Digits are collected in three groups: i (first 9), j (next 9) and k (last 1 or 2). */ +static longlong +my_strtoll10_mb2(CHARSET_INFO *cs __attribute__((unused)), + const char *nptr, char **endptr, int *error) +{ + const uchar *s, *end, *start, *n_end, *true_end; + uchar UNINIT_VAR(c); + unsigned long i, j, k; + ulonglong li; + int negative; + ulong cutoff, cutoff2, cutoff3; + my_wc_t wc; + int res; + my_charset_conv_mb_wc mb_wc= cs->cset->mb_wc; + + s= (const uchar *) nptr; + /* If fixed length string */ + if (endptr) + { + /* + Make sure string length is even. + Odd length indicates a bug in the caller. + Assert in debug, round in production. 
+ */ + DBUG_ASSERT((*endptr - (const char *) s) % 2 == 0); + end= s + ((*endptr - (const char*) s) / 2) * 2; + + for ( ; ; ) /* Skip leading spaces and tabs */ + { + if ((res= mb_wc(cs, &wc, s, end)) <= 0) + goto no_conv; + s+= res; + if (wc != ' ' && wc != '\t') + break; + } + } + else + { + /* We don't support null terminated strings in UCS2 */ + goto no_conv; + } + + /* Check for a sign. The cutoff values chosen here split the largest allowed magnitude into its leading digits (cutoff), the next nine digits (cutoff2) and the last two digits (cutoff3); they are compared with i, j and k once 20 digits have been read. */ + negative= 0; + if (wc == '-') + { + *error= -1; /* Mark as negative number */ + negative= 1; + if ((res= mb_wc(cs, &wc, s, end)) <= 0) + goto no_conv; + s+= res; /* wc is now expected to hold the first digit. */ + cutoff= MAX_NEGATIVE_NUMBER / LFACTOR2; + cutoff2= (MAX_NEGATIVE_NUMBER % LFACTOR2) / 100; + cutoff3= MAX_NEGATIVE_NUMBER % 100; + } + else + { + *error= 0; + if (wc == '+') + { + if ((res= mb_wc(cs, &wc, s, end)) <= 0) + goto no_conv; + s+= res; /* wc is now expected to hold the first digit. */ + } + cutoff= ULONGLONG_MAX / LFACTOR2; + cutoff2= ULONGLONG_MAX % LFACTOR2 / 100; + cutoff3= ULONGLONG_MAX % 100; + } + + /* + The code below assumes that 'wc' holds the first digit + and 's' points to the next character after it. + + Scan pre-zeros if any. 
+ */ + if (wc == '0') + { + i= 0; + for ( ; ; s+= res) + { + if (s == end) + goto end_i; /* Return 0 */ + if ((res= mb_wc(cs, &wc, s, end)) <= 0) + goto no_conv; + if (wc != '0') + break; + } + n_end= s + 2 * INIT_CNT; + } + else + { + /* Read first digit to check that it's a valid number */ + if ((i= (wc - '0')) > 9) + goto no_conv; + n_end= s + 2 * (INIT_CNT-1); + } + + /* Handle first 9 digits and store them in i */ + if (n_end > end) + n_end= end; + for ( ; ; s+= res) + { + if ((res= mb_wc(cs, &wc, s, n_end)) <= 0) + break; + if (wc2digit_uchar(&c, wc)) + goto end_i; + i= i*10+c; + } + if (s == end) + goto end_i; + + /* Handle next 9 digits and store them in j */ + j= 0; + start= s; /* Used to know how much to shift i */ + n_end= true_end= s + 2 * INIT_CNT; + if (n_end > end) + n_end= end; + do + { + if ((res= mb_wc(cs, &wc, s, end)) <= 0) + goto no_conv; + if (wc2digit_uchar(&c, wc)) + goto end_i_and_j; + s+= res; + j= j * 10 + c; + } while (s != n_end); + if (s == end) + { + if (s != true_end) + goto end_i_and_j; + goto end3; + } + + /* Handle the next 1 or 2 digits and store them in k */ + if ((res= mb_wc(cs, &wc, s, end)) <= 0) + goto no_conv; + if ((k= (wc - '0')) > 9) + goto end3; + s+= res; + + if (s == end) + goto end4; + if ((res= mb_wc(cs, &wc, s, end)) <= 0) + goto no_conv; + if (wc2digit_uchar(&c, wc)) + goto end4; + s+= res; + k= k*10+c; + *endptr= (char*) s; + + /* number string should have ended here */ + if (s != end && mb_wc(cs, &wc, s, end) > 0 && ((uchar) (wc - '0')) <= 9) + goto overflow; + + /* Check that we didn't get an overflow with the last digit */ + if (i > cutoff || (i == cutoff && ((j > cutoff2 || j == cutoff2) && + k > cutoff3))) + goto overflow; + li=i*LFACTOR2+ (ulonglong) j*100 + k; + return (longlong) li; + +overflow: /* *endptr is set here */ + *error= MY_ERRNO_ERANGE; + return negative ? LONGLONG_MIN : (longlong) ULONGLONG_MAX; + +end_i: + *endptr= (char*) s; + return (negative ? 
((longlong) -(long) i) : (longlong) i); + +end_i_and_j: + li= (ulonglong) i * lfactor[(size_t) (s-start) / 2] + j; + *endptr= (char*) s; + return (negative ? -((longlong) li) : (longlong) li); + +end3: + li=(ulonglong) i*LFACTOR+ (ulonglong) j; + *endptr= (char*) s; + return (negative ? -((longlong) li) : (longlong) li); + +end4: + li=(ulonglong) i*LFACTOR1+ (ulonglong) j * 10 + k; + *endptr= (char*) s; + if (negative) + { + if (li > MAX_NEGATIVE_NUMBER) + goto overflow; + return -((longlong) li); + } + return (longlong) li; + +no_conv: + /* There was no number to convert. */ + *error= MY_ERRNO_EDOM; + *endptr= (char *) nptr; + return 0; +} + + +static size_t +my_scan_mb2(CHARSET_INFO *cs __attribute__((unused)), + const char *str, const char *end, int sequence_type) +{ + const char *str0= str; + my_wc_t wc; + my_charset_conv_mb_wc mb_wc= cs->cset->mb_wc; + int res; + + switch (sequence_type) + { + case MY_SEQ_SPACES: + for (res= mb_wc(cs, &wc, (const uchar *) str, (const uchar *) end); + res > 0 && wc == ' '; + str+= res, + res= mb_wc(cs, &wc, (const uchar *) str, (const uchar *) end)) + { + } + return (size_t) (str - str0); + case MY_SEQ_NONSPACES: + DBUG_ASSERT(0); /* Not implemented */ + /* pass through */ + default: + return 0; + } +} + + +static void +my_fill_mb2(CHARSET_INFO *cs, char *s, size_t slen, int fill) +{ + char buf[10], *last; + size_t buflen, remainder; + + DBUG_ASSERT((slen % 2) == 0); + + buflen= my_ci_wc_mb(cs, (my_wc_t) fill, (uchar*) buf, + (uchar*) buf + sizeof(buf)); + + DBUG_ASSERT(buflen > 0); + + /* + "last" in the last position where a sequence of "buflen" bytes can start. + */ + for (last= s + slen - buflen; s <= last; s+= buflen) + { + /* Enough space for the character */ + memcpy(s, buf, buflen); + } + + /* + If there are some more space which is not enough + for the whole multibyte character, then add trailing zeros. 
+ */ + if ((remainder= last + buflen - s) > 0) + bzero(s, (size_t) remainder); +} + +/* + Minimal printf-like formatter producing 2-byte characters: every output + character is written as a 0x00 byte followed by the character byte. + + Handles %s, %d and %u. Digits, '.', '-' and one 'l' after '%' are skipped. + Any other conversion is emitted as '%'. + + @param dst Output buffer + @param n Size of dst in bytes; nothing is written when n is 0 + @param fmt Format string + @param ap Arguments for the conversions + + @return Number of bytes written, not counting the terminating 0x00 byte +*/ +static size_t +my_vsnprintf_mb2(char *dst, size_t n, const char* fmt, va_list ap) +{ + char *start= dst, *end; + + if (!n) /* No room even for the terminator */ + return 0; + end= dst + n - 1; + for (; *fmt ; fmt++) + { + if (fmt[0] != '%') + { + /* + Two bytes are written per character and dst advances in steps of 2, + so testing dst == end would never stop the loop for an even n. + */ + if ((size_t) (end - dst) < 2) /* End of buffer */ + break; + + *dst++='\0'; + *dst++= *fmt; /* Copy ordinary char */ + continue; + } + + fmt++; + + /* Skip if max size is used (to be compatible with printf) */ + while ( (*fmt >= '0' && *fmt <= '9') || *fmt == '.' || *fmt == '-') + fmt++; + + if (*fmt == 'l') + fmt++; + + if (*fmt == 's') /* String parameter */ + { + char *par= va_arg(ap, char *); + size_t plen; + size_t left_len= (size_t)(end-dst); + if (!par) + par= (char*) "(null)"; + plen= strlen(par); + /* Truncate to the space left; with less than 2 bytes left nothing fits. */ + if (left_len <= plen * 2) + plen = left_len < 2 ? 0 : left_len / 2 - 1; + + for ( ; plen ; plen--, dst+=2, par++) + { + dst[0]= '\0'; + dst[1]= par[0]; + } + continue; + } + else if (*fmt == 'd' || *fmt == 'u') /* Integer parameter */ + { + int iarg; + char nbuf[16]; + char *pbuf= nbuf; + + if ((size_t) (end - dst) < 32) + break; + iarg= va_arg(ap, int); + if (*fmt == 'd') + int10_to_str((long) iarg, nbuf, -10); + else + int10_to_str((long) (uint) iarg, nbuf,10); + + for (; pbuf[0]; pbuf++) + { + *dst++= '\0'; + *dst++= *pbuf; + } + continue; + } + + /* We come here on '%%', unknown code or too long parameter */ + if ((size_t) (end - dst) < 2) + break; + *dst++= '\0'; + *dst++= '%'; /* % used as % or unknown code */ + if (!*fmt) /* Format ended right after '%': do not step past its NUL */ + break; + } + + DBUG_ASSERT(dst <= end); + *dst='\0'; /* End of errmessage */ + return (size_t) (dst - start); +} + +/* Variadic wrapper around my_vsnprintf_mb2(). */ +static size_t +my_snprintf_mb2(CHARSET_INFO *cs __attribute__((unused)), + char* to, size_t n, const char* fmt, ...) 
+{ + size_t ret; + va_list args; + va_start(args,fmt); + ret= my_vsnprintf_mb2(to, n, fmt, args); + va_end(args); + return ret; +} + +/* Length of ptr in bytes after removing trailing 2-byte spaces (0x00 0x20 byte pairs). */ +static size_t +my_lengthsp_mb2(CHARSET_INFO *cs __attribute__((unused)), + const char *ptr, size_t length) +{ + const char *end= ptr + length; + while (end > ptr + 1 && end[-1] == ' ' && end[-2] == '\0') + end-= 2; + return (size_t) (end - ptr); +} + +#endif /* HAVE_CHARSET_mb2*/ + + +/* + Next part is actually HAVE_CHARSET_utf16-specific, + but the JSON functions needed my_utf16_uni() + so the #ifdef was moved lower. +*/ +#include "ctype-utf16.h" + +#define IS_MB2_CHAR(b0,b1) (!MY_UTF16_SURROGATE_HEAD(b0)) +#define IS_MB4_CHAR(b0,b1,b2,b3) (MY_UTF16_HIGH_HEAD(b0) && MY_UTF16_LOW_HEAD(b2)) +/* Weight of the 2-byte character (b0, b1) as returned by my_general_ci_bmp_char_to_weight(). */ +static inline int my_weight_mb2_utf16mb2_general_ci(uchar b0, uchar b1) +{ + my_wc_t wc= MY_UTF16_WC2(b0, b1); + return my_general_ci_bmp_char_to_weight((uint16) wc); +} +/* Each group of defines below configures one inclusion of strcoll.inl; MY_FUNCTION_NAME sets the suffix of the generated names (presumably the collation functions - see strcoll.inl). */ +#define MY_FUNCTION_NAME(x) my_ ## x ## _utf16_general_ci +#define DEFINE_STRNXFRM_UNICODE +#define DEFINE_STRNXFRM_UNICODE_NOPAD +#define MY_MB_WC(cs, pwc, s, e) my_mb_wc_utf16_quick(pwc, s, e) +#define OPTIMIZE_ASCII 0 +#define MY_WC_WEIGHT(x) my_general_ci_char_to_weight(x) +#define WEIGHT_ILSEQ(x) (0xFF0000 + (uchar) (x)) +#define WEIGHT_MB2(b0,b1) my_weight_mb2_utf16mb2_general_ci(b0,b1) +#define WEIGHT_MB4(b0,b1,b2,b3) MY_CS_REPLACEMENT_CHARACTER +#include "strcoll.inl" + +#define MY_FUNCTION_NAME(x) my_ ## x ## _utf16_bin +#define WEIGHT_ILSEQ(x) (0xFF0000 + (uchar) (x)) +#define WEIGHT_MB2(b0,b1) ((int) MY_UTF16_WC2(b0, b1)) +#define WEIGHT_MB4(b0,b1,b2,b3) ((int) MY_UTF16_WC4(b0, b1, b2, b3)) +#include "strcoll.inl" + +#define DEFINE_STRNNCOLLSP_NOPAD +#define MY_FUNCTION_NAME(x) my_ ## x ## _utf16_general_nopad_ci +#define WEIGHT_ILSEQ(x) (0xFF0000 + (uchar) (x)) +#define WEIGHT_MB2(b0,b1) my_weight_mb2_utf16mb2_general_ci(b0,b1) +#define WEIGHT_MB4(b0,b1,b2,b3) MY_CS_REPLACEMENT_CHARACTER +#include "strcoll.inl" + +#define DEFINE_STRNNCOLLSP_NOPAD +#define 
MY_FUNCTION_NAME(x) my_ ## x ## _utf16_nopad_bin +#define WEIGHT_ILSEQ(x) (0xFF0000 + (uchar) (x)) +#define WEIGHT_MB2(b0,b1) ((int) MY_UTF16_WC2(b0, b1)) +#define WEIGHT_MB4(b0,b1,b2,b3) ((int) MY_UTF16_WC4(b0, b1, b2, b3)) +#include "strcoll.inl" + +#undef IS_MB2_CHAR +#undef IS_MB4_CHAR + +/* + These two functions are used in JSON library, so made exportable + and unconditionally compiled into the library. + + my_utf16_uni() decodes one character from [s, e) into *pwc by + forwarding to my_mb_wc_utf16_quick() and returns its result. +*/ + +/*static*/ int +my_utf16_uni(CHARSET_INFO *cs __attribute__((unused)), + my_wc_t *pwc, const uchar *s, const uchar *e) +{ + return my_mb_wc_utf16_quick(pwc, s, e); +} + +/* Encode the code point wc as big-endian UTF-16 into [s, e). Returns 2 or 4 (bytes written), MY_CS_TOOSMALL2 or MY_CS_TOOSMALL4 when the buffer is too short, and MY_CS_ILUNI for surrogate code points and for values above 0x10FFFF. */ +/*static*/ int +my_uni_utf16(CHARSET_INFO *cs __attribute__((unused)), + my_wc_t wc, uchar *s, uchar *e) +{ + if (wc <= 0xFFFF) + { + if (s + 2 > e) + return MY_CS_TOOSMALL2; + if (MY_UTF16_SURROGATE(wc)) + return MY_CS_ILUNI; + *s++= (uchar) (wc >> 8); + *s= (uchar) (wc & 0xFF); + return 2; + } + + if (wc <= 0x10FFFF) + { + if (s + 4 > e) + return MY_CS_TOOSMALL4; + /* Subtract 0x10000, then split the remaining 20 bits: the top 10 go into the 0xD8xx unit, the low 10 into the 0xDCxx unit. */ + *s++= (uchar) ((wc-= 0x10000) >> 18) | 0xD8; + *s++= (uchar) (wc >> 10) & 0xFF; + *s++= (uchar) ((wc >> 8) & 3) | 0xDC; + *s= (uchar) wc & 0xFF; + return 4; + } + + return MY_CS_ILUNI; +} + + +#ifdef HAVE_CHARSET_utf16 + +const char charset_name_utf16le[]= "utf16le"; +#define charset_name_utf16le_length (sizeof(charset_name_utf16le)-1) + +/* Upper-case src into dst one character at a time using cs->casefold. Conversion stops at the first character that cannot be decoded or whose converted form does not take the same number of bytes. Always returns srclen. */ +static size_t +my_caseup_utf16(CHARSET_INFO *cs, const char *src, size_t srclen, + char *dst, size_t dstlen) +{ + my_wc_t wc; + my_charset_conv_mb_wc mb_wc= cs->cset->mb_wc; + my_charset_conv_wc_mb wc_mb= cs->cset->wc_mb; + int res; + const char *srcend= src + srclen; + char *dstend= dst + dstlen; + MY_CASEFOLD_INFO *uni_plane= cs->casefold; + DBUG_ASSERT(srclen <= dstlen); + + while ((src < srcend) && + (res= mb_wc(cs, &wc, (uchar *) src, (uchar *) srcend)) > 0) + { + my_toupper_unicode(uni_plane, &wc); + if (res != wc_mb(cs, wc, (uchar *) dst, (uchar *) dstend)) + break; + src+= res; + dst+= res; + } + return srclen; +} + + +static void +my_hash_sort_utf16_nopad(CHARSET_INFO *cs, + 
const uchar *s, size_t slen, + ulong *nr1, ulong *nr2) +{ + my_wc_t wc; + my_charset_conv_mb_wc mb_wc= cs->cset->mb_wc; + int res; + const uchar *e= s + slen; + MY_CASEFOLD_INFO *uni_plane= cs->casefold; + register ulong m1= *nr1, m2= *nr2; + /* Decode each character, map it with my_tosort_unicode and feed it to the running hash; stops at the first character that fails to decode. */ + while ((s < e) && (res= mb_wc(cs, &wc, (uchar *) s, (uchar *) e)) > 0) + { + my_tosort_unicode(uni_plane, &wc); + MY_HASH_ADD_16(m1, m2, wc); + s+= res; + } + *nr1= m1; + *nr2= m2; +} + +/* Hash s[0..slen) after trimming what my_ci_lengthsp() reports as trailing spaces, then delegate to my_hash_sort_utf16_nopad. */ +static void +my_hash_sort_utf16(CHARSET_INFO *cs, const uchar *s, size_t slen, + ulong *nr1, ulong *nr2) +{ + size_t lengthsp= my_ci_lengthsp(cs, (const char *) s, slen); + my_hash_sort_utf16_nopad(cs, s, lengthsp, nr1, nr2); +} + +/* Lower-case src into dst one character at a time through cs->cset->mb_wc and wc_mb. Stops early when a character cannot be decoded or its re-encoded length differs; always returns srclen. */ +static size_t +my_casedn_utf16(CHARSET_INFO *cs, const char *src, size_t srclen, + char *dst, size_t dstlen) +{ + my_wc_t wc; + my_charset_conv_mb_wc mb_wc= cs->cset->mb_wc; + my_charset_conv_wc_mb wc_mb= cs->cset->wc_mb; + int res; + const char *srcend= src + srclen; + char *dstend= dst + dstlen; + MY_CASEFOLD_INFO *uni_plane= cs->casefold; + DBUG_ASSERT(srclen <= dstlen); + + while ((src < srcend) && + (res= mb_wc(cs, &wc, (uchar *) src, (uchar *) srcend)) > 0) + { + my_tolower_unicode(uni_plane, &wc); + if (res != wc_mb(cs, wc, (uchar *) dst, (uchar *) dstend)) + break; + src+= res; + dst+= res; + } + return srclen; +} + +/* Byte length of the character at str, obtained by decoding it with my_ci_mb_wc(); the decoded code point is discarded and the decoder's return value is passed through unchanged. */ +static int +my_charlen_utf16(CHARSET_INFO *cs, const uchar *str, const uchar *end) +{ + my_wc_t wc; + return my_ci_mb_wc(cs, &wc, str, end); +} + + +#define MY_FUNCTION_NAME(x) my_ ## x ## _utf16 +#define CHARLEN(cs,str,end) my_charlen_utf16(cs,str,end) +#define DEFINE_WELL_FORMED_CHAR_LENGTH_USING_CHARLEN +#include "ctype-mb.inl" +#undef MY_FUNCTION_NAME +#undef CHARLEN +#undef DEFINE_WELL_FORMED_CHAR_LENGTH_USING_CHARLEN +/* Defines my_well_formed_char_length_utf16 */ + +/* Count the characters in [b,e): advance by whatever my_ismbchar() returns until it returns 0. */ +static size_t +my_numchars_utf16(CHARSET_INFO *cs, + const char *b, const char *e) +{ + size_t nchars= 0; + for ( ; ; nchars++) + { + size_t charlen= my_ismbchar(cs, b, e); + if (!charlen) + break; + b+= charlen; + } 
+ return nchars; +} + +/* Byte offset of character number pos within [b,e). When the string holds fewer than pos characters the result is (e + 2 - b), i.e. an offset beyond the end of the string. */ +static size_t +my_charpos_utf16(CHARSET_INFO *cs, + const char *b, const char *e, size_t pos) +{ + const char *b0= b; + uint charlen; + + for ( ; pos; b+= charlen, pos--) + { + if (!(charlen= my_ismbchar(cs, b, e))) + return (e + 2 - b0); /* Error, return pos outside the string */ + } /* pos is always 0 when the loop ends normally, so the (b - b0) arm is the one taken here. */ + return (size_t) (pos ? (e + 2 - b0) : (b - b0)); +} + +/* Wildcard match that passes cs->casefold to my_wildcmp_unicode(). */ +static int +my_wildcmp_utf16_ci(CHARSET_INFO *cs, + const char *str,const char *str_end, + const char *wildstr,const char *wildend, + int escape, int w_one, int w_many) +{ + MY_CASEFOLD_INFO *uni_plane= cs->casefold; + return my_wildcmp_unicode(cs, str, str_end, wildstr, wildend, + escape, w_one, w_many, uni_plane); +} + +/* Wildcard match that passes NULL instead of a casefold table to my_wildcmp_unicode(). */ +static int +my_wildcmp_utf16_bin(CHARSET_INFO *cs, + const char *str,const char *str_end, + const char *wildstr,const char *wildend, + int escape, int w_one, int w_many) +{ + return my_wildcmp_unicode(cs, str, str_end, wildstr, wildend, + escape, w_one, w_many, NULL); +} + +/* Feed every byte of pos[0..len) to MY_HASH_ADD, updating *nr1 and *nr2. */ +static void +my_hash_sort_utf16_nopad_bin(CHARSET_INFO *cs __attribute__((unused)), + const uchar *pos, size_t len, + ulong *nr1, ulong *nr2) +{ + const uchar *end= pos + len; + register ulong m1= *nr1, m2= *nr2; + + for ( ; pos < end ; pos++) + { + MY_HASH_ADD(m1, m2, (uint)*pos); + } + *nr1= m1; + *nr2= m2; +} + +/* Byte-wise hash of pos[0..len) limited to the length reported by my_ci_lengthsp(). */ +static void +my_hash_sort_utf16_bin(CHARSET_INFO *cs, + const uchar *pos, size_t len, ulong *nr1, ulong *nr2) +{ + size_t lengthsp= my_ci_lengthsp(cs, (const char *) pos, len); + my_hash_sort_utf16_nopad_bin(cs, pos, lengthsp, nr1, nr2); +} + +/* Collation handler referenced by my_charset_utf16_general_ci. */ +static MY_COLLATION_HANDLER my_collation_utf16_general_ci_handler = +{ + NULL, /* init */ + my_strnncoll_utf16_general_ci, + my_strnncollsp_utf16_general_ci, + my_strnncollsp_nchars_utf16_general_ci, + my_strnxfrm_utf16_general_ci, + my_strnxfrmlen_unicode, + my_like_range_generic, + my_wildcmp_utf16_ci, + my_strcasecmp_mb2_or_mb4, + my_instr_mb, + my_hash_sort_utf16, + my_propagate_simple, + my_min_str_mb_simple, + my_max_str_mb_simple, + 
my_ci_get_id_generic, + my_ci_get_collation_name_generic +}; + +/* Collation handler referenced by my_charset_utf16_bin. */ +static MY_COLLATION_HANDLER my_collation_utf16_bin_handler = +{ + NULL, /* init */ + my_strnncoll_utf16_bin, + my_strnncollsp_utf16_bin, + my_strnncollsp_nchars_utf16_bin, + my_strnxfrm_unicode_full_bin, + my_strnxfrmlen_unicode_full_bin, + my_like_range_generic, + my_wildcmp_utf16_bin, + my_strcasecmp_mb2_or_mb4, + my_instr_mb, + my_hash_sort_utf16_bin, + my_propagate_simple, + my_min_str_mb_simple, + my_max_str_mb_simple, + my_ci_get_id_generic, + my_ci_get_collation_name_generic +}; + +/* Collation handler referenced by my_charset_utf16_general_nopad_ci; differs from the general_ci handler in the strnncollsp, strnxfrm, hash and min_str entries. */ +static MY_COLLATION_HANDLER my_collation_utf16_general_nopad_ci_handler = +{ + NULL, /* init */ + my_strnncoll_utf16_general_ci, + my_strnncollsp_utf16_general_nopad_ci, + my_strnncollsp_nchars_utf16_general_nopad_ci, + my_strnxfrm_nopad_utf16_general_ci, + my_strnxfrmlen_unicode, + my_like_range_generic, + my_wildcmp_utf16_ci, + my_strcasecmp_mb2_or_mb4, + my_instr_mb, + my_hash_sort_utf16_nopad, + my_propagate_simple, + my_min_str_mb_simple_nopad, + my_max_str_mb_simple, + my_ci_get_id_generic, + my_ci_get_collation_name_generic +}; + +/* Collation handler referenced by my_charset_utf16_nopad_bin; differs from the bin handler in the strnncollsp, strnxfrm, hash and min_str entries. */ +static MY_COLLATION_HANDLER my_collation_utf16_nopad_bin_handler = +{ + NULL, /* init */ + my_strnncoll_utf16_bin, + my_strnncollsp_utf16_nopad_bin, + my_strnncollsp_nchars_utf16_nopad_bin, + my_strnxfrm_unicode_full_nopad_bin, + my_strnxfrmlen_unicode_full_bin, + my_like_range_generic, + my_wildcmp_utf16_bin, + my_strcasecmp_mb2_or_mb4, + my_instr_mb, + my_hash_sort_utf16_nopad_bin, + my_propagate_simple, + my_min_str_mb_simple_nopad, + my_max_str_mb_simple, + my_ci_get_id_generic, + my_ci_get_collation_name_generic +}; + +/* Character set handler shared by the four big-endian utf16 charset_info_st definitions in this file; not static. */ +MY_CHARSET_HANDLER my_charset_utf16_handler= +{ + NULL, /* init */ + my_numchars_utf16, + my_charpos_utf16, + my_lengthsp_mb2, + my_numcells_mb, + my_utf16_uni, /* mb_wc */ + my_uni_utf16, /* wc_mb */ + my_mb_ctype_mb, + my_caseup_str_mb2_or_mb4, + my_casedn_str_mb2_or_mb4, + my_caseup_utf16, + my_casedn_utf16, + my_snprintf_mb2, + my_l10tostr_mb2_or_mb4, + 
my_ll10tostr_mb2_or_mb4, + my_fill_mb2, + my_strntol_mb2_or_mb4, + my_strntoul_mb2_or_mb4, + my_strntoll_mb2_or_mb4, + my_strntoull_mb2_or_mb4, + my_strntod_mb2_or_mb4, + my_strtoll10_mb2, + my_strntoull10rnd_mb2_or_mb4, + my_scan_mb2, + my_charlen_utf16, + my_well_formed_char_length_utf16, + my_copy_fix_mb2_or_mb4, + my_uni_utf16, + my_wc_to_printable_generic, + my_casefold_multiply_1, + my_casefold_multiply_1 +}; + +/* utf16_general_ci: number 54, flagged MY_CS_PRIMARY, 2 to 4 bytes per character. */ +struct charset_info_st my_charset_utf16_general_ci= +{ + 54,0,0, /* number */ + MY_CS_COMPILED|MY_CS_PRIMARY|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII, + { charset_name_utf16, charset_name_utf16_length }, /* cs name */ + { STRING_WITH_LEN("utf16_general_ci") }, /* name */ + "UTF-16 Unicode", /* comment */ + NULL, /* tailoring */ + NULL, /* ctype */ + NULL, /* to_lower */ + NULL, /* to_upper */ + NULL, /* sort_order */ + NULL, /* uca */ + NULL, /* tab_to_uni */ + NULL, /* tab_from_uni */ + &my_casefold_default,/* casefold */ + NULL, /* state_map */ + NULL, /* ident_map */ + 1, /* strxfrm_multiply */ + 2, /* mbminlen */ + 4, /* mbmaxlen */ + 0, /* min_sort_char */ + 0xFFFF, /* max_sort_char */ + ' ', /* pad char */ + 0, /* escape_with_backslash_is_dangerous */ + MY_CS_COLL_LEVELS_S1, + &my_charset_utf16_handler, + &my_collation_utf16_general_ci_handler +}; + +/* utf16_bin: number 55, flagged MY_CS_BINSORT, 2 to 4 bytes per character. */ +struct charset_info_st my_charset_utf16_bin= +{ + 55,0,0, /* number */ + MY_CS_COMPILED|MY_CS_BINSORT|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII, + { charset_name_utf16, charset_name_utf16_length }, /* cs name */ + { STRING_WITH_LEN("utf16_bin") }, /* name */ + "UTF-16 Unicode", /* comment */ + NULL, /* tailoring */ + NULL, /* ctype */ + NULL, /* to_lower */ + NULL, /* to_upper */ + NULL, /* sort_order */ + NULL, /* uca */ + NULL, /* tab_to_uni */ + NULL, /* tab_from_uni */ + &my_casefold_default,/* casefold */ + NULL, /* state_map */ + NULL, /* ident_map */ + 1, /* strxfrm_multiply */ + 2, /* mbminlen */ + 4, /* mbmaxlen */ + 0, /* min_sort_char */ + 0xFFFF, /* max_sort_char */ + ' ', /* 
pad char */ + 0, /* escape_with_backslash_is_dangerous */ + MY_CS_COLL_LEVELS_S1, + &my_charset_utf16_handler, + &my_collation_utf16_bin_handler +}; + +/* utf16_general_nopad_ci: number MY_NOPAD_ID(54), flagged MY_CS_NOPAD, paired with the general_nopad_ci collation handler. */ +struct charset_info_st my_charset_utf16_general_nopad_ci= +{ + MY_NOPAD_ID(54),0,0, /* number */ + MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII|MY_CS_NOPAD, + { charset_name_utf16, charset_name_utf16_length }, /* cs name */ + { STRING_WITH_LEN("utf16_general_nopad_ci") }, /* name */ + "UTF-16 Unicode", /* comment */ + NULL, /* tailoring */ + NULL, /* ctype */ + NULL, /* to_lower */ + NULL, /* to_upper */ + NULL, /* sort_order */ + NULL, /* uca */ + NULL, /* tab_to_uni */ + NULL, /* tab_from_uni */ + &my_casefold_default,/* casefold */ + NULL, /* state_map */ + NULL, /* ident_map */ + 1, /* strxfrm_multiply */ + 2, /* mbminlen */ + 4, /* mbmaxlen */ + 0, /* min_sort_char */ + 0xFFFF, /* max_sort_char */ + ' ', /* pad char */ + 0, /* escape_with_backslash_is_dangerous */ + MY_CS_COLL_LEVELS_S1, + &my_charset_utf16_handler, + &my_collation_utf16_general_nopad_ci_handler +}; + +/* utf16_nopad_bin: number MY_NOPAD_ID(55), flagged MY_CS_BINSORT and MY_CS_NOPAD. */ +struct charset_info_st my_charset_utf16_nopad_bin= +{ + MY_NOPAD_ID(55),0,0, /* number */ + MY_CS_COMPILED|MY_CS_BINSORT|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII| + MY_CS_NOPAD, + { charset_name_utf16, charset_name_utf16_length}, /* cs name */ + { STRING_WITH_LEN("utf16_nopad_bin") }, /* name */ + "UTF-16 Unicode", /* comment */ + NULL, /* tailoring */ + NULL, /* ctype */ + NULL, /* to_lower */ + NULL, /* to_upper */ + NULL, /* sort_order */ + NULL, /* uca */ + NULL, /* tab_to_uni */ + NULL, /* tab_from_uni */ + &my_casefold_default,/* casefold */ + NULL, /* state_map */ + NULL, /* ident_map */ + 1, /* strxfrm_multiply */ + 2, /* mbminlen */ + 4, /* mbmaxlen */ + 0, /* min_sort_char */ + 0xFFFF, /* max_sort_char */ + ' ', /* pad char */ + 0, /* escape_with_backslash_is_dangerous */ + MY_CS_COLL_LEVELS_S1, + &my_charset_utf16_handler, + &my_collation_utf16_nopad_bin_handler +}; + + +#define IS_MB2_CHAR(b0,b1) 
(!MY_UTF16_SURROGATE_HEAD(b1)) +#define IS_MB4_CHAR(b0,b1,b2,b3) (MY_UTF16_HIGH_HEAD(b1) && MY_UTF16_LOW_HEAD(b3)) +/* utf16le instantiations of strcoll.inl: the head tests above look at the second byte of each unit (b1, b3) and the WEIGHT_* macros below pass each byte pair swapped. */ +#define MY_FUNCTION_NAME(x) my_ ## x ## _utf16le_general_ci +#define DEFINE_STRNXFRM_UNICODE +#define DEFINE_STRNXFRM_UNICODE_NOPAD +#define MY_MB_WC(cs, pwc, s, e) (my_ci_mb_wc(cs, pwc, s, e)) +#define OPTIMIZE_ASCII 0 +#define MY_WC_WEIGHT(x) my_general_ci_char_to_weight(x) +#define WEIGHT_ILSEQ(x) (0xFF0000 + (uchar) (x)) +#define WEIGHT_MB2(b0,b1) my_weight_mb2_utf16mb2_general_ci(b1,b0) +#define WEIGHT_MB4(b0,b1,b2,b3) MY_CS_REPLACEMENT_CHARACTER +#include "strcoll.inl" + +#define MY_FUNCTION_NAME(x) my_ ## x ## _utf16le_bin +#define WEIGHT_ILSEQ(x) (0xFF0000 + (uchar) (x)) +#define WEIGHT_MB2(b0,b1) ((int) MY_UTF16_WC2(b1, b0)) +#define WEIGHT_MB4(b0,b1,b2,b3) ((int) MY_UTF16_WC4(b1, b0, b3, b2)) +#include "strcoll.inl" + +#define DEFINE_STRNNCOLLSP_NOPAD +#define MY_FUNCTION_NAME(x) my_ ## x ## _utf16le_general_nopad_ci +#define WEIGHT_ILSEQ(x) (0xFF0000 + (uchar) (x)) +#define WEIGHT_MB2(b0,b1) my_weight_mb2_utf16mb2_general_ci(b1,b0) +#define WEIGHT_MB4(b0,b1,b2,b3) MY_CS_REPLACEMENT_CHARACTER +#include "strcoll.inl" + +#define DEFINE_STRNNCOLLSP_NOPAD +#define MY_FUNCTION_NAME(x) my_ ## x ## _utf16le_nopad_bin +#define WEIGHT_ILSEQ(x) (0xFF0000 + (uchar) (x)) +#define WEIGHT_MB2(b0,b1) ((int) MY_UTF16_WC2(b1, b0)) +#define WEIGHT_MB4(b0,b1,b2,b3) ((int) MY_UTF16_WC4(b1, b0, b3, b2)) +#include "strcoll.inl" + +#undef IS_MB2_CHAR +#undef IS_MB4_CHAR +/* Decode one character from [s,e) into *pwc, reading 16-bit units with uint2korr(). Returns 2 for a non-surrogate unit, 4 for a high+low surrogate pair, MY_CS_TOOSMALL2/MY_CS_TOOSMALL4 when too few bytes remain, MY_CS_ILSEQ for a lone low surrogate or a high surrogate not followed by a low one. */ +static int +my_utf16le_uni(CHARSET_INFO *cs __attribute__((unused)), + my_wc_t *pwc, const uchar *s, const uchar *e) +{ + my_wc_t lo; + + if (s + 2 > e) + return MY_CS_TOOSMALL2; + + if ((*pwc= uint2korr(s)) < MY_UTF16_SURROGATE_HIGH_FIRST || + (*pwc > MY_UTF16_SURROGATE_LOW_LAST)) + return 2; /* [0000-D7FF,E000-FFFF] */ + + if (*pwc >= MY_UTF16_SURROGATE_LOW_FIRST) + return MY_CS_ILSEQ; /* [DC00-DFFF] Low surrogate part without high part */ + + if (s + 4 > e) + return MY_CS_TOOSMALL4; + + s+= 
2; + + if ((lo= uint2korr(s)) < MY_UTF16_SURROGATE_LOW_FIRST || + lo > MY_UTF16_SURROGATE_LOW_LAST) + return MY_CS_ILSEQ; /* Expected low surrogate part, got something else */ + /* Combine the low 10 bits of each surrogate and add the 0x10000 offset. */ + *pwc= 0x10000 + (((*pwc & 0x3FF) << 10) | (lo & 0x3FF)); + return 4; +} + +/* Encode wc into [s,e) using int2store()/int4store(). Returns 2 for a non-surrogate code point up to 0xFFFF, 4 for 0x10000..0x10FFFF (high surrogate stored first), MY_CS_TOOSMALL2/MY_CS_TOOSMALL4 when the buffer is too short, MY_CS_ILUNI for surrogates and values above 0x10FFFF. */ +static int +my_uni_utf16le(CHARSET_INFO *cs __attribute__((unused)), + my_wc_t wc, uchar *s, uchar *e) +{ + uint32 first, second, total; + if (wc < MY_UTF16_SURROGATE_HIGH_FIRST || + (wc > MY_UTF16_SURROGATE_LOW_LAST && + wc <= 0xFFFF)) + { + if (s + 2 > e) + return MY_CS_TOOSMALL2; + int2store(s, wc); + return 2; /* [0000-D7FF,E000-FFFF] */ + } + /* Only surrogates and values above 0xFFFF reach this point, so "wc < 0xFFFF" rejects exactly the surrogate range. */ + if (wc < 0xFFFF || wc > 0x10FFFF) + return MY_CS_ILUNI; /* [D800-DFFF,10FFFF+] */ + + if (s + 4 > e) + return MY_CS_TOOSMALL4; + + wc-= 0x10000; + first= (0xD800 | ((wc >> 10) & 0x3FF)); + second= (0xDC00 | (wc & 0x3FF)); + total= first | (second << 16); + int4store(s, total); + return 4; /* [010000-10FFFF] */ +} + +/* Return the length of ptr[0..length) once trailing 16-bit units that uint2korr() reads as ' ' are dropped. */ +static size_t +my_lengthsp_utf16le(CHARSET_INFO *cs __attribute__((unused)), + const char *ptr, size_t length) +{ + const char *end= ptr + length; + while (end > ptr + 1 && uint2korr(end - 2) == ' ') + end-= 2; + return (size_t) (end - ptr); +} + +/* Collation handler referenced by my_charset_utf16le_general_ci; shares the wildcmp and hash functions with the big-endian utf16 handlers. */ +static MY_COLLATION_HANDLER my_collation_utf16le_general_ci_handler = +{ + NULL, /* init */ + my_strnncoll_utf16le_general_ci, + my_strnncollsp_utf16le_general_ci, + my_strnncollsp_nchars_utf16le_general_ci, + my_strnxfrm_utf16le_general_ci, + my_strnxfrmlen_unicode, + my_like_range_generic, + my_wildcmp_utf16_ci, + my_strcasecmp_mb2_or_mb4, + my_instr_mb, + my_hash_sort_utf16, + my_propagate_simple, + my_min_str_mb_simple, + my_max_str_mb_simple, + my_ci_get_id_generic, + my_ci_get_collation_name_generic +}; + +/* Collation handler referenced by my_charset_utf16le_bin. */ +static MY_COLLATION_HANDLER my_collation_utf16le_bin_handler = +{ + NULL, /* init */ + my_strnncoll_utf16le_bin, + my_strnncollsp_utf16le_bin, + my_strnncollsp_nchars_utf16le_bin, + my_strnxfrm_unicode_full_bin, + my_strnxfrmlen_unicode_full_bin, + my_like_range_generic, + my_wildcmp_utf16_bin, + 
my_strcasecmp_mb2_or_mb4, + my_instr_mb, + my_hash_sort_utf16_bin, + my_propagate_simple, + my_min_str_mb_simple, + my_max_str_mb_simple, + my_ci_get_id_generic, + my_ci_get_collation_name_generic +}; + +/* Collation handler referenced by my_charset_utf16le_general_nopad_ci. */ +static MY_COLLATION_HANDLER my_collation_utf16le_general_nopad_ci_handler = +{ + NULL, /* init */ + my_strnncoll_utf16le_general_ci, + my_strnncollsp_utf16le_general_nopad_ci, + my_strnncollsp_nchars_utf16le_general_nopad_ci, + my_strnxfrm_nopad_utf16le_general_ci, + my_strnxfrmlen_unicode, + my_like_range_generic, + my_wildcmp_utf16_ci, + my_strcasecmp_mb2_or_mb4, + my_instr_mb, + my_hash_sort_utf16_nopad, + my_propagate_simple, + my_min_str_mb_simple_nopad, + my_max_str_mb_simple, + my_ci_get_id_generic, + my_ci_get_collation_name_generic +}; + +/* Collation handler referenced by my_charset_utf16le_nopad_bin. */ +static MY_COLLATION_HANDLER my_collation_utf16le_nopad_bin_handler = +{ + NULL, /* init */ + my_strnncoll_utf16le_bin, + my_strnncollsp_utf16le_nopad_bin, + my_strnncollsp_nchars_utf16le_nopad_bin, + my_strnxfrm_unicode_full_nopad_bin, + my_strnxfrmlen_unicode_full_bin, + my_like_range_generic, + my_wildcmp_utf16_bin, + my_strcasecmp_mb2_or_mb4, + my_instr_mb, + my_hash_sort_utf16_nopad_bin, + my_propagate_simple, + my_min_str_mb_simple_nopad, + my_max_str_mb_simple, + my_ci_get_id_generic, + my_ci_get_collation_name_generic +}; + +/* Character set handler for the utf16le charsets. Only lengthsp, mb_wc and wc_mb are utf16le-specific; every other entry is the same function the big-endian utf16 handler uses. NOTE(review): my_snprintf_mb2, my_strtoll10_mb2 and my_scan_mb2 are defined outside this chunk and presumably assume big-endian two-byte units; confirm they behave as intended for utf16le. */ +static MY_CHARSET_HANDLER my_charset_utf16le_handler= +{ + NULL, /* init */ + my_numchars_utf16, + my_charpos_utf16, + my_lengthsp_utf16le, + my_numcells_mb, + my_utf16le_uni, /* mb_wc */ + my_uni_utf16le, /* wc_mb */ + my_mb_ctype_mb, + my_caseup_str_mb2_or_mb4, + my_casedn_str_mb2_or_mb4, + my_caseup_utf16, + my_casedn_utf16, + my_snprintf_mb2, + my_l10tostr_mb2_or_mb4, + my_ll10tostr_mb2_or_mb4, + my_fill_mb2, + my_strntol_mb2_or_mb4, + my_strntoul_mb2_or_mb4, + my_strntoll_mb2_or_mb4, + my_strntoull_mb2_or_mb4, + my_strntod_mb2_or_mb4, + my_strtoll10_mb2, + my_strntoull10rnd_mb2_or_mb4, + my_scan_mb2, + my_charlen_utf16, + my_well_formed_char_length_utf16, + my_copy_fix_mb2_or_mb4, + 
my_uni_utf16le, + my_wc_to_printable_generic, + my_casefold_multiply_1, + my_casefold_multiply_1 +}; + +/* utf16le_general_ci: number 56, flagged MY_CS_PRIMARY, 2 to 4 bytes per character. */ +struct charset_info_st my_charset_utf16le_general_ci= +{ + 56,0,0, /* number */ + MY_CS_COMPILED|MY_CS_PRIMARY|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII, + { charset_name_utf16le, charset_name_utf16le_length }, + { STRING_WITH_LEN("utf16le_general_ci") },/* name */ + "UTF-16LE Unicode", /* comment */ + NULL, /* tailoring */ + NULL, /* ctype */ + NULL, /* to_lower */ + NULL, /* to_upper */ + NULL, /* sort_order */ + NULL, /* uca */ + NULL, /* tab_to_uni */ + NULL, /* tab_from_uni */ + &my_casefold_default,/* casefold */ + NULL, /* state_map */ + NULL, /* ident_map */ + 1, /* strxfrm_multiply */ + 2, /* mbminlen */ + 4, /* mbmaxlen */ + 0, /* min_sort_char */ + 0xFFFF, /* max_sort_char */ + ' ', /* pad char */ + 0, /* escape_with_backslash_is_dangerous */ + MY_CS_COLL_LEVELS_S1, + &my_charset_utf16le_handler, + &my_collation_utf16le_general_ci_handler +}; + +/* utf16le_bin: number 62, flagged MY_CS_BINSORT. */ +struct charset_info_st my_charset_utf16le_bin= +{ + 62,0,0, /* number */ + MY_CS_COMPILED|MY_CS_BINSORT|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII, + { charset_name_utf16le, charset_name_utf16le_length }, + { STRING_WITH_LEN("utf16le_bin") }, /* name */ + "UTF-16LE Unicode", /* comment */ + NULL, /* tailoring */ + NULL, /* ctype */ + NULL, /* to_lower */ + NULL, /* to_upper */ + NULL, /* sort_order */ + NULL, /* uca */ + NULL, /* tab_to_uni */ + NULL, /* tab_from_uni */ + &my_casefold_default,/* casefold */ + NULL, /* state_map */ + NULL, /* ident_map */ + 1, /* strxfrm_multiply */ + 2, /* mbminlen */ + 4, /* mbmaxlen */ + 0, /* min_sort_char */ + 0xFFFF, /* max_sort_char */ + ' ', /* pad char */ + 0, /* escape_with_backslash_is_dangerous */ + MY_CS_COLL_LEVELS_S1, + &my_charset_utf16le_handler, + &my_collation_utf16le_bin_handler +}; + +/* utf16le_general_nopad_ci: number MY_NOPAD_ID(56). */ +struct charset_info_st my_charset_utf16le_general_nopad_ci= +{ + MY_NOPAD_ID(56),0,0, /* number */ + 
MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII|MY_CS_NOPAD, + { charset_name_utf16le, charset_name_utf16le_length }, + { STRING_WITH_LEN("utf16le_general_nopad_ci") }, /* name */ + "UTF-16LE Unicode", /* comment */ + NULL, /* tailoring */ + NULL, /* ctype */ + NULL, /* to_lower */ + NULL, /* to_upper */ + NULL, /* sort_order */ + NULL, /* uca */ + NULL, /* tab_to_uni */ + NULL, /* tab_from_uni */ + &my_casefold_default,/* casefold */ + NULL, /* state_map */ + NULL, /* ident_map */ + 1, /* strxfrm_multiply */ + 2, /* mbminlen */ + 4, /* mbmaxlen */ + 0, /* min_sort_char */ + 0xFFFF, /* max_sort_char */ + ' ', /* pad char */ + 0, /* escape_with_backslash_is_dangerous */ + MY_CS_COLL_LEVELS_S1, + &my_charset_utf16le_handler, + &my_collation_utf16le_general_nopad_ci_handler +}; + +/* utf16le_nopad_bin: number MY_NOPAD_ID(62), flagged MY_CS_BINSORT and MY_CS_NOPAD. */ +struct charset_info_st my_charset_utf16le_nopad_bin= +{ + MY_NOPAD_ID(62),0,0, /* number */ + MY_CS_COMPILED|MY_CS_BINSORT|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII| + MY_CS_NOPAD, + { charset_name_utf16le, charset_name_utf16le_length }, + { STRING_WITH_LEN("utf16le_nopad_bin") }, /* name */ + "UTF-16LE Unicode", /* comment */ + NULL, /* tailoring */ + NULL, /* ctype */ + NULL, /* to_lower */ + NULL, /* to_upper */ + NULL, /* sort_order */ + NULL, /* uca */ + NULL, /* tab_to_uni */ + NULL, /* tab_from_uni */ + &my_casefold_default,/* casefold */ + NULL, /* state_map */ + NULL, /* ident_map */ + 1, /* strxfrm_multiply */ + 2, /* mbminlen */ + 4, /* mbmaxlen */ + 0, /* min_sort_char */ + 0xFFFF, /* max_sort_char */ + ' ', /* pad char */ + 0, /* escape_with_backslash_is_dangerous */ + MY_CS_COLL_LEVELS_S1, + &my_charset_utf16le_handler, + &my_collation_utf16le_nopad_bin_handler +}; + + +#endif /* HAVE_CHARSET_utf16 */ + + +#ifdef HAVE_CHARSET_utf32 + +#include "ctype-utf32.h" + +/* + Check if b0 and b1 start a valid UTF32 four-byte sequence. + Don't accept characters greater than U+10FFFF. 
+*/ +#define IS_UTF32_MBHEAD4(b0,b1) (!(b0) && ((uchar) (b1) <= 0x10)) + +#define IS_MB4_CHAR(b0,b1,b2,b3) (IS_UTF32_MBHEAD4(b0,b1)) + +/* general_ci weight of one four-byte character: build the code point with MY_UTF32_WC4 and look it up with my_general_ci_char_to_weight(). */ +static inline int my_weight_utf32_general_ci(uchar b0, uchar b1, + uchar b2, uchar b3) +{ + my_wc_t wc= MY_UTF32_WC4(b0, b1, b2, b3); + return my_general_ci_char_to_weight(wc); +} /* Each macro set below parameterizes one inclusion of strcoll.inl; only WEIGHT_MB4 is defined because IS_MB2_CHAR is not defined for utf32. */ +#define MY_FUNCTION_NAME(x) my_ ## x ## _utf32_general_ci +#define DEFINE_STRNXFRM_UNICODE +#define DEFINE_STRNXFRM_UNICODE_NOPAD +#define MY_MB_WC(cs, pwc, s, e) my_mb_wc_utf32_quick(pwc, s, e) +#define OPTIMIZE_ASCII 0 +#define MY_WC_WEIGHT(x) my_general_ci_char_to_weight(x) +#define WEIGHT_ILSEQ(x) (0xFF0000 + (uchar) (x)) +#define WEIGHT_MB4(b0,b1,b2,b3) my_weight_utf32_general_ci(b0, b1, b2, b3) +#include "strcoll.inl" + +#define MY_FUNCTION_NAME(x) my_ ## x ## _utf32_bin +#define WEIGHT_ILSEQ(x) (0xFF0000 + (uchar) (x)) +#define WEIGHT_MB4(b0,b1,b2,b3) ((int) MY_UTF32_WC4(b0, b1, b2, b3)) +#include "strcoll.inl" + +#define DEFINE_STRNNCOLLSP_NOPAD +#define MY_FUNCTION_NAME(x) my_ ## x ## _utf32_general_nopad_ci +#define WEIGHT_ILSEQ(x) (0xFF0000 + (uchar) (x)) +#define WEIGHT_MB4(b0,b1,b2,b3) my_weight_utf32_general_ci(b0, b1, b2, b3) +#include "strcoll.inl" + +#define DEFINE_STRNNCOLLSP_NOPAD +#define MY_FUNCTION_NAME(x) my_ ## x ## _utf32_nopad_bin +#define WEIGHT_ILSEQ(x) (0xFF0000 + (uchar) (x)) +#define WEIGHT_MB4(b0,b1,b2,b3) ((int) MY_UTF32_WC4(b0, b1, b2, b3)) +#include "strcoll.inl" + +#undef IS_MB2_CHAR +#undef IS_MB4_CHAR + +/* Decode one character from [s,e) into *pwc; thin wrapper around my_mb_wc_utf32_quick(). */ +static int +my_utf32_uni(CHARSET_INFO *cs __attribute__((unused)), + my_wc_t *pwc, const uchar *s, const uchar *e) +{ + return my_mb_wc_utf32_quick(pwc, s, e); +} + +/* Store wc at s as four bytes, most significant byte first. Returns 4, MY_CS_TOOSMALL4 when fewer than 4 bytes are available, or MY_CS_ILUNI when wc > 0x10FFFF. The buffer check comes first, so a too-short buffer is reported even for an invalid wc. */ +static int +my_uni_utf32(CHARSET_INFO *cs __attribute__((unused)), + my_wc_t wc, uchar *s, uchar *e) +{ + if (s + 4 > e) + return MY_CS_TOOSMALL4; + + if (wc > 0x10FFFF) + return MY_CS_ILUNI; + + s[0]= (uchar) (wc >> 24); + s[1]= (uchar) (wc >> 16) & 0xFF; + s[2]= (uchar) (wc >> 8) & 0xFF; + s[3]= (uchar) wc & 0xFF; + return 4; +} + + +static 
size_t +my_lengthsp_utf32(CHARSET_INFO *cs __attribute__((unused)), + const char *ptr, size_t length) +{ + const char *end= ptr + length; + DBUG_ASSERT((length % 4) == 0); /* Drop trailing four-byte spaces (bytes 00 00 00 20). */ + while (end > ptr + 3 && end[-1] == ' ' && !end[-2] && !end[-3] && !end[-4]) + end-= 4; + return (size_t) (end - ptr); +} + +/* Upper-case src into dst one character at a time with my_utf32_uni()/my_uni_utf32(). Stops early when decoding fails or the encoded length differs; always returns srclen. */ +static size_t +my_caseup_utf32(CHARSET_INFO *cs, const char *src, size_t srclen, + char *dst, size_t dstlen) +{ + my_wc_t wc; + int res; + const char *srcend= src + srclen; + char *dstend= dst + dstlen; + MY_CASEFOLD_INFO *uni_plane= cs->casefold; + DBUG_ASSERT(srclen <= dstlen); + + while ((src < srcend) && + (res= my_utf32_uni(cs, &wc, (uchar *)src, (uchar*) srcend)) > 0) + { + my_toupper_unicode(uni_plane, &wc); + if (res != my_uni_utf32(cs, wc, (uchar*) dst, (uchar*) dstend)) + break; + src+= res; + dst+= res; + } + return srclen; +} + +/* Hash s[0..slen): each character is decoded, mapped with my_tosort_unicode() and fed to MY_HASH_ADD as four bytes, most significant first. There is no explicit s < e test; the loop ends when my_utf32_uni() returns a non-positive value. */ +static void +my_hash_sort_utf32_nopad(CHARSET_INFO *cs, const uchar *s, size_t slen, + ulong *nr1, ulong *nr2) +{ + my_wc_t wc; + int res; + const uchar *e= s + slen; + MY_CASEFOLD_INFO *uni_plane= cs->casefold; + register ulong m1= *nr1, m2= *nr2; + + while ((res= my_utf32_uni(cs, &wc, (uchar*) s, (uchar*) e)) > 0) + { + my_tosort_unicode(uni_plane, &wc); + MY_HASH_ADD(m1, m2, (uint) (wc >> 24)); + MY_HASH_ADD(m1, m2, (uint) (wc >> 16) & 0xFF); + MY_HASH_ADD(m1, m2, (uint) (wc >> 8) & 0xFF); + MY_HASH_ADD(m1, m2, (uint) (wc & 0xFF)); + s+= res; + } + *nr1= m1; + *nr2= m2; +} + +/* Hash s[0..slen) after my_lengthsp_utf32() has trimmed trailing spaces. */ +static void +my_hash_sort_utf32(CHARSET_INFO *cs, const uchar *s, size_t slen, + ulong *nr1, ulong *nr2) +{ + size_t lengthsp= my_lengthsp_utf32(cs, (const char *) s, slen); + my_hash_sort_utf32_nopad(cs, s, lengthsp, nr1, nr2); +} + +/* Lower-case src into dst one character at a time; always returns srclen. Unlike my_caseup_utf32 the loop has no src < srcend test and relies on my_utf32_uni() returning a non-positive value at the end of input. */ +static size_t +my_casedn_utf32(CHARSET_INFO *cs, const char *src, size_t srclen, + char *dst, size_t dstlen) +{ + my_wc_t wc; + int res; + const char *srcend= src + srclen; + char *dstend= dst + dstlen; + MY_CASEFOLD_INFO *uni_plane= cs->casefold; + DBUG_ASSERT(srclen <= dstlen); + + while ((res= my_utf32_uni(cs, &wc, 
(uchar*) src, (uchar*) srcend)) > 0) + { + my_tolower_unicode(uni_plane,&wc); + if (res != my_uni_utf32(cs, wc, (uchar*) dst, (uchar*) dstend)) + break; + src+= res; + dst+= res; + } + return srclen; +} + +/* Length of the character at b: MY_CS_TOOSMALL4 when fewer than 4 bytes remain before e, 4 when the first two bytes satisfy IS_UTF32_MBHEAD4, MY_CS_ILSEQ otherwise. */ +static int +my_charlen_utf32(CHARSET_INFO *cs __attribute__((unused)), + const uchar *b, const uchar *e) +{ + return b + 4 > e ? MY_CS_TOOSMALL4 : + IS_UTF32_MBHEAD4(b[0], b[1]) ? 4 : MY_CS_ILSEQ; +} + + +#define MY_FUNCTION_NAME(x) my_ ## x ## _utf32 +#define CHARLEN(cs,str,end) my_charlen_utf32(cs,str,end) +#define DEFINE_WELL_FORMED_CHAR_LENGTH_USING_CHARLEN +#include "ctype-mb.inl" +#undef MY_FUNCTION_NAME +#undef CHARLEN +#undef DEFINE_WELL_FORMED_CHAR_LENGTH_USING_CHARLEN +/* Defines my_well_formed_char_length_utf32 */ + +/* Minimal vsnprintf that writes its output as four-byte characters (three zero bytes followed by the source byte) into dst[0..n). The branches handle %s, %d and %u, skip digits, '.', '-' and one 'l' after the '%', and emit '%' for anything else. Returns the number of bytes written, not counting the four-byte terminator. */ +static size_t +my_vsnprintf_utf32(char *dst, size_t n, const char* fmt, va_list ap) +{ + char *start= dst, *end= dst + n; + DBUG_ASSERT((n % 4) == 0); + for (; *fmt ; fmt++) + { + if (fmt[0] != '%') + { + if (dst >= end) /* End of buffer */ + break; + + *dst++= '\0'; + *dst++= '\0'; + *dst++= '\0'; + *dst++= *fmt; /* Copy ordinary char */ + continue; + } + + fmt++; + + /* Skip if max size is used (to be compatible with printf) */ + while ( (*fmt>='0' && *fmt<='9') || *fmt == '.' 
|| *fmt == '-') + fmt++; + + if (*fmt == 'l') + fmt++; + + if (*fmt == 's') /* String parameter */ + { + reg2 char *par= va_arg(ap, char *); + size_t plen; + size_t left_len= (size_t)(end - dst); + if (!par) par= (char*)"(null)"; + plen= strlen(par); /* Truncate the argument so that room for one more four-byte character remains. NOTE(review): left_len / 4 - 1 wraps around when left_len < 4; confirm callers never reach this point with a full buffer. */ + if (left_len <= plen*4) + plen= left_len / 4 - 1; + + for ( ; plen ; plen--, dst+= 4, par++) + { + dst[0]= '\0'; + dst[1]= '\0'; + dst[2]= '\0'; + dst[3]= par[0]; + } + continue; + } + else if (*fmt == 'd' || *fmt == 'u') /* Integer parameter */ + { + register int iarg; + char nbuf[16]; + char *pbuf= nbuf; + /* Integers are only formatted when at least 64 bytes remain; otherwise formatting stops here. */ + if ((size_t) (end - dst) < 64) + break; + iarg= va_arg(ap, int); + if (*fmt == 'd') + int10_to_str((long) iarg, nbuf, -10); + else + int10_to_str((long) (uint) iarg,nbuf,10); + + for (; pbuf[0]; pbuf++) + { + *dst++= '\0'; + *dst++= '\0'; + *dst++= '\0'; + *dst++= *pbuf; + } + continue; + } + + /* We come here on '%%', unknown code or too long parameter */ + if (dst == end) + break; + *dst++= '\0'; + *dst++= '\0'; + *dst++= '\0'; + *dst++= '%'; /* % used as % or unknown code */ + } + /* NOTE(review): the terminator below needs four free bytes, but the ordinary-character and '%' paths above can fill the buffer up to end; the assertion is the only guard. Confirm callers size the buffer accordingly. */ + DBUG_ASSERT(dst < end); + *dst++= '\0'; + *dst++= '\0'; + *dst++= '\0'; + *dst++= '\0'; /* End of errmessage */ + return (size_t) (dst - start - 4); +} + +/* Variadic front end for my_vsnprintf_utf32(); cs is unused. */ +static size_t +my_snprintf_utf32(CHARSET_INFO *cs __attribute__((unused)), + char* to, size_t n, const char* fmt, ...) 
+{ + size_t ret; + va_list args; + va_start(args,fmt); + ret= my_vsnprintf_utf32(to, n, fmt, args); + va_end(args); + return ret; +} + + +static longlong +my_strtoll10_utf32(CHARSET_INFO *cs __attribute__((unused)), + const char *nptr, char **endptr, int *error) +{ + const char *s, *end, *start, *n_end, *true_end; + uchar c; + unsigned long i, j, k; + ulonglong li; + int negative; + ulong cutoff, cutoff2, cutoff3; + + s= nptr; + /* If fixed length string */ + if (endptr) + { + /* Make sure string length is even */ + end= s + ((*endptr - s) / 4) * 4; + while (s < end && !s[0] && !s[1] && !s[2] && + (s[3] == ' ' || s[3] == '\t')) + s+= 4; + if (s == end) + goto no_conv; + } + else + { + /* We don't support null terminated strings in UCS2 */ + goto no_conv; + } + + /* Check for a sign. */ + negative= 0; + if (!s[0] && !s[1] && !s[2] && s[3] == '-') + { + *error= -1; /* Mark as negative number */ + negative= 1; + s+= 4; + if (s == end) + goto no_conv; + cutoff= MAX_NEGATIVE_NUMBER / LFACTOR2; + cutoff2= (MAX_NEGATIVE_NUMBER % LFACTOR2) / 100; + cutoff3= MAX_NEGATIVE_NUMBER % 100; + } + else + { + *error= 0; + if (!s[0] && !s[1] && !s[2] && s[3] == '+') + { + s+= 4; + if (s == end) + goto no_conv; + } + cutoff= ULONGLONG_MAX / LFACTOR2; + cutoff2= ULONGLONG_MAX % LFACTOR2 / 100; + cutoff3= ULONGLONG_MAX % 100; + } + + /* Handle case where we have a lot of pre-zero */ + if (!s[0] && !s[1] && !s[2] && s[3] == '0') + { + i= 0; + do + { + s+= 4; + if (s == end) + goto end_i; /* Return 0 */ + } + while (!s[0] && !s[1] && !s[2] && s[3] == '0'); + n_end= s + 4 * INIT_CNT; + } + else + { + /* Read first digit to check that it's a valid number */ + if (s[0] || s[1] || s[2] || (c= (s[3]-'0')) > 9) + goto no_conv; + i= c; + s+= 4; + n_end= s + 4 * (INIT_CNT-1); + } + + /* Handle first 9 digits and store them in i */ + if (n_end > end) + n_end= end; + for (; s != n_end ; s+= 4) + { + if (s[0] || s[1] || s[2] || (c= (s[3] - '0')) > 9) + goto end_i; + i= i * 10 + c; + } + if (s == 
end) + goto end_i; + + /* Handle next 9 digits and store them in j */ + j= 0; + start= s; /* Used to know how much to shift i */ + n_end= true_end= s + 4 * INIT_CNT; + if (n_end > end) + n_end= end; + do + { + if (s[0] || s[1] || s[2] || (c= (s[3] - '0')) > 9) + goto end_i_and_j; + j= j * 10 + c; + s+= 4; + } while (s != n_end); + if (s == end) + { + if (s != true_end) + goto end_i_and_j; + goto end3; + } + if (s[0] || s[1] || s[2] || (c= (s[3] - '0')) > 9) + goto end3; + + /* Handle the next 1 or 2 digits and store them in k */ + k=c; + s+= 4; + if (s == end || s[0] || s[1] || s[2] || (c= (s[3]-'0')) > 9) + goto end4; + k= k * 10 + c; + s+= 4; + *endptr= (char*) s; + + /* number string should have ended here */ + if (s != end && !s[0] && !s[1] && !s[2] && (c= (s[3] - '0')) <= 9) + goto overflow; + + /* Check that we didn't get an overflow with the last digit */ + if (i > cutoff || (i == cutoff && ((j > cutoff2 || j == cutoff2) && + k > cutoff3))) + goto overflow; + li= i * LFACTOR2+ (ulonglong) j * 100 + k; + return (longlong) li; + +overflow: /* *endptr is set here */ + *error= MY_ERRNO_ERANGE; + return negative ? LONGLONG_MIN : (longlong) ULONGLONG_MAX; + +end_i: + *endptr= (char*) s; + return (negative ? ((longlong) -(long) i) : (longlong) i); + +end_i_and_j: + li= (ulonglong) i * lfactor[(size_t) (s-start) / 4] + j; + *endptr= (char*) s; + return (negative ? -((longlong) li) : (longlong) li); + +end3: + li= (ulonglong) i*LFACTOR+ (ulonglong) j; + *endptr= (char*) s; + return (negative ? -((longlong) li) : (longlong) li); + +end4: + li= (ulonglong) i*LFACTOR1+ (ulonglong) j * 10 + k; + *endptr= (char*) s; + if (negative) + { + if (li > MAX_NEGATIVE_NUMBER) + goto overflow; + return -((longlong) li); + } + return (longlong) li; + +no_conv: + /* There was no number to convert. 
*/ + *error= MY_ERRNO_EDOM; + *endptr= (char *) nptr; + return 0; +} + + +static size_t +my_numchars_utf32(CHARSET_INFO *cs __attribute__((unused)), + const char *b, const char *e) +{ + return (size_t) (e - b) / 4; +} + + +static size_t +my_charpos_utf32(CHARSET_INFO *cs __attribute__((unused)), + const char *b, const char *e, size_t pos) +{ + size_t string_length= (size_t) (e - b); + return pos * 4 > string_length ? string_length + 4 : pos * 4; +} + + +static +void my_fill_utf32(CHARSET_INFO *cs, + char *s, size_t slen, int fill) +{ + char buf[10]; +#ifdef DBUG_ASSERT_EXISTS + uint buflen; +#endif + char *e= s + slen; + + DBUG_ASSERT((slen % 4) == 0); + +#ifdef DBUG_ASSERT_EXISTS + buflen= +#endif + my_ci_wc_mb(cs, (my_wc_t) fill, (uchar*) buf, (uchar*) buf + sizeof(buf)); + DBUG_ASSERT(buflen == 4); + while (s < e) + { + memcpy(s, buf, 4); + s+= 4; + } +} + + +static int +my_wildcmp_utf32_ci(CHARSET_INFO *cs, + const char *str, const char *str_end, + const char *wildstr, const char *wildend, + int escape, int w_one, int w_many) +{ + MY_CASEFOLD_INFO *uni_plane= cs->casefold; + return my_wildcmp_unicode(cs, str, str_end, wildstr, wildend, + escape, w_one, w_many, uni_plane); +} + + +static int +my_wildcmp_utf32_bin(CHARSET_INFO *cs, + const char *str,const char *str_end, + const char *wildstr,const char *wildend, + int escape, int w_one, int w_many) +{ + return my_wildcmp_unicode(cs, str, str_end, wildstr, wildend, + escape, w_one, w_many, NULL); +} + + +static size_t +my_scan_utf32(CHARSET_INFO *cs, + const char *str, const char *end, int sequence_type) +{ + const char *str0= str; + + switch (sequence_type) + { + case MY_SEQ_SPACES: + for ( ; str < end; ) + { + my_wc_t wc; + int res= my_utf32_uni(cs, &wc, (uchar*) str, (uchar*) end); + if (res < 0 || wc != ' ') + break; + str+= res; + } + return (size_t) (str - str0); + case MY_SEQ_NONSPACES: + DBUG_ASSERT(0); /* Not implemented */ + /* pass through */ + default: + return 0; + } +} + + +static 
/*
  Callback table for the utf32_general_ci collation. The initializers are
  positional; only the first slot (init, left NULL) is labelled here.
  my_charset_utf32_general_ci points at this table.
*/
MY_COLLATION_HANDLER my_collation_utf32_general_ci_handler =
+{
+  NULL, /* init */
+  my_strnncoll_utf32_general_ci,
+  my_strnncollsp_utf32_general_ci,
+  my_strnncollsp_nchars_utf32_general_ci,
+  my_strnxfrm_utf32_general_ci,
+  my_strnxfrmlen_unicode,
+  my_like_range_generic,
+  my_wildcmp_utf32_ci,
+  my_strcasecmp_mb2_or_mb4,
+  my_instr_mb,
+  my_hash_sort_utf32,
+  my_propagate_simple,
+  my_min_str_mb_simple,
+  my_max_str_mb_simple,
+  my_ci_get_id_generic,
+  my_ci_get_collation_name_generic
+};
+
+ /* utf32_bin table: differs from the general_ci table in the *_utf32_bin
     comparison entries, the unicode_full_bin sort-key entries and
     my_wildcmp_utf32_bin; all other entries are the same. */
+static MY_COLLATION_HANDLER my_collation_utf32_bin_handler =
+{
+  NULL, /* init */
+  my_strnncoll_utf32_bin,
+  my_strnncollsp_utf32_bin,
+  my_strnncollsp_nchars_utf32_bin,
+  my_strnxfrm_unicode_full_bin,
+  my_strnxfrmlen_unicode_full_bin,
+  my_like_range_generic,
+  my_wildcmp_utf32_bin,
+  my_strcasecmp_mb2_or_mb4,
+  my_instr_mb,
+  my_hash_sort_utf32,
+  my_propagate_simple,
+  my_min_str_mb_simple,
+  my_max_str_mb_simple,
+  my_ci_get_id_generic,
+  my_ci_get_collation_name_generic
+};
+
+ /* utf32_general_nopad_ci table: shares my_strnncoll_utf32_general_ci with
     the general_ci table but uses the *_nopad_* strnncollsp/strnxfrm entries,
     my_hash_sort_utf32_nopad and my_min_str_mb_simple_nopad. */
+static MY_COLLATION_HANDLER my_collation_utf32_general_nopad_ci_handler =
+{
+  NULL, /* init */
+  my_strnncoll_utf32_general_ci,
+  my_strnncollsp_utf32_general_nopad_ci,
+  my_strnncollsp_nchars_utf32_general_nopad_ci,
+  my_strnxfrm_nopad_utf32_general_ci,
+  my_strnxfrmlen_unicode,
+  my_like_range_generic,
+  my_wildcmp_utf32_ci,
+  my_strcasecmp_mb2_or_mb4,
+  my_instr_mb,
+  my_hash_sort_utf32_nopad,
+  my_propagate_simple,
+  my_min_str_mb_simple_nopad,
+  my_max_str_mb_simple,
+  my_ci_get_id_generic,
+  my_ci_get_collation_name_generic
+};
+
+ /* utf32_nopad_bin table: shares my_strnncoll_utf32_bin with the utf32_bin
     table and uses the nopad variants for the space-sensitive entries. */
+static MY_COLLATION_HANDLER my_collation_utf32_nopad_bin_handler =
+{
+  NULL, /* init */
+  my_strnncoll_utf32_bin,
+  my_strnncollsp_utf32_nopad_bin,
+  my_strnncollsp_nchars_utf32_nopad_bin,
+  my_strnxfrm_unicode_full_nopad_bin,
+  my_strnxfrmlen_unicode_full_bin,
+  my_like_range_generic,
+  my_wildcmp_utf32_bin,
+  my_strcasecmp_mb2_or_mb4,
+  my_instr_mb,
+  my_hash_sort_utf32_nopad,
+  my_propagate_simple,
+  my_min_str_mb_simple_nopad, +
my_max_str_mb_simple,
+  my_ci_get_id_generic,
+  my_ci_get_collation_name_generic
+};
+
+ /* Character-set level callback table used by every utf32 charset_info_st
     defined in this section. Initializers are positional; my_uni_utf32 and
     my_casefold_multiply_1 each fill two slots. */
+MY_CHARSET_HANDLER my_charset_utf32_handler=
+{
+  NULL, /* init */
+  my_numchars_utf32,
+  my_charpos_utf32,
+  my_lengthsp_utf32,
+  my_numcells_mb,
+  my_utf32_uni,
+  my_uni_utf32,
+  my_mb_ctype_mb,
+  my_caseup_str_mb2_or_mb4,
+  my_casedn_str_mb2_or_mb4,
+  my_caseup_utf32,
+  my_casedn_utf32,
+  my_snprintf_utf32,
+  my_l10tostr_mb2_or_mb4,
+  my_ll10tostr_mb2_or_mb4,
+  my_fill_utf32,
+  my_strntol_mb2_or_mb4,
+  my_strntoul_mb2_or_mb4,
+  my_strntoll_mb2_or_mb4,
+  my_strntoull_mb2_or_mb4,
+  my_strntod_mb2_or_mb4,
+  my_strtoll10_utf32,
+  my_strntoull10rnd_mb2_or_mb4,
+  my_scan_utf32,
+  my_charlen_utf32,
+  my_well_formed_char_length_utf32,
+  my_copy_fix_mb2_or_mb4,
+  my_uni_utf32,
+  my_wc_to_printable_generic,
+  my_casefold_multiply_1,
+  my_casefold_multiply_1
+};
+
+ /* utf32_general_ci: number 60, flagged MY_CS_PRIMARY; fixed-width
     characters (mbminlen == mbmaxlen == 4); no ctype/case/sort tables. */
+struct charset_info_st my_charset_utf32_general_ci=
+{
+  60,0,0, /* number */
+  MY_CS_COMPILED|MY_CS_PRIMARY|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII,
+  { charset_name_utf32, charset_name_utf32_length }, /* cs name */
+  { STRING_WITH_LEN("utf32_general_ci") }, /* name */
+  "UTF-32 Unicode", /* comment */
+  NULL, /* tailoring */
+  NULL, /* ctype */
+  NULL, /* to_lower */
+  NULL, /* to_upper */
+  NULL, /* sort_order */
+  NULL, /* uca */
+  NULL, /* tab_to_uni */
+  NULL, /* tab_from_uni */
+  &my_casefold_default,/* casefold */
+  NULL, /* state_map */
+  NULL, /* ident_map */
+  1, /* strxfrm_multiply */
+  4, /* mbminlen */
+  4, /* mbmaxlen */
+  0, /* min_sort_char */
+  0xFFFF, /* max_sort_char */
+  ' ', /* pad char */
+  0, /* escape_with_backslash_is_dangerous */
+  MY_CS_COLL_LEVELS_S1,
+  &my_charset_utf32_handler,
+  &my_collation_utf32_general_ci_handler
+};
+
+ /* utf32_bin: number 61, MY_CS_BINSORT instead of MY_CS_PRIMARY; otherwise
     the same field values as utf32_general_ci apart from name and handler. */
+struct charset_info_st my_charset_utf32_bin=
+{
+  61,0,0, /* number */
+  MY_CS_COMPILED|MY_CS_BINSORT|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII,
+  { charset_name_utf32, charset_name_utf32_length }, /* cs name */
+  { STRING_WITH_LEN("utf32_bin") }, /* name */
+  "UTF-32 Unicode", /* comment */
+  NULL, /* tailoring */
+  NULL, /* ctype */
+  NULL, /* to_lower */
+  NULL, /* to_upper */
+  NULL, /* sort_order */
+  NULL, /* uca */
+  NULL, /* tab_to_uni */
+  NULL, /* tab_from_uni */
+  &my_casefold_default,/* casefold */
+  NULL, /* state_map */
+  NULL, /* ident_map */
+  1, /* strxfrm_multiply */
+  4, /* mbminlen */
+  4, /* mbmaxlen */
+  0, /* min_sort_char */
+  0xFFFF, /* max_sort_char */
+  ' ', /* pad char */
+  0, /* escape_with_backslash_is_dangerous */
+  MY_CS_COLL_LEVELS_S1,
+  &my_charset_utf32_handler,
+  &my_collation_utf32_bin_handler
+};
+
+ /* utf32_general_nopad_ci: id derived with MY_NOPAD_ID(60), adds the
     MY_CS_NOPAD flag and uses the general_nopad_ci collation handler. */
+struct charset_info_st my_charset_utf32_general_nopad_ci=
+{
+  MY_NOPAD_ID(60),0,0, /* number */
+  MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII|MY_CS_NOPAD,
+  { charset_name_utf32, charset_name_utf32_length }, /* cs name */
+  { STRING_WITH_LEN("utf32_general_nopad_ci") }, /* name */
+  "UTF-32 Unicode", /* comment */
+  NULL, /* tailoring */
+  NULL, /* ctype */
+  NULL, /* to_lower */
+  NULL, /* to_upper */
+  NULL, /* sort_order */
+  NULL, /* uca */
+  NULL, /* tab_to_uni */
+  NULL, /* tab_from_uni */
+  &my_casefold_default,/* casefold */
+  NULL, /* state_map */
+  NULL, /* ident_map */
+  1, /* strxfrm_multiply */
+  4, /* mbminlen */
+  4, /* mbmaxlen */
+  0, /* min_sort_char */
+  0xFFFF, /* max_sort_char */
+  ' ', /* pad char */
+  0, /* escape_with_backslash_is_dangerous */
+  MY_CS_COLL_LEVELS_S1,
+  &my_charset_utf32_handler,
+  &my_collation_utf32_general_nopad_ci_handler
+};
+
+ /* utf32_nopad_bin: id derived with MY_NOPAD_ID(61); MY_CS_BINSORT plus
     MY_CS_NOPAD. */
+struct charset_info_st my_charset_utf32_nopad_bin=
+{
+  MY_NOPAD_ID(61),0,0, /* number */
+  MY_CS_COMPILED|MY_CS_BINSORT|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII|
+  MY_CS_NOPAD,
+  { charset_name_utf32, charset_name_utf32_length }, /* cs name */
+  { STRING_WITH_LEN("utf32_nopad_bin") }, /* name */
+  "UTF-32 Unicode", /* comment */
+  NULL, /* tailoring */
+  NULL, /* ctype */
+  NULL, /* to_lower */
+  NULL, /* to_upper */
+  NULL, /* sort_order */
+  NULL, /* uca */
+  NULL, /* tab_to_uni */
+  NULL, /*
tab_from_uni */
+  &my_casefold_default,/* casefold */
+  NULL, /* state_map */
+  NULL, /* ident_map */
+  1, /* strxfrm_multiply */
+  4, /* mbminlen */
+  4, /* mbmaxlen */
+  0, /* min_sort_char */
+  0xFFFF, /* max_sort_char */
+  ' ', /* pad char */
+  0, /* escape_with_backslash_is_dangerous */
+  MY_CS_COLL_LEVELS_S1,
+  &my_charset_utf32_handler,
+  &my_collation_utf32_nopad_bin_handler
+};
+
+
+#endif /* HAVE_CHARSET_utf32 */
+
+
+#ifdef HAVE_CHARSET_ucs2
+
+#include "ctype-ucs2.h"
+ /* Classification table with 257 entries: a leading 0 followed by one byte
     for each of the 256 byte values; the entries for 128..255 are all 0.
     NOTE(review): the meaning of the individual bits is defined elsewhere. */
+static const uchar ctype_ucs2[] = {
+  0,
+  32, 32, 32, 32, 32, 32, 32, 32, 32, 40, 40, 40, 40, 40, 32, 32,
+  32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
+  72, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  132,132,132,132,132,132,132,132,132,132, 16, 16, 16, 16, 16, 16,
+  16,129,129,129,129,129,129, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 16, 16, 16, 16, 16,
+  16,130,130,130,130,130,130, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 16, 16, 16, 16, 32,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+};
+ /* Byte-to-lowercase map: identity except that 65..90 ('A'..'Z') map to
     97..122 ('a'..'z'). */
+static const uchar to_lower_ucs2[] = {
+  0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+  16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
+  32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
+  48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
+  64, 97, 98, 99,100,101,102,103,104,105,106,107,108,109,110,111,
+  112,113,114,115,116,117,118,119,120,121,122, 91, 92, 93, 94, 95,
+  96, 97, 98, 99,100,101,102,103,104,105,106,107,108,109,110,111,
+  112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127, +
128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,
+  144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,
+  160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,
+  176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,
+  192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,
+  208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,
+  224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,
+  240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255
+};
+ /* Byte-to-uppercase map: identity except that 97..122 ('a'..'z') map to
     65..90 ('A'..'Z'). Also used as sort_order by the ucs2 general_ci
     charset definitions further down. */
+static const uchar to_upper_ucs2[] = {
+  0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+  16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
+  32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
+  48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
+  64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
+  80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95,
+  96, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
+  80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90,123,124,125,126,127,
+  128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,
+  144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,
+  160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,
+  176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,
+  192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,
+  208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,
+  224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,
+  240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255
+};
+
+ /* IS_MB2_CHAR accepts every byte pair (always 1). UCS2_CODE combines two
     bytes big-endian: b0 is the high byte, b1 the low byte.
     NOTE(review): the UCS2_CODE arguments are not parenthesized inside the
     casts; the uses visible in this file pass plain identifiers. */
+/* Definitions for strcoll.inl */
+#define IS_MB2_CHAR(x,y) (1)
+#define UCS2_CODE(b0,b1) (((uchar) b0) << 8 | ((uchar) b1))
+
+ /* Weight of the UCS2 character (b0, b1) according to
     my_general_ci_bmp_char_to_weight(). */
+static inline int my_weight_mb2_ucs2_general_ci(uchar b0, uchar b1)
+{
+  my_wc_t wc= UCS2_CODE(b0, b1);
+  return my_general_ci_bmp_char_to_weight((uint16) wc);
+}
+
+ /* Same as above, using my_general_mysql500_ci_bmp_char_to_weight(). */
+static inline int
my_weight_mb2_ucs2_general_mysql500_ci(uchar b0, uchar b1)
+{
+  my_wc_t wc= UCS2_CODE(b0, b1);
+  return my_general_mysql500_ci_bmp_char_to_weight((uint16) wc);
+}
+
+ /* Each group below sets parameter macros and then includes strcoll.inl.
     MY_FUNCTION_NAME(x) builds names of the form my_<x>_ucs2_<collation>,
     which is the pattern of the comparison/sort-key functions referenced by
     the ucs2 collation handler tables in this file.
     NOTE(review): the later groups do not redefine every macro of the first
     group, so strcoll.inl presumably undefines or defaults them - confirm
     in strcoll.inl. Group 1: ucs2_general_ci. */
+#define MY_FUNCTION_NAME(x) my_ ## x ## _ucs2_general_ci
+#define DEFINE_STRNXFRM_UNICODE
+#define DEFINE_STRNXFRM_UNICODE_NOPAD
+#define MY_MB_WC(cs, pwc, s, e) my_mb_wc_ucs2_quick(pwc, s, e)
+#define OPTIMIZE_ASCII 0
+#define MY_WC_WEIGHT(x) my_general_ci_bmp_char_to_weight(x)
+#define WEIGHT_ILSEQ(x) (0xFF0000 + (uchar) (x))
+#define WEIGHT_MB2(b0,b1) my_weight_mb2_ucs2_general_ci(b0,b1)
+#include "strcoll.inl"
+
+
+ /* Group 2: ucs2_general_mysql500_ci (mysql500 weight functions; no
     DEFINE_STRNXFRM_UNICODE_NOPAD). */
+#define MY_FUNCTION_NAME(x) my_ ## x ## _ucs2_general_mysql500_ci
+#define DEFINE_STRNXFRM_UNICODE
+#define MY_MB_WC(cs, pwc, s, e) my_mb_wc_ucs2_quick(pwc, s, e)
+#define OPTIMIZE_ASCII 0
+#define MY_WC_WEIGHT(x) my_general_mysql500_ci_bmp_char_to_weight(x)
+#define WEIGHT_ILSEQ(x) (0xFF0000 + (uchar) (x))
+#define WEIGHT_MB2(b0,b1) my_weight_mb2_ucs2_general_mysql500_ci(b0,b1)
+#include "strcoll.inl"
+
+ /* Group 3: ucs2_bin (weight of a character is its UCS2 code). */
+#define MY_FUNCTION_NAME(x) my_ ## x ## _ucs2_bin
+#define DEFINE_STRNXFRM_UNICODE_BIN2
+#define MY_MB_WC(cs, pwc, s, e) my_mb_wc_ucs2_quick(pwc, s, e)
+#define OPTIMIZE_ASCII 0
+#define WEIGHT_ILSEQ(x) (0xFF0000 + (uchar) (x))
+#define WEIGHT_MB2(b0,b1) UCS2_CODE(b0,b1)
+#include "strcoll.inl"
+
+ /* Group 4: ucs2_general_nopad_ci (DEFINE_STRNNCOLLSP_NOPAD). */
+#define DEFINE_STRNNCOLLSP_NOPAD
+#define MY_FUNCTION_NAME(x) my_ ## x ## _ucs2_general_nopad_ci
+#define WEIGHT_ILSEQ(x) (0xFF0000 + (uchar) (x))
+#define WEIGHT_MB2(b0,b1) my_weight_mb2_ucs2_general_ci(b0,b1)
+#include "strcoll.inl"
+
+ /* Group 5: ucs2_nopad_bin (DEFINE_STRNNCOLLSP_NOPAD). */
+#define DEFINE_STRNNCOLLSP_NOPAD
+#define MY_FUNCTION_NAME(x) my_ ## x ## _ucs2_nopad_bin
+#define WEIGHT_ILSEQ(x) (0xFF0000 + (uchar) (x))
+#define WEIGHT_MB2(b0,b1) UCS2_CODE(b0,b1)
+#include "strcoll.inl"
+
+ /* Length of the character at s: 2 when at least two bytes are available
     before e, otherwise MY_CS_TOOSMALLN(2). */
+static int
+my_charlen_ucs2(CHARSET_INFO *cs __attribute__((unused)),
+                const uchar *s, const uchar *e)
+{
+  return s + 2 > e ?
MY_CS_TOOSMALLN(2) : 2;
+}
+
+
+/* Decode one UCS2 character from [s, e) by delegating to
+   my_mb_wc_ucs2_quick(); the charset argument is not consulted. */
+static int my_ucs2_uni(CHARSET_INFO *cs __attribute__((unused)),
+                       my_wc_t * pwc, const uchar *s, const uchar *e)
+{
+  return my_mb_wc_ucs2_quick(pwc, s, e);
+}
+
+/*
+  Encode wc as two big-endian bytes at r.
+  Returns 2 on success, MY_CS_TOOSMALL2 when fewer than two bytes are
+  available before e, and MY_CS_ILUNI for code points above 0xFFFF.
+  The space check is made first, so it wins when both conditions hold.
+*/
+static int my_uni_ucs2(CHARSET_INFO *cs __attribute__((unused)) ,
+                       my_wc_t wc, uchar *r, uchar *e)
+{
+  if (r + 2 > e)
+  {
+    return MY_CS_TOOSMALL2;
+  }
+  if (wc > 0xFFFF)
+  {
+    /* UCS2 does not support characters outside BMP */
+    return MY_CS_ILUNI;
+  }
+  r[0]= (uchar) (wc >> 8);
+  r[1]= (uchar) (wc & 0xFF);
+  return 2;
+}
+
+
+/*
+  Upper-case src[0..srclen) into dst, one character at a time, using the
+  charset's casefold data. Conversion stops at the first character that
+  cannot be decoded or whose re-encoded length differs from the decoded
+  length. The return value is always srclen.
+*/
+static size_t my_caseup_ucs2(CHARSET_INFO *cs, const char *src, size_t srclen,
+                             char *dst, size_t dstlen)
+{
+  const char *src_limit= src + srclen;
+  char *dst_limit= dst + dstlen;
+  MY_CASEFOLD_INFO *casefold= cs->casefold;
+  DBUG_ASSERT(srclen <= dstlen);
+
+  while (src < src_limit)
+  {
+    my_wc_t ch;
+    int nbytes= my_ucs2_uni(cs, &ch, (uchar *) src, (uchar *) src_limit);
+    if (nbytes <= 0)
+      break;
+    my_toupper_unicode_bmp(casefold, &ch);
+    if (my_uni_ucs2(cs, ch, (uchar *) dst, (uchar *) dst_limit) != nbytes)
+      break;
+    src+= nbytes;
+    dst+= nbytes;
+  }
+  return srclen;
+}
+
+
+/*
+  Fold every character of s[0..slen) with my_tosort_unicode_bmp() and mix
+  it into the running hash pair (*nr1, *nr2) via MY_HASH_ADD_16. Trailing
+  spaces are not treated specially here.
+*/
+static void
+my_hash_sort_ucs2_nopad(CHARSET_INFO *cs, const uchar *s, size_t slen,
+                        ulong *nr1, ulong *nr2)
+{
+  const uchar *limit= s + slen;
+  MY_CASEFOLD_INFO *casefold= cs->casefold;
+  register ulong h1= *nr1, h2= *nr2;
+
+  while (s < limit)
+  {
+    my_wc_t ch;
+    int nbytes= my_ucs2_uni(cs, &ch, (uchar *) s, (uchar *) limit);
+    if (nbytes <= 0)
+      break;
+    my_tosort_unicode_bmp(casefold, &ch);
+    MY_HASH_ADD_16(h1, h2, ch);
+    s+= nbytes;
+  }
+  *nr1= h1;
+  *nr2= h2;
+}
+
+
+/* Hash s[0..slen) after cutting it to the length reported by
+   my_lengthsp_mb2(), then delegate to the nopad variant. */
+static void my_hash_sort_ucs2(CHARSET_INFO *cs, const uchar *s, size_t slen,
+                              ulong *nr1, ulong *nr2)
+{
+  my_hash_sort_ucs2_nopad(cs, s,
+                          my_lengthsp_mb2(cs, (const char *) s, slen),
+                          nr1, nr2);
+}
+
+static size_t my_casedn_ucs2(CHARSET_INFO *cs, const char *src, size_t srclen,
+                             char *dst, size_t dstlen)
+{
+  my_wc_t wc;
+  int res;
+  const char *srcend= src + srclen;
+  char *dstend= dst + dstlen;
+  MY_CASEFOLD_INFO *uni_plane= cs->casefold; +
DBUG_ASSERT(srclen <= dstlen); + + while ((src < srcend) && + (res= my_ucs2_uni(cs, &wc, (uchar*) src, (uchar*) srcend)) > 0) + { + my_tolower_unicode_bmp(uni_plane, &wc); + if (res != my_uni_ucs2(cs, wc, (uchar*) dst, (uchar*) dstend)) + break; + src+= res; + dst+= res; + } + return srclen; +} + + +static void +my_fill_ucs2(CHARSET_INFO *cs __attribute__((unused)), + char *s, size_t l, int fill) +{ + DBUG_ASSERT(fill <= 0xFFFF); +#ifdef WAITING_FOR_GCC_VECTORIZATION_BUG_TO_BE_FIXED + /* + This code with int2store() is known to be faster on some processors, + but crashes on other processors due to a possible bug in GCC's + -ftree-vectorization (which is enabled in -O3) in case of + a non-aligned memory. See here for details: + http://gcc.gnu.org/bugzilla/show_bug.cgi?id=58039 + */ + char *last= s + l - 2; + uint16 tmp= (fill >> 8) + ((fill & 0xFF) << 8); /* swap bytes */ + DBUG_ASSERT(fill <= 0xFFFF); + for ( ; s <= last; s+= 2) + int2store(s, tmp); /* store little-endian */ +#else + for ( ; l >= 2; s[0]= (fill >> 8), s[1]= (fill & 0xFF), s+= 2, l-= 2); +#endif +} + + +static +size_t my_numchars_ucs2(CHARSET_INFO *cs __attribute__((unused)), + const char *b, const char *e) +{ + return (size_t) (e-b)/2; +} + + +static +size_t my_charpos_ucs2(CHARSET_INFO *cs __attribute__((unused)), + const char *b __attribute__((unused)), + const char *e __attribute__((unused)), + size_t pos) +{ + size_t string_length= (size_t) (e - b); + return pos > string_length ? 
string_length + 2 : pos * 2;
+}
+
+
+/*
+  Determine how many of the requested "nchars" characters are available
+  in [b, e) and record the result in *status.
+
+  - Enough bytes for nchars characters: no error position, source end is
+    b + nchars * 2, returns nchars.
+  - Otherwise, odd byte length: the dangling last byte (e - 1) is reported
+    both as the error position and as the source end.
+  - Otherwise, even byte length: no error position, source end is e.
+  In the last two cases the number of whole characters (length / 2) is
+  returned.
+
+  The size test divides the length rather than doubling nchars:
+  "nchars * 2 <= length" and "nchars <= length / 2" agree whenever the
+  product fits in size_t, but the product wraps for a very large nchars
+  and would then wrongly take the "enough bytes" branch.
+*/
+static size_t
+my_well_formed_char_length_ucs2(CHARSET_INFO *cs __attribute__((unused)),
+                                const char *b, const char *e,
+                                size_t nchars, MY_STRCOPY_STATUS *status)
+{
+  size_t length= e - b;
+  if (nchars <= length / 2)
+  {
+    status->m_well_formed_error_pos= NULL;
+    status->m_source_end_pos= b + (nchars * 2);
+    return nchars;
+  }
+  if (length % 2)
+  {
+    status->m_well_formed_error_pos= status->m_source_end_pos= e - 1;
+  }
+  else
+  {
+    status->m_well_formed_error_pos= NULL;
+    status->m_source_end_pos= e;
+  }
+  return length / 2;
+}
+
+
+/* Wildcard comparison that hands the collation's casefold data
+   (cs->casefold) to my_wildcmp_unicode(). */
+static
+int my_wildcmp_ucs2_ci(CHARSET_INFO *cs,
+                       const char *str,const char *str_end,
+                       const char *wildstr,const char *wildend,
+                       int escape, int w_one, int w_many)
+{
+  MY_CASEFOLD_INFO *uni_plane= cs->casefold;
+  return my_wildcmp_unicode(cs,str,str_end,wildstr,wildend,
+                            escape,w_one,w_many,uni_plane);
+}
+
+
+/* Wildcard comparison that passes NULL instead of casefold data to
+   my_wildcmp_unicode(). */
+static
+int my_wildcmp_ucs2_bin(CHARSET_INFO *cs,
+                        const char *str,const char *str_end,
+                        const char *wildstr,const char *wildend,
+                        int escape, int w_one, int w_many)
+{
+  return my_wildcmp_unicode(cs,str,str_end,wildstr,wildend,
+                            escape,w_one,w_many,NULL);
+}
+
+
+/* Mix every byte of key[0..len) into the running hash pair (*nr1, *nr2)
+   with MY_HASH_ADD; no case folding and no trailing-space handling. */
+static void
+my_hash_sort_ucs2_nopad_bin(CHARSET_INFO *cs __attribute__((unused)),
+                            const uchar *key, size_t len,
+                            ulong *nr1, ulong *nr2)
+{
+  const uchar *end= key + len;
+  register ulong m1= *nr1, m2= *nr2;
+  for ( ; key < end ; key++)
+  {
+    MY_HASH_ADD(m1, m2, (uint)*key);
+  }
+  *nr1= m1;
+  *nr2= m2;
+}
+
+
+/* Hash key[0..len) after cutting it to the length reported by
+   my_lengthsp_mb2(), then delegate to the nopad variant. */
+static void
+my_hash_sort_ucs2_bin(CHARSET_INFO *cs,
+                      const uchar *key, size_t len, ulong *nr1, ulong *nr2)
+{
+  size_t lengthsp= my_lengthsp_mb2(cs, (const char *) key, len);
+  my_hash_sort_ucs2_nopad_bin(cs, key, lengthsp, nr1, nr2);
+}
+
+
+static MY_COLLATION_HANDLER my_collation_ucs2_general_ci_handler =
+{
+  NULL, /* init */
+  my_strnncoll_ucs2_general_ci,
+  my_strnncollsp_ucs2_general_ci,
+  my_strnncollsp_nchars_ucs2_general_ci,
+  my_strnxfrm_ucs2_general_ci,
+  my_strnxfrmlen_unicode, +
my_like_range_generic,
+  my_wildcmp_ucs2_ci,
+  my_strcasecmp_mb2_or_mb4,
+  my_instr_mb,
+  my_hash_sort_ucs2,
+  my_propagate_simple,
+  my_min_str_mb_simple,
+  my_max_str_mb_simple,
+  my_ci_get_id_generic,
+  my_ci_get_collation_name_generic
+};
+
+ /* ucs2_general_mysql500_ci table: the four comparison/sort-key entries are
     the *_ucs2_general_mysql500_ci functions; wildcard and hash entries are
     my_wildcmp_ucs2_ci and my_hash_sort_ucs2. Initializers are positional. */
+static MY_COLLATION_HANDLER my_collation_ucs2_general_mysql500_ci_handler =
+{
+  NULL, /* init */
+  my_strnncoll_ucs2_general_mysql500_ci,
+  my_strnncollsp_ucs2_general_mysql500_ci,
+  my_strnncollsp_nchars_ucs2_general_mysql500_ci,
+  my_strnxfrm_ucs2_general_mysql500_ci,
+  my_strnxfrmlen_unicode,
+  my_like_range_generic,
+  my_wildcmp_ucs2_ci,
+  my_strcasecmp_mb2_or_mb4,
+  my_instr_mb,
+  my_hash_sort_ucs2,
+  my_propagate_simple,
+  my_min_str_mb_simple,
+  my_max_str_mb_simple,
+  my_ci_get_id_generic,
+  my_ci_get_collation_name_generic
+};
+
+ /* ucs2_bin table: *_ucs2_bin comparison/sort-key entries,
     my_wildcmp_ucs2_bin and my_hash_sort_ucs2_bin. */
+static MY_COLLATION_HANDLER my_collation_ucs2_bin_handler =
+{
+  NULL, /* init */
+  my_strnncoll_ucs2_bin,
+  my_strnncollsp_ucs2_bin,
+  my_strnncollsp_nchars_ucs2_bin,
+  my_strnxfrm_ucs2_bin,
+  my_strnxfrmlen_unicode,
+  my_like_range_generic,
+  my_wildcmp_ucs2_bin,
+  my_strcasecmp_mb2_or_mb4,
+  my_instr_mb,
+  my_hash_sort_ucs2_bin,
+  my_propagate_simple,
+  my_min_str_mb_simple,
+  my_max_str_mb_simple,
+  my_ci_get_id_generic,
+  my_ci_get_collation_name_generic
+};
+
+ /* ucs2_general_nopad_ci table: keeps my_strnncoll_ucs2_general_ci but uses
     the *_nopad_* strnncollsp/strnxfrm entries, my_hash_sort_ucs2_nopad and
     my_min_str_mb_simple_nopad. */
+static MY_COLLATION_HANDLER my_collation_ucs2_general_nopad_ci_handler =
+{
+  NULL, /* init */
+  my_strnncoll_ucs2_general_ci,
+  my_strnncollsp_ucs2_general_nopad_ci,
+  my_strnncollsp_nchars_ucs2_general_nopad_ci,
+  my_strnxfrm_nopad_ucs2_general_ci,
+  my_strnxfrmlen_unicode,
+  my_like_range_generic,
+  my_wildcmp_ucs2_ci,
+  my_strcasecmp_mb2_or_mb4,
+  my_instr_mb,
+  my_hash_sort_ucs2_nopad,
+  my_propagate_simple,
+  my_min_str_mb_simple_nopad,
+  my_max_str_mb_simple,
+  my_ci_get_id_generic,
+  my_ci_get_collation_name_generic
+};
+
+ /* ucs2_nopad_bin table: keeps my_strnncoll_ucs2_bin and uses the nopad
     variants for the remaining comparison entries listed here. */
+static MY_COLLATION_HANDLER my_collation_ucs2_nopad_bin_handler =
+{
+  NULL, /* init */
+  my_strnncoll_ucs2_bin,
+  my_strnncollsp_ucs2_nopad_bin,
+  my_strnncollsp_nchars_ucs2_nopad_bin, +
my_strnxfrm_nopad_ucs2_bin,
+  my_strnxfrmlen_unicode,
+  my_like_range_generic,
+  my_wildcmp_ucs2_bin,
+  my_strcasecmp_mb2_or_mb4,
+  my_instr_mb,
+  my_hash_sort_ucs2_nopad_bin,
+  my_propagate_simple,
+  my_min_str_mb_simple_nopad,
+  my_max_str_mb_simple,
+  my_ci_get_id_generic,
+  my_ci_get_collation_name_generic
+};
+
+ /* Character-set level callback table used by every ucs2 charset_info_st
     in this section. Initializers are positional; my_uni_ucs2 fills both
     the slot labelled wc_mb and a later slot, and my_casefold_multiply_1
     fills the last two slots. */
+MY_CHARSET_HANDLER my_charset_ucs2_handler=
+{
+  NULL, /* init */
+  my_numchars_ucs2,
+  my_charpos_ucs2,
+  my_lengthsp_mb2,
+  my_numcells_mb,
+  my_ucs2_uni, /* mb_wc */
+  my_uni_ucs2, /* wc_mb */
+  my_mb_ctype_mb,
+  my_caseup_str_mb2_or_mb4,
+  my_casedn_str_mb2_or_mb4,
+  my_caseup_ucs2,
+  my_casedn_ucs2,
+  my_snprintf_mb2,
+  my_l10tostr_mb2_or_mb4,
+  my_ll10tostr_mb2_or_mb4,
+  my_fill_ucs2,
+  my_strntol_mb2_or_mb4,
+  my_strntoul_mb2_or_mb4,
+  my_strntoll_mb2_or_mb4,
+  my_strntoull_mb2_or_mb4,
+  my_strntod_mb2_or_mb4,
+  my_strtoll10_mb2,
+  my_strntoull10rnd_mb2_or_mb4,
+  my_scan_mb2,
+  my_charlen_ucs2,
+  my_well_formed_char_length_ucs2,
+  my_copy_fix_mb2_or_mb4,
+  my_uni_ucs2,
+  my_wc_to_printable_generic,
+  my_casefold_multiply_1,
+  my_casefold_multiply_1
+};
+
+ /* ucs2_general_ci: number 35, flagged MY_CS_PRIMARY; fixed-width
     characters (mbminlen == mbmaxlen == 2); to_upper_ucs2 doubles as the
     sort_order table. */
+struct charset_info_st my_charset_ucs2_general_ci=
+{
+  35,0,0, /* number */
+  MY_CS_COMPILED|MY_CS_PRIMARY|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII,
+  { charset_name_ucs2, charset_name_ucs2_length}, /* cs name */
+  { STRING_WITH_LEN("ucs2_general_ci") }, /* name */
+  "", /* comment */
+  NULL, /* tailoring */
+  ctype_ucs2, /* ctype */
+  to_lower_ucs2, /* to_lower */
+  to_upper_ucs2, /* to_upper */
+  to_upper_ucs2, /* sort_order */
+  NULL, /* uca */
+  NULL, /* tab_to_uni */
+  NULL, /* tab_from_uni */
+  &my_casefold_default,/* casefold */
+  NULL, /* state_map */
+  NULL, /* ident_map */
+  1, /* strxfrm_multiply */
+  2, /* mbminlen */
+  2, /* mbmaxlen */
+  0, /* min_sort_char */
+  0xFFFF, /* max_sort_char */
+  ' ', /* pad char */
+  0, /* escape_with_backslash_is_dangerous */
+  MY_CS_COLL_LEVELS_S1,
+  &my_charset_ucs2_handler,
+  &my_collation_ucs2_general_ci_handler
+};
+
+
+struct
/* ucs2_general_mysql500_ci: number 159; the only ucs2 definition in this
   section whose casefold field is &my_casefold_mysql500 rather than
   &my_casefold_default. */
charset_info_st my_charset_ucs2_general_mysql500_ci=
+{
+  159, 0, 0, /* number */
+  MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII, /* state */
+  { charset_name_ucs2, charset_name_ucs2_length }, /* cs name */
+  { STRING_WITH_LEN("ucs2_general_mysql500_ci") }, /* name */
+  "", /* comment */
+  NULL, /* tailoring */
+  ctype_ucs2, /* ctype */
+  to_lower_ucs2, /* to_lower */
+  to_upper_ucs2, /* to_upper */
+  to_upper_ucs2, /* sort_order */
+  NULL, /* uca */
+  NULL, /* tab_to_uni */
+  NULL, /* tab_from_uni */
+  &my_casefold_mysql500, /* casefold */
+  NULL, /* state_map */
+  NULL, /* ident_map */
+  1, /* strxfrm_multiply */
+  2, /* mbminlen */
+  2, /* mbmaxlen */
+  0, /* min_sort_char */
+  0xFFFF, /* max_sort_char */
+  ' ', /* pad char */
+  0, /* escape_with_backslash_is_dangerous */
+  MY_CS_COLL_LEVELS_S1,
+  &my_charset_ucs2_handler,
+  &my_collation_ucs2_general_mysql500_ci_handler
+};
+
+ /* ucs2_bin: number 90, MY_CS_BINSORT; sort_order is NULL.
     NOTE(review): unlike utf32_bin, the flags here do not include
     MY_CS_STRNXFRM - confirm this is intentional. */
+struct charset_info_st my_charset_ucs2_bin=
+{
+  90,0,0, /* number */
+  MY_CS_COMPILED|MY_CS_BINSORT|MY_CS_UNICODE|MY_CS_NONASCII,
+  { charset_name_ucs2, charset_name_ucs2_length }, /* cs name */
+  { STRING_WITH_LEN("ucs2_bin") }, /* name */
+  "", /* comment */
+  NULL, /* tailoring */
+  ctype_ucs2, /* ctype */
+  to_lower_ucs2, /* to_lower */
+  to_upper_ucs2, /* to_upper */
+  NULL, /* sort_order */
+  NULL, /* uca */
+  NULL, /* tab_to_uni */
+  NULL, /* tab_from_uni */
+  &my_casefold_default,/* casefold */
+  NULL, /* state_map */
+  NULL, /* ident_map */
+  1, /* strxfrm_multiply */
+  2, /* mbminlen */
+  2, /* mbmaxlen */
+  0, /* min_sort_char */
+  0xFFFF, /* max_sort_char */
+  ' ', /* pad char */
+  0, /* escape_with_backslash_is_dangerous */
+  MY_CS_COLL_LEVELS_S1,
+  &my_charset_ucs2_handler,
+  &my_collation_ucs2_bin_handler
+};
+
+ /* ucs2_general_nopad_ci: id derived with MY_NOPAD_ID(35); adds the
     MY_CS_NOPAD flag. */
+struct charset_info_st my_charset_ucs2_general_nopad_ci=
+{
+  MY_NOPAD_ID(35),0,0, /* number */
+  MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NONASCII|MY_CS_NOPAD,
+  { charset_name_ucs2, charset_name_ucs2_length }, /* cs name */
+  {
STRING_WITH_LEN("ucs2_general_nopad_ci") }, /* name */
+  "", /* comment */
+  NULL, /* tailoring */
+  ctype_ucs2, /* ctype */
+  to_lower_ucs2, /* to_lower */
+  to_upper_ucs2, /* to_upper */
+  to_upper_ucs2, /* sort_order */
+  NULL, /* uca */
+  NULL, /* tab_to_uni */
+  NULL, /* tab_from_uni */
+  &my_casefold_default, /* casefold */
+  NULL, /* state_map */
+  NULL, /* ident_map */
+  1, /* strxfrm_multiply */
+  2, /* mbminlen */
+  2, /* mbmaxlen */
+  0, /* min_sort_char */
+  0xFFFF, /* max_sort_char */
+  ' ', /* pad char */
+  0, /* escape_with_backslash_is_dangerous */
+  MY_CS_COLL_LEVELS_S1,
+  &my_charset_ucs2_handler,
+  &my_collation_ucs2_general_nopad_ci_handler
+};
+
+ /* ucs2_nopad_bin: id derived with MY_NOPAD_ID(90); MY_CS_BINSORT plus
     MY_CS_NOPAD, sort_order NULL, wired to the nopad_bin collation
     handler. */
+struct charset_info_st my_charset_ucs2_nopad_bin=
+{
+  MY_NOPAD_ID(90),0,0, /* number */
+  MY_CS_COMPILED|MY_CS_BINSORT|MY_CS_UNICODE|MY_CS_NONASCII|MY_CS_NOPAD,
+  { charset_name_ucs2, charset_name_ucs2_length }, /* cs name */
+  { STRING_WITH_LEN("ucs2_nopad_bin") }, /* name */
+  "", /* comment */
+  NULL, /* tailoring */
+  ctype_ucs2, /* ctype */
+  to_lower_ucs2, /* to_lower */
+  to_upper_ucs2, /* to_upper */
+  NULL, /* sort_order */
+  NULL, /* uca */
+  NULL, /* tab_to_uni */
+  NULL, /* tab_from_uni */
+  &my_casefold_default, /* casefold */
+  NULL, /* state_map */
+  NULL, /* ident_map */
+  1, /* strxfrm_multiply */
+  2, /* mbminlen */
+  2, /* mbmaxlen */
+  0, /* min_sort_char */
+  0xFFFF, /* max_sort_char */
+  ' ', /* pad char */
+  0, /* escape_with_backslash_is_dangerous */
+  MY_CS_COLL_LEVELS_S1,
+  &my_charset_ucs2_handler,
+  &my_collation_ucs2_nopad_bin_handler
+};
+
+#endif /* HAVE_CHARSET_ucs2 */ |