diff options
Diffstat (limited to 'security/sandbox/chromium/base/third_party/icu')
3 files changed, 649 insertions, 0 deletions
diff --git a/security/sandbox/chromium/base/third_party/icu/LICENSE b/security/sandbox/chromium/base/third_party/icu/LICENSE new file mode 100644 index 0000000000..2882e4ebda --- /dev/null +++ b/security/sandbox/chromium/base/third_party/icu/LICENSE @@ -0,0 +1,76 @@ +COPYRIGHT AND PERMISSION NOTICE (ICU 58 and later) + +Copyright © 1991-2017 Unicode, Inc. All rights reserved. +Distributed under the Terms of Use in http://www.unicode.org/copyright.html + +Permission is hereby granted, free of charge, to any person obtaining +a copy of the Unicode data files and any associated documentation +(the "Data Files") or Unicode software and any associated documentation +(the "Software") to deal in the Data Files or Software +without restriction, including without limitation the rights to use, +copy, modify, merge, publish, distribute, and/or sell copies of +the Data Files or Software, and to permit persons to whom the Data Files +or Software are furnished to do so, provided that either +(a) this copyright and permission notice appear with all copies +of the Data Files or Software, or +(b) this copyright and permission notice appear in associated +Documentation. + +THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF +ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE +WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +NONINFRINGEMENT OF THIRD PARTY RIGHTS. +IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS +NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL +DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, +DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER +TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR +PERFORMANCE OF THE DATA FILES OR SOFTWARE. + +Except as contained in this notice, the name of a copyright holder +shall not be used in advertising or otherwise to promote the sale, +use or other dealings in these Data Files or Software without prior +written authorization of the copyright holder. + +--------------------- + +Third-Party Software Licenses + +This section contains third-party software notices and/or additional +terms for licensed third-party software components included within ICU +libraries. + +1. ICU License - ICU 1.8.1 to ICU 57.1 + +COPYRIGHT AND PERMISSION NOTICE + +Copyright (c) 1995-2016 International Business Machines Corporation and others +All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, and/or sell copies of the Software, and to permit persons +to whom the Software is furnished to do so, provided that the above +copyright notice(s) and this permission notice appear in all copies of +the Software and that both the above copyright notice(s) and this +permission notice appear in supporting documentation. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT +OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR +HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY +SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER +RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF +CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN +CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + +Except as contained in this notice, the name of a copyright holder +shall not be used in advertising or otherwise to promote the sale, use +or other dealings in this Software without prior written authorization +of the copyright holder. + +All trademarks and registered trademarks mentioned herein are the +property of their respective owners. diff --git a/security/sandbox/chromium/base/third_party/icu/icu_utf.cc b/security/sandbox/chromium/base/third_party/icu/icu_utf.cc new file mode 100644 index 0000000000..a3262b04d3 --- /dev/null +++ b/security/sandbox/chromium/base/third_party/icu/icu_utf.cc @@ -0,0 +1,131 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +****************************************************************************** +* +* Copyright (C) 1999-2012, International Business Machines +* Corporation and others. All Rights Reserved. +* +****************************************************************************** +* file name: utf_impl.cpp +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* created on: 1999sep13 +* created by: Markus W. Scherer +* +* This file provides implementation functions for macros in the utfXX.h +* that would otherwise be too long as macros. +*/ + +#include "base/third_party/icu/icu_utf.h" + +namespace base_icu { + +// source/common/utf_impl.cpp + +static const UChar32 +utf8_errorValue[6]={ + // Same values as UTF8_ERROR_VALUE_1, UTF8_ERROR_VALUE_2, UTF_ERROR_VALUE, + // but without relying on the obsolete unicode/utf_old.h. + 0x15, 0x9f, 0xffff, + 0x10ffff +}; + +static UChar32 +errorValue(int32_t count, int8_t strict) { + if(strict>=0) { + return utf8_errorValue[count]; + } else if(strict==-3) { + return 0xfffd; + } else { + return CBU_SENTINEL; + } +} + +/* + * Handle the non-inline part of the U8_NEXT() and U8_NEXT_FFFD() macros + * and their obsolete sibling UTF8_NEXT_CHAR_SAFE(). + * + * U8_NEXT() supports NUL-terminated strings indicated via length<0. + * + * The "strict" parameter controls the error behavior: + * <0 "Safe" behavior of U8_NEXT(): + * -1: All illegal byte sequences yield U_SENTINEL=-1. + * -2: Same as -1, except for lenient treatment of surrogate code points as legal. + * Some implementations use this for roundtripping of + * Unicode 16-bit strings that are not well-formed UTF-16, that is, they + * contain unpaired surrogates. + * -3: All illegal byte sequences yield U+FFFD. + * 0 Obsolete "safe" behavior of UTF8_NEXT_CHAR_SAFE(..., FALSE): + * All illegal byte sequences yield a positive code point such that this + * result code point would be encoded with the same number of bytes as + * the illegal sequence. + * >0 Obsolete "strict" behavior of UTF8_NEXT_CHAR_SAFE(..., TRUE): + * Same as the obsolete "safe" behavior, but non-characters are also treated + * like illegal sequences. + * + * Note that a UBool is the same as an int8_t. + */ +UChar32 +utf8_nextCharSafeBody(const uint8_t *s, int32_t *pi, int32_t length, UChar32 c, UBool strict) { + // *pi is one after byte c. + int32_t i=*pi; + // length can be negative for NUL-terminated strings: Read and validate one byte at a time. + if(i==length || c>0xf4) { + // end of string, or not a lead byte + } else if(c>=0xf0) { + // Test for 4-byte sequences first because + // U8_NEXT() handles shorter valid sequences inline. + uint8_t t1=s[i], t2, t3; + c&=7; + if(CBU8_IS_VALID_LEAD4_AND_T1(c, t1) && + ++i!=length && (t2=s[i]-0x80)<=0x3f && + ++i!=length && (t3=s[i]-0x80)<=0x3f) { + ++i; + c=(c<<18)|((t1&0x3f)<<12)|(t2<<6)|t3; + // strict: forbid non-characters like U+fffe + if(strict<=0 || !CBU_IS_UNICODE_NONCHAR(c)) { + *pi=i; + return c; + } + } + } else if(c>=0xe0) { + c&=0xf; + if(strict!=-2) { + uint8_t t1=s[i], t2; + if(CBU8_IS_VALID_LEAD3_AND_T1(c, t1) && + ++i!=length && (t2=s[i]-0x80)<=0x3f) { + ++i; + c=(c<<12)|((t1&0x3f)<<6)|t2; + // strict: forbid non-characters like U+fffe + if(strict<=0 || !CBU_IS_UNICODE_NONCHAR(c)) { + *pi=i; + return c; + } + } + } else { + // strict=-2 -> lenient: allow surrogates + uint8_t t1=s[i]-0x80, t2; + if(t1<=0x3f && (c>0 || t1>=0x20) && + ++i!=length && (t2=s[i]-0x80)<=0x3f) { + *pi=i+1; + return (c<<12)|(t1<<6)|t2; + } + } + } else if(c>=0xc2) { + uint8_t t1=s[i]-0x80; + if(t1<=0x3f) { + *pi=i+1; + return ((c-0xc0)<<6)|t1; + } + } // else 0x80<=c<0xc2 is not a lead byte + + /* error handling */ + c=errorValue(i-*pi, strict); + *pi=i; + return c; +} + +} // namespace base_icu diff --git a/security/sandbox/chromium/base/third_party/icu/icu_utf.h b/security/sandbox/chromium/base/third_party/icu/icu_utf.h new file mode 100644 index 0000000000..2ba82316c2 --- /dev/null +++ b/security/sandbox/chromium/base/third_party/icu/icu_utf.h @@ -0,0 +1,442 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +****************************************************************************** +* +* Copyright (C) 1999-2015, International Business Machines +* Corporation and others. All Rights Reserved. +* +****************************************************************************** +*/ + +#ifndef BASE_THIRD_PARTY_ICU_ICU_UTF_H_ +#define BASE_THIRD_PARTY_ICU_ICU_UTF_H_ + +#include <stdint.h> + +namespace base_icu { + +// source/common/unicode/umachine.h + +/** The ICU boolean type @stable ICU 2.0 */ +typedef int8_t UBool; + +/** + * Define UChar32 as a type for single Unicode code points. + * UChar32 is a signed 32-bit integer (same as int32_t). + * + * The Unicode code point range is 0..0x10ffff. + * All other values (negative or >=0x110000) are illegal as Unicode code points. + * They may be used as sentinel values to indicate "done", "error" + * or similar non-code point conditions. + * + * Before ICU 2.4 (Jitterbug 2146), UChar32 was defined + * to be wchar_t if that is 32 bits wide (wchar_t may be signed or unsigned) + * or else to be uint32_t. + * That is, the definition of UChar32 was platform-dependent. + * + * @see U_SENTINEL + * @stable ICU 2.4 + */ +typedef int32_t UChar32; + +/** + * This value is intended for sentinel values for APIs that + * (take or) return single code points (UChar32). + * It is outside of the Unicode code point range 0..0x10ffff. + * + * For example, a "done" or "error" value in a new API + * could be indicated with U_SENTINEL. + * + * ICU APIs designed before ICU 2.4 usually define service-specific "done" + * values, mostly 0xffff. + * Those may need to be distinguished from + * actual U+ffff text contents by calling functions like + * CharacterIterator::hasNext() or UnicodeString::length(). + * + * @return -1 + * @see UChar32 + * @stable ICU 2.4 + */ +#define CBU_SENTINEL (-1) + +// source/common/unicode/utf.h + +/** + * Is this code point a Unicode noncharacter? + * @param c 32-bit code point + * @return TRUE or FALSE + * @stable ICU 2.4 + */ +#define CBU_IS_UNICODE_NONCHAR(c) \ + ((c)>=0xfdd0 && \ + ((c)<=0xfdef || ((c)&0xfffe)==0xfffe) && (c)<=0x10ffff) + +/** + * Is c a Unicode code point value (0..U+10ffff) + * that can be assigned a character? + * + * Code points that are not characters include: + * - single surrogate code points (U+d800..U+dfff, 2048 code points) + * - the last two code points on each plane (U+__fffe and U+__ffff, 34 code points) + * - U+fdd0..U+fdef (new with Unicode 3.1, 32 code points) + * - the highest Unicode code point value is U+10ffff + * + * This means that all code points below U+d800 are character code points, + * and that boundary is tested first for performance. + * + * @param c 32-bit code point + * @return TRUE or FALSE + * @stable ICU 2.4 + */ +#define CBU_IS_UNICODE_CHAR(c) \ + ((uint32_t)(c)<0xd800 || \ + (0xdfff<(c) && (c)<=0x10ffff && !CBU_IS_UNICODE_NONCHAR(c))) + +/** + * Is this code point a surrogate (U+d800..U+dfff)? + * @param c 32-bit code point + * @return TRUE or FALSE + * @stable ICU 2.4 + */ +#define CBU_IS_SURROGATE(c) (((c)&0xfffff800)==0xd800) + +/** + * Assuming c is a surrogate code point (U_IS_SURROGATE(c)), + * is it a lead surrogate? + * @param c 32-bit code point + * @return TRUE or FALSE + * @stable ICU 2.4 + */ +#define CBU_IS_SURROGATE_LEAD(c) (((c)&0x400)==0) + +// source/common/unicode/utf8.h + +/** + * Internal bit vector for 3-byte UTF-8 validity check, for use in U8_IS_VALID_LEAD3_AND_T1. + * Each bit indicates whether one lead byte + first trail byte pair starts a valid sequence. + * Lead byte E0..EF bits 3..0 are used as byte index, + * first trail byte bits 7..5 are used as bit index into that byte. + * @see U8_IS_VALID_LEAD3_AND_T1 + * @internal + */ +#define CBU8_LEAD3_T1_BITS "\x20\x30\x30\x30\x30\x30\x30\x30\x30\x30\x30\x30\x30\x10\x30\x30" + +/** + * Internal 3-byte UTF-8 validity check. + * Non-zero if lead byte E0..EF and first trail byte 00..FF start a valid sequence. + * @internal + */ +#define CBU8_IS_VALID_LEAD3_AND_T1(lead, t1) (CBU8_LEAD3_T1_BITS[(lead)&0xf]&(1<<((uint8_t)(t1)>>5))) + +/** + * Internal bit vector for 4-byte UTF-8 validity check, for use in U8_IS_VALID_LEAD4_AND_T1. + * Each bit indicates whether one lead byte + first trail byte pair starts a valid sequence. + * First trail byte bits 7..4 are used as byte index, + * lead byte F0..F4 bits 2..0 are used as bit index into that byte. + * @see U8_IS_VALID_LEAD4_AND_T1 + * @internal + */ +#define CBU8_LEAD4_T1_BITS "\x00\x00\x00\x00\x00\x00\x00\x00\x1E\x0F\x0F\x0F\x00\x00\x00\x00" + +/** + * Internal 4-byte UTF-8 validity check. + * Non-zero if lead byte F0..F4 and first trail byte 00..FF start a valid sequence. + * @internal + */ +#define CBU8_IS_VALID_LEAD4_AND_T1(lead, t1) (CBU8_LEAD4_T1_BITS[(uint8_t)(t1)>>4]&(1<<((lead)&7))) + +/** + * Function for handling "next code point" with error-checking. + * + * This is internal since it is not meant to be called directly by external clie +nts; + * however it is U_STABLE (not U_INTERNAL) since it is called by public macros i +n this + * file and thus must remain stable, and should not be hidden when other interna +l + * functions are hidden (otherwise public macros would fail to compile). + * @internal + */ +UChar32 +utf8_nextCharSafeBody(const uint8_t *s, int32_t *pi, int32_t length, ::base_icu::UChar32 c, ::base_icu::UBool strict); + +/** + * Does this code unit (byte) encode a code point by itself (US-ASCII 0..0x7f)? + * @param c 8-bit code unit (byte) + * @return TRUE or FALSE + * @stable ICU 2.4 + */ +#define CBU8_IS_SINGLE(c) (((c)&0x80)==0) + +/** + * Is this code unit (byte) a UTF-8 lead byte? (0xC2..0xF4) + * @param c 8-bit code unit (byte) + * @return TRUE or FALSE + * @stable ICU 2.4 + */ +#define CBU8_IS_LEAD(c) ((uint8_t)((c)-0xc2)<=0x32) + +/** + * Is this code unit (byte) a UTF-8 trail byte? (0x80..0xBF) + * @param c 8-bit code unit (byte) + * @return TRUE or FALSE + * @stable ICU 2.4 + */ +#define CBU8_IS_TRAIL(c) ((int8_t)(c)<-0x40) + +/** + * How many code units (bytes) are used for the UTF-8 encoding + * of this Unicode code point? + * @param c 32-bit code point + * @return 1..4, or 0 if c is a surrogate or not a Unicode code point + * @stable ICU 2.4 + */ +#define CBU8_LENGTH(c) \ + ((uint32_t)(c)<=0x7f ? 1 : \ + ((uint32_t)(c)<=0x7ff ? 2 : \ + ((uint32_t)(c)<=0xd7ff ? 3 : \ + ((uint32_t)(c)<=0xdfff || (uint32_t)(c)>0x10ffff ? 0 : \ + ((uint32_t)(c)<=0xffff ? 3 : 4)\ + ) \ + ) \ + ) \ + ) + +/** + * The maximum number of UTF-8 code units (bytes) per Unicode code point (U+0000..U+10ffff). + * @return 4 + * @stable ICU 2.4 + */ +#define CBU8_MAX_LENGTH 4 + +/** + * Get a code point from a string at a code point boundary offset, + * and advance the offset to the next code point boundary. + * (Post-incrementing forward iteration.) + * "Safe" macro, checks for illegal sequences and for string boundaries. + * + * The length can be negative for a NUL-terminated string. + * + * The offset may point to the lead byte of a multi-byte sequence, + * in which case the macro will read the whole sequence. + * If the offset points to a trail byte or an illegal UTF-8 sequence, then + * c is set to a negative value. + * + * @param s const uint8_t * string + * @param i int32_t string offset, must be i<length + * @param length int32_t string length + * @param c output UChar32 variable, set to <0 in case of an error + * @see U8_NEXT_UNSAFE + * @stable ICU 2.4 + */ +#define CBU8_NEXT(s, i, length, c) { \ + (c)=(uint8_t)(s)[(i)++]; \ + if(!CBU8_IS_SINGLE(c)) { \ + uint8_t __t1, __t2; \ + if( /* handle U+0800..U+FFFF inline */ \ + (0xe0<=(c) && (c)<0xf0) && \ + (((i)+1)<(length) || (length)<0) && \ + CBU8_IS_VALID_LEAD3_AND_T1((c), __t1=(s)[i]) && \ + (__t2=(s)[(i)+1]-0x80)<=0x3f) { \ + (c)=(((c)&0xf)<<12)|((__t1&0x3f)<<6)|__t2; \ + (i)+=2; \ + } else if( /* handle U+0080..U+07FF inline */ \ + ((c)<0xe0 && (c)>=0xc2) && \ + ((i)!=(length)) && \ + (__t1=(s)[i]-0x80)<=0x3f) { \ + (c)=(((c)&0x1f)<<6)|__t1; \ + ++(i); \ + } else { \ + /* function call for "complicated" and error cases */ \ + (c)=::base_icu::utf8_nextCharSafeBody((const uint8_t *)s, &(i), (length), c, -1); \ + } \ + } \ +} + +/** + * Append a code point to a string, overwriting 1 to 4 bytes. + * The offset points to the current end of the string contents + * and is advanced (post-increment). + * "Unsafe" macro, assumes a valid code point and sufficient space in the string. + * Otherwise, the result is undefined. + * + * @param s const uint8_t * string buffer + * @param i string offset + * @param c code point to append + * @see U8_APPEND + * @stable ICU 2.4 + */ +#define CBU8_APPEND_UNSAFE(s, i, c) { \ + if((uint32_t)(c)<=0x7f) { \ + (s)[(i)++]=(uint8_t)(c); \ + } else { \ + if((uint32_t)(c)<=0x7ff) { \ + (s)[(i)++]=(uint8_t)(((c)>>6)|0xc0); \ + } else { \ + if((uint32_t)(c)<=0xffff) { \ + (s)[(i)++]=(uint8_t)(((c)>>12)|0xe0); \ + } else { \ + (s)[(i)++]=(uint8_t)(((c)>>18)|0xf0); \ + (s)[(i)++]=(uint8_t)((((c)>>12)&0x3f)|0x80); \ + } \ + (s)[(i)++]=(uint8_t)((((c)>>6)&0x3f)|0x80); \ + } \ + (s)[(i)++]=(uint8_t)(((c)&0x3f)|0x80); \ + } \ +} + +// source/common/unicode/utf16.h + +/** + * Does this code unit alone encode a code point (BMP, not a surrogate)? + * @param c 16-bit code unit + * @return TRUE or FALSE + * @stable ICU 2.4 + */ +#define CBU16_IS_SINGLE(c) !CBU_IS_SURROGATE(c) + +/** + * Is this code unit a lead surrogate (U+d800..U+dbff)? + * @param c 16-bit code unit + * @return TRUE or FALSE + * @stable ICU 2.4 + */ +#define CBU16_IS_LEAD(c) (((c)&0xfffffc00)==0xd800) + +/** + * Is this code unit a trail surrogate (U+dc00..U+dfff)? + * @param c 16-bit code unit + * @return TRUE or FALSE + * @stable ICU 2.4 + */ +#define CBU16_IS_TRAIL(c) (((c)&0xfffffc00)==0xdc00) + +/** + * Is this code unit a surrogate (U+d800..U+dfff)? + * @param c 16-bit code unit + * @return TRUE or FALSE + * @stable ICU 2.4 + */ +#define CBU16_IS_SURROGATE(c) CBU_IS_SURROGATE(c) + +/** + * Assuming c is a surrogate code point (U16_IS_SURROGATE(c)), + * is it a lead surrogate? + * @param c 16-bit code unit + * @return TRUE or FALSE + * @stable ICU 2.4 + */ +#define CBU16_IS_SURROGATE_LEAD(c) (((c)&0x400)==0) + +/** + * Helper constant for U16_GET_SUPPLEMENTARY. + * @internal + */ +#define CBU16_SURROGATE_OFFSET ((0xd800<<10UL)+0xdc00-0x10000) + +/** + * Get a supplementary code point value (U+10000..U+10ffff) + * from its lead and trail surrogates. + * The result is undefined if the input values are not + * lead and trail surrogates. + * + * @param lead lead surrogate (U+d800..U+dbff) + * @param trail trail surrogate (U+dc00..U+dfff) + * @return supplementary code point (U+10000..U+10ffff) + * @stable ICU 2.4 + */ +#define CBU16_GET_SUPPLEMENTARY(lead, trail) \ + (((::base_icu::UChar32)(lead)<<10UL)+(::base_icu::UChar32)(trail)-CBU16_SURROGATE_OFFSET) + +/** + * Get the lead surrogate (0xd800..0xdbff) for a + * supplementary code point (0x10000..0x10ffff). + * @param supplementary 32-bit code point (U+10000..U+10ffff) + * @return lead surrogate (U+d800..U+dbff) for supplementary + * @stable ICU 2.4 + */ +#define CBU16_LEAD(supplementary) (::base_icu::UChar)(((supplementary)>>10)+0xd7c0) + +/** + * Get the trail surrogate (0xdc00..0xdfff) for a + * supplementary code point (0x10000..0x10ffff). + * @param supplementary 32-bit code point (U+10000..U+10ffff) + * @return trail surrogate (U+dc00..U+dfff) for supplementary + * @stable ICU 2.4 + */ +#define CBU16_TRAIL(supplementary) (::base_icu::UChar)(((supplementary)&0x3ff)|0xdc00) + +/** + * How many 16-bit code units are used to encode this Unicode code point? (1 or 2) + * The result is not defined if c is not a Unicode code point (U+0000..U+10ffff). + * @param c 32-bit code point + * @return 1 or 2 + * @stable ICU 2.4 + */ +#define CBU16_LENGTH(c) ((uint32_t)(c)<=0xffff ? 1 : 2) + +/** + * The maximum number of 16-bit code units per Unicode code point (U+0000..U+10ffff). + * @return 2 + * @stable ICU 2.4 + */ +#define CBU16_MAX_LENGTH 2 + +/** + * Get a code point from a string at a code point boundary offset, + * and advance the offset to the next code point boundary. + * (Post-incrementing forward iteration.) + * "Safe" macro, handles unpaired surrogates and checks for string boundaries. + * + * The length can be negative for a NUL-terminated string. + * + * The offset may point to the lead surrogate unit + * for a supplementary code point, in which case the macro will read + * the following trail surrogate as well. + * If the offset points to a trail surrogate or + * to a single, unpaired lead surrogate, then c is set to that unpaired surrogate. + * + * @param s const UChar * string + * @param i string offset, must be i<length + * @param length string length + * @param c output UChar32 variable + * @see U16_NEXT_UNSAFE + * @stable ICU 2.4 + */ +#define CBU16_NEXT(s, i, length, c) { \ + (c)=(s)[(i)++]; \ + if(CBU16_IS_LEAD(c)) { \ + uint16_t __c2; \ + if((i)!=(length) && CBU16_IS_TRAIL(__c2=(s)[(i)])) { \ + ++(i); \ + (c)=CBU16_GET_SUPPLEMENTARY((c), __c2); \ + } \ + } \ +} + +/** + * Append a code point to a string, overwriting 1 or 2 code units. + * The offset points to the current end of the string contents + * and is advanced (post-increment). + * "Unsafe" macro, assumes a valid code point and sufficient space in the string. + * Otherwise, the result is undefined. + * + * @param s const UChar * string buffer + * @param i string offset + * @param c code point to append + * @see U16_APPEND + * @stable ICU 2.4 + */ +#define CBU16_APPEND_UNSAFE(s, i, c) { \ + if((uint32_t)(c)<=0xffff) { \ + (s)[(i)++]=(uint16_t)(c); \ + } else { \ + (s)[(i)++]=(uint16_t)(((c)>>10)+0xd7c0); \ + (s)[(i)++]=(uint16_t)(((c)&0x3ff)|0xdc00); \ + } \ +} + +} // namesapce base_icu + +#endif // BASE_THIRD_PARTY_ICU_ICU_UTF_H_ |