diff options
Diffstat (limited to 'intl/icu/source/common/ucnv_bld.cpp')
-rw-r--r-- | intl/icu/source/common/ucnv_bld.cpp | 1689 |
1 files changed, 1689 insertions, 0 deletions
diff --git a/intl/icu/source/common/ucnv_bld.cpp b/intl/icu/source/common/ucnv_bld.cpp new file mode 100644 index 0000000000..564b645bed --- /dev/null +++ b/intl/icu/source/common/ucnv_bld.cpp @@ -0,0 +1,1689 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* + ******************************************************************** + * COPYRIGHT: + * Copyright (c) 1996-2016, International Business Machines Corporation and + * others. All Rights Reserved. + ******************************************************************** + * + * ucnv_bld.cpp: + * + * Defines functions that are used in the creation/initialization/deletion + * of converters and related structures. + * uses uconv_io.h routines to access disk information + * is used by ucnv.h to implement public API create/delete/flushCache routines + * Modification History: + * + * Date Name Description + * + * 06/20/2000 helena OS/400 port changes; mostly typecast. + * 06/29/2000 helena Major rewrite of the callback interface. +*/ + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_CONVERSION + +#include "unicode/putil.h" +#include "unicode/udata.h" +#include "unicode/ucnv.h" +#include "unicode/uloc.h" +#include "mutex.h" +#include "putilimp.h" +#include "uassert.h" +#include "utracimp.h" +#include "ucnv_io.h" +#include "ucnv_bld.h" +#include "ucnvmbcs.h" +#include "ucnv_ext.h" +#include "ucnv_cnv.h" +#include "ucnv_imp.h" +#include "uhash.h" +#include "umutex.h" +#include "cstring.h" +#include "cmemory.h" +#include "ucln_cmn.h" +#include "ustr_cnv.h" + + +#if 0 +#include <stdio.h> +extern void UCNV_DEBUG_LOG(char *what, char *who, void *p, int l); +#define UCNV_DEBUG_LOG(x,y,z) UCNV_DEBUG_LOG(x,y,z,__LINE__) +#else +# define UCNV_DEBUG_LOG(x,y,z) +#endif + +static const UConverterSharedData * const +converterData[UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES]={ + nullptr, nullptr, + +#if UCONFIG_NO_LEGACY_CONVERSION + nullptr, +#else + &_MBCSData, +#endif + + &_Latin1Data, + &_UTF8Data, &_UTF16BEData, &_UTF16LEData, +#if UCONFIG_ONLY_HTML_CONVERSION + nullptr, nullptr, +#else + &_UTF32BEData, &_UTF32LEData, +#endif + nullptr, + +#if UCONFIG_NO_LEGACY_CONVERSION + nullptr, +#else + &_ISO2022Data, +#endif + +#if UCONFIG_NO_LEGACY_CONVERSION || UCONFIG_ONLY_HTML_CONVERSION + nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, + nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, + nullptr, +#else + &_LMBCSData1,&_LMBCSData2, &_LMBCSData3, &_LMBCSData4, &_LMBCSData5, &_LMBCSData6, + &_LMBCSData8,&_LMBCSData11,&_LMBCSData16,&_LMBCSData17,&_LMBCSData18,&_LMBCSData19, + &_HZData, +#endif + +#if UCONFIG_ONLY_HTML_CONVERSION + nullptr, +#else + &_SCSUData, +#endif + + +#if UCONFIG_NO_LEGACY_CONVERSION || UCONFIG_ONLY_HTML_CONVERSION + nullptr, +#else + &_ISCIIData, +#endif + + &_ASCIIData, +#if UCONFIG_ONLY_HTML_CONVERSION + nullptr, nullptr, &_UTF16Data, nullptr, nullptr, nullptr, +#else + &_UTF7Data, &_Bocu1Data, &_UTF16Data, &_UTF32Data, &_CESU8Data, &_IMAPData, +#endif + +#if UCONFIG_NO_LEGACY_CONVERSION || UCONFIG_ONLY_HTML_CONVERSION + nullptr, +#else + &_CompoundTextData +#endif +}; + +/* Please keep this in binary sorted order for getAlgorithmicTypeFromName. + Also the name should be in lower case and all spaces, dashes and underscores + removed +*/ +static struct { + const char *name; + const UConverterType type; +} const cnvNameType[] = { +#if !UCONFIG_ONLY_HTML_CONVERSION + { "bocu1", UCNV_BOCU1 }, + { "cesu8", UCNV_CESU8 }, +#endif +#if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION + { "hz",UCNV_HZ }, +#endif +#if !UCONFIG_ONLY_HTML_CONVERSION + { "imapmailboxname", UCNV_IMAP_MAILBOX }, +#endif +#if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION + { "iscii", UCNV_ISCII }, +#endif +#if !UCONFIG_NO_LEGACY_CONVERSION + { "iso2022", UCNV_ISO_2022 }, +#endif + { "iso88591", UCNV_LATIN_1 }, +#if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION + { "lmbcs1", UCNV_LMBCS_1 }, + { "lmbcs11",UCNV_LMBCS_11 }, + { "lmbcs16",UCNV_LMBCS_16 }, + { "lmbcs17",UCNV_LMBCS_17 }, + { "lmbcs18",UCNV_LMBCS_18 }, + { "lmbcs19",UCNV_LMBCS_19 }, + { "lmbcs2", UCNV_LMBCS_2 }, + { "lmbcs3", UCNV_LMBCS_3 }, + { "lmbcs4", UCNV_LMBCS_4 }, + { "lmbcs5", UCNV_LMBCS_5 }, + { "lmbcs6", UCNV_LMBCS_6 }, + { "lmbcs8", UCNV_LMBCS_8 }, +#endif +#if !UCONFIG_ONLY_HTML_CONVERSION + { "scsu", UCNV_SCSU }, +#endif + { "usascii", UCNV_US_ASCII }, + { "utf16", UCNV_UTF16 }, + { "utf16be", UCNV_UTF16_BigEndian }, + { "utf16le", UCNV_UTF16_LittleEndian }, +#if U_IS_BIG_ENDIAN + { "utf16oppositeendian", UCNV_UTF16_LittleEndian }, + { "utf16platformendian", UCNV_UTF16_BigEndian }, +#else + { "utf16oppositeendian", UCNV_UTF16_BigEndian}, + { "utf16platformendian", UCNV_UTF16_LittleEndian }, +#endif +#if !UCONFIG_ONLY_HTML_CONVERSION + { "utf32", UCNV_UTF32 }, + { "utf32be", UCNV_UTF32_BigEndian }, + { "utf32le", UCNV_UTF32_LittleEndian }, +#if U_IS_BIG_ENDIAN + { "utf32oppositeendian", UCNV_UTF32_LittleEndian }, + { "utf32platformendian", UCNV_UTF32_BigEndian }, +#else + { "utf32oppositeendian", UCNV_UTF32_BigEndian }, + { "utf32platformendian", UCNV_UTF32_LittleEndian }, +#endif +#endif +#if !UCONFIG_ONLY_HTML_CONVERSION + { "utf7", UCNV_UTF7 }, +#endif + { "utf8", UCNV_UTF8 }, +#if !UCONFIG_ONLY_HTML_CONVERSION + { "x11compoundtext", UCNV_COMPOUND_TEXT} +#endif +}; + + +/*initializes some global variables */ +static UHashtable *SHARED_DATA_HASHTABLE = nullptr; +static icu::UMutex cnvCacheMutex; +/* Note: the global mutex is used for */ +/* reference count updates. */ + +static const char **gAvailableConverters = nullptr; +static uint16_t gAvailableConverterCount = 0; +static icu::UInitOnce gAvailableConvertersInitOnce {}; + +#if !U_CHARSET_IS_UTF8 + +/* This contains the resolved converter name. So no further alias lookup is needed again. */ +static char gDefaultConverterNameBuffer[UCNV_MAX_CONVERTER_NAME_LENGTH + 1]; /* +1 for nullptr */ +static const char *gDefaultConverterName = nullptr; + +/* +If the default converter is an algorithmic converter, this is the cached value. +We don't cache a full UConverter and clone it because ucnv_clone doesn't have +less overhead than an algorithmic open. We don't cache non-algorithmic converters +because ucnv_flushCache must be able to unload the default converter and its table. +*/ +static const UConverterSharedData *gDefaultAlgorithmicSharedData = nullptr; + +/* Does gDefaultConverterName have a converter option and require extra parsing? */ +static UBool gDefaultConverterContainsOption; + +#endif /* !U_CHARSET_IS_UTF8 */ + +static const char DATA_TYPE[] = "cnv"; + +/* ucnv_flushAvailableConverterCache. This is only called from ucnv_cleanup(). + * If it is ever to be called from elsewhere, synchronization + * will need to be considered. + */ +static void +ucnv_flushAvailableConverterCache() { + gAvailableConverterCount = 0; + if (gAvailableConverters) { + uprv_free((char **)gAvailableConverters); + gAvailableConverters = nullptr; + } + gAvailableConvertersInitOnce.reset(); +} + +/* ucnv_cleanup - delete all storage held by the converter cache, except any */ +/* in use by open converters. */ +/* Not thread safe. */ +/* Not supported API. */ +static UBool U_CALLCONV ucnv_cleanup() { + ucnv_flushCache(); + if (SHARED_DATA_HASHTABLE != nullptr && uhash_count(SHARED_DATA_HASHTABLE) == 0) { + uhash_close(SHARED_DATA_HASHTABLE); + SHARED_DATA_HASHTABLE = nullptr; + } + + /* Isn't called from flushCache because other threads may have preexisting references to the table. */ + ucnv_flushAvailableConverterCache(); + +#if !U_CHARSET_IS_UTF8 + gDefaultConverterName = nullptr; + gDefaultConverterNameBuffer[0] = 0; + gDefaultConverterContainsOption = false; + gDefaultAlgorithmicSharedData = nullptr; +#endif + + return (SHARED_DATA_HASHTABLE == nullptr); +} + +U_CAPI void U_EXPORT2 +ucnv_enableCleanup() { + ucln_common_registerCleanup(UCLN_COMMON_UCNV, ucnv_cleanup); +} + +static UBool U_CALLCONV +isCnvAcceptable(void * /*context*/, + const char * /*type*/, const char * /*name*/, + const UDataInfo *pInfo) { + return (UBool)( + pInfo->size>=20 && + pInfo->isBigEndian==U_IS_BIG_ENDIAN && + pInfo->charsetFamily==U_CHARSET_FAMILY && + pInfo->sizeofUChar==U_SIZEOF_UCHAR && + pInfo->dataFormat[0]==0x63 && /* dataFormat="cnvt" */ + pInfo->dataFormat[1]==0x6e && + pInfo->dataFormat[2]==0x76 && + pInfo->dataFormat[3]==0x74 && + pInfo->formatVersion[0]==6); /* Everything will be version 6 */ +} + +/** + * Un flatten shared data from a UDATA.. + */ +static UConverterSharedData* +ucnv_data_unFlattenClone(UConverterLoadArgs *pArgs, UDataMemory *pData, UErrorCode *status) +{ + /* UDataInfo info; -- necessary only if some converters have different formatVersion */ + const uint8_t *raw = (const uint8_t *)udata_getMemory(pData); + const UConverterStaticData *source = (const UConverterStaticData *) raw; + UConverterSharedData *data; + UConverterType type = (UConverterType)source->conversionType; + + if(U_FAILURE(*status)) + return nullptr; + + if( (uint16_t)type >= UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES || + converterData[type] == nullptr || + !converterData[type]->isReferenceCounted || + converterData[type]->referenceCounter != 1 || + source->structSize != sizeof(UConverterStaticData)) + { + *status = U_INVALID_TABLE_FORMAT; + return nullptr; + } + + data = (UConverterSharedData *)uprv_malloc(sizeof(UConverterSharedData)); + if(data == nullptr) { + *status = U_MEMORY_ALLOCATION_ERROR; + return nullptr; + } + + /* copy initial values from the static structure for this type */ + uprv_memcpy(data, converterData[type], sizeof(UConverterSharedData)); + + data->staticData = source; + + data->sharedDataCached = false; + + /* fill in fields from the loaded data */ + data->dataMemory = (void*)pData; /* for future use */ + + if(data->impl->load != nullptr) { + data->impl->load(data, pArgs, raw + source->structSize, status); + if(U_FAILURE(*status)) { + uprv_free(data); + return nullptr; + } + } + return data; +} + +/*Takes an alias name gets an actual converter file name + *goes to disk and opens it. + *allocates the memory and returns a new UConverter object + */ +static UConverterSharedData *createConverterFromFile(UConverterLoadArgs *pArgs, UErrorCode * err) +{ + UDataMemory *data; + UConverterSharedData *sharedData; + + UTRACE_ENTRY_OC(UTRACE_UCNV_LOAD); + + if (U_FAILURE (*err)) { + UTRACE_EXIT_STATUS(*err); + return nullptr; + } + + UTRACE_DATA2(UTRACE_OPEN_CLOSE, "load converter %s from package %s", pArgs->name, pArgs->pkg); + + data = udata_openChoice(pArgs->pkg, DATA_TYPE, pArgs->name, isCnvAcceptable, nullptr, err); + if(U_FAILURE(*err)) + { + UTRACE_EXIT_STATUS(*err); + return nullptr; + } + + sharedData = ucnv_data_unFlattenClone(pArgs, data, err); + if(U_FAILURE(*err)) + { + udata_close(data); + UTRACE_EXIT_STATUS(*err); + return nullptr; + } + + /* + * TODO Store pkg in a field in the shared data so that delta-only converters + * can load base converters from the same package. + * If the pkg name is longer than the field, then either do not load the converter + * in the first place, or just set the pkg field to "". + */ + + UTRACE_EXIT_PTR_STATUS(sharedData, *err); + return sharedData; +} + +/*returns a converter type from a string + */ +static const UConverterSharedData * +getAlgorithmicTypeFromName(const char *realName) +{ + uint32_t mid, start, limit; + uint32_t lastMid; + int result; + char strippedName[UCNV_MAX_CONVERTER_NAME_LENGTH]; + + /* Lower case and remove ignoreable characters. */ + ucnv_io_stripForCompare(strippedName, realName); + + /* do a binary search for the alias */ + start = 0; + limit = UPRV_LENGTHOF(cnvNameType); + mid = limit; + lastMid = UINT32_MAX; + + for (;;) { + mid = (uint32_t)((start + limit) / 2); + if (lastMid == mid) { /* Have we moved? */ + break; /* We haven't moved, and it wasn't found. */ + } + lastMid = mid; + result = uprv_strcmp(strippedName, cnvNameType[mid].name); + + if (result < 0) { + limit = mid; + } else if (result > 0) { + start = mid; + } else { + return converterData[cnvNameType[mid].type]; + } + } + + return nullptr; +} + +/* +* Based on the number of known converters, this determines how many times larger +* the shared data hash table should be. When on small platforms, or just a couple +* of converters are used, this number should be 2. When memory is plentiful, or +* when ucnv_countAvailable is ever used with a lot of available converters, +* this should be 4. +* Larger numbers reduce the number of hash collisions, but use more memory. +*/ +#define UCNV_CACHE_LOAD_FACTOR 2 + +/* Puts the shared data in the static hashtable SHARED_DATA_HASHTABLE */ +/* Will always be called with the cnvCacheMutex already being held */ +/* by the calling function. */ +/* Stores the shared data in the SHARED_DATA_HASHTABLE + * @param data The shared data + */ +static void +ucnv_shareConverterData(UConverterSharedData * data) +{ + UErrorCode err = U_ZERO_ERROR; + /*Lazy evaluates the Hashtable itself */ + /*void *sanity = nullptr;*/ + + if (SHARED_DATA_HASHTABLE == nullptr) + { + SHARED_DATA_HASHTABLE = uhash_openSize(uhash_hashChars, uhash_compareChars, nullptr, + ucnv_io_countKnownConverters(&err)*UCNV_CACHE_LOAD_FACTOR, + &err); + ucnv_enableCleanup(); + + if (U_FAILURE(err)) + return; + } + + /* ### check to see if the element is not already there! */ + + /* + sanity = ucnv_getSharedConverterData (data->staticData->name); + if(sanity != nullptr) + { + UCNV_DEBUG_LOG("put:overwrite!",data->staticData->name,sanity); + } + UCNV_DEBUG_LOG("put:chk",data->staticData->name,sanity); + */ + + /* Mark it shared */ + data->sharedDataCached = true; + + uhash_put(SHARED_DATA_HASHTABLE, + (void*) data->staticData->name, /* Okay to cast away const as long as + keyDeleter == nullptr */ + data, + &err); + UCNV_DEBUG_LOG("put", data->staticData->name,data); + +} + +/* Look up a converter name in the shared data cache. */ +/* cnvCacheMutex must be held by the caller to protect the hash table. */ +/* gets the shared data from the SHARED_DATA_HASHTABLE (might return nullptr if it isn't there) + * @param name The name of the shared data + * @return the shared data from the SHARED_DATA_HASHTABLE + */ +static UConverterSharedData * +ucnv_getSharedConverterData(const char *name) +{ + /*special case when no Table has yet been created we return nullptr */ + if (SHARED_DATA_HASHTABLE == nullptr) + { + return nullptr; + } + else + { + UConverterSharedData *rc; + + rc = (UConverterSharedData*)uhash_get(SHARED_DATA_HASHTABLE, name); + UCNV_DEBUG_LOG("get",name,rc); + return rc; + } +} + +/*frees the string of memory blocks associates with a sharedConverter + *if and only if the referenceCounter == 0 + */ +/* Deletes (frees) the Shared data it's passed. first it checks the referenceCounter to + * see if anyone is using it, if not it frees all the memory stemming from sharedConverterData and + * returns true, + * otherwise returns false + * @param sharedConverterData The shared data + * @return if not it frees all the memory stemming from sharedConverterData and + * returns true, otherwise returns false + */ +static UBool +ucnv_deleteSharedConverterData(UConverterSharedData * deadSharedData) +{ + UTRACE_ENTRY_OC(UTRACE_UCNV_UNLOAD); + UTRACE_DATA2(UTRACE_OPEN_CLOSE, "unload converter %s shared data %p", deadSharedData->staticData->name, deadSharedData); + + if (deadSharedData->referenceCounter > 0) { + UTRACE_EXIT_VALUE((int32_t)false); + return false; + } + + if (deadSharedData->impl->unload != nullptr) { + deadSharedData->impl->unload(deadSharedData); + } + + if(deadSharedData->dataMemory != nullptr) + { + UDataMemory *data = (UDataMemory*)deadSharedData->dataMemory; + udata_close(data); + } + + uprv_free(deadSharedData); + + UTRACE_EXIT_VALUE((int32_t)true); + return true; +} + +/** + * Load a non-algorithmic converter. + * If pkg==nullptr, then this function must be called inside umtx_lock(&cnvCacheMutex). + */ +UConverterSharedData * +ucnv_load(UConverterLoadArgs *pArgs, UErrorCode *err) { + UConverterSharedData *mySharedConverterData; + + if(err == nullptr || U_FAILURE(*err)) { + return nullptr; + } + + if(pArgs->pkg != nullptr && *pArgs->pkg != 0) { + /* application-provided converters are not currently cached */ + return createConverterFromFile(pArgs, err); + } + + mySharedConverterData = ucnv_getSharedConverterData(pArgs->name); + if (mySharedConverterData == nullptr) + { + /*Not cached, we need to stream it in from file */ + mySharedConverterData = createConverterFromFile(pArgs, err); + if (U_FAILURE (*err) || (mySharedConverterData == nullptr)) + { + return nullptr; + } + else if (!pArgs->onlyTestIsLoadable) + { + /* share it with other library clients */ + ucnv_shareConverterData(mySharedConverterData); + } + } + else + { + /* The data for this converter was already in the cache. */ + /* Update the reference counter on the shared data: one more client */ + mySharedConverterData->referenceCounter++; + } + + return mySharedConverterData; +} + +/** + * Unload a non-algorithmic converter. + * It must be sharedData->isReferenceCounted + * and this function must be called inside umtx_lock(&cnvCacheMutex). + */ +U_CAPI void +ucnv_unload(UConverterSharedData *sharedData) { + if(sharedData != nullptr) { + if (sharedData->referenceCounter > 0) { + sharedData->referenceCounter--; + } + + if((sharedData->referenceCounter <= 0)&&(sharedData->sharedDataCached == false)) { + ucnv_deleteSharedConverterData(sharedData); + } + } +} + +U_CFUNC void +ucnv_unloadSharedDataIfReady(UConverterSharedData *sharedData) +{ + if(sharedData != nullptr && sharedData->isReferenceCounted) { + umtx_lock(&cnvCacheMutex); + ucnv_unload(sharedData); + umtx_unlock(&cnvCacheMutex); + } +} + +U_CFUNC void +ucnv_incrementRefCount(UConverterSharedData *sharedData) +{ + if(sharedData != nullptr && sharedData->isReferenceCounted) { + umtx_lock(&cnvCacheMutex); + sharedData->referenceCounter++; + umtx_unlock(&cnvCacheMutex); + } +} + +/* + * *pPieces must be initialized. + * The name without options will be copied to pPieces->cnvName. + * The locale and options will be copied to pPieces only if present in inName, + * otherwise the existing values in pPieces remain. + * *pArgs will be set to the pPieces values. + */ +static void +parseConverterOptions(const char *inName, + UConverterNamePieces *pPieces, + UConverterLoadArgs *pArgs, + UErrorCode *err) +{ + char *cnvName = pPieces->cnvName; + char c; + int32_t len = 0; + + pArgs->name=inName; + pArgs->locale=pPieces->locale; + pArgs->options=pPieces->options; + + /* copy the converter name itself to cnvName */ + while((c=*inName)!=0 && c!=UCNV_OPTION_SEP_CHAR) { + if (++len>=UCNV_MAX_CONVERTER_NAME_LENGTH) { + *err = U_ILLEGAL_ARGUMENT_ERROR; /* bad name */ + pPieces->cnvName[0]=0; + return; + } + *cnvName++=c; + inName++; + } + *cnvName=0; + pArgs->name=pPieces->cnvName; + + /* parse options. No more name copying should occur. */ + while((c=*inName)!=0) { + if(c==UCNV_OPTION_SEP_CHAR) { + ++inName; + } + + /* inName is behind an option separator */ + if(uprv_strncmp(inName, "locale=", 7)==0) { + /* do not modify locale itself in case we have multiple locale options */ + char *dest=pPieces->locale; + + /* copy the locale option value */ + inName+=7; + len=0; + while((c=*inName)!=0 && c!=UCNV_OPTION_SEP_CHAR) { + ++inName; + + if(++len>=ULOC_FULLNAME_CAPACITY) { + *err=U_ILLEGAL_ARGUMENT_ERROR; /* bad name */ + pPieces->locale[0]=0; + return; + } + + *dest++=c; + } + *dest=0; + } else if(uprv_strncmp(inName, "version=", 8)==0) { + /* copy the version option value into bits 3..0 of pPieces->options */ + inName+=8; + c=*inName; + if(c==0) { + pArgs->options=(pPieces->options&=~UCNV_OPTION_VERSION); + return; + } else if((uint8_t)(c-'0')<10) { + pArgs->options=pPieces->options=(pPieces->options&~UCNV_OPTION_VERSION)|(uint32_t)(c-'0'); + ++inName; + } + } else if(uprv_strncmp(inName, "swaplfnl", 8)==0) { + inName+=8; + pArgs->options=(pPieces->options|=UCNV_OPTION_SWAP_LFNL); + /* add processing for new options here with another } else if(uprv_strncmp(inName, "option-name=", XX)==0) { */ + } else { + /* ignore any other options until we define some */ + while(((c = *inName++) != 0) && (c != UCNV_OPTION_SEP_CHAR)) { + } + if(c==0) { + return; + } + } + } +} + +/*Logic determines if the converter is Algorithmic AND/OR cached + *depending on that: + * -we either go to get data from disk and cache it (Data=true, Cached=false) + * -Get it from a Hashtable (Data=X, Cached=true) + * -Call dataConverter initializer (Data=true, Cached=true) + * -Call AlgorithmicConverter initializer (Data=false, Cached=true) + */ +U_CFUNC UConverterSharedData * +ucnv_loadSharedData(const char *converterName, + UConverterNamePieces *pPieces, + UConverterLoadArgs *pArgs, + UErrorCode * err) { + UConverterNamePieces stackPieces; + UConverterLoadArgs stackArgs; + UConverterSharedData *mySharedConverterData = nullptr; + UErrorCode internalErrorCode = U_ZERO_ERROR; + UBool mayContainOption = true; + UBool checkForAlgorithmic = true; + + if (U_FAILURE (*err)) { + return nullptr; + } + + if(pPieces == nullptr) { + if(pArgs != nullptr) { + /* + * Bad: We may set pArgs pointers to stackPieces fields + * which will be invalid after this function returns. + */ + *err = U_INTERNAL_PROGRAM_ERROR; + return nullptr; + } + pPieces = &stackPieces; + } + if(pArgs == nullptr) { + uprv_memset(&stackArgs, 0, sizeof(stackArgs)); + stackArgs.size = (int32_t)sizeof(stackArgs); + pArgs = &stackArgs; + } + + pPieces->cnvName[0] = 0; + pPieces->locale[0] = 0; + pPieces->options = 0; + + pArgs->name = converterName; + pArgs->locale = pPieces->locale; + pArgs->options = pPieces->options; + + /* In case "name" is nullptr we want to open the default converter. */ + if (converterName == nullptr) { +#if U_CHARSET_IS_UTF8 + pArgs->name = "UTF-8"; + return (UConverterSharedData *)converterData[UCNV_UTF8]; +#else + /* Call ucnv_getDefaultName first to query the name from the OS. */ + pArgs->name = ucnv_getDefaultName(); + if (pArgs->name == nullptr) { + *err = U_MISSING_RESOURCE_ERROR; + return nullptr; + } + mySharedConverterData = (UConverterSharedData *)gDefaultAlgorithmicSharedData; + checkForAlgorithmic = false; + mayContainOption = gDefaultConverterContainsOption; + /* the default converter name is already canonical */ +#endif + } + else if(UCNV_FAST_IS_UTF8(converterName)) { + /* fastpath for UTF-8 */ + pArgs->name = "UTF-8"; + return (UConverterSharedData *)converterData[UCNV_UTF8]; + } + else { + /* separate the converter name from the options */ + parseConverterOptions(converterName, pPieces, pArgs, err); + if (U_FAILURE(*err)) { + /* Very bad name used. */ + return nullptr; + } + + /* get the canonical converter name */ + pArgs->name = ucnv_io_getConverterName(pArgs->name, &mayContainOption, &internalErrorCode); + if (U_FAILURE(internalErrorCode) || pArgs->name == nullptr) { + /* + * set the input name in case the converter was added + * without updating the alias table, or when there is no alias table + */ + pArgs->name = pPieces->cnvName; + } else if (internalErrorCode == U_AMBIGUOUS_ALIAS_WARNING) { + *err = U_AMBIGUOUS_ALIAS_WARNING; + } + } + + /* separate the converter name from the options */ + if(mayContainOption && pArgs->name != pPieces->cnvName) { + parseConverterOptions(pArgs->name, pPieces, pArgs, err); + } + + /* get the shared data for an algorithmic converter, if it is one */ + if (checkForAlgorithmic) { + mySharedConverterData = (UConverterSharedData *)getAlgorithmicTypeFromName(pArgs->name); + } + if (mySharedConverterData == nullptr) + { + /* it is a data-based converter, get its shared data. */ + /* Hold the cnvCacheMutex through the whole process of checking the */ + /* converter data cache, and adding new entries to the cache */ + /* to prevent other threads from modifying the cache during the */ + /* process. */ + pArgs->nestedLoads=1; + pArgs->pkg=nullptr; + + umtx_lock(&cnvCacheMutex); + mySharedConverterData = ucnv_load(pArgs, err); + umtx_unlock(&cnvCacheMutex); + if (U_FAILURE (*err) || (mySharedConverterData == nullptr)) + { + return nullptr; + } + } + + return mySharedConverterData; +} + +U_CAPI UConverter * +ucnv_createConverter(UConverter *myUConverter, const char *converterName, UErrorCode * err) +{ + UConverterNamePieces stackPieces; + UConverterLoadArgs stackArgs=UCNV_LOAD_ARGS_INITIALIZER; + UConverterSharedData *mySharedConverterData; + + UTRACE_ENTRY_OC(UTRACE_UCNV_OPEN); + + if(U_SUCCESS(*err)) { + UTRACE_DATA1(UTRACE_OPEN_CLOSE, "open converter %s", converterName); + + mySharedConverterData = ucnv_loadSharedData(converterName, &stackPieces, &stackArgs, err); + + myUConverter = ucnv_createConverterFromSharedData( + myUConverter, mySharedConverterData, + &stackArgs, + err); + + if(U_SUCCESS(*err)) { + UTRACE_EXIT_PTR_STATUS(myUConverter, *err); + return myUConverter; + } + } + + /* exit with error */ + UTRACE_EXIT_STATUS(*err); + return nullptr; +} + +U_CFUNC UBool +ucnv_canCreateConverter(const char *converterName, UErrorCode *err) { + UConverter myUConverter; + UConverterNamePieces stackPieces; + UConverterLoadArgs stackArgs=UCNV_LOAD_ARGS_INITIALIZER; + UConverterSharedData *mySharedConverterData; + + UTRACE_ENTRY_OC(UTRACE_UCNV_OPEN); + + if(U_SUCCESS(*err)) { + UTRACE_DATA1(UTRACE_OPEN_CLOSE, "test if can open converter %s", converterName); + + stackArgs.onlyTestIsLoadable=true; + mySharedConverterData = ucnv_loadSharedData(converterName, &stackPieces, &stackArgs, err); + ucnv_createConverterFromSharedData( + &myUConverter, mySharedConverterData, + &stackArgs, + err); + ucnv_unloadSharedDataIfReady(mySharedConverterData); + } + + UTRACE_EXIT_STATUS(*err); + return U_SUCCESS(*err); +} + +UConverter * +ucnv_createAlgorithmicConverter(UConverter *myUConverter, + UConverterType type, + const char *locale, uint32_t options, + UErrorCode *err) { + UConverter *cnv; + const UConverterSharedData *sharedData; + UConverterLoadArgs stackArgs=UCNV_LOAD_ARGS_INITIALIZER; + + UTRACE_ENTRY_OC(UTRACE_UCNV_OPEN_ALGORITHMIC); + UTRACE_DATA1(UTRACE_OPEN_CLOSE, "open algorithmic converter type %d", (int32_t)type); + + if(type<0 || UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES<=type) { + *err = U_ILLEGAL_ARGUMENT_ERROR; + UTRACE_EXIT_STATUS(U_ILLEGAL_ARGUMENT_ERROR); + return nullptr; + } + + sharedData = converterData[type]; + if(sharedData == nullptr || sharedData->isReferenceCounted) { + /* not a valid type, or not an algorithmic converter */ + *err = U_ILLEGAL_ARGUMENT_ERROR; + UTRACE_EXIT_STATUS(U_ILLEGAL_ARGUMENT_ERROR); + return nullptr; + } + + stackArgs.name = ""; + stackArgs.options = options; + stackArgs.locale=locale; + cnv = ucnv_createConverterFromSharedData( + myUConverter, (UConverterSharedData *)sharedData, + &stackArgs, err); + + UTRACE_EXIT_PTR_STATUS(cnv, *err); + return cnv; +} + +U_CFUNC UConverter* +ucnv_createConverterFromPackage(const char *packageName, const char *converterName, UErrorCode * err) +{ + UConverter *myUConverter; + UConverterSharedData *mySharedConverterData; + UConverterNamePieces stackPieces; + UConverterLoadArgs stackArgs=UCNV_LOAD_ARGS_INITIALIZER; + + UTRACE_ENTRY_OC(UTRACE_UCNV_OPEN_PACKAGE); + + if(U_FAILURE(*err)) { + UTRACE_EXIT_STATUS(*err); + return nullptr; + } + + UTRACE_DATA2(UTRACE_OPEN_CLOSE, "open converter %s from package %s", converterName, packageName); + + /* first, get the options out of the converterName string */ + stackPieces.cnvName[0] = 0; + stackPieces.locale[0] = 0; + stackPieces.options = 0; + parseConverterOptions(converterName, &stackPieces, &stackArgs, err); + if (U_FAILURE(*err)) { + /* Very bad name used. */ + UTRACE_EXIT_STATUS(*err); + return nullptr; + } + stackArgs.nestedLoads=1; + stackArgs.pkg=packageName; + + /* open the data, unflatten the shared structure */ + mySharedConverterData = createConverterFromFile(&stackArgs, err); + + if (U_FAILURE(*err)) { + UTRACE_EXIT_STATUS(*err); + return nullptr; + } + + /* create the actual converter */ + myUConverter = ucnv_createConverterFromSharedData(nullptr, mySharedConverterData, &stackArgs, err); + + if (U_FAILURE(*err)) { + ucnv_close(myUConverter); + UTRACE_EXIT_STATUS(*err); + return nullptr; + } + + UTRACE_EXIT_PTR_STATUS(myUConverter, *err); + return myUConverter; +} + + +U_CFUNC UConverter* +ucnv_createConverterFromSharedData(UConverter *myUConverter, + UConverterSharedData *mySharedConverterData, + UConverterLoadArgs *pArgs, + UErrorCode *err) +{ + UBool isCopyLocal; + + if(U_FAILURE(*err)) { + ucnv_unloadSharedDataIfReady(mySharedConverterData); + return myUConverter; + } + if(myUConverter == nullptr) + { + myUConverter = (UConverter *) uprv_malloc (sizeof (UConverter)); + if(myUConverter == nullptr) + { + *err = U_MEMORY_ALLOCATION_ERROR; + ucnv_unloadSharedDataIfReady(mySharedConverterData); + return nullptr; + } + isCopyLocal = false; + } else { + isCopyLocal = true; + } + + /* initialize the converter */ + uprv_memset(myUConverter, 0, sizeof(UConverter)); + myUConverter->isCopyLocal = isCopyLocal; + /*myUConverter->isExtraLocal = false;*/ /* Set by the memset call */ + myUConverter->sharedData = mySharedConverterData; + myUConverter->options = pArgs->options; + if(!pArgs->onlyTestIsLoadable) { + myUConverter->preFromUFirstCP = U_SENTINEL; + myUConverter->fromCharErrorBehaviour = UCNV_TO_U_DEFAULT_CALLBACK; + myUConverter->fromUCharErrorBehaviour = UCNV_FROM_U_DEFAULT_CALLBACK; + myUConverter->toUnicodeStatus = mySharedConverterData->toUnicodeStatus; + myUConverter->maxBytesPerUChar = mySharedConverterData->staticData->maxBytesPerChar; + myUConverter->subChar1 = mySharedConverterData->staticData->subChar1; + myUConverter->subCharLen = mySharedConverterData->staticData->subCharLen; + myUConverter->subChars = (uint8_t *)myUConverter->subUChars; + uprv_memcpy(myUConverter->subChars, mySharedConverterData->staticData->subChar, myUConverter->subCharLen); + myUConverter->toUCallbackReason = UCNV_ILLEGAL; /* default reason to invoke (*fromCharErrorBehaviour) */ + } + + if(mySharedConverterData->impl->open != nullptr) { + mySharedConverterData->impl->open(myUConverter, pArgs, err); + if(U_FAILURE(*err) && !pArgs->onlyTestIsLoadable) { + /* don't ucnv_close() if onlyTestIsLoadable because not fully initialized */ + ucnv_close(myUConverter); + return nullptr; + } + } + + return myUConverter; +} + +/*Frees all shared immutable objects that aren't referred to (reference count = 0) + */ +U_CAPI int32_t U_EXPORT2 +ucnv_flushCache () +{ + UConverterSharedData *mySharedData = nullptr; + int32_t pos; + int32_t tableDeletedNum = 0; + const UHashElement *e; + /*UErrorCode status = U_ILLEGAL_ARGUMENT_ERROR;*/ + int32_t i, remaining; + + UTRACE_ENTRY_OC(UTRACE_UCNV_FLUSH_CACHE); + + /* Close the default converter without creating a new one so that everything will be flushed. */ + u_flushDefaultConverter(); + + /*if shared data hasn't even been lazy evaluated yet + * return 0 + */ + if (SHARED_DATA_HASHTABLE == nullptr) { + UTRACE_EXIT_VALUE((int32_t)0); + return 0; + } + + /*creates an enumeration to iterate through every element in the + * table + * + * Synchronization: holding cnvCacheMutex will prevent any other thread from + * accessing or modifying the hash table during the iteration. + * The reference count of an entry may be decremented by + * ucnv_close while the iteration is in process, but this is + * benign. It can't be incremented (in ucnv_createConverter()) + * because the sequence of looking up in the cache + incrementing + * is protected by cnvCacheMutex. + */ + umtx_lock(&cnvCacheMutex); + /* + * double loop: A delta/extension-only converter has a pointer to its base table's + * shared data; the first iteration of the outer loop may see the delta converter + * before the base converter, and unloading the delta converter may get the base + * converter's reference counter down to 0. + */ + i = 0; + do { + remaining = 0; + pos = UHASH_FIRST; + while ((e = uhash_nextElement (SHARED_DATA_HASHTABLE, &pos)) != nullptr) + { + mySharedData = (UConverterSharedData *) e->value.pointer; + /*deletes only if reference counter == 0 */ + if (mySharedData->referenceCounter == 0) + { + tableDeletedNum++; + + UCNV_DEBUG_LOG("del",mySharedData->staticData->name,mySharedData); + + uhash_removeElement(SHARED_DATA_HASHTABLE, e); + mySharedData->sharedDataCached = false; + ucnv_deleteSharedConverterData (mySharedData); + } else { + ++remaining; + } + } + } while(++i == 1 && remaining > 0); + umtx_unlock(&cnvCacheMutex); + + UTRACE_DATA1(UTRACE_INFO, "ucnv_flushCache() exits with %d converters remaining", remaining); + + UTRACE_EXIT_VALUE(tableDeletedNum); + return tableDeletedNum; +} + +/* available converters list --------------------------------------------------- */ + +static void U_CALLCONV initAvailableConvertersList(UErrorCode &errCode) { + U_ASSERT(gAvailableConverterCount == 0); + U_ASSERT(gAvailableConverters == nullptr); + + ucnv_enableCleanup(); + UEnumeration *allConvEnum = ucnv_openAllNames(&errCode); + int32_t allConverterCount = uenum_count(allConvEnum, &errCode); + if (U_FAILURE(errCode)) { + return; + } + + /* We can't have more than "*converterTable" converters to open */ + gAvailableConverters = (const char **) uprv_malloc(allConverterCount * sizeof(char*)); + if (!gAvailableConverters) { + errCode = U_MEMORY_ALLOCATION_ERROR; + return; + } + + /* Open the default converter to make sure that it has first dibs in the hash table. */ + UErrorCode localStatus = U_ZERO_ERROR; + UConverter tempConverter; + ucnv_close(ucnv_createConverter(&tempConverter, nullptr, &localStatus)); + + gAvailableConverterCount = 0; + + for (int32_t idx = 0; idx < allConverterCount; idx++) { + localStatus = U_ZERO_ERROR; + const char *converterName = uenum_next(allConvEnum, nullptr, &localStatus); + if (ucnv_canCreateConverter(converterName, &localStatus)) { + gAvailableConverters[gAvailableConverterCount++] = converterName; + } + } + + uenum_close(allConvEnum); +} + + +static UBool haveAvailableConverterList(UErrorCode *pErrorCode) { + umtx_initOnce(gAvailableConvertersInitOnce, &initAvailableConvertersList, *pErrorCode); + return U_SUCCESS(*pErrorCode); +} + +U_CFUNC uint16_t +ucnv_bld_countAvailableConverters(UErrorCode *pErrorCode) { + if (haveAvailableConverterList(pErrorCode)) { + return gAvailableConverterCount; + } + return 0; +} + +U_CFUNC const char * +ucnv_bld_getAvailableConverter(uint16_t n, UErrorCode *pErrorCode) { + if (haveAvailableConverterList(pErrorCode)) { + if (n < gAvailableConverterCount) { + return gAvailableConverters[n]; + } + *pErrorCode = U_INDEX_OUTOFBOUNDS_ERROR; + } + return nullptr; +} + +/* default converter name --------------------------------------------------- */ + +#if !U_CHARSET_IS_UTF8 +/* +Copy the canonical converter name. +ucnv_getDefaultName must be thread safe, which can call this function. + +ucnv_setDefaultName calls this function and it doesn't have to be +thread safe because there is no reliable/safe way to reset the +converter in use in all threads. If you did reset the converter, you +would not be sure that retrieving a default converter for one string +would be the same type of default converter for a successive string. +Since the name is a returned via ucnv_getDefaultName without copying, +you shouldn't be modifying or deleting the string from a separate thread. +*/ +static inline void +internalSetName(const char *name, UErrorCode *status) { + UConverterNamePieces stackPieces; + UConverterLoadArgs stackArgs=UCNV_LOAD_ARGS_INITIALIZER; + int32_t length=(int32_t)(uprv_strlen(name)); + UBool containsOption = (UBool)(uprv_strchr(name, UCNV_OPTION_SEP_CHAR) != nullptr); + const UConverterSharedData *algorithmicSharedData; + + stackArgs.name = name; + if(containsOption) { + stackPieces.cnvName[0] = 0; + stackPieces.locale[0] = 0; + stackPieces.options = 0; + parseConverterOptions(name, &stackPieces, &stackArgs, status); + if(U_FAILURE(*status)) { + return; + } + } + algorithmicSharedData = getAlgorithmicTypeFromName(stackArgs.name); + + umtx_lock(&cnvCacheMutex); + + gDefaultAlgorithmicSharedData = algorithmicSharedData; + gDefaultConverterContainsOption = containsOption; + uprv_memcpy(gDefaultConverterNameBuffer, name, length); + gDefaultConverterNameBuffer[length]=0; + + /* gDefaultConverterName MUST be the last global var set by this function. */ + /* It is the variable checked in ucnv_getDefaultName() to see if initialization is required. */ + // But there is nothing here preventing that from being reordered, either by the compiler + // or hardware. I'm adding the mutex to ucnv_getDefaultName for now. UMTX_CHECK is not enough. + // -- Andy + gDefaultConverterName = gDefaultConverterNameBuffer; + + ucnv_enableCleanup(); + + umtx_unlock(&cnvCacheMutex); +} +#endif + +/* + * In order to be really thread-safe, the get function would have to take + * a buffer parameter and copy the current string inside a mutex block. + * This implementation only tries to be really thread-safe while + * setting the name. + * It assumes that setting a pointer is atomic. + */ + +U_CAPI const char* U_EXPORT2 +ucnv_getDefaultName() { +#if U_CHARSET_IS_UTF8 + return "UTF-8"; +#else + /* local variable to be thread-safe */ + const char *name; + + /* + Concurrent calls to ucnv_getDefaultName must be thread safe, + but ucnv_setDefaultName is not thread safe. + */ + { + icu::Mutex lock(&cnvCacheMutex); + name = gDefaultConverterName; + } + if(name==nullptr) { + UErrorCode errorCode = U_ZERO_ERROR; + UConverter *cnv = nullptr; + + name = uprv_getDefaultCodepage(); + + /* if the name is there, test it out and get the canonical name with options */ + if(name != nullptr) { + cnv = ucnv_open(name, &errorCode); + if(U_SUCCESS(errorCode) && cnv != nullptr) { + name = ucnv_getName(cnv, &errorCode); + } + } + + if(name == nullptr || name[0] == 0 + || U_FAILURE(errorCode) || cnv == nullptr + || uprv_strlen(name)>=sizeof(gDefaultConverterNameBuffer)) + { + /* Panic time, let's use a fallback. */ +#if (U_CHARSET_FAMILY == U_ASCII_FAMILY) + name = "US-ASCII"; + /* there is no 'algorithmic' converter for EBCDIC */ +#elif U_PLATFORM == U_PF_OS390 + name = "ibm-1047_P100-1995" UCNV_SWAP_LFNL_OPTION_STRING; +#else + name = "ibm-37_P100-1995"; +#endif + } + + internalSetName(name, &errorCode); + + /* The close may make the current name go away. */ + ucnv_close(cnv); + } + + return name; +#endif +} + +#if U_CHARSET_IS_UTF8 +U_CAPI void U_EXPORT2 ucnv_setDefaultName(const char *) {} +#else +/* +This function is not thread safe, and it can't be thread safe. +See internalSetName or the API reference for details. +*/ +U_CAPI void U_EXPORT2 +ucnv_setDefaultName(const char *converterName) { + if(converterName==nullptr) { + /* reset to the default codepage */ + gDefaultConverterName=nullptr; + } else { + UErrorCode errorCode = U_ZERO_ERROR; + UConverter *cnv = nullptr; + const char *name = nullptr; + + /* if the name is there, test it out and get the canonical name with options */ + cnv = ucnv_open(converterName, &errorCode); + if(U_SUCCESS(errorCode) && cnv != nullptr) { + name = ucnv_getName(cnv, &errorCode); + } + + if(U_SUCCESS(errorCode) && name!=nullptr) { + internalSetName(name, &errorCode); + } + /* else this converter is bad to use. Don't change it to a bad value. */ + + /* The close may make the current name go away. */ + ucnv_close(cnv); + + /* reset the converter cache */ + u_flushDefaultConverter(); + } +} +#endif + +/* data swapping ------------------------------------------------------------ */ + +/* most of this might belong more properly into ucnvmbcs.c, but that is so large */ + +#if !UCONFIG_NO_LEGACY_CONVERSION + +U_CAPI int32_t U_EXPORT2 +ucnv_swap(const UDataSwapper *ds, + const void *inData, int32_t length, void *outData, + UErrorCode *pErrorCode) { + const UDataInfo *pInfo; + int32_t headerSize; + + const uint8_t *inBytes; + uint8_t *outBytes; + + uint32_t offset, count, staticDataSize; + int32_t size; + + const UConverterStaticData *inStaticData; + UConverterStaticData *outStaticData; + + const _MBCSHeader *inMBCSHeader; + _MBCSHeader *outMBCSHeader; + _MBCSHeader mbcsHeader; + uint32_t mbcsHeaderLength; + UBool noFromU=false; + + uint8_t outputType; + + int32_t maxFastUChar, mbcsIndexLength; + + const int32_t *inExtIndexes; + int32_t extOffset; + + /* udata_swapDataHeader checks the arguments */ + headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode); + if(pErrorCode==nullptr || U_FAILURE(*pErrorCode)) { + return 0; + } + + /* check data format and format version */ + pInfo=(const UDataInfo *)((const char *)inData+4); + if(!( + pInfo->dataFormat[0]==0x63 && /* dataFormat="cnvt" */ + pInfo->dataFormat[1]==0x6e && + pInfo->dataFormat[2]==0x76 && + pInfo->dataFormat[3]==0x74 && + pInfo->formatVersion[0]==6 && + pInfo->formatVersion[1]>=2 + )) { + udata_printError(ds, "ucnv_swap(): data format %02x.%02x.%02x.%02x (format version %02x.%02x) is not recognized as an ICU .cnv conversion table\n", + pInfo->dataFormat[0], pInfo->dataFormat[1], + pInfo->dataFormat[2], pInfo->dataFormat[3], + pInfo->formatVersion[0], pInfo->formatVersion[1]); + *pErrorCode=U_UNSUPPORTED_ERROR; + return 0; + } + + inBytes=(const uint8_t *)inData+headerSize; + outBytes=(outData == nullptr) ? nullptr : (uint8_t *)outData+headerSize; + + /* read the initial UConverterStaticData structure after the UDataInfo header */ + inStaticData=(const UConverterStaticData *)inBytes; + outStaticData=(UConverterStaticData *)outBytes; + + if(length<0) { + staticDataSize=ds->readUInt32(inStaticData->structSize); + } else { + length-=headerSize; + if( length<(int32_t)sizeof(UConverterStaticData) || + (uint32_t)length<(staticDataSize=ds->readUInt32(inStaticData->structSize)) + ) { + udata_printError(ds, "ucnv_swap(): too few bytes (%d after header) for an ICU .cnv conversion table\n", + length); + *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; + return 0; + } + } + + if(length>=0) { + /* swap the static data */ + if(inStaticData!=outStaticData) { + uprv_memcpy(outStaticData, inStaticData, staticDataSize); + } + + ds->swapArray32(ds, &inStaticData->structSize, 4, + &outStaticData->structSize, pErrorCode); + ds->swapArray32(ds, &inStaticData->codepage, 4, + &outStaticData->codepage, pErrorCode); + + ds->swapInvChars(ds, inStaticData->name, (int32_t)uprv_strlen(inStaticData->name), + outStaticData->name, pErrorCode); + if(U_FAILURE(*pErrorCode)) { + udata_printError(ds, "ucnv_swap(): error swapping converter name\n"); + return 0; + } + } + + inBytes+=staticDataSize; + if (outBytes != nullptr) outBytes+=staticDataSize; + if(length>=0) { + length-=(int32_t)staticDataSize; + } + + /* check for supported conversionType values */ + if(inStaticData->conversionType==UCNV_MBCS) { + /* swap MBCS data */ + inMBCSHeader=(const _MBCSHeader *)inBytes; + outMBCSHeader=(_MBCSHeader *)outBytes; + + if(0<=length && length<(int32_t)sizeof(_MBCSHeader)) { + udata_printError(ds, "ucnv_swap(): too few bytes (%d after headers) for an ICU MBCS .cnv conversion table\n", + length); + *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; + return 0; + } + if(inMBCSHeader->version[0]==4 && inMBCSHeader->version[1]>=1) { + mbcsHeaderLength=MBCS_HEADER_V4_LENGTH; + } else if(inMBCSHeader->version[0]==5 && inMBCSHeader->version[1]>=3 && + ((mbcsHeader.options=ds->readUInt32(inMBCSHeader->options))& + MBCS_OPT_UNKNOWN_INCOMPATIBLE_MASK)==0 + ) { + mbcsHeaderLength=mbcsHeader.options&MBCS_OPT_LENGTH_MASK; + noFromU=(UBool)((mbcsHeader.options&MBCS_OPT_NO_FROM_U)!=0); + } else { + udata_printError(ds, "ucnv_swap(): unsupported _MBCSHeader.version %d.%d\n", + inMBCSHeader->version[0], inMBCSHeader->version[1]); + *pErrorCode=U_UNSUPPORTED_ERROR; + return 0; + } + + uprv_memcpy(mbcsHeader.version, inMBCSHeader->version, 4); + mbcsHeader.countStates= ds->readUInt32(inMBCSHeader->countStates); + mbcsHeader.countToUFallbacks= ds->readUInt32(inMBCSHeader->countToUFallbacks); + mbcsHeader.offsetToUCodeUnits= ds->readUInt32(inMBCSHeader->offsetToUCodeUnits); + mbcsHeader.offsetFromUTable= ds->readUInt32(inMBCSHeader->offsetFromUTable); + mbcsHeader.offsetFromUBytes= ds->readUInt32(inMBCSHeader->offsetFromUBytes); + mbcsHeader.flags= ds->readUInt32(inMBCSHeader->flags); + mbcsHeader.fromUBytesLength= ds->readUInt32(inMBCSHeader->fromUBytesLength); + /* mbcsHeader.options have been read above */ + + extOffset=(int32_t)(mbcsHeader.flags>>8); + outputType=(uint8_t)mbcsHeader.flags; + if(noFromU && outputType==MBCS_OUTPUT_1) { + udata_printError(ds, "ucnv_swap(): unsupported combination of makeconv --small with SBCS\n"); + *pErrorCode=U_UNSUPPORTED_ERROR; + return 0; + } + + /* make sure that the output type is known */ + switch(outputType) { + case MBCS_OUTPUT_1: + case MBCS_OUTPUT_2: + case MBCS_OUTPUT_3: + case MBCS_OUTPUT_4: + case MBCS_OUTPUT_3_EUC: + case MBCS_OUTPUT_4_EUC: + case MBCS_OUTPUT_2_SISO: + case MBCS_OUTPUT_EXT_ONLY: + /* OK */ + break; + default: + udata_printError(ds, "ucnv_swap(): unsupported MBCS output type 0x%x\n", + outputType); + *pErrorCode=U_UNSUPPORTED_ERROR; + return 0; + } + + /* calculate the length of the MBCS data */ + + /* + * utf8Friendly MBCS files (mbcsHeader.version 4.3) + * contain an additional mbcsIndex table: + * uint16_t[(maxFastUChar+1)>>6]; + * where maxFastUChar=((mbcsHeader.version[2]<<8)|0xff). + */ + maxFastUChar=0; + mbcsIndexLength=0; + if( outputType!=MBCS_OUTPUT_EXT_ONLY && outputType!=MBCS_OUTPUT_1 && + mbcsHeader.version[1]>=3 && (maxFastUChar=mbcsHeader.version[2])!=0 + ) { + maxFastUChar=(maxFastUChar<<8)|0xff; + mbcsIndexLength=((maxFastUChar+1)>>6)*2; /* number of bytes */ + } + + if(extOffset==0) { + size=(int32_t)(mbcsHeader.offsetFromUBytes+mbcsIndexLength); + if(!noFromU) { + size+=(int32_t)mbcsHeader.fromUBytesLength; + } + + /* avoid compiler warnings - not otherwise necessary, and the value does not matter */ + inExtIndexes=nullptr; + } else { + /* there is extension data after the base data, see ucnv_ext.h */ + if(length>=0 && length<(extOffset+UCNV_EXT_INDEXES_MIN_LENGTH*4)) { + udata_printError(ds, "ucnv_swap(): too few bytes (%d after headers) for an ICU MBCS .cnv conversion table with extension data\n", + length); + *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; + return 0; + } + + inExtIndexes=(const int32_t *)(inBytes+extOffset); + size=extOffset+udata_readInt32(ds, inExtIndexes[UCNV_EXT_SIZE]); + } + + if(length>=0) { + if(length<size) { + udata_printError(ds, "ucnv_swap(): too few bytes (%d after headers) for an ICU MBCS .cnv conversion table\n", + length); + *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; + return 0; + } + + /* copy the data for inaccessible bytes */ + if(inBytes!=outBytes) { + uprv_memcpy(outBytes, inBytes, size); + } + + /* swap the MBCSHeader, except for the version field */ + count=mbcsHeaderLength*4; + ds->swapArray32(ds, &inMBCSHeader->countStates, count-4, + &outMBCSHeader->countStates, pErrorCode); + + if(outputType==MBCS_OUTPUT_EXT_ONLY) { + /* + * extension-only file, + * contains a base name instead of normal base table data + */ + + /* swap the base name, between the header and the extension data */ + const char *inBaseName=(const char *)inBytes+count; + char *outBaseName=(char *)outBytes+count; + ds->swapInvChars(ds, inBaseName, (int32_t)uprv_strlen(inBaseName), + outBaseName, pErrorCode); + } else { + /* normal file with base table data */ + + /* swap the state table, 1kB per state */ + offset=count; + count=mbcsHeader.countStates*1024; + ds->swapArray32(ds, inBytes+offset, (int32_t)count, + outBytes+offset, pErrorCode); + + /* swap the toUFallbacks[] */ + offset+=count; + count=mbcsHeader.countToUFallbacks*8; + ds->swapArray32(ds, inBytes+offset, (int32_t)count, + outBytes+offset, pErrorCode); + + /* swap the unicodeCodeUnits[] */ + offset=mbcsHeader.offsetToUCodeUnits; + count=mbcsHeader.offsetFromUTable-offset; + ds->swapArray16(ds, inBytes+offset, (int32_t)count, + outBytes+offset, pErrorCode); + + /* offset to the stage 1 table, independent of the outputType */ + offset=mbcsHeader.offsetFromUTable; + + if(outputType==MBCS_OUTPUT_1) { + /* SBCS: swap the fromU tables, all 16 bits wide */ + count=(mbcsHeader.offsetFromUBytes-offset)+mbcsHeader.fromUBytesLength; + ds->swapArray16(ds, inBytes+offset, (int32_t)count, + outBytes+offset, pErrorCode); + } else { + /* otherwise: swap the stage tables separately */ + + /* stage 1 table: uint16_t[0x440 or 0x40] */ + if(inStaticData->unicodeMask&UCNV_HAS_SUPPLEMENTARY) { + count=0x440*2; /* for all of Unicode */ + } else { + count=0x40*2; /* only BMP */ + } + ds->swapArray16(ds, inBytes+offset, (int32_t)count, + outBytes+offset, pErrorCode); + + /* stage 2 table: uint32_t[] */ + offset+=count; + count=mbcsHeader.offsetFromUBytes-offset; + ds->swapArray32(ds, inBytes+offset, (int32_t)count, + outBytes+offset, pErrorCode); + + /* stage 3/result bytes: sometimes uint16_t[] or uint32_t[] */ + offset=mbcsHeader.offsetFromUBytes; + count= noFromU ? 0 : mbcsHeader.fromUBytesLength; + switch(outputType) { + case MBCS_OUTPUT_2: + case MBCS_OUTPUT_3_EUC: + case MBCS_OUTPUT_2_SISO: + ds->swapArray16(ds, inBytes+offset, (int32_t)count, + outBytes+offset, pErrorCode); + break; + case MBCS_OUTPUT_4: + ds->swapArray32(ds, inBytes+offset, (int32_t)count, + outBytes+offset, pErrorCode); + break; + default: + /* just uint8_t[], nothing to swap */ + break; + } + + if(mbcsIndexLength!=0) { + offset+=count; + count=mbcsIndexLength; + ds->swapArray16(ds, inBytes+offset, (int32_t)count, + outBytes+offset, pErrorCode); + } + } + } + + if(extOffset!=0) { + /* swap the extension data */ + inBytes+=extOffset; + outBytes+=extOffset; + + /* swap toUTable[] */ + offset=udata_readInt32(ds, inExtIndexes[UCNV_EXT_TO_U_INDEX]); + length=udata_readInt32(ds, inExtIndexes[UCNV_EXT_TO_U_LENGTH]); + ds->swapArray32(ds, inBytes+offset, length*4, outBytes+offset, pErrorCode); + + /* swap toUUChars[] */ + offset=udata_readInt32(ds, inExtIndexes[UCNV_EXT_TO_U_UCHARS_INDEX]); + length=udata_readInt32(ds, inExtIndexes[UCNV_EXT_TO_U_UCHARS_LENGTH]); + ds->swapArray16(ds, inBytes+offset, length*2, outBytes+offset, pErrorCode); + + /* swap fromUTableUChars[] */ + offset=udata_readInt32(ds, inExtIndexes[UCNV_EXT_FROM_U_UCHARS_INDEX]); + length=udata_readInt32(ds, inExtIndexes[UCNV_EXT_FROM_U_LENGTH]); + ds->swapArray16(ds, inBytes+offset, length*2, outBytes+offset, pErrorCode); + + /* swap fromUTableValues[] */ + offset=udata_readInt32(ds, inExtIndexes[UCNV_EXT_FROM_U_VALUES_INDEX]); + /* same length as for fromUTableUChars[] */ + ds->swapArray32(ds, inBytes+offset, length*4, outBytes+offset, pErrorCode); + + /* no need to swap fromUBytes[] */ + + /* swap fromUStage12[] */ + offset=udata_readInt32(ds, inExtIndexes[UCNV_EXT_FROM_U_STAGE_12_INDEX]); + length=udata_readInt32(ds, inExtIndexes[UCNV_EXT_FROM_U_STAGE_12_LENGTH]); + ds->swapArray16(ds, inBytes+offset, length*2, outBytes+offset, pErrorCode); + + /* swap fromUStage3[] */ + offset=udata_readInt32(ds, inExtIndexes[UCNV_EXT_FROM_U_STAGE_3_INDEX]); + length=udata_readInt32(ds, inExtIndexes[UCNV_EXT_FROM_U_STAGE_3_LENGTH]); + ds->swapArray16(ds, inBytes+offset, length*2, outBytes+offset, pErrorCode); + + /* swap fromUStage3b[] */ + offset=udata_readInt32(ds, inExtIndexes[UCNV_EXT_FROM_U_STAGE_3B_INDEX]); + length=udata_readInt32(ds, inExtIndexes[UCNV_EXT_FROM_U_STAGE_3B_LENGTH]); + ds->swapArray32(ds, inBytes+offset, length*4, outBytes+offset, pErrorCode); + + /* swap indexes[] */ + length=udata_readInt32(ds, inExtIndexes[UCNV_EXT_INDEXES_LENGTH]); + ds->swapArray32(ds, inBytes, length*4, outBytes, pErrorCode); + } + } + } else { + udata_printError(ds, "ucnv_swap(): unknown conversionType=%d!=UCNV_MBCS\n", + inStaticData->conversionType); + *pErrorCode=U_UNSUPPORTED_ERROR; + return 0; + } + + return headerSize+(int32_t)staticDataSize+size; +} + +#endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */ + +#endif |