summaryrefslogtreecommitdiffstats
path: root/intl/icu/source/common/loadednormalizer2impl.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'intl/icu/source/common/loadednormalizer2impl.cpp')
-rw-r--r--intl/icu/source/common/loadednormalizer2impl.cpp418
1 files changed, 418 insertions, 0 deletions
diff --git a/intl/icu/source/common/loadednormalizer2impl.cpp b/intl/icu/source/common/loadednormalizer2impl.cpp
new file mode 100644
index 0000000000..768564edc8
--- /dev/null
+++ b/intl/icu/source/common/loadednormalizer2impl.cpp
@@ -0,0 +1,418 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+* Copyright (C) 2014, International Business Machines
+* Corporation and others. All Rights Reserved.
+*******************************************************************************
+* loadednormalizer2impl.cpp
+*
+* created on: 2014sep03
+* created by: Markus W. Scherer
+*/
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_NORMALIZATION
+
+#include "unicode/udata.h"
+#include "unicode/localpointer.h"
+#include "unicode/normalizer2.h"
+#include "unicode/ucptrie.h"
+#include "unicode/unistr.h"
+#include "unicode/unorm.h"
+#include "cstring.h"
+#include "mutex.h"
+#include "norm2allmodes.h"
+#include "normalizer2impl.h"
+#include "uassert.h"
+#include "ucln_cmn.h"
+#include "uhash.h"
+
+U_NAMESPACE_BEGIN
+
+class LoadedNormalizer2Impl : public Normalizer2Impl {
+public:
+ LoadedNormalizer2Impl() : memory(nullptr), ownedTrie(nullptr) {}
+ virtual ~LoadedNormalizer2Impl();
+
+ void load(const char *packageName, const char *name, UErrorCode &errorCode);
+
+private:
+ static UBool U_CALLCONV
+ isAcceptable(void *context, const char *type, const char *name, const UDataInfo *pInfo);
+
+ UDataMemory *memory;
+ UCPTrie *ownedTrie;
+};
+
+LoadedNormalizer2Impl::~LoadedNormalizer2Impl() {
+ udata_close(memory);
+ ucptrie_close(ownedTrie);
+}
+
+UBool U_CALLCONV
+LoadedNormalizer2Impl::isAcceptable(void * /*context*/,
+ const char * /* type */, const char * /*name*/,
+ const UDataInfo *pInfo) {
+ if(
+ pInfo->size>=20 &&
+ pInfo->isBigEndian==U_IS_BIG_ENDIAN &&
+ pInfo->charsetFamily==U_CHARSET_FAMILY &&
+ pInfo->dataFormat[0]==0x4e && /* dataFormat="Nrm2" */
+ pInfo->dataFormat[1]==0x72 &&
+ pInfo->dataFormat[2]==0x6d &&
+ pInfo->dataFormat[3]==0x32 &&
+ pInfo->formatVersion[0]==4
+ ) {
+ // Normalizer2Impl *me=(Normalizer2Impl *)context;
+ // uprv_memcpy(me->dataVersion, pInfo->dataVersion, 4);
+ return true;
+ } else {
+ return false;
+ }
+}
+
+void
+LoadedNormalizer2Impl::load(const char *packageName, const char *name, UErrorCode &errorCode) {
+ if(U_FAILURE(errorCode)) {
+ return;
+ }
+ memory=udata_openChoice(packageName, "nrm", name, isAcceptable, this, &errorCode);
+ if(U_FAILURE(errorCode)) {
+ return;
+ }
+ const uint8_t *inBytes=(const uint8_t *)udata_getMemory(memory);
+ const int32_t *inIndexes=(const int32_t *)inBytes;
+ int32_t indexesLength=inIndexes[IX_NORM_TRIE_OFFSET]/4;
+ if(indexesLength<=IX_MIN_LCCC_CP) {
+ errorCode=U_INVALID_FORMAT_ERROR; // Not enough indexes.
+ return;
+ }
+
+ int32_t offset=inIndexes[IX_NORM_TRIE_OFFSET];
+ int32_t nextOffset=inIndexes[IX_EXTRA_DATA_OFFSET];
+ ownedTrie=ucptrie_openFromBinary(UCPTRIE_TYPE_FAST, UCPTRIE_VALUE_BITS_16,
+ inBytes+offset, nextOffset-offset, nullptr,
+ &errorCode);
+ if(U_FAILURE(errorCode)) {
+ return;
+ }
+
+ offset=nextOffset;
+ nextOffset=inIndexes[IX_SMALL_FCD_OFFSET];
+ const uint16_t *inExtraData=(const uint16_t *)(inBytes+offset);
+
+ // smallFCD: new in formatVersion 2
+ offset=nextOffset;
+ const uint8_t *inSmallFCD=inBytes+offset;
+
+ init(inIndexes, ownedTrie, inExtraData, inSmallFCD);
+}
+
+// instance cache ---------------------------------------------------------- ***
+
+Norm2AllModes *
+Norm2AllModes::createInstance(const char *packageName,
+ const char *name,
+ UErrorCode &errorCode) {
+ if(U_FAILURE(errorCode)) {
+ return nullptr;
+ }
+ LoadedNormalizer2Impl *impl=new LoadedNormalizer2Impl;
+ if(impl==nullptr) {
+ errorCode=U_MEMORY_ALLOCATION_ERROR;
+ return nullptr;
+ }
+ impl->load(packageName, name, errorCode);
+ return createInstance(impl, errorCode);
+}
+
+U_CDECL_BEGIN
+static UBool U_CALLCONV uprv_loaded_normalizer2_cleanup();
+U_CDECL_END
+
+#if !NORM2_HARDCODE_NFC_DATA
+static Norm2AllModes *nfcSingleton;
+static icu::UInitOnce nfcInitOnce {};
+#endif
+
+static Norm2AllModes *nfkcSingleton;
+static icu::UInitOnce nfkcInitOnce {};
+
+static Norm2AllModes *nfkc_cfSingleton;
+static icu::UInitOnce nfkc_cfInitOnce {};
+
+static UHashtable *cache=nullptr;
+
+// UInitOnce singleton initialization function
+static void U_CALLCONV initSingletons(const char *what, UErrorCode &errorCode) {
+#if !NORM2_HARDCODE_NFC_DATA
+ if (uprv_strcmp(what, "nfc") == 0) {
+ nfcSingleton = Norm2AllModes::createInstance(nullptr, "nfc", errorCode);
+ } else
+#endif
+ if (uprv_strcmp(what, "nfkc") == 0) {
+ nfkcSingleton = Norm2AllModes::createInstance(nullptr, "nfkc", errorCode);
+ } else if (uprv_strcmp(what, "nfkc_cf") == 0) {
+ nfkc_cfSingleton = Norm2AllModes::createInstance(nullptr, "nfkc_cf", errorCode);
+ } else {
+ UPRV_UNREACHABLE_EXIT; // Unknown singleton
+ }
+ ucln_common_registerCleanup(UCLN_COMMON_LOADED_NORMALIZER2, uprv_loaded_normalizer2_cleanup);
+}
+
+U_CDECL_BEGIN
+
+static void U_CALLCONV deleteNorm2AllModes(void *allModes) {
+ delete (Norm2AllModes *)allModes;
+}
+
+static UBool U_CALLCONV uprv_loaded_normalizer2_cleanup() {
+#if !NORM2_HARDCODE_NFC_DATA
+ delete nfcSingleton;
+ nfcSingleton = nullptr;
+ nfcInitOnce.reset();
+#endif
+
+ delete nfkcSingleton;
+ nfkcSingleton = nullptr;
+ nfkcInitOnce.reset();
+
+ delete nfkc_cfSingleton;
+ nfkc_cfSingleton = nullptr;
+ nfkc_cfInitOnce.reset();
+
+ uhash_close(cache);
+ cache=nullptr;
+ return true;
+}
+
+U_CDECL_END
+
+#if !NORM2_HARDCODE_NFC_DATA
+const Norm2AllModes *
+Norm2AllModes::getNFCInstance(UErrorCode &errorCode) {
+ if(U_FAILURE(errorCode)) { return nullptr; }
+ umtx_initOnce(nfcInitOnce, &initSingletons, "nfc", errorCode);
+ return nfcSingleton;
+}
+#endif
+
+const Norm2AllModes *
+Norm2AllModes::getNFKCInstance(UErrorCode &errorCode) {
+ if(U_FAILURE(errorCode)) { return nullptr; }
+ umtx_initOnce(nfkcInitOnce, &initSingletons, "nfkc", errorCode);
+ return nfkcSingleton;
+}
+
+const Norm2AllModes *
+Norm2AllModes::getNFKC_CFInstance(UErrorCode &errorCode) {
+ if(U_FAILURE(errorCode)) { return nullptr; }
+ umtx_initOnce(nfkc_cfInitOnce, &initSingletons, "nfkc_cf", errorCode);
+ return nfkc_cfSingleton;
+}
+
+#if !NORM2_HARDCODE_NFC_DATA
+const Normalizer2 *
+Normalizer2::getNFCInstance(UErrorCode &errorCode) {
+ const Norm2AllModes *allModes=Norm2AllModes::getNFCInstance(errorCode);
+ return allModes!=nullptr ? &allModes->comp : nullptr;
+}
+
+const Normalizer2 *
+Normalizer2::getNFDInstance(UErrorCode &errorCode) {
+ const Norm2AllModes *allModes=Norm2AllModes::getNFCInstance(errorCode);
+ return allModes!=nullptr ? &allModes->decomp : nullptr;
+}
+
+const Normalizer2 *Normalizer2Factory::getFCDInstance(UErrorCode &errorCode) {
+ const Norm2AllModes *allModes=Norm2AllModes::getNFCInstance(errorCode);
+ return allModes!=nullptr ? &allModes->fcd : nullptr;
+}
+
+const Normalizer2 *Normalizer2Factory::getFCCInstance(UErrorCode &errorCode) {
+ const Norm2AllModes *allModes=Norm2AllModes::getNFCInstance(errorCode);
+ return allModes!=nullptr ? &allModes->fcc : nullptr;
+}
+
+const Normalizer2Impl *
+Normalizer2Factory::getNFCImpl(UErrorCode &errorCode) {
+ const Norm2AllModes *allModes=Norm2AllModes::getNFCInstance(errorCode);
+ return allModes!=nullptr ? allModes->impl : nullptr;
+}
+#endif
+
+const Normalizer2 *
+Normalizer2::getNFKCInstance(UErrorCode &errorCode) {
+ const Norm2AllModes *allModes=Norm2AllModes::getNFKCInstance(errorCode);
+ return allModes!=nullptr ? &allModes->comp : nullptr;
+}
+
+const Normalizer2 *
+Normalizer2::getNFKDInstance(UErrorCode &errorCode) {
+ const Norm2AllModes *allModes=Norm2AllModes::getNFKCInstance(errorCode);
+ return allModes!=nullptr ? &allModes->decomp : nullptr;
+}
+
+const Normalizer2 *
+Normalizer2::getNFKCCasefoldInstance(UErrorCode &errorCode) {
+ const Norm2AllModes *allModes=Norm2AllModes::getNFKC_CFInstance(errorCode);
+ return allModes!=nullptr ? &allModes->comp : nullptr;
+}
+
+const Normalizer2 *
+Normalizer2::getInstance(const char *packageName,
+ const char *name,
+ UNormalization2Mode mode,
+ UErrorCode &errorCode) {
+ if(U_FAILURE(errorCode)) {
+ return nullptr;
+ }
+ if(name==nullptr || *name==0) {
+ errorCode=U_ILLEGAL_ARGUMENT_ERROR;
+ return nullptr;
+ }
+ const Norm2AllModes *allModes=nullptr;
+ if(packageName==nullptr) {
+ if(0==uprv_strcmp(name, "nfc")) {
+ allModes=Norm2AllModes::getNFCInstance(errorCode);
+ } else if(0==uprv_strcmp(name, "nfkc")) {
+ allModes=Norm2AllModes::getNFKCInstance(errorCode);
+ } else if(0==uprv_strcmp(name, "nfkc_cf")) {
+ allModes=Norm2AllModes::getNFKC_CFInstance(errorCode);
+ }
+ }
+ if(allModes==nullptr && U_SUCCESS(errorCode)) {
+ {
+ Mutex lock;
+ if(cache!=nullptr) {
+ allModes=(Norm2AllModes *)uhash_get(cache, name);
+ }
+ }
+ if(allModes==nullptr) {
+ ucln_common_registerCleanup(UCLN_COMMON_LOADED_NORMALIZER2, uprv_loaded_normalizer2_cleanup);
+ LocalPointer<Norm2AllModes> localAllModes(
+ Norm2AllModes::createInstance(packageName, name, errorCode));
+ if(U_SUCCESS(errorCode)) {
+ Mutex lock;
+ if(cache==nullptr) {
+ cache=uhash_open(uhash_hashChars, uhash_compareChars, nullptr, &errorCode);
+ if(U_FAILURE(errorCode)) {
+ return nullptr;
+ }
+ uhash_setKeyDeleter(cache, uprv_free);
+ uhash_setValueDeleter(cache, deleteNorm2AllModes);
+ }
+ void *temp=uhash_get(cache, name);
+ if(temp==nullptr) {
+ int32_t keyLength= static_cast<int32_t>(uprv_strlen(name)+1);
+ char *nameCopy=(char *)uprv_malloc(keyLength);
+ if(nameCopy==nullptr) {
+ errorCode=U_MEMORY_ALLOCATION_ERROR;
+ return nullptr;
+ }
+ uprv_memcpy(nameCopy, name, keyLength);
+ allModes=localAllModes.getAlias();
+ uhash_put(cache, nameCopy, localAllModes.orphan(), &errorCode);
+ } else {
+ // race condition
+ allModes=(Norm2AllModes *)temp;
+ }
+ }
+ }
+ }
+ if(allModes!=nullptr && U_SUCCESS(errorCode)) {
+ switch(mode) {
+ case UNORM2_COMPOSE:
+ return &allModes->comp;
+ case UNORM2_DECOMPOSE:
+ return &allModes->decomp;
+ case UNORM2_FCD:
+ return &allModes->fcd;
+ case UNORM2_COMPOSE_CONTIGUOUS:
+ return &allModes->fcc;
+ default:
+ break; // do nothing
+ }
+ }
+ return nullptr;
+}
+
+const Normalizer2 *
+Normalizer2Factory::getInstance(UNormalizationMode mode, UErrorCode &errorCode) {
+ if(U_FAILURE(errorCode)) {
+ return nullptr;
+ }
+ switch(mode) {
+ case UNORM_NFD:
+ return Normalizer2::getNFDInstance(errorCode);
+ case UNORM_NFKD:
+ return Normalizer2::getNFKDInstance(errorCode);
+ case UNORM_NFC:
+ return Normalizer2::getNFCInstance(errorCode);
+ case UNORM_NFKC:
+ return Normalizer2::getNFKCInstance(errorCode);
+ case UNORM_FCD:
+ return getFCDInstance(errorCode);
+ default: // UNORM_NONE
+ return getNoopInstance(errorCode);
+ }
+}
+
+const Normalizer2Impl *
+Normalizer2Factory::getNFKCImpl(UErrorCode &errorCode) {
+ const Norm2AllModes *allModes=Norm2AllModes::getNFKCInstance(errorCode);
+ return allModes!=nullptr ? allModes->impl : nullptr;
+}
+
+const Normalizer2Impl *
+Normalizer2Factory::getNFKC_CFImpl(UErrorCode &errorCode) {
+ const Norm2AllModes *allModes=Norm2AllModes::getNFKC_CFInstance(errorCode);
+ return allModes!=nullptr ? allModes->impl : nullptr;
+}
+
+U_NAMESPACE_END
+
+// C API ------------------------------------------------------------------- ***
+
+U_NAMESPACE_USE
+
+U_CAPI const UNormalizer2 * U_EXPORT2
+unorm2_getNFKCInstance(UErrorCode *pErrorCode) {
+ return (const UNormalizer2 *)Normalizer2::getNFKCInstance(*pErrorCode);
+}
+
+U_CAPI const UNormalizer2 * U_EXPORT2
+unorm2_getNFKDInstance(UErrorCode *pErrorCode) {
+ return (const UNormalizer2 *)Normalizer2::getNFKDInstance(*pErrorCode);
+}
+
+U_CAPI const UNormalizer2 * U_EXPORT2
+unorm2_getNFKCCasefoldInstance(UErrorCode *pErrorCode) {
+ return (const UNormalizer2 *)Normalizer2::getNFKCCasefoldInstance(*pErrorCode);
+}
+
+U_CAPI const UNormalizer2 * U_EXPORT2
+unorm2_getInstance(const char *packageName,
+ const char *name,
+ UNormalization2Mode mode,
+ UErrorCode *pErrorCode) {
+ return (const UNormalizer2 *)Normalizer2::getInstance(packageName, name, mode, *pErrorCode);
+}
+
+U_CFUNC UNormalizationCheckResult
+unorm_getQuickCheck(UChar32 c, UNormalizationMode mode) {
+ if(mode<=UNORM_NONE || UNORM_FCD<=mode) {
+ return UNORM_YES;
+ }
+ UErrorCode errorCode=U_ZERO_ERROR;
+ const Normalizer2 *norm2=Normalizer2Factory::getInstance(mode, errorCode);
+ if(U_SUCCESS(errorCode)) {
+ return ((const Normalizer2WithImpl *)norm2)->getQuickCheck(c);
+ } else {
+ return UNORM_MAYBE;
+ }
+}
+
+#endif // !UCONFIG_NO_NORMALIZATION