From 36d22d82aa202bb199967e9512281e9a53db42c9 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Sun, 7 Apr 2024 21:33:14 +0200 Subject: Adding upstream version 115.7.0esr. Signed-off-by: Daniel Baumann --- intl/icu/source/i18n/transreg.h | 468 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 468 insertions(+) create mode 100644 intl/icu/source/i18n/transreg.h (limited to 'intl/icu/source/i18n/transreg.h') diff --git a/intl/icu/source/i18n/transreg.h b/intl/icu/source/i18n/transreg.h new file mode 100644 index 0000000000..da9c2ee8b9 --- /dev/null +++ b/intl/icu/source/i18n/transreg.h @@ -0,0 +1,468 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +********************************************************************** +* Copyright (c) 2001-2014, International Business Machines +* Corporation and others. All Rights Reserved. +********************************************************************** +* Date Name Description +* 08/10/2001 aliu Creation. +********************************************************************** +*/ +#ifndef _TRANSREG_H +#define _TRANSREG_H + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_TRANSLITERATION + +#include "unicode/uobject.h" +#include "unicode/translit.h" +#include "hash.h" +#include "uvector.h" + +U_NAMESPACE_BEGIN + +class TransliteratorEntry; +class TransliteratorSpec; +class UnicodeString; + +//------------------------------------------------------------------ +// TransliteratorAlias +//------------------------------------------------------------------ + +/** + * A TransliteratorAlias object is returned by get() if the given ID + * actually translates into something else. The caller then invokes + * the create() method on the alias to create the actual + * transliterator, and deletes the alias. + * + * Why all the shenanigans? To prevent circular calls between + * the registry code and the transliterator code that deadlocks. + */ +class TransliteratorAlias : public UMemory { + public: + /** + * Construct a simple alias (type == SIMPLE) + * @param aliasID the given id. + */ + TransliteratorAlias(const UnicodeString& aliasID, const UnicodeSet* compoundFilter); + + /** + * Construct a compound RBT alias (type == COMPOUND) + */ + TransliteratorAlias(const UnicodeString& ID, const UnicodeString& idBlocks, + UVector* adoptedTransliterators, + const UnicodeSet* compoundFilter); + + /** + * Construct a rules alias (type = RULES) + */ + TransliteratorAlias(const UnicodeString& theID, + const UnicodeString& rules, + UTransDirection dir); + + ~TransliteratorAlias(); + + /** + * The whole point of create() is that the caller must invoke + * it when the registry mutex is NOT held, to prevent deadlock. + * It may only be called once. + * + * Note: Only call create() if isRuleBased() returns false. + * + * This method must be called *outside* of the TransliteratorRegistry + * mutex. + */ + Transliterator* create(UParseError&, UErrorCode&); + + /** + * Return true if this alias is rule-based. If so, the caller + * must call parse() on it, then call TransliteratorRegistry::reget(). + */ + UBool isRuleBased() const; + + /** + * If isRuleBased() returns true, then the caller must call this + * method, followed by TransliteratorRegistry::reget(). The latter + * method must be called inside the TransliteratorRegistry mutex. + * + * Note: Only call parse() if isRuleBased() returns true. + * + * This method must be called *outside* of the TransliteratorRegistry + * mutex, because it can instantiate Transliterators embedded in + * the rules via the "&Latin-Arabic()" syntax. + */ + void parse(TransliteratorParser& parser, + UParseError& pe, UErrorCode& ec) const; + + private: + // We actually come in three flavors: + // 1. Simple alias + // Here aliasID is the alias string. Everything else is + // null, zero, empty. + // 2. CompoundRBT + // Here ID is the ID, aliasID is the idBlock, trans is the + // contained RBT, and idSplitPoint is the offset in aliasID + // where the contained RBT goes. compoundFilter is the + // compound filter, and it is _not_ owned. + // 3. Rules + // Here ID is the ID, aliasID is the rules string. + // idSplitPoint is the UTransDirection. + UnicodeString ID; + UnicodeString aliasesOrRules; + UVector* transes; // owned + const UnicodeSet* compoundFilter; // alias + UTransDirection direction; + enum { SIMPLE, COMPOUND, RULES } type; + + TransliteratorAlias(const TransliteratorAlias &other); // forbid copying of this class + TransliteratorAlias &operator=(const TransliteratorAlias &other); // forbid copying of this class +}; + + +/** + * A registry of system transliterators. This is the data structure + * that implements the mapping between transliterator IDs and the data + * or function pointers used to create the corresponding + * transliterators. There is one instance of the registry that is + * created statically. + * + * The registry consists of a dynamic component -- a hashtable -- and + * a static component -- locale resource bundles. The dynamic store + * is semantically overlaid on the static store, so the static mapping + * can be dynamically overridden. + * + * This is an internal class that is only used by Transliterator. + * Transliterator maintains one static instance of this class and + * delegates all registry-related operations to it. + * + * @author Alan Liu + */ +class TransliteratorRegistry : public UMemory { + + public: + + /** + * Constructor + * @param status Output param set to success/failure code. + */ + TransliteratorRegistry(UErrorCode& status); + + /** + * Nonvirtual destructor -- this class is not subclassable. + */ + ~TransliteratorRegistry(); + + //------------------------------------------------------------------ + // Basic public API + //------------------------------------------------------------------ + + /** + * Given a simple ID (forward direction, no inline filter, not + * compound) attempt to instantiate it from the registry. Return + * 0 on failure. + * + * Return a non-nullptr aliasReturn value if the ID points to an alias. + * We cannot instantiate it ourselves because the alias may contain + * filters or compounds, which we do not understand. Caller should + * make aliasReturn nullptr before calling. + * @param ID the given ID + * @param aliasReturn output param to receive TransliteratorAlias; + * should be nullptr on entry + * @param parseError Struct to receive information on position + * of error if an error is encountered + * @param status Output param set to success/failure code. + */ + Transliterator* get(const UnicodeString& ID, + TransliteratorAlias*& aliasReturn, + UErrorCode& status); + + /** + * The caller must call this after calling get(), if [a] calling get() + * returns an alias, and [b] the alias is rule based. In that + * situation the caller must call alias->parse() to do the parsing + * OUTSIDE THE REGISTRY MUTEX, then call this method to retry + * instantiating the transliterator. + * + * Note: Another alias might be returned by this method. + * + * This method (like all public methods of this class) must be called + * from within the TransliteratorRegistry mutex. + * + * @param aliasReturn output param to receive TransliteratorAlias; + * should be nullptr on entry + */ + Transliterator* reget(const UnicodeString& ID, + TransliteratorParser& parser, + TransliteratorAlias*& aliasReturn, + UErrorCode& status); + + /** + * Register a prototype (adopted). This adds an entry to the + * dynamic store, or replaces an existing entry. Any entry in the + * underlying static locale resource store is masked. + */ + void put(Transliterator* adoptedProto, + UBool visible, + UErrorCode& ec); + + /** + * Register an ID and a factory function pointer. This adds an + * entry to the dynamic store, or replaces an existing entry. Any + * entry in the underlying static locale resource store is masked. + */ + void put(const UnicodeString& ID, + Transliterator::Factory factory, + Transliterator::Token context, + UBool visible, + UErrorCode& ec); + + /** + * Register an ID and a resource name. This adds an entry to the + * dynamic store, or replaces an existing entry. Any entry in the + * underlying static locale resource store is masked. + */ + void put(const UnicodeString& ID, + const UnicodeString& resourceName, + UTransDirection dir, + UBool readonlyResourceAlias, + UBool visible, + UErrorCode& ec); + + /** + * Register an ID and an alias ID. This adds an entry to the + * dynamic store, or replaces an existing entry. Any entry in the + * underlying static locale resource store is masked. + */ + void put(const UnicodeString& ID, + const UnicodeString& alias, + UBool readonlyAliasAlias, + UBool visible, + UErrorCode& ec); + + /** + * Unregister an ID. This removes an entry from the dynamic store + * if there is one. The static locale resource store is + * unaffected. + * @param ID the given ID. + */ + void remove(const UnicodeString& ID); + + //------------------------------------------------------------------ + // Public ID and spec management + //------------------------------------------------------------------ + + /** + * Return a StringEnumeration over the IDs currently registered + * with the system. + * @internal + */ + StringEnumeration* getAvailableIDs() const; + + /** + * == OBSOLETE - remove in ICU 3.4 == + * Return the number of IDs currently registered with the system. + * To retrieve the actual IDs, call getAvailableID(i) with + * i from 0 to countAvailableIDs() - 1. + * @return the number of IDs currently registered with the system. + * @internal + */ + int32_t countAvailableIDs() const; + + /** + * == OBSOLETE - remove in ICU 3.4 == + * Return the index-th available ID. index must be between 0 + * and countAvailableIDs() - 1, inclusive. If index is out of + * range, the result of getAvailableID(0) is returned. + * @param index the given index. + * @return the index-th available ID. index must be between 0 + * and countAvailableIDs() - 1, inclusive. If index is out of + * range, the result of getAvailableID(0) is returned. + * @internal + */ + const UnicodeString& getAvailableID(int32_t index) const; + + /** + * Return the number of registered source specifiers. + * @return the number of registered source specifiers. + */ + int32_t countAvailableSources() const; + + /** + * Return a registered source specifier. + * @param index which specifier to return, from 0 to n-1, where + * n = countAvailableSources() + * @param result fill-in parameter to receive the source specifier. + * If index is out of range, result will be empty. + * @return reference to result + */ + UnicodeString& getAvailableSource(int32_t index, + UnicodeString& result) const; + + /** + * Return the number of registered target specifiers for a given + * source specifier. + * @param source the given source specifier. + * @return the number of registered target specifiers for a given + * source specifier. + */ + int32_t countAvailableTargets(const UnicodeString& source) const; + + /** + * Return a registered target specifier for a given source. + * @param index which specifier to return, from 0 to n-1, where + * n = countAvailableTargets(source) + * @param source the source specifier + * @param result fill-in parameter to receive the target specifier. + * If source is invalid or if index is out of range, result will + * be empty. + * @return reference to result + */ + UnicodeString& getAvailableTarget(int32_t index, + const UnicodeString& source, + UnicodeString& result) const; + + /** + * Return the number of registered variant specifiers for a given + * source-target pair. There is always at least one variant: If + * just source-target is registered, then the single variant + * NO_VARIANT is returned. If source-target/variant is registered + * then that variant is returned. + * @param source the source specifiers + * @param target the target specifiers + * @return the number of registered variant specifiers for a given + * source-target pair. + */ + int32_t countAvailableVariants(const UnicodeString& source, + const UnicodeString& target) const; + + /** + * Return a registered variant specifier for a given source-target + * pair. If NO_VARIANT is one of the variants, then it will be + * at index 0. + * @param index which specifier to return, from 0 to n-1, where + * n = countAvailableVariants(source, target) + * @param source the source specifier + * @param target the target specifier + * @param result fill-in parameter to receive the variant + * specifier. If source is invalid or if target is invalid or if + * index is out of range, result will be empty. + * @return reference to result + */ + UnicodeString& getAvailableVariant(int32_t index, + const UnicodeString& source, + const UnicodeString& target, + UnicodeString& result) const; + + private: + + //---------------------------------------------------------------- + // Private implementation + //---------------------------------------------------------------- + + TransliteratorEntry* find(const UnicodeString& ID); + + TransliteratorEntry* find(UnicodeString& source, + UnicodeString& target, + UnicodeString& variant); + + TransliteratorEntry* findInDynamicStore(const TransliteratorSpec& src, + const TransliteratorSpec& trg, + const UnicodeString& variant) const; + + TransliteratorEntry* findInStaticStore(const TransliteratorSpec& src, + const TransliteratorSpec& trg, + const UnicodeString& variant); + + static TransliteratorEntry* findInBundle(const TransliteratorSpec& specToOpen, + const TransliteratorSpec& specToFind, + const UnicodeString& variant, + UTransDirection direction); + + void registerEntry(const UnicodeString& source, + const UnicodeString& target, + const UnicodeString& variant, + TransliteratorEntry* adopted, + UBool visible); + + void registerEntry(const UnicodeString& ID, + TransliteratorEntry* adopted, + UBool visible); + + void registerEntry(const UnicodeString& ID, + const UnicodeString& source, + const UnicodeString& target, + const UnicodeString& variant, + TransliteratorEntry* adopted, + UBool visible); + + void registerSTV(const UnicodeString& source, + const UnicodeString& target, + const UnicodeString& variant); + + void removeSTV(const UnicodeString& source, + const UnicodeString& target, + const UnicodeString& variant); + + Transliterator* instantiateEntry(const UnicodeString& ID, + TransliteratorEntry *entry, + TransliteratorAlias*& aliasReturn, + UErrorCode& status); + + /** + * A StringEnumeration over the registered IDs in this object. + */ + class Enumeration : public StringEnumeration { + public: + Enumeration(const TransliteratorRegistry& reg); + virtual ~Enumeration(); + virtual int32_t count(UErrorCode& status) const override; + virtual const UnicodeString* snext(UErrorCode& status) override; + virtual void reset(UErrorCode& status) override; + static UClassID U_EXPORT2 getStaticClassID(); + virtual UClassID getDynamicClassID() const override; + private: + int32_t index; + const TransliteratorRegistry& reg; + }; + friend class Enumeration; + + private: + + /** + * Dynamic registry mapping full IDs to Entry objects. This + * contains both public and internal entities. The visibility is + * controlled by whether an entry is listed in availableIDs and + * specDAG or not. + */ + Hashtable registry; + + /** + * DAG of visible IDs by spec. Hashtable: source => (Hashtable: + * target => variant bitmask) + */ + Hashtable specDAG; + + /** + * Vector of all variant names + */ + UVector variantList; + + /** + * Vector of public full IDs. + */ + UVector availableIDs; + + TransliteratorRegistry(const TransliteratorRegistry &other); // forbid copying of this class + TransliteratorRegistry &operator=(const TransliteratorRegistry &other); // forbid copying of this class +}; + +U_NAMESPACE_END + +U_CFUNC UBool utrans_transliterator_cleanup(); + +#endif /* #if !UCONFIG_NO_TRANSLITERATION */ + +#endif +//eof -- cgit v1.2.3