summaryrefslogtreecommitdiffstats
path: root/intl/icu/source/i18n/transreg.h
diff options
context:
space:
mode:
Diffstat (limited to 'intl/icu/source/i18n/transreg.h')
-rw-r--r--intl/icu/source/i18n/transreg.h468
1 files changed, 468 insertions, 0 deletions
diff --git a/intl/icu/source/i18n/transreg.h b/intl/icu/source/i18n/transreg.h
new file mode 100644
index 0000000000..da9c2ee8b9
--- /dev/null
+++ b/intl/icu/source/i18n/transreg.h
@@ -0,0 +1,468 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+**********************************************************************
+* Copyright (c) 2001-2014, International Business Machines
+* Corporation and others. All Rights Reserved.
+**********************************************************************
+* Date Name Description
+* 08/10/2001 aliu Creation.
+**********************************************************************
+*/
+#ifndef _TRANSREG_H
+#define _TRANSREG_H
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_TRANSLITERATION
+
+#include "unicode/uobject.h"
+#include "unicode/translit.h"
+#include "hash.h"
+#include "uvector.h"
+
+U_NAMESPACE_BEGIN
+
+class TransliteratorEntry;
+class TransliteratorSpec;
+class UnicodeString;
+
+//------------------------------------------------------------------
+// TransliteratorAlias
+//------------------------------------------------------------------
+
+/**
+ * A TransliteratorAlias object is returned by get() if the given ID
+ * actually translates into something else. The caller then invokes
+ * the create() method on the alias to create the actual
+ * transliterator, and deletes the alias.
+ *
+ * Why all the shenanigans? To prevent circular calls between
+ * the registry code and the transliterator code that deadlocks.
+ */
+class TransliteratorAlias : public UMemory {
+ public:
+ /**
+ * Construct a simple alias (type == SIMPLE)
+ * @param aliasID the given id.
+ */
+ TransliteratorAlias(const UnicodeString& aliasID, const UnicodeSet* compoundFilter);
+
+ /**
+ * Construct a compound RBT alias (type == COMPOUND)
+ */
+ TransliteratorAlias(const UnicodeString& ID, const UnicodeString& idBlocks,
+ UVector* adoptedTransliterators,
+ const UnicodeSet* compoundFilter);
+
+ /**
+ * Construct a rules alias (type = RULES)
+ */
+ TransliteratorAlias(const UnicodeString& theID,
+ const UnicodeString& rules,
+ UTransDirection dir);
+
+ ~TransliteratorAlias();
+
+ /**
+ * The whole point of create() is that the caller must invoke
+ * it when the registry mutex is NOT held, to prevent deadlock.
+ * It may only be called once.
+ *
+ * Note: Only call create() if isRuleBased() returns false.
+ *
+ * This method must be called *outside* of the TransliteratorRegistry
+ * mutex.
+ */
+ Transliterator* create(UParseError&, UErrorCode&);
+
+ /**
+ * Return true if this alias is rule-based. If so, the caller
+ * must call parse() on it, then call TransliteratorRegistry::reget().
+ */
+ UBool isRuleBased() const;
+
+ /**
+ * If isRuleBased() returns true, then the caller must call this
+ * method, followed by TransliteratorRegistry::reget(). The latter
+ * method must be called inside the TransliteratorRegistry mutex.
+ *
+ * Note: Only call parse() if isRuleBased() returns true.
+ *
+ * This method must be called *outside* of the TransliteratorRegistry
+ * mutex, because it can instantiate Transliterators embedded in
+ * the rules via the "&Latin-Arabic()" syntax.
+ */
+ void parse(TransliteratorParser& parser,
+ UParseError& pe, UErrorCode& ec) const;
+
+ private:
+ // We actually come in three flavors:
+ // 1. Simple alias
+ // Here aliasID is the alias string. Everything else is
+ // null, zero, empty.
+ // 2. CompoundRBT
+ // Here ID is the ID, aliasID is the idBlock, trans is the
+ // contained RBT, and idSplitPoint is the offset in aliasID
+ // where the contained RBT goes. compoundFilter is the
+ // compound filter, and it is _not_ owned.
+ // 3. Rules
+ // Here ID is the ID, aliasID is the rules string.
+ // idSplitPoint is the UTransDirection.
+ UnicodeString ID;
+ UnicodeString aliasesOrRules;
+ UVector* transes; // owned
+ const UnicodeSet* compoundFilter; // alias
+ UTransDirection direction;
+ enum { SIMPLE, COMPOUND, RULES } type;
+
+ TransliteratorAlias(const TransliteratorAlias &other); // forbid copying of this class
+ TransliteratorAlias &operator=(const TransliteratorAlias &other); // forbid copying of this class
+};
+
+
+/**
+ * A registry of system transliterators. This is the data structure
+ * that implements the mapping between transliterator IDs and the data
+ * or function pointers used to create the corresponding
+ * transliterators. There is one instance of the registry that is
+ * created statically.
+ *
+ * The registry consists of a dynamic component -- a hashtable -- and
+ * a static component -- locale resource bundles. The dynamic store
+ * is semantically overlaid on the static store, so the static mapping
+ * can be dynamically overridden.
+ *
+ * This is an internal class that is only used by Transliterator.
+ * Transliterator maintains one static instance of this class and
+ * delegates all registry-related operations to it.
+ *
+ * @author Alan Liu
+ */
+class TransliteratorRegistry : public UMemory {
+
+ public:
+
+ /**
+ * Constructor
+ * @param status Output param set to success/failure code.
+ */
+ TransliteratorRegistry(UErrorCode& status);
+
+ /**
+ * Nonvirtual destructor -- this class is not subclassable.
+ */
+ ~TransliteratorRegistry();
+
+ //------------------------------------------------------------------
+ // Basic public API
+ //------------------------------------------------------------------
+
+ /**
+ * Given a simple ID (forward direction, no inline filter, not
+ * compound) attempt to instantiate it from the registry. Return
+ * 0 on failure.
+ *
+ * Return a non-nullptr aliasReturn value if the ID points to an alias.
+ * We cannot instantiate it ourselves because the alias may contain
+ * filters or compounds, which we do not understand. Caller should
+ * make aliasReturn nullptr before calling.
+ * @param ID the given ID
+ * @param aliasReturn output param to receive TransliteratorAlias;
+ * should be nullptr on entry
+ * @param parseError Struct to receive information on position
+ * of error if an error is encountered
+ * @param status Output param set to success/failure code.
+ */
+ Transliterator* get(const UnicodeString& ID,
+ TransliteratorAlias*& aliasReturn,
+ UErrorCode& status);
+
+ /**
+ * The caller must call this after calling get(), if [a] calling get()
+ * returns an alias, and [b] the alias is rule based. In that
+ * situation the caller must call alias->parse() to do the parsing
+ * OUTSIDE THE REGISTRY MUTEX, then call this method to retry
+ * instantiating the transliterator.
+ *
+ * Note: Another alias might be returned by this method.
+ *
+ * This method (like all public methods of this class) must be called
+ * from within the TransliteratorRegistry mutex.
+ *
+ * @param aliasReturn output param to receive TransliteratorAlias;
+ * should be nullptr on entry
+ */
+ Transliterator* reget(const UnicodeString& ID,
+ TransliteratorParser& parser,
+ TransliteratorAlias*& aliasReturn,
+ UErrorCode& status);
+
+ /**
+ * Register a prototype (adopted). This adds an entry to the
+ * dynamic store, or replaces an existing entry. Any entry in the
+ * underlying static locale resource store is masked.
+ */
+ void put(Transliterator* adoptedProto,
+ UBool visible,
+ UErrorCode& ec);
+
+ /**
+ * Register an ID and a factory function pointer. This adds an
+ * entry to the dynamic store, or replaces an existing entry. Any
+ * entry in the underlying static locale resource store is masked.
+ */
+ void put(const UnicodeString& ID,
+ Transliterator::Factory factory,
+ Transliterator::Token context,
+ UBool visible,
+ UErrorCode& ec);
+
+ /**
+ * Register an ID and a resource name. This adds an entry to the
+ * dynamic store, or replaces an existing entry. Any entry in the
+ * underlying static locale resource store is masked.
+ */
+ void put(const UnicodeString& ID,
+ const UnicodeString& resourceName,
+ UTransDirection dir,
+ UBool readonlyResourceAlias,
+ UBool visible,
+ UErrorCode& ec);
+
+ /**
+ * Register an ID and an alias ID. This adds an entry to the
+ * dynamic store, or replaces an existing entry. Any entry in the
+ * underlying static locale resource store is masked.
+ */
+ void put(const UnicodeString& ID,
+ const UnicodeString& alias,
+ UBool readonlyAliasAlias,
+ UBool visible,
+ UErrorCode& ec);
+
+ /**
+ * Unregister an ID. This removes an entry from the dynamic store
+ * if there is one. The static locale resource store is
+ * unaffected.
+ * @param ID the given ID.
+ */
+ void remove(const UnicodeString& ID);
+
+ //------------------------------------------------------------------
+ // Public ID and spec management
+ //------------------------------------------------------------------
+
+ /**
+ * Return a StringEnumeration over the IDs currently registered
+ * with the system.
+ * @internal
+ */
+ StringEnumeration* getAvailableIDs() const;
+
+ /**
+ * == OBSOLETE - remove in ICU 3.4 ==
+ * Return the number of IDs currently registered with the system.
+ * To retrieve the actual IDs, call getAvailableID(i) with
+ * i from 0 to countAvailableIDs() - 1.
+ * @return the number of IDs currently registered with the system.
+ * @internal
+ */
+ int32_t countAvailableIDs() const;
+
+ /**
+ * == OBSOLETE - remove in ICU 3.4 ==
+ * Return the index-th available ID. index must be between 0
+ * and countAvailableIDs() - 1, inclusive. If index is out of
+ * range, the result of getAvailableID(0) is returned.
+ * @param index the given index.
+ * @return the index-th available ID. index must be between 0
+ * and countAvailableIDs() - 1, inclusive. If index is out of
+ * range, the result of getAvailableID(0) is returned.
+ * @internal
+ */
+ const UnicodeString& getAvailableID(int32_t index) const;
+
+ /**
+ * Return the number of registered source specifiers.
+ * @return the number of registered source specifiers.
+ */
+ int32_t countAvailableSources() const;
+
+ /**
+ * Return a registered source specifier.
+ * @param index which specifier to return, from 0 to n-1, where
+ * n = countAvailableSources()
+ * @param result fill-in parameter to receive the source specifier.
+ * If index is out of range, result will be empty.
+ * @return reference to result
+ */
+ UnicodeString& getAvailableSource(int32_t index,
+ UnicodeString& result) const;
+
+ /**
+ * Return the number of registered target specifiers for a given
+ * source specifier.
+ * @param source the given source specifier.
+ * @return the number of registered target specifiers for a given
+ * source specifier.
+ */
+ int32_t countAvailableTargets(const UnicodeString& source) const;
+
+ /**
+ * Return a registered target specifier for a given source.
+ * @param index which specifier to return, from 0 to n-1, where
+ * n = countAvailableTargets(source)
+ * @param source the source specifier
+ * @param result fill-in parameter to receive the target specifier.
+ * If source is invalid or if index is out of range, result will
+ * be empty.
+ * @return reference to result
+ */
+ UnicodeString& getAvailableTarget(int32_t index,
+ const UnicodeString& source,
+ UnicodeString& result) const;
+
+ /**
+ * Return the number of registered variant specifiers for a given
+ * source-target pair. There is always at least one variant: If
+ * just source-target is registered, then the single variant
+ * NO_VARIANT is returned. If source-target/variant is registered
+ * then that variant is returned.
+ * @param source the source specifiers
+ * @param target the target specifiers
+ * @return the number of registered variant specifiers for a given
+ * source-target pair.
+ */
+ int32_t countAvailableVariants(const UnicodeString& source,
+ const UnicodeString& target) const;
+
+ /**
+ * Return a registered variant specifier for a given source-target
+ * pair. If NO_VARIANT is one of the variants, then it will be
+ * at index 0.
+ * @param index which specifier to return, from 0 to n-1, where
+ * n = countAvailableVariants(source, target)
+ * @param source the source specifier
+ * @param target the target specifier
+ * @param result fill-in parameter to receive the variant
+ * specifier. If source is invalid or if target is invalid or if
+ * index is out of range, result will be empty.
+ * @return reference to result
+ */
+ UnicodeString& getAvailableVariant(int32_t index,
+ const UnicodeString& source,
+ const UnicodeString& target,
+ UnicodeString& result) const;
+
+ private:
+
+ //----------------------------------------------------------------
+ // Private implementation
+ //----------------------------------------------------------------
+
+ TransliteratorEntry* find(const UnicodeString& ID);
+
+ TransliteratorEntry* find(UnicodeString& source,
+ UnicodeString& target,
+ UnicodeString& variant);
+
+ TransliteratorEntry* findInDynamicStore(const TransliteratorSpec& src,
+ const TransliteratorSpec& trg,
+ const UnicodeString& variant) const;
+
+ TransliteratorEntry* findInStaticStore(const TransliteratorSpec& src,
+ const TransliteratorSpec& trg,
+ const UnicodeString& variant);
+
+ static TransliteratorEntry* findInBundle(const TransliteratorSpec& specToOpen,
+ const TransliteratorSpec& specToFind,
+ const UnicodeString& variant,
+ UTransDirection direction);
+
+ void registerEntry(const UnicodeString& source,
+ const UnicodeString& target,
+ const UnicodeString& variant,
+ TransliteratorEntry* adopted,
+ UBool visible);
+
+ void registerEntry(const UnicodeString& ID,
+ TransliteratorEntry* adopted,
+ UBool visible);
+
+ void registerEntry(const UnicodeString& ID,
+ const UnicodeString& source,
+ const UnicodeString& target,
+ const UnicodeString& variant,
+ TransliteratorEntry* adopted,
+ UBool visible);
+
+ void registerSTV(const UnicodeString& source,
+ const UnicodeString& target,
+ const UnicodeString& variant);
+
+ void removeSTV(const UnicodeString& source,
+ const UnicodeString& target,
+ const UnicodeString& variant);
+
+ Transliterator* instantiateEntry(const UnicodeString& ID,
+ TransliteratorEntry *entry,
+ TransliteratorAlias*& aliasReturn,
+ UErrorCode& status);
+
+ /**
+ * A StringEnumeration over the registered IDs in this object.
+ */
+ class Enumeration : public StringEnumeration {
+ public:
+ Enumeration(const TransliteratorRegistry& reg);
+ virtual ~Enumeration();
+ virtual int32_t count(UErrorCode& status) const override;
+ virtual const UnicodeString* snext(UErrorCode& status) override;
+ virtual void reset(UErrorCode& status) override;
+ static UClassID U_EXPORT2 getStaticClassID();
+ virtual UClassID getDynamicClassID() const override;
+ private:
+ int32_t index;
+ const TransliteratorRegistry& reg;
+ };
+ friend class Enumeration;
+
+ private:
+
+ /**
+ * Dynamic registry mapping full IDs to Entry objects. This
+ * contains both public and internal entities. The visibility is
+ * controlled by whether an entry is listed in availableIDs and
+ * specDAG or not.
+ */
+ Hashtable registry;
+
+ /**
+ * DAG of visible IDs by spec. Hashtable: source => (Hashtable:
+ * target => variant bitmask)
+ */
+ Hashtable specDAG;
+
+ /**
+ * Vector of all variant names
+ */
+ UVector variantList;
+
+ /**
+ * Vector of public full IDs.
+ */
+ UVector availableIDs;
+
+ TransliteratorRegistry(const TransliteratorRegistry &other); // forbid copying of this class
+ TransliteratorRegistry &operator=(const TransliteratorRegistry &other); // forbid copying of this class
+};
+
+U_NAMESPACE_END
+
+U_CFUNC UBool utrans_transliterator_cleanup();
+
+#endif /* #if !UCONFIG_NO_TRANSLITERATION */
+
+#endif
+//eof