// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ****************************************************************************** * * Copyright (C) 2008-2016, International Business Machines * Corporation and others. All Rights Reserved. * ****************************************************************************** * file name: uspoof_conf.h * encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * * created on: 2009Jan05 * created by: Andy Heninger * * Internal classes for compiling confusable data into its binary (runtime) form. */ #ifndef __USPOOF_BUILDCONF_H__ #define __USPOOF_BUILDCONF_H__ #include "unicode/utypes.h" #if !UCONFIG_NO_NORMALIZATION #if !UCONFIG_NO_REGULAR_EXPRESSIONS #include "unicode/uregex.h" #include "uhash.h" #include "uspoof_impl.h" U_NAMESPACE_BEGIN // SPUString // Holds a string that is the result of one of the mappings defined // by the confusable mapping data (confusables.txt from Unicode.org) // Instances of SPUString exist during the compilation process only. struct SPUString : public UMemory { LocalPointer fStr; // The actual string. int32_t fCharOrStrTableIndex; // Index into the final runtime data for this // string (or, for length 1, the single string char // itself, there being no string table entry for it.) SPUString(LocalPointer s); ~SPUString(); }; // String Pool A utility class for holding the strings that are the result of // the spoof mappings. These strings will utimately end up in the // run-time String Table. // This is sort of like a sorted set of strings, except that ICU's anemic // built-in collections don't support those, so it is implemented with a // combination of a uhash and a UVector. class SPUStringPool : public UMemory { public: SPUStringPool(UErrorCode &status); ~SPUStringPool(); // Add a string. Return the string from the table. // If the input parameter string is already in the table, delete the // input parameter and return the existing string. SPUString *addString(UnicodeString *src, UErrorCode &status); // Get the n-th string in the collection. SPUString *getByIndex(int32_t i); // Sort the contents; affects the ordering of getByIndex(). void sort(UErrorCode &status); int32_t size(); private: UVector *fVec; // Elements are SPUString * UHashtable *fHash; // Key: UnicodeString Value: SPUString }; // class ConfusabledataBuilder // An instance of this class exists while the confusable data is being built from source. // It encapsulates the intermediate data structures that are used for building. // It exports one static function, to do a confusable data build. class ConfusabledataBuilder : public UMemory { private: SpoofImpl *fSpoofImpl; char16_t *fInput; UHashtable *fTable; UnicodeSet *fKeySet; // A set of all keys (UChar32s) that go into the four mapping tables. // The binary data is first assembled into the following four collections, then // copied to its final raw-memory destination. UVector *fKeyVec; UVector *fValueVec; UnicodeString *fStringTable; SPUStringPool *stringPool; URegularExpression *fParseLine; URegularExpression *fParseHexNum; int32_t fLineNum; ConfusabledataBuilder(SpoofImpl *spImpl, UErrorCode &status); ~ConfusabledataBuilder(); void build(const char * confusables, int32_t confusablesLen, UErrorCode &status); // Add an entry to the key and value tables being built // input: data from SLTable, MATable, etc. // output: entry added to fKeyVec and fValueVec void addKeyEntry(UChar32 keyChar, // The key character UHashtable *table, // The table, one of SATable, MATable, etc. int32_t tableFlag, // One of USPOOF_SA_TABLE_FLAG, etc. UErrorCode &status); // From an index into fKeyVec & fValueVec // get a UnicodeString with the corresponding mapping. UnicodeString getMapping(int32_t index); // Populate the final binary output data array with the compiled data. void outputData(UErrorCode &status); public: static void buildConfusableData(SpoofImpl *spImpl, const char * confusables, int32_t confusablesLen, int32_t *errorType, UParseError *pe, UErrorCode &status); }; U_NAMESPACE_END #endif #endif // !UCONFIG_NO_REGULAR_EXPRESSIONS #endif // __USPOOF_BUILDCONF_H__