summaryrefslogtreecommitdiffstats
path: root/intl/icu/source/i18n/uspoof_conf.h
diff options
context:
space:
mode:
Diffstat (limited to 'intl/icu/source/i18n/uspoof_conf.h')
-rw-r--r--intl/icu/source/i18n/uspoof_conf.h135
1 files changed, 135 insertions, 0 deletions
diff --git a/intl/icu/source/i18n/uspoof_conf.h b/intl/icu/source/i18n/uspoof_conf.h
new file mode 100644
index 0000000000..a5e3b9e109
--- /dev/null
+++ b/intl/icu/source/i18n/uspoof_conf.h
@@ -0,0 +1,135 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+******************************************************************************
+*
+* Copyright (C) 2008-2016, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+******************************************************************************
+* file name: uspoof_conf.h
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2009Jan05
+* created by: Andy Heninger
+*
+* Internal classes for compiling confusable data into its binary (runtime) form.
+*/
+
+#ifndef __USPOOF_BUILDCONF_H__
+#define __USPOOF_BUILDCONF_H__
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_NORMALIZATION
+
+#if !UCONFIG_NO_REGULAR_EXPRESSIONS
+
+#include "unicode/uregex.h"
+#include "uhash.h"
+#include "uspoof_impl.h"
+
+U_NAMESPACE_BEGIN
+
+// SPUString
+// Holds a string that is the result of one of the mappings defined
+// by the confusable mapping data (confusables.txt from Unicode.org)
+// Instances of SPUString exist during the compilation process only.
+
+struct SPUString : public UMemory { // Build-time record for one mapping-result string; all members are public.
+ LocalPointer<UnicodeString> fStr; // The actual string.
+ int32_t fCharOrStrTableIndex; // Index into the final runtime data for this
+ // string (or, for length 1, the single string char
+ // itself, there being no string table entry for it.)
+
+ SPUString(LocalPointer<UnicodeString> s); // Takes the LocalPointer by value; presumably moved into fStr -- confirm in the .cpp.
+ ~SPUString();
+};
+
+
+// String Pool: A utility class for holding the strings that are the result of
+// the spoof mappings. These strings will ultimately end up in the
+// run-time String Table.
+// This is sort of like a sorted set of strings, except that ICU's anemic
+// built-in collections don't support those, so it is implemented with a
+// combination of a uhash and a UVector.
+
+
+class SPUStringPool : public UMemory { // Collection of SPUString objects, backed by the fVec and fHash members below.
+ public:
+ SPUStringPool(UErrorCode &status); // Takes an in/out UErrorCode; presumably reports allocation failure through it -- confirm.
+ ~SPUStringPool();
+
+ // Add a string. Return the string from the table.
+ // If the input parameter string is already in the table, delete the
+ // input parameter and return the existing string.
+ SPUString *addString(UnicodeString *src, UErrorCode &status);
+ // NOTE(review): what happens to src when status reports a failure is not visible here -- confirm in the .cpp.
+
+ // Get the n-th string in the collection.
+ SPUString *getByIndex(int32_t i);
+
+ // Sort the contents; affects the ordering of getByIndex().
+ void sort(UErrorCode &status);
+ // Presumably the number of strings currently held in the pool -- confirm in the .cpp.
+ int32_t size();
+
+ private:
+ UVector *fVec; // Elements are SPUString *
+ UHashtable *fHash; // Key: UnicodeString Value: SPUString
+};
+
+
+// class ConfusabledataBuilder
+// An instance of this class exists while the confusable data is being built from source.
+// It encapsulates the intermediate data structures that are used for building.
+// It exports one static function, to do a confusable data build.
+
+class ConfusabledataBuilder : public UMemory { // Every member is private except the static buildConfusableData().
+ private:
+ SpoofImpl *fSpoofImpl;
+ char16_t *fInput; // presumably the confusables source converted to UTF-16 -- confirm in the .cpp
+ UHashtable *fTable;
+ UnicodeSet *fKeySet; // A set of all keys (UChar32s) that go into the four mapping tables.
+ // NOTE(review): only one table member (fTable) is declared in this class; "four" above may be stale -- confirm.
+ // The binary data is first assembled into the following collections
+ // (fKeyVec, fValueVec, fStringTable), then copied to its final raw-memory destination.
+ UVector *fKeyVec;
+ UVector *fValueVec;
+ UnicodeString *fStringTable;
+
+ SPUStringPool *stringPool;
+ URegularExpression *fParseLine; // presumably matches one line of the confusables input -- confirm
+ URegularExpression *fParseHexNum; // presumably matches a hexadecimal number within a line -- confirm
+ int32_t fLineNum;
+
+ ConfusabledataBuilder(SpoofImpl *spImpl, UErrorCode &status);
+ ~ConfusabledataBuilder();
+ void build(const char * confusables, int32_t confusablesLen, UErrorCode &status);
+
+ // Add an entry to the key and value tables being built
+ // input: data from SLTable, MATable, etc.
+ // output: entry added to fKeyVec and fValueVec
+ void addKeyEntry(UChar32 keyChar, // The key character
+ UHashtable *table, // The table, one of SATable, MATable, etc.
+ int32_t tableFlag, // One of USPOOF_SA_TABLE_FLAG, etc.
+ UErrorCode &status);
+
+ // From an index into fKeyVec & fValueVec
+ // get a UnicodeString with the corresponding mapping.
+ UnicodeString getMapping(int32_t index);
+
+ // Populate the final binary output data array with the compiled data.
+ void outputData(UErrorCode &status);
+
+ public: // The only public member follows; the constructor and destructor above are private.
+ static void buildConfusableData(SpoofImpl *spImpl, const char * confusables,
+ int32_t confusablesLen, int32_t *errorType, UParseError *pe, UErrorCode &status);
+};
+U_NAMESPACE_END
+
+#endif // !UCONFIG_NO_REGULAR_EXPRESSIONS
+#endif // !UCONFIG_NO_NORMALIZATION
+#endif // __USPOOF_BUILDCONF_H__