summaryrefslogtreecommitdiffstats
path: root/intl/icu/source/i18n/rbt_data.h
blob: 43cbb4795b6b4969307b7732e3eaa857d876b222 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
**********************************************************************
* Copyright (C) 1999-2007, International Business Machines Corporation
* and others. All Rights Reserved.
**********************************************************************
*   Date        Name        Description
*   11/17/99    aliu        Creation.
**********************************************************************
*/
#ifndef RBT_DATA_H
#define RBT_DATA_H

#include "unicode/utypes.h"
#include "unicode/uclean.h"

#if !UCONFIG_NO_TRANSLITERATION

#include "unicode/uobject.h"
#include "rbt_set.h"
#include "hash.h"

U_NAMESPACE_BEGIN

// Forward declarations: this header only refers to these types through
// pointers (member 'variables' and the lookup*() return types), so their
// full definitions are not needed here.
class UnicodeFunctor;
class UnicodeMatcher;
class UnicodeReplacer;

/**
 * The rule data for a RuleBasedTransliterator (RBT).  RBT objects hold
 * a const pointer to a TransliterationRuleData (TRD) object that they
 * do not own.  TRD objects are essentially the parsed rules in compact,
 * usable form.  The TRD objects themselves are held for the life of the
 * process in a static cache owned by Transliterator.
 *
 * This class's API is a little asymmetric.  There is a method to
 * define a variable, but no way to define a set.  This is because the
 * sets are defined by the parser in a UVector, and the vector is
 * copied into a fixed-size array here.  Once this is done, no new
 * sets may be defined.  In practice, there is no need to do so, since
 * generating the data and using it are discrete phases.  When there
 * is a need to access the set data during the parse phase, another
 * data structure handles this.  See the parsing code for more
 * details.
 */
class TransliterationRuleData : public UMemory {

public:

    // PUBLIC DATA MEMBERS

    /**
     * Rule table.  May be empty.
     */
    TransliterationRuleSet ruleSet;

    /**
     * Map variable name (String) to variable (UnicodeString).  A variable name
     * corresponds to zero or more characters, stored in a UnicodeString in
     * this hash.  One or more of these chars may also correspond to a
     * UnicodeMatcher, in which case the character in the UnicodeString in this hash is
     * a stand-in: it is an index for a secondary lookup in the
     * 'variables' array.  The stand-in also represents the UnicodeMatcher in
     * the stored rules.
     */
    Hashtable variableNames;

    /**
     * Table mapping category variable (char16_t stand-in) to set
     * (UnicodeFunctor).  Variables that correspond to a set of characters
     * are mapped from variable name to a stand-in character in
     * 'variableNames'.  The stand-in then selects an entry of this array
     * to look up the actual UnicodeFunctor object.  In addition, the
     * stand-in is stored in the rule text to represent the set of
     * characters.
     *
     * variables[i] represents character (variablesBase + i); the array
     * holds variablesLength entries.
     */
    UnicodeFunctor** variables;

    /**
     * Flag that indicates whether the variables are owned (if a single
     * call to Transliterator::createFromRules() produces a CompoundTransliterator
     * with more than one RuleBasedTransliterator as children, they all share
     * the same variables list, so only the first one is considered to own
     * the variables).
     */
    UBool variablesAreOwned;

    /**
     * The character that represents variables[0].  Characters
     * variablesBase through variablesBase +
     * variablesLength - 1 represent UnicodeFunctor objects.
     */
    char16_t variablesBase;

    /**
     * The length of variables.
     */
    int32_t variablesLength;

    // PUBLIC METHODS

    /**
     * Constructor
     * @param status Output param set to success/failure code on exit.
     */
    TransliterationRuleData(UErrorCode& status);

    /**
     * Copy Constructor
     */
    TransliterationRuleData(const TransliterationRuleData&);

    /**
     * destructor
     */
    ~TransliterationRuleData();

    /**
     * Given a stand-in character, return the UnicodeFunctor that it
     * represents, or nullptr if it doesn't represent anything.
     * @param standIn    the given stand-in character.
     * @return           the UnicodeFunctor that 'standIn' represents
     */
    UnicodeFunctor* lookup(UChar32 standIn) const;

    /**
     * Given a stand-in character, return the UnicodeMatcher that it
     * represents, or nullptr if it doesn't represent anything or if it
     * represents something that is not a matcher.
     * @param standIn    the given stand-in character.
     * @return           the UnicodeMatcher that 'standIn' represents
     */
    UnicodeMatcher* lookupMatcher(UChar32 standIn) const;

    /**
     * Given a stand-in character, return the UnicodeReplacer that it
     * represents, or nullptr if it doesn't represent anything or if it
     * represents something that is not a replacer.
     * @param standIn    the given stand-in character.
     * @return           the UnicodeReplacer that 'standIn' represents
     */
    UnicodeReplacer* lookupReplacer(UChar32 standIn) const;


private:
    /**
     * Copy assignment is forbidden for this class.  The operator is
     * explicitly deleted (rather than merely declared and left undefined)
     * so that any attempted use, including from members, is rejected at
     * compile time instead of at link time.
     */
    TransliterationRuleData &operator=(const TransliterationRuleData &other) = delete;
};

U_NAMESPACE_END

#endif /* #if !UCONFIG_NO_TRANSLITERATION */

#endif