summaryrefslogtreecommitdiffstats
path: root/intl/icu/source/i18n/esctrn.h
blob: a4282ea86a2c511a225761936a05e2f23dc2c342 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
**********************************************************************
*   Copyright (c) 2001-2007, International Business Machines
*   Corporation and others.  All Rights Reserved.
**********************************************************************
*   Date        Name        Description
*   11/20/2001  aliu        Creation.
**********************************************************************
*/
#ifndef ESCTRN_H
#define ESCTRN_H

#include "unicode/utypes.h"

#if !UCONFIG_NO_TRANSLITERATION

#include "unicode/translit.h"

U_NAMESPACE_BEGIN

/**
 * A transliterator that converts Unicode characters to an escape
 * form.  Examples of escape forms are "U+4E01" and "".
 * Escape forms have a prefix and suffix, either of which may be
 * empty, a radix, typically 16 or 10, a minimum digit count,
 * typically 1, 4, or 8, and a boolean that specifies whether
 * supplemental characters are handled as 32-bit code points or as two
 * 16-bit code units.  Most escape forms handle 32-bit code points,
 * but some, such as the Java form, intentionally break them into two
 * surrogate pairs, for backward compatibility.
 *
 * <p>Some escape forms actually have two different patterns, one for
 * BMP characters (0..FFFF) and one for supplements (>FFFF).  To
 * handle this, a second EscapeTransliterator may be defined that
 * specifies the pattern to be produced for supplementals.  An example
 * of a form that requires this is the C form, which uses "\\uFFFF"
 * for BMP characters and "\\U0010FFFF" for supplementals.
 *
 * <p>This class is package private.  It registers several standard
 * variants with the system which are then accessed via their IDs.
 *
 * @author Alan Liu
 */
class EscapeTransliterator : public Transliterator {

 private:

    /**
     * The prefix of the escape form; may be empty, but usually isn't.
     */
    UnicodeString prefix;

    /**
     * The prefix of the escape form; often empty.
     */
    UnicodeString suffix;

    /**
     * The radix to display the number in.  Typically 16 or 10.  Must
     * be in the range 2 to 36.
     */
    int32_t radix;

    /**
     * The minimum number of digits.  Typically 1, 4, or 8.  Values
     * less than 1 are equivalent to 1.
     */
    int32_t minDigits;

    /**
     * If true, supplementals are handled as 32-bit code points.  If
     * false, they are handled as two 16-bit code units.
     */
    UBool grokSupplementals;

    /**
     * The form to be used for supplementals.  If this is null then
     * the same form is used for BMP characters and supplementals.  If
     * this is not null and if grokSupplementals is true then the
     * prefix, suffix, radix, and minDigits of this object are used
     * for supplementals.  This pointer is owned.
     */
    EscapeTransliterator* supplementalHandler;

 public:

    /**
     * Registers standard variants with the system.  Called by
     * Transliterator during initialization.
     */
    static void registerIDs();

    /**
     * Constructs an escape transliterator with the given ID and
     * parameters.  See the class member documentation for details.
     */
    EscapeTransliterator(const UnicodeString& ID,
                         const UnicodeString& prefix, const UnicodeString& suffix,
                         int32_t radix, int32_t minDigits,
                         UBool grokSupplementals,
                         EscapeTransliterator* adoptedSupplementalHandler);

    /**
     * Copy constructor.
     */
    EscapeTransliterator(const EscapeTransliterator&);

    /**
     * Destructor.
     */
    virtual ~EscapeTransliterator();

    /**
     * Transliterator API.
     */
    virtual EscapeTransliterator* clone() const override;

    /**
     * ICU "poor man's RTTI", returns a UClassID for the actual class.
     */
    virtual UClassID getDynamicClassID() const override;

    /**
     * ICU "poor man's RTTI", returns a UClassID for this class.
     */
    U_I18N_API static UClassID U_EXPORT2 getStaticClassID();

 protected:

    /**
     * Implements {@link Transliterator#handleTransliterate}.
     */
    virtual void handleTransliterate(Replaceable& text, UTransPosition& offset,
                             UBool isIncremental) const override;

};

U_NAMESPACE_END

#endif /* #if !UCONFIG_NO_TRANSLITERATION */

#endif