summaryrefslogtreecommitdiffstats
path: root/intl/icu/source/common/static_unicode_sets.h
blob: 5d90ce5908de9817be4069ed717ff56f04d6fa0d (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
// © 2018 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html

// This file contains utilities to deal with static-allocated UnicodeSets.
//
// Common use case: you write a "private static final" UnicodeSet in Java, and
// want something similarly easy in C++.  Originally written for number
// parsing, but this header can be used for other applications.
//
// Main entrypoint: `unisets::get(unisets::MY_SET_ID_HERE)`
//
// This file is in common instead of i18n because it is needed by ucurr.cpp.
//
// Author: sffc

#include "unicode/utypes.h"

#if !UCONFIG_NO_FORMATTING
#ifndef __STATIC_UNICODE_SETS_H__
#define __STATIC_UNICODE_SETS_H__

#include "unicode/uniset.h"
#include "unicode/unistr.h"

U_NAMESPACE_BEGIN
namespace unisets {

enum Key {
    // NONE is used to indicate null in chooseFrom().
    // EMPTY is used to get an empty UnicodeSet.
    NONE = -1,
    EMPTY = 0,

    // Ignorables
    DEFAULT_IGNORABLES,
    STRICT_IGNORABLES,

    // Separators
    // Notes:
    // - COMMA is a superset of STRICT_COMMA
    // - PERIOD is a superset of SCRICT_PERIOD
    // - ALL_SEPARATORS is the union of COMMA, PERIOD, and OTHER_GROUPING_SEPARATORS
    // - STRICT_ALL_SEPARATORS is the union of STRICT_COMMA, STRICT_PERIOD, and OTHER_GRP_SEPARATORS
    COMMA,
    PERIOD,
    STRICT_COMMA,
    STRICT_PERIOD,
    APOSTROPHE_SIGN,
    OTHER_GROUPING_SEPARATORS,
    ALL_SEPARATORS,
    STRICT_ALL_SEPARATORS,

    // Symbols
    MINUS_SIGN,
    PLUS_SIGN,
    PERCENT_SIGN,
    PERMILLE_SIGN,
    INFINITY_SIGN,

    // Currency Symbols
    DOLLAR_SIGN,
    POUND_SIGN,
    RUPEE_SIGN,
    YEN_SIGN,
    WON_SIGN,

    // Other
    DIGITS,

    // Combined Separators with Digits (for lead code points)
    DIGITS_OR_ALL_SEPARATORS,
    DIGITS_OR_STRICT_ALL_SEPARATORS,

    // The number of elements in the enum.
    UNISETS_KEY_COUNT
};

/**
 * Gets the static-allocated UnicodeSet according to the provided key. The
 * pointer will be deleted during u_cleanup(); the caller should NOT delete it.
 *
 * Exported as U_COMMON_API for ucurr.cpp
 *
 * This method is always safe and OK to chain: in the case of a memory or other
 * error, it returns an empty set from static memory.
 * 
 * Example:
 * 
 *     UBool hasIgnorables = unisets::get(unisets::DEFAULT_IGNORABLES)->contains(...);
 *
 * @param key The desired UnicodeSet according to the enum in this file.
 * @return The requested UnicodeSet. Guaranteed to be frozen and non-null, but
 *         may be empty if an error occurred during data loading.
 */
U_COMMON_API const UnicodeSet* get(Key key);

/**
 * Checks if the UnicodeSet given by key1 contains the given string.
 *
 * Exported as U_COMMON_API for numparse_decimal.cpp
 *
 * @param str The string to check.
 * @param key1 The set to check.
 * @return key1 if the set contains str, or NONE if not.
 */
U_COMMON_API Key chooseFrom(UnicodeString str, Key key1);

/**
 * Checks if the UnicodeSet given by either key1 or key2 contains the string.
 *
 * Exported as U_COMMON_API for numparse_decimal.cpp
 *
 * @param str The string to check.
 * @param key1 The first set to check.
 * @param key2 The second set to check.
 * @return key1 if that set contains str; key2 if that set contains str; or
 *         NONE if neither set contains str.
 */
U_COMMON_API Key chooseFrom(UnicodeString str, Key key1, Key key2);

// TODO: Load these from data: ICU-20108
// Unused in C++:
// Key chooseCurrency(UnicodeString str);
// Used instead:
static const struct {
    Key key;
    UChar32 exemplar;
} kCurrencyEntries[] = {
    {DOLLAR_SIGN, u'$'},
    {POUND_SIGN, u'£'},
    {RUPEE_SIGN, u'₹'},
    {YEN_SIGN, u'¥'},
    {WON_SIGN, u'₩'},
};

} // namespace unisets
U_NAMESPACE_END

#endif //__STATIC_UNICODE_SETS_H__
#endif /* #if !UCONFIG_NO_FORMATTING */