summaryrefslogtreecommitdiffstats
path: root/intl/icu/source/common/unicode/stringoptions.h
blob: 7b9f70944f62db69997057f5f67912566cdde220 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
// © 2017 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html

// stringoptions.h
// created: 2017jun08 Markus W. Scherer

#ifndef __STRINGOPTIONS_H__
#define __STRINGOPTIONS_H__

#include "unicode/utypes.h"

/**
 * \file
 * \brief C API: Option bit constants, combined into bit sets, for various string and character processing functions.
 */

/**
 * Case folding option value: apply the default mappings that are
 * defined in CaseFolding.txt.
 * This is the zero value, that is, no case folding option is set.
 *
 * @stable ICU 2.0
 */
#define U_FOLD_CASE_DEFAULT 0x00000000

/**
 * Case folding option value: select the Turkic-aware variant of the mappings.
 *
 * With this option, the modified set of mappings provided in CaseFolding.txt
 * is used, so that dotted I and dotless i are handled appropriately
 * for Turkic languages (tr, az).
 *
 * How CaseFolding.txt marks the affected mappings depends on its version:
 * - Before Unicode 3.2, they are marked with 'I'; these mappings are
 *   included for default mappings and excluded for the Turkic-specific ones.
 * - In Unicode 3.2, they are marked with 'T' instead; these mappings are
 *   excluded for default mappings and included for the Turkic-specific ones.
 *
 * @stable ICU 2.0
 */
#define U_FOLD_CASE_EXCLUDE_SPECIAL_I 0x00000001

/**
 * Titlecasing option: treat the string as a whole instead of
 * titlecasing each word.
 * (Only the character at index 0, possibly adjusted, is titlecased.)
 * Option bits value for titlecasing APIs that take an options bit set.
 *
 * At most one titlecasing iterator option may be specified: combining
 * several of them, or combining an options bit with an explicit
 * BreakIterator, is an error.
 *
 * @see U_TITLECASE_ADJUST_TO_CASED
 * @stable ICU 60
 */
#define U_TITLECASE_WHOLE_STRING 0x00000020

/**
 * Titlecasing option: work sentence by sentence instead of word by word.
 * (Only the first character of each sentence, possibly adjusted,
 * is titlecased.)
 * Option bits value for titlecasing APIs that take an options bit set.
 *
 * At most one titlecasing iterator option may be specified: combining
 * several of them, or combining an options bit with an explicit
 * BreakIterator, is an error.
 *
 * @see U_TITLECASE_ADJUST_TO_CASED
 * @stable ICU 60
 */
#define U_TITLECASE_SENTENCES 0x00000040

/**
 * Titlecasing option: leave the non-initial parts of words unchanged
 * instead of lowercasing them.
 * Option bit for titlecasing APIs that take an options bit set.
 *
 * Without this option, titlecasing titlecases the character at each
 * (possibly adjusted) BreakIterator index and lowercases all other
 * characters up to the next iterator index.
 * When this option is set, those other characters are not modified.
 *
 * @see U_TITLECASE_ADJUST_TO_CASED
 * @see UnicodeString::toTitle
 * @see CaseMap::toTitle
 * @see ucasemap_setOptions
 * @see ucasemap_toTitle
 * @see ucasemap_utf8ToTitle
 * @stable ICU 3.8
 */
#define U_TITLECASE_NO_LOWERCASE 0x00000100

/**
 * Titlecasing option: use the titlecasing BreakIterator indexes unadjusted,
 * and titlecase exactly the characters at the breaks the iterator reports.
 * Option bit for titlecasing APIs that take an options bit set.
 *
 * Without this option, titlecasing takes each break iterator index,
 * moves it to the next relevant character
 * (see U_TITLECASE_ADJUST_TO_CASED), and titlecases that character.
 *
 * Other characters are lowercased.
 *
 * Specifying multiple titlecasing adjustment options together is an error.
 *
 * @see U_TITLECASE_ADJUST_TO_CASED
 * @see U_TITLECASE_NO_LOWERCASE
 * @see UnicodeString::toTitle
 * @see CaseMap::toTitle
 * @see ucasemap_setOptions
 * @see ucasemap_toTitle
 * @see ucasemap_utf8ToTitle
 * @stable ICU 3.8
 */
#define U_TITLECASE_NO_BREAK_ADJUSTMENT 0x00000200

/**
 * Titlecasing option: move each titlecasing BreakIterator index forward
 * to the next cased character.
 * (See the Unicode Standard, chapter 3, Default Case Conversion, R3 toTitlecase(X).)
 * Option bit for titlecasing APIs that take an options bit set.
 *
 * ICU used to apply this index adjustment by default.
 * Since ICU 60, the default is instead to adjust to the next character
 * that is a letter, number, symbol, or private use code point
 * (uncased modifier letters are skipped).
 * For word titlecasing the two behaviors differ only slightly, but for
 * whole-string and sentence titlecasing the new adjustment is much better:
 * it produces "49ers" and "«丰(abc)»" rather than "49Ers" and "«丰(Abc)»".
 *
 * Specifying multiple titlecasing adjustment options together is an error.
 *
 * @see U_TITLECASE_NO_BREAK_ADJUSTMENT
 * @stable ICU 60
 */
#define U_TITLECASE_ADJUST_TO_CASED 0x00000400

/**
 * String transformation option: do not reset the Edits object first.
 * Some case-mapping and normalization functions, for example, use this option.
 *
 * @see CaseMap
 * @see Edits
 * @see Normalizer2
 * @stable ICU 60
 */
#define U_EDITS_NO_RESET 0x00002000

/**
 * String transformation option: when recording how source substrings
 * relate to changed and unchanged result substrings, omit the unchanged text.
 * Some case-mapping and normalization functions, for example, use this option.
 *
 * @see CaseMap
 * @see Edits
 * @see Normalizer2
 * @stable ICU 60
 */
#define U_OMIT_UNCHANGED_TEXT 0x00004000

/**
 * Comparison option bit for u_strCaseCompare, u_strcasecmp, unorm_compare, etc:
 * Order strings by code point rather than by code unit.
 * @stable ICU 2.2
 */
#define U_COMPARE_CODE_POINT_ORDER 0x00008000

/**
 * Comparison option bit for unorm_compare:
 * Compare the strings case-insensitively.
 * @stable ICU 2.2
 */
#define U_COMPARE_IGNORE_CASE 0x00010000

/**
 * Comparison option bit for unorm_compare:
 * Assume that both input strings fulfill FCD conditions.
 * @stable ICU 2.2
 */
#define UNORM_INPUT_IS_FCD 0x00020000

// Related definitions elsewhere.
// Options that are not meaningful in the same functions
// can share the same bits.
//
// Public:
// unicode/unorm.h #define UNORM_COMPARE_NORM_OPTIONS_SHIFT 20
//
// Internal: (may change or be removed)
// ucase.h #define _STRCASECMP_OPTIONS_MASK 0xffff
// ucase.h #define _FOLD_CASE_OPTIONS_MASK 7
// ucasemap_imp.h #define U_TITLECASE_ITERATOR_MASK 0xe0
// ucasemap_imp.h #define U_TITLECASE_ADJUSTMENT_MASK 0x600
// ustr_imp.h #define _STRNCMP_STYLE 0x1000
// unormcmp.cpp #define _COMPARE_EQUIV 0x80000

#endif  // __STRINGOPTIONS_H__