summaryrefslogtreecommitdiffstats
path: root/intl/icu_capi/cpp/examples/properties/test.cpp
blob: 24583041df3d93fb9944a2ee62b68ea3453ddf99 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
// This file is part of ICU4X. For terms of use, please see the file
// called LICENSE at the top level of the ICU4X source tree
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).

#include "../../include/ICU4XCodePointSetData.hpp"
#include "../../include/ICU4XUnicodeSetData.hpp"
#include "../../include/ICU4XCodePointMapData16.hpp"
#include "../../include/ICU4XCodePointMapData8.hpp"
#include "../../include/ICU4XPropertyValueNameToEnumMapper.hpp"
#include "../../include/ICU4XGeneralCategoryNameToMaskMapper.hpp"
#include "../../include/ICU4XLogger.hpp"

#include <cstdint>
#include <iostream>

// Checks a code point set against one code point expected to be a member and
// one expected not to be, and prints a one-line verdict to std::cout.
//
// data:     set queried through `contains()`
// included: code point that must be reported as contained
// excluded: code point that must be reported as not contained
// Returns 0 when both expectations hold, 1 otherwise.
//
// Note: std::hex is sticky, so std::cout is still in hexadecimal mode after
// this function returns.
int test_set_property(ICU4XCodePointSetData data, char32_t included, char32_t excluded) {
    const bool has_included = data.contains(included);
    const bool has_excluded = data.contains(excluded);

    // Stream the code points as plain integers. `std::ostream << char32_t`
    // only works before C++20 (via integral promotion); from C++20 on that
    // overload is deleted, so the explicit cast keeps this portable while
    // printing exactly the same digits.
    const uint32_t included_cp = static_cast<uint32_t>(included);
    const uint32_t excluded_cp = static_cast<uint32_t>(excluded);

    std::cout << std::hex; // print hex for U+####
    if (has_included && !has_excluded) {
        std::cout << "Set correctly contains U+" << included_cp << " and not U+" << excluded_cp << std::endl;
        return 0;
    }
    std::cout << "Set returns wrong result on U+" << included_cp << " or U+" << excluded_cp << std::endl;
    return 1;
}

// Looks up `sample` in a 16-bit code point map and compares the result with
// `expected`, printing a one-line verdict to std::cout.
//
// data:     map queried through `get()`
// sample:   code point to look up
// expected: value the map is expected to return for `sample`
// Returns 0 on a match, 1 on a mismatch.
//
// Note: std::hex is sticky, so std::cout is still in hexadecimal mode after
// this function returns.
int test_map_16_property(ICU4XCodePointMapData16 data, char32_t sample, uint32_t expected) {
    const uint32_t actual = data.get(sample);

    // Stream the code point as a plain integer. `std::ostream << char32_t`
    // only works before C++20 (via integral promotion); from C++20 on that
    // overload is deleted. The cast prints exactly the same digits.
    const uint32_t sample_cp = static_cast<uint32_t>(sample);

    std::cout << std::hex; // print hex for U+####
    if (actual == expected) {
        std::cout << "Code point U+" << sample_cp << " correctly mapped to 0x" << actual << std::endl;
        return 0;
    }
    std::cout << "Code point U+" << sample_cp << " incorrectly mapped to 0x" << actual << std::endl;
    return 1;
}

// Looks up `sample` in an 8-bit code point map and compares the result with
// `expected`, printing a one-line verdict to std::cout.
//
// data:     map queried through `get()`
// sample:   code point to look up
// expected: value the map is expected to return for `sample`
// Returns 0 on a match, 1 on a mismatch.
//
// Note: std::hex is sticky, so std::cout is still in hexadecimal mode after
// this function returns.
int test_map_8_property(ICU4XCodePointMapData8 data, char32_t sample, uint32_t expected) {
    const uint32_t actual = data.get(sample);

    // Stream the code point as a plain integer. `std::ostream << char32_t`
    // only works before C++20 (via integral promotion); from C++20 on that
    // overload is deleted. The cast prints exactly the same digits.
    const uint32_t sample_cp = static_cast<uint32_t>(sample);

    std::cout << std::hex; // print hex for U+####
    if (actual == expected) {
        std::cout << "Code point U+" << sample_cp << " correctly mapped to 0x" << actual << std::endl;
        return 0;
    }
    std::cout << "Code point U+" << sample_cp << " incorrectly mapped to 0x" << actual << std::endl;
    return 1;
}

// Runs the property checks in sequence: code point sets and maps, Unicode
// sets (Basic_Emoji and Bangla exemplars), the script name mapper, the general
// category mask mapper, and the range iterator over a general category mask.
//
// Each stage prints its verdict to std::cout. The process exit code is 0 when
// every stage passes; otherwise the first failing stage's non-zero result is
// returned and later stages are skipped.
int main() {
    ICU4XLogger::init_simple_logger();
    ICU4XDataProvider dp = ICU4XDataProvider::create_compiled();
    int result = 0;

    result = test_set_property(
        ICU4XCodePointSetData::load_ascii_hex_digit(dp).ok().value(),
        u'3',
        u'੩'
    );
    if (result != 0) {
        return result;
    }

    result = test_map_16_property(
        ICU4XCodePointMapData16::load_script(dp).ok().value(),
        u'木',
        17 // Script::Han
    );
    if (result != 0) {
        return result;
    }

    result = test_map_8_property(
        ICU4XCodePointMapData8::load_general_category(dp).ok().value(),
        u'木',
        5 // GeneralCategory::OtherLetter
    );
    if (result != 0) {
        return result;
    }

    result = test_map_8_property(
        ICU4XCodePointMapData8::load_bidi_class(dp).ok().value(),
        u'ع',
        13 // BidiClass::ArabicLetter
    );
    if (result != 0) {
        return result;
    }

    // Basic_Emoji: membership of single code points and of multi-code-point strings.
    ICU4XUnicodeSetData basic_emoji = ICU4XUnicodeSetData::load_basic_emoji(dp).ok().value();

    if (!basic_emoji.contains_char(U'🔥')) {
        std::cout << "Character 🔥 not found in Basic_Emoji set" << std::endl;
        result = 1;
    }

    if (!basic_emoji.contains(u8"🗺️")) {
        std::cout << "String \"🗺️\" (U+1F5FA U+FE0F) not found in Basic_Emoji set" << std::endl;
        result = 1;
    }
    if (basic_emoji.contains_char(U'a')) {
        std::cout << "Character a found in Basic_Emoji set" << std::endl;
        result = 1;
    }

    if (basic_emoji.contains(u8"aa")) {
        std::cout << "String \"aa\" found in Basic_Emoji set" << std::endl;
        result = 1;
    }

    if (result != 0) {
        return result;
    } else {
        std::cout << "Basic_Emoji set contains appropriate characters" << std::endl;
    }

    // Main exemplar characters for the "bn" locale.
    ICU4XLocale locale = ICU4XLocale::create_from_string("bn").ok().value();
    ICU4XUnicodeSetData exemplars = ICU4XUnicodeSetData::load_exemplars_main(dp, locale).ok().value();
    if (!exemplars.contains_char(U'ব')) {
        std::cout << "Character 'ব' not found in Bangla exemplar chars set" << std::endl;
        result = 1;
    }

    if (!exemplars.contains(u8"ক্ষ")) {
        std::cout << "String \"ক্ষ\" (U+0995U+09CDU+09B7) not found in Bangla exemplar chars set" << std::endl;
        result = 1;
    }
    if (exemplars.contains_char(U'a')) {
        std::cout << "Character a found in Bangla exemplar chars set" << std::endl;
        result = 1;
    }

    if (exemplars.contains(u8"aa")) {
        // This branch fires when the string IS present, so report it as found.
        std::cout << "String \"aa\" found in Bangla exemplar chars set" << std::endl;
        result = 1;
    }
    if (result != 0) {
        return result;
    } else {
        std::cout << "Bangla exemplar chars set contains appropriate characters" << std::endl;
    }

    // Script name mapper. The expected values are written in decimal, so
    // switch the stream back to decimal: the test_* helpers above leave
    // std::cout in hex mode, which would print 65 as "41" and -1 as "ffffffff".
    std::cout << std::dec;
    ICU4XPropertyValueNameToEnumMapper mapper = ICU4XPropertyValueNameToEnumMapper::load_script(dp).ok().value();
    int32_t script = mapper.get_strict("Brah");
    if (script != 65) {
        std::cout << "Expected discriminant 65 for script name `Brah`, found " << script << std::endl;
        result = 1;
    }
    script = mapper.get_strict("Brahmi");
    if (script != 65) {
        std::cout << "Expected discriminant 65 for script name `Brahmi`, found " << script << std::endl;
        result = 1;
    }
    script = mapper.get_loose("brah");
    if (script != 65) {
        std::cout << "Expected discriminant 65 for (loose matched) script name `brah`, found " << script << std::endl;
        result = 1;
    }
    script = mapper.get_strict("Linear_Z");
    if (script != -1) {
        std::cout << "Expected no value for fake script name `Linear_Z`, found " << script << std::endl;
        result = 1;
    }
    if (result != 0) {
        return result;
    } else {
        std::cout << "Script name mapper returns correct values" << std::endl;
    }

    // Masks and ranges below are printed in hexadecimal, as before.
    std::cout << std::hex;
    ICU4XGeneralCategoryNameToMaskMapper mask_mapper = ICU4XGeneralCategoryNameToMaskMapper::load(dp).ok().value();
    int32_t mask = mask_mapper.get_strict("Lu");
    if (mask != 0x02) {
        std::cout << "Expected discriminant 0x02 for mask name `Lu`, found " << mask << std::endl;
        result = 1;
    }
    mask = mask_mapper.get_strict("L");
    if (mask != 0x3e) {
        std::cout << "Expected discriminant 0x3e for mask name `L`, found " << mask << std::endl;
        result = 1;
    }
    mask = mask_mapper.get_strict("Letter");
    if (mask != 0x3e) {
        std::cout << "Expected discriminant 0x3e for mask name `Letter`, found " << mask << std::endl;
        result = 1;
    }
    mask = mask_mapper.get_loose("l");
    if (mask != 0x3e) {
        std::cout << "Expected discriminant 0x3e for mask name `l`, found " << mask << std::endl;
        result = 1;
    }
    mask = mask_mapper.get_strict("letter");
    if (mask != 0) {
        std::cout << "Expected no mask for (strict matched) name `letter`, found " << mask << std::endl;
        result = 1;
    }
    mask = mask_mapper.get_strict("EverythingLol");
    if (mask != 0) {
        std::cout << "Expected no mask for nonexistent name `EverythingLol`, found " << mask << std::endl;
        result = 1;
    }

    if (result != 0) {
        return result;
    } else {
        std::cout << "Mask name mapper returns correct values" << std::endl;
    }

    // Range iteration: the first range for the `Lu` mask is expected to be [A, Z].
    mask = mask_mapper.get_strict("Lu");
    ICU4XCodePointMapData8 gc = ICU4XCodePointMapData8::load_general_category(dp).ok().value();
    auto ranges = gc.iter_ranges_for_mask(mask);
    auto next = ranges.next();
    if (next.done) {
        std::cout << "Got empty iterator!" << std::endl;
        result = 1;
    }
    if (next.start != U'A' || next.end != U'Z') {
        // Cast the char32_t literals: streaming char32_t directly is a deleted
        // overload from C++20 on; the cast prints the same digits.
        std::cout << "Expected range [" << static_cast<uint32_t>(U'A') << ", " << static_cast<uint32_t>(U'Z')
                  << "], got range [" << next.start << ", " << next.end << "]" << std::endl;
        result = 1;
    }

    // Test iteration to completion for a small set
    mask = mask_mapper.get_strict("Control");
    ranges = gc.iter_ranges_for_mask(mask);
    next = ranges.next();

    if (next.start != 0 || next.end != 0x1f) {
        std::cout << "Expected range [0, 0x1f], got range [" << next.start << ", " << next.end << "]" << std::endl;
        result = 1;
    }

    std::cout << "Found ranges for gc=Control:";
    while (!next.done) {
        std::cout << " [" << next.start << ", " << next.end << "]";

        next = ranges.next();
    }
    std::cout << std::endl;

    if (result != 0) {
        return result;
    } else {
        std::cout << "Ranges iterator works" << std::endl;
    }
    return 0;
}