1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
|
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/**
* Copyright (c) 1999-2016, International Business Machines Corporation and
* others. All Rights Reserved.
*
* Generator for source/i18n/collunsafe.h
* see Makefile
*/
#include <stdio.h>
#include "unicode/uversion.h"
#include "unicode/uniset.h"
#include "collationroot.h"
#include "collationtailoring.h"
/**
* Select the output form that main() generates. Choose one by setting it to 1.
* Per the notes on each macro, the RANGES and PATTERN forms have no support in
* collationdatareader.cpp.
*/
#define SERIALIZE 1 //< Default: use UnicodeSet.serialize() and a new internal c'tor
#define RANGES 0 //< Enumerate ranges (works, but not as fast; no support in collationdatareader.cpp)
#define PATTERN 0 //< Generate a UnicodeSet pattern (depends on #11891 and is probably slower; no support in collationdatareader.cpp)
/**
 * Writes the text of collunsafe.h to stdout; progress and error messages go to stderr.
 *
 * Fetches the root collation cache entry, takes the unsafeBackwardSet of its
 * tailoring, and emits that set in the form(s) selected by the SERIALIZE, RANGES
 * and PATTERN macros.
 *
 * @param argc unused
 * @param argv unused
 * @return 0 on success, 1 if any step (including writing the output) failed
 */
int main(int argc, const char *argv[]) {
    (void)argc;
    (void)argv;

    UErrorCode errorCode = U_ZERO_ERROR;

    // Get the unsafeBackwardsSet
    const CollationCacheEntry *rootEntry = CollationRoot::getRootCacheEntry(errorCode);
    if(U_FAILURE(errorCode)) {
        fprintf(stderr, "Err: %s getting root cache entry\n", u_errorName(errorCode));
        return 1;
    }
    const UVersionInfo &version = rootEntry->tailoring->version;
    const UnicodeSet *unsafeBackwardSet = rootEntry->tailoring->unsafeBackwardSet;
    if(unsafeBackwardSet == nullptr) {
        // Every generator below dereferences the set, so fail early with a message.
        fprintf(stderr, "Err: root collation tailoring has no unsafeBackwardSet\n");
        return 1;
    }

    char verString[20];
    u_versionToString(version, verString);
    fprintf(stderr, "Generating data for ICU %s, Collation %s\n", U_ICU_VERSION, verString);

#if SERIALIZE
    fprintf(stderr, ".. serializing\n");
    // UnicodeSet serialization. The preflight call gets its own error code because
    // U_BUFFER_OVERFLOW_ERROR is the expected outcome of serializing into no buffer.
    UErrorCode preflightCode = U_ZERO_ERROR;
    int32_t serializedCount = unsafeBackwardSet->serialize(nullptr,0,preflightCode);
    if(U_FAILURE(preflightCode) && preflightCode != U_BUFFER_OVERFLOW_ERROR) {
        fprintf(stderr, "Err: %s preflighting unicode set\n", u_errorName(preflightCode));
        return 1;
    }
    uint16_t *serializedData = new uint16_t[serializedCount];
    // serialize
    unsafeBackwardSet->serialize(serializedData, serializedCount, errorCode);
    if(U_FAILURE(errorCode)) {
        delete [] serializedData;
        fprintf(stderr, "Err: %s serializing unicodeset\n", u_errorName(errorCode));
        return 1;
    }
#endif

#if PATTERN
    fprintf(stderr,".. pattern. (Note: collationdatareader.cpp does not support this form also see #11891)\n");
    // attempt to use pattern
    UnicodeString pattern;
    UnicodeSet set(*unsafeBackwardSet);
    set.compact();
    set.toPattern(pattern, false);
    // Re-parsing the pattern with UnicodeSet(pattern, errorCode) failed when this was
    // written (bug# ?), which is why this method was abandoned. No round-trip check
    // is performed here.
    const char16_t *buf = pattern.getBuffer();
    int32_t needed = pattern.length();
    // Echo the pattern to stderr as UTF-8, truncated to the local buffer if necessary.
    {
        char buf2[2048];
        const int32_t capacity = (int32_t)sizeof(buf2) - 1; // keep one byte for the NUL
        int32_t len2 = pattern.extract(0, pattern.length(), buf2, (uint32_t)capacity, "utf-8");
        if(len2 < 0) {
            len2 = 0;
        } else if(len2 > capacity) {
            len2 = capacity;
        }
        buf2[len2]=0;
        fprintf(stderr,"===\n%s\n===\n", buf2);
    }
#endif

    // Generate the output file.
    printf("// collunsafe.h\n");
    printf("// %s\n", U_COPYRIGHT_STRING);
    printf("\n");
    printf("// To be included by collationdatareader.cpp, and generated by gencolusb.\n");
    printf("// Machine generated, do not edit.\n");
    printf("\n");
    printf("#ifndef COLLUNSAFE_H\n"
           "#define COLLUNSAFE_H\n"
           "\n"
           "#include \"unicode/utypes.h\"\n"
           "\n"
           "#define COLLUNSAFE_ICU_VERSION \"" U_ICU_VERSION "\"\n");
    printf("#define COLLUNSAFE_COLL_VERSION \"%s\"\n", verString);

#if PATTERN
    printf("#define COLLUNSAFE_PATTERN 1\n");
    printf("static const int32_t collunsafe_len = %d;\n", needed);
    printf("static const char16_t collunsafe_pattern[collunsafe_len] = {\n");
    for(int i=0;i<needed;i++) {
        // Start a new line after every 8 values, annotated with the next index.
        if( (i>0) && (i%8 == 0) ) {
            printf(" // %d\n", i);
        }
        printf("0x%04X", (unsigned int)buf[i]);
        if(i != (needed-1)) {
            printf(", ");
        }
    }
    printf(" //%d\n};\n", (needed-1));
#endif

    // This guard must use the same spelling as the RANGES macro: an undefined name
    // in #if evaluates to 0, which would silently disable the branch.
#if RANGES
    fprintf(stderr, "COLLUNSAFE_RANGE - no code support in collationdatareader.cpp for this\n");
    const int32_t rangeCount = unsafeBackwardSet->getRangeCount();
    printf("#define COLLUNSAFE_RANGE 1\n");
    printf("static const int32_t unsafe_rangeCount = %d;\n", rangeCount);
    printf("static const UChar32 unsafe_ranges[%d] = { \n", rangeCount*2);
    for(int32_t i=0;i<rangeCount;i++) {
        printf(" 0x%04X, 0x%04X, // %d\n",
               (unsigned int)unsafeBackwardSet->getRangeStart(i),
               (unsigned int)unsafeBackwardSet->getRangeEnd(i),
               i);
    }
    printf("};\n");
#endif

#if SERIALIZE
    printf("#define COLLUNSAFE_SERIALIZE 1\n");
    printf("static const int32_t unsafe_serializedCount = %d;\n", serializedCount);
    printf("static const uint16_t unsafe_serializedData[%d] = { \n", serializedCount);
    for(int32_t i=0;i<serializedCount;i++) {
        // Start a new line after every 8 values, annotated with the next index.
        if( (i>0) && (i%8 == 0) ) {
            printf(" // %d\n", i);
        }
        printf("0x%04X", (unsigned int)serializedData[i]);
        if(i != (serializedCount-1)) {
            printf(", ");
        }
    }
    printf("};\n");
    // The serialized words have been emitted; release the buffer.
    delete [] serializedData;
    serializedData = nullptr;
#endif

    printf("#endif\n");
    fflush(stderr);
    // The header is the product of this tool: a failed write must not report success.
    if(fflush(stdout) != 0 || ferror(stdout)) {
        fprintf(stderr, "Err: failed writing generated header to stdout\n");
        return 1;
    }
    return(U_SUCCESS(errorCode)?0:1);
}
|