summaryrefslogtreecommitdiffstats
path: root/intl/icu/source/tools/gencolusb/extract_unsafe_backwards.cpp
blob: ee12e69f9b39c879048153eebdae973c4e4bcb5d (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/**
 * Copyright (c) 1999-2016, International Business Machines Corporation and
 * others. All Rights Reserved.
 *
 * Generator for source/i18n/collunsafe.h
 * see Makefile
 */

#include <stdio.h>
#include "unicode/uversion.h"
#include "unicode/uniset.h"
#include "collationroot.h"
#include "collationtailoring.h"

/**
 * Define the type of generator to use. Choose one.
 *
 * These are compile-time switches for the form in which the unsafe-backward
 * set is written to the generated header: set exactly one of them to 1.
 */
#define SERIALIZE 1   //< Default: use UnicodeSet.serialize() and a new internal c'tor
#define RANGES 0      //< Enumerate ranges (works, but not as fast). No support in collationdatareader.cpp
#define PATTERN 0     //< Generate a UnicodeSet pattern (depends on #11891 and is probably slower). No support in collationdatareader.cpp

int main(int argc, const char *argv[]) {
    UErrorCode errorCode = U_ZERO_ERROR;

    // Get the unsafeBackwardsSet
    const CollationCacheEntry *rootEntry = CollationRoot::getRootCacheEntry(errorCode);
    if(U_FAILURE(errorCode)) {
      fprintf(stderr, "Err: %s getting root cache entry\n", u_errorName(errorCode));
      return 1;
    }
    const UVersionInfo &version = rootEntry->tailoring->version;
    const UnicodeSet *unsafeBackwardSet = rootEntry->tailoring->unsafeBackwardSet;
    char verString[20];
    u_versionToString(version, verString);
    fprintf(stderr, "Generating data for ICU %s, Collation %s\n", U_ICU_VERSION, verString);
    int32_t rangeCount = unsafeBackwardSet->getRangeCount();
    
#if SERIALIZE
    fprintf(stderr, ".. serializing\n");
    // UnicodeSet serialization
    
    UErrorCode preflightCode = U_ZERO_ERROR;
    // preflight
    int32_t serializedCount = unsafeBackwardSet->serialize(nullptr,0,preflightCode);
    if(U_FAILURE(preflightCode) && preflightCode != U_BUFFER_OVERFLOW_ERROR) {
      fprintf(stderr, "Err: %s preflighting unicode set\n", u_errorName(preflightCode));
      return 1;
    }
    uint16_t *serializedData = new uint16_t[serializedCount];
    // serialize
    unsafeBackwardSet->serialize(serializedData, serializedCount, errorCode);
    if(U_FAILURE(errorCode)) {
      delete [] serializedData;
      fprintf(stderr, "Err: %s serializing unicodeset\n", u_errorName(errorCode));
      return 1;
    }
#endif
    
#if PATTERN
    fprintf(stderr,".. pattern. (Note: collationdatareader.cpp does not support this form also see #11891)\n");
    // attempt to use pattern
    
    UnicodeString pattern;
    UnicodeSet set(*unsafeBackwardSet);
    set.compact();
    set.toPattern(pattern, false);

    if(U_SUCCESS(errorCode)) {
      // This fails (bug# ?) - which is why this method was abandoned.
      
      // UnicodeSet usA(pattern, errorCode);
      // fprintf(stderr, "\n%s:%d: err creating set A %s\n", __FILE__, __LINE__, u_errorName(errorCode));
      // return 1;
    }


    const char16_t *buf = pattern.getBuffer();
    int32_t needed = pattern.length();

    // print
    {
      char buf2[2048];
      int32_t len2 = pattern.extract(0, pattern.length(), buf2, "utf-8");
      buf2[len2]=0;
      fprintf(stderr,"===\n%s\n===\n", buf2);
    }

    const UnicodeString unsafeBackwardPattern(false, buf, needed);
  if(U_SUCCESS(errorCode)) {
    //UnicodeSet us(unsafeBackwardPattern, errorCode);
    //    fprintf(stderr, "\n%s:%d: err creating set %s\n", __FILE__, __LINE__, u_errorName(errorCode));
  } else {
    fprintf(stderr, "Uset OK - \n");
  }
#endif


  // Generate the output file.

  printf("// collunsafe.h\n");
  printf("// %s\n", U_COPYRIGHT_STRING);
  printf("\n");
  printf("// To be included by collationdatareader.cpp, and generated by gencolusb.\n");
  printf("// Machine generated, do not edit.\n");
  printf("\n");
  printf("#ifndef COLLUNSAFE_H\n"
         "#define COLLUNSAFE_H\n"
         "\n"
         "#include \"unicode/utypes.h\"\n"
         "\n"
         "#define COLLUNSAFE_ICU_VERSION \"" U_ICU_VERSION "\"\n");
  printf("#define COLLUNSAFE_COLL_VERSION \"%s\"\n", verString);

  
  
#if PATTERN
  printf("#define COLLUNSAFE_PATTERN 1\n");
  printf("static const int32_t collunsafe_len = %d;\n", needed);
  printf("static const char16_t collunsafe_pattern[collunsafe_len] = {\n");
  for(int i=0;i<needed;i++) {
    if( (i>0) && (i%8 == 0) ) {
      printf(" // %d\n", i);
    }
    printf("0x%04X", buf[i]); // TODO check
    if(i != (needed-1)) {
      printf(", ");
    }
    }
  printf(" //%d\n};\n", (needed-1));
#endif

#if RANGE
    fprintf(stderr, "COLLUNSAFE_RANGE - no code support in collationdatareader.cpp for this\n");
    printf("#define COLLUNSAFE_RANGE 1\n");
    printf("static const int32_t unsafe_rangeCount = %d;\n", rangeCount);
    printf("static const UChar32 unsafe_ranges[%d] = { \n", rangeCount*2);
    for(int32_t i=0;i<rangeCount;i++) {
      printf(" 0x%04X, 0x%04X, // %d\n",
             unsafeBackwardSet->getRangeStart(i),
             unsafeBackwardSet->getRangeEnd(i),
             i);
    }
    printf("};\n");
#endif

#if SERIALIZE
    printf("#define COLLUNSAFE_SERIALIZE 1\n");    
    printf("static const int32_t unsafe_serializedCount = %d;\n", serializedCount);
    printf("static const uint16_t unsafe_serializedData[%d] = { \n", serializedCount);
    for(int32_t i=0;i<serializedCount;i++) {
      if( (i>0) && (i%8 == 0) ) {
        printf(" // %d\n", i);
      }
      printf("0x%04X", serializedData[i]); // TODO check
      if(i != (serializedCount-1)) {
        printf(", ");
      }
    }  
    printf("};\n");
#endif
    
    printf("#endif\n");
    fflush(stderr);
    fflush(stdout);
    return(U_SUCCESS(errorCode)?0:1);
}