summaryrefslogtreecommitdiffstats
path: root/intl/icu/source/tools/gencolusb
diff options
context:
space:
mode:
Diffstat (limited to 'intl/icu/source/tools/gencolusb')
-rw-r--r--intl/icu/source/tools/gencolusb/Makefile45
-rw-r--r--intl/icu/source/tools/gencolusb/README.md10
-rw-r--r--intl/icu/source/tools/gencolusb/extract_unsafe_backwards.cpp168
-rw-r--r--intl/icu/source/tools/gencolusb/verify_uset.cpp71
4 files changed, 294 insertions, 0 deletions
diff --git a/intl/icu/source/tools/gencolusb/Makefile b/intl/icu/source/tools/gencolusb/Makefile
new file mode 100644
index 0000000000..be13b5b106
--- /dev/null
+++ b/intl/icu/source/tools/gencolusb/Makefile
@@ -0,0 +1,45 @@
+## Makefile for rebuilding 'unsafe backward' data
+## Copyright (C) 2016 and later: Unicode, Inc. and others.
+## License & terms of use: http://www.unicode.org/copyright.html
+## Copyright (c) 2015, International Business Machines Corporation and
+## others. All Rights Reserved.
+
+##
+## CONFIGURATION:
+## 1. create Makefile.local (optional; it is included below only if it exists) containing overrides if necessary:
+## BUILD_ROOT=/home/user/icu-build (location of 'config.status' etc.; defaults to SOURCE_ROOT)
+## PATH_VAR=DYLD_LIBRARY_PATH (if on OSX etc.; defaults to LD_LIBRARY_PATH)
+##
+## UPDATING
+## 1. make 'reset-icu' will reset ICU to 'bootstrap' state: it truncates source/i18n/collunsafe.h to an empty file and rebuilds i18n
+## 2. make 'gen-file' runs 'reset-icu' first, then generates source/i18n/collunsafe.h, rebuilds i18n, and runs the verify_uset test both before and after that rebuild
+
+subdir=tools/gencolusb
+srcdir=$(shell pwd)
+SOURCE_ROOT=$(shell cd ../.. ; pwd)
+BUILD_ROOT=$(SOURCE_ROOT)
+BUILD_HERE=$(BUILD_ROOT)/$(subdir)
+TOOL=extract_unsafe_backwards
+TEST=verify_uset
+PATH_VAR=LD_LIBRARY_PATH
+
+-include Makefile.local
+
+GEN_FILE=$(SOURCE_ROOT)/i18n/collunsafe.h
+BUILD_OPTS=-I$(SOURCE_ROOT)/common -I$(SOURCE_ROOT)/i18n -L$(BUILD_ROOT)/lib -licuuc -licui18n -licudata
+RUN_OPTS=env $(PATH_VAR)=$(BUILD_ROOT)/lib
+
+reset-icu:
+ >$(GEN_FILE)
+ $(MAKE) -C $(BUILD_ROOT)/i18n
+
+gen-file: reset-icu
+ mkdir -p $(BUILD_HERE)
+ $(CXX) -o $(BUILD_HERE)/$(TOOL) $(srcdir)/$(TOOL).cpp $(BUILD_OPTS)
+ $(RUN_OPTS) $(BUILD_HERE)/$(TOOL) > $(GEN_FILE) || exit 1
+ $(CXX) -o $(BUILD_HERE)/$(TEST) $(srcdir)/$(TEST).cpp $(BUILD_OPTS)
+ $(RUN_OPTS) $(BUILD_HERE)/$(TEST) || exit 1
+ $(MAKE) -C $(BUILD_ROOT)/i18n
+ $(RUN_OPTS) $(BUILD_HERE)/$(TEST) || exit 1
+
+.PHONY: reset-icu gen-file
diff --git a/intl/icu/source/tools/gencolusb/README.md b/intl/icu/source/tools/gencolusb/README.md
new file mode 100644
index 0000000000..b0d9bae091
--- /dev/null
+++ b/intl/icu/source/tools/gencolusb/README.md
@@ -0,0 +1,10 @@
+Unsafe-Backward Collator Data
+===
+
+This directory contains tools to build the `source/i18n/collunsafe.h`
+precomputed data.
+
+See [Makefile](./Makefile) for more details.
+
+* Copyright (C) 2016 and later: Unicode, Inc. and others. License & terms of use: http://www.unicode.org/copyright.html
+* Copyright (c) 2015, International Business Machines Corporation and others. All Rights Reserved.
diff --git a/intl/icu/source/tools/gencolusb/extract_unsafe_backwards.cpp b/intl/icu/source/tools/gencolusb/extract_unsafe_backwards.cpp
new file mode 100644
index 0000000000..ee12e69f9b
--- /dev/null
+++ b/intl/icu/source/tools/gencolusb/extract_unsafe_backwards.cpp
@@ -0,0 +1,168 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/**
+ * Copyright (c) 1999-2016, International Business Machines Corporation and
+ * others. All Rights Reserved.
+ *
+ * Generator for source/i18n/collunsafe.h
+ * see Makefile
+ */
+
+#include <stdio.h>
+#include "unicode/uversion.h"
+#include "unicode/uniset.h"
+#include "collationroot.h"
+#include "collationtailoring.h"
+
+/**
+ * Define the type of generator to use. Choose one by setting its macro to 1
+ * and leaving the other macros at 0. As checked in, SERIALIZE is the one
+ * selected.
+ */
+#define SERIALIZE 1 //< Default: use UnicodeSet.serialize() and a new internal c'tor
+#define RANGES 0 //< Enumerate ranges (works, not as fast. No support in collationdatareader.cpp)
+#define PATTERN 0 //< Generate a UnicodeSet pattern (depends on #11891 AND probably slower. No support in collationdatareader.cpp)
+
+/**
+ * Generator entry point for source/i18n/collunsafe.h.
+ *
+ * Fetches the root collation cache entry, takes its tailoring's
+ * unsafeBackwardSet, and prints a C++ header describing that set to stdout.
+ * Progress and error messages go to stderr. Which representation is emitted
+ * is selected at compile time by the SERIALIZE / RANGES / PATTERN macros.
+ *
+ * @param argc unused
+ * @param argv unused
+ * @return 0 on success, 1 if an ICU call reported a failure
+ */
+int main(int argc, const char *argv[]) {
+  UErrorCode errorCode = U_ZERO_ERROR;
+
+  // Get the unsafeBackwardsSet
+  const CollationCacheEntry *rootEntry = CollationRoot::getRootCacheEntry(errorCode);
+  if(U_FAILURE(errorCode)) {
+    fprintf(stderr, "Err: %s getting root cache entry\n", u_errorName(errorCode));
+    return 1;
+  }
+  const UVersionInfo &version = rootEntry->tailoring->version;
+  const UnicodeSet *unsafeBackwardSet = rootEntry->tailoring->unsafeBackwardSet;
+  char verString[20];
+  u_versionToString(version, verString);
+  fprintf(stderr, "Generating data for ICU %s, Collation %s\n", U_ICU_VERSION, verString);
+
+#if SERIALIZE
+  fprintf(stderr, ".. serializing\n");
+  // UnicodeSet serialization
+
+  // Preflight with a null buffer to learn the required length. A separate
+  // error code is used because U_BUFFER_OVERFLOW_ERROR is tolerated here and
+  // must not leak into errorCode, which decides the process exit status.
+  UErrorCode preflightCode = U_ZERO_ERROR;
+  int32_t serializedCount = unsafeBackwardSet->serialize(nullptr,0,preflightCode);
+  if(U_FAILURE(preflightCode) && preflightCode != U_BUFFER_OVERFLOW_ERROR) {
+    fprintf(stderr, "Err: %s preflighting unicode set\n", u_errorName(preflightCode));
+    return 1;
+  }
+  uint16_t *serializedData = new uint16_t[serializedCount];
+  // serialize
+  unsafeBackwardSet->serialize(serializedData, serializedCount, errorCode);
+  if(U_FAILURE(errorCode)) {
+    delete [] serializedData;
+    fprintf(stderr, "Err: %s serializing unicodeset\n", u_errorName(errorCode));
+    return 1;
+  }
+#endif
+
+#if PATTERN
+  fprintf(stderr,".. pattern. (Note: collationdatareader.cpp does not support this form also see #11891)\n");
+  // attempt to use pattern
+
+  UnicodeString pattern;
+  UnicodeSet set(*unsafeBackwardSet);
+  set.compact();
+  set.toPattern(pattern, false);
+
+  if(U_SUCCESS(errorCode)) {
+    // This fails (bug# ?) - which is why this method was abandoned.
+
+    // UnicodeSet usA(pattern, errorCode);
+    // fprintf(stderr, "\n%s:%d: err creating set A %s\n", __FILE__, __LINE__, u_errorName(errorCode));
+    // return 1;
+  }
+
+
+  const char16_t *buf = pattern.getBuffer();
+  int32_t needed = pattern.length();
+
+  // print
+  {
+    char buf2[2048];
+    // Use the capacity-checked extract() overload so a long pattern cannot
+    // overrun buf2. One byte is reserved for the terminator written below.
+    // Per the ICU API documentation this overload returns the full required
+    // length when the text does not fit, so the length is clamped before it
+    // is used as an index.
+    const int32_t capacity = static_cast<int32_t>(sizeof(buf2)) - 1;
+    int32_t len2 = pattern.extract(0, pattern.length(), buf2, static_cast<uint32_t>(capacity), "utf-8");
+    if(len2 > capacity) {
+      len2 = capacity;
+    }
+    buf2[len2]=0;
+    fprintf(stderr,"===\n%s\n===\n", buf2);
+  }
+
+  // Read-only alias of the pattern buffer (no copy is made).
+  const UnicodeString unsafeBackwardPattern(false, buf, needed);
+  // NOTE(review): the round-trip check is commented out, and the "Uset OK"
+  // message sits on the failure branch - confirm intent before re-enabling.
+  if(U_SUCCESS(errorCode)) {
+    //UnicodeSet us(unsafeBackwardPattern, errorCode);
+    // fprintf(stderr, "\n%s:%d: err creating set %s\n", __FILE__, __LINE__, u_errorName(errorCode));
+  } else {
+    fprintf(stderr, "Uset OK - \n");
+  }
+#endif
+
+
+  // Generate the output file.
+
+  printf("// collunsafe.h\n");
+  printf("// %s\n", U_COPYRIGHT_STRING);
+  printf("\n");
+  printf("// To be included by collationdatareader.cpp, and generated by gencolusb.\n");
+  printf("// Machine generated, do not edit.\n");
+  printf("\n");
+  printf("#ifndef COLLUNSAFE_H\n"
+         "#define COLLUNSAFE_H\n"
+         "\n"
+         "#include \"unicode/utypes.h\"\n"
+         "\n"
+         "#define COLLUNSAFE_ICU_VERSION \"" U_ICU_VERSION "\"\n");
+  printf("#define COLLUNSAFE_COLL_VERSION \"%s\"\n", verString);
+
+
+
+#if PATTERN
+  printf("#define COLLUNSAFE_PATTERN 1\n");
+  printf("static const int32_t collunsafe_len = %d;\n", needed);
+  printf("static const char16_t collunsafe_pattern[collunsafe_len] = {\n");
+  for(int i=0;i<needed;i++) {
+    // Break the line after every 8 values, annotating it with the running index.
+    if( (i>0) && (i%8 == 0) ) {
+      printf(" // %d\n", i);
+    }
+    printf("0x%04X", buf[i]); // TODO check
+    if(i != (needed-1)) {
+      printf(", ");
+    }
+  }
+  printf(" //%d\n};\n", (needed-1));
+#endif
+
+  // This section is guarded by RANGES, the macro that is actually defined at
+  // the top of this file; testing the undefined name RANGE meant the section
+  // could never be enabled.
+#if RANGES
+  fprintf(stderr, "COLLUNSAFE_RANGE - no code support in collationdatareader.cpp for this\n");
+  // Only this form needs the range count, so it is computed here.
+  int32_t rangeCount = unsafeBackwardSet->getRangeCount();
+  printf("#define COLLUNSAFE_RANGE 1\n");
+  printf("static const int32_t unsafe_rangeCount = %d;\n", rangeCount);
+  printf("static const UChar32 unsafe_ranges[%d] = { \n", rangeCount*2);
+  for(int32_t i=0;i<rangeCount;i++) {
+    // One (start, end) pair per line, annotated with the range index.
+    printf(" 0x%04X, 0x%04X, // %d\n",
+           unsafeBackwardSet->getRangeStart(i),
+           unsafeBackwardSet->getRangeEnd(i),
+           i);
+  }
+  printf("};\n");
+#endif
+
+#if SERIALIZE
+  printf("#define COLLUNSAFE_SERIALIZE 1\n");
+  printf("static const int32_t unsafe_serializedCount = %d;\n", serializedCount);
+  printf("static const uint16_t unsafe_serializedData[%d] = { \n", serializedCount);
+  for(int32_t i=0;i<serializedCount;i++) {
+    // Break the line after every 8 values, annotating it with the running index.
+    if( (i>0) && (i%8 == 0) ) {
+      printf(" // %d\n", i);
+    }
+    printf("0x%04X", serializedData[i]); // TODO check
+    if(i != (serializedCount-1)) {
+      printf(", ");
+    }
+  }
+  printf("};\n");
+  // All serialized words have been written out; release the buffer.
+  delete [] serializedData;
+#endif
+
+  printf("#endif\n");
+  fflush(stderr);
+  fflush(stdout);
+  return(U_SUCCESS(errorCode)?0:1);
+}
diff --git a/intl/icu/source/tools/gencolusb/verify_uset.cpp b/intl/icu/source/tools/gencolusb/verify_uset.cpp
new file mode 100644
index 0000000000..03a4930489
--- /dev/null
+++ b/intl/icu/source/tools/gencolusb/verify_uset.cpp
@@ -0,0 +1,71 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/**
+ * Copyright (c) 1999-2012, International Business Machines Corporation and
+ * others. All Rights Reserved.
+ *
+ * Test for source/i18n/collunsafe.h
+ */
+
+#include <stdio.h>
+#include "unicode/ucol.h"
+#include "unicode/uniset.h"
+#include "unicode/coll.h"
+#include "collation.h"
+
+#include "collunsafe.h"
+
+
+/**
+ * Test entry point for the generated source/i18n/collunsafe.h.
+ *
+ * For whichever COLLUNSAFE_* form the included header defines, rebuilds a
+ * UnicodeSet from the generated data, then creates an English collator.
+ * Progress goes to stdout; failures are reported on stderr.
+ *
+ * @param argc unused
+ * @param argv unused
+ * @return 0 if no ICU call reported a failure, 1 otherwise
+ */
+int main(int argc, const char *argv[]) {
+  puts("verify");
+  UErrorCode errorCode = U_ZERO_ERROR;
+#if defined (COLLUNSAFE_PATTERN)
+  puts("verify pattern");
+  // Read-only alias of the generated pattern array (no copy is made).
+  const UnicodeString unsafeBackwardPattern(false, collunsafe_pattern, collunsafe_len);
+  // Echo the first five code units of the pattern as a quick sanity check.
+  fprintf(stderr, "\n -- pat '%c%c%c%c%c'\n",
+          collunsafe_pattern[0],
+          collunsafe_pattern[1],
+          collunsafe_pattern[2],
+          collunsafe_pattern[3],
+          collunsafe_pattern[4]);
+  if(U_SUCCESS(errorCode)) {
+    UnicodeSet us(unsafeBackwardPattern, errorCode);
+    // Report only real failures, so a successful run prints no "err" line.
+    if(U_FAILURE(errorCode)) {
+      fprintf(stderr, "\n%s:%d: err creating set %s\n", __FILE__, __LINE__, u_errorName(errorCode));
+    }
+  }
+#endif
+
+#if defined (COLLUNSAFE_RANGE)
+  {
+    puts("verify range");
+    UnicodeSet u;
+    // unsafe_ranges holds (start, end) pairs, so step two entries at a time.
+    for(int32_t i=0;i<unsafe_rangeCount*2;i+=2) {
+      u.add(unsafe_ranges[i+0],unsafe_ranges[i+1]);
+    }
+    printf("Finished with %d ranges\n", u.getRangeCount());
+  }
+#endif
+
+#if defined (COLLUNSAFE_SERIALIZE)
+  {
+    puts("verify serialize");
+    UnicodeSet u(unsafe_serializedData, unsafe_serializedCount, UnicodeSet::kSerialized, errorCode);
+    // Report only real failures, so a successful run prints no "err" line.
+    if(U_FAILURE(errorCode)) {
+      fprintf(stderr, "\n%s:%d: err creating set %s\n", __FILE__, __LINE__, u_errorName(errorCode));
+    }
+    printf("Finished deserialize with %d ranges\n", u.getRangeCount());
+  }
+#endif
+// if(tailoring.unsafeBackwardSet == nullptr) {
+  // errorCode = U_MEMORY_ALLOCATION_ERROR;
+  // fprintf(stderr, "\n%s:%d: err %s\n", __FILE__, __LINE__, u_errorName(errorCode));
+  // }
+  puts("verify col UCA");
+  if(U_SUCCESS(errorCode)) {
+    Collator *col = Collator::createInstance(Locale::getEnglish(), errorCode);
+    if(U_FAILURE(errorCode)) {
+      fprintf(stderr, "\n%s:%d: err %s creating collator\n", __FILE__, __LINE__, u_errorName(errorCode));
+    }
+    // The collator is only created to prove that it can be; release it.
+    delete col;
+  }
+
+  return U_FAILURE(errorCode) ? 1 : 0;
+}