summaryrefslogtreecommitdiffstats
path: root/intl/icu/source/tools/genrb
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-07 19:33:14 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-07 19:33:14 +0000
commit36d22d82aa202bb199967e9512281e9a53db42c9 (patch)
tree105e8c98ddea1c1e4784a60a5a6410fa416be2de /intl/icu/source/tools/genrb
parentInitial commit. (diff)
downloadfirefox-esr-36d22d82aa202bb199967e9512281e9a53db42c9.tar.xz
firefox-esr-36d22d82aa202bb199967e9512281e9a53db42c9.zip
Adding upstream version 115.7.0esr.upstream/115.7.0esrupstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'intl/icu/source/tools/genrb')
-rw-r--r--intl/icu/source/tools/genrb/Makefile.in114
-rw-r--r--intl/icu/source/tools/genrb/derb.1.in198
-rw-r--r--intl/icu/source/tools/genrb/derb.cpp657
-rw-r--r--intl/icu/source/tools/genrb/derb.vcxproj80
-rw-r--r--intl/icu/source/tools/genrb/derb.vcxproj.filters22
-rw-r--r--intl/icu/source/tools/genrb/errmsg.c75
-rw-r--r--intl/icu/source/tools/genrb/errmsg.h46
-rw-r--r--intl/icu/source/tools/genrb/filterrb.cpp239
-rw-r--r--intl/icu/source/tools/genrb/filterrb.h180
-rw-r--r--intl/icu/source/tools/genrb/genrb.1.in148
-rw-r--r--intl/icu/source/tools/genrb/genrb.cpp869
-rw-r--r--intl/icu/source/tools/genrb/genrb.h52
-rw-r--r--intl/icu/source/tools/genrb/genrb.vcxproj113
-rw-r--r--intl/icu/source/tools/genrb/genrb.vcxproj.filters87
-rw-r--r--intl/icu/source/tools/genrb/parse.cpp2435
-rw-r--r--intl/icu/source/tools/genrb/parse.h38
-rw-r--r--intl/icu/source/tools/genrb/prscmnts.cpp248
-rw-r--r--intl/icu/source/tools/genrb/prscmnts.h66
-rw-r--r--intl/icu/source/tools/genrb/rbutil.c119
-rw-r--r--intl/icu/source/tools/genrb/rbutil.h33
-rw-r--r--intl/icu/source/tools/genrb/read.c479
-rw-r--r--intl/icu/source/tools/genrb/read.h54
-rw-r--r--intl/icu/source/tools/genrb/reslist.cpp1794
-rw-r--r--intl/icu/source/tools/genrb/reslist.h446
-rw-r--r--intl/icu/source/tools/genrb/rle.c408
-rw-r--r--intl/icu/source/tools/genrb/rle.h74
-rw-r--r--intl/icu/source/tools/genrb/sources.txt12
-rw-r--r--intl/icu/source/tools/genrb/ustr.c219
-rw-r--r--intl/icu/source/tools/genrb/ustr.h81
-rw-r--r--intl/icu/source/tools/genrb/wrtjava.cpp701
-rw-r--r--intl/icu/source/tools/genrb/wrtxml.cpp1213
31 files changed, 11300 insertions, 0 deletions
diff --git a/intl/icu/source/tools/genrb/Makefile.in b/intl/icu/source/tools/genrb/Makefile.in
new file mode 100644
index 0000000000..336d839448
--- /dev/null
+++ b/intl/icu/source/tools/genrb/Makefile.in
@@ -0,0 +1,114 @@
+#################################################################################
+## Makefile.in for ICU - tools/genrb #
+## Copyright (C) 2016 and later: Unicode, Inc. and others. #
+## License & terms of use: http://www.unicode.org/copyright.html #
+## Copyright (c) 1999-2014, International Business Machines Corporation and #
+## others. All Rights Reserved. #
+#################################################################################
+
+## Source directory information
+srcdir = @srcdir@
+top_srcdir = @top_srcdir@
+
+top_builddir = ../..
+
+include $(top_builddir)/icudefs.mk
+
+## Build directory information
+subdir = tools/genrb
+
+## Two programs are built here: genrb (always) and derb (only with icuio).
+TARGET_STUB_NAME = genrb
+DERB_STUB_NAME = derb
+
+## Manual page section for the generated man pages
+SECTION = 1
+
+MAN_FILES = $(TARGET_STUB_NAME).$(SECTION)
+@ICUIO_TRUE@MAN_FILES += $(DERB_STUB_NAME).$(SECTION)
+
+## Extra files to remove for 'make clean'
+CLEANFILES = *~ $(MAN_FILES) $(DEPS) $(DERB_DEPS)
+
+## Target information
+TARGET = $(BINDIR)/$(TARGET_STUB_NAME)$(EXEEXT)
+# derb depends on icuio
+@ICUIO_TRUE@DERB = $(BINDIR)/$(DERB_STUB_NAME)$(EXEEXT)
+
+CPPFLAGS += -I$(srcdir) -I$(top_srcdir)/common -I$(top_srcdir)/i18n -I$(srcdir)/../toolutil -I$(top_srcdir)/io
+CPPFLAGS += -DUNISTR_FROM_CHAR_EXPLICIT=explicit -DUNISTR_FROM_STRING_EXPLICIT=explicit
+LIBS = $(LIBICUTOOLUTIL) $(LIBICUI18N) $(LIBICUUC) $(DEFAULT_LIBS) $(LIB_M)
+
+## genrb sources are listed in sources.txt; derb is a single translation unit.
+SOURCES = $(shell cat $(srcdir)/sources.txt)
+OBJECTS = $(patsubst %.cpp,%.o,$(patsubst %.c,%.o, $(SOURCES)))
+DERB_SOURCES = derb.cpp
+DERB_OBJ = $(DERB_SOURCES:.cpp=.o)
+
+## One dependency (.d) file per object file
+DEPS = $(OBJECTS:.o=.d)
+DERB_DEPS = $(DERB_OBJ:.o=.d)
+
+-include Makefile.local
+
+## List of phony targets
+.PHONY : all all-local install install-local clean clean-local \
+distclean distclean-local dist dist-local check check-local install-man
+
+## Clear suffix list
+.SUFFIXES :
+
+## List of standard targets
+all: all-local
+install: install-local
+clean: clean-local
+distclean : distclean-local
+dist: dist-local
+check: all check-local
+
+all-local: $(TARGET) $(DERB) $(MAN_FILES)
+
+install-local: all-local install-man
+	$(MKINSTALLDIRS) $(DESTDIR)$(bindir)
+	$(INSTALL) $(TARGET) $(DESTDIR)$(bindir)
+@ICUIO_TRUE@	$(INSTALL) $(DERB) $(DESTDIR)$(bindir)
+
+install-man: $(MAN_FILES)
+	$(MKINSTALLDIRS) $(DESTDIR)$(mandir)/man$(SECTION)
+	$(INSTALL_DATA) $? $(DESTDIR)$(mandir)/man$(SECTION)
+
+dist-local:
+
+clean-local:
+	test -z "$(CLEANFILES)" || $(RMV) $(CLEANFILES)
+	$(RMV) $(TARGET) $(DERB) $(OBJECTS) $(DERB_OBJ)
+
+distclean-local: clean-local
+	$(RMV) Makefile
+
+check-local: all-local
+
+## Regenerate this Makefile through config.status when its template changes.
+Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
+	cd $(top_builddir) \
+	 && CONFIG_FILES=$(subdir)/$@ CONFIG_HEADERS= $(SHELL) ./config.status
+
+$(TARGET) : $(OBJECTS)
+	$(LINK.cc) $(OUTOPT)$@ $^ $(LIBS)
+	$(POST_BUILD_STEP)
+
+$(DERB) : $(DERB_OBJ)
+	$(LINK.cc) $(OUTOPT)$@ $^ $(LIBICUIO) $(LIBS)
+	$(POST_BUILD_STEP)
+
+# This line is needed to serialize builds when the gmake -j option is used.
+$(TARGET_STUB_NAME).$(SECTION): $(DERB_STUB_NAME).$(SECTION)
+
+## Man pages are produced from their .in templates by config.status.
+%.$(SECTION): $(srcdir)/%.$(SECTION).in
+	cd $(top_builddir) \
+	 && CONFIG_FILES=$(subdir)/$@ CONFIG_HEADERS= $(SHELL) ./config.status
+
+
+## Load the generated dependency files unless every requested goal is a
+## "*clean" goal. Both programs' dependency files are loaded in either
+## branch so that header changes also rebuild derb.o when an explicit goal
+## such as "all" or "install" is given.
+ifeq (,$(MAKECMDGOALS))
+-include $(DEPS)
+-include $(DERB_DEPS)
+else
+ifneq ($(patsubst %clean,,$(MAKECMDGOALS)),)
+-include $(DEPS)
+-include $(DERB_DEPS)
+endif
+endif
diff --git a/intl/icu/source/tools/genrb/derb.1.in b/intl/icu/source/tools/genrb/derb.1.in
new file mode 100644
index 0000000000..725b571ce2
--- /dev/null
+++ b/intl/icu/source/tools/genrb/derb.1.in
@@ -0,0 +1,198 @@
+.\" Hey, Emacs! This is -*-nroff-*- you know...
+.\"
+.\" derb.1: manual page for the derb utility
+.\"
+.\" Copyright (C) 2016 and later: Unicode, Inc. and others.
+.\" License & terms of use: http://www.unicode.org/copyright.html
+.\" Copyright (C) 2000-2014 IBM, Inc. and others.
+.\"
+.TH DERB 1 "7 Mar 2014" "ICU MANPAGE" "ICU @VERSION@ Manual"
+.SH NAME
+.B derb
+\- disassemble a resource bundle
+.SH SYNOPSIS
+.B derb
+[
+.BR "\-h\fP, \fB\-?\fP, \fB\-\-help"
+]
+[
+.BR "\-V\fP, \fB\-\-version"
+]
+[
+.BR "\-v\fP, \fB\-\-verbose"
+]
+[
+.BI "\-e\fP, \fB\-\-encoding" " encoding"
+]
+[
+.BI "\-\-bom"
+]
+[
+.BI "\-t\fP, \fB\-\-truncate" " \fR[ \fPsize\fR ]\fP"
+]
+[
+.BI "\-s\fP, \fB\-\-sourcedir" " source"
+]
+[
+.BI "\-d\fP, \fB\-\-destdir" " destination"
+]
+[
+.BI "\-i\fP, \fB\-\-icudatadir" " directory"
+]
+[
+.BI "\-c\fP, \fB\-\-to\-stdout"
+]
+.IR bundle " \.\.\."
+.SH DESCRIPTION
+.B derb
+reads the compiled resource
+.I bundle
+files passed on the command line and writes them back in text form.
+The resulting text files have a
+.B .txt
+extension while compiled resource bundle source files typically have a
+.B .res
+extension.
+.PP
+It is customary to name the resource bundles by their locale name,
+i.e. to use a locale identifier for the
+.I bundle
+filename, e.g.
+.B ja_JP.res
+for Japanese (Japan) data, or
+.B root.res
+for the root bundle.
+This is especially important for
+.B derb
+since the locale name is not accessible directly from the compiled
+resource bundle, and the file name is needed to know which locale to ask
+for when opening the bundle.
+.B derb
+will produce a file whose base name is the base name of the compiled resource file itself.
+If the
+.BI "\-\-to\-stdout\fP, \fB\-c\fP"
+option is used, however, the text will be written on the standard output.
+.SH OPTIONS
+.TP
+.BR "\-h\fP, \fB\-?\fP, \fB\-\-help"
+Print help about usage and exit.
+.TP
+.BR "\-V\fP, \fB\-\-version"
+Print the version of
+.B derb
+and exit.
+.TP
+.BR "\-v\fP, \fB\-\-verbose"
+Display extra informative messages during execution.
+.TP
+.BR "\-A\fP, \fB\-\-suppressAliases"
+Don't follow aliases when producing output.
+.TP
+.BI "\-e\fP, \fB\-\-encoding" " encoding"
+Set the encoding used to write output files to
+.IR encoding .
+The default encoding is the invariant (subset of ASCII or EBCDIC)
+codepage for the system (see section
+.BR "INVARIANT CHARACTERS" ).
+The choice of the encoding does not affect the data, just their
+representation. Characters that cannot be represented in the
+.I encoding
+will be represented using
+.BI \eu "hhhh"
+escape sequences.
+.TP
+.BI "\-\-bom"
+Write a byte order mark (BOM) at the beginning of the file.
+.TP
+.BI "\-l\fP, \fB\-\-locale" " locale"
+Set the
+.I locale
+for the resource bundle, which is used both in the generated text and
+as the base name of the output file.
+.TP
+.BI "\-t\fP, \fB\-\-truncate" " \fR[ \fPsize\fR ]\fP"
+Truncate individual resources (strings or binary data) to
+.I size
+bytes. The default if
+.I size
+is not specified is
+.B 80
+bytes.
+.TP
+.BI "\-s\fP, \fB\-\-sourcedir" " source"
+Set the source directory to
+.IR source .
+The default source directory is the current directory.
+If
+.B -
+is passed for
+.IR source ,
+then the
+.I bundle
+will be looked for in its default location, specified by
+the
+.B ICU_DATA
+environment variable (or defaulting to
+the location set when ICU was built if
+.B ICU_DATA
+is not set).
+.TP
+.BI "\-d\fP, \fB\-\-destdir" " destination"
+Set the destination directory to
+.IR destination .
+The default destination directory is specified by the environment variable
+.BR ICU_DATA
+or is the location set when ICU was built if
+.B ICU_DATA
+is not set.
+.TP
+.BI "\-i\fP, \fB\-\-icudatadir" " directory"
+Look for any necessary ICU data files in
+.IR directory .
+For example, when processing collation overrides, the file
+.B ucadata.dat
+must be located.
+The default ICU data directory is specified by the environment variable
+.BR ICU_DATA .
+.TP
+.BI "\-c\fP, \fB\-\-to\-stdout"
+Write the disassembled
+.I bundle
+on standard output instead of into a file.
+.SH CAVEATS
+When the option
+.BI \-\-bom
+is used, the character
+.B U+FEFF
+is written in the destination
+.I encoding
+regardless of whether it is a Unicode transformation format (UTF) or not.
+This option should only be used with a UTF encoding, as byte order marks
+are not meaningful for other encodings.
+.SH INVARIANT CHARACTERS
+The
+.B invariant character set
+consists of the following set of characters, expressed as a standard POSIX
+regular expression:
+.BR "[a-z]|[A-Z]|[0-9]|_| |+|-|*|/" .
+This is the set which is guaranteed to be available regardless of code page.
+.SH ENVIRONMENT
+.TP 10
+.B ICU_DATA
+Specifies the directory containing ICU data. Defaults to
+.BR @thepkgicudatadir@/@PACKAGE@/@VERSION@/ .
+Some tools in ICU depend on the presence of the trailing slash. It is thus
+important to make sure that it is present if
+.B ICU_DATA
+is set.
+.SH AUTHORS
+Vladimir Weinstein
+.br
+Yves Arrouye
+.SH VERSION
+1.0
+.SH COPYRIGHT
+Copyright (C) 2002 IBM, Inc. and others.
+.SH SEE ALSO
+.BR genrb (1)
+
diff --git a/intl/icu/source/tools/genrb/derb.cpp b/intl/icu/source/tools/genrb/derb.cpp
new file mode 100644
index 0000000000..3b28289569
--- /dev/null
+++ b/intl/icu/source/tools/genrb/derb.cpp
@@ -0,0 +1,657 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+*
+* Copyright (C) 1999-2016, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+*******************************************************************************
+* file name: derb.cpp
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2000sep6
+* created by: Vladimir Weinstein as an ICU workshop example
+* maintained by: Yves Arrouye <yves@realnames.com>
+*/
+
+#include "unicode/stringpiece.h"
+#include "unicode/ucnv.h"
+#include "unicode/unistr.h"
+#include "unicode/ustring.h"
+#include "unicode/putil.h"
+#include "unicode/ustdio.h"
+
+#include "charstr.h"
+#include "uresimp.h"
+#include "cmemory.h"
+#include "cstring.h"
+#include "uoptions.h"
+#include "toolutil.h"
+#include "ustrfmt.h"
+
+#if !UCONFIG_NO_FORMATTING
+
+#define DERB_VERSION "1.1"
+
+#define DERB_DEFAULT_TRUNC 80
+
+static const int32_t indentsize = 4;
+static int32_t truncsize = DERB_DEFAULT_TRUNC;
+static UBool opt_truncate = false;
+
+static const char *getEncodingName(const char *encoding);
+static void reportError(const char *pname, UErrorCode *status, const char *when);
+static char16_t *quotedString(const char16_t *string);
+static void printOutBundle(UFILE *out, UResourceBundle *resource, int32_t indent, const char *pname, UErrorCode *status);
+static void printString(UFILE *out, const char16_t *str, int32_t len);
+static void printCString(UFILE *out, const char *str, int32_t len);
+static void printIndent(UFILE *out, int32_t indent);
+static void printHex(UFILE *out, uint8_t what);
+
+static UOption options[]={ // Command-line option table; main() reads entries by numeric index, so the order of rows must not change.
+ UOPTION_HELP_H, // index 0: help; main() prints the usage text when this or index 1 occurs
+ UOPTION_HELP_QUESTION_MARK, // index 1: help (alternate spelling)
+/* 2 */ UOPTION_ENCODING, // output encoding; rejected by main() when combined with index 3
+/* 3 */ { "to-stdout", nullptr, nullptr, nullptr, 'c', UOPT_NO_ARG, 0 } , // write to standard output instead of a .txt file
+/* 4 */ { "truncate", nullptr, nullptr, nullptr, 't', UOPT_OPTIONAL_ARG, 0 }, // optional size; DERB_DEFAULT_TRUNC is used when omitted
+/* 5 */ UOPTION_VERBOSE, // sets the file-level `verbose` flag
+/* 6 */ UOPTION_DESTDIR, // directory for the generated .txt files
+/* 7 */ UOPTION_SOURCEDIR, // directory to read bundles from; "-" selects the ICU data lookup
+/* 8 */ { "bom", nullptr, nullptr, nullptr, 0, UOPT_NO_ARG, 0 }, // long option only (short-name field is 0)
+/* 9 */ UOPTION_ICUDATADIR, // value is passed to u_setDataDirectory()
+/* 10 */ UOPTION_VERSION, // print version information and exit
+/* 11 */ { "suppressAliases", nullptr, nullptr, nullptr, 'A', UOPT_NO_ARG, 0 }, // sets the file-level `suppressAliases` flag
+};
+
+static UBool verbose = false;
+static UBool suppressAliases = false;
+static UFILE *ustderr = nullptr;
+
+extern int
+main(int argc, char* argv[]) { // Entry point: parses the options, then dumps each bundle named on the command line as text.
+ const char *encoding = nullptr; // nullptr unless -e/--encoding was given
+ const char *outputDir = nullptr; /* nullptr = no output directory, use current */
+ const char *inputDir = "."; // "-" switches to opening bundles through ures_open() without a path
+ int tostdout = 0;
+ int prbom = 0;
+
+ const char *pname;
+
+ UResourceBundle *bundle = nullptr;
+ int32_t i = 0;
+
+ const char* arg;
+
+ /* Get the name of tool. */
+ pname = uprv_strrchr(*argv, U_FILE_SEP_CHAR);
+#if U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR
+ if (!pname) {
+ pname = uprv_strrchr(*argv, U_FILE_ALT_SEP_CHAR);
+ }
+#endif
+ if (!pname) {
+ pname = *argv;
+ } else {
+ ++pname;
+ }
+
+ /* parse the command line; a negative result marks a bad argument at argv[-argc] */
+ argc=u_parseArgs(argc, argv, UPRV_LENGTHOF(options), options);
+
+ /* error handling, printing usage message */
+ if(argc<0) {
+ fprintf(stderr,
+ "%s: error in command line argument \"%s\"\n", pname,
+ argv[-argc]);
+ }
+ if(argc<0 || options[0].doesOccur || options[1].doesOccur) {
+ fprintf(argc < 0 ? stderr : stdout, // usage goes to stderr on error, to stdout when help was requested
+ "%csage: %s [ -h, -?, --help ] [ -V, --version ]\n"
+ " [ -v, --verbose ] [ -e, --encoding encoding ] [ --bom ]\n"
+ " [ -t, --truncate [ size ] ]\n"
+ " [ -s, --sourcedir source ] [ -d, --destdir destination ]\n"
+ " [ -i, --icudatadir directory ] [ -c, --to-stdout ]\n"
+ " [ -A, --suppressAliases]\n"
+ " bundle ...\n", argc < 0 ? 'u' : 'U',
+ pname);
+ return argc<0 ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR; // the UErrorCode value doubles as the process exit status
+ }
+
+ if(options[10].doesOccur) { // version request: print and exit
+ fprintf(stderr,
+ "%s version %s (ICU version %s).\n"
+ "%s\n",
+ pname, DERB_VERSION, U_ICU_VERSION, U_COPYRIGHT_STRING);
+ return U_ZERO_ERROR;
+ }
+ if(options[2].doesOccur) {
+ encoding = options[2].value;
+ }
+
+ if (options[3].doesOccur) {
+ if(options[2].doesOccur) { // -e and -c are mutually exclusive
+ fprintf(stderr, "%s: Error: don't specify an encoding (-e) when writing to stdout (-c).\n", pname);
+ return 3;
+ }
+ tostdout = 1;
+ }
+
+ if(options[4].doesOccur) {
+ opt_truncate = true;
+ if(options[4].value != nullptr) {
+ truncsize = atoi(options[4].value); /* user defined printable size; NOTE(review): the atoi result is not validated, so zero, negative or non-numeric input is accepted as-is */
+ } else {
+ truncsize = DERB_DEFAULT_TRUNC; /* we'll use default omitting size */
+ }
+ } else {
+ opt_truncate = false;
+ }
+
+ if(options[5].doesOccur) {
+ verbose = true;
+ }
+
+ if (options[6].doesOccur) {
+ outputDir = options[6].value;
+ }
+
+ if(options[7].doesOccur) {
+ inputDir = options[7].value; /* we'll use users resources */
+ }
+
+ if (options[8].doesOccur) {
+ prbom = 1;
+ }
+
+ if (options[9].doesOccur) {
+ u_setDataDirectory(options[9].value);
+ }
+
+ if (options[11].doesOccur) {
+ suppressAliases = true;
+ }
+
+ fflush(stderr); // use ustderr now.
+ ustderr = u_finit(stderr, nullptr, nullptr);
+
+ for (i = 1; i < argc; ++i) { // one iteration per bundle argument left after option parsing
+ static const char16_t sp[] = { 0x0020 }; /* " " */
+
+ arg = getLongPathname(argv[i]);
+
+ if (verbose) {
+ u_fprintf(ustderr, "processing bundle \"%s\"\n", argv[i]);
+ }
+
+ icu::CharString locale; // base name of the argument without its last extension
+ UErrorCode status = U_ZERO_ERROR;
+ {
+ const char *p = findBasename(arg);
+ const char *q = uprv_strrchr(p, '.');
+ if (q == nullptr) {
+ locale.append(p, status);
+ } else {
+ locale.append(p, (int32_t)(q - p), status);
+ }
+ }
+ if (U_FAILURE(status)) {
+ return status;
+ }
+
+ icu::CharString infile;
+ const char *thename = nullptr; // path handed to ures_openDirect(); stays nullptr when inputDir is "-"
+ UBool fromICUData = !uprv_strcmp(inputDir, "-");
+ if (!fromICUData) {
+ UBool absfilename = *arg == U_FILE_SEP_CHAR;
+#if U_PLATFORM_HAS_WIN32_API
+ if (!absfilename) {
+ absfilename = (uprv_strlen(arg) > 2 && isalpha(arg[0])
+ && arg[1] == ':' && arg[2] == U_FILE_SEP_CHAR);
+ }
+#endif
+ if (absfilename) {
+ thename = arg;
+ } else {
+ const char *q = uprv_strrchr(arg, U_FILE_SEP_CHAR);
+#if U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR
+ if (q == nullptr) {
+ q = uprv_strrchr(arg, U_FILE_ALT_SEP_CHAR);
+ }
+#endif
+ infile.append(inputDir, status); // relative argument: inputDir plus the directory part of arg (if any)
+ if(q != nullptr) {
+ infile.appendPathPart(icu::StringPiece(arg, (int32_t)(q - arg)), status);
+ }
+ if (U_FAILURE(status)) {
+ return status;
+ }
+ thename = infile.data();
+ }
+ }
+ if (thename) {
+ bundle = ures_openDirect(thename, locale.data(), &status);
+ } else {
+ bundle = ures_open(fromICUData ? 0 : inputDir, locale.data(), &status);
+ }
+ if (U_SUCCESS(status)) {
+ UFILE *out = nullptr;
+
+ const char *filename = 0;
+ const char *ext = 0;
+
+ if (locale.isEmpty() || !tostdout) { // filename/ext are needed for the output path and for the fallback bundle name below
+ filename = findBasename(arg);
+ ext = uprv_strrchr(filename, '.');
+ if (!ext) {
+ ext = uprv_strchr(filename, 0);
+ }
+ }
+
+ if (tostdout) {
+ out = u_get_stdout();
+ } else {
+ icu::CharString thefile; // [outputDir/]<base name>.txt
+ if (outputDir) {
+ thefile.append(outputDir, status);
+ }
+ thefile.appendPathPart(filename, status);
+ if (*ext) {
+ thefile.truncate(thefile.length() - (int32_t)uprv_strlen(ext));
+ }
+ thefile.append(".txt", status);
+ if (U_FAILURE(status)) {
+ return status;
+ }
+
+ out = u_fopen(thefile.data(), "w", nullptr, encoding);
+ if (!out) {
+ u_fprintf(ustderr, "%s: couldn't create %s\n", pname, thefile.data());
+ u_fclose(ustderr);
+ return 4;
+ }
+ }
+
+ // now, set the callback.
+ ucnv_setFromUCallBack(u_fgetConverter(out), UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_C, 0, 0, &status);
+ if (U_FAILURE(status)) {
+ u_fprintf(ustderr, "%s: couldn't configure converter for encoding\n", pname);
+ u_fclose(ustderr);
+ if(!tostdout) {
+ u_fclose(out);
+ }
+ return 3;
+ }
+
+ if (prbom) { /* XXX: Should be done only for UTFs */
+ u_fputc(0xFEFF, out);
+ }
+ u_fprintf(out, "// -*- Coding: %s; -*-\n//\n", encoding ? encoding : getEncodingName(ucnv_getDefaultName())); // NOTE(review): getEncodingName() can return nullptr, which would be passed to %s here
+ u_fprintf(out, "// This file was dumped by derb(8) from ");
+ if (thename) {
+ u_fprintf(out, "%s", thename);
+ } else if (fromICUData) {
+ u_fprintf(out, "the ICU internal %s locale", locale.data());
+ }
+
+ u_fprintf(out, "\n// derb(8) by Vladimir Weinstein and Yves Arrouye\n\n");
+
+ if (!locale.isEmpty()) { // the top-level table is introduced by the locale name, or by the base name when that is empty
+ u_fprintf(out, "%s", locale.data());
+ } else {
+ u_fprintf(out, "%.*s%.*S", (int32_t)(ext - filename), filename, UPRV_LENGTHOF(sp), sp);
+ }
+ printOutBundle(out, bundle, 0, pname, &status);
+
+ if (!tostdout) {
+ u_fclose(out);
+ }
+ }
+ else {
+ reportError(pname, &status, "opening resource file");
+ }
+
+ ures_close(bundle); // reached for both the success and the failure path of the open call
+ }
+
+ return 0;
+}
+
+/**
+ * Returns a newly allocated, NUL-terminated copy of `string` suitable for
+ * printing between double quotes: every line feed becomes the two characters
+ * backslash + 'n', and every double quote is preceded by a backslash.
+ * Copying stops at the first NUL in `string`.
+ *
+ * A null `string` is treated as an empty string. The caller owns the result
+ * and releases it with uprv_free(). Returns nullptr if the allocation fails.
+ */
+static char16_t *quotedString(const char16_t *string) {
+    static const char16_t emptyString[] = { 0 };
+    const char16_t *sp;
+    char16_t *newstr, *np;
+
+    if (string == nullptr) {
+        string = emptyString;
+    }
+
+    /* Size in size_t so that length + escapes + terminator cannot overflow. */
+    size_t alen = (size_t)u_strlen(string);
+    for (sp = string; *sp; ++sp) {
+        switch (*sp) {
+        case '\n':
+        case 0x0022:
+            ++alen; /* one extra unit for the backslash */
+            break;
+        }
+    }
+
+    newstr = (char16_t *) uprv_malloc((1 + alen) * U_SIZEOF_UCHAR);
+    if (newstr == nullptr) {
+        return nullptr;
+    }
+
+    for (sp = string, np = newstr; *sp; ++sp) {
+        switch (*sp) {
+        case '\n':
+            *np++ = 0x005C; /* backslash */
+            *np++ = 0x006E; /* 'n' */
+            break;
+
+        case 0x0022:
+            *np++ = 0x005C; /* backslash, then the quote itself below */
+            U_FALLTHROUGH;
+        default:
+            *np++ = *sp;
+            break;
+        }
+    }
+    *np = 0;
+
+    return newstr;
+}
+
+
+static void printString(UFILE *out, const char16_t *str, int32_t len) { // Writes `len` UTF-16 code units starting at `str` to `out`.
+ u_file_write(str, len, out);
+}
+
+/* Writes a narrow string to `out`. A length of -1 prints the whole
+ * NUL-terminated string; any other length is passed as the "%.*s" precision. */
+static void printCString(UFILE *out, const char *str, int32_t len) {
+    if(len != -1) {
+        u_fprintf(out, "%.*s", len, str);
+        return;
+    }
+    u_fprintf(out, "%s", str);
+}
+
+/* Writes `indent` space characters (U+0020) to `out`. */
+static void printIndent(UFILE *out, int32_t indent) {
+    const icu::UnicodeString padding(indent, 0x20, indent);
+    u_file_write(padding.getBuffer(), indent, out);
+}
+
+/* Writes `what` to `out` as exactly two upper-case hexadecimal digits,
+ * high nibble first. */
+static void printHex(UFILE *out, uint8_t what) {
+    static const char digits[] = "0123456789ABCDEF";
+    const char16_t pair[2] = {
+        (char16_t)digits[what >> 4],
+        (char16_t)digits[what & 0xf]
+    };
+
+    printString(out, pair, 2);
+}
+
+/**
+ * Prints a single alias resource as `key:alias { "target" }` followed by a
+ * line feed, without resolving the alias.
+ *
+ * @param out     destination file
+ * @param parent  bundle whose resource data contains `r`
+ * @param r       the alias resource item
+ * @param key     name printed before ":alias", or nullptr for no name
+ * @param indent  number of spaces printed before the entry
+ * @param pname   program name used in error messages
+ * @param status  nothing is printed if this already holds a failure; set to
+ *                U_MEMORY_ALLOCATION_ERROR if the escaped copy cannot be
+ *                allocated
+ *
+ * When truncation is enabled and the alias is longer than `truncsize`, a
+ * warning comment is written and at most truncsize/2 UTF-16 units of the
+ * escaped text are printed, the same limit the string case of
+ * printOutBundle() applies.
+ */
+static void printOutAlias(UFILE *out, UResourceBundle *parent, Resource r, const char *key, int32_t indent, const char *pname, UErrorCode *status) {
+    static const char16_t cr[] = { 0xA }; // LF
+    static const char16_t noTarget[] = { 0 };
+    int32_t len = 0;
+    const char16_t* thestr = res_getAlias(&(parent->getResData()), r, &len);
+    if(thestr == nullptr) {
+        /* Nothing to quote; print an empty target rather than dereferencing null. */
+        thestr = noTarget;
+        len = 0;
+    }
+    char16_t *string = quotedString(thestr);
+    if(string == nullptr) {
+        *status = U_MEMORY_ALLOCATION_ERROR;
+        reportError(pname, status, "quoting alias value");
+        return;
+    }
+    /* Print by the length of the escaped text: it is longer than `len` when
+     * escapes were added and shorter when the value has an embedded NUL. */
+    int32_t printLen = u_strlen(string);
+    if(opt_truncate && len > truncsize) {
+        const int32_t limit = truncsize > 0 ? truncsize/2 : 0;
+        char msg[128];
+        printIndent(out, indent);
+        snprintf(msg, sizeof(msg), "// WARNING: this resource, size %li is truncated to %li\n",
+            (long)len, (long)limit);
+        printCString(out, msg, -1);
+        if(printLen > limit) {
+            printLen = limit;
+        }
+    }
+    if(U_SUCCESS(*status)) {
+        static const char16_t openStr[] = { 0x003A, 0x0061, 0x006C, 0x0069, 0x0061, 0x0073, 0x0020, 0x007B, 0x0020, 0x0022 }; /* ":alias { \"" */
+        static const char16_t closeStr[] = { 0x0022, 0x0020, 0x007D, 0x0020 }; /* "\" } " */
+        printIndent(out, indent);
+        if(key != nullptr) {
+            printCString(out, key, -1);
+        }
+        printString(out, openStr, UPRV_LENGTHOF(openStr));
+        printString(out, string, printLen);
+        printString(out, closeStr, UPRV_LENGTHOF(closeStr));
+        if(verbose) {
+            printCString(out, " // ALIAS", -1);
+        }
+        printString(out, cr, UPRV_LENGTHOF(cr));
+    } else {
+        reportError(pname, status, "getting alias value");
+    }
+    uprv_free(string);
+}
+
+/**
+ * Recursively prints `resource` to `out` in resource-bundle text form.
+ *
+ * Strings, integers, binaries and integer vectors are printed as a single
+ * line; tables and arrays print an opening brace, each child indented by
+ * `indentsize` more spaces, and a closing brace. Other resource types print
+ * nothing. When `verbose` is set, each entry is followed by a comment naming
+ * its type. When `suppressAliases` is set, children of tables and arrays are
+ * read through the low-level accessors so that aliases are printed by
+ * printOutAlias() instead of being followed.
+ *
+ * @param out       destination file
+ * @param resource  resource to print
+ * @param indent    number of spaces printed before the entry
+ * @param pname     program name used in error messages
+ * @param status    in/out error code; failures are reported via reportError()
+ */
+static void printOutBundle(UFILE *out, UResourceBundle *resource, int32_t indent, const char *pname, UErrorCode *status)
+{
+    static const char16_t cr[] = { 0xA }; // LF
+
+/*    int32_t noOfElements = ures_getSize(resource);*/
+    int32_t i = 0;
+    const char *key = ures_getKey(resource);
+
+    switch(ures_getType(resource)) {
+    case URES_STRING :
+        {
+            int32_t len=0;
+            const char16_t* thestr = ures_getString(resource, &len, status);
+            if(U_FAILURE(*status) || thestr == nullptr) {
+                /* Nothing to quote; do not hand a null pointer to quotedString(). */
+                reportError(pname, status, "getting string value");
+                break;
+            }
+            char16_t *string = quotedString(thestr);
+            if(string == nullptr) {
+                *status = U_MEMORY_ALLOCATION_ERROR;
+                reportError(pname, status, "quoting string value");
+                break;
+            }
+            /* Print by the length of the escaped text: it is longer than
+             * `len` when escapes were added and shorter when the resource
+             * contains an embedded NUL. */
+            int32_t printLen = u_strlen(string);
+
+            /* TODO: String truncation */
+            if(opt_truncate && len > truncsize) {
+                const int32_t limit = truncsize > 0 ? truncsize/2 : 0;
+                char msg[128];
+                printIndent(out, indent);
+                snprintf(msg, sizeof(msg), "// WARNING: this resource, size %li is truncated to %li\n",
+                        (long)len, (long)limit);
+                printCString(out, msg, -1);
+                if(printLen > limit) {
+                    printLen = limit;
+                }
+            }
+            printIndent(out, indent);
+            if(key != nullptr) {
+                static const char16_t openStr[] = { 0x0020, 0x007B, 0x0020, 0x0022 }; /* " { \"" */
+                static const char16_t closeStr[] = { 0x0022, 0x0020, 0x007D }; /* "\" }" */
+                printCString(out, key, (int32_t)uprv_strlen(key));
+                printString(out, openStr, UPRV_LENGTHOF(openStr));
+                printString(out, string, printLen);
+                printString(out, closeStr, UPRV_LENGTHOF(closeStr));
+            } else {
+                static const char16_t openStr[] = { 0x0022 }; /* "\"" */
+                static const char16_t closeStr[] = { 0x0022, 0x002C }; /* "\"," */
+
+                /* NOTE(review): unnamed (array) strings are always printed in
+                 * full, even after the truncation warning above; confirm
+                 * whether that is intended. */
+                printString(out, openStr, UPRV_LENGTHOF(openStr));
+                printString(out, string, (int32_t)(u_strlen(string)));
+                printString(out, closeStr, UPRV_LENGTHOF(closeStr));
+            }
+
+            if(verbose) {
+                printCString(out, "// STRING", -1);
+            }
+            printString(out, cr, UPRV_LENGTHOF(cr));
+
+            uprv_free(string);
+        }
+        break;
+
+    case URES_INT :
+        {
+            static const char16_t openStr[] = { 0x003A, 0x0069, 0x006E, 0x0074, 0x0020, 0x007B, 0x0020 }; /* ":int { " */
+            static const char16_t closeStr[] = { 0x0020, 0x007D }; /* " }" */
+            char16_t num[20];
+
+            printIndent(out, indent);
+            if(key != nullptr) {
+                printCString(out, key, -1);
+            }
+            printString(out, openStr, UPRV_LENGTHOF(openStr));
+            uprv_itou(num, 20, ures_getInt(resource, status), 10, 0);
+            printString(out, num, u_strlen(num));
+            printString(out, closeStr, UPRV_LENGTHOF(closeStr));
+
+            if(verbose) {
+                printCString(out, "// INT", -1);
+            }
+            printString(out, cr, UPRV_LENGTHOF(cr));
+            break;
+        }
+    case URES_BINARY :
+        {
+            int32_t len = 0;
+            const int8_t *data = (const int8_t *)ures_getBinary(resource, &len, status);
+            if(opt_truncate && len > truncsize) {
+                /* Binary data is cut to `truncsize` bytes; the warning reports
+                 * that same number. */
+                const int32_t limit = truncsize > 0 ? truncsize : 0;
+                char msg[128];
+                printIndent(out, indent);
+                snprintf(msg, sizeof(msg), "// WARNING: this resource, size %li is truncated to %li\n",
+                        (long)len, (long)limit);
+                printCString(out, msg, -1);
+                len = limit;
+            }
+            if(U_SUCCESS(*status)) {
+                static const char16_t openStr[] = { 0x003A, 0x0062, 0x0069, 0x006E, 0x0061, 0x0072, 0x0079, 0x0020, 0x007B, 0x0020 }; /* ":binary { " */
+                static const char16_t closeStr[] = { 0x0020, 0x007D, 0x0020 }; /* " } " */
+                printIndent(out, indent);
+                if(key != nullptr) {
+                    printCString(out, key, -1);
+                }
+                printString(out, openStr, UPRV_LENGTHOF(openStr));
+                for(i = 0; i<len; i++) {
+                    printHex(out, *data++);
+                }
+                printString(out, closeStr, UPRV_LENGTHOF(closeStr));
+                if(verbose) {
+                    printCString(out, " // BINARY", -1);
+                }
+                printString(out, cr, UPRV_LENGTHOF(cr));
+            } else {
+                reportError(pname, status, "getting binary value");
+            }
+        }
+        break;
+    case URES_INT_VECTOR :
+        {
+            int32_t len = 0;
+            const int32_t *data = ures_getIntVector(resource, &len, status);
+            if(U_SUCCESS(*status)) {
+                static const char16_t openStr[] = { 0x003A, 0x0069, 0x006E, 0x0074, 0x0076, 0x0065, 0x0063, 0x0074, 0x006F, 0x0072, 0x0020, 0x007B, 0x0020 }; /* ":intvector { " */
+                static const char16_t closeStr[] = { 0x0020, 0x007D, 0x0020 }; /* " } " */
+                char16_t num[20];
+
+                printIndent(out, indent);
+                if(key != nullptr) {
+                    printCString(out, key, -1);
+                }
+                printString(out, openStr, UPRV_LENGTHOF(openStr));
+                /* All elements but the last are followed by ", ". */
+                for(i = 0; i < len - 1; i++) {
+                    int32_t numLen = uprv_itou(num, 20, data[i], 10, 0);
+                    num[numLen++] = 0x002C; /* ',' */
+                    num[numLen++] = 0x0020; /* ' ' */
+                    num[numLen] = 0;
+                    printString(out, num, u_strlen(num));
+                }
+                if(len > 0) {
+                    uprv_itou(num, 20, data[len - 1], 10, 0);
+                    printString(out, num, u_strlen(num));
+                }
+                printString(out, closeStr, UPRV_LENGTHOF(closeStr));
+                if(verbose) {
+                    printCString(out, "// INTVECTOR", -1);
+                }
+                printString(out, cr, UPRV_LENGTHOF(cr));
+            } else {
+                reportError(pname, status, "getting int vector");
+            }
+        }
+        break;
+    case URES_TABLE :
+    case URES_ARRAY :
+        {
+            static const char16_t openStr[] = { 0x007B }; /* "{" */
+            static const char16_t closeStr[] = { 0x007D, '\n' }; /* "}\n" */
+
+            UResourceBundle *t = nullptr;
+            ures_resetIterator(resource);
+            printIndent(out, indent);
+            if(key != nullptr) {
+                printCString(out, key, -1);
+            }
+            printString(out, openStr, UPRV_LENGTHOF(openStr));
+            if(verbose) {
+                if(ures_getType(resource) == URES_TABLE) {
+                    printCString(out, "// TABLE", -1);
+                } else {
+                    printCString(out, "// ARRAY", -1);
+                }
+            }
+            printString(out, cr, UPRV_LENGTHOF(cr));
+
+            if(suppressAliases == false) {
+                while(U_SUCCESS(*status) && ures_hasNext(resource)) {
+                    t = ures_getNextResource(resource, t, status);
+                    if(U_SUCCESS(*status)) {
+                        printOutBundle(out, t, indent+indentsize, pname, status);
+                    } else {
+                        reportError(pname, status, "While processing table");
+                        *status = U_ZERO_ERROR;
+                    }
+                }
+            } else { /* we have to use low level access to do this */
+                Resource r;
+                int32_t resSize = ures_getSize(resource);
+                UBool isTable = (UBool)(ures_getType(resource) == URES_TABLE);
+                for(i = 0; i < resSize; i++) {
+                    /* Name of this item only: table items get theirs from the
+                     * lookup below, array items have none. The container's own
+                     * `key` must not be reused for its children. */
+                    const char *itemKey = nullptr;
+                    /* need to know if it's an alias */
+                    if(isTable) {
+                        r = res_getTableItemByIndex(&resource->getResData(), resource->fRes, i, &itemKey);
+                    } else {
+                        r = res_getArrayItem(&resource->getResData(), resource->fRes, i);
+                    }
+                    if(U_SUCCESS(*status)) {
+                        if(res_getPublicType(r) == URES_ALIAS) {
+                            printOutAlias(out, resource, r, itemKey, indent+indentsize, pname, status);
+                        } else {
+                            t = ures_getByIndex(resource, i, t, status);
+                            printOutBundle(out, t, indent+indentsize, pname, status);
+                        }
+                    } else {
+                        reportError(pname, status, "While processing table");
+                        *status = U_ZERO_ERROR;
+                    }
+                }
+            }
+
+            printIndent(out, indent);
+            printString(out, closeStr, UPRV_LENGTHOF(closeStr));
+            ures_close(t);
+        }
+        break;
+    default:
+        break;
+    }
+
+}
+
+/* Looks up the standard name of converter `encoding`, trying the "MIME"
+ * standard first and "IANA" second. Returns the result of the last lookup
+ * performed, which is nullptr when neither standard yields a name. */
+static const char *getEncodingName(const char *encoding) {
+    UErrorCode err = U_ZERO_ERROR;
+    const char *enc = ucnv_getStandardName(encoding, "MIME", &err);
+
+    if (enc == nullptr) {
+        /* Start the second lookup from a clean error code. */
+        err = U_ZERO_ERROR;
+        enc = ucnv_getStandardName(encoding, "IANA", &err);
+    }
+
+    return enc;
+}
+
+static void reportError(const char *pname, UErrorCode *status, const char *when) { // Prints "<pname>: error <code> while <when>: <error name>" to ustderr; *status is only read.
+ u_fprintf(ustderr, "%s: error %d while %s: %s\n", pname, *status, when, u_errorName(*status));
+}
+
+#else
+extern int
+main(int argc, char* argv[]) { // Stub compiled when UCONFIG_NO_FORMATTING is set: does no work and always returns 3.
+ /* Changing stdio.h ustdio.h requires that formatting not be disabled. */
+ return 3;
+}
+#endif /* !UCONFIG_NO_FORMATTING */
+
+/*
+ * Local Variables:
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/intl/icu/source/tools/genrb/derb.vcxproj b/intl/icu/source/tools/genrb/derb.vcxproj
new file mode 100644
index 0000000000..f5ba9bf22f
--- /dev/null
+++ b/intl/icu/source/tools/genrb/derb.vcxproj
@@ -0,0 +1,80 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="14.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+ <PropertyGroup Label="Globals">
+ <ProjectGuid>{D3065ADB-8820-4CC7-9B6C-9510833961A3}</ProjectGuid>
+ </PropertyGroup>
+ <PropertyGroup Label="Configuration">
+ <ConfigurationType>Application</ConfigurationType>
+ <UseOfMfc>false</UseOfMfc>
+ <CharacterSet>MultiByte</CharacterSet>
+ </PropertyGroup>
+ <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+ <!-- The following import will include the 'default' configuration options for VS projects. -->
+ <Import Project="..\..\allinone\Build.Windows.ProjectConfiguration.props" />
+ <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+ <ImportGroup Label="ExtensionSettings">
+ </ImportGroup>
+ <PropertyGroup Label="UserMacros" />
+ <PropertyGroup>
+ <_ProjectFileVersion>10.0.30319.1</_ProjectFileVersion>
+ <OutDir>.\$(Platform)\$(Configuration)\</OutDir>
+ <IntDir>.\$(Platform)\$(Configuration)\</IntDir>
+ <!-- The ICU projects use "Win32" to mean "x86", so we need to special case it. -->
+ <OutDir Condition="'$(Platform)'=='Win32'">.\x86\$(Configuration)\</OutDir>
+ <IntDir Condition="'$(Platform)'=='Win32'">.\x86\$(Configuration)\</IntDir>
+ <!-- Disable Incremental Linking for Release builds as it prevents Link-time Code Generation -->
+ <LinkIncremental Condition="'$(Configuration)'=='Debug'">true</LinkIncremental>
+ <LinkIncremental Condition="'$(Configuration)'=='Release'">false</LinkIncremental>
+ </PropertyGroup>
+ <!-- Options that are common to *all* configurations -->
+ <ItemDefinitionGroup>
+ <Midl>
+ <TypeLibraryName>$(OutDir)/derb.tlb</TypeLibraryName>
+ </Midl>
+ <ClCompile>
+ <WarningLevel>Level3</WarningLevel>
+ <CompileAs>Default</CompileAs>
+ <DisableLanguageExtensions>true</DisableLanguageExtensions>
+ <AdditionalIncludeDirectories>..\..\i18n;..\..\common;..\toolutil;..\..\io;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+ <PrecompiledHeaderOutputFile>$(OutDir)/derb.pch</PrecompiledHeaderOutputFile>
+ <AssemblerListingLocation>$(OutDir)/</AssemblerListingLocation>
+ <ObjectFileName>$(OutDir)/</ObjectFileName>
+ <ProgramDataBaseFileName>$(OutDir)/derb.pdb</ProgramDataBaseFileName>
+ </ClCompile>
+ <Link>
+ <SubSystem>Console</SubSystem>
+ <OutputFile>$(OutDir)/derb.exe</OutputFile>
+ <AdditionalLibraryDirectories>..\..\..\$(IcuLibOutputDir);%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
+ </Link>
+ <CustomBuildStep>
+ <Command>copy "$(TargetPath)" ..\..\..\$(IcuBinOutputDir)</Command>
+ <Outputs>..\..\..\$(IcuBinOutputDir)\$(TargetFileName);%(Outputs)</Outputs>
+ </CustomBuildStep>
+ </ItemDefinitionGroup>
+ <!-- Options that are common to all 'Debug' project configurations -->
+ <ItemDefinitionGroup Condition="'$(Configuration)'=='Debug'">
+ <ClCompile>
+ <BrowseInformation>true</BrowseInformation>
+ <RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
+ </ClCompile>
+ <Link>
+ <AdditionalDependencies>icuucd.lib;icuind.lib;icuiod.lib;icutud.lib;%(AdditionalDependencies)</AdditionalDependencies>
+ </Link>
+ </ItemDefinitionGroup>
+ <!-- Options that are common to all 'Release' project configurations -->
+ <ItemDefinitionGroup Condition="'$(Configuration)'=='Release'">
+ <ClCompile>
+ <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+ <FunctionLevelLinking>true</FunctionLevelLinking>
+ </ClCompile>
+ <Link>
+ <AdditionalDependencies>icuuc.lib;icuin.lib;icuio.lib;icutu.lib;%(AdditionalDependencies)</AdditionalDependencies>
+ </Link>
+ </ItemDefinitionGroup>
+ <ItemGroup>
+ <ClCompile Include="derb.cpp" />
+ </ItemGroup>
+ <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+ <ImportGroup Label="ExtensionTargets">
+ </ImportGroup>
+</Project> \ No newline at end of file
diff --git a/intl/icu/source/tools/genrb/derb.vcxproj.filters b/intl/icu/source/tools/genrb/derb.vcxproj.filters
new file mode 100644
index 0000000000..c62d612888
--- /dev/null
+++ b/intl/icu/source/tools/genrb/derb.vcxproj.filters
@@ -0,0 +1,22 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+ <ItemGroup>
+ <Filter Include="Source Files">
+ <UniqueIdentifier>{b10d3c34-0b4c-43e9-9c28-e17fdabee575}</UniqueIdentifier>
+ <Extensions>cpp;c;cxx;rc;def;r;odl;idl;hpj;bat</Extensions>
+ </Filter>
+ <Filter Include="Header Files">
+ <UniqueIdentifier>{0f0a70a2-7e7e-4e7a-88ab-b3bf739fabed}</UniqueIdentifier>
+ <Extensions>h;hpp;hxx;hm;inl</Extensions>
+ </Filter>
+ <Filter Include="Resource Files">
+ <UniqueIdentifier>{ac6d5215-57af-486d-81ed-badc17745780}</UniqueIdentifier>
+ <Extensions>ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe</Extensions>
+ </Filter>
+ </ItemGroup>
+ <ItemGroup>
+ <ClCompile Include="derb.cpp">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ </ItemGroup>
+</Project> \ No newline at end of file
diff --git a/intl/icu/source/tools/genrb/errmsg.c b/intl/icu/source/tools/genrb/errmsg.c
new file mode 100644
index 0000000000..a99d797ec5
--- /dev/null
+++ b/intl/icu/source/tools/genrb/errmsg.c
@@ -0,0 +1,75 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+*
+* Copyright (C) 1998-2011, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+*******************************************************************************
+*
+* File errmsg.c
+*
+* Modification History:
+*
+* Date Name Description
+* 05/28/99 stephen Creation.
+*******************************************************************************
+*/
+
+#include <stdarg.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include "cstring.h"
+#include "errmsg.h"
+#include "toolutil.h"
+
+/**
+ * Reports an error on stderr as "<current file>:<line>: <message>", followed
+ * by a newline. The file name is taken from the global gCurrentFileName.
+ *
+ * @param linenumber Line number shown in the message prefix.
+ * @param msg        printf-style format string for the message text.
+ * @param ...        Arguments consumed by msg.
+ */
+U_CFUNC void error(uint32_t linenumber, const char *msg, ...)
+{
+    va_list va;
+
+    va_start(va, msg);
+    /* %u consumes an unsigned int, so cast to that type rather than to int. */
+    fprintf(stderr, "%s:%u: ", gCurrentFileName, (unsigned int)linenumber);
+    vfprintf(stderr, msg, va);
+    fprintf(stderr, "\n");
+    va_end(va);
+}
+
+/* Whether warning() prints anything; enabled by default. */
+static UBool gShowWarning = true;
+
+/** Enables (true) or disables (false) the output produced by warning(). */
+U_CFUNC void setShowWarning(UBool val)
+{
+    gShowWarning = val;
+}
+
+/**
+ * Returns the current show-warning setting.
+ * Defined with an explicit (void) parameter list so the definition is a
+ * prototype that matches the declaration in errmsg.h.
+ */
+U_CFUNC UBool getShowWarning(void)
+{
+    return gShowWarning;
+}
+
+/* Strict-mode flag; off by default. */
+static UBool gStrict = false;
+
+/**
+ * Returns the current strict-mode setting.
+ * Uses an explicit (void) parameter list to match the prototype in errmsg.h.
+ */
+U_CFUNC UBool isStrict(void)
+{
+    return gStrict;
+}
+
+/** Turns strict mode on (true) or off (false). */
+U_CFUNC void setStrict(UBool val)
+{
+    gStrict = val;
+}
+/* Verbose-output flag; off by default. */
+static UBool gVerbose = false;
+
+/**
+ * Returns the current verbosity setting.
+ * Uses an explicit (void) parameter list to match the prototype in errmsg.h.
+ */
+U_CFUNC UBool isVerbose(void)
+{
+    return gVerbose;
+}
+
+/** Turns verbose output on (true) or off (false). */
+U_CFUNC void setVerbose(UBool val)
+{
+    gVerbose = val;
+}
+/**
+ * Reports a warning on stderr as "<current file>:<line>: warning: <message>",
+ * followed by a newline. Nothing is printed when warnings have been turned
+ * off through setShowWarning(false).
+ *
+ * @param linenumber Line number shown in the message prefix.
+ * @param msg        printf-style format string for the message text.
+ * @param ...        Arguments consumed by msg.
+ */
+U_CFUNC void warning(uint32_t linenumber, const char *msg, ...)
+{
+    va_list va;
+
+    if (!gShowWarning) {
+        return;
+    }
+
+    va_start(va, msg);
+    /* %u consumes an unsigned int, so cast to that type rather than to int. */
+    fprintf(stderr, "%s:%u: warning: ", gCurrentFileName, (unsigned int)linenumber);
+    vfprintf(stderr, msg, va);
+    fprintf(stderr, "\n");
+    va_end(va);
+}
diff --git a/intl/icu/source/tools/genrb/errmsg.h b/intl/icu/source/tools/genrb/errmsg.h
new file mode 100644
index 0000000000..e01b9558f0
--- /dev/null
+++ b/intl/icu/source/tools/genrb/errmsg.h
@@ -0,0 +1,46 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+*
+* Copyright (C) 1998-2016, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+*******************************************************************************
+*
+* File errmsg.h
+*
+* Modification History:
+*
+* Date Name Description
+* 05/28/99 stephen Creation.
+*******************************************************************************
+*/
+
+#ifndef ERROR_H
+#define ERROR_H 1
+
+#include "unicode/utypes.h"
+
+U_CDECL_BEGIN
+
+/* Name printed as the "<file>:" prefix by error() and warning(). */
+extern const char *gCurrentFileName;
+
+/*
+ * Diagnostic output on stderr. Both take a line number and a printf-style
+ * format string with its arguments; warning() additionally inserts
+ * "warning: " and is silent while warnings are switched off.
+ */
+U_CFUNC void error(uint32_t linenumber, const char *msg, ...);
+U_CFUNC void warning(uint32_t linenumber, const char *msg, ...);
+
+/* Show warnings? (enabled by default) */
+U_CFUNC void setShowWarning(UBool val);
+U_CFUNC UBool getShowWarning(void);
+
+/* Strict-mode flag (disabled by default) */
+U_CFUNC void setStrict(UBool val);
+U_CFUNC UBool isStrict(void);
+
+/* Verbosity flag (disabled by default) */
+U_CFUNC void setVerbose(UBool val);
+U_CFUNC UBool isVerbose(void);
+
+U_CDECL_END
+
+#endif
diff --git a/intl/icu/source/tools/genrb/filterrb.cpp b/intl/icu/source/tools/genrb/filterrb.cpp
new file mode 100644
index 0000000000..dcc02fc621
--- /dev/null
+++ b/intl/icu/source/tools/genrb/filterrb.cpp
@@ -0,0 +1,239 @@
+// © 2018 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+
+#include <iostream>
+#include <stack>
+
+#include "filterrb.h"
+#include "errmsg.h"
+
+
+// Printable names for the PathFilter::EInclusion values. The table is indexed
+// by the enumerator value (see Tree::print), so the entries must stay in the
+// same order as the enum declaration: INCLUDE, PARTIAL, EXCLUDE.
+const char* PathFilter::kEInclusionNames[] = {
+ "INCLUDE",
+ "PARTIAL",
+ "EXCLUDE"
+};
+
+
+// Creates the empty path, i.e. the root of the resource tree.
+ResKeyPath::ResKeyPath() {}
+
+// Parses an absolute path such as "/units/length/meter" into its keys.
+// A lone "/" yields the empty (root) path. On malformed input (no leading
+// slash, an empty component, or a trailing slash) a message is written to
+// std::cerr and status is set to U_PARSE_ERROR; keys parsed before the
+// offending component remain in the path.
+ResKeyPath::ResKeyPath(const std::string& path, UErrorCode& status) {
+    const bool startsAtRoot = !path.empty() && path[0] == '/';
+    if (!startsAtRoot) {
+        std::cerr << "genrb error: path must start with /: " << path << std::endl;
+        status = U_PARSE_ERROR;
+        return;
+    }
+    if (path.length() == 1) {
+        // Just "/": nothing to split.
+        return;
+    }
+
+    // `slash` always holds the position of the separator preceding the next key.
+    size_t slash = 0;
+    do {
+        const size_t keyStart = slash + 1;
+        slash = path.find('/', keyStart);
+        // When no further slash exists, substr() clamps the count to the end of the string.
+        const std::string key = path.substr(keyStart, slash - keyStart);
+        if (key.empty()) {
+            std::cerr << "genrb error: empty subpaths and trailing slashes are not allowed: " << path << std::endl;
+            status = U_PARSE_ERROR;
+            return;
+        }
+        push(key);
+    } while (slash != std::string::npos);
+}
+
+// Appends one key to the end of the path (descends one level).
+void ResKeyPath::push(const std::string& key) {
+ fPath.push_back(key);
+}
+
+// Removes the last key from the path (ascends one level).
+// The list is not checked for emptiness; the path must hold at least one key.
+void ResKeyPath::pop() {
+ fPath.pop_back();
+}
+
+// Read-only access to the keys, ordered from the top of the tree downwards.
+const std::list<std::string>& ResKeyPath::pieces() const {
+ return fPath;
+}
+
+// Writes the path in its textual form: "/" for the empty path, otherwise
+// every key preceded by a slash (e.g. "/units/length/meter").
+std::ostream& operator<<(std::ostream& out, const ResKeyPath& value) {
+    const std::list<std::string>& keys = value.pieces();
+    if (keys.empty()) {
+        out << "/";
+        return out;
+    }
+    for (const std::string& key : keys) {
+        out << "/" << key;
+    }
+    return out;
+}
+
+
+// Out-of-line definition of the virtual destructor declared in filterrb.h.
+PathFilter::~PathFilter() = default;
+
+
+// Parses one textual rule of the form "+/some/path" (include) or
+// "-/some/path" (exclude) and adds it to the filter. An empty line or a line
+// that starts with any other character is reported on std::cerr and sets
+// status to U_PARSE_ERROR.
+void SimpleRuleBasedPathFilter::addRule(const std::string& ruleLine, UErrorCode& status) {
+    if (ruleLine.empty()) {
+        std::cerr << "genrb error: empty filter rules are not allowed" << std::endl;
+        status = U_PARSE_ERROR;
+        return;
+    }
+
+    bool inclusionRule = false;
+    switch (ruleLine[0]) {
+    case '+':
+        inclusionRule = true;
+        break;
+    case '-':
+        break;
+    default:
+        std::cerr << "genrb error: rules must start with + or -: " << ruleLine << std::endl;
+        status = U_PARSE_ERROR;
+        return;
+    }
+
+    // Everything after the sign is the path; parse failures are reported via status.
+    ResKeyPath rulePath(ruleLine.substr(1), status);
+    addRule(rulePath, inclusionRule, status);
+}
+
+// Adds an already-parsed rule to the filter tree, starting at the root.
+// Does nothing when status already signals a failure.
+void SimpleRuleBasedPathFilter::addRule(const ResKeyPath& path, bool inclusionRule, UErrorCode& status) {
+    if (!U_FAILURE(status)) {
+        fRoot.applyRule(path, path.pieces().begin(), inclusionRule, status);
+    }
+}
+
+// Decides whether the resource at `path` is included, excluded, or only
+// partially covered by the rules.
+//
+// The filter tree is walked key by key, preferring an exact child and falling
+// back to the wildcard child. PARTIAL is returned whenever the walk ends on a
+// node that still has children (the rules may say more about subpaths).
+// Otherwise the result is the node's own verdict, or, if that node carries
+// none, the nearest definite verdict seen on the way down (INCLUDE if there
+// was none at all).
+PathFilter::EInclusion SimpleRuleBasedPathFilter::match(const ResKeyPath& path) const {
+    const Tree* current = &fRoot;
+
+    // Nearest definite verdict seen so far; the root covers the rules "+/" and "-/".
+    EInclusion inherited = (fRoot.fIncluded == PARTIAL) ? INCLUDE : fRoot.fIncluded;
+
+    // Set when the input path descends below anything the filter tree knows about.
+    bool leftFilterTree = false;
+
+    for (const std::string& key : path.pieces()) {
+        const auto exact = current->fChildren.find(key);
+        if (exact != current->fChildren.end()) {
+            current = &exact->second;
+        } else if (current->fWildcard) {
+            // No exact child, but a wildcard pattern applies; keep checking.
+            current = current->fWildcard.get();
+        } else {
+            leftFilterTree = true;
+            break;
+        }
+        if (current->fIncluded != PARTIAL) {
+            inherited = current->fIncluded;
+        }
+    }
+
+    // Not at a leaf (neither ran off the tree nor stopped on a childless
+    // node): deeper rules may still apply.
+    if (!leftFilterTree && !current->isLeaf()) {
+        return PARTIAL;
+    }
+
+    // At a leaf: use its own verdict, or the inherited one if it has none.
+    return (current->fIncluded == PARTIAL) ? inherited : current->fIncluded;
+}
+
+
+// Deep copy. The defaulted copy constructor is unavailable because fWildcard
+// is a std::unique_ptr, so the wildcard subtree (if any) is cloned explicitly.
+SimpleRuleBasedPathFilter::Tree::Tree(const Tree& other)
+        : fIncluded(other.fIncluded),
+          fChildren(other.fChildren),
+          fWildcard(other.fWildcard ? new Tree(*other.fWildcard) : nullptr) {
+}
+
+// A node is a leaf when it has neither named children nor a wildcard child.
+bool SimpleRuleBasedPathFilter::Tree::isLeaf() const {
+    if (fWildcard) {
+        return false;
+    }
+    return fChildren.empty();
+}
+
+// Recursively applies one inclusion/exclusion rule to this subtree.
+//
+// path          - the complete rule path (used for iteration bounds and messages)
+// it            - position within path.pieces() that this node has to handle
+// inclusionRule - true for a "+" rule, false for a "-" rule
+// status        - forwarded unchanged to the recursive calls
+void SimpleRuleBasedPathFilter::Tree::applyRule(
+        const ResKeyPath& path,
+        std::list<std::string>::const_iterator it,
+        bool inclusionRule,
+        UErrorCode& status) {
+
+    // End of the rule path: this node is the rule's target. The new verdict
+    // replaces whatever was recorded here, including all deeper rules.
+    if (it == path.pieces().end()) {
+        if (isVerbose() && (fIncluded != PARTIAL || !isLeaf())) {
+            std::cout << "genrb info: rule on path " << path
+                << " overrides previous rules" << std::endl;
+        }
+        fIncluded = inclusionRule ? INCLUDE : EXCLUDE;
+        fChildren.clear();
+        fWildcard.reset();
+        return;
+    }
+
+    const std::string& key = *it;
+
+    // Position of the remainder of the path, handed down to the next level.
+    std::list<std::string>::const_iterator rest = it;
+    ++rest;
+
+    if (key == "*") {
+        // Wildcard: the rest of the rule goes to the wildcard subtree and to
+        // every child that already exists.
+        if (!fWildcard) {
+            fWildcard.reset(new Tree());
+        }
+        fWildcard->applyRule(path, rest, inclusionRule, status);
+        for (auto& entry : fChildren) {
+            entry.second.applyRule(path, rest, inclusionRule, status);
+        }
+        return;
+    }
+
+    // Ordinary key: descend into the matching child, creating it on demand.
+    auto slot = fChildren.find(key);
+    if (slot == fChildren.end()) {
+        if (fWildcard) {
+            // A new child starts out as a deep copy of the wildcard subtree so
+            // that earlier wildcard rules keep applying to it.
+            slot = fChildren.emplace(key, Tree(*fWildcard)).first;
+        } else {
+            slot = fChildren.emplace(key, Tree()).first;
+        }
+    }
+    slot->second.applyRule(path, rest, inclusionRule, status);
+}
+
+// Dumps this subtree in a human-readable form: the node's own verdict, then
+// each named child and finally the wildcard child, each wrapped in braces and
+// indented one tab deeper than its parent.
+void SimpleRuleBasedPathFilter::Tree::print(std::ostream& out, int32_t indent) const {
+    // Writes `indent` tab characters, one at a time.
+    auto writeIndent = [&out, indent]() {
+        for (int32_t n = 0; n < indent; n++) {
+            out << "\t";
+        }
+    };
+
+    writeIndent();
+    out << "included: " << kEInclusionNames[fIncluded] << std::endl;
+
+    for (const auto& entry : fChildren) {
+        writeIndent();
+        out << entry.first << ": {" << std::endl;
+        entry.second.print(out, indent + 1);
+        writeIndent();
+        out << "}" << std::endl;
+    }
+
+    if (fWildcard) {
+        writeIndent();
+        out << "* {" << std::endl;
+        fWildcard->print(out, indent + 1);
+        writeIndent();
+        out << "}" << std::endl;
+    }
+}
+
+// Writes the whole filter tree to `out`, wrapped in
+// "SimpleRuleBasedPathFilter { ... }", with the root node printed at an
+// indentation of one tab.
+void SimpleRuleBasedPathFilter::print(std::ostream& out) const {
+ out << "SimpleRuleBasedPathFilter {" << std::endl;
+ fRoot.print(out, 1);
+ out << "}" << std::endl;
+}
+
+// Stream insertion for a filter; produces the same output as print().
+std::ostream& operator<<(std::ostream& out, const SimpleRuleBasedPathFilter& value) {
+ value.print(out);
+ return out;
+}
diff --git a/intl/icu/source/tools/genrb/filterrb.h b/intl/icu/source/tools/genrb/filterrb.h
new file mode 100644
index 0000000000..cf54766041
--- /dev/null
+++ b/intl/icu/source/tools/genrb/filterrb.h
@@ -0,0 +1,180 @@
+// © 2018 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+
+#ifndef __FILTERRB_H__
+#define __FILTERRB_H__
+
+#include <list>
+#include <map>
+#include <memory>
+#include <ostream>
+#include <string>
+
+#include "unicode/utypes.h"
+
+
+/**
+ * Represents an absolute path into a resource bundle.
+ * For example: "/units/length/meter"
+ *
+ * The path is stored as the ordered list of its keys; the empty list stands
+ * for the top of the tree ("/").
+ */
+class ResKeyPath {
+public:
+ /** Constructs an empty path (top of tree) */
+ ResKeyPath();
+
+ /**
+ * Constructs from a string path, which must start with '/' and must not
+ * contain empty components or a trailing slash. On violation, status is
+ * set to U_PARSE_ERROR.
+ */
+ ResKeyPath(const std::string& path, UErrorCode& status);
+
+ /** Appends a key to the end of the path. */
+ void push(const std::string& key);
+ /** Removes the last key; the path must not be empty. */
+ void pop();
+
+ /** Returns the keys of the path, outermost first. */
+ const std::list<std::string>& pieces() const;
+
+ private:
+ /** The keys making up the path, outermost first. */
+ std::list<std::string> fPath;
+};
+
+std::ostream& operator<<(std::ostream& out, const ResKeyPath& value);
+
+
+/**
+ * Interface used to determine whether to include or reject pieces of a
+ * resource bundle based on their absolute path.
+ */
+class PathFilter {
+public:
+ /** Verdict for a path; see match() for the meaning of each value. */
+ enum EInclusion {
+ INCLUDE,
+ PARTIAL,
+ EXCLUDE
+ };
+
+ /** Printable names of the EInclusion values, indexed by enumerator value. */
+ static const char* kEInclusionNames[];
+
+ virtual ~PathFilter();
+
+ /**
+ * Returns an EInclusion on whether or not the given path should be included.
+ *
+ * INCLUDE = include the whole subtree
+ * PARTIAL = recurse into the subtree
+ * EXCLUDE = reject the whole subtree
+ */
+ virtual EInclusion match(const ResKeyPath& path) const = 0;
+};
+
+
+/**
+ * Implementation of PathFilter for a list of inclusion/exclusion rules.
+ *
+ * The wildcard pattern "*" means that the subsequent filters are applied to
+ * every other tree sharing the same parent.
+ *
+ * For example, given this list of filter rules:
+ */
+// -/alabama
+// +/alabama/alaska/arizona
+// -/fornia/hawaii
+// -/mississippi
+// +/mississippi/michigan
+// +/mississippi/*/maine
+// -/mississippi/*/iowa
+// +/mississippi/louisiana/iowa
+/*
+ * You get the following structure:
+ *
+ * SimpleRuleBasedPathFilter {
+ * included: PARTIAL
+ * alabama: {
+ * included: EXCLUDE
+ * alaska: {
+ * included: PARTIAL
+ * arizona: {
+ * included: INCLUDE
+ * }
+ * }
+ * }
+ * fornia: {
+ * included: PARTIAL
+ * hawaii: {
+ * included: EXCLUDE
+ * }
+ * }
+ * mississippi: {
+ * included: EXCLUDE
+ * louisiana: {
+ * included: PARTIAL
+ * iowa: {
+ * included: INCLUDE
+ * }
+ * maine: {
+ * included: INCLUDE
+ * }
+ * }
+ * michigan: {
+ * included: INCLUDE
+ * iowa: {
+ * included: EXCLUDE
+ * }
+ * maine: {
+ * included: INCLUDE
+ * }
+ * }
+ * * {
+ * included: PARTIAL
+ * iowa: {
+ * included: EXCLUDE
+ * }
+ * maine: {
+ * included: INCLUDE
+ * }
+ * }
+ * }
+ * }
+ */
+class SimpleRuleBasedPathFilter : public PathFilter {
+public:
+ /**
+ * Adds a rule given as text: '+' (include) or '-' (exclude) followed by
+ * an absolute path, e.g. "+/mississippi/michigan". Malformed rules set
+ * status to U_PARSE_ERROR.
+ */
+ void addRule(const std::string& ruleLine, UErrorCode& status);
+ /**
+ * Adds a rule for an already-parsed path. No-op if status is a failure.
+ */
+ void addRule(const ResKeyPath& path, bool inclusionRule, UErrorCode& status);
+
+ EInclusion match(const ResKeyPath& path) const override;
+
+ /** Writes a human-readable dump of the rule tree to out. */
+ void print(std::ostream& out) const;
+
+private:
+ /** One node of the rule tree; each node corresponds to one path key. */
+ struct Tree {
+
+ Tree() = default;
+
+ /** Copy constructor (deep-copies the wildcard subtree) */
+ Tree(const Tree& other);
+
+ /**
+ * Information on the USER-SPECIFIED inclusion/exclusion.
+ *
+ * INCLUDE = this path exactly matches a "+" rule
+ * PARTIAL = this path does not match any rule, but subpaths exist
+ * EXCLUDE = this path exactly matches a "-" rule
+ */
+ EInclusion fIncluded = PARTIAL;
+ /** Subtrees for explicitly named child keys. */
+ std::map<std::string, Tree> fChildren;
+ /** Subtree for the "*" pattern; null when no wildcard rule exists here. */
+ std::unique_ptr<Tree> fWildcard;
+
+ /**
+ * Applies a rule to this subtree; `it` points at the key of `path`
+ * that this node has to handle (or at the end for the target node).
+ */
+ void applyRule(
+ const ResKeyPath& path,
+ std::list<std::string>::const_iterator it,
+ bool inclusionRule,
+ UErrorCode& status);
+
+ /** True if the node has neither named children nor a wildcard child. */
+ bool isLeaf() const;
+
+ /** Prints this subtree, indented by `indent` tab characters. */
+ void print(std::ostream& out, int32_t indent) const;
+ };
+
+ /** Root of the rule tree; corresponds to the path "/". */
+ Tree fRoot;
+};
+
+std::ostream& operator<<(std::ostream& out, const SimpleRuleBasedPathFilter& value);
+
+
+#endif //__FILTERRB_H__
diff --git a/intl/icu/source/tools/genrb/genrb.1.in b/intl/icu/source/tools/genrb/genrb.1.in
new file mode 100644
index 0000000000..a457719238
--- /dev/null
+++ b/intl/icu/source/tools/genrb/genrb.1.in
@@ -0,0 +1,148 @@
+.\" Hey, Emacs! This is -*-nroff-*- you know...
+.\"
+.\" genrb.1: manual page for the genrb utility
+.\"
+.\" Copyright (C) 2016 and later: Unicode, Inc. and others.
+.\" License & terms of use: http://www.unicode.org/copyright.html
+.\" Copyright (C) 2000-2002 IBM, Inc. and others.
+.\"
+.\" Manual page by Yves Arrouye <yves@realnames.com>.
+.\"
+.TH GENRB 1 "16 April 2002" "ICU MANPAGE" "ICU @VERSION@ Manual"
+.SH NAME
+.B genrb
+\- compile a resource bundle
+.SH SYNOPSIS
+.B genrb
+[
+.BR "\-h\fP, \fB\-?\fP, \fB\-\-help"
+]
+[
+.BR "\-V\fP, \fB\-\-version"
+]
+[
+.BR "\-v\fP, \fB\-\-verbose"
+]
+[
+.BI "\-e\fP, \fB\-\-encoding" " encoding"
+]
+[
+.BI "\-j\fP, \fB\-\-write\-java" " \fR[ \fPencoding\fR ]\fP"
+]
+[
+.BI "\-s\fP, \fB\-\-sourcedir" " source"
+]
+[
+.BI "\-d\fP, \fB\-\-destdir" " destination"
+]
+[
+.BI "\-i\fP, \fB\-\-icudatadir" " directory"
+]
+.IR bundle " \.\.\."
+.SH DESCRIPTION
+.B genrb
+converts the resource
+.I bundle
+source files passed on the command line to their binary form or to
+a Java source file for use with ICU4J.
+The resulting binary files have a
+.B .res
+extension while resource bundle source files typically have a
+.B .txt
+extension. Java source files have a
+.B .java
+extension and follow the ICU4J naming conventions.
+.PP
+It is customary to name the resource bundles by their locale name,
+i.e. to use a locale identifier for the
+.I bundle
+filename, e.g.
+.B ja_JP.txt
+for Japanese (Japan) data, or
+.B root.txt
+for the root bundle.
+In any case,
+.B genrb
+will produce a file whose base name is the name of the locale found
+in the resource file, not the base name of the resource file itself.
+.PP
+The binary files can be read directly by ICU, or used by
+.BR pkgdata (1)
+for incorporation into a larger archive or library.
+.SH OPTIONS
+.TP
+.BR "\-h\fP, \fB\-?\fP, \fB\-\-help"
+Print help about usage and exit.
+.TP
+.BR "\-V\fP, \fB\-\-version"
+Print the version of
+.B genrb
+and exit.
+.TP
+.BR "\-v\fP, \fB\-\-verbose"
+Display extra informative messages during execution.
+.TP
+.BI "\-e\fP, \fB\-\-encoding" " encoding"
+Set the encoding used to read input files to
+.IR encoding .
+The default encoding is the invariant (subset of ASCII or EBCDIC)
+codepage for the system (see section
+.BR "INVARIANT CHARACTERS" ).
+The encodings UTF-8, UTF-16BE, and UTF-16LE are automatically detected
+if a byte order mark (BOM) is present.
+.TP
+.BI "\-j\fP, \fB\-\-write\-java" " \fR[ \fPencoding\fR ]\fP"
+Generate Java source code for use with ICU4J. An optional
+.I encoding
+for the Java file can be given.
+.TP
+.BI "\-s\fP, \fB\-\-sourcedir" " source"
+Set the source directory to
+.IR source .
+The default source directory is specified by the environment variable
+.BR ICU_DATA ,
+or the location set when ICU was built if
+.B ICU_DATA
+is not set.
+.TP
+.BI "\-d\fP, \fB\-\-destdir" " destination"
+Set the destination directory to
+.IR destination .
+The default destination directory is specified by the environment variable
+.BR ICU_DATA
+or is the location set when ICU was built if
+.B ICU_DATA
+is not set.
+.TP
+.BI "\-i\fP, \fB\-\-icudatadir" " directory"
+Look for any necessary ICU data files in
+.IR directory .
+For example, when processing collation overrides, the file
+.B ucadata.dat
+must be located.
+The default ICU data directory is specified by the environment variable
+.BR ICU_DATA .
+.SH INVARIANT CHARACTERS
+The
+.B invariant character set
+consists of the following set of characters, expressed as a standard POSIX
+regular expression:
+.BR "[a-z]|[A-Z]|[0-9]|_| |+|-|*|/" .
+This is the set which is guaranteed to be available regardless of code page.
+.SH ENVIRONMENT
+.TP 10
+.B ICU_DATA
+Specifies the directory containing ICU data. Defaults to
+.BR @thepkgicudatadir@/@PACKAGE@/@VERSION@/ .
+Some tools in ICU depend on the presence of the trailing slash. It is thus
+important to make sure that it is present if
+.B ICU_DATA
+is set.
+.SH VERSION
+@VERSION@
+.SH COPYRIGHT
+Copyright (C) 2000-2002 IBM, Inc. and others.
+.SH SEE ALSO
+.BR derb (1)
+.br
+.BR pkgdata (1)
diff --git a/intl/icu/source/tools/genrb/genrb.cpp b/intl/icu/source/tools/genrb/genrb.cpp
new file mode 100644
index 0000000000..fbf396d468
--- /dev/null
+++ b/intl/icu/source/tools/genrb/genrb.cpp
@@ -0,0 +1,869 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+*
+* Copyright (C) 1998-2016, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+*******************************************************************************
+*
+* File genrb.cpp
+*
+* Modification History:
+*
+* Date Name Description
+* 05/25/99 stephen Creation.
+* 5/10/01 Ram removed ustdio dependency
+*******************************************************************************
+*/
+
+#include <fstream>
+#include <iostream>
+#include <list>
+#include <string>
+
+#include <assert.h>
+#include "genrb.h"
+#include "unicode/localpointer.h"
+#include "unicode/uclean.h"
+#include "unicode/utf16.h"
+#include "charstr.h"
+#include "cmemory.h"
+#include "filterrb.h"
+#include "reslist.h"
+#include "ucmndata.h" /* TODO: for reading the pool bundle */
+#include "collationroot.h"
+
+U_NAMESPACE_USE
+
+/* Protos */
+void processFile(const char *filename, const char* cp,
+ const char *inputDir, const char *outputDir, const char *filterDir,
+ const char *packageName,
+ SRBRoot *newPoolBundle, UBool omitBinaryCollation, UErrorCode &status);
+static char *make_res_filename(const char *filename, const char *outputDir,
+ const char *packageName, UErrorCode &status);
+
+/* File suffixes */
+#define RES_SUFFIX ".res"
+#define COL_SUFFIX ".col"
+
+const char *gCurrentFileName = nullptr;
+#ifdef XP_MAC_CONSOLE
+#include <console.h>
+#endif
+
+// Frees the buffers owned by this ResFile: the byte array fBytes (released
+// with delete[]) and the fStrings object. Both pointers are reset to nullptr
+// afterwards, so a repeated call is harmless.
+void ResFile::close() {
+ delete[] fBytes;
+ fBytes = nullptr;
+ delete fStrings;
+ fStrings = nullptr;
+}
+
+// Symbolic indices into the options[] array (e.g. options[VERBOSE]).
+// The enumerators must be kept in exactly the same order as the entries of
+// options[]; adding an option requires a change in both places.
+enum
+{
+ HELP1,
+ HELP2,
+ VERBOSE,
+ QUIET,
+ VERSION,
+ SOURCEDIR,
+ DESTDIR,
+ ENCODING,
+ ICUDATADIR,
+ WRITE_JAVA,
+ COPYRIGHT,
+ JAVA_PACKAGE,
+ BUNDLE_NAME,
+ WRITE_XLIFF,
+ STRICT,
+ NO_BINARY_COLLATION,
+ LANGUAGE,
+ NO_COLLATION_RULES,
+ FORMAT_VERSION,
+ WRITE_POOL_BUNDLE,
+ USE_POOL_BUNDLE,
+ INCLUDE_UNIHAN_COLL,
+ FILTERDIR,
+ ICU4X_MODE,
+ UCADATA
+};
+
+// Command-line option table handed to u_parseArgs() in main().
+// Entries are addressed through the index enum above, so their order must
+// match it; the trailing numeric comments give each entry's index.
+// Options defined with '\x01' as the short name are listed with a long form
+// only in the usage text printed by main().
+UOption options[]={
+ UOPTION_HELP_H,
+ UOPTION_HELP_QUESTION_MARK,
+ UOPTION_VERBOSE,
+ UOPTION_QUIET,
+ UOPTION_VERSION,
+ UOPTION_SOURCEDIR,
+ UOPTION_DESTDIR,
+ UOPTION_ENCODING,
+ UOPTION_ICUDATADIR,
+ UOPTION_WRITE_JAVA,
+ UOPTION_COPYRIGHT,
+ UOPTION_DEF("java-package", '\x01', UOPT_REQUIRES_ARG),
+ UOPTION_BUNDLE_NAME,
+ UOPTION_DEF("write-xliff", 'x', UOPT_OPTIONAL_ARG),
+ UOPTION_DEF("strict", 'k', UOPT_NO_ARG), /* 14 */
+ UOPTION_DEF("noBinaryCollation", 'C', UOPT_NO_ARG),/* 15 */
+ UOPTION_DEF("language", 'l', UOPT_REQUIRES_ARG), /* 16 */
+ UOPTION_DEF("omitCollationRules", 'R', UOPT_NO_ARG),/* 17 */
+ UOPTION_DEF("formatVersion", '\x01', UOPT_REQUIRES_ARG),/* 18 */
+ UOPTION_DEF("writePoolBundle", '\x01', UOPT_OPTIONAL_ARG),/* 19 */
+ UOPTION_DEF("usePoolBundle", '\x01', UOPT_OPTIONAL_ARG),/* 20 */
+ UOPTION_DEF("includeUnihanColl", '\x01', UOPT_NO_ARG),/* 21 */ /* temporary, don't display in usage info */
+ UOPTION_DEF("filterDir", '\x01', UOPT_OPTIONAL_ARG), /* 22 */
+ UOPTION_DEF("icu4xMode", 'X', UOPT_NO_ARG),/* 23 */
+ UOPTION_DEF("ucadata", '\x01', UOPT_REQUIRES_ARG),/* 24 */
+ };
+
+/* Set in main() when --write-java is given; outputEnc receives that option's value. */
+static UBool write_java = false;
+/* Set in main() when --write-xliff is given. */
+static UBool write_xliff = false;
+static const char* outputEnc ="";
+
+/* Pool bundle data; filled in by main() when --usePoolBundle is given. */
+static ResFile poolBundle;
+
+/*added by Jing*/
+/* Value of --language, or nullptr when the option is absent. */
+static const char* language = nullptr;
+/* Optional argument of --write-xliff, or nullptr when none was supplied. */
+static const char* xliffOutputFileName = nullptr;
+int
+main(int argc,
+ char* argv[])
+{
+ UErrorCode status = U_ZERO_ERROR;
+ const char *arg = nullptr;
+ const char *outputDir = nullptr; /* nullptr = no output directory, use current */
+ const char *inputDir = nullptr;
+ const char *filterDir = nullptr;
+ const char *encoding = "";
+ int i;
+ UBool illegalArg = false;
+
+ U_MAIN_INIT_ARGS(argc, argv);
+
+ options[JAVA_PACKAGE].value = "com.ibm.icu.impl.data";
+ options[BUNDLE_NAME].value = "LocaleElements";
+ argc = u_parseArgs(argc, argv, UPRV_LENGTHOF(options), options);
+
+ /* error handling, printing usage message */
+ if(argc<0) {
+ fprintf(stderr, "%s: error in command line argument \"%s\"\n", argv[0], argv[-argc]);
+ illegalArg = true;
+ } else if(argc<2) {
+ illegalArg = true;
+ }
+ if(options[WRITE_POOL_BUNDLE].doesOccur && options[USE_POOL_BUNDLE].doesOccur) {
+ fprintf(stderr, "%s: cannot combine --writePoolBundle and --usePoolBundle\n", argv[0]);
+ illegalArg = true;
+ }
+ if (options[ICU4X_MODE].doesOccur && !options[UCADATA].doesOccur) {
+ fprintf(stderr, "%s: --icu4xMode requires --ucadata\n", argv[0]);
+ illegalArg = true;
+ }
+ if(options[FORMAT_VERSION].doesOccur) {
+ const char *s = options[FORMAT_VERSION].value;
+ if(uprv_strlen(s) != 1 || (s[0] < '1' && '3' < s[0])) {
+ fprintf(stderr, "%s: unsupported --formatVersion %s\n", argv[0], s);
+ illegalArg = true;
+ } else if(s[0] == '1' &&
+ (options[WRITE_POOL_BUNDLE].doesOccur || options[USE_POOL_BUNDLE].doesOccur)
+ ) {
+ fprintf(stderr, "%s: cannot combine --formatVersion 1 with --writePoolBundle or --usePoolBundle\n", argv[0]);
+ illegalArg = true;
+ } else {
+ setFormatVersion(s[0] - '0');
+ }
+ }
+
+ if((options[JAVA_PACKAGE].doesOccur || options[BUNDLE_NAME].doesOccur) &&
+ !options[WRITE_JAVA].doesOccur) {
+ fprintf(stderr,
+ "%s error: command line argument --java-package or --bundle-name "
+ "without --write-java\n",
+ argv[0]);
+ illegalArg = true;
+ }
+
+ if(options[VERSION].doesOccur) {
+ fprintf(stderr,
+ "%s version %s (ICU version %s).\n"
+ "%s\n",
+ argv[0], GENRB_VERSION, U_ICU_VERSION, U_COPYRIGHT_STRING);
+ if(!illegalArg) {
+ return U_ZERO_ERROR;
+ }
+ }
+
+ if(illegalArg || options[HELP1].doesOccur || options[HELP2].doesOccur) {
+ /*
+ * Broken into chunks because the C89 standard says the minimum
+ * required supported string length is 509 bytes.
+ */
+ fprintf(stderr,
+ "Usage: %s [OPTIONS] [FILES]\n"
+ "\tReads the list of resource bundle source files and creates\n"
+ "\tbinary version of resource bundles (.res files)\n",
+ argv[0]);
+ fprintf(stderr,
+ "Options:\n"
+ "\t-h or -? or --help this usage text\n"
+ "\t-q or --quiet do not display warnings\n"
+ "\t-v or --verbose print extra information when processing files\n"
+ "\t-V or --version prints out version number and exits\n"
+ "\t-c or --copyright include copyright notice\n");
+ fprintf(stderr,
+ "\t-e or --encoding encoding of source files\n"
+ "\t-d or --destdir destination directory, followed by the path, defaults to '%s'\n"
+ "\t-s or --sourcedir source directory for files followed by path, defaults to '%s'\n"
+ "\t-i or --icudatadir directory for locating any needed intermediate data files,\n"
+ "\t followed by path, defaults to '%s'\n",
+ u_getDataDirectory(), u_getDataDirectory(), u_getDataDirectory());
+ fprintf(stderr,
+ "\t-j or --write-java write a Java ListResourceBundle for ICU4J, followed by optional encoding\n"
+ "\t defaults to ASCII and \\uXXXX format.\n"
+ "\t --java-package For --write-java: package name for writing the ListResourceBundle,\n"
+ "\t defaults to com.ibm.icu.impl.data\n");
+ fprintf(stderr,
+ "\t-b or --bundle-name For --write-java: root resource bundle name for writing the ListResourceBundle,\n"
+ "\t defaults to LocaleElements\n"
+ "\t-x or --write-xliff write an XLIFF file for the resource bundle. Followed by\n"
+ "\t an optional output file name.\n"
+ "\t-k or --strict use pedantic parsing of syntax\n"
+ /*added by Jing*/
+ "\t-l or --language for XLIFF: language code compliant with BCP 47.\n");
+ fprintf(stderr,
+ "\t-C or --noBinaryCollation do not generate binary collation image;\n"
+ "\t makes .res file smaller but collator instantiation much slower;\n"
+ "\t maintains ability to get tailoring rules\n"
+ "\t-R or --omitCollationRules do not include collation (tailoring) rules;\n"
+ "\t makes .res file smaller and maintains collator instantiation speed\n"
+ "\t but tailoring rules will not be available (they are rarely used)\n");
+ fprintf(stderr,
+ "\t --formatVersion write a .res file compatible with the requested formatVersion (single digit);\n"
+ "\t for example, --formatVersion 1\n");
+ fprintf(stderr,
+ "\t --writePoolBundle [directory] write a pool.res file with all of the keys of all input bundles\n"
+ "\t --usePoolBundle [directory] point to keys from the pool.res keys pool bundle if they are available there;\n"
+ "\t makes .res files smaller but dependent on the pool bundle\n"
+ "\t (--writePoolBundle and --usePoolBundle cannot be combined)\n");
+ fprintf(stderr,
+ "\t --filterDir Input directory where filter files are available.\n"
+ "\t For more on filter files, see ICU Data Build Tool.\n");
+
+ return illegalArg ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR;
+ }
+
+ if(options[VERBOSE].doesOccur) {
+ setVerbose(true);
+ }
+
+ if(options[QUIET].doesOccur) {
+ setShowWarning(false);
+ }
+ if(options[STRICT].doesOccur) {
+ setStrict(true);
+ }
+ if(options[COPYRIGHT].doesOccur){
+ setIncludeCopyright(true);
+ }
+
+ if(options[SOURCEDIR].doesOccur) {
+ inputDir = options[SOURCEDIR].value;
+ }
+
+ if(options[DESTDIR].doesOccur) {
+ outputDir = options[DESTDIR].value;
+ }
+
+ if (options[FILTERDIR].doesOccur) {
+ filterDir = options[FILTERDIR].value;
+ }
+
+ if(options[ENCODING].doesOccur) {
+ encoding = options[ENCODING].value;
+ }
+
+ if(options[ICUDATADIR].doesOccur) {
+ u_setDataDirectory(options[ICUDATADIR].value);
+ }
+ /* Initialize ICU */
+ u_init(&status);
+ if (U_FAILURE(status) && status != U_FILE_ACCESS_ERROR) {
+ /* Note: u_init() will try to open ICU property data.
+ * failures here are expected when building ICU from scratch.
+ * ignore them.
+ */
+ fprintf(stderr, "%s: can not initialize ICU. status = %s\n",
+ argv[0], u_errorName(status));
+ exit(1);
+ }
+ status = U_ZERO_ERROR;
+ if(options[WRITE_JAVA].doesOccur) {
+ write_java = true;
+ outputEnc = options[WRITE_JAVA].value;
+ }
+
+ if(options[WRITE_XLIFF].doesOccur) {
+ write_xliff = true;
+ if(options[WRITE_XLIFF].value != nullptr){
+ xliffOutputFileName = options[WRITE_XLIFF].value;
+ }
+ }
+
+ if (options[UCADATA].doesOccur) {
+#if !UCONFIG_NO_COLLATION
+ CollationRoot::forceLoadFromFile(options[UCADATA].value, status);
+#else
+ fprintf(stderr, "--ucadata was used with UCONFIG_NO_COLLATION\n");
+ return status;
+#endif
+ }
+
+ initParser();
+
+ /*added by Jing*/
+ if(options[LANGUAGE].doesOccur) {
+ language = options[LANGUAGE].value;
+ }
+
+ LocalPointer<SRBRoot> newPoolBundle;
+ if(options[WRITE_POOL_BUNDLE].doesOccur) {
+ newPoolBundle.adoptInsteadAndCheckErrorCode(new SRBRoot(nullptr, true, status), status);
+ if(U_FAILURE(status)) {
+ fprintf(stderr, "unable to create an empty bundle for the pool keys: %s\n", u_errorName(status));
+ return status;
+ } else {
+ const char *poolResName = "pool.res";
+ char *nameWithoutSuffix = static_cast<char *>(uprv_malloc(uprv_strlen(poolResName) + 1));
+ if (nameWithoutSuffix == nullptr) {
+ fprintf(stderr, "out of memory error\n");
+ return U_MEMORY_ALLOCATION_ERROR;
+ }
+ uprv_strcpy(nameWithoutSuffix, poolResName);
+ *uprv_strrchr(nameWithoutSuffix, '.') = 0;
+ newPoolBundle->fLocale = nameWithoutSuffix;
+ }
+ }
+
+ if(options[USE_POOL_BUNDLE].doesOccur) {
+ const char *poolResName = "pool.res";
+ FileStream *poolFile;
+ int32_t poolFileSize;
+ int32_t indexLength;
+ /*
+ * TODO: Consolidate inputDir/filename handling from main() and processFile()
+ * into a common function, and use it here as well.
+ * Try to create toolutil functions for dealing with dir/filenames and
+ * loading ICU data files without udata_open().
+ * Share code with icupkg?
+ * Also, make_res_filename() seems to be unused. Review and remove.
+ */
+ CharString poolFileName;
+ if (options[USE_POOL_BUNDLE].value!=nullptr) {
+ poolFileName.append(options[USE_POOL_BUNDLE].value, status);
+ } else if (inputDir) {
+ poolFileName.append(inputDir, status);
+ }
+ poolFileName.appendPathPart(poolResName, status);
+ if (U_FAILURE(status)) {
+ return status;
+ }
+ poolFile = T_FileStream_open(poolFileName.data(), "rb");
+ if (poolFile == nullptr) {
+ fprintf(stderr, "unable to open pool bundle file %s\n", poolFileName.data());
+ return 1;
+ }
+ poolFileSize = T_FileStream_size(poolFile);
+ if (poolFileSize < 32) {
+ fprintf(stderr, "the pool bundle file %s is too small\n", poolFileName.data());
+ return 1;
+ }
+ poolBundle.fBytes = new uint8_t[(poolFileSize + 15) & ~15];
+ if (poolFileSize > 0 && poolBundle.fBytes == nullptr) {
+ fprintf(stderr, "unable to allocate memory for the pool bundle file %s\n", poolFileName.data());
+ return U_MEMORY_ALLOCATION_ERROR;
+ }
+
+ UDataSwapper *ds;
+ const DataHeader *header;
+ int32_t bytesRead = T_FileStream_read(poolFile, poolBundle.fBytes, poolFileSize);
+ if (bytesRead != poolFileSize) {
+ fprintf(stderr, "unable to read the pool bundle file %s\n", poolFileName.data());
+ return 1;
+ }
+ /*
+ * Swap the pool bundle so that a single checked-in file can be used.
+ * The swapper functions also test that the data looks like
+ * a well-formed .res file.
+ */
+ ds = udata_openSwapperForInputData(poolBundle.fBytes, bytesRead,
+ U_IS_BIG_ENDIAN, U_CHARSET_FAMILY, &status);
+ if (U_FAILURE(status)) {
+ fprintf(stderr, "udata_openSwapperForInputData(pool bundle %s) failed: %s\n",
+ poolFileName.data(), u_errorName(status));
+ return status;
+ }
+ ures_swap(ds, poolBundle.fBytes, bytesRead, poolBundle.fBytes, &status);
+ udata_closeSwapper(ds);
+ if (U_FAILURE(status)) {
+ fprintf(stderr, "ures_swap(pool bundle %s) failed: %s\n",
+ poolFileName.data(), u_errorName(status));
+ return status;
+ }
+ header = (const DataHeader *)poolBundle.fBytes;
+ if (header->info.formatVersion[0] < 2) {
+ fprintf(stderr, "invalid format of pool bundle file %s\n", poolFileName.data());
+ return U_INVALID_FORMAT_ERROR;
+ }
+ const int32_t *pRoot = (const int32_t *)(
+ (const char *)header + header->dataHeader.headerSize);
+ poolBundle.fIndexes = pRoot + 1;
+ indexLength = poolBundle.fIndexes[URES_INDEX_LENGTH] & 0xff;
+ if (indexLength <= URES_INDEX_POOL_CHECKSUM) {
+ fprintf(stderr, "insufficient indexes[] in pool bundle file %s\n", poolFileName.data());
+ return U_INVALID_FORMAT_ERROR;
+ }
+ int32_t keysBottom = 1 + indexLength;
+ int32_t keysTop = poolBundle.fIndexes[URES_INDEX_KEYS_TOP];
+ poolBundle.fKeys = (const char *)(pRoot + keysBottom);
+ poolBundle.fKeysLength = (keysTop - keysBottom) * 4;
+ poolBundle.fChecksum = poolBundle.fIndexes[URES_INDEX_POOL_CHECKSUM];
+
+ for (i = 0; i < poolBundle.fKeysLength; ++i) {
+ if (poolBundle.fKeys[i] == 0) {
+ ++poolBundle.fKeysCount;
+ }
+ }
+
+ // 16BitUnits[] begins with strings-v2.
+ // The strings-v2 may optionally be terminated by what looks like
+ // an explicit string length that exceeds the number of remaining 16-bit units.
+ int32_t stringUnitsLength = (poolBundle.fIndexes[URES_INDEX_16BIT_TOP] - keysTop) * 2;
+ if (stringUnitsLength >= 2 && getFormatVersion() >= 3) {
+ poolBundle.fStrings = new PseudoListResource(nullptr, status);
+ if (poolBundle.fStrings == nullptr) {
+ fprintf(stderr, "unable to allocate memory for the pool bundle strings %s\n",
+ poolFileName.data());
+ return U_MEMORY_ALLOCATION_ERROR;
+ }
+ // The PseudoListResource constructor call did not allocate further memory.
+ assert(U_SUCCESS(status));
+ const char16_t *p = (const char16_t *)(pRoot + keysTop);
+ int32_t remaining = stringUnitsLength;
+ do {
+ int32_t first = *p;
+ int8_t numCharsForLength;
+ int32_t length;
+ if (!U16_IS_TRAIL(first)) {
+ // NUL-terminated
+ numCharsForLength = 0;
+ for (length = 0;
+ length < remaining && p[length] != 0;
+ ++length) {}
+ } else if (first < 0xdfef) {
+ numCharsForLength = 1;
+ length = first & 0x3ff;
+ } else if (first < 0xdfff && remaining >= 2) {
+ numCharsForLength = 2;
+ length = ((first - 0xdfef) << 16) | p[1];
+ } else if (first == 0xdfff && remaining >= 3) {
+ numCharsForLength = 3;
+ length = ((int32_t)p[1] << 16) | p[2];
+ } else {
+ break; // overrun
+ }
+ // Check for overrun before changing remaining,
+ // so that it is always accurate after the loop body.
+ if ((numCharsForLength + length) >= remaining ||
+ p[numCharsForLength + length] != 0) {
+ break; // overrun or explicitly terminated
+ }
+ int32_t poolStringIndex = stringUnitsLength - remaining;
+ // Maximum pool string index when suffix-sharing the last character.
+ int32_t maxStringIndex = poolStringIndex + numCharsForLength + length - 1;
+ if (maxStringIndex >= RES_MAX_OFFSET) {
+ // pool string index overrun
+ break;
+ }
+ p += numCharsForLength;
+ remaining -= numCharsForLength;
+ if (length != 0) {
+ StringResource *sr =
+ new StringResource(poolStringIndex, numCharsForLength,
+ p, length, status);
+ if (sr == nullptr) {
+ fprintf(stderr, "unable to allocate memory for a pool bundle string %s\n",
+ poolFileName.data());
+ return U_MEMORY_ALLOCATION_ERROR;
+ }
+ poolBundle.fStrings->add(sr);
+ poolBundle.fStringIndexLimit = maxStringIndex + 1;
+ // The StringResource constructor did not allocate further memory.
+ assert(U_SUCCESS(status));
+ }
+ p += length + 1;
+ remaining -= length + 1;
+ } while (remaining > 0);
+ if (poolBundle.fStrings->fCount == 0) {
+ delete poolBundle.fStrings;
+ poolBundle.fStrings = nullptr;
+ }
+ }
+
+ T_FileStream_close(poolFile);
+ setUsePoolBundle(true);
+ if (isVerbose() && poolBundle.fStrings != nullptr) {
+ printf("number of shared strings: %d\n", (int)poolBundle.fStrings->fCount);
+ int32_t length = poolBundle.fStringIndexLimit + 1; // incl. last NUL
+ printf("16-bit units for strings: %6d = %6d bytes\n",
+ (int)length, (int)length * 2);
+ }
+ }
+
+ if(!options[FORMAT_VERSION].doesOccur && getFormatVersion() == 3 &&
+ poolBundle.fStrings == nullptr &&
+ !options[WRITE_POOL_BUNDLE].doesOccur) {
+ // If we just default to formatVersion 3
+ // but there are no pool bundle strings to share
+ // and we do not write a pool bundle,
+ // then write formatVersion 2 which is just as good.
+ setFormatVersion(2);
+ }
+
+ if(options[INCLUDE_UNIHAN_COLL].doesOccur) {
+ puts("genrb option --includeUnihanColl ignored: \n"
+ "CLDR 26/ICU 54 unihan data is small, except\n"
+ "the ucadata-unihan.icu version of the collation root data\n"
+ "is about 300kB larger than the ucadata-implicithan.icu version.");
+ }
+
+ if((argc-1)!=1) {
+ printf("genrb number of files: %d\n", argc - 1);
+ }
+ /* generate the binary files */
+ for(i = 1; i < argc; ++i) {
+ status = U_ZERO_ERROR;
+ arg = getLongPathname(argv[i]);
+
+ CharString theCurrentFileName;
+ if (inputDir) {
+ theCurrentFileName.append(inputDir, status);
+ }
+ theCurrentFileName.appendPathPart(arg, status);
+ if (U_FAILURE(status)) {
+ break;
+ }
+
+ gCurrentFileName = theCurrentFileName.data();
+ if (isVerbose()) {
+ printf("Processing file \"%s\"\n", theCurrentFileName.data());
+ }
+ processFile(arg, encoding, inputDir, outputDir, filterDir, nullptr,
+ newPoolBundle.getAlias(),
+ options[NO_BINARY_COLLATION].doesOccur, status);
+ }
+
+ poolBundle.close();
+
+ if(U_SUCCESS(status) && options[WRITE_POOL_BUNDLE].doesOccur) {
+ const char* writePoolDir;
+ if (options[WRITE_POOL_BUNDLE].value!=nullptr) {
+ writePoolDir = options[WRITE_POOL_BUNDLE].value;
+ } else {
+ writePoolDir = outputDir;
+ }
+ char outputFileName[256];
+ newPoolBundle->write(writePoolDir, nullptr, outputFileName, sizeof(outputFileName), status);
+ if(U_FAILURE(status)) {
+ fprintf(stderr, "unable to write the pool bundle: %s\n", u_errorName(status));
+ }
+ }
+
+ u_cleanup();
+
+ /* Don't return warnings as a failure */
+ if (U_SUCCESS(status)) {
+ return 0;
+ }
+
+ return status;
+}
+
+/*
+ * Process a file: open and parse one resource bundle source file, optionally
+ * filter it, and write it out as a binary bundle, Java source, or XLIFF.
+ *
+ * filename            name of the source file; nullptr sets U_ILLEGAL_ARGUMENT_ERROR
+ * cp                  encoding name handed to ucbuf_open() by address
+ * inputDir            source directory; when nullptr, the directory part of
+ *                     filename (if it has one) is used instead
+ * outputDir           output directory handed to the writer functions
+ * filterDir           directory containing a filter file with the same name as
+ *                     filename, or nullptr to skip filtering
+ * packageName         package name handed to make_res_filename() and write()
+ * newPoolBundle       receives the key bytes of this bundle when
+ *                     options[WRITE_POOL_BUNDLE] occurs
+ * omitBinaryCollation passed, negated, to parse()
+ * status              in/out error code; nothing is done if it is already a failure
+ */
+void
+processFile(const char *filename, const char *cp,
+            const char *inputDir, const char *outputDir, const char *filterDir,
+            const char *packageName,
+            SRBRoot *newPoolBundle,
+            UBool omitBinaryCollation, UErrorCode &status) {
+    LocalPointer<SRBRoot> data;
+    LocalUCHARBUFPointer ucbuf;
+    CharString openFileName;
+    CharString inputDirBuf;
+
+    // Start out as an empty string so that the final error message never
+    // prints uninitialized memory if a writer fails before storing a name.
+    char outputFileName[256] = "";
+
+    if (U_FAILURE(status)) {
+        return;
+    }
+    if (filename == nullptr) {
+        status = U_ILLEGAL_ARGUMENT_ERROR;
+        return;
+    }
+
+    if (inputDir == nullptr) {
+        const char *filenameBegin = uprv_strrchr(filename, U_FILE_SEP_CHAR);
+        if (filenameBegin != nullptr) {
+            /*
+             * When a filename ../../../data/root.txt is specified,
+             * we presume that the input directory is ../../../data
+             * This is very important when the resource file includes
+             * another file, like UCARules.txt or thaidict.brk.
+             */
+            int32_t filenameSize = (int32_t)(filenameBegin - filename + 1);
+            inputDirBuf.append(filename, filenameSize, status);
+
+            inputDir = inputDirBuf.data();
+        }
+    } else {
+        int32_t dirlen = (int32_t)uprv_strlen(inputDir);
+
+        // An empty inputDir has no last character to inspect; it falls through
+        // to the plain append below, which then adds nothing.
+        if (dirlen > 0 && inputDir[dirlen - 1] != U_FILE_SEP_CHAR) {
+            /*
+             * append the input dir to openFileName if the first char in
+             * filename is not file separation char and the last char input directory is not '.'.
+             * This is to support :
+             * genrb -s. /home/icu/data
+             * genrb -s. icu/data
+             * The user cannot mix notations like
+             * genrb -s. /icu/data --- the absolute path specified. -s redundant
+             * user should use
+             * genrb -s. icu/data --- start from CWD and look in icu/data dir
+             */
+            if ((filename[0] != U_FILE_SEP_CHAR) && (inputDir[dirlen - 1] != '.')) {
+                openFileName.append(inputDir, status);
+            }
+        } else {
+            openFileName.append(inputDir, status);
+        }
+    }
+    openFileName.appendPathPart(filename, status);
+
+    // Test for CharString failure
+    if (U_FAILURE(status)) {
+        return;
+    }
+
+    ucbuf.adoptInstead(ucbuf_open(openFileName.data(), &cp, getShowWarning(), true, &status));
+    if (status == U_FILE_ACCESS_ERROR) {
+
+        fprintf(stderr, "couldn't open file %s\n", openFileName.data());
+        return;
+    }
+    if (ucbuf.isNull() || U_FAILURE(status)) {
+        fprintf(stderr, "An error occurred processing file %s. Error: %s\n",
+                openFileName.data(), u_errorName(status));
+        return;
+    }
+    /* auto detected popular encodings? */
+    if (cp != nullptr && isVerbose()) {
+        printf("autodetected encoding %s\n", cp);
+    }
+    /* Parse the data into an SRBRoot */
+    data.adoptInstead(parse(ucbuf.getAlias(), inputDir, outputDir, filename,
+                            !omitBinaryCollation, options[NO_COLLATION_RULES].doesOccur, options[ICU4X_MODE].doesOccur, &status));
+
+    if (data.isNull() || U_FAILURE(status)) {
+        fprintf(stderr, "couldn't parse the file %s. Error:%s\n", filename, u_errorName(status));
+        return;
+    }
+
+    // Run filtering before writing pool bundle
+    if (filterDir != nullptr) {
+        CharString filterFileName(filterDir, status);
+        filterFileName.appendPathPart(filename, status);
+        if (U_FAILURE(status)) {
+            return;
+        }
+
+        // Open the file and read it into filter
+        SimpleRuleBasedPathFilter filter;
+        std::ifstream f(filterFileName.data());
+        if (f.fail()) {
+            std::cerr << "genrb error: unable to open " << filterFileName.data() << std::endl;
+            status = U_FILE_ACCESS_ERROR;
+            return;
+        }
+        std::string currentLine;
+        while (std::getline(f, currentLine)) {
+            // Ignore # comments and empty lines
+            if (currentLine.empty() || currentLine[0] == '#') {
+                continue;
+            }
+            filter.addRule(currentLine, status);
+            if (U_FAILURE(status)) {
+                return;
+            }
+        }
+
+        if (isVerbose()) {
+            filter.print(std::cout);
+        }
+
+        // Apply the filter to the data
+        ResKeyPath path;
+        data->fRoot->applyFilter(filter, path, data.getAlias());
+    }
+
+    if (options[WRITE_POOL_BUNDLE].doesOccur) {
+        data->fWritePoolBundle = newPoolBundle;
+        data->compactKeys(status);
+        int32_t newKeysLength;
+        const char *newKeys = data->getKeyBytes(&newKeysLength);
+        newPoolBundle->addKeyBytes(newKeys, newKeysLength, status);
+        if (U_FAILURE(status)) {
+            fprintf(stderr, "bundle_compactKeys(%s) or bundle_getKeyBytes() failed: %s\n",
+                    filename, u_errorName(status));
+            return;
+        }
+        /* count the number of just-added key strings */
+        for (const char *newKeysLimit = newKeys + newKeysLength; newKeys < newKeysLimit; ++newKeys) {
+            if (*newKeys == 0) {
+                ++newPoolBundle->fKeysCount;
+            }
+        }
+    }
+
+    if (options[USE_POOL_BUNDLE].doesOccur) {
+        data->fUsePoolBundle = &poolBundle;
+    }
+
+    /* Determine the target rb filename */
+    // The generated name is freed right away; only the status is used here.
+    uprv_free(make_res_filename(filename, outputDir, packageName, status));
+    if (U_FAILURE(status)) {
+        fprintf(stderr, "couldn't make the res fileName for bundle %s. Error:%s\n",
+                filename, u_errorName(status));
+        return;
+    }
+    if (write_java == true) {
+        bundle_write_java(data.getAlias(), outputDir, outputEnc,
+                          outputFileName, sizeof(outputFileName),
+                          options[JAVA_PACKAGE].value, options[BUNDLE_NAME].value, &status);
+    } else if (write_xliff == true) {
+        bundle_write_xml(data.getAlias(), outputDir, outputEnc,
+                         filename, outputFileName, sizeof(outputFileName),
+                         language, xliffOutputFileName, &status);
+    } else {
+        /* Write the data to the file */
+        data->write(outputDir, packageName, outputFileName, sizeof(outputFileName), status);
+    }
+    if (U_FAILURE(status)) {
+        fprintf(stderr, "couldn't write bundle %s. Error:%s\n", outputFileName, u_errorName(status));
+    }
+}
+
+/*
+ * Generate the target .res file name from the input file name.
+ *
+ * With outputDir == nullptr the result is the directory part of filename
+ * followed by the optional "<packageName>_" prefix and the basename.
+ * Otherwise the result is outputDir, a file separator (added only when
+ * outputDir does not already end in one), the optional prefix, and the basename.
+ * Note that RES_SUFFIX is not appended here.
+ *
+ * Returns a uprv_malloc()ed string that the caller must uprv_free(), or
+ * nullptr if status was already a failure or an allocation failed
+ * (in which case status is set to U_MEMORY_ALLOCATION_ERROR).
+ */
+static char*
+make_res_filename(const char *filename,
+                  const char *outputDir,
+                  const char *packageName,
+                  UErrorCode &status) {
+    char *basename = nullptr;
+    char *dirname = nullptr;
+    char *resName = nullptr;
+
+    int32_t pkgLen = 0; /* length of package prefix */
+
+    if (U_FAILURE(status)) {
+        return nullptr;
+    }
+
+    if (packageName != nullptr) {
+        /* one extra for the '_' that follows the package name */
+        pkgLen = (int32_t)(1 + uprv_strlen(packageName));
+    }
+
+    /* determine basename, and compiled file names */
+    basename = (char*) uprv_malloc(sizeof(char) * (uprv_strlen(filename) + 1));
+    if (basename == nullptr) {
+        status = U_MEMORY_ALLOCATION_ERROR;
+        goto finish;
+    }
+
+    get_basename(basename, filename);
+
+    dirname = (char*) uprv_malloc(sizeof(char) * (uprv_strlen(filename) + 1));
+    if (dirname == nullptr) {
+        status = U_MEMORY_ALLOCATION_ERROR;
+        goto finish;
+    }
+
+    get_dirname(dirname, filename);
+
+    if (outputDir == nullptr) {
+        /* output in same dir as .txt */
+        resName = (char*) uprv_malloc(sizeof(char) * (uprv_strlen(dirname)
+                                                      + pkgLen
+                                                      + uprv_strlen(basename)
+                                                      + uprv_strlen(RES_SUFFIX) + 8));
+        if (resName == nullptr) {
+            status = U_MEMORY_ALLOCATION_ERROR;
+            goto finish;
+        }
+
+        uprv_strcpy(resName, dirname);
+
+        if (packageName != nullptr) {
+            uprv_strcat(resName, packageName);
+            uprv_strcat(resName, "_");
+        }
+
+        uprv_strcat(resName, basename);
+
+    } else {
+        int32_t dirlen = (int32_t)uprv_strlen(outputDir);
+        int32_t basenamelen = (int32_t)uprv_strlen(basename);
+
+        resName = (char*) uprv_malloc(sizeof(char) * (dirlen + pkgLen + basenamelen + 8));
+
+        if (resName == nullptr) {
+            status = U_MEMORY_ALLOCATION_ERROR;
+            goto finish;
+        }
+
+        uprv_strcpy(resName, outputDir);
+
+        /*
+         * Look at the last character of outputDir, not at its terminating NUL:
+         * outputDir[dirlen] is always '\0', which made the old test always true
+         * and doubled the separator for directories that already end in one.
+         * An empty outputDir still gets a separator, as before.
+         */
+        if (dirlen == 0 || outputDir[dirlen - 1] != U_FILE_SEP_CHAR) {
+            resName[dirlen] = U_FILE_SEP_CHAR;
+            resName[dirlen + 1] = '\0';
+        }
+
+        if (packageName != nullptr) {
+            uprv_strcat(resName, packageName);
+            uprv_strcat(resName, "_");
+        }
+
+        uprv_strcat(resName, basename);
+    }
+
+finish:
+    uprv_free(basename);
+    uprv_free(dirname);
+
+    return resName;
+}
+
+/*
+ * Local Variables:
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/intl/icu/source/tools/genrb/genrb.h b/intl/icu/source/tools/genrb/genrb.h
new file mode 100644
index 0000000000..019020a34a
--- /dev/null
+++ b/intl/icu/source/tools/genrb/genrb.h
@@ -0,0 +1,52 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+*
+* Copyright (C) 2002-2015, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+*******************************************************************************
+*
+* File genrb.h
+*/
+
+#ifndef GENRB_H
+#define GENRB_H
+
+#include <stdio.h>
+#include "unicode/utypes.h"
+#include "unicode/putil.h"
+#include "cmemory.h"
+#include "cstring.h"
+#include "filestrm.h"
+
+
+#include "ucbuf.h"
+#include "errmsg.h"
+#include "parse.h"
+#include "rbutil.h"
+
+#include "toolutil.h"
+#include "uoptions.h"
+
+#include "unicode/ucol.h"
+#include "unicode/uloc.h"
+
+/* The version of genrb */
+#define GENRB_VERSION "56"
+
+U_CDECL_BEGIN
+
+U_CAPI void processFile( // NOTE(review): the processFile() defined in genrb.cpp additionally takes filterDir and newPoolBundle and uses UErrorCode& — this C prototype looks stale; confirm whether anything still links against it.
+ const char *filename,
+ const char* cp,
+ const char *inputDir,
+ const char *outputDir,
+ const char *packageName,
+ UBool omitBinaryCollation,
+ UErrorCode *status);
+
+U_CDECL_END
+
+#endif
diff --git a/intl/icu/source/tools/genrb/genrb.vcxproj b/intl/icu/source/tools/genrb/genrb.vcxproj
new file mode 100644
index 0000000000..66651c11d0
--- /dev/null
+++ b/intl/icu/source/tools/genrb/genrb.vcxproj
@@ -0,0 +1,113 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="14.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+ <PropertyGroup Label="Globals">
+ <ProjectGuid>{97521D06-EC47-45D4-8BD0-9E16B3F93B2A}</ProjectGuid>
+ </PropertyGroup>
+ <PropertyGroup Label="Configuration">
+ <ConfigurationType>Application</ConfigurationType>
+ <UseOfMfc>false</UseOfMfc>
+ <CharacterSet>MultiByte</CharacterSet>
+ </PropertyGroup>
+ <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+ <!-- The following import will include the 'default' configuration options for VS projects. -->
+ <Import Project="..\..\allinone\Build.Windows.ProjectConfiguration.props" />
+ <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+ <ImportGroup Label="ExtensionSettings">
+ </ImportGroup>
+ <PropertyGroup Label="UserMacros" />
+ <PropertyGroup>
+ <_ProjectFileVersion>10.0.30319.1</_ProjectFileVersion>
+ <OutDir>.\$(Platform)\$(Configuration)\</OutDir>
+ <IntDir>.\$(Platform)\$(Configuration)\</IntDir>
+ <!-- The ICU projects use "Win32" to mean "x86", so we need to special case it. -->
+ <OutDir Condition="'$(Platform)'=='Win32'">.\x86\$(Configuration)\</OutDir>
+ <IntDir Condition="'$(Platform)'=='Win32'">.\x86\$(Configuration)\</IntDir>
+ <!-- Disable Incremental Linking for Release builds as it prevents Link-time Code Generation -->
+ <LinkIncremental Condition="'$(Configuration)'=='Debug'">true</LinkIncremental>
+ <LinkIncremental Condition="'$(Configuration)'=='Release'">false</LinkIncremental>
+ </PropertyGroup>
+ <!-- Options that are common to *all* configurations -->
+ <ItemDefinitionGroup>
+ <Midl>
+ <TypeLibraryName>$(OutDir)/genrb.tlb</TypeLibraryName>
+ </Midl>
+ <ClCompile>
+ <WarningLevel>Level3</WarningLevel>
+ <CompileAs>Default</CompileAs>
+ <DisableLanguageExtensions>true</DisableLanguageExtensions>
+ <AdditionalIncludeDirectories>..\..\..\include;..\..\common;..\toolutil;..\..\i18n;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+ <PrecompiledHeaderOutputFile>$(OutDir)/genrb.pch</PrecompiledHeaderOutputFile>
+ <AssemblerListingLocation>$(OutDir)/</AssemblerListingLocation>
+ <ObjectFileName>$(OutDir)/</ObjectFileName>
+ <ProgramDataBaseFileName>$(OutDir)/genrb.pdb</ProgramDataBaseFileName>
+ </ClCompile>
+ <Link>
+ <SubSystem>Console</SubSystem>
+ <OutputFile>$(OutDir)/genrb.exe</OutputFile>
+ <AdditionalLibraryDirectories>..\..\..\$(IcuLibOutputDir);%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
+ </Link>
+ <CustomBuildStep>
+ <Command>copy "$(TargetPath)" ..\..\..\$(IcuBinOutputDir)</Command>
+ <Outputs>..\..\..\$(IcuBinOutputDir)\$(TargetFileName);%(Outputs)</Outputs>
+ </CustomBuildStep>
+ </ItemDefinitionGroup>
+ <!-- Options that are common to all 'Debug' project configurations -->
+ <ItemDefinitionGroup Condition="'$(Configuration)'=='Debug'">
+ <ClCompile>
+ <BrowseInformation>true</BrowseInformation>
+ <RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
+ </ClCompile>
+ <Link>
+ <AdditionalDependencies>icuucd.lib;icuind.lib;icutud.lib;%(AdditionalDependencies)</AdditionalDependencies>
+ </Link>
+ </ItemDefinitionGroup>
+ <!-- Options that are common to all 'Release' project configurations -->
+ <ItemDefinitionGroup Condition="'$(Configuration)'=='Release'">
+ <ClCompile>
+ <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+ <FunctionLevelLinking>true</FunctionLevelLinking>
+ </ClCompile>
+ <Link>
+ <AdditionalDependencies>icuuc.lib;icuin.lib;icutu.lib;%(AdditionalDependencies)</AdditionalDependencies>
+ </Link>
+ </ItemDefinitionGroup>
+ <ItemGroup>
+ <ClCompile Include="errmsg.c" />
+ <ClCompile Include="filterrb.cpp" />
+ <ClCompile Include="genrb.cpp" />
+ <ClCompile Include="parse.cpp">
+ <DisableLanguageExtensions>false</DisableLanguageExtensions>
+ </ClCompile>
+ <ClCompile Include="prscmnts.cpp">
+ <DisableLanguageExtensions>false</DisableLanguageExtensions>
+ </ClCompile>
+ <ClCompile Include="rbutil.c" />
+ <ClCompile Include="read.c" />
+ <ClCompile Include="reslist.cpp">
+ <DisableLanguageExtensions>false</DisableLanguageExtensions>
+ </ClCompile>
+ <ClCompile Include="rle.c" />
+ <ClCompile Include="ustr.c" />
+ <ClCompile Include="wrtjava.cpp">
+ <DisableLanguageExtensions>false</DisableLanguageExtensions>
+ </ClCompile>
+ <ClCompile Include="wrtxml.cpp">
+ <DisableLanguageExtensions>false</DisableLanguageExtensions>
+ </ClCompile>
+ </ItemGroup>
+ <ItemGroup>
+ <ClInclude Include="errmsg.h" />
+ <ClInclude Include="genrb.h" />
+ <ClInclude Include="filterrb.h" />
+ <ClInclude Include="parse.h" />
+ <ClInclude Include="prscmnts.h" />
+ <ClInclude Include="rbutil.h" />
+ <ClInclude Include="read.h" />
+ <ClInclude Include="reslist.h" />
+ <ClInclude Include="rle.h" />
+ <ClInclude Include="ustr.h" />
+ </ItemGroup>
+ <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+ <ImportGroup Label="ExtensionTargets">
+ </ImportGroup>
+</Project> \ No newline at end of file
diff --git a/intl/icu/source/tools/genrb/genrb.vcxproj.filters b/intl/icu/source/tools/genrb/genrb.vcxproj.filters
new file mode 100644
index 0000000000..1f2f5b3b8c
--- /dev/null
+++ b/intl/icu/source/tools/genrb/genrb.vcxproj.filters
@@ -0,0 +1,87 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+ <ItemGroup>
+ <Filter Include="Source Files">
+ <UniqueIdentifier>{2dee2c2f-25a5-43f0-985f-de4ba26925b4}</UniqueIdentifier>
+ <Extensions>cpp;c;cxx;rc;def;r;odl;idl;hpj;bat</Extensions>
+ </Filter>
+ <Filter Include="Header Files">
+ <UniqueIdentifier>{7156c811-7116-4eef-8bb1-0400c51f9fd3}</UniqueIdentifier>
+ <Extensions>h;hpp;hxx;hm;inl</Extensions>
+ </Filter>
+ <Filter Include="Resource Files">
+ <UniqueIdentifier>{df647868-56cc-475d-a3f6-1d1f50aa5e4f}</UniqueIdentifier>
+ <Extensions>ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe</Extensions>
+ </Filter>
+ </ItemGroup>
+ <ItemGroup>
+ <ClCompile Include="errmsg.c">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="filterrb.cpp">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="genrb.cpp">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="parse.cpp">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="prscmnts.cpp">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="rbutil.c">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="read.c">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="reslist.cpp">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="rle.c">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="ustr.c">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="wrtjava.cpp">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ <ClCompile Include="wrtxml.cpp">
+ <Filter>Source Files</Filter>
+ </ClCompile>
+ </ItemGroup>
+ <ItemGroup>
+ <ClInclude Include="errmsg.h">
+ <Filter>Header Files</Filter>
+ </ClInclude>
+ <ClInclude Include="filterrb.h">
+ <Filter>Header Files</Filter>
+ </ClInclude>
+ <ClInclude Include="genrb.h">
+ <Filter>Header Files</Filter>
+ </ClInclude>
+ <ClInclude Include="parse.h">
+ <Filter>Header Files</Filter>
+ </ClInclude>
+ <ClInclude Include="prscmnts.h">
+ <Filter>Header Files</Filter>
+ </ClInclude>
+ <ClInclude Include="rbutil.h">
+ <Filter>Header Files</Filter>
+ </ClInclude>
+ <ClInclude Include="read.h">
+ <Filter>Header Files</Filter>
+ </ClInclude>
+ <ClInclude Include="reslist.h">
+ <Filter>Header Files</Filter>
+ </ClInclude>
+ <ClInclude Include="rle.h">
+ <Filter>Header Files</Filter>
+ </ClInclude>
+ <ClInclude Include="ustr.h">
+ <Filter>Header Files</Filter>
+ </ClInclude>
+ </ItemGroup>
+</Project> \ No newline at end of file
diff --git a/intl/icu/source/tools/genrb/parse.cpp b/intl/icu/source/tools/genrb/parse.cpp
new file mode 100644
index 0000000000..1e82bda6e5
--- /dev/null
+++ b/intl/icu/source/tools/genrb/parse.cpp
@@ -0,0 +1,2435 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+*
+* Copyright (C) 1998-2015, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+*******************************************************************************
+*
+* File parse.cpp
+*
+* Modification History:
+*
+* Date Name Description
+* 05/26/99 stephen Creation.
+* 02/25/00 weiv Overhaul to write udata
+* 5/10/01 Ram removed ustdio dependency
+* 06/10/2001 Dominic Ludlam <dom@recoil.org> Rewritten
+*******************************************************************************
+*/
+
+// Safer use of UnicodeString.
+#include <cstdint>
+#include "unicode/umachine.h"
+#ifndef UNISTR_FROM_CHAR_EXPLICIT
+# define UNISTR_FROM_CHAR_EXPLICIT explicit
+#endif
+
+// Less important, but still a good idea.
+#ifndef UNISTR_FROM_STRING_EXPLICIT
+# define UNISTR_FROM_STRING_EXPLICIT explicit
+#endif
+
+#include <assert.h>
+#include "parse.h"
+#include "errmsg.h"
+#include "uhash.h"
+#include "cmemory.h"
+#include "cstring.h"
+#include "uinvchar.h"
+#include "read.h"
+#include "ustr.h"
+#include "reslist.h"
+#include "rbt_pars.h"
+#include "genrb.h"
+#include "unicode/normalizer2.h"
+#include "unicode/stringpiece.h"
+#include "unicode/unistr.h"
+#include "unicode/ustring.h"
+#include "unicode/uscript.h"
+#include "unicode/utf16.h"
+#include "unicode/putil.h"
+#include "charstr.h"
+#include "collationbuilder.h"
+#include "collationdata.h"
+#include "collationdatareader.h"
+#include "collationdatawriter.h"
+#include "collationfastlatinbuilder.h"
+#include "collationinfo.h"
+#include "collationroot.h"
+#include "collationruleparser.h"
+#include "collationtailoring.h"
+#include <stdio.h>
+#include "writesrc.h"
+
+/* Number of tokens to read ahead of the current stream position */
+#define MAX_LOOKAHEAD 3
+
+#define CR 0x000D
+#define LF 0x000A
+#define SPACE 0x0020
+#define TAB 0x0009
+#define ESCAPE 0x005C
+#define HASH 0x0023
+#define QUOTE 0x0027
+#define ZERO 0x0030
+#define STARTCOMMAND 0x005B
+#define ENDCOMMAND 0x005D
+#define OPENSQBRACKET 0x005B
+#define CLOSESQBRACKET 0x005D
+
+#define ICU4X_DIACRITIC_BASE 0x0300
+#define ICU4X_DIACRITIC_LIMIT 0x034F
+
+using icu::CharString;
+using icu::LocalMemory;
+using icu::LocalPointer;
+using icu::LocalUCHARBUFPointer;
+using icu::StringPiece;
+using icu::UnicodeString;
+
+struct Lookahead // One slot of the parser's circular token lookahead buffer.
+{
+ enum ETokenType type; // token kind returned by getNextToken()
+ struct UString value; // token text filled in by getNextToken()
+ struct UString comment; // comment text filled in by getNextToken() for this token
+ uint32_t line; // line number filled in by getNextToken()
+};
+
+/* keep in sync with token defines in read.h */
+const char *tokenNames[TOK_TOKEN_COUNT] = // printable token names, indexed by ETokenType values (used by expect() for its error message)
+{
+ "string", /* A string token, such as "MonthNames" */
+ "'{'", /* An opening brace character */
+ "'}'", /* A closing brace character */
+ "','", /* A comma */
+ "':'", /* A colon */
+
+ "<end of file>", /* End of the file has been reached successfully */
+ "<end of line>"
+};
+
+/* Just to store "TRUE" */
+//static const char16_t trueValue[] = {0x0054, 0x0052, 0x0055, 0x0045, 0x0000};
+
+typedef struct { // State threaded through the parse functions as their first argument.
+ struct Lookahead lookahead[MAX_LOOKAHEAD + 1]; // circular token buffer: the current token plus lookahead slots
+ uint32_t lookaheadPosition; // index in lookahead[] of the current token
+ UCHARBUF *buffer; // input buffer that getNextToken() reads from
+ struct SRBRoot *bundle;
+ const char *inputdir; // may be nullptr; prepended to the rules file name in parseUCARules()
+ uint32_t inputdirLength; // presumably strlen(inputdir) — confirm where it is assigned
+ const char *outputdir;
+ uint32_t outputdirLength; // presumably strlen(outputdir) — confirm where it is assigned
+ const char *filename;
+ UBool makeBinaryCollation;
+ UBool omitCollationRules; // when set, parseUCARules() returns res_none() instead of reading the rules file
+ UBool icu4xMode;
+} ParseState;
+
+typedef struct SResource *
+ParseResourceFunction(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status);
+
+static struct SResource *parseResource(ParseState* state, char *tag, const struct UString *comment, UErrorCode *status);
+
+/* The nature of the lookahead buffer:
+ There are MAX_LOOKAHEAD + 1 slots, used as a circular buffer. This provides
+ MAX_LOOKAHEAD lookahead tokens and a slot for the current token and value.
+ When getToken is called, the current pointer is moved to the next slot and the
+ old slot is filled with the next token from the reader by calling getNextToken.
+ The token values are stored in the slot, which means that token values don't
+ survive a call to getToken, ie.
+
+ UString *value;
+
+ getToken(state, &value, nullptr, nullptr, status);
+ getToken(state, nullptr, nullptr, nullptr, status); bad - value is now a different string
+*/
+/*
+ * Prepare the lookahead buffer of a ParseState for a new input buffer:
+ * the current position is reset to slot 0, the line counter is reset, and
+ * the first MAX_LOOKAHEAD slots are filled with tokens read from buf.
+ *
+ * If reading a token fails, the function returns immediately with *status
+ * holding that failure. Otherwise *status is reset to U_ZERO_ERROR, which
+ * also discards any warning code.
+ */
+static void
+initLookahead(ParseState* state, UCHARBUF *buf, UErrorCode *status)
+{
+    state->lookaheadPosition = 0;
+    state->buffer = buf;
+
+    resetLineNumber();
+
+    for (uint32_t i = 0; i < MAX_LOOKAHEAD; i++)
+    {
+        struct Lookahead &slot = state->lookahead[i];
+        slot.type = getNextToken(state->buffer, &slot.value, &slot.line, &slot.comment, status);
+        if (U_FAILURE(*status))
+        {
+            return;
+        }
+    }
+
+    *status = U_ZERO_ERROR;
+}
+
+/* Release the value and comment strings of every lookahead slot. */
+static void
+cleanupLookahead(ParseState* state)
+{
+    for (struct Lookahead &slot : state->lookahead)
+    {
+        ustr_deinit(&slot.value);
+        ustr_deinit(&slot.comment);
+    }
+}
+
+/*
+ * Consume the current token and return its type.
+ *
+ * Each of tokenValue, comment and linenumber is optional (nullptr to skip):
+ * tokenValue receives a pointer to the slot's value string (not a copy),
+ * comment receives a copy of the slot's comment, and linenumber the slot's line.
+ * Afterwards the current position advances by one slot and the slot
+ * MAX_LOOKAHEAD positions ahead of the old position is cleared and refilled
+ * with the next token from the input buffer.
+ */
+static enum ETokenType
+getToken(ParseState* state, struct UString **tokenValue, struct UString* comment, uint32_t *linenumber, UErrorCode *status)
+{
+    struct Lookahead &current = state->lookahead[state->lookaheadPosition];
+    const enum ETokenType result = current.type;
+
+    if (tokenValue != nullptr)
+    {
+        *tokenValue = &current.value;
+    }
+    if (linenumber != nullptr)
+    {
+        *linenumber = current.line;
+    }
+    if (comment != nullptr)
+    {
+        ustr_cpy(comment, &current.comment, status);
+    }
+
+    /* Pick the slot to refill before moving the current position. */
+    struct Lookahead &refill =
+        state->lookahead[(state->lookaheadPosition + MAX_LOOKAHEAD) % (MAX_LOOKAHEAD + 1)];
+    state->lookaheadPosition = (state->lookaheadPosition + 1) % (MAX_LOOKAHEAD + 1);
+
+    ustr_setlen(&refill.comment, 0, status);
+    ustr_setlen(&refill.value, 0, status);
+    refill.type = getNextToken(state->buffer, &refill.value, &refill.line, &refill.comment, status);
+
+    return result;
+}
+
+/*
+ * Return the type of the token lookaheadCount positions ahead of the current
+ * one without consuming anything; lookaheadCount 0 is the current token.
+ *
+ * Each of tokenValue, linenumber and comment is optional (nullptr to skip):
+ * tokenValue receives a pointer to the peeked slot's value string (not a copy),
+ * linenumber the slot's line, and comment a copy of the slot's comment.
+ *
+ * Returns TOK_ERROR if *status is already a failure, or if lookaheadCount is
+ * not below MAX_LOOKAHEAD (which also sets U_INTERNAL_PROGRAM_ERROR).
+ */
+static enum ETokenType
+peekToken(ParseState* state, uint32_t lookaheadCount, struct UString **tokenValue, uint32_t *linenumber, struct UString *comment, UErrorCode *status)
+{
+    if (U_FAILURE(*status))
+    {
+        return TOK_ERROR;
+    }
+
+    if (lookaheadCount >= MAX_LOOKAHEAD)
+    {
+        *status = U_INTERNAL_PROGRAM_ERROR;
+        return TOK_ERROR;
+    }
+
+    /* Only index the buffer once the count is known to be in range. */
+    const uint32_t i = (state->lookaheadPosition + lookaheadCount) % (MAX_LOOKAHEAD + 1);
+
+    if (tokenValue != nullptr)
+    {
+        *tokenValue = &state->lookahead[i].value;
+    }
+
+    if (linenumber != nullptr)
+    {
+        *linenumber = state->lookahead[i].line;
+    }
+
+    if (comment != nullptr)
+    {
+        /*
+         * Copy the comment of the peeked slot, like the value and line above.
+         * The previous code always read the current slot, which is the same
+         * thing only when lookaheadCount is 0.
+         */
+        ustr_cpy(comment, &(state->lookahead[i].comment), status);
+    }
+
+    return state->lookahead[i].type;
+}
+
+/*
+ * Consume the next token and require it to be of type expectedToken.
+ *
+ * tokenValue, comment and linenumber are forwarded to getToken(); linenumber
+ * may be nullptr. If *status is a failure after getToken(), it is left as is.
+ * Otherwise a mismatch reports an error for the token's line and sets
+ * U_INVALID_FORMAT_ERROR, while a match resets *status to U_ZERO_ERROR.
+ */
+static void
+expect(ParseState* state, enum ETokenType expectedToken, struct UString **tokenValue, struct UString *comment, uint32_t *linenumber, UErrorCode *status)
+{
+    uint32_t tokenLine;
+    const enum ETokenType actual = getToken(state, tokenValue, comment, &tokenLine, status);
+
+    if (linenumber != nullptr)
+    {
+        *linenumber = tokenLine;
+    }
+
+    if (U_FAILURE(*status))
+    {
+        return;
+    }
+
+    if (actual == expectedToken)
+    {
+        *status = U_ZERO_ERROR;
+        return;
+    }
+
+    *status = U_INVALID_FORMAT_ERROR;
+    error(tokenLine, "expecting %s, got %s", tokenNames[expectedToken], tokenNames[actual]);
+}
+
+/*
+ * Consume a string token and return it converted to a newly allocated
+ * char string.
+ *
+ * line and comment are forwarded to expect(); stringLength receives the
+ * token's length on success. Returns nullptr when the next token is not a
+ * string, when it fails the uprv_isInvariantUString() check
+ * (U_INVALID_FORMAT_ERROR), or when the allocation fails
+ * (U_MEMORY_ALLOCATION_ERROR). The result comes from uprv_malloc().
+ */
+static char *getInvariantString(ParseState* state, uint32_t *line, struct UString *comment,
+                                int32_t &stringLength, UErrorCode *status)
+{
+    struct UString *token;
+
+    expect(state, TOK_STRING, &token, comment, line, status);
+    if (U_FAILURE(*status))
+    {
+        return nullptr;
+    }
+
+    const int32_t length = token->fLength;
+    if (!uprv_isInvariantUString(token->fChars, length))
+    {
+        *status = U_INVALID_FORMAT_ERROR;
+        error(*line, "invariant characters required for table keys, binary data, etc.");
+        return nullptr;
+    }
+
+    /* One extra unit so that the terminator is converted along with the text. */
+    char *chars = static_cast<char *>(uprv_malloc(length + 1));
+    if (chars == nullptr)
+    {
+        *status = U_MEMORY_ALLOCATION_ERROR;
+        return nullptr;
+    }
+
+    u_UCharsToChars(token->fChars, chars, length + 1);
+    stringLength = length;
+    return chars;
+}
+
+static struct SResource *
+parseUCARules(ParseState* state, char *tag, uint32_t startline, const struct UString* /*comment*/, UErrorCode *status)
+{
+ struct SResource *result = nullptr;
+ struct UString *tokenValue;
+ FileStream *file = nullptr;
+ char filename[256] = { '\0' };
+ char cs[128] = { '\0' };
+ uint32_t line;
+ UBool quoted = false;
+ UCHARBUF *ucbuf=nullptr;
+ UChar32 c = 0;
+ const char* cp = nullptr;
+ char16_t *pTarget = nullptr;
+ char16_t *target = nullptr;
+ char16_t *targetLimit = nullptr;
+ int32_t size = 0;
+
+ expect(state, TOK_STRING, &tokenValue, nullptr, &line, status);
+
+ if(isVerbose()){
+ printf(" %s at line %i \n", (tag == nullptr) ? "(null)" : tag, (int)startline);
+ }
+
+ if (U_FAILURE(*status))
+ {
+ return nullptr;
+ }
+ /* make the filename including the directory */
+ if (state->inputdir != nullptr)
+ {
+ uprv_strcat(filename, state->inputdir);
+
+ if (state->inputdir[state->inputdirLength - 1] != U_FILE_SEP_CHAR)
+ {
+ uprv_strcat(filename, U_FILE_SEP_STRING);
+ }
+ }
+
+ u_UCharsToChars(tokenValue->fChars, cs, tokenValue->fLength);
+
+ expect(state, TOK_CLOSE_BRACE, nullptr, nullptr, nullptr, status);
+
+ if (U_FAILURE(*status))
+ {
+ return nullptr;
+ }
+ uprv_strcat(filename, cs);
+
+ if(state->omitCollationRules) {
+ return res_none();
+ }
+
+ ucbuf = ucbuf_open(filename, &cp, getShowWarning(),false, status);
+
+ if (U_FAILURE(*status)) {
+ error(line, "An error occurred while opening the input file %s\n", filename);
+ return nullptr;
+ }
+
+ /* We allocate more space than actually required
+ * since the actual size needed for storing UChars
+ * is not known in UTF-8 byte stream
+ */
+ size = ucbuf_size(ucbuf) + 1;
+ pTarget = (char16_t*) uprv_malloc(U_SIZEOF_UCHAR * size);
+ uprv_memset(pTarget, 0, size*U_SIZEOF_UCHAR);
+ target = pTarget;
+ targetLimit = pTarget+size;
+
+ /* read the rules into the buffer */
+ while (target < targetLimit)
+ {
+ c = ucbuf_getc(ucbuf, status);
+ if(c == QUOTE) {
+ quoted = (UBool)!quoted;
+ }
+ /* weiv (06/26/2002): adding the following:
+ * - preserving spaces in commands [...]
+ * - # comments until the end of line
+ */
+ if (c == STARTCOMMAND && !quoted)
+ {
+ /* preserve commands
+ * closing bracket will be handled by the
+ * append at the end of the loop
+ */
+ while(c != ENDCOMMAND) {
+ U_APPEND_CHAR32_ONLY(c, target);
+ c = ucbuf_getc(ucbuf, status);
+ }
+ }
+ else if (c == HASH && !quoted) {
+ /* skip comments */
+ while(c != CR && c != LF) {
+ c = ucbuf_getc(ucbuf, status);
+ }
+ continue;
+ }
+ else if (c == ESCAPE)
+ {
+ c = unescape(ucbuf, status);
+
+ if (c == (UChar32)U_ERR)
+ {
+ uprv_free(pTarget);
+ T_FileStream_close(file);
+ return nullptr;
+ }
+ }
+ else if (!quoted && (c == SPACE || c == TAB || c == CR || c == LF))
+ {
+ /* ignore spaces carriage returns
+ * and line feed unless in the form \uXXXX
+ */
+ continue;
+ }
+
+ /* Append char16_t * after dissembling if c > 0xffff*/
+ if (c != (UChar32)U_EOF)
+ {
+ U_APPEND_CHAR32_ONLY(c, target);
+ }
+ else
+ {
+ break;
+ }
+ }
+
+ /* terminate the string */
+ if(target < targetLimit){
+ *target = 0x0000;
+ }
+
+ result = string_open(state->bundle, tag, pTarget, (int32_t)(target - pTarget), nullptr, status);
+
+
+ ucbuf_close(ucbuf);
+ uprv_free(pTarget);
+ T_FileStream_close(file);
+
+ return result;
+}
+
+static struct SResource *
+parseTransliterator(ParseState* state, char *tag, uint32_t startline, const struct UString* /*comment*/, UErrorCode *status)
+{
+ struct SResource *result = nullptr;
+ struct UString *tokenValue;
+ FileStream *file = nullptr;
+ char filename[256] = { '\0' };
+ char cs[128] = { '\0' };
+ uint32_t line;
+ UCHARBUF *ucbuf=nullptr;
+ const char* cp = nullptr;
+ char16_t *pTarget = nullptr;
+ const char16_t *pSource = nullptr;
+ int32_t size = 0;
+
+ expect(state, TOK_STRING, &tokenValue, nullptr, &line, status);
+
+ if(isVerbose()){
+ printf(" %s at line %i \n", (tag == nullptr) ? "(null)" : tag, (int)startline);
+ }
+
+ if (U_FAILURE(*status))
+ {
+ return nullptr;
+ }
+ /* make the filename including the directory */
+ if (state->inputdir != nullptr)
+ {
+ uprv_strcat(filename, state->inputdir);
+
+ if (state->inputdir[state->inputdirLength - 1] != U_FILE_SEP_CHAR)
+ {
+ uprv_strcat(filename, U_FILE_SEP_STRING);
+ }
+ }
+
+ u_UCharsToChars(tokenValue->fChars, cs, tokenValue->fLength);
+
+ expect(state, TOK_CLOSE_BRACE, nullptr, nullptr, nullptr, status);
+
+ if (U_FAILURE(*status))
+ {
+ return nullptr;
+ }
+ uprv_strcat(filename, cs);
+
+
+ ucbuf = ucbuf_open(filename, &cp, getShowWarning(),false, status);
+
+ if (U_FAILURE(*status)) {
+ error(line, "An error occurred while opening the input file %s\n", filename);
+ return nullptr;
+ }
+
+ /* We allocate more space than actually required
+ * since the actual size needed for storing UChars
+ * is not known in UTF-8 byte stream
+ */
+ pSource = ucbuf_getBuffer(ucbuf, &size, status);
+ pTarget = (char16_t*) uprv_malloc(U_SIZEOF_UCHAR * (size + 1));
+ uprv_memset(pTarget, 0, size*U_SIZEOF_UCHAR);
+
+#if !UCONFIG_NO_TRANSLITERATION
+ size = utrans_stripRules(pSource, size, pTarget, status);
+#else
+ size = 0;
+ fprintf(stderr, " Warning: writing empty transliteration data ( UCONFIG_NO_TRANSLITERATION ) \n");
+#endif
+ result = string_open(state->bundle, tag, pTarget, size, nullptr, status);
+
+ ucbuf_close(ucbuf);
+ uprv_free(pTarget);
+ T_FileStream_close(file);
+
+ return result;
+}
+static ArrayResource* dependencyArray = nullptr;
+
+static struct SResource *
+parseDependency(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status)
+{
+ struct SResource *result = nullptr;
+ struct SResource *elem = nullptr;
+ struct UString *tokenValue;
+ uint32_t line;
+ char filename[256] = { '\0' };
+ char cs[128] = { '\0' };
+
+ expect(state, TOK_STRING, &tokenValue, nullptr, &line, status);
+
+ if(isVerbose()){
+ printf(" %s at line %i \n", (tag == nullptr) ? "(null)" : tag, (int)startline);
+ }
+
+ if (U_FAILURE(*status))
+ {
+ return nullptr;
+ }
+ /* make the filename including the directory */
+ if (state->outputdir != nullptr)
+ {
+ uprv_strcat(filename, state->outputdir);
+
+ if (state->outputdir[state->outputdirLength - 1] != U_FILE_SEP_CHAR)
+ {
+ uprv_strcat(filename, U_FILE_SEP_STRING);
+ }
+ }
+
+ u_UCharsToChars(tokenValue->fChars, cs, tokenValue->fLength);
+
+ if (U_FAILURE(*status))
+ {
+ return nullptr;
+ }
+ uprv_strcat(filename, cs);
+ if(!T_FileStream_file_exists(filename)){
+ if(isStrict()){
+ error(line, "The dependency file %s does not exist. Please make sure it exists.\n",filename);
+ }else{
+ warning(line, "The dependency file %s does not exist. Please make sure it exists.\n",filename);
+ }
+ }
+ if(dependencyArray==nullptr){
+ dependencyArray = array_open(state->bundle, "%%DEPENDENCY", nullptr, status);
+ }
+ if(tag!=nullptr){
+ result = string_open(state->bundle, tag, tokenValue->fChars, tokenValue->fLength, comment, status);
+ }
+ elem = string_open(state->bundle, nullptr, tokenValue->fChars, tokenValue->fLength, comment, status);
+
+ dependencyArray->add(elem);
+
+ if (U_FAILURE(*status))
+ {
+ return nullptr;
+ }
+ expect(state, TOK_CLOSE_BRACE, nullptr, nullptr, nullptr, status);
+ return result;
+}
+static struct SResource *
+parseString(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status)
+{
+ struct UString *tokenValue;
+ struct SResource *result = nullptr;
+
+/* if (tag != nullptr && uprv_strcmp(tag, "%%UCARULES") == 0)
+ {
+ return parseUCARules(tag, startline, status);
+ }*/
+ if(isVerbose()){
+ printf(" string %s at line %i \n", (tag == nullptr) ? "(null)" : tag, (int)startline);
+ }
+ expect(state, TOK_STRING, &tokenValue, nullptr, nullptr, status);
+
+ if (U_SUCCESS(*status))
+ {
+ /* create the string now - tokenValue doesn't survive a call to getToken (and therefore
+ doesn't survive expect either) */
+
+ result = string_open(state->bundle, tag, tokenValue->fChars, tokenValue->fLength, comment, status);
+ if(U_SUCCESS(*status) && result) {
+ expect(state, TOK_CLOSE_BRACE, nullptr, nullptr, nullptr, status);
+
+ if (U_FAILURE(*status))
+ {
+ res_close(result);
+ return nullptr;
+ }
+ }
+ }
+
+ return result;
+}
+
+static struct SResource *
+parseAlias(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
+{
+ struct UString *tokenValue;
+ struct SResource *result = nullptr;
+
+ expect(state, TOK_STRING, &tokenValue, nullptr, nullptr, status);
+
+ if(isVerbose()){
+ printf(" alias %s at line %i \n", (tag == nullptr) ? "(null)" : tag, (int)startline);
+ }
+
+ if (U_SUCCESS(*status))
+ {
+ /* create the string now - tokenValue doesn't survive a call to getToken (and therefore
+ doesn't survive expect either) */
+
+ result = alias_open(state->bundle, tag, tokenValue->fChars, tokenValue->fLength, comment, status);
+
+ expect(state, TOK_CLOSE_BRACE, nullptr, nullptr, nullptr, status);
+
+ if (U_FAILURE(*status))
+ {
+ res_close(result);
+ return nullptr;
+ }
+ }
+
+ return result;
+}
+
+#if !UCONFIG_NO_COLLATION
+
+namespace {
+
+static struct SResource* resLookup(struct SResource* res, const char* key){
+ if (res == res_none() || !res->isTable()) {
+ return nullptr;
+ }
+
+ TableResource *list = static_cast<TableResource *>(res);
+ SResource *current = list->fFirst;
+ while (current != nullptr) {
+ if (uprv_strcmp(((list->fRoot->fKeys) + (current->fKey)), key) == 0) {
+ return current;
+ }
+ current = current->fNext;
+ }
+ return nullptr;
+}
+
+class GenrbImporter : public icu::CollationRuleParser::Importer {
+public:
+ GenrbImporter(const char *in, const char *out) : inputDir(in), outputDir(out) {}
+ virtual ~GenrbImporter();
+ virtual void getRules(
+ const char *localeID, const char *collationType,
+ UnicodeString &rules,
+ const char *&errorReason, UErrorCode &errorCode) override;
+
+private:
+ const char *inputDir;
+ const char *outputDir;
+};
+
+GenrbImporter::~GenrbImporter() {}
+
+void
+GenrbImporter::getRules(
+ const char *localeID, const char *collationType,
+ UnicodeString &rules,
+ const char *& /*errorReason*/, UErrorCode &errorCode) {
+ CharString filename(localeID, errorCode);
+ for(int32_t i = 0; i < filename.length(); i++){
+ if(filename[i] == '-'){
+ filename.data()[i] = '_';
+ }
+ }
+ filename.append(".txt", errorCode);
+ if (U_FAILURE(errorCode)) {
+ return;
+ }
+ CharString inputDirBuf;
+ CharString openFileName;
+ if(inputDir == nullptr) {
+ const char *filenameBegin = uprv_strrchr(filename.data(), U_FILE_SEP_CHAR);
+ if (filenameBegin != nullptr) {
+ /*
+ * When a filename ../../../data/root.txt is specified,
+ * we presume that the input directory is ../../../data
+ * This is very important when the resource file includes
+ * another file, like UCARules.txt or thaidict.brk.
+ */
+ StringPiece dir = filename.toStringPiece();
+ const char *filenameLimit = filename.data() + filename.length();
+ dir.remove_suffix((int32_t)(filenameLimit - filenameBegin));
+ inputDirBuf.append(dir, errorCode);
+ inputDir = inputDirBuf.data();
+ }
+ }else{
+ int32_t dirlen = (int32_t)uprv_strlen(inputDir);
+
+ if((filename[0] != U_FILE_SEP_CHAR) && (inputDir[dirlen-1] !='.')) {
+ /*
+ * append the input dir to openFileName if the first char in
+ * filename is not file separator char and the last char input directory is not '.'.
+ * This is to support :
+ * genrb -s. /home/icu/data
+ * genrb -s. icu/data
+ * The user cannot mix notations like
+ * genrb -s. /icu/data --- the absolute path specified. -s redundant
+ * user should use
+ * genrb -s. icu/data --- start from CWD and look in icu/data dir
+ */
+ openFileName.append(inputDir, dirlen, errorCode);
+ if(inputDir[dirlen-1] != U_FILE_SEP_CHAR) {
+ openFileName.append(U_FILE_SEP_CHAR, errorCode);
+ }
+ }
+ }
+ openFileName.append(filename, errorCode);
+ if(U_FAILURE(errorCode)) {
+ return;
+ }
+ // printf("GenrbImporter::getRules(%s, %s) reads %s\n", localeID, collationType, openFileName.data());
+ const char* cp = "";
+ LocalUCHARBUFPointer ucbuf(
+ ucbuf_open(openFileName.data(), &cp, getShowWarning(), true, &errorCode));
+ if(errorCode == U_FILE_ACCESS_ERROR) {
+ fprintf(stderr, "couldn't open file %s\n", openFileName.data());
+ return;
+ }
+ if (ucbuf.isNull() || U_FAILURE(errorCode)) {
+ fprintf(stderr, "An error occurred processing file %s. Error: %s\n", openFileName.data(), u_errorName(errorCode));
+ return;
+ }
+
+ /* Parse the data into an SRBRoot */
+ LocalPointer<SRBRoot> data(
+ parse(ucbuf.getAlias(), inputDir, outputDir, filename.data(), false, false, false, &errorCode));
+ if (U_FAILURE(errorCode)) {
+ return;
+ }
+
+ struct SResource *root = data->fRoot;
+ struct SResource *collations = resLookup(root, "collations");
+ if (collations != nullptr) {
+ struct SResource *collation = resLookup(collations, collationType);
+ if (collation != nullptr) {
+ struct SResource *sequence = resLookup(collation, "Sequence");
+ if (sequence != nullptr && sequence->isString()) {
+ // No string pointer aliasing so that we need not hold onto the resource bundle.
+ StringResource *sr = static_cast<StringResource *>(sequence);
+ rules = sr->fString;
+ }
+ }
+ }
+}
+
+// Quick-and-dirty escaping function.
+// Assumes that we are on an ASCII-based platform.
+static void
+escape(const char16_t *s, char *buffer, size_t n) {
+ int32_t length = u_strlen(s);
+ int32_t i = 0;
+ for (;;) {
+ UChar32 c;
+ U16_NEXT(s, i, length, c);
+ if (c == 0) {
+ *buffer = 0;
+ return;
+ } else if (0x20 <= c && c <= 0x7e) {
+ // printable ASCII
+ *buffer++ = (char)c; // assumes ASCII-based platform
+ } else {
+ buffer += snprintf(buffer, n, "\\u%04X", (int)c);
+ }
+ }
+}
+
+} // namespace
+
+static FILE*
+openTOML(const char* outputdir, const char* name, const char* collationType, const char* structType, UErrorCode *status) {
+ CharString baseName;
+ baseName.append(name, *status);
+ baseName.append("_", *status);
+ baseName.append(collationType, *status);
+ baseName.append("_", *status);
+ baseName.append(structType, *status);
+
+ CharString outFileName;
+ if (outputdir && *outputdir) {
+ outFileName.append(outputdir, *status).ensureEndsWithFileSeparator(*status);
+ }
+ outFileName.append(baseName, *status);
+ outFileName.append(".toml", *status);
+ if (U_FAILURE(*status)) {
+ return nullptr;
+ }
+
+ FILE* f = fopen(outFileName.data(), "w");
+ if (!f) {
+ *status = U_FILE_ACCESS_ERROR;
+ return nullptr;
+ }
+ usrc_writeFileNameGeneratedBy(f, "#", baseName.data(), "genrb -X");
+
+ return f;
+}
+
+static void
+writeCollationMetadataTOML(const char* outputdir, const char* name, const char* collationType, const uint32_t metadataBits, UErrorCode *status) {
+ FILE* f = openTOML(outputdir, name, collationType, "meta", status);
+ if (!f) {
+ return;
+ }
+ // printf("writeCollationMetadataTOML %s %s\n", name, collationType);
+ fprintf(f, "bits = 0x%X\n", metadataBits);
+ fclose(f);
+}
+
+static UChar32
+writeCollationDiacriticsTOML(const char* outputdir, const char* name, const char* collationType, const icu::CollationData* data, UErrorCode *status) {
+ UChar32 limit = ICU4X_DIACRITIC_LIMIT;
+ FILE* f = openTOML(outputdir, name, collationType, "dia", status);
+ if (!f) {
+ return limit;
+ }
+ // printf("writeCollationDiacriticsTOML %s %s\n", name, collationType);
+ uint16_t secondaries[ICU4X_DIACRITIC_LIMIT-ICU4X_DIACRITIC_BASE];
+ for (UChar32 c = ICU4X_DIACRITIC_BASE; c < ICU4X_DIACRITIC_LIMIT; ++c) {
+ uint16_t secondary = 0;
+ uint32_t ce32 = data->getCE32(c);
+ if (ce32 == icu::Collation::FALLBACK_CE32) {
+ ce32 = data->base->getCE32(c);
+ }
+ if (c == 0x0340 || c == 0x0341 || c == 0x0343 || c == 0x0344) {
+ // These never occur in NFD data
+ } else if (!icu::Collation::isSimpleOrLongCE32(ce32)) {
+ if (uprv_strcmp(name, "root") == 0) {
+ printf("UNSUPPORTED DIACRITIC CE32 in root: TAG: %X CE32: %X char: %X\n", icu::Collation::tagFromCE32(ce32), ce32, c);
+ fclose(f);
+ *status = U_INTERNAL_PROGRAM_ERROR;
+ return limit;
+ }
+ limit = c;
+ break;
+ } else {
+ uint64_t ce = uint64_t(icu::Collation::ceFromCE32(ce32));
+ if ((ce & 0xFFFFFFFF0000FFFF) != uint64_t(icu::Collation::COMMON_TERTIARY_CE)) {
+ // Not a CE where only the secondary weight differs from the expected
+ // pattern.
+ limit = c;
+ break;
+ }
+ secondary = uint16_t(ce >> 16);
+ }
+ secondaries[c - ICU4X_DIACRITIC_BASE] = secondary;
+
+ }
+ usrc_writeArray(f, "secondaries = [\n ", secondaries, 16, limit-ICU4X_DIACRITIC_BASE, " ", "\n]\n");
+ fclose(f);
+ return limit;
+}
+
+static void
+writeCollationReorderingTOML(const char* outputdir, const char* name, const char* collationType, const icu::CollationSettings* settings, UErrorCode *status) {
+ FILE* f = openTOML(outputdir, name, collationType, "reord", status);
+ if (!f) {
+ return;
+ }
+ // printf("writeCollationReorderingTOML %s %s\n", name, collationType);
+ fprintf(f, "min_high_no_reorder = 0x%X\n", settings->minHighNoReorder);
+ usrc_writeArray(f, "reorder_table = [\n ", settings->reorderTable, 8, 256, " ", "\n]\n");
+ usrc_writeArray(f, "reorder_ranges = [\n ", settings->reorderRanges, 32, settings->reorderRangesLength, " ", "\n]\n");
+ fclose(f);
+}
+
+
+static void
+writeCollationJamoTOML(const char* outputdir, const char* name, const char* collationType, const icu::CollationData* data, UErrorCode *status) {
+ FILE* f = openTOML(outputdir, name, collationType, "jamo", status);
+ if (!f) {
+ printf("writeCollationJamoTOML FAILED TO OPEN FILE %s %s\n", name, collationType);
+ return;
+ }
+ uint32_t jamo[0x1200-0x1100];
+ for (UChar32 c = 0x1100; c < 0x1200; ++c) {
+ uint32_t ce32 = data->getCE32(c);
+ if (ce32 == icu::Collation::FALLBACK_CE32) {
+ ce32 = data->base->getCE32(c);
+ }
+ // Can't reject complex CE32s, because search collations have expansions.
+ // These expansions refer to the tailoring, which foils the reuse of the
+ // these jamo tables.
+ // XXX Figure out what to do. Perhaps instead of having Latin mini expansions,
+ // there should be Hangul mini expansions.
+ // XXX in any case, validate that modern jamo are self-contained.
+ jamo[c - 0x1100] = ce32;
+
+ }
+ usrc_writeArray(f, "ce32s = [\n ", jamo, 32, 0x1200-0x1100, " ", "\n]\n");
+ fclose(f);
+}
+
+static UBool
+convertTrie(const void *context, UChar32 start, UChar32 end, uint32_t value) {
+ if (start >= 0x1100 && start < 0x1200 && end >= 0x1100 && end < 0x1200) {
+ // Range entirely in conjoining jamo block.
+ return true;
+ }
+ icu::IcuToolErrorCode status("genrb: convertTrie");
+ umutablecptrie_setRange((UMutableCPTrie*)context, start, end, value, status);
+ return !U_FAILURE(*status);
+}
+
+static void
+writeCollationDataTOML(const char* outputdir, const char* name, const char* collationType, const icu::CollationData* data, UBool root, UChar32 diacriticLimit, UErrorCode *status) {
+ FILE* f = openTOML(outputdir, name, collationType, "data", status);
+ if (!f) {
+ return;
+ }
+ // printf("writeCollationDataTOML %s %s\n", name, collationType);
+
+ icu::UnicodeSet tailoringSet;
+
+ if (data->base) {
+ tailoringSet.addAll(*(data->unsafeBackwardSet));
+ tailoringSet.removeAll(*(data->base->unsafeBackwardSet));
+ } else {
+ tailoringSet.addAll(*(data->unsafeBackwardSet));
+ }
+
+ // Use the same value for out-of-range and default in the hope of not having to allocate
+ // different blocks, since ICU4X never does out-of-range queries.
+ uint32_t trieDefault = root ? icu::Collation::UNASSIGNED_CE32 : icu::Collation::FALLBACK_CE32;
+ icu::LocalUMutableCPTriePointer builder(umutablecptrie_open(trieDefault, trieDefault, status));
+
+ utrie2_enum(data->trie, nullptr, &convertTrie, builder.getAlias());
+
+ // If the diacritic table was cut short, copy CE32s between the lowered
+ // limit and the max limit from the root to the tailoring. As of June 2022,
+ // no collation in CLDR needs this.
+ for (UChar32 c = diacriticLimit; c < ICU4X_DIACRITIC_LIMIT; ++c) {
+ if (c == 0x0340 || c == 0x0341 || c == 0x0343 || c == 0x0344) {
+ // These never occur in NFD data.
+ continue;
+ }
+ uint32_t ce32 = data->getCE32(c);
+ if (ce32 == icu::Collation::FALLBACK_CE32) {
+ ce32 = data->base->getCE32(c);
+ umutablecptrie_set(builder.getAlias(), c, ce32, status);
+ }
+ }
+
+ // Ensure that the range covered by the diacritic table isn't duplicated
+ // in the trie.
+ for (UChar32 c = ICU4X_DIACRITIC_BASE; c < diacriticLimit; ++c) {
+ if (umutablecptrie_get(builder.getAlias(), c) != trieDefault) {
+ umutablecptrie_set(builder.getAlias(), c, trieDefault, status);
+ }
+ }
+
+ icu::LocalUCPTriePointer utrie(umutablecptrie_buildImmutable(
+ builder.getAlias(),
+ UCPTRIE_TYPE_SMALL,
+ UCPTRIE_VALUE_BITS_32,
+ status));
+ usrc_writeArray(f, "contexts = [\n ", data->contexts, 16, data->contextsLength, " ", "\n]\n");
+ usrc_writeArray(f, "ce32s = [\n ", data->ce32s, 32, data->ce32sLength, " ", "\n]\n");
+ usrc_writeArray(f, "ces = [\n ", data->ces, 64, data->cesLength, " ", "\n]\n");
+ fprintf(f, "[trie]\n");
+ usrc_writeUCPTrie(f, "trie", utrie.getAlias(), UPRV_TARGET_SYNTAX_TOML);
+
+ fclose(f);
+}
+
+static void
+writeCollationSpecialPrimariesTOML(const char* outputdir, const char* name, const char* collationType, const icu::CollationData* data, UErrorCode *status) {
+ FILE* f = openTOML(outputdir, name, collationType, "prim", status);
+ if (!f) {
+ return;
+ }
+ // printf("writeCollationSpecialPrimariesTOML %s %s\n", name, collationType);
+
+ uint16_t lastPrimaries[4];
+ for (int32_t i = 0; i < 4; ++i) {
+ // getLastPrimaryForGroup subtracts one from a 16-bit value, so we add one
+ // back to get a value that fits in 16 bits.
+ lastPrimaries[i] = (uint16_t)((data->getLastPrimaryForGroup(UCOL_REORDER_CODE_FIRST + i) + 1) >> 16);
+ }
+
+ uint32_t numericPrimary = data->numericPrimary;
+ if (numericPrimary & 0xFFFFFF) {
+ printf("Lower 24 bits set in numeric primary");
+ *status = U_INTERNAL_PROGRAM_ERROR;
+ return;
+ }
+
+ usrc_writeArray(f, "last_primaries = [\n ", lastPrimaries, 16, 4, " ", "\n]\n");
+ fprintf(f, "numeric_primary = 0x%X\n", numericPrimary >> 24);
+ fclose(f);
+}
+
+static void
+writeCollationTOML(const char* outputdir, const char* name, const char* collationType, const icu::CollationData* data, const icu::CollationSettings* settings, UErrorCode *status) {
+ UBool tailored = false;
+ UBool tailoredDiacritics = false;
+ UBool lithuanianDotAbove = (uprv_strcmp(name, "lt") == 0);
+ UBool reordering = false;
+ UBool isRoot = uprv_strcmp(name, "root") == 0;
+ UChar32 diacriticLimit = ICU4X_DIACRITIC_LIMIT;
+ if (!data->base && isRoot) {
+ diacriticLimit = writeCollationDiacriticsTOML(outputdir, name, collationType, data, status);
+ if (U_FAILURE(*status)) {
+ return;
+ }
+ writeCollationJamoTOML(outputdir, name, collationType, data, status);
+ if (U_FAILURE(*status)) {
+ return;
+ }
+ writeCollationSpecialPrimariesTOML(outputdir, name, collationType, data, status);
+ if (U_FAILURE(*status)) {
+ return;
+ }
+ } else if (data->base && !lithuanianDotAbove) {
+ for (UChar32 c = ICU4X_DIACRITIC_BASE; c < ICU4X_DIACRITIC_LIMIT; ++c) {
+ if (c == 0x0340 || c == 0x0341 || c == 0x0343 || c == 0x0344) {
+ // These never occur in NFD data.
+ continue;
+ }
+ uint32_t ce32 = data->getCE32(c);
+ if ((ce32 != icu::Collation::FALLBACK_CE32) && (ce32 != data->base->getCE32(c))) {
+ tailoredDiacritics = true;
+ diacriticLimit = writeCollationDiacriticsTOML(outputdir, name, collationType, data, status);
+ if (U_FAILURE(*status)) {
+ return;
+ }
+ break;
+ }
+ }
+ }
+
+ if (settings->hasReordering()) {
+ reordering = true;
+ // Note: There are duplicate reorderings. Expecting the ICU4X provider
+ // to take care of deduplication.
+ writeCollationReorderingTOML(outputdir, name, collationType, settings, status);
+ if (U_FAILURE(*status)) {
+ return;
+ }
+ }
+
+ // Write collation data if either base is non-null or the name is root.
+ // Languages that only reorder scripts are otherwise root-like and have
+ // null base.
+ if (data->base || isRoot) {
+ tailored = !isRoot;
+ writeCollationDataTOML(outputdir, name, collationType, data, (!data->base && isRoot), diacriticLimit, status);
+ if (U_FAILURE(*status)) {
+ return;
+ }
+ }
+
+ uint32_t maxVariable = (uint32_t)settings->getMaxVariable();
+ if (maxVariable >= 4) {
+ printf("Max variable out of range");
+ *status = U_INTERNAL_PROGRAM_ERROR;
+ return;
+ }
+
+ uint32_t metadataBits = maxVariable;
+ if (tailored) {
+ metadataBits |= (1 << 3);
+ }
+ if (tailoredDiacritics) {
+ metadataBits |= (1 << 4);
+ }
+ if (reordering) {
+ metadataBits |= (1 << 5);
+ }
+ if (lithuanianDotAbove) {
+ metadataBits |= (1 << 6);
+ }
+ if ((settings->options & icu::CollationSettings::BACKWARD_SECONDARY) != 0) {
+ metadataBits |= (1 << 7);
+ }
+ if (settings->getAlternateHandling() == UCOL_SHIFTED) {
+ metadataBits |= (1 << 8);
+ }
+ switch (settings->getCaseFirst()) {
+ case UCOL_OFF:
+ break;
+ case UCOL_UPPER_FIRST:
+ metadataBits |= (1 << 9);
+ metadataBits |= (1 << 10);
+ break;
+ case UCOL_LOWER_FIRST:
+ metadataBits |= (1 << 9);
+ break;
+ default:
+ *status = U_INTERNAL_PROGRAM_ERROR;
+ return;
+ }
+
+ writeCollationMetadataTOML(outputdir, name, collationType, metadataBits, status);
+}
+
+#endif // !UCONFIG_NO_COLLATION
+
+static TableResource *
+addCollation(ParseState* state, TableResource *result, const char *collationType,
+ uint32_t startline, UErrorCode *status)
+{
+ // TODO: Use LocalPointer for result, or make caller close it when there is a failure.
+ struct SResource *member = nullptr;
+ struct UString *tokenValue;
+ struct UString comment;
+ enum ETokenType token;
+ char subtag[1024];
+ UnicodeString rules;
+ UBool haveRules = false;
+ UVersionInfo version;
+ uint32_t line;
+
+ /* '{' . (name resource)* '}' */
+ version[0]=0; version[1]=0; version[2]=0; version[3]=0;
+
+ for (;;)
+ {
+ ustr_init(&comment);
+ token = getToken(state, &tokenValue, &comment, &line, status);
+
+ if (token == TOK_CLOSE_BRACE)
+ {
+ break;
+ }
+
+ if (token != TOK_STRING)
+ {
+ res_close(result);
+ *status = U_INVALID_FORMAT_ERROR;
+
+ if (token == TOK_EOF)
+ {
+ error(startline, "unterminated table");
+ }
+ else
+ {
+ error(line, "Unexpected token %s", tokenNames[token]);
+ }
+
+ return nullptr;
+ }
+
+ u_UCharsToChars(tokenValue->fChars, subtag, u_strlen(tokenValue->fChars) + 1);
+
+ if (U_FAILURE(*status))
+ {
+ res_close(result);
+ return nullptr;
+ }
+
+ member = parseResource(state, subtag, nullptr, status);
+
+ if (U_FAILURE(*status))
+ {
+ res_close(result);
+ return nullptr;
+ }
+ if (result == nullptr)
+ {
+ // Ignore the parsed resources, continue parsing.
+ }
+ else if (uprv_strcmp(subtag, "Version") == 0 && member->isString())
+ {
+ StringResource *sr = static_cast<StringResource *>(member);
+ char ver[40];
+ int32_t length = sr->length();
+
+ if (length >= UPRV_LENGTHOF(ver))
+ {
+ length = UPRV_LENGTHOF(ver) - 1;
+ }
+
+ sr->fString.extract(0, length, ver, UPRV_LENGTHOF(ver), US_INV);
+ u_versionFromString(version, ver);
+
+ result->add(member, line, *status);
+ member = nullptr;
+ }
+ else if(uprv_strcmp(subtag, "%%CollationBin")==0)
+ {
+ /* discard duplicate %%CollationBin if any*/
+ }
+ else if (uprv_strcmp(subtag, "Sequence") == 0 && member->isString())
+ {
+ StringResource *sr = static_cast<StringResource *>(member);
+ rules = sr->fString;
+ haveRules = true;
+ // Defer building the collator until we have seen
+ // all sub-elements of the collation table, including the Version.
+ /* in order to achieve smaller data files, we can direct genrb */
+ /* to omit collation rules */
+ if(!state->omitCollationRules) {
+ result->add(member, line, *status);
+ member = nullptr;
+ }
+ }
+ else // Just copy non-special items.
+ {
+ result->add(member, line, *status);
+ member = nullptr;
+ }
+ res_close(member); // TODO: use LocalPointer
+ if (U_FAILURE(*status))
+ {
+ res_close(result);
+ return nullptr;
+ }
+ }
+
+ if (!haveRules) { return result; }
+
+#if UCONFIG_NO_COLLATION || UCONFIG_NO_FILE_IO
+ warning(line, "Not building collation elements because of UCONFIG_NO_COLLATION and/or UCONFIG_NO_FILE_IO, see uconfig.h");
+ (void)collationType;
+#else
+ // CLDR ticket #3949, ICU ticket #8082:
+ // Do not build collation binary data for for-import-only "private" collation rule strings.
+ if (uprv_strncmp(collationType, "private-", 8) == 0) {
+ if(isVerbose()) {
+ printf("Not building %s~%s collation binary\n", state->filename, collationType);
+ }
+ return result;
+ }
+
+ if(!state->makeBinaryCollation) {
+ if(isVerbose()) {
+ printf("Not building %s~%s collation binary\n", state->filename, collationType);
+ }
+ return result;
+ }
+ UErrorCode intStatus = U_ZERO_ERROR;
+ UParseError parseError;
+ uprv_memset(&parseError, 0, sizeof(parseError));
+ GenrbImporter importer(state->inputdir, state->outputdir);
+ const icu::CollationTailoring *base = icu::CollationRoot::getRoot(intStatus);
+ if(U_FAILURE(intStatus)) {
+ error(line, "failed to load root collator (ucadata.icu) - %s", u_errorName(intStatus));
+ res_close(result);
+ return nullptr; // TODO: use LocalUResourceBundlePointer for result
+ }
+ icu::CollationBuilder builder(base, state->icu4xMode, intStatus);
+ if(state->icu4xMode || (uprv_strncmp(collationType, "search", 6) == 0)) {
+ builder.disableFastLatin(); // build fast-Latin table unless search collator or ICU4X
+ }
+ LocalPointer<icu::CollationTailoring> t(
+ builder.parseAndBuild(rules, version, &importer, &parseError, intStatus));
+ if(U_FAILURE(intStatus)) {
+ const char *reason = builder.getErrorReason();
+ if(reason == nullptr) { reason = ""; }
+ error(line, "CollationBuilder failed at %s~%s/Sequence rule offset %ld: %s %s",
+ state->filename, collationType,
+ (long)parseError.offset, u_errorName(intStatus), reason);
+ if(parseError.preContext[0] != 0 || parseError.postContext[0] != 0) {
+ // Print pre- and post-context.
+ char preBuffer[100], postBuffer[100];
+ escape(parseError.preContext, preBuffer, sizeof(preBuffer));
+ escape(parseError.postContext, postBuffer, sizeof(postBuffer));
+ error(line, " error context: \"...%s\" ! \"%s...\"", preBuffer, postBuffer);
+ }
+ if(isStrict() || t.isNull()) {
+ *status = intStatus;
+ res_close(result);
+ return nullptr;
+ }
+ }
+ if (state->icu4xMode) {
+ char *nameWithoutSuffix = static_cast<char *>(uprv_malloc(uprv_strlen(state->filename) + 1));
+ if (nameWithoutSuffix == nullptr) {
+ *status = U_MEMORY_ALLOCATION_ERROR;
+ res_close(result);
+ return nullptr;
+ }
+ uprv_strcpy(nameWithoutSuffix, state->filename);
+ *uprv_strrchr(nameWithoutSuffix, '.') = 0;
+
+ writeCollationTOML(state->outputdir, nameWithoutSuffix, collationType, t->data, t->settings, status);
+ uprv_free(nameWithoutSuffix);
+ }
+ icu::LocalMemory<uint8_t> buffer;
+ int32_t capacity = 100000;
+ uint8_t *dest = buffer.allocateInsteadAndCopy(capacity);
+ if(dest == nullptr) {
+ fprintf(stderr, "memory allocation (%ld bytes) for file contents failed\n",
+ (long)capacity);
+ *status = U_MEMORY_ALLOCATION_ERROR;
+ res_close(result);
+ return nullptr;
+ }
+ int32_t indexes[icu::CollationDataReader::IX_TOTAL_SIZE + 1];
+ int32_t totalSize = icu::CollationDataWriter::writeTailoring(
+ *t, *t->settings, indexes, dest, capacity, intStatus);
+ if(intStatus == U_BUFFER_OVERFLOW_ERROR) {
+ intStatus = U_ZERO_ERROR;
+ capacity = totalSize;
+ dest = buffer.allocateInsteadAndCopy(capacity);
+ if(dest == nullptr) {
+ fprintf(stderr, "memory allocation (%ld bytes) for file contents failed\n",
+ (long)capacity);
+ *status = U_MEMORY_ALLOCATION_ERROR;
+ res_close(result);
+ return nullptr;
+ }
+ totalSize = icu::CollationDataWriter::writeTailoring(
+ *t, *t->settings, indexes, dest, capacity, intStatus);
+ }
+ if(U_FAILURE(intStatus)) {
+ fprintf(stderr, "CollationDataWriter::writeTailoring() failed: %s\n",
+ u_errorName(intStatus));
+ res_close(result);
+ return nullptr;
+ }
+ if(isVerbose()) {
+ printf("%s~%s collation tailoring part sizes:\n", state->filename, collationType);
+ icu::CollationInfo::printSizes(totalSize, indexes);
+ if(t->settings->hasReordering()) {
+ printf("%s~%s collation reordering ranges:\n", state->filename, collationType);
+ icu::CollationInfo::printReorderRanges(
+ *t->data, t->settings->reorderCodes, t->settings->reorderCodesLength);
+ }
+#if 0 // debugging output
+ } else {
+ printf("%s~%s collation tailoring part sizes:\n", state->filename, collationType);
+ icu::CollationInfo::printSizes(totalSize, indexes);
+#endif
+ }
+ struct SResource *collationBin = bin_open(state->bundle, "%%CollationBin", totalSize, dest, nullptr, nullptr, status);
+ result->add(collationBin, line, *status);
+ if (U_FAILURE(*status)) {
+ res_close(result);
+ return nullptr;
+ }
+#endif
+ return result;
+}
+
+static UBool
+keepCollationType(const char * /*type*/) {  /* filter hook consulted by parseCollationElements() for each collation type; the name is ignored */
+ return true;  /* every collation type is currently kept */
+}
+
+static struct SResource *
+parseCollationElements(ParseState* state, char *tag, uint32_t startline, UBool newCollation, UErrorCode *status)
+{
+    /*
+     * Parses the body of a collation table; the opening '{' has already been consumed.
+     *
+     * newCollation == false: the whole body is handed to addCollation() as a
+     *     single tailoring labelled "(no type)".
+     * newCollation == true: the body is a list of members, each one of
+     *     default ...            parsed as an ordinary resource,
+     *     <type> { ... }         one tailoring, parsed by addCollation(),
+     *     <type>:alias { ... }   parsed as an ordinary (alias) resource.
+     *
+     * Returns the new table, or nullptr with *status set on failure; on failure
+     * the partially built table is closed here.
+     */
+    TableResource    *result = nullptr;
+    struct SResource *member = nullptr;
+    struct UString   *tokenValue;
+    struct UString    comment;
+    enum ETokenType   token;
+    char              subtag[1024], typeKeyword[1024];
+    uint32_t          line;
+
+    result = table_open(state->bundle, tag, nullptr, status);
+
+    if (result == nullptr || U_FAILURE(*status))
+    {
+        return nullptr;
+    }
+    if(isVerbose()){
+        printf(" collation elements %s at line %i \n", (tag == nullptr) ? "(null)" : tag, (int)startline);
+    }
+    if(!newCollation) {
+        return addCollation(state, result, "(no type)", startline, status);
+    }
+
+    /* Every failure inside the loop breaks out to the shared cleanup below. */
+    for(;;) {
+        ustr_init(&comment);
+        token = getToken(state, &tokenValue, &comment, &line, status);
+
+        if (token == TOK_CLOSE_BRACE)
+        {
+            ustr_deinit(&comment);
+            return result;
+        }
+
+        if (token != TOK_STRING)
+        {
+            *status = U_INVALID_FORMAT_ERROR;
+
+            if (token == TOK_EOF)
+            {
+                error(startline, "unterminated table");
+            }
+            else
+            {
+                error(line, "Unexpected token %s", tokenNames[token]);
+            }
+            break;
+        }
+
+        if (U_FAILURE(*status))
+        {
+            break;
+        }
+
+        /* The key is copied into a fixed-size stack buffer: it must consist of
+           invariant characters (same rule as realParseTable()) and must fit. */
+        if (!uprv_isInvariantUString(tokenValue->fChars, -1))
+        {
+            *status = U_INVALID_FORMAT_ERROR;
+            error(line, "invariant characters required for table keys");
+            break;
+        }
+        const int32_t subtagLength = u_strlen(tokenValue->fChars);
+        if (subtagLength >= (int32_t)sizeof(subtag))
+        {
+            *status = U_INVALID_FORMAT_ERROR;
+            error(line, "collation type name is too long (limit is %d characters)", (int)(sizeof(subtag) - 1));
+            break;
+        }
+        u_UCharsToChars(tokenValue->fChars, subtag, subtagLength + 1);
+
+        if (uprv_strcmp(subtag, "default") == 0)
+        {
+            member = parseResource(state, subtag, nullptr, status);
+
+            if (U_FAILURE(*status))
+            {
+                break;
+            }
+
+            result->add(member, line, *status);
+        }
+        else
+        {
+            token = peekToken(state, 0, &tokenValue, &line, &comment, status);
+            /* this probably needs to be refactored or recursively use the parser */
+            /* first we assume that our collation table won't have the explicit type */
+            /* then, we cannot handle aliases */
+            if(token == TOK_OPEN_BRACE) {
+                token = getToken(state, &tokenValue, &comment, &line, status);
+                TableResource *collationRes;
+                if (keepCollationType(subtag)) {
+                    collationRes = table_open(state->bundle, subtag, nullptr, status);
+                } else {
+                    collationRes = nullptr;
+                }
+                // need to parse the collation data regardless
+                collationRes = addCollation(state, collationRes, subtag, startline, status);
+                if (collationRes != nullptr) {
+                    result->add(collationRes, startline, *status);
+                }
+            } else if(token == TOK_COLON) { /* right now, we'll just try to see if we have aliases */
+                /* we could have a table too */
+                token = peekToken(state, 1, &tokenValue, &line, &comment, status);
+
+                /* Only a short invariant keyword can be "alias"; anything else is
+                   rejected without being copied into typeKeyword. */
+                UBool isAlias = false;
+                if (token == TOK_STRING &&
+                    uprv_isInvariantUString(tokenValue->fChars, -1) &&
+                    u_strlen(tokenValue->fChars) < (int32_t)sizeof(typeKeyword))
+                {
+                    u_UCharsToChars(tokenValue->fChars, typeKeyword, u_strlen(tokenValue->fChars) + 1);
+                    isAlias = (UBool)(uprv_strcmp(typeKeyword, "alias") == 0);
+                }
+
+                if(isAlias) {
+                    member = parseResource(state, subtag, nullptr, status);
+                    if (U_FAILURE(*status))
+                    {
+                        break;
+                    }
+
+                    result->add(member, line, *status);
+                } else {
+                    *status = U_INVALID_FORMAT_ERROR;
+                    error(line, "only the 'alias' type is supported for collation type %s", subtag);
+                    break;
+                }
+            } else {
+                *status = U_INVALID_FORMAT_ERROR;
+                error(line, "expected '{' or ':' after collation type %s", subtag);
+                break;
+            }
+        }
+
+        if (U_FAILURE(*status))
+        {
+            break;
+        }
+
+        /* Release this iteration's comment text before the next ustr_init(). */
+        ustr_deinit(&comment);
+    }
+
+    /* Failure: *status has been set by whoever broke out of the loop. */
+    ustr_deinit(&comment);
+    res_close(result);
+    return nullptr;
+}
+
+/* Necessary, because CollationElements requires the bundle->fRoot member to be present which,
+   if this weren't special-cased, wouldn't be set until the entire file had been processed.
+
+   Parses "(name resource)* '}'" into the caller-supplied table and returns that table, or
+   nullptr on failure. The table is never closed here: it stays owned by the caller
+   (parse() passes the bundle's root table). */
+static struct SResource *
+realParseTable(ParseState* state, TableResource *table, char *tag, uint32_t startline, UErrorCode *status)
+{
+    struct SResource *member = nullptr;
+    struct UString   *tokenValue=nullptr;
+    struct UString    comment;
+    enum ETokenType   token;
+    char              subtag[1024];
+    uint32_t          line;
+    UBool             readToken = false;
+
+    /* '{' . (name resource)* '}' */
+
+    if(isVerbose()){
+        printf(" parsing table %s at line %i \n", (tag == nullptr) ? "(null)" : tag, (int)startline);
+    }
+
+    /* Every failure inside the loop breaks out to the shared cleanup below. */
+    for (;;)
+    {
+        ustr_init(&comment);
+        token = getToken(state, &tokenValue, &comment, &line, status);
+
+        if (token == TOK_CLOSE_BRACE)
+        {
+            if (!readToken && isVerbose()) {
+                warning(startline, "Encountered empty table");
+            }
+            ustr_deinit(&comment);
+            return table;
+        }
+
+        if (token != TOK_STRING)
+        {
+            *status = U_INVALID_FORMAT_ERROR;
+
+            if (token == TOK_EOF)
+            {
+                error(startline, "unterminated table");
+            }
+            else
+            {
+                error(line, "unexpected token %s", tokenNames[token]);
+            }
+            break;
+        }
+
+        if(!uprv_isInvariantUString(tokenValue->fChars, -1)) {
+            *status = U_INVALID_FORMAT_ERROR;
+            error(line, "invariant characters required for table keys");
+            break;
+        }
+
+        /* The key is copied into a fixed-size stack buffer, so it has to fit
+           together with its terminating NUL. */
+        const int32_t subtagLength = u_strlen(tokenValue->fChars);
+        if (subtagLength >= (int32_t)sizeof(subtag)) {
+            *status = U_INVALID_FORMAT_ERROR;
+            error(line, "table key is too long (limit is %d characters)", (int)(sizeof(subtag) - 1));
+            break;
+        }
+        u_UCharsToChars(tokenValue->fChars, subtag, subtagLength + 1);
+
+        if (U_FAILURE(*status))
+        {
+            error(line, "parse error. Stopped parsing tokens with %s", u_errorName(*status));
+            break;
+        }
+
+        member = parseResource(state, subtag, &comment, status);
+
+        if (member == nullptr || U_FAILURE(*status))
+        {
+            error(line, "parse error. Stopped parsing resource with %s", u_errorName(*status));
+            break;
+        }
+
+        table->add(member, line, *status);
+
+        if (U_FAILURE(*status))
+        {
+            error(line, "parse error. Stopped parsing table with %s", u_errorName(*status));
+            break;
+        }
+        readToken = true;
+        ustr_deinit(&comment);
+    }
+
+    /* Failure: release the current comment buffer; the table is left to the caller. */
+    ustr_deinit(&comment);
+    return nullptr;
+}
+
+static struct SResource *
+parseTable(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
+{
+    /*
+     * Parses a non-top-level table. Tables named "CollationElements" or
+     * "collations" are routed to parseCollationElements(); every other table is
+     * opened here and filled by realParseTable().
+     * Returns the table, or nullptr on failure.
+     */
+    if (tag != nullptr && uprv_strcmp(tag, "CollationElements") == 0)
+    {
+        return parseCollationElements(state, tag, startline, false, status);
+    }
+    if (tag != nullptr && uprv_strcmp(tag, "collations") == 0)
+    {
+        return parseCollationElements(state, tag, startline, true, status);
+    }
+    if(isVerbose()){
+        printf(" table %s at line %i \n", (tag == nullptr) ? "(null)" : tag, (int)startline);
+    }
+
+    TableResource *result = table_open(state->bundle, tag, comment, status);
+
+    if (result == nullptr || U_FAILURE(*status))
+    {
+        return nullptr;
+    }
+
+    struct SResource *parsed = realParseTable(state, result, tag, startline, status);
+    if (parsed == nullptr)
+    {
+        /* realParseTable() never closes the table it is given, so the table
+           opened above has to be released here when parsing fails. */
+        res_close(result);
+    }
+    return parsed;
+}
+
+static struct SResource *
+parseArray(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
+{
+    /*
+     * Parses "resource [','] ... '}'" into a new array resource; the opening
+     * '{' has already been consumed. Bare strings become string members, any
+     * other member is parsed by parseResource().
+     * Returns the array, or nullptr on failure (the partial array is closed).
+     */
+    struct SResource *member = nullptr;
+    struct UString   *tokenValue;
+    struct UString    memberComments;
+    enum ETokenType   token;
+    UBool             readToken = false;
+
+    ArrayResource *result = array_open(state->bundle, tag, comment, status);
+
+    if (result == nullptr || U_FAILURE(*status))
+    {
+        return nullptr;
+    }
+    if(isVerbose()){
+        printf(" array %s at line %i \n", (tag == nullptr) ? "(null)" : tag, (int)startline);
+    }
+
+    ustr_init(&memberComments);
+
+    /* '{' . resource [','] '}' */
+    /* Every failure inside the loop breaks out to the shared cleanup below. */
+    for (;;)
+    {
+        /* reset length */
+        ustr_setlen(&memberComments, 0, status);
+
+        /* check for end of array, but don't consume next token unless it really is the end */
+        token = peekToken(state, 0, &tokenValue, nullptr, &memberComments, status);
+
+        if (token == TOK_CLOSE_BRACE)
+        {
+            getToken(state, nullptr, nullptr, nullptr, status);
+            if (!readToken) {
+                warning(startline, "Encountered empty array");
+            }
+            ustr_deinit(&memberComments);
+            return result;
+        }
+
+        if (token == TOK_EOF)
+        {
+            *status = U_INVALID_FORMAT_ERROR;
+            error(startline, "unterminated array");
+            break;
+        }
+
+        /* string arrays are a special case */
+        if (token == TOK_STRING)
+        {
+            getToken(state, &tokenValue, &memberComments, nullptr, status);
+            member = string_open(state->bundle, nullptr, tokenValue->fChars, tokenValue->fLength, &memberComments, status);
+        }
+        else
+        {
+            member = parseResource(state, nullptr, &memberComments, status);
+        }
+
+        if (member == nullptr || U_FAILURE(*status))
+        {
+            break;
+        }
+
+        result->add(member);
+
+        /* eat optional comma if present */
+        token = peekToken(state, 0, nullptr, nullptr, nullptr, status);
+
+        if (token == TOK_COMMA)
+        {
+            getToken(state, nullptr, nullptr, nullptr, status);
+        }
+
+        if (U_FAILURE(*status))
+        {
+            break;
+        }
+        readToken = true;
+    }
+
+    /* Failure: release the comment scratch buffer as well as the partial array. */
+    ustr_deinit(&memberComments);
+    res_close(result);
+    return nullptr;
+}
+
+static struct SResource *
+parseIntVector(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
+{
+    /*
+     * Parses "number [','] ... '}'" into a new int vector; the opening '{' has
+     * already been consumed. Each entry is converted with strtoul() in base 0,
+     * so decimal, hexadecimal (0x...) and octal (0...) spellings are accepted.
+     * Returns the vector, or nullptr on failure (the partial vector is closed).
+     */
+    enum ETokenType token;
+    char           *string;
+    int32_t         value;
+    UBool           readToken = false;
+    char           *stopstring;
+    struct UString  memberComments;
+
+    IntVectorResource *result = intvector_open(state->bundle, tag, comment, status);
+
+    if (result == nullptr || U_FAILURE(*status))
+    {
+        return nullptr;
+    }
+
+    if(isVerbose()){
+        printf(" vector %s at line %i \n", (tag == nullptr) ? "(null)" : tag, (int)startline);
+    }
+    ustr_init(&memberComments);
+    /* '{' . string [','] '}' */
+    /* Every failure inside the loop breaks out to the shared cleanup below. */
+    for (;;)
+    {
+        ustr_setlen(&memberComments, 0, status);
+
+        /* check for end of array, but don't consume next token unless it really is the end */
+        token = peekToken(state, 0, nullptr, nullptr,&memberComments, status);
+
+        if (token == TOK_CLOSE_BRACE)
+        {
+            /* it's the end, consume the close brace */
+            getToken(state, nullptr, nullptr, nullptr, status);
+            if (!readToken) {
+                warning(startline, "Encountered empty int vector");
+            }
+            ustr_deinit(&memberComments);
+            return result;
+        }
+
+        int32_t stringLength;
+        string = getInvariantString(state, nullptr, nullptr, stringLength, status);
+
+        if (string == nullptr || U_FAILURE(*status))
+        {
+            /* Never hand a null pointer to strtoul(); make sure the caller sees a failure. */
+            if (U_SUCCESS(*status)) {
+                *status = U_INVALID_FORMAT_ERROR;
+            }
+            uprv_free(string);
+            break;
+        }
+
+        /* The whole entry has to be consumed by strtoul(); leftover characters are illegal. */
+        value = uprv_strtoul(string, &stopstring, 0);
+        const UBool fullyParsed = (UBool)((int32_t)(stopstring-string) == stringLength);
+        uprv_free(string);
+
+        if (!fullyParsed)
+        {
+            *status=U_INVALID_CHAR_FOUND;
+            break;
+        }
+
+        result->add(value, *status);
+        token = peekToken(state, 0, nullptr, nullptr, nullptr, status);
+
+        if (U_FAILURE(*status))
+        {
+            break;
+        }
+
+        /* the comma is optional (even though it is required to prevent the reader from concatenating
+           consecutive entries) so that a missing comma on the last entry isn't an error */
+        if (token == TOK_COMMA)
+        {
+            getToken(state, nullptr, nullptr, nullptr, status);
+        }
+        readToken = true;
+    }
+
+    /* Failure: release the comment scratch buffer as well as the partial vector. */
+    ustr_deinit(&memberComments);
+    res_close(result);
+    return nullptr;
+}
+
+/* Returns true if c is one of the hexadecimal digits 0-9, a-f or A-F. */
+static UBool
+isBinaryHexDigit(char c)
+{
+    return (UBool)(('0' <= c && c <= '9') || ('a' <= c && c <= 'f') || ('A' <= c && c <= 'F'));
+}
+
+static struct SResource *
+parseBinary(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
+{
+    /*
+     * Parses "hexdigits '}'" into a binary resource; the opening '{' has
+     * already been consumed. The text is read as pairs of hex digits, one pair
+     * per byte; spaces between pairs are skipped.
+     * Returns the resource, or nullptr with *status set on failure.
+     */
+    uint32_t line;
+    int32_t stringLength;
+    LocalMemory<char> string(getInvariantString(state, &line, nullptr, stringLength, status));
+    if (string.isNull() || U_FAILURE(*status))
+    {
+        return nullptr;
+    }
+
+    expect(state, TOK_CLOSE_BRACE, nullptr, nullptr, nullptr, status);
+    if (U_FAILURE(*status))
+    {
+        return nullptr;
+    }
+
+    if(isVerbose()){
+        printf(" binary %s at line %i \n", (tag == nullptr) ? "(null)" : tag, (int)startline);
+    }
+
+    /* stringLength bytes is an upper bound: every output byte uses two input characters. */
+    LocalMemory<uint8_t> value;
+    int32_t count = 0;
+    if (stringLength > 0 && value.allocateInsteadAndCopy(stringLength) == nullptr)
+    {
+        *status = U_MEMORY_ALLOCATION_ERROR;
+        return nullptr;
+    }
+
+    char toConv[3] = {'\0', '\0', '\0'};
+    for (int32_t i = 0; i < stringLength;)
+    {
+        // Skip spaces (which may have been line endings).
+        char c0 = string[i++];
+        if (c0 == ' ') { continue; }
+        if (i == stringLength) {
+            *status=U_INVALID_CHAR_FOUND;
+            error(line, "Encountered invalid binary value (odd number of hex digits)");
+            return nullptr;
+        }
+        toConv[0] = c0;
+        toConv[1] = string[i++];
+
+        // Check the digits explicitly: strtoul() would also accept a leading
+        // sign, so a pair such as "-f" or "+1" used to be taken as a valid byte.
+        if (!isBinaryHexDigit(toConv[0]) || !isBinaryHexDigit(toConv[1]))
+        {
+            *status=U_INVALID_CHAR_FOUND;
+            error(line, "Encountered invalid binary value (not all pairs of hex digits)");
+            return nullptr;
+        }
+        value[count++] = (uint8_t) uprv_strtoul(toConv, nullptr, 16);
+    }
+
+    if (count == 0) {
+        warning(startline, "Encountered empty binary value");
+        return bin_open(state->bundle, tag, 0, nullptr, "", comment, status);
+    } else {
+        return bin_open(state->bundle, tag, count, value.getAlias(), nullptr, comment, status);
+    }
+}
+
+static struct SResource *
+parseInteger(ParseState* state, char *tag, uint32_t startline, const struct UString *comment, UErrorCode *status)
+{
+    /*
+     * Parses "number '}'" into an integer resource; the opening '{' has already
+     * been consumed. The number is converted with strtoul() in base 0 (decimal,
+     * hexadecimal or octal); an empty value yields 0 after a warning.
+     * Returns the resource, or nullptr on failure.
+     */
+    int32_t textLength;
+    char *text = getInvariantString(state, nullptr, nullptr, textLength, status);
+    if (text == nullptr || U_FAILURE(*status))
+    {
+        return nullptr;
+    }
+
+    expect(state, TOK_CLOSE_BRACE, nullptr, nullptr, nullptr, status);
+    if (U_FAILURE(*status))
+    {
+        uprv_free(text);
+        return nullptr;
+    }
+
+    if(isVerbose()){
+        printf(" integer %s at line %i \n", (tag == nullptr) ? "(null)" : tag, (int)startline);
+    }
+
+    if (textLength == 0)
+    {
+        warning(startline, "Encountered empty integer. Default value is 0.");
+    }
+
+    /* The conversion must consume the whole text; anything left over is an illegal character. */
+    char *end;
+    int32_t number = uprv_strtoul(text, &end, 0);
+    struct SResource *resource = nullptr;
+    if ((int32_t)(end - text) != textLength)
+    {
+        *status=U_INVALID_CHAR_FOUND;
+    }
+    else
+    {
+        resource = int_open(state->bundle, tag, number, comment, status);
+    }
+
+    uprv_free(text);
+    return resource;
+}
+
+static struct SResource *
+parseImport(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status)
+{
+    /*
+     * Parses "filename '}'" and stores the raw bytes of that file as a binary
+     * resource; the opening '{' has already been consumed. The file name is
+     * resolved against state->inputdir when one is set.
+     * Returns the resource, or nullptr with *status set on failure.
+     */
+    uint32_t line;
+    int32_t stringLength;
+    LocalMemory<char> filename(getInvariantString(state, &line, nullptr, stringLength, status));
+    if (filename.isNull() || U_FAILURE(*status))
+    {
+        return nullptr;
+    }
+
+    expect(state, TOK_CLOSE_BRACE, nullptr, nullptr, nullptr, status);
+
+    if (U_FAILURE(*status))
+    {
+        return nullptr;
+    }
+
+    if(isVerbose()){
+        printf(" import %s at line %i \n", (tag == nullptr) ? "(null)" : tag, (int)startline);
+    }
+
+    /* Open the input file for reading */
+    CharString fullname;
+    if (state->inputdir != nullptr) {
+        fullname.append(state->inputdir, *status);
+    }
+    fullname.appendPathPart(filename.getAlias(), *status);
+    if (U_FAILURE(*status)) {
+        return nullptr;
+    }
+
+    FileStream *file = T_FileStream_open(fullname.data(), "rb");
+    if (file == nullptr)
+    {
+        error(line, "couldn't open input file %s", filename.getAlias());
+        *status = U_FILE_ACCESS_ERROR;
+        return nullptr;
+    }
+
+    int32_t len = T_FileStream_size(file);
+    if (len < 0)
+    {
+        error(line, "couldn't determine the size of input file %s", filename.getAlias());
+        *status = U_FILE_ACCESS_ERROR;
+        T_FileStream_close (file);
+        return nullptr;
+    }
+
+    /* Allocate only for non-empty files: a zero-size request comes back as a
+       null pointer (parseBinary() guards the same way), and that must not be
+       reported as an out-of-memory error. */
+    LocalMemory<uint8_t> data;
+    if(len > 0 && data.allocateInsteadAndCopy(len) == nullptr)
+    {
+        *status = U_MEMORY_ALLOCATION_ERROR;
+        T_FileStream_close (file);
+        return nullptr;
+    }
+
+    /* A short read would leave uninitialized bytes in the resource. */
+    int32_t numRead = (len > 0) ? T_FileStream_read(file, data.getAlias(), len) : 0;
+    T_FileStream_close (file);
+    if (numRead != len)
+    {
+        error(line, "couldn't read input file %s", filename.getAlias());
+        *status = U_FILE_ACCESS_ERROR;
+        return nullptr;
+    }
+
+    return bin_open(state->bundle, tag, len, data.getAlias(), fullname.data(), comment, status);
+}
+
+static struct SResource *
+parseInclude(ParseState* state, char *tag, uint32_t startline, const struct UString* comment, UErrorCode *status)
+{
+    /*
+     * Parses "filename '}'" and stores the text of that file as a string
+     * resource; the opening '{' has already been consumed. The file name is
+     * resolved against state->inputdir when one is set, the same way
+     * parseImport() does it.
+     * Returns the resource, or nullptr with *status set on failure.
+     */
+    uint32_t line;
+    int32_t stringLength;
+    /* LocalMemory and CharString release their buffers on every return path. */
+    LocalMemory<char> filename(getInvariantString(state, &line, nullptr, stringLength, status));
+    if (filename.isNull() || U_FAILURE(*status))
+    {
+        return nullptr;
+    }
+
+    expect(state, TOK_CLOSE_BRACE, nullptr, nullptr, nullptr, status);
+
+    if (U_FAILURE(*status))
+    {
+        return nullptr;
+    }
+
+    if(isVerbose()){
+        printf(" include %s at line %i \n", (tag == nullptr) ? "(null)" : tag, (int)startline);
+    }
+
+    /* Build "<inputdir><separator><filename>". appendPathPart() adds the
+       separator only when needed, and an empty inputdir is handled without
+       looking at the character before the start of the string. */
+    CharString fullname;
+    if (state->inputdir != nullptr) {
+        fullname.append(state->inputdir, *status);
+    }
+    fullname.appendPathPart(filename.getAlias(), *status);
+    if (U_FAILURE(*status)) {
+        return nullptr;
+    }
+
+    const char *cp = nullptr;
+    UCHARBUF *ucbuf = ucbuf_open(fullname.data(), &cp,getShowWarning(),false,status);
+
+    if (U_FAILURE(*status)) {
+        error(line, "couldn't open input file %s\n", filename.getAlias());
+        return nullptr;
+    }
+
+    int32_t len = 0;
+    const char16_t *uBuffer = ucbuf_getBuffer(ucbuf,&len,status);
+    struct SResource *result = string_open(state->bundle, tag, uBuffer, len, comment, status);
+
+    ucbuf_close(ucbuf);
+
+    return result;
+}
+
+
+
+
+
+U_STRING_DECL(k_type_string, "string", 6);  /* resource type keywords; each constant below is set up in initParser(); the last argument is the keyword's length */
+U_STRING_DECL(k_type_binary, "binary", 6);
+U_STRING_DECL(k_type_bin, "bin", 3);  /* alias: parseResourceType() maps it to RESTYPE_BINARY */
+U_STRING_DECL(k_type_table, "table", 5);
+U_STRING_DECL(k_type_table_no_fallback, "table(nofallback)", 17);
+U_STRING_DECL(k_type_int, "int", 3);  /* alias: parseResourceType() maps it to RESTYPE_INTEGER */
+U_STRING_DECL(k_type_integer, "integer", 7);
+U_STRING_DECL(k_type_array, "array", 5);
+U_STRING_DECL(k_type_alias, "alias", 5);
+U_STRING_DECL(k_type_intvector, "intvector", 9);
+U_STRING_DECL(k_type_import, "import", 6);
+U_STRING_DECL(k_type_include, "include", 7);
+
+/* Various non-standard processing plugins that create one or more special resources. */
+U_STRING_DECL(k_type_plugin_uca_rules, "process(uca_rules)", 18);
+U_STRING_DECL(k_type_plugin_collation, "process(collation)", 18);
+U_STRING_DECL(k_type_plugin_transliterator, "process(transliterator)", 23);
+U_STRING_DECL(k_type_plugin_dependency, "process(dependency)", 19);
+
+typedef enum EResourceType  /* values are used as indexes into gResourceTypes[], so both must list the types in the same order */
+{
+ RESTYPE_UNKNOWN,  /* no explicit type; parseResource() then infers one from the lookahead tokens */
+ RESTYPE_STRING,
+ RESTYPE_BINARY,
+ RESTYPE_TABLE,
+ RESTYPE_TABLE_NO_FALLBACK,  /* rejected by parseResource(); only parse() accepts it, for the top-level table */
+ RESTYPE_INTEGER,
+ RESTYPE_ARRAY,
+ RESTYPE_ALIAS,
+ RESTYPE_INTVECTOR,
+ RESTYPE_IMPORT,
+ RESTYPE_INCLUDE,
+ RESTYPE_PROCESS_UCA_RULES,
+ RESTYPE_PROCESS_COLLATION,
+ RESTYPE_PROCESS_TRANSLITERATOR,
+ RESTYPE_PROCESS_DEPENDENCY,
+ RESTYPE_RESERVED  /* end marker for the keyword search in parseResourceType(); reaching it means "unknown type" */
+} EResourceType;
+
+static struct {  /* one entry per EResourceType value, in enum order; indexed directly with the enum */
+ const char *nameChars; /* only used for debugging */
+ const char16_t *nameUChars;  /* keyword compared against the type token in parseResourceType() */
+ ParseResourceFunction *parseFunction;  /* parser called by parseResource(); nullptr there is reported as an internal error */
+} gResourceTypes[] = {
+ {"Unknown", nullptr, nullptr},
+ {"string", k_type_string, parseString},
+ {"binary", k_type_binary, parseBinary},
+ {"table", k_type_table, parseTable},
+ {"table(nofallback)", k_type_table_no_fallback, nullptr}, /* parseFunction will never be called */
+ {"integer", k_type_integer, parseInteger},
+ {"array", k_type_array, parseArray},
+ {"alias", k_type_alias, parseAlias},
+ {"intvector", k_type_intvector, parseIntVector},
+ {"import", k_type_import, parseImport},
+ {"include", k_type_include, parseInclude},
+ {"process(uca_rules)", k_type_plugin_uca_rules, parseUCARules},
+ {"process(collation)", k_type_plugin_collation, nullptr /* not implemented yet */},
+ {"process(transliterator)", k_type_plugin_transliterator, parseTransliterator},
+ {"process(dependency)", k_type_plugin_dependency, parseDependency},
+ {"reserved", nullptr, nullptr}
+};
+
+void initParser()
+{  /* One-time parser setup: runs U_STRING_INIT for every keyword constant declared with
+    * U_STRING_DECL in this file; parseResourceType() compares type tokens against them.
+    * Each call must repeat the literal and length used in the matching U_STRING_DECL. */
+ U_STRING_INIT(k_type_string, "string", 6);
+ U_STRING_INIT(k_type_binary, "binary", 6);
+ U_STRING_INIT(k_type_bin, "bin", 3);
+ U_STRING_INIT(k_type_table, "table", 5);
+ U_STRING_INIT(k_type_table_no_fallback, "table(nofallback)", 17);
+ U_STRING_INIT(k_type_int, "int", 3);
+ U_STRING_INIT(k_type_integer, "integer", 7);
+ U_STRING_INIT(k_type_array, "array", 5);
+ U_STRING_INIT(k_type_alias, "alias", 5);
+ U_STRING_INIT(k_type_intvector, "intvector", 9);
+ U_STRING_INIT(k_type_import, "import", 6);
+ U_STRING_INIT(k_type_include, "include", 7);
+
+ U_STRING_INIT(k_type_plugin_uca_rules, "process(uca_rules)", 18);
+ U_STRING_INIT(k_type_plugin_collation, "process(collation)", 18);
+ U_STRING_INIT(k_type_plugin_transliterator, "process(transliterator)", 23);
+ U_STRING_INIT(k_type_plugin_dependency, "process(dependency)", 19);
+}
+
+static inline UBool isTable(enum EResourceType type) {
+    /* Both the plain and the no-fallback table declarations count as tables. */
+    switch (type) {
+    case RESTYPE_TABLE:
+    case RESTYPE_TABLE_NO_FALLBACK:
+        return (UBool)true;
+    default:
+        return (UBool)false;
+    }
+}
+
+static enum EResourceType
+parseResourceType(ParseState* state, UErrorCode *status)
+{
+    /*
+     * Reads the type keyword that follows a ':' and maps it to an EResourceType.
+     * Besides the keywords listed in gResourceTypes[], the short forms "int"
+     * and "bin" are accepted.
+     * Returns RESTYPE_UNKNOWN if the next token is not a string; returns
+     * RESTYPE_RESERVED and sets U_INVALID_FORMAT_ERROR for an unknown keyword.
+     */
+    struct UString        *tokenValue;
+    struct UString        comment;
+    enum   EResourceType  result     = RESTYPE_UNKNOWN;
+    uint32_t              line=0;
+    ustr_init(&comment);
+    expect(state, TOK_STRING, &tokenValue, &comment, &line, status);
+    /* The comment text is never used here; release it straight away so that
+       no exit path leaks its buffer. */
+    ustr_deinit(&comment);
+
+    if (U_FAILURE(*status))
+    {
+        return RESTYPE_UNKNOWN;
+    }
+
+    *status = U_ZERO_ERROR;
+
+    /* Search for normal types */
+    result=RESTYPE_UNKNOWN;
+    while ((result=(EResourceType)(result+1)) < RESTYPE_RESERVED) {
+        if (u_strcmp(tokenValue->fChars, gResourceTypes[result].nameUChars) == 0) {
+            break;
+        }
+    }
+    /* Now search for the aliases */
+    if (u_strcmp(tokenValue->fChars, k_type_int) == 0) {
+        result = RESTYPE_INTEGER;
+    }
+    else if (u_strcmp(tokenValue->fChars, k_type_bin) == 0) {
+        result = RESTYPE_BINARY;
+    }
+    else if (result == RESTYPE_RESERVED) {
+        /* No keyword matched: report the offending token, truncated to the buffer size. */
+        char tokenBuffer[1024];
+        u_austrncpy(tokenBuffer, tokenValue->fChars, sizeof(tokenBuffer));
+        tokenBuffer[sizeof(tokenBuffer) - 1] = 0;
+        *status = U_INVALID_FORMAT_ERROR;
+        error(line, "unknown resource type '%s'", tokenBuffer);
+    }
+
+    return result;
+}
+
+/* parse a non-top-level resource */
+static struct SResource *
+parseResource(ParseState* state, char *tag, const struct UString *comment, UErrorCode *status)
+{
+    /* name . [ ':' type ] '{' resource '}' */
+    /* Parsing starts right after the name. With an explicit ":type" the matching
+       parser is used; without one the type is inferred from the next one or two
+       lookahead tokens. */
+    enum EResourceType resType = RESTYPE_UNKNOWN;
+    struct UString    *tokenValue;
+    uint32_t           startline;
+    uint32_t           line;
+
+    const enum ETokenType firstToken = getToken(state, &tokenValue, nullptr, &startline, status);
+
+    if(isVerbose()){
+        printf(" resource %s at line %i \n", (tag == nullptr) ? "(null)" : tag, (int)startline);
+    }
+
+    if (firstToken == TOK_EOF)
+    {
+        *status = U_INVALID_FORMAT_ERROR;
+        error(startline, "Unexpected EOF encountered");
+        return nullptr;
+    }
+    if (firstToken == TOK_ERROR)
+    {
+        *status = U_INVALID_FORMAT_ERROR;
+        return nullptr;
+    }
+    if (firstToken != TOK_COLON && firstToken != TOK_OPEN_BRACE)
+    {
+        *status = U_INVALID_FORMAT_ERROR;
+        error(startline, "syntax error while reading a resource, expected '{' or ':'");
+        return nullptr;
+    }
+
+    if (firstToken == TOK_COLON)
+    {
+        /* explicit type: read it, then the '{' that opens the resource body */
+        resType = parseResourceType(state, status);
+        expect(state, TOK_OPEN_BRACE, &tokenValue, nullptr, &startline, status);
+
+        if (U_FAILURE(*status))
+        {
+            return nullptr;
+        }
+    }
+
+    if (resType == RESTYPE_TABLE_NO_FALLBACK)
+    {
+        *status = U_INVALID_FORMAT_ERROR;
+        error(startline, "error: %s resource type not valid except on top bundle level", gResourceTypes[resType].nameChars);
+        return nullptr;
+    }
+
+    if (resType == RESTYPE_UNKNOWN)
+    {
+        /* No explicit type, so try to work it out. At this point, we've read the first '{'.
+           We could have any of the following:
+           { {         => array (nested)
+           { :/}       => array
+           { string ,  => string array
+           { string {  => table
+           { string :/{ => table
+           { string }  => string
+        */
+        const enum ETokenType next = peekToken(state, 0, nullptr, &line, nullptr,status);
+
+        if (U_FAILURE(*status))
+        {
+            return nullptr;
+        }
+
+        if (next == TOK_STRING)
+        {
+            const enum ETokenType afterString = peekToken(state, 1, nullptr, &line, nullptr, status);
+
+            if (U_FAILURE(*status))
+            {
+                return nullptr;
+            }
+
+            if (afterString == TOK_COMMA)
+            {
+                resType = RESTYPE_ARRAY;
+            }
+            else if (afterString == TOK_OPEN_BRACE || afterString == TOK_COLON)
+            {
+                resType = RESTYPE_TABLE;
+            }
+            else if (afterString == TOK_CLOSE_BRACE)
+            {
+                resType = RESTYPE_STRING;
+            }
+            else
+            {
+                *status = U_INVALID_FORMAT_ERROR;
+                error(line, "Unexpected token after string, expected ',', '{' or '}'");
+                return nullptr;
+            }
+        }
+        else if (next == TOK_OPEN_BRACE || next == TOK_COLON || next == TOK_CLOSE_BRACE)
+        {
+            resType = RESTYPE_ARRAY;
+        }
+        else
+        {
+            *status = U_INVALID_FORMAT_ERROR;
+            error(line, "Unexpected token after '{'");
+            return nullptr;
+        }
+    }
+
+    /* The type is known now: hand over to its parser, if it has one. */
+    ParseResourceFunction *handler = gResourceTypes[resType].parseFunction;
+    if (handler == nullptr)
+    {
+        *status = U_INTERNAL_PROGRAM_ERROR;
+        error(startline, "internal error: %s resource type found and not handled", gResourceTypes[resType].nameChars);
+        return nullptr;
+    }
+    return handler(state, tag, startline, comment, status);
+}
+
+/* parse the top-level resource */
+/*
+ * Reads "locale [':' table-type] '{' ... '}'" from buf and builds the bundle.
+ * inputDir/outputDir/filename and the three flags are copied into the parse
+ * state for the individual resource parsers.
+ * Returns the new bundle, which the caller owns, or nullptr with *status set
+ * on failure. On every failure exit the bundle, the lookahead buffers and the
+ * bundle comment are released.
+ */
+struct SRBRoot *
+parse(UCHARBUF *buf, const char *inputDir, const char *outputDir, const char *filename,
+      UBool makeBinaryCollation, UBool omitCollationRules, UBool icu4xMode, UErrorCode *status)
+{
+    struct UString    *tokenValue;
+    struct UString     comment;
+    uint32_t           line;
+    enum EResourceType bundleType;
+    enum ETokenType    token;
+    ParseState         state;
+    uint32_t           i;
+
+
+    for (i = 0; i < MAX_LOOKAHEAD + 1; i++)
+    {
+        ustr_init(&state.lookahead[i].value);
+        ustr_init(&state.lookahead[i].comment);
+    }
+
+    initLookahead(&state, buf, status);
+
+    state.inputdir       = inputDir;
+    state.inputdirLength = (state.inputdir != nullptr) ? (uint32_t)uprv_strlen(state.inputdir) : 0;
+    state.outputdir       = outputDir;
+    state.outputdirLength = (state.outputdir != nullptr) ? (uint32_t)uprv_strlen(state.outputdir) : 0;
+    state.filename = filename;
+    state.makeBinaryCollation = makeBinaryCollation;
+    state.omitCollationRules = omitCollationRules;
+    state.icu4xMode = icu4xMode;
+
+    ustr_init(&comment);
+    expect(&state, TOK_STRING, &tokenValue, &comment, nullptr, status);
+
+    state.bundle = new SRBRoot(&comment, false, *status);
+
+    /* Shared failure exit; only used from here on, once state.bundle is assigned. */
+    auto abandon = [&state, &comment]() -> SRBRoot * {
+        delete state.bundle;
+        state.bundle = nullptr;
+        cleanupLookahead(&state);
+        ustr_deinit(&comment);
+        return nullptr;
+    };
+
+    if (state.bundle == nullptr || U_FAILURE(*status))
+    {
+        return abandon();
+    }
+
+
+    state.bundle->setLocale(tokenValue->fChars, *status);
+
+    /* The following code is to make Empty bundle work no matter with :table specifer or not */
+    token = getToken(&state, nullptr, nullptr, &line, status);
+    if(token==TOK_COLON) {
+        *status=U_ZERO_ERROR;
+        bundleType=parseResourceType(&state, status);
+
+        if(isTable(bundleType))
+        {
+            expect(&state, TOK_OPEN_BRACE, nullptr, nullptr, &line, status);
+        }
+        else
+        {
+            *status=U_PARSE_ERROR;
+            error(line, "parse error. Stopped parsing with %s", u_errorName(*status));
+        }
+    }
+    else
+    {
+        /* not a colon */
+        if(token==TOK_OPEN_BRACE)
+        {
+            *status=U_ZERO_ERROR;
+            bundleType=RESTYPE_TABLE;
+        }
+        else
+        {
+            /* neither colon nor open brace */
+            *status=U_PARSE_ERROR;
+            bundleType=RESTYPE_UNKNOWN;
+            error(line, "parse error, did not find open-brace '{' or colon ':', stopped with %s", u_errorName(*status));
+        }
+    }
+
+    if (U_FAILURE(*status))
+    {
+        return abandon();
+    }
+
+    if(bundleType==RESTYPE_TABLE_NO_FALLBACK) {
+        /*
+         * Parse a top-level table with the table(nofallback) declaration.
+         * This is the same as a regular table, but also sets the
+         * URES_ATT_NO_FALLBACK flag in indexes[URES_INDEX_ATTRIBUTES] .
+         */
+        state.bundle->fNoFallback=true;
+    }
+    /* top-level tables need not handle special table names like "collations" */
+    assert(!state.bundle->fIsPoolBundle);
+    assert(state.bundle->fRoot->fType == URES_TABLE);
+    TableResource *rootTable = static_cast<TableResource *>(state.bundle->fRoot);
+    realParseTable(&state, rootTable, nullptr, line, status);
+    if(dependencyArray!=nullptr){
+        rootTable->add(dependencyArray, 0, *status);
+        dependencyArray = nullptr;
+    }
+    if (U_FAILURE(*status))
+    {
+        /* dependencyArray is always nullptr here (see just above), so there is
+           nothing further to close besides the bundle. */
+        return abandon();
+    }
+
+    if (getToken(&state, nullptr, nullptr, &line, status) != TOK_EOF)
+    {
+        warning(line, "extraneous text after resource bundle (perhaps unmatched braces)");
+        if(isStrict()){
+            *status = U_INVALID_FORMAT_ERROR;
+            /* The bundle is not handed to the caller in this case, so free it too. */
+            return abandon();
+        }
+    }
+
+    cleanupLookahead(&state);
+    ustr_deinit(&comment);
+    return state.bundle;
+}
diff --git a/intl/icu/source/tools/genrb/parse.h b/intl/icu/source/tools/genrb/parse.h
new file mode 100644
index 0000000000..fa90ede9d2
--- /dev/null
+++ b/intl/icu/source/tools/genrb/parse.h
@@ -0,0 +1,38 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+*
+* Copyright (C) 1998-2014, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+*******************************************************************************
+*
+* File parse.h
+*
+* Modification History:
+*
+* Date Name Description
+* 05/26/99 stephen Creation.
+*******************************************************************************
+*/
+
+#ifndef PARSE_H
+#define PARSE_H 1
+
+#include "unicode/utypes.h"
+#include "filestrm.h"
+#include "ucbuf.h"
+
+U_CDECL_BEGIN
+/* One time parser initialization.
+ * Sets up the resource-type keyword constants that parse() compares type
+ * tokens against. */
+void initParser();
+
+/* Parse a ResourceBundle text file.
+ * buf       - source text to parse.
+ * inputDir  - directory against which import/include file names are resolved; may be null.
+ * outputDir, filename and the three UBool options are stored in the parser
+ *             state for the individual resource parsers (see parse.cpp).
+ * status    - receives the error code on failure.
+ * Returns the parsed bundle, or null if parsing failed. */
+struct SRBRoot* parse(UCHARBUF *buf, const char* inputDir, const char* outputDir,
+ const char *filename,
+ UBool makeBinaryCollation, UBool omitCollationRules, UBool icu4xMode, UErrorCode *status);
+
+U_CDECL_END
+
+#endif
diff --git a/intl/icu/source/tools/genrb/prscmnts.cpp b/intl/icu/source/tools/genrb/prscmnts.cpp
new file mode 100644
index 0000000000..ea55352b41
--- /dev/null
+++ b/intl/icu/source/tools/genrb/prscmnts.cpp
@@ -0,0 +1,248 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+ *******************************************************************************
+ * Copyright (C) 2003-2014, International Business Machines
+ * Corporation and others. All Rights Reserved.
+ *******************************************************************************
+ *
+ * File prscmnts.cpp
+ *
+ * Modification History:
+ *
+ * Date Name Description
+ * 08/22/2003 ram Creation.
+ *******************************************************************************
+ */
+
+// Safer use of UnicodeString.
+#ifndef UNISTR_FROM_CHAR_EXPLICIT
+# define UNISTR_FROM_CHAR_EXPLICIT explicit
+#endif
+
+// Less important, but still a good idea.
+#ifndef UNISTR_FROM_STRING_EXPLICIT
+# define UNISTR_FROM_STRING_EXPLICIT explicit
+#endif
+
+#include "unicode/regex.h"
+#include "unicode/unistr.h"
+#include "unicode/parseerr.h"
+#include "prscmnts.h"
+#include <stdio.h>
+#include <stdlib.h>
+
+U_NAMESPACE_USE
+
+#if UCONFIG_NO_REGULAR_EXPRESSIONS==0 /* donot compile when RegularExpressions not available */
+
+#define MAX_SPLIT_STRINGS 20
+
+const char *patternStrings[UPC_LIMIT]={
+ "^translate\\s*(.*)",
+ "^note\\s*(.*)"
+};
+
+/**
+ * Applies a regular-expression replacement to a UTF-16 buffer, in place.
+ *
+ * @param source      buffer holding the text; it also receives the result
+ * @param srcLen      number of code units in source; also handed to extract()
+ *                    as the capacity available for the result
+ * @param patString   regular expression to search for
+ * @param options     flags passed to the RegexMatcher constructor
+ * @param replaceText text substituted for every match
+ * @param status      in/out error code; nothing is done when it is null or
+ *                    already reports a failure
+ * @return the value returned by UnicodeString::extract(), or 0 on early exit
+ */
+U_CFUNC int32_t
+removeText(char16_t *source, int32_t srcLen,
+           UnicodeString patString,uint32_t options,
+           UnicodeString replaceText, UErrorCode *status){
+
+    if(status == nullptr){
+        return 0;
+    }
+    if(U_FAILURE(*status)){
+        return 0;
+    }
+
+    const UnicodeString input(source, srcLen);
+
+    RegexMatcher replacer(patString, input, options, *status);
+    if(U_FAILURE(*status)){
+        return 0;
+    }
+
+    const UnicodeString replaced = replacer.replaceAll(replaceText, *status);
+
+    return replaced.extract(source, srcLen, *status);
+
+}
+/**
+ * Strips leading and trailing whitespace from a UTF-16 buffer in place by
+ * running three removeText() passes one after the other.
+ *
+ * @return the length reported by the last removeText() pass
+ */
+U_CFUNC int32_t
+trim(char16_t *src, int32_t srcLen, UErrorCode *status){
+    static const char * const kPasses[] = {
+        "^[ \\r\\n]+ ",   // remove leading new lines
+        "^\\s+",          // remove leading spaces
+        "\\s+$"           // remove trailing spaces
+    };
+    const int32_t passCount = (int32_t)(sizeof(kPasses) / sizeof(kPasses[0]));
+
+    for(int32_t pass = 0; pass < passCount; ++pass){
+        srcLen = removeText(src, srcLen, UnicodeString(kPasses[pass]), 0, UnicodeString(), status);
+    }
+    return srcLen;
+}
+
+/**
+ * Cleans up comment text in place: trims it, drops the " * " decoration at
+ * the start of each line, then replaces every run of spaces and line breaks
+ * with a single space.
+ *
+ * @return the length reported by the final removeText() pass
+ */
+U_CFUNC int32_t
+removeCmtText(char16_t* source, int32_t srcLen, UErrorCode* status){
+    const UnicodeString leadingStar("^\\s*?\\*\\s*?"); // pattern like " * " at the beginning of the line
+    const UnicodeString lineBreakRun("[ \\r\\n]+");    // spaces and new lines
+
+    int32_t len = trim(source, srcLen, status);
+    len = removeText(source, len, leadingStar, UREGEX_MULTILINE, UnicodeString(), status);
+    return removeText(source, len, lineBreakRun, 0, UnicodeString(" "), status);
+}
+
+/**
+ * Splits a comment at every '@' and, for the first piece that patternString
+ * matches at its start, extracts capture group 1 into *dest.
+ *
+ * @param source        comment text
+ * @param srcLen        number of code units in source
+ * @param dest          pointer to the output buffer
+ * @param destCapacity  capacity of *dest
+ * @param patternString regular expression with one capture group
+ * @param status        in/out error code; nothing is done when it is null or
+ *                      already reports a failure
+ * @return the value returned by UnicodeString::extract(), or 0 when no piece
+ *         matches or an error occurs
+ */
+U_CFUNC int32_t
+getText(const char16_t* source, int32_t srcLen,
+        char16_t** dest, int32_t destCapacity,
+        UnicodeString patternString,
+        UErrorCode* status){
+
+    if(status == nullptr || U_FAILURE(*status)){
+        return 0;
+    }
+
+    UnicodeString stringArray[MAX_SPLIT_STRINGS];
+    RegexPattern *pattern = RegexPattern::compile(UnicodeString("@"), 0, *status);
+    UnicodeString src (source,srcLen);
+
+    if (U_FAILURE(*status)) {
+        delete pattern;
+        return 0;
+    }
+    pattern->split(src, stringArray, MAX_SPLIT_STRINGS, *status);
+    // The compiled pattern belongs to this function and is only needed for
+    // the split; release it here so that no return path below leaks it.
+    delete pattern;
+    pattern = nullptr;
+
+    RegexMatcher matcher(patternString, UREGEX_DOTALL, *status);
+    if (U_FAILURE(*status)) {
+        return 0;
+    }
+    for(int32_t i=0; i<MAX_SPLIT_STRINGS; i++){
+        matcher.reset(stringArray[i]);
+        if(matcher.lookingAt(*status)){
+            UnicodeString out = matcher.group(1, *status);
+
+            return out.extract(*dest, destCapacity,*status);
+        }
+    }
+    return 0;
+}
+
+
+#define AT_SIGN 0x0040
+
+/**
+ * Extracts the description part of a comment: the text in front of the first
+ * '@', copied into *dest and trimmed.
+ *
+ * @param source       comment text
+ * @param srcLen       number of code units in source
+ * @param dest         pointer to the output buffer
+ * @param destCapacity capacity of *dest
+ * @param status       in/out error code; nothing is done when it is null or
+ *                     already reports a failure
+ * @return the trimmed length, or 0 when nothing was extracted
+ */
+U_CFUNC int32_t
+getDescription( const char16_t* source, int32_t srcLen,
+                char16_t** dest, int32_t destCapacity,
+                UErrorCode* status){
+    if(status == nullptr || U_FAILURE(*status)){
+        return 0;
+    }
+
+    UnicodeString stringArray[MAX_SPLIT_STRINGS];
+    RegexPattern *pattern = RegexPattern::compile(UnicodeString("@"), UREGEX_MULTILINE, *status);
+    UnicodeString src(source, srcLen);
+
+    if (U_FAILURE(*status)) {
+        delete pattern;
+        return 0;
+    }
+    pattern->split(src, stringArray,MAX_SPLIT_STRINGS , *status);
+    // The compiled pattern belongs to this function and is only needed for
+    // the split; release it here so that no return path below leaks it.
+    delete pattern;
+    pattern = nullptr;
+
+    if(stringArray[0].indexOf((char16_t)AT_SIGN)==-1){
+        int32_t destLen = stringArray[0].extract(*dest, destCapacity, *status);
+        return trim(*dest, destLen, status);
+    }
+    return 0;
+}
+
+/**
+ * Counts the '@'-separated pieces of a comment that start with the tag
+ * selected by option (looked up in patternStrings).
+ *
+ * Terminates the process via exit(U_UNSUPPORTED_ERROR) when option is
+ * UPC_TRANSLATE and more than one piece matches.
+ *
+ * @param source comment text
+ * @param srcLen number of code units in source
+ * @param option index into patternStrings
+ * @param status in/out error code; nothing is done when it is null or
+ *               already reports a failure
+ * @return the number of matching pieces, or 0 on error
+ */
+U_CFUNC int32_t
+getCount(const char16_t* source, int32_t srcLen,
+         UParseCommentsOption option, UErrorCode *status){
+
+    if(status == nullptr || U_FAILURE(*status)){
+        return 0;
+    }
+
+    UnicodeString stringArray[MAX_SPLIT_STRINGS];
+    RegexPattern *pattern = RegexPattern::compile(UnicodeString("@"), UREGEX_MULTILINE, *status);
+    UnicodeString src (source, srcLen);
+
+
+    if (U_FAILURE(*status)) {
+        delete pattern;
+        return 0;
+    }
+    int32_t retLen = pattern->split(src, stringArray, MAX_SPLIT_STRINGS, *status);
+    // The compiled pattern belongs to this function and is only needed for
+    // the split; release it here so that no exit path below leaks it.
+    delete pattern;
+    pattern = nullptr;
+
+    UnicodeString patternString(patternStrings[option]);
+    RegexMatcher matcher(patternString, UREGEX_DOTALL, *status);
+    if (U_FAILURE(*status)) {
+        return 0;
+    }
+    int32_t count = 0;
+    for(int32_t i=0; i<retLen; i++){
+        matcher.reset(stringArray[i]);
+        if(matcher.lookingAt(*status)){
+            count++;
+        }
+    }
+    if(option == UPC_TRANSLATE && count > 1){
+        fprintf(stderr, "Multiple @translate tags cannot be supported.\n");
+        exit(U_UNSUPPORTED_ERROR);
+    }
+    return count;
+}
+
+/**
+ * Extracts capture group 1 of the index-th '@'-separated piece (counting
+ * from 0, matching pieces only) that starts with the tag selected by option.
+ *
+ * @param source       comment text
+ * @param srcLen       number of code units in source
+ * @param dest         pointer to the output buffer
+ * @param destCapacity capacity of *dest
+ * @param index        which matching piece to return
+ * @param option       index into patternStrings
+ * @param status       in/out error code; nothing is done when it is null or
+ *                     already reports a failure
+ * @return the value returned by UnicodeString::extract(), or 0 when there is
+ *         no such piece or an error occurs
+ */
+U_CFUNC int32_t
+getAt(const char16_t* source, int32_t srcLen,
+      char16_t** dest, int32_t destCapacity,
+      int32_t index,
+      UParseCommentsOption option,
+      UErrorCode* status){
+
+    if(status == nullptr || U_FAILURE(*status)){
+        return 0;
+    }
+
+    UnicodeString stringArray[MAX_SPLIT_STRINGS];
+    RegexPattern *pattern = RegexPattern::compile(UnicodeString("@"), UREGEX_MULTILINE, *status);
+    UnicodeString src (source, srcLen);
+
+
+    if (U_FAILURE(*status)) {
+        delete pattern;
+        return 0;
+    }
+    int32_t retLen = pattern->split(src, stringArray, MAX_SPLIT_STRINGS, *status);
+    // The compiled pattern belongs to this function and is only needed for
+    // the split; release it here so that no return path below leaks it.
+    delete pattern;
+    pattern = nullptr;
+
+    UnicodeString patternString(patternStrings[option]);
+    RegexMatcher matcher(patternString, UREGEX_DOTALL, *status);
+    if (U_FAILURE(*status)) {
+        return 0;
+    }
+    int32_t count = 0;
+    for(int32_t i=0; i<retLen; i++){
+        matcher.reset(stringArray[i]);
+        if(matcher.lookingAt(*status)){
+            if(count == index){
+                UnicodeString out = matcher.group(1, *status);
+                return out.extract(*dest, destCapacity,*status);
+            }
+            count++;
+
+        }
+    }
+    return 0;
+
+}
+
+/**
+ * Extracts the text following the "translate" tag of a comment into *dest
+ * and trims it.
+ *
+ * @return the length reported by trim()
+ */
+U_CFUNC int32_t
+getTranslate( const char16_t* source, int32_t srcLen,
+              char16_t** dest, int32_t destCapacity,
+              UErrorCode* status){
+    const int32_t extractedLen =
+        getText(source, srcLen, dest, destCapacity, UnicodeString("^translate\\s*?(.*)"), status);
+
+    return trim(*dest, extractedLen, status);
+}
+
+/**
+ * Extracts the text following the "note" tag of a comment into *dest and
+ * trims it.
+ *
+ * @return the length reported by trim()
+ */
+U_CFUNC int32_t
+getNote(const char16_t* source, int32_t srcLen,
+        char16_t** dest, int32_t destCapacity,
+        UErrorCode* status){
+
+    const int32_t extractedLen =
+        getText(source, srcLen, dest, destCapacity, UnicodeString("^note\\s*?(.*)"), status);
+
+    return trim(*dest, extractedLen, status);
+
+}
+
+#endif /* UCONFIG_NO_REGULAR_EXPRESSIONS */
+
diff --git a/intl/icu/source/tools/genrb/prscmnts.h b/intl/icu/source/tools/genrb/prscmnts.h
new file mode 100644
index 0000000000..43195d2d30
--- /dev/null
+++ b/intl/icu/source/tools/genrb/prscmnts.h
@@ -0,0 +1,66 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+*
+* Copyright (C) 1998-2016, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+*******************************************************************************
+*
+* File prscmnts.h
+*
+* Modification History:
+*
+* Date Name Description
+* 05/26/99 stephen Creation.
+* 5/10/01 Ram removed ustdio dependency
+*******************************************************************************
+*/
+
+#ifndef PRSCMNTS_H
+#define PRSCMNTS_H 1
+
+#include "unicode/utypes.h"
+
+#if UCONFIG_NO_REGULAR_EXPRESSIONS==0 /* donot compile when no RegularExpressions are available */
+
+enum UParseCommentsOption {
+ UPC_TRANSLATE,
+ UPC_NOTE,
+ UPC_LIMIT
+};
+
+typedef enum UParseCommentsOption UParseCommentsOption;
+
+U_CFUNC int32_t
+getNote(const UChar* source, int32_t srcLen,
+ UChar** dest, int32_t destCapacity,
+ UErrorCode* status);
+U_CFUNC int32_t
+removeCmtText(UChar* source, int32_t srcLen, UErrorCode* status);
+
+U_CFUNC int32_t
+getDescription( const UChar* source, int32_t srcLen,
+ UChar** dest, int32_t destCapacity,
+ UErrorCode* status);
+U_CFUNC int32_t
+getTranslate( const UChar* source, int32_t srcLen,
+ UChar** dest, int32_t destCapacity,
+ UErrorCode* status);
+
+U_CFUNC int32_t
+getAt(const UChar* source, int32_t srcLen,
+ UChar** dest, int32_t destCapacity,
+ int32_t index,
+ UParseCommentsOption option,
+ UErrorCode* status);
+
+U_CFUNC int32_t
+getCount(const UChar* source, int32_t srcLen,
+ UParseCommentsOption option, UErrorCode *status);
+
+#endif /* UCONFIG_NO_REGULAR_EXPRESSIONS */
+
+#endif
+
diff --git a/intl/icu/source/tools/genrb/rbutil.c b/intl/icu/source/tools/genrb/rbutil.c
new file mode 100644
index 0000000000..ed3e66b250
--- /dev/null
+++ b/intl/icu/source/tools/genrb/rbutil.c
@@ -0,0 +1,119 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+*
+* Copyright (C) 1998-2008, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+*******************************************************************************
+*
+* File rbutil.c
+*
+* Modification History:
+*
+* Date Name Description
+* 06/10/99 stephen Creation.
+* 02/07/08 Spieth Correct XLIFF generation on EBCDIC platform
+*
+*******************************************************************************
+*/
+
+#include "unicode/putil.h"
+#include "rbutil.h"
+#include "cmemory.h"
+#include "cstring.h"
+
+
+/*
+ * Copy the directory part of filename into dirname, keeping the trailing
+ * separator: "/usr/local/include/curses.h" becomes "/usr/local/include/".
+ * dirname is set to the empty string when filename contains no
+ * U_FILE_SEP_CHAR. The caller supplies a dirname buffer large enough for
+ * the prefix plus the terminating NUL.
+ */
+void
+get_dirname(char *dirname,
+            const char *filename)
+{
+    const char *lastSlash = uprv_strrchr(filename, U_FILE_SEP_CHAR);
+
+    /* Test for NULL explicitly instead of ordering a null pointer against filename. */
+    if (lastSlash == NULL) {
+        *dirname = '\0';
+    } else {
+        /* length of the prefix, including the separator itself */
+        const size_t prefixLen = (size_t)(lastSlash - filename) + 1;
+
+        uprv_strncpy(dirname, filename, prefixLen);
+        dirname[prefixLen] = '\0';
+    }
+}
+
+/*
+ * Copy the file name without directory and without its last suffix into
+ * basename: "/usr/local/include/curses.h" becomes "curses".
+ * Only the part from the last '.' onwards is removed. The caller supplies a
+ * basename buffer large enough for the result.
+ */
+void
+get_basename(char *basename,
+             const char *filename)
+{
+    /* strip off any leading directory portions */
+    const char *lastSlash = uprv_strrchr(filename, U_FILE_SEP_CHAR);
+    /* Test for NULL explicitly instead of ordering a null pointer against filename. */
+    const char *leaf = (lastSlash != NULL) ? lastSlash + 1 : filename;
+    char *lastDot;
+
+    uprv_strcpy(basename, leaf);
+
+    /* strip off any suffix */
+    lastDot = uprv_strrchr(basename, '.');
+
+    if(lastDot != NULL) {
+        *lastDot = '\0';
+    }
+}
+
+#define MAX_DIGITS 10
+/*
+ * Write the textual form of i in the given radix into buffer.
+ *
+ * buffer  receives the characters; the caller must provide enough room
+ * i       value to convert; a leading '-' is written for negative values
+ * radix   number base, 2..16 (lower-case digits); any other value produces
+ *         an empty string and a return value of 0
+ * pad     minimum number of digits; shorter numbers are zero-padded
+ *
+ * Returns the number of characters written, not counting a terminator.
+ *
+ * NOTE(review): as before, the terminating NUL is only stored when the
+ * result is shorter than MAX_DIGITS characters; longer results are left
+ * unterminated and callers must rely on the returned length. Confirm the
+ * buffer sizes used by callers before changing this.
+ */
+int32_t
+itostr(char * buffer, int32_t i, uint32_t radix, int32_t pad)
+{
+    const char digits[16] = {'0','1','2','3','4','5','6','7','8','9','a','b','c','d','e','f'};
+    int32_t length = 0;
+    int32_t lo;
+    int32_t hi;
+    char temp;
+    /* Work on the magnitude as unsigned: negating INT32_MIN overflows int32_t. */
+    uint32_t magnitude = (i < 0) ? (0u - (uint32_t)i) : (uint32_t)i;
+
+    /* radix 0 would divide by zero, radix 1 never terminates, radix > 16 has no digits */
+    if(radix < 2 || radix > 16){
+        buffer[0] = 0x0000;
+        return 0;
+    }
+
+    /* digits are produced least significant first */
+    do{
+        buffer[length++] = digits[magnitude % radix];
+        magnitude /= radix;
+    } while(magnitude != 0);
+
+    while (length < pad){
+        buffer[length++] = '0';/*zero padding */
+    }
+
+    /* if i is negative add the negative sign */
+    if(i < 0){
+        buffer[length++]='-';
+    }
+
+    /* null terminate the buffer */
+    if(length<MAX_DIGITS){
+        buffer[length] = 0x0000;
+    }
+
+    /* Reverses the string */
+    for (lo = 0, hi = length - 1; lo < hi; lo++, hi--){
+        temp = buffer[hi];
+        buffer[hi] = buffer[lo];
+        buffer[lo] = temp;
+    }
+    return length;
+}
diff --git a/intl/icu/source/tools/genrb/rbutil.h b/intl/icu/source/tools/genrb/rbutil.h
new file mode 100644
index 0000000000..9a12c50959
--- /dev/null
+++ b/intl/icu/source/tools/genrb/rbutil.h
@@ -0,0 +1,33 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+*
+* Copyright (C) 1998-2016, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+*******************************************************************************
+*
+* File rbutil.h
+*
+* Modification History:
+*
+* Date Name Description
+* 06/10/99 stephen Creation.
+*******************************************************************************
+*/
+
+#ifndef UTIL_H
+#define UTIL_H 1
+
+#include "unicode/utypes.h"
+
+U_CDECL_BEGIN
+
+void get_dirname(char *dirname, const char *filename);
+void get_basename(char *basename, const char *filename);
+int32_t itostr(char * buffer, int32_t i, uint32_t radix, int32_t pad);
+
+U_CDECL_END
+
+#endif /* ! UTIL_H */
diff --git a/intl/icu/source/tools/genrb/read.c b/intl/icu/source/tools/genrb/read.c
new file mode 100644
index 0000000000..0d4a318a89
--- /dev/null
+++ b/intl/icu/source/tools/genrb/read.c
@@ -0,0 +1,479 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+*
+* Copyright (C) 1998-2012, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+*******************************************************************************
+*
+* File read.c
+*
+* Modification History:
+*
+* Date Name Description
+* 05/26/99 stephen Creation.
+* 5/10/01 Ram removed ustdio dependency
+*******************************************************************************
+*/
+
+#include <stdbool.h>
+
+#include "read.h"
+#include "errmsg.h"
+#include "toolutil.h"
+#include "unicode/ustring.h"
+#include "unicode/utf16.h"
+
+#define OPENBRACE 0x007B
+#define CLOSEBRACE 0x007D
+#define COMMA 0x002C
+#define QUOTE 0x0022
+#define ESCAPE 0x005C
+#define SLASH 0x002F
+#define ASTERISK 0x002A
+#define SPACE 0x0020
+#define COLON 0x003A
+#define BADBOM 0xFFFE
+#define CR 0x000D
+#define LF 0x000A
+
+static int32_t lineCount;
+
+/* Protos */
+static enum ETokenType getStringToken(UCHARBUF *buf,
+ UChar32 initialChar,
+ struct UString *token,
+ UErrorCode *status);
+
+static UChar32 getNextChar (UCHARBUF *buf, UBool skipwhite, struct UString *token, UErrorCode *status);
+static void seekUntilNewline (UCHARBUF *buf, struct UString *token, UErrorCode *status);
+static void seekUntilEndOfComment (UCHARBUF *buf, struct UString *token, UErrorCode *status);
+static UBool isWhitespace (UChar32 c);
+static UBool isNewline (UChar32 c);
+
+/* Restart line counting: sets the file-level lineCount back to 1. */
+U_CFUNC void resetLineNumber() {
+ lineCount = 1;
+}
+
+/* Read and return the next token from the stream. Whitespace and comments
+ in front of the token are skipped by getNextChar(), which is also handed
+ the comment parameter. Braces, comma, colon and end of input are returned
+ directly as TOK_OPEN_BRACE, TOK_CLOSE_BRACE, TOK_COMMA, TOK_COLON and
+ TOK_EOF; a BADBOM (0xFFFE) character yields TOK_ERROR. Any other
+ character starts a string: getStringToken() fills in the token parameter
+ and merges adjacent strings into one, so two string tokens are never
+ returned in a row. TOK_ERROR is also returned when status holds a failure
+ on entry or after the first character has been read.
+ *linenumber receives the current line count; it is refreshed after a
+ string token has been read. */
+U_CFUNC enum ETokenType
+getNextToken(UCHARBUF* buf,
+ struct UString *token,
+ uint32_t *linenumber, /* out: linenumber of token */
+ struct UString *comment,
+ UErrorCode *status) {
+ enum ETokenType result;
+ UChar32 c;
+
+ if (U_FAILURE(*status)) {
+ return TOK_ERROR;
+ }
+
+ /* Skip whitespace */
+ c = getNextChar(buf, true, comment, status);
+
+ if (U_FAILURE(*status)) {
+ return TOK_ERROR;
+ }
+
+ *linenumber = lineCount;
+
+ switch(c) {
+ case BADBOM:
+ return TOK_ERROR;
+ case OPENBRACE:
+ return TOK_OPEN_BRACE;
+ case CLOSEBRACE:
+ return TOK_CLOSE_BRACE;
+ case COMMA:
+ return TOK_COMMA;
+ case U_EOF:
+ return TOK_EOF;
+ case COLON:
+ return TOK_COLON;
+
+ default:
+ result = getStringToken(buf, c, token, status);
+ }
+
+ /* reading the string may have advanced lineCount, so report it again */
+ *linenumber = lineCount;
+ return result;
+}
+
+/* Copy a string token into the given UnicodeString. Upon entry, we
+ have already read the first character of the string token, which is
+ not a whitespace character (but may be a QUOTE or ESCAPE). This
+ function reads all subsequent characters that belong with this
+ string, and copy them into the token parameter. The other
+ important, and slightly convoluted purpose of this function is to
+ merge adjacent strings. It looks forward a bit, and if the next
+ non comment, non whitespace item is a string, it reads it in as
+ well. If two adjacent strings are quoted, they are merged without
+ intervening space. Otherwise a single SPACE character is
+ inserted. */
+/* Parameters: buf is the input, initialChar the already-read first
+ character of the string, token the output string (reset to length 0
+ first), status the in/out error code.
+ Returns TOK_STRING once a brace, comma, colon or end of input follows
+ the string(s); TOK_ERROR on failures detected here; TOK_EOF when the
+ input ends inside a quoted string. */
+static enum ETokenType getStringToken(UCHARBUF* buf,
+ UChar32 initialChar,
+ struct UString *token,
+ UErrorCode *status) {
+ UBool lastStringWasQuoted;
+ UChar32 c;
+ /* scratch buffer: one code point is encoded here by U_APPEND_CHAR32
+ and then appended to token */
+ UChar target[3] = { '\0' };
+ UChar *pTarget = target;
+ int len=0;
+ UBool isFollowingCharEscaped=false;
+ UBool isNLUnescaped = false;
+ UChar32 prevC=0;
+
+ /* We are guaranteed on entry that initialChar is not a whitespace
+ character. If we are at the EOF, or have some other problem, it
+ doesn't matter; we still want to validly return the initialChar
+ (if nothing else) as a string token. */
+
+ if (U_FAILURE(*status)) {
+ return TOK_ERROR;
+ }
+
+ /* setup */
+ lastStringWasQuoted = false;
+ c = initialChar;
+ ustr_setlen(token, 0, status);
+
+ if (U_FAILURE(*status)) {
+ return TOK_ERROR;
+ }
+
+ /* each pass of this loop consumes one quoted or one unquoted string */
+ for (;;) {
+ if (c == QUOTE) {
+ /* a quoted string after an unquoted one is separated by a SPACE */
+ if (!lastStringWasQuoted && token->fLength > 0) {
+ ustr_ucat(token, SPACE, status);
+
+ if (U_FAILURE(*status)) {
+ return TOK_ERROR;
+ }
+ }
+
+ lastStringWasQuoted = true;
+
+ for (;;) {
+ c = ucbuf_getc(buf,status);
+
+ /* EOF reached */
+ /* NOTE(review): the input ended before the closing QUOTE, yet
+ TOK_EOF rather than TOK_ERROR is returned - confirm intent */
+ if (c == U_EOF) {
+ return TOK_EOF;
+ }
+
+ /* Unterminated quoted strings */
+ if (U_FAILURE(*status)) {
+ return TOK_ERROR;
+ }
+
+ if (c == QUOTE && !isFollowingCharEscaped) {
+ break;
+ }
+
+ if (c == ESCAPE && !isFollowingCharEscaped) {
+ pTarget = target;
+ c = unescape(buf, status);
+
+ if (c == U_ERR) {
+ return TOK_ERROR;
+ }
+ /* a CR/LF produced by an escape sequence must not count as a line */
+ if(c == CR || c == LF){
+ isNLUnescaped = true;
+ }
+ }
+
+ /* unescape() handed back an ESCAPE: remember it and append nothing
+ on this pass */
+ if(c==ESCAPE && !isFollowingCharEscaped){
+ isFollowingCharEscaped = true;
+ }else{
+ U_APPEND_CHAR32(c, pTarget,len);
+ pTarget = target;
+ ustr_uscat(token, pTarget,len, status);
+ isFollowingCharEscaped = false;
+ len=0;
+ /* count a literal line break, but not one directly after a CR */
+ if(c == CR || c == LF){
+ if(isNLUnescaped == false && prevC!=CR){
+ lineCount++;
+ }
+ isNLUnescaped = false;
+ }
+ }
+
+ if (U_FAILURE(*status)) {
+ return TOK_ERROR;
+ }
+ prevC = c;
+ }
+ } else {
+ if (token->fLength > 0) {
+ ustr_ucat(token, SPACE, status);
+
+ if (U_FAILURE(*status)) {
+ return TOK_ERROR;
+ }
+ }
+
+ if(lastStringWasQuoted){
+ if(getShowWarning()){
+ warning(lineCount, "Mixing quoted and unquoted strings");
+ }
+ if(isStrict()){
+ return TOK_ERROR;
+ }
+
+ }
+
+ lastStringWasQuoted = false;
+
+ /* if we reach here we are mixing
+ * quoted and unquoted strings
+ * warn in normal mode and error in
+ * pedantic mode
+ */
+
+ if (c == ESCAPE) {
+ pTarget = target;
+ c = unescape(buf, status);
+
+ /* EOF reached */
+ /* NOTE(review): the other unescape() calls in this function test
+ for U_ERR, this one tests for U_EOF - confirm which is intended */
+ if (c == U_EOF) {
+ return TOK_ERROR;
+ }
+ }
+
+ U_APPEND_CHAR32(c, pTarget,len);
+ pTarget = target;
+ ustr_uscat(token, pTarget,len, status);
+ len=0;
+
+ if (U_FAILURE(*status)) {
+ return TOK_ERROR;
+ }
+
+ /* read the rest of the unquoted string, up to whitespace or a
+ structural character */
+ for (;;) {
+ /* DON'T skip whitespace */
+ c = getNextChar(buf, false, NULL, status);
+
+ /* EOF reached */
+ if (c == U_EOF) {
+ ucbuf_ungetc(c, buf);
+ return TOK_STRING;
+ }
+
+ if (U_FAILURE(*status)) {
+ return TOK_STRING;
+ }
+
+ if (c == QUOTE
+ || c == OPENBRACE
+ || c == CLOSEBRACE
+ || c == COMMA
+ || c == COLON) {
+ ucbuf_ungetc(c, buf);
+ break;
+ }
+
+ /* isWhitespace() also advances lineCount for line terminators */
+ if (isWhitespace(c)) {
+ break;
+ }
+
+ if (c == ESCAPE) {
+ pTarget = target;
+ c = unescape(buf, status);
+
+ if (c == U_ERR) {
+ return TOK_ERROR;
+ }
+ }
+
+ U_APPEND_CHAR32(c, pTarget,len);
+ pTarget = target;
+ ustr_uscat(token, pTarget,len, status);
+ len=0;
+ if (U_FAILURE(*status)) {
+ return TOK_ERROR;
+ }
+ }
+ }
+
+ /* DO skip whitespace */
+ c = getNextChar(buf, true, NULL, status);
+
+ if (U_FAILURE(*status)) {
+ return TOK_STRING;
+ }
+
+ /* a structural character ends the token; anything else starts another
+ string that is merged in on the next pass */
+ if (c == OPENBRACE || c == CLOSEBRACE || c == COMMA || c == COLON) {
+ ucbuf_ungetc(c, buf);
+ return TOK_STRING;
+ }
+ }
+}
+
+/* Retrieve the next character that is not part of a comment. If skipwhite
+ is true, whitespace is skipped as well.
+ "//" comments are discarded up to the end of the line. Comments opened
+ with "/ *" are discarded up to the closing "* /"; when the opener is
+ "/ * *" the comment text is stored in token (if token is not NULL).
+ The empty comment "/ * * /" is recognized as complete instead of being
+ taken for the opener of a "/ * *" comment, which used to swallow all
+ input up to the next "* /".
+ Returns U_EOF at end of input or when status already holds a failure;
+ returns SLASH for a '/' that does not start a comment. */
+static UChar32 getNextChar(UCHARBUF* buf,
+                           UBool skipwhite,
+                           struct UString *token,
+                           UErrorCode *status) {
+    UChar32 c, c2, c3;
+
+    if (U_FAILURE(*status)) {
+        return U_EOF;
+    }
+
+    for (;;) {
+        c = ucbuf_getc(buf,status);
+
+        if (c == U_EOF) {
+            return U_EOF;
+        }
+
+        if (skipwhite && isWhitespace(c)) {
+            continue;
+        }
+
+        /* This also handles the get() failing case */
+        if (c != SLASH) {
+            return c;
+        }
+
+        c = ucbuf_getc(buf,status); /* "/c" */
+
+        if (c == U_EOF) {
+            return U_EOF;
+        }
+
+        switch (c) {
+        case SLASH:  /* "//" */
+            seekUntilNewline(buf, NULL, status);
+            break;
+
+        case ASTERISK:  /* " / * " */
+            c2 = ucbuf_getc(buf, status); /* "/ * c" */
+            if(c2 == ASTERISK){  /* "/ * *" */
+                c3 = ucbuf_getc(buf, status); /* "/ * * c" */
+                if(c3 == SLASH){
+                    /* "/ * * /" is an empty comment: nothing to store or skip */
+                    break;
+                }
+                ucbuf_ungetc(c3, buf); /* not the end of the comment; put c3 back */
+                /* parse multi-line comment and store it in token*/
+                seekUntilEndOfComment(buf, token, status);
+            } else {
+                ucbuf_ungetc(c2, buf); /* c2 is the non-asterisk following "/ *".  Include c2  back in buffer.  */
+                seekUntilEndOfComment(buf, NULL, status);
+            }
+            break;
+
+        default:
+            ucbuf_ungetc(c, buf); /* "/c" - put back the c */
+            /* If get() failed this is a NOP */
+            return SLASH;
+        }
+
+    }
+}
+
+/* Consume characters up to and including the next line terminator, or up
+ to end of input. When token is not NULL every character read, including
+ the terminator, is appended to it. The loop also stops as soon as
+ *status is anything other than U_ZERO_ERROR. isNewline() advances
+ lineCount as a side effect. Does nothing when status already holds a
+ failure. */
+static void seekUntilNewline(UCHARBUF* buf,
+ struct UString *token,
+ UErrorCode *status) {
+ UChar32 c;
+
+ if (U_FAILURE(*status)) {
+ return;
+ }
+
+ do {
+ c = ucbuf_getc(buf,status);
+ /* add the char to token */
+ /* NOTE(review): c is appended before it is tested, so U_EOF itself is
+ passed to ustr_u32cat() when the input ends - confirm this is harmless */
+ if(token!=NULL){
+ ustr_u32cat(token, c, status);
+ }
+ } while (!isNewline(c) && c != U_EOF && *status == U_ZERO_ERROR);
+}
+
+/* Consume characters up to and including the ASTERISK SLASH pair that
+ closes a comment. When token is not NULL the comment text in front of
+ that pair is appended to it. If the input ends first, *status is set to
+ U_INVALID_FORMAT_ERROR and an error is reported for the line on which
+ this function was entered. Does nothing when status already holds a
+ failure. */
+static void seekUntilEndOfComment(UCHARBUF *buf,
+ struct UString *token,
+ UErrorCode *status) {
+ UChar32 c, d;
+ uint32_t line;
+
+ if (U_FAILURE(*status)) {
+ return;
+ }
+
+ /* remember where we started, for the error message below */
+ line = lineCount;
+
+ do {
+ c = ucbuf_getc(buf, status);
+
+ if (c == ASTERISK) {
+ d = ucbuf_getc(buf, status);
+
+ /* a lone asterisk belongs to the comment text: put d back and go on */
+ if (d != SLASH) {
+ ucbuf_ungetc(d, buf);
+ } else {
+ break;
+ }
+ }
+ /* add the char to token */
+ /* NOTE(review): when the input ends, U_EOF itself is passed to
+ ustr_u32cat() here - confirm this is harmless */
+ if(token!=NULL){
+ ustr_u32cat(token, c, status);
+ }
+ /* increment the lineCount */
+ /* called only for its side effect; the result is ignored */
+ isNewline(c);
+
+ } while (c != U_EOF && *status == U_ZERO_ERROR);
+
+ if (c == U_EOF) {
+ *status = U_INVALID_FORMAT_ERROR;
+ error(line, "unterminated comment detected");
+ }
+}
+
+/* Decode the escape sequence whose ESCAPE character the caller has already
+ read. The ESCAPE is pushed back into buf and the whole sequence is then
+ read again through ucbuf_getcx32(), whose result is returned.
+ Returns U_EOF when status already holds a failure on entry. */
+U_CFUNC UChar32 unescape(UCHARBUF *buf, UErrorCode *status) {
+ if (U_FAILURE(*status)) {
+ return U_EOF;
+ }
+
+ /* We expect to be called after the ESCAPE has been seen, but
+ * ucbuf_getcx32 needs an ESCAPE to do its magic. */
+ ucbuf_ungetc(ESCAPE, buf);
+
+ return ucbuf_getcx32(buf, status);
+}
+
+/* Tell whether c is treated as whitespace by the tokenizer: LF, CR, U+2029,
+ SPACE, TAB or U+FEFF.
+ Side effect: lineCount is advanced for LF and U+2029 (not for CR). */
+static UBool isWhitespace(UChar32 c) {
+    /* line terminators that count as a new line */
+    if (c == 0x000A || c == 0x2029) {
+        lineCount++;
+        return true;
+    }
+
+    /* '\r', ' ', '\t', 0xFEFF */
+    if (c == 0x000D || c == 0x0020 || c == 0x0009 || c == 0xFEFF) {
+        return true;
+    }
+
+    return false;
+}
+
+/* Tell whether c ends a line: LF, CR or U+2029.
+ Side effect: lineCount is advanced for LF and U+2029 (not for CR). */
+static UBool isNewline(UChar32 c) {
+    /* '\n', 0x2029 */
+    if (c == 0x000A || c == 0x2029) {
+        lineCount++;
+        return true;
+    }
+
+    /* '\r' */
+    if (c == 0x000D) {
+        return true;
+    }
+
+    return false;
+}
diff --git a/intl/icu/source/tools/genrb/read.h b/intl/icu/source/tools/genrb/read.h
new file mode 100644
index 0000000000..e5b8d155da
--- /dev/null
+++ b/intl/icu/source/tools/genrb/read.h
@@ -0,0 +1,54 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+*
+* Copyright (C) 1998-2011, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+*******************************************************************************
+*
+* File read.h
+*
+* Modification History:
+*
+* Date Name Description
+* 05/26/99 stephen Creation.
+* 5/10/01 Ram removed ustdio dependency
+*******************************************************************************
+*/
+
+#ifndef READ_H
+#define READ_H 1
+
+#include "unicode/utypes.h"
+#include "ustr.h"
+#include "ucbuf.h"
+
+/* The types of tokens which may be returned by getNextToken.
+ NOTE: Keep these in sync with tokenNames in parse.cpp */
+enum ETokenType
+{
+ TOK_STRING, /* A string token, such as "MonthNames" */
+ TOK_OPEN_BRACE, /* An opening brace character */
+ TOK_CLOSE_BRACE, /* A closing brace character */
+ TOK_COMMA, /* A comma */
+ TOK_COLON, /* A colon */
+
+ TOK_EOF, /* End of the file has been reached successfully */
+ TOK_ERROR, /* An error, such an unterminated quoted string */
+ TOK_TOKEN_COUNT /* Number of "real" token types */
+};
+
+U_CFUNC UChar32 unescape(UCHARBUF *buf, UErrorCode *status);
+
+U_CFUNC void resetLineNumber(void);
+
+U_CFUNC enum ETokenType
+getNextToken(UCHARBUF *buf,
+ struct UString *token,
+ uint32_t *linenumber, /* out: linenumber of token */
+ struct UString *comment,
+ UErrorCode *status);
+
+#endif
diff --git a/intl/icu/source/tools/genrb/reslist.cpp b/intl/icu/source/tools/genrb/reslist.cpp
new file mode 100644
index 0000000000..e1c2d25061
--- /dev/null
+++ b/intl/icu/source/tools/genrb/reslist.cpp
@@ -0,0 +1,1794 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+*
+* Copyright (C) 2000-2015, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+*******************************************************************************
+*
+* File reslist.cpp
+*
+* Modification History:
+*
+* Date Name Description
+* 02/21/00 weiv Creation.
+*******************************************************************************
+*/
+
+// Safer use of UnicodeString.
+#ifndef UNISTR_FROM_CHAR_EXPLICIT
+# define UNISTR_FROM_CHAR_EXPLICIT explicit
+#endif
+
+// Less important, but still a good idea.
+#ifndef UNISTR_FROM_STRING_EXPLICIT
+# define UNISTR_FROM_STRING_EXPLICIT explicit
+#endif
+
+#include <assert.h>
+#include <iostream>
+#include <set>
+#include <stdio.h>
+
+#include "unicode/localpointer.h"
+#include "reslist.h"
+#include "unewdata.h"
+#include "unicode/ures.h"
+#include "unicode/putil.h"
+#include "errmsg.h"
+#include "filterrb.h"
+#include "toolutil.h"
+
+#include "uarrsort.h"
+#include "uelement.h"
+#include "uhash.h"
+#include "uinvchar.h"
+#include "ustr_imp.h"
+#include "unicode/utf16.h"
+#include "uassert.h"
+
+/*
+ * Align binary data at a 16-byte offset from the start of the resource bundle,
+ * to be safe for any data type it may contain.
+ */
+#define BIN_ALIGNMENT 16
+
+// This numeric constant must be at least 1.
+// If StringResource.fNumUnitsSaved == 0 then the string occurs only once,
+// and it makes no sense to move it to the pool bundle.
+// The larger the threshold for fNumUnitsSaved
+// the smaller the savings, and the smaller the pool bundle.
+// We trade some total size reduction to reduce the pool bundle a bit,
+// so that one can reasonably save data size by
+// removing bundle files without rebuilding the pool bundle.
+// This can also help to keep the pool and total (pool+local) string indexes
+// within 16 bits, that is, within range of Table16 and Array16 containers.
+#ifndef GENRB_MIN_16BIT_UNITS_SAVED_FOR_POOL_STRING
+# define GENRB_MIN_16BIT_UNITS_SAVED_FOR_POOL_STRING 10
+#endif
+
+U_NAMESPACE_USE
+
+static UBool gIncludeCopyright = false;
+static UBool gUsePoolBundle = false;
+static UBool gIsDefaultFormatVersion = true;
+static int32_t gFormatVersion = 3;
+
+/* How do we store string values? */
+enum {
+ STRINGS_UTF16_V1, /* formatVersion 1: int length + UChars + NUL + padding to 4 bytes */
+ STRINGS_UTF16_V2 /* formatVersion 2 & up: optional length in 1..3 UChars + UChars + NUL */
+};
+
+static const int32_t MAX_IMPLICIT_STRING_LENGTH = 40; /* do not store the length explicitly for such strings */
+
+static const ResFile kNoPoolBundle;
+
+/*
+ * res_none() returns the address of kNoResource,
+ * for use in non-error cases when no resource is to be added to the bundle.
+ * (nullptr is used in error cases.)
+ */
+static SResource kNoResource; // TODO: const
+
+static UDataInfo dataInfo= {
+ sizeof(UDataInfo),
+ 0,
+
+ U_IS_BIG_ENDIAN,
+ U_CHARSET_FAMILY,
+ sizeof(char16_t),
+ 0,
+
+ {0x52, 0x65, 0x73, 0x42}, /* dataFormat="ResB" */
+ {1, 3, 0, 0}, /* formatVersion */
+ {1, 4, 0, 0} /* dataVersion take a look at version inside parsed resb*/
+};
+
+static const UVersionInfo gFormatVersions[4] = { /* indexed by a major-formatVersion integer */
+ { 0, 0, 0, 0 },
+ { 1, 3, 0, 0 },
+ { 2, 0, 0, 0 },
+ { 3, 0, 0, 0 }
+};
+// Remember to update genrb.h GENRB_VERSION when changing the data format.
+// (Or maybe we should remove GENRB_VERSION and report the ICU version number?)
+
+/* Returns the number of bytes (0..3) needed to pad size up to the next
+ * multiple of sizeof(uint32_t). */
+static uint8_t calcPadding(uint32_t size) {
+    const uint32_t remainder = (uint32_t)(size % sizeof(uint32_t));
+    if (remainder == 0) {
+        return 0;
+    }
+    return (uint8_t)(sizeof(uint32_t) - remainder);
+}
+
+/* Stores val in the file-level gIncludeCopyright flag. */
+void setIncludeCopyright(UBool val){
+ gIncludeCopyright=val;
+}
+
+/* Returns the current gIncludeCopyright flag. */
+UBool getIncludeCopyright(){
+ return gIncludeCopyright;
+}
+
+/* Selects the format version and records that it is no longer the default. */
+void setFormatVersion(int32_t formatVersion) {
+ gIsDefaultFormatVersion = false;
+ gFormatVersion = formatVersion;
+}
+
+/* Returns the currently selected format version (initially 3). */
+int32_t getFormatVersion() {
+ return gFormatVersion;
+}
+
+/* Stores use in the file-level gUsePoolBundle flag. */
+void setUsePoolBundle(UBool use) {
+ gUsePoolBundle = use;
+}
+
+// TODO: return const pointer, or find another way to express "none"
+/* Returns the address of the shared kNoResource placeholder. */
+struct SResource* res_none() {
+ return &kNoResource;
+}
+
+/* Default constructor: a URES_NONE resource with no key (fKey -1), a bogus
+ * resource word, not yet written, not linked to a next item, and an
+ * initialized empty comment. */
+SResource::SResource()
+ : fType(URES_NONE), fWritten(false), fRes(RES_BOGUS), fRes16(-1), fKey(-1), fKey16(-1),
+ line(0), fNext(nullptr) {
+ ustr_init(&fComment);
+}
+
+/* Creates a resource of the given type. When bundle is not null the tag is
+ * registered through bundle->addTag() and the result becomes fKey; otherwise
+ * fKey is -1. A non-null comment is copied into fComment. Failures are
+ * reported through errorCode. */
+SResource::SResource(SRBRoot *bundle, const char *tag, int8_t type, const UString* comment,
+ UErrorCode &errorCode)
+ : fType(type), fWritten(false), fRes(RES_BOGUS), fRes16(-1),
+ fKey(bundle != nullptr ? bundle->addTag(tag, errorCode) : -1), fKey16(-1),
+ line(0), fNext(nullptr) {
+ ustr_init(&fComment);
+ if(comment != nullptr) {
+ ustr_cpy(&fComment, comment, &errorCode);
+ }
+}
+
+/* Releases the comment string. */
+SResource::~SResource() {
+ ustr_deinit(&fComment);
+}
+
+/* Deletes every item of the linked list that starts at fFirst. */
+ContainerResource::~ContainerResource() {
+    for (SResource *item = fFirst; item != nullptr; ) {
+        // fetch the successor first: item is gone after the delete
+        SResource *following = item->fNext;
+        delete item;
+        item = following;
+    }
+}
+
+TableResource::~TableResource() {}
+
+// TODO: clarify that containers adopt new items, even in error cases; use LocalPointer
+/*
+ * Inserts res into this table's linked list, keeping the list sorted by key.
+ *
+ * Nothing happens when errorCode already reports a failure, or when res is
+ * nullptr or the kNoResource placeholder. When the table already contains an
+ * item with the same key, an error is reported, errorCode is set to
+ * U_UNSUPPORTED_ERROR and res is not inserted.
+ *
+ * fCount is only incremented when res has actually been linked in, so that
+ * it always matches the number of items in the list.
+ *
+ * NOTE(review): a rejected res is neither linked nor deleted here; whether
+ * the caller still owns it is not visible in this function (see the TODO
+ * above).
+ */
+void TableResource::add(SResource *res, int linenumber, UErrorCode &errorCode) {
+    if (U_FAILURE(errorCode) || res == nullptr || res == &kNoResource) {
+        return;
+    }
+
+    /* remember this linenumber to report to the user if there is a duplicate key */
+    res->line = linenumber;
+
+    /* Look for the first existing item whose key sorts after the new key. */
+    SResource *prev = nullptr;
+    SResource *current = fFirst;
+
+    if (current != nullptr) {
+        const char *resKeyString = fRoot->fKeys + res->fKey;
+
+        for (; current != nullptr; prev = current, current = current->fNext) {
+            const char *currentKeyString = fRoot->fKeys + current->fKey;
+            int diff;
+            /*
+             * formatVersion 1: compare key strings in native-charset order
+             * formatVersion 2 and up: compare key strings in ASCII order
+             */
+            if (gFormatVersion == 1 || U_CHARSET_FAMILY == U_ASCII_FAMILY) {
+                diff = uprv_strcmp(currentKeyString, resKeyString);
+            } else {
+                diff = uprv_compareInvCharsAsAscii(currentKeyString, resKeyString);
+            }
+            if (diff > 0) {
+                /* insertion point found: res goes in front of current */
+                break;
+            }
+            if (diff == 0) {
+                /* Key already exists! ERROR! */
+                error(linenumber, "duplicate key '%s' in table, first appeared at line %d", currentKeyString, current->line);
+                errorCode = U_UNSUPPORTED_ERROR;
+                return;
+            }
+        }
+    }
+
+    /* Link res between prev and current; either of them may be missing. */
+    res->fNext = current;
+    if (prev == nullptr) {
+        /* empty list, or front of the list */
+        fFirst = res;
+    } else {
+        /* middle or end of the list */
+        prev->fNext = res;
+    }
+    ++fCount;
+}
+
+// Empty destructor body: nothing is released here by ArrayResource itself.
+ArrayResource::~ArrayResource() {}
+
+// Appends res at the tail of the array's item list and increments fCount.
+// A nullptr argument and the kNoResource placeholder are ignored.
+void ArrayResource::add(SResource *res) {
+    if (res == nullptr || res == &kNoResource) {
+        return;
+    }
+    if (fFirst != nullptr) {
+        // Link after the current tail.
+        fLast->fNext = res;
+    } else {
+        // First item: it becomes the head of the list.
+        fFirst = res;
+    }
+    fLast = res;
+    ++fCount;
+}
+
+// Empty destructor body: nothing is released here by PseudoListResource itself.
+PseudoListResource::~PseudoListResource() {}
+
+// Pushes res onto the front of the item list and increments fCount.
+// A nullptr argument and the kNoResource placeholder are ignored.
+void PseudoListResource::add(SResource *res) {
+    if (res == nullptr || res == &kNoResource) {
+        return;
+    }
+    res->fNext = fFirst;
+    fFirst = res;
+    ++fCount;
+}
+
+// Builds a string-valued resource of the given type from a (value, len) UTF-16 buffer.
+// For formatVersion > 1 an empty string is represented by its resource word alone
+// and is marked as written immediately; otherwise the text is copied into fString.
+// errorCode is set to U_MEMORY_ALLOCATION_ERROR if fString turns out bogus
+// and no earlier failure has been recorded.
+StringBaseResource::StringBaseResource(SRBRoot *bundle, const char *tag, int8_t type,
+                                       const char16_t *value, int32_t len,
+                                       const UString* comment, UErrorCode &errorCode)
+        : SResource(bundle, tag, type, comment, errorCode) {
+    const bool isEmptyInNewFormat = (len == 0) && (gFormatVersion > 1);
+    if (isEmptyInNewFormat) {
+        fRes = URES_MAKE_EMPTY_RESOURCE(type);
+        fWritten = true;
+    } else {
+        fString.setTo(ConstChar16Ptr(value), len);
+        fString.getTerminatedBuffer();  // Some code relies on NUL-termination.
+        if (fString.isBogus() && U_SUCCESS(errorCode)) {
+            errorCode = U_MEMORY_ALLOCATION_ERROR;
+        }
+    }
+}
+
+// Builds an untagged, uncommented string-valued resource from a UnicodeString.
+// fString is copy-initialized from value. For formatVersion > 1 an empty value
+// is represented by its resource word alone and is marked as written immediately.
+// errorCode is set to U_MEMORY_ALLOCATION_ERROR if fString turns out bogus
+// and no earlier failure has been recorded.
+StringBaseResource::StringBaseResource(SRBRoot *bundle, int8_t type,
+                                       const icu::UnicodeString &value, UErrorCode &errorCode)
+        : SResource(bundle, nullptr, type, nullptr, errorCode), fString(value) {
+    const bool isEmptyInNewFormat = value.isEmpty() && (gFormatVersion > 1);
+    if (isEmptyInNewFormat) {
+        fRes = URES_MAKE_EMPTY_RESOURCE(type);
+        fWritten = true;
+    } else {
+        fString.getTerminatedBuffer();  // Some code relies on NUL-termination.
+        if (fString.isBogus() && U_SUCCESS(errorCode)) {
+            errorCode = U_MEMORY_ALLOCATION_ERROR;
+        }
+    }
+}
+
+// Pool bundle string, alias the buffer. Guaranteed NUL-terminated and not empty.
+// Passes nullptr for bundle, tag and comment to the SResource constructor and
+// initializes fString with the (true, value, len) UnicodeString constructor.
+// The asserts check the "not empty" precondition and that fString is usable.
+StringBaseResource::StringBaseResource(int8_t type, const char16_t *value, int32_t len,
+ UErrorCode &errorCode)
+ : SResource(nullptr, nullptr, type, nullptr, errorCode), fString(true, value, len) {
+ assert(len > 0);
+ assert(!fString.isBogus());
+}
+
+// Empty destructor body: nothing is released explicitly here.
+StringBaseResource::~StringBaseResource() {}
+
+// Hash callback for the string set: key.pointer is treated as a StringResource
+// and the hash code of its fString is returned.
+static int32_t U_CALLCONV
+string_hash(const UElement key) {
+    return static_cast<const StringResource *>(key.pointer)->fString.hashCode();
+}
+
+// Equality callback for the string set: both pointers are treated as
+// StringResource objects, which compare equal when their fString values are equal.
+static UBool U_CALLCONV
+string_comp(const UElement key1, const UElement key2) {
+    const StringResource &lhs = *static_cast<const StringResource *>(key1.pointer);
+    const StringResource &rhs = *static_cast<const StringResource *>(key2.pointer);
+    return lhs.fString == rhs.fString;
+}
+
+// Empty destructor body: nothing is released explicitly here.
+StringResource::~StringResource() {}
+
+// Empty destructor body: nothing is released explicitly here.
+AliasResource::~AliasResource() {}
+
+// Integer resource. The value (masked with RES_MAX_OFFSET) is packed directly
+// into the resource word fRes, and the item is marked as written right away.
+IntResource::IntResource(SRBRoot *bundle, const char *tag, int32_t value,
+ const UString* comment, UErrorCode &errorCode)
+ : SResource(bundle, tag, URES_INT, comment, errorCode) {
+ fValue = value;
+ fRes = URES_MAKE_RESOURCE(URES_INT, value & RES_MAX_OFFSET);
+ fWritten = true;
+}
+
+// Empty destructor body: nothing is released explicitly here.
+IntResource::~IntResource() {}
+
+// Creates an empty integer vector (fCount == 0) with an initial backing array
+// of RESLIST_INT_VECTOR_INIT_SIZE elements.
+// Sets errorCode to U_MEMORY_ALLOCATION_ERROR if the array pointer is null.
+// NOTE(review): fArray is sized from fSize inside the initializer list, which only
+// works if fSize is declared before fArray in the class - confirm in the header.
+IntVectorResource::IntVectorResource(SRBRoot *bundle, const char *tag,
+ const UString* comment, UErrorCode &errorCode)
+ : SResource(bundle, tag, URES_INT_VECTOR, comment, errorCode),
+ fCount(0), fSize(RESLIST_INT_VECTOR_INIT_SIZE),
+ fArray(new uint32_t[fSize]) {
+ if (fArray == nullptr) {
+ errorCode = U_MEMORY_ALLOCATION_ERROR;
+ return;
+ }
+}
+
+// Destructor: frees the backing array allocated with new[].
+IntVectorResource::~IntVectorResource() {
+ delete[] fArray;
+}
+
+// Appends value to the vector.
+// When the backing array is full it is first replaced by one of twice the size;
+// if that allocation yields nullptr, errorCode becomes U_MEMORY_ALLOCATION_ERROR
+// and nothing is appended. The value is stored only if errorCode indicates success.
+void IntVectorResource::add(int32_t value, UErrorCode &errorCode) {
+    const bool isFull = (fCount == fSize);
+    if (isFull) {
+        uint32_t *grown = new uint32_t[2 * fSize];
+        if (grown == nullptr) {
+            errorCode = U_MEMORY_ALLOCATION_ERROR;
+            return;
+        }
+        uprv_memcpy(grown, fArray, fSize * sizeof(uint32_t));
+        delete[] fArray;
+        fArray = grown;
+        fSize *= 2;
+    }
+    if (U_FAILURE(errorCode)) {
+        return;
+    }
+    fArray[fCount++] = value;
+}
+
+// Builds a binary resource holding private copies of the data bytes and,
+// if one is given and non-empty, of the file name.
+// Nothing is copied when errorCode already indicates a failure.
+// A zero-length binary under formatVersion > 1 is represented by its resource
+// word alone and is marked as written immediately.
+// errorCode is set to U_MEMORY_ALLOCATION_ERROR if an allocation yields nullptr.
+BinaryResource::BinaryResource(SRBRoot *bundle, const char *tag,
+                               uint32_t length, uint8_t *data, const char* fileName,
+                               const UString* comment, UErrorCode &errorCode)
+        : SResource(bundle, tag, URES_BINARY, comment, errorCode),
+          fLength(length), fData(nullptr), fFileName(nullptr) {
+    if (U_FAILURE(errorCode)) {
+        return;
+    }
+    const bool hasFileName = (fileName != nullptr) && (*fileName != 0);
+    if (hasFileName) {
+        fFileName = new char[uprv_strlen(fileName)+1];
+        if (fFileName == nullptr) {
+            errorCode = U_MEMORY_ALLOCATION_ERROR;
+            return;
+        }
+        uprv_strcpy(fFileName, fileName);
+    }
+    if (length == 0) {
+        if (gFormatVersion > 1) {
+            fRes = URES_MAKE_EMPTY_RESOURCE(URES_BINARY);
+            fWritten = true;
+        }
+        return;
+    }
+    fData = new uint8_t[length];
+    if (fData == nullptr) {
+        errorCode = U_MEMORY_ALLOCATION_ERROR;
+        return;
+    }
+    uprv_memcpy(fData, data, length);
+}
+
+// Destructor: frees the copies of the data bytes and of the file name
+// that the constructor allocated with new[].
+BinaryResource::~BinaryResource() {
+ delete[] fData;
+ delete[] fFileName;
+}
+
+/* Writing Functions */
+
+// Preflight step for one string: looks it up in stringSet to detect duplicates
+// and, for a first occurrence, adds it to the set and accounts for the 16-bit
+// units it will occupy.
+// bundle:    the bundle being written; its fPoolStringIndexLimit and
+//            f16BitStringsLength may be updated
+// stringSet: the strings seen so far; this object is stored as both key and value
+// errorCode: passed to uhash_put()
+void
+StringResource::handlePreflightStrings(SRBRoot *bundle, UHashtable *stringSet,
+ UErrorCode &errorCode) {
+ assert(fSame == nullptr);
+ fSame = static_cast<StringResource *>(uhash_get(stringSet, this));
+ if (fSame != nullptr) {
+ // This is a duplicate of a pool bundle string or of an earlier-visited string.
+ // The count reaches exactly 1 only for an entry that started at 0; SRBRoot::write()
+ // enters pool bundle strings that way, whereas local strings start at 1 below.
+ if (++fSame->fNumCopies == 1) {
+ assert(fSame->fWritten);
+ // Make sure the pool string index limit covers the offset of this string.
+ int32_t poolStringIndex = (int32_t)RES_GET_OFFSET(fSame->fRes);
+ if (poolStringIndex >= bundle->fPoolStringIndexLimit) {
+ bundle->fPoolStringIndexLimit = poolStringIndex + 1;
+ }
+ }
+ return;
+ }
+ /* Put this string into the set for finding duplicates. */
+ fNumCopies = 1;
+ uhash_put(stringSet, this, this, &errorCode);
+
+ if (bundle->fStringsForm != STRINGS_UTF16_V1) {
+ int32_t len = length();
+ if (len <= MAX_IMPLICIT_STRING_LENGTH &&
+ !U16_IS_TRAIL(fString[0]) && fString.indexOf((char16_t)0) < 0) {
+ /*
+ * This string will be stored without an explicit length.
+ * Runtime will detect !U16_IS_TRAIL(s[0]) and call u_strlen().
+ */
+ fNumCharsForLength = 0;
+ } else if (len <= 0x3ee) {
+ // Otherwise 1, 2 or 3 extra units are reserved for the length, depending on len.
+ fNumCharsForLength = 1;
+ } else if (len <= 0xfffff) {
+ fNumCharsForLength = 2;
+ } else {
+ fNumCharsForLength = 3;
+ }
+ bundle->f16BitStringsLength += fNumCharsForLength + len + 1; /* +1 for the NUL */
+ }
+}
+
+// Preflight step for a container: runs preflightStrings() on each item in list order.
+void
+ContainerResource::handlePreflightStrings(SRBRoot *bundle, UHashtable *stringSet,
+                                          UErrorCode &errorCode) {
+    SResource *item = fFirst;
+    while (item != nullptr) {
+        item->preflightStrings(bundle, stringSet, errorCode);
+        item = item->fNext;
+    }
+}
+
+// Entry point of the string preflight pass for one resource.
+// Does nothing after a failure, and nothing for an item whose resource word
+// was already precomputed (fRes != RES_BOGUS): such an item, for example an
+// integer or an empty string/binary/etc., needs no further data to be written.
+// Everything else is forwarded to the type-specific handlePreflightStrings().
+void
+SResource::preflightStrings(SRBRoot *bundle, UHashtable *stringSet, UErrorCode &errorCode) {
+    if (U_SUCCESS(errorCode) && fRes == RES_BOGUS) {
+        handlePreflightStrings(bundle, stringSet, errorCode);
+    }
+}
+
+// Default preflight handler: does nothing and ignores all of its arguments.
+void
+SResource::handlePreflightStrings(SRBRoot * /*bundle*/, UHashtable * /*stringSet*/,
+ UErrorCode & /*errorCode*/) {
+ /* Neither a string nor a container. */
+}
+
+// Returns the 16-bit form of resWord, or -1 if there is none.
+// A zero word (empty string) maps to 0. Only URES_STRING_V2 words can otherwise
+// be represented: offsets below fPoolStringIndexLimit must also be below
+// fPoolStringIndex16Limit and are returned as is, while the remaining offsets
+// are rebased to start at fPoolStringIndex16Limit and must not exceed 0xffff.
+int32_t
+SRBRoot::makeRes16(uint32_t resWord) const {
+    if (resWord == 0) {
+        return 0; /* empty string */
+    }
+    uint32_t type = RES_GET_TYPE(resWord);
+    if (type != URES_STRING_V2) {
+        return -1;
+    }
+    int32_t offset = (int32_t)RES_GET_OFFSET(resWord);
+    assert(offset > 0);
+    if (offset >= fPoolStringIndexLimit) {
+        int32_t rebased = offset - fPoolStringIndexLimit + fPoolStringIndex16Limit;
+        return (rebased <= 0xffff) ? rebased : -1;
+    }
+    return (offset < fPoolStringIndex16Limit) ? offset : -1;
+}
+
+// Translates a key offset from before compactKeys() into the offset stored as
+// newpos in the matching fKeyMap entry.
+// Returns oldpos unchanged when there is no key map.
+int32_t
+SRBRoot::mapKey(int32_t oldpos) const {
+ const KeyMapEntry *map = fKeyMap;
+ if (map == nullptr) {
+ return oldpos;
+ }
+ int32_t i, start, limit;
+
+ /* do a binary search for the old, pre-compactKeys() key offset */
+ // The searched range skips the first fUsePoolBundle->fKeysCount map entries
+ // and covers the fKeysCount entries that follow them.
+ start = fUsePoolBundle->fKeysCount;
+ limit = start + fKeysCount;
+ // Narrow [start, limit) until a single candidate remains at index start.
+ while (start < limit - 1) {
+ i = (start + limit) / 2;
+ if (oldpos < map[i].oldpos) {
+ limit = i;
+ } else {
+ start = i;
+ }
+ }
+ // The key is expected to be present in the map.
+ assert(oldpos == map[start].oldpos);
+ return map[start].newpos;
+}
+
+/*
+ * Reached only for UTF-16 v1 strings and for duplicate UTF-16 v2 strings:
+ * a unique UTF-16 v2 string already has fRes != RES_BOGUS, so write16()
+ * never calls this function for it.
+ * A duplicate takes over the resource word and written state of the string
+ * it duplicates (fSame); in any other case nothing happens.
+ */
+void
+StringResource::handleWrite16(SRBRoot * /*bundle*/) {
+    SResource *original = fSame;
+    if (original == nullptr) {
+        return;
+    }
+    /* This is a duplicate. */
+    assert(original->fRes != RES_BOGUS && original->fWritten);
+    fRes = original->fRes;
+    fWritten = original->fWritten;
+}
+
+// Appends the fRes16 value of each item, in list order, to the bundle's
+// 16-bit units and then marks this container as written.
+void
+ContainerResource::writeAllRes16(SRBRoot *bundle) {
+    SResource *item = fFirst;
+    while (item != nullptr) {
+        bundle->f16BitUnits.append((char16_t)item->fRes16);
+        item = item->fNext;
+    }
+    fWritten = true;
+}
+
+// 16-bit pass for an array.
+// An empty array under formatVersion > 1 is represented by its resource word alone.
+// Otherwise write16() runs on every item, and if the count fits into 16 bits,
+// no item produced a negative fRes16, and formatVersion > 1, the array is emitted
+// as a URES_ARRAY16: the count followed by the items' 16-bit resource values.
+void
+ArrayResource::handleWrite16(SRBRoot *bundle) {
+    if (fCount == 0 && gFormatVersion > 1) {
+        fRes = URES_MAKE_EMPTY_RESOURCE(URES_ARRAY);
+        fWritten = true;
+        return;
+    }
+
+    // OR-ing the values together turns the result negative as soon as one item is negative.
+    int32_t combinedRes16 = 0;
+    SResource *item = fFirst;
+    while (item != nullptr) {
+        item->write16(bundle);
+        combinedRes16 |= item->fRes16;
+        item = item->fNext;
+    }
+    const bool fitsArray16 = fCount <= 0xffff && combinedRes16 >= 0 && gFormatVersion > 1;
+    if (!fitsArray16) {
+        return;
+    }
+    fRes = URES_MAKE_RESOURCE(URES_ARRAY16, bundle->f16BitUnits.length());
+    bundle->f16BitUnits.append((char16_t)fCount);
+    writeAllRes16(bundle);
+}
+
+// 16-bit pass for a table.
+// An empty table under formatVersion > 1 is represented by its resource word alone.
+// Otherwise write16() runs on every item and the table form is chosen:
+// URES_TABLE16 is written right here into the bundle's 16-bit units, while for
+// URES_TABLE and URES_TABLE32 only fTableType is recorded.
+// Also raises bundle->fMaxTableLength to this table's item count if that is larger.
+void
+TableResource::handleWrite16(SRBRoot *bundle) {
+ if (fCount == 0 && gFormatVersion > 1) {
+ fRes = URES_MAKE_EMPTY_RESOURCE(URES_TABLE);
+ fWritten = true;
+ return;
+ }
+ /* Find the smallest table type that fits the data. */
+ // OR-ing the values together turns the result negative as soon as one item is negative.
+ int32_t key16 = 0;
+ int32_t res16 = 0;
+ for (SResource *current = fFirst; current != nullptr; current = current->fNext) {
+ current->write16(bundle);
+ key16 |= current->fKey16;
+ res16 |= current->fRes16;
+ }
+ if(fCount > (uint32_t)bundle->fMaxTableLength) {
+ bundle->fMaxTableLength = fCount;
+ }
+ if (fCount <= 0xffff && key16 >= 0) {
+ if (res16 >= 0 && gFormatVersion > 1) {
+ /* 16-bit count, key offsets and values */
+ fRes = URES_MAKE_RESOURCE(URES_TABLE16, bundle->f16BitUnits.length());
+ bundle->f16BitUnits.append((char16_t)fCount);
+ for (SResource *current = fFirst; current != nullptr; current = current->fNext) {
+ bundle->f16BitUnits.append((char16_t)current->fKey16);
+ }
+ writeAllRes16(bundle);
+ } else {
+ /* 16-bit count, 16-bit key offsets, 32-bit values */
+ fTableType = URES_TABLE;
+ }
+ } else {
+ /* 32-bit count, key offsets and values */
+ fTableType = URES_TABLE32;
+ }
+}
+
+// 16-bit pass for the pseudo-list: it is always given the resource word of an
+// empty URES_TABLE and marked as written, regardless of the items it holds.
+void
+PseudoListResource::handleWrite16(SRBRoot * /*bundle*/) {
+ fRes = URES_MAKE_EMPTY_RESOURCE(URES_TABLE);
+ fWritten = true;
+}
+
+// 16-bit pass for one resource:
+// 1. maps fKey to its final value and, where it fits, derives fKey16 from it;
+// 2. calls the type-specific handleWrite16() unless fRes was already computed;
+// 3. derives fRes16 from fRes via bundle->makeRes16().
+void
+SResource::write16(SRBRoot *bundle) {
+ if (fKey >= 0) {
+ // A tagged resource has a non-negative key index into the parsed key strings.
+ // compactKeys() built a map from parsed key index to the final key index.
+ // After the mapping, negative key indexes are used for shared pool bundle keys.
+ fKey = bundle->mapKey(fKey);
+ // If the key index fits into a Key16 for a Table or Table16,
+ // then set the fKey16 field accordingly.
+ // Otherwise keep it at -1.
+ if (fKey >= 0) {
+ if (fKey < bundle->fLocalKeyLimit) {
+ fKey16 = fKey;
+ }
+ } else {
+ // Pool key: clear the sign bit, then place the index after the local key range.
+ int32_t poolKeyIndex = fKey & 0x7fffffff;
+ if (poolKeyIndex <= 0xffff) {
+ poolKeyIndex += bundle->fLocalKeyLimit;
+ if (poolKeyIndex <= 0xffff) {
+ fKey16 = poolKeyIndex;
+ }
+ }
+ }
+ }
+ /*
+ * fRes != RES_BOGUS:
+ * The resource item word was already precomputed, which means
+ * no further data needs to be written.
+ * This might be an integer, or an empty or UTF-16 v2 string,
+ * an empty binary, etc.
+ */
+ if (fRes == RES_BOGUS) {
+ handleWrite16(bundle);
+ }
+ // Compute fRes16 for precomputed as well as just-computed fRes.
+ fRes16 = bundle->makeRes16(fRes);
+}
+
+// Default 16-bit handler: does nothing and ignores its argument.
+void
+SResource::handleWrite16(SRBRoot * /*bundle*/) {
+ /* Only a few resource types write 16-bit units. */
+}
+
+/*
+ * Only called for UTF-16 v1 strings, and for aliases.
+ * For UTF-16 v2 strings, preWrite() sees fRes != RES_BOGUS
+ * and exits early.
+ *
+ * Assigns the resource word from the current byte offset (in 4-byte units)
+ * and advances *byteOffset by the size that handleWrite() will emit:
+ * 4 bytes for the length plus the text including its terminating NUL.
+ */
+void
+StringBaseResource::handlePreWrite(uint32_t *byteOffset) {
+ /* Write the UTF-16 v1 string. */
+ fRes = URES_MAKE_RESOURCE(fType, *byteOffset >> 2);
+ *byteOffset += 4 + (length() + 1) * U_SIZEOF_UCHAR;
+}
+
+// Layout pass for an integer vector.
+// An empty vector under formatVersion > 1 is represented by its resource word
+// alone and marked as written. Otherwise the resource word is taken from the
+// current byte offset (in 4-byte units) and room is reserved for one 32-bit
+// count plus fCount 32-bit values.
+void
+IntVectorResource::handlePreWrite(uint32_t *byteOffset) {
+    if (fCount != 0 || gFormatVersion <= 1) {
+        fRes = URES_MAKE_RESOURCE(URES_INT_VECTOR, *byteOffset >> 2);
+        *byteOffset += (1 + fCount) * 4;
+        return;
+    }
+    fRes = URES_MAKE_EMPTY_RESOURCE(URES_INT_VECTOR);
+    fWritten = true;
+}
+
+// Layout pass for a binary resource.
+// Padding is reserved first so that the data bytes, which follow the length
+// field, start at a multiple of BIN_ALIGNMENT. Then the resource word is taken
+// from the (padded) byte offset and room is reserved for the 4-byte length
+// plus fLength data bytes.
+void
+BinaryResource::handlePreWrite(uint32_t *byteOffset) {
+    uint32_t dataStart = *byteOffset + sizeof(fLength);
+    uint32_t misalignment = dataStart % BIN_ALIGNMENT;
+
+    if (misalignment != 0) {
+        /* the padding amounts to 4 or 8 or 12 */
+        *byteOffset += (BIN_ALIGNMENT - misalignment);
+    }
+    fRes = URES_MAKE_RESOURCE(URES_BINARY, *byteOffset >> 2);
+    *byteOffset += 4 + fLength;
+}
+
+// Layout pass for the items of a container: runs preWrite() on each item in list order.
+void
+ContainerResource::preWriteAllRes(uint32_t *byteOffset) {
+    SResource *item = fFirst;
+    while (item != nullptr) {
+        item->preWrite(byteOffset);
+        item = item->fNext;
+    }
+}
+
+// Layout pass for a 32-bit array: lays out all items first, then takes the
+// resource word from the resulting byte offset (in 4-byte units) and reserves
+// room for one 32-bit count plus fCount 32-bit resource words.
+void
+ArrayResource::handlePreWrite(uint32_t *byteOffset) {
+ preWriteAllRes(byteOffset);
+ fRes = URES_MAKE_RESOURCE(URES_ARRAY, *byteOffset >> 2);
+ *byteOffset += (1 + fCount) * 4;
+}
+
+// Layout pass for a table that is not stored in 16-bit units: lays out all
+// items first, then takes the resource word from the resulting byte offset
+// (in 4-byte units) and reserves room according to fTableType.
+void
+TableResource::handlePreWrite(uint32_t *byteOffset) {
+    preWriteAllRes(byteOffset);
+    if (fTableType != URES_TABLE) {
+        /* 32-bit count, key offsets and values */
+        fRes = URES_MAKE_RESOURCE(URES_TABLE32, *byteOffset >> 2);
+        *byteOffset += 4 + fCount * 8;
+        return;
+    }
+    /* 16-bit count, 16-bit key offsets, 32-bit values */
+    fRes = URES_MAKE_RESOURCE(URES_TABLE, *byteOffset >> 2);
+    *byteOffset += 2 + fCount * 6;
+}
+
+// Layout pass for one resource.
+// An item whose resource word was already precomputed (fRes != RES_BOGUS) needs
+// no further data: this might be an integer, or an empty or UTF-16 v2 string,
+// an empty binary, etc. Every other item is laid out by its type-specific
+// handlePreWrite(), after which the offset is advanced by calcPadding().
+void
+SResource::preWrite(uint32_t *byteOffset) {
+    if (fRes == RES_BOGUS) {
+        handlePreWrite(byteOffset);
+        *byteOffset += calcPadding(*byteOffset);
+    }
+}
+
+// Default layout handler: asserts, because preWrite() only calls it for items
+// without a precomputed resource word, and this base version cannot lay out any.
+void
+SResource::handlePreWrite(uint32_t * /*byteOffset*/) {
+ assert(false);
+}
+
+/*
+ * Only called for UTF-16 v1 strings, and for aliases. For UTF-16 v2 strings,
+ * write() sees fWritten and exits early.
+ *
+ * Emits the 32-bit length followed by the text including its terminating NUL,
+ * and advances *byteOffset by the number of bytes written.
+ */
+void
+StringBaseResource::handleWrite(UNewDataMemory *mem, uint32_t *byteOffset) {
+ /* Write the UTF-16 v1 string. */
+ int32_t len = length();
+ udata_write32(mem, len);
+ udata_writeUString(mem, getBuffer(), len + 1);
+ *byteOffset += 4 + (len + 1) * U_SIZEOF_UCHAR;
+ fWritten = true;
+}
+
+// Writes the data of every item, in list order, via write().
+// Asserts that the number of items visited equals fCount.
+void
+ContainerResource::writeAllRes(UNewDataMemory *mem, uint32_t *byteOffset) {
+    uint32_t visited = 0;
+    SResource *item = fFirst;
+    while (item != nullptr) {
+        item->write(mem, byteOffset);
+        ++visited;
+        item = item->fNext;
+    }
+    assert(visited == fCount);
+}
+
+// Writes the 32-bit resource word of every item, in list order,
+// and advances *byteOffset by 4 bytes per counted item.
+void
+ContainerResource::writeAllRes32(UNewDataMemory *mem, uint32_t *byteOffset) {
+    SResource *item = fFirst;
+    while (item != nullptr) {
+        udata_write32(mem, item->fRes);
+        item = item->fNext;
+    }
+    *byteOffset += fCount * 4;
+}
+
+// Writes a 32-bit array: first the data of all items, then the 32-bit item
+// count, then the resource word of each item.
+void
+ArrayResource::handleWrite(UNewDataMemory *mem, uint32_t *byteOffset) {
+ writeAllRes(mem, byteOffset);
+ udata_write32(mem, fCount);
+ *byteOffset += 4;
+ writeAllRes32(mem, byteOffset);
+}
+
+// Writes the 32-bit element count followed by each 32-bit element,
+// and advances *byteOffset by the number of bytes written.
+void
+IntVectorResource::handleWrite(UNewDataMemory *mem, uint32_t *byteOffset) {
+    udata_write32(mem, fCount);
+    uint32_t index = 0;
+    while (index < fCount) {
+        udata_write32(mem, fArray[index]);
+        ++index;
+    }
+    *byteOffset += (1 + fCount) * 4;
+}
+
+// Writes a binary resource: padding so that the data bytes, which follow the
+// length field, start at a multiple of BIN_ALIGNMENT; then the 32-bit length;
+// then the data bytes, if any. *byteOffset is advanced by everything written.
+void
+BinaryResource::handleWrite(UNewDataMemory *mem, uint32_t *byteOffset) {
+    uint32_t dataStart = *byteOffset + sizeof(fLength);
+    uint32_t misalignment = dataStart % BIN_ALIGNMENT;
+
+    if (misalignment != 0) {
+        /* the padding amounts to 4 or 8 or 12 */
+        uint32_t padding = (BIN_ALIGNMENT - misalignment);
+        udata_writePadding(mem, padding);
+        *byteOffset += padding;
+    }
+
+    udata_write32(mem, fLength);
+    if (fLength != 0) {
+        udata_writeBlock(mem, fData, fLength);
+    }
+    *byteOffset += 4 + fLength;
+}
+
+// Writes a table that is not stored in 16-bit units: first the data of all
+// items, then the count and key offsets in the width selected by fTableType,
+// then the 32-bit resource word of each item.
+void
+TableResource::handleWrite(UNewDataMemory *mem, uint32_t *byteOffset) {
+ writeAllRes(mem, byteOffset);
+ if(fTableType == URES_TABLE) {
+ // 16-bit count followed by one 16-bit key offset per item
+ udata_write16(mem, (uint16_t)fCount);
+ for (SResource *current = fFirst; current != nullptr; current = current->fNext) {
+ udata_write16(mem, current->fKey16);
+ }
+ *byteOffset += (1 + fCount)* 2;
+ if ((fCount & 1) == 0) {
+ /* 16-bit count and even number of 16-bit key offsets need padding before 32-bit resource items */
+ udata_writePadding(mem, 2);
+ *byteOffset += 2;
+ }
+ } else /* URES_TABLE32 */ {
+ // 32-bit count followed by one 32-bit key offset per item
+ udata_write32(mem, fCount);
+ for (SResource *current = fFirst; current != nullptr; current = current->fNext) {
+ udata_write32(mem, (uint32_t)current->fKey);
+ }
+ *byteOffset += (1 + fCount)* 4;
+ }
+ writeAllRes32(mem, byteOffset);
+}
+
+// Writes the data of one resource unless it has been written already.
+// The type-specific handleWrite() emits the data; afterwards the padding
+// reported by calcPadding() is written and the item is marked as written.
+void
+SResource::write(UNewDataMemory *mem, uint32_t *byteOffset) {
+    if (fWritten) {
+        assert(fRes != RES_BOGUS);
+        return;
+    }
+    handleWrite(mem, byteOffset);
+    const uint8_t trailingPad = calcPadding(*byteOffset);
+    if (trailingPad != 0) {
+        udata_writePadding(mem, trailingPad);
+        *byteOffset += trailingPad;
+    }
+    fWritten = true;
+}
+
+// Default write handler: asserts, because write() only calls it for items
+// that are not yet written, and this base version cannot write any data.
+void
+SResource::handleWrite(UNewDataMemory * /*mem*/, uint32_t * /*byteOffset*/) {
+ assert(false);
+}
+
+/*
+ * Serializes this bundle into a binary "res" data file.
+ *
+ * Steps, in order: compact and pad the key strings; preflight and compact the
+ * strings (formatVersion > 1); run the 16-bit pass and the layout pass over the
+ * resource tree; report the output path; create the data file; write the root
+ * word, indexes[], key strings, 16-bit units and the resource tree; finish the
+ * file and verify that the written size matches the precomputed one.
+ *
+ * @param outputDir          directory handed to udata_create(); may be nullptr
+ * @param outputPkg          optional package name; when given, the data name is
+ *                           "<outputPkg>_<locale>", otherwise just "<locale>"
+ * @param writtenFilename    optional buffer that receives
+ *                           "<outputDir><separator>[<outputPkg>_]<locale>.res",
+ *                           truncated to fit and always NUL-terminated;
+ *                           the separator is added only after a non-empty
+ *                           outputDir that does not already end with one
+ * @param writtenFilenameLen capacity of writtenFilename in bytes
+ * @param errorCode          in/out ICU error code; set to U_BUFFER_OVERFLOW_ERROR
+ *                           if the string indexes or the data name do not fit,
+ *                           and to U_INTERNAL_PROGRAM_ERROR on a size mismatch
+ */
+void SRBRoot::write(const char *outputDir, const char *outputPkg,
+                    char *writtenFilename, int writtenFilenameLen,
+                    UErrorCode &errorCode) {
+    UNewDataMemory *mem = nullptr;
+    uint32_t byteOffset = 0;
+    uint32_t top, size;
+    char dataName[1024];
+    int32_t indexes[URES_INDEX_TOP];
+
+    compactKeys(errorCode);
+    /*
+     * Add padding bytes to fKeys so that fKeysTop is 4-aligned.
+     * Safe because the capacity is a multiple of 4.
+     */
+    while (fKeysTop & 3) {
+        fKeys[fKeysTop++] = (char)0xaa;
+    }
+    /*
+     * In URES_TABLE, use all local key offsets that fit into 16 bits,
+     * and use the remaining 16-bit offsets for pool key offsets
+     * if there are any.
+     * If there are no local keys, then use the whole 16-bit space
+     * for pool key offsets.
+     * Note: This cannot be changed without changing the major formatVersion.
+     */
+    if (fKeysBottom < fKeysTop) {
+        if (fKeysTop <= 0x10000) {
+            fLocalKeyLimit = fKeysTop;
+        } else {
+            fLocalKeyLimit = 0x10000;
+        }
+    } else {
+        fLocalKeyLimit = 0;
+    }
+
+    UHashtable *stringSet;
+    if (gFormatVersion > 1) {
+        stringSet = uhash_open(string_hash, string_comp, string_comp, &errorCode);
+        if (U_SUCCESS(errorCode) &&
+                fUsePoolBundle != nullptr && fUsePoolBundle->fStrings != nullptr) {
+            /* Seed the set with the pool bundle strings, each with a zero usage count. */
+            for (SResource *current = fUsePoolBundle->fStrings->fFirst;
+                    current != nullptr;
+                    current = current->fNext) {
+                StringResource *sr = static_cast<StringResource *>(current);
+                sr->fNumCopies = 0;
+                sr->fNumUnitsSaved = 0;
+                uhash_put(stringSet, sr, sr, &errorCode);
+            }
+        }
+        fRoot->preflightStrings(this, stringSet, errorCode);
+    } else {
+        stringSet = nullptr;
+    }
+    if (fStringsForm == STRINGS_UTF16_V2 && f16BitStringsLength > 0) {
+        compactStringsV2(stringSet, errorCode);
+    }
+    uhash_close(stringSet);
+    if (U_FAILURE(errorCode)) {
+        return;
+    }
+
+    int32_t formatVersion = gFormatVersion;
+    if (fPoolStringIndexLimit != 0) {
+        int32_t sum = fPoolStringIndexLimit + fLocalStringIndexLimit;
+        if ((sum - 1) > RES_MAX_OFFSET) {
+            errorCode = U_BUFFER_OVERFLOW_ERROR;
+            return;
+        }
+        if (fPoolStringIndexLimit < 0x10000 && sum <= 0x10000) {
+            // 16-bit indexes work for all pool + local strings.
+            fPoolStringIndex16Limit = fPoolStringIndexLimit;
+        } else {
+            // Set the pool index threshold so that 16-bit indexes work
+            // for some pool strings and some local strings.
+            fPoolStringIndex16Limit = (int32_t)(
+                    ((int64_t)fPoolStringIndexLimit * 0xffff) / sum);
+        }
+    } else if (gIsDefaultFormatVersion && formatVersion == 3 && !fIsPoolBundle) {
+        // If we just default to formatVersion 3
+        // but there are no pool bundle strings to share
+        // and we do not write a pool bundle,
+        // then write formatVersion 2 which is just as good.
+        formatVersion = 2;
+    }
+
+    fRoot->write16(this);
+    if (f16BitUnits.isBogus()) {
+        errorCode = U_MEMORY_ALLOCATION_ERROR;
+        return;
+    }
+    if (f16BitUnits.length() & 1) {
+        f16BitUnits.append((char16_t)0xaaaa); /* pad to multiple of 4 bytes */
+    }
+
+    byteOffset = fKeysTop + f16BitUnits.length() * 2;
+    fRoot->preWrite(&byteOffset);
+
+    /* total size including the root item */
+    top = byteOffset;
+
+    if (writtenFilename != nullptr && writtenFilenameLen > 0) {
+        /*
+         * Report "<outputDir><separator>[<outputPkg>_]<locale>.res".
+         * snprintf() never writes more than writtenFilenameLen bytes
+         * and always NUL-terminates, so an overlong path is truncated
+         * rather than overrunning the caller's buffer.
+         */
+        const char *dir = (outputDir != nullptr) ? outputDir : "";
+        const size_t dirLength = uprv_strlen(dir);
+        const char separator[2] = { U_FILE_SEP_CHAR, 0 };
+        const bool needsSeparator =
+            dirLength > 0 && dir[dirLength - 1] != U_FILE_SEP_CHAR;
+        snprintf(writtenFilename, (size_t)writtenFilenameLen, "%s%s%s%s%s.res",
+                 dir,
+                 needsSeparator ? separator : "",
+                 (outputPkg != nullptr) ? outputPkg : "",
+                 (outputPkg != nullptr) ? "_" : "",
+                 fLocale);
+    }
+
+    {
+        /* Make sure that "[<outputPkg>_]<locale>" plus its NUL fits into dataName. */
+        size_t dataNameSize = uprv_strlen(fLocale) + 1;
+        if (outputPkg) {
+            dataNameSize += uprv_strlen(outputPkg) + 1;
+        }
+        if (dataNameSize > sizeof(dataName)) {
+            errorCode = U_BUFFER_OVERFLOW_ERROR;
+            return;
+        }
+    }
+    if(outputPkg)
+    {
+        uprv_strcpy(dataName, outputPkg);
+        uprv_strcat(dataName, "_");
+        uprv_strcat(dataName, fLocale);
+    }
+    else
+    {
+        uprv_strcpy(dataName, fLocale);
+    }
+
+    uprv_memcpy(dataInfo.formatVersion, gFormatVersions + formatVersion, sizeof(UVersionInfo));
+
+    mem = udata_create(outputDir, "res", dataName,
+                       &dataInfo, (gIncludeCopyright==true)? U_COPYRIGHT_STRING:nullptr, &errorCode);
+    if(U_FAILURE(errorCode)){
+        return;
+    }
+
+    /* write the root item */
+    udata_write32(mem, fRoot->fRes);
+
+    /*
+     * formatVersion 1.1 (ICU 2.8):
+     * write int32_t indexes[] after root and before the key strings
+     * to make it easier to parse resource bundles in icuswap or from Java etc.
+     */
+    uprv_memset(indexes, 0, sizeof(indexes));
+    indexes[URES_INDEX_LENGTH]=             fIndexLength;
+    indexes[URES_INDEX_KEYS_TOP]=           fKeysTop>>2;
+    indexes[URES_INDEX_RESOURCES_TOP]=      (int32_t)(top>>2);
+    indexes[URES_INDEX_BUNDLE_TOP]=         indexes[URES_INDEX_RESOURCES_TOP];
+    indexes[URES_INDEX_MAX_TABLE_LENGTH]=   fMaxTableLength;
+
+    /*
+     * formatVersion 1.2 (ICU 3.6):
+     * write indexes[URES_INDEX_ATTRIBUTES] with URES_ATT_NO_FALLBACK set or not set
+     * the memset() above initialized all indexes[] to 0
+     */
+    if (fNoFallback) {
+        indexes[URES_INDEX_ATTRIBUTES]=URES_ATT_NO_FALLBACK;
+    }
+    /*
+     * formatVersion 2.0 (ICU 4.4):
+     * more compact string value storage, optional pool bundle
+     */
+    if (URES_INDEX_16BIT_TOP < fIndexLength) {
+        indexes[URES_INDEX_16BIT_TOP] = (fKeysTop>>2) + (f16BitUnits.length()>>1);
+    }
+    if (URES_INDEX_POOL_CHECKSUM < fIndexLength) {
+        if (fIsPoolBundle) {
+            indexes[URES_INDEX_ATTRIBUTES] |= URES_ATT_IS_POOL_BUNDLE | URES_ATT_NO_FALLBACK;
+            uint32_t checksum = computeCRC((const char *)(fKeys + fKeysBottom),
+                                           (uint32_t)(fKeysTop - fKeysBottom), 0);
+            if (f16BitUnits.length() <= 1) {
+                // no pool strings to checksum
+            } else if (U_IS_BIG_ENDIAN) {
+                checksum = computeCRC(reinterpret_cast<const char *>(f16BitUnits.getBuffer()),
+                                      (uint32_t)f16BitUnits.length() * 2, checksum);
+            } else {
+                // Swap to big-endian so we get the same checksum on all platforms
+                // (except for charset family, due to the key strings).
+                UnicodeString s(f16BitUnits);
+                assert(!s.isBogus());
+                // .getBuffer(capacity) returns a mutable buffer
+                char16_t* p = s.getBuffer(f16BitUnits.length());
+                for (int32_t count = f16BitUnits.length(); count > 0; --count) {
+                    uint16_t x = *p;
+                    *p++ = (uint16_t)((x << 8) | (x >> 8));
+                }
+                s.releaseBuffer(f16BitUnits.length());
+                checksum = computeCRC((const char *)s.getBuffer(),
+                                      (uint32_t)f16BitUnits.length() * 2, checksum);
+            }
+            indexes[URES_INDEX_POOL_CHECKSUM] = (int32_t)checksum;
+        } else if (gUsePoolBundle) {
+            indexes[URES_INDEX_ATTRIBUTES] |= URES_ATT_USES_POOL_BUNDLE;
+            indexes[URES_INDEX_POOL_CHECKSUM] = fUsePoolBundle->fChecksum;
+        }
+    }
+    // formatVersion 3 (ICU 56):
+    // share string values via pool bundle strings
+    indexes[URES_INDEX_LENGTH] |= fPoolStringIndexLimit << 8;  // bits 23..0 -> 31..8
+    indexes[URES_INDEX_ATTRIBUTES] |= (fPoolStringIndexLimit >> 12) & 0xf000;  // bits 27..24 -> 15..12
+    indexes[URES_INDEX_ATTRIBUTES] |= fPoolStringIndex16Limit << 16;
+
+    /* write the indexes[] */
+    udata_writeBlock(mem, indexes, fIndexLength*4);
+
+    /* write the table key strings */
+    udata_writeBlock(mem, fKeys+fKeysBottom,
+                          fKeysTop-fKeysBottom);
+
+    /* write the v2 UTF-16 strings, URES_TABLE16 and URES_ARRAY16 */
+    udata_writeBlock(mem, f16BitUnits.getBuffer(), f16BitUnits.length()*2);
+
+    /* write all of the bundle contents: the root item and its children */
+    byteOffset = fKeysTop + f16BitUnits.length() * 2;
+    fRoot->write(mem, &byteOffset);
+    assert(byteOffset == top);
+
+    size = udata_finish(mem, &errorCode);
+    if(top != size) {
+        /* %u expects unsigned int arguments */
+        fprintf(stderr, "genrb error: wrote %u bytes but counted %u\n",
+                (unsigned int)size, (unsigned int)top);
+        errorCode = U_INTERNAL_PROGRAM_ERROR;
+    }
+}
+
+/* Opening Functions */
+
+// Creates a TableResource for the given bundle, tag and comment.
+// Returns nullptr, with the new object (if any) deleted, when *status ends up as a failure.
+TableResource* table_open(struct SRBRoot *bundle, const char *tag, const struct UString* comment, UErrorCode *status) {
+    LocalPointer<TableResource> table(new TableResource(bundle, tag, comment, *status), *status);
+    if (U_FAILURE(*status)) {
+        return nullptr;
+    }
+    return table.orphan();
+}
+
+// Creates an ArrayResource for the given bundle, tag and comment.
+// Returns nullptr, with the new object (if any) deleted, when *status ends up as a failure.
+ArrayResource* array_open(struct SRBRoot *bundle, const char *tag, const struct UString* comment, UErrorCode *status) {
+    LocalPointer<ArrayResource> array(new ArrayResource(bundle, tag, comment, *status), *status);
+    if (U_FAILURE(*status)) {
+        return nullptr;
+    }
+    return array.orphan();
+}
+
+// Creates a StringResource from a (value, len) UTF-16 buffer.
+// Returns nullptr, with the new object (if any) deleted, when *status ends up as a failure.
+struct SResource *string_open(struct SRBRoot *bundle, const char *tag, const char16_t *value, int32_t len, const struct UString* comment, UErrorCode *status) {
+    LocalPointer<SResource> string(
+            new StringResource(bundle, tag, value, len, comment, *status), *status);
+    if (U_FAILURE(*status)) {
+        return nullptr;
+    }
+    return string.orphan();
+}
+
+// Creates an AliasResource from a (value, len) UTF-16 buffer.
+// Returns nullptr, with the new object (if any) deleted, when *status ends up as a failure.
+struct SResource *alias_open(struct SRBRoot *bundle, const char *tag, char16_t *value, int32_t len, const struct UString* comment, UErrorCode *status) {
+    LocalPointer<SResource> alias(
+            new AliasResource(bundle, tag, value, len, comment, *status), *status);
+    if (U_FAILURE(*status)) {
+        return nullptr;
+    }
+    return alias.orphan();
+}
+
+// Creates an empty IntVectorResource for the given bundle, tag and comment.
+// Returns nullptr, with the new object (if any) deleted, when *status ends up as a failure.
+IntVectorResource *intvector_open(struct SRBRoot *bundle, const char *tag, const struct UString* comment, UErrorCode *status) {
+    LocalPointer<IntVectorResource> vector(
+            new IntVectorResource(bundle, tag, comment, *status), *status);
+    if (U_FAILURE(*status)) {
+        return nullptr;
+    }
+    return vector.orphan();
+}
+
+// Creates an IntResource holding value.
+// Returns nullptr, with the new object (if any) deleted, when *status ends up as a failure.
+struct SResource *int_open(struct SRBRoot *bundle, const char *tag, int32_t value, const struct UString* comment, UErrorCode *status) {
+    LocalPointer<SResource> integer(new IntResource(bundle, tag, value, comment, *status), *status);
+    if (U_FAILURE(*status)) {
+        return nullptr;
+    }
+    return integer.orphan();
+}
+
+// Creates a BinaryResource from length data bytes and an optional file name.
+// Returns nullptr, with the new object (if any) deleted, when *status ends up as a failure.
+struct SResource *bin_open(struct SRBRoot *bundle, const char *tag, uint32_t length, uint8_t *data, const char* fileName, const struct UString* comment, UErrorCode *status) {
+    LocalPointer<SResource> binary(
+            new BinaryResource(bundle, tag, length, data, fileName, comment, *status), *status);
+    if (U_FAILURE(*status)) {
+        return nullptr;
+    }
+    return binary.orphan();
+}
+
+// Creates an empty bundle.
+// comment:      comment passed on to the root table (not used for a pool bundle)
+// isPoolBundle: selects a PseudoListResource instead of a TableResource as the root
+// errorCode:    nothing beyond member initialization happens if it already
+//               indicates a failure; set to U_MEMORY_ALLOCATION_ERROR if the
+//               key buffer or the root item pointer is null
+SRBRoot::SRBRoot(const UString *comment, UBool isPoolBundle, UErrorCode &errorCode)
+ : fRoot(nullptr), fLocale(nullptr), fIndexLength(0), fMaxTableLength(0), fNoFallback(false),
+ fStringsForm(STRINGS_UTF16_V1), fIsPoolBundle(isPoolBundle),
+ fKeys(nullptr), fKeyMap(nullptr),
+ fKeysBottom(0), fKeysTop(0), fKeysCapacity(0),
+ fKeysCount(0), fLocalKeyLimit(0),
+ f16BitUnits(), f16BitStringsLength(0),
+ fUsePoolBundle(&kNoPoolBundle),
+ fPoolStringIndexLimit(0), fPoolStringIndex16Limit(0), fLocalStringIndexLimit(0),
+ fWritePoolBundle(nullptr) {
+ if (U_FAILURE(errorCode)) {
+ return;
+ }
+
+ if (gFormatVersion > 1) {
+ // f16BitUnits must start with a zero for empty resources.
+ // We might be able to omit it if there are no empty 16-bit resources.
+ f16BitUnits.append((char16_t)0);
+ }
+
+ fKeys = (char *) uprv_malloc(sizeof(char) * KEY_SPACE_SIZE);
+ if (isPoolBundle) {
+ fRoot = new PseudoListResource(this, errorCode);
+ } else {
+ fRoot = new TableResource(this, nullptr, comment, errorCode);
+ }
+ // Both allocations are checked together; whatever was allocated stays in the
+ // members and is released by the destructor.
+ if (fKeys == nullptr || fRoot == nullptr || U_FAILURE(errorCode)) {
+ if (U_SUCCESS(errorCode)) {
+ errorCode = U_MEMORY_ALLOCATION_ERROR;
+ }
+ return;
+ }
+
+ fKeysCapacity = KEY_SPACE_SIZE;
+ /* formatVersion 1.1 and up: start fKeysTop after the root item and indexes[] */
+ if (gUsePoolBundle || isPoolBundle) {
+ fIndexLength = URES_INDEX_POOL_CHECKSUM + 1;
+ } else if (gFormatVersion >= 2) {
+ fIndexLength = URES_INDEX_16BIT_TOP + 1;
+ } else /* formatVersion 1 */ {
+ fIndexLength = URES_INDEX_ATTRIBUTES + 1;
+ }
+ // Key offsets count from the start of the bundle, so the first fKeysBottom
+ // bytes of fKeys stand in for the root word and indexes[] and are zeroed.
+ fKeysBottom = (1 /* root */ + fIndexLength) * 4;
+ uprv_memset(fKeys, 0, fKeysBottom);
+ fKeysTop = fKeysBottom;
+
+ if (gFormatVersion == 1) {
+ fStringsForm = STRINGS_UTF16_V1;
+ } else {
+ fStringsForm = STRINGS_UTF16_V2;
+ }
+}
+
+/* Closing Functions */
+
+// Deletes res; passing nullptr is harmless because delete accepts a null pointer.
+void res_close(struct SResource *res) {
+ delete res;
+}
+
+// Destructor: deletes the root item and frees the locale string, the key
+// buffer and the key map.
+SRBRoot::~SRBRoot() {
+ delete fRoot;
+ uprv_free(fLocale);
+ uprv_free(fKeys);
+ uprv_free(fKeyMap);
+}
+
+/* Misc Functions */
+
+// Replaces fLocale with a char copy of the NUL-terminated UTF-16 string locale,
+// converted with u_UCharsToChars().
+// Does nothing if errorCode already indicates a failure. The previous fLocale is
+// freed first; if the new allocation fails, fLocale is left as nullptr and
+// errorCode is set to U_MEMORY_ALLOCATION_ERROR.
+void SRBRoot::setLocale(char16_t *locale, UErrorCode &errorCode) {
+    if (U_SUCCESS(errorCode)) {
+        uprv_free(fLocale);
+        fLocale = (char*) uprv_malloc(sizeof(char) * (u_strlen(locale)+1));
+        if (fLocale != nullptr) {
+            /* the +1 copies the terminating NUL as well */
+            u_UCharsToChars(locale, fLocale, u_strlen(locale)+1);
+        } else {
+            errorCode = U_MEMORY_ALLOCATION_ERROR;
+        }
+    }
+}
+
+// Returns the key string for a key offset.
+// A negative offset refers to the pool bundle's keys, after the sign bit has
+// been cleared; a non-negative offset refers to this bundle's own fKeys.
+const char *
+SRBRoot::getKeyString(int32_t key) const {
+    return (key >= 0) ? fKeys + key
+                      : fUsePoolBundle->fKeys + (key & 0x7fffffff);
+}
+
+// Returns this item's key string as resolved by bundle,
+// or nullptr if the item has no key (fKey == -1).
+const char *
+SResource::getKeyString(const SRBRoot *bundle) const {
+    return (fKey == -1) ? nullptr : bundle->getKeyString(fKey);
+}
+
+// Returns a pointer to the key bytes, which start at offset fKeysBottom in fKeys,
+// and stores their length (fKeysTop - fKeysBottom) in *pLength.
+const char *
+SRBRoot::getKeyBytes(int32_t *pLength) const {
+ *pLength = fKeysTop - fKeysBottom;
+ return fKeys + fKeysBottom;
+}
+
+/*
+ * Appends length bytes from keyBytes to the key buffer.
+ *
+ * The buffer grows in steps of KEY_SPACE_SIZE, by as many steps as needed
+ * for the new bytes to fit with at least one byte to spare.
+ * On any failure the bundle is left unchanged: the existing key buffer stays
+ * valid and fKeysTop is not advanced.
+ *
+ * @param keyBytes  bytes to append; may be nullptr only if length is 0
+ * @param length    number of bytes to append; must not be negative
+ * @param errorCode in/out ICU error code; set to U_ILLEGAL_ARGUMENT_ERROR for
+ *                  bad arguments, U_BUFFER_OVERFLOW_ERROR if the key offsets
+ *                  would exceed the int32_t range, and
+ *                  U_MEMORY_ALLOCATION_ERROR if the buffer cannot be enlarged
+ * @return the offset at which the bytes were stored (the current top for
+ *         length 0), or -1 on failure
+ */
+int32_t
+SRBRoot::addKeyBytes(const char *keyBytes, int32_t length, UErrorCode &errorCode) {
+    // It is not legal to add new key bytes after compactKeys is run!
+    U_ASSERT(fKeyMap == nullptr);
+
+    if (U_FAILURE(errorCode)) {
+        return -1;
+    }
+    if (length < 0 || (keyBytes == nullptr && length != 0)) {
+        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
+        return -1;
+    }
+    if (length == 0) {
+        return fKeysTop;
+    }
+    if (length > INT32_MAX - fKeysTop) {
+        /* the new top would not fit into an int32_t */
+        errorCode = U_BUFFER_OVERFLOW_ERROR;
+        return -1;
+    }
+
+    const int32_t keypos = fKeysTop;
+    const int32_t newTop = keypos + length;
+    if (newTop >= fKeysCapacity) {
+        /* overflow - resize the keys buffer */
+        int32_t newCapacity = fKeysCapacity;
+        do {
+            /* A single step is not enough when length exceeds KEY_SPACE_SIZE. */
+            if (newCapacity > INT32_MAX - KEY_SPACE_SIZE) {
+                errorCode = U_BUFFER_OVERFLOW_ERROR;
+                return -1;
+            }
+            newCapacity += KEY_SPACE_SIZE;
+        } while (newTop >= newCapacity);
+
+        /* Keep the old pointer until the reallocation is known to have succeeded. */
+        char *newKeys = static_cast<char *>(uprv_realloc(fKeys, newCapacity));
+        if(newKeys == nullptr) {
+            errorCode = U_MEMORY_ALLOCATION_ERROR;
+            return -1;
+        }
+        fKeys = newKeys;
+        fKeysCapacity = newCapacity;
+    }
+
+    uprv_memcpy(fKeys + keypos, keyBytes, length);
+    fKeysTop = newTop;
+
+    return keypos;
+}
+
+// Stores tag, including its terminating NUL, in the key buffer and returns
+// its key offset; fKeysCount is incremented when that succeeded.
+// Returns -1 if errorCode already indicates a failure, and also for a nullptr
+// tag, which is not an error: the root table and array items have no keys.
+int32_t
+SRBRoot::addTag(const char *tag, UErrorCode &errorCode) {
+    if (U_FAILURE(errorCode) || tag == nullptr) {
+        return -1;
+    }
+
+    const int32_t tagLengthWithNul = (int32_t)(uprv_strlen(tag) + 1);
+    const int32_t keypos = addKeyBytes(tag, tagLengthWithNul, errorCode);
+    if (U_SUCCESS(errorCode)) {
+        ++fKeysCount;
+    }
+    return keypos;
+}
+
+/*
+ * Three-way comparison of possibly-negative key offsets:
+ * returns -1, 0 or 1 for lPos less than, equal to or greater than rPos.
+ * Built from the two relational tests rather than from lPos - rPos,
+ * because the subtraction is prone to negative-integer underflows.
+ */
+static int32_t
+compareInt32(int32_t lPos, int32_t rPos) {
+    return (int32_t)(lPos > rPos) - (int32_t)(lPos < rPos);
+}
+
+// Sort comparator for KeyMapEntry items; context is the SRBRoot whose
+// getKeyString() resolves each entry's oldpos to its key string.
+// Keys are ordered by comparing their characters from the last one backwards;
+// when one key is a suffix of the other, the longer key sorts first;
+// identical keys are ordered by oldpos.
+static int32_t U_CALLCONV
+compareKeySuffixes(const void *context, const void *l, const void *r) {
+ const struct SRBRoot *bundle=(const struct SRBRoot *)context;
+ int32_t lPos = ((const KeyMapEntry *)l)->oldpos;
+ int32_t rPos = ((const KeyMapEntry *)r)->oldpos;
+ const char *lStart = bundle->getKeyString(lPos);
+ const char *lLimit = lStart;
+ const char *rStart = bundle->getKeyString(rPos);
+ const char *rLimit = rStart;
+ int32_t diff;
+ // Advance both limits to the terminating NUL of their key.
+ while (*lLimit != 0) { ++lLimit; }
+ while (*rLimit != 0) { ++rLimit; }
+ /* compare keys in reverse character order */
+ while (lStart < lLimit && rStart < rLimit) {
+ // Characters are compared as unsigned byte values.
+ diff = (int32_t)(uint8_t)*--lLimit - (int32_t)(uint8_t)*--rLimit;
+ if (diff != 0) {
+ return diff;
+ }
+ }
+ /* sort equal suffixes by descending key length */
+ // At this point each limit-start difference is the unmatched prefix length of that key.
+ diff = (int32_t)(rLimit - rStart) - (int32_t)(lLimit - lStart);
+ if (diff != 0) {
+ return diff;
+ }
+ /* Sort pool bundle keys first (negative oldpos), and otherwise keys in parsing order. */
+ return compareInt32(lPos, rPos);
+}
+
+/** uprv_sortArray() comparator: orders KeyMapEntry items by ascending newpos. */
+static int32_t U_CALLCONV
+compareKeyNewpos(const void * /*context*/, const void *l, const void *r) {
+    const KeyMapEntry *left = (const KeyMapEntry *)l;
+    const KeyMapEntry *right = (const KeyMapEntry *)r;
+    return compareInt32(left->newpos, right->newpos);
+}
+
+/** uprv_sortArray() comparator: orders KeyMapEntry items by ascending oldpos. */
+static int32_t U_CALLCONV
+compareKeyOldpos(const void * /*context*/, const void *l, const void *r) {
+    const KeyMapEntry *left = (const KeyMapEntry *)l;
+    const KeyMapEntry *right = (const KeyMapEntry *)r;
+    return compareInt32(left->oldpos, right->oldpos);
+}
+
+/**
+ * Base implementation: reports this resource's own key offset (fKey) to
+ * the collector. The value is passed through as-is, including negatives.
+ */
+void SResource::collectKeys(std::function<void(int32_t)> collector) const {
+    const int32_t ownKey = fKey;
+    collector(ownKey);
+}
+
+/**
+ * Reports this container's own key offset first, then lets every child in
+ * the fFirst/fNext chain report its keys in list order.
+ */
+void ContainerResource::collectKeys(std::function<void(int32_t)> collector) const {
+    collector(fKey);
+    const SResource *child = fFirst;
+    while (child != nullptr) {
+        child->collectKeys(collector);
+        child = child->fNext;
+    }
+}
+
+/**
+ * Compacts the key strings of this bundle in place and builds fKeyMap,
+ * which maps each old key offset (oldpos) to its offset after compaction
+ * (newpos).
+ *
+ * Steps:
+ *  1. For non-pool bundles, collect the key offsets actually referenced by
+ *     the resource tree; unreferenced keys are overwritten with the byte
+ *     value 1, which marks "deleted" bytes.
+ *  2. Sort all keys (pool bundle keys have a negative oldpos) so that each
+ *     key is immediately followed by its suffixes, and redirect suffixes
+ *     into the longer key that contains them.
+ *  3. Squeeze the deleted bytes out of fKeys and adjust the newpos values.
+ *  4. Re-sort the map by oldpos and store it in fKeyMap.
+ *
+ * Does nothing if errorCode already indicates a failure or if fKeyMap has
+ * already been built.
+ *
+ * @param errorCode In/out ICU error code; set to U_MEMORY_ALLOCATION_ERROR
+ *                  if the map cannot be allocated, or to whatever
+ *                  uprv_sortArray() reports.
+ */
+void
+SRBRoot::compactKeys(UErrorCode &errorCode) {
+    KeyMapEntry *map;
+    char *keys;
+    int32_t i;
+
+    // Check the preconditions before touching any state: previously this
+    // test came after fRoot/fUsePoolBundle had been dereferenced and
+    // fKeysCount had been overwritten, so a call made in a failed state
+    // still had side effects.
+    if (U_FAILURE(errorCode) || fKeyMap != nullptr) {
+        return;
+    }
+
+    // Except for pool bundles, keys might not be used.
+    // Do not add unused keys to the final bundle.
+    std::set<int32_t> keysInUse;
+    if (!fIsPoolBundle) {
+        fRoot->collectKeys([&keysInUse](int32_t key) {
+            if (key >= 0) {
+                keysInUse.insert(key);
+            }
+        });
+        fKeysCount = static_cast<int32_t>(keysInUse.size());
+    }
+
+    int32_t keysCount = fUsePoolBundle->fKeysCount + fKeysCount;
+    map = (KeyMapEntry *)uprv_malloc(keysCount * sizeof(KeyMapEntry));
+    if (map == nullptr) {
+        errorCode = U_MEMORY_ALLOCATION_ERROR;
+        return;
+    }
+    /* The pool bundle keys come first; their offsets are flagged as negative. */
+    keys = (char *)fUsePoolBundle->fKeys;
+    for (i = 0; i < fUsePoolBundle->fKeysCount; ++i) {
+        map[i].oldpos =
+            (int32_t)(keys - fUsePoolBundle->fKeys) | 0x80000000;  /* negative oldpos */
+        map[i].newpos = 0;
+        while (*keys != 0) { ++keys; }  /* skip the key */
+        ++keys;  /* skip the NUL */
+    }
+    /* Then walk this bundle's own keys, dropping the ones nobody references. */
+    keys = fKeys + fKeysBottom;
+    while (i < keysCount) {
+        int32_t keyOffset = static_cast<int32_t>(keys - fKeys);
+        if (!fIsPoolBundle && keysInUse.count(keyOffset) == 0) {
+            // Mark the unused key as deleted
+            while (*keys != 0) { *keys++ = 1; }
+            *keys++ = 1;
+        } else {
+            map[i].oldpos = keyOffset;
+            map[i].newpos = 0;
+            while (*keys != 0) { ++keys; }  /* skip the key */
+            ++keys;  /* skip the NUL */
+            i++;
+        }
+    }
+    if (keys != fKeys + fKeysTop) {
+        // Throw away any unused keys from the end
+        fKeysTop = static_cast<int32_t>(keys - fKeys);
+    }
+    /* Sort the keys so that each one is immediately followed by all of its suffixes. */
+    uprv_sortArray(map, keysCount, (int32_t)sizeof(KeyMapEntry),
+                   compareKeySuffixes, this, false, &errorCode);
+    /*
+     * Make suffixes point into earlier, longer strings that contain them
+     * and mark the old, now unused suffix bytes as deleted.
+     */
+    if (U_SUCCESS(errorCode)) {
+        keys = fKeys;
+        for (i = 0; i < keysCount;) {
+            /*
+             * This key is not a suffix of the previous one;
+             * keep this one and delete the following ones that are
+             * suffixes of this one.
+             */
+            const char *key;
+            const char *keyLimit;
+            int32_t j = i + 1;
+            map[i].newpos = map[i].oldpos;
+            if (j < keysCount && map[j].oldpos < 0) {
+                /* Key string from the pool bundle, do not delete. */
+                i = j;
+                continue;
+            }
+            key = getKeyString(map[i].oldpos);
+            for (keyLimit = key; *keyLimit != 0; ++keyLimit) {}
+            for (; j < keysCount && map[j].oldpos >= 0; ++j) {
+                const char *k;
+                char *suffix;
+                const char *suffixLimit;
+                int32_t offset;
+                suffix = keys + map[j].oldpos;
+                for (suffixLimit = suffix; *suffixLimit != 0; ++suffixLimit) {}
+                offset = static_cast<int32_t>((keyLimit - key) - (suffixLimit - suffix));
+                if (offset < 0) {
+                    break;  /* suffix cannot be longer than the original */
+                }
+                /* Is it a suffix of the earlier, longer key? */
+                for (k = keyLimit; suffix < suffixLimit && *--k == *--suffixLimit;) {}
+                if (suffix == suffixLimit && *k == *suffixLimit) {
+                    map[j].newpos = map[i].oldpos + offset;  /* yes, point to the earlier key */
+                    // Mark the suffix as deleted
+                    while (*suffix != 0) { *suffix++ = 1; }
+                    *suffix = 1;
+                } else {
+                    break;  /* not a suffix, restart from here */
+                }
+            }
+            i = j;
+        }
+        /*
+         * Re-sort by newpos, then modify the key characters array in-place
+         * to squeeze out unused bytes, and readjust the newpos offsets.
+         */
+        uprv_sortArray(map, keysCount, (int32_t)sizeof(KeyMapEntry),
+                       compareKeyNewpos, nullptr, false, &errorCode);
+        if (U_SUCCESS(errorCode)) {
+            int32_t oldpos, newpos, limit;
+            oldpos = newpos = fKeysBottom;
+            limit = fKeysTop;
+            /* skip key offsets that point into the pool bundle rather than this new bundle */
+            for (i = 0; i < keysCount && map[i].newpos < 0; ++i) {}
+            if (i < keysCount) {
+                while (oldpos < limit) {
+                    if (keys[oldpos] == 1) {
+                        ++oldpos;  /* skip unused bytes */
+                    } else {
+                        /* adjust the new offsets for keys starting here */
+                        while (i < keysCount && map[i].newpos == oldpos) {
+                            map[i++].newpos = newpos;
+                        }
+                        /* move the key characters to their new position */
+                        keys[newpos++] = keys[oldpos++];
+                    }
+                }
+                U_ASSERT(i == keysCount);
+            }
+            fKeysTop = newpos;
+            /* Re-sort once more, by old offsets for binary searching. */
+            uprv_sortArray(map, keysCount, (int32_t)sizeof(KeyMapEntry),
+                           compareKeyOldpos, nullptr, false, &errorCode);
+            if (U_SUCCESS(errorCode)) {
+                /* key size reduction by limit - newpos */
+                fKeyMap = map;
+                map = nullptr;  /* ownership moved to fKeyMap; the free below becomes a no-op */
+            }
+        }
+    }
+    uprv_free(map);
+}
+
+/**
+ * uprv_sortArray() comparator over an array of StringResource pointers.
+ *
+ * Strings are compared code unit by code unit from the end backwards.
+ * If one string is a suffix of the other, the longer string sorts first.
+ */
+static int32_t U_CALLCONV
+compareStringSuffixes(const void * /*context*/, const void *l, const void *r) {
+    const StringResource *lhs = *((const StringResource **)l);
+    const StringResource *rhs = *((const StringResource **)r);
+    const char16_t *lhsUnits = lhs->getBuffer();
+    const char16_t *rhsUnits = rhs->getBuffer();
+    int32_t li = lhs->length();
+    int32_t ri = rhs->length();
+    /* compare strings in reverse code unit order */
+    while (li > 0 && ri > 0) {
+        --li;
+        --ri;
+        const int32_t unitDiff = (int32_t)lhsUnits[li] - (int32_t)rhsUnits[ri];
+        if (unitDiff != 0) {
+            return unitDiff;
+        }
+    }
+    /* sort equal suffixes by descending string length */
+    return rhs->length() - lhs->length();
+}
+
+/**
+ * uprv_sortArray() comparator over an array of StringResource pointers.
+ *
+ * Ordering criteria, each used only when the previous ones tie:
+ *  1. strings with fSame == nullptr before strings that have fSame set,
+ *  2. ascending string length,
+ *  3. descending fNumUnitsSaved,
+ *  4. the result of UnicodeString::compare() on the string contents.
+ */
+static int32_t U_CALLCONV
+compareStringLengths(const void * /*context*/, const void *l, const void *r) {
+    const StringResource *lhs = *((const StringResource **)l);
+    const StringResource *rhs = *((const StringResource **)r);
+    const int lhsIsSuffix = (int)(lhs->fSame != nullptr);
+    const int rhsIsSuffix = (int)(rhs->fSame != nullptr);
+
+    /* Make "is suffix of another string" compare greater than a non-suffix. */
+    int32_t order = lhsIsSuffix - rhsIsSuffix;
+    if (order == 0) {
+        /* sort by ascending string length */
+        order = lhs->length() - rhs->length();
+    }
+    if (order == 0) {
+        // sort by descending size reduction
+        order = rhs->fNumUnitsSaved - lhs->fNumUnitsSaved;
+    }
+    if (order == 0) {
+        // sort lexically
+        order = lhs->fString.compare(rhs->fString);
+    }
+    return order;
+}
+
+/**
+ * Appends this string to `dest` and records where it was written.
+ *
+ * fRes becomes a URES_STRING_V2 resource word whose offset is `base` plus
+ * the length of `dest` before anything is appended, and fWritten is set.
+ * Depending on fNumCharsForLength (0..3), that many length-prefix code
+ * units are emitted first, followed by the string contents and a
+ * terminating NUL.
+ *
+ * @param base Offset added to the current length of dest for the resource word.
+ * @param dest Buffer that receives the encoded string.
+ */
+void
+StringResource::writeUTF16v2(int32_t base, UnicodeString &dest) {
+    const int32_t numUnits = length();
+    /* The resource word must capture the offset before dest grows. */
+    fRes = URES_MAKE_RESOURCE(URES_STRING_V2, base + dest.length());
+    fWritten = true;
+
+    if (fNumCharsForLength == 1) {
+        dest.append((char16_t)(0xdc00 + numUnits));
+    } else if (fNumCharsForLength == 2) {
+        dest.append((char16_t)(0xdfef + (numUnits >> 16)));
+        dest.append((char16_t)numUnits);
+    } else if (fNumCharsForLength == 3) {
+        dest.append((char16_t)0xdfff);
+        dest.append((char16_t)(numUnits >> 16));
+        dest.append((char16_t)numUnits);
+    }
+    /* 0 means no explicit length; other values will not occur. */
+
+    dest.append(fString);
+    dest.append((char16_t)0);
+}
+
+/**
+ * Writes the unique strings of stringSet into f16BitUnits, sharing storage
+ * between a string and the strings that are suffixes of it.
+ *
+ * The StringResource keys of stringSet are copied into an array, sorted so
+ * that each string is followed by its suffixes, linked via fSame and
+ * fSuffixOffset, re-sorted by length, and then written. A pool bundle
+ * (fIsPoolBundle) only writes strings that save enough 16-bit units; any
+ * other bundle writes all non-suffix strings and then resolves the suffixes.
+ *
+ * @param stringSet Hashtable whose keys are StringResource pointers.
+ * @param errorCode In/out ICU error code; set to U_MEMORY_ALLOCATION_ERROR
+ *                  if f16BitUnits turns bogus or a pool string cannot be
+ *                  allocated.
+ */
+void
+SRBRoot::compactStringsV2(UHashtable *stringSet, UErrorCode &errorCode) {
+ if (U_FAILURE(errorCode)) {
+ return;
+ }
+ // Store the StringResource pointers in an array for
+ // easy sorting and processing.
+ // We enumerate a set of strings, so there are no duplicates.
+ int32_t count = uhash_count(stringSet);
+ LocalArray<StringResource *> array(new StringResource *[count], errorCode);
+ if (U_FAILURE(errorCode)) {
+ return;
+ }
+ for (int32_t pos = UHASH_FIRST, i = 0; i < count; ++i) {
+ array[i] = (StringResource *)uhash_nextElement(stringSet, &pos)->key.pointer;
+ }
+ /* Sort the strings so that each one is immediately followed by all of its suffixes. */
+ uprv_sortArray(array.getAlias(), count, (int32_t)sizeof(struct SResource **),
+ compareStringSuffixes, nullptr, false, &errorCode);
+ if (U_FAILURE(errorCode)) {
+ return;
+ }
+ /*
+ * Make suffixes point into earlier, longer strings that contain them.
+ * Temporarily use fSame and fSuffixOffset for suffix strings to
+ * refer to the remaining ones.
+ */
+ for (int32_t i = 0; i < count;) {
+ /*
+ * This string is not a suffix of the previous one;
+ * write this one and subsume the following ones that are
+ * suffixes of this one.
+ */
+ StringResource *res = array[i];
+ // Every copy beyond the first one saves the full 16-bit length of the string.
+ res->fNumUnitsSaved = (res->fNumCopies - 1) * res->get16BitStringsLength();
+ // Whole duplicates of pool strings are already accounted for in fPoolStringIndexLimit,
+ // see StringResource::handlePreflightStrings().
+ int32_t j;
+ for (j = i + 1; j < count; ++j) {
+ StringResource *suffixRes = array[j];
+ /* Is it a suffix of the earlier, longer string? */
+ if (res->fString.endsWith(suffixRes->fString)) {
+ assert(res->length() != suffixRes->length()); // Set strings are unique.
+ if (suffixRes->fWritten) {
+ // Pool string, skip.
+ } else if (suffixRes->fNumCharsForLength == 0) {
+ /* yes, point to the earlier string */
+ suffixRes->fSame = res;
+ suffixRes->fSuffixOffset = res->length() - suffixRes->length();
+ if (res->fWritten) {
+ // Suffix-share res which is a pool string.
+ // Compute the resource word and collect the maximum.
+ suffixRes->fRes =
+ res->fRes + res->fNumCharsForLength + suffixRes->fSuffixOffset;
+ int32_t poolStringIndex = (int32_t)RES_GET_OFFSET(suffixRes->fRes);
+ if (poolStringIndex >= fPoolStringIndexLimit) {
+ fPoolStringIndexLimit = poolStringIndex + 1;
+ }
+ suffixRes->fWritten = true;
+ }
+ res->fNumUnitsSaved += suffixRes->fNumCopies * suffixRes->get16BitStringsLength();
+ } else {
+ /* write the suffix by itself if we need explicit length */
+ }
+ } else {
+ break; /* not a suffix, restart from here */
+ }
+ }
+ // Continue with the first string that is not a suffix of res.
+ i = j;
+ }
+ /*
+ * Re-sort the strings by ascending length (except suffixes last)
+ * to optimize for URES_TABLE16 and URES_ARRAY16:
+ * Keep as many as possible within reach of 16-bit offsets.
+ */
+ uprv_sortArray(array.getAlias(), count, (int32_t)sizeof(struct SResource **),
+ compareStringLengths, nullptr, false, &errorCode);
+ if (U_FAILURE(errorCode)) {
+ return;
+ }
+ if (fIsPoolBundle) {
+ // Write strings that are sufficiently shared.
+ // Avoid writing other strings.
+ int32_t numStringsWritten = 0;
+ int32_t numUnitsSaved = 0;
+ int32_t numUnitsNotSaved = 0;
+ for (int32_t i = 0; i < count; ++i) {
+ StringResource *res = array[i];
+ // Maximum pool string index when suffix-sharing the last character.
+ int32_t maxStringIndex =
+ f16BitUnits.length() + res->fNumCharsForLength + res->length() - 1;
+ if (res->fNumUnitsSaved >= GENRB_MIN_16BIT_UNITS_SAVED_FOR_POOL_STRING &&
+ maxStringIndex < RES_MAX_OFFSET) {
+ res->writeUTF16v2(0, f16BitUnits);
+ ++numStringsWritten;
+ numUnitsSaved += res->fNumUnitsSaved;
+ } else {
+ // Not worth sharing (or out of offset range): give it an empty
+ // string resource word and treat it as already written.
+ numUnitsNotSaved += res->fNumUnitsSaved;
+ res->fRes = URES_MAKE_EMPTY_RESOURCE(URES_STRING);
+ res->fWritten = true;
+ }
+ }
+ if (f16BitUnits.isBogus()) {
+ errorCode = U_MEMORY_ALLOCATION_ERROR;
+ }
+ if (getShowWarning()) { // not quiet
+ printf("number of shared strings: %d\n", (int)numStringsWritten);
+ printf("16-bit units for strings: %6d = %6d bytes\n",
+ (int)f16BitUnits.length(), (int)f16BitUnits.length() * 2);
+ printf("16-bit units saved: %6d = %6d bytes\n",
+ (int)numUnitsSaved, (int)numUnitsSaved * 2);
+ printf("16-bit units not saved: %6d = %6d bytes\n",
+ (int)numUnitsNotSaved, (int)numUnitsNotSaved * 2);
+ }
+ } else {
+ assert(fPoolStringIndexLimit <= fUsePoolBundle->fStringIndexLimit);
+ /* Write the non-suffix strings. */
+ // After the re-sort, the strings with fSame == nullptr form the front of the array.
+ int32_t i;
+ for (i = 0; i < count && array[i]->fSame == nullptr; ++i) {
+ StringResource *res = array[i];
+ if (!res->fWritten) {
+ int32_t localStringIndex = f16BitUnits.length();
+ if (localStringIndex >= fLocalStringIndexLimit) {
+ fLocalStringIndexLimit = localStringIndex + 1;
+ }
+ res->writeUTF16v2(fPoolStringIndexLimit, f16BitUnits);
+ }
+ }
+ if (f16BitUnits.isBogus()) {
+ errorCode = U_MEMORY_ALLOCATION_ERROR;
+ return;
+ }
+ if (fWritePoolBundle != nullptr && gFormatVersion >= 3) {
+ // Also hand a copy of every non-suffix string to the pool bundle being written.
+ PseudoListResource *poolStrings =
+ static_cast<PseudoListResource *>(fWritePoolBundle->fRoot);
+ for (i = 0; i < count && array[i]->fSame == nullptr; ++i) {
+ assert(!array[i]->fString.isEmpty());
+ StringResource *poolString =
+ new StringResource(fWritePoolBundle, array[i]->fString, errorCode);
+ if (poolString == nullptr) {
+ errorCode = U_MEMORY_ALLOCATION_ERROR;
+ break;
+ }
+ poolStrings->add(poolString);
+ }
+ }
+ /* Write the suffix strings. Make each point to the real string. */
+ // i continues from where the loops above stopped.
+ for (; i < count; ++i) {
+ StringResource *res = array[i];
+ if (res->fWritten) {
+ continue;
+ }
+ StringResource *same = res->fSame;
+ assert(res->length() != same->length()); // Set strings are unique.
+ res->fRes = same->fRes + same->fNumCharsForLength + res->fSuffixOffset;
+ int32_t localStringIndex = (int32_t)RES_GET_OFFSET(res->fRes) - fPoolStringIndexLimit;
+ // Suffixes of pool strings have been set already.
+ assert(localStringIndex >= 0);
+ if (localStringIndex >= fLocalStringIndexLimit) {
+ fLocalStringIndexLimit = localStringIndex + 1;
+ }
+ res->fWritten = true;
+ }
+ }
+ // +1 to account for the initial zero in f16BitUnits
+ assert(f16BitUnits.length() <= (f16BitStringsLength + 1));
+}
+
+/**
+ * Default filter hook: intentionally does nothing.
+ * Only a few resource types (tables) are capable of being filtered.
+ */
+void SResource::applyFilter(const PathFilter& /*filter*/,
+                            ResKeyPath& /*path*/,
+                            const SRBRoot* /*bundle*/) {
+}
+
+/**
+ * Applies `filter` to every child of this table.
+ *
+ * For each child, its key is pushed onto `path` and the filter is asked
+ * for a verdict:
+ *  - INCLUDE: the child (and its whole subtree) is kept as is;
+ *  - EXCLUDE: the child is unlinked from the list, fCount is decremented
+ *    and the child is deleted;
+ *  - PARTIAL: the child's own applyFilter() is invoked.
+ * The key is popped from `path` again before moving to the next child.
+ */
+void TableResource::applyFilter(
+        const PathFilter& filter,
+        ResKeyPath& path,
+        const SRBRoot* bundle) {
+    // `link` addresses whichever pointer currently refers to the child being
+    // examined (fFirst or the previous sibling's fNext), so unlinking needs
+    // no special case for the head of the list.
+    SResource** link = &fFirst;
+    while (*link != nullptr) {
+        SResource* child = *link;
+        bool removed = false;
+        path.push(child->getKeyString(bundle));
+        const auto verdict = filter.match(path);
+        switch (verdict) {
+        case PathFilter::EInclusion::INCLUDE:
+            // Include whole subtree: nothing to change.
+            if (isVerbose()) {
+                std::cout << "genrb subtree: " << bundle->fLocale << ": INCLUDE: " << path << std::endl;
+            }
+            break;
+        case PathFilter::EInclusion::EXCLUDE:
+            // Reject the whole subtree and remove it from the linked list.
+            if (isVerbose()) {
+                std::cout << "genrb subtree: " << bundle->fLocale << ": DELETE: " << path << std::endl;
+            }
+            *link = child->fNext;
+            fCount--;
+            delete child;
+            removed = true;
+            break;
+        default:
+            U_ASSERT(verdict == PathFilter::EInclusion::PARTIAL);
+            // Recurse into the child
+            child->applyFilter(filter, path, bundle);
+            break;
+        }
+        path.pop();
+
+        // After a removal, *link already refers to the next child.
+        if (!removed) {
+            link = &child->fNext;
+        }
+    }
+}
diff --git a/intl/icu/source/tools/genrb/reslist.h b/intl/icu/source/tools/genrb/reslist.h
new file mode 100644
index 0000000000..17797bc36c
--- /dev/null
+++ b/intl/icu/source/tools/genrb/reslist.h
@@ -0,0 +1,446 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+*
+* Copyright (C) 2000-2015, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+*******************************************************************************
+*
+* File reslist.h
+*
+* Modification History:
+*
+* Date Name Description
+* 02/21/00 weiv Creation.
+*******************************************************************************
+*/
+
+#ifndef RESLIST_H
+#define RESLIST_H
+
+#define KEY_SPACE_SIZE 65536
+#define RESLIST_INT_VECTOR_INIT_SIZE 2048
+
+#include <functional>
+
+#include "unicode/utypes.h"
+#include "unicode/unistr.h"
+#include "unicode/ures.h"
+#include "unicode/ustring.h"
+#include "cmemory.h"
+#include "cstring.h"
+#include "uhash.h"
+#include "unewdata.h"
+#include "uresdata.h"
+#include "ustr.h"
+
+U_CDECL_BEGIN
+
+class PathFilter;
+class PseudoListResource;
+class ResKeyPath;
+
+/**
+ * State for a resource bundle file; every field starts out null/zero.
+ * The destructor calls close(), which is defined outside this header.
+ */
+struct ResFile {
+    ResFile() {}
+    ~ResFile() { close(); }
+
+    void close();
+
+    uint8_t *fBytes = nullptr;
+    const int32_t *fIndexes = nullptr;
+    const char *fKeys = nullptr;
+    int32_t fKeysLength = 0;
+    int32_t fKeysCount = 0;
+
+    PseudoListResource *fStrings = nullptr;
+    int32_t fStringIndexLimit = 0;
+
+    int32_t fChecksum = 0;
+};
+
+struct SResource;
+
+/* Pairs a key offset (oldpos) with its replacement offset (newpos). */
+typedef struct KeyMapEntry {
+    int32_t oldpos;
+    int32_t newpos;
+} KeyMapEntry;
+
+/*
+ * Resource bundle root: owns the root resource, the key bytes and the
+ * 16-bit string units of one bundle.
+ */
+struct SRBRoot {
+    SRBRoot(const UString *comment, UBool isPoolBundle, UErrorCode &errorCode);
+    ~SRBRoot();
+
+    void write(const char *outputDir, const char *outputPkg,
+               char *writtenFilename, int writtenFilenameLen, UErrorCode &errorCode);
+
+    void setLocale(char16_t *locale, UErrorCode &errorCode);
+
+    /* Key handling */
+    int32_t addTag(const char *tag, UErrorCode &errorCode);
+
+    const char *getKeyString(int32_t key) const;
+    const char *getKeyBytes(int32_t *pLength) const;
+
+    int32_t addKeyBytes(const char *keyBytes, int32_t length, UErrorCode &errorCode);
+
+    void compactKeys(UErrorCode &errorCode);
+
+    int32_t makeRes16(uint32_t resWord) const;
+    int32_t mapKey(int32_t oldpos) const;
+
+private:
+    void compactStringsV2(UHashtable *stringSet, UErrorCode &errorCode);
+
+public:
+    // TODO: private
+
+    SResource *fRoot;  // Normally a TableResource.
+    char *fLocale;
+    int32_t fIndexLength;
+    int32_t fMaxTableLength;
+    UBool fNoFallback;  /* see URES_ATT_NO_FALLBACK */
+    int8_t fStringsForm;  /* default STRINGS_UTF16_V1 */
+    UBool fIsPoolBundle;
+
+    /* Key bytes and bookkeeping for them */
+    char *fKeys;
+    KeyMapEntry *fKeyMap;
+    int32_t fKeysBottom;
+    int32_t fKeysTop;
+    int32_t fKeysCapacity;
+    int32_t fKeysCount;
+    int32_t fLocalKeyLimit;  /* key offset < limit fits into URES_TABLE */
+
+    /* 16-bit units */
+    icu::UnicodeString f16BitUnits;
+    int32_t f16BitStringsLength;
+
+    /* Pool bundle bookkeeping */
+    const ResFile *fUsePoolBundle;
+    int32_t fPoolStringIndexLimit;
+    int32_t fPoolStringIndex16Limit;
+    int32_t fLocalStringIndexLimit;
+    SRBRoot *fWritePoolBundle;
+};
+
+/* Writes the bundle as a Java resource file. */
+// TODO: C++ify
+void bundle_write_java(struct SRBRoot *bundle,
+                       const char *outputDir,
+                       const char* outputEnc,
+                       char *writtenFilename,
+                       int writtenFilenameLen,
+                       const char* packageName,
+                       const char* bundleName,
+                       UErrorCode *status);
+
+/* Writes the bundle as an XML resource file. */
+// TODO: C++ify
+void bundle_write_xml(struct SRBRoot *bundle,
+                      const char *outputDir,
+                      const char* outputEnc,
+                      const char* rbname,
+                      char *writtenFilename,
+                      int writtenFilenameLen,
+                      const char* language,
+                      const char* package,
+                      UErrorCode *status);
+
+/* Various resource types */
+
+/*
+ * Returns a unique pointer to a dummy object. It is used in non-error
+ * cases when no resource is to be added to the bundle; error cases use
+ * nullptr instead.
+ */
+struct SResource* res_none();
+
+class ArrayResource;
+class TableResource;
+class IntVectorResource;
+
+/* Factory functions, one per resource type. */
+
+TableResource *table_open(struct SRBRoot *bundle, const char *tag,
+                          const struct UString* comment, UErrorCode *status);
+
+ArrayResource *array_open(struct SRBRoot *bundle, const char *tag,
+                          const struct UString* comment, UErrorCode *status);
+
+struct SResource *string_open(struct SRBRoot *bundle, const char *tag,
+                              const char16_t *value, int32_t len,
+                              const struct UString* comment, UErrorCode *status);
+
+struct SResource *alias_open(struct SRBRoot *bundle, const char *tag,
+                             char16_t *value, int32_t len,
+                             const struct UString* comment, UErrorCode *status);
+
+IntVectorResource *intvector_open(struct SRBRoot *bundle, const char *tag,
+                                  const struct UString* comment, UErrorCode *status);
+
+struct SResource *int_open(struct SRBRoot *bundle, const char *tag, int32_t value,
+                           const struct UString* comment, UErrorCode *status);
+
+struct SResource *bin_open(struct SRBRoot *bundle, const char *tag,
+                           uint32_t length, uint8_t *data, const char* fileName,
+                           const struct UString* comment, UErrorCode *status);
+
+/* Resource place holder: common base of all resource item types. */
+
+struct SResource {
+    SResource();
+    SResource(SRBRoot *bundle, const char *tag, int8_t type, const UString* comment,
+              UErrorCode &errorCode);
+    virtual ~SResource();
+
+    UBool isTable() const { return fType == URES_TABLE; }
+    UBool isString() const { return fType == URES_STRING; }
+
+    const char *getKeyString(const SRBRoot *bundle) const;
+
+    /**
+     * String preflighting.
+     * Detects duplicate strings and totals the string code units, so that
+     * the strings can be written first to the 16-bit array, which
+     * minimizes string and container storage.
+     *
+     * This walks the final parse tree instead of gathering the data while
+     * the tree is being built, so that later changes to the tree
+     * (especially removed resources) need no special handling.
+     */
+    void preflightStrings(SRBRoot *bundle, UHashtable *stringSet, UErrorCode &errorCode);
+    virtual void handlePreflightStrings(SRBRoot *bundle, UHashtable *stringSet, UErrorCode &errorCode);
+
+    /**
+     * Writes resource values into f16BitUnits and, if possible,
+     * determines the resource item word.
+     */
+    void write16(SRBRoot *bundle);
+    virtual void handleWrite16(SRBRoot *bundle);
+
+    /**
+     * Preflights the binary size: advances *byteOffset by the size of this
+     * resource's data in the binary file and determines the resource item
+     * word.
+     *
+     * Most handlePreWrite() functions may add any number of bytes;
+     * preWrite() always pads the result to a multiple of 4.
+     * The resource item type may be a related subtype of fType.
+     *
+     * preWrite() and write() start and end at the same byteOffset values.
+     * Prewriting lets bundle.write() determine the root resource item word
+     * before the bundle contents are actually written to the file; this is
+     * required because the root item is stored at the beginning.
+     */
+    void preWrite(uint32_t *byteOffset);
+    virtual void handlePreWrite(uint32_t *byteOffset);
+
+    /**
+     * Writes this resource's data to mem, updating byteOffset in parallel.
+     */
+    void write(UNewDataMemory *mem, uint32_t *byteOffset);
+    virtual void handleWrite(UNewDataMemory *mem, uint32_t *byteOffset);
+
+    /**
+     * Applies the filter, using the given base path, to this resource.
+     * Child resources rejected by the filter are removed recursively.
+     *
+     * @param bundle Needed in order to access the key for this and child resources.
+     */
+    virtual void applyFilter(const PathFilter& filter, ResKeyPath& path, const SRBRoot* bundle);
+
+    /**
+     * Invokes the collector for every key ID present in this tree.
+     */
+    virtual void collectKeys(std::function<void(int32_t)> collector) const;
+
+    int8_t fType;  /* nominal type: fRes (when != 0xffffffff) may use subtype */
+    UBool fWritten;  /* res_write() can exit early */
+    uint32_t fRes;  /* resource item word; RES_BOGUS=0xffffffff if not known yet */
+    int32_t fRes16;  /* Res16 version of fRes for Table, Table16, Array16; -1 if it does not fit. */
+    int32_t fKey;  /* Index into bundle->fKeys; -1 if no key. */
+    int32_t fKey16;  /* Key16 version of fKey for Table & Table16; -1 if no key or it does not fit. */
+    int line;  /* used internally to report duplicate keys in tables */
+    SResource *fNext;  /* This is for internal chaining while building */
+    struct UString fComment;
+};
+
+/**
+ * Base class for resources that hold a singly linked list of child
+ * resources (fFirst, chained through SResource::fNext) and their count.
+ */
+class ContainerResource : public SResource {
+public:
+    ContainerResource(SRBRoot *bundle, const char *tag, int8_t type,
+                      const UString* comment, UErrorCode &errorCode)
+            : SResource(bundle, tag, type, comment, errorCode),
+              fCount(0),
+              fFirst(nullptr) {}
+    ~ContainerResource() override;
+
+    void handlePreflightStrings(SRBRoot *bundle, UHashtable *stringSet, UErrorCode &errorCode) override;
+
+    void collectKeys(std::function<void(int32_t)> collector) const override;
+
+protected:
+    /* Helpers for subclasses that process all children. */
+    void writeAllRes16(SRBRoot *bundle);
+    void preWriteAllRes(uint32_t *byteOffset);
+    void writeAllRes(UNewDataMemory *mem, uint32_t *byteOffset);
+    void writeAllRes32(UNewDataMemory *mem, uint32_t *byteOffset);
+
+public:
+    // TODO: private with getter?
+    uint32_t fCount;
+    SResource *fFirst;
+};
+
+/**
+ * Container of keyed child resources (nominal type URES_TABLE).
+ * It is the only container type here that overrides applyFilter().
+ */
+class TableResource : public ContainerResource {
+public:
+    TableResource(SRBRoot *bundle, const char *tag,
+                  const UString* comment, UErrorCode &errorCode)
+            : ContainerResource(bundle, tag, URES_TABLE, comment, errorCode),
+              fTableType(URES_TABLE),
+              fRoot(bundle) {}
+    ~TableResource() override;
+
+    void add(SResource *res, int linenumber, UErrorCode &errorCode);
+
+    void handleWrite16(SRBRoot *bundle) override;
+    void handlePreWrite(uint32_t *byteOffset) override;
+    void handleWrite(UNewDataMemory *mem, uint32_t *byteOffset) override;
+
+    void applyFilter(const PathFilter& filter, ResKeyPath& path, const SRBRoot* bundle) override;
+
+    int8_t fTableType;  // determined by table_write16() for table_preWrite() & table_write()
+    SRBRoot *fRoot;
+};
+
+/**
+ * Container of child resources with nominal type URES_ARRAY.
+ * Besides the inherited fFirst it keeps an fLast pointer, initially nullptr.
+ */
+class ArrayResource : public ContainerResource {
+public:
+    ArrayResource(SRBRoot *bundle, const char *tag,
+                  const UString* comment, UErrorCode &errorCode)
+            : ContainerResource(bundle, tag, URES_ARRAY, comment, errorCode),
+              fLast(nullptr) {}
+    ~ArrayResource() override;
+
+    void add(SResource *res);
+
+    void handleWrite16(SRBRoot *bundle) override;
+    void handlePreWrite(uint32_t *byteOffset) override;
+    void handleWrite(UNewDataMemory *mem, uint32_t *byteOffset) override;
+
+    SResource *fLast;
+};
+
+/**
+ * Resource list used for a pool bundle.
+ * It writes an empty table resource instead of a container structure.
+ */
+class PseudoListResource : public ContainerResource {
+public:
+    PseudoListResource(SRBRoot *bundle, UErrorCode &errorCode)
+            : ContainerResource(bundle, nullptr, URES_TABLE, nullptr, errorCode) {}
+    ~PseudoListResource() override;
+
+    void add(SResource *res);
+
+    void handleWrite16(SRBRoot *bundle) override;
+};
+
+/**
+ * Base class for resources whose value is a UnicodeString (fString).
+ */
+class StringBaseResource : public SResource {
+public:
+    StringBaseResource(SRBRoot *bundle, const char *tag, int8_t type,
+                       const char16_t *value, int32_t len,
+                       const UString* comment, UErrorCode &errorCode);
+    StringBaseResource(SRBRoot *bundle, int8_t type,
+                       const icu::UnicodeString &value, UErrorCode &errorCode);
+    StringBaseResource(int8_t type, const char16_t *value, int32_t len, UErrorCode &errorCode);
+    ~StringBaseResource() override;
+
+    /** Returns the buffer of fString as a char16_t pointer. */
+    const char16_t *getBuffer() const { return icu::toUCharPtr(fString.getBuffer()); }
+    /** Returns the length of fString. */
+    int32_t length() const { return fString.length(); }
+
+    void handlePreWrite(uint32_t *byteOffset) override;
+    void handleWrite(UNewDataMemory *mem, uint32_t *byteOffset) override;
+
+    // TODO: private with getter?
+    icu::UnicodeString fString;
+};
+
+/**
+ * String resource (URES_STRING) with the extra bookkeeping used for
+ * sharing duplicate strings and suffixes.
+ */
+class StringResource : public StringBaseResource {
+public:
+    /** String with a key tag and a comment. */
+    StringResource(SRBRoot *bundle, const char *tag, const char16_t *value, int32_t len,
+                   const UString* comment, UErrorCode &errorCode)
+            : StringBaseResource(bundle, tag, URES_STRING, value, len, comment, errorCode),
+              fSame(nullptr),
+              fSuffixOffset(0),
+              fNumCopies(0),
+              fNumUnitsSaved(0),
+              fNumCharsForLength(0) {}
+
+    /** String without a key tag, copied from a UnicodeString. */
+    StringResource(SRBRoot *bundle, const icu::UnicodeString &value, UErrorCode &errorCode)
+            : StringBaseResource(bundle, URES_STRING, value, errorCode),
+              fSame(nullptr),
+              fSuffixOffset(0),
+              fNumCopies(0),
+              fNumUnitsSaved(0),
+              fNumCharsForLength(0) {}
+
+    /** Pool string: fRes is fixed from poolStringIndex and fWritten is set right away. */
+    StringResource(int32_t poolStringIndex, int8_t numCharsForLength,
+                   const char16_t *value, int32_t length,
+                   UErrorCode &errorCode)
+            : StringBaseResource(URES_STRING, value, length, errorCode),
+              fSame(nullptr),
+              fSuffixOffset(0),
+              fNumCopies(0),
+              fNumUnitsSaved(0),
+              fNumCharsForLength(numCharsForLength) {
+        // v3 pool string encoded as string-v2 with low offset
+        fRes = URES_MAKE_RESOURCE(URES_STRING_V2, poolStringIndex);
+        fWritten = true;
+    }
+    ~StringResource() override;
+
+    /** Length prefix units + string units + 1 for the NUL. */
+    int32_t get16BitStringsLength() const {
+        return fNumCharsForLength + length() + 1;
+    }
+
+    void handlePreflightStrings(SRBRoot *bundle, UHashtable *stringSet, UErrorCode &errorCode) override;
+    void handleWrite16(SRBRoot *bundle) override;
+
+    void writeUTF16v2(int32_t base, icu::UnicodeString &dest);
+
+    StringResource *fSame;  // used for duplicates
+    int32_t fSuffixOffset;  // this string is a suffix of fSame at this offset
+    int32_t fNumCopies;  // number of equal strings represented by one stringSet element
+    int32_t fNumUnitsSaved;  // from not writing duplicates and suffixes
+    int8_t fNumCharsForLength;
+};
+
+/** String-valued resource with nominal type URES_ALIAS. */
+class AliasResource : public StringBaseResource {
+public:
+    AliasResource(SRBRoot *bundle, const char *tag, const char16_t *value, int32_t len,
+                  const UString* comment, UErrorCode &errorCode)
+            : StringBaseResource(bundle, tag, URES_ALIAS, value, len, comment, errorCode) {}
+    ~AliasResource() override;
+};
+
+/** Resource holding a single int32_t value (fValue). */
+class IntResource : public SResource {
+public:
+    IntResource(SRBRoot *bundle, const char *tag, int32_t value,
+                const UString* comment, UErrorCode &errorCode);
+    ~IntResource() override;
+
+    // TODO: private with getter?
+    int32_t fValue;
+};
+
+/** Resource holding a list of integers that is extended through add(). */
+class IntVectorResource : public SResource {
+public:
+    IntVectorResource(SRBRoot *bundle, const char *tag,
+                      const UString* comment, UErrorCode &errorCode);
+    ~IntVectorResource() override;
+
+    void add(int32_t value, UErrorCode &errorCode);
+
+    void handlePreWrite(uint32_t *byteOffset) override;
+    void handleWrite(UNewDataMemory *mem, uint32_t *byteOffset) override;
+
+    // TODO: UVector32
+    size_t fCount;
+    size_t fSize;
+    uint32_t *fArray;
+};
+
+/** Resource holding a block of bytes (fData, fLength) and an optional file name. */
+class BinaryResource : public SResource {
+public:
+    BinaryResource(SRBRoot *bundle, const char *tag,
+                   uint32_t length, uint8_t *data, const char* fileName,
+                   const UString* comment, UErrorCode &errorCode);
+    ~BinaryResource() override;
+
+    void handlePreWrite(uint32_t *byteOffset) override;
+    void handleWrite(UNewDataMemory *mem, uint32_t *byteOffset) override;
+
+    // TODO: CharString?
+    uint32_t fLength;
+    uint8_t *fData;
+    // TODO: CharString
+    char* fFileName;  // file name for binary or import binary tags if any
+};
+
+// TODO: use LocalPointer or delete
+void res_close(struct SResource *res);
+
+/* Accessors for tool-wide settings. */
+
+void setIncludeCopyright(UBool val);
+UBool getIncludeCopyright();
+
+void setFormatVersion(int32_t formatVersion);
+int32_t getFormatVersion();
+
+void setUsePoolBundle(UBool use);
+
+/* in wrtxml.cpp */
+uint32_t computeCRC(const char *ptr, uint32_t len, uint32_t lastcrc);
+
+U_CDECL_END
+#endif /* #ifndef RESLIST_H */
diff --git a/intl/icu/source/tools/genrb/rle.c b/intl/icu/source/tools/genrb/rle.c
new file mode 100644
index 0000000000..f737c45491
--- /dev/null
+++ b/intl/icu/source/tools/genrb/rle.c
@@ -0,0 +1,408 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+*
+* Copyright (C) 2000-2003, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+*******************************************************************************
+*
+* File rle.c
+*
+* Modification History:
+*
+* Date Name Description
+* 01/11/02 Ram Creation.
+*******************************************************************************
+*/
+#include <stdbool.h>
+#include "rle.h"
+/**
+ * The ESCAPE character is used during run-length encoding. It signals
+ * a run of identical chars.
+ */
+static const uint16_t ESCAPE = 0xA5A5;
+
+/**
+ * The ESCAPE_BYTE character is used during run-length encoding. It signals
+ * a run of identical bytes.
+ */
+static const uint8_t ESCAPE_BYTE = (uint8_t)0xA5;
+
+/**
+ * Append a byte to the output buffer, packing two bytes into each 16-bit
+ * unit (first byte in the high half, second byte in the low half). The state
+ * parameter maintains intermediary data between calls.
+ * @param buffer    Current write position.
+ * @param buffLimit One past the last writable unit.
+ * @param value     The byte to append.
+ * @param state A two-element array, with state[0] == 0 if this is the
+ * first byte of a pair, or state[0] != 0 if this is the second byte
+ * of a pair, in which case state[1] is the first byte.
+ * @param status    Set to U_BUFFER_OVERFLOW_ERROR when a completed unit does
+ *                  not fit.
+ * @return The updated write position. When status is NULL or already failing
+ *         nothing is written and buffer is returned unchanged; the pointer is
+ *         never NULL, so the caller's "buffer - start" arithmetic stays valid
+ *         after an overflow.
+ */
+static uint16_t*
+appendEncodedByte(uint16_t* buffer, uint16_t* buffLimit, uint8_t value, uint8_t state[],UErrorCode* status) {
+    if(!status || U_FAILURE(*status)){
+        /* No-op on error: keep the position instead of handing back NULL. */
+        return buffer;
+    }
+    if (state[0] != 0) {
+        /* Second byte of a pair: combine with the saved first byte and emit. */
+        uint16_t c = (uint16_t) ((state[1] << 8) | (((int32_t) value) & 0xFF));
+        if(buffer < buffLimit){
+            *buffer++ = c;
+        }else{
+            *status = U_BUFFER_OVERFLOW_ERROR;
+        }
+        state[0] = 0;
+        return buffer;
+    }
+    else {
+        /* First byte of a pair: remember it until its partner arrives. */
+        state[0] = 1;
+        state[1] = value;
+        return buffer;
+    }
+}
+/**
+ * Encode a run, possibly a degenerate run (of < 4 values).
+ * @param length The length of the run; must be > 0 && <= 0xFF.
+ * @return The updated write position; buffer is returned unchanged (never
+ *         NULL) when status is NULL or already failing.
+ */
+static uint16_t*
+encodeRunByte(uint16_t* buffer,uint16_t* bufLimit, uint8_t value, int32_t length, uint8_t state[], UErrorCode* status) {
+    if(!status || U_FAILURE(*status)){
+        return buffer;
+    }
+    if (length < 4) {
+        /* Too short to be worth a run: write the bytes literally, doubling
+         * any ESCAPE_BYTE so the decoder reads it as a literal. */
+        int32_t j=0;
+        for (; j<length; ++j) {
+            if (value == ESCAPE_BYTE) {
+                buffer = appendEncodedByte(buffer,bufLimit, ESCAPE_BYTE, state,status);
+            }
+            buffer = appendEncodedByte(buffer,bufLimit, value, state, status);
+        }
+    }
+    else {
+        if (length == ESCAPE_BYTE) {
+            /* A count equal to ESCAPE_BYTE would be read back as a literal
+             * escape, so emit one literal value and shorten the run by one. */
+            if (value == ESCAPE_BYTE){
+                buffer = appendEncodedByte(buffer, bufLimit,ESCAPE_BYTE, state,status);
+            }
+            buffer = appendEncodedByte(buffer,bufLimit, value, state, status);
+            --length;
+        }
+        buffer = appendEncodedByte(buffer,bufLimit, ESCAPE_BYTE, state,status);
+        buffer = appendEncodedByte(buffer,bufLimit, (char)length, state, status);
+        buffer = appendEncodedByte(buffer,bufLimit, value, state, status); /* Don't need to escape this value*/
+    }
+    return buffer;
+}
+
+/*
+ * Store value at *buffer and advance buffer when there is room before
+ * bufLimit; otherwise set *status to U_BUFFER_OVERFLOW_ERROR and leave buffer
+ * where it is. num is incremented in both cases, so it counts the units that
+ * were requested rather than the units actually written.
+ * buffer and num must be modifiable lvalues; status must be a non-NULL
+ * UErrorCode pointer.
+ */
+#define APPEND( buffer, bufLimit, value, num, status) UPRV_BLOCK_MACRO_BEGIN { \
+ if(buffer<bufLimit){ \
+ *buffer++=(value); \
+ }else{ \
+ *status = U_BUFFER_OVERFLOW_ERROR; \
+ } \
+ num++; \
+} UPRV_BLOCK_MACRO_END
+
+/**
+ * Write one run of identical 16-bit values to the output.
+ * Runs shorter than 4 are written literally (an ESCAPE value is doubled);
+ * longer runs are written as ESCAPE, count, value.
+ * @param length The length of the run; must be > 0 && <= 0xFFFF.
+ * @return The updated write position.
+ */
+static uint16_t*
+encodeRunShort(uint16_t* buffer,uint16_t* bufLimit, uint16_t value, int32_t length,UErrorCode* status) {
+    int32_t num=0; /* bookkeeping counter required by APPEND */
+    int k;
+
+    if (length >= 4) {
+        if (length == (int32_t) ESCAPE) {
+            /* A count equal to ESCAPE is ambiguous: emit one literal value
+             * first and encode the remaining length-1. */
+            if (value == (int32_t) ESCAPE){
+                APPEND(buffer,bufLimit,ESCAPE,num,status);
+            }
+            APPEND(buffer,bufLimit,value,num,status);
+            --length;
+        }
+        APPEND(buffer,bufLimit,ESCAPE,num,status);
+        APPEND(buffer,bufLimit,(uint16_t) length, num,status);
+        APPEND(buffer,bufLimit,(uint16_t)value, num, status); /* Don't need to escape this value */
+        return buffer;
+    }
+
+    /* Degenerate run: literal copies. */
+    for (k = 0; k < length; ++k) {
+        if (value == (int32_t) ESCAPE){
+            APPEND(buffer,bufLimit,ESCAPE, num, status);
+        }
+        APPEND(buffer,bufLimit,value,num, status);
+    }
+    return buffer;
+}
+
+/**
+ * Construct a string representing a char array. Use run-length encoding.
+ * A character represents itself, unless it is the ESCAPE character. Then
+ * the following notations are possible:
+ * ESCAPE ESCAPE ESCAPE literal
+ * ESCAPE n c n instances of character c
+ * Since an encoded run occupies 3 characters, we only encode runs of 4 or
+ * more characters. Thus we have n > 0 and n != ESCAPE and n <= 0xFFFF.
+ * If we encounter a run where n == ESCAPE, we represent this as:
+ * c ESCAPE n-1 c
+ * The ESCAPE value is chosen so as not to collide with commonly
+ * seen values.
+ *
+ * The output starts with two units holding srcLen (high half, low half).
+ * @param src    Values to encode; not read when srcLen <= 0.
+ * @param srcLen Number of values in src.
+ * @param buffer Output buffer.
+ * @param bufLen Capacity of buffer in 16-bit units.
+ * @param status Set to U_BUFFER_OVERFLOW_ERROR when the output does not fit.
+ *               If NULL or already failing, nothing is written and 0 is
+ *               returned.
+ * @return Number of units written to buffer.
+ */
+int32_t
+usArrayToRLEString(const uint16_t* src,int32_t srcLen,uint16_t* buffer, int32_t bufLen,UErrorCode* status) {
+    uint16_t* bufLimit = buffer+bufLen;
+    uint16_t* saveBuffer = buffer;
+    if(!status || U_FAILURE(*status)){
+        return 0;
+    }
+    if(buffer < bufLimit){
+        *buffer++ = (uint16_t)(srcLen>>16);
+        if(buffer<bufLimit){
+            *buffer++ = (uint16_t) srcLen;
+
+            /* Only touch src when there is at least one element; an empty
+             * input is just the two-unit length header. */
+            if(srcLen > 0){
+                uint16_t runValue = src[0];
+                int32_t runLength = 1;
+                int i=1;
+
+                for (; i<srcLen; ++i) {
+                    uint16_t s = src[i];
+                    if (s == runValue && runLength < 0xFFFF){
+                        ++runLength;
+                    }else {
+                        buffer = encodeRunShort(buffer,bufLimit, (uint16_t)runValue, runLength,status);
+                        runValue = s;
+                        runLength = 1;
+                    }
+                }
+                /* Flush the run that was still open at the end of the input. */
+                buffer= encodeRunShort(buffer,bufLimit,(uint16_t)runValue, runLength,status);
+            }
+        }else{
+            *status = U_BUFFER_OVERFLOW_ERROR;
+        }
+    }else{
+        *status = U_BUFFER_OVERFLOW_ERROR;
+    }
+    return (int32_t)(buffer - saveBuffer);
+}
+
+/**
+ * Construct a string representing a byte array. Use run-length encoding.
+ * Two bytes are packed into a single char, with a single extra zero byte at
+ * the end if needed. A byte represents itself, unless it is the
+ * ESCAPE_BYTE. Then the following notations are possible:
+ * ESCAPE_BYTE ESCAPE_BYTE ESCAPE_BYTE literal
+ * ESCAPE_BYTE n b n instances of byte b
+ * Since an encoded run occupies 3 bytes, we only encode runs of 4 or
+ * more bytes. Thus we have n > 0 and n != ESCAPE_BYTE and n <= 0xFF.
+ * If we encounter a run where n == ESCAPE_BYTE, we represent this as:
+ * b ESCAPE_BYTE n-1 b
+ * The ESCAPE_BYTE value is chosen so as not to collide with commonly
+ * seen values.
+ *
+ * The output starts with two units holding srcLen (high half, low half).
+ * @param src    Bytes to encode; not read when srcLen <= 0.
+ * @param srcLen Number of bytes in src.
+ * @param buffer Output buffer.
+ * @param bufLen Capacity of buffer in 16-bit units.
+ * @param status Set to U_BUFFER_OVERFLOW_ERROR when the output does not fit.
+ *               If NULL or already failing, nothing is written and 0 is
+ *               returned.
+ * @return Number of units written to buffer.
+ */
+int32_t
+byteArrayToRLEString(const uint8_t* src,int32_t srcLen, uint16_t* buffer,int32_t bufLen, UErrorCode* status) {
+    const uint16_t* saveBuf = buffer;
+    uint16_t* bufLimit = buffer+bufLen;
+    if(!status || U_FAILURE(*status)){
+        return 0;
+    }
+    if(buffer < bufLimit){
+        *buffer++ = ((uint16_t) (srcLen >> 16));
+
+        if(buffer<bufLimit){
+            *buffer++=((uint16_t) srcLen);
+
+            /* Only touch src when there is at least one byte; an empty
+             * input is just the two-unit length header. */
+            if(srcLen > 0){
+                uint8_t runValue = src[0];
+                int runLength = 1;
+                uint8_t state[2]= {0};
+                int i=1;
+                for (; i<srcLen; ++i) {
+                    uint8_t b = src[i];
+                    if (b == runValue && runLength < 0xFF){
+                        ++runLength;
+                    }
+                    else {
+                        buffer = encodeRunByte(buffer, bufLimit,runValue, runLength, state,status);
+                        runValue = b;
+                        runLength = 1;
+                    }
+                }
+                /* Flush the run that was still open at the end of the input. */
+                buffer = encodeRunByte(buffer,bufLimit, runValue, runLength, state, status);
+
+                /* We must save the final byte, if there is one, by padding
+                 * an extra zero.
+                 */
+                if (state[0] != 0) {
+                    buffer = appendEncodedByte(buffer,bufLimit, 0, state ,status);
+                }
+            }
+        }else{
+            *status = U_BUFFER_OVERFLOW_ERROR;
+        }
+    }else{
+        *status = U_BUFFER_OVERFLOW_ERROR;
+    }
+    return (int32_t) (buffer - saveBuf);
+}
+
+
+/**
+ * Construct an array of shorts from a run-length encoded string.
+ *
+ * The first two units of src hold the decoded length (high half, low half).
+ * @param src    Encoded data.
+ * @param srcLen Number of units in src, or -1 if src is NUL-terminated.
+ * @param target Output array; when NULL only the decoded length is returned.
+ * @param tgtLen Capacity of target in 16-bit units.
+ * @param status Set to U_BUFFER_OVERFLOW_ERROR when tgtLen is smaller than
+ *               the decoded length, and to U_INTERNAL_PROGRAM_ERROR when the
+ *               data is truncated or does not decode to exactly the length
+ *               recorded in its header.
+ * @return 0 if status is NULL or already failing, 2 if srcLen <= 2, otherwise
+ *         the decoded length taken from the header.
+ */
+int32_t
+rleStringToUCharArray(uint16_t* src, int32_t srcLen, uint16_t* target, int32_t tgtLen, UErrorCode* status) {
+    int32_t length = 0;
+    int32_t ai = 0;
+    int32_t i=2;
+
+    if(!status || U_FAILURE(*status)){
+        return 0;
+    }
+    /* the source is null terminated */
+    if(srcLen == -1){
+        srcLen = u_strlen(src);
+    }
+    if(srcLen <= 2){
+        return 2;
+    }
+    length = (((int32_t) src[0]) << 16) | ((int32_t) src[1]);
+
+    if(target == NULL){
+        return length;
+    }
+    if(tgtLen < length){
+        *status = U_BUFFER_OVERFLOW_ERROR;
+        return length;
+    }
+
+    /* Every write below is bounded by the header length (<= tgtLen) and every
+     * extra read by srcLen, so malformed input cannot run off either array. */
+    for (; i<srcLen; ++i) {
+        uint16_t c = src[i];
+        if (c == ESCAPE) {
+            if (i + 1 >= srcLen) {
+                /* ESCAPE with nothing after it */
+                *status = U_INTERNAL_PROGRAM_ERROR;
+                return length;
+            }
+            c = src[++i];
+            if (c == ESCAPE) {
+                /* ESCAPE ESCAPE: one literal ESCAPE */
+                if (ai >= length) {
+                    *status = U_INTERNAL_PROGRAM_ERROR;
+                    return length;
+                }
+                target[ai++] = c;
+            } else {
+                /* ESCAPE n c: n copies of c */
+                int32_t runLength = (int32_t) c;
+                uint16_t runValue;
+                int32_t j=0;
+                if (i + 1 >= srcLen || runLength > length - ai) {
+                    *status = U_INTERNAL_PROGRAM_ERROR;
+                    return length;
+                }
+                runValue = src[++i];
+                for (; j<runLength; ++j) {
+                    target[ai++] = runValue;
+                }
+            }
+        }
+        else {
+            if (ai >= length) {
+                *status = U_INTERNAL_PROGRAM_ERROR;
+                return length;
+            }
+            target[ai++] = c;
+        }
+    }
+
+    if (ai != length){
+        *status = U_INTERNAL_PROGRAM_ERROR;
+    }
+
+    return length;
+}
+
+/**
+ * Construct an array of bytes from a run-length encoded string.
+ *
+ * The first two units of src hold the decoded length (high half, low half);
+ * the remaining units each carry two bytes, high byte first.
+ * @param src    Encoded data.
+ * @param srcLen Number of units in src, or -1 if src is NUL-terminated.
+ * @param target Output array; when NULL only the decoded length is returned.
+ * @param tgtLen Capacity of target in bytes.
+ * @param status Set to U_BUFFER_OVERFLOW_ERROR when tgtLen is smaller than
+ *               the decoded length or a run exceeds it, and to
+ *               U_INTERNAL_PROGRAM_ERROR when the data is truncated, ends in
+ *               the middle of an escape sequence, or has units left over.
+ * @return 0 if status is NULL or already failing, 2 if srcLen <= 2, the
+ *         header length when target is NULL or too small, 0 when the data
+ *         ends inside an escape sequence, otherwise the number of bytes
+ *         written.
+ */
+int32_t
+rleStringToByteArray(uint16_t* src, int32_t srcLen, uint8_t* target, int32_t tgtLen, UErrorCode* status) {
+
+    int32_t length = 0;
+    UBool nextChar = true;
+    uint16_t c = 0;
+    int32_t node = 0;
+    int32_t runLength = 0;
+    int32_t i = 2;
+    int32_t ai=0;
+
+    if(!status || U_FAILURE(*status)){
+        return 0;
+    }
+    /* the source is null terminated */
+    if(srcLen == -1){
+        srcLen = u_strlen(src);
+    }
+    if(srcLen <= 2){
+        return 2;
+    }
+    length = (((int32_t) src[0]) << 16) | ((int32_t) src[1]);
+
+    if(target == NULL){
+        return length;
+    }
+    if(tgtLen < length){
+        *status = U_BUFFER_OVERFLOW_ERROR;
+        return length;
+    }
+
+    /* Decode exactly the number of bytes announced in the header. Looping up
+     * to tgtLen instead would keep reading past the encoded data whenever the
+     * caller's buffer is larger than the payload. */
+    for (; ai<length; ) {
+        /* This part of the loop places the next byte into the local
+         * variable 'b' each time through the loop. It keeps the
+         * current character in 'c' and uses the boolean 'nextChar'
+         * to see if we've taken both bytes out of 'c' yet.
+         */
+        uint8_t b;
+        if (nextChar) {
+            if (i >= srcLen) {
+                /* Input ended before 'length' bytes were produced. */
+                *status = U_INTERNAL_PROGRAM_ERROR;
+                return ai;
+            }
+            c = src[i++];
+            b = (uint8_t) (c >> 8);
+            nextChar = false;
+        }
+        else {
+            b = (uint8_t) (c & 0xFF);
+            nextChar = true;
+        }
+
+        /* This part of the loop is a tiny state machine which handles
+         * the parsing of the run-length encoding. This would be simpler
+         * if we could look ahead, but we can't, so we use 'node' to
+         * move between three nodes in the state machine.
+         */
+        switch (node) {
+        case 0:
+            /* Normal idle node */
+            if (b == ESCAPE_BYTE) {
+                node = 1;
+            }
+            else {
+                target[ai++] = b;
+            }
+            break;
+        case 1:
+            /* We have seen one ESCAPE_BYTE; we expect either a second
+             * one, or a run length and value.
+             */
+            if (b == ESCAPE_BYTE) {
+                target[ai++] = ESCAPE_BYTE;
+                node = 0;
+            }
+            else {
+                runLength = b;
+                node = 2;
+            }
+            break;
+        case 2:
+            {
+                int j=0;
+                /* We have seen an ESCAPE_BYTE and length byte. We interpret
+                 * the next byte as the value to be repeated.
+                 */
+                for (; j<runLength; ++j){
+                    if(ai<length){
+                        target[ai++] = b;
+                    }else{
+                        *status = U_BUFFER_OVERFLOW_ERROR;
+                        return ai;
+                    }
+                }
+                node = 0;
+                break;
+            }
+        }
+    }
+
+    if (node != 0){
+        *status = U_INTERNAL_PROGRAM_ERROR;
+        /*("Bad run-length encoded byte array")*/
+        return 0;
+    }
+
+
+    if (i != srcLen){
+        /*("Excess data in RLE byte array string");*/
+        *status = U_INTERNAL_PROGRAM_ERROR;
+        return ai;
+    }
+
+    return ai;
+}
+
diff --git a/intl/icu/source/tools/genrb/rle.h b/intl/icu/source/tools/genrb/rle.h
new file mode 100644
index 0000000000..2684bbe6b2
--- /dev/null
+++ b/intl/icu/source/tools/genrb/rle.h
@@ -0,0 +1,74 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+*
+* Copyright (C) 2000, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+*******************************************************************************
+*
+* File rle.h
+*
+* Modification History:
+*
+* Date Name Description
+* 01/11/02 Ram Creation.
+*******************************************************************************
+*/
+
+#ifndef RLE_H
+#define RLE_H 1
+
+#include "unicode/utypes.h"
+#include "unicode/ustring.h"
+
+U_CDECL_BEGIN
+/**
+ * Construct a string representing a byte array. Use run-length encoding.
+ * Two bytes are packed into a single char, with a single extra zero byte at
+ * the end if needed. A byte represents itself, unless it is the
+ * ESCAPE_BYTE. Then the following notations are possible:
+ * ESCAPE_BYTE ESCAPE_BYTE ESCAPE_BYTE literal
+ * ESCAPE_BYTE n b n instances of byte b
+ * Since an encoded run occupies 3 bytes, we only encode runs of 4 or
+ * more bytes. Thus we have n > 0 and n != ESCAPE_BYTE and n <= 0xFF.
+ * If we encounter a run where n == ESCAPE_BYTE, we represent this as:
+ * b ESCAPE_BYTE n-1 b
+ * The ESCAPE_BYTE value is chosen so as not to collide with commonly
+ * seen values.
+ */
+int32_t
+byteArrayToRLEString(const uint8_t* src,int32_t srcLen, uint16_t* buffer,int32_t bufLen, UErrorCode* status);
+
+
+/**
+ * Construct a string representing a char array. Use run-length encoding.
+ * A character represents itself, unless it is the ESCAPE character. Then
+ * the following notations are possible:
+ * ESCAPE ESCAPE ESCAPE literal
+ * ESCAPE n c n instances of character c
+ * Since an encoded run occupies 3 characters, we only encode runs of 4 or
+ * more characters. Thus we have n > 0 and n != ESCAPE and n <= 0xFFFF.
+ * If we encounter a run where n == ESCAPE, we represent this as:
+ * c ESCAPE n-1 c
+ * The ESCAPE value is chosen so as not to collide with commonly
+ * seen values.
+ */
+int32_t
+usArrayToRLEString(const uint16_t* src,int32_t srcLen,uint16_t* buffer, int32_t bufLen,UErrorCode* status);
+
+/**
+ * Construct an array of bytes from a run-length encoded string.
+ * The first two units of src hold the decoded length (high 16 bits, then low
+ * 16 bits); the encoded bytes follow, packed two per unit.
+ * @param src    Encoded string, as produced by byteArrayToRLEString().
+ * @param srcLen Number of units in src, or -1 if src is NUL-terminated.
+ * @param target Output array, or NULL to query the decoded length only.
+ * @param tgtLen Capacity of target in bytes.
+ * @param status Must not be NULL. Set to U_BUFFER_OVERFLOW_ERROR when target
+ *               is too small and to U_INTERNAL_PROGRAM_ERROR when the encoded
+ *               data is inconsistent.
+ * @return 0 if status is NULL or already indicates failure; 2 if srcLen <= 2;
+ *         the decoded length from the header when target is NULL or too
+ *         small; otherwise the number of bytes written.
+ */
+int32_t
+rleStringToByteArray(uint16_t* src, int32_t srcLen, uint8_t* target, int32_t tgtLen, UErrorCode* status);
+/**
+ * Construct an array of shorts from a run-length encoded string.
+ * The first two units of src hold the decoded length (high 16 bits, then low
+ * 16 bits); the encoded values follow.
+ * @param src    Encoded string, as produced by usArrayToRLEString().
+ * @param srcLen Number of units in src, or -1 if src is NUL-terminated.
+ * @param target Output array, or NULL to query the decoded length only.
+ * @param tgtLen Capacity of target in 16-bit units.
+ * @param status Must not be NULL. Set to U_BUFFER_OVERFLOW_ERROR when target
+ *               is too small and to U_INTERNAL_PROGRAM_ERROR when the data
+ *               does not decode to the length recorded in its header.
+ * @return 0 if status is NULL or already indicates failure; 2 if srcLen <= 2;
+ *         otherwise the decoded length from the header.
+ */
+int32_t
+rleStringToUCharArray(uint16_t* src, int32_t srcLen, uint16_t* target, int32_t tgtLen, UErrorCode* status);
+
+U_CDECL_END
+
+#endif
diff --git a/intl/icu/source/tools/genrb/sources.txt b/intl/icu/source/tools/genrb/sources.txt
new file mode 100644
index 0000000000..0128e2094f
--- /dev/null
+++ b/intl/icu/source/tools/genrb/sources.txt
@@ -0,0 +1,12 @@
+errmsg.c
+filterrb.cpp
+genrb.cpp
+parse.cpp
+prscmnts.cpp
+rbutil.c
+read.c
+reslist.cpp
+rle.c
+ustr.c
+wrtjava.cpp
+wrtxml.cpp
diff --git a/intl/icu/source/tools/genrb/ustr.c b/intl/icu/source/tools/genrb/ustr.c
new file mode 100644
index 0000000000..15f76a80ca
--- /dev/null
+++ b/intl/icu/source/tools/genrb/ustr.c
@@ -0,0 +1,219 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+*
+* Copyright (C) 1998-2012, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+*******************************************************************************
+*
+* File ustr.c
+*
+* Modification History:
+*
+* Date Name Description
+* 05/28/99 stephen Creation.
+*******************************************************************************
+*/
+
+#include "ustr.h"
+#include "cmemory.h"
+#include "cstring.h"
+#include "unicode/ustring.h"
+#include "unicode/putil.h"
+#include "unicode/utf16.h"
+
+/* Protos */
+static void ustr_resize(struct UString *s, int32_t len, UErrorCode *status);
+
+/* Macros */
+/*
+ * Capacity to request for a string that must hold at least minSize UChars:
+ * 0x80 for small strings, otherwise twice the size plus 0x80, rounded down
+ * to a multiple of 0x80.
+ * The argument is parenthesized so that expressions such as
+ * ALLOCATION(a + b) double the whole sum rather than only "a".
+ * minSize is evaluated twice; do not pass an expression with side effects.
+ */
+#define ALLOCATION(minSize) ((minSize) < 0x80 ? 0x80 : (2 * (minSize) + 0x80) & ~(0x80 - 1))
+
+/* Put s into the empty state: no buffer, zero length, zero capacity. */
+U_CFUNC void
+ustr_init(struct UString *s)
+{
+    s->fCapacity = 0;
+    s->fLength = 0;
+    s->fChars = NULL;
+}
+
+/*
+ * Initialize s from an invariant-character char string.
+ * s is reset to empty first (any previous buffer pointer is overwritten, not
+ * freed). A length of -1 means "source is NUL-terminated". Each char is
+ * converted with u_charsToUChars() and appended.
+ * Does nothing if *status already indicates failure.
+ */
+U_CFUNC void
+ustr_initChars(struct UString *s, const char* source, int32_t length, UErrorCode *status)
+{
+    int idx;
+
+    if (U_FAILURE(*status)) {
+        return;
+    }
+
+    s->fChars = 0;
+    s->fLength = 0;
+    s->fCapacity = 0;
+
+    if (length == -1) {
+        length = (int32_t)uprv_strlen(source);
+    }
+
+    /* Reserve room for the whole string up front. */
+    if (length > s->fCapacity) {
+        ustr_resize(s, ALLOCATION(length), status);
+        if (U_FAILURE(*status)) {
+            return;
+        }
+    }
+
+    for (idx = 0; idx < length; ++idx) {
+        UChar converted;
+        u_charsToUChars(&source[idx], &converted, 1);
+        ustr_ucat(s, converted, status);
+    }
+}
+
+/* Release the buffer owned by s and reset it to empty; a NULL s is ignored. */
+U_CFUNC void
+ustr_deinit(struct UString *s)
+{
+    if (s == NULL) {
+        return;
+    }
+    uprv_free(s->fChars);
+    s->fChars = 0;
+    s->fCapacity = 0;
+    s->fLength = 0;
+}
+
+/*
+ * Replace the contents of dst with a copy of src.
+ * Does nothing if *status already indicates failure or if dst and src are
+ * the same object. An empty source (no buffer, or zero length) empties dst
+ * instead of leaving its previous contents in place.
+ * On allocation failure *status is set by ustr_resize().
+ */
+U_CFUNC void
+ustr_cpy(struct UString *dst,
+    const struct UString *src,
+    UErrorCode *status)
+{
+    if(U_FAILURE(*status) || dst == src)
+        return;
+
+    if(src->fChars == NULL || src->fLength <= 0) {
+        /* Nothing to copy: the result is the empty string. */
+        dst->fLength = 0;
+        if(dst->fChars != NULL) {
+            dst->fChars[0] = 0x0000;
+        }
+        return;
+    }
+
+    if(dst->fCapacity < src->fLength) {
+        ustr_resize(dst, ALLOCATION(src->fLength), status);
+        if(U_FAILURE(*status))
+            return;
+    }
+    if(dst->fChars == NULL){
+        /* defensive: capacity recorded but no buffer */
+        return;
+    }
+    u_memcpy(dst->fChars, src->fChars, src->fLength);
+    dst->fLength = src->fLength;
+    dst->fChars[dst->fLength] = 0x0000;
+}
+
+/*
+ * Set the logical length of s to len, growing the buffer first when it
+ * cannot hold len units, and write the terminating 0x0000 at index len.
+ * Does nothing if *status already indicates failure.
+ */
+U_CFUNC void
+ustr_setlen(struct UString *s,
+    int32_t len,
+    UErrorCode *status)
+{
+    if(U_FAILURE(*status)) {
+        return;
+    }
+
+    if((len + 1) > s->fCapacity) {
+        ustr_resize(s, ALLOCATION(len), status);
+        if(U_FAILURE(*status)) {
+            return;
+        }
+    }
+
+    s->fChars[len] = 0x0000;
+    s->fLength = len;
+}
+
+/* Append src to dst by delegating to ustr_ncat() with src's own length. */
+U_CFUNC void
+ustr_cat(struct UString *dst,
+    const struct UString *src,
+    UErrorCode *status)
+{
+    const int32_t count = src->fLength;
+    ustr_ncat(dst, src, count, status);
+}
+
+/*
+ * Append the first n UChars of src to dst.
+ * n is clamped to src->fLength; nothing happens when the clamped count is
+ * <= 0, when src has no buffer, when dst and src are the same object, or
+ * when *status already indicates failure.
+ * The length of dst grows by exactly the number of units copied.
+ */
+U_CFUNC void
+ustr_ncat(struct UString *dst,
+    const struct UString *src,
+    int32_t n,
+    UErrorCode *status)
+{
+    if(U_FAILURE(*status) || dst == src)
+        return;
+
+    /* Never read past the end of src. */
+    if(n > src->fLength) {
+        n = src->fLength;
+    }
+    /* Nothing to append; this also avoids touching a NULL dst buffer. */
+    if(n <= 0 || src->fChars == NULL) {
+        return;
+    }
+
+    if(dst->fCapacity < (dst->fLength + n)) {
+        ustr_resize(dst, ALLOCATION(dst->fLength + n), status);
+        if(U_FAILURE(*status))
+            return;
+    }
+
+    uprv_memcpy(dst->fChars + dst->fLength, src->fChars,
+        sizeof(UChar) * n);
+    /* Advance by what was copied (n), not by the full source length. */
+    dst->fLength += n;
+    dst->fChars[dst->fLength] = 0x0000;
+}
+
+/*
+ * Append one UChar to dst, growing the buffer when needed, and keep the
+ * string terminated with 0x0000.
+ * Does nothing if *status already indicates failure.
+ */
+U_CFUNC void
+ustr_ucat(struct UString *dst,
+    UChar c,
+    UErrorCode *status)
+{
+    if(U_FAILURE(*status)) {
+        return;
+    }
+
+    if((dst->fLength + 1) > dst->fCapacity) {
+        ustr_resize(dst, ALLOCATION(dst->fLength + 1), status);
+        if(U_FAILURE(*status)) {
+            return;
+        }
+    }
+
+    dst->fChars[dst->fLength] = c;
+    dst->fLength++;
+    dst->fChars[dst->fLength] = 0x0000;
+}
+/*
+ * Append a code point to dst as UTF-16: one unit for values up to 0xFFFF, a
+ * lead/trail surrogate pair above that. Values above 0x10FFFF set *status to
+ * U_ILLEGAL_CHAR_FOUND and append nothing.
+ */
+U_CFUNC void
+ustr_u32cat(struct UString *dst, UChar32 c, UErrorCode *status){
+    if(c > 0x10FFFF){
+        *status = U_ILLEGAL_CHAR_FOUND;
+        return;
+    }
+    if(c <= 0xFFFF){
+        ustr_ucat(dst, (UChar) c, status);
+        return;
+    }
+    /* supplementary code point: surrogate pair */
+    ustr_ucat(dst, U16_LEAD(c), status);
+    ustr_ucat(dst, U16_TRAIL(c), status);
+}
+/*
+ * Append len UChars from the raw array src to dst, growing the buffer when
+ * needed, and keep the string terminated with 0x0000.
+ * Does nothing if *status already indicates failure.
+ */
+U_CFUNC void
+ustr_uscat(struct UString *dst,
+    const UChar* src,int len,
+    UErrorCode *status)
+{
+    UChar *tail;
+
+    if(U_FAILURE(*status)) {
+        return;
+    }
+
+    if((dst->fLength + len) > dst->fCapacity) {
+        ustr_resize(dst, ALLOCATION(dst->fLength + len), status);
+        if(U_FAILURE(*status)) {
+            return;
+        }
+    }
+
+    /* Computed after the resize because the buffer may have moved. */
+    tail = dst->fChars + dst->fLength;
+    uprv_memcpy(tail, src, sizeof(UChar) * len);
+    dst->fLength += len;
+    dst->fChars[dst->fLength] = 0x0000;
+}
+
+/*
+ * Change the capacity of s to len UChars (plus one for the terminator).
+ * Existing contents are kept on success. If the allocation fails, the old
+ * buffer is released, s is reset to empty and *status is set to
+ * U_MEMORY_ALLOCATION_ERROR.
+ * Does nothing if *status already indicates failure.
+ */
+static void
+ustr_resize(struct UString *s,
+    int32_t len,
+    UErrorCode *status)
+{
+    UChar *grown;
+
+    if(U_FAILURE(*status))
+        return;
+
+    /* +1 for trailing 0x0000 */
+    grown = (UChar*) uprv_realloc(s->fChars, sizeof(UChar) * (len + 1));
+    if(grown == 0) {
+        /* realloc leaves the old block allocated on failure; free it so it
+         * is not leaked when the pointer is cleared. */
+        uprv_free(s->fChars);
+        s->fChars = 0;
+        *status = U_MEMORY_ALLOCATION_ERROR;
+        s->fLength = s->fCapacity = 0;
+        return;
+    }
+
+    s->fChars = grown;
+    s->fCapacity = len;
+}
diff --git a/intl/icu/source/tools/genrb/ustr.h b/intl/icu/source/tools/genrb/ustr.h
new file mode 100644
index 0000000000..8a69e9d4d5
--- /dev/null
+++ b/intl/icu/source/tools/genrb/ustr.h
@@ -0,0 +1,81 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+*
+* Copyright (C) 1998-2012, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+*******************************************************************************
+*
+* File ustr.h
+*
+* Modification History:
+*
+* Date Name Description
+* 05/28/99 stephen Creation.
+*******************************************************************************
+*/
+
+#ifndef USTR_H
+#define USTR_H 1
+
+#include "unicode/utypes.h"
+
+/*
+ * Append code point c at target as UTF-16 and advance target past it; len is
+ * set to the number of units written (1 or 2).
+ * target and len must be modifiable lvalues. c and target are evaluated more
+ * than once, so do not pass expressions with side effects. No bounds or
+ * range checks are done.
+ */
+#define U_APPEND_CHAR32(c,target,len) UPRV_BLOCK_MACRO_BEGIN { \
+    if ((c) <= 0xffff) \
+    { \
+        *(target)++ = (UChar) (c); \
+        (len)=1; \
+    } \
+    else \
+    { \
+        (target)[0] = U16_LEAD(c); \
+        (target)[1] = U16_TRAIL(c); \
+        (len)=2; \
+        (target) +=2; \
+    } \
+} UPRV_BLOCK_MACRO_END
+
+/*
+ * Same as U_APPEND_CHAR32 but without reporting the number of units written.
+ */
+#define U_APPEND_CHAR32_ONLY(c,target) UPRV_BLOCK_MACRO_BEGIN { \
+    if ((c) <= 0xffff) \
+    { \
+        *(target)++ = (UChar) (c); \
+    } \
+    else \
+    { \
+        (target)[0] = U16_LEAD(c); \
+        (target)[1] = U16_TRAIL(c); \
+        (target) +=2; \
+    } \
+} UPRV_BLOCK_MACRO_END
+
+/* A C representation of a string "object" (to avoid realloc all the time) */
+struct UString {
+ /* Heap buffer holding the text; NULL until something is stored. */
+ UChar *fChars;
+ /* Number of UChars currently in use, not counting the 0x0000 terminator. */
+ int32_t fLength;
+ /* Number of UChars the buffer can hold, not counting the terminator slot. */
+ int32_t fCapacity;
+};
+
+/* Reset s to the empty state (no buffer, length 0, capacity 0). */
+U_CFUNC void ustr_init(struct UString *s);
+
+/*
+ * Initialize s from a char string, converting each char with
+ * u_charsToUChars(); length == -1 means source is NUL-terminated.
+ */
+U_CFUNC void
+ustr_initChars(struct UString *s, const char* source, int32_t length, UErrorCode *status);
+
+/* Free the buffer of s and reset it to empty; s may be NULL. */
+U_CFUNC void ustr_deinit(struct UString *s);
+
+/* Set the length of s to len, growing the buffer if needed, and terminate it. */
+U_CFUNC void ustr_setlen(struct UString *s, int32_t len, UErrorCode *status);
+
+/* Copy the contents of src into dst; a no-op when dst == src. */
+U_CFUNC void ustr_cpy(struct UString *dst, const struct UString *src,
+ UErrorCode *status);
+
+/* Append src to dst; implemented as ustr_ncat() with n == src->fLength. */
+U_CFUNC void ustr_cat(struct UString *dst, const struct UString *src,
+ UErrorCode *status);
+
+/* Copy n UChars from src onto the end of dst; a no-op when dst == src. */
+U_CFUNC void ustr_ncat(struct UString *dst, const struct UString *src,
+ int32_t n, UErrorCode *status);
+
+/* Append a single UTF-16 code unit. */
+U_CFUNC void ustr_ucat(struct UString *dst, UChar c, UErrorCode *status);
+/* Append a code point as one or two UTF-16 units; values above 0x10FFFF set U_ILLEGAL_CHAR_FOUND. */
+U_CFUNC void ustr_u32cat(struct UString *dst, UChar32 c, UErrorCode *status);
+/* Append len UChars from the raw array src. */
+U_CFUNC void ustr_uscat(struct UString *dst, const UChar* src,int len,UErrorCode *status);
+#endif
diff --git a/intl/icu/source/tools/genrb/wrtjava.cpp b/intl/icu/source/tools/genrb/wrtjava.cpp
new file mode 100644
index 0000000000..cb04b5a44a
--- /dev/null
+++ b/intl/icu/source/tools/genrb/wrtjava.cpp
@@ -0,0 +1,701 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+*
+* Copyright (C) 2000-2015, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+*******************************************************************************
+*
+* File wrtjava.cpp
+*
+* Modification History:
+*
+* Date Name Description
+* 01/11/02 Ram Creation.
+* 02/12/08 Spieth Fix errant 'new Object[][]{' insertion
+* 02/19/08 Spieth Removed ICUListResourceBundle dependency
+*******************************************************************************
+*/
+
+#include <assert.h>
+#include "unicode/unistr.h"
+#include "reslist.h"
+#include "unewdata.h"
+#include "unicode/ures.h"
+#include "errmsg.h"
+#include "filestrm.h"
+#include "cstring.h"
+#include "unicode/ucnv.h"
+#include "genrb.h"
+#include "rle.h"
+#include "uhash.h"
+#include "uresimp.h"
+#include "unicode/ustring.h"
+#include "unicode/utf8.h"
+
+void res_write_java(struct SResource *res,UErrorCode *status);
+
+
+static const char copyRight[] =
+ "/* \n"
+ " *******************************************************************************\n"
+ " *\n"
+ " * Copyright (C) International Business Machines\n"
+ " * Corporation and others. All Rights Reserved.\n"
+ " *\n"
+ " *******************************************************************************\n"
+ " * $" "Source: $ \n"
+ " * $" "Date: $ \n"
+ " * $" "Revision: $ \n"
+ " *******************************************************************************\n"
+ " */\n\n";
+static const char warningMsg[] =
+ "/*********************************************************************\n"
+ "######################################################################\n"
+ "\n"
+ " WARNING: This file is generated by genrb Version " GENRB_VERSION ".\n"
+ " If you edit this file, please make sure that, the source\n"
+ " of this file (XXXX.txt in LocaleElements_XXXX.java)\n"
+ " is also edited.\n"
+ "######################################################################\n"
+ " *********************************************************************\n"
+ " */\n\n";
+static const char* openBrace="{\n";
+static const char* closeClass=" };\n"
+ "}\n";
+
+static const char* javaClass = "import java.util.ListResourceBundle;\n\n"
+ "public class ";
+
+static const char* javaClass1= " extends ListResourceBundle {\n\n"
+ " /**\n"
+ " * Overrides ListResourceBundle \n"
+ " */\n"
+ " public final Object[][] getContents() { \n"
+ " return contents;\n"
+ " }\n\n"
+ " private static Object[][] contents = {\n";
+/*static const char* javaClassICU= " extends ListResourceBundle {\n\n"
+ " public %s () {\n"
+ " super.contents = data;\n"
+ " }\n"
+ " static final Object[][] data = new Object[][] { \n";*/
+static int tabCount = 3;
+
+static FileStream* out=nullptr;
+static struct SRBRoot* srBundle ;
+/*static const char* outDir = nullptr;*/
+
+static const char* bName=nullptr;
+static const char* pName=nullptr;
+
+/**
+ * Write the indentation for the current nesting level to os: tabCount + 1
+ * groups of four spaces.
+ */
+static void write_tabs(FileStream* os){
+    int i=0;
+    for(;i<=tabCount;i++){
+        /* The literal must really contain the four bytes that are written;
+         * a shorter literal would make the stream read past its end. */
+        T_FileStream_write(os,"    ",4);
+    }
+}
+
+#define ZERO 0x30
+
+static const char* enc ="";
+static UConverter* conv = nullptr;
+
+/**
+ * Convert UTF-16 text into the body of a Java string literal.
+ * Newline, carriage return, double quote and backslash get escape sequences;
+ * printable ASCII is copied; everything else is written as a \uXXXX escape,
+ * or converted with the file-level converter 'conv' when 'enc' is non-empty.
+ *
+ * target must be zero-filled on entry: output is produced both by indexing
+ * with j and by uprv_strcat(), which only agree while the bytes at and after
+ * j are still 0.
+ *
+ * @param target    Output buffer (zero-filled).
+ * @param targetLen Capacity of target in bytes.
+ * @param source    UTF-16 input.
+ * @param sourceLen Number of units in source.
+ * @param status    Receives converter errors.
+ * @return The number of bytes the output needs (this can exceed what was
+ *         actually stored when target is too small), or 0 if the converter
+ *         failed.
+ */
+static int32_t
+uCharsToChars(char *target, int32_t targetLen, const char16_t *source, int32_t sourceLen, UErrorCode *status) {
+    int i=0, j=0;
+    char str[30]={'\0'};
+    while(i<sourceLen){
+        if (source[i] == '\n') {
+            if (j + 2 < targetLen) {
+                uprv_strcat(target, "\\n");
+            }
+            j += 2;
+        }else if(source[i]==0x0D){
+            /* NOTE(review): 0x0D is carriage return, yet "\f" (form feed) is
+             * emitted. Left as is because it changes generated output --
+             * confirm whether "\r" was intended. */
+            if(j+2<targetLen){
+                uprv_strcat(target,"\\f");
+            }
+            j+=2;
+        }else if(source[i] == '"'){
+            /* A quote already preceded by a backslash is copied as is; any
+             * other quote gets a backslash. i > 0 keeps the look-behind
+             * inside the array when the quote is the first character. */
+            if(i>0 && source[i-1]=='\\'){
+                if(j<targetLen){
+                    target[j]= (char)source[i];
+                }
+                j++;
+            }else{
+                if(j+2<targetLen){
+                    uprv_strcat(target,"\\");
+                    target[j+1]= (char)source[i];
+                }
+                j+=2;
+            }
+        }else if(source[i]=='\\'){
+            if(i+1<sourceLen){
+                switch(source[i+1]){
+                case ',':
+                case '!':
+                case '?':
+                case '#':
+                case '.':
+                case '%':
+                case '&':
+                case ':':
+                case ';':
+                    if(j+2<targetLen){
+                        uprv_strcat(target,"\\\\");
+                    }
+                    j+=2;
+                    break;
+                case '"':
+                case '\'':
+                    if(j+3<targetLen){
+                        uprv_strcat(target,"\\\\\\");
+                    }
+                    j+=3;
+                    break;
+                default :
+                    if(j<targetLen){
+                        target[j]=(char)source[i];
+                    }
+                    j++;
+                    break;
+                }
+            }else{
+                /* trailing backslash: escape it */
+                if(j<targetLen){
+                    uprv_strcat(target,"\\\\");
+                }
+                j+=2;
+            }
+        }else if(source[i]>=0x20 && source[i]<0x7F/*ASCII*/){
+            if(j<targetLen){
+                target[j] = (char) source[i];
+            }
+            j++;
+        }else{
+            if(*enc =='\0' || source[i]==0x0000){
+                uprv_strcpy(str,"\\u");
+                itostr(str+2,source[i],16,4);
+                if(j+6<targetLen){
+                    uprv_strcat(target,str);
+                }
+                j+=6;
+            }else{
+                char dest[30] = {0};
+                int retVal=ucnv_fromUChars(conv,dest,30,source+i,1,status);
+                if(U_FAILURE(*status)){
+                    return 0;
+                }
+                if(j+retVal<targetLen){
+                    uprv_strcat(target,dest);
+                }
+                j+=retVal;
+            }
+        }
+        i++;
+    }
+    return j;
+}
+
+
+/**
+ * Find the last occurrence of 'find' in the first sourceLen bytes of source.
+ * @return The index of the match. When there is no match the scan stops one
+ *         position before source and that offset is returned converted to
+ *         uint32_t (a very large value), which callers detect by comparing
+ *         against the length they passed in.
+ */
+static uint32_t
+strrch(const char* source,uint32_t sourceLen,char find){
+    const char* cursor = source + (sourceLen-1);
+    for(; cursor >= source; --cursor){
+        if(*cursor == find){
+            break;
+        }
+    }
+    return (uint32_t)(cursor-source);
+}
+
+/**
+ * Choose the output line width for a string of len code units.
+ * The default is 80 columns. If that would need more than 3000 lines,
+ * assuming 5 output characters per code unit, the width is widened so the
+ * text fits in about 3000 lines.
+ */
+static int32_t getColumnCount(int32_t len){
+    const int32_t defaultColumns = 80;
+    const int32_t maxLines = 3000;
+    /* assume that every codepoint is represented in \uXXXX format */
+    const int32_t adjustedLen = len*5;
+
+    if ((adjustedLen / defaultColumns) > maxLines) {
+        return adjustedLen / maxLines;
+    }
+    return defaultColumns;
+}
+/**
+ * Write src to the output file as a Java string literal, breaking long
+ * strings into several "..." + pieces so no line exceeds the column limit.
+ * @param src          UTF-16 text.
+ * @param srcLen       Number of units in src.
+ * @param printEndLine When true the literal is preceded by indentation and
+ *                     followed by ",\n"; otherwise only the closing quote is
+ *                     written.
+ * @param status       Set to U_MEMORY_ALLOCATION_ERROR if the work buffer
+ *                     cannot be allocated; also receives conversion errors.
+ */
+static void
+str_write_java(const char16_t *src, int32_t srcLen, UBool printEndLine, UErrorCode *status) {
+
+    uint32_t length = srcLen*8;
+    uint32_t bufLen = 0;
+    uint32_t columnCount;
+    /* Allocate with uprv_malloc so the buffer matches the uprv_free calls
+     * below. Unlike malloc, it also returns a non-NULL block for a zero-byte
+     * request, so an empty string is not reported as out-of-memory. */
+    char* buf = (char*) uprv_malloc(sizeof(char)*length);
+
+    if(buf == nullptr) {
+        *status = U_MEMORY_ALLOCATION_ERROR;
+        return;
+    }
+
+    columnCount = getColumnCount(srcLen);
+    /* uCharsToChars requires a zero-filled buffer. */
+    memset(buf,0,length);
+
+    bufLen = uCharsToChars(buf,length,src,srcLen,status);
+    // buflen accounts for extra bytes added due to multi byte encoding of
+    // non ASCII characters
+    if(printEndLine)
+        write_tabs(out);
+
+    if(U_FAILURE(*status)){
+        uprv_free(buf);
+        return;
+    }
+
+    if(bufLen+(tabCount*4) > columnCount ){
+        uint32_t len = 0;
+        char* current = buf;
+        uint32_t add;
+        while(len < bufLen){
+            add = columnCount-(tabCount*4)-5/* for ", +\n */;
+            current = buf +len;
+            if (add < (bufLen-len)) {
+                /* Do not break a line in the middle of an escape sequence:
+                 * find the last backslash in the candidate piece and extend
+                 * the piece to cover the whole sequence. */
+                uint32_t idx = strrch(current,add,'\\');
+                if (idx > add) {
+                    idx = add;
+                } else {
+                    int32_t num =idx-1;
+                    uint32_t seqLen;
+                    while(num>0){
+                        if(current[num]=='\\'){
+                            num--;
+                        }else{
+                            break;
+                        }
+                    }
+                    if ((idx-num)%2==0) {
+                        idx--;
+                    }
+                    seqLen = (current[idx+1]=='u') ? 6 : 2;
+                    if ((add-idx) < seqLen) {
+                        add = idx + seqLen;
+                    }
+                }
+            }
+            T_FileStream_write(out,"\"",1);
+            uint32_t byteIndex = 0;
+            uint32_t trailBytes = 0;
+            if(len+add<bufLen){
+                // check the trail bytes to be added to the output line
+                while (byteIndex < add) {
+                    if (U8_IS_LEAD(*(current + byteIndex))) {
+                        trailBytes = U8_COUNT_TRAIL_BYTES(*(current + byteIndex));
+                        add += trailBytes;
+                    }
+                    byteIndex++;
+                }
+                T_FileStream_write(out,current,add);
+                if (len + add < bufLen) {
+                    T_FileStream_write(out,"\" +\n",4);
+                    write_tabs(out);
+                }
+            }else{
+                T_FileStream_write(out,current,bufLen-len);
+            }
+            len+=add;
+        }
+    }else{
+        T_FileStream_write(out,"\"",1);
+        T_FileStream_write(out, buf,bufLen);
+    }
+    if(printEndLine){
+        T_FileStream_write(out,"\",\n",3);
+    }else{
+        T_FileStream_write(out,"\"",1);
+    }
+    uprv_free(buf);
+}
+
+/* Writing Functions */
+/** Write a string resource as one indented Java string literal followed by ",\n". */
+static void
+string_write_java(const StringResource *res,UErrorCode *status) {
+    /* The key is fetched and discarded, exactly as before. */
+    (void)res->getKeyString(srBundle);
+
+    const char16_t *text = res->getBuffer();
+    const int32_t textLen = res->length();
+    str_write_java(text, textLen, true, status);
+}
+
+/**
+ * Write an array resource as a Java array initializer.
+ * If every element is a string the array is declared as String[], otherwise
+ * as Object[]; an empty array is written as an empty String[].
+ * Elements are written through res_write_java(); the function returns at the
+ * first element that reports a failure in *status.
+ */
+static void
+array_write_java(const ArrayResource *res, UErrorCode *status) {
+
+    const char* arr ="new String[] { \n";
+
+    if (U_FAILURE(*status)) {
+        return;
+    }
+
+    if (res->fCount <= 0) {
+        /* empty array */
+        write_tabs(out);
+        T_FileStream_write(out,arr,(int32_t)uprv_strlen(arr));
+        write_tabs(out);
+        T_FileStream_write(out,"},\n",3);
+        return;
+    }
+
+    /* First pass: find out whether all elements are strings. */
+    UBool allStrings = true;
+    for (struct SResource *item = res->fFirst; item != nullptr; item = item->fNext) {
+        if (!item->isString()) {
+            allStrings = false;
+            break;
+        }
+    }
+
+    const char* opener = allStrings ? arr : "new Object[]{\n";
+    write_tabs(out);
+    T_FileStream_write(out, opener, (int32_t)uprv_strlen(opener));
+    tabCount++;
+
+    /* Second pass: write every element one level deeper. */
+    for (struct SResource *item = res->fFirst; item != nullptr; item = item->fNext) {
+        res_write_java(item, status);
+        if (U_FAILURE(*status)) {
+            return;
+        }
+    }
+    T_FileStream_write(out,"\n",1);
+
+    tabCount--;
+    write_tabs(out);
+    T_FileStream_write(out,"},\n",3);
+}
+
+/**
+ * Write an int-vector resource as a Java array initializer.
+ * The resource whose key is "DateTimeElements" is written as a String[] of
+ * quoted decimal numbers; every other vector is written as an int[].
+ */
+static void
+intvector_write_java(const IntVectorResource *res, UErrorCode * /*status*/) {
+    const char *resname = res->getKeyString(srBundle);
+    const UBool asStrings =
+        resname != nullptr && uprv_strcmp(resname,"DateTimeElements")==0;
+    const char* opener = asStrings ? "new String[]{\n" : "new int[] {\n";
+    char digits[100];
+    digits[0]=0;
+
+    write_tabs(out);
+    T_FileStream_write(out, opener, (int32_t)uprv_strlen(opener));
+    tabCount++;
+
+    for (uint32_t idx = 0; idx<res->fCount; idx++) {
+        write_tabs(out);
+        int len=itostr(digits,res->fArray[idx],10,0);
+        if (asStrings) {
+            T_FileStream_write(out,"\"",1);
+            T_FileStream_write(out,digits,len);
+            T_FileStream_write(out,"\",",2);
+            T_FileStream_write(out,"\n",1);
+        } else {
+            T_FileStream_write(out,digits,len);
+            T_FileStream_write(out,",\n",2);
+        }
+    }
+
+    tabCount--;
+    write_tabs(out);
+    T_FileStream_write(out,"},\n",3);
+}
+
+/** Write an integer resource as an indented "new Integer(<value>)," line. */
+static void
+int_write_java(const IntResource *res, UErrorCode * /*status*/) {
+    const char* prefix = "new Integer(";
+    char digits[100];
+    digits[0]=0;
+
+    write_tabs(out);
+    T_FileStream_write(out, prefix, (int32_t)uprv_strlen(prefix));
+
+    /* decimal representation of the value */
+    int len=itostr(digits, res->fValue, 10, 0);
+    T_FileStream_write(out,digits,len);
+    T_FileStream_write(out,"),\n",3 );
+}
+
+/*
+ * Emits a binary resource as a Java "new byte[] {...}" literal, sixteen
+ * values per line. Values of 128 and above are printed as their negative
+ * two's-complement equivalent (value - 256).
+ */
+static void
+bytes_write_java(const BinaryResource *res, UErrorCode * /*status*/) {
+    static const char kArrayOpen[] = "new byte[] {";
+    static const char kElementFormat[] = "%i, ";
+    const int32_t total = res->fLength;
+    char element[100] = { 0 };
+
+    if (total <= 0) {
+        /* Empty array */
+        write_tabs(out);
+        T_FileStream_write(out, kArrayOpen, (int32_t)uprv_strlen(kArrayOpen));
+        T_FileStream_write(out, "},\n", 3);
+        return;
+    }
+
+    const uint8_t *bytes = res->fData;
+
+    write_tabs(out);
+    T_FileStream_write(out, kArrayOpen, (int32_t)uprv_strlen(kArrayOpen));
+    T_FileStream_write(out, "\n", 1);
+    tabCount++;
+
+    for (int pos = 0; pos < total; ++pos) {
+        const int column = pos % 16;
+        if (column == 0) {
+            /* start of a new output row */
+            write_tabs(out);
+        }
+
+        const int value = (bytes[pos] < 128) ? bytes[pos] : (bytes[pos] - 256);
+        snprintf(element, sizeof(element), kElementFormat, value);
+        T_FileStream_write(out, element, (int32_t)uprv_strlen(element));
+
+        if (column == 15) {
+            T_FileStream_write(out, "\n", 1);
+        }
+    }
+
+    /* terminate a partially filled last row */
+    if (total % 16 != 0) {
+        T_FileStream_write(out, "\n", 1);
+    }
+
+    tabCount--;
+    write_tabs(out);
+    T_FileStream_write(out, "},\n", 3);
+}
+
+/*
+ * Set to true by bundle_write_java() before a bundle is written and cleared
+ * by table_write_java() when it reaches the first non-empty table, so that
+ * this first table does not emit its own "new Object[][]{" opener.
+ */
+static UBool start = true;
+
+/*
+ * Writes a table resource as nested Java array literals: every child becomes
+ * a brace-delimited entry holding the quoted key (when the child has one)
+ * followed by the child's own rendering via res_write_java().
+ * Returns immediately if *status already indicates failure, and returns
+ * early (without closing the open braces) if writing a child fails.
+ */
+static void
+table_write_java(const TableResource *res, UErrorCode *status) {
+ uint32_t i = 0;
+ struct SResource *current = nullptr;
+ const char* obj = "new Object[][]{\n";
+
+ if (U_FAILURE(*status)) {
+ return ;
+ }
+
+ if (res->fCount > 0) {
+ /* every non-empty table except the first one written opens its own array */
+ if(start==false){
+ write_tabs(out);
+ T_FileStream_write(out, obj, (int32_t)uprv_strlen(obj));
+ tabCount++;
+ }
+ start = false;
+ current = res->fFirst;
+ i = 0;
+
+
+ while (current != nullptr) {
+ const char *currentKeyString = current->getKeyString(srBundle);
+
+ /* the linked list must not hold more nodes than fCount reports */
+ assert(i < res->fCount);
+ write_tabs(out);
+
+ /* only the first two bytes of openBrace are written */
+ T_FileStream_write(out, openBrace, 2);
+
+
+ tabCount++;
+
+ write_tabs(out);
+ if(currentKeyString != nullptr) {
+ T_FileStream_write(out, "\"", 1);
+ T_FileStream_write(out, currentKeyString,
+ (int32_t)uprv_strlen(currentKeyString));
+ /* length 2: writes only the quote and comma; the newline follows separately */
+ T_FileStream_write(out, "\",\n", 2);
+
+ T_FileStream_write(out, "\n", 1);
+ }
+ res_write_java(current, status);
+ if(U_FAILURE(*status)){
+ return;
+ }
+ i++;
+ current = current->fNext;
+ tabCount--;
+ write_tabs(out);
+ T_FileStream_write(out, "},\n", 3);
+ }
+ /* NOTE(review): the tabCount>4 threshold presumably separates nested tables
+ * (which opened an array above) from the top-level one - confirm. */
+ if(tabCount>4){
+ tabCount--;
+ write_tabs(out);
+ T_FileStream_write(out, "},\n", 3);
+ }
+
+ } else {
+ /* empty table: an opener immediately followed by the closer */
+ write_tabs(out);
+ T_FileStream_write(out,obj,(int32_t)uprv_strlen(obj));
+
+ write_tabs(out);
+ T_FileStream_write(out,"},\n",3);
+
+ }
+
+}
+
+/*
+ * Dispatches a resource to the Java writer matching its type.
+ * Does nothing when *status already indicates failure. Alias resources are
+ * reported as U_UNSUPPORTED_ERROR; a null resource or an unknown type sets
+ * U_INTERNAL_PROGRAM_ERROR.
+ */
+void
+res_write_java(struct SResource *res,UErrorCode *status) {
+    if (U_FAILURE(*status)) {
+        return;
+    }
+
+    if (res == nullptr) {
+        *status = U_INTERNAL_PROGRAM_ERROR;
+        return;
+    }
+
+    switch (res->fType) {
+    case URES_STRING:
+        string_write_java(static_cast<const StringResource *>(res), status);
+        break;
+    case URES_ALIAS:
+        printf("Encountered unsupported resource type %d of alias\n", res->fType);
+        *status = U_UNSUPPORTED_ERROR;
+        break;
+    case URES_INT_VECTOR:
+        intvector_write_java(static_cast<const IntVectorResource *>(res), status);
+        break;
+    case URES_BINARY:
+        bytes_write_java(static_cast<const BinaryResource *>(res), status);
+        break;
+    case URES_INT:
+        int_write_java(static_cast<const IntResource *>(res), status);
+        break;
+    case URES_ARRAY:
+        array_write_java(static_cast<const ArrayResource *>(res), status);
+        break;
+    case URES_TABLE:
+        table_write_java(static_cast<const TableResource *>(res), status);
+        break;
+    default:
+        /* no writer exists for this resource type */
+        *status = U_INTERNAL_PROGRAM_ERROR;
+        break;
+    }
+}
+
+/*
+ * Writes `bundle` as a Java source file named "<className>.java", where
+ * className is the bundle name plus "_<locale>" for every locale other
+ * than "root".
+ *
+ * @param bundle             resource bundle to write
+ * @param outputDir          target directory; nullptr or "" means the file
+ *                           name is used without a directory prefix
+ * @param outputEnc          optional converter name; a converter is opened
+ *                           for it when non-empty
+ * @param writtenFilename    optional buffer receiving the generated path
+ *                           (always NUL-terminated, truncated if too small)
+ * @param writtenFilenameLen capacity of writtenFilename in bytes
+ * @param packageName        Java package, default "com.ibm.icu.impl.data"
+ * @param bundleName         class name stem, default "LocaleElements"
+ * @param status             set to U_BUFFER_OVERFLOW_ERROR when the
+ *                           generated names do not fit the internal
+ *                           buffers, U_FILE_ACCESS_ERROR when the file
+ *                           cannot be opened, or the converter/writer error
+ */
+void
+bundle_write_java(struct SRBRoot *bundle, const char *outputDir,const char* outputEnc,
+                  char *writtenFilename, int writtenFilenameLen,
+                  const char* packageName, const char* bundleName,
+                  UErrorCode *status) {
+
+    char fileName[256] = {'\0'};
+    char className[256]={'\0'};
+
+    start = true; /* Reset the start indicator*/
+
+    bName = (bundleName==nullptr) ? "LocaleElements" : bundleName;
+    pName = (packageName==nullptr)? "com.ibm.icu.impl.data" : packageName;
+    srBundle = bundle;
+
+    /* Work out every length up front so the fixed buffers are never overrun. */
+    const bool isRoot = uprv_strcmp(srBundle->fLocale,"root")==0;
+    const size_t dirLen = (outputDir != nullptr) ? uprv_strlen(outputDir) : 0;
+    /* dirLen > 0 guards the [dirLen-1] access against an empty directory string */
+    const bool needSep = dirLen > 0 && outputDir[dirLen-1] != U_FILE_SEP_CHAR;
+    const size_t classLen = uprv_strlen(bName) +
+                            (isRoot ? 0 : 1 + uprv_strlen(srBundle->fLocale));
+    const size_t fileLen = dirLen + (needSep ? uprv_strlen(U_FILE_SEP_STRING) : 0) +
+                           classLen + uprv_strlen(".java");
+    if (classLen >= sizeof(className) || fileLen >= sizeof(fileName)) {
+        *status = U_BUFFER_OVERFLOW_ERROR;
+        return;
+    }
+
+    uprv_strcpy(className, bName);
+    if(!isRoot){
+        uprv_strcat(className,"_");
+        uprv_strcat(className,srBundle->fLocale);
+    }
+    if(dirLen > 0){
+        uprv_strcpy(fileName, outputDir);
+        if(needSep){
+            uprv_strcat(fileName,U_FILE_SEP_STRING);
+        }
+    }
+    uprv_strcat(fileName,className);
+    uprv_strcat(fileName,".java");
+
+    if (writtenFilename && writtenFilenameLen > 0) {
+        uprv_strncpy(writtenFilename, fileName, writtenFilenameLen);
+        /* strncpy does not terminate when the source fills the buffer */
+        writtenFilename[writtenFilenameLen - 1] = '\0';
+    }
+
+    if (U_FAILURE(*status)) {
+        return;
+    }
+
+    out= T_FileStream_open(fileName,"w");
+
+    if(out==nullptr){
+        *status = U_FILE_ACCESS_ERROR;
+        return;
+    }
+    if(getIncludeCopyright()){
+        T_FileStream_write(out, copyRight, (int32_t)uprv_strlen(copyRight));
+        T_FileStream_write(out, warningMsg, (int32_t)uprv_strlen(warningMsg));
+    }
+    T_FileStream_write(out,"package ",(int32_t)uprv_strlen("package "));
+    T_FileStream_write(out,pName,(int32_t)uprv_strlen(pName));
+    T_FileStream_write(out,";\n\n",3);
+    T_FileStream_write(out, javaClass, (int32_t)uprv_strlen(javaClass));
+    T_FileStream_write(out, className, (int32_t)uprv_strlen(className));
+    T_FileStream_write(out, javaClass1, (int32_t)uprv_strlen(javaClass1));
+
+    if(outputEnc && *outputEnc!='\0'){
+        /* store the output encoding */
+        enc = outputEnc;
+        conv=ucnv_open(enc,status);
+        if(U_FAILURE(*status)){
+            /* do not leak the file that was just opened */
+            T_FileStream_close(out);
+            conv = nullptr;
+            return;
+        }
+    }
+    res_write_java(bundle->fRoot, status);
+
+    T_FileStream_write(out, closeClass, (int32_t)uprv_strlen(closeClass));
+
+    T_FileStream_close(out);
+
+    /* conv is file-static: clear it so a later call that opens no converter
+     * does not close the same converter a second time */
+    ucnv_close(conv);
+    conv = nullptr;
+}
diff --git a/intl/icu/source/tools/genrb/wrtxml.cpp b/intl/icu/source/tools/genrb/wrtxml.cpp
new file mode 100644
index 0000000000..16f67fabca
--- /dev/null
+++ b/intl/icu/source/tools/genrb/wrtxml.cpp
@@ -0,0 +1,1213 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+*
+* Copyright (C) 2002-2015, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+*******************************************************************************
+*
+* File wrtxml.cpp
+*
+* Modification History:
+*
+* Date Name Description
+* 10/01/02 Ram Creation.
+* 02/07/08 Spieth Correct XLIFF generation on EBCDIC platform
+*
+*******************************************************************************
+*/
+
+// Safer use of UnicodeString.
+#ifndef UNISTR_FROM_CHAR_EXPLICIT
+# define UNISTR_FROM_CHAR_EXPLICIT explicit
+#endif
+
+// Less important, but still a good idea.
+#ifndef UNISTR_FROM_STRING_EXPLICIT
+# define UNISTR_FROM_STRING_EXPLICIT explicit
+#endif
+
+#include "reslist.h"
+#include "unewdata.h"
+#include "unicode/ures.h"
+#include "errmsg.h"
+#include "filestrm.h"
+#include "cstring.h"
+#include "unicode/ucnv.h"
+#include "genrb.h"
+#include "rle.h"
+#include "uhash.h"
+#include "uresimp.h"
+#include "unicode/ustring.h"
+#include "unicode/uchar.h"
+#include "ustr.h"
+#include "prscmnts.h"
+#include "unicode/unistr.h"
+#include "unicode/utf8.h"
+#include "unicode/utf16.h"
+#include <time.h>
+
+U_NAMESPACE_USE
+
+static int tabCount = 0;
+
+static FileStream* out=nullptr;
+static struct SRBRoot* srBundle ;
+static const char* outDir = nullptr;
+static const char* enc ="";
+static UConverter* conv = nullptr;
+
+const char* const* ISOLanguages;
+const char* const* ISOCountries;
+const char* textExt = ".txt";
+const char* xliffExt = ".xlf";
+
+/*
+ * Converts outString to UTF-8 and writes it to fileStream.
+ *
+ * @return the value returned by T_FileStream_write(), or 0 when there is
+ *         nothing to write, the string cannot be converted, or the
+ *         temporary buffer cannot be allocated
+ */
+static int32_t write_utf8_file(FileStream* fileStream, UnicodeString outString)
+{
+    UErrorCode status = U_ZERO_ERROR;
+    int32_t len = 0;
+
+    // preflight to get the destination buffer size
+    u_strToUTF8(nullptr,
+                0,
+                &len,
+                toUCharPtr(outString.getBuffer()),
+                outString.length(),
+                &status);
+    // a buffer-overflow status is the expected outcome of preflighting
+    if (len <= 0 || (U_FAILURE(status) && status != U_BUFFER_OVERFLOW_ERROR)) {
+        return 0;
+    }
+
+    // allocate the buffer
+    char* dest = (char*)uprv_malloc(len);
+    if (dest == nullptr) {
+        return 0;
+    }
+    status = U_ZERO_ERROR;
+
+    // convert the data
+    u_strToUTF8(dest,
+                len,
+                &len,
+                toUCharPtr(outString.getBuffer()),
+                outString.length(),
+                &status);
+
+    // write data to out file, but never a buffer the conversion did not fill
+    int32_t ret = 0;
+    if (U_SUCCESS(status)) {
+        ret = T_FileStream_write(fileStream, dest, len);
+    }
+    uprv_free(dest);
+    return (ret);
+}
+
+/*
+ * Writes the indentation for the current nesting depth: one indent unit for
+ * each level from 0 through tabCount inclusive (nothing when tabCount < 0).
+ */
+static void write_tabs(FileStream* os){
+    for (int level = tabCount; level >= 0; --level) {
+        write_utf8_file(os, UnicodeString(" "));
+    }
+}
+
+/*
+ * Builds the ID of an element in a freshly allocated string.
+ * With no curKey the ID is a copy of id; otherwise it is "<id>_<curKey>",
+ * or just curKey when id is empty. The incoming value of `result` is
+ * ignored; the caller owns the returned buffer and releases it with
+ * uprv_free().
+ */
+static char* getID(const char* id, const char* curKey, char* result) {
+    /* id + NUL, plus separator + key when a key is present */
+    size_t bytes = sizeof(char)*uprv_strlen(id) + 1;
+    if (curKey != nullptr) {
+        bytes = sizeof(char)*(uprv_strlen(id) + 1 + uprv_strlen(curKey)) + 1;
+    }
+
+    result = (char *)uprv_malloc(bytes);
+    uprv_memset(result, 0, bytes);
+
+    if (curKey == nullptr) {
+        uprv_strcpy(result, id);
+        return result;
+    }
+
+    if (id[0] != '\0') {
+        uprv_strcpy(result, id);
+        uprv_strcat(result, "_");
+    }
+    uprv_strcat(result, curKey);
+    return result;
+}
+
+/*compute CRC for binary code*/
+/* The code is from http://www.theorem.com/java/CRC32.java
+ * Calculates the CRC32 - 32 bit Cyclical Redundancy Check
+ * <P> This check is used in numerous systems to verify the integrity
+ * of information. It's also used as a hashing function. Unlike a regular
+ * checksum, it's sensitive to the order of the characters.
+ * It produces a 32 bit
+ *
+ * @author Michael Lecuyer (mjl@theorem.com)
+ * @version 1.1 August 11, 1998
+ */
+
+/* ICU is not endian portable, because ICU data generated on big endian machines can be
+ * ported to big endian machines but not to little endian machines and vice versa. The
+ * conversion is not portable across platforms with different endianness.
+ */
+
+/*
+ * Continues a reflected CRC-32 (polynomial 0xEDB88320) over len bytes.
+ *
+ * @param ptr     bytes to fold into the checksum
+ * @param len     number of bytes at ptr
+ * @param lastcrc running value from a previous call (callers in this file
+ *                start with 0xFFFFFFFF)
+ * @return the updated running value; no final inversion is applied
+ */
+uint32_t computeCRC(const char *ptr, uint32_t len, uint32_t lastcrc){
+
+#define CRC32_POLYNOMIAL 0xEDB88320
+
+    /* The lookup table depends only on the polynomial, so it is built once
+     * on first use instead of on every call. */
+    struct CrcTable {
+        uint32_t entry[256];
+        CrcTable() {
+            for (uint32_t i = 0; i <= 255; i++) {
+                uint32_t value = i;
+                for (int bit = 8; bit > 0; bit--) {
+                    if ((value & 1) == 1) {
+                        value = (value >> 1) ^ CRC32_POLYNOMIAL;
+                    } else {
+                        value >>= 1;
+                    }
+                }
+                entry[i] = value;
+            }
+        }
+    };
+    static const CrcTable table;
+
+    /* unsigned arithmetic throughout: no reliance on signed wrap-around */
+    uint32_t crc = lastcrc;
+    for (; len != 0; --len, ++ptr) {
+        crc = (crc >> 8) ^ table.entry[(crc ^ (uint8_t)*ptr) & 0xFF];
+    }
+    return crc;
+}
+
+/* Replaces every occurrence of s with r within the first srcLen chars of src. */
+static void strnrepchr(char* src, int32_t srcLen, char s, char r){
+    int32_t pos = 0;
+    while (pos < srcLen) {
+        if (src[pos] == s) {
+            src[pos] = r;
+        }
+        ++pos;
+    }
+}
+/*
+ * Derives a language tag from a file name: the part of id before the first
+ * '.' (or all of id when there is none) is canonicalized as a locale ID and
+ * its '_' separators are replaced by '-'.
+ *
+ * @param id file name to parse
+ * @return a newly allocated string owned by the caller (uprv_free)
+ *
+ * The process exits if memory cannot be allocated or the locale ID cannot
+ * be canonicalized.
+ */
+static char* parseFilename(const char* id, char* /*lang*/) {
+    int idLen = (int) uprv_strlen(id);
+    UErrorCode status = U_ZERO_ERROR;
+    const char *ext = uprv_strchr(id, '.');
+    int pos = (ext != nullptr) ? (int) (ext - id) : idLen;
+
+    /* pos characters plus the terminating NUL */
+    char* localeID = (char*) uprv_malloc(pos + 1);
+    /* +1 keeps the capacity non-zero for an empty stem and leaves room for the NUL */
+    int canonCapacity = pos*3 + 1;
+    char* canon = (char*) uprv_malloc(canonCapacity);
+    int canonLen = 0;
+
+    if(localeID == nullptr || canon == nullptr){
+        fprintf(stderr, "Could not allocate memory!!");
+        exit(U_MEMORY_ALLOCATION_ERROR);
+    }
+
+    uprv_memcpy(localeID, id, pos);
+    localeID[pos]=0; /* NUL terminate the string */
+
+    canonLen = uloc_canonicalize(localeID, canon, canonCapacity, &status);
+
+    if(U_FAILURE(status)){
+        fprintf(stderr, "Could not canonicalize the locale ID: %s. Error: %s\n", localeID, u_errorName(status));
+        exit(status);
+    }
+    /* the temporary copy is only needed for canonicalization and the message above */
+    uprv_free(localeID);
+
+    strnrepchr(canon, canonLen, '_', '-');
+    return canon;
+}
+
+static const char* xmlHeader = "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n";
+#if 0
+static const char* bundleStart = "<xliff version = \"1.2\" "
+ "xmlns='urn:oasis:names:tc:xliff:document:1.2' "
+ "xmlns:xsi='http://www.w3.org/2001/XMLSchema-instance' "
+ "xsi:schemaLocation='urn:oasis:names:tc:xliff:document:1.2 xliff-core-1.2-transitional.xsd'>\n";
+#else
+static const char* bundleStart = "<xliff version = \"1.1\" "
+ "xmlns='urn:oasis:names:tc:xliff:document:1.1' "
+ "xmlns:xsi='http://www.w3.org/2001/XMLSchema-instance' "
+ "xsi:schemaLocation='urn:oasis:names:tc:xliff:document:1.1 http://www.oasis-open.org/committees/xliff/documents/xliff-core-1.1.xsd'>\n";
+#endif
+static const char* bundleEnd = "</xliff>\n";
+
+void res_write_xml(struct SResource *res, const char* id, const char* language, UBool isTopLevel, UErrorCode *status);
+
+/*
+ * Converts UTF-16 text to UTF-8 and escapes it for use in XML.
+ * '&', '<', '>', '"' and '\'' become entities; C0 control characters other
+ * than TAB, CR and LF, as well as unpaired surrogates, are rejected with
+ * U_ILLEGAL_CHAR_FOUND.
+ *
+ * @param pDest      in: optional caller buffer (*pDest may be nullptr)
+ * @param destCap    capacity of *pDest; a value <= 0 makes the function
+ *                   allocate srcLen*8 bytes itself
+ * @param destLength out: number of bytes written (without the final NUL)
+ * @param src        UTF-16 input
+ * @param srcLen     number of UTF-16 code units in src
+ * @param status     ICU error code
+ * @return the NUL-terminated output buffer, or nullptr when nothing was
+ *         converted (bad arguments, empty input, or an error)
+ *
+ * The buffer in use is freed on error and may be replaced while growing, so
+ * callers must continue with the returned pointer and must not use or free
+ * the pointer they passed in. The result is released with uprv_free().
+ */
+static char* convertAndEscape(char** pDest, int32_t destCap, int32_t* destLength,
+                              const char16_t* src, int32_t srcLen, UErrorCode* status){
+    /* Worst case for one code point: a 6-byte entity plus the NUL that
+     * uprv_strcpy() appends after it. */
+    const int32_t kMaxUnitBytes = 7;
+    int32_t srcIndex=0;
+    int32_t destLen=0;
+    char* dest=nullptr;
+    UChar32 c = 0;
+
+    if(status==nullptr || U_FAILURE(*status) || pDest==nullptr || srcLen<=0 || src == nullptr){
+        return nullptr;
+    }
+    dest =*pDest;
+    if(dest==nullptr || destCap <=0){
+        destCap = srcLen * 8;
+        dest = (char*) uprv_malloc(sizeof(char) * destCap);
+        if(dest==nullptr){
+            *status=U_MEMORY_ALLOCATION_ERROR;
+            return nullptr;
+        }
+    }
+
+    dest[0]=0;
+
+    while(srcIndex<srcLen){
+        const char* entity = nullptr;
+
+        U16_NEXT(src, srcIndex, srcLen, c);
+
+        if (U16_IS_LEAD(c) || U16_IS_TRAIL(c)) {
+            *status = U_ILLEGAL_CHAR_FOUND;
+            fprintf(stderr, "Illegal Surrogate! \n");
+            uprv_free(dest);
+            return nullptr;
+        }
+
+        /* Grow BEFORE appending, keeping what was written so far, so that
+         * neither the existing output nor the current code point is lost. */
+        if(destLen > destCap - kMaxUnitBytes){
+            char* grown = nullptr;
+            if(destCap <= (0x7FFFFFFF - kMaxUnitBytes) / 2){
+                destCap = destCap * 2 + kMaxUnitBytes;
+                grown = (char*) uprv_malloc(sizeof(char)*destCap);
+            }
+            if(grown==nullptr){
+                *status=U_MEMORY_ALLOCATION_ERROR;
+                uprv_free(dest);
+                return nullptr;
+            }
+            uprv_memmove(grown,dest,destLen);
+            uprv_free(dest);
+            dest=grown;
+        }
+
+        if(c > 0x007F){
+            UBool isError = false;
+            U8_APPEND((unsigned char*)dest,destLen,destCap,c,isError);
+            if(isError){
+                *status = U_ILLEGAL_CHAR_FOUND;
+                fprintf(stderr, "Illegal Character \\U%08X!\n",(int)c);
+                uprv_free(dest);
+                return nullptr;
+            }
+            continue;
+        }
+
+        /* ASCII Range. Entities are spelled with hex escapes so the bytes
+         * do not depend on the compiler's execution character set. */
+        switch(c) {
+        case '\x26':
+            entity = "\x26\x61\x6d\x70\x3b"; /* &amp;*/
+            break;
+        case '\x3c':
+            entity = "\x26\x6c\x74\x3b"; /* &lt;*/
+            break;
+        case '\x3e':
+            entity = "\x26\x67\x74\x3b"; /* &gt;*/
+            break;
+        case '\x22':
+            entity = "\x26\x71\x75\x6f\x74\x3b"; /* &quot;*/
+            break;
+        case '\x27':
+            entity = "\x26\x61\x70\x6f\x73\x3b"; /* &apos; */
+            break;
+        default:
+            break;
+        }
+
+        if(entity != nullptr){
+            uprv_strcpy(dest+destLen, entity);
+            destLen+=(int32_t)uprv_strlen(entity);
+        }else if(c < 0x20 && c != 0x09 && c != 0x0A && c != 0x0D){
+            /* Disallow C0 controls except TAB, CR, LF*/
+            *status = U_ILLEGAL_CHAR_FOUND;
+            fprintf(stderr, "Illegal Character \\u%04X!\n",(int)c);
+            uprv_free(dest);
+            return nullptr;
+        }else{
+            dest[destLen++]=(char)c;
+        }
+    }
+
+    /* the capacity check above always leaves room for this terminator */
+    dest[destLen]=0;
+    *destLength = destLen;
+    return dest;
+}
+
+#define ASTERISK 0x002A
+#define SPACE 0x0020
+#define CR 0x000A
+#define LF 0x000D
+#define AT_SIGN 0x0040
+
+#if UCONFIG_NO_REGULAR_EXPRESSIONS==0
+/*
+ * Strips trailing asterisks, spaces and line-break characters from *src by
+ * overwriting them with NUL, and stores the shortened length in *len.
+ * Does nothing when src or *src is nullptr.
+ */
+static void
+trim(char **src, int32_t *len){
+    if(src == nullptr || *src == nullptr){
+        return;
+    }
+
+    char *text = *src;
+    int32_t last = *len - 1;
+
+    /* walk backwards until a character that must be kept is found */
+    while(last >= 0 &&
+          (text[last] == ASTERISK || text[last] == SPACE ||
+           text[last] == CR || text[last] == LF)){
+        text[last] = 0;
+        --last;
+    }
+
+    *len = last + 1;
+}
+
+/*
+ * Writes "<tagStart><escaped text><tagEnd>\n" to the output file, where the
+ * text is src converted to UTF-8, XML-escaped and stripped of trailing
+ * asterisks, spaces and line breaks.
+ *
+ * @param src      UTF-16 text
+ * @param srcLen   number of UTF-16 code units in src
+ * @param tagStart text written before the content
+ * @param tagEnd   text written after the content
+ * @param status   ICU error code; nothing is written if it already indicates
+ *                 failure or if the conversion fails
+ */
+static void
+print(char16_t* src, int32_t srcLen,const char *tagStart,const char *tagEnd, UErrorCode *status){
+    char *buf = nullptr;
+    int32_t bufLen = 0;
+
+    if(U_FAILURE(*status)){
+        return;
+    }
+
+    /* Let convertAndEscape() size the buffer itself: it reserves enough room
+     * for entity expansion, which a srcLen*4 buffer does not guarantee. */
+    buf = convertAndEscape(&buf, 0, &bufLen, src, srcLen,status);
+    if(U_SUCCESS(*status)){
+        trim(&buf,&bufLen);
+        write_utf8_file(out,UnicodeString(tagStart));
+        write_utf8_file(out,UnicodeString(buf, bufLen, "UTF-8"));
+        write_utf8_file(out,UnicodeString(tagEnd));
+        write_utf8_file(out,UnicodeString("\n"));
+
+    }
+    /* buf is nullptr after a failed conversion; uprv_free() accepts that */
+    uprv_free(buf);
+}
+#endif
+
+/*
+ * Writes one <note> element for every note found in the comment text src.
+ * Does nothing when src is nullptr; stops at the first error reported while
+ * counting or extracting notes. Without regular-expression support only a
+ * warning is printed.
+ */
+static void
+printNoteElements(const UString *src, UErrorCode *status){
+
+#if UCONFIG_NO_REGULAR_EXPRESSIONS==0 /* donot compile when no RegularExpressions are available */
+
+    if(src == nullptr){
+        return;
+    }
+
+    const int32_t capacity = src->fLength;
+    char16_t* note = (char16_t*) uprv_malloc(U_SIZEOF_UCHAR * capacity);
+
+    const int32_t total = getCount(src->fChars,src->fLength, UPC_NOTE, status);
+    if(U_SUCCESS(*status)){
+        for(int32_t idx = 0; idx < total; ++idx){
+            const int32_t noteLen = getAt(src->fChars,src->fLength, &note, capacity, idx, UPC_NOTE, status);
+            if(U_FAILURE(*status)){
+                break;
+            }
+            if(noteLen <= 0){
+                continue;
+            }
+            write_tabs(out);
+            print(note, noteLen,"<note>", "</note>", status);
+        }
+    }
+
+    /* single release point for the scratch buffer */
+    uprv_free(note);
+#else
+
+    fprintf(stderr, "Warning: Could not output comments to XLIFF file. ICU has been built without RegularExpression support.\n");
+
+#endif /* UCONFIG_NO_REGULAR_EXPRESSIONS */
+
+}
+
+/* Writes ` name = "value"` to the output file; the value is not escaped. */
+static void printAttribute(const char *name, const char *value, int32_t /*len*/)
+{
+    const char *const pieces[] = { " ", name, " = \"", value, "\"" };
+    for (const char *piece : pieces) {
+        write_utf8_file(out, UnicodeString(piece));
+    }
+}
+
+#if UCONFIG_NO_REGULAR_EXPRESSIONS==0 /* donot compile when no RegularExpressions are available */
+/* Writes ` name = "value"` to the output file; value is written as given. */
+static void printAttribute(const char *name, const UnicodeString value, int32_t /*len*/)
+{
+    const char *const opening[] = { " ", name, " = \"" };
+    for (const char *piece : opening) {
+        write_utf8_file(out, UnicodeString(piece));
+    }
+    write_utf8_file(out, value);
+    write_utf8_file(out, UnicodeString("\""));
+}
+#endif
+
+/*
+ * Finishes an element's opening tag and prints its description comment.
+ * The tag's optional translate attribute is taken from the resource comment
+ * in src; the closing ">\n" is then written, followed by the description as
+ * an XML comment when there is one.
+ *
+ * @param src            resource comment; modified in place (comment markers
+ *                       are removed and fLength is updated)
+ * @param resName        resource name, used only in the warning message
+ * @param printTranslate whether a translate attribute may be written
+ * @param status         ICU error code; nothing is written if it is nullptr
+ *                       or already indicates failure
+ */
+static void
+printComments(struct UString *src, const char *resName, UBool printTranslate, UErrorCode *status){
+
+#if UCONFIG_NO_REGULAR_EXPRESSIONS==0 /* donot compile when no RegularExpressions are available */
+
+    if(status==nullptr || U_FAILURE(*status)){
+        return;
+    }
+
+    int32_t capacity = src->fLength + 1;
+    char* buf = nullptr;
+    int32_t bufLen = 0;
+    char16_t* desc = (char16_t*) uprv_malloc(U_SIZEOF_UCHAR * capacity);
+    char16_t* trans = (char16_t*) uprv_malloc(U_SIZEOF_UCHAR * capacity);
+
+    int32_t descLen = 0, transLen=0;
+    if(desc==nullptr || trans==nullptr){
+        *status = U_MEMORY_ALLOCATION_ERROR;
+        uprv_free(desc);
+        uprv_free(trans);
+        return;
+    }
+    // TODO: make src const, stop modifying it in-place, make printContainer() take const resource, etc.
+    src->fLength = removeCmtText(src->fChars, src->fLength, status);
+    descLen = getDescription(src->fChars,src->fLength, &desc, capacity, status);
+    transLen = getTranslate(src->fChars,src->fLength, &trans, capacity, status);
+
+    /* first print translate attribute */
+    if(transLen > 0){
+        if(printTranslate){
+            /* print translate attribute */
+            buf = convertAndEscape(&buf, 0, &bufLen, trans, transLen, status);
+            if(U_SUCCESS(*status)){
+                printAttribute("translate", UnicodeString(buf, bufLen, "UTF-8"), bufLen);
+            }
+            /* the escaped copy is no longer needed (nullptr after a failure) */
+            uprv_free(buf);
+            buf = nullptr;
+        }else if(getShowWarning()){
+            fprintf(stderr, "Warning: Translate attribute for resource %s cannot be set. XLIFF prohibits it.\n",
+                    (resName != nullptr) ? resName : "(null)");
+        }
+    }
+    /* Close the tag on every path: the caller relies on it, including when
+     * the translate value is dropped silently because warnings are off. */
+    write_utf8_file(out,UnicodeString(">\n"));
+
+    if(descLen > 0){
+        write_tabs(out);
+        print(desc, descLen, "<!--", "-->", status);
+    }
+
+    uprv_free(desc);
+    uprv_free(trans);
+#else
+
+    fprintf(stderr, "Warning: Could not output comments to XLIFF file. ICU has been built without RegularExpression support.\n");
+    /* the comment is skipped, but the opening tag must still be closed */
+    write_utf8_file(out,UnicodeString(">\n"));
+
+#endif /* UCONFIG_NO_REGULAR_EXPRESSIONS */
+
+}
+
+/*
+ * Print out a containing element, like:
+ * <trans-unit id = "blah" resname = "blah" restype = "x-icu-alias" translate = "no">
+ * <group id = "calendar_gregorian" resname = "gregorian" restype = "x-icu-array">
+ */
+/*
+ * Writes the opening tag of a container element for res and raises the
+ * indentation level by one.
+ *
+ * The element ID is id joined with the resource key when the key is
+ * non-empty, otherwise a copy of id. The attributes id, resname, mime-type
+ * and restype are written in that order; each of the last three is skipped
+ * when its value is nullptr.
+ *
+ * @return the element ID; the caller releases it with uprv_free()
+ */
+static char *printContainer(SResource *res, const char *container, const char *restype, const char *mimetype, const char *id, UErrorCode *status)
+{
+    write_tabs(out);
+
+    const char *resname = res->getKeyString(srBundle);
+    const bool hasKey = (resname != nullptr && *resname != 0);
+    char *sid = getID(id, hasKey ? resname : nullptr, nullptr);
+
+    write_utf8_file(out, UnicodeString("<"));
+    write_utf8_file(out, UnicodeString(container));
+    printAttribute("id", sid, (int32_t) uprv_strlen(sid));
+
+    /* optional attributes, in output order */
+    const struct {
+        const char *name;
+        const char *value;
+    } optional[] = {
+        { "resname", resname },
+        { "mime-type", mimetype },
+        { "restype", restype }
+    };
+    for (const auto &attr : optional) {
+        if (attr.value != nullptr) {
+            printAttribute(attr.name, attr.value, (int32_t) uprv_strlen(attr.value));
+        }
+    }
+
+    tabCount += 1;
+    if (res->fComment.fLength > 0) {
+        /* printComments will print the closing ">\n" */
+        printComments(&res->fComment, resname, true, status);
+    } else {
+        write_utf8_file(out, UnicodeString(">\n"));
+    }
+
+    return sid;
+}
+
+/* Writing Functions */
+
+static const char *trans_unit = "trans-unit";
+static const char *close_trans_unit = "</trans-unit>\n";
+static const char *source = "<source>";
+static const char *close_source = "</source>\n";
+static const char *group = "group";
+static const char *close_group = "</group>\n";
+
+static const char *bin_unit = "bin-unit";
+static const char *close_bin_unit = "</bin-unit>\n";
+static const char *bin_source = "<bin-source>\n";
+static const char *close_bin_source = "</bin-source>\n";
+static const char *external_file = "<external-file";
+/*static const char *close_external_file = "</external-file>\n";*/
+static const char *internal_file = "<internal-file";
+static const char *close_internal_file = "</internal-file>\n";
+
+static const char *application_mimetype = "application"; /* add "/octet-stream"? */
+
+static const char *alias_restype = "x-icu-alias";
+static const char *array_restype = "x-icu-array";
+static const char *binary_restype = "x-icu-binary";
+static const char *integer_restype = "x-icu-integer";
+static const char *intvector_restype = "x-icu-intvector";
+static const char *table_restype = "x-icu-table";
+
+/*
+ * Writes a string resource as a <trans-unit> holding the escaped text in a
+ * <source> element, followed by any <note> elements from its comment.
+ * On a conversion error the function returns with *status set, after
+ * releasing the element ID and restoring the indentation level.
+ */
+static void
+string_write_xml(StringResource *res, const char* id, const char* /*language*/, UErrorCode *status) {
+
+    char *sid = nullptr;
+    char* buf = nullptr;
+    int32_t bufLen = 0;
+
+    if(status==nullptr || U_FAILURE(*status)){
+        return;
+    }
+
+    sid = printContainer(res, trans_unit, nullptr, nullptr, id, status);
+
+    write_tabs(out);
+
+    write_utf8_file(out, UnicodeString(source));
+
+    buf = convertAndEscape(&buf, 0, &bufLen, res->getBuffer(), res->length(), status);
+
+    if (U_FAILURE(*status)) {
+        /* undo what printContainer() set up: the ID string and the indent */
+        tabCount -= 1;
+        uprv_free(sid);
+        return;
+    }
+
+    write_utf8_file(out, UnicodeString(buf, bufLen, "UTF-8"));
+    write_utf8_file(out, UnicodeString(close_source));
+
+    printNoteElements(&res->fComment, status);
+
+    tabCount -= 1;
+    write_tabs(out);
+
+    write_utf8_file(out, UnicodeString(close_trans_unit));
+
+    uprv_free(buf);
+    uprv_free(sid);
+}
+
+/*
+ * Writes an alias resource as a <trans-unit> with restype "x-icu-alias"
+ * holding the escaped alias text in a <source> element, followed by any
+ * <note> elements from its comment.
+ * On a conversion error the function returns with *status set, after
+ * releasing the element ID and restoring the indentation level.
+ */
+static void
+alias_write_xml(AliasResource *res, const char* id, const char* /*language*/, UErrorCode *status) {
+    char *sid = nullptr;
+    char* buf = nullptr;
+    int32_t bufLen=0;
+
+    /* same guard as string_write_xml(): write nothing once an error is pending */
+    if(status==nullptr || U_FAILURE(*status)){
+        return;
+    }
+
+    sid = printContainer(res, trans_unit, alias_restype, nullptr, id, status);
+
+    write_tabs(out);
+
+    write_utf8_file(out, UnicodeString(source));
+
+    buf = convertAndEscape(&buf, 0, &bufLen, res->getBuffer(), res->length(), status);
+
+    if(U_FAILURE(*status)){
+        /* undo what printContainer() set up: the ID string and the indent */
+        tabCount -= 1;
+        uprv_free(sid);
+        return;
+    }
+    write_utf8_file(out, UnicodeString(buf, bufLen, "UTF-8"));
+    write_utf8_file(out, UnicodeString(close_source));
+
+    printNoteElements(&res->fComment, status);
+
+    tabCount -= 1;
+    write_tabs(out);
+
+    write_utf8_file(out, UnicodeString(close_trans_unit));
+
+    uprv_free(buf);
+    uprv_free(sid);
+}
+
+/*
+ * Writes an array resource as a <group> with restype "x-icu-array". Each
+ * element is written through res_write_xml() with the ID
+ * "<group id>_<zero-based index>".
+ * Stops at the first element that fails, leaving *status set.
+ */
+static void
+array_write_xml(ArrayResource *res, const char* id, const char* language, UErrorCode *status) {
+    char* sid = nullptr;
+    int index = 0;
+
+    struct SResource *current = nullptr;
+
+    sid = printContainer(res, group, array_restype, nullptr, id, status);
+
+    current = res->fFirst;
+
+    while (current != nullptr) {
+        char c[256] = {0};
+        char* subId = nullptr;
+
+        itostr(c, index, 10, 0);
+        index += 1;
+        subId = getID(sid, c, subId);
+
+        res_write_xml(current, subId, language, false, status);
+        uprv_free(subId);
+        subId = nullptr;
+
+        if(U_FAILURE(*status)){
+            /* the group ID is owned here and must not leak on the error path */
+            uprv_free(sid);
+            return;
+        }
+
+        current = current->fNext;
+    }
+
+    tabCount -= 1;
+    write_tabs(out);
+    write_utf8_file(out, UnicodeString(close_group));
+
+    uprv_free(sid);
+}
+
+/*
+ * Writes an int-vector resource as a <group> with restype "x-icu-intvector"
+ * containing one integer <trans-unit> per element. Each unit's ID is
+ * "<group id>_<zero-based index>" and its <source> holds the decimal value.
+ */
+static void
+intvector_write_xml(IntVectorResource *res, const char* id, const char* /*language*/, UErrorCode *status) {
+    char *groupId = printContainer(res, group, intvector_restype, nullptr, id, status);
+
+    for (uint32_t idx = 0; idx < res->fCount; ++idx) {
+        char indexText[256] = {0};
+        char valueText[256] = {0};
+
+        itostr(indexText, idx, 10, 0);
+        char *unitId = getID(groupId, indexText, nullptr);
+        const int32_t valueLen = itostr(valueText, res->fArray[idx], 10, 0);
+
+        /* opening tag of the unit */
+        write_tabs(out);
+        write_utf8_file(out, UnicodeString("<"));
+        write_utf8_file(out, UnicodeString(trans_unit));
+        printAttribute("id", unitId, (int32_t)uprv_strlen(unitId));
+        printAttribute("restype", integer_restype, (int32_t) strlen(integer_restype));
+        write_utf8_file(out, UnicodeString(">\n"));
+
+        /* the value, one level deeper */
+        tabCount += 1;
+        write_tabs(out);
+        write_utf8_file(out, UnicodeString(source));
+        write_utf8_file(out, UnicodeString(valueText, valueLen));
+        write_utf8_file(out, UnicodeString(close_source));
+        tabCount -= 1;
+
+        write_tabs(out);
+        write_utf8_file(out, UnicodeString(close_trans_unit));
+
+        uprv_free(unitId);
+    }
+
+    tabCount -= 1;
+    write_tabs(out);
+    write_utf8_file(out, UnicodeString(close_group));
+
+    uprv_free(groupId);
+}
+
+/*
+ * Writes an integer resource as a <trans-unit> with restype "x-icu-integer"
+ * whose <source> holds the decimal value, followed by any <note> elements
+ * from its comment.
+ */
+static void
+int_write_xml(IntResource *res, const char* id, const char* /*language*/, UErrorCode *status) {
+    char valueText[256] = {0};
+
+    char *unitId = printContainer(res, trans_unit, integer_restype, nullptr, id, status);
+
+    write_tabs(out);
+    write_utf8_file(out, UnicodeString(source));
+
+    const int32_t valueLen = itostr(valueText, res->fValue, 10, 0);
+    write_utf8_file(out, UnicodeString(valueText, valueLen));
+    write_utf8_file(out, UnicodeString(close_source));
+
+    printNoteElements(&res->fComment, status);
+
+    /* back to the unit's own level for the closing tag */
+    tabCount -= 1;
+    write_tabs(out);
+    write_utf8_file(out, UnicodeString(close_trans_unit));
+
+    uprv_free(unitId);
+}
+
+/*
+ * Writes a binary resource as a <bin-unit>.
+ *
+ * When the resource names a file, the unit references it through an
+ * <external-file href="..."> (the part of the name after the last '\\') and
+ * the mime-type is derived from the file extension; the process exits if
+ * the name has no extension. Otherwise the bytes are written inline as
+ * two-digit hex values inside an <internal-file> element whose crc attribute
+ * is computed over that hex text.
+ *
+ * On return the indentation level is the same as on entry.
+ */
+static void
+bin_write_xml(BinaryResource *res, const char* id, const char* /*language*/, UErrorCode *status) {
+    const char* m_type = application_mimetype;
+    /* file-name part used for the href; nullptr selects the inline form */
+    const char* href = nullptr;
+    char* sid = nullptr;
+
+    if(res->fFileName != nullptr){
+        /* Work directly on the resource's own string: no fixed-size copy
+         * that a long path could overflow. */
+        const char* sep = uprv_strrchr(res->fFileName, '\\');
+        const char* ext = uprv_strrchr(res->fFileName, '.');
+
+        href = (sep != nullptr) ? sep + 1 : res->fFileName;
+
+        if (ext == nullptr) {
+            fprintf(stderr, "Error: %s is an unknown binary filename type.\n", res->fFileName);
+            exit(U_ILLEGAL_ARGUMENT_ERROR);
+        }
+
+        if(uprv_strcmp(ext, ".jpg")==0 || uprv_strcmp(ext, ".jpeg")==0 || uprv_strcmp(ext, ".gif")==0 ){
+            m_type = "image";
+        } else if(uprv_strcmp(ext, ".wav")==0 || uprv_strcmp(ext, ".au")==0 ){
+            m_type = "audio";
+        } else if(uprv_strcmp(ext, ".avi")==0 || uprv_strcmp(ext, ".mpg")==0 || uprv_strcmp(ext, ".mpeg")==0){
+            m_type = "video";
+        } else if(uprv_strcmp(ext, ".txt")==0 || uprv_strcmp(ext, ".text")==0){
+            m_type = "text";
+        }
+    }
+
+    sid = printContainer(res, bin_unit, binary_restype, m_type, id, status);
+
+    write_tabs(out);
+    write_utf8_file(out, UnicodeString(bin_source));
+
+    tabCount += 1;
+    write_tabs(out);
+
+    if(href != nullptr){
+        write_utf8_file(out, UnicodeString(external_file));
+        printAttribute("href", href, (int32_t)uprv_strlen(href));
+        write_utf8_file(out, UnicodeString("/>\n"));
+    } else {
+        char temp[256] = {0};
+        uint32_t crc = 0xFFFFFFFF;
+        uint32_t i = 0;
+        int32_t len=0;
+
+        write_utf8_file(out, UnicodeString(internal_file));
+        printAttribute("form", application_mimetype, (int32_t) uprv_strlen(application_mimetype));
+
+        /* the checksum covers the hex text that is written below */
+        while(i <res->fLength){
+            len = itostr(temp, res->fData[i], 16, 2);
+            crc = computeCRC(temp, len, crc);
+            i++;
+        }
+
+        len = itostr(temp, crc, 10, 0);
+        printAttribute("crc", temp, len);
+
+        write_utf8_file(out, UnicodeString(">"));
+
+        i = 0;
+        while(i <res->fLength){
+            len = itostr(temp, res->fData[i], 16, 2);
+            write_utf8_file(out, UnicodeString(temp));
+            i += 1;
+        }
+
+        write_utf8_file(out, UnicodeString(close_internal_file));
+    }
+
+    /* one level up for </bin-source>, matching the level <bin-source> was
+     * written at, in both forms */
+    tabCount -= 1;
+    write_tabs(out);
+    write_utf8_file(out, UnicodeString(close_bin_source));
+
+    printNoteElements(&res->fComment, status);
+
+    tabCount -= 1;
+    write_tabs(out);
+    write_utf8_file(out, UnicodeString(close_bin_unit));
+
+    /* the element ID is owned here in both forms */
+    uprv_free(sid);
+}
+
+
+
+/*
+ * Writes a table resource as a <group> with restype "x-icu-table" and
+ * writes every child through res_write_xml(). Children receive the group's
+ * ID as their ID prefix, except for the top-level table, whose children get
+ * an empty prefix.
+ * Does nothing if *status already indicates failure; stops at the first
+ * child that fails, leaving *status set.
+ */
+static void
+table_write_xml(TableResource *res, const char* id, const char* language, UBool isTopLevel, UErrorCode *status) {
+
+    struct SResource *current = nullptr;
+    char* sid = nullptr;
+
+    if (U_FAILURE(*status)) {
+        return ;
+    }
+
+    sid = printContainer(res, group, table_restype, nullptr, id, status);
+
+    if(isTopLevel) {
+        sid[0] = '\0';
+    }
+
+    current = res->fFirst;
+
+    while (current != nullptr) {
+        res_write_xml(current, sid, language, false, status);
+
+        if(U_FAILURE(*status)){
+            /* the group ID is owned here and must not leak on the error path */
+            uprv_free(sid);
+            return;
+        }
+
+        current = current->fNext;
+    }
+
+    tabCount -= 1;
+    write_tabs(out);
+
+    write_utf8_file(out, UnicodeString(close_group));
+
+    uprv_free(sid);
+    sid = nullptr;
+}
+
+/*
+ * Dispatches a resource to the XLIFF writer matching its type.
+ * Does nothing when *status already indicates failure; a null resource or
+ * an unknown type sets U_INTERNAL_PROGRAM_ERROR. isTopLevel is forwarded to
+ * the table writer only.
+ */
+void
+res_write_xml(struct SResource *res, const char* id, const char* language, UBool isTopLevel, UErrorCode *status) {
+    if (U_FAILURE(*status)) {
+        return;
+    }
+
+    if (res == nullptr) {
+        *status = U_INTERNAL_PROGRAM_ERROR;
+        return;
+    }
+
+    switch (res->fType) {
+    case URES_STRING:
+        string_write_xml(static_cast<StringResource *>(res), id, language, status);
+        break;
+    case URES_ALIAS:
+        alias_write_xml(static_cast<AliasResource *>(res), id, language, status);
+        break;
+    case URES_INT_VECTOR:
+        intvector_write_xml(static_cast<IntVectorResource *>(res), id, language, status);
+        break;
+    case URES_BINARY:
+        bin_write_xml(static_cast<BinaryResource *>(res), id, language, status);
+        break;
+    case URES_INT:
+        int_write_xml(static_cast<IntResource *>(res), id, language, status);
+        break;
+    case URES_ARRAY:
+        array_write_xml(static_cast<ArrayResource *>(res), id, language, status);
+        break;
+    case URES_TABLE:
+        table_write_xml(static_cast<TableResource *>(res), id, language, isTopLevel, status);
+        break;
+    default:
+        /* no writer exists for this resource type */
+        *status = U_INTERNAL_PROGRAM_ERROR;
+        break;
+    }
+}
+
+/**
+ * Writes a whole resource bundle to an XML file named
+ * [outputDir + separator +] (outFileName or bundle locale) + xliffExt.
+ *
+ * The file receives the XML header, the bundle start marker, a <file> element
+ * carrying the source language, the original file name and the current UTC
+ * time, a <header> with a <tool> entry, and a <body> holding the bundle's
+ * root resource as written by res_write_xml().
+ *
+ * Side effects on file-level state: outDir, srBundle, out, enc, conv and
+ * tabCount are all assigned or modified here.
+ *
+ * @param bundle             Bundle to write; its fLocale and fRoot are used.
+ * @param outputDir          Target directory, or null to use the bare file
+ *                           name. An empty string is treated as having no
+ *                           directory part.
+ * @param outputEnc          If non-null and non-empty, remembered in `enc` and
+ *                           used to open a converter with ucnv_open().
+ * @param filename           Name of the input file. The part after the last
+ *                           backslash, minus strlen(textExt) trailing
+ *                           characters, is compared with the bundle locale and
+ *                           written (with textExt appended) as the `original`
+ *                           attribute.
+ * @param writtenFilename    If non-null, receives the output path, truncated
+ *                           if necessary and always NUL-terminated.
+ * @param writtenFilenameLen Capacity of writtenFilename in bytes.
+ * @param language           Source language; if null it is derived from the
+ *                           bundle locale via parseFilename(), and the process
+ *                           exits if that yields nothing.
+ * @param outFileName        Base name of the output file; if null the bundle
+ *                           locale is used.
+ * @param status             ICU error code, in/out. Set to U_FILE_ACCESS_ERROR
+ *                           if the file cannot be opened and to
+ *                           U_MEMORY_ALLOCATION_ERROR if a buffer cannot be
+ *                           allocated.
+ */
+void
+bundle_write_xml(struct SRBRoot *bundle, const char *outputDir,const char* outputEnc, const char* filename,
+                 char *writtenFilename, int writtenFilenameLen,
+                 const char* language, const char* outFileName, UErrorCode *status) {
+
+    char* xmlfileName = nullptr;
+    char* outputFileName = nullptr;
+    char* originalFileName = nullptr;
+    const char* fileStart = "<file xml:space = \"preserve\" source-language = \"";
+    const char* file1 = "\" datatype = \"x-icu-resource-bundle\" ";
+    const char* file2 = "original = \"";
+    const char* file4 = "\" date = \"";
+    const char* fileEnd = "</file>\n";
+    const char* headerStart = "<header>\n";
+    const char* headerEnd = "</header>\n";
+    const char* bodyStart = "<body>\n";
+    const char* bodyEnd = "</body>\n";
+
+    const char *tool_start = "<tool";
+    const char *tool_id = "genrb-" GENRB_VERSION "-icu-" U_ICU_VERSION;
+    const char *tool_name = "genrb";
+
+    /* All locals are declared up front so that no `goto` below jumps across
+     * an initialization. */
+    char* lang = nullptr;
+    const char* pos = nullptr;
+    const char* baseName = nullptr;
+    int32_t first, index;
+    size_t bufSize = 0;
+    size_t dirLen = 0;
+    time_t currTime;
+    char timeBuf[128];
+
+    outDir = outputDir;
+
+    srBundle = bundle;
+
+    /* Strip any leading path (backslash separated) and the trailing
+     * extension from the input file name. */
+    pos = uprv_strrchr(filename, '\\');
+    if(pos != nullptr) {
+        first = (int32_t)(pos - filename + 1);
+    } else {
+        first = 0;
+    }
+    index = (int32_t)(uprv_strlen(filename) - uprv_strlen(textExt) - first);
+    if(index < 0) {
+        /* File name shorter than the extension: treat the stem as empty
+         * instead of passing a negative length to the allocator. */
+        index = 0;
+    }
+
+    /* One buffer, sized for stem + extension, serves both the comparison
+     * with the locale and the final `original` attribute value. */
+    bufSize = (size_t)index + uprv_strlen(textExt) + 1;
+    originalFileName = (char *)uprv_malloc(bufSize);
+    if(originalFileName == nullptr) {
+        *status = U_MEMORY_ALLOCATION_ERROR;
+        goto cleanup_bundle_write_xml;
+    }
+    uprv_memset(originalFileName, 0, bufSize);
+    uprv_strncpy(originalFileName, filename + first, index);
+
+    if(uprv_strcmp(originalFileName, srBundle->fLocale) != 0) {
+        fprintf(stdout, "Warning: The file name is not same as the resource name!\n");
+    }
+
+    uprv_strcat(originalFileName, textExt);
+
+
+    if (language == nullptr) {
+        /* No language given: take it from the locale name of the top-level
+         * table. This caters for a file such as pegasusServer.txt that
+         * contains
+         *
+         * en{
+         *   ..
+         * }
+         */
+        lang = parseFilename(srBundle->fLocale, lang);
+        /*
+         * If the table name does not contain valid language and country
+         * codes either, e.g. pegasusServer.txt contains
+         *
+         * testelements{
+         *   ....
+         * }
+         *
+         * report an error and stop.
+         */
+        if(lang==nullptr){
+            fprintf(stderr, "Error: The file name and table name do not contain a valid language code. Please use -l option to specify it.\n");
+            exit(U_ILLEGAL_ARGUMENT_ERROR);
+        }
+    } else {
+        bufSize = uprv_strlen(language) + 1;
+        lang = (char *)uprv_malloc(bufSize);
+        if(lang == nullptr) {
+            *status = U_MEMORY_ALLOCATION_ERROR;
+            goto cleanup_bundle_write_xml;
+        }
+        uprv_memset(lang, 0, bufSize);
+        uprv_strcpy(lang, language);
+    }
+
+    /* The output base name defaults to the bundle's locale. */
+    if(outFileName) {
+        baseName = outFileName;
+    } else {
+        baseName = srBundle->fLocale;
+    }
+    bufSize = uprv_strlen(baseName) + 1;
+    outputFileName = (char *)uprv_malloc(bufSize);
+    if(outputFileName == nullptr) {
+        *status = U_MEMORY_ALLOCATION_ERROR;
+        goto cleanup_bundle_write_xml;
+    }
+    uprv_memset(outputFileName, 0, bufSize);
+    uprv_strcpy(outputFileName, baseName);
+
+    /* Full path: [outputDir + separator +] base name + xliffExt + NUL. */
+    bufSize = uprv_strlen(outputFileName) + uprv_strlen(xliffExt) + 1;
+    if(outputDir) {
+        dirLen = uprv_strlen(outputDir);
+        bufSize += dirLen + 1;   /* +1 leaves room for a path separator */
+    }
+    xmlfileName = (char *)uprv_malloc(bufSize);
+    if(xmlfileName == nullptr) {
+        *status = U_MEMORY_ALLOCATION_ERROR;
+        goto cleanup_bundle_write_xml;
+    }
+    uprv_memset(xmlfileName, 0, bufSize);
+
+    if(outputDir){
+        uprv_strcpy(xmlfileName, outputDir);
+        /* Guard on dirLen so an empty directory string never indexes
+         * outputDir[-1]. */
+        if(dirLen > 0 && outputDir[dirLen-1] !=U_FILE_SEP_CHAR){
+            uprv_strcat(xmlfileName,U_FILE_SEP_STRING);
+        }
+    }
+    uprv_strcat(xmlfileName,outputFileName);
+    uprv_strcat(xmlfileName,xliffExt);
+
+    if (writtenFilename != nullptr && writtenFilenameLen > 0) {
+        uprv_strncpy(writtenFilename, xmlfileName, writtenFilenameLen);
+        /* strncpy does not terminate the destination when it truncates. */
+        writtenFilename[writtenFilenameLen - 1] = '\0';
+    }
+
+    if (U_FAILURE(*status)) {
+        goto cleanup_bundle_write_xml;
+    }
+
+    out= T_FileStream_open(xmlfileName,"w");
+
+    if(out==nullptr){
+        *status = U_FILE_ACCESS_ERROR;
+        goto cleanup_bundle_write_xml;
+    }
+    write_utf8_file(out, UnicodeString(xmlHeader));
+
+    if(outputEnc && *outputEnc!='\0'){
+        /* store the output encoding */
+        enc = outputEnc;
+        conv=ucnv_open(enc,status);
+        if(U_FAILURE(*status)){
+            /* The stream is already open at this point; close it so the
+             * handle is not leaked on this error path. */
+            T_FileStream_close(out);
+            out = nullptr;
+            goto cleanup_bundle_write_xml;
+        }
+    }
+    write_utf8_file(out, UnicodeString(bundleStart));
+    write_tabs(out);
+    write_utf8_file(out, UnicodeString(fileStart));
+    /* check if lang and language are the same */
+    if(language != nullptr && uprv_strcmp(lang, srBundle->fLocale)!=0){
+        fprintf(stderr,"Warning: The top level tag in the resource and language specified are not the same. Please check the input.\n");
+    }
+    write_utf8_file(out, UnicodeString(lang));
+    write_utf8_file(out, UnicodeString(file1));
+    write_utf8_file(out, UnicodeString(file2));
+    write_utf8_file(out, UnicodeString(originalFileName));
+    write_utf8_file(out, UnicodeString(file4));
+
+    /* Timestamp of generation, in UTC. */
+    time(&currTime);
+    strftime(timeBuf, sizeof(timeBuf), "%Y-%m-%dT%H:%M:%SZ", gmtime(&currTime));
+    write_utf8_file(out, UnicodeString(timeBuf));
+    write_utf8_file(out, UnicodeString("\">\n"));
+
+    tabCount += 1;
+    write_tabs(out);
+    write_utf8_file(out, UnicodeString(headerStart));
+
+    tabCount += 1;
+    write_tabs(out);
+
+    write_utf8_file(out, UnicodeString(tool_start));
+    printAttribute("tool-id", tool_id, (int32_t) uprv_strlen(tool_id));
+    printAttribute("tool-name", tool_name, (int32_t) uprv_strlen(tool_name));
+    write_utf8_file(out, UnicodeString("/>\n"));
+
+    tabCount -= 1;
+    write_tabs(out);
+
+    write_utf8_file(out, UnicodeString(headerEnd));
+
+    write_tabs(out);
+    tabCount += 1;
+
+    write_utf8_file(out, UnicodeString(bodyStart));
+
+
+    res_write_xml(bundle->fRoot, bundle->fLocale, lang, true, status);
+
+    tabCount -= 1;
+    write_tabs(out);
+
+    write_utf8_file(out, UnicodeString(bodyEnd));
+    tabCount--;
+    write_tabs(out);
+    write_utf8_file(out, UnicodeString(fileEnd));
+    tabCount--;
+    write_tabs(out);
+    write_utf8_file(out, UnicodeString(bundleEnd));
+    T_FileStream_close(out);
+    out = nullptr;
+
+    /* Reset the file-level pointer after closing so a later call made
+     * without an output encoding does not close the same converter again. */
+    ucnv_close(conv);
+    conv = nullptr;
+
+cleanup_bundle_write_xml:
+    if(originalFileName != nullptr) {
+        uprv_free(originalFileName);
+    }
+    if(lang != nullptr) {
+        uprv_free(lang);
+    }
+    if(xmlfileName != nullptr) {
+        uprv_free(xmlfileName);
+    }
+    if(outputFileName != nullptr){
+        uprv_free(outputFileName);
+    }
+}