From 36d22d82aa202bb199967e9512281e9a53db42c9 Mon Sep 17 00:00:00 2001
From: Daniel Baumann <daniel.baumann@progress-linux.org>
Date: Sun, 7 Apr 2024 21:33:14 +0200
Subject: Adding upstream version 115.7.0esr.

Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
---
 intl/icu/source/tools/toolutil/BUILD.bazel       |  126 ++
 intl/icu/source/tools/toolutil/Makefile.in       |  155 +++
 intl/icu/source/tools/toolutil/collationinfo.cpp |  152 +++
 intl/icu/source/tools/toolutil/collationinfo.h   |   42 +
 intl/icu/source/tools/toolutil/dbgutil.cpp       |  160 +++
 intl/icu/source/tools/toolutil/dbgutil.h         |   45 +
 intl/icu/source/tools/toolutil/denseranges.cpp   |  160 +++
 intl/icu/source/tools/toolutil/denseranges.h     |   41 +
 intl/icu/source/tools/toolutil/filestrm.cpp      |  227 ++++
 intl/icu/source/tools/toolutil/filestrm.h        |  106 ++
 intl/icu/source/tools/toolutil/filetools.cpp     |  140 +++
 intl/icu/source/tools/toolutil/filetools.h       |   34 +
 intl/icu/source/tools/toolutil/flagparser.cpp    |  180 +++
 intl/icu/source/tools/toolutil/flagparser.h      |   32 +
 intl/icu/source/tools/toolutil/package.cpp       | 1311 ++++++++++++++++++++
 intl/icu/source/tools/toolutil/package.h         |  203 ++++
 intl/icu/source/tools/toolutil/pkg_genc.cpp      | 1396 ++++++++++++++++++++++
 intl/icu/source/tools/toolutil/pkg_genc.h        |  107 ++
 intl/icu/source/tools/toolutil/pkg_gencmn.cpp    |  578 +++++++++
 intl/icu/source/tools/toolutil/pkg_gencmn.h      |   18 +
 intl/icu/source/tools/toolutil/pkg_icu.cpp       |  176 +++
 intl/icu/source/tools/toolutil/pkg_icu.h         |   25 +
 intl/icu/source/tools/toolutil/pkg_imp.h         |   38 +
 intl/icu/source/tools/toolutil/pkgitems.cpp      |  645 ++++++++++
 intl/icu/source/tools/toolutil/ppucd.cpp         |  622 ++++++++++
 intl/icu/source/tools/toolutil/ppucd.h           |  180 +++
 intl/icu/source/tools/toolutil/sources.txt       |   24 +
 intl/icu/source/tools/toolutil/swapimpl.cpp      | 1048 ++++++++++++++++
 intl/icu/source/tools/toolutil/swapimpl.h        |   45 +
 intl/icu/source/tools/toolutil/toolutil.cpp      |  381 ++++++
 intl/icu/source/tools/toolutil/toolutil.h        |  201 ++++
 intl/icu/source/tools/toolutil/toolutil.vcxproj  |  272 +++++
 intl/icu/source/tools/toolutil/ucbuf.cpp         |  788 ++++++++++++
 intl/icu/source/tools/toolutil/ucbuf.h           |  218 ++++
 intl/icu/source/tools/toolutil/ucln_tu.cpp       |   19 +
 intl/icu/source/tools/toolutil/ucm.cpp           | 1195 ++++++++++++++++++
 intl/icu/source/tools/toolutil/ucm.h             |  302 +++++
 intl/icu/source/tools/toolutil/ucmstate.cpp      | 1053 ++++++++++++++++
 intl/icu/source/tools/toolutil/udbgutil.cpp      |  769 ++++++++++++
 intl/icu/source/tools/toolutil/udbgutil.h        |  147 +++
 intl/icu/source/tools/toolutil/unewdata.cpp      |  286 +++++
 intl/icu/source/tools/toolutil/unewdata.h        |  113 ++
 intl/icu/source/tools/toolutil/uoptions.cpp      |  133 +++
 intl/icu/source/tools/toolutil/uoptions.h        |  143 +++
 intl/icu/source/tools/toolutil/uparse.cpp        |  383 ++++++
 intl/icu/source/tools/toolutil/uparse.h          |  153 +++
 intl/icu/source/tools/toolutil/writesrc.cpp      |  515 ++++++++
 intl/icu/source/tools/toolutil/writesrc.h        |  198 +++
 intl/icu/source/tools/toolutil/xmlparser.cpp     |  827 +++++++++++++
 intl/icu/source/tools/toolutil/xmlparser.h       |  247 ++++
 50 files changed, 16359 insertions(+)
 create mode 100644 intl/icu/source/tools/toolutil/BUILD.bazel
 create mode 100644 intl/icu/source/tools/toolutil/Makefile.in
 create mode 100644 intl/icu/source/tools/toolutil/collationinfo.cpp
 create mode 100644 intl/icu/source/tools/toolutil/collationinfo.h
 create mode 100644 intl/icu/source/tools/toolutil/dbgutil.cpp
 create mode 100644 intl/icu/source/tools/toolutil/dbgutil.h
 create mode 100644 intl/icu/source/tools/toolutil/denseranges.cpp
 create mode 100644 intl/icu/source/tools/toolutil/denseranges.h
 create mode 100644 intl/icu/source/tools/toolutil/filestrm.cpp
 create mode 100644 intl/icu/source/tools/toolutil/filestrm.h
 create mode 100644 intl/icu/source/tools/toolutil/filetools.cpp
 create mode 100644 intl/icu/source/tools/toolutil/filetools.h
 create mode 100644 intl/icu/source/tools/toolutil/flagparser.cpp
 create mode 100644 intl/icu/source/tools/toolutil/flagparser.h
 create mode 100644 intl/icu/source/tools/toolutil/package.cpp
 create mode 100644 intl/icu/source/tools/toolutil/package.h
 create mode 100644 intl/icu/source/tools/toolutil/pkg_genc.cpp
 create mode 100644 intl/icu/source/tools/toolutil/pkg_genc.h
 create mode 100644 intl/icu/source/tools/toolutil/pkg_gencmn.cpp
 create mode 100644 intl/icu/source/tools/toolutil/pkg_gencmn.h
 create mode 100644 intl/icu/source/tools/toolutil/pkg_icu.cpp
 create mode 100644 intl/icu/source/tools/toolutil/pkg_icu.h
 create mode 100644 intl/icu/source/tools/toolutil/pkg_imp.h
 create mode 100644 intl/icu/source/tools/toolutil/pkgitems.cpp
 create mode 100644 intl/icu/source/tools/toolutil/ppucd.cpp
 create mode 100644 intl/icu/source/tools/toolutil/ppucd.h
 create mode 100644 intl/icu/source/tools/toolutil/sources.txt
 create mode 100644 intl/icu/source/tools/toolutil/swapimpl.cpp
 create mode 100644 intl/icu/source/tools/toolutil/swapimpl.h
 create mode 100644 intl/icu/source/tools/toolutil/toolutil.cpp
 create mode 100644 intl/icu/source/tools/toolutil/toolutil.h
 create mode 100644 intl/icu/source/tools/toolutil/toolutil.vcxproj
 create mode 100644 intl/icu/source/tools/toolutil/ucbuf.cpp
 create mode 100644 intl/icu/source/tools/toolutil/ucbuf.h
 create mode 100644 intl/icu/source/tools/toolutil/ucln_tu.cpp
 create mode 100644 intl/icu/source/tools/toolutil/ucm.cpp
 create mode 100644 intl/icu/source/tools/toolutil/ucm.h
 create mode 100644 intl/icu/source/tools/toolutil/ucmstate.cpp
 create mode 100644 intl/icu/source/tools/toolutil/udbgutil.cpp
 create mode 100644 intl/icu/source/tools/toolutil/udbgutil.h
 create mode 100644 intl/icu/source/tools/toolutil/unewdata.cpp
 create mode 100644 intl/icu/source/tools/toolutil/unewdata.h
 create mode 100644 intl/icu/source/tools/toolutil/uoptions.cpp
 create mode 100644 intl/icu/source/tools/toolutil/uoptions.h
 create mode 100644 intl/icu/source/tools/toolutil/uparse.cpp
 create mode 100644 intl/icu/source/tools/toolutil/uparse.h
 create mode 100644 intl/icu/source/tools/toolutil/writesrc.cpp
 create mode 100644 intl/icu/source/tools/toolutil/writesrc.h
 create mode 100644 intl/icu/source/tools/toolutil/xmlparser.cpp
 create mode 100644 intl/icu/source/tools/toolutil/xmlparser.h

(limited to 'intl/icu/source/tools/toolutil')

diff --git a/intl/icu/source/tools/toolutil/BUILD.bazel b/intl/icu/source/tools/toolutil/BUILD.bazel
new file mode 100644
index 0000000000..276c857f12
--- /dev/null
+++ b/intl/icu/source/tools/toolutil/BUILD.bazel
@@ -0,0 +1,126 @@
+# © 2021 and later: Unicode, Inc. and others.
+# License & terms of use: http://www.unicode.org/copyright.html
+
+# This Bazel build file defines targets that are dependencies for building
+# the gennorm2 and genprops binaries.
+
+load("@rules_cc//cc:defs.bzl", "cc_binary", "cc_library")
+
+package(
+    default_visibility = ["//visibility:public"],
+)
+
+cc_library(
+    name = "toolutil",
+    includes = ["."],
+    hdrs = ["toolutil.h"],
+    srcs = ["toolutil.cpp"],
+    local_defines = [
+        "U_TOOLUTIL_IMPLEMENTATION",
+    ],
+    deps = ["//icu4c/source/common:platform"],
+)
+
+cc_library(
+    name = "unewdata",
+    includes = ["."],
+    hdrs = ["unewdata.h"],
+    srcs = ["unewdata.cpp"],
+    local_defines = [
+        "U_TOOLUTIL_IMPLEMENTATION",
+    ],
+    deps = [
+        ":filestrm",
+        "//icu4c/source/common:platform",
+    ],
+)
+
+cc_library(
+    name = "uoptions",
+    includes = ["."],
+    hdrs = ["uoptions.h"],
+    srcs = ["uoptions.cpp"],
+    local_defines = [
+        "U_TOOLUTIL_IMPLEMENTATION",
+    ],
+    deps = ["//icu4c/source/common:platform"],
+)
+
+cc_library(
+    name = "writesrc",
+    includes = ["."],
+    hdrs = ["writesrc.h"],
+    srcs = ["writesrc.cpp"],
+    local_defines = [
+        "U_TOOLUTIL_IMPLEMENTATION",
+    ],
+    deps = [
+        "//icu4c/source/common:bytestream",
+        "//icu4c/source/common:platform",
+        "//icu4c/source/common:uniset_core",
+    ],
+)
+
+cc_library(
+    name = "uparse",
+    includes = ["."],
+    hdrs = ["uparse.h"],
+    srcs = ["uparse.cpp"],
+    local_defines = [
+        "U_TOOLUTIL_IMPLEMENTATION",
+    ],
+    deps = [
+        ":filestrm",
+        "//icu4c/source/common:platform",
+        ],
+)
+
+cc_library(
+    name = "filestrm",
+    includes = ["."],
+    hdrs = ["filestrm.h"],
+    srcs = ["filestrm.cpp"],
+    local_defines = [
+        "U_TOOLUTIL_IMPLEMENTATION",
+    ],
+    deps = ["//icu4c/source/common:platform"],
+)
+
+cc_library(
+    name = "ppucd",
+    includes = ["."],
+    hdrs = ["ppucd.h"],
+    srcs = ["ppucd.cpp"],
+    local_defines = [
+        "U_TOOLUTIL_IMPLEMENTATION",
+    ],
+    deps = [
+        ":uparse",
+        "//icu4c/source/common:platform",
+    ],
+)
+
+cc_library(
+    name = "denseranges",
+    includes = ["."],
+    hdrs = ["denseranges.h"],
+    srcs = ["denseranges.cpp"],
+    local_defines = [
+        "U_TOOLUTIL_IMPLEMENTATION",
+    ],
+    deps = ["//icu4c/source/common:platform"],
+)
+
+cc_library(
+    name = "collationinfo",
+    includes = ["."],
+    hdrs = ["collationinfo.h"],
+    srcs = ["collationinfo.cpp"],
+    local_defines = [
+        "U_TOOLUTIL_IMPLEMENTATION",
+    ],
+    deps = [
+        "//icu4c/source/common:platform",
+        "//icu4c/source/i18n:headers",
+    ],
+)
diff --git a/intl/icu/source/tools/toolutil/Makefile.in b/intl/icu/source/tools/toolutil/Makefile.in
new file mode 100644
index 0000000000..c9fd89b0f0
--- /dev/null
+++ b/intl/icu/source/tools/toolutil/Makefile.in
@@ -0,0 +1,155 @@
+# Copyright (C) 2016 and later: Unicode, Inc. and others.
+# License & terms of use: http://www.unicode.org/copyright.html
+#******************************************************************************
+#
+#   Copyright (C) 1999-2014, International Business Machines
+#   Corporation and others.  All Rights Reserved.
+#
+#******************************************************************************
+## Makefile.in for ICU - tools/toolutil
+## Steven R. Loomis
+
+## Source directory information
+srcdir = @srcdir@
+top_srcdir = @top_srcdir@
+
+top_builddir = ../..
+
+## All the flags and other definitions are included here.
+include $(top_builddir)/icudefs.mk
+
+## Build directory information
+subdir = tools/toolutil
+
+## Extra files to remove for 'make clean'
+CLEANFILES = *~ $(DEPS) $(IMPORT_LIB) $(MIDDLE_IMPORT_LIB) $(FINAL_IMPORT_LIB)
+
+## Target information
+
+TARGET_STUBNAME=$(TOOLUTIL_STUBNAME)
+
+ifneq ($(ENABLE_STATIC),)
+TARGET = $(LIBDIR)/$(LIBSICU)$(TARGET_STUBNAME)$(ICULIBSUFFIX).$(A)
+endif
+
+ifneq ($(ENABLE_SHARED),)
+SO_TARGET = $(LIBDIR)/$(LIBICU)$(TARGET_STUBNAME)$(ICULIBSUFFIX).$(SO)
+ALL_SO_TARGETS = $(SO_TARGET) $(MIDDLE_SO_TARGET) $(FINAL_SO_TARGET) $(SHARED_OBJECT)
+endif
+
+ALL_TARGETS = $(TARGET) $(ALL_SO_TARGETS)
+
+DYNAMICCPPFLAGS = $(SHAREDLIBCPPFLAGS)
+DYNAMICCFLAGS = $(SHAREDLIBCFLAGS)
+DYNAMICCXXFLAGS = $(SHAREDLIBCXXFLAGS)
+CFLAGS += $(LIBCFLAGS)
+CXXFLAGS += $(LIBCXXFLAGS)
+
+CPPFLAGS += -I$(srcdir) -I$(top_srcdir)/common -I$(top_srcdir)/i18n $(LIBCPPFLAGS)
+
+# from icuinfo
+CPPFLAGS+=  "-DU_BUILD=\"@build@\"" "-DU_HOST=\"@host@\"" "-DU_CC=\"@CC@\"" "-DU_CXX=\"@CXX@\""
+CPPFLAGS += -DUNISTR_FROM_CHAR_EXPLICIT=explicit -DUNISTR_FROM_STRING_EXPLICIT=explicit
+
+DEFS += -DU_TOOLUTIL_IMPLEMENTATION
+LDFLAGS += $(LDFLAGSICUTOOLUTIL)
+LIBS = $(LIBICUI18N) $(LIBICUUC) $(DEFAULT_LIBS)
+
+SOURCES = $(shell cat $(srcdir)/sources.txt)
+OBJECTS = $(SOURCES:.cpp=.o)
+
+STATIC_OBJECTS = $(OBJECTS:.o=.$(STATIC_O))
+
+DEPS = $(OBJECTS:.o=.d)
+
+-include Makefile.local
+
+## List of phony targets
+.PHONY : all all-local install install-local clean clean-local	\
+distclean distclean-local install-library dist	\
+dist-local check check-local
+
+## Clear suffix list
+.SUFFIXES :
+
+## List of standard targets
+all: all-local
+install: install-local
+clean: clean-local
+distclean : distclean-local
+dist: dist-local
+check: all check-local
+
+all-local: $(ALL_TARGETS)
+
+install-local: install-library
+
+install-library: all-local
+	$(MKINSTALLDIRS) $(DESTDIR)$(libdir)
+ifneq ($(ENABLE_STATIC),)
+	$(INSTALL-L) $(TARGET) $(DESTDIR)$(libdir)
+endif
+ifneq ($(ENABLE_SHARED),)
+# For MinGW, do we want the DLL to go in the bin location?
+ifeq ($(MINGW_MOVEDLLSTOBINDIR),YES)
+	$(MKINSTALLDIRS) $(DESTDIR)$(bindir)
+	$(INSTALL-L) $(FINAL_SO_TARGET) $(DESTDIR)$(bindir)
+else
+	$(INSTALL-L) $(FINAL_SO_TARGET) $(DESTDIR)$(libdir)
+ifneq ($(FINAL_SO_TARGET),$(SO_TARGET))
+	cd $(DESTDIR)$(libdir) && $(RM) $(notdir $(SO_TARGET)) && ln -s $(notdir $(FINAL_SO_TARGET)) $(notdir $(SO_TARGET))
+ifneq ($(FINAL_SO_TARGET),$(MIDDLE_SO_TARGET))
+	cd $(DESTDIR)$(libdir) && $(RM) $(notdir $(MIDDLE_SO_TARGET)) && ln -s $(notdir $(FINAL_SO_TARGET)) $(notdir $(MIDDLE_SO_TARGET))
+endif
+endif
+endif
+ifneq ($(IMPORT_LIB_EXT),)
+	$(INSTALL-L) $(FINAL_IMPORT_LIB) $(DESTDIR)$(libdir)
+ifneq ($(IMPORT_LIB),$(FINAL_IMPORT_LIB))
+	cd $(DESTDIR)$(libdir) && $(RM) $(notdir $(IMPORT_LIB)) && ln -s $(notdir $(FINAL_IMPORT_LIB)) $(notdir $(IMPORT_LIB))
+endif
+ifneq ($(MIDDLE_IMPORT_LIB),$(FINAL_IMPORT_LIB))
+	cd $(DESTDIR)$(libdir) && $(RM) $(notdir $(MIDDLE_IMPORT_LIB)) && ln -s $(notdir $(FINAL_IMPORT_LIB)) $(notdir $(MIDDLE_IMPORT_LIB))
+endif
+endif
+endif
+
+dist-local:
+
+clean-local:
+	test -z "$(CLEANFILES)" || $(RMV) $(CLEANFILES)
+	$(RMV) $(OBJECTS) $(STATIC_OBJECTS) $(ALL_TARGETS)
+
+distclean-local: clean-local
+	$(RMV) Makefile
+
+check-local: all-local
+
+Makefile: $(srcdir)/Makefile.in  $(top_builddir)/config.status
+	cd $(top_builddir) \
+	 && CONFIG_FILES=$(subdir)/$@ CONFIG_HEADERS= $(SHELL) ./config.status
+
+ifneq ($(ENABLE_STATIC),)
+$(TARGET): $(STATIC_OBJECTS)
+	$(AR) $(ARFLAGS) $(AR_OUTOPT)$@ $^
+	$(RANLIB) $@
+endif
+
+ifneq ($(ENABLE_SHARED),)
+$(SHARED_OBJECT): $(OBJECTS)
+	$(SHLIB.cc) $(LD_SONAME) $(OUTOPT)$@ $^ $(LIBS)
+ifeq ($(ENABLE_RPATH),YES)
+ifneq ($(wildcard $(libdir)/$(MIDDLE_SO_TARGET)),)
+	$(warning RPATH warning: --enable-rpath means test programs may use existing $(libdir)/$(MIDDLE_SO_TARGET))
+endif
+endif
+endif
+
+ifeq (,$(MAKECMDGOALS))
+-include $(DEPS)
+else
+ifneq ($(patsubst %clean,,$(MAKECMDGOALS)),)
+-include $(DEPS)
+endif
+endif
+
diff --git a/intl/icu/source/tools/toolutil/collationinfo.cpp b/intl/icu/source/tools/toolutil/collationinfo.cpp
new file mode 100644
index 0000000000..6bad90e133
--- /dev/null
+++ b/intl/icu/source/tools/toolutil/collationinfo.cpp
@@ -0,0 +1,152 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+* Copyright (C) 2013-2015, International Business Machines
+* Corporation and others.  All Rights Reserved.
+*******************************************************************************
+* collationinfo.cpp
+*
+* created on: 2013aug05
+* created by: Markus W. Scherer
+*/
+
+#include <stdio.h>
+#include <string.h>
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_COLLATION
+
+#include "collationdata.h"
+#include "collationdatareader.h"
+#include "collationinfo.h"
+#include "uassert.h"
+#include "uvectr32.h"
+
+U_NAMESPACE_BEGIN
+
+void
+CollationInfo::printSizes(int32_t sizeWithHeader, const int32_t indexes[]) {  // prints a size report via printf()
+    int32_t totalSize = indexes[CollationDataReader::IX_TOTAL_SIZE];
+    if(sizeWithHeader > totalSize) {  // the excess over IX_TOTAL_SIZE is reported as the header
+        printf("  header size:                  %6ld\n", (long)(sizeWithHeader - totalSize));
+    }
+    // The indexes array itself: IX_INDEXES_LENGTH entries, shown as count *4 = bytes.
+    int32_t length = indexes[CollationDataReader::IX_INDEXES_LENGTH];
+    printf("  indexes:          %6ld *4 = %6ld\n", (long)length, (long)length * 4);
+    // Each section below is printed only when getDataLength() reports a non-zero length.
+    length = getDataLength(indexes, CollationDataReader::IX_REORDER_CODES_OFFSET);
+    if(length != 0) {
+        printf("  reorder codes:    %6ld *4 = %6ld\n", (long)length / 4, (long)length);
+    }
+
+    length = getDataLength(indexes, CollationDataReader::IX_REORDER_TABLE_OFFSET);
+    if(length != 0) {
+        U_ASSERT(length >= 256);  // a non-empty reorder table is expected to hold at least 256 bytes
+        printf("  reorder table:                %6ld\n", (long)length);
+    }
+
+    length = getDataLength(indexes, CollationDataReader::IX_TRIE_OFFSET);
+    if(length != 0) {
+        printf("  trie size:                    %6ld\n", (long)length);
+    }
+
+    length = getDataLength(indexes, CollationDataReader::IX_RESERVED8_OFFSET);
+    if(length != 0) {
+        printf("  reserved (offset 8):          %6ld\n", (long)length);
+    }
+
+    length = getDataLength(indexes, CollationDataReader::IX_CES_OFFSET);
+    if(length != 0) {
+        printf("  CEs:              %6ld *8 = %6ld\n", (long)length / 8, (long)length);
+    }
+
+    length = getDataLength(indexes, CollationDataReader::IX_RESERVED10_OFFSET);
+    if(length != 0) {
+        printf("  reserved (offset 10):         %6ld\n", (long)length);
+    }
+
+    length = getDataLength(indexes, CollationDataReader::IX_CE32S_OFFSET);
+    if(length != 0) {
+        printf("  CE32s:            %6ld *4 = %6ld\n", (long)length / 4, (long)length);
+    }
+
+    length = getDataLength(indexes, CollationDataReader::IX_ROOT_ELEMENTS_OFFSET);
+    if(length != 0) {
+        printf("  rootElements:     %6ld *4 = %6ld\n", (long)length / 4, (long)length);
+    }
+
+    length = getDataLength(indexes, CollationDataReader::IX_CONTEXTS_OFFSET);
+    if(length != 0) {
+        printf("  contexts:         %6ld *2 = %6ld\n", (long)length / 2, (long)length);
+    }
+
+    length = getDataLength(indexes, CollationDataReader::IX_UNSAFE_BWD_OFFSET);
+    if(length != 0) {
+        printf("  unsafeBwdSet:     %6ld *2 = %6ld\n", (long)length / 2, (long)length);
+    }
+
+    length = getDataLength(indexes, CollationDataReader::IX_FAST_LATIN_TABLE_OFFSET);
+    if(length != 0) {
+        printf("  fastLatin table:  %6ld *2 = %6ld\n", (long)length / 2, (long)length);
+    }
+
+    length = getDataLength(indexes, CollationDataReader::IX_SCRIPTS_OFFSET);
+    if(length != 0) {
+        printf("  scripts data:     %6ld *2 = %6ld\n", (long)length / 2, (long)length);
+    }
+
+    length = getDataLength(indexes, CollationDataReader::IX_COMPRESSIBLE_BYTES_OFFSET);
+    if(length != 0) {
+        U_ASSERT(length >= 256);  // a non-empty compressibleBytes section is expected to hold at least 256 bytes
+        printf("  compressibleBytes:            %6ld\n", (long)length);
+    }
+
+    length = getDataLength(indexes, CollationDataReader::IX_RESERVED18_OFFSET);
+    if(length != 0) {
+        printf("  reserved (offset 18):         %6ld\n", (long)length);
+    }
+
+    printf(" collator binary total size:    %6ld\n", (long)sizeWithHeader);
+}
+
+int32_t CollationInfo::getDataLength(const int32_t indexes[], int32_t startIndex) {
+    const int32_t *slot = indexes + startIndex;
+    return slot[1] - slot[0];  // value in the next index slot minus the value in this one
+}
+
+void
+CollationInfo::printReorderRanges(const CollationData &data, const int32_t *codes, int32_t length) {
+    UErrorCode errorCode = U_ZERO_ERROR;
+    UVector32 ranges(errorCode);
+    data.makeReorderRanges(codes, length, ranges, errorCode);
+    if(U_FAILURE(errorCode)) {
+        printf("  error building reorder ranges: %s\n", u_errorName(errorCode));
+        return;
+    }
+
+    int32_t start = 0;
+    for(int32_t i = 0; i < ranges.size(); ++i) {
+        int32_t pair = ranges.elementAti(i);
+        int32_t limit = (pair >> 16) & 0xffff;
+        int16_t offset = (int16_t)pair;
+        if(offset == 0) {
+            // [inclusive-start, exclusive-limit[
+            printf("          [%04x, %04x[\n", start, limit);
+        } else if(offset > 0) {
+            printf("  reorder [%04x, %04x[ by offset  %02x to [%04x, %04x[\n",
+                    start, limit, offset,
+                    start + (offset << 8), limit + (offset << 8));
+        } else /* offset < 0 */ {
+            printf("  reorder [%04x, %04x[ by offset -%02x to [%04x, %04x[\n",
+                    start, limit, -offset,
+                    start + (offset << 8), limit + (offset << 8));
+        }
+        start = limit;
+    }
+}
+
+U_NAMESPACE_END
+
+#endif  // !UCONFIG_NO_COLLATION
diff --git a/intl/icu/source/tools/toolutil/collationinfo.h b/intl/icu/source/tools/toolutil/collationinfo.h
new file mode 100644
index 0000000000..815b89d40d
--- /dev/null
+++ b/intl/icu/source/tools/toolutil/collationinfo.h
@@ -0,0 +1,42 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+* Copyright (C) 2013-2015, International Business Machines
+* Corporation and others.  All Rights Reserved.
+*******************************************************************************
+* collationinfo.h
+*
+* created on: 2013aug05
+* created by: Markus W. Scherer
+*/
+
+#ifndef __COLLATIONINFO_H__
+#define __COLLATIONINFO_H__
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_COLLATION
+
+U_NAMESPACE_BEGIN
+
+struct CollationData;
+
+/**
+ * Collation-related code for tools & demos. All members are static; the print functions write via printf().
+ */
+class U_TOOLUTIL_API CollationInfo /* all static */ {
+public:
+    static void printSizes(int32_t sizeWithHeader, const int32_t indexes[]);  // per-section size report
+    static void printReorderRanges(const CollationData &data, const int32_t *codes, int32_t length);  // one line per range
+
+private:
+    CollationInfo();  // no constructor
+    // Returns indexes[startIndex + 1] - indexes[startIndex].
+    static int32_t getDataLength(const int32_t indexes[], int32_t startIndex);
+};
+
+U_NAMESPACE_END
+
+#endif  // !UCONFIG_NO_COLLATION
+#endif  // __COLLATIONINFO_H__
diff --git a/intl/icu/source/tools/toolutil/dbgutil.cpp b/intl/icu/source/tools/toolutil/dbgutil.cpp
new file mode 100644
index 0000000000..d42b267f73
--- /dev/null
+++ b/intl/icu/source/tools/toolutil/dbgutil.cpp
@@ -0,0 +1,160 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/********************************************************************
+ * COPYRIGHT:
+ * Copyright (c) 2007-2012, International Business Machines Corporation and
+ * others. All Rights Reserved.
+ ********************************************************************/
+
+#include "udbgutil.h"
+#include "dbgutil.h"
+
+#if !UCONFIG_NO_FORMATTING
+
+#include "unicode/unistr.h"
+#include "unicode/ustring.h"
+#include "util.h"
+#include "ucln.h"
+
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+
+U_NAMESPACE_USE
+
+static UnicodeString **strs = nullptr;
+
+static const UnicodeString&  _fieldString(UDebugEnumType type, int32_t field, UnicodeString& fillin) {
+    // Look up the name of the enum constant; when there is none, fillin is emptied instead.
+    const char *name = udbg_enumName(type, field);
+    if(name != nullptr) {
+        return fillin = UnicodeString(name, -1, US_INV);
+    }
+    return fillin.remove();
+}
+
+U_CDECL_BEGIN
+static void udbg_cleanup() {
+    if(strs == nullptr) {
+        return;  // the cache was never built
+    }
+    // udbg_setup() allocates slots 0..UDBG_ENUM_COUNT inclusive, so free all of them.
+    for(int slot=0; slot<=UDBG_ENUM_COUNT; ++slot) { delete [] strs[slot]; }
+    delete[] strs;
+    strs = nullptr;
+}
+
+static UBool tu_cleanup()  // callback passed to ucln_registerCleanup() by udbg_register_cleanup()
+{
+    udbg_cleanup();  // frees the cached enum-name strings
+    return true;  // always returns true
+}
+
+static void udbg_register_cleanup() {  // called from udbg_setup() before the cache is built
+   ucln_registerCleanup(UCLN_TOOLUTIL, tu_cleanup);  // registers tu_cleanup under the UCLN_TOOLUTIL id
+}
+U_CDECL_END
+
+static void udbg_setup() {
+    if(strs != nullptr) {
+        return;  // cache already built
+    }
+    udbg_register_cleanup();
+    // One array of names per enum type, plus a final slot holding a single empty string.
+    UnicodeString **table = new UnicodeString*[UDBG_ENUM_COUNT+1];
+    for(int typeIdx=0; typeIdx<UDBG_ENUM_COUNT; ++typeIdx) {
+        const int32_t count = udbg_enumCount((UDebugEnumType)typeIdx);
+        UnicodeString *names = table[typeIdx] = new UnicodeString[count+1];
+        for(int fieldIdx=0; fieldIdx<=count; ++fieldIdx) {
+            _fieldString((UDebugEnumType)typeIdx, fieldIdx, names[fieldIdx]);
+        }
+    }
+    table[UDBG_ENUM_COUNT] = new UnicodeString[1]; // empty string
+    // Assign the global only after every slot has been filled in.
+    strs = table;
+}
+
+
+
+// Returns the cached name of constant `field` of enum `type`; the cache is
+// built on first use. An invalid type yields the empty string stored at
+// [UDBG_ENUM_COUNT][0]; an out-of-range field yields the entry at index count.
+U_TOOLUTIL_API const UnicodeString& U_EXPORT2 udbg_enumString(UDebugEnumType type, int32_t field) {
+    if(strs == nullptr ) {
+        udbg_setup();
+    }
+    if(type<0||type>=UDBG_ENUM_COUNT) {
+        // use UDBG_ENUM_COUNT,0  to mean an empty string
+        return strs[UDBG_ENUM_COUNT][0];
+    }
+    const int32_t count = udbg_enumCount(type);
+    int32_t index = field;
+    if(field<0 || field > count) {
+        index = count;
+    }
+    return strs[type][index];
+}
+
+// Reverse lookup: returns the index of the field of `type` whose cached name
+// equals `string`, or -1 if the type is invalid or no name matches.
+U_CAPI int32_t  U_EXPORT2 udbg_enumByString(UDebugEnumType type, const UnicodeString& string) {
+    if(type<0||type>=UDBG_ENUM_COUNT) {
+        return -1;
+    }
+    // Called only so that the name cache is initialized before strs is read.
+    udbg_enumString(type,0);
+    const UnicodeString *names = strs[type];
+    for(int idx=0; idx<udbg_enumCount(type); ++idx) {
+        if(string == names[idx]) {
+            return idx;
+        }
+    }
+    return -1;
+}
+
+// from DataMap::utoi. Converts s to char and parses it with atoi(); input that
+// does not fit the local buffer is truncated instead of overrunning it.
+U_CAPI int32_t udbg_stoi(const UnicodeString &s)
+{
+    char ch[256];
+    const char16_t *u = toUCharPtr(s.getBuffer());
+    int32_t len = s.length() < (int32_t)sizeof(ch) ? s.length() : (int32_t)sizeof(ch) - 1;  // clamp: keep room for the NUL
+    u_UCharsToChars(u, ch, len);
+    ch[len] = 0; /* include terminating \0 */
+    return atoi(ch);
+}
+
+
+U_CAPI double udbg_stod(const UnicodeString &s)
+{
+    // Converts s to char and parses it with atof(); input too long for ch is truncated, not overrun.
+    char ch[256];
+    const char16_t *u = toUCharPtr(s.getBuffer());
+    int32_t len = s.length() < (int32_t)sizeof(ch) ? s.length() : (int32_t)sizeof(ch) - 1;  // clamp: keep room for the NUL
+    u_UCharsToChars(u, ch, len);
+    ch[len] = 0; /* include terminating \0 */
+    return atof(ch);
+}
+
+U_CAPI UnicodeString *
+udbg_escape(const UnicodeString &src, UnicodeString *dst)
+{
+    // Rebuilds *dst from src: printable code units are appended unchanged,
+    // unprintable ones are appended as their escape sequence wrapped in [ ].
+    dst->remove();
+    for (int32_t pos = 0; pos < src.length(); ++pos) {
+        const char16_t unit = src[pos];
+        if(!ICU_Utility::isUnprintable(unit)) {
+            *dst += unit;
+            continue;
+        }
+        *dst += UnicodeString("[");
+        ICU_Utility::escapeUnprintable(*dst, unit);
+        *dst += UnicodeString("]");
+    }
+    return dst;
+}
+
+
+
+#endif
diff --git a/intl/icu/source/tools/toolutil/dbgutil.h b/intl/icu/source/tools/toolutil/dbgutil.h
new file mode 100644
index 0000000000..43fe2171b4
--- /dev/null
+++ b/intl/icu/source/tools/toolutil/dbgutil.h
@@ -0,0 +1,45 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+
+/*
+************************************************************************
+* Copyright (c) 2007-2012, International Business Machines
+* Corporation and others.  All Rights Reserved.
+************************************************************************
+*/
+
+/** C++ Utilities to aid in debugging **/
+
+#ifndef _DBGUTIL_H
+#define _DBGUTIL_H
+
+#include "unicode/utypes.h"
+#include "udbgutil.h"
+#include "unicode/unistr.h"
+
+#if !UCONFIG_NO_FORMATTING
+
+U_TOOLUTIL_API const icu::UnicodeString& U_EXPORT2
+udbg_enumString(UDebugEnumType type, int32_t field);
+
+/**
+ * @return enum offset, or UDBG_INVALID_ENUM on error
+ */ 
+U_CAPI int32_t U_EXPORT2
+udbg_enumByString(UDebugEnumType type, const icu::UnicodeString& string);
+
+/**
+ * Convert a UnicodeString (with ascii digits) into a number.
+ * @param s string
+ * @return numerical value, or 0 on error
+ */
+U_CAPI int32_t U_EXPORT2 udbg_stoi(const icu::UnicodeString &s);
+
+U_CAPI double U_EXPORT2 udbg_stod(const icu::UnicodeString &s);
+
+U_CAPI icu::UnicodeString * U_EXPORT2
+udbg_escape(const icu::UnicodeString &s, icu::UnicodeString *dst);
+
+#endif
+
+#endif
diff --git a/intl/icu/source/tools/toolutil/denseranges.cpp b/intl/icu/source/tools/toolutil/denseranges.cpp
new file mode 100644
index 0000000000..f5e52b1bbb
--- /dev/null
+++ b/intl/icu/source/tools/toolutil/denseranges.cpp
@@ -0,0 +1,160 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+*   Copyright (C) 2010, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*******************************************************************************
+*   file name:  denseranges.cpp
+*   encoding:   UTF-8
+*   tab size:   8 (not used)
+*   indentation:4
+*
+*   created on: 2010sep25
+*   created by: Markus W. Scherer
+*
+* Helper code for finding a small number of dense ranges.
+*/
+
+#include "unicode/utypes.h"
+#include "denseranges.h"
+
+// Definitions in the anonymous namespace are invisible outside this file.
+namespace {
+
+/**
+ * Collect up to 15 range gaps, kept sorted by descending gap size (largest gap at index 0).
+ */
+class LargestGaps {
+public:
+    LargestGaps(int32_t max) : maxLength(max<=kCapacity ? max : kCapacity), length(0) {}  // max above kCapacity is clamped
+    // Offers one gap; it is stored only if it ranks among the maxLength largest seen so far.
+    void add(int32_t gapStart, int64_t gapLength) {
+        int32_t i=length;
+        while(i>0 && gapLength>gapLengths[i-1]) {
+            --i;
+        }
+        if(i<maxLength) {
+            // The new gap is now one of the maxLength largest.
+            // Insert the new gap, moving up smaller ones of the previous
+            // length largest.
+            int32_t j= length<maxLength ? length++ : maxLength-1;  // grow if there is room, else overwrite the last (smallest) entry
+            while(j>i) {
+                gapStarts[j]=gapStarts[j-1];
+                gapLengths[j]=gapLengths[j-1];
+                --j;
+            }
+            gapStarts[i]=gapStart;
+            gapLengths[i]=gapLength;
+        }
+    }
+    // Keeps at most newLength gaps; the ones dropped are the smallest, since they sit at the end.
+    void truncate(int32_t newLength) {
+        if(newLength<length) {
+            length=newLength;
+        }
+    }
+
+    int32_t count() const { return length; }
+    int32_t gapStart(int32_t i) const { return gapStarts[i]; }
+    int64_t gapLength(int32_t i) const { return gapLengths[i]; }
+    // Returns the index of the stored gap with the smallest start greater than value, or -1 if there is none.
+    int32_t firstAfter(int32_t value) const {
+        if(length==0) {
+            return -1;
+        }
+        int32_t minValue=0;
+        int32_t minIndex=-1;
+        for(int32_t i=0; i<length; ++i) {
+            if(value<gapStarts[i] && (minIndex<0 || gapStarts[i]<minValue)) {
+                minValue=gapStarts[i];
+                minIndex=i;
+            }
+        }
+        return minIndex;
+    }
+
+private:
+    static const int32_t kCapacity=15;
+    // gapStarts/gapLengths are parallel arrays; only their first length entries are in use.
+    int32_t maxLength;
+    int32_t length;
+    int32_t gapStarts[kCapacity];
+    int64_t gapLengths[kCapacity];
+};
+
+}  // namespace
+
+/**
+ * Does it make sense to write 1..capacity ranges?
+ * Returns 0 if not, otherwise the number of ranges.
+ * @param values Sorted array of signed-integer values.
+ * @param length Number of values.
+ * @param density Minimum average range density, in 256th. (0x100=100%=perfectly dense.)
+ *                Should be 0x80..0x100, must be 1..0x100.
+ * @param ranges Output ranges array.
+ * @param capacity Maximum number of ranges.
+ * @return Minimum number of ranges (at most capacity) that have the desired density,
+ *         or 0 if that density cannot be achieved.
+ */
+U_CAPI int32_t U_EXPORT2
+uprv_makeDenseRanges(const int32_t values[], int32_t length,
+                     int32_t density,
+                     int32_t ranges[][2], int32_t capacity) {
+    if(length<=2) {  // with at most 2 values no ranges are reported
+        return 0;
+    }
+    int32_t minValue=values[0];
+    int32_t maxValue=values[length-1];  // Assume minValue<=maxValue.
+    // Use int64_t variables for intermediate-value precision and to avoid
+    // signed-int32_t overflow of maxValue-minValue.
+    int64_t maxLength=(int64_t)maxValue-(int64_t)minValue+1;
+    if(length>=(density*maxLength)/0x100) {  // density is in 256ths, hence the division by 0x100
+        // Use one range.
+        ranges[0][0]=minValue;
+        ranges[0][1]=maxValue;
+        return 1;
+    }
+    if(length<=4) {  // 4 or fewer values are never split into several ranges
+        return 0;
+    }
+    // See if we can split [minValue, maxValue] into 2..capacity ranges,
+    // divided by the 1..(capacity-1) largest gaps.
+    LargestGaps gaps(capacity-1);
+    int32_t i;
+    int32_t expectedValue=minValue;
+    for(i=1; i<length; ++i) {
+        ++expectedValue;  // the value that would follow without a gap
+        int32_t actualValue=values[i];
+        if(expectedValue!=actualValue) {
+            gaps.add(expectedValue, (int64_t)actualValue-(int64_t)expectedValue);
+            expectedValue=actualValue;
+        }
+    }
+    // We know gaps.count()>=1 because we have fewer values (length) than
+    // the length of the [minValue..maxValue] range (maxLength).
+    // (Otherwise we would have returned with the one range above.)
+    int32_t num;
+    for(i=0, num=2;; ++i, ++num) {  // gap i is the (i+1)th largest; cutting it out gives num ranges
+        if(i>=gaps.count()) {
+            // The values are too sparse for capacity or fewer ranges
+            // of the requested density.
+            return 0;
+        }
+        maxLength-=gaps.gapLength(i);  // combined span of the ranges once this gap is cut out
+        if(length>num*2 && length>=(density*maxLength)/0x100) {  // more than 2 values per range on average, and dense enough
+            break;
+        }
+    }
+    // Use the num ranges with the num-1 largest gaps.
+    gaps.truncate(num-1);
+    ranges[0][0]=minValue;
+    for(i=0; i<=num-2; ++i) {  // emit ranges in ascending order, one kept gap per iteration
+        int32_t gapIndex=gaps.firstAfter(minValue);  // next kept gap above the current range start
+        int32_t gapStart=gaps.gapStart(gapIndex);
+        ranges[i][1]=gapStart-1;
+        ranges[i+1][0]=minValue=(int32_t)(gapStart+gaps.gapLength(gapIndex));  // the next range starts right after the gap
+    }
+    ranges[num-1][1]=maxValue;
+    return num;
+}
diff --git a/intl/icu/source/tools/toolutil/denseranges.h b/intl/icu/source/tools/toolutil/denseranges.h
new file mode 100644
index 0000000000..c489ca47d8
--- /dev/null
+++ b/intl/icu/source/tools/toolutil/denseranges.h
@@ -0,0 +1,41 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+*   Copyright (C) 2010, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*******************************************************************************
+*   file name:  denseranges.h
+*   encoding:   UTF-8
+*   tab size:   8 (not used)
+*   indentation:4
+*
+*   created on: 2010sep25
+*   created by: Markus W. Scherer
+*
+* Helper code for finding a small number of dense ranges.
+*/
+
+#ifndef __DENSERANGES_H__
+#define __DENSERANGES_H__
+
+#include "unicode/utypes.h"
+
+/**
+ * Does it make sense to write 1..capacity ranges?
+ * Returns 0 if not, otherwise the number of ranges.
+ * @param values Sorted array of signed-integer values.
+ * @param length Number of values.
+ * @param density Minimum average range density, in 256th. (0x100=100%=perfectly dense.)
+ *                Should be 0x80..0x100, must be 1..0x100.
+ * @param ranges Output ranges array.
+ * @param capacity Maximum number of ranges.
+ * @return Minimum number of ranges (at most capacity) that have the desired density,
+ *         or 0 if that density cannot be achieved.
+ */
+U_CAPI int32_t U_EXPORT2
+uprv_makeDenseRanges(const int32_t values[], int32_t length,
+                     int32_t density,
+                     int32_t ranges[][2], int32_t capacity);
+
+#endif  // __DENSERANGES_H__
diff --git a/intl/icu/source/tools/toolutil/filestrm.cpp b/intl/icu/source/tools/toolutil/filestrm.cpp
new file mode 100644
index 0000000000..9a2695197a
--- /dev/null
+++ b/intl/icu/source/tools/toolutil/filestrm.cpp
@@ -0,0 +1,227 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+******************************************************************************
+*
+*   Copyright (C) 1997-2011, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*
+******************************************************************************
+*
+* File FILESTRM.C
+*
+* @author       Glenn Marcy
+*
+* Modification History:
+*
+*   Date        Name        Description
+*   5/8/98      gm          Created
+*  03/02/99     stephen     Reordered params in ungetc to match stdio
+*                           Added wopen
+*   3/29/99     helena      Merged Stephen and Bertrand's changes.
+*
+******************************************************************************
+*/
+
+#include "filestrm.h"
+
+#include "cmemory.h"
+
+#include <stdio.h>
+
+U_CAPI FileStream* U_EXPORT2
+T_FileStream_open(const char* filename, const char* mode)
+{
+    /* Missing or empty arguments never reach fopen(); they yield nullptr. */
+    if(filename == nullptr || *filename == 0 || mode == nullptr || *mode == 0) {
+        return nullptr;
+    }
+    /* The FILE pointer is handed out behind the opaque FileStream type. */
+    return (FileStream*)fopen(filename, mode);
+}
+
+/*
+U_CAPI FileStream* U_EXPORT2
+T_FileStream_wopen(const wchar_t* filename, const wchar_t* mode)
+{
+   // TBD: _wfopen is believed to be MS-specific? 
+#if U_PLATFORM_USES_ONLY_WIN32_API
+    FILE* result = _wfopen(filename, mode);
+    return (FileStream*)result;
+#else
+    size_t fnMbsSize, mdMbsSize;
+    char *fn, *md;
+    FILE *result;
+
+    // convert from wchar_t to char 
+    fnMbsSize = wcstombs(nullptr, filename, ((size_t)-1) >> 1);
+    fn = (char*)uprv_malloc(fnMbsSize+2);
+    wcstombs(fn, filename, fnMbsSize);
+    fn[fnMbsSize] = 0;
+
+    mdMbsSize = wcstombs(nullptr, mode, ((size_t)-1) >> 1);
+    md = (char*)uprv_malloc(mdMbsSize+2);
+    wcstombs(md, mode, mdMbsSize);
+    md[mdMbsSize] = 0;
+
+    result = fopen(fn, md);
+    uprv_free(fn);
+    uprv_free(md);
+    return (FileStream*)result;
+#endif
+}
+*/
+U_CAPI void U_EXPORT2
+T_FileStream_close(FileStream* fileStream)
+{
+    /* Closing a null stream is a no-op. */
+    if (fileStream != nullptr) { fclose((FILE*)fileStream); }
+}
+
+U_CAPI UBool U_EXPORT2
+T_FileStream_file_exists(const char* filename)
+{
+    /* "Exists" means "can be opened for reading". A null name is reported as
+       missing rather than handed to fopen(), which would be undefined. */
+    FILE* probe = (filename != nullptr) ? fopen(filename, "r") : nullptr;
+    UBool found = (probe != nullptr);
+    if (found) { fclose(probe); }
+    return found;
+}
+
+/*static const int32_t kEOF;
+const int32_t FileStream::kEOF = EOF;*/
+
+/*
+U_CAPI FileStream*
+T_FileStream_tmpfile()
+{
+    FILE* file = tmpfile();
+    return (FileStream*)file;
+}
+*/
+
+U_CAPI int32_t U_EXPORT2
+T_FileStream_read(FileStream* fileStream, void* addr, int32_t len) {
+    size_t got = fread(addr, 1, len, (FILE*)fileStream);  /* items are single bytes */
+    return static_cast<int32_t>(got);
+}
+
+U_CAPI int32_t U_EXPORT2
+T_FileStream_write(FileStream* fileStream, const void* addr, int32_t len) {
+    /* Writes len single bytes from addr; returns how many fwrite() accepted. */
+    size_t put = fwrite(addr, 1, len, (FILE*)fileStream);
+    return static_cast<int32_t>(put);
+}
+
+U_CAPI void U_EXPORT2
+T_FileStream_rewind(FileStream* fileStream) {
+    FILE *stream = (FILE*)fileStream;
+    rewind(stream);  /* back to the start of the stream */
+}
+
+U_CAPI int32_t U_EXPORT2
+T_FileStream_putc(FileStream* fileStream, int32_t ch)
+{
+    /* Forwards to fputc() and hands back its result unchanged. */
+    return fputc(ch, (FILE*)fileStream);
+}
+
+U_CAPI int U_EXPORT2
+T_FileStream_getc(FileStream* fileStream)
+{
+    /* Forwards to fgetc(); the int result (including EOF) is returned as is. */
+    return fgetc((FILE*)fileStream);
+}
+
+U_CAPI int32_t U_EXPORT2
+T_FileStream_ungetc(int32_t ch, FileStream* fileStream)
+{
+    /* Parameter order mirrors stdio's ungetc(): character first, stream second. */
+    FILE *stream = (FILE*)fileStream;
+    return ungetc(ch, stream);
+}
+
+U_CAPI int32_t U_EXPORT2
+T_FileStream_peek(FileStream* fileStream) {
+    FILE *stream = (FILE*)fileStream;
+    /* Read one character and push it straight back; the result is ungetc()'s. */
+    return ungetc(fgetc(stream), stream);
+}
+
+U_CAPI char* U_EXPORT2
+T_FileStream_readLine(FileStream* fileStream, char* buffer, int32_t length) {
+    FILE *stream = (FILE*)fileStream;
+    return fgets(buffer, length, stream);  /* fgets() result is passed through */
+}
+
+U_CAPI int32_t U_EXPORT2
+T_FileStream_writeLine(FileStream* fileStream, const char* buffer) {
+    FILE *stream = (FILE*)fileStream;
+    return fputs(buffer, stream);  /* fputs() adds no newline of its own */
+}
+
+U_CAPI int32_t U_EXPORT2
+T_FileStream_size(FileStream* fileStream)
+{
+    /* Measures the stream by seeking to its end, then restores the position
+       the caller was at. The saved offset stays a long, as ftell() returns
+       it, so that restoring is not affected by narrowing to int32_t. */
+    FILE *stream = (FILE*)fileStream;
+    long savedPos = ftell(stream);
+    fseek(stream, 0, SEEK_END);
+    int32_t size = (int32_t)ftell(stream);
+    fseek(stream, savedPos, SEEK_SET);
+    return size;
+}
+
+U_CAPI int U_EXPORT2
+T_FileStream_eof(FileStream* fileStream) {
+    FILE *stream = (FILE*)fileStream;
+    return feof(stream);  /* nonzero once the end-of-file indicator is set */
+}
+
+/*
+ Warning 
+ This function may not work consistently on all platforms
+ (e.g. HP-UX, FreeBSD and MacOSX don't return an error when
+ putc is used on a file opened as readonly)
+*/
+U_CAPI int U_EXPORT2
+T_FileStream_error(FileStream* fileStream) {
+    if (fileStream == nullptr) { return true; }  /* a missing stream is an error */
+    return ferror((FILE*)fileStream) != 0;
+}
+
+/* This function doesn't work. */
+/* force the stream to set its error flag*/
+/*U_CAPI void U_EXPORT2
+T_FileStream_setError(FileStream* fileStream)
+{
+    fseek((FILE*)fileStream, 99999, SEEK_SET);
+}
+*/
+
+U_CAPI FileStream* U_EXPORT2
+T_FileStream_stdin() {
+    FILE *standardInput = stdin;  /* stdio's standard input stream */
+    return (FileStream*)standardInput;
+}
+
+U_CAPI FileStream* U_EXPORT2
+T_FileStream_stdout() {
+    FILE *standardOutput = stdout;  /* stdio's standard output stream */
+    return (FileStream*)standardOutput;
+}
+
+
+U_CAPI FileStream* U_EXPORT2
+T_FileStream_stderr() {
+    FILE *standardError = stderr;  /* stdio's standard error stream */
+    return (FileStream*)standardError;
+}
+
+U_CAPI UBool U_EXPORT2
+T_FileStream_remove(const char* fileName) {
+    return remove(fileName) == 0;  /* remove() signals success with 0 */
+}
diff --git a/intl/icu/source/tools/toolutil/filestrm.h b/intl/icu/source/tools/toolutil/filestrm.h
new file mode 100644
index 0000000000..86fac3063f
--- /dev/null
+++ b/intl/icu/source/tools/toolutil/filestrm.h
@@ -0,0 +1,106 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+******************************************************************************
+*
+*   Copyright (C) 1997-2005, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*
+******************************************************************************
+*
+* File FILESTRM.H
+*
+* Contains FileStream interface
+*
+* @author       Glenn Marcy
+*
+* Modification History:
+*
+*   Date        Name        Description
+*   5/8/98      gm          Created.
+*  03/02/99     stephen     Reordered params in ungetc to match stdio
+*                           Added wopen
+*
+******************************************************************************
+*/
+
+#ifndef FILESTRM_H
+#define FILESTRM_H
+
+#include "unicode/utypes.h"
+
+typedef struct _FileStream FileStream;
+
+U_CAPI FileStream* U_EXPORT2
+T_FileStream_open(const char* filename, const char* mode);
+
+/*
+U_CAPI FileStream* U_EXPORT2
+T_FileStream_wopen(const wchar_t* filename, const wchar_t* mode);
+*/
+U_CAPI void U_EXPORT2
+T_FileStream_close(FileStream* fileStream);
+
+U_CAPI UBool U_EXPORT2
+T_FileStream_file_exists(const char* filename);
+
+/*
+U_CAPI FileStream* U_EXPORT2
+T_FileStream_tmpfile(void);
+*/
+
+U_CAPI int32_t U_EXPORT2
+T_FileStream_read(FileStream* fileStream, void* addr, int32_t len);
+
+U_CAPI int32_t U_EXPORT2
+T_FileStream_write(FileStream* fileStream, const void* addr, int32_t len);
+
+U_CAPI void U_EXPORT2
+T_FileStream_rewind(FileStream* fileStream);
+
+/*Added by Bertrand A. D. */
+U_CAPI char * U_EXPORT2
+T_FileStream_readLine(FileStream* fileStream, char* buffer, int32_t length);
+
+U_CAPI int32_t U_EXPORT2
+T_FileStream_writeLine(FileStream* fileStream, const char* buffer);
+
+U_CAPI int32_t U_EXPORT2
+T_FileStream_putc(FileStream* fileStream, int32_t ch);
+
+U_CAPI int U_EXPORT2
+T_FileStream_getc(FileStream* fileStream);
+
+U_CAPI int32_t U_EXPORT2
+T_FileStream_ungetc(int32_t ch, FileStream *fileStream);
+
+U_CAPI int32_t U_EXPORT2
+T_FileStream_peek(FileStream* fileStream);
+
+U_CAPI int32_t U_EXPORT2
+T_FileStream_size(FileStream* fileStream);
+
+U_CAPI int U_EXPORT2
+T_FileStream_eof(FileStream* fileStream);
+
+U_CAPI int U_EXPORT2
+T_FileStream_error(FileStream* fileStream);
+
+/*
+U_CAPI void U_EXPORT2
+T_FileStream_setError(FileStream* fileStream);
+*/
+
+U_CAPI FileStream* U_EXPORT2
+T_FileStream_stdin(void);
+
+U_CAPI FileStream* U_EXPORT2
+T_FileStream_stdout(void);
+
+U_CAPI FileStream* U_EXPORT2
+T_FileStream_stderr(void);
+
+U_CAPI UBool U_EXPORT2
+T_FileStream_remove(const char* fileName);
+
+#endif /* FILESTRM_H */
diff --git a/intl/icu/source/tools/toolutil/filetools.cpp b/intl/icu/source/tools/toolutil/filetools.cpp
new file mode 100644
index 0000000000..994d8e31f0
--- /dev/null
+++ b/intl/icu/source/tools/toolutil/filetools.cpp
@@ -0,0 +1,140 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/******************************************************************************
+ *   Copyright (C) 2009-2013, International Business Machines
+ *   Corporation and others.  All Rights Reserved.
+ *******************************************************************************
+ */
+
+#include "unicode/platform.h"
+#if U_PLATFORM == U_PF_MINGW
+// *cough* - for struct stat
+#ifdef __STRICT_ANSI__
+#undef __STRICT_ANSI__
+#endif
+#endif
+
+#include "filetools.h"
+#include "filestrm.h"
+#include "charstr.h"
+#include "cstring.h"
+#include "unicode/putil.h"
+#include "putilimp.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/stat.h>
+#include <time.h>
+#include <string.h>
+
+#if U_HAVE_DIRENT_H
+#include <dirent.h>
+typedef struct dirent DIRENT;
+
+#define SKIP1 "."
+#define SKIP2 ".."
+#endif
+
+static int32_t whichFileModTimeIsLater(const char *file1, const char *file2);
+
+/*
+ * Returns true if filePath was modified no earlier than checkAgainst (isDir false) or than every
+ * file found recursively below the directory checkAgainst (isDir true). Any failure returns false.
+ */
+U_CAPI UBool U_EXPORT2
+isFileModTimeLater(const char *filePath, const char *checkAgainst, UBool isDir) {
+    UBool isLatest = true;
+
+    if (filePath == nullptr || checkAgainst == nullptr) {
+        return false;
+    }
+
+    if (isDir == true) {
+#if U_HAVE_DIRENT_H
+        DIR *pDir = nullptr;
+        if ((pDir= opendir(checkAgainst)) != nullptr) {
+            DIR *subDirp = nullptr;
+            DIRENT *dirEntry = nullptr;
+
+            while ((dirEntry = readdir(pDir)) != nullptr) {
+                if (uprv_strcmp(dirEntry->d_name, SKIP1) != 0 && uprv_strcmp(dirEntry->d_name, SKIP2) != 0) {
+                    UErrorCode status = U_ZERO_ERROR;
+                    icu::CharString newpath(checkAgainst, -1, status);
+                    newpath.append(U_FILE_SEP_STRING, -1, status);
+                    newpath.append(dirEntry->d_name, -1, status);
+                    if (U_FAILURE(status)) {
+                        fprintf(stderr, "%s:%d: %s\n", __FILE__, __LINE__, u_errorName(status));
+                        isLatest = false;  /* leave through closedir(pDir) below instead of leaking it */
+                        break;
+                    }
+                    if ((subDirp = opendir(newpath.data())) != nullptr) {
+                        /* If this new path is a directory, make a recursive call with the newpath. */
+                        closedir(subDirp);
+                        isLatest = isFileModTimeLater(filePath, newpath.data(), isDir);
+                        if (!isLatest) {
+                            break;
+                        }
+                    } else {
+                        int32_t latest = whichFileModTimeIsLater(filePath, newpath.data());
+                        if (latest < 0 || latest == 2) {  /* error, or the entry is the later one */
+                            isLatest = false;
+                            break;
+                        }
+                    }
+
+                }
+            }
+            closedir(pDir);
+        } else {
+            fprintf(stderr, "Unable to open directory: %s\n", checkAgainst);
+            return false;
+        }
+#endif
+    } else {
+        if (T_FileStream_file_exists(checkAgainst)) {
+            int32_t latest = whichFileModTimeIsLater(filePath, checkAgainst);
+            if (latest < 0 || latest == 2) {  /* error, or checkAgainst is the later one */
+                isLatest = false;
+            }
+        } else {
+            isLatest = false;
+        }
+    }
+
+    return isLatest;
+}
+
+/*
+ * Compares the modification times of file1 and file2.
+ * Returns 1 if file1 is the later one, 2 if file2 is, 0 if both times are equal,
+ * and -1 (after printing a message to stderr) if stat() fails for either file.
+ */
+static int32_t whichFileModTimeIsLater(const char *file1, const char *file2) {
+    struct stat info1, info2;
+
+    /* file2 is only looked at once file1 could be stat()ed. */
+    if (stat(file1, &info1) != 0 || stat(file2, &info2) != 0) {
+        fprintf(stderr, "Unable to get stats from file: %s or %s\n", file1, file2);
+        return -1;
+    }
+
+    /* difftime() yields file1's time minus file2's time, in seconds. */
+    const double secondsAhead = difftime(info1.st_mtime, info2.st_mtime);
+    if (secondsAhead > 0.0) {
+        return 1;
+    }
+    if (secondsAhead < 0.0) {
+        return 2;
+    }
+
+    /* Neither is later. */
+    return 0;
+}
+
+/* Replaces every occurrence of oldFileSepChar in filePath with newFileSepChar, in place. */
+U_CAPI void U_EXPORT2
+swapFileSepChar(char *filePath, const char oldFileSepChar, const char newFileSepChar) {
+    for (char *p = filePath; *p != 0; ++p) {
+        if (*p == oldFileSepChar) { *p = newFileSepChar; }
+    }
+}
diff --git a/intl/icu/source/tools/toolutil/filetools.h b/intl/icu/source/tools/toolutil/filetools.h
new file mode 100644
index 0000000000..40a606a7d4
--- /dev/null
+++ b/intl/icu/source/tools/toolutil/filetools.h
@@ -0,0 +1,34 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+*
+*   Copyright (C) 2009, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*
+*******************************************************************************
+*   file name:  filetools.h
+*   encoding:   UTF-8
+*   tab size:   8 (not used)
+*   indentation:4
+*
+*   created on: 2009jan09
+*   created by: Michael Ow
+*
+* Contains various functions to handle files.
+* Not suitable for production use. Not supported.
+* Not conformant. Not efficient.
+*/
+
+#ifndef __FILETOOLS_H__
+#define __FILETOOLS_H__
+
+#include "unicode/utypes.h"
+
+U_CAPI UBool U_EXPORT2
+isFileModTimeLater(const char *filePath, const char *checkAgainst, UBool isDir=false);
+
+U_CAPI void U_EXPORT2
+swapFileSepChar(char *filePath, const char oldFileSepChar, const char newFileSepChar);
+
+#endif
diff --git a/intl/icu/source/tools/toolutil/flagparser.cpp b/intl/icu/source/tools/toolutil/flagparser.cpp
new file mode 100644
index 0000000000..8bbceb4f73
--- /dev/null
+++ b/intl/icu/source/tools/toolutil/flagparser.cpp
@@ -0,0 +1,180 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/******************************************************************************
+ *   Copyright (C) 2009-2015, International Business Machines
+ *   Corporation and others.  All Rights Reserved.
+ *******************************************************************************
+ */
+
+#include "flagparser.h"
+#include "filestrm.h"
+#include "cstring.h"
+#include "cmemory.h"
+
+#define DEFAULT_BUFFER_SIZE 512
+
+static int32_t currentBufferSize = DEFAULT_BUFFER_SIZE;
+
+static int32_t extractFlag(char* buffer, int32_t bufferSize, char* flag, int32_t flagSize, const char ** flagNames, int32_t numOfFlags, UErrorCode *status);
+static int32_t getFlagOffset(const char *buffer, int32_t bufferSize);
+
+/* Reads "name=value" lines from fileName into flagBuffer[] (by flagNames index, or in file order if
+ * flagNames is nullptr). Returns 0 on success, the current line-buffer size if a value did not fit
+ * into flagBufferSize (U_BUFFER_OVERFLOW_ERROR), and -1 on any other failure. */
+U_CAPI int32_t U_EXPORT2
+parseFlagsFile(const char *fileName, char **flagBuffer, int32_t flagBufferSize, const char ** flagNames, int32_t numOfFlags, UErrorCode *status) {
+    char* buffer = nullptr;  /* holds one line of the file */
+    char* tmpFlagBuffer = nullptr;  /* receives one extracted value */
+    UBool allocateMoreSpace = false;
+    int32_t idx, i;
+    int32_t result = 0;
+
+    FileStream *f = T_FileStream_open(fileName, "r");
+    if (f == nullptr) {
+        *status = U_FILE_ACCESS_ERROR;
+        goto parseFlagsFile_cleanup;
+    }
+
+    buffer = (char *)uprv_malloc(sizeof(char) * currentBufferSize);
+    tmpFlagBuffer = (char *)uprv_malloc(sizeof(char) * flagBufferSize);
+
+    if (buffer == nullptr || tmpFlagBuffer == nullptr) {
+        *status = U_MEMORY_ALLOCATION_ERROR;
+        goto parseFlagsFile_cleanup;
+    }
+
+    do {
+        if (allocateMoreSpace) {  /* a line did not fit: double the line buffer */
+            allocateMoreSpace = false;
+            currentBufferSize *= 2;
+            uprv_free(buffer);
+            buffer = (char *)uprv_malloc(sizeof(char) * currentBufferSize);
+            if (buffer == nullptr) {
+                *status = U_MEMORY_ALLOCATION_ERROR;
+                goto parseFlagsFile_cleanup;
+            }
+        }
+        for (i = 0; i < numOfFlags;) {  /* i only advances when flagNames is nullptr (see below) */
+            if (T_FileStream_readLine(f, buffer, currentBufferSize) == nullptr) {
+                /* End of file reached. */
+                break;
+            }
+            if (buffer[0] == '#') {  /* lines starting with '#' are skipped */
+                continue;
+            }
+
+            if ((int32_t)uprv_strlen(buffer) == (currentBufferSize - 1) && buffer[currentBufferSize-2] != '\n') {
+                /* Allocate more space for buffer if it did not read the entire line */
+                allocateMoreSpace = true;
+                T_FileStream_rewind(f);  /* re-read the file from the start with the larger buffer */
+                break;
+            } else {
+                idx = extractFlag(buffer, currentBufferSize, tmpFlagBuffer, flagBufferSize, flagNames, numOfFlags, status);
+                if (U_FAILURE(*status)) {
+                    if (*status == U_BUFFER_OVERFLOW_ERROR) {
+                        result = currentBufferSize;  /* the value did not fit into flagBufferSize */
+                    } else {
+                        result = -1;
+                    }
+                    break;
+                } else {
+                    if (flagNames != nullptr) {
+                        if (idx >= 0) {
+                            uprv_strcpy(flagBuffer[idx], tmpFlagBuffer);  /* store at the name's index in flagNames */
+                        } else {
+                            /* No match found.  Skip it. */
+                            continue;
+                        }
+                    } else {
+                        uprv_strcpy(flagBuffer[i++], tmpFlagBuffer);  /* no names: store in file order */
+                    }
+                }
+            }
+        }
+    } while (allocateMoreSpace && U_SUCCESS(*status));
+
+parseFlagsFile_cleanup:
+    uprv_free(tmpFlagBuffer);
+    uprv_free(buffer);
+
+    T_FileStream_close(f);
+    
+    if (U_FAILURE(*status) && *status != U_BUFFER_OVERFLOW_ERROR) {
+        return -1;
+    }
+
+    if (U_SUCCESS(*status) && result == 0) {
+        currentBufferSize = DEFAULT_BUFFER_SIZE;  /* a grown size is kept only after a failure or overflow */
+    }
+
+    return result;
+}
+
+
+/* Copies the text after '=' in buffer (minus the final newline character) into flag. Returns the
+ * index of the flagNames entry naming this line, or -1 (no flagNames, no match, or flag too small,
+ * in which case *status is set to U_BUFFER_OVERFLOW_ERROR). */
+static int32_t extractFlag(char* buffer, int32_t bufferSize, char* flag, int32_t flagSize, const char **flagNames, int32_t numOfFlags, UErrorCode *status) {
+    int32_t i, idx = -1;
+    char *pBuffer;
+    int32_t offset=0;
+    UBool bufferWritten = false;
+
+    if (buffer[0] != 0) {
+        /* Get the offset (i.e. position after the '=') */
+        offset = getFlagOffset(buffer, bufferSize);
+        pBuffer = buffer+offset;
+        for(i = 0;;i++) {
+            if (i >= flagSize) {
+                *status = U_BUFFER_OVERFLOW_ERROR;
+                return -1;
+            }
+            /* End at the last character (the newline), or at the terminator if nothing follows '='. */
+            if (pBuffer[i] == 0 || pBuffer[i+1] == 0) {
+                flag[i] = 0;
+                break;
+            }
+            flag[i] = pBuffer[i];
+            if (i == 0) {
+                bufferWritten = true;
+            }
+        }
+    }
+
+    if (!bufferWritten) {
+        flag[0] = 0;
+    }
+
+    if (flagNames != nullptr && offset>0) {
+        offset--;  /* Move offset back 1 because of '='*/
+        for (i = 0; i < numOfFlags; i++) {
+            /* Require the whole name to match, not just a common prefix ("SO" vs. "SOBJ"). */
+            if ((int32_t)uprv_strlen(flagNames[i]) == offset && uprv_strncmp(buffer, flagNames[i], offset) == 0) {
+                idx = i;
+                break;
+            }
+        }
+    }
+
+    return idx;
+}
+
+/*
+ * Get the position after the first '=' character of the NUL-terminated line in buffer.
+ * Returns 0 if the line contains no '=', or if the '=' sits in the last of the
+ * bufferSize bytes so that nothing could follow it.
+ */
+static int32_t getFlagOffset(const char *buffer, int32_t bufferSize) {
+    int32_t offset = 0;
+
+    /* Stop at the terminator: the bytes beyond it are stale or uninitialized. */
+    for (offset = 0; offset < bufferSize && buffer[offset] != 0; offset++) {
+        if (buffer[offset] == '=') {
+            offset++;
+            /* An '=' in the very last buffer byte keeps counting as "not found". */
+            return (offset < bufferSize) ? offset : 0;
+        }
+    }
+
+    return 0;
+}
diff --git a/intl/icu/source/tools/toolutil/flagparser.h b/intl/icu/source/tools/toolutil/flagparser.h
new file mode 100644
index 0000000000..aa42547164
--- /dev/null
+++ b/intl/icu/source/tools/toolutil/flagparser.h
@@ -0,0 +1,32 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+*
+*   Copyright (C) 2009-2011, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*
+*******************************************************************************
+*   file name:  flagparser.h
+*   encoding:   UTF-8
+*   tab size:   8 (not used)
+*   indentation:4
+*
+*   created on: 2009jan08
+*   created by: Michael Ow
+*
+* Tiny flag file parser using ICU and intended for use in ICU tests and in build tools.
+* Not suitable for production use. Not supported.
+* Not conformant. Not efficient.
+* But very small.
+*/
+
+#ifndef __FLAGPARSER_H__
+#define __FLAGPARSER_H__
+
+#include "unicode/utypes.h"
+
+U_CAPI int32_t U_EXPORT2
+parseFlagsFile(const char *fileName, char **flagBuffer, int32_t flagBufferSize, const char ** flagNames, int32_t numOfFlags, UErrorCode *status);
+
+#endif
diff --git a/intl/icu/source/tools/toolutil/package.cpp b/intl/icu/source/tools/toolutil/package.cpp
new file mode 100644
index 0000000000..3098f5d57d
--- /dev/null
+++ b/intl/icu/source/tools/toolutil/package.cpp
@@ -0,0 +1,1311 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+*
+*   Copyright (C) 1999-2015, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*
+*******************************************************************************
+*   file name:  package.cpp
+*   encoding:   UTF-8
+*   tab size:   8 (not used)
+*   indentation:4
+*
+*   created on: 2005aug25
+*   created by: Markus W. Scherer
+*
+*   Read, modify, and write ICU .dat data package files.
+*   This is an integral part of the icupkg tool, moved to the toolutil library
+*   because parts of tool implementations tend to be later shared by
+*   other tools.
+*   Subsumes functionality and implementation code from
+*   gencmn, decmn, and icuswap tools.
+*/
+
+#include "unicode/utypes.h"
+#include "unicode/putil.h"
+#include "unicode/udata.h"
+#include "cstring.h"
+#include "uarrsort.h"
+#include "ucmndata.h"
+#include "udataswp.h"
+#include "swapimpl.h"
+#include "toolutil.h"
+#include "package.h"
+#include "cmemory.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+
+static const int32_t kItemsChunk = 256; /* How much to increase the filesarray by each time */
+
+// general definitions ----------------------------------------------------- ***
+
+/* UDataInfo for the "CmnD" data format; field names per udata.h (cf. there) */
+static const UDataInfo dataInfo={
+    (uint16_t)sizeof(UDataInfo),  /* size */
+    0,                            /* reservedWord */
+
+    U_IS_BIG_ENDIAN,              /* isBigEndian */
+    U_CHARSET_FAMILY,             /* charsetFamily */
+    (uint8_t)sizeof(char16_t),    /* sizeofUChar */
+    0,                            /* reservedByte */
+
+    {0x43, 0x6d, 0x6e, 0x44},     /* dataFormat="CmnD" */
+    {1, 0, 0, 0},                 /* formatVersion */
+    {3, 0, 0, 0}                  /* dataVersion */
+};
+
+U_CDECL_BEGIN
+static void U_CALLCONV
+printPackageError(void *context, const char *fmt, va_list args) {
+    vfprintf(static_cast<FILE *>(context), fmt, args);  /* context carries the FILE * to print to */
+}
+U_CDECL_END
+
+static uint16_t
+readSwapUInt16(uint16_t x) {
+    return (uint16_t)(((x&0xff)<<8)|((x>>8)&0xff));  /* exchange the two bytes */
+}
+
+// platform types ---------------------------------------------------------- ***
+
+static const char *types="lb?e";  /* type letter for each TYPE_* value below; TYPE_LE has no letter ('?') */
+
+enum { TYPE_L, TYPE_B, TYPE_LE, TYPE_E, TYPE_COUNT };  /* numbered like makeTypeEnum(): 2*charset+isBigEndian */
+
+static inline int32_t
+makeTypeEnum(uint8_t charset, UBool isBigEndian) {
+    return ((int32_t)charset<<1)+isBigEndian;  /* two consecutive values per charset */
+}
+
+static inline int32_t
+makeTypeEnum(char type) {
+    /* Maps a type letter to its TYPE_* value; unknown letters give -1. */
+    if(type=='l') { return TYPE_L; }
+    if(type=='b') { return TYPE_B; }
+    if(type=='e') { return TYPE_E; }
+    return -1;
+}
+
+static inline char
+makeTypeLetter(uint8_t charset, UBool isBigEndian) {
+    return types[2*(int32_t)charset+isBigEndian];  /* same index as makeTypeEnum(charset, isBigEndian) */
+}
+
+static inline char
+makeTypeLetter(int32_t typeEnum) {
+    return (0<=typeEnum && typeEnum<TYPE_COUNT) ? types[typeEnum] : '?';  /* '?' outside TYPE_*, e.g. for -1 */
+}
+
+static void
+makeTypeProps(char type, uint8_t &charset, UBool &isBigEndian) {
+    const int32_t code=makeTypeEnum(type);  /* 2*charset+isBigEndian, or -1 for an unknown letter */
+    isBigEndian=(UBool)(code&1);
+    charset=(uint8_t)(code>>1);
+}
+
+U_CFUNC const UDataInfo *
+getDataInfo(const uint8_t *data, int32_t length,
+            int32_t &infoLength, int32_t &headerLength,
+            UErrorCode *pErrorCode) {
+    /* Checks the data header at data and returns its UDataInfo, or nullptr on failure. */
+    /* A negative length disables every check against the length. */
+
+    if(pErrorCode==nullptr || U_FAILURE(*pErrorCode)) {
+        return nullptr;
+    }
+    const UBool lengthKnown=(length>=0);
+    if(data==nullptr || (lengthKnown && length<(int32_t)sizeof(DataHeader))) {
+        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+        return nullptr;
+    }
+
+    const DataHeader *header=(const DataHeader *)data;
+    const UDataInfo *info=&header->info;
+    /* Only headers with the two magic bytes and sizeofUChar==2 are accepted. */
+    const UBool recognized=
+        header->dataHeader.magic1==0xda &&
+        header->dataHeader.magic2==0x27 &&
+        info->sizeofUChar==2;
+    if(!recognized) {
+        *pErrorCode=U_UNSUPPORTED_ERROR;
+        return nullptr;
+    }
+
+    /* Both size fields are byte-swapped when the data's endianness is not this platform's. */
+    const UBool sameEndianness=(info->isBigEndian==U_IS_BIG_ENDIAN);
+    headerLength= sameEndianness ?
+        header->dataHeader.headerSize : readSwapUInt16(header->dataHeader.headerSize);
+    infoLength= sameEndianness ?
+        info->size : readSwapUInt16(info->size);
+
+    /* The output lengths above stay set even if the consistency checks below fail. */
+    if( headerLength<(int32_t)sizeof(DataHeader) ||
+        infoLength<(int32_t)sizeof(UDataInfo) ||
+        headerLength<(int32_t)(sizeof(header->dataHeader)+infoLength) ||
+        (lengthKnown && length<headerLength)
+    ) {
+        *pErrorCode=U_UNSUPPORTED_ERROR;
+        return nullptr;
+    }
+
+    return info;
+}
+
+static int32_t
+getTypeEnumForInputData(const uint8_t *data, int32_t length,
+                        UErrorCode *pErrorCode) {
+    /*
+     * Returns the type enum value for the header at data, or -1 if getDataInfo()
+     * rejects it (getDataInfo() also performs all argument checking).
+     */
+    int32_t ignoredInfoLength, ignoredHeaderLength;
+    const UDataInfo *info=
+        getDataInfo(data, length, ignoredInfoLength, ignoredHeaderLength, pErrorCode);
+
+    return info==nullptr ?
+        -1 : makeTypeEnum(info->charsetFamily, (UBool)info->isBigEndian);
+}
+
+// file handling ----------------------------------------------------------- ***
+
+static void
+extractPackageName(const char *filename,
+                   char pkg[], int32_t capacity) {
+    /* Copies the basename of filename, minus its mandatory ".dat" suffix, into pkg. */
+    /* Exits the process if the suffix is missing or the name does not fit into capacity. */
+    const char *base=findBasename(filename);
+    const int32_t nameLength=(int32_t)strlen(base)-4; /* 4==strlen(".dat") */
+
+    const UBool hasSuffix= nameLength>0 && 0==strcmp(base+nameLength, ".dat");
+    if(!hasSuffix) {
+        fprintf(stderr, "icupkg: \"%s\" is not recognized as a package filename (must end with .dat)\n",
+                         base);
+        exit(U_ILLEGAL_ARGUMENT_ERROR);
+    }
+
+    if(capacity<=nameLength) {
+        fprintf(stderr, "icupkg: the package name \"%s\" is too long (>=%ld)\n",
+                         base, (long)capacity);
+        exit(U_ILLEGAL_ARGUMENT_ERROR);
+    }
+
+    memcpy(pkg, base, nameLength);
+    pkg[nameLength]=0;
+}
+
+static int32_t
+getFileLength(FILE *f) {
+    /* The end-of-file offset is the length; f is left positioned at its start. */
+    fseek(f, 0, SEEK_END);
+    const long endOffset=ftell(f);
+    fseek(f, 0, SEEK_SET);
+
+    return (int32_t)endOffset;
+}
+
+/*
+ * Turns tree and alternate file separators in s into normal file separators (an empty macro if all are equal).
+ */
+#if U_TREE_ENTRY_SEP_CHAR==U_FILE_SEP_CHAR && U_FILE_ALT_SEP_CHAR==U_FILE_SEP_CHAR
+#define treeToPath(s)
+#else
+static void
+treeToPath(char *s) {
+    while(*s!=0) {
+        const char c=*s;
+        if(c==U_TREE_ENTRY_SEP_CHAR || c==U_FILE_ALT_SEP_CHAR) {
+            *s=U_FILE_SEP_CHAR;
+        }
+        ++s;
+    }
+}
+#endif
+
+/*
+ * Turns normal and alternate file separators in s into tree separators (an empty macro if all are equal).
+ */
+#if U_TREE_ENTRY_SEP_CHAR==U_FILE_SEP_CHAR && U_FILE_ALT_SEP_CHAR==U_FILE_SEP_CHAR
+#define pathToTree(s)
+#else
+static void
+pathToTree(char *s) {
+    while(*s!=0) {
+        const char c=*s;
+        if(c==U_FILE_SEP_CHAR || c==U_FILE_ALT_SEP_CHAR) {
+            *s=U_TREE_ENTRY_SEP_CHAR;
+        }
+        ++s;
+    }
+}
+#endif
+
+/*
+ * Prepend the path (if any) to the name and run the name through treeToPath(); exits if capacity is too small.
+ */
+static void
+makeFullFilename(const char *path, const char *name,
+                 char *filename, int32_t capacity) {
+    char *s;
+
+    // prepend the path unless nullptr or empty
+    if(path!=nullptr && path[0]!=0) {
+        if((int32_t)(strlen(path)+1)>=capacity) {
+            fprintf(stderr, "pathname too long: \"%s\"\n", path);
+            exit(U_BUFFER_OVERFLOW_ERROR);
+        }
+        strcpy(filename, path);
+        // make sure the path ends with a file separator
+        s=strchr(filename, 0);
+        if(*(s-1)!=U_FILE_SEP_CHAR && *(s-1)!=U_FILE_ALT_SEP_CHAR) {
+            *s++=U_FILE_SEP_CHAR;
+        }
+    } else {
+        s=filename;
+    }
+
+    // turn the name into a filename, turn tree separators into file separators
+    if((int32_t)((s-filename)+strlen(name))>=capacity) {
+        // filename may be unterminated at s (or not written at all): print only the characters built so far
+        fprintf(stderr, "path/filename too long: \"%.*s%s\"\n", (int)(s-filename), filename, name);
+        exit(U_BUFFER_OVERFLOW_ERROR);
+    }
+    strcpy(s, name);
+    treeToPath(s);
+}
+
+static void
+makeFullFilenameAndDirs(const char *path, const char *name,
+                        char *filename, int32_t capacity) {
+    /* Builds the full filename, then calls uprv_mkdir() for each directory prefix ending inside the name part. */
+    makeFullFilename(path, name, filename, capacity);
+
+    UErrorCode mkdirStatus=U_ZERO_ERROR;
+    char *cursor=filename+strlen(filename)-strlen(name);
+    for(;;) {
+        char *separator=strchr(cursor, U_FILE_SEP_CHAR);
+        if(separator==nullptr) { break; }
+        if(separator!=filename) {
+            *separator=0;  // cut the string here so that it names the directory
+            uprv_mkdir(filename, &mkdirStatus);
+            if(U_FAILURE(mkdirStatus)) {
+                fprintf(stderr, "icupkg: unable to create tree directory \"%s\"\n", filename);
+                exit(U_FILE_ACCESS_ERROR);
+            }
+        }
+        *separator=U_FILE_SEP_CHAR;  // put the separator back
+        cursor=separator+1;
+    }
+}
+
+/*
+ * Read a whole ICU data file into a newly allocated buffer.
+ *
+ * path, name: combined by makeFullFilename() into the file to open
+ * length:     receives the buffer length, i.e. the file length rounded up
+ *             to a multiple of 16
+ * type:       receives the platform type letter of the data
+ *
+ * Returns the buffer (allocated with uprv_malloc()); the caller owns it.
+ * Prints a message and exits the tool on any failure.
+ */
+static uint8_t *
+readFile(const char *path, const char *name, int32_t &length, char &type) {
+    char filename[1024];
+    makeFullFilename(path, name, filename, (int32_t)sizeof(filename));
+
+    /* open the input file */
+    FILE *in=fopen(filename, "rb");
+    if(in==nullptr) {
+        fprintf(stderr, "icupkg: unable to open input file \"%s\"\n", filename);
+        exit(U_FILE_ACCESS_ERROR);
+    }
+
+    /* get the file length */
+    const int32_t rawLength=getFileLength(in);
+    if(ferror(in) || rawLength<=0) {
+        fprintf(stderr, "icupkg: empty input file \"%s\"\n", filename);
+        fclose(in);
+        exit(U_FILE_ACCESS_ERROR);
+    }
+
+    /* allocate the buffer, pad to multiple of 16 */
+    length=(rawLength+0xf)&~0xf;
+    icu::LocalMemory<uint8_t> buffer((uint8_t *)uprv_malloc(length));
+    if(buffer.isNull()) {
+        fclose(in);
+        fprintf(stderr, "icupkg: malloc error allocating %d bytes.\n", (int)length);
+        exit(U_MEMORY_ALLOCATION_ERROR);
+    }
+    uint8_t *bytes=buffer.getAlias();
+
+    /* read the file */
+    if((int32_t)fread(bytes, 1, rawLength, in)!=rawLength) {
+        fprintf(stderr, "icupkg: error reading \"%s\"\n", filename);
+        fclose(in);
+        exit(U_FILE_ACCESS_ERROR);
+    }
+
+    /* pad the file to a multiple of 16 using the usual padding byte */
+    if(rawLength<length) {
+        memset(bytes+rawLength, 0xaa, length-rawLength);
+    }
+
+    fclose(in);
+
+    // minimum check for ICU-format data
+    UErrorCode status=U_ZERO_ERROR;
+    const int32_t typeEnum=getTypeEnumForInputData(bytes, length, &status);
+    if(typeEnum<0 || U_FAILURE(status)) {
+        fprintf(stderr, "icupkg: not an ICU data file: \"%s\"\n", filename);
+#if !UCONFIG_NO_LEGACY_CONVERSION
+        exit(U_INVALID_FORMAT_ERROR);
+#else
+        fprintf(stderr, "U_INVALID_FORMAT_ERROR occurred but UCONFIG_NO_LEGACY_CONVERSION is on so this is expected.\n");
+        exit(0);
+#endif
+    }
+    type=makeTypeLetter(typeEnum);
+
+    // hand the buffer over to the caller
+    return buffer.orphan();
+}
+
+// .dat package file representation ---------------------------------------- ***
+
+U_CDECL_BEGIN
+
+// Comparison callback for sorting Item arrays: orders items by strcmp() of their names.
+static int32_t U_CALLCONV
+compareItems(const void * /*context*/, const void *left, const void *right) {
+    U_NAMESPACE_USE
+
+    const char *leftName=static_cast<const Item *>(left)->name;
+    const char *rightName=static_cast<const Item *>(right)->name;
+    return (int32_t)strcmp(leftName, rightName);
+}
+
+U_CDECL_END
+
+U_NAMESPACE_BEGIN
+
+// Constructor: sets up an empty package with a default data header.
+Package::Package()
+        : doAutoPrefix(false), prefixEndsWithType(false) {
+    // no input package yet; assume the local platform properties
+    inPkgName[0]=0;
+    pkgPrefix[0]=0;
+    inData=nullptr;
+    inLength=0;
+    inCharset=U_CHARSET_FAMILY;
+    inIsBigEndian=U_IS_BIG_ENDIAN;
+
+    // no items and no item name strings
+    items=nullptr;
+    itemCount=0;
+    itemMax=0;
+    inStringTop=0;
+    outStringTop=0;
+
+    // no item search in progress
+    matchMode=0;
+    findPrefix=nullptr;
+    findSuffix=nullptr;
+    findPrefixLength=0;
+    findSuffixLength=0;
+    findNextIndex=-1;
+
+    // create a header for an empty package
+    DataHeader *pHeader=(DataHeader *)header;
+    pHeader->dataHeader.magic1=0xda;
+    pHeader->dataHeader.magic2=0x27;
+    memcpy(&pHeader->info, &dataInfo, sizeof(dataInfo));
+
+    /* NUL-pad the header to a multiple of 16 */
+    const int32_t rawLength=(int32_t)(4+sizeof(dataInfo));
+    const int32_t paddedLength=(rawLength+0xf)&~0xf;
+    if(paddedLength!=rawLength) {
+        memset(header+rawLength, 0, paddedLength-rawLength);
+    }
+    headerLength=paddedLength;
+    pHeader->dataHeader.headerSize=(uint16_t)headerLength;
+}
+
+// Destructor: frees the input package data, the item data that this object
+// owns, and the item array itself.
+Package::~Package() {
+    uprv_free(inData);
+
+    int32_t idx=0;
+    while(idx<itemCount) {
+        Item &item=items[idx++];
+        if(item.isDataOwned) {
+            uprv_free(item.data);
+        }
+    }
+
+    uprv_free((void*)items);
+}
+
+// Copy p into pkgPrefix; exits the tool if p does not fit including its NUL.
+void
+Package::setPrefix(const char *p) {
+    if(strlen(p)<sizeof(pkgPrefix)) {
+        strcpy(pkgPrefix, p);
+        return;
+    }
+    fprintf(stderr, "icupkg: --toc_prefix %s too long\n", p);
+    exit(U_ILLEGAL_ARGUMENT_ERROR);
+}
+
+/*
+ * Read an existing .dat package file into this object.
+ * The header and the item name strings are swapped to the local platform;
+ * the item data is left unswapped and points into the input buffer.
+ * Any error prints a message and exits the tool.
+ */
+void
+Package::readPackage(const char *filename) {
+    UDataSwapper *ds;
+    const UDataInfo *pInfo;
+    UErrorCode errorCode;
+
+    const uint8_t *inBytes;
+
+    int32_t length, offset, i;
+    int32_t itemLength, typeEnum;
+    char type;
+
+    const UDataOffsetTOCEntry *inEntries;
+
+    extractPackageName(filename, inPkgName, (int32_t)sizeof(inPkgName));
+
+    /* read the file */
+    inData=readFile(nullptr, filename, inLength, type);
+    length=inLength;
+
+    /*
+     * swap the header - even if the swapping itself is a no-op
+     * because it tells us the header length
+     */
+    errorCode=U_ZERO_ERROR;
+    makeTypeProps(type, inCharset, inIsBigEndian);
+    ds=udata_openSwapper(inIsBigEndian, inCharset, U_IS_BIG_ENDIAN, U_CHARSET_FAMILY, &errorCode);
+    if(U_FAILURE(errorCode)) {
+        fprintf(stderr, "icupkg: udata_openSwapper(\"%s\") failed - %s\n",
+                filename, u_errorName(errorCode));
+        exit(errorCode);
+    }
+
+    ds->printError=printPackageError;
+    ds->printErrorContext=stderr;
+
+    // do not let the swapper read beyond the header buffer or beyond the file
+    headerLength=sizeof(header);
+    if(length<headerLength) {
+        headerLength=length;
+    }
+    headerLength=udata_swapDataHeader(ds, inData, headerLength, header, &errorCode);
+    if(U_FAILURE(errorCode)) {
+        exit(errorCode);
+    }
+
+    /* check data format and format version */
+    pInfo=(const UDataInfo *)((const char *)inData+4);
+    if(!(
+        pInfo->dataFormat[0]==0x43 &&   /* dataFormat="CmnD" */
+        pInfo->dataFormat[1]==0x6d &&
+        pInfo->dataFormat[2]==0x6e &&
+        pInfo->dataFormat[3]==0x44 &&
+        pInfo->formatVersion[0]==1
+    )) {
+        fprintf(stderr, "icupkg: data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as an ICU .dat package\n",
+                pInfo->dataFormat[0], pInfo->dataFormat[1],
+                pInfo->dataFormat[2], pInfo->dataFormat[3],
+                pInfo->formatVersion[0]);
+        exit(U_UNSUPPORTED_ERROR);
+    }
+    inIsBigEndian=(UBool)pInfo->isBigEndian;
+    inCharset=pInfo->charsetFamily;
+
+    inBytes=(const uint8_t *)inData+headerLength;
+    inEntries=(const UDataOffsetTOCEntry *)(inBytes+4);
+
+    /* check that the itemCount fits, then the ToC table, then at least the header of the last item */
+    length-=headerLength;
+    if(length<4) {
+        /* itemCount does not fit */
+        offset=0x7fffffff;
+    } else {
+        itemCount=udata_readInt32(ds, *(const int32_t *)inBytes);
+        if(itemCount<0) {
+            /*
+             * A negative count would index inEntries[] before its start below;
+             * reject such a corrupt package right away.
+             */
+            fprintf(stderr, "icupkg: invalid item count %ld in the input .dat package\n",
+                            (long)itemCount);
+            exit(U_INVALID_FORMAT_ERROR);
+        }
+        setItemCapacity(itemCount); /* resize so there's space */
+        if(itemCount==0) {
+            offset=4;
+        } else if((int64_t)length<(4+8*(int64_t)itemCount)) {
+            /* ToC table does not fit (computed in 64 bits so that a huge count cannot overflow) */
+            offset=0x7fffffff;
+        } else {
+            /* offset of the last item plus at least 20 bytes for its header */
+            offset=20+(int32_t)ds->readUInt32(inEntries[itemCount-1].dataOffset);
+        }
+    }
+    if(length<offset) {
+        fprintf(stderr, "icupkg: too few bytes (%ld after header) for a .dat package\n",
+                        (long)length);
+        exit(U_INDEX_OUTOFBOUNDS_ERROR);
+    }
+    /* do not modify the package length variable until the last item's length is set */
+
+    if(itemCount<=0) {
+        if(doAutoPrefix) {
+            fprintf(stderr, "icupkg: --auto_toc_prefix[_with_type] but the input package is empty\n");
+            exit(U_INVALID_FORMAT_ERROR);
+        }
+    } else {
+        char prefix[MAX_PKG_NAME_LENGTH+4];
+        char *s, *inItemStrings;
+
+        if(itemCount>itemMax) {
+            fprintf(stderr, "icupkg: too many items, maximum is %d\n", itemMax);
+            exit(U_BUFFER_OVERFLOW_ERROR);
+        }
+
+        /* swap the item name strings */
+        // the strings start right after the ToC and end where the first item's data starts
+        int32_t stringsOffset=4+8*itemCount;
+        itemLength=(int32_t)(ds->readUInt32(inEntries[0].dataOffset))-stringsOffset;
+
+        // don't include padding bytes at the end of the item names
+        while(itemLength>0 && inBytes[stringsOffset+itemLength-1]!=0) {
+            --itemLength;
+        }
+
+        if((inStringTop+itemLength)>STRING_STORE_SIZE) {
+            fprintf(stderr, "icupkg: total length of item name strings too long\n");
+            exit(U_BUFFER_OVERFLOW_ERROR);
+        }
+
+        inItemStrings=inStrings+inStringTop;
+        ds->swapInvChars(ds, inBytes+stringsOffset, itemLength, inItemStrings, &errorCode);
+        if(U_FAILURE(errorCode)) {
+            fprintf(stderr, "icupkg failed to swap the input .dat package item name strings\n");
+            exit(U_INVALID_FORMAT_ERROR);
+        }
+        inStringTop+=itemLength;
+
+        // reset the Item entries
+        memset(items, 0, itemCount*sizeof(Item));
+
+        /*
+         * Get the common prefix of the items.
+         * New-style ICU .dat packages use tree separators ('/') between package names,
+         * tree names, and item names,
+         * while old-style ICU .dat packages (before multi-tree support)
+         * use an underscore ('_') between package and item names.
+         */
+        offset=(int32_t)ds->readUInt32(inEntries[0].nameOffset)-stringsOffset;
+        s=inItemStrings+offset;  // name of the first entry
+        int32_t prefixLength;
+        if(doAutoPrefix) {
+            // Use the first entry's prefix. Must be a new-style package.
+            const char *prefixLimit=strchr(s, U_TREE_ENTRY_SEP_CHAR);
+            if(prefixLimit==nullptr) {
+                fprintf(stderr,
+                        "icupkg: --auto_toc_prefix[_with_type] but "
+                        "the first entry \"%s\" does not contain a '%c'\n",
+                        s, U_TREE_ENTRY_SEP_CHAR);
+                exit(U_INVALID_FORMAT_ERROR);
+            }
+            prefixLength=(int32_t)(prefixLimit-s);
+            if(prefixLength==0 || prefixLength>=UPRV_LENGTHOF(pkgPrefix)) {
+                fprintf(stderr,
+                        "icupkg: --auto_toc_prefix[_with_type] but "
+                        "the prefix of the first entry \"%s\" is empty or too long\n",
+                        s);
+                exit(U_INVALID_FORMAT_ERROR);
+            }
+            if(prefixEndsWithType && s[prefixLength-1]!=type) {
+                fprintf(stderr,
+                        "icupkg: --auto_toc_prefix_with_type but "
+                        "the prefix of the first entry \"%s\" does not end with '%c'\n",
+                        s, type);
+                exit(U_INVALID_FORMAT_ERROR);
+            }
+            memcpy(pkgPrefix, s, prefixLength);
+            pkgPrefix[prefixLength]=0;
+            memcpy(prefix, s, ++prefixLength);  // include the /
+        } else {
+            // Use the package basename as prefix.
+            int32_t inPkgNameLength= static_cast<int32_t>(strlen(inPkgName));
+            memcpy(prefix, inPkgName, inPkgNameLength);
+            prefixLength=inPkgNameLength;
+
+            if( (int32_t)strlen(s)>=(inPkgNameLength+2) &&
+                0==memcmp(s, inPkgName, inPkgNameLength) &&
+                s[inPkgNameLength]=='_'
+            ) {
+                // old-style .dat package
+                prefix[prefixLength++]='_';
+            } else {
+                // new-style .dat package
+                prefix[prefixLength++]=U_TREE_ENTRY_SEP_CHAR;
+                // if it turns out to not contain U_TREE_ENTRY_SEP_CHAR
+                // then the test in the loop below will fail
+            }
+        }
+        prefix[prefixLength]=0;
+
+        /* read the ToC table */
+        for(i=0; i<itemCount; ++i) {
+            // skip the package part of the item name, error if it does not match the actual package name
+            // or if nothing follows the package name
+            offset=(int32_t)ds->readUInt32(inEntries[i].nameOffset)-stringsOffset;
+            s=inItemStrings+offset;
+            if(0!=strncmp(s, prefix, prefixLength) || s[prefixLength]==0) {
+                fprintf(stderr, "icupkg: input .dat item name \"%s\" does not start with \"%s\"\n",
+                        s, prefix);
+                exit(U_INVALID_FORMAT_ERROR);
+            }
+            items[i].name=s+prefixLength;
+
+            // set the item's data
+            items[i].data=(uint8_t *)inBytes+ds->readUInt32(inEntries[i].dataOffset);
+            if(i>0) {
+                // an item ends where the next item starts
+                items[i-1].length=(int32_t)(items[i].data-items[i-1].data);
+
+                // set the previous item's platform type
+                typeEnum=getTypeEnumForInputData(items[i-1].data, items[i-1].length, &errorCode);
+                if(typeEnum<0 || U_FAILURE(errorCode)) {
+                    fprintf(stderr, "icupkg: not an ICU data file: item \"%s\" in \"%s\"\n", items[i-1].name, filename);
+                    exit(U_INVALID_FORMAT_ERROR);
+                }
+                items[i-1].type=makeTypeLetter(typeEnum);
+            }
+            // the data points into inData which this object frees as a whole
+            items[i].isDataOwned=false;
+        }
+        // set the last item's length
+        items[itemCount-1].length=length-ds->readUInt32(inEntries[itemCount-1].dataOffset);
+
+        // set the last item's platform type
+        typeEnum=getTypeEnumForInputData(items[itemCount-1].data, items[itemCount-1].length, &errorCode);
+        if(typeEnum<0 || U_FAILURE(errorCode)) {
+            fprintf(stderr, "icupkg: not an ICU data file: item \"%s\" in \"%s\"\n", items[itemCount-1].name, filename);
+            exit(U_INVALID_FORMAT_ERROR);
+        }
+        items[itemCount-1].type=makeTypeLetter(typeEnum);
+
+        if(type!=U_ICUDATA_TYPE_LETTER[0]) {
+            // sort the item names for the local charset
+            sortItems();
+        }
+    }
+
+    udata_closeSwapper(ds);
+}
+
+// Returns the platform type letter for the input charset family and endianness.
+char
+Package::getInType() {
+    const char inType=makeTypeLetter(inCharset, inIsBigEndian);
+    return inType;
+}
+
+/*
+ * Write a .dat package file with the items in this object.
+ *
+ * filename: output file; the package name is extracted from it
+ * outType:  platform type letter that the header, names and items are swapped to
+ * comment:  if not nullptr, replaces the comment in the data header
+ *
+ * Note that the header, the item name strings and the item data are swapped
+ * in place, and that the item names are moved into the output string store.
+ * Any error prints a message and exits the tool.
+ */
+void
+Package::writePackage(const char *filename, char outType, const char *comment) {
+    char prefix[MAX_PKG_NAME_LENGTH+4];
+    UDataOffsetTOCEntry entry;
+    UDataSwapper *dsLocalToOut, *ds[TYPE_COUNT];
+    FILE *file;
+    Item *pItem;
+    char *name;
+    UErrorCode errorCode;
+    int32_t i, length, prefixLength, maxItemLength, basenameOffset, offset, outInt32;
+    uint8_t outCharset;
+    UBool outIsBigEndian;
+
+    extractPackageName(filename, prefix, MAX_PKG_NAME_LENGTH);
+
+    // if there is an explicit comment, then use it, else use what's in the current header
+    if(comment!=nullptr) {
+        /* get the header size minus the current comment */
+        DataHeader *pHeader;
+
+        pHeader=(DataHeader *)header;
+        headerLength=4+pHeader->info.size;
+        length=(int32_t)strlen(comment);
+        if((int32_t)(headerLength+length)>=(int32_t)sizeof(header)) {
+            fprintf(stderr, "icupkg: comment too long\n");
+            exit(U_BUFFER_OVERFLOW_ERROR);
+        }
+        // length+1: copy the comment including its terminating NUL
+        memcpy(header+headerLength, comment, length+1);
+        headerLength+=length;
+        if(headerLength&0xf) {
+            /* NUL-pad the header to a multiple of 16 */
+            length=(headerLength+0xf)&~0xf;
+            memset(header+headerLength, 0, length-headerLength);
+            headerLength=length;
+        }
+        pHeader->dataHeader.headerSize=(uint16_t)headerLength;
+    }
+
+    makeTypeProps(outType, outCharset, outIsBigEndian);
+
+    // open (TYPE_COUNT-2) swappers
+    // one is a no-op for local type==outType
+    // one type (TYPE_LE) is bogus
+    errorCode=U_ZERO_ERROR;
+    i=makeTypeEnum(outType);
+    ds[TYPE_B]= i==TYPE_B ? nullptr : udata_openSwapper(true, U_ASCII_FAMILY, outIsBigEndian, outCharset, &errorCode);
+    ds[TYPE_L]= i==TYPE_L ? nullptr : udata_openSwapper(false, U_ASCII_FAMILY, outIsBigEndian, outCharset, &errorCode);
+    ds[TYPE_LE]=nullptr;
+    ds[TYPE_E]= i==TYPE_E ? nullptr : udata_openSwapper(true, U_EBCDIC_FAMILY, outIsBigEndian, outCharset, &errorCode);
+    if(U_FAILURE(errorCode)) {
+        fprintf(stderr, "icupkg: udata_openSwapper() failed - %s\n", u_errorName(errorCode));
+        exit(errorCode);
+    }
+    for(i=0; i<TYPE_COUNT; ++i) {
+        if(ds[i]!=nullptr) {
+            ds[i]->printError=printPackageError;
+            ds[i]->printErrorContext=stderr;
+        }
+    }
+
+    // swapper from the local platform to the output type; nullptr if they are the same
+    dsLocalToOut=ds[makeTypeEnum(U_CHARSET_FAMILY, U_IS_BIG_ENDIAN)];
+
+    // create the file and write its contents
+    file=fopen(filename, "wb");
+    if(file==nullptr) {
+        fprintf(stderr, "icupkg: unable to create file \"%s\"\n", filename);
+        exit(U_FILE_ACCESS_ERROR);
+    }
+
+    // swap and write the header
+    if(dsLocalToOut!=nullptr) {
+        udata_swapDataHeader(dsLocalToOut, header, headerLength, header, &errorCode);
+        if(U_FAILURE(errorCode)) {
+            fprintf(stderr, "icupkg: udata_swapDataHeader(local to out) failed - %s\n", u_errorName(errorCode));
+            exit(errorCode);
+        }
+    }
+    length=(int32_t)fwrite(header, 1, headerLength, file);
+    if(length!=headerLength) {
+        fprintf(stderr, "icupkg: unable to write complete header to file \"%s\"\n", filename);
+        exit(U_FILE_ACCESS_ERROR);
+    }
+
+    // prepare and swap the package name with a tree separator
+    // for prepending to item names
+    if(pkgPrefix[0]==0) {
+        prefixLength=(int32_t)strlen(prefix);
+    } else {
+        prefixLength=(int32_t)strlen(pkgPrefix);
+        memcpy(prefix, pkgPrefix, prefixLength);
+        if(prefixEndsWithType) {
+            prefix[prefixLength-1]=outType;
+        }
+    }
+    prefix[prefixLength++]=U_TREE_ENTRY_SEP_CHAR;
+    prefix[prefixLength]=0;
+    if(dsLocalToOut!=nullptr) {
+        dsLocalToOut->swapInvChars(dsLocalToOut, prefix, prefixLength, prefix, &errorCode);
+        if(U_FAILURE(errorCode)) {
+            fprintf(stderr, "icupkg: swapInvChars(output package name) failed - %s\n", u_errorName(errorCode));
+            exit(errorCode);
+        }
+
+        // swap and sort the item names (sorting needs to be done in the output charset)
+        dsLocalToOut->swapInvChars(dsLocalToOut, inStrings, inStringTop, inStrings, &errorCode);
+        if(U_FAILURE(errorCode)) {
+            fprintf(stderr, "icupkg: swapInvChars(item names) failed - %s\n", u_errorName(errorCode));
+            exit(errorCode);
+        }
+        sortItems();
+    }
+
+    // create the output item names in sorted order, with the package name prepended to each
+    for(i=0; i<itemCount; ++i) {
+        length=(int32_t)strlen(items[i].name);
+        name=allocString(false, length+prefixLength);
+        memcpy(name, prefix, prefixLength);
+        memcpy(name+prefixLength, items[i].name, length+1);
+        items[i].name=name;
+    }
+
+    // calculate offsets for item names and items, pad to 16-align items
+    // align only the first item; each item's length is a multiple of 16
+    basenameOffset=4+8*itemCount;
+    offset=basenameOffset+outStringTop;
+    if((length=(offset&15))!=0) {
+        length=16-length;
+        // allocString() reserves one more byte than requested, hence length-1
+        memset(allocString(false, length-1), 0xaa, length);
+        offset+=length;
+    }
+
+    // write the table of contents
+    // first the itemCount
+    outInt32=itemCount;
+    if(dsLocalToOut!=nullptr) {
+        dsLocalToOut->swapArray32(dsLocalToOut, &outInt32, 4, &outInt32, &errorCode);
+        if(U_FAILURE(errorCode)) {
+            fprintf(stderr, "icupkg: swapArray32(item count) failed - %s\n", u_errorName(errorCode));
+            exit(errorCode);
+        }
+    }
+    length=(int32_t)fwrite(&outInt32, 1, 4, file);
+    if(length!=4) {
+        fprintf(stderr, "icupkg: unable to write complete item count to file \"%s\"\n", filename);
+        exit(U_FILE_ACCESS_ERROR);
+    }
+
+    // then write the item entries (and collect the maxItemLength)
+    maxItemLength=0;
+    for(i=0; i<itemCount; ++i) {
+        entry.nameOffset=(uint32_t)(basenameOffset+(items[i].name-outStrings));
+        entry.dataOffset=(uint32_t)offset;
+        if(dsLocalToOut!=nullptr) {
+            dsLocalToOut->swapArray32(dsLocalToOut, &entry, 8, &entry, &errorCode);
+            if(U_FAILURE(errorCode)) {
+                fprintf(stderr, "icupkg: swapArray32(item entry %ld) failed - %s\n", (long)i, u_errorName(errorCode));
+                exit(errorCode);
+            }
+        }
+        length=(int32_t)fwrite(&entry, 1, 8, file);
+        if(length!=8) {
+            fprintf(stderr, "icupkg: unable to write complete item entry %ld to file \"%s\"\n", (long)i, filename);
+            exit(U_FILE_ACCESS_ERROR);
+        }
+
+        length=items[i].length;
+        if(length>maxItemLength) {
+            maxItemLength=length;
+        }
+        offset+=length;
+    }
+
+    // write the item names
+    length=(int32_t)fwrite(outStrings, 1, outStringTop, file);
+    if(length!=outStringTop) {
+        fprintf(stderr, "icupkg: unable to write complete item names to file \"%s\"\n", filename);
+        exit(U_FILE_ACCESS_ERROR);
+    }
+
+    // write the items
+    for(pItem=items, i=0; i<itemCount; ++pItem, ++i) {
+        int32_t type=makeTypeEnum(pItem->type);
+        if(ds[type]!=nullptr) {
+            // swap each item from its platform properties to the desired ones
+            udata_swap(
+                ds[type],
+                pItem->data, pItem->length, pItem->data,
+                &errorCode);
+            if(U_FAILURE(errorCode)) {
+                fprintf(stderr, "icupkg: udata_swap(item %ld) failed - %s\n", (long)i, u_errorName(errorCode));
+                exit(errorCode);
+            }
+        }
+        length=(int32_t)fwrite(pItem->data, 1, pItem->length, file);
+        if(length!=pItem->length) {
+            fprintf(stderr, "icupkg: unable to write complete item %ld to file \"%s\"\n", (long)i, filename);
+            exit(U_FILE_ACCESS_ERROR);
+        }
+    }
+
+    if(ferror(file)) {
+        fprintf(stderr, "icupkg: unable to write complete file \"%s\"\n", filename);
+        exit(U_FILE_ACCESS_ERROR);
+    }
+
+    // fclose() flushes the remaining buffered output;
+    // if that fails then the file on disk is incomplete
+    if(fclose(file)!=0) {
+        fprintf(stderr, "icupkg: unable to write complete file \"%s\"\n", filename);
+        exit(U_FILE_ACCESS_ERROR);
+    }
+    for(i=0; i<TYPE_COUNT; ++i) {
+        udata_closeSwapper(ds[i]);
+    }
+}
+
+/*
+ * Binary search for an item name in the sorted items array.
+ * length>=0: compare only the first length characters (prefix search) and
+ *            return the first item with this prefix;
+ * length<0:  compare whole names.
+ * Returns the item index, or if there is no match,
+ * the binary-not of the insertion point (a negative value).
+ */
+int32_t
+Package::findItem(const char *name, int32_t length) const {
+    const bool prefixOnly= length>=0;
+    int32_t low=0;
+    int32_t high=itemCount;
+
+    while(low<high) {
+        int32_t mid=(low+high)/2;
+        const char *candidate=items[mid].name;
+        const int cmp= prefixOnly ? strncmp(name, candidate, length) : strcmp(name, candidate);
+
+        if(cmp<0) {
+            high=mid;
+        } else if(cmp>0) {
+            low=mid+1;
+        } else {
+            /*
+             * found; if we compared just prefixes, then we may need to back up
+             * to the first item with this prefix
+             */
+            if(prefixOnly) {
+                while(mid>0 && 0==strncmp(name, items[mid-1].name, length)) {
+                    --mid;
+                }
+            }
+            return mid;
+        }
+    }
+
+    return ~low; /* not found, return binary-not of the insertion point */
+}
+
+/*
+ * Start a search for items that match the pattern.
+ * The pattern may contain at most one '*' wildcard; more than one is a
+ * syntax error that exits the tool.
+ * The pattern string is not copied: findPrefix/findSuffix point into it.
+ * Use findNextItem() to get the matching item indexes.
+ */
+void
+Package::findItems(const char *pattern) {
+    if(pattern==nullptr || *pattern==0) {
+        // nothing to search for
+        findNextIndex=-1;
+        return;
+    }
+
+    findPrefix=pattern;
+    const char *star=strchr(pattern, '*');
+    if(star!=nullptr) {
+        // one wildcard: split the pattern into a prefix and a suffix
+        findPrefixLength=(int32_t)(star-pattern);
+        findSuffix=star+1;
+        findSuffixLength=(int32_t)strlen(findSuffix);
+        if(strchr(findSuffix, '*')!=nullptr) {
+            // two or more wildcards
+            fprintf(stderr, "icupkg: syntax error (more than one '*') in item pattern \"%s\"\n", pattern);
+            exit(U_PARSE_ERROR);
+        }
+    } else {
+        // no wildcard: the whole pattern is the prefix
+        findSuffix=nullptr;
+        findSuffixLength=0;
+        findPrefixLength=(int32_t)strlen(pattern);
+    }
+
+    // with an empty prefix every item is a candidate, otherwise start at the first prefix match
+    findNextIndex= findPrefixLength==0 ? 0 : findItem(findPrefix, findPrefixLength);
+}
+
+/*
+ * Continue the search started by findItems().
+ * Returns the index of the next matching item, or -1 when there are no more.
+ */
+int32_t
+Package::findNextItem() {
+    if(findNextIndex<0) {
+        return -1;
+    }
+
+    // number of characters that the prefix and suffix require together
+    const int32_t fixedLength=findPrefixLength+findSuffixLength;
+
+    while(findNextIndex<itemCount) {
+        const int32_t candidate=findNextIndex++;
+        const char *itemName=items[candidate].name;
+        const int32_t itemNameLength=(int32_t)strlen(itemName);
+
+        if(itemNameLength<fixedLength) {
+            // item name too short for prefix & suffix
+            continue;
+        }
+        if(findPrefixLength>0 && 0!=memcmp(findPrefix, itemName, findPrefixLength)) {
+            // left the range of names with this prefix
+            break;
+        }
+        const char *tail=itemName+(itemNameLength-findSuffixLength);
+        if(findSuffixLength>0 && 0!=memcmp(findSuffix, tail, findSuffixLength)) {
+            // suffix does not match
+            continue;
+        }
+        // prefix & suffix match
+
+        if(matchMode&MATCH_NOSLASH) {
+            // the part of the name that the * wildcard stands for
+            const char *wildStart=itemName+findPrefixLength;
+            const int32_t wildLength=itemNameLength-fixedLength;
+            const char *slash=strchr(wildStart, U_TREE_ENTRY_SEP_CHAR);
+            if(slash!=nullptr && (slash-wildStart)<wildLength) {
+                // the middle (matching the * wildcard) contains a tree separator /
+                continue;
+            }
+        }
+
+        // found a matching item
+        return candidate;
+    }
+
+    // no more items
+    findNextIndex=-1;
+    return -1;
+}
+
+// Store the matching mode flags (such as MATCH_NOSLASH) that findNextItem() tests.
+void
+Package::setMatchMode(uint32_t mode) {
+    this->matchMode=mode;
+}
+
+// Add an item by name only: no data, zero length, not owned, default type letter.
+void
+Package::addItem(const char *name) {
+    const char defaultType=U_ICUDATA_TYPE_LETTER[0];
+    addItem(name, nullptr, 0, false, defaultType);
+}
+
+/*
+ * Add an item, or replace the data of an existing item with the same name.
+ * A new item is inserted at its sorted position and gets a copy of the name
+ * with file separators turned into tree separators.
+ * If isDataOwned is true, then this object frees the data with uprv_free().
+ */
+void
+Package::addItem(const char *name, uint8_t *data, int32_t length, UBool isDataOwned, char type) {
+    int32_t pos=findItem(name);
+    if(pos>=0) {
+        // same-name item found, replace it:
+        // release its old data, keep the item's name since it is the same
+        if(items[pos].isDataOwned) {
+            uprv_free(items[pos].data);
+        }
+    } else {
+        // new item, make space at the insertion point
+        ensureItemCapacity();
+        pos=~pos;
+
+        // move the following items down
+        const int32_t tailCount=itemCount-pos;
+        if(tailCount>0) {
+            memmove(items+pos+1, items+pos, tailCount*sizeof(Item));
+        }
+        ++itemCount;
+
+        // reset this Item entry
+        memset(items+pos, 0, sizeof(Item));
+
+        // copy the item's name
+        char *nameCopy=allocString(true, static_cast<int32_t>(strlen(name)));
+        strcpy(nameCopy, name);
+        pathToTree(nameCopy);
+        items[pos].name=nameCopy;
+    }
+
+    // set the item's data
+    Item &item=items[pos];
+    item.data=data;
+    item.length=length;
+    item.isDataOwned=isDataOwned;
+    item.type=type;
+}
+
+// Read the file and add its contents as an item that this object owns.
+void
+Package::addFile(const char *filesPath, const char *name) {
+    int32_t length;
+    char type;
+
+    // readFile() exits the tool if it fails
+    uint8_t *data=readFile(filesPath, name, length, type);
+    addItem(name, data, length, true, type);
+}
+
+// Add all items of listPkg; their data is shared with listPkg, not owned here.
+void
+Package::addItems(const Package &listPkg) {
+    for(int32_t i=0; i<listPkg.itemCount; ++i) {
+        const Item &src=listPkg.items[i];
+        addItem(src.name, src.data, src.length, false, src.type);
+    }
+}
+
+/*
+ * Remove the item at the index; an index outside 0..itemCount-1 is ignored.
+ * Frees the item's data if this object owns it, and keeps a running
+ * findItems()/findNextItem() search consistent.
+ */
+void
+Package::removeItem(int32_t idx) {
+    if(idx<0 || itemCount<=idx) {
+        // not an item (e.g., a negative "not found" result from findItem())
+        return;
+    }
+
+    // remove the item
+    if(items[idx].isDataOwned) {
+        uprv_free(items[idx].data);
+    }
+
+    // move the following items up
+    if((idx+1)<itemCount) {
+        memmove(items+idx, items+idx+1, (itemCount-(idx+1))*sizeof(Item));
+    }
+    --itemCount;
+
+    // findNextIndex is the next item to be examined: it moves up together with
+    // the items only if the removed item was in front of it. If the removed
+    // item was the one at findNextIndex, then its successor is now there.
+    if(idx<findNextIndex) {
+        --findNextIndex;
+    }
+}
+
+// Remove every item that matches the pattern (see findItems()).
+void
+Package::removeItems(const char *pattern) {
+    findItems(pattern);
+    for(int32_t idx=findNextItem(); idx>=0; idx=findNextItem()) {
+        removeItem(idx);
+    }
+}
+
+// Remove the items matching each item name of listPkg; each name is used as a pattern.
+void
+Package::removeItems(const Package &listPkg) {
+    for(int32_t i=0; i<listPkg.itemCount; ++i) {
+        removeItems(listPkg.items[i].name);
+    }
+}
+
+/*
+ * Write the item at the index to a file, creating directories as needed.
+ *
+ * filesPath: optional directory that is prepended to outName
+ * outName:   name of the output file, with tree or file separators
+ * idx:       item index; an index outside 0..itemCount-1 is ignored
+ * outType:   platform type letter to swap the item to; 0 means don't swap
+ *
+ * The item data is swapped in place, and the item's type is updated.
+ * Any error prints a message and exits the tool.
+ */
+void
+Package::extractItem(const char *filesPath, const char *outName, int32_t idx, char outType) {
+    char filename[1024];
+    UDataSwapper *ds;
+    FILE *file;
+    Item *pItem;
+    int32_t fileLength;
+    uint8_t itemCharset, outCharset;
+    UBool itemIsBigEndian, outIsBigEndian;
+
+    if(idx<0 || itemCount<=idx) {
+        return;
+    }
+    pItem=items+idx;
+
+    // swap the data to the outType
+    // outType==0: don't swap
+    if(outType!=0 && pItem->type!=outType) {
+        // open the swapper
+        UErrorCode errorCode=U_ZERO_ERROR;
+        makeTypeProps(pItem->type, itemCharset, itemIsBigEndian);
+        makeTypeProps(outType, outCharset, outIsBigEndian);
+        ds=udata_openSwapper(itemIsBigEndian, itemCharset, outIsBigEndian, outCharset, &errorCode);
+        if(U_FAILURE(errorCode)) {
+            fprintf(stderr, "icupkg: udata_openSwapper(item %ld) failed - %s\n",
+                    (long)idx, u_errorName(errorCode));
+            exit(errorCode);
+        }
+
+        ds->printError=printPackageError;
+        ds->printErrorContext=stderr;
+
+        // swap the item from its platform properties to the desired ones
+        udata_swap(ds, pItem->data, pItem->length, pItem->data, &errorCode);
+        if(U_FAILURE(errorCode)) {
+            fprintf(stderr, "icupkg: udata_swap(item %ld) failed - %s\n", (long)idx, u_errorName(errorCode));
+            exit(errorCode);
+        }
+        udata_closeSwapper(ds);
+        pItem->type=outType;
+    }
+
+    // create the file and write its contents
+    makeFullFilenameAndDirs(filesPath, outName, filename, (int32_t)sizeof(filename));
+    file=fopen(filename, "wb");
+    if(file==nullptr) {
+        fprintf(stderr, "icupkg: unable to create file \"%s\"\n", filename);
+        exit(U_FILE_ACCESS_ERROR);
+    }
+    fileLength=(int32_t)fwrite(pItem->data, 1, pItem->length, file);
+
+    if(ferror(file) || fileLength!=pItem->length) {
+        fprintf(stderr, "icupkg: unable to write complete file \"%s\"\n", filename);
+        exit(U_FILE_ACCESS_ERROR);
+    }
+    // fclose() flushes the remaining buffered output;
+    // if that fails then the file on disk is incomplete
+    if(fclose(file)!=0) {
+        fprintf(stderr, "icupkg: unable to write complete file \"%s\"\n", filename);
+        exit(U_FILE_ACCESS_ERROR);
+    }
+}
+
+/*
+ * Write the item at the index to a file named like the item.
+ * An index outside 0..itemCount-1 is ignored.
+ */
+void
+Package::extractItem(const char *filesPath, int32_t idx, char outType) {
+    // Check the index here: items[idx].name below is evaluated
+    // before the callee gets a chance to validate idx.
+    if(idx<0 || itemCount<=idx) {
+        return;
+    }
+    extractItem(filesPath, items[idx].name, idx, outType);
+}
+
+// Extract every item that matches the pattern (see findItems()).
+void
+Package::extractItems(const char *filesPath, const char *pattern, char outType) {
+    findItems(pattern);
+    for(int32_t idx=findNextItem(); idx>=0; idx=findNextItem()) {
+        extractItem(filesPath, idx, outType);
+    }
+}
+
+// Extract the items matching each item name of listPkg; each name is used as a pattern.
+void
+Package::extractItems(const char *filesPath, const Package &listPkg, char outType) {
+    for(int32_t i=0; i<listPkg.itemCount; ++i) {
+        extractItems(filesPath, listPkg.items[i].name, outType);
+    }
+}
+
+// Returns the number of items currently in this package.
+int32_t
+Package::getItemCount() const {
+    return this->itemCount;
+}
+
+// Returns the item at the index, or nullptr if the index is out of range.
+const Item *
+Package::getItem(int32_t idx) const {
+    if(idx<0 || idx>=itemCount) {
+        return nullptr;
+    }
+    return items+idx;
+}
+
+/*
+ * CheckDependency callback used by checkDependencies().
+ * context is the Package; if the target item is not in the package,
+ * then the package is flagged and a message is printed.
+ */
+void
+Package::checkDependency(void *context, const char *itemName, const char *targetName) {
+    Package *pkg=static_cast<Package *>(context);
+    if(pkg->findItem(targetName)>=0) {
+        // the target item is in the package
+        return;
+    }
+    pkg->isMissingItems=true;
+    fprintf(stderr, "Item %s depends on missing item %s\n", itemName, targetName);
+}
+
+// Returns true if no item depends on an item that is missing from this package.
+UBool
+Package::checkDependencies() {
+    isMissingItems=false;
+    // checkDependency() sets isMissingItems for each missing target
+    enumDependencies(this, checkDependency);
+    const UBool complete=(UBool)!isMissingItems;
+    return complete;
+}
+
+// Enumerate the dependencies of every item in this package, in item order.
+void
+Package::enumDependencies(void *context, CheckDependency check) {
+    Item *pItem=items;
+    for(int32_t remaining=itemCount; remaining>0; --remaining) {
+        enumDependencies(pItem++, context, check);
+    }
+}
+
+/*
+ * Reserve length+1 bytes in the input (in==true) or output string store.
+ * Returns a pointer to the reserved bytes; their contents are not modified here.
+ * Exits the tool if the store would exceed STRING_STORE_SIZE.
+ */
+char *
+Package::allocString(UBool in, int32_t length) {
+    // select the store and its fill level
+    int32_t &storeTop= in ? inStringTop : outStringTop;
+    char *store= in ? inStrings : outStrings;
+
+    const int32_t newTop=storeTop+length+1;
+    if(newTop>STRING_STORE_SIZE) {
+        fprintf(stderr, "icupkg: string storage overflow\n");
+        exit(U_BUFFER_OVERFLOW_ERROR);
+    }
+
+    char *result=store+storeTop;
+    storeTop=newTop;
+    return result;
+}
+
+// Sort the items by name using compareItems(); exits the tool on failure.
+void
+Package::sortItems() {
+    UErrorCode status=U_ZERO_ERROR;
+    uprv_sortArray(items, itemCount, (int32_t)sizeof(Item), compareItems, nullptr, false, &status);
+    if(U_SUCCESS(status)) {
+        return;
+    }
+    fprintf(stderr, "icupkg: sorting item names failed - %s\n", u_errorName(status));
+    exit(status);
+}
+
+// Grow the items array to hold at least max items; never shrinks.
+// Existing items are copied over. Exits the tool if the allocation fails.
+void Package::setItemCapacity(int32_t max)
+{
+  if(max>itemMax) {
+    Item *grown = (Item*)uprv_malloc(max * sizeof(items[0]));
+    if(grown == nullptr) {
+      fprintf(stderr, "icupkg: Out of memory trying to allocate %lu bytes for %d items\n",
+          (unsigned long)(max*sizeof(items[0])), max);
+      exit(U_MEMORY_ALLOCATION_ERROR);
+    }
+    if(items != nullptr && itemCount>0) {
+      uprv_memcpy(grown, items, (size_t)itemCount*sizeof(items[0]));
+    }
+    // switch to the new array, then release the old one
+    Item *previous = items;
+    items = grown;
+    itemMax = max;
+    uprv_free(previous);
+  }
+}
+
+// Make sure that there is room for one more item; grows by kItemsChunk if not.
+void Package::ensureItemCapacity()
+{
+  if(itemCount>=itemMax) {
+    setItemCapacity(itemCount+kItemsChunk);
+  }
+}
+
+U_NAMESPACE_END
diff --git a/intl/icu/source/tools/toolutil/package.h b/intl/icu/source/tools/toolutil/package.h
new file mode 100644
index 0000000000..ea60c13a74
--- /dev/null
+++ b/intl/icu/source/tools/toolutil/package.h
@@ -0,0 +1,203 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+*
+*   Copyright (C) 2005-2014, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*
+*******************************************************************************
+*   file name:  package.h
+*   encoding:   UTF-8
+*   tab size:   8 (not used)
+*   indentation:4
+*
+*   created on: 2005aug25
+*   created by: Markus W. Scherer
+*
+*   Read, modify, and write ICU .dat data package files.
+*/
+
+#ifndef __PACKAGE_H__
+#define __PACKAGE_H__
+
+#include "unicode/utypes.h"
+
+#include <stdio.h>
+
+// .dat package file representation ---------------------------------------- ***
+
+#define STRING_STORE_SIZE 100000  /* size of each of Package::inStrings and Package::outStrings */
+#define MAX_PKG_NAME_LENGTH 64    /* size of Package::inPkgName and Package::pkgPrefix */
+
+typedef void CheckDependency(void *context, const char *itemName, const char *targetName);  /* callback type taken by Package::enumDependencies() */
+
+U_NAMESPACE_BEGIN
+
+struct Item {
+    char *name;         // item name string
+    uint8_t *data;      // the item's bytes
+    int32_t length;     // presumably the size of data in bytes -- TODO confirm in package.cpp
+    UBool isDataOwned;  // presumably whether this Item is responsible for freeing data -- TODO confirm
+    char type;          // presumably a platform type letter like getInType() returns -- TODO confirm
+};
+
+class U_TOOLUTIL_API Package {
+public:
+    /*
+     * Constructor.
+     * Prepare this object for a new, empty package.
+     */
+    Package();
+
+    /* Destructor. */
+    ~Package();
+
+    /**
+     * Uses the prefix of the first entry of the package in readPackage(),
+     * rather than the package basename.
+     */
+    void setAutoPrefix() { doAutoPrefix=true; }
+    /**
+     * Same as setAutoPrefix(), plus the prefix must end with the platform type letter.
+     */
+    void setAutoPrefixWithType() {
+        doAutoPrefix=true;
+        prefixEndsWithType=true;
+    }
+    void setPrefix(const char *p);
+
+    /*
+     * Read an existing .dat package file.
+     * The header and item name strings are swapped into this object,
+     * but the items are left unswapped.
+     */
+    void readPackage(const char *filename);
+    /*
+     * Write a .dat package file with the items in this object.
+     * Swap all pieces to the desired output platform properties.
+     * The package becomes unusable:
+     * The item names are swapped and sorted in the outCharset rather than the local one.
+     * Also, the items themselves are swapped in-place.
+     */
+    void writePackage(const char *filename, char outType, const char *comment);
+
+    /*
+     * Return the input data type letter (l, b, or e).
+     */
+    char getInType();
+
+    // find the item in items[], return the non-negative index if found, else the binary-not of the insertion point
+    int32_t findItem(const char *name, int32_t length=-1) const;
+
+    /*
+     * Set internal state for following calls to findNextItem() which will return
+     * indexes for items whose names match the pattern.
+     */
+    void findItems(const char *pattern);
+    int32_t findNextItem();
+    /*
+     * Set the match mode for findItems() & findNextItem().
+     * @param mode 0=default
+     *             MATCH_NOSLASH: a '*' in the pattern does not match a '/'
+     */
+    void setMatchMode(uint32_t mode);
+
+    enum {
+        MATCH_NOSLASH=1
+    };
+
+    void addItem(const char *name);
+    void addItem(const char *name, uint8_t *data, int32_t length, UBool isDataOwned, char type);
+    void addFile(const char *filesPath, const char *name);
+    void addItems(const Package &listPkg);
+
+    void removeItem(int32_t itemIndex);
+    void removeItems(const char *pattern);
+    void removeItems(const Package &listPkg);
+
+    /* The extractItem() functions accept outType=0 to mean "don't swap the item". */
+    void extractItem(const char *filesPath, int32_t itemIndex, char outType);
+    void extractItems(const char *filesPath, const char *pattern, char outType);
+    void extractItems(const char *filesPath, const Package &listPkg, char outType);
+
+    /* This variant extracts an item to a specific filename. */
+    void extractItem(const char *filesPath, const char *outName, int32_t itemIndex, char outType);
+
+    int32_t getItemCount() const;
+    const Item *getItem(int32_t idx) const;
+
+    /*
+     * Check dependencies and return true if all dependencies are fulfilled.
+     */
+    UBool checkDependencies();
+
+    /*
+     * Enumerate all the dependencies and report the results by calling the CheckDependency callback with context.
+     * @param context user context (will be passed to check function)
+     * @param check will be called with context and any missing items
+     */
+    void enumDependencies(void *context, CheckDependency check);
+
+private:
+    void enumDependencies(Item *pItem, void *context, CheckDependency check);
+
+    /**
+     * Default CheckDependency function used by checkDependencies()
+     */
+    static void checkDependency(void *context, const char *itemName, const char *targetName);
+
+    /*
+     * Allocate a string in inStrings or outStrings.
+     * The length does not include the terminating NUL.
+     */
+    char *allocString(UBool in, int32_t length);
+
+    void sortItems();  // sorts items[] with compareItems; exits the process on failure
+
+    // data fields
+    char inPkgName[MAX_PKG_NAME_LENGTH];
+    char pkgPrefix[MAX_PKG_NAME_LENGTH];
+
+    uint8_t *inData;
+    uint8_t header[1024];
+    int32_t inLength, headerLength;
+    uint8_t inCharset;
+    UBool inIsBigEndian;
+    UBool doAutoPrefix;
+    UBool prefixEndsWithType;
+
+    int32_t itemCount;   // number of entries of items[] in use
+    int32_t itemMax;     // allocated capacity of items[]
+    Item   *items;       // item array; replaced by a larger one in setItemCapacity()
+
+    int32_t inStringTop, outStringTop;  // offset of the next free byte in inStrings / outStrings
+    char inStrings[STRING_STORE_SIZE], outStrings[STRING_STORE_SIZE];  // storage handed out by allocString()
+
+    // match mode for findItems(pattern) and findNextItem()
+    uint32_t matchMode;
+
+    // state for findItems(pattern) and findNextItem()
+    const char *findPrefix, *findSuffix;
+    int32_t findPrefixLength, findSuffixLength;
+    int32_t findNextIndex;
+
+    // state for checkDependencies()
+    UBool isMissingItems;
+
+    /**
+     * Grow the items array to a capacity of max; does nothing if max<=itemMax
+     */
+    void setItemCapacity(int32_t max);
+
+    /**
+     * Grow itemMax to at least itemCount+1
+     */
+    void ensureItemCapacity();
+};
+
+U_NAMESPACE_END
+
+#endif
+
+
diff --git a/intl/icu/source/tools/toolutil/pkg_genc.cpp b/intl/icu/source/tools/toolutil/pkg_genc.cpp
new file mode 100644
index 0000000000..741a8a5228
--- /dev/null
+++ b/intl/icu/source/tools/toolutil/pkg_genc.cpp
@@ -0,0 +1,1396 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/******************************************************************************
+ *   Copyright (C) 2009-2016, International Business Machines
+ *   Corporation and others.  All Rights Reserved.
+ *******************************************************************************
+ */
+#include "unicode/utypes.h"
+
+#if U_PLATFORM_HAS_WIN32_API
+#   define VC_EXTRALEAN
+#   define WIN32_LEAN_AND_MEAN
+#   define NOUSER
+#   define NOSERVICE
+#   define NOIME
+#   define NOMCX
+#include <windows.h>
+#include <time.h>
+#   ifdef __GNUC__
+#       define WINDOWS_WITH_GNUC
+#   endif
+#endif
+
+#if U_PLATFORM_IS_LINUX_BASED && U_HAVE_ELF_H
+#   define U_ELF
+#endif
+
+#ifdef U_ELF
+#   include <elf.h>
+#   if defined(ELFCLASS64)
+#       define U_ELF64
+#   endif
+    /* Old elf.h headers may not have EM_X86_64, or have EM_X8664 instead. */
+#   ifndef EM_X86_64
+#       define EM_X86_64 62
+#   endif
+#   define ICU_ENTRY_OFFSET 0
+#endif
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "unicode/putil.h"
+#include "cmemory.h"
+#include "cstring.h"
+#include "filestrm.h"
+#include "toolutil.h"
+#include "unicode/uclean.h"
+#include "uoptions.h"
+#include "pkg_genc.h"
+#include "filetools.h"
+#include "charstr.h"
+#include "unicode/errorcode.h"
+
+#define MAX_COLUMN ((uint32_t)(0xFFFFFFFFU)) /* sentinel column: no value written yet */
+
+#define HEX_0X 0 /*  0x1234 */
+#define HEX_0H 1 /*  01234h */
+
+/* prototypes --------------------------------------------------------------- */
+static void
+getOutFilename(
+    const char *inFilename,
+    const char *destdir,
+    char *outFilename,
+    int32_t outFilenameCapacity,
+    char *entryName,
+    int32_t entryNameCapacity,
+    const char *newSuffix,
+    const char *optFilename); /* builds the output path and the entry-point name */
+
+static uint32_t
+write8(FileStream *out, uint8_t byte, uint32_t column); /* one byte, decimal; returns the new column */
+
+static uint32_t
+write32(FileStream *out, uint32_t byte, uint32_t column); /* one 32-bit word (named bitField in the definition); returns the new column */
+
+#if U_PLATFORM == U_PF_OS400
+static uint32_t
+write8str(FileStream *out, uint8_t byte, uint32_t column); /* one byte as a string-literal escape; returns the new column */
+#endif
+/* -------------------------------------------------------------------------- */
+
+/*
+Creating Template Files for New Platforms
+
+Let the cc compiler help you get started.
+Compile this program
+    const unsigned int x[5] = {1, 2, 0xdeadbeef, 0xffffffff, 16};
+with the -S option to produce assembly output.
+
+For example, this will generate array.s:
+gcc -S array.c
+
+This will produce a .s file that may look like this:
+
+    .file   "array.c"
+    .version        "01.01"
+gcc2_compiled.:
+    .globl x
+    .section        .rodata
+    .align 4
+    .type    x,@object
+    .size    x,20
+x:
+    .long   1
+    .long   2
+    .long   -559038737
+    .long   -1
+    .long   16
+    .ident  "GCC: (GNU) 2.96 20000731 (Red Hat Linux 7.1 2.96-85)"
+
+which gives a starting point that will compile, and can be transformed
+to become the template, generally with some consulting of as docs and
+some experimentation.
+
+If you want ICU to automatically use this assembly, you should
+specify "GENCCODE_ASSEMBLY=-a name" in the specific config/mh-* file,
+where the name is the compiler or platform that you used in this
+assemblyHeader data structure.
+*/
+static const struct AssemblyType {
+    const char *name;      /* value accepted by checkAssemblyHeaderName() */
+    const char *header;    /* printf-style template, formatted with the entry name */
+    const char *beginLine; /* directive written in front of each line of values */
+    const char *footer;    /* printf-style template written after the data; may be empty */
+    int8_t      hexType; /* HEX_0X or HEX_0H */
+} assemblyHeader[] = {
+    /* For gcc assemblers, the meaning of .align changes depending on the */
+    /* hardware, so we use .balign 16 which always means 16 bytes. */
+    /* https://sourceware.org/binutils/docs/as/Pseudo-Ops.html */
+    {"gcc",
+        ".globl %s\n"
+        "\t.section .note.GNU-stack,\"\",%%progbits\n"
+        "#ifdef __CET__\n"
+        "# include <cet.h>\n"
+        "#endif\n"
+        "\t.section .rodata\n"
+        "\t.balign 16\n"
+        "#ifdef U_HIDE_DATA_SYMBOL\n"
+        "\t.hidden %s\n"
+        "#endif\n"
+        "\t.type %s,%%object\n"
+        "%s:\n\n",
+
+        ".long ",".size %s, .-%s\n",HEX_0X
+    },
+    {"gcc-darwin",
+        /*"\t.section __TEXT,__text,regular,pure_instructions\n"
+        "\t.section __TEXT,__picsymbolstub1,symbol_stubs,pure_instructions,32\n"*/
+        ".globl _%s\n"
+        "#ifdef U_HIDE_DATA_SYMBOL\n"
+        "\t.private_extern _%s\n"
+        "#endif\n"
+        "\t.data\n"
+        "\t.const\n"
+        "\t.balign 16\n"
+        "_%s:\n\n",
+
+        ".long ","",HEX_0X
+    },
+    /* macOS PPC uses `.p2align 4` instead of `.balign 16`, which is an
+     * unknown pseudo-op on such legacy systems. */
+    {"gcc-darwin-ppc",
+        /*"\t.section __TEXT,__text,regular,pure_instructions\n"
+        "\t.section __TEXT,__picsymbolstub1,symbol_stubs,pure_instructions,32\n"*/
+        ".globl _%s\n"
+        "#ifdef U_HIDE_DATA_SYMBOL\n"
+        "\t.private_extern _%s\n"
+        "#endif\n"
+        "\t.data\n"
+        "\t.const\n"
+        "\t.p2align 4\n"
+        "_%s:\n\n",
+
+        ".long ","",HEX_0X
+    },
+    {"gcc-cygwin",
+        ".globl _%s\n"
+        "\t.section .rodata\n"
+        "\t.balign 16\n"
+        "_%s:\n\n",
+
+        ".long ","",HEX_0X
+    },
+    {"gcc-mingw64",
+        ".globl %s\n"
+        "\t.section .rodata\n"
+        "\t.balign 16\n"
+        "%s:\n\n",
+
+        ".long ","",HEX_0X
+    },
+/* 16 bytes alignment. */
+/* http://docs.oracle.com/cd/E19641-01/802-1947/802-1947.pdf */
+    {"sun",
+        "\t.section \".rodata\"\n"
+        "\t.align   16\n"
+        ".globl     %s\n"
+        "%s:\n",
+
+        ".word ","",HEX_0X
+    },
+/* 16 bytes alignment for sun-x86. */
+/* http://docs.oracle.com/cd/E19963-01/html/821-1608/eoiyg.html */
+    {"sun-x86",
+        "Drodata.rodata:\n"
+        "\t.type   Drodata.rodata,@object\n"
+        "\t.size   Drodata.rodata,0\n"
+        "\t.globl  %s\n"
+        "\t.align  16\n" 
+        "%s:\n",
+
+        ".4byte ","",HEX_0X
+    },
+/* 1<<4 bit alignment for aix. */
+/* http://pic.dhe.ibm.com/infocenter/aix/v6r1/index.jsp?topic=%2Fcom.ibm.aix.aixassem%2Fdoc%2Falangref%2Fidalangref_csect_pseudoop.htm */
+    {"xlc",
+        ".globl %s{RO}\n"
+        "\t.toc\n"
+        "%s:\n"
+        "\t.csect %s{RO}, 4\n",
+
+        ".long ","",HEX_0X
+    },
+    {"aCC-ia64",
+        "\t.file   \"%s.s\"\n"
+        "\t.type   %s,@object\n"
+        "\t.global %s\n"
+        "\t.secalias .abe$0.rodata, \".rodata\"\n"
+        "\t.section .abe$0.rodata = \"a\", \"progbits\"\n"
+        "\t.align  16\n"
+        "%s::\t",
+
+        "data4 ","",HEX_0X
+    },
+    {"aCC-parisc",
+        "\t.SPACE  $TEXT$\n"
+        "\t.SUBSPA $LIT$\n"
+        "%s\n"
+        "\t.EXPORT %s\n"
+        "\t.ALIGN  16\n",
+
+        ".WORD ","",HEX_0X
+    },
+/* align 16 bytes */
+/*  http://msdn.microsoft.com/en-us/library/dwa9fwef.aspx */
+    {"nasm",
+        "global %s\n"
+#if defined(_WIN32)
+        "section .rdata align=16\n"
+#else
+        "section .rodata align=16\n"
+#endif
+        "%s:\n",
+        "  dd ","",HEX_0X
+    },
+    { "masm",
+      "\tTITLE %s\n"
+      "; generated by genccode\n"
+      ".386\n"
+      ".model flat\n"
+      "\tPUBLIC _%s\n"
+      "ICUDATA_%s\tSEGMENT READONLY PARA PUBLIC FLAT 'DATA'\n"
+      "\tALIGN 16\n"
+      "_%s\tLABEL DWORD\n",
+      "\tDWORD ","\nICUDATA_%s\tENDS\n\tEND\n",HEX_0H
+    },
+    { "masm64",
+      "\tTITLE %s\n"
+      "; generated by genccode\n"
+      "\tPUBLIC _%s\n"
+      "ICUDATA_%s\tSEGMENT READONLY 'DATA'\n"
+      "\tALIGN 16\n"
+      "_%s\tLABEL DWORD\n",
+      "\tDWORD ","\nICUDATA_%s\tENDS\n\tEND\n",HEX_0H
+    }
+};
+
+static int32_t assemblyHeaderIndex = -1; /* index into assemblyHeader[] set by checkAssemblyHeaderName(); -1 when no name matched */
+static int32_t hexType = HEX_0X; /* number style used by write32(); copied from the selected assemblyHeader[] entry */
+
+U_CAPI UBool U_EXPORT2
+checkAssemblyHeaderName(const char* optAssembly) {
+    /* Linear search of assemblyHeader[] by name; a match is remembered in the file statics. */
+    assemblyHeaderIndex = -1;
+    for (int32_t pos = 0; pos < UPRV_LENGTHOF(assemblyHeader); ++pos) {
+        const AssemblyType &candidate = assemblyHeader[pos];
+        if (uprv_strcmp(optAssembly, candidate.name) != 0) { continue; }
+        assemblyHeaderIndex = pos;
+        hexType = candidate.hexType; /* set the hex type */
+        return true;
+    }
+
+    return false;
+}
+
+
+U_CAPI void U_EXPORT2
+printAssemblyHeadersToStdErr() {
+    const int32_t total = UPRV_LENGTHOF(assemblyHeader);
+    int32_t pos = 0;
+    fprintf(stderr, "%s", assemblyHeader[pos++].name);  /* first name: no separator */
+    while (pos < total) {
+        fprintf(stderr, ", %s", assemblyHeader[pos++].name);
+    }
+    fprintf(stderr, ")\n");  /* presumably closes a "(" printed by the caller -- confirm */
+}
+
+/*
+ * Write the contents of filename as assembly source using the template chosen by
+ * checkAssemblyHeaderName(); outFilePath, if not nullptr, receives the output path.
+ */
+U_CAPI void U_EXPORT2
+writeAssemblyCode(
+        const char *filename,
+        const char *destdir,
+        const char *optEntryPoint,
+        const char *optFilename,
+        char *outFilePath,
+        size_t outFilePathCapacity) {
+    uint32_t column = MAX_COLUMN;
+    char entry[96];
+    union {
+        uint32_t uint32s[1024];
+        char chars[4096];
+    } buffer;
+    FileStream *in, *out;
+    size_t i, length, count;
+
+    in=T_FileStream_open(filename, "rb");
+    if(in==nullptr) {
+        fprintf(stderr, "genccode: unable to open input file %s\n", filename);
+        exit(U_FILE_ACCESS_ERROR);
+    }
+
+    const char* newSuffix = nullptr;
+    if (uprv_strcmp(assemblyHeader[assemblyHeaderIndex].name, "masm") == 0) {
+        newSuffix = ".masm";
+    }
+    else if (uprv_strcmp(assemblyHeader[assemblyHeaderIndex].name, "nasm") == 0) {
+        newSuffix = ".asm";
+    } else {
+        newSuffix = ".S";
+    }
+
+    getOutFilename(
+        filename,
+        destdir,
+        buffer.chars,
+        sizeof(buffer.chars),
+        entry,
+        sizeof(entry),
+        newSuffix,
+        optFilename);
+    out=T_FileStream_open(buffer.chars, "w");
+    if(out==nullptr) {
+        fprintf(stderr, "genccode: unable to open output file %s\n", buffer.chars);
+        exit(U_FILE_ACCESS_ERROR);
+    }
+    if (outFilePath != nullptr) {
+        if (uprv_strlen(buffer.chars) >= outFilePathCapacity) {
+            fprintf(stderr, "genccode: filename too long\n");
+            exit(U_ILLEGAL_ARGUMENT_ERROR);
+        }
+        uprv_strcpy(outFilePath, buffer.chars);
+#if defined (WINDOWS_WITH_GNUC) && U_PLATFORM != U_PF_CYGWIN
+        /* Need to fix the file separator character when using MinGW. */
+        swapFileSepChar(outFilePath, U_FILE_SEP_CHAR, '/');
+#endif
+    }
+
+    /* Bounded formatting: entry[] is small and optEntryPoint is caller-supplied. */
+    if(optEntryPoint != nullptr &&
+            (size_t)snprintf(entry, sizeof(entry), "%s_dat", optEntryPoint) >= sizeof(entry)) {
+        fprintf(stderr, "genccode: entry name too long (long filename?)\n");
+        exit(U_ILLEGAL_ARGUMENT_ERROR);
+    }
+
+    /* turn dashes or dots in the entry name into underscores */
+    length=uprv_strlen(entry);
+    for(i=0; i<length; ++i) {
+        if(entry[i]=='-' || entry[i]=='.') {
+            entry[i]='_';
+        }
+    }
+
+    count = snprintf(
+        buffer.chars, sizeof(buffer.chars),
+        assemblyHeader[assemblyHeaderIndex].header,
+        entry, entry, entry, entry,
+        entry, entry, entry, entry);
+    if (count >= sizeof(buffer.chars)) {
+        fprintf(stderr, "genccode: entry name too long (long filename?)\n");
+        exit(U_ILLEGAL_ARGUMENT_ERROR);
+    }
+    T_FileStream_writeLine(out, buffer.chars);
+    T_FileStream_writeLine(out, assemblyHeader[assemblyHeaderIndex].beginLine);
+
+    for(;;) {
+        memset(buffer.uint32s, 0, sizeof(buffer.uint32s));
+        length=T_FileStream_read(in, buffer.uint32s, sizeof(buffer.uint32s));
+        if(length==0) {
+            break;
+        }
+        // Round up: the buffer was zeroed, so a trailing partial word is written zero-padded, not dropped.
+        for(i=0; i<(length+sizeof(buffer.uint32s[0])-1)/sizeof(buffer.uint32s[0]); i++) {
+            column = write32(out, buffer.uint32s[i], column);
+        }
+    }
+
+    T_FileStream_writeLine(out, "\n");
+    count = snprintf(
+        buffer.chars, sizeof(buffer.chars),
+        assemblyHeader[assemblyHeaderIndex].footer,
+        entry, entry, entry, entry,
+        entry, entry, entry, entry);
+    if (count >= sizeof(buffer.chars)) {
+        fprintf(stderr, "genccode: entry name too long (long filename?)\n");
+        exit(U_ILLEGAL_ARGUMENT_ERROR);
+    }
+    T_FileStream_writeLine(out, buffer.chars);
+    if(T_FileStream_error(in)) {
+        fprintf(stderr, "genccode: file read error while generating from file %s\n", filename);
+        exit(U_FILE_ACCESS_ERROR);
+    }
+    if(T_FileStream_error(out)) {
+        fprintf(stderr, "genccode: file write error while generating from file %s\n", filename);
+        exit(U_FILE_ACCESS_ERROR);
+    }
+    T_FileStream_close(out);
+    T_FileStream_close(in);
+}
+
+/*
+ * Write the contents of filename as a C source file that defines one constant
+ * struct holding the bytes; outFilePath, if not nullptr, receives the output path.
+ */
+U_CAPI void U_EXPORT2
+writeCCode(
+        const char *filename,
+        const char *destdir,
+        const char *optEntryPoint,
+        const char *optName,
+        const char *optFilename,
+        char *outFilePath,
+        size_t outFilePathCapacity) {
+    uint32_t column = MAX_COLUMN;
+    char buffer[4096], entry[96];
+    FileStream *in, *out;
+    size_t i, length, count;
+
+    in=T_FileStream_open(filename, "rb");
+    if(in==nullptr) {
+        fprintf(stderr, "genccode: unable to open input file %s\n", filename);
+        exit(U_FILE_ACCESS_ERROR);
+    }
+
+    if(optName != nullptr) { /* prepend  'icudt28_' */
+        // +2 includes the _ and the NUL
+        if (uprv_strlen(optName) + 2 > sizeof(entry)) {
+            fprintf(stderr, "genccode: entry name too long (long filename?)\n");
+            exit(U_ILLEGAL_ARGUMENT_ERROR);
+        }
+        strcpy(entry, optName);
+        strcat(entry, "_");
+    } else {
+        entry[0] = 0;
+    }
+
+    getOutFilename(
+        filename,
+        destdir,
+        buffer,
+        static_cast<int32_t>(sizeof(buffer)),
+        entry + uprv_strlen(entry),
+        static_cast<int32_t>(sizeof(entry) - uprv_strlen(entry)),
+        ".c",
+        optFilename);
+
+    if (outFilePath != nullptr) {
+        if (uprv_strlen(buffer) >= outFilePathCapacity) {
+            fprintf(stderr, "genccode: filename too long\n");
+            exit(U_ILLEGAL_ARGUMENT_ERROR);
+        }
+        uprv_strcpy(outFilePath, buffer);
+#if defined (WINDOWS_WITH_GNUC) && U_PLATFORM != U_PF_CYGWIN
+        /* Need to fix the file separator character when using MinGW. */
+        swapFileSepChar(outFilePath, U_FILE_SEP_CHAR, '/');
+#endif
+    }
+
+    out=T_FileStream_open(buffer, "w");
+    if(out==nullptr) {
+        fprintf(stderr, "genccode: unable to open output file %s\n", buffer);
+        exit(U_FILE_ACCESS_ERROR);
+    }
+
+    /* Bounded formatting: entry[] is small and optEntryPoint is caller-supplied. */
+    if(optEntryPoint != nullptr &&
+            (size_t)snprintf(entry, sizeof(entry), "%s_dat", optEntryPoint) >= sizeof(entry)) {
+        fprintf(stderr, "genccode: entry name too long (long filename?)\n");
+        exit(U_ILLEGAL_ARGUMENT_ERROR);
+    }
+
+    /* turn dashes or dots in the entry name into underscores */
+    length=uprv_strlen(entry);
+    for(i=0; i<length; ++i) {
+        if(entry[i]=='-' || entry[i]=='.') {
+            entry[i]='_';
+        }
+    }
+
+#if U_PLATFORM == U_PF_OS400
+    /*
+    TODO: Fix this once the compiler implements this feature. Keep in sync with udatamem.c
+
+    This is here because this platform can't currently put
+    const data into the read-only pages of an object or
+    shared library (service program). Only strings are allowed in read-only
+    pages, so we use char * strings to store the data.
+
+    In order to prevent the beginning of the data from ever matching the
+    magic numbers we must still use the initial double.
+    [grhoten 4/24/2003]
+    */
+    count = snprintf(buffer, sizeof(buffer),
+        "#ifndef IN_GENERATED_CCODE\n"
+        "#define IN_GENERATED_CCODE\n"
+        "#define U_DISABLE_RENAMING 1\n"
+        "#include \"unicode/umachine.h\"\n"
+        "#endif\n"
+        "U_CDECL_BEGIN\n"
+        "const struct {\n"
+        "    double bogus;\n"
+        "    const char *bytes; \n"
+        "} %s={ 0.0, \n",
+        entry);
+    if (count >= sizeof(buffer)) {
+        fprintf(stderr, "genccode: entry name too long (long filename?)\n");
+        exit(U_ILLEGAL_ARGUMENT_ERROR);
+    }
+    T_FileStream_writeLine(out, buffer);
+    for(;;) {
+        length=T_FileStream_read(in, buffer, sizeof(buffer));
+        if(length==0) {
+            break;
+        }
+        for(i=0; i<length; ++i) {
+            column = write8str(out, (uint8_t)buffer[i], column);
+        }
+    }
+    T_FileStream_writeLine(out, "\"\n};\nU_CDECL_END\n");
+#else
+    /* Function renaming shouldn't be done in data */
+    count = snprintf(buffer, sizeof(buffer),
+        "#ifndef IN_GENERATED_CCODE\n"
+        "#define IN_GENERATED_CCODE\n"
+        "#define U_DISABLE_RENAMING 1\n"
+        "#include \"unicode/umachine.h\"\n"
+        "#endif\n"
+        "U_CDECL_BEGIN\n"
+        "const struct {\n"
+        "    double bogus;\n"
+        "    uint8_t bytes[%ld]; \n"
+        "} %s={ 0.0, {\n",
+        (long)T_FileStream_size(in), entry);
+    if (count >= sizeof(buffer)) {
+        fprintf(stderr, "genccode: entry name too long (long filename?)\n");
+        exit(U_ILLEGAL_ARGUMENT_ERROR);
+    }
+    T_FileStream_writeLine(out, buffer);
+    for(;;) {
+        length=T_FileStream_read(in, buffer, sizeof(buffer));
+        if(length==0) {
+            break;
+        }
+        for(i=0; i<length; ++i) {
+            column = write8(out, (uint8_t)buffer[i], column);
+        }
+    }
+    T_FileStream_writeLine(out, "\n}\n};\nU_CDECL_END\n");
+#endif
+
+    if(T_FileStream_error(in)) {
+        fprintf(stderr, "genccode: file read error while generating from file %s\n", filename);
+        exit(U_FILE_ACCESS_ERROR);
+    }
+    if(T_FileStream_error(out)) {
+        fprintf(stderr, "genccode: file write error while generating from file %s\n", filename);
+        exit(U_FILE_ACCESS_ERROR);
+    }
+    T_FileStream_close(out);
+    T_FileStream_close(in);
+}
+
+/*
+ * Append one 32-bit value to the assembly output: a separator (nothing for
+ * the first value, "," up to 32 values per line, then a newline plus the
+ * beginLine directive), followed by the number. Returns the updated column.
+ */
+static uint32_t
+write32(FileStream *out, uint32_t bitField, uint32_t column) {
+    static const char hexToStr[16] = {
+        '0','1','2','3',
+        '4','5','6','7',
+        '8','9','A','B',
+        'C','D','E','F'
+    };
+    char bitFieldStr[64]; /* This is more bits than needed for a 32-bit number */
+    char *cursor = bitFieldStr;
+
+    /* separator first */
+    if(column==MAX_COLUMN) {
+        /* first value: nothing to separate it from */
+        column=1;
+    } else if(column>=32) {
+        /* line is full: start a new one with the template's beginLine directive */
+        const char *directive = assemblyHeader[assemblyHeaderIndex].beginLine;
+        *cursor++='\n';
+        uprv_strcpy(cursor, directive);
+        cursor+=uprv_strlen(cursor);
+        column=1;
+    } else {
+        *cursor++=',';
+        ++column;
+    }
+
+    if (bitField < 10) {
+        /* It's a small number. Don't waste the space for 0x */
+        *cursor++=hexToStr[bitField];
+    } else {
+        UBool started = false; /* stays false while skipping leading zero bytes */
+
+        /* both literal styles start with '0'; only HEX_0X adds the 'x' */
+        if(hexType==HEX_0X || hexType==HEX_0H) {
+            *cursor++='0';
+        }
+        if(hexType==HEX_0X) {
+            *cursor++='x';
+        }
+
+        /* most significant byte first, independent of host byte order */
+        for (int32_t shift = 24; shift >= 0; shift -= 8) {
+            uint8_t value = (uint8_t)(bitField >> shift);
+            if (value!=0 || started) {
+                *cursor++=hexToStr[value>>4];
+                *cursor++=hexToStr[value&0xF];
+                started = true;
+            }
+        }
+        if(hexType==HEX_0H) {
+            *cursor++='h';
+        }
+    }
+
+    *cursor=0;
+    T_FileStream_writeLine(out, bitFieldStr);
+    return column;
+}
+
+/*
+ * Write one byte as a decimal number without leading zeros, preceded by the
+ * separator that keeps 16 values on a line. Returns the updated column.
+ */
+static uint32_t
+write8(FileStream *out, uint8_t byte, uint32_t column) {
+    char digits[4];
+    char *first=digits+sizeof(digits)-1;
+
+    /* build the decimal text backwards, least significant digit first */
+    *first=0;
+    do {
+        *--first=(char)('0'+byte%10);
+        byte/=10;
+    } while(byte!=0);
+
+    /* write the separator: nothing, a comma, or a comma and a line break */
+    if(column==MAX_COLUMN) {
+        /* first byte */
+        column=1;
+    } else if(column>=16) {
+        T_FileStream_writeLine(out, ",\n");
+        column=1;
+    } else {
+        T_FileStream_writeLine(out, ",");
+        ++column;
+    }
+
+    T_FileStream_writeLine(out, first);
+    return column;
+}
+
+#if U_PLATFORM == U_PF_OS400
+/* Emit one byte as an escape (backslash + digit for 0..7, otherwise backslash-x + hex) inside quoted lines of 24 escapes each. */
+static uint32_t
+write8str(FileStream *out, uint8_t byte, uint32_t column) {
+    char escape[8];
+
+    if (byte <= 7) {
+        snprintf(escape, sizeof(escape), "\\%X", byte);
+    } else {
+        snprintf(escape, sizeof(escape), "\\x%X", byte);
+    }
+
+    if(column==MAX_COLUMN) {
+        T_FileStream_writeLine(out, "\"");
+        column=1;
+    } else if(column>=24) {
+        T_FileStream_writeLine(out, "\"\n\"");
+        column=1;
+    } else {
+        ++column;
+    }
+    T_FileStream_writeLine(out, escape);
+    return column;
+}
+#endif
+
+static void
+getOutFilename(
+        const char *inFilename,
+        const char *destdir,
+        char *outFilename,
+        int32_t outFilenameCapacity,
+        char *entryName,
+        int32_t entryNameCapacity,
+        const char *newSuffix,
+        const char *optFilename) {
+    const char *basename=findBasename(inFilename), *suffix=uprv_strrchr(basename, '.');
+    // Builds outFilename (directory + file name + newSuffix) and entryName from inFilename; exits on failure.
+    icu::CharString outFilenameBuilder;
+    icu::CharString entryNameBuilder;
+    icu::ErrorCode status;
+
+    /* directory part: destdir if given, else the directory part of inFilename */
+    if(destdir!=nullptr && *destdir!=0) {
+        outFilenameBuilder.append(destdir, status);
+        outFilenameBuilder.ensureEndsWithFileSeparator(status);
+    } else {
+        outFilenameBuilder.append(inFilename, static_cast<int32_t>(basename - inFilename), status);
+    }
+    inFilename=basename;
+
+    if(suffix==nullptr) {
+        /* the filename does not have a suffix */
+        entryNameBuilder.append(inFilename, status);
+        if(optFilename != nullptr) {
+            outFilenameBuilder.append(optFilename, status);
+        } else {
+            outFilenameBuilder.append(inFilename, status);
+        }
+        outFilenameBuilder.append(newSuffix, status);
+    } else {
+        int32_t saveOutFilenameLength = outFilenameBuilder.length();
+        /* copy basename */
+        while(inFilename<suffix) {
+            // iSeries cannot have '-' in the .o objects.
+            char c = (*inFilename=='-') ? '_' : *inFilename;
+            outFilenameBuilder.append(c, status);
+            entryNameBuilder.append(c, status);
+            inFilename++;
+        }
+
+        /* replace '.' by '_' */
+        outFilenameBuilder.append('_', status);
+        entryNameBuilder.append('_', status);
+        ++inFilename;
+
+        /* copy the rest of the name after the last '.' */
+        outFilenameBuilder.append(inFilename, status);
+        entryNameBuilder.append(inFilename, status);
+
+        if(optFilename != nullptr) {
+            outFilenameBuilder.truncate(saveOutFilenameLength);  // drop the name just built; entryName keeps it
+            outFilenameBuilder.append(optFilename, status);
+        }
+        // add newSuffix (e.g. ".c")
+        outFilenameBuilder.append(newSuffix, status);
+    }
+
+    if (status.isFailure()) {
+        fprintf(stderr, "genccode: error building filename or entrypoint\n");
+        exit(status.get());
+    }
+
+    if (outFilenameBuilder.length() >= outFilenameCapacity) {
+        fprintf(stderr, "genccode: output filename too long\n");
+        exit(U_ILLEGAL_ARGUMENT_ERROR);
+    }
+
+    if (entryNameBuilder.length() >= entryNameCapacity) {
+        fprintf(stderr, "genccode: entry name too long (long filename?)\n");
+        exit(U_ILLEGAL_ARGUMENT_ERROR);
+    }
+    // both lengths were checked above to be smaller than the capacities passed here
+    outFilenameBuilder.extract(outFilename, outFilenameCapacity, status);
+    entryNameBuilder.extract(entryName, entryNameCapacity, status);
+}
+
+#ifdef CAN_GENERATE_OBJECTS
+static void
+getArchitecture(uint16_t *pCPU, uint16_t *pBits, UBool *pIsBigEndian, const char *optMatchArch) {
+    union {
+        char        bytes[2048];
+#ifdef U_ELF
+        Elf32_Ehdr  header32;
+        /* Elf32_Ehdr and ELF64_Ehdr are identical for the necessary fields. */
+#elif U_PLATFORM_HAS_WIN32_API
+        IMAGE_FILE_HEADER header;
+#endif
+    } buffer;
+
+    const char *filename;
+    FileStream *in;
+    int32_t length;
+
+#ifdef U_ELF
+
+#elif U_PLATFORM_HAS_WIN32_API
+    const IMAGE_FILE_HEADER *pHeader;
+#else
+#   error "Unknown platform for CAN_GENERATE_OBJECTS."
+#endif
+
+    if(optMatchArch != nullptr) {
+        filename=optMatchArch;
+    } else {
+        /* set defaults */
+#ifdef U_ELF
+        /* set EM_386 because elf.h does not provide better defaults */
+        *pCPU=EM_386;
+        *pBits=32;
+        *pIsBigEndian=(UBool)(U_IS_BIG_ENDIAN ? ELFDATA2MSB : ELFDATA2LSB);
+#elif U_PLATFORM_HAS_WIN32_API
+        // Windows always runs in little-endian mode.
+        *pIsBigEndian = false;
+
+        // Note: The various _M_<arch> macros are predefined by the MSVC compiler based
+        // on the target compilation architecture.
+        // https://docs.microsoft.com/cpp/preprocessor/predefined-macros
+
+        // link.exe will link an IMAGE_FILE_MACHINE_UNKNOWN data-only .obj file
+        // no matter what architecture it is targeting (though other values are
+        // required to match). Unfortunately, the variable name decoration/mangling
+        // is slightly different on x86, which means we can't use the UNKNOWN type
+        // for all architectures though.
+#   if defined(_M_IX86)
+        *pCPU = IMAGE_FILE_MACHINE_I386;
+#   else
+        *pCPU = IMAGE_FILE_MACHINE_UNKNOWN;
+#   endif
+#   if defined(_M_IA64) || defined(_M_AMD64) || defined (_M_ARM64)
+        *pBits = 64; // Doesn't seem to be used for anything interesting though?
+#   elif defined(_M_IX86) || defined(_M_ARM)
+        *pBits = 32;
+#   else
+#      error "Unknown platform for CAN_GENERATE_OBJECTS."
+#   endif
+#else
+#   error "Unknown platform for CAN_GENERATE_OBJECTS."
+#endif
+        return;
+    }
+
+    in=T_FileStream_open(filename, "rb");
+    if(in==nullptr) {
+        fprintf(stderr, "genccode: unable to open match-arch file %s\n", filename);
+        exit(U_FILE_ACCESS_ERROR);
+    }
+    length=T_FileStream_read(in, buffer.bytes, sizeof(buffer.bytes));
+
+#ifdef U_ELF
+    if(length<(int32_t)sizeof(Elf32_Ehdr)) {
+        fprintf(stderr, "genccode: match-arch file %s is too short\n", filename);
+        exit(U_UNSUPPORTED_ERROR);
+    }
+    if(
+        buffer.header32.e_ident[0]!=ELFMAG0 ||
+        buffer.header32.e_ident[1]!=ELFMAG1 ||
+        buffer.header32.e_ident[2]!=ELFMAG2 ||
+        buffer.header32.e_ident[3]!=ELFMAG3 ||
+        buffer.header32.e_ident[EI_CLASS]<ELFCLASS32 || buffer.header32.e_ident[EI_CLASS]>ELFCLASS64
+    ) {
+        fprintf(stderr, "genccode: match-arch file %s is not an ELF object file, or not supported\n", filename);
+        exit(U_UNSUPPORTED_ERROR);
+    }
+
+    *pBits= buffer.header32.e_ident[EI_CLASS]==ELFCLASS32 ? 32 : 64; /* only 32 or 64: see check above */
+#ifdef U_ELF64
+    if(*pBits!=32 && *pBits!=64) {
+        fprintf(stderr, "genccode: currently only supports 32-bit and 64-bit ELF format\n");
+        exit(U_UNSUPPORTED_ERROR);
+    }
+#else
+    if(*pBits!=32) {
+        fprintf(stderr, "genccode: built with elf.h missing 64-bit definitions\n");
+        exit(U_UNSUPPORTED_ERROR);
+    }
+#endif
+
+    *pIsBigEndian=(UBool)(buffer.header32.e_ident[EI_DATA]==ELFDATA2MSB);
+    if(*pIsBigEndian!=U_IS_BIG_ENDIAN) {
+        fprintf(stderr, "genccode: currently only same-endianness ELF formats are supported\n");
+        exit(U_UNSUPPORTED_ERROR);
+    }
+    /* TODO: Support byte swapping */
+
+    *pCPU=buffer.header32.e_machine;
+#elif U_PLATFORM_HAS_WIN32_API
+    if(length<sizeof(IMAGE_FILE_HEADER)) {
+        fprintf(stderr, "genccode: match-arch file %s is too short\n", filename);
+        exit(U_UNSUPPORTED_ERROR);
+    }
+    /* TODO: Use buffer.header.  Keep aliasing legal.  */
+    pHeader=(const IMAGE_FILE_HEADER *)buffer.bytes;
+    *pCPU=pHeader->Machine;
+    /*
+     * The number of bits is implicit with the Machine value.
+     * *pBits is ignored in the calling code, so this need not be precise.
+     */
+    *pBits= *pCPU==IMAGE_FILE_MACHINE_I386 ? 32 : 64;
+    /* Windows always runs on little-endian CPUs. */
+    *pIsBigEndian=false;
+#else
+#   error "Unknown platform for CAN_GENERATE_OBJECTS."
+#endif
+
+    T_FileStream_close(in);
+}
+
+/**
+ * Write the contents of filename into an object file (ELF .o or Windows .obj)
+ * as one read-only data section with a single global symbol. Errors exit().
+ * destdir/optFilename are forwarded to getOutFilename() to build the output
+ * path, which is also copied to outFilePath (if non-null; must fit in
+ * outFilePathCapacity). optEntryPoint, if non-null, names the symbol
+ * (optEntryPoint+"_dat", dashes become underscores). optMatchArch, if
+ * non-null, is an existing object file whose architecture is copied.
+ * optWinDllExport (Windows only) adds an "-export:" linker directive.
+ */
+U_CAPI void U_EXPORT2
+writeObjectCode(
+        const char *filename,
+        const char *destdir,
+        const char *optEntryPoint,
+        const char *optMatchArch,
+        const char *optFilename,
+        char *outFilePath,
+        size_t outFilePathCapacity,
+        UBool optWinDllExport) {
+    /* common variables */
+    char buffer[4096], entry[96]={ 0 };
+    FileStream *in, *out;
+    const char *newSuffix;
+    int32_t i, entryLength, length, size, entryOffset=0, entryLengthOffset=0;
+
+    uint16_t cpu, bits;
+    UBool makeBigEndian;
+
+    (void)optWinDllExport; /* unused except Windows */
+
+    /* platform-specific variables and initialization code */
+#ifdef U_ELF
+    /* 32-bit Elf file header */
+    static Elf32_Ehdr header32={
+        {
+            /* e_ident[] */
+            ELFMAG0, ELFMAG1, ELFMAG2, ELFMAG3,
+            ELFCLASS32,
+            U_IS_BIG_ENDIAN ? ELFDATA2MSB : ELFDATA2LSB,
+            EV_CURRENT /* EI_VERSION */
+        },
+        ET_REL,
+        EM_386,
+        EV_CURRENT, /* e_version */
+        0, /* e_entry */
+        0, /* e_phoff */
+        (Elf32_Off)sizeof(Elf32_Ehdr), /* e_shoff */
+        0, /* e_flags */
+        (Elf32_Half)sizeof(Elf32_Ehdr), /* eh_size */
+        0, /* e_phentsize */
+        0, /* e_phnum */
+        (Elf32_Half)sizeof(Elf32_Shdr), /* e_shentsize */
+        5, /* e_shnum */
+        2 /* e_shstrndx */
+    };
+
+    /* 32-bit Elf section header table */
+    static Elf32_Shdr sectionHeaders32[5]={
+        { /* SHN_UNDEF */
+            0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+        },
+        { /* .symtab */
+            1, /* sh_name */
+            SHT_SYMTAB,
+            0, /* sh_flags */
+            0, /* sh_addr */
+            (Elf32_Off)(sizeof(header32)+sizeof(sectionHeaders32)), /* sh_offset */
+            (Elf32_Word)(2*sizeof(Elf32_Sym)), /* sh_size */
+            3, /* sh_link=sect hdr index of .strtab */
+            1, /* sh_info=One greater than the symbol table index of the last
+                * local symbol (with STB_LOCAL). */
+            4, /* sh_addralign */
+            (Elf32_Word)(sizeof(Elf32_Sym)) /* sh_entsize */
+        },
+        { /* .shstrtab */
+            9, /* sh_name */
+            SHT_STRTAB,
+            0, /* sh_flags */
+            0, /* sh_addr */
+            (Elf32_Off)(sizeof(header32)+sizeof(sectionHeaders32)+2*sizeof(Elf32_Sym)), /* sh_offset */
+            40, /* sh_size */
+            0, /* sh_link */
+            0, /* sh_info */
+            1, /* sh_addralign */
+            0 /* sh_entsize */
+        },
+        { /* .strtab */
+            19, /* sh_name */
+            SHT_STRTAB,
+            0, /* sh_flags */
+            0, /* sh_addr */
+            (Elf32_Off)(sizeof(header32)+sizeof(sectionHeaders32)+2*sizeof(Elf32_Sym)+40), /* sh_offset */
+            (Elf32_Word)sizeof(entry), /* sh_size */
+            0, /* sh_link */
+            0, /* sh_info */
+            1, /* sh_addralign */
+            0 /* sh_entsize */
+        },
+        { /* .rodata */
+            27, /* sh_name */
+            SHT_PROGBITS,
+            SHF_ALLOC, /* sh_flags */
+            0, /* sh_addr */
+            (Elf32_Off)(sizeof(header32)+sizeof(sectionHeaders32)+2*sizeof(Elf32_Sym)+40+sizeof(entry)), /* sh_offset */
+            0, /* sh_size */
+            0, /* sh_link */
+            0, /* sh_info */
+            16, /* sh_addralign */
+            0 /* sh_entsize */
+        }
+    };
+
+    /* symbol table */
+    static Elf32_Sym symbols32[2]={
+        { /* STN_UNDEF */
+            0, 0, 0, 0, 0, 0
+        },
+        { /* data entry point */
+            1, /* st_name */
+            0, /* st_value */
+            0, /* st_size */
+            ELF64_ST_INFO(STB_GLOBAL, STT_OBJECT),
+            0, /* st_other */
+            4 /* st_shndx=index of related section table entry */
+        }
+    };
+
+    /* section header string table, with decimal string offsets */
+    static const char sectionStrings[40]=
+        /*  0 */ "\0"
+        /*  1 */ ".symtab\0"
+        /*  9 */ ".shstrtab\0"
+        /* 19 */ ".strtab\0"
+        /* 27 */ ".rodata\0"
+        /* 35 */ "\0\0\0\0"; /* contains terminating NUL */
+        /* 40: padded to multiple of 8 bytes */
+
+    /*
+     * Use entry[] for the string table which will contain only the
+     * entry point name.
+     * entry[0] must be 0 (NUL)
+     * The entry point name can be up to 94 characters long (sizeof(entry)-2).
+     */
+
+    /* 16-align .rodata in the .o file, just in case */
+    static const char padding[16]={ 0 };
+    int32_t paddingSize=0;
+    size_t rodataOffset; /* file offset of .rodata before alignment padding */
+
+#ifdef U_ELF64
+    /* 64-bit Elf file header */
+    static Elf64_Ehdr header64={
+        {
+            /* e_ident[] */
+            ELFMAG0, ELFMAG1, ELFMAG2, ELFMAG3,
+            ELFCLASS64,
+            U_IS_BIG_ENDIAN ? ELFDATA2MSB : ELFDATA2LSB,
+            EV_CURRENT /* EI_VERSION */
+        },
+        ET_REL,
+        EM_X86_64,
+        EV_CURRENT, /* e_version */
+        0, /* e_entry */
+        0, /* e_phoff */
+        (Elf64_Off)sizeof(Elf64_Ehdr), /* e_shoff */
+        0, /* e_flags */
+        (Elf64_Half)sizeof(Elf64_Ehdr), /* eh_size */
+        0, /* e_phentsize */
+        0, /* e_phnum */
+        (Elf64_Half)sizeof(Elf64_Shdr), /* e_shentsize */
+        5, /* e_shnum */
+        2 /* e_shstrndx */
+    };
+
+    /* 64-bit Elf section header table */
+    static Elf64_Shdr sectionHeaders64[5]={
+        { /* SHN_UNDEF */
+            0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+        },
+        { /* .symtab */
+            1, /* sh_name */
+            SHT_SYMTAB,
+            0, /* sh_flags */
+            0, /* sh_addr */
+            (Elf64_Off)(sizeof(header64)+sizeof(sectionHeaders64)), /* sh_offset */
+            (Elf64_Xword)(2*sizeof(Elf64_Sym)), /* sh_size */
+            3, /* sh_link=sect hdr index of .strtab */
+            1, /* sh_info=One greater than the symbol table index of the last
+                * local symbol (with STB_LOCAL). */
+            4, /* sh_addralign */
+            (Elf64_Xword)(sizeof(Elf64_Sym)) /* sh_entsize */
+        },
+        { /* .shstrtab */
+            9, /* sh_name */
+            SHT_STRTAB,
+            0, /* sh_flags */
+            0, /* sh_addr */
+            (Elf64_Off)(sizeof(header64)+sizeof(sectionHeaders64)+2*sizeof(Elf64_Sym)), /* sh_offset */
+            40, /* sh_size */
+            0, /* sh_link */
+            0, /* sh_info */
+            1, /* sh_addralign */
+            0 /* sh_entsize */
+        },
+        { /* .strtab */
+            19, /* sh_name */
+            SHT_STRTAB,
+            0, /* sh_flags */
+            0, /* sh_addr */
+            (Elf64_Off)(sizeof(header64)+sizeof(sectionHeaders64)+2*sizeof(Elf64_Sym)+40), /* sh_offset */
+            (Elf64_Xword)sizeof(entry), /* sh_size */
+            0, /* sh_link */
+            0, /* sh_info */
+            1, /* sh_addralign */
+            0 /* sh_entsize */
+        },
+        { /* .rodata */
+            27, /* sh_name */
+            SHT_PROGBITS,
+            SHF_ALLOC, /* sh_flags */
+            0, /* sh_addr */
+            (Elf64_Off)(sizeof(header64)+sizeof(sectionHeaders64)+2*sizeof(Elf64_Sym)+40+sizeof(entry)), /* sh_offset */
+            0, /* sh_size */
+            0, /* sh_link */
+            0, /* sh_info */
+            16, /* sh_addralign */
+            0 /* sh_entsize */
+        }
+    };
+
+    /*
+     * 64-bit symbol table
+     * careful: different order of items compared with Elf32_sym!
+     */
+    static Elf64_Sym symbols64[2]={
+        { /* STN_UNDEF */
+            0, 0, 0, 0, 0, 0
+        },
+        { /* data entry point */
+            1, /* st_name */
+            ELF64_ST_INFO(STB_GLOBAL, STT_OBJECT),
+            0, /* st_other */
+            4, /* st_shndx=index of related section table entry */
+            0, /* st_value */
+            0 /* st_size */
+        }
+    };
+
+#endif /* U_ELF64 */
+
+    /* entry[] has a leading NUL */
+    entryOffset=1;
+
+    /* in the common code, count entryLength from after the NUL */
+    entryLengthOffset=1;
+
+    newSuffix=".o";
+
+#elif U_PLATFORM_HAS_WIN32_API
+    struct {
+        IMAGE_FILE_HEADER fileHeader;
+        IMAGE_SECTION_HEADER sections[2];
+        char linkerOptions[100];
+    } objHeader;
+    IMAGE_SYMBOL symbols[1];
+    struct {
+        DWORD sizeofLongNames;
+        char longNames[100];
+    } symbolNames;
+
+    /*
+     * entry sometimes has a leading '_'
+     * overwritten if entryOffset==0 depending on the target platform
+     * see check for cpu below
+     */
+    entry[0]='_';
+
+    newSuffix=".obj";
+#else
+#   error "Unknown platform for CAN_GENERATE_OBJECTS."
+#endif
+
+    /* deal with options, files and the entry point name */
+    getArchitecture(&cpu, &bits, &makeBigEndian, optMatchArch);
+    if (optMatchArch) {
+        printf("genccode: --match-arch cpu=%hu bits=%hu big-endian=%d\n", cpu, bits, makeBigEndian);
+    } else {
+        printf("genccode: using architecture cpu=%hu bits=%hu big-endian=%d\n", cpu, bits, makeBigEndian);
+    }
+#if U_PLATFORM_HAS_WIN32_API
+    if(cpu==IMAGE_FILE_MACHINE_I386) {
+        entryOffset=1;
+    }
+#endif
+
+    in=T_FileStream_open(filename, "rb");
+    if(in==nullptr) {
+        fprintf(stderr, "genccode: unable to open input file %s\n", filename);
+        exit(U_FILE_ACCESS_ERROR);
+    }
+    size=T_FileStream_size(in);
+
+    getOutFilename(
+        filename,
+        destdir,
+        buffer,
+        sizeof(buffer),
+        entry + entryOffset,
+        sizeof(entry) - entryOffset,
+        newSuffix,
+        optFilename);
+
+    if (outFilePath != nullptr) {
+        if (uprv_strlen(buffer) >= outFilePathCapacity) {
+            fprintf(stderr, "genccode: filename too long\n");
+            exit(U_ILLEGAL_ARGUMENT_ERROR);
+        }
+        uprv_strcpy(outFilePath, buffer);
+    }
+
+    if(optEntryPoint != nullptr) {
+        /* "_dat" plus the NUL terminator must still fit behind the name */
+        if(uprv_strlen(optEntryPoint)+sizeof("_dat") > sizeof(entry)-entryOffset) {
+            fprintf(stderr, "genccode: entry point name too long\n");
+            exit(U_ILLEGAL_ARGUMENT_ERROR);
+        }
+        uprv_strcpy(entry+entryOffset, optEntryPoint);
+        uprv_strcat(entry+entryOffset, "_dat");
+    }
+    /* turn dashes in the entry name into underscores */
+    entryLength=(int32_t)uprv_strlen(entry+entryLengthOffset);
+    for(i=0; i<entryLength; ++i) {
+        if(entry[entryLengthOffset+i]=='-') {
+            entry[entryLengthOffset+i]='_';
+        }
+    }
+
+    /* open the output file */
+    out=T_FileStream_open(buffer, "wb");
+    if(out==nullptr) {
+        fprintf(stderr, "genccode: unable to open output file %s\n", buffer);
+        exit(U_FILE_ACCESS_ERROR);
+    }
+
+#ifdef U_ELF
+    if(bits==32) {
+        header32.e_ident[EI_DATA]= makeBigEndian ? ELFDATA2MSB : ELFDATA2LSB;
+        header32.e_machine=cpu;
+
+        /* 16-align .rodata in the .o file; sectionHeaders32 is static, so start */
+        /* from the unpadded offset on every call instead of adding to the last */
+        rodataOffset=sizeof(header32)+sizeof(sectionHeaders32)+sizeof(symbols32)+sizeof(sectionStrings)+sizeof(entry);
+        paddingSize=(int32_t)((0x10-(rodataOffset & 0xf)) & 0xf);
+        sectionHeaders32[4].sh_offset=(Elf32_Off)(rodataOffset+paddingSize);
+        sectionHeaders32[4].sh_size=(Elf32_Word)size;
+        symbols32[1].st_size=(Elf32_Word)size;
+
+        /* write .o headers */
+        T_FileStream_write(out, &header32, (int32_t)sizeof(header32));
+        T_FileStream_write(out, sectionHeaders32, (int32_t)sizeof(sectionHeaders32));
+        T_FileStream_write(out, symbols32, (int32_t)sizeof(symbols32));
+    } else /* bits==64 */ {
+#ifdef U_ELF64
+        header64.e_ident[EI_DATA]= makeBigEndian ? ELFDATA2MSB : ELFDATA2LSB;
+        header64.e_machine=cpu;
+
+        /* 16-align .rodata in the .o file; sectionHeaders64 is static, so start */
+        /* from the unpadded offset on every call instead of adding to the last */
+        rodataOffset=sizeof(header64)+sizeof(sectionHeaders64)+sizeof(symbols64)+sizeof(sectionStrings)+sizeof(entry);
+        paddingSize=(int32_t)((0x10-(rodataOffset & 0xf)) & 0xf);
+        sectionHeaders64[4].sh_offset=(Elf64_Off)(rodataOffset+paddingSize);
+        sectionHeaders64[4].sh_size=(Elf64_Xword)size;
+        symbols64[1].st_size=(Elf64_Xword)size;
+
+        /* write .o headers */
+        T_FileStream_write(out, &header64, (int32_t)sizeof(header64));
+        T_FileStream_write(out, sectionHeaders64, (int32_t)sizeof(sectionHeaders64));
+        T_FileStream_write(out, symbols64, (int32_t)sizeof(symbols64));
+#endif
+    }
+
+    T_FileStream_write(out, sectionStrings, (int32_t)sizeof(sectionStrings));
+    T_FileStream_write(out, entry, (int32_t)sizeof(entry));
+    if(paddingSize!=0) {
+        T_FileStream_write(out, padding, paddingSize);
+    }
+#elif U_PLATFORM_HAS_WIN32_API
+    /* populate the .obj headers */
+    uprv_memset(&objHeader, 0, sizeof(objHeader));
+    uprv_memset(&symbols, 0, sizeof(symbols));
+    uprv_memset(&symbolNames, 0, sizeof(symbolNames));
+
+    /* write the linker export directive "-export:<entry>,data " (bounded copy) */
+    length=0;
+    if (optWinDllExport) {
+        length=snprintf(objHeader.linkerOptions, sizeof(objHeader.linkerOptions), "-export:%s,data ", entry);
+        if(length<0 || length>=(int32_t)sizeof(objHeader.linkerOptions)) {
+            fprintf(stderr, "genccode: entry point name too long for the export directive\n");
+            exit(U_ILLEGAL_ARGUMENT_ERROR);
+        }
+    }
+
+    /* set the file header */
+    objHeader.fileHeader.Machine=cpu;
+    objHeader.fileHeader.NumberOfSections=2;
+    objHeader.fileHeader.TimeDateStamp=(DWORD)time(nullptr);
+    objHeader.fileHeader.PointerToSymbolTable=IMAGE_SIZEOF_FILE_HEADER+2*IMAGE_SIZEOF_SECTION_HEADER+length+size; /* start of symbol table */
+    objHeader.fileHeader.NumberOfSymbols=1;
+
+    /* set the section for the linker options */
+    uprv_strncpy((char *)objHeader.sections[0].Name, ".drectve", 8);
+    objHeader.sections[0].SizeOfRawData=length;
+    objHeader.sections[0].PointerToRawData=IMAGE_SIZEOF_FILE_HEADER+2*IMAGE_SIZEOF_SECTION_HEADER;
+    objHeader.sections[0].Characteristics=IMAGE_SCN_LNK_INFO|IMAGE_SCN_LNK_REMOVE|IMAGE_SCN_ALIGN_1BYTES;
+
+    /* set the data section */
+    uprv_strncpy((char *)objHeader.sections[1].Name, ".rdata", 6);
+    objHeader.sections[1].SizeOfRawData=size;
+    objHeader.sections[1].PointerToRawData=IMAGE_SIZEOF_FILE_HEADER+2*IMAGE_SIZEOF_SECTION_HEADER+length;
+    objHeader.sections[1].Characteristics=IMAGE_SCN_CNT_INITIALIZED_DATA|IMAGE_SCN_ALIGN_16BYTES|IMAGE_SCN_MEM_READ;
+
+    /* set the symbol table */
+    if(entryLength<=8) {
+        uprv_strncpy((char *)symbols[0].N.ShortName, entry, entryLength);
+        symbolNames.sizeofLongNames=4;
+    } else {
+        symbols[0].N.Name.Short=0;
+        symbols[0].N.Name.Long=4;
+        symbolNames.sizeofLongNames=4+entryLength+1;
+        uprv_strcpy(symbolNames.longNames, entry);
+    }
+    symbols[0].SectionNumber=2;
+    symbols[0].StorageClass=IMAGE_SYM_CLASS_EXTERNAL;
+
+    /* write the file header and the linker options section */
+    T_FileStream_write(out, &objHeader, objHeader.sections[1].PointerToRawData);
+#else
+#   error "Unknown platform for CAN_GENERATE_OBJECTS."
+#endif
+
+    /* copy the data file into section 2 */
+    while((length=T_FileStream_read(in, buffer, sizeof(buffer)))>0) {
+        T_FileStream_write(out, buffer, (int32_t)length);
+    }
+
+#if U_PLATFORM_HAS_WIN32_API
+    /* write the symbol table */
+    T_FileStream_write(out, symbols, IMAGE_SIZEOF_SYMBOL);
+    T_FileStream_write(out, &symbolNames, symbolNames.sizeofLongNames);
+#endif
+
+    if(T_FileStream_error(in)) {
+        fprintf(stderr, "genccode: file read error while generating from file %s\n", filename);
+        exit(U_FILE_ACCESS_ERROR);
+    }
+
+    if(T_FileStream_error(out)) {
+        fprintf(stderr, "genccode: file write error while generating from file %s\n", filename);
+        exit(U_FILE_ACCESS_ERROR);
+    }
+
+    T_FileStream_close(out);
+    T_FileStream_close(in);
+}
+#endif
diff --git a/intl/icu/source/tools/toolutil/pkg_genc.h b/intl/icu/source/tools/toolutil/pkg_genc.h
new file mode 100644
index 0000000000..2dd1b45cde
--- /dev/null
+++ b/intl/icu/source/tools/toolutil/pkg_genc.h
@@ -0,0 +1,107 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/******************************************************************************
+ *   Copyright (C) 2008-2011, International Business Machines
+ *   Corporation and others.  All Rights Reserved.
+ *******************************************************************************
+ */
+
+#ifndef __PKG_GENC_H__
+#define __PKG_GENC_H__
+
+#include "unicode/utypes.h"
+#include "toolutil.h"
+
+#include "unicode/putil.h"
+#include "putilimp.h"
+
+/*** Platform #defines start here ***/
+#if U_PLATFORM_HAS_WIN32_API
+#ifdef __GNUC__
+#define WINDOWS_WITH_GNUC /* Windows API and the compiler defines __GNUC__ */
+#else
+#define WINDOWS_WITH_MSVC /* Windows API and any other compiler (treated as MSVC) */
+#endif
+#endif
+
+/* Set for every platform/compiler combination except WINDOWS_WITH_MSVC. */
+#if !defined(WINDOWS_WITH_MSVC)
+#define BUILD_DATA_WITHOUT_ASSEMBLY
+#endif
+
+#ifndef U_DISABLE_OBJ_CODE /* testing: define it to suppress both macros below */
+#if defined(WINDOWS_WITH_MSVC) || U_PLATFORM_IS_LINUX_BASED
+#define CAN_WRITE_OBJ_CODE
+#endif
+#if U_PLATFORM_HAS_WIN32_API || defined(U_ELF)
+#define CAN_GENERATE_OBJECTS /* NOTE(review): appears to gate writeObjectCode() in pkg_genc.cpp - confirm */
+#endif
+#endif
+
+#if U_PLATFORM == U_PF_CYGWIN || defined(CYGWINMSVC)
+#define USING_CYGWIN
+#endif
+
+/*
+ * When building the data library without assembly,
+ * some platforms use a single C code file for all of
+ * the data to generate the final data library. This can
+ * increase the performance of the pkgdata tool.
+ */
+#if U_PLATFORM == U_PF_OS400
+#define USE_SINGLE_CCODE_FILE
+#endif
+
+/* Need to fix the file separator character when using MinGW or Cygwin. */
+#if defined(WINDOWS_WITH_GNUC) || defined(USING_CYGWIN)
+#define PKGDATA_FILE_SEP_STRING "/"
+#else
+#define PKGDATA_FILE_SEP_STRING U_FILE_SEP_STRING
+#endif
+
+#define LARGE_BUFFER_MAX_SIZE 2048
+#define SMALL_BUFFER_MAX_SIZE 512
+#define SMALL_BUFFER_FLAG_NAMES 32
+#define BUFFER_PADDING_SIZE 20
+
+/** End platform defines **/
+
+U_CAPI void U_EXPORT2
+printAssemblyHeadersToStdErr(void);
+
+U_CAPI UBool U_EXPORT2
+checkAssemblyHeaderName(const char* optAssembly);
+
+U_CAPI void U_EXPORT2
+writeCCode(
+    const char *filename,
+    const char *destdir,
+    const char *optEntryPoint,
+    const char *optName,
+    const char *optFilename,
+    char *outFilePath,
+    size_t outFilePathCapacity);
+
+U_CAPI void U_EXPORT2
+writeAssemblyCode(
+    const char *filename,
+    const char *destdir,
+    const char *optEntryPoint,
+    const char *optFilename,
+    char *outFilePath,
+    size_t outFilePathCapacity);
+
+/* Writes the bytes of filename into an ELF .o or Windows .obj file as read-only data under */
+/* one global symbol (optEntryPoint plus "_dat" if given); failures print to stderr and exit(). */
+U_CAPI void U_EXPORT2
+writeObjectCode(
+    const char *filename,
+    const char *destdir,
+    const char *optEntryPoint,
+    const char *optMatchArch,
+    const char *optFilename,
+    char *outFilePath,
+    size_t outFilePathCapacity,
+    UBool optWinDllExport);
+
+#endif
diff --git a/intl/icu/source/tools/toolutil/pkg_gencmn.cpp b/intl/icu/source/tools/toolutil/pkg_gencmn.cpp
new file mode 100644
index 0000000000..a301c322eb
--- /dev/null
+++ b/intl/icu/source/tools/toolutil/pkg_gencmn.cpp
@@ -0,0 +1,578 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/******************************************************************************
+ *   Copyright (C) 2008-2012, International Business Machines
+ *   Corporation and others.  All Rights Reserved.
+ *******************************************************************************
+ */
+#include "unicode/utypes.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "unicode/utypes.h"
+#include "unicode/putil.h"
+#include "cmemory.h"
+#include "cstring.h"
+#include "filestrm.h"
+#include "toolutil.h"
+#include "unicode/uclean.h"
+#include "unewdata.h"
+#include "putilimp.h"
+#include "pkg_gencmn.h"
+
+#define STRING_STORE_SIZE 200000
+
+#define COMMON_DATA_NAME U_ICUDATA_NAME
+#define DATA_TYPE "dat"
+
+/* ICU package data file format (.dat files) ------------------------------- ***
+
+Description of the data format after the usual ICU data file header
+(UDataInfo etc.).
+
+Format version 1
+
+A .dat package file contains a simple Table of Contents of item names,
+followed by the items themselves:
+
+1. ToC table
+
+uint32_t count; - number of items
+UDataOffsetTOCEntry entry[count]; - pair of uint32_t values per item:
+    uint32_t nameOffset; - offset of the item name
+    uint32_t dataOffset; - offset of the item data
+both are byte offsets from the beginning of the data
+
+2. item name strings
+
+All item names are stored as char * strings in one block between the ToC table
+and the data items.
+
+3. data items
+
+The data items are stored following the item names block.
+Each data item is 16-aligned.
+The data items are stored in the sorted order of their names.
+
+Therefore, the top of the name strings block is the offset of the first item,
+the length of the last item is the difference between its offset and
+the .dat file length, and the length of all previous items is the difference
+between its offset and the next one.
+
+----------------------------------------------------------------------------- */
+
+/* UDataInfo cf. udata.h; passed to udata_create() for the common data (.dat) file */
+static const UDataInfo dataInfo={
+    sizeof(UDataInfo),            /* size */
+    0,                            /* reservedWord */
+
+    U_IS_BIG_ENDIAN,              /* isBigEndian */
+    U_CHARSET_FAMILY,             /* charsetFamily */
+    sizeof(char16_t),             /* sizeofUChar */
+    0,                            /* reservedByte */
+
+    {0x43, 0x6d, 0x6e, 0x44},     /* dataFormat="CmnD" */
+    {1, 0, 0, 0},                 /* formatVersion */
+    {3, 0, 0, 0}                  /* dataVersion */
+};
+
+static uint32_t maxSize; /* set from the max_size argument of createCommonDataFile() */
+
+static char stringStore[STRING_STORE_SIZE]; /* presumably the pool behind allocString() - confirm */
+static uint32_t stringTop=0, basenameTotal=0; /* basenameTotal: size of the ToC name-strings block */
+
+typedef struct {
+    char *pathname, *basename; /* pathname is opened for reading; basename is written to the ToC */
+    uint32_t basenameLength, basenameOffset, fileSize, fileOffset; /* offsets are the values stored in the ToC */
+} File;
+
+#define CHUNK_FILE_COUNT 256 /* presumably the growth step of files[] - confirm in addFile() */
+static File *files = nullptr; /* the listed files; sorted with compareFiles before writing */
+static uint32_t fileCount=0; /* number of used entries in files[] */
+static uint32_t fileMax = 0; /* presumably the allocated capacity of files[] - confirm */
+
+/* Optional prefix for the symbol names in the generated .c table of contents ("" if nullptr). */
+static char *symPrefix = nullptr;
+
+#define LINE_BUFFER_SIZE 512 /* size of the line buffer used to read the file list */
+/* prototypes --------------------------------------------------------------- */
+/* addFile: called by createCommonDataFile() for each non-comment item read from the file list */
+static void
+addFile(const char *filename, const char *name, const char *source, UBool sourceTOC, UBool verbose);
+
+static char *
+allocString(uint32_t length);
+/* compareFiles: qsort() comparator applied to files[] (elements of type File) */
+U_CDECL_BEGIN
+static int
+compareFiles(const void *file1, const void *file2);
+U_CDECL_END
+
+static char *
+pathToFullPath(const char *path, const char *source);
+
+/* map non-tree separator (such as '\') to tree separator ('/') in place. */
+static void
+fixDirToTreePath(char *s);
+/* -------------------------------------------------------------------------- */
+
+U_CAPI void U_EXPORT2
+createCommonDataFile(const char *destDir, const char *name, const char *entrypointName, const char *type, const char *source, const char *copyRight,
+                     const char *dataFile, uint32_t max_size, UBool sourceTOC, UBool verbose, char *gencmnFileName) {
+    static char buffer[4096];
+    char *line;
+    char *linePtr;
+    char *s = nullptr;
+    UErrorCode errorCode=U_ZERO_ERROR;
+    uint32_t i, fileOffset, basenameOffset, length, nread;
+    FileStream *in, *file;
+
+    line = (char *)uprv_malloc(sizeof(char) * LINE_BUFFER_SIZE);
+    if (line == nullptr) {
+        fprintf(stderr, "gencmn: unable to allocate memory for line buffer of size %d\n", LINE_BUFFER_SIZE);
+        exit(U_MEMORY_ALLOCATION_ERROR);
+    }
+
+    linePtr = line;
+
+    maxSize = max_size;
+
+    if (destDir == nullptr) {
+        destDir = u_getDataDirectory();
+    }
+    if (name == nullptr) {
+        name = COMMON_DATA_NAME;
+    }
+    if (type == nullptr) {
+        type = DATA_TYPE;
+    }
+    if (source == nullptr) {
+        source = ".";
+    }
+
+    if (dataFile == nullptr) {
+        in = T_FileStream_stdin();
+    } else {
+        in = T_FileStream_open(dataFile, "r");
+        if(in == nullptr) {
+            fprintf(stderr, "gencmn: unable to open input file %s\n", dataFile);
+            exit(U_FILE_ACCESS_ERROR);
+        }
+    }
+
+    if (verbose) {
+        if(sourceTOC) {
+            printf("generating %s_%s.c (table of contents source file)\n", name, type);
+        } else {
+            printf("generating %s.%s (common data file with table of contents)\n", name, type);
+        }
+    }
+
+    /* read the list of files and get their lengths */
+    while((s != nullptr && *s != 0) || (s=T_FileStream_readLine(in, (line=linePtr),
+                                                             LINE_BUFFER_SIZE))!=nullptr) {
+        /* remove trailing newline characters and parse space separated items */
+        if (s != nullptr && *s != 0) {
+            line=s;
+        } else {
+            s=line;
+        }
+        while(*s!=0) {
+            if(*s==' ') {
+                *s=0;
+                ++s;
+                break;
+            } else if(*s=='\r' || *s=='\n') {
+                *s=0;
+                break;
+            }
+            ++s;
+        }
+
+        /* check for comment */
+
+        if (*line == '#') {
+            continue;
+        }
+
+        /* add the file */
+#if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR)
+        {
+          char *t;
+          while((t = uprv_strchr(line,U_FILE_ALT_SEP_CHAR))) {
+            *t = U_FILE_SEP_CHAR;
+          }
+        }
+#endif
+        addFile(getLongPathname(line), name, source, sourceTOC, verbose);
+    }
+
+    uprv_free(linePtr);
+
+    if(in!=T_FileStream_stdin()) {
+        T_FileStream_close(in);
+    }
+
+    if(fileCount==0) {
+        fprintf(stderr, "gencmn: no files listed in %s\n", dataFile == nullptr ? "<stdin>" : dataFile);
+        return;
+    }
+
+    /* sort the files by basename */
+    qsort(files, fileCount, sizeof(File), compareFiles);
+
+    if(!sourceTOC) {
+        UNewDataMemory *out;
+
+        /* determine the offsets of all basenames and files in this common one */
+        basenameOffset=4+8*fileCount;
+        fileOffset=(basenameOffset+(basenameTotal+15))&~0xf;
+        for(i=0; i<fileCount; ++i) {
+            files[i].fileOffset=fileOffset;
+            fileOffset+=(files[i].fileSize+15)&~0xf;
+            files[i].basenameOffset=basenameOffset;
+            basenameOffset+=files[i].basenameLength;
+        }
+
+        /* create the output file */
+        out=udata_create(destDir, type, name,
+                         &dataInfo,
+                         copyRight == nullptr ? U_COPYRIGHT_STRING : copyRight,
+                         &errorCode);
+        if(U_FAILURE(errorCode)) {
+            fprintf(stderr, "gencmn: udata_create(-d %s -n %s -t %s) failed - %s\n",
+                destDir, name, type,
+                u_errorName(errorCode));
+            exit(errorCode);
+        }
+
+        /* write the table of contents */
+        udata_write32(out, fileCount);
+        for(i=0; i<fileCount; ++i) {
+            udata_write32(out, files[i].basenameOffset);
+            udata_write32(out, files[i].fileOffset);
+        }
+
+        /* write the basenames */
+        for(i=0; i<fileCount; ++i) {
+            udata_writeString(out, files[i].basename, files[i].basenameLength);
+        }
+        length=4+8*fileCount+basenameTotal;
+
+        /* copy the files */
+        for(i=0; i<fileCount; ++i) {
+            /* pad to 16-align the next file */
+            length&=0xf;
+            if(length!=0) {
+                udata_writePadding(out, 16-length);
+            }
+
+            if (verbose) {
+                printf("adding %s (%ld byte%s)\n", files[i].pathname, (long)files[i].fileSize, files[i].fileSize == 1 ? "" : "s");
+            }
+
+            /* copy the next file */
+            file=T_FileStream_open(files[i].pathname, "rb");
+            if(file==nullptr) {
+                fprintf(stderr, "gencmn: unable to open listed file %s\n", files[i].pathname);
+                exit(U_FILE_ACCESS_ERROR);
+            }
+            for(nread = 0;;) {
+                length=T_FileStream_read(file, buffer, sizeof(buffer));
+                if(length <= 0) {
+                    break;
+                }
+                nread += length;
+                udata_writeBlock(out, buffer, length);
+            }
+            T_FileStream_close(file);
+            length=files[i].fileSize;
+
+            if (nread != files[i].fileSize) {
+              fprintf(stderr, "gencmn: unable to read %s properly (got %ld/%ld byte%s)\n", files[i].pathname,  (long)nread, (long)files[i].fileSize, files[i].fileSize == 1 ? "" : "s");
+                exit(U_FILE_ACCESS_ERROR);
+            }
+        }
+
+        /* pad to 16-align the last file (cleaner, avoids growing .dat files in icuswap) */
+        length&=0xf;
+        if(length!=0) {
+            udata_writePadding(out, 16-length);
+        }
+
+        /* finish */
+        udata_finish(out, &errorCode);
+        if(U_FAILURE(errorCode)) {
+            fprintf(stderr, "gencmn: udata_finish() failed - %s\n", u_errorName(errorCode));
+            exit(errorCode);
+        }
+    } else {
+        /* write a .c source file with the table of contents */
+        char *filename;
+        FileStream *out;
+
+        /* create the output filename */
+        filename=s=buffer;
+        uprv_strcpy(filename, destDir);
+        s=filename+uprv_strlen(filename);
+        if(s>filename && *(s-1)!=U_FILE_SEP_CHAR) {
+            *s++=U_FILE_SEP_CHAR;
+        }
+        uprv_strcpy(s, name);
+        if(*(type)!=0) {
+            s+=uprv_strlen(s);
+            *s++='_';
+            uprv_strcpy(s, type);
+        }
+        s+=uprv_strlen(s);
+        uprv_strcpy(s, ".c");
+
+        /* open the output file */
+        out=T_FileStream_open(filename, "w");
+        if (gencmnFileName != nullptr) {
+            uprv_strcpy(gencmnFileName, filename);
+        }
+        if(out==nullptr) {
+            fprintf(stderr, "gencmn: unable to open .c output file %s\n", filename);
+            exit(U_FILE_ACCESS_ERROR);
+        }
+
+        /* write the source file */
+        snprintf(buffer, sizeof(buffer),
+            "/*\n"
+            " * ICU common data table of contents for %s.%s\n"
+            " * Automatically generated by icu/source/tools/gencmn/gencmn .\n"
+            " */\n\n"
+            "#include \"unicode/utypes.h\"\n"
+            "#include \"unicode/udata.h\"\n"
+            "\n"
+            "/* external symbol declarations for data (%d files) */\n",
+                name, type, fileCount);
+        T_FileStream_writeLine(out, buffer);
+
+        snprintf(buffer, sizeof(buffer), "extern const char\n    %s%s[]", symPrefix?symPrefix:"", files[0].pathname);
+        T_FileStream_writeLine(out, buffer);
+        for(i=1; i<fileCount; ++i) {
+            snprintf(buffer, sizeof(buffer), ",\n    %s%s[]", symPrefix?symPrefix:"", files[i].pathname);
+            T_FileStream_writeLine(out, buffer);
+        }
+        T_FileStream_writeLine(out, ";\n\n");
+
+        snprintf(
+            buffer, sizeof(buffer),
+            "U_EXPORT struct {\n"
+            "    uint16_t headerSize;\n"
+            "    uint8_t magic1, magic2;\n"
+            "    UDataInfo info;\n"
+            "    char padding[%lu];\n"
+            "    uint32_t count, reserved;\n"
+            "    struct {\n"
+            "        const char *name;\n"
+            "        const void *data;\n"
+            "    } toc[%lu];\n"
+            "} U_EXPORT2 %s_dat = {\n"
+            "    32, 0xda, 0x27, {\n"
+            "        %lu, 0,\n"
+            "        %u, %u, %u, 0,\n"
+            "        {0x54, 0x6f, 0x43, 0x50},\n"
+            "        {1, 0, 0, 0},\n"
+            "        {0, 0, 0, 0}\n"
+            "    },\n"
+            "    \"\", %lu, 0, {\n",
+            static_cast<unsigned long>(32-4-sizeof(UDataInfo)),
+            static_cast<unsigned long>(fileCount),
+            entrypointName,
+            static_cast<unsigned long>(sizeof(UDataInfo)),
+            U_IS_BIG_ENDIAN,
+            U_CHARSET_FAMILY,
+            U_SIZEOF_UCHAR,
+            static_cast<unsigned long>(fileCount)
+        );
+        T_FileStream_writeLine(out, buffer);
+
+        snprintf(buffer, sizeof(buffer), "        { \"%s\", %s%s }", files[0].basename, symPrefix?symPrefix:"", files[0].pathname);
+        T_FileStream_writeLine(out, buffer);
+        for(i=1; i<fileCount; ++i) {
+            snprintf(buffer, sizeof(buffer), ",\n        { \"%s\", %s%s }", files[i].basename, symPrefix?symPrefix:"", files[i].pathname);
+            T_FileStream_writeLine(out, buffer);
+        }
+
+        T_FileStream_writeLine(out, "\n    }\n};\n");
+        T_FileStream_close(out);
+
+        uprv_free(symPrefix);
+    }
+}
+
+/* Register one input file in files[] (grown in chunks): validate and size it, or, in
+ * source-ToC mode, derive its C entry point name. Exits on error; a file larger than
+ * maxSize is skipped and contributes nothing to files[] or basenameTotal. */
+static void
+addFile(const char *filename, const char *name, const char *source, UBool sourceTOC, UBool verbose) {
+    char *s;
+    uint32_t length;
+
+    if(fileCount==fileMax) {
+      fileMax += CHUNK_FILE_COUNT;
+      files = (File *)uprv_realloc(files, fileMax*sizeof(files[0])); /* note: never freed. */
+      if(files==nullptr) {
+        fprintf(stderr, "pkgdata/gencmn: Could not allocate %u bytes for %d files\n", (unsigned int)(fileMax*sizeof(files[0])), fileCount);
+        exit(U_MEMORY_ALLOCATION_ERROR);
+      }
+    }
+
+    if(!sourceTOC) {
+        FileStream *file;
+        char *fullPath;
+        uint32_t basenameLength;
+        if(uprv_pathIsAbsolute(filename)) {
+            fprintf(stderr, "gencmn: Error: absolute path encountered. Old style paths are not supported. Use relative paths such as 'fur.res' or 'translit%cfur.res'.\n\tBad path: '%s'\n", U_FILE_SEP_CHAR, filename);
+            exit(U_ILLEGAL_ARGUMENT_ERROR);
+        }
+        fullPath = pathToFullPath(filename, source);
+        /* build the tree name "name/filename"; its length includes the NUL */
+        basenameLength = (uint32_t)(uprv_strlen(filename) + 1 + uprv_strlen(name) + 1);
+        s=allocString(basenameLength);
+        uprv_strcpy(s, name);
+        uprv_strcat(s, U_TREE_ENTRY_SEP_STRING);
+        uprv_strcat(s, filename);
+        fixDirToTreePath(s);
+
+        /* try to open the file */
+        file=T_FileStream_open(fullPath, "rb");
+        if(file==nullptr) {
+            fprintf(stderr, "gencmn: unable to open listed file %s\n", fullPath);
+            exit(U_FILE_ACCESS_ERROR);
+        }
+
+        /* get the file length */
+        length=T_FileStream_size(file);
+        if(T_FileStream_error(file) || length<=20) {
+            fprintf(stderr, "gencmn: unable to get length of listed file %s\n", fullPath);
+            exit(U_FILE_ACCESS_ERROR);
+        }
+        T_FileStream_close(file);
+
+        /* do not add files that are longer than maxSize */
+        if(maxSize && length>maxSize) {
+            if (verbose) {
+                printf("%s ignored (size %ld > %ld)\n", fullPath, (long)length, (long)maxSize);
+            }
+            uprv_free(fullPath); /* not recorded anywhere, so release it here */
+            return;
+        }
+        /* commit the entry, and count its name, only now that the file is accepted */
+        files[fileCount].basename=s;
+        files[fileCount].basenameLength=basenameLength;
+        files[fileCount].pathname=fullPath;
+        files[fileCount].fileSize=length;
+        basenameTotal+=basenameLength;
+    } else {
+        char *t;
+        /* get and store the basename */
+        /* need to include the package name */
+        length = (uint32_t)(uprv_strlen(filename) + 1 + uprv_strlen(name) + 1);
+        s=allocString(length);
+        uprv_strcpy(s, name);
+        uprv_strcat(s, U_TREE_ENTRY_SEP_STRING);
+        uprv_strcat(s, filename);
+        fixDirToTreePath(s);
+        files[fileCount].basename=s;
+        /* turn the basename into an entry point name and store in the pathname field */
+        t=files[fileCount].pathname=allocString(length);
+        while(--length>0) {
+            if(*s=='.' || *s=='-' || *s=='/') {
+                *t='_';
+            } else {
+                *t=*s;
+            }
+            ++s;
+            ++t;
+        }
+        *t=0;
+    }
+    ++fileCount;
+}
+
+/* Carve length bytes off the fixed string store; exits when the store is exhausted. */
+static char *
+allocString(uint32_t length) {
+    const uint32_t newTop=stringTop+length;
+    if(newTop>STRING_STORE_SIZE) {
+        fprintf(stderr, "gencmn: out of memory\n");
+        exit(U_MEMORY_ALLOCATION_ERROR);
+    }
+    /* hand out the region [stringTop, newTop) and advance the high-water mark */
+    char *const chunk=stringStore+stringTop;
+    stringTop=newTop;
+    return chunk;
+}
+
+/* Return a uprv_malloc()ed "<source><file sep><path>" (or just path when source is null)
+ * with tree separators converted to the platform's; release with uprv_free(). */
+static char *
+pathToFullPath(const char *path, const char *source) {
+    /* source may be null, so measure it only when present */
+    size_t sourceLength = (source != nullptr) ? uprv_strlen(source) : 0;
+    char *fullPath = (char *)uprv_malloc(sourceLength + 1 + uprv_strlen(path) + 1);
+    int32_t n;
+
+    if(fullPath == nullptr) {
+        fprintf(stderr, "gencmn: out of memory\n");
+        exit(U_MEMORY_ALLOCATION_ERROR);
+    }
+    if(source != nullptr) {
+        uprv_strcpy(fullPath, source);
+        uprv_strcat(fullPath, U_FILE_SEP_STRING);
+    } else {
+        fullPath[0] = 0;
+    }
+    n = (int32_t)uprv_strlen(fullPath);
+    fullPath[n] = 0;       /* keeps n "used" when neither loop below is compiled */
+    uprv_strcat(fullPath, path);
+
+#if (U_FILE_ALT_SEP_CHAR != U_TREE_ENTRY_SEP_CHAR) && (U_FILE_ALT_SEP_CHAR != U_FILE_SEP_CHAR)
+    /* replace the alternate separator in the appended path with the file sep char */
+    for(int32_t i=n;fullPath[i];i++) {
+        if(fullPath[i] == U_FILE_ALT_SEP_CHAR) {
+            fullPath[i] = U_FILE_SEP_CHAR;
+        }
+    }
+#endif
+#if (U_FILE_SEP_CHAR != U_TREE_ENTRY_SEP_CHAR)
+    /* replace tree separator (such as '/') with file sep char (such as ':' or '\\') */
+    for(int32_t i=n;fullPath[i];i++) {   /* own index: rescans the appended path from its start */
+        if(fullPath[i] == U_TREE_ENTRY_SEP_CHAR) {
+            fullPath[i] = U_FILE_SEP_CHAR;
+        }
+    }
+#endif
+    return fullPath;
+}
+
+U_CDECL_BEGIN
+static int
+compareFiles(const void *file1, const void *file2) {
+    const File *lhs=static_cast<const File *>(file1), *rhs=static_cast<const File *>(file2);
+    return uprv_strcmp(lhs->basename, rhs->basename);  /* the sort key is the basename */
+}
+U_CDECL_END
+
+/* Rewrite, in place, every platform directory separator in s to the tree separator. */
+static void
+fixDirToTreePath(char *s)
+{
+    (void)s;  /* s is otherwise unused when no separator needs translating */
+#if (U_FILE_SEP_CHAR != U_TREE_ENTRY_SEP_CHAR)
+    /* primary file separator -> tree separator */
+    for(char *hit=uprv_strchr(s, U_FILE_SEP_CHAR); hit!=nullptr; hit=uprv_strchr(hit, U_FILE_SEP_CHAR)) {
+        *hit = U_TREE_ENTRY_SEP_CHAR;
+    }
+#endif
+#if (U_FILE_ALT_SEP_CHAR != U_FILE_SEP_CHAR) && (U_FILE_ALT_SEP_CHAR != U_TREE_ENTRY_SEP_CHAR)
+    /* alternate file separator -> tree separator */
+    for(char *hit=uprv_strchr(s, U_FILE_ALT_SEP_CHAR); hit!=nullptr; hit=uprv_strchr(hit, U_FILE_ALT_SEP_CHAR)) {
+        *hit = U_TREE_ENTRY_SEP_CHAR;
+    }
+#endif
+}
diff --git a/intl/icu/source/tools/toolutil/pkg_gencmn.h b/intl/icu/source/tools/toolutil/pkg_gencmn.h
new file mode 100644
index 0000000000..238239960a
--- /dev/null
+++ b/intl/icu/source/tools/toolutil/pkg_gencmn.h
@@ -0,0 +1,18 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/******************************************************************************
+ *   Copyright (C) 2008, International Business Machines
+ *   Corporation and others.  All Rights Reserved.
+ *******************************************************************************
+ */
+
+#ifndef __PKG_GENCMN_H__
+#define __PKG_GENCMN_H__
+
+#include "unicode/utypes.h"
+
+U_CAPI void U_EXPORT2
+createCommonDataFile(const char *destDir, const char *name, const char *entrypointName, const char *type, const char *source, const char *copyRight,
+                     const char *dataFile, uint32_t max_size, UBool sourceTOC, UBool verbose, char *gencmnFileName);
+
+#endif
diff --git a/intl/icu/source/tools/toolutil/pkg_icu.cpp b/intl/icu/source/tools/toolutil/pkg_icu.cpp
new file mode 100644
index 0000000000..d9c6717ecd
--- /dev/null
+++ b/intl/icu/source/tools/toolutil/pkg_icu.cpp
@@ -0,0 +1,176 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/******************************************************************************
+ *   Copyright (C) 2008-2015, International Business Machines
+ *   Corporation and others.  All Rights Reserved.
+ *******************************************************************************
+ */
+#include "unicode/utypes.h"
+#include "unicode/localpointer.h"
+#include "unicode/putil.h"
+#include "cstring.h"
+#include "toolutil.h"
+#include "uoptions.h"
+#include "uparse.h"
+#include "package.h"
+#include "pkg_icu.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+// read a file list -------------------------------------------------------- ***
+
+U_NAMESPACE_USE
+
+static const struct {
+    const char *suffix;  // file name extension, including the leading '.'
+    int32_t length;      // number of chars in suffix (without the NUL)
+} listFileSuffixes[]={   // extensions that isListTextFile() accepts as text list files
+    { ".txt", 4 },
+    { ".lst", 4 },
+    { ".tmp", 4 }
+};
+
+/* Tell whether listname is longer than, and ends with, one of listFileSuffixes[]. */
+static UBool
+isListTextFile(const char *listname) {
+    const char *const nameEnd=listname+strlen(listname);
+    for(const auto &entry : listFileSuffixes) {
+        // the name must be strictly longer than the suffix, i.e. have a non-empty stem
+        if((nameEnd-listname)<=entry.length) {
+            continue;
+        }
+        if(0==memcmp(nameEnd-entry.length, entry.suffix, entry.length)) {
+            return true;
+        }
+    }
+    return false;
+}
+
+/*
+ * Read a file list into listPkgIn, or into a new Package if listPkgIn is nullptr.
+ * If the listname ends with ".txt", ".lst" or ".tmp", then read the list file
+ * (in the system/ invariant charset).
+ * If the listname ends with ".dat", then read the ICU .dat package file.
+ * Otherwise, read the file itself as a single-item list.
+ */
+U_CAPI Package * U_EXPORT2
+readList(const char *filesPath, const char *listname, UBool readContents, Package *listPkgIn) {
+    Package *listPkg = listPkgIn;  // items are added to the caller's package if one was passed in
+    FILE *file;
+    const char *listNameEnd;
+
+    if(listname==nullptr || listname[0]==0) {
+        fprintf(stderr, "missing list file\n");
+        return nullptr;  // the only path that returns nullptr
+    }
+
+    if (listPkg == nullptr) {
+        listPkg=new Package();
+        if(listPkg==nullptr) {
+            fprintf(stderr, "icupkg: not enough memory\n");
+            exit(U_MEMORY_ALLOCATION_ERROR);
+        }
+    }
+
+    listNameEnd=strchr(listname, 0);  // points at the terminating NUL of listname
+    if(isListTextFile(listname)) {
+        // read the list file
+        char line[1024];
+        char *end;
+        const char *start;
+
+        file=fopen(listname, "r");
+        if(file==nullptr) {
+            fprintf(stderr, "icupkg: unable to open list file \"%s\"\n", listname);
+            delete listPkg;
+            exit(U_FILE_ACCESS_ERROR);
+        }
+
+        while(fgets(line, sizeof(line), file)) {
+            // remove comments (everything from the first '#' on, including the line end)
+            end=strchr(line, '#');
+            if(end!=nullptr) {
+                *end=0;
+            } else {
+                // remove trailing CR LF
+                end=strchr(line, 0);
+                while(line<end && (*(end-1)=='\r' || *(end-1)=='\n')) {
+                    *--end=0;
+                }
+            }
+
+            // check first non-whitespace character and
+            // skip empty lines and
+            // skip lines starting with reserved characters
+            start=u_skipWhitespace(line);
+            if(*start==0 || nullptr!=strchr(U_PKG_RESERVED_CHARS, *start)) {
+                continue;
+            }
+
+            // take whitespace-separated items from the line
+            for(;;) {
+                // find whitespace after the item or the end of the line
+                // (only ' ' and '\t' terminate an item here)
+                for(end=(char *)start; *end!=0 && *end!=' ' && *end!='\t'; ++end) {}
+                if(*end==0) {
+                    // this item is the last one on the line
+                    end=nullptr;
+                } else {
+                    // the item is terminated by whitespace, terminate it with NUL
+                    *end=0;
+                }
+                if(readContents) {
+                    listPkg->addFile(filesPath, start);
+                } else {
+                    listPkg->addItem(start);
+                }
+
+                // find the start of the next item or exit the loop
+                if(end==nullptr || *(start=u_skipWhitespace(end+1))==0) {
+                    break;
+                }
+            }
+        }
+        fclose(file);
+    } else if((listNameEnd-listname)>4 && 0==memcmp(listNameEnd-4, ".dat", 4)) {
+        // read the ICU .dat package
+        // Accept a .dat file whose name differs from the ToC prefixes.
+        listPkg->setAutoPrefix();
+        listPkg->readPackage(listname);
+    } else {
+        // list the single file itself
+        if(readContents) {
+            listPkg->addFile(filesPath, listname);
+        } else {
+            listPkg->addItem(listname);
+        }
+    }
+
+    return listPkg;
+}
+
+// Write pkg via Package::writePackage(). Without a caller-supplied pkg, a temporary
+// Package is first filled from addList, which readList() reads with sourcePath.
+U_CAPI int U_EXPORT2
+writePackageDatFile(const char *outFilename, const char *outComment, const char *sourcePath, const char *addList, Package *pkg, char outType) {
+    LocalPointer<Package> ownedPkg;
+    LocalPointer<Package> addListPkg;
+
+    if (pkg == nullptr) {
+        ownedPkg.adoptInstead(new Package);
+        if(!ownedPkg.isValid()) {
+            fprintf(stderr, "icupkg: not enough memory\n");
+            return U_MEMORY_ALLOCATION_ERROR;
+        }
+        addListPkg.adoptInstead(readList(sourcePath, addList, true, nullptr));
+        if(addListPkg.isNull()) {
+            return U_ILLEGAL_ARGUMENT_ERROR;  // readList() did not produce a package
+        }
+        pkg = ownedPkg.getAlias();
+        pkg->addItems(*addListPkg);
+    }
+
+    pkg->writePackage(outFilename, outType, outComment);
+    return 0;
+}
diff --git a/intl/icu/source/tools/toolutil/pkg_icu.h b/intl/icu/source/tools/toolutil/pkg_icu.h
new file mode 100644
index 0000000000..638056e60b
--- /dev/null
+++ b/intl/icu/source/tools/toolutil/pkg_icu.h
@@ -0,0 +1,25 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/******************************************************************************
+ *   Copyright (C) 2008-2016, International Business Machines
+ *   Corporation and others.  All Rights Reserved.
+ *******************************************************************************
+ */
+
+#ifndef __PKG_ICU_H__
+#define __PKG_ICU_H__
+
+#include "unicode/utypes.h"
+#include "package.h"
+
+#define U_PKG_RESERVED_CHARS "\"%&'()*+,-./:;<=>?_"
+
+U_CAPI int U_EXPORT2
+writePackageDatFile(const char *outFilename, const char *outComment,
+                    const char *sourcePath, const char *addList, icu::Package *pkg,
+                    char outType);
+
+U_CAPI icu::Package * U_EXPORT2
+readList(const char *filesPath, const char *listname, UBool readContents, icu::Package *listPkgIn);
+
+#endif
diff --git a/intl/icu/source/tools/toolutil/pkg_imp.h b/intl/icu/source/tools/toolutil/pkg_imp.h
new file mode 100644
index 0000000000..29abd8d83c
--- /dev/null
+++ b/intl/icu/source/tools/toolutil/pkg_imp.h
@@ -0,0 +1,38 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+*
+*   Copyright (C) 2005-2016, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*
+*******************************************************************************
+*   file name:  pkg_imp.h
+*   encoding:   UTF-8
+*   tab size:   8 (not used)
+*   indentation:4
+*
+*   created on: 2005sep18
+*   created by: Markus W. Scherer
+*
+*   Implementation definitions for data package functions in toolutil.
+*/
+
+#ifndef __PKG_IMP_H__
+#define __PKG_IMP_H__
+
+#include "unicode/utypes.h"
+#include "unicode/udata.h"
+
+/*
+ * Read an ICU data item with any platform type,
+ * return the pointer to the UDataInfo in its header,
+ * and set the lengths of the UDataInfo and of the whole header.
+ * All data remains in its platform type.
+ */
+U_CFUNC const UDataInfo *
+getDataInfo(const uint8_t *data, int32_t length,
+            int32_t &infoLength, int32_t &headerLength,
+            UErrorCode *pErrorCode);
+
+#endif
diff --git a/intl/icu/source/tools/toolutil/pkgitems.cpp b/intl/icu/source/tools/toolutil/pkgitems.cpp
new file mode 100644
index 0000000000..e49775d56d
--- /dev/null
+++ b/intl/icu/source/tools/toolutil/pkgitems.cpp
@@ -0,0 +1,645 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+*
+*   Copyright (C) 2003-2015, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*
+*******************************************************************************
+*   file name:  pkgitems.cpp
+*   encoding:   UTF-8
+*   tab size:   8 (not used)
+*   indentation:4
+*
+*   created on: 2005sep18
+*   created by: Markus W. Scherer
+*
+*   Companion file to package.cpp. Deals with details of ICU data item formats.
+*   Used for item dependencies.
+*   Contains adapted code from ucnv_bld.c (swapper code from 2003).
+*/
+
+#include "unicode/utypes.h"
+#include "unicode/ures.h"
+#include "unicode/putil.h"
+#include "unicode/udata.h"
+#include "cstring.h"
+#include "uinvchar.h"
+#include "ucmndata.h"
+#include "udataswp.h"
+#include "swapimpl.h"
+#include "toolutil.h"
+#include "package.h"
+#include "pkg_imp.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+/* item formats in common */
+
+#include "uresdata.h"
+#include "ucnv_bld.h"
+#include "ucnv_io.h"
+
+// general definitions ----------------------------------------------------- ***
+
+U_CDECL_BEGIN
+
+static void U_CALLCONV
+printError(void *context, const char *fmt, va_list args) {
+    vfprintf(static_cast<FILE *>(context), fmt, args);  // context carries the FILE * to print to
+}
+
+U_CDECL_END
+
+// a data item in native-platform form ------------------------------------- ***
+
+U_NAMESPACE_BEGIN
+
+// View of a package Item in native endianness/charset; non-native items are swapped into an owned copy.
+class NativeItem {
+public:
+    NativeItem() : pItem(nullptr), pInfo(nullptr), bytes(nullptr), swapped(nullptr), length(0) {}
+    NativeItem(const Item *item, UDataSwapFn *swap) : swapped(nullptr) {
+        setItem(item, swap);
+    }
+    ~NativeItem() {
+        delete [] swapped;
+    }
+    const UDataInfo *getDataInfo() const { return pInfo; }
+    const uint8_t *getBytes() const { return bytes; }  // item body, after the header
+    int32_t getLength() const { return length; }       // body length, header excluded
+
+    // Bind to item, converting it with swap unless it is already native; exits on any failure.
+    void setItem(const Item *item, UDataSwapFn *swap) {
+        delete [] swapped;  // calling setItem() again must not leak the previous swap buffer
+        swapped=nullptr;
+        pItem=item;
+        int32_t infoLength, itemHeaderLength;
+        UErrorCode errorCode=U_ZERO_ERROR;
+        pInfo=::getDataInfo(pItem->data, pItem->length, infoLength, itemHeaderLength, &errorCode);
+        if(U_FAILURE(errorCode)) {
+            exit(errorCode); // should succeed because readFile() checks headers
+        }
+        length=pItem->length-itemHeaderLength;
+
+        if(pInfo->isBigEndian==U_IS_BIG_ENDIAN && pInfo->charsetFamily==U_CHARSET_FAMILY) {
+            bytes=pItem->data+itemHeaderLength;
+        } else {
+            UDataSwapper *ds=udata_openSwapper((UBool)pInfo->isBigEndian, pInfo->charsetFamily, U_IS_BIG_ENDIAN, U_CHARSET_FAMILY, &errorCode);
+            if(U_FAILURE(errorCode)) {
+                fprintf(stderr, "icupkg: udata_openSwapper(\"%s\") failed - %s\n",
+                        pItem->name, u_errorName(errorCode));
+                exit(errorCode);
+            }
+            ds->printError=printError;
+            ds->printErrorContext=stderr;
+            swapped=new uint8_t[pItem->length];
+            if(swapped==nullptr) {
+                fprintf(stderr, "icupkg: unable to allocate memory for swapping \"%s\"\n", pItem->name);
+                exit(U_MEMORY_ALLOCATION_ERROR);
+            }
+            swap(ds, pItem->data, pItem->length, swapped, &errorCode);
+            pInfo=::getDataInfo(swapped, pItem->length, infoLength, itemHeaderLength, &errorCode);
+            udata_closeSwapper(ds);
+            if(U_FAILURE(errorCode)) {  // swap() or the header re-read failed: nothing valid to expose
+                fprintf(stderr, "icupkg: swapping \"%s\" failed - %s\n", pItem->name, u_errorName(errorCode));
+                exit(errorCode);
+            }
+            bytes=swapped+itemHeaderLength;
+        }
+    }
+
+private:
+    const Item *pItem;
+    const UDataInfo *pInfo;
+    const uint8_t *bytes;
+    uint8_t *swapped;
+    int32_t length;
+};
+
+// check a dependency ------------------------------------------------------ ***
+
+/*
+ * Compose target = <tree prefix of itemName> + id + suffix, where the tree prefix is
+ * everything up to and including the last '/' (empty if there is none).
+ * idLength<0 means id is NUL-terminated. If the result, including its NUL, does not
+ * fit into capacity, sets U_BUFFER_OVERFLOW_ERROR and does not write to target.
+ */
+static void
+makeTargetName(const char *itemName, const char *id, int32_t idLength, const char *suffix,
+               char *target, int32_t capacity,
+               UErrorCode *pErrorCode) {
+    // the tree prefix ends right after the last '/', or is empty without one
+    const char *lastSlash=strrchr(itemName, '/');
+    const char *basename=(lastSlash!=nullptr) ? lastSlash+1 : itemName;
+    const int32_t prefixLength=(int32_t)(basename-itemName);
+
+    // measure the pieces
+    if(idLength<0) {
+        idLength=(int32_t)strlen(id);
+    }
+    const int32_t suffixLength=(int32_t)strlen(suffix);
+    const int32_t totalLength=prefixLength+idLength+suffixLength;
+
+    // refuse names that would not leave room for the terminating NUL
+    if(totalLength>=capacity) {
+        fprintf(stderr, "icupkg/makeTargetName(%s) target item name length %ld too long\n",
+                        itemName, (long)totalLength);
+        *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
+        return;
+    }
+
+    // copy prefix, id and suffix back to back; the suffix brings its NUL along
+    char *out=target;
+    memcpy(out, itemName, prefixLength);
+    out+=prefixLength;
+    memcpy(out, id, idLength);
+    memcpy(out+idLength, suffix, suffixLength+1);
+}
+
+static void
+checkIDSuffix(const char *itemName, const char *id, int32_t idLength, const char *suffix,
+              CheckDependency check, void *context,
+              UErrorCode *pErrorCode) {
+    char targetName[200];  // filled by makeTargetName() from itemName, id and suffix
+    makeTargetName(itemName, id, idLength, suffix,
+                   targetName, (int32_t)sizeof(targetName), pErrorCode);
+    if(U_FAILURE(*pErrorCode)) { return; }  // no target name to report
+    check(context, itemName, targetName);
+}
+
+/*
+ * Report the dependency of itemName on its truncation parent: the basename cut at its
+ * last '_' (before the suffix), or "root" when there is nothing to cut.
+ * The root item itself has no parent and reports nothing.
+ */
+static void
+checkParent(const char *itemName, CheckDependency check, void *context,
+            UErrorCode *pErrorCode) {
+    // basename: the part after the last '/', or the whole name
+    const char *slash=strrchr(itemName, '/');
+    const char *itemID=(slash!=nullptr) ? slash+1 : itemName;
+
+    // suffix: from the last '.' of the basename, or the empty string at its end
+    const char *suffix=strrchr(itemID, '.');
+    if(suffix==nullptr) {
+        suffix=itemID+strlen(itemID);
+    }
+
+    // walk back from the suffix to the last '_'; stops at itemID if none is found
+    const char *cut=suffix;
+    while(cut>itemID) {
+        --cut;
+        if(*cut=='_') {
+            break;
+        }
+    }
+
+    if(cut!=itemID) {
+        // the parent name is the basename truncated before that '_'
+        checkIDSuffix(itemName, itemID, (int32_t)(cut-itemID), suffix, check, context, pErrorCode);
+        return;
+    }
+    // nothing to truncate: the parent is the root bundle, unless this item is "root" itself
+    static const char ROOT[]="root";
+    const int32_t rootLength=4;
+    if((suffix-itemID)==rootLength && 0==memcmp(itemID, ROOT, rootLength)) {
+        return;
+    }
+    checkIDSuffix(itemName, ROOT, rootLength, suffix, check, context, pErrorCode);
+}
+
+// get dependencies from resource bundles ---------------------------------- ***
+
+static const char16_t SLASH=0x2f;  // '/' (U+002F): checkAlias() splits an alias string at the first one
+
+/*
+ * Report the bundle that the string or alias resource res refers to.
+ * The bundle's locale ID is the part of alias in front of the first '/';
+ * useResSuffix selects whether ".res" is appended to it.
+ */
+static void
+checkAlias(const char *itemName,
+           Resource res, const char16_t *alias, int32_t length, UBool useResSuffix,
+           CheckDependency check, void *context, UErrorCode *pErrorCode) {
+    if(!uprv_isInvariantUString(alias, length)) {
+        fprintf(stderr, "icupkg/ures_enumDependencies(%s res=%08x) alias string contains non-invariant characters\n",
+                        itemName, res);
+        *pErrorCode=U_INVALID_CHAR_FOUND;
+        return;
+    }
+
+    // Alias strings look like
+    //   locale_ID/key1/key2/key3
+    //   locale_ID
+    // so count the code units in front of the first slash.
+    int32_t idLength=0;
+    while(idLength<length && alias[idLength]!=SLASH) {
+        ++idLength;
+    }
+
+    const bool isAliasResource=res_getPublicType(res)==URES_ALIAS;
+    if(isAliasResource && idLength==0) {
+        // Nothing precedes the first slash (or the alias is empty). Skipped because
+        // /ICUDATA/... and /pkgname/... go to a different package, and
+        // /LOCALE/... are for dynamic sideways fallbacks and don't go to a fixed bundle.
+        return;
+    }
+
+    if(!isAliasResource && idLength!=length) {
+        // a URES_STRING must consist of nothing but a locale ID
+        fprintf(stderr, "icupkg/ures_enumDependencies(%s res=%08x) %%ALIAS contains a '/'\n",
+                        itemName, res);
+        *pErrorCode=U_UNSUPPORTED_ERROR;
+        return;
+    }
+    // for an alias resource, the intra-bundle path after the first slash is ignored
+
+    // convert the locale ID from char16_t to a NUL-terminated char string
+    char localeID[48];
+    if(idLength>=(int32_t)sizeof(localeID)) {
+        fprintf(stderr, "icupkg/ures_enumDependencies(%s res=%08x) alias locale ID length %ld too long\n",
+                        itemName, res, (long)idLength);
+        *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
+        return;
+    }
+    u_UCharsToChars(alias, localeID, idLength);
+    localeID[idLength]=0;
+
+    const char *suffix=useResSuffix ? ".res" : "";
+    checkIDSuffix(itemName, localeID, -1, suffix, check, context, pErrorCode);
+}
+
+/*
+ * Enumerate one resource item and its children (tables and arrays recurse with depth+1)
+ * and report alias dependencies via checkAlias(). Returns false if this resource, or a
+ * table item below it, is a %%ALIAS or %%Parent string found at depth==1; else true.
+ */
+static UBool
+ures_enumDependencies(const char *itemName,
+                      const ResourceData *pResData,
+                      Resource res, const char *inKey, const char *parentKey, int32_t depth,
+                      CheckDependency check, void *context,
+                      Package *pkg,
+                      UErrorCode *pErrorCode) {
+    UBool doCheckParent = true;  // always remains true if depth>1
+    switch(res_getPublicType(res)) {
+    case URES_STRING:
+        if(depth==1 && inKey!=nullptr &&
+                (0==strcmp(inKey, "%%ALIAS") || 0==strcmp(inKey, "%%Parent"))) {
+            // Top-level %%ALIAS string:
+            //   The alias resource bundle will be used instead of this one.
+            // Top-level %%Parent string:
+            //   We use this bundle as well as the explicit parent bundle.
+            // Either way, the truncation parent is ignored.
+            doCheckParent = false;
+            // No tracing: build tool
+            int32_t length;
+            const char16_t *alias=res_getStringNoTrace(pResData, res, &length);
+            checkAlias(itemName, res, alias, length, /*useResSuffix=*/ true,
+                       check, context, pErrorCode);
+            // If there is a %%ALIAS, then there should be nothing else in this resource bundle.
+        } else if(depth==2 && parentKey!=nullptr && 0==strcmp(parentKey, "%%DEPENDENCY")) {
+            // Second-level %%DEPENDENCY string:
+            // Explicit declaration of a dependency of this item on that one.
+            // No tracing: build tool
+            int32_t length;
+            const char16_t *alias=res_getStringNoTrace(pResData, res, &length);
+            checkAlias(itemName, res, alias, length, /*useResSuffix=*/ false,
+                       check, context, pErrorCode);
+        }
+        // we ignore all other strings
+        break;
+    case URES_ALIAS:
+        {
+            // alias resources are checked at any depth, always with the ".res" suffix
+            int32_t length;
+            const char16_t *alias=res_getAlias(pResData, res, &length);
+            checkAlias(itemName, res, alias, length, true, check, context, pErrorCode);
+        }
+        break;
+    case URES_TABLE:
+        {
+            /* recurse; this table's own key (inKey) becomes each item's parentKey */
+            int32_t count=res_countArrayItems(pResData, res);
+            for(int32_t i=0; i<count; ++i) {
+                const char *itemKey;
+                Resource item=res_getTableItemByIndex(pResData, res, i, &itemKey);
+                // This doCheckParent return value is needed to
+                // propagate the possible false value from depth=1 to depth=0.
+                doCheckParent &= ures_enumDependencies(
+                        itemName, pResData,
+                        item, itemKey,
+                        inKey, depth+1,
+                        check, context,
+                        pkg,
+                        pErrorCode);
+                if(U_FAILURE(*pErrorCode)) {
+                    fprintf(stderr, "icupkg/ures_enumDependencies(%s table res=%08x)[%d].recurse(%s: %08x) failed\n",
+                                    itemName, res, i, itemKey, item);
+                    break;
+                }
+            }
+        }
+        break;
+    case URES_ARRAY:
+        {
+            /* recurse; items are passed a nullptr key, and the returned flag is not merged */
+            int32_t count=res_countArrayItems(pResData, res);
+            for(int32_t i=0; i<count; ++i) {
+                Resource item=res_getArrayItem(pResData, res, i);
+                ures_enumDependencies(
+                        itemName, pResData,
+                        item, nullptr,
+                        inKey, depth+1,
+                        check, context,
+                        pkg,
+                        pErrorCode);
+                if(U_FAILURE(*pErrorCode)) {
+                    fprintf(stderr, "icupkg/ures_enumDependencies(%s array res=%08x)[%d].recurse(%08x) failed\n",
+                                    itemName, res, i, item);
+                    break;
+                }
+            }
+        }
+        break;
+    default:
+        // all other resource types carry no dependency information here
+        break;
+    }
+    return doCheckParent;
+}
+
+static void
+ures_enumDependencies(const char *itemName, const UDataInfo *pInfo,
+                      const uint8_t *inBytes, int32_t length,
+                      CheckDependency check, void *context,
+                      Package *pkg,
+                      UErrorCode *pErrorCode) {
+    ResourceData resData;
+    // Read the bundle into resData; a failure here is fatal for the tool (exit).
+    res_read(&resData, pInfo, inBytes, length, pErrorCode);
+    if(U_FAILURE(*pErrorCode)) {
+        fprintf(stderr, "icupkg: .res format version %02x.%02x not supported, or bundle malformed\n",
+                        pInfo->formatVersion[0], pInfo->formatVersion[1]);
+        exit(U_UNSUPPORTED_ERROR);
+    }
+    // Declared at function scope: resData's pool pointers set below point into its bytes.
+    icu::NativeItem nativePool;
+
+    if(resData.usesPoolBundle) {
+        char poolName[200];
+        makeTargetName(itemName, "pool", 4, ".res", poolName, (int32_t)sizeof(poolName), pErrorCode);
+        if(U_FAILURE(*pErrorCode)) {
+            return;
+        }
+        check(context, itemName, poolName);  // the pool bundle itself is reported as a dependency
+        int32_t index=pkg->findItem(poolName);
+        if(index<0) {
+            // We cannot work with a bundle if its pool resource is missing.
+            // check() already printed a complaint.
+            return;
+        }
+        // TODO: Cache the native version in the Item itself.
+        nativePool.setItem(pkg->getItem(index), ures_swap);
+        const UDataInfo *poolInfo=nativePool.getDataInfo();
+        if(poolInfo->formatVersion[0]<=1) {
+            fprintf(stderr, "icupkg: %s is not a pool bundle\n", poolName);
+            return;
+        }
+        const int32_t *poolRoot=(const int32_t *)nativePool.getBytes();
+        const int32_t *poolIndexes=poolRoot+1;  // the indexes follow the first 32-bit word
+        int32_t poolIndexLength=poolIndexes[URES_INDEX_LENGTH]&0xff;  // the count is taken from the low byte
+        if(!(poolIndexLength>URES_INDEX_POOL_CHECKSUM &&
+             (poolIndexes[URES_INDEX_ATTRIBUTES]&URES_ATT_IS_POOL_BUNDLE))
+        ) {
+            fprintf(stderr, "icupkg: %s is not a pool bundle\n", poolName);
+            return;
+        }
+        // The bundle and its pool must carry the same checksum before the pool data is used.
+        if(resData.pRoot[1+URES_INDEX_POOL_CHECKSUM]==poolIndexes[URES_INDEX_POOL_CHECKSUM]) {
+            resData.poolBundleKeys=(const char *)(poolIndexes+poolIndexLength);
+            resData.poolBundleStrings=(const uint16_t *)(poolRoot+poolIndexes[URES_INDEX_KEYS_TOP]);
+        } else {
+            fprintf(stderr, "icupkg: %s has mismatched checksum for %s\n", poolName, itemName);
+            return;
+        }
+    }
+    // Walk all resources from the root; a false result means "do not check the parent".
+    UBool doCheckParent = ures_enumDependencies(
+        itemName, &resData,
+        resData.rootRes, nullptr, nullptr, 0,
+        check, context,
+        pkg,
+        pErrorCode);
+    if(!doCheckParent) {
+        return;
+    }
+
+    /*
+     * if the bundle attributes are present and the nofallback flag is not set,
+     * then add the parent bundle as a dependency
+     */
+    if(pInfo->formatVersion[0]>1 || (pInfo->formatVersion[0]==1 && pInfo->formatVersion[1]>=1)) {
+        if(!resData.noFallback) {
+            /* this bundle participates in locale fallback */
+            checkParent(itemName, check, context, pErrorCode);
+        }
+    }
+}
+
+// get dependencies from conversion tables --------------------------------- ***
+
+#if !UCONFIG_NO_CONVERSION
+/* code adapted from ucnv_swap() */
+static void
+ucnv_enumDependencies(const UDataSwapper *ds,
+                      const char *itemName, const UDataInfo *pInfo,
+                      const uint8_t *inBytes, int32_t length,
+                      CheckDependency check, void *context,
+                      UErrorCode *pErrorCode) {
+    uint32_t staticDataSize;
+    // Only an extension-only MBCS table yields a dependency here: the base table that it names.
+    const UConverterStaticData *inStaticData;
+
+    const _MBCSHeader *inMBCSHeader;
+    uint8_t outputType;
+
+    /* check format version */
+    if(!(
+        pInfo->formatVersion[0]==6 &&
+        pInfo->formatVersion[1]>=2
+    )) {
+        fprintf(stderr, "icupkg/ucnv_enumDependencies(): .cnv format version %02x.%02x not supported\n",
+                        pInfo->formatVersion[0], pInfo->formatVersion[1]);
+        exit(U_UNSUPPORTED_ERROR);
+    }
+
+    /* read the initial UConverterStaticData structure after the UDataInfo header */
+    inStaticData=(const UConverterStaticData *)inBytes;
+
+    if( length<(int32_t)sizeof(UConverterStaticData) ||
+        (uint32_t)length<(staticDataSize=ds->readUInt32(inStaticData->structSize))
+    ) {
+        udata_printError(ds, "icupkg/ucnv_enumDependencies(): too few bytes (%d after header) for an ICU .cnv conversion table\n",
+                            length);
+        *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
+        return;
+    }
+    // Step past the static data: inBytes/length now describe what follows it.
+    inBytes+=staticDataSize;
+    length-=(int32_t)staticDataSize;
+
+    /* check for supported conversionType values */
+    if(inStaticData->conversionType==UCNV_MBCS) {
+        /* MBCS data */
+        uint32_t mbcsHeaderLength, mbcsHeaderFlags, mbcsHeaderOptions;
+        int32_t extOffset;
+
+        inMBCSHeader=(const _MBCSHeader *)inBytes;
+
+        if(length<(int32_t)sizeof(_MBCSHeader)) {
+            udata_printError(ds, "icupkg/ucnv_enumDependencies(): too few bytes (%d after headers) for an ICU MBCS .cnv conversion table\n",
+                                length);
+            *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
+            return;
+        }
+        if(inMBCSHeader->version[0]==4 && inMBCSHeader->version[1]>=1) {
+            mbcsHeaderLength=MBCS_HEADER_V4_LENGTH;
+        } else if(inMBCSHeader->version[0]==5 && inMBCSHeader->version[1]>=3 &&
+                  ((mbcsHeaderOptions=ds->readUInt32(inMBCSHeader->options))&
+                   MBCS_OPT_UNKNOWN_INCOMPATIBLE_MASK)==0
+        ) {
+            mbcsHeaderLength=mbcsHeaderOptions&MBCS_OPT_LENGTH_MASK;
+        } else {
+            udata_printError(ds, "icupkg/ucnv_enumDependencies(): unsupported _MBCSHeader.version %d.%d\n",
+                             inMBCSHeader->version[0], inMBCSHeader->version[1]);
+            *pErrorCode=U_UNSUPPORTED_ERROR;
+            return;
+        }
+        // flags: the low byte is the output type, the bits above it are the extension offset.
+        mbcsHeaderFlags=ds->readUInt32(inMBCSHeader->flags);
+        extOffset=(int32_t)(mbcsHeaderFlags>>8);
+        outputType=(uint8_t)mbcsHeaderFlags;
+
+        if(outputType==MBCS_OUTPUT_EXT_ONLY) {
+            /*
+             * extension-only file,
+             * contains a base name instead of normal base table data
+             */
+            char baseName[32];
+            int32_t baseNameLength;
+
+            /* there is extension data after the base data, see ucnv_ext.h */
+            if(length<(extOffset+UCNV_EXT_INDEXES_MIN_LENGTH*4)) {
+                udata_printError(ds, "icupkg/ucnv_enumDependencies(): too few bytes (%d after headers) for an ICU MBCS .cnv conversion table with extension data\n",
+                                 length);
+                *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
+                return;
+            }
+
+            /* swap the base name, between the header and the extension data */
+            const char *inBaseName=(const char *)inBytes+mbcsHeaderLength*4;
+            baseNameLength=(int32_t)strlen(inBaseName);
+            if(baseNameLength>=(int32_t)sizeof(baseName)) {
+                udata_printError(ds, "icupkg/ucnv_enumDependencies(%s): base name length %ld too long\n",
+                                 itemName, (long)baseNameLength);  // %ld requires a long argument
+                *pErrorCode=U_UNSUPPORTED_ERROR;
+                return;
+            }
+            ds->swapInvChars(ds, inBaseName, baseNameLength+1, baseName, pErrorCode);
+            // Pass the base name with the ".cnv" suffix on to the dependency check.
+            checkIDSuffix(itemName, baseName, -1, ".cnv", check, context, pErrorCode);
+        }
+    }
+}
+
+// ICU data formats -------------------------------------------------------- ***
+
+static const struct {
+    uint8_t dataFormat[4];  // compared byte-for-byte against UDataInfo.dataFormat
+} dataFormats[]={
+    { { 0x52, 0x65, 0x73, 0x42 } },     /* dataFormat="ResB" */
+    { { 0x63, 0x6e, 0x76, 0x74 } },     /* dataFormat="cnvt" */
+    { { 0x43, 0x76, 0x41, 0x6c } }      /* dataFormat="CvAl" */
+};
+// Indexes into dataFormats[], in the same order; FMT_COUNT is the number of entries.
+enum {
+    FMT_RES,
+    FMT_CNV,
+    FMT_ALIAS,
+    FMT_COUNT
+};
+
+static int32_t
+getDataFormat(const uint8_t dataFormat[4]) {
+    // Linear search of the known formats; the matching index doubles as the FMT_* value, -1 if none.
+    int32_t found=-1;
+    for(int32_t idx=0; idx<FMT_COUNT && found<0; ++idx) {
+        if(memcmp(dataFormat, dataFormats[idx].dataFormat, 4)==0) {
+            found=idx;
+        }
+    }
+    return found;
+}
+
+// enumerate dependencies of a package item -------------------------------- ***
+
+void
+Package::enumDependencies(Item *pItem, void *context, CheckDependency check) {
+    int32_t infoLength, itemHeaderLength;
+    UErrorCode errorCode=U_ZERO_ERROR;
+    const UDataInfo *pInfo=getDataInfo(pItem->data, pItem->length, infoLength, itemHeaderLength, &errorCode);
+    if(U_FAILURE(errorCode)) {
+        return; // should not occur because readFile() checks headers
+    }
+    // Only resource bundles and conversion tables are examined; other items are left alone.
+    // find the data format and call the corresponding function, if any
+    int32_t format=getDataFormat(pInfo->dataFormat);
+    if(format>=0) {
+        switch(format) {
+        case FMT_RES:
+            {
+                /*
+                 * Swap the resource bundle (if necessary) so that we can use
+                 * the normal runtime uresdata.c code to read it.
+                 * We do not want to duplicate that code, especially not together with on-the-fly swapping.
+                 */
+                NativeItem nrb(pItem, ures_swap);
+                ures_enumDependencies(pItem->name, nrb.getDataInfo(), nrb.getBytes(), nrb.getLength(), check, context, this, &errorCode);
+                break;
+            }
+        case FMT_CNV:
+            {
+                // TODO: share/cache swappers
+                UDataSwapper *ds=udata_openSwapper(
+                                    (UBool)pInfo->isBigEndian, pInfo->charsetFamily,
+                                    U_IS_BIG_ENDIAN, U_CHARSET_FAMILY,
+                                    &errorCode);
+                if(U_FAILURE(errorCode)) {
+                    fprintf(stderr, "icupkg: udata_openSwapper(\"%s\") failed - %s\n",
+                            pItem->name, u_errorName(errorCode));
+                    exit(errorCode);
+                }
+
+                ds->printError=printError;
+                ds->printErrorContext=stderr;
+                // The converter data starts right after the item's data header.
+                const uint8_t *inBytes=pItem->data+itemHeaderLength;
+                int32_t length=pItem->length-itemHeaderLength;
+
+                ucnv_enumDependencies(ds, pItem->name, pInfo, inBytes, length, check, context, &errorCode);
+                udata_closeSwapper(ds);
+                break;
+            }
+        default:
+            break;
+        }
+        // Any failure reported by the enumeration functions ends the tool.
+        if(U_FAILURE(errorCode)) {
+            exit(errorCode);
+        }
+    }
+}
+#endif /* UCONFIG_NO_CONVERSION */
+
+U_NAMESPACE_END
diff --git a/intl/icu/source/tools/toolutil/ppucd.cpp b/intl/icu/source/tools/toolutil/ppucd.cpp
new file mode 100644
index 0000000000..0d59b28ce4
--- /dev/null
+++ b/intl/icu/source/tools/toolutil/ppucd.cpp
@@ -0,0 +1,622 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+*   Copyright (C) 2011-2014, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*******************************************************************************
+*   file name:  ppucd.cpp
+*   encoding:   UTF-8
+*   tab size:   8 (not used)
+*   indentation:4
+*
+*   created on: 2011dec11
+*   created by: Markus W. Scherer
+*/
+
+#include "unicode/utypes.h"
+#include "unicode/uchar.h"
+#include "charstr.h"
+#include "cstring.h"
+#include "ppucd.h"
+#include "uassert.h"
+#include "uparse.h"
+
+#include <stdio.h>
+#include <string.h>
+
+U_NAMESPACE_BEGIN
+
+PropertyNames::~PropertyNames() {}  // out-of-line definition of the virtual destructor; nothing to release
+
+// TODO: Create a concrete subclass for the default PropertyNames implementation
+// using the ICU library built-in property names API & data.
+// Currently only the genprops tool uses PreparsedUCD, and provides its own
+// PropertyNames implementation using its just-built property names data and its own code.
+// At some point, we should use PreparsedUCD in tests, and then we will need the
+// default implementation somewhere.
+#if 0
+int32_t
+PropertyNames::getPropertyEnum(const char *name) const {
+    return u_getPropertyEnum(name);
+}
+
+int32_t
+PropertyNames::getPropertyValueEnum(int32_t property, const char *name) const {
+    return u_getPropertyValueEnum((UProperty)property, name);
+}
+#endif
+
+UniProps::UniProps()
+        : start(U_SENTINEL), end(U_SENTINEL),  // no code point range yet
+          bmg(U_SENTINEL), bpb(U_SENTINEL),  // U_SENTINEL marks "no value" for code point properties
+          scf(U_SENTINEL), slc(U_SENTINEL), stc(U_SENTINEL), suc(U_SENTINEL),
+          digitValue(-1), numericValue(nullptr),
+          name(nullptr), nameAlias(nullptr) {
+    memset(binProps, 0, sizeof(binProps));  // every binary property starts out false
+    memset(intProps, 0, sizeof(intProps));  // every int property starts out 0
+    memset(age, 0, 4);  // age starts out as 0.0.0.0
+}
+
+UniProps::~UniProps() {}  // empty body: the const char * members are not freed here
+
+const int32_t PreparsedUCD::kNumLineBuffers;
+
+PreparsedUCD::PreparsedUCD(const char *filename, UErrorCode &errorCode)
+        : pnames(nullptr),
+          file(nullptr),
+          defaultLineIndex(-1), blockLineIndex(-1), lineIndex(0),
+          lineNumber(0),
+          lineType(NO_LINE),
+          fieldLimit(nullptr), lineLimit(nullptr) {
+    // Give the buffers a defined state first, so they are valid even on the early returns below.
+    memset(ucdVersion, 0, 4);
+    lines[0][0]=0;
+    if(U_FAILURE(errorCode)) { return; }
+    // A null, empty, or "-" file name selects standard input.
+    if(filename==nullptr || *filename==0 || (*filename=='-' && filename[1]==0)) {
+        filename=nullptr;
+        file=stdin;
+    } else {
+        file=fopen(filename, "r");
+    }
+    if(file==nullptr) {
+        perror("error opening preparsed UCD");
+        fprintf(stderr, "error opening preparsed UCD file %s\n", filename ? filename : "\"no file name given\"");
+        errorCode=U_FILE_ACCESS_ERROR;
+        return;
+    }
+}
+
+PreparsedUCD::~PreparsedUCD() {
+    // file is still nullptr if the constructor failed to open it,
+    // and stdin is not ours to close.
+    if(file!=nullptr && file!=stdin) { fclose(file); }
+}
+
+// Same order as the LineType values.
+static const char *lineTypeStrings[]={
+    nullptr,  // NO_LINE: has no keyword
+    nullptr,  // EMPTY_LINE: has no keyword
+    "ucd",  // UNICODE_VERSION_LINE
+    "property",
+    "binary",
+    "value",
+    "defaults",
+    "block",
+    "cp",
+    "unassigned",
+    "algnamesrange"
+};
+
+PreparsedUCD::LineType
+PreparsedUCD::readLine(UErrorCode &errorCode) {  // Reads the next line, splits it into NUL-terminated fields, returns its type.
+    if(U_FAILURE(errorCode)) { return NO_LINE; }
+    // Select the next available line buffer.
+    while(!isLineBufferAvailable(lineIndex)) {
+        ++lineIndex;
+        if (lineIndex == kNumLineBuffers) {
+            lineIndex = 0;
+        }
+    }
+    char *line=lines[lineIndex];
+    *line=0;
+    lineLimit=fieldLimit=line;
+    lineType=NO_LINE;
+    char *result=fgets(line, sizeof(lines[0]), file);  // NOTE(review): a line longer than the buffer is not detected here
+    if(result==nullptr) {
+        if(ferror(file)) {
+            perror("error reading preparsed UCD");
+            fprintf(stderr, "error reading preparsed UCD before line %ld\n", (long)lineNumber);
+            errorCode=U_FILE_ACCESS_ERROR;
+        }
+        return NO_LINE;  // end of file, or a read error if errorCode was set above
+    }
+    ++lineNumber;
+    if(*line=='#') {  // a comment line is reported as an empty line
+        fieldLimit=strchr(line, 0);
+        return lineType=EMPTY_LINE;
+    }
+    // Remove trailing \r\n.
+    char c;
+    char *limit=strchr(line, 0);
+    while(line<limit && ((c=*(limit-1))=='\n' || c=='\r')) { --limit; }
+    // Remove trailing white space.
+    while(line<limit && ((c=*(limit-1))==' ' || c=='\t')) { --limit; }
+    *limit=0;
+    lineLimit=limit;
+    if(line==limit) {
+        fieldLimit=limit;
+        return lineType=EMPTY_LINE;
+    }
+    // Split by ';'.
+    char *semi=line;
+    while((semi=strchr(semi, ';'))!=nullptr) { *semi++=0; }  // each ';' becomes a NUL inside the line buffer
+    fieldLimit=strchr(line, 0);  // end of the first field
+    // Determine the line type.
+    int32_t type;
+    for(type=EMPTY_LINE+1;; ++type) {  // starts after the two types that have no keyword
+        if(type==LINE_TYPE_COUNT) {
+            fprintf(stderr,
+                    "error in preparsed UCD: unknown line type (first field) '%s' on line %ld\n",
+                    line, (long)lineNumber);
+            errorCode=U_PARSE_ERROR;
+            return NO_LINE;
+        }
+        if(0==strcmp(line, lineTypeStrings[type])) {
+            break;
+        }
+    }
+    lineType=(LineType)type;
+    if(lineType==UNICODE_VERSION_LINE && fieldLimit<lineLimit) {  // only if there is a second field
+        u_versionFromString(ucdVersion, fieldLimit+1);
+    }
+    return lineType;
+}
+
+const char *
+PreparsedUCD::firstField() {
+    // Rewind: the first field begins at the start of the line buffer and ends at its first NUL.
+    fieldLimit=lines[lineIndex]+strlen(lines[lineIndex]);
+    return lines[lineIndex];
+}
+
+const char *
+PreparsedUCD::nextField() {
+    if(fieldLimit==lineLimit) { return nullptr; }  // the previous field was the last one on the line
+    char *next=fieldLimit+1;  // step over the NUL that terminates the previous field
+    fieldLimit=next+strlen(next);
+    return next;
+}
+
+const UniProps *
+PreparsedUCD::getProps(UnicodeSet &newValues, UErrorCode &errorCode) {  // Parses the current defaults/block/cp/unassigned line.
+    if(U_FAILURE(errorCode)) { return nullptr; }
+    newValues.clear();
+    if(!lineHasPropertyValues()) {
+        errorCode=U_ILLEGAL_ARGUMENT_ERROR;
+        return nullptr;
+    }
+    firstField();  // rewind to the line-type field; the range is the field after it
+    const char *field=nextField();
+    if(field==nullptr) {
+        // No range field after the type.
+        fprintf(stderr,
+                "error in preparsed UCD: missing default/block/cp range field "
+                "(no second field) on line %ld\n",
+                (long)lineNumber);
+        errorCode=U_PARSE_ERROR;
+        return nullptr;
+    }
+    UChar32 start, end;
+    if(!parseCodePointRange(field, start, end, errorCode)) { return nullptr; }
+    UniProps *props;  // set by every switch branch that does not return
+    UBool insideBlock=false;  // true if cp or unassigned range inside the block range.
+    switch(lineType) {
+    case DEFAULTS_LINE:
+        // Should occur before any block/cp/unassigned line.
+        if(blockLineIndex>=0) {
+            fprintf(stderr,
+                    "error in preparsed UCD: default line %ld after one or more block lines\n",
+                    (long)lineNumber);
+            errorCode=U_PARSE_ERROR;
+            return nullptr;
+        }
+        if(defaultLineIndex>=0) {
+            fprintf(stderr,
+                    "error in preparsed UCD: second line with default properties on line %ld\n",
+                    (long)lineNumber);
+            errorCode=U_PARSE_ERROR;
+            return nullptr;
+        }
+        if(start!=0 || end!=0x10ffff) {
+            fprintf(stderr,
+                    "error in preparsed UCD: default range must be 0..10FFFF, not '%s' on line %ld\n",
+                    field, (long)lineNumber);
+            errorCode=U_PARSE_ERROR;
+            return nullptr;
+        }
+        props=&defaultProps;
+        defaultLineIndex=lineIndex;
+        break;
+    case BLOCK_LINE:
+        blockProps=defaultProps;  // Block inherits default properties.
+        props=&blockProps;
+        blockLineIndex=lineIndex;
+        break;
+    case CP_LINE:
+    case UNASSIGNED_LINE:
+        if(blockProps.start<=start && end<=blockProps.end) {
+            insideBlock=true;
+            if(lineType==CP_LINE) {
+                // Code point range fully inside the last block inherits the block properties.
+                cpProps=blockProps;
+            } else {
+                // Unassigned line inside the block is based on default properties
+                // which override block properties.
+                cpProps=defaultProps;
+                newValues=blockValues;
+                // Except, it inherits the one blk=Block property.
+                int32_t blkIndex=UCHAR_BLOCK-UCHAR_INT_START;
+                cpProps.intProps[blkIndex]=blockProps.intProps[blkIndex];
+                newValues.remove((UChar32)UCHAR_BLOCK);
+            }
+        } else if(start>blockProps.end || end<blockProps.start) {
+            // Code point range fully outside the last block inherits the default properties.
+            cpProps=defaultProps;
+        } else {
+            // Code point range partially overlapping with the last block is illegal.
+            fprintf(stderr,
+                    "error in preparsed UCD: cp range %s on line %ld only "
+                    "partially overlaps with block range %04lX..%04lX\n",
+                    field, (long)lineNumber, (long)blockProps.start, (long)blockProps.end);
+            errorCode=U_PARSE_ERROR;
+            return nullptr;
+        }
+        props=&cpProps;
+        break;
+    default:
+        // Will not occur because of the lineHasPropertyValues() check above.
+        errorCode=U_ILLEGAL_ARGUMENT_ERROR;
+        return nullptr;
+    }
+    props->start=start;
+    props->end=end;
+    while((field=nextField())!=nullptr) {  // every remaining field is one property
+        if(!parseProperty(*props, field, newValues, errorCode)) { return nullptr; }
+    }
+    if(lineType==BLOCK_LINE) {
+        blockValues=newValues;  // remembered for unassigned lines inside this block
+    } else if(lineType==UNASSIGNED_LINE && insideBlock) {
+        // Unset newValues for values that are the same as the block values.
+        for(int32_t prop=0; prop<UCHAR_BINARY_LIMIT; ++prop) {
+            if(newValues.contains(prop) && cpProps.binProps[prop]==blockProps.binProps[prop]) {
+                newValues.remove(prop);
+            }
+        }
+        for(int32_t prop=UCHAR_INT_START; prop<UCHAR_INT_LIMIT; ++prop) {
+            int32_t index=prop-UCHAR_INT_START;
+            if(newValues.contains(prop) && cpProps.intProps[index]==blockProps.intProps[index]) {
+                newValues.remove(prop);
+            }
+        }
+    }
+    return props;
+}
+
+static const struct {
+    const char *name;
+    int32_t prop;
+} ppucdProperties[]={  // fallback names, consulted when pnames->getPropertyEnum() returns a negative value
+    { "Name_Alias", PPUCD_NAME_ALIAS },
+    { "Conditional_Case_Mappings", PPUCD_CONDITIONAL_CASE_MAPPINGS },
+    { "Turkic_Case_Folding", PPUCD_TURKIC_CASE_FOLDING }
+};
+
+// Parses one "name", "-name" or "name=value" field into props. Returns true for "ok to continue parsing fields".
+UBool
+PreparsedUCD::parseProperty(UniProps &props, const char *field, UnicodeSet &newValues,
+                            UErrorCode &errorCode) {
+    CharString pBuffer;
+    const char *p=field;
+    const char *v=strchr(p, '=');
+    int binaryValue;  // 1: "name", 0: "-name", -1: "name=value"
+    if(*p=='-') {
+        if(v!=nullptr) {
+            fprintf(stderr,
+                    "error in preparsed UCD: mix of binary-property-no and "
+                    "enum-property syntax '%s' on line %ld\n",
+                    field, (long)lineNumber);
+            errorCode=U_PARSE_ERROR;
+            return false;
+        }
+        binaryValue=0;
+        ++p;
+    } else if(v==nullptr) {
+        binaryValue=1;
+    } else {
+        binaryValue=-1;
+        // Copy out the property name rather than modifying the field (writing a NUL).
+        pBuffer.append(p, (int32_t)(v-p), errorCode);
+        p=pBuffer.data();
+        ++v;  // v now points at the value text after the '='
+    }
+    int32_t prop=pnames->getPropertyEnum(p);
+    if(prop<0) {
+        for(int32_t i=0;; ++i) {
+            if(i==UPRV_LENGTHOF(ppucdProperties)) {
+                // Ignore unknown property names.
+                return true;
+            }
+            if(0==uprv_stricmp(p, ppucdProperties[i].name)) {
+                prop=ppucdProperties[i].prop;
+                U_ASSERT(prop>=0);
+                break;
+            }
+        }
+    }
+    if(prop<UCHAR_BINARY_LIMIT) {
+        if(binaryValue>=0) {
+            props.binProps[prop]=(UBool)binaryValue;
+        } else {
+            // No binary value for a binary property.
+            fprintf(stderr,
+                    "error in preparsed UCD: enum-property syntax '%s' "
+                    "for binary property on line %ld\n",
+                    field, (long)lineNumber);
+            errorCode=U_PARSE_ERROR;
+        }
+    } else if(binaryValue>=0) {
+        // Binary value for a non-binary property.
+        fprintf(stderr,
+                "error in preparsed UCD: binary-property syntax '%s' "
+                "for non-binary property on line %ld\n",
+                field, (long)lineNumber);
+        errorCode=U_PARSE_ERROR;
+    } else if (prop < UCHAR_INT_START) {
+        fprintf(stderr,
+                "error in preparsed UCD: prop value is invalid: '%d' for line %ld\n",
+                prop, (long)lineNumber);
+        errorCode=U_PARSE_ERROR;
+    } else if(prop<UCHAR_INT_LIMIT) {
+        int32_t value=pnames->getPropertyValueEnum(prop, v);
+        if(value==UCHAR_INVALID_CODE && prop==UCHAR_CANONICAL_COMBINING_CLASS) {
+            // TODO: Make getPropertyValueEnum(UCHAR_CANONICAL_COMBINING_CLASS, v) work.
+            char *end;
+            unsigned long ccc=uprv_strtoul(v, &end, 10);
+            if(v<end && *end==0 && ccc<=254) {  // all of v must be a decimal number of at most 254
+                value=(int32_t)ccc;
+            }
+        }
+        if(value==UCHAR_INVALID_CODE) {
+            fprintf(stderr,
+                    "error in preparsed UCD: '%s' is not a valid value on line %ld\n",
+                    field, (long)lineNumber);
+            errorCode=U_PARSE_ERROR;
+        } else {
+            props.intProps[prop-UCHAR_INT_START]=value;
+        }
+    } else if(*v=='<') {
+        // Do not parse default values like <code point>, just set null values.
+        switch(prop) {
+        case UCHAR_BIDI_MIRRORING_GLYPH:
+            props.bmg=U_SENTINEL;
+            break;
+        case UCHAR_BIDI_PAIRED_BRACKET:
+            props.bpb=U_SENTINEL;
+            break;
+        case UCHAR_SIMPLE_CASE_FOLDING:
+            props.scf=U_SENTINEL;
+            break;
+        case UCHAR_SIMPLE_LOWERCASE_MAPPING:
+            props.slc=U_SENTINEL;
+            break;
+        case UCHAR_SIMPLE_TITLECASE_MAPPING:
+            props.stc=U_SENTINEL;
+            break;
+        case UCHAR_SIMPLE_UPPERCASE_MAPPING:
+            props.suc=U_SENTINEL;
+            break;
+        case UCHAR_CASE_FOLDING:
+            props.cf.remove();
+            break;
+        case UCHAR_LOWERCASE_MAPPING:
+            props.lc.remove();
+            break;
+        case UCHAR_TITLECASE_MAPPING:
+            props.tc.remove();
+            break;
+        case UCHAR_UPPERCASE_MAPPING:
+            props.uc.remove();
+            break;
+        case UCHAR_SCRIPT_EXTENSIONS:
+            props.scx.clear();
+            break;
+        default:
+            fprintf(stderr,
+                    "error in preparsed UCD: '%s' is not a valid default value on line %ld\n",
+                    field, (long)lineNumber);
+            errorCode=U_PARSE_ERROR;
+        }
+    } else {
+        char c;
+        switch(prop) {
+        case UCHAR_NUMERIC_VALUE:
+            props.numericValue=v;  // stores the pointer into field, not a copy
+            c=*v;
+            if('0'<=c && c<='9' && v[1]==0) {  // exactly one decimal digit
+                props.digitValue=c-'0';
+            } else {
+                props.digitValue=-1;
+            }
+            break;
+        case UCHAR_NAME:
+            props.name=v;  // stores the pointer into field, not a copy
+            break;
+        case UCHAR_AGE:
+            u_versionFromString(props.age, v);  // Writes 0.0.0.0 if v is not numeric.
+            break;
+        case UCHAR_BIDI_MIRRORING_GLYPH:
+            props.bmg=parseCodePoint(v, errorCode);
+            break;
+        case UCHAR_BIDI_PAIRED_BRACKET:
+            props.bpb=parseCodePoint(v, errorCode);
+            break;
+        case UCHAR_SIMPLE_CASE_FOLDING:
+            props.scf=parseCodePoint(v, errorCode);
+            break;
+        case UCHAR_SIMPLE_LOWERCASE_MAPPING:
+            props.slc=parseCodePoint(v, errorCode);
+            break;
+        case UCHAR_SIMPLE_TITLECASE_MAPPING:
+            props.stc=parseCodePoint(v, errorCode);
+            break;
+        case UCHAR_SIMPLE_UPPERCASE_MAPPING:
+            props.suc=parseCodePoint(v, errorCode);
+            break;
+        case UCHAR_CASE_FOLDING:
+            parseString(v, props.cf, errorCode);
+            break;
+        case UCHAR_LOWERCASE_MAPPING:
+            parseString(v, props.lc, errorCode);
+            break;
+        case UCHAR_TITLECASE_MAPPING:
+            parseString(v, props.tc, errorCode);
+            break;
+        case UCHAR_UPPERCASE_MAPPING:
+            parseString(v, props.uc, errorCode);
+            break;
+        case PPUCD_NAME_ALIAS:
+            props.nameAlias=v;  // stores the pointer into field, not a copy
+            break;
+        case PPUCD_CONDITIONAL_CASE_MAPPINGS:
+        case PPUCD_TURKIC_CASE_FOLDING:
+            // No need to parse their values: They are hardcoded in the runtime library.
+            break;
+        case UCHAR_SCRIPT_EXTENSIONS:
+            parseScriptExtensions(v, props.scx, errorCode);
+            break;
+        default:
+            // Ignore unhandled properties.
+            return true;
+        }
+    }
+    if(U_SUCCESS(errorCode)) {
+        newValues.add((UChar32)prop);  // record that this line set a value for prop
+        return true;
+    } else {
+        return false;
+    }
+}
+
+UBool
+PreparsedUCD::getRangeForAlgNames(UChar32 &start, UChar32 &end, UErrorCode &errorCode) {
+    if(U_FAILURE(errorCode)) { return false; }
+    // Calling this on any other kind of line is a usage error.
+    if(lineType!=ALG_NAMES_RANGE_LINE) {
+        errorCode=U_ILLEGAL_ARGUMENT_ERROR;
+        return false;
+    }
+    // Skip the line-type field; the range is expected in the second field.
+    firstField();
+    const char *range=nextField();
+    if(range!=nullptr) { return parseCodePointRange(range, start, end, errorCode); }
+    // No range field after the type.
+    fprintf(stderr,
+            "error in preparsed UCD: missing algnamesrange range field "
+            "(no second field) on line %ld\n",
+            (long)lineNumber);
+    errorCode=U_PARSE_ERROR;
+    return false;
+}
+
+UChar32
+PreparsedUCD::parseCodePoint(const char *s, UErrorCode &errorCode) {
+    // Parses all of s as one hexadecimal code point; bad input sets U_PARSE_ERROR and yields U_SENTINEL.
+    char *end;
+    unsigned long value=uprv_strtoul(s, &end, 16);  // not narrowed: "100000041" must not pass as U+0041
+    if(end<=s || *end!=0 || value>=0x110000) {
+        fprintf(stderr, "error in preparsed UCD: '%s' is not a valid code point on line %ld\n",
+                s, (long)lineNumber);
+        errorCode=U_PARSE_ERROR;
+        return U_SENTINEL;
+    }
+    return (UChar32)value;
+}
+
+UBool
+PreparsedUCD::parseCodePointRange(const char *s, UChar32 &start, UChar32 &end, UErrorCode &errorCode) {
+    // start and end are written only when errorCode shows success after parsing.
+    uint32_t first, last;
+    u_parseCodePointRange(s, &first, &last, &errorCode);
+    if(U_SUCCESS(errorCode)) {
+        start=(UChar32)first;
+        end=(UChar32)last;
+        return true;
+    }
+    fprintf(stderr, "error in preparsed UCD: '%s' is not a valid code point range on line %ld\n",
+            s, (long)lineNumber);
+    return false;
+}
+
+void
+PreparsedUCD::parseString(const char *s, UnicodeString &uni, UErrorCode &errorCode) {  // Parses s via u_parseString() into uni.
+    char16_t *buffer=toUCharPtr(uni.getBuffer(-1));  // NOTE(review): -1 presumably asks for the current capacity; confirm
+    int32_t length=u_parseString(s, buffer, uni.getCapacity(), nullptr, &errorCode);
+    if(errorCode==U_BUFFER_OVERFLOW_ERROR) {  // retry once with a buffer sized from the returned length
+        errorCode=U_ZERO_ERROR;
+        uni.releaseBuffer(0);
+        buffer=toUCharPtr(uni.getBuffer(length));
+        length=u_parseString(s, buffer, uni.getCapacity(), nullptr, &errorCode);
+    }
+    uni.releaseBuffer(length);
+    if(U_FAILURE(errorCode)) {  // only reported here; errorCode is left set for the caller
+        fprintf(stderr,
+                "error in preparsed UCD: '%s' is not a valid Unicode string on line %ld\n",
+                s, (long)lineNumber);
+    }
+}
+
+void
+PreparsedUCD::parseScriptExtensions(const char *s, UnicodeSet &scx, UErrorCode &errorCode) {
+    if(U_FAILURE(errorCode)) { return; }
+    scx.clear();
+    CharString codeBuffer;
+    // Walk the space-separated script codes; the last one ends at the string's NUL.
+    const char *space;
+    do {
+        const char *code=s;
+        space=strchr(s, ' ');
+        if(space!=nullptr) {
+            // Copy the code out so that it is NUL-terminated for the lookup.
+            code=codeBuffer.clear().append(s, (int32_t)(space-s), errorCode).data();
+            if(U_FAILURE(errorCode)) { return; }
+            // The next iteration continues right after the space.
+            s=space+1;
+        }
+        // Look up the code and reject unknown or repeated scripts.
+        int32_t script=pnames->getPropertyValueEnum(UCHAR_SCRIPT, code);
+        if(script==UCHAR_INVALID_CODE) {
+            fprintf(stderr,
+                    "error in preparsed UCD: '%s' is not a valid script code on line %ld\n",
+                    code, (long)lineNumber);
+            errorCode=U_PARSE_ERROR;
+            return;
+        }
+        if(scx.contains(script)) {
+            fprintf(stderr,
+                    "error in preparsed UCD: scx has duplicate '%s' codes on line %ld\n",
+                    code, (long)lineNumber);
+            errorCode=U_PARSE_ERROR;
+            return;
+        }
+        scx.add(script);
+    } while(space!=nullptr);
+    // An scx value has to name at least one script.
+    if(scx.isEmpty()) {
+        fprintf(stderr, "error in preparsed UCD: empty scx= on line %ld\n", (long)lineNumber);
+        errorCode=U_PARSE_ERROR;
+    }
+}
+
+U_NAMESPACE_END
diff --git a/intl/icu/source/tools/toolutil/ppucd.h b/intl/icu/source/tools/toolutil/ppucd.h
new file mode 100644
index 0000000000..d5c63fab49
--- /dev/null
+++ b/intl/icu/source/tools/toolutil/ppucd.h
@@ -0,0 +1,180 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+*   Copyright (C) 2011-2013, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*******************************************************************************
+*   file name:  ppucd.h
+*   encoding:   UTF-8
+*   tab size:   8 (not used)
+*   indentation:4
+*
+*   created on: 2011dec11
+*   created by: Markus W. Scherer
+*/
+
+#ifndef __PPUCD_H__
+#define __PPUCD_H__
+
+#include "unicode/utypes.h"
+#include "unicode/uniset.h"
+#include "unicode/unistr.h"
+
+#include <stdio.h>
+
+/** Additions to the uchar.h enum UProperty; numbering continues at UCHAR_STRING_LIMIT. */
+enum {
+    /** Name_Alias */
+    PPUCD_NAME_ALIAS=UCHAR_STRING_LIMIT,
+    PPUCD_CONDITIONAL_CASE_MAPPINGS,
+    PPUCD_TURKIC_CASE_FOLDING
+};
+
+U_NAMESPACE_BEGIN
+
+class U_TOOLUTIL_API PropertyNames {  // abstract name lookup; see PreparsedUCD::setPropertyNames()
+public:
+    virtual ~PropertyNames();
+    virtual int32_t getPropertyEnum(const char *name) const = 0;  // property name -> integer code
+    virtual int32_t getPropertyValueEnum(int32_t property, const char *name) const = 0;  // value name of the given property -> integer code
+};
+
+struct U_TOOLUTIL_API UniProps {
+    UniProps();
+    ~UniProps();
+    /** Returns intProps[prop-UCHAR_INT_START]; prop is not range-checked here. */
+    int32_t getIntProp(int32_t prop) const { return intProps[prop-UCHAR_INT_START]; }
+    // Data members; PreparsedUCD::getProps() returns a pointer to a filled-in UniProps.
+    UChar32 start, end;
+    UBool binProps[UCHAR_BINARY_LIMIT];
+    int32_t intProps[UCHAR_INT_LIMIT-UCHAR_INT_START];
+    UVersionInfo age;
+    UChar32 bmg, bpb;
+    UChar32 scf, slc, stc, suc;
+    int32_t digitValue;
+    const char *numericValue;
+    const char *name;
+    const char *nameAlias;
+    UnicodeString cf, lc, tc, uc;
+    UnicodeSet scx;
+};
+
+class U_TOOLUTIL_API PreparsedUCD {
+public:
+    enum LineType {
+        /** No line, end of file. */
+        NO_LINE,
+        /** Empty line. (Might contain a comment.) */
+        EMPTY_LINE,
+
+        /** ucd;6.1.0 */
+        UNICODE_VERSION_LINE,
+
+        /** property;Binary;Alpha;Alphabetic */
+        PROPERTY_LINE,
+        /** binary;N;No;F;False */
+        BINARY_LINE,
+        /** value;gc;Zs;Space_Separator */
+        VALUE_LINE,
+
+        /** defaults;0000..10FFFF;age=NA;bc=L;... */
+        DEFAULTS_LINE,
+        /** block;0000..007F;age=1.1;blk=ASCII;ea=Na;... */
+        BLOCK_LINE,
+        /** cp;0030;AHex;bc=EN;gc=Nd;na=DIGIT ZERO;... */
+        CP_LINE,
+        /** unassigned;E01F0..E0FFF;bc=BN;CWKCF;DI;GCB=CN;NFKC_CF= */
+        UNASSIGNED_LINE,
+
+        /** algnamesrange;4E00..9FCC;han;CJK UNIFIED IDEOGRAPH- */
+        ALG_NAMES_RANGE_LINE,
+
+        LINE_TYPE_COUNT
+    };
+
+    /**
+     * Constructor.
+     * Prepares this object for reading the preparsed UCD file named by filename.
+     */
+    PreparsedUCD(const char *filename, UErrorCode &errorCode);
+
+    /** Destructor. */
+    ~PreparsedUCD();
+
+    /** Sets (aliases) a PropertyNames implementation. Caller retains ownership. */
+    void setPropertyNames(const PropertyNames *pn) { pnames=pn; }
+
+    /**
+     * Reads a line from the preparsed UCD file.
+     * Splits the line by replacing each ';' with a NUL.
+     */
+    LineType readLine(UErrorCode &errorCode);
+
+    /** Returns the number of the line read by readLine(). */
+    int32_t getLineNumber() const { return lineNumber; }
+
+    /** Returns the line's next field, or nullptr. */
+    const char *nextField();
+
+    /** Returns the Unicode version when or after the UNICODE_VERSION_LINE has been read. */
+    const UVersionInfo &getUnicodeVersion() const { return ucdVersion; }
+
+    /** Returns true if the current line type is DEFAULTS_LINE..UNASSIGNED_LINE, i.e. has property values. */
+    UBool lineHasPropertyValues() const {
+        return DEFAULTS_LINE<=lineType && lineType<=UNASSIGNED_LINE;
+    }
+
+    /**
+     * Parses properties from the current line.
+     * Clears newValues and sets UProperty codes for property values mentioned
+     * on the current line (as opposed to being inherited).
+     * Returns a pointer to the filled-in UniProps, or nullptr if something went wrong.
+     * The returned UniProps are usable until the next line of the same type is read.
+     */
+    const UniProps *getProps(UnicodeSet &newValues, UErrorCode &errorCode);
+
+    /**
+     * Returns the code point range for the current algnamesrange line.
+     * Calls & parses nextField().
+     * Further nextField() calls will yield the range's type & prefix string.
+     * Returns U_SUCCESS(errorCode).
+     */
+    UBool getRangeForAlgNames(UChar32 &start, UChar32 &end, UErrorCode &errorCode);
+
+private:
+    UBool isLineBufferAvailable(int32_t i) {  // true if lines[i] is neither the default nor the block line
+        return defaultLineIndex!=i && blockLineIndex!=i;
+    }
+
+    /** Resets the field iterator and returns the line's first field (the line type field). */
+    const char *firstField();
+
+    UBool parseProperty(UniProps &props, const char *field, UnicodeSet &newValues,
+                        UErrorCode &errorCode);
+    UChar32 parseCodePoint(const char *s, UErrorCode &errorCode);
+    UBool parseCodePointRange(const char *s, UChar32 &start, UChar32 &end, UErrorCode &errorCode);
+    void parseString(const char *s, UnicodeString &uni, UErrorCode &errorCode);
+    void parseScriptExtensions(const char *s, UnicodeSet &scx, UErrorCode &errorCode);
+
+    static const int32_t kNumLineBuffers=3;
+
+    const PropertyNames *pnames;  // aliased
+    FILE *file;
+    int32_t defaultLineIndex, blockLineIndex, lineIndex;
+    int32_t lineNumber;
+    LineType lineType;
+    char *fieldLimit;
+    char *lineLimit;
+
+    UVersionInfo ucdVersion;
+    UniProps defaultProps, blockProps, cpProps;
+    UnicodeSet blockValues;
+    // Multiple lines so that default and block properties can maintain pointers
+    // into their line buffers.
+    char lines[kNumLineBuffers][4096];
+};
+
+U_NAMESPACE_END
+
+#endif  // __PPUCD_H__
diff --git a/intl/icu/source/tools/toolutil/sources.txt b/intl/icu/source/tools/toolutil/sources.txt
new file mode 100644
index 0000000000..d3288997e2
--- /dev/null
+++ b/intl/icu/source/tools/toolutil/sources.txt
@@ -0,0 +1,24 @@
+collationinfo.cpp
+dbgutil.cpp
+denseranges.cpp
+filestrm.cpp
+filetools.cpp
+flagparser.cpp
+package.cpp
+pkg_genc.cpp
+pkg_gencmn.cpp
+pkg_icu.cpp
+pkgitems.cpp
+ppucd.cpp
+swapimpl.cpp
+toolutil.cpp
+ucbuf.cpp
+ucln_tu.cpp
+ucm.cpp
+ucmstate.cpp
+udbgutil.cpp
+unewdata.cpp
+uoptions.cpp
+uparse.cpp
+writesrc.cpp
+xmlparser.cpp
diff --git a/intl/icu/source/tools/toolutil/swapimpl.cpp b/intl/icu/source/tools/toolutil/swapimpl.cpp
new file mode 100644
index 0000000000..9c58563965
--- /dev/null
+++ b/intl/icu/source/tools/toolutil/swapimpl.cpp
@@ -0,0 +1,1048 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+*
+*   Copyright (C) 2005-2014, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*
+*******************************************************************************
+*   file name:  swapimpl.cpp
+*   encoding:   UTF-8
+*   tab size:   8 (not used)
+*   indentation:4
+*
+*   created on: 2005may05
+*   created by: Markus W. Scherer
+*
+*   Data file swapping functions moved here from the common library
+*   because some data is hardcoded in ICU4C and no longer needs to be swapped.
+*   Moving the functions here simplifies testing (for code coverage) because
+*   we need not jump through hoops (like adding snapshots of these files
+*   to testdata).
+*
+*   The declarations for these functions remain in the internal header files
+*   in icu/source/common/
+*/
+
+#include "unicode/utypes.h"
+#include "unicode/putil.h"
+#include "unicode/udata.h"
+
+/* Explicit include statement for std_string.h is needed
+ * for compilation on certain platforms. (e.g. AIX/VACPP)
+ */
+#include "unicode/std_string.h"
+
+#include "cmemory.h"
+#include "cstring.h"
+#include "uinvchar.h"
+#include "uassert.h"
+#include "uarrsort.h"
+#include "ucmndata.h"
+#include "udataswp.h"
+#include "ulayout_props.h"
+
+/* swapping implementations in common */
+
+#include "emojiprops.h"
+#include "uresdata.h"
+#include "ucnv_io.h"
+#include "uprops.h"
+#include "ucase.h"
+#include "ubidi_props.h"
+#include "ucol_swp.h"
+#include "ucnv_bld.h"
+#include "unormimp.h"
+#include "normalizer2impl.h"
+#include "sprpimpl.h"
+#include "propname.h"
+#include "rbbidata.h"
+#include "utrie.h"
+#include "utrie2.h"
+#include "dictionarydata.h"
+
+/* swapping implementations in i18n */
+
+#if !UCONFIG_NO_NORMALIZATION
+#include "uspoof_impl.h"
+#endif
+
+U_NAMESPACE_USE
+
+/* definitions */
+
+/* Unicode property (value) aliases data swapping --------------------------- */
+
+static int32_t U_CALLCONV
+upname_swap(const UDataSwapper *ds,
+            const void *inData, int32_t length, void *outData,
+            UErrorCode *pErrorCode) {
+    /* udata_swapDataHeader checks the arguments */
+    int32_t headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
+    if(pErrorCode==nullptr || U_FAILURE(*pErrorCode)) {
+        return 0;
+    }
+
+    /* check data format and format version */
+    const UDataInfo *pInfo=
+        reinterpret_cast<const UDataInfo *>(
+            static_cast<const char *>(inData)+4);
+    if(!(
+        pInfo->dataFormat[0]==0x70 &&   /* dataFormat="pnam" */
+        pInfo->dataFormat[1]==0x6e &&
+        pInfo->dataFormat[2]==0x61 &&
+        pInfo->dataFormat[3]==0x6d &&
+        pInfo->formatVersion[0]==2
+    )) {
+        udata_printError(ds, "upname_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as pnames.icu\n",
+                         pInfo->dataFormat[0], pInfo->dataFormat[1],
+                         pInfo->dataFormat[2], pInfo->dataFormat[3],
+                         pInfo->formatVersion[0]);
+        *pErrorCode=U_UNSUPPORTED_ERROR;
+        return 0;
+    }
+
+    const uint8_t *inBytes=static_cast<const uint8_t *>(inData)+headerSize;
+    uint8_t *outBytes=static_cast<uint8_t *>(outData)+headerSize;
+
+    if(length>=0) {
+        length-=headerSize;
+        // formatVersion 2 initially has indexes[8], 32 bytes.
+        if(length<32) {
+            udata_printError(ds, "upname_swap(): too few bytes (%d after header) for pnames.icu\n",
+                             (int)length);
+            *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
+            return 0;
+        }
+    }
+    // When length<0, the swapping block below is skipped and only the size is returned.
+    const int32_t *inIndexes=reinterpret_cast<const int32_t *>(inBytes);
+    int32_t totalSize=udata_readInt32(ds, inIndexes[PropNameData::IX_TOTAL_SIZE]);
+    if(length>=0) {
+        if(length<totalSize) {
+            udata_printError(ds, "upname_swap(): too few bytes (%d after header, should be %d) "
+                             "for pnames.icu\n",
+                             (int)length, (int)totalSize);
+            *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
+            return 0;
+        }
+
+        int32_t numBytesIndexesAndValueMaps=
+            udata_readInt32(ds, inIndexes[PropNameData::IX_BYTE_TRIES_OFFSET]);
+        // NOTE(review): this offset is not checked against totalSize before it is used below.
+        // Swap the indexes[] and the valueMaps[].
+        ds->swapArray32(ds, inBytes, numBytesIndexesAndValueMaps, outBytes, pErrorCode);
+
+        // Copy the rest of the data.
+        if(inBytes!=outBytes) {
+            uprv_memcpy(outBytes+numBytesIndexesAndValueMaps,
+                        inBytes+numBytesIndexesAndValueMaps,
+                        totalSize-numBytesIndexesAndValueMaps);
+        }
+
+        // We need not swap anything else:
+        //
+        // The ByteTries are already byte-serialized, and are fixed on ASCII.
+        // (On an EBCDIC machine, the input string is converted to lowercase ASCII
+        // while matching.)
+        //
+        // The name groups are mostly invariant characters, but since we only
+        // generate, and keep in subversion, ASCII versions of pnames.icu,
+        // and since only ICU4J uses the pnames.icu data file
+        // (the data is hardcoded in ICU4C) and ICU4J uses ASCII data files,
+        // we just copy those bytes too.
+    }
+
+    return headerSize+totalSize;
+}
+
+/* Unicode properties data swapping ----------------------------------------- */
+
+static int32_t U_CALLCONV
+uprops_swap(const UDataSwapper *ds,
+            const void *inData, int32_t length, void *outData,
+            UErrorCode *pErrorCode) {
+    const UDataInfo *pInfo;
+    int32_t headerSize, i;
+    /* indexes[] values as read through udata_readInt32() */
+    int32_t dataIndexes[UPROPS_INDEX_COUNT];
+    const int32_t *inData32;
+
+    /* udata_swapDataHeader checks the arguments */
+    headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
+    if(pErrorCode==nullptr || U_FAILURE(*pErrorCode)) {
+        return 0;
+    }
+
+    /* check data format and format version */
+    pInfo=(const UDataInfo *)((const char *)inData+4);
+    if(!(
+        pInfo->dataFormat[0]==0x55 &&   /* dataFormat="UPro" */
+        pInfo->dataFormat[1]==0x50 &&
+        pInfo->dataFormat[2]==0x72 &&
+        pInfo->dataFormat[3]==0x6f &&
+        (3<=pInfo->formatVersion[0] && pInfo->formatVersion[0]<=7) &&
+        (pInfo->formatVersion[0]>=7 ||
+            (pInfo->formatVersion[2]==UTRIE_SHIFT &&
+             pInfo->formatVersion[3]==UTRIE_INDEX_SHIFT))
+    )) {
+        udata_printError(ds, "uprops_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not a Unicode properties file\n",
+                         pInfo->dataFormat[0], pInfo->dataFormat[1],
+                         pInfo->dataFormat[2], pInfo->dataFormat[3],
+                         pInfo->formatVersion[0]);
+        *pErrorCode=U_UNSUPPORTED_ERROR;
+        return 0;
+    }
+
+    /* the properties file must contain at least the indexes array */
+    if(length>=0 && (length-headerSize)<(int32_t)sizeof(dataIndexes)) {
+        udata_printError(ds, "uprops_swap(): too few bytes (%d after header) for a Unicode properties file\n",
+                         length-headerSize);
+        *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
+        return 0;
+    }
+
+    /* read the indexes */
+    inData32=(const int32_t *)((const char *)inData+headerSize);
+    for(i=0; i<UPROPS_INDEX_COUNT; ++i) {
+        dataIndexes[i]=udata_readInt32(ds, inData32[i]);
+    }
+
+    /*
+     * comments are copied from the data format description in genprops/store.c
+     * indexes[] constants are in uprops.h
+     */
+    int32_t dataTop;
+    if(length>=0) {
+        int32_t *outData32;
+
+        /*
+         * In formatVersion 7, UPROPS_DATA_TOP_INDEX has the post-header data size.
+         * In earlier formatVersions, it is 0 and a lower dataIndexes entry
+         * has the top of the last item.
+         */
+        for(i=UPROPS_DATA_TOP_INDEX; i>0 && (dataTop=dataIndexes[i])==0; --i) {}
+        /* dataTop counts 32-bit units, hence the 4* when comparing with byte lengths */
+        if((length-headerSize)<(4*dataTop)) {
+            udata_printError(ds, "uprops_swap(): too few bytes (%d after header) for a Unicode properties file\n",
+                             length-headerSize);
+            *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
+            return 0;
+        }
+
+        outData32=(int32_t *)((char *)outData+headerSize);
+
+        /* copy everything for inaccessible data (padding) */
+        if(inData32!=outData32) {
+            uprv_memcpy(outData32, inData32, 4*(size_t)dataTop);
+        }
+
+        /* swap the indexes[16] */
+        ds->swapArray32(ds, inData32, 4*UPROPS_INDEX_COUNT, outData32, pErrorCode);
+
+        /*
+         * swap the main properties UTrie
+         * PT serialized properties trie, see utrie.h (byte size: 4*(i0-16))
+         */
+        utrie_swapAnyVersion(ds,
+            inData32+UPROPS_INDEX_COUNT,
+            4*(dataIndexes[UPROPS_PROPS32_INDEX]-UPROPS_INDEX_COUNT),
+            outData32+UPROPS_INDEX_COUNT,
+            pErrorCode);
+
+        /*
+         * swap the properties and exceptions words
+         * P  const uint32_t props32[i1-i0];
+         * E  const uint32_t exceptions[i2-i1];
+         */
+        ds->swapArray32(ds,
+            inData32+dataIndexes[UPROPS_PROPS32_INDEX],
+            4*(dataIndexes[UPROPS_EXCEPTIONS_TOP_INDEX]-dataIndexes[UPROPS_PROPS32_INDEX]),
+            outData32+dataIndexes[UPROPS_PROPS32_INDEX],
+            pErrorCode);
+
+        /*
+         * swap the UChars
+         * U  const char16_t uchars[2*(i3-i2)];
+         */
+        ds->swapArray16(ds,
+            inData32+dataIndexes[UPROPS_EXCEPTIONS_TOP_INDEX],
+            4*(dataIndexes[UPROPS_ADDITIONAL_TRIE_INDEX]-dataIndexes[UPROPS_EXCEPTIONS_TOP_INDEX]),
+            outData32+dataIndexes[UPROPS_EXCEPTIONS_TOP_INDEX],
+            pErrorCode);
+
+        /*
+         * swap the additional UTrie
+         * i3 additionalTrieIndex; -- 32-bit unit index to the additional trie for more properties
+         */
+        utrie_swapAnyVersion(ds,
+            inData32+dataIndexes[UPROPS_ADDITIONAL_TRIE_INDEX],
+            4*(dataIndexes[UPROPS_ADDITIONAL_VECTORS_INDEX]-dataIndexes[UPROPS_ADDITIONAL_TRIE_INDEX]),
+            outData32+dataIndexes[UPROPS_ADDITIONAL_TRIE_INDEX],
+            pErrorCode);
+
+        /*
+         * swap the properties vectors
+         * PV const uint32_t propsVectors[(i6-i4)/i5][i5]==uint32_t propsVectors[i6-i4];
+         */
+        ds->swapArray32(ds,
+            inData32+dataIndexes[UPROPS_ADDITIONAL_VECTORS_INDEX],
+            4*(dataIndexes[UPROPS_SCRIPT_EXTENSIONS_INDEX]-dataIndexes[UPROPS_ADDITIONAL_VECTORS_INDEX]),
+            outData32+dataIndexes[UPROPS_ADDITIONAL_VECTORS_INDEX],
+            pErrorCode);
+
+        // swap the Script_Extensions data
+        // SCX const uint16_t scriptExtensions[2*(i7-i6)];
+        ds->swapArray16(ds,
+            inData32+dataIndexes[UPROPS_SCRIPT_EXTENSIONS_INDEX],
+            4*(dataIndexes[UPROPS_RESERVED_INDEX_7]-dataIndexes[UPROPS_SCRIPT_EXTENSIONS_INDEX]),
+            outData32+dataIndexes[UPROPS_SCRIPT_EXTENSIONS_INDEX],
+            pErrorCode);
+    }
+
+    /* i7 reservedIndex7; -- 32-bit unit index to the top of the Script_Extensions data */
+    return headerSize+4*dataIndexes[UPROPS_RESERVED_INDEX_7];
+}
+
+/* Unicode case mapping data swapping --------------------------------------- */
+
+static int32_t U_CALLCONV
+ucase_swap(const UDataSwapper *ds,
+           const void *inData, int32_t length, void *outData,
+           UErrorCode *pErrorCode) {
+    const UDataInfo *pInfo;
+    int32_t headerSize;
+
+    const uint8_t *inBytes;
+    uint8_t *outBytes;
+
+    const int32_t *inIndexes;
+    int32_t indexes[16];
+
+    int32_t i, offset, count, size;
+
+    /* udata_swapDataHeader checks the arguments */
+    headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
+    if(pErrorCode==nullptr || U_FAILURE(*pErrorCode)) {
+        return 0;
+    }
+
+    /* check data format and format version */
+    pInfo=(const UDataInfo *)((const char *)inData+4);
+    if(!(
+        pInfo->dataFormat[0]==UCASE_FMT_0 &&    /* dataFormat="cAsE" */
+        pInfo->dataFormat[1]==UCASE_FMT_1 &&
+        pInfo->dataFormat[2]==UCASE_FMT_2 &&
+        pInfo->dataFormat[3]==UCASE_FMT_3 &&
+        ((pInfo->formatVersion[0]==1 &&
+          pInfo->formatVersion[2]==UTRIE_SHIFT &&
+          pInfo->formatVersion[3]==UTRIE_INDEX_SHIFT) ||
+         (2<=pInfo->formatVersion[0] && pInfo->formatVersion[0]<=4))
+    )) {
+        udata_printError(ds, "ucase_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as case mapping data\n",
+                         pInfo->dataFormat[0], pInfo->dataFormat[1],
+                         pInfo->dataFormat[2], pInfo->dataFormat[3],
+                         pInfo->formatVersion[0]);
+        *pErrorCode=U_UNSUPPORTED_ERROR;
+        return 0;
+    }
+
+    inBytes=(const uint8_t *)inData+headerSize;
+    outBytes=(uint8_t *)outData+headerSize;
+
+    inIndexes=(const int32_t *)inBytes;
+
+    if(length>=0) {
+        length-=headerSize;
+        if(length<16*4) {
+            udata_printError(ds, "ucase_swap(): too few bytes (%d after header) for case mapping data\n",
+                             length);
+            *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
+            return 0;
+        }
+    }
+
+    /* read the first 16 indexes (ICU 3.2/format version 1: UCASE_IX_TOP==16, might grow) */
+    for(i=0; i<16; ++i) {
+        indexes[i]=udata_readInt32(ds, inIndexes[i]);
+    }
+
+    /* get the total length of the data */
+    size=indexes[UCASE_IX_LENGTH];
+    /* when length<0, the swapping below is skipped and only the size is returned */
+    if(length>=0) {
+        if(length<size) {
+            udata_printError(ds, "ucase_swap(): too few bytes (%d after header) for all of case mapping data\n",
+                             length);
+            *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
+            return 0;
+        }
+
+        /* copy everything first so that bytes not swapped below still reach the output */
+        if(inBytes!=outBytes) {
+            uprv_memcpy(outBytes, inBytes, size);
+        }
+
+        offset=0;
+
+        /* swap the int32_t indexes[] */
+        count=indexes[UCASE_IX_INDEX_TOP]*4;
+        ds->swapArray32(ds, inBytes, count, outBytes, pErrorCode);
+        offset+=count;
+
+        /* swap the UTrie */
+        count=indexes[UCASE_IX_TRIE_SIZE];
+        utrie_swapAnyVersion(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
+        offset+=count;
+
+        /* swap the uint16_t exceptions[] and unfold[] */
+        count=(indexes[UCASE_IX_EXC_LENGTH]+indexes[UCASE_IX_UNFOLD_LENGTH])*2;
+        ds->swapArray16(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
+        offset+=count;
+        /* the three sections handled above must account for the whole payload */
+        U_ASSERT(offset==size);
+    }
+
+    return headerSize+size;
+}
+
+/* Unicode bidi/shaping data swapping --------------------------------------- */
+
+static int32_t U_CALLCONV
+ubidi_swap(const UDataSwapper *ds,
+           const void *inData, int32_t length, void *outData,
+           UErrorCode *pErrorCode) {
+    const UDataInfo *pInfo;
+    int32_t headerSize;
+
+    const uint8_t *inBytes;
+    uint8_t *outBytes;
+
+    const int32_t *inIndexes;
+    int32_t indexes[16];
+
+    int32_t i, offset, count, size;
+
+    /* udata_swapDataHeader checks the arguments */
+    headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
+    if(pErrorCode==nullptr || U_FAILURE(*pErrorCode)) {
+        return 0;
+    }
+
+    /* check data format and format version */
+    pInfo=(const UDataInfo *)((const char *)inData+4);
+    if(!(
+        pInfo->dataFormat[0]==UBIDI_FMT_0 &&    /* dataFormat="BiDi" */
+        pInfo->dataFormat[1]==UBIDI_FMT_1 &&
+        pInfo->dataFormat[2]==UBIDI_FMT_2 &&
+        pInfo->dataFormat[3]==UBIDI_FMT_3 &&
+        ((pInfo->formatVersion[0]==1 &&
+          pInfo->formatVersion[2]==UTRIE_SHIFT &&
+          pInfo->formatVersion[3]==UTRIE_INDEX_SHIFT) ||
+         pInfo->formatVersion[0]==2)
+    )) {
+        udata_printError(ds, "ubidi_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as bidi/shaping data\n",
+                         pInfo->dataFormat[0], pInfo->dataFormat[1],
+                         pInfo->dataFormat[2], pInfo->dataFormat[3],
+                         pInfo->formatVersion[0]);
+        *pErrorCode=U_UNSUPPORTED_ERROR;
+        return 0;
+    }
+
+    inBytes=(const uint8_t *)inData+headerSize;
+    outBytes=(uint8_t *)outData+headerSize;
+
+    inIndexes=(const int32_t *)inBytes;
+
+    if(length>=0) {
+        length-=headerSize;
+        if(length<16*4) {
+            udata_printError(ds, "ubidi_swap(): too few bytes (%d after header) for bidi/shaping data\n",
+                             length);
+            *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
+            return 0;
+        }
+    }
+
+    /* read the first 16 indexes (ICU 3.4/format version 1: UBIDI_IX_TOP==16, might grow) */
+    for(i=0; i<16; ++i) {
+        indexes[i]=udata_readInt32(ds, inIndexes[i]);
+    }
+
+    /* get the total length of the data */
+    size=indexes[UBIDI_IX_LENGTH];
+    /* when length<0, the swapping below is skipped and only the size is returned */
+    if(length>=0) {
+        if(length<size) {
+            udata_printError(ds, "ubidi_swap(): too few bytes (%d after header) for all of bidi/shaping data\n",
+                             length);
+            *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
+            return 0;
+        }
+
+        /* copy everything first so that bytes not swapped below still reach the output */
+        if(inBytes!=outBytes) {
+            uprv_memcpy(outBytes, inBytes, size);
+        }
+
+        offset=0;
+
+        /* swap the int32_t indexes[] */
+        count=indexes[UBIDI_IX_INDEX_TOP]*4;
+        ds->swapArray32(ds, inBytes, count, outBytes, pErrorCode);
+        offset+=count;
+
+        /* swap the UTrie */
+        count=indexes[UBIDI_IX_TRIE_SIZE];
+        utrie_swapAnyVersion(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
+        offset+=count;
+
+        /* swap the uint32_t mirrors[] */
+        count=indexes[UBIDI_IX_MIRROR_LENGTH]*4;
+        ds->swapArray32(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
+        offset+=count;
+
+        /* just skip the uint8_t jgArray[] and jgArray2[]: only offset is advanced for them */
+        count=indexes[UBIDI_IX_JG_LIMIT]-indexes[UBIDI_IX_JG_START];
+        offset+=count;
+        count=indexes[UBIDI_IX_JG_LIMIT2]-indexes[UBIDI_IX_JG_START2];
+        offset+=count;
+        /* the sections handled above must account for the whole payload */
+        U_ASSERT(offset==size);
+    }
+
+    return headerSize+size;
+}
+
+/* Unicode normalization data swapping -------------------------------------- */
+
+#if !UCONFIG_NO_NORMALIZATION
+
+static int32_t U_CALLCONV
+unorm_swap(const UDataSwapper *ds,
+           const void *inData, int32_t length, void *outData,
+           UErrorCode *pErrorCode) {
+    const UDataInfo *pInfo;
+    int32_t headerSize;
+
+    const uint8_t *inBytes;
+    uint8_t *outBytes;
+
+    const int32_t *inIndexes;
+    int32_t indexes[32];
+
+    int32_t i, offset, count, size;
+
+    /* udata_swapDataHeader checks the arguments */
+    headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
+    if(pErrorCode==nullptr || U_FAILURE(*pErrorCode)) {
+        return 0;
+    }
+
+    /* check data format and format version */
+    pInfo=(const UDataInfo *)((const char *)inData+4);
+    if(!(
+        pInfo->dataFormat[0]==0x4e &&   /* dataFormat="Norm" */
+        pInfo->dataFormat[1]==0x6f &&
+        pInfo->dataFormat[2]==0x72 &&
+        pInfo->dataFormat[3]==0x6d &&
+        pInfo->formatVersion[0]==2
+    )) {
+        udata_printError(ds, "unorm_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as unorm.icu\n",
+                         pInfo->dataFormat[0], pInfo->dataFormat[1],
+                         pInfo->dataFormat[2], pInfo->dataFormat[3],
+                         pInfo->formatVersion[0]);
+        *pErrorCode=U_UNSUPPORTED_ERROR;
+        return 0;
+    }
+
+    inBytes=(const uint8_t *)inData+headerSize;
+    outBytes=(uint8_t *)outData+headerSize;
+
+    inIndexes=(const int32_t *)inBytes;
+
+    if(length>=0) {
+        length-=headerSize;
+        if(length<32*4) {
+            udata_printError(ds, "unorm_swap(): too few bytes (%d after header) for unorm.icu\n",
+                             length);
+            *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
+            return 0;
+        }
+    }
+
+    /* read the first 32 indexes (ICU 2.8/format version 2.2: _NORM_INDEX_TOP==32, might grow) */
+    for(i=0; i<32; ++i) {
+        indexes[i]=udata_readInt32(ds, inIndexes[i]);
+    }
+
+    /* calculate the total length of the data */
+    size=
+        32*4+ /* size of indexes[] */
+        indexes[_NORM_INDEX_TRIE_SIZE]+
+        indexes[_NORM_INDEX_UCHAR_COUNT]*2+
+        indexes[_NORM_INDEX_COMBINE_DATA_COUNT]*2+
+        indexes[_NORM_INDEX_FCD_TRIE_SIZE]+
+        indexes[_NORM_INDEX_AUX_TRIE_SIZE]+
+        indexes[_NORM_INDEX_CANON_SET_COUNT]*2;
+    /* when length<0, the swapping below is skipped and only the size is returned */
+    if(length>=0) {
+        if(length<size) {
+            udata_printError(ds, "unorm_swap(): too few bytes (%d after header) for all of unorm.icu\n",
+                             length);
+            *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
+            return 0;
+        }
+
+        /* copy everything first so that bytes not swapped below still reach the output */
+        if(inBytes!=outBytes) {
+            uprv_memcpy(outBytes, inBytes, size);
+        }
+
+        offset=0;
+
+        /* swap the indexes[] */
+        count=32*4;
+        ds->swapArray32(ds, inBytes, count, outBytes, pErrorCode);
+        offset+=count;
+
+        /* swap the main UTrie */
+        count=indexes[_NORM_INDEX_TRIE_SIZE];
+        utrie_swap(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
+        offset+=count;
+
+        /* swap the uint16_t extraData[] and the uint16_t combiningTable[] */
+        count=(indexes[_NORM_INDEX_UCHAR_COUNT]+indexes[_NORM_INDEX_COMBINE_DATA_COUNT])*2;
+        ds->swapArray16(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
+        offset+=count;
+
+        /* swap the FCD UTrie */
+        count=indexes[_NORM_INDEX_FCD_TRIE_SIZE];
+        if(count!=0) {
+            utrie_swap(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
+            offset+=count;
+        }
+
+        /* swap the aux UTrie */
+        count=indexes[_NORM_INDEX_AUX_TRIE_SIZE];
+        if(count!=0) {
+            utrie_swap(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
+            offset+=count;
+        }
+
+        /* swap the uint16_t canonStartSets[] (counted by _NORM_INDEX_CANON_SET_COUNT) */
+        count=indexes[_NORM_INDEX_CANON_SET_COUNT]*2;
+        ds->swapArray16(ds, inBytes+offset, count, outBytes+offset, pErrorCode);
+        offset+=count;
+    }
+
+    return headerSize+size;
+}
+
+#endif
+
+// Unicode text layout properties data swapping --------------------------------
+
+static int32_t U_CALLCONV
+ulayout_swap(const UDataSwapper *ds,
+             const void *inData, int32_t length, void *outData,
+             UErrorCode *pErrorCode) {
+    // udata_swapDataHeader checks the arguments.
+    int32_t headerSize = udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
+    if (pErrorCode == nullptr || U_FAILURE(*pErrorCode)) {
+        return 0;
+    }
+
+    // Check data format and format version.
+    const UDataInfo *pInfo = (const UDataInfo *)((const char *)inData + 4);
+    if (!(
+            pInfo->dataFormat[0] == ULAYOUT_FMT_0 &&    // dataFormat="Layo"
+            pInfo->dataFormat[1] == ULAYOUT_FMT_1 &&
+            pInfo->dataFormat[2] == ULAYOUT_FMT_2 &&
+            pInfo->dataFormat[3] == ULAYOUT_FMT_3 &&
+            pInfo->formatVersion[0] == 1)) {
+        udata_printError(ds,
+            "ulayout_swap(): data format %02x.%02x.%02x.%02x (format version %02x) "
+            "is not recognized as text layout properties data\n",
+            pInfo->dataFormat[0], pInfo->dataFormat[1],
+            pInfo->dataFormat[2], pInfo->dataFormat[3],
+            pInfo->formatVersion[0]);
+        *pErrorCode = U_UNSUPPORTED_ERROR;
+        return 0;
+    }
+
+    const uint8_t *inBytes = (const uint8_t *)inData + headerSize;
+    uint8_t *outBytes = (uint8_t *)outData + headerSize;
+
+    const int32_t *inIndexes = (const int32_t *)inBytes;
+
+    if (length >= 0) {
+        length -= headerSize;
+        if (length < 12 * 4) {
+            udata_printError(ds,
+                "ulayout_swap(): too few bytes (%d after header) for text layout properties data\n",
+                length);
+            *pErrorCode = U_INDEX_OUTOFBOUNDS_ERROR;
+            return 0;
+        }
+    }
+
+    int32_t indexesLength = udata_readInt32(ds, inIndexes[ULAYOUT_IX_INDEXES_LENGTH]);
+    if (indexesLength < 12) {
+        udata_printError(ds,
+            "ulayout_swap(): too few indexes (%d) for text layout properties data\n",
+            indexesLength);
+        *pErrorCode = U_INDEX_OUTOFBOUNDS_ERROR;
+        return 0;
+    }
+
+    // Read the data offsets before swapping anything (entries below ULAYOUT_IX_INPC_TRIE_TOP stay unset).
+    int32_t indexes[ULAYOUT_IX_TRIES_TOP + 1];
+    for (int32_t i = ULAYOUT_IX_INPC_TRIE_TOP; i <= ULAYOUT_IX_TRIES_TOP; ++i) {
+        indexes[i] = udata_readInt32(ds, inIndexes[i]);
+    }
+    int32_t size = indexes[ULAYOUT_IX_TRIES_TOP];
+    // When length < 0, the swapping below is skipped and only the size is returned.
+    if (length >= 0) {
+        if (length < size) {
+            udata_printError(ds,
+                "ulayout_swap(): too few bytes (%d after header) "
+                "for all of text layout properties data\n",
+                length);
+            *pErrorCode = U_INDEX_OUTOFBOUNDS_ERROR;
+            return 0;
+        }
+
+        // Copy everything first so that bytes not swapped below still reach the output.
+        if (inBytes != outBytes) {
+            uprv_memcpy(outBytes, inBytes, size);
+        }
+
+        // Swap the int32_t indexes[].
+        int32_t offset = 0;
+        int32_t count = indexesLength * 4;
+        ds->swapArray32(ds, inBytes, count, outBytes, pErrorCode);
+        offset += count;
+
+        // Swap each trie; indexes[i] is the byte offset of the top (end) of trie i.
+        for (int32_t i = ULAYOUT_IX_INPC_TRIE_TOP; i <= ULAYOUT_IX_TRIES_TOP; ++i) {
+            int32_t top = indexes[i];
+            count = top - offset;
+            U_ASSERT(count >= 0);
+            if (count >= 16) {  // presumably skips empty or too-short trie slots -- TODO confirm
+                utrie_swapAnyVersion(ds, inBytes + offset, count, outBytes + offset, pErrorCode);
+            }
+            offset = top;
+        }
+        // The indexes and tries handled above must account for the whole payload.
+        U_ASSERT(offset == size);
+    }
+
+    return headerSize + size;
+}
+
+// Unicode emoji properties data swapping --------------------------------------
+
+static int32_t U_CALLCONV
+uemoji_swap(const UDataSwapper *ds,
+            const void *inData, int32_t length, void *outData,
+            UErrorCode *pErrorCode) {
+    // Swaps emoji properties data ("Emoj", format version 1); returns header size + data size, or 0 on error.
+    int32_t headerSize = udata_swapDataHeader(ds, inData, length, outData, pErrorCode);  // udata_swapDataHeader checks the arguments.
+    if (pErrorCode == nullptr || U_FAILURE(*pErrorCode)) {
+        return 0;
+    }
+
+    // Check data format and format version.
+    const UDataInfo *pInfo = (const UDataInfo *)((const char *)inData + 4);
+    if (!(
+            pInfo->dataFormat[0] == u'E' &&    // dataFormat="Emoj"
+            pInfo->dataFormat[1] == u'm' &&
+            pInfo->dataFormat[2] == u'o' &&
+            pInfo->dataFormat[3] == u'j' &&
+            pInfo->formatVersion[0] == 1)) {
+        udata_printError(ds,
+            "uemoji_swap(): data format %02x.%02x.%02x.%02x (format version %02x) "
+            "is not recognized as emoji properties data\n",
+            pInfo->dataFormat[0], pInfo->dataFormat[1],
+            pInfo->dataFormat[2], pInfo->dataFormat[3],
+            pInfo->formatVersion[0]);
+        *pErrorCode = U_UNSUPPORTED_ERROR;
+        return 0;
+    }
+
+    const uint8_t *inBytes = (const uint8_t *)inData + headerSize;
+    uint8_t *outBytes = (uint8_t *)outData + headerSize;
+
+    const int32_t *inIndexes = (const int32_t *)inBytes;
+
+    if (length >= 0) {  // a negative length skips the size checks and all swapping below
+        length -= headerSize;
+        // We expect to read at least EmojiProps::IX_TOTAL_SIZE.
+        if (length < 14 * 4) {
+            udata_printError(ds,
+                "uemoji_swap(): too few bytes (%d after header) for emoji properties data\n",
+                length);
+            *pErrorCode = U_INDEX_OUTOFBOUNDS_ERROR;
+            return 0;
+        }
+    }
+
+    // First offset after indexes[].
+    int32_t cpTrieOffset = udata_readInt32(ds, inIndexes[EmojiProps::IX_CPTRIE_OFFSET]);
+    int32_t indexesLength = cpTrieOffset / 4;  // each index is an int32_t (4 bytes)
+    if (indexesLength < 14) {
+        udata_printError(ds,
+            "uemoji_swap(): too few indexes (%d) for emoji properties data\n",
+            indexesLength);
+        *pErrorCode = U_INDEX_OUTOFBOUNDS_ERROR;
+        return 0;
+    }
+
+    // Read the data offsets before swapping anything: input and output may be the same buffer.
+    int32_t indexes[EmojiProps::IX_TOTAL_SIZE + 1];
+    indexes[0] = cpTrieOffset;
+    for (int32_t i = 1; i <= EmojiProps::IX_TOTAL_SIZE; ++i) {
+        indexes[i] = udata_readInt32(ds, inIndexes[i]);
+    }
+    int32_t size = indexes[EmojiProps::IX_TOTAL_SIZE];  // data size in bytes, not counting the header
+
+    if (length >= 0) {
+        if (length < size) {
+            udata_printError(ds,
+                "uemoji_swap(): too few bytes (%d after header) "
+                "for all of emoji properties data\n",
+                length);
+            *pErrorCode = U_INDEX_OUTOFBOUNDS_ERROR;
+            return 0;
+        }
+
+        // Copy the data for inaccessible bytes.
+        if (inBytes != outBytes) {
+            uprv_memcpy(outBytes, inBytes, size);
+        }
+
+        // Swap the int32_t indexes[].
+        int32_t offset = 0;
+        int32_t top = cpTrieOffset;
+        ds->swapArray32(ds, inBytes, top - offset, outBytes, pErrorCode);
+        offset = top;
+
+        // Swap the code point trie.
+        top = indexes[EmojiProps::IX_CPTRIE_OFFSET + 1];
+        int32_t count = top - offset;
+        U_ASSERT(count >= 0);
+        if (count >= 16) {
+            utrie_swapAnyVersion(ds, inBytes + offset, count, outBytes + offset, pErrorCode);
+        }
+        offset = top;  // reassigned just below, before its next use
+
+        // Swap all of the string tries.
+        // They are all serialized as arrays of 16-bit units.
+        offset = indexes[EmojiProps::IX_BASIC_EMOJI_TRIE_OFFSET];
+        top = indexes[EmojiProps::IX_RGI_EMOJI_ZWJ_SEQUENCE_TRIE_OFFSET + 1];
+        ds->swapArray16(ds, inBytes + offset, top - offset, outBytes + offset, pErrorCode);
+        offset = top;
+
+        U_ASSERT(offset == size);
+    }
+
+    return headerSize + size;
+}
+
+/*
+ * Swap 'Test' data from gentest: 2 bytes swapped as 16-bit units, then a 5-byte invariant-character string.
+ * Returns the header size plus those 7 bytes, or 0 with *pErrorCode set on failure.
+ * A negative length skips the bounds check and the swapping; only the size is returned.
+ */
+static int32_t U_CALLCONV
+test_swap(const UDataSwapper *ds,
+           const void *inData, int32_t length, void *outData,
+           UErrorCode *pErrorCode) {
+    /* udata_swapDataHeader checks the arguments */
+    int32_t headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
+    if(pErrorCode==nullptr || U_FAILURE(*pErrorCode)) {
+        udata_printError(ds, "test_swap(): data header swap failed %s\n", pErrorCode != nullptr ? u_errorName(*pErrorCode) : "pErrorCode is nullptr");
+        return 0;
+    }
+
+    /* check data format and format version */
+    const UDataInfo *pInfo=(const UDataInfo *)((const char *)inData+4);
+    if(!(
+        pInfo->dataFormat[0]==0x54 &&   /* dataFormat="Test" */
+        pInfo->dataFormat[1]==0x65 &&
+        pInfo->dataFormat[2]==0x73 &&
+        pInfo->dataFormat[3]==0x74 &&
+        pInfo->formatVersion[0]==1
+    )) {
+        udata_printError(ds, "test_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as testdata\n",
+                         pInfo->dataFormat[0], pInfo->dataFormat[1],
+                         pInfo->dataFormat[2], pInfo->dataFormat[3],
+                         pInfo->formatVersion[0]);
+        *pErrorCode=U_UNSUPPORTED_ERROR;
+        return 0;
+    }
+
+    const uint8_t *inBytes=(const uint8_t *)inData+headerSize;
+    uint8_t *outBytes=(uint8_t *)outData+headerSize;
+
+    int32_t size16 = 2; // 16bit plus padding
+    int32_t sizeStr = 5; // 4 char inv-str plus null
+    int32_t size = size16 + sizeStr;
+
+    if(length>=0) {
+        /* length still includes the header here; only the bytes after the header */
+        /* hold the payload, so subtract it before checking (as the message states) */
+        length-=headerSize;
+        if(length<size) {
+            udata_printError(ds, "test_swap(): too few bytes (%d after header, wanted %d) for all of testdata\n",
+                             length, size);
+            *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
+            return 0;
+        }
+
+        /* swap a 1 entry array */
+        int32_t offset=0;
+        ds->swapArray16(ds, inBytes+offset, size16, outBytes+offset, pErrorCode);
+        offset+=size16;
+        /* swap the invariant-character string including its terminator */
+        ds->swapInvChars(ds, inBytes+offset, sizeStr, outBytes+offset, pErrorCode);
+    }
+
+    return headerSize+size;
+}
+
+/* swap any data (except a .dat package) ------------------------------------ */
+
+static const struct {                                  /* dataFormat -> swapper table, searched linearly by udata_swap() */
+    uint8_t dataFormat[4];                              /* compared with UDataInfo.dataFormat via memcmp() */
+    UDataSwapFn *swapFn;                                /* swapper called for a matching dataFormat */
+} swapFns[]={
+    { { 0x52, 0x65, 0x73, 0x42 }, ures_swap },          /* dataFormat="ResB" */
+#if !UCONFIG_NO_LEGACY_CONVERSION
+    { { 0x63, 0x6e, 0x76, 0x74 }, ucnv_swap },          /* dataFormat="cnvt" */
+#endif
+#if !UCONFIG_NO_CONVERSION
+    { { 0x43, 0x76, 0x41, 0x6c }, ucnv_swapAliases },   /* dataFormat="CvAl" */
+#endif
+#if !UCONFIG_NO_IDNA
+    { { 0x53, 0x50, 0x52, 0x50 }, usprep_swap },        /* dataFormat="SPRP" */
+#endif
+    /* insert data formats here, descending by expected frequency of occurrence (the first match wins) */
+    { { 0x55, 0x50, 0x72, 0x6f }, uprops_swap },        /* dataFormat="UPro" */
+
+    { { UCASE_FMT_0, UCASE_FMT_1, UCASE_FMT_2, UCASE_FMT_3 },
+                                  ucase_swap },         /* dataFormat="cAsE" */
+
+    { { UBIDI_FMT_0, UBIDI_FMT_1, UBIDI_FMT_2, UBIDI_FMT_3 },
+                                  ubidi_swap },         /* dataFormat="BiDi" */
+
+#if !UCONFIG_NO_NORMALIZATION
+    { { 0x4e, 0x6f, 0x72, 0x6d }, unorm_swap },         /* dataFormat="Norm" */
+    { { 0x4e, 0x72, 0x6d, 0x32 }, unorm2_swap },        /* dataFormat="Nrm2" */
+#endif
+
+    { { ULAYOUT_FMT_0, ULAYOUT_FMT_1, ULAYOUT_FMT_2, ULAYOUT_FMT_3 },
+                                  ulayout_swap },       // dataFormat="Layo"
+
+    { { u'E', u'm', u'o', u'j' }, uemoji_swap },        /* dataFormat="Emoj" */
+
+#if !UCONFIG_NO_COLLATION
+    { { 0x55, 0x43, 0x6f, 0x6c }, ucol_swap },          /* dataFormat="UCol" */
+    { { 0x49, 0x6e, 0x76, 0x43 }, ucol_swapInverseUCA },/* dataFormat="InvC" */
+#endif
+#if !UCONFIG_NO_BREAK_ITERATION
+    { { 0x42, 0x72, 0x6b, 0x20 }, ubrk_swap },          /* dataFormat="Brk " */
+    { { 0x44, 0x69, 0x63, 0x74 }, udict_swap },         /* dataFormat="Dict" */
+#endif
+    { { 0x70, 0x6e, 0x61, 0x6d }, upname_swap },        /* dataFormat="pnam" */
+    { { 0x75, 0x6e, 0x61, 0x6d }, uchar_swapNames },    /* dataFormat="unam" */
+#if !UCONFIG_NO_NORMALIZATION
+    { { 0x43, 0x66, 0x75, 0x20 }, uspoof_swap },         /* dataFormat="Cfu " */
+#endif
+    { { 0x54, 0x65, 0x73, 0x74 }, test_swap }            /* dataFormat="Test" */
+};
+
+/* Swaps a single ICU data piece (not a .dat package) by dispatching on its dataFormat.
+ * Returns the format-specific swapper's result, or 0 with *pErrorCode set when
+ * the header cannot be preflighted or the dataFormat is not listed in swapFns[]. */
+U_CAPI int32_t U_EXPORT2
+udata_swap(const UDataSwapper *ds,
+           const void *inData, int32_t length, void *outData,
+           UErrorCode *pErrorCode) {
+    char dataFormatChars[4];
+
+    if(pErrorCode==nullptr || U_FAILURE(*pErrorCode)) {
+        return 0;
+    }
+
+    /*
+     * Preflight the header first; checks for illegal arguments, too.
+     * Do not swap the header right away because the format-specific swapper
+     * will swap it, get the headerSize again, and also use the header
+     * information. Otherwise we would have to pass some of the information
+     * and not be able to use the UDataSwapFn signature.
+     */
+    udata_swapDataHeader(ds, inData, -1, nullptr, pErrorCode);
+
+    /*
+     * If we wanted udata_swap() to also handle non-loadable data like a UTrie,
+     * then we could check here for further known magic values and structures.
+     */
+    if(U_FAILURE(*pErrorCode)) {
+        return 0; /* the header was not accepted by udata_swapDataHeader() */
+    }
+
+    const UDataInfo *pInfo=(const UDataInfo *)((const char *)inData+4);
+
+    {
+        /* convert the data format from ASCII to Unicode to the system charset (for messages only) */
+        char16_t u[4]={
+             pInfo->dataFormat[0], pInfo->dataFormat[1],
+             pInfo->dataFormat[2], pInfo->dataFormat[3]
+        };
+
+        if(uprv_isInvariantUString(u, 4)) {
+            u_UCharsToChars(u, dataFormatChars, 4);
+        } else {
+            dataFormatChars[0]=dataFormatChars[1]=dataFormatChars[2]=dataFormatChars[3]='?';
+        }
+    }
+
+    /* dispatch to the swap function for the dataFormat */
+    for(int32_t i=0; i<UPRV_LENGTHOF(swapFns); ++i) {
+        if(0==memcmp(swapFns[i].dataFormat, pInfo->dataFormat, 4)) {
+            int32_t swappedLength=swapFns[i].swapFn(ds, inData, length, outData, pErrorCode);
+
+            if(U_FAILURE(*pErrorCode)) {
+                udata_printError(ds, "udata_swap(): failure swapping data format %02x.%02x.%02x.%02x (\"%c%c%c%c\") - %s\n",
+                                 pInfo->dataFormat[0], pInfo->dataFormat[1],
+                                 pInfo->dataFormat[2], pInfo->dataFormat[3],
+                                 dataFormatChars[0], dataFormatChars[1],
+                                 dataFormatChars[2], dataFormatChars[3],
+                                 u_errorName(*pErrorCode));
+            } else if(swappedLength<(length-15)) {
+                /* swapped less than expected; this format string has no conversion for an error name */
+                udata_printError(ds, "udata_swap() warning: swapped only %d out of %d bytes - data format %02x.%02x.%02x.%02x (\"%c%c%c%c\")\n",
+                                 swappedLength, length,
+                                 pInfo->dataFormat[0], pInfo->dataFormat[1],
+                                 pInfo->dataFormat[2], pInfo->dataFormat[3],
+                                 dataFormatChars[0], dataFormatChars[1],
+                                 dataFormatChars[2], dataFormatChars[3]);
+            }
+
+            return swappedLength;
+        }
+    }
+
+    /* the dataFormat was not recognized */
+    udata_printError(ds, "udata_swap(): unknown data format %02x.%02x.%02x.%02x (\"%c%c%c%c\")\n",
+                     pInfo->dataFormat[0], pInfo->dataFormat[1],
+                     pInfo->dataFormat[2], pInfo->dataFormat[3],
+                     dataFormatChars[0], dataFormatChars[1],
+                     dataFormatChars[2], dataFormatChars[3]);
+
+    *pErrorCode=U_UNSUPPORTED_ERROR;
+    return 0;
+}
diff --git a/intl/icu/source/tools/toolutil/swapimpl.h b/intl/icu/source/tools/toolutil/swapimpl.h
new file mode 100644
index 0000000000..8c6474f662
--- /dev/null
+++ b/intl/icu/source/tools/toolutil/swapimpl.h
@@ -0,0 +1,45 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+*
+*   Copyright (C) 2005, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*
+*******************************************************************************
+*   file name:  swapimpl.h
+*   encoding:   UTF-8
+*   tab size:   8 (not used)
+*   indentation:4
+*
+*   created on: 2005jul29
+*   created by: Markus W. Scherer
+*
+*   Declarations for data file swapping functions not declared in internal
+*   library headers.
+*/
+
+#ifndef __SWAPIMPL_H__
+#define __SWAPIMPL_H__
+
+#include "unicode/utypes.h"
+#include "udataswp.h"
+
+/**
+ * Identifies and then transforms the ICU data piece in-place, or determines
+ * its length. See UDataSwapFn.
+ * This function handles single data pieces (but not .dat data packages)
+ * and internally dispatches to per-type swap functions.
+ * Sets a U_UNSUPPORTED_ERROR if the data format is not recognized.
+ *
+ * @see UDataSwapFn
+ * @see udata_openSwapper
+ * @see udata_openSwapperForInputData
+ * @internal ICU 2.8
+ */
+U_CAPI int32_t U_EXPORT2
+udata_swap(const UDataSwapper *ds,
+           const void *inData, int32_t length, void *outData,
+           UErrorCode *pErrorCode);
+
+#endif
diff --git a/intl/icu/source/tools/toolutil/toolutil.cpp b/intl/icu/source/tools/toolutil/toolutil.cpp
new file mode 100644
index 0000000000..7e7bdc78a1
--- /dev/null
+++ b/intl/icu/source/tools/toolutil/toolutil.cpp
@@ -0,0 +1,381 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+*
+*   Copyright (C) 1999-2014, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*
+*******************************************************************************
+*   file name:  toolutil.cpp
+*   encoding:   UTF-8
+*   tab size:   8 (not used)
+*   indentation:4
+*
+*   created on: 1999nov19
+*   created by: Markus W. Scherer
+*
+*	6/25/08 - Added Cygwin specific code in uprv_mkdir - Brian Rower
+*	
+*   This file contains utility functions for ICU tools like genccode.
+*/
+
+#include "unicode/platform.h"
+#if U_PLATFORM == U_PF_MINGW
+// *cough* - for struct stat
+#ifdef __STRICT_ANSI__
+#undef __STRICT_ANSI__
+#endif
+#endif
+
+#include <stdio.h>
+#include <sys/stat.h>
+#include <fstream>
+#include <time.h>
+#include "unicode/utypes.h"
+
+#ifndef U_TOOLUTIL_IMPLEMENTATION
+#error U_TOOLUTIL_IMPLEMENTATION not set - must be set for all ICU source files in common/ - see https://unicode-org.github.io/icu/userguide/howtouseicu
+#endif
+
+#if U_PLATFORM_USES_ONLY_WIN32_API
+#   define VC_EXTRALEAN
+#   define WIN32_LEAN_AND_MEAN
+#   define NOUSER
+#   define NOSERVICE
+#   define NOIME
+#   define NOMCX
+#   if U_PLATFORM == U_PF_MINGW
+#     define __NO_MINGW_LFS /* gets around missing 'off64_t' */
+#   endif
+#   include <windows.h>
+#   include <direct.h>
+#else
+#   include <sys/stat.h>
+#   include <sys/types.h>
+#endif
+
+/* In MinGW environment, io.h needs to be included for _mkdir() */
+#if U_PLATFORM == U_PF_MINGW
+#include <io.h>
+#endif
+
+#include <errno.h>
+
+#include <cstddef>
+
+#include "unicode/errorcode.h"
+#include "unicode/putil.h"
+#include "cmemory.h"
+#include "cstring.h"
+#include "toolutil.h"
+
+U_NAMESPACE_BEGIN
+
+IcuToolErrorCode::~IcuToolErrorCode() {
+    // Report a still-pending failure on destruction. Safe because our handleFailure() does not throw exceptions.
+    if(isFailure()) { handleFailure(); }
+}
+
+void IcuToolErrorCode::handleFailure() const {
+    fprintf(stderr, "error at %s: %s\n", location, errorName());  // location is the string given to the constructor
+    exit(errorCode);  // terminates the process with the error code as exit status
+}
+
+U_NAMESPACE_END
+
+static int32_t currentYear = -1;
+
+// Returns the current calendar year in UTC; computed on first use and then cached in currentYear.
+U_CAPI int32_t U_EXPORT2 getCurrentYear() {
+    if(currentYear != -1) { return currentYear; }
+    time_t timestamp = time(nullptr);
+    // tm_year counts years since 1900.
+    currentYear = 1900 + gmtime(&timestamp)->tm_year;
+    return currentYear;
+}
+
+
+/* On Windows, returns the long form of a "short" pathname in a static buffer; otherwise returns pathname itself. */
+U_CAPI const char * U_EXPORT2 getLongPathname(const char *pathname) {
+#if U_PLATFORM_USES_ONLY_WIN32_API
+    static WIN32_FIND_DATAA info;
+    HANDLE file=FindFirstFileA(pathname, &info);
+    if(file!=INVALID_HANDLE_VALUE) {
+        if(info.cAlternateFileName[0]!=0) {
+            /* this file has a short name, get and use the long one */
+            size_t dirLength=(size_t)(findBasename(pathname)-pathname);
+            size_t nameLength=uprv_strlen(info.cFileName)+1; /* includes the terminating NUL */
+            /* prepend the original path only if it fits into cFileName[]; else keep the caller's pathname */
+            if(dirLength+nameLength<=sizeof(info.cFileName)) {
+                uprv_memmove(info.cFileName+dirLength, info.cFileName, nameLength);
+                uprv_memcpy(info.cFileName, pathname, dirLength);
+                pathname=info.cFileName;
+            }
+        }
+        FindClose(file);
+    }
+#endif
+    return pathname;
+}
+
+// Copies the directory part of path (everything before its last separator) into buffer.
+// Returns buffer, or nullptr (with U_BUFFER_OVERFLOW_ERROR set) when bufLen is too small.
+U_CAPI const char * U_EXPORT2
+findDirname(const char *path, char *buffer, int32_t bufLen, UErrorCode* status) {
+  if(U_FAILURE(*status)) { return nullptr; }
+
+  // Locate the last separator, also honoring the alternate separator where one exists.
+  const char *lastSep=uprv_strrchr(path, U_FILE_SEP_CHAR);
+#if U_FILE_ALT_SEP_CHAR!=U_FILE_SEP_CHAR
+  const char *lastAltSep=uprv_strrchr(path, U_FILE_ALT_SEP_CHAR);
+  if(lastAltSep!=nullptr && (lastSep==nullptr || lastSep<lastAltSep)) {
+    lastSep = lastAltSep;
+  }
+#endif
+
+  const char *dirStart = "";  /* no separator - return ''. */
+  int32_t dirLen = 0;
+  if(lastSep!=nullptr) {
+    dirStart = path;
+    dirLen = static_cast<int32_t>(lastSep - path);
+    if(dirLen<1) {
+      dirLen = 1; /* '/' or '/a' -> '/' */
+    }
+  }
+
+  // The terminating NUL needs one byte beyond the directory name.
+  if(bufLen < dirLen+1) {
+    *status = U_BUFFER_OVERFLOW_ERROR;
+    return nullptr;
+  }
+  uprv_strncpy(buffer, dirStart, dirLen);
+  buffer[dirLen]=0;
+  return buffer;
+}
+
+// Returns a pointer to the part of filename that follows its last path separator,
+// or filename itself if it contains no separator.
+U_CAPI const char * U_EXPORT2
+findBasename(const char *filename) {
+    const char *basename=uprv_strrchr(filename, U_FILE_SEP_CHAR);
+
+#if U_FILE_ALT_SEP_CHAR!=U_FILE_SEP_CHAR
+    //be lenient about pathname separators on Windows, like official implementation of C++17 std::filesystem in MSVC
+    //would be convenient to merge this loop with the one above, but alas, there is no such solution in the standard library
+    const char *alt_basename=uprv_strrchr(filename, U_FILE_ALT_SEP_CHAR);
+    // Order the two pointers only when both are non-null, the same way findDirname() does.
+    if(alt_basename!=nullptr && (basename==nullptr || basename<alt_basename)) {
+        basename=alt_basename;
+    }
+#endif
+
+    // Skip past the separator itself.
+    return basename!=nullptr ? basename+1 : filename;
+}
+
+U_CAPI void U_EXPORT2
+uprv_mkdir(const char *pathname, UErrorCode *status) {
+    // Creates the directory; an existing one (EEXIST) is not an error. *status is only written on failure.
+    int retVal = 0;
+#if U_PLATFORM_USES_ONLY_WIN32_API
+    retVal = _mkdir(pathname);
+#else
+    retVal = mkdir(pathname, S_IRWXU | (S_IROTH | S_IXOTH) | (S_IROTH | S_IXOTH)); // NOTE(review): the "other" r-x bits are OR'ed in twice; the second pair was possibly meant to be S_IRGRP|S_IXGRP - confirm
+#endif
+    if (retVal && errno != EEXIST) {
+#if U_PF_MINGW <= U_PLATFORM && U_PLATFORM <= U_PF_CYGWIN
+        /* On Cygwin, mkdir may report a failure although the directory already exists, so check with stat(). */
+        /* Report the error only if the directory does not exist. - Brian Rower - 6/25/08 */
+        struct stat st;
+
+        if(stat(pathname,&st) != 0)
+        {
+            *status = U_FILE_ACCESS_ERROR;
+        }
+#else
+        *status = U_FILE_ACCESS_ERROR;
+#endif
+    }
+}
+
+#if !UCONFIG_NO_FILE_IO
+// Returns true if stat() succeeds for the named item (file, directory, etc.),
+// false otherwise.
+U_CAPI UBool U_EXPORT2
+uprv_fileExists(const char *file) {
+  struct stat itemInfo;
+  const int statResult = stat(file, &itemInfo);
+  // stat() returns 0 on success.
+  return statResult == 0;
+}
+#endif
+
+// Compares buffer[0..bufferLen) with the contents of goldenFilePath,
+// or, if overwrite is set, writes the buffer to that file instead.
+// Returns the index of the first difference, or -1 if equal or after overwriting.
+U_CAPI int32_t U_EXPORT2
+uprv_compareGoldenFiles(
+        const char* buffer, int32_t bufferLen,
+        const char* goldenFilePath,
+        bool overwrite) {
+    if (overwrite) {
+        // Replace the golden file with the buffer contents; nothing is compared.
+        std::ofstream goldenOut(goldenFilePath);
+        goldenOut.write(buffer, bufferLen);
+        goldenOut.close();
+        return -1;
+    }
+
+    std::ifstream goldenIn(goldenFilePath, std::ifstream::in);
+    int32_t matched = 0;
+    // A character is read before the length is checked, so a golden file that is
+    // longer than the buffer ends the loop without reaching end-of-file.
+    for (char ch; goldenIn.get(ch) && matched < bufferLen; ++matched) {
+        if (ch != buffer[matched]) {
+            break;  // Files differ at this position
+        }
+    }
+
+    // Equal only if the whole buffer matched and the file had nothing more to read.
+    const bool identical = (matched == bufferLen) && goldenIn.eof();
+    goldenIn.close();
+    return identical ? -1 : matched;
+}
+
+/*U_CAPI UDate U_EXPORT2
+uprv_getModificationDate(const char *pathname, UErrorCode *status)
+{
+    if(U_FAILURE(*status)) {
+        return;
+    }
+    //  TODO: handle case where stat is not available
+    struct stat st;
+    
+    if(stat(pathname,&st) != 0)
+    {
+        *status = U_FILE_ACCESS_ERROR;
+    } else {
+        return st.st_mtime;
+    }
+}
+*/
+
+/* tool memory helper ------------------------------------------------------- */
+
+struct UToolMemory {
+    char name[64];                             /* copy of the name given to utm_open(); used in error messages */
+    int32_t capacity, maxCapacity, size, idx;  /* capacity, maxCapacity and idx count items; size is the bytes per item */
+    void *array;                               /* staticArray at first; a separate heap block once utm_hasCapacity() has grown it */
+    alignas(std::max_align_t) char staticArray[1];  /* start of the initial storage that utm_open() allocates together with the struct */
+};
+
+/* Opens a UToolMemory for initialCapacity..maxCapacity items of size bytes each; exits the process if out of memory. */
+U_CAPI UToolMemory * U_EXPORT2
+utm_open(const char *name, int32_t initialCapacity, int32_t maxCapacity, int32_t size) {
+    if(maxCapacity<initialCapacity) {
+        maxCapacity=initialCapacity;
+    }
+    /* multiply in size_t so that a large initialCapacity*size cannot overflow int32_t */
+    UToolMemory *mem=(UToolMemory *)uprv_malloc(sizeof(UToolMemory)+(size_t)initialCapacity*size);
+    if(mem==nullptr) {
+        fprintf(stderr, "error: %s - out of memory\n", name);
+        exit(U_MEMORY_ALLOCATION_ERROR);
+    }
+    mem->array=mem->staticArray;
+    /* name[] has a fixed size: truncate an overlong name instead of writing past its end */
+    uprv_strncpy(mem->name, name, sizeof(mem->name)-1);
+    mem->name[sizeof(mem->name)-1]=0;
+    mem->capacity=initialCapacity;
+    mem->maxCapacity=maxCapacity;
+    mem->size=size;
+    mem->idx=0;
+    return mem;
+}
+
+/* Frees mem and, if the item array was moved out of staticArray, that array too; nullptr is ignored. */
+U_CAPI void U_EXPORT2 utm_close(UToolMemory *mem) {
+    if(mem==nullptr) {
+        return;
+    }
+    const bool arrayOnHeap=(mem->array!=mem->staticArray);
+    if(arrayOnHeap) { uprv_free(mem->array); }
+    uprv_free(mem);
+}
+
+
+/* Returns the current start of the item array. */
+U_CAPI void * U_EXPORT2 utm_getStart(UToolMemory *mem) {
+    return mem->array;
+}
+
+U_CAPI int32_t U_EXPORT2
+utm_countItems(UToolMemory *mem) {
+    return mem->idx; /* number of items handed out so far by utm_alloc()/utm_allocN() */
+}
+
+
+/* Makes sure that mem can hold at least capacity items, growing the array if necessary.
+ * Always returns true; exits the process if maxCapacity would be exceeded or memory runs out. */
+static UBool
+utm_hasCapacity(UToolMemory *mem, int32_t capacity) {
+    if(mem->capacity>=capacity) {
+        return true;
+    }
+    if(mem->maxCapacity<capacity) {
+        fprintf(stderr, "error: %s - trying to use more than maxCapacity=%ld units\n",
+                mem->name, (long)mem->maxCapacity);
+        exit(U_MEMORY_ALLOCATION_ERROR);
+    }
+    /* try to allocate a larger array */
+    int32_t newCapacity;
+    if(capacity>=2*mem->capacity) {
+        newCapacity=capacity;
+    } else if(mem->capacity<=mem->maxCapacity/3) {
+        newCapacity=2*mem->capacity;
+    } else {
+        newCapacity=mem->maxCapacity;
+    }
+
+    size_t newBytes=(size_t)newCapacity*mem->size; /* size_t: the byte count may not fit into int32_t */
+    if(mem->array==mem->staticArray) {
+        mem->array=uprv_malloc(newBytes);
+        if(mem->array!=nullptr) {
+            uprv_memcpy(mem->array, mem->staticArray, (size_t)mem->idx*mem->size);
+        }
+    } else {
+        mem->array=uprv_realloc(mem->array, newBytes);
+    }
+    if(mem->array==nullptr) {
+        fprintf(stderr, "error: %s - out of memory\n", mem->name);
+        exit(U_MEMORY_ALLOCATION_ERROR);
+    }
+    mem->capacity=newCapacity;
+    return true;
+}
+
+/* Allocates one zero-filled item at the end of the array and returns a pointer to it. */
+U_CAPI void * U_EXPORT2 utm_alloc(UToolMemory *mem) {
+    char *p=nullptr;
+    int32_t oldIndex=mem->idx;
+    if(utm_hasCapacity(mem, oldIndex+1)) {
+        /* size_t arithmetic: the byte offset idx*size may not fit into int32_t */
+        p=(char *)mem->array+(size_t)oldIndex*mem->size;
+        mem->idx=oldIndex+1;
+        uprv_memset(p, 0, mem->size);
+    }
+    return p;
+}
+
+/* Allocates n consecutive zero-filled items at the end of the array; returns a pointer to the first one. */
+U_CAPI void * U_EXPORT2 utm_allocN(UToolMemory *mem, int32_t n) {
+    char *p=nullptr;
+    int32_t oldIndex=mem->idx;
+    if(utm_hasCapacity(mem, oldIndex+n)) {
+        /* size_t arithmetic: byte offsets and lengths may not fit into int32_t */
+        p=(char *)mem->array+(size_t)oldIndex*mem->size;
+        mem->idx=oldIndex+n;
+        uprv_memset(p, 0, (size_t)n*mem->size);
+    }
+    return p;
+}
diff --git a/intl/icu/source/tools/toolutil/toolutil.h b/intl/icu/source/tools/toolutil/toolutil.h
new file mode 100644
index 0000000000..b32a0b8762
--- /dev/null
+++ b/intl/icu/source/tools/toolutil/toolutil.h
@@ -0,0 +1,201 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+*
+*   Copyright (C) 1999-2013, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*
+*******************************************************************************
+*   file name:  toolutil.h
+*   encoding:   UTF-8
+*   tab size:   8 (not used)
+*   indentation:4
+*
+*   created on: 1999nov19
+*   created by: Markus W. Scherer
+*
+*   This file defines utility functions for ICU tools like genccode.
+*/
+
+#ifndef __TOOLUTIL_H__
+#define __TOOLUTIL_H__
+
+#include "unicode/utypes.h"
+
+#ifdef __cplusplus
+
+#include "unicode/errorcode.h"
+
+U_NAMESPACE_BEGIN
+
+/**
+ * ErrorCode subclass for use in ICU command-line tools.
+ * The destructor calls handleFailure() which calls exit(errorCode) when isFailure().
+ */
+class U_TOOLUTIL_API IcuToolErrorCode : public ErrorCode {
+public:
+    /**
+     * @param loc A short string describing where the IcuToolErrorCode is used. Only the pointer is stored, not a copy.
+     */
+    IcuToolErrorCode(const char *loc) : location(loc) {}
+    virtual ~IcuToolErrorCode();  // calls handleFailure() if isFailure()
+protected:
+    virtual void handleFailure() const override;  // prints location and the error name to stderr, then calls exit(errorCode)
+private:
+    const char *location;  // the loc pointer passed to the constructor
+};
+
+U_NAMESPACE_END
+
+#endif
+
+/*
+ * For Windows, a path/filename may be the short (8.3) version
+ * of the "real", long one. In this case, the short one
+ * is abbreviated and contains a tilde etc.
+ * This function returns a pointer to the original pathname
+ * if it is the "real" one itself, and a pointer to a static
+ * buffer (not thread-safe) containing the long version
+ * if the pathname is indeed abbreviated.
+ *
+ * On platforms other than Windows, this function always returns
+ * the input pathname pointer.
+ *
+ * This function is especially useful in tools that are called
+ * by a batch file for loop, which yields short pathnames on Win9x.
+ */
+U_CAPI const char * U_EXPORT2
+getLongPathname(const char *pathname);
+
+/**
+ * Find the basename at the end of a pathname, i.e., the part
+ * after the last file separator, and return a pointer
+ * to this part of the pathname.
+ * If the pathname only contains a basename and no file separator,
+ * then the pathname pointer itself is returned.
+ **/
+U_CAPI const char * U_EXPORT2
+findBasename(const char *filename);
+
+/**
+ * Find the directory name of a pathname, that is, everything
+ * up to but not including the last file separator. 
+ *
+ * If successful, copies the directory name into the output buffer along with
+ * a terminating NULL. 
+ *
+ * If there isn't a directory name in the path, it returns an empty string.
+ * @param path the full pathname to inspect. 
+ * @param buffer the output buffer
+ * @param bufLen the output buffer length
+ * @param status error code- may return U_BUFFER_OVERFLOW_ERROR if bufLen is too small.
+ * @return If successful, a pointer to the output buffer. If failure or bufLen is too small, NULL.
+ **/
+U_CAPI const char * U_EXPORT2
+findDirname(const char *path, char *buffer, int32_t bufLen, UErrorCode* status);
+
+/*
+ * Return the current year in the Gregorian calendar. Used for copyright generation.
+ */
+U_CAPI int32_t U_EXPORT2
+getCurrentYear();
+
+/*
+ * Creates a directory with pathname.
+ *
+ * @param status Set to an error code when mkdir failed.
+ */
+U_CAPI void U_EXPORT2
+uprv_mkdir(const char *pathname, UErrorCode *status);
+
+#if !UCONFIG_NO_FILE_IO
+/**
+ * Return true if the named item exists
+ * @param file filename
+ * @return true if named item (file, dir, etc) exists, false otherwise
+ */
+U_CAPI UBool U_EXPORT2
+uprv_fileExists(const char *file);
+#endif
+
+/**
+ * Performs a golden data test. Asserts that the contents of the buffer is equal
+ * to the data in goldenFilePath.
+ *
+ * Pass the value of the -G flag to "overwrite"; if true, new goldens will be
+ * written to the filesystem.
+ * 
+ * @return The first index at which the files differ, or -1 if they are the same.
+ */
+U_CAPI int32_t U_EXPORT2
+uprv_compareGoldenFiles(
+    const char* buffer, int32_t bufferLen,
+    const char* goldenFilePath,
+    bool overwrite);
+
+/**
+ * Return the modification date for the specified file or directory.
+ * Return value is undefined if there was an error.
+ */
+/*U_CAPI UDate U_EXPORT2
+uprv_getModificationDate(const char *pathname, UErrorCode *status);
+*/
+/*
+ * Returns the modification
+ *
+ * @param status Set to an error code when mkdir failed.
+ */
+
+/*
+ * UToolMemory is used for generic, custom memory management.
+ * It is allocated with enough space for initialCapacity*size bytes
+ * starting at array.
+ * The array is declared with alignas(std::max_align_t) so
+ * that its base address is aligned for any types.
+ * If size is a multiple of a data type size, then such items
+ * can be safely allocated inside the array, at offsets that
+ * are themselves multiples of size.
+ */
+struct UToolMemory;
+typedef struct UToolMemory UToolMemory;
+
+/**
+ * Open a UToolMemory object for allocation of initialCapacity to maxCapacity
+ * items with size bytes each.
+ */
+U_CAPI UToolMemory * U_EXPORT2
+utm_open(const char *name, int32_t initialCapacity, int32_t maxCapacity, int32_t size);
+
+/**
+ * Close a UToolMemory object.
+ */
+U_CAPI void U_EXPORT2
+utm_close(UToolMemory *mem);
+
+/**
+ * Get the pointer to the beginning of the array of items.
+ * The pointer becomes invalid after allocation of new items.
+ */
+U_CAPI void * U_EXPORT2
+utm_getStart(UToolMemory *mem);
+
+/**
+ * Get the current number of items.
+ */
+U_CAPI int32_t U_EXPORT2
+utm_countItems(UToolMemory *mem);
+
+/**
+ * Allocate one more item and return the pointer to its start in the array.
+ */
+U_CAPI void * U_EXPORT2
+utm_alloc(UToolMemory *mem);
+
+/**
+ * Allocate n items and return the pointer to the start of the first one in the array.
+ */
+U_CAPI void * U_EXPORT2
+utm_allocN(UToolMemory *mem, int32_t n);
+
+#endif
diff --git a/intl/icu/source/tools/toolutil/toolutil.vcxproj b/intl/icu/source/tools/toolutil/toolutil.vcxproj
new file mode 100644
index 0000000000..0995ef06f7
--- /dev/null
+++ b/intl/icu/source/tools/toolutil/toolutil.vcxproj
@@ -0,0 +1,272 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="14.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <PropertyGroup Label="Globals">
+    <ProjectGuid>{6B231032-3CB5-4EED-9210-810D666A23A0}</ProjectGuid>
+  </PropertyGroup>
+  <PropertyGroup Label="Configuration">
+    <ConfigurationType>DynamicLibrary</ConfigurationType>
+    <UseOfMfc>false</UseOfMfc>
+    <CharacterSet>MultiByte</CharacterSet>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+  <!-- The following import will include the 'default' configuration options for VS projects. -->
+  <Import Project="..\..\allinone\Build.Windows.ProjectConfiguration.props" />
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+  <ImportGroup Label="ExtensionSettings">
+  </ImportGroup>
+  <PropertyGroup Label="UserMacros" />
+  <PropertyGroup>
+    <_ProjectFileVersion>10.0.30319.1</_ProjectFileVersion>
+    <OutDir>.\$(Platform)\$(Configuration)\</OutDir>
+    <IntDir>.\$(Platform)\$(Configuration)\</IntDir>
+    <!-- The ICU projects use "Win32" to mean "x86", so we need to special case it. -->
+    <OutDir Condition="'$(Platform)'=='Win32'">.\x86\$(Configuration)\</OutDir>
+    <IntDir Condition="'$(Platform)'=='Win32'">.\x86\$(Configuration)\</IntDir>
+    <!-- Disable Incremental Linking for Release builds as it prevents Link-time Code Generation -->
+    <LinkIncremental Condition="'$(Configuration)'=='Debug'">true</LinkIncremental>
+    <LinkIncremental Condition="'$(Configuration)'=='Release'">false</LinkIncremental>
+  </PropertyGroup>
+  <!-- Options that are common to *all* project configurations -->
+  <ItemDefinitionGroup>
+    <ClCompile>
+       <AdditionalIncludeDirectories>..\..\..\include;..\..\common;..\..\i18n;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PreprocessorDefinitions>U_TOOLUTIL_IMPLEMENTATION;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <DisableLanguageExtensions>false</DisableLanguageExtensions>
+      <WarningLevel>Level3</WarningLevel>
+      <CompileAs>Default</CompileAs>
+    </ClCompile>
+  </ItemDefinitionGroup>
+  <!-- Options that are common to all 'Debug' project configurations -->
+  <ItemDefinitionGroup Condition="'$(Configuration)'=='Debug'">
+    <ClCompile>
+      <BrowseInformation>true</BrowseInformation>
+      <RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
+      <DebugInformationFormat>EditAndContinue</DebugInformationFormat>
+    </ClCompile>
+    <Link>
+      <AdditionalDependencies>icuucd.lib;icuind.lib;%(AdditionalDependencies)</AdditionalDependencies>
+    </Link>
+  </ItemDefinitionGroup>
+  <!-- Options that are common to all 'Release' project configurations -->
+  <ItemDefinitionGroup Condition="'$(Configuration)'=='Release'">
+    <ClCompile>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+    </ClCompile>
+    <Link>
+      <AdditionalDependencies>icuuc.lib;icuin.lib;%(AdditionalDependencies)</AdditionalDependencies>
+    </Link>
+  </ItemDefinitionGroup>
+  <!-- Options that are common to all 'Win32' project configurations -->
+  <ItemDefinitionGroup Condition="'$(Platform)'=='Win32'">
+    <ClCompile>
+      <PrecompiledHeaderOutputFile>.\x86\$(Configuration)/toolutil.pch</PrecompiledHeaderOutputFile>
+      <AssemblerListingLocation>.\x86\$(Configuration)/</AssemblerListingLocation>
+      <ObjectFileName>.\x86\$(Configuration)/</ObjectFileName>
+      <ProgramDataBaseFileName>.\x86\$(Configuration)/</ProgramDataBaseFileName>
+    </ClCompile>
+    <Link>
+      <AdditionalLibraryDirectories>..\..\..\lib;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <Midl>
+      <TypeLibraryName>.\..\..\..\lib\icutu.tlb</TypeLibraryName>
+    </Midl>
+    <Link>
+      <OutputFile>..\..\..\bin\icutu$(IcuMajorVersion).dll</OutputFile>
+      <ProgramDatabaseFile>.\..\..\..\lib\icutu.pdb</ProgramDatabaseFile>
+      <DataExecutionPrevention>
+      </DataExecutionPrevention>
+      <ImportLibrary>..\..\..\lib\icutu.lib</ImportLibrary>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <Midl>
+      <TypeLibraryName>.\..\..\..\lib\icutud.tlb</TypeLibraryName>
+    </Midl>
+    <Link>
+      <OutputFile>..\..\..\bin\icutu$(IcuMajorVersion)d.dll</OutputFile>
+      <ProgramDatabaseFile>.\..\..\..\lib\icutud.pdb</ProgramDatabaseFile>
+      <DataExecutionPrevention>
+      </DataExecutionPrevention>
+      <ImportLibrary>..\..\..\lib\icutud.lib</ImportLibrary>
+    </Link>
+  </ItemDefinitionGroup>
+  <!-- Options that are common to all 'x64' project configurations -->
+  <ItemDefinitionGroup Condition="'$(Platform)'=='x64'">
+    <ClCompile>
+      <PrecompiledHeaderOutputFile>.\x64\$(Configuration)/toolutil.pch</PrecompiledHeaderOutputFile>
+      <AssemblerListingLocation>.\x64\$(Configuration)/</AssemblerListingLocation>
+      <ObjectFileName>.\x64\$(Configuration)/</ObjectFileName>
+      <ProgramDataBaseFileName>.\x64\$(Configuration)/</ProgramDataBaseFileName>
+    </ClCompile>
+    <Link>
+      <AdditionalLibraryDirectories>..\..\..\lib64;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <Midl>
+      <TypeLibraryName>.\..\..\..\lib64\icutu.tlb</TypeLibraryName>
+    </Midl>
+    <ClCompile>
+      <WholeProgramOptimization>true</WholeProgramOptimization>
+    </ClCompile>
+    <Link>
+      <OutputFile>..\..\..\bin64\icutu$(IcuMajorVersion).dll</OutputFile>
+      <ProgramDatabaseFile>.\..\..\..\lib64\icutu.pdb</ProgramDatabaseFile>
+      <ImportLibrary>..\..\..\lib64\icutu.lib</ImportLibrary>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <Midl>
+      <TypeLibraryName>.\..\..\..\lib64\icutud.tlb</TypeLibraryName>
+    </Midl>
+    <Link>
+      <OutputFile>..\..\..\bin64\icutu$(IcuMajorVersion)d.dll</OutputFile>
+      <ProgramDatabaseFile>.\..\..\..\lib64\icutud.pdb</ProgramDatabaseFile>
+      <ImportLibrary>..\..\..\lib64\icutud.lib</ImportLibrary>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Platform)'=='ARM'">
+    <ClCompile>
+      <PrecompiledHeaderOutputFile>.\ARM\$(Configuration)/toolutil.pch</PrecompiledHeaderOutputFile>
+      <AssemblerListingLocation>.\ARM\$(Configuration)/</AssemblerListingLocation>
+      <ObjectFileName>.\ARM\$(Configuration)/</ObjectFileName>
+      <ProgramDataBaseFileName>.\ARM\$(Configuration)/</ProgramDataBaseFileName>
+    </ClCompile>
+    <Link>
+      <AdditionalLibraryDirectories>.\..\..\..\libARM;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|ARM'">
+    <Midl>
+      <TypeLibraryName>..\..\..\libARM\icutu.tlb</TypeLibraryName>
+    </Midl>
+    <Link>
+      <OutputFile>..\..\..\binARM\icutu$(IcuMajorVersion).dll</OutputFile>
+      <ProgramDatabaseFile>.\..\..\..\libARM\icutu.pdb</ProgramDatabaseFile>
+      <ImportLibrary>..\..\..\libARM\icutu.lib</ImportLibrary>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|ARM'">
+    <Midl>
+      <TypeLibraryName>.\..\..\..\libARM\icutud.tlb</TypeLibraryName>
+    </Midl>
+    <ClCompile>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+    </ClCompile>
+    <Link>
+      <OutputFile>..\..\..\binARM\icutu$(IcuMajorVersion)d.dll</OutputFile>
+      <ProgramDatabaseFile>.\..\..\..\libARM\icutud.pdb</ProgramDatabaseFile>
+      <ImportLibrary>..\..\..\libARM\icutud.lib</ImportLibrary>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Platform)'=='ARM64'">
+    <ClCompile>
+      <PrecompiledHeaderOutputFile>.\ARM64\$(Configuration)/toolutil.pch</PrecompiledHeaderOutputFile>
+      <AssemblerListingLocation>.\ARM64\$(Configuration)/</AssemblerListingLocation>
+      <ObjectFileName>.\ARM64\$(Configuration)/</ObjectFileName>
+      <ProgramDataBaseFileName>.\ARM64\$(Configuration)/</ProgramDataBaseFileName>
+    </ClCompile>
+    <Link>
+      <AdditionalLibraryDirectories>.\..\..\..\libARM64;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|ARM64'">
+    <Midl>
+      <TypeLibraryName>.\..\..\..\libARM64\icutu.tlb</TypeLibraryName>
+    </Midl>
+    <Link>
+      <OutputFile>..\..\..\binARM64\icutu$(IcuMajorVersion).dll</OutputFile>
+      <ProgramDatabaseFile>.\..\..\..\libARM64\icutu.pdb</ProgramDatabaseFile>
+      <ImportLibrary>..\..\..\libARM64\icutu.lib</ImportLibrary>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|ARM64'">
+    <Midl>
+      <TypeLibraryName>.\..\..\..\libARM64\icutud.tlb</TypeLibraryName>
+    </Midl>
+    <ClCompile>
+      <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+    </ClCompile>
+    <Link>
+      <OutputFile>..\..\..\binARM64\icutu$(IcuMajorVersion)d.dll</OutputFile>
+      <ProgramDatabaseFile>.\..\..\..\libARM64\icutud.pdb</ProgramDatabaseFile>
+      <ImportLibrary>..\..\..\libARM64\icutud.lib</ImportLibrary>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemGroup>
+    <ClCompile Include="collationinfo.cpp">
+      <DisableLanguageExtensions>false</DisableLanguageExtensions>
+    </ClCompile>
+    <ClCompile Include="denseranges.cpp" />
+    <ClCompile Include="filestrm.cpp" />
+    <ClCompile Include="filetools.cpp" />
+    <ClCompile Include="flagparser.cpp" />
+    <ClCompile Include="package.cpp" />
+    <ClCompile Include="pkg_genc.cpp">
+      <DisableLanguageExtensions>false</DisableLanguageExtensions>
+    </ClCompile>
+    <ClCompile Include="pkg_gencmn.cpp">
+      <DisableLanguageExtensions>false</DisableLanguageExtensions>
+    </ClCompile>
+    <ClCompile Include="pkg_icu.cpp" />
+    <ClCompile Include="pkgitems.cpp" />
+    <ClCompile Include="ppucd.cpp">
+      <DisableLanguageExtensions>false</DisableLanguageExtensions>
+    </ClCompile>
+    <ClCompile Include="swapimpl.cpp">
+      <DisableLanguageExtensions>false</DisableLanguageExtensions>
+    </ClCompile>
+    <ClCompile Include="toolutil.cpp">
+      <DisableLanguageExtensions>false</DisableLanguageExtensions>
+    </ClCompile>
+    <ClCompile Include="ucbuf.cpp" />
+    <ClCompile Include="ucm.cpp" />
+    <ClCompile Include="ucmstate.cpp" />
+    <ClCompile Include="unewdata.cpp" />
+    <ClCompile Include="uoptions.cpp" />
+    <ClCompile Include="uparse.cpp" />
+    <ClCompile Include="writesrc.cpp" />
+    <ClCompile Include="xmlparser.cpp">
+      <DisableLanguageExtensions>false</DisableLanguageExtensions>
+    </ClCompile>
+    <ClCompile Include="dbgutil.cpp">
+      <DisableLanguageExtensions>false</DisableLanguageExtensions>
+    </ClCompile>
+    <ClCompile Include="udbgutil.cpp">
+      <DisableLanguageExtensions>false</DisableLanguageExtensions>
+    </ClCompile>
+    <ClCompile Include="ucln_tu.cpp">
+      <DisableLanguageExtensions>false</DisableLanguageExtensions>
+    </ClCompile>
+  </ItemGroup>
+  <ItemGroup>
+    <ClInclude Include="collationinfo.h" />
+    <ClInclude Include="denseranges.h" />
+    <ClInclude Include="filestrm.h" />
+    <ClInclude Include="filetools.h" />
+    <ClInclude Include="flagparser.h" />
+    <ClInclude Include="package.h" />
+    <ClInclude Include="pkg_genc.h" />
+    <ClInclude Include="pkg_gencmn.h" />
+    <ClInclude Include="pkg_icu.h" />
+    <ClInclude Include="pkg_imp.h" />
+    <ClInclude Include="ppucd.h" />
+    <ClInclude Include="swapimpl.h" />
+    <ClInclude Include="toolutil.h" />
+    <ClInclude Include="ucbuf.h" />
+    <ClInclude Include="ucm.h" />
+    <ClInclude Include="unewdata.h" />
+    <ClInclude Include="uoptions.h" />
+    <ClInclude Include="uparse.h" />
+    <ClInclude Include="writesrc.h" />
+    <ClInclude Include="xmlparser.h" />
+    <ClInclude Include="dbgutil.h" />
+    <ClInclude Include="udbgutil.h" />
+  </ItemGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+  <ImportGroup Label="ExtensionTargets">
+  </ImportGroup>
+</Project>
diff --git a/intl/icu/source/tools/toolutil/ucbuf.cpp b/intl/icu/source/tools/toolutil/ucbuf.cpp
new file mode 100644
index 0000000000..1eb54e260e
--- /dev/null
+++ b/intl/icu/source/tools/toolutil/ucbuf.cpp
@@ -0,0 +1,788 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+*
+*   Copyright (C) 1998-2016, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*
+*******************************************************************************
+*
+* File ucbuf.cpp
+*
+* Modification History:
+*
+*   Date        Name        Description
+*   05/10/01    Ram         Creation.
+*******************************************************************************
+*/
+
+#include "unicode/utypes.h"
+#include "unicode/putil.h"
+#include "unicode/uchar.h"
+#include "unicode/ucnv.h"
+#include "unicode/ucnv_err.h"
+#include "unicode/ustring.h"
+#include "unicode/utf16.h"
+#include "filestrm.h"
+#include "cstring.h"
+#include "cmemory.h"
+#include "ustrfmt.h"
+#include "ucbuf.h"
+#include <stdio.h>
+
+#if !UCONFIG_NO_CONVERSION
+
+
+#define MAX_IN_BUF 1000
+#define MAX_U_BUF 1500
+#define CONTEXT_LEN 20
+
+struct UCHARBUF {
+    char16_t* buffer;        /* start of the UTF-16 buffer allocated by ucbuf_open() */
+    char16_t* currentPos;    /* next code unit to hand out */
+    char16_t* bufLimit;      /* one past the last valid unit; a NUL is stored here */
+    int32_t bufCapacity;     /* size of buffer, in char16_t units */
+    int32_t remaining;       /* bytes of the input not yet read from the stream */
+    int32_t signatureLength; /* byte length of the Unicode signature (BOM) */
+    FileStream* in;          /* input stream; closed by ucbuf_close() */
+    UConverter* conv;        /* converter to UTF-16; closed by ucbuf_close() */
+    UBool showWarning;       /* when true, diagnostics are printed to stderr */
+    UBool isBuffered;        /* true: read in MAX_IN_BUF chunks; false: whole file at once */
+};
+
+U_CAPI UBool U_EXPORT2
+ucbuf_autodetect_fs(FileStream* in, const char** cp, UConverter** conv, int32_t* signatureLength, UErrorCode* error){
+    /* Looks for a Unicode signature (BOM) at the start of `in`.  Returns false
+     * with *conv==nullptr when ucnv_detectUnicodeSignature() names no charset;
+     * otherwise opens a converter for *cp, feeds it the signature bytes and
+     * returns true.  The stream is left just after the signature bytes. */
+    char head[8];
+    char16_t bom[1]={ 0 };
+    char16_t* bomOut = bom;
+    const char* headIn = head;
+
+    /* peek at the leading bytes and let the converter API classify them */
+    const int32_t headLen = T_FileStream_read(in, head, sizeof(head));
+    *cp = ucnv_detectUnicodeSignature(head, headLen, signatureLength, error);
+
+    /* go back to the start, then consume only the signature itself */
+    T_FileStream_rewind(in);
+    if (*signatureLength > 0) {
+        T_FileStream_read(in, head, *signatureLength);
+    }
+
+    if(*cp==nullptr){
+        /* nothing recognized: the caller has to choose a converter */
+        *conv = nullptr;
+        return false;
+    }
+
+    *conv = ucnv_open(*cp,error);
+
+    /* Convert the signature and discard the result; the one-unit output
+     * buffer may overflow, which is not treated as an error. */
+    ucnv_toUnicode(*conv, &bomOut, bom+1, &headIn, head+*signatureLength, nullptr, false, error);
+    *signatureLength = (int32_t)(headIn - head);
+    if(*error==U_BUFFER_OVERFLOW_ERROR) {
+        *error=U_ZERO_ERROR;
+    }
+
+    /* the only acceptable outcome is exactly one U+FEFF */
+    if(U_SUCCESS(*error)) {
+        if(bomOut!=(bom+1) || bom[0]!=0xfeff) {
+            *error=U_INTERNAL_PROGRAM_ERROR;
+        }
+    }
+    return true;
+}
+/*
+ * Reports whether `cp` names one of the Unicode charsets listed below.
+ * ucbuf_open() uses this to decide whether a caller-supplied codepage
+ * still needs its leading signature (BOM) detected and skipped.
+ *
+ * Names are compared with ucnv_compareNames(), in table order.
+ */
+static UBool ucbuf_isCPKnown(const char* cp){
+    static const char* const kKnownCharsets[] = {
+        "UTF-8",
+        "UTF-16BE",
+        "UTF-16LE",
+        "UTF-16",
+        "UTF-32",
+        "UTF-32BE",
+        "UTF-32LE",
+        "SCSU",
+        "BOCU-1",
+        "UTF-7"
+    };
+    const int32_t knownCount =
+        (int32_t)(sizeof(kKnownCharsets)/sizeof(kKnownCharsets[0]));
+    int32_t i;
+
+    for(i=0; i<knownCount; ++i){
+        if(ucnv_compareNames(kKnownCharsets[i],cp)==0){
+            /* first match wins; nothing else to check */
+            return true;
+        }
+    }
+
+    return false;
+}
+
+/*
+ * Opens fileName and looks for a Unicode signature (BOM) at its start.
+ * Returns the open stream if one is found, else closes it and returns nullptr.
+ * Null fileName/cp/conv/signatureLength (all used later) set U_ILLEGAL_ARGUMENT_ERROR.
+ */
+U_CAPI FileStream * U_EXPORT2
+ucbuf_autodetect(const char* fileName, const char** cp,UConverter** conv, int32_t* signatureLength,UErrorCode* error){
+    if(error==nullptr || U_FAILURE(*error)){
+        return nullptr;
+    }
+    if(conv==nullptr || cp==nullptr || fileName==nullptr || signatureLength==nullptr){
+        *error = U_ILLEGAL_ARGUMENT_ERROR;
+        return nullptr;
+    }
+    FileStream* in = T_FileStream_open(fileName,"rb");
+    if(in == nullptr){
+        *error=U_FILE_ACCESS_ERROR;
+        return nullptr;
+    }
+    if(ucbuf_autodetect_fs(in,cp,conv,signatureLength,error)) {
+        return in;
+    }
+    ucnv_close(*conv);
+    *conv=nullptr;
+    T_FileStream_close(in);
+    return nullptr;
+}
+
+/*
+ * Refills buf->buffer: unread units between currentPos and bufLimit are moved
+ * to the front, then more input is read and converted behind them.  Returns
+ * buf, or nullptr with U_MEMORY_ALLOCATION_ERROR if the temporary byte buffer
+ * cannot be allocated.  A conversion failure stays in *error even though the
+ * input is then re-converted with the substitution callback.
+ */
+static UCHARBUF*
+ucbuf_fillucbuf( UCHARBUF* buf,UErrorCode* error){
+    char16_t* pTarget=nullptr;
+    char16_t* target=nullptr;
+    const char* source=nullptr;
+    char  carr[MAX_IN_BUF] = {'\0'};
+    char* cbuf =  carr;
+    int32_t inputRead=0;
+    int32_t outputWritten=0;
+    int32_t offset=0;
+    const char* sourceLimit =nullptr;
+    int32_t cbufSize=0;
+    pTarget = buf->buffer;
+    /* check if we arrived here without exhausting the buffer*/
+    if(buf->currentPos<buf->bufLimit){
+        offset = (int32_t)(buf->bufLimit-buf->currentPos);
+        memmove(buf->buffer,buf->currentPos,offset* sizeof(char16_t));
+    }
+#ifdef UCBUF_DEBUG
+    memset(pTarget+offset,0xff,sizeof(char16_t)*(MAX_IN_BUF-offset));
+#endif
+    if(buf->isBuffered){
+        /* read the next chunk into the stack buffer */
+        cbufSize = MAX_IN_BUF;
+        inputRead=T_FileStream_read(buf->in,cbuf,cbufSize-offset);
+        buf->remaining-=inputRead;
+    }else{
+        /* read the whole file into a heap buffer that is freed below */
+        cbufSize = T_FileStream_size(buf->in);
+        cbuf = (char*)uprv_malloc(cbufSize);
+        if (cbuf == nullptr) {
+            *error = U_MEMORY_ALLOCATION_ERROR;
+            return nullptr;
+        }
+        inputRead= T_FileStream_read(buf->in,cbuf,cbufSize);
+        buf->remaining-=inputRead;
+    }
+
+    /* just to be sure...*/
+    if ( 0 == inputRead )
+       buf->remaining = 0;
+    target=pTarget;
+    /* convert the bytes */
+    if(buf->conv){
+        /* set the callback to stop */
+        UConverterToUCallback toUOldAction ;
+        void* toUOldContext;
+        void* toUNewContext=nullptr;
+        ucnv_setToUCallBack(buf->conv,
+           UCNV_TO_U_CALLBACK_STOP,
+           toUNewContext,
+           &toUOldAction,
+           (const void**)&toUOldContext,
+           error);
+        /* since state is saved in the converter we add offset to source*/
+        target = pTarget+offset;
+        source = cbuf;
+        sourceLimit = source + inputRead;
+        ucnv_toUnicode(buf->conv,&target,target+(buf->bufCapacity-offset),
+                        &source,sourceLimit,nullptr,
+                        (UBool)(buf->remaining==0),error);
+
+        if(U_FAILURE(*error)){
+            char context[CONTEXT_LEN+1];
+            char preContext[CONTEXT_LEN+1];
+            char postContext[CONTEXT_LEN+1];
+            int8_t len = CONTEXT_LEN;
+            int32_t start=0;
+            int32_t stop =0;
+            int32_t pos =0;
+            /* use error1 to preserve the error code */
+            UErrorCode error1 =U_ZERO_ERROR;
+
+            if( buf->showWarning==true){
+                fprintf(stderr,"\n###WARNING: Encountered abnormal bytes while"
+                               " converting input stream to target encoding: %s\n",
+                               u_errorName(*error));
+            }
+
+            /* now get the context chars */
+            ucnv_getInvalidChars(buf->conv,context,&len,&error1);
+            context[len]= 0 ; /* null terminate the buffer */
+            pos = (int32_t)(source - cbuf - len);
+            /* for pre-context */
+            start = (pos <=CONTEXT_LEN)? 0 : (pos - (CONTEXT_LEN-1));
+            stop  = pos-len;
+            /* pos-len is negative when the invalid bytes sit within the first
+             * few bytes of cbuf; never pass memcpy a negative length */
+            if(stop<start){
+                stop=start;
+            }
+            memcpy(preContext,cbuf+start,stop-start);
+            /* null terminate the buffer */
+            preContext[stop-start] = 0;
+
+            /* for post-context */
+            start = pos+len;
+            stop  = (int32_t)(((pos+CONTEXT_LEN)<= (sourceLimit-cbuf) )? (pos+(CONTEXT_LEN-1)) : (sourceLimit-cbuf));
+            if(stop<start){
+                stop=start;
+            }
+            memcpy(postContext,source,stop-start);
+            /* null terminate the buffer */
+            postContext[stop-start] = 0;
+
+            if(buf->showWarning ==true){
+                /* print out the context */
+                fprintf(stderr,"\tPre-context: %s\n",preContext);
+                fprintf(stderr,"\tContext: %s\n",context);
+                fprintf(stderr,"\tPost-context: %s\n", postContext);
+            }
+
+            /* reset the converter, set the call back to substitute
+             * and restart conversion
+             */
+            ucnv_reset(buf->conv);
+            ucnv_setToUCallBack(buf->conv,
+               UCNV_TO_U_CALLBACK_SUBSTITUTE,
+               toUNewContext,
+               &toUOldAction,
+               (const void**)&toUOldContext,
+               &error1);
+
+            /* reset source and target start positions, then re-convert */
+            target = pTarget+offset;
+            source = cbuf;
+            ucnv_toUnicode(buf->conv,&target,target+(buf->bufCapacity-offset),
+                            &source,sourceLimit,nullptr,
+                            (UBool)(buf->remaining==0),&error1);
+        }
+        outputWritten = (int32_t)(target - pTarget);
+#ifdef UCBUF_DEBUG
+        {
+            int i;
+            target = pTarget;
+            for(i=0;i<outputWritten;i++){
+              /*  printf("%c", (char)(*target++));*/
+            }
+        }
+#endif
+    }else{
+        u_charsToUChars(cbuf,target+offset,inputRead);
+        outputWritten=((buf->remaining>cbufSize)? cbufSize:inputRead+offset);
+    }
+    buf->currentPos = pTarget;
+    buf->bufLimit=pTarget+outputWritten;
+    *buf->bufLimit=0; /*NUL terminate*/
+    if(cbuf!=carr){
+        uprv_free(cbuf);
+    }
+    return buf;
+}
+
+
+
+/* get a char16_t from the stream; U_EOF at the end, 0 if error is null or already set */
+U_CAPI int32_t U_EXPORT2
+ucbuf_getc(UCHARBUF* buf,UErrorCode* error){
+    if(error==nullptr || U_FAILURE(*error)){
+        return 0;
+    }
+    if(buf->currentPos<buf->bufLimit){
+        return *(buf->currentPos++);
+    }
+    if(buf->remaining==0){
+        return U_EOF;
+    }
+    buf=ucbuf_fillucbuf(buf,error); /* buffer ran dry but input remains */
+    if(U_FAILURE(*error)){
+        return U_EOF;
+    }
+    return *(buf->currentPos++);
+}
+
+/* get a UChar32 from the stream; U_EOF at end of input, 0 if error is null or already set */
+U_CAPI int32_t U_EXPORT2
+ucbuf_getc32(UCHARBUF* buf,UErrorCode* error){
+    if(error==nullptr || U_FAILURE(*error)){
+        return false;
+    }
+    /* refill only while input remains; the last buffered unit must still be returned */
+    if(buf->currentPos+1>=buf->bufLimit && buf->remaining!=0){
+        buf=ucbuf_fillucbuf(buf,error);
+        if(U_FAILURE(*error)){
+            return U_EOF;
+        }
+    }
+    if(buf->currentPos>=buf->bufLimit){
+        return U_EOF;
+    }
+    /* combine only a complete surrogate pair that lies inside the buffer */
+    if(U16_IS_LEAD(buf->currentPos[0]) && buf->currentPos+1<buf->bufLimit && U16_IS_TRAIL(buf->currentPos[1])){
+        int32_t retVal=U16_GET_SUPPLEMENTARY(buf->currentPos[0],buf->currentPos[1]);
+        buf->currentPos+=2;
+        return retVal;
+    }
+    return *(buf->currentPos++);
+}
+
+/* u_unescapeAt() callback: the code unit `offset` units past currentPos */
+static char16_t U_CALLCONV
+_charAt(int32_t offset, void *context) {
+    return static_cast<UCHARBUF*>(context)->currentPos[offset];
+}
+
+/*
+ * Reads one code unit; if it is a backslash, tries to unescape the sequence
+ * it starts via u_unescapeAt().  Sets U_ILLEGAL_ESCAPE_SEQUENCE and returns
+ * the backslash when u_unescapeAt() reports failure (0xFFFFFFFF).
+ */
+U_CAPI int32_t U_EXPORT2
+ucbuf_getcx32(UCHARBUF* buf,UErrorCode* error) {
+    int32_t length;
+    int32_t offset;
+    UChar32 c32,c1,c2;
+    if(error==nullptr || U_FAILURE(*error)){
+        return false;
+    }
+    /* Refill when at most two units are left before bufLimit */
+    if (buf->currentPos >=buf->bufLimit-2) {
+        ucbuf_fillucbuf(buf,error);
+    }
+    /* Get the next character in the buffer */
+    if (buf->currentPos < buf->bufLimit) {
+        c1 = *(buf->currentPos)++;
+    } else {
+        c1 = U_EOF;
+    }
+    /* peek at the unit after c1 (the NUL terminator when at bufLimit) */
+    c2 = *(buf->currentPos);
+
+    /* If it isn't a backslash, return it */
+    if (c1 != 0x005C) {
+        return c1;
+    }
+
+    /* Determine the amount of data in the buffer */
+    length = (int32_t)(buf->bufLimit - buf->currentPos);
+
+    /* The longest escape sequence is \Uhhhhhhhh; make sure
+       we have at least that many characters */
+    if (length < 10) {
+        /* fill the buffer; currentPos is back at buf->buffer afterwards */
+        ucbuf_fillucbuf(buf,error);
+        length = (int32_t)(buf->bufLimit - buf->buffer);
+    }
+    /* Process the escape, reading units relative to currentPos */
+    offset = 0;
+    c32 = u_unescapeAt(_charAt, &offset, length, (void*)buf);
+    /* check if u_unescapeAt unescaped and converted
+     * to c32 or not
+     */
+    if(c32==(UChar32)0xFFFFFFFF){
+        if(buf->showWarning) {
+            char context[CONTEXT_LEN+1];
+            int32_t len = CONTEXT_LEN;
+            if(length < len) {
+                len = length; 
+            }
+            context[len]= 0 ; /* null terminate the buffer */
+            u_UCharsToChars( buf->currentPos, context, len);
+            fprintf(stderr,"Bad escape: [%c%s]...\n", (int)c1, context);
+        }
+        *error= U_ILLEGAL_ESCAPE_SEQUENCE;
+        return c1;
+    }else if(c32!=c2 || (c32==0x0075 && c2==0x0075 && c1==0x005C) /* for \u0075 c2=0x0075 and c32==0x0075*/){
+        /* Update the current buffer position */
+        buf->currentPos += offset;
+    }else{
+        /* unescaping failed so we just return
+         * c1 and not consume the buffer
+         * this is useful for rules with escapes
+         * in resource bundles
+         * eg: \' \\ \"
+         */
+        return c1;
+    }
+
+    return c32;
+}
+
+/*
+ * Opens fileName ("-" means stdin) as a UCHARBUF with its first buffer filled.
+ * If *cp is null or empty, the charset is autodetected from a Unicode signature;
+ * for charsets accepted by ucbuf_isCPKnown() a signature is detected and skipped.
+ * Returns nullptr with *error set on bad arguments or any open/allocation failure.
+ */
+U_CAPI UCHARBUF* U_EXPORT2
+ucbuf_open(const char* fileName,const char** cp,UBool showWarning, UBool buffered, UErrorCode* error){
+    FileStream* in = nullptr;
+    int32_t fileSize=0;
+    const char* knownCp;
+    if(error==nullptr || U_FAILURE(*error)){
+        return nullptr;
+    }
+    if(cp==nullptr || fileName==nullptr){
+        *error = U_ILLEGAL_ARGUMENT_ERROR;
+        return nullptr;
+    }
+    if (!uprv_strcmp(fileName, "-")) {
+        in = T_FileStream_stdin();
+    }else{
+        in = T_FileStream_open(fileName, "rb");
+    }
+
+    if(in!=nullptr){
+        UCHARBUF* buf =(UCHARBUF*) uprv_malloc(sizeof(UCHARBUF));
+        fileSize = T_FileStream_size(in);
+        if(buf == nullptr){
+            *error = U_MEMORY_ALLOCATION_ERROR;
+            T_FileStream_close(in);
+            return nullptr;
+        }
+        buf->in=in;
+        buf->conv=nullptr;
+        buf->showWarning = showWarning;
+        buf->isBuffered = buffered;
+        buf->signatureLength=0;
+        if(*cp==nullptr || **cp=='\0'){
+            /* don't have code page name... try to autodetect */
+            ucbuf_autodetect_fs(in,cp,&buf->conv,&buf->signatureLength,error);
+        }else if(ucbuf_isCPKnown(*cp)){
+            /* discard BOM */
+            ucbuf_autodetect_fs(in,&knownCp,&buf->conv,&buf->signatureLength,error);
+        }
+        if(U_SUCCESS(*error) && buf->conv==nullptr) {
+            buf->conv=ucnv_open(*cp,error);
+        }
+        if(U_FAILURE(*error)){
+            ucnv_close(buf->conv);
+            uprv_free(buf);
+            T_FileStream_close(in);
+            return nullptr;
+        }
+
+        if((buf->conv==nullptr) && (buf->showWarning==true)){
+            fprintf(stderr,"###WARNING: No converter defined. Using codepage of system.\n");
+        }
+        buf->remaining=fileSize-buf->signatureLength;
+        if(buf->isBuffered){
+            buf->bufCapacity=MAX_U_BUF;
+        }else{
+            buf->bufCapacity=buf->remaining+buf->signatureLength+1/*for terminating nul*/;
+        }
+        buf->buffer=(char16_t*) uprv_malloc(U_SIZEOF_UCHAR * buf->bufCapacity );
+        if (buf->buffer == nullptr) {
+            *error = U_MEMORY_ALLOCATION_ERROR;
+            ucbuf_close(buf);
+            return nullptr;
+        }
+        buf->currentPos=buf->buffer;
+        buf->bufLimit=buf->buffer;
+        ucbuf_fillucbuf(buf,error);
+        if(U_FAILURE(*error)){
+            ucbuf_close(buf);
+            return nullptr;
+        }
+        return buf;
+    }
+    *error =U_FILE_ACCESS_ERROR;
+    return nullptr;
+}
+
+
+
+/* TODO: ungetc fails when currentPos is at the
+ * beginning of the buffer and the unit to push back
+ * belongs to the previous buffer fill. That case
+ * is not handled yet.
+ */
+U_CAPI void U_EXPORT2
+ucbuf_ungetc(int32_t c,UCHARBUF* buf){
+    /* Nothing can be pushed back at the very start of the buffer;
+     * the request is silently dropped (see the TODO above). */
+    if(buf->currentPos==buf->buffer){
+        return;
+    }
+
+    /* Step back only when the previous unit really is c; a mismatch is
+     * ignored as well. */
+    char16_t* previous = buf->currentPos-1;
+    if(*previous==c){
+        buf->currentPos=previous;
+    }
+}
+
+/* frees the char16_t storage owned by buf and clears the now-stale pointer */
+static void
+ucbuf_closebuf(UCHARBUF* buf){
+    uprv_free(buf->buffer);
+    buf->buffer = nullptr;
+}
+
+/* Releases the converter, the stream, the buffer and buf itself; nullptr is a no-op. */
+U_CAPI void U_EXPORT2
+ucbuf_close(UCHARBUF* buf){
+    if(buf!=nullptr){
+        if(buf->conv){
+            ucnv_close(buf->conv);
+        }
+        T_FileStream_close(buf->in);
+        ucbuf_closebuf(buf);
+        uprv_free(buf);
+    }
+}
+
+/*
+ * Rewinds the stream and empties the buffer.  A Unicode signature at the
+ * start of the file is read and converted again, and its output discarded.
+ */
+U_CAPI void U_EXPORT2
+ucbuf_rewind(UCHARBUF* buf,UErrorCode* error){
+    if(error==nullptr || U_FAILURE(*error) || buf==nullptr){
+        return;
+    }
+    buf->currentPos=buf->buffer;
+    buf->bufLimit=buf->buffer;
+    T_FileStream_rewind(buf->in);
+    buf->remaining=T_FileStream_size(buf->in)-buf->signatureLength;
+
+    ucnv_resetToUnicode(buf->conv);
+    if(buf->signatureLength<=0) {
+        return; /* no signature to skip */
+    }
+
+    char sigBytes[8];
+    char16_t bom[1]={ 0 };
+    char16_t* bomOut = bom;
+    const char* sigIn = sigBytes;
+
+    /* fetch the signature bytes again */
+    const int32_t sigRead=T_FileStream_read(buf->in, sigBytes, buf->signatureLength);
+
+    /* convert them; the U+FEFF and a buffer overflow are both ignored */
+    ucnv_toUnicode(buf->conv, &bomOut, bom+1, &sigIn, sigBytes+sigRead, nullptr, false, error);
+    if(*error==U_BUFFER_OVERFLOW_ERROR) {
+        *error=U_ZERO_ERROR;
+    }
+
+    /* anything other than exactly one U+FEFF is an internal error */
+    if(U_SUCCESS(*error) && (sigRead!=buf->signatureLength || bomOut!=(bom+1) || bom[0]!=0xfeff)) {
+        *error=U_INTERNAL_PROGRAM_ERROR;
+    }
+}
+
+
+/* Buffered: file bytes after the signature / converter's min char size; else units buffered. */
+U_CAPI int32_t U_EXPORT2
+ucbuf_size(UCHARBUF* buf){
+    if(buf==nullptr){
+        return 0;
+    }
+    if(!buf->isBuffered){
+        return (int32_t)(buf->bufLimit - buf->buffer);
+    }
+    return (T_FileStream_size(buf->in)-buf->signatureLength)/ucnv_getMinCharSize(buf->conv);
+}
+
+U_CAPI const char16_t* U_EXPORT2
+ucbuf_getBuffer(UCHARBUF* buf,int32_t* len,UErrorCode* error){
+    if(error==nullptr || U_FAILURE(*error)){
+        return nullptr;
+    }
+    if(buf!=nullptr && len!=nullptr){
+        *len = (int32_t)(buf->bufLimit - buf->buffer); /* units currently buffered */
+        return buf->buffer;                            /* internal storage, not a copy */
+    }
+    *error = U_ILLEGAL_ARGUMENT_ERROR;
+    return nullptr;
+}
+
+/*
+ * Builds the path of fileName relative to inputDir in target (capacity *len bytes) and returns target.
+ * If target is nullptr or too small: stores the required capacity in *len, sets *status to
+ * U_BUFFER_OVERFLOW_ERROR and returns nullptr. nullptr inputDir/fileName/len: U_ILLEGAL_ARGUMENT_ERROR.
+ */
+U_CAPI const char* U_EXPORT2
+ucbuf_resolveFileName(const char* inputDir, const char* fileName, char* target, int32_t* len, UErrorCode* status){
+    int32_t requiredLen = 0;
+    if(status==nullptr || U_FAILURE(*status)){
+        return nullptr;
+    }
+
+    if(inputDir == nullptr || fileName == nullptr || len==nullptr || (target==nullptr && *len>0)){
+        *status = U_ILLEGAL_ARGUMENT_ERROR;
+        return nullptr;
+    }
+
+    const int32_t dirlen  = (int32_t)uprv_strlen(inputDir);
+    const int32_t filelen = (int32_t)uprv_strlen(fileName);
+    /* An empty inputDir has no last character; never index inputDir[-1]. */
+    const char lastDirChar = dirlen>0 ? inputDir[dirlen-1] : '\0';
+    if(lastDirChar != U_FILE_SEP_CHAR) {
+        requiredLen = dirlen + filelen + 2;
+        if((*len < requiredLen) || target==nullptr){
+            *len = requiredLen;
+            *status = U_BUFFER_OVERFLOW_ERROR;
+            return nullptr;
+        }
+
+        target[0] = '\0';
+        /*
+         * Prepend inputDir only if fileName does not start with the file
+         * separator and inputDir does not end in '.', to support
+         *   genrb -s. /home/icu/data  (absolute path given; -s is redundant)
+         *   genrb -s. icu/data        (start from CWD, look in icu/data)
+         * An empty inputDir is skipped as well: there is nothing to prepend.
+         */
+        if( dirlen>0 && (fileName[0] != U_FILE_SEP_CHAR) && (lastDirChar !='.')){
+            uprv_strcpy(target, inputDir);
+            target[dirlen]     = U_FILE_SEP_CHAR;
+        }
+        target[dirlen + 1] = '\0';
+    } else {
+        requiredLen = dirlen + filelen + 1;
+        if((*len < requiredLen) || target==nullptr){
+            *len = requiredLen;
+            *status = U_BUFFER_OVERFLOW_ERROR;
+            return nullptr;
+        }
+
+        uprv_strcpy(target, inputDir);
+    }
+
+    uprv_strcat(target, fileName);
+    return target;
+}
+/*
+ * Unicode TR 13: a readline function treats each of the characters
+ * below as a line break, in addition to the CR+LF combination,
+ * which has to be handled separately.
+ */
+static UBool ucbuf_isCharNewLine(char16_t c){
+    /* single control characters */
+    if(c==0x000A /* LF */ || c==0x000D /* CR */){
+        return true;
+    }
+    if(c==0x000C /* FF */ || c==0x0085 /* NEL */){
+        return true;
+    }
+    /* dedicated Unicode separators */
+    if(c==0x2028 /* LS */ || c==0x2029 /* PS */){
+        return true;
+    }
+    return false;
+}
+
+/*
+ * Returns the start of the next line inside buf's internal buffer and stores in *len its length,
+ * which counts the line-break character if there is one (for CR+LF only the CR). Returns nullptr
+ * at end of input or on error. NOTE(review): buf, len and err are dereferenced without null checks.
+ */
+U_CAPI const char16_t* U_EXPORT2
+ucbuf_readline(UCHARBUF* buf,int32_t* len,UErrorCode* err){
+    char16_t* temp = buf->currentPos;
+    char16_t* savePos =nullptr;
+    char16_t c=0x0000;
+    if(buf->isBuffered){
+        /* Buffered input: reaching the buffer limit while currentPos is still
+         * at the buffer start is reported as U_TRUNCATED_CHAR_FOUND. */
+        for(;;){
+            c = *temp++;
+            if(buf->remaining==0){
+                return nullptr; /* end of file is reached: return nullptr */
+            }
+            if(temp>=buf->bufLimit && buf->currentPos == buf->buffer){
+                *err= U_TRUNCATED_CHAR_FOUND;
+                return nullptr;
+            }else{ /* NOTE(review): this refills on every iteration; confirm temp stays valid */
+                ucbuf_fillucbuf(buf,err);
+                if(U_FAILURE(*err)){
+                    return nullptr; 
+                }
+            }
+            /* According to TR 13 readLine functions must interpret
+             * CR, CR+LF, LF, NEL, PS, LS or FF as line separators */
+            /* Windows CR LF */
+            if(c ==0x0d && temp <= buf->bufLimit && *temp == 0x0a ){
+                *len = (int32_t)(temp++ - buf->currentPos);
+                savePos = buf->currentPos;
+                buf->currentPos = temp;
+                return savePos;
+            }
+            /* else */
+
+            if (temp>=buf->bufLimit|| ucbuf_isCharNewLine(c)){  /* Unipad inserts 2028 line separators! */
+                *len = (int32_t)(temp - buf->currentPos);
+                savePos = buf->currentPos;
+                buf->currentPos = temp;
+                return savePos;
+            }
+        }
+    }else{
+        /* Not buffered: all input has been read into the internal
+         * buffer, so pointers into it can be returned safely.
+         */
+        for(;;){
+            c = *temp++;
+            
+            if(buf->currentPos==buf->bufLimit){
+                return nullptr; /* end of file is reached: return nullptr */
+            }
+            /* Windows CR LF */
+            if(c ==0x0d && temp <= buf->bufLimit && *temp == 0x0a ){
+                *len = (int32_t)(temp++ - buf->currentPos);
+                savePos = buf->currentPos;
+                buf->currentPos = temp;
+                return savePos;
+            }
+            /* else */
+            if (temp>=buf->bufLimit|| ucbuf_isCharNewLine(c)) {  /* Unipad inserts 2028 line separators! */
+                *len = (int32_t)(temp - buf->currentPos);
+                savePos = buf->currentPos;
+                buf->currentPos = temp;
+                return savePos;
+            }
+        }
+    }
+    /* not reached: both loops leave only through a return statement */
+}
+#endif
diff --git a/intl/icu/source/tools/toolutil/ucbuf.h b/intl/icu/source/tools/toolutil/ucbuf.h
new file mode 100644
index 0000000000..117920b794
--- /dev/null
+++ b/intl/icu/source/tools/toolutil/ucbuf.h
@@ -0,0 +1,218 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+*
+*   Copyright (C) 1998-2016, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*
+*******************************************************************************
+*
+* File ucbuf.h
+*
+* Modification History:
+*
+*   Date        Name        Description
+*   05/10/01    Ram         Creation.
+*
+* This API reads in files and returns UChars
+*******************************************************************************
+*/
+
+#include "unicode/localpointer.h"
+#include "unicode/ucnv.h"
+#include "filestrm.h"
+
+#if !UCONFIG_NO_CONVERSION
+
+#ifndef UCBUF_H
+#define UCBUF_H 1
+
+typedef struct UCHARBUF UCHARBUF;
+/**
+ * End of file value
+ */
+#define U_EOF ((int32_t)0xFFFFFFFF)
+/**
+ * Error value if a sequence cannot be unescaped
+ */
+#define U_ERR ((int32_t)0xFFFFFFFE)
+
+typedef struct ULine ULine;
+
+struct  ULine {
+    UChar     *name;  /* presumably the text of the line; TODO confirm, no user of ULine is visible here */
+    int32_t   len;    /* presumably the length of name; TODO confirm */
+};
+
+/**
+ * Opens the UCHARBUF with the given file stream and code page for conversion
+ * @param fileName  Name of the file to open.
+ * @param codepage  The encoding of the file stream to convert to Unicode.
+ *                  If *codepage is NULL on input the API will try to autodetect
+ *                  popular Unicode encodings
+ * @param showWarning Flag to print out warnings to STDOUT
+ * @param buffered  If true performs a buffered read of the input file. If false reads
+ *                  the whole file into memory and converts it.
+ * @param err is a pointer to a valid <code>UErrorCode</code> value. If this value
+ *        indicates a failure on entry, the function will immediately return.
+ *        On exit the value will indicate the success of the operation.
+ * @return pointer to the newly opened UCHARBUF
+ */
+U_CAPI UCHARBUF* U_EXPORT2
+ucbuf_open(const char* fileName,const char** codepage,UBool showWarning, UBool buffered, UErrorCode* err);
+
+/**
+ * Gets a UTF-16 code unit at the current position from the converted buffer
+ * and increments the current position
+ * @param buf Pointer to UCHARBUF structure
+ * @param err is a pointer to a valid <code>UErrorCode</code> value. If this value
+ *        indicates a failure on entry, the function will immediately return.
+ *        On exit the value will indicate the success of the operation.
+ */
+U_CAPI int32_t U_EXPORT2
+ucbuf_getc(UCHARBUF* buf,UErrorCode* err);
+
+/**
+ * Gets a UTF-32 code point at the current position from the converted buffer
+ * and increments the current position
+ * @param buf Pointer to UCHARBUF structure
+ * @param err is a pointer to a valid <code>UErrorCode</code> value. If this value
+ *        indicates a failure on entry, the function will immediately return.
+ *        On exit the value will indicate the success of the operation.
+ */
+U_CAPI int32_t U_EXPORT2
+ucbuf_getc32(UCHARBUF* buf,UErrorCode* err);
+
+/**
+ * Gets a UTF-16 code unit at the current position from the converted buffer after
+ * unescaping and increments the current position. If the escape sequence is for UTF-32
+ * code point (\\Uxxxxxxxx) then a UTF-32 codepoint is returned
+ * @param buf Pointer to UCHARBUF structure
+ * @param err is a pointer to a valid <code>UErrorCode</code> value. If this value
+ *        indicates a failure on entry, the function will immediately return.
+ *        On exit the value will indicate the success of the operation.
+ */
+U_CAPI int32_t U_EXPORT2
+ucbuf_getcx32(UCHARBUF* buf,UErrorCode* err);
+
+/**
+ * Gets a pointer to the current position in the internal buffer and length of the line.
+ * It is imperative to make a copy of the returned buffer before performing operations on it.
+ * @param buf Pointer to UCHARBUF structure
+ * @param len Output param to receive the len of the buffer returned till end of the line
+ * @param err is a pointer to a valid <code>UErrorCode</code> value. If this value
+ *        indicates a failure on entry, the function will immediately return.
+ *        On exit the value will indicate the success of the operation.
+ *        Error: U_TRUNCATED_CHAR_FOUND
+ * @return Pointer to the internal buffer, NULL if EOF
+ */
+U_CAPI const UChar* U_EXPORT2
+ucbuf_readline(UCHARBUF* buf,int32_t* len, UErrorCode* err);
+
+
+/**
+ * Resets the buffers and the underlying file stream.
+ * @param buf Pointer to UCHARBUF structure
+ * @param err is a pointer to a valid <code>UErrorCode</code> value. If this value
+ *        indicates a failure on entry, the function will immediately return.
+ *        On exit the value will indicate the success of the operation.
+ */
+U_CAPI void U_EXPORT2
+ucbuf_rewind(UCHARBUF* buf,UErrorCode* err);
+
+/**
+ * Returns a pointer to the internal converted buffer
+ * @param buf Pointer to UCHARBUF structure
+ * @param len Pointer to int32_t to receive the length of buffer
+ * @param err is a pointer to a valid <code>UErrorCode</code> value. If this value
+ *        indicates a failure on entry, the function will immediately return.
+ *        On exit the value will indicate the success of the operation.
+ * @return Pointer to internal UChar buffer
+ */
+U_CAPI const UChar* U_EXPORT2
+ucbuf_getBuffer(UCHARBUF* buf,int32_t* len,UErrorCode* err);
+
+/**
+ * Closes the UCHARBUF structure members and cleans up the malloc'ed memory
+ * @param buf Pointer to UCHARBUF structure
+ */
+U_CAPI void U_EXPORT2
+ucbuf_close(UCHARBUF* buf);
+
+#if U_SHOW_CPLUSPLUS_API
+
+U_NAMESPACE_BEGIN
+
+/**
+ * \class LocalUCHARBUFPointer
+ * "Smart pointer" class, closes a UCHARBUF via ucbuf_close().
+ * For most methods see the LocalPointerBase base class.
+ *
+ * @see LocalPointerBase
+ * @see LocalPointer
+ */
+U_DEFINE_LOCAL_OPEN_POINTER(LocalUCHARBUFPointer, UCHARBUF, ucbuf_close);
+
+U_NAMESPACE_END
+
+#endif
+
+/**
+ * Rewinds the buffer by one codepoint. Does not rewind over escaped characters.
+ */
+U_CAPI void U_EXPORT2
+ucbuf_ungetc(int32_t ungetChar,UCHARBUF* buf);
+
+
+/**
+ * Autodetects the encoding of the file stream. Only Unicode charsets are autodetected.
+ * Some Unicode charsets are stateful and need byte identifiers to be converted also to bring
+ * the converter to correct state for converting the rest of the stream. So the UConverter parameter
+ * is necessary.
+ * If the charset was autodetected, the caller must close both the input FileStream
+ * and the converter.
+ *
+ * @param fileName The file name to be opened and its encoding autodetected
+ * @param conv  Output param to receive the opened converter if autodetected; NULL otherwise.
+ * @param cp Output param to receive the detected encoding
+ * @param status is a pointer to a valid <code>UErrorCode</code> value. If this value
+ *        indicates a failure on entry, the function will immediately return.
+ *        On exit the value will indicate the success of the operation.
+ * @return The input FileStream if its charset was autodetected; NULL otherwise.
+ */
+U_CAPI FileStream * U_EXPORT2
+ucbuf_autodetect(const char* fileName, const char** cp,UConverter** conv,
+int32_t* signatureLength, UErrorCode* status);
+
+/**
+ * Autodetects the encoding of the file stream. Only Unicode charsets are autodetected.
+ * Some Unicode charsets are stateful and need byte identifiers to be converted also to bring
+ * the converter to correct state for converting the rest of the stream. So the UConverter parameter
+ * is necessary.
+ * If the charset was autodetected, the caller must close the converter.
+ *
+ * @param in The file stream whose encoding is to be detected
+ * @param conv  Output param to receive the opened converter if autodetected; NULL otherwise.
+ * @param cp Output param to receive the detected encoding
+ * @param status is a pointer to a valid <code>UErrorCode</code> value. If this value
+ *        indicates a failure on entry, the function will immediately return.
+ *        On exit the value will indicate the success of the operation.
+ * @return Boolean whether the Unicode charset was autodetected.
+ */
+
+U_CAPI UBool U_EXPORT2
+ucbuf_autodetect_fs(FileStream* in, const char** cp, UConverter** conv, int32_t* signatureLength, UErrorCode* status);
+
+/**
+ * Returns the approximate size in UChars required for converting the file to UChars
+ */
+U_CAPI int32_t U_EXPORT2
+ucbuf_size(UCHARBUF* buf);
+
+U_CAPI const char* U_EXPORT2
+ucbuf_resolveFileName(const char* inputDir, const char* fileName, char* target, int32_t* len, UErrorCode* status);
+
+#endif
+#endif
+
diff --git a/intl/icu/source/tools/toolutil/ucln_tu.cpp b/intl/icu/source/tools/toolutil/ucln_tu.cpp
new file mode 100644
index 0000000000..4727227ebf
--- /dev/null
+++ b/intl/icu/source/tools/toolutil/ucln_tu.cpp
@@ -0,0 +1,19 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/********************************************************************
+ * COPYRIGHT:
+ * Copyright (c) 2007-2014, International Business Machines Corporation and
+ * others. All Rights Reserved.
+ ********************************************************************/
+
+
+/**  Auto-client **/
+#define UCLN_TYPE UCLN_TOOLUTIL
+#include "ucln_imp.h"
+
+int uprv_dummyFunction_TU();
+int uprv_dummyFunction_TU()
+{
+  /* placeholder: exists only so that the compiler does not complain about an empty file */
+  return 0;
+}
diff --git a/intl/icu/source/tools/toolutil/ucm.cpp b/intl/icu/source/tools/toolutil/ucm.cpp
new file mode 100644
index 0000000000..272570e72f
--- /dev/null
+++ b/intl/icu/source/tools/toolutil/ucm.cpp
@@ -0,0 +1,1195 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+*
+*   Copyright (C) 2003-2013, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*
+*******************************************************************************
+*   file name:  ucm.c
+*   encoding:   UTF-8
+*   tab size:   8 (not used)
+*   indentation:4
+*
+*   created on: 2003jun20
+*   created by: Markus W. Scherer
+*
+*   This file reads a .ucm file, stores its mappings and sorts them.
+*   It implements handling of Unicode conversion mappings from .ucm files
+*   for makeconv, canonucm, rptp2ucm, etc.
+*
+*   Unicode code point sequences with a length of more than 1,
+*   as well as byte sequences with more than 4 bytes or more than one complete
+*   character sequence are handled to support m:n mappings.
+*/
+
+#include "unicode/utypes.h"
+#include "unicode/ustring.h"
+#include "cstring.h"
+#include "cmemory.h"
+#include "filestrm.h"
+#include "uarrsort.h"
+#include "ucnvmbcs.h"
+#include "ucnv_bld.h"
+#include "ucnv_ext.h"
+#include "uparse.h"
+#include "ucm.h"
+#include <stdio.h>
+
+#if !UCONFIG_NO_CONVERSION
+
+/* -------------------------------------------------------------------------- */
+
+/* Prints one mapping to f: its code points as <Uxxxx>, a space, its bytes in hex and, if m->f is not negative, the flag after " |". */
+static void
+printMapping(UCMapping *m, UChar32 *codePoints, uint8_t *bytes, FILE *f) {
+    for(int32_t cpIndex=0; cpIndex<m->uLen; ++cpIndex) {
+        fprintf(f, "<U%04lX>", (long)codePoints[cpIndex]);
+    }
+
+    fputc(' ', f);
+
+    for(int32_t byteIndex=0; byteIndex<m->bLen; ++byteIndex) {
+        fprintf(f, "\\x%02X", bytes[byteIndex]);
+    }
+
+    /* a negative flag value is not printed */
+    if(m->f<0) {
+        fputs("\n", f);
+        return;
+    }
+    fprintf(f, " |%u\n", m->f);
+}
+
+U_CAPI void U_EXPORT2
+ucm_printMapping(UCMTable *table, UCMapping *m, FILE *f) { /* prints m to f, looking up its code points and bytes via table */
+    printMapping(m, UCM_GET_CODE_POINTS(table, m), UCM_GET_BYTES(table, m), f);
+}
+
+/* Prints every mapping of table to f: in mappings[] order if byUnicode, otherwise in reverseMap order. */
+U_CAPI void U_EXPORT2
+ucm_printTable(UCMTable *table, FILE *f, UBool byUnicode) {
+    UCMapping *const mappings=table->mappings;
+    const int32_t count=table->mappingsLength;
+
+    if(!byUnicode) {
+        /* reverseMap holds indexes into mappings[] */
+        const int32_t *map=table->reverseMap;
+        for(int32_t i=0; i<count; ++i) {
+            ucm_printMapping(table, mappings+map[i], f);
+        }
+        return;
+    }
+    for(int32_t i=0; i<count; ++i) {
+        ucm_printMapping(table, mappings+i, f);
+    }
+}
+
+/* mapping comparisons ------------------------------------------------------ */
+
+/*
+ * Orders two mappings by their Unicode code point sequences.
+ * Returns a negative, zero or positive value.
+ * If one sequence is a proper prefix of the other, the shorter one
+ * sorts first.
+ */
+static int32_t
+compareUnicode(UCMTable *lTable, const UCMapping *l,
+               UCMTable *rTable, const UCMapping *r) {
+    /* compare two single code points directly */
+    if(l->uLen==1 && r->uLen==1) {
+        return l->u-r->u;
+    }
+
+    /* get pointers to the code point sequences */
+    const UChar32 *leftPoints=UCM_GET_CODE_POINTS(lTable, l);
+    const UChar32 *rightPoints=UCM_GET_CODE_POINTS(rTable, r);
+
+    /* only the code points that both sequences have can be compared */
+    const int32_t lengthDiff=l->uLen-r->uLen;
+    const int32_t common=(lengthDiff<=0) ? l->uLen : r->uLen;
+
+    /* the first differing code point decides */
+    for(int32_t i=0; i<common; ++i) {
+        const int32_t diff=leftPoints[i]-rightPoints[i];
+        if(diff!=0) {
+            return diff;
+        }
+    }
+
+    /* equal so far: compare the lengths */
+    return lengthDiff;
+}
+
+/*
+ * Orders two mappings by their byte sequences; returns <0, 0 or >0.
+ *
+ * lexical=true compares byte by byte and lets a proper prefix sort first.
+ * The builder sorts this way so that it can efficiently search for a byte
+ * sequence that could be a prefix of a previously entered byte sequence.
+ *
+ * lexical=false compares the lengths first, for compatibility with old
+ * .ucm tools like canonucm and rptp2ucm.
+ */
+static int32_t
+compareBytes(UCMTable *lTable, const UCMapping *l,
+             UCMTable *rTable, const UCMapping *r,
+             UBool lexical) {
+    /* negative if l has fewer bytes than r, positive if it has more */
+    const int32_t lengthDiff=l->bLen-r->bLen;
+    /* number of leading bytes to compare */
+    int32_t common;
+
+    if(!lexical) {
+        /* different lengths decide immediately */
+        if(lengthDiff!=0) {
+            return lengthDiff;
+        }
+        common=l->bLen;
+    } else if(lengthDiff<=0) {
+        /* lexical: only the bytes that both sequences have are compared */
+        common=l->bLen;
+    } else {
+        common=r->bLen;
+    }
+
+    /* get pointers to the byte sequences */
+    const uint8_t *leftBytes=UCM_GET_BYTES(lTable, l);
+    const uint8_t *rightBytes=UCM_GET_BYTES(rTable, r);
+
+    /* the first differing byte decides */
+    for(int32_t i=0; i<common; ++i) {
+        const int32_t diff=leftBytes[i]-rightBytes[i];
+        if(diff!=0) {
+            return diff;
+        }
+    }
+
+    /* equal so far: compare the lengths */
+    return lengthDiff;
+}
+
+/* compare UCMappings for sorting: both sequences in the order chosen by uFirst, then the flags */
+static int32_t
+compareMappings(UCMTable *lTable, const UCMapping *l,
+                UCMTable *rTable, const UCMapping *r,
+                UBool uFirst) {
+    int32_t order;
+
+    /* choose which side to compare first */
+    if(!uFirst) {
+        /* bytes then Unicode */
+        order=compareBytes(lTable, l, rTable, r, true); /* lexically, for builder */
+        if(order==0) {
+            order=compareUnicode(lTable, l, rTable, r);
+        }
+    } else {
+        /* Unicode then bytes */
+        order=compareUnicode(lTable, l, rTable, r);
+        if(order==0) {
+            order=compareBytes(lTable, l, rTable, r, false); /* not lexically, like canonucm */
+        }
+    }
+
+    /* the flags break the tie between otherwise equal mappings */
+    if(order==0) {
+        order=l->f-r->f;
+    }
+
+    return order;
+}
+U_CDECL_BEGIN
+/* uprv_sortArray() comparator for the mappings array itself: Unicode first, then bytes, then flags */
+static int32_t  U_CALLCONV
+compareMappingsUnicodeFirst(const void *context, const void *left, const void *right) {
+    UCMTable *table=(UCMTable *)context;
+    return compareMappings(table, (const UCMapping *)left,
+                           table, (const UCMapping *)right, true);
+}
+
+/* uprv_sortArray() comparator for reverseMap: left and right point to indexes into table->mappings; bytes first */
+static int32_t U_CALLCONV
+compareMappingsBytesFirst(const void *context, const void *left, const void *right) {
+    UCMTable *table=(UCMTable *)context;
+    const UCMapping *leftMapping=table->mappings+*(const int32_t *)left;
+    const UCMapping *rightMapping=table->mappings+*(const int32_t *)right;
+    /* false: compare the byte sequences before the code points */
+    return compareMappings(table, leftMapping, table, rightMapping, false);
+}
+U_CDECL_END
+
+/* Sorts t->mappings by Unicode and rebuilds t->reverseMap sorted by bytes; a no-op if t is already sorted. */
+U_CAPI void U_EXPORT2
+ucm_sortTable(UCMTable *t) {
+    if(t->isSorted) {
+        return;
+    }
+
+    UErrorCode errorCode=U_ZERO_ERROR;
+
+    /* 1. sort by Unicode first */
+    uprv_sortArray(t->mappings, t->mappingsLength, sizeof(UCMapping),
+                   compareMappingsUnicodeFirst, t,
+                   false, &errorCode);
+
+    /*
+     * build the reverseMap; a new one gets mappingsCapacity rather than
+     * mappingsLength entries so that it need not be reallocated
+     * each time mappings are added
+     * (see ucm_moveMappings() and ucm_addMapping())
+     */
+    int32_t *reverseMap=t->reverseMap;
+    if(reverseMap==nullptr) {
+        reverseMap=(int32_t *)uprv_malloc(t->mappingsCapacity*sizeof(int32_t));
+        if(reverseMap==nullptr) {
+            fprintf(stderr, "ucm error: unable to allocate reverseMap\n");
+            exit(U_MEMORY_ALLOCATION_ERROR);
+        }
+        t->reverseMap=reverseMap;
+    }
+    /* start from the identity order */
+    for(int32_t index=0; index<t->mappingsLength; ++index) {
+        reverseMap[index]=index;
+    }
+
+    /* 2. sort reverseMap by mappings bytes first */
+    uprv_sortArray(reverseMap, t->mappingsLength, sizeof(int32_t),
+                   compareMappingsBytesFirst, t,
+                   false, &errorCode);
+
+    if(U_FAILURE(errorCode)) {
+        fprintf(stderr, "ucm error: sortTable()/uprv_sortArray() fails - %s\n",
+                u_errorName(errorCode));
+        exit(errorCode);
+    }
+
+    t->isSorted=true;
+}
+
+/*
+ * Removes from the base table every mapping whose move flag is set. Those flagged
+ * UCM_MOVE_TO_EXT are first added to the extension table, if ext is not nullptr.
+ */
+U_CAPI void U_EXPORT2
+ucm_moveMappings(UCMTable *base, UCMTable *ext) {
+    UCMapping *mb, *mbLimit;
+    int8_t flag;
+
+    mb=base->mappings;
+    mbLimit=mb+base->mappingsLength;
+
+    while(mb<mbLimit) {
+        flag=mb->moveFlag;
+        if(flag!=0) {
+            /* reset the move flag */
+            mb->moveFlag=0;
+
+            if(ext!=nullptr && (flag&UCM_MOVE_TO_EXT)) {
+                /* add the mapping to the extension table */
+                ucm_addMapping(ext, mb, UCM_GET_CODE_POINTS(base, mb), UCM_GET_BYTES(base, mb));
+            }
+
+            /* remove this mapping: move the last base mapping down and overwrite the current one */
+            if(mb<(mbLimit-1)) {
+                uprv_memcpy(mb, mbLimit-1, sizeof(UCMapping));
+            }
+            --mbLimit; /* mb is not advanced: the mapping copied into this slot is examined next */
+            --base->mappingsLength;
+            base->isSorted=false; /* forces the next ucm_sortTable(base) to sort again */
+        } else {
+            ++mb;
+        }
+    }
+}
+
+enum {
+    NEEDS_MOVE=1,  /* result bit: at least one mapping got a move or remove flag */
+    HAS_ERRORS=2   /* result bit: at least one conflict was reported to stderr */
+};
+
+/* Walks base and ext in parallel by Unicode input, flags mappings to move or remove, and returns NEEDS_MOVE/HAS_ERRORS bits. */
+static uint8_t
+checkBaseExtUnicode(UCMStates *baseStates, UCMTable *base, UCMTable *ext,
+                    UBool moveToExt, UBool intersectBase) {
+    (void)baseStates;
+    UCMapping *mb, *me, *mbLimit, *meLimit;
+    int32_t cmp;
+    uint8_t result;
+
+    mb=base->mappings;
+    mbLimit=mb+base->mappingsLength;
+
+    me=ext->mappings;
+    meLimit=me+ext->mappingsLength;
+
+    result=0;
+
+    for(;;) {
+        /* skip irrelevant mappings on both sides: only flags 0, 1, 2 and 4 take part */
+        for(;;) {
+            if(mb==mbLimit) {
+                return result;
+            }
+
+            if((0<=mb->f && mb->f<=2) || mb->f==4) {
+                break;
+            }
+
+            ++mb;
+        }
+
+        for(;;) {
+            if(me==meLimit) {
+                return result;
+            }
+
+            if((0<=me->f && me->f<=2) || me->f==4) {
+                break;
+            }
+
+            ++me;
+        }
+
+        /* compare the base and extension mappings */
+        cmp=compareUnicode(base, mb, ext, me);
+        if(cmp<0) {
+            if(intersectBase && (intersectBase!=2 || mb->bLen>1)) {
+                /*
+                 * mapping in base but not in ext, move it
+                 *
+                 * if ext is DBCS (intersectBase==2), move multi-byte mappings here
+                 * and check SBCS ones for Unicode prefix below
+                 */
+                mb->moveFlag|=UCM_MOVE_TO_EXT;
+                result|=NEEDS_MOVE;
+
+            /* does mb map from an input sequence that is a prefix of me's? */
+            } else if( mb->uLen<me->uLen &&
+                0==uprv_memcmp(UCM_GET_CODE_POINTS(base, mb), UCM_GET_CODE_POINTS(ext, me), 4*mb->uLen)
+            ) {
+                if(moveToExt) {
+                    /* mark this mapping to be moved to the extension table */
+                    mb->moveFlag|=UCM_MOVE_TO_EXT;
+                    result|=NEEDS_MOVE;
+                } else {
+                    fprintf(stderr,
+                            "ucm error: the base table contains a mapping whose input sequence\n"
+                            "           is a prefix of the input sequence of an extension mapping\n");
+                    ucm_printMapping(base, mb, stderr);
+                    ucm_printMapping(ext, me, stderr);
+                    result|=HAS_ERRORS;
+                }
+            }
+
+            ++mb;
+        } else if(cmp==0) {
+            /*
+             * same input; if the flag and bytes match too, mark the extension
+             * mapping for removal, otherwise move the base mapping or report an error
+             */
+            if( mb->f==me->f && mb->bLen==me->bLen &&
+                0==uprv_memcmp(UCM_GET_BYTES(base, mb), UCM_GET_BYTES(ext, me), mb->bLen)
+            ) {
+                me->moveFlag|=UCM_REMOVE_MAPPING;
+                result|=NEEDS_MOVE;
+            } else if(intersectBase) {
+                /* same input but different output or flag: move the base mapping */
+                mb->moveFlag|=UCM_MOVE_TO_EXT;
+                result|=NEEDS_MOVE;
+            } else {
+                fprintf(stderr,
+                        "ucm error: the base table contains a mapping whose input sequence\n"
+                        "           is the same as the input sequence of an extension mapping\n"
+                        "           but it maps differently\n");
+                ucm_printMapping(base, mb, stderr);
+                ucm_printMapping(ext, me, stderr);
+                result|=HAS_ERRORS;
+            }
+
+            ++mb;
+        } else /* cmp>0 */ {
+            ++me;
+        }
+    }
+}
+
+/* Walks base and ext in parallel in reverseMap (byte) order, flags mappings to move or remove, and returns NEEDS_MOVE/HAS_ERRORS bits. */
+static uint8_t
+checkBaseExtBytes(UCMStates *baseStates, UCMTable *base, UCMTable *ext,
+                  UBool moveToExt, UBool intersectBase) {
+    UCMapping *mb, *me;
+    int32_t *baseMap, *extMap;
+    int32_t b, e, bLimit, eLimit, cmp;
+    uint8_t result;
+    UBool isSISO;
+    baseMap=base->reverseMap;
+    extMap=ext->reverseMap;
+
+    b=e=0;
+    bLimit=base->mappingsLength;
+    eLimit=ext->mappingsLength;
+
+    result=0;
+
+    isSISO=(UBool)(baseStates->outputType==MBCS_OUTPUT_2_SISO);
+
+    for(;;) {
+        /* skip irrelevant mappings on both sides: only flags 0 and 3 take part */
+        for(;; ++b) {
+            if(b==bLimit) {
+                return result;
+            }
+            mb=base->mappings+baseMap[b];
+
+            if(intersectBase==2 && mb->bLen==1) {
+                /*
+                 * comparing a base against a DBCS extension:
+                 * leave SBCS base mappings alone
+                 */
+                continue;
+            }
+
+            if(mb->f==0 || mb->f==3) {
+                break;
+            }
+        }
+
+        for(;;) {
+            if(e==eLimit) {
+                return result;
+            }
+            me=ext->mappings+extMap[e];
+
+            if(me->f==0 || me->f==3) {
+                break;
+            }
+
+            ++e;
+        }
+
+        /* compare the base and extension mappings */
+        cmp=compareBytes(base, mb, ext, me, true);
+        if(cmp<0) {
+            if(intersectBase) {
+                /* mapping in base but not in ext, move it */
+                mb->moveFlag|=UCM_MOVE_TO_EXT;
+                result|=NEEDS_MOVE;
+
+            /*
+             * does mb map from an input sequence that is a prefix of me's?
+             * for SI/SO tables, a single byte is never a prefix because it
+             * occurs in a separate single-byte state
+             */
+            } else if( mb->bLen<me->bLen &&
+                (!isSISO || mb->bLen>1) &&
+                0==uprv_memcmp(UCM_GET_BYTES(base, mb), UCM_GET_BYTES(ext, me), mb->bLen)
+            ) {
+                if(moveToExt) {
+                    /* mark this mapping to be moved to the extension table */
+                    mb->moveFlag|=UCM_MOVE_TO_EXT;
+                    result|=NEEDS_MOVE;
+                } else {
+                    fprintf(stderr,
+                            "ucm error: the base table contains a mapping whose input sequence\n"
+                            "           is a prefix of the input sequence of an extension mapping\n");
+                    ucm_printMapping(base, mb, stderr);
+                    ucm_printMapping(ext, me, stderr);
+                    result|=HAS_ERRORS;
+                }
+            }
+
+            ++b;
+        } else if(cmp==0) {
+            /*
+             * same bytes; if the flag and code points match too, mark the extension
+             * mapping for removal, otherwise move the base mapping or report an error
+             */
+            if( mb->f==me->f && mb->uLen==me->uLen &&
+                0==uprv_memcmp(UCM_GET_CODE_POINTS(base, mb), UCM_GET_CODE_POINTS(ext, me), 4*mb->uLen)
+            ) {
+                me->moveFlag|=UCM_REMOVE_MAPPING;
+                result|=NEEDS_MOVE;
+            } else if(intersectBase) {
+                /* same bytes but different code points or flag: move the base mapping */
+                mb->moveFlag|=UCM_MOVE_TO_EXT;
+                result|=NEEDS_MOVE;
+            } else {
+                fprintf(stderr,
+                        "ucm error: the base table contains a mapping whose input sequence\n"
+                        "           is the same as the input sequence of an extension mapping\n"
+                        "           but it maps differently\n");
+                ucm_printMapping(base, mb, stderr);
+                ucm_printMapping(ext, me, stderr);
+                result|=HAS_ERRORS;
+            }
+
+            ++b;
+        } else /* cmp>0 */ {
+            ++e;
+        }
+    }
+}
+
+/*
+ * Runs ucm_countChars() over the bytes of every mapping; each mapping with a
+ * count below 1 is printed to stderr. Returns true only if none was printed.
+ */
+U_CAPI UBool U_EXPORT2
+ucm_checkValidity(UCMTable *table, UCMStates *baseStates) {
+    UCMapping *const mappings=table->mappings;
+    const int32_t mappingCount=table->mappingsLength;
+    UBool allValid=true;
+
+    for(int32_t i=0; i<mappingCount; ++i) {
+        UCMapping *m=mappings+i;
+        if(ucm_countChars(baseStates, UCM_GET_BYTES(table, m), m->bLen)>=1) {
+            continue;
+        }
+        /* report the offending mapping and keep checking the rest */
+        ucm_printMapping(table, m, stderr);
+        allValid=false;
+    }
+    return allValid;
+}
+/* Sort base and ext, cross-check them by Unicode and by bytes, then carry out the flagged moves/removals; false on error. */
+U_CAPI UBool U_EXPORT2
+ucm_checkBaseExt(UCMStates *baseStates,
+                 UCMTable *base, UCMTable *ext, UCMTable *moveTarget,
+                 UBool intersectBase) {
+    uint8_t result;
+
+    /* if we have an extension table, we must always use precision flags */
+    if(base->flagsType&UCM_FLAGS_IMPLICIT) {
+        fprintf(stderr, "ucm error: the base table contains mappings without precision flags\n");
+        return false;
+    }
+    if(ext->flagsType&UCM_FLAGS_IMPLICIT) {
+        fprintf(stderr, "ucm error: extension table contains mappings without precision flags\n");
+        return false;
+    }
+
+    /* checking requires both tables to be sorted */
+    ucm_sortTable(base);
+    ucm_sortTable(ext);
+
+    /* check both ways; each helper returns HAS_ERRORS/NEEDS_MOVE bits, which are OR-ed together */
+    result=
+        checkBaseExtUnicode(baseStates, base, ext, (UBool)(moveTarget!=nullptr), intersectBase)|
+        checkBaseExtBytes(baseStates, base, ext, (UBool)(moveTarget!=nullptr), intersectBase);
+
+    if(result&HAS_ERRORS) {
+        return false;
+    }
+
+    if(result&NEEDS_MOVE) {
+        ucm_moveMappings(ext, nullptr); /* ext is processed without a target table */
+        ucm_moveMappings(base, moveTarget);
+        ucm_sortTable(base); /* re-sort because the tables were just modified */
+        ucm_sortTable(ext);
+        if(moveTarget!=nullptr) {
+            ucm_sortTable(moveTarget);
+        }
+    }
+
+    return true;
+}
+
+/* merge tables for rptp2ucm ------------------------------------------------ */
+/* Merge toUTable into fromUTable and set precision flags: 0 roundtrip (unchanged), 1 fallback, 2 SUB, 3 reverse fallback. */
+U_CAPI void U_EXPORT2
+ucm_mergeTables(UCMTable *fromUTable, UCMTable *toUTable,
+                const uint8_t *subchar, int32_t subcharLength,
+                uint8_t subchar1) {
+    UCMapping *fromUMapping, *toUMapping;
+    int32_t fromUIndex, toUIndex, fromUTop, toUTop, cmp;
+
+    ucm_sortTable(fromUTable);
+    ucm_sortTable(toUTable);
+
+    fromUMapping=fromUTable->mappings;
+    toUMapping=toUTable->mappings;
+
+    fromUTop=fromUTable->mappingsLength; /* fixed now: mappings appended below are not revisited */
+    toUTop=toUTable->mappingsLength;
+
+    fromUIndex=toUIndex=0;
+
+    while(fromUIndex<fromUTop && toUIndex<toUTop) { /* walk both sorted tables in step */
+        cmp=compareMappings(fromUTable, fromUMapping, toUTable, toUMapping, true);
+        if(cmp==0) {
+            /* equal: roundtrip, nothing to do (flags are initially 0) */
+            ++fromUMapping;
+            ++toUMapping;
+
+            ++fromUIndex;
+            ++toUIndex;
+        } else if(cmp<0) {
+            /*
+             * the fromU mapping does not have a toU counterpart:
+             * fallback Unicode->codepage
+             */
+            if( (fromUMapping->bLen==subcharLength &&
+                 0==uprv_memcmp(UCM_GET_BYTES(fromUTable, fromUMapping), subchar, subcharLength)) ||
+                (subchar1!=0 && fromUMapping->bLen==1 && fromUMapping->b.bytes[0]==subchar1)
+            ) {
+                fromUMapping->f=2; /* SUB mapping */
+            } else {
+                fromUMapping->f=1; /* normal fallback */
+            }
+
+            ++fromUMapping;
+            ++fromUIndex;
+        } else {
+            /*
+             * the toU mapping does not have a fromU counterpart:
+             * (reverse) fallback codepage->Unicode, copy it to the fromU table
+             */
+
+            /* ignore reverse fallbacks to Unicode SUB (a single U+FFFD or U+001A) */
+            if(!(toUMapping->uLen==1 && (toUMapping->u==0xfffd || toUMapping->u==0x1a))) {
+                toUMapping->f=3; /* reverse fallback; the flag is also written into toUTable's own entry */
+                ucm_addMapping(fromUTable, toUMapping, UCM_GET_CODE_POINTS(toUTable, toUMapping), UCM_GET_BYTES(toUTable, toUMapping));
+
+                /* the table may have been reallocated */
+                fromUMapping=fromUTable->mappings+fromUIndex;
+            }
+
+            ++toUMapping;
+            ++toUIndex;
+        }
+    }
+
+    /* either one or both tables are exhausted */
+    while(fromUIndex<fromUTop) {
+        /* leftover fromU mappings are fallbacks */
+        if( (fromUMapping->bLen==subcharLength &&
+             0==uprv_memcmp(UCM_GET_BYTES(fromUTable, fromUMapping), subchar, subcharLength)) ||
+            (subchar1!=0 && fromUMapping->bLen==1 && fromUMapping->b.bytes[0]==subchar1)
+        ) {
+            fromUMapping->f=2; /* SUB mapping */
+        } else {
+            fromUMapping->f=1; /* normal fallback */
+        }
+
+        ++fromUMapping;
+        ++fromUIndex;
+    }
+
+    while(toUIndex<toUTop) {
+        /* leftover toU mappings are reverse fallbacks */
+
+        /* ignore reverse fallbacks to Unicode SUB (a single U+FFFD or U+001A) */
+        if(!(toUMapping->uLen==1 && (toUMapping->u==0xfffd || toUMapping->u==0x1a))) {
+            toUMapping->f=3; /* reverse fallback; the flag is also written into toUTable's own entry */
+            ucm_addMapping(fromUTable, toUMapping, UCM_GET_CODE_POINTS(toUTable, toUMapping), UCM_GET_BYTES(toUTable, toUMapping));
+        }
+
+        ++toUMapping;
+        ++toUIndex;
+    }
+
+    fromUTable->isSorted=false; /* cleared unconditionally, even when nothing was appended */
+}
+
+/* separate extension mappings out of base table for rptp2ucm --------------- */
+/* Flag base mappings for removal or for the extension table, move them, then re-check base against ext; false on error. */
+U_CAPI UBool U_EXPORT2
+ucm_separateMappings(UCMFile *ucm, UBool isSISO) {
+    UCMTable *table;
+    UCMapping *m, *mLimit;
+    int32_t type;
+    UBool needsMove, isOK;
+
+    table=ucm->base;
+    m=table->mappings;
+    mLimit=m+table->mappingsLength;
+
+    needsMove=false;
+    isOK=true;
+
+    for(; m<mLimit; ++m) {
+        if(isSISO && m->bLen==1 && (m->b.bytes[0]==0xe || m->b.bytes[0]==0xf)) { /* single byte 0x0e or 0x0f */
+            fprintf(stderr, "warning: removing illegal mapping from an SI/SO-stateful table\n");
+            ucm_printMapping(table, m, stderr);
+            m->moveFlag|=UCM_REMOVE_MAPPING;
+            needsMove=true;
+            continue; /* only a warning: isOK stays true */
+        }
+
+        type=ucm_mappingType(
+                &ucm->states, m,
+                UCM_GET_CODE_POINTS(table, m), UCM_GET_BYTES(table, m));
+        if(type<0) {
+            /* illegal byte sequence */
+            printMapping(m, UCM_GET_CODE_POINTS(table, m), UCM_GET_BYTES(table, m), stderr);
+            isOK=false;
+        } else if(type>0) { /* not suitable for the base table */
+            m->moveFlag|=UCM_MOVE_TO_EXT;
+            needsMove=true;
+        }
+    }
+
+    if(!isOK) {
+        return false;
+    }
+    if(needsMove) {
+        ucm_moveMappings(ucm->base, ucm->ext);
+        return ucm_checkBaseExt(&ucm->states, ucm->base, ucm->ext, ucm->ext, false); /* ext doubles as the move target */
+    } else {
+        ucm_sortTable(ucm->base);
+        return true;
+    }
+}
+
+/* ucm parser --------------------------------------------------------------- */
+/* Parse consecutive \xXX escapes (optionally joined by '+') at *ps into bytes[]; returns the count (may be 0) and advances *ps, or -1 on error. */
+U_CAPI int8_t U_EXPORT2
+ucm_parseBytes(uint8_t bytes[UCNV_EXT_MAX_BYTES], const char *line, const char **ps) {
+    const char *s=*ps;
+    char *end;
+    uint8_t byte;
+    int8_t bLen;
+
+    bLen=0;
+    for(;;) {
+        /* skip an optional plus sign */
+        if(bLen>0 && *s=='+') {
+            ++s;
+        }
+        if(*s!='\\') { /* anything other than a backslash ends the byte sequence */
+            break;
+        }
+
+        if( s[1]!='x' || /* NOTE(review): if uprv_strtoul is plain strtoul, "\x 5" or "\x-1" also end at s+4 and pass - confirm */
+            (byte=(uint8_t)uprv_strtoul(s+2, &end, 16), end)!=s+4 /* comma expression: store the value, then compare end */
+        ) {
+            fprintf(stderr, "ucm error: byte must be formatted as \\xXX (2 hex digits) - \"%s\"\n", line);
+            return -1;
+        }
+
+        if(bLen==UCNV_EXT_MAX_BYTES) { /* checked before storing so that bytes[] cannot overflow */
+            fprintf(stderr, "ucm error: too many bytes on \"%s\"\n", line);
+            return -1;
+        }
+        bytes[bLen++]=byte;
+        s=end;
+    }
+
+    *ps=s; /* only updated on success; the error paths above leave *ps untouched */
+    return bLen;
+}
+
+/* parse a mapping line; must not be empty. Fills codePoints[], bytes[] and m (uLen, bLen, f; u if uLen==1; b.bytes if bLen<=4); false on error. */
+U_CAPI UBool U_EXPORT2
+ucm_parseMappingLine(UCMapping *m,
+                     UChar32 codePoints[UCNV_EXT_MAX_UCHARS],
+                     uint8_t bytes[UCNV_EXT_MAX_BYTES],
+                     const char *line) {
+    const char *s;
+    char *end;
+    UChar32 cp;
+    int32_t u16Length;
+    int8_t uLen, bLen, f;
+
+    s=line;
+    uLen=bLen=0;
+
+    /* parse code points */
+    for(;;) {
+        /* skip an optional plus sign */
+        if(uLen>0 && *s=='+') {
+            ++s;
+        }
+        if(*s!='<') { /* anything other than '<' ends the code point list */
+            break;
+        }
+
+        if( s[1]!='U' ||
+            (cp=(UChar32)uprv_strtoul(s+2, &end, 16), end)==s+2 || /* end==s+2: nothing was converted */
+            *end!='>'
+        ) {
+            fprintf(stderr, "ucm error: Unicode code point must be formatted as <UXXXX> (1..6 hex digits) - \"%s\"\n", line);
+            return false;
+        }
+        if((uint32_t)cp>0x10ffff || U_IS_SURROGATE(cp)) {
+            fprintf(stderr, "ucm error: Unicode code point must be 0..d7ff or e000..10ffff - \"%s\"\n", line);
+            return false;
+        }
+
+        if(uLen==UCNV_EXT_MAX_UCHARS) { /* checked before storing so that codePoints[] cannot overflow */
+            fprintf(stderr, "ucm error: too many code points on \"%s\"\n", line);
+            return false;
+        }
+        codePoints[uLen++]=cp;
+        s=end+1; /* continue after the '>' */
+    }
+
+    if(uLen==0) {
+        fprintf(stderr, "ucm error: no Unicode code points on \"%s\"\n", line);
+        return false;
+    } else if(uLen==1) {
+        m->u=codePoints[0];
+    } else {
+        UErrorCode errorCode=U_ZERO_ERROR;
+        u_strFromUTF32(nullptr, 0, &u16Length, codePoints, uLen, &errorCode); /* no destination: only u16Length is wanted */
+        if( (U_FAILURE(errorCode) && errorCode!=U_BUFFER_OVERFLOW_ERROR) || /* overflow is tolerated for this length-only call */
+            u16Length>UCNV_EXT_MAX_UCHARS
+        ) {
+            fprintf(stderr, "ucm error: too many UChars on \"%s\"\n", line);
+            return false;
+        }
+    }
+
+    s=u_skipWhitespace(s);
+
+    /* parse bytes */
+    bLen=ucm_parseBytes(bytes, line, &s);
+
+    if(bLen<0) {
+        return false;
+    } else if(bLen==0) {
+        fprintf(stderr, "ucm error: no bytes on \"%s\"\n", line);
+        return false;
+    } else if(bLen<=4) { /* longer sequences stay in bytes[] only; m->b is not written here */
+        uprv_memcpy(m->b.bytes, bytes, bLen);
+    }
+
+    /* skip everything until the fallback indicator, even the start of a comment */
+    for(;;) {
+        if(*s==0) {
+            f=-1; /* no fallback indicator */
+            break;
+        } else if(*s=='|') {
+            f=(int8_t)(s[1]-'0');
+            if((uint8_t)f>4) { /* the unsigned cast also rejects characters below '0' */
+                fprintf(stderr, "ucm error: fallback indicator must be |0..|4 - \"%s\"\n", line);
+                return false;
+            }
+            break;
+        }
+        ++s;
+    }
+
+    m->uLen=uLen;
+    m->bLen=bLen;
+    m->f=f;
+    return true;
+}
+
+/* general APIs ------------------------------------------------------------- */
+/* Allocate a zero-filled UCMTable; prints an error and exits the process if the allocation fails. */
+U_CAPI UCMTable * U_EXPORT2
+ucm_openTable() {
+    UCMTable *table=(UCMTable *)uprv_malloc(sizeof(UCMTable));
+    if(table==nullptr) {
+        fprintf(stderr, "ucm error: unable to allocate a UCMTable\n");
+        exit(U_MEMORY_ALLOCATION_ERROR);
+    }
+
+    memset(table, 0, sizeof(UCMTable)); /* all pointers null, all lengths and capacities 0 */
+    return table;
+}
+/* Free a table together with its mappings, codePoints, bytes and reverseMap arrays; a null table is ignored. */
+U_CAPI void U_EXPORT2
+ucm_closeTable(UCMTable *table) {
+    if(table!=nullptr) {
+        uprv_free(table->mappings);
+        uprv_free(table->codePoints);
+        uprv_free(table->bytes);
+        uprv_free(table->reverseMap);
+        uprv_free(table);
+    }
+}
+/* Empty a table for reuse: lengths, flagsType and unicodeMask are zeroed; arrays and capacities are left as they are. */
+U_CAPI void U_EXPORT2
+ucm_resetTable(UCMTable *table) {
+    if(table!=nullptr) {
+        table->mappingsLength=0;
+        table->flagsType=0;
+        table->unicodeMask=0;
+        table->bytesLength=table->codePointsLength=0;
+        table->isSorted=false;
+    }
+}
+/* Append a copy of *m to table. For uLen>1 / bLen>4 the data goes into the table's arrays and m->u / m->b.idx are overwritten with indexes. */
+U_CAPI void U_EXPORT2
+ucm_addMapping(UCMTable *table,
+               UCMapping *m,
+               UChar32 codePoints[UCNV_EXT_MAX_UCHARS],
+               uint8_t bytes[UCNV_EXT_MAX_BYTES]) {
+    UCMapping *tm;
+    UChar32 c;
+    int32_t idx;
+
+    if(table->mappingsLength>=table->mappingsCapacity) {
+        /* make the mappings array larger */
+        if(table->mappingsCapacity==0) {
+            table->mappingsCapacity=1000; /* first allocation */
+        } else {
+            table->mappingsCapacity*=10; /* grow tenfold */
+        }
+        table->mappings=(UCMapping *)uprv_realloc(table->mappings,
+                                             table->mappingsCapacity*sizeof(UCMapping));
+        if(table->mappings==nullptr) {
+            fprintf(stderr, "ucm error: unable to allocate %d UCMappings\n",
+                            (int)table->mappingsCapacity);
+            exit(U_MEMORY_ALLOCATION_ERROR);
+        }
+
+        if(table->reverseMap!=nullptr) {
+            /* the reverseMap must be reallocated in a new sort */
+            uprv_free(table->reverseMap);
+            table->reverseMap=nullptr;
+        }
+    }
+
+    if(m->uLen>1 && table->codePointsCapacity==0) { /* allocated once on first use; this array is never grown */
+        table->codePointsCapacity=10000;
+        table->codePoints=(UChar32 *)uprv_malloc(table->codePointsCapacity*4);
+        if(table->codePoints==nullptr) {
+            fprintf(stderr, "ucm error: unable to allocate %d UChar32s\n",
+                            (int)table->codePointsCapacity);
+            exit(U_MEMORY_ALLOCATION_ERROR);
+        }
+    }
+
+    if(m->bLen>4 && table->bytesCapacity==0) { /* allocated once on first use; this array is never grown */
+        table->bytesCapacity=10000;
+        table->bytes=(uint8_t *)uprv_malloc(table->bytesCapacity);
+        if(table->bytes==nullptr) {
+            fprintf(stderr, "ucm error: unable to allocate %d bytes\n",
+                            (int)table->bytesCapacity);
+            exit(U_MEMORY_ALLOCATION_ERROR);
+        }
+    }
+
+    if(m->uLen>1) {
+        idx=table->codePointsLength;
+        table->codePointsLength+=m->uLen;
+        if(table->codePointsLength>table->codePointsCapacity) { /* exceeding the fixed capacity is fatal */
+            fprintf(stderr, "ucm error: too many code points in multiple-code point mappings\n");
+            exit(U_MEMORY_ALLOCATION_ERROR);
+        }
+
+        uprv_memcpy(table->codePoints+idx, codePoints, (size_t)m->uLen*4);
+        m->u=idx; /* the caller's mapping now holds an index, not a code point */
+    }
+
+    if(m->bLen>4) {
+        idx=table->bytesLength;
+        table->bytesLength+=m->bLen;
+        if(table->bytesLength>table->bytesCapacity) { /* exceeding the fixed capacity is fatal */
+            fprintf(stderr, "ucm error: too many bytes in mappings with >4 charset bytes\n");
+            exit(U_MEMORY_ALLOCATION_ERROR);
+        }
+
+        uprv_memcpy(table->bytes+idx, bytes, m->bLen);
+        m->b.idx=idx; /* the caller's mapping now holds an index instead of inline bytes */
+    }
+
+    /* set unicodeMask */
+    for(idx=0; idx<m->uLen; ++idx) {
+        c=codePoints[idx];
+        if(c>=0x10000) {
+            table->unicodeMask|=UCNV_HAS_SUPPLEMENTARY; /* there are supplementary code points */
+        } else if(U_IS_SURROGATE(c)) {
+            table->unicodeMask|=UCNV_HAS_SURROGATES;    /* there are surrogate code points */
+        }
+    }
+
+    /* set flagsType; a negative f means the mapping has no precision flag */
+    if(m->f<0) {
+        table->flagsType|=UCM_FLAGS_IMPLICIT;
+    } else {
+        table->flagsType|=UCM_FLAGS_EXPLICIT;
+    }
+
+    tm=table->mappings+table->mappingsLength++;
+    uprv_memcpy(tm, m, sizeof(UCMapping)); /* copied last, so the stored entry carries the indexes set above */
+
+    table->isSorted=false;
+}
+/* Allocate a zeroed UCMFile with empty base and ext tables and initial state settings; exits the process on allocation failure. */
+U_CAPI UCMFile * U_EXPORT2
+ucm_open() {
+    UCMFile *ucm=(UCMFile *)uprv_malloc(sizeof(UCMFile));
+    if(ucm==nullptr) {
+        fprintf(stderr, "ucm error: unable to allocate a UCMFile\n");
+        exit(U_MEMORY_ALLOCATION_ERROR);
+    }
+
+    memset(ucm, 0, sizeof(UCMFile));
+
+    ucm->base=ucm_openTable(); /* ucm_openTable() exits on failure, so no null check here */
+    ucm->ext=ucm_openTable();
+
+    ucm->states.stateFlags[0]=MBCS_STATE_FLAG_DIRECT;
+    ucm->states.conversionType=UCNV_UNSUPPORTED_CONVERTER;
+    ucm->states.outputType=-1;
+    ucm->states.minCharLength=ucm->states.maxCharLength=1;
+
+    return ucm;
+}
+/* Free a UCMFile together with its base and ext tables; a null pointer is ignored. */
+U_CAPI void U_EXPORT2
+ucm_close(UCMFile *ucm) {
+    if(ucm!=nullptr) {
+        ucm_closeTable(ucm->base);
+        ucm_closeTable(ucm->ext);
+        uprv_free(ucm);
+    }
+}
+/* Classify a mapping: -1 = illegal byte sequence, 0 = suitable for a base table, 1 = needs to go into an extension table. */
+U_CAPI int32_t U_EXPORT2
+ucm_mappingType(UCMStates *baseStates,
+                UCMapping *m,
+                UChar32 codePoints[UCNV_EXT_MAX_UCHARS],
+                uint8_t bytes[UCNV_EXT_MAX_BYTES]) {
+    (void)codePoints; /* the parameter is not used by this function */
+    /* check validity of the bytes and count the characters in them */
+    int32_t count=ucm_countChars(baseStates, bytes, m->bLen);
+    if(count<1) {
+        /* illegal byte sequence */
+        return -1;
+    }
+
+    /*
+     * Suitable for an ICU conversion base table means:
+     * - a 1:1 mapping (1 Unicode code point : 1 byte sequence)
+     * - precision flag 0..3
+     * - SBCS: any 1:1 mapping
+     *         (the table stores additional bits to distinguish mapping types)
+     * - MBCS: not a |2 SUB mapping for <subchar1>
+     * - MBCS: not a |1 fallback to 0x00
+     * - MBCS: not a multi-byte mapping with leading 0x00 bytes
+     *
+     * Further restrictions for fromUnicode tables
+     * are enforced in makeconv (MBCSOkForBaseFromUnicode()).
+     *
+     * All of the MBCS fromUnicode specific tests could be removed from here,
+     * but the ones above are for unusual mappings, and removing the tests
+     * from here would change canonucm output which seems gratuitous.
+     * (Markus Scherer 2006-nov-28)
+     *
+     * Exception: All implicit mappings (f<0) that need to be moved
+     * because of fromUnicode restrictions _must_ be moved here because
+     * makeconv uses a hack for moving mappings only for the fromUnicode table
+     * that only works with non-negative values of f.
+     */
+    if( m->uLen==1 && count==1 && m->f<=3 && /* f<=3 also admits implicit mappings (f<0) */
+        (baseStates->maxCharLength==1 || /* the SBCS case from the list above */
+            !((m->f==2 && m->bLen==1) ||
+              (m->f==1 && bytes[0]==0) ||
+              (m->f<=1 && m->bLen>1 && bytes[0]==0)))
+    ) {
+        return 0; /* suitable for a base table */
+    } else {
+        return 1; /* needs to go into an extension table */
+    }
+}
+/* Add m to ucm->base if forBase is set and ucm_mappingType() returns 0, otherwise to ucm->ext; false if the mapping is rejected. */
+U_CAPI UBool U_EXPORT2
+ucm_addMappingAuto(UCMFile *ucm, UBool forBase, UCMStates *baseStates,
+                   UCMapping *m,
+                   UChar32 codePoints[UCNV_EXT_MAX_UCHARS],
+                   uint8_t bytes[UCNV_EXT_MAX_BYTES]) {
+    int32_t type;
+
+    if(m->f==2 && m->uLen>1) { /* a |2 mapping must come from a single code point */
+        fprintf(stderr, "ucm error: illegal <subchar1> |2 mapping from multiple code points\n");
+        printMapping(m, codePoints, bytes, stderr);
+        return false;
+    }
+
+    if(baseStates!=nullptr) {
+        /* check validity of the bytes and count the characters in them */
+        type=ucm_mappingType(baseStates, m, codePoints, bytes);
+        if(type<0) {
+            /* illegal byte sequence */
+            printMapping(m, codePoints, bytes, stderr);
+            return false;
+        }
+    } else {
+        /* not used - adding a mapping for an extension-only table before its base table is read */
+        type=1; /* without states the bytes cannot be checked; the mapping goes to ext */
+    }
+
+    /*
+     * Add the mapping to the base table if this is requested and suitable.
+     * Otherwise, add it to the extension table.
+     */
+    if(forBase && type==0) {
+        ucm_addMapping(ucm->base, m, codePoints, bytes);
+    } else {
+        ucm_addMapping(ucm->ext, m, codePoints, bytes);
+    }
+
+    return true;
+}
+/* Parse one text line and add the resulting mapping; empty and comment lines are accepted and skipped. False on a parse or add error. */
+U_CAPI UBool U_EXPORT2
+ucm_addMappingFromLine(UCMFile *ucm, const char *line, UBool forBase, UCMStates *baseStates) {
+  UCMapping m={ 0, {0}, 0, 0, 0, 0 };
+    UChar32 codePoints[UCNV_EXT_MAX_UCHARS];
+    uint8_t bytes[UCNV_EXT_MAX_BYTES];
+
+    const char *s;
+
+    /* ignore empty and comment lines ('#' in column 0, or only whitespace before the end or a CR/LF) */
+    if(line[0]=='#' || *(s=u_skipWhitespace(line))==0 || *s=='\n' || *s=='\r') {
+        return true;
+    }
+
+    return
+        ucm_parseMappingLine(&m, codePoints, bytes, line) && /* && short-circuits: nothing is added after a parse error */
+        ucm_addMappingAuto(ucm, forBase, baseStates, &m, codePoints, bytes);
+}
+/* Read mapping lines up to "END CHARMAP" and add them; sets *pErrorCode to U_INVALID_TABLE_FORMAT if a line fails or the end marker is missing. */
+U_CAPI void U_EXPORT2
+ucm_readTable(UCMFile *ucm, FileStream* convFile,
+              UBool forBase, UCMStates *baseStates,
+              UErrorCode *pErrorCode) {
+    char line[500];
+    char *end;
+    UBool isOK;
+
+    if(U_FAILURE(*pErrorCode)) { /* do nothing if the caller already has an error */
+        return;
+    }
+
+    isOK=true;
+
+    for(;;) {
+        /* read the next line */
+        if(!T_FileStream_readLine(convFile, line, sizeof(line))) { /* no further line although "END CHARMAP" was not seen yet */
+            fprintf(stderr, "incomplete charmap section\n");
+            isOK=false;
+            break;
+        }
+
+        /* remove CR LF */
+        end=uprv_strchr(line, 0); /* points at the terminating NUL */
+        while(line<end && (*(end-1)=='\r' || *(end-1)=='\n')) {
+            --end;
+        }
+        *end=0;
+
+        /* ignore empty and comment lines */
+        if(line[0]==0 || line[0]=='#') {
+            continue;
+        }
+
+        /* stop at the end of the mapping table */
+        if(0==uprv_strcmp(line, "END CHARMAP")) {
+            break;
+        }
+
+        isOK&=ucm_addMappingFromLine(ucm, line, forBase, baseStates); /* remember the failure but keep reading further lines */
+    }
+
+    if(!isOK) {
+        *pErrorCode=U_INVALID_TABLE_FORMAT;
+    }
+}
+#endif
diff --git a/intl/icu/source/tools/toolutil/ucm.h b/intl/icu/source/tools/toolutil/ucm.h
new file mode 100644
index 0000000000..8ea90604d4
--- /dev/null
+++ b/intl/icu/source/tools/toolutil/ucm.h
@@ -0,0 +1,302 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+ *******************************************************************************
+ *   Copyright (C) 2003-2013, International Business Machines
+ *   Corporation and others.  All Rights Reserved.
+ *******************************************************************************
+ *   file name:  ucm.h
+ *   encoding:   UTF-8
+ *   tab size:   8 (not used)
+ *   indentation:4
+ *
+ *   created on: 2003jun20
+ *   created by: Markus W. Scherer
+ *
+ *   Definitions for the .ucm file parser and handler module ucm.c.
+ */
+
+#ifndef __UCM_H__
+#define __UCM_H__
+
+#include "unicode/utypes.h"
+#include "ucnvmbcs.h"
+#include "ucnv_ext.h"
+#include "filestrm.h"
+#include <stdio.h>
+
+#if !UCONFIG_NO_CONVERSION
+
+U_CDECL_BEGIN
+
+/* constants for UCMapping.moveFlag (bit values; the code sets them with |=) */
+enum {
+    UCM_MOVE_TO_EXT=1,   /* ucm_moveMappings() moves such a mapping to the extension table */
+    UCM_REMOVE_MAPPING=2 /* the mapping is to be removed from its table */
+};
+
+/*
+ * Per-mapping data structure
+ *
+ * u if uLen==1: Unicode code point
+ *   else index to uLen code points
+ * b if bLen<=4: up to 4 bytes
+ *   else index to bLen bytes
+ * uLen number of code points
+ * bLen number of bytes
+ * bIsMultipleChars (note: the struct below has no field with this name)
+ *                  indicates that the bytes contain more than one sequence according to the state table
+ * f flag for roundtrip (0), fallback (1), sub mapping (2), reverse fallback (3)
+ *   or "good one-way" mapping (4).
+ *   Same values as in the source file after |; negative if the line had no | indicator
+ */
+typedef struct UCMapping {
+    UChar32 u;
+    union {
+        uint32_t idx;     /* used when bLen>4: index into UCMTable.bytes */
+        uint8_t bytes[4]; /* used when bLen<=4: the bytes themselves */
+    } b;
+    int8_t uLen, bLen, f, moveFlag; /* moveFlag: UCM_MOVE_TO_EXT / UCM_REMOVE_MAPPING bits */
+} UCMapping;
+
+/* constants for UCMTable.flagsType; ucm_addMapping() ORs EXPLICIT/IMPLICIT in, so MIXED==EXPLICIT|IMPLICIT */
+enum {
+    UCM_FLAGS_INITIAL,  /* no mappings parsed yet */
+    UCM_FLAGS_EXPLICIT, /* .ucm file has mappings with | fallback indicators */
+    UCM_FLAGS_IMPLICIT, /* .ucm file has mappings without | fallback indicators, later wins */
+    UCM_FLAGS_MIXED     /* both implicit and explicit */
+};
+
+typedef struct UCMTable {
+    UCMapping *mappings; /* array of mappingsLength entries, grown by ucm_addMapping() */
+    int32_t mappingsCapacity, mappingsLength;
+
+    UChar32 *codePoints; /* code points of the mappings with uLen>1 */
+    int32_t codePointsCapacity, codePointsLength;
+
+    uint8_t *bytes; /* bytes of the mappings with bLen>4 */
+    int32_t bytesCapacity, bytesLength;
+
+    /* index map for mapping by bytes first; freed by ucm_addMapping() when mappings is reallocated */
+    int32_t *reverseMap;
+
+    uint8_t unicodeMask; /* UCNV_HAS_SUPPLEMENTARY / UCNV_HAS_SURROGATES bits, set by ucm_addMapping() */
+    int8_t flagsType; /* UCM_FLAGS_INITIAL etc. */
+    UBool isSorted; /* cleared whenever a mapping is added */
+} UCMTable;
+
+enum {
+    MBCS_STATE_FLAG_DIRECT=1, /* ucm_open() stores this in UCMStates.stateFlags[0] */
+    MBCS_STATE_FLAG_SURROGATES, /* implicitly 2 */
+
+    MBCS_STATE_FLAG_READY=16
+};
+
+typedef struct UCMStates {
+    int32_t stateTable[MBCS_MAX_STATE_COUNT][256]; /* one row of 256 entries per state (presumably indexed by byte value - confirm) */
+    uint32_t stateFlags[MBCS_MAX_STATE_COUNT], /* ucm_open() sets element 0 to MBCS_STATE_FLAG_DIRECT */
+             stateOffsetSum[MBCS_MAX_STATE_COUNT];
+
+    int32_t countStates, minCharLength, maxCharLength, countToUCodeUnits; /* ucm_open() starts min/maxCharLength at 1 */
+    int8_t conversionType, outputType; /* ucm_open() starts these at UCNV_UNSUPPORTED_CONVERTER and -1 */
+} UCMStates;
+
+typedef struct UCMFile {
+    UCMTable *base, *ext; /* both allocated by ucm_open() and freed by ucm_close() */
+    UCMStates states; /* embedded by value; given initial settings by ucm_open() */
+
+    char baseName[UCNV_MAX_CONVERTER_NAME_LENGTH];
+} UCMFile;
+
+/* simple accesses ---------------------------------------------------------- */
+/* Pointer to a mapping's code points: &m->u for a single one, else into t->codePoints. Evaluates m more than once. */
+#define UCM_GET_CODE_POINTS(t, m) \
+    (((m)->uLen==1) ? &(m)->u : (t)->codePoints+(m)->u)
+/* Pointer to a mapping's bytes: m->b.bytes for up to 4 bytes, else into t->bytes. Evaluates m more than once. */
+#define UCM_GET_BYTES(t, m) \
+    (((m)->bLen<=4) ? (m)->b.bytes : (t)->bytes+(m)->b.idx)
+
+/* APIs --------------------------------------------------------------------- */
+
+U_CAPI UCMFile * U_EXPORT2
+ucm_open(void);
+
+U_CAPI void U_EXPORT2
+ucm_close(UCMFile *ucm);
+
+U_CAPI UBool U_EXPORT2
+ucm_parseHeaderLine(UCMFile *ucm,
+                    char *line, char **pKey, char **pValue);
+
+/* @return -1 illegal bytes  0 suitable for base table  1 needs to go into extension table */
+U_CAPI int32_t U_EXPORT2
+ucm_mappingType(UCMStates *baseStates,
+                UCMapping *m,
+                UChar32 codePoints[UCNV_EXT_MAX_UCHARS],
+                uint8_t bytes[UCNV_EXT_MAX_BYTES]);
+
+/* add a mapping to the base or extension table as appropriate */
+U_CAPI UBool U_EXPORT2
+ucm_addMappingAuto(UCMFile *ucm, UBool forBase, UCMStates *baseStates,
+                   UCMapping *m,
+                   UChar32 codePoints[UCNV_EXT_MAX_UCHARS],
+                   uint8_t bytes[UCNV_EXT_MAX_BYTES]);
+
+U_CAPI UBool U_EXPORT2
+ucm_addMappingFromLine(UCMFile *ucm, const char *line, UBool forBase, UCMStates *baseStates);
+
+
+U_CAPI UCMTable * U_EXPORT2
+ucm_openTable(void);
+
+U_CAPI void U_EXPORT2
+ucm_closeTable(UCMTable *table);
+
+U_CAPI void U_EXPORT2
+ucm_resetTable(UCMTable *table);
+
+U_CAPI void U_EXPORT2
+ucm_sortTable(UCMTable *t);
+
+/*
+ * Remove mappings with their move flag set from the base table
+ * and move some of them (with UCM_MOVE_TO_EXT) to the extension table.
+ */
+U_CAPI void U_EXPORT2
+ucm_moveMappings(UCMTable *base, UCMTable *ext);
+
+/**
+ * Read a table from a .ucm file, from after the CHARMAP line to
+ * including the END CHARMAP line.
+ */
+U_CAPI void U_EXPORT2
+ucm_readTable(UCMFile *ucm, FileStream* convFile,
+              UBool forBase, UCMStates *baseStates,
+              UErrorCode *pErrorCode);
+
+/**
+ * Check the validity of mappings against a base table's states;
+ * necessary for extension-only tables that were read before their base tables.
+ */
+U_CAPI UBool U_EXPORT2
+ucm_checkValidity(UCMTable *ext, UCMStates *baseStates);
+
+/**
+ * Check a base table against an extension table.
+ * Set the moveTarget!=NULL if it is possible to move mappings from the base.
+ * This is the case where base and extension tables are parsed from a single file
+ * (moveTarget==ext)
+ * or when delta file mappings are subtracted from a base table.
+ *
+ * When a base table cannot be modified because a delta file is parsed in makeconv,
+ * then set moveTarget=NULL.
+ *
+ * if(intersectBase) then mappings that exist in the base table but not in
+ * the extension table are moved to moveTarget instead of showing an error.
+ *
+ * Special mode:
+ * If intersectBase==2 for a DBCS extension table, then SBCS mappings are
+ * not moved out of the base unless their Unicode input requires it.
+ * This helps ucmkbase generate base tables for DBCS-only extension .cnv files.
+ *
+ * For both tables in the same file, the extension table is automatically
+ * built.
+ * For separate files, the extension file can use a complete mapping table (.ucm file),
+ * so that common mappings need not be stripped out manually.
+ *
+ *
+ * Sort both tables, and then for each mapping direction:
+ *
+ * If intersectBase is true and the base table contains a mapping
+ * that does not exist in the extension table, then this mapping is moved
+ * to moveTarget.
+ *
+ * - otherwise -
+ *
+ * If the base table contains a mapping for which the input sequence is
+ * the same as the extension input, then
+ * - if the output is the same: remove the extension mapping
+ * - else: error
+ *
+ * If the base table contains a mapping for which the input sequence is
+ * a prefix of the extension input, then
+ * - if moveTarget!=NULL: move the base mapping to the moveTarget table
+ * - else: error
+ *
+ * @return false in case of an irreparable error
+ */
+U_CAPI UBool U_EXPORT2
+ucm_checkBaseExt(UCMStates *baseStates, UCMTable *base, UCMTable *ext,
+                 UCMTable *moveTarget, UBool intersectBase);
+
+U_CAPI void U_EXPORT2
+ucm_printTable(UCMTable *table, FILE *f, UBool byUnicode);
+
+U_CAPI void U_EXPORT2
+ucm_printMapping(UCMTable *table, UCMapping *m, FILE *f);
+
+
+U_CAPI void U_EXPORT2
+ucm_addState(UCMStates *states, const char *s);
+
+U_CAPI void U_EXPORT2
+ucm_processStates(UCMStates *states, UBool ignoreSISOCheck);
+
+U_CAPI int32_t U_EXPORT2
+ucm_countChars(UCMStates *states,
+               const uint8_t *bytes, int32_t length);
+
+
+U_CAPI int8_t U_EXPORT2
+ucm_parseBytes(uint8_t bytes[UCNV_EXT_MAX_BYTES], const char *line, const char **ps);
+
+U_CAPI UBool U_EXPORT2
+ucm_parseMappingLine(UCMapping *m,
+                     UChar32 codePoints[UCNV_EXT_MAX_UCHARS],
+                     uint8_t bytes[UCNV_EXT_MAX_BYTES],
+                     const char *line);
+
+U_CAPI void U_EXPORT2
+ucm_addMapping(UCMTable *table,
+               UCMapping *m,
+               UChar32 codePoints[UCNV_EXT_MAX_UCHARS],
+               uint8_t bytes[UCNV_EXT_MAX_BYTES]);
+
+/* very makeconv-specific functions ----------------------------------------- */
+
+/* finalize and optimize states after the toUnicode mappings are processed */
+U_CAPI void U_EXPORT2
+ucm_optimizeStates(UCMStates *states,
+                   uint16_t **pUnicodeCodeUnits,
+                   _MBCSToUFallback *toUFallbacks, int32_t countToUFallbacks,
+                   UBool verbose);
+
+/* moved here because it is used inside ucmstate.c */
+U_CAPI int32_t U_EXPORT2
+ucm_findFallback(_MBCSToUFallback *toUFallbacks, int32_t countToUFallbacks,
+                 uint32_t offset);
+
+/* very rptp2ucm-specific functions ----------------------------------------- */
+
+/*
+ * Input: Separate tables with mappings from/to Unicode,
+ * subchar and subchar1 (0 if none).
+ * All mappings must have flag 0.
+ *
+ * Output: fromUTable will contain the union of mappings with the correct
+ * precision flags. Both tables are sorted first; fromUTable is marked unsorted on return.
+ */
+U_CAPI void U_EXPORT2
+ucm_mergeTables(UCMTable *fromUTable, UCMTable *toUTable,
+                const uint8_t *subchar, int32_t subcharLength,
+                uint8_t subchar1);
+
+U_CAPI UBool U_EXPORT2
+ucm_separateMappings(UCMFile *ucm, UBool isSISO);
+
+U_CDECL_END
+
+#endif
+
+#endif
+
diff --git a/intl/icu/source/tools/toolutil/ucmstate.cpp b/intl/icu/source/tools/toolutil/ucmstate.cpp
new file mode 100644
index 0000000000..08782f68d1
--- /dev/null
+++ b/intl/icu/source/tools/toolutil/ucmstate.cpp
@@ -0,0 +1,1053 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+*
+*   Copyright (C) 2003-2012, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*
+*******************************************************************************
+*   file name:  ucmstate.c
+*   encoding:   UTF-8
+*   tab size:   8 (not used)
+*   indentation:4
+*
+*   created on: 2003oct09
+*   created by: Markus W. Scherer
+*
+*   This file handles ICU .ucm file state information as part of the ucm module.
+*   Most of this code used to be in makeconv.c.
+*/
+
+#include "unicode/utypes.h"
+#include "cstring.h"
+#include "cmemory.h"
+#include "uarrsort.h"
+#include "ucnvmbcs.h"
+#include "ucnv_ext.h"
+#include "uparse.h"
+#include "ucm.h"
+#include <stdio.h>
+
+#if !UCONFIG_NO_CONVERSION
+
+/* MBCS state handling ------------------------------------------------------ */
+
+/* Parses one state table row from s into state[256] and records a leading directive in *pFlags.
+ * state table row grammar (ebnf-style):
+ * (whitespace is allowed between all tokens)
+ *
+ * row=[[firstentry ','] entry (',' entry)*]
+ * firstentry="initial" | "surrogates"
+ *            (initial state (default for state 0), output is all surrogate pairs)
+ * entry=range [':' nextstate] ['.' action]
+ * range=number ['-' number]
+ * nextstate=number
+ *           (0..7f)
+ * action='u' | 's' | 'p' | 'i'
+ *        (unassigned, state change only, surrogate pair, illegal)
+ * number=(1- or 2-digit hexadecimal number)
+ * Returns nullptr when the whole row was parsed, otherwise a pointer into s where parsing stopped. */
+static const char *
+parseState(const char *s, int32_t state[256], uint32_t *pFlags) {
+    const char *t; /* receives the end of the number just read by uprv_strtoul() */
+    uint32_t start, end, i; /* inclusive byte range of the current entry; i doubles as the next-state value */
+    int32_t entry;
+
+    /* initialize the state: all illegal with U+ffff */
+    for(i=0; i<256; ++i) {
+        state[i]=MBCS_ENTRY_FINAL(0, MBCS_STATE_ILLEGAL, 0xffff);
+    }
+
+    /* skip leading white space */
+    s=u_skipWhitespace(s);
+
+    /* is there an "initial" or "surrogates" directive? */
+    if(uprv_strncmp("initial", s, 7)==0) {
+        *pFlags=MBCS_STATE_FLAG_DIRECT;
+        s=u_skipWhitespace(s+7);
+        if(*s++!=',') {
+            return s-1; /* a directive must be followed by ','; point back at the offending character */
+        }
+    } else if(*pFlags==0 && uprv_strncmp("surrogates", s, 10)==0) { /* only honored while no flag is set yet */
+        *pFlags=MBCS_STATE_FLAG_SURROGATES;
+        s=u_skipWhitespace(s+10);
+        if(*s++!=',') {
+            return s-1; /* same as above: undo the post-increment for the error position */
+        }
+    } else if(*s==0) {
+        /* empty state row: all-illegal */
+        return nullptr;
+    }
+
+    for(;;) {
+        /* read an entry, the start of the range first */
+        s=u_skipWhitespace(s);
+        start=uprv_strtoul(s, (char **)&t, 16);
+        if(s==t || 0xff<start) { /* nothing was consumed, or the value does not fit into one byte */
+            return s;
+        }
+        s=u_skipWhitespace(t);
+
+        /* read the end of the range if there is one */
+        if(*s=='-') {
+            s=u_skipWhitespace(s+1);
+            end=uprv_strtoul(s, (char **)&t, 16);
+            if(s==t || end<start || 0xff<end) { /* also rejects a descending range */
+                return s;
+            }
+            s=u_skipWhitespace(t);
+        } else {
+            end=start;
+        }
+
+        /* determine the state entry for this range */
+        if(*s!=':' && *s!='.') {
+            /* the default is: final state with valid entries */
+            entry=MBCS_ENTRY_FINAL(0, MBCS_STATE_VALID_16, 0);
+        } else {
+            entry=MBCS_ENTRY_TRANSITION(0, 0);
+            if(*s==':') {
+                /* get the next state, default to 0 */
+                s=u_skipWhitespace(s+1);
+                i=uprv_strtoul(s, (char **)&t, 16);
+                if(s!=t) {
+                    if(0x7f<i) { /* the grammar limits next states to 0..7f */
+                        return s;
+                    }
+                    s=u_skipWhitespace(t);
+                    entry=MBCS_ENTRY_SET_STATE(entry, i);
+                }
+            }
+
+            /* get the state action, default to valid */
+            if(*s=='.') {
+                /* this is a final state */
+                entry=MBCS_ENTRY_SET_FINAL(entry);
+
+                s=u_skipWhitespace(s+1);
+                if(*s=='u') {
+                    /* unassigned set U+fffe */
+                    entry=MBCS_ENTRY_FINAL_SET_ACTION_VALUE(entry, MBCS_STATE_UNASSIGNED, 0xfffe);
+                    s=u_skipWhitespace(s+1);
+                } else if(*s=='p') { /* surrogate pair; in an "initial" row it becomes VALID_16, adjusted further below */
+                    if(*pFlags!=MBCS_STATE_FLAG_DIRECT) {
+                        entry=MBCS_ENTRY_FINAL_SET_ACTION(entry, MBCS_STATE_VALID_16_PAIR);
+                    } else {
+                        entry=MBCS_ENTRY_FINAL_SET_ACTION(entry, MBCS_STATE_VALID_16);
+                    }
+                    s=u_skipWhitespace(s+1);
+                } else if(*s=='s') {
+                    entry=MBCS_ENTRY_FINAL_SET_ACTION(entry, MBCS_STATE_CHANGE_ONLY);
+                    s=u_skipWhitespace(s+1);
+                } else if(*s=='i') {
+                    /* illegal set U+ffff */
+                    entry=MBCS_ENTRY_FINAL_SET_ACTION_VALUE(entry, MBCS_STATE_ILLEGAL, 0xffff);
+                    s=u_skipWhitespace(s+1);
+                } else {
+                    /* default to valid */
+                    entry=MBCS_ENTRY_FINAL_SET_ACTION(entry, MBCS_STATE_VALID_16);
+                }
+            } else {
+                /* this is an intermediate state, nothing to do */
+            }
+        }
+
+        /* adjust "final valid" states according to the state flags */
+        if(MBCS_ENTRY_FINAL_ACTION(entry)==MBCS_STATE_VALID_16) {
+            switch(*pFlags) {
+            case 0:
+                /* no adjustment */
+                break;
+            case MBCS_STATE_FLAG_DIRECT:
+                /* set the valid-direct code point to "unassigned"==0xfffe */
+                entry=MBCS_ENTRY_FINAL_SET_ACTION_VALUE(entry, MBCS_STATE_VALID_DIRECT_16, 0xfffe);
+                break;
+            case MBCS_STATE_FLAG_SURROGATES:
+                entry=MBCS_ENTRY_FINAL_SET_ACTION_VALUE(entry, MBCS_STATE_VALID_16_PAIR, 0);
+                break;
+            default:
+                break;
+            }
+        }
+
+        /* set this entry for the range */
+        for(i=start; i<=end; ++i) {
+            state[i]=entry;
+        }
+
+        if(*s==',') {
+            ++s;
+        } else {
+            return *s==0 ? nullptr : s; /* end of string: success; anything else is reported as the error position */
+        }
+    }
+}
+
+U_CAPI void U_EXPORT2
+ucm_addState(UCMStates *states, const char *s) {
+    const char *error;
+    /* Appends the state row described by s to states; exits the tool if the table is full or s does not parse. */
+    if(states->countStates>=MBCS_MAX_STATE_COUNT) { /* >= so that a count beyond the limit is rejected as well */
+        fprintf(stderr, "ucm error: too many states (maximum %u)\n", (unsigned int)MBCS_MAX_STATE_COUNT);
+        exit(U_INVALID_TABLE_FORMAT);
+    }
+    /* parseState() fills the next free row and its flags; a non-null result points at the parse error */
+    error=parseState(s, states->stateTable[states->countStates],
+                       &states->stateFlags[states->countStates]);
+    if(error!=nullptr) {
+        fprintf(stderr, "ucm error: parse error in state definition at '%s'\n", error);
+        exit(U_INVALID_TABLE_FORMAT);
+    }
+    /* the row only counts once it has been parsed successfully */
+    ++states->countStates;
+}
+
+U_CAPI UBool U_EXPORT2
+ucm_parseHeaderLine(UCMFile *ucm,
+                    char *line, char **pKey, char **pValue) {
+    UCMStates *states;
+    char *s, *end;
+    char c;
+    /* Parses one header line in place. Returns true for empty lines and keys handled here, false at CHARMAP or for other keys. */
+    states=&ucm->states;
+
+    /* remove comments and trailing CR and LF and remove whitespace from the end */
+    for(end=line; (c=*end)!=0; ++end) {
+        if(c=='#' || c=='\r' || c=='\n') {
+            break;
+        }
+    }
+    while(end>line && (*(end-1)==' ' || *(end-1)=='\t')) {
+        --end;
+    }
+    *end=0;
+
+    /* skip leading white space and ignore empty lines (*pKey and *pValue are not set in this case) */
+    s=(char *)u_skipWhitespace(line);
+    if(*s==0) {
+        return true;
+    }
+
+    /* stop at the beginning of the mapping section; strncmp stops at the NUL of a line shorter than 7 bytes */
+    if(uprv_strncmp(s, "CHARMAP", 7)==0) {
+        return false;
+    }
+
+    /* get the key name, bracketed in <> */
+    if(*s!='<') {
+        fprintf(stderr, "ucm error: no header field <key> in line \"%s\"\n", line);
+        exit(U_INVALID_TABLE_FORMAT);
+    }
+    *pKey=++s;
+    while(*s!='>') {
+        if(*s==0) {
+            fprintf(stderr, "ucm error: incomplete header field <key> in line \"%s\"\n", line);
+            exit(U_INVALID_TABLE_FORMAT);
+        }
+        ++s;
+    }
+    *s=0; /* terminate the key in place, overwriting the '>' */
+
+    /* get the value string, possibly quoted */
+    s=(char *)u_skipWhitespace(s+1);
+    if(*s!='"') {
+        *pValue=s;
+    } else {
+        /* remove the quotes */
+        *pValue=s+1;
+        if(end>*pValue && *(end-1)=='"') {
+            *--end=0;
+        }
+    }
+
+    /* collect the information from the header field, ignore unknown keys */
+    if(uprv_strcmp(*pKey, "uconv_class")==0) {
+        if(uprv_strcmp(*pValue, "DBCS")==0) {
+            states->conversionType=UCNV_DBCS;
+        } else if(uprv_strcmp(*pValue, "SBCS")==0) {
+            states->conversionType = UCNV_SBCS;
+        } else if(uprv_strcmp(*pValue, "MBCS")==0) {
+            states->conversionType = UCNV_MBCS;
+        } else if(uprv_strcmp(*pValue, "EBCDIC_STATEFUL")==0) {
+            states->conversionType = UCNV_EBCDIC_STATEFUL;
+        } else {
+            fprintf(stderr, "ucm error: unknown <uconv_class> %s\n", *pValue);
+            exit(U_INVALID_TABLE_FORMAT);
+        }
+        return true;
+    } else if(uprv_strcmp(*pKey, "mb_cur_max")==0) {
+        c=**pValue;
+        if('1'<=c && c<='4' && (*pValue)[1]==0) { /* exactly one digit 1..4 */
+            states->maxCharLength=(int8_t)(c-'0');
+            states->outputType=(int8_t)(states->maxCharLength-1);
+        } else {
+            fprintf(stderr, "ucm error: illegal <mb_cur_max> %s\n", *pValue);
+            exit(U_INVALID_TABLE_FORMAT);
+        }
+        return true;
+    } else if(uprv_strcmp(*pKey, "mb_cur_min")==0) {
+        c=**pValue;
+        if('1'<=c && c<='4' && (*pValue)[1]==0) { /* exactly one digit 1..4 */
+            states->minCharLength=(int8_t)(c-'0');
+        } else {
+            fprintf(stderr, "ucm error: illegal <mb_cur_min> %s\n", *pValue);
+            exit(U_INVALID_TABLE_FORMAT);
+        }
+        return true;
+    } else if(uprv_strcmp(*pKey, "icu:state")==0) {
+        /* if an SBCS/DBCS/EBCDIC_STATEFUL converter has icu:state, then turn it into MBCS */
+        switch(states->conversionType) {
+        case UCNV_SBCS:
+        case UCNV_DBCS:
+        case UCNV_EBCDIC_STATEFUL:
+            states->conversionType=UCNV_MBCS;
+            break;
+        case UCNV_MBCS:
+            break;
+        default:
+            fprintf(stderr, "ucm error: <icu:state> entry for non-MBCS table or before the <uconv_class> line\n");
+            exit(U_INVALID_TABLE_FORMAT);
+        }
+
+        if(states->maxCharLength==0) {
+            fprintf(stderr, "ucm error: <icu:state> before the <mb_cur_max> line\n");
+            exit(U_INVALID_TABLE_FORMAT);
+        }
+        ucm_addState(states, *pValue);
+        return true;
+    } else if(uprv_strcmp(*pKey, "icu:base")==0) {
+        if(**pValue==0 || uprv_strlen(*pValue)>=sizeof(ucm->baseName)) { /* NOTE(review): relies on baseName being a char array in UCMFile - confirm in ucm.h */
+            fprintf(stderr, "%s", **pValue==0 ? "ucm error: <icu:base> without a base table name\n" : "ucm error: <icu:base> base table name is too long\n");
+            exit(U_INVALID_TABLE_FORMAT);
+        }
+        uprv_strcpy(ucm->baseName, *pValue); /* cannot overflow: the length including the NUL was checked above */
+        return true;
+    }
+
+    return false;
+}
+
+/* post-processing ---------------------------------------------------------- */
+
+static int32_t
+sumUpStates(UCMStates *states) {
+    int32_t entry, sum, state, cell, count;
+    UBool allStatesReady;
+    /* Returns the total code unit count rounded up to even (also stored in countToUCodeUnits); exits if states loop. */
+    /*
+     * Sum up the offsets for all states.
+     * In each final state (where there are only final entries),
+     * the offsets add up directly.
+     * In all other state table rows, for each transition entry to another state,
+     * the offsets sum of that state needs to be added.
+     * This is achieved in at most countStates iterations.
+     */
+    allStatesReady=false;
+    for(count=states->countStates; !allStatesReady && count>=0; --count) { /* count only bounds the number of passes */
+        allStatesReady=true;
+        for(state=states->countStates-1; state>=0; --state) {
+            if(!(states->stateFlags[state]&MBCS_STATE_FLAG_READY)) {
+                allStatesReady=false; /* forces one more pass, even if this state becomes ready below */
+                sum=0;
+
+                /* at first, add up only the final delta offsets to keep them <512 */
+                for(cell=0; cell<256; ++cell) {
+                    entry=states->stateTable[state][cell];
+                    if(MBCS_ENTRY_IS_FINAL(entry)) {
+                        switch(MBCS_ENTRY_FINAL_ACTION(entry)) {
+                        case MBCS_STATE_VALID_16:
+                            states->stateTable[state][cell]=MBCS_ENTRY_FINAL_SET_VALUE(entry, sum);
+                            sum+=1;
+                            break;
+                        case MBCS_STATE_VALID_16_PAIR:
+                            states->stateTable[state][cell]=MBCS_ENTRY_FINAL_SET_VALUE(entry, sum);
+                            sum+=2;
+                            break;
+                        default:
+                            /* no addition */
+                            break;
+                        }
+                    }
+                }
+
+                /* now, add up the delta offsets for the transitional entries */
+                for(cell=0; cell<256; ++cell) {
+                    entry=states->stateTable[state][cell];
+                    if(MBCS_ENTRY_IS_TRANSITION(entry)) {
+                        if(states->stateFlags[MBCS_ENTRY_TRANSITION_STATE(entry)]&MBCS_STATE_FLAG_READY) {
+                            states->stateTable[state][cell]=MBCS_ENTRY_TRANSITION_SET_OFFSET(entry, sum);
+                            sum+=states->stateOffsetSum[MBCS_ENTRY_TRANSITION_STATE(entry)];
+                        } else {
+                            /* that next state does not have a sum yet, we cannot finish the one for this state */
+                            sum=-1; /* sentinel: this state is retried in a later pass */
+                            break;
+                        }
+                    }
+                }
+
+                if(sum!=-1) {
+                    states->stateOffsetSum[state]=sum;
+                    states->stateFlags[state]|=MBCS_STATE_FLAG_READY;
+                }
+            }
+        }
+    }
+
+    if(!allStatesReady) {
+        fprintf(stderr, "ucm error: the state table contains loops\n");
+        exit(U_INVALID_TABLE_FORMAT);
+    }
+
+    /*
+     * For all "direct" (i.e., initial) states>0,
+     * the offsets need to be increased by the sum of
+     * the previous initial states.
+     */
+    sum=states->stateOffsetSum[0];
+    for(state=1; state<states->countStates; ++state) {
+        if((states->stateFlags[state]&0xf)==MBCS_STATE_FLAG_DIRECT) {
+            int32_t sum2=sum; /* total of the earlier initial states, before this one is added */
+            sum+=states->stateOffsetSum[state];
+            for(cell=0; cell<256; ++cell) {
+                entry=states->stateTable[state][cell];
+                if(MBCS_ENTRY_IS_TRANSITION(entry)) {
+                    states->stateTable[state][cell]=MBCS_ENTRY_TRANSITION_ADD_OFFSET(entry, sum2);
+                }
+            }
+        }
+    }
+
+    /* round up to the next even number to have the following data 32-bit-aligned */
+    return states->countToUCodeUnits=(sum+1)&~1;
+}
+
+U_CAPI void U_EXPORT2
+ucm_processStates(UCMStates *states, UBool ignoreSISOCheck) {
+    int32_t entry, state, cell, count;
+    /* Installs a default state table if none was given, validates the table, then sums up its offsets; exits on error. */
+    if(states->conversionType==UCNV_UNSUPPORTED_CONVERTER) {
+        fprintf(stderr, "ucm error: missing conversion type (<uconv_class>)\n");
+        exit(U_INVALID_TABLE_FORMAT);
+    }
+
+    if(states->countStates==0) {
+        switch(states->conversionType) {
+        case UCNV_SBCS:
+            /* SBCS: use MBCS data structure with a default state table */
+            if(states->maxCharLength!=1) {
+                fprintf(stderr, "error: SBCS codepage with max B/char!=1\n");
+                exit(U_INVALID_TABLE_FORMAT);
+            }
+            states->conversionType=UCNV_MBCS;
+            ucm_addState(states, "0-ff");
+            break;
+        case UCNV_MBCS:
+            fprintf(stderr, "ucm error: missing state table information (<icu:state>) for MBCS\n");
+            exit(U_INVALID_TABLE_FORMAT);
+            break;
+        case UCNV_EBCDIC_STATEFUL:
+            /* EBCDIC_STATEFUL: use MBCS data structure with a default state table */
+            if(states->minCharLength!=1 || states->maxCharLength!=2) {
+                fprintf(stderr, "error: EBCDIC_STATEFUL codepage with min B/char!=1 or max B/char!=2\n"); /* used to say "DBCS" */
+                exit(U_INVALID_TABLE_FORMAT);
+            }
+            states->conversionType=UCNV_MBCS;
+            ucm_addState(states, "0-ff, e:1.s, f:0.s");
+            ucm_addState(states, "initial, 0-3f:4, e:1.s, f:0.s, 40:3, 41-fe:2, ff:4");
+            ucm_addState(states, "0-40:1.i, 41-fe:1., ff:1.i");
+            ucm_addState(states, "0-ff:1.i, 40:1.");
+            ucm_addState(states, "0-ff:1.i");
+            break;
+        case UCNV_DBCS:
+            /* DBCS: use MBCS data structure with a default state table */
+            if(states->minCharLength!=2 || states->maxCharLength!=2) {
+                fprintf(stderr, "error: DBCS codepage with min or max B/char!=2\n");
+                exit(U_INVALID_TABLE_FORMAT);
+            }
+            states->conversionType = UCNV_MBCS;
+            ucm_addState(states, "0-3f:3, 40:2, 41-fe:1, ff:3");
+            ucm_addState(states, "41-fe");
+            ucm_addState(states, "40");
+            ucm_addState(states, "");
+            break;
+        default:
+            fprintf(stderr, "ucm error: unknown charset structure\n");
+            exit(U_INVALID_TABLE_FORMAT);
+            break;
+        }
+    }
+
+    /*
+     * check that the min/max character lengths are reasonable;
+     * to do this right, all paths through the state table would have to be
+     * recursively walked while keeping track of the sequence lengths,
+     * but these simple checks cover most state tables in practice
+     */
+    if(states->maxCharLength<states->minCharLength) {
+        fprintf(stderr, "ucm error: max B/char < min B/char\n");
+        exit(U_INVALID_TABLE_FORMAT);
+    }
+
+    /* count non-direct states and compare with max B/char */
+    count=0;
+    for(state=0; state<states->countStates; ++state) {
+        if((states->stateFlags[state]&0xf)!=MBCS_STATE_FLAG_DIRECT) {
+            ++count;
+        }
+    }
+    if(states->maxCharLength>count+1) {
+        fprintf(stderr, "ucm error: max B/char too large\n");
+        exit(U_INVALID_TABLE_FORMAT);
+    }
+
+    if(states->minCharLength==1) {
+        int32_t action;
+
+        /*
+         * if there are single-byte characters,
+         * then the initial state must have direct result states
+         */
+        for(cell=0; cell<256; ++cell) {
+            entry=states->stateTable[0][cell];
+            if( MBCS_ENTRY_IS_FINAL(entry) &&
+                ((action=MBCS_ENTRY_FINAL_ACTION(entry))==MBCS_STATE_VALID_DIRECT_16 ||
+                 action==MBCS_STATE_UNASSIGNED)
+            ) {
+                break;
+            }
+        }
+
+        if(cell==256) { /* the loop never hit its break: state 0 has no direct result; only warn */
+            fprintf(stderr, "ucm warning: min B/char too small\n");
+        }
+    }
+
+    /*
+     * make sure that all "next state" values are within limits
+     * and that all next states after final ones have the "direct"
+     * flag of initial states
+     */
+    for(state=states->countStates-1; state>=0; --state) {
+        for(cell=0; cell<256; ++cell) {
+            entry=states->stateTable[state][cell];
+            if((uint8_t)MBCS_ENTRY_STATE(entry)>=states->countStates) {
+                fprintf(stderr, "ucm error: state table entry [%x][%x] has a next state of %x that is too high\n",
+                    (int)state, (int)cell, (int)MBCS_ENTRY_STATE(entry));
+                exit(U_INVALID_TABLE_FORMAT);
+            }
+            if(MBCS_ENTRY_IS_FINAL(entry) && (states->stateFlags[MBCS_ENTRY_STATE(entry)]&0xf)!=MBCS_STATE_FLAG_DIRECT) {
+                fprintf(stderr, "ucm error: state table entry [%x][%x] is final but has a non-initial next state of %x\n",
+                    (int)state, (int)cell, (int)MBCS_ENTRY_STATE(entry));
+                exit(U_INVALID_TABLE_FORMAT);
+            } else if(MBCS_ENTRY_IS_TRANSITION(entry) && (states->stateFlags[MBCS_ENTRY_STATE(entry)]&0xf)==MBCS_STATE_FLAG_DIRECT) {
+                fprintf(stderr, "ucm error: state table entry [%x][%x] is not final but has an initial next state of %x\n",
+                    (int)state, (int)cell, (int)MBCS_ENTRY_STATE(entry));
+                exit(U_INVALID_TABLE_FORMAT);
+            }
+        }
+    }
+
+    /* is this an SI/SO (like EBCDIC-stateful) state table? */
+    if(states->countStates>=2 && (states->stateFlags[1]&0xf)==MBCS_STATE_FLAG_DIRECT) {
+        if(states->maxCharLength!=2) {
+            fprintf(stderr, "ucm error: SI/SO codepages must have max 2 bytes/char (not %x)\n", (int)states->maxCharLength);
+            exit(U_INVALID_TABLE_FORMAT);
+        }
+        if(states->countStates<3) {
+            fprintf(stderr, "ucm error: SI/SO codepages must have at least 3 states (not %x)\n", (int)states->countStates);
+            exit(U_INVALID_TABLE_FORMAT);
+        }
+        /* are the SI/SO all in the right places? */
+        if( ignoreSISOCheck ||
+           (states->stateTable[0][0xe]==MBCS_ENTRY_FINAL(1, MBCS_STATE_CHANGE_ONLY, 0) &&
+            states->stateTable[0][0xf]==MBCS_ENTRY_FINAL(0, MBCS_STATE_CHANGE_ONLY, 0) &&
+            states->stateTable[1][0xe]==MBCS_ENTRY_FINAL(1, MBCS_STATE_CHANGE_ONLY, 0) &&
+            states->stateTable[1][0xf]==MBCS_ENTRY_FINAL(0, MBCS_STATE_CHANGE_ONLY, 0))
+        ) {
+            states->outputType=MBCS_OUTPUT_2_SISO;
+        } else {
+            fprintf(stderr, "ucm error: SI/SO codepages must have in states 0 and 1 transitions e:1.s, f:0.s\n");
+            exit(U_INVALID_TABLE_FORMAT);
+        }
+        state=2; /* state 1 is an accepted initial state here, so the check below starts after it */
+    } else {
+        state=1; /* state 0 is not examined by the check below */
+    }
+
+    /* check that no unexpected state is a "direct" one */
+    while(state<states->countStates) {
+        if((states->stateFlags[state]&0xf)==MBCS_STATE_FLAG_DIRECT) {
+            fprintf(stderr, "ucm error: state %d is 'initial' - not supported except for SI/SO codepages\n", (int)state);
+            exit(U_INVALID_TABLE_FORMAT);
+        }
+        ++state;
+    }
+
+    sumUpStates(states);
+}
+
+/* look up the fallback entry whose offset equals the given one; yields its index, or -1 if there is none */
+U_CAPI int32_t U_EXPORT2
+ucm_findFallback(_MBCSToUFallback *toUFallbacks, int32_t countToUFallbacks,
+                 uint32_t offset) {
+    int32_t found=-1;
+    int32_t idx=0;
+    /*
+     * The table is not sorted yet, so scan it front to back and stop at the
+     * first match. Most codepages have no codepage-to-Unicode fallbacks, in
+     * which case the loop body never runs.
+     */
+    while(found<0 && idx<countToUFallbacks) {
+        if(toUFallbacks[idx].offset==offset) {
+            found=idx;
+        }
+        ++idx;
+    }
+    return found;
+}
+
+/*
+ * This function tries to compact toUnicode tables for 2-byte codepages
+ * by finding lead bytes with all-unassigned trail bytes and adding another state
+ * for them.
+ * On success *pUnicodeCodeUnits is replaced by a newly allocated array and the old one is freed. */
+static void
+compactToUnicode2(UCMStates *states,
+                  uint16_t **pUnicodeCodeUnits,
+                  _MBCSToUFallback *toUFallbacks, int32_t countToUFallbacks,
+                  UBool verbose) {
+    int32_t (*oldStateTable)[256];
+    uint16_t count[256]; /* first indexed by target state, later reused per lead byte */
+    uint16_t *oldUnicodeCodeUnits;
+    int32_t entry, offset, oldOffset, trailOffset, oldTrailOffset, savings, sum;
+    int32_t i, j, leadState, trailState, newState, fallback;
+    uint16_t unit;
+
+    /* find the lead state */
+    if(states->outputType==MBCS_OUTPUT_2_SISO) {
+        /* use the DBCS lead state for SI/SO codepages */
+        leadState=1;
+    } else {
+        leadState=0;
+    }
+
+    /* find the main trail state: the most used target state */
+    uprv_memset(count, 0, sizeof(count));
+    for(i=0; i<256; ++i) {
+        entry=states->stateTable[leadState][i];
+        if(MBCS_ENTRY_IS_TRANSITION(entry)) {
+            ++count[MBCS_ENTRY_TRANSITION_STATE(entry)];
+        }
+    }
+    trailState=0;
+    for(i=1; i<states->countStates; ++i) {
+        if(count[i]>count[trailState]) {
+            trailState=i;
+        }
+    }
+
+    /* count possible savings from lead bytes with all-unassigned results in all trail bytes */
+    uprv_memset(count, 0, sizeof(count));
+    savings=0;
+    /* for each lead byte */
+    for(i=0; i<256; ++i) {
+        entry=states->stateTable[leadState][i];
+        if(MBCS_ENTRY_IS_TRANSITION(entry) &&
+                (MBCS_ENTRY_TRANSITION_STATE(entry))==static_cast<uint32_t>(trailState)) {
+            /* the offset is different for each lead byte */
+            offset=MBCS_ENTRY_TRANSITION_OFFSET(entry);
+            /* for each trail byte for this lead byte */
+            for(j=0; j<256; ++j) {
+                entry=states->stateTable[trailState][j];
+                switch(MBCS_ENTRY_FINAL_ACTION(entry)) {
+                case MBCS_STATE_VALID_16:
+                    entry=offset+MBCS_ENTRY_FINAL_VALUE_16(entry);
+                    if((*pUnicodeCodeUnits)[entry]==0xfffe && ucm_findFallback(toUFallbacks, countToUFallbacks, entry)<0) {
+                        ++count[i];
+                    } else {
+                        j=999; /* do not count for this lead byte because there are assignments */
+                    }
+                    break;
+                case MBCS_STATE_VALID_16_PAIR:
+                    entry=offset+MBCS_ENTRY_FINAL_VALUE_16(entry);
+                    if((*pUnicodeCodeUnits)[entry]==0xfffe) {
+                        count[i]+=2;
+                    } else {
+                        j=999; /* do not count for this lead byte because there are assignments */
+                    }
+                    break;
+                default:
+                    break;
+                }
+            }
+            if(j==256) { /* an aborted scan leaves j at 1000 after the loop increment */
+                /* all trail bytes for this lead byte are unassigned */
+                savings+=count[i];
+            } else {
+                count[i]=0;
+            }
+        }
+    }
+    /* subtract from the possible savings the cost of an additional state */
+    savings=savings*2-1024; /* count bytes, not 16-bit words */
+    if(savings<=0) {
+        return;
+    }
+    if(verbose) {
+        printf("compacting toUnicode data saves %ld bytes\n", (long)savings);
+    }
+    if(states->countStates>=MBCS_MAX_STATE_COUNT) {
+        fprintf(stderr, "cannot compact toUnicode because the maximum number of states is reached\n");
+        return;
+    }
+
+    /* make a copy of the state table; 1024 bytes per state is one row of 256 32-bit entries */
+    oldStateTable=(int32_t (*)[256])uprv_malloc(states->countStates*1024);
+    if(oldStateTable==nullptr) {
+        fprintf(stderr, "cannot compact toUnicode: out of memory\n");
+        return;
+    }
+    uprv_memcpy(oldStateTable, states->stateTable, states->countStates*1024);
+
+    /* add the new state */
+    /*
+     * this function does not catch the degenerate case where all lead bytes
+     * have all-unassigned trail bytes and the lead state could be removed
+     */
+    newState=states->countStates++;
+    states->stateFlags[newState]=0;
+    /* copy the old trail state, turning all assigned states into unassigned ones */
+    for(i=0; i<256; ++i) {
+        entry=states->stateTable[trailState][i];
+        switch(MBCS_ENTRY_FINAL_ACTION(entry)) {
+        case MBCS_STATE_VALID_16:
+        case MBCS_STATE_VALID_16_PAIR:
+            states->stateTable[newState][i]=MBCS_ENTRY_FINAL_SET_ACTION_VALUE(entry, MBCS_STATE_UNASSIGNED, 0xfffe);
+            break;
+        default:
+            states->stateTable[newState][i]=entry;
+            break;
+        }
+    }
+
+    /* in the lead state, redirect all lead bytes with all-unassigned trail bytes to the new state */
+    for(i=0; i<256; ++i) {
+        if(count[i]>0) {
+            states->stateTable[leadState][i]=MBCS_ENTRY_SET_STATE(states->stateTable[leadState][i], newState);
+        }
+    }
+
+    /* sum up the new state table; clearing READY makes sumUpStates() recompute every state */
+    for(i=0; i<states->countStates; ++i) {
+        states->stateFlags[i]&=~MBCS_STATE_FLAG_READY;
+    }
+    sum=sumUpStates(states);
+
+    /* allocate a new, smaller code units array */
+    oldUnicodeCodeUnits=*pUnicodeCodeUnits;
+    if(sum==0) {
+        *pUnicodeCodeUnits=nullptr;
+        if(oldUnicodeCodeUnits!=nullptr) {
+            uprv_free(oldUnicodeCodeUnits);
+        }
+        uprv_free(oldStateTable);
+        return;
+    }
+    *pUnicodeCodeUnits=(uint16_t *)uprv_malloc(sum*sizeof(uint16_t));
+    if(*pUnicodeCodeUnits==nullptr) {
+        fprintf(stderr, "cannot compact toUnicode: out of memory allocating %ld 16-bit code units\n",
+            (long)sum);
+        /* revert to the old state table; NOTE(review): stateOffsetSum[] and countToUCodeUnits keep the values sumUpStates() just wrote - confirm this is acceptable */
+        *pUnicodeCodeUnits=oldUnicodeCodeUnits;
+        --states->countStates;
+        uprv_memcpy(states->stateTable, oldStateTable, states->countStates*1024);
+        uprv_free(oldStateTable);
+        return;
+    }
+    for(i=0; i<sum; ++i) {
+        (*pUnicodeCodeUnits)[i]=0xfffe;
+    }
+
+    /* copy the code units for all assigned characters */
+    /*
+     * The old state table has the same lead _and_ trail states for assigned characters!
+     * The differences are in the offsets, and in the trail states for some unassigned characters.
+     * For each character with an assigned state in the new table, it was assigned in the old one.
+     * Only still-assigned characters are copied.
+     * Note that fallback mappings need to get their offset values adjusted.
+     */
+
+    /* for each initial state */
+    for(leadState=0; leadState<states->countStates; ++leadState) {
+        if((states->stateFlags[leadState]&0xf)==MBCS_STATE_FLAG_DIRECT) {
+            /* for each lead byte from there */
+            for(i=0; i<256; ++i) {
+                entry=states->stateTable[leadState][i];
+                if(MBCS_ENTRY_IS_TRANSITION(entry)) {
+                    trailState=(uint8_t)MBCS_ENTRY_TRANSITION_STATE(entry);
+                    /* the new state does not have assigned states */
+                    if(trailState!=newState) {
+                        trailOffset=MBCS_ENTRY_TRANSITION_OFFSET(entry);
+                        oldTrailOffset=MBCS_ENTRY_TRANSITION_OFFSET(oldStateTable[leadState][i]);
+                        /* for each trail byte */
+                        for(j=0; j<256; ++j) {
+                            entry=states->stateTable[trailState][j];
+                            /* copy assigned-character code units and adjust fallback offsets */
+                            switch(MBCS_ENTRY_FINAL_ACTION(entry)) {
+                            case MBCS_STATE_VALID_16:
+                                offset=trailOffset+MBCS_ENTRY_FINAL_VALUE_16(entry);
+                                /* find the old offset according to the old state table */
+                                oldOffset=oldTrailOffset+MBCS_ENTRY_FINAL_VALUE_16(oldStateTable[trailState][j]);
+                                unit=(*pUnicodeCodeUnits)[offset]=oldUnicodeCodeUnits[oldOffset];
+                                if(unit==0xfffe && (fallback=ucm_findFallback(toUFallbacks, countToUFallbacks, oldOffset))>=0) {
+                                    toUFallbacks[fallback].offset=0x80000000|offset; /* top bit marks it as already remapped */
+                                }
+                                break;
+                            case MBCS_STATE_VALID_16_PAIR:
+                                offset=trailOffset+MBCS_ENTRY_FINAL_VALUE_16(entry);
+                                /* find the old offset according to the old state table */
+                                oldOffset=oldTrailOffset+MBCS_ENTRY_FINAL_VALUE_16(oldStateTable[trailState][j]);
+                                (*pUnicodeCodeUnits)[offset++]=oldUnicodeCodeUnits[oldOffset++];
+                                (*pUnicodeCodeUnits)[offset]=oldUnicodeCodeUnits[oldOffset];
+                                break;
+                            default:
+                                break;
+                            }
+                        }
+                    }
+                }
+            }
+        }
+    }
+
+    /* remove temporary flags from fallback offsets that protected them from being modified twice */
+    for(i=0; i<countToUFallbacks; ++i) {
+        toUFallbacks[i].offset&=0x7fffffff;
+    }
+
+    /* free temporary memory */
+    uprv_free(oldUnicodeCodeUnits);
+    uprv_free(oldStateTable);
+}
+
+/*
+ * recursive sub-function of compactToUnicodeHelper();
+ * offset is the sum of the transition offsets and b the byte prefix leading to state; returns:
+ * >0 number of bytes that are used in unicodeCodeUnits[] that could be saved,
+ *    because all sequences from this state are unassigned
+ * <0 there are assignments in unicodeCodeUnits[]
+ * 0  no use of unicodeCodeUnits[]
+ */
+static int32_t
+findUnassigned(UCMStates *states,
+               uint16_t *unicodeCodeUnits,
+               _MBCSToUFallback *toUFallbacks, int32_t countToUFallbacks,
+               int32_t state, int32_t offset, uint32_t b) {
+    int32_t i, entry, savings, localSavings, belowSavings;
+    UBool haveAssigned;
+
+    localSavings=belowSavings=0; /* bytes counted for this state's own final entries / for the states below it */
+    haveAssigned=false;
+    for(i=0; i<256; ++i) {
+        entry=states->stateTable[state][i];
+        if(MBCS_ENTRY_IS_TRANSITION(entry)) {
+            /* descend even if an assignment is already known, so that lower-level savings are still printed */
+            savings=findUnassigned(states,
+                        unicodeCodeUnits,
+                        toUFallbacks, countToUFallbacks,
+                        MBCS_ENTRY_TRANSITION_STATE(entry),
+                        offset+MBCS_ENTRY_TRANSITION_OFFSET(entry),
+                        (b<<8)|(uint32_t)i);
+            if(savings<0) {
+                haveAssigned=true;
+            } else if(savings>0) {
+                printf("    all-unassigned sequences from prefix 0x%02lx state %ld use %ld bytes\n",
+                    (unsigned long)((b<<8)|i), (long)state, (long)savings);
+                belowSavings+=savings;
+            }
+        } else if(!haveAssigned) { /* final entries are no longer examined once an assignment is known */
+            switch(MBCS_ENTRY_FINAL_ACTION(entry)) {
+            case MBCS_STATE_VALID_16:
+                entry=offset+MBCS_ENTRY_FINAL_VALUE_16(entry); /* entry is reused as the index into unicodeCodeUnits[] */
+                if(unicodeCodeUnits[entry]==0xfffe && ucm_findFallback(toUFallbacks, countToUFallbacks, entry)<0) {
+                    localSavings+=2; /* one 16-bit unit holding 0xfffe, with no fallback at that index */
+                } else {
+                    haveAssigned=true;
+                }
+                break;
+            case MBCS_STATE_VALID_16_PAIR:
+                entry=offset+MBCS_ENTRY_FINAL_VALUE_16(entry); /* index of the first unit of the pair */
+                if(unicodeCodeUnits[entry]==0xfffe) {
+                    localSavings+=4; /* two 16-bit units; only the first one is tested */
+                } else {
+                    haveAssigned=true;
+                }
+                break;
+            default:
+                break; /* other action codes do not index unicodeCodeUnits[] here */
+            }
+        }
+    }
+    if(haveAssigned) {
+        return -1;
+    } else {
+        return localSavings+belowSavings;
+    }
+}
+
+/* helper function for finding compaction opportunities: reports, for each initial state,
+ * the bytes of unicodeCodeUnits[] that findUnassigned() attributes to all-unassigned sequences */
+static void
+compactToUnicodeHelper(UCMStates *states,
+                       uint16_t *unicodeCodeUnits,
+                       _MBCSToUFallback *toUFallbacks, int32_t countToUFallbacks) {
+    int32_t initialState;
+
+    for(initialState=0; initialState<states->countStates; ++initialState) {
+        /* initial states are the ones whose flags say MBCS_STATE_FLAG_DIRECT */
+        if((states->stateFlags[initialState]&0xf)!=MBCS_STATE_FLAG_DIRECT) {
+            continue;
+        }
+        int32_t wasted=findUnassigned(states, unicodeCodeUnits, toUFallbacks, countToUFallbacks,
+                                      initialState, 0, 0);
+        if(wasted>0) {
+            printf("    all-unassigned sequences from initial state %ld use %ld bytes\n",
+                (long)initialState, (long)wasted);
+        }
+    }
+}
+
+U_CDECL_BEGIN
+static int32_t U_CALLCONV /* uprv_sortArray() comparator: orders _MBCSToUFallback items by ascending offset */
+compareFallbacks(const void * /* context */, const void *fb1, const void *fb2) {
+    const _MBCSToUFallback *a=(const _MBCSToUFallback *)fb1, *b=(const _MBCSToUFallback *)fb2;
+    return (a->offset>b->offset)-(a->offset<b->offset); /* three-way compare; a subtraction could wrap to the wrong sign */
+}
+U_CDECL_END
+
+/*
+ * Post-processes a complete state table together with its toUnicode data:
+ * 1. final entries with action MBCS_STATE_VALID_DIRECT_16 and value U+fffe become
+ *    MBCS_STATE_UNASSIGNED entries,
+ * 2. the toUnicode tables are compacted if maxCharLength==2; for longer charsets
+ *    compaction opportunities are only reported, and only if verbose is set,
+ * 3. toUFallbacks[] is sorted by offset.
+ */
+U_CAPI void U_EXPORT2
+ucm_optimizeStates(UCMStates *states,
+                   uint16_t **pUnicodeCodeUnits,
+                   _MBCSToUFallback *toUFallbacks, int32_t countToUFallbacks,
+                   UBool verbose) {
+    /* a final entry with its state bits cleared, action VALID_DIRECT_16 and value U+fffe */
+    const int32_t unassignedDirect16=MBCS_ENTRY_FINAL(0, MBCS_STATE_VALID_DIRECT_16, 0xfffe);
+    int32_t state, cell, entry;
+
+    for(state=0; state<states->countStates; ++state) {
+        for(cell=0; cell<256; ++cell) {
+            entry=states->stateTable[state][cell];
+            /* such a cell carries no mapping: give it the "unassigned" action code instead */
+            if(MBCS_ENTRY_SET_STATE(entry, 0)==unassignedDirect16) {
+                states->stateTable[state][cell]=MBCS_ENTRY_FINAL_SET_ACTION(entry, MBCS_STATE_UNASSIGNED);
+            }
+        }
+    }
+
+    /* try to compact the toUnicode tables */
+    if(states->maxCharLength==2) {
+        compactToUnicode2(states, pUnicodeCodeUnits, toUFallbacks, countToUFallbacks, verbose);
+    } else if(states->maxCharLength>2 && verbose) {
+        compactToUnicodeHelper(states, *pUnicodeCodeUnits, toUFallbacks, countToUFallbacks);
+    }
+
+    /*
+     * Sort toUFallbacks by offset.
+     * Sorting before compactToUnicode2() should be safe as well, because it should
+     * not change the relative order of the offsets that it adjusts, but doing it
+     * here, after the compaction, is safest.
+     */
+    if(countToUFallbacks>0) {
+        UErrorCode errorCode=U_ZERO_ERROR; /* nothing bad will happen... the result is not checked */
+        uprv_sortArray(toUFallbacks, countToUFallbacks,
+                       sizeof(_MBCSToUFallback),
+                       compareFallbacks, nullptr, false, &errorCode);
+    }
+}
+
+/* use a complete state table ----------------------------------------------- */
+
+U_CAPI int32_t U_EXPORT2 /* counts the characters in bytes[0..length[; prints a message and returns -1 on error */
+ucm_countChars(UCMStates *states,
+               const uint8_t *bytes, int32_t length) {
+    uint32_t offset; /* sum of the transition offsets inside the current character; reset to 0 after each one */
+    int32_t i, entry, count; /* count: complete characters seen so far */
+    uint8_t state; /* row of states->stateTable used for the next byte */
+
+    offset=0;
+    count=0;
+    state=0;
+
+    if(states->countStates==0) {
+        fprintf(stderr, "ucm error: there is no state information!\n");
+        return -1;
+    }
+
+    /* for SI/SO (like EBCDIC-stateful), double-byte sequences start in state 1 */
+    if(length==2 && states->outputType==MBCS_OUTPUT_2_SISO) {
+        state=1;
+    }
+
+    /*
+     * Walk down the state table like in conversion,
+     * much like getNextUChar().
+     * We assume that c<=0x10ffff.
+     */
+    for(i=0; i<length; ++i) {
+        entry=states->stateTable[state][bytes[i]];
+        if(MBCS_ENTRY_IS_TRANSITION(entry)) {
+            /* more bytes are needed: follow the transition and accumulate its offset */
+            state=(uint8_t)MBCS_ENTRY_TRANSITION_STATE(entry);
+            offset+=MBCS_ENTRY_TRANSITION_OFFSET(entry);
+        } else {
+            switch(MBCS_ENTRY_FINAL_ACTION(entry)) {
+            case MBCS_STATE_ILLEGAL:
+                fprintf(stderr, "ucm error: byte sequence ends in illegal state\n");
+                return -1;
+            case MBCS_STATE_CHANGE_ONLY:
+                fprintf(stderr, "ucm error: byte sequence ends in state-change-only\n");
+                return -1;
+            case MBCS_STATE_UNASSIGNED: /* unassigned sequences are counted as characters, too */
+            case MBCS_STATE_FALLBACK_DIRECT_16:
+            case MBCS_STATE_VALID_DIRECT_16:
+            case MBCS_STATE_FALLBACK_DIRECT_20:
+            case MBCS_STATE_VALID_DIRECT_20:
+            case MBCS_STATE_VALID_16:
+            case MBCS_STATE_VALID_16_PAIR:
+                /* count a complete character and prepare for a new one */
+                ++count;
+                state=(uint8_t)MBCS_ENTRY_FINAL_STATE(entry);
+                offset=0;
+                break;
+            default:
+                /* reserved, must never occur */
+                fprintf(stderr, "ucm error: byte sequence reached reserved action code, entry: 0x%02lx\n", (unsigned long)entry);
+                return -1;
+            }
+        }
+    }
+
+    if(offset!=0) { /* NOTE(review): a truncated sequence is only noticed if its transition offsets sum to non-zero -- confirm */
+        fprintf(stderr, "ucm error: byte sequence too short, ends in non-final state %u\n", state);
+        return -1;
+    }
+
+    /*
+     * for SI/SO (like EBCDIC-stateful), multiple-character results
+     * must consist of only double-byte sequences
+     */
+    if(count>1 && states->outputType==MBCS_OUTPUT_2_SISO && length!=2*count) {
+        fprintf(stderr, "ucm error: SI/SO (like EBCDIC-stateful) result with %d characters does not contain all DBCS\n", (int)count);
+        return -1;
+    }
+
+    return count;
+}
+#endif
+
diff --git a/intl/icu/source/tools/toolutil/udbgutil.cpp b/intl/icu/source/tools/toolutil/udbgutil.cpp
new file mode 100644
index 0000000000..3f4bf3718e
--- /dev/null
+++ b/intl/icu/source/tools/toolutil/udbgutil.cpp
@@ -0,0 +1,769 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/********************************************************************
+ * COPYRIGHT:
+ * Copyright (c) 2007-2016, International Business Machines Corporation and
+ * others. All Rights Reserved.
+ ********************************************************************/
+
+#include "udbgutil.h"
+#include <string.h>
+#include "ustr_imp.h"
+#include "cmemory.h"
+#include "cstring.h"
+#include "putilimp.h"
+#include "unicode/ulocdata.h"
+#include "unicode/ucnv.h"
+#include "unicode/unistr.h"
+#include "cstr.h"
+
+/*
+To add a new enum type
+      (For example: UShoeSize  with values USHOE_WIDE=0, USHOE_REGULAR, USHOE_NARROW, USHOE_COUNT)
+
+    0. Make sure that all lines you add are protected with appropriate uconfig guards,
+        such as '#if !UCONFIG_NO_SHOES'.
+    1. udbgutil.h:  add  UDBG_UShoeSize to the UDebugEnumType enum before UDBG_ENUM_COUNT
+      ( The subsequent steps involve this file, udbgutil.cpp )
+    2. Find the marker "Add new enum types above this line"
+    3. Before that marker, add a #include of any header file you need.
+    4. Each enum type has three things in this section:  a #define, a count_, and an array of Fields.
+       It may help to copy and paste a previous definition.
+    5. In the case of the USHOE_... strings above, "USHOE_" is common to all values- six characters
+         " #define LEN_USHOE 6 "
+       6 characters will strip off "USHOE_" leaving enum values of WIDE, REGULAR, and NARROW.
+    6. Define the 'count_' variable, with the number of enum values. If the enum has a _MAX or _COUNT value,
+        that can be helpful for automatically defining the count. Otherwise define it manually.
+        " static const int32_t count_UShoeSize = USHOE_COUNT; "
+    7. Define the field names, in order.
+        " static const Field names_UShoeSize[] =  {
+        "  FIELD_NAME_STR( LEN_USHOE, USHOE_WIDE ),
+        "  FIELD_NAME_STR( LEN_USHOE, USHOE_REGULAR ),
+        "  FIELD_NAME_STR( LEN_USHOE, USHOE_NARROW ),
+        " };
+      ( The following command was used for converting ucol.h into partially correct entities )
+      grep "^[  ]*UCOL" < unicode/ucol.h  |
+         sed -e 's%^[  ]*\([A-Z]*\)_\([A-Z_]*\).*%   FIELD_NAME_STR( LEN_\1, \1_\2 ),%g'
+    8. Now, a bit farther down, add the name of the enum itself to the end of names_UDebugEnumType
+          ( UDebugEnumType is an enum, too!)
+        names_UDebugEnumType[] { ...
+            " FIELD_NAME_STR( LEN_UDBG, UDBG_UShoeSize ),   "
+    9. Find the function _udbg_enumCount  and add the count macro:
+            " COUNT_CASE(UShoeSize)
+   10. Find the function _udbg_enumFields  and add the field macro:
+            " FIELD_CASE(UShoeSize)
+   11. verify that your test code, and Java data generation, works properly.
+*/
+
+/**
+ * Structure representing one enum value; FIELD_NAME_STR() produces the initializers
+ */
+struct Field {
+    int32_t prefix;   /**< how many characters to remove in the prefix - i.e. "UCAL_" = 5 */
+	const char *str;  /**< The actual string value, i.e. the full enum constant name including the prefix */
+	int32_t num;      /**< The numeric value */
+};
+
+/**
+ * Define another field name. Used in an array of Field s
+ * @param y the common prefix length (i.e. 6 for "USHOE_" )
+ * @param x the actual enum value - it will be copied in both string and symbolic form.
+ * @see Field
+ */
+#define FIELD_NAME_STR(y,x)  { y, #x, x }
+
+
+// TODO: Currently, this whole functionality goes away with UCONFIG_NO_FORMATTING. Should be split up.
+#if !UCONFIG_NO_FORMATTING
+
+// Calendar
+#include "unicode/ucal.h"
+
+// 'UCAL_' = 5
+#define LEN_UCAL 5 /* UCAL_ */
+static const int32_t count_UCalendarDateFields = UCAL_FIELD_COUNT;
+static const Field names_UCalendarDateFields[] =
+{
+    FIELD_NAME_STR( LEN_UCAL, UCAL_ERA ),
+    FIELD_NAME_STR( LEN_UCAL, UCAL_YEAR ),
+    FIELD_NAME_STR( LEN_UCAL, UCAL_MONTH ),
+    FIELD_NAME_STR( LEN_UCAL, UCAL_WEEK_OF_YEAR ),
+    FIELD_NAME_STR( LEN_UCAL, UCAL_WEEK_OF_MONTH ),
+    FIELD_NAME_STR( LEN_UCAL, UCAL_DATE ),
+    FIELD_NAME_STR( LEN_UCAL, UCAL_DAY_OF_YEAR ),
+    FIELD_NAME_STR( LEN_UCAL, UCAL_DAY_OF_WEEK ),
+    FIELD_NAME_STR( LEN_UCAL, UCAL_DAY_OF_WEEK_IN_MONTH ),
+    FIELD_NAME_STR( LEN_UCAL, UCAL_AM_PM ),
+    FIELD_NAME_STR( LEN_UCAL, UCAL_HOUR ),
+    FIELD_NAME_STR( LEN_UCAL, UCAL_HOUR_OF_DAY ),
+    FIELD_NAME_STR( LEN_UCAL, UCAL_MINUTE ),
+    FIELD_NAME_STR( LEN_UCAL, UCAL_SECOND ),
+    FIELD_NAME_STR( LEN_UCAL, UCAL_MILLISECOND ),
+    FIELD_NAME_STR( LEN_UCAL, UCAL_ZONE_OFFSET ),
+    FIELD_NAME_STR( LEN_UCAL, UCAL_DST_OFFSET ),
+    FIELD_NAME_STR( LEN_UCAL, UCAL_YEAR_WOY ),
+    FIELD_NAME_STR( LEN_UCAL, UCAL_DOW_LOCAL ),
+    FIELD_NAME_STR( LEN_UCAL, UCAL_EXTENDED_YEAR ),
+    FIELD_NAME_STR( LEN_UCAL, UCAL_JULIAN_DAY ),
+    FIELD_NAME_STR( LEN_UCAL, UCAL_MILLISECONDS_IN_DAY ),
+    FIELD_NAME_STR( LEN_UCAL, UCAL_IS_LEAP_MONTH ),
+#ifndef U_HIDE_DRAFT_API
+    FIELD_NAME_STR( LEN_UCAL, UCAL_ORDINAL_MONTH ),
+#endif  // U_HIDE_DRAFT_API
+};
+
+
+static const int32_t count_UCalendarMonths = UCAL_UNDECIMBER+1;
+static const Field names_UCalendarMonths[] =
+{
+  FIELD_NAME_STR( LEN_UCAL, UCAL_JANUARY ),
+  FIELD_NAME_STR( LEN_UCAL, UCAL_FEBRUARY ),
+  FIELD_NAME_STR( LEN_UCAL, UCAL_MARCH ),
+  FIELD_NAME_STR( LEN_UCAL, UCAL_APRIL ),
+  FIELD_NAME_STR( LEN_UCAL, UCAL_MAY ),
+  FIELD_NAME_STR( LEN_UCAL, UCAL_JUNE ),
+  FIELD_NAME_STR( LEN_UCAL, UCAL_JULY ),
+  FIELD_NAME_STR( LEN_UCAL, UCAL_AUGUST ),
+  FIELD_NAME_STR( LEN_UCAL, UCAL_SEPTEMBER ),
+  FIELD_NAME_STR( LEN_UCAL, UCAL_OCTOBER ),
+  FIELD_NAME_STR( LEN_UCAL, UCAL_NOVEMBER ),
+  FIELD_NAME_STR( LEN_UCAL, UCAL_DECEMBER ),
+  FIELD_NAME_STR( LEN_UCAL, UCAL_UNDECIMBER)
+};
+
+#include "unicode/udat.h"
+
+#define LEN_UDAT 5 /* "UDAT_" */
+static const int32_t count_UDateFormatStyle = UDAT_SHORT+1;
+static const Field names_UDateFormatStyle[] =
+{
+        FIELD_NAME_STR( LEN_UDAT, UDAT_FULL ),
+        FIELD_NAME_STR( LEN_UDAT, UDAT_LONG ),
+        FIELD_NAME_STR( LEN_UDAT, UDAT_MEDIUM ),
+        FIELD_NAME_STR( LEN_UDAT, UDAT_SHORT ),
+        /* end regular */
+    /*
+     *  negative enums.. leave out for now.
+        FIELD_NAME_STR( LEN_UDAT, UDAT_NONE ),
+        FIELD_NAME_STR( LEN_UDAT, UDAT_PATTERN ),
+     */
+};
+
+#endif
+
+#include "unicode/uloc.h"
+
+#define LEN_UAR 12 /* "ULOC_ACCEPT_" */
+static const int32_t count_UAcceptResult = 3;
+static const Field names_UAcceptResult[] =
+{
+        FIELD_NAME_STR( LEN_UAR, ULOC_ACCEPT_FAILED ),
+        FIELD_NAME_STR( LEN_UAR, ULOC_ACCEPT_VALID ),
+        FIELD_NAME_STR( LEN_UAR, ULOC_ACCEPT_FALLBACK ),
+};
+
+#if !UCONFIG_NO_COLLATION
+#include "unicode/ucol.h"
+#define LEN_UCOL 5 /* UCOL_ */
+static const int32_t count_UColAttributeValue = UCOL_ATTRIBUTE_VALUE_COUNT;
+static const Field names_UColAttributeValue[]  = {
+   FIELD_NAME_STR( LEN_UCOL, UCOL_PRIMARY ),
+   FIELD_NAME_STR( LEN_UCOL, UCOL_SECONDARY ),
+   FIELD_NAME_STR( LEN_UCOL, UCOL_TERTIARY ),
+//   FIELD_NAME_STR( LEN_UCOL, UCOL_CE_STRENGTH_LIMIT ),
+   FIELD_NAME_STR( LEN_UCOL, UCOL_QUATERNARY ),
+   // gap
+   FIELD_NAME_STR( LEN_UCOL, UCOL_IDENTICAL ),
+//   FIELD_NAME_STR( LEN_UCOL, UCOL_STRENGTH_LIMIT ),
+   FIELD_NAME_STR( LEN_UCOL, UCOL_OFF ),
+   FIELD_NAME_STR( LEN_UCOL, UCOL_ON ),
+   // gap
+   FIELD_NAME_STR( LEN_UCOL, UCOL_SHIFTED ),
+   FIELD_NAME_STR( LEN_UCOL, UCOL_NON_IGNORABLE ),
+   // gap
+   FIELD_NAME_STR( LEN_UCOL, UCOL_LOWER_FIRST ),
+   FIELD_NAME_STR( LEN_UCOL, UCOL_UPPER_FIRST ),
+};
+
+#endif
+
+
+#if UCONFIG_ENABLE_PLUGINS
+#include "unicode/icuplug.h"
+
+#define LEN_UPLUG_REASON 13 /* UPLUG_REASON_ */
+static const int32_t count_UPlugReason = UPLUG_REASON_COUNT;
+static const Field names_UPlugReason[]  = {
+   FIELD_NAME_STR( LEN_UPLUG_REASON, UPLUG_REASON_QUERY ),
+   FIELD_NAME_STR( LEN_UPLUG_REASON, UPLUG_REASON_LOAD ),
+   FIELD_NAME_STR( LEN_UPLUG_REASON, UPLUG_REASON_UNLOAD ),
+};
+
+#define LEN_UPLUG_LEVEL 12 /* UPLUG_LEVEL_ */
+static const int32_t count_UPlugLevel = UPLUG_LEVEL_COUNT;
+static const Field names_UPlugLevel[]  = {
+   FIELD_NAME_STR( LEN_UPLUG_LEVEL, UPLUG_LEVEL_INVALID ),
+   FIELD_NAME_STR( LEN_UPLUG_LEVEL, UPLUG_LEVEL_UNKNOWN ),
+   FIELD_NAME_STR( LEN_UPLUG_LEVEL, UPLUG_LEVEL_LOW ),
+   FIELD_NAME_STR( LEN_UPLUG_LEVEL, UPLUG_LEVEL_HIGH ),
+};
+#endif
+
+#define LEN_UDBG 5 /* "UDBG_" */
+static const int32_t count_UDebugEnumType = UDBG_ENUM_COUNT;
+static const Field names_UDebugEnumType[] =
+{
+    FIELD_NAME_STR( LEN_UDBG, UDBG_UDebugEnumType ),
+#if !UCONFIG_NO_FORMATTING
+    FIELD_NAME_STR( LEN_UDBG, UDBG_UCalendarDateFields ),
+    FIELD_NAME_STR( LEN_UDBG, UDBG_UCalendarMonths ),
+    FIELD_NAME_STR( LEN_UDBG, UDBG_UDateFormatStyle ),
+#endif
+#if UCONFIG_ENABLE_PLUGINS
+    FIELD_NAME_STR( LEN_UDBG, UDBG_UPlugReason ),
+    FIELD_NAME_STR( LEN_UDBG, UDBG_UPlugLevel ),
+#endif
+    FIELD_NAME_STR( LEN_UDBG, UDBG_UAcceptResult ),
+#if !UCONFIG_NO_COLLATION
+    FIELD_NAME_STR( LEN_UDBG, UDBG_UColAttributeValue ),
+#endif
+};
+
+
+// --- Add new enum types above this line ---
+
+#define COUNT_CASE(x)  case UDBG_##x: return (actual?count_##x:UPRV_LENGTHOF(names_##x));
+#define COUNT_FAIL_CASE(x) case UDBG_##x: return -1;
+
+#define FIELD_CASE(x)  case UDBG_##x: return names_##x;
+#define FIELD_FAIL_CASE(x) case UDBG_##x: return nullptr;
+
+// low level
+
+/**
+ * @param type type of item
+ * @param actual true: for the actual enum's type (UCAL_FIELD_COUNT, etc), or false for the string count (length of the names_ array)
+ * @return the requested count, or -1 for a type that has no case below */
+static int32_t _udbg_enumCount(UDebugEnumType type, UBool actual) {
+	switch(type) {
+		COUNT_CASE(UDebugEnumType)
+#if !UCONFIG_NO_FORMATTING
+		COUNT_CASE(UCalendarDateFields)
+		COUNT_CASE(UCalendarMonths)
+		COUNT_CASE(UDateFormatStyle)
+#endif
+#if UCONFIG_ENABLE_PLUGINS
+        COUNT_CASE(UPlugReason)
+        COUNT_CASE(UPlugLevel)
+#endif
+        COUNT_CASE(UAcceptResult)
+#if !UCONFIG_NO_COLLATION
+        COUNT_CASE(UColAttributeValue)
+#endif
+		// COUNT_FAIL_CASE(UNonExistentEnum)
+	default:
+		return -1; // callers rely on this: "field >= -1" rejects every non-negative field
+	}
+}
+
+static const Field* _udbg_enumFields(UDebugEnumType type) {
+	switch(type) {
+		FIELD_CASE(UDebugEnumType)
+#if !UCONFIG_NO_FORMATTING
+		FIELD_CASE(UCalendarDateFields)
+		FIELD_CASE(UCalendarMonths)
+		FIELD_CASE(UDateFormatStyle)
+#endif
+#if UCONFIG_ENABLE_PLUGINS
+        FIELD_CASE(UPlugReason)
+        FIELD_CASE(UPlugLevel)
+#endif
+        FIELD_CASE(UAcceptResult)
+       // FIELD_FAIL_CASE(UNonExistentEnum)
+#if !UCONFIG_NO_COLLATION
+        FIELD_CASE(UColAttributeValue)
+#endif
+	default:
+		return nullptr;
+	}
+}
+
+// implementation
+
+int32_t  udbg_enumCount(UDebugEnumType type) {
+	return _udbg_enumCount(type, false);
+}
+
+int32_t  udbg_enumExpectedCount(UDebugEnumType type) {
+	return _udbg_enumCount(type, true);
+}
+
+const char *  udbg_enumName(UDebugEnumType type, int32_t field) {
+    // Name of entry #field of the type's table with the common prefix skipped, or nullptr if
+    // field is out of range or the type is unsupported (its count is -1, so every field fails).
+    if(field<0 || field>=_udbg_enumCount(type,false)) {
+        return nullptr;
+    }
+    const Field *table = _udbg_enumFields(type);
+    if(table == nullptr) {
+        return nullptr;
+    }
+    const Field &entry = table[field];
+    return entry.str + entry.prefix;
+}
+
+int32_t  udbg_enumArrayValue(UDebugEnumType type, int32_t field) {
+    // Numeric value stored in entry #field of the type's table, or -1 if field is out of
+    // range or the type is unsupported (its count is -1, so every field fails the check).
+    if(field<0 || field>=_udbg_enumCount(type,false)) {
+        return -1;
+    }
+
+    const Field *table = _udbg_enumFields(type);
+    if(table == nullptr) {
+        return -1;
+    }
+    return table[field].num;
+}
+
+int32_t udbg_enumByName(UDebugEnumType type, const char *value) {
+    // Returns the numeric value of the entry of the given enum type whose name equals value,
+    // or -1 if there is none. Names without the prefix are tried before names with it.
+    if(value==nullptr || type<0||type>=_udbg_enumCount(UDBG_UDebugEnumType, true)) {
+        return -1; // null name (strcmp() needs a valid string) or type out of range
+    }
+
+    const Field *fields = _udbg_enumFields(type);
+    const int32_t count = (fields != nullptr) ? _udbg_enumCount(type, false) : 0;
+    // pass 0 compares against the names with the prefix skipped, pass 1 against the full names
+    for(int32_t pass = 0; pass<2; pass++) {
+        for(int32_t field = 0; field<count; field++) {
+            const char *name = fields[field].str + (pass==0 ? fields[field].prefix : 0);
+            if(!strcmp(value, name)) {
+                return fields[field].num;
+            }
+        }
+    }
+    // fail
+    return -1;
+}
+
+/* platform info */
+/**
+ * Return the name of the current platform, as selected at compile time by the U_PLATFORM macros
+ */
+U_CAPI const char *udbg_getPlatform()
+{
+#if U_PLATFORM_USES_ONLY_WIN32_API
+    return "Windows";
+#elif U_PLATFORM == U_PF_CYGWIN
+    return "Cygwin";
+#elif U_PLATFORM == U_PF_UNKNOWN
+    return "unknown";
+#elif U_PLATFORM == U_PF_DARWIN
+    return "Darwin";
+#elif U_PLATFORM == U_PF_BSD
+    return "BSD";
+#elif U_PLATFORM == U_PF_QNX
+    return "QNX";
+#elif U_PLATFORM == U_PF_LINUX
+    return "Linux";
+#elif U_PLATFORM == U_PF_ANDROID
+    return "Android";
+#elif U_PLATFORM == U_PF_CLASSIC_MACOS
+    return "MacOS (Classic)";
+#elif U_PLATFORM == U_PF_OS390
+    return "IBM z";
+#elif U_PLATFORM == U_PF_OS400
+    return "IBM i";
+#else
+    return "Other (POSIX-like)";
+#endif
+}
+
+struct USystemParams;
+
+typedef int32_t U_CALLCONV USystemParameterCallback(const USystemParams *param, char *target, int32_t targetCapacity, UErrorCode *status);
+
+struct USystemParams {
+  const char *paramName;
+  USystemParameterCallback *paramFunction;
+  const char *paramStr;
+  int32_t paramInt;
+};
+
+/* parameter types */
+U_CAPI  int32_t
+paramEmpty(const USystemParams * /* param */, char *target, int32_t targetCapacity, UErrorCode *status) {
+  if(U_FAILURE(*status))return 0;
+  return u_terminateChars(target, targetCapacity, 0, status); // length 0: the value is the empty string
+}
+
+U_CAPI  int32_t
+paramStatic(const USystemParams *param, char *target, int32_t targetCapacity, UErrorCode *status) {
+  // Copies the fixed string param->paramStr; a null paramStr is handled by paramEmpty().
+  const char *text = param->paramStr;
+  if(text==nullptr) return paramEmpty(param,target,targetCapacity,status);
+  if(U_FAILURE(*status))return 0;
+  const int32_t len = static_cast<int32_t>(uprv_strlen(text));
+  if(target!=nullptr) uprv_strncpy(target,text,uprv_min(len,targetCapacity));
+  return u_terminateChars(target, targetCapacity, len, status);
+}
+
+static const char *nullString = "(null)";
+
+// Copies str ("(null)" if str is nullptr) into target; if *status is already a failure, the
+// name of that error is copied instead. Returns the length of the text that was chosen.
+static int32_t stringToStringBuffer(char *target, int32_t targetCapacity, const char *str, UErrorCode *status) {
+  const UBool failed = U_FAILURE(*status);
+  if(failed) {
+    str = u_errorName(*status);
+  } else if(str==nullptr) {
+    str = nullString;
+  }
+  const int32_t len = static_cast<int32_t>(uprv_strlen(str));
+  if(target!=nullptr) {
+    uprv_strncpy(target,str,uprv_min(len,targetCapacity));
+    // u_terminateChars() does not touch the buffer on failure, so terminate the error name here
+    if(failed && len<targetCapacity) target[len]=0;
+  }
+  return u_terminateChars(target, targetCapacity, len, status);
+}
+
+static int32_t integerToStringBuffer(char *target, int32_t targetCapacity, int32_t n, int32_t radix, UErrorCode *status) {
+  if(U_FAILURE(*status)) return 0;
+  char str[300]; // scratch buffer for the digits of n in the given radix
+  T_CString_integerToString(str,n,radix);
+  return stringToStringBuffer(target,targetCapacity,str,status); // copy with the usual capacity/termination handling
+}
+
+// Formats param->paramInt; the first character of param->paramStr picks the radix ('d' or no string: 10, 'x': 16, 'o': 8, 'b': 2).
+U_CAPI  int32_t
+paramInteger(const USystemParams *param, char *target, int32_t targetCapacity, UErrorCode *status) {
+  if(U_FAILURE(*status))return 0;
+  int32_t radix;
+  switch(param->paramStr==nullptr ? 'd' : param->paramStr[0]) {
+  case 'd': radix = 10; break;
+  case 'x': radix = 16; break;
+  case 'o': radix = 8;  break;
+  case 'b': radix = 2;  break;
+  default: // unknown radix selector
+    *status = U_INTERNAL_PROGRAM_ERROR;
+    return 0;
+  }
+  return integerToStringBuffer(target,targetCapacity,param->paramInt, radix,status);
+}
+
+
+// Writes the version obtained from ulocdata_getCLDRVersion(), formatted by u_versionToString().
+U_CAPI  int32_t
+paramCldrVersion(const USystemParams * /* param */, char *target, int32_t targetCapacity, UErrorCode *status) {
+  if(U_FAILURE(*status))return 0;
+  UVersionInfo cldrVersion;
+  ulocdata_getCLDRVersion(cldrVersion, status);
+  if(U_FAILURE(*status)) {
+    return 0;
+  }
+
+  char versionString[200]="";
+  u_versionToString(cldrVersion, versionString);
+  return stringToStringBuffer(target,targetCapacity,versionString,status);
+}
+
+
+#if !UCONFIG_NO_FORMATTING
+// Writes the ID of the default time zone into target; returns its length, or 0 if it is unavailable.
+U_CAPI  int32_t
+paramTimezoneDefault(const USystemParams * /* param */, char *target, int32_t targetCapacity, UErrorCode *status) {
+  if(U_FAILURE(*status))return 0;
+  char16_t buf[100];
+  char buf2[UPRV_LENGTHOF(buf)];
+  // An ID of exactly UPRV_LENGTHOF(buf) units comes back with a warning (still a success code)
+  // and without a NUL; converting len+1 units would then read and write one element past
+  // the end of both buffers. Such a result is turned into a buffer overflow error.
+  const int32_t len = ucal_getDefaultTimeZone(buf, UPRV_LENGTHOF(buf), status);
+  if(U_SUCCESS(*status) && len>=UPRV_LENGTHOF(buf)) *status = U_BUFFER_OVERFLOW_ERROR;
+  if(U_FAILURE(*status) || len<=0) return 0;
+  u_UCharsToChars(buf, buf2, len+1); // len+1: include the terminating NUL
+  return stringToStringBuffer(target,targetCapacity, buf2,status);
+}
+#endif
+
+U_CAPI  int32_t
+paramLocaleDefaultBcp47(const USystemParams * /* param */, char *target, int32_t targetCapacity, UErrorCode *status) {
+  if(U_FAILURE(*status))return 0;
+  const char *def = uloc_getDefault(); // ICU locale ID of the default locale
+  return uloc_toLanguageTag(def,target,targetCapacity,false,status); // false: the "strict" argument of uloc_toLanguageTag()
+}
+
+
+/* simple 1-liner param functions */
+#define STRING_PARAM(func, str) U_CAPI  int32_t \
+  func(const USystemParams *, char *target, int32_t targetCapacity, UErrorCode *status) \
+  {  return stringToStringBuffer(target,targetCapacity,(str),status); }
+
+STRING_PARAM(paramIcudataPath, u_getDataDirectory())
+STRING_PARAM(paramPlatform, udbg_getPlatform())
+STRING_PARAM(paramLocaleDefault, uloc_getDefault())
+#if !UCONFIG_NO_CONVERSION
+STRING_PARAM(paramConverterDefault, ucnv_getDefaultName())
+#endif
+
+#if !UCONFIG_NO_FORMATTING
+STRING_PARAM(paramTimezoneVersion, ucal_getTZDataVersion(status))
+#endif
+
+static const USystemParams systemParams[] = {
+  { "copyright",    paramStatic, U_COPYRIGHT_STRING,0 },
+  { "product",      paramStatic, "icu4c",0 },
+  { "product.full", paramStatic, "International Components for Unicode for C/C++",0 },
+  { "version",      paramStatic, U_ICU_VERSION,0 },
+  { "version.unicode", paramStatic, U_UNICODE_VERSION,0 },
+  { "platform.number", paramInteger, "d",U_PLATFORM},
+  { "platform.type", paramPlatform, nullptr ,0},
+  { "locale.default", paramLocaleDefault, nullptr, 0},
+  { "locale.default.bcp47", paramLocaleDefaultBcp47, nullptr, 0},
+#if !UCONFIG_NO_CONVERSION
+  { "converter.default", paramConverterDefault, nullptr, 0},
+#endif
+  { "icudata.name", paramStatic, U_ICUDATA_NAME, 0},
+  { "icudata.path", paramIcudataPath, nullptr, 0},
+
+  { "cldr.version", paramCldrVersion, nullptr, 0},
+
+#if !UCONFIG_NO_FORMATTING
+  { "tz.version", paramTimezoneVersion, nullptr, 0},
+  { "tz.default", paramTimezoneDefault, nullptr, 0},
+#endif
+
+  { "cpu.bits",       paramInteger, "d", (sizeof(void*))*8},
+  { "cpu.big_endian", paramInteger, "b", U_IS_BIG_ENDIAN},
+  { "os.wchar_width", paramInteger, "d", U_SIZEOF_WCHAR_T},
+  { "os.charset_family", paramInteger, "d", U_CHARSET_FAMILY},
+#if defined (U_HOST)
+  { "os.host", paramStatic, U_HOST, 0},
+#endif
+#if defined (U_BUILD)
+  { "build.build", paramStatic, U_BUILD, 0},
+#endif
+#if defined (U_CC)
+  { "build.cc", paramStatic, U_CC, 0},
+#endif
+#if defined (U_CXX)
+  { "build.cxx", paramStatic, U_CXX, 0},
+#endif
+#if defined (CYGWINMSVC)
+  { "build.cygwinmsvc", paramInteger, "b", 1},
+#endif
+  { "uconfig.internal_digitlist", paramInteger, "b", 1}, /* always 1 */
+  { "uconfig.have_parseallinput", paramInteger, "b", UCONFIG_HAVE_PARSEALLINPUT},
+
+
+};
+
+#define U_SYSPARAM_COUNT UPRV_LENGTHOF(systemParams)
+
+U_CAPI const char *udbg_getSystemParameterNameByIndex(int32_t i) {
+  // nullptr for an index outside systemParams[]; udbg_writeIcuInfo() uses that to end its loop
+  if(i<0 || i >= (int32_t)U_SYSPARAM_COUNT) {
+    return nullptr;
+  }
+  return systemParams[i].paramName;
+}
+
+
+U_CAPI int32_t udbg_getSystemParameterValueByIndex(int32_t i, char *buffer, int32_t bufferCapacity, UErrorCode *status) {
+  // an index outside systemParams[] yields 0 and leaves buffer and *status untouched
+  if(i<0 || i >= (int32_t)U_SYSPARAM_COUNT) {
+    return 0;
+  }
+  return systemParams[i].paramFunction(&(systemParams[i]),buffer,bufferCapacity,status);
+}
+
+U_CAPI void udbg_writeIcuInfo(FILE *out) { // writes every system parameter to out as an <icuSystemParams> element
+  char str[2000] = ""; // zero-filled; the callbacks never get the last byte, so str is always NUL-terminated
+  /* todo: API for writing DTD? */
+  fprintf(out, " <icuSystemParams type=\"icu4c\">\n");
+  const char *paramName;
+  for(int32_t i=0;(paramName=udbg_getSystemParameterNameByIndex(i))!=nullptr;i++) {
+    UErrorCode status2 = U_ZERO_ERROR;
+    udbg_getSystemParameterValueByIndex(i, str,static_cast<int32_t>(sizeof(str))-1,&status2);
+    if(U_SUCCESS(status2)) {
+      fprintf(out,"    <param name=\"%s\">%s</param>\n", paramName,str);
+    } else {
+      fprintf(out,"  <!-- n=\"%s\" ERROR: %s -->\n", paramName, u_errorName(status2));
+    }
+  }
+  fprintf(out, " </icuSystemParams>\n");
+}
+
+#define UNICODE_BUG_URL "https://unicode-org.atlassian.net/browse/"
+#define OLD_CLDR_PREFIX "cldrbug:"
+#define CLDR_BUG_PREFIX "CLDR-"
+#define ICU_BUG_PREFIX "ICU-"
+
+
+
+#include <set>
+#include <map>
+#include <string>
+#include <ostream>
+#include <iostream>
+
+class KnownIssues { // collects known-issue messages, grouped by ticket and then by location
+public:
+  KnownIssues();
+  ~KnownIssues();
+  void add(const char *ticket, const char *where, const char16_t *msg, UBool *firstForTicket, UBool *firstForWhere); // msg as UTF-16
+  void add(const char *ticket, const char *where, const char *msg, UBool *firstForTicket, UBool *firstForWhere);
+  UBool print(); // writes the collection to std::cout; false if it is empty
+private:
+  std::map< std::string,
+            std::map < std::string, std::set < std::string > > > fTable; // ticket -> where -> set of messages
+};
+
+KnownIssues::KnownIssues()
+  : fTable()
+{
+}
+
+KnownIssues::~KnownIssues()
+{
+}
+
+/**
+ * Map cldrbug:1234 to CLDR-1234
+ * Map 1234 to ICU-1234; any other ticket string is returned unchanged
+ */
+static std::string mapTicketId(const char *ticketStr) {
+  std::string ticket(ticketStr);
+  // TODO: Can remove this function once all logKnownIssue calls are switched over
+  // to the ICU-1234 and CLDR-1234 format.
+  if(ticket.compare(0, uprv_strlen(OLD_CLDR_PREFIX), OLD_CLDR_PREFIX) == 0) {
+    // map cldrbug:1234 to CLDR-1234; compare() checks only the start of the string
+    ticket.replace(0, uprv_strlen(OLD_CLDR_PREFIX), CLDR_BUG_PREFIX);
+  } else if(::isdigit(static_cast<unsigned char>(ticket[0]))) {
+    // map 1234 to ICU-1234; the cast keeps negative char values away from isdigit()
+    ticket.insert(0, ICU_BUG_PREFIX);
+  }
+  return ticket;
+}
+
+void KnownIssues::add(const char *ticketStr, const char *where, const char16_t *msg, UBool *firstForTicket, UBool *firstForWhere)
+{
+  // Registers the ticket, then the location, then the message. A null where ends the call
+  // after the ticket; a null or empty msg ends it after the location.
+  const std::string ticket = mapTicketId(ticketStr);
+  const bool newTicket = (fTable.find(ticket) == fTable.end());
+  if(firstForTicket!=nullptr) {
+    *firstForTicket = newTicket;
+  }
+  // operator[] creates the empty entry if the ticket was not known yet
+  std::map < std::string, std::set < std::string > > &places = fTable[ticket];
+  if(where==nullptr) return;
+
+  const bool newWhere = (places.find(where) == places.end());
+  if(firstForWhere!=nullptr) {
+    *firstForWhere = newWhere;
+  }
+  std::set < std::string > &messages = places[where];
+  if(msg==nullptr || !*msg) return;
+
+  const icu::UnicodeString ustr(msg);
+  messages.insert(std::string(icu::CStr(ustr)()));
+}
+
+void KnownIssues::add(const char *ticketStr, const char *where, const char *msg, UBool *firstForTicket, UBool *firstForWhere)
+{
+  // Registers the ticket, then the location, then the message. A null where ends the call
+  // after the ticket; a null or empty msg ends it after the location.
+  const std::string ticket = mapTicketId(ticketStr);
+  const bool newTicket = (fTable.find(ticket) == fTable.end());
+  if(firstForTicket!=nullptr) {
+    *firstForTicket = newTicket;
+  }
+  // operator[] creates the empty entry if the ticket was not known yet
+  std::map < std::string, std::set < std::string > > &places = fTable[ticket];
+  if(where==nullptr) return;
+
+  const bool newWhere = (places.find(where) == places.end());
+  if(firstForWhere!=nullptr) {
+    *firstForWhere = newWhere;
+  }
+  std::set < std::string > &messages = places[where];
+  if(msg==nullptr || !*msg) return;
+
+  messages.insert(std::string(msg));
+}
+
+UBool KnownIssues::print()
+{
+  // Writes all recorded issues to std::cout: one line per ticket, then every location
+  // recorded for it, then the quoted messages of that location.
+  // Returns false, without printing anything, if nothing has been recorded.
+  if(fTable.empty()) {
+    return false;
+  }
+
+  std::cout << "KNOWN ISSUES" << std::endl;
+  for(const auto &ticketEntry : fTable) {
+    const std::string &ticketid = ticketEntry.first;
+
+    // ticket line: "[id] " plus a URL for unicode.org tickets
+    std::cout << "[" << ticketid << "] ";
+    if(ticketid.rfind(ICU_BUG_PREFIX) == 0 || ticketid.rfind(CLDR_BUG_PREFIX) == 0) {
+      // If it's a unicode.org bug.
+      std::cout << UNICODE_BUG_URL << ticketid;
+    } // Else: some other kind of bug. Allow this, but without a URL.
+    std::cout << std::endl;
+
+    // locations of this ticket, each followed by its messages
+    for(const auto &whereEntry : ticketEntry.second) {
+      std::cout << "  " << whereEntry.first << std::endl;
+      for(const std::string &message : whereEntry.second) {
+        std::cout << "     " << '"' << message << '"' << std::endl;
+      }
+    }
+  }
+
+  return true;
+}
+
+U_CAPI void *udbg_knownIssue_openU(void *ptr, const char *ticket, char *where, const char16_t *msg, UBool *firstForTicket,
+                                   UBool *firstForWhere) {
+  // ptr is the handle from an earlier call, or nullptr to start a new collection;
+  // the handle (new or unchanged) is returned after the UTF-16 message has been added.
+  KnownIssues *issues = static_cast<KnownIssues*>(ptr);
+  if(issues==nullptr) {
+    issues = new KnownIssues();
+  }
+  issues->add(ticket, where, msg, firstForTicket, firstForWhere);
+  return issues;
+}
+
+U_CAPI void *udbg_knownIssue_open(void *ptr, const char *ticket, char *where, const char *msg, UBool *firstForTicket,
+                                   UBool *firstForWhere) {
+  // ptr is the handle from an earlier call, or nullptr to start a new collection;
+  // the handle (new or unchanged) is returned after the message has been added.
+  KnownIssues *issues = static_cast<KnownIssues*>(ptr);
+  if(issues==nullptr) {
+    issues = new KnownIssues();
+  }
+  issues->add(ticket, where, msg, firstForTicket, firstForWhere);
+  return issues;
+}
+
+U_CAPI UBool udbg_knownIssue_print(void *ptr) {
+  // false only for a null handle
+  KnownIssues *issues = static_cast<KnownIssues*>(ptr);
+  if(issues==nullptr) {
+    return false;
+  }
+  issues->print(); // its result is not propagated: any non-null handle yields true
+  return true;
+}
+
+U_CAPI void udbg_knownIssue_close(void *ptr) { // releases a handle returned by udbg_knownIssue_open()/openU()
+  KnownIssues *t = static_cast<KnownIssues*>(ptr);
+  delete t; // a null handle is fine: deleting a null pointer does nothing
+}
diff --git a/intl/icu/source/tools/toolutil/udbgutil.h b/intl/icu/source/tools/toolutil/udbgutil.h
new file mode 100644
index 0000000000..e3ed513839
--- /dev/null
+++ b/intl/icu/source/tools/toolutil/udbgutil.h
@@ -0,0 +1,147 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+************************************************************************
+* Copyright (c) 2008-2015, International Business Machines
+* Corporation and others.  All Rights Reserved.
+************************************************************************
+*/
+
+/** C Utilities to aid in debugging **/
+
+#ifndef _UDBGUTIL_H
+#define _UDBGUTIL_H
+
+#include "unicode/utypes.h"
+#include <stdio.h>
+
+enum UDebugEnumType {
+    UDBG_UDebugEnumType = 0, /* Self-referential, strings for UDebugEnumType. Count=UDBG_ENUM_COUNT. */
+#if !UCONFIG_NO_FORMATTING
+    UDBG_UCalendarDateFields, /* UCalendarDateFields. Count=UCAL_FIELD_COUNT.  Unsupported if UCONFIG_NO_FORMATTING. */
+    UDBG_UCalendarMonths, /* UCalendarMonths. Count= (UCAL_UNDECIMBER+1) */
+    UDBG_UDateFormatStyle, /* Count = UDAT_SHORT=1 */
+#endif
+#if UCONFIG_ENABLE_PLUGINS
+    UDBG_UPlugReason,   /* Count = UPLUG_REASON_COUNT */
+    UDBG_UPlugLevel,    /* Count = UPLUG_LEVEL_COUNT */
+#endif
+    UDBG_UAcceptResult, /* Count = ULOC_ACCEPT_FALLBACK+1=3 */
+
+    /* Enum types listed below may have discontiguous values; see UDBG_HIGHEST_CONTIGUOUS_ENUM. */
+
+#if !UCONFIG_NO_COLLATION
+    UDBG_UColAttributeValue,  /* Count = UCOL_ATTRIBUTE_VALUE_COUNT */
+#endif
+    UDBG_ENUM_COUNT,    /* number of enum types above; varies with the UCONFIG_* switches */
+    UDBG_HIGHEST_CONTIGUOUS_ENUM = UDBG_UAcceptResult,  /**< last enum in this list with contiguous (testable) values. */
+    UDBG_INVALID_ENUM = -1 /**< Invalid enum value */
+};
+
+typedef enum UDebugEnumType UDebugEnumType;
+
+/**
+ * @param type the type of enum
+ * Return how many enums are contained for this type.
+ * Should be equal to the appropriate _COUNT constant or there is an error. Return -1 if unsupported.
+ */
+U_CAPI int32_t U_EXPORT2 udbg_enumCount(UDebugEnumType type);
+
+/**
+ * Convert an enum to a string
+ * @param type type of enum
+ * @param field field number
+ * @return string of the format "ERA", "YEAR", etc, or NULL if out of range or unsupported
+ */
+U_CAPI const char * U_EXPORT2 udbg_enumName(UDebugEnumType type, int32_t field);
+
+/**
+ * for consistency checking
+ * @param type the type of enum
+ * Return how many enums should be contained for this type.
+ * This is equal to the appropriate _COUNT constant or there is an error. Returns -1 if unsupported.
+ */
+U_CAPI int32_t U_EXPORT2 udbg_enumExpectedCount(UDebugEnumType type);
+
+/**
+ * For consistency checking, returns the expected enum ordinal value for the given index value.
+ * @param type which type
+ * @param field field number
+ * @return should be equal to 'field' or -1 if out of range.
+ */
+U_CAPI int32_t U_EXPORT2 udbg_enumArrayValue(UDebugEnumType type, int32_t field);
+
+/**
+ * Locate the specified field value by name.
+ * @param type which type
+ * @param name name of string (case sensitive)
+ * @return should be a field value or -1 if not found.
+ */
+U_CAPI int32_t U_EXPORT2 udbg_enumByName(UDebugEnumType type, const char *name);
+
+
+/**
+ * Return the Platform (U_PLATFORM) as a string
+ */
+U_CAPI const char *udbg_getPlatform(void);
+
+/**
+ * Get the nth system parameter's name
+ * @param i index of name, starting from zero
+ * @return name, or NULL if off the end
+ * @see udbg_getSystemParameterValueByIndex
+ */
+U_CAPI const char *udbg_getSystemParameterNameByIndex(int32_t i);
+
+/**
+ * Get the nth system parameter's value, in a user supplied buffer
+ * @param i index of value, starting from zero
+ * @param status error status
+ * @return length written (standard termination rules)
+ * @see udbg_getSystemParameterNameByIndex
+ */
+U_CAPI int32_t udbg_getSystemParameterValueByIndex(int32_t i, char *buffer, int32_t bufferCapacity, UErrorCode *status);
+
+/**
+ * Write ICU info as XML
+ */
+U_CAPI void udbg_writeIcuInfo(FILE *f);
+
+/**
+ * \def UDBG_KNOWNISSUE_LEN
+ * Length of output buffer for udbg_knownIssueURLFrom
+ */
+#define UDBG_KNOWNISSUE_LEN 255
+
+/**
+ * Open (or reopen) a 'known issue' table.
+ * @param ptr pointer to 'table'. Opaque.
+ * @return new or existing ptr
+ */
+U_CAPI void *udbg_knownIssue_openU(void *ptr, const char *ticket, char *where, const UChar *msg, UBool *firstForTicket,
+                                   UBool *firstForWhere);
+
+
+/**
+ * Open (or reopen) a 'known issue' table.
+ * @param ptr pointer to 'table'. Opaque.
+ * @return new or existing ptr
+ */
+U_CAPI void *udbg_knownIssue_open(void *ptr, const char *ticket, char *where, const char *msg, UBool *firstForTicket,
+                                   UBool *firstForWhere);
+
+/**
+ * Print 'known issue' table, to std::cout.
+ * @param ptr pointer from udbg_knownIssue_open or udbg_knownIssue_openU
+ * @return false if ptr is NULL, true otherwise.
+ */
+U_CAPI UBool udbg_knownIssue_print(void *ptr);
+
+/**
+ * Close 'known issue' table.
+ * @param ptr
+ */
+U_CAPI void udbg_knownIssue_close(void *ptr);
+
+
+#endif
diff --git a/intl/icu/source/tools/toolutil/unewdata.cpp b/intl/icu/source/tools/toolutil/unewdata.cpp
new file mode 100644
index 0000000000..27414d2eba
--- /dev/null
+++ b/intl/icu/source/tools/toolutil/unewdata.cpp
@@ -0,0 +1,286 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+*
+*   Copyright (C) 1999-2010, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*
+*******************************************************************************
+*   file name:  unewdata.c
+*   encoding:   UTF-8
+*   tab size:   8 (not used)
+*   indentation:4
+*
+*   created on: 1999oct25
+*   created by: Markus W. Scherer
+*/
+
+#include <stdio.h>
+#include "unicode/utypes.h"
+#include "unicode/putil.h"
+#include "unicode/ustring.h"
+#include "cmemory.h"
+#include "cstring.h"
+#include "filestrm.h"
+#include "unicode/udata.h"
+#include "unewdata.h"
+
+struct UNewDataMemory {
+    FileStream *file; /* output stream opened by udata_create() */
+    uint16_t headerSize; /* padded header size; udata_create() writes it together with magic1/magic2 */
+    uint8_t magic1, magic2; /* set to 0xda and 0x27 by udata_create() */
+};
+
+/*
+ * Opens "<dir><sep><name>[.<type>]" and writes the header (size + magic bytes, *pInfo, optional
+ * comment, zero padding to a multiple of 16). Returns nullptr on failure, setting *pErrorCode.
+ */
+U_CAPI UNewDataMemory * U_EXPORT2
+udata_create(const char *dir, const char *type, const char *name,
+             const UDataInfo *pInfo,
+             const char *comment,
+             UErrorCode *pErrorCode) {
+    UNewDataMemory *pData;
+    uint16_t headerSize, commentLength;
+    char filename[512];
+    uint8_t bytes[16];
+    int32_t length;
+
+    if(pErrorCode==nullptr || U_FAILURE(*pErrorCode)) {
+        return nullptr;
+    } else if(name==nullptr || *name==0 || pInfo==nullptr) {
+        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+        return nullptr;
+    }
+
+    /* allocate the data structure */
+    pData=(UNewDataMemory *)uprv_malloc(sizeof(UNewDataMemory));
+    if(pData==nullptr) {
+        *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
+        return nullptr;
+    }
+
+    char dirSepChar = U_FILE_SEP_CHAR;
+#if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR)
+    // We may need to append a different directory separator when building for Cygwin or MSYS2.
+    if(dir && *dir) {
+      if(!uprv_strchr(dir, U_FILE_SEP_CHAR) && uprv_strchr(dir, U_FILE_ALT_SEP_CHAR)) {
+          dirSepChar = U_FILE_ALT_SEP_CHAR;
+      }
+    }
+#endif
+
+    /* Check that the full path (dir + separator + name + '.' + type) fits into filename[] */
+    length = 0;
+    if(dir != nullptr && *dir != 0) {
+        length += static_cast<int32_t>(strlen(dir));
+        /* Add 1 if dir doesn't end with path sep */
+        if (dir[strlen(dir) - 1] != dirSepChar) {
+            length++;
+        }
+    }
+    length += static_cast<int32_t>(strlen(name));   /* Add the filename length */
+    if(type != nullptr && *type != 0) {
+        /* Add the type length plus 1 for the '.' that is inserted before it below */
+        length += static_cast<int32_t>(strlen(type)) + 1;
+    }
+
+    /* Reject paths that would overflow the buffer (one byte is reserved for the NUL) */
+    if(length > ((int32_t)sizeof(filename) - 1)) {
+        *pErrorCode = U_BUFFER_OVERFLOW_ERROR;
+        uprv_free(pData);
+        return nullptr;
+    }
+
+    /* open the output file */
+    if(dir!=nullptr && *dir!=0) { /* if dir has a value, we prepend it to the filename */
+        char *p=filename+strlen(dir);
+        uprv_strcpy(filename, dir);
+        if (*(p-1)!=dirSepChar) {
+            *p++=dirSepChar;
+            *p=0;
+        }
+    } else { /* otherwise, we'll output to the current dir */
+        filename[0]=0;
+    }
+    uprv_strcat(filename, name);
+    if(type!=nullptr && *type!=0) {
+        uprv_strcat(filename, ".");
+        uprv_strcat(filename, type);
+    }
+    pData->file=T_FileStream_open(filename, "wb");
+    if(pData->file==nullptr) {
+        uprv_free(pData);
+        *pErrorCode=U_FILE_ACCESS_ERROR;
+        return nullptr;
+    }
+
+    /* write the header information */
+    headerSize=(uint16_t)(pInfo->size+4);
+    if(comment!=nullptr && *comment!=0) {
+        commentLength=(uint16_t)(uprv_strlen(comment)+1);
+        headerSize+=commentLength;
+    } else {
+        commentLength=0;
+    }
+
+    /* write the size of the header, take padding into account */
+    pData->headerSize=(uint16_t)((headerSize+15)&~0xf);
+    pData->magic1=0xda;
+    pData->magic2=0x27;
+    T_FileStream_write(pData->file, &pData->headerSize, 4);
+
+    /* write the information data */
+    T_FileStream_write(pData->file, pInfo, pInfo->size);
+
+    /* write the comment */
+    if(commentLength>0) {
+        T_FileStream_write(pData->file, comment, commentLength);
+    }
+
+    /* write padding bytes to align the data section to 16 bytes */
+    headerSize&=0xf;
+    if(headerSize!=0) {
+        headerSize=(uint16_t)(16-headerSize);
+        uprv_memset(bytes, 0, headerSize);
+        T_FileStream_write(pData->file, bytes, headerSize);
+    }
+
+    return pData;
+}
+
+/* Closes the file, frees pData and returns the file size without the header;
+ * on a stream error sets U_FILE_ACCESS_ERROR and returns the unadjusted size. */
+U_CAPI uint32_t U_EXPORT2
+udata_finish(UNewDataMemory *pData, UErrorCode *pErrorCode) {
+    if(pErrorCode==nullptr || U_FAILURE(*pErrorCode)) {
+        return 0;   /* note: pData is left untouched in this case */
+    }
+    if(pData==nullptr) {
+        return 0;
+    }
+    uint32_t dataLength=0;
+    FileStream *out=pData->file;
+    if(out!=nullptr) {
+        dataLength=T_FileStream_size(out);
+        if(T_FileStream_error(out)) {
+            *pErrorCode=U_FILE_ACCESS_ERROR;
+        } else {
+            dataLength-=pData->headerSize;
+        }
+        T_FileStream_close(out);
+    }
+    uprv_free(pData);
+    return dataLength;
+}
+
+/* dummy UDataInfo cf. udata.h; passed to udata_create() by udata_createDummy() */
+static const UDataInfo dummyDataInfo = {
+    sizeof(UDataInfo),              /* size */
+    0,                              /* reservedWord */
+
+    U_IS_BIG_ENDIAN,                /* isBigEndian */
+    U_CHARSET_FAMILY,               /* charsetFamily */
+    U_SIZEOF_UCHAR,                 /* sizeofUChar */
+    0,                              /* reservedByte */
+
+    { 0, 0, 0, 0 },                 /* dummy dataFormat */
+    { 0, 0, 0, 0 },                 /* dummy formatVersion */
+    { 0, 0, 0, 0 }                  /* dummy dataVersion */
+};
+
+/* Creates and immediately finishes a file with the dummy info; on failure reports to stderr and exits. */
+U_CAPI void U_EXPORT2
+udata_createDummy(const char *dir, const char *type, const char *name, UErrorCode *pErrorCode) {
+    if(U_FAILURE(*pErrorCode)) { return; }
+    udata_finish(udata_create(dir, type, name, &dummyDataInfo, nullptr, pErrorCode), pErrorCode);
+    if(U_FAILURE(*pErrorCode)) {
+        fprintf(stderr, "error %s writing dummy data file %s" U_FILE_SEP_STRING "%s.%s\n",
+                u_errorName(*pErrorCode), dir, name, type);
+        exit(*pErrorCode);
+    }
+}
+
+/* Writes one byte; does nothing without an open file. */
+U_CAPI void U_EXPORT2
+udata_write8(UNewDataMemory *pData, uint8_t byte) {
+    if(pData==nullptr || pData->file==nullptr) { return; }
+    T_FileStream_write(pData->file, &byte, sizeof(byte));
+}
+
+/* Writes the two bytes of word as stored in memory; does nothing without an open file. */
+U_CAPI void U_EXPORT2
+udata_write16(UNewDataMemory *pData, uint16_t word) {
+    if(pData==nullptr || pData->file==nullptr) { return; }
+    T_FileStream_write(pData->file, &word, sizeof(word));
+}
+
+/* Writes the four bytes of wyde as stored in memory; does nothing without an open file. */
+U_CAPI void U_EXPORT2
+udata_write32(UNewDataMemory *pData, uint32_t wyde) {
+    if(pData==nullptr || pData->file==nullptr) { return; }
+    T_FileStream_write(pData->file, &wyde, sizeof(wyde));
+}
+
+/* Writes length bytes starting at s; a non-positive length writes nothing. */
+U_CAPI void U_EXPORT2
+udata_writeBlock(UNewDataMemory *pData, const void *s, int32_t length) {
+    if(pData==nullptr || pData->file==nullptr || length<=0) {
+        return;
+    }
+    T_FileStream_write(pData->file, s, length);
+}
+
+/* Writes length bytes of 0xaa filler, at most 16 per call to the stream. */
+U_CAPI void U_EXPORT2
+udata_writePadding(UNewDataMemory *pData, int32_t length) {
+    static const uint8_t padding[16]={
+        0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa,
+        0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa
+    };
+    if(pData==nullptr || pData->file==nullptr) {
+        return;
+    }
+    /* full 16-byte chunks first, then whatever is left (1..15 bytes) */
+    for(; length>=16; length-=16) {
+        T_FileStream_write(pData->file, padding, 16);
+    }
+    if(length>0) {
+        T_FileStream_write(pData->file, padding, length);
+    }
+}
+
+/* Writes length chars of s; length==-1 means "measure with uprv_strlen(s)". */
+U_CAPI void U_EXPORT2
+udata_writeString(UNewDataMemory *pData, const char *s, int32_t length) {
+    if(pData==nullptr || pData->file==nullptr) {
+        return;
+    }
+    const int32_t count=(length==-1) ? (int32_t)uprv_strlen(s) : length;
+    if(count>0) {
+        T_FileStream_write(pData->file, s, count);
+    }
+}
+
+/* Writes length char16_t units of s; length==-1 means "measure with u_strlen(s)". */
+U_CAPI void U_EXPORT2
+udata_writeUString(UNewDataMemory *pData, const char16_t *s, int32_t length) {
+    if(pData==nullptr || pData->file==nullptr) {
+        return;
+    }
+    const int32_t units=(length==-1) ? u_strlen(s) : length;
+    if(units>0) {
+        T_FileStream_write(pData->file, s, units*sizeof(char16_t));
+    }
+}
+
+/*
+ * Hey, Emacs, please set the following:
+ *
+ * Local Variables:
+ * indent-tabs-mode: nil
+ * End:
+ *
+ */
+
diff --git a/intl/icu/source/tools/toolutil/unewdata.h b/intl/icu/source/tools/toolutil/unewdata.h
new file mode 100644
index 0000000000..137fb49584
--- /dev/null
+++ b/intl/icu/source/tools/toolutil/unewdata.h
@@ -0,0 +1,113 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+*
+*   Copyright (C) 1999-2010, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*
+*******************************************************************************
+*   file name:  unewdata.h
+*   encoding:   UTF-8
+*   tab size:   8 (not used)
+*   indentation:4
+*
+*   created on: 1999oct25
+*   created by: Markus W. Scherer
+*/
+
+#ifndef __UNEWDATA_H__
+#define __UNEWDATA_H__
+
+#include "unicode/utypes.h"
+#include "unicode/udata.h"
+
+/* API for writing data -----------------------------------------------------*/
+
+/** @memo Forward declaration of the data memory creation type. */
+typedef struct UNewDataMemory UNewDataMemory;
+
+/**
+ * Create a new binary data file.
+ * The file-writing <code>udata_</code> functions facilitate writing
+ * binary data files that can be read by ICU's <code>udata</code> API.
+ * This function opens a new file with a filename determined from its
+ * parameters - of the form "name.type".
+ * It then writes a short header, followed by the <code>UDataInfo</code>
+ * structure and, optionally, by the comment string.
+ * It then writes padding bytes to round up to a multiple of 16 bytes.
+ * Subsequent write operations will thus start at an offset in the file
+ * that is a multiple of 16. <code>udata_getMemory()</code> will return
+ * a pointer to this same starting offset.
+ *
+ * See udata.h .
+ *
+ * @param dir A string that specifies the directory where the data will be
+ *            written. If <code>NULL</code> or empty, the file is written
+ *            to the current directory.
+ * @param type A string that specifies the type of data to be written.
+ *             For example, resource bundles are written with type "res",
+ *             conversion tables with type "cnv".
+ *             This may be <code>NULL</code> or empty.
+ * @param name A string that specifies the name of the data.
+ * @param pInfo A pointer to a correctly filled <code>UDataInfo</code>
+ *              structure that will be copied into the file.
+ * @param comment A string (e.g., a copyright statement) that will be
+ *                copied into the file if it is not <code>NULL</code>
+ *                or empty. This string serves only as a comment in the binary
+ *                file. It will not be accessible by any API.
+ * @param pErrorCode An ICU UErrorCode parameter. It must not be <code>NULL</code>.
+ */
+U_CAPI UNewDataMemory * U_EXPORT2
+udata_create(const char *dir, const char *type, const char *name,
+             const UDataInfo *pInfo,
+             const char *comment,
+             UErrorCode *pErrorCode);
+
+/** @memo Close a newly written binary file. */
+U_CAPI uint32_t U_EXPORT2
+udata_finish(UNewDataMemory *pData, UErrorCode *pErrorCode);
+
+/** @memo Write a dummy data file. */
+U_CAPI void U_EXPORT2
+udata_createDummy(const char *dir, const char *type, const char *name, UErrorCode *pErrorCode);
+
+/** @memo Write an 8-bit byte to the file. */
+U_CAPI void U_EXPORT2
+udata_write8(UNewDataMemory *pData, uint8_t byte);
+
+/** @memo Write a 16-bit word to the file. */
+U_CAPI void U_EXPORT2
+udata_write16(UNewDataMemory *pData, uint16_t word);
+
+/** @memo Write a 32-bit word to the file. */
+U_CAPI void U_EXPORT2
+udata_write32(UNewDataMemory *pData, uint32_t wyde);
+
+/** @memo Write a block of bytes to the file. */
+U_CAPI void U_EXPORT2
+udata_writeBlock(UNewDataMemory *pData, const void *s, int32_t length);
+
+/** @memo Write a block of arbitrary padding bytes to the file. */
+U_CAPI void U_EXPORT2
+udata_writePadding(UNewDataMemory *pData, int32_t length);
+
+/** @memo Write a <code>char*</code> string of platform "invariant characters" to the file. */
+U_CAPI void U_EXPORT2
+udata_writeString(UNewDataMemory *pData, const char *s, int32_t length);
+
+/** @memo Write a <code>UChar*</code> string of Unicode character code units to the file. */
+U_CAPI void U_EXPORT2
+udata_writeUString(UNewDataMemory *pData, const UChar *s, int32_t length);
+
+
+/*
+ * Hey, Emacs, please set the following:
+ *
+ * Local Variables:
+ * indent-tabs-mode: nil
+ * End:
+ *
+ */
+
+#endif
diff --git a/intl/icu/source/tools/toolutil/uoptions.cpp b/intl/icu/source/tools/toolutil/uoptions.cpp
new file mode 100644
index 0000000000..808164ae4d
--- /dev/null
+++ b/intl/icu/source/tools/toolutil/uoptions.cpp
@@ -0,0 +1,133 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+*
+*   Copyright (C) 2000-2015, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*
+*******************************************************************************
+*   file name:  uoptions.c
+*   encoding:   UTF-8
+*   tab size:   8 (not used)
+*   indentation:4
+*
+*   created on: 2000apr17
+*   created by: Markus W. Scherer
+*
+*   This file provides a command line argument parser.
+*/
+
+#include "unicode/utypes.h"
+#include "cstring.h"
+#include "uoptions.h"
+
+U_CAPI int U_EXPORT2
+u_parseArgs(int argc, char* argv[],
+            int optionCount, UOption options[]) {
+    char *arg;
+    int i=1, remaining=1;
+    char c, stopOptions=0;
+
+    while(i<argc) {
+        arg=argv[i];
+        if(!stopOptions && *arg=='-' && (c=arg[1])!=0) {
+            /* process an option */
+            UOption *option=nullptr;
+            arg+=2;
+            if(c=='-') {
+                /* process a long option */
+                if(*arg==0) {
+                    /* stop processing options after "--" */
+                    stopOptions=1;
+                } else {
+                    /* search for the option string */
+                    int j;
+                    for(j=0; j<optionCount; ++j) {
+                        if(options[j].longName && uprv_strcmp(arg, options[j].longName)==0) {
+                            option=options+j;
+                            break;
+                        }
+                    }
+                    if(option==nullptr) {
+                        /* no option matches */
+                        return -i;
+                    }
+                    option->doesOccur=1;
+
+                    if(option->hasArg!=UOPT_NO_ARG) {
+                        /* parse the argument for the option, if any */
+                        if(i+1<argc && !(argv[i+1][0]=='-' && argv[i+1][1]!=0)) {
+                            /* argument in the next argv[], and there is not an option in there */
+                            option->value=argv[++i];
+                        } else if(option->hasArg==UOPT_REQUIRES_ARG) {
+                            /* there is no argument, but one is required: return with error */
+                            option->doesOccur=0;
+                            return -i;
+                        }
+                    }
+
+                    if(option->optionFn!=nullptr && option->optionFn(option->context, option)<0) {
+                        /* the option function was called and returned an error */
+                        option->doesOccur=0;
+                        return -i;
+                    }
+                }
+            } else {
+                /* process one or more short options */
+                do {
+                    /* search for the option letter */
+                    int j;
+                    for(j=0; j<optionCount; ++j) {
+                        if(c==options[j].shortName) {
+                            option=options+j;
+                            break;
+                        }
+                    }
+                    if(option==nullptr) {
+                        /* no option matches */
+                        return -i;
+                    }
+                    option->doesOccur=1;
+
+                    if(option->hasArg!=UOPT_NO_ARG) {
+                        /* parse the argument for the option, if any */
+                        if(*arg!=0) {
+                            /* argument following in the same argv[] */
+                            option->value=arg;
+                            /* do not process the rest of this arg as option letters */
+                            break;
+                        } else if(i+1<argc && !(argv[i+1][0]=='-' && argv[i+1][1]!=0)) {
+                            /* argument in the next argv[], and there is not an option in there */
+                            option->value=argv[++i];
+                            /* this break is redundant because we know that *arg==0 */
+                            break;
+                        } else if(option->hasArg==UOPT_REQUIRES_ARG) {
+                            /* there is no argument, but one is required: return with error */
+                            option->doesOccur=0;
+                            return -i;
+                        }
+                    }
+
+                    if(option->optionFn!=nullptr && option->optionFn(option->context, option)<0) {
+                        /* the option function was called and returned an error */
+                        option->doesOccur=0;
+                        return -i;
+                    }
+
+                    /* get the next option letter */
+                    option=nullptr;
+                    c=*arg++;
+                } while(c!=0);
+            }
+
+            /* go to next argv[] */
+            ++i;
+        } else {
+            /* move a non-option up in argv[] */
+            argv[remaining++]=arg;
+            ++i;
+        }
+    }
+    return remaining;
+}
diff --git a/intl/icu/source/tools/toolutil/uoptions.h b/intl/icu/source/tools/toolutil/uoptions.h
new file mode 100644
index 0000000000..d00e3da924
--- /dev/null
+++ b/intl/icu/source/tools/toolutil/uoptions.h
@@ -0,0 +1,143 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+*
+*   Copyright (C) 2000-2011, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*
+*******************************************************************************
+*   file name:  uoptions.h
+*   encoding:   UTF-8
+*   tab size:   8 (not used)
+*   indentation:4
+*
+*   created on: 2000apr17
+*   created by: Markus W. Scherer
+*
+*   This file provides a command line argument parser.
+*/
+
+#ifndef __UOPTIONS_H__
+#define __UOPTIONS_H__
+
+#include "unicode/utypes.h"
+
+/* This should usually be called before calling u_parseArgs */
+/*#if U_PLATFORM == U_PF_OS390 && (U_CHARSET_FAMILY == U_ASCII_FAMILY)*/
+    /* translate args from EBCDIC to ASCII */
+/*#   define U_MAIN_INIT_ARGS(argc, argv) __argvtoascii_a(argc, argv)*/
+/*#elif defined(XP_MAC_CONSOLE)*/
+#if defined(XP_MAC_CONSOLE)
+#   include <console.h>
+    /* Get the arguments from the GUI, since old Macs don't have a console Window. */
+#   define U_MAIN_INIT_ARGS(argc, argv) argc = ccommand((char***)&argv)
+#else
+    /* Normally we do nothing. */
+#   define U_MAIN_INIT_ARGS(argc, argv)
+#endif
+
+
+
+/* forward declarations for the function declaration */
+struct UOption;
+typedef struct UOption UOption;
+
+/* function to be called for a command line option */
+typedef int UOptionFn(void *context, UOption *option);
+
+/* values of UOption.hasArg */
+enum { UOPT_NO_ARG, UOPT_REQUIRES_ARG, UOPT_OPTIONAL_ARG };
+
+/* structure describing a command line option */
+struct UOption {
+    const char *longName;   /* "foo" for --foo; may be NULL, then only shortName can match */
+    const char *value;      /* output: set by u_parseArgs() to point at the argument string, if any */
+    UOptionFn *optionFn;    /* called when this option occurs; a negative result makes u_parseArgs() fail */
+    void *context;          /* passed as the first parameter of optionFn */
+    char shortName;         /* 'f' for -f */
+    char hasArg;            /* UOPT_NO_ARG, UOPT_REQUIRES_ARG or UOPT_OPTIONAL_ARG */
+    char doesOccur;         /* output: set to 1 by u_parseArgs() when seen, reset to 0 on error */
+};
+
+/* macro for an entry in a declaration of UOption[] */
+#define UOPTION_DEF(longName, shortName, hasArg) \
+    { longName, NULL, NULL, NULL, shortName, hasArg, 0 }
+
+/* ICU Tools option definitions */
+#define UOPTION_HELP_H              UOPTION_DEF("help", 'h', UOPT_NO_ARG)
+#define UOPTION_HELP_QUESTION_MARK  UOPTION_DEF("help", '?', UOPT_NO_ARG)
+#define UOPTION_VERBOSE             UOPTION_DEF("verbose", 'v', UOPT_NO_ARG)
+#define UOPTION_QUIET               UOPTION_DEF("quiet", 'q', UOPT_NO_ARG)
+#define UOPTION_VERSION             UOPTION_DEF("version", 'V', UOPT_NO_ARG)
+#define UOPTION_COPYRIGHT           UOPTION_DEF("copyright", 'c', UOPT_NO_ARG)
+
+#define UOPTION_DESTDIR             UOPTION_DEF("destdir", 'd', UOPT_REQUIRES_ARG)
+#define UOPTION_SOURCEDIR           UOPTION_DEF("sourcedir", 's', UOPT_REQUIRES_ARG)
+#define UOPTION_ENCODING            UOPTION_DEF("encoding", 'e', UOPT_REQUIRES_ARG)
+#define UOPTION_ICUDATADIR          UOPTION_DEF("icudatadir", 'i', UOPT_REQUIRES_ARG)
+#define UOPTION_WRITE_JAVA          UOPTION_DEF("write-java", 'j', UOPT_OPTIONAL_ARG)
+#define UOPTION_PACKAGE_NAME        UOPTION_DEF("package-name", 'p', UOPT_REQUIRES_ARG)
+#define UOPTION_BUNDLE_NAME         UOPTION_DEF("bundle-name", 'b', UOPT_REQUIRES_ARG)
+
+/**
+ * C Command line argument parser.
+ *
+ * This function takes the argv[argc] command line and a description of
+ * the program's options in form of an array of UOption structures.
+ * Each UOption defines a long and a short name (a string and a character)
+ * for options like "--foo" and "-f".
+ *
+ * Each option is marked with whether it does not take an argument,
+ * requires one, or optionally takes one. The argument may follow in
+ * the same argv[] entry for short options, or it may always follow
+ * in the next argv[] entry.
+ *
+ * An argument is in the next argv[] entry for both long and short name
+ * options, except it is taken from directly behind the short name in
+ * its own argv[] entry if there are characters following the option letter.
+ * An argument in its own argv[] entry must not begin with a '-'
+ * unless it is only the '-' itself. There is no restriction of the
+ * argument format if it is part of the short name options's argv[] entry.
+ *
+ * The argument is stored in the value field of the corresponding
+ * UOption entry, and the doesOccur field is set to 1 if the option
+ * is found at all.
+ *
+ * Short name options without arguments can be collapsed into a single
+ * argv[] entry. After an option letter takes an argument, following
+ * letters will be taken as its argument.
+ *
+ * If the same option is found several times, then the last
+ * argument value will be stored in the value field.
+ *
+ * For each option, a function can be called. This could be used
+ * for options that occur multiple times and all arguments are to
+ * be collected.
+ *
+ * All options are removed from the argv[] array itself. If the parser
+ * is successful, then it returns the number of remaining non-option
+ * strings (including argv[0]).
+ * argv[0], the program name, is never read or modified.
+ *
+ * An option "--" ends option processing; everything after this
+ * remains in the argv[] array.
+ *
+ * An option string "-" alone is treated as a non-option.
+ *
+ * If an option is not recognized or an argument missing, then
+ * the parser returns with the negative index of the argv[] entry
+ * where the error was detected.
+ *
+ * The OS/400 compiler requires that argv either be "char* argv[]",
+ * or "const char* const argv[]", and it will not accept, 
+ * "const char* argv[]" as a definition for main().
+ *
+ * @param argv This parameter is modified
+ * @param options This parameter is modified
+ */
+U_CAPI int U_EXPORT2
+u_parseArgs(int argc, char* argv[],
+            int optionCount, UOption options[]);
+
+#endif
diff --git a/intl/icu/source/tools/toolutil/uparse.cpp b/intl/icu/source/tools/toolutil/uparse.cpp
new file mode 100644
index 0000000000..5aee48b5a4
--- /dev/null
+++ b/intl/icu/source/tools/toolutil/uparse.cpp
@@ -0,0 +1,383 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+*
+*   Copyright (C) 2000-2012, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*
+*******************************************************************************
+*   file name:  uparse.c
+*   encoding:   UTF-8
+*   tab size:   8 (not used)
+*   indentation:4
+*
+*   created on: 2000apr18
+*   created by: Markus W. Scherer
+*
+*   This file provides a parser for files that are delimited by one single
+*   character like ';' or TAB. Example: the Unicode Character Properties files
+*   like UnicodeData.txt are semicolon-delimited.
+*/
+
+#include "unicode/utypes.h"
+#include "unicode/uchar.h"
+#include "unicode/ustring.h"
+#include "unicode/utf16.h"
+#include "cstring.h"
+#include "filestrm.h"
+#include "uparse.h"
+#include "ustr_imp.h"
+
+#include <stdio.h>
+
+U_CAPI const char * U_EXPORT2
+u_skipWhitespace(const char *s) {
+    /* Advance past leading space, TAB, CR and LF; the terminating NUL stops the scan. */
+    for(; U_IS_INV_WHITESPACE(*s); ++s) {
+    }
+    return s;
+}
+
+U_CAPI char * U_EXPORT2
+u_rtrim(char *s) {
+    char *limit=uprv_strchr(s, 0); /* start at the terminating NUL */
+    for(; limit>s && U_IS_INV_WHITESPACE(limit[-1]); --limit) {
+        limit[-1]=0; /* overwrite one trailing whitespace character */
+    }
+    return limit;
+}
+
+/*
+ * If the string starts with # @missing: then return the pointer to the
+ * following non-whitespace character.
+ * Otherwise return the original pointer.
+ * Unicode 5.0 adds such lines in some data files to document
+ * default property values.
+ * Poor man's regex for variable amounts of white space.
+ */
+static const char *
+getMissingLimit(const char *s) {
+    /* each token of "# @missing:" may be preceded by any amount of white space */
+    const char *p=u_skipWhitespace(s);
+    if(*p!='#') { return s; }
+    p=u_skipWhitespace(p+1);
+    if(*p!='@') { return s; }
+    p=u_skipWhitespace(p+1);
+    if(0!=strncmp(p, "missing", 7)) { return s; }
+    p=u_skipWhitespace(p+7);
+    if(*p!=':') { return s; }
+    /* matched: continue at the first non-whitespace character after the colon */
+    return u_skipWhitespace(p+1);
+}
+
+U_CAPI void U_EXPORT2
+u_parseDelimitedFile(const char *filename, char delimiter,
+                     char *fields[][2], int32_t fieldCount,
+                     UParseLineFn *lineFn, void *context,
+                     UErrorCode *pErrorCode) { /* reads the file line by line and calls lineFn for each data line */
+    FileStream *file;
+    char line[10000]; /* sizeof(line) is the limit passed to T_FileStream_readLine() */
+    char *start, *limit;
+    int32_t i, length;
+
+    if(U_FAILURE(*pErrorCode)) {
+        return;
+    }
+
+    if(fields==nullptr || lineFn==nullptr || fieldCount<=0) {
+        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+        return;
+    }
+
+    if(filename==nullptr || *filename==0 || (*filename=='-' && filename[1]==0)) { /* nullptr, "" or "-" */
+        filename=nullptr; /* remembered at the end: this stream is not closed here */
+        file=T_FileStream_stdin();
+    } else {
+        file=T_FileStream_open(filename, "r");
+    }
+    if(file==nullptr) {
+        *pErrorCode=U_FILE_ACCESS_ERROR;
+        return;
+    }
+
+    while(T_FileStream_readLine(file, line, sizeof(line))!=nullptr) {
+        /* trim trailing whitespace, which includes the line ending */
+        length=(int32_t)(u_rtrim(line)-line);
+
+        /*
+         * detect a line with # @missing:
+         * start parsing after that, or else from the beginning of the line
+         * set the default warning for @missing lines
+         */
+        start=(char *)getMissingLimit(line);
+        if(start==line) {
+            *pErrorCode=U_ZERO_ERROR; /* the code is reset for every line that is not an @missing line */
+        } else {
+            *pErrorCode=U_USING_DEFAULT_WARNING;
+        }
+
+        /* skip this line if it is empty or a comment */
+        if(*start==0 || *start=='#') {
+            continue;
+        }
+
+        /* remove in-line comments */
+        limit=uprv_strchr(start, '#');
+        if(limit!=nullptr) {
+            /* back up over the white space before the pound sign */
+            while(limit>start && U_IS_INV_WHITESPACE(*(limit-1))) {
+                --limit;
+            }
+
+            /* truncate the line */
+            *limit=0;
+        }
+
+        /* skip lines with only whitespace */
+        if(u_skipWhitespace(start)[0]==0) {
+            continue;
+        }
+
+        /* segment the line: record start and limit pointers for each of the fieldCount fields */
+        for(i=0; i<fieldCount; ++i) {
+            /* set the limit pointer of this field */
+            limit=start;
+            while(*limit!=delimiter && *limit!=0) {
+                ++limit;
+            }
+
+            /* set the field start and limit in the fields array */
+            fields[i][0]=start;
+            fields[i][1]=limit;
+
+            /* set start to the beginning of the next field, if any */
+            start=limit;
+            if(*start!=0) {
+                ++start;
+            } else if(i+1<fieldCount) { /* the line ended before the last expected field */
+                *pErrorCode=U_PARSE_ERROR;
+                limit=line+length; /* limit and i are not read again before the function returns */
+                i=fieldCount;
+                break;
+            }
+        }
+
+        /* too few fields? */
+        if(U_FAILURE(*pErrorCode)) {
+            break;
+        }
+
+        /* hand the segmented line to the caller's line function */
+        lineFn(context, fields, fieldCount, pErrorCode);
+        if(U_FAILURE(*pErrorCode)) {
+            break;
+        }
+    }
+
+    if(filename!=nullptr) { /* close only a stream that was opened by name above */
+        T_FileStream_close(file);
+    }
+}
+
+/*
+ * Parse a list of hexadecimal code points, separated by white space and ended by ';' or NUL.
+ * Store them as a UTF-32 string in dest[destCapacity].
+ * Return the number of code points stored, or 0 after a parse or argument error.
+ */
+U_CAPI int32_t U_EXPORT2
+u_parseCodePoints(const char *s,
+                  uint32_t *dest, int32_t destCapacity,
+                  UErrorCode *pErrorCode) {
+    char *end;
+    unsigned long value; /* not uint32_t: see the range check below */
+    int32_t count;
+
+    if(U_FAILURE(*pErrorCode)) {
+        return 0;
+    }
+    if(s==nullptr || destCapacity<0 || (destCapacity>0 && dest==nullptr)) {
+        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+        return 0;
+    }
+
+    count=0;
+    for(;;) {
+        s=u_skipWhitespace(s);
+        if(*s==';' || *s==0) {
+            return count;
+        }
+
+        /* read one code point; the range check runs before narrowing so that wider input is rejected */
+        value=uprv_strtoul(s, &end, 16);
+        if(end<=s || (!U_IS_INV_WHITESPACE(*end) && *end!=';' && *end!=0) || value>=0x110000) {
+            *pErrorCode=U_PARSE_ERROR;
+            return 0;
+        }
+
+        /* append it; on overflow the error is set, count stays put and parsing continues */
+        if(count<destCapacity) {
+            dest[count++]=(uint32_t)value;
+        } else {
+            *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
+        }
+
+        /* go to the following characters */
+        s=end;
+    }
+}
+
+/*
+ * Parse a list of hexadecimal code points (white space separated, ended by ';' or NUL).
+ * Store them as a UTF-16 string in dest[destCapacity].
+ * Set the first code point in *pFirst (0xffffffff if there is none).
+ * @return The length of the string in numbers of UChars.
+ */
+U_CAPI int32_t U_EXPORT2
+u_parseString(const char *s,
+              char16_t *dest, int32_t destCapacity,
+              uint32_t *pFirst,
+              UErrorCode *pErrorCode) {
+    char *end;
+    unsigned long value; /* not uint32_t: see the range check below */
+    int32_t destLength;
+
+    if(U_FAILURE(*pErrorCode)) {
+        return 0;
+    }
+    if(s==nullptr || destCapacity<0 || (destCapacity>0 && dest==nullptr)) {
+        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+        return 0;
+    }
+
+    if(pFirst!=nullptr) {
+        *pFirst=0xffffffff;
+    }
+
+    destLength=0;
+    for(;;) {
+        s=u_skipWhitespace(s);
+        if(*s==';' || *s==0) {
+            /* end of the list: terminate if there is room, otherwise report how the output fit */
+            if(destLength<destCapacity) {
+                dest[destLength]=0;
+            } else if(destLength==destCapacity) {
+                *pErrorCode=U_STRING_NOT_TERMINATED_WARNING;
+            } else {
+                *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
+            }
+            return destLength;
+        }
+
+        /* read one code point; the range check runs before narrowing so that wider input is rejected */
+        value=uprv_strtoul(s, &end, 16);
+        if(end<=s || (!U_IS_INV_WHITESPACE(*end) && *end!=';' && *end!=0) || value>=0x110000) {
+            *pErrorCode=U_PARSE_ERROR;
+            return 0;
+        }
+
+        /* store the first code point */
+        if(pFirst!=nullptr) {
+            *pFirst=(uint32_t)value;
+            pFirst=nullptr;
+        }
+
+        /* append it to the destination array, or only count its length if it does not fit */
+        if((destLength+U16_LENGTH(value))<=destCapacity) {
+            U16_APPEND_UNSAFE(dest, destLength, value);
+        } else {
+            destLength+=U16_LENGTH(value);
+        }
+
+        /* go to the following characters */
+        s=end;
+    }
+}
+
+/* read a range like start or start..end; *terminator is set to the first character after it */
+U_CAPI int32_t U_EXPORT2
+u_parseCodePointRangeAnyTerminator(const char *s,
+                                   uint32_t *pStart, uint32_t *pEnd,
+                                   const char **terminator,
+                                   UErrorCode *pErrorCode) {
+    char *end;
+    unsigned long value; /* strtoul's full width, so the range checks see untruncated input */
+
+    if(U_FAILURE(*pErrorCode)) {
+        return 0;
+    }
+    if(s==nullptr || pStart==nullptr || pEnd==nullptr || terminator==nullptr) {
+        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+        return 0;
+    }
+
+    /* read the start code point */
+    s=u_skipWhitespace(s);
+    value=uprv_strtoul(s, &end, 16);
+    if(end<=s || value>=0x110000) {
+        *pErrorCode=U_PARSE_ERROR;
+        return 0;
+    }
+    *pStart=*pEnd=(uint32_t)value;
+
+    /* is there a "..end"? */
+    s=u_skipWhitespace(end);
+    if(*s!='.' || s[1]!='.') {
+        *terminator=end;
+        return 1;
+    }
+    s=u_skipWhitespace(s+2);
+
+    /* read the end code point */
+    value=uprv_strtoul(s, &end, 16);
+    if(end<=s || value>=0x110000) {
+        *pErrorCode=U_PARSE_ERROR;
+        return 0;
+    }
+    *pEnd=(uint32_t)value;
+
+    /* is this a valid range? */
+    if(value<*pStart) {
+        *pErrorCode=U_PARSE_ERROR;
+        return 0;
+    }
+
+    *terminator=end;
+    return (int32_t)(value-*pStart+1);
+}
+
+U_CAPI int32_t U_EXPORT2
+u_parseCodePointRange(const char *s,
+                      uint32_t *pStart, uint32_t *pEnd,
+                      UErrorCode *pErrorCode) {
+    const char *rest;
+    int32_t count=u_parseCodePointRangeAnyTerminator(s, pStart, pEnd, &rest, pErrorCode);
+    if(U_FAILURE(*pErrorCode)) {
+        return count;
+    }
+    rest=u_skipWhitespace(rest); /* only white space may precede the ';' or NUL */
+    if(*rest==';' || *rest==0) {
+        return count;
+    }
+    *pErrorCode=U_PARSE_ERROR;
+    return 0;
+}
+
+/* Decode pairs of hex digits from source into bytes in dest; sets U_PARSE_ERROR on non-hex input. */
+U_CAPI int32_t U_EXPORT2
+u_parseUTF8(const char *source, int32_t sLen, char *dest, int32_t destCapacity, UErrorCode *status) {
+    int32_t i = 0;
+    unsigned int value = 0;
+    if(sLen == -1) {
+        sLen = (int32_t)strlen(source);
+    }
+    for(const char *read = source; read < source+sLen; read += 2, ++i) {
+        if(sscanf(read, "%2x", &value) != 1) {
+            if(status != nullptr) { *status = U_PARSE_ERROR; }
+            return 0;
+        }
+        if(i < destCapacity) {
+            dest[i] = (char)value;
+        }
+    }
+    return u_terminateChars(dest, destCapacity, i, status);
+}
diff --git a/intl/icu/source/tools/toolutil/uparse.h b/intl/icu/source/tools/toolutil/uparse.h
new file mode 100644
index 0000000000..df0e79a21f
--- /dev/null
+++ b/intl/icu/source/tools/toolutil/uparse.h
@@ -0,0 +1,153 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+*
+*   Copyright (C) 2000-2010, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*
+*******************************************************************************
+*   file name:  uparse.h
+*   encoding:   UTF-8
+*   tab size:   8 (not used)
+*   indentation:4
+*
+*   created on: 2000apr18
+*   created by: Markus W. Scherer
+*
+*   This file provides a parser for files that are delimited by one single
+*   character like ';' or TAB. Example: the Unicode Character Properties files
+*   like UnicodeData.txt are semicolon-delimited.
+*/
+
+#ifndef __UPARSE_H__
+#define __UPARSE_H__
+
+#include "unicode/utypes.h"
+
+/**
+ * Is c an invariant-character whitespace (space, TAB, CR or LF)?
+ * @param c invariant character; the macro may evaluate it up to four times
+ */
+#define U_IS_INV_WHITESPACE(c) ((c)==' ' || (c)=='\t' || (c)=='\r' || (c)=='\n')
+
+U_CDECL_BEGIN
+
+/**
+ * Skip space ' ', TAB '\t', CR '\r' and LF '\n' characters (see U_IS_INV_WHITESPACE).
+ *
+ * @param s Pointer to characters.
+ * @return Pointer to first character at or after s that is not one of these.
+ */
+U_CAPI const char * U_EXPORT2
+u_skipWhitespace(const char *s);
+
+/**
+ * Trim whitespace (including line endings) from the end of the string.
+ *
+ * @param s Pointer to the string.
+ * @return Pointer to the new end of the string.
+ */
+U_CAPI char * U_EXPORT2
+u_rtrim(char *s);
+
+/** Function type for u_parseDelimitedFile(). */
+typedef void U_CALLCONV
+UParseLineFn(void *context,
+              char *fields[][2],
+              int32_t fieldCount,
+              UErrorCode *pErrorCode);
+
+/**
+ * Parser for files that are similar to UnicodeData.txt:
+ * This function opens the file and reads it line by line. It skips empty lines
+ * and comment lines that start with a '#'.
+ * All other lines are separated into fields with one delimiter character
+ * (semicolon for Unicode Properties files) between two fields. The last field in
+ * a line does not need to be terminated with a delimiter.
+ *
+ * For each line, after segmenting it, a line function is called.
+ * It gets passed the array of field start and limit pointers that is
+ * passed into this parser and filled by it for each line.
+ * For each field i of the line, the start pointer in fields[i][0]
+ * points to the beginning of the field, while the limit pointer in fields[i][1]
+ * points behind the field, i.e., to the delimiter or the line end.
+ *
+ * The context parameter of the line function is
+ * the same as the one for the parse function.
+ *
+ * The line function may modify the contents of the fields including the
+ * limit characters.
+ *
+ * If the file cannot be opened, or there is a parsing error or a field function
+ * sets *pErrorCode, then the parser returns with *pErrorCode set to an error code.
+ */
+U_CAPI void U_EXPORT2
+u_parseDelimitedFile(const char *filename, char delimiter,
+                     char *fields[][2], int32_t fieldCount,
+                     UParseLineFn *lineFn, void *context,
+                     UErrorCode *pErrorCode);
+
+/**
+ * Parse a string of code points like 0061 0308 0300.
+ * s must end with either ';' or NUL.
+ *
+ * @return Number of code points.
+ */
+U_CAPI int32_t U_EXPORT2
+u_parseCodePoints(const char *s,
+                  uint32_t *dest, int32_t destCapacity,
+                  UErrorCode *pErrorCode);
+
+/**
+ * Parse a list of code points like 0061 0308 0300
+ * into a UChar * string.
+ * s must end with either ';' or NUL.
+ *
+ * Set the first code point in *pFirst.
+ *
+ * @param s Input char * string.
+ * @param dest Output string buffer.
+ * @param destCapacity Capacity of dest in numbers of UChars.
+ * @param pFirst If pFirst!=NULL the *pFirst will be set to the first
+ *               code point in the string.
+ * @param pErrorCode ICU error code.
+ * @return The length of the string in numbers of UChars.
+ */
+U_CAPI int32_t U_EXPORT2
+u_parseString(const char *s,
+              UChar *dest, int32_t destCapacity,
+              uint32_t *pFirst,
+              UErrorCode *pErrorCode);
+
+/**
+ * Parse a code point range like
+ * 0085 or
+ * 4E00..9FA5.
+ *
+ * s must contain such a range and end with either ';' or NUL.
+ *
+ * @return Length of code point range, end-start+1
+ */
+U_CAPI int32_t U_EXPORT2
+u_parseCodePointRange(const char *s,
+                      uint32_t *pStart, uint32_t *pEnd,
+                      UErrorCode *pErrorCode);
+
+/**
+ * Same as u_parseCodePointRange() but the range may be terminated by
+ * any character. The position of the terminating character is returned via
+ * the *terminator output parameter.
+ */
+U_CAPI int32_t U_EXPORT2
+u_parseCodePointRangeAnyTerminator(const char *s,
+                                   uint32_t *pStart, uint32_t *pEnd,
+                                   const char **terminator,
+                                   UErrorCode *pErrorCode);
+
+U_CAPI int32_t U_EXPORT2
+u_parseUTF8(const char *source, int32_t sLen, char *dest, int32_t destCapacity, UErrorCode *status);
+
+U_CDECL_END
+
+#endif
diff --git a/intl/icu/source/tools/toolutil/writesrc.cpp b/intl/icu/source/tools/toolutil/writesrc.cpp
new file mode 100644
index 0000000000..55c2f277b3
--- /dev/null
+++ b/intl/icu/source/tools/toolutil/writesrc.cpp
@@ -0,0 +1,515 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+*
+*   Copyright (C) 2005-2012, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*
+*******************************************************************************
+*   file name:  writesrc.c
+*   encoding:   UTF-8
+*   tab size:   8 (not used)
+*   indentation:4
+*
+*   created on: 2005apr23
+*   created by: Markus W. Scherer
+*
+*   Helper functions for writing source code for data.
+*/
+
+#include <stdio.h>
+#include <time.h>
+
+// The C99 standard suggested that C++ implementations not define PRId64 etc. constants
+// unless this macro is defined.
+// See the Notes at https://en.cppreference.com/w/cpp/types/integer .
+// Similar to defining __STDC_LIMIT_MACROS in unicode/ptypes.h .
+#ifndef __STDC_FORMAT_MACROS
+#   define __STDC_FORMAT_MACROS
+#endif
+#include <cinttypes>
+
+#include "unicode/utypes.h"
+#include "unicode/putil.h"
+#include "unicode/ucptrie.h"
+#include "unicode/errorcode.h"
+#include "unicode/uniset.h"
+#include "unicode/usetiter.h"
+#include "unicode/utf16.h"
+#include "utrie2.h"
+#include "cstring.h"
+#include "writesrc.h"
+#include "util.h"
+
+U_NAMESPACE_BEGIN
+
+ValueNameGetter::~ValueNameGetter() {} // out-of-line definition; the destructor body is empty
+
+U_NAMESPACE_END
+
+U_NAMESPACE_USE
+
+/* Opens path/filename (only filename if path==nullptr) for writing; returns nullptr on failure. */
+static FILE *
+usrc_createWithoutHeader(const char *path, const char *filename) {
+    char buffer[1024];
+    const char *p=filename;
+
+    if(path!=nullptr) {
+        /* reject names that an unchecked copy would write past buffer (+2: separator and NUL) */
+        size_t pathLength=uprv_strlen(path);
+        if(pathLength+uprv_strlen(filename)+2>sizeof(buffer)) {
+            fprintf(stderr, "usrc_create(%s, %s): path too long\n", path, filename);
+            return nullptr;
+        }
+        /* concatenate path and filename, with U_FILE_SEP_CHAR in between if necessary */
+        uprv_strcpy(buffer, path);
+        char *q=buffer+pathLength;
+        if(q>buffer && *(q-1)!=U_FILE_SEP_CHAR && *(q-1)!=U_FILE_ALT_SEP_CHAR) {
+            *q++=U_FILE_SEP_CHAR;
+        }
+        uprv_strcpy(q, filename);
+        p=buffer;
+    }
+
+    FILE *f=fopen(p, "w");
+    if (f==nullptr) {
+        fprintf(stderr, "usrc_create(%s, %s): unable to create file\n",
+                path!=nullptr ? path : "", filename);
+    }
+    return f;
+}
+
+U_CAPI FILE * U_EXPORT2
+usrc_create(const char *path, const char *filename, int32_t copyrightYear, const char *generator) {
+    /* the two header blocks are written with "//" as the comment prefix */
+    FILE *out = usrc_createWithoutHeader(path, filename);
+    if (out != nullptr) {
+        usrc_writeCopyrightHeader(out, "//", copyrightYear);
+        usrc_writeFileNameGeneratedBy(out, "//", filename, generator);
+    }
+    return out;
+}
+
+U_CAPI FILE * U_EXPORT2
+usrc_createTextData(const char *path, const char *filename, int32_t copyrightYear, const char *generator) {
+    /* the two header blocks are written with "#" as the comment prefix */
+    FILE *out = usrc_createWithoutHeader(path, filename);
+    if (out != nullptr) {
+        usrc_writeCopyrightHeader(out, "#", copyrightYear);
+        usrc_writeFileNameGeneratedBy(out, "#", filename, generator);
+    }
+    return out;
+}
+
+U_CAPI void U_EXPORT2
+usrc_writeCopyrightHeader(FILE *f, const char *prefix, int32_t copyrightYear) {
+    fprintf(f,
+        "%s Copyright (C) %d and later: Unicode, Inc. and others.\n"
+        "%s License & terms of use: http://www.unicode.org/copyright.html\n",
+        prefix, copyrightYear, prefix);
+    if (copyrightYear > 2016) {
+        return; /* the IBM notice below is written for 2016 and earlier only */
+    }
+    fprintf(f, "%s Copyright (C) 1999-2016, International Business Machines\n"
+               "%s Corporation and others.  All Rights Reserved.\n",
+            prefix, prefix);
+}
+
+U_CAPI void U_EXPORT2
+usrc_writeFileNameGeneratedBy(
+        FILE *f,
+        const char *prefix,
+        const char *filename,
+        const char *generator) {
+    char date[1024];
+    time_t now;
+
+    time(&now);
+    const struct tm *local=localtime(&now);
+    /* without a generator name, today's local date (YYYY-MM-DD) is written in its place */
+    const char *by=generator;
+    if(by==nullptr) {
+        strftime(date, sizeof(date), "%Y-%m-%d", local);
+        by=date;
+    }
+    /* four lines that start with the caller's comment prefix, followed by an empty line */
+    fprintf(f,
+        "%s\n"
+        "%s file name: %s\n"
+        "%s\n"
+        "%s machine-generated by: %s\n"
+        "\n",
+        prefix, prefix, filename, prefix, prefix, by);
+}
+
+U_CAPI void U_EXPORT2
+usrc_writeArray(FILE *f,
+                const char *prefix,
+                const void *p, int32_t width, int32_t length,
+                const char *indent,
+                const char *postfix) {
+    /*
+     * Writes the length elements of p as a comma-separated list with at most
+     * 16 values per line; every continuation line starts with indent.
+     * width is the element size in bits: 8, 16 and 32 are read as unsigned
+     * integers, 64 as int64_t. Any other width only prints a message to stderr.
+     * prefix, if not nullptr, is used as a printf format that receives
+     * (long)length; postfix, if not nullptr, is written verbatim at the end.
+     */
+    switch(width) {
+    case 8:
+    case 16:
+    case 32:
+    case 64:
+        break;
+    default:
+        fprintf(stderr, "usrc_writeArray(width=%ld) unrecognized width\n", (long)width);
+        return;
+    }
+    if(prefix!=nullptr) {
+        fprintf(f, prefix, (long)length);
+    }
+
+    int32_t column=0;
+    for(int32_t index=0; index<length; ++index, ++column) {
+        /* separator before every element except the first */
+        if(index>0) {
+            if(column>=16) {
+                /* the current line is full: continue on a new, indented one */
+                fputs(",\n", f);
+                fputs(indent, f);
+                column=0;
+            } else {
+                fputc(',', f);
+            }
+        }
+
+        int64_t element; // Signed due to TOML!
+        switch(width) {
+        case 8:
+            element=static_cast<const uint8_t *>(p)[index];
+            break;
+        case 16:
+            element=static_cast<const uint16_t *>(p)[index];
+            break;
+        case 32:
+            element=static_cast<const uint32_t *>(p)[index];
+            break;
+        default: /* 64, the only width left after the check above */
+            element=static_cast<const int64_t *>(p)[index];
+            break;
+        }
+
+        /* values up to 9 (including negative ones) in decimal, the rest in hexadecimal */
+        if(element<=9) {
+            fprintf(f, "%" PRId64, element);
+        } else {
+            fprintf(f, "0x%" PRIx64, element);
+        }
+    }
+
+    if(postfix!=nullptr) {
+        fputs(postfix, f);
+    }
+}
+
+U_CAPI void U_EXPORT2
+usrc_writeUTrie2Arrays(FILE *f,
+                       const char *indexPrefix, const char *data32Prefix,
+                       const UTrie2 *pTrie,
+                       const char *postfix) {
+    const bool has32=pTrie->data32!=nullptr;
+    /* a 16-bit trie has its data written as part of the same 16-bit array as the index */
+    usrc_writeArray(f, indexPrefix, pTrie->index, 16,
+                    has32 ? pTrie->indexLength : pTrie->indexLength+pTrie->dataLength,
+                    "", postfix);
+    if(has32) {
+        usrc_writeArray(f, data32Prefix, pTrie->data32, 32, pTrie->dataLength, "", postfix);
+    }
+}
+
+U_CAPI void U_EXPORT2
+usrc_writeUTrie2Struct(FILE *f,
+                       const char *prefix,
+                       const UTrie2 *pTrie,
+                       const char *indexName, const char *data32Name,
+                       const char *postfix) {
+    /* Writes prefix, the UTrie2 initializer fields and postfix; each affix only if not nullptr. */
+    if(prefix!=nullptr) {
+        fputs(prefix, f);
+    }
+    /* the first three fields are the index, data16 and data32 pointers */
+    if(pTrie->data32!=nullptr) {
+        /* 32-bit trie */
+        fprintf(f,
+            "    %s,\n"         /* index */
+            "    nullptr,\n"       /* data16 */
+            "    %s,\n",        /* data32 */
+            indexName,
+            data32Name);
+    } else {
+        /* 16-bit trie: data16 is written as an offset of indexLength into the index array */
+        fprintf(f,
+            "    %s,\n"         /* index */
+            "    %s+%ld,\n"     /* data16 */
+            "    nullptr,\n",      /* data32 */
+            indexName,
+            indexName,
+            (long)pTrie->indexLength);
+    }
+    /* scalar fields, then constant initializers for the remaining members */
+    fprintf(f,
+        "    %ld,\n"            /* indexLength */
+        "    %ld,\n"            /* dataLength */
+        "    0x%hx,\n"          /* index2NullOffset */
+        "    0x%hx,\n"          /* dataNullOffset */
+        "    0x%lx,\n"          /* initialValue */
+        "    0x%lx,\n"          /* errorValue */
+        "    0x%lx,\n"          /* highStart */
+        "    0x%lx,\n"          /* highValueIndex */
+        "    nullptr, 0, false, false, 0, nullptr\n",
+        (long)pTrie->indexLength, (long)pTrie->dataLength,
+        (short)pTrie->index2NullOffset, (short)pTrie->dataNullOffset,
+        (long)pTrie->initialValue, (long)pTrie->errorValue,
+        (long)pTrie->highStart, (long)pTrie->highValueIndex);
+    if(postfix!=nullptr) {
+        fputs(postfix, f);
+    }
+}
+
+U_CAPI void U_EXPORT2
+usrc_writeUCPTrieArrays(FILE *f,
+                        const char *indexPrefix, const char *dataPrefix,
+                        const UCPTrie *pTrie,
+                        const char *postfix,
+                        UTargetSyntax syntax) {
+    const char *lineIndent = syntax==UPRV_TARGET_SYNTAX_TOML ? "  " : ""; /* only TOML indents continuation lines */
+    int32_t bits=0; /* stays 0 for an unrecognized valueWidth */
+    if(pTrie->valueWidth==UCPTRIE_VALUE_BITS_16) { bits=16; }
+    else if(pTrie->valueWidth==UCPTRIE_VALUE_BITS_32) { bits=32; }
+    else if(pTrie->valueWidth==UCPTRIE_VALUE_BITS_8) { bits=8; }
+    usrc_writeArray(f, indexPrefix, pTrie->index, 16, pTrie->indexLength, lineIndent, postfix);
+    usrc_writeArray(f, dataPrefix, pTrie->data.ptr0, bits, pTrie->dataLength, lineIndent, postfix);
+}
+
+U_CAPI void U_EXPORT2
+usrc_writeUCPTrieStruct(FILE *f,
+                        const char *prefix,
+                        const UCPTrie *pTrie,
+                        const char *indexName, const char *dataName,
+                        const char *postfix,
+                        UTargetSyntax syntax) {
+    // Writes prefix, the UCPTrie fields in the requested syntax, then postfix
+    // (prefix and postfix only if they are not nullptr).
+    if(prefix!=nullptr) {
+        fputs(prefix, f);
+    }
+    const char *format;
+    if (syntax == UPRV_TARGET_SYNTAX_CCODE) {
+        // only the C initializer names the index and data arrays
+        fprintf(f,
+            "    %s,\n"             // index
+            "    { %s },\n",        // data (union)
+            indexName, dataName);
+        format =
+            "    %ld, %ld,\n"       // indexLength, dataLength
+            "    0x%lx, 0x%x,\n"    // highStart, shifted12HighStart
+            "    %d, %d,\n"         // type, valueWidth
+            "    0, 0,\n"           // reserved32, reserved16
+            "    0x%x, 0x%lx,\n"    // index3NullOffset, dataNullOffset
+            "    0x%lx,\n";         // nullValue
+    } else {
+        // every other syntax value gets key = value lines
+        format =
+            "indexLength = %ld\n"
+            "dataLength = %ld\n"
+            "highStart = 0x%lx\n"
+            "shifted12HighStart = 0x%x\n"
+            "type = %d\n"
+            "valueWidth = %d\n"
+            "index3NullOffset = 0x%x\n"
+            "dataNullOffset = 0x%lx\n"
+            "nullValue = 0x%lx\n";
+    }
+    fprintf(f, format,
+        (long)pTrie->indexLength, (long)pTrie->dataLength,
+        (long)pTrie->highStart, pTrie->shifted12HighStart,
+        pTrie->type, pTrie->valueWidth,
+        pTrie->index3NullOffset, (long)pTrie->dataNullOffset,
+        (long)pTrie->nullValue);
+    if(postfix!=nullptr) {
+        fputs(postfix, f);
+    }
+}
+
+U_CAPI void U_EXPORT2
+usrc_writeUCPTrie(FILE *f, const char *name, const UCPTrie *pTrie, UTargetSyntax syntax) { // writes the trie's arrays, then its struct fields
+    int32_t width= // value width in bits; 0 if valueWidth is none of the three constants below
+        pTrie->valueWidth==UCPTRIE_VALUE_BITS_16 ? 16 :
+        pTrie->valueWidth==UCPTRIE_VALUE_BITS_32 ? 32 :
+        pTrie->valueWidth==UCPTRIE_VALUE_BITS_8 ? 8 : 0;
+    char line[100], line2[100], line3[100], line4[100]; // snprintf() truncates text that does not fit
+
+    switch (syntax) { // first pass: prefixes and postfix for the index and data arrays
+    case UPRV_TARGET_SYNTAX_CCODE:
+        snprintf(line, sizeof(line), "static const uint16_t %s_trieIndex[%%ld]={\n", name); // "%%ld" leaves a literal %ld for usrc_writeArray() to fill in with the length
+        snprintf(line2, sizeof(line2), "static const uint%d_t %s_trieData[%%ld]={\n", (int)width, name);
+        snprintf(line3, sizeof(line3), "\n};\n\n");
+        break;
+    case UPRV_TARGET_SYNTAX_TOML:
+        snprintf(line, sizeof(line), "index = [\n  ");
+        snprintf(line2, sizeof(line2), "data_%d = [\n  ", (int)width);
+        snprintf(line3, sizeof(line3), "\n]\n");
+        break;
+    default:
+        UPRV_UNREACHABLE_EXIT;
+    }
+    usrc_writeUCPTrieArrays(f, line, line2, pTrie, line3, syntax);
+
+    switch (syntax) { // second pass: the buffers are reused for the struct's prefix, array names and postfix
+    case UPRV_TARGET_SYNTAX_CCODE:
+        snprintf(line, sizeof(line), "static const UCPTrie %s_trie={\n", name);
+        snprintf(line2, sizeof(line2), "%s_trieIndex", name);
+        snprintf(line3, sizeof(line3), "%s_trieData", name);
+        snprintf(line4, sizeof(line4), "};\n\n");
+        break;
+    case UPRV_TARGET_SYNTAX_TOML:
+        line[0] = 0; // the TOML form gets empty strings for the wrapper text and the array names
+        line2[0] = 0;
+        line3[0] = 0;
+        line4[0] = 0;
+        break;
+    default:
+        UPRV_UNREACHABLE_EXIT;
+    }
+    usrc_writeUCPTrieStruct(f, line, pTrie, line2, line3, line4, syntax);
+}
+
+U_CAPI void U_EXPORT2
+usrc_writeUnicodeSet(
+        FILE *f,
+        const USet *pSet,
+        UTargetSyntax syntax) {
+    // ccode is not yet supported
+    U_ASSERT(syntax == UPRV_TARGET_SYNTAX_TOML);
+
+    // Writes a "ranges" array; a "strings" array is opened
+    // once the iterator yields the first string.
+    UnicodeSetIterator iter(*UnicodeSet::fromUSet(pSet));
+    fputs("# Inclusive ranges of the code points in the set.\n", f);
+    fputs("ranges = [\n", f);
+    bool inStrings = false;
+    while (iter.nextRange()) {
+        if (!iter.isString()) {
+            U_ASSERT(!inStrings);
+            fprintf(f, "  [0x%x, 0x%x],\n", iter.getCodepoint(), iter.getCodepointEnd());
+            continue;
+        }
+        if (!inStrings) {
+            // close the ranges array and open the strings array
+            inStrings = true;
+            fputs("]\nstrings = [\n", f);
+        }
+        const UnicodeString& text = iter.getString();
+        fputs("  ", f);
+        usrc_writeStringAsASCII(f, text.getBuffer(), text.length(), syntax);
+        fputs(",\n", f);
+    }
+    // closes whichever array is open at this point
+    fputs("]\n", f);
+}
+
+U_CAPI void U_EXPORT2
+usrc_writeUCPMap(
+        FILE *f,
+        const UCPMap *pMap,
+        icu::ValueNameGetter *valueNameGetter,
+        UTargetSyntax syntax) {
+    // ccode is not yet supported
+    U_ASSERT(syntax == UPRV_TARGET_SYNTAX_TOML);
+    (void) syntax; // silence unused variable errors
+
+    // One output line per range reported by ucpmap_getRange(); a negative result ends the list.
+    fputs("# Code points `a` through `b` have value `v`, corresponding to `name`.\n", f);
+    fputs("ranges = [\n", f);
+    uint32_t v;
+    UChar32 first = 0, last;
+    for (; (last = ucpmap_getRange(pMap, first, UCPMAP_RANGE_NORMAL, 0, nullptr, nullptr, &v)) >= 0;
+            first = last + 1) {
+        if (valueNameGetter == nullptr) {
+            fprintf(f, "  {a=0x%x, b=0x%x, v=%u},\n", first, last, v);
+        } else {
+            const char *label = valueNameGetter->getName(v);
+            fprintf(f, "  {a=0x%x, b=0x%x, v=%u, name=\"%s\"},\n", first, last, v, label);
+        }
+    }
+    fputs("]\n", f);
+}
+
+U_CAPI void U_EXPORT2
+usrc_writeArrayOfMostlyInvChars(FILE *f,
+                                const char *prefix,
+                                const char *p, int32_t length,
+                                const char *postfix) {
+    /* Writes the bytes of p as a list: values below 0x20 as numbers, the rest as character literals. */
+    int32_t i, col;
+    int prev2=-1, prev=-1, c;
+    if(prefix!=nullptr) {
+        fprintf(f, prefix, (long)length);
+    }
+    for(i=col=0; i<length; ++i, ++col) {
+        c=(uint8_t)p[i];
+        if(i>0) {
+            /* Break long lines. Try to break at interesting places, to minimize revision diffs. */
+            if(
+                /* Very long line. */
+                col>=32 ||
+                /* Long line, break after terminating NUL. */
+                (col>=24 && prev2>=0x20 && prev==0) ||
+                /* Medium-long line, break before non-NUL, non-character byte. */
+                (col>=16 && (prev==0 || prev>=0x20) && 0<c && c<0x20)
+            ) {
+                fputs(",\n", f);
+                col=0;
+            } else {
+                fputc(',', f);
+            }
+        }
+        /* ' and \ need a backslash: ''' and '\' are not valid character literals */
+        fprintf(f, c<0x20 ? "%u" : (c=='\'' || c=='\\') ? "'\\%c'" : "'%c'", c);
+        prev2=prev;
+        prev=c;
+    }
+    if(postfix!=nullptr) {
+        fputs(postfix, f);
+    }
+}
+
+U_CAPI void U_EXPORT2
+usrc_writeStringAsASCII(FILE *f,
+        const char16_t* ptr, int32_t length,
+        UTargetSyntax) {
+    // Writes ptr[0..length) as a double-quoted ASCII string; the UTargetSyntax argument is unused.
+    fprintf(f, "\"");
+    int32_t i = 0;
+    UChar32 cp;
+    while (i < length) {
+        U16_NEXT(ptr, i, length, cp);
+        if (cp == u'"' || cp == u'\\') {
+            // A raw backslash could not be told apart from the escapes written by this function.
+            fprintf(f, "\\%c", static_cast<char>(cp));
+        } else if (ICU_Utility::isUnprintable(cp)) {
+            UnicodeString u16result;
+            ICU_Utility::escapeUnprintable(u16result, cp);
+            std::string u8result;
+            u16result.toUTF8String(u8result);
+            fprintf(f, "%s", u8result.data());
+        } else {
+            U_ASSERT(cp < 0x80);
+            fputc(static_cast<char>(cp), f);
+        }
+    }
+    fprintf(f, "\"");
+}
diff --git a/intl/icu/source/tools/toolutil/writesrc.h b/intl/icu/source/tools/toolutil/writesrc.h
new file mode 100644
index 0000000000..9c0be5a100
--- /dev/null
+++ b/intl/icu/source/tools/toolutil/writesrc.h
@@ -0,0 +1,198 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+*
+*   Copyright (C) 2005-2012, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*
+*******************************************************************************
+*   file name:  writesrc.h
+*   encoding:   UTF-8
+*   tab size:   8 (not used)
+*   indentation:4
+*
+*   created on: 2005apr23
+*   created by: Markus W. Scherer
+*
+*   Helper functions for writing source code for data.
+*/
+
+#ifndef __WRITESRC_H__
+#define __WRITESRC_H__
+
+#include <stdio.h>
+#include "unicode/utypes.h"
+#include "unicode/ucpmap.h"
+#include "unicode/ucptrie.h"
+#include "unicode/umutablecptrie.h"
+#include "unicode/uset.h"
+#include "utrie2.h"
+
+/**
+ * An input to some of the functions in this file specifying whether to write data
+ * as C/C++ code initializers or as TOML.
+ */
+typedef enum UTargetSyntax {
+    UPRV_TARGET_SYNTAX_CCODE = 0,  /* write the data as C/C++ code initializers */
+    UPRV_TARGET_SYNTAX_TOML = 1,   /* write the data as TOML */
+} UTargetSyntax;
+
+/**
+ * Creates a source text file and writes a header comment with the ICU copyright.
+ * Writes a C/Java-style comment with the generator name.
+ */
+U_CAPI FILE * U_EXPORT2
+usrc_create(const char *path, const char *filename, int32_t copyrightYear, const char *generator);
+
+/**
+ * Creates a source text file and writes a header comment with the ICU copyright.
+ * Writes the comment with # lines, as used in scripts and text data.
+ */
+U_CAPI FILE * U_EXPORT2
+usrc_createTextData(const char *path, const char *filename, int32_t copyrightYear, const char *generator);
+
+/**
+ * Writes the ICU copyright to a file stream, with configurable year and comment style.
+ */
+U_CAPI void U_EXPORT2
+usrc_writeCopyrightHeader(FILE *f, const char *prefix, int32_t copyrightYear);
+
+/**
+ * Writes information about the file being machine-generated.
+ */
+U_CAPI void U_EXPORT2
+usrc_writeFileNameGeneratedBy(
+        FILE *f,
+        const char *prefix,
+        const char *filename,
+        const char *generator);
+
+/**
+ * Writes the contents of an array of 8/16/32/64-bit words.
+ * The prefix and postfix are optional (can be NULL) and are written first/last.
+ * The prefix may contain a %ld or similar field for the array length.
+ * The {} and declaration etc. need to be included in prefix/postfix or
+ * printed before and after the array contents.
+ */
+U_CAPI void U_EXPORT2
+usrc_writeArray(FILE *f,
+                const char *prefix,
+                const void *p, int32_t width, int32_t length,
+                const char *indent,
+                const char *postfix);
+
+/**
+ * Calls usrc_writeArray() for the index and data arrays of a frozen UTrie2.
+ * Only the index array is written for a 16-bit UTrie2. In this case, dataPrefix
+ * is ignored and can be NULL.
+ */
+U_CAPI void U_EXPORT2
+usrc_writeUTrie2Arrays(FILE *f,
+                       const char *indexPrefix, const char *dataPrefix,
+                       const UTrie2 *pTrie,
+                       const char *postfix);
+
+/**
+ * Writes the UTrie2 struct values.
+ * The {} and declaration etc. need to be included in prefix/postfix or
+ * printed before and after the array contents.
+ */
+U_CAPI void U_EXPORT2
+usrc_writeUTrie2Struct(FILE *f,
+                       const char *prefix,
+                       const UTrie2 *pTrie,
+                       const char *indexName, const char *dataName,
+                       const char *postfix);
+
+/**
+ * Calls usrc_writeArray() for the index and data arrays of a UCPTrie.
+ */
+U_CAPI void U_EXPORT2
+usrc_writeUCPTrieArrays(FILE *f,
+                        const char *indexPrefix, const char *dataPrefix,
+                        const UCPTrie *pTrie,
+                        const char *postfix,
+                        UTargetSyntax syntax);
+
+/**
+ * Writes the UCPTrie struct values.
+ * The {} and declaration etc. need to be included in prefix/postfix or
+ * printed before and after the array contents.
+ */
+U_CAPI void U_EXPORT2
+usrc_writeUCPTrieStruct(FILE *f,
+                        const char *prefix,
+                        const UCPTrie *pTrie,
+                        const char *indexName, const char *dataName,
+                        const char *postfix,
+                        UTargetSyntax syntax);
+
+/**
+ * Writes the UCPTrie arrays and struct values.
+ */
+U_CAPI void U_EXPORT2
+usrc_writeUCPTrie(FILE *f, const char *name, const UCPTrie *pTrie, UTargetSyntax syntax);
+
+/**
+ * Writes the UnicodeSet range and string lists.
+ */
+U_CAPI void U_EXPORT2
+usrc_writeUnicodeSet(
+    FILE *f,
+    const USet *pSet,
+    UTargetSyntax syntax);
+
+#ifdef __cplusplus
+
+U_NAMESPACE_BEGIN
+
+class U_TOOLUTIL_API ValueNameGetter {  // abstract interface: maps a value to a name string
+public:
+    virtual ~ValueNameGetter();
+    virtual const char *getName(uint32_t value) = 0;  // pure virtual; usrc_writeUCPMap() documents using it to look up value names
+};
+
+U_NAMESPACE_END
+
+/**
+ * Writes the UCPMap ranges list.
+ *
+ * The "valueNameGetter" argument is optional; ignored if nullptr.
+ * If present, it will be used to look up value name strings.
+ */
+U_CAPI void U_EXPORT2
+usrc_writeUCPMap(
+    FILE *f,
+    const UCPMap *pMap,
+    icu::ValueNameGetter *valueNameGetter,
+    UTargetSyntax syntax);
+
+#endif  // __cplusplus
+
+/**
+ * Writes the contents of an array of mostly invariant characters.
+ * Characters 0..0x1f are printed as numbers,
+ * others as characters with single quotes: '%c'.
+ *
+ * The prefix and postfix are optional (can be NULL) and are written first/last.
+ * The prefix may contain a %ld or similar field for the array length.
+ * The {} and declaration etc. need to be included in prefix/postfix or
+ * printed before and after the array contents.
+ */
+U_CAPI void U_EXPORT2
+usrc_writeArrayOfMostlyInvChars(FILE *f,
+                                const char *prefix,
+                                const char *p, int32_t length,
+                                const char *postfix);
+
+/**
+ * Writes a syntactically valid Unicode string in all ASCII, escaping quotes
+ * and non-ASCII characters.
+ */
+U_CAPI void U_EXPORT2
+usrc_writeStringAsASCII(FILE *f,
+                        const UChar* ptr, int32_t length,
+                        UTargetSyntax syntax);
+
+#endif
diff --git a/intl/icu/source/tools/toolutil/xmlparser.cpp b/intl/icu/source/tools/toolutil/xmlparser.cpp
new file mode 100644
index 0000000000..edb85bdab0
--- /dev/null
+++ b/intl/icu/source/tools/toolutil/xmlparser.cpp
@@ -0,0 +1,827 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+*
+*   Copyright (C) 2004-2010, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*
+*******************************************************************************
+*   file name:  xmlparser.cpp
+*   encoding:   UTF-8
+*   tab size:   8 (not used)
+*   indentation:4
+*
+*   created on: 2004jul21
+*   created by: Andy Heninger
+*/
+
+#include <stdio.h>
+#include "unicode/uchar.h"
+#include "unicode/ucnv.h"
+#include "unicode/regex.h"
+#include "filestrm.h"
+#include "xmlparser.h"
+
+#if !UCONFIG_NO_REGULAR_EXPRESSIONS && !UCONFIG_NO_CONVERSION
+
+// character constants
+enum {
+    x_QUOT=0x22,    // '"'  double quote
+    x_AMP=0x26,     // '&'  ampersand
+    x_APOS=0x27,    // '\'' apostrophe
+    x_LT=0x3c,      // '<'
+    x_GT=0x3e,      // '>'
+    x_l=0x6c        // 'l'  small letter L; parseFile() searches for it to get past "<?xml"
+};
+
+#define  XML_SPACES "[ \\u0009\\u000d\\u000a]"  // regex set: space, U+0009, U+000D, U+000A
+
+// XML #4: regex set of the characters that may start a name
+#define  XML_NAMESTARTCHAR "[[A-Z]:_[a-z][\\u00c0-\\u00d6][\\u00d8-\\u00f6]" \
+                    "[\\u00f8-\\u02ff][\\u0370-\\u037d][\\u037F-\\u1FFF][\\u200C-\\u200D]" \
+                    "[\\u2070-\\u218F][\\u2C00-\\u2FEF][\\u3001-\\uD7FF][\\uF900-\\uFDCF]" \
+                    "[\\uFDF0-\\uFFFD][\\U00010000-\\U000EFFFF]]"
+
+//  XML #5: the name-start set plus '-', '.', digits, U+00B7 and the two listed ranges
+#define  XML_NAMECHAR "[" XML_NAMESTARTCHAR "\\-.[0-9]\\u00b7[\\u0300-\\u036f][\\u203f-\\u2040]]"
+
+//  XML #6: one name-start character followed by any number of name characters
+#define  XML_NAME    XML_NAMESTARTCHAR "(?:" XML_NAMECHAR ")*"
+
+U_NAMESPACE_BEGIN
+
+UOBJECT_DEFINE_RTTI_IMPLEMENTATION(UXMLParser)
+UOBJECT_DEFINE_RTTI_IMPLEMENTATION(UXMLElement)
+
+//
+//   UXMLParser constructor.   Mostly just initializes the ICU regexes that are
+//                             used for parsing.
+//   Every matcher, fNames and fElementStack is constructed with the caller's status.
+UXMLParser::UXMLParser(UErrorCode &status) :
+      //  XML Declaration.  XML Production #23.
+      //      example:  <?xml version="1.0" encoding="utf-16" ?>
+      //      This is a sloppy implementation - just look for the leading <?xml and the closing ?>
+      //            allow for a possible leading BOM.
+      mXMLDecl(UnicodeString("(?s)\\uFEFF?<\\?xml.+?\\?>", -1, US_INV), 0, status),
+      
+      //  XML Comment   production #15
+      //     example:  <!-- whatever -->
+      //       note, does not detect an illegal "--" within comments; NOTE(review): ".+?" needs a non-empty body, so <!----> does not match
+      mXMLComment(UnicodeString("(?s)<!--.+?-->", -1, US_INV), 0, status),
+      
+      //  XML Spaces
+      //      production [3]; one or more characters from XML_SPACES
+      mXMLSP(UnicodeString(XML_SPACES "+", -1, US_INV), 0, status),
+      
+      //  XML Doctype decl  production #28
+      //     example   <!DOCTYPE foo SYSTEM "somewhere" >
+      //       or      <!DOCTYPE foo [internal dtd]>
+      //    TODO:  we don't actually parse the DOCTYPE or internal subsets.
+      //           Some internal dtd subsets could confuse this simple-minded
+      //           attempt at skipping over them, specifically, occurrences
+      //           of closing square brackets.  These could appear in comments, 
+      //           or in parameter entity declarations, for example.
+      mXMLDoctype(UnicodeString(
+           "(?s)<!DOCTYPE.*?(>|\\[.*?\\].*?>)", -1, US_INV
+           ), 0, status),
+      
+      //  XML PI     production #16
+      //     example   <?target stuff?>      (NOTE(review): ".+?" requires at least one character between "<?" and "?>")
+      mXMLPI(UnicodeString("(?s)<\\?.+?\\?>", -1, US_INV), 0, status),
+      
+      //  XML Element Start   Productions #40, #41
+      //          example   <foo att1='abc'  att2="d e f" >
+      //      capture #1:  the tag name
+      //
+      mXMLElemStart (UnicodeString("(?s)<(" XML_NAME ")"                                 // match  "<tag_name"
+          "(?:" 
+                XML_SPACES "+" XML_NAME XML_SPACES "*=" XML_SPACES "*"     // match  "ATTR_NAME = "
+                "(?:(?:\\\'[^<\\\']*?\\\')|(?:\\\"[^<\\\"]*?\\\"))"        // match  '"attribute value"'
+          ")*"                                                             //   * for zero or more attributes.
+          XML_SPACES "*?>", -1, US_INV), 0, status),                               // match " >"
+      
+      //  XML Element End     production #42
+      //     example   </foo>
+      mXMLElemEnd (UnicodeString("</(" XML_NAME ")" XML_SPACES "*>", -1, US_INV), 0, status),
+      
+      // XML Element Empty    production #44
+      //     example   <foo att1="abc"   att2="d e f" />
+      mXMLElemEmpty (UnicodeString("(?s)<(" XML_NAME ")"                                 // match  "<tag_name"
+          "(?:" 
+                XML_SPACES "+" XML_NAME XML_SPACES "*=" XML_SPACES "*"     // match  "ATTR_NAME = "
+                "(?:(?:\\\'[^<\\\']*?\\\')|(?:\\\"[^<\\\"]*?\\\"))"        // match  '"attribute value"'
+          ")*"                                                             //   * for zero or more attributes.
+          XML_SPACES "*?/>", -1, US_INV), 0, status),                              // match " />"
+      
+
+      // XMLCharData.  Everything but '<'.  Note that & will be dealt with later.
+      mXMLCharData(UnicodeString("(?s)[^<]*", -1, US_INV), 0, status),
+
+      // Attribute name = "value".  XML Productions 10, 40/41
+      //  Capture group 1 is name, 
+      //                2 is the attribute value, including the quotes.
+      //
+      //   Note that attributes are scanned twice.  The first time is with
+      //        the regex for an entire element start.  There, the attributes
+      //        are checked syntactically, but not separated out one by one.
+      //        Here, we match a single attribute, and make its name and
+      //        attribute value available to the parser code.
+      mAttrValue(UnicodeString(XML_SPACES "+("  XML_NAME ")"  XML_SPACES "*=" XML_SPACES "*"
+         "((?:\\\'[^<\\\']*?\\\')|(?:\\\"[^<\\\"]*?\\\"))", -1, US_INV), 0, status),
+
+      // Matches one XML white space character; createElement() replaces every match with a plain space.
+      mAttrNormalizer(UnicodeString(XML_SPACES, -1, US_INV), 0, status),
+
+      // Match any of the new-line sequences in content.
+      //   All are changed to \u000a.
+      mNewLineNormalizer(UnicodeString("\\u000d\\u000a|\\u000d\\u0085|\\u000a|\\u000d|\\u0085|\\u2028", -1, US_INV), 0, status),
+
+      // & char references
+      //   We will figure out what we've got based on which capture group has content.
+      //   The last one is a catchall for unrecognized entity references..
+      //             1     2     3      4      5           6                    7          8
+      mAmps(UnicodeString("&(?:(amp;)|(lt;)|(gt;)|(apos;)|(quot;)|#x([0-9A-Fa-f]{1,8});|#([0-9]{1,8});|(.))"),
+                0, status),
+
+      fNames(status),               // table of interned names, see intern() and findName()
+      fElementStack(status),        // elements enclosing the current one while parse() runs
+      fOneLF((char16_t)0x0a)        // Plain new-line string, used in new line normalization.
+      {
+      }
+
+UXMLParser *
+UXMLParser::createParser(UErrorCode &errorCode) {
+    // Factory: returns nullptr without allocating when errorCode already
+    // holds a failure.  Otherwise returns a newly allocated parser; the
+    // constructor receives errorCode, so the caller should test it again
+    // before using the result.
+    return U_FAILURE(errorCode) ? nullptr : new UXMLParser(errorCode);
+}
+
+UXMLParser::~UXMLParser() {}  // intentionally empty: no explicit cleanup is performed here
+
+UXMLElement *
+UXMLParser::parseFile(const char *filename, UErrorCode &errorCode) {
+    // Reads the named file, determines its charset, converts the contents to
+    // Unicode and hands the resulting string to parse().
+    // Returns what parse() returns, or nullptr if anything fails before that;
+    // a file that cannot be opened sets U_FILE_ACCESS_ERROR, other failures
+    // are reported through errorCode by the functions called here.
+    char bytes[4096], charsetBuffer[100];
+    FileStream *f;
+    const char *charset, *pb;
+    UnicodeString src;
+    UConverter *cnv;
+    char16_t *buffer, *pu;
+    int32_t fileLength, bytesLength, length, capacity;
+    UBool flush;
+
+    if(U_FAILURE(errorCode)) {
+        return nullptr;
+    }
+
+    f=T_FileStream_open(filename, "rb");
+    if(f==nullptr) {
+        errorCode=U_FILE_ACCESS_ERROR;
+        return nullptr;
+    }
+
+    bytesLength=T_FileStream_read(f, bytes, (int32_t)sizeof(bytes));
+    // a short first read means the whole file is already in bytes[]; otherwise get the file length
+    fileLength= bytesLength<(int32_t)sizeof(bytes) ? bytesLength : T_FileStream_size(f);
+
+    /*
+     * get the charset:
+     * 1. Unicode signature
+     * 2. treat as ISO-8859-1 and read XML encoding="charset"
+     * 3. default to UTF-8
+     */
+    charset=ucnv_detectUnicodeSignature(bytes, bytesLength, nullptr, &errorCode);
+    if(U_SUCCESS(errorCode) && charset!=nullptr) {
+        // open converter according to Unicode signature
+        cnv=ucnv_open(charset, &errorCode);
+    } else {
+        // read as Latin-1 and parse the XML declaration and encoding
+        cnv=ucnv_open("ISO-8859-1", &errorCode);
+        if(U_FAILURE(errorCode)) {
+            // unexpected error opening Latin-1 converter
+            goto exit;
+        }
+
+        buffer=toUCharPtr(src.getBuffer(bytesLength));
+        if(buffer==nullptr) {
+            // unexpected failure to reserve some string capacity
+            errorCode=U_MEMORY_ALLOCATION_ERROR;
+            goto exit;
+        }
+        pb=bytes;
+        pu=buffer;
+        ucnv_toUnicode(
+            cnv,
+            &pu, buffer+src.getCapacity(),
+            &pb, bytes+bytesLength,
+            nullptr, true, &errorCode);
+        src.releaseBuffer(U_SUCCESS(errorCode) ? (int32_t)(pu-buffer) : 0);
+        ucnv_close(cnv);
+        cnv=nullptr;
+        if(U_FAILURE(errorCode)) {
+            // unexpected error in conversion from Latin-1
+            src.remove();
+            goto exit;
+        }
+
+        // parse XML declaration
+        if(mXMLDecl.reset(src).lookingAt(0, errorCode)) {
+            int32_t declEnd=mXMLDecl.end(errorCode);
+            // go beyond <?xml
+            int32_t pos=src.indexOf((char16_t)x_l)+1;
+
+            mAttrValue.reset(src);
+            while(pos<declEnd && mAttrValue.lookingAt(pos, errorCode)) {  // loop runs once per attribute on this element.
+                UnicodeString attName  = mAttrValue.group(1, errorCode);
+                UnicodeString attValue = mAttrValue.group(2, errorCode);
+
+                // Trim the quotes from the att value.  These are left over from the original regex
+                //   that parsed the attribute, which couldn't conveniently strip them.
+                attValue.remove(0,1);                    // one char from the beginning
+                attValue.truncate(attValue.length()-1);  // and one from the end.
+
+                if(attName==UNICODE_STRING("encoding", 8)) {
+                    length=attValue.extract(0, 0x7fffffff, charsetBuffer, (int32_t)sizeof(charsetBuffer));
+                    charset=charsetBuffer;
+                    break;
+                }
+                pos = mAttrValue.end(2, errorCode);
+            }
+        }
+
+        if(charset==nullptr) {
+            // default to UTF-8; this must also happen when there is no XML declaration at all
+            charset="UTF-8";
+        }
+        cnv=ucnv_open(charset, &errorCode);
+    }
+
+    if(U_FAILURE(errorCode)) {
+        // unable to open the converter
+        goto exit;
+    }
+
+    // convert the file contents
+    capacity=fileLength;        // estimated capacity
+    src.getBuffer(capacity);
+    src.releaseBuffer(0);       // zero length
+    flush=false;
+    for(;;) {
+        // convert contents of bytes[bytesLength]
+        pb=bytes;
+        for(;;) {
+            length=src.length();
+            buffer=toUCharPtr(src.getBuffer(capacity));
+            if(buffer==nullptr) {
+                // unexpected failure to reserve some string capacity
+                errorCode=U_MEMORY_ALLOCATION_ERROR;
+                goto exit;
+            }
+
+            pu=buffer+length;
+            ucnv_toUnicode(
+                cnv, &pu, buffer+src.getCapacity(),
+                &pb, bytes+bytesLength,
+                nullptr, flush, &errorCode);
+            src.releaseBuffer((U_SUCCESS(errorCode) || errorCode==U_BUFFER_OVERFLOW_ERROR) ? (int32_t)(pu-buffer) : 0); // keep the output on overflow
+            if(errorCode==U_BUFFER_OVERFLOW_ERROR) {
+                errorCode=U_ZERO_ERROR;
+                capacity=(3*src.getCapacity())/2; // increase capacity by 50%
+            } else {
+                break;
+            }
+        }
+
+        if(U_FAILURE(errorCode)) {
+            break; // conversion error
+        }
+
+        if(flush) {
+            break; // completely converted the file
+        }
+
+        // read next block
+        bytesLength=T_FileStream_read(f, bytes, (int32_t)sizeof(bytes));
+        if(bytesLength==0) {
+            // reached end of file, convert once more to flush the converter
+            flush=true;
+        }
+    }
+
+exit:
+    ucnv_close(cnv);
+    T_FileStream_close(f);
+
+    if(U_SUCCESS(errorCode)) {
+        return parse(src, errorCode);
+    } else {
+        return nullptr;
+    }
+}
+
+UXMLElement *
+UXMLParser::parse(const UnicodeString &src, UErrorCode &status) {
+    if(U_FAILURE(status)) {
+        return nullptr;
+    }
+    // Parses src as a complete XML document; returns its root element, or nullptr when parsing bails out with an error.
+    UXMLElement   *root = nullptr;
+    fPos = 0; // TODO use just a local pos variable and pass it into functions where necessary?
+    while (!fElementStack.empty()) { fElementStack.pop(); }  // entries left by an earlier failed parse point into a deleted tree
+
+    // set all matchers to work on the input string
+    mXMLDecl.reset(src);
+    mXMLComment.reset(src);
+    mXMLSP.reset(src);
+    mXMLDoctype.reset(src);
+    mXMLPI.reset(src);
+    mXMLElemStart.reset(src);
+    mXMLElemEnd.reset(src);
+    mXMLElemEmpty.reset(src);
+    mXMLCharData.reset(src);
+    mAttrValue.reset(src);
+    mAttrNormalizer.reset(src);
+    mNewLineNormalizer.reset(src);
+    mAmps.reset(src);
+
+    // Consume the XML Declaration, if present.
+    if (mXMLDecl.lookingAt(fPos, status)) {
+        fPos = mXMLDecl.end(status);
+    }
+
+    // Consume "misc" [XML production 27] appearing before DocType
+    parseMisc(status);
+
+    // Consume a DocType declaration, if present.
+    if (mXMLDoctype.lookingAt(fPos, status)) {
+        fPos = mXMLDoctype.end(status);
+    }
+
+    // Consume additional "misc" [XML production 27] appearing after the DocType
+    parseMisc(status);
+
+    // Get the root element
+    if (mXMLElemEmpty.lookingAt(fPos, status)) {
+        // Root is an empty element (no nested elements or content)
+        root = createElement(mXMLElemEmpty, status);
+        fPos = mXMLElemEmpty.end(status);
+    } else {
+        if (mXMLElemStart.lookingAt(fPos, status) == false) {
+            error("Root Element expected", status);
+            goto errorExit;
+        }
+        root = createElement(mXMLElemStart, status);
+        UXMLElement  *el = root;
+
+        //
+        // This is the loop that consumes the root element of the document,
+        //      including all nested content.   Nested elements are handled by
+        //      explicit pushes/pops of the element stack; there is no recursion
+        //      in the control flow of this code.
+        //      "el" always refers to the current element, the one to which content
+        //      is being added.  It is above the top of the element stack.
+        for (;;) {
+            // Nested Element Start
+            if (mXMLElemStart.lookingAt(fPos, status)) {
+                UXMLElement *t = createElement(mXMLElemStart, status);
+                el->fChildren.addElement(t, status);
+                t->fParent = el;
+                fElementStack.push(el, status);
+                el = t;
+                continue;
+            }
+
+            // Text Content.  String is concatenated onto the current node's content,
+            //                but only if it contains something other than spaces.
+            UnicodeString s = scanContent(status);
+            if (s.length() > 0) {
+                mXMLSP.reset(s);
+                if (mXMLSP.matches(status) == false) {
+                    // This chunk of text contains something other than just
+                    //  white space. Make a child node for it.
+                    replaceCharRefs(s, status);
+                    el->fChildren.addElement(s.clone(), status);
+                }
+                mXMLSP.reset(src);    // The matchers need to stay set to the main input string.
+                continue;
+            }
+
+            // Comments.  Discard.
+            if (mXMLComment.lookingAt(fPos, status)) {
+                fPos = mXMLComment.end(status);
+                continue;
+            }
+
+            // PIs.  Discard.
+            if (mXMLPI.lookingAt(fPos, status)) {
+                fPos = mXMLPI.end(status);
+                continue;
+            }
+
+            // Element End
+            if (mXMLElemEnd.lookingAt(fPos, status)) {
+                fPos = mXMLElemEnd.end(0, status);
+                const UnicodeString name = mXMLElemEnd.group(1, status);
+                if (name != *el->fName) {
+                    error("Element start / end tag mismatch", status);
+                    goto errorExit;
+                }
+                if (fElementStack.empty()) {
+                    // Close of the root element.  We're done with the doc.
+                    el = nullptr;
+                    break;
+                }
+                el = (UXMLElement *)fElementStack.pop();
+                continue;
+            }
+
+            // Empty Element.  Stored as a child of the current element, but not stacked.
+            if (mXMLElemEmpty.lookingAt(fPos, status)) {
+                UXMLElement *t = createElement(mXMLElemEmpty, status);
+                el->fChildren.addElement(t, status);
+                continue;
+            }
+
+            // Hit something within the document that doesn't match anything.
+            //   It's an error.
+            error("Unrecognized markup", status);
+            break;
+        }
+
+        if (el != nullptr || !fElementStack.empty()) {
+            // We bailed out early, for some reason.
+            error("Root element not closed.", status);
+            goto errorExit;
+        }
+    }
+
+    // Root Element parse is complete.
+    // Consume the annoying xml "Misc" that can appear at the end of the doc.
+    parseMisc(status);
+
+    // We should have reached the end of the input
+    if (fPos != src.length()) {
+        error("Extra content at the end of the document", status);
+        goto errorExit;
+    }
+
+    // Success!
+    return root;
+
+errorExit:
+    delete root;
+    return nullptr;
+}
+
+//
+//  createElement
+//      Called right after a start-tag or empty-element regex has matched (mEl).
+//      Builds the UXMLElement for that tag, collects its attributes, and moves
+//      fPos to the end of the matched tag.
+//
+UXMLElement *
+UXMLParser::createElement(RegexMatcher  &mEl, UErrorCode &status) {
+    // Capture group 1 of mEl holds the tag name; it is interned before being stored.
+    const UnicodeString *tagName = intern(mEl.group(1, status), status);
+    UXMLElement *element = new UXMLElement(this, tagName, status);
+    const UnicodeString oneSpace((char16_t)0x0020);
+
+    // Walk the attributes, starting right behind the tag name.  Each pass of the
+    // loop handles one attribute and then continues behind its quoted value.
+    for (int32_t pos = mEl.end(1, status);
+             mAttrValue.lookingAt(pos, status);
+             pos = mAttrValue.end(2, status)) {
+        const UnicodeString attrName = mAttrValue.group(1, status);
+        UnicodeString value = mAttrValue.group(2, status);
+
+        // Group 2 still carries the surrounding quotes, because the regex could
+        //   not conveniently leave them out.  Drop one character at each end.
+        value.remove(0,1);
+        value.truncate(value.length()-1);
+
+        // XML attribute value normalization, see
+        // http://www.w3.org/TR/2004/REC-xml11-20040204/#AVNormalize
+        // A non-validating parser must treat all attributes as type CDATA,
+        //   which keeps this simple.
+        // Step 1: turn every new-line sequence into a single LF.
+        mNewLineNormalizer.reset(value);
+        value = mNewLineNormalizer.replaceAll(fOneLF, status);
+
+        // Step 2: turn every XML white space character into a plain \u0020 space.
+        mAttrNormalizer.reset(value);
+        value = mAttrNormalizer.replaceAll(oneSpace, status);
+
+        // Step 3: substitute character and entity references.
+        replaceCharRefs(value, status);
+
+        // Record the interned name and a heap copy of the value on the element.
+        element->fAttNames.addElement((void *)intern(attrName, status), status);
+        element->fAttValues.addElement(value.clone(), status);
+    }
+    fPos = mEl.end(0, status);
+    return element;
+}
+
+//
+//  parseMisc
+//     Consume XML "Misc" [production #27]
+//        which is any combination of space, PI and comments
+//      Need to watch end-of-input because xml MISC stuff is allowed after
+//        the document element, so we WILL scan off the end in this function
+//
+void
+UXMLParser::parseMisc(UErrorCode &status)  {
+    // The input length does not change during the scan.  Stop as soon as fPos
+    // reaches it: Misc is allowed after the document element, so this scan
+    // can run up to the very end of the input.
+    const int32_t inputLength = mXMLPI.input().length();
+    while (fPos < inputLength) {
+        // Try each Misc alternative at the current position: PI, then white space,
+        // then comment.  A match moves fPos behind it and the scan repeats.
+        if (mXMLPI.lookingAt(fPos, status)) {
+            fPos = mXMLPI.end(status);
+        } else if (mXMLSP.lookingAt(fPos, status)) {
+            fPos = mXMLSP.end(status);
+        } else if (mXMLComment.lookingAt(fPos, status)) {
+            fPos = mXMLComment.end(status);
+        } else {
+            // Nothing matched: the Misc run is over.
+            break;
+        }
+    }
+}
+
+//
+//  Scan for document content.
+//
+UnicodeString
+UXMLParser::scanContent(UErrorCode &status) {
+    UnicodeString  content;
+    if (!mXMLCharData.lookingAt(fPos, status)) {
+        // No character data here: return the empty string and leave fPos alone.
+        return content;
+    }
+    content = mXMLCharData.group((int32_t)0, status);
+    // New-lines are normalized to a single LF here, before any char ref substitution.
+    mNewLineNormalizer.reset(content);
+    content = mNewLineNormalizer.replaceAll(fOneLF, status);
+    // TODO:  handle CDATA
+    fPos = mXMLCharData.end(0, status);
+    return content;
+}
+
+//
+//   replaceCharRefs
+//
+//      replace the char entities &lt;  &amp; &#123; &#x12ab; etc. in a string
+//       with the corresponding actual character.
+//
+void
+UXMLParser::replaceCharRefs(UnicodeString &s, UErrorCode &status) {
+    UnicodeString result, replacement;
+    int32_t i, copiedTo = 0;   // copiedTo: end of the part of s already transferred to result
+
+    mAmps.reset(s);
+    // See the initialization for the regex matcher mAmps.
+    //    Which entity we've matched is determined by which capture group has content,
+    //      which is flagged by start() of that group not being -1.
+    //    The output is pieced together by hand: appendReplacement() would interpret '$' and '\' in the replacement text.
+    while (U_SUCCESS(status) && mAmps.find()) {
+        if (mAmps.start(1, status) != -1) {
+            replacement.setTo((char16_t)x_AMP);
+        } else if (mAmps.start(2, status) != -1) {
+            replacement.setTo((char16_t)x_LT);
+        } else if (mAmps.start(3, status) != -1) {
+            replacement.setTo((char16_t)x_GT);
+        } else if (mAmps.start(4, status) != -1) {
+            replacement.setTo((char16_t)x_APOS);
+        } else if (mAmps.start(5, status) != -1) {
+            replacement.setTo((char16_t)x_QUOT);
+        } else if (mAmps.start(6, status) != -1) {
+            UnicodeString hexString = mAmps.group(6, status);
+            uint32_t val = 0;   // unsigned: eight hex digits must not overflow a signed shift
+            for (i=0; i<hexString.length(); i++) {
+                val = (val << 4) + (uint32_t)u_digit(hexString.charAt(i), 16);
+            }
+            // TODO:  some verification that the character is valid
+            replacement.setTo((UChar32)val);
+        } else if (mAmps.start(7, status) != -1) {
+            UnicodeString decimalString = mAmps.group(7, status);
+            UChar32 val = 0;
+            for (i=0; i<decimalString.length(); i++) {
+                val = val*10 + u_digit(decimalString.charAt(i), 10);
+            }
+            // TODO:  some verification that the character is valid
+            replacement.setTo(val);
+        } else {
+            // An unrecognized &entity;  Leave it alone.
+            //  TODO:  check that it really looks like an entity, and is not some
+            //         random & in the text.
+            replacement = mAmps.group((int32_t)0, status);
+        }
+        result.append(s, copiedTo, mAmps.start(0, status) - copiedTo).append(replacement);  // text before the match, then the literal replacement
+        copiedTo = mAmps.end(0, status);
+    }
+    if (U_SUCCESS(status)) { s = result.append(s, copiedTo, s.length() - copiedTo); }  // add the tail; s stays untouched on failure
+}
+
+void
+UXMLParser::error(const char *message, UErrorCode &status) {
+    // TODO:  something better here...
+    const UnicodeString &src=mXMLDecl.input();
+    // 1-based line number of fPos: one more than the number of LFs in [0, fPos).
+    int  line = 1;
+    for (int32_t ci = src.indexOf((char16_t)0x0a, 0); 0 <= ci && ci < fPos;
+             ci = src.indexOf((char16_t)0x0a, ci+1)) {
+        line++;
+    }
+    fprintf(stderr, "Error: %s at line %d\n", message, line);
+    if (U_SUCCESS(status)) {
+        status = U_PARSE_ERROR;
+    }
+}
+
+// intern strings like in Java
+
+const UnicodeString *
+UXMLParser::intern(const UnicodeString &s, UErrorCode &errorCode) {
+    // Look the name up first; only an unknown name is inserted into fNames.
+    const UHashElement *entry=fNames.find(s);
+    if(entry==nullptr) {
+        // First occurrence: store it (with the value 1) and look it up again
+        // to obtain the key pointer held by the table.
+        fNames.puti(s, 1, errorCode);
+        entry=fNames.find(s);
+    }
+    // Either way, hand back the table's hashed key pointer.
+    return (const UnicodeString *)entry->key.pointer;
+}
+
+const UnicodeString *
+UXMLParser::findName(const UnicodeString &s) const {
+    // Pure lookup in fNames; nothing is added here.
+    const UHashElement *entry=fNames.find(s);
+    if(entry==nullptr) {
+        // unknown name
+        return nullptr;
+    }
+    // a known name, return its hashed key pointer
+    return (const UnicodeString *)entry->key.pointer;
+}
+
+// UXMLElement ------------------------------------------------------------- ***
+
+UXMLElement::UXMLElement(const UXMLParser *parser, const UnicodeString *name, UErrorCode &errorCode) :
+   fParser(parser),        // used by getAttribute(name) to look names up via findName()
+   fName(name),            // pointer stored as passed in; the string is not copied
+   fAttNames(errorCode),   // the three containers all receive the caller's errorCode
+   fAttValues(errorCode),
+   fChildren(errorCode),
+   fParent(nullptr)        // stays null until the parser assigns a parent
+{
+}
+
+UXMLElement::~UXMLElement() {
+    // Attribute names are owned by the UXMLParser and are left alone here; the
+    // attribute values and the child nodes are deleted, each vector back to front.
+    for (int32_t v=fAttValues.size(); v>0; ) {
+        delete (UObject *)fAttValues.elementAt(--v);
+    }
+    for (int32_t c=fChildren.size(); c>0; ) {
+        delete (UObject *)fChildren.elementAt(--c);
+    }
+}
+
+const UnicodeString &
+UXMLElement::getTagName() const {
+    return *fName; // fName points at the tag name string (owned by the UXMLParser)
+}
+
+UnicodeString
+UXMLElement::getText(UBool recurse) const {
+    UnicodeString collected;           // starts empty, filled in by appendText()
+    appendText(collected, recurse);
+    return collected;
+}
+
+void
+UXMLElement::appendText(UnicodeString &text, UBool recurse) const {
+    // Appends every text child to text, in order; element children are
+    // descended into only when recurse is set.
+    for(int32_t idx=0, count=fChildren.size(); idx<count; ++idx) {
+        const UObject *child=(const UObject *)fChildren.elementAt(idx);
+        const UnicodeString *str=dynamic_cast<const UnicodeString *>(child);
+        if(str!=nullptr) {
+            text.append(*str);
+        } else if(recurse) /* must be a UXMLElement */ {
+            ((const UXMLElement *)child)->appendText(text, recurse);
+        }
+    }
+}
+
+int32_t
+UXMLElement::countAttributes() const {
+    return fAttNames.size(); // one entry per attribute name; fAttValues is kept in the same order
+}
+
+const UnicodeString *
+UXMLElement::getAttribute(int32_t i, UnicodeString &name, UnicodeString &value) const {
+    if(i<0 || fAttNames.size()<=i) {
+        return nullptr; // index out of bounds; name and value are left untouched
+    }
+    // copy the i-th name/value pair into the caller's strings
+    name.setTo(*(const UnicodeString *)fAttNames.elementAt(i));
+    value.setTo(*(const UnicodeString *)fAttValues.elementAt(i));
+    return &value; // or return (UnicodeString *)fAttValues.elementAt(i);
+}
+
+const UnicodeString *
+UXMLElement::getAttribute(const UnicodeString &name) const {
+    // Names are interned by the parser, so the search compares the interned
+    // pointer, not the string contents.
+    const UnicodeString *interned=fParser->findName(name);
+    if(interned==nullptr) {
+        return nullptr; // no such attribute seen by the parser at all
+    }
+
+    // fAttNames and fAttValues are parallel: the same index is the same attribute
+    for(int32_t idx=0, count=fAttNames.size(); idx<count; ++idx) {
+        if(interned==(const UnicodeString *)fAttNames.elementAt(idx)) {
+            return (const UnicodeString *)fAttValues.elementAt(idx);
+        }
+    }
+    return nullptr;
+}
+
+int32_t
+UXMLElement::countChildren() const {
+    return fChildren.size(); // counts all child nodes, text strings as well as elements
+}
+
+// Fetch the i-th child node and report through type whether it is an element
+// or a text string.  For an out-of-bounds i the result is nullptr and type is
+// left unwritten.
+const UObject *
+UXMLElement::getChild(int32_t i, UXMLNodeType &type) const {
+    if(i<0 || fChildren.size()<=i) {
+        return nullptr;
+    }
+    const UObject *child=(const UObject *)fChildren.elementAt(i);
+    // anything that is not a UXMLElement is reported as a string node
+    type= dynamic_cast<const UXMLElement *>(child)!=nullptr ?
+            UXML_NODE_TYPE_ELEMENT : UXML_NODE_TYPE_STRING;
+    return child;
+}
+
+const UXMLElement *
+UXMLElement::nextChildElement(int32_t &i) const {
+    // i is the caller's cursor: it is advanced past every child examined, so
+    // the next call resumes right after the element returned here.
+    if(i<0) {
+        return nullptr;
+    }
+
+    for(const int32_t count=fChildren.size(); i<count; ) {
+        const UObject *child=(const UObject *)fChildren.elementAt(i++);
+        const UXMLElement *elem=dynamic_cast<const UXMLElement *>(child);
+        if(elem!=nullptr) {
+            return elem;
+        }
+    }
+    return nullptr;
+}
+
+const UXMLElement *
+UXMLElement::getChildElement(const UnicodeString &name) const {
+    // Element names are interned by the parser, so a child matches when its
+    // fName is the very same pointer; string contents are never compared.
+    const UnicodeString *interned=fParser->findName(name);
+    if(interned==nullptr) {
+        return nullptr; // no such element seen by the parser at all
+    }
+
+    const int32_t count=fChildren.size();
+    for(int32_t idx=0; idx<count; ++idx) {
+        const UObject *child=(const UObject *)fChildren.elementAt(idx);
+        const UXMLElement *elem=dynamic_cast<const UXMLElement *>(child);
+        if(elem==nullptr) {
+            continue; // not an element node
+        }
+        if(interned==elem->fName) {
+            return elem; // first immediate child carrying this name
+        }
+    }
+    return nullptr;
+}
+
+U_NAMESPACE_END
+
+#endif /* !UCONFIG_NO_REGULAR_EXPRESSIONS */
+
diff --git a/intl/icu/source/tools/toolutil/xmlparser.h b/intl/icu/source/tools/toolutil/xmlparser.h
new file mode 100644
index 0000000000..d0dcd9a48a
--- /dev/null
+++ b/intl/icu/source/tools/toolutil/xmlparser.h
@@ -0,0 +1,247 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+*
+*   Copyright (C) 2004-2005, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*
+*******************************************************************************
+*   file name:  xmlparser.h
+*   encoding:   UTF-8
+*   tab size:   8 (not used)
+*   indentation:4
+*
+*   created on: 2004jul21
+*   created by: Andy Heninger
+*
+* Tiny XML parser using ICU and intended for use in ICU tests and in build tools.
+* Not suitable for production use. Not supported.
+* Not conformant. Not efficient.
+* But very small.
+*/
+
+#ifndef __XMLPARSER_H__
+#define __XMLPARSER_H__
+
+#include "unicode/uobject.h"
+#include "unicode/unistr.h"
+#include "unicode/regex.h"
+#include "uvector.h"
+#include "hash.h"
+
+#if !UCONFIG_NO_REGULAR_EXPRESSIONS && !UCONFIG_NO_CONVERSION
+
+enum UXMLNodeType {
+    /** Node type string (text contents), stored as a UnicodeString. */
+    UXML_NODE_TYPE_STRING,
+    /** Node type element, stored as a UXMLElement. */
+    UXML_NODE_TYPE_ELEMENT,
+    UXML_NODE_TYPE_COUNT /* equals the number of node types listed above */
+};
+
+U_NAMESPACE_BEGIN
+
+class UXMLParser;
+
+/**
+ * This class represents an element node in a parsed XML tree.
+ */
+class U_TOOLUTIL_API UXMLElement : public UObject {
+public:
+    /**
+     * Destructor; deletes the attribute values and the child nodes of this element.
+     */
+    virtual ~UXMLElement();
+
+    /**
+     * Get the tag name of this element.
+     */
+    const UnicodeString &getTagName() const;
+    /**
+     * Get the text contents of the element.
+     * Append the contents of all text child nodes.
+     * @param recurse If true, also recursively appends the contents of all
+     *        text child nodes of element children.
+     * @return The text contents.
+     */
+    UnicodeString getText(UBool recurse) const;
+    /**
+     * Get the number of attributes.
+     */
+    int32_t countAttributes() const;
+    /**
+     * Get the i-th attribute.
+     * @param i Index of the attribute.
+     * @param name Output parameter, receives the attribute name.
+     * @param value Output parameter, receives the attribute value.
+     * @return A pointer to the attribute value (may be &value or a pointer to an
+     *         internal string object), or nullptr if i is out of bounds.
+     */
+    const UnicodeString *getAttribute(int32_t i, UnicodeString &name, UnicodeString &value) const;
+    /**
+     * Get the value of the attribute with the given name.
+     * @param name Attribute name to be looked up.
+     * @return A pointer to the attribute value, or nullptr if this element
+     *         does not have this attribute.
+     */
+    const UnicodeString *getAttribute(const UnicodeString &name) const;
+    /**
+     * Get the number of child nodes.
+     */
+    int32_t countChildren() const;
+    /**
+     * Get the i-th child node.
+     * @param i Index of the child node.
+     * @param type Output parameter, receives the child node type; not set if i is out of bounds.
+     * @return A pointer to the child node object, or nullptr if i is out of bounds.
+     */
+    const UObject *getChild(int32_t i, UXMLNodeType &type) const;
+    /**
+     * Get the next child element node, skipping non-element child nodes.
+     * @param i Enumeration index, advanced by each call; initialize to 0 before getting the first child element.
+     * @return A pointer to the next child element, or nullptr if there is none.
+     */
+    const UXMLElement *nextChildElement(int32_t &i) const;
+    /**
+     * Get the immediate child element with the given name.
+     * If there are multiple child elements with this name, then return
+     * the first one.
+     * @param name Element name to be looked up.
+     * @return A pointer to the element node, or nullptr if this element
+     *         does not have this immediate child element.
+     */
+    const UXMLElement *getChildElement(const UnicodeString &name) const;
+
+    /**
+     * ICU "poor man's RTTI", returns a UClassID for the actual class.
+     */
+    virtual UClassID getDynamicClassID() const override;
+
+    /**
+     * ICU "poor man's RTTI", returns a UClassID for this class.
+     */
+    static UClassID U_EXPORT2 getStaticClassID();
+
+private:
+    // prevent default construction etc.
+    UXMLElement();
+    UXMLElement(const UXMLElement &other);
+    UXMLElement &operator=(const UXMLElement &other);
+
+    void appendText(UnicodeString &text, UBool recurse) const;  // helper that does the work for getText()
+
+    friend class UXMLParser;
+
+    UXMLElement(const UXMLParser *parser, const UnicodeString *name, UErrorCode &errorCode);
+
+    const UXMLParser *fParser;           // The parser whose interned names are used for lookups by name
+    const UnicodeString *fName;          // The tag name of this element (owned by the UXMLParser)
+    UnicodeString       fContent;        // The text content of this node.  All element content is
+                                         //   concatenated even when there are intervening nested elements
+                                         //   (which doesn't happen with most xml files we care about)
+                                         //   Sections of content containing only white space are dropped,
+                                         //   which gets rid of the bogus white space content from
+                                         //   elements which are primarily containers for nested elements.
+    UVector             fAttNames;       // A vector containing the names of this element's attributes
+                                         //    The names are UnicodeString objects, owned by the UXMLParser.
+    UVector             fAttValues;      // A vector containing the attribute values for
+                                         //    this element's attributes.  The order is the same
+                                         //    as that of the attribute name vector.
+
+    UVector             fChildren;       // The child nodes of this element (a Vector)
+
+    UXMLElement        *fParent;         // A pointer to the parent element of this element.
+};
+
+/**
+ * A simple XML parser; it is neither efficient nor conformant and only useful for
+ * restricted types of XML documents.
+ *
+ * The parse methods parse whole documents and return the parse trees via their
+ * root elements.
+ */
+class U_TOOLUTIL_API UXMLParser : public UObject {
+public:
+    /**
+     * Create an XML parser.
+     */
+    static UXMLParser *createParser(UErrorCode &errorCode);
+    /**
+     * Destructor.
+     */
+    virtual ~UXMLParser();
+
+    /**
+     * Parse an XML document, create the entire document tree, and
+     * return a pointer to the root element of the parsed tree.
+     * The caller must delete the element.
+     */
+    UXMLElement *parse(const UnicodeString &src, UErrorCode &errorCode);
+    /**
+     * Parse an XML file, create the entire document tree, and
+     * return a pointer to the root element of the parsed tree.
+     * The caller must delete the element.
+     */
+    UXMLElement *parseFile(const char *filename, UErrorCode &errorCode);
+
+    /**
+     * ICU "poor man's RTTI", returns a UClassID for the actual class.
+     */
+    virtual UClassID getDynamicClassID() const override;
+
+    /**
+     * ICU "poor man's RTTI", returns a UClassID for this class.
+     */
+    static UClassID U_EXPORT2 getStaticClassID();
+
+private:
+    // prevent default construction etc.
+    UXMLParser();
+    UXMLParser(const UXMLParser &other);
+    UXMLParser &operator=(const UXMLParser &other);
+
+    // constructor
+    UXMLParser(UErrorCode &status);
+
+    void           parseMisc(UErrorCode &status);
+    UXMLElement   *createElement(RegexMatcher &mEl, UErrorCode &status);
+    void           error(const char *message, UErrorCode &status);  // prints message with a line number to stderr
+    UnicodeString  scanContent(UErrorCode &status);
+    void           replaceCharRefs(UnicodeString &s, UErrorCode &status);
+
+    const UnicodeString *intern(const UnicodeString &s, UErrorCode &errorCode);  // adds s to fNames if it is new
+public:
+    // public for UXMLElement only: looks up an interned name, nullptr if it is unknown
+    const UnicodeString *findName(const UnicodeString &s) const;
+private:
+
+    // There is one ICU regex matcher for each of the major XML syntax items
+    //  that are recognized.
+    RegexMatcher mXMLDecl;
+    RegexMatcher mXMLComment;
+    RegexMatcher mXMLSP;
+    RegexMatcher mXMLDoctype;
+    RegexMatcher mXMLPI;
+    RegexMatcher mXMLElemStart;
+    RegexMatcher mXMLElemEnd;
+    RegexMatcher mXMLElemEmpty;
+    RegexMatcher mXMLCharData;
+    RegexMatcher mAttrValue;
+    RegexMatcher mAttrNormalizer;
+    RegexMatcher mNewLineNormalizer;
+    RegexMatcher mAmps;
+
+    Hashtable             fNames;           // interned element/attribute name strings
+    UStack                fElementStack;    // Stack holds the parent elements when nested
+                                            //    elements are being parsed.  All items on this
+                                            //    stack are of type UXMLElement.
+    int32_t               fPos;             // String index of the current scan position in
+                                            //    xml source ("fSrc"; NOTE(review): no fSrc member is declared here).
+    UnicodeString         fOneLF;           // presumably a string holding one line feed -- TODO confirm in the constructor
+};
+
+U_NAMESPACE_END
+#endif /* !UCONFIG_NO_REGULAR_EXPRESSIONS && !UCONFIG_NO_CONVERSION */
+
+#endif
-- 
cgit v1.2.3