Adding upstream version 115.7.0esr.upstream/115.7.0esr upstream

Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
author: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-07 19:33:14 +0000
committer: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-07 19:33:14 +0000
commit: 36d22d82aa202bb199967e9512281e9a53db42c9 (patch)
tree: 105e8c98ddea1c1e4784a60a5a6410fa416be2de /intl/icu/source/tools/icuexportdata
parent: Initial commit. (diff)
download: firefox-esr-36d22d82aa202bb199967e9512281e9a53db42c9.tar.xz
firefox-esr-36d22d82aa202bb199967e9512281e9a53db42c9.zip
6 files changed, 1776 insertions, 0 deletions
diff --git a/intl/icu/source/tools/icuexportdata/Makefile.in b/intl/icu/source/tools/icuexportdata/Makefile.in
new file mode 100644
index 0000000000..6899d74d1b
--- /dev/null
+++ b/intl/icu/source/tools/icuexportdata/Makefile.in
@@ -0,0 +1,94 @@
+## Makefile.in for ICU - tools/icuexportdata
+## Copyright (C) 2021 and later: Unicode, Inc. and others.
+## License & terms of use: http://www.unicode.org/copyright.html
+
+## Source directory information
+srcdir = @srcdir@
+top_srcdir = @top_srcdir@
+
+top_builddir = ../..
+
+include $(top_builddir)/icudefs.mk
+
+## Build directory information
+subdir = tools/icuexportdata
+
+TARGET_STUB_NAME = icuexportdata
+
+SECTION = 1
+
+MAN_FILES = $(TARGET_STUB_NAME).$(SECTION)
+
+## Extra files to remove for 'make clean'
+CLEANFILES = *~ $(DEPS) $(MAN_FILES)
+
+## Target information
+TARGET = $(BINDIR)/$(TARGET_STUB_NAME)$(EXEEXT)
+
+CPPFLAGS += -I$(srcdir) -I$(top_srcdir)/common -I$(srcdir)/../toolutil
+LIBS = $(LIBICUTOOLUTIL) $(LIBICUI18N) $(LIBICUUC) $(DEFAULT_LIBS) $(LIB_M)
+
+SOURCES = $(shell cat $(srcdir)/sources.txt)
+OBJECTS = $(patsubst %.cpp,%.o,$(patsubst %.c,%.o, $(SOURCES)))
+
+DEPS = $(OBJECTS:.o=.d)
+
+## List of phony targets
+.PHONY : all all-local install install-local clean clean-local	\
+distclean distclean-local dist dist-local check check-local install-man
+
+## Clear suffix list
+.SUFFIXES :
+
+## List of standard targets
+all: all-local
+install: install-local
+clean: clean-local
+distclean : distclean-local
+dist: dist-local
+check: all check-local
+
+all-local: $(TARGET) $(MAN_FILES)
+
+install-local: all-local install-man
+	$(MKINSTALLDIRS) $(DESTDIR)$(bindir)
+	$(INSTALL) $(TARGET) $(DESTDIR)$(bindir)
+
+install-man: $(MAN_FILES)
+	$(MKINSTALLDIRS) $(DESTDIR)$(mandir)/man$(SECTION)
+	$(INSTALL_DATA) $? $(DESTDIR)$(mandir)/man$(SECTION)
+
+
+dist-local:
+
+clean-local:
+	test -z "$(CLEANFILES)" || $(RMV) $(CLEANFILES)
+	$(RMV) $(TARGET) $(OBJECTS)
+
+distclean-local: clean-local
+	$(RMV) Makefile
+
+check-local: all-local
+
+Makefile: $(srcdir)/Makefile.in  $(top_builddir)/config.status
+	cd $(top_builddir) \
+	 && CONFIG_FILES=$(subdir)/$@ CONFIG_HEADERS= $(SHELL) ./config.status
+
+$(TARGET) : $(OBJECTS)
+	$(LINK.cc) $(OUTOPT)$@ $^ $(LIBS)
+	$(POST_BUILD_STEP)
+
+
+%.$(SECTION): $(srcdir)/%.$(SECTION).in
+	cd $(top_builddir) \
+	 && CONFIG_FILES=$(subdir)/$@ CONFIG_HEADERS= $(SHELL) ./config.status
+
+
+ifeq (,$(MAKECMDGOALS))
+-include $(DEPS)
+else
+ifneq ($(patsubst %clean,,$(MAKECMDGOALS)),)
+-include $(DEPS)
+endif
+endif
+
diff --git a/intl/icu/source/tools/icuexportdata/icuexportdata.1.in b/intl/icu/source/tools/icuexportdata/icuexportdata.1.in
new file mode 100644
index 0000000000..71243ef053
--- /dev/null
+++ b/intl/icu/source/tools/icuexportdata/icuexportdata.1.in
@@ -0,0 +1,13 @@
+.\" Hey, Emacs! This is -*-nroff-*- you know...
+.\"
+.\" icuexportdata.1: manual page for the icuexportdata utility
+.\"
+.\" Copyright (C) 2016 and later: Unicode, Inc. and others.
+.\" License & terms of use: http://www.unicode.org/copyright.html
+.\"
+.\" Manual page by Shane Carr <shane@unicode.org>.
+.\"
+.TH MAKECONV 1 "12 June 2021" "ICU MANPAGE" "ICU @VERSION@ Manual"
+.SH NAME
+.B icuexportdata
+\- Writes text files with Unicode properties data from ICU.
diff --git a/intl/icu/source/tools/icuexportdata/icuexportdata.cpp b/intl/icu/source/tools/icuexportdata/icuexportdata.cpp
new file mode 100644
index 0000000000..a286040eef
--- /dev/null
+++ b/intl/icu/source/tools/icuexportdata/icuexportdata.cpp
@@ -0,0 +1,1566 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+
+#include <cstddef>
+#include <cstdint>
+#include <cstdio>
+#include <iostream>
+#include <unicode/localpointer.h>
+#include <unicode/umachine.h>
+#include <unicode/unistr.h>
+#include <unicode/urename.h>
+#include <unicode/uset.h>
+#include <vector>
+#include <algorithm>
+#include "toolutil.h"
+#include "uoptions.h"
+#include "cmemory.h"
+#include "charstr.h"
+#include "cstring.h"
+#include "unicode/uchar.h"
+#include "unicode/errorcode.h"
+#include "unicode/uniset.h"
+#include "unicode/uscript.h"
+#include "unicode/putil.h"
+#include "unicode/umutablecptrie.h"
+#include "unicode/ucharstriebuilder.h"
+#include "ucase.h"
+#include "unicode/normalizer2.h"
+#include "normalizer2impl.h"
+#include "writesrc.h"
+
+U_NAMESPACE_USE
+
+/*
+ * Global - verbosity
+ */
+UBool VERBOSE = false;
+UBool QUIET = false;
+
+UBool haveCopyright = true;
+UCPTrieType trieType = UCPTRIE_TYPE_SMALL;
+const char* destdir = "";
+
+// Mask constants for modified values in the Script CodePointTrie, values are logically 12-bits.
+int16_t DATAEXPORT_SCRIPT_X_WITH_COMMON    = 0x0400;
+int16_t DATAEXPORT_SCRIPT_X_WITH_INHERITED = 0x0800;
+int16_t DATAEXPORT_SCRIPT_X_WITH_OTHER     = 0x0c00;
+
+// TODO(ICU-21821): Replace this with a call to a library function
+int32_t scxCodePoints[] = {
+      7415, 7377, 7380, 7387, 7390, 7391, 7394, 7395, 7396, 7397,
+      7398, 7399, 7400, 7403, 7404, 7406, 7407, 7408, 7409, 113824,
+      113825, 113826, 113827, 834, 837, 7616, 7617, 12294, 12350, 12351,
+      12688, 12689, 12690, 12691, 12692, 12693, 12694, 12695, 12696, 12697,
+      12698, 12699, 12700, 12701, 12702, 12703, 12736, 12737, 12738, 12739,
+      12740, 12741, 12742, 12743, 12744, 12745, 12746, 12747, 12748, 12749,
+      12750, 12751, 12752, 12753, 12754, 12755, 12756, 12757, 12758, 12759,
+      12760, 12761, 12762, 12763, 12764, 12765, 12766, 12767, 12768, 12769,
+      12770, 12771, 12832, 12833, 12834, 12835, 12836, 12837, 12838, 12839,
+      12840, 12841, 12842, 12843, 12844, 12845, 12846, 12847, 12848, 12849,
+      12850, 12851, 12852, 12853, 12854, 12855, 12856, 12857, 12858, 12859,
+      12860, 12861, 12862, 12863, 12864, 12865, 12866, 12867, 12868, 12869,
+      12870, 12871, 12928, 12929, 12930, 12931, 12932, 12933, 12934, 12935,
+      12936, 12937, 12938, 12939, 12940, 12941, 12942, 12943, 12944, 12945,
+      12946, 12947, 12948, 12949, 12950, 12951, 12952, 12953, 12954, 12955,
+      12956, 12957, 12958, 12959, 12960, 12961, 12962, 12963, 12964, 12965,
+      12966, 12967, 12968, 12969, 12970, 12971, 12972, 12973, 12974, 12975,
+      12976, 12992, 12993, 12994, 12995, 12996, 12997, 12998, 12999, 13000,
+      13001, 13002, 13003, 13055, 13144, 13145, 13146, 13147, 13148, 13149,
+      13150, 13151, 13152, 13153, 13154, 13155, 13156, 13157, 13158, 13159,
+      13160, 13161, 13162, 13163, 13164, 13165, 13166, 13167, 13168, 13179,
+      13180, 13181, 13182, 13183, 13280, 13281, 13282, 13283, 13284, 13285,
+      13286, 13287, 13288, 13289, 13290, 13291, 13292, 13293, 13294, 13295,
+      13296, 13297, 13298, 13299, 13300, 13301, 13302, 13303, 13304, 13305,
+      13306, 13307, 13308, 13309, 13310, 119648, 119649, 119650, 119651, 119652,
+      119653, 119654, 119655, 119656, 119657, 119658, 119659, 119660, 119661, 119662,
+      119663, 119664, 119665, 127568, 127569, 867, 868, 869, 870, 871,
+      872, 873, 874, 875, 876, 877, 878, 879, 7418, 7674,
+      66272, 66273, 66274, 66275, 66276, 66277, 66278, 66279, 66280, 66281,
+      66282, 66283, 66284, 66285, 66286, 66287, 66288, 66289, 66290, 66291,
+      66292, 66293, 66294, 66295, 66296, 66297, 66298, 66299, 1748, 64830,
+      64831, 1611, 1612, 1613, 1614, 1615, 1616, 1617, 1618, 1619,
+      1620, 1621, 1648, 65010, 65021, 7381, 7382, 7384, 7393, 7402,
+      7405, 7413, 7414, 43249, 12330, 12331, 12332, 12333, 43471, 65794,
+      65847, 65848, 65849, 65850, 65851, 65852, 65853, 65854, 65855, 1156,
+      1159, 11843, 42607, 1157, 1158, 1155, 7672, 7379, 7411, 7416,
+      7417, 7401, 7383, 7385, 7388, 7389, 7392, 43251, 4347, 3046,
+      3047, 3048, 3049, 3050, 3051, 3052, 3053, 3054, 3055, 3056,
+      3057, 3058, 3059, 70401, 70403, 70459, 70460, 73680, 73681, 73683,
+      2790, 2791, 2792, 2793, 2794, 2795, 2796, 2797, 2798, 2799,
+      2662, 2663, 2664, 2665, 2666, 2667, 2668, 2669, 2670, 2671,
+      42752, 42753, 42754, 42755, 42756, 42757, 42758, 42759, 12337, 12338,
+      12339, 12340, 12341, 12441, 12442, 12443, 12444, 12448, 12540, 65392,
+      65438, 65439, 3302, 3303, 3304, 3305, 3306, 3307, 3308, 3309,
+      3310, 3311, 8239, 68338, 6146, 6147, 6149, 1564, 1632, 1633,
+      1634, 1635, 1636, 1637, 1638, 1639, 1640, 1641, 2534, 2535,
+      2536, 2537, 2538, 2539, 2540, 2541, 2542, 2543, 4160, 4161,
+      4162, 4163, 4164, 4165, 4166, 4167, 4168, 4169, 65792, 65793,
+      65799, 65800, 65801, 65802, 65803, 65804, 65805, 65806, 65807, 65808,
+      65809, 65810, 65811, 65812, 65813, 65814, 65815, 65816, 65817, 65818,
+      65819, 65820, 65821, 65822, 65823, 65824, 65825, 65826, 65827, 65828,
+      65829, 65830, 65831, 65832, 65833, 65834, 65835, 65836, 65837, 65838,
+      65839, 65840, 65841, 65842, 65843, 7412, 8432, 12348, 12349, 43310,
+      7376, 7378, 5941, 5942, 2406, 2407, 2408, 2409, 2410, 2411,
+      2412, 2413, 2414, 2415, 12291, 12307, 12316, 12317, 12318, 12319,
+      12336, 12343, 65093, 65094, 1548, 1563, 12289, 12290, 12296, 12297,
+      12298, 12299, 12300, 12301, 12302, 12303, 12304, 12305, 12308, 12309,
+      12310, 12311, 12312, 12313, 12314, 12315, 12539, 65377, 65378, 65379,
+      65380, 65381, 7386, 1567, 7410, 1600, 43062, 43063, 43064, 43065,
+      2386, 2385, 43059, 43060, 43061, 43056, 43057, 43058, 2404, 2405
+    };
+
+void handleError(ErrorCode& status, const char* context) {
+    if (status.isFailure()) {
+        std::cerr << "Error: " << context << ": " << status.errorName() << std::endl;
+        exit(status.reset());
+    }
+}
+
+class PropertyValueNameGetter : public ValueNameGetter {
+public:
+    PropertyValueNameGetter(UProperty prop) : property(prop) {}
+    ~PropertyValueNameGetter() override;
+    const char *getName(uint32_t value) override {
+        return u_getPropertyValueName(property, value, U_SHORT_PROPERTY_NAME);
+    }
+
+private:
+    UProperty property;
+};
+
+PropertyValueNameGetter::~PropertyValueNameGetter() {}
+
+// Dump an aliases = [...] key for properties with aliases
+void dumpPropertyAliases(UProperty uproperty, FILE* f) {
+    int i = U_LONG_PROPERTY_NAME + 1;
+
+    while(true) {
+        // The API works by having extra names after U_LONG_PROPERTY_NAME, sequentially,
+        // and returning null after that
+        const char* alias = u_getPropertyName(uproperty, (UPropertyNameChoice) i);
+        if (!alias) {
+            break;
+        }
+        if (i == U_LONG_PROPERTY_NAME + 1) {
+            fprintf(f, "aliases = [\"%s\"", alias);
+        } else {
+            fprintf(f, ", \"%s\"", alias);
+        }
+        i++;
+    }
+    if (i != U_LONG_PROPERTY_NAME + 1) {
+        fprintf(f, "]\n");
+    }
+}
+
+void dumpBinaryProperty(UProperty uproperty, FILE* f) {
+    IcuToolErrorCode status("icuexportdata: dumpBinaryProperty");
+    const char* fullPropName = u_getPropertyName(uproperty, U_LONG_PROPERTY_NAME);
+    const char* shortPropName = u_getPropertyName(uproperty, U_SHORT_PROPERTY_NAME);
+    const USet* uset = u_getBinaryPropertySet(uproperty, status);
+    handleError(status, fullPropName);
+
+    fputs("[[binary_property]]\n", f);
+    fprintf(f, "long_name = \"%s\"\n", fullPropName);
+    if (shortPropName) fprintf(f, "short_name = \"%s\"\n", shortPropName);
+    fprintf(f, "uproperty_discr = 0x%X\n", uproperty);
+    dumpPropertyAliases(uproperty, f);
+    usrc_writeUnicodeSet(f, uset, UPRV_TARGET_SYNTAX_TOML);
+}
+
+// If the value exists, dump an indented entry of the format
+// `"  {discr = <discriminant>, long = <longname>, short = <shortname>, aliases = [<aliases>]},"`
+void dumpValueEntry(UProperty uproperty, int v, bool is_mask, FILE* f) {
+    const char* fullValueName = u_getPropertyValueName(uproperty, v, U_LONG_PROPERTY_NAME);
+    const char* shortValueName = u_getPropertyValueName(uproperty, v, U_SHORT_PROPERTY_NAME);
+    if (!fullValueName) {
+        return;
+    }
+    if (is_mask) {
+        fprintf(f, "  {discr = 0x%X", v);
+    } else {
+        fprintf(f, "  {discr = %i", v);
+    }
+    fprintf(f, ", long = \"%s\"", fullValueName);
+    if (shortValueName) {
+        fprintf(f, ", short = \"%s\"", shortValueName);
+    }
+    int i = U_LONG_PROPERTY_NAME + 1;
+    while(true) {
+        // The API works by having extra names after U_LONG_PROPERTY_NAME, sequentially,
+        // and returning null after that
+        const char* alias = u_getPropertyValueName(uproperty, v, (UPropertyNameChoice) i);
+        if (!alias) {
+            break;
+        }
+        if (i == U_LONG_PROPERTY_NAME + 1) {
+            fprintf(f, ", aliases = [\"%s\"", alias);
+        } else {
+            fprintf(f, ", \"%s\"", alias);
+        }
+        i++;
+    }
+    if (i != U_LONG_PROPERTY_NAME + 1) {
+        fprintf(f, "]");
+    }
+    fprintf(f, "},\n");
+}
+
+void dumpEnumeratedProperty(UProperty uproperty, FILE* f) {
+    IcuToolErrorCode status("icuexportdata: dumpEnumeratedProperty");
+    const char* fullPropName = u_getPropertyName(uproperty, U_LONG_PROPERTY_NAME);
+    const char* shortPropName = u_getPropertyName(uproperty, U_SHORT_PROPERTY_NAME);
+    const UCPMap* umap = u_getIntPropertyMap(uproperty, status);
+    handleError(status, fullPropName);
+
+    fputs("[[enum_property]]\n", f);
+    fprintf(f, "long_name = \"%s\"\n", fullPropName);
+    if (shortPropName) fprintf(f, "short_name = \"%s\"\n", shortPropName);
+    fprintf(f, "uproperty_discr = 0x%X\n", uproperty);
+    dumpPropertyAliases(uproperty, f);
+
+    int32_t minValue = u_getIntPropertyMinValue(uproperty);
+    U_ASSERT(minValue >= 0);
+    int32_t maxValue = u_getIntPropertyMaxValue(uproperty);
+    U_ASSERT(maxValue >= 0);
+
+    fprintf(f, "values = [\n");
+    for (int v = minValue; v <= maxValue; v++) {
+        dumpValueEntry(uproperty, v, false, f);
+    }
+    fprintf(f, "]\n");
+
+    PropertyValueNameGetter valueNameGetter(uproperty);
+    usrc_writeUCPMap(f, umap, &valueNameGetter, UPRV_TARGET_SYNTAX_TOML);
+    fputs("\n", f);
+
+
+    UCPTrieValueWidth width = UCPTRIE_VALUE_BITS_32;
+    if (maxValue <= 0xff) {
+        width = UCPTRIE_VALUE_BITS_8;
+    } else if (maxValue <= 0xffff) {
+        width = UCPTRIE_VALUE_BITS_16;
+    }
+    LocalUMutableCPTriePointer builder(umutablecptrie_fromUCPMap(umap, status));
+    LocalUCPTriePointer utrie(umutablecptrie_buildImmutable(
+        builder.getAlias(),
+        trieType,
+        width,
+        status));
+    handleError(status, fullPropName);
+
+    fputs("[enum_property.code_point_trie]\n", f);
+    usrc_writeUCPTrie(f, shortPropName, utrie.getAlias(), UPRV_TARGET_SYNTAX_TOML);
+}
+
+/*
+* Export Bidi_Mirroring_Glyph values (code points) in a similar way to how enumerated
+* properties are dumped to file.
+* Note: the data will store 0 for code points without a value defined for
+* Bidi_Mirroring_Glyph.
+*/
+void dumpBidiMirroringGlyph(FILE* f) {
+    UProperty uproperty = UCHAR_BIDI_MIRRORING_GLYPH;
+    IcuToolErrorCode status("icuexportdata: dumpBidiMirroringGlyph");
+    const char* fullPropName = u_getPropertyName(uproperty, U_LONG_PROPERTY_NAME);
+    const char* shortPropName = u_getPropertyName(uproperty, U_SHORT_PROPERTY_NAME);
+    handleError(status, fullPropName);
+
+    // Store 21-bit code point as is
+    UCPTrieValueWidth width = UCPTRIE_VALUE_BITS_32;
+
+    // note: unlike dumpEnumeratedProperty, which can get inversion map data using
+    // u_getIntPropertyMap(uproperty), the only reliable way to get Bidi_Mirroring_Glyph
+    // is to use u_charMirror(cp) over the code point space.
+    LocalUMutableCPTriePointer builder(umutablecptrie_open(0, 0, status));
+    for(UChar32 c = UCHAR_MIN_VALUE; c <= UCHAR_MAX_VALUE; c++) {
+        UChar32 mirroringGlyph = u_charMirror(c);
+        // The trie builder code throws an error when it cannot compress the data sufficiently.
+        // Therefore, when the value is undefined for a code point, keep a 0 in the trie
+        // instead of the ICU API behavior of returning the code point value. Using 0
+        // results in a relatively significant space savings by not including redundant data.
+        if (c != mirroringGlyph) {
+            umutablecptrie_set(builder.getAlias(), c, mirroringGlyph, status);
+        }
+    }
+
+    LocalUCPTriePointer utrie(umutablecptrie_buildImmutable(
+        builder.getAlias(),
+        trieType,
+        width,
+        status));
+    handleError(status, fullPropName);
+
+    // currently a trie and inversion map are the same (as relied upon in characterproperties.cpp)
+    const UCPMap* umap = reinterpret_cast<UCPMap *>(utrie.getAlias());
+
+    fputs("[[enum_property]]\n", f);
+    fprintf(f, "long_name = \"%s\"\n", fullPropName);
+    if (shortPropName) {
+        fprintf(f, "short_name = \"%s\"\n", shortPropName);
+    }
+    fprintf(f, "uproperty_discr = 0x%X\n", uproperty);
+    dumpPropertyAliases(uproperty, f);
+
+    usrc_writeUCPMap(f, umap, nullptr, UPRV_TARGET_SYNTAX_TOML);
+    fputs("\n", f);
+
+    fputs("[enum_property.code_point_trie]\n", f);
+    usrc_writeUCPTrie(f, shortPropName, utrie.getAlias(), UPRV_TARGET_SYNTAX_TOML);
+}
+
+// After printing property value `v`, print `mask` if and only if `mask` comes immediately
+// after the property in the listing
+void maybeDumpMaskValue(UProperty uproperty, uint32_t v, uint32_t mask, FILE* f) {
+    if (U_MASK(v) < mask && U_MASK(v + 1) > mask)
+        dumpValueEntry(uproperty, mask, true, f);
+}
+
+void dumpGeneralCategoryMask(FILE* f) {
+    IcuToolErrorCode status("icuexportdata: dumpGeneralCategoryMask");
+    UProperty uproperty = UCHAR_GENERAL_CATEGORY_MASK;
+
+    fputs("[[mask_property]]\n", f);
+    const char* fullPropName = u_getPropertyName(uproperty, U_LONG_PROPERTY_NAME);
+    const char* shortPropName = u_getPropertyName(uproperty, U_SHORT_PROPERTY_NAME);
+    fprintf(f, "long_name = \"%s\"\n", fullPropName);
+    if (shortPropName) fprintf(f, "short_name = \"%s\"\n", shortPropName);
+    fprintf(f, "uproperty_discr = 0x%X\n", uproperty);
+    dumpPropertyAliases(uproperty, f);
+
+
+    fprintf(f, "mask_for = \"General_Category\"\n");
+    uint32_t minValue = u_getIntPropertyMinValue(UCHAR_GENERAL_CATEGORY);
+    U_ASSERT(minValue >= 0);
+    uint32_t maxValue = u_getIntPropertyMaxValue(UCHAR_GENERAL_CATEGORY);
+    U_ASSERT(maxValue >= 0);
+
+    fprintf(f, "values = [\n");
+    for (uint32_t v = minValue; v <= maxValue; v++) {
+        dumpValueEntry(uproperty, U_MASK(v), true, f);
+
+        // We want to dump these masks "in order", which means they
+        // should come immediately after every property they contain
+        maybeDumpMaskValue(uproperty, v, U_GC_L_MASK, f);
+        maybeDumpMaskValue(uproperty, v, U_GC_LC_MASK, f);
+        maybeDumpMaskValue(uproperty, v, U_GC_M_MASK, f);
+        maybeDumpMaskValue(uproperty, v, U_GC_N_MASK, f);
+        maybeDumpMaskValue(uproperty, v, U_GC_Z_MASK, f);
+        maybeDumpMaskValue(uproperty, v, U_GC_C_MASK, f);
+        maybeDumpMaskValue(uproperty, v, U_GC_P_MASK, f);
+        maybeDumpMaskValue(uproperty, v, U_GC_S_MASK, f);
+    }
+    fprintf(f, "]\n");
+}
+
+void dumpScriptExtensions(FILE* f) {
+    IcuToolErrorCode status("icuexportdata: dumpScriptExtensions");
+
+    fputs("[[script_extensions]]\n", f);
+    const char* scxFullPropName = u_getPropertyName(UCHAR_SCRIPT_EXTENSIONS, U_LONG_PROPERTY_NAME);
+    const char* scxShortPropName = u_getPropertyName(UCHAR_SCRIPT_EXTENSIONS, U_SHORT_PROPERTY_NAME);
+    fprintf(f, "long_name = \"%s\"\n", scxFullPropName);
+    if (scxShortPropName) fprintf(f, "short_name = \"%s\"\n", scxShortPropName);
+    fprintf(f, "uproperty_discr = 0x%X\n", UCHAR_SCRIPT_EXTENSIONS);
+    dumpPropertyAliases(UCHAR_SCRIPT_EXTENSIONS, f);
+
+    // We want to use 16 bits for our exported trie of sc/scx data because we
+    // need 12 bits to match the 12 bits of data stored for sc/scx in the trie
+    // in the uprops.icu data file.
+    UCPTrieValueWidth scWidth = UCPTRIE_VALUE_BITS_16;
+
+    // Create a mutable UCPTrie builder populated with Script property values data.
+    const UCPMap* scInvMap = u_getIntPropertyMap(UCHAR_SCRIPT, status);
+    handleError(status, scxFullPropName);
+    LocalUMutableCPTriePointer builder(umutablecptrie_fromUCPMap(scInvMap, status));
+    handleError(status, scxFullPropName);
+
+    // The values for the output scx companion array.
+    // Invariant is that all subvectors are distinct.
+    std::vector< std::vector<uint16_t> > outputDedupVec;
+
+    // The sc/scx companion array is an array of arrays (of script codes)
+    fputs("script_code_array = [\n", f);
+    for(const UChar32 cp : scxCodePoints) {
+        // Get the Script value
+        uint32_t scVal = umutablecptrie_get(builder.getAlias(), cp);
+        // Get the Script_Extensions value (array of Script codes)
+        const int32_t SCX_ARRAY_CAPACITY = 32;
+        UScriptCode scxValArray[SCX_ARRAY_CAPACITY];
+        int32_t numScripts = uscript_getScriptExtensions(cp, scxValArray, SCX_ARRAY_CAPACITY, status);
+        handleError(status, scxFullPropName);
+
+        // Convert the scx array into a vector
+        std::vector<uint16_t> scxValVec;
+        for(int i = 0; i < numScripts; i++) {
+            scxValVec.push_back(scxValArray[i]);
+        }
+        // Ensure that it is sorted
+        std::sort(scxValVec.begin(), scxValVec.end());
+        // Copy the Script value into the first position of the scx array only
+        // if we have the "other" case (Script value is not Common nor Inherited).
+        // This offers faster access when users want only the Script value.
+        if (scVal != USCRIPT_COMMON && scVal != USCRIPT_INHERITED) {
+            scxValVec.insert(scxValVec.begin(), scVal);
+        }
+
+        // See if there is already an scx value array matching the newly built one.
+        // If there is, then use its index.
+        // If not, then append the new value array.
+        bool isScxValUnique = true;
+        size_t outputIndex = 0;
+        for (outputIndex = 0; outputIndex < outputDedupVec.size(); outputIndex++) {
+            if (outputDedupVec[outputIndex] == scxValVec) {
+                isScxValUnique = false;
+                break;
+            }
+        }
+
+        if (isScxValUnique) {
+            outputDedupVec.push_back(scxValVec);
+            usrc_writeArray(f, "  [", scxValVec.data(), 16, scxValVec.size(), "    ", "],\n");
+        }
+
+        // We must update the value in the UCPTrie for the code point to contain:
+        // 9..0 the Script code in the lower 10 bits when 11..10 is 0, else it is
+        //   the index into the companion array
+        // 11..10 the same higher-order 2 bits in the trie in uprops.icu indicating whether
+        //   3: other
+        //   2: Script=Inherited
+        //   1: Script=Common
+        //   0: Script=value in 9..0 (N/A because we are in this loop to create the companion array for non-0 cases)
+        uint16_t mask = 0;
+        if (scVal == USCRIPT_COMMON) {
+            mask = DATAEXPORT_SCRIPT_X_WITH_COMMON;
+        } else if (scVal == USCRIPT_INHERITED) {
+            mask = DATAEXPORT_SCRIPT_X_WITH_INHERITED;
+        } else {
+            mask = DATAEXPORT_SCRIPT_X_WITH_OTHER;
+        }
+
+        // The new trie value is the index into the new array with the high order bits set
+        uint32_t newScVal = outputIndex | mask;
+
+        // Update the code point in the mutable trie builder with the trie value
+        umutablecptrie_set(builder.getAlias(), cp, newScVal, status);
+        handleError(status, scxFullPropName);
+    }
+    fputs("]\n\n", f);  // Print the TOML close delimiter for the outer array.
+
+    // Convert from mutable trie builder to immutable trie.
+    LocalUCPTriePointer utrie(umutablecptrie_buildImmutable(
+        builder.getAlias(),
+        trieType,
+        scWidth,
+        status));
+    handleError(status, scxFullPropName);
+
+    fputs("[script_extensions.code_point_trie]\n", f);
+    usrc_writeUCPTrie(f, scxShortPropName, utrie.getAlias(), UPRV_TARGET_SYNTAX_TOML);
+}
+
+FILE* prepareOutputFile(const char* basename) {
+    IcuToolErrorCode status("icuexportdata");
+    CharString outFileName;
+    if (destdir != nullptr && *destdir != 0) {
+        outFileName.append(destdir, status).ensureEndsWithFileSeparator(status);
+    }
+    outFileName.append(basename, status);
+    outFileName.append(".toml", status);
+    handleError(status, basename);
+
+    FILE* f = fopen(outFileName.data(), "w");
+    if (f == nullptr) {
+        std::cerr << "Unable to open file: " << outFileName.data() << std::endl;
+        exit(U_FILE_ACCESS_ERROR);
+    }
+    if (!QUIET) {
+        std::cout << "Writing to: " << outFileName.data() << std::endl;
+    }
+
+    if (haveCopyright) {
+        usrc_writeCopyrightHeader(f, "#", 2021);
+    }
+    usrc_writeFileNameGeneratedBy(f, "#", basename, "icuexportdata.cpp");
+
+    return f;
+}
+
+#if !UCONFIG_NO_NORMALIZATION
+
+struct PendingDescriptor {
+    UChar32 scalar;
+    uint32_t descriptor;
+    UBool supplementary;
+};
+
+void writeCanonicalCompositions(USet* backwardCombiningStarters) {
+    IcuToolErrorCode status("icuexportdata: computeCanonicalCompositions");
+    const char* basename = "compositions";
+    FILE* f = prepareOutputFile(basename);
+
+    LocalPointer<UCharsTrieBuilder> backwardBuilder(new UCharsTrieBuilder(status), status);
+
+    const int32_t DECOMPOSITION_BUFFER_SIZE = 20;
+    UChar32 utf32[DECOMPOSITION_BUFFER_SIZE];
+
+    const Normalizer2* nfc = Normalizer2::getNFCInstance(status);
+    for (UChar32 c = 0; c <= 0x10FFFF; ++c) {
+        if (c >= 0xD800 && c < 0xE000) {
+            // Surrogate
+            continue;
+        }
+        UnicodeString decomposition;
+        if (!nfc->getRawDecomposition(c, decomposition)) {
+            continue;
+        }
+        int32_t len = decomposition.toUTF32(utf32, DECOMPOSITION_BUFFER_SIZE, status);
+        if (len != 2) {
+            continue;
+        }
+        UChar32 starter = utf32[0];
+        UChar32 second = utf32[1];
+        UChar32 composite = nfc->composePair(starter, second);
+        if (composite < 0) {
+            continue;
+        }
+        if (c != composite) {
+            status.set(U_INTERNAL_PROGRAM_ERROR);
+            handleError(status, basename);
+        }
+        if (!u_getCombiningClass(second)) {
+            uset_add(backwardCombiningStarters, second);
+        }
+        if (composite >= 0xAC00 && composite <= 0xD7A3) {
+            // Hangul syllable
+            continue;
+        }
+
+        UnicodeString backward;
+        backward.append(second);
+        backward.append(starter);
+        backwardBuilder->add(backward, int32_t(composite), status);
+    }
+    UnicodeString canonicalCompositionTrie;
+    backwardBuilder->buildUnicodeString(USTRINGTRIE_BUILD_SMALL, canonicalCompositionTrie, status);
+
+    usrc_writeArray(f, "compositions = [\n  ", canonicalCompositionTrie.getBuffer(), 16, canonicalCompositionTrie.length(), "  ", "\n]\n");
+    fclose(f);
+    handleError(status, basename);
+}
+
+void writeDecompositionTables(const char* basename, const uint16_t* ptr16, size_t len16, const uint32_t* ptr32, size_t len32) {
+    FILE* f = prepareOutputFile(basename);
+    usrc_writeArray(f, "scalars16 = [\n  ", ptr16, 16, len16, "  ", "\n]\n");
+    usrc_writeArray(f, "scalars32 = [\n  ", ptr32, 32, len32, "  ", "\n]\n");
+    fclose(f);
+}
+
+void writeDecompositionData(const char* basename, uint32_t baseSize16, uint32_t baseSize32, uint32_t supplementSize16, USet* uset, USet* reference, const std::vector<PendingDescriptor>& pendingTrieInsertions, char16_t passthroughCap) {
+    IcuToolErrorCode status("icuexportdata: writeDecompositionData");
+    FILE* f = prepareOutputFile(basename);
+
+    // Zero is a magic number that means the character decomposes to itself.
+    LocalUMutableCPTriePointer builder(umutablecptrie_open(0, 0, status));
+
+    // Iterate backwards to insert lower code points in the trie first in case it matters
+    // for trie block allocation.
+    for (int32_t i = pendingTrieInsertions.size() - 1; i >= 0; --i) {
+        const PendingDescriptor& pending = pendingTrieInsertions[i];
+        uint32_t additional = 0;
+        if (!(pending.descriptor & 0xFFFE0000)) {
+            uint32_t offset = pending.descriptor & 0xFFF;
+            if (!pending.supplementary) {
+                if (offset >= baseSize16) {
+                    // This is a offset to supplementary 16-bit data. We have
+                    // 16-bit base data and 32-bit base data before. However,
+                    // the 16-bit base data length is already part of offset.
+                    additional = baseSize32;
+                }
+            } else {
+                if (offset >= baseSize32) {
+                    // This is an offset to supplementary 32-bit data. We have 16-bit
+                    // base data, 32-bit base data, and 16-bit supplementary data before.
+                    // However, the 32-bit base data length is already part
+                    // of offset.
+                    additional = baseSize16 + supplementSize16;
+                } else {
+                    // This is an offset to 32-bit base data. We have 16-bit
+                    // base data before.
+                    additional = baseSize16;
+                }
+            }
+            if (offset + additional > 0xFFF) {
+                status.set(U_INTERNAL_PROGRAM_ERROR);
+                handleError(status, basename);
+            }
+        }
+        // It turns out it's better to swap the halves compared to the initial
+        // idea in order to put special marker values close to zero so that
+        // an important marker value becomes 1, so it's efficient to compare
+        // "1 or 0". Unfortunately, going through all the code to swap
+        // things is too error prone, so let's do the swapping here in one
+        // place.
+        uint32_t oldTrieValue = pending.descriptor + additional;
+        uint32_t swappedTrieValue = (oldTrieValue >> 16) | (oldTrieValue << 16);
+        umutablecptrie_set(builder.getAlias(), pending.scalar, swappedTrieValue, status);
+    }
+    LocalUCPTriePointer utrie(umutablecptrie_buildImmutable(
+        builder.getAlias(),
+        trieType,
+        UCPTRIE_VALUE_BITS_32,
+        status));
+    handleError(status, basename);
+
+    if (reference) {
+        if (uset_contains(reference, 0xFF9E) || uset_contains(reference, 0xFF9F) || !uset_contains(reference, 0x0345)) {
+            // NFD expectations don't hold. The set must not contain the half-width
+            // kana voicing marks and must contain iota subscript.
+            status.set(U_INTERNAL_PROGRAM_ERROR);
+            handleError(status, basename);
+        }
+
+        USet* halfWidthVoicing = uset_openEmpty();
+        uset_add(halfWidthVoicing, 0xFF9E);
+        uset_add(halfWidthVoicing, 0xFF9F);
+
+        USet* iotaSubscript = uset_openEmpty();
+        uset_add(iotaSubscript, 0x0345);
+
+        uint8_t flags = 0;
+
+        USet* halfWidthCheck = uset_cloneAsThawed(uset);
+        uset_removeAll(halfWidthCheck, reference);
+        if (uset_equals(halfWidthCheck, halfWidthVoicing)) {
+            flags |= 1;
+        } else if (!uset_isEmpty(halfWidthCheck)) {
+            // The result was neither empty nor contained exactly
+            // the two half-width voicing marks. The ICU4X
+            // normalizer doesn't know how to deal with this case.
+            status.set(U_INTERNAL_PROGRAM_ERROR);
+            handleError(status, basename);
+        }
+        uset_close(halfWidthCheck);
+
+        USet* iotaCheck = uset_cloneAsThawed(reference);
+        uset_removeAll(iotaCheck, uset);
+        if (!(uset_equals(iotaCheck, iotaSubscript)) && !uset_isEmpty(iotaCheck)) {
+            // The result was neither empty nor contained exactly
+            // the iota subscript. The ICU4X normalizer doesn't
+            // know how to deal with this case.
+            status.set(U_INTERNAL_PROGRAM_ERROR);
+            handleError(status, basename);
+        }
+        uset_close(halfWidthCheck);
+
+        uset_close(iotaSubscript);
+        uset_close(halfWidthVoicing);
+
+        fprintf(f, "flags = 0x%X\n", flags);
+        fprintf(f, "cap = 0x%X\n", passthroughCap);
+    }
+    fprintf(f, "[trie]\n");
+    usrc_writeUCPTrie(f, "trie", utrie.getAlias(), UPRV_TARGET_SYNTAX_TOML);
+    fclose(f);
+    handleError(status, basename);
+}
+
+// Special marker for the NFKD form of U+FDFA
+const int32_t FDFA_MARKER = 3;
+
+// Special marker for characters whose decomposition starts with a non-starter
+// and the decomposition isn't the character itself.
+const int32_t SPECIAL_NON_STARTER_DECOMPOSITION_MARKER = 2;
+
+// Special marker for starters that decompose to themselves but that may
+// combine backwards under canonical composition
+const int32_t BACKWARD_COMBINING_STARTER_MARKER = 1;
+
+/// Marker that a complex decomposition isn't round-trippable
+/// under re-composition.
+const uint32_t NON_ROUND_TRIP_MARKER = 1;
+
+UBool permissibleBmpPair(UBool knownToRoundTrip, UChar32 c, UChar32 second) {
+    if (knownToRoundTrip) {
+        return true;
+    }
+    // Nuktas, Hebrew presentation forms and polytonic Greek with oxia
+    // are special-cased in ICU4X.
+    if (c >= 0xFB1D && c <= 0xFB4E) {
+        // Hebrew presentation forms
+        return true;
+    }
+    if (c >= 0x1F71 && c <= 0x1FFB) {
+        // Polytonic Greek with oxia
+        return true;
+    }
+    if ((second & 0x7F) == 0x3C && second >= 0x0900 && second <= 0x0BFF) {
+        // Nukta
+        return true;
+    }
+    // To avoid more branchiness, 4 characters that decompose to
+    // a BMP starter followed by a BMP non-starter are excluded
+    // from being encoded directly into the trie value and are
+    // handled as complex decompositions instead. These are:
+    // U+0F76 TIBETAN VOWEL SIGN VOCALIC R
+    // U+0F78 TIBETAN VOWEL SIGN VOCALIC L
+    // U+212B ANGSTROM SIGN
+    // U+2ADC FORKING
+    return false;
+}
+
+// Computes data for canonical decompositions
+void computeDecompositions(const char* basename,
+                           const USet* backwardCombiningStarters,
+                           std::vector<uint16_t>& storage16,
+                           std::vector<uint32_t>& storage32,
+                           USet* decompositionStartsWithNonStarter,
+                           USet* decompositionStartsWithBackwardCombiningStarter,
+                           std::vector<PendingDescriptor>& pendingTrieInsertions,
+                           UChar32& decompositionPassthroughBound,
+                           UChar32& compositionPassthroughBound) {
+    IcuToolErrorCode status("icuexportdata: computeDecompositions");
+    const Normalizer2* mainNormalizer;
+    const Normalizer2* nfdNormalizer = Normalizer2::getNFDInstance(status);
+    const Normalizer2* nfcNormalizer = Normalizer2::getNFCInstance(status);
+    FILE* f = nullptr;
+    std::vector<uint32_t> nonRecursive32;
+    LocalUMutableCPTriePointer nonRecursiveBuilder(umutablecptrie_open(0, 0, status));
+
+    if (uprv_strcmp(basename, "nfkd") == 0) {
+        mainNormalizer = Normalizer2::getNFKDInstance(status);
+    } else if (uprv_strcmp(basename, "uts46d") == 0) {
+        mainNormalizer = Normalizer2::getInstance(nullptr, "uts46", UNORM2_COMPOSE, status);
+    } else {
+        mainNormalizer = nfdNormalizer;
+        f = prepareOutputFile("decompositionex");
+    }
+
+    // Max length as of Unicode 14 is 4 for NFD. For NFKD the max
+    // is 18 (U+FDFA; special-cased), and the next longest is 8 (U+FDFB).
+    const int32_t LONGEST_ENCODABLE_LENGTH_16 = 9;
+    const int32_t LONGEST_ENCODABLE_LENGTH_32 = 8;
+    const int32_t DECOMPOSITION_BUFFER_SIZE = 20;
+    UChar32 utf32[DECOMPOSITION_BUFFER_SIZE];
+    const int32_t RAW_DECOMPOSITION_BUFFER_SIZE = 2;
+    UChar32 rawUtf32[RAW_DECOMPOSITION_BUFFER_SIZE];
+
+    // Iterate over all scalar values excluding Hangul syllables.
+    //
+    // We go backwards in order to better find overlapping decompositions.
+    //
+    // As of Unicode 14:
+    // Iterate forward without overlap search:
+    // nfd: 16 size: 896, 32 size: 173
+    // nfkd: 16 size: 3854, 32 size: 179
+    //
+    // Iterate forward with overlap search:
+    // nfd: 16 size: 888, 32 size: 173
+    // nfkd: 16 size: 3266, 32 size: 179
+    //
+    // Iterate backward with overlap search:
+    // nfd: 16 size: 776, 32 size: 173
+    // nfkd: 16 size: 2941, 32 size: 179
+    //
+    // UChar32 is signed!
+    for (UChar32 c = 0x10FFFF; c >= 0; --c) {
+        if (c >= 0xAC00 && c <= 0xD7A3) {
+            // Hangul syllable
+            continue;
+        }
+        if (c >= 0xD800 && c < 0xE000) {
+            // Surrogate
+            continue;
+        }
+        UnicodeString src;
+        UnicodeString dst;
+        // True if we're building non-NFD or we're building NFD but
+        // the `c` round trips to NFC.
+        // False if we're building NFD and `c` does not round trip to NFC.
+        UBool nonNfdOrRoundTrips = true;
+        src.append(c);
+        if (mainNormalizer != nfdNormalizer) {
+            UnicodeString inter;
+            mainNormalizer->normalize(src, inter, status);
+            nfdNormalizer->normalize(inter, dst, status);
+        } else {
+            nfdNormalizer->normalize(src, dst, status);
+            UnicodeString nfc;
+            nfcNormalizer->normalize(dst, nfc, status);
+            nonNfdOrRoundTrips = (src == nfc);
+        }
+        int32_t len = dst.toUTF32(utf32, DECOMPOSITION_BUFFER_SIZE, status);
+        if (!len || (len == 1 && utf32[0] == 0xFFFD && c != 0xFFFD)) {
+            // Characters that normalize to nothing or to U+FFFD (without the
+            // input being U+FFFD) in ICU4C's UTS 46 normalization normalize
+            // as in NFD in ICU4X's UTF 46 normalization in the interest
+            // of data size and ICU4X's normalizer being unable to handle
+            // normalizing to nothing.
+            // When UTS 46 is implemented on top of ICU4X, a preprocessing
+            // step is supposed to remove these characters before the
+            // normalization step.
+            if (uprv_strcmp(basename, "uts46d") != 0) {
+                status.set(U_INTERNAL_PROGRAM_ERROR);
+                handleError(status, basename);
+            }
+            nfdNormalizer->normalize(src, dst, status);
+            len = dst.toUTF32(utf32, DECOMPOSITION_BUFFER_SIZE, status);
+            if (!len || (len == 1 && utf32[0] == 0xFFFD && c != 0xFFFD)) {
+                status.set(U_INTERNAL_PROGRAM_ERROR);
+                handleError(status, basename);
+            }
+        }
+        if (len > DECOMPOSITION_BUFFER_SIZE) {
+            status.set(U_INTERNAL_PROGRAM_ERROR);
+            handleError(status, basename);
+        }
+        uint8_t firstCombiningClass = u_getCombiningClass(utf32[0]);
+        bool specialNonStarterDecomposition = false;
+        bool startsWithBackwardCombiningStarter = false;
+        if (firstCombiningClass) {
+            decompositionPassthroughBound = c;
+            compositionPassthroughBound = c;
+            uset_add(decompositionStartsWithNonStarter, c);
+            if (src != dst) {
+                if (c == 0x0340 || c == 0x0341 || c == 0x0343 || c == 0x0344 || c == 0x0F73 || c == 0x0F75 || c == 0x0F81 || c == 0xFF9E || c == 0xFF9F) {
+                    specialNonStarterDecomposition = true;
+                } else {
+                    // A character whose decomposition starts with a non-starter and isn't the same as the character itself and isn't already hard-coded into ICU4X.
+                    status.set(U_INTERNAL_PROGRAM_ERROR);
+                    handleError(status, basename);
+                }
+            }
+        } else if (uset_contains(backwardCombiningStarters, utf32[0])) {
+            compositionPassthroughBound = c;
+            startsWithBackwardCombiningStarter = true;
+            uset_add(decompositionStartsWithBackwardCombiningStarter, c);
+        }
+        if (c != BACKWARD_COMBINING_STARTER_MARKER && len == 1 && utf32[0] == BACKWARD_COMBINING_STARTER_MARKER) {
+            status.set(U_INTERNAL_PROGRAM_ERROR);
+            handleError(status, basename);
+        }
+        if (c != SPECIAL_NON_STARTER_DECOMPOSITION_MARKER && len == 1 && utf32[0] == SPECIAL_NON_STARTER_DECOMPOSITION_MARKER) {
+            status.set(U_INTERNAL_PROGRAM_ERROR);
+            handleError(status, basename);
+        }
+        if (c != FDFA_MARKER && len == 1 && utf32[0] == FDFA_MARKER) {
+            status.set(U_INTERNAL_PROGRAM_ERROR);
+            handleError(status, basename);
+        }
+        if (mainNormalizer != nfdNormalizer) {
+            UnicodeString nfd;
+            nfdNormalizer->normalize(src, nfd, status);
+            if (dst == nfd) {
+                continue;
+            }
+            decompositionPassthroughBound = c;
+            compositionPassthroughBound = c;
+        } else if (firstCombiningClass) {
+            len = 1;
+            if (specialNonStarterDecomposition) {
+                utf32[0] = SPECIAL_NON_STARTER_DECOMPOSITION_MARKER; // magic value
+            } else {
+                // Use the surrogate range to store the canonical combining class
+                utf32[0] = 0xD800 | UChar32(firstCombiningClass);
+            }
+        } else {
+            if (src == dst) {
+                if (startsWithBackwardCombiningStarter) {
+                    pendingTrieInsertions.push_back({c, BACKWARD_COMBINING_STARTER_MARKER << 16, false});
+                }
+                continue;
+            }
+            decompositionPassthroughBound = c;
+            // ICU4X hard-codes ANGSTROM SIGN
+            if (c != 0x212B) {
+                UnicodeString raw;
+                if (!nfdNormalizer->getRawDecomposition(c, raw)) {
+                    // We're always supposed to have a non-recursive decomposition
+                    // if we had a recursive one.
+                    status.set(U_INTERNAL_PROGRAM_ERROR);
+                    handleError(status, basename);
+                }
+                // In addition to actual difference, put the whole range that contains characters
+                // with oxia into the non-recursive trie in order to catch cases where characters
+                // with oxia have singleton decompositions to corresponding characters with tonos.
+                // This way, the run-time decision to fall through can be done on the range
+                // without checking for individual characters inside the range.
+                if (raw != dst || (c >= 0x1F71 && c <= 0x1FFB)) {
+                    int32_t rawLen = raw.toUTF32(rawUtf32, RAW_DECOMPOSITION_BUFFER_SIZE, status);
+                    if (!rawLen) {
+                        status.set(U_INTERNAL_PROGRAM_ERROR);
+                        handleError(status, basename);
+                    }
+                    if (rawLen == 1) {
+                        if (c >= 0xFFFF) {
+                            status.set(U_INTERNAL_PROGRAM_ERROR);
+                            handleError(status, basename);
+                        }
+                        umutablecptrie_set(nonRecursiveBuilder.getAlias(), c, uint32_t(rawUtf32[0]), status);
+                    } else if (rawUtf32[0] <= 0xFFFF && rawUtf32[1] <= 0xFFFF) {
+                        if (!rawUtf32[0] || !rawUtf32[1]) {
+                            status.set(U_INTERNAL_PROGRAM_ERROR);
+                            handleError(status, basename);
+                        }
+                        // Swapped for consistency with the primary trie
+                        uint32_t bmpPair = uint32_t(rawUtf32[1]) << 16 | uint32_t(rawUtf32[0]);
+                        umutablecptrie_set(nonRecursiveBuilder.getAlias(), c, bmpPair, status);
+                    } else {
+                        // Let's add 1 to index to make it always non-zero to distinguish
+                        // it from the default zero.
+                        uint32_t index = nonRecursive32.size() + 1;
+                        nonRecursive32.push_back(uint32_t(rawUtf32[0]));
+                        nonRecursive32.push_back(uint32_t(rawUtf32[1]));
+                        if (index > 0xFFFF) {
+                            status.set(U_INTERNAL_PROGRAM_ERROR);
+                            handleError(status, basename);
+                        }
+                        umutablecptrie_set(nonRecursiveBuilder.getAlias(), c, index << 16, status);
+                    }
+                }
+            }
+        }
+        if (!nonNfdOrRoundTrips) {
+            compositionPassthroughBound = c;
+        }
+        if (len == 1 && utf32[0] <= 0xFFFF) {
+            if (startsWithBackwardCombiningStarter) {
+                if (mainNormalizer == nfdNormalizer) {
+                    // Not supposed to happen in NFD
+                    status.set(U_INTERNAL_PROGRAM_ERROR);
+                    handleError(status, basename);
+                } else if (!((utf32[0] >= 0x1161 && utf32[0] <= 0x1175) || (utf32[0] >= 0x11A8 && utf32[0] <= 0x11C2))) {
+                    // Other than conjoining jamo vowels and trails
+                    // unsupported for non-NFD.
+                    status.set(U_INTERNAL_PROGRAM_ERROR);
+                    handleError(status, basename);
+                }
+            }
+            pendingTrieInsertions.push_back({c, uint32_t(utf32[0]) << 16, false});
+        } else if (len == 2 &&
+                   utf32[0] <= 0xFFFF &&
+                   utf32[1] <= 0xFFFF &&
+                   !u_getCombiningClass(utf32[0]) &&
+                   u_getCombiningClass(utf32[1]) &&
+                   permissibleBmpPair(nonNfdOrRoundTrips, c, utf32[1])) {
+            for (int32_t i = 0; i < len; ++i) {
+                if (((utf32[i] == 0x0345) && (uprv_strcmp(basename, "uts46d") == 0)) || utf32[i] == 0xFF9E || utf32[i] == 0xFF9F) {
+                    // Assert that iota subscript and half-width voicing marks never occur in these
+                    // expansions in the normalization forms where they are special.
+                    status.set(U_INTERNAL_PROGRAM_ERROR);
+                    handleError(status, basename);
+                }
+            }
+            if (startsWithBackwardCombiningStarter) {
+                status.set(U_INTERNAL_PROGRAM_ERROR);
+                handleError(status, basename);
+            }
+            pendingTrieInsertions.push_back({c, (uint32_t(utf32[0]) << 16) | uint32_t(utf32[1]), false});
+        } else {
+            if (startsWithBackwardCombiningStarter) {
+                status.set(U_INTERNAL_PROGRAM_ERROR);
+                handleError(status, basename);
+            }
+
+            UBool supplementary = false;
+            UBool nonInitialStarter = false;
+            for (int32_t i = 0; i < len; ++i) {
+                if (((utf32[i] == 0x0345) && (uprv_strcmp(basename, "uts46d") == 0)) || utf32[i] == 0xFF9E || utf32[i] == 0xFF9F) {
+                    // Assert that iota subscript and half-width voicing marks never occur in these
+                    // expansions in the normalization forms where they are special.
+                    status.set(U_INTERNAL_PROGRAM_ERROR);
+                    handleError(status, basename);
+                }
+
+                if (utf32[i] > 0xFFFF) {
+                    supplementary = true;
+                }
+                if (utf32[i] == 0) {
+                    status.set(U_INTERNAL_PROGRAM_ERROR);
+                    handleError(status, basename);
+                }
+                if (i != 0 && !u_getCombiningClass(utf32[i])) {
+                    nonInitialStarter = true;
+                }
+            }
+            if (!supplementary) {
+                if (len > LONGEST_ENCODABLE_LENGTH_16 || !len || len == 1) {
+                    if (len == 18 && c == 0xFDFA) {
+                        // Special marker for the one character whose decomposition
+                        // is too long.
+                        pendingTrieInsertions.push_back({c, FDFA_MARKER << 16, supplementary});
+                        continue;
+                    } else {
+                        status.set(U_INTERNAL_PROGRAM_ERROR);
+                        handleError(status, basename);
+                    }
+                }
+            } else if (len > LONGEST_ENCODABLE_LENGTH_32 || !len) {
+                status.set(U_INTERNAL_PROGRAM_ERROR);
+                handleError(status, basename);
+            }
+            // Complex decomposition
+            // Format for 16-bit value:
+            // 15..13: length minus two for 16-bit case and length minus one for
+            //         the 32-bit case. Length 8 needs to fit in three bits in
+            //         the 16-bit case, and this way the value is future-proofed
+            //         up to 9 in the 16-bit case. Zero is unused and length one
+            //         in the 16-bit case goes directly into the trie.
+            //     12: 1 if all trailing characters are guaranteed non-starters,
+            //         0 if no guarantees about non-starterness.
+            //         Note: The bit choice is this way around to allow for
+            //         dynamically falling back to not having this but instead
+            //         having one more bit for length by merely choosing
+            //         different masks.
+            //  11..0: Start offset in storage. The offset is to the logical
+            //         sequence of scalars16, scalars32, supplementary_scalars16,
+            //         supplementary_scalars32.
+            uint32_t descriptor = uint32_t(!nonInitialStarter) << 12;
+            if (!supplementary) {
+                descriptor |= (uint32_t(len) - 2) << 13;
+            } else {
+                descriptor |= (uint32_t(len) - 1) << 13;
+            }
+            if (descriptor & 0xFFF) {
+                status.set(U_INTERNAL_PROGRAM_ERROR);
+                handleError(status, basename);
+            }
+            size_t index = 0;
+            bool writeToStorage = false;
+            // Sadly, C++ lacks break and continue by label, so using goto in the
+            // inner loops to break or continue the outer loop.
+            if (!supplementary) {
+                outer16: for (;;) {
+                    if (index == storage16.size()) {
+                        writeToStorage = true;
+                        break;
+                    }
+                    if (storage16[index] == utf32[0]) {
+                        for (int32_t i = 1; i < len; ++i) {
+                            if (storage16[index + i] != uint32_t(utf32[i])) {
+                                ++index;
+                                // continue outer
+                                goto outer16;
+                            }
+                        }
+                        // break outer
+                        goto after;
+                    }
+                    ++index;
+                }
+            } else {
+                outer32: for (;;) {
+                    if (index == storage32.size()) {
+                        writeToStorage = true;
+                        break;
+                    }
+                    if (storage32[index] == uint32_t(utf32[0])) {
+                        for (int32_t i = 1; i < len; ++i) {
+                            if (storage32[index + i] != uint32_t(utf32[i])) {
+                                ++index;
+                                // continue outer
+                                goto outer32;
+                            }
+                        }
+                        // break outer
+                        goto after;
+                    }
+                    ++index;
+                }
+            }
+            after:
+            if (index > 0xFFF) {
+                status.set(U_INTERNAL_PROGRAM_ERROR);
+                handleError(status, basename);
+            }
+            descriptor |= uint32_t(index);
+            if (!descriptor || descriptor > 0xFFFF) {
+                // > 0xFFFF should never happen if the code above is correct.
+                // == 0 should not happen due to the nature of the data.
+                status.set(U_INTERNAL_PROGRAM_ERROR);
+                handleError(status, basename);
+            }
+            if (writeToStorage) {
+                if (!supplementary) {
+                    for (int32_t i = 0; i < len; ++i) {
+                        storage16.push_back(uint16_t(utf32[i]));
+                    }
+                } else {
+                    for (int32_t i = 0; i < len; ++i) {
+                        storage32.push_back(uint32_t(utf32[i]));
+                    }
+                }
+            }
+
+            uint32_t nonRoundTripMarker = 0;
+            if (!nonNfdOrRoundTrips) {
+                nonRoundTripMarker = (NON_ROUND_TRIP_MARKER << 16);
+            }
+            pendingTrieInsertions.push_back({c, descriptor | nonRoundTripMarker, supplementary});
+        }
+    }
+    if (storage16.size() + storage32.size() > 0xFFF) {
+        status.set(U_INTERNAL_PROGRAM_ERROR);
+    }
+    if (f) {
+        usrc_writeArray(f, "scalars32 = [\n  ", nonRecursive32.data(), 32, nonRecursive32.size(), "  ", "\n]\n");
+
+        LocalUCPTriePointer utrie(umutablecptrie_buildImmutable(
+            nonRecursiveBuilder.getAlias(),
+            trieType,
+            UCPTRIE_VALUE_BITS_32,
+            status));
+        handleError(status, basename);
+
+        fprintf(f, "[trie]\n");
+        usrc_writeUCPTrie(f, "trie", utrie.getAlias(), UPRV_TARGET_SYNTAX_TOML);
+
+        fclose(f);
+    }
+    handleError(status, basename);
+}
+
+#endif // !UCONFIG_NO_NORMALIZATION
+
+enum {
+    OPT_HELP_H,
+    OPT_HELP_QUESTION_MARK,
+    OPT_MODE,
+    OPT_TRIE_TYPE,
+    OPT_VERSION,
+    OPT_DESTDIR,
+    OPT_ALL,
+    OPT_INDEX,
+    OPT_COPYRIGHT,
+    OPT_VERBOSE,
+    OPT_QUIET,
+
+    OPT_COUNT
+};
+
+#define UOPTION_MODE UOPTION_DEF("mode", 'm', UOPT_REQUIRES_ARG)
+#define UOPTION_TRIE_TYPE UOPTION_DEF("trie-type", '\1', UOPT_REQUIRES_ARG)
+#define UOPTION_ALL UOPTION_DEF("all", '\1', UOPT_NO_ARG)
+#define UOPTION_INDEX UOPTION_DEF("index", '\1', UOPT_NO_ARG)
+
+static UOption options[]={
+    UOPTION_HELP_H,
+    UOPTION_HELP_QUESTION_MARK,
+    UOPTION_MODE,
+    UOPTION_TRIE_TYPE,
+    UOPTION_VERSION,
+    UOPTION_DESTDIR,
+    UOPTION_ALL,
+    UOPTION_INDEX,
+    UOPTION_COPYRIGHT,
+    UOPTION_VERBOSE,
+    UOPTION_QUIET,
+};
+
+void printHelp(FILE* stdfile, const char* program) {
+  fprintf(stdfile,
+          "usage: %s -m mode [-options] [--all | properties...]\n"
+          "\tdump Unicode property data to .toml files\n"
+          "options:\n"
+          "\t-h or -? or --help  this usage text\n"
+          "\t-V or --version     show a version message\n"
+          "\t-m or --mode        mode: currently only 'uprops', 'ucase', and 'norm', but more may be added\n"
+          "\t      --trie-type   set the trie type (small or fast, default small)\n"
+          "\t-d or --destdir     destination directory, followed by the path\n"
+          "\t      --all         write out all properties known to icuexportdata\n"
+          "\t      --index       write an _index.toml summarizing all data exported\n"
+          "\t-c or --copyright   include a copyright notice\n"
+          "\t-v or --verbose     Turn on verbose output\n"
+          "\t-q or --quiet       do not display warnings and progress\n",
+          program);
+}
+
+int exportUprops(int argc, char* argv[]) {
+    // Load list of Unicode properties
+    std::vector<const char*> propNames;
+    for (int i=1; i<argc; i++) {
+        propNames.push_back(argv[i]);
+    }
+    if (options[OPT_ALL].doesOccur) {
+        int i = UCHAR_BINARY_START;
+        while (true) {
+            if (i == UCHAR_BINARY_LIMIT) {
+                i = UCHAR_INT_START;
+            }
+            if (i == UCHAR_INT_LIMIT) {
+                i = UCHAR_GENERAL_CATEGORY_MASK;
+            }
+            if (i == UCHAR_GENERAL_CATEGORY_MASK + 1) {
+                i = UCHAR_BIDI_MIRRORING_GLYPH;
+            }
+            if (i == UCHAR_BIDI_MIRRORING_GLYPH + 1) {
+                i = UCHAR_SCRIPT_EXTENSIONS;
+            }
+            if (i == UCHAR_SCRIPT_EXTENSIONS + 1) {
+                break;
+            }
+            UProperty uprop = static_cast<UProperty>(i);
+            const char* propName = u_getPropertyName(uprop, U_SHORT_PROPERTY_NAME);
+            if (propName == nullptr) {
+                propName = u_getPropertyName(uprop, U_LONG_PROPERTY_NAME);
+                if (propName != nullptr && VERBOSE) {
+                    std::cerr << "Note: falling back to long name for: " << propName << std::endl;
+                }
+            }
+            if (propName != nullptr) {
+                propNames.push_back(propName);
+            } else {
+                std::cerr << "Warning: Could not find name for: " << uprop << std::endl;
+            }
+            i++;
+        }
+    }
+
+    if (propNames.empty()
+            || options[OPT_HELP_H].doesOccur
+            || options[OPT_HELP_QUESTION_MARK].doesOccur
+            || !options[OPT_MODE].doesOccur) {
+        FILE *stdfile=argc<0 ? stderr : stdout;
+        fprintf(stdfile,
+            "usage: %s -m uprops [-options] [--all | properties...]\n"
+            "\tdump Unicode property data to .toml files\n"
+            "options:\n"
+            "\t-h or -? or --help  this usage text\n"
+            "\t-V or --version     show a version message\n"
+            "\t-m or --mode        mode: currently only 'uprops', but more may be added\n"
+            "\t      --trie-type   set the trie type (small or fast, default small)\n"
+            "\t-d or --destdir     destination directory, followed by the path\n"
+            "\t      --all         write out all properties known to icuexportdata\n"
+            "\t      --index       write an _index.toml summarizing all data exported\n"
+            "\t-c or --copyright   include a copyright notice\n"
+            "\t-v or --verbose     Turn on verbose output\n"
+            "\t-q or --quiet       do not display warnings and progress\n",
+            argv[0]);
+        return argc<0 ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR;
+    }
+
+    const char* mode = options[OPT_MODE].value;
+    if (uprv_strcmp(mode, "uprops") != 0) {
+        fprintf(stderr, "Invalid option for --mode (must be uprops)\n");
+        return U_ILLEGAL_ARGUMENT_ERROR;
+    }
+
+    if (options[OPT_TRIE_TYPE].doesOccur) {
+        if (uprv_strcmp(options[OPT_TRIE_TYPE].value, "fast") == 0) {
+            trieType = UCPTRIE_TYPE_FAST;
+        } else if (uprv_strcmp(options[OPT_TRIE_TYPE].value, "small") == 0) {
+            trieType = UCPTRIE_TYPE_SMALL;
+        } else {
+            fprintf(stderr, "Invalid option for --trie-type (must be small or fast)\n");
+            return U_ILLEGAL_ARGUMENT_ERROR;
+        }
+    }
+
+    for (const char* propName : propNames) {
+        UProperty propEnum = u_getPropertyEnum(propName);
+        if (propEnum == UCHAR_INVALID_CODE) {
+            std::cerr << "Error: Invalid property alias: " << propName << std::endl;
+            return U_ILLEGAL_ARGUMENT_ERROR;
+        }
+
+        FILE* f = prepareOutputFile(propName);
+
+        UVersionInfo versionInfo;
+        u_getUnicodeVersion(versionInfo);
+        char uvbuf[U_MAX_VERSION_STRING_LENGTH];
+        u_versionToString(versionInfo, uvbuf);
+        fprintf(f, "icu_version = \"%s\"\nunicode_version = \"%s\"\n\n",
+            U_ICU_VERSION,
+            uvbuf);
+
+        if (propEnum < UCHAR_BINARY_LIMIT) {
+            dumpBinaryProperty(propEnum, f);
+        } else if (UCHAR_INT_START <= propEnum && propEnum <= UCHAR_INT_LIMIT) {
+            dumpEnumeratedProperty(propEnum, f);
+        } else if (propEnum == UCHAR_GENERAL_CATEGORY_MASK) {
+            dumpGeneralCategoryMask(f);
+        } else if (propEnum == UCHAR_BIDI_MIRRORING_GLYPH) {
+            dumpBidiMirroringGlyph(f);
+        } else if (propEnum == UCHAR_SCRIPT_EXTENSIONS) {
+            dumpScriptExtensions(f);
+        } else {
+            std::cerr << "Don't know how to write property: " << propEnum << std::endl;
+            return U_INTERNAL_PROGRAM_ERROR;
+        }
+
+        fclose(f);
+    }
+
+    if (options[OPT_INDEX].doesOccur) {
+        FILE* f = prepareOutputFile("_index");
+        fprintf(f, "index = [\n");
+        for (const char* propName : propNames) {
+            // At this point, propName is a valid property name, so it should be alphanum ASCII
+            fprintf(f, "  { filename=\"%s.toml\" },\n", propName);
+        }
+        fprintf(f, "]\n");
+        fclose(f);
+    }
+
+    return 0;
+}
+
+struct AddRangeHelper {
+    UMutableCPTrie* ucptrie;
+};
+
+static UBool U_CALLCONV
+addRangeToUCPTrie(const void* context, UChar32 start, UChar32 end, uint32_t value) {
+    IcuToolErrorCode status("addRangeToUCPTrie");
+    UMutableCPTrie* ucptrie = ((const AddRangeHelper*) context)->ucptrie;
+    umutablecptrie_setRange(ucptrie, start, end, value, status);
+    handleError(status, "setRange");
+
+    return true;
+}
+
+int exportCase(int argc, char* argv[]) {
+    if (argc > 1) {
+        fprintf(stderr, "ucase mode does not expect additional arguments\n");
+        return U_ILLEGAL_ARGUMENT_ERROR;
+    }
+    (void) argv; // Suppress unused variable warning
+
+    IcuToolErrorCode status("icuexportdata");
+    LocalUMutableCPTriePointer builder(umutablecptrie_open(0, 0, status));
+    handleError(status, "exportCase");
+
+    int32_t exceptionsLength, unfoldLength;
+    const UCaseProps *caseProps = ucase_getSingleton(&exceptionsLength, &unfoldLength);
+    const UTrie2* caseTrie = &caseProps->trie;
+
+    AddRangeHelper helper = { builder.getAlias() };
+    utrie2_enum(caseTrie, nullptr, addRangeToUCPTrie, &helper);
+
+    UCPTrieValueWidth width = UCPTRIE_VALUE_BITS_16;
+    LocalUCPTriePointer utrie(umutablecptrie_buildImmutable(
+        builder.getAlias(),
+        trieType,
+        width,
+        status));
+    handleError(status, "exportCase");
+
+    FILE* f = prepareOutputFile("ucase");
+
+    UVersionInfo versionInfo;
+    u_getUnicodeVersion(versionInfo);
+    char uvbuf[U_MAX_VERSION_STRING_LENGTH];
+    u_versionToString(versionInfo, uvbuf);
+    fprintf(f, "icu_version = \"%s\"\nunicode_version = \"%s\"\n\n",
+            U_ICU_VERSION,
+            uvbuf);
+
+    fputs("[ucase.code_point_trie]\n", f);
+    usrc_writeUCPTrie(f, "case_trie", utrie.getAlias(), UPRV_TARGET_SYNTAX_TOML);
+    fputs("\n", f);
+
+    const char* indent = "  ";
+    const char* suffix = "\n]\n";
+
+    fputs("[ucase.exceptions]\n", f);
+    const char* exceptionsPrefix = "exceptions = [\n  ";
+    int32_t exceptionsWidth = 16;
+    usrc_writeArray(f, exceptionsPrefix, caseProps->exceptions, exceptionsWidth,
+                    exceptionsLength, indent, suffix);
+    fputs("\n", f);
+
+    fputs("[ucase.unfold]\n", f);
+    const char* unfoldPrefix = "unfold = [\n  ";
+    int32_t unfoldWidth = 16;
+    usrc_writeArray(f, unfoldPrefix, caseProps->unfold, unfoldWidth,
+                    unfoldLength, indent, suffix);
+
+    return 0;
+}
+
+#if !UCONFIG_NO_NORMALIZATION
+
+int exportNorm() {
+    IcuToolErrorCode status("icuexportdata: exportNorm");
+    USet* backwardCombiningStarters = uset_openEmpty();
+    writeCanonicalCompositions(backwardCombiningStarters);
+
+    std::vector<uint16_t> storage16;
+    std::vector<uint32_t> storage32;
+
+    // Note: the USets are not exported. They are only used to check that a new
+    // Unicode version doesn't violate expectations that are hard-coded in ICU4X.
+    USet* nfdDecompositionStartsWithNonStarter = uset_openEmpty();
+    USet* nfdDecompositionStartsWithBackwardCombiningStarter = uset_openEmpty();
+    std::vector<PendingDescriptor> nfdPendingTrieInsertions;
+    UChar32 nfdBound = 0x10FFFF;
+    UChar32 nfcBound = 0x10FFFF;
+    computeDecompositions("nfd",
+                          backwardCombiningStarters,
+                          storage16,
+                          storage32,
+                          nfdDecompositionStartsWithNonStarter,
+                          nfdDecompositionStartsWithBackwardCombiningStarter,
+                          nfdPendingTrieInsertions,
+                          nfdBound,
+                          nfcBound);
+    if (!(nfdBound == 0xC0 && nfcBound == 0x300)) {
+        // Unexpected bounds for NFD/NFC.
+        status.set(U_INTERNAL_PROGRAM_ERROR);
+        handleError(status, "exportNorm");
+    }
+
+    uint32_t baseSize16 = storage16.size();
+    uint32_t baseSize32 = storage32.size();
+
+    USet* nfkdDecompositionStartsWithNonStarter = uset_openEmpty();
+    USet* nfkdDecompositionStartsWithBackwardCombiningStarter = uset_openEmpty();
+    std::vector<PendingDescriptor> nfkdPendingTrieInsertions;
+    UChar32 nfkdBound = 0x10FFFF;
+    UChar32 nfkcBound = 0x10FFFF;
+    computeDecompositions("nfkd",
+                          backwardCombiningStarters,
+                          storage16,
+                          storage32,
+                          nfkdDecompositionStartsWithNonStarter,
+                          nfkdDecompositionStartsWithBackwardCombiningStarter,
+                          nfkdPendingTrieInsertions,
+                          nfkdBound,
+                          nfkcBound);
+    if (!(nfkdBound <= 0xC0 && nfkcBound <= 0x300)) {
+        status.set(U_INTERNAL_PROGRAM_ERROR);
+        handleError(status, "exportNorm");
+    }
+    if (nfkcBound > 0xC0) {
+        if (nfkdBound != 0xC0) {
+            status.set(U_INTERNAL_PROGRAM_ERROR);
+            handleError(status, "exportNorm");
+        }
+    } else {
+        if (nfkdBound != nfkcBound) {
+            status.set(U_INTERNAL_PROGRAM_ERROR);
+            handleError(status, "exportNorm");
+        }
+    }
+
+    USet* uts46DecompositionStartsWithNonStarter = uset_openEmpty();
+    USet* uts46DecompositionStartsWithBackwardCombiningStarter = uset_openEmpty();
+    std::vector<PendingDescriptor> uts46PendingTrieInsertions;
+    UChar32 uts46dBound = 0x10FFFF;
+    UChar32 uts46Bound = 0x10FFFF;
+    computeDecompositions("uts46d",
+                          backwardCombiningStarters,
+                          storage16,
+                          storage32,
+                          uts46DecompositionStartsWithNonStarter,
+                          uts46DecompositionStartsWithBackwardCombiningStarter,
+                          uts46PendingTrieInsertions,
+                          uts46dBound,
+                          uts46Bound);
+    if (!(uts46dBound <= 0xC0 && uts46Bound <= 0x300)) {
+        status.set(U_INTERNAL_PROGRAM_ERROR);
+        handleError(status, "exportNorm");
+    }
+    if (uts46Bound > 0xC0) {
+        if (uts46dBound != 0xC0) {
+            status.set(U_INTERNAL_PROGRAM_ERROR);
+            handleError(status, "exportNorm");
+        }
+    } else {
+        if (uts46dBound != uts46Bound) {
+            status.set(U_INTERNAL_PROGRAM_ERROR);
+            handleError(status, "exportNorm");
+        }
+    }
+
+    uint32_t supplementSize16 = storage16.size() - baseSize16;
+    uint32_t supplementSize32 = storage32.size() - baseSize32;
+
+    writeDecompositionData("nfd", baseSize16, baseSize32, supplementSize16, nfdDecompositionStartsWithNonStarter, nullptr, nfdPendingTrieInsertions, char16_t(nfcBound));
+    writeDecompositionData("nfkd", baseSize16, baseSize32, supplementSize16, nfkdDecompositionStartsWithNonStarter, nfdDecompositionStartsWithNonStarter, nfkdPendingTrieInsertions, char16_t(nfkcBound));
+    writeDecompositionData("uts46d", baseSize16, baseSize32, supplementSize16, uts46DecompositionStartsWithNonStarter, nfdDecompositionStartsWithNonStarter, uts46PendingTrieInsertions, char16_t(uts46Bound));
+
+    writeDecompositionTables("nfdex", storage16.data(), baseSize16, storage32.data(), baseSize32);
+    writeDecompositionTables("nfkdex", storage16.data() + baseSize16, supplementSize16, storage32.data() + baseSize32, supplementSize32);
+
+    uset_close(nfdDecompositionStartsWithNonStarter);
+    uset_close(nfkdDecompositionStartsWithNonStarter);
+    uset_close(uts46DecompositionStartsWithNonStarter);
+
+    uset_close(nfdDecompositionStartsWithBackwardCombiningStarter);
+    uset_close(nfkdDecompositionStartsWithBackwardCombiningStarter);
+    uset_close(uts46DecompositionStartsWithBackwardCombiningStarter);
+
+    uset_close(backwardCombiningStarters);
+    handleError(status, "exportNorm");
+    return 0;
+}
+
+#endif // !UCONFIG_NO_NORMALIZATION
+
+int main(int argc, char* argv[]) {
+    U_MAIN_INIT_ARGS(argc, argv);
+
+    /* preset then read command line options */
+    options[OPT_DESTDIR].value=u_getDataDirectory();
+    argc=u_parseArgs(argc, argv, UPRV_LENGTHOF(options), options);
+
+    if(options[OPT_VERSION].doesOccur) {
+        printf("icuexportdata version %s, ICU tool to dump data files for external consumers\n",
+               U_ICU_DATA_VERSION);
+        printf("%s\n", U_COPYRIGHT_STRING);
+        exit(0);
+    }
+
+    /* error handling, printing usage message */
+    if(argc<0) {
+        fprintf(stderr,
+            "error in command line argument \"%s\"\n",
+            argv[-argc]);
+    }
+
+    if (argc < 0
+            || options[OPT_HELP_H].doesOccur
+            || options[OPT_HELP_QUESTION_MARK].doesOccur
+            || !options[OPT_MODE].doesOccur) {
+        FILE *stdfile=argc<0 ? stderr : stdout;
+        printHelp(stdfile, argv[0]);
+        return argc<0 ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR;
+    }
+
+    /* get the options values */
+    haveCopyright = options[OPT_COPYRIGHT].doesOccur;
+    destdir = options[OPT_DESTDIR].value;
+    VERBOSE = options[OPT_VERBOSE].doesOccur;
+    QUIET = options[OPT_QUIET].doesOccur;
+
+    if (options[OPT_TRIE_TYPE].doesOccur) {
+        if (uprv_strcmp(options[OPT_TRIE_TYPE].value, "fast") == 0) {
+            trieType = UCPTRIE_TYPE_FAST;
+        } else if (uprv_strcmp(options[OPT_TRIE_TYPE].value, "small") == 0) {
+            trieType = UCPTRIE_TYPE_SMALL;
+        } else {
+            fprintf(stderr, "Invalid option for --trie-type (must be small or fast)\n");
+            return U_ILLEGAL_ARGUMENT_ERROR;
+        }
+    }
+
+    const char* mode = options[OPT_MODE].value;
+    if (uprv_strcmp(mode, "norm") == 0) {
+#if !UCONFIG_NO_NORMALIZATION
+        return exportNorm();
+#else
+    fprintf(stderr, "Exporting normalization data not supported when compiling without normalization support.\n");
+    return U_ILLEGAL_ARGUMENT_ERROR;
+#endif
+    }
+    if (uprv_strcmp(mode, "uprops") == 0) {
+        return exportUprops(argc, argv);
+    } else if (uprv_strcmp(mode, "ucase") == 0) {
+        return exportCase(argc, argv);
+    }
+
+    fprintf(stderr, "Invalid option for --mode (must be uprops, ucase, or norm)\n");
+    return U_ILLEGAL_ARGUMENT_ERROR;
+}
diff --git a/intl/icu/source/tools/icuexportdata/icuexportdata.vcxproj b/intl/icu/source/tools/icuexportdata/icuexportdata.vcxproj
new file mode 100644
index 0000000000..48b4c23cf8
--- /dev/null
+++ b/intl/icu/source/tools/icuexportdata/icuexportdata.vcxproj
@@ -0,0 +1,80 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="14.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <PropertyGroup Label="Globals">
+    <ProjectGuid>{C5185F6D-BC0A-4DF7-A63C-B107D1C9C82F}</ProjectGuid>
+  </PropertyGroup>
+  <PropertyGroup Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseOfMfc>false</UseOfMfc>
+    <CharacterSet>MultiByte</CharacterSet>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+  <!-- The following import will include the 'default' configuration options for VS projects. -->
+  <Import Project="..\..\allinone\Build.Windows.ProjectConfiguration.props" />
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+  <ImportGroup Label="ExtensionSettings">
+  </ImportGroup>
+  <PropertyGroup Label="UserMacros" />
+  <PropertyGroup>
+    <_ProjectFileVersion>10.0.30319.1</_ProjectFileVersion>
+    <OutDir>.\$(Platform)\$(Configuration)\</OutDir>
+    <IntDir>.\$(Platform)\$(Configuration)\</IntDir>
+    <!-- The ICU projects use "Win32" to mean "x86", so we need to special case it. -->
+    <OutDir Condition="'$(Platform)'=='Win32'">.\x86\$(Configuration)\</OutDir>
+    <IntDir Condition="'$(Platform)'=='Win32'">.\x86\$(Configuration)\</IntDir>
+    <!-- Disable Incremental Linking for Release builds as it prevents Link-time Code Generation -->
+    <LinkIncremental Condition="'$(Configuration)'=='Debug'">true</LinkIncremental>
+    <LinkIncremental Condition="'$(Configuration)'=='Release'">false</LinkIncremental>
+  </PropertyGroup>
+  <!-- Options that are common to *all* configurations -->
+  <ItemDefinitionGroup>
+    <Midl>
+      <TypeLibraryName>$(OutDir)/icuexportdata.tlb</TypeLibraryName>
+    </Midl>
+    <ClCompile>
+      <WarningLevel>Level3</WarningLevel>
+      <CompileAs>Default</CompileAs>
+      <DisableLanguageExtensions>false</DisableLanguageExtensions>
+      <AdditionalIncludeDirectories>..\..\..\include;..\..\common;..\toolutil;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+      <PrecompiledHeaderOutputFile>$(OutDir)/icuexportdata.pch</PrecompiledHeaderOutputFile>
+      <AssemblerListingLocation>$(OutDir)/</AssemblerListingLocation>
+      <ObjectFileName>$(OutDir)/</ObjectFileName>
+      <ProgramDataBaseFileName>$(OutDir)/icuexportdata.pdb</ProgramDataBaseFileName>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <OutputFile>$(OutDir)/icuexportdata.exe</OutputFile>
+      <AdditionalLibraryDirectories>..\..\..\$(IcuLibOutputDir);%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
+    </Link>
+    <CustomBuildStep>
+      <Command>copy "$(TargetPath)" ..\..\..\$(IcuBinOutputDir)</Command>
+      <Outputs>..\..\..\$(IcuBinOutputDir)\$(TargetFileName);%(Outputs)</Outputs>
+    </CustomBuildStep>
+  </ItemDefinitionGroup>
+  <!-- Options that are common to all 'Debug' project configurations -->
+  <ItemDefinitionGroup Condition="'$(Configuration)'=='Debug'">
+    <ClCompile>
+      <BrowseInformation>true</BrowseInformation>
+      <RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
+    </ClCompile>
+    <Link>
+      <AdditionalDependencies>icuucd.lib;icutud.lib;%(AdditionalDependencies)</AdditionalDependencies>
+    </Link>
+  </ItemDefinitionGroup>
+  <!-- Options that are common to all 'Release' project configurations -->
+  <ItemDefinitionGroup Condition="'$(Configuration)'=='Release'">
+    <ClCompile>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+    </ClCompile>
+    <Link>
+      <AdditionalDependencies>icuuc.lib;icutu.lib;%(AdditionalDependencies)</AdditionalDependencies>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemGroup>
+    <ClCompile Include="icuexportdata.cpp" />
+  </ItemGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+  <ImportGroup Label="ExtensionTargets">
+  </ImportGroup>
+</Project>
+\ No newline at end of file
diff --git a/intl/icu/source/tools/icuexportdata/icuexportdata.vcxproj.filters b/intl/icu/source/tools/icuexportdata/icuexportdata.vcxproj.filters
new file mode 100644
index 0000000000..02b3257ba3
--- /dev/null
+++ b/intl/icu/source/tools/icuexportdata/icuexportdata.vcxproj.filters
@@ -0,0 +1,22 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup>
+    <Filter Include="Source Files">
+      <UniqueIdentifier>{7641b9da-f313-4ee0-8c60-2c8050c87e45}</UniqueIdentifier>
+      <Extensions>cpp;c;cxx;rc;def;r;odl;idl;hpj;bat</Extensions>
+    </Filter>
+    <Filter Include="Header Files">
+      <UniqueIdentifier>{0333a61f-f79b-490c-9761-a4e5966f3ff0}</UniqueIdentifier>
+      <Extensions>h;hpp;hxx;hm;inl</Extensions>
+    </Filter>
+    <Filter Include="Resource Files">
+      <UniqueIdentifier>{05869d75-29f4-43d9-bebc-9973e550d958}</UniqueIdentifier>
+      <Extensions>ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe</Extensions>
+    </Filter>
+  </ItemGroup>
+  <ItemGroup>
+    <ClCompile Include="icuexportdata.cpp">
+      <Filter>Source Files</Filter>
+    </ClCompile>
+  </ItemGroup>
+</Project>
+\ No newline at end of file
diff --git a/intl/icu/source/tools/icuexportdata/sources.txt b/intl/icu/source/tools/icuexportdata/sources.txt
new file mode 100644
index 0000000000..13520ecb2f
--- /dev/null
+++ b/intl/icu/source/tools/icuexportdata/sources.txt
@@ -0,0 +1 @@
+icuexportdata.cpp
author	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-04-07 19:33:14 +0000
committer	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-04-07 19:33:14 +0000
commit	36d22d82aa202bb199967e9512281e9a53db42c9 (patch)
tree	105e8c98ddea1c1e4784a60a5a6410fa416be2de /intl/icu/source/tools/icuexportdata
parent	Initial commit. (diff)
download	firefox-esr-36d22d82aa202bb199967e9512281e9a53db42c9.tar.xz firefox-esr-36d22d82aa202bb199967e9512281e9a53db42c9.zip