summaryrefslogtreecommitdiffstats
path: root/external/icu
diff options
context:
space:
mode:
Diffstat (limited to 'external/icu')
-rw-r--r--external/icu/ExternalPackage_icu.mk42
-rw-r--r--external/icu/ExternalPackage_icu_ure.mk48
-rw-r--r--external/icu/ExternalProject_icu.mk94
-rw-r--r--external/icu/Makefile7
-rw-r--r--external/icu/Module_icu.mk19
-rw-r--r--external/icu/README1
-rw-r--r--external/icu/UnpackedTarball_icu.mk46
-rw-r--r--external/icu/Wdeprecated-copy-dtor.patch25
-rw-r--r--external/icu/c++20-comparison.patch171
-rwxr-xr-xexternal/icu/cross-bin/icu-config12
-rw-r--r--external/icu/gcc9.patch26
-rw-r--r--external/icu/icu4c-aix.patch.1143
-rw-r--r--external/icu/icu4c-android.patch.175
-rw-r--r--external/icu/icu4c-build.patch.191
-rw-r--r--external/icu/icu4c-clang-cl.patch.128
-rw-r--r--external/icu/icu4c-icudata-stdlibs.patch.114
-rw-r--r--external/icu/icu4c-khmerbreakengine.patch.1845
-rw-r--r--external/icu/icu4c-macosx.patch.120
-rw-r--r--external/icu/icu4c-mkdir.patch.111
-rw-r--r--external/icu/icu4c-rpath.patch.136
-rw-r--r--external/icu/icu4c-rtti.patch.112
-rw-r--r--external/icu/icu4c-scriptrun.patch.160
-rw-r--r--external/icu/icu4c-solarisgcc.patch.112
-rw-r--r--external/icu/icu4c-ubsan.patch.114
-rw-r--r--external/icu/icu4c-warnings.patch.111
-rw-r--r--external/icu/khmerdict.dictbin0 -> 263537 bytes
-rw-r--r--external/icu/ubsan.patch52
27 files changed, 1915 insertions, 0 deletions
diff --git a/external/icu/ExternalPackage_icu.mk b/external/icu/ExternalPackage_icu.mk
new file mode 100644
index 000000000..dcd4da216
--- /dev/null
+++ b/external/icu/ExternalPackage_icu.mk
@@ -0,0 +1,42 @@
+# -*- Mode: makefile-gmake; tab-width: 4; indent-tabs-mode: t -*-
+#
+# This file is part of the LibreOffice project.
+#
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+#
+
+icu_VERSION := $(ICU_MAJOR).$(ICU_MINOR)$(if $(ICU_MICRO),.$(ICU_MICRO))
+
+$(eval $(call gb_ExternalPackage_ExternalPackage,icu,icu))
+
+$(eval $(call gb_ExternalPackage_use_external_project,icu,icu))
+
+ifneq ($(DISABLE_DYNLOADING),TRUE)
+ifeq ($(OS),WNT)
+
+ifeq ($(COM),GCC)
+$(eval $(call gb_ExternalPackage_add_files,icu,$(LIBO_LIB_FOLDER),\
+ source/lib/icuin$(ICU_MAJOR).dll \
+))
+else
+$(eval $(call gb_ExternalPackage_add_files,icu,$(LIBO_LIB_FOLDER),\
+ source/lib/icuin$(if $(MSVC_USE_DEBUG_RUNTIME),d)$(ICU_MAJOR).dll \
+))
+endif # $(COM)
+
+else ifeq ($(OS),ANDROID)
+
+$(eval $(call gb_ExternalPackage_add_files,icu,$(LIBO_LIB_FOLDER),\
+ source/lib/libicui18nlo.so \
+))
+
+else # $(OS) != WNT/ANDROID
+
+$(eval $(call gb_ExternalPackage_add_file,icu,$(LIBO_LIB_FOLDER)/libicui18n$(gb_Library_DLLEXT).$(ICU_MAJOR),source/lib/libicui18n$(gb_Library_DLLEXT).$(icu_VERSION)))
+
+endif # $(OS)
+endif # DISABLE_DYNLOADING
+
+# vim: set noet sw=4 ts=4:
diff --git a/external/icu/ExternalPackage_icu_ure.mk b/external/icu/ExternalPackage_icu_ure.mk
new file mode 100644
index 000000000..fefe71afd
--- /dev/null
+++ b/external/icu/ExternalPackage_icu_ure.mk
@@ -0,0 +1,48 @@
+# -*- Mode: makefile-gmake; tab-width: 4; indent-tabs-mode: t -*-
+#
+# This file is part of the LibreOffice project.
+#
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+#
+
+# libxml2 is in URE and depends on icuuc*.dll on Windows; the i18nlangtag lib is
+# in URE and depends on the icuuc lib (which in turn depends on the icudata lib)
+# on all platforms:
+
+$(eval $(call gb_ExternalPackage_ExternalPackage,icu_ure,icu))
+
+$(eval $(call gb_ExternalPackage_use_external_project,icu_ure,icu))
+
+ifneq ($(DISABLE_DYNLOADING),TRUE)
+ifeq ($(OS),WNT)
+
+ifeq ($(COM),GCC)
+$(eval $(call gb_ExternalPackage_add_files,icu_ure,$(LIBO_URE_LIB_FOLDER),\
+ source/lib/icudt$(ICU_MAJOR).dll \
+ source/lib/icuuc$(ICU_MAJOR).dll \
+))
+else
+$(eval $(call gb_ExternalPackage_add_files,icu_ure,$(LIBO_URE_LIB_FOLDER),\
+ source/lib/icudt$(if $(MSVC_USE_DEBUG_RUNTIME),d)$(ICU_MAJOR).dll \
+ source/lib/icuuc$(if $(MSVC_USE_DEBUG_RUNTIME),d)$(ICU_MAJOR).dll \
+))
+endif # $(COM)
+
+else ifeq ($(OS),ANDROID)
+
+$(eval $(call gb_ExternalPackage_add_files,icu_ure,$(LIBO_URE_LIB_FOLDER),\
+ source/lib/libicudatalo.so \
+ source/lib/libicuuclo.so \
+))
+
+else # $(OS) != WNT/ANDROID
+
+$(eval $(call gb_ExternalPackage_add_file,icu_ure,$(LIBO_URE_LIB_FOLDER)/libicudata$(gb_Library_DLLEXT).$(ICU_MAJOR),source/lib/libicudata$(gb_Library_DLLEXT).$(icu_VERSION)))
+$(eval $(call gb_ExternalPackage_add_file,icu_ure,$(LIBO_URE_LIB_FOLDER)/libicuuc$(gb_Library_DLLEXT).$(ICU_MAJOR),source/lib/libicuuc$(gb_Library_DLLEXT).$(icu_VERSION)))
+
+endif # $(OS)
+endif # DISABLE_DYNLOADING
+
+# vim: set noet sw=4 ts=4:
diff --git a/external/icu/ExternalProject_icu.mk b/external/icu/ExternalProject_icu.mk
new file mode 100644
index 000000000..f62d8528c
--- /dev/null
+++ b/external/icu/ExternalProject_icu.mk
@@ -0,0 +1,94 @@
+# -*- Mode: makefile-gmake; tab-width: 4; indent-tabs-mode: t -*-
+#
+# This file is part of the LibreOffice project.
+#
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+#
+
+$(eval $(call gb_ExternalProject_ExternalProject,icu))
+
+$(eval $(call gb_ExternalProject_register_targets,icu,\
+ build \
+))
+# Preprocessor define HAVE_GCC_ATOMICS=1/0 derived from GCC_HAVE_BUILTIN_ATOMIC; only the non-WNT configure call below passes it.
+icu_CPPFLAGS:="-DHAVE_GCC_ATOMICS=$(if $(filter TRUE,$(GCC_HAVE_BUILTIN_ATOMIC)),1,0)"
+
+ifeq ($(OS),WNT)
+
+# Note: runConfigureICU ignores everything following the platform name, so --enable-debug/--disable-release must precede Cygwin/MSVC!
+$(call gb_ExternalProject_get_state_target,icu,build) :
+ $(call gb_Trace_StartRange,icu,EXTERNAL)
+ $(call gb_ExternalProject_run,build,\
+ export LIB="$(ILIB)" \
+ && CFLAGS="-FS $(SOLARINC) $(gb_DEBUGINFO_FLAGS)" CPPFLAGS="$(SOLARINC)" CXXFLAGS="-FS $(SOLARINC) $(gb_DEBUGINFO_FLAGS)" \
+ INSTALL=`cygpath -m /usr/bin/install` \
+ ./runConfigureICU \
+ $(if $(MSVC_USE_DEBUG_RUNTIME),--enable-debug --disable-release) \
+ Cygwin/MSVC --disable-extras \
+ && $(MAKE) \
+ ,source)
+ $(call gb_Trace_EndRange,icu,EXTERNAL)
+
+else # $(OS)
+
+icu_CFLAGS:=" \
+ $(if $(filter iOS,$(OS)),-DUCONFIG_NO_FILE_IO) \
+ $(if $(SYSBASE),-I$(SYSBASE)/usr/include) \
+ $(if $(ENABLE_OPTIMIZED),$(gb_COMPILEROPTFLAGS),$(gb_COMPILERNOOPTFLAGS)) \
+ $(if $(ENABLE_LTO),$(gb_LTOFLAGS)) \
+ $(if $(filter GCC,$(COM)),-fno-strict-aliasing) \
+ $(if $(call gb_Module__symbols_enabled,icu),$(gb_DEBUGINFO_FLAGS)) \
+ $(if $(filter FUZZERS,$(BUILD_TYPE)),-DU_USE_STRTOD_L=0) \
+ $(if $(filter ANDROID,$(OS)),-fvisibility=hidden -fno-omit-frame-pointer)"
+icu_CXXFLAGS:="$(CXXFLAGS) $(CXXFLAGS_CXX11) \
+ $(if $(filter iOS,$(OS)),-DUCONFIG_NO_FILE_IO) \
+ $(if $(ENABLE_OPTIMIZED),$(gb_COMPILEROPTFLAGS),$(gb_COMPILERNOOPTFLAGS)) \
+ $(if $(ENABLE_LTO),$(gb_LTOFLAGS)) \
+ $(if $(filter GCC,$(COM)),-fno-strict-aliasing) \
+ $(if $(call gb_Module__symbols_enabled,icu),$(gb_DEBUGINFO_FLAGS)) \
+ $(if $(filter FUZZERS,$(BUILD_TYPE)),-DU_USE_STRTOD_L=0) \
+ $(if $(filter ANDROID,$(OS)),-fvisibility=hidden -fno-omit-frame-pointer $(SOLARINC))"
+icu_LDFLAGS:=" \
+ $(if $(ENABLE_LTO),$(gb_LTOFLAGS)) \
+ $(if $(filter TRUE,$(HAVE_LD_HASH_STYLE)),-Wl$(COMMA)--hash-style=$(WITH_LINKER_HASH_STYLE)) \
+ $(if $(SYSBASE),-L../lib -L../../lib -L../stubdata -L../../stubdata -L$(SYSBASE)/usr/lib) \
+ $(if $(filter TRUE,$(HAVE_LD_BSYMBOLIC_FUNCTIONS)), -Wl$(COMMA)-Bsymbolic-functions) \
+ $(if $(filter ANDROID,$(OS)),$(gb_STDLIBS))"
+
+# When cross-compiling, --disable-tools makes icu/source/Makefile.in skip the data directory
+# entirely, so the requested data library is not built; DATASUBDIR=data adds it back to the subdirectories.
+# NOTE(review): the MACOSX step tags icui18n URELIB and icudata OOO, unlike ExternalPackage_icu{,_ure} - confirm.
+$(call gb_ExternalProject_get_state_target,icu,build) :
+ $(call gb_Trace_StartRange,icu,EXTERNAL)
+ $(call gb_ExternalProject_run,build,\
+ CPPFLAGS=$(icu_CPPFLAGS) CFLAGS=$(icu_CFLAGS) \
+ CXXFLAGS=$(icu_CXXFLAGS) LDFLAGS=$(icu_LDFLAGS) \
+ ./configure \
+ --disable-layout --disable-samples \
+ $(if $(filter FUZZERS,$(BUILD_TYPE)),--disable-release) \
+ $(if $(CROSS_COMPILING),--disable-tools --disable-extras) \
+ $(if $(filter iOS ANDROID,$(OS)),--disable-dyload) \
+ $(if $(filter ANDROID,$(OS)),--disable-strict ac_cv_c_bigendian=no) \
+ $(if $(filter SOLARIS AIX,$(OS)),--disable-64bit-libs) \
+ $(if $(filter TRUE,$(DISABLE_DYNLOADING)),\
+ --with-data-packaging=static --enable-static --disable-shared --disable-dyload,\
+ --disable-static --enable-shared $(if $(filter ANDROID,$(OS)),--with-library-suffix=lo)) \
+ $(if $(CROSS_COMPILING),--build=$(BUILD_PLATFORM) --host=$(HOST_PLATFORM)\
+ --with-cross-build=$(WORKDIR_FOR_BUILD)/UnpackedTarball/icu/source) \
+ && $(MAKE) $(if $(CROSS_COMPILING),DATASUBDIR=data) \
+ $(if $(filter MACOSX,$(OS)), \
+ && $(PERL) $(SRCDIR)/solenv/bin/macosx-change-install-names.pl shl \
+ URELIB \
+ $(EXTERNAL_WORKDIR)/source/lib/libicuuc$(gb_Library_DLLEXT).$(icu_VERSION) \
+ $(EXTERNAL_WORKDIR)/source/lib/libicui18n$(gb_Library_DLLEXT).$(icu_VERSION) \
+ && $(PERL) $(SRCDIR)/solenv/bin/macosx-change-install-names.pl shl \
+ OOO \
+ $(EXTERNAL_WORKDIR)/source/lib/libicudata$(gb_Library_DLLEXT).$(icu_VERSION)) \
+ ,source)
+ $(call gb_Trace_EndRange,icu,EXTERNAL)
+
+endif
+
+# vim: set noet sw=4 ts=4:
diff --git a/external/icu/Makefile b/external/icu/Makefile
new file mode 100644
index 000000000..e4968cf85
--- /dev/null
+++ b/external/icu/Makefile
@@ -0,0 +1,7 @@
+# -*- Mode: makefile-gmake; tab-width: 4; indent-tabs-mode: t -*-
+
+module_directory:=$(dir $(realpath $(firstword $(MAKEFILE_LIST))))
+
+include $(module_directory)/../../solenv/gbuild/partial_build.mk
+
+# vim: set noet sw=4 ts=4:
diff --git a/external/icu/Module_icu.mk b/external/icu/Module_icu.mk
new file mode 100644
index 000000000..5c99b930f
--- /dev/null
+++ b/external/icu/Module_icu.mk
@@ -0,0 +1,19 @@
+# -*- Mode: makefile-gmake; tab-width: 4; indent-tabs-mode: t -*-
+#
+# This file is part of the LibreOffice project.
+#
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+#
+
+$(eval $(call gb_Module_Module,icu))
+
+$(eval $(call gb_Module_add_targets,icu,\
+ UnpackedTarball_icu \
+ ExternalPackage_icu \
+ ExternalPackage_icu_ure \
+ ExternalProject_icu \
+))
+
+# vim: set noet sw=4 ts=4:
diff --git a/external/icu/README b/external/icu/README
new file mode 100644
index 000000000..484de1f41
--- /dev/null
+++ b/external/icu/README
@@ -0,0 +1 @@
+ICU (International Components for Unicode): library providing Unicode support, from [http://site.icu-project.org/].
diff --git a/external/icu/UnpackedTarball_icu.mk b/external/icu/UnpackedTarball_icu.mk
new file mode 100644
index 000000000..72fae09b1
--- /dev/null
+++ b/external/icu/UnpackedTarball_icu.mk
@@ -0,0 +1,46 @@
+# -*- Mode: makefile-gmake; tab-width: 4; indent-tabs-mode: t -*-
+#
+# This file is part of the LibreOffice project.
+#
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+#
+
+$(eval $(call gb_UnpackedTarball_UnpackedTarball,icu))
+
+$(eval $(call gb_UnpackedTarball_set_tarball,icu,$(ICU_TARBALL)))
+
+$(eval $(call gb_UnpackedTarball_update_autoconf_configs,icu,source))
+
+# Data zip contains data/... and needs to end up in icu/source/data/...
+# Only data/misc/icudata.rc is needed for a Cygwin/MSVC build.
+$(eval $(call gb_UnpackedTarball_set_pre_action,icu,\
+ unzip -q -d source -o $(gb_UnpackedTarget_TARFILE_LOCATION)/$(ICU_DATA_TARBALL) data/misc/icudata.rc \
+))
+
+$(eval $(call gb_UnpackedTarball_set_patchlevel,icu,0))
+
+$(eval $(call gb_UnpackedTarball_add_patches,icu,\
+ external/icu/icu4c-build.patch.1 \
+ external/icu/icu4c-aix.patch.1 \
+ external/icu/icu4c-warnings.patch.1 \
+ external/icu/icu4c-macosx.patch.1 \
+ external/icu/icu4c-solarisgcc.patch.1 \
+ external/icu/icu4c-mkdir.patch.1 \
+ external/icu/icu4c-$(if $(filter ANDROID,$(OS)),android,rpath).patch.1 \
+ external/icu/icu4c-ubsan.patch.1 \
+ external/icu/icu4c-scriptrun.patch.1 \
+ external/icu/icu4c-rtti.patch.1 \
+ external/icu/icu4c-clang-cl.patch.1 \
+ $(if $(filter-out ANDROID,$(OS)),external/icu/icu4c-icudata-stdlibs.patch.1) \
+ external/icu/gcc9.patch \
+ external/icu/c++20-comparison.patch \
+ external/icu/ubsan.patch \
+ external/icu/Wdeprecated-copy-dtor.patch \
+ external/icu/icu4c-khmerbreakengine.patch.1 \
+))
+
+$(eval $(call gb_UnpackedTarball_add_file,icu,source/data/brkitr/khmerdict.dict,external/icu/khmerdict.dict))
+
+# vim: set noet sw=4 ts=4:
diff --git a/external/icu/Wdeprecated-copy-dtor.patch b/external/icu/Wdeprecated-copy-dtor.patch
new file mode 100644
index 000000000..67078ef1b
--- /dev/null
+++ b/external/icu/Wdeprecated-copy-dtor.patch
@@ -0,0 +1,25 @@
+--- source/common/unicode/uobject.h
++++ source/common/unicode/uobject.h
+@@ -245,10 +245,10 @@
+ // direct use of UObject itself
+
+ // default constructor
+- // inline UObject() {}
++ UObject() = default;
+
+ // copy constructor
+- // inline UObject(const UObject &other) {}
++ UObject(const UObject &other) = default;
+
+ #if 0
+ // TODO Sometime in the future. Implement operator==().
+@@ -280,8 +280,8 @@
+ * Subclasses need this assignment operator if they use compiler-provided
+ * assignment operators of their own. An alternative to not declaring one
+ * here would be to declare and empty-implement a protected or public one.
+- UObject &UObject::operator=(const UObject &);
+ */
++ UObject &operator=(const UObject &) = default;
+ };
+
+ #ifndef U_HIDE_INTERNAL_API
diff --git a/external/icu/c++20-comparison.patch b/external/icu/c++20-comparison.patch
new file mode 100644
index 000000000..44053e671
--- /dev/null
+++ b/external/icu/c++20-comparison.patch
@@ -0,0 +1,171 @@
+--- source/common/uvector.cpp
++++ source/common/uvector.cpp
+@@ -110,7 +110,7 @@
+ }
+
+ // This only does something sensible if this object has a non-null comparer
+-UBool UVector::operator==(const UVector& other) {
++UBool UVector::operator==(const UVector& other) const {
+ int32_t i;
+ if (count != other.count) return FALSE;
+ if (comparer != NULL) {
+--- source/common/uvector.h
++++ source/common/uvector.h
+@@ -113,12 +113,12 @@
+ * equal if they are of the same size and all elements are equal,
+ * as compared using this object's comparer.
+ */
+- UBool operator==(const UVector& other);
++ UBool operator==(const UVector& other) const;
+
+ /**
+ * Equivalent to !operator==()
+ */
+- inline UBool operator!=(const UVector& other);
++ inline UBool operator!=(const UVector& other) const;
+
+ //------------------------------------------------------------
+ // java.util.Vector API
+@@ -382,7 +382,7 @@
+ return elementAt(index);
+ }
+
+-inline UBool UVector::operator!=(const UVector& other) {
++inline UBool UVector::operator!=(const UVector& other) const {
+ return !operator==(other);
+ }
+
+--- source/i18n/tzrule.cpp
++++ source/i18n/tzrule.cpp
+@@ -53,7 +53,7 @@
+ return *this;
+ }
+
+-UBool
++bool
+ TimeZoneRule::operator==(const TimeZoneRule& that) const {
+ return ((this == &that) ||
+ (typeid(*this) == typeid(that) &&
+@@ -120,7 +120,7 @@
+ return *this;
+ }
+
+-UBool
++bool
+ InitialTimeZoneRule::operator==(const TimeZoneRule& that) const {
+ return ((this == &that) ||
+ (typeid(*this) == typeid(that) &&
+@@ -226,7 +226,7 @@
+ return *this;
+ }
+
+-UBool
++bool
+ AnnualTimeZoneRule::operator==(const TimeZoneRule& that) const {
+ if (this == &that) {
+ return TRUE;
+@@ -445,7 +445,7 @@
+ return *this;
+ }
+
+-UBool
++bool
+ TimeArrayTimeZoneRule::operator==(const TimeZoneRule& that) const {
+ if (this == &that) {
+ return TRUE;
+--- source/i18n/unicode/rbtz.h
++++ source/i18n/unicode/rbtz.h
+@@ -85,6 +85,7 @@
+ * @stable ICU 3.8
+ */
+ virtual UBool operator!=(const TimeZone& that) const;
++ UBool operator!=(const RuleBasedTimeZone& that) const {return !operator==(that);}
+
+ /**
+ * Adds the <code>TimeZoneRule</code> which represents time transitions.
+--- source/i18n/unicode/simpletz.h
++++ source/i18n/unicode/simpletz.h
+@@ -110,6 +110,7 @@
+ * @stable ICU 2.0
+ */
+ virtual UBool operator==(const TimeZone& that) const;
++ UBool operator!=(const SimpleTimeZone& that) const {return !operator==(that);}
+
+ /**
+ * Constructs a SimpleTimeZone with the given raw GMT offset and time zone ID,
+--- source/i18n/unicode/smpdtfmt.h
++++ source/i18n/unicode/smpdtfmt.h
+@@ -874,6 +874,7 @@
+ * @stable ICU 2.0
+ */
+ virtual UBool operator==(const Format& other) const;
++ UBool operator!=(const SimpleDateFormat& that) const {return !operator==(that);}
+
+
+ using DateFormat::format;
+--- source/i18n/unicode/stsearch.h
++++ source/i18n/unicode/stsearch.h
+@@ -297,6 +297,7 @@
+ * @stable ICU 2.0
+ */
+ virtual UBool operator==(const SearchIterator &that) const;
++ UBool operator!=(const StringSearch &that) const {return !operator==(that);}
+
+ // public get and set methods ----------------------------------------
+
+--- source/i18n/unicode/tzrule.h
++++ source/i18n/unicode/tzrule.h
+@@ -54,7 +54,7 @@
+ * @return true if the given <code>TimeZoneRule</code> objects are semantically equal.
+ * @stable ICU 3.8
+ */
+- virtual UBool operator==(const TimeZoneRule& that) const;
++ virtual bool operator==(const TimeZoneRule& that) const;
+
+ /**
+ * Return true if the given <code>TimeZoneRule</code> objects are semantically unequal. Objects
+@@ -245,7 +245,7 @@
+ * @return true if the given <code>TimeZoneRule</code> objects are semantically equal.
+ * @stable ICU 3.8
+ */
+- virtual UBool operator==(const TimeZoneRule& that) const;
++ virtual bool operator==(const TimeZoneRule& that) const;
+
+ /**
+ * Return true if the given <code>TimeZoneRule</code> objects are semantically unequal. Objects
+@@ -255,6 +255,7 @@
+ * @stable ICU 3.8
+ */
+ virtual UBool operator!=(const TimeZoneRule& that) const;
++ UBool operator!=(const InitialTimeZoneRule& that) const {return !operator==(that);}
+
+ /**
+ * Gets the time when this rule takes effect in the given year.
+@@ -456,7 +457,7 @@
+ * @return true if the given <code>TimeZoneRule</code> objects are semantically equal.
+ * @stable ICU 3.8
+ */
+- virtual UBool operator==(const TimeZoneRule& that) const;
++ virtual bool operator==(const TimeZoneRule& that) const;
+
+ /**
+ * Return true if the given <code>TimeZoneRule</code> objects are semantically unequal. Objects
+@@ -672,7 +673,7 @@
+ * @return true if the given <code>TimeZoneRule</code> objects are semantically equal.
+ * @stable ICU 3.8
+ */
+- virtual UBool operator==(const TimeZoneRule& that) const;
++ virtual bool operator==(const TimeZoneRule& that) const;
+
+ /**
+ * Return true if the given <code>TimeZoneRule</code> objects are semantically unequal. Objects
+--- source/i18n/unicode/vtzone.h
++++ source/i18n/unicode/vtzone.h
+@@ -81,6 +81,7 @@
+ * @stable ICU 3.8
+ */
+ virtual UBool operator!=(const TimeZone& that) const;
++ UBool operator!=(const VTimeZone& that) const {return !operator==(that);}
+
+ /**
+ * Create a <code>VTimeZone</code> instance by the time zone ID.
diff --git a/external/icu/cross-bin/icu-config b/external/icu/cross-bin/icu-config
new file mode 100755
index 000000000..8ccf94f9b
--- /dev/null
+++ b/external/icu/cross-bin/icu-config
@@ -0,0 +1,12 @@
+#!/bin/sh
+# Stand-in icu-config: answers --version, --cppflags and --ldflags-searchpath only; other options print nothing.
+case $1 in
+--version)
+ echo whatever
+ ;;
+--cppflags)
+ echo ${ICU_CFLAGS}
+ ;;
+--ldflags-searchpath)
+ echo ${ICU_LIBS}
+esac
diff --git a/external/icu/gcc9.patch b/external/icu/gcc9.patch
new file mode 100644
index 000000000..5c9808f8c
--- /dev/null
+++ b/external/icu/gcc9.patch
@@ -0,0 +1,26 @@
+--- source/i18n/unicode/format.h
++++ source/i18n/unicode/format.h
+@@ -22,6 +22,13 @@
+
+ #ifndef FORMAT_H
+ #define FORMAT_H
++
++#ifdef __GNUC__
++#pragma GCC diagnostic push
++#pragma GCC diagnostic ignored "-Wpragmas" // for old GCC
++#pragma GCC diagnostic ignored "-Wunknown-warning-option" // for Clang
++#pragma GCC diagnostic ignored "-Wdeprecated-copy"
++#endif
+
+
+ #include "unicode/utypes.h"
+@@ -314,5 +321,9 @@
+
+ #endif /* U_SHOW_CPLUSPLUS_API */
+
++#ifdef __GNUC__
++#pragma GCC diagnostic pop
++#endif
++
+ #endif // _FORMAT
+ //eof
diff --git a/external/icu/icu4c-aix.patch.1 b/external/icu/icu4c-aix.patch.1
new file mode 100644
index 000000000..77982163b
--- /dev/null
+++ b/external/icu/icu4c-aix.patch.1
@@ -0,0 +1,143 @@
+diff -ur icu.org/source/config/mh-aix-gcc icu/source/config/mh-aix-gcc
+--- icu.org/source/config/mh-aix-gcc 2016-06-15 20:58:17.000000000 +0200
++++ icu/source/config/mh-aix-gcc 2017-04-21 21:58:49.731432198 +0200
+@@ -18,84 +18,29 @@
+ GEN_DEPS.c= $(CC) -E -MM $(DEFS) $(CPPFLAGS)
+ GEN_DEPS.cc= $(CXX) -E -MM $(DEFS) $(CPPFLAGS)
+
+-## Commands to link
+-## We need to use the C++ linker, even when linking C programs, since
+-## our libraries contain C++ code (C++ static init not called)
+-LINK.c= $(AIX_PREDELETE) $(CXX) $(CXXFLAGS) $(LDFLAGS)
+-LINK.cc= $(AIX_PREDELETE) $(CXX) $(CXXFLAGS) $(LDFLAGS)
+-
+-## Shared library options
+-LD_SOOPTIONS= -Wl,-bsymbolic
+-
+-## Commands to make a shared library
+-SHLIB.c= $(AIX_PREDELETE) $(CC) $(CFLAGS) $(LDFLAGS) -shared -Wl,-bexpall $(LD_SOOPTIONS)
+-SHLIB.cc= $(AIX_PREDELETE) $(CXX) $(CXXFLAGS) $(LDFLAGS) -shared -Wl,-bexpall $(LD_SOOPTIONS)
+-
+-## Compiler switch to embed a runtime search path
+-LD_RPATH= -I
+-LD_RPATH_PRE=
++## Flags for position independent code
++SHAREDLIBCFLAGS = -fPIC
++SHAREDLIBCXXFLAGS = -fPIC
++SHAREDLIBCPPFLAGS = -DPIC
++
++## Additional flags when building libraries and with threads
++THREADSCPPFLAGS = -D_REENTRANT -D_THREAD_SAFE
++LIBCPPFLAGS =
+
+-## enable the shared lib loader
+-LDFLAGS += -Wl,-bbigtoc
++LD_RPATH=
++LD_RPATH_PRE=
+
+ ## These are the library specific LDFLAGS
+ LDFLAGSICUDT=-nodefaultlibs -nostdlib
+
+-## We need to delete things prior to linking, or else we'll get
+-## SEVERE ERROR: output file in use .. on AIX.
+-## But, shell script version should NOT delete target as we don't
+-## have $@ in that context. (SH = only shell script, icu-config)
+-AIX_PREDELETE=rm -f $@ ;
+-#SH# AIX_PREDELETE=
+-
+ ## Environment variable to set a runtime search path
+ LDLIBRARYPATH_ENVVAR = LIBPATH
+
+-## Override Versioned target for a shared library.
+-FINAL_SO_TARGET= $(basename $(SO_TARGET))$(SO_TARGET_VERSION).$(SO)
+-MIDDLE_SO_TARGET= $(basename $(SO_TARGET))$(SO_TARGET_VERSION_MAJOR).$(SO)
+-SHARED_OBJECT = $(notdir $(FINAL_SO_TARGET:.$(SO)=.$(SOBJ)))
+-SHARED_OBJECT_NO_VERSION = $(basename $(SO_TARGET)).$(SOBJ)
+-
+-# The following is for Makefile.inc's use.
+-ICULIBSUFFIX_VERSION = $(LIB_VERSION_MAJOR)
+-
+-# this one is for icudefs.mk's use
+-ifeq ($(ENABLE_SHARED),YES)
+-SO_TARGET_VERSION_SUFFIX = $(SO_TARGET_VERSION_MAJOR)
+-endif
+-
+-## Compiler switch to embed a library name. Not present on AIX.
+-LD_SONAME =
+-
+-## The type of assembly needed when pkgdata is used for generating shared libraries.
+-GENCCODE_ASSEMBLY=-a xlc
+-
+ ## Shared object suffix
+-SOBJ= so
+-# without the -brtl option, the library names use .a. AIX is funny that way.
+-SO= a
+-A= a
++SO= so
+
+ ## Non-shared intermediate object suffix
+-STATIC_O = o
+-
+-## Special AIX rules
+-
+-## Build archive from shared object
+-%.a : %.so
+- ln -f $< $(SHARED_OBJECT_NO_VERSION)
+- $(AR) $(ARFLAGS) $@ $(SHARED_OBJECT_NO_VERSION)
+- rm -f $(SHARED_OBJECT_NO_VERSION)
+-$(LIBDIR)/%.a : %.so
+- ln -f $< $(SHARED_OBJECT_NO_VERSION)
+- $(AR) $(ARFLAGS) $@ $(SHARED_OBJECT_NO_VERSION)
+- rm -f $(SHARED_OBJECT_NO_VERSION)
+-
+-## Build import list from export list
+-%.e : %.exp
+- @echo "Building an import list for $<"
+- @$(SHELL) -ec "echo '#! $*.a($*.so)' | cat - $< > $@"
++STATIC_O = ao
+
+ ## Compilation rules
+ %.$(STATIC_O): $(srcdir)/%.c
+@@ -123,10 +68,10 @@
+ [ -s $@ ] || rm -f $@'
+
+ ## Versioned libraries rules
+-%$(SO_TARGET_VERSION_MAJOR).$(SO): %$(SO_TARGET_VERSION).$(SO)
+- $(RM) $@ && ln -s ${*F}$(SO_TARGET_VERSION).$(SO) $@
+-%.$(SO): %$(SO_TARGET_VERSION).$(SO)
+- $(RM) $@ && ln -s ${*F}$(SO_TARGET_VERSION).$(SO) $@
++%.$(SO).$(SO_TARGET_VERSION_MAJOR): %.$(SO).$(SO_TARGET_VERSION)
++ $(RM) $@ && ln -s ${<F} $@
++%.$(SO): %.$(SO).$(SO_TARGET_VERSION_MAJOR)
++ $(RM) $@ && ln -s ${*F}.$(SO).$(SO_TARGET_VERSION) $@
+
+
+ ## BIR - bind with internal references [so app data and icu data doesn't collide]
+diff -ur icu.org/source/tools/pkgdata/pkgdata.cpp icu/source/tools/pkgdata/pkgdata.cpp
+--- icu.org/source/tools/pkgdata/pkgdata.cpp 2017-03-21 02:03:49.000000000 +0100
++++ icu/source/tools/pkgdata/pkgdata.cpp 2017-04-21 21:58:49.732432195 +0200
+@@ -934,7 +934,7 @@
+
+ uprv_strcat(pkgDataFlags[SO_EXT], ".");
+ uprv_strcat(pkgDataFlags[SO_EXT], pkgDataFlags[A_EXT]);
+-#elif U_PLATFORM == U_PF_OS400 || defined(_AIX)
++#elif U_PLATFORM == U_PF_OS400
+ sprintf(libFileNames[LIB_FILE_VERSION_TMP], "%s%s%s",
+ libFileNames[LIB_FILE],
+ FILE_EXTENSION_SEP,
+@@ -1407,15 +1407,6 @@
+ pkgDataFlags[LDICUDTFLAGS],
+ targetDir,
+ libFileNames[LIB_FILE_CYGWIN_VERSION],
+-#elif U_PLATFORM == U_PF_AIX
+- sprintf(cmd, "%s %s%s;%s %s -o %s%s %s %s%s %s %s",
+- RM_CMD,
+- targetDir,
+- libFileNames[LIB_FILE_VERSION_TMP],
+- pkgDataFlags[GENLIB],
+- pkgDataFlags[LDICUDTFLAGS],
+- targetDir,
+- libFileNames[LIB_FILE_VERSION_TMP],
+ #else
+ sprintf(cmd, "%s %s -o %s%s %s %s%s %s %s",
+ pkgDataFlags[GENLIB],
diff --git a/external/icu/icu4c-android.patch.1 b/external/icu/icu4c-android.patch.1
new file mode 100644
index 000000000..602d225d7
--- /dev/null
+++ b/external/icu/icu4c-android.patch.1
@@ -0,0 +1,75 @@
+diff -ur icu.org/source/common/unicode/platform.h icu/source/common/unicode/platform.h
+--- icu.org/source/common/unicode/platform.h 2019-10-03 13:16:41.000000000 +0200
++++ icu/source/common/unicode/platform.h 2019-10-29 22:58:26.881221287 +0100
+@@ -818,7 +818,7 @@
+ UPRV_HAS_DECLSPEC_ATTRIBUTE(dllimport))
+ # define U_EXPORT __declspec(dllexport)
+ #elif defined(__GNUC__)
+-# define U_EXPORT __attribute__((visibility("default")))
++# define U_EXPORT
+ #elif (defined(__SUNPRO_CC) && __SUNPRO_CC >= 0x550) \
+ || (defined(__SUNPRO_C) && __SUNPRO_C >= 0x550)
+ # define U_EXPORT __global
+diff -ur icu.org/source/config/mh-linux icu/source/config/mh-linux
+--- icu.org/source/config/mh-linux 2018-09-29 02:34:41.000000000 +0200
++++ icu/source/config/mh-linux 2018-10-20 00:33:36.558130876 +0200
+@@ -27,7 +27,7 @@
+
+ ## Compiler switch to embed a library name
+ # The initial tab in the next line is to prevent icu-config from reading it.
+- LD_SONAME = -Wl,-soname -Wl,$(notdir $(MIDDLE_SO_TARGET))
++ #LD_SONAME = -Wl,-soname -Wl,$(notdir $(MIDDLE_SO_TARGET))
+ #SH# # We can't depend on MIDDLE_SO_TARGET being set.
+ #SH# LD_SONAME=
+
+diff -ur icu.org/source/configure icu/source/configure
+--- icu.org/source/configure 2018-10-02 00:39:56.000000000 +0200
++++ icu/source/configure 2018-10-20 00:33:36.559130874 +0200
+@@ -5207,7 +5207,7 @@
+ else
+ icu_cv_host_frag=mh-linux-va
+ fi ;;
+-*-*-linux*|*-*-gnu|*-*-k*bsd*-gnu|*-*-kopensolaris*-gnu) icu_cv_host_frag=mh-linux ;;
++*-*-linux*|*-*-gnu|*-*-k*bsd*-gnu|*-*-kopensolaris*-gnu|*-*-*-androideabi*) icu_cv_host_frag=mh-linux ;;
+ i[34567]86-*-cygwin)
+ if test "$GCC" = yes; then
+ icu_cv_host_frag=mh-cygwin
+@@ -6400,6 +6400,10 @@
+ # Check to see if genccode can generate simple assembly.
+ GENCCODE_ASSEMBLY=
+ case "${host}" in
++arm-*-linux-androideabi)
++ if test "$GCC" = yes; then
++ GENCCODE_ASSEMBLY="-a gcc-android-arm"
++ fi ;;
+ *-linux*|*-kfreebsd*-gnu*|i*86-*-*bsd*|i*86-pc-gnu)
+ if test "$GCC" = yes; then
+ # We're using gcc, and the simple -a gcc command line works for genccode
+@@ -7499,6 +7503,10 @@
+ # wchar_t can be used
+ CHECK_UTF16_STRING_RESULT="available"
+ ;;
++*-*-*-androideabi|mips-unknown-linux-android)
+ # No UTF-16 strings here; presumably this is to avoid -std=c++0x, which causes trouble with uint64_t
++ CHECK_UTF16_STRING_RESULT="nope"
++ ;;
+ *)
+ ;;
+ esac
+diff -ur icu.org/source/i18n/decimfmt.cpp icu/source/i18n/decimfmt.cpp
+--- icu.org/source/i18n/decimfmt.cpp 2018-10-02 00:39:56.000000000 +0200
++++ icu/source/i18n/decimfmt.cpp 2018-10-20 00:33:36.560130873 +0200
+@@ -9,6 +9,13 @@
+ // Helpful in toString methods and elsewhere.
+ #define UNISTR_FROM_STRING_EXPLICIT
+
++#ifdef __ANDROID__
++#ifndef ARM
++#define ARM
++#endif
++#include <android/compatibility.hxx>
++#endif
++
+ #include <cmath>
+ #include <cstdlib>
+ #include <stdlib.h>
diff --git a/external/icu/icu4c-build.patch.1 b/external/icu/icu4c-build.patch.1
new file mode 100644
index 000000000..a878de732
--- /dev/null
+++ b/external/icu/icu4c-build.patch.1
@@ -0,0 +1,91 @@
+diff -ur icu.org/source/config/mh-darwin icu/source/config/mh-darwin
+--- icu.org/source/config/mh-darwin 2016-06-15 20:58:17.000000000 +0200
++++ icu/source/config/mh-darwin 2017-04-21 21:30:23.584568210 +0200
+@@ -30,11 +30,7 @@
+ SHLIB.cc= $(CXX) -dynamiclib -dynamic $(CXXFLAGS) $(LDFLAGS) $(LD_SOOPTIONS)
+
+ ## Compiler switches to embed a library name and version information
+-ifeq ($(ENABLE_RPATH),YES)
+-LD_SONAME = -Wl,-compatibility_version -Wl,$(SO_TARGET_VERSION_MAJOR) -Wl,-current_version -Wl,$(SO_TARGET_VERSION) -install_name $(libdir)/$(notdir $(MIDDLE_SO_TARGET))
+-else
+-LD_SONAME = -Wl,-compatibility_version -Wl,$(SO_TARGET_VERSION_MAJOR) -Wl,-current_version -Wl,$(SO_TARGET_VERSION) -install_name $(notdir $(MIDDLE_SO_TARGET)) $(PKGDATA_TRAILING_SPACE)
+-endif
++LD_SONAME = -Wl,-compatibility_version -Wl,$(SO_TARGET_VERSION_MAJOR) -Wl,-current_version -Wl,$(SO_TARGET_VERSION) -install_name @__________________________________________________URELIB/$(notdir $(MIDDLE_SO_TARGET))
+
+ ## Compiler switch to embed a runtime search path
+ LD_RPATH=
+@@ -50,10 +46,6 @@
+ ## Non-shared intermediate object suffix
+ STATIC_O = ao
+
+-## Override Versioned target for a shared library.
+-FINAL_SO_TARGET= $(basename $(SO_TARGET)).$(SO_TARGET_VERSION).$(SO)
+-MIDDLE_SO_TARGET= $(basename $(SO_TARGET)).$(SO_TARGET_VERSION_MAJOR).$(SO)
+-
+ ## Compilation and dependency rules
+ %.$(STATIC_O): $(srcdir)/%.c
+ $(call SILENT_COMPILE,$(strip $(COMPILE.c) $(STATICCPPFLAGS) $(STATICCFLAGS)) -MMD -MT "$*.d $*.o $*.$(STATIC_O)" -o $@ $<)
+@@ -67,16 +59,10 @@
+
+ ## Versioned libraries rules
+
+-%.$(SO_TARGET_VERSION_MAJOR).$(SO): %.$(SO_TARGET_VERSION).$(SO)
++%.$(SO).$(SO_TARGET_VERSION_MAJOR): %.$(SO).$(SO_TARGET_VERSION)
+ $(RM) $@ && ln -s ${<F} $@
+-%.$(SO): %.$(SO_TARGET_VERSION_MAJOR).$(SO)
+- $(RM) $@ && ln -s ${*F}.$(SO_TARGET_VERSION).$(SO) $@
+-
+-# tzcode option
+-TZORIG_EXTRA_CFLAGS=-DSTD_INSPIRED
+-
+-# genren opts
+-GENREN_PL_OPTS=-x Mach-O -n '-g' -p '| c++filt'
++%.$(SO): %.$(SO).$(SO_TARGET_VERSION_MAJOR)
++ $(RM) $@ && ln -s ${*F}.$(SO).$(SO_TARGET_VERSION) $@
+
+ ## Remove shared library 's'
+ STATIC_PREFIX_WHEN_USED =
+diff -ur icu.org/source/tools/toolutil/pkg_genc.cpp icu/source/tools/toolutil/pkg_genc.cpp
+--- icu.org/source/tools/toolutil/pkg_genc.cpp 2017-04-13 11:46:02.000000000 +0200
++++ icu/source/tools/toolutil/pkg_genc.cpp 2017-04-21 21:30:23.583568212 +0200
+@@ -160,6 +160,28 @@
+
+ ".long ","",HEX_0X
+ },
++ {"gcc-android-arm",
++ "\t.arch armv5te\n"
++ "\t.fpu softvfp\n"
++ "\t.eabi_attribute 20, 1\n"
++ "\t.eabi_attribute 21, 1\n"
++ "\t.eabi_attribute 23, 3\n"
++ "\t.eabi_attribute 24, 1\n"
++ "\t.eabi_attribute 25, 1\n"
++ "\t.eabi_attribute 26, 2\n"
++ "\t.eabi_attribute 30, 6\n"
++ "\t.eabi_attribute 18, 4\n"
++ "\t.file \"%s.s\"\n"
++ "\t.global %s\n"
++ "\t.section .rodata\n"
++ "\t.align 2\n"
++ "\t.type %s, %%object\n"
++ "%s:\n",
++
++ "\t.word ",
++ "\t.section .note.GNU-stack,\"\",%%progbits\n",
++ HEX_0X
++ },
+ /* 16 bytes alignment. */
+ /* http://docs.oracle.com/cd/E19641-01/802-1947/802-1947.pdf */
+ {"sun",
+diff -ur icu.org/source/tools/toolutil/pkg_genc.h icu/source/tools/toolutil/pkg_genc.h
+--- icu.org/source/tools/toolutil/pkg_genc.h 2017-01-20 01:20:31.000000000 +0100
++++ icu/source/tools/toolutil/pkg_genc.h 2017-04-21 21:30:23.582568215 +0200
+@@ -60,7 +60,7 @@
+ #endif
+
+ #define LARGE_BUFFER_MAX_SIZE 2048
+-#define SMALL_BUFFER_MAX_SIZE 512
++#define SMALL_BUFFER_MAX_SIZE 2048
+ #define SMALL_BUFFER_FLAG_NAMES 32
+ #define BUFFER_PADDING_SIZE 20
+
diff --git a/external/icu/icu4c-clang-cl.patch.1 b/external/icu/icu4c-clang-cl.patch.1
new file mode 100644
index 000000000..a111a0df9
--- /dev/null
+++ b/external/icu/icu4c-clang-cl.patch.1
@@ -0,0 +1,28 @@
+diff -ur icu.org/source/config/mh-cygwin-msvc icu/source/config/mh-cygwin-msvc
+--- icu.org/source/config/mh-cygwin-msvc 2017-01-23 01:38:28.000000000 +0100
++++ icu/source/config/mh-cygwin-msvc 2017-04-21 23:07:28.482892025 +0200
+@@ -55,8 +55,8 @@
+ LDFLAGS+=-nologo
+
+ # Commands to compile
+-COMPILE.c= $(CC) $(CPPFLAGS) $(DEFS) $(CFLAGS) -c
+-COMPILE.cc= $(CXX) $(CPPFLAGS) $(DEFS) $(CXXFLAGS) -c
++COMPILE.c= true && $(CC) $(CPPFLAGS) $(DEFS) $(CFLAGS) -c
++COMPILE.cc= true && $(CXX) $(CPPFLAGS) $(DEFS) $(CXXFLAGS) -c
+
+ # Commands to link
+ LINK.c= LINK.EXE -subsystem:console $(LDFLAGS)
+diff -ur icu.org/source/runConfigureICU icu/source/runConfigureICU
+--- icu.org/source/runConfigureICU 2017-01-23 01:38:28.000000000 +0100
++++ icu/source/runConfigureICU 2017-04-21 23:07:28.482892025 +0200
+@@ -261,8 +261,8 @@
+ Cygwin/MSVC)
+ THE_OS="Windows with Cygwin"
+ THE_COMP="Microsoft Visual C++"
+- CC=cl; export CC
+- CXX=cl; export CXX
++ CC=${CC-cl}; export CC
++ CXX=${CXX-cl}; export CXX
+ RELEASE_CFLAGS='-Gy -MD'
+ RELEASE_CXXFLAGS='-Gy -MD'
+ DEBUG_CFLAGS='-FS -Zi -MDd'
diff --git a/external/icu/icu4c-icudata-stdlibs.patch.1 b/external/icu/icu4c-icudata-stdlibs.patch.1
new file mode 100644
index 000000000..c8d66c6ed
--- /dev/null
+++ b/external/icu/icu4c-icudata-stdlibs.patch.1
@@ -0,0 +1,14 @@
+diff -ur icu.org/source/config/mh-linux icu/source/config/mh-linux
+--- icu.org/source/config/mh-linux 2017-04-21 23:09:57.588533707 +0200
++++ icu/source/config/mh-linux 2017-04-21 23:11:38.075292226 +0200
+@@ -27,7 +27,9 @@
+ RPATHLDFLAGS=${LD_RPATH_PRE}'$$ORIGIN'
+
+ ## These are the library specific LDFLAGS
+-LDFLAGSICUDT=-nodefaultlibs -nostdlib
++#LDFLAGSICUDT=-nodefaultlibs -nostdlib
++# Debian change: linking icudata as data only causes too many problems.
++LDFLAGSICUDT=
+
+ ## Compiler switch to embed a library name
+ # The initial tab in the next line is to prevent icu-config from reading it.
diff --git a/external/icu/icu4c-khmerbreakengine.patch.1 b/external/icu/icu4c-khmerbreakengine.patch.1
new file mode 100644
index 000000000..272d0b8ab
--- /dev/null
+++ b/external/icu/icu4c-khmerbreakengine.patch.1
@@ -0,0 +1,845 @@
+diff -ur icu.org/source/common/dictbe.cpp icu/source/common/dictbe.cpp
+--- icu.org/source/common/dictbe.cpp 2020-04-22 22:04:20.000000000 +0200
++++ icu/source/common/dictbe.cpp 2020-05-11 18:55:07.702282061 +0200
+@@ -32,7 +32,19 @@
+ ******************************************************************
+ */
+
+-DictionaryBreakEngine::DictionaryBreakEngine() {
++DictionaryBreakEngine::DictionaryBreakEngine()
++ : fTypes(0), clusterLimit(0) {
++}
++
++DictionaryBreakEngine::DictionaryBreakEngine(uint32_t breakTypes) // Stores the break-type mask and builds the character sets consulted by the scan* helpers.
++ : fTypes(breakTypes), clusterLimit(3) {
++ UErrorCode status = U_ZERO_ERROR; // NOTE(review): local status; the applyPattern() outcomes below are never inspected
++ fViramaSet.applyPattern(UNICODE_STRING_SIMPLE("[[:ccc=VR:]]"), status);
++
++ // Note: both skip sets also list U+200C, U+200D and U+2060, the characters KhmerBreakEngine puts in fIgnoreSet.
++ fSkipStartSet.applyPattern(UNICODE_STRING_SIMPLE("[[:lb=OP:][:lb=QU:]\\u200C\\u200D\\u2060]"), status);
++ fSkipEndSet.applyPattern(UNICODE_STRING_SIMPLE("[[:lb=CP:][:lb=QU:][:lb=EX:][:lb=CL:]\\u200C\\u200D\\u2060]"), status);
++ fNBeforeSet.applyPattern(UNICODE_STRING_SIMPLE("[[:lb=CR:][:lb=LF:][:lb=NL:][:lb=SP:][:lb=ZW:][:lb=IS:][:lb=BA:][:lb=NS:]]"), status);
+ }
+
+ DictionaryBreakEngine::~DictionaryBreakEngine() {
+@@ -79,6 +91,169 @@
+ fSet.compact();
+ }
+
++bool
++DictionaryBreakEngine::scanBeforeStart(UText *text, int32_t& start, bool &doBreak) const { // Steps back from `start` over fSkipStartSet characters; returns true when the first non-skipped character is ZWSP.
++ UErrorCode status = U_ZERO_ERROR; // NOTE(review): never inspected after the clone below
++ UText* ut = utext_clone(NULL, text, false, true, &status); // iterate on a clone (deep=false, readOnly=true); `text` itself is not used again in this function
++ utext_setNativeIndex(ut, start);
++ UChar32 c = utext_current32(ut); // character at the incoming `start` index
++ bool res = false;
++ doBreak = true; // stays true only if no character gets skipped
++ while (start >= 0) {
++ if (!fSkipStartSet.contains(c)) {
++ res = (c == ZWSP); // stopped on a non-skippable character: report whether it is ZWSP
++ break;
++ }
++ --start; // `start` is in/out: it moves back one index per skipped character
++ c = utext_previous32(ut);
++ doBreak = false; // at least one skippable character was passed over
++ }
++ utext_close(ut); // release the clone
++ return res;
++}
++
++bool
++DictionaryBreakEngine::scanAfterEnd(UText *text, int32_t textEnd, int32_t& end, bool &doBreak) const { // Steps forward from `end` (not past textEnd) over fSkipEndSet characters; returns true when the first non-skipped character is ZWSP.
++ UErrorCode status = U_ZERO_ERROR; // NOTE(review): never inspected after the clone below
++ UText* ut = utext_clone(NULL, text, false, true, &status); // iterate on a clone (deep=false, readOnly=true); `text` itself is not used again in this function
++ utext_setNativeIndex(ut, end);
++ UChar32 c = utext_current32(ut); // character at the incoming `end` index
++ bool res = false;
++ doBreak = !fNBeforeSet.contains(c); // initial answer: true unless the character at `end` is in fNBeforeSet
++ while (end < textEnd) {
++ if (!fSkipEndSet.contains(c)) {
++ res = (c == ZWSP); // stopped on a non-skippable character: report whether it is ZWSP
++ break;
++ }
++ ++end; // `end` is in/out: it moves forward one index per loop pass
++ c = utext_next32(ut); // NOTE(review): utext_next32 returns the code point at the current position and then advances, so the first pass appears to re-read the character already held in c - confirm this is intended
++ doBreak = false; // at least one skippable character was passed over
++ }
++ utext_close(ut); // release the clone
++ return res;
++}
++
++void
++DictionaryBreakEngine::scanBackClusters(UText *text, int32_t textStart, int32_t& start) const { // Moves `text` backwards from its current position by up to clusterLimit clusters; the index reached is written to `start`.
++ UChar32 c = 0;
++ start = utext_getNativeIndex(text); // begin at the iterator's current position (this function moves `text` itself, no clone is made)
++ while (start > textStart) { // first step back until a character outside fSkipEndSet is found
++ c = utext_previous32(text);
++ --start;
++ if (!fSkipEndSet.contains(c))
++ break;
++ }
++ for (int i = 0; i < clusterLimit; ++i) { // scan backwards clusterLimit clusters
++ while (start > textStart) {
++ while (fIgnoreSet.contains(c)) // NOTE(review): ignored characters move the iterator but `start` is not decremented here - confirm
++ c = utext_previous32(text);
++ if (!fMarkSet.contains(c)) {
++ if (fBaseSet.contains(c)) {
++ c = utext_previous32(text); // look at the character before the base
++ if (!fViramaSet.contains(c)) { // no virama (e.g. coeng) before the base: undo the look-behind and stop on the base
++ utext_next32(text);
++ c = utext_current32(text);
++ break;
++ } else { // virama precedes the base: treat the sequence as a mark and keep scanning
++ --start;
++ }
++ } else {
++ break; // neither mark nor base
++ }
++ }
++ c = utext_previous32(text);
++ --start;
++ }
++ if (!fBaseSet.contains(c) || start < textStart) { // not a cluster start so finish
++ break;
++ }
++ c = utext_previous32(text);
++ --start; // go round again
++ } // ignore hitting previous inhibitor since scanning for it should have found us!
++ ++start; // counteract the last --start (scanWJ passes its `before` as `start`)
++}
++
++void
++DictionaryBreakEngine::scanFwdClusters(UText *text, int32_t textEnd, int32_t& end) const { // Moves `text` forwards from its current position by up to clusterLimit clusters; the index reached is written to `end`.
++ UChar32 c = utext_current32(text);
++ end = utext_getNativeIndex(text); // begin at the iterator's current position (this function moves `text` itself, no clone is made)
++ while (end < textEnd) { // first step over leading fSkipStartSet characters
++ if (!fSkipStartSet.contains(c))
++ break;
++ utext_next32(text);
++ c = utext_current32(text);
++ ++end;
++ }
++ for (int i = 0; i < clusterLimit; ++i) { // scan forwards clusterLimit clusters
++ while (fIgnoreSet.contains(c)) { // NOTE(review): ignored characters move the iterator but `end` is not incremented here - confirm
++ utext_next32(text);
++ c = utext_current32(text);
++ }
++ if (fBaseSet.contains(c)) { // a cluster has to begin with a base character
++ while (end < textEnd) {
++ utext_next32(text);
++ c = utext_current32(text);
++ ++end;
++ if (!fMarkSet.contains(c)) // first non-mark ends this cluster
++ break;
++ else if (fViramaSet.contains(c)) { // handle coeng + base as mark
++ utext_next32(text);
++ c = utext_current32(text);
++ ++end;
++ if (!fBaseSet.contains(c)) // virama not followed by a base: stop here
++ break;
++ }
++ }
++ } else {
++ --end; // bad char so break after char before it
++ break;
++ }
++ }
++}
++
++bool
++DictionaryBreakEngine::scanWJ(UText *text, int32_t &start, int32_t end, int32_t &before, int32_t &after) const { // Searches [start, end) for ZWSP or WJ; on a hit fills `before`/`after` via the cluster scans, moves `start` just past the hit and returns true.
++ UErrorCode status = U_ZERO_ERROR; // NOTE(review): never inspected after the clone below
++ UText* ut = utext_clone(NULL, text, false, true, &status); // iterate on a clone (deep=false, readOnly=true); `text` itself is not used again in this function
++ int32_t nat = start; // index currently being examined
++ utext_setNativeIndex(ut, nat);
++ bool foundFirst = true; // true while nothing has been found yet (despite the name)
++ int32_t curr = start; // value handed back through `start` on success
++ while (nat < end) {
++ UChar32 c = utext_current32(ut);
++ if (c == ZWSP || c == WJ) {
++ curr = nat + 1; // index just past this ZWSP/WJ
++ if (foundFirst) // only scan backwards for first inhibitor
++ scanBackClusters(ut, start, before);
++ foundFirst = false; // don't scan backwards if we go around again. Also marks found something
++
++ utext_next32(ut); // step over the ZWSP/WJ before scanning forwards
++ scanFwdClusters(ut, end, after);
++ nat = after + 1;
++
++ if (c == ZWSP || c == WJ) { // did we hit another one? NOTE(review): c has not been reassigned since the test above, so this is always true and the else branch is unreachable - re-reading the current character may have been intended
++ continue;
++ } else {
++ break;
++ }
++ }
++
++ ++nat; // keep hunting
++ utext_next32(ut);
++ }
++
++ utext_close(ut); // release the clone
++
++ if (nat >= end && foundFirst) { // ran off the end without ever seeing ZWSP/WJ
++ start = before = after = nat;
++ return false; // failed to find anything
++ }
++ else {
++ start = curr;
++ }
++ return true; // yup hit one
++}
++
+ /*
+ ******************************************************************
+ * PossibleWord
+@@ -108,7 +283,7 @@
+ ~PossibleWord() {}
+
+ // Fill the list of candidates if needed, select the longest, and return the number found
+- int32_t candidates( UText *text, DictionaryMatcher *dict, int32_t rangeEnd );
++ int32_t candidates( UText *text, DictionaryMatcher *dict, int32_t rangeEnd, UnicodeSet const *ignoreSet = NULL, int32_t minLength = 0 );
+
+ // Select the currently marked candidate, point after it in the text, and invalidate self
+ int32_t acceptMarked( UText *text );
+@@ -129,12 +304,12 @@
+ };
+
+
+-int32_t PossibleWord::candidates( UText *text, DictionaryMatcher *dict, int32_t rangeEnd ) {
++int32_t PossibleWord::candidates( UText *text, DictionaryMatcher *dict, int32_t rangeEnd, UnicodeSet const *ignoreSet, int32_t minLength) {
+ // TODO: If getIndex is too slow, use offset < 0 and add discardAll()
+ int32_t start = (int32_t)utext_getNativeIndex(text);
+ if (start != offset) {
+ offset = start;
+- count = dict->matches(text, rangeEnd-start, UPRV_LENGTHOF(cuLengths), cuLengths, cpLengths, NULL, &prefix);
++ count = dict->matches(text, rangeEnd-start, UPRV_LENGTHOF(cuLengths), cuLengths, cpLengths, NULL, &prefix, ignoreSet, minLength);
+ // Dictionary leaves text after longest prefix, not longest word. Back up.
+ if (count <= 0) {
+ utext_setNativeIndex(text, start);
+@@ -815,53 +990,30 @@
+ * KhmerBreakEngine
+ */
+
+-// How many words in a row are "good enough"?
+-static const int32_t KHMER_LOOKAHEAD = 3;
+-
+-// Will not combine a non-word with a preceding dictionary word longer than this
+-static const int32_t KHMER_ROOT_COMBINE_THRESHOLD = 3;
+-
+-// Will not combine a non-word that shares at least this much prefix with a
+-// dictionary word, with a preceding word
+-static const int32_t KHMER_PREFIX_COMBINE_THRESHOLD = 3;
+-
+-// Minimum word size
+-static const int32_t KHMER_MIN_WORD = 2;
+-
+-// Minimum number of characters for two words
+-static const int32_t KHMER_MIN_WORD_SPAN = KHMER_MIN_WORD * 2;
+-
+ KhmerBreakEngine::KhmerBreakEngine(DictionaryMatcher *adoptDictionary, UErrorCode &status)
+- : DictionaryBreakEngine(),
++ : DictionaryBreakEngine((1 << UBRK_WORD) | (1 << UBRK_LINE)), // base class receives a bit mask with the UBRK_WORD and UBRK_LINE bits set
+ fDictionary(adoptDictionary)
+ {
+ UTRACE_ENTRY(UTRACE_UBRK_CREATE_BREAK_ENGINE);
+ UTRACE_DATA1(UTRACE_INFO, "dictbe=%s", "Khmr");
+- fKhmerWordSet.applyPattern(UNICODE_STRING_SIMPLE("[[:Khmr:]&[:LineBreak=SA:]]"), status);
++
++ clusterLimit = 3; // NOTE(review): redundant - DictionaryBreakEngine(uint32_t) already initialises clusterLimit to 3
++
++ fKhmerWordSet.applyPattern(UNICODE_STRING_SIMPLE("[[:Khmr:]\\u2060\\u200C\\u200D]"), status);
+ if (U_SUCCESS(status)) {
+ setCharacters(fKhmerWordSet);
+ }
+ fMarkSet.applyPattern(UNICODE_STRING_SIMPLE("[[:Khmr:]&[:LineBreak=SA:]&[:M:]]"), status);
+- fMarkSet.add(0x0020);
+- fEndWordSet = fKhmerWordSet;
+- fBeginWordSet.add(0x1780, 0x17B3);
+- //fBeginWordSet.add(0x17A3, 0x17A4); // deprecated vowels
+- //fEndWordSet.remove(0x17A5, 0x17A9); // Khmer independent vowels that can't end a word
+- //fEndWordSet.remove(0x17B2); // Khmer independent vowel that can't end a word
+- fEndWordSet.remove(0x17D2); // KHMER SIGN COENG that combines some following characters
+- //fEndWordSet.remove(0x17B6, 0x17C5); // Remove dependent vowels
+-// fEndWordSet.remove(0x0E31); // MAI HAN-AKAT
+-// fEndWordSet.remove(0x0E40, 0x0E44); // SARA E through SARA AI MAIMALAI
+-// fBeginWordSet.add(0x0E01, 0x0E2E); // KO KAI through HO NOKHUK
+-// fBeginWordSet.add(0x0E40, 0x0E44); // SARA E through SARA AI MAIMALAI
+-// fSuffixSet.add(THAI_PAIYANNOI);
+-// fSuffixSet.add(THAI_MAIYAMOK);
++ fIgnoreSet.add(0x2060); // WJ
++ fIgnoreSet.add(0x200C, 0x200D); // ZWNJ, ZWJ
++ fBaseSet.applyPattern(UNICODE_STRING_SIMPLE("[[:Khmr:]&[:lb=SA:]&[:^M:]]"), status);
++ fPuncSet.applyPattern(UNICODE_STRING_SIMPLE("[\\u17D4\\u17D5\\u17D6\\u17D7\\u17D9:]"), status); // NOTE(review): the ':' before ']' looks like a stray character that would add U+003A to the set - confirm
+
+ // Compact for caching.
+ fMarkSet.compact();
+- fEndWordSet.compact();
+- fBeginWordSet.compact();
+-// fSuffixSet.compact();
++ fIgnoreSet.compact();
++ fBaseSet.compact();
++ fPuncSet.compact();
+ UTRACE_EXIT_STATUS(status);
+ }
+
+@@ -874,180 +1026,204 @@
+ int32_t rangeStart,
+ int32_t rangeEnd,
+ UVector32 &foundBreaks ) const {
+- if ((rangeEnd - rangeStart) < KHMER_MIN_WORD_SPAN) {
+- return 0; // Not enough characters for two words
+- }
+-
+- uint32_t wordsFound = 0;
+- int32_t cpWordLength = 0;
+- int32_t cuWordLength = 0;
+- int32_t current;
++ uint32_t wordsFound = foundBreaks.size();
+ UErrorCode status = U_ZERO_ERROR;
+- PossibleWord words[KHMER_LOOKAHEAD];
+-
++ int32_t before = 0;
++ int32_t after = 0;
++ int32_t finalBefore = 0;
++ int32_t initAfter = 0;
++ int32_t scanStart = rangeStart;
++ int32_t scanEnd = rangeEnd;
++
++ bool startZwsp = false;
++ bool breakStart = false;
++ bool breakEnd = false;
++
++ if (rangeStart > 0) {
++ --scanStart;
++ startZwsp = scanBeforeStart(text, scanStart, breakStart);
++ }
+ utext_setNativeIndex(text, rangeStart);
++ scanFwdClusters(text, rangeEnd, initAfter);
++ bool endZwsp = scanAfterEnd(text, utext_nativeLength(text), scanEnd, breakEnd);
++ utext_setNativeIndex(text, rangeEnd - 1);
++ scanBackClusters(text, rangeStart, finalBefore);
++ if (finalBefore < initAfter) { // the whole run is tented so no breaks
++ if (breakStart || fTypes < UBRK_LINE)
++ foundBreaks.push(rangeStart, status);
++ if (breakEnd || fTypes < UBRK_LINE)
++ foundBreaks.push(rangeEnd, status);
++ return foundBreaks.size() - wordsFound;
++ }
+
+- while (U_SUCCESS(status) && (current = (int32_t)utext_getNativeIndex(text)) < rangeEnd) {
+- cuWordLength = 0;
+- cpWordLength = 0;
+-
+- // Look for candidate words at the current position
+- int32_t candidates = words[wordsFound%KHMER_LOOKAHEAD].candidates(text, fDictionary, rangeEnd);
+-
+- // If we found exactly one, use that
+- if (candidates == 1) {
+- cuWordLength = words[wordsFound % KHMER_LOOKAHEAD].acceptMarked(text);
+- cpWordLength = words[wordsFound % KHMER_LOOKAHEAD].markedCPLength();
+- wordsFound += 1;
+- }
++ scanStart = rangeStart;
++ scanWJ(text, scanStart, rangeEnd, before, after);
++ if (startZwsp || initAfter >= before) {
++ after = initAfter;
++ before = 0;
++ }
++ if (!endZwsp && after > finalBefore && after < rangeEnd)
++ endZwsp = true;
++ if (endZwsp && before > finalBefore)
++ before = finalBefore;
+
+- // If there was more than one, see which one can take us forward the most words
+- else if (candidates > 1) {
+- // If we're already at the end of the range, we're done
+- if ((int32_t)utext_getNativeIndex(text) >= rangeEnd) {
+- goto foundBest;
+- }
+- do {
+- int32_t wordsMatched = 1;
+- if (words[(wordsFound + 1) % KHMER_LOOKAHEAD].candidates(text, fDictionary, rangeEnd) > 0) {
+- if (wordsMatched < 2) {
+- // Followed by another dictionary word; mark first word as a good candidate
+- words[wordsFound % KHMER_LOOKAHEAD].markCurrent();
+- wordsMatched = 2;
+- }
++ utext_setNativeIndex(text, rangeStart);
++ int32_t numCodePts = rangeEnd - rangeStart;
++ // bestSnlp[i] is the snlp of the best segmentation of the first i
++ // code points in the range to be matched.
++ UVector32 bestSnlp(numCodePts + 1, status);
++ bestSnlp.addElement(0, status);
++ for(int32_t i = 1; i <= numCodePts; i++) {
++ bestSnlp.addElement(kuint32max, status);
++ }
+
+- // If we're already at the end of the range, we're done
+- if ((int32_t)utext_getNativeIndex(text) >= rangeEnd) {
+- goto foundBest;
+- }
++ // prev[i] is the index of the last code point in the previous word in
++ // the best segmentation of the first i characters. Note negative implies
++ // that the code point is part of an unknown word.
++ UVector32 prev(numCodePts + 1, status);
++ for(int32_t i = 0; i <= numCodePts; i++) {
++ prev.addElement(kuint32max, status);
++ }
+
+- // See if any of the possible second words is followed by a third word
+- do {
+- // If we find a third word, stop right away
+- if (words[(wordsFound + 2) % KHMER_LOOKAHEAD].candidates(text, fDictionary, rangeEnd)) {
+- words[wordsFound % KHMER_LOOKAHEAD].markCurrent();
+- goto foundBest;
+- }
+- }
+- while (words[(wordsFound + 1) % KHMER_LOOKAHEAD].backUp(text));
+- }
++ const int32_t maxWordSize = 20;
++ UVector32 values(maxWordSize, status);
++ values.setSize(maxWordSize);
++ UVector32 lengths(maxWordSize, status);
++ lengths.setSize(maxWordSize);
++
++ // Dynamic programming to find the best segmentation.
++
++ // In outer loop, i is the code point index,
++ // ix is the corresponding string (code unit) index.
++ // They differ when the string contains supplementary characters.
++ int32_t ix = rangeStart;
++ for (int32_t i = 0; i < numCodePts; ++i, utext_setNativeIndex(text, ++ix)) {
++ if ((uint32_t)bestSnlp.elementAti(i) == kuint32max) {
++ continue;
++ }
++
++ int32_t count;
++ count = fDictionary->matches(text, numCodePts - i, maxWordSize,
++ NULL, lengths.getBuffer(), values.getBuffer(), NULL, &fIgnoreSet, 2);
++ // Note: lengths is filled with code point lengths
++ // The NULL parameter is the ignored code unit lengths.
++
++ for (int32_t j = 0; j < count; j++) {
++ int32_t ln = lengths.elementAti(j);
++ if (ln + i >= numCodePts)
++ continue;
++ utext_setNativeIndex(text, ln+ix);
++ int32_t c = utext_current32(text);
++ if (fMarkSet.contains(c) || c == 0x17D2) { // Coeng
++ lengths.removeElementAt(j);
++ values.removeElementAt(j);
++ --j;
++ --count;
+ }
+- while (words[wordsFound % KHMER_LOOKAHEAD].backUp(text));
+-foundBest:
+- cuWordLength = words[wordsFound % KHMER_LOOKAHEAD].acceptMarked(text);
+- cpWordLength = words[wordsFound % KHMER_LOOKAHEAD].markedCPLength();
+- wordsFound += 1;
+ }
+-
+- // We come here after having either found a word or not. We look ahead to the
+- // next word. If it's not a dictionary word, we will combine it with the word we
+- // just found (if there is one), but only if the preceding word does not exceed
+- // the threshold.
+- // The text iterator should now be positioned at the end of the word we found.
+- if ((int32_t)utext_getNativeIndex(text) < rangeEnd && cpWordLength < KHMER_ROOT_COMBINE_THRESHOLD) {
+- // if it is a dictionary word, do nothing. If it isn't, then if there is
+- // no preceding word, or the non-word shares less than the minimum threshold
+- // of characters with a dictionary word, then scan to resynchronize
+- if (words[wordsFound % KHMER_LOOKAHEAD].candidates(text, fDictionary, rangeEnd) <= 0
+- && (cuWordLength == 0
+- || words[wordsFound % KHMER_LOOKAHEAD].longestPrefix() < KHMER_PREFIX_COMBINE_THRESHOLD)) {
+- // Look for a plausible word boundary
+- int32_t remaining = rangeEnd - (current+cuWordLength);
+- UChar32 pc;
+- UChar32 uc;
+- int32_t chars = 0;
+- for (;;) {
+- int32_t pcIndex = (int32_t)utext_getNativeIndex(text);
+- pc = utext_next32(text);
+- int32_t pcSize = (int32_t)utext_getNativeIndex(text) - pcIndex;
+- chars += pcSize;
+- remaining -= pcSize;
+- if (remaining <= 0) {
++ if (count == 0) {
++ utext_setNativeIndex(text, ix);
++ int32_t c = utext_current32(text);
++ if (fPuncSet.contains(c) || fIgnoreSet.contains(c) || c == ZWSP) {
++ values.setElementAt(0, count);
++ lengths.setElementAt(1, count++);
++ } else if (fBaseSet.contains(c)) {
++ int32_t currix = utext_getNativeIndex(text);
++ do {
++ utext_next32(text);
++ c = utext_current32(text);
++ if (utext_getNativeIndex(text) >= rangeEnd)
+ break;
+- }
+- uc = utext_current32(text);
+- if (fEndWordSet.contains(pc) && fBeginWordSet.contains(uc)) {
+- // Maybe. See if it's in the dictionary.
+- int32_t num_candidates = words[(wordsFound + 1) % KHMER_LOOKAHEAD].candidates(text, fDictionary, rangeEnd);
+- utext_setNativeIndex(text, current+cuWordLength+chars);
+- if (num_candidates > 0) {
++ if (c == 0x17D2) { // Coeng
++ utext_next32(text);
++ c = utext_current32(text);
++ if (!fBaseSet.contains(c) || utext_getNativeIndex(text) >= rangeEnd) {
+ break;
++ } else {
++ utext_next32(text);
++ c = utext_current32(text);
++ if (utext_getNativeIndex(text) >= rangeEnd)
++ break;
+ }
+ }
+- }
+-
+- // Bump the word count if there wasn't already one
+- if (cuWordLength <= 0) {
+- wordsFound += 1;
+- }
++ } while (fMarkSet.contains(c) || fIgnoreSet.contains(c));
++ values.setElementAt(BADSNLP, count);
++ lengths.setElementAt(utext_getNativeIndex(text) - currix, count++);
++ } else {
++ values.setElementAt(BADSNLP, count);
++ lengths.setElementAt(1, count++);
++ }
++ }
+
+- // Update the length with the passed-over characters
+- cuWordLength += chars;
++ for (int32_t j = 0; j < count; j++) {
++ uint32_t v = values.elementAti(j);
++ int32_t newSnlp = bestSnlp.elementAti(i) + v;
++ int32_t ln = lengths.elementAti(j);
++ utext_setNativeIndex(text, ln+ix);
++ int32_t c = utext_current32(text);
++ while ((fPuncSet.contains(c) || fIgnoreSet.contains(c)) && ln + i < numCodePts) {
++ ++ln;
++ utext_next32(text);
++ c = utext_current32(text);
+ }
+- else {
+- // Back up to where we were for next iteration
+- utext_setNativeIndex(text, current+cuWordLength);
++ int32_t ln_j_i = ln + i; // yes really i!
++ if (newSnlp < bestSnlp.elementAti(ln_j_i)) {
++ if (v == BADSNLP) {
++ int32_t p = prev.elementAti(i);
++ if (p < 0)
++ prev.setElementAt(p, ln_j_i);
++ else
++ prev.setElementAt(-i, ln_j_i);
++ }
++ else
++ prev.setElementAt(i, ln_j_i);
++ bestSnlp.setElementAt(newSnlp, ln_j_i);
+ }
+ }
+-
+- // Never stop before a combining mark.
+- int32_t currPos;
+- while ((currPos = (int32_t)utext_getNativeIndex(text)) < rangeEnd && fMarkSet.contains(utext_current32(text))) {
+- utext_next32(text);
+- cuWordLength += (int32_t)utext_getNativeIndex(text) - currPos;
++ }
++ // Start pushing the optimal offset index into t_boundary (t for tentative).
++ // prev[numCodePts] is guaranteed to be meaningful.
++ // We'll first push in the reverse order, i.e.,
++ // t_boundary[0] = numCodePts, and afterwards do a swap.
++ UVector32 t_boundary(numCodePts+1, status);
++
++ int32_t numBreaks = 0;
++ // No segmentation found, set boundary to end of range
++ while (numCodePts >= 0 && (uint32_t)bestSnlp.elementAti(numCodePts) == kuint32max) {
++ --numCodePts;
++ }
++ if (numCodePts < 0) {
++ t_boundary.addElement(numCodePts, status);
++ numBreaks++;
++ } else {
++ for (int32_t i = numCodePts; (uint32_t)i != kuint32max; i = prev.elementAti(i)) {
++ if (i < 0) i = -i;
++ t_boundary.addElement(i, status);
++ numBreaks++;
+ }
++ U_ASSERT(prev.elementAti(t_boundary.elementAti(numBreaks - 1)) == 0);
++ }
+
+- // Look ahead for possible suffixes if a dictionary word does not follow.
+- // We do this in code rather than using a rule so that the heuristic
+- // resynch continues to function. For example, one of the suffix characters
+- // could be a typo in the middle of a word.
+-// if ((int32_t)utext_getNativeIndex(text) < rangeEnd && wordLength > 0) {
+-// if (words[wordsFound%KHMER_LOOKAHEAD].candidates(text, fDictionary, rangeEnd) <= 0
+-// && fSuffixSet.contains(uc = utext_current32(text))) {
+-// if (uc == KHMER_PAIYANNOI) {
+-// if (!fSuffixSet.contains(utext_previous32(text))) {
+-// // Skip over previous end and PAIYANNOI
+-// utext_next32(text);
+-// utext_next32(text);
+-// wordLength += 1; // Add PAIYANNOI to word
+-// uc = utext_current32(text); // Fetch next character
+-// }
+-// else {
+-// // Restore prior position
+-// utext_next32(text);
+-// }
+-// }
+-// if (uc == KHMER_MAIYAMOK) {
+-// if (utext_previous32(text) != KHMER_MAIYAMOK) {
+-// // Skip over previous end and MAIYAMOK
+-// utext_next32(text);
+-// utext_next32(text);
+-// wordLength += 1; // Add MAIYAMOK to word
+-// }
+-// else {
+-// // Restore prior position
+-// utext_next32(text);
+-// }
+-// }
+-// }
+-// else {
+-// utext_setNativeIndex(text, current+wordLength);
+-// }
+-// }
+-
+- // Did we find a word on this iteration? If so, push it on the break stack
+- if (cuWordLength > 0) {
+- foundBreaks.push((current+cuWordLength), status);
++ // Now that we're done, convert positions in t_boundary[] (indices in
++ // the normalized input string) back to indices in the original input UText
++ // while reversing t_boundary and pushing values to foundBreaks.
++ for (int32_t i = numBreaks-1; i >= 0; i--) {
++ int32_t cpPos = t_boundary.elementAti(i);
++ if (cpPos == 0 && !breakStart && fTypes >= UBRK_LINE) continue;
++ int32_t utextPos = cpPos + rangeStart;
++ while (utextPos > after && scanWJ(text, utextPos, scanEnd, before, after));
++ if (utextPos < before) {
++ // Boundaries are added to foundBreaks output in ascending order.
++ U_ASSERT(foundBreaks.size() == 0 ||foundBreaks.peeki() < utextPos);
++ foundBreaks.push(utextPos, status);
+ }
+ }
+-
++
+ // Don't return a break for the end of the dictionary range if there is one there.
+- if (foundBreaks.peeki() >= rangeEnd) {
++ if (!breakEnd && fTypes >= UBRK_LINE && foundBreaks.peeki() >= rangeEnd) {
+ (void) foundBreaks.popi();
+- wordsFound -= 1;
+ }
+-
+- return wordsFound;
++ return foundBreaks.size() - wordsFound;
+ }
+
+ #if !UCONFIG_NO_NORMALIZATION
+diff -ur icu.org/source/common/dictbe.h icu/source/common/dictbe.h
+--- icu.org/source/common/dictbe.h 2020-04-22 22:04:20.000000000 +0200
++++ icu/source/common/dictbe.h 2020-05-11 19:08:24.754634732 +0200
+@@ -34,7 +34,8 @@
+ * threads without synchronization.</p>
+ */
+ class DictionaryBreakEngine : public LanguageBreakEngine {
+- private:
++ protected:
++
+ /**
+ * The set of characters handled by this engine
+ * @internal
+@@ -42,14 +43,84 @@
+
+ UnicodeSet fSet;
+
++ const int32_t WJ = 0x2060;
++ const int32_t ZWSP = 0x200B;
++
++ /**
++ * The break types it was constructed with
++ * @internal
++ */
++ uint32_t fTypes;
++
++ /**
++ * A Unicode set of all viramas
++ * @internal
++ */
++ UnicodeSet fViramaSet;
++
++ /**
++ * A Unicode set of all base characters
++ * @internal
++ */
++ UnicodeSet fBaseSet;
++
++ /**
++ * A Unicode set of all marks
++ * @internal
++ */
++ UnicodeSet fMarkSet;
++
++ /**
++ * A Unicode set of all characters ignored in dictionary matching
++ * @internal
++ */
++ UnicodeSet fIgnoreSet;
++
++ /**
++ * A Unicode set of characters skipped over when scanning at the start of a range (also contains the fIgnoreSet characters)
++ * @internal
++ */
++ UnicodeSet fSkipStartSet;
++
++ /**
++ * A Unicode set of characters skipped over when scanning at the end of a range (also contains the fIgnoreSet characters)
++ * @internal
++ */
++ UnicodeSet fSkipEndSet;
++
++ /**
++ * A Unicode set of all characters that should not be broken before
++ * @internal
++ */
++ UnicodeSet fNBeforeSet;
++
++ /**
++ * The number of clusters within which breaks are inhibited
++ * @internal
++ */
++ int32_t clusterLimit;
++
++ bool scanWJ(UText *text, int32_t &start, int32_t end, int32_t &before, int32_t &after) const;
++
++ bool scanBeforeStart(UText *text, int32_t& start, bool &doBreak) const;
++ bool scanAfterEnd(UText *text, int32_t rangeEnd, int32_t& end, bool &doBreak) const;
++ void scanBackClusters(UText *text, int32_t textStart, int32_t& start) const;
++ void scanFwdClusters(UText *text, int32_t textEnd, int32_t& end) const;
++
+ public:
+
+ /**
+- * <p>Constructor </p>
++ * <p>Default constructor.</p>
++ *
+ */
+ DictionaryBreakEngine();
+
+ /**
++ * <p>Constructor with break types.</p>
++ */
++ explicit DictionaryBreakEngine(uint32_t breakTypes);
++
++ /**
+ * <p>Virtual destructor.</p>
+ */
+ virtual ~DictionaryBreakEngine();
+@@ -293,11 +364,13 @@
+ */
+
+ UnicodeSet fKhmerWordSet;
+- UnicodeSet fEndWordSet;
+- UnicodeSet fBeginWordSet;
+- UnicodeSet fMarkSet;
+- DictionaryMatcher *fDictionary;
+-
++ UnicodeSet fBeginWordSet;
++ UnicodeSet fPuncSet;
++ DictionaryMatcher *fDictionary;
++
++ const uint32_t BADSNLP = 256 * 20;
++ const uint32_t kuint32max = 0x7FFFFFFF;
++
+ public:
+
+ /**
+diff -ur icu.org/source/common/dictionarydata.cpp icu/source/common/dictionarydata.cpp
+--- icu.org/source/common/dictionarydata.cpp 2020-04-22 22:04:20.000000000 +0200
++++ icu/source/common/dictionarydata.cpp 2020-05-11 18:50:43.703113749 +0200
+@@ -44,7 +44,7 @@
+
+ int32_t UCharsDictionaryMatcher::matches(UText *text, int32_t maxLength, int32_t limit,
+ int32_t *lengths, int32_t *cpLengths, int32_t *values,
+- int32_t *prefix) const {
++ int32_t *prefix, UnicodeSet const* ignoreSet, int32_t minLength) const {
+
+ UCharsTrie uct(characters);
+ int32_t startingTextIndex = (int32_t)utext_getNativeIndex(text);
+@@ -55,7 +55,13 @@
+ UStringTrieResult result = (codePointsMatched == 0) ? uct.first(c) : uct.next(c);
+ int32_t lengthMatched = (int32_t)utext_getNativeIndex(text) - startingTextIndex;
+ codePointsMatched += 1;
++ if (ignoreSet != NULL && ignoreSet->contains(c)) {
++ continue;
++ }
+ if (USTRINGTRIE_HAS_VALUE(result)) {
++ if (codePointsMatched < minLength) {
++ continue;
++ }
+ if (wordCount < limit) {
+ if (values != NULL) {
+ values[wordCount] = uct.getValue();
+@@ -112,7 +118,7 @@
+
+ int32_t BytesDictionaryMatcher::matches(UText *text, int32_t maxLength, int32_t limit,
+ int32_t *lengths, int32_t *cpLengths, int32_t *values,
+- int32_t *prefix) const {
++ int32_t *prefix, UnicodeSet const* ignoreSet, int32_t minLength) const {
+ BytesTrie bt(characters);
+ int32_t startingTextIndex = (int32_t)utext_getNativeIndex(text);
+ int32_t wordCount = 0;
+@@ -122,7 +128,13 @@
+ UStringTrieResult result = (codePointsMatched == 0) ? bt.first(transform(c)) : bt.next(transform(c));
+ int32_t lengthMatched = (int32_t)utext_getNativeIndex(text) - startingTextIndex;
+ codePointsMatched += 1;
++ if (ignoreSet != NULL && ignoreSet->contains(c)) {
++ continue;
++ }
+ if (USTRINGTRIE_HAS_VALUE(result)) {
++ if (codePointsMatched < minLength) {
++ continue;
++ }
+ if (wordCount < limit) {
+ if (values != NULL) {
+ values[wordCount] = bt.getValue();
+diff -ur icu.org/source/common/dictionarydata.h icu/source/common/dictionarydata.h
+--- icu.org/source/common/dictionarydata.h 2020-04-22 22:04:20.000000000 +0200
++++ icu/source/common/dictionarydata.h 2020-05-11 18:50:43.704113746 +0200
+@@ -21,6 +21,7 @@
+ #include "unicode/utext.h"
+ #include "unicode/udata.h"
+ #include "udataswp.h"
++#include "unicode/uniset.h"
+ #include "unicode/uobject.h"
+ #include "unicode/ustringtrie.h"
+
+@@ -92,7 +93,7 @@
+ */
+ virtual int32_t matches(UText *text, int32_t maxLength, int32_t limit,
+ int32_t *lengths, int32_t *cpLengths, int32_t *values,
+- int32_t *prefix) const = 0;
++ int32_t *prefix, UnicodeSet const* ignoreSet = NULL, int32_t minLength = 0) const = 0;
+
+ /** @return DictionaryData::TRIE_TYPE_XYZ */
+ virtual int32_t getType() const = 0;
+@@ -107,7 +108,7 @@
+ virtual ~UCharsDictionaryMatcher();
+ virtual int32_t matches(UText *text, int32_t maxLength, int32_t limit,
+ int32_t *lengths, int32_t *cpLengths, int32_t *values,
+- int32_t *prefix) const;
++ int32_t *prefix, UnicodeSet const* ignoreSet = NULL, int32_t minLength = 0) const;
+ virtual int32_t getType() const;
+ private:
+ const UChar *characters;
+@@ -125,7 +126,7 @@
+ virtual ~BytesDictionaryMatcher();
+ virtual int32_t matches(UText *text, int32_t maxLength, int32_t limit,
+ int32_t *lengths, int32_t *cpLengths, int32_t *values,
+- int32_t *prefix) const;
++ int32_t *prefix, UnicodeSet const* ignoreSet = NULL, int32_t minLength = 0) const;
+ virtual int32_t getType() const;
+ private:
+ UChar32 transform(UChar32 c) const;
diff --git a/external/icu/icu4c-macosx.patch.1 b/external/icu/icu4c-macosx.patch.1
new file mode 100644
index 000000000..fee08eb05
--- /dev/null
+++ b/external/icu/icu4c-macosx.patch.1
@@ -0,0 +1,20 @@
+diff -ur icu.org/source/common/putil.cpp icu/source/common/putil.cpp
+--- icu.org/source/common/putil.cpp 2017-04-10 16:22:16.000000000 +0200
++++ icu/source/common/putil.cpp 2017-04-21 22:14:09.940217733 +0200
+@@ -1198,8 +1198,16 @@
+ static const time_t decemberSolstice=1198332540; /*2007-12-22 06:09 UT*/
+
+ /* This probing will tell us when daylight savings occurs. */
++#if U_PLATFORM_IS_DARWIN_BASED
++ struct tm *tmp;
++ tmp = localtime(&juneSolstice);
++ juneSol = *tmp;
++ tmp = localtime(&decemberSolstice);
++ decemberSol = *tmp;
++#else
+ localtime_r(&juneSolstice, &juneSol);
+ localtime_r(&decemberSolstice, &decemberSol);
++#endif
+ if(decemberSol.tm_isdst > 0) {
+ daylightType = U_DAYLIGHT_DECEMBER;
+ } else if(juneSol.tm_isdst > 0) {
diff --git a/external/icu/icu4c-mkdir.patch.1 b/external/icu/icu4c-mkdir.patch.1
new file mode 100644
index 000000000..112e57cc2
--- /dev/null
+++ b/external/icu/icu4c-mkdir.patch.1
@@ -0,0 +1,11 @@
+diff -ur icu.org/source/data/Makefile.in icu/source/data/Makefile.in
+--- icu.org/source/data/Makefile.in 2019-04-17 21:42:15.000000000 +0200
++++ icu/source/data/Makefile.in 2019-10-28 12:57:15.033649494 +0100
+@@ -226,6 +226,7 @@
+ ifeq ($(PKGDATA_MODE),dll)
+ SO_VERSION_DATA = $(OUTTMPDIR)/icudata.res
+ $(SO_VERSION_DATA) : $(MISCSRCDIR)/icudata.rc
++ mkdir -p $(OUTTMPDIR)
+ ifeq ($(MSYS_RC_MODE),1)
+ rc.exe -i$(srcdir)/../common -i$(top_builddir)/common -fo$@ $(CPPFLAGS) $<
+ else
diff --git a/external/icu/icu4c-rpath.patch.1 b/external/icu/icu4c-rpath.patch.1
new file mode 100644
index 000000000..35a545778
--- /dev/null
+++ b/external/icu/icu4c-rpath.patch.1
@@ -0,0 +1,36 @@
+diff -ur icu.org/source/config/mh-linux icu/source/config/mh-linux
+--- icu.org/source/config/mh-linux 2016-06-15 20:58:17.000000000 +0200
++++ icu/source/config/mh-linux 2017-04-21 22:38:18.893927819 +0200
+@@ -22,6 +22,10 @@
+ LD_RPATH= -Wl,-zorigin,-rpath,'$$'ORIGIN
+ LD_RPATH_PRE = -Wl,-rpath,
+
++## Force RPATH=$ORIGIN to locate own dependencies w/o need for LD_LIBRARY_PATH:
++ENABLE_RPATH=YES
++RPATHLDFLAGS=${LD_RPATH_PRE}'$$ORIGIN'
++
+ ## These are the library specific LDFLAGS
+ LDFLAGSICUDT=-nodefaultlibs -nostdlib
+
+diff -ur icu.org/source/data/pkgdataMakefile.in icu/source/data/pkgdataMakefile.in
+--- icu.org/source/data/pkgdataMakefile.in 2016-06-15 20:58:17.000000000 +0200
++++ icu/source/data/pkgdataMakefile.in 2017-04-21 22:38:18.892927822 +0200
+@@ -18,6 +18,9 @@
+ MIDDLE_SO_TARGET=
+ PKGDATA_TRAILING_SPACE=" "
+
++# escape $ with \ when passing to echo; needed to preserve $ORIGIN
++SHLIB.c.shell := $(subst $$,\$$,$(SHLIB.c))
++
+ all : clean
+ @echo GENCCODE_ASSEMBLY_TYPE=$(GENCCODE_ASSEMBLY) >> $(OUTPUTFILE)
+ @echo SO=$(SO) >> $(OUTPUTFILE)
+@@ -26,7 +29,7 @@
+ @echo LIB_EXT_ORDER=$(FINAL_SO_TARGET) >> $(OUTPUTFILE)
+ @echo COMPILE="$(COMPILE.c)" >> $(OUTPUTFILE)
+ @echo LIBFLAGS="-I$(top_srcdir)/common -I$(top_builddir)/common $(SHAREDLIBCPPFLAGS) $(SHAREDLIBCFLAGS)" >> $(OUTPUTFILE)
+- @echo GENLIB="$(SHLIB.c)" >> $(OUTPUTFILE)
++ @echo GENLIB="$(SHLIB.c.shell)" >> $(OUTPUTFILE)
+ @echo LDICUDTFLAGS=$(LDFLAGSICUDT) >> $(OUTPUTFILE)
+ @echo LD_SONAME=$(LD_SONAME) >> $(OUTPUTFILE)
+ @echo RPATH_FLAGS=$(RPATH_FLAGS) >> $(OUTPUTFILE)
diff --git a/external/icu/icu4c-rtti.patch.1 b/external/icu/icu4c-rtti.patch.1
new file mode 100644
index 000000000..c058c7f3c
--- /dev/null
+++ b/external/icu/icu4c-rtti.patch.1
@@ -0,0 +1,12 @@
+diff -ur icu.org/source/config/mh-linux icu/source/config/mh-linux
+--- icu.org/source/config/mh-linux 2017-04-21 23:01:23.257769703 +0200
++++ icu/source/config/mh-linux 2017-04-21 23:03:23.166481552 +0200
+@@ -36,7 +36,7 @@
+ #SH# LD_SONAME=
+
+ ## Shared library options
+-LD_SOOPTIONS= -Wl,-Bsymbolic
++LD_SOOPTIONS= -Wl,-Bsymbolic-functions
+
+ ## Shared object suffix
+ SO = so
diff --git a/external/icu/icu4c-scriptrun.patch.1 b/external/icu/icu4c-scriptrun.patch.1
new file mode 100644
index 000000000..f2f2cf9f3
--- /dev/null
+++ b/external/icu/icu4c-scriptrun.patch.1
@@ -0,0 +1,60 @@
+diff -ur icu.org/source/extra/scrptrun/scrptrun.cpp icu/source/extra/scrptrun/scrptrun.cpp
+--- icu.org/source/extra/scrptrun/scrptrun.cpp 2017-01-20 01:20:31.000000000 +0100
++++ icu/source/extra/scrptrun/scrptrun.cpp 2017-04-21 22:59:31.708037770 +0200
+@@ -151,7 +151,11 @@
+ // characters above it on the stack will be poped.
+ if (pairIndex >= 0) {
+ if ((pairIndex & 1) == 0) {
+- parenStack[++parenSP].pairIndex = pairIndex;
++ ++parenSP;
++ int32_t nVecSize = parenStack.size();
++ if (parenSP == nVecSize)
++ parenStack.resize(nVecSize + 128);
++ parenStack[parenSP].pairIndex = pairIndex;
+ parenStack[parenSP].scriptCode = scriptCode;
+ } else if (parenSP >= 0) {
+ int32_t pi = pairIndex & ~1;
+@@ -185,7 +189,14 @@
+ // pop it from the stack
+ if (pairIndex >= 0 && (pairIndex & 1) != 0 && parenSP >= 0) {
+ parenSP -= 1;
+- startSP -= 1;
++ /* Decrement startSP only if it is >= 0;
++ decrementing it unnecessarily will lead to memory corruption
++ while processing the while block above,
++ e.g. with startSP = -4, parenSP = -1.
++ */
++ if (startSP >= 0) {
++ startSP -= 1;
++ }
+ }
+ } else {
+ // if the run broke on a surrogate pair,
+diff -ur icu.org/source/extra/scrptrun/scrptrun.h icu/source/extra/scrptrun/scrptrun.h
+--- icu.org/source/extra/scrptrun/scrptrun.h 2017-01-20 01:20:31.000000000 +0100
++++ icu/source/extra/scrptrun/scrptrun.h 2017-04-21 22:59:31.708037770 +0200
+@@ -19,6 +19,7 @@
+ #include "unicode/utypes.h"
+ #include "unicode/uobject.h"
+ #include "unicode/uscript.h"
++#include <vector>
+
+ U_NAMESPACE_BEGIN
+
+@@ -81,7 +82,7 @@
+ int32_t scriptEnd;
+ UScriptCode scriptCode;
+
+- ParenStackEntry parenStack[128];
++ std::vector<ParenStackEntry> parenStack;
+ int32_t parenSP;
+
+ static int8_t highBit(int32_t value);
+@@ -135,6 +136,7 @@
+ scriptEnd = charStart;
+ scriptCode = USCRIPT_INVALID_CODE;
+ parenSP = -1;
++ parenStack.resize(128);
+ }
+
+ inline void ScriptRun::reset(int32_t start, int32_t length)
diff --git a/external/icu/icu4c-solarisgcc.patch.1 b/external/icu/icu4c-solarisgcc.patch.1
new file mode 100644
index 000000000..6000ed0cb
--- /dev/null
+++ b/external/icu/icu4c-solarisgcc.patch.1
@@ -0,0 +1,12 @@
+diff -ur icu.org/source/common/uposixdefs.h icu/source/common/uposixdefs.h
+--- icu.org/source/common/uposixdefs.h 2017-03-09 03:12:45.000000000 +0100
++++ icu/source/common/uposixdefs.h 2017-04-21 22:23:11.857926971 +0200
+@@ -54,7 +54,7 @@
+ *
+ * z/OS needs this definition for timeval and to get usleep.
+ */
+-#if !defined(_XOPEN_SOURCE_EXTENDED) && defined(__TOS_MVS__)
++#if !defined(_XOPEN_SOURCE_EXTENDED) && (defined(__TOS_MVS__) || defined(__IBMC__) || defined(__IBMCPP__))
+ # define _XOPEN_SOURCE_EXTENDED 1
+ #endif
+
diff --git a/external/icu/icu4c-ubsan.patch.1 b/external/icu/icu4c-ubsan.patch.1
new file mode 100644
index 000000000..7b0c2efc9
--- /dev/null
+++ b/external/icu/icu4c-ubsan.patch.1
@@ -0,0 +1,14 @@
+diff -ur icu.org/source/common/ubidiimp.h icu/source/common/ubidiimp.h
+--- icu.org/source/common/ubidiimp.h 2019-10-03 13:16:41.000000000 +0200
++++ icu/source/common/ubidiimp.h 2019-10-28 19:08:13.533284618 +0100
+@@ -198,8 +198,8 @@
+ /* in a Run, logicalStart will get this bit set if the run level is odd */
+ #define INDEX_ODD_BIT (1UL<<31)
+
+-#define MAKE_INDEX_ODD_PAIR(index, level) ((index)|((int32_t)((level)&1)<<31))
+-#define ADD_ODD_BIT_FROM_LEVEL(x, level) ((x)|=((int32_t)((level)&1)<<31))
++#define MAKE_INDEX_ODD_PAIR(index, level) ((index)|((uint32_t)((level)&1)<<31))
++#define ADD_ODD_BIT_FROM_LEVEL(x, level) ((x)|=((uint32_t)((level)&1)<<31))
+ #define REMOVE_ODD_BIT(x) ((x)&=~INDEX_ODD_BIT)
+
+ #define GET_INDEX(x) ((x)&~INDEX_ODD_BIT)
diff --git a/external/icu/icu4c-warnings.patch.1 b/external/icu/icu4c-warnings.patch.1
new file mode 100644
index 000000000..76f8b7298
--- /dev/null
+++ b/external/icu/icu4c-warnings.patch.1
@@ -0,0 +1,11 @@
+diff -ur icu.org/source/common/unicode/utf16.h icu/source/common/unicode/utf16.h
+--- icu.org/source/common/unicode/utf16.h 2019-10-03 13:16:41.000000000 +0200
++++ icu/source/common/unicode/utf16.h 2019-10-28 18:03:07.967208272 +0100
+@@ -397,6 +397,7 @@
+ (s)[(i)++]=(uint16_t)(((c)&0x3ff)|0xdc00); \
+ } else /* c>0x10ffff or not enough space */ { \
+ (isError)=TRUE; \
++ (void)(isError); \
+ } \
+ } UPRV_BLOCK_MACRO_END
+
diff --git a/external/icu/khmerdict.dict b/external/icu/khmerdict.dict
new file mode 100644
index 000000000..52605b654
--- /dev/null
+++ b/external/icu/khmerdict.dict
Binary files differ
diff --git a/external/icu/ubsan.patch b/external/icu/ubsan.patch
new file mode 100644
index 000000000..762bd6e5a
--- /dev/null
+++ b/external/icu/ubsan.patch
@@ -0,0 +1,52 @@
+--- source/common/uloc.cpp
++++ source/common/uloc.cpp
+@@ -1203,7 +1203,8 @@
+ return 0;
+ }
+ int32_t reslen = result.length();
+- uprv_memcpy(language, result.data(), std::min(reslen, languageCapacity));
++ auto const n = std::min(reslen, languageCapacity);
++ if (n != 0) uprv_memcpy(language, result.data(), n);
+ return reslen;
+ }
+
+@@ -1251,7 +1252,8 @@
+ return 0;
+ }
+ int32_t reslen = result.length();
+- uprv_memcpy(script, result.data(), std::min(reslen, scriptCapacity));
++ auto const n = std::min(reslen, scriptCapacity);
++ if (n != 0) uprv_memcpy(script, result.data(), n);
+ return reslen;
+ }
+
+--- source/tools/genrb/rbutil.c
++++ source/tools/genrb/rbutil.c
+@@ -30,7 +30,12 @@
+ get_dirname(char *dirname,
+ const char *filename)
+ {
+- const char *lastSlash = uprv_strrchr(filename, U_FILE_SEP_CHAR) + 1;
++ const char *lastSlash = uprv_strrchr(filename, U_FILE_SEP_CHAR);
++ if(lastSlash == NULL) {
++ lastSlash = filename;
++ } else {
++ ++lastSlash;
++ }
+
+ if(lastSlash>filename) {
+ uprv_strncpy(dirname, filename, (lastSlash - filename));
+@@ -46,7 +51,12 @@
+ const char *filename)
+ {
+ /* strip off any leading directory portions */
+- const char *lastSlash = uprv_strrchr(filename, U_FILE_SEP_CHAR) + 1;
++ const char *lastSlash = uprv_strrchr(filename, U_FILE_SEP_CHAR);
++ if(lastSlash == NULL) {
++ lastSlash = filename;
++ } else {
++ ++lastSlash;
++ }
+ char *lastDot;
+
+ if(lastSlash>filename) {