27 files changed, 1915 insertions, 0 deletions
diff --git a/external/icu/ExternalPackage_icu.mk b/external/icu/ExternalPackage_icu.mk
new file mode 100644
index 000000000..dcd4da216
--- /dev/null
+++ b/external/icu/ExternalPackage_icu.mk
@@ -0,0 +1,53 @@
+# -*- Mode: makefile-gmake; tab-width: 4; indent-tabs-mode: t -*-
+#
+# This file is part of the LibreOffice project.
+#
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+#
+
+# Delivers the ICU i18n shared library from the icu external project into
+# $(LIBO_LIB_FOLDER).
+
+# Dotted ICU version: MAJOR.MINOR, with .MICRO appended only when ICU_MICRO is
+# non-empty.  It is the suffix of the versioned library file picked up in the
+# generic (non-Windows, non-Android) branch below.
+icu_VERSION := $(ICU_MAJOR).$(ICU_MINOR)$(if $(ICU_MICRO),.$(ICU_MICRO))
+
+$(eval $(call gb_ExternalPackage_ExternalPackage,icu,icu))
+
+$(eval $(call gb_ExternalPackage_use_external_project,icu,icu))
+
+# Nothing is packaged when DISABLE_DYNLOADING is TRUE.
+ifneq ($(DISABLE_DYNLOADING),TRUE)
+ifeq ($(OS),WNT)
+
+ifeq ($(COM),GCC)
+$(eval $(call gb_ExternalPackage_add_files,icu,$(LIBO_LIB_FOLDER),\
+	source/lib/icuin$(ICU_MAJOR).dll \
+))
+else
+# Non-GCC compilers: the DLL name gets a 'd' infix when MSVC_USE_DEBUG_RUNTIME
+# is set.
+$(eval $(call gb_ExternalPackage_add_files,icu,$(LIBO_LIB_FOLDER),\
+	source/lib/icuin$(if $(MSVC_USE_DEBUG_RUNTIME),d)$(ICU_MAJOR).dll \
+))
+endif # $(COM)
+
+else ifeq ($(OS),ANDROID)
+
+# Android: the library file name carries an 'lo' suffix.
+$(eval $(call gb_ExternalPackage_add_files,icu,$(LIBO_LIB_FOLDER),\
+	source/lib/libicui18nlo.so \
+))
+
+else # $(OS) != WNT/ANDROID
+
+# Deliver the fully versioned library file under its major-version-only name.
+$(eval $(call gb_ExternalPackage_add_file,icu,$(LIBO_LIB_FOLDER)/libicui18n$(gb_Library_DLLEXT).$(ICU_MAJOR),source/lib/libicui18n$(gb_Library_DLLEXT).$(icu_VERSION)))
+
+endif # $(OS)
+endif # DISABLE_DYNLOADING
+
+# vim: set noet sw=4 ts=4:
diff --git a/external/icu/ExternalPackage_icu_ure.mk b/external/icu/ExternalPackage_icu_ure.mk
new file mode 100644
index 000000000..fefe71afd
--- /dev/null
+++ b/external/icu/ExternalPackage_icu_ure.mk
@@ -0,0 +1,59 @@
+# -*- Mode: makefile-gmake; tab-width: 4; indent-tabs-mode: t -*-
+#
+# This file is part of the LibreOffice project.
+#
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+#
+
+# libxml2 is in URE and depends on icuuc*.dll on Windows; the i18nlangtag lib is
+# in URE and depends on the icuuc lib (which in turn depends on the icudata lib)
+# on all platforms:
+
+# Dotted ICU version (MAJOR.MINOR, plus .MICRO when ICU_MICRO is non-empty).
+# The add_file calls below expand it immediately inside $(eval ...), so it is
+# assigned here rather than relying on ExternalPackage_icu.mk having been read
+# first; the expression is the same one used there.
+icu_VERSION := $(ICU_MAJOR).$(ICU_MINOR)$(if $(ICU_MICRO),.$(ICU_MICRO))
+
+$(eval $(call gb_ExternalPackage_ExternalPackage,icu_ure,icu))
+
+$(eval $(call gb_ExternalPackage_use_external_project,icu_ure,icu))
+
+# Nothing is packaged when DISABLE_DYNLOADING is TRUE.
+ifneq ($(DISABLE_DYNLOADING),TRUE)
+ifeq ($(OS),WNT)
+
+ifeq ($(COM),GCC)
+$(eval $(call gb_ExternalPackage_add_files,icu_ure,$(LIBO_URE_LIB_FOLDER),\
+	source/lib/icudt$(ICU_MAJOR).dll \
+	source/lib/icuuc$(ICU_MAJOR).dll \
+))
+else
+# Non-GCC compilers: the DLL names get a 'd' infix when MSVC_USE_DEBUG_RUNTIME
+# is set.
+$(eval $(call gb_ExternalPackage_add_files,icu_ure,$(LIBO_URE_LIB_FOLDER),\
+	source/lib/icudt$(if $(MSVC_USE_DEBUG_RUNTIME),d)$(ICU_MAJOR).dll \
+	source/lib/icuuc$(if $(MSVC_USE_DEBUG_RUNTIME),d)$(ICU_MAJOR).dll \
+))
+endif # $(COM)
+
+else ifeq ($(OS),ANDROID)
+
+# Android: the library file names carry an 'lo' suffix.
+$(eval $(call gb_ExternalPackage_add_files,icu_ure,$(LIBO_URE_LIB_FOLDER),\
+	source/lib/libicudatalo.so \
+	source/lib/libicuuclo.so \
+))
+
+else # $(OS) != WNT/ANDROID
+
+# Deliver each fully versioned library file under its major-version-only name.
+$(eval $(call gb_ExternalPackage_add_file,icu_ure,$(LIBO_URE_LIB_FOLDER)/libicudata$(gb_Library_DLLEXT).$(ICU_MAJOR),source/lib/libicudata$(gb_Library_DLLEXT).$(icu_VERSION)))
+$(eval $(call gb_ExternalPackage_add_file,icu_ure,$(LIBO_URE_LIB_FOLDER)/libicuuc$(gb_Library_DLLEXT).$(ICU_MAJOR),source/lib/libicuuc$(gb_Library_DLLEXT).$(icu_VERSION)))
+
+endif # $(OS)
+endif # DISABLE_DYNLOADING
+
+# vim: set noet sw=4 ts=4:
diff --git a/external/icu/ExternalProject_icu.mk b/external/icu/ExternalProject_icu.mk
new file mode 100644
index 000000000..f62d8528c
--- /dev/null
+++ b/external/icu/ExternalProject_icu.mk
@@ -0,0 +1,103 @@
+# -*- Mode: makefile-gmake; tab-width: 4; indent-tabs-mode: t -*-
+#
+# This file is part of the LibreOffice project.
+#
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+#
+
+# Builds ICU by running its configure step followed by $(MAKE) in the source
+# subdirectory: runConfigureICU on Windows, ./configure everywhere else.
+
+$(eval $(call gb_ExternalProject_ExternalProject,icu))
+
+$(eval $(call gb_ExternalProject_register_targets,icu,\
+	build \
+))
+
+# -DHAVE_GCC_ATOMICS is 1 when GCC_HAVE_BUILTIN_ATOMIC is TRUE and 0 otherwise.
+# Only the ./configure branch below passes this on.
+icu_CPPFLAGS:="-DHAVE_GCC_ATOMICS=$(if $(filter TRUE,$(GCC_HAVE_BUILTIN_ATOMIC)),1,0)"
+
+ifeq ($(OS),WNT)
+
+# Note: runConfigureICU ignores everything following the platform name!
+$(call gb_ExternalProject_get_state_target,icu,build) :
+	$(call gb_Trace_StartRange,icu,EXTERNAL)
+	$(call gb_ExternalProject_run,build,\
+		export LIB="$(ILIB)" \
+		&& CFLAGS="-FS $(SOLARINC) $(gb_DEBUGINFO_FLAGS)" CPPFLAGS="$(SOLARINC)" CXXFLAGS="-FS $(SOLARINC) $(gb_DEBUGINFO_FLAGS)" \
+			INSTALL=`cygpath -m /usr/bin/install` \
+			./runConfigureICU \
+			$(if $(MSVC_USE_DEBUG_RUNTIME),--enable-debug --disable-release) \
+			Cygwin/MSVC --disable-extras \
+		&& $(MAKE) \
+	,source)
+	$(call gb_Trace_EndRange,icu,EXTERNAL)
+
+else # $(OS)
+
+# Flags for the ./configure build.  Each value includes its own double quotes
+# so the recipe can write VAR=$(icu_...) directly.  icu_CFLAGS starts from the
+# user's $(CFLAGS) just as icu_CXXFLAGS starts from $(CXXFLAGS), so flags given
+# for the build also reach ICU's C sources.
+icu_CFLAGS:="$(CFLAGS) \
+	$(if $(filter iOS,$(OS)),-DUCONFIG_NO_FILE_IO) \
+	$(if $(SYSBASE),-I$(SYSBASE)/usr/include) \
+	$(if $(ENABLE_OPTIMIZED),$(gb_COMPILEROPTFLAGS),$(gb_COMPILERNOOPTFLAGS)) \
+	$(if $(ENABLE_LTO),$(gb_LTOFLAGS)) \
+	$(if $(filter GCC,$(COM)),-fno-strict-aliasing) \
+	$(if $(call gb_Module__symbols_enabled,icu),$(gb_DEBUGINFO_FLAGS)) \
+	$(if $(filter FUZZERS,$(BUILD_TYPE)),-DU_USE_STRTOD_L=0) \
+	$(if $(filter ANDROID,$(OS)),-fvisibility=hidden -fno-omit-frame-pointer)"
+icu_CXXFLAGS:="$(CXXFLAGS) $(CXXFLAGS_CXX11) \
+	$(if $(filter iOS,$(OS)),-DUCONFIG_NO_FILE_IO) \
+	$(if $(ENABLE_OPTIMIZED),$(gb_COMPILEROPTFLAGS),$(gb_COMPILERNOOPTFLAGS)) \
+	$(if $(ENABLE_LTO),$(gb_LTOFLAGS)) \
+	$(if $(filter GCC,$(COM)),-fno-strict-aliasing) \
+	$(if $(call gb_Module__symbols_enabled,icu),$(gb_DEBUGINFO_FLAGS)) \
+	$(if $(filter FUZZERS,$(BUILD_TYPE)),-DU_USE_STRTOD_L=0) \
+	$(if $(filter ANDROID,$(OS)),-fvisibility=hidden -fno-omit-frame-pointer $(SOLARINC))"
+icu_LDFLAGS:=" \
+	$(if $(ENABLE_LTO),$(gb_LTOFLAGS)) \
+	$(if $(filter TRUE,$(HAVE_LD_HASH_STYLE)),-Wl$(COMMA)--hash-style=$(WITH_LINKER_HASH_STYLE)) \
+	$(if $(SYSBASE),-L../lib -L../../lib -L../stubdata -L../../stubdata -L$(SYSBASE)/usr/lib) \
+	$(if $(filter TRUE,$(HAVE_LD_BSYMBOLIC_FUNCTIONS)), -Wl$(COMMA)-Bsymbolic-functions) \
+	$(if $(filter ANDROID,$(OS)),$(gb_STDLIBS))"
+
+# DATASUBDIR=data in cross-compiling case, because --disable-tools completely skips the
+# data directory/doesn't build the requested library in that case (icu/source/Makefile.in)
+# so we need to add it back to the list of subdirectories to build
+$(call gb_ExternalProject_get_state_target,icu,build) :
+	$(call gb_Trace_StartRange,icu,EXTERNAL)
+	$(call gb_ExternalProject_run,build,\
+		CPPFLAGS=$(icu_CPPFLAGS) CFLAGS=$(icu_CFLAGS) \
+		CXXFLAGS=$(icu_CXXFLAGS) LDFLAGS=$(icu_LDFLAGS) \
+		./configure \
+			--disable-layout --disable-samples \
+			$(if $(filter FUZZERS,$(BUILD_TYPE)),--disable-release) \
+			$(if $(CROSS_COMPILING),--disable-tools --disable-extras) \
+			$(if $(filter iOS ANDROID,$(OS)),--disable-dyload) \
+			$(if $(filter ANDROID,$(OS)),--disable-strict ac_cv_c_bigendian=no) \
+			$(if $(filter SOLARIS AIX,$(OS)),--disable-64bit-libs) \
+			$(if $(filter TRUE,$(DISABLE_DYNLOADING)),\
+				--with-data-packaging=static --enable-static --disable-shared --disable-dyload,\
+				--disable-static --enable-shared $(if $(filter ANDROID,$(OS)),--with-library-suffix=lo)) \
+			$(if $(CROSS_COMPILING),--build=$(BUILD_PLATFORM) --host=$(HOST_PLATFORM)\
+				--with-cross-build=$(WORKDIR_FOR_BUILD)/UnpackedTarball/icu/source) \
+		&& $(MAKE) $(if $(CROSS_COMPILING),DATASUBDIR=data) \
+		$(if $(filter MACOSX,$(OS)), \
+			&& $(PERL) $(SRCDIR)/solenv/bin/macosx-change-install-names.pl shl \
+				URELIB \
+				$(EXTERNAL_WORKDIR)/source/lib/libicuuc$(gb_Library_DLLEXT).$(icu_VERSION) \
+				$(EXTERNAL_WORKDIR)/source/lib/libicui18n$(gb_Library_DLLEXT).$(icu_VERSION) \
+			&& $(PERL) $(SRCDIR)/solenv/bin/macosx-change-install-names.pl shl \
+				OOO \
+				$(EXTERNAL_WORKDIR)/source/lib/libicudata$(gb_Library_DLLEXT).$(icu_VERSION)) \
+	,source)
+	$(call gb_Trace_EndRange,icu,EXTERNAL)
+
+endif
+
+# vim: set noet sw=4 ts=4:
diff --git a/external/icu/Makefile b/external/icu/Makefile
new file mode 100644
index 000000000..e4968cf85
--- /dev/null
+++ b/external/icu/Makefile
@@ -0,0 +1,10 @@
+# -*- Mode: makefile-gmake; tab-width: 4; indent-tabs-mode: t -*-
+
+# Entry point for running make inside this module directory: work out where
+# this Makefile lives (first entry of MAKEFILE_LIST, symlinks resolved) and
+# include solenv/gbuild/partial_build.mk from two directory levels above it.
+module_directory:=$(dir $(realpath $(firstword $(MAKEFILE_LIST))))
+
+include $(module_directory)/../../solenv/gbuild/partial_build.mk
+
+# vim: set noet sw=4 ts=4:
diff --git a/external/icu/Module_icu.mk b/external/icu/Module_icu.mk
new file mode 100644
index 000000000..5c99b930f
--- /dev/null
+++ b/external/icu/Module_icu.mk
@@ -0,0 +1,24 @@
+# -*- Mode: makefile-gmake; tab-width: 4; indent-tabs-mode: t -*-
+#
+# This file is part of the LibreOffice project.
+#
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+#
+
+# Declares the icu gbuild module and lists the targets that belong to it; each
+# name matches a .mk file in this directory.
+$(eval $(call gb_Module_Module,icu))
+
+# NOTE(review): ExternalPackage_icu.mk assigns icu_VERSION, which
+# ExternalPackage_icu_ure.mk and ExternalProject_icu.mk also read; presumably
+# the targets are read in the order listed here - confirm before reordering.
+$(eval $(call gb_Module_add_targets,icu,\
+	UnpackedTarball_icu \
+	ExternalPackage_icu \
+	ExternalPackage_icu_ure \
+	ExternalProject_icu \
+))
+
+# vim: set noet sw=4 ts=4:
diff --git a/external/icu/README b/external/icu/README
new file mode 100644
index 000000000..484de1f41
--- /dev/null
+++ b/external/icu/README
@@ -0,0 +1 @@
+Library providing Unicode support, from [http://site.icu-project.org/].
diff --git a/external/icu/UnpackedTarball_icu.mk b/external/icu/UnpackedTarball_icu.mk
new file mode 100644
index 000000000..72fae09b1
--- /dev/null
+++ b/external/icu/UnpackedTarball_icu.mk
@@ -0,0 +1,58 @@
+# -*- Mode: makefile-gmake; tab-width: 4; indent-tabs-mode: t -*-
+#
+# This file is part of the LibreOffice project.
+#
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+#
+
+# Describes how the ICU tarball is unpacked: the tarball to use, a pre-action
+# that extracts one file from the data zip, the patches to apply and one extra
+# file to add to the unpacked tree.
+
+$(eval $(call gb_UnpackedTarball_UnpackedTarball,icu))
+
+$(eval $(call gb_UnpackedTarball_set_tarball,icu,$(ICU_TARBALL)))
+
+$(eval $(call gb_UnpackedTarball_update_autoconf_configs,icu,source))
+
+# Data zip contains data/... and needs to end up in icu/source/data/...
+# Only data/misc/icudata.rc is needed for a Cygwin/MSVC build.
+$(eval $(call gb_UnpackedTarball_set_pre_action,icu,\
+	unzip -q -d source -o $(gb_UnpackedTarget_TARFILE_LOCATION)/$(ICU_DATA_TARBALL) data/misc/icudata.rc \
+))
+
+# NOTE(review): the default patch level is 0, yet the *.patch.1 files use
+# icu.org/... and icu/... path prefixes; presumably the numeric file suffix
+# selects the strip level for those patches - confirm in gbuild.
+$(eval $(call gb_UnpackedTarball_set_patchlevel,icu,0))
+
+# Platform-dependent entries: Android applies icu4c-android.patch.1 where all
+# other platforms apply icu4c-rpath.patch.1, and icu4c-icudata-stdlibs.patch.1
+# is skipped on Android.
+$(eval $(call gb_UnpackedTarball_add_patches,icu,\
+	external/icu/icu4c-build.patch.1 \
+	external/icu/icu4c-aix.patch.1 \
+	external/icu/icu4c-warnings.patch.1 \
+	external/icu/icu4c-macosx.patch.1 \
+	external/icu/icu4c-solarisgcc.patch.1 \
+	external/icu/icu4c-mkdir.patch.1 \
+	external/icu/icu4c-$(if $(filter ANDROID,$(OS)),android,rpath).patch.1 \
+	external/icu/icu4c-ubsan.patch.1 \
+	external/icu/icu4c-scriptrun.patch.1 \
+	external/icu/icu4c-rtti.patch.1 \
+	external/icu/icu4c-clang-cl.patch.1 \
+	$(if $(filter-out ANDROID,$(OS)),external/icu/icu4c-icudata-stdlibs.patch.1) \
+	external/icu/gcc9.patch \
+	external/icu/c++20-comparison.patch \
+	external/icu/ubsan.patch \
+	external/icu/Wdeprecated-copy-dtor.patch \
+	external/icu/icu4c-khmerbreakengine.patch.1 \
+))
+
+# Adds external/icu/khmerdict.dict to the unpacked tree as
+# source/data/brkitr/khmerdict.dict.
+$(eval $(call gb_UnpackedTarball_add_file,icu,source/data/brkitr/khmerdict.dict,external/icu/khmerdict.dict))
+
+# vim: set noet sw=4 ts=4:
diff --git a/external/icu/Wdeprecated-copy-dtor.patch b/external/icu/Wdeprecated-copy-dtor.patch
new file mode 100644
index 000000000..67078ef1b
--- /dev/null
+++ b/external/icu/Wdeprecated-copy-dtor.patch
@@ -0,0 +1,30 @@
+Declare UObject's default constructor, copy constructor and copy assignment
+operator explicitly as "= default"; upstream only has them in comments.
+NOTE(review): presumably this avoids -Wdeprecated-copy-dtor warnings, as the
+file name suggests - confirm.
+
+--- source/common/unicode/uobject.h
++++ source/common/unicode/uobject.h
+@@ -245,10 +245,10 @@
+     // direct use of UObject itself
+ 
+     // default constructor
+-    // inline UObject() {}
++    UObject() = default;
+ 
+     // copy constructor
+-    // inline UObject(const UObject &other) {}
++    UObject(const UObject &other) = default;
+ 
+ #if 0
+     // TODO Sometime in the future. Implement operator==().
+@@ -280,8 +280,8 @@
+      * Subclasses need this assignment operator if they use compiler-provided
+      * assignment operators of their own. An alternative to not declaring one
+      * here would be to declare and empty-implement a protected or public one.
+-    UObject &UObject::operator=(const UObject &);
+      */
++    UObject &operator=(const UObject &) = default;
+ };
+ 
+ #ifndef U_HIDE_INTERNAL_API
diff --git a/external/icu/c++20-comparison.patch b/external/icu/c++20-comparison.patch
new file mode 100644
index 000000000..44053e671
--- /dev/null
+++ b/external/icu/c++20-comparison.patch
@@ -0,0 +1,179 @@
+Make UVector::operator== and operator!= const; change operator== of
+TimeZoneRule, InitialTimeZoneRule, AnnualTimeZoneRule and
+TimeArrayTimeZoneRule to return bool instead of UBool; add operator!=
+overloads taking the class's own type to RuleBasedTimeZone, SimpleTimeZone,
+SimpleDateFormat, StringSearch, InitialTimeZoneRule and VTimeZone.
+NOTE(review): presumably needed for C++20 comparison-operator rewriting, as
+the file name suggests - confirm.
+
+--- source/common/uvector.cpp
++++ source/common/uvector.cpp
+@@ -110,7 +110,7 @@
+ }
+ 
+ // This only does something sensible if this object has a non-null comparer
+-UBool UVector::operator==(const UVector& other) {
++UBool UVector::operator==(const UVector& other) const {
+     int32_t i;
+     if (count != other.count) return FALSE;
+     if (comparer != NULL) {
+--- source/common/uvector.h
++++ source/common/uvector.h
+@@ -113,12 +113,12 @@
+      * equal if they are of the same size and all elements are equal,
+      * as compared using this object's comparer.
+      */
+-    UBool operator==(const UVector& other);
++    UBool operator==(const UVector& other) const;
+ 
+     /**
+      * Equivalent to !operator==()
+      */
+-    inline UBool operator!=(const UVector& other);
++    inline UBool operator!=(const UVector& other) const;
+ 
+     //------------------------------------------------------------
+     // java.util.Vector API
+@@ -382,7 +382,7 @@
+     return elementAt(index);
+ }
+ 
+-inline UBool UVector::operator!=(const UVector& other) {
++inline UBool UVector::operator!=(const UVector& other) const {
+     return !operator==(other);
+ }
+ 
+--- source/i18n/tzrule.cpp
++++ source/i18n/tzrule.cpp
+@@ -53,7 +53,7 @@
+     return *this;
+ }
+ 
+-UBool
++bool
+ TimeZoneRule::operator==(const TimeZoneRule& that) const {
+     return ((this == &that) ||
+             (typeid(*this) == typeid(that) &&
+@@ -120,7 +120,7 @@
+     return *this;
+ }
+ 
+-UBool
++bool
+ InitialTimeZoneRule::operator==(const TimeZoneRule& that) const {
+     return ((this == &that) ||
+             (typeid(*this) == typeid(that) &&
+@@ -226,7 +226,7 @@
+     return *this;
+ }
+ 
+-UBool
++bool
+ AnnualTimeZoneRule::operator==(const TimeZoneRule& that) const {
+     if (this == &that) {
+         return TRUE;
+@@ -445,7 +445,7 @@
+     return *this;
+ }
+ 
+-UBool
++bool
+ TimeArrayTimeZoneRule::operator==(const TimeZoneRule& that) const {
+     if (this == &that) {
+         return TRUE;
+--- source/i18n/unicode/rbtz.h
++++ source/i18n/unicode/rbtz.h
+@@ -85,6 +85,7 @@
+      * @stable ICU 3.8
+      */
+     virtual UBool operator!=(const TimeZone& that) const;
++    UBool operator!=(const RuleBasedTimeZone& that) const {return !operator==(that);}
+ 
+     /**
+      * Adds the <code>TimeZoneRule</code> which represents time transitions.
+--- source/i18n/unicode/simpletz.h
++++ source/i18n/unicode/simpletz.h
+@@ -110,6 +110,7 @@
+      * @stable ICU 2.0
+      */
+     virtual UBool operator==(const TimeZone& that) const;
++    UBool operator!=(const SimpleTimeZone& that) const {return !operator==(that);}
+ 
+     /**
+      * Constructs a SimpleTimeZone with the given raw GMT offset and time zone ID,
+--- source/i18n/unicode/smpdtfmt.h
++++ source/i18n/unicode/smpdtfmt.h
+@@ -874,6 +874,7 @@
+      * @stable ICU 2.0
+      */
+     virtual UBool operator==(const Format& other) const;
++    UBool operator!=(const SimpleDateFormat& that) const {return !operator==(that);}
+ 
+ 
+     using DateFormat::format;
+--- source/i18n/unicode/stsearch.h
++++ source/i18n/unicode/stsearch.h
+@@ -297,6 +297,7 @@
+      * @stable ICU 2.0
+      */
+     virtual UBool operator==(const SearchIterator &that) const;
++    UBool operator!=(const StringSearch &that) const {return !operator==(that);}
+ 
+     // public get and set methods ----------------------------------------
+ 
+--- source/i18n/unicode/tzrule.h
++++ source/i18n/unicode/tzrule.h
+@@ -54,7 +54,7 @@
+      * @return  true if the given <code>TimeZoneRule</code> objects are semantically equal.
+      * @stable ICU 3.8
+      */
+-    virtual UBool operator==(const TimeZoneRule& that) const;
++    virtual bool operator==(const TimeZoneRule& that) const;
+ 
+     /**
+      * Return true if the given <code>TimeZoneRule</code> objects are semantically unequal. Objects
+@@ -245,7 +245,7 @@
+      * @return  true if the given <code>TimeZoneRule</code> objects are semantically equal.
+      * @stable ICU 3.8
+      */
+-    virtual UBool operator==(const TimeZoneRule& that) const;
++    virtual bool operator==(const TimeZoneRule& that) const;
+ 
+     /**
+      * Return true if the given <code>TimeZoneRule</code> objects are semantically unequal. Objects
+@@ -255,6 +255,7 @@
+      * @stable ICU 3.8
+      */
+     virtual UBool operator!=(const TimeZoneRule& that) const;
++    UBool operator!=(const InitialTimeZoneRule& that) const {return !operator==(that);}
+ 
+     /**
+      * Gets the time when this rule takes effect in the given year.
+@@ -456,7 +457,7 @@
+      * @return  true if the given <code>TimeZoneRule</code> objects are semantically equal.
+      * @stable ICU 3.8
+      */
+-    virtual UBool operator==(const TimeZoneRule& that) const;
++    virtual bool operator==(const TimeZoneRule& that) const;
+ 
+     /**
+      * Return true if the given <code>TimeZoneRule</code> objects are semantically unequal. Objects
+@@ -672,7 +673,7 @@
+      * @return  true if the given <code>TimeZoneRule</code> objects are semantically equal.
+      * @stable ICU 3.8
+      */
+-    virtual UBool operator==(const TimeZoneRule& that) const;
++    virtual bool operator==(const TimeZoneRule& that) const;
+ 
+     /**
+      * Return true if the given <code>TimeZoneRule</code> objects are semantically unequal. Objects
+--- source/i18n/unicode/vtzone.h
++++ source/i18n/unicode/vtzone.h
+@@ -81,6 +81,7 @@
+      * @stable ICU 3.8
+      */
+     virtual UBool operator!=(const TimeZone& that) const;
++    UBool operator!=(const VTimeZone& that) const {return !operator==(that);}
+ 
+     /**
+      * Create a <code>VTimeZone</code> instance by the time zone ID.
diff --git a/external/icu/cross-bin/icu-config b/external/icu/cross-bin/icu-config
new file mode 100755
index 000000000..8ccf94f9b
--- /dev/null
+++ b/external/icu/cross-bin/icu-config
@@ -0,0 +1,11 @@
+#!/bin/sh
+#
+# Minimal icu-config stand-in: answers the three queries below from the
+# environment and prints nothing for any other argument.
+# NOTE(review): presumably put on PATH for cross builds (see directory name) - confirm.
+
+case "$1" in
+    --version)            echo whatever ;;
+    --cppflags)           echo ${ICU_CFLAGS} ;;
+    --ldflags-searchpath) echo ${ICU_LIBS} ;;
+esac
diff --git a/external/icu/gcc9.patch b/external/icu/gcc9.patch
new file mode 100644
index 000000000..5c9808f8c
--- /dev/null
+++ b/external/icu/gcc9.patch
@@ -0,0 +1,30 @@
+Wrap unicode/format.h in "#pragma GCC diagnostic push/pop" (under __GNUC__)
+that ignores -Wdeprecated-copy, plus -Wpragmas and -Wunknown-warning-option
+so old GCC and Clang accept the unknown option.
+
+--- source/i18n/unicode/format.h
++++ source/i18n/unicode/format.h
+@@ -22,6 +22,13 @@
+ 
+ #ifndef FORMAT_H
+ #define FORMAT_H
++
++#ifdef __GNUC__
++#pragma GCC diagnostic push
++#pragma GCC diagnostic ignored "-Wpragmas" // for old GCC
++#pragma GCC diagnostic ignored "-Wunknown-warning-option" // for Clang
++#pragma GCC diagnostic ignored "-Wdeprecated-copy"
++#endif
+ 
+ 
+ #include "unicode/utypes.h"
+@@ -314,5 +314,9 @@
+ 
+ #endif /* U_SHOW_CPLUSPLUS_API */
+ 
++#ifdef __GNUC__
++#pragma GCC diagnostic pop
++#endif
++
+ #endif // _FORMAT
+ //eof
diff --git a/external/icu/icu4c-aix.patch.1 b/external/icu/icu4c-aix.patch.1
new file mode 100644
index 000000000..77982163b
--- /dev/null
+++ b/external/icu/icu4c-aix.patch.1
@@ -0,0 +1,149 @@
+config/mh-aix-gcc: drop the AIX-specific link, archive (.a from .so) and
+import-list rules, add -fPIC/-DPIC and thread flags, use SO=so and
+STATIC_O=ao, and name versioned libraries %.$(SO).VERSION.
+tools/pkgdata/pkgdata.cpp: no longer take the OS400 branch for _AIX and
+remove the U_PF_AIX link command.
+
+diff -ur icu.org/source/config/mh-aix-gcc icu/source/config/mh-aix-gcc
+--- icu.org/source/config/mh-aix-gcc	2016-06-15 20:58:17.000000000 +0200
++++ icu/source/config/mh-aix-gcc	2017-04-21 21:58:49.731432198 +0200
+@@ -18,84 +18,29 @@
+ GEN_DEPS.c=	$(CC) -E -MM $(DEFS) $(CPPFLAGS)
+ GEN_DEPS.cc=	$(CXX) -E -MM $(DEFS) $(CPPFLAGS)
+ 
+-## Commands to link
+-## We need to use the C++ linker, even when linking C programs, since
+-##  our libraries contain C++ code (C++ static init not called)
+-LINK.c=		$(AIX_PREDELETE) $(CXX) $(CXXFLAGS) $(LDFLAGS) 
+-LINK.cc=	$(AIX_PREDELETE) $(CXX) $(CXXFLAGS) $(LDFLAGS) 
+-
+-## Shared library options
+-LD_SOOPTIONS= -Wl,-bsymbolic
+-
+-## Commands to make a shared library
+-SHLIB.c=    $(AIX_PREDELETE) $(CC) $(CFLAGS) $(LDFLAGS) -shared -Wl,-bexpall $(LD_SOOPTIONS)
+-SHLIB.cc=   $(AIX_PREDELETE) $(CXX) $(CXXFLAGS) $(LDFLAGS) -shared -Wl,-bexpall $(LD_SOOPTIONS)
+-
+-## Compiler switch to embed a runtime search path
+-LD_RPATH=	-I
+-LD_RPATH_PRE=	
++## Flags for position independent code
++SHAREDLIBCFLAGS = -fPIC
++SHAREDLIBCXXFLAGS = -fPIC
++SHAREDLIBCPPFLAGS = -DPIC
++
++## Additional flags when building libraries and with threads
++THREADSCPPFLAGS = -D_REENTRANT -D_THREAD_SAFE
++LIBCPPFLAGS =
+ 
+-## enable the shared lib loader
+-LDFLAGS += -Wl,-bbigtoc
++LD_RPATH=
++LD_RPATH_PRE=
+ 
+ ## These are the library specific LDFLAGS
+ LDFLAGSICUDT=-nodefaultlibs -nostdlib
+ 
+-## We need to delete things prior to linking, or else we'll get
+-## SEVERE ERROR: output file in use ..  on AIX. 
+-## But, shell script version should NOT delete target as we don't
+-## have $@ in that context.  (SH = only shell script, icu-config)
+-AIX_PREDELETE=rm -f $@ ; 
+-#SH# AIX_PREDELETE=
+-
+ ## Environment variable to set a runtime search path
+ LDLIBRARYPATH_ENVVAR = LIBPATH
+ 
+-## Override Versioned target for a shared library.
+-FINAL_SO_TARGET=  $(basename $(SO_TARGET))$(SO_TARGET_VERSION).$(SO)
+-MIDDLE_SO_TARGET= $(basename $(SO_TARGET))$(SO_TARGET_VERSION_MAJOR).$(SO)
+-SHARED_OBJECT = $(notdir $(FINAL_SO_TARGET:.$(SO)=.$(SOBJ)))
+-SHARED_OBJECT_NO_VERSION = $(basename $(SO_TARGET)).$(SOBJ)
+-
+-# The following is for Makefile.inc's use.
+-ICULIBSUFFIX_VERSION = $(LIB_VERSION_MAJOR)
+-
+-# this one is for icudefs.mk's use
+-ifeq ($(ENABLE_SHARED),YES)
+-SO_TARGET_VERSION_SUFFIX = $(SO_TARGET_VERSION_MAJOR)
+-endif
+-
+-## Compiler switch to embed a library name. Not present on AIX.
+-LD_SONAME = 
+-
+-## The type of assembly needed when pkgdata is used for generating shared libraries.
+-GENCCODE_ASSEMBLY=-a xlc
+-
+ ## Shared object suffix
+-SOBJ=   so
+-# without the -brtl option, the library names use .a. AIX is funny that way.
+-SO=	a
+-A=	a
++SO=	so
+ 
+ ## Non-shared intermediate object suffix
+-STATIC_O = o
+-
+-## Special AIX rules
+-
+-## Build archive from shared object
+-%.a : %.so
+-	ln -f $< $(SHARED_OBJECT_NO_VERSION)
+-	$(AR) $(ARFLAGS) $@ $(SHARED_OBJECT_NO_VERSION)
+-	rm -f $(SHARED_OBJECT_NO_VERSION)
+-$(LIBDIR)/%.a : %.so
+-	ln -f $< $(SHARED_OBJECT_NO_VERSION)
+-	$(AR) $(ARFLAGS) $@ $(SHARED_OBJECT_NO_VERSION)
+-	rm -f $(SHARED_OBJECT_NO_VERSION)
+-
+-## Build import list from export list
+-%.e : %.exp
+-	@echo "Building an import list for $<"
+-	@$(SHELL) -ec "echo '#! $*.a($*.so)' | cat - $< > $@"
++STATIC_O = ao
+ 
+ ## Compilation rules
+ %.$(STATIC_O): $(srcdir)/%.c
+@@ -123,10 +68,10 @@
+ 		[ -s $@ ] || rm -f $@'
+ 
+ ## Versioned libraries rules
+-%$(SO_TARGET_VERSION_MAJOR).$(SO): %$(SO_TARGET_VERSION).$(SO)
+-	$(RM) $@ && ln -s ${*F}$(SO_TARGET_VERSION).$(SO) $@
+-%.$(SO): %$(SO_TARGET_VERSION).$(SO)
+-	$(RM) $@ && ln -s ${*F}$(SO_TARGET_VERSION).$(SO) $@
++%.$(SO).$(SO_TARGET_VERSION_MAJOR): %.$(SO).$(SO_TARGET_VERSION)
++	$(RM) $@ && ln -s ${<F} $@
++%.$(SO): %.$(SO).$(SO_TARGET_VERSION_MAJOR)
++	$(RM) $@ && ln -s ${*F}.$(SO).$(SO_TARGET_VERSION) $@
+ 
+ 
+ ## BIR  - bind with internal references [so app data and icu data doesn't collide]
+diff -ur icu.org/source/tools/pkgdata/pkgdata.cpp icu/source/tools/pkgdata/pkgdata.cpp
+--- icu.org/source/tools/pkgdata/pkgdata.cpp	2017-03-21 02:03:49.000000000 +0100
++++ icu/source/tools/pkgdata/pkgdata.cpp	2017-04-21 21:58:49.732432195 +0200
+@@ -934,7 +934,7 @@
+ 
+         uprv_strcat(pkgDataFlags[SO_EXT], ".");
+         uprv_strcat(pkgDataFlags[SO_EXT], pkgDataFlags[A_EXT]);
+-#elif U_PLATFORM == U_PF_OS400 || defined(_AIX)
++#elif U_PLATFORM == U_PF_OS400
+         sprintf(libFileNames[LIB_FILE_VERSION_TMP], "%s%s%s",
+                 libFileNames[LIB_FILE],
+                 FILE_EXTENSION_SEP,
+@@ -1407,15 +1407,6 @@
+                 pkgDataFlags[LDICUDTFLAGS],
+                 targetDir,
+                 libFileNames[LIB_FILE_CYGWIN_VERSION],
+-#elif U_PLATFORM == U_PF_AIX
+-        sprintf(cmd, "%s %s%s;%s %s -o %s%s %s %s%s %s %s",
+-                RM_CMD,
+-                targetDir,
+-                libFileNames[LIB_FILE_VERSION_TMP],
+-                pkgDataFlags[GENLIB],
+-                pkgDataFlags[LDICUDTFLAGS],
+-                targetDir,
+-                libFileNames[LIB_FILE_VERSION_TMP],
+ #else
+         sprintf(cmd, "%s %s -o %s%s %s %s%s %s %s",
+                 pkgDataFlags[GENLIB],
diff --git a/external/icu/icu4c-android.patch.1 b/external/icu/icu4c-android.patch.1
new file mode 100644
index 000000000..602d225d7
--- /dev/null
+++ b/external/icu/icu4c-android.patch.1
@@ -0,0 +1,85 @@
+Android-only changes; UnpackedTarball_icu.mk applies this file in place of
+icu4c-rpath.patch.1 when OS is ANDROID.
+platform.h: define U_EXPORT as empty in the __GNUC__ branch.
+mh-linux: comment out LD_SONAME.
+configure: select mh-linux for *-androideabi* hosts, set GENCCODE_ASSEMBLY
+to "-a gcc-android-arm" for arm-*-linux-androideabi with GCC, and set
+CHECK_UTF16_STRING_RESULT to "nope" for Android hosts.
+decimfmt.cpp: define ARM if needed and include <android/compatibility.hxx>
+when __ANDROID__ is defined.
+
+diff -ur icu.org/source/common/unicode/platform.h icu/source/common/unicode/platform.h
+--- icu.org/source/common/unicode/platform.h	2019-10-03 13:16:41.000000000 +0200
++++ icu/source/common/unicode/platform.h	2019-10-29 22:58:26.881221287 +0100
+@@ -818,7 +818,7 @@
+                             UPRV_HAS_DECLSPEC_ATTRIBUTE(dllimport))
+ #   define U_EXPORT __declspec(dllexport)
+ #elif defined(__GNUC__)
+-#   define U_EXPORT __attribute__((visibility("default")))
++#   define U_EXPORT
+ #elif (defined(__SUNPRO_CC) && __SUNPRO_CC >= 0x550) \
+    || (defined(__SUNPRO_C) && __SUNPRO_C >= 0x550) 
+ #   define U_EXPORT __global
+diff -ur icu.org/source/config/mh-linux icu/source/config/mh-linux
+--- icu.org/source/config/mh-linux	2018-09-29 02:34:41.000000000 +0200
++++ icu/source/config/mh-linux	2018-10-20 00:33:36.558130876 +0200
+@@ -27,7 +27,7 @@
+ 
+ ## Compiler switch to embed a library name
+ # The initial tab in the next line is to prevent icu-config from reading it.
+-	LD_SONAME = -Wl,-soname -Wl,$(notdir $(MIDDLE_SO_TARGET))
++	#LD_SONAME = -Wl,-soname -Wl,$(notdir $(MIDDLE_SO_TARGET))
+ #SH# # We can't depend on MIDDLE_SO_TARGET being set.
+ #SH# LD_SONAME=
+ 
+diff -ur icu.org/source/configure icu/source/configure
+--- icu.org/source/configure	2018-10-02 00:39:56.000000000 +0200
++++ icu/source/configure	2018-10-20 00:33:36.559130874 +0200
+@@ -5207,7 +5207,7 @@
+ 	else
+ 		icu_cv_host_frag=mh-linux-va
+ 	fi ;;
+-*-*-linux*|*-*-gnu|*-*-k*bsd*-gnu|*-*-kopensolaris*-gnu) icu_cv_host_frag=mh-linux ;;
++*-*-linux*|*-*-gnu|*-*-k*bsd*-gnu|*-*-kopensolaris*-gnu|*-*-*-androideabi*) icu_cv_host_frag=mh-linux ;;
+ i[34567]86-*-cygwin)
+ 	if test "$GCC" = yes; then
+ 		icu_cv_host_frag=mh-cygwin
+@@ -6400,6 +6400,10 @@
+ # Check to see if genccode can generate simple assembly.
+ GENCCODE_ASSEMBLY=
+ case "${host}" in
++arm-*-linux-androideabi)
++    if test "$GCC" = yes; then
++        GENCCODE_ASSEMBLY="-a gcc-android-arm"
++    fi ;;
+ *-linux*|*-kfreebsd*-gnu*|i*86-*-*bsd*|i*86-pc-gnu)
+     if test "$GCC" = yes; then
+         # We're using gcc, and the simple -a gcc command line works for genccode
+@@ -7499,6 +7503,10 @@
+     # wchar_t can be used
+     CHECK_UTF16_STRING_RESULT="available"
+     ;;
++*-*-*-androideabi|mips-unknown-linux-android)
++    # no UTF-16 strings thanks, I think, this is to avoid the -std=c++0x which causes trouble with uint64_t
++    CHECK_UTF16_STRING_RESULT="nope"
++    ;;
+ *)
+     ;;
+ esac
+diff -ur icu.org/source/i18n/decimfmt.cpp icu/source/i18n/decimfmt.cpp
+--- icu.org/source/i18n/decimfmt.cpp	2018-10-02 00:39:56.000000000 +0200
++++ icu/source/i18n/decimfmt.cpp	2018-10-20 00:33:36.560130873 +0200
+@@ -9,6 +9,13 @@
+ // Helpful in toString methods and elsewhere.
+ #define UNISTR_FROM_STRING_EXPLICIT
+ 
++#ifdef __ANDROID__
++#ifndef ARM
++#define ARM
++#endif
++#include <android/compatibility.hxx>
++#endif
++
+ #include <cmath>
+ #include <cstdlib>
+ #include <stdlib.h>
diff --git a/external/icu/icu4c-build.patch.1 b/external/icu/icu4c-build.patch.1
new file mode 100644
index 000000000..a878de732
--- /dev/null
+++ b/external/icu/icu4c-build.patch.1
@@ -0,0 +1,97 @@
+mh-darwin: use a single LD_SONAME whose -install_name is an "@...URELIB/"
+placeholder path, drop the FINAL_SO_TARGET/MIDDLE_SO_TARGET overrides and the
+tzcode/genren options, and name versioned libraries %.$(SO).VERSION.
+pkg_genc.cpp: add a "gcc-android-arm" assembly header entry.
+pkg_genc.h: raise SMALL_BUFFER_MAX_SIZE from 512 to 2048.
+
+diff -ur icu.org/source/config/mh-darwin icu/source/config/mh-darwin
+--- icu.org/source/config/mh-darwin	2016-06-15 20:58:17.000000000 +0200
++++ icu/source/config/mh-darwin	2017-04-21 21:30:23.584568210 +0200
+@@ -30,11 +30,7 @@
+ SHLIB.cc=	$(CXX) -dynamiclib -dynamic $(CXXFLAGS) $(LDFLAGS) $(LD_SOOPTIONS)
+ 
+ ## Compiler switches to embed a library name and version information
+-ifeq ($(ENABLE_RPATH),YES)
+-LD_SONAME = -Wl,-compatibility_version -Wl,$(SO_TARGET_VERSION_MAJOR) -Wl,-current_version -Wl,$(SO_TARGET_VERSION) -install_name $(libdir)/$(notdir $(MIDDLE_SO_TARGET))
+-else
+-LD_SONAME = -Wl,-compatibility_version -Wl,$(SO_TARGET_VERSION_MAJOR) -Wl,-current_version -Wl,$(SO_TARGET_VERSION) -install_name $(notdir $(MIDDLE_SO_TARGET)) $(PKGDATA_TRAILING_SPACE)
+-endif
++LD_SONAME = -Wl,-compatibility_version -Wl,$(SO_TARGET_VERSION_MAJOR) -Wl,-current_version -Wl,$(SO_TARGET_VERSION) -install_name @__________________________________________________URELIB/$(notdir $(MIDDLE_SO_TARGET))
+ 
+ ## Compiler switch to embed a runtime search path
+ LD_RPATH=
+@@ -50,10 +46,6 @@
+ ## Non-shared intermediate object suffix
+ STATIC_O = ao
+ 
+-## Override Versioned target for a shared library.
+-FINAL_SO_TARGET=  $(basename $(SO_TARGET)).$(SO_TARGET_VERSION).$(SO)
+-MIDDLE_SO_TARGET= $(basename $(SO_TARGET)).$(SO_TARGET_VERSION_MAJOR).$(SO)
+-
+ ## Compilation and dependency rules
+ %.$(STATIC_O): $(srcdir)/%.c
+ 	$(call SILENT_COMPILE,$(strip $(COMPILE.c) $(STATICCPPFLAGS) $(STATICCFLAGS)) -MMD -MT "$*.d $*.o $*.$(STATIC_O)" -o $@ $<)
+@@ -67,16 +59,10 @@
+ 
+ ## Versioned libraries rules
+ 
+-%.$(SO_TARGET_VERSION_MAJOR).$(SO): %.$(SO_TARGET_VERSION).$(SO)
++%.$(SO).$(SO_TARGET_VERSION_MAJOR): %.$(SO).$(SO_TARGET_VERSION)
+ 	$(RM) $@ && ln -s ${<F} $@
+-%.$(SO): %.$(SO_TARGET_VERSION_MAJOR).$(SO)
+-	$(RM) $@ && ln -s ${*F}.$(SO_TARGET_VERSION).$(SO) $@
+-
+-# tzcode option
+-TZORIG_EXTRA_CFLAGS=-DSTD_INSPIRED
+-
+-# genren opts
+-GENREN_PL_OPTS=-x Mach-O -n '-g' -p '| c++filt'
++%.$(SO): %.$(SO).$(SO_TARGET_VERSION_MAJOR)
++	$(RM) $@ && ln -s ${*F}.$(SO).$(SO_TARGET_VERSION) $@
+ 
+ ## Remove shared library 's'
+ STATIC_PREFIX_WHEN_USED = 
+diff -ur icu.org/source/tools/toolutil/pkg_genc.cpp icu/source/tools/toolutil/pkg_genc.cpp
+--- icu.org/source/tools/toolutil/pkg_genc.cpp	2017-04-13 11:46:02.000000000 +0200
++++ icu/source/tools/toolutil/pkg_genc.cpp	2017-04-21 21:30:23.583568212 +0200
+@@ -160,6 +160,28 @@
+ 
+         ".long ","",HEX_0X
+     },
++    {"gcc-android-arm",
++        "\t.arch armv5te\n"
++        "\t.fpu softvfp\n"
++        "\t.eabi_attribute 20, 1\n"
++        "\t.eabi_attribute 21, 1\n"
++        "\t.eabi_attribute 23, 3\n"
++        "\t.eabi_attribute 24, 1\n"
++        "\t.eabi_attribute 25, 1\n"
++        "\t.eabi_attribute 26, 2\n"
++        "\t.eabi_attribute 30, 6\n"
++        "\t.eabi_attribute 18, 4\n"
++        "\t.file \"%s.s\"\n"
++        "\t.global %s\n"
++        "\t.section .rodata\n"
++        "\t.align 2\n"
++        "\t.type %s, %%object\n"
++        "%s:\n",
++
++        "\t.word ",
++        "\t.section .note.GNU-stack,\"\",%%progbits\n",
++        HEX_0X
++    },
+ /* 16 bytes alignment. */
+ /* http://docs.oracle.com/cd/E19641-01/802-1947/802-1947.pdf */
+     {"sun",
+diff -ur icu.org/source/tools/toolutil/pkg_genc.h icu/source/tools/toolutil/pkg_genc.h
+--- icu.org/source/tools/toolutil/pkg_genc.h	2017-01-20 01:20:31.000000000 +0100
++++ icu/source/tools/toolutil/pkg_genc.h	2017-04-21 21:30:23.582568215 +0200
+@@ -60,7 +60,7 @@
+ #endif
+ 
+ #define LARGE_BUFFER_MAX_SIZE 2048
+-#define SMALL_BUFFER_MAX_SIZE 512
++#define SMALL_BUFFER_MAX_SIZE 2048
+ #define SMALL_BUFFER_FLAG_NAMES 32
+ #define BUFFER_PADDING_SIZE 20
+ 
diff --git a/external/icu/icu4c-clang-cl.patch.1 b/external/icu/icu4c-clang-cl.patch.1
new file mode 100644
index 000000000..a111a0df9
--- /dev/null
+++ b/external/icu/icu4c-clang-cl.patch.1
@@ -0,0 +1,28 @@
+diff -ur icu.org/source/config/mh-cygwin-msvc icu/source/config/mh-cygwin-msvc
+--- icu.org/source/config/mh-cygwin-msvc	2017-01-23 01:38:28.000000000 +0100
++++ icu/source/config/mh-cygwin-msvc	2017-04-21 23:07:28.482892025 +0200
+@@ -55,8 +55,8 @@
+ LDFLAGS+=-nologo
+ 
+ # Commands to compile
+-COMPILE.c=	$(CC) $(CPPFLAGS) $(DEFS) $(CFLAGS) -c
+-COMPILE.cc=	$(CXX) $(CPPFLAGS) $(DEFS) $(CXXFLAGS) -c
++COMPILE.c=	true && $(CC) $(CPPFLAGS) $(DEFS) $(CFLAGS) -c
++COMPILE.cc=	true && $(CXX) $(CPPFLAGS) $(DEFS) $(CXXFLAGS) -c
+ 
+ # Commands to link
+ LINK.c=		LINK.EXE -subsystem:console $(LDFLAGS)
+diff -ur icu.org/source/runConfigureICU icu/source/runConfigureICU
+--- icu.org/source/runConfigureICU	2017-01-23 01:38:28.000000000 +0100
++++ icu/source/runConfigureICU	2017-04-21 23:07:28.482892025 +0200
+@@ -261,8 +261,8 @@
+     Cygwin/MSVC)
+         THE_OS="Windows with Cygwin"
+         THE_COMP="Microsoft Visual C++"
+-        CC=cl; export CC
+-        CXX=cl; export CXX
++        CC=${CC-cl}; export CC
++        CXX=${CXX-cl}; export CXX
+         RELEASE_CFLAGS='-Gy -MD'
+         RELEASE_CXXFLAGS='-Gy -MD'
+         DEBUG_CFLAGS='-FS -Zi -MDd'
diff --git a/external/icu/icu4c-icudata-stdlibs.patch.1 b/external/icu/icu4c-icudata-stdlibs.patch.1
new file mode 100644
index 000000000..c8d66c6ed
--- /dev/null
+++ b/external/icu/icu4c-icudata-stdlibs.patch.1
@@ -0,0 +1,14 @@
+diff -ur icu.org/source/config/mh-linux icu/source/config/mh-linux
+--- icu.org/source/config/mh-linux	2017-04-21 23:09:57.588533707 +0200
++++ icu/source/config/mh-linux	2017-04-21 23:11:38.075292226 +0200
+@@ -27,7 +27,9 @@
+ RPATHLDFLAGS=${LD_RPATH_PRE}'$$ORIGIN'
+ 
+ ## These are the library specific LDFLAGS
+-LDFLAGSICUDT=-nodefaultlibs -nostdlib
++#LDFLAGSICUDT=-nodefaultlibs -nostdlib
++# Debian change: linking icudata as data only causes too many problems.
++LDFLAGSICUDT=
+ 
+ ## Compiler switch to embed a library name
+ # The initial tab in the next line is to prevent icu-config from reading it.
diff --git a/external/icu/icu4c-khmerbreakengine.patch.1 b/external/icu/icu4c-khmerbreakengine.patch.1
new file mode 100644
index 000000000..272d0b8ab
--- /dev/null
+++ b/external/icu/icu4c-khmerbreakengine.patch.1
@@ -0,0 +1,845 @@
+diff -ur icu.org/source/common/dictbe.cpp icu/source/common/dictbe.cpp
+--- icu.org/source/common/dictbe.cpp	2020-04-22 22:04:20.000000000 +0200
++++ icu/source/common/dictbe.cpp	2020-05-11 18:55:07.702282061 +0200
+@@ -32,7 +32,19 @@
+  ******************************************************************
+  */
+ 
+-DictionaryBreakEngine::DictionaryBreakEngine() {
++DictionaryBreakEngine::DictionaryBreakEngine()
++    : fTypes(0), clusterLimit(0) {  // no break types; with a limit of 0 the clusterLimit-bounded loops in scanBackClusters/scanFwdClusters never iterate
++}
++
++DictionaryBreakEngine::DictionaryBreakEngine(uint32_t breakTypes)
++    : fTypes(breakTypes), clusterLimit(3) {
++    UErrorCode status = U_ZERO_ERROR;  // NOTE(review): shared by every applyPattern() call below and never checked
++    fViramaSet.applyPattern(UNICODE_STRING_SIMPLE("[[:ccc=VR:]]"), status);
++
++    // Note: both skip sets also list U+200C, U+200D and U+2060, the characters KhmerBreakEngine puts in fIgnoreSet.
++    fSkipStartSet.applyPattern(UNICODE_STRING_SIMPLE("[[:lb=OP:][:lb=QU:]\\u200C\\u200D\\u2060]"), status);
++    fSkipEndSet.applyPattern(UNICODE_STRING_SIMPLE("[[:lb=CP:][:lb=QU:][:lb=EX:][:lb=CL:]\\u200C\\u200D\\u2060]"), status);
++    fNBeforeSet.applyPattern(UNICODE_STRING_SIMPLE("[[:lb=CR:][:lb=LF:][:lb=NL:][:lb=SP:][:lb=ZW:][:lb=IS:][:lb=BA:][:lb=NS:]]"), status);
+ }
+ 
+ DictionaryBreakEngine::~DictionaryBreakEngine() {
+@@ -79,6 +91,169 @@
+     fSet.compact();
+ }
+ 
++bool  // returns true when the first character at or before `start` that is not in fSkipStartSet is ZWSP
++DictionaryBreakEngine::scanBeforeStart(UText *text, int32_t& start, bool &doBreak) const {
++    UErrorCode status = U_ZERO_ERROR;  // NOTE(review): status is never inspected after utext_clone()
++    UText* ut = utext_clone(NULL, text, false, true, &status);  // shallow, read-only clone: the scan moves ut, not text
++    utext_setNativeIndex(ut, start);
++    UChar32 c = utext_current32(ut);
++    bool res = false;
++    doBreak = true;  // cleared as soon as one character has to be skipped
++    while (start >= 0) {
++        if (!fSkipStartSet.contains(c)) {
++            res = (c == ZWSP);
++            break;
++        }
++        --start;  // start is decremented once per skipped character (may reach -1)
++        c = utext_previous32(ut);  // moves back one code point and returns it
++        doBreak = false;
++    }
++    utext_close(ut);
++    return res;
++}
++
++bool  // returns true when the first character at or after `end` that is not in fSkipEndSet is ZWSP
++DictionaryBreakEngine::scanAfterEnd(UText *text, int32_t textEnd, int32_t& end, bool &doBreak) const {
++    UErrorCode status = U_ZERO_ERROR;  // NOTE(review): status is never inspected after utext_clone()
++    UText* ut = utext_clone(NULL, text, false, true, &status);  // shallow, read-only clone: the scan moves ut, not text
++    utext_setNativeIndex(ut, end);
++    UChar32 c = utext_current32(ut);
++    bool res = false;
++    doBreak = !fNBeforeSet.contains(c);  // survives only if no character is skipped below
++    while (end < textEnd) {
++        if (!fSkipEndSet.contains(c)) {
++            res = (c == ZWSP);
++            break;
++        }
++        ++end;  // end is incremented once per skipped character, bounded by textEnd
++        utext_next32(ut); c = utext_current32(ut);  // step, then read: utext_next32() returns the char it steps over, which would re-test the one just skipped
++        doBreak = false;
++    }
++    utext_close(ut);
++    return res;
++}
++
++void  // walks `text` backwards from its current position over up to clusterLimit clusters, never below textStart; result in `start`
++DictionaryBreakEngine::scanBackClusters(UText *text, int32_t textStart, int32_t& start) const {
++    UChar32 c = 0;
++    start = utext_getNativeIndex(text);  // the incoming value of start is ignored
++    while (start > textStart) {  // step back until a character outside fSkipEndSet is reached
++        c = utext_previous32(text);
++        --start;
++        if (!fSkipEndSet.contains(c))
++            break;
++    }
++    for (int i = 0; i < clusterLimit; ++i) { // scan backwards clusterLimit clusters
++        while (start > textStart) {
++            while (fIgnoreSet.contains(c))  // NOTE(review): start is not decremented for these characters
++                c = utext_previous32(text);
++            if (!fMarkSet.contains(c)) {
++                if (fBaseSet.contains(c)) {
++                    c = utext_previous32(text);
++                    if (!fViramaSet.contains(c)) { // no virama (e.g. coeng) before this base: step forward again and stop on it; otherwise virama + base is treated as a mark
++                        utext_next32(text);
++                        c = utext_current32(text);
++                        break;
++                    } else {
++                        --start;
++                    }
++                } else {
++                    break;
++                }
++            }
++            c = utext_previous32(text);
++            --start;
++        }
++        if (!fBaseSet.contains(c) || start < textStart) {  // not a cluster start so finish
++            break;
++        }
++        c = utext_previous32(text);
++        --start;        // go round again
++    }                   // ignore hitting previous inhibitor since scanning for it should have found us!
++    ++start;            // counteract --before
++}
++
++void  // walks `text` forwards from its current position over up to clusterLimit clusters, bounded by textEnd; result in `end`
++DictionaryBreakEngine::scanFwdClusters(UText *text, int32_t textEnd, int32_t& end) const {
++    UChar32 c = utext_current32(text);
++    end = utext_getNativeIndex(text);  // the incoming value of end is ignored
++    while (end < textEnd) {  // step over leading characters in fSkipStartSet
++        if (!fSkipStartSet.contains(c))
++            break;
++        utext_next32(text);
++        c = utext_current32(text);
++        ++end;
++    }
++    for (int i = 0; i < clusterLimit; ++i) { // scan forwards clusterLimit clusters
++        while (fIgnoreSet.contains(c)) {  // NOTE(review): end is not incremented for these characters
++            utext_next32(text);
++            c = utext_current32(text);
++        }
++        if (fBaseSet.contains(c)) {
++            while (end < textEnd) {  // consume the marks that follow the base
++                utext_next32(text);
++                c = utext_current32(text);
++                ++end;
++                if (!fMarkSet.contains(c))
++                    break;
++                else if (fViramaSet.contains(c)) {  // handle coeng + base as mark
++                    utext_next32(text);
++                    c = utext_current32(text);
++                    ++end;
++                    if (!fBaseSet.contains(c))
++                        break;
++                }
++            }
++        } else {
++            --end;    // bad char so break after char before it
++            break;
++        }
++    }
++}
++
++bool  // returns true if a ZWSP or WJ was found in [start, end); before/after receive the results of the cluster scans around it
++DictionaryBreakEngine::scanWJ(UText *text, int32_t &start, int32_t end, int32_t &before, int32_t &after) const {
++    UErrorCode status = U_ZERO_ERROR;  // NOTE(review): status is never inspected after utext_clone()
++    UText* ut = utext_clone(NULL, text, false, true, &status);  // shallow, read-only clone: the scan moves ut, not text
++    int32_t nat = start;
++    utext_setNativeIndex(ut, nat);
++    bool foundFirst = true;  // despite the name: true while NO inhibitor has been found yet
++    int32_t curr = start;
++    while (nat < end) {
++        UChar32 c = utext_current32(ut);
++        if (c == ZWSP || c == WJ) {
++            curr = nat + 1;
++            if (foundFirst)     // only scan backwards for first inhibitor
++                scanBackClusters(ut, start, before);
++            foundFirst = false; // don't scan backwards if we go around again. Also marks found something
++
++            utext_next32(ut);
++            scanFwdClusters(ut, end, after);
++            nat = after + 1;
++
++            if (c == ZWSP || c == WJ) {  // did we hit another one? NOTE(review): c has not been re-read since the test above, so this is always true and the else branch is unreachable
++                continue;
++            } else {
++                break;
++            }
++        }
++
++        ++nat;                  // keep hunting
++        utext_next32(ut);
++    }
++
++    utext_close(ut);
++
++    if (nat >= end && foundFirst) {
++        start = before = after = nat;
++        return false;           // failed to find anything
++    }
++    else {
++        start = curr;  // one past the index at which the last inhibitor was seen
++    }
++    return true;                // yup hit one
++}
++
+ /*
+  ******************************************************************
+  * PossibleWord
+@@ -108,7 +283,7 @@
+     ~PossibleWord() {}
+   
+     // Fill the list of candidates if needed, select the longest, and return the number found
+-    int32_t   candidates( UText *text, DictionaryMatcher *dict, int32_t rangeEnd );
++    int32_t   candidates( UText *text, DictionaryMatcher *dict, int32_t rangeEnd, UnicodeSet const *ignoreSet = NULL, int32_t minLength = 0 );
+   
+     // Select the currently marked candidate, point after it in the text, and invalidate self
+     int32_t   acceptMarked( UText *text );
+@@ -129,12 +304,12 @@
+ };
+ 
+ 
+-int32_t PossibleWord::candidates( UText *text, DictionaryMatcher *dict, int32_t rangeEnd ) {
++int32_t PossibleWord::candidates( UText *text, DictionaryMatcher *dict, int32_t rangeEnd, UnicodeSet const *ignoreSet, int32_t minLength) {
+     // TODO: If getIndex is too slow, use offset < 0 and add discardAll()
+     int32_t start = (int32_t)utext_getNativeIndex(text);
+     if (start != offset) {
+         offset = start;
+-        count = dict->matches(text, rangeEnd-start, UPRV_LENGTHOF(cuLengths), cuLengths, cpLengths, NULL, &prefix);
++        count = dict->matches(text, rangeEnd-start, UPRV_LENGTHOF(cuLengths), cuLengths, cpLengths, NULL, &prefix, ignoreSet, minLength);
+         // Dictionary leaves text after longest prefix, not longest word. Back up.
+         if (count <= 0) {
+             utext_setNativeIndex(text, start);
+@@ -815,53 +990,30 @@
+  * KhmerBreakEngine
+  */
+ 
+-// How many words in a row are "good enough"?
+-static const int32_t KHMER_LOOKAHEAD = 3;
+-
+-// Will not combine a non-word with a preceding dictionary word longer than this
+-static const int32_t KHMER_ROOT_COMBINE_THRESHOLD = 3;
+-
+-// Will not combine a non-word that shares at least this much prefix with a
+-// dictionary word, with a preceding word
+-static const int32_t KHMER_PREFIX_COMBINE_THRESHOLD = 3;
+-
+-// Minimum word size
+-static const int32_t KHMER_MIN_WORD = 2;
+-
+-// Minimum number of characters for two words
+-static const int32_t KHMER_MIN_WORD_SPAN = KHMER_MIN_WORD * 2;
+-
+ KhmerBreakEngine::KhmerBreakEngine(DictionaryMatcher *adoptDictionary, UErrorCode &status)
+-    : DictionaryBreakEngine(),
++    : DictionaryBreakEngine((1 << UBRK_WORD) | (1 << UBRK_LINE)),
+       fDictionary(adoptDictionary)
+ {
+     UTRACE_ENTRY(UTRACE_UBRK_CREATE_BREAK_ENGINE);
+     UTRACE_DATA1(UTRACE_INFO, "dictbe=%s", "Khmr");
+-    fKhmerWordSet.applyPattern(UNICODE_STRING_SIMPLE("[[:Khmr:]&[:LineBreak=SA:]]"), status);
++
++    clusterLimit = 3;
++
++    fKhmerWordSet.applyPattern(UNICODE_STRING_SIMPLE("[[:Khmr:]\\u2060\\u200C\\u200D]"), status);
+     if (U_SUCCESS(status)) {
+         setCharacters(fKhmerWordSet);
+     }
+     fMarkSet.applyPattern(UNICODE_STRING_SIMPLE("[[:Khmr:]&[:LineBreak=SA:]&[:M:]]"), status);
+-    fMarkSet.add(0x0020);
+-    fEndWordSet = fKhmerWordSet;
+-    fBeginWordSet.add(0x1780, 0x17B3);
+-    //fBeginWordSet.add(0x17A3, 0x17A4);      // deprecated vowels
+-    //fEndWordSet.remove(0x17A5, 0x17A9);     // Khmer independent vowels that can't end a word
+-    //fEndWordSet.remove(0x17B2);             // Khmer independent vowel that can't end a word
+-    fEndWordSet.remove(0x17D2);             // KHMER SIGN COENG that combines some following characters
+-    //fEndWordSet.remove(0x17B6, 0x17C5);     // Remove dependent vowels
+-//    fEndWordSet.remove(0x0E31);             // MAI HAN-AKAT
+-//    fEndWordSet.remove(0x0E40, 0x0E44);     // SARA E through SARA AI MAIMALAI
+-//    fBeginWordSet.add(0x0E01, 0x0E2E);      // KO KAI through HO NOKHUK
+-//    fBeginWordSet.add(0x0E40, 0x0E44);      // SARA E through SARA AI MAIMALAI
+-//    fSuffixSet.add(THAI_PAIYANNOI);
+-//    fSuffixSet.add(THAI_MAIYAMOK);
++    fIgnoreSet.add(0x2060);         // WJ
++    fIgnoreSet.add(0x200C, 0x200D); // ZWNJ, ZWJ
++    fBaseSet.applyPattern(UNICODE_STRING_SIMPLE("[[:Khmr:]&[:lb=SA:]&[:^M:]]"), status);
++    fPuncSet.applyPattern(UNICODE_STRING_SIMPLE("[\\u17D4\\u17D5\\u17D6\\u17D7\\u17D9:]"), status);
+ 
+     // Compact for caching.
+     fMarkSet.compact();
+-    fEndWordSet.compact();
+-    fBeginWordSet.compact();
+-//    fSuffixSet.compact();
++    fIgnoreSet.compact();
++    fBaseSet.compact();
++    fPuncSet.compact();
+     UTRACE_EXIT_STATUS(status);
+ }
+ 
+@@ -874,180 +1026,204 @@
+                                                 int32_t rangeStart,
+                                                 int32_t rangeEnd,
+                                                 UVector32 &foundBreaks ) const {
+-    if ((rangeEnd - rangeStart) < KHMER_MIN_WORD_SPAN) {
+-        return 0;       // Not enough characters for two words
+-    }
+-
+-    uint32_t wordsFound = 0;
+-    int32_t cpWordLength = 0;
+-    int32_t cuWordLength = 0;
+-    int32_t current;
++    uint32_t wordsFound = foundBreaks.size();
+     UErrorCode status = U_ZERO_ERROR;
+-    PossibleWord words[KHMER_LOOKAHEAD];
+-
++    int32_t before = 0;
++    int32_t after = 0;
++    int32_t finalBefore = 0;
++    int32_t initAfter = 0;
++    int32_t scanStart = rangeStart;
++    int32_t scanEnd = rangeEnd;
++
++    bool startZwsp = false;
++    bool breakStart = false;
++    bool breakEnd = false;
++
++    if (rangeStart > 0) {
++        --scanStart;
++        startZwsp = scanBeforeStart(text, scanStart, breakStart);
++    }
+     utext_setNativeIndex(text, rangeStart);
++    scanFwdClusters(text, rangeEnd, initAfter);
++    bool endZwsp = scanAfterEnd(text, utext_nativeLength(text), scanEnd, breakEnd);
++    utext_setNativeIndex(text, rangeEnd - 1);
++    scanBackClusters(text, rangeStart, finalBefore);
++    if (finalBefore < initAfter) {   // the whole run is tented so no breaks
++        if (breakStart || fTypes < UBRK_LINE)
++            foundBreaks.push(rangeStart, status);
++        if (breakEnd || fTypes < UBRK_LINE)
++            foundBreaks.push(rangeEnd, status);
++        return foundBreaks.size() - wordsFound;
++    }
+ 
+-    while (U_SUCCESS(status) && (current = (int32_t)utext_getNativeIndex(text)) < rangeEnd) {
+-        cuWordLength = 0;
+-        cpWordLength = 0;
+-
+-        // Look for candidate words at the current position
+-        int32_t candidates = words[wordsFound%KHMER_LOOKAHEAD].candidates(text, fDictionary, rangeEnd);
+-
+-        // If we found exactly one, use that
+-        if (candidates == 1) {
+-            cuWordLength = words[wordsFound % KHMER_LOOKAHEAD].acceptMarked(text);
+-            cpWordLength = words[wordsFound % KHMER_LOOKAHEAD].markedCPLength();
+-            wordsFound += 1;
+-        }
++    scanStart = rangeStart;
++    scanWJ(text, scanStart, rangeEnd, before, after);
++    if (startZwsp || initAfter >= before) {
++        after = initAfter;
++        before = 0;
++    }
++    if (!endZwsp && after > finalBefore && after < rangeEnd)
++        endZwsp = true;
++    if (endZwsp && before > finalBefore)
++        before = finalBefore;
+ 
+-        // If there was more than one, see which one can take us forward the most words
+-        else if (candidates > 1) {
+-            // If we're already at the end of the range, we're done
+-            if ((int32_t)utext_getNativeIndex(text) >= rangeEnd) {
+-                goto foundBest;
+-            }
+-            do {
+-                int32_t wordsMatched = 1;
+-                if (words[(wordsFound + 1) % KHMER_LOOKAHEAD].candidates(text, fDictionary, rangeEnd) > 0) {
+-                    if (wordsMatched < 2) {
+-                        // Followed by another dictionary word; mark first word as a good candidate
+-                        words[wordsFound % KHMER_LOOKAHEAD].markCurrent();
+-                        wordsMatched = 2;
+-                    }
++    utext_setNativeIndex(text, rangeStart);
++    int32_t numCodePts = rangeEnd - rangeStart;
++    // bestSnlp[i] is the snlp of the best segmentation of the first i
++    // code points in the range to be matched.
++    UVector32 bestSnlp(numCodePts + 1, status);
++    bestSnlp.addElement(0, status);
++    for(int32_t i = 1; i <= numCodePts; i++) {
++        bestSnlp.addElement(kuint32max, status);
++    }
+ 
+-                    // If we're already at the end of the range, we're done
+-                    if ((int32_t)utext_getNativeIndex(text) >= rangeEnd) {
+-                        goto foundBest;
+-                    }
++    // prev[i] is the index of the last code point in the previous word in
++    // the best segmentation of the first i characters. Note negative implies
++	// that the code point is part of an unknown word.
++    UVector32 prev(numCodePts + 1, status);
++    for(int32_t i = 0; i <= numCodePts; i++) {
++        prev.addElement(kuint32max, status);
++    }
+ 
+-                    // See if any of the possible second words is followed by a third word
+-                    do {
+-                        // If we find a third word, stop right away
+-                        if (words[(wordsFound + 2) % KHMER_LOOKAHEAD].candidates(text, fDictionary, rangeEnd)) {
+-                            words[wordsFound % KHMER_LOOKAHEAD].markCurrent();
+-                            goto foundBest;
+-                        }
+-                    }
+-                    while (words[(wordsFound + 1) % KHMER_LOOKAHEAD].backUp(text));
+-                }
++    const int32_t maxWordSize = 20;
++    UVector32 values(maxWordSize, status);
++    values.setSize(maxWordSize);
++    UVector32 lengths(maxWordSize, status);
++    lengths.setSize(maxWordSize);
++
++    // Dynamic programming to find the best segmentation.
++
++    // In outer loop, i  is the code point index,
++    //                ix is the corresponding string (code unit) index.
++    //    They differ when the string contains supplementary characters.
++    int32_t ix = rangeStart;
++    for (int32_t i = 0;  i < numCodePts;  ++i, utext_setNativeIndex(text, ++ix)) {
++        if ((uint32_t)bestSnlp.elementAti(i) == kuint32max) {
++            continue;
++        }
++
++        int32_t count;
++        count = fDictionary->matches(text, numCodePts - i, maxWordSize,
++                             NULL, lengths.getBuffer(), values.getBuffer(), NULL, &fIgnoreSet, 2);
++                             // Note: lengths is filled with code point lengths
++                             //       The NULL parameter is the ignored code unit lengths.
++
++        for (int32_t j = 0; j < count; j++) {
++            int32_t ln = lengths.elementAti(j);
++            if (ln + i >= numCodePts)
++                continue;
++            utext_setNativeIndex(text, ln+ix);
++            int32_t c = utext_current32(text);
++            if (fMarkSet.contains(c) || c == 0x17D2) { // Coeng
++                lengths.removeElementAt(j);
++                values.removeElementAt(j);
++                --j;
++                --count;
+             }
+-            while (words[wordsFound % KHMER_LOOKAHEAD].backUp(text));
+-foundBest:
+-            cuWordLength = words[wordsFound % KHMER_LOOKAHEAD].acceptMarked(text);
+-            cpWordLength = words[wordsFound % KHMER_LOOKAHEAD].markedCPLength();
+-            wordsFound += 1;
+         }
+-
+-        // We come here after having either found a word or not. We look ahead to the
+-        // next word. If it's not a dictionary word, we will combine it with the word we
+-        // just found (if there is one), but only if the preceding word does not exceed
+-        // the threshold.
+-        // The text iterator should now be positioned at the end of the word we found.
+-        if ((int32_t)utext_getNativeIndex(text) < rangeEnd && cpWordLength < KHMER_ROOT_COMBINE_THRESHOLD) {
+-            // if it is a dictionary word, do nothing. If it isn't, then if there is
+-            // no preceding word, or the non-word shares less than the minimum threshold
+-            // of characters with a dictionary word, then scan to resynchronize
+-            if (words[wordsFound % KHMER_LOOKAHEAD].candidates(text, fDictionary, rangeEnd) <= 0
+-                  && (cuWordLength == 0
+-                      || words[wordsFound % KHMER_LOOKAHEAD].longestPrefix() < KHMER_PREFIX_COMBINE_THRESHOLD)) {
+-                // Look for a plausible word boundary
+-                int32_t remaining = rangeEnd - (current+cuWordLength);
+-                UChar32 pc;
+-                UChar32 uc;
+-                int32_t chars = 0;
+-                for (;;) {
+-                    int32_t pcIndex = (int32_t)utext_getNativeIndex(text);
+-                    pc = utext_next32(text);
+-                    int32_t pcSize = (int32_t)utext_getNativeIndex(text) - pcIndex;
+-                    chars += pcSize;
+-                    remaining -= pcSize;
+-                    if (remaining <= 0) {
++        if (count == 0) {
++            utext_setNativeIndex(text, ix);
++            int32_t c = utext_current32(text);
++            if (fPuncSet.contains(c) || fIgnoreSet.contains(c) || c == ZWSP) {
++                values.setElementAt(0, count);
++                lengths.setElementAt(1, count++);
++            } else if (fBaseSet.contains(c)) {
++                int32_t currix = utext_getNativeIndex(text);
++                do {
++                    utext_next32(text);
++                    c = utext_current32(text);
++                    if (utext_getNativeIndex(text) >= rangeEnd)
+                         break;
+-                    }
+-                    uc = utext_current32(text);
+-                    if (fEndWordSet.contains(pc) && fBeginWordSet.contains(uc)) {
+-                        // Maybe. See if it's in the dictionary.
+-                        int32_t num_candidates = words[(wordsFound + 1) % KHMER_LOOKAHEAD].candidates(text, fDictionary, rangeEnd);
+-                        utext_setNativeIndex(text, current+cuWordLength+chars);
+-                        if (num_candidates > 0) {
++                    if (c == 0x17D2) { // Coeng
++                        utext_next32(text);
++                        c = utext_current32(text);
++                        if (!fBaseSet.contains(c) || utext_getNativeIndex(text) >= rangeEnd) {
+                             break;
++                        } else {
++                            utext_next32(text);
++                            c = utext_current32(text);
++                            if (utext_getNativeIndex(text) >= rangeEnd)
++                                break;
+                         }
+                     }
+-                }
+-
+-                // Bump the word count if there wasn't already one
+-                if (cuWordLength <= 0) {
+-                    wordsFound += 1;
+-                }
++                } while (fMarkSet.contains(c) || fIgnoreSet.contains(c));
++                values.setElementAt(BADSNLP, count);
++                lengths.setElementAt(utext_getNativeIndex(text) - currix, count++);
++            } else {
++                values.setElementAt(BADSNLP, count);
++                lengths.setElementAt(1, count++);
++            }
++        }
+ 
+-                // Update the length with the passed-over characters
+-                cuWordLength += chars;
++        for (int32_t j = 0; j < count; j++) {
++            uint32_t v = values.elementAti(j);
++            int32_t newSnlp = bestSnlp.elementAti(i) + v;
++            int32_t ln = lengths.elementAti(j);
++            utext_setNativeIndex(text, ln+ix);
++            int32_t c = utext_current32(text);
++            while ((fPuncSet.contains(c) || fIgnoreSet.contains(c)) && ln + i < numCodePts) {
++                ++ln;
++                utext_next32(text);
++                c = utext_current32(text);
+             }
+-            else {
+-                // Back up to where we were for next iteration
+-                utext_setNativeIndex(text, current+cuWordLength);
++            int32_t ln_j_i = ln + i;   // yes really i!
++            if (newSnlp < bestSnlp.elementAti(ln_j_i)) {
++                if (v == BADSNLP) {
++                    int32_t p = prev.elementAti(i);
++                    if (p < 0)
++                        prev.setElementAt(p, ln_j_i);
++                    else
++                        prev.setElementAt(-i, ln_j_i);
++                }
++                else
++                    prev.setElementAt(i, ln_j_i);
++                bestSnlp.setElementAt(newSnlp, ln_j_i);
+             }
+         }
+-
+-        // Never stop before a combining mark.
+-        int32_t currPos;
+-        while ((currPos = (int32_t)utext_getNativeIndex(text)) < rangeEnd && fMarkSet.contains(utext_current32(text))) {
+-            utext_next32(text);
+-            cuWordLength += (int32_t)utext_getNativeIndex(text) - currPos;
++    }
++    // Start pushing the optimal offset index into t_boundary (t for tentative).
++    // prev[numCodePts] is guaranteed to be meaningful.
++    // We'll first push in the reverse order, i.e.,
++    // t_boundary[0] = numCodePts, and afterwards do a swap.
++    UVector32 t_boundary(numCodePts+1, status);
++
++    int32_t numBreaks = 0;
++    // No segmentation found, set boundary to end of range
++    while (numCodePts >= 0 && (uint32_t)bestSnlp.elementAti(numCodePts) == kuint32max) {
++        --numCodePts;
++    }
++    if (numCodePts < 0) {
++        t_boundary.addElement(numCodePts, status);
++        numBreaks++;
++    } else {
++        for (int32_t i = numCodePts; (uint32_t)i != kuint32max; i = prev.elementAti(i)) {
++            if (i < 0) i = -i;
++            t_boundary.addElement(i, status);
++            numBreaks++;
+         }
++        U_ASSERT(prev.elementAti(t_boundary.elementAti(numBreaks - 1)) == 0);
++    }
+ 
+-        // Look ahead for possible suffixes if a dictionary word does not follow.
+-        // We do this in code rather than using a rule so that the heuristic
+-        // resynch continues to function. For example, one of the suffix characters
+-        // could be a typo in the middle of a word.
+-//        if ((int32_t)utext_getNativeIndex(text) < rangeEnd && wordLength > 0) {
+-//            if (words[wordsFound%KHMER_LOOKAHEAD].candidates(text, fDictionary, rangeEnd) <= 0
+-//                && fSuffixSet.contains(uc = utext_current32(text))) {
+-//                if (uc == KHMER_PAIYANNOI) {
+-//                    if (!fSuffixSet.contains(utext_previous32(text))) {
+-//                        // Skip over previous end and PAIYANNOI
+-//                        utext_next32(text);
+-//                        utext_next32(text);
+-//                        wordLength += 1;            // Add PAIYANNOI to word
+-//                        uc = utext_current32(text);     // Fetch next character
+-//                    }
+-//                    else {
+-//                        // Restore prior position
+-//                        utext_next32(text);
+-//                    }
+-//                }
+-//                if (uc == KHMER_MAIYAMOK) {
+-//                    if (utext_previous32(text) != KHMER_MAIYAMOK) {
+-//                        // Skip over previous end and MAIYAMOK
+-//                        utext_next32(text);
+-//                        utext_next32(text);
+-//                        wordLength += 1;            // Add MAIYAMOK to word
+-//                    }
+-//                    else {
+-//                        // Restore prior position
+-//                        utext_next32(text);
+-//                    }
+-//                }
+-//            }
+-//            else {
+-//                utext_setNativeIndex(text, current+wordLength);
+-//            }
+-//        }
+-
+-        // Did we find a word on this iteration? If so, push it on the break stack
+-        if (cuWordLength > 0) {
+-            foundBreaks.push((current+cuWordLength), status);
++    // Now that we're done, convert positions in t_boundary[] (indices in
++    // the normalized input string) back to indices in the original input UText
++    // while reversing t_boundary and pushing values to foundBreaks.
++    for (int32_t i = numBreaks-1; i >= 0; i--) {
++        int32_t cpPos = t_boundary.elementAti(i);
++        if (cpPos == 0 && !breakStart && fTypes >= UBRK_LINE) continue;
++        int32_t utextPos = cpPos + rangeStart;
++        while (utextPos > after && scanWJ(text, utextPos, scanEnd, before, after));
++        if (utextPos < before) {
++        // Boundaries are added to foundBreaks output in ascending order.
++            U_ASSERT(foundBreaks.size() == 0 ||foundBreaks.peeki() < utextPos);
++            foundBreaks.push(utextPos, status);
+         }
+     }
+-    
++
+     // Don't return a break for the end of the dictionary range if there is one there.
+-    if (foundBreaks.peeki() >= rangeEnd) {
++    if (!breakEnd && fTypes >= UBRK_LINE && foundBreaks.peeki() >= rangeEnd) {
+         (void) foundBreaks.popi();
+-        wordsFound -= 1;
+     }
+-
+-    return wordsFound;
++    return foundBreaks.size() - wordsFound;
+ }
+ 
+ #if !UCONFIG_NO_NORMALIZATION
+diff -ur icu.org/source/common/dictbe.h icu/source/common/dictbe.h
+--- icu.org/source/common/dictbe.h	2020-04-22 22:04:20.000000000 +0200
++++ icu/source/common/dictbe.h	2020-05-11 19:08:24.754634732 +0200
+@@ -34,7 +34,8 @@
+  * threads without synchronization.</p>
+  */
+ class DictionaryBreakEngine : public LanguageBreakEngine {
+- private:
++ protected:
++
+     /**
+      * The set of characters handled by this engine
+      * @internal
+@@ -42,14 +43,84 @@
+ 
+   UnicodeSet    fSet;
+ 
++  const int32_t WJ   = 0x2060;
++  const int32_t ZWSP = 0x200B;
++
++  /**
++   * The break types it was constructed with
++   * @internal
++   */
++  uint32_t      fTypes;
++
++  /**
++   * A Unicode set of all viramas
++   * @internal
++   */
++  UnicodeSet    fViramaSet;
++
++  /**
++   * A Unicode set of all base characters
++   * @internal
++   */
++  UnicodeSet    fBaseSet;
++
++  /**
++   * A Unicode set of all marks
++   * @internal
++   */
++  UnicodeSet    fMarkSet;
++
++  /**
++   * A Unicode set of all characters ignored in dictionary matching
++   * @internal
++   */
++  UnicodeSet    fIgnoreSet;
++
++  /**
++   * A Unicode set of characters skipped over at the start of a run (see scanBeforeStart, scanFwdClusters)
++   * @internal
++   */
++  UnicodeSet    fSkipStartSet;
++
++  /**
++   * A Unicode set of characters skipped over at the end of a run (see scanAfterEnd, scanBackClusters)
++   * @internal
++   */
++  UnicodeSet    fSkipEndSet;
++
++  /**
++   * A Unicode set of all characters that should not be broken before
++   * @internal
++   */
++  UnicodeSet    fNBeforeSet;
++
++  /**
++   * The number of clusters within which breaks are inhibited
++   * @internal
++   */
++  int32_t clusterLimit;
++
++  bool scanWJ(UText *text, int32_t &start, int32_t end, int32_t &before, int32_t &after) const;
++
++  bool scanBeforeStart(UText *text, int32_t& start, bool &doBreak) const;
++  bool scanAfterEnd(UText *text, int32_t rangeEnd, int32_t& end, bool &doBreak) const;
++  void scanBackClusters(UText *text, int32_t textStart, int32_t& start) const;
++  void scanFwdClusters(UText *text, int32_t textEnd, int32_t& end) const;
++
+  public:
+ 
+   /**
+-   * <p>Constructor </p>
++   * <p>Default constructor.</p>
++   *
+    */
+   DictionaryBreakEngine();
+ 
+   /**
++   * <p>Constructor with break types.</p>
++   */
++  explicit DictionaryBreakEngine(uint32_t breakTypes);
++
++  /**
+    * <p>Virtual destructor.</p>
+    */
+   virtual ~DictionaryBreakEngine();
+@@ -293,11 +364,13 @@
+      */ 
+  
+   UnicodeSet                fKhmerWordSet; 
+-  UnicodeSet                fEndWordSet; 
+-  UnicodeSet                fBeginWordSet; 
+-  UnicodeSet                fMarkSet; 
+-  DictionaryMatcher  *fDictionary; 
+- 
++  UnicodeSet                fBeginWordSet;
++  UnicodeSet                fPuncSet;
++  DictionaryMatcher        *fDictionary;
++
++  const uint32_t BADSNLP = 256 * 20;
++  const uint32_t kuint32max = 0x7FFFFFFF;
++
+  public: 
+  
+   /** 
+diff -ur icu.org/source/common/dictionarydata.cpp icu/source/common/dictionarydata.cpp
+--- icu.org/source/common/dictionarydata.cpp	2020-04-22 22:04:20.000000000 +0200
++++ icu/source/common/dictionarydata.cpp	2020-05-11 18:50:43.703113749 +0200
+@@ -44,7 +44,7 @@
+ 
+ int32_t UCharsDictionaryMatcher::matches(UText *text, int32_t maxLength, int32_t limit,
+                             int32_t *lengths, int32_t *cpLengths, int32_t *values,
+-                            int32_t *prefix) const {
++                            int32_t *prefix, UnicodeSet const* ignoreSet, int32_t minLength) const {
+ 
+     UCharsTrie uct(characters);
+     int32_t startingTextIndex = (int32_t)utext_getNativeIndex(text);
+@@ -55,7 +55,13 @@
+         UStringTrieResult result = (codePointsMatched == 0) ? uct.first(c) : uct.next(c);
+         int32_t lengthMatched = (int32_t)utext_getNativeIndex(text) - startingTextIndex;
+         codePointsMatched += 1;
++        if (ignoreSet != NULL && ignoreSet->contains(c)) {
++            continue;
++        }
+         if (USTRINGTRIE_HAS_VALUE(result)) {
++            if (codePointsMatched < minLength) {
++                continue;
++            }
+             if (wordCount < limit) {
+                 if (values != NULL) {
+                     values[wordCount] = uct.getValue();
+@@ -112,7 +118,7 @@
+ 
+ int32_t BytesDictionaryMatcher::matches(UText *text, int32_t maxLength, int32_t limit,
+                             int32_t *lengths, int32_t *cpLengths, int32_t *values,
+-                            int32_t *prefix) const {
++                            int32_t *prefix, UnicodeSet const* ignoreSet, int32_t minLength) const {
+     BytesTrie bt(characters);
+     int32_t startingTextIndex = (int32_t)utext_getNativeIndex(text);
+     int32_t wordCount = 0;
+@@ -122,7 +128,13 @@
+         UStringTrieResult result = (codePointsMatched == 0) ? bt.first(transform(c)) : bt.next(transform(c));
+         int32_t lengthMatched = (int32_t)utext_getNativeIndex(text) - startingTextIndex;
+         codePointsMatched += 1;
++        if (ignoreSet != NULL && ignoreSet->contains(c)) {
++            continue;
++        }
+         if (USTRINGTRIE_HAS_VALUE(result)) {
++            if (codePointsMatched < minLength) {
++                continue;
++            }
+             if (wordCount < limit) {
+                 if (values != NULL) {
+                     values[wordCount] = bt.getValue();
+diff -ur icu.org/source/common/dictionarydata.h icu/source/common/dictionarydata.h
+--- icu.org/source/common/dictionarydata.h	2020-04-22 22:04:20.000000000 +0200
++++ icu/source/common/dictionarydata.h	2020-05-11 18:50:43.704113746 +0200
+@@ -21,6 +21,7 @@
+ #include "unicode/utext.h"
+ #include "unicode/udata.h"
+ #include "udataswp.h"
++#include "unicode/uniset.h"
+ #include "unicode/uobject.h"
+ #include "unicode/ustringtrie.h"
+ 
+@@ -92,7 +93,7 @@
+      */
+     virtual int32_t matches(UText *text, int32_t maxLength, int32_t limit,
+                             int32_t *lengths, int32_t *cpLengths, int32_t *values,
+-                            int32_t *prefix) const = 0;
++                            int32_t *prefix, UnicodeSet const* ignoreSet = NULL, int32_t minLength = 0) const = 0;
+ 
+     /** @return DictionaryData::TRIE_TYPE_XYZ */
+     virtual int32_t getType() const = 0;
+@@ -107,7 +108,7 @@
+     virtual ~UCharsDictionaryMatcher();
+     virtual int32_t matches(UText *text, int32_t maxLength, int32_t limit,
+                             int32_t *lengths, int32_t *cpLengths, int32_t *values,
+-                            int32_t *prefix) const;
++                            int32_t *prefix, UnicodeSet const* ignoreSet = NULL, int32_t minLength = 0) const;
+     virtual int32_t getType() const;
+ private:
+     const UChar *characters;
+@@ -125,7 +126,7 @@
+     virtual ~BytesDictionaryMatcher();
+     virtual int32_t matches(UText *text, int32_t maxLength, int32_t limit,
+                             int32_t *lengths, int32_t *cpLengths, int32_t *values,
+-                            int32_t *prefix) const;
++                            int32_t *prefix, UnicodeSet const* ignoreSet = NULL, int32_t minLength = 0) const;
+     virtual int32_t getType() const;
+ private:
+     UChar32 transform(UChar32 c) const;
diff --git a/external/icu/icu4c-macosx.patch.1 b/external/icu/icu4c-macosx.patch.1
new file mode 100644
index 000000000..fee08eb05
--- /dev/null
+++ b/external/icu/icu4c-macosx.patch.1
@@ -0,0 +1,20 @@
+diff -ur icu.org/source/common/putil.cpp icu/source/common/putil.cpp
+--- icu.org/source/common/putil.cpp	2017-04-10 16:22:16.000000000 +0200
++++ icu/source/common/putil.cpp	2017-04-21 22:14:09.940217733 +0200
+@@ -1198,8 +1198,16 @@
+         static const time_t decemberSolstice=1198332540; /*2007-12-22 06:09 UT*/
+ 
+         /* This probing will tell us when daylight savings occurs.  */
++#if U_PLATFORM_IS_DARWIN_BASED
++        struct tm *tmp;
++        tmp = localtime(&juneSolstice);
++        juneSol = *tmp;
++        tmp = localtime(&decemberSolstice);
++        decemberSol = *tmp;
++#else
+         localtime_r(&juneSolstice, &juneSol);
+         localtime_r(&decemberSolstice, &decemberSol);
++#endif
+         if(decemberSol.tm_isdst > 0) {
+           daylightType = U_DAYLIGHT_DECEMBER;
+         } else if(juneSol.tm_isdst > 0) {
diff --git a/external/icu/icu4c-mkdir.patch.1 b/external/icu/icu4c-mkdir.patch.1
new file mode 100644
index 000000000..112e57cc2
--- /dev/null
+++ b/external/icu/icu4c-mkdir.patch.1
@@ -0,0 +1,11 @@
+diff -ur icu.org/source/data/Makefile.in icu/source/data/Makefile.in
+--- icu.org/source/data/Makefile.in	2019-04-17 21:42:15.000000000 +0200
++++ icu/source/data/Makefile.in	2019-10-28 12:57:15.033649494 +0100
+@@ -226,6 +226,7 @@
+ ifeq ($(PKGDATA_MODE),dll)
+ SO_VERSION_DATA = $(OUTTMPDIR)/icudata.res
+ $(SO_VERSION_DATA) : $(MISCSRCDIR)/icudata.rc
++	mkdir -p $(OUTTMPDIR)
+ ifeq ($(MSYS_RC_MODE),1)
+ 	rc.exe -i$(srcdir)/../common -i$(top_builddir)/common -fo$@ $(CPPFLAGS) $<
+ else
diff --git a/external/icu/icu4c-rpath.patch.1 b/external/icu/icu4c-rpath.patch.1
new file mode 100644
index 000000000..35a545778
--- /dev/null
+++ b/external/icu/icu4c-rpath.patch.1
@@ -0,0 +1,36 @@
+diff -ur icu.org/source/config/mh-linux icu/source/config/mh-linux
+--- icu.org/source/config/mh-linux	2016-06-15 20:58:17.000000000 +0200
++++ icu/source/config/mh-linux	2017-04-21 22:38:18.893927819 +0200
+@@ -22,6 +22,10 @@
+ LD_RPATH= -Wl,-zorigin,-rpath,'$$'ORIGIN 
+ LD_RPATH_PRE = -Wl,-rpath,
+ 
++## Force RPATH=$ORIGIN to locate own dependencies w/o need for LD_LIBRARY_PATH:
++ENABLE_RPATH=YES
++RPATHLDFLAGS=${LD_RPATH_PRE}'$$ORIGIN'
++
+ ## These are the library specific LDFLAGS
+ LDFLAGSICUDT=-nodefaultlibs -nostdlib
+ 
+diff -ur icu.org/source/data/pkgdataMakefile.in icu/source/data/pkgdataMakefile.in
+--- icu.org/source/data/pkgdataMakefile.in	2016-06-15 20:58:17.000000000 +0200
++++ icu/source/data/pkgdataMakefile.in	2017-04-21 22:38:18.892927822 +0200
+@@ -18,6 +18,9 @@
+ MIDDLE_SO_TARGET=
+ PKGDATA_TRAILING_SPACE=" "
+ 
++# escape $ with \ when passing to echo; needed to preserve $ORIGIN
++SHLIB.c.shell := $(subst $$,\$$,$(SHLIB.c))
++
+ all : clean 
+ 	@echo GENCCODE_ASSEMBLY_TYPE=$(GENCCODE_ASSEMBLY) >> $(OUTPUTFILE)
+ 	@echo SO=$(SO) >> $(OUTPUTFILE)
+@@ -26,7 +29,7 @@
+ 	@echo LIB_EXT_ORDER=$(FINAL_SO_TARGET) >> $(OUTPUTFILE)
+ 	@echo COMPILE="$(COMPILE.c)" >> $(OUTPUTFILE)
+ 	@echo LIBFLAGS="-I$(top_srcdir)/common -I$(top_builddir)/common $(SHAREDLIBCPPFLAGS) $(SHAREDLIBCFLAGS)" >> $(OUTPUTFILE)
+-	@echo GENLIB="$(SHLIB.c)" >> $(OUTPUTFILE)
++	@echo GENLIB="$(SHLIB.c.shell)" >> $(OUTPUTFILE)
+ 	@echo LDICUDTFLAGS=$(LDFLAGSICUDT) >> $(OUTPUTFILE)
+ 	@echo LD_SONAME=$(LD_SONAME) >> $(OUTPUTFILE)
+ 	@echo RPATH_FLAGS=$(RPATH_FLAGS) >> $(OUTPUTFILE)
diff --git a/external/icu/icu4c-rtti.patch.1 b/external/icu/icu4c-rtti.patch.1
new file mode 100644
index 000000000..c058c7f3c
--- /dev/null
+++ b/external/icu/icu4c-rtti.patch.1
@@ -0,0 +1,12 @@
+diff -ur icu.org/source/config/mh-linux icu/source/config/mh-linux
+--- icu.org/source/config/mh-linux	2017-04-21 23:01:23.257769703 +0200
++++ icu/source/config/mh-linux	2017-04-21 23:03:23.166481552 +0200
+@@ -36,7 +36,7 @@
+ #SH# LD_SONAME=
+ 
+ ## Shared library options
+-LD_SOOPTIONS= -Wl,-Bsymbolic
++LD_SOOPTIONS= -Wl,-Bsymbolic-functions
+ 
+ ## Shared object suffix
+ SO = so
diff --git a/external/icu/icu4c-scriptrun.patch.1 b/external/icu/icu4c-scriptrun.patch.1
new file mode 100644
index 000000000..f2f2cf9f3
--- /dev/null
+++ b/external/icu/icu4c-scriptrun.patch.1
@@ -0,0 +1,60 @@
+diff -ur icu.org/source/extra/scrptrun/scrptrun.cpp icu/source/extra/scrptrun/scrptrun.cpp
+--- icu.org/source/extra/scrptrun/scrptrun.cpp	2017-01-20 01:20:31.000000000 +0100
++++ icu/source/extra/scrptrun/scrptrun.cpp	2017-04-21 22:59:31.708037770 +0200
+@@ -151,7 +151,11 @@
+         // characters above it on the stack will be poped.
+         if (pairIndex >= 0) {
+             if ((pairIndex & 1) == 0) {
+-                parenStack[++parenSP].pairIndex = pairIndex;
++                ++parenSP;
++                int32_t nVecSize = parenStack.size();
++                if (parenSP == nVecSize)
++                    parenStack.resize(nVecSize + 128);
++                parenStack[parenSP].pairIndex = pairIndex;
+                 parenStack[parenSP].scriptCode  = scriptCode;
+             } else if (parenSP >= 0) {
+                 int32_t pi = pairIndex & ~1;
+@@ -185,7 +189,14 @@
+             // pop it from the stack
+             if (pairIndex >= 0 && (pairIndex & 1) != 0 && parenSP >= 0) {
+                 parenSP -= 1;
+-                startSP -= 1;
++                /* Decrement startSP only if it is >= 0;
++                   decrementing it unnecessarily will lead to memory corruption
++                   while processing the above while block,
++                   e.g. startSP = -4, parenSP = -1.
++                */
++                if (startSP >= 0) {
++                    startSP -= 1;
++                }
+             }
+         } else {
+             // if the run broke on a surrogate pair,
+diff -ur icu.org/source/extra/scrptrun/scrptrun.h icu/source/extra/scrptrun/scrptrun.h
+--- icu.org/source/extra/scrptrun/scrptrun.h	2017-01-20 01:20:31.000000000 +0100
++++ icu/source/extra/scrptrun/scrptrun.h	2017-04-21 22:59:31.708037770 +0200
+@@ -19,6 +19,7 @@
+ #include "unicode/utypes.h"
+ #include "unicode/uobject.h"
+ #include "unicode/uscript.h"
++#include <vector>
+ 
+ U_NAMESPACE_BEGIN
+ 
+@@ -81,7 +82,7 @@
+     int32_t scriptEnd;
+     UScriptCode scriptCode;
+ 
+-    ParenStackEntry parenStack[128];
++    std::vector<ParenStackEntry> parenStack;
+     int32_t parenSP;
+ 
+     static int8_t highBit(int32_t value);
+@@ -135,6 +136,7 @@
+     scriptEnd   = charStart;
+     scriptCode  = USCRIPT_INVALID_CODE;
+     parenSP     = -1;
++    parenStack.resize(128);
+ }
+ 
+ inline void ScriptRun::reset(int32_t start, int32_t length)
diff --git a/external/icu/icu4c-solarisgcc.patch.1 b/external/icu/icu4c-solarisgcc.patch.1
new file mode 100644
index 000000000..6000ed0cb
--- /dev/null
+++ b/external/icu/icu4c-solarisgcc.patch.1
@@ -0,0 +1,12 @@
+diff -ur icu.org/source/common/uposixdefs.h icu/source/common/uposixdefs.h
+--- icu.org/source/common/uposixdefs.h	2017-03-09 03:12:45.000000000 +0100
++++ icu/source/common/uposixdefs.h	2017-04-21 22:23:11.857926971 +0200
+@@ -54,7 +54,7 @@
+  *
+  * z/OS needs this definition for timeval and to get usleep.
+  */
+-#if !defined(_XOPEN_SOURCE_EXTENDED) && defined(__TOS_MVS__)
++#if !defined(_XOPEN_SOURCE_EXTENDED) && (defined(__TOS_MVS__) || defined(__IBMC__) || defined(__IBMCPP__))
+ #   define _XOPEN_SOURCE_EXTENDED 1
+ #endif
+ 
diff --git a/external/icu/icu4c-ubsan.patch.1 b/external/icu/icu4c-ubsan.patch.1
new file mode 100644
index 000000000..7b0c2efc9
--- /dev/null
+++ b/external/icu/icu4c-ubsan.patch.1
@@ -0,0 +1,14 @@
+diff -ur icu.org/source/common/ubidiimp.h icu/source/common/ubidiimp.h
+--- icu.org/source/common/ubidiimp.h	2019-10-03 13:16:41.000000000 +0200
++++ icu/source/common/ubidiimp.h	2019-10-28 19:08:13.533284618 +0100
+@@ -198,8 +198,8 @@
+ /* in a Run, logicalStart will get this bit set if the run level is odd */
+ #define INDEX_ODD_BIT (1UL<<31)
+ 
+-#define MAKE_INDEX_ODD_PAIR(index, level) ((index)|((int32_t)((level)&1)<<31))
+-#define ADD_ODD_BIT_FROM_LEVEL(x, level)  ((x)|=((int32_t)((level)&1)<<31))
++#define MAKE_INDEX_ODD_PAIR(index, level) ((index)|((uint32_t)((level)&1)<<31))
++#define ADD_ODD_BIT_FROM_LEVEL(x, level)  ((x)|=((uint32_t)((level)&1)<<31))
+ #define REMOVE_ODD_BIT(x)                 ((x)&=~INDEX_ODD_BIT)
+ 
+ #define GET_INDEX(x)   ((x)&~INDEX_ODD_BIT)
diff --git a/external/icu/icu4c-warnings.patch.1 b/external/icu/icu4c-warnings.patch.1
new file mode 100644
index 000000000..76f8b7298
--- /dev/null
+++ b/external/icu/icu4c-warnings.patch.1
@@ -0,0 +1,11 @@
+diff -ur icu.org/source/common/unicode/utf16.h icu/source/common/unicode/utf16.h
+--- icu.org/source/common/unicode/utf16.h	2019-10-03 13:16:41.000000000 +0200
++++ icu/source/common/unicode/utf16.h	2019-10-28 18:03:07.967208272 +0100
+@@ -397,6 +397,7 @@
+         (s)[(i)++]=(uint16_t)(((c)&0x3ff)|0xdc00); \
+     } else /* c>0x10ffff or not enough space */ { \
+         (isError)=TRUE; \
++        (void)(isError); \
+     } \
+ } UPRV_BLOCK_MACRO_END
+ 
diff --git a/external/icu/khmerdict.dict b/external/icu/khmerdict.dict
new file mode 100644
index 000000000..52605b654
--- /dev/null
+++ b/external/icu/khmerdict.dict
diff --git a/external/icu/ubsan.patch b/external/icu/ubsan.patch
new file mode 100644
index 000000000..762bd6e5a
--- /dev/null
+++ b/external/icu/ubsan.patch
@@ -0,0 +1,52 @@
+--- source/common/uloc.cpp
++++ source/common/uloc.cpp
+@@ -1203,7 +1203,8 @@
+         return 0;
+     }
+     int32_t reslen = result.length();
+-    uprv_memcpy(language, result.data(), std::min(reslen, languageCapacity));
++    auto const n = std::min(reslen, languageCapacity);
++    if (n != 0) uprv_memcpy(language, result.data(), n);
+     return reslen;
+ }
+ 
+@@ -1251,7 +1252,8 @@
+         return 0;
+     }
+     int32_t reslen = result.length();
+-    uprv_memcpy(script, result.data(), std::min(reslen, scriptCapacity));
++    auto const n = std::min(reslen, scriptCapacity);
++    if (n != 0) uprv_memcpy(script, result.data(), n);
+     return reslen;
+ }
+ 
+--- source/tools/genrb/rbutil.c
++++ source/tools/genrb/rbutil.c
+@@ -30,7 +30,12 @@
+ get_dirname(char *dirname,
+             const char *filename)
+ {
+-  const char *lastSlash = uprv_strrchr(filename, U_FILE_SEP_CHAR) + 1;
++  const char *lastSlash = uprv_strrchr(filename, U_FILE_SEP_CHAR);
++  if(lastSlash == NULL) {
++    lastSlash = filename;
++  } else {
++     ++lastSlash;
++  }
+ 
+   if(lastSlash>filename) {
+     uprv_strncpy(dirname, filename, (lastSlash - filename));
+@@ -46,7 +51,12 @@
+              const char *filename)
+ {
+   /* strip off any leading directory portions */
+-  const char *lastSlash = uprv_strrchr(filename, U_FILE_SEP_CHAR) + 1;
++  const char *lastSlash = uprv_strrchr(filename, U_FILE_SEP_CHAR);
++  if(lastSlash == NULL) {
++    lastSlash = filename;
++  } else {
++     ++lastSlash;
++  }
+   char *lastDot;
+ 
+   if(lastSlash>filename) {