7 files changed, 2527 insertions, 0 deletions
diff --git a/i18npool/qa/cppunit/test_breakiterator.cxx b/i18npool/qa/cppunit/test_breakiterator.cxx
new file mode 100644
index 000000000..cdcbff9be
--- /dev/null
+++ b/i18npool/qa/cppunit/test_breakiterator.cxx
@@ -0,0 +1,1068 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#include <com/sun/star/i18n/XBreakIterator.hpp>
+#include <com/sun/star/i18n/CharacterIteratorMode.hpp>
+#include <com/sun/star/i18n/ScriptType.hpp>
+#include <com/sun/star/i18n/WordType.hpp>
+#include <o3tl/cppunittraitshelper.hxx>
+#include <unotest/bootstrapfixturebase.hxx>
+
+#include <unicode/uvernum.h>
+
+#include <string.h>
+
+#include <stack>
+#include <string_view>
+
+using namespace ::com::sun::star;
+
+class TestBreakIterator : public test::BootstrapFixtureBase // CppUnit fixture whose test methods call into an i18n::XBreakIterator (m_xBreak)
+{
+public:
+    virtual void setUp() override; // overrides the base fixture's set-up hook
+    virtual void tearDown() override; // overrides the base fixture's tear-down hook
+
+    void testLineBreaking(); // getLineBreak() regression cases
+    void testWordBoundaries(); // isBeginWord/isEndWord/getWordBoundary/nextWord/previousWord cases
+    void testGraphemeIteration(); // nextCharacters/previousCharacters in SKIPCELL mode
+    void testWeak(); // getScriptType() must report WEAK for selected code points
+    void testAsian();
+    void testThai();
+#if (U_ICU_VERSION_MAJOR_NUM > 51) // the following tests exist only when building against an ICU major version above 51
+    void testLao();
+#ifdef TODO // compiled only if the TODO macro is defined
+    void testNorthernThai();
+    void testKhmer();
+#endif
+#endif
+    void testJapanese();
+    void testChinese();
+
+    CPPUNIT_TEST_SUITE(TestBreakIterator); // registration list: mirrors the declarations and preprocessor guards above
+    CPPUNIT_TEST(testLineBreaking);
+    CPPUNIT_TEST(testWordBoundaries);
+    CPPUNIT_TEST(testGraphemeIteration);
+    CPPUNIT_TEST(testWeak);
+    CPPUNIT_TEST(testAsian);
+    CPPUNIT_TEST(testThai);
+#if (U_ICU_VERSION_MAJOR_NUM > 51) // same ICU version guard as for the declarations
+    CPPUNIT_TEST(testLao);
+#ifdef TODO // same TODO guard as for the declarations
+    CPPUNIT_TEST(testKhmer);
+    CPPUNIT_TEST(testNorthernThai);
+#endif
+#endif
+    CPPUNIT_TEST(testJapanese);
+    CPPUNIT_TEST(testChinese);
+    CPPUNIT_TEST_SUITE_END();
+
+private:
+    uno::Reference<i18n::XBreakIterator> m_xBreak; // break iterator instance the test methods call
+    void doTestJapanese(uno::Reference< i18n::XBreakIterator > const &xBreak); // helper that receives the iterator to exercise as a parameter
+};
+
+void TestBreakIterator::testLineBreaking() // Checks the breakIndex returned by getLineBreak() for a set of regression inputs
+{
+    i18n::LineBreakHyphenationOptions aHyphOptions; // default-constructed; passed unchanged to every call
+    i18n::LineBreakUserOptions aUserOptions; // default-constructed; passed unchanged to every call
+    lang::Locale aLocale; // re-assigned before each group of checks
+
+    //See https://bugs.libreoffice.org/show_bug.cgi?id=31271
+    {
+        OUString aTest("(some text here)");
+
+        aLocale.Language = "en";
+        aLocale.Country = "US";
+
+        {
+            //Here we want the line break to leave "text here)" on the next line (index 6 is the 't' of "text")
+            i18n::LineBreakResults aResult = m_xBreak->getLineBreak(aTest, strlen("(some tex"), aLocale, 0, aHyphOptions, aUserOptions);
+            CPPUNIT_ASSERT_EQUAL_MESSAGE("Expected a break at the start of the word", static_cast<sal_Int32>(6), aResult.breakIndex);
+        }
+
+        {
+            //Here we want the line break to leave "here)" on the next line (index 11 is the 'h' of "here")
+            i18n::LineBreakResults aResult = m_xBreak->getLineBreak(aTest, strlen("(some text here"), aLocale, 0, aHyphOptions, aUserOptions);
+            CPPUNIT_ASSERT_EQUAL_MESSAGE("Expected a break at the start of the word", static_cast<sal_Int32>(11), aResult.breakIndex);
+        }
+    }
+
+    //See https://bugs.libreoffice.org/show_bug.cgi?id=49849
+    {
+        static constexpr OUStringLiteral aWord = u"\u05DE\u05D9\u05DC\u05D9\u05DD";
+        OUString aTest(aWord + " " + aWord); // the same Hebrew word twice, separated by one space
+
+        aLocale.Language = "he";
+        aLocale.Country = "IL";
+
+        {
+            //Here we want the line break to happen at the whitespace (expected index is the unit right after the space)
+            i18n::LineBreakResults aResult = m_xBreak->getLineBreak(aTest, aTest.getLength()-1, aLocale, 0, aHyphOptions, aUserOptions);
+            CPPUNIT_ASSERT_EQUAL_MESSAGE("Expected a break at the start of the word", aWord.getLength()+1, aResult.breakIndex);
+        }
+    }
+
+    //See https://bz.apache.org/ooo/show_bug.cgi?id=17155
+    {
+        aLocale.Language = "en";
+        aLocale.Country = "US";
+
+        {
+            //Here we want the line break to leave /bar/ba clumped together on the next line
+            i18n::LineBreakResults aResult = m_xBreak->getLineBreak("foo /bar/baz", strlen("foo /bar/ba"), aLocale, 0,
+                aHyphOptions, aUserOptions);
+            CPPUNIT_ASSERT_EQUAL_MESSAGE("Expected a break at the first slash", static_cast<sal_Int32>(4), aResult.breakIndex);
+        }
+    }
+
+    //See https://bz.apache.org/ooo/show_bug.cgi?id=19716
+    {
+        aLocale.Language = "en";
+        aLocale.Country = "US";
+
+        {
+            OUString aTest("aaa]aaa");
+            //Here we want the line break to move the whole lot to the next line
+            i18n::LineBreakResults aResult = m_xBreak->getLineBreak(aTest, aTest.getLength()-2, aLocale, 0,
+                aHyphOptions, aUserOptions);
+            CPPUNIT_ASSERT_EQUAL_MESSAGE("Expected a break at the start of the line, not at ]", static_cast<sal_Int32>(0), aResult.breakIndex);
+        }
+    }
+
+    //this is an example sequence from tdf92993-1.docx caught by the load crashtesting
+    {
+        static constexpr OUStringLiteral aTest = u"\U0001f356\U0001f357\U0001f346"
+                                       "\U0001f364\u2668\ufe0f\U0001f3c6";
+
+        aLocale.Language = "en";
+        aLocale.Country = "US";
+
+        {
+            //This must not assert/crash; the result is deliberately discarded
+            (void)m_xBreak->getLineBreak(aTest, 0, aLocale, 0, aHyphOptions, aUserOptions);
+        }
+    }
+
+    //See https://bugs.documentfoundation.org/show_bug.cgi?id=96197
+    {
+        static constexpr OUStringLiteral aTest = u"\uc560\uad6D\uac00\uc758 \uac00"
+                                       "\uc0ac\ub294";
+
+        aLocale.Language = "ko";
+        aLocale.Country = "KR";
+
+        { // expected index 5 is the first unit after the space, so the second word stays in one piece
+            i18n::LineBreakResults aResult = m_xBreak->getLineBreak(aTest, aTest.getLength()-2, aLocale, 0,
+                aHyphOptions, aUserOptions);
+            CPPUNIT_ASSERT_EQUAL_MESSAGE("Expected a break don't split the Korean word!", static_cast<sal_Int32>(5), aResult.breakIndex);
+        }
+    }
+}
+
+//See https://bugs.libreoffice.org/show_bug.cgi?id=49629
+void TestBreakIterator::testWordBoundaries() // Checks isBeginWord/isEndWord/getWordBoundary/nextWord/previousWord against fixed expected positions
+{
+    lang::Locale aLocale;
+    aLocale.Language = "en";
+    aLocale.Country = "US";
+
+    i18n::Boundary aBounds; // reused for the result of each boundary query
+
+    //See https://bz.apache.org/ooo/show_bug.cgi?id=11993
+    {
+        OUString aTest("abcd ef  ghi??? KLM");
+
+        CPPUNIT_ASSERT(!m_xBreak->isBeginWord(aTest, 4, aLocale, i18n::WordType::DICTIONARY_WORD));
+        CPPUNIT_ASSERT(m_xBreak->isEndWord(aTest, 4, aLocale, i18n::WordType::DICTIONARY_WORD));
+        aBounds = m_xBreak->getWordBoundary(aTest, 4, aLocale, i18n::WordType::DICTIONARY_WORD, true);
+        CPPUNIT_ASSERT_EQUAL(sal_Int32(0), aBounds.startPos);
+        CPPUNIT_ASSERT_EQUAL(sal_Int32(4), aBounds.endPos);
+
+        CPPUNIT_ASSERT(!m_xBreak->isBeginWord(aTest, 8, aLocale, i18n::WordType::DICTIONARY_WORD));
+        CPPUNIT_ASSERT(!m_xBreak->isEndWord(aTest, 8, aLocale, i18n::WordType::DICTIONARY_WORD));
+
+        //next word (last argument true): "ghi" at 9..12
+        aBounds = m_xBreak->getWordBoundary(aTest, 8, aLocale, i18n::WordType::DICTIONARY_WORD, true);
+        CPPUNIT_ASSERT_EQUAL(sal_Int32(9), aBounds.startPos);
+        CPPUNIT_ASSERT_EQUAL(sal_Int32(12), aBounds.endPos);
+
+        //previous word (last argument false): "ef" at 5..7
+        aBounds = m_xBreak->getWordBoundary(aTest, 8, aLocale, i18n::WordType::DICTIONARY_WORD, false);
+        CPPUNIT_ASSERT_EQUAL(sal_Int32(5), aBounds.startPos);
+        CPPUNIT_ASSERT_EQUAL(sal_Int32(7), aBounds.endPos);
+
+        CPPUNIT_ASSERT(!m_xBreak->isBeginWord(aTest, 12, aLocale, i18n::WordType::DICTIONARY_WORD));
+        CPPUNIT_ASSERT(m_xBreak->isEndWord(aTest, 12, aLocale, i18n::WordType::DICTIONARY_WORD));
+        aBounds = m_xBreak->getWordBoundary(aTest, 12, aLocale, i18n::WordType::DICTIONARY_WORD, true);
+        CPPUNIT_ASSERT_EQUAL(sal_Int32(9), aBounds.startPos);
+        CPPUNIT_ASSERT_EQUAL(sal_Int32(12), aBounds.endPos);
+
+        CPPUNIT_ASSERT(m_xBreak->isBeginWord(aTest, 16, aLocale, i18n::WordType::DICTIONARY_WORD));
+        CPPUNIT_ASSERT(!m_xBreak->isEndWord(aTest, 16, aLocale, i18n::WordType::DICTIONARY_WORD));
+        aBounds = m_xBreak->getWordBoundary(aTest, 16, aLocale, i18n::WordType::DICTIONARY_WORD, true);
+        CPPUNIT_ASSERT_EQUAL(sal_Int32(16), aBounds.startPos);
+        CPPUNIT_ASSERT_EQUAL(sal_Int32(19), aBounds.endPos);
+    }
+
+    //See https://bz.apache.org/ooo/show_bug.cgi?id=21907
+    {
+        OUString aTest("b a?");
+
+        CPPUNIT_ASSERT(m_xBreak->isBeginWord(aTest, 1, aLocale, i18n::WordType::ANY_WORD));
+        CPPUNIT_ASSERT(m_xBreak->isBeginWord(aTest, 2, aLocale, i18n::WordType::ANY_WORD));
+        CPPUNIT_ASSERT(m_xBreak->isBeginWord(aTest, 3, aLocale, i18n::WordType::ANY_WORD));
+
+        CPPUNIT_ASSERT(m_xBreak->isBeginWord(aTest, 3, aLocale, i18n::WordType::ANYWORD_IGNOREWHITESPACES));
+
+        CPPUNIT_ASSERT(m_xBreak->isEndWord(aTest, 1, aLocale, i18n::WordType::ANY_WORD));
+        CPPUNIT_ASSERT(m_xBreak->isEndWord(aTest, 2, aLocale, i18n::WordType::ANY_WORD));
+        CPPUNIT_ASSERT(m_xBreak->isEndWord(aTest, 3, aLocale, i18n::WordType::ANY_WORD));
+
+        CPPUNIT_ASSERT(m_xBreak->isEndWord(aTest, 3, aLocale, i18n::WordType::ANYWORD_IGNOREWHITESPACES));
+    }
+
+    //See https://bz.apache.org/ooo/show_bug.cgi?id=14904
+    {
+        static constexpr OUStringLiteral aTest =
+            u"Working \u201CWords"
+            " starting wit"
+            "h quotes\u201D Work"
+            "ing \u2018Broken\u2019 "
+            "?Spanish? doe"
+            "sn\u2019t work. No"
+            "t even \u00BFreal? "
+            "Spanish";
+
+        aBounds = m_xBreak->getWordBoundary(aTest, 4, aLocale, i18n::WordType::DICTIONARY_WORD, false);
+        CPPUNIT_ASSERT_EQUAL(sal_Int32(0), aBounds.startPos);
+        CPPUNIT_ASSERT_EQUAL(sal_Int32(7), aBounds.endPos);
+
+        aBounds = m_xBreak->getWordBoundary(aTest, 12, aLocale, i18n::WordType::DICTIONARY_WORD, false);
+        CPPUNIT_ASSERT_EQUAL(sal_Int32(9), aBounds.startPos);
+        CPPUNIT_ASSERT_EQUAL(sal_Int32(14), aBounds.endPos);
+
+        aBounds = m_xBreak->getWordBoundary(aTest, 40, aLocale, i18n::WordType::DICTIONARY_WORD, false);
+        CPPUNIT_ASSERT_EQUAL(sal_Int32(37), aBounds.startPos);
+        CPPUNIT_ASSERT_EQUAL(sal_Int32(44), aBounds.endPos);
+
+        aBounds = m_xBreak->getWordBoundary(aTest, 49, aLocale, i18n::WordType::DICTIONARY_WORD, false);
+        CPPUNIT_ASSERT_EQUAL(sal_Int32(46), aBounds.startPos);
+        CPPUNIT_ASSERT_EQUAL(sal_Int32(52), aBounds.endPos);
+
+        aBounds = m_xBreak->getWordBoundary(aTest, 58, aLocale, i18n::WordType::DICTIONARY_WORD, false);
+        CPPUNIT_ASSERT_EQUAL(sal_Int32(55), aBounds.startPos);
+        CPPUNIT_ASSERT_EQUAL(sal_Int32(62), aBounds.endPos);
+
+        aBounds = m_xBreak->getWordBoundary(aTest, 67, aLocale, i18n::WordType::DICTIONARY_WORD, false);
+        CPPUNIT_ASSERT_EQUAL(sal_Int32(64), aBounds.startPos);
+        CPPUNIT_ASSERT_EQUAL(sal_Int32(71), aBounds.endPos);
+
+        aBounds = m_xBreak->getWordBoundary(aTest, 90, aLocale, i18n::WordType::DICTIONARY_WORD, false);
+        CPPUNIT_ASSERT_EQUAL(sal_Int32(88), aBounds.startPos);
+        CPPUNIT_ASSERT_EQUAL(sal_Int32(92), aBounds.endPos);
+    }
+
+    //See https://bugs.libreoffice.org/show_bug.cgi?id=49629
+    sal_Unicode aBreakTests[] = { ' ', 1, 2, 3, 4, 5, 6, 7, 0x91, 0x92, 0x200B, 0xE8FF, 0xF8FF }; // characters expected to end the first "Word" at index 4
+    for (int mode = i18n::WordType::ANY_WORD; mode <= i18n::WordType::WORD_COUNT; ++mode) // walks the integer range ANY_WORD..WORD_COUNT
+    {
+        //make sure that in all cases isBeginWord and isEndWord matches getWordBoundary
+        for (size_t i = 0; i < SAL_N_ELEMENTS(aBreakTests); ++i)
+        {
+            OUString aTest = "Word" + OUStringChar(aBreakTests[i]) + "Word";
+            aBounds = m_xBreak->getWordBoundary(aTest, 0, aLocale, mode, true);
+            switch (mode) // every listed mode currently expects the same 0..4 result
+            {
+                case i18n::WordType::ANY_WORD:
+                    CPPUNIT_ASSERT_EQUAL(sal_Int32(0), aBounds.startPos);
+                    CPPUNIT_ASSERT_EQUAL(sal_Int32(4), aBounds.endPos);
+                    break;
+                case i18n::WordType::ANYWORD_IGNOREWHITESPACES:
+                    CPPUNIT_ASSERT_EQUAL(sal_Int32(0), aBounds.startPos);
+                    CPPUNIT_ASSERT_EQUAL(sal_Int32(4), aBounds.endPos);
+                    break;
+                case i18n::WordType::DICTIONARY_WORD:
+                    CPPUNIT_ASSERT_EQUAL(sal_Int32(0), aBounds.startPos);
+                    CPPUNIT_ASSERT_EQUAL(sal_Int32(4), aBounds.endPos);
+                    break;
+                case i18n::WordType::WORD_COUNT:
+                    CPPUNIT_ASSERT_EQUAL(sal_Int32(0), aBounds.startPos);
+                    CPPUNIT_ASSERT_EQUAL(sal_Int32(4), aBounds.endPos);
+                    break;
+            }
+
+            CPPUNIT_ASSERT(m_xBreak->isBeginWord(aTest, aBounds.startPos, aLocale, mode));
+            CPPUNIT_ASSERT(m_xBreak->isEndWord(aTest, aBounds.endPos, aLocale, mode));
+        }
+    }
+
+    sal_Unicode aJoinTests[] = { 'X', 0x200C, 0x200D, 0x2060, 0xFEFF, 0xFFF9, 0xFFFA, 0xFFFB }; // characters expected to keep both halves in one 9-unit word
+    for (int mode = i18n::WordType::ANY_WORD; mode <= i18n::WordType::WORD_COUNT; ++mode) // walks the integer range ANY_WORD..WORD_COUNT
+    {
+        //make sure that in all cases isBeginWord and isEndWord matches getWordBoundary
+        for (size_t i = 0; i < SAL_N_ELEMENTS(aJoinTests); ++i)
+        {
+            OUString aTest = "Word" + OUStringChar(aJoinTests[i]) + "Word";
+            aBounds = m_xBreak->getWordBoundary(aTest, 0, aLocale, mode, true);
+            switch (mode) // every listed mode currently expects the same 0..9 result
+            {
+                case i18n::WordType::ANY_WORD:
+                    CPPUNIT_ASSERT_EQUAL(sal_Int32(0), aBounds.startPos);
+                    CPPUNIT_ASSERT_EQUAL(sal_Int32(9), aBounds.endPos);
+                    break;
+                case i18n::WordType::ANYWORD_IGNOREWHITESPACES:
+                    CPPUNIT_ASSERT_EQUAL(sal_Int32(0), aBounds.startPos);
+                    CPPUNIT_ASSERT_EQUAL(sal_Int32(9), aBounds.endPos);
+                    break;
+                case i18n::WordType::DICTIONARY_WORD:
+                    CPPUNIT_ASSERT_EQUAL(sal_Int32(0), aBounds.startPos);
+                    CPPUNIT_ASSERT_EQUAL(sal_Int32(9), aBounds.endPos);
+                    break;
+                case i18n::WordType::WORD_COUNT:
+                    CPPUNIT_ASSERT_EQUAL(sal_Int32(0), aBounds.startPos);
+                    CPPUNIT_ASSERT_EQUAL(sal_Int32(9), aBounds.endPos);
+                    break;
+            }
+
+            CPPUNIT_ASSERT(m_xBreak->isBeginWord(aTest, aBounds.startPos, aLocale, mode));
+            CPPUNIT_ASSERT(m_xBreak->isEndWord(aTest, aBounds.endPos, aLocale, mode));
+        }
+    }
+
+    //See https://bz.apache.org/ooo/show_bug.cgi?id=13494
+    {
+        const OUString aBase("xxAAxxBBxxCCxx"); // template: each 'x' (or "xx") is replaced by the character under test
+        const sal_Unicode aTests[] =
+        {
+            '\'', ';', ',', '.', '!', '@', '#', '%', '&', '*',
+            '(', ')', '_', '-', '{', '}', '[', ']', '\"', '/',
+            '\\', '?', '~', '$', '+', '^', '=', '<', '>', '|'
+        };
+
+        const sal_Int32 aDoublePositions[] = {0, 2, 4, 6, 8, 10, 12, 14}; // expected startPos sequence when every 'x' is replaced (doubled separators)
+        for (size_t j = 0; j < SAL_N_ELEMENTS(aTests); ++j)
+        {
+            OUString aTest = aBase.replace('x', aTests[j]);
+            sal_Int32 nPos = -1; // the forward walk starts from position -1
+            size_t i = 0;
+            do
+            {
+                CPPUNIT_ASSERT(i < SAL_N_ELEMENTS(aDoublePositions)); // guard against reading past the table
+                nPos = m_xBreak->nextWord(aTest, nPos, aLocale, i18n::WordType::ANYWORD_IGNOREWHITESPACES).startPos;
+                CPPUNIT_ASSERT_EQUAL(aDoublePositions[i], nPos);
+                ++i;
+            }
+            while (nPos < aTest.getLength());
+            nPos = aTest.getLength(); // now walk backwards from the end through the same table
+            i = SAL_N_ELEMENTS(aDoublePositions)-1;
+            do
+            {
+                nPos = m_xBreak->previousWord(aTest, nPos, aLocale, i18n::WordType::ANYWORD_IGNOREWHITESPACES).startPos;
+                --i;
+                CPPUNIT_ASSERT_EQUAL(aDoublePositions[i], nPos);
+            }
+            while (nPos > 0);
+        }
+
+        const sal_Int32 aSinglePositions[] = {0, 1, 3, 4, 6, 7, 9, 10}; // expected startPos sequence when each "xx" becomes one separator
+        for (size_t j = 1; j < SAL_N_ELEMENTS(aTests); ++j) // index 0 (the apostrophe) is checked separately below
+        {
+            OUString aTest = aBase.replaceAll("xx", OUStringChar(aTests[j]));
+            sal_Int32 nPos = -1; // the forward walk starts from position -1
+            size_t i = 0;
+            do
+            {
+                CPPUNIT_ASSERT(i < SAL_N_ELEMENTS(aSinglePositions)); // guard against reading past the table
+                nPos = m_xBreak->nextWord(aTest, nPos, aLocale, i18n::WordType::ANYWORD_IGNOREWHITESPACES).startPos;
+                CPPUNIT_ASSERT_EQUAL(aSinglePositions[i], nPos);
+                ++i;
+            }
+            while (nPos < aTest.getLength());
+            nPos = aTest.getLength(); // now walk backwards from the end through the same table
+            i = SAL_N_ELEMENTS(aSinglePositions)-1;
+            do
+            {
+                nPos = m_xBreak->previousWord(aTest, nPos, aLocale, i18n::WordType::ANYWORD_IGNOREWHITESPACES).startPos;
+                --i;
+                CPPUNIT_ASSERT_EQUAL(aSinglePositions[i], nPos);
+            }
+            while (nPos > 0);
+        }
+
+        const sal_Int32 aSingleQuotePositions[] = {0, 1, 9, 10}; // a single apostrophe between the letter pairs yields fewer stops
+        CPPUNIT_ASSERT_EQUAL(u'\'', aTests[0]); // sanity check that index 0 really is the apostrophe
+        {
+            OUString aTest = aBase.replaceAll("xx", OUStringChar(aTests[0]));
+            sal_Int32 nPos = -1; // the forward walk starts from position -1
+            size_t i = 0;
+            do
+            {
+                CPPUNIT_ASSERT(i < SAL_N_ELEMENTS(aSingleQuotePositions)); // guard against reading past the table
+                nPos = m_xBreak->nextWord(aTest, nPos, aLocale, i18n::WordType::ANYWORD_IGNOREWHITESPACES).startPos;
+                CPPUNIT_ASSERT_EQUAL(aSingleQuotePositions[i], nPos);
+                ++i;
+            }
+            while (nPos < aTest.getLength());
+            nPos = aTest.getLength(); // now walk backwards from the end through the same table
+            i = SAL_N_ELEMENTS(aSingleQuotePositions)-1;
+            do
+            {
+                nPos = m_xBreak->previousWord(aTest, nPos, aLocale, i18n::WordType::ANYWORD_IGNOREWHITESPACES).startPos;
+                --i;
+                CPPUNIT_ASSERT_EQUAL(aSingleQuotePositions[i], nPos);
+            }
+            while (nPos > 0);
+        }
+    }
+
+    //See https://bz.apache.org/ooo/show_bug.cgi?id=13451
+    {
+        aLocale.Language = "ca";
+        aLocale.Country = "ES";
+
+        OUString aTest("mirar-se comprar-vos donem-nos les mans aneu-vos-en!");
+
+        sal_Int32 nPos = 0;
+        sal_Int32 aExpected[] = {8, 20, 30, 34, 39, 51, 52}; // expected endPos of each successive boundary
+        size_t i = 0;
+        do
+        {
+            CPPUNIT_ASSERT(i < SAL_N_ELEMENTS(aExpected));
+            nPos = m_xBreak->getWordBoundary(aTest, nPos, aLocale,
+                i18n::WordType::DICTIONARY_WORD, true).endPos;
+            CPPUNIT_ASSERT_EQUAL(aExpected[i], nPos);
+            ++i;
+        }
+        while (nPos++ < aTest.getLength()); // step one past the boundary just found before querying again
+        CPPUNIT_ASSERT_EQUAL(SAL_N_ELEMENTS(aExpected), i); // every expected boundary must have been visited
+    }
+
+    //See https://bz.apache.org/ooo/show_bug.cgi?id=85411
+    for (int j = 0; j < 3; ++j) // same text and expectations under three locales
+    {
+        switch (j)
+        {
+            case 0:
+                aLocale.Language = "en";
+                aLocale.Country = "US";
+                break;
+            case 1:
+                aLocale.Language = "ca";
+                aLocale.Country = "ES";
+                break;
+            case 2:
+                aLocale.Language = "fi";
+                aLocale.Country = "FI";
+                break;
+            default:
+                CPPUNIT_ASSERT(false);
+                break;
+        }
+
+        static constexpr OUStringLiteral aTest =
+            u"I\u200Bwant\u200Bto\u200Bgo";
+
+        sal_Int32 nPos = 0;
+        sal_Int32 aExpected[] = {1, 6, 9, 12}; // expected endPos of each successive boundary
+        size_t i = 0;
+        do
+        {
+            CPPUNIT_ASSERT(i < SAL_N_ELEMENTS(aExpected));
+            nPos = m_xBreak->getWordBoundary(aTest, nPos, aLocale,
+                i18n::WordType::DICTIONARY_WORD, true).endPos;
+            CPPUNIT_ASSERT_EQUAL(aExpected[i], nPos);
+            ++i;
+        }
+        while (nPos++ < aTest.getLength()); // step one past the boundary just found before querying again
+        CPPUNIT_ASSERT_EQUAL(SAL_N_ELEMENTS(aExpected), i); // every expected boundary must have been visited
+    }
+
+    //https://bz.apache.org/ooo/show_bug.cgi?id=21290
+    for (int j = 0; j < 2; ++j) // same text and expectations under two locales
+    {
+        switch (j)
+        {
+            case 0:
+                aLocale.Language = "en";
+                aLocale.Country = "US";
+                break;
+            case 1:
+                aLocale.Language = "grc";
+                aLocale.Country.clear();
+                break;
+            default:
+                CPPUNIT_ASSERT(false);
+                break;
+        }
+
+        static constexpr OUStringLiteral aTest =
+            u"\u1F0C\u03BD\u03B4\u03C1\u03B1 \u1F00"
+            "\u03C1\u03BD\u1F7B\u03BC\u03B5\u03BD\u03BF"
+            "\u03C2 \u1F00\u03BB\u03BB \u1F24"
+            "\u03C3\u03B8\u03B9\u03BF\u03BD";
+
+        sal_Int32 nPos = 0;
+        sal_Int32 aExpected[] = {5, 15, 19, 26}; // expected endPos of each successive boundary
+        size_t i = 0;
+        do
+        {
+            CPPUNIT_ASSERT(i < SAL_N_ELEMENTS(aExpected));
+            nPos = m_xBreak->getWordBoundary(aTest, nPos, aLocale,
+                i18n::WordType::DICTIONARY_WORD, true).endPos;
+            CPPUNIT_ASSERT_EQUAL(aExpected[i], nPos);
+            ++i;
+        }
+        while (nPos++ < aTest.getLength()); // step one past the boundary just found before querying again
+        CPPUNIT_ASSERT_EQUAL(SAL_N_ELEMENTS(aExpected), i); // every expected boundary must have been visited
+    }
+
+    //See https://bz.apache.org/ooo/show_bug.cgi?id=58513
+    //See https://bugs.libreoffice.org/show_bug.cgi?id=55707
+    {
+        aLocale.Language = "fi";
+        aLocale.Country = "FI";
+
+        OUString aTest("Kuorma-auto kaakkois- ja Keski-Suomi USA:n 90:n %:n");
+
+        { // WORD_COUNT mode: only the endPos of each boundary is compared
+            sal_Int32 nPos = 0;
+            sal_Int32 aExpected[] = {11, 21, 24, 36, 42, 47, 51};
+            size_t i = 0;
+            do
+            {
+                CPPUNIT_ASSERT(i < SAL_N_ELEMENTS(aExpected));
+                nPos = m_xBreak->getWordBoundary(aTest, nPos, aLocale,
+                    i18n::WordType::WORD_COUNT, true).endPos;
+                CPPUNIT_ASSERT_EQUAL(aExpected[i], nPos);
+                ++i;
+            }
+            while (nPos++ < aTest.getLength()); // step one past the boundary just found before querying again
+            CPPUNIT_ASSERT_EQUAL(SAL_N_ELEMENTS(aExpected), i); // every expected boundary must have been visited
+        }
+
+        { // DICTIONARY_WORD mode: both startPos and endPos are compared
+            sal_Int32 nPos = 0;
+            sal_Int32 aExpected[] = {0, 11, 12, 20, 22, 24, 25, 36, 37,
+                                    40, 41, 42, 43, 45, 46, 47, 50, 51}; // alternating startPos/endPos pairs
+            size_t i = 0;
+            do
+            {
+                CPPUNIT_ASSERT(i < SAL_N_ELEMENTS(aExpected));
+                aBounds = m_xBreak->getWordBoundary(aTest, nPos, aLocale,
+                    i18n::WordType::DICTIONARY_WORD, true);
+                CPPUNIT_ASSERT_EQUAL(aExpected[i], aBounds.startPos);
+                ++i;
+                CPPUNIT_ASSERT_EQUAL(aExpected[i], aBounds.endPos);
+                ++i;
+                nPos = aBounds.endPos;
+            }
+            while (nPos++ < aTest.getLength()); // step one past the boundary just found before querying again
+            CPPUNIT_ASSERT_EQUAL(SAL_N_ELEMENTS(aExpected), i); // every expected value must have been consumed
+        }
+    }
+
+    //See https://bz.apache.org/ooo/show_bug.cgi?id=107843
+    {
+        aLocale.Language = "en";
+        aLocale.Country = "US";
+
+        static constexpr OUStringLiteral aTest =
+            u"ru\uFB00le \uFB01sh";
+
+        aBounds = m_xBreak->getWordBoundary(aTest, 1, aLocale, i18n::WordType::DICTIONARY_WORD, false);
+        CPPUNIT_ASSERT_EQUAL(sal_Int32(0), aBounds.startPos);
+        CPPUNIT_ASSERT_EQUAL(sal_Int32(5), aBounds.endPos);
+
+        aBounds = m_xBreak->getWordBoundary(aTest, 7, aLocale, i18n::WordType::DICTIONARY_WORD, false);
+        CPPUNIT_ASSERT_EQUAL(sal_Int32(6), aBounds.startPos);
+        CPPUNIT_ASSERT_EQUAL(sal_Int32(9), aBounds.endPos);
+    }
+
+    //See https://bz.apache.org/ooo/show_bug.cgi?id=113785
+    {
+        aLocale.Language = "en";
+        aLocale.Country = "US";
+
+        static constexpr OUStringLiteral aTest =
+            u"a\u2013b\u2014c";
+
+        aBounds = m_xBreak->getWordBoundary(aTest, 0, aLocale, i18n::WordType::DICTIONARY_WORD, true);
+        CPPUNIT_ASSERT_EQUAL(sal_Int32(0), aBounds.startPos);
+        CPPUNIT_ASSERT_EQUAL(sal_Int32(1), aBounds.endPos);
+
+        aBounds = m_xBreak->nextWord(aTest, 0, aLocale, i18n::WordType::DICTIONARY_WORD);
+        CPPUNIT_ASSERT_EQUAL(sal_Int32(2), aBounds.startPos);
+        CPPUNIT_ASSERT_EQUAL(sal_Int32(3), aBounds.endPos);
+
+        aBounds = m_xBreak->nextWord(aTest, aBounds.endPos, aLocale, i18n::WordType::DICTIONARY_WORD);
+        CPPUNIT_ASSERT_EQUAL(sal_Int32(4), aBounds.startPos);
+        CPPUNIT_ASSERT_EQUAL(sal_Int32(5), aBounds.endPos);
+    }
+}
+
+//See https://bugs.libreoffice.org/show_bug.cgi?id=40292
+//See https://bz.apache.org/ooo/show_bug.cgi?id=80412
+//See https://bz.apache.org/ooo/show_bug.cgi?id=111152
+//See https://bz.apache.org/ooo/show_bug.cgi?id=50172
+void TestBreakIterator::testGraphemeIteration() // Checks that SKIPCELL nextCharacters/previousCharacters step over whole graphemes
+{
+    lang::Locale aLocale;
+    aLocale.Language = "bn";
+    aLocale.Country = "IN";
+
+    {
+        static constexpr OUStringLiteral aTest = u"\u09AC\u09CD\u09AF"; // BA HALANT YA
+
+        sal_Int32 nDone=0; // out-argument of the calls below; its value is not checked
+        sal_Int32 nPos;
+        nPos = m_xBreak->nextCharacters(aTest, 0, aLocale,
+            i18n::CharacterIteratorMode::SKIPCELL, 1, nDone);
+        CPPUNIT_ASSERT_EQUAL_MESSAGE("Should skip full grapheme", aTest.getLength(), nPos);
+        nPos = m_xBreak->previousCharacters(aTest, aTest.getLength(), aLocale,
+            i18n::CharacterIteratorMode::SKIPCELL, 1, nDone);
+        CPPUNIT_ASSERT_EQUAL_MESSAGE("Should skip full grapheme", static_cast<sal_Int32>(0), nPos);
+    }
+
+    {
+        static constexpr OUStringLiteral aTest = u"\u09B9\u09CD\u09A3\u09BF";
+            // HA HALANT NNA VOWELSIGNI
+
+        sal_Int32 nDone=0; // out-argument of the calls below; its value is not checked
+        sal_Int32 nPos;
+        nPos = m_xBreak->nextCharacters(aTest, 0, aLocale,
+            i18n::CharacterIteratorMode::SKIPCELL, 1, nDone);
+        CPPUNIT_ASSERT_EQUAL_MESSAGE("Should skip full grapheme", aTest.getLength(), nPos);
+        nPos = m_xBreak->previousCharacters(aTest, aTest.getLength(), aLocale,
+            i18n::CharacterIteratorMode::SKIPCELL, 1, nDone);
+        CPPUNIT_ASSERT_EQUAL_MESSAGE("Should skip full grapheme", static_cast<sal_Int32>(0), nPos);
+    }
+
+    {
+        static constexpr OUStringLiteral aTest = u"\u09A4\u09CD\u09AE\u09CD\u09AF";
+            // TA HALANT MA HALANT YA
+
+        sal_Int32 nDone=0; // out-argument of the calls below; its value is not checked
+        sal_Int32 nPos;
+        nPos = m_xBreak->nextCharacters(aTest, 0, aLocale,
+            i18n::CharacterIteratorMode::SKIPCELL, 1, nDone);
+        CPPUNIT_ASSERT_EQUAL_MESSAGE("Should skip full grapheme", aTest.getLength(), nPos);
+        nPos = m_xBreak->previousCharacters(aTest, aTest.getLength(), aLocale,
+            i18n::CharacterIteratorMode::SKIPCELL, 1, nDone);
+        CPPUNIT_ASSERT_EQUAL_MESSAGE("Should skip full grapheme", static_cast<sal_Int32>(0), nPos);
+    }
+
+    aLocale.Language = "ta";
+    aLocale.Country = "IN";
+
+    {
+        static constexpr OUStringLiteral aTest = u"\u0B9A\u0BBF\u0BA4\u0BCD\u0BA4\u0BBF\u0BB0\u0BC8"; // CA VOWELSIGNI TA VIRAMA TA VOWELSIGNI RA VOWELSIGNAI
+
+        sal_Int32 nDone=0; // out-argument of the calls below; its value is not checked
+        sal_Int32 nPos = 0;
+
+        nPos = m_xBreak->nextCharacters(aTest, 0, aLocale, i18n::CharacterIteratorMode::SKIPCELL, 1, nDone); // forward: expected stops at 2, 4, 6 and the end
+        CPPUNIT_ASSERT_EQUAL_MESSAGE("Should skip full grapheme", static_cast<sal_Int32>(2), nPos);
+        nPos = m_xBreak->nextCharacters(aTest, nPos, aLocale, i18n::CharacterIteratorMode::SKIPCELL, 1, nDone);
+        CPPUNIT_ASSERT_EQUAL_MESSAGE("Should skip full grapheme", static_cast<sal_Int32>(4), nPos);
+        nPos = m_xBreak->nextCharacters(aTest, nPos, aLocale, i18n::CharacterIteratorMode::SKIPCELL, 1, nDone);
+        CPPUNIT_ASSERT_EQUAL_MESSAGE("Should skip full grapheme", static_cast<sal_Int32>(6), nPos);
+        nPos = m_xBreak->nextCharacters(aTest, nPos, aLocale, i18n::CharacterIteratorMode::SKIPCELL, 1, nDone);
+        CPPUNIT_ASSERT_EQUAL_MESSAGE("Should skip full grapheme", aTest.getLength(), nPos);
+        nPos = m_xBreak->previousCharacters(aTest, aTest.getLength(), aLocale, // backward: expected stops at 6, 4, 2 and 0
+            i18n::CharacterIteratorMode::SKIPCELL, 1, nDone);
+        CPPUNIT_ASSERT_EQUAL_MESSAGE("Should skip full grapheme", static_cast<sal_Int32>(6), nPos);
+        nPos = m_xBreak->previousCharacters(aTest, nPos, aLocale, i18n::CharacterIteratorMode::SKIPCELL, 1, nDone);
+        CPPUNIT_ASSERT_EQUAL_MESSAGE("Should skip full grapheme", static_cast<sal_Int32>(4), nPos);
+        nPos = m_xBreak->previousCharacters(aTest, nPos, aLocale, i18n::CharacterIteratorMode::SKIPCELL, 1, nDone);
+        CPPUNIT_ASSERT_EQUAL_MESSAGE("Should skip full grapheme", static_cast<sal_Int32>(2), nPos);
+        nPos = m_xBreak->previousCharacters(aTest, nPos, aLocale, i18n::CharacterIteratorMode::SKIPCELL, 1, nDone);
+        CPPUNIT_ASSERT_EQUAL_MESSAGE("Should skip full grapheme", static_cast<sal_Int32>(0), nPos);
+    }
+
+    {
+        static constexpr OUStringLiteral aTest = u"\u0B95\u0BC1"; // KA VOWELSIGNU
+
+        sal_Int32 nDone=0; // out-argument of the calls below; its value is not checked
+        sal_Int32 nPos = 0;
+
+        nPos = m_xBreak->nextCharacters(aTest, 0, aLocale,
+            i18n::CharacterIteratorMode::SKIPCELL, 1, nDone);
+        CPPUNIT_ASSERT_EQUAL_MESSAGE("Should skip full grapheme", aTest.getLength(), nPos);
+        nPos = m_xBreak->previousCharacters(aTest, aTest.getLength(), aLocale,
+            i18n::CharacterIteratorMode::SKIPCELL, 1, nDone);
+        CPPUNIT_ASSERT_EQUAL_MESSAGE("Should skip full grapheme", static_cast<sal_Int32>(0), nPos);
+    }
+
+    {
+        static constexpr OUStringLiteral aTest =
+            u"\u0B9A\u0BBF\u0BA4\u0BCD\u0BA4\u0BBF\u0BB0\u0BC8";
+            // CA VOWELSIGNI TA VIRAMA TA VOWELSIGNI RA VOWELSIGNAI
+
+        sal_Int32 nDone=0; // out-argument of the calls below; its value is not checked
+        sal_Int32 nPos=0;
+
+        for (sal_Int32 i = 0; i < 4; ++i) // four forward steps, each expected to advance by two code units
+        {
+            sal_Int32 nOldPos = nPos;
+            nPos = m_xBreak->nextCharacters(aTest, nPos, aLocale,
+                i18n::CharacterIteratorMode::SKIPCELL, 1, nDone);
+            CPPUNIT_ASSERT_EQUAL_MESSAGE("Should skip 2 units", nOldPos+2, nPos);
+        }
+
+        for (sal_Int32 i = 0; i < 4; ++i) // four backward steps, each expected to retreat by two code units
+        {
+            sal_Int32 nOldPos = nPos;
+            nPos = m_xBreak->previousCharacters(aTest, nPos, aLocale,
+                i18n::CharacterIteratorMode::SKIPCELL, 1, nDone);
+            CPPUNIT_ASSERT_EQUAL_MESSAGE("Should skip 2 units", nOldPos-2, nPos);
+        }
+    }
+
+    {
+        static constexpr OUStringLiteral aText = u"\u05D0\u05B8"; // ALEF QAMATS
+
+        sal_Int32 nGraphemeCount = 0; // number of nextCharacters() steps needed to reach the end
+
+        sal_Int32 nCurPos = 0;
+        while (nCurPos < aText.getLength())
+        {
+            sal_Int32 nCount2 = 1; // passed both as the count and as the nDone out-argument
+            nCurPos = m_xBreak->nextCharacters(aText, nCurPos, lang::Locale(), // note: uses a default-constructed locale, not aLocale
+                i18n::CharacterIteratorMode::SKIPCELL, nCount2, nCount2);
+            ++nGraphemeCount;
+        }
+
+        CPPUNIT_ASSERT_EQUAL_MESSAGE("Should be considered 1 grapheme", static_cast<sal_Int32>(1), nGraphemeCount);
+    }
+
+    aLocale.Language = "hi";
+    aLocale.Country = "IN";
+
+    {
+        static constexpr OUStringLiteral aTest = u"\u0936\u0940"; // SHA VOWELSIGNII
+
+        sal_Int32 nDone=0; // out-argument of the calls below; its value is not checked
+        sal_Int32 nPos = 0;
+
+        nPos = m_xBreak->nextCharacters(aTest, 0, aLocale,
+            i18n::CharacterIteratorMode::SKIPCELL, 1, nDone);
+        CPPUNIT_ASSERT_EQUAL_MESSAGE("Should skip full grapheme", aTest.getLength(), nPos);
+        nPos = m_xBreak->previousCharacters(aTest, aTest.getLength(), aLocale,
+            i18n::CharacterIteratorMode::SKIPCELL, 1, nDone);
+        CPPUNIT_ASSERT_EQUAL_MESSAGE("Should skip full grapheme", static_cast<sal_Int32>(0), nPos);
+    }
+}
+
+//A test to ensure that certain ranges and codepoints that are categorized as
+//weak remain as weak, so that existing docs that depend on this don't silently
+//change font for those weak chars
+void TestBreakIterator::testWeak()
+{
+    // Each pair in the literal below is (mostly) the first and last code point
+    // of the range named in its trailing comment. getScriptType() is given only
+    // text and index, so this test needs no locale.
+
+    {
+        static constexpr OUStringLiteral aWeaks =
+            u"\u0001\u0002"
+            " \u00A0"
+            "\u0300\u036F"  //Combining Diacritical Marks
+            "\u1AB0\u1AFF"  //Combining Diacritical Marks Extended
+            "\u1DC0\u1DFF"  //Combining Diacritical Marks Supplement
+            "\u20D0\u20FF"  //Combining Diacritical Marks for Symbols
+            "\u2150\u215F"  //Number Forms, fractions
+            "\u2160\u2180"  //Number Forms, roman numerals
+            "\u2200\u22FF"  //Mathematical Operators
+            "\u27C0\u27EF"  //Miscellaneous Mathematical Symbols-A
+            "\u2980\u29FF"  //Miscellaneous Mathematical Symbols-B
+            "\u2A00\u2AFF"  //Supplemental Mathematical Operators
+            "\u2100\u214F"  //Letterlike Symbols
+            "\u2308\u230B"  //Miscellaneous technical
+            "\u25A0\u25FF"  //Geometric Shapes
+            "\u2B30\u2B4C"; //Miscellaneous Symbols and Arrows
+
+        for (sal_Int32 i = 0; i < aWeaks.getLength(); ++i)
+        {
+            sal_Int16 nScript = m_xBreak->getScriptType(aWeaks, i);
+            OString aMsg = // names the offending UTF-16 unit in hex on failure
+                "Char 0x" +
+                OString::number(static_cast<sal_Int32>(std::u16string_view(aWeaks)[i]), 16) +
+                " should have been weak";
+            CPPUNIT_ASSERT_EQUAL_MESSAGE(aMsg.getStr(),
+                i18n::ScriptType::WEAK, nScript);
+        }
+    }
+}
+
+//A test to ensure that certain ranges and codepoints that are categorized as
+//asian remain as asian, so that existing docs that depend on this don't silently
+//change font for those asian chars.
+//See https://bugs.libreoffice.org/show_bug.cgi?id=38095
+void TestBreakIterator::testAsian()
+{
+    // The literal below lists code points that must keep reporting ASIAN.
+    // getScriptType() is given only text and index, so this test needs no
+    // locale.
+
+    {
+        static constexpr OUStringLiteral aAsians =
+            //some typical CJK chars
+            u"\u4E00\u62FF"
+            //The full HalfWidth and FullWidth block has historically been
+            //designated as taking the CJK font :-(
+            //HalfWidth and FullWidth forms of ASCII 0-9, categorized under
+            //UAX24 as "Common" i.e. by that logic WEAK
+            "\uFF10\uFF19"
+            //HalfWidth and FullWidth forms of ASCII A-z, categorized under
+            //UAX24 as "Latin", i.e. by that logic LATIN
+            "\uFF21\uFF5A";
+
+        for (sal_Int32 i = 0; i < aAsians.getLength(); ++i)
+        {
+            sal_Int16 nScript = m_xBreak->getScriptType(aAsians, i);
+            OString aMsg = // names the offending UTF-16 unit in hex on failure
+                "Char 0x" +
+                OString::number(static_cast<sal_Int32>(std::u16string_view(aAsians)[i]), 16) +
+                " should have been asian";
+            CPPUNIT_ASSERT_EQUAL_MESSAGE(aMsg.getStr(),
+                i18n::ScriptType::ASIAN, nScript);
+        }
+    }
+}
+
+#if (U_ICU_VERSION_MAJOR_NUM > 51)
+//A test to ensure that our Lao word boundary detection is useful
+void TestBreakIterator::testLao()
+{
+    // Lao (lo-LA) locale; the third constructor argument is the locale
+    // variant, which stays empty.
+    const lang::Locale aLaoLocale("lo", "LA", OUString());
+
+    static constexpr OUStringLiteral aLaoText = u"\u0e8d\u0eb4\u0e99\u0e94\u0eb5\u0e95\u0ec9\u0ead\u0e99\u0eae\u0eb1\u0e9a";
+    const i18n::Boundary aFirstWord = m_xBreak->getWordBoundary(aLaoText, 0, aLaoLocale,
+        i18n::WordType::DICTIONARY_WORD, true);
+
+    CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(0), aFirstWord.startPos);
+    CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(5), aFirstWord.endPos);
+
+    const i18n::Boundary aSecondWord = m_xBreak->getWordBoundary(aLaoText, aFirstWord.endPos,
+        aLaoLocale, i18n::WordType::DICTIONARY_WORD, true);
+
+    CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(5), aSecondWord.startPos);
+#if (U_ICU_VERSION_MAJOR_NUM < 70)
+    CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(9), aSecondWord.endPos);
+#else
+    // FIXME:
+    // With ICU 70/71 the boundary at 9 is not found, for a reason not yet
+    // understood; the text length 12 comes back as endPos instead.
+    // The difference arises somewhere below this call chain:
+    // icu_70::RuleBasedBreakIterator::BreakCache::next()
+    // icu_70::RuleBasedBreakIterator::BreakCache::following()
+    // icu_70::RuleBasedBreakIterator::following()
+    // i18npool::BreakIterator_Unicode::getWordBoundary()
+    CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(12), aSecondWord.endPos);
+#endif
+}
+#endif
+
+//A test to ensure that our Thai word boundary detection is useful
+void TestBreakIterator::testThai()
+{
+    // Every check below uses the Thai (th-TH) locale; the third constructor
+    // argument is the locale variant, which stays empty.
+    const lang::Locale aThai("th", "TH", OUString());
+
+    //See http://lists.freedesktop.org/archives/libreoffice/2012-February/025959.html
+    {
+        static constexpr OUStringLiteral aWord = u"\u0E01\u0E38\u0E2B\u0E25\u0E32\u0E1A";
+        const i18n::Boundary aWordBounds = m_xBreak->getWordBoundary(aWord, 0, aThai,
+            i18n::WordType::DICTIONARY_WORD, true);
+        CPPUNIT_ASSERT_EQUAL_MESSAGE("Should skip full word",
+            static_cast<sal_Int32>(0), aWordBounds.startPos);
+        CPPUNIT_ASSERT_EQUAL_MESSAGE("Should skip full word",
+            aWord.getLength(), aWordBounds.endPos);
+    }
+
+    //See https://bz.apache.org/ooo/show_bug.cgi?id=29548
+    //walking forwards and then backwards must visit the same word starts
+    {
+        static constexpr OUStringLiteral aSentence =
+            u"\u0E2D\u0E38\u0E17\u0E22\u0E32\u0E19\u0E41"
+            "\u0E2B\u0E48\u0E07\u0E0A\u0E32\u0E15\u0E34"
+            "\u0E19\u0E49\u0E33\u0E2B\u0E19\u0E32\u0E27"
+            "\u0E2D\u0E38\u0E17\u0E22\u0E32\u0E19\u0E41"
+            "\u0E2B\u0E48\u0E07\u0E0A\u0E32\u0E15\u0E34"
+            "\u0E19\u0E49\u0E33\u0E2B\u0E19\u0E32\u0E27";
+
+        std::stack<sal_Int32> aWordStarts;
+        sal_Int32 nIndex = -1;
+        do
+        {
+            nIndex = m_xBreak->nextWord(aSentence, nIndex, aThai, i18n::WordType::ANYWORD_IGNOREWHITESPACES).startPos;
+            aWordStarts.push(nIndex);
+        }
+        while (nIndex < aSentence.getLength());
+        nIndex = aSentence.getLength();
+        CPPUNIT_ASSERT(!aWordStarts.empty());
+        aWordStarts.pop(); // discard the final entry, the one that ended the forward walk
+        do
+        {
+            CPPUNIT_ASSERT(!aWordStarts.empty());
+            nIndex = m_xBreak->previousWord(aSentence, nIndex, aThai, i18n::WordType::ANYWORD_IGNOREWHITESPACES).startPos;
+            CPPUNIT_ASSERT_EQUAL(aWordStarts.top(), nIndex);
+            aWordStarts.pop();
+        }
+        while (nIndex > 0);
+    }
+
+    // tdf#113694
+    {
+        static constexpr OUStringLiteral aPair = u"\U00010000";
+
+        sal_Int32 nTravelled = 0;
+        // U+10000 needs two UTF-16 units; neither mode may stop between them.
+
+        sal_Int32 nWhere = m_xBreak->nextCharacters(aPair, 0, aThai,
+            i18n::CharacterIteratorMode::SKIPCELL, 1, nTravelled);
+        CPPUNIT_ASSERT_EQUAL_MESSAGE("Should skip full surrogate pair", aPair.getLength(), nWhere);
+        nWhere = m_xBreak->previousCharacters(aPair, aPair.getLength(), aThai,
+            i18n::CharacterIteratorMode::SKIPCELL, 1, nTravelled);
+        CPPUNIT_ASSERT_EQUAL_MESSAGE("Should skip full surrogate pair", sal_Int32(0), nWhere);
+
+        nWhere = m_xBreak->nextCharacters(aPair, 0, aThai,
+            i18n::CharacterIteratorMode::SKIPCHARACTER, 1, nTravelled);
+        CPPUNIT_ASSERT_EQUAL_MESSAGE("Should skip full surrogate pair", aPair.getLength(), nWhere);
+        nWhere = m_xBreak->previousCharacters(aPair, aPair.getLength(), aThai,
+            i18n::CharacterIteratorMode::SKIPCHARACTER, 1, nTravelled);
+        CPPUNIT_ASSERT_EQUAL_MESSAGE("Should skip full surrogate pair", sal_Int32(0), nWhere);
+    }
+}
+
+#ifdef TODO
+void TestBreakIterator::testNorthernThai()
+{
+    lang::Locale aLocale;
+    aLocale.Language = "nod";
+    aLocale.Country = "TH";
+    // The whole string is expected to come back as a single dictionary word.
+    const sal_Unicode NORTHERN_THAI1[] = { 0x0E01, 0x0E38, 0x0E4A, 0x0E2B, 0x0E25, 0x0E32, 0x0E1A };
+    OUString aTest(NORTHERN_THAI1, SAL_N_ELEMENTS(NORTHERN_THAI1));
+    i18n::Boundary aBounds = m_xBreak->getWordBoundary(aTest, 0, aLocale,
+        i18n::WordType::DICTIONARY_WORD, true);
+    // Separate equality checks so a failure reports which bound is off and its value.
+    CPPUNIT_ASSERT_EQUAL_MESSAGE("Should skip full word", sal_Int32(0), aBounds.startPos);
+    CPPUNIT_ASSERT_EQUAL_MESSAGE("Should skip full word", aTest.getLength(), aBounds.endPos);
+}
+
+// Not sure if any version earlier than 49 did have Khmer word boundary
+// dictionaries, 4.6 does not.
+
+// As of ICU 54, word boundary detection for Khmer is still considered
+// insufficient, so the ICU Khmer support is disabled
+
+//A test to ensure that our Khmer word boundary detection is useful
+//https://bugs.libreoffice.org/show_bug.cgi?id=52020
+void TestBreakIterator::testKhmer()
+{
+    lang::Locale aLocale;
+    aLocale.Language = "km";
+    aLocale.Country = "KH";
+
+    const sal_Unicode KHMER[] = { 0x17B2, 0x17D2, 0x1799, 0x1782, 0x17C1 };
+    OUString aTest(KHMER, SAL_N_ELEMENTS(KHMER));
+    // Expect two words, [0,3) and [3,5); each bound is checked separately so a failure names it.
+    i18n::Boundary aBounds = m_xBreak->getWordBoundary(aTest, 0, aLocale,
+        i18n::WordType::DICTIONARY_WORD, true);
+    CPPUNIT_ASSERT_EQUAL(sal_Int32(0), aBounds.startPos);
+    CPPUNIT_ASSERT_EQUAL(sal_Int32(3), aBounds.endPos);
+
+    aBounds = m_xBreak->getWordBoundary(aTest, aBounds.endPos, aLocale,
+        i18n::WordType::DICTIONARY_WORD, true);
+    CPPUNIT_ASSERT_EQUAL(sal_Int32(3), aBounds.startPos);
+    CPPUNIT_ASSERT_EQUAL(sal_Int32(5), aBounds.endPos);
+}
+#endif
+
+void TestBreakIterator::doTestJapanese(uno::Reference< i18n::XBreakIterator > const &xBreak)
+{
+    // Runs the Japanese dictionary-word checks against the iterator passed in,
+    // so the same expectations can be applied to more than one instance.
+    // The locale is ja-JP with an empty variant (third constructor argument).
+    const lang::Locale aJapanese("ja", "JP", OUString());
+
+    {
+        static constexpr OUStringLiteral aKatakana = u"\u30B7\u30E3\u30C3\u30C8\u30C0\u30A6\u30F3";
+
+        const i18n::Boundary aWord = xBreak->getWordBoundary(aKatakana, 5, aJapanese,
+            i18n::WordType::DICTIONARY_WORD, true);
+
+        CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(0), aWord.startPos);
+        CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(7), aWord.endPos);
+    }
+
+    {
+        static constexpr OUStringLiteral aRepeated = u"\u9EBB\u306E\u8449\u9EBB\u306E\u8449";
+
+        const i18n::Boundary aFirstHalf = xBreak->getWordBoundary(aRepeated, 1, aJapanese,
+            i18n::WordType::DICTIONARY_WORD, true);
+
+        CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(0), aFirstHalf.startPos);
+        CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(3), aFirstHalf.endPos);
+
+        const i18n::Boundary aSecondHalf = xBreak->getWordBoundary(aRepeated, 5, aJapanese,
+            i18n::WordType::DICTIONARY_WORD, true);
+
+        CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(3), aSecondHalf.startPos);
+        CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(6), aSecondHalf.endPos);
+    }
+}
+
+void TestBreakIterator::testJapanese()
+{
+    // First pass: the iterator the fixture created in setUp().
+    doTestJapanese(m_xBreak);
+    // fdo#78479 - test second / cached instantiation of xdictionary
+    uno::Reference< i18n::XBreakIterator > xSecondBreak;
+    xSecondBreak.set(m_xSFactory->createInstance("com.sun.star.i18n.BreakIterator"),
+        uno::UNO_QUERY_THROW);
+    doTestJapanese(xSecondBreak);
+}
+
+void TestBreakIterator::testChinese()
+{
+    // zh-CN locale with an empty variant. The text ends in U+29EDB, which is
+    // outside the BMP and therefore occupies UTF-16 units 4 and 5.
+    const lang::Locale aChinese("zh", "CN", OUString());
+
+    {
+        static constexpr OUStringLiteral aHanText = u"\u6A35\u6A30\u69FE\u8919\U00029EDB";
+
+        const i18n::Boundary aWord = m_xBreak->getWordBoundary(aHanText, 4, aChinese,
+            i18n::WordType::DICTIONARY_WORD, true);
+        CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(4), aWord.startPos);
+        CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(6), aWord.endPos);
+    }
+}
+void TestBreakIterator::setUp() // per-test fixture setup: creates the BreakIterator under test
+{
+    BootstrapFixtureBase::setUp(); // base fixture first; m_xSFactory is used on the next line
+    m_xBreak.set(m_xSFactory->createInstance("com.sun.star.i18n.BreakIterator"), uno::UNO_QUERY_THROW);
+}
+
+void TestBreakIterator::tearDown() // per-test fixture teardown, reverse order of setUp()
+{
+    m_xBreak.clear(); // drop the iterator reference before the base fixture is torn down
+    BootstrapFixtureBase::tearDown();
+}
+
+CPPUNIT_TEST_SUITE_REGISTRATION(TestBreakIterator);
+
+CPPUNIT_PLUGIN_IMPLEMENT();
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/i18npool/qa/cppunit/test_calendar.cxx b/i18npool/qa/cppunit/test_calendar.cxx
new file mode 100644
index 000000000..a522a4a74
--- /dev/null
+++ b/i18npool/qa/cppunit/test_calendar.cxx
@@ -0,0 +1,130 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; fill-column: 100 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+#include <calendar_hijri.hxx>
+#include <unotest/bootstrapfixturebase.hxx>
+
+using namespace com::sun::star;
+
+class TestCalendar : public test::BootstrapFixtureBase // tests for i18npool::Calendar_hijri conversions
+{
+public:
+    void testHijriGregorian(); // Gregorian -> Hijri -> Gregorian round trips for fixed dates
+    void testGetGregorianJulianDay(); // Gregorian -> Julian day -> Gregorian round trip
+
+    CPPUNIT_TEST_SUITE(TestCalendar);
+    CPPUNIT_TEST(testHijriGregorian);
+    CPPUNIT_TEST(testGetGregorianJulianDay);
+    CPPUNIT_TEST_SUITE_END();
+};
+
+void TestCalendar::testHijriGregorian()
+{
+    // 21-7-1443 (Hijri) == 22-2-2022 (Gregorian)
+    sal_Int32 day = 22, month = 2, year = 2022; // in/out: each conversion below overwrites these
+    i18npool::Calendar_hijri::getHijri(&day, &month, &year);
+    CPPUNIT_ASSERT_EQUAL(sal_Int32(21), day);
+    CPPUNIT_ASSERT_EQUAL(sal_Int32(7), month);
+    CPPUNIT_ASSERT_EQUAL(sal_Int32(1443), year);
+
+    i18npool::Calendar_hijri::ToGregorian(&day, &month, &year); // and back again
+    CPPUNIT_ASSERT_EQUAL(sal_Int32(22), day);
+    CPPUNIT_ASSERT_EQUAL(sal_Int32(2), month);
+    CPPUNIT_ASSERT_EQUAL(sal_Int32(2022), year);
+
+    // 1-1-1 (Hijri) == 15-7-622 (Gregorian)
+    // NOTE: The calculated date is 15-7-622, as it was with the
+    // previous version of i18npool::Calendar_hijri::ToGregorian()
+    // but in some articles, 16-7-622 is considered the equivalent date
+    // https://en.wikipedia.org/wiki/622
+    // This article states that 15-7-622 is correct:
+    // "On the Origins of the Hijrī Calendar: A Multi-Faceted Perspective
+    // Based on the Covenants of the Prophet and Specific Date Verification"
+    // https://www.mdpi.com/2077-1444/12/1/42/htm
+    day = 15;
+    month = 7;
+    year = 622;
+    i18npool::Calendar_hijri::getHijri(&day, &month, &year);
+    CPPUNIT_ASSERT_EQUAL(sal_Int32(1), day);
+    CPPUNIT_ASSERT_EQUAL(sal_Int32(1), month);
+    CPPUNIT_ASSERT_EQUAL(sal_Int32(1), year);
+
+    i18npool::Calendar_hijri::ToGregorian(&day, &month, &year);
+    CPPUNIT_ASSERT_EQUAL(sal_Int32(15), day);
+    CPPUNIT_ASSERT_EQUAL(sal_Int32(7), month);
+    CPPUNIT_ASSERT_EQUAL(sal_Int32(622), year);
+
+    // 1-1-100 (Hijri) == 2-8-718 (Gregorian)
+    // https://habibur.com/hijri/100/
+    day = 2;
+    month = 8;
+    year = 718;
+    i18npool::Calendar_hijri::getHijri(&day, &month, &year);
+    CPPUNIT_ASSERT_EQUAL(sal_Int32(1), day);
+    CPPUNIT_ASSERT_EQUAL(sal_Int32(1), month);
+    CPPUNIT_ASSERT_EQUAL(sal_Int32(100), year);
+
+    i18npool::Calendar_hijri::ToGregorian(&day, &month, &year);
+    CPPUNIT_ASSERT_EQUAL(sal_Int32(2), day);
+    CPPUNIT_ASSERT_EQUAL(sal_Int32(8), month);
+    CPPUNIT_ASSERT_EQUAL(sal_Int32(718), year);
+
+    // 1-1-1000 (Hijri) == 18-10-1591 (Gregorian)
+    // NOTE: The calculated date is 18-10-1591, but there is inconsistency
+    // with this website, as it states it should be 19-10-1591
+    // https://habibur.com/hijri/1000/
+    day = 18;
+    month = 10;
+    year = 1591;
+    i18npool::Calendar_hijri::getHijri(&day, &month, &year);
+    CPPUNIT_ASSERT_EQUAL(sal_Int32(1), day);
+    CPPUNIT_ASSERT_EQUAL(sal_Int32(1), month);
+    CPPUNIT_ASSERT_EQUAL(sal_Int32(1000), year);
+
+    i18npool::Calendar_hijri::ToGregorian(&day, &month, &year);
+    CPPUNIT_ASSERT_EQUAL(sal_Int32(18), day);
+    CPPUNIT_ASSERT_EQUAL(sal_Int32(10), month);
+    CPPUNIT_ASSERT_EQUAL(sal_Int32(1591), year);
+
+    // 1-1-2000 (Hijri) == 7-1-2562 (Gregorian)
+    // NOTE: The calculated date is 7-1-2562, but there is inconsistency
+    // with this website, as it states it should be 8-1-2562
+    // https://habibur.com/hijri/2000/
+    day = 7;
+    month = 1;
+    year = 2562;
+    i18npool::Calendar_hijri::getHijri(&day, &month, &year);
+    CPPUNIT_ASSERT_EQUAL(sal_Int32(1), day);
+    CPPUNIT_ASSERT_EQUAL(sal_Int32(1), month);
+    CPPUNIT_ASSERT_EQUAL(sal_Int32(2000), year);
+
+    i18npool::Calendar_hijri::ToGregorian(&day, &month, &year);
+    CPPUNIT_ASSERT_EQUAL(sal_Int32(7), day);
+    CPPUNIT_ASSERT_EQUAL(sal_Int32(1), month);
+    CPPUNIT_ASSERT_EQUAL(sal_Int32(2562), year);
+}
+
+void TestCalendar::testGetGregorianJulianDay()
+{
+    // 22-2-2022 (Gregorian) is Julian day 2459633 according to
+    // https://core2.gsfc.nasa.gov/time/julian.html
+    sal_Int32 nDay = 22, nMonth = 2, nYear = 2022;
+    sal_Int32 nJulianDay = i18npool::Calendar_hijri::getJulianDay(nDay, nMonth, nYear);
+    CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(2459633), nJulianDay);
+    // ...and converting the day number back must restore the same date.
+    i18npool::Calendar_hijri::getGregorianDay(nJulianDay, &nDay, &nMonth, &nYear);
+    CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(22), nDay);
+    CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(2), nMonth);
+    CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(2022), nYear);
+}
+
+CPPUNIT_TEST_SUITE_REGISTRATION(TestCalendar);
+
+CPPUNIT_PLUGIN_IMPLEMENT();
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab cinoptions=b1,g0,N-s cinkeys+=0=break: */
diff --git a/i18npool/qa/cppunit/test_characterclassification.cxx b/i18npool/qa/cppunit/test_characterclassification.cxx
new file mode 100644
index 000000000..846477d61
--- /dev/null
+++ b/i18npool/qa/cppunit/test_characterclassification.cxx
@@ -0,0 +1,107 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#include <com/sun/star/i18n/XCharacterClassification.hpp>
+#include <o3tl/cppunittraitshelper.hxx>
+#include <unotest/bootstrapfixturebase.hxx>
+
+using namespace ::com::sun::star;
+
+class TestCharacterClassification : public test::BootstrapFixtureBase // tests for the CharacterClassification service
+{
+public:
+    virtual void setUp() override; // creates m_xCC
+    virtual void tearDown() override; // releases m_xCC
+
+    void testTitleCase(); // toTitle/toUpper/toLower on ASCII text and on U+01F3
+    void testStringType(); // getStringType on ASCII text and on a non-BMP character
+
+    CPPUNIT_TEST_SUITE(TestCharacterClassification);
+    CPPUNIT_TEST(testTitleCase);
+    CPPUNIT_TEST(testStringType);
+    CPPUNIT_TEST_SUITE_END();
+private:
+    uno::Reference<i18n::XCharacterClassification> m_xCC; // service instance shared by the tests above
+};
+
+//A test to ensure that our Title Case functionality is working
+//http://lists.freedesktop.org/archives/libreoffice/2012-June/032767.html
+//https://bz.apache.org/ooo/show_bug.cgi?id=30863
+void TestCharacterClassification::testTitleCase()
+{
+    // en-US locale; the third constructor argument is the locale variant,
+    // which stays empty.
+    const lang::Locale aEnglish("en", "US", OUString());
+
+    {
+        //plain ASCII sentence
+        const OUString aPhrase("Some text");
+        const OUString aTitled = m_xCC->toTitle(aPhrase, 0, aPhrase.getLength(), aEnglish);
+        CPPUNIT_ASSERT_EQUAL_MESSAGE("Should be title", OUString("Some Text"), aTitled);
+        const OUString aUppered = m_xCC->toUpper(aPhrase, 0, aPhrase.getLength(), aEnglish);
+        CPPUNIT_ASSERT_EQUAL_MESSAGE("Should be upper", OUString("SOME TEXT"), aUppered);
+        const OUString aLowered = m_xCC->toLower(aPhrase, 0, aPhrase.getLength(), aEnglish);
+        CPPUNIT_ASSERT_EQUAL_MESSAGE("Should be lower ", OUString("some text"), aLowered);
+    }
+
+    {
+        //a digraph whose title, upper and lower case forms are three different characters
+        static constexpr OUStringLiteral aDigraph = u"\u01F3"; // LATIN SMALL LETTER DZ
+        const OUString aTitled = m_xCC->toTitle(aDigraph, 0, aDigraph.getLength(), aEnglish);
+        CPPUNIT_ASSERT_EQUAL_MESSAGE("Should be title", static_cast<sal_Int32>(1), aTitled.getLength());
+        CPPUNIT_ASSERT_EQUAL_MESSAGE("Should be title", u'\u01F2', aTitled[0]);
+        const OUString aUppered = m_xCC->toUpper(aDigraph, 0, aDigraph.getLength(), aEnglish);
+        CPPUNIT_ASSERT_EQUAL_MESSAGE("Should be upper", static_cast<sal_Int32>(1), aUppered.getLength());
+        CPPUNIT_ASSERT_EQUAL_MESSAGE("Should be upper", u'\u01F1', aUppered[0]);
+        const OUString aLowered = m_xCC->toLower(aDigraph, 0, aDigraph.getLength(), aEnglish);
+        CPPUNIT_ASSERT_EQUAL_MESSAGE("Should be lower ", static_cast<sal_Int32>(1), aLowered.getLength());
+        CPPUNIT_ASSERT_EQUAL_MESSAGE("Should be lower ", u'\u01F3', aLowered[0]);
+    }
+}
+
+//https://bugs.libreoffice.org/show_bug.cgi?id=69641
+void TestCharacterClassification::testStringType()
+{
+    // en-US locale; the third constructor argument is the locale variant,
+    // which stays empty.
+    const lang::Locale aEnglish("en", "US", OUString());
+
+    {
+        //plain ASCII sentence
+        const OUString aPhrase("Some text");
+        const sal_Int32 nType = m_xCC->getStringType(aPhrase, 0, aPhrase.getLength(), aEnglish);
+        CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(230), nType);
+    }
+
+    {
+        //a single character outside the BMP (two UTF-16 units)
+        static constexpr OUStringLiteral aTheta = u"\U0001D703"; // MATHEMATICAL ITALIC SMALL THETA
+        const sal_Int32 nType = m_xCC->getStringType(aTheta, 0, aTheta.getLength(), aEnglish);
+        CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(228), nType);
+    }
+
+}
+
+void TestCharacterClassification::setUp() // per-test fixture setup: creates the service under test
+{
+    BootstrapFixtureBase::setUp(); // base fixture first; m_xSFactory is used on the next line
+    m_xCC.set(m_xSFactory->createInstance("com.sun.star.i18n.CharacterClassification"), uno::UNO_QUERY_THROW);
+}
+
+void TestCharacterClassification::tearDown() // per-test fixture teardown, reverse order of setUp()
+{
+    m_xCC.clear(); // release our service reference before the base fixture is torn down
+    BootstrapFixtureBase::tearDown();
+}
+
+CPPUNIT_TEST_SUITE_REGISTRATION(TestCharacterClassification);
+
+CPPUNIT_PLUGIN_IMPLEMENT();
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/i18npool/qa/cppunit/test_defaultnumberingprovider.cxx b/i18npool/qa/cppunit/test_defaultnumberingprovider.cxx
new file mode 100644
index 000000000..1bf0ab521
--- /dev/null
+++ b/i18npool/qa/cppunit/test_defaultnumberingprovider.cxx
@@ -0,0 +1,470 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#include <test/bootstrapfixture.hxx>
+
+#include <com/sun/star/style/NumberingType.hpp>
+#include <com/sun/star/text/DefaultNumberingProvider.hpp>
+#include <com/sun/star/text/XNumberingFormatter.hpp>
+#include <com/sun/star/text/XNumberingTypeInfo.hpp>
+
+#include <comphelper/propertyvalue.hxx>
+
+#include <unordered_map>
+
+using namespace ::com::sun::star;
+
+/// Fixture for the i18npool DefaultNumberingProvider tests in this file; adds nothing to test::BootstrapFixture.
+class I18npoolDefaultnumberingproviderTest : public test::BootstrapFixture
+{
+};
+
+CPPUNIT_TEST_FIXTURE(I18npoolDefaultnumberingproviderTest, testNumberingIdentifiers)
+{
+    // All numbering identifiers must be unique.
+    std::unordered_map<OUString, sal_Int16> aMap; // identifier -> first numbering type that returned it
+    std::vector<OString> aFail; // one message line per offending numbering type
+
+    uno::Reference<text::XNumberingTypeInfo> xFormatter(
+        text::DefaultNumberingProvider::create(mxComponentContext), uno::UNO_QUERY);
+
+    // Do not use getSupportedNumberingTypes() because it depends on
+    // configuration whether CTL and CJK numberings are included or not.
+    // Also do not test for known values of
+    // offapi/com/sun/star/style/NumberingType.idl and miss newly added values.
+    // Instead, enumerate until an empty ID is returned but also check there
+    // are at least the known NumberingType values covered, just in case the
+    // table wasn't maintained. So this may have to be adapted from time to
+    // time.
+    constexpr sal_Int16 kLastKnown = css::style::NumberingType::NUMBER_LEGAL_KO;
+    for (sal_Int16 i = 0; i < SAL_MAX_INT16; ++i)
+    {
+        OUString aID(xFormatter->getNumberingIdentifier(i));
+        if (aID.isEmpty() && i > kLastKnown)
+            break; // for
+
+        switch (i)
+        {
+            case css::style::NumberingType::TRANSLITERATION:
+                // TODO: why does this have no identifier?
+            case css::style::NumberingType::NUMBER_UPPER_KO:
+                // FIXME: duplicate of NUMBER_UPPER_ZH_TW
+            case css::style::NumberingType::NUMBER_INDIC_DEVANAGARI:
+                // FIXME: duplicate of NUMBER_EAST_ARABIC_INDIC
+                break; // known exceptions: neither recorded in aMap nor reported
+            default: // fails on an empty ID, or on an ID already stored for an earlier i
+                if (aID.isEmpty() || !aMap.insert(std::pair(aID, i)).second)
+                {
+                    aFail.emplace_back(
+                        "Numbering: " + OString::number(i) + " \"" + aID.toUtf8() + "\""
+                        + (aID.isEmpty() ? ""
+                                         : OString(" duplicate of " + OString::number(aMap[aID])))
+                        + "\n");
+                }
+        }
+    }
+
+    if (!aFail.empty())
+    {
+        OString aMsg("Not unique numbering identifiers:\n");
+        for (auto const& r : aFail)
+            aMsg += r;
+        CPPUNIT_ASSERT_MESSAGE(aMsg.getStr(), false); // always fails; lists every offender at once
+    }
+}
+
+CPPUNIT_TEST_FIXTURE(I18npoolDefaultnumberingproviderTest, testArabicZero)
+{
+    // A one-digit value gets a leading zero: 1 is expected as "01".
+    uno::Reference<text::XNumberingFormatter> xNumbering(
+        text::DefaultNumberingProvider::create(mxComponentContext), uno::UNO_QUERY);
+    uno::Sequence<beans::PropertyValue> aArgs = {
+        comphelper::makePropertyValue("NumberingType",
+                                      static_cast<sal_uInt16>(style::NumberingType::ARABIC_ZERO)),
+        comphelper::makePropertyValue("Value", sal_Int32(1)),
+    };
+    lang::Locale aNoLocale;
+    OUString aFormatted = xNumbering->makeNumberingString(aArgs, aNoLocale);
+    // Before ARABIC_ZERO was supported, the call above ended in a
+    // lang.IllegalArgumentException and this test failed.
+    CPPUNIT_ASSERT_EQUAL(OUString("01"), aFormatted);
+
+    // A two-digit value is expected unchanged: 10 stays "10".
+    aArgs = {
+        comphelper::makePropertyValue("NumberingType",
+                                      static_cast<sal_uInt16>(style::NumberingType::ARABIC_ZERO)),
+        comphelper::makePropertyValue("Value", sal_Int32(10)),
+    };
+    aFormatted = xNumbering->makeNumberingString(aArgs, aNoLocale);
+    CPPUNIT_ASSERT_EQUAL(OUString("10"), aFormatted);
+}
+
+CPPUNIT_TEST_FIXTURE(I18npoolDefaultnumberingproviderTest, testArabicZero3)
+{
+    // A two-digit value gets a leading zero: 10 is expected as "010".
+    uno::Reference<text::XNumberingFormatter> xNumbering(
+        text::DefaultNumberingProvider::create(mxComponentContext), uno::UNO_QUERY);
+    uno::Sequence<beans::PropertyValue> aArgs = {
+        comphelper::makePropertyValue("NumberingType",
+                                      static_cast<sal_uInt16>(style::NumberingType::ARABIC_ZERO3)),
+        comphelper::makePropertyValue("Value", sal_Int32(10)),
+    };
+    lang::Locale aNoLocale;
+    OUString aFormatted = xNumbering->makeNumberingString(aArgs, aNoLocale);
+    // Before ARABIC_ZERO3 was supported, the call above ended in a
+    // lang.IllegalArgumentException and this test failed.
+    CPPUNIT_ASSERT_EQUAL(OUString("010"), aFormatted);
+
+    // A three-digit value is expected unchanged: 100 stays "100".
+    aArgs = {
+        comphelper::makePropertyValue("NumberingType",
+                                      static_cast<sal_uInt16>(style::NumberingType::ARABIC_ZERO3)),
+        comphelper::makePropertyValue("Value", sal_Int32(100)),
+    };
+    aFormatted = xNumbering->makeNumberingString(aArgs, aNoLocale);
+    CPPUNIT_ASSERT_EQUAL(OUString("100"), aFormatted);
+}
+
+CPPUNIT_TEST_FIXTURE(I18npoolDefaultnumberingproviderTest, testArabicZero4)
+{
+    // A three-digit value gets a leading zero: 100 is expected as "0100".
+    uno::Reference<text::XNumberingFormatter> xNumbering(
+        text::DefaultNumberingProvider::create(mxComponentContext), uno::UNO_QUERY);
+    uno::Sequence<beans::PropertyValue> aArgs = {
+        comphelper::makePropertyValue("NumberingType",
+                                      static_cast<sal_uInt16>(style::NumberingType::ARABIC_ZERO4)),
+        comphelper::makePropertyValue("Value", sal_Int32(100)),
+    };
+    lang::Locale aNoLocale;
+    OUString aFormatted = xNumbering->makeNumberingString(aArgs, aNoLocale);
+    // Before ARABIC_ZERO4 was supported, the call above ended in a
+    // lang.IllegalArgumentException and this test failed.
+    CPPUNIT_ASSERT_EQUAL(OUString("0100"), aFormatted);
+
+    // A four-digit value is expected unchanged: 1000 stays "1000".
+    aArgs = {
+        comphelper::makePropertyValue("NumberingType",
+                                      static_cast<sal_uInt16>(style::NumberingType::ARABIC_ZERO4)),
+        comphelper::makePropertyValue("Value", sal_Int32(1000)),
+    };
+    aFormatted = xNumbering->makeNumberingString(aArgs, aNoLocale);
+    CPPUNIT_ASSERT_EQUAL(OUString("1000"), aFormatted);
+}
+
+CPPUNIT_TEST_FIXTURE(I18npoolDefaultnumberingproviderTest, testArabicZero5)
+{
+    // A four-digit value gets a leading zero: 1000 is expected as "01000".
+    uno::Reference<text::XNumberingFormatter> xNumbering(
+        text::DefaultNumberingProvider::create(mxComponentContext), uno::UNO_QUERY);
+    uno::Sequence<beans::PropertyValue> aArgs = {
+        comphelper::makePropertyValue("NumberingType",
+                                      static_cast<sal_uInt16>(style::NumberingType::ARABIC_ZERO5)),
+        comphelper::makePropertyValue("Value", sal_Int32(1000)),
+    };
+    lang::Locale aNoLocale;
+    OUString aFormatted = xNumbering->makeNumberingString(aArgs, aNoLocale);
+    // Before ARABIC_ZERO5 was supported, the call above ended in a
+    // lang.IllegalArgumentException and this test failed.
+    CPPUNIT_ASSERT_EQUAL(OUString("01000"), aFormatted);
+
+    // A five-digit value is expected unchanged: 10000 stays "10000".
+    aArgs = {
+        comphelper::makePropertyValue("NumberingType",
+                                      static_cast<sal_uInt16>(style::NumberingType::ARABIC_ZERO5)),
+        comphelper::makePropertyValue("Value", sal_Int32(10000)),
+    };
+    aFormatted = xNumbering->makeNumberingString(aArgs, aNoLocale);
+    CPPUNIT_ASSERT_EQUAL(OUString("10000"), aFormatted);
+}
+
+CPPUNIT_TEST_FIXTURE(I18npoolDefaultnumberingproviderTest, testKoreanCounting)
+{
+    // NUMBER_HANGUL_KO: 1 is expected as "일" (U+C77C).
+    uno::Reference<text::XNumberingFormatter> xNumbering(
+        text::DefaultNumberingProvider::create(mxComponentContext), uno::UNO_QUERY);
+    uno::Sequence<beans::PropertyValue> aArgs = {
+        comphelper::makePropertyValue(
+            "NumberingType", static_cast<sal_uInt16>(style::NumberingType::NUMBER_HANGUL_KO)),
+        comphelper::makePropertyValue("Value", sal_Int32(1)),
+    };
+    lang::Locale aNoLocale;
+    OUString aFormatted = xNumbering->makeNumberingString(aArgs, aNoLocale);
+    // Before NUMBER_HANGUL_KO was supported, the call above ended in a
+    // lang.IllegalArgumentException and this test failed.
+    CPPUNIT_ASSERT_EQUAL(OUString(u"\uc77c"), aFormatted);
+
+    // 10 is expected as "십" (U+C2ED).
+    aArgs = {
+        comphelper::makePropertyValue(
+            "NumberingType", static_cast<sal_uInt16>(style::NumberingType::NUMBER_HANGUL_KO)),
+        comphelper::makePropertyValue("Value", sal_Int32(10)),
+    };
+    aFormatted = xNumbering->makeNumberingString(aArgs, aNoLocale);
+    CPPUNIT_ASSERT_EQUAL(OUString(u"\uc2ed"), aFormatted);
+
+    // 100 is expected as "백" (U+BC31).
+    aArgs = {
+        comphelper::makePropertyValue(
+            "NumberingType", static_cast<sal_uInt16>(style::NumberingType::NUMBER_HANGUL_KO)),
+        comphelper::makePropertyValue("Value", sal_Int32(100)),
+    };
+    aFormatted = xNumbering->makeNumberingString(aArgs, aNoLocale);
+    CPPUNIT_ASSERT_EQUAL(OUString(u"\ubc31"), aFormatted);
+}
+
+CPPUNIT_TEST_FIXTURE(I18npoolDefaultnumberingproviderTest, testKoreanLegal)
+{
+    // NUMBER_LEGAL_KO: checks 1..10 plus sample two-digit values up to 99. First: 1 -> "하나"
+    uno::Reference<text::XNumberingFormatter> xFormatter(
+        text::DefaultNumberingProvider::create(mxComponentContext), uno::UNO_QUERY);
+    uno::Sequence<beans::PropertyValue> aProperties = {
+        comphelper::makePropertyValue(
+            "NumberingType", static_cast<sal_uInt16>(style::NumberingType::NUMBER_LEGAL_KO)),
+        comphelper::makePropertyValue("Value", static_cast<sal_Int32>(1)),
+    };
+    lang::Locale aLocale; // default-constructed locale, reused for every call below
+    OUString aActual = xFormatter->makeNumberingString(aProperties, aLocale);
+    // Without the accompanying fix in place, this test would have failed with a
+    // lang.IllegalArgumentException, because support for NUMBER_LEGAL_KO was missing.
+    CPPUNIT_ASSERT_EQUAL(OUString(u"\ud558\ub098"), aActual);
+
+    // 2 -> "둘"
+    aProperties = {
+        comphelper::makePropertyValue(
+            "NumberingType", static_cast<sal_uInt16>(style::NumberingType::NUMBER_LEGAL_KO)),
+        comphelper::makePropertyValue("Value", static_cast<sal_Int32>(2)),
+    };
+    aActual = xFormatter->makeNumberingString(aProperties, aLocale);
+    CPPUNIT_ASSERT_EQUAL(OUString(u"\ub458"), aActual);
+
+    // 3 -> "셋"
+    aProperties = {
+        comphelper::makePropertyValue(
+            "NumberingType", static_cast<sal_uInt16>(style::NumberingType::NUMBER_LEGAL_KO)),
+        comphelper::makePropertyValue("Value", static_cast<sal_Int32>(3)),
+    };
+    aActual = xFormatter->makeNumberingString(aProperties, aLocale);
+    CPPUNIT_ASSERT_EQUAL(OUString(u"\uc14b"), aActual);
+
+    // 4 -> "넷"
+    aProperties = {
+        comphelper::makePropertyValue(
+            "NumberingType", static_cast<sal_uInt16>(style::NumberingType::NUMBER_LEGAL_KO)),
+        comphelper::makePropertyValue("Value", static_cast<sal_Int32>(4)),
+    };
+    aActual = xFormatter->makeNumberingString(aProperties, aLocale);
+    CPPUNIT_ASSERT_EQUAL(OUString(u"\ub137"), aActual);
+
+    // 5 -> "다섯"
+    aProperties = {
+        comphelper::makePropertyValue(
+            "NumberingType", static_cast<sal_uInt16>(style::NumberingType::NUMBER_LEGAL_KO)),
+        comphelper::makePropertyValue("Value", static_cast<sal_Int32>(5)),
+    };
+    aActual = xFormatter->makeNumberingString(aProperties, aLocale);
+    CPPUNIT_ASSERT_EQUAL(OUString(u"\ub2e4\uc12f"), aActual);
+    // 6 -> "여섯"
+    aProperties = {
+        comphelper::makePropertyValue(
+            "NumberingType", static_cast<sal_uInt16>(style::NumberingType::NUMBER_LEGAL_KO)),
+        comphelper::makePropertyValue("Value", static_cast<sal_Int32>(6)),
+    };
+    aActual = xFormatter->makeNumberingString(aProperties, aLocale);
+    CPPUNIT_ASSERT_EQUAL(OUString(u"\uc5ec\uc12f"), aActual);
+    // 7 -> "일곱"
+    aProperties = {
+        comphelper::makePropertyValue(
+            "NumberingType", static_cast<sal_uInt16>(style::NumberingType::NUMBER_LEGAL_KO)),
+        comphelper::makePropertyValue("Value", static_cast<sal_Int32>(7)),
+    };
+    aActual = xFormatter->makeNumberingString(aProperties, aLocale);
+    CPPUNIT_ASSERT_EQUAL(OUString(u"\uc77c\uacf1"), aActual);
+
+    // 8 -> "여덟"
+    aProperties = {
+        comphelper::makePropertyValue(
+            "NumberingType", static_cast<sal_uInt16>(style::NumberingType::NUMBER_LEGAL_KO)),
+        comphelper::makePropertyValue("Value", static_cast<sal_Int32>(8)),
+    };
+    aActual = xFormatter->makeNumberingString(aProperties, aLocale);
+    CPPUNIT_ASSERT_EQUAL(OUString(u"\uc5ec\ub35f"), aActual);
+
+    // 9 -> "아홉"
+    aProperties = {
+        comphelper::makePropertyValue(
+            "NumberingType", static_cast<sal_uInt16>(style::NumberingType::NUMBER_LEGAL_KO)),
+        comphelper::makePropertyValue("Value", static_cast<sal_Int32>(9)),
+    };
+    aActual = xFormatter->makeNumberingString(aProperties, aLocale);
+    CPPUNIT_ASSERT_EQUAL(OUString(u"\uc544\ud649"), aActual);
+
+    // 10 -> "열"
+    aProperties = {
+        comphelper::makePropertyValue(
+            "NumberingType", static_cast<sal_uInt16>(style::NumberingType::NUMBER_LEGAL_KO)),
+        comphelper::makePropertyValue("Value", static_cast<sal_Int32>(10)),
+    };
+    aActual = xFormatter->makeNumberingString(aProperties, aLocale);
+    CPPUNIT_ASSERT_EQUAL(OUString(u"\uc5f4"), aActual);
+
+    // 21 -> "스물하나"
+    aProperties = {
+        comphelper::makePropertyValue(
+            "NumberingType", static_cast<sal_uInt16>(style::NumberingType::NUMBER_LEGAL_KO)),
+        comphelper::makePropertyValue("Value", static_cast<sal_Int32>(21)),
+    };
+    aActual = xFormatter->makeNumberingString(aProperties, aLocale);
+    CPPUNIT_ASSERT_EQUAL(OUString(u"\uc2a4\ubb3c\ud558\ub098"), aActual);
+
+    // 32 -> "서른둘"
+    aProperties = {
+        comphelper::makePropertyValue(
+            "NumberingType", static_cast<sal_uInt16>(style::NumberingType::NUMBER_LEGAL_KO)),
+        comphelper::makePropertyValue("Value", static_cast<sal_Int32>(32)),
+    };
+    aActual = xFormatter->makeNumberingString(aProperties, aLocale);
+    CPPUNIT_ASSERT_EQUAL(OUString(u"\uc11c\ub978\ub458"), aActual);
+
+    // 43 -> "마흔셋"
+    aProperties = {
+        comphelper::makePropertyValue(
+            "NumberingType", static_cast<sal_uInt16>(style::NumberingType::NUMBER_LEGAL_KO)),
+        comphelper::makePropertyValue("Value", static_cast<sal_Int32>(43)),
+    };
+    aActual = xFormatter->makeNumberingString(aProperties, aLocale);
+    CPPUNIT_ASSERT_EQUAL(OUString(u"\ub9c8\ud754\uc14b"), aActual);
+
+    // 54 -> "쉰넷"
+    aProperties = {
+        comphelper::makePropertyValue(
+            "NumberingType", static_cast<sal_uInt16>(style::NumberingType::NUMBER_LEGAL_KO)),
+        comphelper::makePropertyValue("Value", static_cast<sal_Int32>(54)),
+    };
+    aActual = xFormatter->makeNumberingString(aProperties, aLocale);
+    CPPUNIT_ASSERT_EQUAL(OUString(u"\uc270\ub137"), aActual);
+
+    // 65 -> "예순다섯"
+    aProperties = {
+        comphelper::makePropertyValue(
+            "NumberingType", static_cast<sal_uInt16>(style::NumberingType::NUMBER_LEGAL_KO)),
+        comphelper::makePropertyValue("Value", static_cast<sal_Int32>(65)),
+    };
+    aActual = xFormatter->makeNumberingString(aProperties, aLocale);
+    CPPUNIT_ASSERT_EQUAL(OUString(u"\uc608\uc21c\ub2e4\uc12f"), aActual);
+
+    // 76 -> "일흔여섯"
+    aProperties = {
+        comphelper::makePropertyValue(
+            "NumberingType", static_cast<sal_uInt16>(style::NumberingType::NUMBER_LEGAL_KO)),
+        comphelper::makePropertyValue("Value", static_cast<sal_Int32>(76)),
+    };
+    aActual = xFormatter->makeNumberingString(aProperties, aLocale);
+    CPPUNIT_ASSERT_EQUAL(OUString(u"\uc77c\ud754\uc5ec\uc12f"), aActual);
+
+    // 87 -> "여든일곱"
+    aProperties = {
+        comphelper::makePropertyValue(
+            "NumberingType", static_cast<sal_uInt16>(style::NumberingType::NUMBER_LEGAL_KO)),
+        comphelper::makePropertyValue("Value", static_cast<sal_Int32>(87)),
+    };
+    aActual = xFormatter->makeNumberingString(aProperties, aLocale);
+    CPPUNIT_ASSERT_EQUAL(OUString(u"\uc5ec\ub4e0\uc77c\uacf1"), aActual);
+
+    // 98 -> "아흔여덟"
+    aProperties = {
+        comphelper::makePropertyValue(
+            "NumberingType", static_cast<sal_uInt16>(style::NumberingType::NUMBER_LEGAL_KO)),
+        comphelper::makePropertyValue("Value", static_cast<sal_Int32>(98)),
+    };
+    aActual = xFormatter->makeNumberingString(aProperties, aLocale);
+    CPPUNIT_ASSERT_EQUAL(OUString(u"\uc544\ud754\uc5ec\ub35f"), aActual);
+
+    // 99 -> "아흔아홉"
+    aProperties = {
+        comphelper::makePropertyValue(
+            "NumberingType", static_cast<sal_uInt16>(style::NumberingType::NUMBER_LEGAL_KO)),
+        comphelper::makePropertyValue("Value", static_cast<sal_Int32>(99)),
+    };
+    aActual = xFormatter->makeNumberingString(aProperties, aLocale);
+    CPPUNIT_ASSERT_EQUAL(OUString(u"\uc544\ud754\uc544\ud649"), aActual);
+}
+
+CPPUNIT_TEST_FIXTURE(I18npoolDefaultnumberingproviderTest, testKoreanDigital)
+{
+    // NUMBER_DIGITAL_KO: the expected strings spell the value digit by digit. First: 1 -> "일"
+    uno::Reference<text::XNumberingFormatter> xFormatter(
+        text::DefaultNumberingProvider::create(mxComponentContext), uno::UNO_QUERY);
+    uno::Sequence<beans::PropertyValue> aProperties = {
+        comphelper::makePropertyValue(
+            "NumberingType", static_cast<sal_uInt16>(style::NumberingType::NUMBER_DIGITAL_KO)),
+        comphelper::makePropertyValue("Value", static_cast<sal_Int32>(1)),
+    };
+    lang::Locale aLocale; // default-constructed locale, reused for every call below
+    OUString aActual = xFormatter->makeNumberingString(aProperties, aLocale);
+    // Without the accompanying fix in place, this test would have failed with a
+    // lang.IllegalArgumentException, because support for NUMBER_DIGITAL_KO was missing.
+    CPPUNIT_ASSERT_EQUAL(OUString(u"\uc77c"), aActual);
+
+    // 10 -> "일영"
+    aProperties = {
+        comphelper::makePropertyValue(
+            "NumberingType", static_cast<sal_uInt16>(style::NumberingType::NUMBER_DIGITAL_KO)),
+        comphelper::makePropertyValue("Value", static_cast<sal_Int32>(10)),
+    };
+    aActual = xFormatter->makeNumberingString(aProperties, aLocale);
+    CPPUNIT_ASSERT_EQUAL(OUString(u"\uc77c\uc601"), aActual);
+
+    // 100 -> "일영영"
+    aProperties = {
+        comphelper::makePropertyValue(
+            "NumberingType", static_cast<sal_uInt16>(style::NumberingType::NUMBER_DIGITAL_KO)),
+        comphelper::makePropertyValue("Value", static_cast<sal_Int32>(100)),
+    };
+    aActual = xFormatter->makeNumberingString(aProperties, aLocale);
+    CPPUNIT_ASSERT_EQUAL(OUString(u"\uc77c\uc601\uc601"), aActual);
+}
+
+CPPUNIT_TEST_FIXTURE(I18npoolDefaultnumberingproviderTest, testKoreanDigital2)
+{
+    // NUMBER_DIGITAL2_KO: digit-by-digit output using CJK ideographs. First: 1 -> "一"
+    uno::Reference<text::XNumberingFormatter> xFormatter(
+        text::DefaultNumberingProvider::create(mxComponentContext), uno::UNO_QUERY);
+    uno::Sequence<beans::PropertyValue> aProperties = {
+        comphelper::makePropertyValue(
+            "NumberingType", static_cast<sal_uInt16>(style::NumberingType::NUMBER_DIGITAL2_KO)),
+        comphelper::makePropertyValue("Value", static_cast<sal_Int32>(1)),
+    };
+    lang::Locale aLocale; // default-constructed locale, reused for every call below
+    OUString aActual = xFormatter->makeNumberingString(aProperties, aLocale);
+    // Without the accompanying fix in place, this test would have failed with a
+    // lang.IllegalArgumentException, because support for NUMBER_DIGITAL2_KO was missing.
+    CPPUNIT_ASSERT_EQUAL(OUString(u"\u4e00"), aActual);
+
+    // 10 -> "一零"
+    aProperties = {
+        comphelper::makePropertyValue(
+            "NumberingType", static_cast<sal_uInt16>(style::NumberingType::NUMBER_DIGITAL2_KO)),
+        comphelper::makePropertyValue("Value", static_cast<sal_Int32>(10)),
+    };
+    aActual = xFormatter->makeNumberingString(aProperties, aLocale);
+    CPPUNIT_ASSERT_EQUAL(OUString(u"\u4e00\u96f6"), aActual);
+
+    // 100 -> "一零零"
+    aProperties = {
+        comphelper::makePropertyValue(
+            "NumberingType", static_cast<sal_uInt16>(style::NumberingType::NUMBER_DIGITAL2_KO)),
+        comphelper::makePropertyValue("Value", static_cast<sal_Int32>(100)),
+    };
+    aActual = xFormatter->makeNumberingString(aProperties, aLocale);
+    CPPUNIT_ASSERT_EQUAL(OUString(u"\u4e00\u96f6\u96f6"), aActual);
+}
+
+CPPUNIT_PLUGIN_IMPLEMENT(); // CppUnit macro: emits the test plug-in entry point for this test library
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/i18npool/qa/cppunit/test_ordinalsuffix.cxx b/i18npool/qa/cppunit/test_ordinalsuffix.cxx
new file mode 100644
index 000000000..be21f38ca
--- /dev/null
+++ b/i18npool/qa/cppunit/test_ordinalsuffix.cxx
@@ -0,0 +1,86 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+#include <algorithm>
+#include <com/sun/star/i18n/XOrdinalSuffix.hpp>
+#include <com/sun/star/lang/Locale.hpp>
+#include <unotest/bootstrapfixturebase.hxx>
+
+using namespace com::sun::star;
+
+class TestOrdinalSuffix : public test::BootstrapFixtureBase // tests for com.sun.star.i18n.OrdinalSuffix
+{
+private:
+    uno::Reference<i18n::XOrdinalSuffix> m_xOrdinal; // service under test; set in setUp(), cleared in tearDown()
+
+public:
+    virtual void setUp() override;
+    virtual void tearDown() override;
+
+    void testFrench(); // fr-LU suffixes for 1 and 2
+    void testEnglish(); // en-US suffixes for 1, 2 and 3
+
+    CPPUNIT_TEST_SUITE(TestOrdinalSuffix);
+    CPPUNIT_TEST(testFrench);
+    CPPUNIT_TEST(testEnglish);
+    CPPUNIT_TEST_SUITE_END();
+};
+
+void TestOrdinalSuffix::setUp() // instantiates the OrdinalSuffix service used by the tests
+{
+    BootstrapFixtureBase::setUp(); // base fixture first; m_xSFactory is not declared here, presumably inherited
+    m_xOrdinal.set(m_xSFactory->createInstance("com.sun.star.i18n.OrdinalSuffix"), uno::UNO_QUERY_THROW);
+}
+
+void TestOrdinalSuffix::tearDown() // mirrors setUp() in reverse order
+{
+    m_xOrdinal.clear(); // drop our reference before the base fixture tears down
+    BootstrapFixtureBase::tearDown();
+}
+
+void TestOrdinalSuffix::testFrench() // each check only requires the suffix to be among those returned
+{
+    lang::Locale aLocale("fr", "LU", "");
+
+    // 1 -> "er" (1er)
+    uno::Sequence<OUString> aSuffixes = m_xOrdinal->getOrdinalSuffix(1, aLocale);
+    const OUString* pFind = std::find(std::cbegin(aSuffixes), std::cend(aSuffixes), OUString("er"));
+    CPPUNIT_ASSERT(pFind != std::cend(aSuffixes));
+
+    // 2 -> "e" (2e, 3e, etc.); only 2 is actually queried here
+    aSuffixes = m_xOrdinal->getOrdinalSuffix(2, aLocale);
+    pFind = std::find(std::cbegin(aSuffixes), std::cend(aSuffixes), OUString("e"));
+    CPPUNIT_ASSERT(pFind != std::cend(aSuffixes));
+}
+
+void TestOrdinalSuffix::testEnglish() // each check only requires the suffix to be among those returned
+{
+    lang::Locale aLocale("en", "US", "");
+
+    // 1 -> "st" (1st)
+    uno::Sequence<OUString> aSuffixes = m_xOrdinal->getOrdinalSuffix(1, aLocale);
+    const OUString* pFind = std::find(std::cbegin(aSuffixes), std::cend(aSuffixes), OUString("st"));
+    CPPUNIT_ASSERT(pFind != std::cend(aSuffixes));
+
+    // 2 -> "nd" (2nd)
+    aSuffixes = m_xOrdinal->getOrdinalSuffix(2, aLocale);
+    pFind = std::find(std::cbegin(aSuffixes), std::cend(aSuffixes), OUString("nd"));
+    CPPUNIT_ASSERT(pFind != std::cend(aSuffixes));
+
+    // 3 -> "rd" (3rd)
+    aSuffixes = m_xOrdinal->getOrdinalSuffix(3, aLocale);
+    pFind = std::find(std::cbegin(aSuffixes), std::cend(aSuffixes), OUString("rd"));
+    CPPUNIT_ASSERT(pFind != std::cend(aSuffixes));
+}
+
+
+CPPUNIT_TEST_SUITE_REGISTRATION( TestOrdinalSuffix ); // CppUnit macro: registers the suite declared in the class
+
+CPPUNIT_PLUGIN_IMPLEMENT(); // CppUnit macro: emits the test plug-in entry point for this test library
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/i18npool/qa/cppunit/test_textsearch.cxx b/i18npool/qa/cppunit/test_textsearch.cxx
new file mode 100644
index 000000000..f224e58c3
--- /dev/null
+++ b/i18npool/qa/cppunit/test_textsearch.cxx
@@ -0,0 +1,544 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * This file incorporates work covered by the following license notice:
+ *
+ *   Licensed to the Apache Software Foundation (ASF) under one or more
+ *   contributor license agreements. See the NOTICE file distributed
+ *   with this work for additional information regarding copyright
+ *   ownership. The ASF licenses this file to you under the Apache
+ *   License, Version 2.0 (the "License"); you may not use this file
+ *   except in compliance with the License. You may obtain a copy of
+ *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
+ */
+
+#include <com/sun/star/util/SearchFlags.hpp>
+#include <com/sun/star/util/SearchOptions.hpp>
+#include <com/sun/star/util/SearchAlgorithms2.hpp>
+#include <com/sun/star/util/XTextSearch2.hpp>
+#include <unotest/bootstrapfixturebase.hxx>
+#include <i18nutil/transliteration.hxx>
+
+#include <unicode/regex.h>
+
+using namespace ::com::sun::star;
+
+class TestTextSearch : public test::BootstrapFixtureBase // text-search tests (regex, wildcard, literal)
+{
+public:
+    virtual void setUp() override;
+    virtual void tearDown() override;
+
+    void testICU(); // exercises icu::RegexMatcher directly, without the search service
+    void testSearches(); // regular-expression search, forward and backward
+    void testWildcardSearch(); // WILDCARD algorithm through m_xSearch2
+    void testApostropheSearch(); // ASCII vs. typographic apostrophe (U+2019)
+    void testTdf138410();
+
+    CPPUNIT_TEST_SUITE(TestTextSearch);
+    CPPUNIT_TEST(testICU);
+    CPPUNIT_TEST(testSearches);
+    CPPUNIT_TEST(testWildcardSearch);
+    CPPUNIT_TEST(testApostropheSearch);
+    CPPUNIT_TEST(testTdf138410);
+    CPPUNIT_TEST_SUITE_END();
+private:
+    uno::Reference<util::XTextSearch> m_xSearch; // configured via setOptions() in the tests
+    uno::Reference<util::XTextSearch2> m_xSearch2; // configured via setOptions2() in testWildcardSearch()
+};
+
+// Sanity check our ICU first: run two one-character patterns through icu::RegexMatcher directly.
+void TestTextSearch::testICU()
+{
+    UErrorCode nErr = U_ZERO_ERROR; // shared status variable, re-checked after every ICU call below
+    sal_uInt32 nSearchFlags = UREGEX_UWORD | UREGEX_CASE_INSENSITIVE;
+
+    OUString aString( "abcdefgh" );
+    OUString aPattern( "e" );
+    icu::UnicodeString aSearchPat( reinterpret_cast<const UChar*>(aPattern.getStr()), aPattern.getLength() );
+
+    std::unique_ptr<icu::RegexMatcher> pRegexMatcher(new icu::RegexMatcher( aSearchPat, nSearchFlags, nErr ));
+
+    icu::UnicodeString aSource( reinterpret_cast<const UChar*>(aString.getStr()), aString.getLength() );
+    pRegexMatcher->reset( aSource );
+
+    CPPUNIT_ASSERT( pRegexMatcher->find( 0, nErr ) ); // "e" is expected at [4,5)
+    CPPUNIT_ASSERT_EQUAL( U_ZERO_ERROR, nErr );
+    CPPUNIT_ASSERT_EQUAL( static_cast<int32_t>(4), pRegexMatcher->start( nErr ) );
+    CPPUNIT_ASSERT_EQUAL( U_ZERO_ERROR, nErr );
+    CPPUNIT_ASSERT_EQUAL( static_cast<int32_t>(5), pRegexMatcher->end( nErr ) );
+    CPPUNIT_ASSERT_EQUAL( U_ZERO_ERROR, nErr );
+
+    OUString aString2( "acababaabcababadcdaa" );
+    OUString aPattern2( "a" );
+
+    icu::UnicodeString aSearchPat2( reinterpret_cast<const UChar*>(aPattern2.getStr()), aPattern2.getLength() );
+    pRegexMatcher.reset(new icu::RegexMatcher( aSearchPat2, nSearchFlags, nErr )); // replaces the first matcher
+
+    icu::UnicodeString aSource2( reinterpret_cast<const UChar*>(aString2.getStr()), aString2.getLength() );
+    pRegexMatcher->reset( aSource2 );
+
+    CPPUNIT_ASSERT( pRegexMatcher->find( 0, nErr ) ); // first "a" is expected at [0,1)
+    CPPUNIT_ASSERT_EQUAL( U_ZERO_ERROR, nErr );
+    CPPUNIT_ASSERT_EQUAL( static_cast<int32_t>(0), pRegexMatcher->start( nErr ) );
+    CPPUNIT_ASSERT_EQUAL( U_ZERO_ERROR, nErr );
+    CPPUNIT_ASSERT_EQUAL( static_cast<int32_t>(1), pRegexMatcher->end( nErr ) );
+    CPPUNIT_ASSERT_EQUAL( U_ZERO_ERROR, nErr );
+}
+
+void TestTextSearch::testSearches() // regex search through m_xSearch, forward and backward
+{
+    OUString str( "acababaabcababadcdaa" );
+    sal_Int32 startPos = 2, endPos = 20 ;
+    sal_Int32 const fStartRes = 10, fEndRes = 18 ; // forward: "ababadcd" at [10,18)
+    sal_Int32 const bStartRes = 18, bEndRes = 10 ; // backward: same span, start and end swapped
+
+    // set options
+    util::SearchOptions aOptions;
+    aOptions.algorithmType = util::SearchAlgorithms_REGEXP ;
+    aOptions.searchFlag = util::SearchFlags::ALL_IGNORE_CASE;
+    aOptions.searchString = "(ab)*a(c|d)+";
+    m_xSearch->setOptions( aOptions );
+
+    util::SearchResult aRes;
+
+    // search forward
+    aRes = m_xSearch->searchForward( str, startPos, endPos );
+    CPPUNIT_ASSERT( aRes.subRegExpressions > 0 );
+    CPPUNIT_ASSERT_EQUAL( fStartRes, aRes.startOffset[0] );
+    CPPUNIT_ASSERT_EQUAL( fEndRes, aRes.endOffset[0] );
+
+    // search backwards (note the swapped position arguments: from endPos down to startPos)
+    aRes = m_xSearch->searchBackward( str, endPos, startPos );
+    CPPUNIT_ASSERT( aRes.subRegExpressions > 0 );
+    CPPUNIT_ASSERT_EQUAL( bStartRes, aRes.startOffset[0] );
+    CPPUNIT_ASSERT_EQUAL( bEndRes, aRes.endOffset[0] );
+
+    aOptions.transliterateFlags = static_cast<int>(TransliterationFlags::IGNORE_CASE
+                                | TransliterationFlags::IGNORE_WIDTH);
+    aOptions.searchString = "([^ ]*)[ ]*([^ ]*)"; // two capture groups separated by optional spaces
+    m_xSearch->setOptions(aOptions);
+    aRes = m_xSearch->searchForward("11 22 33", 2, 7); // search starts on the space at index 2
+    CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(3), aRes.subRegExpressions); // whole match + 2 groups
+    CPPUNIT_ASSERT_EQUAL(sal_Int32(2), aRes.startOffset[0]); // whole match: [2,5) = " 22"
+    CPPUNIT_ASSERT_EQUAL(sal_Int32(5), aRes.endOffset[0]);
+    CPPUNIT_ASSERT_EQUAL(sal_Int32(2), aRes.startOffset[1]); // group 1: empty, [2,2)
+    CPPUNIT_ASSERT_EQUAL(sal_Int32(2), aRes.endOffset[1]);
+    CPPUNIT_ASSERT_EQUAL(sal_Int32(3), aRes.startOffset[2]); // group 2: [3,5) = "22"
+    CPPUNIT_ASSERT_EQUAL(sal_Int32(5), aRes.endOffset[2]);
+}
+
+void TestTextSearch::testWildcardSearch() // WILDCARD algorithm: '?', '*' and '~' escapes, both directions
+{
+    util::SearchOptions2 aOptions;
+    OUString aText;
+    util::SearchResult aRes;
+
+    aOptions.AlgorithmType2 = util::SearchAlgorithms2::WILDCARD ;
+    aOptions.WildcardEscapeCharacter = '~'; // used below as "~?" and "~*" to match a literal '?' / '*'
+    // aOptions.searchFlag = ::css::util::SearchFlags::WILD_MATCH_SELECTION;
+    // is not set, so substring match is allowed.
+    aOptions.transliterateFlags = sal_Int32(::css::i18n::TransliterationModules::TransliterationModules_IGNORE_CASE);
+    aText = "abAca";
+
+    aOptions.searchString = "a";
+    m_xSearch2->setOptions2( aOptions );
+    // match first "a", [0,1)
+    aRes = m_xSearch2->searchForward( aText, 0, aText.getLength());
+    CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(1), aRes.subRegExpressions);
+    CPPUNIT_ASSERT_EQUAL(sal_Int32(0), aRes.startOffset[0]);
+    CPPUNIT_ASSERT_EQUAL(sal_Int32(1), aRes.endOffset[0]);
+    // match last "a", (5,4]
+    aRes = m_xSearch2->searchBackward( aText, aText.getLength(), 0);
+    CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(1), aRes.subRegExpressions);
+    CPPUNIT_ASSERT_EQUAL(sal_Int32(5), aRes.startOffset[0]);
+    CPPUNIT_ASSERT_EQUAL(sal_Int32(4), aRes.endOffset[0]);
+
+    aOptions.searchString = "a?";
+    m_xSearch2->setOptions2( aOptions );
+    // match "ab", [0,2)
+    aRes = m_xSearch2->searchForward( aText, 0, aText.getLength());
+    CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(1), aRes.subRegExpressions);
+    CPPUNIT_ASSERT_EQUAL(sal_Int32(0), aRes.startOffset[0]);
+    CPPUNIT_ASSERT_EQUAL(sal_Int32(2), aRes.endOffset[0]);
+    // match "ac", (4,2]
+    aRes = m_xSearch2->searchBackward( aText, aText.getLength(), 0);
+    CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(1), aRes.subRegExpressions);
+    CPPUNIT_ASSERT_EQUAL(sal_Int32(4), aRes.startOffset[0]);
+    CPPUNIT_ASSERT_EQUAL(sal_Int32(2), aRes.endOffset[0]);
+
+    aOptions.searchString = "a*c";
+    m_xSearch2->setOptions2( aOptions );
+    // match "abac", [0,4) XXX NOTE: first match forward
+    aRes = m_xSearch2->searchForward( aText, 0, aText.getLength());
+    CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(1), aRes.subRegExpressions);
+    CPPUNIT_ASSERT_EQUAL(sal_Int32(0), aRes.startOffset[0]);
+    CPPUNIT_ASSERT_EQUAL(sal_Int32(4), aRes.endOffset[0]);
+    // match "ac", (4,2] XXX NOTE: first match backward, not greedy
+    aRes = m_xSearch2->searchBackward( aText, aText.getLength(), 0);
+    CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(1), aRes.subRegExpressions);
+    CPPUNIT_ASSERT_EQUAL(sal_Int32(4), aRes.startOffset[0]);
+    CPPUNIT_ASSERT_EQUAL(sal_Int32(2), aRes.endOffset[0]);
+
+    aOptions.searchString = "b*a";
+    m_xSearch2->setOptions2( aOptions );
+    // match "ba", [1,3) XXX NOTE: first match forward, not greedy
+    aRes = m_xSearch2->searchForward( aText, 0, aText.getLength());
+    CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(1), aRes.subRegExpressions);
+    CPPUNIT_ASSERT_EQUAL(sal_Int32(1), aRes.startOffset[0]);
+    CPPUNIT_ASSERT_EQUAL(sal_Int32(3), aRes.endOffset[0]);
+    // match "baca", (5,1] XXX NOTE: first match backward
+    aRes = m_xSearch2->searchBackward( aText, aText.getLength(), 0);
+    CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(1), aRes.subRegExpressions);
+    CPPUNIT_ASSERT_EQUAL(sal_Int32(5), aRes.startOffset[0]);
+    CPPUNIT_ASSERT_EQUAL(sal_Int32(1), aRes.endOffset[0]);
+
+    aText = "ab?ca";
+
+    aOptions.searchString = "?~??";
+    m_xSearch2->setOptions2( aOptions );
+    // match "b?c", [1,4)
+    aRes = m_xSearch2->searchForward( aText, 0, aText.getLength());
+    CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(1), aRes.subRegExpressions);
+    CPPUNIT_ASSERT_EQUAL(sal_Int32(1), aRes.startOffset[0]);
+    CPPUNIT_ASSERT_EQUAL(sal_Int32(4), aRes.endOffset[0]);
+    // match "b?c", (4,1]
+    aRes = m_xSearch2->searchBackward( aText, aText.getLength(), 0);
+    CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(1), aRes.subRegExpressions);
+    CPPUNIT_ASSERT_EQUAL(sal_Int32(4), aRes.startOffset[0]);
+    CPPUNIT_ASSERT_EQUAL(sal_Int32(1), aRes.endOffset[0]);
+
+    aText = "ab*ca";
+
+    aOptions.searchString = "?~*?";
+    m_xSearch2->setOptions2( aOptions );
+    // match "b*c", [1,4)
+    aRes = m_xSearch2->searchForward( aText, 0, aText.getLength());
+    CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(1), aRes.subRegExpressions);
+    CPPUNIT_ASSERT_EQUAL(sal_Int32(1), aRes.startOffset[0]);
+    CPPUNIT_ASSERT_EQUAL(sal_Int32(4), aRes.endOffset[0]);
+    // match "b*c", (4,1]
+    aRes = m_xSearch2->searchBackward( aText, aText.getLength(), 0);
+    CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(1), aRes.subRegExpressions);
+    CPPUNIT_ASSERT_EQUAL(sal_Int32(4), aRes.startOffset[0]);
+    CPPUNIT_ASSERT_EQUAL(sal_Int32(1), aRes.endOffset[0]);
+
+    aOptions.searchString = "ca?";
+    m_xSearch2->setOptions2( aOptions );
+    // no match: the text ends right after "ca", leaving nothing for '?'
+    aRes = m_xSearch2->searchForward( aText, 0, aText.getLength());
+    CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(0), aRes.subRegExpressions);
+    // no match
+    aRes = m_xSearch2->searchBackward( aText, aText.getLength(), 0);
+    CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(0), aRes.subRegExpressions);
+
+    aOptions.searchString = "ca*";
+    m_xSearch2->setOptions2( aOptions );
+    // match "ca", [3,5)
+    aRes = m_xSearch2->searchForward( aText, 0, aText.getLength());
+    CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(1), aRes.subRegExpressions);
+    CPPUNIT_ASSERT_EQUAL(sal_Int32(3), aRes.startOffset[0]);
+    CPPUNIT_ASSERT_EQUAL(sal_Int32(5), aRes.endOffset[0]);
+    // match "ca", (5,3]
+    aRes = m_xSearch2->searchBackward( aText, aText.getLength(), 0);
+    CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(1), aRes.subRegExpressions);
+    CPPUNIT_ASSERT_EQUAL(sal_Int32(5), aRes.startOffset[0]);
+    CPPUNIT_ASSERT_EQUAL(sal_Int32(3), aRes.endOffset[0]);
+
+    aOptions.searchString = "*ca*";
+    m_xSearch2->setOptions2( aOptions );
+    // match "ab*ca", [0,5)
+    aRes = m_xSearch2->searchForward( aText, 0, aText.getLength());
+    CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(1), aRes.subRegExpressions);
+    CPPUNIT_ASSERT_EQUAL(sal_Int32(0), aRes.startOffset[0]);
+    CPPUNIT_ASSERT_EQUAL(sal_Int32(5), aRes.endOffset[0]);
+    // match "ab*ca", (5,0]
+    aRes = m_xSearch2->searchBackward( aText, aText.getLength(), 0);
+    CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(1), aRes.subRegExpressions);
+    CPPUNIT_ASSERT_EQUAL(sal_Int32(5), aRes.startOffset[0]);
+    CPPUNIT_ASSERT_EQUAL(sal_Int32(0), aRes.endOffset[0]);
+
+    aText = "123123";
+    aOptions.searchString = "*2?";
+    m_xSearch2->setOptions2( aOptions );
+    // match first "123", [0,3)
+    aRes = m_xSearch2->searchForward( aText, 0, aText.getLength());
+    CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(1), aRes.subRegExpressions);
+    CPPUNIT_ASSERT_EQUAL(sal_Int32(0), aRes.startOffset[0]);
+    CPPUNIT_ASSERT_EQUAL(sal_Int32(3), aRes.endOffset[0]);
+    // match "123123", (6,0]    Yes, this looks odd, but it is like searching "?2*" forward.
+    aRes = m_xSearch2->searchBackward( aText, aText.getLength(), 0);
+    CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(1), aRes.subRegExpressions);
+    CPPUNIT_ASSERT_EQUAL(sal_Int32(6), aRes.startOffset[0]);
+    CPPUNIT_ASSERT_EQUAL(sal_Int32(0), aRes.endOffset[0]);
+
+    aOptions.searchFlag |= util::SearchFlags::WILD_MATCH_SELECTION; // same pattern "*2?", flag now set
+    m_xSearch2->setOptions2( aOptions );
+    // match "123123", [0,6) with greedy '*'
+    aRes = m_xSearch2->searchForward( aText, 0, aText.getLength());
+    CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(1), aRes.subRegExpressions);
+    CPPUNIT_ASSERT_EQUAL(sal_Int32(0), aRes.startOffset[0]);
+    CPPUNIT_ASSERT_EQUAL(sal_Int32(6), aRes.endOffset[0]);
+    // match "123123", (6,0]
+    aRes = m_xSearch2->searchBackward( aText, aText.getLength(), 0);
+    CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(1), aRes.subRegExpressions);
+    CPPUNIT_ASSERT_EQUAL(sal_Int32(6), aRes.startOffset[0]);
+    CPPUNIT_ASSERT_EQUAL(sal_Int32(0), aRes.endOffset[0]);
+}
+
+void TestTextSearch::testApostropheSearch() // literal search: ASCII ' vs. typographic apostrophe U+2019
+{
+    // A) find typographic apostrophes also by using ASCII apostrophe in searchString
+    OUString str( u"It\u2019s an apostrophe." );
+    sal_Int32 startPos = 0, endPos = str.getLength();
+
+    // set options
+    util::SearchOptions aOptions;
+    aOptions.algorithmType = util::SearchAlgorithms_ABSOLUTE;
+    aOptions.searchFlag = util::SearchFlags::ALL_IGNORE_CASE;
+    aOptions.searchString = "'";
+    m_xSearch->setOptions( aOptions );
+
+    util::SearchResult aRes;
+
+    // search forward
+    aRes = m_xSearch->searchForward( str, startPos, endPos );
+    // This was 0, i.e. nothing was found.
+    CPPUNIT_ASSERT( aRes.subRegExpressions > 0 );
+    CPPUNIT_ASSERT_EQUAL( static_cast<sal_Int32>(2), aRes.startOffset[0] );
+    CPPUNIT_ASSERT_EQUAL( static_cast<sal_Int32>(3), aRes.endOffset[0] );
+
+    // search backwards (offsets come back reversed: start 3, end 2)
+    aRes = m_xSearch->searchBackward( str, endPos, startPos );
+    // This was 0, i.e. nothing was found.
+    CPPUNIT_ASSERT( aRes.subRegExpressions > 0 );
+    CPPUNIT_ASSERT_EQUAL( static_cast<sal_Int32>(3), aRes.startOffset[0] );
+    CPPUNIT_ASSERT_EQUAL( static_cast<sal_Int32>(2), aRes.endOffset[0] );
+
+    // check with transliteration
+    aOptions.transliterateFlags = static_cast<int>(TransliterationFlags::IGNORE_CASE
+                                | TransliterationFlags::IGNORE_WIDTH);
+    m_xSearch->setOptions(aOptions);
+
+    // search forward
+    aRes = m_xSearch->searchForward( str, startPos, endPos );
+    // This was 0, i.e. nothing was found.
+    CPPUNIT_ASSERT( aRes.subRegExpressions > 0 );
+    CPPUNIT_ASSERT_EQUAL( static_cast<sal_Int32>(2), aRes.startOffset[0] );
+    CPPUNIT_ASSERT_EQUAL( static_cast<sal_Int32>(3), aRes.endOffset[0] );
+
+    // search backwards
+    aRes = m_xSearch->searchBackward( str, endPos, startPos );
+    // This was 0, i.e. nothing was found.
+    CPPUNIT_ASSERT( aRes.subRegExpressions > 0 );
+    CPPUNIT_ASSERT_EQUAL( static_cast<sal_Int32>(3), aRes.startOffset[0] );
+    CPPUNIT_ASSERT_EQUAL( static_cast<sal_Int32>(2), aRes.endOffset[0] );
+
+    // B) search ASCII apostrophe in a text with ASCII apostrophes
+    str = str.replace(u'\u2019', '\''); // one-for-one character swap, so endPos stays valid
+
+    // search forward
+    aRes = m_xSearch->searchForward( str, startPos, endPos );
+    CPPUNIT_ASSERT( aRes.subRegExpressions > 0 );
+    CPPUNIT_ASSERT_EQUAL( static_cast<sal_Int32>(2), aRes.startOffset[0] );
+    CPPUNIT_ASSERT_EQUAL( static_cast<sal_Int32>(3), aRes.endOffset[0] );
+
+    // search backwards
+    aRes = m_xSearch->searchBackward( str, endPos, startPos );
+    CPPUNIT_ASSERT( aRes.subRegExpressions > 0 );
+    CPPUNIT_ASSERT_EQUAL( static_cast<sal_Int32>(3), aRes.startOffset[0] );
+    CPPUNIT_ASSERT_EQUAL( static_cast<sal_Int32>(2), aRes.endOffset[0] );
+
+    // C) search typographic apostrophe in a text with ASCII apostrophes (no result)
+    aOptions.searchString = OUString(u"\u2019");
+    m_xSearch->setOptions( aOptions );
+
+    aRes = m_xSearch->searchForward( str, startPos, endPos );
+    CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(0), aRes.subRegExpressions);
+
+    aRes = m_xSearch->searchBackward( str, endPos, startPos );
+    CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(0), aRes.subRegExpressions);
+
+    // D) search typographic apostrophe in a text with typographic apostrophes
+    str = str.replace('\'', u'\u2019'); // swap back; the length is unchanged again
+
+    // search forward
+    aRes = m_xSearch->searchForward( str, startPos, endPos );
+    CPPUNIT_ASSERT( aRes.subRegExpressions > 0 );
+    CPPUNIT_ASSERT_EQUAL( static_cast<sal_Int32>(2), aRes.startOffset[0] );
+    CPPUNIT_ASSERT_EQUAL( static_cast<sal_Int32>(3), aRes.endOffset[0] );
+
+    // search backwards
+    aRes = m_xSearch->searchBackward( str, endPos, startPos );
+    CPPUNIT_ASSERT( aRes.subRegExpressions > 0 );
+    CPPUNIT_ASSERT_EQUAL( static_cast<sal_Int32>(3), aRes.startOffset[0] );
+    CPPUNIT_ASSERT_EQUAL( static_cast<sal_Int32>(2), aRes.endOffset[0] );
+
+    // E) search mixed apostrophes in a text with mixed apostrophes:
+    aOptions.searchString = OUString(u"'\u2019");
+    m_xSearch->setOptions( aOptions );
+    str = u"test: \u2019'"; // new text with a different length: use str.getLength() below, not endPos
+
+    // search forward
+    aRes = m_xSearch->searchForward( str, startPos, str.getLength());
+    CPPUNIT_ASSERT( aRes.subRegExpressions > 0 );
+
+    // search backwards
+    aRes = m_xSearch->searchBackward( str, str.getLength(), startPos );
+    CPPUNIT_ASSERT( aRes.subRegExpressions > 0 );
+
+    // F) search mixed apostrophes in a text with ASCII apostrophes:
+    str = u"test: ''";
+
+    // search forward
+    aRes = m_xSearch->searchForward( str, startPos, str.getLength());
+    CPPUNIT_ASSERT( aRes.subRegExpressions > 0 );
+
+    // search backwards
+    aRes = m_xSearch->searchBackward( str, str.getLength(), startPos );
+    CPPUNIT_ASSERT( aRes.subRegExpressions > 0 );
+}
+
+void TestTextSearch::testTdf138410()
+{ // Searches Arabic text for a base letter, for base+mark, and for a combining mark alone.
+    OUString str(u"\u0643\u064f\u062a\u064f\u0628 \u0643\u062a\u0628");
+    sal_Int32 startPos = 0, endPos = str.getLength();
+    // str: KAF DAMMA TEH DAMMA BEH (0-4), space (5), KAF TEH BEH (6-8); DAMMA is the mark
+    util::SearchOptions aOptions;
+    aOptions.algorithmType = util::SearchAlgorithms_ABSOLUTE;
+
+    util::SearchResult aRes;
+
+    // A) base alone
+    // The search string will be found whether it is followed by a mark in the
+    // text or not, and whether IGNORE_DIACRITICS_CTL is set or not.
+
+    // set options: search for KAF, no transliteration
+    aOptions.searchString = u"\u0643";
+    aOptions.transliterateFlags = 0;
+    m_xSearch->setOptions(aOptions);
+
+    // search forward: first KAF, at the start of the marked word
+    aRes = m_xSearch->searchForward(str, startPos, endPos);
+    CPPUNIT_ASSERT(aRes.subRegExpressions > 0);
+    CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(0), aRes.startOffset[0]);
+    CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(1), aRes.endOffset[0]);
+
+    // search backwards: last KAF; the asserts expect startOffset > endOffset for backward hits
+    aRes = m_xSearch->searchBackward(str, endPos, startPos);
+    CPPUNIT_ASSERT(aRes.subRegExpressions > 0);
+    CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(7), aRes.startOffset[0]);
+    CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(6), aRes.endOffset[0]);
+
+    // check with transliteration: same offsets expected with IGNORE_DIACRITICS_CTL set
+    aOptions.transliterateFlags = static_cast<int>(TransliterationFlags::IGNORE_DIACRITICS_CTL);
+    m_xSearch->setOptions(aOptions);
+
+    // search forward
+    aRes = m_xSearch->searchForward(str, startPos, endPos);
+    CPPUNIT_ASSERT(aRes.subRegExpressions > 0);
+    CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(0), aRes.startOffset[0]);
+    CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(1), aRes.endOffset[0]);
+
+    // search backwards
+    aRes = m_xSearch->searchBackward(str, endPos, startPos);
+    CPPUNIT_ASSERT(aRes.subRegExpressions > 0);
+    CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(7), aRes.startOffset[0]);
+    CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(6), aRes.endOffset[0]);
+
+    // B) base+mark
+    // The search string will be found when followed by a mark in the text, or
+    // when IGNORE_DIACRITICS_CTL is set whether it is followed by a mark or
+    // not.
+
+    // set options: search for KAF followed by DAMMA, no transliteration
+    aOptions.searchString = u"\u0643\u064f";
+    aOptions.transliterateFlags = 0;
+    m_xSearch->setOptions(aOptions);
+
+    // search forward: the first two characters of the marked word
+    aRes = m_xSearch->searchForward(str, startPos, endPos);
+    CPPUNIT_ASSERT(aRes.subRegExpressions > 0);
+    CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(0), aRes.startOffset[0]);
+    CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(2), aRes.endOffset[0]);
+
+    // search backwards: again the marked word, the second word has no KAF+DAMMA
+    aRes = m_xSearch->searchBackward(str, endPos, startPos);
+    CPPUNIT_ASSERT(aRes.subRegExpressions > 0);
+    CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(2), aRes.startOffset[0]);
+    CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(0), aRes.endOffset[0]);
+
+    // check with transliteration (IGNORE_DIACRITICS_CTL set)
+    aOptions.transliterateFlags = static_cast<int>(TransliterationFlags::IGNORE_DIACRITICS_CTL);
+    m_xSearch->setOptions(aOptions);
+
+    // search forward: expected offsets are 0..1, i.e. the mark at index 1 is not included
+    aRes = m_xSearch->searchForward(str, startPos, endPos);
+    CPPUNIT_ASSERT(aRes.subRegExpressions > 0);
+    CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(0), aRes.startOffset[0]);
+    CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(1), aRes.endOffset[0]);
+
+    // search backwards: now the unmarked KAF of the second word is expected
+    aRes = m_xSearch->searchBackward(str, endPos, startPos);
+    CPPUNIT_ASSERT(aRes.subRegExpressions > 0);
+    CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(7), aRes.startOffset[0]);
+    CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(6), aRes.endOffset[0]);
+
+    // C) mark alone
+    // The search string will be found only when IGNORE_DIACRITICS_CTL is not
+    // set.
+
+    // set options: search for DAMMA only, no transliteration
+    aOptions.searchString = u"\u064f";
+    aOptions.transliterateFlags = 0;
+    m_xSearch->setOptions(aOptions);
+
+    // search forward: first DAMMA, at index 1
+    aRes = m_xSearch->searchForward(str, startPos, endPos);
+    CPPUNIT_ASSERT(aRes.subRegExpressions > 0);
+    CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(1), aRes.startOffset[0]);
+    CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(2), aRes.endOffset[0]);
+
+    // search backwards: last DAMMA, at index 3
+    aRes = m_xSearch->searchBackward(str, endPos, startPos);
+    CPPUNIT_ASSERT(aRes.subRegExpressions > 0);
+    CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(4), aRes.startOffset[0]);
+    CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(3), aRes.endOffset[0]);
+
+    // with ignore marks the mark will not be found
+    aOptions.transliterateFlags = static_cast<int>(TransliterationFlags::IGNORE_DIACRITICS_CTL);
+    m_xSearch->setOptions(aOptions);
+
+    // search forward: no match expected
+    aRes = m_xSearch->searchForward(str, startPos, endPos);
+    CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(0), aRes.subRegExpressions);
+
+    // search backwards: no match expected
+    aRes = m_xSearch->searchBackward(str, endPos, startPos);
+    CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(0), aRes.subRegExpressions);
+}
+
+void TestTextSearch::setUp()
+{ // Runs the base fixture set-up, then instantiates both text-search services by name.
+    BootstrapFixtureBase::setUp(); // first: m_xSFactory is used below (presumably set here - confirm)
+    m_xSearch.set(m_xSFactory->createInstance("com.sun.star.util.TextSearch"), uno::UNO_QUERY_THROW);
+    m_xSearch2.set(m_xSFactory->createInstance("com.sun.star.util.TextSearch2"), uno::UNO_QUERY_THROW);
+}
+
+void TestTextSearch::tearDown()
+{ // Drops both service references before the base fixture is torn down.
+    m_xSearch.clear();
+    m_xSearch2.clear();
+    BootstrapFixtureBase::tearDown();
+}
+
+CPPUNIT_TEST_SUITE_REGISTRATION(TestTextSearch); // adds the fixture's suite to the CppUnit registry
+
+CPPUNIT_PLUGIN_IMPLEMENT(); // CppUnit macro that implements the test plug-in for this library
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/i18npool/qa/cppunit/transliteration.cxx b/i18npool/qa/cppunit/transliteration.cxx
new file mode 100644
index 000000000..712a3c022
--- /dev/null
+++ b/i18npool/qa/cppunit/transliteration.cxx
@@ -0,0 +1,122 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; fill-column: 100 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#include <sal/config.h>
+
+#include <cppunit/TestFixture.h>
+#include <cppunit/extensions/HelperMacros.h>
+#include <cppunit/plugin/TestPlugIn.h>
+
+#include <com/sun/star/i18n/Transliteration.hpp>
+#include <com/sun/star/i18n/TransliterationModulesNew.hpp>
+#include <cppuhelper/bootstrap.hxx>
+
+namespace
+{
+class Transliteration : public CppUnit::TestFixture
+{ // CppUnit fixture with a single smoke test for loadModuleNew().
+public:
+    void testLoadModuleNew()
+    { // Creates the service in a default-bootstrapped component context and loads each module.
+        auto const trans
+            = css::i18n::Transliteration::create(cppu::defaultBootstrap_InitialComponentContext());
+        // Verify that loading succeeds without throwing an exception, for each possible
+        // TransliterationModulesNew value (TODO: there is an upper limit of maxCascade 27 in
+        // i18npool/inc/transliterationImpl.hxx for the length of the passed
+        // TransliterationModulesNew value, so pass each one individually rather than all 65 at
+        // once). NOTE(review): 63 calls follow, not 65 - confirm that no enum value is missing:
+        trans->loadModuleNew({ css::i18n::TransliterationModulesNew_UPPERCASE_LOWERCASE }, {});
+        trans->loadModuleNew({ css::i18n::TransliterationModulesNew_LOWERCASE_UPPERCASE }, {});
+        trans->loadModuleNew({ css::i18n::TransliterationModulesNew_HALFWIDTH_FULLWIDTH }, {});
+        trans->loadModuleNew({ css::i18n::TransliterationModulesNew_FULLWIDTH_HALFWIDTH }, {});
+        trans->loadModuleNew({ css::i18n::TransliterationModulesNew_KATAKANA_HIRAGANA }, {});
+        trans->loadModuleNew({ css::i18n::TransliterationModulesNew_HIRAGANA_KATAKANA }, {});
+        trans->loadModuleNew({ css::i18n::TransliterationModulesNew_IGNORE_CASE }, {});
+        trans->loadModuleNew({ css::i18n::TransliterationModulesNew_IGNORE_KANA }, {});
+        trans->loadModuleNew({ css::i18n::TransliterationModulesNew_IGNORE_WIDTH }, {});
+        trans->loadModuleNew({ css::i18n::TransliterationModulesNew_IgnoreTraditionalKanji_ja_JP },
+                             {});
+        trans->loadModuleNew({ css::i18n::TransliterationModulesNew_IgnoreTraditionalKana_ja_JP },
+                             {});
+        trans->loadModuleNew({ css::i18n::TransliterationModulesNew_IgnoreMinusSign_ja_JP }, {});
+        trans->loadModuleNew({ css::i18n::TransliterationModulesNew_IgnoreIterationMark_ja_JP },
+                             {});
+        trans->loadModuleNew({ css::i18n::TransliterationModulesNew_IgnoreSeparator_ja_JP }, {});
+        trans->loadModuleNew({ css::i18n::TransliterationModulesNew_IgnoreZiZu_ja_JP }, {});
+        trans->loadModuleNew({ css::i18n::TransliterationModulesNew_IgnoreBaFa_ja_JP }, {});
+        trans->loadModuleNew({ css::i18n::TransliterationModulesNew_IgnoreTiJi_ja_JP }, {});
+        trans->loadModuleNew({ css::i18n::TransliterationModulesNew_IgnoreHyuByu_ja_JP }, {});
+        trans->loadModuleNew({ css::i18n::TransliterationModulesNew_IgnoreSeZe_ja_JP }, {});
+        trans->loadModuleNew({ css::i18n::TransliterationModulesNew_IgnoreIandEfollowedByYa_ja_JP },
+                             {});
+        trans->loadModuleNew({ css::i18n::TransliterationModulesNew_IgnoreKiKuFollowedBySa_ja_JP },
+                             {});
+        trans->loadModuleNew({ css::i18n::TransliterationModulesNew_IgnoreSize_ja_JP }, {});
+        trans->loadModuleNew(
+            { css::i18n::TransliterationModulesNew_IgnoreProlongedSoundMark_ja_JP }, {});
+        trans->loadModuleNew({ css::i18n::TransliterationModulesNew_IgnoreMiddleDot_ja_JP }, {});
+        trans->loadModuleNew({ css::i18n::TransliterationModulesNew_IgnoreSpace_ja_JP }, {});
+        trans->loadModuleNew({ css::i18n::TransliterationModulesNew_SmallToLarge_ja_JP }, {});
+        trans->loadModuleNew({ css::i18n::TransliterationModulesNew_LargeToSmall_ja_JP }, {});
+        trans->loadModuleNew({ css::i18n::TransliterationModulesNew_NumToTextLower_zh_CN }, {});
+        trans->loadModuleNew({ css::i18n::TransliterationModulesNew_NumToTextUpper_zh_CN }, {});
+        trans->loadModuleNew({ css::i18n::TransliterationModulesNew_NumToTextLower_zh_TW }, {});
+        trans->loadModuleNew({ css::i18n::TransliterationModulesNew_NumToTextUpper_zh_TW }, {});
+        trans->loadModuleNew({ css::i18n::TransliterationModulesNew_NumToTextFormalHangul_ko }, {});
+        trans->loadModuleNew({ css::i18n::TransliterationModulesNew_NumToTextFormalLower_ko }, {});
+        trans->loadModuleNew({ css::i18n::TransliterationModulesNew_NumToTextFormalUpper_ko }, {});
+        trans->loadModuleNew({ css::i18n::TransliterationModulesNew_NumToTextInformalHangul_ko },
+                             {});
+        trans->loadModuleNew({ css::i18n::TransliterationModulesNew_NumToTextInformalLower_ko },
+                             {});
+        trans->loadModuleNew({ css::i18n::TransliterationModulesNew_NumToTextInformalUpper_ko },
+                             {});
+        trans->loadModuleNew({ css::i18n::TransliterationModulesNew_NumToCharLower_zh_CN }, {});
+        trans->loadModuleNew({ css::i18n::TransliterationModulesNew_NumToCharUpper_zh_CN }, {});
+        trans->loadModuleNew({ css::i18n::TransliterationModulesNew_NumToCharLower_zh_TW }, {});
+        trans->loadModuleNew({ css::i18n::TransliterationModulesNew_NumToCharUpper_zh_TW }, {});
+        trans->loadModuleNew({ css::i18n::TransliterationModulesNew_NumToCharHangul_ko }, {});
+        trans->loadModuleNew({ css::i18n::TransliterationModulesNew_NumToCharLower_ko }, {});
+        trans->loadModuleNew({ css::i18n::TransliterationModulesNew_NumToCharUpper_ko }, {});
+        trans->loadModuleNew({ css::i18n::TransliterationModulesNew_NumToCharFullwidth }, {});
+        trans->loadModuleNew({ css::i18n::TransliterationModulesNew_NumToCharKanjiShort_ja_JP },
+                             {});
+        trans->loadModuleNew({ css::i18n::TransliterationModulesNew_TextToNumLower_zh_CN }, {});
+        trans->loadModuleNew({ css::i18n::TransliterationModulesNew_TextToNumUpper_zh_CN }, {});
+        trans->loadModuleNew({ css::i18n::TransliterationModulesNew_TextToNumLower_zh_TW }, {});
+        trans->loadModuleNew({ css::i18n::TransliterationModulesNew_TextToNumUpper_zh_TW }, {});
+        trans->loadModuleNew({ css::i18n::TransliterationModulesNew_TextToNumFormalHangul_ko }, {});
+        trans->loadModuleNew({ css::i18n::TransliterationModulesNew_TextToNumFormalLower_ko }, {});
+        trans->loadModuleNew({ css::i18n::TransliterationModulesNew_TextToNumFormalUpper_ko }, {});
+        trans->loadModuleNew({ css::i18n::TransliterationModulesNew_TextToNumInformalHangul_ko },
+                             {});
+        trans->loadModuleNew({ css::i18n::TransliterationModulesNew_TextToNumInformalLower_ko },
+                             {});
+        trans->loadModuleNew({ css::i18n::TransliterationModulesNew_TextToNumInformalUpper_ko },
+                             {});
+        trans->loadModuleNew({ css::i18n::TransliterationModulesNew_CharToNumLower_zh_CN }, {});
+        trans->loadModuleNew({ css::i18n::TransliterationModulesNew_CharToNumUpper_zh_CN }, {});
+        trans->loadModuleNew({ css::i18n::TransliterationModulesNew_CharToNumLower_zh_TW }, {});
+        trans->loadModuleNew({ css::i18n::TransliterationModulesNew_CharToNumUpper_zh_TW }, {});
+        trans->loadModuleNew({ css::i18n::TransliterationModulesNew_CharToNumHangul_ko }, {});
+        trans->loadModuleNew({ css::i18n::TransliterationModulesNew_CharToNumLower_ko }, {});
+        trans->loadModuleNew({ css::i18n::TransliterationModulesNew_CharToNumUpper_ko }, {});
+    }
+
+    CPPUNIT_TEST_SUITE(Transliteration);
+    CPPUNIT_TEST(testLoadModuleNew);
+    CPPUNIT_TEST_SUITE_END();
+};
+
+CPPUNIT_TEST_SUITE_REGISTRATION(Transliteration); // adds the fixture's suite to the CppUnit registry
+}
+
+CPPUNIT_PLUGIN_IMPLEMENT(); // CppUnit macro that implements the test plug-in for this library
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab cinoptions=b1,g0,N-s cinkeys+=0=break: */