From 267c6f2ac71f92999e969232431ba04678e7437e Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Mon, 15 Apr 2024 07:54:39 +0200 Subject: Adding upstream version 4:24.2.0. Signed-off-by: Daniel Baumann --- i18npool/qa/cppunit/indexentry.cxx | 70 ++ i18npool/qa/cppunit/test_breakiterator.cxx | 1062 ++++++++++++++++++++ i18npool/qa/cppunit/test_calendar.cxx | 130 +++ .../qa/cppunit/test_characterclassification.cxx | 289 ++++++ .../qa/cppunit/test_defaultnumberingprovider.cxx | 470 +++++++++ i18npool/qa/cppunit/test_ordinalsuffix.cxx | 86 ++ i18npool/qa/cppunit/test_textsearch.cxx | 544 ++++++++++ i18npool/qa/cppunit/transliteration.cxx | 192 ++++ 8 files changed, 2843 insertions(+) create mode 100644 i18npool/qa/cppunit/indexentry.cxx create mode 100644 i18npool/qa/cppunit/test_breakiterator.cxx create mode 100644 i18npool/qa/cppunit/test_calendar.cxx create mode 100644 i18npool/qa/cppunit/test_characterclassification.cxx create mode 100644 i18npool/qa/cppunit/test_defaultnumberingprovider.cxx create mode 100644 i18npool/qa/cppunit/test_ordinalsuffix.cxx create mode 100644 i18npool/qa/cppunit/test_textsearch.cxx create mode 100644 i18npool/qa/cppunit/transliteration.cxx (limited to 'i18npool/qa') diff --git a/i18npool/qa/cppunit/indexentry.cxx b/i18npool/qa/cppunit/indexentry.cxx new file mode 100644 index 0000000000..a95bca547a --- /dev/null +++ b/i18npool/qa/cppunit/indexentry.cxx @@ -0,0 +1,70 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; fill-column: 100 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ */ + +#include + +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +namespace +{ +class IndexEntry : public CppUnit::TestFixture /* CppUnit fixture exercising css::i18n::IndexEntrySupplier; its single test case covers the ja-JP locale. */ +{ +public: + void setUp() /* Creates supplier_ from cppu::defaultBootstrap_InitialComponentContext(). */ + { + supplier_ = css::i18n::IndexEntrySupplier::create( + cppu::defaultBootstrap_InitialComponentContext()); + } + + void testJapanese() /* For locale ja-JP: asserts that exactly four index algorithms are reported, that each of the four "phonetic" variants is found in the list, and that loadAlgorithm() returns true for each of them. */ + { + css::lang::Locale loc("ja", "JP", ""); + auto const s = supplier_->getAlgorithmList(loc); + CPPUNIT_ASSERT_EQUAL(sal_Int32(4), s.getLength()); + CPPUNIT_ASSERT( /* presumably findValue yields -1 when the name is absent, so "!= -1" means "listed" - TODO confirm against comphelper */ + comphelper::findValue(s, "phonetic (alphanumeric first) (grouped by consonant)") != -1); + CPPUNIT_ASSERT( + comphelper::findValue(s, "phonetic (alphanumeric first) (grouped by syllable)") != -1); + CPPUNIT_ASSERT( + comphelper::findValue(s, "phonetic (alphanumeric last) (grouped by consonant)") != -1); + CPPUNIT_ASSERT( + comphelper::findValue(s, "phonetic (alphanumeric last) (grouped by syllable)") != -1); + CPPUNIT_ASSERT(supplier_->loadAlgorithm( /* NOTE(review): the trailing 0 looks like the collator-options argument - confirm against the supplier interface */ + loc, "phonetic (alphanumeric first) (grouped by consonant)", 0)); + CPPUNIT_ASSERT(supplier_->loadAlgorithm( + loc, "phonetic (alphanumeric first) (grouped by syllable)", 0)); + CPPUNIT_ASSERT(supplier_->loadAlgorithm( + loc, "phonetic (alphanumeric last) (grouped by consonant)", 0)); + CPPUNIT_ASSERT( + supplier_->loadAlgorithm(loc, "phonetic (alphanumeric last) (grouped by syllable)", 0)); + } + + CPPUNIT_TEST_SUITE(IndexEntry); + CPPUNIT_TEST(testJapanese); + CPPUNIT_TEST_SUITE_END(); + +private: + css::uno::Reference supplier_; /* supplier under test; assigned in setUp() */ +}; + +CPPUNIT_TEST_SUITE_REGISTRATION(IndexEntry); +} + +CPPUNIT_PLUGIN_IMPLEMENT(); + +/* vim:set shiftwidth=4 softtabstop=4 expandtab cinoptions=b1,g0,N-s cinkeys+=0=break: */ diff --git a/i18npool/qa/cppunit/test_breakiterator.cxx b/i18npool/qa/cppunit/test_breakiterator.cxx new file mode 100644 index 0000000000..4463f46270 --- /dev/null +++ b/i18npool/qa/cppunit/test_breakiterator.cxx @@ -0,0 +1,1062 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ 
+/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + */ + +#include +#include +#include +#include +#include +#include + +#include + +#include + +#include +#include + +using namespace ::com::sun::star; + +class TestBreakIterator : public test::BootstrapFixtureBase +{ +public: + virtual void setUp() override; + virtual void tearDown() override; + + void testLineBreaking(); + void testWordBoundaries(); + void testGraphemeIteration(); + void testWeak(); + void testAsian(); + void testThai(); + void testLao(); +#ifdef TODO + void testNorthernThai(); + void testKhmer(); +#endif + void testJapanese(); + void testChinese(); + + CPPUNIT_TEST_SUITE(TestBreakIterator); + CPPUNIT_TEST(testLineBreaking); + CPPUNIT_TEST(testWordBoundaries); + CPPUNIT_TEST(testGraphemeIteration); + CPPUNIT_TEST(testWeak); + CPPUNIT_TEST(testAsian); + CPPUNIT_TEST(testThai); + CPPUNIT_TEST(testLao); +#ifdef TODO + CPPUNIT_TEST(testKhmer); + CPPUNIT_TEST(testNorthernThai); +#endif + CPPUNIT_TEST(testJapanese); + CPPUNIT_TEST(testChinese); + CPPUNIT_TEST_SUITE_END(); + +private: + uno::Reference m_xBreak; + void doTestJapanese(uno::Reference< i18n::XBreakIterator > const &xBreak); +}; + +void TestBreakIterator::testLineBreaking() +{ + i18n::LineBreakHyphenationOptions aHyphOptions; + i18n::LineBreakUserOptions aUserOptions; + lang::Locale aLocale; + + //See https://bugs.libreoffice.org/show_bug.cgi?id=31271 + { + OUString aTest("(some text here)"); + + aLocale.Language = "en"; + aLocale.Country = "US"; + + { + //Here we want the line break to leave text here) on the next line + i18n::LineBreakResults aResult = m_xBreak->getLineBreak(aTest, strlen("(some tex"), aLocale, 0, aHyphOptions, aUserOptions); + CPPUNIT_ASSERT_EQUAL_MESSAGE("Expected a break at the start of the word", static_cast(6), 
aResult.breakIndex); + } + + { + //Here we want the line break to leave "here)" on the next line + i18n::LineBreakResults aResult = m_xBreak->getLineBreak(aTest, strlen("(some text here"), aLocale, 0, aHyphOptions, aUserOptions); + CPPUNIT_ASSERT_EQUAL_MESSAGE("Expected a break at the start of the word", static_cast(11), aResult.breakIndex); + } + } + + //See https://bugs.libreoffice.org/show_bug.cgi?id=49849 + { + static constexpr OUString aWord = u"\u05DE\u05D9\u05DC\u05D9\u05DD"_ustr; + OUString aTest(aWord + " " + aWord); + + aLocale.Language = "he"; + aLocale.Country = "IL"; + + { + //Here we want the line break to happen at the whitespace + i18n::LineBreakResults aResult = m_xBreak->getLineBreak(aTest, aTest.getLength()-1, aLocale, 0, aHyphOptions, aUserOptions); + CPPUNIT_ASSERT_EQUAL_MESSAGE("Expected a break at the start of the word", aWord.getLength()+1, aResult.breakIndex); + } + } + + //See https://bz.apache.org/ooo/show_bug.cgi?id=17155 + { + aLocale.Language = "en"; + aLocale.Country = "US"; + + { + //Here we want the line break to leave /bar/ba clumped together on the next line + i18n::LineBreakResults aResult = m_xBreak->getLineBreak("foo /bar/baz", strlen("foo /bar/ba"), aLocale, 0, + aHyphOptions, aUserOptions); + CPPUNIT_ASSERT_EQUAL_MESSAGE("Expected a break at the first slash", static_cast(4), aResult.breakIndex); + } + } + + //See https://bz.apache.org/ooo/show_bug.cgi?id=19716 + { + aLocale.Language = "en"; + aLocale.Country = "US"; + + { + OUString aTest("aaa]aaa"); + //Here we want the line break to move the whole lot to the next line + i18n::LineBreakResults aResult = m_xBreak->getLineBreak(aTest, aTest.getLength()-2, aLocale, 0, + aHyphOptions, aUserOptions); + CPPUNIT_ASSERT_EQUAL_MESSAGE("Expected a break at the start of the line, not at ]", static_cast(0), aResult.breakIndex); + } + } + + //this is an example sequence from tdf92993-1.docx caught by the load crashtesting + { + static constexpr OUStringLiteral aTest = 
u"\U0001f356\U0001f357\U0001f346" + "\U0001f364\u2668\ufe0f\U0001f3c6"; + + aLocale.Language = "en"; + aLocale.Country = "US"; + + { + //This must not assert/crash + (void)m_xBreak->getLineBreak(aTest, 0, aLocale, 0, aHyphOptions, aUserOptions); + } + } + + //See https://bugs.documentfoundation.org/show_bug.cgi?id=96197 + { + static constexpr OUString aTest = u"\uc560\uad6D\uac00\uc758 \uac00" + "\uc0ac\ub294"_ustr; + + aLocale.Language = "ko"; + aLocale.Country = "KR"; + + { + i18n::LineBreakResults aResult = m_xBreak->getLineBreak(aTest, aTest.getLength()-2, aLocale, 0, + aHyphOptions, aUserOptions); + CPPUNIT_ASSERT_EQUAL_MESSAGE("Expected a break don't split the Korean word!", static_cast(5), aResult.breakIndex); + } + } +} + +//See https://bugs.libreoffice.org/show_bug.cgi?id=49629 +void TestBreakIterator::testWordBoundaries() +{ + lang::Locale aLocale; + aLocale.Language = "en"; + aLocale.Country = "US"; + + i18n::Boundary aBounds; + + //See https://bz.apache.org/ooo/show_bug.cgi?id=11993 + { + OUString aTest("abcd ef ghi??? 
KLM"); + + CPPUNIT_ASSERT(!m_xBreak->isBeginWord(aTest, 4, aLocale, i18n::WordType::DICTIONARY_WORD)); + CPPUNIT_ASSERT(m_xBreak->isEndWord(aTest, 4, aLocale, i18n::WordType::DICTIONARY_WORD)); + aBounds = m_xBreak->getWordBoundary(aTest, 4, aLocale, i18n::WordType::DICTIONARY_WORD, true); + CPPUNIT_ASSERT_EQUAL(sal_Int32(0), aBounds.startPos); + CPPUNIT_ASSERT_EQUAL(sal_Int32(4), aBounds.endPos); + + CPPUNIT_ASSERT(!m_xBreak->isBeginWord(aTest, 8, aLocale, i18n::WordType::DICTIONARY_WORD)); + CPPUNIT_ASSERT(!m_xBreak->isEndWord(aTest, 8, aLocale, i18n::WordType::DICTIONARY_WORD)); + + //next word + aBounds = m_xBreak->getWordBoundary(aTest, 8, aLocale, i18n::WordType::DICTIONARY_WORD, true); + CPPUNIT_ASSERT_EQUAL(sal_Int32(9), aBounds.startPos); + CPPUNIT_ASSERT_EQUAL(sal_Int32(12), aBounds.endPos); + + //previous word + aBounds = m_xBreak->getWordBoundary(aTest, 8, aLocale, i18n::WordType::DICTIONARY_WORD, false); + CPPUNIT_ASSERT_EQUAL(sal_Int32(5), aBounds.startPos); + CPPUNIT_ASSERT_EQUAL(sal_Int32(7), aBounds.endPos); + + CPPUNIT_ASSERT(!m_xBreak->isBeginWord(aTest, 12, aLocale, i18n::WordType::DICTIONARY_WORD)); + CPPUNIT_ASSERT(m_xBreak->isEndWord(aTest, 12, aLocale, i18n::WordType::DICTIONARY_WORD)); + aBounds = m_xBreak->getWordBoundary(aTest, 12, aLocale, i18n::WordType::DICTIONARY_WORD, true); + CPPUNIT_ASSERT_EQUAL(sal_Int32(9), aBounds.startPos); + CPPUNIT_ASSERT_EQUAL(sal_Int32(12), aBounds.endPos); + + CPPUNIT_ASSERT(m_xBreak->isBeginWord(aTest, 16, aLocale, i18n::WordType::DICTIONARY_WORD)); + CPPUNIT_ASSERT(!m_xBreak->isEndWord(aTest, 16, aLocale, i18n::WordType::DICTIONARY_WORD)); + aBounds = m_xBreak->getWordBoundary(aTest, 16, aLocale, i18n::WordType::DICTIONARY_WORD, true); + CPPUNIT_ASSERT_EQUAL(sal_Int32(16), aBounds.startPos); + CPPUNIT_ASSERT_EQUAL(sal_Int32(19), aBounds.endPos); + } + + //See https://bz.apache.org/ooo/show_bug.cgi?id=21907 + { + OUString aTest("b a?"); + + CPPUNIT_ASSERT(m_xBreak->isBeginWord(aTest, 1, aLocale, 
i18n::WordType::ANY_WORD)); + CPPUNIT_ASSERT(m_xBreak->isBeginWord(aTest, 2, aLocale, i18n::WordType::ANY_WORD)); + CPPUNIT_ASSERT(m_xBreak->isBeginWord(aTest, 3, aLocale, i18n::WordType::ANY_WORD)); + + CPPUNIT_ASSERT(m_xBreak->isBeginWord(aTest, 3, aLocale, i18n::WordType::ANYWORD_IGNOREWHITESPACES)); + + CPPUNIT_ASSERT(m_xBreak->isEndWord(aTest, 1, aLocale, i18n::WordType::ANY_WORD)); + CPPUNIT_ASSERT(m_xBreak->isEndWord(aTest, 2, aLocale, i18n::WordType::ANY_WORD)); + CPPUNIT_ASSERT(m_xBreak->isEndWord(aTest, 3, aLocale, i18n::WordType::ANY_WORD)); + + CPPUNIT_ASSERT(m_xBreak->isEndWord(aTest, 3, aLocale, i18n::WordType::ANYWORD_IGNOREWHITESPACES)); + } + + //See https://bz.apache.org/ooo/show_bug.cgi?id=14904 + { + static constexpr OUString aTest = + u"Working \u201CWords" + " starting wit" + "h quotes\u201D Work" + "ing \u2018Broken\u2019 " + "?Spanish? doe" + "sn\u2019t work. No" + "t even \u00BFreal? " + "Spanish"_ustr; + + aBounds = m_xBreak->getWordBoundary(aTest, 4, aLocale, i18n::WordType::DICTIONARY_WORD, false); + CPPUNIT_ASSERT_EQUAL(sal_Int32(0), aBounds.startPos); + CPPUNIT_ASSERT_EQUAL(sal_Int32(7), aBounds.endPos); + + aBounds = m_xBreak->getWordBoundary(aTest, 12, aLocale, i18n::WordType::DICTIONARY_WORD, false); + CPPUNIT_ASSERT_EQUAL(sal_Int32(9), aBounds.startPos); + CPPUNIT_ASSERT_EQUAL(sal_Int32(14), aBounds.endPos); + + aBounds = m_xBreak->getWordBoundary(aTest, 40, aLocale, i18n::WordType::DICTIONARY_WORD, false); + CPPUNIT_ASSERT_EQUAL(sal_Int32(37), aBounds.startPos); + CPPUNIT_ASSERT_EQUAL(sal_Int32(44), aBounds.endPos); + + aBounds = m_xBreak->getWordBoundary(aTest, 49, aLocale, i18n::WordType::DICTIONARY_WORD, false); + CPPUNIT_ASSERT_EQUAL(sal_Int32(46), aBounds.startPos); + CPPUNIT_ASSERT_EQUAL(sal_Int32(52), aBounds.endPos); + + aBounds = m_xBreak->getWordBoundary(aTest, 58, aLocale, i18n::WordType::DICTIONARY_WORD, false); + CPPUNIT_ASSERT_EQUAL(sal_Int32(55), aBounds.startPos); + CPPUNIT_ASSERT_EQUAL(sal_Int32(62), 
aBounds.endPos); + + aBounds = m_xBreak->getWordBoundary(aTest, 67, aLocale, i18n::WordType::DICTIONARY_WORD, false); + CPPUNIT_ASSERT_EQUAL(sal_Int32(64), aBounds.startPos); + CPPUNIT_ASSERT_EQUAL(sal_Int32(71), aBounds.endPos); + + aBounds = m_xBreak->getWordBoundary(aTest, 90, aLocale, i18n::WordType::DICTIONARY_WORD, false); + CPPUNIT_ASSERT_EQUAL(sal_Int32(88), aBounds.startPos); + CPPUNIT_ASSERT_EQUAL(sal_Int32(92), aBounds.endPos); + } + + //See https://bugs.libreoffice.org/show_bug.cgi?id=49629 + sal_Unicode aBreakTests[] = { ' ', 1, 2, 3, 4, 5, 6, 7, 0x91, 0x92, 0x200B, 0xE8FF, 0xF8FF }; + for (int mode = i18n::WordType::ANY_WORD; mode <= i18n::WordType::WORD_COUNT; ++mode) + { + //make sure that in all cases isBeginWord and isEndWord matches getWordBoundary + for (auto const& i: aBreakTests) + { + OUString aTest = "Word" + OUStringChar(i) + "Word"; + aBounds = m_xBreak->getWordBoundary(aTest, 0, aLocale, mode, true); + switch (mode) + { + case i18n::WordType::ANY_WORD: + CPPUNIT_ASSERT_EQUAL(sal_Int32(0), aBounds.startPos); + CPPUNIT_ASSERT_EQUAL(sal_Int32(4), aBounds.endPos); + break; + case i18n::WordType::ANYWORD_IGNOREWHITESPACES: + CPPUNIT_ASSERT_EQUAL(sal_Int32(0), aBounds.startPos); + CPPUNIT_ASSERT_EQUAL(sal_Int32(4), aBounds.endPos); + break; + case i18n::WordType::DICTIONARY_WORD: + CPPUNIT_ASSERT_EQUAL(sal_Int32(0), aBounds.startPos); + CPPUNIT_ASSERT_EQUAL(sal_Int32(4), aBounds.endPos); + break; + case i18n::WordType::WORD_COUNT: + CPPUNIT_ASSERT_EQUAL(sal_Int32(0), aBounds.startPos); + CPPUNIT_ASSERT_EQUAL(sal_Int32(4), aBounds.endPos); + break; + } + + CPPUNIT_ASSERT(m_xBreak->isBeginWord(aTest, aBounds.startPos, aLocale, mode)); + CPPUNIT_ASSERT(m_xBreak->isEndWord(aTest, aBounds.endPos, aLocale, mode)); + } + } + + sal_Unicode aJoinTests[] = { 'X', 0x200C, 0x200D, 0x2060, 0xFEFF, 0xFFF9, 0xFFFA, 0xFFFB }; + for (int mode = i18n::WordType::ANY_WORD; mode <= i18n::WordType::WORD_COUNT; ++mode) + { + //make sure that in all cases isBeginWord 
and isEndWord matches getWordBoundary + for (auto const& p: aJoinTests) + { + OUString aTest = "Word" + OUStringChar(p) + "Word"; + aBounds = m_xBreak->getWordBoundary(aTest, 0, aLocale, mode, true); + switch (mode) + { + case i18n::WordType::ANY_WORD: + CPPUNIT_ASSERT_EQUAL(sal_Int32(0), aBounds.startPos); + CPPUNIT_ASSERT_EQUAL(sal_Int32(9), aBounds.endPos); + break; + case i18n::WordType::ANYWORD_IGNOREWHITESPACES: + CPPUNIT_ASSERT_EQUAL(sal_Int32(0), aBounds.startPos); + CPPUNIT_ASSERT_EQUAL(sal_Int32(9), aBounds.endPos); + break; + case i18n::WordType::DICTIONARY_WORD: + CPPUNIT_ASSERT_EQUAL(sal_Int32(0), aBounds.startPos); + CPPUNIT_ASSERT_EQUAL(sal_Int32(9), aBounds.endPos); + break; + case i18n::WordType::WORD_COUNT: + CPPUNIT_ASSERT_EQUAL(sal_Int32(0), aBounds.startPos); + CPPUNIT_ASSERT_EQUAL(sal_Int32(9), aBounds.endPos); + break; + } + + CPPUNIT_ASSERT(m_xBreak->isBeginWord(aTest, aBounds.startPos, aLocale, mode)); + CPPUNIT_ASSERT(m_xBreak->isEndWord(aTest, aBounds.endPos, aLocale, mode)); + } + } + + //See https://bz.apache.org/ooo/show_bug.cgi?id=13494 + { + constexpr OUString aBase(u"xxAAxxBBxxCCxx"_ustr); + const sal_Unicode aTests[] = + { + '\'', ';', ',', '.', '!', '@', '#', '%', '&', '*', + '(', ')', '_', '-', '{', '}', '[', ']', '\"', '/', + '\\', '?', '~', '$', '+', '^', '=', '<', '>', '|' + }; + + const sal_Int32 aDoublePositions[] = {0, 2, 4, 6, 8, 10, 12, 14}; + for (auto const& r: aTests) + { + OUString aTest = aBase.replace('x', r); + sal_Int32 nPos = -1; + size_t i = 0; + do + { + CPPUNIT_ASSERT(i < std::size(aDoublePositions)); + nPos = m_xBreak->nextWord(aTest, nPos, aLocale, i18n::WordType::ANYWORD_IGNOREWHITESPACES).startPos; + CPPUNIT_ASSERT_EQUAL(aDoublePositions[i], nPos); + ++i; + } + while (nPos < aTest.getLength()); + nPos = aTest.getLength(); + i = std::size(aDoublePositions)-1; + do + { + nPos = m_xBreak->previousWord(aTest, nPos, aLocale, i18n::WordType::ANYWORD_IGNOREWHITESPACES).startPos; + --i; + 
CPPUNIT_ASSERT_EQUAL(aDoublePositions[i], nPos); + } + while (nPos > 0); + } + + const sal_Int32 aSinglePositions[] = {0, 1, 3, 4, 6, 7, 9, 10}; + for (size_t j = 1; j < SAL_N_ELEMENTS(aTests); ++j) + { + OUString aTest = aBase.replaceAll("xx", OUStringChar(aTests[j])); + sal_Int32 nPos = -1; + size_t i = 0; + do + { + CPPUNIT_ASSERT(i < std::size(aSinglePositions)); + nPos = m_xBreak->nextWord(aTest, nPos, aLocale, i18n::WordType::ANYWORD_IGNOREWHITESPACES).startPos; + CPPUNIT_ASSERT_EQUAL(aSinglePositions[i], nPos); + ++i; + } + while (nPos < aTest.getLength()); + nPos = aTest.getLength(); + i = std::size(aSinglePositions)-1; + do + { + nPos = m_xBreak->previousWord(aTest, nPos, aLocale, i18n::WordType::ANYWORD_IGNOREWHITESPACES).startPos; + --i; + CPPUNIT_ASSERT_EQUAL(aSinglePositions[i], nPos); + } + while (nPos > 0); + } + + const sal_Int32 aSingleQuotePositions[] = {0, 1, 9, 10}; + CPPUNIT_ASSERT_EQUAL(u'\'', aTests[0]); + { + OUString aTest = aBase.replaceAll("xx", OUStringChar(aTests[0])); + sal_Int32 nPos = -1; + size_t i = 0; + do + { + CPPUNIT_ASSERT(i < std::size(aSingleQuotePositions)); + nPos = m_xBreak->nextWord(aTest, nPos, aLocale, i18n::WordType::ANYWORD_IGNOREWHITESPACES).startPos; + CPPUNIT_ASSERT_EQUAL(aSingleQuotePositions[i], nPos); + ++i; + } + while (nPos < aTest.getLength()); + nPos = aTest.getLength(); + i = std::size(aSingleQuotePositions)-1; + do + { + nPos = m_xBreak->previousWord(aTest, nPos, aLocale, i18n::WordType::ANYWORD_IGNOREWHITESPACES).startPos; + --i; + CPPUNIT_ASSERT_EQUAL(aSingleQuotePositions[i], nPos); + } + while (nPos > 0); + } + } + + //See https://bz.apache.org/ooo/show_bug.cgi?id=13451 + { + aLocale.Language = "ca"; + aLocale.Country = "ES"; + + OUString aTest("mirar-se comprar-vos donem-nos les mans aneu-vos-en!"); + + sal_Int32 nPos = 0; + sal_Int32 aExpected[] = {8, 20, 30, 34, 39, 51, 52}; + size_t i = 0; + do + { + CPPUNIT_ASSERT(i < std::size(aExpected)); + nPos = m_xBreak->getWordBoundary(aTest, nPos, aLocale, 
+ i18n::WordType::DICTIONARY_WORD, true).endPos; + CPPUNIT_ASSERT_EQUAL(aExpected[i], nPos); + ++i; + } + while (nPos++ < aTest.getLength()); + CPPUNIT_ASSERT_EQUAL(std::size(aExpected), i); + } + + //See https://bz.apache.org/ooo/show_bug.cgi?id=85411 + for (int j = 0; j < 3; ++j) + { + switch (j) + { + case 0: + aLocale.Language = "en"; + aLocale.Country = "US"; + break; + case 1: + aLocale.Language = "ca"; + aLocale.Country = "ES"; + break; + case 2: + aLocale.Language = "fi"; + aLocale.Country = "FI"; + break; + default: + CPPUNIT_ASSERT(false); + break; + } + + static constexpr OUString aTest = + u"I\u200Bwant\u200Bto\u200Bgo"_ustr; + + sal_Int32 nPos = 0; + sal_Int32 aExpected[] = {1, 6, 9, 12}; + size_t i = 0; + do + { + CPPUNIT_ASSERT(i < std::size(aExpected)); + nPos = m_xBreak->getWordBoundary(aTest, nPos, aLocale, + i18n::WordType::DICTIONARY_WORD, true).endPos; + CPPUNIT_ASSERT_EQUAL(aExpected[i], nPos); + ++i; + } + while (nPos++ < aTest.getLength()); + CPPUNIT_ASSERT_EQUAL(std::size(aExpected), i); + } + + //https://bz.apache.org/ooo/show_bug.cgi?id=21290 + for (int j = 0; j < 2; ++j) + { + switch (j) + { + case 0: + aLocale.Language = "en"; + aLocale.Country = "US"; + break; + case 1: + aLocale.Language = "grc"; + aLocale.Country.clear(); + break; + default: + CPPUNIT_ASSERT(false); + break; + } + + static constexpr OUString aTest = + u"\u1F0C\u03BD\u03B4\u03C1\u03B1 \u1F00" + "\u03C1\u03BD\u1F7B\u03BC\u03B5\u03BD\u03BF" + "\u03C2 \u1F00\u03BB\u03BB \u1F24" + "\u03C3\u03B8\u03B9\u03BF\u03BD"_ustr; + + sal_Int32 nPos = 0; + sal_Int32 aExpected[] = {5, 15, 19, 26}; + size_t i = 0; + do + { + CPPUNIT_ASSERT(i < std::size(aExpected)); + nPos = m_xBreak->getWordBoundary(aTest, nPos, aLocale, + i18n::WordType::DICTIONARY_WORD, true).endPos; + CPPUNIT_ASSERT_EQUAL(aExpected[i], nPos); + ++i; + } + while (nPos++ < aTest.getLength()); + CPPUNIT_ASSERT_EQUAL(std::size(aExpected), i); + } + + //See https://bz.apache.org/ooo/show_bug.cgi?id=58513 + //See 
https://bugs.libreoffice.org/show_bug.cgi?id=55707 + { + aLocale.Language = "fi"; + aLocale.Country = "FI"; + + OUString aTest("Kuorma-auto kaakkois- ja Keski-Suomi USA:n 90:n %:n"); + + { + sal_Int32 nPos = 0; + sal_Int32 aExpected[] = {11, 21, 24, 36, 42, 47, 51}; + size_t i = 0; + do + { + CPPUNIT_ASSERT(i < std::size(aExpected)); + nPos = m_xBreak->getWordBoundary(aTest, nPos, aLocale, + i18n::WordType::WORD_COUNT, true).endPos; + CPPUNIT_ASSERT_EQUAL(aExpected[i], nPos); + ++i; + } + while (nPos++ < aTest.getLength()); + CPPUNIT_ASSERT_EQUAL(std::size(aExpected), i); + } + + { + sal_Int32 nPos = 0; + sal_Int32 aExpected[] = {0, 11, 12, 20, 22, 24, 25, 36, 37, + 40, 41, 42, 43, 45, 46, 47, 50, 51}; + size_t i = 0; + do + { + CPPUNIT_ASSERT(i < std::size(aExpected)); + aBounds = m_xBreak->getWordBoundary(aTest, nPos, aLocale, + i18n::WordType::DICTIONARY_WORD, true); + CPPUNIT_ASSERT_EQUAL(aExpected[i], aBounds.startPos); + ++i; + CPPUNIT_ASSERT_EQUAL(aExpected[i], aBounds.endPos); + ++i; + nPos = aBounds.endPos; + } + while (nPos++ < aTest.getLength()); + CPPUNIT_ASSERT_EQUAL(std::size(aExpected), i); + } + } + + //See https://bz.apache.org/ooo/show_bug.cgi?id=107843 + { + aLocale.Language = "en"; + aLocale.Country = "US"; + + static constexpr OUString aTest = + u"ru\uFB00le \uFB01sh"_ustr; + + aBounds = m_xBreak->getWordBoundary(aTest, 1, aLocale, i18n::WordType::DICTIONARY_WORD, false); + CPPUNIT_ASSERT_EQUAL(sal_Int32(0), aBounds.startPos); + CPPUNIT_ASSERT_EQUAL(sal_Int32(5), aBounds.endPos); + + aBounds = m_xBreak->getWordBoundary(aTest, 7, aLocale, i18n::WordType::DICTIONARY_WORD, false); + CPPUNIT_ASSERT_EQUAL(sal_Int32(6), aBounds.startPos); + CPPUNIT_ASSERT_EQUAL(sal_Int32(9), aBounds.endPos); + } + + //See https://bz.apache.org/ooo/show_bug.cgi?id=113785 + { + aLocale.Language = "en"; + aLocale.Country = "US"; + + static constexpr OUString aTest = + u"a\u2013b\u2014c"_ustr; + + aBounds = m_xBreak->getWordBoundary(aTest, 0, aLocale, 
i18n::WordType::DICTIONARY_WORD, true); + CPPUNIT_ASSERT_EQUAL(sal_Int32(0), aBounds.startPos); + CPPUNIT_ASSERT_EQUAL(sal_Int32(1), aBounds.endPos); + + aBounds = m_xBreak->nextWord(aTest, 0, aLocale, i18n::WordType::DICTIONARY_WORD); + CPPUNIT_ASSERT_EQUAL(sal_Int32(2), aBounds.startPos); + CPPUNIT_ASSERT_EQUAL(sal_Int32(3), aBounds.endPos); + + aBounds = m_xBreak->nextWord(aTest, aBounds.endPos, aLocale, i18n::WordType::DICTIONARY_WORD); + CPPUNIT_ASSERT_EQUAL(sal_Int32(4), aBounds.startPos); + CPPUNIT_ASSERT_EQUAL(sal_Int32(5), aBounds.endPos); + } +} + +//See https://bugs.libreoffice.org/show_bug.cgi?id=40292 +//See https://bz.apache.org/ooo/show_bug.cgi?id=80412 +//See https://bz.apache.org/ooo/show_bug.cgi?id=111152 +//See https://bz.apache.org/ooo/show_bug.cgi?id=50172 +void TestBreakIterator::testGraphemeIteration() +{ + lang::Locale aLocale; + aLocale.Language = "bn"; + aLocale.Country = "IN"; + + { + static constexpr OUString aTest = u"\u09AC\u09CD\u09AF"_ustr; // BA HALANT LA + + sal_Int32 nDone=0; + sal_Int32 nPos; + nPos = m_xBreak->nextCharacters(aTest, 0, aLocale, + i18n::CharacterIteratorMode::SKIPCELL, 1, nDone); + CPPUNIT_ASSERT_EQUAL_MESSAGE("Should skip full grapheme", aTest.getLength(), nPos); + nPos = m_xBreak->previousCharacters(aTest, aTest.getLength(), aLocale, + i18n::CharacterIteratorMode::SKIPCELL, 1, nDone); + CPPUNIT_ASSERT_EQUAL_MESSAGE("Should skip full grapheme", static_cast(0), nPos); + } + + { + static constexpr OUString aTest = u"\u09B9\u09CD\u09A3\u09BF"_ustr; + // HA HALANT NA VOWELSIGNI + + sal_Int32 nDone=0; + sal_Int32 nPos; + nPos = m_xBreak->nextCharacters(aTest, 0, aLocale, + i18n::CharacterIteratorMode::SKIPCELL, 1, nDone); + CPPUNIT_ASSERT_EQUAL_MESSAGE("Should skip full grapheme", aTest.getLength(), nPos); + nPos = m_xBreak->previousCharacters(aTest, aTest.getLength(), aLocale, + i18n::CharacterIteratorMode::SKIPCELL, 1, nDone); + CPPUNIT_ASSERT_EQUAL_MESSAGE("Should skip full grapheme", static_cast(0), nPos); + } + + 
{ + static constexpr OUString aTest = u"\u09A4\u09CD\u09AE\u09CD\u09AF"_ustr; + // TA HALANT MA HALANT YA + + sal_Int32 nDone=0; + sal_Int32 nPos; + nPos = m_xBreak->nextCharacters(aTest, 0, aLocale, + i18n::CharacterIteratorMode::SKIPCELL, 1, nDone); + CPPUNIT_ASSERT_EQUAL_MESSAGE("Should skip full grapheme", aTest.getLength(), nPos); + nPos = m_xBreak->previousCharacters(aTest, aTest.getLength(), aLocale, + i18n::CharacterIteratorMode::SKIPCELL, 1, nDone); + CPPUNIT_ASSERT_EQUAL_MESSAGE("Should skip full grapheme", static_cast(0), nPos); + } + + aLocale.Language = "ta"; + aLocale.Country = "IN"; + + { + static constexpr OUString aTest = u"\u0B9A\u0BBF\u0BA4\u0BCD\u0BA4\u0BBF\u0BB0\u0BC8"_ustr; // CA VOWELSIGNI TA VIRAMA TA VOWELSIGNI RA VOWELSIGNAI + + sal_Int32 nDone=0; + sal_Int32 nPos = 0; + + nPos = m_xBreak->nextCharacters(aTest, 0, aLocale, i18n::CharacterIteratorMode::SKIPCELL, 1, nDone); + CPPUNIT_ASSERT_EQUAL_MESSAGE("Should skip full grapheme", static_cast(2), nPos); + nPos = m_xBreak->nextCharacters(aTest, nPos, aLocale, i18n::CharacterIteratorMode::SKIPCELL, 1, nDone); + CPPUNIT_ASSERT_EQUAL_MESSAGE("Should skip full grapheme", static_cast(4), nPos); + nPos = m_xBreak->nextCharacters(aTest, nPos, aLocale, i18n::CharacterIteratorMode::SKIPCELL, 1, nDone); + CPPUNIT_ASSERT_EQUAL_MESSAGE("Should skip full grapheme", static_cast(6), nPos); + nPos = m_xBreak->nextCharacters(aTest, nPos, aLocale, i18n::CharacterIteratorMode::SKIPCELL, 1, nDone); + CPPUNIT_ASSERT_EQUAL_MESSAGE("Should skip full grapheme", aTest.getLength(), nPos); + nPos = m_xBreak->previousCharacters(aTest, aTest.getLength(), aLocale, + i18n::CharacterIteratorMode::SKIPCELL, 1, nDone); + CPPUNIT_ASSERT_EQUAL_MESSAGE("Should skip full grapheme", static_cast(6), nPos); + nPos = m_xBreak->previousCharacters(aTest, nPos, aLocale, i18n::CharacterIteratorMode::SKIPCELL, 1, nDone); + CPPUNIT_ASSERT_EQUAL_MESSAGE("Should skip full grapheme", static_cast(4), nPos); + nPos = 
m_xBreak->previousCharacters(aTest, nPos, aLocale, i18n::CharacterIteratorMode::SKIPCELL, 1, nDone); + CPPUNIT_ASSERT_EQUAL_MESSAGE("Should skip full grapheme", static_cast(2), nPos); + nPos = m_xBreak->previousCharacters(aTest, nPos, aLocale, i18n::CharacterIteratorMode::SKIPCELL, 1, nDone); + CPPUNIT_ASSERT_EQUAL_MESSAGE("Should skip full grapheme", static_cast(0), nPos); + } + + { + static constexpr OUString aTest = u"\u0B95\u0BC1"_ustr; // KA VOWELSIGNU + + sal_Int32 nDone=0; + sal_Int32 nPos = 0; + + nPos = m_xBreak->nextCharacters(aTest, 0, aLocale, + i18n::CharacterIteratorMode::SKIPCELL, 1, nDone); + CPPUNIT_ASSERT_EQUAL_MESSAGE("Should skip full grapheme", aTest.getLength(), nPos); + nPos = m_xBreak->previousCharacters(aTest, aTest.getLength(), aLocale, + i18n::CharacterIteratorMode::SKIPCELL, 1, nDone); + CPPUNIT_ASSERT_EQUAL_MESSAGE("Should skip full grapheme", static_cast(0), nPos); + } + + { + static constexpr OUString aTest = + u"\u0B9A\u0BBF\u0BA4\u0BCD\u0BA4\u0BBF\u0BB0\u0BC8"_ustr; + // CA VOWELSIGNI TA VIRAMA TA VOWELSIGNI RA VOWELSIGNAI + + sal_Int32 nDone=0; + sal_Int32 nPos=0; + + for (sal_Int32 i = 0; i < 4; ++i) + { + sal_Int32 nOldPos = nPos; + nPos = m_xBreak->nextCharacters(aTest, nPos, aLocale, + i18n::CharacterIteratorMode::SKIPCELL, 1, nDone); + CPPUNIT_ASSERT_EQUAL_MESSAGE("Should skip 2 units", nOldPos+2, nPos); + } + + for (sal_Int32 i = 0; i < 4; ++i) + { + sal_Int32 nOldPos = nPos; + nPos = m_xBreak->previousCharacters(aTest, nPos, aLocale, + i18n::CharacterIteratorMode::SKIPCELL, 1, nDone); + CPPUNIT_ASSERT_EQUAL_MESSAGE("Should skip 2 units", nOldPos-2, nPos); + } + } + + { + static constexpr OUString aText = u"\u05D0\u05B8"_ustr; // ALEF QAMATS + + sal_Int32 nGraphemeCount = 0; + + sal_Int32 nCurPos = 0; + while (nCurPos < aText.getLength()) + { + sal_Int32 nCount2 = 1; + nCurPos = m_xBreak->nextCharacters(aText, nCurPos, lang::Locale(), + i18n::CharacterIteratorMode::SKIPCELL, nCount2, nCount2); + ++nGraphemeCount; + } + + 
CPPUNIT_ASSERT_EQUAL_MESSAGE("Should be considered 1 grapheme", static_cast(1), nGraphemeCount); + } + + aLocale.Language = "hi"; + aLocale.Country = "IN"; + + { + static constexpr OUString aTest = u"\u0936\u0940"_ustr; // SHA VOWELSIGNII + + sal_Int32 nDone=0; + sal_Int32 nPos = 0; + + nPos = m_xBreak->nextCharacters(aTest, 0, aLocale, + i18n::CharacterIteratorMode::SKIPCELL, 1, nDone); + CPPUNIT_ASSERT_EQUAL_MESSAGE("Should skip full grapheme", aTest.getLength(), nPos); + nPos = m_xBreak->previousCharacters(aTest, aTest.getLength(), aLocale, + i18n::CharacterIteratorMode::SKIPCELL, 1, nDone); + CPPUNIT_ASSERT_EQUAL_MESSAGE("Should skip full grapheme", static_cast(0), nPos); + } +} + +//A test to ensure that certain ranges and codepoints that are categorized as +//weak remain as weak, so that existing docs that depend on this don't silently +//change font for those weak chars +void TestBreakIterator::testWeak() +{ + lang::Locale aLocale; + aLocale.Language = "en"; + aLocale.Country = "US"; + + { + static constexpr OUString aWeaks = + u"\u0001\u0002" + " \u00A0" + "\u0300\u036F" //Combining Diacritical Marks + "\u1AB0\u1AFF" //Combining Diacritical Marks Extended + "\u1DC0\u1DFF" //Combining Diacritical Marks Supplement + "\u20D0\u20FF" //Combining Diacritical Marks for Symbols + "\u2150\u215F" //Number Forms, fractions + "\u2160\u2180" //Number Forms, roman numerals + "\u2200\u22FF" //Mathematical Operators + "\u27C0\u27EF" //Miscellaneous Mathematical Symbols-A + "\u2980\u29FF" //Miscellaneous Mathematical Symbols-B + "\u2A00\u2AFF" //Supplemental Mathematical Operators + "\u2100\u214F" //Letterlike Symbols + "\u2308\u230B" //Miscellaneous technical + "\u25A0\u25FF" //Geometric Shapes + "\u2B30\u2B4C"_ustr; //Miscellaneous Symbols and Arrows + + for (sal_Int32 i = 0; i < aWeaks.getLength(); ++i) + { + sal_Int16 nScript = m_xBreak->getScriptType(aWeaks, i); + OString aMsg = + "Char 0x" + + OString::number(static_cast(std::u16string_view(aWeaks)[i]), 16) + + " 
should have been weak"; + CPPUNIT_ASSERT_EQUAL_MESSAGE(aMsg.getStr(), + i18n::ScriptType::WEAK, nScript); + } + } +} + +//A test to ensure that certain ranges and codepoints that are categorized as +//asian remain as asian, so that existing docs that depend on this don't silently +//change font for those asian chars. +//See https://bugs.libreoffice.org/show_bug.cgi?id=38095 +void TestBreakIterator::testAsian() +{ + lang::Locale aLocale; + aLocale.Language = "en"; + aLocale.Country = "US"; + + { + static constexpr OUString aAsians = + //some typical CJK chars + u"\u4E00\u62FF" + //The full HalfWidth and FullWidth block has historically been + //designated as taking the CJK font :-( + //HalfWidth and FullWidth forms of ASCII 0-9, categorized under + //UAX24 as "Common" i.e. by that logic WEAK + "\uFF10\uFF19" + //HalfWidth and FullWidth forms of ASCII A-z, categorized under + //UAX25 as "Latin", i.e. by that logic LATIN + "\uFF21\uFF5A"_ustr; + + for (sal_Int32 i = 0; i < aAsians.getLength(); ++i) + { + sal_Int16 nScript = m_xBreak->getScriptType(aAsians, i); + OString aMsg = + "Char 0x" + + OString::number(static_cast(std::u16string_view(aAsians)[i]), 16) + + " should have been asian"; + CPPUNIT_ASSERT_EQUAL_MESSAGE(aMsg.getStr(), + i18n::ScriptType::ASIAN, nScript); + } + } +} + +//A test to ensure that our Lao word boundary detection is useful +void TestBreakIterator::testLao() +{ + lang::Locale aLocale; + aLocale.Language = "lo"; + aLocale.Country = "LA"; + + static constexpr OUString aTest = u"\u0e8d\u0eb4\u0e99\u0e94\u0eb5\u0e95\u0ec9\u0ead\u0e99\u0eae\u0eb1\u0e9a"_ustr; + i18n::Boundary aBounds = m_xBreak->getWordBoundary(aTest, 0, aLocale, + i18n::WordType::DICTIONARY_WORD, true); + + CPPUNIT_ASSERT_EQUAL(sal_Int32(0), aBounds.startPos); + CPPUNIT_ASSERT_EQUAL(sal_Int32(5), aBounds.endPos); + + aBounds = m_xBreak->getWordBoundary(aTest, aBounds.endPos, aLocale, + i18n::WordType::DICTIONARY_WORD, true); + + CPPUNIT_ASSERT_EQUAL(sal_Int32(5), aBounds.startPos); 
+#if (U_ICU_VERSION_MAJOR_NUM < 70) + CPPUNIT_ASSERT_EQUAL(sal_Int32(9), aBounds.endPos); +#else + // FIXME: + // In ICU 70/71 for yet unknown reason the word boundary 9 is not detected and + // instead the length 12 is returned as endpos. + // Deep in + // icu_70::RuleBasedBreakIterator::BreakCache::next() + // icu_70::RuleBasedBreakIterator::BreakCache::following() + // icu_70::RuleBasedBreakIterator::following() + // i18npool::BreakIterator_Unicode::getWordBoundary() + CPPUNIT_ASSERT_EQUAL(sal_Int32(12), aBounds.endPos); +#endif +} + +//A test to ensure that our thai word boundary detection is useful +void TestBreakIterator::testThai() +{ + lang::Locale aLocale; + aLocale.Language = "th"; + aLocale.Country = "TH"; + + //See http://lists.freedesktop.org/archives/libreoffice/2012-February/025959.html + { + static constexpr OUString aTest = u"\u0E01\u0E38\u0E2B\u0E25\u0E32\u0E1A"_ustr; + i18n::Boundary aBounds = m_xBreak->getWordBoundary(aTest, 0, aLocale, + i18n::WordType::DICTIONARY_WORD, true); + CPPUNIT_ASSERT_EQUAL_MESSAGE("Should skip full word", + sal_Int32(0), aBounds.startPos); + CPPUNIT_ASSERT_EQUAL_MESSAGE("Should skip full word", + aTest.getLength(), aBounds.endPos); + } + + //See https://bz.apache.org/ooo/show_bug.cgi?id=29548 + //make sure forwards and back are consistent + { + static constexpr OUString aTest = + u"\u0E2D\u0E38\u0E17\u0E22\u0E32\u0E19\u0E41" + "\u0E2B\u0E48\u0E07\u0E0A\u0E32\u0E15\u0E34" + "\u0E19\u0E49\u0E33\u0E2B\u0E19\u0E32\u0E27" + "\u0E2D\u0E38\u0E17\u0E22\u0E32\u0E19\u0E41" + "\u0E2B\u0E48\u0E07\u0E0A\u0E32\u0E15\u0E34" + "\u0E19\u0E49\u0E33\u0E2B\u0E19\u0E32\u0E27"_ustr; + + std::stack aPositions; + sal_Int32 nPos = -1; + do + { + nPos = m_xBreak->nextWord(aTest, nPos, aLocale, i18n::WordType::ANYWORD_IGNOREWHITESPACES).startPos; + aPositions.push(nPos); + } + while (nPos < aTest.getLength()); + nPos = aTest.getLength(); + CPPUNIT_ASSERT(!aPositions.empty()); + aPositions.pop(); + do + { + CPPUNIT_ASSERT(!aPositions.empty()); + 
nPos = m_xBreak->previousWord(aTest, nPos, aLocale, i18n::WordType::ANYWORD_IGNOREWHITESPACES).startPos; + CPPUNIT_ASSERT_EQUAL(aPositions.top(), nPos); + aPositions.pop(); + } + while (nPos > 0); + } + + // tdf#113694 + { + static constexpr OUString aTest = u"\U00010000"_ustr; + + sal_Int32 nDone=0; + sal_Int32 nPos; + + nPos = m_xBreak->nextCharacters(aTest, 0, aLocale, + i18n::CharacterIteratorMode::SKIPCELL, 1, nDone); + CPPUNIT_ASSERT_EQUAL_MESSAGE("Should skip full surrogate pair", aTest.getLength(), nPos); + nPos = m_xBreak->previousCharacters(aTest, aTest.getLength(), aLocale, + i18n::CharacterIteratorMode::SKIPCELL, 1, nDone); + CPPUNIT_ASSERT_EQUAL_MESSAGE("Should skip full surrogate pair", static_cast(0), nPos); + + nPos = m_xBreak->nextCharacters(aTest, 0, aLocale, + i18n::CharacterIteratorMode::SKIPCHARACTER, 1, nDone); + CPPUNIT_ASSERT_EQUAL_MESSAGE("Should skip full surrogate pair", aTest.getLength(), nPos); + nPos = m_xBreak->previousCharacters(aTest, aTest.getLength(), aLocale, + i18n::CharacterIteratorMode::SKIPCHARACTER, 1, nDone); + CPPUNIT_ASSERT_EQUAL_MESSAGE("Should skip full surrogate pair", static_cast(0), nPos); + } +} + +#ifdef TODO +void TestBreakIterator::testNorthernThai() +{ + lang::Locale aLocale; + aLocale.Language = "nod"; + aLocale.Country = "TH"; + + const sal_Unicode NORTHERN_THAI1[] = { 0x0E01, 0x0E38, 0x0E4A, 0x0E2B, 0x0E25, 0x0E32, 0x0E1A }; + OUString aTest(NORTHERN_THAI1, SAL_N_ELEMENTS(NORTHERN_THAI1)); + i18n::Boundary aBounds = m_xBreak->getWordBoundary(aTest, 0, aLocale, + i18n::WordType::DICTIONARY_WORD, true); + CPPUNIT_ASSERT_MESSAGE("Should skip full word", + aBounds.startPos == 0 && aBounds.endPos == aTest.getLength()); +} + +// Not sure if any version earlier than 49 did have Khmer word boundary +// dictionaries, 4.6 does not. 
+ +// As of icu 54, word boundary detection for Khmer is still considered +// insufficient, so icu khmer stuff is disabled + +//A test to ensure that our khmer word boundary detection is useful +//https://bugs.libreoffice.org/show_bug.cgi?id=52020 +void TestBreakIterator::testKhmer() +{ + lang::Locale aLocale; + aLocale.Language = "km"; + aLocale.Country = "KH"; + + const sal_Unicode KHMER[] = { 0x17B2, 0x17D2, 0x1799, 0x1782, 0x17C1 }; + + OUString aTest(KHMER, SAL_N_ELEMENTS(KHMER)); + i18n::Boundary aBounds = m_xBreak->getWordBoundary(aTest, 0, aLocale, + i18n::WordType::DICTIONARY_WORD, true); + + CPPUNIT_ASSERT(aBounds.startPos == 0 && aBounds.endPos == 3); + + aBounds = m_xBreak->getWordBoundary(aTest, aBounds.endPos, aLocale, + i18n::WordType::DICTIONARY_WORD, true); + + CPPUNIT_ASSERT(aBounds.startPos == 3 && aBounds.endPos == 5); +} +#endif + +void TestBreakIterator::doTestJapanese(uno::Reference< i18n::XBreakIterator > const &xBreak) +{ + lang::Locale aLocale; + aLocale.Language = "ja"; + aLocale.Country = "JP"; + i18n::Boundary aBounds; + + { + static constexpr OUStringLiteral aTest = u"\u30B7\u30E3\u30C3\u30C8\u30C0\u30A6\u30F3"; + + aBounds = xBreak->getWordBoundary(aTest, 5, aLocale, + i18n::WordType::DICTIONARY_WORD, true); + + CPPUNIT_ASSERT_EQUAL(sal_Int32(0), aBounds.startPos); + CPPUNIT_ASSERT_EQUAL(sal_Int32(7), aBounds.endPos); + } + + { + static constexpr OUString aTest = u"\u9EBB\u306E\u8449\u9EBB\u306E\u8449"_ustr; + + aBounds = xBreak->getWordBoundary(aTest, 1, aLocale, + i18n::WordType::DICTIONARY_WORD, true); + + CPPUNIT_ASSERT_EQUAL(sal_Int32(0), aBounds.startPos); + CPPUNIT_ASSERT_EQUAL(sal_Int32(3), aBounds.endPos); + + aBounds = xBreak->getWordBoundary(aTest, 5, aLocale, + i18n::WordType::DICTIONARY_WORD, true); + + CPPUNIT_ASSERT_EQUAL(sal_Int32(3), aBounds.startPos); + CPPUNIT_ASSERT_EQUAL(sal_Int32(6), aBounds.endPos); + } +} + +void TestBreakIterator::testJapanese() +{ + doTestJapanese(m_xBreak); + + // fdo#78479 - test second / 
cached instantiation of xdictionary + uno::Reference< i18n::XBreakIterator > xTmpBreak(m_xSFactory->createInstance( + "com.sun.star.i18n.BreakIterator"), uno::UNO_QUERY_THROW); + + doTestJapanese(xTmpBreak); +} + +void TestBreakIterator::testChinese() +{ + lang::Locale aLocale; + aLocale.Language = "zh"; + aLocale.Country = "CN"; + + { + static constexpr OUStringLiteral aTest = u"\u6A35\u6A30\u69FE\u8919\U00029EDB"; + + i18n::Boundary aBounds = m_xBreak->getWordBoundary(aTest, 4, aLocale, + i18n::WordType::DICTIONARY_WORD, true); + CPPUNIT_ASSERT_EQUAL(sal_Int32(4), aBounds.startPos); + CPPUNIT_ASSERT_EQUAL(sal_Int32(6), aBounds.endPos); + } +} +void TestBreakIterator::setUp() +{ + BootstrapFixtureBase::setUp(); + m_xBreak.set(m_xSFactory->createInstance("com.sun.star.i18n.BreakIterator"), uno::UNO_QUERY_THROW); +} + +void TestBreakIterator::tearDown() +{ + m_xBreak.clear(); + BootstrapFixtureBase::tearDown(); +} + +CPPUNIT_TEST_SUITE_REGISTRATION(TestBreakIterator); + +CPPUNIT_PLUGIN_IMPLEMENT(); + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/i18npool/qa/cppunit/test_calendar.cxx b/i18npool/qa/cppunit/test_calendar.cxx new file mode 100644 index 0000000000..a522a4a742 --- /dev/null +++ b/i18npool/qa/cppunit/test_calendar.cxx @@ -0,0 +1,130 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; fill-column: 100 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ */ +#include +#include + +using namespace com::sun::star; + +class TestCalendar : public test::BootstrapFixtureBase +{ +public: + void testHijriGregorian(); + void testGetGregorianJulianDay(); + + CPPUNIT_TEST_SUITE(TestCalendar); + CPPUNIT_TEST(testHijriGregorian); + CPPUNIT_TEST(testGetGregorianJulianDay); + CPPUNIT_TEST_SUITE_END(); +}; + +void TestCalendar::testHijriGregorian() +{ + // 21-7-1443 (Hijri) == 22-2-2022 (Gregorian) + sal_Int32 day = 22, month = 2, year = 2022; + i18npool::Calendar_hijri::getHijri(&day, &month, &year); + CPPUNIT_ASSERT_EQUAL(sal_Int32(21), day); + CPPUNIT_ASSERT_EQUAL(sal_Int32(7), month); + CPPUNIT_ASSERT_EQUAL(sal_Int32(1443), year); + + i18npool::Calendar_hijri::ToGregorian(&day, &month, &year); + CPPUNIT_ASSERT_EQUAL(sal_Int32(22), day); + CPPUNIT_ASSERT_EQUAL(sal_Int32(2), month); + CPPUNIT_ASSERT_EQUAL(sal_Int32(2022), year); + + // 1-1-1 (Hijri) == 15-7-622 (Gregorian) + // NOTE: The calculated date is 15-7-622, as it was with the + // previous version of i18npool::Calendar_hijri::ToGregorian() + // but in some articles, 16-7-622 is considered the equivalent date + // https://en.wikipedia.org/wiki/622 + // This article states that 15-7-622 is correct: + // "On the Origins of the Hijrī Calendar: A Multi-Faceted Perspective + // Based on the Covenants of the Prophet and Specific Date Verification" + // https://www.mdpi.com/2077-1444/12/1/42/htm + day = 15; + month = 7; + year = 622; + i18npool::Calendar_hijri::getHijri(&day, &month, &year); + CPPUNIT_ASSERT_EQUAL(sal_Int32(1), day); + CPPUNIT_ASSERT_EQUAL(sal_Int32(1), month); + CPPUNIT_ASSERT_EQUAL(sal_Int32(1), year); + + i18npool::Calendar_hijri::ToGregorian(&day, &month, &year); + CPPUNIT_ASSERT_EQUAL(sal_Int32(15), day); + CPPUNIT_ASSERT_EQUAL(sal_Int32(7), month); + CPPUNIT_ASSERT_EQUAL(sal_Int32(622), year); + + // 1-1-100 (Hijri) == 2-8-718 (Gregorian) + // https://habibur.com/hijri/100/ + day = 2; + month = 8; + year = 718; + i18npool::Calendar_hijri::getHijri(&day, 
&month, &year); + CPPUNIT_ASSERT_EQUAL(sal_Int32(1), day); + CPPUNIT_ASSERT_EQUAL(sal_Int32(1), month); + CPPUNIT_ASSERT_EQUAL(sal_Int32(100), year); + + i18npool::Calendar_hijri::ToGregorian(&day, &month, &year); + CPPUNIT_ASSERT_EQUAL(sal_Int32(2), day); + CPPUNIT_ASSERT_EQUAL(sal_Int32(8), month); + CPPUNIT_ASSERT_EQUAL(sal_Int32(718), year); + + // 1-1-1000 (Hijri) == 19-10-1591 (Gregorian) + // NOTE: The calculated date is 18-10-1591, but there is inconsistency + // with this website, as it states it should be 19-10-1591 + // https://habibur.com/hijri/1000/ + day = 18; + month = 10; + year = 1591; + i18npool::Calendar_hijri::getHijri(&day, &month, &year); + CPPUNIT_ASSERT_EQUAL(sal_Int32(1), day); + CPPUNIT_ASSERT_EQUAL(sal_Int32(1), month); + CPPUNIT_ASSERT_EQUAL(sal_Int32(1000), year); + + i18npool::Calendar_hijri::ToGregorian(&day, &month, &year); + CPPUNIT_ASSERT_EQUAL(sal_Int32(18), day); + CPPUNIT_ASSERT_EQUAL(sal_Int32(10), month); + CPPUNIT_ASSERT_EQUAL(sal_Int32(1591), year); + + // 1-1-2000 (Hijri) == 7-1-2562 (Gregorian) + // NOTE: The calculated date is 7-1-2562, but there is inconsistency + // with this website, as it states it should be 8-1-2562 + // https://habibur.com/hijri/2000/ + day = 7; + month = 1; + year = 2562; + i18npool::Calendar_hijri::getHijri(&day, &month, &year); + CPPUNIT_ASSERT_EQUAL(sal_Int32(1), day); + CPPUNIT_ASSERT_EQUAL(sal_Int32(1), month); + CPPUNIT_ASSERT_EQUAL(sal_Int32(2000), year); + + i18npool::Calendar_hijri::ToGregorian(&day, &month, &year); + CPPUNIT_ASSERT_EQUAL(sal_Int32(7), day); + CPPUNIT_ASSERT_EQUAL(sal_Int32(1), month); + CPPUNIT_ASSERT_EQUAL(sal_Int32(2562), year); +} + +void TestCalendar::testGetGregorianJulianDay() +{ + // Julian day for 22-2-2022 (Gregorian) == 2459633 + // https://core2.gsfc.nasa.gov/time/julian.html + sal_Int32 lJulianDay, day = 22, month = 2, year = 2022; + lJulianDay = i18npool::Calendar_hijri::getJulianDay(day, month, year); + CPPUNIT_ASSERT_EQUAL(sal_Int32(2459633), lJulianDay); + 
+ i18npool::Calendar_hijri::getGregorianDay(lJulianDay, &day, &month, &year); + CPPUNIT_ASSERT_EQUAL(sal_Int32(22), day); + CPPUNIT_ASSERT_EQUAL(sal_Int32(2), month); + CPPUNIT_ASSERT_EQUAL(sal_Int32(2022), year); +} + +CPPUNIT_TEST_SUITE_REGISTRATION(TestCalendar); + +CPPUNIT_PLUGIN_IMPLEMENT(); + +/* vim:set shiftwidth=4 softtabstop=4 expandtab cinoptions=b1,g0,N-s cinkeys+=0=break: */ diff --git a/i18npool/qa/cppunit/test_characterclassification.cxx b/i18npool/qa/cppunit/test_characterclassification.cxx new file mode 100644 index 0000000000..06f6095a37 --- /dev/null +++ b/i18npool/qa/cppunit/test_characterclassification.cxx @@ -0,0 +1,289 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + */ + +#include +#include +#include + +using namespace ::com::sun::star; + +class TestCharacterClassification : public test::BootstrapFixtureBase +{ +public: + virtual void setUp() override; + virtual void tearDown() override; + +protected: + uno::Reference m_xCC; +}; + +//A test to ensure that our Title Case functionality is working +//http://lists.freedesktop.org/archives/libreoffice/2012-June/032767.html +//https://bz.apache.org/ooo/show_bug.cgi?id=30863 +CPPUNIT_TEST_FIXTURE(TestCharacterClassification, testTitleCase) +{ + lang::Locale aLocale; + aLocale.Language = "en"; + aLocale.Country = "US"; + + { + //basic example + OUString sTest("Some text"); + OUString sTitleCase = m_xCC->toTitle(sTest, 0, sTest.getLength(), aLocale); + CPPUNIT_ASSERT_EQUAL_MESSAGE("Should be title", OUString("Some Text"), sTitleCase); + OUString sUpperCase = m_xCC->toUpper(sTest, 0, sTest.getLength(), aLocale); + CPPUNIT_ASSERT_EQUAL_MESSAGE("Should be upper", OUString("SOME TEXT"), sUpperCase); + OUString 
sLowerCase = m_xCC->toLower(sTest, 0, sTest.getLength(), aLocale); + CPPUNIT_ASSERT_EQUAL_MESSAGE("Should be lower ", OUString("some text"), sLowerCase); + } + + { + //tricky one + static constexpr OUString aTest = u"\u01F3"_ustr; // LATIN SMALL LETTER DZ + OUString sTitleCase = m_xCC->toTitle(aTest, 0, aTest.getLength(), aLocale); + CPPUNIT_ASSERT_EQUAL_MESSAGE("Should be title", sal_Int32(1), sTitleCase.getLength()); + CPPUNIT_ASSERT_EQUAL_MESSAGE("Should be title", u'\u01F2', sTitleCase[0]); + OUString sUpperCase = m_xCC->toUpper(aTest, 0, aTest.getLength(), aLocale); + CPPUNIT_ASSERT_EQUAL_MESSAGE("Should be upper", sal_Int32(1), sUpperCase.getLength()); + CPPUNIT_ASSERT_EQUAL_MESSAGE("Should be upper", u'\u01F1', sUpperCase[0]); + OUString sLowerCase = m_xCC->toLower(aTest, 0, aTest.getLength(), aLocale); + CPPUNIT_ASSERT_EQUAL_MESSAGE("Should be lower ", sal_Int32(1), sLowerCase.getLength()); + CPPUNIT_ASSERT_EQUAL_MESSAGE("Should be lower ", u'\u01F3', sLowerCase[0]); + } +} + +//https://bugs.libreoffice.org/show_bug.cgi?id=69641 +CPPUNIT_TEST_FIXTURE(TestCharacterClassification, testStringType) +{ + lang::Locale aLocale; + aLocale.Language = "en"; + aLocale.Country = "US"; + + { + //simple case + OUString sTest("Some text"); + sal_Int32 nResult = m_xCC->getStringType(sTest, 0, sTest.getLength(), aLocale); + CPPUNIT_ASSERT_EQUAL(sal_Int32(230), nResult); + } + + { + //tricky case + static constexpr OUString sTest = u"\U0001D703"_ustr; // MATHEMATICAL ITALIC SMALL THETA + sal_Int32 nResult = m_xCC->getStringType(sTest, 0, sTest.getLength(), aLocale); + CPPUNIT_ASSERT_EQUAL(sal_Int32(228), nResult); + } + +} + +CPPUNIT_TEST_FIXTURE(TestCharacterClassification, testSigma) +{ + { + // From upper case + OUString sTest(u"ὈΔΥΣΣΕΎΣ"_ustr); + OUString sLowerCase = m_xCC->toLower(sTest, 0, sTest.getLength(), {}); + CPPUNIT_ASSERT_EQUAL_MESSAGE("Should be lower", u"ὀδυσσεύς"_ustr, sLowerCase); + OUString sUpperCase = m_xCC->toUpper(sLowerCase, 0, 
sLowerCase.getLength(), {}); + CPPUNIT_ASSERT_EQUAL_MESSAGE("Should be upper", sTest, sUpperCase); + } + + { + // From lower case + OUString sTest(u"ὀδυσσεύς"_ustr); + OUString sTitleCase = m_xCC->toTitle(sTest, 0, sTest.getLength(), {}); + CPPUNIT_ASSERT_EQUAL_MESSAGE("Should be title", u"Ὀδυσσεύς"_ustr, sTitleCase); + OUString sUpperCase = m_xCC->toUpper(sTest, 0, sTest.getLength(), {}); + CPPUNIT_ASSERT_EQUAL_MESSAGE("Should be upper", u"ὈΔΥΣΣΕΎΣ"_ustr, sUpperCase); + OUString sLowerCase = m_xCC->toLower(sUpperCase, 0, sUpperCase.getLength(), {}); + CPPUNIT_ASSERT_EQUAL_MESSAGE("Should be lower", sTest, sLowerCase); + } + + { + // From title case + OUString sTest(u"Ὀδυσσεύς"_ustr); + OUString sTitleCase = m_xCC->toTitle(sTest, 0, sTest.getLength(), {}); + CPPUNIT_ASSERT_EQUAL_MESSAGE("Should be title", u"Ὀδυσσεύς"_ustr, sTitleCase); + OUString sUpperCase = m_xCC->toUpper(sTest, 0, sTest.getLength(), {}); + CPPUNIT_ASSERT_EQUAL_MESSAGE("Should be upper", u"ὈΔΥΣΣΕΎΣ"_ustr, sUpperCase); + OUString sLowerCase = m_xCC->toLower(sTest, 0, sTest.getLength(), {}); + CPPUNIT_ASSERT_EQUAL_MESSAGE("Should be lower", u"ὀδυσσεύς"_ustr, sLowerCase); + } +} + +CPPUNIT_TEST_FIXTURE(TestCharacterClassification, testTdf96343) +{ + { + // From upper case + OUString sTest(u"ꙊꙌꙖ"_ustr); + OUString sLowerCase = m_xCC->toLower(sTest, 0, sTest.getLength(), {}); + CPPUNIT_ASSERT_EQUAL_MESSAGE("Should be lower", u"ꙋꙍꙗ"_ustr, sLowerCase); + OUString sUpperCase = m_xCC->toUpper(sLowerCase, 0, sLowerCase.getLength(), {}); + CPPUNIT_ASSERT_EQUAL_MESSAGE("Should be upper", sTest, sUpperCase); + } + + { + // From lower case + OUString sTest(u"ꙋꙍꙗ"_ustr); + OUString sTitleCase = m_xCC->toTitle(sTest, 0, sTest.getLength(), {}); + CPPUNIT_ASSERT_EQUAL_MESSAGE("Should be title", u"Ꙋꙍꙗ"_ustr, sTitleCase); + OUString sUpperCase = m_xCC->toUpper(sTest, 0, sTest.getLength(), {}); + CPPUNIT_ASSERT_EQUAL_MESSAGE("Should be upper", u"ꙊꙌꙖ"_ustr, sUpperCase); + OUString sLowerCase = 
m_xCC->toLower(sUpperCase, 0, sUpperCase.getLength(), {}); + CPPUNIT_ASSERT_EQUAL_MESSAGE("Should be lower", sTest, sLowerCase); + } + + { + // From title case + OUString sTest(u"Ꙋꙍꙗ"_ustr); + OUString sTitleCase = m_xCC->toTitle(sTest, 0, sTest.getLength(), {}); + CPPUNIT_ASSERT_EQUAL_MESSAGE("Should be title", u"Ꙋꙍꙗ"_ustr, sTitleCase); + OUString sUpperCase = m_xCC->toUpper(sTest, 0, sTest.getLength(), {}); + CPPUNIT_ASSERT_EQUAL_MESSAGE("Should be upper", u"ꙊꙌꙖ"_ustr, sUpperCase); + OUString sLowerCase = m_xCC->toLower(sTest, 0, sTest.getLength(), {}); + CPPUNIT_ASSERT_EQUAL_MESSAGE("Should be lower", u"ꙋꙍꙗ"_ustr, sLowerCase); + } +} + +CPPUNIT_TEST_FIXTURE(TestCharacterClassification, testTdf134766) +{ + { + // From upper case + OUString sTest(u"QꞋORBꞋAL"_ustr); + OUString sLowerCase = m_xCC->toLower(sTest, 0, sTest.getLength(), {}); + CPPUNIT_ASSERT_EQUAL_MESSAGE("Should be lower", u"qꞌorbꞌal"_ustr, sLowerCase); + OUString sUpperCase = m_xCC->toUpper(sLowerCase, 0, sLowerCase.getLength(), {}); + CPPUNIT_ASSERT_EQUAL_MESSAGE("Should be upper", sTest, sUpperCase); + } + + { + // From lower case + OUString sTest(u"qꞌorbꞌal"_ustr); + OUString sTitleCase = m_xCC->toTitle(sTest, 0, sTest.getLength(), {}); + CPPUNIT_ASSERT_EQUAL_MESSAGE("Should be title", u"Qꞌorbꞌal"_ustr, sTitleCase); + OUString sUpperCase = m_xCC->toUpper(sTest, 0, sTest.getLength(), {}); + CPPUNIT_ASSERT_EQUAL_MESSAGE("Should be upper", u"QꞋORBꞋAL"_ustr, sUpperCase); + OUString sLowerCase = m_xCC->toLower(sUpperCase, 0, sUpperCase.getLength(), {}); + CPPUNIT_ASSERT_EQUAL_MESSAGE("Should be lower", sTest, sLowerCase); + } + + { + // From title case + OUString sTest(u"Qꞌorbꞌal"_ustr); + OUString sTitleCase = m_xCC->toTitle(sTest, 0, sTest.getLength(), {}); + CPPUNIT_ASSERT_EQUAL_MESSAGE("Should be title", u"Qꞌorbꞌal"_ustr, sTitleCase); + OUString sUpperCase = m_xCC->toUpper(sTest, 0, sTest.getLength(), {}); + CPPUNIT_ASSERT_EQUAL_MESSAGE("Should be upper", u"QꞋORBꞋAL"_ustr, sUpperCase); + OUString 
sLowerCase = m_xCC->toLower(sTest, 0, sTest.getLength(), {}); + CPPUNIT_ASSERT_EQUAL_MESSAGE("Should be lower", u"qꞌorbꞌal"_ustr, sLowerCase); + } +} + +CPPUNIT_TEST_FIXTURE(TestCharacterClassification, testTdf97152) +{ + { + // From upper case + OUString sTest(u"ͲͰϽϾϿͿϏϹ"_ustr); + OUString sLowerCase = m_xCC->toLower(sTest, 0, sTest.getLength(), {}); + CPPUNIT_ASSERT_EQUAL_MESSAGE("Should be lower", u"ͳͱͻͼͽϳϗϲ"_ustr, sLowerCase); + OUString sUpperCase = m_xCC->toUpper(sLowerCase, 0, sLowerCase.getLength(), {}); + CPPUNIT_ASSERT_EQUAL_MESSAGE("Should be upper", sTest, sUpperCase); + } + + { + // From lower case + OUString sTest(u"ͳͱͻͼͽϳϗϲ"_ustr); + OUString sTitleCase = m_xCC->toTitle(sTest, 0, sTest.getLength(), {}); + CPPUNIT_ASSERT_EQUAL_MESSAGE("Should be title", u"Ͳͱͻͼͽϳϗϲ"_ustr, sTitleCase); + OUString sUpperCase = m_xCC->toUpper(sTest, 0, sTest.getLength(), {}); + CPPUNIT_ASSERT_EQUAL_MESSAGE("Should be upper", u"ͲͰϽϾϿͿϏϹ"_ustr, sUpperCase); + OUString sLowerCase = m_xCC->toLower(sUpperCase, 0, sUpperCase.getLength(), {}); + CPPUNIT_ASSERT_EQUAL_MESSAGE("Should be lower", sTest, sLowerCase); + } + + { + // From title case + OUString sTest(u"Ͳͱͻͼͽϳϗϲ"_ustr); + OUString sTitleCase = m_xCC->toTitle(sTest, 0, sTest.getLength(), {}); + CPPUNIT_ASSERT_EQUAL_MESSAGE("Should be title", u"Ͳͱͻͼͽϳϗϲ"_ustr, sTitleCase); + OUString sUpperCase = m_xCC->toUpper(sTest, 0, sTest.getLength(), {}); + CPPUNIT_ASSERT_EQUAL_MESSAGE("Should be upper", u"ͲͰϽϾϿͿϏϹ"_ustr, sUpperCase); + OUString sLowerCase = m_xCC->toLower(sTest, 0, sTest.getLength(), {}); + CPPUNIT_ASSERT_EQUAL_MESSAGE("Should be lower", u"ͳͱͻͼͽϳϗϲ"_ustr, sLowerCase); + } +} + +CPPUNIT_TEST_FIXTURE(TestCharacterClassification, testSurrogatePairs) +{ + { + // No case mapping + OUString sTest(u"\U0001F600"_ustr); + OUString sLowerCase = m_xCC->toLower(sTest, 0, sTest.getLength(), {}); + CPPUNIT_ASSERT_EQUAL_MESSAGE("Should be lower", u"\U0001F600"_ustr, sLowerCase); + OUString sUpperCase = m_xCC->toUpper(sLowerCase, 
0, sLowerCase.getLength(), {}); + CPPUNIT_ASSERT_EQUAL_MESSAGE("Should be upper", sTest, sUpperCase); + } + + { + // Case mapping + OUString sTest(u"\U00010400"_ustr); + OUString sLowerCase = m_xCC->toLower(sTest, 0, sTest.getLength(), {}); + CPPUNIT_ASSERT_EQUAL_MESSAGE("Should be lower", u"\U00010428"_ustr, sLowerCase); + OUString sUpperCase = m_xCC->toUpper(sLowerCase, 0, sLowerCase.getLength(), {}); + CPPUNIT_ASSERT_EQUAL_MESSAGE("Should be upper", sTest, sUpperCase); + } +} + +CPPUNIT_TEST_FIXTURE(TestCharacterClassification, testAdlam) +{ + OUString sUpper(u"𞤀𞤁𞤂𞤃𞤄𞤅𞤆𞤇𞤈𞤉𞤊𞤋𞤌𞤍𞤎𞤏𞤐𞤑𞤒𞤓𞤔𞤕𞤖𞤗𞤘𞤙𞤚𞤛𞤜𞤝𞤞𞤟𞤠𞤡"_ustr); + OUString sLower(u"𞤢𞤣𞤤𞤥𞤦𞤧𞤨𞤩𞤪𞤫𞤬𞤭𞤮𞤯𞤰𞤱𞤲𞤳𞤴𞤵𞤶𞤷𞤸𞤹𞤺𞤻𞤼𞤽𞤾𞤿𞥀𞥁𞥂𞥃"_ustr); + OUString sTitle = sLower; // Adlam doesn’t have title case? + { + // From upper case + OUString sLowerRes = m_xCC->toLower(sUpper, 0, sUpper.getLength(), {}); + CPPUNIT_ASSERT_EQUAL_MESSAGE("Should be lower", sLower, sLowerRes); + OUString sUpperRes = m_xCC->toUpper(sLowerRes, 0, sLower.getLength(), {}); + CPPUNIT_ASSERT_EQUAL_MESSAGE("Should be upper", sUpper, sUpperRes); + } + + { + // From lower case + OUString sTitleRes = m_xCC->toTitle(sLower, 0, sLower.getLength(), {}); + CPPUNIT_ASSERT_EQUAL_MESSAGE("Should be title", sTitle, sTitleRes); + OUString sUpperRes = m_xCC->toUpper(sLower, 0, sLower.getLength(), {}); + CPPUNIT_ASSERT_EQUAL_MESSAGE("Should be upper", sUpper, sUpperRes); + OUString sLowerRes = m_xCC->toLower(sUpperRes, 0, sUpperRes.getLength(), {}); + CPPUNIT_ASSERT_EQUAL_MESSAGE("Should be lower", sLower, sLowerRes); + } + + { + // From title case + OUString sTitleRes = m_xCC->toTitle(sTitle, 0, sTitle.getLength(), {}); + CPPUNIT_ASSERT_EQUAL_MESSAGE("Should be title", sTitle, sTitleRes); + OUString sUpperRes = m_xCC->toUpper(sTitle, 0, sTitle.getLength(), {}); + CPPUNIT_ASSERT_EQUAL_MESSAGE("Should be upper", sUpper, sUpperRes); + OUString sLowerRes = m_xCC->toLower(sTitle, 0, sTitle.getLength(), {}); + CPPUNIT_ASSERT_EQUAL_MESSAGE("Should be lower", sLower, sLowerRes); + } +} + 
+void TestCharacterClassification::setUp() +{ + BootstrapFixtureBase::setUp(); + m_xCC.set(m_xSFactory->createInstance("com.sun.star.i18n.CharacterClassification"), uno::UNO_QUERY_THROW); +} + +void TestCharacterClassification::tearDown() +{ + BootstrapFixtureBase::tearDown(); + m_xCC.clear(); +} + +CPPUNIT_PLUGIN_IMPLEMENT(); + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/i18npool/qa/cppunit/test_defaultnumberingprovider.cxx b/i18npool/qa/cppunit/test_defaultnumberingprovider.cxx new file mode 100644 index 0000000000..7e37f1c28f --- /dev/null +++ b/i18npool/qa/cppunit/test_defaultnumberingprovider.cxx @@ -0,0 +1,470 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + */ + +#include + +#include +#include +#include +#include + +#include + +#include + +using namespace ::com::sun::star; + +/// i18npool defaultnumberingprovider tests. +class I18npoolDefaultnumberingproviderTest : public test::BootstrapFixture +{ +}; + +CPPUNIT_TEST_FIXTURE(I18npoolDefaultnumberingproviderTest, testNumberingIdentifiers) +{ + // All numbering identifiers must be unique. + std::unordered_map aMap; + std::vector aFail; + + uno::Reference xFormatter( + text::DefaultNumberingProvider::create(mxComponentContext), uno::UNO_QUERY); + + // Do not use getSupportedNumberingTypes() because it depends on + // configuration whether CTL and CJK numberings are included or not. + // Also do not test for known values of + // offapi/com/sun/star/style/NumberingType.idl and miss newly added values. + // Instead, enumerate until an empty ID is returned but also check there + // are at least the known NumberingType values covered, just in case the + // table wasn't maintained. 
So this may have to be adapted from time to + // time. + constexpr sal_Int16 kLastKnown = css::style::NumberingType::NUMBER_LEGAL_KO; + for (sal_Int16 i = 0; i < SAL_MAX_INT16; ++i) + { + OUString aID(xFormatter->getNumberingIdentifier(i)); + if (aID.isEmpty() && i > kLastKnown) + break; // for + + switch (i) + { + case css::style::NumberingType::TRANSLITERATION: + // TODO: why does this have no identifier? + case css::style::NumberingType::NUMBER_UPPER_KO: + // FIXME: duplicate of NUMBER_UPPER_ZH_TW + case css::style::NumberingType::NUMBER_INDIC_DEVANAGARI: + // FIXME: duplicate of NUMBER_EAST_ARABIC_INDIC + break; + default: + if (aID.isEmpty() || !aMap.insert(std::pair(aID, i)).second) + { + aFail.emplace_back( + "Numbering: " + OString::number(i) + " \"" + aID.toUtf8() + "\"" + + (aID.isEmpty() ? ""_ostr + : OString(" duplicate of " + OString::number(aMap[aID]))) + + "\n"); + } + } + } + + if (!aFail.empty()) + { + OString aMsg("Not unique numbering identifiers:\n"_ostr); + for (auto const& r : aFail) + aMsg += r; + CPPUNIT_ASSERT_MESSAGE(aMsg.getStr(), false); + } +} + +CPPUNIT_TEST_FIXTURE(I18npoolDefaultnumberingproviderTest, testArabicZero) +{ + // 1 -> "01" + uno::Reference xFormatter( + text::DefaultNumberingProvider::create(mxComponentContext), uno::UNO_QUERY); + uno::Sequence aProperties = { + comphelper::makePropertyValue("NumberingType", + static_cast(style::NumberingType::ARABIC_ZERO)), + comphelper::makePropertyValue("Value", static_cast(1)), + }; + lang::Locale aLocale; + OUString aActual = xFormatter->makeNumberingString(aProperties, aLocale); + // Without the accompanying fix in place, this test would have failed with a + // lang.IllegalArgumentException, support for ARABIC_ZERO was missing. 
+ CPPUNIT_ASSERT_EQUAL(OUString("01"), aActual); + + // 10 -> "10" + aProperties = { + comphelper::makePropertyValue("NumberingType", + static_cast(style::NumberingType::ARABIC_ZERO)), + comphelper::makePropertyValue("Value", static_cast(10)), + }; + aActual = xFormatter->makeNumberingString(aProperties, aLocale); + CPPUNIT_ASSERT_EQUAL(OUString("10"), aActual); +} + +CPPUNIT_TEST_FIXTURE(I18npoolDefaultnumberingproviderTest, testArabicZero3) +{ + // 10 -> "010" + uno::Reference xFormatter( + text::DefaultNumberingProvider::create(mxComponentContext), uno::UNO_QUERY); + uno::Sequence aProperties = { + comphelper::makePropertyValue("NumberingType", + static_cast(style::NumberingType::ARABIC_ZERO3)), + comphelper::makePropertyValue("Value", static_cast(10)), + }; + lang::Locale aLocale; + OUString aActual = xFormatter->makeNumberingString(aProperties, aLocale); + // Without the accompanying fix in place, this test would have failed with a + // lang.IllegalArgumentException, support for ARABIC_ZERO3 was missing. 
+ CPPUNIT_ASSERT_EQUAL(OUString("010"), aActual); + + // 100 -> "100" + aProperties = { + comphelper::makePropertyValue("NumberingType", + static_cast(style::NumberingType::ARABIC_ZERO3)), + comphelper::makePropertyValue("Value", static_cast(100)), + }; + aActual = xFormatter->makeNumberingString(aProperties, aLocale); + CPPUNIT_ASSERT_EQUAL(OUString("100"), aActual); +} + +CPPUNIT_TEST_FIXTURE(I18npoolDefaultnumberingproviderTest, testArabicZero4) +{ + // 100 -> "0100" + uno::Reference xFormatter( + text::DefaultNumberingProvider::create(mxComponentContext), uno::UNO_QUERY); + uno::Sequence aProperties = { + comphelper::makePropertyValue("NumberingType", + static_cast(style::NumberingType::ARABIC_ZERO4)), + comphelper::makePropertyValue("Value", static_cast(100)), + }; + lang::Locale aLocale; + OUString aActual = xFormatter->makeNumberingString(aProperties, aLocale); + // Without the accompanying fix in place, this test would have failed with a + // lang.IllegalArgumentException, support for ARABIC_ZERO4 was missing. 
+ CPPUNIT_ASSERT_EQUAL(OUString("0100"), aActual); + + // 1000 -> "1000" + aProperties = { + comphelper::makePropertyValue("NumberingType", + static_cast(style::NumberingType::ARABIC_ZERO4)), + comphelper::makePropertyValue("Value", static_cast(1000)), + }; + aActual = xFormatter->makeNumberingString(aProperties, aLocale); + CPPUNIT_ASSERT_EQUAL(OUString("1000"), aActual); +} + +CPPUNIT_TEST_FIXTURE(I18npoolDefaultnumberingproviderTest, testArabicZero5) +{ + // 1000 -> "01000" + uno::Reference xFormatter( + text::DefaultNumberingProvider::create(mxComponentContext), uno::UNO_QUERY); + uno::Sequence aProperties = { + comphelper::makePropertyValue("NumberingType", + static_cast(style::NumberingType::ARABIC_ZERO5)), + comphelper::makePropertyValue("Value", static_cast(1000)), + }; + lang::Locale aLocale; + OUString aActual = xFormatter->makeNumberingString(aProperties, aLocale); + // Without the accompanying fix in place, this test would have failed with a + // lang.IllegalArgumentException, support for ARABIC_ZERO5 was missing. 
+    CPPUNIT_ASSERT_EQUAL(OUString("01000"), aActual);
+
+    // 10000 -> "10000"
+    aProperties = {
+        comphelper::makePropertyValue("NumberingType",
+                                      static_cast(style::NumberingType::ARABIC_ZERO5)),
+        comphelper::makePropertyValue("Value", static_cast(10000)),
+    };
+    aActual = xFormatter->makeNumberingString(aProperties, aLocale);
+    CPPUNIT_ASSERT_EQUAL(OUString("10000"), aActual);
+}
+
+CPPUNIT_TEST_FIXTURE(I18npoolDefaultnumberingproviderTest, testKoreanCounting)
+{
+    // 1 -> "일"
+    uno::Reference xFormatter( // NOTE(review): template arguments (here and on the static_cast / uno::Sequence uses below) look stripped in extraction - confirm against upstream
+        text::DefaultNumberingProvider::create(mxComponentContext), uno::UNO_QUERY);
+    uno::Sequence aProperties = {
+        comphelper::makePropertyValue(
+            "NumberingType", static_cast(style::NumberingType::NUMBER_HANGUL_KO)),
+        comphelper::makePropertyValue("Value", static_cast(1)),
+    };
+    lang::Locale aLocale;
+    OUString aActual = xFormatter->makeNumberingString(aProperties, aLocale);
+    // Without the accompanying fix in place, this test would have failed with a
+    // lang.IllegalArgumentException, support for NUMBER_HANGUL_KO was missing.
+    CPPUNIT_ASSERT_EQUAL(u"\uc77c"_ustr, aActual);
+
+    // 10 -> "십"
+    aProperties = {
+        comphelper::makePropertyValue(
+            "NumberingType", static_cast(style::NumberingType::NUMBER_HANGUL_KO)),
+        comphelper::makePropertyValue("Value", static_cast(10)),
+    };
+    aActual = xFormatter->makeNumberingString(aProperties, aLocale);
+    CPPUNIT_ASSERT_EQUAL(u"\uc2ed"_ustr, aActual);
+
+    // 100 -> "백"
+    aProperties = {
+        comphelper::makePropertyValue(
+            "NumberingType", static_cast(style::NumberingType::NUMBER_HANGUL_KO)),
+        comphelper::makePropertyValue("Value", static_cast(100)),
+    };
+    aActual = xFormatter->makeNumberingString(aProperties, aLocale);
+    CPPUNIT_ASSERT_EQUAL(u"\ubc31"_ustr, aActual);
+}
+
+CPPUNIT_TEST_FIXTURE(I18npoolDefaultnumberingproviderTest, testKoreanLegal)
+{
+    // 1 -> "하나"
+    uno::Reference xFormatter(
+        text::DefaultNumberingProvider::create(mxComponentContext), uno::UNO_QUERY);
+    uno::Sequence aProperties = {
+        comphelper::makePropertyValue(
+            "NumberingType", static_cast(style::NumberingType::NUMBER_LEGAL_KO)),
+        comphelper::makePropertyValue("Value", static_cast(1)),
+    };
+    lang::Locale aLocale;
+    OUString aActual = xFormatter->makeNumberingString(aProperties, aLocale);
+    // Without the accompanying fix in place, this test would have failed with a
+    // lang.IllegalArgumentException, support for NUMBER_LEGAL_KO was missing.
+    CPPUNIT_ASSERT_EQUAL(u"\ud558\ub098"_ustr, aActual);
+
+    // 2 -> "둘"
+    aProperties = {
+        comphelper::makePropertyValue(
+            "NumberingType", static_cast(style::NumberingType::NUMBER_LEGAL_KO)),
+        comphelper::makePropertyValue("Value", static_cast(2)),
+    };
+    aActual = xFormatter->makeNumberingString(aProperties, aLocale);
+    CPPUNIT_ASSERT_EQUAL(u"\ub458"_ustr, aActual);
+
+    // 3 -> "셋"
+    aProperties = {
+        comphelper::makePropertyValue(
+            "NumberingType", static_cast(style::NumberingType::NUMBER_LEGAL_KO)),
+        comphelper::makePropertyValue("Value", static_cast(3)),
+    };
+    aActual = xFormatter->makeNumberingString(aProperties, aLocale);
+    CPPUNIT_ASSERT_EQUAL(u"\uc14b"_ustr, aActual);
+
+    // 4 -> "넷"
+    aProperties = {
+        comphelper::makePropertyValue(
+            "NumberingType", static_cast(style::NumberingType::NUMBER_LEGAL_KO)),
+        comphelper::makePropertyValue("Value", static_cast(4)),
+    };
+    aActual = xFormatter->makeNumberingString(aProperties, aLocale);
+    CPPUNIT_ASSERT_EQUAL(u"\ub137"_ustr, aActual);
+
+    // 5 -> "다섯"
+    aProperties = {
+        comphelper::makePropertyValue(
+            "NumberingType", static_cast(style::NumberingType::NUMBER_LEGAL_KO)),
+        comphelper::makePropertyValue("Value", static_cast(5)),
+    };
+    aActual = xFormatter->makeNumberingString(aProperties, aLocale);
+    CPPUNIT_ASSERT_EQUAL(u"\ub2e4\uc12f"_ustr, aActual);
+    // 6 -> "여섯"
+    aProperties = {
+        comphelper::makePropertyValue(
+            "NumberingType", static_cast(style::NumberingType::NUMBER_LEGAL_KO)),
+        comphelper::makePropertyValue("Value", static_cast(6)),
+    };
+    aActual = xFormatter->makeNumberingString(aProperties, aLocale);
+    CPPUNIT_ASSERT_EQUAL(u"\uc5ec\uc12f"_ustr, aActual);
+    // 7 -> "일곱"
+    aProperties = {
+        comphelper::makePropertyValue(
+            "NumberingType", static_cast(style::NumberingType::NUMBER_LEGAL_KO)),
+        comphelper::makePropertyValue("Value", static_cast(7)),
+    };
+    aActual = xFormatter->makeNumberingString(aProperties, aLocale);
+    CPPUNIT_ASSERT_EQUAL(u"\uc77c\uacf1"_ustr, aActual);
+
+    // 8 -> "여덟"
+    aProperties = {
+        comphelper::makePropertyValue(
+            "NumberingType", static_cast(style::NumberingType::NUMBER_LEGAL_KO)),
+        comphelper::makePropertyValue("Value", static_cast(8)),
+    };
+    aActual = xFormatter->makeNumberingString(aProperties, aLocale);
+    CPPUNIT_ASSERT_EQUAL(u"\uc5ec\ub35f"_ustr, aActual);
+
+    // 9 -> "아홉"
+    aProperties = {
+        comphelper::makePropertyValue(
+            "NumberingType", static_cast(style::NumberingType::NUMBER_LEGAL_KO)),
+        comphelper::makePropertyValue("Value", static_cast(9)),
+    };
+    aActual = xFormatter->makeNumberingString(aProperties, aLocale);
+    CPPUNIT_ASSERT_EQUAL(u"\uc544\ud649"_ustr, aActual);
+
+    // 10 -> "열"
+    aProperties = {
+        comphelper::makePropertyValue(
+            "NumberingType", static_cast(style::NumberingType::NUMBER_LEGAL_KO)),
+        comphelper::makePropertyValue("Value", static_cast(10)),
+    };
+    aActual = xFormatter->makeNumberingString(aProperties, aLocale);
+    CPPUNIT_ASSERT_EQUAL(u"\uc5f4"_ustr, aActual);
+
+    // 21 -> "스물하나" (the tens word "스물" followed by the word for 1 asserted above)
+    aProperties = {
+        comphelper::makePropertyValue(
+            "NumberingType", static_cast(style::NumberingType::NUMBER_LEGAL_KO)),
+        comphelper::makePropertyValue("Value", static_cast(21)),
+    };
+    aActual = xFormatter->makeNumberingString(aProperties, aLocale);
+    CPPUNIT_ASSERT_EQUAL(u"\uc2a4\ubb3c\ud558\ub098"_ustr, aActual);
+
+    // 32 -> "서른둘"
+    aProperties = {
+        comphelper::makePropertyValue(
+            "NumberingType", static_cast(style::NumberingType::NUMBER_LEGAL_KO)),
+        comphelper::makePropertyValue("Value", static_cast(32)),
+    };
+    aActual = xFormatter->makeNumberingString(aProperties, aLocale);
+    CPPUNIT_ASSERT_EQUAL(u"\uc11c\ub978\ub458"_ustr, aActual);
+
+    // 43 -> "마흔셋"
+    aProperties = {
+        comphelper::makePropertyValue(
+            "NumberingType", static_cast(style::NumberingType::NUMBER_LEGAL_KO)),
+        comphelper::makePropertyValue("Value", static_cast(43)),
+    };
+    aActual = xFormatter->makeNumberingString(aProperties, aLocale);
+    CPPUNIT_ASSERT_EQUAL(u"\ub9c8\ud754\uc14b"_ustr, aActual);
+
+    // 54 -> "쉰넷"
+    aProperties = {
+        comphelper::makePropertyValue(
+            "NumberingType", static_cast(style::NumberingType::NUMBER_LEGAL_KO)),
+        comphelper::makePropertyValue("Value", static_cast(54)),
+    };
+    aActual = xFormatter->makeNumberingString(aProperties, aLocale);
+    CPPUNIT_ASSERT_EQUAL(u"\uc270\ub137"_ustr, aActual);
+
+    // 65 -> "예순다섯"
+    aProperties = {
+        comphelper::makePropertyValue(
+            "NumberingType", static_cast(style::NumberingType::NUMBER_LEGAL_KO)),
+        comphelper::makePropertyValue("Value", static_cast(65)),
+    };
+    aActual = xFormatter->makeNumberingString(aProperties, aLocale);
+    CPPUNIT_ASSERT_EQUAL(u"\uc608\uc21c\ub2e4\uc12f"_ustr, aActual);
+
+    // 76 -> "일흔여섯"
+    aProperties = {
+        comphelper::makePropertyValue(
+            "NumberingType", static_cast(style::NumberingType::NUMBER_LEGAL_KO)),
+        comphelper::makePropertyValue("Value", static_cast(76)),
+    };
+    aActual = xFormatter->makeNumberingString(aProperties, aLocale);
+    CPPUNIT_ASSERT_EQUAL(u"\uc77c\ud754\uc5ec\uc12f"_ustr, aActual);
+
+    // 87 -> "여든일곱"
+    aProperties = {
+        comphelper::makePropertyValue(
+            "NumberingType", static_cast(style::NumberingType::NUMBER_LEGAL_KO)),
+        comphelper::makePropertyValue("Value", static_cast(87)),
+    };
+    aActual = xFormatter->makeNumberingString(aProperties, aLocale);
+    CPPUNIT_ASSERT_EQUAL(u"\uc5ec\ub4e0\uc77c\uacf1"_ustr, aActual);
+
+    // 98 -> "아흔여덟"
+    aProperties = {
+        comphelper::makePropertyValue(
+            "NumberingType", static_cast(style::NumberingType::NUMBER_LEGAL_KO)),
+        comphelper::makePropertyValue("Value", static_cast(98)),
+    };
+    aActual = xFormatter->makeNumberingString(aProperties, aLocale);
+    CPPUNIT_ASSERT_EQUAL(u"\uc544\ud754\uc5ec\ub35f"_ustr, aActual);
+
+    // 99 -> "아흔아홉"
+    aProperties = {
+        comphelper::makePropertyValue(
+            "NumberingType", static_cast(style::NumberingType::NUMBER_LEGAL_KO)),
+        comphelper::makePropertyValue("Value", static_cast(99)),
+    };
+    aActual = xFormatter->makeNumberingString(aProperties, aLocale);
+    CPPUNIT_ASSERT_EQUAL(u"\uc544\ud754\uc544\ud649"_ustr, aActual);
+}
+
+CPPUNIT_TEST_FIXTURE(I18npoolDefaultnumberingproviderTest, testKoreanDigital)
+{
+    // 1 -> "일"
+    uno::Reference xFormatter( // NOTE(review): template arguments (here and on the static_cast / uno::Sequence uses below) look stripped in extraction - confirm against upstream
+        text::DefaultNumberingProvider::create(mxComponentContext), uno::UNO_QUERY);
+    uno::Sequence aProperties = {
+        comphelper::makePropertyValue(
+            "NumberingType", static_cast(style::NumberingType::NUMBER_DIGITAL_KO)),
+        comphelper::makePropertyValue("Value", static_cast(1)),
+    };
+    lang::Locale aLocale;
+    OUString aActual = xFormatter->makeNumberingString(aProperties, aLocale);
+    // Without the accompanying fix in place, this test would have failed with a
+    // lang.IllegalArgumentException, support for NUMBER_DIGITAL_KO was missing.
+    CPPUNIT_ASSERT_EQUAL(u"\uc77c"_ustr, aActual);
+
+    // 10 -> "일영" (one Hangul syllable per decimal digit: "일" for 1, "영" for 0)
+    aProperties = {
+        comphelper::makePropertyValue(
+            "NumberingType", static_cast(style::NumberingType::NUMBER_DIGITAL_KO)),
+        comphelper::makePropertyValue("Value", static_cast(10)),
+    };
+    aActual = xFormatter->makeNumberingString(aProperties, aLocale);
+    CPPUNIT_ASSERT_EQUAL(u"\uc77c\uc601"_ustr, aActual);
+
+    // 100 -> "일영영" (again digit by digit)
+    aProperties = {
+        comphelper::makePropertyValue(
+            "NumberingType", static_cast(style::NumberingType::NUMBER_DIGITAL_KO)),
+        comphelper::makePropertyValue("Value", static_cast(100)),
+    };
+    aActual = xFormatter->makeNumberingString(aProperties, aLocale);
+    CPPUNIT_ASSERT_EQUAL(u"\uc77c\uc601\uc601"_ustr, aActual);
+}
+
+CPPUNIT_TEST_FIXTURE(I18npoolDefaultnumberingproviderTest, testKoreanDigital2)
+{
+    // 1 -> "一"
+    uno::Reference xFormatter(
+        text::DefaultNumberingProvider::create(mxComponentContext), uno::UNO_QUERY);
+    uno::Sequence aProperties = {
+        comphelper::makePropertyValue(
+            "NumberingType", static_cast(style::NumberingType::NUMBER_DIGITAL2_KO)),
+        comphelper::makePropertyValue("Value", static_cast(1)),
+    };
+    lang::Locale aLocale;
+    OUString aActual = xFormatter->makeNumberingString(aProperties, aLocale);
+    // Without the accompanying fix in place, this test would have failed with a
+    // lang.IllegalArgumentException, support for NUMBER_DIGITAL2_KO was missing.
+    CPPUNIT_ASSERT_EQUAL(u"\u4e00"_ustr, aActual);
+
+    // 10 -> "一零" (one ideograph per decimal digit: "一" for 1, "零" for 0)
+    aProperties = {
+        comphelper::makePropertyValue(
+            "NumberingType", static_cast(style::NumberingType::NUMBER_DIGITAL2_KO)),
+        comphelper::makePropertyValue("Value", static_cast(10)),
+    };
+    aActual = xFormatter->makeNumberingString(aProperties, aLocale);
+    CPPUNIT_ASSERT_EQUAL(u"\u4e00\u96f6"_ustr, aActual);
+
+    // 100 -> "一零零" (again digit by digit)
+    aProperties = {
+        comphelper::makePropertyValue(
+            "NumberingType", static_cast(style::NumberingType::NUMBER_DIGITAL2_KO)),
+        comphelper::makePropertyValue("Value", static_cast(100)),
+    };
+    aActual = xFormatter->makeNumberingString(aProperties, aLocale);
+    CPPUNIT_ASSERT_EQUAL(u"\u4e00\u96f6\u96f6"_ustr, aActual);
+}
+
+CPPUNIT_PLUGIN_IMPLEMENT();
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/i18npool/qa/cppunit/test_ordinalsuffix.cxx b/i18npool/qa/cppunit/test_ordinalsuffix.cxx
new file mode 100644
index 0000000000..be21f38ca5
--- /dev/null
+++ b/i18npool/qa/cppunit/test_ordinalsuffix.cxx
@@ -0,0 +1,86 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+#include // NOTE(review): header name missing here and on the following #include lines; looks stripped in extraction - restore from upstream
+#include
+#include
+#include
+
+using namespace com::sun::star;
+
+class TestOrdinalSuffix : public test::BootstrapFixtureBase
+{
+private:
+    uno::Reference m_xOrdinal; // NOTE(review): template argument looks stripped in extraction (also on uno::Sequence below) - confirm against upstream
+
+public:
+    virtual void setUp() override;
+    virtual void tearDown() override;
+
+    void testFrench();
+    void testEnglish();
+
+    CPPUNIT_TEST_SUITE(TestOrdinalSuffix);
+    CPPUNIT_TEST(testFrench);
+    CPPUNIT_TEST(testEnglish);
+    CPPUNIT_TEST_SUITE_END();
+};
+
+void TestOrdinalSuffix::setUp()
+{
+    BootstrapFixtureBase::setUp();
+    m_xOrdinal.set(m_xSFactory->createInstance("com.sun.star.i18n.OrdinalSuffix"), uno::UNO_QUERY_THROW);
+}
+
+void TestOrdinalSuffix::tearDown()
+{
+    m_xOrdinal.clear();
+    BootstrapFixtureBase::tearDown();
+}
+
+void TestOrdinalSuffix::testFrench()
+{
+    lang::Locale aLocale("fr", "LU", "");
+
+    // 1 -> "1er": the suffixes returned for 1 must include "er"
+    uno::Sequence aSuffixes = m_xOrdinal->getOrdinalSuffix(1, aLocale);
+    const OUString* pFind = std::find(std::cbegin(aSuffixes), std::cend(aSuffixes), OUString("er"));
+    CPPUNIT_ASSERT(pFind != std::cend(aSuffixes));
+
+    // 2 -> "2e" (likewise 3e, etc.): the suffixes returned for 2 must include "e"
+    aSuffixes = m_xOrdinal->getOrdinalSuffix(2, aLocale);
+    pFind = std::find(std::cbegin(aSuffixes), std::cend(aSuffixes), OUString("e"));
+    CPPUNIT_ASSERT(pFind != std::cend(aSuffixes));
+}
+
+void TestOrdinalSuffix::testEnglish()
+{
+    lang::Locale aLocale("en", "US", "");
+
+    // 1 -> "1st": the suffixes returned for 1 must include "st"
+    uno::Sequence aSuffixes = m_xOrdinal->getOrdinalSuffix(1, aLocale);
+    const OUString* pFind = std::find(std::cbegin(aSuffixes), std::cend(aSuffixes), OUString("st"));
+    CPPUNIT_ASSERT(pFind != std::cend(aSuffixes));
+
+    // 2 -> "2nd": the suffixes returned for 2 must include "nd"
+    aSuffixes = m_xOrdinal->getOrdinalSuffix(2, aLocale);
+    pFind = std::find(std::cbegin(aSuffixes), std::cend(aSuffixes), OUString("nd"));
+    CPPUNIT_ASSERT(pFind != std::cend(aSuffixes));
+
+    // 3 -> "3rd": the suffixes returned for 3 must include "rd"
+    aSuffixes = m_xOrdinal->getOrdinalSuffix(3, aLocale);
+    pFind = std::find(std::cbegin(aSuffixes), std::cend(aSuffixes), OUString("rd"));
+    CPPUNIT_ASSERT(pFind != std::cend(aSuffixes));
+}
+
+
+CPPUNIT_TEST_SUITE_REGISTRATION( TestOrdinalSuffix );
+
+CPPUNIT_PLUGIN_IMPLEMENT();
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/i18npool/qa/cppunit/test_textsearch.cxx b/i18npool/qa/cppunit/test_textsearch.cxx
new file mode 100644
index 0000000000..2efb2b9cdc
--- /dev/null
+++ b/i18npool/qa/cppunit/test_textsearch.cxx
@@ -0,0 +1,544 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * This file incorporates work covered by the following license notice:
+ *
+ *   Licensed to the Apache Software Foundation (ASF) under one or more
+ *   contributor license agreements. See the NOTICE file distributed
+ *   with this work for additional information regarding copyright
+ *   ownership.
The ASF licenses this file to you under the Apache
+ *   License, Version 2.0 (the "License"); you may not use this file
+ *   except in compliance with the License. You may obtain a copy of
+ *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
+ */
+
+#include // NOTE(review): header name missing here and on the following #include lines; looks stripped in extraction - restore from upstream
+#include
+#include
+#include
+#include
+#include
+
+#include
+
+using namespace ::com::sun::star;
+
+class TestTextSearch : public test::BootstrapFixtureBase
+{
+public:
+    virtual void setUp() override;
+    virtual void tearDown() override;
+
+    void testICU();
+    void testSearches();
+    void testWildcardSearch();
+    void testApostropheSearch();
+    void testTdf138410();
+
+    CPPUNIT_TEST_SUITE(TestTextSearch);
+    CPPUNIT_TEST(testICU);
+    CPPUNIT_TEST(testSearches);
+    CPPUNIT_TEST(testWildcardSearch);
+    CPPUNIT_TEST(testApostropheSearch);
+    CPPUNIT_TEST(testTdf138410);
+    CPPUNIT_TEST_SUITE_END();
+private:
+    uno::Reference m_xSearch; // NOTE(review): template arguments look stripped in extraction (also on the casts and std::unique_ptr below) - confirm against upstream
+    uno::Reference m_xSearch2;
+};
+
+// Sanity check our ICU first: exercise icu::RegexMatcher directly, without going through m_xSearch.
+void TestTextSearch::testICU()
+{
+    UErrorCode nErr = U_ZERO_ERROR;
+    sal_uInt32 nSearchFlags = UREGEX_UWORD | UREGEX_CASE_INSENSITIVE;
+
+    OUString aString( "abcdefgh" );
+    OUString aPattern( "e" );
+    icu::UnicodeString aSearchPat( reinterpret_cast(aPattern.getStr()), aPattern.getLength() );
+
+    std::unique_ptr pRegexMatcher(new icu::RegexMatcher( aSearchPat, nSearchFlags, nErr ));
+
+    icu::UnicodeString aSource( reinterpret_cast(aString.getStr()), aString.getLength() );
+    pRegexMatcher->reset( aSource );
+
+    CPPUNIT_ASSERT( pRegexMatcher->find( 0, nErr ) );
+    CPPUNIT_ASSERT_EQUAL( U_ZERO_ERROR, nErr );
+    CPPUNIT_ASSERT_EQUAL( static_cast(4), pRegexMatcher->start( nErr ) );
+    CPPUNIT_ASSERT_EQUAL( U_ZERO_ERROR, nErr );
+    CPPUNIT_ASSERT_EQUAL( static_cast(5), pRegexMatcher->end( nErr ) );
+    CPPUNIT_ASSERT_EQUAL( U_ZERO_ERROR, nErr );
+
+    OUString aString2( "acababaabcababadcdaa" );
+    OUString aPattern2( "a" );
+
+    icu::UnicodeString aSearchPat2( reinterpret_cast(aPattern2.getStr()), aPattern2.getLength() );
+    pRegexMatcher.reset(new icu::RegexMatcher( aSearchPat2, nSearchFlags, nErr ));
+
+    icu::UnicodeString aSource2( reinterpret_cast(aString2.getStr()), aString2.getLength() );
+    pRegexMatcher->reset( aSource2 );
+
+    CPPUNIT_ASSERT( pRegexMatcher->find( 0, nErr ) );
+    CPPUNIT_ASSERT_EQUAL( U_ZERO_ERROR, nErr );
+    CPPUNIT_ASSERT_EQUAL( static_cast(0), pRegexMatcher->start( nErr ) );
+    CPPUNIT_ASSERT_EQUAL( U_ZERO_ERROR, nErr );
+    CPPUNIT_ASSERT_EQUAL( static_cast(1), pRegexMatcher->end( nErr ) );
+    CPPUNIT_ASSERT_EQUAL( U_ZERO_ERROR, nErr );
+}
+
+void TestTextSearch::testSearches()
+{
+    OUString str( "acababaabcababadcdaa" );
+    sal_Int32 startPos = 2, endPos = 20 ;
+    sal_Int32 const fStartRes = 10, fEndRes = 18 ;
+    sal_Int32 const bStartRes = 18, bEndRes = 10 ;
+
+    // set options: case-insensitive regular expression search
+    util::SearchOptions aOptions;
+    aOptions.algorithmType = util::SearchAlgorithms_REGEXP ;
+    aOptions.searchFlag = util::SearchFlags::ALL_IGNORE_CASE;
+    aOptions.searchString = "(ab)*a(c|d)+";
+    m_xSearch->setOptions( aOptions );
+
+    util::SearchResult aRes;
+
+    // search forward: expected match is [10,18)
+    aRes = m_xSearch->searchForward( str, startPos, endPos );
+    CPPUNIT_ASSERT( aRes.subRegExpressions > 0 );
+    CPPUNIT_ASSERT_EQUAL( fStartRes, aRes.startOffset[0] );
+    CPPUNIT_ASSERT_EQUAL( fEndRes, aRes.endOffset[0] );
+
+    // search backwards: same span, reported with start and end swapped (18,10]
+    aRes = m_xSearch->searchBackward( str, endPos, startPos );
+    CPPUNIT_ASSERT( aRes.subRegExpressions > 0 );
+    CPPUNIT_ASSERT_EQUAL( bStartRes, aRes.startOffset[0] );
+    CPPUNIT_ASSERT_EQUAL( bEndRes, aRes.endOffset[0] );
+
+    aOptions.transliterateFlags = static_cast(TransliterationFlags::IGNORE_CASE
+                                              | TransliterationFlags::IGNORE_WIDTH);
+    aOptions.searchString = "([^ ]*)[ ]*([^ ]*)";
+    m_xSearch->setOptions(aOptions);
+    aRes = m_xSearch->searchForward("11 22 33", 2, 7);
+    CPPUNIT_ASSERT_EQUAL(static_cast(3), aRes.subRegExpressions);
+    CPPUNIT_ASSERT_EQUAL(sal_Int32(2), aRes.startOffset[0]);
+    CPPUNIT_ASSERT_EQUAL(sal_Int32(5), aRes.endOffset[0]);
+    CPPUNIT_ASSERT_EQUAL(sal_Int32(2), aRes.startOffset[1]);
+    CPPUNIT_ASSERT_EQUAL(sal_Int32(2), aRes.endOffset[1]);
+    CPPUNIT_ASSERT_EQUAL(sal_Int32(3), aRes.startOffset[2]);
+    CPPUNIT_ASSERT_EQUAL(sal_Int32(5), aRes.endOffset[2]);
+}
+
+void TestTextSearch::testWildcardSearch()
+{
+    util::SearchOptions2 aOptions;
+    OUString aText;
+    util::SearchResult aRes;
+
+    aOptions.AlgorithmType2 = util::SearchAlgorithms2::WILDCARD ;
+    aOptions.WildcardEscapeCharacter = '~';
+    // aOptions.searchFlag = ::css::util::SearchFlags::WILD_MATCH_SELECTION;
+    // is not set, so substring match is allowed.
+    aOptions.transliterateFlags = sal_Int32(::css::i18n::TransliterationModules::TransliterationModules_IGNORE_CASE);
+    aText = "abAca";
+
+    aOptions.searchString = "a";
+    m_xSearch2->setOptions2( aOptions );
+    // match first "a", [0,1)
+    aRes = m_xSearch2->searchForward( aText, 0, aText.getLength());
+    CPPUNIT_ASSERT_EQUAL(static_cast(1), aRes.subRegExpressions);
+    CPPUNIT_ASSERT_EQUAL(sal_Int32(0), aRes.startOffset[0]);
+    CPPUNIT_ASSERT_EQUAL(sal_Int32(1), aRes.endOffset[0]);
+    // match last "a", (5,4]
+    aRes = m_xSearch2->searchBackward( aText, aText.getLength(), 0);
+    CPPUNIT_ASSERT_EQUAL(static_cast(1), aRes.subRegExpressions);
+    CPPUNIT_ASSERT_EQUAL(sal_Int32(5), aRes.startOffset[0]);
+    CPPUNIT_ASSERT_EQUAL(sal_Int32(4), aRes.endOffset[0]);
+
+    aOptions.searchString = "a?";
+    m_xSearch2->setOptions2( aOptions );
+    // match "ab", [0,2)
+    aRes = m_xSearch2->searchForward( aText, 0, aText.getLength());
+    CPPUNIT_ASSERT_EQUAL(static_cast(1), aRes.subRegExpressions);
+    CPPUNIT_ASSERT_EQUAL(sal_Int32(0), aRes.startOffset[0]);
+    CPPUNIT_ASSERT_EQUAL(sal_Int32(2), aRes.endOffset[0]);
+    // match "ac", (4,2]
+    aRes = m_xSearch2->searchBackward( aText, aText.getLength(), 0);
+    CPPUNIT_ASSERT_EQUAL(static_cast(1), aRes.subRegExpressions);
+    CPPUNIT_ASSERT_EQUAL(sal_Int32(4), aRes.startOffset[0]);
+    CPPUNIT_ASSERT_EQUAL(sal_Int32(2), aRes.endOffset[0]);
+
+    aOptions.searchString = "a*c";
+    m_xSearch2->setOptions2( aOptions );
+    // match "abac", [0,4) XXX NOTE: first match forward
+    aRes = m_xSearch2->searchForward( aText, 0, aText.getLength());
+    CPPUNIT_ASSERT_EQUAL(static_cast(1), aRes.subRegExpressions);
+    CPPUNIT_ASSERT_EQUAL(sal_Int32(0), aRes.startOffset[0]);
+    CPPUNIT_ASSERT_EQUAL(sal_Int32(4), aRes.endOffset[0]);
+    // match "ac", (4,2] XXX NOTE: first match backward, not greedy
+    aRes = m_xSearch2->searchBackward( aText, aText.getLength(), 0);
+    CPPUNIT_ASSERT_EQUAL(static_cast(1), aRes.subRegExpressions);
+    CPPUNIT_ASSERT_EQUAL(sal_Int32(4), aRes.startOffset[0]);
+    CPPUNIT_ASSERT_EQUAL(sal_Int32(2), aRes.endOffset[0]);
+
+    aOptions.searchString = "b*a";
+    m_xSearch2->setOptions2( aOptions );
+    // match "ba", [1,3) XXX NOTE: first match forward, not greedy
+    aRes = m_xSearch2->searchForward( aText, 0, aText.getLength());
+    CPPUNIT_ASSERT_EQUAL(static_cast(1), aRes.subRegExpressions);
+    CPPUNIT_ASSERT_EQUAL(sal_Int32(1), aRes.startOffset[0]);
+    CPPUNIT_ASSERT_EQUAL(sal_Int32(3), aRes.endOffset[0]);
+    // match "baca", (5,1] XXX NOTE: first match backward
+    aRes = m_xSearch2->searchBackward( aText, aText.getLength(), 0);
+    CPPUNIT_ASSERT_EQUAL(static_cast(1), aRes.subRegExpressions);
+    CPPUNIT_ASSERT_EQUAL(sal_Int32(5), aRes.startOffset[0]);
+    CPPUNIT_ASSERT_EQUAL(sal_Int32(1), aRes.endOffset[0]);
+
+    aText = "ab?ca";
+
+    aOptions.searchString = "?~??";
+    m_xSearch2->setOptions2( aOptions );
+    // match "b?c", [1,4)
+    aRes = m_xSearch2->searchForward( aText, 0, aText.getLength());
+    CPPUNIT_ASSERT_EQUAL(static_cast(1), aRes.subRegExpressions);
+    CPPUNIT_ASSERT_EQUAL(sal_Int32(1), aRes.startOffset[0]);
+    CPPUNIT_ASSERT_EQUAL(sal_Int32(4), aRes.endOffset[0]);
+    // match "b?c", (4,1]
+    aRes = m_xSearch2->searchBackward( aText, aText.getLength(), 0);
+    CPPUNIT_ASSERT_EQUAL(static_cast(1), aRes.subRegExpressions);
+    CPPUNIT_ASSERT_EQUAL(sal_Int32(4), aRes.startOffset[0]);
+    CPPUNIT_ASSERT_EQUAL(sal_Int32(1), aRes.endOffset[0]);
+
+    aText = "ab*ca";
+
+    aOptions.searchString = "?~*?";
+    m_xSearch2->setOptions2( aOptions );
+    // match "b*c", [1,4)
+    aRes = m_xSearch2->searchForward( aText, 0, aText.getLength());
+    CPPUNIT_ASSERT_EQUAL(static_cast(1), aRes.subRegExpressions);
+    CPPUNIT_ASSERT_EQUAL(sal_Int32(1), aRes.startOffset[0]);
+    CPPUNIT_ASSERT_EQUAL(sal_Int32(4), aRes.endOffset[0]);
+    // match "b*c", (4,1]
+    aRes = m_xSearch2->searchBackward( aText, aText.getLength(), 0);
+    CPPUNIT_ASSERT_EQUAL(static_cast(1), aRes.subRegExpressions);
+    CPPUNIT_ASSERT_EQUAL(sal_Int32(4), aRes.startOffset[0]);
+    CPPUNIT_ASSERT_EQUAL(sal_Int32(1), aRes.endOffset[0]);
+
+    aOptions.searchString = "ca?";
+    m_xSearch2->setOptions2( aOptions );
+    // no match
+    aRes = m_xSearch2->searchForward( aText, 0, aText.getLength());
+    CPPUNIT_ASSERT_EQUAL(static_cast(0), aRes.subRegExpressions);
+    // no match
+    aRes = m_xSearch2->searchBackward( aText, aText.getLength(), 0);
+    CPPUNIT_ASSERT_EQUAL(static_cast(0), aRes.subRegExpressions);
+
+    aOptions.searchString = "ca*";
+    m_xSearch2->setOptions2( aOptions );
+    // match "ca", [3,5)
+    aRes = m_xSearch2->searchForward( aText, 0, aText.getLength());
+    CPPUNIT_ASSERT_EQUAL(static_cast(1), aRes.subRegExpressions);
+    CPPUNIT_ASSERT_EQUAL(sal_Int32(3), aRes.startOffset[0]);
+    CPPUNIT_ASSERT_EQUAL(sal_Int32(5), aRes.endOffset[0]);
+    // match "ca", (5,3]
+    aRes = m_xSearch2->searchBackward( aText, aText.getLength(), 0);
+    CPPUNIT_ASSERT_EQUAL(static_cast(1), aRes.subRegExpressions);
+    CPPUNIT_ASSERT_EQUAL(sal_Int32(5), aRes.startOffset[0]);
+    CPPUNIT_ASSERT_EQUAL(sal_Int32(3), aRes.endOffset[0]);
+
+    aOptions.searchString = "*ca*";
+    m_xSearch2->setOptions2( aOptions );
+    // match "ab*ca", [0,5)
+    aRes = m_xSearch2->searchForward( aText, 0, aText.getLength());
+    CPPUNIT_ASSERT_EQUAL(static_cast(1), aRes.subRegExpressions);
+    CPPUNIT_ASSERT_EQUAL(sal_Int32(0), aRes.startOffset[0]);
+    CPPUNIT_ASSERT_EQUAL(sal_Int32(5), aRes.endOffset[0]);
+    // match "ab*ca", (5,0]
+    aRes = m_xSearch2->searchBackward( aText, aText.getLength(), 0);
+    CPPUNIT_ASSERT_EQUAL(static_cast(1), aRes.subRegExpressions);
+    CPPUNIT_ASSERT_EQUAL(sal_Int32(5), aRes.startOffset[0]);
+    CPPUNIT_ASSERT_EQUAL(sal_Int32(0), aRes.endOffset[0]);
+
+    aText = "123123";
+    aOptions.searchString = "*2?";
+    m_xSearch2->setOptions2( aOptions );
+    // match first "123", [0,3)
+    aRes = m_xSearch2->searchForward( aText, 0, aText.getLength());
+    CPPUNIT_ASSERT_EQUAL(static_cast(1), aRes.subRegExpressions);
+    CPPUNIT_ASSERT_EQUAL(sal_Int32(0), aRes.startOffset[0]);
+    CPPUNIT_ASSERT_EQUAL(sal_Int32(3), aRes.endOffset[0]);
+    // match "123123", (6,0] Yes this looks odd, but it is as searching "?2*" forward.
+    aRes = m_xSearch2->searchBackward( aText, aText.getLength(), 0);
+    CPPUNIT_ASSERT_EQUAL(static_cast(1), aRes.subRegExpressions);
+    CPPUNIT_ASSERT_EQUAL(sal_Int32(6), aRes.startOffset[0]);
+    CPPUNIT_ASSERT_EQUAL(sal_Int32(0), aRes.endOffset[0]);
+
+    aOptions.searchFlag |= util::SearchFlags::WILD_MATCH_SELECTION;
+    m_xSearch2->setOptions2( aOptions );
+    // match "123123", [0,6) with greedy '*'
+    aRes = m_xSearch2->searchForward( aText, 0, aText.getLength());
+    CPPUNIT_ASSERT_EQUAL(static_cast(1), aRes.subRegExpressions);
+    CPPUNIT_ASSERT_EQUAL(sal_Int32(0), aRes.startOffset[0]);
+    CPPUNIT_ASSERT_EQUAL(sal_Int32(6), aRes.endOffset[0]);
+    // match "123123", (6,0]
+    aRes = m_xSearch2->searchBackward( aText, aText.getLength(), 0);
+    CPPUNIT_ASSERT_EQUAL(static_cast(1), aRes.subRegExpressions);
+    CPPUNIT_ASSERT_EQUAL(sal_Int32(6), aRes.startOffset[0]);
+    CPPUNIT_ASSERT_EQUAL(sal_Int32(0), aRes.endOffset[0]);
+}
+
+void TestTextSearch::testApostropheSearch()
+{
+    // A) find typographic apostrophes also by using ASCII apostrophe in searchString
+    OUString str( u"It\u2019s an apostrophe."_ustr );
+    sal_Int32 startPos = 0, endPos = str.getLength();
+
+    // set options
+    util::SearchOptions aOptions;
+    aOptions.algorithmType = util::SearchAlgorithms_ABSOLUTE;
+    aOptions.searchFlag = util::SearchFlags::ALL_IGNORE_CASE;
+    aOptions.searchString = "'";
+    m_xSearch->setOptions( aOptions );
+
+    util::SearchResult aRes;
+
+    // search forward
+    aRes = m_xSearch->searchForward( str, startPos, endPos );
+    // This was 0.
+    CPPUNIT_ASSERT( aRes.subRegExpressions > 0 );
+    CPPUNIT_ASSERT_EQUAL( static_cast(2), aRes.startOffset[0] );
+    CPPUNIT_ASSERT_EQUAL( static_cast(3), aRes.endOffset[0] );
+
+    // search backwards
+    aRes = m_xSearch->searchBackward( str, endPos, startPos );
+    // This was 0.
+    CPPUNIT_ASSERT( aRes.subRegExpressions > 0 );
+    CPPUNIT_ASSERT_EQUAL( static_cast(3), aRes.startOffset[0] );
+    CPPUNIT_ASSERT_EQUAL( static_cast(2), aRes.endOffset[0] );
+
+    // check with transliteration
+    aOptions.transliterateFlags = static_cast(TransliterationFlags::IGNORE_CASE
+                                              | TransliterationFlags::IGNORE_WIDTH);
+    m_xSearch->setOptions(aOptions);
+
+    // search forward
+    aRes = m_xSearch->searchForward( str, startPos, endPos );
+    // This was 0.
+    CPPUNIT_ASSERT( aRes.subRegExpressions > 0 );
+    CPPUNIT_ASSERT_EQUAL( static_cast(2), aRes.startOffset[0] );
+    CPPUNIT_ASSERT_EQUAL( static_cast(3), aRes.endOffset[0] );
+
+    // search backwards
+    aRes = m_xSearch->searchBackward( str, endPos, startPos );
+    // This was 0.
+    CPPUNIT_ASSERT( aRes.subRegExpressions > 0 );
+    CPPUNIT_ASSERT_EQUAL( static_cast(3), aRes.startOffset[0] );
+    CPPUNIT_ASSERT_EQUAL( static_cast(2), aRes.endOffset[0] );
+
+    // B) search ASCII apostrophe in a text with ASCII apostrophes
+    str = str.replace(u'\u2019', '\'');
+
+    // search forward
+    aRes = m_xSearch->searchForward( str, startPos, endPos );
+    CPPUNIT_ASSERT( aRes.subRegExpressions > 0 );
+    CPPUNIT_ASSERT_EQUAL( static_cast(2), aRes.startOffset[0] );
+    CPPUNIT_ASSERT_EQUAL( static_cast(3), aRes.endOffset[0] );
+
+    // search backwards
+    aRes = m_xSearch->searchBackward( str, endPos, startPos );
+    CPPUNIT_ASSERT( aRes.subRegExpressions > 0 );
+    CPPUNIT_ASSERT_EQUAL( static_cast(3), aRes.startOffset[0] );
+    CPPUNIT_ASSERT_EQUAL( static_cast(2), aRes.endOffset[0] );
+
+    // C) search typographic apostrophe in a text with ASCII apostrophes (no result)
+    aOptions.searchString = u"\u2019"_ustr;
+    m_xSearch->setOptions( aOptions );
+
+    aRes = m_xSearch->searchForward( str, startPos, endPos );
+    CPPUNIT_ASSERT_EQUAL(static_cast(0), aRes.subRegExpressions);
+
+    aRes = m_xSearch->searchBackward( str, endPos, startPos );
+    CPPUNIT_ASSERT_EQUAL(static_cast(0), aRes.subRegExpressions);
+
+    // D) search typographic apostrophe in a text with typographic apostrophes
+    str = str.replace('\'', u'\u2019');
+
+    // search forward
+    aRes = m_xSearch->searchForward( str, startPos, endPos );
+    CPPUNIT_ASSERT( aRes.subRegExpressions > 0 );
+    CPPUNIT_ASSERT_EQUAL( static_cast(2), aRes.startOffset[0] );
+    CPPUNIT_ASSERT_EQUAL( static_cast(3), aRes.endOffset[0] );
+
+    // search backwards
+    aRes = m_xSearch->searchBackward( str, endPos, startPos );
+    CPPUNIT_ASSERT( aRes.subRegExpressions > 0 );
+    CPPUNIT_ASSERT_EQUAL( static_cast(3), aRes.startOffset[0] );
+    CPPUNIT_ASSERT_EQUAL( static_cast(2), aRes.endOffset[0] );
+
+    // E) search mixed apostrophes in a text with mixed apostrophes:
+    aOptions.searchString = u"'\u2019"_ustr;
+    m_xSearch->setOptions( aOptions );
+    str = u"test: \u2019'"_ustr;
+
+    // search forward
+    aRes = m_xSearch->searchForward( str, startPos, str.getLength());
+    CPPUNIT_ASSERT( aRes.subRegExpressions > 0 );
+
+    // search backwards
+    aRes = m_xSearch->searchBackward( str, str.getLength(), startPos );
+    CPPUNIT_ASSERT( aRes.subRegExpressions > 0 );
+
+    // F) search mixed apostrophes in a text with ASCII apostrophes:
+    str = u"test: ''"_ustr;
+
+    // search forward
+    aRes = m_xSearch->searchForward( str, startPos, str.getLength());
+    CPPUNIT_ASSERT( aRes.subRegExpressions > 0 );
+
+    // search backwards
+    aRes = m_xSearch->searchBackward( str, str.getLength(), startPos );
+    CPPUNIT_ASSERT( aRes.subRegExpressions > 0 );
+}
+
+void TestTextSearch::testTdf138410()
+{
+    OUString str(u"\u0643\u064f\u062a\u064f\u0628 \u0643\u062a\u0628"_ustr);
+    sal_Int32 startPos = 0, endPos = str.getLength();
+
+    util::SearchOptions aOptions;
+    aOptions.algorithmType = util::SearchAlgorithms_ABSOLUTE;
+
+    util::SearchResult aRes;
+
+    // A) base alone
+    // The search string will be found whether it is followed by a mark in the
+    // text or not, and whether IGNORE_DIACRITICS_CTL is set or not.
+
+    // set options
+    aOptions.searchString = u"\u0643"_ustr;
+    aOptions.transliterateFlags = 0;
+    m_xSearch->setOptions(aOptions);
+
+    // search forward
+    aRes = m_xSearch->searchForward(str, startPos, endPos);
+    CPPUNIT_ASSERT(aRes.subRegExpressions > 0);
+    CPPUNIT_ASSERT_EQUAL(static_cast(0), aRes.startOffset[0]);
+    CPPUNIT_ASSERT_EQUAL(static_cast(1), aRes.endOffset[0]);
+
+    // search backwards
+    aRes = m_xSearch->searchBackward(str, endPos, startPos);
+    CPPUNIT_ASSERT(aRes.subRegExpressions > 0);
+    CPPUNIT_ASSERT_EQUAL(static_cast(7), aRes.startOffset[0]);
+    CPPUNIT_ASSERT_EQUAL(static_cast(6), aRes.endOffset[0]);
+
+    // check with transliteration
+    aOptions.transliterateFlags = static_cast(TransliterationFlags::IGNORE_DIACRITICS_CTL);
+    m_xSearch->setOptions(aOptions);
+
+    // search forward
+    aRes = m_xSearch->searchForward(str, startPos, endPos);
+    CPPUNIT_ASSERT(aRes.subRegExpressions > 0);
+    CPPUNIT_ASSERT_EQUAL(static_cast(0), aRes.startOffset[0]);
+    CPPUNIT_ASSERT_EQUAL(static_cast(1), aRes.endOffset[0]);
+
+    // search backwards
+    aRes = m_xSearch->searchBackward(str, endPos, startPos);
+    CPPUNIT_ASSERT(aRes.subRegExpressions > 0);
+    CPPUNIT_ASSERT_EQUAL(static_cast(7), aRes.startOffset[0]);
+    CPPUNIT_ASSERT_EQUAL(static_cast(6), aRes.endOffset[0]);
+
+    // B) base+mark
+    // The search string will be found when followed by a mark in the text, or
+    // when IGNORE_DIACRITICS_CTL is set whether it is followed by a mark or
+    // not.
+
+    // set options
+    aOptions.searchString = u"\u0643\u064f"_ustr;
+    aOptions.transliterateFlags = 0;
+    m_xSearch->setOptions(aOptions);
+
+    // search forward
+    aRes = m_xSearch->searchForward(str, startPos, endPos);
+    CPPUNIT_ASSERT(aRes.subRegExpressions > 0);
+    CPPUNIT_ASSERT_EQUAL(static_cast(0), aRes.startOffset[0]);
+    CPPUNIT_ASSERT_EQUAL(static_cast(2), aRes.endOffset[0]);
+
+    // search backwards
+    aRes = m_xSearch->searchBackward(str, endPos, startPos);
+    CPPUNIT_ASSERT(aRes.subRegExpressions > 0);
+    CPPUNIT_ASSERT_EQUAL(static_cast(2), aRes.startOffset[0]);
+    CPPUNIT_ASSERT_EQUAL(static_cast(0), aRes.endOffset[0]);
+
+    // check with transliteration
+    aOptions.transliterateFlags = static_cast(TransliterationFlags::IGNORE_DIACRITICS_CTL);
+    m_xSearch->setOptions(aOptions);
+
+    // search forward
+    aRes = m_xSearch->searchForward(str, startPos, endPos);
+    CPPUNIT_ASSERT(aRes.subRegExpressions > 0);
+    CPPUNIT_ASSERT_EQUAL(static_cast(0), aRes.startOffset[0]);
+    CPPUNIT_ASSERT_EQUAL(static_cast(1), aRes.endOffset[0]);
+
+    // search backwards
+    aRes = m_xSearch->searchBackward(str, endPos, startPos);
+    CPPUNIT_ASSERT(aRes.subRegExpressions > 0);
+    CPPUNIT_ASSERT_EQUAL(static_cast(7), aRes.startOffset[0]);
+    CPPUNIT_ASSERT_EQUAL(static_cast(6), aRes.endOffset[0]);
+
+    // C) mark alone
+    // The search string will be found only when IGNORE_DIACRITICS_CTL is not
+    // set.
+
+    // set options
+    aOptions.searchString = u"\u064f"_ustr;
+    aOptions.transliterateFlags = 0;
+    m_xSearch->setOptions(aOptions);
+
+    // search forward
+    aRes = m_xSearch->searchForward(str, startPos, endPos);
+    CPPUNIT_ASSERT(aRes.subRegExpressions > 0);
+    CPPUNIT_ASSERT_EQUAL(static_cast(1), aRes.startOffset[0]);
+    CPPUNIT_ASSERT_EQUAL(static_cast(2), aRes.endOffset[0]);
+
+    // search backwards
+    aRes = m_xSearch->searchBackward(str, endPos, startPos);
+    CPPUNIT_ASSERT(aRes.subRegExpressions > 0);
+    CPPUNIT_ASSERT_EQUAL(static_cast(4), aRes.startOffset[0]);
+    CPPUNIT_ASSERT_EQUAL(static_cast(3), aRes.endOffset[0]);
+
+    // with ignore marks the mark will not be found
+    aOptions.transliterateFlags = static_cast(TransliterationFlags::IGNORE_DIACRITICS_CTL);
+    m_xSearch->setOptions(aOptions);
+
+    // search forward
+    aRes = m_xSearch->searchForward(str, startPos, endPos);
+    CPPUNIT_ASSERT_EQUAL(static_cast(0), aRes.subRegExpressions);
+
+    // search backwards
+    aRes = m_xSearch->searchBackward(str, endPos, startPos);
+    CPPUNIT_ASSERT_EQUAL(static_cast(0), aRes.subRegExpressions);
+}
+
+void TestTextSearch::setUp()
+{
+    BootstrapFixtureBase::setUp();
+    m_xSearch.set(m_xSFactory->createInstance("com.sun.star.util.TextSearch"), uno::UNO_QUERY_THROW);
+    m_xSearch2.set(m_xSFactory->createInstance("com.sun.star.util.TextSearch2"), uno::UNO_QUERY_THROW);
+}
+
+void TestTextSearch::tearDown()
+{
+    m_xSearch.clear();
+    m_xSearch2.clear();
+    BootstrapFixtureBase::tearDown();
+}
+
+CPPUNIT_TEST_SUITE_REGISTRATION(TestTextSearch);
+
+CPPUNIT_PLUGIN_IMPLEMENT();
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/i18npool/qa/cppunit/transliteration.cxx b/i18npool/qa/cppunit/transliteration.cxx
new file mode 100644
index 0000000000..cc8eccfbb5
--- /dev/null
+++ b/i18npool/qa/cppunit/transliteration.cxx
@@ -0,0 +1,192 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; fill-column: 100 -*- */
+/*
+ * This file is part of the LibreOffice
project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + */ + +#include + +#include +#include +#include + +#include +#include +#include +#include +#include + +namespace +{ +class Transliteration : public CppUnit::TestFixture +{ +public: + void setUp() + { + transliteration_ + = css::i18n::Transliteration::create(cppu::defaultBootstrap_InitialComponentContext()); + } + + void testLoadModuleNew() + { + // Verify that loading succeeds without throwing an exception, for each possible + // TransliterationModulesNew value (TODO: there is an upper limit of maxCascade 27 in + // i18npool/inc/transliterationImpl.hxx for the length of the passed + // TransliterationModulesNew value, so pass each one individually rather than all 65 at + // once): + transliteration_->loadModuleNew( + { css::i18n::TransliterationModulesNew_UPPERCASE_LOWERCASE }, {}); + transliteration_->loadModuleNew( + { css::i18n::TransliterationModulesNew_LOWERCASE_UPPERCASE }, {}); + transliteration_->loadModuleNew( + { css::i18n::TransliterationModulesNew_HALFWIDTH_FULLWIDTH }, {}); + transliteration_->loadModuleNew( + { css::i18n::TransliterationModulesNew_FULLWIDTH_HALFWIDTH }, {}); + transliteration_->loadModuleNew({ css::i18n::TransliterationModulesNew_KATAKANA_HIRAGANA }, + {}); + transliteration_->loadModuleNew({ css::i18n::TransliterationModulesNew_HIRAGANA_KATAKANA }, + {}); + transliteration_->loadModuleNew({ css::i18n::TransliterationModulesNew_IGNORE_CASE }, {}); + transliteration_->loadModuleNew({ css::i18n::TransliterationModulesNew_IGNORE_KANA }, {}); + transliteration_->loadModuleNew({ css::i18n::TransliterationModulesNew_IGNORE_WIDTH }, {}); + transliteration_->loadModuleNew( + { css::i18n::TransliterationModulesNew_IgnoreTraditionalKanji_ja_JP }, {}); + transliteration_->loadModuleNew( + { 
css::i18n::TransliterationModulesNew_IgnoreTraditionalKana_ja_JP }, {}); + transliteration_->loadModuleNew( + { css::i18n::TransliterationModulesNew_IgnoreMinusSign_ja_JP }, {}); + transliteration_->loadModuleNew( + { css::i18n::TransliterationModulesNew_IgnoreIterationMark_ja_JP }, {}); + transliteration_->loadModuleNew( + { css::i18n::TransliterationModulesNew_IgnoreSeparator_ja_JP }, {}); + transliteration_->loadModuleNew({ css::i18n::TransliterationModulesNew_IgnoreZiZu_ja_JP }, + {}); + transliteration_->loadModuleNew({ css::i18n::TransliterationModulesNew_IgnoreBaFa_ja_JP }, + {}); + transliteration_->loadModuleNew({ css::i18n::TransliterationModulesNew_IgnoreTiJi_ja_JP }, + {}); + transliteration_->loadModuleNew({ css::i18n::TransliterationModulesNew_IgnoreHyuByu_ja_JP }, + {}); + transliteration_->loadModuleNew({ css::i18n::TransliterationModulesNew_IgnoreSeZe_ja_JP }, + {}); + transliteration_->loadModuleNew( + { css::i18n::TransliterationModulesNew_IgnoreIandEfollowedByYa_ja_JP }, {}); + transliteration_->loadModuleNew( + { css::i18n::TransliterationModulesNew_IgnoreKiKuFollowedBySa_ja_JP }, {}); + transliteration_->loadModuleNew({ css::i18n::TransliterationModulesNew_IgnoreSize_ja_JP }, + {}); + transliteration_->loadModuleNew( + { css::i18n::TransliterationModulesNew_IgnoreProlongedSoundMark_ja_JP }, {}); + transliteration_->loadModuleNew( + { css::i18n::TransliterationModulesNew_IgnoreMiddleDot_ja_JP }, {}); + transliteration_->loadModuleNew({ css::i18n::TransliterationModulesNew_IgnoreSpace_ja_JP }, + {}); + transliteration_->loadModuleNew({ css::i18n::TransliterationModulesNew_SmallToLarge_ja_JP }, + {}); + transliteration_->loadModuleNew({ css::i18n::TransliterationModulesNew_LargeToSmall_ja_JP }, + {}); + transliteration_->loadModuleNew( + { css::i18n::TransliterationModulesNew_NumToTextLower_zh_CN }, {}); + transliteration_->loadModuleNew( + { css::i18n::TransliterationModulesNew_NumToTextUpper_zh_CN }, {}); + transliteration_->loadModuleNew( + { 
css::i18n::TransliterationModulesNew_NumToTextLower_zh_TW }, {}); + transliteration_->loadModuleNew( + { css::i18n::TransliterationModulesNew_NumToTextUpper_zh_TW }, {}); + transliteration_->loadModuleNew( + { css::i18n::TransliterationModulesNew_NumToTextFormalHangul_ko }, {}); + transliteration_->loadModuleNew( + { css::i18n::TransliterationModulesNew_NumToTextFormalLower_ko }, {}); + transliteration_->loadModuleNew( + { css::i18n::TransliterationModulesNew_NumToTextFormalUpper_ko }, {}); + transliteration_->loadModuleNew( + { css::i18n::TransliterationModulesNew_NumToTextInformalHangul_ko }, {}); + transliteration_->loadModuleNew( + { css::i18n::TransliterationModulesNew_NumToTextInformalLower_ko }, {}); + transliteration_->loadModuleNew( + { css::i18n::TransliterationModulesNew_NumToTextInformalUpper_ko }, {}); + transliteration_->loadModuleNew( + { css::i18n::TransliterationModulesNew_NumToCharLower_zh_CN }, {}); + transliteration_->loadModuleNew( + { css::i18n::TransliterationModulesNew_NumToCharUpper_zh_CN }, {}); + transliteration_->loadModuleNew( + { css::i18n::TransliterationModulesNew_NumToCharLower_zh_TW }, {}); + transliteration_->loadModuleNew( + { css::i18n::TransliterationModulesNew_NumToCharUpper_zh_TW }, {}); + transliteration_->loadModuleNew({ css::i18n::TransliterationModulesNew_NumToCharHangul_ko }, + {}); + transliteration_->loadModuleNew({ css::i18n::TransliterationModulesNew_NumToCharLower_ko }, + {}); + transliteration_->loadModuleNew({ css::i18n::TransliterationModulesNew_NumToCharUpper_ko }, + {}); + transliteration_->loadModuleNew({ css::i18n::TransliterationModulesNew_NumToCharFullwidth }, + {}); + transliteration_->loadModuleNew( + { css::i18n::TransliterationModulesNew_NumToCharKanjiShort_ja_JP }, {}); + transliteration_->loadModuleNew( + { css::i18n::TransliterationModulesNew_TextToNumLower_zh_CN }, {}); + transliteration_->loadModuleNew( + { css::i18n::TransliterationModulesNew_TextToNumUpper_zh_CN }, {}); + 
transliteration_->loadModuleNew( + { css::i18n::TransliterationModulesNew_TextToNumLower_zh_TW }, {}); + transliteration_->loadModuleNew( + { css::i18n::TransliterationModulesNew_TextToNumUpper_zh_TW }, {}); + transliteration_->loadModuleNew( + { css::i18n::TransliterationModulesNew_TextToNumFormalHangul_ko }, {}); + transliteration_->loadModuleNew( + { css::i18n::TransliterationModulesNew_TextToNumFormalLower_ko }, {}); + transliteration_->loadModuleNew( + { css::i18n::TransliterationModulesNew_TextToNumFormalUpper_ko }, {}); + transliteration_->loadModuleNew( + { css::i18n::TransliterationModulesNew_TextToNumInformalHangul_ko }, {}); + transliteration_->loadModuleNew( + { css::i18n::TransliterationModulesNew_TextToNumInformalLower_ko }, {}); + transliteration_->loadModuleNew( + { css::i18n::TransliterationModulesNew_TextToNumInformalUpper_ko }, {}); + transliteration_->loadModuleNew( + { css::i18n::TransliterationModulesNew_CharToNumLower_zh_CN }, {}); + transliteration_->loadModuleNew( + { css::i18n::TransliterationModulesNew_CharToNumUpper_zh_CN }, {}); + transliteration_->loadModuleNew( + { css::i18n::TransliterationModulesNew_CharToNumLower_zh_TW }, {}); + transliteration_->loadModuleNew( + { css::i18n::TransliterationModulesNew_CharToNumUpper_zh_TW }, {}); + transliteration_->loadModuleNew({ css::i18n::TransliterationModulesNew_CharToNumHangul_ko }, + {}); + transliteration_->loadModuleNew({ css::i18n::TransliterationModulesNew_CharToNumLower_ko }, + {}); + transliteration_->loadModuleNew({ css::i18n::TransliterationModulesNew_CharToNumUpper_ko }, + {}); + } + + void testTextToChuyin_zh_TW() + { + // Verify that loading succeeds: + transliteration_->loadModuleByImplName("TextToChuyin_zh_TW", {}); + } + + void testTextToPinyin_zh_CN() + { + // Verify that loading succeeds: + transliteration_->loadModuleByImplName("TextToPinyin_zh_CN", {}); + } + + CPPUNIT_TEST_SUITE(Transliteration); + CPPUNIT_TEST(testLoadModuleNew); + CPPUNIT_TEST(testTextToChuyin_zh_TW); + 
CPPUNIT_TEST(testTextToPinyin_zh_CN); + CPPUNIT_TEST_SUITE_END(); + +private: + css::uno::Reference transliteration_; +}; + +CPPUNIT_TEST_SUITE_REGISTRATION(Transliteration); +} + +CPPUNIT_PLUGIN_IMPLEMENT(); + +/* vim:set shiftwidth=4 softtabstop=4 expandtab cinoptions=b1,g0,N-s cinkeys+=0=break: */ -- cgit v1.2.3