summaryrefslogtreecommitdiffstats
path: root/i18npool/source/transliteration
diff options
context:
space:
mode:
Diffstat (limited to 'i18npool/source/transliteration')
-rw-r--r--i18npool/source/transliteration/chartonum.cxx53
-rw-r--r--i18npool/source/transliteration/fullwidthToHalfwidth.cxx127
-rw-r--r--i18npool/source/transliteration/halfwidthToFullwidth.cxx87
-rw-r--r--i18npool/source/transliteration/hiraganaToKatakana.cxx48
-rw-r--r--i18npool/source/transliteration/ignoreBaFa_ja_JP.cxx45
-rw-r--r--i18npool/source/transliteration/ignoreDiacritics_CTL.cxx95
-rw-r--r--i18npool/source/transliteration/ignoreHyuByu_ja_JP.cxx46
-rw-r--r--i18npool/source/transliteration/ignoreIandEfollowedByYa_ja_JP.cxx125
-rw-r--r--i18npool/source/transliteration/ignoreIterationMark_ja_JP.cxx139
-rw-r--r--i18npool/source/transliteration/ignoreKana.cxx65
-rw-r--r--i18npool/source/transliteration/ignoreKashida_CTL.cxx32
-rw-r--r--i18npool/source/transliteration/ignoreKiKuFollowedBySa_ja_JP.cxx83
-rw-r--r--i18npool/source/transliteration/ignoreMiddleDot_ja_JP.cxx46
-rw-r--r--i18npool/source/transliteration/ignoreMinusSign_ja_JP.cxx53
-rw-r--r--i18npool/source/transliteration/ignoreProlongedSoundMark_ja_JP.cxx345
-rw-r--r--i18npool/source/transliteration/ignoreSeZe_ja_JP.cxx50
-rw-r--r--i18npool/source/transliteration/ignoreSeparator_ja_JP.cxx112
-rw-r--r--i18npool/source/transliteration/ignoreSize_ja_JP.cxx55
-rw-r--r--i18npool/source/transliteration/ignoreSpace_ja_JP.cxx55
-rw-r--r--i18npool/source/transliteration/ignoreTiJi_ja_JP.cxx54
-rw-r--r--i18npool/source/transliteration/ignoreTraditionalKana_ja_JP.cxx53
-rw-r--r--i18npool/source/transliteration/ignoreTraditionalKanji_ja_JP.cxx735
-rw-r--r--i18npool/source/transliteration/ignoreWidth.cxx65
-rw-r--r--i18npool/source/transliteration/ignoreZiZu_ja_JP.cxx53
-rw-r--r--i18npool/source/transliteration/katakanaToHiragana.cxx45
-rw-r--r--i18npool/source/transliteration/largeToSmall_ja_JP.cxx79
-rw-r--r--i18npool/source/transliteration/numtochar.cxx57
-rw-r--r--i18npool/source/transliteration/numtotext_cjk.cxx85
-rw-r--r--i18npool/source/transliteration/smallToLarge_ja_JP.cxx79
-rw-r--r--i18npool/source/transliteration/textToPronounce_zh.cxx197
-rw-r--r--i18npool/source/transliteration/texttonum.cxx52
-rw-r--r--i18npool/source/transliteration/transliterationImpl.cxx682
-rw-r--r--i18npool/source/transliteration/transliteration_Ignore.cxx196
-rw-r--r--i18npool/source/transliteration/transliteration_Numeric.cxx143
-rw-r--r--i18npool/source/transliteration/transliteration_OneToOne.cxx93
-rw-r--r--i18npool/source/transliteration/transliteration_body.cxx306
-rw-r--r--i18npool/source/transliteration/transliteration_caseignore.cxx146
-rw-r--r--i18npool/source/transliteration/transliteration_commonclass.cxx135
38 files changed, 4916 insertions, 0 deletions
diff --git a/i18npool/source/transliteration/chartonum.cxx b/i18npool/source/transliteration/chartonum.cxx
new file mode 100644
index 0000000000..41a6549b67
--- /dev/null
+++ b/i18npool/source/transliteration/chartonum.cxx
@@ -0,0 +1,53 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * This file incorporates work covered by the following license notice:
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed
+ * with this work for additional information regarding copyright
+ * ownership. The ASF licenses this file to you under the Apache
+ * License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of
+ * the License at http://www.apache.org/licenses/LICENSE-2.0 .
+ */
+
+#include <chartonum.hxx>
+
+using namespace com::sun::star::uno;
+
+namespace i18npool {
+
+// Defines the default constructor CharToNum<name>::CharToNum<name>().
+// The generated constructor zeroes nNativeNumberMode and tableSize and sets
+// implementationName to "com.sun.star.i18n.Transliteration.CharToNum"
+// immediately followed by the stringized <name> argument.
+#define TRANSLITERATION_CHARTONUM( name ) \
+CharToNum##name::CharToNum##name() \
+{ \
+ nNativeNumberMode = 0; \
+ tableSize = 0; \
+ implementationName = "com.sun.star.i18n.Transliteration.CharToNum"#name; \
+}
+
+// One constructor definition per CharToNum variant.
+TRANSLITERATION_CHARTONUM( Fullwidth)
+TRANSLITERATION_CHARTONUM( Lower_zh_CN)
+TRANSLITERATION_CHARTONUM( Lower_zh_TW)
+TRANSLITERATION_CHARTONUM( Upper_zh_CN)
+TRANSLITERATION_CHARTONUM( Upper_zh_TW)
+TRANSLITERATION_CHARTONUM( KanjiShort_ja_JP)
+TRANSLITERATION_CHARTONUM( KanjiTraditional_ja_JP)
+TRANSLITERATION_CHARTONUM( Lower_ko)
+TRANSLITERATION_CHARTONUM( Upper_ko)
+TRANSLITERATION_CHARTONUM( Hangul_ko)
+TRANSLITERATION_CHARTONUM( Indic_ar)
+TRANSLITERATION_CHARTONUM( EastIndic_ar)
+TRANSLITERATION_CHARTONUM( Indic_hi)
+TRANSLITERATION_CHARTONUM( _th)
+
+// The helper macro is only needed for the definitions above.
+#undef TRANSLITERATION_CHARTONUM
+
+}
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/i18npool/source/transliteration/fullwidthToHalfwidth.cxx b/i18npool/source/transliteration/fullwidthToHalfwidth.cxx
new file mode 100644
index 0000000000..fbd2624e14
--- /dev/null
+++ b/i18npool/source/transliteration/fullwidthToHalfwidth.cxx
@@ -0,0 +1,127 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * This file incorporates work covered by the following license notice:
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed
+ * with this work for additional information regarding copyright
+ * ownership. The ASF licenses this file to you under the Apache
+ * License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of
+ * the License at http://www.apache.org/licenses/LICENSE-2.0 .
+ */
+
+#include <sal/config.h>
+
+#include <com/sun/star/i18n/MultipleCharsOutputException.hpp>
+#include <i18nutil/widthfolding.hxx>
+
+#include <transliteration_OneToOne.hxx>
+
+using namespace com::sun::star::uno;
+using namespace com::sun::star::i18n;
+using namespace com::sun::star::lang;
+
+namespace i18npool {
+
+// Configures the fullwidth --> halfwidth transliteration: the mapping comes
+// from the widthfolding full2half table, no mapping function is installed.
+fullwidthToHalfwidth::fullwidthToHalfwidth()
+{
+    implementationName = "com.sun.star.i18n.Transliteration.FULLWIDTH_HALFWIDTH";
+    transliterationName = "fullwidthToHalfwidth";
+    table = &i18nutil::widthfolding::getfull2halfTable();
+    func = nullptr;
+}
+
+/**
+ * Transliterate fullwidth to halfwidth.
+ * The result is returned by value and contains the transliterated range
+ * only, not the whole input string.
+ */
+OUString
+fullwidthToHalfwidth::transliterateImpl( const OUString& inStr, sal_Int32 startPos, sal_Int32 nCount, Sequence< sal_Int32 >* pOffset )
+{
+    // Step 1 - decomposition: GA --> KA + voice-mark (pOffset is handed to this step)
+    const OUString aDecomposed(
+        i18nutil::widthfolding::decompose_ja_voiced_sound_marks(inStr, startPos, nCount, pOffset));
+
+    // Step 2 - one to one mapping over the complete intermediate string, no offsets requested
+    const sal_Int32 nDecomposedLen = aDecomposed.getLength();
+    return transliteration_OneToOne::transliterateImpl(aDecomposed, 0, nDecomposedLen, nullptr);
+}
+
+// Single-character variant. A 0xFFFF result from the decomposition helper is
+// answered with MultipleCharsOutputException; otherwise inChar itself goes
+// through the one-to-one mapping.
+sal_Unicode SAL_CALL
+fullwidthToHalfwidth::transliterateChar2Char( sal_Unicode inChar)
+{
+    const bool bIsSentinel =
+        i18nutil::widthfolding::decompose_ja_voiced_sound_marksChar2Char(inChar) == 0xFFFF;
+    if (bIsSentinel)
+        throw MultipleCharsOutputException();
+
+    return transliteration_OneToOne::transliterateChar2Char(inChar);
+}
+
+// Configures the fullwidth katakana --> halfwidth katakana transliteration:
+// the mapping comes from the widthfolding fullKana2halfKana table, no
+// mapping function is installed.
+FULLWIDTHKATAKANA_HALFWIDTHKATAKANA::FULLWIDTHKATAKANA_HALFWIDTHKATAKANA()
+{
+    implementationName = "com.sun.star.i18n.Transliteration.FULLWIDTHKATAKANA_HALFWIDTHKATAKANA";
+    transliterationName = "fullwidthKatakanaToHalfwidthKatakana";
+    table = &i18nutil::widthfolding::getfullKana2halfKanaTable();
+    func = nullptr;
+}
+
+/**
+ * Transliterate fullwidth katakana to halfwidth katakana.
+ * Runs the voiced-sound-mark decomposition first and then the one-to-one
+ * table mapping on its result.
+ */
+OUString
+FULLWIDTHKATAKANA_HALFWIDTHKATAKANA::transliterateImpl( const OUString& inStr, sal_Int32 startPos, sal_Int32 nCount, Sequence< sal_Int32 >* pOffset )
+{
+    // Step 1 - decomposition: GA --> KA + voice-mark (pOffset is handed to this step)
+    const OUString aDecomposed(
+        i18nutil::widthfolding::decompose_ja_voiced_sound_marks(inStr, startPos, nCount, pOffset));
+
+    // Step 2 - one to one mapping over the complete intermediate string, no offsets requested
+    const sal_Int32 nDecomposedLen = aDecomposed.getLength();
+    return transliteration_OneToOne::transliterateImpl(aDecomposed, 0, nDecomposedLen, nullptr);
+}
+
+// Single-character variant. A 0xFFFF result from the decomposition helper is
+// answered with MultipleCharsOutputException; otherwise inChar itself goes
+// through the one-to-one mapping.
+sal_Unicode SAL_CALL
+FULLWIDTHKATAKANA_HALFWIDTHKATAKANA::transliterateChar2Char( sal_Unicode inChar )
+{
+    const bool bIsSentinel =
+        i18nutil::widthfolding::decompose_ja_voiced_sound_marksChar2Char(inChar) == 0xFFFF;
+    if (bIsSentinel)
+        throw MultipleCharsOutputException();
+
+    return transliteration_OneToOne::transliterateChar2Char(inChar);
+}
+
+// Configures the ASC-like fullwidth --> halfwidth transliteration: the
+// mapping comes from the widthfolding full2halfTableForASC table, no mapping
+// function is installed.
+FULLWIDTH_HALFWIDTH_LIKE_ASC::FULLWIDTH_HALFWIDTH_LIKE_ASC()
+{
+    implementationName = "com.sun.star.i18n.Transliteration.FULLWIDTH_HALFWIDTH_LIKE_ASC";
+    transliterationName = "fullwidthToHalfwidthLikeASC";
+    table = &i18nutil::widthfolding::getfull2halfTableForASC();
+    func = nullptr;
+}
+
+/**
+ * Transliterate fullwidth to halfwidth like Excel's ASC function.
+ * Runs the voiced-sound-mark decomposition first and then the one-to-one
+ * table mapping on its result.
+ */
+OUString
+FULLWIDTH_HALFWIDTH_LIKE_ASC::transliterateImpl( const OUString& inStr, sal_Int32 startPos, sal_Int32 nCount, Sequence< sal_Int32 >* pOffset )
+{
+    // Step 1 - decomposition: GA --> KA + voice-mark (pOffset is handed to this step)
+    const OUString aDecomposed(
+        i18nutil::widthfolding::decompose_ja_voiced_sound_marks(inStr, startPos, nCount, pOffset));
+
+    // Step 2 - one to one mapping over the complete intermediate string, no offsets requested
+    const sal_Int32 nDecomposedLen = aDecomposed.getLength();
+    return transliteration_OneToOne::transliterateImpl(aDecomposed, 0, nDecomposedLen, nullptr);
+}
+
+// Single-character variant. A 0xFFFF result from the decomposition helper is
+// answered with MultipleCharsOutputException; otherwise inChar itself goes
+// through the one-to-one mapping.
+sal_Unicode SAL_CALL
+FULLWIDTH_HALFWIDTH_LIKE_ASC::transliterateChar2Char( sal_Unicode inChar )
+{
+    const bool bIsSentinel =
+        i18nutil::widthfolding::decompose_ja_voiced_sound_marksChar2Char(inChar) == 0xFFFF;
+    if (bIsSentinel)
+        throw MultipleCharsOutputException();
+
+    return transliteration_OneToOne::transliterateChar2Char(inChar);
+}
+
+}
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/i18npool/source/transliteration/halfwidthToFullwidth.cxx b/i18npool/source/transliteration/halfwidthToFullwidth.cxx
new file mode 100644
index 0000000000..8351291031
--- /dev/null
+++ b/i18npool/source/transliteration/halfwidthToFullwidth.cxx
@@ -0,0 +1,87 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * This file incorporates work covered by the following license notice:
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed
+ * with this work for additional information regarding copyright
+ * ownership. The ASF licenses this file to you under the Apache
+ * License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of
+ * the License at http://www.apache.org/licenses/LICENSE-2.0 .
+ */
+
+#include <i18nutil/widthfolding.hxx>
+
+#include <transliteration_OneToOne.hxx>
+
+using namespace com::sun::star::uno;
+using namespace com::sun::star::lang;
+
+
+namespace i18npool {
+
+// Configures the halfwidth --> fullwidth transliteration: the mapping comes
+// from the widthfolding half2full table, no mapping function is installed.
+halfwidthToFullwidth::halfwidthToFullwidth()
+{
+    implementationName = "com.sun.star.i18n.Transliteration.HALFWIDTH_FULLWIDTH";
+    transliterationName = "halfwidthToFullwidth";
+    table = &i18nutil::widthfolding::gethalf2fullTable();
+    func = nullptr;
+}
+
+// Transliterate halfwidth to fullwidth: the one-to-one table mapping runs
+// first, then kana + voice-mark pairs are composed.
+// NOTE(review): the composition step works on the intermediate string from
+// index 0, so whatever it stores in pOffset presumably refers to that string
+// rather than to inStr - confirm this is what callers expect for startPos != 0.
+OUString
+halfwidthToFullwidth::transliterateImpl( const OUString& inStr, sal_Int32 startPos, sal_Int32 nCount, Sequence< sal_Int32 >* pOffset )
+{
+    // Step 1 - one to one mapping of the requested range, no offsets requested
+    const OUString aMapped(
+        transliteration_OneToOne::transliterateImpl(inStr, startPos, nCount, nullptr));
+
+    // Step 2 - composition: KA + voice-mark --> GA (pOffset is handed to this step)
+    const sal_Int32 nMappedLen = aMapped.getLength();
+    return i18nutil::widthfolding::compose_ja_voiced_sound_marks(aMapped, 0, nMappedLen, pOffset);
+}
+
+// Configures the halfwidth katakana --> fullwidth katakana transliteration:
+// the mapping comes from the widthfolding halfKana2fullKana table, no
+// mapping function is installed.
+HALFWIDTHKATAKANA_FULLWIDTHKATAKANA::HALFWIDTHKATAKANA_FULLWIDTHKATAKANA()
+{
+    implementationName = "com.sun.star.i18n.Transliteration.HALFWIDTHKATAKANA_FULLWIDTHKATAKANA";
+    transliterationName = "halfwidthKatakanaToFullwidthKatakana";
+    table = &i18nutil::widthfolding::gethalfKana2fullKanaTable();
+    func = nullptr;
+}
+
+// Transliterate halfwidth katakana to fullwidth katakana: the one-to-one
+// table mapping runs first, then kana + voice-mark pairs are composed.
+// NOTE(review): the composition step works on the intermediate string from
+// index 0, so whatever it stores in pOffset presumably refers to that string
+// rather than to inStr - confirm this is what callers expect for startPos != 0.
+OUString
+HALFWIDTHKATAKANA_FULLWIDTHKATAKANA::transliterateImpl( const OUString& inStr, sal_Int32 startPos, sal_Int32 nCount, Sequence< sal_Int32 >* pOffset )
+{
+    // Step 1 - one to one mapping of the requested range, no offsets requested
+    const OUString aMapped(
+        transliteration_OneToOne::transliterateImpl(inStr, startPos, nCount, nullptr));
+
+    // Step 2 - composition: KA + voice-mark --> GA (pOffset is handed to this step)
+    const sal_Int32 nMappedLen = aMapped.getLength();
+    return i18nutil::widthfolding::compose_ja_voiced_sound_marks(aMapped, 0, nMappedLen, pOffset);
+}
+
+// Configures the JIS-like halfwidth --> fullwidth transliteration: the
+// mapping comes from the widthfolding half2fullTableForJIS table, no mapping
+// function is installed.
+HALFWIDTH_FULLWIDTH_LIKE_JIS::HALFWIDTH_FULLWIDTH_LIKE_JIS()
+{
+    implementationName = "com.sun.star.i18n.Transliteration.HALFWIDTH_FULLWIDTH_LIKE_JIS";
+    transliterationName = "halfwidthToFullwidthLikeJIS";
+    table = &i18nutil::widthfolding::gethalf2fullTableForJIS();
+    func = nullptr;
+}
+
+// Transliterate halfwidth to fullwidth (JIS-like variant): the one-to-one
+// table mapping runs first, then kana + voice-mark pairs are composed with
+// the WIDTHFOLDING_DONT_USE_COMBINED_VU flag.
+// NOTE(review): the composition step works on the intermediate string from
+// index 0, so whatever it stores in pOffset presumably refers to that string
+// rather than to inStr - confirm this is what callers expect for startPos != 0.
+OUString
+HALFWIDTH_FULLWIDTH_LIKE_JIS::transliterateImpl( const OUString& inStr, sal_Int32 startPos, sal_Int32 nCount, Sequence< sal_Int32 >* pOffset )
+{
+    // Step 1 - one to one mapping of the requested range, no offsets requested
+    const OUString aMapped(
+        transliteration_OneToOne::transliterateImpl(inStr, startPos, nCount, nullptr));
+
+    // Step 2 - composition: KA + voice-mark --> GA (pOffset is handed to this step)
+    const sal_Int32 nMappedLen = aMapped.getLength();
+    return i18nutil::widthfolding::compose_ja_voiced_sound_marks(
+        aMapped, 0, nMappedLen, pOffset, WIDTHFOLDING_DONT_USE_COMBINED_VU);
+}
+
+
+}
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/i18npool/source/transliteration/hiraganaToKatakana.cxx b/i18npool/source/transliteration/hiraganaToKatakana.cxx
new file mode 100644
index 0000000000..80b6f14d82
--- /dev/null
+++ b/i18npool/source/transliteration/hiraganaToKatakana.cxx
@@ -0,0 +1,48 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * This file incorporates work covered by the following license notice:
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed
+ * with this work for additional information regarding copyright
+ * ownership. The ASF licenses this file to you under the Apache
+ * License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of
+ * the License at http://www.apache.org/licenses/LICENSE-2.0 .
+ */
+
+#include <transliteration_OneToOne.hxx>
+
+using namespace com::sun::star::uno;
+using namespace com::sun::star::lang;
+
+namespace i18npool
+{
+// see http://charts.unicode.org/Web/U3040.html Hiragana (U+3040..U+309F)
+// see http://charts.unicode.org/Web/U30A0.html Katakana (U+30A0..U+30FF)
+//
+// Characters in U+3041..U+3096 and U+309D..U+309F are moved up by the
+// distance between the two block bases (0x0060); every other character is
+// returned unchanged.
+static sal_Unicode toKatakana(const sal_Unicode c)
+{
+    const bool bInLetterRange = c >= 0x3041 && c <= 0x3096;
+    const bool bInMarkRange = c >= 0x309d && c <= 0x309f;
+    if (!bInLetterRange && !bInMarkRange)
+        return c;
+
+    // shift code point by 0x0060
+    return c + (0x30a0 - 0x3040);
+}
+
+// Configures the hiragana --> katakana transliteration: characters are
+// mapped through toKatakana(), no lookup table is installed.
+hiraganaToKatakana::hiraganaToKatakana()
+{
+    implementationName = "com.sun.star.i18n.Transliteration.HIRAGANA_KATAKANA";
+    transliterationName = "hiraganaToKatakana";
+    table = nullptr;
+    func = toKatakana;
+}
+}
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/i18npool/source/transliteration/ignoreBaFa_ja_JP.cxx b/i18npool/source/transliteration/ignoreBaFa_ja_JP.cxx
new file mode 100644
index 0000000000..042e09c1df
--- /dev/null
+++ b/i18npool/source/transliteration/ignoreBaFa_ja_JP.cxx
@@ -0,0 +1,45 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * This file incorporates work covered by the following license notice:
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed
+ * with this work for additional information regarding copyright
+ * ownership. The ASF licenses this file to you under the Apache
+ * License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of
+ * the License at http://www.apache.org/licenses/LICENSE-2.0 .
+ */
+
+#include <transliteration_Ignore.hxx>
+
+using namespace com::sun::star::uno;
+
+namespace i18npool {
+
+// Rule table installed as `map` by the ignoreBaFa_ja_JP constructor.
+// Each row holds three code points and a flag.
+// NOTE(review): the field meanings are declared in
+// transliteration_Ignore::Mapping, which is not visible here - presumably
+// (first char, following char, replacement, flag); confirm against the header.
+const transliteration_Ignore::Mapping BaFa[] = {
+ { 0x30F4, 0x30A1, 0x30D0, true }, // KATAKANA VU, KATAKANA SMALL A, KATAKANA BA
+ { 0x3094, 0x3041, 0x3070, true }, // HIRAGANA VU, HIRAGANA SMALL A, HIRAGANA BA
+ { 0x30D5, 0x30A1, 0x30CF, true }, // KATAKANA HU, KATAKANA SMALL A, KATAKANA HA
+ { 0x3075, 0x3041, 0x306F, true }, // HIRAGANA HU, HIRAGANA SMALL A, HIRAGANA HA
+ { 0, 0, 0, true } // all-zero row; looks like the end marker - confirm
+};
+
+// Configures the ignoreBaFa_ja_JP transliteration: it is driven by the BaFa
+// rule table only, neither a mapping function nor a lookup table is set.
+ignoreBaFa_ja_JP::ignoreBaFa_ja_JP()
+{
+    implementationName = "com.sun.star.i18n.Transliteration.ignoreBaFa_ja_JP";
+    transliterationName = "ignoreBaFa_ja_JP";
+    map = BaFa;
+    table = nullptr;
+    func = nullptr;
+}
+
+}
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/i18npool/source/transliteration/ignoreDiacritics_CTL.cxx b/i18npool/source/transliteration/ignoreDiacritics_CTL.cxx
new file mode 100644
index 0000000000..9339d87eab
--- /dev/null
+++ b/i18npool/source/transliteration/ignoreDiacritics_CTL.cxx
@@ -0,0 +1,95 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#include <comphelper/sequence.hxx>
+#include <rtl/ustrbuf.hxx>
+#include <transliteration_Ignore.hxx>
+#include <unicode/translit.h>
+
+namespace i18npool {
+
+// Configures ignoreDiacritics_CTL and creates the ICU transliterator with
+// the rule "NFD; [:M:] Remove; NFC". If ICU reports a failure the
+// transliterator member is reset to null, which the other methods check for.
+ignoreDiacritics_CTL::ignoreDiacritics_CTL()
+{
+    implementationName = "com.sun.star.i18n.Transliteration.ignoreDiacritics_CTL";
+    transliterationName = "ignoreDiacritics_CTL";
+    map = nullptr;
+    table = nullptr;
+    func = nullptr;
+
+    UErrorCode eIcuStatus = U_ZERO_ERROR;
+    m_transliterator.reset(
+        icu::Transliterator::createInstance("NFD; [:M:] Remove; NFC", UTRANS_FORWARD, eIcuStatus));
+    if (U_FAILURE(eIcuStatus))
+    {
+        m_transliterator = nullptr;
+    }
+}
+
+// Runs a single UTF-16 unit through the ICU transliterator.
+// Returns 0xffff when the result is empty (the character is to be skipped),
+// the original character when the result is longer than one unit, and the
+// single resulting unit otherwise.
+// Throws css::uno::RuntimeException when no transliterator is available.
+sal_Unicode SAL_CALL
+ignoreDiacritics_CTL::transliterateChar2Char(sal_Unicode nInChar)
+{
+    if (!m_transliterator)
+        throw css::uno::RuntimeException();
+
+    icu::UnicodeString aResult(nInChar);
+    m_transliterator->transliterate(aResult);
+
+    if (!aResult.isEmpty())
+    {
+        // Don't know what to do with a multi-unit result, return the original.
+        return aResult.length() > 1 ? nInChar : aResult[0];
+    }
+
+    return 0xffff; // Skip this character.
+}
+
+/**
+ * Runs the nCount UTF-16 units of rInStr that start at nStartPos through
+ * the ICU transliterator created in the constructor.
+ *
+ * When pOffset is given, the input is transliterated one code point at a
+ * time and *pOffset receives, for every UTF-16 unit of the result, the
+ * input index of the code point that produced it. Without pOffset the
+ * whole range is transliterated in a single ICU call.
+ *
+ * @throws css::uno::RuntimeException if no transliterator is available, or
+ *         if nStartPos / nCount do not describe a range inside rInStr
+ *         (negative values included).
+ */
+OUString
+ignoreDiacritics_CTL::foldingImpl(const OUString& rInStr, sal_Int32 nStartPos,
+    sal_Int32 nCount, css::uno::Sequence<sal_Int32>* pOffset)
+{
+    if (!m_transliterator)
+        throw css::uno::RuntimeException();
+
+    // A negative nCount must be rejected too: it would otherwise reach
+    // vector::reserve() / the UnicodeString length argument below.
+    // Comparing against the remaining length (nStartPos is known to be
+    // non-negative at that point) avoids overflow in nStartPos + nCount.
+    if (nStartPos < 0 || nCount < 0 || nCount > rInStr.getLength() - nStartPos)
+        throw css::uno::RuntimeException();
+
+    if (pOffset)
+    {
+        OUStringBuffer aOutBuf(nCount);
+
+        std::vector<sal_Int32> aOffset;
+        aOffset.reserve(nCount);
+
+        const sal_Int32 nEnd = nStartPos + nCount;
+        sal_Int32 nPosition = nStartPos;
+        while (nPosition < nEnd)
+        {
+            // nIndex is advanced past the code point that is read.
+            sal_Int32 nIndex = nPosition;
+            UChar32 nChar = rInStr.iterateCodePoints(&nIndex);
+            icu::UnicodeString aUStr(nChar);
+            m_transliterator->transliterate(aUStr);
+
+            aOutBuf.append(reinterpret_cast<const sal_Unicode*>(aUStr.getBuffer()), aUStr.length());
+
+            // Every output unit of this code point maps back to its start index.
+            std::fill_n(std::back_inserter(aOffset), aUStr.length(), nPosition);
+
+            nPosition = nIndex;
+        }
+
+        *pOffset = comphelper::containerToSequence(aOffset);
+        return aOutBuf.makeStringAndClear();
+    }
+    else
+    {
+        icu::UnicodeString aUStr(reinterpret_cast<const UChar*>(rInStr.getStr()) + nStartPos, nCount);
+        m_transliterator->transliterate(aUStr);
+        return OUString(reinterpret_cast<const sal_Unicode*>(aUStr.getBuffer()), aUStr.length());
+    }
+}
+
+}
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/i18npool/source/transliteration/ignoreHyuByu_ja_JP.cxx b/i18npool/source/transliteration/ignoreHyuByu_ja_JP.cxx
new file mode 100644
index 0000000000..be66ddfc93
--- /dev/null
+++ b/i18npool/source/transliteration/ignoreHyuByu_ja_JP.cxx
@@ -0,0 +1,46 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * This file incorporates work covered by the following license notice:
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed
+ * with this work for additional information regarding copyright
+ * ownership. The ASF licenses this file to you under the Apache
+ * License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of
+ * the License at http://www.apache.org/licenses/LICENSE-2.0 .
+ */
+
+#include <transliteration_Ignore.hxx>
+
+using namespace com::sun::star::uno;
+using namespace com::sun::star::lang;
+
+namespace i18npool {
+
+// Rule table installed as `map` by the ignoreHyuByu_ja_JP constructor.
+// Each row holds three code points and a flag.
+// NOTE(review): the field meanings are declared in
+// transliteration_Ignore::Mapping, which is not visible here - presumably
+// (first char, following char, replacement, flag); confirm against the header.
+const transliteration_Ignore::Mapping HyuByu[] = {
+ { 0x30D5, 0x30E5, 0x30D2, false }, // KATAKANA HU, KATAKANA SMALL YU, KATAKANA HI
+ { 0x3075, 0x3085, 0x3072, false }, // HIRAGANA HU, HIRAGANA SMALL YU, HIRAGANA HI
+ { 0x30F4, 0x30E5, 0x30D3, false }, // KATAKANA VU, KATAKANA SMALL YU, KATAKANA BI
+ { 0x3094, 0x3085, 0x3073, false }, // HIRAGANA VU, HIRAGANA SMALL YU, HIRAGANA BI
+ { 0, 0, 0, false } // all-zero row; looks like the end marker - confirm
+};
+
+// Configures the ignoreHyuByu_ja_JP transliteration: it is driven by the
+// HyuByu rule table only, neither a mapping function nor a lookup table is set.
+ignoreHyuByu_ja_JP::ignoreHyuByu_ja_JP()
+{
+    implementationName = "com.sun.star.i18n.Transliteration.ignoreHyuByu_ja_JP";
+    transliterationName = "ignoreHyuByu_ja_JP";
+    map = HyuByu;
+    table = nullptr;
+    func = nullptr;
+}
+
+}
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/i18npool/source/transliteration/ignoreIandEfollowedByYa_ja_JP.cxx b/i18npool/source/transliteration/ignoreIandEfollowedByYa_ja_JP.cxx
new file mode 100644
index 0000000000..5e86590279
--- /dev/null
+++ b/i18npool/source/transliteration/ignoreIandEfollowedByYa_ja_JP.cxx
@@ -0,0 +1,125 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * This file incorporates work covered by the following license notice:
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed
+ * with this work for additional information regarding copyright
+ * ownership. The ASF licenses this file to you under the Apache
+ * License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of
+ * the License at http://www.apache.org/licenses/LICENSE-2.0 .
+ */
+
+#include <i18nutil/oneToOneMapping.hxx>
+
+#include <transliteration_Ignore.hxx>
+
+#include <numeric>
+
+using namespace com::sun::star::uno;
+using namespace com::sun::star::lang;
+
+namespace i18npool {
+
+// Katakana letters after which foldingImpl() below rewrites a following
+// (SMALL) YA as KATAKANA LETTER A. Every entry maps to 0x0000; foldingImpl()
+// only tests whether the lookup result differs from the character itself,
+// so the table effectively serves as a membership set.
+i18nutil::OneToOneMappingTable_t const IandE[] = {
+ { 0x30A3, 0x0000 }, // KATAKANA LETTER SMALL I
+ { 0x30A4, 0x0000 }, // KATAKANA LETTER I
+ { 0x30A7, 0x0000 }, // KATAKANA LETTER SMALL E
+ { 0x30A8, 0x0000 }, // KATAKANA LETTER E
+ { 0x30AD, 0x0000 }, // KATAKANA LETTER KI
+ { 0x30AE, 0x0000 }, // KATAKANA LETTER GI
+ { 0x30B1, 0x0000 }, // KATAKANA LETTER KE
+ { 0x30B2, 0x0000 }, // KATAKANA LETTER GE
+ { 0x30B7, 0x0000 }, // KATAKANA LETTER SI
+ { 0x30B8, 0x0000 }, // KATAKANA LETTER ZI
+ { 0x30BB, 0x0000 }, // KATAKANA LETTER SE
+ { 0x30BC, 0x0000 }, // KATAKANA LETTER ZE
+ { 0x30C1, 0x0000 }, // KATAKANA LETTER TI
+ { 0x30C2, 0x0000 }, // KATAKANA LETTER DI
+ { 0x30C6, 0x0000 }, // KATAKANA LETTER TE
+ { 0x30C7, 0x0000 }, // KATAKANA LETTER DE
+ { 0x30CB, 0x0000 }, // KATAKANA LETTER NI
+ { 0x30CD, 0x0000 }, // KATAKANA LETTER NE
+ { 0x30D2, 0x0000 }, // KATAKANA LETTER HI
+ { 0x30D3, 0x0000 }, // KATAKANA LETTER BI
+ { 0x30D4, 0x0000 }, // KATAKANA LETTER PI
+ { 0x30D8, 0x0000 }, // KATAKANA LETTER HE
+ { 0x30D9, 0x0000 }, // KATAKANA LETTER BE
+ { 0x30DA, 0x0000 }, // KATAKANA LETTER PE
+ { 0x30DF, 0x0000 }, // KATAKANA LETTER MI
+ { 0x30E1, 0x0000 }, // KATAKANA LETTER ME
+ { 0x30EA, 0x0000 }, // KATAKANA LETTER RI
+ { 0x30EC, 0x0000 }, // KATAKANA LETTER RE
+ { 0x30F0, 0x0000 }, // KATAKANA LETTER WI
+ { 0x30F1, 0x0000 }, // KATAKANA LETTER WE
+ { 0x30F6, 0x0000 }, // KATAKANA LETTER SMALL KE
+ { 0x30F8, 0x0000 }, // KATAKANA LETTER VI
+ { 0x30F9, 0x0000 } // KATAKANA LETTER VE
+};
+
+
+// Folds "<character listed in IandE> + (SMALL) YA" into
+// "<that character> + KATAKANA LETTER A" within the nCount characters of
+// inStr that start at startPos, and returns the folded range as a new string.
+// If pOffset is given it is filled with startPos, startPos + 1, ... and
+// finally resized to the length of the result.
+// NOTE(review): startPos / nCount are not validated here, and the first
+// character is read even when nCount is 0 - callers presumably guarantee a
+// valid range; confirm.
+OUString
+ignoreIandEfollowedByYa_ja_JP::foldingImpl( const OUString& inStr, sal_Int32 startPos, sal_Int32 nCount, Sequence< sal_Int32 >* pOffset )
+{
+ // Create a string buffer which can hold nCount + 1 characters.
+ // The reference count is 1 now.
+ rtl_uString * newStr = rtl_uString_alloc(nCount);
+ sal_Unicode * dst = newStr->buffer;
+ const sal_Unicode * src = inStr.getStr() + startPos;
+
+ if (pOffset) {
+ // Allocate nCount length to offset argument.
+ pOffset->realloc( nCount );
+ auto [begin, end] = asNonConstRange(*pOffset);
+ // Offsets are the consecutive input indices starting at startPos.
+ std::iota(begin, end, startPos);
+ }
+
+
+ // The loop works one character behind: previousChar is read but not yet
+ // written, so it can still be paired with the character that follows it.
+ sal_Unicode previousChar = *src ++;
+ sal_Unicode currentChar;
+
+ // One to one mapping
+ i18nutil::oneToOneMapping aTable(IandE, sizeof(IandE));
+
+ // Translation
+ while (-- nCount > 0) {
+ currentChar = *src ++;
+
+ // the character listed in above table + YA --> the character + A
+ if (currentChar == 0x30E3 || // KATAKANA LETTER SMALL YA
+ currentChar == 0x30E4) { // KATAKANA LETTER YA
+ // Listed characters map to 0x0000 and therefore differ from
+ // themselves; presumably the lookup returns unlisted characters
+ // unchanged - confirm against oneToOneMapping.
+ if (aTable[ previousChar ] != previousChar) {
+ *dst ++ = previousChar;
+ *dst ++ = 0x30A2; // KATAKANA LETTER A
+ // Both characters of the pair are written; fetch the next
+ // input character as the new pending one and account for it.
+ previousChar = *src ++;
+ nCount --;
+ continue;
+ }
+ }
+
+ *dst ++ = previousChar;
+ previousChar = currentChar;
+ }
+
+ // nCount is 0 here only if a pending character is still unwritten; it is
+ // negative when the input was empty or ended with a replaced pair.
+ if (nCount == 0) {
+ *dst ++ = previousChar;
+ }
+
+ *dst = u'\0';
+
+ // The length is the number of characters actually written.
+ newStr->length = sal_Int32(dst - newStr->buffer);
+ if (pOffset)
+ pOffset->realloc(newStr->length);
+ return OUString(newStr, SAL_NO_ACQUIRE); // take ownership
+}
+
+}
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/i18npool/source/transliteration/ignoreIterationMark_ja_JP.cxx b/i18npool/source/transliteration/ignoreIterationMark_ja_JP.cxx
new file mode 100644
index 0000000000..c5a3cb0456
--- /dev/null
+++ b/i18npool/source/transliteration/ignoreIterationMark_ja_JP.cxx
@@ -0,0 +1,139 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * This file incorporates work covered by the following license notice:
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed
+ * with this work for additional information regarding copyright
+ * ownership. The ASF licenses this file to you under the Apache
+ * License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of
+ * the License at http://www.apache.org/licenses/LICENSE-2.0 .
+ */
+
+#include <i18nutil/oneToOneMapping.hxx>
+
+#include <transliteration_Ignore.hxx>
+
+#include <numeric>
+
+using namespace com::sun::star::uno;
+using namespace com::sun::star::lang;
+
+
+namespace i18npool {
+
+// Maps a kana letter (or iteration mark) to its voiced counterpart, as named
+// per entry. foldingImpl() below looks up the preceding character in this
+// table when it meets a voiced iteration mark.
+i18nutil::OneToOneMappingTable_t const ignoreIterationMark_ja_JP_mappingTable[] = {
+ { 0x3046, 0x3094 }, // HIRAGANA LETTER U --> HIRAGANA LETTER VU
+ { 0x304B, 0x304C }, // HIRAGANA LETTER KA --> HIRAGANA LETTER GA
+ { 0x304D, 0x304E }, // HIRAGANA LETTER KI --> HIRAGANA LETTER GI
+ { 0x304F, 0x3050 }, // HIRAGANA LETTER KU --> HIRAGANA LETTER GU
+ { 0x3051, 0x3052 }, // HIRAGANA LETTER KE --> HIRAGANA LETTER GE
+ { 0x3053, 0x3054 }, // HIRAGANA LETTER KO --> HIRAGANA LETTER GO
+ { 0x3055, 0x3056 }, // HIRAGANA LETTER SA --> HIRAGANA LETTER ZA
+ { 0x3057, 0x3058 }, // HIRAGANA LETTER SI --> HIRAGANA LETTER ZI
+ { 0x3059, 0x305A }, // HIRAGANA LETTER SU --> HIRAGANA LETTER ZU
+ { 0x305B, 0x305C }, // HIRAGANA LETTER SE --> HIRAGANA LETTER ZE
+ { 0x305D, 0x305E }, // HIRAGANA LETTER SO --> HIRAGANA LETTER ZO
+ { 0x305F, 0x3060 }, // HIRAGANA LETTER TA --> HIRAGANA LETTER DA
+ { 0x3061, 0x3062 }, // HIRAGANA LETTER TI --> HIRAGANA LETTER DI
+ { 0x3064, 0x3065 }, // HIRAGANA LETTER TU --> HIRAGANA LETTER DU
+ { 0x3066, 0x3067 }, // HIRAGANA LETTER TE --> HIRAGANA LETTER DE
+ { 0x3068, 0x3069 }, // HIRAGANA LETTER TO --> HIRAGANA LETTER DO
+ { 0x306F, 0x3070 }, // HIRAGANA LETTER HA --> HIRAGANA LETTER BA
+ { 0x3072, 0x3073 }, // HIRAGANA LETTER HI --> HIRAGANA LETTER BI
+ { 0x3075, 0x3076 }, // HIRAGANA LETTER HU --> HIRAGANA LETTER BU
+ { 0x3078, 0x3079 }, // HIRAGANA LETTER HE --> HIRAGANA LETTER BE
+ { 0x307B, 0x307C }, // HIRAGANA LETTER HO --> HIRAGANA LETTER BO
+ { 0x309D, 0x309E }, // HIRAGANA ITERATION MARK --> HIRAGANA VOICED ITERATION MARK
+ { 0x30A6, 0x30F4 }, // KATAKANA LETTER U --> KATAKANA LETTER VU
+ { 0x30AB, 0x30AC }, // KATAKANA LETTER KA --> KATAKANA LETTER GA
+ { 0x30AD, 0x30AE }, // KATAKANA LETTER KI --> KATAKANA LETTER GI
+ { 0x30AF, 0x30B0 }, // KATAKANA LETTER KU --> KATAKANA LETTER GU
+ { 0x30B1, 0x30B2 }, // KATAKANA LETTER KE --> KATAKANA LETTER GE
+ { 0x30B3, 0x30B4 }, // KATAKANA LETTER KO --> KATAKANA LETTER GO
+ { 0x30B5, 0x30B6 }, // KATAKANA LETTER SA --> KATAKANA LETTER ZA
+ { 0x30B7, 0x30B8 }, // KATAKANA LETTER SI --> KATAKANA LETTER ZI
+ { 0x30B9, 0x30BA }, // KATAKANA LETTER SU --> KATAKANA LETTER ZU
+ { 0x30BB, 0x30BC }, // KATAKANA LETTER SE --> KATAKANA LETTER ZE
+ { 0x30BD, 0x30BE }, // KATAKANA LETTER SO --> KATAKANA LETTER ZO
+ { 0x30BF, 0x30C0 }, // KATAKANA LETTER TA --> KATAKANA LETTER DA
+ { 0x30C1, 0x30C2 }, // KATAKANA LETTER TI --> KATAKANA LETTER DI
+ { 0x30C4, 0x30C5 }, // KATAKANA LETTER TU --> KATAKANA LETTER DU
+ { 0x30C6, 0x30C7 }, // KATAKANA LETTER TE --> KATAKANA LETTER DE
+ { 0x30C8, 0x30C9 }, // KATAKANA LETTER TO --> KATAKANA LETTER DO
+ { 0x30CF, 0x30D0 }, // KATAKANA LETTER HA --> KATAKANA LETTER BA
+ { 0x30D2, 0x30D3 }, // KATAKANA LETTER HI --> KATAKANA LETTER BI
+ { 0x30D5, 0x30D6 }, // KATAKANA LETTER HU --> KATAKANA LETTER BU
+ { 0x30D8, 0x30D9 }, // KATAKANA LETTER HE --> KATAKANA LETTER BE
+ { 0x30DB, 0x30DC }, // KATAKANA LETTER HO --> KATAKANA LETTER BO
+ { 0x30EF, 0x30F7 }, // KATAKANA LETTER WA --> KATAKANA LETTER VA
+ { 0x30F0, 0x30F8 }, // KATAKANA LETTER WI --> KATAKANA LETTER VI
+ { 0x30F1, 0x30F9 }, // KATAKANA LETTER WE --> KATAKANA LETTER VE
+ { 0x30F2, 0x30FA }, // KATAKANA LETTER WO --> KATAKANA LETTER VO
+ { 0x30FD, 0x30FE } // KATAKANA ITERATION MARK --> KATAKANA VOICED ITERATION MARK
+};
+
+
+// Replaces iteration marks within the nCount characters of inStr that start
+// at startPos: a plain iteration mark becomes a copy of the preceding
+// character, a voiced iteration mark becomes the table lookup of the
+// preceding character. The first character of the range is never replaced.
+// If pOffset is given it is filled with startPos, startPos + 1, ... and
+// finally resized to the length of the result.
+// NOTE(review): startPos / nCount are not validated here, and the first
+// character is read even when nCount is 0 - callers presumably guarantee a
+// valid range; confirm.
+OUString
+ignoreIterationMark_ja_JP::foldingImpl( const OUString& inStr, sal_Int32 startPos, sal_Int32 nCount, Sequence< sal_Int32 >* pOffset )
+{
+ i18nutil::oneToOneMapping aTable(ignoreIterationMark_ja_JP_mappingTable, sizeof(ignoreIterationMark_ja_JP_mappingTable));
+
+ // Create a string buffer which can hold nCount + 1 characters.
+ // The reference count is 1 now.
+ rtl_uString * newStr = rtl_uString_alloc(nCount);
+ sal_Unicode * dst = newStr->buffer;
+ const sal_Unicode * src = inStr.getStr() + startPos;
+
+ if (pOffset) {
+ // Allocate nCount length to offset argument.
+ pOffset->realloc( nCount );
+ auto [begin, end] = asNonConstRange(*pOffset);
+ // Offsets are the consecutive input indices starting at startPos.
+ std::iota(begin, end, startPos);
+ }
+
+
+ // The loop works one character behind: previousChar is read but not yet
+ // written when the next character is examined.
+ sal_Unicode previousChar = *src ++;
+ sal_Unicode currentChar;
+
+ // Conversion
+ while (-- nCount > 0) {
+ currentChar = *src ++;
+
+ switch ( currentChar ) {
+ case 0x30fd: // KATAKANA ITERATION MARK
+ case 0x309d: // HIRAGANA ITERATION MARK
+ case 0x3005: // IDEOGRAPHIC ITERATION MARK
+ currentChar = previousChar;
+ break;
+ case 0x30fe: // KATAKANA VOICED ITERATION MARK
+ case 0x309e: // HIRAGANA VOICED ITERATION MARK
+ currentChar = aTable[ previousChar ];
+ break;
+ }
+ // previousChar becomes the already-replaced character, so a run of
+ // marks keeps referring to the resolved character before it.
+ *dst ++ = previousChar;
+ previousChar = currentChar;
+ }
+
+ // nCount is 0 here when at least one character was read and the last one
+ // is still pending; it is negative when the input range was empty.
+ if (nCount == 0) {
+ *dst ++ = previousChar;
+ }
+
+ *dst = u'\0';
+
+ // The length is the number of characters actually written.
+ newStr->length = sal_Int32(dst - newStr->buffer);
+ if (pOffset)
+ pOffset->realloc(newStr->length);
+ return OUString(newStr, SAL_NO_ACQUIRE); // take ownership
+}
+
+}
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/i18npool/source/transliteration/ignoreKana.cxx b/i18npool/source/transliteration/ignoreKana.cxx
new file mode 100644
index 0000000000..86a484e1a6
--- /dev/null
+++ b/i18npool/source/transliteration/ignoreKana.cxx
@@ -0,0 +1,65 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * This file incorporates work covered by the following license notice:
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed
+ * with this work for additional information regarding copyright
+ * ownership. The ASF licenses this file to you under the Apache
+ * License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of
+ * the License at http://www.apache.org/licenses/LICENSE-2.0 .
+ */
+
+#include <rtl/ref.hxx>
+
+#include <transliteration_Ignore.hxx>
+#include <transliteration_OneToOne.hxx>
+
+namespace com::sun::star::uno { class XComponentContext; }
+
+using namespace com::sun::star::uno;
+using namespace com::sun::star::lang;
+
+namespace i18npool {
+
+OUString
+ignoreKana::foldingImpl( const OUString& inStr, sal_Int32 startPos, sal_Int32 nCount, Sequence< sal_Int32 >* pOffset )
+{
+    // Folding is delegated to a temporary hiraganaToKatakana object that is released once the call has returned.
+    return rtl::Reference< hiraganaToKatakana >(new hiraganaToKatakana)->transliterateImpl(inStr, startPos, nCount, pOffset);
+}
+
+Sequence< OUString > SAL_CALL
+ignoreKana::transliterateRange( const OUString& str1, const OUString& str2 )
+{
+    // Both kana converters are created (hiragana->katakana first) and handed to the
+    // four-argument transliteration_Ignore::transliterateRange overload.
+    rtl::Reference< hiraganaToKatakana > xToKatakana(new hiraganaToKatakana);
+    rtl::Reference< katakanaToHiragana > xToHiragana(new katakanaToHiragana);
+    return transliteration_Ignore::transliterateRange(str1, str2, *xToKatakana, *xToHiragana);
+}
+
+sal_Unicode SAL_CALL
+ignoreKana::transliterateChar2Char( sal_Unicode inChar)
+{
+    // The single-character case is answered by a temporary hiraganaToKatakana object.
+    return rtl::Reference< hiraganaToKatakana >(new hiraganaToKatakana)->transliterateChar2Char(inChar);
+}
+
+}
+
+extern "C" SAL_DLLPUBLIC_EXPORT css::uno::XInterface *
+com_sun_star_i18n_Transliteration_IGNORE_KANA_get_implementation(
+    css::uno::XComponentContext * /* unused */, css::uno::Sequence<css::uno::Any> const & /* unused */)
+{
+    i18npool::ignoreKana * const pImpl = new i18npool::ignoreKana(); // fresh implementation object
+    return cppu::acquire(pImpl);
+}
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/i18npool/source/transliteration/ignoreKashida_CTL.cxx b/i18npool/source/transliteration/ignoreKashida_CTL.cxx
new file mode 100644
index 0000000000..e93ffd8494
--- /dev/null
+++ b/i18npool/source/transliteration/ignoreKashida_CTL.cxx
@@ -0,0 +1,32 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#include <transliteration_Ignore.hxx>
+
+namespace i18npool
+{
+static sal_Unicode ignoreKashida_CTL_translator(const sal_Unicode c)
+{
+    // Kashida (0x0640) is answered with 0xffff, which marks the character
+    // as one to skip; every other value is returned unchanged.
+    constexpr sal_Unicode cKashida = 0x0640;
+    return c == cKashida ? sal_Unicode(0xffff) : c;
+}
+
+ignoreKashida_CTL::ignoreKashida_CTL()
+{
+    implementationName = "com.sun.star.i18n.Transliteration.ignoreKashida_CTL";
+    transliterationName = "ignoreKashida_CTL";
+    func = ignoreKashida_CTL_translator; // the only hook this constructor installs
+    map = nullptr;
+    table = nullptr;
+}
+}
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/i18npool/source/transliteration/ignoreKiKuFollowedBySa_ja_JP.cxx b/i18npool/source/transliteration/ignoreKiKuFollowedBySa_ja_JP.cxx
new file mode 100644
index 0000000000..113c80f9ae
--- /dev/null
+++ b/i18npool/source/transliteration/ignoreKiKuFollowedBySa_ja_JP.cxx
@@ -0,0 +1,83 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * This file incorporates work covered by the following license notice:
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed
+ * with this work for additional information regarding copyright
+ * ownership. The ASF licenses this file to you under the Apache
+ * License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of
+ * the License at http://www.apache.org/licenses/LICENSE-2.0 .
+ */
+
+#include <transliteration_Ignore.hxx>
+
+#include <numeric>
+
+using namespace com::sun::star::uno;
+using namespace com::sun::star::lang;
+
+namespace i18npool {
+
+OUString
+ignoreKiKuFollowedBySa_ja_JP::foldingImpl( const OUString& inStr, sal_Int32 startPos, sal_Int32 nCount, Sequence< sal_Int32 >* pOffset )
+{
+ // Folds KATAKANA KU + (SA..ZO) to KI + that character over nCount code units of inStr from startPos.
+ // The string buffer created below can hold nCount + 1 characters; its reference count is 1 now.
+ rtl_uString * newStr = rtl_uString_alloc(nCount);
+ sal_Unicode * dst = newStr->buffer;
+ const sal_Unicode * src = inStr.getStr() + startPos;
+
+ if (pOffset) {
+ // Allocate nCount length to offset argument and number it startPos, startPos + 1, ...
+ pOffset->realloc( nCount );
+ auto [begin, end] = asNonConstRange(*pOffset);
+ std::iota(begin, end, startPos);
+ }
+
+ // previousChar is the pending character; it is written once its successor has been inspected.
+ sal_Unicode previousChar = *src ++;
+ sal_Unicode currentChar;
+
+ // Translation
+ while (-- nCount > 0) {
+ currentChar = *src ++;
+
+ // KU + SA..ZO (0x30B5-0x30BE, voiced forms included) --> KI + the same character
+ if (previousChar == 0x30AF ) { // KATAKANA LETTER KU
+ if (0x30B5 <= currentChar && // KATAKANA LETTER SA
+ currentChar <= 0x30BE) { // KATAKANA LETTER ZO
+ *dst ++ = 0x30AD; // KATAKANA LETTER KI
+ *dst ++ = currentChar;
+ previousChar = *src ++; // both characters are written, so load a fresh pending character
+ nCount --; // account for it; the loop test then leaves nCount at -1 if the pair closed the input
+ continue;
+ }
+ }
+
+ *dst ++ = previousChar;
+ previousChar = currentChar;
+ }
+
+ if (nCount == 0) { // a pending character is still unwritten; not the case after a closing pair
+ *dst ++ = previousChar;
+ }
+
+ *dst = u'\0';
+
+ newStr->length = sal_Int32(dst - newStr->buffer);
+ if (pOffset)
+ pOffset->realloc(newStr->length); // resize the offsets to the number of characters written
+ return OUString(newStr, SAL_NO_ACQUIRE); // take ownership
+}
+
+}
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/i18npool/source/transliteration/ignoreMiddleDot_ja_JP.cxx b/i18npool/source/transliteration/ignoreMiddleDot_ja_JP.cxx
new file mode 100644
index 0000000000..8b03275695
--- /dev/null
+++ b/i18npool/source/transliteration/ignoreMiddleDot_ja_JP.cxx
@@ -0,0 +1,46 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * This file incorporates work covered by the following license notice:
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed
+ * with this work for additional information regarding copyright
+ * ownership. The ASF licenses this file to you under the Apache
+ * License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of
+ * the License at http://www.apache.org/licenses/LICENSE-2.0 .
+ */
+
+#include <transliteration_Ignore.hxx>
+
+namespace i18npool
+{
+static sal_Unicode ignoreMiddleDot_ja_JP_translator(const sal_Unicode c)
+{
+    // The two middle-dot code points are answered with 0xffff ("skip this
+    // character"); every other value is handed back unchanged.
+    const bool bIsMiddleDot = c == 0x30FB // KATAKANA MIDDLE DOT
+                              || c == 0xFF65; // HALFWIDTH KATAKANA MIDDLE DOT
+    if (!bIsMiddleDot)
+        return c;
+
+    return 0xffff;
+}
+
+ignoreMiddleDot_ja_JP::ignoreMiddleDot_ja_JP()
+{
+    implementationName = "com.sun.star.i18n.Transliteration.ignoreMiddleDot_ja_JP";
+    transliterationName = "ignoreMiddleDot_ja_JP";
+    func = ignoreMiddleDot_ja_JP_translator; // the only hook this constructor installs
+    map = nullptr;
+    table = nullptr;
+}
+}
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/i18npool/source/transliteration/ignoreMinusSign_ja_JP.cxx b/i18npool/source/transliteration/ignoreMinusSign_ja_JP.cxx
new file mode 100644
index 0000000000..5817d157bb
--- /dev/null
+++ b/i18npool/source/transliteration/ignoreMinusSign_ja_JP.cxx
@@ -0,0 +1,53 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * This file incorporates work covered by the following license notice:
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed
+ * with this work for additional information regarding copyright
+ * ownership. The ASF licenses this file to you under the Apache
+ * License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of
+ * the License at http://www.apache.org/licenses/LICENSE-2.0 .
+ */
+
+#include <transliteration_Ignore.hxx>
+
+namespace i18npool
+{
+static sal_Unicode ignoreMinusSign_ja_JP_translator(const sal_Unicode c)
+{
+    // Each code point listed below is folded to 0x30fc, KATAKANA-HIRAGANA
+    // PROLONGED SOUND MARK; any other value is returned unchanged.
+    static const sal_Unicode aFolded[] = {
+        0x2212, // MINUS SIGN
+        0x002d, // HYPHEN-MINUS
+        0x2010, 0x2011, // HYPHEN, NON-BREAKING HYPHEN
+        0x2012, 0x2013, // FIGURE DASH, EN DASH
+        0x2014, 0x2015, // EM DASH, HORIZONTAL BAR
+        0xff0d, // FULLWIDTH HYPHEN-MINUS
+        0xff70 // HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK
+    };
+    for (const sal_Unicode cCandidate : aFolded)
+        if (c == cCandidate)
+            return 0x30fc;
+    return c;
+}
+
+ignoreMinusSign_ja_JP::ignoreMinusSign_ja_JP()
+{
+    implementationName = "com.sun.star.i18n.Transliteration.ignoreMinusSign_ja_JP";
+    transliterationName = "ignoreMinusSign_ja_JP";
+    func = ignoreMinusSign_ja_JP_translator; // the only hook this constructor installs
+    map = nullptr;
+    table = nullptr;
+}
+}
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/i18npool/source/transliteration/ignoreProlongedSoundMark_ja_JP.cxx b/i18npool/source/transliteration/ignoreProlongedSoundMark_ja_JP.cxx
new file mode 100644
index 0000000000..3291afa9ba
--- /dev/null
+++ b/i18npool/source/transliteration/ignoreProlongedSoundMark_ja_JP.cxx
@@ -0,0 +1,345 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * This file incorporates work covered by the following license notice:
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed
+ * with this work for additional information regarding copyright
+ * ownership. The ASF licenses this file to you under the Apache
+ * License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of
+ * the License at http://www.apache.org/licenses/LICENSE-2.0 .
+ */
+
+#include <transliteration_Ignore.hxx>
+
+#include <numeric>
+
+using namespace com::sun::star::uno;
+using namespace com::sun::star::lang;
+
+namespace i18npool {
+
+const sal_Unicode table_normalwidth[] = { // indexed by (code unit - 0x3041): the vowel kana substituted for a prolonged sound mark after that character (e.g. KA -> A); 0x0000 where no vowel is given
+ // 0x0000, // 0x3040
+ 0x3041, // 0x3041 HIRAGANA LETTER SMALL A
+ 0x3042, // 0x3042 HIRAGANA LETTER A
+ 0x3043, // 0x3043 HIRAGANA LETTER SMALL I
+ 0x3044, // 0x3044 HIRAGANA LETTER I
+ 0x3045, // 0x3045 HIRAGANA LETTER SMALL U
+ 0x3046, // 0x3046 HIRAGANA LETTER U
+ 0x3047, // 0x3047 HIRAGANA LETTER SMALL E
+ 0x3048, // 0x3048 HIRAGANA LETTER E
+ 0x3049, // 0x3049 HIRAGANA LETTER SMALL O
+ 0x304a, // 0x304a HIRAGANA LETTER O
+ 0x3042, // 0x304b HIRAGANA LETTER KA
+ 0x3042, // 0x304c HIRAGANA LETTER GA
+ 0x3044, // 0x304d HIRAGANA LETTER KI
+ 0x3044, // 0x304e HIRAGANA LETTER GI
+ 0x3046, // 0x304f HIRAGANA LETTER KU
+ 0x3046, // 0x3050 HIRAGANA LETTER GU
+ 0x3048, // 0x3051 HIRAGANA LETTER KE
+ 0x3048, // 0x3052 HIRAGANA LETTER GE
+ 0x304a, // 0x3053 HIRAGANA LETTER KO
+ 0x304a, // 0x3054 HIRAGANA LETTER GO
+ 0x3042, // 0x3055 HIRAGANA LETTER SA
+ 0x3042, // 0x3056 HIRAGANA LETTER ZA
+ 0x3044, // 0x3057 HIRAGANA LETTER SI
+ 0x3044, // 0x3058 HIRAGANA LETTER ZI
+ 0x3046, // 0x3059 HIRAGANA LETTER SU
+ 0x3046, // 0x305a HIRAGANA LETTER ZU
+ 0x3048, // 0x305b HIRAGANA LETTER SE
+ 0x3048, // 0x305c HIRAGANA LETTER ZE
+ 0x304a, // 0x305d HIRAGANA LETTER SO
+ 0x304a, // 0x305e HIRAGANA LETTER ZO
+ 0x3042, // 0x305f HIRAGANA LETTER TA
+ 0x3042, // 0x3060 HIRAGANA LETTER DA
+ 0x3044, // 0x3061 HIRAGANA LETTER TI
+ 0x3044, // 0x3062 HIRAGANA LETTER DI
+ 0x3045, // 0x3063 HIRAGANA LETTER SMALL TU
+ 0x3046, // 0x3064 HIRAGANA LETTER TU
+ 0x3046, // 0x3065 HIRAGANA LETTER DU
+ 0x3048, // 0x3066 HIRAGANA LETTER TE
+ 0x3048, // 0x3067 HIRAGANA LETTER DE
+ 0x304a, // 0x3068 HIRAGANA LETTER TO
+ 0x304a, // 0x3069 HIRAGANA LETTER DO
+ 0x3042, // 0x306a HIRAGANA LETTER NA
+ 0x3044, // 0x306b HIRAGANA LETTER NI
+ 0x3046, // 0x306c HIRAGANA LETTER NU
+ 0x3048, // 0x306d HIRAGANA LETTER NE
+ 0x304a, // 0x306e HIRAGANA LETTER NO
+ 0x3042, // 0x306f HIRAGANA LETTER HA
+ 0x3042, // 0x3070 HIRAGANA LETTER BA
+ 0x3042, // 0x3071 HIRAGANA LETTER PA
+ 0x3044, // 0x3072 HIRAGANA LETTER HI
+ 0x3044, // 0x3073 HIRAGANA LETTER BI
+ 0x3044, // 0x3074 HIRAGANA LETTER PI
+ 0x3046, // 0x3075 HIRAGANA LETTER HU
+ 0x3046, // 0x3076 HIRAGANA LETTER BU
+ 0x3046, // 0x3077 HIRAGANA LETTER PU
+ 0x3048, // 0x3078 HIRAGANA LETTER HE
+ 0x3048, // 0x3079 HIRAGANA LETTER BE
+ 0x3048, // 0x307a HIRAGANA LETTER PE
+ 0x304a, // 0x307b HIRAGANA LETTER HO
+ 0x304a, // 0x307c HIRAGANA LETTER BO
+ 0x304a, // 0x307d HIRAGANA LETTER PO
+ 0x3042, // 0x307e HIRAGANA LETTER MA
+ 0x3044, // 0x307f HIRAGANA LETTER MI
+ 0x3046, // 0x3080 HIRAGANA LETTER MU
+ 0x3048, // 0x3081 HIRAGANA LETTER ME
+ 0x304a, // 0x3082 HIRAGANA LETTER MO
+ 0x3041, // 0x3083 HIRAGANA LETTER SMALL YA
+ 0x3042, // 0x3084 HIRAGANA LETTER YA
+ 0x3045, // 0x3085 HIRAGANA LETTER SMALL YU
+ 0x3046, // 0x3086 HIRAGANA LETTER YU
+ 0x3049, // 0x3087 HIRAGANA LETTER SMALL YO
+ 0x304a, // 0x3088 HIRAGANA LETTER YO
+ 0x3042, // 0x3089 HIRAGANA LETTER RA
+ 0x3044, // 0x308a HIRAGANA LETTER RI
+ 0x3046, // 0x308b HIRAGANA LETTER RU
+ 0x3048, // 0x308c HIRAGANA LETTER RE
+ 0x304a, // 0x308d HIRAGANA LETTER RO
+ 0x3041, // 0x308e HIRAGANA LETTER SMALL WA
+ 0x3042, // 0x308f HIRAGANA LETTER WA
+ 0x3044, // 0x3090 HIRAGANA LETTER WI
+ 0x3048, // 0x3091 HIRAGANA LETTER WE
+ 0x304a, // 0x3092 HIRAGANA LETTER WO
+ 0x0000, // 0x3093 HIRAGANA LETTER N
+ 0x3046, // 0x3094 HIRAGANA LETTER VU
+ 0x0000, // 0x3095 (HIRAGANA LETTER SMALL KA in current Unicode; no vowel given here)
+ 0x0000, // 0x3096 (HIRAGANA LETTER SMALL KE in current Unicode; no vowel given here)
+ 0x0000, // 0x3097
+ 0x0000, // 0x3098
+ 0x0000, // 0x3099 COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK
+ 0x0000, // 0x309a COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK
+ 0x0000, // 0x309b KATAKANA-HIRAGANA VOICED SOUND MARK
+ 0x0000, // 0x309c KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK
+ 0x0000, // 0x309d HIRAGANA ITERATION MARK
+ 0x0000, // 0x309e HIRAGANA VOICED ITERATION MARK
+ 0x0000, // 0x309f
+ 0x0000, // 0x30a0
+ 0x30a1, // 0x30a1 KATAKANA LETTER SMALL A
+ 0x30a2, // 0x30a2 KATAKANA LETTER A
+ 0x30a3, // 0x30a3 KATAKANA LETTER SMALL I
+ 0x30a4, // 0x30a4 KATAKANA LETTER I
+ 0x30a5, // 0x30a5 KATAKANA LETTER SMALL U
+ 0x30a6, // 0x30a6 KATAKANA LETTER U
+ 0x30a7, // 0x30a7 KATAKANA LETTER SMALL E
+ 0x30a8, // 0x30a8 KATAKANA LETTER E
+ 0x30a9, // 0x30a9 KATAKANA LETTER SMALL O
+ 0x30aa, // 0x30aa KATAKANA LETTER O
+ 0x30a2, // 0x30ab KATAKANA LETTER KA
+ 0x30a2, // 0x30ac KATAKANA LETTER GA
+ 0x30a4, // 0x30ad KATAKANA LETTER KI
+ 0x30a4, // 0x30ae KATAKANA LETTER GI
+ 0x30a6, // 0x30af KATAKANA LETTER KU
+ 0x30a6, // 0x30b0 KATAKANA LETTER GU
+ 0x30a8, // 0x30b1 KATAKANA LETTER KE
+ 0x30a8, // 0x30b2 KATAKANA LETTER GE
+ 0x30aa, // 0x30b3 KATAKANA LETTER KO
+ 0x30aa, // 0x30b4 KATAKANA LETTER GO
+ 0x30a2, // 0x30b5 KATAKANA LETTER SA
+ 0x30a2, // 0x30b6 KATAKANA LETTER ZA
+ 0x30a4, // 0x30b7 KATAKANA LETTER SI
+ 0x30a4, // 0x30b8 KATAKANA LETTER ZI
+ 0x30a6, // 0x30b9 KATAKANA LETTER SU
+ 0x30a6, // 0x30ba KATAKANA LETTER ZU
+ 0x30a8, // 0x30bb KATAKANA LETTER SE
+ 0x30a8, // 0x30bc KATAKANA LETTER ZE
+ 0x30aa, // 0x30bd KATAKANA LETTER SO
+ 0x30aa, // 0x30be KATAKANA LETTER ZO
+ 0x30a2, // 0x30bf KATAKANA LETTER TA
+ 0x30a2, // 0x30c0 KATAKANA LETTER DA
+ 0x30a4, // 0x30c1 KATAKANA LETTER TI
+ 0x30a4, // 0x30c2 KATAKANA LETTER DI
+ 0x30a5, // 0x30c3 KATAKANA LETTER SMALL TU
+ 0x30a6, // 0x30c4 KATAKANA LETTER TU
+ 0x30a6, // 0x30c5 KATAKANA LETTER DU
+ 0x30a8, // 0x30c6 KATAKANA LETTER TE
+ 0x30a8, // 0x30c7 KATAKANA LETTER DE
+ 0x30aa, // 0x30c8 KATAKANA LETTER TO
+ 0x30aa, // 0x30c9 KATAKANA LETTER DO
+ 0x30a2, // 0x30ca KATAKANA LETTER NA
+ 0x30a4, // 0x30cb KATAKANA LETTER NI
+ 0x30a6, // 0x30cc KATAKANA LETTER NU
+ 0x30a8, // 0x30cd KATAKANA LETTER NE
+ 0x30aa, // 0x30ce KATAKANA LETTER NO
+ 0x30a2, // 0x30cf KATAKANA LETTER HA
+ 0x30a2, // 0x30d0 KATAKANA LETTER BA
+ 0x30a2, // 0x30d1 KATAKANA LETTER PA
+ 0x30a4, // 0x30d2 KATAKANA LETTER HI
+ 0x30a4, // 0x30d3 KATAKANA LETTER BI
+ 0x30a4, // 0x30d4 KATAKANA LETTER PI
+ 0x30a6, // 0x30d5 KATAKANA LETTER HU
+ 0x30a6, // 0x30d6 KATAKANA LETTER BU
+ 0x30a6, // 0x30d7 KATAKANA LETTER PU
+ 0x30a8, // 0x30d8 KATAKANA LETTER HE
+ 0x30a8, // 0x30d9 KATAKANA LETTER BE
+ 0x30a8, // 0x30da KATAKANA LETTER PE
+ 0x30aa, // 0x30db KATAKANA LETTER HO
+ 0x30aa, // 0x30dc KATAKANA LETTER BO
+ 0x30aa, // 0x30dd KATAKANA LETTER PO
+ 0x30a2, // 0x30de KATAKANA LETTER MA
+ 0x30a4, // 0x30df KATAKANA LETTER MI
+ 0x30a6, // 0x30e0 KATAKANA LETTER MU
+ 0x30a8, // 0x30e1 KATAKANA LETTER ME
+ 0x30aa, // 0x30e2 KATAKANA LETTER MO
+ 0x30a1, // 0x30e3 KATAKANA LETTER SMALL YA
+ 0x30a2, // 0x30e4 KATAKANA LETTER YA
+ 0x30a5, // 0x30e5 KATAKANA LETTER SMALL YU
+ 0x30a6, // 0x30e6 KATAKANA LETTER YU
+ 0x30a9, // 0x30e7 KATAKANA LETTER SMALL YO
+ 0x30aa, // 0x30e8 KATAKANA LETTER YO
+ 0x30a2, // 0x30e9 KATAKANA LETTER RA
+ 0x30a4, // 0x30ea KATAKANA LETTER RI
+ 0x30a6, // 0x30eb KATAKANA LETTER RU
+ 0x30a8, // 0x30ec KATAKANA LETTER RE
+ 0x30aa, // 0x30ed KATAKANA LETTER RO
+ 0x30a1, // 0x30ee KATAKANA LETTER SMALL WA
+ 0x30a2, // 0x30ef KATAKANA LETTER WA
+ 0x30a4, // 0x30f0 KATAKANA LETTER WI
+ 0x30a8, // 0x30f1 KATAKANA LETTER WE
+ 0x30aa, // 0x30f2 KATAKANA LETTER WO
+ 0x0000, // 0x30f3 KATAKANA LETTER N
+ 0x30a6, // 0x30f4 KATAKANA LETTER VU
+ 0x30a1, // 0x30f5 KATAKANA LETTER SMALL KA
+ 0x30a7, // 0x30f6 KATAKANA LETTER SMALL KE
+ 0x30a2, // 0x30f7 KATAKANA LETTER VA
+ 0x30a4, // 0x30f8 KATAKANA LETTER VI
+ 0x30a8, // 0x30f9 KATAKANA LETTER VE
+ 0x30aa // 0x30fa KATAKANA LETTER VO (last entry; 0x30fb-0x30ff below are left out of the table)
+ // 0x0000, // 0x30fb KATAKANA MIDDLE DOT
+ // 0x0000, // 0x30fc KATAKANA-HIRAGANA PROLONGED SOUND MARK
+ // 0x0000, // 0x30fd KATAKANA ITERATION MARK
+ // 0x0000, // 0x30fe KATAKANA VOICED ITERATION MARK
+ // 0x0000 // 0x30ff
+};
+
+const sal_Unicode table_halfwidth[] = { // indexed by (code unit - 0xff66): the halfwidth vowel kana substituted for a prolonged sound mark after that character; 0x0000 where no vowel is given
+ // 0x0000, // 0xff61 HALFWIDTH IDEOGRAPHIC FULL STOP
+ // 0x0000, // 0xff62 HALFWIDTH LEFT CORNER BRACKET
+ // 0x0000, // 0xff63 HALFWIDTH RIGHT CORNER BRACKET
+ // 0x0000, // 0xff64 HALFWIDTH IDEOGRAPHIC COMMA
+ // 0x0000, // 0xff65 HALFWIDTH KATAKANA MIDDLE DOT
+ 0xff75, // 0xff66 HALFWIDTH KATAKANA LETTER WO
+ 0xff67, // 0xff67 HALFWIDTH KATAKANA LETTER SMALL A
+ 0xff68, // 0xff68 HALFWIDTH KATAKANA LETTER SMALL I
+ 0xff69, // 0xff69 HALFWIDTH KATAKANA LETTER SMALL U
+ 0xff6a, // 0xff6a HALFWIDTH KATAKANA LETTER SMALL E
+ 0xff6b, // 0xff6b HALFWIDTH KATAKANA LETTER SMALL O
+ 0xff67, // 0xff6c HALFWIDTH KATAKANA LETTER SMALL YA
+ 0xff69, // 0xff6d HALFWIDTH KATAKANA LETTER SMALL YU
+ 0xff6b, // 0xff6e HALFWIDTH KATAKANA LETTER SMALL YO
+ 0xff69, // 0xff6f HALFWIDTH KATAKANA LETTER SMALL TU
+ 0x0000, // 0xff70 HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK
+ 0xff71, // 0xff71 HALFWIDTH KATAKANA LETTER A
+ 0xff72, // 0xff72 HALFWIDTH KATAKANA LETTER I
+ 0xff73, // 0xff73 HALFWIDTH KATAKANA LETTER U
+ 0xff74, // 0xff74 HALFWIDTH KATAKANA LETTER E
+ 0xff75, // 0xff75 HALFWIDTH KATAKANA LETTER O
+ 0xff71, // 0xff76 HALFWIDTH KATAKANA LETTER KA
+ 0xff72, // 0xff77 HALFWIDTH KATAKANA LETTER KI
+ 0xff73, // 0xff78 HALFWIDTH KATAKANA LETTER KU
+ 0xff74, // 0xff79 HALFWIDTH KATAKANA LETTER KE
+ 0xff75, // 0xff7a HALFWIDTH KATAKANA LETTER KO
+ 0xff71, // 0xff7b HALFWIDTH KATAKANA LETTER SA
+ 0xff72, // 0xff7c HALFWIDTH KATAKANA LETTER SI
+ 0xff73, // 0xff7d HALFWIDTH KATAKANA LETTER SU
+ 0xff74, // 0xff7e HALFWIDTH KATAKANA LETTER SE
+ 0xff75, // 0xff7f HALFWIDTH KATAKANA LETTER SO
+ 0xff71, // 0xff80 HALFWIDTH KATAKANA LETTER TA
+ 0xff72, // 0xff81 HALFWIDTH KATAKANA LETTER TI
+ 0xff73, // 0xff82 HALFWIDTH KATAKANA LETTER TU
+ 0xff74, // 0xff83 HALFWIDTH KATAKANA LETTER TE
+ 0xff75, // 0xff84 HALFWIDTH KATAKANA LETTER TO
+ 0xff71, // 0xff85 HALFWIDTH KATAKANA LETTER NA
+ 0xff72, // 0xff86 HALFWIDTH KATAKANA LETTER NI
+ 0xff73, // 0xff87 HALFWIDTH KATAKANA LETTER NU
+ 0xff74, // 0xff88 HALFWIDTH KATAKANA LETTER NE
+ 0xff75, // 0xff89 HALFWIDTH KATAKANA LETTER NO
+ 0xff71, // 0xff8a HALFWIDTH KATAKANA LETTER HA
+ 0xff72, // 0xff8b HALFWIDTH KATAKANA LETTER HI
+ 0xff73, // 0xff8c HALFWIDTH KATAKANA LETTER HU
+ 0xff74, // 0xff8d HALFWIDTH KATAKANA LETTER HE
+ 0xff75, // 0xff8e HALFWIDTH KATAKANA LETTER HO
+ 0xff71, // 0xff8f HALFWIDTH KATAKANA LETTER MA
+ 0xff72, // 0xff90 HALFWIDTH KATAKANA LETTER MI
+ 0xff73, // 0xff91 HALFWIDTH KATAKANA LETTER MU
+ 0xff74, // 0xff92 HALFWIDTH KATAKANA LETTER ME
+ 0xff75, // 0xff93 HALFWIDTH KATAKANA LETTER MO
+ 0xff71, // 0xff94 HALFWIDTH KATAKANA LETTER YA
+ 0xff73, // 0xff95 HALFWIDTH KATAKANA LETTER YU
+ 0xff75, // 0xff96 HALFWIDTH KATAKANA LETTER YO
+ 0xff71, // 0xff97 HALFWIDTH KATAKANA LETTER RA
+ 0xff72, // 0xff98 HALFWIDTH KATAKANA LETTER RI
+ 0xff73, // 0xff99 HALFWIDTH KATAKANA LETTER RU
+ 0xff74, // 0xff9a HALFWIDTH KATAKANA LETTER RE
+ 0xff75, // 0xff9b HALFWIDTH KATAKANA LETTER RO
+ 0xff71 // 0xff9c HALFWIDTH KATAKANA LETTER WA (last entry; 0xff9d-0xff9f below are left out of the table)
+ // 0x0000, // 0xff9d HALFWIDTH KATAKANA LETTER N
+ // 0x0000, // 0xff9e HALFWIDTH KATAKANA VOICED SOUND MARK
+ // 0x0000 // 0xff9f HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK
+};
+
+
+OUString
+ignoreProlongedSoundMark_ja_JP::foldingImpl( const OUString& inStr, sal_Int32 startPos, sal_Int32 nCount, Sequence< sal_Int32 >* pOffset )
+{
+ // Replaces each prolonged sound mark that follows a kana by that kana's table entry, over nCount code units of inStr from startPos.
+ // The string buffer created below can hold nCount + 1 characters; its reference count is 1 now.
+ rtl_uString * newStr = rtl_uString_alloc(nCount);
+ sal_Unicode * dst = newStr->buffer;
+ const sal_Unicode * src = inStr.getStr() + startPos;
+
+ if (pOffset) {
+ // Allocate nCount length to offset argument and number it startPos, startPos + 1, ...
+ pOffset->realloc( nCount );
+ auto [begin, end] = asNonConstRange(*pOffset);
+ std::iota(begin, end, startPos);
+ }
+ // The first code unit is never substituted; it only serves as the "previous" character for what follows.
+ // NOTE(review): it is read even when nCount is 0 -- confirm callers keep startPos inside the string.
+ sal_Unicode previousChar = *src ++;
+ sal_Unicode currentChar;
+
+ // Conversion: the output runs one character behind the input.
+ while (-- nCount > 0) {
+ currentChar = *src ++;
+
+ if (currentChar == 0x30fc || // KATAKANA-HIRAGANA PROLONGED SOUND MARK
+ currentChar == 0xff70) { // HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK
+ // A mark whose predecessor lies outside both table ranges is left unchanged.
+ if (0x3041 <= previousChar && previousChar <= 0x30fa) {
+ currentChar = table_normalwidth[ previousChar - 0x3041 ]; // NOTE(review): yields 0x0000 for entries such as N -- confirm an embedded NUL is intended
+ }
+ else if (0xff66 <= previousChar && previousChar <= 0xff9c) {
+ currentChar = table_halfwidth[ previousChar - 0xff66 ]; // 0x0000 when the predecessor is itself 0xff70
+ }
+ }
+
+ *dst ++ = previousChar;
+ previousChar = currentChar;
+ }
+
+ if (nCount == 0) { // false only when the function was entered with nCount <= 0; nothing is written then
+ *dst ++ = previousChar;
+ }
+
+ *dst = u'\0';
+
+ newStr->length = sal_Int32(dst - newStr->buffer);
+ if (pOffset)
+ pOffset->realloc(newStr->length); // resize the offsets to the number of characters written
+ return OUString(newStr, SAL_NO_ACQUIRE); // take ownership
+
+}
+
+}
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/i18npool/source/transliteration/ignoreSeZe_ja_JP.cxx b/i18npool/source/transliteration/ignoreSeZe_ja_JP.cxx
new file mode 100644
index 0000000000..1851a2791b
--- /dev/null
+++ b/i18npool/source/transliteration/ignoreSeZe_ja_JP.cxx
@@ -0,0 +1,50 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * This file incorporates work covered by the following license notice:
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed
+ * with this work for additional information regarding copyright
+ * ownership. The ASF licenses this file to you under the Apache
+ * License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of
+ * the License at http://www.apache.org/licenses/LICENSE-2.0 .
+ */
+
+#include <transliteration_Ignore.hxx>
+
+using namespace com::sun::star::uno;
+using namespace com::sun::star::lang;
+
+namespace i18npool
+{
+const transliteration_Ignore::Mapping SeZe[] = {
+ // KATAKANA SI (0x30B7) + KATAKANA SMALL E (0x30A7) --> KATAKANA SE (0x30BB)
+ { 0x30B7, 0x30A7, 0x30BB, true },
+ // HIRAGANA SI (0x3057) + HIRAGANA SMALL E (0x3047) --> HIRAGANA SE (0x305B)
+ { 0x3057, 0x3047, 0x305B, true },
+ // KATAKANA ZI (0x30B8) + KATAKANA SMALL E (0x30A7) --> KATAKANA ZE (0x30BC)
+ { 0x30B8, 0x30A7, 0x30BC, true },
+ // HIRAGANA ZI (0x3058) + HIRAGANA SMALL E (0x3047) --> HIRAGANA ZE (0x305C)
+ { 0x3058, 0x3047, 0x305C, true },
+ // All-zero entry closing the list (presumably the end marker -- confirm in transliteration_Ignore).
+ { 0, 0, 0, true }
+};
+
+ignoreSeZe_ja_JP::ignoreSeZe_ja_JP()
+{
+    implementationName = "com.sun.star.i18n.Transliteration.ignoreSeZe_ja_JP";
+    transliterationName = "ignoreSeZe_ja_JP";
+    map = SeZe; // the only hook this constructor installs
+    table = nullptr;
+    func = nullptr;
+}
+}
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/i18npool/source/transliteration/ignoreSeparator_ja_JP.cxx b/i18npool/source/transliteration/ignoreSeparator_ja_JP.cxx
new file mode 100644
index 0000000000..572b93460b
--- /dev/null
+++ b/i18npool/source/transliteration/ignoreSeparator_ja_JP.cxx
@@ -0,0 +1,112 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * This file incorporates work covered by the following license notice:
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed
+ * with this work for additional information regarding copyright
+ * ownership. The ASF licenses this file to you under the Apache
+ * License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of
+ * the License at http://www.apache.org/licenses/LICENSE-2.0 .
+ */
+
+#include <transliteration_Ignore.hxx>
+#include <i18nutil/oneToOneMapping.hxx>
+
+namespace i18npool
+{
+i18nutil::OneToOneMappingTable_t const ignoreSeparatorTable[] = { // every separator listed here is mapped to 0xFFFF
+ { 0x0021, 0xFFFF }, // EXCLAMATION MARK
+ { 0x0023, 0xFFFF }, // NUMBER SIGN
+ { 0x0024, 0xFFFF }, // DOLLAR SIGN
+ { 0x0025, 0xFFFF }, // PERCENT SIGN
+ { 0x0026, 0xFFFF }, // AMPERSAND
+ { 0x002A, 0xFFFF }, // ASTERISK
+ { 0x002B, 0xFFFF }, // PLUS SIGN
+ { 0x002C, 0xFFFF }, // COMMA
+ { 0x002D, 0xFFFF }, // HYPHEN-MINUS
+ { 0x002E, 0xFFFF }, // FULL STOP
+ { 0x002F, 0xFFFF }, // SOLIDUS
+ { 0x003A, 0xFFFF }, // COLON
+ { 0x003B, 0xFFFF }, // SEMICOLON
+ { 0x003C, 0xFFFF }, // LESS-THAN SIGN
+ { 0x003D, 0xFFFF }, // EQUALS SIGN
+ { 0x003E, 0xFFFF }, // GREATER-THAN SIGN
+ { 0x005C, 0xFFFF }, // REVERSE SOLIDUS
+ { 0x005F, 0xFFFF }, // LOW LINE
+ { 0x007B, 0xFFFF }, // LEFT CURLY BRACKET
+ { 0x007C, 0xFFFF }, // VERTICAL LINE
+ { 0x007D, 0xFFFF }, // RIGHT CURLY BRACKET
+ { 0x007E, 0xFFFF }, // TILDE
+ { 0x00A5, 0xFFFF }, // YEN SIGN
+ { 0x3001, 0xFFFF }, // IDEOGRAPHIC COMMA
+ { 0x3002, 0xFFFF }, // IDEOGRAPHIC FULL STOP
+ { 0x3008, 0xFFFF }, // LEFT ANGLE BRACKET
+ { 0x3009, 0xFFFF }, // RIGHT ANGLE BRACKET
+ { 0x300A, 0xFFFF }, // LEFT DOUBLE ANGLE BRACKET
+ { 0x300B, 0xFFFF }, // RIGHT DOUBLE ANGLE BRACKET
+ { 0x300C, 0xFFFF }, // LEFT CORNER BRACKET
+ { 0x300D, 0xFFFF }, // RIGHT CORNER BRACKET
+ { 0x300E, 0xFFFF }, // LEFT WHITE CORNER BRACKET
+ { 0x300F, 0xFFFF }, // RIGHT WHITE CORNER BRACKET
+ { 0x3010, 0xFFFF }, // LEFT BLACK LENTICULAR BRACKET
+ { 0x3011, 0xFFFF }, // RIGHT BLACK LENTICULAR BRACKET
+ { 0x3014, 0xFFFF }, // LEFT TORTOISE SHELL BRACKET
+ { 0x3015, 0xFFFF }, // RIGHT TORTOISE SHELL BRACKET
+ { 0x3016, 0xFFFF }, // LEFT WHITE LENTICULAR BRACKET
+ { 0x3017, 0xFFFF }, // RIGHT WHITE LENTICULAR BRACKET
+ { 0x3018, 0xFFFF }, // LEFT WHITE TORTOISE SHELL BRACKET
+ { 0x3019, 0xFFFF }, // RIGHT WHITE TORTOISE SHELL BRACKET
+ { 0x301A, 0xFFFF }, // LEFT WHITE SQUARE BRACKET
+ { 0x301B, 0xFFFF }, // RIGHT WHITE SQUARE BRACKET
+ { 0x301C, 0xFFFF }, // WAVE DASH
+ { 0x301D, 0xFFFF }, // REVERSED DOUBLE PRIME QUOTATION MARK
+ { 0x301E, 0xFFFF }, // DOUBLE PRIME QUOTATION MARK
+ { 0x301F, 0xFFFF }, // LOW DOUBLE PRIME QUOTATION MARK
+ { 0x3030, 0xFFFF }, // WAVY DASH
+ { 0x30FB, 0xFFFF }, // KATAKANA MIDDLE DOT
+ { 0x30FC, 0xFFFF }, // KATAKANA-HIRAGANA PROLONGED SOUND MARK
+ { 0xFF01, 0xFFFF }, // FULLWIDTH EXCLAMATION MARK
+ { 0xFF03, 0xFFFF }, // FULLWIDTH NUMBER SIGN
+ { 0xFF04, 0xFFFF }, // FULLWIDTH DOLLAR SIGN
+ { 0xFF05, 0xFFFF }, // FULLWIDTH PERCENT SIGN
+ { 0xFF06, 0xFFFF }, // FULLWIDTH AMPERSAND
+ { 0xFF0A, 0xFFFF }, // FULLWIDTH ASTERISK
+ { 0xFF0B, 0xFFFF }, // FULLWIDTH PLUS SIGN
+ { 0xFF0C, 0xFFFF }, // FULLWIDTH COMMA
+ { 0xFF0D, 0xFFFF }, // FULLWIDTH HYPHEN-MINUS
+ { 0xFF0E, 0xFFFF }, // FULLWIDTH FULL STOP
+ { 0xFF0F, 0xFFFF }, // FULLWIDTH SOLIDUS
+ { 0xFF1A, 0xFFFF }, // FULLWIDTH COLON
+ { 0xFF1B, 0xFFFF }, // FULLWIDTH SEMICOLON
+ { 0xFF1C, 0xFFFF }, // FULLWIDTH LESS-THAN SIGN
+ { 0xFF1D, 0xFFFF }, // FULLWIDTH EQUALS SIGN
+ { 0xFF1E, 0xFFFF }, // FULLWIDTH GREATER-THAN SIGN
+ { 0xFF3C, 0xFFFF }, // FULLWIDTH REVERSE SOLIDUS
+ { 0xFF3F, 0xFFFF }, // FULLWIDTH LOW LINE
+ { 0xFF5B, 0xFFFF }, // FULLWIDTH LEFT CURLY BRACKET
+ { 0xFF5C, 0xFFFF }, // FULLWIDTH VERTICAL LINE
+ { 0xFF5D, 0xFFFF }, // FULLWIDTH RIGHT CURLY BRACKET
+ { 0xFF5E, 0xFFFF }, // FULLWIDTH TILDE
+ { 0xFFE5, 0xFFFF }, // FULLWIDTH YEN SIGN
+};
+
+ignoreSeparator_ja_JP::ignoreSeparator_ja_JP()
+{
+    static i18nutil::oneToOneMapping aSeparatorMapping(ignoreSeparatorTable, sizeof(ignoreSeparatorTable));
+    implementationName = "com.sun.star.i18n.Transliteration.ignoreSeparator_ja_JP";
+    transliterationName = "ignoreSeparator_ja_JP";
+    table = &aSeparatorMapping; // function-local static, so its address stays valid after the constructor returns
+    map = nullptr;
+    func = nullptr;
+}
+}
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/i18npool/source/transliteration/ignoreSize_ja_JP.cxx b/i18npool/source/transliteration/ignoreSize_ja_JP.cxx
new file mode 100644
index 0000000000..01f3f20da4
--- /dev/null
+++ b/i18npool/source/transliteration/ignoreSize_ja_JP.cxx
@@ -0,0 +1,55 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * This file incorporates work covered by the following license notice:
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed
+ * with this work for additional information regarding copyright
+ * ownership. The ASF licenses this file to you under the Apache
+ * License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of
+ * the License at http://www.apache.org/licenses/LICENSE-2.0 .
+ */
+
+#include <transliteration_Ignore.hxx>
+#include <transliteration_OneToOne.hxx>
+#include <rtl/ref.hxx>
+
+using namespace com::sun::star::uno;
+using namespace com::sun::star::lang;
+
+namespace i18npool {
+
+// Folding for ignoreSize_ja_JP: creates a smallToLarge_ja_JP transliterator
+// and returns the result of its transliterateImpl(), with inStr, startPos,
+// nCount and pOffset forwarded unchanged.
+OUString
+ignoreSize_ja_JP::foldingImpl( const OUString& inStr, sal_Int32 startPos, sal_Int32 nCount, Sequence< sal_Int32 >* pOffset )
+{
+ // The temporary reference keeps the helper alive until the call has returned.
+ return rtl::Reference< smallToLarge_ja_JP >(new smallToLarge_ja_JP)
+ ->transliterateImpl(inStr, startPos, nCount, pOffset);
+}
+
+
+// Builds the range for str1/str2 by handing a small-to-large and a
+// large-to-small transliterator to transliteration_Ignore::transliterateRange().
+Sequence< OUString > SAL_CALL
+ignoreSize_ja_JP::transliterateRange( const OUString& str1, const OUString& str2 )
+{
+ // Both helpers live only for the duration of this call.
+ rtl::Reference< smallToLarge_ja_JP > xSmallToLarge(new smallToLarge_ja_JP);
+ rtl::Reference< largeToSmall_ja_JP > xLargeToSmall(new largeToSmall_ja_JP);
+
+ smallToLarge_ja_JP& rSmallToLarge = *xSmallToLarge;
+ largeToSmall_ja_JP& rLargeToSmall = *xLargeToSmall;
+ return transliteration_Ignore::transliterateRange(str1, str2, rSmallToLarge, rLargeToSmall);
+}
+
+// Single-character variant: returns whatever a smallToLarge_ja_JP
+// transliterator produces for inChar.
+sal_Unicode SAL_CALL
+ignoreSize_ja_JP::transliterateChar2Char( sal_Unicode inChar)
+{
+ // The temporary reference keeps the helper alive until the call has returned.
+ return rtl::Reference< smallToLarge_ja_JP >(new smallToLarge_ja_JP)
+ ->transliterateChar2Char(inChar);
+}
+
+}
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/i18npool/source/transliteration/ignoreSpace_ja_JP.cxx b/i18npool/source/transliteration/ignoreSpace_ja_JP.cxx
new file mode 100644
index 0000000000..df367cd30d
--- /dev/null
+++ b/i18npool/source/transliteration/ignoreSpace_ja_JP.cxx
@@ -0,0 +1,55 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * This file incorporates work covered by the following license notice:
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed
+ * with this work for additional information regarding copyright
+ * ownership. The ASF licenses this file to you under the Apache
+ * License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of
+ * the License at http://www.apache.org/licenses/LICENSE-2.0 .
+ */
+
+#include <transliteration_Ignore.hxx>
+#include <i18nutil/oneToOneMapping.hxx>
+
+namespace i18npool {
+
+// Space characters handled by ignoreSpace_ja_JP. Every row maps its code
+// point to 0xffff.
+// NOTE(review): 0xffff is presumably the marker for "drop this character";
+// confirm against the code that consumes the table.
+i18nutil::OneToOneMappingTable_t const ignoreSpace_ja_JP_mappingTable[] = {
+ { 0x0020, 0xffff }, // SPACE
+ { 0x00A0, 0xffff }, // NO-BREAK SPACE
+ { 0x2002, 0xffff }, // EN SPACE
+ { 0x2003, 0xffff }, // EM SPACE
+ { 0x2004, 0xffff }, // THREE-PER-EM SPACE
+ { 0x2005, 0xffff }, // FOUR-PER-EM SPACE
+ { 0x2006, 0xffff }, // SIX-PER-EM SPACE
+ { 0x2007, 0xffff }, // FIGURE SPACE
+ { 0x2008, 0xffff }, // PUNCTUATION SPACE
+ { 0x2009, 0xffff }, // THIN SPACE
+ { 0x200A, 0xffff }, // HAIR SPACE
+ { 0x200B, 0xffff }, // ZERO WIDTH SPACE
+ { 0x202F, 0xffff }, // NARROW NO-BREAK SPACE
+ { 0x3000, 0xffff }, // IDEOGRAPHIC SPACE
+ { 0x303F, 0xffff } // IDEOGRAPHIC HALF FILL SPACE
+};
+
+
+// Configures this transliteration to work purely through the one-to-one
+// mapping built from ignoreSpace_ja_JP_mappingTable; func and map are unset.
+ignoreSpace_ja_JP::ignoreSpace_ja_JP()
+{
+ // Shared, lazily constructed mapping (same pattern as ignoreSeparator_ja_JP
+ // and ignoreTraditionalKanji_ja_JP). `table` is a non-owning pointer there,
+ // so a mapping allocated with `new` per instance would never be released.
+ // The second argument is the size of the table in bytes.
+ static i18nutil::oneToOneMapping _table(ignoreSpace_ja_JP_mappingTable, sizeof(ignoreSpace_ja_JP_mappingTable));
+ func = nullptr;
+ table = &_table;
+ map = nullptr;
+ transliterationName = "ignoreSpace_ja_JP";
+ implementationName = "com.sun.star.i18n.Transliteration.ignoreSpace_ja_JP";
+}
+
+}
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/i18npool/source/transliteration/ignoreTiJi_ja_JP.cxx b/i18npool/source/transliteration/ignoreTiJi_ja_JP.cxx
new file mode 100644
index 0000000000..a3e6116a9b
--- /dev/null
+++ b/i18npool/source/transliteration/ignoreTiJi_ja_JP.cxx
@@ -0,0 +1,54 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * This file incorporates work covered by the following license notice:
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed
+ * with this work for additional information regarding copyright
+ * ownership. The ASF licenses this file to you under the Apache
+ * License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of
+ * the License at http://www.apache.org/licenses/LICENSE-2.0 .
+ */
+
+#include <transliteration_Ignore.hxx>
+
+using namespace com::sun::star::uno;
+using namespace com::sun::star::lang;
+
+namespace i18npool
+{
+// Two-character folding rules used by ignoreTiJi_ja_JP. Going by the row
+// comments, each row reads { first char, following char, replacement, flag }.
+// NOTE(review): the meaning of the trailing bool is defined by
+// transliteration_Ignore::Mapping and is not visible here; confirm there.
+const transliteration_Ignore::Mapping TiJi[] = {
+ // KATAKANA: TU + SMALL I --> TI
+ { 0x30C4, 0x30A3, 0x30C1, true },
+ // KATAKANA: TE + SMALL I --> TI
+ { 0x30C6, 0x30A3, 0x30C1, true },
+ // HIRAGANA: TU + SMALL I --> TI
+ { 0x3064, 0x3043, 0x3061, true },
+ // HIRAGANA: TE + SMALL I --> TI
+ { 0x3066, 0x3043, 0x3061, true },
+ // KATAKANA: DE + SMALL I --> ZI
+ { 0x30C7, 0x30A3, 0x30B8, true },
+ // HIRAGANA: DE + SMALL I --> ZI
+ { 0x3067, 0x3043, 0x3058, true },
+
+ // all-zero row; presumably the end-of-table sentinel (verify in the consumer)
+ { 0, 0, 0, true }
+};
+
+// Configures this transliteration to use only the TiJi two-character rules;
+// the per-character function and the one-to-one table are left unset.
+ignoreTiJi_ja_JP::ignoreTiJi_ja_JP()
+{
+ map = TiJi;
+ table = nullptr;
+ func = nullptr;
+
+ implementationName = "com.sun.star.i18n.Transliteration.ignoreTiJi_ja_JP";
+ transliterationName = "ignoreTiJi_ja_JP";
+}
+}
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/i18npool/source/transliteration/ignoreTraditionalKana_ja_JP.cxx b/i18npool/source/transliteration/ignoreTraditionalKana_ja_JP.cxx
new file mode 100644
index 0000000000..323a256558
--- /dev/null
+++ b/i18npool/source/transliteration/ignoreTraditionalKana_ja_JP.cxx
@@ -0,0 +1,53 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * This file incorporates work covered by the following license notice:
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed
+ * with this work for additional information regarding copyright
+ * ownership. The ASF licenses this file to you under the Apache
+ * License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of
+ * the License at http://www.apache.org/licenses/LICENSE-2.0 .
+ */
+
+#include <transliteration_Ignore.hxx>
+
+namespace i18npool
+{
+// Replaces the kana letters WI and WE with I and E of the same script.
+// Any other character is returned unchanged.
+static sal_Unicode ignoreTraditionalKana_ja_JP_translator(const sal_Unicode c)
+{
+ if (c == 0x3090) // HIRAGANA LETTER WI
+ return 0x3044; // HIRAGANA LETTER I
+
+ if (c == 0x3091) // HIRAGANA LETTER WE
+ return 0x3048; // HIRAGANA LETTER E
+
+ if (c == 0x30F0) // KATAKANA LETTER WI
+ return 0x30A4; // KATAKANA LETTER I
+
+ if (c == 0x30F1) // KATAKANA LETTER WE
+ return 0x30A8; // KATAKANA LETTER E
+
+ // not one of the four letters above
+ return c;
+}
+
+// Configures this transliteration to use only the per-character translator
+// function; the one-to-one table and the two-character map are left unset.
+ignoreTraditionalKana_ja_JP::ignoreTraditionalKana_ja_JP()
+{
+ map = nullptr;
+ table = nullptr;
+ func = ignoreTraditionalKana_ja_JP_translator;
+
+ implementationName = "com.sun.star.i18n.Transliteration.ignoreTraditionalKana_ja_JP";
+ transliterationName = "ignoreTraditionalKana_ja_JP";
+}
+}
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/i18npool/source/transliteration/ignoreTraditionalKanji_ja_JP.cxx b/i18npool/source/transliteration/ignoreTraditionalKanji_ja_JP.cxx
new file mode 100644
index 0000000000..dec55f3478
--- /dev/null
+++ b/i18npool/source/transliteration/ignoreTraditionalKanji_ja_JP.cxx
@@ -0,0 +1,735 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * This file incorporates work covered by the following license notice:
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed
+ * with this work for additional information regarding copyright
+ * ownership. The ASF licenses this file to you under the Apache
+ * License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of
+ * the License at http://www.apache.org/licenses/LICENSE-2.0 .
+ */
+
+#include <transliteration_Ignore.hxx>
+#include <i18nutil/oneToOneMapping.hxx>
+
+namespace i18npool {
+
+// traditional Kanji characters --> modern Kanji characters
+// Each row is { traditional code point, modern code point }. Several
+// traditional forms may share one modern form.
+// Rows that are commented out are kept exactly as found; the reason for
+// disabling them is not recorded in this file.
+i18nutil::OneToOneMappingTable_t const traditionalKanji2updateKanji[] = {
+ { 0x4E17, 0x4E16 },
+ { 0x4E55, 0x864E },
+ { 0x4E58, 0x4E57 },
+ { 0x4E82, 0x4E71 },
+ { 0x4E8A, 0x4E8B },
+ { 0x4E98, 0x4E99 },
+ { 0x4E9F, 0x6975 },
+ { 0x4EB0, 0x4EAC },
+ { 0x4EC2, 0x50CD },
+ { 0x4ECE, 0x5F93 },
+ { 0x4EED, 0x4EDE },
+ { 0x4F1C, 0x5005 },
+ { 0x4F5B, 0x4ECF },
+ { 0x4F86, 0x6765 },
+ { 0x4FAB, 0x4F5E },
+ { 0x5047, 0x4EEE },
+ { 0x509A, 0x52B9 },
+ { 0x50B3, 0x4F1D },
+ { 0x50DE, 0x507D },
+ { 0x50E3, 0x50ED },
+ { 0x50F9, 0x4FA1 },
+ { 0x5109, 0x5039 },
+ { 0x5118, 0x4FAD },
+ { 0x5152, 0x5150 },
+ { 0x5154, 0x514E },
+ { 0x5169, 0x4E21 },
+ { 0x518C, 0x518A },
+ { 0x5190, 0x5192 },
+ { 0x51A8, 0x5BCC },
+ { 0x51A9, 0x5199 },
+ { 0x51B0, 0x6C37 },
+ { 0x51B1, 0x51B4 },
+ { 0x51B2, 0x6C96 },
+ { 0x51B3, 0x6C7A },
+ { 0x51B5, 0x6CC1 },
+ { 0x51C9, 0x6DBC },
+ { 0x51D6, 0x6E96 },
+ { 0x51DC, 0x51DB },
+ { 0x51FE, 0x51FD },
+ { 0x5204, 0x5203 },
+ { 0x5227, 0x52AB },
+ { 0x5269, 0x5270 },
+ { 0x5271, 0x5263 },
+ { 0x5283, 0x753B },
+ { 0x528D, 0x5263 },
+ { 0x5291, 0x5264 },
+ { 0x5292, 0x5263 },
+ { 0x5294, 0x5263 },
+ { 0x52B5, 0x5238 },
+ { 0x52DE, 0x52B4 },
+ { 0x52F3, 0x52F2 },
+ { 0x52F5, 0x52B1 },
+ { 0x52F8, 0x52E7 },
+ { 0x5340, 0x533A },
+ { 0x5346, 0x5352 },
+ { 0x5377, 0x5DFB },
+ { 0x537B, 0x5374 },
+ { 0x53B0, 0x5EE0 },
+ { 0x53C3, 0x53C2 },
+ { 0x548F, 0x8A60 },
+ { 0x5492, 0x546A },
+ { 0x54AF, 0x5580 },
+ { 0x54E5, 0x6B4C },
+ { 0x5557, 0x5556 },
+ { 0x55AE, 0x5358 },
+ { 0x5650, 0x5668 },
+ { 0x5680, 0x5BE7 },
+ { 0x568F, 0x5694 },
+ { 0x56B4, 0x53B3 },
+ { 0x56D1, 0x5631 },
+ { 0x56D8, 0x56DE },
+ { 0x5708, 0x570F },
+ { 0x570B, 0x56FD },
+ { 0x570D, 0x56F2 },
+ { 0x5713, 0x5186 },
+ { 0x5716, 0x56F3 },
+ { 0x5718, 0x56E3 },
+ { 0x57C0, 0x5782 },
+ { 0x57D3, 0x57D2 },
+ { 0x57FC, 0x5D0E },
+ { 0x582F, 0x5C2D },
+ { 0x5872, 0x5834 },
+ { 0x58AE, 0x5815 },
+ { 0x58D3, 0x5727 },
+ { 0x58D8, 0x5841 },
+ { 0x58DE, 0x58CA },
+ { 0x58E4, 0x58CC },
+ { 0x58EF, 0x58EE },
+ { 0x58F1, 0x4E00 },
+ { 0x58F9, 0x4E00 },
+ { 0x58FA, 0x58F7 },
+ { 0x58FB, 0x5A7F },
+ { 0x58FD, 0x5BFF },
+ { 0x5918, 0x536F },
+ { 0x591B, 0x591A },
+ { 0x5932, 0x672C },
+ { 0x5967, 0x5965 },
+ { 0x5969, 0x5333 },
+ { 0x596C, 0x5968 },
+ { 0x59D9, 0x598A },
+ { 0x5A6C, 0x6DEB },
+ { 0x5AD0, 0x5B32 },
+ { 0x5AFB, 0x5AFA },
+ { 0x5B43, 0x5B22 },
+ { 0x5B78, 0x5B66 },
+ // { 0x5B9D, 0x73CD },
+ { 0x5BC3, 0x51A4 },
+ { 0x5BC7, 0x51A6 },
+ { 0x5BC9, 0x9DB4 },
+ { 0x5BE2, 0x5BDD },
+ { 0x5BE6, 0x5B9F },
+ { 0x5BEB, 0x5199 },
+ //{ 0x5BF3, 0x73CD },
+ { 0x5BF3, 0x5B9D },
+ //{ 0x5BF6, 0x73CD },
+ { 0x5BF6, 0x5B9D },
+ { 0x5C05, 0x524B },
+ { 0x5C07, 0x5C06 },
+ { 0x5C08, 0x5C02 },
+ { 0x5C0D, 0x5BFE },
+ { 0x5C13, 0x723E },
+ { 0x5C46, 0x5C4A },
+ { 0x5C6C, 0x5C5E },
+ { 0x5CE9, 0x5CE8 },
+ { 0x5CEF, 0x5CF0 },
+ { 0x5CFD, 0x5CE1 },
+ { 0x5D15, 0x5D16 },
+ { 0x5D17, 0x5CA1 },
+ { 0x5D18, 0x5D19 },
+ { 0x5D5C, 0x5D0E },
+ { 0x5D73, 0x5D6F },
+ { 0x5D8B, 0x5CF6 },
+ { 0x5D8C, 0x5CF6 },
+ { 0x5DBD, 0x5CB3 },
+ { 0x5DD6, 0x5DCC },
+ { 0x5DDB, 0x5DDD },
+ { 0x5DF5, 0x536E },
+ { 0x5E0B, 0x7D19 },
+ { 0x5E36, 0x5E2F },
+ { 0x5E64, 0x5E63 },
+ { 0x5EC1, 0x53A0 },
+ { 0x5EC8, 0x53A6 },
+ { 0x5ECF, 0x53A9 },
+ { 0x5ED0, 0x53A9 },
+ { 0x5EDA, 0x53A8 },
+ { 0x5EDD, 0x53AE },
+ { 0x5EE2, 0x5EC3 },
+ { 0x5EE3, 0x5E83 },
+ { 0x5EF0, 0x5E81 },
+ { 0x5EF3, 0x5E81 },
+ { 0x5F03, 0x68C4 },
+ { 0x5F09, 0x5958 },
+ { 0x5F0C, 0x4E00 },
+ { 0x5F0D, 0x4E8C },
+ { 0x5F10, 0x4E8C },
+ { 0x5F2F, 0x6E7E },
+ { 0x5F48, 0x5F3E },
+ { 0x5F4C, 0x5F25 },
+ { 0x5F4E, 0x6E7E },
+ { 0x5F5C, 0x5F5D },
+ { 0x5F83, 0x5F80 },
+ { 0x5F91, 0x5F84 },
+ { 0x5F9E, 0x5F93 },
+ { 0x5FA0, 0x6765 },
+ { 0x5FF0, 0x60B4 },
+ { 0x5FFB, 0x6B23 },
+ { 0x6046, 0x6052 },
+ { 0x604A, 0x5354 },
+ { 0x6060, 0x602A },
+ { 0x60B3, 0x5FB3 },
+ { 0x60E0, 0x6075 },
+ { 0x60E1, 0x60AA },
+ { 0x60F1, 0x60A9 },
+ { 0x613C, 0x614E },
+ { 0x613D, 0x535A },
+ { 0x6158, 0x60E8 },
+ { 0x615A, 0x6159 },
+ { 0x6187, 0x61A9 },
+ { 0x61C9, 0x5FDC },
+ { 0x61F4, 0x61FA },
+ { 0x61F7, 0x61D0 },
+ { 0x6200, 0x604B },
+ { 0x621D, 0x8CA1 },
+ { 0x621E, 0x621B },
+ { 0x6230, 0x6226 },
+ { 0x6232, 0x622F },
+ { 0x6268, 0x6260 },
+ { 0x62AC, 0x64E1 },
+ { 0x62C2, 0x6255 },
+ { 0x62D4, 0x629C },
+ { 0x62DC, 0x62DD },
+ { 0x633E, 0x631F },
+ { 0x63D2, 0x633F },
+ { 0x6416, 0x63FA },
+ { 0x641C, 0x635C },
+ { 0x64C7, 0x629E },
+ { 0x64D4, 0x62C5 },
+ { 0x64DA, 0x62E0 },
+ { 0x64E7, 0x6319 },
+ { 0x64F4, 0x62E1 },
+ { 0x651C, 0x643A },
+ { 0x651D, 0x6442 },
+ { 0x652A, 0x64B9 },
+ { 0x6536, 0x53CE },
+ { 0x6548, 0x52B9 },
+ { 0x654D, 0x53D9 },
+ { 0x6555, 0x52C5 },
+ { 0x6558, 0x53D9 },
+ { 0x6578, 0x6570 },
+ { 0x6588, 0x5B66 },
+ { 0x6589, 0x658E },
+ { 0x65B7, 0x65AD },
+ { 0x65D9, 0x65DB },
+ { 0x663F, 0x66E0 },
+ { 0x6644, 0x6643 },
+ { 0x6649, 0x664B },
+ { 0x665D, 0x663C },
+ { 0x668E, 0x6620 },
+ { 0x66B8, 0x77AD },
+ { 0x66C9, 0x6681 },
+ { 0x66F0, 0x4E91 },
+ { 0x66F5, 0x66F3 },
+ { 0x66FD, 0x66FE },
+ { 0x6703, 0x4F1A },
+ { 0x6716, 0x6717 },
+ { 0x671E, 0x671F },
+ { 0x6764, 0x6803 },
+ { 0x67A1, 0x685D },
+ { 0x67A6, 0x6AE8 },
+ { 0x67A9, 0x677E },
+ { 0x67CF, 0x6822 },
+ { 0x6816, 0x68F2 },
+ { 0x6840, 0x685D },
+ { 0x689D, 0x6761 },
+ { 0x68A6, 0x5922 },
+ { 0x68CA, 0x68CB },
+ { 0x68E7, 0x685F },
+ { 0x6936, 0x68D5 },
+ { 0x695D, 0x68DF },
+ { 0x6973, 0x6885 },
+ { 0x69AE, 0x6804 },
+ { 0x69C7, 0x69D9 },
+ { 0x6A02, 0x697D },
+ { 0x6A13, 0x697C },
+ { 0x6A1E, 0x67A2 },
+ { 0x6A23, 0x69D8 },
+ { 0x6A62, 0x6955 },
+ { 0x6A9C, 0x6867 },
+ { 0x6AA2, 0x691C },
+ { 0x6AAA, 0x6ADF },
+ { 0x6AAE, 0x68BC },
+ { 0x6AB3, 0x68B9 },
+ { 0x6AC1, 0x6A12 },
+ { 0x6AFB, 0x685C },
+ { 0x6B0A, 0x6A29 },
+ { 0x6B50, 0x6B27 },
+ { 0x6B61, 0x6B53 },
+ { 0x6B78, 0x5E30 },
+ { 0x6B7F, 0x6CA1 },
+ { 0x6B98, 0x6B8B },
+ { 0x6BB1, 0x6BB2 },
+ { 0x6BBC, 0x6BBB },
+ { 0x6BC6, 0x6BB4 },
+ { 0x6BCB, 0x6BCD },
+ { 0x6BD3, 0x80B2 },
+ { 0x6C23, 0x6C17 },
+ { 0x6C8D, 0x51B4 },
+ { 0x6C92, 0x6CA1 },
+ { 0x6CEA, 0x6D99 },
+ { 0x6D0C, 0x51BD },
+ { 0x6D35, 0x5F87 },
+ { 0x6D79, 0x5CE1 },
+ { 0x6D8C, 0x6E67 },
+ { 0x6DB8, 0x51C5 },
+ { 0x6DD2, 0x51C4 },
+ { 0x6DE8, 0x6D44 },
+ { 0x6DFA, 0x6D45 },
+ { 0x6E0A, 0x6DF5 },
+ { 0x6E15, 0x6DF5 },
+ { 0x6EAA, 0x6E13 },
+ { 0x6EAF, 0x9061 },
+ { 0x6EEF, 0x6EDE },
+ { 0x6EFF, 0x6E80 },
+ { 0x6F5B, 0x6F5C },
+ { 0x6F74, 0x7026 },
+ { 0x6F80, 0x6E0B },
+ { 0x6F81, 0x6E0B },
+ { 0x6F82, 0x6F84 },
+ { 0x6F91, 0x6E9C },
+ { 0x6FA4, 0x6CA2 },
+ { 0x6FD5, 0x6E7F },
+ { 0x6FDF, 0x6E08 },
+ { 0x6FE4, 0x6D9B },
+ { 0x6FF1, 0x6D5C },
+ { 0x6FF3, 0x6F5C },
+ { 0x6FF6, 0x95CA },
+ { 0x7027, 0x6EDD },
+ { 0x7030, 0x6FD4 },
+ { 0x704C, 0x6F45 },
+ { 0x7063, 0x6E7E },
+ { 0x70CB, 0x6077 },
+ { 0x70DF, 0x7159 },
+ { 0x70F1, 0x70AF },
+ { 0x7188, 0x7155 },
+ { 0x7199, 0x7155 },
+ { 0x71C8, 0x706F },
+ { 0x71D2, 0x713C },
+ { 0x71DF, 0x55B6 },
+ { 0x71FB, 0x718F },
+ { 0x71FF, 0x8000 },
+ { 0x7210, 0x7089 },
+ { 0x722D, 0x4E89 },
+ { 0x7232, 0x70BA },
+ { 0x723C, 0x4FCE },
+ { 0x7246, 0x58BB },
+ { 0x7281, 0x7282 },
+ { 0x72A7, 0x72A0 },
+ { 0x72E2, 0x72F8 },
+ { 0x72F9, 0x72ED },
+ { 0x734E, 0x5968 },
+ { 0x7368, 0x72EC },
+ { 0x7375, 0x731F },
+ { 0x7378, 0x7363 },
+ { 0x737B, 0x732E },
+ { 0x73CE, 0x73CD },
+ { 0x73F1, 0x74D4 },
+ { 0x7464, 0x7476 },
+ { 0x746F, 0x7405 },
+ { 0x74A2, 0x7460 },
+ { 0x74E3, 0x5F01 },
+ { 0x751E, 0x5617 },
+ { 0x753C, 0x753A },
+ { 0x7544, 0x7559 },
+ { 0x7546, 0x755D },
+ { 0x754A, 0x8015 },
+ { 0x754D, 0x754C },
+ { 0x7567, 0x7565 },
+ { 0x756B, 0x753B },
+ { 0x7574, 0x7587 },
+ { 0x7576, 0x5F53 },
+ { 0x7582, 0x7573 },
+ { 0x7589, 0x7573 },
+ { 0x758A, 0x7573 },
+ { 0x7609, 0x7652 },
+ { 0x7661, 0x75F4 },
+ { 0x767C, 0x767A },
+ { 0x7683, 0x8C8C },
+ { 0x7688, 0x5E30 },
+ { 0x768B, 0x7690 },
+ { 0x7693, 0x6667 },
+ { 0x76B7, 0x9F13 },
+ { 0x76B9, 0x76B8 },
+ { 0x76C3, 0x676F },
+ { 0x76D6, 0x84CB },
+ { 0x76DC, 0x76D7 },
+ { 0x76E1, 0x5C3D },
+ { 0x771E, 0x771F },
+ { 0x7726, 0x7725 },
+ { 0x777F, 0x53E1 },
+ { 0x783F, 0x9271 },
+ { 0x7845, 0x73EA },
+ { 0x788E, 0x7815 },
+ { 0x7895, 0x5D0E },
+ { 0x78AF, 0x7459 },
+ { 0x7919, 0x788D },
+ { 0x7926, 0x9271 },
+ { 0x792A, 0x783A },
+ { 0x7955, 0x79D8 },
+ { 0x7962, 0x79B0 },
+ { 0x797F, 0x7984 },
+ { 0x7980, 0x7A1F },
+ { 0x79AA, 0x7985 },
+ { 0x79AE, 0x793C },
+ { 0x79CB, 0x7A50 },
+ { 0x7A31, 0x79F0 },
+ { 0x7A3B, 0x7A32 },
+ { 0x7A3E, 0x7A3F },
+ { 0x7A49, 0x7A1A },
+ { 0x7A57, 0x7A42 },
+ { 0x7A69, 0x7A4F },
+ { 0x7A70, 0x7A63 },
+ { 0x7A97, 0x7A93 },
+ { 0x7AB0, 0x7AAF },
+ { 0x7AC8, 0x7AC3 },
+ { 0x7ACA, 0x7A83 },
+ { 0x7AD2, 0x5947 },
+ { 0x7ADD, 0x4E26 },
+ { 0x7B0B, 0x7B4D },
+ { 0x7B11, 0x54B2 },
+ { 0x7B5D, 0x7B8F },
+ { 0x7B71, 0x7BE0 },
+ { 0x7B7A, 0x7B50 },
+ { 0x7B8B, 0x724B },
+ { 0x7B9A, 0x5273 },
+ { 0x7B9F, 0x7B98 },
+ { 0x7BCF, 0x5D4C },
+ { 0x7BE6, 0x7B86 },
+ { 0x7C11, 0x84D1 },
+ { 0x7C14, 0x84D1 },
+ { 0x7C54, 0x85AE },
+ { 0x7C56, 0x7C64 },
+ { 0x7C58, 0x7C50 },
+ { 0x7C60, 0x7BED },
+ { 0x7CAE, 0x7CE7 },
+ { 0x7CB9, 0x7C8B },
+ { 0x7CE2, 0x6A21 },
+ { 0x7CFA, 0x7CFE },
+ { 0x7D4F, 0x7D32 },
+ { 0x7D72, 0x7CF8 },
+ { 0x7D89, 0x7E4D },
+ { 0x7D93, 0x7D4C },
+ { 0x7DAB, 0x7DDA },
+ { 0x7DD5, 0x7E83 },
+ { 0x7DDC, 0x7DBF },
+ { 0x7DE8, 0x7BC7 },
+ { 0x7E23, 0x770C },
+ { 0x7E31, 0x7E26 },
+ { 0x7E3D, 0x7DCF },
+ { 0x7E69, 0x7E04 },
+ { 0x7E6A, 0x7D75 },
+ { 0x7E7C, 0x7D99 },
+ { 0x7E8C, 0x7D9A },
+ { 0x7E8E, 0x7E4A },
+ { 0x7E92, 0x7E8F },
+ { 0x7E96, 0x7E4A },
+ { 0x7F3A, 0x6B20 },
+ { 0x7F4E, 0x58DC },
+ { 0x7F50, 0x7F36 },
+ { 0x7F78, 0x7F70 },
+ { 0x7F83, 0x51AA },
+ { 0x7F87, 0x7F88 },
+ { 0x7FA3, 0x7FA4 },
+ { 0x7FAE, 0x7FB9 },
+ { 0x7FC6, 0x7FE0 },
+ { 0x803B, 0x6065 },
+ { 0x805F, 0x5A7F },
+ { 0x8068, 0x806F },
+ { 0x8070, 0x8061 },
+ { 0x8072, 0x58F0 },
+ { 0x807D, 0x8074 },
+ { 0x8085, 0x7C9B },
+ { 0x8089, 0x5B8D },
+ { 0x80AC, 0x75A3 },
+ { 0x80CC, 0x810A },
+ { 0x80EF, 0x8DE8 },
+ { 0x8109, 0x8108 },
+ { 0x8123, 0x5507 },
+ { 0x815F, 0x81A3 },
+ { 0x8166, 0x8133 },
+ { 0x8193, 0x8178 },
+ { 0x81B8, 0x9AC4 },
+ { 0x81BD, 0x80C6 },
+ { 0x81C8, 0x81D8 },
+ { 0x81DF, 0x81D3 },
+ { 0x81FA, 0x53F0 },
+ { 0x8207, 0x4E0E },
+ { 0x8209, 0x6319 },
+ { 0x820A, 0x65E7 },
+ { 0x820D, 0x820E },
+ { 0x8216, 0x8217 },
+ { 0x8218, 0x9928 },
+ { 0x8229, 0x8239 },
+ { 0x822E, 0x826B },
+ { 0x8235, 0x67C1 },
+ { 0x8277, 0x8276 },
+ { 0x82C5, 0x5208 },
+ { 0x8358, 0x5E84 },
+ { 0x8373, 0x8C46 },
+ { 0x838A, 0x5E84 },
+ { 0x8393, 0x82FA },
+ { 0x8396, 0x830E },
+ { 0x83B5, 0x514E },
+ { 0x83DF, 0x514E },
+ { 0x83F4, 0x5EB5 },
+ { 0x8420, 0x840C },
+ { 0x842C, 0x4E07 },
+ { 0x8462, 0x84CB },
+ { 0x84AD, 0x82BB },
+ { 0x854B, 0x854A },
+ { 0x855A, 0x843C },
+ { 0x8597, 0x5712 },
+ { 0x85C1, 0x7A3F },
+ { 0x85CF, 0x8535 },
+ { 0x85DD, 0x82B8 },
+ { 0x85E5, 0x85AC },
+ { 0x85EA, 0x85AE },
+ { 0x8602, 0x854A },
+ { 0x8606, 0x82A6 },
+ { 0x8613, 0x8607 },
+ { 0x8617, 0x6A97 },
+ { 0x862F, 0x76EA },
+ { 0x8655, 0x51E6 },
+ { 0x865F, 0x53F7 },
+ { 0x8768, 0x8671 },
+ { 0x87A2, 0x86CD },
+ { 0x87C6, 0x87C7 },
+ { 0x87D2, 0x880E },
+ { 0x87F2, 0x866B },
+ { 0x8805, 0x877F },
+ { 0x880F, 0x87F9 },
+ { 0x8823, 0x86CE },
+ { 0x8827, 0x8839 },
+ { 0x8836, 0x8695 },
+ { 0x883B, 0x86EE },
+ { 0x8842, 0x8844 },
+ { 0x885E, 0x885B },
+ { 0x88B5, 0x887D },
+ { 0x88DD, 0x88C5 },
+ { 0x88E1, 0x88CF },
+ { 0x891D, 0x894C },
+ { 0x8943, 0x8912 },
+ { 0x894D, 0x96D1 },
+ { 0x898A, 0x7F88 },
+ { 0x89BA, 0x899A },
+ { 0x89BD, 0x89A7 },
+ { 0x89C0, 0x89B3 },
+ { 0x89E7, 0x89E3 },
+ { 0x89F8, 0x89E6 },
+ { 0x8A00, 0x4E91 },
+ { 0x8A3B, 0x6CE8 },
+ { 0x8AE1, 0x8B1A },
+ { 0x8AEB, 0x8ACC },
+ { 0x8B0C, 0x6B4C },
+ { 0x8B20, 0x8B21 },
+ { 0x8B41, 0x5629 },
+ { 0x8B49, 0x8A3C },
+ { 0x8B5B, 0x8B56 },
+ { 0x8B5F, 0x566A },
+ { 0x8B6F, 0x8A33 },
+ { 0x8B71, 0x5584 },
+ { 0x8B7D, 0x8A89 },
+ { 0x8B80, 0x8AAD },
+ { 0x8B8A, 0x5909 },
+ { 0x8B8E, 0x8B90 },
+ { 0x8B93, 0x8B72 },
+ { 0x8B9A, 0x8B83 },
+ { 0x8C3F, 0x6E13 },
+ { 0x8C4E, 0x7AEA },
+ { 0x8C50, 0x8C4A },
+ { 0x8C6B, 0x4E88 },
+ { 0x8C6C, 0x732A },
+ { 0x8C7A, 0x72B2 },
+ { 0x8C7C, 0x8C94 },
+ { 0x8C89, 0x72F8 },
+ { 0x8C8D, 0x72F8 },
+ { 0x8C8E, 0x730A },
+ { 0x8C98, 0x734F },
+ { 0x8CAD, 0x8CEA },
+ { 0x8CAE, 0x4E8C },
+ { 0x8CB3, 0x4E8C },
+ { 0x8CCD, 0x81D3 },
+ { 0x8CE3, 0x58F2 },
+ { 0x8CE4, 0x8CCE },
+ { 0x8D0A, 0x8CDB },
+ { 0x8D13, 0x81D3 },
+ { 0x8D71, 0x8D70 },
+ { 0x8E08, 0x758E },
+ { 0x8E10, 0x8DF5 },
+ { 0x8E34, 0x8E0A },
+ { 0x8E48, 0x8E0F },
+ { 0x8E87, 0x58B8 },
+ { 0x8EAA, 0x8E99 },
+ { 0x8EB0, 0x4F53 },
+ { 0x8EC6, 0x4F53 },
+ { 0x8EE3, 0x8F5F },
+ { 0x8F0C, 0x4E21 },
+ { 0x8F15, 0x8EFD },
+ { 0x8F19, 0x8F12 },
+ { 0x8F1B, 0x4E21 },
+ { 0x8F49, 0x8EE2 },
+ { 0x8FA7, 0x5F01 },
+ { 0x8FA8, 0x5F01 },
+ { 0x8FAD, 0x8F9E },
+ { 0x8FAF, 0x5F01 },
+ { 0x8FEA, 0x5EF8 },
+ { 0x8FEF, 0x9003 },
+ { 0x8FF4, 0x5EFB },
+ { 0x8FFA, 0x5EFC },
+ { 0x900E, 0x9052 },
+ { 0x9015, 0x5F84 },
+ { 0x9023, 0x806F },
+ { 0x9039, 0x9054 },
+ { 0x9059, 0x9065 },
+ { 0x905E, 0x9013 },
+ { 0x9072, 0x9045 },
+ { 0x9087, 0x8FE9 },
+ { 0x9089, 0x8FBA },
+ { 0x908A, 0x8FBA },
+ { 0x90B1, 0x4E18 },
+ { 0x9130, 0x96A3 },
+ { 0x9189, 0x9154 },
+ { 0x918B, 0x9162 },
+ { 0x91AB, 0x533B },
+ { 0x91BA, 0x718F },
+ { 0x91C0, 0x91B8 },
+ { 0x91CB, 0x91C8 },
+ { 0x91E1, 0x91DC },
+ { 0x91FC, 0x5263 },
+ { 0x9214, 0x6284 },
+ { 0x9229, 0x946A },
+ { 0x922C, 0x9438 },
+ { 0x9248, 0x91F6 },
+ { 0x9264, 0x920E },
+ { 0x9295, 0x9244 },
+ { 0x92EA, 0x8217 },
+ { 0x9322, 0x92AD },
+ { 0x9394, 0x7194 },
+ { 0x93AD, 0x93AE },
+ { 0x941A, 0x930F },
+ { 0x9421, 0x9244 },
+ { 0x9435, 0x9244 },
+ { 0x9444, 0x92F3 },
+ { 0x9452, 0x9451 },
+ { 0x945A, 0x947D },
+ { 0x945B, 0x9271 },
+ { 0x9587, 0x9589 },
+ { 0x95A0, 0x958F },
+ { 0x95DC, 0x95A2 },
+ { 0x962A, 0x5742 },
+ { 0x962F, 0x5740 },
+ { 0x9644, 0x4ED8 },
+ { 0x9670, 0x852D },
+ { 0x9677, 0x9665 },
+ { 0x96A8, 0x968F },
+ { 0x96AA, 0x967A },
+ { 0x96B1, 0x852D },
+ { 0x96B8, 0x96B7 },
+ { 0x96C6, 0x8F2F },
+ { 0x96D9, 0x53CC },
+ { 0x96DC, 0x96D1 },
+ { 0x9738, 0x8987 },
+ { 0x9748, 0x970A },
+ { 0x975C, 0x9759 },
+ { 0x976B, 0x976D },
+ { 0x9771, 0x976D },
+ { 0x97ED, 0x97EE },
+ { 0x97F2, 0x9F4F },
+ { 0x97F5, 0x97FB },
+ { 0x9834, 0x7A4E },
+ { 0x9838, 0x981A },
+ { 0x984B, 0x816E },
+ { 0x984F, 0x9854 },
+ { 0x986F, 0x9855 },
+ { 0x98B1, 0x53F0 },
+ { 0x98C3, 0x98C4 },
+ { 0x98DC, 0x7FFB },
+ { 0x98EE, 0x98F2 },
+ { 0x9918, 0x4F59 },
+ { 0x991D, 0x98FE },
+ { 0x9920, 0x9905 },
+ { 0x99C8, 0x99C6 },
+ { 0x99F2, 0x99B4 },
+ { 0x9A37, 0x9A12 },
+ { 0x9A45, 0x99C6 },
+ { 0x9A57, 0x9A13 },
+ { 0x9A5B, 0x99C5 },
+ { 0x9AD3, 0x9AC4 },
+ { 0x9AD4, 0x4F53 },
+ { 0x9AEE, 0x9AEA },
+ { 0x9AF4, 0x5F7F },
+ { 0x9B27, 0x9599 },
+ { 0x9B28, 0x95A7 },
+ { 0x9B2A, 0x95D8 },
+ { 0x9B31, 0x6B1D },
+ { 0x9B3B, 0x7CA5 },
+ { 0x9C1B, 0x9C2E },
+ { 0x9C3A, 0x9BF5 },
+ { 0x9CEB, 0x96C1 },
+ { 0x9CEC, 0x9CE7 },
+ { 0x9D08, 0x96C1 },
+ { 0x9D44, 0x9D1F },
+ { 0x9D5E, 0x9D5D },
+ { 0x9DAB, 0x9D87 },
+ { 0x9DAF, 0x9D2C },
+ { 0x9DC4, 0x9D8F },
+ { 0x9DCF, 0x9DC6 },
+ { 0x9E7D, 0x5869 },
+ { 0x9EA5, 0x9EA6 },
+ { 0x9EAA, 0x9EBA },
+ { 0x9EB8, 0x9EA9 },
+ { 0x9ED8, 0x9ED9 },
+ { 0x9EDE, 0x70B9 },
+ { 0x9EE8, 0x515A },
+ { 0x9F07, 0x9C32 },
+ { 0x9F21, 0x9F20 },
+ { 0x9F4A, 0x658E },
+ { 0x9F4B, 0x658E },
+ { 0x9F52, 0x6B6F },
+ { 0x9F61, 0x9F62 },
+ { 0x9F67, 0x56D3 },
+ { 0x9F8D, 0x7ADC },
+ { 0x9F9C, 0x4E80 },
+ { 0x9F9D, 0x7A50 },
+ // NOTE(review): U+FEFF is ZERO WIDTH NO-BREAK SPACE (the byte order mark),
+ // not a kanji. The traditional form of U+4E9C is U+4E9E, so this key looks
+ // like a data error -- confirm before changing. If the lookup relies on the
+ // order of the keys (verify in oneToOneMapping), a corrected row would also
+ // have to be moved to its proper position.
+ { 0xFEFF, 0x4E9C }
+};
+
+// Configures this transliteration to work purely through the one-to-one
+// mapping built from traditionalKanji2updateKanji; func and map are unset.
+ignoreTraditionalKanji_ja_JP::ignoreTraditionalKanji_ja_JP()
+{
+ // Function-local static: constructed on first use and shared by all
+ // instances. The second argument is the size of the table in bytes.
+ static i18nutil::oneToOneMapping aKanjiMapping(traditionalKanji2updateKanji, sizeof(traditionalKanji2updateKanji));
+
+ table = &aKanjiMapping;
+ func = nullptr;
+ map = nullptr;
+
+ implementationName = "com.sun.star.i18n.Transliteration.ignoreTraditionalKanji_ja_JP";
+ transliterationName = "ignoreTraditionalKanji_ja_JP";
+}
+
+}
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/i18npool/source/transliteration/ignoreWidth.cxx b/i18npool/source/transliteration/ignoreWidth.cxx
new file mode 100644
index 0000000000..1b9f15fde1
--- /dev/null
+++ b/i18npool/source/transliteration/ignoreWidth.cxx
@@ -0,0 +1,65 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * This file incorporates work covered by the following license notice:
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed
+ * with this work for additional information regarding copyright
+ * ownership. The ASF licenses this file to you under the Apache
+ * License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of
+ * the License at http://www.apache.org/licenses/LICENSE-2.0 .
+ */
+
+#include <rtl/ref.hxx>
+
+#include <transliteration_Ignore.hxx>
+#include <transliteration_OneToOne.hxx>
+
+namespace com::sun::star::uno { class XComponentContext; }
+
+using namespace com::sun::star::uno;
+using namespace com::sun::star::lang;
+
+namespace i18npool {
+
+// Folding for ignoreWidth: creates a fullwidthToHalfwidth transliterator and
+// returns the result of its transliterateImpl(), with inStr, startPos, nCount
+// and pOffset forwarded unchanged.
+OUString
+ignoreWidth::foldingImpl( const OUString& inStr, sal_Int32 startPos, sal_Int32 nCount, Sequence< sal_Int32 >* pOffset )
+{
+ // The temporary reference keeps the helper alive until the call has returned.
+ return rtl::Reference< fullwidthToHalfwidth >(new fullwidthToHalfwidth)
+ ->transliterateImpl(inStr, startPos, nCount, pOffset);
+}
+
+// Builds the range for str1/str2 by handing a fullwidth-to-halfwidth and a
+// halfwidth-to-fullwidth transliterator to
+// transliteration_Ignore::transliterateRange().
+Sequence< OUString > SAL_CALL
+ignoreWidth::transliterateRange( const OUString& str1, const OUString& str2 )
+{
+ // Both helpers live only for the duration of this call.
+ rtl::Reference< fullwidthToHalfwidth > xToHalfwidth(new fullwidthToHalfwidth);
+ rtl::Reference< halfwidthToFullwidth > xToFullwidth(new halfwidthToFullwidth);
+
+ fullwidthToHalfwidth& rToHalfwidth = *xToHalfwidth;
+ halfwidthToFullwidth& rToFullwidth = *xToFullwidth;
+ return transliteration_Ignore::transliterateRange(str1, str2, rToHalfwidth, rToFullwidth);
+}
+
+// Single-character variant: returns whatever a fullwidthToHalfwidth
+// transliterator produces for inChar.
+sal_Unicode SAL_CALL
+ignoreWidth::transliterateChar2Char( sal_Unicode inChar)
+{
+ // The temporary reference keeps the helper alive until the call has returned.
+ return rtl::Reference< fullwidthToHalfwidth >(new fullwidthToHalfwidth)
+ ->transliterateChar2Char(inChar);
+}
+
+}
+
+// Exported C entry point that creates a new i18npool::ignoreWidth object and
+// returns it after passing it through cppu::acquire(). Neither argument is used.
+extern "C" SAL_DLLPUBLIC_EXPORT css::uno::XInterface *
+com_sun_star_i18n_Transliteration_IGNORE_WIDTH_get_implementation(
+ css::uno::XComponentContext * /* unused */,
+ css::uno::Sequence<css::uno::Any> const & /* unused */)
+{
+ i18npool::ignoreWidth* const pInstance = new i18npool::ignoreWidth();
+ return cppu::acquire(pInstance);
+}
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/i18npool/source/transliteration/ignoreZiZu_ja_JP.cxx b/i18npool/source/transliteration/ignoreZiZu_ja_JP.cxx
new file mode 100644
index 0000000000..baefa2772d
--- /dev/null
+++ b/i18npool/source/transliteration/ignoreZiZu_ja_JP.cxx
@@ -0,0 +1,53 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * This file incorporates work covered by the following license notice:
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed
+ * with this work for additional information regarding copyright
+ * ownership. The ASF licenses this file to you under the Apache
+ * License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of
+ * the License at http://www.apache.org/licenses/LICENSE-2.0 .
+ */
+
+#include <transliteration_Ignore.hxx>
+
+namespace i18npool
+{
+// Replaces the kana letters DI and DU with ZI and ZU of the same script.
+// Any other character is returned unchanged.
+static sal_Unicode ignoreZiZu_ja_JP_translator(const sal_Unicode c)
+{
+ static const struct { sal_Unicode from; sal_Unicode to; } aFoldings[] = {
+ { 0x30C2, 0x30B8 }, // KATAKANA LETTER DI --> KATAKANA LETTER ZI
+ { 0x3062, 0x3058 }, // HIRAGANA LETTER DI --> HIRAGANA LETTER ZI
+ { 0x30C5, 0x30BA }, // KATAKANA LETTER DU --> KATAKANA LETTER ZU
+ { 0x3065, 0x305A } // HIRAGANA LETTER DU --> HIRAGANA LETTER ZU
+ };
+
+ for (const auto& rFolding : aFoldings)
+ {
+ if (rFolding.from == c)
+ return rFolding.to;
+ }
+
+ // not one of the four letters above
+ return c;
+}
+
+// Configures this transliteration to use only the per-character translator
+// function; the one-to-one table and the two-character map are left unset.
+ignoreZiZu_ja_JP::ignoreZiZu_ja_JP()
+{
+ map = nullptr;
+ table = nullptr;
+ func = ignoreZiZu_ja_JP_translator;
+
+ implementationName = "com.sun.star.i18n.Transliteration.ignoreZiZu_ja_JP";
+ transliterationName = "ignoreZiZu_ja_JP";
+}
+}
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/i18npool/source/transliteration/katakanaToHiragana.cxx b/i18npool/source/transliteration/katakanaToHiragana.cxx
new file mode 100644
index 0000000000..868040be6e
--- /dev/null
+++ b/i18npool/source/transliteration/katakanaToHiragana.cxx
@@ -0,0 +1,45 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * This file incorporates work covered by the following license notice:
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed
+ * with this work for additional information regarding copyright
+ * ownership. The ASF licenses this file to you under the Apache
+ * License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of
+ * the License at http://www.apache.org/licenses/LICENSE-2.0 .
+ */
+
+#include <transliteration_OneToOne.hxx>
+
+namespace i18npool
+{
+// see http://charts.unicode.org/Web/U3040.html Hiragana (U+3040..U+309F)
+// see http://charts.unicode.org/Web/U30A0.html Katakana (U+30A0..U+30FF)
+//
+// Shifts a katakana code point down to the hiragana code point at the same
+// offset in its block. Only U+30A1..U+30F6 and U+30FD..U+30FF are shifted;
+// every other character is returned unchanged.
+// NOTE(review): U+30FF (KATAKANA DIGRAPH KOTO) therefore becomes U+309F
+// (HIRAGANA DIGRAPH YORI) -- confirm that this is intended.
+static sal_Unicode toHiragana(const sal_Unicode c)
+{
+ const bool bInLetterRange = 0x30a1 <= c && c <= 0x30f6;
+ const bool bInMarkRange = 0x30fd <= c && c <= 0x30ff;
+ if (!bInLetterRange && !bInMarkRange)
+ return c;
+
+ // subtract the distance between the two blocks (0x30a0 - 0x3040 = 0x0060)
+ return c - (0x30a0 - 0x3040);
+}
+
+// Configures this transliteration to convert each character with
+// toHiragana(); no one-to-one table is used.
+katakanaToHiragana::katakanaToHiragana()
+{
+ table = nullptr;
+ func = toHiragana;
+
+ implementationName = "com.sun.star.i18n.Transliteration.KATAKANA_HIRAGANA";
+ transliterationName = "katakanaToHiragana";
+}
+}
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/i18npool/source/transliteration/largeToSmall_ja_JP.cxx b/i18npool/source/transliteration/largeToSmall_ja_JP.cxx
new file mode 100644
index 0000000000..f6771fb6d4
--- /dev/null
+++ b/i18npool/source/transliteration/largeToSmall_ja_JP.cxx
@@ -0,0 +1,79 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * This file incorporates work covered by the following license notice:
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed
+ * with this work for additional information regarding copyright
+ * ownership. The ASF licenses this file to you under the Apache
+ * License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of
+ * the License at http://www.apache.org/licenses/LICENSE-2.0 .
+ */
+
+#include <i18nutil/oneToOneMapping.hxx>
+
+#include <transliteration_OneToOne.hxx>
+
+using namespace com::sun::star::uno;
+using namespace com::sun::star::lang;
+
+namespace i18npool
+{
+// ftp://ftp.unicode.org/Public/3.0-Update/UnicodeData-3.0.0.txt
+// ftp://ftp.unicode.org/Public/3.0-Update/UnicodeData-3.0.0.html
+// http://charts.unicode.org/Web/U3040.html Hiragana (U+3040..U+309F)
+// http://charts.unicode.org/Web/U30A0.html Katakana (U+30A0..U+30FF)
+// http://charts.unicode.org/Web/UFF00.html
+
+// Mapping from normal-sized kana to their small-sized counterparts.
+//
+// The previous contents were a verbatim copy of the small2large table (small
+// --> normal), which made largeToSmall_ja_JP convert in the wrong direction.
+// Each pair is now reversed.
+//
+// Entries stay sorted in ascending order of the source code point, as in the
+// original table; presumably i18nutil::oneToOneMapping relies on that order
+// for its lookup -- verify before reordering.
+i18nutil::OneToOneMappingTable_t const large2small[] = {
+    { 0x3042, 0x3041 }, // HIRAGANA LETTER A --> HIRAGANA LETTER SMALL A
+    { 0x3044, 0x3043 }, // HIRAGANA LETTER I --> HIRAGANA LETTER SMALL I
+    { 0x3046, 0x3045 }, // HIRAGANA LETTER U --> HIRAGANA LETTER SMALL U
+    { 0x3048, 0x3047 }, // HIRAGANA LETTER E --> HIRAGANA LETTER SMALL E
+    { 0x304A, 0x3049 }, // HIRAGANA LETTER O --> HIRAGANA LETTER SMALL O
+    { 0x3064, 0x3063 }, // HIRAGANA LETTER TU --> HIRAGANA LETTER SMALL TU
+    { 0x3084, 0x3083 }, // HIRAGANA LETTER YA --> HIRAGANA LETTER SMALL YA
+    { 0x3086, 0x3085 }, // HIRAGANA LETTER YU --> HIRAGANA LETTER SMALL YU
+    { 0x3088, 0x3087 }, // HIRAGANA LETTER YO --> HIRAGANA LETTER SMALL YO
+    { 0x308F, 0x308E }, // HIRAGANA LETTER WA --> HIRAGANA LETTER SMALL WA
+    { 0x30A2, 0x30A1 }, // KATAKANA LETTER A --> KATAKANA LETTER SMALL A
+    { 0x30A4, 0x30A3 }, // KATAKANA LETTER I --> KATAKANA LETTER SMALL I
+    { 0x30A6, 0x30A5 }, // KATAKANA LETTER U --> KATAKANA LETTER SMALL U
+    { 0x30A8, 0x30A7 }, // KATAKANA LETTER E --> KATAKANA LETTER SMALL E
+    { 0x30AA, 0x30A9 }, // KATAKANA LETTER O --> KATAKANA LETTER SMALL O
+    { 0x30AB, 0x30F5 }, // KATAKANA LETTER KA --> KATAKANA LETTER SMALL KA
+    { 0x30B1, 0x30F6 }, // KATAKANA LETTER KE --> KATAKANA LETTER SMALL KE
+    { 0x30C4, 0x30C3 }, // KATAKANA LETTER TU --> KATAKANA LETTER SMALL TU
+    { 0x30E4, 0x30E3 }, // KATAKANA LETTER YA --> KATAKANA LETTER SMALL YA
+    { 0x30E6, 0x30E5 }, // KATAKANA LETTER YU --> KATAKANA LETTER SMALL YU
+    { 0x30E8, 0x30E7 }, // KATAKANA LETTER YO --> KATAKANA LETTER SMALL YO
+    { 0x30EF, 0x30EE }, // KATAKANA LETTER WA --> KATAKANA LETTER SMALL WA
+    { 0xFF71, 0xFF67 }, // HALFWIDTH KATAKANA LETTER A --> HALFWIDTH KATAKANA LETTER SMALL A
+    { 0xFF72, 0xFF68 }, // HALFWIDTH KATAKANA LETTER I --> HALFWIDTH KATAKANA LETTER SMALL I
+    { 0xFF73, 0xFF69 }, // HALFWIDTH KATAKANA LETTER U --> HALFWIDTH KATAKANA LETTER SMALL U
+    { 0xFF74, 0xFF6A }, // HALFWIDTH KATAKANA LETTER E --> HALFWIDTH KATAKANA LETTER SMALL E
+    { 0xFF75, 0xFF6B }, // HALFWIDTH KATAKANA LETTER O --> HALFWIDTH KATAKANA LETTER SMALL O
+    { 0xFF82, 0xFF6F }, // HALFWIDTH KATAKANA LETTER TU --> HALFWIDTH KATAKANA LETTER SMALL TU
+    { 0xFF94, 0xFF6C }, // HALFWIDTH KATAKANA LETTER YA --> HALFWIDTH KATAKANA LETTER SMALL YA
+    { 0xFF95, 0xFF6D }, // HALFWIDTH KATAKANA LETTER YU --> HALFWIDTH KATAKANA LETTER SMALL YU
+    { 0xFF96, 0xFF6E } // HALFWIDTH KATAKANA LETTER YO --> HALFWIDTH KATAKANA LETTER SMALL YO
+};
+
+// Configures the transliteration to use a table lookup over large2small
+// instead of a per-character function.
+largeToSmall_ja_JP::largeToSmall_ja_JP()
+{
+    // Function-local static: the mapping object is built once, on first
+    // construction, from the file-level large2small array.
+    static i18nutil::oneToOneMapping aMapping(large2small, sizeof(large2small));
+
+    table = &aMapping;
+    func = nullptr;
+    implementationName = "com.sun.star.i18n.Transliteration.largeToSmall_ja_JP";
+    transliterationName = "largeToSmall_ja_JP";
+}
+}
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/i18npool/source/transliteration/numtochar.cxx b/i18npool/source/transliteration/numtochar.cxx
new file mode 100644
index 0000000000..56761f44ca
--- /dev/null
+++ b/i18npool/source/transliteration/numtochar.cxx
@@ -0,0 +1,57 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * This file incorporates work covered by the following license notice:
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed
+ * with this work for additional information regarding copyright
+ * ownership. The ASF licenses this file to you under the Apache
+ * License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of
+ * the License at http://www.apache.org/licenses/LICENSE-2.0 .
+ */
+
+#include <com/sun/star/i18n/NativeNumberMode.hpp>
+
+#include <numtochar.hxx>
+
+namespace i18npool {
+
+// Expands to the default constructor of NumToChar<suffix>. The constructor
+// stores the given native number mode, sets tableSize to 0 and derives the
+// transliteration and implementation names from the suffix.
+#define TRANSLITERATION_NUMTOCHAR( suffix, mode ) \
+NumToChar##suffix::NumToChar##suffix() \
+{ \
+    tableSize = 0; \
+    nNativeNumberMode = mode; \
+    implementationName = "com.sun.star.i18n.Transliteration.NumToChar"#suffix; \
+    transliterationName = "NumToChar"#suffix; \
+}
+
+using namespace com::sun::star::i18n::NativeNumberMode;
+
+// Constructor definitions generated by TRANSLITERATION_NUMTOCHAR. Each line
+// defines NumToChar<name>::NumToChar<name>() and passes the NATNUM* constant
+// that the constructor stores in nNativeNumberMode.
+TRANSLITERATION_NUMTOCHAR( Halfwidth, NATNUM0 )
+TRANSLITERATION_NUMTOCHAR( Fullwidth, NATNUM3 )
+TRANSLITERATION_NUMTOCHAR( Lower_zh_CN, NATNUM1 )
+TRANSLITERATION_NUMTOCHAR( Lower_zh_TW, NATNUM1 )
+TRANSLITERATION_NUMTOCHAR( Upper_zh_CN, NATNUM2 )
+TRANSLITERATION_NUMTOCHAR( Upper_zh_TW, NATNUM2 )
+TRANSLITERATION_NUMTOCHAR( KanjiShort_ja_JP, NATNUM1 )
+TRANSLITERATION_NUMTOCHAR( KanjiTraditional_ja_JP, NATNUM2 )
+TRANSLITERATION_NUMTOCHAR( Lower_ko, NATNUM1 )
+TRANSLITERATION_NUMTOCHAR( Upper_ko, NATNUM2 )
+TRANSLITERATION_NUMTOCHAR( Hangul_ko, NATNUM9 )
+TRANSLITERATION_NUMTOCHAR( Indic_ar, NATNUM1 )
+TRANSLITERATION_NUMTOCHAR( EastIndic_ar, NATNUM1 )
+TRANSLITERATION_NUMTOCHAR( Indic_hi, NATNUM1 )
+TRANSLITERATION_NUMTOCHAR( _th, NATNUM1 )
+
+#undef TRANSLITERATION_NUMTOCHAR
+
+}
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/i18npool/source/transliteration/numtotext_cjk.cxx b/i18npool/source/transliteration/numtotext_cjk.cxx
new file mode 100644
index 0000000000..f60561f213
--- /dev/null
+++ b/i18npool/source/transliteration/numtotext_cjk.cxx
@@ -0,0 +1,85 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * This file incorporates work covered by the following license notice:
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed
+ * with this work for additional information regarding copyright
+ * ownership. The ASF licenses this file to you under the Apache
+ * License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of
+ * the License at http://www.apache.org/licenses/LICENSE-2.0 .
+ */
+
+#include <com/sun/star/i18n/NativeNumberMode.hpp>
+
+#include <numtotext_cjk.hxx>
+#include <bullet.h>
+
+namespace i18npool {
+
+// Mode-based form: expands to the default constructor of NumToText<suffix>.
+// The constructor stores the given native number mode, sets tableSize to 0
+// and derives the transliteration and implementation names from the suffix.
+#define TRANSLITERATION_NUMTOTEXT( suffix, mode ) \
+NumToText##suffix::NumToText##suffix() \
+{ \
+    tableSize = 0; \
+    nNativeNumberMode = mode; \
+    implementationName = "com.sun.star.i18n.Transliteration.NumToText"#suffix; \
+    transliterationName = "NumToText"#suffix; \
+}
+using namespace com::sun::star::i18n::NativeNumberMode;
+
+// Constructor definitions generated by the two-argument
+// TRANSLITERATION_NUMTOTEXT macro. Each line defines
+// NumToText<name>::NumToText<name>() and passes the NATNUM* constant that the
+// constructor stores in nNativeNumberMode.
+TRANSLITERATION_NUMTOTEXT( Lower_zh_CN, NATNUM4)
+TRANSLITERATION_NUMTOTEXT( Upper_zh_CN, NATNUM5)
+TRANSLITERATION_NUMTOTEXT( Lower_zh_TW, NATNUM4)
+TRANSLITERATION_NUMTOTEXT( Upper_zh_TW, NATNUM5)
+TRANSLITERATION_NUMTOTEXT( Fullwidth_zh_CN, NATNUM6)
+TRANSLITERATION_NUMTOTEXT( Fullwidth_zh_TW, NATNUM6)
+TRANSLITERATION_NUMTOTEXT( Fullwidth_ja_JP, NATNUM6)
+TRANSLITERATION_NUMTOTEXT( Fullwidth_ko, NATNUM6)
+TRANSLITERATION_NUMTOTEXT( FormalLower_ko, NATNUM4)
+TRANSLITERATION_NUMTOTEXT( FormalUpper_ko, NATNUM5)
+TRANSLITERATION_NUMTOTEXT( FormalHangul_ko, NATNUM10)
+TRANSLITERATION_NUMTOTEXT( InformalLower_ko, NATNUM7)
+TRANSLITERATION_NUMTOTEXT( InformalUpper_ko, NATNUM8)
+TRANSLITERATION_NUMTOTEXT( InformalHangul_ko, NATNUM11)
+TRANSLITERATION_NUMTOTEXT( KanjiLongTraditional_ja_JP, NATNUM5)
+TRANSLITERATION_NUMTOTEXT( KanjiLongModern_ja_JP, NATNUM4)
+TRANSLITERATION_NUMTOTEXT( Date_zh, NATNUM7)
+TRANSLITERATION_NUMTOTEXT( KanjiShortTraditional_ja_JP, NATNUM8)
+TRANSLITERATION_NUMTOTEXT( KanjiShortModern_ja_JP, NATNUM7)
+
+#undef TRANSLITERATION_NUMTOTEXT
+
+// Table-based form: expands to the default constructor of NumToText<suffix>.
+// The constructor points `table` at the given symbol array, records its
+// element count in tableSize, stores the recycle flag in recycleSymbol and
+// derives the transliteration and implementation names from the suffix.
+#define TRANSLITERATION_NUMTOTEXT( suffix, symbols, wrap ) \
+NumToText##suffix::NumToText##suffix() \
+{ \
+    tableSize = SAL_N_ELEMENTS(symbols); \
+    table = symbols; \
+    recycleSymbol = wrap; \
+    implementationName = "com.sun.star.i18n.Transliteration.NumToText"#suffix; \
+    transliterationName = "NumToText"#suffix; \
+}
+
+// Constructor definitions generated by the three-argument
+// TRANSLITERATION_NUMTOTEXT macro. Arguments: class-name suffix, symbol table
+// (the table_* arrays are not defined in this file; presumably they come from
+// bullet.h -- verify), and the value stored in recycleSymbol.
+TRANSLITERATION_NUMTOTEXT ( AIUFullWidth_ja_JP, table_AIUFullWidth_ja_JP, true)
+TRANSLITERATION_NUMTOTEXT ( AIUHalfWidth_ja_JP, table_AIUHalfWidth_ja_JP, true)
+TRANSLITERATION_NUMTOTEXT ( IROHAFullWidth_ja_JP, table_IROHAFullWidth_ja_JP, true)
+TRANSLITERATION_NUMTOTEXT ( IROHAHalfWidth_ja_JP, table_IROHAHalfWidth_ja_JP, true)
+TRANSLITERATION_NUMTOTEXT ( CircledNumber, table_CircledNumber, false)
+TRANSLITERATION_NUMTOTEXT ( TianGan_zh, table_TianGan_zh, false)
+TRANSLITERATION_NUMTOTEXT ( DiZi_zh, table_DiZi_zh, false)
+TRANSLITERATION_NUMTOTEXT ( HangulJamo_ko, table_HangulJamo_ko, true)
+TRANSLITERATION_NUMTOTEXT ( HangulSyllable_ko, table_HangulSyllable_ko, true)
+TRANSLITERATION_NUMTOTEXT ( HangulCircledJamo_ko, table_HangulCircledJamo_ko, true)
+TRANSLITERATION_NUMTOTEXT ( HangulCircledSyllable_ko, table_HangulCircledSyllable_ko, true)
+
+#undef TRANSLITERATION_NUMTOTEXT
+
+}
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/i18npool/source/transliteration/smallToLarge_ja_JP.cxx b/i18npool/source/transliteration/smallToLarge_ja_JP.cxx
new file mode 100644
index 0000000000..47ddba1a96
--- /dev/null
+++ b/i18npool/source/transliteration/smallToLarge_ja_JP.cxx
@@ -0,0 +1,79 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * This file incorporates work covered by the following license notice:
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed
+ * with this work for additional information regarding copyright
+ * ownership. The ASF licenses this file to you under the Apache
+ * License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of
+ * the License at http://www.apache.org/licenses/LICENSE-2.0 .
+ */
+
+#include <i18nutil/oneToOneMapping.hxx>
+
+#include <transliteration_OneToOne.hxx>
+
+using namespace com::sun::star::uno;
+using namespace com::sun::star::lang;
+
+namespace i18npool
+{
+// ftp://ftp.unicode.org/Public/3.0-Update/UnicodeData-3.0.0.txt
+// ftp://ftp.unicode.org/Public/3.0-Update/UnicodeData-3.0.0.html
+// http://charts.unicode.org/Web/U3040.html Hiragana (U+3040..U+309F)
+// http://charts.unicode.org/Web/U30A0.html Katakana (U+30A0..U+30FF)
+// http://charts.unicode.org/Web/UFF00.html
+
+// Mapping from small-sized kana to their normal-sized counterparts: Hiragana,
+// Katakana and halfwidth Katakana. Each entry is { source, target }.
+// Entries are listed in ascending order of the source code point; presumably
+// i18nutil::oneToOneMapping depends on that order for its lookup -- verify
+// before reordering or inserting entries.
+i18nutil::OneToOneMappingTable_t const small2large[] = {
+ { 0x3041, 0x3042 }, // HIRAGANA LETTER SMALL A --> HIRAGANA LETTER A
+ { 0x3043, 0x3044 }, // HIRAGANA LETTER SMALL I --> HIRAGANA LETTER I
+ { 0x3045, 0x3046 }, // HIRAGANA LETTER SMALL U --> HIRAGANA LETTER U
+ { 0x3047, 0x3048 }, // HIRAGANA LETTER SMALL E --> HIRAGANA LETTER E
+ { 0x3049, 0x304A }, // HIRAGANA LETTER SMALL O --> HIRAGANA LETTER O
+ { 0x3063, 0x3064 }, // HIRAGANA LETTER SMALL TU --> HIRAGANA LETTER TU
+ { 0x3083, 0x3084 }, // HIRAGANA LETTER SMALL YA --> HIRAGANA LETTER YA
+ { 0x3085, 0x3086 }, // HIRAGANA LETTER SMALL YU --> HIRAGANA LETTER YU
+ { 0x3087, 0x3088 }, // HIRAGANA LETTER SMALL YO --> HIRAGANA LETTER YO
+ { 0x308E, 0x308F }, // HIRAGANA LETTER SMALL WA --> HIRAGANA LETTER WA
+ { 0x30A1, 0x30A2 }, // KATAKANA LETTER SMALL A --> KATAKANA LETTER A
+ { 0x30A3, 0x30A4 }, // KATAKANA LETTER SMALL I --> KATAKANA LETTER I
+ { 0x30A5, 0x30A6 }, // KATAKANA LETTER SMALL U --> KATAKANA LETTER U
+ { 0x30A7, 0x30A8 }, // KATAKANA LETTER SMALL E --> KATAKANA LETTER E
+ { 0x30A9, 0x30AA }, // KATAKANA LETTER SMALL O --> KATAKANA LETTER O
+ { 0x30C3, 0x30C4 }, // KATAKANA LETTER SMALL TU --> KATAKANA LETTER TU
+ { 0x30E3, 0x30E4 }, // KATAKANA LETTER SMALL YA --> KATAKANA LETTER YA
+ { 0x30E5, 0x30E6 }, // KATAKANA LETTER SMALL YU --> KATAKANA LETTER YU
+ { 0x30E7, 0x30E8 }, // KATAKANA LETTER SMALL YO --> KATAKANA LETTER YO
+ { 0x30EE, 0x30EF }, // KATAKANA LETTER SMALL WA --> KATAKANA LETTER WA
+ { 0x30F5, 0x30AB }, // KATAKANA LETTER SMALL KA --> KATAKANA LETTER KA
+ { 0x30F6, 0x30B1 }, // KATAKANA LETTER SMALL KE --> KATAKANA LETTER KE
+ { 0xFF67, 0xFF71 }, // HALFWIDTH KATAKANA LETTER SMALL A --> HALFWIDTH KATAKANA LETTER A
+ { 0xFF68, 0xFF72 }, // HALFWIDTH KATAKANA LETTER SMALL I --> HALFWIDTH KATAKANA LETTER I
+ { 0xFF69, 0xFF73 }, // HALFWIDTH KATAKANA LETTER SMALL U --> HALFWIDTH KATAKANA LETTER U
+ { 0xFF6A, 0xFF74 }, // HALFWIDTH KATAKANA LETTER SMALL E --> HALFWIDTH KATAKANA LETTER E
+ { 0xFF6B, 0xFF75 }, // HALFWIDTH KATAKANA LETTER SMALL O --> HALFWIDTH KATAKANA LETTER O
+ { 0xFF6C, 0xFF94 }, // HALFWIDTH KATAKANA LETTER SMALL YA --> HALFWIDTH KATAKANA LETTER YA
+ { 0xFF6D, 0xFF95 }, // HALFWIDTH KATAKANA LETTER SMALL YU --> HALFWIDTH KATAKANA LETTER YU
+ { 0xFF6E, 0xFF96 }, // HALFWIDTH KATAKANA LETTER SMALL YO --> HALFWIDTH KATAKANA LETTER YO
+ { 0xFF6F, 0xFF82 } // HALFWIDTH KATAKANA LETTER SMALL TU --> HALFWIDTH KATAKANA LETTER TU
+};
+
+// Configures the transliteration to use a table lookup over small2large
+// instead of a per-character function.
+smallToLarge_ja_JP::smallToLarge_ja_JP()
+{
+    // Function-local static: the mapping object is built once, on first
+    // construction, from the file-level small2large array.
+    static i18nutil::oneToOneMapping aMapping(small2large, sizeof(small2large));
+
+    table = &aMapping;
+    func = nullptr;
+    implementationName = "com.sun.star.i18n.Transliteration.smallToLarge_ja_JP";
+    transliterationName = "smallToLarge_ja_JP";
+}
+}
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/i18npool/source/transliteration/textToPronounce_zh.cxx b/i18npool/source/transliteration/textToPronounce_zh.cxx
new file mode 100644
index 0000000000..8d6b3e3e81
--- /dev/null
+++ b/i18npool/source/transliteration/textToPronounce_zh.cxx
@@ -0,0 +1,197 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * This file incorporates work covered by the following license notice:
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed
+ * with this work for additional information regarding copyright
+ * ownership. The ASF licenses this file to you under the Apache
+ * License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of
+ * the License at http://www.apache.org/licenses/LICENSE-2.0 .
+ */
+
+#include <sal/config.h>
+
+#include <com/sun/star/i18n/MultipleCharsOutputException.hpp>
+#include <com/sun/star/i18n/TransliterationType.hpp>
+#include <o3tl/temporary.hxx>
+#include <rtl/ustring.hxx>
+#include <rtl/ustrbuf.hxx>
+
+#include <textToPronounce_zh.hxx>
+
+using namespace com::sun::star::i18n;
+using namespace com::sun::star::uno;
+
+namespace i18npool {
+
+// Reports this transliteration as both ONE_TO_ONE and IGNORE.
+sal_Int16 SAL_CALL TextToPronounce_zh::getType()
+{
+    const sal_Int16 nTypeFlags = TransliterationType::ONE_TO_ONE | TransliterationType::IGNORE;
+    return nTypeFlags;
+}
+
+// Looks up the pronunciation string for one character in the loaded index.
+//
+// idx[0] is indexed by the high byte of ch and yields a start position, or
+// 0xFFFF when that page has no entries. idx[1] at (start + low byte) yields a
+// position in idx[2], where the returned string begins.
+//
+// Returns a pointer to a shared empty string when no index is loaded or the
+// page is absent; never returns nullptr.
+const sal_Unicode*
+TextToPronounce_zh::getPronounce(const sal_Unicode ch)
+{
+    static const sal_Unicode emptyString[]={0};
+
+    if (!idx)
+        return emptyString;
+
+    const sal_uInt16 nPageStart = idx[0][ch >> 8];
+    if (nPageStart == 0xFFFF)
+        return emptyString;
+
+    const sal_uInt16 nTextPos = idx[1][nPageStart + (ch & 0xFF)];
+    return reinterpret_cast<sal_Unicode const *>(&idx[2][nTextPos]);
+}
+
+// Replaces each character of inStr[startPos, startPos + nCount) with its
+// pronunciation string and returns the concatenation.
+//
+// startPos  first character to convert; a negative value throws
+//           RuntimeException.
+// nCount    number of characters to convert; clamped to the end of inStr, and
+//           a negative result is treated as an empty range.
+// pOffset   optional. When non-null, element 0 is set to 0 and element i + 1
+//           to the accumulated output length after input character i. The
+//           sequence is grown to nCount + 1 elements if it is shorter; it is
+//           never shrunk.
+OUString
+TextToPronounce_zh::foldingImpl(const OUString & inStr, sal_Int32 startPos,
+        sal_Int32 nCount, Sequence< sal_Int32 >* pOffset)
+{
+    if (startPos < 0)
+        throw RuntimeException();
+
+    // Compare against the remaining length rather than forming
+    // startPos + nCount, which could overflow for a very large nCount.
+    const sal_Int32 nAvailable = inStr.getLength() - startPos;
+    if (nCount > nAvailable)
+        nCount = nAvailable;
+    if (nCount < 0)
+        nCount = 0;
+
+    sal_Int32* ppOffset = nullptr;
+    if (pOffset)
+    {
+        // The loop below writes indices 0..nCount, so make sure they exist.
+        // Previously nothing sized the sequence, and an empty or short one
+        // was written out of bounds.
+        if (pOffset->getLength() < nCount + 1)
+            pOffset->realloc(nCount + 1);
+        ppOffset = pOffset->getArray();
+        ppOffset[0] = 0;
+    }
+
+    OUStringBuffer sb;
+    for (sal_Int32 i = 0; i < nCount; i++) {
+        OUString pron(getPronounce(inStr[startPos + i]));
+        sb.append(pron);
+
+        if (ppOffset)
+            ppOffset[i + 1] = ppOffset[i] + pron.getLength();
+    }
+    return sb.makeStringAndClear();
+}
+
+// Returns the pronunciation of a single character as a string (empty when
+// getPronounce has no entry for it).
+OUString SAL_CALL
+TextToPronounce_zh::transliterateChar2String( sal_Unicode inChar)
+{
+    const sal_Unicode* const pText = getPronounce(inChar);
+    return OUString(pText);
+}
+
+// Returns the pronunciation of a single character when it is exactly one
+// code unit long.
+//
+// Returns 0 when there is no pronunciation. Throws
+// MultipleCharsOutputException when the pronunciation is longer than one
+// code unit.
+sal_Unicode SAL_CALL
+TextToPronounce_zh::transliterateChar2Char( sal_Unicode inChar)
+{
+    const sal_Unicode* const pText = getPronounce(inChar);
+
+    if (pText == nullptr || pText[0] == 0)
+        return 0;
+
+    if (pText[1] != 0)
+        throw MultipleCharsOutputException();
+
+    return pText[0];
+}
+
+// Compares two substrings character by character by pronunciation.
+//
+// Both counts are first clamped to the end of their string. Characters are
+// considered equal when getPronounce returns the same pointer for both, so
+// this is a pointer comparison, not a string comparison. In particular, all
+// characters without a pronunciation share one empty string and compare equal.
+//
+// nMatch1 / nMatch2 receive the index of the first mismatch, or the number of
+// compared characters when none is found. Returns true only when no mismatch
+// is found and both clamped counts are equal.
+sal_Bool SAL_CALL
+TextToPronounce_zh::equals( const OUString & str1, sal_Int32 pos1, sal_Int32 nCount1, sal_Int32 & nMatch1,
+        const OUString & str2, sal_Int32 pos2, sal_Int32 nCount2, sal_Int32 & nMatch2)
+{
+    const sal_Int32 nLen1 = str1.getLength();
+    if (nCount1 + pos1 > nLen1)
+        nCount1 = nLen1 - pos1;
+
+    const sal_Int32 nLen2 = str2.getLength();
+    if (nCount2 + pos2 > nLen2)
+        nCount2 = nLen2 - pos2;
+
+    const sal_Int32 nCommon = std::min(nCount1, nCount2);
+
+    const sal_Unicode* const pFirst = str1.getStr() + pos1;
+    const sal_Unicode* const pSecond = str2.getStr() + pos2;
+
+    for (sal_Int32 n = 0; n < nCommon; ++n)
+    {
+        const sal_Unicode* const pPron1 = getPronounce(pFirst[n]);
+        const sal_Unicode* const pPron2 = getPronounce(pSecond[n]);
+        if (pPron1 != pPron2)
+        {
+            nMatch1 = nMatch2 = n;
+            return false;
+        }
+    }
+
+    nMatch1 = nMatch2 = nCommon;
+    return (nCount1 == nCount2);
+}
+
+// Without dynamic loading the index accessors are linked in directly, so they
+// are declared here. With dynamic loading they are looked up by name in the
+// TextToPronounce_zh constructor instead.
+#ifdef DISABLE_DYNLOADING
+
+extern "C" {
+
+// Each returns the index tables consumed by getPronounce (stored in idx).
+// The callers in this file discard the value written to max_index.
+sal_uInt16 const ** get_zh_zhuyin(sal_Int16 & max_index);
+sal_uInt16 const ** get_zh_pinyin(sal_Int16 & max_index);
+
+}
+
+#endif
+
+// Pinyin variant: initialises the base class with the get_zh_pinyin accessor,
+// passed by symbol name when dynamic loading is enabled and as a function
+// pointer otherwise, then sets the transliteration and implementation names.
+TextToPinyin_zh_CN::TextToPinyin_zh_CN() :
+#ifndef DISABLE_DYNLOADING
+ TextToPronounce_zh("get_zh_pinyin")
+#else
+ TextToPronounce_zh(get_zh_pinyin)
+#endif
+{
+ transliterationName = "ChineseCharacterToPinyin";
+ implementationName = "com.sun.star.i18n.Transliteration.TextToPinyin_zh_CN";
+}
+
+// Zhuyin variant: initialises the base class with the get_zh_zhuyin accessor,
+// passed by symbol name when dynamic loading is enabled and as a function
+// pointer otherwise, then sets the transliteration and implementation names.
+TextToChuyin_zh_TW::TextToChuyin_zh_TW() :
+#ifndef DISABLE_DYNLOADING
+ TextToPronounce_zh("get_zh_zhuyin")
+#else
+ TextToPronounce_zh(get_zh_zhuyin)
+#endif
+{
+ transliterationName = "ChineseCharacterToChuyin";
+ implementationName = "com.sun.star.i18n.Transliteration.TextToChuyin_zh_TW";
+}
+
+#ifndef DISABLE_DYNLOADING
+
+// Address anchor handed to osl_loadModuleRelative so that the index library
+// is located relative to the module containing this function.
+extern "C" { static void thisModule() {} }
+
+// Dynamic-loading constructor: loads the "index_data" library and asks the
+// accessor named func_name for the index tables. idx stays nullptr when the
+// library or the symbol cannot be found.
+TextToPronounce_zh::TextToPronounce_zh(const char* func_name)
+{
+    // Signature shared by the index accessors exported from the library.
+    using IndexGetter = sal_uInt16 const ** (*)(sal_Int16 &);
+
+#ifdef SAL_DLLPREFIX
+    OUString aLibName(SAL_DLLPREFIX"index_data" SAL_DLLEXTENSION);
+#else
+    OUString aLibName("index_data" SAL_DLLEXTENSION);
+#endif
+
+    idx = nullptr;
+    hModule = osl_loadModuleRelative(
+        &thisModule, aLibName.pData, SAL_LOADMODULE_DEFAULT );
+    if (!hModule)
+        return;
+
+    const OUString aSymbol = OUString::createFromAscii(func_name);
+    const auto pGetter
+        = reinterpret_cast<IndexGetter>(osl_getFunctionSymbol(hModule, aSymbol.pData));
+    if (pGetter)
+        idx = pGetter(o3tl::temporary(sal_Int16()));   // max_index is not needed here
+}
+
+#else
+
+// Static-link constructor: calls the supplied accessor directly and keeps the
+// index tables it returns. The max_index output is written to a temporary and
+// discarded.
+TextToPronounce_zh::TextToPronounce_zh(sal_uInt16 const ** (*function)(sal_Int16 &))
+{
+ idx = function(o3tl::temporary(sal_Int16()));
+}
+
+#endif
+
+// Unloads the index library if the constructor managed to load it. Nothing is
+// done in builds without dynamic loading.
+TextToPronounce_zh::~TextToPronounce_zh()
+{
+#ifndef DISABLE_DYNLOADING
+    if (hModule)
+    {
+        osl_unloadModule(hModule);
+    }
+#endif
+}
+}
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/i18npool/source/transliteration/texttonum.cxx b/i18npool/source/transliteration/texttonum.cxx
new file mode 100644
index 0000000000..5fd0e898de
--- /dev/null
+++ b/i18npool/source/transliteration/texttonum.cxx
@@ -0,0 +1,52 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * This file incorporates work covered by the following license notice:
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed
+ * with this work for additional information regarding copyright
+ * ownership. The ASF licenses this file to you under the Apache
+ * License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of
+ * the License at http://www.apache.org/licenses/LICENSE-2.0 .
+ */
+
+#include <texttonum.hxx>
+
+using namespace com::sun::star::uno;
+
+namespace i18npool {
+
+// Expands to the default constructor of TextToNum<suffix>. The constructor
+// sets nNativeNumberMode and tableSize to 0 and derives the transliteration
+// and implementation names from the suffix.
+#define TRANSLITERATION_TEXTTONUM( suffix ) \
+TextToNum##suffix::TextToNum##suffix() \
+{ \
+    tableSize = 0; \
+    nNativeNumberMode = 0; \
+    implementationName = "com.sun.star.i18n.Transliteration.TextToNum"#suffix; \
+    transliterationName = "TextToNum"#suffix; \
+}
+
+// Constructor definitions generated by TRANSLITERATION_TEXTTONUM. Each line
+// defines TextToNum<name>::TextToNum<name>().
+TRANSLITERATION_TEXTTONUM( Lower_zh_CN)
+TRANSLITERATION_TEXTTONUM( Upper_zh_CN)
+TRANSLITERATION_TEXTTONUM( Lower_zh_TW)
+TRANSLITERATION_TEXTTONUM( Upper_zh_TW)
+TRANSLITERATION_TEXTTONUM( FormalLower_ko)
+TRANSLITERATION_TEXTTONUM( FormalUpper_ko)
+TRANSLITERATION_TEXTTONUM( FormalHangul_ko)
+TRANSLITERATION_TEXTTONUM( InformalLower_ko)
+TRANSLITERATION_TEXTTONUM( InformalUpper_ko)
+TRANSLITERATION_TEXTTONUM( InformalHangul_ko)
+TRANSLITERATION_TEXTTONUM( KanjiLongTraditional_ja_JP)
+TRANSLITERATION_TEXTTONUM( KanjiLongModern_ja_JP)
+
+#undef TRANSLITERATION_TEXTTONUM
+
+}
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/i18npool/source/transliteration/transliterationImpl.cxx b/i18npool/source/transliteration/transliterationImpl.cxx
new file mode 100644
index 0000000000..914a401031
--- /dev/null
+++ b/i18npool/source/transliteration/transliterationImpl.cxx
@@ -0,0 +1,682 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * This file incorporates work covered by the following license notice:
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed
+ * with this work for additional information regarding copyright
+ * ownership. The ASF licenses this file to you under the Apache
+ * License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of
+ * the License at http://www.apache.org/licenses/LICENSE-2.0 .
+ */
+
+
+#include <transliterationImpl.hxx>
+#include <servicename.hxx>
+
+#include <com/sun/star/i18n/LocaleData2.hpp>
+#include <com/sun/star/i18n/TransliterationType.hpp>
+#include <com/sun/star/i18n/TransliterationModulesExtra.hpp>
+
+#include <comphelper/sequence.hxx>
+#include <cppuhelper/supportsservice.hxx>
+#include <o3tl/string_view.hxx>
+#include <rtl/ustring.hxx>
+
+#include <algorithm>
+#include <mutex>
+#include <numeric>
+
+using namespace com::sun::star::uno;
+using namespace com::sun::star::i18n;
+using namespace com::sun::star::lang;
+
+
+namespace i18npool {
+
+// Builds a TMList entry for a module that exists in both enums: the old-style
+// TransliterationModules_<id> value, the TransliterationModulesNew_<id> value,
+// and <id> itself as the implementation name.
+#define TmItem1( id ) \
+    {TransliterationModules_##id, TransliterationModulesNew_##id, #id}
+
+// Builds a TMList entry for a module that exists only in
+// TransliterationModulesNew; the old-style value is set to 0.
+#define TmItem2( id ) \
+    {TransliterationModules(0), TransliterationModulesNew_##id, #id}
+
+namespace {
+
+// One row of the module list: ties the two enum representations of a
+// transliteration module to the implementation name used to load it.
+struct TMList {
+ // Old-style module value; 0 for modules that only exist in the new enum.
+ TransliterationModules tm;
+ // New-style module value; 0 only in the terminating row.
+ TransliterationModulesNew tmn;
+ // Name passed to loadModuleByName; nullptr in the terminating row.
+ const char *implName;
+};
+
+}
+
+// Module list, terminated by an all-zero row.
+//
+// Row order matters. loadModule walks the rows from the top while the row's
+// `tm` value intersects the selected ignore mask, so the ignore modules must
+// come first, with IGNORE_CASE / IGNORE_WIDTH / IGNORE_KANA ahead of the
+// others. Its non-ignore search stops at the first row whose `tm` is 0, so
+// the TmItem2 rows must come after every row with a non-zero `tm`.
+//
+// Trailing comments give: original index, (bit expression and decimal value
+// of the old-style enum), (value of the new-style enum). The shift exponents
+// were off by one from entry 5 onward and have been corrected to agree with
+// the decimal values; NOTE(review): confirm against TransliterationModules.idl.
+TMList const TMlist[] = { // Modules ModulesNew
+ TmItem1 (IGNORE_CASE), // 0. (1<<8 256) (7)
+ TmItem1 (IGNORE_WIDTH), // 1. (1<<9 512) (8)
+ TmItem1 (IGNORE_KANA), // 2. (1<<10 1024) (9)
+// No enum define for this trans. application has to use impl name to load it
+// TmItem1 (IGNORE_CASE_SIMPLE), // (1<<11 1024) (66)
+
+ {TransliterationModules_IgnoreTraditionalKanji_ja_JP,
+ TransliterationModulesNew_IgnoreTraditionalKanji_ja_JP, "ignoreTraditionalKanji_ja_JP"},
+ // 3. (1<<12 4096) (10)
+ {TransliterationModules_IgnoreTraditionalKana_ja_JP,
+ TransliterationModulesNew_IgnoreTraditionalKana_ja_JP, "ignoreTraditionalKana_ja_JP"},
+ // 4. (1<<13 8192) (11)
+ {TransliterationModules_IgnoreMinusSign_ja_JP, TransliterationModulesNew_IgnoreMinusSign_ja_JP,
+ "ignoreMinusSign_ja_JP"}, // 5. (1<<14 16384) (12)
+ {TransliterationModules_IgnoreIterationMark_ja_JP,
+ TransliterationModulesNew_IgnoreIterationMark_ja_JP, "ignoreIterationMark_ja_JP"},
+ // 6. (1<<15 32768) (13)
+ {TransliterationModules_IgnoreSeparator_ja_JP, TransliterationModulesNew_IgnoreSeparator_ja_JP,
+ "ignoreSeparator_ja_JP"}, // 7. (1<<16 65536) (14)
+ {TransliterationModules_IgnoreSize_ja_JP, TransliterationModulesNew_IgnoreSize_ja_JP,
+ "ignoreSize_ja_JP"}, // 15. (1<<24 16777216) (22)
+ {TransliterationModules_IgnoreMiddleDot_ja_JP, TransliterationModulesNew_IgnoreMiddleDot_ja_JP,
+ "ignoreMiddleDot_ja_JP"}, // 17. (1<<26 67108864) (24)
+ {TransliterationModules_IgnoreSpace_ja_JP, TransliterationModulesNew_IgnoreSpace_ja_JP,
+ "ignoreSpace_ja_JP"}, // 18. (1<<27 134217728) (25)
+ {TransliterationModules_IgnoreZiZu_ja_JP, TransliterationModulesNew_IgnoreZiZu_ja_JP,
+ "ignoreZiZu_ja_JP"}, // 8. (1<<17 131072) (15)
+ {TransliterationModules_IgnoreBaFa_ja_JP, TransliterationModulesNew_IgnoreBaFa_ja_JP,
+ "ignoreBaFa_ja_JP"}, // 9. (1<<18 262144) (16)
+ {TransliterationModules_IgnoreTiJi_ja_JP, TransliterationModulesNew_IgnoreTiJi_ja_JP,
+ "ignoreTiJi_ja_JP"}, // 10. (1<<19 524288) (17)
+ {TransliterationModules_IgnoreHyuByu_ja_JP, TransliterationModulesNew_IgnoreHyuByu_ja_JP,
+ "ignoreHyuByu_ja_JP"}, // 11. (1<<20 1048576) (18)
+ {TransliterationModules_IgnoreSeZe_ja_JP, TransliterationModulesNew_IgnoreSeZe_ja_JP,
+ "ignoreSeZe_ja_JP"}, // 12. (1<<21 2097152) (19)
+ {TransliterationModules_IgnoreIandEfollowedByYa_ja_JP,
+ TransliterationModulesNew_IgnoreIandEfollowedByYa_ja_JP, "ignoreIandEfollowedByYa_ja_JP"},
+ // 13. (1<<22 4194304) (20)
+ {TransliterationModules_IgnoreKiKuFollowedBySa_ja_JP,
+ TransliterationModulesNew_IgnoreKiKuFollowedBySa_ja_JP, "ignoreKiKuFollowedBySa_ja_JP"},
+ // 14. (1<<23 8388608) (21)
+ {TransliterationModules_IgnoreProlongedSoundMark_ja_JP,
+ TransliterationModulesNew_IgnoreProlongedSoundMark_ja_JP, "ignoreProlongedSoundMark_ja_JP"},
+ // 16. (1<<25 33554432) (23)
+
+ TmItem1 (UPPERCASE_LOWERCASE), // 19. (1) (1)
+ TmItem1 (LOWERCASE_UPPERCASE), // 20. (2) (2)
+ TmItem1 (HALFWIDTH_FULLWIDTH), // 21. (3) (3)
+ TmItem1 (FULLWIDTH_HALFWIDTH), // 22. (4) (4)
+ TmItem1 (KATAKANA_HIRAGANA), // 23. (5) (5)
+ TmItem1 (HIRAGANA_KATAKANA), // 24. (6) (6)
+
+ {TransliterationModules_SmallToLarge_ja_JP, TransliterationModulesNew_SmallToLarge_ja_JP,
+ "smallToLarge_ja_JP"}, // 25. (1<<28 268435456) (26)
+ {TransliterationModules_LargeToSmall_ja_JP, TransliterationModulesNew_LargeToSmall_ja_JP,
+ "largeToSmall_ja_JP"}, // 26. (1<<29 536870912) (27)
+ TmItem2 (NumToTextLower_zh_CN), // 27. () (28)
+ TmItem2 (NumToTextUpper_zh_CN), // 28. () (29)
+ TmItem2 (NumToTextLower_zh_TW), // 29. () (30)
+ TmItem2 (NumToTextUpper_zh_TW), // 30. () (31)
+ TmItem2 (NumToTextFormalHangul_ko), // 31. () (32)
+ TmItem2 (NumToTextFormalLower_ko), // 32. () (33)
+ TmItem2 (NumToTextFormalUpper_ko), // 33. () (34)
+ TmItem2 (NumToTextInformalHangul_ko), // 34. () (35)
+ TmItem2 (NumToTextInformalLower_ko), // 35. () (36)
+ TmItem2 (NumToTextInformalUpper_ko), // 36. () (37)
+ TmItem2 (NumToCharLower_zh_CN), // 37. () (38)
+ TmItem2 (NumToCharUpper_zh_CN), // 38. () (39)
+ TmItem2 (NumToCharLower_zh_TW), // 39. () (40)
+ TmItem2 (NumToCharUpper_zh_TW), // 40. () (41)
+ TmItem2 (NumToCharHangul_ko), // 41. () (42)
+ TmItem2 (NumToCharLower_ko), // 42. () (43)
+ TmItem2 (NumToCharUpper_ko), // 43. () (44)
+ TmItem2 (NumToCharFullwidth), // 44. () (45)
+ TmItem2 (NumToCharKanjiShort_ja_JP), // 45. () (46)
+ TmItem2 (TextToNumLower_zh_CN), // 46. () (47)
+ TmItem2 (TextToNumUpper_zh_CN), // 47. () (48)
+ TmItem2 (TextToNumLower_zh_TW), // 48. () (49)
+ TmItem2 (TextToNumUpper_zh_TW), // 49. () (50)
+ TmItem2 (TextToNumFormalHangul_ko), // 50. () (51)
+ TmItem2 (TextToNumFormalLower_ko), // 51. () (52)
+ TmItem2 (TextToNumFormalUpper_ko), // 52. () (53)
+ TmItem2 (TextToNumInformalHangul_ko), // 53. () (54)
+ TmItem2 (TextToNumInformalLower_ko), // 54. () (55)
+ TmItem2 (TextToNumInformalUpper_ko), // 55. () (56)
+
+ TmItem2 (CharToNumLower_zh_CN), // 56. () (59)
+ TmItem2 (CharToNumUpper_zh_CN), // 57. () (60)
+ TmItem2 (CharToNumLower_zh_TW), // 58. () (61)
+ TmItem2 (CharToNumUpper_zh_TW), // 59. () (62)
+ TmItem2 (CharToNumHangul_ko), // 60. () (63)
+ TmItem2 (CharToNumLower_ko), // 61. () (64)
+ TmItem2 (CharToNumUpper_ko), // 62. () (65)
+
+// no enum defined for these trans. application has to use impl name to load them
+// TmItem2 (NumToCharArabic_Indic), // () (67)
+// TmItem2 (NumToCharEstern_Arabic_Indic),// () (68)
+// TmItem2 (NumToCharIndic), // () (69)
+// TmItem2 (NumToCharThai), // () (70)
+ {TransliterationModules(0), TransliterationModulesNew(0), nullptr}
+};
+
+// Constructor/Destructor
+// Creates an implementation with no modules loaded and obtains the locale
+// data service from the given component context.
+TransliterationImpl::TransliterationImpl(const Reference <XComponentContext>& xContext) : mxContext(xContext)
+{
+    // Empty cascade to begin with.
+    caseignoreOnly = true;
+    numCascade = 0;
+
+    mxLocaledata.set(LocaleData2::create(xContext));
+}
+
+// Releases the locale data reference, then resets the object via clear().
+TransliterationImpl::~TransliterationImpl()
+{
+ mxLocaledata.clear();
+ clear();
+}
+
+
+// Methods
+// Returns the name of the single loaded module, or "Not Loaded" when nothing
+// is loaded. Throws RuntimeException when more than one module is loaded or
+// the only cascade slot holds no object.
+OUString SAL_CALL
+TransliterationImpl::getName()
+{
+    if (numCascade < 1)
+        return ( OUString("Not Loaded"));
+
+    const bool bSingleModule = numCascade == 1 && bodyCascade[0].is();
+    if (!bSingleModule)
+        throw RuntimeException();
+
+    return bodyCascade[0]->getName();
+}
+
+// Returns CASCADE|IGNORE when several modules are loaded, otherwise the type
+// of the single loaded module. Throws RuntimeException when nothing is loaded
+// or the only cascade slot holds no object.
+sal_Int16 SAL_CALL
+TransliterationImpl::getType()
+{
+    if (numCascade <= 0)
+        throw RuntimeException();
+
+    if (numCascade > 1)
+        return (TransliterationType::CASCADE|TransliterationType::IGNORE);
+
+    // Exactly one module from here on.
+    if (!bodyCascade[0].is())
+        throw RuntimeException();
+
+    return bodyCascade[0]->getType();
+}
+
+// Bitwise AND for TransliterationModules, computed on the sal_Int32 values.
+static TransliterationModules operator&(TransliterationModules lhs, TransliterationModules rhs) {
+    const sal_Int32 nLeft = static_cast<sal_Int32>(lhs);
+    const sal_Int32 nRight = static_cast<sal_Int32>(rhs);
+    return static_cast<TransliterationModules>(nLeft & nRight);
+}
+// Bitwise OR for TransliterationModules, computed on the sal_Int32 values.
+static TransliterationModules operator|(TransliterationModules lhs, TransliterationModules rhs) {
+    const sal_Int32 nLeft = static_cast<sal_Int32>(lhs);
+    const sal_Int32 nRight = static_cast<sal_Int32>(rhs);
+    return static_cast<TransliterationModules>(nLeft | nRight);
+}
+
+// Replaces the current cascade with the modules selected by modType.
+//
+// modType  either a combination of ignore modules or a single non-ignore
+//          module; mixing both kinds throws RuntimeException.
+// rLocale  forwarded to loadModuleByName for every module.
+//
+// Each module that loadModuleByName reports as loaded occupies the next
+// bodyCascade slot and increments numCascade.
+void SAL_CALL
+TransliterationImpl::loadModule( TransliterationModules modType, const Locale& rLocale )
+{
+    clear();
+
+    const bool bWantsIgnore = bool(modType & TransliterationModules_IGNORE_MASK);
+    const bool bWantsNonIgnore = bool(modType & TransliterationModules_NON_IGNORE_MASK);
+
+    if (bWantsIgnore && bWantsNonIgnore)
+        throw RuntimeException();
+
+    if (bWantsIgnore)
+    {
+#define TransliterationModules_IGNORE_CASE_MASK (TransliterationModules_IGNORE_CASE | \
+                                                TransliterationModules_IGNORE_WIDTH | \
+                                                TransliterationModules_IGNORE_KANA)
+        // When only case/width/kana bits are requested, restrict the table
+        // walk to those leading rows; otherwise walk all ignore rows.
+        TransliterationModules mask = TransliterationModules_IGNORE_MASK;
+        if ((modType & TransliterationModules_IGNORE_CASE_MASK) == modType)
+            mask = TransliterationModules_IGNORE_CASE_MASK;
+
+        // The walk ends at the first row that does not intersect the mask.
+        for (const TMList* pRow = TMlist; bool(pRow->tm & mask); ++pRow)
+        {
+            if (bool(modType & pRow->tm)
+                && loadModuleByName(OUString::createFromAscii(pRow->implName),
+                                    bodyCascade[numCascade], rLocale))
+            {
+                numCascade++;
+            }
+        }
+
+        // additional transliterations from TransliterationModulesExtra (we cannot extend TransliterationModules)
+        if (bool(modType & TransliterationModules(TransliterationModulesExtra::IGNORE_DIACRITICS_CTL))
+            && loadModuleByName(u"ignoreDiacritics_CTL", bodyCascade[numCascade], rLocale))
+        {
+            numCascade++;
+        }
+        if (bool(modType & TransliterationModules(TransliterationModulesExtra::IGNORE_KASHIDA_CTL))
+            && loadModuleByName(u"ignoreKashida_CTL", bodyCascade[numCascade], rLocale))
+        {
+            numCascade++;
+        }
+        return;
+    }
+
+    if (!bWantsNonIgnore)
+        return;
+
+    // Non-ignore request: find the one row whose old-style value equals
+    // modType exactly. The walk ends at the first row whose value is 0.
+    for (const TMList* pRow = TMlist; bool(pRow->tm); ++pRow)
+    {
+        if (pRow->tm != modType)
+            continue;
+
+        if (loadModuleByName(OUString::createFromAscii(pRow->implName), bodyCascade[numCascade], rLocale))
+            numCascade++;
+        break;
+    }
+}
+
+/** Loads a cascade of modules selected by their TransliterationModulesNew ids.
+
+    Any previously loaded modules are discarded first. Each requested id is
+    looked up in TMlist and the matching implementation is appended to the
+    cascade.
+
+    @param modType  requested module ids, at most maxCascade entries
+    @param rLocale  locale handed to every loaded module
+    @throws RuntimeException  if more than maxCascade modules are requested,
+            or if the first resolved module is an ignore module and a later
+            one is not
+*/
+void SAL_CALL
+TransliterationImpl::loadModuleNew( const Sequence < TransliterationModulesNew > & modType, const Locale& rLocale )
+{
+    clear();
+    TransliterationModules mask = TransliterationModules_END_OF_MODULE;
+    sal_Int32 count = modType.getLength();
+    if (count > maxCascade)
+        throw RuntimeException(); // could not handle more than maxCascade
+    for (sal_Int32 i = 0; i < count; i++) {
+        for (sal_Int16 j = 0; bool(TMlist[j].tmn); j++) {
+            if (TMlist[j].tmn == modType[i]) {
+                // Classify the module by the table entry that actually matched (index j);
+                // i only indexes the request sequence and says nothing about TMlist.
+                const TransliterationModules tm = TMlist[j].tm;
+                if (mask == TransliterationModules_END_OF_MODULE)
+                    mask = bool(tm) && bool(tm & TransliterationModules_IGNORE_MASK) ?
+                        TransliterationModules_IGNORE_MASK : TransliterationModules_NON_IGNORE_MASK;
+                else if (mask == TransliterationModules_IGNORE_MASK &&
+                        (tm & TransliterationModules_IGNORE_MASK) == TransliterationModules_END_OF_MODULE)
+                    throw RuntimeException(); // could not mess up ignore trans. with non_ignore trans.
+                if (loadModuleByName(OUString::createFromAscii(TMlist[j].implName), bodyCascade[numCascade], rLocale))
+                    numCascade++;
+                break;
+            }
+        }
+    }
+}
+
+void SAL_CALL
+TransliterationImpl::loadModuleByImplName(const OUString& implName, const Locale& rLocale)
+{
+    // Replace whatever was loaded before with the single named module.
+    clear();
+    const bool bLoaded = loadModuleByName(implName, bodyCascade[numCascade], rLocale);
+    if (bLoaded)
+        ++numCascade;
+}
+
+
+void SAL_CALL
+TransliterationImpl::loadModulesByImplNames(const Sequence< OUString >& implNameList, const Locale& rLocale )
+{
+    // Reject an empty list and lists that do not fit into the cascade.
+    const sal_Int32 nNames = implNameList.getLength();
+    if (nNames <= 0 || nNames > maxCascade)
+        throw RuntimeException();
+
+    clear();
+    for (sal_Int32 n = 0; n < nNames; ++n)
+    {
+        // Only successfully loaded modules occupy a cascade slot.
+        if (loadModuleByName(implNameList[n], bodyCascade[numCascade], rLocale))
+            ++numCascade;
+    }
+}
+
+
+Sequence<OUString> SAL_CALL
+TransliterationImpl::getAvailableModules( const Locale& rLocale, sal_Int16 sType )
+{
+    // Candidates are the transliterations the locale data lists for rLocale.
+    const Sequence<OUString> aCandidates = mxLocaledata->getTransliterations(rLocale);
+
+    std::vector<OUString> aMatching;
+    aMatching.reserve(aCandidates.getLength());
+
+    Reference<XExtendedTransliteration> xModule;
+    for (const OUString& rCandidate : aCandidates)
+    {
+        if (!loadModuleByName(rCandidate, xModule, rLocale))
+            continue;
+
+        // Keep the module when its type shares at least one bit with sType.
+        if (xModule->getType() & sType)
+            aMatching.push_back(rCandidate);
+        xModule.clear();
+    }
+    return comphelper::containerToSequence(aMatching);
+}
+
+
+/** Transliterates inStr[startPos, startPos+nCount) through the loaded cascade.
+
+    @param inStr     input string
+    @param startPos  index of the first character to process
+    @param nCount    number of characters to process
+    @param offset    receives, for every character of the result, the index
+                     in inStr it originates from
+    @return the transliterated text; inStr itself when no module is loaded
+*/
+OUString SAL_CALL
+TransliterationImpl::transliterate( const OUString& inStr, sal_Int32 startPos, sal_Int32 nCount,
+                    Sequence< sal_Int32 >& offset )
+{
+    if (numCascade == 0)
+        return inStr;
+
+    // Size the offset sequence to the input range, exactly as folding() does.
+    // The cascade branch below seeds it with identity offsets and indexes into
+    // it, so a caller-supplied sequence of a different length (e.g. the empty
+    // one passed by compareSubstring()) would otherwise be read out of bounds.
+    if (offset.getLength() != nCount)
+        offset.realloc(nCount);
+
+    if (numCascade == 1)
+    {
+        if ( startPos == 0 && nCount == inStr.getLength() )
+            return bodyCascade[0]->transliterate( inStr, 0, nCount, offset);
+        else
+        {
+            OUString tmpStr = inStr.copy(startPos, nCount);
+            tmpStr = bodyCascade[0]->transliterate(tmpStr, 0, nCount, offset);
+            if ( startPos )
+            {
+                // the module saw a substring starting at 0; shift back to inStr indices
+                for (sal_Int32 & j : asNonConstRange(offset))
+                    j += startPos;
+            }
+            return tmpStr;
+        }
+    }
+    else
+    {
+        OUString tmpStr = inStr.copy(startPos, nCount);
+
+        // start with the identity mapping into inStr
+        auto [begin, end] = asNonConstRange(offset);
+        std::iota(begin, end, startPos);
+
+        Sequence<sal_Int32> from(nCount);
+        Sequence<sal_Int32> to = offset;
+        for (sal_Int32 i = 0; i < numCascade; i++) {
+            tmpStr = bodyCascade[i]->transliterate(tmpStr, 0, nCount, from);
+
+            nCount = tmpStr.getLength();
+
+            assert(from.getLength() == nCount);
+            // After the swap "to" holds this stage's offsets (relative to the
+            // previous stage's text) and "from" the accumulated offsets into
+            // inStr; composing them yields the new accumulated offsets.
+            from.swap(to);
+            for (sal_Int32& ix : asNonConstRange(to))
+                ix = from[ix];
+        }
+        offset = to;
+        return tmpStr;
+    }
+}
+
+
+OUString SAL_CALL
+TransliterationImpl::folding( const OUString& inStr, sal_Int32 startPos, sal_Int32 nCount,
+                    Sequence< sal_Int32 >& offset )
+{
+    // Without any module the input is handed back untouched.
+    if (numCascade == 0)
+        return inStr;
+
+    if (offset.getLength() != nCount)
+        offset.realloc(nCount);
+
+    // Single module working on the whole string: no copy, no offset fix-up.
+    const bool bWholeString = (startPos == 0) && (nCount == inStr.getLength());
+    if (numCascade == 1 && bWholeString)
+        return bodyCascade[0]->folding( inStr, 0, nCount, offset);
+
+    OUString aFolded = inStr.copy(startPos, nCount);
+
+    if (numCascade == 1)
+    {
+        aFolded = bodyCascade[0]->folding(aFolded, 0, nCount, offset);
+        if (startPos != 0)
+        {
+            // The module saw a substring starting at 0; shift back to inStr indices.
+            for (sal_Int32& rPos : asNonConstRange(offset))
+                rPos += startPos;
+        }
+        return aFolded;
+    }
+
+    // Cascade: begin with the identity mapping into inStr ...
+    auto [pFirst, pLast] = asNonConstRange(offset);
+    std::iota(pFirst, pLast, startPos);
+
+    Sequence<sal_Int32> aStage;
+    Sequence<sal_Int32> aTotal = offset;
+
+    // ... and compose it with the offsets reported by every stage.
+    for (sal_Int32 nStage = 0; nStage < numCascade; ++nStage)
+    {
+        aFolded = bodyCascade[nStage]->folding(aFolded, 0, nCount, aStage);
+        nCount = aFolded.getLength();
+        assert(aStage.getLength() == nCount);
+
+        // aTotal now carries the stage offsets, aStage the previous totals.
+        aStage.swap(aTotal);
+        for (sal_Int32& rIdx : asNonConstRange(aTotal))
+            rIdx = aStage[rIdx];
+    }
+    offset = aTotal;
+    return aFolded;
+}
+
+OUString SAL_CALL
+TransliterationImpl::transliterateString2String( const OUString& inStr, sal_Int32 startPos, sal_Int32 nCount )
+{
+    // No module loaded: the input string is returned as is.
+    if (numCascade == 0)
+        return inStr;
+
+    // The first module works on the requested range; every further module
+    // processes the complete output of its predecessor.
+    OUString aResult = bodyCascade[0]->transliterateString2String(inStr, startPos, nCount);
+    for (sal_Int32 nStage = 1; nStage < numCascade; ++nStage)
+        aResult = bodyCascade[nStage]->transliterateString2String(aResult, 0, aResult.getLength());
+    return aResult;
+}
+
+OUString SAL_CALL
+TransliterationImpl::transliterateChar2String( sal_Unicode inChar )
+{
+    // No module loaded: the character becomes a one-character string.
+    if (numCascade == 0)
+        return OUString(&inChar, 1);
+
+    // The first module maps the character to a string; the remaining modules
+    // transliterate that string in full.
+    OUString aResult = bodyCascade[0]->transliterateChar2String(inChar);
+    for (sal_Int32 nStage = 1; nStage < numCascade; ++nStage)
+        aResult = bodyCascade[nStage]->transliterateString2String(aResult, 0, aResult.getLength());
+    return aResult;
+}
+
+sal_Unicode SAL_CALL
+TransliterationImpl::transliterateChar2Char( sal_Unicode inChar )
+{
+    // Feed the character through every loaded module in cascade order.
+    sal_Unicode cResult = inChar;
+    sal_Int32 nStage = 0;
+    while (nStage < numCascade)
+    {
+        cResult = bodyCascade[nStage]->transliterateChar2Char(cResult);
+        ++nStage;
+    }
+    return cResult;
+}
+
+
+/** Compares two substrings after folding them through the loaded modules.
+
+    @param str1, pos1, nCount1  first string and the range to compare; a
+                                negative count denotes the range ending at pos
+    @param nMatch1              receives the number of characters of the first
+                                range that matched
+    @param str2, pos2, nCount2  second string and range, same conventions
+    @param nMatch2              receives the number of characters of the
+                                second range that matched
+    @return true if the folded ranges are identical
+*/
+sal_Bool SAL_CALL
+TransliterationImpl::equals(
+    const OUString& str1, sal_Int32 pos1, sal_Int32 nCount1, sal_Int32& nMatch1,
+    const OUString& str2, sal_Int32 pos2, sal_Int32 nCount2, sal_Int32& nMatch2)
+{
+    // since this is an API function make it user fail safe
+    if ( nCount1 < 0 ) {
+        pos1 += nCount1;
+        nCount1 = -nCount1;
+    }
+    if ( nCount2 < 0 ) {
+        pos2 += nCount2;
+        nCount2 = -nCount2;
+    }
+    if ( !nCount1 || !nCount2 ||
+            pos1 >= str1.getLength() || pos2 >= str2.getLength() ||
+            pos1 < 0 || pos2 < 0 ) {
+        nMatch1 = nMatch2 = 0;
+        // two empty strings return true, else false
+        return !nCount1 && !nCount2 && pos1 == str1.getLength() && pos2 == str2.getLength();
+    }
+    // clip both ranges to the end of their strings
+    if ( pos1 + nCount1 > str1.getLength() )
+        nCount1 = str1.getLength() - pos1;
+    if ( pos2 + nCount2 > str2.getLength() )
+        nCount2 = str2.getLength() - pos2;
+
+    // only case/width/kana ignore modules loaded: the caseignore helper does the comparison
+    if (caseignoreOnly && caseignore.is())
+        return caseignore->equals(str1, pos1, nCount1, nMatch1, str2, pos2, nCount2, nMatch2);
+
+    Sequence<sal_Int32> offset1, offset2;
+
+    OUString tmpStr1 = folding(str1, pos1, nCount1, offset1);
+    OUString tmpStr2 = folding(str2, pos2, nCount2, offset2);
+    // Length of offset1 and offset2 may still be 0 if there was no folding
+    // necessary!
+
+    const sal_Unicode *p1 = tmpStr1.getStr();
+    const sal_Unicode *p2 = tmpStr2.getStr();
+    sal_Int32 i, nLen = ::std::min( tmpStr1.getLength(), tmpStr2.getLength());
+    for (i = 0; i < nLen; ++i, ++p1, ++p2 ) {
+        if (*p1 != *p2) {
+            // return number of matched code points so far
+            nMatch1 = (i < offset1.getLength()) ? offset1.getConstArray()[i] : i;
+            nMatch2 = (i < offset2.getLength()) ? offset2.getConstArray()[i] : i;
+            return false;
+        }
+    }
+    // i==nLen
+    if ( tmpStr1.getLength() != tmpStr2.getLength() ) {
+        // return number of matched code points so far
+        // If one folded string is empty then i == 0: nothing matched and there
+        // is no offset entry at i-1, so report 0 instead of reading index -1.
+        nMatch1 = (i > 0 && i <= offset1.getLength()) ? offset1.getConstArray()[i-1] + 1 : i;
+        nMatch2 = (i > 0 && i <= offset2.getLength()) ? offset2.getConstArray()[i-1] + 1 : i;
+        return false;
+    } else {
+        nMatch1 = nCount1;
+        nMatch2 = nCount2;
+        return true;
+    }
+}
+
+Sequence< OUString >
+TransliterationImpl::getRange(const Sequence< OUString > &inStrs,
+                const sal_Int32 length, sal_Int16 _numCascade)
+{
+    // Recursion ends once every cascade stage has been applied
+    // (or the current stage holds no module).
+    if (_numCascade >= numCascade || !bodyCascade[_numCascade].is())
+        return inStrs;
+
+    // A stage may at most double the number of strings.
+    constexpr sal_Int32 nMaxOutput = 2;
+    const sal_Int32 nLimit = nMaxOutput * length;
+
+    std::vector<OUString> aCollected;
+    aCollected.reserve(nLimit);
+    sal_Int32 nCollected = 0;
+
+    // The input is consumed as (lower, upper) pairs.
+    for (sal_Int32 nPair = 0; nPair < length; nPair += 2)
+    {
+        const Sequence< OUString > aPart =
+            bodyCascade[_numCascade]->transliterateRange(inStrs[nPair], inStrs[nPair + 1]);
+
+        for (const OUString& rPart : aPart)
+        {
+            if (nCollected >= nLimit)
+                throw RuntimeException();
+            ++nCollected;
+            aCollected.push_back(rPart);
+        }
+    }
+
+    // Hand the collected ranges to the next stage.
+    return getRange(comphelper::containerToSequence(aCollected), nCollected,
+                    static_cast<sal_Int16>(_numCascade + 1));
+}
+
+
+Sequence< OUString > SAL_CALL
+TransliterationImpl::transliterateRange( const OUString& str1, const OUString& str2 )
+{
+    // Any cascade size other than one is handled stage by stage through getRange(),
+    // starting from the single pair (str1, str2).
+    if (numCascade != 1)
+    {
+        const Sequence< OUString > aInitialPair{ str1, str2 };
+        return getRange(aInitialPair, 2, 0);
+    }
+
+    // A single module answers directly.
+    return bodyCascade[0]->transliterateRange(str1, str2);
+}
+
+
+sal_Int32 SAL_CALL
+TransliterationImpl::compareSubstring(
+    const OUString& str1, sal_Int32 off1, sal_Int32 len1,
+    const OUString& str2, sal_Int32 off2, sal_Int32 len2)
+{
+    // Only case/width/kana ignore modules loaded: the caseignore helper compares.
+    if (caseignoreOnly && caseignore.is())
+        return caseignore->compareSubstring(str1, off1, len1, str2, off2, len2);
+
+    // The offsets are of no interest here; one scratch sequence serves both calls.
+    Sequence <sal_Int32> aScratch;
+    const OUString aLeft = transliterate(str1, off1, len1, aScratch);
+    const OUString aRight = transliterate(str2, off2, len2, aScratch);
+
+    const sal_Unicode* pLeft = aLeft.getStr();
+    const sal_Unicode* pRight = aRight.getStr();
+    const sal_Int32 nLeft = aLeft.getLength();
+    const sal_Int32 nRight = aRight.getLength();
+    const sal_Int32 nCommon = ::std::min(nLeft, nRight);
+
+    // The first differing code unit decides ...
+    for (sal_Int32 n = 0; n < nCommon; ++n)
+    {
+        if (pLeft[n] != pRight[n])
+            return pLeft[n] > pRight[n] ? 1 : -1;
+    }
+
+    // ... otherwise the longer string is the greater one.
+    if (nLeft == nRight)
+        return 0;
+    return nLeft > nRight ? 1 : -1;
+}
+
+
+sal_Int32 SAL_CALL
+TransliterationImpl::compareString(const OUString& str1, const OUString& str2 )
+{
+    // Without a usable caseignore helper, compare both strings in full via compareSubstring().
+    const bool bUseCaseIgnore = caseignoreOnly && caseignore.is();
+    if (!bUseCaseIgnore)
+        return compareSubstring(str1, 0, str1.getLength(), str2, 0, str2.getLength());
+
+    return caseignore->compareString(str1, str2);
+}
+
+
+void
+TransliterationImpl::clear()
+{
+    // Release every module currently held in the cascade.
+    for (sal_Int32 nSlot = 0; nSlot != numCascade; ++nSlot)
+    {
+        auto& rSlot = bodyCascade[nSlot];
+        if (rSlot.is())
+            rSlot.clear();
+    }
+
+    // Back to the state of a freshly constructed instance.
+    numCascade = 0;
+    caseignore.clear();
+    caseignoreOnly = true;
+}
+
+namespace
+{
+    /** Name/instance pair used by loadBody() to remember the most recently
+        created transliteration body. */
+    struct TransBody
+    {
+        /// implementation name the cached instance was created for
+        OUString Name;
+        /// the cached transliteration instance
+        css::uno::Reference< css::i18n::XExtendedTransliteration > Body;
+    };
+}
+
+void TransliterationImpl::loadBody( OUString const &implName, Reference<XExtendedTransliteration>& body )
+{
+    assert(!implName.isEmpty());
+
+    // One cache entry shared by all instances, guarded by its own mutex.
+    static std::mutex transBodyMutex;
+    std::unique_lock guard(transBodyMutex);
+    static TransBody lastTransBody;
+
+    const bool bCached = (implName == lastTransBody.Name);
+    if (!bCached)
+    {
+        // Different implementation requested: instantiate it and remember it.
+        // The name is updated only after the instance was created successfully.
+        auto xInstance = mxContext->getServiceManager()->createInstanceWithContext(implName, mxContext);
+        lastTransBody.Body.set(xInstance, UNO_QUERY_THROW);
+        lastTransBody.Name = implName;
+    }
+    body = lastTransBody.Body;
+}
+
+bool
+TransliterationImpl::loadModuleByName( std::u16string_view implName,
+        Reference<XExtendedTransliteration>& body, const Locale& rLocale)
+{
+    OUString cname = OUString::Concat(TRLT_IMPLNAME_PREFIX) + implName;
+    loadBody(cname, body);
+    if (!body.is())
+        return false;
+
+    body->loadModule(TransliterationModules(0), rLocale); // the module needs rLocale
+
+    // The first three TMlist entries are the ignore case/kana/width modules;
+    // find out whether implName is one of them.
+    sal_Int16 nIgnoreIdx = 0;
+    while (nIgnoreIdx < 3 && !o3tl::equalsAscii(implName, TMlist[nIgnoreIdx].implName))
+        ++nIgnoreIdx;
+
+    if (nIgnoreIdx == 3)
+    {
+        // has other module than just ignore case/kana/width
+        caseignoreOnly = false;
+        return true;
+    }
+
+    // For these modules the caseignore helper is loaded as well; it serves
+    // the equals/compareString methods.
+    if (nIgnoreIdx == 0) // current module is caseignore
+        body->loadModule(TMlist[0].tm, rLocale); // caseignore need to setup module name
+
+    if (!caseignore.is())
+    {
+        OUString bname = TRLT_IMPLNAME_PREFIX +
+            OUString::createFromAscii(TMlist[0].implName);
+        loadBody(bname, caseignore);
+    }
+    if (caseignore.is())
+        caseignore->loadModule(TMlist[nIgnoreIdx].tm, rLocale);
+    return true;
+}
+
+OUString SAL_CALL
+TransliterationImpl::getImplementationName()
+{
+    // Fixed implementation name of this service.
+    return OUString("com.sun.star.i18n.Transliteration");
+}
+
+sal_Bool SAL_CALL
+TransliterationImpl::supportsService(const OUString& rServiceName)
+{
+    // Delegate to the generic cppu helper.
+    const bool bSupported = cppu::supportsService(this, rServiceName);
+    return bSupported;
+}
+
+Sequence< OUString > SAL_CALL
+TransliterationImpl::getSupportedServiceNames()
+{
+    // Exactly one service name is supported.
+    Sequence< OUString > aServiceNames{ "com.sun.star.i18n.Transliteration" };
+    return aServiceNames;
+}
+
+}
+
+// Component factory entry point: creates a TransliterationImpl for the given
+// context and returns it acquired; the argument sequence is not used.
+extern "C" SAL_DLLPUBLIC_EXPORT css::uno::XInterface *
+com_sun_star_i18n_Transliteration_get_implementation(
+    css::uno::XComponentContext *context,
+    css::uno::Sequence<css::uno::Any> const &)
+{
+    i18npool::TransliterationImpl* pImpl = new i18npool::TransliterationImpl(context);
+    return cppu::acquire(pImpl);
+}
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/i18npool/source/transliteration/transliteration_Ignore.cxx b/i18npool/source/transliteration/transliteration_Ignore.cxx
new file mode 100644
index 0000000000..b3c9dc96cd
--- /dev/null
+++ b/i18npool/source/transliteration/transliteration_Ignore.cxx
@@ -0,0 +1,196 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * This file incorporates work covered by the following license notice:
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed
+ * with this work for additional information regarding copyright
+ * ownership. The ASF licenses this file to you under the Apache
+ * License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of
+ * the License at http://www.apache.org/licenses/LICENSE-2.0 .
+ */
+
+#include <com/sun/star/i18n/TransliterationType.hpp>
+
+#include <transliteration_Ignore.hxx>
+#include <i18nutil/oneToOneMapping.hxx>
+
+using namespace com::sun::star::i18n;
+using namespace com::sun::star::uno;
+
+namespace i18npool {
+
+sal_Bool SAL_CALL
+transliteration_Ignore::equals(const OUString& str1, sal_Int32 pos1, sal_Int32 nCount1, sal_Int32& nMatch1,
+        const OUString& str2, sal_Int32 pos2, sal_Int32 nCount2, sal_Int32& nMatch2 )
+{
+    Sequence< sal_Int32 > aOffsets1;
+    Sequence< sal_Int32 > aOffsets2;
+
+    // Fold both ranges and compare the folded text.
+    const OUString aFolded1 = folding( str1, pos1, nCount1, aOffsets1);
+    const OUString aFolded2 = folding( str2, pos2, nCount2, aOffsets2);
+
+    const sal_Unicode * pFolded1 = aFolded1.getStr();
+    const sal_Unicode * pFolded2 = aFolded2.getStr();
+    const sal_Int32 nCommon = std::min(aFolded1.getLength(), aFolded2.getLength());
+
+    // Length of the common prefix of the two folded strings.
+    sal_Int32 nSame = 0;
+    while (nSame < nCommon && pFolded1[nSame] == pFolded2[nSame])
+        ++nSame;
+
+    if (nSame == 0) {
+        // No character was matched.
+        nMatch1 = 0;
+        nMatch2 = 0;
+    }
+    else {
+        // Offset of the last matching folded character, plus one to turn
+        // the index into a count of matched characters.
+        nMatch1 = aOffsets1[ nSame - 1 ] + 1;
+        nMatch2 = aOffsets2[ nSame - 1 ] + 1;
+    }
+
+    // Equal only if the common prefix covers both folded strings completely.
+    return (nSame == aFolded1.getLength()) && (nSame == aFolded2.getLength());
+}
+
+
+Sequence< OUString > SAL_CALL
+transliteration_Ignore::transliterateRange( const OUString& str1, const OUString& str2 )
+{
+    // Both range bounds need at least one character.
+    const bool bBothUsable = !str1.isEmpty() && !str2.isEmpty();
+    if (!bBothUsable)
+        throw RuntimeException();
+
+    // The range is described by the first character of each bound.
+    Sequence< OUString > aBounds{ str1.copy(0, 1), str2.copy(0, 1) };
+    return aBounds;
+}
+
+
+sal_Int16 SAL_CALL
+transliteration_Ignore::getType()
+{
+    // Ignore transliterations report TransliterationType::IGNORE
+    // (constant from com/sun/star/i18n/TransliterationType.hpp).
+    const sal_Int16 nType = TransliterationType::IGNORE;
+    return nType;
+}
+
+
+OUString
+transliteration_Ignore::transliterateImpl( const OUString& inStr, sal_Int32 startPos, sal_Int32 nCount,
+        Sequence< sal_Int32 >* pOffset)
+{
+    // For ignore modules transliteration is the same operation as folding;
+    // foldingImpl may be provided by a sub class.
+    OUString aFolded = foldingImpl( inStr, startPos, nCount, pOffset);
+    return aFolded;
+}
+
+Sequence< OUString >
+transliteration_Ignore::transliterateRange( const OUString& str1, const OUString& str2,
+        XTransliteration& t1, XTransliteration& t2 )
+{
+    // Both range bounds need at least one character.
+    if (str1.isEmpty() || str2.isEmpty())
+        throw RuntimeException();
+
+    // Transliterate the first character of each bound with both
+    // transliterations; the offsets are not needed.
+    Sequence< sal_Int32 > aUnusedOffsets;
+    const OUString aLower1 = t1.transliterate( str1, 0, 1, aUnusedOffsets );
+    const OUString aUpper1 = t1.transliterate( str2, 0, 1, aUnusedOffsets );
+    const OUString aLower2 = t2.transliterate( str1, 0, 1, aUnusedOffsets );
+    const OUString aUpper2 = t2.transliterate( str2, 0, 1, aUnusedOffsets );
+
+    // Identical results collapse into a single range, otherwise both ranges are returned.
+    const bool bSameRange = (aLower1 == aLower2) && (aUpper1 == aUpper2);
+    if (!bSameRange)
+        return { aLower1, aUpper1, aLower2, aUpper2 };
+    return { aLower1, aUpper1 };
+}
+
+OUString
+transliteration_Ignore::foldingImpl( const OUString& inStr, sal_Int32 startPos,
+ sal_Int32 nCount, Sequence< sal_Int32 >* pOffset)
+{
+ /* Folds nCount characters of inStr, starting at startPos, and returns the
+ * folded text. If pOffset is non-null it receives one position per result
+ * character, counted from startPos in inStr. With a pair table (map) set,
+ * adjacent character pairs are rewritten; otherwise every character is
+ * mapped through func or table, and a result of 0xffff drops the character.
+ *
+ * Create a string buffer which can hold nCount + 1 characters. */
+ // The reference count is 1 now.
+ rtl_uString * newStr = rtl_uString_alloc(nCount);
+ sal_Unicode * dst = newStr->buffer;
+ const sal_Unicode * src = inStr.getStr() + startPos;
+
+ // Allocate nCount length to offset argument.
+ sal_Int32 *p = nullptr;
+ sal_Int32 position = 0;
+ if (pOffset) {
+ pOffset->realloc( nCount );
+ p = pOffset->getArray();
+ position = startPos;
+ }
+
+ if (map) { // pairwise mode: look at (previousChar, currentChar) windows
+ sal_Unicode previousChar = *src ++;
+ sal_Unicode currentChar;
+
+ // Translation
+ while (-- nCount > 0) {
+ currentChar = *src ++;
+
+ const Mapping *m;
+ for (m = map; m->replaceChar; m++) { // the table ends at an entry whose replaceChar is 0
+ if (previousChar == m->previousChar && currentChar == m->currentChar ) {
+ if (pOffset) {
+ if (! m->two2one) // two output characters: the first one gets its own offset
+ *p++ = position;
+ position++;
+ *p++ = position++;
+ }
+ *dst++ = m->replaceChar;
+ if (!m->two2one)
+ *dst++ = currentChar;
+ previousChar = *src++; // both characters of the pair are consumed; start a new window
+ nCount--;
+ break;
+ }
+ }
+
+ if (! m->replaceChar) { // no table entry matched: emit previousChar and slide the window
+ if (pOffset)
+ *p ++ = position ++;
+ *dst ++ = previousChar;
+ previousChar = currentChar;
+ }
+ }
+
+ if (nCount == 0) { // one unpaired character is left; nCount is -1 here if the last pair was consumed or the input was empty
+ if (pOffset)
+ *p = position;
+ *dst ++ = previousChar;
+ }
+ } else {
+ // Translation
+ while (nCount -- > 0) {
+ sal_Unicode c = *src++;
+ c = func ? func( c) : (*table)[ c ];
+ if (c != 0xffff) // 0xffff marks a character that is removed from the output
+ *dst ++ = c;
+ if (pOffset) {
+ if (c != 0xffff)
+ *p ++ = position;
+ position++;
+ }
+ }
+ }
+ newStr->length = sal_Int32(dst - newStr->buffer); // the result may be shorter than the allocated nCount
+ if (pOffset)
+ pOffset->realloc(newStr->length); // keep exactly one offset per result character
+ *dst = u'\0';
+
+ return OUString(newStr, SAL_NO_ACQUIRE); // take ownership
+}
+
+sal_Unicode SAL_CALL
+transliteration_Ignore::transliterateChar2Char( sal_Unicode inChar)
+{
+    // Prefer the mapping function, then the mapping table; with neither
+    // available the character is returned unchanged.
+    if (func)
+        return func( inChar);
+    if (table)
+        return (*table)[ inChar ];
+    return inChar;
+}
+
+}
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/i18npool/source/transliteration/transliteration_Numeric.cxx b/i18npool/source/transliteration/transliteration_Numeric.cxx
new file mode 100644
index 0000000000..e0717379ed
--- /dev/null
+++ b/i18npool/source/transliteration/transliteration_Numeric.cxx
@@ -0,0 +1,143 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * This file incorporates work covered by the following license notice:
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed
+ * with this work for additional information regarding copyright
+ * ownership. The ASF licenses this file to you under the Apache
+ * License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of
+ * the License at http://www.apache.org/licenses/LICENSE-2.0 .
+ */
+
+
+#include <com/sun/star/i18n/TransliterationType.hpp>
+
+#include <transliteration_Numeric.hxx>
+#include <nativenumbersupplier.hxx>
+#include <rtl/ref.hxx>
+
+using namespace com::sun::star::i18n;
+using namespace com::sun::star::uno;
+
+
+namespace i18npool {
+
+sal_Int16 SAL_CALL
+transliteration_Numeric::getType()
+{
+    // Numeric transliterations report TransliterationType::NUMERIC.
+    const sal_Int16 nType = TransliterationType::NUMERIC;
+    return nType;
+}
+
+OUString
+transliteration_Numeric::foldingImpl(
+    const OUString& /*inStr*/,
+    sal_Int32 /*startPos*/,
+    sal_Int32 /*nCount*/,
+    Sequence< sal_Int32 >* /*pOffset*/ )
+{
+    // Folding is not implemented for numeric transliterations; every call throws.
+    throw RuntimeException();
+}
+
+sal_Bool SAL_CALL
+transliteration_Numeric::equals(
+    const OUString& /*str1*/, sal_Int32 /*pos1*/, sal_Int32 /*nCount1*/, sal_Int32& /*nMatch1*/,
+    const OUString& /*str2*/, sal_Int32 /*pos2*/, sal_Int32 /*nCount2*/, sal_Int32& /*nMatch2*/ )
+{
+    // Comparison is not implemented for numeric transliterations; every call throws.
+    throw RuntimeException();
+}
+
+Sequence< OUString > SAL_CALL
+transliteration_Numeric::transliterateRange(
+    const OUString& /*str1*/,
+    const OUString& /*str2*/ )
+{
+    // Ranges are not implemented for numeric transliterations; every call throws.
+    throw RuntimeException();
+}
+
+
+#define isNumber(c) ((c) >= 0x30 && (c) <= 0x39) // true for the code points 0x30..0x39, the ASCII digits '0'..'9'
+#define NUMBER_ZERO 0x30 // code point of the ASCII digit '0'
+
+/** Replaces every run of ASCII digits in the given range by a bullet symbol.
+
+    A run with value n > 0 becomes table[(n - 1) % tableSize]; if n exceeds
+    tableSize and recycleSymbol is not set, the digits are copied unchanged.
+    A run with value 0 becomes '0'. The non-digit character that ends a run is
+    not copied; other non-digit characters are copied unchanged.
+
+    @param inStr     input text
+    @param startPos  index of the first character to process
+    @param nCount    number of characters to process (clipped to the end of inStr)
+    @param pOffset   if non-null, receives for every result character the index
+                     in inStr it originates from
+    @return the converted text
+*/
+OUString
+transliteration_Numeric::transliterateBullet( std::u16string_view inStr, sal_Int32 startPos, sal_Int32 nCount,
+        Sequence< sal_Int32 >* pOffset ) const
+{
+    sal_Int32 number = -1, j = 0, endPos = startPos + nCount;
+
+    if (endPos > static_cast<sal_Int32>(inStr.size()))
+        endPos = inStr.size();
+
+    rtl_uString* pStr = rtl_uString_alloc(nCount);
+    sal_Unicode* out = pStr->buffer;
+
+    if (pOffset)
+        pOffset->realloc(nCount);
+    auto ppOffset = pOffset ? pOffset->getArray() : nullptr;
+
+    // Iterate one position past the end of the range: the extra pass acts as a
+    // virtual terminator, so a number that ends the input is still flushed.
+    // inStr is only read while i < endPos.
+    for (sal_Int32 i = startPos; i <= endPos; i++) {
+        if (i < endPos && isNumber(inStr[i]))
+        {
+            if (number == -1) {
+                // first digit of a run: remember where the run starts
+                startPos = i;
+                number = (inStr[i] - NUMBER_ZERO);
+            } else  {
+                number = number * 10 + (inStr[i] - NUMBER_ZERO);
+            }
+        } else {
+            if (number == 0) {
+                if (ppOffset)
+                    ppOffset[j] = startPos;
+                out[j++] = NUMBER_ZERO;
+            } else if (number > tableSize && !recycleSymbol) {
+                // no symbol for this value: keep the digits as they are
+                for (sal_Int32 k = startPos; k < i; k++) {
+                    if (ppOffset)
+                        ppOffset[j] = k;
+                    out[j++] = inStr[k];
+                }
+            } else if (number > 0) {
+                if (ppOffset)
+                    ppOffset[j] = startPos;
+                out[j++] = table[--number % tableSize];
+            } else if (i < endPos) {
+                // plain character outside of any number
+                if (ppOffset)
+                    ppOffset[j] = i;
+                out[j++] = inStr[i];
+            }
+            number = -1;
+        }
+    }
+    out[j] = 0;
+    // The buffer was allocated with length nCount; record the number of
+    // characters actually written so the string length agrees with pOffset.
+    pStr->length = j;
+
+    if (pOffset)
+        pOffset->realloc(j);
+
+    return OUString( pStr, SAL_NO_ACQUIRE );
+}
+
+OUString
+transliteration_Numeric::transliterateImpl( const OUString& inStr, sal_Int32 startPos, sal_Int32 nCount,
+        Sequence< sal_Int32 >* pOffset )
+{
+    // Without a symbol table the conversion is delegated to the native number supplier.
+    if (!tableSize)
+    {
+        rtl::Reference<NativeNumberSupplierService> xSupplier(new NativeNumberSupplierService());
+        return xSupplier->getNativeNumberString( inStr.copy(startPos, nCount), aLocale, nNativeNumberMode, pOffset );
+    }
+
+    // A symbol table is present: convert digit runs to bullet symbols.
+    return transliterateBullet( inStr, startPos, nCount, pOffset);
+}
+
+/** Converts a single character.
+
+    With a symbol table, the ASCII digits '1'..'9' are mapped to the table
+    symbol for their value (subject to tableSize / recycleSymbol); every other
+    character, including '0', is returned unchanged. Without a table the
+    native number supplier converts the character.
+
+    @param inChar  character to convert
+    @return the converted character
+*/
+sal_Unicode SAL_CALL
+transliteration_Numeric::transliterateChar2Char( sal_Unicode inChar )
+{
+    if (tableSize) {
+        if (isNumber(inChar)) {
+            sal_Int16 number = inChar - NUMBER_ZERO;
+            // The table index is number - 1, so zero has no symbol and would
+            // index table[-1]; it stays '0', as in transliterateBullet().
+            if (number > 0 && (number <= tableSize || recycleSymbol))
+                return table[--number % tableSize];
+        }
+        return inChar;
+    }
+    else
+        return NativeNumberSupplierService::getNativeNumberChar( inChar, aLocale, nNativeNumberMode );
+}
+
+}
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/i18npool/source/transliteration/transliteration_OneToOne.cxx b/i18npool/source/transliteration/transliteration_OneToOne.cxx
new file mode 100644
index 0000000000..233f3dfe70
--- /dev/null
+++ b/i18npool/source/transliteration/transliteration_OneToOne.cxx
@@ -0,0 +1,93 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * This file incorporates work covered by the following license notice:
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed
+ * with this work for additional information regarding copyright
+ * ownership. The ASF licenses this file to you under the Apache
+ * License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of
+ * the License at http://www.apache.org/licenses/LICENSE-2.0 .
+ */
+
+#include <com/sun/star/i18n/TransliterationType.hpp>
+
+#include <transliteration_OneToOne.hxx>
+#include <i18nutil/oneToOneMapping.hxx>
+
+#include <numeric>
+
+using namespace com::sun::star::i18n;
+using namespace com::sun::star::uno;
+
+namespace i18npool {
+
+sal_Int16 SAL_CALL
+transliteration_OneToOne::getType()
+{
+    // One-to-one transliterations report TransliterationType::ONE_TO_ONE
+    // (constant from com/sun/star/i18n/TransliterationType.hpp).
+    const sal_Int16 nType = TransliterationType::ONE_TO_ONE;
+    return nType;
+}
+
+OUString
+transliteration_OneToOne::foldingImpl(
+    const OUString& /*inStr*/,
+    sal_Int32 /*startPos*/,
+    sal_Int32 /*nCount*/,
+    Sequence< sal_Int32 >* /*pOffset*/)
+{
+    // Folding is not implemented for one-to-one transliterations; every call throws.
+    throw RuntimeException();
+}
+
+sal_Bool SAL_CALL
+transliteration_OneToOne::equals(
+    const OUString& /*str1*/, sal_Int32 /*pos1*/, sal_Int32 /*nCount1*/, sal_Int32& /*nMatch1*/,
+    const OUString& /*str2*/, sal_Int32 /*pos2*/, sal_Int32 /*nCount2*/, sal_Int32& /*nMatch2*/ )
+{
+    // Comparison is not implemented for one-to-one transliterations; every call throws.
+    throw RuntimeException();
+}
+
+Sequence< OUString > SAL_CALL
+transliteration_OneToOne::transliterateRange(
+    const OUString& /*str1*/,
+    const OUString& /*str2*/ )
+{
+    // Ranges are not implemented for one-to-one transliterations; every call throws.
+    throw RuntimeException();
+}
+
+OUString
+transliteration_OneToOne::transliterateImpl( const OUString& inStr, sal_Int32 startPos,
+        sal_Int32 nCount, Sequence< sal_Int32 >* pOffset)
+{
+    // Buffer for nCount characters plus the terminating NUL; its reference
+    // count is 1 and ownership moves to the returned OUString.
+    rtl_uString * pResult = rtl_uString_alloc(nCount);
+    sal_Unicode * pOut = pResult->buffer;
+    const sal_Unicode * pIn = inStr.getStr() + startPos;
+
+    // Every output character maps to exactly one input character, so the
+    // offsets are simply startPos, startPos + 1, ...
+    if (pOffset) {
+        pOffset->realloc( nCount );
+        auto [pFirst, pLast] = asNonConstRange(*pOffset);
+        std::iota(pFirst, pLast, startPos);
+    }
+
+    // Map each character through func if set, otherwise through the table.
+    for (sal_Int32 nLeft = nCount; nLeft > 0; --nLeft) {
+        const sal_Unicode cIn = *pIn++;
+        *pOut++ = func ? func( cIn) : (*table)[ cIn ];
+    }
+    *pOut = u'\0';
+
+    return OUString(pResult, SAL_NO_ACQUIRE); // take ownership
+}
+
+sal_Unicode SAL_CALL
+transliteration_OneToOne::transliterateChar2Char( sal_Unicode inChar)
+{
+    // The mapping function, when set, takes precedence over the table.
+    if (func)
+        return func( inChar);
+    return (*table)[ inChar ];
+}
+
+}
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/i18npool/source/transliteration/transliteration_body.cxx b/i18npool/source/transliteration/transliteration_body.cxx
new file mode 100644
index 0000000000..36c5ccc72f
--- /dev/null
+++ b/i18npool/source/transliteration/transliteration_body.cxx
@@ -0,0 +1,306 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * This file incorporates work covered by the following license notice:
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed
+ * with this work for additional information regarding copyright
+ * ownership. The ASF licenses this file to you under the Apache
+ * License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of
+ * the License at http://www.apache.org/licenses/LICENSE-2.0 .
+ */
+// Silence spurious Werror=maybe-uninitialized in transliterateImpl emitted at least by GCC 11.2.0
+#if defined __GNUC__ && !defined __clang__
+#pragma GCC diagnostic ignored "-Wmaybe-uninitialized"
+#endif
+
+#include <rtl/ref.hxx>
+#include <i18nutil/casefolding.hxx>
+#include <i18nutil/unicode.hxx>
+#include <com/sun/star/i18n/MultipleCharsOutputException.hpp>
+#include <com/sun/star/i18n/TransliterationType.hpp>
+#include <comphelper/processfactory.hxx>
+#include <comphelper/sequence.hxx>
+#include <o3tl/temporary.hxx>
+
+#include <characterclassificationImpl.hxx>
+
+#include <transliteration_body.hxx>
+#include <memory>
+#include <numeric>
+
+using namespace ::com::sun::star::uno;
+using namespace ::com::sun::star::i18n;
+using namespace ::com::sun::star::lang;
+
+namespace i18npool {
+
+// Base configuration: no case mapping selected, generic names.
+Transliteration_body::Transliteration_body()
+{
+    transliterationName = "Transliteration_body";
+    implementationName = "com.sun.star.i18n.Transliteration.Transliteration_body";
+    nMappingType = MappingType::NONE;
+}
+
+// Reports this transliteration as TransliterationType::ONE_TO_ONE.
+sal_Int16 SAL_CALL Transliteration_body::getType()
+{
+ return TransliterationType::ONE_TO_ONE;
+}
+
+// Comparison is not provided by this class: every call throws a
+// RuntimeException and all arguments are ignored.
+sal_Bool SAL_CALL Transliteration_body::equals(
+ const OUString& /*str1*/, sal_Int32 /*pos1*/, sal_Int32 /*nCount1*/, sal_Int32& /*nMatch1*/,
+ const OUString& /*str2*/, sal_Int32 /*pos2*/, sal_Int32 /*nCount2*/, sal_Int32& /*nMatch2*/)
+{
+ throw RuntimeException();
+}
+
+// Returns the two range bounds unchanged, as a two-element sequence.
+Sequence< OUString > SAL_CALL
+Transliteration_body::transliterateRange( const OUString& str1, const OUString& str2 )
+{
+    Sequence< OUString > aBounds{ str1, str2 };
+    return aBounds;
+}
+
+// Resolves the TOGGLE_CASE pseudo mapping for one character.
+//
+// nMappingType should not be a combination of flags when it is used, thus the
+// combination (LowerToUpper | UpperToLower) is replaced here by exactly one of
+// the two, chosen from the Unicode type of cChar. Any other mapping type is
+// returned unchanged.
+static MappingType lcl_getMappingTypeForToggleCase( MappingType nMappingType, sal_Unicode cChar )
+{
+    constexpr MappingType eToggle = MappingType::LowerToUpper | MappingType::UpperToLower;
+    if (nMappingType != eToggle)
+        return nMappingType;
+
+    const sal_Int16 nCharType = unicode::getUnicodeType( cChar );
+
+    // Bit 0x02 set is treated as "lower case". Everything else, including
+    // non-letters such as white space and digits, takes the UpperToLower path.
+    return (nCharType & 0x02) ? MappingType::LowerToUpper
+                              : MappingType::UpperToLower;
+}
+
+// Applies the case mapping selected by nMappingType to the nCount UTF-16
+// units of inStr starting at startPos and returns the mapped text.
+//
+// One input unit may produce several output units (map.nmap of them), so the
+// result can be longer than nCount. When pOffset is non-null it is replaced
+// by a sequence that holds, for every output unit, the index (i + startPos)
+// of the input unit it was produced from.
+OUString
+Transliteration_body::transliterateImpl(
+ const OUString& inStr, sal_Int32 startPos, sal_Int32 nCount,
+ Sequence< sal_Int32 >* pOffset)
+{
+ const sal_Unicode *in = inStr.getStr() + startPos;
+
+ // We could assume that most calls result in identical string lengths,
+ // thus using a preallocated OUStringBuffer could be an easy way
+ // to assemble the return string without too much hassle. However,
+ // for single characters the OUStringBuffer::append() method is quite
+ // expensive compared to a simple array operation, so it pays here
+ // to copy the final result instead.
+
+ // Allocate the max possible buffer. Try to use stack instead of heap,
+ // which would have to be reallocated most times anyways.
+ // NOTE(review): presumably NMAPPINGMAX is the largest number of units a
+ // single mapping can yield - confirm against its definition.
+ constexpr sal_Int32 nLocalBuf = 2048;
+ sal_Unicode* out;
+ std::unique_ptr<sal_Unicode[]> pHeapBuf;
+ if (nCount <= nLocalBuf)
+ out = static_cast<sal_Unicode*>(alloca(nCount * NMAPPINGMAX * sizeof(sal_Unicode)));
+ else
+ {
+ // Too large for the stack: pHeapBuf owns the buffer until return.
+ pHeapBuf.reset(new sal_Unicode[ nCount * NMAPPINGMAX ]);
+ out = pHeapBuf.get();
+ }
+
+ // j counts the output units written to `out` so far.
+ sal_Int32 j = 0;
+ // Two different blocks to eliminate the if(useOffset) condition inside the loop.
+ // Yes, on massive use even such small things do count.
+ if ( pOffset )
+ {
+ sal_Int32* offsetData;
+ std::unique_ptr<sal_Int32[]> pOffsetHeapBuf;
+ // nOffsetCount is never below nLocalBuf, so the stack branch below
+ // always reserves nLocalBuf * NMAPPINGMAX entries.
+ sal_Int32 nOffsetCount = std::max<sal_Int32>(nLocalBuf, nCount);
+ if (nOffsetCount <= nLocalBuf)
+ offsetData = static_cast<sal_Int32*>(alloca(nOffsetCount * NMAPPINGMAX * sizeof(sal_Int32)));
+ else
+ {
+ pOffsetHeapBuf.reset(new sal_Int32[ nOffsetCount * NMAPPINGMAX ]);
+ offsetData = pOffsetHeapBuf.get();
+ }
+ // offsetDataEnd is the write cursor into the offset scratch buffer.
+ sal_Int32* offsetDataEnd = offsetData;
+
+ for (sal_Int32 i = 0; i < nCount; i++)
+ {
+ // take care of TOGGLE_CASE transliteration:
+ MappingType nTmpMappingType = lcl_getMappingTypeForToggleCase( nMappingType, in[i] );
+
+ const i18nutil::Mapping &map = i18nutil::casefolding::getValue( in, i, nCount, aLocale, nTmpMappingType );
+ // Every unit produced by this mapping points back to input index i.
+ std::fill_n(offsetDataEnd, map.nmap, i + startPos);
+ offsetDataEnd += map.nmap;
+ std::copy_n(map.map, map.nmap, out + j);
+ j += map.nmap;
+ }
+
+ // Copy the collected offsets into a sequence of exactly the used length.
+ *pOffset = css::uno::Sequence< sal_Int32 >(offsetData, offsetDataEnd - offsetData);
+ }
+ else
+ {
+ for ( sal_Int32 i = 0; i < nCount; i++)
+ {
+ // take care of TOGGLE_CASE transliteration:
+ MappingType nTmpMappingType = lcl_getMappingTypeForToggleCase( nMappingType, in[i] );
+
+ const i18nutil::Mapping &map = i18nutil::casefolding::getValue( in, i, nCount, aLocale, nTmpMappingType );
+ std::copy_n(map.map, map.nmap, out + j);
+ j += map.nmap;
+ }
+ }
+
+ // Copies the j units out of the scratch buffer into the returned string.
+ return OUString(out, j);
+}
+
+// Maps a single character with nMappingType and returns all units of the
+// mapping (map.nmap of them) as a string.
+// NOTE(review): unlike transliterateImpl, nMappingType is passed to getValue
+// without going through lcl_getMappingTypeForToggleCase - confirm this is
+// intended for the toggle-case subclass.
+OUString SAL_CALL
+Transliteration_body::transliterateChar2String( sal_Unicode inChar )
+{
+    const i18nutil::Mapping& rMapping =
+        i18nutil::casefolding::getValue(&inChar, 0, 1, aLocale, nMappingType);
+
+    rtl_uString* pResult = rtl_uString_alloc(rMapping.nmap);
+
+    // Copy the mapped units and terminate right after the last one.
+    sal_Unicode* pTerminator = std::copy_n(rMapping.map, rMapping.nmap, pResult->buffer);
+    *pTerminator = 0;
+
+    // The OUString adopts the reference returned by rtl_uString_alloc.
+    return OUString( pResult, SAL_NO_ACQUIRE );
+}
+
+// Maps a single character with nMappingType and returns the first unit of
+// the mapping.
+// @throws MultipleCharsOutputException when the mapping has more than one unit
+sal_Unicode SAL_CALL
+Transliteration_body::transliterateChar2Char( sal_Unicode inChar )
+{
+    const i18nutil::Mapping& rMapping =
+        i18nutil::casefolding::getValue(&inChar, 0, 1, aLocale, nMappingType);
+
+    if (rMapping.nmap <= 1)
+        return rMapping.map[0];
+
+    throw MultipleCharsOutputException();
+}
+
+// Folding is the same operation as transliteration for this class: the call
+// is forwarded unchanged to transliterateImpl.
+OUString
+Transliteration_body::foldingImpl( const OUString& inStr, sal_Int32 startPos, sal_Int32 nCount,
+ Sequence< sal_Int32 >* pOffset)
+{
+ return transliterateImpl(inStr, startPos, nCount, pOffset);
+}
+
+// Generic case mapping: starts without a selected mapping type.
+Transliteration_casemapping::Transliteration_casemapping()
+{
+    transliterationName = "casemapping(generic)";
+    implementationName = "com.sun.star.i18n.Transliteration.Transliteration_casemapping";
+    nMappingType = MappingType::NONE;
+}
+
+// Upper-to-lower case transliteration.
+Transliteration_u2l::Transliteration_u2l()
+{
+    transliterationName = "upper_to_lower(generic)";
+    implementationName = "com.sun.star.i18n.Transliteration.UPPERCASE_LOWERCASE";
+    nMappingType = MappingType::UpperToLower;
+}
+
+// Lower-to-upper case transliteration.
+Transliteration_l2u::Transliteration_l2u()
+{
+    transliterationName = "lower_to_upper(generic)";
+    implementationName = "com.sun.star.i18n.Transliteration.LOWERCASE_UPPERCASE";
+    nMappingType = MappingType::LowerToUpper;
+}
+
+// Toggle-case transliteration.
+Transliteration_togglecase::Transliteration_togglecase()
+{
+    transliterationName = "toggle(generic)";
+    implementationName = "com.sun.star.i18n.Transliteration.TOGGLE_CASE";
+
+    // Usually nMappingType must NOT be a combination of different flags.
+    // The combination stored here is resolved on a per character basis by
+    // lcl_getMappingTypeForToggleCase(), which
+    // Transliteration_body::transliterateImpl calls before the value is used.
+    nMappingType = MappingType::LowerToUpper | MappingType::UpperToLower;
+}
+
+// Title-case transliteration.
+Transliteration_titlecase::Transliteration_titlecase()
+{
+    transliterationName = "title(generic)";
+    implementationName = "com.sun.star.i18n.Transliteration.TITLE_CASE";
+    nMappingType = MappingType::ToTitle;
+}
+
+/// Converts one word to title case: the first code point of the selected
+/// range is title-cased, everything after it is lower-cased.
+///
+/// @param inStr    text that contains the word
+/// @param startPos index of the first UTF-16 unit of the range within inStr
+/// @param nCount   number of UTF-16 units in the range
+/// @param rLocale  locale passed to the character classification calls
+/// @param pOffset  optional; when non-null it is resized to the length of the
+///                 result. The entries for the converted first character are
+///                 0, the remaining entries count upwards from the index of
+///                 the second code point. Offsets are relative to the start
+///                 of the range (startPos is not added).
+/// @return the converted text; empty when the selected range is empty
+/// @throws RuntimeException
+static OUString transliterate_titlecase_Impl(
+    std::u16string_view inStr, sal_Int32 startPos, sal_Int32 nCount,
+    const Locale &rLocale,
+    Sequence< sal_Int32 >* pOffset )
+{
+    const OUString aText( inStr.substr( startPos, nCount ) );
+
+    OUString aRes;
+    if (!aText.isEmpty())
+    {
+        Reference< XComponentContext > xContext = ::comphelper::getProcessComponentContext();
+        rtl::Reference< CharacterClassificationImpl > xCharClassImpl( new CharacterClassificationImpl( xContext ) );
+
+        // because xCharClassImpl.toTitle does not handle ligatures or Beta but will raise
+        // an exception we need to handle the first character manually...
+
+        // we don't want to change surrogates by accident, thus we use proper code point
+        // iteration. nFirstLen ends up as the number of UTF-16 units occupied by the
+        // first code point (2 for a surrogate pair), i.e. the index where the rest of
+        // the text starts.
+        sal_Int32 nFirstLen = 0;
+        sal_uInt32 cFirstChar = aText.iterateCodePoints( &nFirstLen );
+        OUString aResolvedLigature( &cFirstChar, 1 );
+        // toUpper can be used to properly resolve ligatures and characters like Beta
+        aResolvedLigature = xCharClassImpl->toUpper( aResolvedLigature, 0, aResolvedLigature.getLength(), rLocale );
+        // since toTitle will leave all-uppercase text unchanged we first need to
+        // use toLower to bring possible 2nd and following chars in lowercase
+        aResolvedLigature = xCharClassImpl->toLower( aResolvedLigature, 0, aResolvedLigature.getLength(), rLocale );
+        sal_Int32 nResolvedLen = aResolvedLigature.getLength();
+
+        // now we can properly use toTitle to get the expected result for the resolved string.
+        // The rest of the text should just become lowercase. Start the rest after the
+        // complete first code point so that the low surrogate of a supplementary-plane
+        // first character is not emitted a second time.
+        aRes = xCharClassImpl->toTitle( aResolvedLigature, 0, nResolvedLen, rLocale ) +
+               xCharClassImpl->toLower( aText, nFirstLen, aText.getLength() - nFirstLen, rLocale );
+        if (pOffset)
+        {
+            pOffset->realloc( aRes.getLength() );
+
+            auto [begin, end] = asNonConstRange(*pOffset);
+            // All units of the converted first character map back to index 0 ...
+            sal_Int32* pOffsetInt = std::fill_n(begin, nResolvedLen, 0);
+            // ... and the rest continues at the index of the second code point.
+            std::iota(pOffsetInt, end, nFirstLen);
+        }
+    }
+    return aRes;
+}
+
+// this function expects to be called on a word-by-word basis,
+// namely that startPos points to the first char of the word.
+// The work is done by transliterate_titlecase_Impl, called with this
+// object's aLocale; pOffset is passed through unchanged.
+OUString Transliteration_titlecase::transliterateImpl(
+ const OUString& inStr, sal_Int32 startPos, sal_Int32 nCount,
+ Sequence< sal_Int32 >* pOffset )
+{
+ return transliterate_titlecase_Impl( inStr, startPos, nCount, aLocale, pOffset );
+}
+
+// Sentence-case transliteration.
+Transliteration_sentencecase::Transliteration_sentencecase()
+{
+    transliterationName = "sentence(generic)";
+    implementationName = "com.sun.star.i18n.Transliteration.SENTENCE_CASE";
+    // ToTitle, though only to be applied to the first word...
+    nMappingType = MappingType::ToTitle;
+}
+
+// this function expects to be called on a sentence-by-sentence basis,
+// namely that startPos points to the first word (NOT first char!) in the sentence.
+// The work is done by the same helper the title-case class uses,
+// transliterate_titlecase_Impl, called with this object's aLocale.
+OUString Transliteration_sentencecase::transliterateImpl(
+ const OUString& inStr, sal_Int32 startPos, sal_Int32 nCount,
+ Sequence< sal_Int32 >* pOffset )
+{
+ return transliterate_titlecase_Impl( inStr, startPos, nCount, aLocale, pOffset );
+}
+
+}
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/i18npool/source/transliteration/transliteration_caseignore.cxx b/i18npool/source/transliteration/transliteration_caseignore.cxx
new file mode 100644
index 0000000000..b5c8c24d10
--- /dev/null
+++ b/i18npool/source/transliteration/transliteration_caseignore.cxx
@@ -0,0 +1,146 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * This file incorporates work covered by the following license notice:
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed
+ * with this work for additional information regarding copyright
+ * ownership. The ASF licenses this file to you under the Apache
+ * License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of
+ * the License at http://www.apache.org/licenses/LICENSE-2.0 .
+ */
+
+#include <com/sun/star/i18n/TransliterationType.hpp>
+#include <rtl/ref.hxx>
+
+#include <i18nutil/casefolding.hxx>
+#include <i18nutil/transliteration.hxx>
+
+#include <transliteration_caseignore.hxx>
+
+namespace com::sun::star::uno { class XComponentContext; }
+
+using namespace ::com::sun::star::uno;
+using namespace ::com::sun::star::i18n;
+using namespace ::com::sun::star::lang;
+
+namespace i18npool {
+
+// Case-insensitive transliteration: full case folding, no modules loaded yet.
+Transliteration_caseignore::Transliteration_caseignore()
+{
+    transliterationName = "case ignore (generic)";
+    implementationName = "com.sun.star.i18n.Transliteration.IGNORE_CASE";
+    moduleLoaded = TransliterationFlags::NONE;
+    nMappingType = MappingType::FullFolding;
+}
+
+// Remembers the locale and adds modName to the set of loaded module flags.
+// Flags accumulate across calls; the locale is overwritten each time.
+void SAL_CALL
+Transliteration_caseignore::loadModule( TransliterationModules modName, const Locale& rLocale )
+{
+    aLocale = rLocale;
+    moduleLoaded |= static_cast<TransliterationFlags>(modName);
+}
+
+// Reports this transliteration as TransliterationType::IGNORE.
+sal_Int16 SAL_CALL Transliteration_caseignore::getType()
+{
+ // It's NOT TransliterationType::ONE_TO_ONE because it's using casefolding
+ return TransliterationType::IGNORE;
+}
+
+
+// Expands a one-character range [str1, str2] to its case variants.
+//
+// Both bounds are converted to lower and to upper case. If neither bound
+// changes between the two conversions only the lower-case pair is returned,
+// otherwise { lower1, lower2, upper1, upper2 }.
+// @throws RuntimeException unless both strings have length 1
+Sequence< OUString > SAL_CALL
+Transliteration_caseignore::transliterateRange( const OUString& str1, const OUString& str2 )
+{
+    if (str1.getLength() != 1 || str2.getLength() != 1)
+        throw RuntimeException();
+
+    // Helper transliterations are created once and shared by all calls;
+    // only their locale is refreshed on each call.
+    static rtl::Reference< Transliteration_u2l > xToLower(new Transliteration_u2l);
+    static rtl::Reference< Transliteration_l2u > xToUpper(new Transliteration_l2u);
+
+    xToLower->loadModule(TransliterationModules(0), aLocale);
+    xToUpper->loadModule(TransliterationModules(0), aLocale);
+
+    const OUString aLower1 = xToLower->transliterateString2String(str1, 0, str1.getLength());
+    const OUString aUpper1 = xToUpper->transliterateString2String(str1, 0, str1.getLength());
+    const OUString aLower2 = xToLower->transliterateString2String(str2, 0, str2.getLength());
+    const OUString aUpper2 = xToUpper->transliterateString2String(str2, 0, str2.getLength());
+
+    const bool bCaseInvariant = (aLower1 == aUpper1) && (aLower2 == aUpper2);
+    if (bCaseInvariant)
+        return { aLower1, aLower2 };
+
+    return { aLower1, aLower2, aUpper1, aUpper2 };
+}
+
+// True when compare() reports the two substrings as equal (result 0).
+// nMatch1 / nMatch2 are filled in by compare().
+sal_Bool SAL_CALL
+Transliteration_caseignore::equals(
+    const OUString& str1, sal_Int32 pos1, sal_Int32 nCount1, sal_Int32& nMatch1,
+    const OUString& str2, sal_Int32 pos2, sal_Int32 nCount2, sal_Int32& nMatch2)
+{
+    const sal_Int32 nOrder =
+        compare(str1, pos1, nCount1, nMatch1, str2, pos2, nCount2, nMatch2);
+    return nOrder == 0;
+}
+
+// Compares two substrings through compare(); the match positions that
+// compare() reports are discarded.
+sal_Int32 SAL_CALL
+Transliteration_caseignore::compareSubstring(
+    const OUString& str1, sal_Int32 off1, sal_Int32 len1,
+    const OUString& str2, sal_Int32 off2, sal_Int32 len2)
+{
+    // compare() resets both counters before using them.
+    sal_Int32 nUnusedMatch1;
+    sal_Int32 nUnusedMatch2;
+    return compare(str1, off1, len1, nUnusedMatch1, str2, off2, len2, nUnusedMatch2);
+}
+
+
+// Compares two complete strings through compare(); the match positions that
+// compare() reports are discarded.
+sal_Int32 SAL_CALL
+Transliteration_caseignore::compareString(
+    const OUString& str1,
+    const OUString& str2)
+{
+    // compare() resets both counters before using them.
+    sal_Int32 nUnusedMatch1;
+    sal_Int32 nUnusedMatch2;
+    const sal_Int32 nLen1 = str1.getLength();
+    const sal_Int32 nLen2 = str2.getLength();
+    return compare(str1, 0, nLen1, nUnusedMatch1, str2, 0, nLen2, nUnusedMatch2);
+}
+
+// Compares two substrings unit by unit after case folding.
+//
+// Each side is consumed through i18nutil::casefolding::getNextChar, which
+// is handed the read position (nMatch1 / nMatch2) and a MappingElement that
+// holds a pending multi-unit mapping. A side is finished once its position
+// has reached its count and its pending mapping has been used up.
+//
+// @param str1, pos1, nCount1  first substring
+// @param nMatch1              out: reset to 0, then used as read position in
+//                             the first substring; decremented once when a
+//                             mismatch is found
+// @param str2, pos2, nCount2  second substring
+// @param nMatch2              out: same as nMatch1, for the second substring
+// @return 0 when both sides end together without a mismatch; 1 / -1 when the
+//         first differing folded unit of str1 is greater / smaller, or when
+//         str1 is the side that still has input left / has run out first
+sal_Int32
+Transliteration_caseignore::compare(
+    const OUString& str1, sal_Int32 pos1, sal_Int32 nCount1, sal_Int32& nMatch1,
+    const OUString& str2, sal_Int32 pos2, sal_Int32 nCount2, sal_Int32& nMatch2)
+{
+    const sal_Unicode* unistr1 = str1.getStr() + pos1;
+    const sal_Unicode* unistr2 = str2.getStr() + pos2;
+    i18nutil::MappingElement e1, e2;
+    nMatch1 = nMatch2 = 0;
+
+    // Local predicates instead of function-scope macros, so nothing leaks
+    // into the rest of the translation unit.
+    const auto notEndOfStr1 = [&] { return nMatch1 < nCount1 || e1.current < e1.element.nmap; };
+    const auto notEndOfStr2 = [&] { return nMatch2 < nCount2 || e2.current < e2.element.nmap; };
+
+    while (notEndOfStr1() && notEndOfStr2()) {
+        const sal_Unicode c1 = i18nutil::casefolding::getNextChar(unistr1, nMatch1, nCount1, e1, aLocale, nMappingType, moduleLoaded);
+        const sal_Unicode c2 = i18nutil::casefolding::getNextChar(unistr2, nMatch2, nCount2, e2, aLocale, nMappingType, moduleLoaded);
+        if (c1 != c2) {
+            // Step both positions back by one so they do not include the
+            // mismatching unit.
+            nMatch1--; nMatch2--;
+            return c1 > c2 ? 1 : -1;
+        }
+    }
+
+    // No mismatch: the side that still has input left is the greater one.
+    if (notEndOfStr1())
+        return 1;
+    if (notEndOfStr2())
+        return -1;
+    return 0;
+}
+
+}
+
+// Exported C entry point that creates a new Transliteration_caseignore and
+// returns it after passing it through cppu::acquire. Both parameters are
+// unnamed and unused.
+extern "C" SAL_DLLPUBLIC_EXPORT css::uno::XInterface *
+com_sun_star_i18n_Transliteration_IGNORE_CASE_get_implementation(
+ css::uno::XComponentContext *,
+ css::uno::Sequence<css::uno::Any> const &)
+{
+ return cppu::acquire(new i18npool::Transliteration_caseignore());
+}
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/i18npool/source/transliteration/transliteration_commonclass.cxx b/i18npool/source/transliteration/transliteration_commonclass.cxx
new file mode 100644
index 0000000000..054ae60e22
--- /dev/null
+++ b/i18npool/source/transliteration/transliteration_commonclass.cxx
@@ -0,0 +1,135 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * This file incorporates work covered by the following license notice:
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed
+ * with this work for additional information regarding copyright
+ * ownership. The ASF licenses this file to you under the Apache
+ * License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of
+ * the License at http://www.apache.org/licenses/LICENSE-2.0 .
+ */
+
+#include <transliteration_commonclass.hxx>
+#include <cppuhelper/supportsservice.hxx>
+
+using namespace ::com::sun::star::uno;
+using namespace ::com::sun::star::i18n;
+using namespace ::com::sun::star::lang;
+
+namespace i18npool {
+
+// Starts with empty names; this constructor assigns nothing else.
+transliteration_commonclass::transliteration_commonclass()
+{
+    implementationName = "";
+    transliterationName = "";
+}
+
+// Returns transliterationName converted from ASCII to an OUString.
+OUString SAL_CALL transliteration_commonclass::getName()
+{
+ return OUString::createFromAscii(transliterationName);
+}
+
+// Stores the locale for later use; the module identifier is ignored.
+void SAL_CALL transliteration_commonclass::loadModule( TransliterationModules /*modName*/, const Locale& rLocale )
+{
+ aLocale = rLocale;
+}
+
+
+// Not provided by this class: always throws a RuntimeException.
+void SAL_CALL
+transliteration_commonclass::loadModuleNew( const Sequence < TransliterationModulesNew >& /*modName*/, const Locale& /*rLocale*/ )
+{
+ throw RuntimeException();
+}
+
+
+// Not provided by this class: always throws a RuntimeException.
+void SAL_CALL
+transliteration_commonclass::loadModuleByImplName( const OUString& /*implName*/, const Locale& /*rLocale*/ )
+{
+ throw RuntimeException();
+}
+
+// Not provided by this class: always throws a RuntimeException.
+void SAL_CALL
+transliteration_commonclass::loadModulesByImplNames(const Sequence< OUString >& /*modNamelist*/, const Locale& /*rLocale*/)
+{
+ throw RuntimeException();
+}
+
+// Not provided by this class: always throws a RuntimeException.
+Sequence< OUString > SAL_CALL
+transliteration_commonclass::getAvailableModules( const Locale& /*rLocale*/, sal_Int16 /*sType*/ )
+{
+ throw RuntimeException();
+}
+
+// Transliterates both substrings and compares the results unit by unit.
+//
+// @return the difference of the first pair of differing UTF-16 units, or,
+//         when one result is a prefix of the other, the difference of the
+//         two result lengths (0 when the results are identical)
+sal_Int32 SAL_CALL
+transliteration_commonclass::compareSubstring(
+    const OUString& str1, sal_Int32 off1, sal_Int32 len1,
+    const OUString& str2, sal_Int32 off2, sal_Int32 len2)
+{
+    // Offset sequences are required by transliterate() but not used here.
+    Sequence <sal_Int32> aOffsets1(2*len1);
+    Sequence <sal_Int32> aOffsets2(2*len2);
+
+    const OUString aLeft = transliterate(str1, off1, len1, aOffsets1);
+    const OUString aRight = transliterate(str2, off2, len2, aOffsets2);
+
+    const sal_Int32 nLeftLen = aLeft.getLength();
+    const sal_Int32 nRightLen = aRight.getLength();
+    const sal_Unicode* pLeft = aLeft.getStr();
+    const sal_Unicode* pRight = aRight.getStr();
+
+    // Walk the common prefix; stop at the first difference.
+    for (sal_Int32 i = 0; i < nLeftLen && i < nRightLen; ++i)
+    {
+        const sal_Int32 nDiff = pLeft[i] - pRight[i];
+        if (nDiff != 0)
+            return nDiff;
+    }
+
+    // One result is a prefix of the other: the longer one is greater.
+    return nLeftLen - nRightLen;
+}
+
+// Compares two complete strings by calling compareSubstring over their
+// full lengths.
+sal_Int32 SAL_CALL
+transliteration_commonclass::compareString( const OUString& str1, const OUString& str2 )
+{
+ return compareSubstring(str1, 0, str1.getLength(), str2, 0, str2.getLength());
+}
+
+// Transliterates a substring without collecting offsets: forwards to
+// transliterateImpl with a null offset pointer.
+OUString SAL_CALL
+transliteration_commonclass::transliterateString2String( const OUString& inStr, sal_Int32 startPos, sal_Int32 nCount )
+{
+ return transliterateImpl(inStr, startPos, nCount, nullptr);
+}
+
+// Transliterates one character by wrapping it in a one-unit string and
+// running it through this class's transliterateString2String.
+OUString SAL_CALL
+transliteration_commonclass::transliterateChar2String( sal_Unicode inChar )
+{
+    const OUString aSingleChar(&inChar, 1);
+    return transliteration_commonclass::transliterateString2String(aSingleChar, 0, 1);
+}
+
+// Returns implementationName converted from ASCII to an OUString.
+OUString SAL_CALL transliteration_commonclass::getImplementationName()
+{
+ return OUString::createFromAscii(implementationName);
+}
+
+// Delegates the service-name check to cppu::supportsService.
+sal_Bool SAL_CALL transliteration_commonclass::supportsService(const OUString& rServiceName)
+{
+ return cppu::supportsService(this, rServiceName);
+}
+
+// Returns the single service name this class reports.
+Sequence< OUString > SAL_CALL transliteration_commonclass::getSupportedServiceNames()
+{
+ return { "com.sun.star.i18n.Transliteration.l10n" };
+}
+
+}
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */