diff options
Diffstat (limited to '')
38 files changed, 4917 insertions, 0 deletions
diff --git a/i18npool/source/transliteration/chartonum.cxx b/i18npool/source/transliteration/chartonum.cxx new file mode 100644 index 000000000..41a6549b6 --- /dev/null +++ b/i18npool/source/transliteration/chartonum.cxx @@ -0,0 +1,53 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . 
+ */ + +#include <chartonum.hxx> + +using namespace com::sun::star::uno; + +namespace i18npool { + +#define TRANSLITERATION_CHARTONUM( name ) \ +CharToNum##name::CharToNum##name() \ +{ \ + nNativeNumberMode = 0; \ + tableSize = 0; \ + implementationName = "com.sun.star.i18n.Transliteration.CharToNum"#name; \ +} + +TRANSLITERATION_CHARTONUM( Fullwidth) +TRANSLITERATION_CHARTONUM( Lower_zh_CN) +TRANSLITERATION_CHARTONUM( Lower_zh_TW) +TRANSLITERATION_CHARTONUM( Upper_zh_CN) +TRANSLITERATION_CHARTONUM( Upper_zh_TW) +TRANSLITERATION_CHARTONUM( KanjiShort_ja_JP) +TRANSLITERATION_CHARTONUM( KanjiTraditional_ja_JP) +TRANSLITERATION_CHARTONUM( Lower_ko) +TRANSLITERATION_CHARTONUM( Upper_ko) +TRANSLITERATION_CHARTONUM( Hangul_ko) +TRANSLITERATION_CHARTONUM( Indic_ar) +TRANSLITERATION_CHARTONUM( EastIndic_ar) +TRANSLITERATION_CHARTONUM( Indic_hi) +TRANSLITERATION_CHARTONUM( _th) + +#undef TRANSLITERATION_CHARTONUM + +} + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/i18npool/source/transliteration/fullwidthToHalfwidth.cxx b/i18npool/source/transliteration/fullwidthToHalfwidth.cxx new file mode 100644 index 000000000..226d0ef40 --- /dev/null +++ b/i18npool/source/transliteration/fullwidthToHalfwidth.cxx @@ -0,0 +1,127 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. 
The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . + */ + +#include <sal/config.h> + +#include <com/sun/star/i18n/MultipleCharsOutputException.hpp> +#include <i18nutil/widthfolding.hxx> + +#include <transliteration_OneToOne.hxx> + +using namespace com::sun::star::uno; +using namespace com::sun::star::i18n; +using namespace com::sun::star::lang; + +namespace i18npool { + +fullwidthToHalfwidth::fullwidthToHalfwidth() +{ + func = nullptr; + table = &i18nutil::widthfolding::getfull2halfTable(); + transliterationName = "fullwidthToHalfwidth"; + implementationName = "com.sun.star.i18n.Transliteration.FULLWIDTH_HALFWIDTH"; +} + +/** + * Transliterate fullwidth to halfwidth. + * The output is a reference of OUString. You MUST delete this object when you do not need to use it any more + * The output string contains a transliterated string only, not whole string. 
+ */ +OUString +fullwidthToHalfwidth::transliterateImpl( const OUString& inStr, sal_Int32 startPos, sal_Int32 nCount, Sequence< sal_Int32 >& offset, bool useOffset ) +{ + // Decomposition: GA --> KA + voice-mark + const OUString& newStr = i18nutil::widthfolding::decompose_ja_voiced_sound_marks (inStr, startPos, nCount, offset, useOffset); + + // One to One mapping + return transliteration_OneToOne::transliterateImpl( newStr, 0, newStr.getLength(), offset, false); +} + +sal_Unicode SAL_CALL +fullwidthToHalfwidth::transliterateChar2Char( sal_Unicode inChar) +{ + sal_Unicode newChar = i18nutil::widthfolding::decompose_ja_voiced_sound_marksChar2Char (inChar); + if (newChar == 0xFFFF) + throw MultipleCharsOutputException(); + return transliteration_OneToOne::transliterateChar2Char(inChar); +} + +fullwidthKatakanaToHalfwidthKatakana::fullwidthKatakanaToHalfwidthKatakana() +{ + func = nullptr; + table = &i18nutil::widthfolding::getfullKana2halfKanaTable(); + transliterationName = "fullwidthKatakanaToHalfwidthKatakana"; + implementationName = "com.sun.star.i18n.Transliteration.FULLWIDTHKATAKANA_HALFWIDTHKATAKANA"; +} + +/** + * Transliterate fullwidth katakana to halfwidth katakana. 
+ */ +OUString +fullwidthKatakanaToHalfwidthKatakana::transliterateImpl( const OUString& inStr, sal_Int32 startPos, sal_Int32 nCount, Sequence< sal_Int32 >& offset, bool useOffset ) +{ + // Decomposition: GA --> KA + voice-mark + const OUString& newStr = i18nutil::widthfolding::decompose_ja_voiced_sound_marks (inStr, startPos, nCount, offset, useOffset); + + // One to One mapping + return transliteration_OneToOne::transliterateImpl( newStr, 0, newStr.getLength(), offset, false); +} + +sal_Unicode SAL_CALL +fullwidthKatakanaToHalfwidthKatakana::transliterateChar2Char( sal_Unicode inChar ) +{ + sal_Unicode newChar = i18nutil::widthfolding::decompose_ja_voiced_sound_marksChar2Char (inChar); + if (newChar == 0xFFFF) + throw MultipleCharsOutputException(); + return transliteration_OneToOne::transliterateChar2Char(inChar); +} + +fullwidthToHalfwidthLikeASC::fullwidthToHalfwidthLikeASC() +{ + func = nullptr; + table = &i18nutil::widthfolding::getfull2halfTableForASC(); + transliterationName = "fullwidthToHalfwidthLikeASC"; + implementationName = "com.sun.star.i18n.Transliteration.FULLWIDTH_HALFWIDTH_LIKE_ASC"; +} + +/** + * Transliterate fullwidth to halfwidth like Excel's ASC function. 
+ */ +OUString +fullwidthToHalfwidthLikeASC::transliterateImpl( const OUString& inStr, sal_Int32 startPos, sal_Int32 nCount, Sequence< sal_Int32 >& offset, bool useOffset ) +{ + // Decomposition: GA --> KA + voice-mark + const OUString& newStr = i18nutil::widthfolding::decompose_ja_voiced_sound_marks (inStr, startPos, nCount, offset, useOffset); + + // One to One mapping + return transliteration_OneToOne::transliterateImpl( newStr, 0, newStr.getLength(), offset, false); +} + +sal_Unicode SAL_CALL +fullwidthToHalfwidthLikeASC::transliterateChar2Char( sal_Unicode inChar ) +{ + sal_Unicode newChar = i18nutil::widthfolding::decompose_ja_voiced_sound_marksChar2Char (inChar); + if (newChar == 0xFFFF) + throw MultipleCharsOutputException(); + return transliteration_OneToOne::transliterateChar2Char(inChar); +} + +} + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/i18npool/source/transliteration/halfwidthToFullwidth.cxx b/i18npool/source/transliteration/halfwidthToFullwidth.cxx new file mode 100644 index 000000000..e9d6ebd89 --- /dev/null +++ b/i18npool/source/transliteration/halfwidthToFullwidth.cxx @@ -0,0 +1,87 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. 
You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . + */ + +#include <i18nutil/widthfolding.hxx> + +#include <transliteration_OneToOne.hxx> + +using namespace com::sun::star::uno; +using namespace com::sun::star::lang; + + +namespace i18npool { + +halfwidthToFullwidth::halfwidthToFullwidth() +{ + func = nullptr; + table = &i18nutil::widthfolding::gethalf2fullTable(); + transliterationName = "halfwidthToFullwidth"; + implementationName = "com.sun.star.i18n.Transliteration.HALFWIDTH_FULLWIDTH"; +} + +OUString +halfwidthToFullwidth::transliterateImpl( const OUString& inStr, sal_Int32 startPos, sal_Int32 nCount, Sequence< sal_Int32 >& offset, bool useOffset ) +{ + // One to One mapping + const OUString& newStr = transliteration_OneToOne::transliterateImpl( inStr, startPos, nCount, offset, false); + + // Composition: KA + voice-mark --> GA + return i18nutil::widthfolding::compose_ja_voiced_sound_marks ( newStr, 0, newStr.getLength(), offset, useOffset ); +} + +halfwidthKatakanaToFullwidthKatakana::halfwidthKatakanaToFullwidthKatakana() +{ + func = nullptr; + table = &i18nutil::widthfolding::gethalfKana2fullKanaTable(); + transliterationName = "halfwidthKatakanaToFullwidthKatakana"; + implementationName = "com.sun.star.i18n.Transliteration.HALFWIDTHKATAKANA_FULLWIDTHKATAKANA"; +} + +OUString +halfwidthKatakanaToFullwidthKatakana::transliterateImpl( const OUString& inStr, sal_Int32 startPos, sal_Int32 nCount, Sequence< sal_Int32 >& offset, bool useOffset ) +{ + // One to One mapping + const OUString& newStr = transliteration_OneToOne::transliterateImpl( inStr, startPos, nCount, offset, false); + + // Composition: KA + voice-mark --> GA + return i18nutil::widthfolding::compose_ja_voiced_sound_marks ( newStr, 0, newStr.getLength(), offset, useOffset ); +} + +halfwidthToFullwidthLikeJIS::halfwidthToFullwidthLikeJIS() +{ + func = nullptr; + table = &i18nutil::widthfolding::gethalf2fullTableForJIS(); + transliterationName = 
"halfwidthToFullwidthLikeJIS"; + implementationName = "com.sun.star.i18n.Transliteration.HALFWIDTH_FULLWIDTH_LIKE_JIS"; +} + +OUString +halfwidthToFullwidthLikeJIS::transliterateImpl( const OUString& inStr, sal_Int32 startPos, sal_Int32 nCount, Sequence< sal_Int32 >& offset, bool useOffset ) +{ + // One to One mapping + const OUString& newStr = transliteration_OneToOne::transliterateImpl( inStr, startPos, nCount, offset, false); + + // Composition: KA + voice-mark --> GA + return i18nutil::widthfolding::compose_ja_voiced_sound_marks ( newStr, 0, newStr.getLength(), offset, useOffset, WIDTHFOLDING_DONT_USE_COMBINED_VU ); +} + + +} + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/i18npool/source/transliteration/hiraganaToKatakana.cxx b/i18npool/source/transliteration/hiraganaToKatakana.cxx new file mode 100644 index 000000000..439ec5836 --- /dev/null +++ b/i18npool/source/transliteration/hiraganaToKatakana.cxx @@ -0,0 +1,47 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . 
+ */ + +#include <transliteration_OneToOne.hxx> + +using namespace com::sun::star::uno; +using namespace com::sun::star::lang; + +namespace i18npool { + +// see http://charts.unicode.org/Web/U3040.html Hiragana (U+3040..U+309F) +// see http://charts.unicode.org/Web/U30A0.html Katakana (U+30A0..U+30FF) +static sal_Unicode toKatakana (const sal_Unicode c) { + if ( (0x3041 <= c && c <= 0x3096) || (0x309d <= c && c <= 0x309f) ) { // 3040 - 309F HIRAGANA LETTER + // shift code point by 0x0060 + return c + (0x30a0 - 0x3040); + } + return c; +} + +hiraganaToKatakana::hiraganaToKatakana() +{ + func = toKatakana; + table = nullptr; + transliterationName = "hiraganaToKatakana"; + implementationName = "com.sun.star.i18n.Transliteration.HIRAGANA_KATAKANA"; +} + +} + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/i18npool/source/transliteration/ignoreBaFa_ja_JP.cxx b/i18npool/source/transliteration/ignoreBaFa_ja_JP.cxx new file mode 100644 index 000000000..44646ac14 --- /dev/null +++ b/i18npool/source/transliteration/ignoreBaFa_ja_JP.cxx @@ -0,0 +1,45 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . 
+ */ + +#include <transliteration_Ignore.hxx> + +using namespace com::sun::star::uno; + +namespace i18npool { + +static const transliteration_Ignore::Mapping BaFa[] = { + { 0x30F4, 0x30A1, 0x30D0, true }, + { 0x3094, 0x3041, 0x3070, true }, + { 0x30D5, 0x30A1, 0x30CF, true }, + { 0x3075, 0x3041, 0x306F, true }, + { 0, 0, 0, true } +}; + +ignoreBaFa_ja_JP::ignoreBaFa_ja_JP() +{ + func = nullptr; + table = nullptr; + map = BaFa; + transliterationName = "ignoreBaFa_ja_JP"; + implementationName = "com.sun.star.i18n.Transliteration.ignoreBaFa_ja_JP"; +} + +} + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/i18npool/source/transliteration/ignoreDiacritics_CTL.cxx b/i18npool/source/transliteration/ignoreDiacritics_CTL.cxx new file mode 100644 index 000000000..ada76a735 --- /dev/null +++ b/i18npool/source/transliteration/ignoreDiacritics_CTL.cxx @@ -0,0 +1,95 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ */ + +#include <comphelper/sequence.hxx> +#include <rtl/ustrbuf.hxx> +#include <transliteration_Ignore.hxx> +#include <unicode/translit.h> + +namespace i18npool { + +ignoreDiacritics_CTL::ignoreDiacritics_CTL() +{ + func = nullptr; + table = nullptr; + map = nullptr; + transliterationName = "ignoreDiacritics_CTL"; + implementationName = "com.sun.star.i18n.Transliteration.ignoreDiacritics_CTL"; + + UErrorCode nStatus = U_ZERO_ERROR; + m_transliterator = icu::Transliterator::createInstance("NFD; [:M:] Remove; NFC", + UTRANS_FORWARD, nStatus); + if (U_FAILURE(nStatus)) + m_transliterator = nullptr; +} + +sal_Unicode SAL_CALL +ignoreDiacritics_CTL::transliterateChar2Char(sal_Unicode nInChar) +{ + if (!m_transliterator) + throw css::uno::RuntimeException(); + + icu::UnicodeString aChar(nInChar); + m_transliterator->transliterate(aChar); + + if (aChar.isEmpty()) + return 0xffff; // Skip this character. + + if (aChar.length() > 1) + return nInChar; // Don't know what to do here, return the original. 
+ + return aChar[0]; +} + +OUString +ignoreDiacritics_CTL::foldingImpl(const OUString& rInStr, sal_Int32 nStartPos, + sal_Int32 nCount, css::uno::Sequence<sal_Int32>& rOffset, bool useOffset) +{ + if (!m_transliterator) + throw css::uno::RuntimeException(); + + if (nStartPos < 0 || nStartPos + nCount > rInStr.getLength()) + throw css::uno::RuntimeException(); + + if (useOffset) + { + OUStringBuffer aOutBuf(nCount); + + std::vector<sal_Int32> aOffset; + aOffset.reserve(nCount); + + sal_Int32 nPosition = nStartPos; + while (nPosition < nStartPos + nCount) + { + sal_Int32 nIndex = nPosition; + UChar32 nChar = rInStr.iterateCodePoints(&nIndex); + icu::UnicodeString aUStr(nChar); + m_transliterator->transliterate(aUStr); + + aOutBuf.append(reinterpret_cast<const sal_Unicode*>(aUStr.getBuffer()), aUStr.length()); + + std::fill_n(std::back_inserter(aOffset), aUStr.length(), nPosition); + + nPosition = nIndex; + } + + rOffset = comphelper::containerToSequence(aOffset); + return aOutBuf.makeStringAndClear(); + } + else + { + icu::UnicodeString aUStr(reinterpret_cast<const UChar*>(rInStr.getStr()) + nStartPos, nCount); + m_transliterator->transliterate(aUStr); + return OUString(reinterpret_cast<const sal_Unicode*>(aUStr.getBuffer()), aUStr.length()); + } +} + +} + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/i18npool/source/transliteration/ignoreHyuByu_ja_JP.cxx b/i18npool/source/transliteration/ignoreHyuByu_ja_JP.cxx new file mode 100644 index 000000000..c4b58821c --- /dev/null +++ b/i18npool/source/transliteration/ignoreHyuByu_ja_JP.cxx @@ -0,0 +1,46 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . + */ + +#include <transliteration_Ignore.hxx> + +using namespace com::sun::star::uno; +using namespace com::sun::star::lang; + +namespace i18npool { + +static const transliteration_Ignore::Mapping HyuByu[] = { + { 0x30D5, 0x30E5, 0x30D2, false }, + { 0x3075, 0x3085, 0x3072, false }, + { 0x30F4, 0x30E5, 0x30D3, false }, + { 0x3094, 0x3085, 0x3073, false }, + { 0, 0, 0, false } +}; + +ignoreHyuByu_ja_JP::ignoreHyuByu_ja_JP() +{ + func = nullptr; + table = nullptr; + map = HyuByu; + transliterationName = "ignoreHyuByu_ja_JP"; + implementationName = "com.sun.star.i18n.Transliteration.ignoreHyuByu_ja_JP"; +} + +} + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/i18npool/source/transliteration/ignoreIandEfollowedByYa_ja_JP.cxx b/i18npool/source/transliteration/ignoreIandEfollowedByYa_ja_JP.cxx new file mode 100644 index 000000000..0be8c094c --- /dev/null +++ b/i18npool/source/transliteration/ignoreIandEfollowedByYa_ja_JP.cxx @@ -0,0 +1,124 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . + */ + +#include <i18nutil/oneToOneMapping.hxx> + +#include <transliteration_Ignore.hxx> + +#include <numeric> + +using namespace com::sun::star::uno; +using namespace com::sun::star::lang; + +namespace i18npool { + +i18nutil::OneToOneMappingTable_t const IandE[] = { + { 0x30A3, 0x0000 }, // KATAKANA LETTER SMALL I + { 0x30A4, 0x0000 }, // KATAKANA LETTER I + { 0x30A7, 0x0000 }, // KATAKANA LETTER SMALL E + { 0x30A8, 0x0000 }, // KATAKANA LETTER E + { 0x30AD, 0x0000 }, // KATAKANA LETTER KI + { 0x30AE, 0x0000 }, // KATAKANA LETTER GI + { 0x30B1, 0x0000 }, // KATAKANA LETTER KE + { 0x30B2, 0x0000 }, // KATAKANA LETTER GE + { 0x30B7, 0x0000 }, // KATAKANA LETTER SI + { 0x30B8, 0x0000 }, // KATAKANA LETTER ZI + { 0x30BB, 0x0000 }, // KATAKANA LETTER SE + { 0x30BC, 0x0000 }, // KATAKANA LETTER ZE + { 0x30C1, 0x0000 }, // KATAKANA LETTER TI + { 0x30C2, 0x0000 }, // KATAKANA LETTER DI + { 0x30C6, 0x0000 }, // KATAKANA LETTER TE + { 0x30C7, 0x0000 }, // KATAKANA LETTER DE + { 0x30CB, 0x0000 }, // KATAKANA LETTER NI + { 0x30CD, 0x0000 }, // KATAKANA LETTER NE + { 0x30D2, 0x0000 }, // KATAKANA LETTER HI + { 0x30D3, 0x0000 }, // KATAKANA LETTER BI + { 0x30D4, 0x0000 }, // KATAKANA LETTER PI + { 0x30D8, 0x0000 }, // KATAKANA LETTER HE + { 0x30D9, 0x0000 }, // KATAKANA LETTER BE + { 0x30DA, 0x0000 }, // KATAKANA LETTER PE + { 0x30DF, 0x0000 }, // KATAKANA LETTER MI + { 0x30E1, 0x0000 }, // KATAKANA LETTER ME + { 0x30EA, 0x0000 }, // 
KATAKANA LETTER RI + { 0x30EC, 0x0000 }, // KATAKANA LETTER RE + { 0x30F0, 0x0000 }, // KATAKANA LETTER WI + { 0x30F1, 0x0000 }, // KATAKANA LETTER WE + { 0x30F6, 0x0000 }, // KATAKANA LETTER SMALL KE + { 0x30F8, 0x0000 }, // KATAKANA LETTER VI + { 0x30F9, 0x0000 } // KATAKANA LETTER VE +}; + + +OUString +ignoreIandEfollowedByYa_ja_JP::foldingImpl( const OUString& inStr, sal_Int32 startPos, sal_Int32 nCount, Sequence< sal_Int32 >& offset, bool useOffset ) +{ + // Create a string buffer which can hold nCount + 1 characters. + // The reference count is 1 now. + rtl_uString * newStr = rtl_uString_alloc(nCount); + sal_Unicode * dst = newStr->buffer; + const sal_Unicode * src = inStr.getStr() + startPos; + + if (useOffset) { + // Allocate nCount length to offset argument. + offset.realloc( nCount ); + std::iota(offset.begin(), offset.end(), startPos); + } + + + sal_Unicode previousChar = *src ++; + sal_Unicode currentChar; + + // One to one mapping + i18nutil::oneToOneMapping aTable(IandE, sizeof(IandE)); + + // Translation + while (-- nCount > 0) { + currentChar = *src ++; + + // the character listed in above table + YA --> the character + A + if (currentChar == 0x30E3 || // KATAKANA LETTER SMALL YA + currentChar == 0x30E4) { // KATAKANA LETTER YA + if (aTable[ previousChar ] != previousChar) { + *dst ++ = previousChar; + *dst ++ = 0x30A2; // KATAKANA LETTER A + previousChar = *src ++; + nCount --; + continue; + } + } + + *dst ++ = previousChar; + previousChar = currentChar; + } + + if (nCount == 0) { + *dst ++ = previousChar; + } + + *dst = u'\0'; + + newStr->length = sal_Int32(dst - newStr->buffer); + if (useOffset) + offset.realloc(newStr->length); + return OUString(newStr, SAL_NO_ACQUIRE); // take ownership +} + +} + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/i18npool/source/transliteration/ignoreIterationMark_ja_JP.cxx b/i18npool/source/transliteration/ignoreIterationMark_ja_JP.cxx new file mode 100644 index 000000000..66e538451 --- 
/dev/null +++ b/i18npool/source/transliteration/ignoreIterationMark_ja_JP.cxx @@ -0,0 +1,138 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . + */ + +#include <i18nutil/oneToOneMapping.hxx> + +#include <transliteration_Ignore.hxx> + +#include <numeric> + +using namespace com::sun::star::uno; +using namespace com::sun::star::lang; + + +namespace i18npool { + +i18nutil::OneToOneMappingTable_t const ignoreIterationMark_ja_JP_mappingTable[] = { + { 0x3046, 0x3094 }, // HIRAGANA LETTER U --> HIRAGANA LETTER VU + { 0x304B, 0x304C }, // HIRAGANA LETTER KA --> HIRAGANA LETTER GA + { 0x304D, 0x304E }, // HIRAGANA LETTER KI --> HIRAGANA LETTER GI + { 0x304F, 0x3050 }, // HIRAGANA LETTER KU --> HIRAGANA LETTER GU + { 0x3051, 0x3052 }, // HIRAGANA LETTER KE --> HIRAGANA LETTER GE + { 0x3053, 0x3054 }, // HIRAGANA LETTER KO --> HIRAGANA LETTER GO + { 0x3055, 0x3056 }, // HIRAGANA LETTER SA --> HIRAGANA LETTER ZA + { 0x3057, 0x3058 }, // HIRAGANA LETTER SI --> HIRAGANA LETTER ZI + { 0x3059, 0x305A }, // HIRAGANA LETTER SU --> HIRAGANA LETTER ZU + { 0x305B, 0x305C }, // HIRAGANA LETTER SE --> HIRAGANA LETTER ZE + { 0x305D, 0x305E }, // HIRAGANA 
LETTER SO --> HIRAGANA LETTER ZO + { 0x305F, 0x3060 }, // HIRAGANA LETTER TA --> HIRAGANA LETTER DA + { 0x3061, 0x3062 }, // HIRAGANA LETTER TI --> HIRAGANA LETTER DI + { 0x3064, 0x3065 }, // HIRAGANA LETTER TU --> HIRAGANA LETTER DU + { 0x3066, 0x3067 }, // HIRAGANA LETTER TE --> HIRAGANA LETTER DE + { 0x3068, 0x3069 }, // HIRAGANA LETTER TO --> HIRAGANA LETTER DO + { 0x306F, 0x3070 }, // HIRAGANA LETTER HA --> HIRAGANA LETTER BA + { 0x3072, 0x3073 }, // HIRAGANA LETTER HI --> HIRAGANA LETTER BI + { 0x3075, 0x3076 }, // HIRAGANA LETTER HU --> HIRAGANA LETTER BU + { 0x3078, 0x3079 }, // HIRAGANA LETTER HE --> HIRAGANA LETTER BE + { 0x307B, 0x307C }, // HIRAGANA LETTER HO --> HIRAGANA LETTER BO + { 0x309D, 0x309E }, // HIRAGANA ITERATION MARK --> HIRAGANA VOICED ITERATION MARK + { 0x30A6, 0x30F4 }, // KATAKANA LETTER U --> KATAKANA LETTER VU + { 0x30AB, 0x30AC }, // KATAKANA LETTER KA --> KATAKANA LETTER GA + { 0x30AD, 0x30AE }, // KATAKANA LETTER KI --> KATAKANA LETTER GI + { 0x30AF, 0x30B0 }, // KATAKANA LETTER KU --> KATAKANA LETTER GU + { 0x30B1, 0x30B2 }, // KATAKANA LETTER KE --> KATAKANA LETTER GE + { 0x30B3, 0x30B4 }, // KATAKANA LETTER KO --> KATAKANA LETTER GO + { 0x30B5, 0x30B6 }, // KATAKANA LETTER SA --> KATAKANA LETTER ZA + { 0x30B7, 0x30B8 }, // KATAKANA LETTER SI --> KATAKANA LETTER ZI + { 0x30B9, 0x30BA }, // KATAKANA LETTER SU --> KATAKANA LETTER ZU + { 0x30BB, 0x30BC }, // KATAKANA LETTER SE --> KATAKANA LETTER ZE + { 0x30BD, 0x30BE }, // KATAKANA LETTER SO --> KATAKANA LETTER ZO + { 0x30BF, 0x30C0 }, // KATAKANA LETTER TA --> KATAKANA LETTER DA + { 0x30C1, 0x30C2 }, // KATAKANA LETTER TI --> KATAKANA LETTER DI + { 0x30C4, 0x30C5 }, // KATAKANA LETTER TU --> KATAKANA LETTER DU + { 0x30C6, 0x30C7 }, // KATAKANA LETTER TE --> KATAKANA LETTER DE + { 0x30C8, 0x30C9 }, // KATAKANA LETTER TO --> KATAKANA LETTER DO + { 0x30CF, 0x30D0 }, // KATAKANA LETTER HA --> KATAKANA LETTER BA + { 0x30D2, 0x30D3 }, // KATAKANA LETTER HI --> KATAKANA LETTER BI + { 
0x30D5, 0x30D6 }, // KATAKANA LETTER HU --> KATAKANA LETTER BU + { 0x30D8, 0x30D9 }, // KATAKANA LETTER HE --> KATAKANA LETTER BE + { 0x30DB, 0x30DC }, // KATAKANA LETTER HO --> KATAKANA LETTER BO + { 0x30EF, 0x30F7 }, // KATAKANA LETTER WA --> KATAKANA LETTER VA + { 0x30F0, 0x30F8 }, // KATAKANA LETTER WI --> KATAKANA LETTER VI + { 0x30F1, 0x30F9 }, // KATAKANA LETTER WE --> KATAKANA LETTER VE + { 0x30F2, 0x30FA }, // KATAKANA LETTER WO --> KATAKANA LETTER VO + { 0x30FD, 0x30FE } // KATAKANA ITERATION MARK --> KATAKANA VOICED ITERATION MARK +}; + + +OUString +ignoreIterationMark_ja_JP::foldingImpl( const OUString& inStr, sal_Int32 startPos, sal_Int32 nCount, Sequence< sal_Int32 >& offset, bool useOffset ) +{ + i18nutil::oneToOneMapping aTable(ignoreIterationMark_ja_JP_mappingTable, sizeof(ignoreIterationMark_ja_JP_mappingTable)); + + // Create a string buffer which can hold nCount + 1 characters. + // The reference count is 1 now. + rtl_uString * newStr = rtl_uString_alloc(nCount); + sal_Unicode * dst = newStr->buffer; + const sal_Unicode * src = inStr.getStr() + startPos; + + if (useOffset) { + // Allocate nCount length to offset argument. 
+ offset.realloc( nCount ); + std::iota(offset.begin(), offset.end(), startPos); + } + + + sal_Unicode previousChar = *src ++; + sal_Unicode currentChar; + + // Conversion + while (-- nCount > 0) { + currentChar = *src ++; + + switch ( currentChar ) { + case 0x30fd: // KATAKANA ITERATION MARK + case 0x309d: // HIRAGANA ITERATION MARK + case 0x3005: // IDEOGRAPHIC ITERATION MARK + currentChar = previousChar; + break; + case 0x30fe: // KATAKANA VOICED ITERATION MARK + case 0x309e: // HIRAGANA VOICED ITERATION MARK + currentChar = aTable[ previousChar ]; + break; + } + *dst ++ = previousChar; + previousChar = currentChar; + } + + if (nCount == 0) { + *dst ++ = previousChar; + } + + *dst = u'\0'; + + newStr->length = sal_Int32(dst - newStr->buffer); + if (useOffset) + offset.realloc(newStr->length); + return OUString(newStr, SAL_NO_ACQUIRE); // take ownership +} + +} + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/i18npool/source/transliteration/ignoreKana.cxx b/i18npool/source/transliteration/ignoreKana.cxx new file mode 100644 index 000000000..7e94558f4 --- /dev/null +++ b/i18npool/source/transliteration/ignoreKana.cxx @@ -0,0 +1,65 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. 
You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . + */ + +#include <rtl/ref.hxx> + +#include <transliteration_Ignore.hxx> +#include <transliteration_OneToOne.hxx> + +namespace com::sun::star::uno { class XComponentContext; } + +using namespace com::sun::star::uno; +using namespace com::sun::star::lang; + +namespace i18npool { + +OUString +ignoreKana::foldingImpl( const OUString& inStr, sal_Int32 startPos, sal_Int32 nCount, Sequence< sal_Int32 >& offset, bool useOffset ) +{ + rtl::Reference< hiraganaToKatakana > t1(new hiraganaToKatakana); + return t1->transliterateImpl(inStr, startPos, nCount, offset, useOffset); +} + +Sequence< OUString > SAL_CALL +ignoreKana::transliterateRange( const OUString& str1, const OUString& str2 ) +{ + rtl::Reference< hiraganaToKatakana > t1(new hiraganaToKatakana); + rtl::Reference< katakanaToHiragana > t2(new katakanaToHiragana); + + return transliteration_Ignore::transliterateRange(str1, str2, *t1, *t2); +} + +sal_Unicode SAL_CALL +ignoreKana::transliterateChar2Char( sal_Unicode inChar) +{ + rtl::Reference< hiraganaToKatakana > t1(new hiraganaToKatakana); + return t1->transliterateChar2Char(inChar); +} + +} + +extern "C" SAL_DLLPUBLIC_EXPORT css::uno::XInterface * +com_sun_star_i18n_Transliteration_IGNORE_KANA_get_implementation( + css::uno::XComponentContext *, + css::uno::Sequence<css::uno::Any> const &) +{ + return cppu::acquire(new i18npool::ignoreKana()); +} + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/i18npool/source/transliteration/ignoreKashida_CTL.cxx b/i18npool/source/transliteration/ignoreKashida_CTL.cxx new file mode 100644 index 000000000..4dfb54da2 --- /dev/null +++ b/i18npool/source/transliteration/ignoreKashida_CTL.cxx @@ -0,0 +1,34 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 
2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + */ + +#include <transliteration_Ignore.hxx> + +namespace i18npool { + +static sal_Unicode +ignoreKashida_CTL_translator (const sal_Unicode c) +{ + if( c == 0x0640 ) // Check if it's Kashida + return 0xffff; // Then skip this character + + return c; +} + +ignoreKashida_CTL::ignoreKashida_CTL() +{ + func = ignoreKashida_CTL_translator; + table = nullptr; + map = nullptr; + transliterationName = "ignoreKashida_CTL"; + implementationName = "com.sun.star.i18n.Transliteration.ignoreKashida_CTL"; +} + +} + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/i18npool/source/transliteration/ignoreKiKuFollowedBySa_ja_JP.cxx b/i18npool/source/transliteration/ignoreKiKuFollowedBySa_ja_JP.cxx new file mode 100644 index 000000000..53a2f058d --- /dev/null +++ b/i18npool/source/transliteration/ignoreKiKuFollowedBySa_ja_JP.cxx @@ -0,0 +1,82 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . 
+ */ + +#include <transliteration_Ignore.hxx> + +#include <numeric> + +using namespace com::sun::star::uno; +using namespace com::sun::star::lang; + +namespace i18npool { + +OUString +ignoreKiKuFollowedBySa_ja_JP::foldingImpl( const OUString& inStr, sal_Int32 startPos, sal_Int32 nCount, Sequence< sal_Int32 >& offset, bool useOffset ) +{ + // Create a string buffer which can hold nCount + 1 characters. + // The reference count is 1 now. + rtl_uString * newStr = rtl_uString_alloc(nCount); + sal_Unicode * dst = newStr->buffer; + const sal_Unicode * src = inStr.getStr() + startPos; + + if (useOffset) { + // Allocate nCount length to offset argument. + offset.realloc( nCount ); + std::iota(offset.begin(), offset.end(), startPos); + } + + + sal_Unicode previousChar = *src ++; + sal_Unicode currentChar; + + // Translation + while (-- nCount > 0) { + currentChar = *src ++; + + // KU + Sa-So --> KI + Sa-So + if (previousChar == 0x30AF ) { // KATAKANA LETTER KU + if (0x30B5 <= currentChar && // KATAKANA LETTER SA + currentChar <= 0x30BE) { // KATAKANA LETTER ZO + *dst ++ = 0x30AD; // KATAKANA LETTER KI + *dst ++ = currentChar; + previousChar = *src ++; + nCount --; + continue; + } + } + + *dst ++ = previousChar; + previousChar = currentChar; + } + + if (nCount == 0) { + *dst ++ = previousChar; + } + + *dst = u'\0'; + + newStr->length = sal_Int32(dst - newStr->buffer); + if (useOffset) + offset.realloc(newStr->length); + return OUString(newStr, SAL_NO_ACQUIRE); // take ownership +} + +} + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/i18npool/source/transliteration/ignoreMiddleDot_ja_JP.cxx b/i18npool/source/transliteration/ignoreMiddleDot_ja_JP.cxx new file mode 100644 index 000000000..b95a46910 --- /dev/null +++ b/i18npool/source/transliteration/ignoreMiddleDot_ja_JP.cxx @@ -0,0 +1,47 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. 
+ * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . + */ + +#include <transliteration_Ignore.hxx> + +namespace i18npool { + +static sal_Unicode +ignoreMiddleDot_ja_JP_translator (const sal_Unicode c) +{ + switch (c) { + case 0x30FB: // KATAKANA MIDDLE DOT + case 0xFF65: // HALFWIDTH KATAKANA MIDDLE DOT + // no break; + return 0xffff; // Skip this character + } + return c; +} + +ignoreMiddleDot_ja_JP::ignoreMiddleDot_ja_JP() +{ + func = ignoreMiddleDot_ja_JP_translator; + table = nullptr; + map = nullptr; + transliterationName = "ignoreMiddleDot_ja_JP"; + implementationName = "com.sun.star.i18n.Transliteration.ignoreMiddleDot_ja_JP"; +} + +} + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/i18npool/source/transliteration/ignoreMinusSign_ja_JP.cxx b/i18npool/source/transliteration/ignoreMinusSign_ja_JP.cxx new file mode 100644 index 000000000..831d03898 --- /dev/null +++ b/i18npool/source/transliteration/ignoreMinusSign_ja_JP.cxx @@ -0,0 +1,54 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. 
If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . + */ + +#include <transliteration_Ignore.hxx> + +namespace i18npool { + +static sal_Unicode +ignoreMinusSign_ja_JP_translator (const sal_Unicode c) +{ + switch (c) { + case 0x2212: // MINUS SIGN + case 0x002d: // HYPHEN-MINUS + case 0x2010: // HYPHEN + case 0x2011: // NON-BREAKING HYPHEN + case 0x2012: // FIGURE DASH + case 0x2013: // EN DASH + case 0x2014: // EM DASH + case 0x2015: // HORIZONTAL BAR + case 0xff0d: // FULLWIDTH HYPHEN-MINUS + case 0xff70: // HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK + return 0x30fc; // KATAKANA-HIRAGANA PROLONGED SOUND MARK + } + return c; +} + +ignoreMinusSign_ja_JP::ignoreMinusSign_ja_JP() +{ + func = ignoreMinusSign_ja_JP_translator; + table = nullptr; + map = nullptr; + transliterationName = "ignoreMinusSign_ja_JP"; + implementationName = "com.sun.star.i18n.Transliteration.ignoreMinusSign_ja_JP"; +} + +} + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/i18npool/source/transliteration/ignoreProlongedSoundMark_ja_JP.cxx b/i18npool/source/transliteration/ignoreProlongedSoundMark_ja_JP.cxx new file mode 100644 index 000000000..91358dc60 --- /dev/null +++ b/i18npool/source/transliteration/ignoreProlongedSoundMark_ja_JP.cxx @@ -0,0 +1,344 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the 
LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . + */ + +#include <transliteration_Ignore.hxx> + +#include <numeric> + +using namespace com::sun::star::uno; +using namespace com::sun::star::lang; + +namespace i18npool { + +static const sal_Unicode table_normalwidth[] = { + // 0x0000, // 0x3040 + 0x3041, // 0x3041 HIRAGANA LETTER SMALL A + 0x3042, // 0x3042 HIRAGANA LETTER A + 0x3043, // 0x3043 HIRAGANA LETTER SMALL I + 0x3044, // 0x3044 HIRAGANA LETTER I + 0x3045, // 0x3045 HIRAGANA LETTER SMALL U + 0x3046, // 0x3046 HIRAGANA LETTER U + 0x3047, // 0x3047 HIRAGANA LETTER SMALL E + 0x3048, // 0x3048 HIRAGANA LETTER E + 0x3049, // 0x3049 HIRAGANA LETTER SMALL O + 0x304a, // 0x304a HIRAGANA LETTER O + 0x3042, // 0x304b HIRAGANA LETTER KA + 0x3042, // 0x304c HIRAGANA LETTER GA + 0x3044, // 0x304d HIRAGANA LETTER KI + 0x3044, // 0x304e HIRAGANA LETTER GI + 0x3046, // 0x304f HIRAGANA LETTER KU + 0x3046, // 0x3050 HIRAGANA LETTER GU + 0x3048, // 0x3051 HIRAGANA LETTER KE + 0x3048, // 0x3052 HIRAGANA LETTER GE + 0x304a, // 0x3053 HIRAGANA LETTER KO + 0x304a, // 0x3054 HIRAGANA LETTER GO + 0x3042, // 0x3055 HIRAGANA LETTER SA + 0x3042, // 0x3056 HIRAGANA LETTER ZA + 0x3044, // 0x3057 HIRAGANA LETTER SI + 0x3044, // 0x3058 HIRAGANA LETTER ZI + 0x3046, // 
0x3059 HIRAGANA LETTER SU + 0x3046, // 0x305a HIRAGANA LETTER ZU + 0x3048, // 0x305b HIRAGANA LETTER SE + 0x3048, // 0x305c HIRAGANA LETTER ZE + 0x304a, // 0x305d HIRAGANA LETTER SO + 0x304a, // 0x305e HIRAGANA LETTER ZO + 0x3042, // 0x305f HIRAGANA LETTER TA + 0x3042, // 0x3060 HIRAGANA LETTER DA + 0x3044, // 0x3061 HIRAGANA LETTER TI + 0x3044, // 0x3062 HIRAGANA LETTER DI + 0x3045, // 0x3063 HIRAGANA LETTER SMALL TU + 0x3046, // 0x3064 HIRAGANA LETTER TU + 0x3046, // 0x3065 HIRAGANA LETTER DU + 0x3048, // 0x3066 HIRAGANA LETTER TE + 0x3048, // 0x3067 HIRAGANA LETTER DE + 0x304a, // 0x3068 HIRAGANA LETTER TO + 0x304a, // 0x3069 HIRAGANA LETTER DO + 0x3042, // 0x306a HIRAGANA LETTER NA + 0x3044, // 0x306b HIRAGANA LETTER NI + 0x3046, // 0x306c HIRAGANA LETTER NU + 0x3048, // 0x306d HIRAGANA LETTER NE + 0x304a, // 0x306e HIRAGANA LETTER NO + 0x3042, // 0x306f HIRAGANA LETTER HA + 0x3042, // 0x3070 HIRAGANA LETTER BA + 0x3042, // 0x3071 HIRAGANA LETTER PA + 0x3044, // 0x3072 HIRAGANA LETTER HI + 0x3044, // 0x3073 HIRAGANA LETTER BI + 0x3044, // 0x3074 HIRAGANA LETTER PI + 0x3046, // 0x3075 HIRAGANA LETTER HU + 0x3046, // 0x3076 HIRAGANA LETTER BU + 0x3046, // 0x3077 HIRAGANA LETTER PU + 0x3048, // 0x3078 HIRAGANA LETTER HE + 0x3048, // 0x3079 HIRAGANA LETTER BE + 0x3048, // 0x307a HIRAGANA LETTER PE + 0x304a, // 0x307b HIRAGANA LETTER HO + 0x304a, // 0x307c HIRAGANA LETTER BO + 0x304a, // 0x307d HIRAGANA LETTER PO + 0x3042, // 0x307e HIRAGANA LETTER MA + 0x3044, // 0x307f HIRAGANA LETTER MI + 0x3046, // 0x3080 HIRAGANA LETTER MU + 0x3048, // 0x3081 HIRAGANA LETTER ME + 0x304a, // 0x3082 HIRAGANA LETTER MO + 0x3041, // 0x3083 HIRAGANA LETTER SMALL YA + 0x3042, // 0x3084 HIRAGANA LETTER YA + 0x3045, // 0x3085 HIRAGANA LETTER SMALL YU + 0x3046, // 0x3086 HIRAGANA LETTER YU + 0x3049, // 0x3087 HIRAGANA LETTER SMALL YO + 0x304a, // 0x3088 HIRAGANA LETTER YO + 0x3042, // 0x3089 HIRAGANA LETTER RA + 0x3044, // 0x308a HIRAGANA LETTER RI + 0x3046, // 0x308b HIRAGANA LETTER RU 
+ 0x3048, // 0x308c HIRAGANA LETTER RE + 0x304a, // 0x308d HIRAGANA LETTER RO + 0x3041, // 0x308e HIRAGANA LETTER SMALL WA + 0x3042, // 0x308f HIRAGANA LETTER WA + 0x3044, // 0x3090 HIRAGANA LETTER WI + 0x3048, // 0x3091 HIRAGANA LETTER WE + 0x304a, // 0x3092 HIRAGANA LETTER WO + 0x0000, // 0x3093 HIRAGANA LETTER N + 0x3046, // 0x3094 HIRAGANA LETTER VU + 0x0000, // 0x3095 + 0x0000, // 0x3096 + 0x0000, // 0x3097 + 0x0000, // 0x3098 + 0x0000, // 0x3099 COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK + 0x0000, // 0x309a COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK + 0x0000, // 0x309b KATAKANA-HIRAGANA VOICED SOUND MARK + 0x0000, // 0x309c KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK + 0x0000, // 0x309d HIRAGANA ITERATION MARK + 0x0000, // 0x309e HIRAGANA VOICED ITERATION MARK + 0x0000, // 0x309f + 0x0000, // 0x30a0 + 0x30a1, // 0x30a1 KATAKANA LETTER SMALL A + 0x30a2, // 0x30a2 KATAKANA LETTER A + 0x30a3, // 0x30a3 KATAKANA LETTER SMALL I + 0x30a4, // 0x30a4 KATAKANA LETTER I + 0x30a5, // 0x30a5 KATAKANA LETTER SMALL U + 0x30a6, // 0x30a6 KATAKANA LETTER U + 0x30a7, // 0x30a7 KATAKANA LETTER SMALL E + 0x30a8, // 0x30a8 KATAKANA LETTER E + 0x30a9, // 0x30a9 KATAKANA LETTER SMALL O + 0x30aa, // 0x30aa KATAKANA LETTER O + 0x30a2, // 0x30ab KATAKANA LETTER KA + 0x30a2, // 0x30ac KATAKANA LETTER GA + 0x30a4, // 0x30ad KATAKANA LETTER KI + 0x30a4, // 0x30ae KATAKANA LETTER GI + 0x30a6, // 0x30af KATAKANA LETTER KU + 0x30a6, // 0x30b0 KATAKANA LETTER GU + 0x30a8, // 0x30b1 KATAKANA LETTER KE + 0x30a8, // 0x30b2 KATAKANA LETTER GE + 0x30aa, // 0x30b3 KATAKANA LETTER KO + 0x30aa, // 0x30b4 KATAKANA LETTER GO + 0x30a2, // 0x30b5 KATAKANA LETTER SA + 0x30a2, // 0x30b6 KATAKANA LETTER ZA + 0x30a4, // 0x30b7 KATAKANA LETTER SI + 0x30a4, // 0x30b8 KATAKANA LETTER ZI + 0x30a6, // 0x30b9 KATAKANA LETTER SU + 0x30a6, // 0x30ba KATAKANA LETTER ZU + 0x30a8, // 0x30bb KATAKANA LETTER SE + 0x30a8, // 0x30bc KATAKANA LETTER ZE + 0x30aa, // 0x30bd KATAKANA LETTER SO + 0x30aa, // 0x30be 
KATAKANA LETTER ZO + 0x30a2, // 0x30bf KATAKANA LETTER TA + 0x30a2, // 0x30c0 KATAKANA LETTER DA + 0x30a4, // 0x30c1 KATAKANA LETTER TI + 0x30a4, // 0x30c2 KATAKANA LETTER DI + 0x30a5, // 0x30c3 KATAKANA LETTER SMALL TU + 0x30a6, // 0x30c4 KATAKANA LETTER TU + 0x30a6, // 0x30c5 KATAKANA LETTER DU + 0x30a8, // 0x30c6 KATAKANA LETTER TE + 0x30a8, // 0x30c7 KATAKANA LETTER DE + 0x30aa, // 0x30c8 KATAKANA LETTER TO + 0x30aa, // 0x30c9 KATAKANA LETTER DO + 0x30a2, // 0x30ca KATAKANA LETTER NA + 0x30a4, // 0x30cb KATAKANA LETTER NI + 0x30a6, // 0x30cc KATAKANA LETTER NU + 0x30a8, // 0x30cd KATAKANA LETTER NE + 0x30aa, // 0x30ce KATAKANA LETTER NO + 0x30a2, // 0x30cf KATAKANA LETTER HA + 0x30a2, // 0x30d0 KATAKANA LETTER BA + 0x30a2, // 0x30d1 KATAKANA LETTER PA + 0x30a4, // 0x30d2 KATAKANA LETTER HI + 0x30a4, // 0x30d3 KATAKANA LETTER BI + 0x30a4, // 0x30d4 KATAKANA LETTER PI + 0x30a6, // 0x30d5 KATAKANA LETTER HU + 0x30a6, // 0x30d6 KATAKANA LETTER BU + 0x30a6, // 0x30d7 KATAKANA LETTER PU + 0x30a8, // 0x30d8 KATAKANA LETTER HE + 0x30a8, // 0x30d9 KATAKANA LETTER BE + 0x30a8, // 0x30da KATAKANA LETTER PE + 0x30aa, // 0x30db KATAKANA LETTER HO + 0x30aa, // 0x30dc KATAKANA LETTER BO + 0x30aa, // 0x30dd KATAKANA LETTER PO + 0x30a2, // 0x30de KATAKANA LETTER MA + 0x30a4, // 0x30df KATAKANA LETTER MI + 0x30a6, // 0x30e0 KATAKANA LETTER MU + 0x30a8, // 0x30e1 KATAKANA LETTER ME + 0x30aa, // 0x30e2 KATAKANA LETTER MO + 0x30a1, // 0x30e3 KATAKANA LETTER SMALL YA + 0x30a2, // 0x30e4 KATAKANA LETTER YA + 0x30a5, // 0x30e5 KATAKANA LETTER SMALL YU + 0x30a6, // 0x30e6 KATAKANA LETTER YU + 0x30a9, // 0x30e7 KATAKANA LETTER SMALL YO + 0x30aa, // 0x30e8 KATAKANA LETTER YO + 0x30a2, // 0x30e9 KATAKANA LETTER RA + 0x30a4, // 0x30ea KATAKANA LETTER RI + 0x30a6, // 0x30eb KATAKANA LETTER RU + 0x30a8, // 0x30ec KATAKANA LETTER RE + 0x30aa, // 0x30ed KATAKANA LETTER RO + 0x30a1, // 0x30ee KATAKANA LETTER SMALL WA + 0x30a2, // 0x30ef KATAKANA LETTER WA + 0x30a4, // 0x30f0 KATAKANA LETTER WI 
+ 0x30a8, // 0x30f1 KATAKANA LETTER WE + 0x30aa, // 0x30f2 KATAKANA LETTER WO + 0x0000, // 0x30f3 KATAKANA LETTER N + 0x30a6, // 0x30f4 KATAKANA LETTER VU + 0x30a1, // 0x30f5 KATAKANA LETTER SMALL KA + 0x30a7, // 0x30f6 KATAKANA LETTER SMALL KE + 0x30a2, // 0x30f7 KATAKANA LETTER VA + 0x30a4, // 0x30f8 KATAKANA LETTER VI + 0x30a8, // 0x30f9 KATAKANA LETTER VE + 0x30aa // 0x30fa KATAKANA LETTER VO + // 0x0000, // 0x30fb KATAKANA MIDDLE DOT + // 0x0000, // 0x30fc KATAKANA-HIRAGANA PROLONGED SOUND MARK + // 0x0000, // 0x30fd KATAKANA ITERATION MARK + // 0x0000, // 0x30fe KATAKANA VOICED ITERATION MARK + // 0x0000 // 0x30ff +}; + +static const sal_Unicode table_halfwidth[] = { + // 0x0000, // 0xff61 HALFWIDTH IDEOGRAPHIC FULL STOP + // 0x0000, // 0xff62 HALFWIDTH LEFT CORNER BRACKET + // 0x0000, // 0xff63 HALFWIDTH RIGHT CORNER BRACKET + // 0x0000, // 0xff64 HALFWIDTH IDEOGRAPHIC COMMA + // 0x0000, // 0xff65 HALFWIDTH KATAKANA MIDDLE DOT + 0xff75, // 0xff66 HALFWIDTH KATAKANA LETTER WO + 0xff67, // 0xff67 HALFWIDTH KATAKANA LETTER SMALL A + 0xff68, // 0xff68 HALFWIDTH KATAKANA LETTER SMALL I + 0xff69, // 0xff69 HALFWIDTH KATAKANA LETTER SMALL U + 0xff6a, // 0xff6a HALFWIDTH KATAKANA LETTER SMALL E + 0xff6b, // 0xff6b HALFWIDTH KATAKANA LETTER SMALL O + 0xff67, // 0xff6c HALFWIDTH KATAKANA LETTER SMALL YA + 0xff69, // 0xff6d HALFWIDTH KATAKANA LETTER SMALL YU + 0xff6b, // 0xff6e HALFWIDTH KATAKANA LETTER SMALL YO + 0xff69, // 0xff6f HALFWIDTH KATAKANA LETTER SMALL TU + 0x0000, // 0xff70 HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK + 0xff71, // 0xff71 HALFWIDTH KATAKANA LETTER A + 0xff72, // 0xff72 HALFWIDTH KATAKANA LETTER I + 0xff73, // 0xff73 HALFWIDTH KATAKANA LETTER U + 0xff74, // 0xff74 HALFWIDTH KATAKANA LETTER E + 0xff75, // 0xff75 HALFWIDTH KATAKANA LETTER O + 0xff71, // 0xff76 HALFWIDTH KATAKANA LETTER KA + 0xff72, // 0xff77 HALFWIDTH KATAKANA LETTER KI + 0xff73, // 0xff78 HALFWIDTH KATAKANA LETTER KU + 0xff74, // 0xff79 HALFWIDTH KATAKANA LETTER KE + 
0xff75, // 0xff7a HALFWIDTH KATAKANA LETTER KO + 0xff71, // 0xff7b HALFWIDTH KATAKANA LETTER SA + 0xff72, // 0xff7c HALFWIDTH KATAKANA LETTER SI + 0xff73, // 0xff7d HALFWIDTH KATAKANA LETTER SU + 0xff74, // 0xff7e HALFWIDTH KATAKANA LETTER SE + 0xff75, // 0xff7f HALFWIDTH KATAKANA LETTER SO + 0xff71, // 0xff80 HALFWIDTH KATAKANA LETTER TA + 0xff72, // 0xff81 HALFWIDTH KATAKANA LETTER TI + 0xff73, // 0xff82 HALFWIDTH KATAKANA LETTER TU + 0xff74, // 0xff83 HALFWIDTH KATAKANA LETTER TE + 0xff75, // 0xff84 HALFWIDTH KATAKANA LETTER TO + 0xff71, // 0xff85 HALFWIDTH KATAKANA LETTER NA + 0xff72, // 0xff86 HALFWIDTH KATAKANA LETTER NI + 0xff73, // 0xff87 HALFWIDTH KATAKANA LETTER NU + 0xff74, // 0xff88 HALFWIDTH KATAKANA LETTER NE + 0xff75, // 0xff89 HALFWIDTH KATAKANA LETTER NO + 0xff71, // 0xff8a HALFWIDTH KATAKANA LETTER HA + 0xff72, // 0xff8b HALFWIDTH KATAKANA LETTER HI + 0xff73, // 0xff8c HALFWIDTH KATAKANA LETTER HU + 0xff74, // 0xff8d HALFWIDTH KATAKANA LETTER HE + 0xff75, // 0xff8e HALFWIDTH KATAKANA LETTER HO + 0xff71, // 0xff8f HALFWIDTH KATAKANA LETTER MA + 0xff72, // 0xff90 HALFWIDTH KATAKANA LETTER MI + 0xff73, // 0xff91 HALFWIDTH KATAKANA LETTER MU + 0xff74, // 0xff92 HALFWIDTH KATAKANA LETTER ME + 0xff75, // 0xff93 HALFWIDTH KATAKANA LETTER MO + 0xff71, // 0xff94 HALFWIDTH KATAKANA LETTER YA + 0xff73, // 0xff95 HALFWIDTH KATAKANA LETTER YU + 0xff75, // 0xff96 HALFWIDTH KATAKANA LETTER YO + 0xff71, // 0xff97 HALFWIDTH KATAKANA LETTER RA + 0xff72, // 0xff98 HALFWIDTH KATAKANA LETTER RI + 0xff73, // 0xff99 HALFWIDTH KATAKANA LETTER RU + 0xff74, // 0xff9a HALFWIDTH KATAKANA LETTER RE + 0xff75, // 0xff9b HALFWIDTH KATAKANA LETTER RO + 0xff71 // 0xff9c HALFWIDTH KATAKANA LETTER WA + // 0x0000, // 0xff9d HALFWIDTH KATAKANA LETTER N + // 0x0000, // 0xff9e HALFWIDTH KATAKANA VOICED SOUND MARK + // 0x0000 // 0xff9f HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK +}; + + +OUString +ignoreProlongedSoundMark_ja_JP::foldingImpl( const OUString& inStr, sal_Int32 startPos, 
sal_Int32 nCount, Sequence< sal_Int32 >& offset, bool useOffset ) +{ + // Create a string buffer which can hold nCount + 1 characters. + // The reference count is 1 now. + rtl_uString * newStr = rtl_uString_alloc(nCount); + sal_Unicode * dst = newStr->buffer; + const sal_Unicode * src = inStr.getStr() + startPos; + + if (useOffset) { + // Allocate nCount length to offset argument. + offset.realloc( nCount ); + std::iota(offset.begin(), offset.end(), startPos); + } + + + sal_Unicode previousChar = *src ++; + sal_Unicode currentChar; + + // Conversion + while (-- nCount > 0) { + currentChar = *src ++; + + if (currentChar == 0x30fc || // KATAKANA-HIRAGANA PROLONGED SOUND MARK + currentChar == 0xff70) { // HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK + + if (0x3041 <= previousChar && previousChar <= 0x30fa) { + currentChar = table_normalwidth[ previousChar - 0x3041 ]; + } + else if (0xff66 <= previousChar && previousChar <= 0xff9c) { + currentChar = table_halfwidth[ previousChar - 0xff66 ]; + } + } + + *dst ++ = previousChar; + previousChar = currentChar; + } + + if (nCount == 0) { + *dst ++ = previousChar; + } + + *dst = u'\0'; + + newStr->length = sal_Int32(dst - newStr->buffer); + if (useOffset) + offset.realloc(newStr->length); + return OUString(newStr, SAL_NO_ACQUIRE); // take ownership + +} + +} + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/i18npool/source/transliteration/ignoreSeZe_ja_JP.cxx b/i18npool/source/transliteration/ignoreSeZe_ja_JP.cxx new file mode 100644 index 000000000..b24a635df --- /dev/null +++ b/i18npool/source/transliteration/ignoreSeZe_ja_JP.cxx @@ -0,0 +1,51 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . + */ + +#include <transliteration_Ignore.hxx> + +using namespace com::sun::star::uno; +using namespace com::sun::star::lang; + +namespace i18npool { + +static const transliteration_Ignore::Mapping SeZe[] = { + // SI + E --> SE + { 0x30B7, 0x30A7, 0x30BB, true }, + // SI + E --> SE + { 0x3057, 0x3047, 0x305B, true }, + // ZI + E --> ZE + { 0x30B8, 0x30A7, 0x30BC, true }, + // ZI + E --> ZE + { 0x3058, 0x3047, 0x305C, true }, + + { 0, 0, 0, true } +}; + +ignoreSeZe_ja_JP::ignoreSeZe_ja_JP() +{ + func = nullptr; + table = nullptr; + map = SeZe; + transliterationName = "ignoreSeZe_ja_JP"; + implementationName = "com.sun.star.i18n.Transliteration.ignoreSeZe_ja_JP"; +} + +} + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/i18npool/source/transliteration/ignoreSeparator_ja_JP.cxx b/i18npool/source/transliteration/ignoreSeparator_ja_JP.cxx new file mode 100644 index 000000000..bf98d4fed --- /dev/null +++ b/i18npool/source/transliteration/ignoreSeparator_ja_JP.cxx @@ -0,0 +1,113 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . + */ + +#include <transliteration_Ignore.hxx> +#include <i18nutil/oneToOneMapping.hxx> + +namespace i18npool { + +i18nutil::OneToOneMappingTable_t const ignoreSeparatorTable[] = { + { 0x0021, 0xFFFF }, // EXCLAMATION MARK + { 0x0023, 0xFFFF }, // NUMBER SIGN + { 0x0024, 0xFFFF }, // DOLLAR SIGN + { 0x0025, 0xFFFF }, // PERCENT SIGN + { 0x0026, 0xFFFF }, // AMPERSAND + { 0x002A, 0xFFFF }, // ASTERISK + { 0x002B, 0xFFFF }, // PLUS SIGN + { 0x002C, 0xFFFF }, // COMMA + { 0x002D, 0xFFFF }, // HYPHEN-MINUS + { 0x002E, 0xFFFF }, // FULL STOP + { 0x002F, 0xFFFF }, // SOLIDUS + { 0x003A, 0xFFFF }, // COLON + { 0x003B, 0xFFFF }, // SEMICOLON + { 0x003C, 0xFFFF }, // LESS-THAN SIGN + { 0x003D, 0xFFFF }, // EQUALS SIGN + { 0x003E, 0xFFFF }, // GREATER-THAN SIGN + { 0x005C, 0xFFFF }, // REVERSE SOLIDUS + { 0x005F, 0xFFFF }, // LOW LINE + { 0x007B, 0xFFFF }, // LEFT CURLY BRACKET + { 0x007C, 0xFFFF }, // VERTICAL LINE + { 0x007D, 0xFFFF }, // RIGHT CURLY BRACKET + { 0x007E, 0xFFFF }, // TILDE + { 0x00A5, 0xFFFF }, // YEN SIGN + { 0x3001, 0xFFFF }, // IDEOGRAPHIC COMMA + { 0x3002, 0xFFFF }, // IDEOGRAPHIC FULL STOP + { 0x3008, 0xFFFF }, // LEFT ANGLE BRACKET + { 0x3009, 0xFFFF }, // RIGHT ANGLE BRACKET + { 0x300A, 0xFFFF }, // LEFT DOUBLE ANGLE BRACKET + { 0x300B, 0xFFFF }, // RIGHT DOUBLE ANGLE BRACKET + { 0x300C, 0xFFFF }, // LEFT CORNER BRACKET + { 0x300D, 0xFFFF }, // RIGHT CORNER BRACKET + { 0x300E, 0xFFFF }, // LEFT WHITE CORNER 
BRACKET + { 0x300F, 0xFFFF }, // RIGHT WHITE CORNER BRACKET + { 0x3010, 0xFFFF }, // LEFT BLACK LENTICULAR BRACKET + { 0x3011, 0xFFFF }, // RIGHT BLACK LENTICULAR BRACKET + { 0x3014, 0xFFFF }, // LEFT TORTOISE SHELL BRACKET + { 0x3015, 0xFFFF }, // RIGHT TORTOISE SHELL BRACKET + { 0x3016, 0xFFFF }, // LEFT WHITE LENTICULAR BLACKET + { 0x3017, 0xFFFF }, // RIGHT WHITE LENTICULAR BLACKET + { 0x3018, 0xFFFF }, // LEFT WHITETORTOISE SHELL BLACKET + { 0x3019, 0xFFFF }, // RIGHT WHITETORTOISE SHELL BLACKET + { 0x301A, 0xFFFF }, // LEFT WHITE SQUARE BRACKET + { 0x301B, 0xFFFF }, // RIGHT WHITE SQUARE BRACKET + { 0x301C, 0xFFFF }, // WAVE DASH + { 0x301D, 0xFFFF }, // REVERSED DOUBLE PRIME + { 0x301E, 0xFFFF }, // DOUBLE PRIME QUOTATION MARK + { 0x301F, 0xFFFF }, // LOW DOUBLE PRIME QUOTATION MARK + { 0x3030, 0xFFFF }, // WAVY DASH + { 0x30FB, 0xFFFF }, // KATAKANA MIDDLE DOT + { 0x30FC, 0xFFFF }, // KATAKANA-HIRAHANA PROLONGED SOUND MARK + { 0xFF01, 0xFFFF }, // FULLWIDTH EXCLAMATION MARK + { 0xFF03, 0xFFFF }, // FULLWIDTH NUMBER SIGN + { 0xFF04, 0xFFFF }, // FULLWIDTH DOLLAR SIGN + { 0xFF05, 0xFFFF }, // FULLWIDTH PERCENT SIGN + { 0xFF06, 0xFFFF }, // FULLWIDTH AMPERSAND + { 0xFF0A, 0xFFFF }, // FULLWIDTH ASTERISK + { 0xFF0B, 0xFFFF }, // FULLWIDTH PLUS SIGN + { 0xFF0C, 0xFFFF }, // FULLWIDTH COMMA + { 0xFF0D, 0xFFFF }, // FULLWIDTH HYPHEN-MINUS + { 0xFF0E, 0xFFFF }, // FULLWIDTH FULL STOP + { 0xFF0F, 0xFFFF }, // FULLWIDTH SOLIDUS + { 0xFF1A, 0xFFFF }, // FULLWIDTH COLON + { 0xFF1B, 0xFFFF }, // FULLWIDTH SEMICOLON + { 0xFF1C, 0xFFFF }, // FULLWIDTH LESS-THAN SIGN + { 0xFF1D, 0xFFFF }, // FULLWIDTH EQUALS SIGN + { 0xFF1E, 0xFFFF }, // FULLWIDTH GREATER-THAN SIGN + { 0xFF3C, 0xFFFF }, // FULLWIDTH REVERSE SOLIDUS + { 0xFF3F, 0xFFFF }, // FULLWIDTH LOW LINE + { 0xFF5B, 0xFFFF }, // FULLWIDTH LEFT CURLY BRACKET + { 0xFF5C, 0xFFFF }, // FULLWIDTH VERTICAL LINE + { 0xFF5D, 0xFFFF }, // FULLWIDTH RIGHT CURLY BRACKET + { 0xFF5E, 0xFFFF }, // FULLWIDTH TILDE + { 0xFFE5, 0xFFFF 
}, // FULLWIDTH YEN SIGN +}; + +ignoreSeparator_ja_JP::ignoreSeparator_ja_JP() +{ + static i18nutil::oneToOneMapping _table(ignoreSeparatorTable, sizeof(ignoreSeparatorTable)); + func = nullptr; + table = &_table; + map = nullptr; + transliterationName = "ignoreSeparator_ja_JP"; + implementationName = "com.sun.star.i18n.Transliteration.ignoreSeparator_ja_JP"; +} + +} + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/i18npool/source/transliteration/ignoreSize_ja_JP.cxx b/i18npool/source/transliteration/ignoreSize_ja_JP.cxx new file mode 100644 index 000000000..8ed2be8b6 --- /dev/null +++ b/i18npool/source/transliteration/ignoreSize_ja_JP.cxx @@ -0,0 +1,55 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . 
+ */ + +#include <transliteration_Ignore.hxx> +#include <transliteration_OneToOne.hxx> +#include <rtl/ref.hxx> + +using namespace com::sun::star::uno; +using namespace com::sun::star::lang; + +namespace i18npool { + +OUString +ignoreSize_ja_JP::foldingImpl( const OUString& inStr, sal_Int32 startPos, sal_Int32 nCount, Sequence< sal_Int32 >& offset, bool useOffset ) +{ + rtl::Reference< smallToLarge_ja_JP > t1(new smallToLarge_ja_JP); + return t1->transliterateImpl(inStr, startPos, nCount, offset, useOffset); +} + + +Sequence< OUString > SAL_CALL +ignoreSize_ja_JP::transliterateRange( const OUString& str1, const OUString& str2 ) +{ + rtl::Reference< smallToLarge_ja_JP > t1(new smallToLarge_ja_JP); + rtl::Reference< largeToSmall_ja_JP > t2(new largeToSmall_ja_JP); + + return transliteration_Ignore::transliterateRange(str1, str2, *t1, *t2); +} + +sal_Unicode SAL_CALL +ignoreSize_ja_JP::transliterateChar2Char( sal_Unicode inChar) +{ + rtl::Reference< smallToLarge_ja_JP > t1(new smallToLarge_ja_JP); + return t1->transliterateChar2Char(inChar); +} + +} + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/i18npool/source/transliteration/ignoreSpace_ja_JP.cxx b/i18npool/source/transliteration/ignoreSpace_ja_JP.cxx new file mode 100644 index 000000000..df367cd30 --- /dev/null +++ b/i18npool/source/transliteration/ignoreSpace_ja_JP.cxx @@ -0,0 +1,55 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . + */ + +#include <transliteration_Ignore.hxx> +#include <i18nutil/oneToOneMapping.hxx> + +namespace i18npool { + +i18nutil::OneToOneMappingTable_t const ignoreSpace_ja_JP_mappingTable[] = { + { 0x0020, 0xffff }, // SPACE + { 0x00A0, 0xffff }, // NO-BREAK SPACE + { 0x2002, 0xffff }, // EN SPACE + { 0x2003, 0xffff }, // EM SPACE + { 0x2004, 0xffff }, // THREE-PER-EM SPACE + { 0x2005, 0xffff }, // FOUR-PER-EM SPACE + { 0x2006, 0xffff }, // SIX-PER-EM SPACE + { 0x2007, 0xffff }, // FIGURE SPACE + { 0x2008, 0xffff }, // PUNCTUATION SPACE + { 0x2009, 0xffff }, // THIN SPACE + { 0x200A, 0xffff }, // HAIR SPACE + { 0x200B, 0xffff }, // ZERO WIDTH SPACE + { 0x202F, 0xffff }, // NARROW NO-BREAK SPACE + { 0x3000, 0xffff }, // IDEOGRAPHIC SPACE + { 0x303F, 0xffff } // IDEOGRAPHIC HALF FILL SPACE +}; + + +ignoreSpace_ja_JP::ignoreSpace_ja_JP() +{ + func = nullptr; + table = new i18nutil::oneToOneMapping(ignoreSpace_ja_JP_mappingTable, sizeof(ignoreSpace_ja_JP_mappingTable)); + map = nullptr; + transliterationName = "ignoreSpace_ja_JP"; + implementationName = "com.sun.star.i18n.Transliteration.ignoreSpace_ja_JP"; +} + +} + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/i18npool/source/transliteration/ignoreTiJi_ja_JP.cxx b/i18npool/source/transliteration/ignoreTiJi_ja_JP.cxx new file mode 100644 index 000000000..42c1aee7c --- /dev/null +++ b/i18npool/source/transliteration/ignoreTiJi_ja_JP.cxx @@ -0,0 +1,55 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. 
+ * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . + */ + +#include <transliteration_Ignore.hxx> + +using namespace com::sun::star::uno; +using namespace com::sun::star::lang; + +namespace i18npool { + +static const transliteration_Ignore::Mapping TiJi[] = { + // TU + I --> TI + { 0x30C4, 0x30A3, 0x30C1, true }, + // TE + I --> TI + { 0x30C6, 0x30A3, 0x30C1, true }, + // TU + I --> TI + { 0x3064, 0x3043, 0x3061, true }, + // TE + I --> TI + { 0x3066, 0x3043, 0x3061, true }, + // DE + I --> ZI + { 0x30C7, 0x30A3, 0x30B8, true }, + // DE + I --> ZI + { 0x3067, 0x3043, 0x3058, true }, + + { 0, 0, 0, true } +}; + +ignoreTiJi_ja_JP::ignoreTiJi_ja_JP() +{ + func = nullptr; + table = nullptr; + map = TiJi; + transliterationName = "ignoreTiJi_ja_JP"; + implementationName = "com.sun.star.i18n.Transliteration.ignoreTiJi_ja_JP"; +} + +} + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/i18npool/source/transliteration/ignoreTraditionalKana_ja_JP.cxx b/i18npool/source/transliteration/ignoreTraditionalKana_ja_JP.cxx new file mode 100644 index 000000000..78afd9401 --- /dev/null +++ b/i18npool/source/transliteration/ignoreTraditionalKana_ja_JP.cxx @@ -0,0 +1,55 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is 
part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . + */ + +#include <transliteration_Ignore.hxx> + +namespace i18npool { + +static sal_Unicode +ignoreTraditionalKana_ja_JP_translator (const sal_Unicode c) +{ + + switch (c) { + case 0x3090: // HIRAGANA LETTER WI + return 0x3044; // HIRAGANA LETTER I + + case 0x3091: // HIRAGANA LETTER WE + return 0x3048; // HIRAGANA LETTER E + + case 0x30F0: // KATAKANA LETTER WI + return 0x30A4; // KATAKANA LETTER I + + case 0x30F1: // KATAKANA LETTER WE + return 0x30A8; // KATAKANA LETTER E + } + return c; +} + +ignoreTraditionalKana_ja_JP::ignoreTraditionalKana_ja_JP() +{ + func = ignoreTraditionalKana_ja_JP_translator; + table = nullptr; + map = nullptr; + transliterationName = "ignoreTraditionalKana_ja_JP"; + implementationName = "com.sun.star.i18n.Transliteration.ignoreTraditionalKana_ja_JP"; +} + +} + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/i18npool/source/transliteration/ignoreTraditionalKanji_ja_JP.cxx b/i18npool/source/transliteration/ignoreTraditionalKanji_ja_JP.cxx new file mode 100644 index 000000000..dec55f347 --- /dev/null +++ b/i18npool/source/transliteration/ignoreTraditionalKanji_ja_JP.cxx @@ -0,0 +1,735 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: 
nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . + */ + +#include <transliteration_Ignore.hxx> +#include <i18nutil/oneToOneMapping.hxx> + +namespace i18npool { + +// traditional Kanji characters --> modern Kanji characters +/* Lookup rows for ignoreTraditionalKanji_ja_JP: each pair is { traditional code point, modern code point }, listed in ascending order of the first value. NOTE(review): presumably i18nutil::oneToOneMapping depends on that ordering for lookup -- confirm before adding rows out of order. NOTE(review): the final row's source is U+FEFF, far outside the 0x4E17..0x9F9D span of every other row -- confirm it is intentional. */ +i18nutil::OneToOneMappingTable_t const traditionalKanji2updateKanji[] = { + { 0x4E17, 0x4E16 }, + { 0x4E55, 0x864E }, + { 0x4E58, 0x4E57 }, + { 0x4E82, 0x4E71 }, + { 0x4E8A, 0x4E8B }, + { 0x4E98, 0x4E99 }, + { 0x4E9F, 0x6975 }, + { 0x4EB0, 0x4EAC }, + { 0x4EC2, 0x50CD }, + { 0x4ECE, 0x5F93 }, + { 0x4EED, 0x4EDE }, + { 0x4F1C, 0x5005 }, + { 0x4F5B, 0x4ECF }, + { 0x4F86, 0x6765 }, + { 0x4FAB, 0x4F5E }, + { 0x5047, 0x4EEE }, + { 0x509A, 0x52B9 }, + { 0x50B3, 0x4F1D }, + { 0x50DE, 0x507D }, + { 0x50E3, 0x50ED }, + { 0x50F9, 0x4FA1 }, + { 0x5109, 0x5039 }, + { 0x5118, 0x4FAD }, + { 0x5152, 0x5150 }, + { 0x5154, 0x514E }, + { 0x5169, 0x4E21 }, + { 0x518C, 0x518A }, + { 0x5190, 0x5192 }, + { 0x51A8, 0x5BCC }, + { 0x51A9, 0x5199 }, + { 0x51B0, 0x6C37 }, + { 0x51B1, 0x51B4 }, + { 0x51B2, 0x6C96 }, + { 0x51B3, 0x6C7A }, + { 0x51B5, 0x6CC1 }, + { 0x51C9, 0x6DBC }, + { 0x51D6, 0x6E96 }, + { 0x51DC, 0x51DB }, + { 0x51FE, 0x51FD }, + { 0x5204, 0x5203 }, + { 0x5227, 0x52AB }, + {
0x5269, 0x5270 }, + { 0x5271, 0x5263 }, + { 0x5283, 0x753B }, + { 0x528D, 0x5263 }, + { 0x5291, 0x5264 }, + { 0x5292, 0x5263 }, + { 0x5294, 0x5263 }, + { 0x52B5, 0x5238 }, + { 0x52DE, 0x52B4 }, + { 0x52F3, 0x52F2 }, + { 0x52F5, 0x52B1 }, + { 0x52F8, 0x52E7 }, + { 0x5340, 0x533A }, + { 0x5346, 0x5352 }, + { 0x5377, 0x5DFB }, + { 0x537B, 0x5374 }, + { 0x53B0, 0x5EE0 }, + { 0x53C3, 0x53C2 }, + { 0x548F, 0x8A60 }, + { 0x5492, 0x546A }, + { 0x54AF, 0x5580 }, + { 0x54E5, 0x6B4C }, + { 0x5557, 0x5556 }, + { 0x55AE, 0x5358 }, + { 0x5650, 0x5668 }, + { 0x5680, 0x5BE7 }, + { 0x568F, 0x5694 }, + { 0x56B4, 0x53B3 }, + { 0x56D1, 0x5631 }, + { 0x56D8, 0x56DE }, + { 0x5708, 0x570F }, + { 0x570B, 0x56FD }, + { 0x570D, 0x56F2 }, + { 0x5713, 0x5186 }, + { 0x5716, 0x56F3 }, + { 0x5718, 0x56E3 }, + { 0x57C0, 0x5782 }, + { 0x57D3, 0x57D2 }, + { 0x57FC, 0x5D0E }, + { 0x582F, 0x5C2D }, + { 0x5872, 0x5834 }, + { 0x58AE, 0x5815 }, + { 0x58D3, 0x5727 }, + { 0x58D8, 0x5841 }, + { 0x58DE, 0x58CA }, + { 0x58E4, 0x58CC }, + { 0x58EF, 0x58EE }, + { 0x58F1, 0x4E00 }, + { 0x58F9, 0x4E00 }, + { 0x58FA, 0x58F7 }, + { 0x58FB, 0x5A7F }, + { 0x58FD, 0x5BFF }, + { 0x5918, 0x536F }, + { 0x591B, 0x591A }, + { 0x5932, 0x672C }, + { 0x5967, 0x5965 }, + { 0x5969, 0x5333 }, + { 0x596C, 0x5968 }, + { 0x59D9, 0x598A }, + { 0x5A6C, 0x6DEB }, + { 0x5AD0, 0x5B32 }, + { 0x5AFB, 0x5AFA }, + { 0x5B43, 0x5B22 }, + { 0x5B78, 0x5B66 }, + // { 0x5B9D, 0x73CD }, + { 0x5BC3, 0x51A4 }, + { 0x5BC7, 0x51A6 }, + { 0x5BC9, 0x9DB4 }, + { 0x5BE2, 0x5BDD }, + { 0x5BE6, 0x5B9F }, + { 0x5BEB, 0x5199 }, + //{ 0x5BF3, 0x73CD }, + { 0x5BF3, 0x5B9D }, + //{ 0x5BF6, 0x73CD }, + { 0x5BF6, 0x5B9D }, + { 0x5C05, 0x524B }, + { 0x5C07, 0x5C06 }, + { 0x5C08, 0x5C02 }, + { 0x5C0D, 0x5BFE }, + { 0x5C13, 0x723E }, + { 0x5C46, 0x5C4A }, + { 0x5C6C, 0x5C5E }, + { 0x5CE9, 0x5CE8 }, + { 0x5CEF, 0x5CF0 }, + { 0x5CFD, 0x5CE1 }, + { 0x5D15, 0x5D16 }, + { 0x5D17, 0x5CA1 }, + { 0x5D18, 0x5D19 }, + { 0x5D5C, 0x5D0E }, + { 0x5D73, 0x5D6F }, + { 0x5D8B,
0x5CF6 }, + { 0x5D8C, 0x5CF6 }, + { 0x5DBD, 0x5CB3 }, + { 0x5DD6, 0x5DCC }, + { 0x5DDB, 0x5DDD }, + { 0x5DF5, 0x536E }, + { 0x5E0B, 0x7D19 }, + { 0x5E36, 0x5E2F }, + { 0x5E64, 0x5E63 }, + { 0x5EC1, 0x53A0 }, + { 0x5EC8, 0x53A6 }, + { 0x5ECF, 0x53A9 }, + { 0x5ED0, 0x53A9 }, + { 0x5EDA, 0x53A8 }, + { 0x5EDD, 0x53AE }, + { 0x5EE2, 0x5EC3 }, + { 0x5EE3, 0x5E83 }, + { 0x5EF0, 0x5E81 }, + { 0x5EF3, 0x5E81 }, + { 0x5F03, 0x68C4 }, + { 0x5F09, 0x5958 }, + { 0x5F0C, 0x4E00 }, + { 0x5F0D, 0x4E8C }, + { 0x5F10, 0x4E8C }, + { 0x5F2F, 0x6E7E }, + { 0x5F48, 0x5F3E }, + { 0x5F4C, 0x5F25 }, + { 0x5F4E, 0x6E7E }, + { 0x5F5C, 0x5F5D }, + { 0x5F83, 0x5F80 }, + { 0x5F91, 0x5F84 }, + { 0x5F9E, 0x5F93 }, + { 0x5FA0, 0x6765 }, + { 0x5FF0, 0x60B4 }, + { 0x5FFB, 0x6B23 }, + { 0x6046, 0x6052 }, + { 0x604A, 0x5354 }, + { 0x6060, 0x602A }, + { 0x60B3, 0x5FB3 }, + { 0x60E0, 0x6075 }, + { 0x60E1, 0x60AA }, + { 0x60F1, 0x60A9 }, + { 0x613C, 0x614E }, + { 0x613D, 0x535A }, + { 0x6158, 0x60E8 }, + { 0x615A, 0x6159 }, + { 0x6187, 0x61A9 }, + { 0x61C9, 0x5FDC }, + { 0x61F4, 0x61FA }, + { 0x61F7, 0x61D0 }, + { 0x6200, 0x604B }, + { 0x621D, 0x8CA1 }, + { 0x621E, 0x621B }, + { 0x6230, 0x6226 }, + { 0x6232, 0x622F }, + { 0x6268, 0x6260 }, + { 0x62AC, 0x64E1 }, + { 0x62C2, 0x6255 }, + { 0x62D4, 0x629C }, + { 0x62DC, 0x62DD }, + { 0x633E, 0x631F }, + { 0x63D2, 0x633F }, + { 0x6416, 0x63FA }, + { 0x641C, 0x635C }, + { 0x64C7, 0x629E }, + { 0x64D4, 0x62C5 }, + { 0x64DA, 0x62E0 }, + { 0x64E7, 0x6319 }, + { 0x64F4, 0x62E1 }, + { 0x651C, 0x643A }, + { 0x651D, 0x6442 }, + { 0x652A, 0x64B9 }, + { 0x6536, 0x53CE }, + { 0x6548, 0x52B9 }, + { 0x654D, 0x53D9 }, + { 0x6555, 0x52C5 }, + { 0x6558, 0x53D9 }, + { 0x6578, 0x6570 }, + { 0x6588, 0x5B66 }, + { 0x6589, 0x658E }, + { 0x65B7, 0x65AD }, + { 0x65D9, 0x65DB }, + { 0x663F, 0x66E0 }, + { 0x6644, 0x6643 }, + { 0x6649, 0x664B }, + { 0x665D, 0x663C }, + { 0x668E, 0x6620 }, + { 0x66B8, 0x77AD }, + { 0x66C9, 0x6681 }, + { 0x66F0, 0x4E91 }, + { 0x66F5, 0x66F3 }, + {
0x66FD, 0x66FE }, + { 0x6703, 0x4F1A }, + { 0x6716, 0x6717 }, + { 0x671E, 0x671F }, + { 0x6764, 0x6803 }, + { 0x67A1, 0x685D }, + { 0x67A6, 0x6AE8 }, + { 0x67A9, 0x677E }, + { 0x67CF, 0x6822 }, + { 0x6816, 0x68F2 }, + { 0x6840, 0x685D }, + { 0x689D, 0x6761 }, + { 0x68A6, 0x5922 }, + { 0x68CA, 0x68CB }, + { 0x68E7, 0x685F }, + { 0x6936, 0x68D5 }, + { 0x695D, 0x68DF }, + { 0x6973, 0x6885 }, + { 0x69AE, 0x6804 }, + { 0x69C7, 0x69D9 }, + { 0x6A02, 0x697D }, + { 0x6A13, 0x697C }, + { 0x6A1E, 0x67A2 }, + { 0x6A23, 0x69D8 }, + { 0x6A62, 0x6955 }, + { 0x6A9C, 0x6867 }, + { 0x6AA2, 0x691C }, + { 0x6AAA, 0x6ADF }, + { 0x6AAE, 0x68BC }, + { 0x6AB3, 0x68B9 }, + { 0x6AC1, 0x6A12 }, + { 0x6AFB, 0x685C }, + { 0x6B0A, 0x6A29 }, + { 0x6B50, 0x6B27 }, + { 0x6B61, 0x6B53 }, + { 0x6B78, 0x5E30 }, + { 0x6B7F, 0x6CA1 }, + { 0x6B98, 0x6B8B }, + { 0x6BB1, 0x6BB2 }, + { 0x6BBC, 0x6BBB }, + { 0x6BC6, 0x6BB4 }, + { 0x6BCB, 0x6BCD }, + { 0x6BD3, 0x80B2 }, + { 0x6C23, 0x6C17 }, + { 0x6C8D, 0x51B4 }, + { 0x6C92, 0x6CA1 }, + { 0x6CEA, 0x6D99 }, + { 0x6D0C, 0x51BD }, + { 0x6D35, 0x5F87 }, + { 0x6D79, 0x5CE1 }, + { 0x6D8C, 0x6E67 }, + { 0x6DB8, 0x51C5 }, + { 0x6DD2, 0x51C4 }, + { 0x6DE8, 0x6D44 }, + { 0x6DFA, 0x6D45 }, + { 0x6E0A, 0x6DF5 }, + { 0x6E15, 0x6DF5 }, + { 0x6EAA, 0x6E13 }, + { 0x6EAF, 0x9061 }, + { 0x6EEF, 0x6EDE }, + { 0x6EFF, 0x6E80 }, + { 0x6F5B, 0x6F5C }, + { 0x6F74, 0x7026 }, + { 0x6F80, 0x6E0B }, + { 0x6F81, 0x6E0B }, + { 0x6F82, 0x6F84 }, + { 0x6F91, 0x6E9C }, + { 0x6FA4, 0x6CA2 }, + { 0x6FD5, 0x6E7F }, + { 0x6FDF, 0x6E08 }, + { 0x6FE4, 0x6D9B }, + { 0x6FF1, 0x6D5C }, + { 0x6FF3, 0x6F5C }, + { 0x6FF6, 0x95CA }, + { 0x7027, 0x6EDD }, + { 0x7030, 0x6FD4 }, + { 0x704C, 0x6F45 }, + { 0x7063, 0x6E7E }, + { 0x70CB, 0x6077 }, + { 0x70DF, 0x7159 }, + { 0x70F1, 0x70AF }, + { 0x7188, 0x7155 }, + { 0x7199, 0x7155 }, + { 0x71C8, 0x706F }, + { 0x71D2, 0x713C }, + { 0x71DF, 0x55B6 }, + { 0x71FB, 0x718F }, + { 0x71FF, 0x8000 }, + { 0x7210, 0x7089 }, + { 0x722D, 0x4E89 }, + { 0x7232, 0x70BA }, +
{ 0x723C, 0x4FCE }, + { 0x7246, 0x58BB }, + { 0x7281, 0x7282 }, + { 0x72A7, 0x72A0 }, + { 0x72E2, 0x72F8 }, + { 0x72F9, 0x72ED }, + { 0x734E, 0x5968 }, + { 0x7368, 0x72EC }, + { 0x7375, 0x731F }, + { 0x7378, 0x7363 }, + { 0x737B, 0x732E }, + { 0x73CE, 0x73CD }, + { 0x73F1, 0x74D4 }, + { 0x7464, 0x7476 }, + { 0x746F, 0x7405 }, + { 0x74A2, 0x7460 }, + { 0x74E3, 0x5F01 }, + { 0x751E, 0x5617 }, + { 0x753C, 0x753A }, + { 0x7544, 0x7559 }, + { 0x7546, 0x755D }, + { 0x754A, 0x8015 }, + { 0x754D, 0x754C }, + { 0x7567, 0x7565 }, + { 0x756B, 0x753B }, + { 0x7574, 0x7587 }, + { 0x7576, 0x5F53 }, + { 0x7582, 0x7573 }, + { 0x7589, 0x7573 }, + { 0x758A, 0x7573 }, + { 0x7609, 0x7652 }, + { 0x7661, 0x75F4 }, + { 0x767C, 0x767A }, + { 0x7683, 0x8C8C }, + { 0x7688, 0x5E30 }, + { 0x768B, 0x7690 }, + { 0x7693, 0x6667 }, + { 0x76B7, 0x9F13 }, + { 0x76B9, 0x76B8 }, + { 0x76C3, 0x676F }, + { 0x76D6, 0x84CB }, + { 0x76DC, 0x76D7 }, + { 0x76E1, 0x5C3D }, + { 0x771E, 0x771F }, + { 0x7726, 0x7725 }, + { 0x777F, 0x53E1 }, + { 0x783F, 0x9271 }, + { 0x7845, 0x73EA }, + { 0x788E, 0x7815 }, + { 0x7895, 0x5D0E }, + { 0x78AF, 0x7459 }, + { 0x7919, 0x788D }, + { 0x7926, 0x9271 }, + { 0x792A, 0x783A }, + { 0x7955, 0x79D8 }, + { 0x7962, 0x79B0 }, + { 0x797F, 0x7984 }, + { 0x7980, 0x7A1F }, + { 0x79AA, 0x7985 }, + { 0x79AE, 0x793C }, + { 0x79CB, 0x7A50 }, + { 0x7A31, 0x79F0 }, + { 0x7A3B, 0x7A32 }, + { 0x7A3E, 0x7A3F }, + { 0x7A49, 0x7A1A }, + { 0x7A57, 0x7A42 }, + { 0x7A69, 0x7A4F }, + { 0x7A70, 0x7A63 }, + { 0x7A97, 0x7A93 }, + { 0x7AB0, 0x7AAF }, + { 0x7AC8, 0x7AC3 }, + { 0x7ACA, 0x7A83 }, + { 0x7AD2, 0x5947 }, + { 0x7ADD, 0x4E26 }, + { 0x7B0B, 0x7B4D }, + { 0x7B11, 0x54B2 }, + { 0x7B5D, 0x7B8F }, + { 0x7B71, 0x7BE0 }, + { 0x7B7A, 0x7B50 }, + { 0x7B8B, 0x724B }, + { 0x7B9A, 0x5273 }, + { 0x7B9F, 0x7B98 }, + { 0x7BCF, 0x5D4C }, + { 0x7BE6, 0x7B86 }, + { 0x7C11, 0x84D1 }, + { 0x7C14, 0x84D1 }, + { 0x7C54, 0x85AE }, + { 0x7C56, 0x7C64 }, + { 0x7C58, 0x7C50 }, + { 0x7C60, 0x7BED }, + { 0x7CAE, 0x7CE7 },
+ { 0x7CB9, 0x7C8B }, + { 0x7CE2, 0x6A21 }, + { 0x7CFA, 0x7CFE }, + { 0x7D4F, 0x7D32 }, + { 0x7D72, 0x7CF8 }, + { 0x7D89, 0x7E4D }, + { 0x7D93, 0x7D4C }, + { 0x7DAB, 0x7DDA }, + { 0x7DD5, 0x7E83 }, + { 0x7DDC, 0x7DBF }, + { 0x7DE8, 0x7BC7 }, + { 0x7E23, 0x770C }, + { 0x7E31, 0x7E26 }, + { 0x7E3D, 0x7DCF }, + { 0x7E69, 0x7E04 }, + { 0x7E6A, 0x7D75 }, + { 0x7E7C, 0x7D99 }, + { 0x7E8C, 0x7D9A }, + { 0x7E8E, 0x7E4A }, + { 0x7E92, 0x7E8F }, + { 0x7E96, 0x7E4A }, + { 0x7F3A, 0x6B20 }, + { 0x7F4E, 0x58DC }, + { 0x7F50, 0x7F36 }, + { 0x7F78, 0x7F70 }, + { 0x7F83, 0x51AA }, + { 0x7F87, 0x7F88 }, + { 0x7FA3, 0x7FA4 }, + { 0x7FAE, 0x7FB9 }, + { 0x7FC6, 0x7FE0 }, + { 0x803B, 0x6065 }, + { 0x805F, 0x5A7F }, + { 0x8068, 0x806F }, + { 0x8070, 0x8061 }, + { 0x8072, 0x58F0 }, + { 0x807D, 0x8074 }, + { 0x8085, 0x7C9B }, + { 0x8089, 0x5B8D }, + { 0x80AC, 0x75A3 }, + { 0x80CC, 0x810A }, + { 0x80EF, 0x8DE8 }, + { 0x8109, 0x8108 }, + { 0x8123, 0x5507 }, + { 0x815F, 0x81A3 }, + { 0x8166, 0x8133 }, + { 0x8193, 0x8178 }, + { 0x81B8, 0x9AC4 }, + { 0x81BD, 0x80C6 }, + { 0x81C8, 0x81D8 }, + { 0x81DF, 0x81D3 }, + { 0x81FA, 0x53F0 }, + { 0x8207, 0x4E0E }, + { 0x8209, 0x6319 }, + { 0x820A, 0x65E7 }, + { 0x820D, 0x820E }, + { 0x8216, 0x8217 }, + { 0x8218, 0x9928 }, + { 0x8229, 0x8239 }, + { 0x822E, 0x826B }, + { 0x8235, 0x67C1 }, + { 0x8277, 0x8276 }, + { 0x82C5, 0x5208 }, + { 0x8358, 0x5E84 }, + { 0x8373, 0x8C46 }, + { 0x838A, 0x5E84 }, + { 0x8393, 0x82FA }, + { 0x8396, 0x830E }, + { 0x83B5, 0x514E }, + { 0x83DF, 0x514E }, + { 0x83F4, 0x5EB5 }, + { 0x8420, 0x840C }, + { 0x842C, 0x4E07 }, + { 0x8462, 0x84CB }, + { 0x84AD, 0x82BB }, + { 0x854B, 0x854A }, + { 0x855A, 0x843C }, + { 0x8597, 0x5712 }, + { 0x85C1, 0x7A3F }, + { 0x85CF, 0x8535 }, + { 0x85DD, 0x82B8 }, + { 0x85E5, 0x85AC }, + { 0x85EA, 0x85AE }, + { 0x8602, 0x854A }, + { 0x8606, 0x82A6 }, + { 0x8613, 0x8607 }, + { 0x8617, 0x6A97 }, + { 0x862F, 0x76EA }, + { 0x8655, 0x51E6 }, + { 0x865F, 0x53F7 }, + { 0x8768, 0x8671 }, + { 0x87A2, 0x86CD
}, + { 0x87C6, 0x87C7 }, + { 0x87D2, 0x880E }, + { 0x87F2, 0x866B }, + { 0x8805, 0x877F }, + { 0x880F, 0x87F9 }, + { 0x8823, 0x86CE }, + { 0x8827, 0x8839 }, + { 0x8836, 0x8695 }, + { 0x883B, 0x86EE }, + { 0x8842, 0x8844 }, + { 0x885E, 0x885B }, + { 0x88B5, 0x887D }, + { 0x88DD, 0x88C5 }, + { 0x88E1, 0x88CF }, + { 0x891D, 0x894C }, + { 0x8943, 0x8912 }, + { 0x894D, 0x96D1 }, + { 0x898A, 0x7F88 }, + { 0x89BA, 0x899A }, + { 0x89BD, 0x89A7 }, + { 0x89C0, 0x89B3 }, + { 0x89E7, 0x89E3 }, + { 0x89F8, 0x89E6 }, + { 0x8A00, 0x4E91 }, + { 0x8A3B, 0x6CE8 }, + { 0x8AE1, 0x8B1A }, + { 0x8AEB, 0x8ACC }, + { 0x8B0C, 0x6B4C }, + { 0x8B20, 0x8B21 }, + { 0x8B41, 0x5629 }, + { 0x8B49, 0x8A3C }, + { 0x8B5B, 0x8B56 }, + { 0x8B5F, 0x566A }, + { 0x8B6F, 0x8A33 }, + { 0x8B71, 0x5584 }, + { 0x8B7D, 0x8A89 }, + { 0x8B80, 0x8AAD }, + { 0x8B8A, 0x5909 }, + { 0x8B8E, 0x8B90 }, + { 0x8B93, 0x8B72 }, + { 0x8B9A, 0x8B83 }, + { 0x8C3F, 0x6E13 }, + { 0x8C4E, 0x7AEA }, + { 0x8C50, 0x8C4A }, + { 0x8C6B, 0x4E88 }, + { 0x8C6C, 0x732A }, + { 0x8C7A, 0x72B2 }, + { 0x8C7C, 0x8C94 }, + { 0x8C89, 0x72F8 }, + { 0x8C8D, 0x72F8 }, + { 0x8C8E, 0x730A }, + { 0x8C98, 0x734F }, + { 0x8CAD, 0x8CEA }, + { 0x8CAE, 0x4E8C }, + { 0x8CB3, 0x4E8C }, + { 0x8CCD, 0x81D3 }, + { 0x8CE3, 0x58F2 }, + { 0x8CE4, 0x8CCE }, + { 0x8D0A, 0x8CDB }, + { 0x8D13, 0x81D3 }, + { 0x8D71, 0x8D70 }, + { 0x8E08, 0x758E }, + { 0x8E10, 0x8DF5 }, + { 0x8E34, 0x8E0A }, + { 0x8E48, 0x8E0F }, + { 0x8E87, 0x58B8 }, + { 0x8EAA, 0x8E99 }, + { 0x8EB0, 0x4F53 }, + { 0x8EC6, 0x4F53 }, + { 0x8EE3, 0x8F5F }, + { 0x8F0C, 0x4E21 }, + { 0x8F15, 0x8EFD }, + { 0x8F19, 0x8F12 }, + { 0x8F1B, 0x4E21 }, + { 0x8F49, 0x8EE2 }, + { 0x8FA7, 0x5F01 }, + { 0x8FA8, 0x5F01 }, + { 0x8FAD, 0x8F9E }, + { 0x8FAF, 0x5F01 }, + { 0x8FEA, 0x5EF8 }, + { 0x8FEF, 0x9003 }, + { 0x8FF4, 0x5EFB }, + { 0x8FFA, 0x5EFC }, + { 0x900E, 0x9052 }, + { 0x9015, 0x5F84 }, + { 0x9023, 0x806F }, + { 0x9039, 0x9054 }, + { 0x9059, 0x9065 }, + { 0x905E, 0x9013 }, + { 0x9072, 0x9045 }, + { 0x9087,
0x8FE9 }, + { 0x9089, 0x8FBA }, + { 0x908A, 0x8FBA }, + { 0x90B1, 0x4E18 }, + { 0x9130, 0x96A3 }, + { 0x9189, 0x9154 }, + { 0x918B, 0x9162 }, + { 0x91AB, 0x533B }, + { 0x91BA, 0x718F }, + { 0x91C0, 0x91B8 }, + { 0x91CB, 0x91C8 }, + { 0x91E1, 0x91DC }, + { 0x91FC, 0x5263 }, + { 0x9214, 0x6284 }, + { 0x9229, 0x946A }, + { 0x922C, 0x9438 }, + { 0x9248, 0x91F6 }, + { 0x9264, 0x920E }, + { 0x9295, 0x9244 }, + { 0x92EA, 0x8217 }, + { 0x9322, 0x92AD }, + { 0x9394, 0x7194 }, + { 0x93AD, 0x93AE }, + { 0x941A, 0x930F }, + { 0x9421, 0x9244 }, + { 0x9435, 0x9244 }, + { 0x9444, 0x92F3 }, + { 0x9452, 0x9451 }, + { 0x945A, 0x947D }, + { 0x945B, 0x9271 }, + { 0x9587, 0x9589 }, + { 0x95A0, 0x958F }, + { 0x95DC, 0x95A2 }, + { 0x962A, 0x5742 }, + { 0x962F, 0x5740 }, + { 0x9644, 0x4ED8 }, + { 0x9670, 0x852D }, + { 0x9677, 0x9665 }, + { 0x96A8, 0x968F }, + { 0x96AA, 0x967A }, + { 0x96B1, 0x852D }, + { 0x96B8, 0x96B7 }, + { 0x96C6, 0x8F2F }, + { 0x96D9, 0x53CC }, + { 0x96DC, 0x96D1 }, + { 0x9738, 0x8987 }, + { 0x9748, 0x970A }, + { 0x975C, 0x9759 }, + { 0x976B, 0x976D }, + { 0x9771, 0x976D }, + { 0x97ED, 0x97EE }, + { 0x97F2, 0x9F4F }, + { 0x97F5, 0x97FB }, + { 0x9834, 0x7A4E }, + { 0x9838, 0x981A }, + { 0x984B, 0x816E }, + { 0x984F, 0x9854 }, + { 0x986F, 0x9855 }, + { 0x98B1, 0x53F0 }, + { 0x98C3, 0x98C4 }, + { 0x98DC, 0x7FFB }, + { 0x98EE, 0x98F2 }, + { 0x9918, 0x4F59 }, + { 0x991D, 0x98FE }, + { 0x9920, 0x9905 }, + { 0x99C8, 0x99C6 }, + { 0x99F2, 0x99B4 }, + { 0x9A37, 0x9A12 }, + { 0x9A45, 0x99C6 }, + { 0x9A57, 0x9A13 }, + { 0x9A5B, 0x99C5 }, + { 0x9AD3, 0x9AC4 }, + { 0x9AD4, 0x4F53 }, + { 0x9AEE, 0x9AEA }, + { 0x9AF4, 0x5F7F }, + { 0x9B27, 0x9599 }, + { 0x9B28, 0x95A7 }, + { 0x9B2A, 0x95D8 }, + { 0x9B31, 0x6B1D }, + { 0x9B3B, 0x7CA5 }, + { 0x9C1B, 0x9C2E }, + { 0x9C3A, 0x9BF5 }, + { 0x9CEB, 0x96C1 }, + { 0x9CEC, 0x9CE7 }, + { 0x9D08, 0x96C1 }, + { 0x9D44, 0x9D1F }, + { 0x9D5E, 0x9D5D }, + { 0x9DAB, 0x9D87 }, + { 0x9DAF, 0x9D2C }, + { 0x9DC4, 0x9D8F }, + { 0x9DCF, 0x9DC6 }, + {
0x9E7D, 0x5869 }, + { 0x9EA5, 0x9EA6 }, + { 0x9EAA, 0x9EBA }, + { 0x9EB8, 0x9EA9 }, + { 0x9ED8, 0x9ED9 }, + { 0x9EDE, 0x70B9 }, + { 0x9EE8, 0x515A }, + { 0x9F07, 0x9C32 }, + { 0x9F21, 0x9F20 }, + { 0x9F4A, 0x658E }, + { 0x9F4B, 0x658E }, + { 0x9F52, 0x6B6F }, + { 0x9F61, 0x9F62 }, + { 0x9F67, 0x56D3 }, + { 0x9F8D, 0x7ADC }, + { 0x9F9C, 0x4E80 }, + { 0x9F9D, 0x7A50 }, + { 0xFEFF, 0x4E9C } +}; + +/* Table-driven setup: the rows above are wrapped once in a function-local static oneToOneMapping whose address is stored in table; func and map stay nullptr. */ +ignoreTraditionalKanji_ja_JP::ignoreTraditionalKanji_ja_JP() +{ + static i18nutil::oneToOneMapping _table(traditionalKanji2updateKanji, sizeof(traditionalKanji2updateKanji)); + func = nullptr; + table = &_table; + map = nullptr; + transliterationName = "ignoreTraditionalKanji_ja_JP"; + implementationName = "com.sun.star.i18n.Transliteration.ignoreTraditionalKanji_ja_JP"; +} + +} + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/i18npool/source/transliteration/ignoreWidth.cxx b/i18npool/source/transliteration/ignoreWidth.cxx new file mode 100644 index 000000000..072a0e988 --- /dev/null +++ b/i18npool/source/transliteration/ignoreWidth.cxx @@ -0,0 +1,65 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 .
+ */ + +#include <rtl/ref.hxx> + +#include <transliteration_Ignore.hxx> +#include <transliteration_OneToOne.hxx> + +namespace com::sun::star::uno { class XComponentContext; } + +using namespace com::sun::star::uno; +using namespace com::sun::star::lang; + +namespace i18npool { + +OUString +ignoreWidth::foldingImpl( const OUString& inStr, sal_Int32 startPos, sal_Int32 nCount, Sequence< sal_Int32 >& offset, bool useOffset ) +{ + rtl::Reference< fullwidthToHalfwidth > t1(new fullwidthToHalfwidth); + return t1->transliterateImpl(inStr, startPos, nCount, offset, useOffset); +} + +Sequence< OUString > SAL_CALL +ignoreWidth::transliterateRange( const OUString& str1, const OUString& str2 ) +{ + rtl::Reference< fullwidthToHalfwidth > t1(new fullwidthToHalfwidth); + rtl::Reference< halfwidthToFullwidth > t2(new halfwidthToFullwidth); + + return transliteration_Ignore::transliterateRange(str1, str2, *t1, *t2); +} + +sal_Unicode SAL_CALL +ignoreWidth::transliterateChar2Char( sal_Unicode inChar) +{ + rtl::Reference< fullwidthToHalfwidth > t1(new fullwidthToHalfwidth); + return t1->transliterateChar2Char(inChar); +} + +} + +extern "C" SAL_DLLPUBLIC_EXPORT css::uno::XInterface * +com_sun_star_i18n_Transliteration_IGNORE_WIDTH_get_implementation( + css::uno::XComponentContext *, + css::uno::Sequence<css::uno::Any> const &) +{ + return cppu::acquire(new i18npool::ignoreWidth()); +} + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/i18npool/source/transliteration/ignoreZiZu_ja_JP.cxx b/i18npool/source/transliteration/ignoreZiZu_ja_JP.cxx new file mode 100644 index 000000000..029e335b6 --- /dev/null +++ b/i18npool/source/transliteration/ignoreZiZu_ja_JP.cxx @@ -0,0 +1,55 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. 
If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . + */ + +#include <transliteration_Ignore.hxx> + +namespace i18npool { + +static sal_Unicode +ignoreZiZu_ja_JP_translator (const sal_Unicode c) +{ + + switch (c) { + case 0x30C2: // KATAKANA LETTER DI + return 0x30B8; // KATAKANA LETTER ZI + + case 0x3062: // HIRAGANA LETTER DI + return 0x3058; // HIRAGANA LETTER ZI + + case 0x30C5: // KATAKANA LETTER DU + return 0x30BA; // KATAKANA LETTER ZU + + case 0x3065: // HIRAGANA LETTER DU + return 0x305A; // HIRAGANA LETTER ZU + } + return c; +} + +ignoreZiZu_ja_JP::ignoreZiZu_ja_JP() +{ + func = ignoreZiZu_ja_JP_translator; + table = nullptr; + map = nullptr; + transliterationName = "ignoreZiZu_ja_JP"; + implementationName = "com.sun.star.i18n.Transliteration.ignoreZiZu_ja_JP"; +} + +} + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/i18npool/source/transliteration/katakanaToHiragana.cxx b/i18npool/source/transliteration/katakanaToHiragana.cxx new file mode 100644 index 000000000..7b5f5921f --- /dev/null +++ b/i18npool/source/transliteration/katakanaToHiragana.cxx @@ -0,0 +1,45 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. 
If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . + */ + +#include <transliteration_OneToOne.hxx> + +namespace i18npool { + +// see http://charts.unicode.org/Web/U3040.html Hiragana (U+3040..U+309F) +// see http://charts.unicode.org/Web/U30A0.html Katakana (U+30A0..U+30FF) +static sal_Unicode toHiragana (const sal_Unicode c) +{ + if ( (0x30a1 <= c && c <= 0x30f6) || (0x30fd <= c && c <= 0x30ff) ) { // 30A0 - 30FF KATAKANA LETTER + // shift code point by 0x0060 + return c - (0x30a0 - 0x3040); + } + return c; +} + +katakanaToHiragana::katakanaToHiragana() +{ + func = toHiragana; + table = nullptr; + transliterationName = "katakanaToHiragana"; + implementationName = "com.sun.star.i18n.Transliteration.KATAKANA_HIRAGANA"; +} + +} + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/i18npool/source/transliteration/largeToSmall_ja_JP.cxx b/i18npool/source/transliteration/largeToSmall_ja_JP.cxx new file mode 100644 index 000000000..198100a1f --- /dev/null +++ b/i18npool/source/transliteration/largeToSmall_ja_JP.cxx @@ -0,0 +1,80 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. 
If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . + */ + +#include <i18nutil/oneToOneMapping.hxx> + +#include <transliteration_OneToOne.hxx> + +using namespace com::sun::star::uno; +using namespace com::sun::star::lang; + +namespace i18npool { + +// ftp://ftp.unicode.org/Public/3.0-Update/UnicodeData-3.0.0.txt +// ftp://ftp.unicode.org/Public/3.0-Update/UnicodeData-3.0.0.html +// http://charts.unicode.org/Web/U3040.html Hiragana (U+3040..U+309F) +// http://charts.unicode.org/Web/U30A0.html Katakana (U+30A0..U+30FF) +// http://charts.unicode.org/Web/UFF00.html + +i18nutil::OneToOneMappingTable_t const large2small[] = { + { 0x3041, 0x3042 }, // HIRAGANA LETTER SMALL A --> HIRAGANA LETTER A + { 0x3043, 0x3044 }, // HIRAGANA LETTER SMALL I --> HIRAGANA LETTER I + { 0x3045, 0x3046 }, // HIRAGANA LETTER SMALL U --> HIRAGANA LETTER U + { 0x3047, 0x3048 }, // HIRAGANA LETTER SMALL E --> HIRAGANA LETTER E + { 0x3049, 0x304A }, // HIRAGANA LETTER SMALL O --> HIRAGANA LETTER O + { 0x3063, 0x3064 }, // HIRAGANA LETTER SMALL TU --> HIRAGANA LETTER TU + { 0x3083, 0x3084 }, // HIRAGANA LETTER SMALL YA --> HIRAGANA LETTER YA + { 0x3085, 0x3086 }, // HIRAGANA LETTER SMALL YU --> HIRAGANA LETTER YU + { 0x3087, 0x3088 }, // HIRAGANA LETTER SMALL YO --> HIRAGANA LETTER YO + { 0x308E, 0x308F }, // HIRAGANA LETTER SMALL WA --> HIRAGANA LETTER WA + { 0x30A1, 0x30A2 }, // KATAKANA 
LETTER SMALL A --> KATAKANA LETTER A + { 0x30A3, 0x30A4 }, // KATAKANA LETTER SMALL I --> KATAKANA LETTER I + { 0x30A5, 0x30A6 }, // KATAKANA LETTER SMALL U --> KATAKANA LETTER U + { 0x30A7, 0x30A8 }, // KATAKANA LETTER SMALL E --> KATAKANA LETTER E + { 0x30A9, 0x30AA }, // KATAKANA LETTER SMALL O --> KATAKANA LETTER O + { 0x30C3, 0x30C4 }, // KATAKANA LETTER SMALL TU --> KATAKANA LETTER TU + { 0x30E3, 0x30E4 }, // KATAKANA LETTER SMALL YA --> KATAKANA LETTER YA + { 0x30E5, 0x30E6 }, // KATAKANA LETTER SMALL YU --> KATAKANA LETTER YU + { 0x30E7, 0x30E8 }, // KATAKANA LETTER SMALL YO --> KATAKANA LETTER YO + { 0x30EE, 0x30EF }, // KATAKANA LETTER SMALL WA --> KATAKANA LETTER WA + { 0x30F5, 0x30AB }, // KATAKANA LETTER SMALL KA --> KATAKANA LETTER KA + { 0x30F6, 0x30B1 }, // KATAKANA LETTER SMALL KE --> KATAKANA LETTER KE + { 0xFF67, 0xFF71 }, // HALFWIDTH KATAKANA LETTER SMALL A --> HALFWIDTH KATAKANA LETTER A + { 0xFF68, 0xFF72 }, // HALFWIDTH KATAKANA LETTER SMALL I --> HALFWIDTH KATAKANA LETTER I + { 0xFF69, 0xFF73 }, // HALFWIDTH KATAKANA LETTER SMALL U --> HALFWIDTH KATAKANA LETTER U + { 0xFF6A, 0xFF74 }, // HALFWIDTH KATAKANA LETTER SMALL E --> HALFWIDTH KATAKANA LETTER E + { 0xFF6B, 0xFF75 }, // HALFWIDTH KATAKANA LETTER SMALL O --> HALFWIDTH KATAKANA LETTER O + { 0xFF6C, 0xFF94 }, // HALFWIDTH KATAKANA LETTER SMALL YA --> HALFWIDTH KATAKANA LETTER YA + { 0xFF6D, 0xFF95 }, // HALFWIDTH KATAKANA LETTER SMALL YU --> HALFWIDTH KATAKANA LETTER YU + { 0xFF6E, 0xFF96 }, // HALFWIDTH KATAKANA LETTER SMALL YO --> HALFWIDTH KATAKANA LETTER YO + { 0xFF6F, 0xFF82 } // HALFWIDTH KATAKANA LETTER SMALL TU --> HALFWIDTH KATAKANA LETTER TU +}; + +largeToSmall_ja_JP::largeToSmall_ja_JP() +{ + static i18nutil::oneToOneMapping _table(large2small, sizeof(large2small)); + func = nullptr; + table = &_table; + transliterationName = "largeToSmall_ja_JP"; + implementationName = "com.sun.star.i18n.Transliteration.largeToSmall_ja_JP"; +} + +} + +/* vim:set shiftwidth=4 softtabstop=4 
expandtab: */ diff --git a/i18npool/source/transliteration/numtochar.cxx b/i18npool/source/transliteration/numtochar.cxx new file mode 100644 index 000000000..56761f44c --- /dev/null +++ b/i18npool/source/transliteration/numtochar.cxx @@ -0,0 +1,57 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . 
+ */ + +#include <com/sun/star/i18n/NativeNumberMode.hpp> + +#include <numtochar.hxx> + +namespace i18npool { + +#define TRANSLITERATION_NUMTOCHAR( name, number ) \ +NumToChar##name::NumToChar##name() \ +{ \ + nNativeNumberMode = number; \ + tableSize = 0; \ + transliterationName = "NumToChar"#name; \ + implementationName = "com.sun.star.i18n.Transliteration.NumToChar"#name; \ +} + +using namespace com::sun::star::i18n::NativeNumberMode; + +TRANSLITERATION_NUMTOCHAR( Halfwidth, NATNUM0 ) +TRANSLITERATION_NUMTOCHAR( Fullwidth, NATNUM3 ) +TRANSLITERATION_NUMTOCHAR( Lower_zh_CN, NATNUM1 ) +TRANSLITERATION_NUMTOCHAR( Lower_zh_TW, NATNUM1 ) +TRANSLITERATION_NUMTOCHAR( Upper_zh_CN, NATNUM2 ) +TRANSLITERATION_NUMTOCHAR( Upper_zh_TW, NATNUM2 ) +TRANSLITERATION_NUMTOCHAR( KanjiShort_ja_JP, NATNUM1 ) +TRANSLITERATION_NUMTOCHAR( KanjiTraditional_ja_JP, NATNUM2 ) +TRANSLITERATION_NUMTOCHAR( Lower_ko, NATNUM1 ) +TRANSLITERATION_NUMTOCHAR( Upper_ko, NATNUM2 ) +TRANSLITERATION_NUMTOCHAR( Hangul_ko, NATNUM9 ) +TRANSLITERATION_NUMTOCHAR( Indic_ar, NATNUM1 ) +TRANSLITERATION_NUMTOCHAR( EastIndic_ar, NATNUM1 ) +TRANSLITERATION_NUMTOCHAR( Indic_hi, NATNUM1 ) +TRANSLITERATION_NUMTOCHAR( _th, NATNUM1 ) + +#undef TRANSLITERATION_NUMTOCHAR + +} + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/i18npool/source/transliteration/numtotext_cjk.cxx b/i18npool/source/transliteration/numtotext_cjk.cxx new file mode 100644 index 000000000..f60561f21 --- /dev/null +++ b/i18npool/source/transliteration/numtotext_cjk.cxx @@ -0,0 +1,85 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . + */ + +#include <com/sun/star/i18n/NativeNumberMode.hpp> + +#include <numtotext_cjk.hxx> +#include <bullet.h> + +namespace i18npool { + +#define TRANSLITERATION_NUMTOTEXT( name, number ) \ +NumToText##name::NumToText##name() \ +{ \ + nNativeNumberMode = number; \ + tableSize = 0; \ + transliterationName = "NumToText"#name; \ + implementationName = "com.sun.star.i18n.Transliteration.NumToText"#name; \ +} +using namespace com::sun::star::i18n::NativeNumberMode; + +TRANSLITERATION_NUMTOTEXT( Lower_zh_CN, NATNUM4) +TRANSLITERATION_NUMTOTEXT( Upper_zh_CN, NATNUM5) +TRANSLITERATION_NUMTOTEXT( Lower_zh_TW, NATNUM4) +TRANSLITERATION_NUMTOTEXT( Upper_zh_TW, NATNUM5) +TRANSLITERATION_NUMTOTEXT( Fullwidth_zh_CN, NATNUM6) +TRANSLITERATION_NUMTOTEXT( Fullwidth_zh_TW, NATNUM6) +TRANSLITERATION_NUMTOTEXT( Fullwidth_ja_JP, NATNUM6) +TRANSLITERATION_NUMTOTEXT( Fullwidth_ko, NATNUM6) +TRANSLITERATION_NUMTOTEXT( FormalLower_ko, NATNUM4) +TRANSLITERATION_NUMTOTEXT( FormalUpper_ko, NATNUM5) +TRANSLITERATION_NUMTOTEXT( FormalHangul_ko, NATNUM10) +TRANSLITERATION_NUMTOTEXT( InformalLower_ko, NATNUM7) +TRANSLITERATION_NUMTOTEXT( InformalUpper_ko, NATNUM8) +TRANSLITERATION_NUMTOTEXT( InformalHangul_ko, NATNUM11) +TRANSLITERATION_NUMTOTEXT( KanjiLongTraditional_ja_JP, NATNUM5) +TRANSLITERATION_NUMTOTEXT( KanjiLongModern_ja_JP, NATNUM4) +TRANSLITERATION_NUMTOTEXT( Date_zh, NATNUM7) +TRANSLITERATION_NUMTOTEXT( KanjiShortTraditional_ja_JP, NATNUM8) 
+TRANSLITERATION_NUMTOTEXT( KanjiShortModern_ja_JP, NATNUM7) + +#undef TRANSLITERATION_NUMTOTEXT + +#define TRANSLITERATION_NUMTOTEXT( name, _table, recycle ) \ +NumToText##name::NumToText##name() \ +{ \ + table = _table;\ + tableSize = SAL_N_ELEMENTS(_table); \ + recycleSymbol = recycle; \ + transliterationName = "NumToText"#name; \ + implementationName = "com.sun.star.i18n.Transliteration.NumToText"#name; \ +} + +TRANSLITERATION_NUMTOTEXT ( AIUFullWidth_ja_JP, table_AIUFullWidth_ja_JP, true) +TRANSLITERATION_NUMTOTEXT ( AIUHalfWidth_ja_JP, table_AIUHalfWidth_ja_JP, true) +TRANSLITERATION_NUMTOTEXT ( IROHAFullWidth_ja_JP, table_IROHAFullWidth_ja_JP, true) +TRANSLITERATION_NUMTOTEXT ( IROHAHalfWidth_ja_JP, table_IROHAHalfWidth_ja_JP, true) +TRANSLITERATION_NUMTOTEXT ( CircledNumber, table_CircledNumber, false) +TRANSLITERATION_NUMTOTEXT ( TianGan_zh, table_TianGan_zh, false) +TRANSLITERATION_NUMTOTEXT ( DiZi_zh, table_DiZi_zh, false) +TRANSLITERATION_NUMTOTEXT ( HangulJamo_ko, table_HangulJamo_ko, true) +TRANSLITERATION_NUMTOTEXT ( HangulSyllable_ko, table_HangulSyllable_ko, true) +TRANSLITERATION_NUMTOTEXT ( HangulCircledJamo_ko, table_HangulCircledJamo_ko, true) +TRANSLITERATION_NUMTOTEXT ( HangulCircledSyllable_ko, table_HangulCircledSyllable_ko, true) + +#undef TRANSLITERATION_NUMTOTEXT + +} + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/i18npool/source/transliteration/smallToLarge_ja_JP.cxx b/i18npool/source/transliteration/smallToLarge_ja_JP.cxx new file mode 100644 index 000000000..180db1d3a --- /dev/null +++ b/i18npool/source/transliteration/smallToLarge_ja_JP.cxx @@ -0,0 +1,80 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . + */ + +#include <i18nutil/oneToOneMapping.hxx> + +#include <transliteration_OneToOne.hxx> + +using namespace com::sun::star::uno; +using namespace com::sun::star::lang; + +namespace i18npool { + +// ftp://ftp.unicode.org/Public/3.0-Update/UnicodeData-3.0.0.txt +// ftp://ftp.unicode.org/Public/3.0-Update/UnicodeData-3.0.0.html +// http://charts.unicode.org/Web/U3040.html Hiragana (U+3040..U+309F) +// http://charts.unicode.org/Web/U30A0.html Katakana (U+30A0..U+30FF) +// http://charts.unicode.org/Web/UFF00.html + +i18nutil::OneToOneMappingTable_t const small2large[] = { + { 0x3041, 0x3042 }, // HIRAGANA LETTER SMALL A --> HIRAGANA LETTER A + { 0x3043, 0x3044 }, // HIRAGANA LETTER SMALL I --> HIRAGANA LETTER I + { 0x3045, 0x3046 }, // HIRAGANA LETTER SMALL U --> HIRAGANA LETTER U + { 0x3047, 0x3048 }, // HIRAGANA LETTER SMALL E --> HIRAGANA LETTER E + { 0x3049, 0x304A }, // HIRAGANA LETTER SMALL O --> HIRAGANA LETTER O + { 0x3063, 0x3064 }, // HIRAGANA LETTER SMALL TU --> HIRAGANA LETTER TU + { 0x3083, 0x3084 }, // HIRAGANA LETTER SMALL YA --> HIRAGANA LETTER YA + { 0x3085, 0x3086 }, // HIRAGANA LETTER SMALL YU --> HIRAGANA LETTER YU + { 0x3087, 0x3088 }, // HIRAGANA LETTER SMALL YO --> HIRAGANA LETTER YO + { 0x308E, 0x308F }, // HIRAGANA LETTER SMALL WA --> HIRAGANA LETTER WA + { 0x30A1, 0x30A2 }, // KATAKANA LETTER SMALL A --> KATAKANA LETTER A + { 0x30A3, 0x30A4 }, // KATAKANA LETTER SMALL I --> KATAKANA LETTER I + { 
0x30A5, 0x30A6 }, // KATAKANA LETTER SMALL U --> KATAKANA LETTER U + { 0x30A7, 0x30A8 }, // KATAKANA LETTER SMALL E --> KATAKANA LETTER E + { 0x30A9, 0x30AA }, // KATAKANA LETTER SMALL O --> KATAKANA LETTER O + { 0x30C3, 0x30C4 }, // KATAKANA LETTER SMALL TU --> KATAKANA LETTER TU + { 0x30E3, 0x30E4 }, // KATAKANA LETTER SMALL YA --> KATAKANA LETTER YA + { 0x30E5, 0x30E6 }, // KATAKANA LETTER SMALL YU --> KATAKANA LETTER YU + { 0x30E7, 0x30E8 }, // KATAKANA LETTER SMALL YO --> KATAKANA LETTER YO + { 0x30EE, 0x30EF }, // KATAKANA LETTER SMALL WA --> KATAKANA LETTER WA + { 0x30F5, 0x30AB }, // KATAKANA LETTER SMALL KA --> KATAKANA LETTER KA + { 0x30F6, 0x30B1 }, // KATAKANA LETTER SMALL KE --> KATAKANA LETTER KE + { 0xFF67, 0xFF71 }, // HALFWIDTH KATAKANA LETTER SMALL A --> HALFWIDTH KATAKANA LETTER A + { 0xFF68, 0xFF72 }, // HALFWIDTH KATAKANA LETTER SMALL I --> HALFWIDTH KATAKANA LETTER I + { 0xFF69, 0xFF73 }, // HALFWIDTH KATAKANA LETTER SMALL U --> HALFWIDTH KATAKANA LETTER U + { 0xFF6A, 0xFF74 }, // HALFWIDTH KATAKANA LETTER SMALL E --> HALFWIDTH KATAKANA LETTER E + { 0xFF6B, 0xFF75 }, // HALFWIDTH KATAKANA LETTER SMALL O --> HALFWIDTH KATAKANA LETTER O + { 0xFF6C, 0xFF94 }, // HALFWIDTH KATAKANA LETTER SMALL YA --> HALFWIDTH KATAKANA LETTER YA + { 0xFF6D, 0xFF95 }, // HALFWIDTH KATAKANA LETTER SMALL YU --> HALFWIDTH KATAKANA LETTER YU + { 0xFF6E, 0xFF96 }, // HALFWIDTH KATAKANA LETTER SMALL YO --> HALFWIDTH KATAKANA LETTER YO + { 0xFF6F, 0xFF82 } // HALFWIDTH KATAKANA LETTER SMALL TU --> HALFWIDTH KATAKANA LETTER TU +}; + +smallToLarge_ja_JP::smallToLarge_ja_JP() +{ + static i18nutil::oneToOneMapping _table(small2large, sizeof(small2large)); + func = nullptr; + table = &_table; + transliterationName = "smallToLarge_ja_JP"; + implementationName = "com.sun.star.i18n.Transliteration.smallToLarge_ja_JP"; +} + +} + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/i18npool/source/transliteration/textToPronounce_zh.cxx 
b/i18npool/source/transliteration/textToPronounce_zh.cxx new file mode 100644 index 000000000..42fdb5280 --- /dev/null +++ b/i18npool/source/transliteration/textToPronounce_zh.cxx @@ -0,0 +1,194 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . 
+ */ + +#include <sal/config.h> + +#include <com/sun/star/i18n/MultipleCharsOutputException.hpp> +#include <com/sun/star/i18n/TransliterationType.hpp> +#include <rtl/ustring.hxx> +#include <rtl/ustrbuf.hxx> + +#include <textToPronounce_zh.hxx> + +using namespace com::sun::star::i18n; +using namespace com::sun::star::uno; + +namespace i18npool { + +sal_Int16 SAL_CALL TextToPronounce_zh::getType() +{ + return TransliterationType::ONE_TO_ONE| TransliterationType::IGNORE; +} + +const sal_Unicode* +TextToPronounce_zh::getPronounce(const sal_Unicode ch) +{ + static const sal_Unicode emptyString[]={0}; + if (idx) { + sal_uInt16 address = idx[0][ch>>8]; + if (address != 0xFFFF) + return reinterpret_cast<sal_Unicode *>( + &idx[2][idx[1][address + (ch & 0xFF)]]); + } + return emptyString; +} + +OUString +TextToPronounce_zh::foldingImpl(const OUString & inStr, sal_Int32 startPos, + sal_Int32 nCount, Sequence< sal_Int32 > & offset, bool useOffset) +{ + OUStringBuffer sb; + const sal_Unicode * chArr = inStr.getStr() + startPos; + + if (startPos < 0) + throw RuntimeException(); + + if (startPos + nCount > inStr.getLength()) + nCount = inStr.getLength() - startPos; + + offset[0] = 0; + for (sal_Int32 i = 0; i < nCount; i++) { + OUString pron(getPronounce(chArr[i])); + sb.append(pron); + + if (useOffset) + offset[i + 1] = offset[i] + pron.getLength(); + } + return sb.makeStringAndClear(); +} + +OUString SAL_CALL +TextToPronounce_zh::transliterateChar2String( sal_Unicode inChar) +{ + return OUString(getPronounce(inChar)); +} + +sal_Unicode SAL_CALL +TextToPronounce_zh::transliterateChar2Char( sal_Unicode inChar) +{ + const sal_Unicode* pron=getPronounce(inChar); + if (!pron || !pron[0]) + return 0; + if (pron[1]) + throw MultipleCharsOutputException(); + return *pron; +} + +sal_Bool SAL_CALL +TextToPronounce_zh::equals( const OUString & str1, sal_Int32 pos1, sal_Int32 nCount1, sal_Int32 & nMatch1, + const OUString & str2, sal_Int32 pos2, sal_Int32 nCount2, sal_Int32 & nMatch2) +{ + 
sal_Int32 realCount; + int i; // loop variable + const sal_Unicode * s1, * s2; + + if (nCount1 + pos1 > str1.getLength()) + nCount1 = str1.getLength() - pos1; + + if (nCount2 + pos2 > str2.getLength()) + nCount2 = str2.getLength() - pos2; + + realCount = std::min(nCount1, nCount2); + + s1 = str1.getStr() + pos1; + s2 = str2.getStr() + pos2; + for (i = 0; i < realCount; i++) { + const sal_Unicode *pron1 = getPronounce(*s1++); + const sal_Unicode *pron2 = getPronounce(*s2++); + if (pron1 != pron2) { + nMatch1 = nMatch2 = i; + return false; + } + } + nMatch1 = nMatch2 = realCount; + return (nCount1 == nCount2); +} + +#ifdef DISABLE_DYNLOADING + +extern "C" { + +sal_uInt16** get_zh_zhuyin(); +sal_uInt16** get_zh_pinyin(); + +} + +#endif + +TextToPinyin_zh_CN::TextToPinyin_zh_CN() : +#ifndef DISABLE_DYNLOADING + TextToPronounce_zh("get_zh_pinyin") +#else + TextToPronounce_zh(get_zh_pinyin) +#endif +{ + transliterationName = "ChineseCharacterToPinyin"; + implementationName = "com.sun.star.i18n.Transliteration.TextToPinyin_zh_CN"; +} + +TextToChuyin_zh_TW::TextToChuyin_zh_TW() : +#ifndef DISABLE_DYNLOADING + TextToPronounce_zh("get_zh_zhuyin") +#else + TextToPronounce_zh(get_zh_zhuyin) +#endif +{ + transliterationName = "ChineseCharacterToChuyin"; + implementationName = "com.sun.star.i18n.Transliteration.TextToChuyin_zh_TW"; +} + +#ifndef DISABLE_DYNLOADING + +extern "C" { static void thisModule() {} } + +TextToPronounce_zh::TextToPronounce_zh(const char* func_name) +{ +#ifdef SAL_DLLPREFIX + OUString lib(SAL_DLLPREFIX"index_data" SAL_DLLEXTENSION); +#else + OUString lib("index_data" SAL_DLLEXTENSION); +#endif + hModule = osl_loadModuleRelative( + &thisModule, lib.pData, SAL_LOADMODULE_DEFAULT ); + idx=nullptr; + if (hModule) { + sal_uInt16** (*function)() = reinterpret_cast<sal_uInt16** (*)()>(osl_getFunctionSymbol(hModule, OUString::createFromAscii(func_name).pData)); + if (function) + idx=function(); + } +} + +#else + +TextToPronounce_zh::TextToPronounce_zh(sal_uInt16 
** (*function)()) +{ + idx = function(); +} + +#endif + +TextToPronounce_zh::~TextToPronounce_zh() +{ +#ifndef DISABLE_DYNLOADING + if (hModule) osl_unloadModule(hModule); +#endif +} +} + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/i18npool/source/transliteration/texttonum.cxx b/i18npool/source/transliteration/texttonum.cxx new file mode 100644 index 000000000..5fd0e898d --- /dev/null +++ b/i18npool/source/transliteration/texttonum.cxx @@ -0,0 +1,52 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . 
+ */ + +#include <texttonum.hxx> + +using namespace com::sun::star::uno; + +namespace i18npool { + +#define TRANSLITERATION_TEXTTONUM( name ) \ +TextToNum##name::TextToNum##name() \ +{ \ + nNativeNumberMode = 0; \ + tableSize = 0; \ + transliterationName = "TextToNum"#name; \ + implementationName = "com.sun.star.i18n.Transliteration.TextToNum"#name; \ +} + +TRANSLITERATION_TEXTTONUM( Lower_zh_CN) +TRANSLITERATION_TEXTTONUM( Upper_zh_CN) +TRANSLITERATION_TEXTTONUM( Lower_zh_TW) +TRANSLITERATION_TEXTTONUM( Upper_zh_TW) +TRANSLITERATION_TEXTTONUM( FormalLower_ko) +TRANSLITERATION_TEXTTONUM( FormalUpper_ko) +TRANSLITERATION_TEXTTONUM( FormalHangul_ko) +TRANSLITERATION_TEXTTONUM( InformalLower_ko) +TRANSLITERATION_TEXTTONUM( InformalUpper_ko) +TRANSLITERATION_TEXTTONUM( InformalHangul_ko) +TRANSLITERATION_TEXTTONUM( KanjiLongTraditional_ja_JP) +TRANSLITERATION_TEXTTONUM( KanjiLongModern_ja_JP) + +#undef TRANSLITERATION_TEXTTONUM + +} + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/i18npool/source/transliteration/transliterationImpl.cxx b/i18npool/source/transliteration/transliterationImpl.cxx new file mode 100644 index 000000000..04819e2f5 --- /dev/null +++ b/i18npool/source/transliteration/transliterationImpl.cxx @@ -0,0 +1,669 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. 
The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . + */ + + +#include <transliterationImpl.hxx> +#include <servicename.hxx> + +#include <com/sun/star/i18n/LocaleData2.hpp> +#include <com/sun/star/i18n/TransliterationType.hpp> +#include <com/sun/star/i18n/TransliterationModulesExtra.hpp> + +#include <comphelper/sequence.hxx> +#include <cppuhelper/supportsservice.hxx> +#include <rtl/instance.hxx> +#include <rtl/ustring.hxx> + +#include <algorithm> +#include <numeric> + +using namespace com::sun::star::uno; +using namespace com::sun::star::i18n; +using namespace com::sun::star::lang; + + +namespace i18npool { + +#define ERROR RuntimeException() + +#define TmItem1( name ) \ + {TransliterationModules_##name, TransliterationModulesNew_##name, #name} + +#define TmItem2( name ) \ + {TransliterationModules(0), TransliterationModulesNew_##name, #name} + +namespace { + +// Ignore Module list +struct TMList { + TransliterationModules tm; + TransliterationModulesNew tmn; + const char *implName; +}; + +} + +static TMList const TMlist[] = { // Modules ModulesNew + TmItem1 (IGNORE_CASE), // 0. (1<<8 256) (7) + TmItem1 (IGNORE_WIDTH), // 1. (1<<9 512) (8) + TmItem1 (IGNORE_KANA), // 2. (1<<10 1024) (9) +// No enum define for this trans. application has to use impl name to load it +// TmItem1 (IGNORE_CASE_SIMPLE), // (1<<11 1024) (66) + + TmItem1 (ignoreTraditionalKanji_ja_JP), // 3. (1<<12 4096) (10) + TmItem1 (ignoreTraditionalKana_ja_JP), // 4. (1<<13 8192) (11) + TmItem1 (ignoreMinusSign_ja_JP), // 5. (1<<13 16384) (12) + TmItem1 (ignoreIterationMark_ja_JP), // 6. (1<<14 32768) (13) + TmItem1 (ignoreSeparator_ja_JP), // 7. (1<<15 65536) (14) + TmItem1 (ignoreSize_ja_JP), // 15. (1<<23 16777216) (22) + TmItem1 (ignoreMiddleDot_ja_JP), // 17. 
(1<<25 67108864) (24) + TmItem1 (ignoreSpace_ja_JP), // 18. (1<<26 134217728) (25) + TmItem1 (ignoreZiZu_ja_JP), // 8. (1<<16 131072) (15) + TmItem1 (ignoreBaFa_ja_JP), // 9. (1<<17 262144) (16) + TmItem1 (ignoreTiJi_ja_JP), // 10. (1<<18 524288) (17) + TmItem1 (ignoreHyuByu_ja_JP), // 11. (1<<19 1048576) (18) + TmItem1 (ignoreSeZe_ja_JP), // 12. (1<<20 2097152) (19) + TmItem1 (ignoreIandEfollowedByYa_ja_JP), // 13. (1<<21 4194304) (20) + TmItem1 (ignoreKiKuFollowedBySa_ja_JP), // 14. (1<<22 8388608) (21) + TmItem1 (ignoreProlongedSoundMark_ja_JP), // 16. (1<<24 33554432) (23) + + TmItem1 (UPPERCASE_LOWERCASE), // 19. (1) (1) + TmItem1 (LOWERCASE_UPPERCASE), // 20. (2) (2) + TmItem1 (HALFWIDTH_FULLWIDTH), // 21. (3) (3) + TmItem1 (FULLWIDTH_HALFWIDTH), // 22. (4) (4) + TmItem1 (KATAKANA_HIRAGANA), // 23. (5) (5) + TmItem1 (HIRAGANA_KATAKANA), // 24. (6) (6) + + TmItem1 (smallToLarge_ja_JP), // 25. (1<<27 268435456) (26) + TmItem1 (largeToSmall_ja_JP), // 26. (1<<28 536870912) (27) + TmItem2 (NumToTextLower_zh_CN), // 27. () (28) + TmItem2 (NumToTextUpper_zh_CN), // 28. () (29) + TmItem2 (NumToTextLower_zh_TW), // 29. () (30) + TmItem2 (NumToTextUpper_zh_TW), // 30. () (31) + TmItem2 (NumToTextFormalHangul_ko), // 31. () (32) + TmItem2 (NumToTextFormalLower_ko), // 32. () (33) + TmItem2 (NumToTextFormalUpper_ko), // 33. () (34) + TmItem2 (NumToTextInformalHangul_ko), // 34. () (35) + TmItem2 (NumToTextInformalLower_ko), // 35. () (36) + TmItem2 (NumToTextInformalUpper_ko), // 36. () (37) + TmItem2 (NumToCharLower_zh_CN), // 37. () (38) + TmItem2 (NumToCharUpper_zh_CN), // 38. () (39) + TmItem2 (NumToCharLower_zh_TW), // 39. () (40) + TmItem2 (NumToCharUpper_zh_TW), // 40. () (41) + TmItem2 (NumToCharHangul_ko), // 41. () (42) + TmItem2 (NumToCharLower_ko), // 42. () (43) + TmItem2 (NumToCharUpper_ko), // 43. () (44) + TmItem2 (NumToCharFullwidth), // 44. () (45) + TmItem2 (NumToCharKanjiShort_ja_JP), // 45. () (46) + TmItem2 (TextToNumLower_zh_CN), // 46. 
() (47) + TmItem2 (TextToNumUpper_zh_CN), // 47. () (48) + TmItem2 (TextToNumLower_zh_TW), // 48. () (49) + TmItem2 (TextToNumUpper_zh_TW), // 49. () (50) + TmItem2 (TextToNumFormalHangul_ko), // 50. () (51) + TmItem2 (TextToNumFormalLower_ko), // 51. () (52) + TmItem2 (TextToNumFormalUpper_ko), // 52. () (53) + TmItem2 (TextToNumInformalHangul_ko), // 53. () (54) + TmItem2 (TextToNumInformalLower_ko), // 54. () (55) + TmItem2 (TextToNumInformalUpper_ko), // 55. () (56) + + TmItem2 (CharToNumLower_zh_CN), // 56. () (59) + TmItem2 (CharToNumUpper_zh_CN), // 57. () (60) + TmItem2 (CharToNumLower_zh_TW), // 58. () (61) + TmItem2 (CharToNumUpper_zh_TW), // 59. () (62) + TmItem2 (CharToNumHangul_ko), // 60. () (63) + TmItem2 (CharToNumLower_ko), // 61. () (64) + TmItem2 (CharToNumUpper_ko), // 62. () (65) + +// no enum defined for these trans. application has to use impl name to load them +// TmItem2 (NumToCharArabic_Indic), // () (67) +// TmItem2 (NumToCharEstern_Arabic_Indic),// () (68) +// TmItem2 (NumToCharIndic), // () (69) +// TmItem2 (NumToCharThai), // () (70) + {TransliterationModules(0), TransliterationModulesNew(0), nullptr} +}; + +// Constructor/Destructor +TransliterationImpl::TransliterationImpl(const Reference <XComponentContext>& xContext) : mxContext(xContext) +{ + numCascade = 0; + caseignoreOnly = true; + + mxLocaledata.set(LocaleData2::create(xContext)); +} + +TransliterationImpl::~TransliterationImpl() +{ + mxLocaledata.clear(); + clear(); +} + + +// Methods +OUString SAL_CALL +TransliterationImpl::getName() +{ + if (numCascade == 1 && bodyCascade[0].is()) + return bodyCascade[0]->getName(); + if (numCascade < 1) + return ( OUString("Not Loaded")); + throw ERROR; +} + +sal_Int16 SAL_CALL +TransliterationImpl::getType() +{ + if (numCascade > 1) + return (TransliterationType::CASCADE|TransliterationType::IGNORE); + if (numCascade > 0 && bodyCascade[0].is()) + return bodyCascade[0]->getType(); + throw ERROR; +} + +static TransliterationModules 
operator&(TransliterationModules lhs, TransliterationModules rhs) { + return TransliterationModules(sal_Int32(lhs) & sal_Int32(rhs)); +} +static TransliterationModules operator|(TransliterationModules lhs, TransliterationModules rhs) { + return TransliterationModules(sal_Int32(lhs) | sal_Int32(rhs)); +} + +void SAL_CALL +TransliterationImpl::loadModule( TransliterationModules modType, const Locale& rLocale ) +{ + clear(); + if (bool(modType & TransliterationModules_IGNORE_MASK) && + bool(modType & TransliterationModules_NON_IGNORE_MASK)) + { + throw ERROR; + } else if (bool(modType & TransliterationModules_IGNORE_MASK)) { +#define TransliterationModules_IGNORE_CASE_MASK (TransliterationModules_IGNORE_CASE | \ + TransliterationModules_IGNORE_WIDTH | \ + TransliterationModules_IGNORE_KANA) + TransliterationModules mask = ((modType & TransliterationModules_IGNORE_CASE_MASK) == modType) ? + TransliterationModules_IGNORE_CASE_MASK : TransliterationModules_IGNORE_MASK; + for (sal_Int16 i = 0; bool(TMlist[i].tm & mask); i++) { + if (bool(modType & TMlist[i].tm)) + if (loadModuleByName(OUString::createFromAscii(TMlist[i].implName), + bodyCascade[numCascade], rLocale)) + numCascade++; + } + // additional transliterations from TranslationModuleExtra (we cannot extend TransliterationModule) + if (bool(modType & TransliterationModules(TransliterationModulesExtra::IGNORE_DIACRITICS_CTL))) + { + if (loadModuleByName("ignoreDiacritics_CTL", bodyCascade[numCascade], rLocale)) + numCascade++; + } + if (bool(modType & TransliterationModules(TransliterationModulesExtra::IGNORE_KASHIDA_CTL))) + if (loadModuleByName("ignoreKashida_CTL", bodyCascade[numCascade], rLocale)) + numCascade++; + + } else if (bool(modType & TransliterationModules_NON_IGNORE_MASK)) { + for (sal_Int16 i = 0; bool(TMlist[i].tm); i++) { + if (TMlist[i].tm == modType) { + if (loadModuleByName(OUString::createFromAscii(TMlist[i].implName), bodyCascade[numCascade], rLocale)) + numCascade++; + break; + } + } + } +} + 
+void SAL_CALL +TransliterationImpl::loadModuleNew( const Sequence < TransliterationModulesNew > & modType, const Locale& rLocale ) +{ + clear(); + TransliterationModules mask = TransliterationModules_END_OF_MODULE; + sal_Int32 count = modType.getLength(); + if (count > maxCascade) + throw ERROR; // could not handle more than maxCascade + for (sal_Int32 i = 0; i < count; i++) { + for (sal_Int16 j = 0; bool(TMlist[j].tmn); j++) { + if (TMlist[j].tmn == modType[i]) { + if (mask == TransliterationModules_END_OF_MODULE) + mask = bool(TMlist[i].tm) && bool(TMlist[i].tm & TransliterationModules_IGNORE_MASK) ? + TransliterationModules_IGNORE_MASK : TransliterationModules_NON_IGNORE_MASK; + else if (mask == TransliterationModules_IGNORE_MASK && + (TMlist[i].tm&TransliterationModules_IGNORE_MASK) == TransliterationModules_END_OF_MODULE) + throw ERROR; // could not mess up ignore trans. with non_ignore trans. + if (loadModuleByName(OUString::createFromAscii(TMlist[j].implName), bodyCascade[numCascade], rLocale)) + numCascade++; + break; + } + } + } +} + +void SAL_CALL +TransliterationImpl::loadModuleByImplName(const OUString& implName, const Locale& rLocale) +{ + clear(); + if (loadModuleByName(implName, bodyCascade[numCascade], rLocale)) + numCascade++; +} + + +void SAL_CALL +TransliterationImpl::loadModulesByImplNames(const Sequence< OUString >& implNameList, const Locale& rLocale ) +{ + if (implNameList.getLength() > maxCascade || implNameList.getLength() <= 0) + throw ERROR; + + clear(); + for (const auto& rName : implNameList) + if (loadModuleByName(rName, bodyCascade[numCascade], rLocale)) + numCascade++; +} + + +Sequence<OUString> SAL_CALL +TransliterationImpl::getAvailableModules( const Locale& rLocale, sal_Int16 sType ) +{ + const Sequence<OUString> &translist = mxLocaledata->getTransliterations(rLocale); + std::vector<OUString> r; + r.reserve(translist.getLength()); + Reference<XExtendedTransliteration> body; + for (const auto& rTrans : translist) + { + if 
(loadModuleByName(rTrans, body, rLocale)) { + if (body->getType() & sType) + r.push_back(rTrans); + body.clear(); + } + } + return comphelper::containerToSequence(r); +} + + +OUString SAL_CALL +TransliterationImpl::transliterate( const OUString& inStr, sal_Int32 startPos, sal_Int32 nCount, + Sequence< sal_Int32 >& offset ) +{ + if (numCascade == 0) + return inStr; + + if (offset.getLength() != nCount) + offset.realloc(nCount); + if (numCascade == 1) + { + if ( startPos == 0 && nCount == inStr.getLength() ) + return bodyCascade[0]->transliterate( inStr, 0, nCount, offset); + else + { + OUString tmpStr = inStr.copy(startPos, nCount); + tmpStr = bodyCascade[0]->transliterate(tmpStr, 0, nCount, offset); + if ( startPos ) + { + for (sal_Int32 & j : offset) + j += startPos; + } + return tmpStr; + } + } + else + { + OUString tmpStr = inStr.copy(startPos, nCount); + + std::iota(offset.begin(), offset.end(), startPos); + + sal_Int16 from = 0, to = 1; + Sequence<sal_Int32> off[2]; + + off[to] = offset; + off[from].realloc(nCount); + for (sal_Int32 i = 0; i < numCascade; i++) { + tmpStr = bodyCascade[i]->transliterate(tmpStr, 0, nCount, off[from]); + + nCount = tmpStr.getLength(); + + assert(off[from].getLength() == nCount); + std::swap(from, to); + // tdf#89665: don't use operator[] to write - too slow! 
+ // interestingly gcc 4.9 -Os won't even inline the const operator[] + sal_Int32 const*const pFrom(off[from].getConstArray()); + sal_Int32 *const pTo(off[to].getArray()); + for (sal_Int32 j = 0; j < nCount; j++) + { + assert(pTo[j] < off[from].getLength()); + pTo[j] = pFrom[pTo[j]]; + } + } + offset = off[to]; + return tmpStr; + } +} + + +OUString SAL_CALL +TransliterationImpl::folding( const OUString& inStr, sal_Int32 startPos, sal_Int32 nCount, + Sequence< sal_Int32 >& offset ) +{ + if (numCascade == 0) + return inStr; + + if (offset.getLength() != nCount) + offset.realloc(nCount); + if (numCascade == 1) + { + if ( startPos == 0 && nCount == inStr.getLength() ) + return bodyCascade[0]->folding( inStr, 0, nCount, offset); + else + { + OUString tmpStr = inStr.copy(startPos, nCount); + tmpStr = bodyCascade[0]->folding(tmpStr, 0, nCount, offset); + if ( startPos ) + { + for (sal_Int32 & j : offset) + j += startPos; + } + return tmpStr; + } + } + else + { + OUString tmpStr = inStr.copy(startPos, nCount); + + std::iota(offset.begin(), offset.end(), startPos); + + sal_Int16 from = 0, to = 1; + Sequence<sal_Int32> off[2]; + + off[to] = offset; + for (sal_Int32 i = 0; i < numCascade; i++) { + tmpStr = bodyCascade[i]->folding(tmpStr, 0, nCount, off[from]); + + nCount = tmpStr.getLength(); + + std::swap(from, to); + for (sal_Int32 j = 0; j < nCount; j++) + off[to][j] = off[from][off[to][j]]; + } + offset = off[to]; + return tmpStr; + } +} + +OUString SAL_CALL +TransliterationImpl::transliterateString2String( const OUString& inStr, sal_Int32 startPos, sal_Int32 nCount ) +{ + if (numCascade == 0) + return inStr; + else if (numCascade == 1) + return bodyCascade[0]->transliterateString2String( inStr, startPos, nCount); + else { + OUString tmpStr = bodyCascade[0]->transliterateString2String(inStr, startPos, nCount); + + for (sal_Int32 i = 1; i < numCascade; i++) + tmpStr = bodyCascade[i]->transliterateString2String(tmpStr, 0, tmpStr.getLength()); + return tmpStr; + } +} + 
+OUString SAL_CALL +TransliterationImpl::transliterateChar2String( sal_Unicode inChar ) +{ + if (numCascade == 0) + return OUString(&inChar, 1); + else if (numCascade == 1) + return bodyCascade[0]->transliterateChar2String( inChar); + else { + OUString tmpStr = bodyCascade[0]->transliterateChar2String(inChar); + + for (sal_Int32 i = 1; i < numCascade; i++) + tmpStr = bodyCascade[i]->transliterateString2String(tmpStr, 0, tmpStr.getLength()); + return tmpStr; + } +} + +sal_Unicode SAL_CALL +TransliterationImpl::transliterateChar2Char( sal_Unicode inChar ) +{ + sal_Unicode tmpChar = inChar; + for (sal_Int32 i = 0; i < numCascade; i++) + tmpChar = bodyCascade[i]->transliterateChar2Char(tmpChar); + return tmpChar; +} + + +sal_Bool SAL_CALL +TransliterationImpl::equals( + const OUString& str1, sal_Int32 pos1, sal_Int32 nCount1, sal_Int32& nMatch1, + const OUString& str2, sal_Int32 pos2, sal_Int32 nCount2, sal_Int32& nMatch2) +{ + // since this is an API function make it user fail safe + if ( nCount1 < 0 ) { + pos1 += nCount1; + nCount1 = -nCount1; + } + if ( nCount2 < 0 ) { + pos2 += nCount2; + nCount2 = -nCount2; + } + if ( !nCount1 || !nCount2 || + pos1 >= str1.getLength() || pos2 >= str2.getLength() || + pos1 < 0 || pos2 < 0 ) { + nMatch1 = nMatch2 = 0; + // two empty strings return true, else false + return !nCount1 && !nCount2 && pos1 == str1.getLength() && pos2 == str2.getLength(); + } + if ( pos1 + nCount1 > str1.getLength() ) + nCount1 = str1.getLength() - pos1; + if ( pos2 + nCount2 > str2.getLength() ) + nCount2 = str2.getLength() - pos2; + + if (caseignoreOnly && caseignore.is()) + return caseignore->equals(str1, pos1, nCount1, nMatch1, str2, pos2, nCount2, nMatch2); + + Sequence<sal_Int32> offset1, offset2; + + OUString tmpStr1 = folding(str1, pos1, nCount1, offset1); + OUString tmpStr2 = folding(str2, pos2, nCount2, offset2); + // Length of offset1 and offset2 may still be 0 if there was no folding + // necessary! 
+ + const sal_Unicode *p1 = tmpStr1.getStr(); + const sal_Unicode *p2 = tmpStr2.getStr(); + sal_Int32 i, nLen = ::std::min( tmpStr1.getLength(), tmpStr2.getLength()); + for (i = 0; i < nLen; ++i, ++p1, ++p2 ) { + if (*p1 != *p2) { + // return number of matched code points so far + nMatch1 = (i < offset1.getLength()) ? offset1.getConstArray()[i] : i; + nMatch2 = (i < offset2.getLength()) ? offset2.getConstArray()[i] : i; + return false; + } + } + // i==nLen + if ( tmpStr1.getLength() != tmpStr2.getLength() ) { + // return number of matched code points so far + nMatch1 = (i <= offset1.getLength()) ? offset1.getConstArray()[i-1] + 1 : i; + nMatch2 = (i <= offset2.getLength()) ? offset2.getConstArray()[i-1] + 1 : i; + return false; + } else { + nMatch1 = nCount1; + nMatch2 = nCount2; + return true; + } +} + +Sequence< OUString > +TransliterationImpl::getRange(const Sequence< OUString > &inStrs, + const sal_Int32 length, sal_Int16 _numCascade) +{ + if (_numCascade >= numCascade || ! bodyCascade[_numCascade].is()) + return inStrs; + + sal_Int32 j_tmp = 0; + constexpr sal_Int32 nMaxOutput = 2; + const sal_Int32 nMaxOutputLength = nMaxOutput*length; + std::vector<OUString> ostr; + ostr.reserve(nMaxOutputLength); + for (sal_Int32 j = 0; j < length; j+=2) { + const Sequence< OUString >& temp = bodyCascade[_numCascade]->transliterateRange(inStrs[j], inStrs[j+1]); + + for (const auto& rStr : temp) { + if ( j_tmp++ >= nMaxOutputLength ) throw ERROR; + ostr.push_back(rStr); + } + } + + return getRange(comphelper::containerToSequence(ostr), j_tmp, ++_numCascade); +} + + +Sequence< OUString > SAL_CALL +TransliterationImpl::transliterateRange( const OUString& str1, const OUString& str2 ) +{ + if (numCascade == 1) + return bodyCascade[0]->transliterateRange(str1, str2); + + Sequence< OUString > ostr{ str1, str2 }; + + return getRange(ostr, 2, 0); +} + + +sal_Int32 SAL_CALL +TransliterationImpl::compareSubstring( + const OUString& str1, sal_Int32 off1, sal_Int32 len1, + const 
OUString& str2, sal_Int32 off2, sal_Int32 len2) +{ + if (caseignoreOnly && caseignore.is()) + return caseignore->compareSubstring(str1, off1, len1, str2, off2, len2); + + Sequence <sal_Int32> offset; + + OUString in_str1 = transliterate(str1, off1, len1, offset); + OUString in_str2 = transliterate(str2, off2, len2, offset); + const sal_Unicode* unistr1 = in_str1.getStr(); + const sal_Unicode* unistr2 = in_str2.getStr(); + sal_Int32 strlen1 = in_str1.getLength(); + sal_Int32 strlen2 = in_str2.getLength(); + + while (strlen1 && strlen2) { + if (*unistr1 != *unistr2) + return *unistr1 > *unistr2 ? 1 : -1; + + unistr1++; unistr2++; strlen1--; strlen2--; + } + return strlen1 == strlen2 ? 0 : (strlen1 > strlen2 ? 1 : -1); +} + + +sal_Int32 SAL_CALL +TransliterationImpl::compareString(const OUString& str1, const OUString& str2 ) +{ + if (caseignoreOnly && caseignore.is()) + return caseignore->compareString(str1, str2); + else + return compareSubstring(str1, 0, str1.getLength(), str2, 0, str2.getLength()); +} + + +void +TransliterationImpl::clear() +{ + for (sal_Int32 i = 0; i < numCascade; i++) + if (bodyCascade[i].is()) + bodyCascade[i].clear(); + numCascade = 0; + caseignore.clear(); + caseignoreOnly = true; +} + +namespace +{ + /** structure to cache the last transliteration body used. 
*/ + struct TransBody + { + OUString Name; + css::uno::Reference< css::i18n::XExtendedTransliteration > Body; + }; + class theTransBodyMutex : public rtl::Static<osl::Mutex, theTransBodyMutex> {}; +} + +void TransliterationImpl::loadBody( OUString const &implName, Reference<XExtendedTransliteration>& body ) +{ + assert(!implName.isEmpty()); + ::osl::MutexGuard guard(theTransBodyMutex::get()); + static TransBody lastTransBody; + if (implName != lastTransBody.Name) + { + lastTransBody.Body.set( + mxContext->getServiceManager()->createInstanceWithContext(implName, mxContext), UNO_QUERY_THROW); + lastTransBody.Name = implName; + } + body = lastTransBody.Body; +} + +bool +TransliterationImpl::loadModuleByName( const OUString& implName, + Reference<XExtendedTransliteration>& body, const Locale& rLocale) +{ + OUString cname = TRLT_IMPLNAME_PREFIX + implName; + loadBody(cname, body); + if (body.is()) { + body->loadModule(TransliterationModules(0), rLocale); // toUpper/toLoad need rLocale + + // if the module is ignore case/kana/width, load caseignore for equals/compareString mothed + for (sal_Int16 i = 0; i < 3; i++) { + if (implName.equalsAscii(TMlist[i].implName)) { + if (i == 0) // current module is caseignore + body->loadModule(TMlist[0].tm, rLocale); // caseignore need to setup module name + if (! 
caseignore.is()) { + OUString bname = TRLT_IMPLNAME_PREFIX + + OUString::createFromAscii(TMlist[0].implName); + loadBody(bname, caseignore); + } + if (caseignore.is()) + caseignore->loadModule(TMlist[i].tm, rLocale); + return true; + } + } + caseignoreOnly = false; // has other module than just ignore case/kana/width + } + return body.is(); +} + +OUString SAL_CALL +TransliterationImpl::getImplementationName() +{ + return "com.sun.star.i18n.Transliteration"; +} + +sal_Bool SAL_CALL +TransliterationImpl::supportsService(const OUString& rServiceName) +{ + return cppu::supportsService(this, rServiceName); +} + +Sequence< OUString > SAL_CALL +TransliterationImpl::getSupportedServiceNames() +{ + return { "com.sun.star.i18n.Transliteration" }; +} + +} + +extern "C" SAL_DLLPUBLIC_EXPORT css::uno::XInterface * +com_sun_star_i18n_Transliteration_get_implementation( + css::uno::XComponentContext *context, + css::uno::Sequence<css::uno::Any> const &) +{ + return cppu::acquire(new i18npool::TransliterationImpl(context)); +} + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/i18npool/source/transliteration/transliteration_Ignore.cxx b/i18npool/source/transliteration/transliteration_Ignore.cxx new file mode 100644 index 000000000..946e2979b --- /dev/null +++ b/i18npool/source/transliteration/transliteration_Ignore.cxx @@ -0,0 +1,208 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . + */ + +#include <com/sun/star/i18n/TransliterationType.hpp> + +#include <transliteration_Ignore.hxx> +#include <i18nutil/oneToOneMapping.hxx> + +using namespace com::sun::star::i18n; +using namespace com::sun::star::uno; + +namespace i18npool { + +sal_Bool SAL_CALL +transliteration_Ignore::equals(const OUString& str1, sal_Int32 pos1, sal_Int32 nCount1, sal_Int32& nMatch1, + const OUString& str2, sal_Int32 pos2, sal_Int32 nCount2, sal_Int32& nMatch2 ) +{ + Sequence< sal_Int32 > offset1; + Sequence< sal_Int32 > offset2; + + // The method folding is defined in a sub class. + OUString s1 = folding( str1, pos1, nCount1, offset1); + OUString s2 = folding( str2, pos2, nCount2, offset2); + + const sal_Unicode * p1 = s1.getStr(); + const sal_Unicode * p2 = s2.getStr(); + sal_Int32 length = std::min(s1.getLength(), s2.getLength()); + sal_Int32 nmatch; + + for ( nmatch = 0; nmatch < length; nmatch++) + if (*p1++ != *p2++) + break; + + if (nmatch > 0) { + nMatch1 = offset1[ nmatch - 1 ] + 1; // Subtract 1 from nmatch because the index starts from zero. + nMatch2 = offset2[ nmatch - 1 ] + 1; // And then, add 1 to position because it means the number of character matched. + } + else { + nMatch1 = 0; // No character was matched. 
+ nMatch2 = 0; + } + + return (nmatch == s1.getLength()) && (nmatch == s2.getLength()); +} + + +Sequence< OUString > SAL_CALL +transliteration_Ignore::transliterateRange( const OUString& str1, const OUString& str2 ) +{ + if (str1.isEmpty() || str2.isEmpty()) + throw RuntimeException(); + + Sequence< OUString > r(2); + r[0] = str1.copy(0, 1); + r[1] = str2.copy(0, 1); + return r; +} + + +sal_Int16 SAL_CALL +transliteration_Ignore::getType() +{ + // The type is also defined in com/sun/star/util/TransliterationType.hdl + return TransliterationType::IGNORE; +} + + +OUString +transliteration_Ignore::transliterateImpl( const OUString& inStr, sal_Int32 startPos, sal_Int32 nCount, + Sequence< sal_Int32 >& offset, bool useOffset) +{ + // The method folding is defined in a sub class. + return foldingImpl( inStr, startPos, nCount, offset, useOffset); +} + +Sequence< OUString > +transliteration_Ignore::transliterateRange( const OUString& str1, const OUString& str2, + XTransliteration& t1, XTransliteration& t2 ) +{ + if (str1.isEmpty() || str2.isEmpty()) + throw RuntimeException(); + + Sequence< sal_Int32 > offset; + OUString s11 = t1.transliterate( str1, 0, 1, offset ); + OUString s12 = t1.transliterate( str2, 0, 1, offset ); + OUString s21 = t2.transliterate( str1, 0, 1, offset ); + OUString s22 = t2.transliterate( str2, 0, 1, offset ); + + if ( (s11 == s21) && (s12 == s22) ) { + Sequence< OUString > r(2); + r[0] = s11; + r[1] = s12; + return r; + } + + Sequence< OUString > r(4); + r[0] = s11; + r[1] = s12; + r[2] = s21; + r[3] = s22; + return r; +} + +OUString +transliteration_Ignore::foldingImpl( const OUString& inStr, sal_Int32 startPos, + sal_Int32 nCount, Sequence< sal_Int32 >& offset, bool useOffset) +{ + // Create a string buffer which can hold nCount + 1 characters. + // The reference count is 1 now. 
+ rtl_uString * newStr = rtl_uString_alloc(nCount); + sal_Unicode * dst = newStr->buffer; + const sal_Unicode * src = inStr.getStr() + startPos; + + // Allocate nCount length to offset argument. + sal_Int32 *p = nullptr; + sal_Int32 position = 0; + if (useOffset) { + offset.realloc( nCount ); + p = offset.getArray(); + position = startPos; + } + + if (map) { + sal_Unicode previousChar = *src ++; + sal_Unicode currentChar; + + // Translation + while (-- nCount > 0) { + currentChar = *src ++; + + const Mapping *m; + for (m = map; m->replaceChar; m++) { + if (previousChar == m->previousChar && currentChar == m->currentChar ) { + if (useOffset) { + if (! m->two2one) + *p++ = position; + position++; + *p++ = position++; + } + *dst++ = m->replaceChar; + if (!m->two2one) + *dst++ = currentChar; + previousChar = *src++; + nCount--; + break; + } + } + + if (! m->replaceChar) { + if (useOffset) + *p ++ = position ++; + *dst ++ = previousChar; + previousChar = currentChar; + } + } + + if (nCount == 0) { + if (useOffset) + *p = position; + *dst ++ = previousChar; + } + } else { + // Translation + while (nCount -- > 0) { + sal_Unicode c = *src++; + c = func ? func( c) : (*table)[ c ]; + if (c != 0xffff) + *dst ++ = c; + if (useOffset) { + if (c != 0xffff) + *p ++ = position; + position++; + } + } + } + newStr->length = sal_Int32(dst - newStr->buffer); + if (useOffset) + offset.realloc(newStr->length); + *dst = u'\0'; + + return OUString(newStr, SAL_NO_ACQUIRE); // take ownership +} + +sal_Unicode SAL_CALL +transliteration_Ignore::transliterateChar2Char( sal_Unicode inChar) +{ + return func ? func( inChar) : table ? 
(*table)[ inChar ] : inChar; +} + +} + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/i18npool/source/transliteration/transliteration_Numeric.cxx b/i18npool/source/transliteration/transliteration_Numeric.cxx new file mode 100644 index 000000000..40853aafa --- /dev/null +++ b/i18npool/source/transliteration/transliteration_Numeric.cxx @@ -0,0 +1,142 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . 
+ */ + + +#include <com/sun/star/i18n/TransliterationType.hpp> + +#include <transliteration_Numeric.hxx> +#include <nativenumbersupplier.hxx> +#include <rtl/ref.hxx> + +using namespace com::sun::star::i18n; +using namespace com::sun::star::uno; + + +namespace i18npool { + +sal_Int16 SAL_CALL transliteration_Numeric::getType() +{ + return TransliterationType::NUMERIC; +} + +OUString + transliteration_Numeric::foldingImpl( const OUString& /*inStr*/, sal_Int32 /*startPos*/, sal_Int32 /*nCount*/, Sequence< sal_Int32 >& /*offset*/, bool ) +{ + throw RuntimeException(); +} + +sal_Bool SAL_CALL + transliteration_Numeric::equals( const OUString& /*str1*/, sal_Int32 /*pos1*/, sal_Int32 /*nCount1*/, sal_Int32& /*nMatch1*/, const OUString& /*str2*/, sal_Int32 /*pos2*/, sal_Int32 /*nCount2*/, sal_Int32& /*nMatch2*/ ) +{ + throw RuntimeException(); +} + +Sequence< OUString > SAL_CALL + transliteration_Numeric::transliterateRange( const OUString& /*str1*/, const OUString& /*str2*/ ) +{ + throw RuntimeException(); +} + + +#define isNumber(c) ((c) >= 0x30 && (c) <= 0x39) +#define NUMBER_ZERO 0x30 + +OUString +transliteration_Numeric::transliterateBullet( const OUString& inStr, sal_Int32 startPos, sal_Int32 nCount, + Sequence< sal_Int32 >& offset, bool useOffset ) +{ + sal_Int32 number = -1, j = 0, endPos = startPos + nCount; + + if (endPos > inStr.getLength()) + endPos = inStr.getLength(); + + rtl_uString* pStr = rtl_uString_alloc(nCount); + sal_Unicode* out = pStr->buffer; + + if (useOffset) + offset.realloc(nCount); + + for (sal_Int32 i = startPos; i < endPos; i++) { + if (isNumber(inStr[i])) + { + if (number == -1) { + startPos = i; + number = (inStr[i] - NUMBER_ZERO); + } else { + number = number * 10 + (inStr[i] - NUMBER_ZERO); + } + } else { + if (number == 0) { + if (useOffset) + offset[j] = startPos; + out[j++] = NUMBER_ZERO; + } else if (number > tableSize && !recycleSymbol) { + for (sal_Int32 k = startPos; k < i; k++) { + if (useOffset) + offset[j] = k; + out[j++] = 
inStr[k]; + } + } else if (number > 0) { + if (useOffset) + offset[j] = startPos; + out[j++] = table[--number % tableSize]; + } else if (i < endPos) { + if (useOffset) + offset[j] = i; + out[j++] = inStr[i]; + } + number = -1; + } + } + out[j] = 0; + + if (useOffset) + offset.realloc(j); + + return OUString( pStr, SAL_NO_ACQUIRE ); +} + +OUString +transliteration_Numeric::transliterateImpl( const OUString& inStr, sal_Int32 startPos, sal_Int32 nCount, + Sequence< sal_Int32 >& offset, bool useOffset ) +{ + if (tableSize) + return transliterateBullet( inStr, startPos, nCount, offset, useOffset); + else + return rtl::Reference<NativeNumberSupplierService>(new NativeNumberSupplierService(useOffset))->getNativeNumberString( inStr.copy(startPos, nCount), aLocale, nNativeNumberMode, offset ); +} + +sal_Unicode SAL_CALL +transliteration_Numeric::transliterateChar2Char( sal_Unicode inChar ) +{ + if (tableSize) { + if (isNumber(inChar)) { + sal_Int16 number = inChar - NUMBER_ZERO; + if (number <= tableSize || recycleSymbol) + return table[--number % tableSize]; + } + return inChar; + } + else + return rtl::Reference<NativeNumberSupplierService>(new NativeNumberSupplierService)->getNativeNumberChar( inChar, aLocale, nNativeNumberMode ); +} + +} + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/i18npool/source/transliteration/transliteration_OneToOne.cxx b/i18npool/source/transliteration/transliteration_OneToOne.cxx new file mode 100644 index 000000000..f865a4640 --- /dev/null +++ b/i18npool/source/transliteration/transliteration_OneToOne.cxx @@ -0,0 +1,92 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . + */ + +#include <com/sun/star/i18n/TransliterationType.hpp> + +#include <transliteration_OneToOne.hxx> +#include <i18nutil/oneToOneMapping.hxx> + +#include <numeric> + +using namespace com::sun::star::i18n; +using namespace com::sun::star::uno; + +namespace i18npool { + +sal_Int16 SAL_CALL transliteration_OneToOne::getType() +{ + // This type is also defined in com/sun/star/util/TransliterationType.hdl + return TransliterationType::ONE_TO_ONE; +} + +OUString +transliteration_OneToOne::foldingImpl( const OUString& /*inStr*/, sal_Int32 /*startPos*/, + sal_Int32 /*nCount*/, Sequence< sal_Int32 >& /*offset*/, bool) +{ + throw RuntimeException(); +} + +sal_Bool SAL_CALL +transliteration_OneToOne::equals( const OUString& /*str1*/, sal_Int32 /*pos1*/, sal_Int32 /*nCount1*/, + sal_Int32& /*nMatch1*/, const OUString& /*str2*/, sal_Int32 /*pos2*/, sal_Int32 /*nCount2*/, sal_Int32& /*nMatch2*/ ) +{ + throw RuntimeException(); +} + +Sequence< OUString > SAL_CALL +transliteration_OneToOne::transliterateRange( const OUString& /*str1*/, const OUString& /*str2*/ ) +{ + throw RuntimeException(); +} + +OUString +transliteration_OneToOne::transliterateImpl( const OUString& inStr, sal_Int32 startPos, + sal_Int32 nCount, Sequence< sal_Int32 >& offset, bool useOffset) +{ + // Create a string buffer which can hold nCount + 1 characters. + // The reference count is 1 now. 
+ rtl_uString * newStr = rtl_uString_alloc(nCount); + sal_Unicode * dst = newStr->buffer; + const sal_Unicode * src = inStr.getStr() + startPos; + + // Allocate nCount length to offset argument. + if (useOffset) { + offset.realloc( nCount ); + std::iota(offset.begin(), offset.end(), startPos); + } + + // Translation + while (nCount -- > 0) { + sal_Unicode c = *src++; + *dst ++ = func ? func( c) : (*table)[ c ]; + } + *dst = u'\0'; + + return OUString(newStr, SAL_NO_ACQUIRE); // take ownership +} + +sal_Unicode SAL_CALL +transliteration_OneToOne::transliterateChar2Char( sal_Unicode inChar) +{ + return func ? func( inChar) : (*table)[ inChar ]; +} + +} + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/i18npool/source/transliteration/transliteration_body.cxx b/i18npool/source/transliteration/transliteration_body.cxx new file mode 100644 index 000000000..a7eae7243 --- /dev/null +++ b/i18npool/source/transliteration/transliteration_body.cxx @@ -0,0 +1,296 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . 
+ */ + +#include <rtl/ref.hxx> +#include <i18nutil/casefolding.hxx> +#include <i18nutil/unicode.hxx> +#include <com/sun/star/i18n/MultipleCharsOutputException.hpp> +#include <com/sun/star/i18n/TransliterationType.hpp> +#include <comphelper/processfactory.hxx> +#include <comphelper/sequence.hxx> + +#include <characterclassificationImpl.hxx> + +#include <transliteration_body.hxx> +#include <memory> +#include <numeric> + +using namespace ::com::sun::star::uno; +using namespace ::com::sun::star::i18n; +using namespace ::com::sun::star::lang; + +namespace i18npool { + +Transliteration_body::Transliteration_body() +{ + nMappingType = MappingType::NONE; + transliterationName = "Transliteration_body"; + implementationName = "com.sun.star.i18n.Transliteration.Transliteration_body"; +} + +sal_Int16 SAL_CALL Transliteration_body::getType() +{ + return TransliterationType::ONE_TO_ONE; +} + +sal_Bool SAL_CALL Transliteration_body::equals( + const OUString& /*str1*/, sal_Int32 /*pos1*/, sal_Int32 /*nCount1*/, sal_Int32& /*nMatch1*/, + const OUString& /*str2*/, sal_Int32 /*pos2*/, sal_Int32 /*nCount2*/, sal_Int32& /*nMatch2*/) +{ + throw RuntimeException(); +} + +Sequence< OUString > SAL_CALL +Transliteration_body::transliterateRange( const OUString& str1, const OUString& str2 ) +{ + Sequence< OUString > ostr(2); + ostr[0] = str1; + ostr[1] = str2; + return ostr; +} + +static MappingType lcl_getMappingTypeForToggleCase( MappingType nMappingType, sal_Unicode cChar ) +{ + MappingType nRes = nMappingType; + + // take care of TOGGLE_CASE transliteration: + // nMappingType should not be a combination of flags, thuse we decide now + // which one to use. + if (nMappingType == (MappingType::LowerToUpper | MappingType::UpperToLower)) + { + const sal_Int16 nType = unicode::getUnicodeType( cChar ); + if (nType & 0x02 /* lower case*/) + nRes = MappingType::LowerToUpper; + else + { + // should also work properly for non-upper characters like white spaces, numbers, ... 
+ nRes = MappingType::UpperToLower; + } + } + + return nRes; +} + +OUString +Transliteration_body::transliterateImpl( + const OUString& inStr, sal_Int32 startPos, sal_Int32 nCount, + Sequence< sal_Int32 >& offset, bool useOffset) +{ + const sal_Unicode *in = inStr.getStr() + startPos; + + // We could assume that most calls result in identical string lengths, + // thus using a preallocated OUStringBuffer could be an easy way + // to assemble the return string without too much hassle. However, + // for single characters the OUStringBuffer::append() method is quite + // expensive compared to a simple array operation, so it pays here + // to copy the final result instead. + + // Allocate the max possible buffer. Try to use stack instead of heap, + // which would have to be reallocated most times anyways. + constexpr sal_Int32 nLocalBuf = 2048; + sal_Unicode aLocalBuf[ nLocalBuf * NMAPPINGMAX ], *out = aLocalBuf; + std::unique_ptr<sal_Unicode[]> pHeapBuf; + if (nCount > nLocalBuf) + { + pHeapBuf.reset(new sal_Unicode[ nCount * NMAPPINGMAX ]); + out = pHeapBuf.get(); + } + + sal_Int32 j = 0; + // Two different blocks to eliminate the if(useOffset) condition inside the loop. + // Yes, on massive use even such small things do count. 
+ if ( useOffset ) + { + std::vector<sal_Int32> aVec; + aVec.reserve(std::max<sal_Int32>(nLocalBuf, nCount) * NMAPPINGMAX); + + for (sal_Int32 i = 0; i < nCount; i++) + { + // take care of TOGGLE_CASE transliteration: + MappingType nTmpMappingType = lcl_getMappingTypeForToggleCase( nMappingType, in[i] ); + + const i18nutil::Mapping &map = i18nutil::casefolding::getValue( in, i, nCount, aLocale, nTmpMappingType ); + std::fill_n(std::back_inserter(aVec), map.nmap, i + startPos); + std::copy_n(map.map, map.nmap, out + j); + j += map.nmap; + } + + offset = comphelper::containerToSequence(aVec); + } + else + { + for ( sal_Int32 i = 0; i < nCount; i++) + { + // take care of TOGGLE_CASE transliteration: + MappingType nTmpMappingType = lcl_getMappingTypeForToggleCase( nMappingType, in[i] ); + + const i18nutil::Mapping &map = i18nutil::casefolding::getValue( in, i, nCount, aLocale, nTmpMappingType ); + std::copy_n(map.map, map.nmap, out + j); + j += map.nmap; + } + } + + return OUString(out, j); +} + +OUString SAL_CALL +Transliteration_body::transliterateChar2String( sal_Unicode inChar ) +{ + const i18nutil::Mapping &map = i18nutil::casefolding::getValue(&inChar, 0, 1, aLocale, nMappingType); + rtl_uString* pStr = rtl_uString_alloc(map.nmap); + sal_Unicode* out = pStr->buffer; + sal_Int32 i; + + for (i = 0; i < map.nmap; i++) + out[i] = map.map[i]; + out[i] = 0; + + return OUString( pStr, SAL_NO_ACQUIRE ); +} + +sal_Unicode SAL_CALL +Transliteration_body::transliterateChar2Char( sal_Unicode inChar ) +{ + const i18nutil::Mapping &map = i18nutil::casefolding::getValue(&inChar, 0, 1, aLocale, nMappingType); + if (map.nmap > 1) + throw MultipleCharsOutputException(); + return map.map[0]; +} + +OUString +Transliteration_body::foldingImpl( const OUString& inStr, sal_Int32 startPos, sal_Int32 nCount, + Sequence< sal_Int32 >& offset, bool useOffset) +{ + return transliterateImpl(inStr, startPos, nCount, offset, useOffset); +} + 
+Transliteration_casemapping::Transliteration_casemapping() +{ + nMappingType = MappingType::NONE; + transliterationName = "casemapping(generic)"; + implementationName = "com.sun.star.i18n.Transliteration.Transliteration_casemapping"; +} + +void +Transliteration_casemapping::setMappingType( const MappingType rMappingType, const Locale& rLocale ) +{ + nMappingType = rMappingType; + aLocale = rLocale; +} + +Transliteration_u2l::Transliteration_u2l() +{ + nMappingType = MappingType::UpperToLower; + transliterationName = "upper_to_lower(generic)"; + implementationName = "com.sun.star.i18n.Transliteration.Transliteration_u2l"; +} + +Transliteration_l2u::Transliteration_l2u() +{ + nMappingType = MappingType::LowerToUpper; + transliterationName = "lower_to_upper(generic)"; + implementationName = "com.sun.star.i18n.Transliteration.Transliteration_l2u"; +} + +Transliteration_togglecase::Transliteration_togglecase() +{ + // usually nMappingType must NOT be a combination of different flags here, + // but we take care of that problem in Transliteration_body::transliterate above + // before that value is used. There we will decide which of both is to be used on + // a per character basis. 
+ nMappingType = MappingType::LowerToUpper | MappingType::UpperToLower; + transliterationName = "toggle(generic)"; + implementationName = "com.sun.star.i18n.Transliteration.Transliteration_togglecase"; +} + +Transliteration_titlecase::Transliteration_titlecase() +{ + nMappingType = MappingType::ToTitle; + transliterationName = "title(generic)"; + implementationName = "com.sun.star.i18n.Transliteration.Transliteration_titlecase"; +} + +/// @throws RuntimeException +static OUString transliterate_titlecase_Impl( + const OUString& inStr, sal_Int32 startPos, sal_Int32 nCount, + const Locale &rLocale, + Sequence< sal_Int32 >& offset ) +{ + const OUString aText( inStr.copy( startPos, nCount ) ); + + OUString aRes; + if (!aText.isEmpty()) + { + Reference< XComponentContext > xContext = ::comphelper::getProcessComponentContext(); + rtl::Reference< CharacterClassificationImpl > xCharClassImpl( new CharacterClassificationImpl( xContext ) ); + + // because xCharClassImpl.toTitle does not handle ligatures or Beta but will raise + // an exception we need to handle the first chara manually... + + // we don't want to change surrogates by accident, thuse we use proper code point iteration + sal_Int32 nPos = 0; + sal_uInt32 cFirstChar = aText.iterateCodePoints( &nPos ); + OUString aResolvedLigature( &cFirstChar, 1 ); + // toUpper can be used to properly resolve ligatures and characters like Beta + aResolvedLigature = xCharClassImpl->toUpper( aResolvedLigature, 0, aResolvedLigature.getLength(), rLocale ); + // since toTitle will leave all-uppercase text unchanged we first need to + // use toLower to bring possible 2nd and following chars in lowercase + aResolvedLigature = xCharClassImpl->toLower( aResolvedLigature, 0, aResolvedLigature.getLength(), rLocale ); + sal_Int32 nResolvedLen = aResolvedLigature.getLength(); + + // now we can properly use toTitle to get the expected result for the resolved string. + // The rest of the text should just become lowercase. 
+ aRes = xCharClassImpl->toTitle( aResolvedLigature, 0, nResolvedLen, rLocale ) + + xCharClassImpl->toLower( aText, 1, aText.getLength() - 1, rLocale ); + offset.realloc( aRes.getLength() ); + + sal_Int32* pOffset = std::fill_n(offset.begin(), nResolvedLen, 0); + std::iota(pOffset, offset.end(), 1); + } + return aRes; +} + +// this function expects to be called on a word-by-word basis, +// namely that startPos points to the first char of the word +OUString Transliteration_titlecase::transliterateImpl( + const OUString& inStr, sal_Int32 startPos, sal_Int32 nCount, + Sequence< sal_Int32 >& offset, bool ) +{ + return transliterate_titlecase_Impl( inStr, startPos, nCount, aLocale, offset ); +} + +Transliteration_sentencecase::Transliteration_sentencecase() +{ + nMappingType = MappingType::ToTitle; // though only to be applied to the first word... + transliterationName = "sentence(generic)"; + implementationName = "com.sun.star.i18n.Transliteration.Transliteration_sentencecase"; +} + +// this function expects to be called on a sentence-by-sentence basis, +// namely that startPos points to the first word (NOT first char!) in the sentence +OUString Transliteration_sentencecase::transliterateImpl( + const OUString& inStr, sal_Int32 startPos, sal_Int32 nCount, + Sequence< sal_Int32 >& offset, bool ) +{ + return transliterate_titlecase_Impl( inStr, startPos, nCount, aLocale, offset ); +} + +} + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/i18npool/source/transliteration/transliteration_caseignore.cxx b/i18npool/source/transliteration/transliteration_caseignore.cxx new file mode 100644 index 000000000..61db2286e --- /dev/null +++ b/i18npool/source/transliteration/transliteration_caseignore.cxx @@ -0,0 +1,154 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. 
If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . + */ + +#include <com/sun/star/i18n/TransliterationType.hpp> +#include <rtl/ref.hxx> + +#include <i18nutil/casefolding.hxx> +#include <i18nutil/transliteration.hxx> + +#include <transliteration_caseignore.hxx> + +namespace com::sun::star::uno { class XComponentContext; } + +using namespace ::com::sun::star::uno; +using namespace ::com::sun::star::i18n; +using namespace ::com::sun::star::lang; + +namespace i18npool { + +/* Sets up the generic case-ignore transliteration: mapping type FullFolding, no module flags loaded, and the transliteration and implementation name strings. */ Transliteration_caseignore::Transliteration_caseignore() +{ + nMappingType = MappingType::FullFolding; + moduleLoaded = TransliterationFlags::NONE; + transliterationName = "case ignore (generic)"; + implementationName = "com.sun.star.i18n.Transliteration.Transliteration_caseignore"; +} + +/* ORs modName (cast to TransliterationFlags) into moduleLoaded, keeping flags from earlier calls, and stores rLocale in aLocale. */ void SAL_CALL +Transliteration_caseignore::loadModule( TransliterationModules modName, const Locale& rLocale ) +{ + moduleLoaded |= static_cast<TransliterationFlags>(modName); + aLocale = rLocale; +} + +/* Always reports TransliterationType::IGNORE. */ sal_Int16 SAL_CALL Transliteration_caseignore::getType() +{ + // It's NOT TransliterationType::ONE_TO_ONE because it's using casefolding + return TransliterationType::IGNORE; +} + + +/* Maps two one-character strings through Transliteration_u2l (results l1, l2) and Transliteration_l2u (results u1, u2); presumably these are the lower- and upper-case forms. Returns { l1, l2 } when l1 == u1 and l2 == u2, otherwise { l1, l2, u1, u2 }. Throws RuntimeException unless both inputs have length exactly 1. NOTE(review): the two helper objects are function-local statics, so every caller shares them and each call re-loads them with this object's aLocale; confirm this is never reached concurrently from objects with different locales. */ Sequence< OUString > SAL_CALL +Transliteration_caseignore::transliterateRange( const OUString& str1, const OUString& str2 ) +{ + if (str1.getLength() != 1 || str2.getLength() != 1) + throw RuntimeException(); + + static rtl::Reference< 
Transliteration_u2l > u2l(new Transliteration_u2l); + static rtl::Reference< Transliteration_l2u > l2u(new Transliteration_l2u); + + /* the shared helpers are re-loaded with this object's locale on every call */ u2l->loadModule(TransliterationModules(0), aLocale); + l2u->loadModule(TransliterationModules(0), aLocale); + + OUString l1 = u2l->transliterateString2String(str1, 0, str1.getLength()); + OUString u1 = l2u->transliterateString2String(str1, 0, str1.getLength()); + OUString l2 = u2l->transliterateString2String(str2, 0, str2.getLength()); + OUString u2 = l2u->transliterateString2String(str2, 0, str2.getLength()); + + /* both mappings agree for each input, so two entries describe the range */ if ((l1 == u1) && (l2 == u2)) { + Sequence< OUString > r(2); + r[0] = l1; + r[1] = l2; + return r; + } else { + Sequence< OUString > r(4); + r[0] = l1; + r[1] = l2; + r[2] = u1; + r[3] = u2; + return r; + } +} + +/* Returns true when compare() yields 0 for the two substrings; nMatch1 and nMatch2 are written by compare(). */ sal_Bool SAL_CALL +Transliteration_caseignore::equals( + const OUString& str1, sal_Int32 pos1, sal_Int32 nCount1, sal_Int32& nMatch1, + const OUString& str2, sal_Int32 pos2, sal_Int32 nCount2, sal_Int32& nMatch2) +{ + return (compare(str1, pos1, nCount1, nMatch1, str2, pos2, nCount2, nMatch2) == 0); +} + +/* Returns the result of compare() for the given substrings; the match counts it produces are discarded. */ sal_Int32 SAL_CALL +Transliteration_caseignore::compareSubstring( + const OUString& str1, sal_Int32 off1, sal_Int32 len1, + const OUString& str2, sal_Int32 off2, sal_Int32 len2) +{ + sal_Int32 nMatch1, nMatch2; + return compare(str1, off1, len1, nMatch1, str2, off2, len2, nMatch2); +} + + +/* Returns the result of compare() over the full length of both strings; the match counts it produces are discarded. */ sal_Int32 SAL_CALL +Transliteration_caseignore::compareString( + const OUString& str1, + const OUString& str2) +{ + sal_Int32 nMatch1, nMatch2; + return compare(str1, 0, str1.getLength(), nMatch1, str2, 0, str2.getLength(), nMatch2); +} + +/* Core comparison. Starting at pos1 / pos2, repeatedly fetches one character from each side through i18nutil::casefolding::getNextChar (passing aLocale, nMappingType and moduleLoaded) while both sides still have input. A side has input while nMatch < nCount or its MappingElement has current < element.nmap (presumably folded characters still pending). At the first differing pair both match counters are decremented and 1 or -1 is returned according to c1 > c2. If no pair differs the result is 0 when both sides ran out together, 1 when only str1 has input left and -1 otherwise. nMatch1 and nMatch2 are reset to 0 on entry. NOTE(review): the NOT_END_OF_STR1 / NOT_END_OF_STR2 macros are never #undef'd in the visible code. */ sal_Int32 +Transliteration_caseignore::compare( + const OUString& str1, sal_Int32 pos1, sal_Int32 nCount1, sal_Int32& nMatch1, + const OUString& str2, sal_Int32 pos2, sal_Int32 nCount2, sal_Int32& nMatch2) +{ + /* NOTE(review): the const_cast result is assigned straight back to a pointer-to-const */ const sal_Unicode *unistr1 = const_cast<sal_Unicode*>(str1.getStr()) + pos1; + const sal_Unicode *unistr2 = const_cast<sal_Unicode*>(str2.getStr()) + pos2; + sal_Unicode c1, c2; + 
i18nutil::MappingElement e1, e2; + nMatch1 = nMatch2 = 0; + +#define NOT_END_OF_STR1 (nMatch1 < nCount1 || e1.current < e1.element.nmap) +#define NOT_END_OF_STR2 (nMatch2 < nCount2 || e2.current < e2.element.nmap) + + while (NOT_END_OF_STR1 && NOT_END_OF_STR2) { + c1 = i18nutil::casefolding::getNextChar(unistr1, nMatch1, nCount1, e1, aLocale, nMappingType, moduleLoaded); + c2 = i18nutil::casefolding::getNextChar(unistr2, nMatch2, nCount2, e2, aLocale, nMappingType, moduleLoaded); + if (c1 != c2) { + /* NOTE(review): presumably steps back so the counters exclude the differing character; confirm against getNextChar */ nMatch1--; nMatch2--; + return c1 > c2 ? 1 : -1; + } + } + + /* no differing pair: equal only if both sides are exhausted */ return (!NOT_END_OF_STR1 && !NOT_END_OF_STR2) ? 0 + : (NOT_END_OF_STR1 ? 1 : -1); +} + +} + +/* Exported C entry point that returns a newly allocated i18npool::Transliteration_caseignore passed through cppu::acquire; both parameters are unnamed and unused. */ extern "C" SAL_DLLPUBLIC_EXPORT css::uno::XInterface * +com_sun_star_i18n_Transliteration_IGNORE_CASE_get_implementation( + css::uno::XComponentContext *, + css::uno::Sequence<css::uno::Any> const &) +{ + return cppu::acquire(new i18npool::Transliteration_caseignore()); +} + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/i18npool/source/transliteration/transliteration_commonclass.cxx b/i18npool/source/transliteration/transliteration_commonclass.cxx new file mode 100644 index 000000000..3c95b6845 --- /dev/null +++ b/i18npool/source/transliteration/transliteration_commonclass.cxx @@ -0,0 +1,136 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. 
The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . + */ + +#include <transliteration_commonclass.hxx> +#include <cppuhelper/supportsservice.hxx> + +using namespace ::com::sun::star::uno; +using namespace ::com::sun::star::i18n; +using namespace ::com::sun::star::lang; + +namespace i18npool { + +/* Initialises both name strings (transliterationName, implementationName) to the empty string. */ transliteration_commonclass::transliteration_commonclass() +{ + transliterationName = ""; + implementationName = ""; +} + +/* Returns transliterationName converted to an OUString with OUString::createFromAscii. */ OUString SAL_CALL transliteration_commonclass::getName() +{ + return OUString::createFromAscii(transliterationName); +} + +/* Stores rLocale in aLocale; the module argument is ignored. */ void SAL_CALL transliteration_commonclass::loadModule( TransliterationModules /*modName*/, const Locale& rLocale ) +{ + aLocale = rLocale; +} + + +/* Not provided by this class: always throws RuntimeException. */ void SAL_CALL +transliteration_commonclass::loadModuleNew( const Sequence < TransliterationModulesNew >& /*modName*/, const Locale& /*rLocale*/ ) +{ + throw RuntimeException(); +} + + +/* Not provided by this class: always throws RuntimeException. */ void SAL_CALL +transliteration_commonclass::loadModuleByImplName( const OUString& /*implName*/, const Locale& /*rLocale*/ ) +{ + throw RuntimeException(); +} + +/* Not provided by this class: always throws RuntimeException. */ void SAL_CALL +transliteration_commonclass::loadModulesByImplNames(const Sequence< OUString >& /*modNamelist*/, const Locale& /*rLocale*/) +{ + throw RuntimeException(); +} + +/* Not provided by this class: always throws RuntimeException. */ Sequence< OUString > SAL_CALL +transliteration_commonclass::getAvailableModules( const Locale& /*rLocale*/, sal_Int16 /*sType*/ ) +{ + throw RuntimeException(); +} + +/* Transliterates both substrings with transliterate() and compares the results one sal_Unicode at a time. Returns the difference of the first differing pair, or, when one result is a prefix of the other, the difference of the remaining lengths (0 when the results are identical). The offset sequences are sized 2*len and handed to transliterate(); their contents are not read here. */ sal_Int32 SAL_CALL +transliteration_commonclass::compareSubstring( + const OUString& str1, sal_Int32 off1, sal_Int32 len1, + const OUString& str2, sal_Int32 off2, sal_Int32 len2) +{ + Sequence <sal_Int32> offset1(2*len1); + Sequence <sal_Int32> offset2(2*len2); + + OUString in_str1 = transliterate(str1, off1, len1, offset1); + OUString in_str2 = transliterate(str2, off2, len2, offset2); + sal_Int32 strlen1 = 
in_str1.getLength(); + sal_Int32 strlen2 = in_str2.getLength(); + const sal_Unicode* unistr1 = in_str1.getStr(); + const sal_Unicode* unistr2 = in_str2.getStr(); + + /* walk both results in step until one of them is used up */ while (strlen1 && strlen2) + { + sal_Int32 ret = *unistr1 - *unistr2; + if (ret) + return ret; + + unistr1++; + unistr2++; + strlen1--; + strlen2--; + } + /* the common prefix matched: the side with characters left over decides the sign */ return strlen1 - strlen2; +} + +/* compareSubstring() applied to the full length of both strings. */ sal_Int32 SAL_CALL +transliteration_commonclass::compareString( const OUString& str1, const OUString& str2 ) +{ + return compareSubstring(str1, 0, str1.getLength(), str2, 0, str2.getLength()); +} + +/* Forwards to transliterateImpl() with a default-constructed offset sequence and false as the last argument. NOTE(review): the flag presumably means the offsets are not wanted; confirm against transliterateImpl. */ OUString SAL_CALL +transliteration_commonclass::transliterateString2String( const OUString& inStr, sal_Int32 startPos, sal_Int32 nCount ) +{ + Sequence < sal_Int32 > dummy_offset; + return transliterateImpl(inStr, startPos, nCount, dummy_offset, false); +} + +/* Wraps inChar in a one-character OUString and passes it to this class's transliterateString2String via an explicitly qualified call. */ OUString SAL_CALL +transliteration_commonclass::transliterateChar2String( sal_Unicode inChar ) +{ + return transliteration_commonclass::transliterateString2String(OUString(&inChar, 1), 0, 1); +} + +/* Returns implementationName converted to an OUString with OUString::createFromAscii. */ OUString SAL_CALL transliteration_commonclass::getImplementationName() +{ + return OUString::createFromAscii(implementationName); +} + +/* Delegates the service-name check to cppu::supportsService. */ sal_Bool SAL_CALL transliteration_commonclass::supportsService(const OUString& rServiceName) +{ + return cppu::supportsService(this, rServiceName); +} + +/* Returns a one-element sequence holding "com.sun.star.i18n.Transliteration.l10n". */ Sequence< OUString > SAL_CALL transliteration_commonclass::getSupportedServiceNames() +{ + return { "com.sun.star.i18n.Transliteration.l10n" }; +} + +} + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ |