diff options
Diffstat (limited to 'i18npool/source/inputchecker')
3 files changed, 441 insertions, 0 deletions
diff --git a/i18npool/source/inputchecker/inputsequencechecker.cxx b/i18npool/source/inputchecker/inputsequencechecker.cxx new file mode 100644 index 000000000..ff1ea652f --- /dev/null +++ b/i18npool/source/inputchecker/inputsequencechecker.cxx @@ -0,0 +1,161 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . 
+ */
+
+#include <inputsequencechecker.hxx>
+#include <com/sun/star/i18n/InputSequenceCheckMode.hpp>
+#include <com/sun/star/uno/XComponentContext.hpp>
+#include <cppuhelper/supportsservice.hxx>
+#include <i18nutil/unicode.hxx>
+
+using namespace ::com::sun::star::uno;
+using namespace ::com::sun::star::i18n;
+using namespace ::com::sun::star::lang;
+
+namespace i18npool {
+
+// Constructs the generic, dispatching checker.  The component context is kept
+// so that language specific checkers can be instantiated on demand in
+// getInputSequenceChecker().
+InputSequenceCheckerImpl::InputSequenceCheckerImpl( const Reference < XComponentContext >& rxContext ) : m_xContext( rxContext )
+{
+    serviceName = "com.sun.star.i18n.InputSequenceChecker";
+}
+
+// Constructs a checker that reports pServiceName as its implementation and
+// service name.  m_xContext is not initialised on this path.
+InputSequenceCheckerImpl::InputSequenceCheckerImpl(const char *pServiceName)
+    : serviceName(pServiceName)
+{
+}
+
+InputSequenceCheckerImpl::~InputSequenceCheckerImpl()
+{
+}
+
+// Checks whether inputChar may follow the character at Text[nStartPos].
+//
+// Returns true when the mode is PASSTHROUGH, when nStartPos does not address a
+// character of Text, or when the two characters do not share a script that
+// getLanguageByScripType() maps to a language; otherwise the decision is
+// delegated to the language specific checker.
+sal_Bool SAL_CALL
+InputSequenceCheckerImpl::checkInputSequence(const OUString& Text, sal_Int32 nStartPos,
+        sal_Unicode inputChar, sal_Int16 inputCheckMode)
+{
+    if (inputCheckMode == InputSequenceCheckMode::PASSTHROUGH)
+        return true;
+
+    // There is no character at nStartPos to compose with; do not index
+    // outside of the string.
+    if (nStartPos < 0 || nStartPos >= Text.getLength())
+        return true;
+
+    char* language = getLanguageByScripType(Text[nStartPos], inputChar);
+
+    if (language)
+        return getInputSequenceChecker(language)->checkInputSequence(Text, nStartPos, inputChar, inputCheckMode);
+    else
+        return true; // not a checkable language.
+}
+
+// Inserts inputChar into Text after nStartPos, letting a language specific
+// checker correct the sequence when one applies.
+//
+// When no language specific checker applies (PASSTHROUGH mode, nStartPos not
+// addressing a character of Text, or no checkable script), inputChar is
+// inserted at nStartPos + 1 and that index is returned.  Otherwise the result
+// of the language specific correctInputSequence() is returned.
+sal_Int32 SAL_CALL
+InputSequenceCheckerImpl::correctInputSequence(OUString& Text, sal_Int32 nStartPos,
+        sal_Unicode inputChar, sal_Int16 inputCheckMode)
+{
+    // Only look at Text[nStartPos] when it exists; otherwise fall through to
+    // the plain insertion below.
+    if (inputCheckMode != InputSequenceCheckMode::PASSTHROUGH
+        && nStartPos >= 0 && nStartPos < Text.getLength()) {
+        char* language = getLanguageByScripType(Text[nStartPos], inputChar);
+
+        if (language)
+            return getInputSequenceChecker(language)->correctInputSequence(Text, nStartPos, inputChar, inputCheckMode);
+    }
+    Text = Text.replaceAt(++nStartPos, 0, rtl::OUStringChar(inputChar));
+    return nStartPos;
+}
+
+// Script ranges passed to unicode::getUnicodeScriptType(); only scripts that
+// have a language specific checker are listed.  The last entry is the
+// kScriptCount terminator.
+ScriptTypeList const typeList[] = {
+    //{ UnicodeScript_kHebrew,              UnicodeScript_kHebrew },        // 10,
+    //{ UnicodeScript_kArabic,              UnicodeScript_kArabic },        // 11,
+    { UnicodeScript_kDevanagari, UnicodeScript_kDevanagari, sal_Int16(UnicodeScript_kDevanagari) },    // 14,
+    { UnicodeScript_kThai, UnicodeScript_kThai, sal_Int16(UnicodeScript_kThai) },      // 24,
+
+    { UnicodeScript_kScriptCount, UnicodeScript_kScriptCount, sal_Int16(UnicodeScript_kScriptCount) }    // 88
+};
+
+// Returns the language code ("th" or "hi") of the checker responsible for the
+// pair cChar/nChar, or nullptr when the two characters are not both of the
+// same script listed in typeList.
+char*
+InputSequenceCheckerImpl::getLanguageByScripType(sal_Unicode cChar, sal_Unicode nChar)
+{
+    css::i18n::UnicodeScript type = static_cast<css::i18n::UnicodeScript>(unicode::getUnicodeScriptType( cChar, typeList, sal_Int16(UnicodeScript_kScriptCount) ));
+
+    if (type != UnicodeScript_kScriptCount &&
+            type == static_cast<css::i18n::UnicodeScript>(unicode::getUnicodeScriptType( nChar, typeList, sal_Int16(UnicodeScript_kScriptCount) ))) {
+        switch(type) {
+            case UnicodeScript_kThai:           return const_cast<char*>("th");
+            //case UnicodeScript_kArabic:       return (char*)"ar";
+            //case UnicodeScript_kHebrew:       return (char*)"he";
+            case UnicodeScript_kDevanagari:   return const_cast<char*>("hi");
+            default: break;
+        }
+    }
+    return nullptr;
+}
+
+// Returns the checker service for rLanguage, creating and caching it on first
+// use.  Throws RuntimeException when the service
+// "com.sun.star.i18n.InputSequenceChecker_<rLanguage>" cannot be instantiated
+// or does not implement XExtendedInputSequenceChecker.
+//
+// NOTE(review): aLanguage is compared to rLanguage with ==; if aLanguage is a
+// raw character pointer this is an identity comparison -- confirm against the
+// header.
+Reference< XExtendedInputSequenceChecker >&
+InputSequenceCheckerImpl::getInputSequenceChecker(char const * rLanguage)
+{
+    if (cachedItem && cachedItem->aLanguage == rLanguage) {
+        return cachedItem->xISC;
+    }
+    else {
+        for (const auto& l : lookupTable) {
+            // Only a matching entry becomes the cached item; a miss must not
+            // disturb the cache (and must not copy every entry on the way).
+            if (l.aLanguage == rLanguage) {
+                cachedItem = l;
+                return cachedItem->xISC;
+            }
+        }
+
+        Reference < XInterface > xI = m_xContext->getServiceManager()->createInstanceWithContext(
+                "com.sun.star.i18n.InputSequenceChecker_" +
+                OUString::createFromAscii(rLanguage),
+                m_xContext);
+
+        if ( xI.is() ) {
+            Reference< XExtendedInputSequenceChecker > xISC( xI, UNO_QUERY );
+            if (xISC.is()) {
+                lookupTable.emplace_back(rLanguage, xISC);
+                cachedItem = lookupTable.back();
+                return cachedItem->xISC;
+            }
+        }
+    }
+    throw RuntimeException();
+}
+
+// XServiceInfo: the implementation name is the service name given at
+// construction.
+OUString SAL_CALL
+InputSequenceCheckerImpl::getImplementationName()
+{
+    return OUString::createFromAscii(serviceName);
+}
+
+sal_Bool SAL_CALL
+InputSequenceCheckerImpl::supportsService(const OUString& rServiceName)
+{
+    return cppu::supportsService(this, rServiceName);
+}
+
+// XServiceInfo: exactly one supported service, named by serviceName.
+Sequence< OUString > SAL_CALL
+InputSequenceCheckerImpl::getSupportedServiceNames()
+{
+    Sequence< OUString > aRet { OUString::createFromAscii(serviceName) };
+    return aRet;
+}
+
+}
+
+// Component factory entry point for the generic input sequence checker.
+extern "C" SAL_DLLPUBLIC_EXPORT css::uno::XInterface *
+com_sun_star_i18n_InputSequenceChecker_get_implementation(
+    css::uno::XComponentContext *context,
+    css::uno::Sequence<css::uno::Any> const &)
+{
+    return cppu::acquire(new i18npool::InputSequenceCheckerImpl(context));
+}
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/i18npool/source/inputchecker/inputsequencechecker_hi.cxx b/i18npool/source/inputchecker/inputsequencechecker_hi.cxx
new file mode 100644
index 000000000..b1f9e99f6
--- /dev/null
+++ b/i18npool/source/inputchecker/inputsequencechecker_hi.cxx
@@ -0,0 +1,136 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * This file incorporates work covered by the following license notice:
+ *
+ *   Licensed to the Apache Software Foundation (ASF) under one or more
+ *   contributor license agreements. See the NOTICE file distributed
+ *   with this work for additional information regarding copyright
+ *   ownership. The ASF licenses this file to you under the Apache
+ *   License, Version 2.0 (the "License"); you may not use this file
+ *   except in compliance with the License. You may obtain a copy of
+ *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
+ */
+
+
+#include <inputsequencechecker_hi.hxx>
+
+
+namespace i18npool {
+
+InputSequenceChecker_hi::InputSequenceChecker_hi()
+    : InputSequenceCheckerImpl("com.sun.star.i18n.InputSequenceChecker_hi")
+{
+}
+
+InputSequenceChecker_hi::~InputSequenceChecker_hi()
+{
+}
+/* Non-Defined Class type */
+#define ND_ 0
+
+/*
+ * Devanagari character type definitions
+ */
+#define UP_ 1  // ChandraBindu & Anuswar
+#define NP_ 2  // Visarg
+#define IV_ 3  // Independent Vowels
+#define CN_ 4  // Consonants except CK_ & RC_
+#define CK_ 5  // Consonants that can be followed by Nukta
+#define RC_ 6  // Ra
+#define NM_ 7  // Matra
+#define RM_ 8  // Ra + HAL
+#define IM_ 9  // Choti I Matra
+#define HL_ 10 // HAL
+#define NK_ 11 // Nukta
+#define VD_ 12 // Vedic
+#define HD_ 13 // Hindu Numerals
+
+/*
+ * Devanagari character type table, indexed by (code point - 0x0900)
+ */
+const sal_uInt16 devaCT[128] = {
+/*         0,    1,    2,    3,    4,    5,    6,    7,
+           8,    9,    A,    B,    C,    D,    E,    F, */
+/* 0900 */ ND_, UP_, UP_, NP_, ND_, IV_, IV_, IV_,
+           IV_, IV_, IV_, IV_, IV_, IV_, IV_, IV_,
+/* 0910 */ IV_, IV_, IV_, IV_, IV_, CK_, CK_, CK_,
+           CN_, CN_, CN_, CN_, CK_, CN_, CN_, CN_,
+/* 0920 */ CN_, CK_, CK_, CN_, CN_, CN_, CN_, CN_,
+           CN_, CN_, CN_, CK_, CN_, CN_, CN_, CN_,
+/* 0930 */ RC_, CN_, CN_, CN_, CN_, CN_, CN_, CN_,
+           CN_, CN_, ND_, ND_, NK_, VD_, NM_, IM_,
+/* 0940 */ RM_, NM_, NM_, NM_, NM_, RM_, RM_, RM_,
+           RM_, RM_, RM_, RM_, RM_, HL_, ND_, ND_,
+/* 0950 */ ND_, VD_, VD_, VD_, VD_, ND_, ND_, ND_,
+           CN_, CN_, CN_, CN_, CN_, CN_, CN_, CN_,
+/* 0960 */ IV_, IV_, NM_, NM_, ND_, ND_, HD_, HD_,
+           HD_, HD_, HD_, HD_, HD_, HD_, HD_, HD_,
+/* 0970 */ ND_, ND_, ND_, ND_, ND_, ND_, ND_, ND_,
+           ND_, ND_, ND_, ND_, ND_, ND_, ND_, ND_,
+};
+
+/*
+ * Devanagari character composition table:
+ * dev_cell_check[type of current char][type of input char] is 1 when the
+ * input character may follow the current character, 0 otherwise.
+ */
+const sal_uInt16 dev_cell_check[14][14] = {
+  /*        ND, UP, NP, IV, CN, CK, RC, NM, RM, IM, HL, NK, VD, HD, */
+  /* 0  */ { 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0 }, /* ND */
+  /* 1  */ { 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0 }, /* UP */
+  /* 2  */ { 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0 }, /* NP */
+  /* 3  */ { 0,  1,  1,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0 }, /* IV */
+  /* 4  */ { 0,  1,  1,  0,  0,  0,  0,  1,  1,  1,  1,  0,  0,  0 }, /* CN */
+  /* 5  */ { 0,  1,  1,  0,  0,  0,  0,  1,  1,  1,  1,  1,  0,  0 }, /* CK */
+  /* 6  */ { 0,  1,  1,  0,  0,  0,  0,  1,  1,  1,  1,  0,  0,  0 }, /* RC */
+  /* 7  */ { 0,  1,  1,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0 }, /* NM */
+  /* 8  */ { 0,  1,  1,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0 }, /* RM */
+  /* 9  */ { 0,  1,  1,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0 }, /* IM */
+  /* 10 */ { 0,  0,  0,  0,  1,  1,  1,  0,  0,  0,  0,  0,  0,  0 }, /* HL */
+  /* 11 */ { 0,  1,  1,  0,  0,  0,  0,  1,  1,  1,  1,  0,  0,  0 }, /* NK */
+  /* 12 */ { 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0 }, /* VD */
+  /* 13 */ { 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0 }  /* HD */
+};
+
+/*
+ * Acceptance table: DEV_Composible[row][dev_cell_check value].
+ * Row 0 accepts everything, row 1 accepts only composable pairs.  The rows are
+ * NOT indexed by the raw InputSequenceCheckMode value (which goes up to
+ * STRICT = 2); see checkInputSequence() for the mapping.
+ */
+bool const DEV_Composible[2][2] = {
+/* Row 0 */ {true,  true }, // PASSTHROUGH: no checking
+/* Row 1 */ {false, true}   // checking modes (BASIC, STRICT)
+};
+
+// Returns the Devanagari character type of x, or ND_ for characters outside
+// the U+0900..U+097F block covered by devaCT.
+static constexpr sal_uInt16 getCharType(sal_Unicode x)
+{
+    return (x >= 0x0900 && x <= 0x097f) ? devaCT[x - 0x0900] : ND_;
+}
+
+// Returns whether inputChar may follow the character at Text[nStartPos]
+// according to dev_cell_check.  PASSTHROUGH (0) accepts every pair; any other
+// mode accepts only pairs marked composable.
+sal_Bool SAL_CALL
+InputSequenceChecker_hi::checkInputSequence(const OUString& Text,
+                                            sal_Int32 nStartPos,
+                                            sal_Unicode inputChar,
+                                            sal_Int16 inputCheckMode)
+{
+    sal_Unicode currentChar = Text[nStartPos];
+    sal_uInt16  ch1 = getCharType(inputChar);
+    sal_uInt16  ch2 = getCharType(currentChar);
+
+    // DEV_Composible has only two rows, but the check mode can be 2 (STRICT)
+    // or any other caller supplied value; indexing with the raw mode would
+    // read past the end of the table.  Everything except PASSTHROUGH (0) uses
+    // the checking row.
+    const int nRow = (inputCheckMode == 0) ? 0 : 1;
+
+    return (DEV_Composible[nRow][dev_cell_check[ch2][ch1]]);
+}
+
+// Inserts inputChar after nStartPos when the sequence is accepted and returns
+// the index of the inserted character.  When the sequence is rejected, Text is
+// left unchanged and Text.getLength() is returned.
+sal_Int32 SAL_CALL
+InputSequenceChecker_hi::correctInputSequence(OUString& Text,
+                                            sal_Int32 nStartPos,
+                                            sal_Unicode inputChar,
+                                            sal_Int16 inputCheckMode)
+{
+    if (checkInputSequence(Text, nStartPos, inputChar, inputCheckMode))
+        Text = Text.replaceAt(++nStartPos, 0, rtl::OUStringChar(inputChar));
+    else
+        nStartPos=Text.getLength();
+    return nStartPos;
+}
+}
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/i18npool/source/inputchecker/inputsequencechecker_th.cxx b/i18npool/source/inputchecker/inputsequencechecker_th.cxx
new file mode 100644
index 000000000..c6d6f349d
--- /dev/null
+++ b/i18npool/source/inputchecker/inputsequencechecker_th.cxx
@@ -0,0 +1,144 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * This file incorporates work covered by the following license notice:
+ *
+ *   Licensed to the Apache Software Foundation (ASF) under one or more
+ *   contributor license agreements. See the NOTICE file distributed
+ *   with this work for additional information regarding copyright
+ *   ownership. The ASF licenses this file to you under the Apache
+ *   License, Version 2.0 (the "License"); you may not use this file
+ *   except in compliance with the License. You may obtain a copy of
+ *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
+ */
+
+
+#include <inputsequencechecker_th.hxx>
+#include <wtt.h>
+
+
+namespace i18npool {
+
+InputSequenceChecker_th::InputSequenceChecker_th()
+    : InputSequenceCheckerImpl("com.sun.star.i18n.InputSequenceChecker_th")
+{
+}
+
+InputSequenceChecker_th::~InputSequenceChecker_th()
+{
+}
+
+/* Table for Thai Cell Manipulation:
+ * TAC_celltype_inputcheck[type of previous char][type of input char] yields
+ * one of the class letters 'A', 'C', 'S', 'R', 'X' that check() looks up in
+ * TAC_Composible. */
+char const TAC_celltype_inputcheck[17][17] = {
+/* Cn */ /*  0,   1,   2,   3,   4,   5,   6,   7,     8,   9,   A,   B,   C,   D,   E,   F       */
+/* Cn-1 00 */{ 'X', 'A', 'A', 'A', 'A', 'A', 'A', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R' },
+     /* 10 */{ 'X', 'A', 'A', 'A', 'S', 'S', 'A', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R' },
+     /* 20 */{ 'X', 'A', 'A', 'A', 'A', 'S', 'A', 'C', 'C', 'C', 'C', 'C', 'C', 'C', 'C', 'C', 'C' },
+     /* 30 */{ 'X', 'S', 'A', 'S', 'S', 'S', 'S', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R' },
+     /* 40 */{ 'X', 'A', 'A', 'A', 'A', 'S', 'A', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R' },
+     /* 50 */{ 'X', 'A', 'A', 'A', 'A', 'S', 'A', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R' },
+     /* 60 */{ 'X', 'A', 'A', 'A', 'S', 'A', 'S', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R' },
+     /* 70 */{ 'X', 'A', 'A', 'A', 'A', 'S', 'A', 'R', 'R', 'R', 'C', 'C', 'R', 'R', 'R', 'R', 'R' },
+     /* 80 */{ 'X', 'A', 'A', 'A', 'S', 'S', 'A', 'R', 'R', 'R', 'C', 'R', 'R', 'R', 'R', 'R', 'R' },
+     /* 90 */{ 'X', 'A', 'A', 'A', 'S', 'S', 'A', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R' },
+     /* A0 */{ 'X', 'A', 'A', 'A', 'A', 'A', 'A', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R' },
+     /* B0 */{ 'X', 'A', 'A', 'A', 'S', 'S', 'A', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R' },
+     /* C0 */{ 'X', 'A', 'A', 'A', 'S', 'S', 'A', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R' },
+     /* D0 */{ 'X', 'A', 'A', 'A', 'S', 'S', 'A', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R', 'R' },
+     /* E0 */{ 'X', 'A', 'A', 'A', 'S', 'S', 'A', 'R', 'R', 'R', 'C', 'C', 'R', 'R', 'R', 'R', 'R' },
+     /* F0 */{ 'X', 'A', 'A', 'A', 'S', 'S', 'A', 'R', 'R', 'R', 'C', 'R', 'R', 'R', 'R', 'R', 'R' },
+             { 'X', 'A', 'A', 'A', 'S', 'S', 'A', 'R', 'R', 'R', 'C', 'R', 'C', 'R', 'R', 'R', 'R' }
+};
+
+/* Acceptance table: TAC_Composible[check mode][class index], where the class
+ * index is the position of the class letter in the header row below. */
+bool const TAC_Composible[3][5] = {
+        /*  'A',    'C',    'S',    'R',    'X' */
+/* Mode 0 */ {true,   true,   true,   true,   true}, // PASSTHROUGH = 0
+/* Mode 1 */ {true,   true,   true,   false,  true}, // BASIC = 1
+/* Mode 2 */ {true,   true,   false,  false,  true}  // STRICT = 2
+};
+
+// Returns whether ch2 may follow ch1 under the given check mode, by looking up
+// the class of the pair in TAC_celltype_inputcheck and then the verdict for
+// that class in TAC_Composible.
+static bool check(sal_Unicode ch1, sal_Unicode ch2, sal_Int16 inputCheckMode)
+{
+    // TAC_Composible only has rows for modes 0..2.  A caller supplied mode
+    // outside of that range must not index past the table; it is treated as
+    // the strictest mode (STRICT = 2).
+    const sal_Int16 nMode = (inputCheckMode < 0 || inputCheckMode > 2) ? 2 : inputCheckMode;
+
+    sal_Int16  composible_class;
+    switch (TAC_celltype_inputcheck[getCharType(ch1)][getCharType(ch2)]) {
+        case 'A': composible_class = 0; break;
+        case 'C': composible_class = 1; break;
+        case 'S': composible_class = 2; break;
+        case 'R': composible_class = 3; break;
+        case 'X': composible_class = 4; break;
+        default:  composible_class = 0;
+    }
+    return (TAC_Composible[nMode][composible_class]);
+}
+
+// Returns whether inputChar may follow the character at Text[nStartPos].
+sal_Bool SAL_CALL
+InputSequenceChecker_th::checkInputSequence(const OUString& Text, sal_Int32 nStartPos,
+    sal_Unicode inputChar, sal_Int16 inputCheckMode)
+{
+    return check(Text[nStartPos], inputChar, inputCheckMode);
+}
+
+// Inserts inputChar after Text[nStartPos] when the sequence is accepted;
+// otherwise tries the correction rules listed below (replace and/or reorder).
+// Returns the updated nStartPos, or Text.getLength() with Text unchanged when
+// the input is rejected and no rule applies.
+sal_Int32 SAL_CALL
+InputSequenceChecker_th::correctInputSequence(OUString& Text,
+                                            sal_Int32       nStartPos,
+                                            sal_Unicode     inputChar,
+                                            sal_Int16       inputCheckMode)
+{
+/* 9 rules for input sequence correction, see issue i42661 for detail,
+
+https://bz.apache.org/ooo/show_bug.cgi?id=42661
+
+<abv> = <av1>|<av2>|<av3>|<bv1>|<bv2>
+<abv1> = <av1>|<bv1>
+<thanthakhat> = 0E4C (karan)
+
+1. <cons> <abv>_x + <abv>_y => <cons> <abv>_y (replace)
+2. <cons> <tone>_x + <tone>_y => <cons> <tone>_y (replace)
+3. <cons> <abv> <tone>_x + <tone>_y => <cons> <abv> <tone>_y (replace)
+4. <cons> <abv>_x <tone> + <abv>_y => <cons> <abv>_y <tone> (replace, reorder)
+5. <cons> <tone> + <abv> => <cons> <abv> <tone> (reorder)
+6. <cons> <fv1> + <tone> => <cons> <tone> <fv1> (reorder)
+7. <cons> <tone>_x <fv1> + <tone>_y => <cons> <tone>_y <fv1> (replace, reorder)
+8. <cons> <thanthakhat> + <abv1> => <cons> <abv1> <thanthakhat> (reorder)
+9. <cons> <abv1>_x <thanthakhat> + <abv1>_y => <cons> <abv1>_y <thanthakhat>(reorder, replace)
+*/
+    // <abv> and <abv1> from the grammar above.  Local lambdas instead of
+    // macros: the argument is evaluated once and nothing leaks out of this
+    // function.
+    const auto isAbv = [](sal_uInt16 t) {
+        return (t >= CT_AV1 && t <= CT_AV3) || t == CT_BV1 || t == CT_BV2;
+    };
+    const auto isAbv1 = [](sal_uInt16 t) {
+        return t == CT_AV1 || t == CT_BV1;
+    };
+
+    if (check(Text[nStartPos], inputChar, inputCheckMode))
+        Text = Text.replaceAt(++nStartPos, 0, rtl::OUStringChar(inputChar));
+    else if (nStartPos > 0 && getCharType(Text[nStartPos-1]) == CT_CONS) {
+        sal_uInt16 t1=getCharType(Text[nStartPos]), t2=getCharType(inputChar);
+        if ( (isAbv(t1) && isAbv(t2)) || // 1.
+                (t1==CT_TONE && t2==CT_TONE) )// 2.
+            Text = Text.replaceAt(nStartPos, 1, rtl::OUStringChar(inputChar));
+        else if ( (t1==CT_TONE && isAbv(t2)) || // 5.
+                (t1==CT_FV1 && t2==CT_TONE) || // 6.
+                (Text[nStartPos]==0x0E4C && isAbv1(t2)) ) // 8.
+            Text = Text.replaceAt(nStartPos++, 0, rtl::OUStringChar(inputChar));
+        else
+            nStartPos=Text.getLength();
+    } else if (nStartPos > 1 && getCharType(Text[nStartPos-2]) == CT_CONS) {
+        sal_uInt16 t1=getCharType(Text[nStartPos-1]), t2=getCharType(Text[nStartPos]), t3=getCharType(inputChar);
+        if (isAbv(t1) && t2==CT_TONE && t3==CT_TONE) // 3.
+            Text = Text.replaceAt(nStartPos, 1, rtl::OUStringChar(inputChar));
+        else if ( (isAbv(t1) && t2==CT_TONE && isAbv(t3)) || // 4.
+                (t1==CT_TONE && t2==CT_FV1 && t3==CT_TONE) || // 7.
+                (isAbv1(t1) && Text[nStartPos]==0x0E4C && isAbv1(t3)) ) // 9.
+            Text = Text.replaceAt(nStartPos-1, 1, rtl::OUStringChar(inputChar));
+        else
+            nStartPos=Text.getLength();
+    } else
+        nStartPos=Text.getLength();
+
+    return nStartPos;
+}
+
+}
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ |