summary refs log tree commit diff stats
path: root/i18npool/source/textconversion/textconversion_zh.cxx
diff options
context:
space:
mode:
Diffstat (limited to 'i18npool/source/textconversion/textconversion_zh.cxx')
-rw-r--r-- i18npool/source/textconversion/textconversion_zh.cxx 332
1 files changed, 332 insertions, 0 deletions
diff --git a/i18npool/source/textconversion/textconversion_zh.cxx b/i18npool/source/textconversion/textconversion_zh.cxx
new file mode 100644
index 0000000000..b5da23e7e0
--- /dev/null
+++ b/i18npool/source/textconversion/textconversion_zh.cxx
@@ -0,0 +1,332 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * This file incorporates work covered by the following license notice:
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed
+ * with this work for additional information regarding copyright
+ * ownership. The ASF licenses this file to you under the Apache
+ * License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of
+ * the License at http://www.apache.org/licenses/LICENSE-2.0 .
+ */
+
+
+#include <textconversion.hxx>
+#include <com/sun/star/i18n/TextConversionType.hpp>
+#include <com/sun/star/i18n/TextConversionOption.hpp>
+#include <com/sun/star/lang/NoSupportException.hpp>
+#include <com/sun/star/linguistic2/ConversionDirection.hpp>
+#include <com/sun/star/linguistic2/ConversionDictionaryType.hpp>
+#include <com/sun/star/linguistic2/ConversionDictionaryList.hpp>
+#include <memory>
+
+using namespace com::sun::star::lang;
+using namespace com::sun::star::i18n;
+using namespace com::sun::star::linguistic2;
+using namespace com::sun::star::uno;
+
+
+namespace i18npool {
+/**
+ * Constructor. Passes the string "com.sun.star.i18n.TextConversion_zh" to
+ * the TextConversionService base-class constructor and stores in xCDL the
+ * object returned by ConversionDictionaryList::create() for xContext.
+ * getWordConversion() later calls queryConversions() on xCDL.
+ */
+TextConversion_zh::TextConversion_zh( const Reference < XComponentContext >& xContext )
+ : TextConversionService("com.sun.star.i18n.TextConversion_zh")
+{
+ xCDL = ConversionDictionaryList::create(xContext);
+}
+
+/**
+ * Maps a single character through a two-level lookup table.
+ *
+ * Index is addressed with the high byte of ch; the value found there plus
+ * the low byte of ch addresses Data. The value 0xFFFF at either level means
+ * "no mapping", in which case ch is returned unchanged. ch is also returned
+ * unchanged when either table pointer is null.
+ */
+static sal_Unicode getOneCharConversion(sal_Unicode ch, const sal_Unicode* Data, const sal_uInt16* Index)
+{
+    // Without both tables there is nothing to look up.
+    if (!Data || !Index)
+        return ch;
+
+    // First level: high byte selects a block of Data.
+    const sal_Unicode blockStart = Index[ch>>8];
+    if (blockStart == 0xFFFF)
+        return ch;
+
+    // Second level: low byte selects the entry inside that block.
+    const sal_Unicode mapped = Data[blockStart + (ch & 0xFF)];
+    return (mapped == 0xFFFF) ? ch : mapped;
+}
+/*
+ * Declarations used only when DISABLE_DYNLOADING is defined: in that
+ * configuration getCharConversion() and getWordConversion() call these
+ * accessors directly; otherwise they obtain the same symbol names through
+ * getFunctionBySymbol().
+ */
+#ifdef DISABLE_DYNLOADING
+
+extern "C" {
+
+const sal_Unicode* getSTC_CharData_T2S(); // T2S character tables: used when toSChinese is true
+const sal_uInt16* getSTC_CharIndex_T2S();
+const sal_Unicode* getSTC_CharData_S2V(); // S2V: used when toSChinese is false and USE_CHARACTER_VARIANTS is set
+const sal_uInt16* getSTC_CharIndex_S2V();
+const sal_Unicode* getSTC_CharData_S2T(); // S2T: used when toSChinese is false and USE_CHARACTER_VARIANTS is not set
+const sal_uInt16* getSTC_CharIndex_S2T();
+
+const sal_Unicode *getSTC_WordData(sal_Int32&); // word table used for both directions; called with dictLen
+
+const sal_uInt16 *getSTC_WordIndex_T2S(sal_Int32&); // called with maxLen
+const sal_uInt16 *getSTC_WordEntry_T2S();
+const sal_uInt16 *getSTC_WordIndex_S2T(sal_Int32&); // called with maxLen
+const sal_uInt16 *getSTC_WordEntry_S2T();
+
+}
+
+#endif
+
+/**
+ * Converts nLength characters of aText, starting at nStartPos, one
+ * character at a time.
+ *
+ * The lookup tables are chosen as follows: T2S when toSChinese is true;
+ * otherwise S2V when nConversionOptions has USE_CHARACTER_VARIANTS set,
+ * and S2T in the remaining case. The result always has nLength characters.
+ */
+OUString
+TextConversion_zh::getCharConversion(const OUString& aText, sal_Int32 nStartPos, sal_Int32 nLength, bool toSChinese, sal_Int32 nConversionOptions)
+{
+    const bool useVariants =
+        (nConversionOptions & TextConversionOption::USE_CHARACTER_VARIANTS) != 0;
+
+    const sal_Unicode *pCharData;
+    const sal_uInt16 *pCharIndex;
+
+#ifdef DISABLE_DYNLOADING
+    // Table accessors are linked in directly.
+    if (toSChinese) {
+        pCharData = getSTC_CharData_T2S();
+        pCharIndex = getSTC_CharIndex_T2S();
+    } else if (useVariants) {
+        pCharData = getSTC_CharData_S2V();
+        pCharIndex = getSTC_CharIndex_S2V();
+    } else {
+        pCharData = getSTC_CharData_S2T();
+        pCharIndex = getSTC_CharIndex_S2T();
+    }
+#else
+    // Table accessors are looked up by symbol name and then called.
+    using DataGetter = const sal_Unicode* (*)();
+    using IndexGetter = const sal_uInt16* (*)();
+    if (toSChinese) {
+        pCharData = reinterpret_cast<DataGetter>(getFunctionBySymbol("getSTC_CharData_T2S"))();
+        pCharIndex = reinterpret_cast<IndexGetter>(getFunctionBySymbol("getSTC_CharIndex_T2S"))();
+    } else if (useVariants) {
+        pCharData = reinterpret_cast<DataGetter>(getFunctionBySymbol("getSTC_CharData_S2V"))();
+        pCharIndex = reinterpret_cast<IndexGetter>(getFunctionBySymbol("getSTC_CharIndex_S2V"))();
+    } else {
+        pCharData = reinterpret_cast<DataGetter>(getFunctionBySymbol("getSTC_CharData_S2T"))();
+        pCharIndex = reinterpret_cast<IndexGetter>(getFunctionBySymbol("getSTC_CharIndex_S2T"))();
+    }
+#endif
+
+    // Fill a freshly allocated string buffer in place.
+    rtl_uString * pConverted = rtl_uString_alloc(nLength);
+    for (sal_Int32 nOff = 0; nOff < nLength; ++nOff)
+    {
+        const sal_Unicode cSource = aText[nStartPos+nOff];
+        pConverted->buffer[nOff] = getOneCharConversion(cSource, pCharData, pCharIndex);
+    }
+    // SAL_NO_ACQUIRE: the returned OUString takes over ownership of the buffer.
+    return OUString(pConverted, SAL_NO_ACQUIRE);
+}
+/**
+ * Converts nLength characters of aText, starting at nStartPos, using
+ * word-level replacements where one is found and per-character conversion
+ * otherwise.
+ *
+ * At each position the longest candidate word (at most maxLen characters)
+ * is tried first, then successively shorter ones. Each candidate is looked
+ * up in the user dictionary (xCDL) and, if that yields nothing, in the
+ * built-in word table. When no candidate of any length matches, a single
+ * character is converted with getOneCharConversion().
+ *
+ * If there is neither a complete built-in word table nor a user dictionary,
+ * the call is forwarded to getCharConversion() and offset is left untouched.
+ *
+ * @param aText              source text
+ * @param nStartPos          index in aText of the first character to convert
+ * @param nLength            number of characters to convert
+ * @param toSChinese         true selects the T2S tables, false the S2T word
+ *                           tables (with S2V character tables when
+ *                           nConversionOptions has USE_CHARACTER_VARIANTS set)
+ * @param nConversionOptions TextConversionOption bits; also passed on to the
+ *                           user dictionary query
+ * @param offset             if non-empty on entry, receives for every output
+ *                           character a position in aText; on return it is
+ *                           resized to the output length, or emptied when
+ *                           every replacement had the same length as the
+ *                           word it replaced. It is never enlarged while it
+ *                           is being filled, so the caller must size it.
+ * @return the converted text
+ *
+ * NOTE(review): newStr has room for nLength * 2 + 1 characters, and writes
+ * to it and to offset are not bounds-checked. This presumably relies on no
+ * replacement being more than twice as long as its source word - confirm,
+ * in particular for user dictionary entries.
+ */
+OUString
+TextConversion_zh::getWordConversion(const OUString& aText, sal_Int32 nStartPos, sal_Int32 nLength, bool toSChinese, sal_Int32 nConversionOptions, Sequence<sal_Int32>& offset)
+{
+ sal_Int32 dictLen = 0; // passed by reference to getSTC_WordData(); not read again in this function
+ sal_Int32 maxLen = 0; // passed by reference to getSTC_WordIndex_*(); afterwards the upper bound on the candidate word length
+ const sal_uInt16 *index; // index[len] .. index[len+1]-1 is the range of entry[] slots searched for words of length len
+ const sal_uInt16 *entry; // each element is a position in wordData where a zero-terminated word starts
+ const sal_Unicode *charData; // character tables for the single-character fallback
+ const sal_uInt16 *charIndex;
+ bool one2one=true; // cleared when a replacement differs in length from its source word (only tracked when offset is in use)
+ // Fetch the word tables and the matching character tables for the requested direction.
+#ifndef DISABLE_DYNLOADING
+ const sal_Unicode *wordData = reinterpret_cast<const sal_Unicode* (*)(sal_Int32&)>(getFunctionBySymbol("getSTC_WordData"))(dictLen);
+ if (toSChinese) {
+ index = reinterpret_cast<const sal_uInt16* (*)(sal_Int32&)>(getFunctionBySymbol("getSTC_WordIndex_T2S"))(maxLen);
+ entry = reinterpret_cast<const sal_uInt16* (*)()>(getFunctionBySymbol("getSTC_WordEntry_T2S"))();
+ charData = reinterpret_cast<const sal_Unicode* (*)()>(getFunctionBySymbol("getSTC_CharData_T2S"))();
+ charIndex = reinterpret_cast<const sal_uInt16* (*)()>(getFunctionBySymbol("getSTC_CharIndex_T2S"))();
+ } else {
+ index = reinterpret_cast<const sal_uInt16* (*)(sal_Int32&)>(getFunctionBySymbol("getSTC_WordIndex_S2T"))(maxLen);
+ entry = reinterpret_cast<const sal_uInt16* (*)()>(getFunctionBySymbol("getSTC_WordEntry_S2T"))();
+ if (nConversionOptions & TextConversionOption::USE_CHARACTER_VARIANTS) {
+ charData = reinterpret_cast<const sal_Unicode* (*)()>(getFunctionBySymbol("getSTC_CharData_S2V"))();
+ charIndex = reinterpret_cast<const sal_uInt16* (*)()>(getFunctionBySymbol("getSTC_CharIndex_S2V"))();
+ } else {
+ charData = reinterpret_cast<const sal_Unicode* (*)()>(getFunctionBySymbol("getSTC_CharData_S2T"))();
+ charIndex = reinterpret_cast<const sal_uInt16* (*)()>(getFunctionBySymbol("getSTC_CharIndex_S2T"))();
+ }
+ }
+#else
+ const sal_Unicode *wordData = getSTC_WordData(dictLen);
+ if (toSChinese) {
+ index = getSTC_WordIndex_T2S(maxLen);
+ entry = getSTC_WordEntry_T2S();
+ charData = getSTC_CharData_T2S();
+ charIndex = getSTC_CharIndex_T2S();
+ } else {
+ index = getSTC_WordIndex_S2T(maxLen);
+ entry = getSTC_WordEntry_S2T();
+ if (nConversionOptions & TextConversionOption::USE_CHARACTER_VARIANTS) {
+ charData = getSTC_CharData_S2V();
+ charIndex = getSTC_CharIndex_S2V();
+ } else {
+ charData = getSTC_CharData_S2T();
+ charIndex = getSTC_CharIndex_S2T();
+ }
+ }
+#endif
+
+ if ((!wordData || !index || !entry) && !xCDL.is()) // no word mapping defined, do char2char conversion.
+ return getCharConversion(aText, nStartPos, nLength, toSChinese, nConversionOptions);
+ // currPos counts consumed source characters, count counts written output characters.
+ std::unique_ptr<sal_Unicode[]> newStr(new sal_Unicode[nLength * 2 + 1]);
+ sal_Int32 currPos = 0, count = 0;
+ auto offsetRange = asNonConstRange(offset);
+ while (currPos < nLength) {
+ sal_Int32 len = nLength - currPos;
+ bool found = false;
+ if (len > maxLen)
+ len = maxLen;
+ for (; len > 0 && ! found; len--) { // longest candidate first, shrinking by one character per attempt
+ OUString word = aText.copy(nStartPos + currPos, len);
+ sal_Int32 current = 0;
+ // user dictionary
+ if (xCDL.is()) {
+ Sequence < OUString > conversions;
+ try {
+ conversions = xCDL->queryConversions(word, 0, len,
+ aLocale, ConversionDictionaryType::SCHINESE_TCHINESE,
+ /*toSChinese ?*/ ConversionDirection_FROM_LEFT /*: ConversionDirection_FROM_RIGHT*/, // FROM_LEFT is passed for both directions; the toSChinese switch is commented out
+ nConversionOptions);
+ }
+ catch ( NoSupportException & ) {
+ // clear reference (when there is no user dictionary) in order
+ // to not always have to catch this exception again
+ // in further calls. (save time)
+ xCDL = nullptr;
+ }
+ catch (...) {
+ // catch all other exceptions to allow
+ // querying the system dictionary in the next line
+ }
+ if (conversions.hasElements()) { // only the first returned conversion is used
+ if (offset.hasElements()) {
+ if (word.getLength() != conversions[0].getLength())
+ one2one=false;
+ while (current < conversions[0].getLength()) {
+ offsetRange[count] = nStartPos + currPos + (current *
+ word.getLength() / conversions[0].getLength()); // output position scaled proportionally back into the source word
+ newStr[count++] = conversions[0][current++];
+ }
+ // offset[count-1] = nStartPos + currPos + word.getLength() - 1;
+ } else {
+ while (current < conversions[0].getLength())
+ newStr[count++] = conversions[0][current++];
+ }
+ currPos += word.getLength();
+ found = true;
+ }
+ }
+ // Built-in word table: skipped when the user dictionary matched or no words of this length are indexed.
+ if (wordData && !found && index[len+1] - index[len] > 0) {
+ sal_Int32 bottom = static_cast<sal_Int32>(index[len]);
+ sal_Int32 top = static_cast<sal_Int32>(index[len+1]) - 1;
+
+ while (bottom <= top && !found) { // binary search over entry[bottom..top]
+ current = (top + bottom) / 2;
+ const sal_Int32 result = rtl_ustr_compare(
+ word.getStr(), wordData + entry[current]);
+ if (result < 0)
+ top = current - 1;
+ else if (result > 0)
+ bottom = current + 1;
+ else { // exact match; NOTE(review): the two branches below suggest wordData stores pairs as <simplified> 0 <traditional> 0 - confirm against the table generator
+ if (toSChinese) // Traditional to Simplified: step backwards to the start of the zero-terminated string preceding the match
+ for (current = entry[current]-1; current > 0 && wordData[current-1]; current--) ;
+ else // Simplified to Traditional: the replacement starts right after the matched word and its terminating zero
+ current = entry[current] + word.getLength() + 1;
+ sal_Int32 start=current;
+ if (offset.hasElements()) {
+ if (word.getLength() != static_cast<sal_Int32>(std::u16string_view(&wordData[current]).size()))
+ one2one=false;
+ sal_Int32 convertedLength=std::u16string_view(&wordData[current]).size();
+ while (wordData[current]) {
+ offsetRange[count]=nStartPos + currPos + ((current-start) *
+ word.getLength() / convertedLength); // output position scaled proportionally back into the source word
+ newStr[count++] = wordData[current++];
+ }
+ // offset[count-1]=nStartPos + currPos + word.getLength() - 1;
+ } else {
+ while (wordData[current])
+ newStr[count++] = wordData[current++];
+ }
+ currPos += word.getLength();
+ found = true;
+ }
+ }
+ }
+ }
+ if (!found) { // no word of any length matched at this position: convert a single character
+ if (offset.hasElements())
+ offsetRange[count]=nStartPos+currPos;
+ newStr[count++] =
+ getOneCharConversion(aText[nStartPos+currPos], charData, charIndex);
+ currPos++;
+ }
+ }
+ if (offset.hasElements())
+ offset.realloc(one2one ? 0 : count); // emptied when every replacement kept its length, otherwise resized to the number of output characters
+ OUString aRet(newStr.get(), count);
+ return aRet;
+}
+
+/**
+ * Wraps getConversion(): the converted text becomes the only candidate of
+ * the result, and the boundary is set to [nStartPos, nStartPos + nLength).
+ * Any exception thrown by getConversion() propagates to the caller.
+ */
+TextConversionResult SAL_CALL
+TextConversion_zh::getConversions( const OUString& aText, sal_Int32 nStartPos, sal_Int32 nLength,
+    const Locale& rLocale, sal_Int16 nConversionType, sal_Int32 nConversionOptions)
+{
+    // Convert first; the validation inside getConversion() may throw.
+    const OUString aCandidate =
+        getConversion( aText, nStartPos, nLength, rLocale, nConversionType, nConversionOptions);
+
+    TextConversionResult aOutcome;
+    aOutcome.Boundary.startPos = nStartPos;
+    aOutcome.Boundary.endPos = nStartPos + nLength;
+    aOutcome.Candidates = { aCandidate };
+    return aOutcome;
+}
+
+/**
+ * Converts nLength characters of aText starting at nStartPos.
+ *
+ * Throws NoSupportException unless rLocale.Language is "zh" and
+ * nConversionType is TO_SCHINESE or TO_TCHINESE. rLocale is remembered in
+ * aLocale. With CHARACTER_BY_CHARACTER set in nConversionOptions the
+ * character tables are used; otherwise the word conversion is used with an
+ * empty offset sequence.
+ */
+OUString SAL_CALL
+TextConversion_zh::getConversion( const OUString& aText, sal_Int32 nStartPos, sal_Int32 nLength,
+    const Locale& rLocale, sal_Int16 nConversionType, sal_Int32 nConversionOptions)
+{
+    const bool toSChinese = nConversionType == TextConversionType::TO_SCHINESE;
+    const bool toTChinese = nConversionType == TextConversionType::TO_TCHINESE;
+
+    // Only Chinese, and only the two Chinese conversion types, are handled here.
+    if (rLocale.Language != "zh" || !(toSChinese || toTChinese))
+        throw NoSupportException();
+
+    aLocale=rLocale;
+
+    if (!(nConversionOptions & TextConversionOption::CHARACTER_BY_CHARACTER)) {
+        // Word dictionary; the caller is not interested in offsets.
+        Sequence <sal_Int32> aNoOffsets;
+        return getWordConversion(aText, nStartPos, nLength, toSChinese, nConversionOptions, aNoOffsets);
+    }
+
+    // Character dictionary.
+    return getCharConversion(aText, nStartPos, nLength, toSChinese, nConversionOptions);
+}
+
+/**
+ * Same conversion as getConversion(), additionally maintaining offset.
+ *
+ * Throws NoSupportException unless rLocale.Language is "zh" and
+ * nConversionType is TO_SCHINESE or TO_TCHINESE. With
+ * CHARACTER_BY_CHARACTER set, offset is emptied and the character tables
+ * are used. Otherwise offset is enlarged to at least 2 * nLength elements
+ * and handed to the word conversion, which fills and resizes it.
+ */
+OUString SAL_CALL
+TextConversion_zh::getConversionWithOffset( const OUString& aText, sal_Int32 nStartPos, sal_Int32 nLength,
+    const Locale& rLocale, sal_Int16 nConversionType, sal_Int32 nConversionOptions, Sequence<sal_Int32>& offset)
+{
+    const bool toSChinese = nConversionType == TextConversionType::TO_SCHINESE;
+    const bool toTChinese = nConversionType == TextConversionType::TO_TCHINESE;
+
+    // Only Chinese, and only the two Chinese conversion types, are handled here.
+    if (rLocale.Language != "zh" || !(toSChinese || toTChinese))
+        throw NoSupportException();
+
+    aLocale=rLocale;
+
+    if (nConversionOptions & TextConversionOption::CHARACTER_BY_CHARACTER) {
+        // Character dictionary: no offsets are reported.
+        offset.realloc(0);
+        return getCharConversion(aText, nStartPos, nLength, toSChinese, nConversionOptions);
+    }
+
+    // Word dictionary: make sure the offset sequence is large enough first.
+    const sal_Int32 nWanted = 2*nLength;
+    if (offset.getLength() < nWanted)
+        offset.realloc(nWanted);
+    return getWordConversion(aText, nStartPos, nLength, toSChinese, nConversionOptions, offset);
+}
+/**
+ * Always returns false; all three arguments are ignored.
+ */
+sal_Bool SAL_CALL
+TextConversion_zh::interactiveConversion( const Locale& /*rLocale*/, sal_Int16 /*nTextConversionType*/, sal_Int32 /*nTextConversionOptions*/ )
+{
+ return false;
+}
+
+}
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */